diff options
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu')
214 files changed, 12462 insertions, 2608 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index 260e32ef7bae..1f6b56ec99f6 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -70,7 +70,8 @@ amdgpu-y += amdgpu_device.o amdgpu_doorbell_mgr.o amdgpu_kms.o \ amdgpu_cs.o amdgpu_bios.o amdgpu_benchmark.o \ atombios_dp.o amdgpu_afmt.o amdgpu_trace_points.o \ atombios_encoders.o amdgpu_sa.o atombios_i2c.o \ - amdgpu_dma_buf.o amdgpu_vm.o amdgpu_vm_pt.o amdgpu_ib.o amdgpu_pll.o \ + amdgpu_dma_buf.o amdgpu_vm.o amdgpu_vm_pt.o amdgpu_vm_tlb_fence.o \ + amdgpu_ib.o amdgpu_pll.o \ amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o \ amdgpu_gtt_mgr.o amdgpu_preempt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o \ amdgpu_atomfirmware.o amdgpu_vf_error.o amdgpu_sched.o \ @@ -80,7 +81,7 @@ amdgpu-y += amdgpu_device.o amdgpu_doorbell_mgr.o amdgpu_kms.o \ amdgpu_umc.o smu_v11_0_i2c.o amdgpu_fru_eeprom.o amdgpu_rap.o \ amdgpu_fw_attestation.o amdgpu_securedisplay.o \ amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o amdgpu_lsdma.o \ - amdgpu_ring_mux.o amdgpu_xcp.o amdgpu_seq64.o + amdgpu_ring_mux.o amdgpu_xcp.o amdgpu_seq64.o amdgpu_aca.o amdgpu_dev_coredump.o amdgpu-$(CONFIG_PROC_FS) += amdgpu_fdinfo.o @@ -98,7 +99,7 @@ amdgpu-y += \ vega20_reg_init.o nbio_v7_4.o nbio_v2_3.o nv.o arct_reg_init.o mxgpu_nv.o \ nbio_v7_2.o hdp_v4_0.o hdp_v5_0.o aldebaran_reg_init.o aldebaran.o soc21.o \ sienna_cichlid.o smu_v13_0_10.o nbio_v4_3.o hdp_v6_0.o nbio_v7_7.o hdp_v5_2.o lsdma_v6_0.o \ - nbio_v7_9.o aqua_vanjaram.o nbio_v7_11.o + nbio_v7_9.o aqua_vanjaram.o nbio_v7_11.o lsdma_v7_0.o hdp_v7_0.o nbif_v6_3_1.o # add DF block amdgpu-y += \ @@ -132,7 +133,8 @@ amdgpu-y += \ vega20_ih.o \ navi10_ih.o \ ih_v6_0.o \ - ih_v6_1.o + ih_v6_1.o \ + ih_v7_0.o # add PSP block amdgpu-y += \ @@ -143,7 +145,8 @@ amdgpu-y += \ psp_v11_0_8.o \ psp_v12_0.o \ psp_v13_0.o \ - psp_v13_0_4.o + psp_v13_0_4.o \ + psp_v14_0.o # add DCE block amdgpu-y += \ @@ -208,6 +211,7 @@ amdgpu-y += \ vcn_v4_0.o \ vcn_v4_0_3.o \ vcn_v4_0_5.o \ + vcn_v5_0_0.o \ amdgpu_jpeg.o \ jpeg_v1_0.o \ jpeg_v2_0.o \ @@ -215,7 +219,8 @@ amdgpu-y += \ jpeg_v3_0.o \ jpeg_v4_0.o \ jpeg_v4_0_3.o \ - jpeg_v4_0_5.o + jpeg_v4_0_5.o \ + jpeg_v5_0_0.o # add VPE block amdgpu-y += \ @@ -233,7 +238,8 @@ amdgpu-y += \ athub_v1_0.o \ athub_v2_0.o \ athub_v2_1.o \ - athub_v3_0.o + athub_v3_0.o \ + athub_v4_1_0.o # add SMUIO block amdgpu-y += \ @@ -242,7 +248,8 @@ amdgpu-y += \ smuio_v11_0_6.o \ smuio_v13_0.o \ smuio_v13_0_3.o \ - smuio_v13_0_6.o + smuio_v13_0_6.o \ + smuio_v14_0_2.o # add reset block amdgpu-y += \ diff --git a/drivers/gpu/drm/amd/amdgpu/aldebaran.c b/drivers/gpu/drm/amd/amdgpu/aldebaran.c index 576067d66bb9..d0a8da67dc2a 100644 --- a/drivers/gpu/drm/amd/amdgpu/aldebaran.c +++ b/drivers/gpu/drm/amd/amdgpu/aldebaran.c @@ -97,7 +97,7 @@ static int aldebaran_mode2_suspend_ip(struct amdgpu_device *adev) adev->ip_blocks[i].status.hw = false; } - return r; + return 0; } static int diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 79827a6dcd7f..f87d53e183c3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -107,6 +107,7 @@ #include "amdgpu_smuio.h" #include "amdgpu_fdinfo.h" #include "amdgpu_mca.h" +#include "amdgpu_aca.h" #include "amdgpu_ras.h" #include "amdgpu_xcp.h" #include "amdgpu_seq64.h" @@ -114,14 +115,12 @@ #define MAX_GPU_INSTANCE 64 -struct amdgpu_gpu_instance -{ +struct amdgpu_gpu_instance { struct amdgpu_device *adev; int mgpu_fan_enabled; }; -struct amdgpu_mgpu_info -{ +struct amdgpu_mgpu_info { struct amdgpu_gpu_instance gpu_ins[MAX_GPU_INSTANCE]; struct mutex mutex; uint32_t num_gpu; @@ -140,8 +139,15 @@ enum amdgpu_ss { AMDGPU_SS_DRV_UNLOAD }; -struct amdgpu_watchdog_timer -{ +struct amdgpu_hwip_reg_entry { + u32 hwip; + u32 inst; + u32 seg; + u32 reg_offset; + const char *reg_name; +}; + +struct amdgpu_watchdog_timer { bool timeout_fatal_disable; uint32_t period; /* maxCycles = (1 << period), the number of cycles before a timeout */ }; @@ -196,9 +202,10 @@ extern int amdgpu_emu_mode; extern uint amdgpu_smu_memory_pool_size; extern int amdgpu_smu_pptable_id; extern uint amdgpu_dc_feature_mask; +extern uint amdgpu_freesync_vid_mode; extern uint amdgpu_dc_debug_mask; extern uint amdgpu_dc_visual_confirm; -extern uint amdgpu_dm_abm_level; +extern int amdgpu_dm_abm_level; extern int amdgpu_backlight; extern int amdgpu_damage_clips; extern struct amdgpu_mgpu_info mgpu_info; @@ -211,6 +218,7 @@ extern int amdgpu_async_gfx_ring; extern int amdgpu_mcbp; extern int amdgpu_discovery; extern int amdgpu_mes; +extern int amdgpu_mes_log_enable; extern int amdgpu_mes_kiq; extern int amdgpu_noretry; extern int amdgpu_force_asic_type; @@ -494,6 +502,7 @@ struct amdgpu_wb { uint64_t gpu_addr; u32 num_wb; /* Number of wb slots actually reserved for amdgpu. */ unsigned long used[DIV_ROUND_UP(AMDGPU_MAX_WB, BITS_PER_LONG)]; + spinlock_t lock; }; int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb); @@ -606,7 +615,7 @@ struct amdgpu_asic_funcs { /* PCIe replay counter */ uint64_t (*get_pcie_replay_count)(struct amdgpu_device *adev); /* device supports BACO */ - bool (*supports_baco)(struct amdgpu_device *adev); + int (*supports_baco)(struct amdgpu_device *adev); /* pre asic_init quirks */ void (*pre_asic_init)(struct amdgpu_device *adev); /* enter/exit umd stable pstate */ @@ -1046,6 +1055,9 @@ struct amdgpu_device { /* MCA */ struct amdgpu_mca mca; + /* ACA */ + struct amdgpu_aca aca; + struct amdgpu_ip_block ip_blocks[AMDGPU_MAX_IP_NUM]; uint32_t harvest_ip_mask; int num_ip_blocks; @@ -1095,6 +1107,7 @@ struct amdgpu_device { long sdma_timeout; long video_timeout; long compute_timeout; + long psp_timeout; uint64_t unique_id; uint64_t df_perfmon_config_assign_mask[AMDGPU_MAX_DF_PERFMONS]; @@ -1332,6 +1345,7 @@ int emu_soc_asic_init(struct amdgpu_device *adev); #define WREG32_FIELD_OFFSET(reg, offset, field, val) \ WREG32(mm##reg + offset, (RREG32(mm##reg + offset) & ~REG_FIELD_MASK(reg, field)) | (val) << REG_FIELD_SHIFT(reg, field)) +#define AMDGPU_GET_REG_FIELD(x, h, l) (((x) & GENMASK_ULL(h, l)) >> (l)) /* * BIOS helpers. */ @@ -1403,7 +1417,8 @@ bool amdgpu_device_supports_atpx(struct drm_device *dev); bool amdgpu_device_supports_px(struct drm_device *dev); bool amdgpu_device_supports_boco(struct drm_device *dev); bool amdgpu_device_supports_smart_shift(struct drm_device *dev); -bool amdgpu_device_supports_baco(struct drm_device *dev); +int amdgpu_device_supports_baco(struct drm_device *dev); +void amdgpu_device_detect_runtime_pm_mode(struct amdgpu_device *adev); bool amdgpu_device_is_peer_accessible(struct amdgpu_device *adev, struct amdgpu_device *peer_adev); int amdgpu_device_baco_enter(struct drm_device *dev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c new file mode 100644 index 000000000000..c50202215f6b --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c @@ -0,0 +1,902 @@ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include <linux/list.h> +#include "amdgpu.h" +#include "amdgpu_aca.h" +#include "amdgpu_ras.h" + +#define ACA_BANK_HWID(type, hwid, mcatype) [ACA_HWIP_TYPE_##type] = {hwid, mcatype} + +typedef int bank_handler_t(struct aca_handle *handle, struct aca_bank *bank, enum aca_smu_type type, void *data); + +struct aca_banks { + int nr_banks; + struct list_head list; +}; + +struct aca_hwip { + int hwid; + int mcatype; +}; + +static struct aca_hwip aca_hwid_mcatypes[ACA_HWIP_TYPE_COUNT] = { + ACA_BANK_HWID(SMU, 0x01, 0x01), + ACA_BANK_HWID(PCS_XGMI, 0x50, 0x00), + ACA_BANK_HWID(UMC, 0x96, 0x00), +}; + +static void aca_banks_init(struct aca_banks *banks) +{ + if (!banks) + return; + + memset(banks, 0, sizeof(*banks)); + INIT_LIST_HEAD(&banks->list); +} + +static int aca_banks_add_bank(struct aca_banks *banks, struct aca_bank *bank) +{ + struct aca_bank_node *node; + + if (!bank) + return -EINVAL; + + node = kvzalloc(sizeof(*node), GFP_KERNEL); + if (!node) + return -ENOMEM; + + memcpy(&node->bank, bank, sizeof(*bank)); + + INIT_LIST_HEAD(&node->node); + list_add_tail(&node->node, &banks->list); + + banks->nr_banks++; + + return 0; +} + +static void aca_banks_release(struct aca_banks *banks) +{ + struct aca_bank_node *node, *tmp; + + list_for_each_entry_safe(node, tmp, &banks->list, node) { + list_del(&node->node); + kvfree(node); + } +} + +static int aca_smu_get_valid_aca_count(struct amdgpu_device *adev, enum aca_smu_type type, u32 *count) +{ + struct amdgpu_aca *aca = &adev->aca; + const struct aca_smu_funcs *smu_funcs = aca->smu_funcs; + + if (!count) + return -EINVAL; + + if (!smu_funcs || !smu_funcs->get_valid_aca_count) + return -EOPNOTSUPP; + + return smu_funcs->get_valid_aca_count(adev, type, count); +} + +static struct aca_regs_dump { + const char *name; + int reg_idx; +} aca_regs[] = { + {"CONTROL", ACA_REG_IDX_CTL}, + {"STATUS", ACA_REG_IDX_STATUS}, + {"ADDR", ACA_REG_IDX_ADDR}, + {"MISC", ACA_REG_IDX_MISC0}, + {"CONFIG", ACA_REG_IDX_CONFG}, + {"IPID", ACA_REG_IDX_IPID}, + {"SYND", ACA_REG_IDX_SYND}, + {"DESTAT", ACA_REG_IDX_DESTAT}, + {"DEADDR", ACA_REG_IDX_DEADDR}, + {"CONTROL_MASK", ACA_REG_IDX_CTL_MASK}, +}; + +static void aca_smu_bank_dump(struct amdgpu_device *adev, int idx, int total, struct aca_bank *bank, + struct ras_query_context *qctx) +{ + u64 event_id = qctx ? qctx->event_id : 0ULL; + int i; + + RAS_EVENT_LOG(adev, event_id, HW_ERR "Accelerator Check Architecture events logged\n"); + /* plus 1 for output format, e.g: ACA[08/08]: xxxx */ + for (i = 0; i < ARRAY_SIZE(aca_regs); i++) + RAS_EVENT_LOG(adev, event_id, HW_ERR "ACA[%02d/%02d].%s=0x%016llx\n", + idx + 1, total, aca_regs[i].name, bank->regs[aca_regs[i].reg_idx]); +} + +static int aca_smu_get_valid_aca_banks(struct amdgpu_device *adev, enum aca_smu_type type, + int start, int count, + struct aca_banks *banks, struct ras_query_context *qctx) +{ + struct amdgpu_aca *aca = &adev->aca; + const struct aca_smu_funcs *smu_funcs = aca->smu_funcs; + struct aca_bank bank; + int i, max_count, ret; + + if (!count) + return 0; + + if (!smu_funcs || !smu_funcs->get_valid_aca_bank) + return -EOPNOTSUPP; + + switch (type) { + case ACA_SMU_TYPE_UE: + max_count = smu_funcs->max_ue_bank_count; + break; + case ACA_SMU_TYPE_CE: + max_count = smu_funcs->max_ce_bank_count; + break; + default: + return -EINVAL; + } + + if (start + count >= max_count) + return -EINVAL; + + count = min_t(int, count, max_count); + for (i = 0; i < count; i++) { + memset(&bank, 0, sizeof(bank)); + ret = smu_funcs->get_valid_aca_bank(adev, type, start + i, &bank); + if (ret) + return ret; + + bank.type = type; + + aca_smu_bank_dump(adev, i, count, &bank, qctx); + + ret = aca_banks_add_bank(banks, &bank); + if (ret) + return ret; + } + + return 0; +} + +static bool aca_bank_hwip_is_matched(struct aca_bank *bank, enum aca_hwip_type type) +{ + + struct aca_hwip *hwip; + int hwid, mcatype; + u64 ipid; + + if (!bank || type == ACA_HWIP_TYPE_UNKNOW) + return false; + + hwip = &aca_hwid_mcatypes[type]; + if (!hwip->hwid) + return false; + + ipid = bank->regs[ACA_REG_IDX_IPID]; + hwid = ACA_REG__IPID__HARDWAREID(ipid); + mcatype = ACA_REG__IPID__MCATYPE(ipid); + + return hwip->hwid == hwid && hwip->mcatype == mcatype; +} + +static bool aca_bank_is_valid(struct aca_handle *handle, struct aca_bank *bank, enum aca_smu_type type) +{ + const struct aca_bank_ops *bank_ops = handle->bank_ops; + + if (!aca_bank_hwip_is_matched(bank, handle->hwip)) + return false; + + if (!bank_ops->aca_bank_is_valid) + return true; + + return bank_ops->aca_bank_is_valid(handle, bank, type, handle->data); +} + +static struct aca_bank_error *new_bank_error(struct aca_error *aerr, struct aca_bank_info *info) +{ + struct aca_bank_error *bank_error; + + bank_error = kvzalloc(sizeof(*bank_error), GFP_KERNEL); + if (!bank_error) + return NULL; + + INIT_LIST_HEAD(&bank_error->node); + memcpy(&bank_error->info, info, sizeof(*info)); + + mutex_lock(&aerr->lock); + list_add_tail(&bank_error->node, &aerr->list); + mutex_unlock(&aerr->lock); + + return bank_error; +} + +static struct aca_bank_error *find_bank_error(struct aca_error *aerr, struct aca_bank_info *info) +{ + struct aca_bank_error *bank_error = NULL; + struct aca_bank_info *tmp_info; + bool found = false; + + mutex_lock(&aerr->lock); + list_for_each_entry(bank_error, &aerr->list, node) { + tmp_info = &bank_error->info; + if (tmp_info->socket_id == info->socket_id && + tmp_info->die_id == info->die_id) { + found = true; + goto out_unlock; + } + } + +out_unlock: + mutex_unlock(&aerr->lock); + + return found ? bank_error : NULL; +} + +static void aca_bank_error_remove(struct aca_error *aerr, struct aca_bank_error *bank_error) +{ + if (!aerr || !bank_error) + return; + + list_del(&bank_error->node); + aerr->nr_errors--; + + kvfree(bank_error); +} + +static struct aca_bank_error *get_bank_error(struct aca_error *aerr, struct aca_bank_info *info) +{ + struct aca_bank_error *bank_error; + + if (!aerr || !info) + return NULL; + + bank_error = find_bank_error(aerr, info); + if (bank_error) + return bank_error; + + return new_bank_error(aerr, info); +} + +int aca_error_cache_log_bank_error(struct aca_handle *handle, struct aca_bank_info *info, + enum aca_error_type type, u64 count) +{ + struct aca_error_cache *error_cache = &handle->error_cache; + struct aca_bank_error *bank_error; + struct aca_error *aerr; + + if (!handle || !info || type >= ACA_ERROR_TYPE_COUNT) + return -EINVAL; + + if (!count) + return 0; + + aerr = &error_cache->errors[type]; + bank_error = get_bank_error(aerr, info); + if (!bank_error) + return -ENOMEM; + + bank_error->count += count; + + return 0; +} + +static int aca_bank_parser(struct aca_handle *handle, struct aca_bank *bank, enum aca_smu_type type) +{ + const struct aca_bank_ops *bank_ops = handle->bank_ops; + + if (!bank) + return -EINVAL; + + if (!bank_ops->aca_bank_parser) + return -EOPNOTSUPP; + + return bank_ops->aca_bank_parser(handle, bank, type, + handle->data); +} + +static int handler_aca_log_bank_error(struct aca_handle *handle, struct aca_bank *bank, + enum aca_smu_type type, void *data) +{ + int ret; + + ret = aca_bank_parser(handle, bank, type); + if (ret) + return ret; + + return 0; +} + +static int aca_dispatch_bank(struct aca_handle_manager *mgr, struct aca_bank *bank, + enum aca_smu_type type, bank_handler_t handler, void *data) +{ + struct aca_handle *handle; + int ret; + + if (list_empty(&mgr->list)) + return 0; + + list_for_each_entry(handle, &mgr->list, node) { + if (!aca_bank_is_valid(handle, bank, type)) + continue; + + ret = handler(handle, bank, type, data); + if (ret) + return ret; + } + + return 0; +} + +static int aca_dispatch_banks(struct aca_handle_manager *mgr, struct aca_banks *banks, + enum aca_smu_type type, bank_handler_t handler, void *data) +{ + struct aca_bank_node *node; + struct aca_bank *bank; + int ret; + + if (!mgr || !banks) + return -EINVAL; + + /* pre check to avoid unnecessary operations */ + if (list_empty(&mgr->list) || list_empty(&banks->list)) + return 0; + + list_for_each_entry(node, &banks->list, node) { + bank = &node->bank; + + ret = aca_dispatch_bank(mgr, bank, type, handler, data); + if (ret) + return ret; + } + + return 0; +} + +static bool aca_bank_should_update(struct amdgpu_device *adev, enum aca_smu_type type) +{ + struct amdgpu_aca *aca = &adev->aca; + bool ret = true; + + /* + * Because the UE Valid MCA count will only be cleared after reset, + * in order to avoid repeated counting of the error count, + * the aca bank is only updated once during the gpu recovery stage. + */ + if (type == ACA_SMU_TYPE_UE) { + if (amdgpu_ras_intr_triggered()) + ret = atomic_cmpxchg(&aca->ue_update_flag, 0, 1) == 0; + else + atomic_set(&aca->ue_update_flag, 0); + } + + return ret; +} + +static int aca_banks_update(struct amdgpu_device *adev, enum aca_smu_type type, + bank_handler_t handler, struct ras_query_context *qctx, void *data) +{ + struct amdgpu_aca *aca = &adev->aca; + struct aca_banks banks; + u32 count = 0; + int ret; + + if (list_empty(&aca->mgr.list)) + return 0; + + if (!aca_bank_should_update(adev, type)) + return 0; + + ret = aca_smu_get_valid_aca_count(adev, type, &count); + if (ret) + return ret; + + if (!count) + return 0; + + aca_banks_init(&banks); + + ret = aca_smu_get_valid_aca_banks(adev, type, 0, count, &banks, qctx); + if (ret) + goto err_release_banks; + + if (list_empty(&banks.list)) { + ret = 0; + goto err_release_banks; + } + + ret = aca_dispatch_banks(&aca->mgr, &banks, type, + handler, data); + if (ret) + goto err_release_banks; + +err_release_banks: + aca_banks_release(&banks); + + return ret; +} + +static int aca_log_aca_error_data(struct aca_bank_error *bank_error, enum aca_error_type type, struct ras_err_data *err_data) +{ + struct aca_bank_info *info; + struct amdgpu_smuio_mcm_config_info mcm_info; + u64 count; + + if (type >= ACA_ERROR_TYPE_COUNT) + return -EINVAL; + + count = bank_error->count; + if (!count) + return 0; + + info = &bank_error->info; + mcm_info.die_id = info->die_id; + mcm_info.socket_id = info->socket_id; + + switch (type) { + case ACA_ERROR_TYPE_UE: + amdgpu_ras_error_statistic_ue_count(err_data, &mcm_info, NULL, count); + break; + case ACA_ERROR_TYPE_CE: + amdgpu_ras_error_statistic_ce_count(err_data, &mcm_info, NULL, count); + break; + case ACA_ERROR_TYPE_DEFERRED: + amdgpu_ras_error_statistic_de_count(err_data, &mcm_info, NULL, count); + break; + default: + break; + } + + return 0; +} + +static int aca_log_aca_error(struct aca_handle *handle, enum aca_error_type type, struct ras_err_data *err_data) +{ + struct aca_error_cache *error_cache = &handle->error_cache; + struct aca_error *aerr = &error_cache->errors[type]; + struct aca_bank_error *bank_error, *tmp; + + mutex_lock(&aerr->lock); + + if (list_empty(&aerr->list)) + goto out_unlock; + + list_for_each_entry_safe(bank_error, tmp, &aerr->list, node) { + aca_log_aca_error_data(bank_error, type, err_data); + aca_bank_error_remove(aerr, bank_error); + } + +out_unlock: + mutex_unlock(&aerr->lock); + + return 0; +} + +static int __aca_get_error_data(struct amdgpu_device *adev, struct aca_handle *handle, enum aca_error_type type, + struct ras_err_data *err_data, struct ras_query_context *qctx) +{ + enum aca_smu_type smu_type; + int ret; + + switch (type) { + case ACA_ERROR_TYPE_UE: + smu_type = ACA_SMU_TYPE_UE; + break; + case ACA_ERROR_TYPE_CE: + case ACA_ERROR_TYPE_DEFERRED: + smu_type = ACA_SMU_TYPE_CE; + break; + default: + return -EINVAL; + } + + /* udpate aca bank to aca source error_cache first */ + ret = aca_banks_update(adev, smu_type, handler_aca_log_bank_error, qctx, NULL); + if (ret) + return ret; + + return aca_log_aca_error(handle, type, err_data); +} + +static bool aca_handle_is_valid(struct aca_handle *handle) +{ + if (!handle->mask || !list_empty(&handle->node)) + return false; + + return true; +} + +int amdgpu_aca_get_error_data(struct amdgpu_device *adev, struct aca_handle *handle, + enum aca_error_type type, struct ras_err_data *err_data, + struct ras_query_context *qctx) +{ + if (!handle || !err_data) + return -EINVAL; + + if (aca_handle_is_valid(handle)) + return -EOPNOTSUPP; + + if (!(BIT(type) & handle->mask)) + return 0; + + return __aca_get_error_data(adev, handle, type, err_data, qctx); +} + +static void aca_error_init(struct aca_error *aerr, enum aca_error_type type) +{ + mutex_init(&aerr->lock); + INIT_LIST_HEAD(&aerr->list); + aerr->type = type; + aerr->nr_errors = 0; +} + +static void aca_init_error_cache(struct aca_handle *handle) +{ + struct aca_error_cache *error_cache = &handle->error_cache; + int type; + + for (type = ACA_ERROR_TYPE_UE; type < ACA_ERROR_TYPE_COUNT; type++) + aca_error_init(&error_cache->errors[type], type); +} + +static void aca_error_fini(struct aca_error *aerr) +{ + struct aca_bank_error *bank_error, *tmp; + + mutex_lock(&aerr->lock); + list_for_each_entry_safe(bank_error, tmp, &aerr->list, node) + aca_bank_error_remove(aerr, bank_error); + + mutex_destroy(&aerr->lock); +} + +static void aca_fini_error_cache(struct aca_handle *handle) +{ + struct aca_error_cache *error_cache = &handle->error_cache; + int type; + + for (type = ACA_ERROR_TYPE_UE; type < ACA_ERROR_TYPE_COUNT; type++) + aca_error_fini(&error_cache->errors[type]); +} + +static int add_aca_handle(struct amdgpu_device *adev, struct aca_handle_manager *mgr, struct aca_handle *handle, + const char *name, const struct aca_info *ras_info, void *data) +{ + memset(handle, 0, sizeof(*handle)); + + handle->adev = adev; + handle->mgr = mgr; + handle->name = name; + handle->hwip = ras_info->hwip; + handle->mask = ras_info->mask; + handle->bank_ops = ras_info->bank_ops; + handle->data = data; + aca_init_error_cache(handle); + + INIT_LIST_HEAD(&handle->node); + list_add_tail(&handle->node, &mgr->list); + mgr->nr_handles++; + + return 0; +} + +static ssize_t aca_sysfs_read(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct aca_handle *handle = container_of(attr, struct aca_handle, aca_attr); + + /* NOTE: the aca cache will be auto cleared once read, + * So the driver should unify the query entry point, forward request to ras query interface directly */ + return amdgpu_ras_aca_sysfs_read(dev, attr, handle, buf, handle->data); +} + +static int add_aca_sysfs(struct amdgpu_device *adev, struct aca_handle *handle) +{ + struct device_attribute *aca_attr = &handle->aca_attr; + + snprintf(handle->attr_name, sizeof(handle->attr_name) - 1, "aca_%s", handle->name); + aca_attr->show = aca_sysfs_read; + aca_attr->attr.name = handle->attr_name; + aca_attr->attr.mode = S_IRUGO; + sysfs_attr_init(&aca_attr->attr); + + return sysfs_add_file_to_group(&adev->dev->kobj, + &aca_attr->attr, + "ras"); +} + +int amdgpu_aca_add_handle(struct amdgpu_device *adev, struct aca_handle *handle, + const char *name, const struct aca_info *ras_info, void *data) +{ + struct amdgpu_aca *aca = &adev->aca; + int ret; + + if (!amdgpu_aca_is_enabled(adev)) + return 0; + + ret = add_aca_handle(adev, &aca->mgr, handle, name, ras_info, data); + if (ret) + return ret; + + return add_aca_sysfs(adev, handle); +} + +static void remove_aca_handle(struct aca_handle *handle) +{ + struct aca_handle_manager *mgr = handle->mgr; + + aca_fini_error_cache(handle); + list_del(&handle->node); + mgr->nr_handles--; +} + +static void remove_aca_sysfs(struct aca_handle *handle) +{ + struct amdgpu_device *adev = handle->adev; + struct device_attribute *aca_attr = &handle->aca_attr; + + if (adev->dev->kobj.sd) + sysfs_remove_file_from_group(&adev->dev->kobj, + &aca_attr->attr, + "ras"); +} + +void amdgpu_aca_remove_handle(struct aca_handle *handle) +{ + if (!handle || list_empty(&handle->node)) + return; + + remove_aca_sysfs(handle); + remove_aca_handle(handle); +} + +static int aca_manager_init(struct aca_handle_manager *mgr) +{ + INIT_LIST_HEAD(&mgr->list); + mgr->nr_handles = 0; + + return 0; +} + +static void aca_manager_fini(struct aca_handle_manager *mgr) +{ + struct aca_handle *handle, *tmp; + + list_for_each_entry_safe(handle, tmp, &mgr->list, node) + amdgpu_aca_remove_handle(handle); +} + +bool amdgpu_aca_is_enabled(struct amdgpu_device *adev) +{ + return adev->aca.is_enabled; +} + +int amdgpu_aca_init(struct amdgpu_device *adev) +{ + struct amdgpu_aca *aca = &adev->aca; + int ret; + + atomic_set(&aca->ue_update_flag, 0); + + ret = aca_manager_init(&aca->mgr); + if (ret) + return ret; + + return 0; +} + +void amdgpu_aca_fini(struct amdgpu_device *adev) +{ + struct amdgpu_aca *aca = &adev->aca; + + aca_manager_fini(&aca->mgr); + + atomic_set(&aca->ue_update_flag, 0); +} + +int amdgpu_aca_reset(struct amdgpu_device *adev) +{ + amdgpu_aca_fini(adev); + + return amdgpu_aca_init(adev); +} + +void amdgpu_aca_set_smu_funcs(struct amdgpu_device *adev, const struct aca_smu_funcs *smu_funcs) +{ + struct amdgpu_aca *aca = &adev->aca; + + WARN_ON(aca->smu_funcs); + aca->smu_funcs = smu_funcs; +} + +int aca_bank_info_decode(struct aca_bank *bank, struct aca_bank_info *info) +{ + u64 ipid; + u32 instidhi, instidlo; + + if (!bank || !info) + return -EINVAL; + + ipid = bank->regs[ACA_REG_IDX_IPID]; + info->hwid = ACA_REG__IPID__HARDWAREID(ipid); + info->mcatype = ACA_REG__IPID__MCATYPE(ipid); + /* + * Unfied DieID Format: SAASS. A:AID, S:Socket. + * Unfied DieID[4:4] = InstanceId[0:0] + * Unfied DieID[0:3] = InstanceIdHi[0:3] + */ + instidhi = ACA_REG__IPID__INSTANCEIDHI(ipid); + instidlo = ACA_REG__IPID__INSTANCEIDLO(ipid); + info->die_id = ((instidhi >> 2) & 0x03); + info->socket_id = ((instidlo & 0x1) << 2) | (instidhi & 0x03); + + return 0; +} + +static int aca_bank_get_error_code(struct amdgpu_device *adev, struct aca_bank *bank) +{ + struct amdgpu_aca *aca = &adev->aca; + const struct aca_smu_funcs *smu_funcs = aca->smu_funcs; + + if (!smu_funcs || !smu_funcs->parse_error_code) + return -EOPNOTSUPP; + + return smu_funcs->parse_error_code(adev, bank); +} + +int aca_bank_check_error_codes(struct amdgpu_device *adev, struct aca_bank *bank, int *err_codes, int size) +{ + int i, error_code; + + if (!bank || !err_codes) + return -EINVAL; + + error_code = aca_bank_get_error_code(adev, bank); + if (error_code < 0) + return error_code; + + for (i = 0; i < size; i++) { + if (err_codes[i] == error_code) + return 0; + } + + return -EINVAL; +} + +int amdgpu_aca_smu_set_debug_mode(struct amdgpu_device *adev, bool en) +{ + struct amdgpu_aca *aca = &adev->aca; + const struct aca_smu_funcs *smu_funcs = aca->smu_funcs; + + if (!smu_funcs || !smu_funcs->set_debug_mode) + return -EOPNOTSUPP; + + return smu_funcs->set_debug_mode(adev, en); +} + +#if defined(CONFIG_DEBUG_FS) +static int amdgpu_aca_smu_debug_mode_set(void *data, u64 val) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)data; + int ret; + + ret = amdgpu_ras_set_aca_debug_mode(adev, val ? true : false); + if (ret) + return ret; + + dev_info(adev->dev, "amdgpu set smu aca debug mode %s success\n", val ? "on" : "off"); + + return 0; +} + +static void aca_dump_entry(struct seq_file *m, struct aca_bank *bank, enum aca_smu_type type, int idx) +{ + struct aca_bank_info info; + int i, ret; + + ret = aca_bank_info_decode(bank, &info); + if (ret) + return; + + seq_printf(m, "aca entry[%d].type: %s\n", idx, type == ACA_SMU_TYPE_UE ? "UE" : "CE"); + seq_printf(m, "aca entry[%d].info: socketid:%d aid:%d hwid:0x%03x mcatype:0x%04x\n", + idx, info.socket_id, info.die_id, info.hwid, info.mcatype); + + for (i = 0; i < ARRAY_SIZE(aca_regs); i++) + seq_printf(m, "aca entry[%d].regs[%d]: 0x%016llx\n", idx, aca_regs[i].reg_idx, bank->regs[aca_regs[i].reg_idx]); +} + +struct aca_dump_context { + struct seq_file *m; + int idx; +}; + +static int handler_aca_bank_dump(struct aca_handle *handle, struct aca_bank *bank, + enum aca_smu_type type, void *data) +{ + struct aca_dump_context *ctx = (struct aca_dump_context *)data; + + aca_dump_entry(ctx->m, bank, type, ctx->idx++); + + return handler_aca_log_bank_error(handle, bank, type, NULL); +} + +static int aca_dump_show(struct seq_file *m, enum aca_smu_type type) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)m->private; + struct aca_dump_context context = { + .m = m, + .idx = 0, + }; + + return aca_banks_update(adev, type, handler_aca_bank_dump, NULL, (void *)&context); +} + +static int aca_dump_ce_show(struct seq_file *m, void *unused) +{ + return aca_dump_show(m, ACA_SMU_TYPE_CE); +} + +static int aca_dump_ce_open(struct inode *inode, struct file *file) +{ + return single_open(file, aca_dump_ce_show, inode->i_private); +} + +static const struct file_operations aca_ce_dump_debug_fops = { + .owner = THIS_MODULE, + .open = aca_dump_ce_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int aca_dump_ue_show(struct seq_file *m, void *unused) +{ + return aca_dump_show(m, ACA_SMU_TYPE_UE); +} + +static int aca_dump_ue_open(struct inode *inode, struct file *file) +{ + return single_open(file, aca_dump_ue_show, inode->i_private); +} + +static const struct file_operations aca_ue_dump_debug_fops = { + .owner = THIS_MODULE, + .open = aca_dump_ue_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +DEFINE_DEBUGFS_ATTRIBUTE(aca_debug_mode_fops, NULL, amdgpu_aca_smu_debug_mode_set, "%llu\n"); +#endif + +void amdgpu_aca_smu_debugfs_init(struct amdgpu_device *adev, struct dentry *root) +{ +#if defined(CONFIG_DEBUG_FS) + if (!root || adev->ip_versions[MP1_HWIP][0] != IP_VERSION(13, 0, 6)) + return; + + debugfs_create_file("aca_debug_mode", 0200, root, adev, &aca_debug_mode_fops); + debugfs_create_file("aca_ue_dump", 0400, root, adev, &aca_ue_dump_debug_fops); + debugfs_create_file("aca_ce_dump", 0400, root, adev, &aca_ce_dump_debug_fops); +#endif +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h new file mode 100644 index 000000000000..5ef6b745f222 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h @@ -0,0 +1,211 @@ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __AMDGPU_ACA_H__ +#define __AMDGPU_ACA_H__ + +#include <linux/list.h> + +struct ras_err_data; +struct ras_query_context; + +#define ACA_MAX_REGS_COUNT (16) + +#define ACA_REG_FIELD(x, h, l) (((x) & GENMASK_ULL(h, l)) >> l) +#define ACA_REG__STATUS__VAL(x) ACA_REG_FIELD(x, 63, 63) +#define ACA_REG__STATUS__OVERFLOW(x) ACA_REG_FIELD(x, 62, 62) +#define ACA_REG__STATUS__UC(x) ACA_REG_FIELD(x, 61, 61) +#define ACA_REG__STATUS__EN(x) ACA_REG_FIELD(x, 60, 60) +#define ACA_REG__STATUS__MISCV(x) ACA_REG_FIELD(x, 59, 59) +#define ACA_REG__STATUS__ADDRV(x) ACA_REG_FIELD(x, 58, 58) +#define ACA_REG__STATUS__PCC(x) ACA_REG_FIELD(x, 57, 57) +#define ACA_REG__STATUS__ERRCOREIDVAL(x) ACA_REG_FIELD(x, 56, 56) +#define ACA_REG__STATUS__TCC(x) ACA_REG_FIELD(x, 55, 55) +#define ACA_REG__STATUS__SYNDV(x) ACA_REG_FIELD(x, 53, 53) +#define ACA_REG__STATUS__CECC(x) ACA_REG_FIELD(x, 46, 46) +#define ACA_REG__STATUS__UECC(x) ACA_REG_FIELD(x, 45, 45) +#define ACA_REG__STATUS__DEFERRED(x) ACA_REG_FIELD(x, 44, 44) +#define ACA_REG__STATUS__POISON(x) ACA_REG_FIELD(x, 43, 43) +#define ACA_REG__STATUS__SCRUB(x) ACA_REG_FIELD(x, 40, 40) +#define ACA_REG__STATUS__ERRCOREID(x) ACA_REG_FIELD(x, 37, 32) +#define ACA_REG__STATUS__ADDRLSB(x) ACA_REG_FIELD(x, 29, 24) +#define ACA_REG__STATUS__ERRORCODEEXT(x) ACA_REG_FIELD(x, 21, 16) +#define ACA_REG__STATUS__ERRORCODE(x) ACA_REG_FIELD(x, 15, 0) + +#define ACA_REG__IPID__MCATYPE(x) ACA_REG_FIELD(x, 63, 48) +#define ACA_REG__IPID__INSTANCEIDHI(x) ACA_REG_FIELD(x, 47, 44) +#define ACA_REG__IPID__HARDWAREID(x) ACA_REG_FIELD(x, 43, 32) +#define ACA_REG__IPID__INSTANCEIDLO(x) ACA_REG_FIELD(x, 31, 0) + +#define ACA_REG__MISC0__VALID(x) ACA_REG_FIELD(x, 63, 63) +#define ACA_REG__MISC0__OVRFLW(x) ACA_REG_FIELD(x, 48, 48) +#define ACA_REG__MISC0__ERRCNT(x) ACA_REG_FIELD(x, 43, 32) + +#define ACA_REG__SYND__ERRORINFORMATION(x) ACA_REG_FIELD(x, 17, 0) + +/* NOTE: The following codes refers to the smu header file */ +#define ACA_EXTERROR_CODE_CE 0x3a +#define ACA_EXTERROR_CODE_FAULT 0x3b + +#define ACA_ERROR_UE_MASK BIT_MASK(ACA_ERROR_TYPE_UE) +#define ACA_ERROR_CE_MASK BIT_MASK(ACA_ERROR_TYPE_CE) +#define ACA_ERROR_DEFERRED_MASK BIT_MASK(ACA_ERROR_TYPE_DEFERRED) + +enum aca_reg_idx { + ACA_REG_IDX_CTL = 0, + ACA_REG_IDX_STATUS = 1, + ACA_REG_IDX_ADDR = 2, + ACA_REG_IDX_MISC0 = 3, + ACA_REG_IDX_CONFG = 4, + ACA_REG_IDX_IPID = 5, + ACA_REG_IDX_SYND = 6, + ACA_REG_IDX_DESTAT = 8, + ACA_REG_IDX_DEADDR = 9, + ACA_REG_IDX_CTL_MASK = 10, + ACA_REG_IDX_COUNT = 16, +}; + +enum aca_hwip_type { + ACA_HWIP_TYPE_UNKNOW = -1, + ACA_HWIP_TYPE_PSP = 0, + ACA_HWIP_TYPE_UMC, + ACA_HWIP_TYPE_SMU, + ACA_HWIP_TYPE_PCS_XGMI, + ACA_HWIP_TYPE_COUNT, +}; + +enum aca_error_type { + ACA_ERROR_TYPE_INVALID = -1, + ACA_ERROR_TYPE_UE = 0, + ACA_ERROR_TYPE_CE, + ACA_ERROR_TYPE_DEFERRED, + ACA_ERROR_TYPE_COUNT +}; + +enum aca_smu_type { + ACA_SMU_TYPE_UE = 0, + ACA_SMU_TYPE_CE, + ACA_SMU_TYPE_COUNT, +}; + +struct aca_bank { + enum aca_smu_type type; + u64 regs[ACA_MAX_REGS_COUNT]; +}; + +struct aca_bank_node { + struct aca_bank bank; + struct list_head node; +}; + +struct aca_bank_info { + int die_id; + int socket_id; + int hwid; + int mcatype; +}; + +struct aca_bank_error { + struct list_head node; + struct aca_bank_info info; + u64 count; +}; + +struct aca_error { + struct list_head list; + struct mutex lock; + enum aca_error_type type; + int nr_errors; +}; + +struct aca_handle_manager { + struct list_head list; + int nr_handles; +}; + +struct aca_error_cache { + struct aca_error errors[ACA_ERROR_TYPE_COUNT]; +}; + +struct aca_handle { + struct list_head node; + enum aca_hwip_type hwip; + struct amdgpu_device *adev; + struct aca_handle_manager *mgr; + struct aca_error_cache error_cache; + const struct aca_bank_ops *bank_ops; + struct device_attribute aca_attr; + char attr_name[64]; + const char *name; + u32 mask; + void *data; +}; + +struct aca_bank_ops { + int (*aca_bank_parser)(struct aca_handle *handle, struct aca_bank *bank, enum aca_smu_type type, void *data); + bool (*aca_bank_is_valid)(struct aca_handle *handle, struct aca_bank *bank, enum aca_smu_type type, + void *data); +}; + +struct aca_smu_funcs { + int max_ue_bank_count; + int max_ce_bank_count; + int (*set_debug_mode)(struct amdgpu_device *adev, bool enable); + int (*get_valid_aca_count)(struct amdgpu_device *adev, enum aca_smu_type type, u32 *count); + int (*get_valid_aca_bank)(struct amdgpu_device *adev, enum aca_smu_type type, int idx, struct aca_bank *bank); + int (*parse_error_code)(struct amdgpu_device *adev, struct aca_bank *bank); +}; + +struct amdgpu_aca { + struct aca_handle_manager mgr; + const struct aca_smu_funcs *smu_funcs; + atomic_t ue_update_flag; + bool is_enabled; +}; + +struct aca_info { + enum aca_hwip_type hwip; + const struct aca_bank_ops *bank_ops; + u32 mask; +}; + +int amdgpu_aca_init(struct amdgpu_device *adev); +void amdgpu_aca_fini(struct amdgpu_device *adev); +int amdgpu_aca_reset(struct amdgpu_device *adev); +void amdgpu_aca_set_smu_funcs(struct amdgpu_device *adev, const struct aca_smu_funcs *smu_funcs); +bool amdgpu_aca_is_enabled(struct amdgpu_device *adev); + +int aca_bank_info_decode(struct aca_bank *bank, struct aca_bank_info *info); +int aca_bank_check_error_codes(struct amdgpu_device *adev, struct aca_bank *bank, int *err_codes, int size); + +int amdgpu_aca_add_handle(struct amdgpu_device *adev, struct aca_handle *handle, + const char *name, const struct aca_info *aca_info, void *data); +void amdgpu_aca_remove_handle(struct aca_handle *handle); +int amdgpu_aca_get_error_data(struct amdgpu_device *adev, struct aca_handle *handle, + enum aca_error_type type, struct ras_err_data *err_data, + struct ras_query_context *qctx); +int amdgpu_aca_smu_set_debug_mode(struct amdgpu_device *adev, bool en); +void amdgpu_aca_smu_debugfs_init(struct amdgpu_device *adev, struct dentry *root); +int aca_error_cache_log_bank_error(struct aca_handle *handle, struct aca_bank_info *info, + enum aca_error_type type, u64 count); +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c index 6d72355ac492..bf6c4a0d0525 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c @@ -637,6 +637,8 @@ static const struct amd_ip_funcs acp_ip_funcs = { .soft_reset = acp_soft_reset, .set_clockgating_state = acp_set_clockgating_state, .set_powergating_state = acp_set_powergating_state, + .dump_ip_state = NULL, + .print_ip_state = NULL, }; const struct amdgpu_ip_block_version acp_ip_block = { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index 41db030ddc4e..7ba05f030dd1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -146,7 +146,7 @@ int amdgpu_amdkfd_drm_client_create(struct amdgpu_device *adev) { int ret; - if (!adev->kfd.init_complete) + if (!adev->kfd.init_complete || adev->kfd.client.dev) return 0; ret = drm_client_init(&adev->ddev, &adev->kfd.client, "kfd", @@ -742,9 +742,22 @@ void amdgpu_amdkfd_debug_mem_fence(struct amdgpu_device *adev) amdgpu_device_flush_hdp(adev, NULL); } -void amdgpu_amdkfd_ras_poison_consumption_handler(struct amdgpu_device *adev, bool reset) +bool amdgpu_amdkfd_is_fed(struct amdgpu_device *adev) { - amdgpu_umc_poison_handler(adev, reset); + return amdgpu_ras_get_fed_status(adev); +} + +void amdgpu_amdkfd_ras_pasid_poison_consumption_handler(struct amdgpu_device *adev, + enum amdgpu_ras_block block, uint16_t pasid, + pasid_notify pasid_fn, void *data, uint32_t reset) +{ + amdgpu_umc_pasid_poison_handler(adev, block, pasid, pasid_fn, data, reset); +} + +void amdgpu_amdkfd_ras_poison_consumption_handler(struct amdgpu_device *adev, + enum amdgpu_ras_block block, uint32_t reset) +{ + amdgpu_umc_pasid_poison_handler(adev, block, 0, NULL, NULL, reset); } int amdgpu_amdkfd_send_close_event_drain_irq(struct amdgpu_device *adev, @@ -763,12 +776,20 @@ int amdgpu_amdkfd_send_close_event_drain_irq(struct amdgpu_device *adev, return 0; } -bool amdgpu_amdkfd_ras_query_utcl2_poison_status(struct amdgpu_device *adev) +bool amdgpu_amdkfd_ras_query_utcl2_poison_status(struct amdgpu_device *adev, + int hub_inst, int hub_type) { - if (adev->gfx.ras && adev->gfx.ras->query_utcl2_poison_status) - return adev->gfx.ras->query_utcl2_poison_status(adev); - else - return false; + if (!hub_type) { + if (adev->gfxhub.funcs->query_utcl2_poison_status) + return adev->gfxhub.funcs->query_utcl2_poison_status(adev, hub_inst); + else + return false; + } else { + if (adev->mmhub.funcs->query_utcl2_poison_status) + return adev->mmhub.funcs->query_utcl2_poison_status(adev, hub_inst); + else + return false; + } } int amdgpu_amdkfd_check_and_lock_kfd(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index 27c61c535e29..1de021ebdd46 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -193,6 +193,9 @@ struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f); int amdgpu_amdkfd_remove_fence_on_pt_pd_bos(struct amdgpu_bo *bo); int amdgpu_amdkfd_evict_userptr(struct mmu_interval_notifier *mni, unsigned long cur_seq, struct kgd_mem *mem); +int amdgpu_amdkfd_bo_validate_and_fence(struct amdgpu_bo *bo, + uint32_t domain, + struct dma_fence *fence); #else static inline bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm) @@ -218,6 +221,13 @@ int amdgpu_amdkfd_evict_userptr(struct mmu_interval_notifier *mni, { return 0; } +static inline +int amdgpu_amdkfd_bo_validate_and_fence(struct amdgpu_bo *bo, + uint32_t domain, + struct dma_fence *fence) +{ + return 0; +} #endif /* Shared API */ int amdgpu_amdkfd_alloc_gtt_mem(struct amdgpu_device *adev, size_t size, @@ -310,7 +320,7 @@ int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_mem *mem, void **kptr, uint64_t *size); void amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(struct kgd_mem *mem); -int amdgpu_amdkfd_map_gtt_bo_to_gart(struct amdgpu_device *adev, struct amdgpu_bo *bo); +int amdgpu_amdkfd_map_gtt_bo_to_gart(struct amdgpu_bo *bo); int amdgpu_amdkfd_gpuvm_restore_process_bos(void *process_info, struct dma_fence __rcu **ef); @@ -326,11 +336,18 @@ void amdgpu_amdkfd_debug_mem_fence(struct amdgpu_device *adev); int amdgpu_amdkfd_get_tile_config(struct amdgpu_device *adev, struct tile_config *config); void amdgpu_amdkfd_ras_poison_consumption_handler(struct amdgpu_device *adev, - bool reset); + enum amdgpu_ras_block block, uint32_t reset); + +void amdgpu_amdkfd_ras_pasid_poison_consumption_handler(struct amdgpu_device *adev, + enum amdgpu_ras_block block, uint16_t pasid, + pasid_notify pasid_fn, void *data, uint32_t reset); + +bool amdgpu_amdkfd_is_fed(struct amdgpu_device *adev); bool amdgpu_amdkfd_bo_mapped_to_dev(struct amdgpu_device *adev, struct kgd_mem *mem); void amdgpu_amdkfd_block_mmu_notifications(void *p); int amdgpu_amdkfd_criu_resume(void *p); -bool amdgpu_amdkfd_ras_query_utcl2_poison_status(struct amdgpu_device *adev); +bool amdgpu_amdkfd_ras_query_utcl2_poison_status(struct amdgpu_device *adev, + int hub_inst, int hub_type); int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev, uint64_t size, u32 alloc_flag, int8_t xcp_id); void amdgpu_amdkfd_unreserve_mem_limit(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c index 69810b3f1c63..3ab6c3aa0ad1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c @@ -881,6 +881,7 @@ uint32_t kgd_gfx_v10_set_wave_launch_mode(struct amdgpu_device *adev, } #define TCP_WATCH_STRIDE (mmTCP_WATCH1_ADDR_H - mmTCP_WATCH0_ADDR_H) +#define SQ_WATCH_STRIDE (mmSQ_WATCH1_ADDR_H - mmSQ_WATCH0_ADDR_H) uint32_t kgd_gfx_v10_set_address_watch(struct amdgpu_device *adev, uint64_t watch_address, uint32_t watch_address_mask, @@ -889,55 +890,93 @@ uint32_t kgd_gfx_v10_set_address_watch(struct amdgpu_device *adev, uint32_t debug_vmid, uint32_t inst) { + /* SQ_WATCH?_ADDR_* and TCP_WATCH?_ADDR_* are programmed with the + * same values. + */ uint32_t watch_address_high; uint32_t watch_address_low; - uint32_t watch_address_cntl; - - watch_address_cntl = 0; + uint32_t tcp_watch_address_cntl; + uint32_t sq_watch_address_cntl; watch_address_low = lower_32_bits(watch_address); watch_address_high = upper_32_bits(watch_address) & 0xffff; - watch_address_cntl = REG_SET_FIELD(watch_address_cntl, + tcp_watch_address_cntl = 0; + tcp_watch_address_cntl = REG_SET_FIELD(tcp_watch_address_cntl, TCP_WATCH0_CNTL, VMID, debug_vmid); - watch_address_cntl = REG_SET_FIELD(watch_address_cntl, + tcp_watch_address_cntl = REG_SET_FIELD(tcp_watch_address_cntl, TCP_WATCH0_CNTL, MODE, watch_mode); - watch_address_cntl = REG_SET_FIELD(watch_address_cntl, + tcp_watch_address_cntl = REG_SET_FIELD(tcp_watch_address_cntl, TCP_WATCH0_CNTL, MASK, watch_address_mask >> 7); + sq_watch_address_cntl = 0; + sq_watch_address_cntl = REG_SET_FIELD(sq_watch_address_cntl, + SQ_WATCH0_CNTL, + VMID, + debug_vmid); + sq_watch_address_cntl = REG_SET_FIELD(sq_watch_address_cntl, + SQ_WATCH0_CNTL, + MODE, + watch_mode); + sq_watch_address_cntl = REG_SET_FIELD(sq_watch_address_cntl, + SQ_WATCH0_CNTL, + MASK, + watch_address_mask >> 6); + /* Turning off this watch point until we set all the registers */ - watch_address_cntl = REG_SET_FIELD(watch_address_cntl, + tcp_watch_address_cntl = REG_SET_FIELD(tcp_watch_address_cntl, TCP_WATCH0_CNTL, VALID, 0); - WREG32((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_CNTL) + (watch_id * TCP_WATCH_STRIDE)), - watch_address_cntl); + tcp_watch_address_cntl); + + sq_watch_address_cntl = REG_SET_FIELD(sq_watch_address_cntl, + SQ_WATCH0_CNTL, + VALID, + 0); + WREG32((SOC15_REG_OFFSET(GC, 0, mmSQ_WATCH0_CNTL) + + (watch_id * SQ_WATCH_STRIDE)), + sq_watch_address_cntl); + /* Program {TCP,SQ}_WATCH?_ADDR* */ WREG32((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_ADDR_H) + (watch_id * TCP_WATCH_STRIDE)), watch_address_high); - WREG32((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_ADDR_L) + (watch_id * TCP_WATCH_STRIDE)), watch_address_low); + WREG32((SOC15_REG_OFFSET(GC, 0, mmSQ_WATCH0_ADDR_H) + + (watch_id * SQ_WATCH_STRIDE)), + watch_address_high); + WREG32((SOC15_REG_OFFSET(GC, 0, mmSQ_WATCH0_ADDR_L) + + (watch_id * SQ_WATCH_STRIDE)), + watch_address_low); + /* Enable the watch point */ - watch_address_cntl = REG_SET_FIELD(watch_address_cntl, + tcp_watch_address_cntl = REG_SET_FIELD(tcp_watch_address_cntl, TCP_WATCH0_CNTL, VALID, 1); - WREG32((SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_CNTL) + (watch_id * TCP_WATCH_STRIDE)), - watch_address_cntl); + tcp_watch_address_cntl); + + sq_watch_address_cntl = REG_SET_FIELD(sq_watch_address_cntl, + SQ_WATCH0_CNTL, + VALID, + 1); + WREG32((SOC15_REG_OFFSET(GC, 0, mmSQ_WATCH0_CNTL) + + (watch_id * SQ_WATCH_STRIDE)), + sq_watch_address_cntl); return 0; } @@ -953,8 +992,14 @@ uint32_t kgd_gfx_v10_clear_address_watch(struct amdgpu_device *adev, (watch_id * TCP_WATCH_STRIDE)), watch_address_cntl); + WREG32((SOC15_REG_OFFSET(GC, 0, mmSQ_WATCH0_CNTL) + + (watch_id * SQ_WATCH_STRIDE)), + watch_address_cntl); + return 0; } +#undef TCP_WATCH_STRIDE +#undef SQ_WATCH_STRIDE /* kgd_gfx_v10_get_iq_wait_times: Returns the mmCP_IQ_WAIT_TIME1/2 values diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 231fd927dcfb..e4d4e55c08ad 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -220,7 +220,7 @@ int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev, (kfd_mem_limit.ttm_mem_used + ttm_mem_needed > kfd_mem_limit.max_ttm_mem_limit) || (adev && xcp_id >= 0 && adev->kfd.vram_used[xcp_id] + vram_needed > - vram_size - reserved_for_pt)) { + vram_size - reserved_for_pt - atomic64_read(&adev->vram_pin_size))) { ret = -ENOMEM; goto release; } @@ -426,9 +426,9 @@ validate_fail: return ret; } -static int amdgpu_amdkfd_bo_validate_and_fence(struct amdgpu_bo *bo, - uint32_t domain, - struct dma_fence *fence) +int amdgpu_amdkfd_bo_validate_and_fence(struct amdgpu_bo *bo, + uint32_t domain, + struct dma_fence *fence) { int ret = amdgpu_bo_reserve(bo, false); @@ -464,13 +464,15 @@ static int amdgpu_amdkfd_validate_vm_bo(void *_unused, struct amdgpu_bo *bo) * again. Page directories are only updated after updating page * tables. */ -static int vm_validate_pt_pd_bos(struct amdgpu_vm *vm) +static int vm_validate_pt_pd_bos(struct amdgpu_vm *vm, + struct ww_acquire_ctx *ticket) { struct amdgpu_bo *pd = vm->root.bo; struct amdgpu_device *adev = amdgpu_ttm_adev(pd->tbo.bdev); int ret; - ret = amdgpu_vm_validate_pt_bos(adev, vm, amdgpu_amdkfd_validate_vm_bo, NULL); + ret = amdgpu_vm_validate(adev, vm, ticket, + amdgpu_amdkfd_validate_vm_bo, NULL); if (ret) { pr_err("failed to validate PT BOs\n"); return ret; @@ -1310,14 +1312,15 @@ update_gpuvm_pte_failed: return ret; } -static int process_validate_vms(struct amdkfd_process_info *process_info) +static int process_validate_vms(struct amdkfd_process_info *process_info, + struct ww_acquire_ctx *ticket) { struct amdgpu_vm *peer_vm; int ret; list_for_each_entry(peer_vm, &process_info->vm_list_head, vm_list_node) { - ret = vm_validate_pt_pd_bos(peer_vm); + ret = vm_validate_pt_pd_bos(peer_vm, ticket); if (ret) return ret; } @@ -1402,7 +1405,7 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info, ret = amdgpu_bo_reserve(vm->root.bo, true); if (ret) goto reserve_pd_fail; - ret = vm_validate_pt_pd_bos(vm); + ret = vm_validate_pt_pd_bos(vm, NULL); if (ret) { pr_err("validate_pt_pd_bos() failed\n"); goto validate_pd_fail; @@ -1851,6 +1854,7 @@ err_node_allow: err_bo_create: amdgpu_amdkfd_unreserve_mem_limit(adev, aligned_size, flags, xcp_id); err_reserve_limit: + amdgpu_sync_free(&(*mem)->sync); mutex_destroy(&(*mem)->lock); if (gobj) drm_gem_object_put(gobj); @@ -2043,7 +2047,7 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( bo->tbo.resource->mem_type == TTM_PL_SYSTEM) is_invalid_userptr = true; - ret = vm_validate_pt_pd_bos(avm); + ret = vm_validate_pt_pd_bos(avm, NULL); if (unlikely(ret)) goto out_unreserve; @@ -2136,7 +2140,7 @@ int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu( goto unreserve_out; } - ret = vm_validate_pt_pd_bos(avm); + ret = vm_validate_pt_pd_bos(avm, NULL); if (unlikely(ret)) goto unreserve_out; @@ -2186,13 +2190,12 @@ int amdgpu_amdkfd_gpuvm_sync_memory( /** * amdgpu_amdkfd_map_gtt_bo_to_gart - Map BO to GART and increment reference count - * @adev: Device to which allocated BO belongs * @bo: Buffer object to be mapped * * Before return, bo reference count is incremented. To release the reference and unpin/ * unmap the BO, call amdgpu_amdkfd_free_gtt_mem. */ -int amdgpu_amdkfd_map_gtt_bo_to_gart(struct amdgpu_device *adev, struct amdgpu_bo *bo) +int amdgpu_amdkfd_map_gtt_bo_to_gart(struct amdgpu_bo *bo) { int ret; @@ -2634,7 +2637,7 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info) } } - ret = process_validate_vms(process_info); + ret = process_validate_vms(process_info, NULL); if (ret) goto unreserve_out; @@ -2867,14 +2870,16 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence __rcu * mutex_lock(&process_info->lock); - drm_exec_init(&exec, 0, 0); + drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES, 0); drm_exec_until_all_locked(&exec) { list_for_each_entry(peer_vm, &process_info->vm_list_head, vm_list_node) { ret = amdgpu_vm_lock_pd(peer_vm, &exec, 2); drm_exec_retry_on_contention(&exec); - if (unlikely(ret)) + if (unlikely(ret)) { + pr_err("Locking VM PD failed, ret: %d\n", ret); goto ttm_reserve_fail; + } } /* Reserve all BOs and page tables/directory. Add all BOs from @@ -2887,25 +2892,21 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence __rcu * gobj = &mem->bo->tbo.base; ret = drm_exec_prepare_obj(&exec, gobj, 1); drm_exec_retry_on_contention(&exec); - if (unlikely(ret)) + if (unlikely(ret)) { + pr_err("drm_exec_prepare_obj failed, ret: %d\n", ret); goto ttm_reserve_fail; + } } } amdgpu_sync_create(&sync_obj); - /* Validate PDs and PTs */ - ret = process_validate_vms(process_info); - if (ret) - goto validate_map_fail; - - /* Validate BOs and map them to GPUVM (update VM page tables). */ + /* Validate BOs managed by KFD */ list_for_each_entry(mem, &process_info->kfd_bo_list, validate_list) { struct amdgpu_bo *bo = mem->bo; uint32_t domain = mem->domain; - struct kfd_mem_attachment *attachment; struct dma_resv_iter cursor; struct dma_fence *fence; @@ -2930,6 +2931,25 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence __rcu * goto validate_map_fail; } } + } + + if (failed_size) + pr_debug("0x%lx/0x%lx in system\n", failed_size, total_size); + + /* Validate PDs, PTs and evicted DMABuf imports last. Otherwise BO + * validations above would invalidate DMABuf imports again. + */ + ret = process_validate_vms(process_info, &exec.ticket); + if (ret) { + pr_debug("Validating VMs failed, ret: %d\n", ret); + goto validate_map_fail; + } + + /* Update mappings managed by KFD. */ + list_for_each_entry(mem, &process_info->kfd_bo_list, + validate_list) { + struct kfd_mem_attachment *attachment; + list_for_each_entry(attachment, &mem->attachments, list) { if (!attachment->is_mapped) continue; @@ -2946,9 +2966,6 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence __rcu * } } - if (failed_size) - pr_debug("0x%lx/0x%lx in system\n", failed_size, total_size); - /* Update mappings not managed by KFD */ list_for_each_entry(peer_vm, &process_info->vm_list_head, vm_list_node) { @@ -3020,7 +3037,7 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence __rcu * &process_info->eviction_fence->base, DMA_RESV_USAGE_BOOKKEEP); } - /* Attach eviction fence to PD / PT BOs */ + /* Attach eviction fence to PD / PT BOs and DMABuf imports */ list_for_each_entry(peer_vm, &process_info->vm_list_head, vm_list_node) { struct amdgpu_bo *bo = peer_vm->root.bo; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c index dce9e7d5e4ec..52b12c1718eb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c @@ -1018,7 +1018,8 @@ int amdgpu_atombios_get_clock_dividers(struct amdgpu_device *adev, if (clock_type == COMPUTE_ENGINE_PLL_PARAM) { args.v3.ulClockParams = cpu_to_le32((clock_type << 24) | clock); - amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args); + amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, + sizeof(args)); dividers->post_div = args.v3.ucPostDiv; dividers->enable_post_div = (args.v3.ucCntlFlag & @@ -1038,7 +1039,8 @@ int amdgpu_atombios_get_clock_dividers(struct amdgpu_device *adev, if (strobe_mode) args.v5.ucInputFlag = ATOM_PLL_INPUT_FLAG_PLL_STROBE_MODE_EN; - amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args); + amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, + sizeof(args)); dividers->post_div = args.v5.ucPostDiv; dividers->enable_post_div = (args.v5.ucCntlFlag & @@ -1056,7 +1058,8 @@ int amdgpu_atombios_get_clock_dividers(struct amdgpu_device *adev, /* fusion */ args.v4.ulClock = cpu_to_le32(clock); /* 10 khz */ - amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args); + amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, + sizeof(args)); dividers->post_divider = dividers->post_div = args.v4.ucPostDiv; dividers->real_clock = le32_to_cpu(args.v4.ulClock); @@ -1067,7 +1070,8 @@ int amdgpu_atombios_get_clock_dividers(struct amdgpu_device *adev, args.v6_in.ulClock.ulComputeClockFlag = clock_type; args.v6_in.ulClock.ulClockFreq = cpu_to_le32(clock); /* 10 khz */ - amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args); + amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, + sizeof(args)); dividers->whole_fb_div = le16_to_cpu(args.v6_out.ulFbDiv.usFbDiv); dividers->frac_fb_div = le16_to_cpu(args.v6_out.ulFbDiv.usFbDivFrac); @@ -1109,7 +1113,8 @@ int amdgpu_atombios_get_memory_pll_dividers(struct amdgpu_device *adev, if (strobe_mode) args.ucInputFlag |= MPLL_INPUT_FLAG_STROBE_MODE_EN; - amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args); + amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, + sizeof(args)); mpll_param->clkfrac = le16_to_cpu(args.ulFbDiv.usFbDivFrac); mpll_param->clkf = le16_to_cpu(args.ulFbDiv.usFbDiv); @@ -1151,7 +1156,8 @@ void amdgpu_atombios_set_engine_dram_timings(struct amdgpu_device *adev, if (mem_clock) args.sReserved.ulClock = cpu_to_le32(mem_clock & SET_CLOCK_FREQ_MASK); - amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args); + amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, + sizeof(args)); } void amdgpu_atombios_get_default_voltages(struct amdgpu_device *adev, @@ -1205,7 +1211,8 @@ int amdgpu_atombios_get_max_vddc(struct amdgpu_device *adev, u8 voltage_type, args.v2.ucVoltageMode = 0; args.v2.usVoltageLevel = 0; - amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args); + amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, + sizeof(args)); *voltage = le16_to_cpu(args.v2.usVoltageLevel); break; @@ -1214,7 +1221,8 @@ int amdgpu_atombios_get_max_vddc(struct amdgpu_device *adev, u8 voltage_type, args.v3.ucVoltageMode = ATOM_GET_VOLTAGE_LEVEL; args.v3.usVoltageLevel = cpu_to_le16(voltage_id); - amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args); + amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, + sizeof(args)); *voltage = le16_to_cpu(args.v3.usVoltageLevel); break; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c index fb2681dd6b33..a6d64bdbbb14 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c @@ -34,6 +34,7 @@ union firmware_info { struct atom_firmware_info_v3_2 v32; struct atom_firmware_info_v3_3 v33; struct atom_firmware_info_v3_4 v34; + struct atom_firmware_info_v3_5 v35; }; /* @@ -872,6 +873,10 @@ int amdgpu_atomfirmware_get_fw_reserved_fb_size(struct amdgpu_device *adev) fw_reserved_fb_size = (firmware_info->v34.fw_reserved_size_in_kb << 10); break; + case 5: + fw_reserved_fb_size = + (firmware_info->v35.fw_reserved_size_in_kb << 10); + break; default: fw_reserved_fb_size = 0; break; @@ -941,5 +946,6 @@ int amdgpu_atomfirmware_asic_init(struct amdgpu_device *adev, bool fb_reset) return -EINVAL; } - return amdgpu_atom_execute_table(ctx, ATOM_CMD_INIT, (uint32_t *)&asic_init_ps_v2_1); + return amdgpu_atom_execute_table(ctx, ATOM_CMD_INIT, (uint32_t *)&asic_init_ps_v2_1, + sizeof(asic_init_ps_v2_1)); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h index c7eb2caec65a..649b5530d8ae 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h @@ -36,7 +36,7 @@ int amdgpu_atomfirmware_get_clock_info(struct amdgpu_device *adev); int amdgpu_atomfirmware_get_gfx_info(struct amdgpu_device *adev); bool amdgpu_atomfirmware_mem_ecc_supported(struct amdgpu_device *adev); bool amdgpu_atomfirmware_sram_ecc_supported(struct amdgpu_device *adev); -bool amdgpu_atomfirmware_ras_rom_addr(struct amdgpu_device *adev, uint8_t* i2c_address); +bool amdgpu_atomfirmware_ras_rom_addr(struct amdgpu_device *adev, uint8_t *i2c_address); bool amdgpu_atomfirmware_mem_training_supported(struct amdgpu_device *adev); bool amdgpu_atomfirmware_dynamic_boot_config_supported(struct amdgpu_device *adev); int amdgpu_atomfirmware_get_fw_reserved_fb_size(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c index edc6377ec5ff..199693369c7c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c @@ -39,7 +39,7 @@ static int amdgpu_benchmark_do_move(struct amdgpu_device *adev, unsigned size, for (i = 0; i < n; i++) { struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring; r = amdgpu_copy_buffer(ring, saddr, daddr, size, NULL, &fence, - false, false, false); + false, false, 0); if (r) goto exit_do_move; r = dma_fence_wait(fence, false); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 6adeddfb3d56..ec888fc6ead8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -819,7 +819,7 @@ retry: p->bytes_moved += ctx.bytes_moved; if (!amdgpu_gmc_vram_full_visible(&adev->gmc) && - amdgpu_bo_in_cpu_visible_vram(bo)) + amdgpu_res_cpu_visible(adev, bo->tbo.resource)) p->bytes_moved_vis += ctx.bytes_moved; if (unlikely(r == -ENOMEM) && domain != bo->allowed_domains) { @@ -952,10 +952,10 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, p->bytes_moved = 0; p->bytes_moved_vis = 0; - r = amdgpu_vm_validate_pt_bos(p->adev, &fpriv->vm, - amdgpu_cs_bo_validate, p); + r = amdgpu_vm_validate(p->adev, &fpriv->vm, NULL, + amdgpu_cs_bo_validate, p); if (r) { - DRM_ERROR("amdgpu_vm_validate_pt_bos() failed.\n"); + DRM_ERROR("amdgpu_vm_validate() failed.\n"); goto out_free_user_pages; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c index 796fa6f1420b..cfdf558b48b6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c @@ -28,9 +28,8 @@ uint64_t amdgpu_csa_vaddr(struct amdgpu_device *adev) { - uint64_t addr = adev->vm_manager.max_pfn << AMDGPU_GPU_PAGE_SHIFT; + uint64_t addr = AMDGPU_VA_RESERVED_CSA_START(adev); - addr -= AMDGPU_VA_RESERVED_SIZE; addr = amdgpu_gmc_sign_extend(addr); return addr; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c index 1afbb2e932c6..b62ae3c91a9d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c @@ -1782,9 +1782,14 @@ static int amdgpu_debugfs_vm_info_show(struct seq_file *m, void *unused) list_for_each_entry(file, &dev->filelist, lhead) { struct amdgpu_fpriv *fpriv = file->driver_priv; struct amdgpu_vm *vm = &fpriv->vm; + struct amdgpu_task_info *ti; + + ti = amdgpu_vm_get_task_info_vm(vm); + if (ti) { + seq_printf(m, "pid:%d\tProcess:%s ----------\n", ti->pid, ti->process_name); + amdgpu_vm_put_task_info(ti); + } - seq_printf(m, "pid:%d\tProcess:%s ----------\n", - vm->task_info.pid, vm->task_info.process_name); r = amdgpu_bo_reserve(vm->root.bo, true); if (r) break; @@ -2060,12 +2065,13 @@ static ssize_t amdgpu_reset_dump_register_list_write(struct file *f, struct amdgpu_device *adev = (struct amdgpu_device *)file_inode(f)->i_private; char reg_offset[11]; uint32_t *new = NULL, *tmp = NULL; - int ret, i = 0, len = 0; + unsigned int len = 0; + int ret, i = 0; do { memset(reg_offset, 0, 11); if (copy_from_user(reg_offset, buf + len, - min(10, ((int)size-len)))) { + min(10, (size-len)))) { ret = -EFAULT; goto error_free; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c new file mode 100644 index 000000000000..c1cb62683695 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c @@ -0,0 +1,360 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright 2024 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include <generated/utsrelease.h> +#include <linux/devcoredump.h> +#include "amdgpu_dev_coredump.h" +#include "atom.h" + +#ifndef CONFIG_DEV_COREDUMP +void amdgpu_coredump(struct amdgpu_device *adev, bool vram_lost, + struct amdgpu_reset_context *reset_context) +{ +} +#else + +const char *hw_ip_names[MAX_HWIP] = { + [GC_HWIP] = "GC", + [HDP_HWIP] = "HDP", + [SDMA0_HWIP] = "SDMA0", + [SDMA1_HWIP] = "SDMA1", + [SDMA2_HWIP] = "SDMA2", + [SDMA3_HWIP] = "SDMA3", + [SDMA4_HWIP] = "SDMA4", + [SDMA5_HWIP] = "SDMA5", + [SDMA6_HWIP] = "SDMA6", + [SDMA7_HWIP] = "SDMA7", + [LSDMA_HWIP] = "LSDMA", + [MMHUB_HWIP] = "MMHUB", + [ATHUB_HWIP] = "ATHUB", + [NBIO_HWIP] = "NBIO", + [MP0_HWIP] = "MP0", + [MP1_HWIP] = "MP1", + [UVD_HWIP] = "UVD/JPEG/VCN", + [VCN1_HWIP] = "VCN1", + [VCE_HWIP] = "VCE", + [VPE_HWIP] = "VPE", + [DF_HWIP] = "DF", + [DCE_HWIP] = "DCE", + [OSSSYS_HWIP] = "OSSSYS", + [SMUIO_HWIP] = "SMUIO", + [PWR_HWIP] = "PWR", + [NBIF_HWIP] = "NBIF", + [THM_HWIP] = "THM", + [CLK_HWIP] = "CLK", + [UMC_HWIP] = "UMC", + [RSMU_HWIP] = "RSMU", + [XGMI_HWIP] = "XGMI", + [DCI_HWIP] = "DCI", + [PCIE_HWIP] = "PCIE", +}; + +static void amdgpu_devcoredump_fw_info(struct amdgpu_device *adev, + struct drm_printer *p) +{ + uint32_t version; + uint32_t feature; + uint8_t smu_program, smu_major, smu_minor, smu_debug; + struct atom_context *ctx = adev->mode_info.atom_context; + + drm_printf(p, "VCE feature version: %u, fw version: 0x%08x\n", + adev->vce.fb_version, adev->vce.fw_version); + drm_printf(p, "UVD feature version: %u, fw version: 0x%08x\n", 0, + adev->uvd.fw_version); + drm_printf(p, "GMC feature version: %u, fw version: 0x%08x\n", 0, + adev->gmc.fw_version); + drm_printf(p, "ME feature version: %u, fw version: 0x%08x\n", + adev->gfx.me_feature_version, adev->gfx.me_fw_version); + drm_printf(p, "PFP feature version: %u, fw version: 0x%08x\n", + adev->gfx.pfp_feature_version, adev->gfx.pfp_fw_version); + drm_printf(p, "CE feature version: %u, fw version: 0x%08x\n", + adev->gfx.ce_feature_version, adev->gfx.ce_fw_version); + drm_printf(p, "RLC feature version: %u, fw version: 0x%08x\n", + adev->gfx.rlc_feature_version, adev->gfx.rlc_fw_version); + + drm_printf(p, "RLC SRLC feature version: %u, fw version: 0x%08x\n", + adev->gfx.rlc_srlc_feature_version, + adev->gfx.rlc_srlc_fw_version); + drm_printf(p, "RLC SRLG feature version: %u, fw version: 0x%08x\n", + adev->gfx.rlc_srlg_feature_version, + adev->gfx.rlc_srlg_fw_version); + drm_printf(p, "RLC SRLS feature version: %u, fw version: 0x%08x\n", + adev->gfx.rlc_srls_feature_version, + adev->gfx.rlc_srls_fw_version); + drm_printf(p, "RLCP feature version: %u, fw version: 0x%08x\n", + adev->gfx.rlcp_ucode_feature_version, + adev->gfx.rlcp_ucode_version); + drm_printf(p, "RLCV feature version: %u, fw version: 0x%08x\n", + adev->gfx.rlcv_ucode_feature_version, + adev->gfx.rlcv_ucode_version); + drm_printf(p, "MEC feature version: %u, fw version: 0x%08x\n", + adev->gfx.mec_feature_version, adev->gfx.mec_fw_version); + + if (adev->gfx.mec2_fw) + drm_printf(p, "MEC2 feature version: %u, fw version: 0x%08x\n", + adev->gfx.mec2_feature_version, + adev->gfx.mec2_fw_version); + + drm_printf(p, "IMU feature version: %u, fw version: 0x%08x\n", 0, + adev->gfx.imu_fw_version); + drm_printf(p, "PSP SOS feature version: %u, fw version: 0x%08x\n", + adev->psp.sos.feature_version, adev->psp.sos.fw_version); + drm_printf(p, "PSP ASD feature version: %u, fw version: 0x%08x\n", + adev->psp.asd_context.bin_desc.feature_version, + adev->psp.asd_context.bin_desc.fw_version); + + drm_printf(p, "TA XGMI feature version: 0x%08x, fw version: 0x%08x\n", + adev->psp.xgmi_context.context.bin_desc.feature_version, + adev->psp.xgmi_context.context.bin_desc.fw_version); + drm_printf(p, "TA RAS feature version: 0x%08x, fw version: 0x%08x\n", + adev->psp.ras_context.context.bin_desc.feature_version, + adev->psp.ras_context.context.bin_desc.fw_version); + drm_printf(p, "TA HDCP feature version: 0x%08x, fw version: 0x%08x\n", + adev->psp.hdcp_context.context.bin_desc.feature_version, + adev->psp.hdcp_context.context.bin_desc.fw_version); + drm_printf(p, "TA DTM feature version: 0x%08x, fw version: 0x%08x\n", + adev->psp.dtm_context.context.bin_desc.feature_version, + adev->psp.dtm_context.context.bin_desc.fw_version); + drm_printf(p, "TA RAP feature version: 0x%08x, fw version: 0x%08x\n", + adev->psp.rap_context.context.bin_desc.feature_version, + adev->psp.rap_context.context.bin_desc.fw_version); + drm_printf(p, + "TA SECURE DISPLAY feature version: 0x%08x, fw version: 0x%08x\n", + adev->psp.securedisplay_context.context.bin_desc.feature_version, + adev->psp.securedisplay_context.context.bin_desc.fw_version); + + /* SMC firmware */ + version = adev->pm.fw_version; + + smu_program = (version >> 24) & 0xff; + smu_major = (version >> 16) & 0xff; + smu_minor = (version >> 8) & 0xff; + smu_debug = (version >> 0) & 0xff; + drm_printf(p, + "SMC feature version: %u, program: %d, fw version: 0x%08x (%d.%d.%d)\n", + 0, smu_program, version, smu_major, smu_minor, smu_debug); + + /* SDMA firmware */ + for (int i = 0; i < adev->sdma.num_instances; i++) { + drm_printf(p, + "SDMA%d feature version: %u, firmware version: 0x%08x\n", + i, adev->sdma.instance[i].feature_version, + adev->sdma.instance[i].fw_version); + } + + drm_printf(p, "VCN feature version: %u, fw version: 0x%08x\n", 0, + adev->vcn.fw_version); + drm_printf(p, "DMCU feature version: %u, fw version: 0x%08x\n", 0, + adev->dm.dmcu_fw_version); + drm_printf(p, "DMCUB feature version: %u, fw version: 0x%08x\n", 0, + adev->dm.dmcub_fw_version); + drm_printf(p, "PSP TOC feature version: %u, fw version: 0x%08x\n", + adev->psp.toc.feature_version, adev->psp.toc.fw_version); + + version = adev->mes.kiq_version & AMDGPU_MES_VERSION_MASK; + feature = (adev->mes.kiq_version & AMDGPU_MES_FEAT_VERSION_MASK) >> + AMDGPU_MES_FEAT_VERSION_SHIFT; + drm_printf(p, "MES_KIQ feature version: %u, fw version: 0x%08x\n", + feature, version); + + version = adev->mes.sched_version & AMDGPU_MES_VERSION_MASK; + feature = (adev->mes.sched_version & AMDGPU_MES_FEAT_VERSION_MASK) >> + AMDGPU_MES_FEAT_VERSION_SHIFT; + drm_printf(p, "MES feature version: %u, fw version: 0x%08x\n", feature, + version); + + drm_printf(p, "VPE feature version: %u, fw version: 0x%08x\n", + adev->vpe.feature_version, adev->vpe.fw_version); + + drm_printf(p, "\nVBIOS Information\n"); + drm_printf(p, "vbios name : %s\n", ctx->name); + drm_printf(p, "vbios pn : %s\n", ctx->vbios_pn); + drm_printf(p, "vbios version : %d\n", ctx->version); + drm_printf(p, "vbios ver_str : %s\n", ctx->vbios_ver_str); + drm_printf(p, "vbios date : %s\n", ctx->date); +} + +static ssize_t +amdgpu_devcoredump_read(char *buffer, loff_t offset, size_t count, + void *data, size_t datalen) +{ + struct drm_printer p; + struct amdgpu_coredump_info *coredump = data; + struct drm_print_iterator iter; + struct amdgpu_vm_fault_info *fault_info; + int i, ver; + + iter.data = buffer; + iter.offset = 0; + iter.start = offset; + iter.remain = count; + + p = drm_coredump_printer(&iter); + + drm_printf(&p, "**** AMDGPU Device Coredump ****\n"); + drm_printf(&p, "version: " AMDGPU_COREDUMP_VERSION "\n"); + drm_printf(&p, "kernel: " UTS_RELEASE "\n"); + drm_printf(&p, "module: " KBUILD_MODNAME "\n"); + drm_printf(&p, "time: %lld.%09ld\n", coredump->reset_time.tv_sec, + coredump->reset_time.tv_nsec); + + if (coredump->reset_task_info.pid) + drm_printf(&p, "process_name: %s PID: %d\n", + coredump->reset_task_info.process_name, + coredump->reset_task_info.pid); + + /* GPU IP's information of the SOC */ + drm_printf(&p, "\nIP Information\n"); + drm_printf(&p, "SOC Family: %d\n", coredump->adev->family); + drm_printf(&p, "SOC Revision id: %d\n", coredump->adev->rev_id); + drm_printf(&p, "SOC External Revision id: %d\n", coredump->adev->external_rev_id); + + for (int i = 1; i < MAX_HWIP; i++) { + for (int j = 0; j < HWIP_MAX_INSTANCE; j++) { + ver = coredump->adev->ip_versions[i][j]; + if (ver) + drm_printf(&p, "HWIP: %s[%d][%d]: v%d.%d.%d.%d.%d\n", + hw_ip_names[i], i, j, + IP_VERSION_MAJ(ver), + IP_VERSION_MIN(ver), + IP_VERSION_REV(ver), + IP_VERSION_VARIANT(ver), + IP_VERSION_SUBREV(ver)); + } + } + + /* IP firmware information */ + drm_printf(&p, "\nIP Firmwares\n"); + amdgpu_devcoredump_fw_info(coredump->adev, &p); + + if (coredump->ring) { + drm_printf(&p, "\nRing timed out details\n"); + drm_printf(&p, "IP Type: %d Ring Name: %s\n", + coredump->ring->funcs->type, + coredump->ring->name); + } + + /* Add page fault information */ + fault_info = &coredump->adev->vm_manager.fault_info; + drm_printf(&p, "\n[%s] Page fault observed\n", + fault_info->vmhub ? "mmhub" : "gfxhub"); + drm_printf(&p, "Faulty page starting at address: 0x%016llx\n", fault_info->addr); + drm_printf(&p, "Protection fault status register: 0x%x\n\n", fault_info->status); + + /* dump the ip state for each ip */ + drm_printf(&p, "IP Dump\n"); + for (int i = 0; i < coredump->adev->num_ip_blocks; i++) { + if (coredump->adev->ip_blocks[i].version->funcs->print_ip_state) { + drm_printf(&p, "IP: %s\n", + coredump->adev->ip_blocks[i] + .version->funcs->name); + coredump->adev->ip_blocks[i] + .version->funcs->print_ip_state( + (void *)coredump->adev, &p); + drm_printf(&p, "\n"); + } + } + + /* Add ring buffer information */ + drm_printf(&p, "Ring buffer information\n"); + for (int i = 0; i < coredump->adev->num_rings; i++) { + int j = 0; + struct amdgpu_ring *ring = coredump->adev->rings[i]; + + drm_printf(&p, "ring name: %s\n", ring->name); + drm_printf(&p, "Rptr: 0x%llx Wptr: 0x%llx RB mask: %x\n", + amdgpu_ring_get_rptr(ring), + amdgpu_ring_get_wptr(ring), + ring->buf_mask); + drm_printf(&p, "Ring size in dwords: %d\n", + ring->ring_size / 4); + drm_printf(&p, "Ring contents\n"); + drm_printf(&p, "Offset \t Value\n"); + + while (j < ring->ring_size) { + drm_printf(&p, "0x%x \t 0x%x\n", j, ring->ring[j / 4]); + j += 4; + } + } + + if (coredump->reset_vram_lost) + drm_printf(&p, "VRAM is lost due to GPU reset!\n"); + if (coredump->adev->reset_info.num_regs) { + drm_printf(&p, "AMDGPU register dumps:\nOffset: Value:\n"); + + for (i = 0; i < coredump->adev->reset_info.num_regs; i++) + drm_printf(&p, "0x%08x: 0x%08x\n", + coredump->adev->reset_info.reset_dump_reg_list[i], + coredump->adev->reset_info.reset_dump_reg_value[i]); + } + + return count - iter.remain; +} + +static void amdgpu_devcoredump_free(void *data) +{ + kfree(data); +} + +void amdgpu_coredump(struct amdgpu_device *adev, bool vram_lost, + struct amdgpu_reset_context *reset_context) +{ + struct amdgpu_coredump_info *coredump; + struct drm_device *dev = adev_to_drm(adev); + struct amdgpu_job *job = reset_context->job; + struct drm_sched_job *s_job; + + coredump = kzalloc(sizeof(*coredump), GFP_NOWAIT); + + if (!coredump) { + DRM_ERROR("%s: failed to allocate memory for coredump\n", __func__); + return; + } + + coredump->reset_vram_lost = vram_lost; + + if (reset_context->job && reset_context->job->vm) { + struct amdgpu_task_info *ti; + struct amdgpu_vm *vm = reset_context->job->vm; + + ti = amdgpu_vm_get_task_info_vm(vm); + if (ti) { + coredump->reset_task_info = *ti; + amdgpu_vm_put_task_info(ti); + } + } + + if (job) { + s_job = &job->base; + coredump->ring = to_amdgpu_ring(s_job->sched); + } + + coredump->adev = adev; + + ktime_get_ts64(&coredump->reset_time); + + dev_coredumpm(dev->dev, THIS_MODULE, coredump, 0, GFP_NOWAIT, + amdgpu_devcoredump_read, amdgpu_devcoredump_free); +} +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.h new file mode 100644 index 000000000000..52459512cb2b --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.h @@ -0,0 +1,47 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2024 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __AMDGPU_DEV_COREDUMP_H__ +#define __AMDGPU_DEV_COREDUMP_H__ + +#include "amdgpu.h" +#include "amdgpu_reset.h" + +#ifdef CONFIG_DEV_COREDUMP + +#define AMDGPU_COREDUMP_VERSION "1" + +struct amdgpu_coredump_info { + struct amdgpu_device *adev; + struct amdgpu_task_info reset_task_info; + struct timespec64 reset_time; + bool reset_vram_lost; + struct amdgpu_ring *ring; +}; +#endif + +void amdgpu_coredump(struct amdgpu_device *adev, bool vram_lost, + struct amdgpu_reset_context *reset_context); + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 94bdb5fa6ebc..861ccff78af9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -74,6 +74,7 @@ #include "amdgpu_fru_eeprom.h" #include "amdgpu_reset.h" #include "amdgpu_virt.h" +#include "amdgpu_dev_coredump.h" #include <linux/suspend.h> #include <drm/task_barrier.h> @@ -96,6 +97,9 @@ MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin"); #define AMDGPU_RESUME_MS 2000 #define AMDGPU_MAX_RETRY_LIMIT 2 #define AMDGPU_RETRY_SRIOV_RESET(r) ((r) == -EBUSY || (r) == -ETIMEDOUT || (r) == -EINVAL) +#define AMDGPU_PCIE_INDEX_FALLBACK (0x38 >> 2) +#define AMDGPU_PCIE_INDEX_HI_FALLBACK (0x44 >> 2) +#define AMDGPU_PCIE_DATA_FALLBACK (0x3C >> 2) static const struct drm_driver amdgpu_kms_driver; @@ -140,6 +144,8 @@ const char *amdgpu_asic_name[] = { "LAST", }; +static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev); + /** * DOC: pcie_replay_count * @@ -332,16 +338,93 @@ bool amdgpu_device_supports_boco(struct drm_device *dev) * * @dev: drm_device pointer * - * Returns true if the device supporte BACO, - * otherwise return false. + * Return: + * 1 if the device supporte BACO; + * 3 if the device support MACO (only works if BACO is supported) + * otherwise return 0. */ -bool amdgpu_device_supports_baco(struct drm_device *dev) +int amdgpu_device_supports_baco(struct drm_device *dev) { struct amdgpu_device *adev = drm_to_adev(dev); return amdgpu_asic_supports_baco(adev); } +void amdgpu_device_detect_runtime_pm_mode(struct amdgpu_device *adev) +{ + struct drm_device *dev; + int bamaco_support; + + dev = adev_to_drm(adev); + + adev->pm.rpm_mode = AMDGPU_RUNPM_NONE; + bamaco_support = amdgpu_device_supports_baco(dev); + + switch (amdgpu_runtime_pm) { + case 2: + if (bamaco_support & MACO_SUPPORT) { + adev->pm.rpm_mode = AMDGPU_RUNPM_BAMACO; + dev_info(adev->dev, "Forcing BAMACO for runtime pm\n"); + } else if (bamaco_support == BACO_SUPPORT) { + adev->pm.rpm_mode = AMDGPU_RUNPM_BACO; + dev_info(adev->dev, "Requested mode BAMACO not available,fallback to use BACO\n"); + } + break; + case 1: + if (bamaco_support & BACO_SUPPORT) { + adev->pm.rpm_mode = AMDGPU_RUNPM_BACO; + dev_info(adev->dev, "Forcing BACO for runtime pm\n"); + } + break; + case -1: + case -2: + if (amdgpu_device_supports_px(dev)) { /* enable PX as runtime mode */ + adev->pm.rpm_mode = AMDGPU_RUNPM_PX; + dev_info(adev->dev, "Using ATPX for runtime pm\n"); + } else if (amdgpu_device_supports_boco(dev)) { /* enable boco as runtime mode */ + adev->pm.rpm_mode = AMDGPU_RUNPM_BOCO; + dev_info(adev->dev, "Using BOCO for runtime pm\n"); + } else { + if (!bamaco_support) + goto no_runtime_pm; + + switch (adev->asic_type) { + case CHIP_VEGA20: + case CHIP_ARCTURUS: + /* BACO are not supported on vega20 and arctrus */ + break; + case CHIP_VEGA10: + /* enable BACO as runpm mode if noretry=0 */ + if (!adev->gmc.noretry) + adev->pm.rpm_mode = AMDGPU_RUNPM_BACO; + break; + default: + /* enable BACO as runpm mode on CI+ */ + adev->pm.rpm_mode = AMDGPU_RUNPM_BACO; + break; + } + + if (adev->pm.rpm_mode == AMDGPU_RUNPM_BACO) { + if (bamaco_support & MACO_SUPPORT) { + adev->pm.rpm_mode = AMDGPU_RUNPM_BAMACO; + dev_info(adev->dev, "Using BAMACO for runtime pm\n"); + } else { + dev_info(adev->dev, "Using BACO for runtime pm\n"); + } + } + } + break; + case 0: + dev_info(adev->dev, "runtime pm is manually disabled\n"); + break; + default: + break; + } + +no_runtime_pm: + if (adev->pm.rpm_mode == AMDGPU_RUNPM_NONE) + dev_info(adev->dev, "Runtime PM not available\n"); +} /** * amdgpu_device_supports_smart_shift - Is the device dGPU with * smart shift support @@ -781,12 +864,22 @@ u32 amdgpu_device_indirect_rreg_ext(struct amdgpu_device *adev, void __iomem *pcie_index_hi_offset; void __iomem *pcie_data_offset; - pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev); - pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev); - if ((reg_addr >> 32) && (adev->nbio.funcs->get_pcie_index_hi_offset)) - pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev); - else + if (unlikely(!adev->nbio.funcs)) { + pcie_index = AMDGPU_PCIE_INDEX_FALLBACK; + pcie_data = AMDGPU_PCIE_DATA_FALLBACK; + } else { + pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev); + pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev); + } + + if (reg_addr >> 32) { + if (unlikely(!adev->nbio.funcs)) + pcie_index_hi = AMDGPU_PCIE_INDEX_HI_FALLBACK; + else + pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev); + } else { pcie_index_hi = 0; + } spin_lock_irqsave(&adev->pcie_idx_lock, flags); pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4; @@ -1218,8 +1311,6 @@ static int amdgpu_device_asic_init(struct amdgpu_device *adev) amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(11, 0, 0)) { amdgpu_psp_wait_for_bootloader(adev); ret = amdgpu_atomfirmware_asic_init(adev, true); - /* TODO: check the return val and stop device initialization if boot fails */ - amdgpu_psp_query_boot_status(adev); return ret; } else { return amdgpu_atom_asic_init(adev->mode_info.atom_context); @@ -1391,13 +1482,17 @@ static int amdgpu_device_wb_init(struct amdgpu_device *adev) */ int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb) { - unsigned long offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb); + unsigned long flags, offset; + spin_lock_irqsave(&adev->wb.lock, flags); + offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb); if (offset < adev->wb.num_wb) { __set_bit(offset, adev->wb.used); + spin_unlock_irqrestore(&adev->wb.lock, flags); *wb = offset << 3; /* convert to dw offset */ return 0; } else { + spin_unlock_irqrestore(&adev->wb.lock, flags); return -EINVAL; } } @@ -1412,9 +1507,13 @@ int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb) */ void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb) { + unsigned long flags; + wb >>= 3; + spin_lock_irqsave(&adev->wb.lock, flags); if (wb < adev->wb.num_wb) __clear_bit(wb, adev->wb.used); + spin_unlock_irqrestore(&adev->wb.lock, flags); } /** @@ -1442,6 +1541,10 @@ int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev) if (amdgpu_sriov_vf(adev)) return 0; + /* PCI_EXT_CAP_ID_VNDR extended capability is located at 0x100 */ + if (!pci_find_ext_capability(adev->pdev, PCI_EXT_CAP_ID_VNDR)) + DRM_WARN("System can't access extended configuration space, please check!!\n"); + /* skip if the bios has already enabled large BAR */ if (adev->gmc.real_vram_size && (pci_resource_len(adev->pdev, 0) >= adev->gmc.real_vram_size)) @@ -3966,6 +4069,7 @@ int amdgpu_device_init(struct amdgpu_device *adev, spin_lock_init(&adev->se_cac_idx_lock); spin_lock_init(&adev->audio_endpt_idx_lock); spin_lock_init(&adev->mm_stats.lock); + spin_lock_init(&adev->wb.lock); INIT_LIST_HEAD(&adev->shadow_list); mutex_init(&adev->shadow_list_lock); @@ -4039,13 +4143,13 @@ int amdgpu_device_init(struct amdgpu_device *adev, return r; } + amdgpu_device_set_mcbp(adev); + /* early init functions */ r = amdgpu_device_ip_early_init(adev); if (r) return r; - amdgpu_device_set_mcbp(adev); - /* Get rid of things like offb */ r = drm_aperture_remove_conflicting_pci_framebuffers(adev->pdev, &amdgpu_kms_driver); if (r) @@ -4054,6 +4158,13 @@ int amdgpu_device_init(struct amdgpu_device *adev, /* Enable TMZ based on IP_VERSION */ amdgpu_gmc_tmz_set(adev); + if (amdgpu_sriov_vf(adev) && + amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(10, 3, 0)) + /* VF MMIO access (except mailbox range) from CPU + * will be blocked during sriov runtime + */ + adev->virt.caps |= AMDGPU_VF_MMIO_ACCESS_PROTECT; + amdgpu_gmc_noretry_set(adev); /* Need to get xgmi info early to decide the reset behavior*/ if (adev->gmc.xgmi.supported) { @@ -4120,18 +4231,22 @@ int amdgpu_device_init(struct amdgpu_device *adev, adev->ip_blocks[i].status.hw = true; } } + } else if (amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 10) && + !amdgpu_device_has_display_hardware(adev)) { + r = psp_gpu_reset(adev); } else { - tmp = amdgpu_reset_method; - /* It should do a default reset when loading or reloading the driver, - * regardless of the module parameter reset_method. - */ - amdgpu_reset_method = AMD_RESET_METHOD_NONE; - r = amdgpu_asic_reset(adev); - amdgpu_reset_method = tmp; - if (r) { - dev_err(adev->dev, "asic reset on init failed\n"); - goto failed; - } + tmp = amdgpu_reset_method; + /* It should do a default reset when loading or reloading the driver, + * regardless of the module parameter reset_method. + */ + amdgpu_reset_method = AMD_RESET_METHOD_NONE; + r = amdgpu_asic_reset(adev); + amdgpu_reset_method = tmp; + } + + if (r) { + dev_err(adev->dev, "asic reset on init failed\n"); + goto failed; } } @@ -4524,6 +4639,8 @@ int amdgpu_device_prepare(struct drm_device *dev) if (r) goto unprepare; + flush_delayed_work(&adev->gfx.gfx_off_delay_work); + for (i = 0; i < adev->num_ip_blocks; i++) { if (!adev->ip_blocks[i].status.valid) continue; @@ -4953,12 +5070,15 @@ static int amdgpu_device_reset_sriov(struct amdgpu_device *adev, retry: amdgpu_amdkfd_pre_reset(adev); + amdgpu_device_stop_pending_resets(adev); + if (from_hypervisor) r = amdgpu_virt_request_full_gpu(adev, true); else r = amdgpu_virt_reset_gpu(adev); if (r) return r; + amdgpu_ras_set_fed(adev, false); amdgpu_irq_gpu_reset_resume_helper(adev); /* some sw clean up VF needs to do before recover */ @@ -5242,11 +5362,21 @@ int amdgpu_do_asic_reset(struct list_head *device_list_handle, struct amdgpu_device *tmp_adev = NULL; bool need_full_reset, skip_hw_reset, vram_lost = false; int r = 0; + uint32_t i; /* Try reset handler method first */ tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device, reset_list); - amdgpu_reset_reg_dumps(tmp_adev); + + if (!test_bit(AMDGPU_SKIP_COREDUMP, &reset_context->flags)) { + amdgpu_reset_reg_dumps(tmp_adev); + + /* Trigger ip dump before we reset the asic */ + for (i = 0; i < tmp_adev->num_ip_blocks; i++) + if (tmp_adev->ip_blocks[i].version->funcs->dump_ip_state) + tmp_adev->ip_blocks[i].version->funcs + ->dump_ip_state((void *)tmp_adev); + } reset_context->reset_device_list = device_list_handle; r = amdgpu_reset_perform_reset(tmp_adev, reset_context); @@ -5306,6 +5436,7 @@ int amdgpu_do_asic_reset(struct list_head *device_list_handle, list_for_each_entry(tmp_adev, device_list_handle, reset_list) { if (need_full_reset) { /* post card */ + amdgpu_ras_set_fed(tmp_adev, false); r = amdgpu_device_asic_init(tmp_adev); if (r) { dev_warn(tmp_adev->dev, "asic atom init failed!"); @@ -5318,7 +5449,8 @@ int amdgpu_do_asic_reset(struct list_head *device_list_handle, vram_lost = amdgpu_device_check_vram_lost(tmp_adev); - amdgpu_coredump(tmp_adev, vram_lost, reset_context); + if (!test_bit(AMDGPU_SKIP_COREDUMP, &reset_context->flags)) + amdgpu_coredump(tmp_adev, vram_lost, reset_context); if (vram_lost) { DRM_INFO("VRAM is lost due to GPU reset!\n"); @@ -5516,6 +5648,23 @@ static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev) } +static int amdgpu_device_health_check(struct list_head *device_list_handle) +{ + struct amdgpu_device *tmp_adev; + int ret = 0; + u32 status; + + list_for_each_entry(tmp_adev, device_list_handle, reset_list) { + pci_read_config_dword(tmp_adev->pdev, PCI_COMMAND, &status); + if (PCI_POSSIBLE_ERROR(status)) { + dev_err(tmp_adev->dev, "device lost from bus!"); + ret = -ENODEV; + } + } + + return ret; +} + /** * amdgpu_device_gpu_recover - reset the asic and recover scheduler * @@ -5587,6 +5736,12 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, device_list_handle = &device_list; } + if (!amdgpu_sriov_vf(adev)) { + r = amdgpu_device_health_check(device_list_handle); + if (r) + goto end_reset; + } + /* We need to lock reset domain only once both for XGMI and single device */ tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device, reset_list); @@ -5669,11 +5824,12 @@ retry: /* Rest of adevs pre asic reset from XGMI hive. */ tmp_adev->asic_reset_res = r; } - /* - * Drop all pending non scheduler resets. Scheduler resets - * were already dropped during drm_sched_stop - */ - amdgpu_device_stop_pending_resets(tmp_adev); + if (!amdgpu_sriov_vf(tmp_adev)) + /* + * Drop all pending non scheduler resets. Scheduler resets + * were already dropped during drm_sched_stop + */ + amdgpu_device_stop_pending_resets(tmp_adev); } /* Actual ASIC resets if needed.*/ @@ -5686,6 +5842,7 @@ retry: /* Rest of adevs pre asic reset from XGMI hive. */ /* Aldebaran and gfx_11_0_3 support ras in SRIOV, so need resume ras during reset */ if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2) || + amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) || amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 0, 3)) amdgpu_ras_resume(adev); } else { @@ -5751,6 +5908,7 @@ skip_sched_resume: reset_list); amdgpu_device_unlock_reset_domain(tmp_adev->reset_domain); +end_reset: if (hive) { mutex_unlock(&hive->hive_lock); amdgpu_put_xgmi_hive(hive); @@ -6107,6 +6265,20 @@ pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev) struct amdgpu_reset_context reset_context; u32 memsize; struct list_head device_list; + struct amdgpu_hive_info *hive; + int hive_ras_recovery = 0; + struct amdgpu_ras *ras; + + /* PCI error slot reset should be skipped During RAS recovery */ + hive = amdgpu_get_xgmi_hive(adev); + if (hive) { + hive_ras_recovery = atomic_read(&hive->ras_recovery); + amdgpu_put_xgmi_hive(hive); + } + ras = amdgpu_ras_get_context(adev); + if ((amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3)) && + ras && (atomic_read(&ras->in_recovery) || hive_ras_recovery)) + return PCI_ERS_RESULT_RECOVERED; DRM_INFO("PCI error: slot reset callback!!\n"); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c index c7d60dd0fb97..0e31bdb4b7cb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c @@ -27,6 +27,7 @@ #include "amdgpu_discovery.h" #include "soc15_hw_ip.h" #include "discovery.h" +#include "amdgpu_ras.h" #include "soc15.h" #include "gfx_v9_0.h" @@ -60,20 +61,24 @@ #include "nbio_v4_3.h" #include "nbio_v7_2.h" #include "nbio_v7_7.h" +#include "nbif_v6_3_1.h" #include "hdp_v5_0.h" #include "hdp_v5_2.h" #include "hdp_v6_0.h" +#include "hdp_v7_0.h" #include "nv.h" #include "soc21.h" #include "navi10_ih.h" #include "ih_v6_0.h" #include "ih_v6_1.h" +#include "ih_v7_0.h" #include "gfx_v10_0.h" #include "gfx_v11_0.h" #include "sdma_v5_0.h" #include "sdma_v5_2.h" #include "sdma_v6_0.h" #include "lsdma_v6_0.h" +#include "lsdma_v7_0.h" #include "vcn_v2_0.h" #include "jpeg_v2_0.h" #include "vcn_v3_0.h" @@ -92,12 +97,16 @@ #include "smuio_v13_0.h" #include "smuio_v13_0_3.h" #include "smuio_v13_0_6.h" +#include "smuio_v14_0_2.h" +#include "vcn_v5_0_0.h" +#include "jpeg_v5_0_0.h" #include "amdgpu_vpe.h" #define FIRMWARE_IP_DISCOVERY "amdgpu/ip_discovery.bin" MODULE_FIRMWARE(FIRMWARE_IP_DISCOVERY); +#define mmIP_DISCOVERY_VERSION 0x16A00 #define mmRCC_CONFIG_MEMSIZE 0xde3 #define mmMP0_SMN_C2PMSG_33 0x16061 #define mmMM_INDEX 0x0 @@ -237,6 +246,9 @@ static int amdgpu_discovery_read_binary_from_sysmem(struct amdgpu_device *adev, return -ENOENT; } +#define IP_DISCOVERY_V2 2 +#define IP_DISCOVERY_V4 4 + static int amdgpu_discovery_read_binary_from_mem(struct amdgpu_device *adev, uint8_t *binary) { @@ -251,14 +263,14 @@ static int amdgpu_discovery_read_binary_from_mem(struct amdgpu_device *adev, * wait for this to complete. Once the C2PMSG is updated, we can * continue. */ - if (dev_is_removable(&adev->pdev->dev)) { - for (i = 0; i < 1000; i++) { - msg = RREG32(mmMP0_SMN_C2PMSG_33); - if (msg & 0x80000000) - break; - msleep(1); - } + + for (i = 0; i < 1000; i++) { + msg = RREG32(mmMP0_SMN_C2PMSG_33); + if (msg & 0x80000000) + break; + usleep_range(1000, 1100); } + vram_size = (uint64_t)RREG32(mmRCC_CONFIG_MEMSIZE) << 20; if (vram_size) { @@ -518,7 +530,9 @@ static int amdgpu_discovery_init(struct amdgpu_device *adev) out: kfree(adev->mman.discovery_bin); adev->mman.discovery_bin = NULL; - + if ((amdgpu_discovery != 2) && + (RREG32(mmIP_DISCOVERY_VERSION) == 4)) + amdgpu_ras_query_boot_status(adev, 4); return r; } @@ -1278,11 +1292,10 @@ static int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev) * 0b10 : encode is disabled * 0b01 : decode is disabled */ - adev->vcn.vcn_config[adev->vcn.num_vcn_inst] = - ip->revision & 0xc0; - ip->revision &= ~0xc0; if (adev->vcn.num_vcn_inst < AMDGPU_MAX_VCN_INSTANCES) { + adev->vcn.vcn_config[adev->vcn.num_vcn_inst] = + ip->revision & 0xc0; adev->vcn.num_vcn_inst++; adev->vcn.inst_mask |= (1U << ip->instance_number); @@ -1293,6 +1306,7 @@ static int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev) adev->vcn.num_vcn_inst + 1, AMDGPU_MAX_VCN_INSTANCES); } + ip->revision &= ~0xc0; } if (le16_to_cpu(ip->hw_id) == SDMA0_HWID || le16_to_cpu(ip->hw_id) == SDMA1_HWID || @@ -1310,6 +1324,15 @@ static int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev) } } + if (le16_to_cpu(ip->hw_id) == VPE_HWID) { + if (adev->vpe.num_instances < AMDGPU_MAX_VPE_INSTANCES) + adev->vpe.num_instances++; + else + dev_err(adev->dev, "Too many VPE instances: %d vs %d\n", + adev->vpe.num_instances + 1, + AMDGPU_MAX_VPE_INSTANCES); + } + if (le16_to_cpu(ip->hw_id) == UMC_HWID) { adev->gmc.num_umc++; adev->umc.node_inst_num++; @@ -1674,6 +1697,7 @@ static int amdgpu_discovery_set_common_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(11, 0, 3): case IP_VERSION(11, 0, 4): case IP_VERSION(11, 5, 0): + case IP_VERSION(11, 5, 1): amdgpu_device_ip_block_add(adev, &soc21_common_ip_block); break; default: @@ -1721,6 +1745,7 @@ static int amdgpu_discovery_set_gmc_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(11, 0, 3): case IP_VERSION(11, 0, 4): case IP_VERSION(11, 5, 0): + case IP_VERSION(11, 5, 1): amdgpu_device_ip_block_add(adev, &gmc_v11_0_ip_block); break; default: @@ -1763,6 +1788,9 @@ static int amdgpu_discovery_set_ih_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(6, 1, 0): amdgpu_device_ip_block_add(adev, &ih_v6_1_ip_block); break; + case IP_VERSION(7, 0, 0): + amdgpu_device_ip_block_add(adev, &ih_v7_0_ip_block); + break; default: dev_err(adev->dev, "Failed to add ih ip block(OSSSYS_HWIP:0x%x)\n", @@ -1812,11 +1840,16 @@ static int amdgpu_discovery_set_psp_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(13, 0, 10): case IP_VERSION(13, 0, 11): case IP_VERSION(14, 0, 0): + case IP_VERSION(14, 0, 1): amdgpu_device_ip_block_add(adev, &psp_v13_0_ip_block); break; case IP_VERSION(13, 0, 4): amdgpu_device_ip_block_add(adev, &psp_v13_0_4_ip_block); break; + case IP_VERSION(14, 0, 2): + case IP_VERSION(14, 0, 3): + amdgpu_device_ip_block_add(adev, &psp_v14_0_ip_block); + break; default: dev_err(adev->dev, "Failed to add psp ip block(MP0_HWIP:0x%x)\n", @@ -1867,6 +1900,9 @@ static int amdgpu_discovery_set_smu_ip_blocks(struct amdgpu_device *adev) amdgpu_device_ip_block_add(adev, &smu_v13_0_ip_block); break; case IP_VERSION(14, 0, 0): + case IP_VERSION(14, 0, 1): + case IP_VERSION(14, 0, 2): + case IP_VERSION(14, 0, 3): amdgpu_device_ip_block_add(adev, &smu_v14_0_ip_block); break; default: @@ -1917,6 +1953,7 @@ static int amdgpu_discovery_set_display_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(3, 2, 0): case IP_VERSION(3, 2, 1): case IP_VERSION(3, 5, 0): + case IP_VERSION(3, 5, 1): if (amdgpu_sriov_vf(adev)) amdgpu_discovery_set_sriov_display(adev); else @@ -1986,6 +2023,7 @@ static int amdgpu_discovery_set_gc_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(11, 0, 3): case IP_VERSION(11, 0, 4): case IP_VERSION(11, 5, 0): + case IP_VERSION(11, 5, 1): amdgpu_device_ip_block_add(adev, &gfx_v11_0_ip_block); break; default: @@ -2033,6 +2071,7 @@ static int amdgpu_discovery_set_sdma_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(6, 0, 2): case IP_VERSION(6, 0, 3): case IP_VERSION(6, 1, 0): + case IP_VERSION(6, 1, 1): amdgpu_device_ip_block_add(adev, &sdma_v6_0_ip_block); break; default: @@ -2119,9 +2158,14 @@ static int amdgpu_discovery_set_mm_ip_blocks(struct amdgpu_device *adev) amdgpu_device_ip_block_add(adev, &jpeg_v4_0_3_ip_block); break; case IP_VERSION(4, 0, 5): + case IP_VERSION(4, 0, 6): amdgpu_device_ip_block_add(adev, &vcn_v4_0_5_ip_block); amdgpu_device_ip_block_add(adev, &jpeg_v4_0_5_ip_block); break; + case IP_VERSION(5, 0, 0): + amdgpu_device_ip_block_add(adev, &vcn_v5_0_0_ip_block); + amdgpu_device_ip_block_add(adev, &jpeg_v5_0_0_ip_block); + break; default: dev_err(adev->dev, "Failed to add vcn/jpeg ip block(UVD_HWIP:0x%x)\n", @@ -2160,6 +2204,7 @@ static int amdgpu_discovery_set_mes_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(11, 0, 3): case IP_VERSION(11, 0, 4): case IP_VERSION(11, 5, 0): + case IP_VERSION(11, 5, 1): amdgpu_device_ip_block_add(adev, &mes_v11_0_ip_block); adev->enable_mes = true; adev->enable_mes_kiq = true; @@ -2185,6 +2230,7 @@ static int amdgpu_discovery_set_vpe_ip_blocks(struct amdgpu_device *adev) { switch (amdgpu_ip_version(adev, VPE_HWIP, 0)) { case IP_VERSION(6, 1, 0): + case IP_VERSION(6, 1, 1): amdgpu_device_ip_block_add(adev, &vpe_v6_1_ip_block); break; default: @@ -2198,6 +2244,7 @@ static int amdgpu_discovery_set_umsch_mm_ip_blocks(struct amdgpu_device *adev) { switch (amdgpu_ip_version(adev, VCN_HWIP, 0)) { case IP_VERSION(4, 0, 5): + case IP_VERSION(4, 0, 6): if (amdgpu_umsch_mm & 0x1) { amdgpu_device_ip_block_add(adev, &umsch_mm_v4_0_ip_block); adev->enable_umsch_mm = true; @@ -2438,6 +2485,7 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) adev->family = AMDGPU_FAMILY_GC_11_0_1; break; case IP_VERSION(11, 5, 0): + case IP_VERSION(11, 5, 1): adev->family = AMDGPU_FAMILY_GC_11_5_0; break; default: @@ -2457,6 +2505,7 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(11, 0, 1): case IP_VERSION(11, 0, 4): case IP_VERSION(11, 5, 0): + case IP_VERSION(11, 5, 1): adev->flags |= AMD_IS_APU; break; default: @@ -2493,6 +2542,7 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) adev->nbio.hdp_flush_reg = &nbio_v7_9_hdp_flush_reg; break; case IP_VERSION(7, 11, 0): + case IP_VERSION(7, 11, 1): adev->nbio.funcs = &nbio_v7_11_funcs; adev->nbio.hdp_flush_reg = &nbio_v7_11_hdp_flush_reg; break; @@ -2528,6 +2578,10 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) adev->nbio.funcs = &nbio_v7_7_funcs; adev->nbio.hdp_flush_reg = &nbio_v7_7_hdp_flush_reg; break; + case IP_VERSION(6, 3, 1): + adev->nbio.funcs = &nbif_v6_3_1_funcs; + adev->nbio.hdp_flush_reg = &nbif_v6_3_1_hdp_flush_reg; + break; default: break; } @@ -2560,6 +2614,9 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(6, 1, 0): adev->hdp.funcs = &hdp_v6_0_funcs; break; + case IP_VERSION(7, 0, 0): + adev->hdp.funcs = &hdp_v7_0_funcs; + break; default: break; } @@ -2624,8 +2681,12 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(13, 0, 6): case IP_VERSION(13, 0, 8): case IP_VERSION(14, 0, 0): + case IP_VERSION(14, 0, 1): adev->smuio.funcs = &smuio_v13_0_6_funcs; break; + case IP_VERSION(14, 0, 2): + adev->smuio.funcs = &smuio_v14_0_2_funcs; + break; default: break; } @@ -2637,6 +2698,10 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(6, 0, 3): adev->lsdma.funcs = &lsdma_v6_0_funcs; break; + case IP_VERSION(7, 0, 0): + case IP_VERSION(7, 0, 1): + adev->lsdma.funcs = &lsdma_v7_0_funcs; + break; default: break; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c index b8fbe97efe1d..3ecc7ef95172 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c @@ -1350,14 +1350,6 @@ int amdgpu_display_modeset_create_props(struct amdgpu_device *adev) "dither", amdgpu_dither_enum_list, sz); - if (adev->dc_enabled) { - adev->mode_info.abm_level_property = - drm_property_create_range(adev_to_drm(adev), 0, - "abm level", 0, 4); - if (!adev->mode_info.abm_level_property) - return -ENOMEM; - } - return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c index decbbe3d4f06..055ba2ea4c12 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c @@ -377,6 +377,10 @@ amdgpu_dma_buf_move_notify(struct dma_buf_attachment *attach) struct amdgpu_vm_bo_base *bo_base; int r; + /* FIXME: This should be after the "if", but needs a fix to make sure + * DMABuf imports are initialized in the right VM list. + */ + amdgpu_vm_bo_invalidate(adev, bo, false); if (!bo->tbo.resource || bo->tbo.resource->mem_type == TTM_PL_SYSTEM) return; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 586f4d03039d..ea14f1c8f430 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -195,10 +195,12 @@ int amdgpu_async_gfx_ring = 1; int amdgpu_mcbp = -1; int amdgpu_discovery = -1; int amdgpu_mes; +int amdgpu_mes_log_enable = 0; int amdgpu_mes_kiq; int amdgpu_noretry = -1; int amdgpu_force_asic_type = -1; int amdgpu_tmz = -1; /* auto */ +uint amdgpu_freesync_vid_mode; int amdgpu_reset_method = -1; /* auto */ int amdgpu_num_kcq = -1; int amdgpu_smartshift_bias; @@ -367,7 +369,7 @@ module_param_named(aspm, amdgpu_aspm, int, 0444); * Setting the value to 0 disables this functionality. * Setting the value to -2 is auto enabled with power down when displays are attached. */ -MODULE_PARM_DESC(runpm, "PX runtime pm (2 = force enable with BAMACO, 1 = force enable with BACO, 0 = disable, -1 = auto, -2 = autowith displays)"); +MODULE_PARM_DESC(runpm, "PX runtime pm (2 = force enable with BAMACO, 1 = force enable with BACO, 0 = disable, -1 = auto, -2 = auto with displays)"); module_param_named(runpm, amdgpu_runtime_pm, int, 0444); /** @@ -594,7 +596,7 @@ module_param_named(timeout_period, amdgpu_watchdog_timer.period, uint, 0644); #ifdef CONFIG_DRM_AMDGPU_SI #if IS_ENABLED(CONFIG_DRM_RADEON) || IS_ENABLED(CONFIG_DRM_RADEON_MODULE) -int amdgpu_si_support = 0; +int amdgpu_si_support; MODULE_PARM_DESC(si_support, "SI support (1 = enabled, 0 = disabled (default))"); #else int amdgpu_si_support = 1; @@ -613,7 +615,7 @@ module_param_named(si_support, amdgpu_si_support, int, 0444); #ifdef CONFIG_DRM_AMDGPU_CIK #if IS_ENABLED(CONFIG_DRM_RADEON) || IS_ENABLED(CONFIG_DRM_RADEON_MODULE) -int amdgpu_cik_support = 0; +int amdgpu_cik_support; MODULE_PARM_DESC(cik_support, "CIK support (1 = enabled, 0 = disabled (default))"); #else int amdgpu_cik_support = 1; @@ -667,6 +669,15 @@ MODULE_PARM_DESC(mes, module_param_named(mes, amdgpu_mes, int, 0444); /** + * DOC: mes_log_enable (int) + * Enable Micro Engine Scheduler log. This is used to enable/disable MES internal log. + * (0 = disabled (default), 1 = enabled) + */ +MODULE_PARM_DESC(mes_log_enable, + "Enable Micro Engine Scheduler log (0 = disabled (default), 1 = enabled)"); +module_param_named(mes_log_enable, amdgpu_mes_log_enable, int, 0444); + +/** * DOC: mes_kiq (int) * Enable Micro Engine Scheduler KIQ. This is a new engine pipe for kiq. * (0 = disabled (default), 1 = enabled) @@ -849,12 +860,13 @@ module_param_named(visualconfirm, amdgpu_dc_visual_confirm, uint, 0444); * the ABM algorithm, with 1 being the least reduction and 4 being the most * reduction. * - * Defaults to 0, or disabled. Userspace can still override this level later - * after boot. + * Defaults to -1, or disabled. Userspace can only override this level after + * boot if it's set to auto. */ -uint amdgpu_dm_abm_level; -MODULE_PARM_DESC(abmlevel, "ABM level (0 = off (default), 1-4 = backlight reduction level) "); -module_param_named(abmlevel, amdgpu_dm_abm_level, uint, 0444); +int amdgpu_dm_abm_level = -1; +MODULE_PARM_DESC(abmlevel, + "ABM level (0 = off, 1-4 = backlight reduction level, -1 auto (default))"); +module_param_named(abmlevel, amdgpu_dm_abm_level, int, 0444); int amdgpu_backlight = -1; MODULE_PARM_DESC(backlight, "Backlight control (0 = pwm, 1 = aux, -1 auto (default))"); @@ -883,11 +895,37 @@ MODULE_PARM_DESC(tmz, "Enable TMZ feature (-1 = auto (default), 0 = off, 1 = on) module_param_named(tmz, amdgpu_tmz, int, 0444); /** + * DOC: freesync_video (uint) + * Enable the optimization to adjust front porch timing to achieve seamless + * mode change experience when setting a freesync supported mode for which full + * modeset is not needed. + * + * The Display Core will add a set of modes derived from the base FreeSync + * video mode into the corresponding connector's mode list based on commonly + * used refresh rates and VRR range of the connected display, when users enable + * this feature. From the userspace perspective, they can see a seamless mode + * change experience when the change between different refresh rates under the + * same resolution. Additionally, userspace applications such as Video playback + * can read this modeset list and change the refresh rate based on the video + * frame rate. Finally, the userspace can also derive an appropriate mode for a + * particular refresh rate based on the FreeSync Mode and add it to the + * connector's mode list. + * + * Note: This is an experimental feature. + * + * The default value: 0 (off). + */ +MODULE_PARM_DESC( + freesync_video, + "Enable freesync modesetting optimization feature (0 = off (default), 1 = on)"); +module_param_named(freesync_video, amdgpu_freesync_vid_mode, uint, 0444); + +/** * DOC: reset_method (int) * GPU reset method (-1 = auto (default), 0 = legacy, 1 = mode0, 2 = mode1, 3 = mode2, 4 = baco) */ MODULE_PARM_DESC(reset_method, "GPU reset method (-1 = auto (default), 0 = legacy, 1 = mode0, 2 = mode1, 3 = mode2, 4 = baco/bamaco)"); -module_param_named(reset_method, amdgpu_reset_method, int, 0444); +module_param_named(reset_method, amdgpu_reset_method, int, 0644); /** * DOC: bad_page_threshold (int) Bad page threshold is specifies the @@ -2443,6 +2481,7 @@ static void amdgpu_drv_delayed_reset_work_handler(struct work_struct *work) /* Use a common context, just need to make sure full reset is done */ set_bit(AMDGPU_SKIP_HW_RESET, &reset_context.flags); + set_bit(AMDGPU_SKIP_COREDUMP, &reset_context.flags); r = amdgpu_do_asic_reset(&device_list, &reset_context); if (r) { @@ -2451,8 +2490,11 @@ static void amdgpu_drv_delayed_reset_work_handler(struct work_struct *work) } for (i = 0; i < mgpu_info.num_dgpu; i++) { adev = mgpu_info.gpu_ins[i].adev; - if (!adev->kfd.init_complete) + if (!adev->kfd.init_complete) { + kgd2kfd_init_zone_device(adev); amdgpu_amdkfd_device_init(adev); + amdgpu_amdkfd_drm_client_create(adev); + } amdgpu_ttm_set_buffer_funcs_status(adev, true); } } @@ -2665,7 +2707,7 @@ static int amdgpu_pmops_runtime_suspend(struct device *dev) } adev->in_runpm = true; - if (amdgpu_device_supports_px(drm_dev)) + if (adev->pm.rpm_mode == AMDGPU_RUNPM_PX) drm_dev->switch_power_state = DRM_SWITCH_POWER_CHANGING; /* @@ -2675,7 +2717,7 @@ static int amdgpu_pmops_runtime_suspend(struct device *dev) * platforms. * TODO: this may be also needed for PX capable platform. */ - if (amdgpu_device_supports_boco(drm_dev)) + if (adev->pm.rpm_mode == AMDGPU_RUNPM_BOCO) adev->mp1_state = PP_MP1_STATE_UNLOAD; ret = amdgpu_device_prepare(drm_dev); @@ -2684,15 +2726,15 @@ static int amdgpu_pmops_runtime_suspend(struct device *dev) ret = amdgpu_device_suspend(drm_dev, false); if (ret) { adev->in_runpm = false; - if (amdgpu_device_supports_boco(drm_dev)) + if (adev->pm.rpm_mode == AMDGPU_RUNPM_BOCO) adev->mp1_state = PP_MP1_STATE_NONE; return ret; } - if (amdgpu_device_supports_boco(drm_dev)) + if (adev->pm.rpm_mode == AMDGPU_RUNPM_BOCO) adev->mp1_state = PP_MP1_STATE_NONE; - if (amdgpu_device_supports_px(drm_dev)) { + if (adev->pm.rpm_mode == AMDGPU_RUNPM_PX) { /* Only need to handle PCI state in the driver for ATPX * PCI core handles it for _PR3. */ @@ -2701,9 +2743,10 @@ static int amdgpu_pmops_runtime_suspend(struct device *dev) pci_ignore_hotplug(pdev); pci_set_power_state(pdev, PCI_D3cold); drm_dev->switch_power_state = DRM_SWITCH_POWER_DYNAMIC_OFF; - } else if (amdgpu_device_supports_boco(drm_dev)) { + } else if (adev->pm.rpm_mode == AMDGPU_RUNPM_BOCO) { /* nothing to do */ - } else if (amdgpu_device_supports_baco(drm_dev)) { + } else if ((adev->pm.rpm_mode == AMDGPU_RUNPM_BACO) || + (adev->pm.rpm_mode == AMDGPU_RUNPM_BAMACO)) { amdgpu_device_baco_enter(drm_dev); } @@ -2726,7 +2769,7 @@ static int amdgpu_pmops_runtime_resume(struct device *dev) if (!pci_device_is_present(adev->pdev)) adev->no_hw_access = true; - if (amdgpu_device_supports_px(drm_dev)) { + if (adev->pm.rpm_mode == AMDGPU_RUNPM_PX) { drm_dev->switch_power_state = DRM_SWITCH_POWER_CHANGING; /* Only need to handle PCI state in the driver for ATPX @@ -2738,22 +2781,23 @@ static int amdgpu_pmops_runtime_resume(struct device *dev) if (ret) return ret; pci_set_master(pdev); - } else if (amdgpu_device_supports_boco(drm_dev)) { + } else if (adev->pm.rpm_mode == AMDGPU_RUNPM_BOCO) { /* Only need to handle PCI state in the driver for ATPX * PCI core handles it for _PR3. */ pci_set_master(pdev); - } else if (amdgpu_device_supports_baco(drm_dev)) { + } else if ((adev->pm.rpm_mode == AMDGPU_RUNPM_BACO) || + (adev->pm.rpm_mode == AMDGPU_RUNPM_BAMACO)) { amdgpu_device_baco_exit(drm_dev); } ret = amdgpu_device_resume(drm_dev, false); if (ret) { - if (amdgpu_device_supports_px(drm_dev)) + if (adev->pm.rpm_mode == AMDGPU_RUNPM_PX) pci_disable_device(pdev); return ret; } - if (amdgpu_device_supports_px(drm_dev)) + if (adev->pm.rpm_mode == AMDGPU_RUNPM_PX) drm_dev->switch_power_state = DRM_SWITCH_POWER_ON; adev->in_runpm = false; return 0; @@ -2763,8 +2807,7 @@ static int amdgpu_pmops_runtime_idle(struct device *dev) { struct drm_device *drm_dev = dev_get_drvdata(dev); struct amdgpu_device *adev = drm_to_adev(drm_dev); - /* we don't want the main rpm_idle to call suspend - we want to autosuspend */ - int ret = 1; + int ret; if (adev->pm.rpm_mode == AMDGPU_RUNPM_NONE) { pm_runtime_forbid(dev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c index 5706b282a0c7..c7df7fa3459f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c @@ -97,6 +97,10 @@ void amdgpu_show_fdinfo(struct drm_printer *p, struct drm_file *file) stats.requested_visible_vram/1024UL); drm_printf(p, "amd-requested-gtt:\t%llu KiB\n", stats.requested_gtt/1024UL); + drm_printf(p, "drm-shared-vram:\t%llu KiB\n", stats.vram_shared/1024UL); + drm_printf(p, "drm-shared-gtt:\t%llu KiB\n", stats.gtt_shared/1024UL); + drm_printf(p, "drm-shared-cpu:\t%llu KiB\n", stats.cpu_shared/1024UL); + for (hw_ip = 0; hw_ip < AMDGPU_HW_IP_NUM; ++hw_ip) { if (!usage[hw_ip]) continue; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index 70bff8cecfda..10832b470448 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c @@ -61,9 +61,7 @@ static struct kmem_cache *amdgpu_fence_slab; int amdgpu_fence_slab_init(void) { - amdgpu_fence_slab = kmem_cache_create( - "amdgpu_fence", sizeof(struct amdgpu_fence), 0, - SLAB_HWCACHE_ALIGN, NULL); + amdgpu_fence_slab = KMEM_CACHE(amdgpu_fence, SLAB_HWCACHE_ALIGN); if (!amdgpu_fence_slab) return -ENOMEM; return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index 49a5f1c73b3e..67c234bcf89f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -187,7 +187,40 @@ static int amdgpu_gem_object_open(struct drm_gem_object *obj, else ++bo_va->ref_count; amdgpu_bo_unreserve(abo); - return 0; + + /* Validate and add eviction fence to DMABuf imports with dynamic + * attachment in compute VMs. Re-validation will be done by + * amdgpu_vm_validate. Fences are on the reservation shared with the + * export, which is currently required to be validated and fenced + * already by amdgpu_amdkfd_gpuvm_restore_process_bos. + * + * Nested locking below for the case that a GEM object is opened in + * kfd_mem_export_dmabuf. Since the lock below is only taken for imports, + * but not for export, this is a different lock class that cannot lead to + * circular lock dependencies. + */ + if (!vm->is_compute_context || !vm->process_info) + return 0; + if (!obj->import_attach || + !dma_buf_is_dynamic(obj->import_attach->dmabuf)) + return 0; + mutex_lock_nested(&vm->process_info->lock, 1); + if (!WARN_ON(!vm->process_info->eviction_fence)) { + r = amdgpu_amdkfd_bo_validate_and_fence(abo, AMDGPU_GEM_DOMAIN_GTT, + &vm->process_info->eviction_fence->base); + if (r) { + struct amdgpu_task_info *ti = amdgpu_vm_get_task_info_vm(vm); + + dev_warn(adev->dev, "validate_and_fence failed: %d\n", r); + if (ti) { + dev_warn(adev->dev, "pid %d\n", ti->pid); + amdgpu_vm_put_task_info(ti); + } + } + } + mutex_unlock(&vm->process_info->lock); + + return r; } static void amdgpu_gem_object_close(struct drm_gem_object *obj, @@ -682,10 +715,10 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, uint64_t vm_size; int r = 0; - if (args->va_address < AMDGPU_VA_RESERVED_SIZE) { + if (args->va_address < AMDGPU_VA_RESERVED_BOTTOM) { dev_dbg(dev->dev, "va_address 0x%llx is in reserved area 0x%llx\n", - args->va_address, AMDGPU_VA_RESERVED_SIZE); + args->va_address, AMDGPU_VA_RESERVED_BOTTOM); return -EINVAL; } @@ -701,7 +734,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, args->va_address &= AMDGPU_GMC_HOLE_MASK; vm_size = adev->vm_manager.max_pfn * AMDGPU_GPU_PAGE_SIZE; - vm_size -= AMDGPU_VA_RESERVED_SIZE; + vm_size -= AMDGPU_VA_RESERVED_TOP; if (args->va_address + args->map_size > vm_size) { dev_dbg(dev->dev, "va_address 0x%llx is in top reserved area 0x%llx\n", diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index 6ddc8e3360e2..1d955652f3ba 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -304,11 +304,11 @@ static int amdgpu_gfx_kiq_acquire(struct amdgpu_device *adev, return -EINVAL; } -int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev, - struct amdgpu_ring *ring, - struct amdgpu_irq_src *irq, int xcc_id) +int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev, int xcc_id) { struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id]; + struct amdgpu_irq_src *irq = &kiq->irq; + struct amdgpu_ring *ring = &kiq->ring; int r = 0; spin_lock_init(&kiq->ring_lock); @@ -329,7 +329,8 @@ int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev, ring->eop_gpu_addr = kiq->eop_gpu_addr; ring->no_scheduler = true; - sprintf(ring->name, "kiq_%d.%d.%d.%d", xcc_id, ring->me, ring->pipe, ring->queue); + snprintf(ring->name, sizeof(ring->name), "kiq_%d.%d.%d.%d", + xcc_id, ring->me, ring->pipe, ring->queue); r = amdgpu_ring_init(adev, ring, 1024, irq, AMDGPU_CP_KIQ_IRQ_DRIVER0, AMDGPU_RING_PRIO_DEFAULT, NULL); if (r) @@ -642,8 +643,8 @@ int amdgpu_gfx_enable_kcq(struct amdgpu_device *adev, int xcc_id) kiq->pmf->kiq_set_resources(kiq_ring, queue_mask); for (i = 0; i < adev->gfx.num_compute_rings; i++) { j = i + xcc_id * adev->gfx.num_compute_rings; - kiq->pmf->kiq_map_queues(kiq_ring, - &adev->gfx.compute_ring[j]); + kiq->pmf->kiq_map_queues(kiq_ring, + &adev->gfx.compute_ring[j]); } r = amdgpu_ring_test_helper(kiq_ring); @@ -686,7 +687,7 @@ int amdgpu_gfx_enable_kgq(struct amdgpu_device *adev, int xcc_id) r = amdgpu_ring_test_helper(kiq_ring); spin_unlock(&kiq->ring_lock); if (r) - DRM_ERROR("KCQ enable failed\n"); + DRM_ERROR("KGQ enable failed\n"); return r; } @@ -1205,7 +1206,8 @@ void amdgpu_gfx_cp_init_microcode(struct amdgpu_device *adev, fw_size = le32_to_cpu(cp_hdr_v2_0->data_size_bytes); break; default: - break; + dev_err(adev->dev, "Invalid ucode id %u\n", ucode_id); + return; } if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index f23bafec71c5..64f197bbc866 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -259,7 +259,6 @@ struct amdgpu_cu_info { struct amdgpu_gfx_ras { struct amdgpu_ras_block_object ras_block; void (*enable_watchdog_timer)(struct amdgpu_device *adev); - bool (*query_utcl2_poison_status)(struct amdgpu_device *adev); int (*rlc_gc_fed_irq)(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry); @@ -434,6 +433,10 @@ struct amdgpu_gfx { uint32_t num_xcc_per_xcp; struct mutex partition_mutex; bool mcbp; /* mid command buffer preemption */ + + /* IP reg dump */ + uint32_t *ip_dump; + uint32_t reg_count; }; struct amdgpu_gfx_ras_reg_entry { @@ -471,9 +474,7 @@ static inline u32 amdgpu_gfx_create_bitmask(u32 bit_width) void amdgpu_gfx_parse_disable_cu(unsigned *mask, unsigned max_se, unsigned max_sh); -int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev, - struct amdgpu_ring *ring, - struct amdgpu_irq_src *irq, int xcc_id); +int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev, int xcc_id); void amdgpu_gfx_kiq_free_ring(struct amdgpu_ring *ring); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfxhub.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfxhub.h index c7b44aeb671b..103a837ccc71 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfxhub.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfxhub.h @@ -38,6 +38,8 @@ struct amdgpu_gfxhub_funcs { void (*mode2_save_regs)(struct amdgpu_device *adev); void (*mode2_restore_regs)(struct amdgpu_device *adev); void (*halt)(struct amdgpu_device *adev); + bool (*query_utcl2_poison_status)(struct amdgpu_device *adev, + int xcc_id); }; struct amdgpu_gfxhub { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c index 55784a9f26c4..be4629cdac04 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c @@ -52,7 +52,7 @@ int amdgpu_gmc_pdb0_alloc(struct amdgpu_device *adev) struct amdgpu_bo_param bp; u64 vram_size = adev->gmc.xgmi.node_segment_size * adev->gmc.xgmi.num_physical_nodes; uint32_t pde0_page_shift = adev->gmc.vmid0_page_table_block_size + 21; - uint32_t npdes = (vram_size + (1ULL << pde0_page_shift) -1) >> pde0_page_shift; + uint32_t npdes = (vram_size + (1ULL << pde0_page_shift) - 1) >> pde0_page_shift; memset(&bp, 0, sizeof(bp)); bp.size = PAGE_ALIGN((npdes + 1) * 8); @@ -746,6 +746,59 @@ error_unlock_reset: return r; } +void amdgpu_gmc_fw_reg_write_reg_wait(struct amdgpu_device *adev, + uint32_t reg0, uint32_t reg1, + uint32_t ref, uint32_t mask, + uint32_t xcc_inst) +{ + struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_inst]; + struct amdgpu_ring *ring = &kiq->ring; + signed long r, cnt = 0; + unsigned long flags; + uint32_t seq; + + if (adev->mes.ring.sched.ready) { + amdgpu_mes_reg_write_reg_wait(adev, reg0, reg1, + ref, mask); + return; + } + + spin_lock_irqsave(&kiq->ring_lock, flags); + amdgpu_ring_alloc(ring, 32); + amdgpu_ring_emit_reg_write_reg_wait(ring, reg0, reg1, + ref, mask); + r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT); + if (r) + goto failed_undo; + + amdgpu_ring_commit(ring); + spin_unlock_irqrestore(&kiq->ring_lock, flags); + + r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT); + + /* don't wait anymore for IRQ context */ + if (r < 1 && in_interrupt()) + goto failed_kiq; + + might_sleep(); + while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) { + + msleep(MAX_KIQ_REG_BAILOUT_INTERVAL); + r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT); + } + + if (cnt > MAX_KIQ_REG_TRY) + goto failed_kiq; + + return; + +failed_undo: + amdgpu_ring_undo(ring); + spin_unlock_irqrestore(&kiq->ring_lock, flags); +failed_kiq: + dev_err(adev->dev, "failed to write reg %x wait reg %x\n", reg0, reg1); +} + /** * amdgpu_gmc_tmz_set -- check and set if a device supports TMZ * @adev: amdgpu_device pointer @@ -790,6 +843,7 @@ void amdgpu_gmc_tmz_set(struct amdgpu_device *adev) case IP_VERSION(10, 3, 3): case IP_VERSION(11, 0, 4): case IP_VERSION(11, 5, 0): + case IP_VERSION(11, 5, 1): /* Don't enable it by default yet. */ if (amdgpu_tmz < 1) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h index e699d1ca8deb..17f40ea1104b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h @@ -417,6 +417,10 @@ void amdgpu_gmc_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, int amdgpu_gmc_flush_gpu_tlb_pasid(struct amdgpu_device *adev, uint16_t pasid, uint32_t flush_type, bool all_hub, uint32_t inst); +void amdgpu_gmc_fw_reg_write_reg_wait(struct amdgpu_device *adev, + uint32_t reg0, uint32_t reg1, + uint32_t ref, uint32_t mask, + uint32_t xcc_inst); extern void amdgpu_gmc_tmz_set(struct amdgpu_device *adev); extern void amdgpu_gmc_noretry_set(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c index 55b65fc04b65..431ec72655ec 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c @@ -129,13 +129,25 @@ static const struct mmu_interval_notifier_ops amdgpu_hmm_hsa_ops = { */ int amdgpu_hmm_register(struct amdgpu_bo *bo, unsigned long addr) { + int r; + if (bo->kfd_bo) - return mmu_interval_notifier_insert(&bo->notifier, current->mm, + r = mmu_interval_notifier_insert(&bo->notifier, current->mm, addr, amdgpu_bo_size(bo), &amdgpu_hmm_hsa_ops); - return mmu_interval_notifier_insert(&bo->notifier, current->mm, addr, - amdgpu_bo_size(bo), - &amdgpu_hmm_gfx_ops); + else + r = mmu_interval_notifier_insert(&bo->notifier, current->mm, addr, + amdgpu_bo_size(bo), + &amdgpu_hmm_gfx_ops); + if (r) + /* + * Make sure amdgpu_hmm_unregister() doesn't call + * mmu_interval_notifier_remove() when the notifier isn't properly + * initialized. + */ + bo->notifier.mm = NULL; + + return r; } /** diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c index d79cb13e1aa8..00d6211e0fbf 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c @@ -279,7 +279,7 @@ amdgpu_i2c_lookup(struct amdgpu_device *adev, return NULL; } -static void amdgpu_i2c_get_byte(struct amdgpu_i2c_chan *i2c_bus, +static int amdgpu_i2c_get_byte(struct amdgpu_i2c_chan *i2c_bus, u8 slave_addr, u8 addr, u8 *val) @@ -304,16 +304,18 @@ static void amdgpu_i2c_get_byte(struct amdgpu_i2c_chan *i2c_bus, out_buf[0] = addr; out_buf[1] = 0; - if (i2c_transfer(&i2c_bus->adapter, msgs, 2) == 2) { - *val = in_buf[0]; - DRM_DEBUG("val = 0x%02x\n", *val); - } else { - DRM_DEBUG("i2c 0x%02x 0x%02x read failed\n", - addr, *val); + if (i2c_transfer(&i2c_bus->adapter, msgs, 2) != 2) { + DRM_DEBUG("i2c 0x%02x read failed\n", addr); + return -EIO; } + + *val = in_buf[0]; + DRM_DEBUG("val = 0x%02x\n", *val); + + return 0; } -static void amdgpu_i2c_put_byte(struct amdgpu_i2c_chan *i2c_bus, +static int amdgpu_i2c_put_byte(struct amdgpu_i2c_chan *i2c_bus, u8 slave_addr, u8 addr, u8 val) @@ -329,9 +331,12 @@ static void amdgpu_i2c_put_byte(struct amdgpu_i2c_chan *i2c_bus, out_buf[0] = addr; out_buf[1] = val; - if (i2c_transfer(&i2c_bus->adapter, &msg, 1) != 1) - DRM_DEBUG("i2c 0x%02x 0x%02x write failed\n", - addr, val); + if (i2c_transfer(&i2c_bus->adapter, &msg, 1) != 1) { + DRM_DEBUG("i2c 0x%02x 0x%02x write failed\n", addr, val); + return -EIO; + } + + return 0; } /* ddc router switching */ @@ -346,16 +351,18 @@ amdgpu_i2c_router_select_ddc_port(const struct amdgpu_connector *amdgpu_connecto if (!amdgpu_connector->router_bus) return; - amdgpu_i2c_get_byte(amdgpu_connector->router_bus, + if (amdgpu_i2c_get_byte(amdgpu_connector->router_bus, amdgpu_connector->router.i2c_addr, - 0x3, &val); + 0x3, &val)) + return; val &= ~amdgpu_connector->router.ddc_mux_control_pin; amdgpu_i2c_put_byte(amdgpu_connector->router_bus, amdgpu_connector->router.i2c_addr, 0x3, val); - amdgpu_i2c_get_byte(amdgpu_connector->router_bus, + if (amdgpu_i2c_get_byte(amdgpu_connector->router_bus, amdgpu_connector->router.i2c_addr, - 0x1, &val); + 0x1, &val)) + return; val &= ~amdgpu_connector->router.ddc_mux_control_pin; val |= amdgpu_connector->router.ddc_mux_state; amdgpu_i2c_put_byte(amdgpu_connector->router_bus, @@ -375,16 +382,18 @@ amdgpu_i2c_router_select_cd_port(const struct amdgpu_connector *amdgpu_connector if (!amdgpu_connector->router_bus) return; - amdgpu_i2c_get_byte(amdgpu_connector->router_bus, + if (amdgpu_i2c_get_byte(amdgpu_connector->router_bus, amdgpu_connector->router.i2c_addr, - 0x3, &val); + 0x3, &val)) + return; val &= ~amdgpu_connector->router.cd_mux_control_pin; amdgpu_i2c_put_byte(amdgpu_connector->router_bus, amdgpu_connector->router.i2c_addr, 0x3, val); - amdgpu_i2c_get_byte(amdgpu_connector->router_bus, + if (amdgpu_i2c_get_byte(amdgpu_connector->router_bus, amdgpu_connector->router.i2c_addr, - 0x1, &val); + 0x1, &val)) + return; val &= ~amdgpu_connector->router.cd_mux_control_pin; val |= amdgpu_connector->router.cd_mux_state; amdgpu_i2c_put_byte(amdgpu_connector->router_bus, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index 6aa3b1d845ab..8b512dc28df8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -131,7 +131,6 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs, struct amdgpu_ib *ib = &ibs[0]; struct dma_fence *tmp = NULL; bool need_ctx_switch; - unsigned int patch_offset = ~0; struct amdgpu_vm *vm; uint64_t fence_ctx; uint32_t status = 0, alloc_size; @@ -139,10 +138,11 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs, bool secure, init_shadow; u64 shadow_va, csa_va, gds_va; int vmid = AMDGPU_JOB_GET_VMID(job); + bool need_pipe_sync = false; + unsigned int cond_exec; unsigned int i; int r = 0; - bool need_pipe_sync = false; if (num_ibs == 0) return -EINVAL; @@ -228,7 +228,8 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs, init_shadow, vmid); if (ring->funcs->init_cond_exec) - patch_offset = amdgpu_ring_init_cond_exec(ring); + cond_exec = amdgpu_ring_init_cond_exec(ring, + ring->cond_exe_gpu_addr); amdgpu_device_flush_hdp(adev, ring); @@ -278,16 +279,9 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs, fence_flags | AMDGPU_FENCE_FLAG_64BIT); } - if (ring->funcs->emit_gfx_shadow) { + if (ring->funcs->emit_gfx_shadow && ring->funcs->init_cond_exec) { amdgpu_ring_emit_gfx_shadow(ring, 0, 0, 0, false, 0); - - if (ring->funcs->init_cond_exec) { - unsigned int ce_offset = ~0; - - ce_offset = amdgpu_ring_init_cond_exec(ring); - if (ce_offset != ~0 && ring->funcs->patch_cond_exec) - amdgpu_ring_patch_cond_exec(ring, ce_offset); - } + amdgpu_ring_init_cond_exec(ring, ring->cond_exe_gpu_addr); } r = amdgpu_fence_emit(ring, f, job, fence_flags); @@ -302,8 +296,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs, if (ring->funcs->insert_end) ring->funcs->insert_end(ring); - if (patch_offset != ~0 && ring->funcs->patch_cond_exec) - amdgpu_ring_patch_cond_exec(ring, patch_offset); + amdgpu_ring_patch_cond_exec(ring, cond_exec); ring->current_ctx = fence_ctx; if (vm && ring->funcs->emit_switch_buffer) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c index ddd0891da116..3d7fcdeaf8cf 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c @@ -62,9 +62,8 @@ int amdgpu_pasid_alloc(unsigned int bits) int pasid = -EINVAL; for (bits = min(bits, 31U); bits > 0; bits--) { - pasid = ida_simple_get(&amdgpu_pasid_ida, - 1U << (bits - 1), 1U << bits, - GFP_KERNEL); + pasid = ida_alloc_range(&amdgpu_pasid_ida, 1U << (bits - 1), + (1U << bits) - 1, GFP_KERNEL); if (pasid != -ENOSPC) break; } @@ -82,7 +81,7 @@ int amdgpu_pasid_alloc(unsigned int bits) void amdgpu_pasid_free(u32 pasid) { trace_amdgpu_pasid_freed(pasid); - ida_simple_remove(&amdgpu_pasid_ida, pasid); + ida_free(&amdgpu_pasid_ida, pasid); } static void amdgpu_pasid_free_cb(struct dma_fence *fence, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c index 7e6d09730e6d..665c63f55278 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c @@ -445,6 +445,14 @@ void amdgpu_irq_dispatch(struct amdgpu_device *adev, entry.ih = ih; entry.iv_entry = (const uint32_t *)&ih->ring[ring_index]; + + /* + * timestamp is not supported on some legacy SOCs (cik, cz, iceland, + * si and tonga), so initialize timestamp and timestamp_src to 0 + */ + entry.timestamp = 0; + entry.timestamp_src = 0; + amdgpu_ih_decode_iv(adev, &entry); trace_amdgpu_iv(ih - &adev->irq.ih, &entry); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index 71a5cf37b472..e4742b65032d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -35,7 +35,7 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job) { struct amdgpu_ring *ring = to_amdgpu_ring(s_job->sched); struct amdgpu_job *job = to_amdgpu_job(s_job); - struct amdgpu_task_info ti; + struct amdgpu_task_info *ti; struct amdgpu_device *adev = ring->adev; int idx; int r; @@ -48,7 +48,7 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job) return DRM_GPU_SCHED_STAT_ENODEV; } - memset(&ti, 0, sizeof(struct amdgpu_task_info)); + adev->job_hang = true; if (amdgpu_gpu_recovery && @@ -58,12 +58,16 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job) goto exit; } - amdgpu_vm_get_task_info(ring->adev, job->pasid, &ti); DRM_ERROR("ring %s timeout, signaled seq=%u, emitted seq=%u\n", - job->base.sched->name, atomic_read(&ring->fence_drv.last_seq), - ring->fence_drv.sync_seq); - DRM_ERROR("Process information: process %s pid %d thread %s pid %d\n", - ti.process_name, ti.tgid, ti.task_name, ti.pid); + job->base.sched->name, atomic_read(&ring->fence_drv.last_seq), + ring->fence_drv.sync_seq); + + ti = amdgpu_vm_get_task_info_pasid(ring->adev, job->pasid); + if (ti) { + DRM_ERROR("Process information: process %s pid %d thread %s pid %d\n", + ti->process_name, ti->tgid, ti->task_name, ti->pid); + amdgpu_vm_put_task_info(ti); + } dma_fence_set_error(&s_job->s_fence->finished, -ETIME); @@ -300,12 +304,15 @@ static struct dma_fence *amdgpu_job_run(struct drm_sched_job *sched_job) dma_fence_set_error(finished, -ECANCELED); if (finished->error < 0) { - DRM_INFO("Skip scheduling IBs!\n"); + dev_dbg(adev->dev, "Skip scheduling IBs in ring(%s)", + ring->name); } else { r = amdgpu_ib_schedule(ring, job->num_ibs, job->ibs, job, &fence); if (r) - DRM_ERROR("Error scheduling IBs (%d)\n", r); + dev_err(adev->dev, + "Error scheduling IBs (%d) in ring(%s)", r, + ring->name); } job->job_run_counter++; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c index 2ff2897fd1db..6df99cb00d9a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c @@ -36,10 +36,35 @@ static void amdgpu_jpeg_idle_work_handler(struct work_struct *work); int amdgpu_jpeg_sw_init(struct amdgpu_device *adev) { + int i, r; + INIT_DELAYED_WORK(&adev->jpeg.idle_work, amdgpu_jpeg_idle_work_handler); mutex_init(&adev->jpeg.jpeg_pg_lock); atomic_set(&adev->jpeg.total_submission_cnt, 0); + if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) && + (adev->pg_flags & AMD_PG_SUPPORT_JPEG_DPG)) + adev->jpeg.indirect_sram = true; + + for (i = 0; i < adev->jpeg.num_jpeg_inst; i++) { + if (adev->jpeg.harvest_config & (1 << i)) + continue; + + if (adev->jpeg.indirect_sram) { + r = amdgpu_bo_create_kernel(adev, 64 * 2 * 4, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM | + AMDGPU_GEM_DOMAIN_GTT, + &adev->jpeg.inst[i].dpg_sram_bo, + &adev->jpeg.inst[i].dpg_sram_gpu_addr, + &adev->jpeg.inst[i].dpg_sram_cpu_addr); + if (r) { + dev_err(adev->dev, + "JPEG %d (%d) failed to allocate DPG bo\n", i, r); + return r; + } + } + } + return 0; } @@ -51,6 +76,11 @@ int amdgpu_jpeg_sw_fini(struct amdgpu_device *adev) if (adev->jpeg.harvest_config & (1 << i)) continue; + amdgpu_bo_free_kernel( + &adev->jpeg.inst[i].dpg_sram_bo, + &adev->jpeg.inst[i].dpg_sram_gpu_addr, + (void **)&adev->jpeg.inst[i].dpg_sram_cpu_addr); + for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) amdgpu_ring_fini(&adev->jpeg.inst[i].ring_dec[j]); } @@ -210,12 +240,15 @@ int amdgpu_jpeg_dec_ring_test_ib(struct amdgpu_ring *ring, long timeout) } else { r = 0; } + if (!amdgpu_sriov_vf(adev)) { for (i = 0; i < adev->usec_timeout; i++) { tmp = RREG32(adev->jpeg.inst[ring->me].external.jpeg_pitch[ring->pipe]); if (tmp == 0xDEADBEEF) break; udelay(1); + if (amdgpu_emu_mode == 1) + udelay(10); } if (i >= adev->usec_timeout) @@ -296,3 +329,16 @@ int amdgpu_jpeg_ras_sw_init(struct amdgpu_device *adev) return 0; } + +int amdgpu_jpeg_psp_update_sram(struct amdgpu_device *adev, int inst_idx, + enum AMDGPU_UCODE_ID ucode_id) +{ + struct amdgpu_firmware_info ucode = { + .ucode_id = AMDGPU_UCODE_ID_JPEG_RAM, + .mc_addr = adev->jpeg.inst[inst_idx].dpg_sram_gpu_addr, + .ucode_size = ((uintptr_t)adev->jpeg.inst[inst_idx].dpg_sram_curr_addr - + (uintptr_t)adev->jpeg.inst[inst_idx].dpg_sram_cpu_addr), + }; + + return psp_execute_ip_fw_load(&adev->psp, &ucode); +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h index ffe47e9f5bf2..aea31d61d991 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h @@ -32,6 +32,34 @@ #define AMDGPU_JPEG_HARVEST_JPEG0 (1 << 0) #define AMDGPU_JPEG_HARVEST_JPEG1 (1 << 1) +#define WREG32_SOC15_JPEG_DPG_MODE(inst_idx, offset, value, indirect) \ + do { \ + if (!indirect) { \ + WREG32_SOC15(JPEG, GET_INST(JPEG, inst_idx), \ + mmUVD_DPG_LMA_DATA, value); \ + WREG32_SOC15( \ + JPEG, GET_INST(JPEG, inst_idx), \ + mmUVD_DPG_LMA_CTL, \ + (UVD_DPG_LMA_CTL__READ_WRITE_MASK | \ + offset << UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT | \ + indirect << UVD_DPG_LMA_CTL__SRAM_SEL__SHIFT)); \ + } else { \ + *adev->jpeg.inst[inst_idx].dpg_sram_curr_addr++ = \ + offset; \ + *adev->jpeg.inst[inst_idx].dpg_sram_curr_addr++ = \ + value; \ + } \ + } while (0) + +#define RREG32_SOC15_JPEG_DPG_MODE(inst_idx, offset, mask_en) \ + ({ \ + WREG32_SOC15(JPEG, inst_idx, mmUVD_DPG_LMA_CTL, \ + (0x0 << UVD_DPG_LMA_CTL__READ_WRITE__SHIFT | \ + mask_en << UVD_DPG_LMA_CTL__MASK_EN__SHIFT | \ + offset << UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT)); \ + RREG32_SOC15(JPEG, inst_idx, mmUVD_DPG_LMA_DATA); \ + }) + struct amdgpu_jpeg_reg{ unsigned jpeg_pitch[AMDGPU_MAX_JPEG_RINGS]; }; @@ -41,6 +69,11 @@ struct amdgpu_jpeg_inst { struct amdgpu_irq_src irq; struct amdgpu_irq_src ras_poison_irq; struct amdgpu_jpeg_reg external; + struct amdgpu_bo *dpg_sram_bo; + struct dpg_pause_state pause_state; + void *dpg_sram_cpu_addr; + uint64_t dpg_sram_gpu_addr; + uint32_t *dpg_sram_curr_addr; uint8_t aid_id; }; @@ -63,6 +96,7 @@ struct amdgpu_jpeg { uint16_t inst_mask; uint8_t num_inst_per_aid; + bool indirect_sram; }; int amdgpu_jpeg_sw_init(struct amdgpu_device *adev); @@ -82,5 +116,7 @@ int amdgpu_jpeg_process_poison_irq(struct amdgpu_device *adev, int amdgpu_jpeg_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block); int amdgpu_jpeg_ras_sw_init(struct amdgpu_device *adev); +int amdgpu_jpeg_psp_update_sram(struct amdgpu_device *adev, int inst_idx, + enum AMDGPU_UCODE_ID ucode_id); #endif /*__AMDGPU_JPEG_H__*/ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index bf4f48fe438d..a0ea6fe8d060 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -149,38 +149,7 @@ int amdgpu_driver_load_kms(struct amdgpu_device *adev, unsigned long flags) goto out; } - adev->pm.rpm_mode = AMDGPU_RUNPM_NONE; - if (amdgpu_device_supports_px(dev) && - (amdgpu_runtime_pm != 0)) { /* enable PX as runtime mode */ - adev->pm.rpm_mode = AMDGPU_RUNPM_PX; - dev_info(adev->dev, "Using ATPX for runtime pm\n"); - } else if (amdgpu_device_supports_boco(dev) && - (amdgpu_runtime_pm != 0)) { /* enable boco as runtime mode */ - adev->pm.rpm_mode = AMDGPU_RUNPM_BOCO; - dev_info(adev->dev, "Using BOCO for runtime pm\n"); - } else if (amdgpu_device_supports_baco(dev) && - (amdgpu_runtime_pm != 0)) { - switch (adev->asic_type) { - case CHIP_VEGA20: - case CHIP_ARCTURUS: - /* enable BACO as runpm mode if runpm=1 */ - if (amdgpu_runtime_pm > 0) - adev->pm.rpm_mode = AMDGPU_RUNPM_BACO; - break; - case CHIP_VEGA10: - /* enable BACO as runpm mode if noretry=0 */ - if (!adev->gmc.noretry) - adev->pm.rpm_mode = AMDGPU_RUNPM_BACO; - break; - default: - /* enable BACO as runpm mode on CI+ */ - adev->pm.rpm_mode = AMDGPU_RUNPM_BACO; - break; - } - - if (adev->pm.rpm_mode == AMDGPU_RUNPM_BACO) - dev_info(adev->dev, "Using BACO for runtime pm\n"); - } + amdgpu_device_detect_runtime_pm_mode(adev); /* Call ACPI methods: require modeset init * but failure is not fatal @@ -894,14 +863,14 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) dev_info->ids_flags |= AMDGPU_IDS_FLAGS_CONFORMANT_TRUNC_COORD; vm_size = adev->vm_manager.max_pfn * AMDGPU_GPU_PAGE_SIZE; - vm_size -= AMDGPU_VA_RESERVED_SIZE; + vm_size -= AMDGPU_VA_RESERVED_TOP; /* Older VCE FW versions are buggy and can handle only 40bits */ if (adev->vce.fw_version && adev->vce.fw_version < AMDGPU_VCE_FW_53_45) vm_size = min(vm_size, 1ULL << 40); - dev_info->virtual_address_offset = AMDGPU_VA_RESERVED_SIZE; + dev_info->virtual_address_offset = AMDGPU_VA_RESERVED_BOTTOM; dev_info->virtual_address_max = min(vm_size, AMDGPU_GMC_HOLE_START); @@ -1114,6 +1083,15 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) } ui32 >>= 8; break; + case AMDGPU_INFO_SENSOR_GPU_INPUT_POWER: + /* get input GPU power */ + if (amdgpu_dpm_read_sensor(adev, + AMDGPU_PP_SENSOR_GPU_INPUT_POWER, + (void *)&ui32, &ui32_size)) { + return -EINVAL; + } + ui32 >>= 8; + break; case AMDGPU_INFO_SENSOR_VDDNB: /* get VDDNB in millivolts */ if (amdgpu_dpm_read_sensor(adev, @@ -1370,6 +1348,10 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv) goto error_vm; } + r = amdgpu_seq64_map(adev, &fpriv->vm, &fpriv->seq64_va); + if (r) + goto error_vm; + mutex_init(&fpriv->bo_list_lock); idr_init_base(&fpriv->bo_list_handles, 1); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c index 59fafb8392e0..0734490347db 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c @@ -27,6 +27,16 @@ #include "umc/umc_6_7_0_offset.h" #include "umc/umc_6_7_0_sh_mask.h" +static bool amdgpu_mca_is_deferred_error(struct amdgpu_device *adev, + uint64_t mc_status) +{ + if (adev->umc.ras->check_ecc_err_status) + return adev->umc.ras->check_ecc_err_status(adev, + AMDGPU_MCA_ERROR_TYPE_DE, &mc_status); + + return false; +} + void amdgpu_mca_query_correctable_error_count(struct amdgpu_device *adev, uint64_t mc_status_addr, unsigned long *error_count) @@ -200,22 +210,26 @@ int amdgpu_mca_smu_set_debug_mode(struct amdgpu_device *adev, bool enable) return -EOPNOTSUPP; } -static void amdgpu_mca_smu_mca_bank_dump(struct amdgpu_device *adev, int idx, struct mca_bank_entry *entry) +static void amdgpu_mca_smu_mca_bank_dump(struct amdgpu_device *adev, int idx, struct mca_bank_entry *entry, + struct ras_query_context *qctx) { - dev_info(adev->dev, "[Hardware error] Accelerator Check Architecture events logged\n"); - dev_info(adev->dev, "[Hardware error] aca entry[%02d].STATUS=0x%016llx\n", - idx, entry->regs[MCA_REG_IDX_STATUS]); - dev_info(adev->dev, "[Hardware error] aca entry[%02d].ADDR=0x%016llx\n", - idx, entry->regs[MCA_REG_IDX_ADDR]); - dev_info(adev->dev, "[Hardware error] aca entry[%02d].MISC0=0x%016llx\n", - idx, entry->regs[MCA_REG_IDX_MISC0]); - dev_info(adev->dev, "[Hardware error] aca entry[%02d].IPID=0x%016llx\n", - idx, entry->regs[MCA_REG_IDX_IPID]); - dev_info(adev->dev, "[Hardware error] aca entry[%02d].SYND=0x%016llx\n", - idx, entry->regs[MCA_REG_IDX_SYND]); + u64 event_id = qctx->event_id; + + RAS_EVENT_LOG(adev, event_id, HW_ERR "Accelerator Check Architecture events logged\n"); + RAS_EVENT_LOG(adev, event_id, HW_ERR "aca entry[%02d].STATUS=0x%016llx\n", + idx, entry->regs[MCA_REG_IDX_STATUS]); + RAS_EVENT_LOG(adev, event_id, HW_ERR "aca entry[%02d].ADDR=0x%016llx\n", + idx, entry->regs[MCA_REG_IDX_ADDR]); + RAS_EVENT_LOG(adev, event_id, HW_ERR "aca entry[%02d].MISC0=0x%016llx\n", + idx, entry->regs[MCA_REG_IDX_MISC0]); + RAS_EVENT_LOG(adev, event_id, HW_ERR "aca entry[%02d].IPID=0x%016llx\n", + idx, entry->regs[MCA_REG_IDX_IPID]); + RAS_EVENT_LOG(adev, event_id, HW_ERR "aca entry[%02d].SYND=0x%016llx\n", + idx, entry->regs[MCA_REG_IDX_SYND]); } -int amdgpu_mca_smu_log_ras_error(struct amdgpu_device *adev, enum amdgpu_ras_block blk, enum amdgpu_mca_error_type type, struct ras_err_data *err_data) +int amdgpu_mca_smu_log_ras_error(struct amdgpu_device *adev, enum amdgpu_ras_block blk, enum amdgpu_mca_error_type type, + struct ras_err_data *err_data, struct ras_query_context *qctx) { struct amdgpu_smuio_mcm_config_info mcm_info; struct ras_err_addr err_addr = {0}; @@ -234,7 +248,7 @@ int amdgpu_mca_smu_log_ras_error(struct amdgpu_device *adev, enum amdgpu_ras_blo list_for_each_entry(node, &mca_set.list, node) { entry = &node->entry; - amdgpu_mca_smu_mca_bank_dump(adev, i++, entry); + amdgpu_mca_smu_mca_bank_dump(adev, i++, entry, qctx); count = 0; ret = amdgpu_mca_smu_parse_mca_error_count(adev, blk, type, entry, &count); @@ -256,9 +270,14 @@ int amdgpu_mca_smu_log_ras_error(struct amdgpu_device *adev, enum amdgpu_ras_blo if (type == AMDGPU_MCA_ERROR_TYPE_UE) amdgpu_ras_error_statistic_ue_count(err_data, &mcm_info, &err_addr, (uint64_t)count); - else - amdgpu_ras_error_statistic_ce_count(err_data, - &mcm_info, &err_addr, (uint64_t)count); + else { + if (amdgpu_mca_is_deferred_error(adev, entry->regs[MCA_REG_IDX_STATUS])) + amdgpu_ras_error_statistic_de_count(err_data, + &mcm_info, &err_addr, (uint64_t)count); + else + amdgpu_ras_error_statistic_ce_count(err_data, + &mcm_info, &err_addr, (uint64_t)count); + } } out_mca_release: diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h index b399f1b62887..e5bf07ce3451 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h @@ -65,6 +65,7 @@ enum amdgpu_mca_ip { enum amdgpu_mca_error_type { AMDGPU_MCA_ERROR_TYPE_UE = 0, AMDGPU_MCA_ERROR_TYPE_CE, + AMDGPU_MCA_ERROR_TYPE_DE, }; struct amdgpu_mca_ras_block { @@ -168,6 +169,7 @@ void amdgpu_mca_smu_debugfs_init(struct amdgpu_device *adev, struct dentry *root void amdgpu_mca_bank_set_init(struct mca_bank_set *mca_set); int amdgpu_mca_bank_set_add_entry(struct mca_bank_set *mca_set, struct mca_bank_entry *entry); void amdgpu_mca_bank_set_release(struct mca_bank_set *mca_set); -int amdgpu_mca_smu_log_ras_error(struct amdgpu_device *adev, enum amdgpu_ras_block blk, enum amdgpu_mca_error_type type, struct ras_err_data *err_data); +int amdgpu_mca_smu_log_ras_error(struct amdgpu_device *adev, enum amdgpu_ras_block blk, enum amdgpu_mca_error_type type, + struct ras_err_data *err_data, struct ras_query_context *qctx); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c index da48b6da0107..5ca5c47ab54e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c @@ -32,6 +32,18 @@ #define AMDGPU_MES_MAX_NUM_OF_QUEUES_PER_PROCESS 1024 #define AMDGPU_ONE_DOORBELL_SIZE 8 +signed long amdgpu_mes_fence_wait_polling(u64 *fence, + u64 wait_seq, + signed long timeout) +{ + + while ((s64)(wait_seq - *fence) > 0 && timeout > 0) { + udelay(2); + timeout -= 2; + } + return timeout > 0 ? timeout : 0; +} + int amdgpu_mes_doorbell_process_slice(struct amdgpu_device *adev) { return roundup(AMDGPU_ONE_DOORBELL_SIZE * @@ -40,7 +52,6 @@ int amdgpu_mes_doorbell_process_slice(struct amdgpu_device *adev) } static int amdgpu_mes_kernel_doorbell_get(struct amdgpu_device *adev, - struct amdgpu_mes_process *process, int ip_type, uint64_t *doorbell_index) { unsigned int offset, found; @@ -65,7 +76,6 @@ static int amdgpu_mes_kernel_doorbell_get(struct amdgpu_device *adev, } static void amdgpu_mes_kernel_doorbell_free(struct amdgpu_device *adev, - struct amdgpu_mes_process *process, uint32_t doorbell_index) { unsigned int old, rel_index; @@ -102,7 +112,10 @@ static int amdgpu_mes_event_log_init(struct amdgpu_device *adev) { int r; - r = amdgpu_bo_create_kernel(adev, PAGE_SIZE, PAGE_SIZE, + if (!amdgpu_mes_log_enable) + return 0; + + r = amdgpu_bo_create_kernel(adev, AMDGPU_MES_LOG_BUFFER_SIZE, PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, &adev->mes.event_log_gpu_obj, &adev->mes.event_log_gpu_addr, @@ -653,7 +666,7 @@ int amdgpu_mes_add_hw_queue(struct amdgpu_device *adev, int gang_id, *queue_id = queue->queue_id = r; /* allocate a doorbell index for the queue */ - r = amdgpu_mes_kernel_doorbell_get(adev, gang->process, + r = amdgpu_mes_kernel_doorbell_get(adev, qprops->queue_type, &qprops->doorbell_off); if (r) @@ -711,8 +724,7 @@ int amdgpu_mes_add_hw_queue(struct amdgpu_device *adev, int gang_id, return 0; clean_up_doorbell: - amdgpu_mes_kernel_doorbell_free(adev, gang->process, - qprops->doorbell_off); + amdgpu_mes_kernel_doorbell_free(adev, qprops->doorbell_off); clean_up_queue_id: spin_lock_irqsave(&adev->mes.queue_id_lock, flags); idr_remove(&adev->mes.queue_id_idr, queue->queue_id); @@ -766,8 +778,7 @@ int amdgpu_mes_remove_hw_queue(struct amdgpu_device *adev, int queue_id) queue_id); list_del(&queue->list); - amdgpu_mes_kernel_doorbell_free(adev, gang->process, - queue->doorbell_off); + amdgpu_mes_kernel_doorbell_free(adev, queue->doorbell_off); amdgpu_mes_unlock(&adev->mes); amdgpu_mes_queue_free_mqd(queue); @@ -775,6 +786,28 @@ int amdgpu_mes_remove_hw_queue(struct amdgpu_device *adev, int queue_id) return 0; } +int amdgpu_mes_map_legacy_queue(struct amdgpu_device *adev, + struct amdgpu_ring *ring) +{ + struct mes_map_legacy_queue_input queue_input; + int r; + + memset(&queue_input, 0, sizeof(queue_input)); + + queue_input.queue_type = ring->funcs->type; + queue_input.doorbell_offset = ring->doorbell_index; + queue_input.pipe_id = ring->pipe; + queue_input.queue_id = ring->queue; + queue_input.mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj); + queue_input.wptr_addr = ring->wptr_gpu_addr; + + r = adev->mes.funcs->map_legacy_queue(&adev->mes, &queue_input); + if (r) + DRM_ERROR("failed to map legacy queue\n"); + + return r; +} + int amdgpu_mes_unmap_legacy_queue(struct amdgpu_device *adev, struct amdgpu_ring *ring, enum amdgpu_unmap_queues_action action, @@ -1129,6 +1162,7 @@ void amdgpu_mes_remove_ring(struct amdgpu_device *adev, return; amdgpu_mes_remove_hw_queue(adev, ring->hw_queue_id); + del_timer_sync(&ring->fence_drv.fallback_timer); amdgpu_ring_fini(ring); kfree(ring); } @@ -1398,7 +1432,7 @@ int amdgpu_mes_self_test(struct amdgpu_device *adev) goto error_fini; } - ctx_data.meta_data_gpu_addr = AMDGPU_VA_RESERVED_SIZE; + ctx_data.meta_data_gpu_addr = AMDGPU_VA_RESERVED_BOTTOM; r = amdgpu_mes_ctx_map_meta_data(adev, vm, &ctx_data); if (r) { DRM_ERROR("failed to map ctx meta data\n"); @@ -1471,7 +1505,7 @@ int amdgpu_mes_init_microcode(struct amdgpu_device *adev, int pipe) const struct mes_firmware_header_v1_0 *mes_hdr; struct amdgpu_firmware_info *info; char ucode_prefix[30]; - char fw_name[40]; + char fw_name[50]; bool need_retry = false; int r; @@ -1549,12 +1583,11 @@ static int amdgpu_debugfs_mes_event_log_show(struct seq_file *m, void *unused) uint32_t *mem = (uint32_t *)(adev->mes.event_log_cpu_addr); seq_hex_dump(m, "", DUMP_PREFIX_OFFSET, 32, 4, - mem, PAGE_SIZE, false); + mem, AMDGPU_MES_LOG_BUFFER_SIZE, false); return 0; } - DEFINE_SHOW_ATTRIBUTE(amdgpu_debugfs_mes_event_log); #endif @@ -1565,9 +1598,9 @@ void amdgpu_debugfs_mes_event_log_init(struct amdgpu_device *adev) #if defined(CONFIG_DEBUG_FS) struct drm_minor *minor = adev_to_drm(adev)->primary; struct dentry *root = minor->debugfs_root; - - debugfs_create_file("amdgpu_mes_event_log", 0444, root, - adev, &amdgpu_debugfs_mes_event_log_fops); + if (adev->enable_mes && amdgpu_mes_log_enable) + debugfs_create_file("amdgpu_mes_event_log", 0444, root, + adev, &amdgpu_debugfs_mes_event_log_fops); #endif } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h index 7d4f93fea937..df9f0404d842 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h @@ -52,6 +52,7 @@ enum amdgpu_mes_priority_level { #define AMDGPU_MES_PROC_CTX_SIZE 0x1000 /* one page area */ #define AMDGPU_MES_GANG_CTX_SIZE 0x1000 /* one page area */ +#define AMDGPU_MES_LOG_BUFFER_SIZE 0x4000 /* Maximu log buffer size for MES */ struct amdgpu_mes_funcs; @@ -140,6 +141,12 @@ struct amdgpu_mes { /* ip specific functions */ const struct amdgpu_mes_funcs *funcs; + + /* mes resource_1 bo*/ + struct amdgpu_bo *resource_1; + uint64_t resource_1_gpu_addr; + void *resource_1_addr; + }; struct amdgpu_mes_process { @@ -241,6 +248,15 @@ struct mes_remove_queue_input { uint64_t gang_context_addr; }; +struct mes_map_legacy_queue_input { + uint32_t queue_type; + uint32_t doorbell_offset; + uint32_t pipe_id; + uint32_t queue_id; + uint64_t mqd_addr; + uint64_t wptr_addr; +}; + struct mes_unmap_legacy_queue_input { enum amdgpu_unmap_queues_action action; uint32_t queue_type; @@ -317,6 +333,9 @@ struct amdgpu_mes_funcs { int (*remove_hw_queue)(struct amdgpu_mes *mes, struct mes_remove_queue_input *input); + int (*map_legacy_queue)(struct amdgpu_mes *mes, + struct mes_map_legacy_queue_input *input); + int (*unmap_legacy_queue)(struct amdgpu_mes *mes, struct mes_unmap_legacy_queue_input *input); @@ -333,6 +352,10 @@ struct amdgpu_mes_funcs { #define amdgpu_mes_kiq_hw_init(adev) (adev)->mes.kiq_hw_init((adev)) #define amdgpu_mes_kiq_hw_fini(adev) (adev)->mes.kiq_hw_fini((adev)) +signed long amdgpu_mes_fence_wait_polling(u64 *fence, + u64 wait_seq, + signed long timeout); + int amdgpu_mes_ctx_get_offs(struct amdgpu_ring *ring, unsigned int id_offs); int amdgpu_mes_init_microcode(struct amdgpu_device *adev, int pipe); @@ -356,6 +379,8 @@ int amdgpu_mes_add_hw_queue(struct amdgpu_device *adev, int gang_id, int *queue_id); int amdgpu_mes_remove_hw_queue(struct amdgpu_device *adev, int queue_id); +int amdgpu_mes_map_legacy_queue(struct amdgpu_device *adev, + struct amdgpu_ring *ring); int amdgpu_mes_unmap_legacy_queue(struct amdgpu_device *adev, struct amdgpu_ring *ring, enum amdgpu_unmap_queues_action action, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h index 1ca9d4ed8063..95d676ee207f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h @@ -63,6 +63,8 @@ struct amdgpu_mmhub_funcs { uint64_t page_table_base); void (*update_power_gating)(struct amdgpu_device *adev, bool enable); + bool (*query_utcl2_poison_status)(struct amdgpu_device *adev, + int hub_inst); }; struct amdgpu_mmhub { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h index 2e4911050cc5..1fe21a70ddd0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h @@ -324,8 +324,6 @@ struct amdgpu_mode_info { struct drm_property *audio_property; /* FMT dithering */ struct drm_property *dither_property; - /* Adaptive Backlight Modulation (power feature) */ - struct drm_property *abm_level_property; /* hardcoded DFP edid from BIOS */ struct edid *bios_hardcoded_edid; int bios_hardcoded_edid_size; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c index 51ca544a7094..d085687a47ea 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c @@ -53,14 +53,6 @@ u64 amdgpu_nbio_get_pcie_replay_count(struct amdgpu_device *adev) return 0; } -void amdgpu_nbio_get_pcie_usage(struct amdgpu_device *adev, uint64_t *count0, - uint64_t *count1) -{ - if (adev->nbio.funcs->get_pcie_usage) - adev->nbio.funcs->get_pcie_usage(adev, count0, count1); - -} - int amdgpu_nbio_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block) { int r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h index 65e35059de40..7b8c03be1d9e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h @@ -102,8 +102,6 @@ struct amdgpu_nbio_funcs { u32 (*get_memory_partition_mode)(struct amdgpu_device *adev, u32 *supp_modes); u64 (*get_pcie_replay_count)(struct amdgpu_device *adev); - void (*get_pcie_usage)(struct amdgpu_device *adev, uint64_t *count0, - uint64_t *count1); }; struct amdgpu_nbio { @@ -116,7 +114,6 @@ struct amdgpu_nbio { }; int amdgpu_nbio_ras_sw_init(struct amdgpu_device *adev); -void amdgpu_nbio_get_pcie_usage(struct amdgpu_device *adev, uint64_t *count0, uint64_t *count1); int amdgpu_nbio_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block); u64 amdgpu_nbio_get_pcie_replay_count(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 425cebcc5cbf..8d8c39be6129 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -39,6 +39,7 @@ #include "amdgpu.h" #include "amdgpu_trace.h" #include "amdgpu_amdkfd.h" +#include "amdgpu_vram_mgr.h" /** * DOC: amdgpu_object @@ -153,8 +154,10 @@ void amdgpu_bo_placement_from_domain(struct amdgpu_bo *abo, u32 domain) else places[c].flags |= TTM_PL_FLAG_TOPDOWN; - if (flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS) + if (abo->tbo.type == ttm_bo_type_kernel && + flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS) places[c].flags |= TTM_PL_FLAG_CONTIGUOUS; + c++; } @@ -173,6 +176,12 @@ void amdgpu_bo_placement_from_domain(struct amdgpu_bo *abo, u32 domain) abo->flags & AMDGPU_GEM_CREATE_PREEMPTIBLE ? AMDGPU_PL_PREEMPT : TTM_PL_TT; places[c].flags = 0; + /* + * When GTT is just an alternative to VRAM make sure that we + * only use it as fallback and still try to fill up VRAM first. + */ + if (domain & abo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) + places[c].flags |= TTM_PL_FLAG_FALLBACK; c++; } @@ -220,9 +229,6 @@ void amdgpu_bo_placement_from_domain(struct amdgpu_bo *abo, u32 domain) placement->num_placement = c; placement->placement = places; - - placement->num_busy_placement = c; - placement->busy_placement = places; } /** @@ -598,8 +604,7 @@ int amdgpu_bo_create(struct amdgpu_device *adev, if (!amdgpu_bo_support_uswc(bo->flags)) bo->flags &= ~AMDGPU_GEM_CREATE_CPU_GTT_USWC; - if (adev->ras_enabled) - bo->flags |= AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE; + bo->flags |= AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE; bo->tbo.bdev = &adev->mman.bdev; if (bp->domain & (AMDGPU_GEM_DOMAIN_GWS | AMDGPU_GEM_DOMAIN_OA | @@ -608,6 +613,8 @@ int amdgpu_bo_create(struct amdgpu_device *adev, else amdgpu_bo_placement_from_domain(bo, bp->domain); if (bp->type == ttm_bo_type_kernel) + bo->tbo.priority = 2; + else if (!(bp->flags & AMDGPU_GEM_CREATE_DISCARDABLE)) bo->tbo.priority = 1; if (!bp->destroy) @@ -620,8 +627,7 @@ int amdgpu_bo_create(struct amdgpu_device *adev, return r; if (!amdgpu_gmc_vram_full_visible(&adev->gmc) && - bo->tbo.resource->mem_type == TTM_PL_VRAM && - amdgpu_bo_in_cpu_visible_vram(bo)) + amdgpu_res_cpu_visible(adev, bo->tbo.resource)) amdgpu_cs_report_moved_bytes(adev, ctx.bytes_moved, ctx.bytes_moved); else @@ -631,7 +637,7 @@ int amdgpu_bo_create(struct amdgpu_device *adev, bo->tbo.resource->mem_type == TTM_PL_VRAM) { struct dma_fence *fence; - r = amdgpu_fill_buffer(bo, 0, bo->tbo.base.resv, &fence, true); + r = amdgpu_ttm_clear_buffer(bo, bo->tbo.base.resv, &fence); if (unlikely(r)) goto fail_unreserve; @@ -761,7 +767,7 @@ int amdgpu_bo_restore_shadow(struct amdgpu_bo *shadow, struct dma_fence **fence) return amdgpu_copy_buffer(ring, shadow_addr, parent_addr, amdgpu_bo_size(shadow), NULL, fence, - true, false, false); + true, false, 0); } /** @@ -963,6 +969,10 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain, if (!bo->placements[i].lpfn || (lpfn && lpfn < bo->placements[i].lpfn)) bo->placements[i].lpfn = lpfn; + + if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS && + bo->placements[i].mem_type == TTM_PL_VRAM) + bo->placements[i].flags |= TTM_PL_FLAG_CONTIGUOUS; } r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); @@ -1245,14 +1255,18 @@ int amdgpu_bo_get_metadata(struct amdgpu_bo *bo, void *buffer, * amdgpu_bo_move_notify - notification about a memory move * @bo: pointer to a buffer object * @evict: if this move is evicting the buffer from the graphics address space + * @new_mem: new resource for backing the BO * * Marks the corresponding &amdgpu_bo buffer object as invalid, also performs * bookkeeping. * TTM driver callback which is called when ttm moves a buffer. */ -void amdgpu_bo_move_notify(struct ttm_buffer_object *bo, bool evict) +void amdgpu_bo_move_notify(struct ttm_buffer_object *bo, + bool evict, + struct ttm_resource *new_mem) { struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev); + struct ttm_resource *old_mem = bo->resource; struct amdgpu_bo *abo; if (!amdgpu_bo_is_amdgpu_bo(bo)) @@ -1264,37 +1278,50 @@ void amdgpu_bo_move_notify(struct ttm_buffer_object *bo, bool evict) amdgpu_bo_kunmap(abo); if (abo->tbo.base.dma_buf && !abo->tbo.base.import_attach && - bo->resource->mem_type != TTM_PL_SYSTEM) + old_mem && old_mem->mem_type != TTM_PL_SYSTEM) dma_buf_move_notify(abo->tbo.base.dma_buf); - /* remember the eviction */ - if (evict) - atomic64_inc(&adev->num_evictions); + /* move_notify is called before move happens */ + trace_amdgpu_bo_move(abo, new_mem ? new_mem->mem_type : -1, + old_mem ? old_mem->mem_type : -1); } void amdgpu_bo_get_memory(struct amdgpu_bo *bo, struct amdgpu_mem_stats *stats) { + struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); + struct ttm_resource *res = bo->tbo.resource; uint64_t size = amdgpu_bo_size(bo); + struct drm_gem_object *obj; unsigned int domain; + bool shared; /* Abort if the BO doesn't currently have a backing store */ - if (!bo->tbo.resource) + if (!res) return; - domain = amdgpu_mem_type_to_domain(bo->tbo.resource->mem_type); + obj = &bo->tbo.base; + shared = drm_gem_object_is_shared_for_memory_stats(obj); + + domain = amdgpu_mem_type_to_domain(res->mem_type); switch (domain) { case AMDGPU_GEM_DOMAIN_VRAM: stats->vram += size; - if (amdgpu_bo_in_cpu_visible_vram(bo)) + if (amdgpu_res_cpu_visible(adev, bo->tbo.resource)) stats->visible_vram += size; + if (shared) + stats->vram_shared += size; break; case AMDGPU_GEM_DOMAIN_GTT: stats->gtt += size; + if (shared) + stats->gtt_shared += size; break; case AMDGPU_GEM_DOMAIN_CPU: default: stats->cpu += size; + if (shared) + stats->cpu_shared += size; break; } @@ -1351,8 +1378,9 @@ void amdgpu_bo_release_notify(struct ttm_buffer_object *bo) if (WARN_ON_ONCE(!dma_resv_trylock(bo->base.resv))) return; - r = amdgpu_fill_buffer(abo, AMDGPU_POISON, bo->base.resv, &fence, true); + r = amdgpu_fill_buffer(abo, 0, bo->base.resv, &fence, true); if (!WARN_ON(r)) { + amdgpu_vram_mgr_set_cleared(bo->resource); amdgpu_bo_fence(abo, fence, false); dma_fence_put(fence); } @@ -1381,10 +1409,7 @@ vm_fault_t amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo) /* Remember that this BO was accessed by the CPU */ abo->flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; - if (bo->resource->mem_type != TTM_PL_VRAM) - return 0; - - if (amdgpu_bo_in_cpu_visible_vram(abo)) + if (amdgpu_res_cpu_visible(adev, bo->resource)) return 0; /* Can't move a pinned BO to visible VRAM */ @@ -1397,8 +1422,7 @@ vm_fault_t amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo) AMDGPU_GEM_DOMAIN_GTT); /* Avoid costly evictions; only set GTT as a busy placement */ - abo->placement.num_busy_placement = 1; - abo->placement.busy_placement = &abo->placements[1]; + abo->placements[0].flags |= TTM_PL_FLAG_DESIRED; r = ttm_bo_validate(bo, &abo->placement, &ctx); if (unlikely(r == -EBUSY || r == -ERESTARTSYS)) @@ -1408,7 +1432,7 @@ vm_fault_t amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo) /* this should never happen */ if (bo->resource->mem_type == TTM_PL_VRAM && - !amdgpu_bo_in_cpu_visible_vram(abo)) + !amdgpu_res_cpu_visible(adev, bo->resource)) return VM_FAULT_SIGBUS; ttm_bo_move_to_lru_tail_unlocked(bo); @@ -1572,6 +1596,7 @@ uint32_t amdgpu_bo_get_preferred_domain(struct amdgpu_device *adev, */ u64 amdgpu_bo_print_info(int id, struct amdgpu_bo *bo, struct seq_file *m) { + struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); struct dma_buf_attachment *attachment; struct dma_buf *dma_buf; const char *placement; @@ -1580,10 +1605,11 @@ u64 amdgpu_bo_print_info(int id, struct amdgpu_bo *bo, struct seq_file *m) if (dma_resv_trylock(bo->tbo.base.resv)) { unsigned int domain; + domain = amdgpu_mem_type_to_domain(bo->tbo.resource->mem_type); switch (domain) { case AMDGPU_GEM_DOMAIN_VRAM: - if (amdgpu_bo_in_cpu_visible_vram(bo)) + if (amdgpu_res_cpu_visible(adev, bo->tbo.resource)) placement = "VRAM VISIBLE"; else placement = "VRAM"; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h index a3ea8a82db23..bc42ccbde659 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h @@ -138,12 +138,18 @@ struct amdgpu_bo_vm { struct amdgpu_mem_stats { /* current VRAM usage, includes visible VRAM */ uint64_t vram; + /* current shared VRAM usage, includes visible VRAM */ + uint64_t vram_shared; /* current visible VRAM usage */ uint64_t visible_vram; /* current GTT usage */ uint64_t gtt; + /* current shared GTT usage */ + uint64_t gtt_shared; /* current system memory usage */ uint64_t cpu; + /* current shared system memory usage */ + uint64_t cpu_shared; /* sum of evicted buffers, includes visible VRAM */ uint64_t evicted_vram; /* sum of evicted buffers due to CPU access */ @@ -245,28 +251,6 @@ static inline u64 amdgpu_bo_mmap_offset(struct amdgpu_bo *bo) } /** - * amdgpu_bo_in_cpu_visible_vram - check if BO is (partly) in visible VRAM - */ -static inline bool amdgpu_bo_in_cpu_visible_vram(struct amdgpu_bo *bo) -{ - struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); - struct amdgpu_res_cursor cursor; - - if (!bo->tbo.resource || bo->tbo.resource->mem_type != TTM_PL_VRAM) - return false; - - amdgpu_res_first(bo->tbo.resource, 0, amdgpu_bo_size(bo), &cursor); - while (cursor.remaining) { - if (cursor.start < adev->gmc.visible_vram_size) - return true; - - amdgpu_res_next(&cursor, cursor.size); - } - - return false; -} - -/** * amdgpu_bo_explicit_sync - return whether the bo is explicitly synced */ static inline bool amdgpu_bo_explicit_sync(struct amdgpu_bo *bo) @@ -344,7 +328,9 @@ int amdgpu_bo_set_metadata (struct amdgpu_bo *bo, void *metadata, int amdgpu_bo_get_metadata(struct amdgpu_bo *bo, void *buffer, size_t buffer_size, uint32_t *metadata_size, uint64_t *flags); -void amdgpu_bo_move_notify(struct ttm_buffer_object *bo, bool evict); +void amdgpu_bo_move_notify(struct ttm_buffer_object *bo, + bool evict, + struct ttm_resource *new_mem); void amdgpu_bo_release_notify(struct ttm_buffer_object *bo); vm_fault_t amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo); void amdgpu_bo_fence(struct amdgpu_bo *bo, struct dma_fence *fence, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index 0328616473f8..4bd4602d11b1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -38,6 +38,7 @@ #include "psp_v12_0.h" #include "psp_v13_0.h" #include "psp_v13_0_4.h" +#include "psp_v14_0.h" #include "amdgpu_ras.h" #include "amdgpu_securedisplay.h" @@ -162,20 +163,26 @@ static int psp_early_init(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; struct psp_context *psp = &adev->psp; + psp->autoload_supported = true; + psp->boot_time_tmr = true; + switch (amdgpu_ip_version(adev, MP0_HWIP, 0)) { case IP_VERSION(9, 0, 0): psp_v3_1_set_psp_funcs(psp); psp->autoload_supported = false; + psp->boot_time_tmr = false; break; case IP_VERSION(10, 0, 0): case IP_VERSION(10, 0, 1): psp_v10_0_set_psp_funcs(psp); psp->autoload_supported = false; + psp->boot_time_tmr = false; break; case IP_VERSION(11, 0, 2): case IP_VERSION(11, 0, 4): psp_v11_0_set_psp_funcs(psp); psp->autoload_supported = false; + psp->boot_time_tmr = false; break; case IP_VERSION(11, 0, 0): case IP_VERSION(11, 0, 7): @@ -188,15 +195,20 @@ static int psp_early_init(void *handle) case IP_VERSION(11, 0, 12): case IP_VERSION(11, 0, 13): psp_v11_0_set_psp_funcs(psp); - psp->autoload_supported = true; + psp->boot_time_tmr = false; break; case IP_VERSION(11, 0, 3): case IP_VERSION(12, 0, 1): psp_v12_0_set_psp_funcs(psp); + psp->autoload_supported = false; + psp->boot_time_tmr = false; break; case IP_VERSION(13, 0, 2): + psp->boot_time_tmr = false; + fallthrough; case IP_VERSION(13, 0, 6): psp_v13_0_set_psp_funcs(psp); + psp->autoload_supported = false; break; case IP_VERSION(13, 0, 1): case IP_VERSION(13, 0, 3): @@ -204,25 +216,31 @@ static int psp_early_init(void *handle) case IP_VERSION(13, 0, 8): case IP_VERSION(13, 0, 11): case IP_VERSION(14, 0, 0): + case IP_VERSION(14, 0, 1): psp_v13_0_set_psp_funcs(psp); - psp->autoload_supported = true; + psp->boot_time_tmr = false; break; case IP_VERSION(11, 0, 8): if (adev->apu_flags & AMD_APU_IS_CYAN_SKILLFISH2) { psp_v11_0_8_set_psp_funcs(psp); - psp->autoload_supported = false; } + psp->autoload_supported = false; + psp->boot_time_tmr = false; break; case IP_VERSION(13, 0, 0): case IP_VERSION(13, 0, 7): case IP_VERSION(13, 0, 10): psp_v13_0_set_psp_funcs(psp); - psp->autoload_supported = true; adev->psp.sup_ifwi_up = !amdgpu_sriov_vf(adev); + psp->boot_time_tmr = false; break; case IP_VERSION(13, 0, 4): psp_v13_0_4_set_psp_funcs(psp); - psp->autoload_supported = true; + psp->boot_time_tmr = false; + break; + case IP_VERSION(14, 0, 2): + case IP_VERSION(14, 0, 3): + psp_v14_0_set_psp_funcs(psp); break; default: return -EINVAL; @@ -230,6 +248,8 @@ static int psp_early_init(void *handle) psp->adev = adev; + adev->psp_timeout = 20000; + psp_check_pmfw_centralized_cstate_management(psp); if (amdgpu_sriov_vf(adev)) @@ -291,21 +311,22 @@ static int psp_memory_training_init(struct psp_context *psp) struct psp_memory_training_context *ctx = &psp->mem_train_ctx; if (ctx->init != PSP_MEM_TRAIN_RESERVE_SUCCESS) { - DRM_DEBUG("memory training is not supported!\n"); + dev_dbg(psp->adev->dev, "memory training is not supported!\n"); return 0; } ctx->sys_cache = kzalloc(ctx->train_data_size, GFP_KERNEL); if (ctx->sys_cache == NULL) { - DRM_ERROR("alloc mem_train_ctx.sys_cache failed!\n"); + dev_err(psp->adev->dev, "alloc mem_train_ctx.sys_cache failed!\n"); ret = -ENOMEM; goto Err_out; } - DRM_DEBUG("train_data_size:%llx,p2c_train_data_offset:%llx,c2p_train_data_offset:%llx.\n", - ctx->train_data_size, - ctx->p2c_train_data_offset, - ctx->c2p_train_data_offset); + dev_dbg(psp->adev->dev, + "train_data_size:%llx,p2c_train_data_offset:%llx,c2p_train_data_offset:%llx.\n", + ctx->train_data_size, + ctx->p2c_train_data_offset, + ctx->c2p_train_data_offset); ctx->init = PSP_MEM_TRAIN_INIT_SUCCESS; return 0; @@ -407,7 +428,7 @@ static int psp_sw_init(void *handle) psp->cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); if (!psp->cmd) { - DRM_ERROR("Failed to allocate memory to command buffer!\n"); + dev_err(adev->dev, "Failed to allocate memory to command buffer!\n"); ret = -ENOMEM; } @@ -454,13 +475,13 @@ static int psp_sw_init(void *handle) if (mem_training_ctx->enable_mem_training) { ret = psp_memory_training_init(psp); if (ret) { - DRM_ERROR("Failed to initialize memory training!\n"); + dev_err(adev->dev, "Failed to initialize memory training!\n"); return ret; } ret = psp_mem_training(psp, PSP_MEM_TRAIN_COLD_BOOT); if (ret) { - DRM_ERROR("Failed to process memory training!\n"); + dev_err(adev->dev, "Failed to process memory training!\n"); return ret; } } @@ -626,7 +647,7 @@ psp_cmd_submit_buf(struct psp_context *psp, { int ret; int index; - int timeout = 20000; + int timeout = psp->adev->psp_timeout; bool ras_intr = false; bool skip_unsupport = false; @@ -675,9 +696,11 @@ psp_cmd_submit_buf(struct psp_context *psp, */ if (!skip_unsupport && (psp->cmd_buf_mem->resp.status || !timeout) && !ras_intr) { if (ucode) - DRM_WARN("failed to load ucode %s(0x%X) ", - amdgpu_ucode_name(ucode->ucode_id), ucode->ucode_id); - DRM_WARN("psp gfx command %s(0x%X) failed and response status is (0x%X)\n", + dev_warn(psp->adev->dev, + "failed to load ucode %s(0x%X) ", + amdgpu_ucode_name(ucode->ucode_id), ucode->ucode_id); + dev_warn(psp->adev->dev, + "psp gfx command %s(0x%X) failed and response status is (0x%X)\n", psp_gfx_cmd_name(psp->cmd_buf_mem->cmd_id), psp->cmd_buf_mem->cmd_id, psp->cmd_buf_mem->resp.status); /* If any firmware (including CAP) load fails under SRIOV, it should @@ -771,16 +794,6 @@ static int psp_load_toc(struct psp_context *psp, return ret; } -static bool psp_boottime_tmr(struct psp_context *psp) -{ - switch (amdgpu_ip_version(psp->adev, MP0_HWIP, 0)) { - case IP_VERSION(13, 0, 6): - return true; - default: - return false; - } -} - /* Set up Trusted Memory Region */ static int psp_tmr_init(struct psp_context *psp) { @@ -807,12 +820,12 @@ static int psp_tmr_init(struct psp_context *psp) psp->fw_pri_buf) { ret = psp_load_toc(psp, &tmr_size); if (ret) { - DRM_ERROR("Failed to load toc\n"); + dev_err(psp->adev->dev, "Failed to load toc\n"); return ret; } } - if (!psp->tmr_bo) { + if (!psp->tmr_bo && !psp->boot_time_tmr) { pptr = amdgpu_sriov_vf(psp->adev) ? &tmr_buf : NULL; ret = amdgpu_bo_create_kernel(psp->adev, tmr_size, PSP_TMR_ALIGNMENT, @@ -855,7 +868,7 @@ static int psp_tmr_load(struct psp_context *psp) psp_prep_tmr_cmd_buf(psp, cmd, psp->tmr_mc_addr, psp->tmr_bo); if (psp->tmr_bo) - DRM_INFO("reserve 0x%lx from 0x%llx for PSP TMR\n", + dev_info(psp->adev->dev, "reserve 0x%lx from 0x%llx for PSP TMR\n", amdgpu_bo_size(psp->tmr_bo), psp->tmr_mc_addr); ret = psp_cmd_submit_buf(psp, NULL, cmd, @@ -1040,6 +1053,11 @@ static int psp_asd_initialize(struct psp_context *psp) if (amdgpu_sriov_vf(psp->adev) || !psp->asd_context.bin_desc.size_bytes) return 0; + /* bypass asd if display hardware is not available */ + if (!amdgpu_device_has_display_hardware(psp->adev) && + amdgpu_ip_version(psp->adev, MP0_HWIP, 0) >= IP_VERSION(13, 0, 10)) + return 0; + psp->asd_context.mem_context.shared_mc_addr = 0; psp->asd_context.mem_context.shared_mem_size = PSP_ASD_SHARED_MEM_SIZE; psp->asd_context.ta_load_type = GFX_CMD_ID_LOAD_ASD; @@ -1113,7 +1131,7 @@ int psp_reg_program(struct psp_context *psp, enum psp_reg_prog_id reg, psp_prep_reg_prog_cmd_buf(cmd, reg, value); ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr); if (ret) - DRM_ERROR("PSP failed to program reg id %d", reg); + dev_err(psp->adev->dev, "PSP failed to program reg id %d\n", reg); release_psp_cmd_buf(psp); @@ -1526,22 +1544,22 @@ static void psp_ras_ta_check_status(struct psp_context *psp) switch (ras_cmd->ras_status) { case TA_RAS_STATUS__ERROR_UNSUPPORTED_IP: dev_warn(psp->adev->dev, - "RAS WARNING: cmd failed due to unsupported ip\n"); + "RAS WARNING: cmd failed due to unsupported ip\n"); break; case TA_RAS_STATUS__ERROR_UNSUPPORTED_ERROR_INJ: dev_warn(psp->adev->dev, - "RAS WARNING: cmd failed due to unsupported error injection\n"); + "RAS WARNING: cmd failed due to unsupported error injection\n"); break; case TA_RAS_STATUS__SUCCESS: break; case TA_RAS_STATUS__TEE_ERROR_ACCESS_DENIED: if (ras_cmd->cmd_id == TA_RAS_COMMAND__TRIGGER_ERROR) dev_warn(psp->adev->dev, - "RAS WARNING: Inject error to critical region is not allowed\n"); + "RAS WARNING: Inject error to critical region is not allowed\n"); break; default: dev_warn(psp->adev->dev, - "RAS WARNING: ras status = 0x%X\n", ras_cmd->ras_status); + "RAS WARNING: ras status = 0x%X\n", ras_cmd->ras_status); break; } } @@ -1565,7 +1583,7 @@ int psp_ras_invoke(struct psp_context *psp, uint32_t ta_cmd_id) return ret; if (ras_cmd->if_version > RAS_TA_HOST_IF_VER) { - DRM_WARN("RAS: Unsupported Interface"); + dev_warn(psp->adev->dev, "RAS: Unsupported Interface\n"); return -EINVAL; } @@ -1715,7 +1733,7 @@ int psp_ras_initialize(struct psp_context *psp) psp->ras_context.context.initialized = true; else { if (ras_cmd->ras_status) - dev_warn(psp->adev->dev, "RAS Init Status: 0x%X\n", ras_cmd->ras_status); + dev_warn(adev->dev, "RAS Init Status: 0x%X\n", ras_cmd->ras_status); /* fail to load RAS TA */ psp->ras_context.context.initialized = false; @@ -1779,6 +1797,31 @@ int psp_ras_trigger_error(struct psp_context *psp, return 0; } + +int psp_ras_query_address(struct psp_context *psp, + struct ta_ras_query_address_input *addr_in, + struct ta_ras_query_address_output *addr_out) +{ + struct ta_ras_shared_memory *ras_cmd; + int ret; + + if (!psp->ras_context.context.initialized) + return -EINVAL; + + ras_cmd = (struct ta_ras_shared_memory *)psp->ras_context.context.mem_context.shared_buf; + memset(ras_cmd, 0, sizeof(struct ta_ras_shared_memory)); + + ras_cmd->cmd_id = TA_RAS_COMMAND__QUERY_ADDRESS; + ras_cmd->ras_in_message.address = *addr_in; + + ret = psp_ras_invoke(psp, ras_cmd->cmd_id); + if (ret || ras_cmd->ras_status || psp->cmd_buf_mem->resp.status) + return -EINVAL; + + *addr_out = ras_cmd->ras_out_message.address; + + return 0; +} // ras end // HDCP start @@ -1792,6 +1835,10 @@ static int psp_hdcp_initialize(struct psp_context *psp) if (amdgpu_sriov_vf(psp->adev)) return 0; + /* bypass hdcp initialization if dmu is harvested */ + if (!amdgpu_device_has_display_hardware(psp->adev)) + return 0; + if (!psp->hdcp_context.context.bin_desc.size_bytes || !psp->hdcp_context.context.bin_desc.start_addr) { dev_info(psp->adev->dev, "HDCP: optional hdcp ta ucode is not available\n"); @@ -1824,6 +1871,9 @@ int psp_hdcp_invoke(struct psp_context *psp, uint32_t ta_cmd_id) if (amdgpu_sriov_vf(psp->adev)) return 0; + if (!psp->hdcp_context.context.initialized) + return 0; + return psp_ta_invoke(psp, ta_cmd_id, &psp->hdcp_context.context); } @@ -1859,6 +1909,10 @@ static int psp_dtm_initialize(struct psp_context *psp) if (amdgpu_sriov_vf(psp->adev)) return 0; + /* bypass dtm initialization if dmu is harvested */ + if (!amdgpu_device_has_display_hardware(psp->adev)) + return 0; + if (!psp->dtm_context.context.bin_desc.size_bytes || !psp->dtm_context.context.bin_desc.start_addr) { dev_info(psp->adev->dev, "DTM: optional dtm ta ucode is not available\n"); @@ -1891,6 +1945,9 @@ int psp_dtm_invoke(struct psp_context *psp, uint32_t ta_cmd_id) if (amdgpu_sriov_vf(psp->adev)) return 0; + if (!psp->dtm_context.context.initialized) + return 0; + return psp_ta_invoke(psp, ta_cmd_id, &psp->dtm_context.context); } @@ -2025,6 +2082,10 @@ static int psp_securedisplay_initialize(struct psp_context *psp) if (amdgpu_sriov_vf(psp->adev)) return 0; + /* bypass securedisplay initialization if dmu is harvested */ + if (!amdgpu_device_has_display_hardware(psp->adev)) + return 0; + if (!psp->securedisplay_context.context.bin_desc.size_bytes || !psp->securedisplay_context.context.bin_desc.start_addr) { dev_info(psp->adev->dev, "SECUREDISPLAY: securedisplay ta ucode is not available\n"); @@ -2125,19 +2186,14 @@ int amdgpu_psp_wait_for_bootloader(struct amdgpu_device *adev) return ret; } -int amdgpu_psp_query_boot_status(struct amdgpu_device *adev) +bool amdgpu_psp_get_ras_capability(struct psp_context *psp) { - struct psp_context *psp = &adev->psp; - int ret = 0; - - if (amdgpu_sriov_vf(adev) || (adev->flags & AMD_IS_APU)) - return 0; - if (psp->funcs && - psp->funcs->query_boot_status) - ret = psp->funcs->query_boot_status(psp); - - return ret; + psp->funcs->get_ras_capability) { + return psp->funcs->get_ras_capability(psp); + } else { + return false; + } } static int psp_hw_start(struct psp_context *psp) @@ -2150,7 +2206,7 @@ static int psp_hw_start(struct psp_context *psp) (psp->funcs->bootloader_load_kdb != NULL)) { ret = psp_bootloader_load_kdb(psp); if (ret) { - DRM_ERROR("PSP load kdb failed!\n"); + dev_err(adev->dev, "PSP load kdb failed!\n"); return ret; } } @@ -2159,7 +2215,7 @@ static int psp_hw_start(struct psp_context *psp) (psp->funcs->bootloader_load_spl != NULL)) { ret = psp_bootloader_load_spl(psp); if (ret) { - DRM_ERROR("PSP load spl failed!\n"); + dev_err(adev->dev, "PSP load spl failed!\n"); return ret; } } @@ -2168,7 +2224,7 @@ static int psp_hw_start(struct psp_context *psp) (psp->funcs->bootloader_load_sysdrv != NULL)) { ret = psp_bootloader_load_sysdrv(psp); if (ret) { - DRM_ERROR("PSP load sys drv failed!\n"); + dev_err(adev->dev, "PSP load sys drv failed!\n"); return ret; } } @@ -2177,7 +2233,7 @@ static int psp_hw_start(struct psp_context *psp) (psp->funcs->bootloader_load_soc_drv != NULL)) { ret = psp_bootloader_load_soc_drv(psp); if (ret) { - DRM_ERROR("PSP load soc drv failed!\n"); + dev_err(adev->dev, "PSP load soc drv failed!\n"); return ret; } } @@ -2186,7 +2242,7 @@ static int psp_hw_start(struct psp_context *psp) (psp->funcs->bootloader_load_intf_drv != NULL)) { ret = psp_bootloader_load_intf_drv(psp); if (ret) { - DRM_ERROR("PSP load intf drv failed!\n"); + dev_err(adev->dev, "PSP load intf drv failed!\n"); return ret; } } @@ -2195,7 +2251,7 @@ static int psp_hw_start(struct psp_context *psp) (psp->funcs->bootloader_load_dbg_drv != NULL)) { ret = psp_bootloader_load_dbg_drv(psp); if (ret) { - DRM_ERROR("PSP load dbg drv failed!\n"); + dev_err(adev->dev, "PSP load dbg drv failed!\n"); return ret; } } @@ -2204,7 +2260,16 @@ static int psp_hw_start(struct psp_context *psp) (psp->funcs->bootloader_load_ras_drv != NULL)) { ret = psp_bootloader_load_ras_drv(psp); if (ret) { - DRM_ERROR("PSP load ras_drv failed!\n"); + dev_err(adev->dev, "PSP load ras_drv failed!\n"); + return ret; + } + } + + if ((is_psp_fw_valid(psp->ipkeymgr_drv)) && + (psp->funcs->bootloader_load_ipkeymgr_drv != NULL)) { + ret = psp_bootloader_load_ipkeymgr_drv(psp); + if (ret) { + dev_err(adev->dev, "PSP load ipkeymgr_drv failed!\n"); return ret; } } @@ -2213,7 +2278,7 @@ static int psp_hw_start(struct psp_context *psp) (psp->funcs->bootloader_load_sos != NULL)) { ret = psp_bootloader_load_sos(psp); if (ret) { - DRM_ERROR("PSP load sos failed!\n"); + dev_err(adev->dev, "PSP load sos failed!\n"); return ret; } } @@ -2221,17 +2286,17 @@ static int psp_hw_start(struct psp_context *psp) ret = psp_ring_create(psp, PSP_RING_TYPE__KM); if (ret) { - DRM_ERROR("PSP create ring failed!\n"); + dev_err(adev->dev, "PSP create ring failed!\n"); return ret; } if (amdgpu_sriov_vf(adev) && amdgpu_in_reset(adev)) goto skip_pin_bo; - if (!psp_boottime_tmr(psp)) { + if (!psp->boot_time_tmr || psp->autoload_supported) { ret = psp_tmr_init(psp); if (ret) { - DRM_ERROR("PSP tmr init failed!\n"); + dev_err(adev->dev, "PSP tmr init failed!\n"); return ret; } } @@ -2248,10 +2313,12 @@ skip_pin_bo: return ret; } - ret = psp_tmr_load(psp); - if (ret) { - DRM_ERROR("PSP load tmr failed!\n"); - return ret; + if (!psp->boot_time_tmr || !psp->autoload_supported) { + ret = psp_tmr_load(psp); + if (ret) { + dev_err(adev->dev, "PSP load tmr failed!\n"); + return ret; + } } return 0; @@ -2462,6 +2529,9 @@ static int psp_get_fw_type(struct amdgpu_firmware_info *ucode, case AMDGPU_UCODE_ID_P2S_TABLE: *type = GFX_FW_TYPE_P2S_TABLE; break; + case AMDGPU_UCODE_ID_JPEG_RAM: + *type = GFX_FW_TYPE_JPEG_RAM; + break; case AMDGPU_UCODE_ID_MAXIMUM: default: return -EINVAL; @@ -2518,7 +2588,8 @@ static void psp_print_fw_hdr(struct psp_context *psp, } } -static int psp_prep_load_ip_fw_cmd_buf(struct amdgpu_firmware_info *ucode, +static int psp_prep_load_ip_fw_cmd_buf(struct psp_context *psp, + struct amdgpu_firmware_info *ucode, struct psp_gfx_cmd_resp *cmd) { int ret; @@ -2531,7 +2602,7 @@ static int psp_prep_load_ip_fw_cmd_buf(struct amdgpu_firmware_info *ucode, ret = psp_get_fw_type(ucode, &cmd->cmd.cmd_load_ip_fw.fw_type); if (ret) - DRM_ERROR("Unknown firmware type\n"); + dev_err(psp->adev->dev, "Unknown firmware type\n"); return ret; } @@ -2542,7 +2613,7 @@ int psp_execute_ip_fw_load(struct psp_context *psp, int ret = 0; struct psp_gfx_cmd_resp *cmd = acquire_psp_cmd_buf(psp); - ret = psp_prep_load_ip_fw_cmd_buf(ucode, cmd); + ret = psp_prep_load_ip_fw_cmd_buf(psp, ucode, cmd); if (!ret) { ret = psp_cmd_submit_buf(psp, ucode, cmd, psp->fence_buf_mc_addr); @@ -2560,7 +2631,8 @@ static int psp_load_p2s_table(struct psp_context *psp) struct amdgpu_firmware_info *ucode = &adev->firmware.ucode[AMDGPU_UCODE_ID_P2S_TABLE]; - if (adev->in_runpm && (adev->pm.rpm_mode == AMDGPU_RUNPM_BACO)) + if (adev->in_runpm && ((adev->pm.rpm_mode == AMDGPU_RUNPM_BACO) || + (adev->pm.rpm_mode == AMDGPU_RUNPM_BAMACO))) return 0; if (amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 6)) { @@ -2590,7 +2662,8 @@ static int psp_load_smu_fw(struct psp_context *psp) * Skip SMU FW reloading in case of using BACO for runpm only, * as SMU is always alive. */ - if (adev->in_runpm && (adev->pm.rpm_mode == AMDGPU_RUNPM_BACO)) + if (adev->in_runpm && ((adev->pm.rpm_mode == AMDGPU_RUNPM_BACO) || + (adev->pm.rpm_mode == AMDGPU_RUNPM_BAMACO))) return 0; if (!ucode->fw || amdgpu_sriov_vf(psp->adev)) @@ -2601,13 +2674,13 @@ static int psp_load_smu_fw(struct psp_context *psp) amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(11, 0, 2)))) { ret = amdgpu_dpm_set_mp1_state(adev, PP_MP1_STATE_UNLOAD); if (ret) - DRM_WARN("Failed to set MP1 state prepare for reload\n"); + dev_err(adev->dev, "Failed to set MP1 state prepare for reload\n"); } ret = psp_execute_ip_fw_load(psp, ucode); if (ret) - DRM_ERROR("PSP load smu failed!\n"); + dev_err(adev->dev, "PSP load smu failed!\n"); return ret; } @@ -2712,7 +2785,7 @@ static int psp_load_non_psp_fw(struct psp_context *psp) adev->virt.autoload_ucode_id : AMDGPU_UCODE_ID_RLC_G)) { ret = psp_rlc_autoload_start(psp); if (ret) { - DRM_ERROR("Failed to start rlc autoload\n"); + dev_err(adev->dev, "Failed to start rlc autoload\n"); return ret; } } @@ -2734,7 +2807,7 @@ static int psp_load_fw(struct amdgpu_device *adev) ret = psp_ring_init(psp, PSP_RING_TYPE__KM); if (ret) { - DRM_ERROR("PSP ring init failed!\n"); + dev_err(adev->dev, "PSP ring init failed!\n"); goto failed; } } @@ -2749,13 +2822,13 @@ static int psp_load_fw(struct amdgpu_device *adev) ret = psp_asd_initialize(psp); if (ret) { - DRM_ERROR("PSP load asd failed!\n"); + dev_err(adev->dev, "PSP load asd failed!\n"); goto failed1; } ret = psp_rl_load(adev); if (ret) { - DRM_ERROR("PSP load RL failed!\n"); + dev_err(adev->dev, "PSP load RL failed!\n"); goto failed1; } @@ -2775,7 +2848,7 @@ static int psp_load_fw(struct amdgpu_device *adev) ret = psp_ras_initialize(psp); if (ret) dev_err(psp->adev->dev, - "RAS: Failed to initialize RAS\n"); + "RAS: Failed to initialize RAS\n"); ret = psp_hdcp_initialize(psp); if (ret) @@ -2828,7 +2901,7 @@ static int psp_hw_init(void *handle) ret = psp_load_fw(adev); if (ret) { - DRM_ERROR("PSP firmware loading failed\n"); + dev_err(adev->dev, "PSP firmware loading failed\n"); goto failed; } @@ -2875,7 +2948,7 @@ static int psp_suspend(void *handle) psp->xgmi_context.context.initialized) { ret = psp_xgmi_terminate(psp); if (ret) { - DRM_ERROR("Failed to terminate xgmi ta\n"); + dev_err(adev->dev, "Failed to terminate xgmi ta\n"); goto out; } } @@ -2883,46 +2956,46 @@ static int psp_suspend(void *handle) if (psp->ta_fw) { ret = psp_ras_terminate(psp); if (ret) { - DRM_ERROR("Failed to terminate ras ta\n"); + dev_err(adev->dev, "Failed to terminate ras ta\n"); goto out; } ret = psp_hdcp_terminate(psp); if (ret) { - DRM_ERROR("Failed to terminate hdcp ta\n"); + dev_err(adev->dev, "Failed to terminate hdcp ta\n"); goto out; } ret = psp_dtm_terminate(psp); if (ret) { - DRM_ERROR("Failed to terminate dtm ta\n"); + dev_err(adev->dev, "Failed to terminate dtm ta\n"); goto out; } ret = psp_rap_terminate(psp); if (ret) { - DRM_ERROR("Failed to terminate rap ta\n"); + dev_err(adev->dev, "Failed to terminate rap ta\n"); goto out; } ret = psp_securedisplay_terminate(psp); if (ret) { - DRM_ERROR("Failed to terminate securedisplay ta\n"); + dev_err(adev->dev, "Failed to terminate securedisplay ta\n"); goto out; } } ret = psp_asd_terminate(psp); if (ret) { - DRM_ERROR("Failed to terminate asd\n"); + dev_err(adev->dev, "Failed to terminate asd\n"); goto out; } ret = psp_tmr_terminate(psp); if (ret) { - DRM_ERROR("Failed to terminate tmr\n"); + dev_err(adev->dev, "Failed to terminate tmr\n"); goto out; } ret = psp_ring_stop(psp, PSP_RING_TYPE__KM); if (ret) - DRM_ERROR("PSP ring stop failed\n"); + dev_err(adev->dev, "PSP ring stop failed\n"); out: return ret; @@ -2934,12 +3007,12 @@ static int psp_resume(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; struct psp_context *psp = &adev->psp; - DRM_INFO("PSP is resuming...\n"); + dev_info(adev->dev, "PSP is resuming...\n"); if (psp->mem_train_ctx.enable_mem_training) { ret = psp_mem_training(psp, PSP_MEM_TRAIN_RESUME); if (ret) { - DRM_ERROR("Failed to process memory training!\n"); + dev_err(adev->dev, "Failed to process memory training!\n"); return ret; } } @@ -2956,7 +3029,7 @@ static int psp_resume(void *handle) ret = psp_asd_initialize(psp); if (ret) { - DRM_ERROR("PSP load asd failed!\n"); + dev_err(adev->dev, "PSP load asd failed!\n"); goto failed; } @@ -2980,7 +3053,7 @@ static int psp_resume(void *handle) ret = psp_ras_initialize(psp); if (ret) dev_err(psp->adev->dev, - "RAS: Failed to initialize RAS\n"); + "RAS: Failed to initialize RAS\n"); ret = psp_hdcp_initialize(psp); if (ret) @@ -3008,7 +3081,7 @@ static int psp_resume(void *handle) return 0; failed: - DRM_ERROR("PSP resume failed\n"); + dev_err(adev->dev, "PSP resume failed\n"); mutex_unlock(&adev->firmware.mutex); return ret; } @@ -3069,9 +3142,11 @@ int psp_ring_cmd_submit(struct psp_context *psp, write_frame = ring_buffer_start + (psp_write_ptr_reg / rb_frame_size_dw); /* Check invalid write_frame ptr address */ if ((write_frame < ring_buffer_start) || (ring_buffer_end < write_frame)) { - DRM_ERROR("ring_buffer_start = %p; ring_buffer_end = %p; write_frame = %p\n", - ring_buffer_start, ring_buffer_end, write_frame); - DRM_ERROR("write_frame is pointing to address out of bounds\n"); + dev_err(adev->dev, + "ring_buffer_start = %p; ring_buffer_end = %p; write_frame = %p\n", + ring_buffer_start, ring_buffer_end, write_frame); + dev_err(adev->dev, + "write_frame is pointing to address out of bounds\n"); return -EINVAL; } @@ -3214,6 +3289,12 @@ static int parse_sos_bin_descriptor(struct psp_context *psp, psp->ras_drv.size_bytes = le32_to_cpu(desc->size_bytes); psp->ras_drv.start_addr = ucode_start_addr; break; + case PSP_FW_TYPE_PSP_IPKEYMGR_DRV: + psp->ipkeymgr_drv.fw_version = le32_to_cpu(desc->fw_version); + psp->ipkeymgr_drv.feature_version = le32_to_cpu(desc->fw_version); + psp->ipkeymgr_drv.size_bytes = le32_to_cpu(desc->size_bytes); + psp->ipkeymgr_drv.start_addr = ucode_start_addr; + break; default: dev_warn(psp->adev->dev, "Unsupported PSP FW type: %d\n", desc->fw_type); break; @@ -3597,7 +3678,7 @@ static ssize_t psp_usbc_pd_fw_sysfs_read(struct device *dev, int ret; if (!adev->ip_blocks[AMD_IP_BLOCK_TYPE_PSP].status.late_initialized) { - DRM_INFO("PSP block is not ready yet."); + dev_info(adev->dev, "PSP block is not ready yet\n."); return -EBUSY; } @@ -3606,7 +3687,7 @@ static ssize_t psp_usbc_pd_fw_sysfs_read(struct device *dev, mutex_unlock(&adev->psp.mutex); if (ret) { - DRM_ERROR("Failed to read USBC PD FW, err = %d", ret); + dev_err(adev->dev, "Failed to read USBC PD FW, err = %d\n", ret); return ret; } @@ -3628,7 +3709,7 @@ static ssize_t psp_usbc_pd_fw_sysfs_write(struct device *dev, void *fw_pri_cpu_addr; if (!adev->ip_blocks[AMD_IP_BLOCK_TYPE_PSP].status.late_initialized) { - DRM_INFO("PSP block is not ready yet."); + dev_err(adev->dev, "PSP block is not ready yet."); return -EBUSY; } @@ -3661,7 +3742,7 @@ rel_buf: release_firmware(usbc_pd_fw); fail: if (ret) { - DRM_ERROR("Failed to load USBC PD FW, err = %d", ret); + dev_err(adev->dev, "Failed to load USBC PD FW, err = %d", ret); count = ret; } @@ -3708,7 +3789,7 @@ static ssize_t amdgpu_psp_vbflash_write(struct file *filp, struct kobject *kobj, /* Safeguard against memory drain */ if (adev->psp.vbflash_image_size > AMD_VBIOS_FILE_MAX_SIZE_B) { - dev_err(adev->dev, "File size cannot exceed %u", AMD_VBIOS_FILE_MAX_SIZE_B); + dev_err(adev->dev, "File size cannot exceed %u\n", AMD_VBIOS_FILE_MAX_SIZE_B); kvfree(adev->psp.vbflash_tmp_buf); adev->psp.vbflash_tmp_buf = NULL; adev->psp.vbflash_image_size = 0; @@ -3727,7 +3808,7 @@ static ssize_t amdgpu_psp_vbflash_write(struct file *filp, struct kobject *kobj, adev->psp.vbflash_image_size += count; mutex_unlock(&adev->psp.mutex); - dev_dbg(adev->dev, "IFWI staged for update"); + dev_dbg(adev->dev, "IFWI staged for update\n"); return count; } @@ -3747,7 +3828,7 @@ static ssize_t amdgpu_psp_vbflash_read(struct file *filp, struct kobject *kobj, if (adev->psp.vbflash_image_size == 0) return -EINVAL; - dev_dbg(adev->dev, "PSP IFWI flash process initiated"); + dev_dbg(adev->dev, "PSP IFWI flash process initiated\n"); ret = amdgpu_bo_create_kernel(adev, adev->psp.vbflash_image_size, AMDGPU_GPU_PAGE_SIZE, @@ -3772,11 +3853,11 @@ rel_buf: adev->psp.vbflash_image_size = 0; if (ret) { - dev_err(adev->dev, "Failed to load IFWI, err = %d", ret); + dev_err(adev->dev, "Failed to load IFWI, err = %d\n", ret); return ret; } - dev_dbg(adev->dev, "PSP IFWI flash process done"); + dev_dbg(adev->dev, "PSP IFWI flash process done\n"); return 0; } @@ -3930,3 +4011,11 @@ const struct amdgpu_ip_block_version psp_v13_0_4_ip_block = { .rev = 4, .funcs = &psp_ip_funcs, }; + +const struct amdgpu_ip_block_version psp_v14_0_ip_block = { + .type = AMD_IP_BLOCK_TYPE_PSP, + .major = 14, + .minor = 0, + .rev = 0, + .funcs = &psp_ip_funcs, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h index c4d9cbde55b9..3635303e6548 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h @@ -73,8 +73,10 @@ enum psp_bootloader_cmd { PSP_BL__LOAD_KEY_DATABASE = 0x80000, PSP_BL__LOAD_SOCDRV = 0xB0000, PSP_BL__LOAD_DBGDRV = 0xC0000, + PSP_BL__LOAD_HADDRV = PSP_BL__LOAD_DBGDRV, PSP_BL__LOAD_INTFDRV = 0xD0000, - PSP_BL__LOAD_RASDRV = 0xE0000, + PSP_BL__LOAD_RASDRV = 0xE0000, + PSP_BL__LOAD_IPKEYMGRDRV = 0xF0000, PSP_BL__DRAM_LONG_TRAIN = 0x100000, PSP_BL__DRAM_SHORT_TRAIN = 0x200000, PSP_BL__LOAD_TOS_SPL_TABLE = 0x10000000, @@ -117,6 +119,7 @@ struct psp_funcs { int (*bootloader_load_intf_drv)(struct psp_context *psp); int (*bootloader_load_dbg_drv)(struct psp_context *psp); int (*bootloader_load_ras_drv)(struct psp_context *psp); + int (*bootloader_load_ipkeymgr_drv)(struct psp_context *psp); int (*bootloader_load_sos)(struct psp_context *psp); int (*ring_create)(struct psp_context *psp, enum psp_ring_type ring_type); @@ -134,7 +137,7 @@ struct psp_funcs { int (*update_spirom)(struct psp_context *psp, uint64_t fw_pri_mc_addr); int (*vbflash_stat)(struct psp_context *psp); int (*fatal_error_recovery_quirk)(struct psp_context *psp); - int (*query_boot_status)(struct psp_context *psp); + bool (*get_ras_capability)(struct psp_context *psp); }; struct ta_funcs { @@ -203,7 +206,7 @@ struct psp_ras_context { #define GDDR6_MEM_TRAINING_DATA_SIZE_IN_BYTES 0x1000 #define GDDR6_MEM_TRAINING_OFFSET 0x8000 /*Define the VRAM size that will be encroached by BIST training.*/ -#define GDDR6_MEM_TRAINING_ENCROACHED_SIZE 0x2000000 +#define BIST_MEM_TRAINING_ENCROACHED_SIZE 0x2000000 enum psp_memory_training_init_flag { PSP_MEM_TRAIN_NOT_SUPPORT = 0x0, @@ -336,6 +339,7 @@ struct psp_context { struct psp_bin_desc intf_drv; struct psp_bin_desc dbg_drv; struct psp_bin_desc ras_drv; + struct psp_bin_desc ipkeymgr_drv; /* tmr buffer */ struct amdgpu_bo *tmr_bo; @@ -364,6 +368,8 @@ struct psp_context { atomic_t fence_value; /* flag to mark whether gfx fw autoload is supported or not */ bool autoload_supported; + /* flag to mark whether psp use runtime TMR or boottime TMR */ + bool boot_time_tmr; /* flag to mark whether df cstate management centralized to PMFW */ bool pmfw_centralized_cstate_management; @@ -422,6 +428,9 @@ struct amdgpu_psp_funcs { #define psp_bootloader_load_ras_drv(psp) \ ((psp)->funcs->bootloader_load_ras_drv ? \ (psp)->funcs->bootloader_load_ras_drv((psp)) : 0) +#define psp_bootloader_load_ipkeymgr_drv(psp) \ + ((psp)->funcs->bootloader_load_ipkeymgr_drv ? \ + (psp)->funcs->bootloader_load_ipkeymgr_drv((psp)) : 0) #define psp_bootloader_load_sos(psp) \ ((psp)->funcs->bootloader_load_sos ? (psp)->funcs->bootloader_load_sos((psp)) : 0) #define psp_smu_reload_quirk(psp) \ @@ -463,6 +472,7 @@ extern const struct amdgpu_ip_block_version psp_v11_0_8_ip_block; extern const struct amdgpu_ip_block_version psp_v12_0_ip_block; extern const struct amdgpu_ip_block_version psp_v13_0_ip_block; extern const struct amdgpu_ip_block_version psp_v13_0_4_ip_block; +extern const struct amdgpu_ip_block_version psp_v14_0_ip_block; extern int psp_wait_for(struct psp_context *psp, uint32_t reg_index, uint32_t field_val, uint32_t mask, bool check_changed); @@ -502,6 +512,9 @@ int psp_ras_enable_features(struct psp_context *psp, int psp_ras_trigger_error(struct psp_context *psp, struct ta_ras_trigger_error_input *info, uint32_t instance_mask); int psp_ras_terminate(struct psp_context *psp); +int psp_ras_query_address(struct psp_context *psp, + struct ta_ras_query_address_input *addr_in, + struct ta_ras_query_address_output *addr_out); int psp_hdcp_invoke(struct psp_context *psp, uint32_t ta_cmd_id); int psp_dtm_invoke(struct psp_context *psp, uint32_t ta_cmd_id); @@ -538,7 +551,5 @@ int psp_spatial_partition(struct psp_context *psp, int mode); int is_psp_fw_valid(struct psp_bin_desc bin); int amdgpu_psp_wait_for_bootloader(struct amdgpu_device *adev); - -int amdgpu_psp_query_boot_status(struct amdgpu_device *adev); - +bool amdgpu_psp_get_ras_capability(struct psp_context *psp); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 31823a30dea2..1adc81a55734 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -39,6 +39,7 @@ #include "nbio_v7_9.h" #include "atom.h" #include "amdgpu_reset.h" +#include "amdgpu_psp.h" #ifdef CONFIG_X86_MCE_AMD #include <asm/mce.h> @@ -73,6 +74,8 @@ const char *ras_block_string[] = { "mca", "vcn", "jpeg", + "ih", + "mpio", }; const char *ras_mca_block_string[] = { @@ -94,7 +97,8 @@ const char *get_ras_block_str(struct ras_common_if *ras_block) if (!ras_block) return "NULL"; - if (ras_block->block >= AMDGPU_RAS_BLOCK_COUNT) + if (ras_block->block >= AMDGPU_RAS_BLOCK_COUNT || + ras_block->block >= ARRAY_SIZE(ras_block_string)) return "OUT OF RANGE"; if (ras_block->block == AMDGPU_RAS_BLOCK__MCA) @@ -116,6 +120,10 @@ const char *get_ras_block_str(struct ras_common_if *ras_block) /* typical ECC bad page rate is 1 bad page per 100MB VRAM */ #define RAS_BAD_PAGE_COVER (100 * 1024 * 1024ULL) +#define MAX_UMC_POISON_POLLING_TIME_ASYNC 100 //ms + +#define AMDGPU_RAS_RETIRE_PAGE_INTERVAL 100 //ms + enum amdgpu_ras_retire_page_reservation { AMDGPU_RAS_RETIRE_PAGE_RESERVED, AMDGPU_RAS_RETIRE_PAGE_PENDING, @@ -628,8 +636,12 @@ static ssize_t amdgpu_ras_sysfs_read(struct device *dev, dev_warn(obj->adev->dev, "Failed to reset error counter and error status"); } - return sysfs_emit(buf, "%s: %lu\n%s: %lu\n", "ue", info.ue_count, - "ce", info.ce_count); + if (info.head.block == AMDGPU_RAS_BLOCK__UMC) + return sysfs_emit(buf, "%s: %lu\n%s: %lu\n%s: %lu\n", "ue", info.ue_count, + "ce", info.ce_count, "de", info.de_count); + else + return sysfs_emit(buf, "%s: %lu\n%s: %lu\n", "ue", info.ue_count, + "ce", info.ce_count); } /* obj begin */ @@ -1035,55 +1047,83 @@ static void amdgpu_ras_get_ecc_info(struct amdgpu_device *adev, struct ras_err_d static void amdgpu_ras_error_print_error_data(struct amdgpu_device *adev, struct ras_manager *ras_mgr, struct ras_err_data *err_data, + struct ras_query_context *qctx, const char *blk_name, - bool is_ue) + bool is_ue, + bool is_de) { struct amdgpu_smuio_mcm_config_info *mcm_info; struct ras_err_node *err_node; struct ras_err_info *err_info; + u64 event_id = qctx->event_id; if (is_ue) { for_each_ras_error(err_node, err_data) { err_info = &err_node->err_info; mcm_info = &err_info->mcm_info; if (err_info->ue_count) { - dev_info(adev->dev, "socket: %d, die: %d, " - "%lld new uncorrectable hardware errors detected in %s block\n", - mcm_info->socket_id, - mcm_info->die_id, - err_info->ue_count, - blk_name); + RAS_EVENT_LOG(adev, event_id, "socket: %d, die: %d, " + "%lld new uncorrectable hardware errors detected in %s block\n", + mcm_info->socket_id, + mcm_info->die_id, + err_info->ue_count, + blk_name); } } for_each_ras_error(err_node, &ras_mgr->err_data) { err_info = &err_node->err_info; mcm_info = &err_info->mcm_info; - dev_info(adev->dev, "socket: %d, die: %d, " - "%lld uncorrectable hardware errors detected in total in %s block\n", - mcm_info->socket_id, mcm_info->die_id, err_info->ue_count, blk_name); + RAS_EVENT_LOG(adev, event_id, "socket: %d, die: %d, " + "%lld uncorrectable hardware errors detected in total in %s block\n", + mcm_info->socket_id, mcm_info->die_id, err_info->ue_count, blk_name); } } else { - for_each_ras_error(err_node, err_data) { - err_info = &err_node->err_info; - mcm_info = &err_info->mcm_info; - if (err_info->ce_count) { - dev_info(adev->dev, "socket: %d, die: %d, " - "%lld new correctable hardware errors detected in %s block\n", - mcm_info->socket_id, - mcm_info->die_id, - err_info->ce_count, - blk_name); + if (is_de) { + for_each_ras_error(err_node, err_data) { + err_info = &err_node->err_info; + mcm_info = &err_info->mcm_info; + if (err_info->de_count) { + RAS_EVENT_LOG(adev, event_id, "socket: %d, die: %d, " + "%lld new deferred hardware errors detected in %s block\n", + mcm_info->socket_id, + mcm_info->die_id, + err_info->de_count, + blk_name); + } } - } - for_each_ras_error(err_node, &ras_mgr->err_data) { - err_info = &err_node->err_info; - mcm_info = &err_info->mcm_info; - dev_info(adev->dev, "socket: %d, die: %d, " - "%lld correctable hardware errors detected in total in %s block\n", - mcm_info->socket_id, mcm_info->die_id, err_info->ce_count, blk_name); + for_each_ras_error(err_node, &ras_mgr->err_data) { + err_info = &err_node->err_info; + mcm_info = &err_info->mcm_info; + RAS_EVENT_LOG(adev, event_id, "socket: %d, die: %d, " + "%lld deferred hardware errors detected in total in %s block\n", + mcm_info->socket_id, mcm_info->die_id, + err_info->de_count, blk_name); + } + } else { + for_each_ras_error(err_node, err_data) { + err_info = &err_node->err_info; + mcm_info = &err_info->mcm_info; + if (err_info->ce_count) { + RAS_EVENT_LOG(adev, event_id, "socket: %d, die: %d, " + "%lld new correctable hardware errors detected in %s block\n", + mcm_info->socket_id, + mcm_info->die_id, + err_info->ce_count, + blk_name); + } + } + + for_each_ras_error(err_node, &ras_mgr->err_data) { + err_info = &err_node->err_info; + mcm_info = &err_info->mcm_info; + RAS_EVENT_LOG(adev, event_id, "socket: %d, die: %d, " + "%lld correctable hardware errors detected in total in %s block\n", + mcm_info->socket_id, mcm_info->die_id, + err_info->ce_count, blk_name); + } } } } @@ -1095,55 +1135,81 @@ static inline bool err_data_has_source_info(struct ras_err_data *data) static void amdgpu_ras_error_generate_report(struct amdgpu_device *adev, struct ras_query_if *query_if, - struct ras_err_data *err_data) + struct ras_err_data *err_data, + struct ras_query_context *qctx) { struct ras_manager *ras_mgr = amdgpu_ras_find_obj(adev, &query_if->head); const char *blk_name = get_ras_block_str(&query_if->head); + u64 event_id = qctx->event_id; if (err_data->ce_count) { if (err_data_has_source_info(err_data)) { - amdgpu_ras_error_print_error_data(adev, ras_mgr, err_data, blk_name, false); + amdgpu_ras_error_print_error_data(adev, ras_mgr, err_data, qctx, + blk_name, false, false); } else if (!adev->aid_mask && adev->smuio.funcs && adev->smuio.funcs->get_socket_id && adev->smuio.funcs->get_die_id) { - dev_info(adev->dev, "socket: %d, die: %d " - "%ld correctable hardware errors " - "detected in %s block\n", - adev->smuio.funcs->get_socket_id(adev), - adev->smuio.funcs->get_die_id(adev), - ras_mgr->err_data.ce_count, - blk_name); + RAS_EVENT_LOG(adev, event_id, "socket: %d, die: %d " + "%ld correctable hardware errors " + "detected in %s block\n", + adev->smuio.funcs->get_socket_id(adev), + adev->smuio.funcs->get_die_id(adev), + ras_mgr->err_data.ce_count, + blk_name); } else { - dev_info(adev->dev, "%ld correctable hardware errors " - "detected in %s block\n", - ras_mgr->err_data.ce_count, - blk_name); + RAS_EVENT_LOG(adev, event_id, "%ld correctable hardware errors " + "detected in %s block\n", + ras_mgr->err_data.ce_count, + blk_name); } } if (err_data->ue_count) { if (err_data_has_source_info(err_data)) { - amdgpu_ras_error_print_error_data(adev, ras_mgr, err_data, blk_name, true); + amdgpu_ras_error_print_error_data(adev, ras_mgr, err_data, qctx, + blk_name, true, false); } else if (!adev->aid_mask && adev->smuio.funcs && adev->smuio.funcs->get_socket_id && adev->smuio.funcs->get_die_id) { - dev_info(adev->dev, "socket: %d, die: %d " - "%ld uncorrectable hardware errors " - "detected in %s block\n", - adev->smuio.funcs->get_socket_id(adev), - adev->smuio.funcs->get_die_id(adev), - ras_mgr->err_data.ue_count, - blk_name); + RAS_EVENT_LOG(adev, event_id, "socket: %d, die: %d " + "%ld uncorrectable hardware errors " + "detected in %s block\n", + adev->smuio.funcs->get_socket_id(adev), + adev->smuio.funcs->get_die_id(adev), + ras_mgr->err_data.ue_count, + blk_name); } else { - dev_info(adev->dev, "%ld uncorrectable hardware errors " - "detected in %s block\n", - ras_mgr->err_data.ue_count, - blk_name); + RAS_EVENT_LOG(adev, event_id, "%ld uncorrectable hardware errors " + "detected in %s block\n", + ras_mgr->err_data.ue_count, + blk_name); } } + if (err_data->de_count) { + if (err_data_has_source_info(err_data)) { + amdgpu_ras_error_print_error_data(adev, ras_mgr, err_data, qctx, + blk_name, false, true); + } else if (!adev->aid_mask && + adev->smuio.funcs && + adev->smuio.funcs->get_socket_id && + adev->smuio.funcs->get_die_id) { + RAS_EVENT_LOG(adev, event_id, "socket: %d, die: %d " + "%ld deferred hardware errors " + "detected in %s block\n", + adev->smuio.funcs->get_socket_id(adev), + adev->smuio.funcs->get_die_id(adev), + ras_mgr->err_data.de_count, + blk_name); + } else { + RAS_EVENT_LOG(adev, event_id, "%ld deferred hardware errors " + "detected in %s block\n", + ras_mgr->err_data.de_count, + blk_name); + } + } } static void amdgpu_rasmgr_error_data_statistic_update(struct ras_manager *obj, struct ras_err_data *err_data) @@ -1154,7 +1220,8 @@ static void amdgpu_rasmgr_error_data_statistic_update(struct ras_manager *obj, s if (err_data_has_source_info(err_data)) { for_each_ras_error(err_node, err_data) { err_info = &err_node->err_info; - + amdgpu_ras_error_statistic_de_count(&obj->err_data, + &err_info->mcm_info, NULL, err_info->de_count); amdgpu_ras_error_statistic_ce_count(&obj->err_data, &err_info->mcm_info, NULL, err_info->ce_count); amdgpu_ras_error_statistic_ue_count(&obj->err_data, @@ -1164,16 +1231,86 @@ static void amdgpu_rasmgr_error_data_statistic_update(struct ras_manager *obj, s /* for legacy asic path which doesn't has error source info */ obj->err_data.ue_count += err_data->ue_count; obj->err_data.ce_count += err_data->ce_count; + obj->err_data.de_count += err_data->de_count; } } +static struct ras_manager *get_ras_manager(struct amdgpu_device *adev, enum amdgpu_ras_block blk) +{ + struct ras_common_if head; + + memset(&head, 0, sizeof(head)); + head.block = blk; + + return amdgpu_ras_find_obj(adev, &head); +} + +int amdgpu_ras_bind_aca(struct amdgpu_device *adev, enum amdgpu_ras_block blk, + const struct aca_info *aca_info, void *data) +{ + struct ras_manager *obj; + + /* in resume phase, no need to create aca fs node */ + if (adev->in_suspend || amdgpu_in_reset(adev)) + return 0; + + obj = get_ras_manager(adev, blk); + if (!obj) + return -EINVAL; + + return amdgpu_aca_add_handle(adev, &obj->aca_handle, ras_block_str(blk), aca_info, data); +} + +int amdgpu_ras_unbind_aca(struct amdgpu_device *adev, enum amdgpu_ras_block blk) +{ + struct ras_manager *obj; + + obj = get_ras_manager(adev, blk); + if (!obj) + return -EINVAL; + + amdgpu_aca_remove_handle(&obj->aca_handle); + + return 0; +} + +static int amdgpu_aca_log_ras_error_data(struct amdgpu_device *adev, enum amdgpu_ras_block blk, + enum aca_error_type type, struct ras_err_data *err_data, + struct ras_query_context *qctx) +{ + struct ras_manager *obj; + + obj = get_ras_manager(adev, blk); + if (!obj) + return -EINVAL; + + return amdgpu_aca_get_error_data(adev, &obj->aca_handle, type, err_data, qctx); +} + +ssize_t amdgpu_ras_aca_sysfs_read(struct device *dev, struct device_attribute *attr, + struct aca_handle *handle, char *buf, void *data) +{ + struct ras_manager *obj = container_of(handle, struct ras_manager, aca_handle); + struct ras_query_if info = { + .head = obj->head, + }; + + if (amdgpu_ras_query_error_status(obj->adev, &info)) + return -EINVAL; + + return sysfs_emit(buf, "%s: %lu\n%s: %lu\n%s: %lu\n", "ue", info.ue_count, + "ce", info.ce_count, "de", info.ue_count); +} + static int amdgpu_ras_query_error_status_helper(struct amdgpu_device *adev, struct ras_query_if *info, struct ras_err_data *err_data, + struct ras_query_context *qctx, unsigned int error_query_mode) { enum amdgpu_ras_block blk = info ? info->head.block : AMDGPU_RAS_BLOCK_COUNT; struct amdgpu_ras_block_object *block_obj = NULL; + int ret; if (blk == AMDGPU_RAS_BLOCK_COUNT) return -EINVAL; @@ -1203,9 +1340,23 @@ static int amdgpu_ras_query_error_status_helper(struct amdgpu_device *adev, } } } else { - /* FIXME: add code to check return value later */ - amdgpu_mca_smu_log_ras_error(adev, blk, AMDGPU_MCA_ERROR_TYPE_UE, err_data); - amdgpu_mca_smu_log_ras_error(adev, blk, AMDGPU_MCA_ERROR_TYPE_CE, err_data); + if (amdgpu_aca_is_enabled(adev)) { + ret = amdgpu_aca_log_ras_error_data(adev, blk, ACA_ERROR_TYPE_UE, err_data, qctx); + if (ret) + return ret; + + ret = amdgpu_aca_log_ras_error_data(adev, blk, ACA_ERROR_TYPE_CE, err_data, qctx); + if (ret) + return ret; + + ret = amdgpu_aca_log_ras_error_data(adev, blk, ACA_ERROR_TYPE_DEFERRED, err_data, qctx); + if (ret) + return ret; + } else { + /* FIXME: add code to check return value later */ + amdgpu_mca_smu_log_ras_error(adev, blk, AMDGPU_MCA_ERROR_TYPE_UE, err_data, qctx); + amdgpu_mca_smu_log_ras_error(adev, blk, AMDGPU_MCA_ERROR_TYPE_CE, err_data, qctx); + } } return 0; @@ -1216,6 +1367,7 @@ int amdgpu_ras_query_error_status(struct amdgpu_device *adev, struct ras_query_i { struct ras_manager *obj = amdgpu_ras_find_obj(adev, &info->head); struct ras_err_data err_data; + struct ras_query_context qctx; unsigned int error_query_mode; int ret; @@ -1229,8 +1381,12 @@ int amdgpu_ras_query_error_status(struct amdgpu_device *adev, struct ras_query_i if (!amdgpu_ras_get_error_query_mode(adev, &error_query_mode)) return -EINVAL; + memset(&qctx, 0, sizeof(qctx)); + qctx.event_id = amdgpu_ras_acquire_event_id(adev, amdgpu_ras_intr_triggered() ? + RAS_EVENT_TYPE_ISR : RAS_EVENT_TYPE_INVALID); ret = amdgpu_ras_query_error_status_helper(adev, info, &err_data, + &qctx, error_query_mode); if (ret) goto out_fini_err_data; @@ -1239,8 +1395,9 @@ int amdgpu_ras_query_error_status(struct amdgpu_device *adev, struct ras_query_i info->ue_count = obj->err_data.ue_count; info->ce_count = obj->err_data.ce_count; + info->de_count = obj->err_data.de_count; - amdgpu_ras_error_generate_report(adev, info, &err_data); + amdgpu_ras_error_generate_report(adev, info, &err_data, &qctx); out_fini_err_data: amdgpu_ras_error_data_fini(&err_data); @@ -1254,6 +1411,7 @@ int amdgpu_ras_reset_error_count(struct amdgpu_device *adev, struct amdgpu_ras_block_object *block_obj = amdgpu_ras_get_ras_block(adev, block, 0); struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); const struct amdgpu_mca_smu_funcs *mca_funcs = adev->mca.mca_funcs; + const struct aca_smu_funcs *smu_funcs = adev->aca.smu_funcs; struct amdgpu_hive_info *hive; int hive_ras_recovery = 0; @@ -1264,7 +1422,7 @@ int amdgpu_ras_reset_error_count(struct amdgpu_device *adev, } if (!amdgpu_ras_is_supported(adev, block) || - !amdgpu_ras_get_mca_debug_mode(adev)) + !amdgpu_ras_get_aca_debug_mode(adev)) return -EOPNOTSUPP; hive = amdgpu_get_xgmi_hive(adev); @@ -1276,7 +1434,8 @@ int amdgpu_ras_reset_error_count(struct amdgpu_device *adev, /* skip ras error reset in gpu reset */ if ((amdgpu_in_reset(adev) || atomic_read(&ras->in_recovery) || hive_ras_recovery) && - mca_funcs && mca_funcs->mca_set_debug_mode) + ((smu_funcs && smu_funcs->set_debug_mode) || + (mca_funcs && mca_funcs->mca_set_debug_mode))) return -EOPNOTSUPP; if (block_obj->hw_ops->reset_ras_error_count) @@ -1772,7 +1931,10 @@ void amdgpu_ras_debugfs_create_all(struct amdgpu_device *adev) } } - amdgpu_mca_smu_debugfs_init(adev, dir); + if (amdgpu_aca_is_enabled(adev)) + amdgpu_aca_smu_debugfs_init(adev, dir); + else + amdgpu_mca_smu_debugfs_init(adev, dir); } /* debugfs end */ @@ -1900,7 +2062,7 @@ static void amdgpu_ras_interrupt_poison_consumption_handler(struct ras_manager * } } - amdgpu_umc_poison_handler(adev, false); + amdgpu_umc_poison_handler(adev, obj->head.block, 0); if (block_obj->hw_ops && block_obj->hw_ops->handle_poison_consumption) poison_stat = block_obj->hw_ops->handle_poison_consumption(adev); @@ -1920,6 +2082,17 @@ static void amdgpu_ras_interrupt_poison_creation_handler(struct ras_manager *obj { dev_info(obj->adev->dev, "Poison is created\n"); + + if (amdgpu_ip_version(obj->adev, UMC_HWIP, 0) >= IP_VERSION(12, 0, 0)) { + struct amdgpu_ras *con = amdgpu_ras_get_context(obj->adev); + + amdgpu_ras_put_poison_req(obj->adev, + AMDGPU_RAS_BLOCK__UMC, 0, NULL, NULL, false); + + atomic_inc(&con->page_retirement_req_cnt); + + wake_up(&con->page_retirement_wq); + } } static void amdgpu_ras_interrupt_umc_handler(struct ras_manager *obj, @@ -1951,6 +2124,7 @@ static void amdgpu_ras_interrupt_umc_handler(struct ras_manager *obj, */ obj->err_data.ue_count += err_data.ue_count; obj->err_data.ce_count += err_data.ce_count; + obj->err_data.de_count += err_data.de_count; } amdgpu_ras_error_data_fini(&err_data); @@ -2229,7 +2403,7 @@ static int amdgpu_ras_badpages_read(struct amdgpu_device *adev, .flags = AMDGPU_RAS_RETIRE_PAGE_RESERVED, }; status = amdgpu_vram_mgr_query_page_status(&adev->mman.vram_mgr, - data->bps[i].retired_page); + data->bps[i].retired_page << AMDGPU_GPU_PAGE_SHIFT); if (status == -EBUSY) (*bps)[i].flags = AMDGPU_RAS_RETIRE_PAGE_PENDING; else if (status == -ENOENT) @@ -2242,6 +2416,19 @@ out: return ret; } +static void amdgpu_ras_set_fed_all(struct amdgpu_device *adev, + struct amdgpu_hive_info *hive, bool status) +{ + struct amdgpu_device *tmp_adev; + + if (hive) { + list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) + amdgpu_ras_set_fed(tmp_adev, status); + } else { + amdgpu_ras_set_fed(adev, status); + } +} + static void amdgpu_ras_do_recovery(struct work_struct *work) { struct amdgpu_ras *ras = @@ -2251,8 +2438,21 @@ static void amdgpu_ras_do_recovery(struct work_struct *work) struct list_head device_list, *device_list_handle = NULL; struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev); - if (hive) + if (hive) { atomic_set(&hive->ras_recovery, 1); + + /* If any device which is part of the hive received RAS fatal + * error interrupt, set fatal error status on all. This + * condition will need a recovery, and flag will be cleared + * as part of recovery. + */ + list_for_each_entry(remote_adev, &hive->device_list, + gmc.xgmi.head) + if (amdgpu_ras_get_fed_status(remote_adev)) { + amdgpu_ras_set_fed_all(adev, hive, true); + break; + } + } if (!ras->disable_ras_err_cnt_harvest) { /* Build list of devices to query RAS related errors */ @@ -2362,9 +2562,7 @@ int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev, goto out; } - amdgpu_vram_mgr_reserve_range(&adev->mman.vram_mgr, - bps[i].retired_page << AMDGPU_GPU_PAGE_SHIFT, - AMDGPU_GPU_PAGE_SIZE); + amdgpu_ras_reserve_page(adev, bps[i].retired_page); memcpy(&data->bps[data->count], &bps[i], sizeof(*data->bps)); data->count++; @@ -2520,6 +2718,210 @@ static void amdgpu_ras_validate_threshold(struct amdgpu_device *adev, } } +int amdgpu_ras_put_poison_req(struct amdgpu_device *adev, + enum amdgpu_ras_block block, uint16_t pasid, + pasid_notify pasid_fn, void *data, uint32_t reset) +{ + int ret = 0; + struct ras_poison_msg poison_msg; + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + + memset(&poison_msg, 0, sizeof(poison_msg)); + poison_msg.block = block; + poison_msg.pasid = pasid; + poison_msg.reset = reset; + poison_msg.pasid_fn = pasid_fn; + poison_msg.data = data; + + ret = kfifo_put(&con->poison_fifo, poison_msg); + if (!ret) { + dev_err(adev->dev, "Poison message fifo is full!\n"); + return -ENOSPC; + } + + return 0; +} + +static int amdgpu_ras_get_poison_req(struct amdgpu_device *adev, + struct ras_poison_msg *poison_msg) +{ + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + + return kfifo_get(&con->poison_fifo, poison_msg); +} + +static void amdgpu_ras_ecc_log_init(struct ras_ecc_log_info *ecc_log) +{ + mutex_init(&ecc_log->lock); + + /* Set any value as siphash key */ + memset(&ecc_log->ecc_key, 0xad, sizeof(ecc_log->ecc_key)); + + INIT_RADIX_TREE(&ecc_log->de_page_tree, GFP_KERNEL); + ecc_log->de_updated = false; +} + +static void amdgpu_ras_ecc_log_fini(struct ras_ecc_log_info *ecc_log) +{ + struct radix_tree_iter iter; + void __rcu **slot; + struct ras_ecc_err *ecc_err; + + mutex_lock(&ecc_log->lock); + radix_tree_for_each_slot(slot, &ecc_log->de_page_tree, &iter, 0) { + ecc_err = radix_tree_deref_slot(slot); + kfree(ecc_err->err_pages.pfn); + kfree(ecc_err); + radix_tree_iter_delete(&ecc_log->de_page_tree, &iter, slot); + } + mutex_unlock(&ecc_log->lock); + + mutex_destroy(&ecc_log->lock); + ecc_log->de_updated = false; +} + +static void amdgpu_ras_do_page_retirement(struct work_struct *work) +{ + struct amdgpu_ras *con = container_of(work, struct amdgpu_ras, + page_retirement_dwork.work); + struct amdgpu_device *adev = con->adev; + struct ras_err_data err_data; + + if (amdgpu_in_reset(adev) || atomic_read(&con->in_recovery)) + return; + + amdgpu_ras_error_data_init(&err_data); + + amdgpu_umc_handle_bad_pages(adev, &err_data); + + amdgpu_ras_error_data_fini(&err_data); + + mutex_lock(&con->umc_ecc_log.lock); + if (radix_tree_tagged(&con->umc_ecc_log.de_page_tree, + UMC_ECC_NEW_DETECTED_TAG)) + schedule_delayed_work(&con->page_retirement_dwork, + msecs_to_jiffies(AMDGPU_RAS_RETIRE_PAGE_INTERVAL)); + mutex_unlock(&con->umc_ecc_log.lock); +} + +static int amdgpu_ras_query_ecc_status(struct amdgpu_device *adev, + enum amdgpu_ras_block ras_block, uint32_t timeout_ms) +{ + int ret = 0; + struct ras_ecc_log_info *ecc_log; + struct ras_query_if info; + uint32_t timeout = timeout_ms; + struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); + + memset(&info, 0, sizeof(info)); + info.head.block = ras_block; + + ecc_log = &ras->umc_ecc_log; + ecc_log->de_updated = false; + do { + ret = amdgpu_ras_query_error_status(adev, &info); + if (ret) { + dev_err(adev->dev, "Failed to query ras error! ret:%d\n", ret); + return ret; + } + + if (timeout && !ecc_log->de_updated) { + msleep(1); + timeout--; + } + } while (timeout && !ecc_log->de_updated); + + if (timeout_ms && !timeout) { + dev_warn(adev->dev, "Can't find deferred error\n"); + return -ETIMEDOUT; + } + + return 0; +} + +static void amdgpu_ras_poison_creation_handler(struct amdgpu_device *adev, + uint32_t timeout) +{ + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + int ret; + + ret = amdgpu_ras_query_ecc_status(adev, AMDGPU_RAS_BLOCK__UMC, timeout); + if (!ret) + schedule_delayed_work(&con->page_retirement_dwork, 0); +} + +static int amdgpu_ras_poison_consumption_handler(struct amdgpu_device *adev, + struct ras_poison_msg *poison_msg) +{ + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + uint32_t reset = poison_msg->reset; + uint16_t pasid = poison_msg->pasid; + + kgd2kfd_set_sram_ecc_flag(adev->kfd.dev); + + if (poison_msg->pasid_fn) + poison_msg->pasid_fn(adev, pasid, poison_msg->data); + + if (reset) { + flush_delayed_work(&con->page_retirement_dwork); + + con->gpu_reset_flags |= reset; + amdgpu_ras_reset_gpu(adev); + } + + return 0; +} + +static int amdgpu_ras_page_retirement_thread(void *param) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)param; + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + struct ras_poison_msg poison_msg; + enum amdgpu_ras_block ras_block; + bool poison_creation_is_handled = false; + + while (!kthread_should_stop()) { + + wait_event_interruptible(con->page_retirement_wq, + kthread_should_stop() || + atomic_read(&con->page_retirement_req_cnt)); + + if (kthread_should_stop()) + break; + + atomic_dec(&con->page_retirement_req_cnt); + + if (!amdgpu_ras_get_poison_req(adev, &poison_msg)) + continue; + + ras_block = poison_msg.block; + + dev_info(adev->dev, "Start processing ras block %s(%d)\n", + ras_block_str(ras_block), ras_block); + + if (ras_block == AMDGPU_RAS_BLOCK__UMC) { + amdgpu_ras_poison_creation_handler(adev, + MAX_UMC_POISON_POLLING_TIME_ASYNC); + poison_creation_is_handled = true; + } else { + /* poison_creation_is_handled: + * false: no poison creation interrupt, but it has poison + * consumption interrupt. + * true: It has poison creation interrupt at the beginning, + * but it has no poison creation interrupt later. + */ + amdgpu_ras_poison_creation_handler(adev, + poison_creation_is_handled ? + 0 : MAX_UMC_POISON_POLLING_TIME_ASYNC); + + amdgpu_ras_poison_consumption_handler(adev, &poison_msg); + poison_creation_is_handled = false; + } + } + + return 0; +} + int amdgpu_ras_recovery_init(struct amdgpu_device *adev) { struct amdgpu_ras *con = amdgpu_ras_get_context(adev); @@ -2583,6 +2985,20 @@ int amdgpu_ras_recovery_init(struct amdgpu_device *adev) } } + mutex_init(&con->page_rsv_lock); + INIT_KFIFO(con->poison_fifo); + mutex_init(&con->page_retirement_lock); + init_waitqueue_head(&con->page_retirement_wq); + atomic_set(&con->page_retirement_req_cnt, 0); + con->page_retirement_thread = + kthread_run(amdgpu_ras_page_retirement_thread, adev, "umc_page_retirement"); + if (IS_ERR(con->page_retirement_thread)) { + con->page_retirement_thread = NULL; + dev_warn(adev->dev, "Failed to create umc_page_retirement thread!!!\n"); + } + + INIT_DELAYED_WORK(&con->page_retirement_dwork, amdgpu_ras_do_page_retirement); + amdgpu_ras_ecc_log_init(&con->umc_ecc_log); #ifdef CONFIG_X86_MCE_AMD if ((adev->asic_type == CHIP_ALDEBARAN) && (adev->gmc.xgmi.connected_to_cpu)) @@ -2618,8 +3034,19 @@ static int amdgpu_ras_recovery_fini(struct amdgpu_device *adev) if (!data) return 0; + if (con->page_retirement_thread) + kthread_stop(con->page_retirement_thread); + + atomic_set(&con->page_retirement_req_cnt, 0); + + mutex_destroy(&con->page_rsv_lock); + cancel_work_sync(&con->recovery_work); + cancel_delayed_work_sync(&con->page_retirement_dwork); + + amdgpu_ras_ecc_log_fini(&con->umc_ecc_log); + mutex_lock(&con->recovery_lock); con->eh_data = NULL; kfree(data->bps); @@ -2679,6 +3106,87 @@ static void amdgpu_ras_get_quirks(struct amdgpu_device *adev) adev->ras_hw_enabled |= (1 << AMDGPU_RAS_BLOCK__GFX); } +/* Query ras capablity via atomfirmware interface */ +static void amdgpu_ras_query_ras_capablity_from_vbios(struct amdgpu_device *adev) +{ + /* mem_ecc cap */ + if (amdgpu_atomfirmware_mem_ecc_supported(adev)) { + dev_info(adev->dev, "MEM ECC is active.\n"); + adev->ras_hw_enabled |= (1 << AMDGPU_RAS_BLOCK__UMC | + 1 << AMDGPU_RAS_BLOCK__DF); + } else { + dev_info(adev->dev, "MEM ECC is not presented.\n"); + } + + /* sram_ecc cap */ + if (amdgpu_atomfirmware_sram_ecc_supported(adev)) { + dev_info(adev->dev, "SRAM ECC is active.\n"); + if (!amdgpu_sriov_vf(adev)) + adev->ras_hw_enabled |= ~(1 << AMDGPU_RAS_BLOCK__UMC | + 1 << AMDGPU_RAS_BLOCK__DF); + else + adev->ras_hw_enabled |= (1 << AMDGPU_RAS_BLOCK__PCIE_BIF | + 1 << AMDGPU_RAS_BLOCK__SDMA | + 1 << AMDGPU_RAS_BLOCK__GFX); + + /* + * VCN/JPEG RAS can be supported on both bare metal and + * SRIOV environment + */ + if (amdgpu_ip_version(adev, VCN_HWIP, 0) == IP_VERSION(2, 6, 0) || + amdgpu_ip_version(adev, VCN_HWIP, 0) == IP_VERSION(4, 0, 0) || + amdgpu_ip_version(adev, VCN_HWIP, 0) == IP_VERSION(4, 0, 3)) + adev->ras_hw_enabled |= (1 << AMDGPU_RAS_BLOCK__VCN | + 1 << AMDGPU_RAS_BLOCK__JPEG); + else + adev->ras_hw_enabled &= ~(1 << AMDGPU_RAS_BLOCK__VCN | + 1 << AMDGPU_RAS_BLOCK__JPEG); + + /* + * XGMI RAS is not supported if xgmi num physical nodes + * is zero + */ + if (!adev->gmc.xgmi.num_physical_nodes) + adev->ras_hw_enabled &= ~(1 << AMDGPU_RAS_BLOCK__XGMI_WAFL); + } else { + dev_info(adev->dev, "SRAM ECC is not presented.\n"); + } +} + +/* Query poison mode from umc/df IP callbacks */ +static void amdgpu_ras_query_poison_mode(struct amdgpu_device *adev) +{ + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + bool df_poison, umc_poison; + + /* poison setting is useless on SRIOV guest */ + if (amdgpu_sriov_vf(adev) || !con) + return; + + /* Init poison supported flag, the default value is false */ + if (adev->gmc.xgmi.connected_to_cpu || + adev->gmc.is_app_apu) { + /* enabled by default when GPU is connected to CPU */ + con->poison_supported = true; + } else if (adev->df.funcs && + adev->df.funcs->query_ras_poison_mode && + adev->umc.ras && + adev->umc.ras->query_ras_poison_mode) { + df_poison = + adev->df.funcs->query_ras_poison_mode(adev); + umc_poison = + adev->umc.ras->query_ras_poison_mode(adev); + + /* Only poison is set in both DF and UMC, we can support it */ + if (df_poison && umc_poison) + con->poison_supported = true; + else if (df_poison != umc_poison) + dev_warn(adev->dev, + "Poison setting is inconsistent in DF/UMC(%d:%d)!\n", + df_poison, umc_poison); + } +} + /* * check hardware's ras ability which will be saved in hw_supported. * if hardware does not support ras, we can skip some ras initializtion and @@ -2695,49 +3203,13 @@ static void amdgpu_ras_check_supported(struct amdgpu_device *adev) if (!amdgpu_ras_asic_supported(adev)) return; - if (!adev->gmc.xgmi.connected_to_cpu && !adev->gmc.is_app_apu) { - if (amdgpu_atomfirmware_mem_ecc_supported(adev)) { - dev_info(adev->dev, "MEM ECC is active.\n"); - adev->ras_hw_enabled |= (1 << AMDGPU_RAS_BLOCK__UMC | - 1 << AMDGPU_RAS_BLOCK__DF); - } else { - dev_info(adev->dev, "MEM ECC is not presented.\n"); - } + /* query ras capability from psp */ + if (amdgpu_psp_get_ras_capability(&adev->psp)) + goto init_ras_enabled_flag; - if (amdgpu_atomfirmware_sram_ecc_supported(adev)) { - dev_info(adev->dev, "SRAM ECC is active.\n"); - if (!amdgpu_sriov_vf(adev)) - adev->ras_hw_enabled |= ~(1 << AMDGPU_RAS_BLOCK__UMC | - 1 << AMDGPU_RAS_BLOCK__DF); - else - adev->ras_hw_enabled |= (1 << AMDGPU_RAS_BLOCK__PCIE_BIF | - 1 << AMDGPU_RAS_BLOCK__SDMA | - 1 << AMDGPU_RAS_BLOCK__GFX); - - /* VCN/JPEG RAS can be supported on both bare metal and - * SRIOV environment - */ - if (amdgpu_ip_version(adev, VCN_HWIP, 0) == - IP_VERSION(2, 6, 0) || - amdgpu_ip_version(adev, VCN_HWIP, 0) == - IP_VERSION(4, 0, 0) || - amdgpu_ip_version(adev, VCN_HWIP, 0) == - IP_VERSION(4, 0, 3)) - adev->ras_hw_enabled |= (1 << AMDGPU_RAS_BLOCK__VCN | - 1 << AMDGPU_RAS_BLOCK__JPEG); - else - adev->ras_hw_enabled &= ~(1 << AMDGPU_RAS_BLOCK__VCN | - 1 << AMDGPU_RAS_BLOCK__JPEG); - - /* - * XGMI RAS is not supported if xgmi num physical nodes - * is zero - */ - if (!adev->gmc.xgmi.num_physical_nodes) - adev->ras_hw_enabled &= ~(1 << AMDGPU_RAS_BLOCK__XGMI_WAFL); - } else { - dev_info(adev->dev, "SRAM ECC is not presented.\n"); - } + /* query ras capablity from bios */ + if (!adev->gmc.xgmi.connected_to_cpu && !adev->gmc.is_app_apu) { + amdgpu_ras_query_ras_capablity_from_vbios(adev); } else { /* driver only manages a few IP blocks RAS feature * when GPU is connected cpu through XGMI */ @@ -2746,13 +3218,21 @@ static void amdgpu_ras_check_supported(struct amdgpu_device *adev) 1 << AMDGPU_RAS_BLOCK__MMHUB); } + /* apply asic specific settings (vega20 only for now) */ amdgpu_ras_get_quirks(adev); + /* query poison mode from umc/df ip callback */ + amdgpu_ras_query_poison_mode(adev); + +init_ras_enabled_flag: /* hw_supported needs to be aligned with RAS block mask. */ adev->ras_hw_enabled &= AMDGPU_RAS_BLOCK_MASK; adev->ras_enabled = amdgpu_ras_enable == 0 ? 0 : adev->ras_hw_enabled & amdgpu_ras_mask; + + /* aca is disabled by default */ + adev->aca.is_enabled = false; } static void amdgpu_ras_counte_dw(struct work_struct *work) @@ -2780,45 +3260,41 @@ Out: pm_runtime_put_autosuspend(dev->dev); } -static void amdgpu_ras_query_poison_mode(struct amdgpu_device *adev) +static int amdgpu_get_ras_schema(struct amdgpu_device *adev) { - struct amdgpu_ras *con = amdgpu_ras_get_context(adev); - bool df_poison, umc_poison; + return amdgpu_ras_is_poison_mode_supported(adev) ? AMDGPU_RAS_ERROR__POISON : 0 | + AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE | + AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE | + AMDGPU_RAS_ERROR__PARITY; +} - /* poison setting is useless on SRIOV guest */ - if (amdgpu_sriov_vf(adev) || !con) +static void ras_event_mgr_init(struct ras_event_manager *mgr) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(mgr->seqnos); i++) + atomic64_set(&mgr->seqnos[i], 0); +} + +static void amdgpu_ras_event_mgr_init(struct amdgpu_device *adev) +{ + struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); + struct amdgpu_hive_info *hive; + + if (!ras) return; - /* Init poison supported flag, the default value is false */ - if (adev->gmc.xgmi.connected_to_cpu || - adev->gmc.is_app_apu) { - /* enabled by default when GPU is connected to CPU */ - con->poison_supported = true; - } else if (adev->df.funcs && - adev->df.funcs->query_ras_poison_mode && - adev->umc.ras && - adev->umc.ras->query_ras_poison_mode) { - df_poison = - adev->df.funcs->query_ras_poison_mode(adev); - umc_poison = - adev->umc.ras->query_ras_poison_mode(adev); + hive = amdgpu_get_xgmi_hive(adev); + ras->event_mgr = hive ? &hive->event_mgr : &ras->__event_mgr; - /* Only poison is set in both DF and UMC, we can support it */ - if (df_poison && umc_poison) - con->poison_supported = true; - else if (df_poison != umc_poison) - dev_warn(adev->dev, - "Poison setting is inconsistent in DF/UMC(%d:%d)!\n", - df_poison, umc_poison); + /* init event manager with node 0 on xgmi system */ + if (!amdgpu_in_reset(adev)) { + if (!hive || adev->gmc.xgmi.node_id == 0) + ras_event_mgr_init(ras->event_mgr); } -} -static int amdgpu_get_ras_schema(struct amdgpu_device *adev) -{ - return amdgpu_ras_is_poison_mode_supported(adev) ? AMDGPU_RAS_ERROR__POISON : 0 | - AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE | - AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE | - AMDGPU_RAS_ERROR__PARITY; + if (hive) + amdgpu_put_xgmi_hive(hive); } int amdgpu_ras_init(struct amdgpu_device *adev) @@ -2917,12 +3393,11 @@ int amdgpu_ras_init(struct amdgpu_device *adev) goto release_con; } - amdgpu_ras_query_poison_mode(adev); - /* Packed socket_id to ras feature mask bits[31:29] */ if (adev->smuio.funcs && adev->smuio.funcs->get_socket_id) - con->features |= ((adev->smuio.funcs->get_socket_id(adev)) << 29); + con->features |= ((adev->smuio.funcs->get_socket_id(adev)) << + AMDGPU_RAS_FEATURES_SOCKETID_SHIFT); /* Get RAS schema for particular SOC */ con->schema = amdgpu_get_ras_schema(adev); @@ -3128,7 +3603,7 @@ void amdgpu_ras_suspend(struct amdgpu_device *adev) amdgpu_ras_disable_all_features(adev, 0); /* Make sure all ras objects are disabled. */ - if (con->features) + if (AMDGPU_RAS_GET_FEATURES(con->features)) amdgpu_ras_disable_all_features(adev, 1); } @@ -3142,15 +3617,31 @@ int amdgpu_ras_late_init(struct amdgpu_device *adev) if (amdgpu_sriov_vf(adev)) return 0; - amdgpu_ras_set_mca_debug_mode(adev, false); + amdgpu_ras_event_mgr_init(adev); + + if (amdgpu_aca_is_enabled(adev)) { + if (amdgpu_in_reset(adev)) + r = amdgpu_aca_reset(adev); + else + r = amdgpu_aca_init(adev); + if (r) + return r; + + amdgpu_ras_set_aca_debug_mode(adev, false); + } else { + amdgpu_ras_set_mca_debug_mode(adev, false); + } list_for_each_entry_safe(node, tmp, &adev->ras_list, node) { - if (!node->ras_obj) { + obj = node->ras_obj; + if (!obj) { dev_warn(adev->dev, "Warning: abnormal ras list node.\n"); continue; } - obj = node->ras_obj; + if (!amdgpu_ras_is_supported(adev, obj->ras_comm.block)) + continue; + if (obj->ras_late_init) { r = obj->ras_late_init(adev, &obj->ras_comm); if (r) { @@ -3175,7 +3666,7 @@ int amdgpu_ras_pre_fini(struct amdgpu_device *adev) /* Need disable ras on all IPs here before ip [hw/sw]fini */ - if (con->features) + if (AMDGPU_RAS_GET_FEATURES(con->features)) amdgpu_ras_disable_all_features(adev, 0); amdgpu_ras_recovery_fini(adev); return 0; @@ -3208,10 +3699,13 @@ int amdgpu_ras_fini(struct amdgpu_device *adev) amdgpu_ras_fs_fini(adev); amdgpu_ras_interrupt_remove_all(adev); - WARN(con->features, "Feature mask is not cleared"); + if (amdgpu_aca_is_enabled(adev)) + amdgpu_aca_fini(adev); - if (con->features) - amdgpu_ras_disable_all_features(adev, 1); + WARN(AMDGPU_RAS_GET_FEATURES(con->features), "Feature mask is not cleared"); + + if (AMDGPU_RAS_GET_FEATURES(con->features)) + amdgpu_ras_disable_all_features(adev, 0); cancel_delayed_work_sync(&con->ras_counte_delay_work); @@ -3221,14 +3715,59 @@ int amdgpu_ras_fini(struct amdgpu_device *adev) return 0; } +bool amdgpu_ras_get_fed_status(struct amdgpu_device *adev) +{ + struct amdgpu_ras *ras; + + ras = amdgpu_ras_get_context(adev); + if (!ras) + return false; + + return atomic_read(&ras->fed); +} + +void amdgpu_ras_set_fed(struct amdgpu_device *adev, bool status) +{ + struct amdgpu_ras *ras; + + ras = amdgpu_ras_get_context(adev); + if (ras) + atomic_set(&ras->fed, !!status); +} + +bool amdgpu_ras_event_id_is_valid(struct amdgpu_device *adev, u64 id) +{ + return !(id & BIT_ULL(63)); +} + +u64 amdgpu_ras_acquire_event_id(struct amdgpu_device *adev, enum ras_event_type type) +{ + struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); + u64 id; + + switch (type) { + case RAS_EVENT_TYPE_ISR: + id = (u64)atomic64_read(&ras->event_mgr->seqnos[type]); + break; + case RAS_EVENT_TYPE_INVALID: + default: + id = BIT_ULL(63) | 0ULL; + break; + } + + return id; +} + void amdgpu_ras_global_ras_isr(struct amdgpu_device *adev) { if (atomic_cmpxchg(&amdgpu_ras_in_intr, 0, 1) == 0) { struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); + u64 event_id = (u64)atomic64_inc_return(&ras->event_mgr->seqnos[RAS_EVENT_TYPE_ISR]); - dev_info(adev->dev, "uncorrectable hardware error" - "(ERREVENT_ATHUB_INTERRUPT) detected!\n"); + RAS_EVENT_LOG(adev, event_id, "uncorrectable hardware error" + "(ERREVENT_ATHUB_INTERRUPT) detected!\n"); + amdgpu_ras_set_fed(adev, true); ras->gpu_reset_flags |= AMDGPU_RAS_GPU_RESET_MODE1_RESET; amdgpu_ras_reset_gpu(adev); } @@ -3401,6 +3940,7 @@ int amdgpu_ras_is_supported(struct amdgpu_device *adev, block == AMDGPU_RAS_BLOCK__SDMA || block == AMDGPU_RAS_BLOCK__VCN || block == AMDGPU_RAS_BLOCK__JPEG) && + (amdgpu_ras_mask & (1 << block)) && amdgpu_ras_is_poison_mode_supported(adev) && amdgpu_ras_get_ras_block(adev, block, 0)) ret = 1; @@ -3425,22 +3965,41 @@ int amdgpu_ras_set_mca_debug_mode(struct amdgpu_device *adev, bool enable) if (con) { ret = amdgpu_mca_smu_set_debug_mode(adev, enable); if (!ret) - con->is_mca_debug_mode = enable; + con->is_aca_debug_mode = enable; } return ret; } -bool amdgpu_ras_get_mca_debug_mode(struct amdgpu_device *adev) +int amdgpu_ras_set_aca_debug_mode(struct amdgpu_device *adev, bool enable) { struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + int ret = 0; + + if (con) { + if (amdgpu_aca_is_enabled(adev)) + ret = amdgpu_aca_smu_set_debug_mode(adev, enable); + else + ret = amdgpu_mca_smu_set_debug_mode(adev, enable); + if (!ret) + con->is_aca_debug_mode = enable; + } + + return ret; +} + +bool amdgpu_ras_get_aca_debug_mode(struct amdgpu_device *adev) +{ + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + const struct aca_smu_funcs *smu_funcs = adev->aca.smu_funcs; const struct amdgpu_mca_smu_funcs *mca_funcs = adev->mca.mca_funcs; if (!con) return false; - if (mca_funcs && mca_funcs->mca_set_debug_mode) - return con->is_mca_debug_mode; + if ((amdgpu_aca_is_enabled(adev) && smu_funcs && smu_funcs->set_debug_mode) || + (!amdgpu_aca_is_enabled(adev) && mca_funcs && mca_funcs->mca_set_debug_mode)) + return con->is_aca_debug_mode; else return true; } @@ -3450,15 +4009,16 @@ bool amdgpu_ras_get_error_query_mode(struct amdgpu_device *adev, { struct amdgpu_ras *con = amdgpu_ras_get_context(adev); const struct amdgpu_mca_smu_funcs *mca_funcs = adev->mca.mca_funcs; + const struct aca_smu_funcs *smu_funcs = adev->aca.smu_funcs; if (!con) { *error_query_mode = AMDGPU_RAS_INVALID_ERROR_QUERY; return false; } - if (mca_funcs && mca_funcs->mca_set_debug_mode) + if ((smu_funcs && smu_funcs->set_debug_mode) || (mca_funcs && mca_funcs->mca_set_debug_mode)) *error_query_mode = - (con->is_mca_debug_mode) ? AMDGPU_RAS_DIRECT_ERROR_QUERY : AMDGPU_RAS_FIRMWARE_ERROR_QUERY; + (con->is_aca_debug_mode) ? AMDGPU_RAS_DIRECT_ERROR_QUERY : AMDGPU_RAS_FIRMWARE_ERROR_QUERY; else *error_query_mode = AMDGPU_RAS_DIRECT_ERROR_QUERY; @@ -3699,8 +4259,7 @@ static int ras_err_info_cmp(void *priv, const struct list_head *a, const struct } static struct ras_err_info *amdgpu_ras_error_get_info(struct ras_err_data *err_data, - struct amdgpu_smuio_mcm_config_info *mcm_info, - struct ras_err_addr *err_addr) + struct amdgpu_smuio_mcm_config_info *mcm_info) { struct ras_err_node *err_node; @@ -3712,10 +4271,9 @@ static struct ras_err_info *amdgpu_ras_error_get_info(struct ras_err_data *err_d if (!err_node) return NULL; - memcpy(&err_node->err_info.mcm_info, mcm_info, sizeof(*mcm_info)); + INIT_LIST_HEAD(&err_node->err_info.err_addr_list); - if (err_addr) - memcpy(&err_node->err_info.err_addr, err_addr, sizeof(*err_addr)); + memcpy(&err_node->err_info.mcm_info, mcm_info, sizeof(*mcm_info)); err_data->err_list_count++; list_add_tail(&err_node->node, &err_data->err_node_list); @@ -3724,6 +4282,31 @@ static struct ras_err_info *amdgpu_ras_error_get_info(struct ras_err_data *err_d return &err_node->err_info; } +void amdgpu_ras_add_mca_err_addr(struct ras_err_info *err_info, struct ras_err_addr *err_addr) +{ + struct ras_err_addr *mca_err_addr; + + /* This function will be retired. */ + return; + mca_err_addr = kzalloc(sizeof(*mca_err_addr), GFP_KERNEL); + if (!mca_err_addr) + return; + + INIT_LIST_HEAD(&mca_err_addr->node); + + mca_err_addr->err_status = err_addr->err_status; + mca_err_addr->err_ipid = err_addr->err_ipid; + mca_err_addr->err_addr = err_addr->err_addr; + + list_add_tail(&mca_err_addr->node, &err_info->err_addr_list); +} + +void amdgpu_ras_del_mca_err_addr(struct ras_err_info *err_info, struct ras_err_addr *mca_err_addr) +{ + list_del(&mca_err_addr->node); + kfree(mca_err_addr); +} + int amdgpu_ras_error_statistic_ue_count(struct ras_err_data *err_data, struct amdgpu_smuio_mcm_config_info *mcm_info, struct ras_err_addr *err_addr, u64 count) @@ -3736,10 +4319,13 @@ int amdgpu_ras_error_statistic_ue_count(struct ras_err_data *err_data, if (!count) return 0; - err_info = amdgpu_ras_error_get_info(err_data, mcm_info, err_addr); + err_info = amdgpu_ras_error_get_info(err_data, mcm_info); if (!err_info) return -EINVAL; + if (err_addr && err_addr->err_status) + amdgpu_ras_add_mca_err_addr(err_info, err_addr); + err_info->ue_count += count; err_data->ue_count += count; @@ -3758,7 +4344,7 @@ int amdgpu_ras_error_statistic_ce_count(struct ras_err_data *err_data, if (!count) return 0; - err_info = amdgpu_ras_error_get_info(err_data, mcm_info, err_addr); + err_info = amdgpu_ras_error_get_info(err_data, mcm_info); if (!err_info) return -EINVAL; @@ -3767,3 +4353,151 @@ int amdgpu_ras_error_statistic_ce_count(struct ras_err_data *err_data, return 0; } + +int amdgpu_ras_error_statistic_de_count(struct ras_err_data *err_data, + struct amdgpu_smuio_mcm_config_info *mcm_info, + struct ras_err_addr *err_addr, u64 count) +{ + struct ras_err_info *err_info; + + if (!err_data || !mcm_info) + return -EINVAL; + + if (!count) + return 0; + + err_info = amdgpu_ras_error_get_info(err_data, mcm_info); + if (!err_info) + return -EINVAL; + + if (err_addr && err_addr->err_status) + amdgpu_ras_add_mca_err_addr(err_info, err_addr); + + err_info->de_count += count; + err_data->de_count += count; + + return 0; +} + +#define mmMP0_SMN_C2PMSG_92 0x1609C +#define mmMP0_SMN_C2PMSG_126 0x160BE +static void amdgpu_ras_boot_time_error_reporting(struct amdgpu_device *adev, + u32 instance, u32 boot_error) +{ + u32 socket_id, aid_id, hbm_id; + u32 reg_data; + u64 reg_addr; + + socket_id = AMDGPU_RAS_GPU_ERR_SOCKET_ID(boot_error); + aid_id = AMDGPU_RAS_GPU_ERR_AID_ID(boot_error); + hbm_id = AMDGPU_RAS_GPU_ERR_HBM_ID(boot_error); + + /* The pattern for smn addressing in other SOC could be different from + * the one for aqua_vanjaram. We should revisit the code if the pattern + * is changed. In such case, replace the aqua_vanjaram implementation + * with more common helper */ + reg_addr = (mmMP0_SMN_C2PMSG_92 << 2) + + aqua_vanjaram_encode_ext_smn_addressing(instance); + + reg_data = amdgpu_device_indirect_rreg_ext(adev, reg_addr); + dev_err(adev->dev, "socket: %d, aid: %d, firmware boot failed, fw status is 0x%x\n", + socket_id, aid_id, reg_data); + + if (AMDGPU_RAS_GPU_ERR_MEM_TRAINING(boot_error)) + dev_info(adev->dev, "socket: %d, aid: %d, hbm: %d, memory training failed\n", + socket_id, aid_id, hbm_id); + + if (AMDGPU_RAS_GPU_ERR_FW_LOAD(boot_error)) + dev_info(adev->dev, "socket: %d, aid: %d, firmware load failed at boot time\n", + socket_id, aid_id); + + if (AMDGPU_RAS_GPU_ERR_WAFL_LINK_TRAINING(boot_error)) + dev_info(adev->dev, "socket: %d, aid: %d, wafl link training failed\n", + socket_id, aid_id); + + if (AMDGPU_RAS_GPU_ERR_XGMI_LINK_TRAINING(boot_error)) + dev_info(adev->dev, "socket: %d, aid: %d, xgmi link training failed\n", + socket_id, aid_id); + + if (AMDGPU_RAS_GPU_ERR_USR_CP_LINK_TRAINING(boot_error)) + dev_info(adev->dev, "socket: %d, aid: %d, usr cp link training failed\n", + socket_id, aid_id); + + if (AMDGPU_RAS_GPU_ERR_USR_DP_LINK_TRAINING(boot_error)) + dev_info(adev->dev, "socket: %d, aid: %d, usr dp link training failed\n", + socket_id, aid_id); + + if (AMDGPU_RAS_GPU_ERR_HBM_MEM_TEST(boot_error)) + dev_info(adev->dev, "socket: %d, aid: %d, hbm: %d, hbm memory test failed\n", + socket_id, aid_id, hbm_id); + + if (AMDGPU_RAS_GPU_ERR_HBM_BIST_TEST(boot_error)) + dev_info(adev->dev, "socket: %d, aid: %d, hbm: %d, hbm bist test failed\n", + socket_id, aid_id, hbm_id); +} + +static int amdgpu_ras_wait_for_boot_complete(struct amdgpu_device *adev, + u32 instance, u32 *boot_error) +{ + u32 reg_addr; + u32 reg_data; + int retry_loop; + + reg_addr = (mmMP0_SMN_C2PMSG_92 << 2) + + aqua_vanjaram_encode_ext_smn_addressing(instance); + + for (retry_loop = 0; retry_loop < AMDGPU_RAS_BOOT_STATUS_POLLING_LIMIT; retry_loop++) { + reg_data = amdgpu_device_indirect_rreg_ext(adev, reg_addr); + if ((reg_data & AMDGPU_RAS_BOOT_STATUS_MASK) == AMDGPU_RAS_BOOT_STEADY_STATUS) { + *boot_error = AMDGPU_RAS_BOOT_SUCEESS; + return 0; + } + msleep(1); + } + + /* The pattern for smn addressing in other SOC could be different from + * the one for aqua_vanjaram. We should revisit the code if the pattern + * is changed. In such case, replace the aqua_vanjaram implementation + * with more common helper */ + reg_addr = (mmMP0_SMN_C2PMSG_126 << 2) + + aqua_vanjaram_encode_ext_smn_addressing(instance); + + for (retry_loop = 0; retry_loop < AMDGPU_RAS_BOOT_STATUS_POLLING_LIMIT; retry_loop++) { + reg_data = amdgpu_device_indirect_rreg_ext(adev, reg_addr); + if (AMDGPU_RAS_GPU_ERR_BOOT_STATUS(reg_data)) { + *boot_error = reg_data; + return 0; + } + msleep(1); + } + + *boot_error = reg_data; + return -ETIME; +} + +void amdgpu_ras_query_boot_status(struct amdgpu_device *adev, u32 num_instances) +{ + u32 boot_error = 0; + u32 i; + + for (i = 0; i < num_instances; i++) { + if (amdgpu_ras_wait_for_boot_complete(adev, i, &boot_error)) + amdgpu_ras_boot_time_error_reporting(adev, i, boot_error); + } +} + +int amdgpu_ras_reserve_page(struct amdgpu_device *adev, uint64_t pfn) +{ + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + struct amdgpu_vram_mgr *mgr = &adev->mman.vram_mgr; + uint64_t start = pfn << AMDGPU_GPU_PAGE_SHIFT; + int ret = 0; + + mutex_lock(&con->page_rsv_lock); + ret = amdgpu_vram_mgr_query_page_status(mgr, start); + if (ret == -ENOENT) + ret = amdgpu_vram_mgr_reserve_range(mgr, start, AMDGPU_GPU_PAGE_SIZE); + mutex_unlock(&con->page_rsv_lock); + + return ret; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h index 76fb85628716..c8980d5f6540 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h @@ -26,12 +26,34 @@ #include <linux/debugfs.h> #include <linux/list.h> +#include <linux/kfifo.h> +#include <linux/radix-tree.h> +#include <linux/siphash.h> #include "ta_ras_if.h" #include "amdgpu_ras_eeprom.h" #include "amdgpu_smuio.h" +#include "amdgpu_aca.h" struct amdgpu_iv_entry; +#define AMDGPU_RAS_GPU_ERR_MEM_TRAINING(x) AMDGPU_GET_REG_FIELD(x, 0, 0) +#define AMDGPU_RAS_GPU_ERR_FW_LOAD(x) AMDGPU_GET_REG_FIELD(x, 1, 1) +#define AMDGPU_RAS_GPU_ERR_WAFL_LINK_TRAINING(x) AMDGPU_GET_REG_FIELD(x, 2, 2) +#define AMDGPU_RAS_GPU_ERR_XGMI_LINK_TRAINING(x) AMDGPU_GET_REG_FIELD(x, 3, 3) +#define AMDGPU_RAS_GPU_ERR_USR_CP_LINK_TRAINING(x) AMDGPU_GET_REG_FIELD(x, 4, 4) +#define AMDGPU_RAS_GPU_ERR_USR_DP_LINK_TRAINING(x) AMDGPU_GET_REG_FIELD(x, 5, 5) +#define AMDGPU_RAS_GPU_ERR_HBM_MEM_TEST(x) AMDGPU_GET_REG_FIELD(x, 6, 6) +#define AMDGPU_RAS_GPU_ERR_HBM_BIST_TEST(x) AMDGPU_GET_REG_FIELD(x, 7, 7) +#define AMDGPU_RAS_GPU_ERR_SOCKET_ID(x) AMDGPU_GET_REG_FIELD(x, 10, 8) +#define AMDGPU_RAS_GPU_ERR_AID_ID(x) AMDGPU_GET_REG_FIELD(x, 12, 11) +#define AMDGPU_RAS_GPU_ERR_HBM_ID(x) AMDGPU_GET_REG_FIELD(x, 13, 13) +#define AMDGPU_RAS_GPU_ERR_BOOT_STATUS(x) AMDGPU_GET_REG_FIELD(x, 31, 31) + +#define AMDGPU_RAS_BOOT_STATUS_POLLING_LIMIT 1000 +#define AMDGPU_RAS_BOOT_STEADY_STATUS 0xBA +#define AMDGPU_RAS_BOOT_STATUS_MASK 0xFF +#define AMDGPU_RAS_BOOT_SUCEESS 0x80000000 + #define AMDGPU_RAS_FLAG_INIT_BY_VBIOS (0x1 << 0) /* position of instance value in sub_block_index of * ta_ras_trigger_error_input, the sub block uses lower 12 bits @@ -39,6 +61,20 @@ struct amdgpu_iv_entry; #define AMDGPU_RAS_INST_MASK 0xfffff000 #define AMDGPU_RAS_INST_SHIFT 0xc +#define AMDGPU_RAS_FEATURES_SOCKETID_SHIFT 29 +#define AMDGPU_RAS_FEATURES_SOCKETID_MASK 0xe0000000 + +/* The high three bits indicates socketid */ +#define AMDGPU_RAS_GET_FEATURES(val) ((val) & ~AMDGPU_RAS_FEATURES_SOCKETID_MASK) + +#define RAS_EVENT_LOG(_adev, _id, _fmt, ...) \ +do { \ + if (amdgpu_ras_event_id_is_valid((_adev), (_id))) \ + dev_info((_adev)->dev, "{%llu}" _fmt, (_id), ##__VA_ARGS__); \ + else \ + dev_info((_adev)->dev, _fmt, ##__VA_ARGS__); \ +} while (0) + enum amdgpu_ras_block { AMDGPU_RAS_BLOCK__UMC = 0, AMDGPU_RAS_BLOCK__SDMA, @@ -57,6 +93,8 @@ enum amdgpu_ras_block { AMDGPU_RAS_BLOCK__MCA, AMDGPU_RAS_BLOCK__VCN, AMDGPU_RAS_BLOCK__JPEG, + AMDGPU_RAS_BLOCK__IH, + AMDGPU_RAS_BLOCK__MPIO, AMDGPU_RAS_BLOCK__LAST }; @@ -392,6 +430,52 @@ struct umc_ecc_info { int record_ce_addr_supported; }; +enum ras_event_type { + RAS_EVENT_TYPE_INVALID = -1, + RAS_EVENT_TYPE_ISR = 0, + RAS_EVENT_TYPE_COUNT, +}; + +struct ras_event_manager { + atomic64_t seqnos[RAS_EVENT_TYPE_COUNT]; +}; + +struct ras_query_context { + enum ras_event_type type; + u64 event_id; +}; + +typedef int (*pasid_notify)(struct amdgpu_device *adev, + uint16_t pasid, void *data); + +struct ras_poison_msg { + enum amdgpu_ras_block block; + uint16_t pasid; + uint32_t reset; + pasid_notify pasid_fn; + void *data; +}; + +struct ras_err_pages { + uint32_t count; + uint64_t *pfn; +}; + +struct ras_ecc_err { + u64 hash_index; + uint64_t status; + uint64_t ipid; + uint64_t addr; + struct ras_err_pages err_pages; +}; + +struct ras_ecc_log_info { + struct mutex lock; + siphash_key_t ecc_key; + struct radix_tree_root de_page_tree; + bool de_updated; +}; + struct amdgpu_ras { /* ras infrastructure */ /* for ras itself. */ @@ -441,10 +525,27 @@ struct amdgpu_ras { /* Indicates smu whether need update bad channel info */ bool update_channel_flag; /* Record status of smu mca debug mode */ - bool is_mca_debug_mode; + bool is_aca_debug_mode; /* Record special requirements of gpu reset caller */ uint32_t gpu_reset_flags; + + struct task_struct *page_retirement_thread; + wait_queue_head_t page_retirement_wq; + struct mutex page_retirement_lock; + atomic_t page_retirement_req_cnt; + struct mutex page_rsv_lock; + DECLARE_KFIFO(poison_fifo, struct ras_poison_msg, 128); + struct ras_ecc_log_info umc_ecc_log; + struct delayed_work page_retirement_dwork; + + /* Fatal error detected flag */ + atomic_t fed; + + /* RAS event manager */ + struct ras_event_manager __event_mgr; + struct ras_event_manager *event_mgr; + }; struct ras_fs_data { @@ -453,6 +554,7 @@ struct ras_fs_data { }; struct ras_err_addr { + struct list_head node; uint64_t err_status; uint64_t err_ipid; uint64_t err_addr; @@ -462,7 +564,8 @@ struct ras_err_info { struct amdgpu_smuio_mcm_config_info mcm_info; u64 ce_count; u64 ue_count; - struct ras_err_addr err_addr; + u64 de_count; + struct list_head err_addr_list; }; struct ras_err_node { @@ -473,8 +576,10 @@ struct ras_err_node { struct ras_err_data { unsigned long ue_count; unsigned long ce_count; + unsigned long de_count; unsigned long err_addr_cnt; struct eeprom_table_record *err_addr; + unsigned long err_addr_len; u32 err_list_count; struct list_head err_node_list; }; @@ -529,6 +634,8 @@ struct ras_manager { struct ras_ih_data ih_data; struct ras_err_data err_data; + + struct aca_handle aca_handle; }; struct ras_badpage { @@ -548,6 +655,7 @@ struct ras_query_if { struct ras_common_if head; unsigned long ue_count; unsigned long ce_count; + unsigned long de_count; }; struct ras_inject_if { @@ -781,7 +889,8 @@ struct amdgpu_ras* amdgpu_ras_get_context(struct amdgpu_device *adev); int amdgpu_ras_set_context(struct amdgpu_device *adev, struct amdgpu_ras *ras_con); int amdgpu_ras_set_mca_debug_mode(struct amdgpu_device *adev, bool enable); -bool amdgpu_ras_get_mca_debug_mode(struct amdgpu_device *adev); +int amdgpu_ras_set_aca_debug_mode(struct amdgpu_device *adev, bool enable); +bool amdgpu_ras_get_aca_debug_mode(struct amdgpu_device *adev); bool amdgpu_ras_get_error_query_mode(struct amdgpu_device *adev, unsigned int *mode); @@ -818,5 +927,33 @@ int amdgpu_ras_error_statistic_ce_count(struct ras_err_data *err_data, int amdgpu_ras_error_statistic_ue_count(struct ras_err_data *err_data, struct amdgpu_smuio_mcm_config_info *mcm_info, struct ras_err_addr *err_addr, u64 count); +int amdgpu_ras_error_statistic_de_count(struct ras_err_data *err_data, + struct amdgpu_smuio_mcm_config_info *mcm_info, + struct ras_err_addr *err_addr, u64 count); +void amdgpu_ras_query_boot_status(struct amdgpu_device *adev, u32 num_instances); +int amdgpu_ras_bind_aca(struct amdgpu_device *adev, enum amdgpu_ras_block blk, + const struct aca_info *aca_info, void *data); +int amdgpu_ras_unbind_aca(struct amdgpu_device *adev, enum amdgpu_ras_block blk); + +ssize_t amdgpu_ras_aca_sysfs_read(struct device *dev, struct device_attribute *attr, + struct aca_handle *handle, char *buf, void *data); + +void amdgpu_ras_add_mca_err_addr(struct ras_err_info *err_info, + struct ras_err_addr *err_addr); + +void amdgpu_ras_del_mca_err_addr(struct ras_err_info *err_info, + struct ras_err_addr *mca_err_addr); + +void amdgpu_ras_set_fed(struct amdgpu_device *adev, bool status); +bool amdgpu_ras_get_fed_status(struct amdgpu_device *adev); + +bool amdgpu_ras_event_id_is_valid(struct amdgpu_device *adev, u64 id); +u64 amdgpu_ras_acquire_event_id(struct amdgpu_device *adev, enum ras_event_type type); + +int amdgpu_ras_reserve_page(struct amdgpu_device *adev, uint64_t pfn); + +int amdgpu_ras_put_poison_req(struct amdgpu_device *adev, + enum amdgpu_ras_block block, uint16_t pasid, + pasid_notify pasid_fn, void *data, uint32_t reset); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c index 2fde93b00cab..06a62a8a992e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c @@ -404,6 +404,22 @@ static int amdgpu_ras_eeprom_correct_header_tag( return res; } +static void amdgpu_ras_set_eeprom_table_version(struct amdgpu_ras_eeprom_control *control) +{ + struct amdgpu_device *adev = to_amdgpu_device(control); + struct amdgpu_ras_eeprom_table_header *hdr = &control->tbl_hdr; + + switch (amdgpu_ip_version(adev, UMC_HWIP, 0)) { + case IP_VERSION(8, 10, 0): + case IP_VERSION(12, 0, 0): + hdr->version = RAS_TABLE_VER_V2_1; + return; + default: + hdr->version = RAS_TABLE_VER_V1; + return; + } +} + /** * amdgpu_ras_eeprom_reset_table -- Reset the RAS EEPROM table * @control: pointer to control structure @@ -423,11 +439,7 @@ int amdgpu_ras_eeprom_reset_table(struct amdgpu_ras_eeprom_control *control) mutex_lock(&control->ras_tbl_mutex); hdr->header = RAS_TABLE_HDR_VAL; - if (adev->umc.ras && - adev->umc.ras->set_eeprom_table_version) - adev->umc.ras->set_eeprom_table_version(hdr); - else - hdr->version = RAS_TABLE_VER_V1; + amdgpu_ras_set_eeprom_table_version(control); if (hdr->version == RAS_TABLE_VER_V2_1) { hdr->first_rec_offset = RAS_RECORD_START_V2_1; @@ -735,6 +747,9 @@ amdgpu_ras_eeprom_update_header(struct amdgpu_ras_eeprom_control *control) control->tbl_rai.rma_status = GPU_RETIRED__ECC_REACH_THRESHOLD; control->tbl_rai.health_percent = 0; } + + /* ignore the -ENOTSUPP return value */ + amdgpu_dpm_send_rma_reason(adev); } if (control->tbl_hdr.version == RAS_TABLE_VER_V2_1) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h index 381101d2bf05..50fcd86e1033 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h @@ -164,4 +164,29 @@ static inline void amdgpu_res_next(struct amdgpu_res_cursor *cur, uint64_t size) } } +/** + * amdgpu_res_cleared - check if blocks are cleared + * + * @cur: the cursor to extract the block + * + * Check if the @cur block is cleared + */ +static inline bool amdgpu_res_cleared(struct amdgpu_res_cursor *cur) +{ + struct drm_buddy_block *block; + + switch (cur->mem_type) { + case TTM_PL_VRAM: + block = cur->node; + + if (!amdgpu_vram_mgr_is_cleared(block)) + return false; + break; + default: + return false; + } + + return true; +} + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c index 4baa300121d8..ea4873f6ccd1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c @@ -21,9 +21,6 @@ * */ -#include <linux/devcoredump.h> -#include <generated/utsrelease.h> - #include "amdgpu_reset.h" #include "aldebaran.h" #include "sienna_cichlid.h" @@ -161,83 +158,3 @@ void amdgpu_device_unlock_reset_domain(struct amdgpu_reset_domain *reset_domain) atomic_set(&reset_domain->in_gpu_reset, 0); up_write(&reset_domain->sem); } - -#ifndef CONFIG_DEV_COREDUMP -void amdgpu_coredump(struct amdgpu_device *adev, bool vram_lost, - struct amdgpu_reset_context *reset_context) -{ -} -#else -static ssize_t -amdgpu_devcoredump_read(char *buffer, loff_t offset, size_t count, - void *data, size_t datalen) -{ - struct drm_printer p; - struct amdgpu_coredump_info *coredump = data; - struct drm_print_iterator iter; - int i; - - iter.data = buffer; - iter.offset = 0; - iter.start = offset; - iter.remain = count; - - p = drm_coredump_printer(&iter); - - drm_printf(&p, "**** AMDGPU Device Coredump ****\n"); - drm_printf(&p, "version: " AMDGPU_COREDUMP_VERSION "\n"); - drm_printf(&p, "kernel: " UTS_RELEASE "\n"); - drm_printf(&p, "module: " KBUILD_MODNAME "\n"); - drm_printf(&p, "time: %lld.%09ld\n", coredump->reset_time.tv_sec, - coredump->reset_time.tv_nsec); - - if (coredump->reset_task_info.pid) - drm_printf(&p, "process_name: %s PID: %d\n", - coredump->reset_task_info.process_name, - coredump->reset_task_info.pid); - - if (coredump->reset_vram_lost) - drm_printf(&p, "VRAM is lost due to GPU reset!\n"); - if (coredump->adev->reset_info.num_regs) { - drm_printf(&p, "AMDGPU register dumps:\nOffset: Value:\n"); - - for (i = 0; i < coredump->adev->reset_info.num_regs; i++) - drm_printf(&p, "0x%08x: 0x%08x\n", - coredump->adev->reset_info.reset_dump_reg_list[i], - coredump->adev->reset_info.reset_dump_reg_value[i]); - } - - return count - iter.remain; -} - -static void amdgpu_devcoredump_free(void *data) -{ - kfree(data); -} - -void amdgpu_coredump(struct amdgpu_device *adev, bool vram_lost, - struct amdgpu_reset_context *reset_context) -{ - struct amdgpu_coredump_info *coredump; - struct drm_device *dev = adev_to_drm(adev); - - coredump = kzalloc(sizeof(*coredump), GFP_NOWAIT); - - if (!coredump) { - DRM_ERROR("%s: failed to allocate memory for coredump\n", __func__); - return; - } - - coredump->reset_vram_lost = vram_lost; - - if (reset_context->job && reset_context->job->vm) - coredump->reset_task_info = reset_context->job->vm->task_info; - - coredump->adev = adev; - - ktime_get_ts64(&coredump->reset_time); - - dev_coredumpm(dev->dev, THIS_MODULE, coredump, 0, GFP_NOWAIT, - amdgpu_devcoredump_read, amdgpu_devcoredump_free); -} -#endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h index 19899f6b9b2b..b11d190ece53 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h @@ -32,6 +32,7 @@ enum AMDGPU_RESET_FLAGS { AMDGPU_NEED_FULL_RESET = 0, AMDGPU_SKIP_HW_RESET = 1, + AMDGPU_SKIP_COREDUMP = 2, }; struct amdgpu_reset_context { @@ -88,18 +89,6 @@ struct amdgpu_reset_domain { atomic_t reset_res; }; -#ifdef CONFIG_DEV_COREDUMP - -#define AMDGPU_COREDUMP_VERSION "1" - -struct amdgpu_coredump_info { - struct amdgpu_device *adev; - struct amdgpu_task_info reset_task_info; - struct timespec64 reset_time; - bool reset_vram_lost; -}; -#endif - int amdgpu_reset_init(struct amdgpu_device *adev); int amdgpu_reset_fini(struct amdgpu_device *adev); @@ -140,9 +129,6 @@ void amdgpu_device_lock_reset_domain(struct amdgpu_reset_domain *reset_domain); void amdgpu_device_unlock_reset_domain(struct amdgpu_reset_domain *reset_domain); -void amdgpu_coredump(struct amdgpu_device *adev, bool vram_lost, - struct amdgpu_reset_context *reset_context); - #define for_each_handler(i, handler, reset_ctl) \ for (i = 0; (i < AMDGPU_RESET_MAX_HANDLERS) && \ (handler = (*reset_ctl->reset_handlers)[i]); \ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c index 5505d646f43a..06f0a6534a94 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c @@ -524,46 +524,58 @@ static ssize_t amdgpu_debugfs_mqd_read(struct file *f, char __user *buf, { struct amdgpu_ring *ring = file_inode(f)->i_private; volatile u32 *mqd; - int r; + u32 *kbuf; + int r, i; uint32_t value, result; if (*pos & 3 || size & 3) return -EINVAL; - result = 0; + kbuf = kmalloc(ring->mqd_size, GFP_KERNEL); + if (!kbuf) + return -ENOMEM; r = amdgpu_bo_reserve(ring->mqd_obj, false); if (unlikely(r != 0)) - return r; + goto err_free; r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&mqd); - if (r) { - amdgpu_bo_unreserve(ring->mqd_obj); - return r; - } + if (r) + goto err_unreserve; + /* + * Copy to local buffer to avoid put_user(), which might fault + * and acquire mmap_sem, under reservation_ww_class_mutex. + */ + for (i = 0; i < ring->mqd_size/sizeof(u32); i++) + kbuf[i] = mqd[i]; + + amdgpu_bo_kunmap(ring->mqd_obj); + amdgpu_bo_unreserve(ring->mqd_obj); + + result = 0; while (size) { if (*pos >= ring->mqd_size) - goto done; + break; - value = mqd[*pos/4]; + value = kbuf[*pos/4]; r = put_user(value, (uint32_t *)buf); if (r) - goto done; + goto err_free; buf += 4; result += 4; size -= 4; *pos += 4; } -done: - amdgpu_bo_kunmap(ring->mqd_obj); - mqd = NULL; - amdgpu_bo_unreserve(ring->mqd_obj); - if (r) - return r; - + kfree(kbuf); return result; + +err_unreserve: + amdgpu_bo_unreserve(ring->mqd_obj); +err_free: + kfree(kbuf); + return r; } static const struct file_operations amdgpu_debugfs_mqd_fops = { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index fe1a61eb6e4c..582053f1cd56 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -209,8 +209,7 @@ struct amdgpu_ring_funcs { void (*insert_end)(struct amdgpu_ring *ring); /* pad the indirect buffer to the necessary number of dw */ void (*pad_ib)(struct amdgpu_ring *ring, struct amdgpu_ib *ib); - unsigned (*init_cond_exec)(struct amdgpu_ring *ring); - void (*patch_cond_exec)(struct amdgpu_ring *ring, unsigned offset); + unsigned (*init_cond_exec)(struct amdgpu_ring *ring, uint64_t addr); /* note usage for clock and power gating */ void (*begin_use)(struct amdgpu_ring *ring); void (*end_use)(struct amdgpu_ring *ring); @@ -286,6 +285,9 @@ struct amdgpu_ring { unsigned cond_exe_offs; u64 cond_exe_gpu_addr; volatile u32 *cond_exe_cpu_addr; + unsigned int set_q_mode_offs; + volatile u32 *set_q_mode_ptr; + u64 set_q_mode_token; unsigned vm_hub; unsigned vm_inv_eng; struct dma_fence *vmid_wait; @@ -327,8 +329,7 @@ struct amdgpu_ring { #define amdgpu_ring_emit_reg_write_reg_wait(r, d0, d1, v, m) (r)->funcs->emit_reg_write_reg_wait((r), (d0), (d1), (v), (m)) #define amdgpu_ring_emit_frame_cntl(r, b, s) (r)->funcs->emit_frame_cntl((r), (b), (s)) #define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib))) -#define amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r)) -#define amdgpu_ring_patch_cond_exec(r,o) (r)->funcs->patch_cond_exec((r),(o)) +#define amdgpu_ring_init_cond_exec(r, a) (r)->funcs->init_cond_exec((r), (a)) #define amdgpu_ring_preempt_ib(r) (r)->funcs->preempt_ib(r) #define amdgpu_ring_patch_cntl(r, o) ((r)->funcs->patch_cntl((r), (o))) #define amdgpu_ring_patch_ce(r, o) ((r)->funcs->patch_ce((r), (o))) @@ -411,6 +412,30 @@ static inline void amdgpu_ring_write_multiple(struct amdgpu_ring *ring, ring->count_dw -= count_dw; } +/** + * amdgpu_ring_patch_cond_exec - patch dw count of conditional execute + * @ring: amdgpu_ring structure + * @offset: offset returned by amdgpu_ring_init_cond_exec + * + * Calculate the dw count and patch it into a cond_exec command. + */ +static inline void amdgpu_ring_patch_cond_exec(struct amdgpu_ring *ring, + unsigned int offset) +{ + unsigned cur; + + if (!ring->funcs->init_cond_exec) + return; + + WARN_ON(offset > ring->buf_mask); + WARN_ON(ring->ring[offset] != 0); + + cur = (ring->wptr - 1) & ring->buf_mask; + if (cur < offset) + cur += ring->ring_size >> 2; + ring->ring[offset] = cur - offset; +} + #define amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset) \ (ring->is_mes_queue && ring->mes_ctx ? \ (ring->mes_ctx->meta_data_gpu_addr + offset) : 0) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c index e1ee1c7117fb..d234b7ccfaaf 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c @@ -159,9 +159,7 @@ int amdgpu_ring_mux_init(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring, mux->ring_entry_size = entry_size; mux->s_resubmit = false; - amdgpu_mux_chunk_slab = kmem_cache_create("amdgpu_mux_chunk", - sizeof(struct amdgpu_mux_chunk), 0, - SLAB_HWCACHE_ALIGN, NULL); + amdgpu_mux_chunk_slab = KMEM_CACHE(amdgpu_mux_chunk, SLAB_HWCACHE_ALIGN); if (!amdgpu_mux_chunk_slab) { DRM_ERROR("create amdgpu_mux_chunk cache failed\n"); return -ENOMEM; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c index 2c3675d91614..db5791e1a7ce 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c @@ -241,7 +241,7 @@ void amdgpu_gfx_rlc_setup_cp_table(struct amdgpu_device *adev) table_size = le32_to_cpu(hdr->jt_size); } - for (i = 0; i < table_size; i ++) { + for (i = 0; i < table_size; i++) { dst_ptr[bo_offset + i] = cpu_to_le32(le32_to_cpu(fw_data[table_offset + i])); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h index b591d33af264..5a17e0ff2ab8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h @@ -169,7 +169,7 @@ struct amdgpu_rlc_funcs { void (*stop)(struct amdgpu_device *adev); void (*reset)(struct amdgpu_device *adev); void (*start)(struct amdgpu_device *adev); - void (*update_spm_vmid)(struct amdgpu_device *adev, unsigned vmid); + void (*update_spm_vmid)(struct amdgpu_device *adev, struct amdgpu_ring *ring, unsigned vmid); bool (*is_rlcg_access_range)(struct amdgpu_device *adev, uint32_t reg); }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h index 173a2a308078..b51a82e711df 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h @@ -132,7 +132,7 @@ struct amdgpu_buffer_funcs { uint64_t dst_offset, /* number of byte to transfer */ uint32_t byte_count, - bool tmz); + uint32_t copy_flags); /* maximum bytes in a single operation */ uint32_t fill_max_bytes; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c index 7a6a67275404..e22cb2b5cd92 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c @@ -36,13 +36,24 @@ */ /** + * amdgpu_seq64_get_va_base - Get the seq64 va base address + * + * @adev: amdgpu_device pointer + * + * Returns: + * va base address on success + */ +static inline u64 amdgpu_seq64_get_va_base(struct amdgpu_device *adev) +{ + return AMDGPU_VA_RESERVED_SEQ64_START(adev); +} + +/** * amdgpu_seq64_map - Map the seq64 memory to VM * * @adev: amdgpu_device pointer * @vm: vm pointer * @bo_va: bo_va pointer - * @seq64_addr: seq64 vaddr start address - * @size: seq64 pool size * * Map the seq64 memory to the given VM. * @@ -50,11 +61,11 @@ * 0 on success or a negative error code on failure */ int amdgpu_seq64_map(struct amdgpu_device *adev, struct amdgpu_vm *vm, - struct amdgpu_bo_va **bo_va, u64 seq64_addr, - uint32_t size) + struct amdgpu_bo_va **bo_va) { struct amdgpu_bo *bo; struct drm_exec exec; + u64 seq64_addr; int r; bo = adev->seq64.sbo; @@ -77,9 +88,9 @@ int amdgpu_seq64_map(struct amdgpu_device *adev, struct amdgpu_vm *vm, goto error; } - r = amdgpu_vm_bo_map(adev, *bo_va, seq64_addr, 0, size, - AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE | - AMDGPU_PTE_EXECUTABLE); + seq64_addr = amdgpu_seq64_get_va_base(adev); + r = amdgpu_vm_bo_map(adev, *bo_va, seq64_addr, 0, AMDGPU_VA_RESERVED_SEQ64_SIZE, + AMDGPU_PTE_READABLE); if (r) { DRM_ERROR("failed to do bo_map on userq sem, err=%d\n", r); amdgpu_vm_bo_del(adev, *bo_va); @@ -144,31 +155,25 @@ error: * amdgpu_seq64_alloc - Allocate a 64 bit memory * * @adev: amdgpu_device pointer - * @gpu_addr: allocated gpu VA start address - * @cpu_addr: allocated cpu VA start address + * @va: VA to access the seq in process address space + * @cpu_addr: CPU address to access the seq * * Alloc a 64 bit memory from seq64 pool. * * Returns: * 0 on success or a negative error code on failure */ -int amdgpu_seq64_alloc(struct amdgpu_device *adev, u64 *gpu_addr, - u64 **cpu_addr) +int amdgpu_seq64_alloc(struct amdgpu_device *adev, u64 *va, u64 **cpu_addr) { unsigned long bit_pos; - u32 offset; bit_pos = find_first_zero_bit(adev->seq64.used, adev->seq64.num_sem); + if (bit_pos >= adev->seq64.num_sem) + return -ENOSPC; - if (bit_pos < adev->seq64.num_sem) { - __set_bit(bit_pos, adev->seq64.used); - offset = bit_pos << 6; /* convert to qw offset */ - } else { - return -EINVAL; - } - - *gpu_addr = offset + AMDGPU_SEQ64_VADDR_START; - *cpu_addr = offset + adev->seq64.cpu_base_addr; + __set_bit(bit_pos, adev->seq64.used); + *va = bit_pos * sizeof(u64) + amdgpu_seq64_get_va_base(adev); + *cpu_addr = bit_pos + adev->seq64.cpu_base_addr; return 0; } @@ -177,20 +182,17 @@ int amdgpu_seq64_alloc(struct amdgpu_device *adev, u64 *gpu_addr, * amdgpu_seq64_free - Free the given 64 bit memory * * @adev: amdgpu_device pointer - * @gpu_addr: gpu start address to be freed + * @va: gpu start address to be freed * * Free the given 64 bit memory from seq64 pool. - * */ -void amdgpu_seq64_free(struct amdgpu_device *adev, u64 gpu_addr) +void amdgpu_seq64_free(struct amdgpu_device *adev, u64 va) { - u32 offset; - - offset = gpu_addr - AMDGPU_SEQ64_VADDR_START; + unsigned long bit_pos; - offset >>= 6; - if (offset < adev->seq64.num_sem) - __clear_bit(offset, adev->seq64.used); + bit_pos = (va - amdgpu_seq64_get_va_base(adev)) / sizeof(u64); + if (bit_pos < adev->seq64.num_sem) + __clear_bit(bit_pos, adev->seq64.used); } /** @@ -229,7 +231,7 @@ int amdgpu_seq64_init(struct amdgpu_device *adev) * AMDGPU_MAX_SEQ64_SLOTS * sizeof(u64) * 8 = AMDGPU_MAX_SEQ64_SLOTS * 64bit slots */ - r = amdgpu_bo_create_kernel(adev, AMDGPU_SEQ64_SIZE, + r = amdgpu_bo_create_kernel(adev, AMDGPU_VA_RESERVED_SEQ64_SIZE, PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, &adev->seq64.sbo, NULL, (void **)&adev->seq64.cpu_base_addr); @@ -238,7 +240,7 @@ int amdgpu_seq64_init(struct amdgpu_device *adev) return r; } - memset(adev->seq64.cpu_base_addr, 0, AMDGPU_SEQ64_SIZE); + memset(adev->seq64.cpu_base_addr, 0, AMDGPU_VA_RESERVED_SEQ64_SIZE); adev->seq64.num_sem = AMDGPU_MAX_SEQ64_SLOTS; memset(&adev->seq64.used, 0, sizeof(adev->seq64.used)); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.h index 2196e72be508..4203b2ab318d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.h @@ -25,10 +25,9 @@ #ifndef __AMDGPU_SEQ64_H__ #define __AMDGPU_SEQ64_H__ -#define AMDGPU_SEQ64_SIZE (2ULL << 20) -#define AMDGPU_MAX_SEQ64_SLOTS (AMDGPU_SEQ64_SIZE / (sizeof(u64) * 8)) -#define AMDGPU_SEQ64_VADDR_OFFSET 0x50000 -#define AMDGPU_SEQ64_VADDR_START (AMDGPU_VA_RESERVED_SIZE + AMDGPU_SEQ64_VADDR_OFFSET) +#include "amdgpu_vm.h" + +#define AMDGPU_MAX_SEQ64_SLOTS (AMDGPU_VA_RESERVED_SEQ64_SIZE / sizeof(u64)) struct amdgpu_seq64 { struct amdgpu_bo *sbo; @@ -42,7 +41,7 @@ int amdgpu_seq64_init(struct amdgpu_device *adev); int amdgpu_seq64_alloc(struct amdgpu_device *adev, u64 *gpu_addr, u64 **cpu_addr); void amdgpu_seq64_free(struct amdgpu_device *adev, u64 gpu_addr); int amdgpu_seq64_map(struct amdgpu_device *adev, struct amdgpu_vm *vm, - struct amdgpu_bo_va **bo_va, u64 seq64_addr, uint32_t size); + struct amdgpu_bo_va **bo_va); void amdgpu_seq64_unmap(struct amdgpu_device *adev, struct amdgpu_fpriv *fpriv); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_smuio.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_smuio.h index ff4435181055..ec9d12f85f39 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_smuio.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_smuio.h @@ -44,6 +44,7 @@ struct amdgpu_smuio_funcs { u32 (*get_socket_id)(struct amdgpu_device *adev); enum amdgpu_pkg_type (*get_pkg_type)(struct amdgpu_device *adev); bool (*is_host_gpu_xgmi_supported)(struct amdgpu_device *adev); + u64 (*get_gpu_clock_counter)(struct amdgpu_device *adev); }; struct amdgpu_smuio { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c index 1b013a44ca99..bdf1ef825d89 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c @@ -441,9 +441,7 @@ void amdgpu_sync_free(struct amdgpu_sync *sync) */ int amdgpu_sync_init(void) { - amdgpu_sync_slab = kmem_cache_create( - "amdgpu_sync", sizeof(struct amdgpu_sync_entry), 0, - SLAB_HWCACHE_ALIGN, NULL); + amdgpu_sync_slab = KMEM_CACHE(amdgpu_sync_entry, SLAB_HWCACHE_ALIGN); if (!amdgpu_sync_slab) return -ENOMEM; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 75c9fd2c6c2a..e785f128411d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -102,23 +102,19 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo, /* Don't handle scatter gather BOs */ if (bo->type == ttm_bo_type_sg) { placement->num_placement = 0; - placement->num_busy_placement = 0; return; } /* Object isn't an AMDGPU object so ignore */ if (!amdgpu_bo_is_amdgpu_bo(bo)) { placement->placement = &placements; - placement->busy_placement = &placements; placement->num_placement = 1; - placement->num_busy_placement = 1; return; } abo = ttm_to_amdgpu_bo(bo); if (abo->flags & AMDGPU_GEM_CREATE_DISCARDABLE) { placement->num_placement = 0; - placement->num_busy_placement = 0; return; } @@ -128,16 +124,16 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo, case AMDGPU_PL_OA: case AMDGPU_PL_DOORBELL: placement->num_placement = 0; - placement->num_busy_placement = 0; return; case TTM_PL_VRAM: if (!adev->mman.buffer_funcs_enabled) { /* Move to system memory */ amdgpu_bo_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_CPU); + } else if (!amdgpu_gmc_vram_full_visible(&adev->gmc) && !(abo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) && - amdgpu_bo_in_cpu_visible_vram(abo)) { + amdgpu_res_cpu_visible(adev, bo->resource)) { /* Try evicting to the CPU inaccessible part of VRAM * first, but only set GTT as busy placement, so this @@ -149,8 +145,7 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo, AMDGPU_GEM_DOMAIN_CPU); abo->placements[0].fpfn = adev->gmc.visible_vram_size >> PAGE_SHIFT; abo->placements[0].lpfn = 0; - abo->placement.busy_placement = &abo->placements[1]; - abo->placement.num_busy_placement = 1; + abo->placements[0].flags |= TTM_PL_FLAG_DESIRED; } else { /* Move to GTT memory */ amdgpu_bo_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_GTT | @@ -241,7 +236,7 @@ static int amdgpu_ttm_map_buffer(struct ttm_buffer_object *bo, dst_addr = amdgpu_bo_gpu_offset(adev->gart.bo); dst_addr += window * AMDGPU_GTT_MAX_TRANSFER_SIZE * 8; amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr, - dst_addr, num_bytes, false); + dst_addr, num_bytes, 0); amdgpu_ring_pad_ib(ring, &job->ibs[0]); WARN_ON(job->ibs[0].length_dw > num_dw); @@ -301,6 +296,8 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev, struct dma_fence *fence = NULL; int r = 0; + uint32_t copy_flags = 0; + if (!adev->mman.buffer_funcs_enabled) { DRM_ERROR("Trying to move memory with ring turned off.\n"); return -EINVAL; @@ -328,8 +325,11 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev, if (r) goto error; - r = amdgpu_copy_buffer(ring, from, to, cur_size, - resv, &next, false, true, tmz); + if (tmz) + copy_flags |= AMDGPU_COPY_FLAGS_TMZ; + + r = amdgpu_copy_buffer(ring, from, to, cur_size, resv, + &next, false, true, copy_flags); if (r) goto error; @@ -383,11 +383,12 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo, (abo->flags & AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE)) { struct dma_fence *wipe_fence = NULL; - r = amdgpu_fill_buffer(abo, AMDGPU_POISON, NULL, &wipe_fence, - false); + r = amdgpu_fill_buffer(abo, 0, NULL, &wipe_fence, + false); if (r) { goto error; } else if (wipe_fence) { + amdgpu_vram_mgr_set_cleared(bo->resource); dma_fence_put(fence); fence = wipe_fence; } @@ -408,40 +409,55 @@ error: return r; } -/* - * amdgpu_mem_visible - Check that memory can be accessed by ttm_bo_move_memcpy +/** + * amdgpu_res_cpu_visible - Check that resource can be accessed by CPU + * @adev: amdgpu device + * @res: the resource to check * - * Called by amdgpu_bo_move() + * Returns: true if the full resource is CPU visible, false otherwise. */ -static bool amdgpu_mem_visible(struct amdgpu_device *adev, - struct ttm_resource *mem) +bool amdgpu_res_cpu_visible(struct amdgpu_device *adev, + struct ttm_resource *res) { - u64 mem_size = (u64)mem->size; struct amdgpu_res_cursor cursor; - u64 end; - if (mem->mem_type == TTM_PL_SYSTEM || - mem->mem_type == TTM_PL_TT) + if (!res) + return false; + + if (res->mem_type == TTM_PL_SYSTEM || res->mem_type == TTM_PL_TT || + res->mem_type == AMDGPU_PL_PREEMPT || res->mem_type == AMDGPU_PL_DOORBELL) return true; - if (mem->mem_type != TTM_PL_VRAM) + + if (res->mem_type != TTM_PL_VRAM) return false; - amdgpu_res_first(mem, 0, mem_size, &cursor); - end = cursor.start + cursor.size; + amdgpu_res_first(res, 0, res->size, &cursor); while (cursor.remaining) { + if ((cursor.start + cursor.size) > adev->gmc.visible_vram_size) + return false; amdgpu_res_next(&cursor, cursor.size); + } - if (!cursor.remaining) - break; + return true; +} - /* ttm_resource_ioremap only supports contiguous memory */ - if (end != cursor.start) - return false; +/* + * amdgpu_res_copyable - Check that memory can be accessed by ttm_bo_move_memcpy + * + * Called by amdgpu_bo_move() + */ +static bool amdgpu_res_copyable(struct amdgpu_device *adev, + struct ttm_resource *mem) +{ + if (!amdgpu_res_cpu_visible(adev, mem)) + return false; - end = cursor.start + cursor.size; - } + /* ttm_resource_ioremap only supports contiguous memory */ + if (mem->mem_type == TTM_PL_VRAM && + !(mem->placement & TTM_PL_FLAG_CONTIGUOUS)) + return false; - return end <= adev->gmc.visible_vram_size; + return true; } /* @@ -471,14 +487,16 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict, if (!old_mem || (old_mem->mem_type == TTM_PL_SYSTEM && bo->ttm == NULL)) { + amdgpu_bo_move_notify(bo, evict, new_mem); ttm_bo_move_null(bo, new_mem); - goto out; + return 0; } if (old_mem->mem_type == TTM_PL_SYSTEM && (new_mem->mem_type == TTM_PL_TT || new_mem->mem_type == AMDGPU_PL_PREEMPT)) { + amdgpu_bo_move_notify(bo, evict, new_mem); ttm_bo_move_null(bo, new_mem); - goto out; + return 0; } if ((old_mem->mem_type == TTM_PL_TT || old_mem->mem_type == AMDGPU_PL_PREEMPT) && @@ -488,9 +506,10 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict, return r; amdgpu_ttm_backend_unbind(bo->bdev, bo->ttm); + amdgpu_bo_move_notify(bo, evict, new_mem); ttm_resource_free(bo, &bo->resource); ttm_bo_assign_mem(bo, new_mem); - goto out; + return 0; } if (old_mem->mem_type == AMDGPU_PL_GDS || @@ -502,8 +521,9 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict, new_mem->mem_type == AMDGPU_PL_OA || new_mem->mem_type == AMDGPU_PL_DOORBELL) { /* Nothing to save here */ + amdgpu_bo_move_notify(bo, evict, new_mem); ttm_bo_move_null(bo, new_mem); - goto out; + return 0; } if (bo->type == ttm_bo_type_device && @@ -515,27 +535,28 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict, abo->flags &= ~AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; } - if (adev->mman.buffer_funcs_enabled) { - if (((old_mem->mem_type == TTM_PL_SYSTEM && - new_mem->mem_type == TTM_PL_VRAM) || - (old_mem->mem_type == TTM_PL_VRAM && - new_mem->mem_type == TTM_PL_SYSTEM))) { - hop->fpfn = 0; - hop->lpfn = 0; - hop->mem_type = TTM_PL_TT; - hop->flags = TTM_PL_FLAG_TEMPORARY; - return -EMULTIHOP; - } + if (adev->mman.buffer_funcs_enabled && + ((old_mem->mem_type == TTM_PL_SYSTEM && + new_mem->mem_type == TTM_PL_VRAM) || + (old_mem->mem_type == TTM_PL_VRAM && + new_mem->mem_type == TTM_PL_SYSTEM))) { + hop->fpfn = 0; + hop->lpfn = 0; + hop->mem_type = TTM_PL_TT; + hop->flags = TTM_PL_FLAG_TEMPORARY; + return -EMULTIHOP; + } + amdgpu_bo_move_notify(bo, evict, new_mem); + if (adev->mman.buffer_funcs_enabled) r = amdgpu_move_blit(bo, evict, new_mem, old_mem); - } else { + else r = -ENODEV; - } if (r) { /* Check that all memory is CPU accessible */ - if (!amdgpu_mem_visible(adev, old_mem) || - !amdgpu_mem_visible(adev, new_mem)) { + if (!amdgpu_res_copyable(adev, old_mem) || + !amdgpu_res_copyable(adev, new_mem)) { pr_err("Move buffer fallback to memcpy unavailable\n"); return r; } @@ -545,11 +566,10 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict, return r; } - trace_amdgpu_bo_move(abo, new_mem->mem_type, old_mem->mem_type); -out: - /* update statistics */ + /* update statistics after the move */ + if (evict) + atomic64_inc(&adev->num_evictions); atomic64_add(bo->base.size, &adev->num_bytes_moved); - amdgpu_bo_move_notify(bo, evict); return 0; } @@ -562,7 +582,6 @@ static int amdgpu_ttm_io_mem_reserve(struct ttm_device *bdev, struct ttm_resource *mem) { struct amdgpu_device *adev = amdgpu_ttm_adev(bdev); - size_t bus_size = (size_t)mem->size; switch (mem->mem_type) { case TTM_PL_SYSTEM: @@ -573,9 +592,6 @@ static int amdgpu_ttm_io_mem_reserve(struct ttm_device *bdev, break; case TTM_PL_VRAM: mem->bus.offset = mem->start << PAGE_SHIFT; - /* check if it's visible */ - if ((mem->bus.offset + bus_size) > adev->gmc.visible_vram_size) - return -EINVAL; if (adev->mman.aper_base_kaddr && mem->placement & TTM_PL_FLAG_CONTIGUOUS) @@ -869,6 +885,7 @@ static void amdgpu_ttm_gart_bind(struct amdgpu_device *adev, amdgpu_gart_bind(adev, gtt->offset, ttm->num_pages, gtt->ttm.dma_address, flags); } + gtt->bound = true; } /* @@ -966,8 +983,6 @@ int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo) /* allocate GART space */ placement.num_placement = 1; placement.placement = &placements; - placement.num_busy_placement = 1; - placement.busy_placement = &placements; placements.fpfn = 0; placements.lpfn = adev->gmc.gart_size >> PAGE_SHIFT; placements.mem_type = TTM_PL_TT; @@ -1483,7 +1498,7 @@ static int amdgpu_ttm_access_memory_sdma(struct ttm_buffer_object *bo, swap(src_addr, dst_addr); amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr, dst_addr, - PAGE_SIZE, false); + PAGE_SIZE, 0); amdgpu_ring_pad_ib(adev->mman.buffer_funcs_ring, &job->ibs[0]); WARN_ON(job->ibs[0].length_dw > num_dw); @@ -1554,7 +1569,7 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo, static void amdgpu_bo_delete_mem_notify(struct ttm_buffer_object *bo) { - amdgpu_bo_move_notify(bo, false); + amdgpu_bo_move_notify(bo, false, NULL); } static struct ttm_device_funcs amdgpu_bo_driver = { @@ -2134,7 +2149,7 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset, uint64_t dst_offset, uint32_t byte_count, struct dma_resv *resv, struct dma_fence **fence, bool direct_submit, - bool vm_needs_flush, bool tmz) + bool vm_needs_flush, uint32_t copy_flags) { struct amdgpu_device *adev = ring->adev; unsigned int num_loops, num_dw; @@ -2160,8 +2175,7 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset, uint32_t cur_size_in_bytes = min(byte_count, max_bytes); amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_offset, - dst_offset, cur_size_in_bytes, tmz); - + dst_offset, cur_size_in_bytes, copy_flags); src_offset += cur_size_in_bytes; dst_offset += cur_size_in_bytes; byte_count -= cur_size_in_bytes; @@ -2221,6 +2235,71 @@ static int amdgpu_ttm_fill_mem(struct amdgpu_ring *ring, uint32_t src_data, return 0; } +/** + * amdgpu_ttm_clear_buffer - clear memory buffers + * @bo: amdgpu buffer object + * @resv: reservation object + * @fence: dma_fence associated with the operation + * + * Clear the memory buffer resource. + * + * Returns: + * 0 for success or a negative error code on failure. + */ +int amdgpu_ttm_clear_buffer(struct amdgpu_bo *bo, + struct dma_resv *resv, + struct dma_fence **fence) +{ + struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); + struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring; + struct amdgpu_res_cursor cursor; + u64 addr; + int r; + + if (!adev->mman.buffer_funcs_enabled) + return -EINVAL; + + if (!fence) + return -EINVAL; + + *fence = dma_fence_get_stub(); + + amdgpu_res_first(bo->tbo.resource, 0, amdgpu_bo_size(bo), &cursor); + + mutex_lock(&adev->mman.gtt_window_lock); + while (cursor.remaining) { + struct dma_fence *next = NULL; + u64 size; + + if (amdgpu_res_cleared(&cursor)) { + amdgpu_res_next(&cursor, cursor.size); + continue; + } + + /* Never clear more than 256MiB at once to avoid timeouts */ + size = min(cursor.size, 256ULL << 20); + + r = amdgpu_ttm_map_buffer(&bo->tbo, bo->tbo.resource, &cursor, + 1, ring, false, &size, &addr); + if (r) + goto err; + + r = amdgpu_ttm_fill_mem(ring, 0, addr, size, resv, + &next, true, true); + if (r) + goto err; + + dma_fence_put(*fence); + *fence = next; + + amdgpu_res_next(&cursor, size); + } +err: + mutex_unlock(&adev->mman.gtt_window_lock); + + return r; +} + int amdgpu_fill_buffer(struct amdgpu_bo *bo, uint32_t src_data, struct dma_resv *resv, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h index 65ec82141a8e..b6f53129dea3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h @@ -38,8 +38,6 @@ #define AMDGPU_GTT_MAX_TRANSFER_SIZE 512 #define AMDGPU_GTT_NUM_TRANSFER_WINDOWS 2 -#define AMDGPU_POISON 0xd0bed0be - extern const struct attribute_group amdgpu_vram_mgr_attr_group; extern const struct attribute_group amdgpu_gtt_mgr_attr_group; @@ -111,6 +109,8 @@ struct amdgpu_copy_mem { unsigned long offset; }; +#define AMDGPU_COPY_FLAGS_TMZ (1 << 0) + int amdgpu_gtt_mgr_init(struct amdgpu_device *adev, uint64_t gtt_size); void amdgpu_gtt_mgr_fini(struct amdgpu_device *adev); int amdgpu_preempt_mgr_init(struct amdgpu_device *adev); @@ -139,6 +139,9 @@ int amdgpu_vram_mgr_reserve_range(struct amdgpu_vram_mgr *mgr, int amdgpu_vram_mgr_query_page_status(struct amdgpu_vram_mgr *mgr, uint64_t start); +bool amdgpu_res_cpu_visible(struct amdgpu_device *adev, + struct ttm_resource *res); + int amdgpu_ttm_init(struct amdgpu_device *adev); void amdgpu_ttm_fini(struct amdgpu_device *adev); void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, @@ -148,13 +151,16 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset, uint64_t dst_offset, uint32_t byte_count, struct dma_resv *resv, struct dma_fence **fence, bool direct_submit, - bool vm_needs_flush, bool tmz); + bool vm_needs_flush, uint32_t copy_flags); int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev, const struct amdgpu_copy_mem *src, const struct amdgpu_copy_mem *dst, uint64_t size, bool tmz, struct dma_resv *resv, struct dma_fence **f); +int amdgpu_ttm_clear_buffer(struct amdgpu_bo *bo, + struct dma_resv *resv, + struct dma_fence **fence); int amdgpu_fill_buffer(struct amdgpu_bo *bo, uint32_t src_data, struct dma_resv *resv, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c index 3e12763e477a..0867fd9e15ba 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c @@ -556,6 +556,8 @@ amdgpu_ucode_get_load_type(struct amdgpu_device *adev, int load_type) default: if (!load_type) return AMDGPU_FW_LOAD_DIRECT; + else if (load_type == 3) + return AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO; else return AMDGPU_FW_LOAD_PSP; } @@ -678,6 +680,8 @@ const char *amdgpu_ucode_name(enum AMDGPU_UCODE_ID ucode_id) return "UMSCH_MM_DATA"; case AMDGPU_UCODE_ID_UMSCH_MM_CMD_BUFFER: return "UMSCH_MM_CMD_BUFFER"; + case AMDGPU_UCODE_ID_JPEG_RAM: + return "JPEG"; default: return "UNKNOWN UCODE"; } @@ -1060,7 +1064,8 @@ static int amdgpu_ucode_patch_jt(struct amdgpu_firmware_info *ucode, int amdgpu_ucode_create_bo(struct amdgpu_device *adev) { - if (adev->firmware.load_type != AMDGPU_FW_LOAD_DIRECT) { + if ((adev->firmware.load_type != AMDGPU_FW_LOAD_DIRECT) && + (adev->firmware.load_type != AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO)) { amdgpu_bo_create_kernel(adev, adev->firmware.fw_size, PAGE_SIZE, (amdgpu_sriov_vf(adev) || adev->debug_use_vram_fw_buf) ? AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h index 4244a13f9f22..105d4de0613a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h @@ -125,6 +125,7 @@ enum psp_fw_type { PSP_FW_TYPE_PSP_INTF_DRV, PSP_FW_TYPE_PSP_DBG_DRV, PSP_FW_TYPE_PSP_RAS_DRV, + PSP_FW_TYPE_PSP_IPKEYMGR_DRV, PSP_FW_TYPE_MAX_INDEX, }; @@ -511,6 +512,7 @@ enum AMDGPU_UCODE_ID { AMDGPU_UCODE_ID_UMSCH_MM_DATA, AMDGPU_UCODE_ID_UMSCH_MM_CMD_BUFFER, AMDGPU_UCODE_ID_P2S_TABLE, + AMDGPU_UCODE_ID_JPEG_RAM, AMDGPU_UCODE_ID_MAXIMUM, }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c index d65e21914d8c..540e0f066b26 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c @@ -21,8 +21,12 @@ * */ +#include <linux/sort.h> #include "amdgpu.h" #include "umc_v6_7.h" +#define MAX_UMC_POISON_POLLING_TIME_SYNC 20 //ms + +#define MAX_UMC_HASH_STRING_SIZE 256 static int amdgpu_umc_convert_error_address(struct amdgpu_device *adev, struct ras_err_data *err_data, uint64_t err_addr, @@ -62,6 +66,8 @@ int amdgpu_umc_page_retirement_mca(struct amdgpu_device *adev, goto out_fini_err_data; } + err_data.err_addr_len = adev->umc.max_ras_err_cnt_per_query; + /* * Translate UMC channel address to Physical address */ @@ -85,18 +91,21 @@ out_fini_err_data: return ret; } -static int amdgpu_umc_do_page_retirement(struct amdgpu_device *adev, - void *ras_error_status, - struct amdgpu_iv_entry *entry, - bool reset) +void amdgpu_umc_handle_bad_pages(struct amdgpu_device *adev, + void *ras_error_status) { struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + unsigned int error_query_mode; int ret = 0; + unsigned long err_count; - kgd2kfd_set_sram_ecc_flag(adev->kfd.dev); + amdgpu_ras_get_error_query_mode(adev, &error_query_mode); + + mutex_lock(&con->page_retirement_lock); ret = amdgpu_dpm_get_ecc_info(adev, (void *)&(con->umc_ecc)); - if (ret == -EOPNOTSUPP) { + if (ret == -EOPNOTSUPP && + error_query_mode == AMDGPU_RAS_DIRECT_ERROR_QUERY) { if (adev->umc.ras && adev->umc.ras->ras_block.hw_ops && adev->umc.ras->ras_block.hw_ops->query_ras_error_count) adev->umc.ras->ras_block.hw_ops->query_ras_error_count(adev, ras_error_status); @@ -114,13 +123,16 @@ static int amdgpu_umc_do_page_retirement(struct amdgpu_device *adev, if(!err_data->err_addr) dev_warn(adev->dev, "Failed to alloc memory for " "umc error address record!\n"); + else + err_data->err_addr_len = adev->umc.max_ras_err_cnt_per_query; /* umc query_ras_error_address is also responsible for clearing * error status */ adev->umc.ras->ras_block.hw_ops->query_ras_error_address(adev, ras_error_status); } - } else if (!ret) { + } else if (error_query_mode == AMDGPU_RAS_FIRMWARE_ERROR_QUERY || + (!ret && error_query_mode == AMDGPU_RAS_DIRECT_ERROR_QUERY)) { if (adev->umc.ras && adev->umc.ras->ecc_info_query_ras_error_count) adev->umc.ras->ecc_info_query_ras_error_count(adev, ras_error_status); @@ -138,6 +150,8 @@ static int amdgpu_umc_do_page_retirement(struct amdgpu_device *adev, if(!err_data->err_addr) dev_warn(adev->dev, "Failed to alloc memory for " "umc error address record!\n"); + else + err_data->err_addr_len = adev->umc.max_ras_err_cnt_per_query; /* umc query_ras_error_address is also responsible for clearing * error status @@ -147,16 +161,13 @@ static int amdgpu_umc_do_page_retirement(struct amdgpu_device *adev, } /* only uncorrectable error needs gpu reset */ - if (err_data->ue_count) { - dev_info(adev->dev, "%ld uncorrectable hardware errors " - "detected in UMC block\n", - err_data->ue_count); - + if (err_data->ue_count || err_data->de_count) { + err_count = err_data->ue_count + err_data->de_count; if ((amdgpu_bad_page_threshold != 0) && err_data->err_addr_cnt) { amdgpu_ras_add_bad_pages(adev, err_data->err_addr, err_data->err_addr_cnt); - amdgpu_ras_save_bad_pages(adev, &(err_data->ue_count)); + amdgpu_ras_save_bad_pages(adev, &err_count); amdgpu_dpm_send_hbm_bad_pages_num(adev, con->eeprom_control.ras_num_recs); @@ -165,20 +176,86 @@ static int amdgpu_umc_do_page_retirement(struct amdgpu_device *adev, con->update_channel_flag = false; } } - - if (reset) { - /* use mode-2 reset for poison consumption */ - if (!entry) - con->gpu_reset_flags |= AMDGPU_RAS_GPU_RESET_MODE2_RESET; - amdgpu_ras_reset_gpu(adev); - } } kfree(err_data->err_addr); + err_data->err_addr = NULL; + + mutex_unlock(&con->page_retirement_lock); +} + +static int amdgpu_umc_do_page_retirement(struct amdgpu_device *adev, + void *ras_error_status, + struct amdgpu_iv_entry *entry, + uint32_t reset) +{ + struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + + kgd2kfd_set_sram_ecc_flag(adev->kfd.dev); + amdgpu_umc_handle_bad_pages(adev, ras_error_status); + + if (err_data->ue_count && reset) { + con->gpu_reset_flags |= reset; + amdgpu_ras_reset_gpu(adev); + } + return AMDGPU_RAS_SUCCESS; } -int amdgpu_umc_poison_handler(struct amdgpu_device *adev, bool reset) +int amdgpu_umc_bad_page_polling_timeout(struct amdgpu_device *adev, + uint32_t reset, uint32_t timeout_ms) +{ + struct ras_err_data err_data; + struct ras_common_if head = { + .block = AMDGPU_RAS_BLOCK__UMC, + }; + struct ras_manager *obj = amdgpu_ras_find_obj(adev, &head); + uint32_t timeout = timeout_ms; + + memset(&err_data, 0, sizeof(err_data)); + amdgpu_ras_error_data_init(&err_data); + + do { + + amdgpu_umc_handle_bad_pages(adev, &err_data); + + if (timeout && !err_data.de_count) { + msleep(1); + timeout--; + } + + } while (timeout && !err_data.de_count); + + if (!timeout) + dev_warn(adev->dev, "Can't find bad pages\n"); + + if (err_data.de_count) + dev_info(adev->dev, "%ld new deferred hardware errors detected\n", err_data.de_count); + + if (obj) { + obj->err_data.ue_count += err_data.ue_count; + obj->err_data.ce_count += err_data.ce_count; + obj->err_data.de_count += err_data.de_count; + } + + amdgpu_ras_error_data_fini(&err_data); + + kgd2kfd_set_sram_ecc_flag(adev->kfd.dev); + + if (reset) { + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + + con->gpu_reset_flags |= reset; + amdgpu_ras_reset_gpu(adev); + } + + return 0; +} + +int amdgpu_umc_pasid_poison_handler(struct amdgpu_device *adev, + enum amdgpu_ras_block block, uint16_t pasid, + pasid_notify pasid_fn, void *data, uint32_t reset) { int ret = AMDGPU_RAS_SUCCESS; @@ -195,27 +272,39 @@ int amdgpu_umc_poison_handler(struct amdgpu_device *adev, bool reset) } if (!amdgpu_sriov_vf(adev)) { - struct ras_err_data err_data; - struct ras_common_if head = { - .block = AMDGPU_RAS_BLOCK__UMC, - }; - struct ras_manager *obj = amdgpu_ras_find_obj(adev, &head); + if (amdgpu_ip_version(adev, UMC_HWIP, 0) < IP_VERSION(12, 0, 0)) { + struct ras_err_data err_data; + struct ras_common_if head = { + .block = AMDGPU_RAS_BLOCK__UMC, + }; + struct ras_manager *obj = amdgpu_ras_find_obj(adev, &head); + + ret = amdgpu_ras_error_data_init(&err_data); + if (ret) + return ret; - ret = amdgpu_ras_error_data_init(&err_data); - if (ret) - return ret; + ret = amdgpu_umc_do_page_retirement(adev, &err_data, NULL, reset); - ret = amdgpu_umc_do_page_retirement(adev, &err_data, NULL, reset); + if (ret == AMDGPU_RAS_SUCCESS && obj) { + obj->err_data.ue_count += err_data.ue_count; + obj->err_data.ce_count += err_data.ce_count; + obj->err_data.de_count += err_data.de_count; + } - if (ret == AMDGPU_RAS_SUCCESS && obj) { - obj->err_data.ue_count += err_data.ue_count; - obj->err_data.ce_count += err_data.ce_count; - } + amdgpu_ras_error_data_fini(&err_data); + } else { + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); - amdgpu_ras_error_data_fini(&err_data); + amdgpu_ras_put_poison_req(adev, + block, pasid, pasid_fn, data, reset); + + atomic_inc(&con->page_retirement_req_cnt); + + wake_up(&con->page_retirement_wq); + } } else { if (adev->virt.ops && adev->virt.ops->ras_poison_handler) - adev->virt.ops->ras_poison_handler(adev); + adev->virt.ops->ras_poison_handler(adev, block); else dev_warn(adev->dev, "No ras_poison_handler interface in SRIOV!\n"); @@ -224,11 +313,19 @@ int amdgpu_umc_poison_handler(struct amdgpu_device *adev, bool reset) return ret; } +int amdgpu_umc_poison_handler(struct amdgpu_device *adev, + enum amdgpu_ras_block block, uint32_t reset) +{ + return amdgpu_umc_pasid_poison_handler(adev, + block, 0, NULL, NULL, reset); +} + int amdgpu_umc_process_ras_data_cb(struct amdgpu_device *adev, void *ras_error_status, struct amdgpu_iv_entry *entry) { - return amdgpu_umc_do_page_retirement(adev, ras_error_status, entry, true); + return amdgpu_umc_do_page_retirement(adev, ras_error_status, entry, + AMDGPU_RAS_GPU_RESET_MODE1_RESET); } int amdgpu_umc_ras_sw_init(struct amdgpu_device *adev) @@ -305,14 +402,20 @@ int amdgpu_umc_process_ecc_irq(struct amdgpu_device *adev, return 0; } -void amdgpu_umc_fill_error_record(struct ras_err_data *err_data, +int amdgpu_umc_fill_error_record(struct ras_err_data *err_data, uint64_t err_addr, uint64_t retired_page, uint32_t channel_index, uint32_t umc_inst) { - struct eeprom_table_record *err_rec = - &err_data->err_addr[err_data->err_addr_cnt]; + struct eeprom_table_record *err_rec; + + if (!err_data || + !err_data->err_addr || + (err_data->err_addr_cnt >= err_data->err_addr_len)) + return -EINVAL; + + err_rec = &err_data->err_addr[err_data->err_addr_cnt]; err_rec->address = err_addr; /* page frame address is saved */ @@ -324,6 +427,8 @@ void amdgpu_umc_fill_error_record(struct ras_err_data *err_data, err_rec->mcumc_id = umc_inst; err_data->err_addr_cnt++; + + return 0; } int amdgpu_umc_loop_channels(struct amdgpu_device *adev, @@ -356,3 +461,76 @@ int amdgpu_umc_loop_channels(struct amdgpu_device *adev, return 0; } + +int amdgpu_umc_update_ecc_status(struct amdgpu_device *adev, + uint64_t status, uint64_t ipid, uint64_t addr) +{ + if (adev->umc.ras->update_ecc_status) + return adev->umc.ras->update_ecc_status(adev, + status, ipid, addr); + return 0; +} + +static int amdgpu_umc_uint64_cmp(const void *a, const void *b) +{ + uint64_t *addr_a = (uint64_t *)a; + uint64_t *addr_b = (uint64_t *)b; + + if (*addr_a > *addr_b) + return 1; + else if (*addr_a < *addr_b) + return -1; + else + return 0; +} + +/* Use string hash to avoid logging the same bad pages repeatedly */ +int amdgpu_umc_build_pages_hash(struct amdgpu_device *adev, + uint64_t *pfns, int len, uint64_t *val) +{ + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + char buf[MAX_UMC_HASH_STRING_SIZE] = {0}; + int offset = 0, i = 0; + uint64_t hash_val; + + if (!pfns || !len) + return -EINVAL; + + sort(pfns, len, sizeof(uint64_t), amdgpu_umc_uint64_cmp, NULL); + + for (i = 0; i < len; i++) + offset += snprintf(&buf[offset], sizeof(buf) - offset, "%llx", pfns[i]); + + hash_val = siphash(buf, offset, &con->umc_ecc_log.ecc_key); + + *val = hash_val; + + return 0; +} + +int amdgpu_umc_logs_ecc_err(struct amdgpu_device *adev, + struct radix_tree_root *ecc_tree, struct ras_ecc_err *ecc_err) +{ + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + struct ras_ecc_log_info *ecc_log; + int ret; + + ecc_log = &con->umc_ecc_log; + + mutex_lock(&ecc_log->lock); + ret = radix_tree_insert(ecc_tree, ecc_err->hash_index, ecc_err); + if (!ret) { + struct ras_err_pages *err_pages = &ecc_err->err_pages; + int i; + + /* Reserve memory */ + for (i = 0; i < err_pages->count; i++) + amdgpu_ras_reserve_page(adev, err_pages->pfn[i]); + + radix_tree_tag_set(ecc_tree, + ecc_err->hash_index, UMC_ECC_NEW_DETECTED_TAG); + } + mutex_unlock(&ecc_log->lock); + + return ret; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h index 417a6726c71b..5f50c69c3cec 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h @@ -21,7 +21,7 @@ #ifndef __AMDGPU_UMC_H__ #define __AMDGPU_UMC_H__ #include "amdgpu_ras.h" - +#include "amdgpu_mca.h" /* * (addr / 256) * 4096, the higher 26 bits in ErrorAddr * is the index of 4KB block @@ -52,6 +52,8 @@ #define LOOP_UMC_EACH_NODE_INST_AND_CH(node_inst, umc_inst, ch_inst) \ LOOP_UMC_NODE_INST((node_inst)) LOOP_UMC_INST_AND_CH((umc_inst), (ch_inst)) +/* Page retirement tag */ +#define UMC_ECC_NEW_DETECTED_TAG 0x1 typedef int (*umc_func)(struct amdgpu_device *adev, uint32_t node_inst, uint32_t umc_inst, uint32_t ch_inst, void *data); @@ -64,8 +66,10 @@ struct amdgpu_umc_ras { void *ras_error_status); void (*ecc_info_query_ras_error_address)(struct amdgpu_device *adev, void *ras_error_status); - /* support different eeprom table version for different asic */ - void (*set_eeprom_table_version)(struct amdgpu_ras_eeprom_table_header *hdr); + bool (*check_ecc_err_status)(struct amdgpu_device *adev, + enum amdgpu_mca_error_type type, void *ras_error_status); + int (*update_ecc_status)(struct amdgpu_device *adev, + uint64_t status, uint64_t ipid, uint64_t addr); }; struct amdgpu_umc_funcs { @@ -100,11 +104,15 @@ struct amdgpu_umc { int amdgpu_umc_ras_sw_init(struct amdgpu_device *adev); int amdgpu_umc_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block); -int amdgpu_umc_poison_handler(struct amdgpu_device *adev, bool reset); +int amdgpu_umc_poison_handler(struct amdgpu_device *adev, + enum amdgpu_ras_block block, uint32_t reset); +int amdgpu_umc_pasid_poison_handler(struct amdgpu_device *adev, + enum amdgpu_ras_block block, uint16_t pasid, + pasid_notify pasid_fn, void *data, uint32_t reset); int amdgpu_umc_process_ecc_irq(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry); -void amdgpu_umc_fill_error_record(struct ras_err_data *err_data, +int amdgpu_umc_fill_error_record(struct ras_err_data *err_data, uint64_t err_addr, uint64_t retired_page, uint32_t channel_index, @@ -118,4 +126,17 @@ int amdgpu_umc_page_retirement_mca(struct amdgpu_device *adev, int amdgpu_umc_loop_channels(struct amdgpu_device *adev, umc_func func, void *data); + +int amdgpu_umc_bad_page_polling_timeout(struct amdgpu_device *adev, + uint32_t reset, uint32_t timeout_ms); + +int amdgpu_umc_update_ecc_status(struct amdgpu_device *adev, + uint64_t status, uint64_t ipid, uint64_t addr); +int amdgpu_umc_build_pages_hash(struct amdgpu_device *adev, + uint64_t *pfns, int len, uint64_t *val); +int amdgpu_umc_logs_ecc_err(struct amdgpu_device *adev, + struct radix_tree_root *ecc_tree, struct ras_ecc_err *ecc_err); + +void amdgpu_umc_handle_bad_pages(struct amdgpu_device *adev, + void *ras_error_status); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umr.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_umr.h index 107f9bb0e24f..5b27fc41ffbf 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umr.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umr.h @@ -69,12 +69,12 @@ struct amdgpu_debugfs_gprwave_data { }; enum AMDGPU_DEBUGFS_REGS2_CMDS { - AMDGPU_DEBUGFS_REGS2_CMD_SET_STATE=0, + AMDGPU_DEBUGFS_REGS2_CMD_SET_STATE = 0, AMDGPU_DEBUGFS_REGS2_CMD_SET_STATE_V2, }; enum AMDGPU_DEBUGFS_GPRWAVE_CMDS { - AMDGPU_DEBUGFS_GPRWAVE_CMD_SET_STATE=0, + AMDGPU_DEBUGFS_GPRWAVE_CMD_SET_STATE = 0, }; //reg2 interface diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c index bfbf59326ee1..e01c1c8e64c4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c @@ -189,10 +189,13 @@ static void setup_vpe_queue(struct amdgpu_device *adev, mqd->rptr_val = 0; mqd->unmapped = 1; + if (adev->vpe.collaborate_mode) + memcpy(++mqd, test->mqd_data_cpu_addr, sizeof(struct MQD_INFO)); + qinfo->mqd_addr = test->mqd_data_gpu_addr; qinfo->csa_addr = test->ctx_data_gpu_addr + offsetof(struct umsch_mm_test_ctx_data, vpe_ctx_csa); - qinfo->doorbell_offset_0 = (adev->doorbell_index.vpe_ring + 1) << 1; + qinfo->doorbell_offset_0 = 0; qinfo->doorbell_offset_1 = 0; } @@ -287,7 +290,10 @@ static int submit_vpe_queue(struct amdgpu_device *adev, struct umsch_mm_test *te ring[5] = 0; mqd->wptr_val = (6 << 2); - // WDOORBELL32(adev->umsch_mm.agdb_index[CONTEXT_PRIORITY_LEVEL_NORMAL], mqd->wptr_val); + if (adev->vpe.collaborate_mode) + (++mqd)->wptr_val = (6 << 2); + + WDOORBELL32(adev->umsch_mm.agdb_index[CONTEXT_PRIORITY_LEVEL_NORMAL], mqd->wptr_val); for (i = 0; i < adev->usec_timeout; i++) { if (*fence == test_pattern) @@ -358,7 +364,7 @@ static int setup_umsch_mm_test(struct amdgpu_device *adev, memset(test->ring_data_cpu_addr, 0, sizeof(struct umsch_mm_test_ring_data)); - test->ring_data_gpu_addr = AMDGPU_VA_RESERVED_SIZE; + test->ring_data_gpu_addr = AMDGPU_VA_RESERVED_BOTTOM; r = map_ring_data(adev, test->vm, test->ring_data_obj, &test->bo_va, test->ring_data_gpu_addr, sizeof(struct umsch_mm_test_ring_data)); if (r) @@ -571,6 +577,7 @@ int amdgpu_umsch_mm_init_microcode(struct amdgpu_umsch_mm *umsch) switch (amdgpu_ip_version(adev, VCN_HWIP, 0)) { case IP_VERSION(4, 0, 5): + case IP_VERSION(4, 0, 6): fw_name = "amdgpu/umsch_mm_4_0_0.bin"; break; default: @@ -750,6 +757,7 @@ static int umsch_mm_early_init(void *handle) switch (amdgpu_ip_version(adev, VCN_HWIP, 0)) { case IP_VERSION(4, 0, 5): + case IP_VERSION(4, 0, 6): umsch_mm_v4_0_set_funcs(&adev->umsch_mm); break; default: @@ -766,6 +774,9 @@ static int umsch_mm_late_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + if (amdgpu_in_reset(adev) || adev->in_s0ix || adev->in_suspend) + return 0; + return umsch_mm_test(adev); } @@ -867,6 +878,8 @@ static const struct amd_ip_funcs umsch_mm_v4_0_ip_funcs = { .hw_fini = umsch_mm_hw_fini, .suspend = umsch_mm_suspend, .resume = umsch_mm_resume, + .dump_ip_state = NULL, + .print_ip_state = NULL, }; const struct amdgpu_ip_block_version umsch_mm_v4_0_ip_block = { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.h index 8258a43a6236..5014b5af95fd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.h @@ -33,13 +33,6 @@ enum UMSCH_SWIP_ENGINE_TYPE { UMSCH_SWIP_ENGINE_TYPE_MAX }; -enum UMSCH_SWIP_AFFINITY_TYPE { - UMSCH_SWIP_AFFINITY_TYPE_ANY = 0, - UMSCH_SWIP_AFFINITY_TYPE_VCN0 = 1, - UMSCH_SWIP_AFFINITY_TYPE_VCN1 = 2, - UMSCH_SWIP_AFFINITY_TYPE_MAX -}; - enum UMSCH_CONTEXT_PRIORITY_LEVEL { CONTEXT_PRIORITY_LEVEL_IDLE = 0, CONTEXT_PRIORITY_LEVEL_NORMAL = 1, @@ -51,13 +44,15 @@ enum UMSCH_CONTEXT_PRIORITY_LEVEL { struct umsch_mm_set_resource_input { uint32_t vmid_mask_mm_vcn; uint32_t vmid_mask_mm_vpe; + uint32_t collaboration_mask_vpe; uint32_t logging_vmid; uint32_t engine_mask; union { struct { uint32_t disable_reset : 1; uint32_t disable_umsch_mm_log : 1; - uint32_t reserved : 30; + uint32_t use_rs64mem_for_proc_ctx_csa : 1; + uint32_t reserved : 29; }; uint32_t uint32_all; }; @@ -78,15 +73,18 @@ struct umsch_mm_add_queue_input { uint32_t doorbell_offset_1; enum UMSCH_SWIP_ENGINE_TYPE engine_type; uint32_t affinity; - enum UMSCH_SWIP_AFFINITY_TYPE affinity_type; uint64_t mqd_addr; uint64_t h_context; uint64_t h_queue; uint32_t vm_context_cntl; + uint32_t process_csa_array_index; + uint32_t context_csa_array_index; + struct { uint32_t is_context_suspended : 1; - uint32_t reserved : 31; + uint32_t collaboration_mode : 1; + uint32_t reserved : 30; }; }; @@ -94,6 +92,7 @@ struct umsch_mm_remove_queue_input { uint32_t doorbell_offset_0; uint32_t doorbell_offset_1; uint64_t context_csa_addr; + uint32_t context_csa_array_index; }; struct MQD_INFO { @@ -103,6 +102,7 @@ struct MQD_INFO { uint32_t wptr_val; uint32_t rptr_val; uint32_t unmapped; + uint32_t vmid; }; struct amdgpu_umsch_mm; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c index 59acf424a078..968ca2c84ef7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c @@ -743,7 +743,8 @@ int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t created = 0; uint32_t allocated = 0; uint32_t tmp, handle = 0; - uint32_t *size = &tmp; + uint32_t dummy = 0xffffffff; + uint32_t *size = &dummy; unsigned int idx; int i, r = 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c index f4963330c772..677eb141554e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c @@ -59,6 +59,9 @@ #define FIRMWARE_VCN4_0_3 "amdgpu/vcn_4_0_3.bin" #define FIRMWARE_VCN4_0_4 "amdgpu/vcn_4_0_4.bin" #define FIRMWARE_VCN4_0_5 "amdgpu/vcn_4_0_5.bin" +#define FIRMWARE_VCN4_0_6 "amdgpu/vcn_4_0_6.bin" +#define FIRMWARE_VCN4_0_6_1 "amdgpu/vcn_4_0_6_1.bin" +#define FIRMWARE_VCN5_0_0 "amdgpu/vcn_5_0_0.bin" MODULE_FIRMWARE(FIRMWARE_RAVEN); MODULE_FIRMWARE(FIRMWARE_PICASSO); @@ -82,21 +85,32 @@ MODULE_FIRMWARE(FIRMWARE_VCN4_0_2); MODULE_FIRMWARE(FIRMWARE_VCN4_0_3); MODULE_FIRMWARE(FIRMWARE_VCN4_0_4); MODULE_FIRMWARE(FIRMWARE_VCN4_0_5); +MODULE_FIRMWARE(FIRMWARE_VCN4_0_6); +MODULE_FIRMWARE(FIRMWARE_VCN4_0_6_1); +MODULE_FIRMWARE(FIRMWARE_VCN5_0_0); static void amdgpu_vcn_idle_work_handler(struct work_struct *work); int amdgpu_vcn_early_init(struct amdgpu_device *adev) { - char ucode_prefix[30]; + char ucode_prefix[25]; char fw_name[40]; - int r; + int r, i; - amdgpu_ucode_ip_version_decode(adev, UVD_HWIP, ucode_prefix, sizeof(ucode_prefix)); - snprintf(fw_name, sizeof(fw_name), "amdgpu/%s.bin", ucode_prefix); - r = amdgpu_ucode_request(adev, &adev->vcn.fw, fw_name); - if (r) - amdgpu_ucode_release(&adev->vcn.fw); + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + amdgpu_ucode_ip_version_decode(adev, UVD_HWIP, ucode_prefix, sizeof(ucode_prefix)); + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s.bin", ucode_prefix); + if (amdgpu_ip_version(adev, UVD_HWIP, 0) == IP_VERSION(4, 0, 6) && + i == 1) { + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_%d.bin", ucode_prefix, i); + } + r = amdgpu_ucode_request(adev, &adev->vcn.fw[i], fw_name); + if (r) { + amdgpu_ucode_release(&adev->vcn.fw[i]); + return r; + } + } return r; } @@ -137,7 +151,7 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev) } } - hdr = (const struct common_firmware_header *)adev->vcn.fw->data; + hdr = (const struct common_firmware_header *)adev->vcn.fw[0]->data; adev->vcn.fw_version = le32_to_cpu(hdr->ucode_version); /* Bit 20-23, it is encode major and non-zero for new naming convention. @@ -171,7 +185,10 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev) if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) bo_size += AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8); - if (amdgpu_ip_version(adev, UVD_HWIP, 0) >= IP_VERSION(4, 0, 0)) { + if (amdgpu_ip_version(adev, UVD_HWIP, 0) >= IP_VERSION(5, 0, 0)) { + fw_shared_size = AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_vcn5_fw_shared)); + log_offset = offsetof(struct amdgpu_vcn5_fw_shared, fw_log); + } else if (amdgpu_ip_version(adev, UVD_HWIP, 0) >= IP_VERSION(4, 0, 0)) { fw_shared_size = AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_vcn4_fw_shared)); log_offset = offsetof(struct amdgpu_vcn4_fw_shared, fw_log); } else { @@ -252,9 +269,10 @@ int amdgpu_vcn_sw_fini(struct amdgpu_device *adev) for (i = 0; i < adev->vcn.num_enc_rings; ++i) amdgpu_ring_fini(&adev->vcn.inst[j].ring_enc[i]); + + amdgpu_ucode_release(&adev->vcn.fw[j]); } - amdgpu_ucode_release(&adev->vcn.fw); mutex_destroy(&adev->vcn.vcn1_jpeg1_workaround); mutex_destroy(&adev->vcn.vcn_pg_lock); @@ -350,11 +368,12 @@ int amdgpu_vcn_resume(struct amdgpu_device *adev) const struct common_firmware_header *hdr; unsigned int offset; - hdr = (const struct common_firmware_header *)adev->vcn.fw->data; + hdr = (const struct common_firmware_header *)adev->vcn.fw[i]->data; if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { offset = le32_to_cpu(hdr->ucode_array_offset_bytes); if (drm_dev_enter(adev_to_drm(adev), &idx)) { - memcpy_toio(adev->vcn.inst[i].cpu_addr, adev->vcn.fw->data + offset, + memcpy_toio(adev->vcn.inst[i].cpu_addr, + adev->vcn.fw[i]->data + offset, le32_to_cpu(hdr->ucode_size_bytes)); drm_dev_exit(idx); } @@ -1039,11 +1058,11 @@ void amdgpu_vcn_setup_ucode(struct amdgpu_device *adev) if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { const struct common_firmware_header *hdr; - hdr = (const struct common_firmware_header *)adev->vcn.fw->data; - for (i = 0; i < adev->vcn.num_vcn_inst; i++) { if (adev->vcn.harvest_config & (1 << i)) continue; + + hdr = (const struct common_firmware_header *)adev->vcn.fw[i]->data; /* currently only support 2 FW instances */ if (i >= 2) { dev_info(adev->dev, "More then 2 VCN FW instances!\n"); @@ -1051,7 +1070,7 @@ void amdgpu_vcn_setup_ucode(struct amdgpu_device *adev) } idx = AMDGPU_UCODE_ID_VCN + i; adev->firmware.ucode[idx].ucode_id = idx; - adev->firmware.ucode[idx].fw = adev->vcn.fw; + adev->firmware.ucode[idx].fw = adev->vcn.fw[i]; adev->firmware.fw_size += ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE); @@ -1189,7 +1208,7 @@ int amdgpu_vcn_process_poison_irq(struct amdgpu_device *adev, amdgpu_ras_interrupt_dispatch(adev, &ih_data); } else { if (adev->virt.ops && adev->virt.ops->ras_poison_handler) - adev->virt.ops->ras_poison_handler(adev); + adev->virt.ops->ras_poison_handler(adev, ras_if->block); else dev_warn(adev->dev, "No ras_poison_handler interface in SRIOV for VCN!\n"); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h index 514c98ea144f..9f06def236fd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h @@ -160,6 +160,48 @@ } \ } while (0) +#define SOC24_DPG_MODE_OFFSET(ip, inst_idx, reg) \ + ({ \ + uint32_t internal_reg_offset, addr; \ + bool video_range, aon_range; \ + \ + addr = (adev->reg_offset[ip##_HWIP][inst_idx][reg##_BASE_IDX] + reg); \ + addr <<= 2; \ + video_range = ((((0xFFFFF & addr) >= (VCN_VID_SOC_ADDRESS)) && \ + ((0xFFFFF & addr) < ((VCN_VID_SOC_ADDRESS + 0x2600))))); \ + aon_range = ((((0xFFFFF & addr) >= (VCN_AON_SOC_ADDRESS)) && \ + ((0xFFFFF & addr) < ((VCN_AON_SOC_ADDRESS + 0x600))))); \ + if (video_range) \ + internal_reg_offset = ((0xFFFFF & addr) - (VCN_VID_SOC_ADDRESS) + \ + (VCN_VID_IP_ADDRESS)); \ + else if (aon_range) \ + internal_reg_offset = ((0xFFFFF & addr) - (VCN_AON_SOC_ADDRESS) + \ + (VCN_AON_IP_ADDRESS)); \ + else \ + internal_reg_offset = (0xFFFFF & addr); \ + \ + internal_reg_offset >>= 2; \ + }) + +#define WREG32_SOC24_DPG_MODE(inst_idx, offset, value, mask_en, indirect) \ + do { \ + if (!indirect) { \ + WREG32_SOC15(VCN, GET_INST(VCN, inst_idx), \ + regUVD_DPG_LMA_DATA, value); \ + WREG32_SOC15( \ + VCN, GET_INST(VCN, inst_idx), \ + regUVD_DPG_LMA_CTL, \ + (0x1 << UVD_DPG_LMA_CTL__READ_WRITE__SHIFT | \ + mask_en << UVD_DPG_LMA_CTL__MASK_EN__SHIFT | \ + offset << UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT)); \ + } else { \ + *adev->vcn.inst[inst_idx].dpg_sram_curr_addr++ = \ + offset; \ + *adev->vcn.inst[inst_idx].dpg_sram_curr_addr++ = \ + value; \ + } \ + } while (0) + #define AMDGPU_FW_SHARED_FLAG_0_UNIFIED_QUEUE (1 << 2) #define AMDGPU_FW_SHARED_FLAG_0_DRM_KEY_INJECT (1 << 4) #define AMDGPU_VCN_FW_SHARED_FLAG_0_RB (1 << 6) @@ -264,7 +306,7 @@ struct amdgpu_vcn_ras { struct amdgpu_vcn { unsigned fw_version; struct delayed_work idle_work; - const struct firmware *fw; /* VCN firmware */ + const struct firmware *fw[AMDGPU_MAX_VCN_INSTANCES]; /* VCN firmware */ unsigned num_enc_rings; enum amd_powergating_state cur_state; bool indirect_sram; @@ -412,6 +454,16 @@ struct amdgpu_vcn_rb_metadata { uint8_t pad[26]; }; +struct amdgpu_vcn5_fw_shared { + uint32_t present_flag_0; + uint8_t pad[12]; + struct amdgpu_fw_shared_unified_queue_struct sq; + uint8_t pad1[8]; + struct amdgpu_fw_shared_fw_logging fw_log; + struct amdgpu_fw_shared_rb_setup rb_setup; + uint8_t pad2[4]; +}; + #define VCN_BLOCK_ENCODE_DISABLE_MASK 0x80 #define VCN_BLOCK_DECODE_DISABLE_MASK 0x40 #define VCN_BLOCK_QUEUE_DISABLE_MASK 0xC0 diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c index 0dcff2889e25..54ab51a4ada7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c @@ -32,6 +32,7 @@ #include "amdgpu.h" #include "amdgpu_ras.h" +#include "amdgpu_reset.h" #include "vi.h" #include "soc15.h" #include "nv.h" @@ -71,59 +72,6 @@ void amdgpu_virt_init_setting(struct amdgpu_device *adev) amdgpu_num_kcq = 2; } -void amdgpu_virt_kiq_reg_write_reg_wait(struct amdgpu_device *adev, - uint32_t reg0, uint32_t reg1, - uint32_t ref, uint32_t mask, - uint32_t xcc_inst) -{ - struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_inst]; - struct amdgpu_ring *ring = &kiq->ring; - signed long r, cnt = 0; - unsigned long flags; - uint32_t seq; - - if (adev->mes.ring.sched.ready) { - amdgpu_mes_reg_write_reg_wait(adev, reg0, reg1, - ref, mask); - return; - } - - spin_lock_irqsave(&kiq->ring_lock, flags); - amdgpu_ring_alloc(ring, 32); - amdgpu_ring_emit_reg_write_reg_wait(ring, reg0, reg1, - ref, mask); - r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT); - if (r) - goto failed_undo; - - amdgpu_ring_commit(ring); - spin_unlock_irqrestore(&kiq->ring_lock, flags); - - r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT); - - /* don't wait anymore for IRQ context */ - if (r < 1 && in_interrupt()) - goto failed_kiq; - - might_sleep(); - while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) { - - msleep(MAX_KIQ_REG_BAILOUT_INTERVAL); - r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT); - } - - if (cnt > MAX_KIQ_REG_TRY) - goto failed_kiq; - - return; - -failed_undo: - amdgpu_ring_undo(ring); - spin_unlock_irqrestore(&kiq->ring_lock, flags); -failed_kiq: - dev_err(adev->dev, "failed to write reg %x wait reg %x\n", reg0, reg1); -} - /** * amdgpu_virt_request_full_gpu() - request full gpu access * @adev: amdgpu device. @@ -303,11 +251,11 @@ static int amdgpu_virt_init_ras_err_handler_data(struct amdgpu_device *adev) if (!*data) goto data_failure; - bps = kmalloc_array(align_space, sizeof((*data)->bps), GFP_KERNEL); + bps = kmalloc_array(align_space, sizeof(*(*data)->bps), GFP_KERNEL); if (!bps) goto bps_failure; - bps_bo = kmalloc_array(align_space, sizeof((*data)->bps_bo), GFP_KERNEL); + bps_bo = kmalloc_array(align_space, sizeof(*(*data)->bps_bo), GFP_KERNEL); if (!bps_bo) goto bps_bo_failure; @@ -340,8 +288,10 @@ static void amdgpu_virt_ras_release_bp(struct amdgpu_device *adev) for (i = data->last_reserved - 1; i >= 0; i--) { bo = data->bps_bo[i]; - amdgpu_bo_free_kernel(&bo, NULL, NULL); - data->bps_bo[i] = bo; + if (bo) { + amdgpu_bo_free_kernel(&bo, NULL, NULL); + data->bps_bo[i] = bo; + } data->last_reserved = i; } } @@ -381,6 +331,8 @@ static void amdgpu_virt_ras_reserve_bps(struct amdgpu_device *adev) { struct amdgpu_virt *virt = &adev->virt; struct amdgpu_virt_ras_err_handler_data *data = virt->virt_eh_data; + struct amdgpu_vram_mgr *mgr = &adev->mman.vram_mgr; + struct ttm_resource_manager *man = &mgr->manager; struct amdgpu_bo *bo = NULL; uint64_t bp; int i; @@ -396,12 +348,18 @@ static void amdgpu_virt_ras_reserve_bps(struct amdgpu_device *adev) * 2) a ras bad page has been reserved (duplicate error injection * for one page); */ - if (amdgpu_bo_create_kernel_at(adev, bp << AMDGPU_GPU_PAGE_SHIFT, - AMDGPU_GPU_PAGE_SIZE, - &bo, NULL)) - DRM_DEBUG("RAS WARN: reserve vram for retired page %llx fail\n", bp); - - data->bps_bo[i] = bo; + if (ttm_resource_manager_used(man)) { + amdgpu_vram_mgr_reserve_range(&adev->mman.vram_mgr, + bp << AMDGPU_GPU_PAGE_SHIFT, + AMDGPU_GPU_PAGE_SIZE); + data->bps_bo[i] = NULL; + } else { + if (amdgpu_bo_create_kernel_at(adev, bp << AMDGPU_GPU_PAGE_SHIFT, + AMDGPU_GPU_PAGE_SIZE, + &bo, NULL)) + DRM_DEBUG("RAS WARN: reserve vram for retired page %llx fail\n", bp); + data->bps_bo[i] = bo; + } data->last_reserved = i + 1; bo = NULL; } @@ -467,7 +425,7 @@ static int amdgpu_virt_read_pf2vf_data(struct amdgpu_device *adev) return -EINVAL; if (pf2vf_info->size > 1024) { - DRM_ERROR("invalid pf2vf message size\n"); + dev_err(adev->dev, "invalid pf2vf message size: 0x%x\n", pf2vf_info->size); return -EINVAL; } @@ -478,7 +436,9 @@ static int amdgpu_virt_read_pf2vf_data(struct amdgpu_device *adev) adev->virt.fw_reserve.p_pf2vf, pf2vf_info->size, adev->virt.fw_reserve.checksum_key, checksum); if (checksum != checkval) { - DRM_ERROR("invalid pf2vf message\n"); + dev_err(adev->dev, + "invalid pf2vf message: header checksum=0x%x calculated checksum=0x%x\n", + checksum, checkval); return -EINVAL; } @@ -492,7 +452,9 @@ static int amdgpu_virt_read_pf2vf_data(struct amdgpu_device *adev) adev->virt.fw_reserve.p_pf2vf, pf2vf_info->size, 0, checksum); if (checksum != checkval) { - DRM_ERROR("invalid pf2vf message\n"); + dev_err(adev->dev, + "invalid pf2vf message: header checksum=0x%x calculated checksum=0x%x\n", + checksum, checkval); return -EINVAL; } @@ -528,7 +490,7 @@ static int amdgpu_virt_read_pf2vf_data(struct amdgpu_device *adev) ((struct amd_sriov_msg_pf2vf_info *)pf2vf_info)->uuid; break; default: - DRM_ERROR("invalid pf2vf version\n"); + dev_err(adev->dev, "invalid pf2vf version: 0x%x\n", pf2vf_info->version); return -EINVAL; } @@ -614,6 +576,11 @@ static int amdgpu_virt_write_vf2pf_data(struct amdgpu_device *adev) vf2pf_info->decode_usage = 0; vf2pf_info->dummy_page_addr = (uint64_t)adev->dummy_page_addr; + vf2pf_info->mes_info_addr = (uint64_t)adev->mes.resource_1_gpu_addr; + + if (adev->mes.resource_1) { + vf2pf_info->mes_info_size = adev->mes.resource_1->tbo.base.size; + } vf2pf_info->checksum = amd_sriov_msg_checksum( vf2pf_info, vf2pf_info->header.size, 0, 0); @@ -627,8 +594,22 @@ static void amdgpu_virt_update_vf2pf_work_item(struct work_struct *work) int ret; ret = amdgpu_virt_read_pf2vf_data(adev); - if (ret) + if (ret) { + adev->virt.vf2pf_update_retry_cnt++; + if ((adev->virt.vf2pf_update_retry_cnt >= AMDGPU_VF2PF_UPDATE_MAX_RETRY_LIMIT) && + amdgpu_sriov_runtime(adev) && !amdgpu_in_reset(adev)) { + amdgpu_ras_set_fed(adev, true); + if (amdgpu_reset_domain_schedule(adev->reset_domain, + &adev->virt.flr_work)) + return; + else + dev_err(adev->dev, "Failed to queue work! at %s", __func__); + } + goto out; + } + + adev->virt.vf2pf_update_retry_cnt = 0; amdgpu_virt_write_vf2pf_data(adev); out: @@ -649,6 +630,7 @@ void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev) adev->virt.fw_reserve.p_pf2vf = NULL; adev->virt.fw_reserve.p_vf2pf = NULL; adev->virt.vf2pf_update_interval_ms = 0; + adev->virt.vf2pf_update_retry_cnt = 0; if (adev->mman.fw_vram_usage_va && adev->mman.drv_vram_usage_va) { DRM_WARN("Currently fw_vram and drv_vram should not have values at the same time!"); @@ -748,12 +730,6 @@ void amdgpu_detect_virtualization(struct amdgpu_device *adev) adev->virt.caps |= AMDGPU_PASSTHROUGH_MODE; } - if (amdgpu_sriov_vf(adev) && adev->asic_type == CHIP_SIENNA_CICHLID) - /* VF MMIO access (except mailbox range) from CPU - * will be blocked during sriov runtime - */ - adev->virt.caps |= AMDGPU_VF_MMIO_ACCESS_PROTECT; - /* we have the ability to check now */ if (amdgpu_sriov_vf(adev)) { switch (adev->asic_type) { @@ -1022,7 +998,7 @@ u32 amdgpu_virt_rlcg_reg_rw(struct amdgpu_device *adev, u32 offset, u32 v, u32 f * SCRATCH_REG0 = read/write value * SCRATCH_REG1[30:28] = command * SCRATCH_REG1[19:0] = address in dword - * SCRATCH_REG1[26:24] = Error reporting + * SCRATCH_REG1[27:24] = Error reporting */ writel(v, scratch_reg0); writel((offset | flag), scratch_reg1); @@ -1036,7 +1012,8 @@ u32 amdgpu_virt_rlcg_reg_rw(struct amdgpu_device *adev, u32 offset, u32 v, u32 f udelay(10); } - if (i >= timeout) { + tmp = readl(scratch_reg1); + if (i >= timeout || (tmp & AMDGPU_RLCG_SCRATCH1_ERROR_MASK) != 0) { if (amdgpu_sriov_rlcg_error_report_enabled(adev)) { if (tmp & AMDGPU_RLCG_VFGATE_DISABLED) { dev_err(adev->dev, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h index d4207e44141f..642f1fd287d8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h @@ -45,12 +45,15 @@ #define AMDGPU_RLCG_REG_NOT_IN_RANGE 0x1000000 #define AMDGPU_RLCG_SCRATCH1_ADDRESS_MASK 0xFFFFF +#define AMDGPU_RLCG_SCRATCH1_ERROR_MASK 0xF000000 /* all asic after AI use this offset */ #define mmRCC_IOV_FUNC_IDENTIFIER 0xDE5 /* tonga/fiji use this offset */ #define mmBIF_IOV_FUNC_IDENTIFIER 0x1503 +#define AMDGPU_VF2PF_UPDATE_MAX_RETRY_LIMIT 5 + enum amdgpu_sriov_vf_mode { SRIOV_VF_MODE_BARE_METAL = 0, SRIOV_VF_MODE_ONE_VF, @@ -88,7 +91,8 @@ struct amdgpu_virt_ops { int (*wait_reset)(struct amdgpu_device *adev); void (*trans_msg)(struct amdgpu_device *adev, enum idh_request req, u32 data1, u32 data2, u32 data3); - void (*ras_poison_handler)(struct amdgpu_device *adev); + void (*ras_poison_handler)(struct amdgpu_device *adev, + enum amdgpu_ras_block block); }; /* @@ -128,6 +132,8 @@ enum AMDGIM_FEATURE_FLAG { AMDGIM_FEATURE_AV1_SUPPORT = (1 << 6), /* VCN RB decouple */ AMDGIM_FEATURE_VCN_RB_DECOUPLE = (1 << 7), + /* MES info */ + AMDGIM_FEATURE_MES_INFO_ENABLE = (1 << 8), }; enum AMDGIM_REG_ACCESS_FLAG { @@ -255,6 +261,7 @@ struct amdgpu_virt { /* vf2pf message */ struct delayed_work vf2pf_work; uint32_t vf2pf_update_interval_ms; + int vf2pf_update_retry_cnt; /* multimedia bandwidth config */ bool is_mm_bw_enabled; @@ -330,12 +337,10 @@ static inline bool is_virtual_machine(void) ((adev)->virt.gim_feature & AMDGIM_FEATURE_AV1_SUPPORT) #define amdgpu_sriov_is_vcn_rb_decouple(adev) \ ((adev)->virt.gim_feature & AMDGIM_FEATURE_VCN_RB_DECOUPLE) +#define amdgpu_sriov_is_mes_info_enable(adev) \ + ((adev)->virt.gim_feature & AMDGIM_FEATURE_MES_INFO_ENABLE) bool amdgpu_virt_mmio_blocked(struct amdgpu_device *adev); void amdgpu_virt_init_setting(struct amdgpu_device *adev); -void amdgpu_virt_kiq_reg_write_reg_wait(struct amdgpu_device *adev, - uint32_t reg0, uint32_t rreg1, - uint32_t ref, uint32_t mask, - uint32_t xcc_inst); int amdgpu_virt_request_full_gpu(struct amdgpu_device *adev, bool init); int amdgpu_virt_release_full_gpu(struct amdgpu_device *adev, bool init); int amdgpu_virt_reset_gpu(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c index 453a4b786cfc..e30eecd02ae1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c @@ -658,10 +658,11 @@ static const struct amd_ip_funcs amdgpu_vkms_ip_funcs = { .soft_reset = amdgpu_vkms_soft_reset, .set_clockgating_state = amdgpu_vkms_set_clockgating_state, .set_powergating_state = amdgpu_vkms_set_powergating_state, + .dump_ip_state = NULL, + .print_ip_state = NULL, }; -const struct amdgpu_ip_block_version amdgpu_vkms_ip_block = -{ +const struct amdgpu_ip_block_version amdgpu_vkms_ip_block = { .type = AMD_IP_BLOCK_TYPE_DCE, .major = 1, .minor = 0, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index b8fcb6c55698..4e2391c83d7c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -234,6 +234,22 @@ static void amdgpu_vm_bo_invalidated(struct amdgpu_vm_bo_base *vm_bo) } /** + * amdgpu_vm_bo_evicted_user - vm_bo is evicted + * + * @vm_bo: vm_bo which is evicted + * + * State for BOs used by user mode queues which are not at the location they + * should be. + */ +static void amdgpu_vm_bo_evicted_user(struct amdgpu_vm_bo_base *vm_bo) +{ + vm_bo->moved = true; + spin_lock(&vm_bo->vm->status_lock); + list_move(&vm_bo->vm_status, &vm_bo->vm->evicted_user); + spin_unlock(&vm_bo->vm->status_lock); +} + +/** * amdgpu_vm_bo_relocated - vm_bo is reloacted * * @vm_bo: vm_bo which is relocated @@ -427,21 +443,25 @@ uint64_t amdgpu_vm_generation(struct amdgpu_device *adev, struct amdgpu_vm *vm) } /** - * amdgpu_vm_validate_pt_bos - validate the page table BOs + * amdgpu_vm_validate - validate evicted BOs tracked in the VM * * @adev: amdgpu device pointer * @vm: vm providing the BOs + * @ticket: optional reservation ticket used to reserve the VM * @validate: callback to do the validation * @param: parameter for the validation callback * - * Validate the page table BOs on command submission if neccessary. + * Validate the page table BOs and per-VM BOs on command submission if + * necessary. If a ticket is given, also try to validate evicted user queue + * BOs. They must already be reserved with the given ticket. * * Returns: * Validation result. */ -int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm, - int (*validate)(void *p, struct amdgpu_bo *bo), - void *param) +int amdgpu_vm_validate(struct amdgpu_device *adev, struct amdgpu_vm *vm, + struct ww_acquire_ctx *ticket, + int (*validate)(void *p, struct amdgpu_bo *bo), + void *param) { struct amdgpu_vm_bo_base *bo_base; struct amdgpu_bo *shadow; @@ -484,6 +504,34 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm, } spin_lock(&vm->status_lock); } + while (ticket && !list_empty(&vm->evicted_user)) { + bo_base = list_first_entry(&vm->evicted_user, + struct amdgpu_vm_bo_base, + vm_status); + spin_unlock(&vm->status_lock); + + bo = bo_base->bo; + + if (dma_resv_locking_ctx(bo->tbo.base.resv) != ticket) { + struct amdgpu_task_info *ti = amdgpu_vm_get_task_info_vm(vm); + + pr_warn_ratelimited("Evicted user BO is not reserved\n"); + if (ti) { + pr_warn_ratelimited("pid %d\n", ti->pid); + amdgpu_vm_put_task_info(ti); + } + + return -EINVAL; + } + + r = validate(param, bo); + if (r) + return r; + + amdgpu_vm_bo_invalidated(bo_base); + + spin_lock(&vm->status_lock); + } spin_unlock(&vm->status_lock); amdgpu_vm_eviction_lock(vm); @@ -610,7 +658,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool vm_flush_needed = job->vm_needs_flush; struct dma_fence *fence = NULL; bool pasid_mapping_needed = false; - unsigned patch_offset = 0; + unsigned int patch; int r; if (amdgpu_vmid_had_gpu_reset(adev, id)) { @@ -637,7 +685,8 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, amdgpu_ring_ib_begin(ring); if (ring->funcs->init_cond_exec) - patch_offset = amdgpu_ring_init_cond_exec(ring); + patch = amdgpu_ring_init_cond_exec(ring, + ring->cond_exe_gpu_addr); if (need_pipe_sync) amdgpu_ring_emit_pipeline_sync(ring); @@ -651,7 +700,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, amdgpu_gmc_emit_pasid_mapping(ring, job->vmid, job->pasid); if (spm_update_needed && adev->gfx.rlc.funcs->update_spm_vmid) - adev->gfx.rlc.funcs->update_spm_vmid(adev, job->vmid); + adev->gfx.rlc.funcs->update_spm_vmid(adev, ring, job->vmid); if (!ring->is_mes_queue && ring->funcs->emit_gds_switch && gds_switch_needed) { @@ -685,8 +734,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, } dma_fence_put(fence); - if (ring->funcs->patch_cond_exec) - amdgpu_ring_patch_cond_exec(ring, patch_offset); + amdgpu_ring_patch_cond_exec(ring, patch); /* the double SWITCH_BUFFER here *cannot* be skipped by COND_EXEC */ if (ring->funcs->emit_switch_buffer) { @@ -838,6 +886,44 @@ static void amdgpu_vm_tlb_seq_cb(struct dma_fence *fence, } /** + * amdgpu_vm_tlb_flush - prepare TLB flush + * + * @params: parameters for update + * @fence: input fence to sync TLB flush with + * @tlb_cb: the callback structure + * + * Increments the tlb sequence to make sure that future CS execute a VM flush. + */ +static void +amdgpu_vm_tlb_flush(struct amdgpu_vm_update_params *params, + struct dma_fence **fence, + struct amdgpu_vm_tlb_seq_struct *tlb_cb) +{ + struct amdgpu_vm *vm = params->vm; + + if (!fence || !*fence) + return; + + tlb_cb->vm = vm; + if (!dma_fence_add_callback(*fence, &tlb_cb->cb, + amdgpu_vm_tlb_seq_cb)) { + dma_fence_put(vm->last_tlb_flush); + vm->last_tlb_flush = dma_fence_get(*fence); + } else { + amdgpu_vm_tlb_seq_cb(NULL, &tlb_cb->cb); + } + + /* Prepare a TLB flush fence to be attached to PTs */ + if (!params->unlocked && vm->is_compute_context) { + amdgpu_vm_tlb_fence_create(params->adev, vm, fence); + + /* Makes sure no PD/PT is freed before the flush */ + dma_resv_add_fence(vm->root.bo->tbo.base.resv, *fence, + DMA_RESV_USAGE_BOOKKEEP); + } +} + +/** * amdgpu_vm_update_range - update a range in the vm page table * * @adev: amdgpu_device pointer to use for commands @@ -868,8 +954,8 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm, struct ttm_resource *res, dma_addr_t *pages_addr, struct dma_fence **fence) { - struct amdgpu_vm_update_params params; struct amdgpu_vm_tlb_seq_struct *tlb_cb; + struct amdgpu_vm_update_params params; struct amdgpu_res_cursor cursor; enum amdgpu_sync_mode sync_mode; int r, idx; @@ -879,8 +965,8 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm, tlb_cb = kmalloc(sizeof(*tlb_cb), GFP_KERNEL); if (!tlb_cb) { - r = -ENOMEM; - goto error_unlock; + drm_dev_exit(idx); + return -ENOMEM; } /* Vega20+XGMI where PTEs get inadvertently cached in L2 texture cache, @@ -900,7 +986,9 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm, params.immediate = immediate; params.pages_addr = pages_addr; params.unlocked = unlocked; + params.needs_flush = flush_tlb; params.allow_override = allow_override; + INIT_LIST_HEAD(¶ms.tlb_flush_waitlist); /* Implicitly sync to command submissions in the same VM before * unmapping. Sync to moving fences before mapping. @@ -983,24 +1071,18 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm, } r = vm->update_funcs->commit(¶ms, fence); + if (r) + goto error_free; - if (flush_tlb || params.table_freed) { - tlb_cb->vm = vm; - if (fence && *fence && - !dma_fence_add_callback(*fence, &tlb_cb->cb, - amdgpu_vm_tlb_seq_cb)) { - dma_fence_put(vm->last_tlb_flush); - vm->last_tlb_flush = dma_fence_get(*fence); - } else { - amdgpu_vm_tlb_seq_cb(NULL, &tlb_cb->cb); - } + if (params.needs_flush) { + amdgpu_vm_tlb_flush(¶ms, fence, tlb_cb); tlb_cb = NULL; } + amdgpu_vm_pt_free_list(adev, ¶ms); + error_free: kfree(tlb_cb); - -error_unlock: amdgpu_vm_eviction_unlock(vm); drm_dev_exit(idx); return r; @@ -1343,10 +1425,6 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev, struct amdgpu_bo_va_mapping, list); list_del(&mapping->list); - if (vm->pte_support_ats && - mapping->start < AMDGPU_GMC_HOLE_START) - init_pte_value = AMDGPU_PTE_DEFAULT_ATC; - r = amdgpu_vm_update_range(adev, vm, false, false, true, false, resv, mapping->start, mapping->last, init_pte_value, 0, 0, NULL, NULL, @@ -1426,11 +1504,21 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev, } r = amdgpu_vm_bo_update(adev, bo_va, clear); - if (r) - return r; if (unlock) dma_resv_unlock(resv); + if (r) + return r; + + /* Remember evicted DMABuf imports in compute VMs for later + * validation + */ + if (vm->is_compute_context && + bo_va->base.bo->tbo.base.import_attach && + (!bo_va->base.bo->tbo.resource || + bo_va->base.bo->tbo.resource->mem_type == TTM_PL_SYSTEM)) + amdgpu_vm_bo_evicted_user(&bo_va->base); + spin_lock(&vm->status_lock); } spin_unlock(&vm->status_lock); @@ -1559,6 +1647,37 @@ static void amdgpu_vm_bo_insert_map(struct amdgpu_device *adev, trace_amdgpu_vm_bo_map(bo_va, mapping); } +/* Validate operation parameters to prevent potential abuse */ +static int amdgpu_vm_verify_parameters(struct amdgpu_device *adev, + struct amdgpu_bo *bo, + uint64_t saddr, + uint64_t offset, + uint64_t size) +{ + uint64_t tmp, lpfn; + + if (saddr & AMDGPU_GPU_PAGE_MASK + || offset & AMDGPU_GPU_PAGE_MASK + || size & AMDGPU_GPU_PAGE_MASK) + return -EINVAL; + + if (check_add_overflow(saddr, size, &tmp) + || check_add_overflow(offset, size, &tmp) + || size == 0 /* which also leads to end < begin */) + return -EINVAL; + + /* make sure object fit at this offset */ + if (bo && offset + size > amdgpu_bo_size(bo)) + return -EINVAL; + + /* Ensure last pfn not exceed max_pfn */ + lpfn = (saddr + size - 1) >> AMDGPU_GPU_PAGE_SHIFT; + if (lpfn >= adev->vm_manager.max_pfn) + return -EINVAL; + + return 0; +} + /** * amdgpu_vm_bo_map - map bo inside a vm * @@ -1585,21 +1704,14 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev, struct amdgpu_bo *bo = bo_va->base.bo; struct amdgpu_vm *vm = bo_va->base.vm; uint64_t eaddr; + int r; - /* validate the parameters */ - if (saddr & ~PAGE_MASK || offset & ~PAGE_MASK || size & ~PAGE_MASK) - return -EINVAL; - if (saddr + size <= saddr || offset + size <= offset) - return -EINVAL; - - /* make sure object fit at this offset */ - eaddr = saddr + size - 1; - if ((bo && offset + size > amdgpu_bo_size(bo)) || - (eaddr >= adev->vm_manager.max_pfn << AMDGPU_GPU_PAGE_SHIFT)) - return -EINVAL; + r = amdgpu_vm_verify_parameters(adev, bo, saddr, offset, size); + if (r) + return r; saddr /= AMDGPU_GPU_PAGE_SIZE; - eaddr /= AMDGPU_GPU_PAGE_SIZE; + eaddr = saddr + (size - 1) / AMDGPU_GPU_PAGE_SIZE; tmp = amdgpu_vm_it_iter_first(&vm->va, saddr, eaddr); if (tmp) { @@ -1652,17 +1764,9 @@ int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev, uint64_t eaddr; int r; - /* validate the parameters */ - if (saddr & ~PAGE_MASK || offset & ~PAGE_MASK || size & ~PAGE_MASK) - return -EINVAL; - if (saddr + size <= saddr || offset + size <= offset) - return -EINVAL; - - /* make sure object fit at this offset */ - eaddr = saddr + size - 1; - if ((bo && offset + size > amdgpu_bo_size(bo)) || - (eaddr >= adev->vm_manager.max_pfn << AMDGPU_GPU_PAGE_SHIFT)) - return -EINVAL; + r = amdgpu_vm_verify_parameters(adev, bo, saddr, offset, size); + if (r) + return r; /* Allocate all the needed memory */ mapping = kmalloc(sizeof(*mapping), GFP_KERNEL); @@ -1676,7 +1780,7 @@ int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev, } saddr /= AMDGPU_GPU_PAGE_SIZE; - eaddr /= AMDGPU_GPU_PAGE_SIZE; + eaddr = saddr + (size - 1) / AMDGPU_GPU_PAGE_SIZE; mapping->start = saddr; mapping->last = eaddr; @@ -1763,10 +1867,14 @@ int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev, struct amdgpu_bo_va_mapping *before, *after, *tmp, *next; LIST_HEAD(removed); uint64_t eaddr; + int r; + + r = amdgpu_vm_verify_parameters(adev, NULL, saddr, 0, size); + if (r) + return r; - eaddr = saddr + size - 1; saddr /= AMDGPU_GPU_PAGE_SIZE; - eaddr /= AMDGPU_GPU_PAGE_SIZE; + eaddr = saddr + (size - 1) / AMDGPU_GPU_PAGE_SIZE; /* Allocate all the needed memory */ before = kzalloc(sizeof(*before), GFP_KERNEL); @@ -2173,6 +2281,108 @@ long amdgpu_vm_wait_idle(struct amdgpu_vm *vm, long timeout) return dma_fence_wait_timeout(vm->last_unlocked, true, timeout); } +static void amdgpu_vm_destroy_task_info(struct kref *kref) +{ + struct amdgpu_task_info *ti = container_of(kref, struct amdgpu_task_info, refcount); + + kfree(ti); +} + +static inline struct amdgpu_vm * +amdgpu_vm_get_vm_from_pasid(struct amdgpu_device *adev, u32 pasid) +{ + struct amdgpu_vm *vm; + unsigned long flags; + + xa_lock_irqsave(&adev->vm_manager.pasids, flags); + vm = xa_load(&adev->vm_manager.pasids, pasid); + xa_unlock_irqrestore(&adev->vm_manager.pasids, flags); + + return vm; +} + +/** + * amdgpu_vm_put_task_info - reference down the vm task_info ptr + * + * @task_info: task_info struct under discussion. + * + * frees the vm task_info ptr at the last put + */ +void amdgpu_vm_put_task_info(struct amdgpu_task_info *task_info) +{ + kref_put(&task_info->refcount, amdgpu_vm_destroy_task_info); +} + +/** + * amdgpu_vm_get_task_info_vm - Extracts task info for a vm. + * + * @vm: VM to get info from + * + * Returns the reference counted task_info structure, which must be + * referenced down with amdgpu_vm_put_task_info. + */ +struct amdgpu_task_info * +amdgpu_vm_get_task_info_vm(struct amdgpu_vm *vm) +{ + struct amdgpu_task_info *ti = NULL; + + if (vm) { + ti = vm->task_info; + kref_get(&vm->task_info->refcount); + } + + return ti; +} + +/** + * amdgpu_vm_get_task_info_pasid - Extracts task info for a PASID. + * + * @adev: drm device pointer + * @pasid: PASID identifier for VM + * + * Returns the reference counted task_info structure, which must be + * referenced down with amdgpu_vm_put_task_info. + */ +struct amdgpu_task_info * +amdgpu_vm_get_task_info_pasid(struct amdgpu_device *adev, u32 pasid) +{ + return amdgpu_vm_get_task_info_vm( + amdgpu_vm_get_vm_from_pasid(adev, pasid)); +} + +static int amdgpu_vm_create_task_info(struct amdgpu_vm *vm) +{ + vm->task_info = kzalloc(sizeof(struct amdgpu_task_info), GFP_KERNEL); + if (!vm->task_info) + return -ENOMEM; + + kref_init(&vm->task_info->refcount); + return 0; +} + +/** + * amdgpu_vm_set_task_info - Sets VMs task info. + * + * @vm: vm for which to set the info + */ +void amdgpu_vm_set_task_info(struct amdgpu_vm *vm) +{ + if (!vm->task_info) + return; + + if (vm->task_info->pid == current->pid) + return; + + vm->task_info->pid = current->pid; + get_task_comm(vm->task_info->task_name, current); + + if (current->group_leader->mm != current->mm) + return; + + vm->task_info->tgid = current->group_leader->pid; + get_task_comm(vm->task_info->process_name, current->group_leader); +} + /** * amdgpu_vm_init - initialize a vm instance * @@ -2196,6 +2406,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, for (i = 0; i < AMDGPU_MAX_VMHUBS; i++) vm->reserved_vmid[i] = NULL; INIT_LIST_HEAD(&vm->evicted); + INIT_LIST_HEAD(&vm->evicted_user); INIT_LIST_HEAD(&vm->relocated); INIT_LIST_HEAD(&vm->moved); INIT_LIST_HEAD(&vm->idle); @@ -2211,7 +2422,6 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, if (r) return r; - vm->pte_support_ats = false; vm->is_compute_context = false; vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode & @@ -2235,6 +2445,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, mutex_init(&vm->eviction_lock); vm->evicting = false; + vm->tlb_fence_context = dma_fence_context_alloc(1); r = amdgpu_vm_pt_create(adev, vm, adev->vm_manager.root_level, false, &root, xcp_id); @@ -2258,6 +2469,10 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, if (r) goto error_free_root; + r = amdgpu_vm_create_task_info(vm); + if (r) + DRM_DEBUG("Failed to create task info for VM\n"); + amdgpu_bo_unreserve(vm->root.bo); amdgpu_bo_unref(&root_bo); @@ -2297,30 +2512,12 @@ error_free_delayed: */ int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm) { - bool pte_support_ats = (adev->asic_type == CHIP_RAVEN); int r; r = amdgpu_bo_reserve(vm->root.bo, true); if (r) return r; - /* Check if PD needs to be reinitialized and do it before - * changing any other state, in case it fails. - */ - if (pte_support_ats != vm->pte_support_ats) { - /* Sanity checks */ - if (!amdgpu_vm_pt_is_root_clean(adev, vm)) { - r = -EINVAL; - goto unreserve_bo; - } - - vm->pte_support_ats = pte_support_ats; - r = amdgpu_vm_pt_clear(adev, vm, to_amdgpu_bo_vm(vm->root.bo), - false); - if (r) - goto unreserve_bo; - } - /* Update VM state */ vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode & AMDGPU_VM_USE_CPU_FOR_COMPUTE); @@ -2397,6 +2594,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) root = amdgpu_bo_ref(vm->root.bo); amdgpu_bo_reserve(root, true); + amdgpu_vm_put_task_info(vm->task_info); amdgpu_vm_set_pasid(adev, vm, 0); dma_fence_wait(vm->last_unlocked, false); dma_fence_put(vm->last_unlocked); @@ -2554,48 +2752,6 @@ int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) } /** - * amdgpu_vm_get_task_info - Extracts task info for a PASID. - * - * @adev: drm device pointer - * @pasid: PASID identifier for VM - * @task_info: task_info to fill. - */ -void amdgpu_vm_get_task_info(struct amdgpu_device *adev, u32 pasid, - struct amdgpu_task_info *task_info) -{ - struct amdgpu_vm *vm; - unsigned long flags; - - xa_lock_irqsave(&adev->vm_manager.pasids, flags); - - vm = xa_load(&adev->vm_manager.pasids, pasid); - if (vm) - *task_info = vm->task_info; - - xa_unlock_irqrestore(&adev->vm_manager.pasids, flags); -} - -/** - * amdgpu_vm_set_task_info - Sets VMs task info. - * - * @vm: vm for which to set the info - */ -void amdgpu_vm_set_task_info(struct amdgpu_vm *vm) -{ - if (vm->task_info.pid) - return; - - vm->task_info.pid = current->pid; - get_task_comm(vm->task_info.task_name, current); - - if (current->group_leader->mm != current->mm) - return; - - vm->task_info.tgid = current->group_leader->pid; - get_task_comm(vm->task_info.process_name, current->group_leader); -} - -/** * amdgpu_vm_handle_fault - graceful handling of VM faults. * @adev: amdgpu device pointer * @pasid: PASID of the VM @@ -2823,6 +2979,14 @@ void amdgpu_vm_update_fault_cache(struct amdgpu_device *adev, if (vm && status) { vm->fault_info.addr = addr; vm->fault_info.status = status; + /* + * Update the fault information globally for later usage + * when vm could be stale or freed. + */ + adev->vm_manager.fault_info.addr = addr; + adev->vm_manager.fault_info.vmhub = vmhub; + adev->vm_manager.fault_info.status = status; + if (AMDGPU_IS_GFXHUB(vmhub)) { vm->fault_info.vmhub = AMDGPU_VMHUB_TYPE_GFX; vm->fault_info.vmhub |= diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index 4740dd65b99d..54d7da396de0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -135,8 +135,21 @@ struct amdgpu_mem_stats; #define AMDGPU_IS_MMHUB0(x) ((x) >= AMDGPU_MMHUB0_START && (x) < AMDGPU_MMHUB1_START) #define AMDGPU_IS_MMHUB1(x) ((x) >= AMDGPU_MMHUB1_START && (x) < AMDGPU_MAX_VMHUBS) -/* Reserve 2MB at top/bottom of address space for kernel use */ -#define AMDGPU_VA_RESERVED_SIZE (2ULL << 20) +/* Reserve space at top/bottom of address space for kernel use */ +#define AMDGPU_VA_RESERVED_CSA_SIZE (2ULL << 20) +#define AMDGPU_VA_RESERVED_CSA_START(adev) (((adev)->vm_manager.max_pfn \ + << AMDGPU_GPU_PAGE_SHIFT) \ + - AMDGPU_VA_RESERVED_CSA_SIZE) +#define AMDGPU_VA_RESERVED_SEQ64_SIZE (2ULL << 20) +#define AMDGPU_VA_RESERVED_SEQ64_START(adev) (AMDGPU_VA_RESERVED_CSA_START(adev) \ + - AMDGPU_VA_RESERVED_SEQ64_SIZE) +#define AMDGPU_VA_RESERVED_TRAP_SIZE (2ULL << 12) +#define AMDGPU_VA_RESERVED_TRAP_START(adev) (AMDGPU_VA_RESERVED_SEQ64_START(adev) \ + - AMDGPU_VA_RESERVED_TRAP_SIZE) +#define AMDGPU_VA_RESERVED_BOTTOM (1ULL << 16) +#define AMDGPU_VA_RESERVED_TOP (AMDGPU_VA_RESERVED_TRAP_SIZE + \ + AMDGPU_VA_RESERVED_SEQ64_SIZE + \ + AMDGPU_VA_RESERVED_CSA_SIZE) /* See vm_update_mode */ #define AMDGPU_VM_USE_CPU_FOR_GFX (1 << 0) @@ -190,10 +203,11 @@ struct amdgpu_vm_pte_funcs { }; struct amdgpu_task_info { - char process_name[TASK_COMM_LEN]; - char task_name[TASK_COMM_LEN]; - pid_t pid; - pid_t tgid; + char process_name[TASK_COMM_LEN]; + char task_name[TASK_COMM_LEN]; + pid_t pid; + pid_t tgid; + struct kref refcount; }; /** @@ -243,15 +257,20 @@ struct amdgpu_vm_update_params { unsigned int num_dw_left; /** - * @table_freed: return true if page table is freed when updating + * @needs_flush: true whenever we need to invalidate the TLB */ - bool table_freed; + bool needs_flush; /** * @allow_override: true for memory that is not uncached: allows MTYPE * to be overridden for NUMA local memory. */ bool allow_override; + + /** + * @tlb_flush_waitlist: temporary storage for BOs until tlb_flush + */ + struct list_head tlb_flush_waitlist; }; struct amdgpu_vm_update_funcs { @@ -288,9 +307,12 @@ struct amdgpu_vm { /* Lock to protect vm_bo add/del/move on all lists of vm */ spinlock_t status_lock; - /* BOs who needs a validation */ + /* Per-VM and PT BOs who needs a validation */ struct list_head evicted; + /* BOs for user mode queues that need a validation */ + struct list_head evicted_user; + /* PT BOs which relocated and their parent need an update */ struct list_head relocated; @@ -325,6 +347,7 @@ struct amdgpu_vm { atomic64_t tlb_seq; struct dma_fence *last_tlb_flush; atomic64_t kfd_last_flushed_seq; + uint64_t tlb_fence_context; /* How many times we had to re-generate the page tables */ uint64_t generation; @@ -341,9 +364,6 @@ struct amdgpu_vm { /* Functions to use for VM table updates */ const struct amdgpu_vm_update_funcs *update_funcs; - /* Flag to indicate ATS support from PTE for GFX9 */ - bool pte_support_ats; - /* Up to 128 pending retry page faults */ DECLARE_KFIFO(faults, u64, 128); @@ -357,7 +377,7 @@ struct amdgpu_vm { uint64_t pd_phys_addr; /* Some basic info about the task */ - struct amdgpu_task_info task_info; + struct amdgpu_task_info *task_info; /* Store positions of group of BOs */ struct ttm_lru_bulk_move lru_bulk_move; @@ -408,6 +428,8 @@ struct amdgpu_vm_manager { * look up VM of a page fault */ struct xarray pasids; + /* Global registration of recent page fault information */ + struct amdgpu_vm_fault_info fault_info; }; struct amdgpu_bo_va_mapping; @@ -434,9 +456,10 @@ int amdgpu_vm_lock_pd(struct amdgpu_vm *vm, struct drm_exec *exec, unsigned int num_fences); bool amdgpu_vm_ready(struct amdgpu_vm *vm); uint64_t amdgpu_vm_generation(struct amdgpu_device *adev, struct amdgpu_vm *vm); -int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm, - int (*callback)(void *p, struct amdgpu_bo *bo), - void *param); +int amdgpu_vm_validate(struct amdgpu_device *adev, struct amdgpu_vm *vm, + struct ww_acquire_ctx *ticket, + int (*callback)(void *p, struct amdgpu_bo *bo), + void *param); int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_pipe_sync); int amdgpu_vm_update_pdes(struct amdgpu_device *adev, struct amdgpu_vm *vm, bool immediate); @@ -497,8 +520,14 @@ bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring, struct amdgpu_job *job); void amdgpu_vm_check_compute_bug(struct amdgpu_device *adev); -void amdgpu_vm_get_task_info(struct amdgpu_device *adev, u32 pasid, - struct amdgpu_task_info *task_info); +struct amdgpu_task_info * +amdgpu_vm_get_task_info_pasid(struct amdgpu_device *adev, u32 pasid); + +struct amdgpu_task_info * +amdgpu_vm_get_task_info_vm(struct amdgpu_vm *vm); + +void amdgpu_vm_put_task_info(struct amdgpu_task_info *task_info); + bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid, u32 vmid, u32 node_id, uint64_t addr, bool write_fault); @@ -516,8 +545,6 @@ int amdgpu_vm_pt_create(struct amdgpu_device *adev, struct amdgpu_vm *vm, int level, bool immediate, struct amdgpu_bo_vm **vmbo, int32_t xcp_id); void amdgpu_vm_pt_free_root(struct amdgpu_device *adev, struct amdgpu_vm *vm); -bool amdgpu_vm_pt_is_root_clean(struct amdgpu_device *adev, - struct amdgpu_vm *vm); int amdgpu_vm_pde_update(struct amdgpu_vm_update_params *params, struct amdgpu_vm_bo_base *entry); @@ -525,6 +552,8 @@ int amdgpu_vm_ptes_update(struct amdgpu_vm_update_params *params, uint64_t start, uint64_t end, uint64_t dst, uint64_t flags); void amdgpu_vm_pt_free_work(struct work_struct *work); +void amdgpu_vm_pt_free_list(struct amdgpu_device *adev, + struct amdgpu_vm_update_params *params); #if defined(CONFIG_DEBUG_FS) void amdgpu_debugfs_vm_bo_info(struct amdgpu_vm *vm, struct seq_file *m); @@ -590,5 +619,8 @@ void amdgpu_vm_update_fault_cache(struct amdgpu_device *adev, uint64_t addr, uint32_t status, unsigned int vmhub); +void amdgpu_vm_tlb_fence_create(struct amdgpu_device *adev, + struct amdgpu_vm *vm, + struct dma_fence **fence); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c index 6e31621452de..3895bd7d176a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c @@ -108,7 +108,9 @@ static int amdgpu_vm_cpu_update(struct amdgpu_vm_update_params *p, static int amdgpu_vm_cpu_commit(struct amdgpu_vm_update_params *p, struct dma_fence **fence) { - /* Flush HDP */ + if (p->needs_flush) + atomic64_inc(&p->vm->tlb_seq); + mb(); amdgpu_device_flush_hdp(p->adev, NULL); return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c index a160265ddc07..7fdd306a48a0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c @@ -90,22 +90,6 @@ static unsigned int amdgpu_vm_pt_num_entries(struct amdgpu_device *adev, } /** - * amdgpu_vm_pt_num_ats_entries - return the number of ATS entries in the root PD - * - * @adev: amdgpu_device pointer - * - * Returns: - * The number of entries in the root page directory which needs the ATS setting. - */ -static unsigned int amdgpu_vm_pt_num_ats_entries(struct amdgpu_device *adev) -{ - unsigned int shift; - - shift = amdgpu_vm_pt_level_shift(adev, adev->vm_manager.root_level); - return AMDGPU_GMC_HOLE_START >> (shift + AMDGPU_GPU_PAGE_SHIFT); -} - -/** * amdgpu_vm_pt_entries_mask - the mask to get the entry number of a PD/PT * * @adev: amdgpu_device pointer @@ -379,7 +363,7 @@ int amdgpu_vm_pt_clear(struct amdgpu_device *adev, struct amdgpu_vm *vm, struct ttm_operation_ctx ctx = { true, false }; struct amdgpu_vm_update_params params; struct amdgpu_bo *ancestor = &vmbo->bo; - unsigned int entries, ats_entries; + unsigned int entries; struct amdgpu_bo *bo = &vmbo->bo; uint64_t addr; int r, idx; @@ -394,27 +378,6 @@ int amdgpu_vm_pt_clear(struct amdgpu_device *adev, struct amdgpu_vm *vm, } entries = amdgpu_bo_size(bo) / 8; - if (!vm->pte_support_ats) { - ats_entries = 0; - - } else if (!bo->parent) { - ats_entries = amdgpu_vm_pt_num_ats_entries(adev); - ats_entries = min(ats_entries, entries); - entries -= ats_entries; - - } else { - struct amdgpu_vm_bo_base *pt; - - pt = ancestor->vm_bo; - ats_entries = amdgpu_vm_pt_num_ats_entries(adev); - if ((pt - to_amdgpu_bo_vm(vm->root.bo)->entries) >= - ats_entries) { - ats_entries = 0; - } else { - ats_entries = entries; - entries = 0; - } - } r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); if (r) @@ -445,44 +408,24 @@ int amdgpu_vm_pt_clear(struct amdgpu_device *adev, struct amdgpu_vm *vm, goto exit; addr = 0; - if (ats_entries) { - uint64_t value = 0, flags; - flags = AMDGPU_PTE_DEFAULT_ATC; + uint64_t value = 0, flags = 0; + if (adev->asic_type >= CHIP_VEGA10) { if (level != AMDGPU_VM_PTB) { /* Handle leaf PDEs as PTEs */ flags |= AMDGPU_PDE_PTE; - amdgpu_gmc_get_vm_pde(adev, level, &value, &flags); + amdgpu_gmc_get_vm_pde(adev, level, + &value, &flags); + } else { + /* Workaround for fault priority problem on GMC9 */ + flags = AMDGPU_PTE_EXECUTABLE; } - - r = vm->update_funcs->update(¶ms, vmbo, addr, 0, - ats_entries, value, flags); - if (r) - goto exit; - - addr += ats_entries * 8; } - if (entries) { - uint64_t value = 0, flags = 0; - - if (adev->asic_type >= CHIP_VEGA10) { - if (level != AMDGPU_VM_PTB) { - /* Handle leaf PDEs as PTEs */ - flags |= AMDGPU_PDE_PTE; - amdgpu_gmc_get_vm_pde(adev, level, - &value, &flags); - } else { - /* Workaround for fault priority problem on GMC9 */ - flags = AMDGPU_PTE_EXECUTABLE; - } - } - - r = vm->update_funcs->update(¶ms, vmbo, addr, 0, entries, - value, flags); - if (r) - goto exit; - } + r = vm->update_funcs->update(¶ms, vmbo, addr, 0, entries, + value, flags); + if (r) + goto exit; r = vm->update_funcs->commit(¶ms, NULL); exit: @@ -679,79 +622,76 @@ void amdgpu_vm_pt_free_work(struct work_struct *work) } /** - * amdgpu_vm_pt_free_dfs - free PD/PT levels + * amdgpu_vm_pt_free_list - free PD/PT levels * * @adev: amdgpu device structure - * @vm: amdgpu vm structure - * @start: optional cursor where to start freeing PDs/PTs - * @unlocked: vm resv unlock status + * @params: see amdgpu_vm_update_params definition * - * Free the page directory or page table level and all sub levels. + * Free the page directory objects saved in the flush list */ -static void amdgpu_vm_pt_free_dfs(struct amdgpu_device *adev, - struct amdgpu_vm *vm, - struct amdgpu_vm_pt_cursor *start, - bool unlocked) +void amdgpu_vm_pt_free_list(struct amdgpu_device *adev, + struct amdgpu_vm_update_params *params) { - struct amdgpu_vm_pt_cursor cursor; - struct amdgpu_vm_bo_base *entry; + struct amdgpu_vm_bo_base *entry, *next; + struct amdgpu_vm *vm = params->vm; + bool unlocked = params->unlocked; + + if (list_empty(¶ms->tlb_flush_waitlist)) + return; if (unlocked) { spin_lock(&vm->status_lock); - for_each_amdgpu_vm_pt_dfs_safe(adev, vm, start, cursor, entry) - list_move(&entry->vm_status, &vm->pt_freed); - - if (start) - list_move(&start->entry->vm_status, &vm->pt_freed); + list_splice_init(¶ms->tlb_flush_waitlist, &vm->pt_freed); spin_unlock(&vm->status_lock); schedule_work(&vm->pt_free_work); return; } - for_each_amdgpu_vm_pt_dfs_safe(adev, vm, start, cursor, entry) + list_for_each_entry_safe(entry, next, ¶ms->tlb_flush_waitlist, vm_status) amdgpu_vm_pt_free(entry); - - if (start) - amdgpu_vm_pt_free(start->entry); } /** - * amdgpu_vm_pt_free_root - free root PD - * @adev: amdgpu device structure - * @vm: amdgpu vm structure + * amdgpu_vm_pt_add_list - add PD/PT level to the flush list * - * Free the root page directory and everything below it. + * @params: parameters for the update + * @cursor: first PT entry to start DF search from, non NULL + * + * This list will be freed after TLB flush. */ -void amdgpu_vm_pt_free_root(struct amdgpu_device *adev, struct amdgpu_vm *vm) +static void amdgpu_vm_pt_add_list(struct amdgpu_vm_update_params *params, + struct amdgpu_vm_pt_cursor *cursor) { - amdgpu_vm_pt_free_dfs(adev, vm, NULL, false); + struct amdgpu_vm_pt_cursor seek; + struct amdgpu_vm_bo_base *entry; + + spin_lock(¶ms->vm->status_lock); + for_each_amdgpu_vm_pt_dfs_safe(params->adev, params->vm, cursor, seek, entry) { + if (entry && entry->bo) + list_move(&entry->vm_status, ¶ms->tlb_flush_waitlist); + } + + /* enter start node now */ + list_move(&cursor->entry->vm_status, ¶ms->tlb_flush_waitlist); + spin_unlock(¶ms->vm->status_lock); } /** - * amdgpu_vm_pt_is_root_clean - check if a root PD is clean - * - * @adev: amdgpu_device pointer - * @vm: the VM to check - * - * Check all entries of the root PD, if any subsequent PDs are allocated, - * it means there are page table creating and filling, and is no a clean - * VM + * amdgpu_vm_pt_free_root - free root PD + * @adev: amdgpu device structure + * @vm: amdgpu vm structure * - * Returns: - * 0 if this VM is clean + * Free the root page directory and everything below it. */ -bool amdgpu_vm_pt_is_root_clean(struct amdgpu_device *adev, - struct amdgpu_vm *vm) +void amdgpu_vm_pt_free_root(struct amdgpu_device *adev, struct amdgpu_vm *vm) { - enum amdgpu_vm_level root = adev->vm_manager.root_level; - unsigned int entries = amdgpu_vm_pt_num_entries(adev, root); - unsigned int i = 0; + struct amdgpu_vm_pt_cursor cursor; + struct amdgpu_vm_bo_base *entry; - for (i = 0; i < entries; i++) { - if (to_amdgpu_bo_vm(vm->root.bo)->entries[i].bo) - return false; + for_each_amdgpu_vm_pt_dfs_safe(adev, vm, NULL, cursor, entry) { + if (entry) + amdgpu_vm_pt_free(entry); } - return true; } /** @@ -1027,7 +967,7 @@ int amdgpu_vm_ptes_update(struct amdgpu_vm_update_params *params, trace_amdgpu_vm_update_ptes(params, frag_start, upd_end, min(nptes, 32u), dst, incr, upd_flags, - vm->task_info.tgid, + vm->task_info ? vm->task_info->tgid : 0, vm->immediate.fence_context); amdgpu_vm_pte_update_flags(params, to_amdgpu_bo_vm(pt), cursor.level, pe_start, dst, @@ -1056,10 +996,8 @@ int amdgpu_vm_ptes_update(struct amdgpu_vm_update_params *params, while (cursor.pfn < frag_start) { /* Make sure previous mapping is freed */ if (cursor.entry->bo) { - params->table_freed = true; - amdgpu_vm_pt_free_dfs(adev, params->vm, - &cursor, - params->unlocked); + params->needs_flush = true; + amdgpu_vm_pt_add_list(params, &cursor); } amdgpu_vm_pt_next(adev, &cursor); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c index 349416e176a1..66e8a016126b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c @@ -126,6 +126,10 @@ static int amdgpu_vm_sdma_commit(struct amdgpu_vm_update_params *p, WARN_ON(ib->length_dw == 0); amdgpu_ring_pad_ib(ring, ib); + + if (p->needs_flush) + atomic64_inc(&p->vm->tlb_seq); + WARN_ON(ib->length_dw > p->num_dw_left); f = amdgpu_job_submit(p->job); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_tlb_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_tlb_fence.c new file mode 100644 index 000000000000..51cddfa3f1e8 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_tlb_fence.c @@ -0,0 +1,112 @@ +// SPDX-License-Identifier: GPL-2.0 OR MIT +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include <linux/dma-fence.h> +#include <linux/workqueue.h> + +#include "amdgpu.h" +#include "amdgpu_vm.h" +#include "amdgpu_gmc.h" + +struct amdgpu_tlb_fence { + struct dma_fence base; + struct amdgpu_device *adev; + struct dma_fence *dependency; + struct work_struct work; + spinlock_t lock; + uint16_t pasid; + +}; + +static const char *amdgpu_tlb_fence_get_driver_name(struct dma_fence *fence) +{ + return "amdgpu tlb fence"; +} + +static const char *amdgpu_tlb_fence_get_timeline_name(struct dma_fence *f) +{ + return "amdgpu tlb timeline"; +} + +static void amdgpu_tlb_fence_work(struct work_struct *work) +{ + struct amdgpu_tlb_fence *f = container_of(work, typeof(*f), work); + int r; + + if (f->dependency) { + dma_fence_wait(f->dependency, false); + dma_fence_put(f->dependency); + f->dependency = NULL; + } + + r = amdgpu_gmc_flush_gpu_tlb_pasid(f->adev, f->pasid, 2, true, 0); + if (r) { + dev_err(f->adev->dev, "TLB flush failed for PASID %d.\n", + f->pasid); + dma_fence_set_error(&f->base, r); + } + + dma_fence_signal(&f->base); + dma_fence_put(&f->base); +} + +static const struct dma_fence_ops amdgpu_tlb_fence_ops = { + .use_64bit_seqno = true, + .get_driver_name = amdgpu_tlb_fence_get_driver_name, + .get_timeline_name = amdgpu_tlb_fence_get_timeline_name +}; + +void amdgpu_vm_tlb_fence_create(struct amdgpu_device *adev, struct amdgpu_vm *vm, + struct dma_fence **fence) +{ + struct amdgpu_tlb_fence *f; + + f = kmalloc(sizeof(*f), GFP_KERNEL); + if (!f) { + /* + * We can't fail since the PDEs and PTEs are already updated, so + * just block for the dependency and execute the TLB flush + */ + if (*fence) + dma_fence_wait(*fence, false); + + amdgpu_gmc_flush_gpu_tlb_pasid(adev, vm->pasid, 2, true, 0); + *fence = dma_fence_get_stub(); + return; + } + + f->adev = adev; + f->dependency = *fence; + f->pasid = vm->pasid; + INIT_WORK(&f->work, amdgpu_tlb_fence_work); + spin_lock_init(&f->lock); + + dma_fence_init(&f->base, &amdgpu_tlb_fence_ops, &f->lock, + vm->tlb_fence_context, atomic64_read(&vm->tlb_seq)); + + /* TODO: We probably need a separate wq here */ + dma_fence_get(&f->base); + schedule_work(&f->work); + + *fence = &f->base; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c index b9a15d51eb5c..c23d97d34b7e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c @@ -205,7 +205,7 @@ disable_dpm: dpm_ctl &= 0xfffffffe; /* Disable DPM */ WREG32(vpe_get_reg_offset(vpe, 0, vpe->regs.dpm_enable), dpm_ctl); dev_dbg(adev->dev, "%s: disable vpe dpm\n", __func__); - return 0; + return -EINVAL; } int amdgpu_vpe_psp_update_sram(struct amdgpu_device *adev) @@ -297,6 +297,10 @@ static int vpe_early_init(void *handle) case IP_VERSION(6, 1, 0): vpe_v6_1_set_funcs(vpe); break; + case IP_VERSION(6, 1, 1): + vpe_v6_1_set_funcs(vpe); + vpe->collaborate_mode = true; + break; default: return -EINVAL; } @@ -304,6 +308,8 @@ static int vpe_early_init(void *handle) vpe_set_ring_funcs(adev); vpe_set_regs(vpe); + dev_info(adev->dev, "VPE: collaborate mode %s", vpe->collaborate_mode ? "true" : "false"); + return 0; } @@ -390,6 +396,12 @@ static int vpe_hw_init(void *handle) struct amdgpu_vpe *vpe = &adev->vpe; int ret; + /* Power on VPE */ + ret = amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VPE, + AMD_PG_STATE_UNGATE); + if (ret) + return ret; + ret = vpe_load_microcode(vpe); if (ret) return ret; @@ -457,6 +469,18 @@ static uint64_t vpe_get_csa_mc_addr(struct amdgpu_ring *ring, uint32_t vmid) return csa_mc_addr; } +static void vpe_ring_emit_pred_exec(struct amdgpu_ring *ring, + uint32_t device_select, + uint32_t exec_count) +{ + if (!ring->adev->vpe.collaborate_mode) + return; + + amdgpu_ring_write(ring, VPE_CMD_HEADER(VPE_CMD_OPCODE_PRED_EXE, 0) | + (device_select << 16)); + amdgpu_ring_write(ring, exec_count & 0x1fff); +} + static void vpe_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_job *job, struct amdgpu_ib *ib, @@ -505,6 +529,8 @@ static void vpe_ring_emit_pipeline_sync(struct amdgpu_ring *ring) uint32_t seq = ring->fence_drv.sync_seq; uint64_t addr = ring->fence_drv.gpu_addr; + vpe_ring_emit_pred_exec(ring, 0, 6); + /* wait for idle */ amdgpu_ring_write(ring, VPE_CMD_HEADER(VPE_CMD_OPCODE_POLL_REGMEM, VPE_POLL_REGMEM_SUBOP_REGMEM) | @@ -520,6 +546,8 @@ static void vpe_ring_emit_pipeline_sync(struct amdgpu_ring *ring) static void vpe_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t val) { + vpe_ring_emit_pred_exec(ring, 0, 3); + amdgpu_ring_write(ring, VPE_CMD_HEADER(VPE_CMD_OPCODE_REG_WRITE, 0)); amdgpu_ring_write(ring, reg << 2); amdgpu_ring_write(ring, val); @@ -528,6 +556,8 @@ static void vpe_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t static void vpe_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, uint32_t val, uint32_t mask) { + vpe_ring_emit_pred_exec(ring, 0, 6); + amdgpu_ring_write(ring, VPE_CMD_HEADER(VPE_CMD_OPCODE_POLL_REGMEM, VPE_POLL_REGMEM_SUBOP_REGMEM) | VPE_CMD_POLL_REGMEM_HEADER_FUNC(3) | /* equal */ @@ -546,34 +576,21 @@ static void vpe_ring_emit_vm_flush(struct amdgpu_ring *ring, unsigned int vmid, amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); } -static unsigned int vpe_ring_init_cond_exec(struct amdgpu_ring *ring) +static unsigned int vpe_ring_init_cond_exec(struct amdgpu_ring *ring, + uint64_t addr) { unsigned int ret; amdgpu_ring_write(ring, VPE_CMD_HEADER(VPE_CMD_OPCODE_COND_EXE, 0)); - amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr)); - amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr)); + amdgpu_ring_write(ring, lower_32_bits(addr)); + amdgpu_ring_write(ring, upper_32_bits(addr)); amdgpu_ring_write(ring, 1); - ret = ring->wptr & ring->buf_mask;/* this is the offset we need patch later */ - amdgpu_ring_write(ring, 0x55aa55aa);/* insert dummy here and patch it later */ + ret = ring->wptr & ring->buf_mask; + amdgpu_ring_write(ring, 0); return ret; } -static void vpe_ring_patch_cond_exec(struct amdgpu_ring *ring, unsigned int offset) -{ - unsigned int cur; - - WARN_ON_ONCE(offset > ring->buf_mask); - WARN_ON_ONCE(ring->ring[offset] != 0x55aa55aa); - - cur = (ring->wptr - 1) & ring->buf_mask; - if (cur > offset) - ring->ring[offset] = cur - offset; - else - ring->ring[offset] = (ring->buf_mask + 1) - offset + cur; -} - static int vpe_ring_preempt_ib(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; @@ -695,16 +712,22 @@ static void vpe_ring_set_wptr(struct amdgpu_ring *ring) upper_32_bits(ring->wptr << 2)); atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr << 2); WDOORBELL64(ring->doorbell_index, ring->wptr << 2); + if (vpe->collaborate_mode) + WDOORBELL64(ring->doorbell_index + 4, ring->wptr << 2); } else { - dev_dbg(adev->dev, "Not using doorbell, \ - regVPEC_QUEUE0_RB_WPTR == 0x%08x, \ - regVPEC_QUEUE0_RB_WPTR_HI == 0x%08x\n", - lower_32_bits(ring->wptr << 2), - upper_32_bits(ring->wptr << 2)); - WREG32(vpe_get_reg_offset(vpe, ring->me, vpe->regs.queue0_rb_wptr_lo), - lower_32_bits(ring->wptr << 2)); - WREG32(vpe_get_reg_offset(vpe, ring->me, vpe->regs.queue0_rb_wptr_hi), - upper_32_bits(ring->wptr << 2)); + int i; + + for (i = 0; i < vpe->num_instances; i++) { + dev_dbg(adev->dev, "Not using doorbell, \ + regVPEC_QUEUE0_RB_WPTR == 0x%08x, \ + regVPEC_QUEUE0_RB_WPTR_HI == 0x%08x\n", + lower_32_bits(ring->wptr << 2), + upper_32_bits(ring->wptr << 2)); + WREG32(vpe_get_reg_offset(vpe, i, vpe->regs.queue0_rb_wptr_lo), + lower_32_bits(ring->wptr << 2)); + WREG32(vpe_get_reg_offset(vpe, i, vpe->regs.queue0_rb_wptr_hi), + upper_32_bits(ring->wptr << 2)); + } } } @@ -864,7 +887,6 @@ static const struct amdgpu_ring_funcs vpe_ring_funcs = { .test_ring = vpe_ring_test_ring, .test_ib = vpe_ring_test_ib, .init_cond_exec = vpe_ring_init_cond_exec, - .patch_cond_exec = vpe_ring_patch_cond_exec, .preempt_ib = vpe_ring_preempt_ib, .begin_use = vpe_ring_begin_use, .end_use = vpe_ring_end_use, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.h index 1153ddaea64d..231d86d0953e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.h @@ -27,6 +27,8 @@ #include "amdgpu_irq.h" #include "vpe_6_1_fw_if.h" +#define AMDGPU_MAX_VPE_INSTANCES 2 + struct amdgpu_vpe; struct vpe_funcs { @@ -74,6 +76,9 @@ struct amdgpu_vpe { uint32_t *cmdbuf_cpu_addr; struct delayed_work idle_work; bool context_started; + + uint32_t num_instances; + bool collaborate_mode; }; int amdgpu_vpe_psp_update_sram(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c index 8db880244324..6c30eceec896 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c @@ -450,6 +450,7 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man, { struct amdgpu_vram_mgr *mgr = to_vram_mgr(man); struct amdgpu_device *adev = to_amdgpu_device(mgr); + struct amdgpu_bo *bo = ttm_to_amdgpu_bo(tbo); u64 vis_usage = 0, max_bytes, min_block_size; struct amdgpu_vram_mgr_resource *vres; u64 size, remaining_size, lpfn, fpfn; @@ -468,7 +469,7 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man, if (tbo->type != ttm_bo_type_kernel) max_bytes -= AMDGPU_VM_RESERVED_VRAM; - if (place->flags & TTM_PL_FLAG_CONTIGUOUS) { + if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS) { pages_per_block = ~0ul; } else { #ifdef CONFIG_TRANSPARENT_HUGEPAGE @@ -477,7 +478,7 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man, /* default to 2MB */ pages_per_block = 2UL << (20UL - PAGE_SHIFT); #endif - pages_per_block = max_t(uint32_t, pages_per_block, + pages_per_block = max_t(u32, pages_per_block, tbo->page_alignment); } @@ -498,9 +499,12 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man, if (place->flags & TTM_PL_FLAG_TOPDOWN) vres->flags |= DRM_BUDDY_TOPDOWN_ALLOCATION; - if (place->flags & TTM_PL_FLAG_CONTIGUOUS) + if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS) vres->flags |= DRM_BUDDY_CONTIGUOUS_ALLOCATION; + if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CLEARED) + vres->flags |= DRM_BUDDY_CLEAR_ALLOCATION; + if (fpfn || lpfn != mgr->mm.size) /* Allocate blocks in desired range */ vres->flags |= DRM_BUDDY_RANGE_ALLOCATION; @@ -514,21 +518,31 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man, else min_block_size = mgr->default_page_size; - BUG_ON(min_block_size < mm->chunk_size); - /* Limit maximum size to 2GiB due to SG table limitations */ size = min(remaining_size, 2ULL << 30); if ((size >= (u64)pages_per_block << PAGE_SHIFT) && - !(size & (((u64)pages_per_block << PAGE_SHIFT) - 1))) + !(size & (((u64)pages_per_block << PAGE_SHIFT) - 1))) min_block_size = (u64)pages_per_block << PAGE_SHIFT; + BUG_ON(min_block_size < mm->chunk_size); + r = drm_buddy_alloc_blocks(mm, fpfn, lpfn, size, min_block_size, &vres->blocks, vres->flags); + + if (unlikely(r == -ENOSPC) && pages_per_block == ~0ul && + !(place->flags & TTM_PL_FLAG_CONTIGUOUS)) { + vres->flags &= ~DRM_BUDDY_CONTIGUOUS_ALLOCATION; + pages_per_block = max_t(u32, 2UL << (20UL - PAGE_SHIFT), + tbo->page_alignment); + + continue; + } + if (unlikely(r)) goto error_free_blocks; @@ -571,7 +585,7 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man, return 0; error_free_blocks: - drm_buddy_free_list(mm, &vres->blocks); + drm_buddy_free_list(mm, &vres->blocks, 0); mutex_unlock(&mgr->lock); error_fini: ttm_resource_fini(man, &vres->base); @@ -604,7 +618,7 @@ static void amdgpu_vram_mgr_del(struct ttm_resource_manager *man, amdgpu_vram_mgr_do_reserve(man); - drm_buddy_free_list(mm, &vres->blocks); + drm_buddy_free_list(mm, &vres->blocks, vres->flags); mutex_unlock(&mgr->lock); atomic64_sub(vis_usage, &mgr->vis_usage); @@ -912,7 +926,7 @@ void amdgpu_vram_mgr_fini(struct amdgpu_device *adev) kfree(rsv); list_for_each_entry_safe(rsv, temp, &mgr->reserved_pages, blocks) { - drm_buddy_free_list(&mgr->mm, &rsv->allocated); + drm_buddy_free_list(&mgr->mm, &rsv->allocated, 0); kfree(rsv); } if (!adev->gmc.is_app_apu) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h index 0e04e42cf809..b256cbc2bc27 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h @@ -53,10 +53,20 @@ static inline u64 amdgpu_vram_mgr_block_size(struct drm_buddy_block *block) return (u64)PAGE_SIZE << drm_buddy_block_order(block); } +static inline bool amdgpu_vram_mgr_is_cleared(struct drm_buddy_block *block) +{ + return drm_buddy_block_is_clear(block); +} + static inline struct amdgpu_vram_mgr_resource * to_amdgpu_vram_mgr_resource(struct ttm_resource *res) { return container_of(res, struct amdgpu_vram_mgr_resource, base); } +static inline void amdgpu_vram_mgr_set_cleared(struct ttm_resource *res) +{ + to_amdgpu_vram_mgr_resource(res)->flags |= DRM_BUDDY_CLEARED; +} + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c index a6c88f2fe6e5..dd2ec48cf5c2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c @@ -1035,15 +1035,88 @@ int amdgpu_xgmi_remove_device(struct amdgpu_device *adev) return 0; } +static int xgmi_v6_4_0_aca_bank_parser(struct aca_handle *handle, struct aca_bank *bank, + enum aca_smu_type type, void *data) +{ + struct amdgpu_device *adev = handle->adev; + struct aca_bank_info info; + const char *error_str; + u64 status, count; + int ret, ext_error_code; + + ret = aca_bank_info_decode(bank, &info); + if (ret) + return ret; + + status = bank->regs[ACA_REG_IDX_STATUS]; + ext_error_code = ACA_REG__STATUS__ERRORCODEEXT(status); + + error_str = ext_error_code < ARRAY_SIZE(xgmi_v6_4_0_ras_error_code_ext) ? + xgmi_v6_4_0_ras_error_code_ext[ext_error_code] : NULL; + if (error_str) + dev_info(adev->dev, "%s detected\n", error_str); + + count = ACA_REG__MISC0__ERRCNT(bank->regs[ACA_REG_IDX_MISC0]); + + switch (type) { + case ACA_SMU_TYPE_UE: + if (ext_error_code != 0 && ext_error_code != 9) + count = 0ULL; + + ret = aca_error_cache_log_bank_error(handle, &info, ACA_ERROR_TYPE_UE, count); + break; + case ACA_SMU_TYPE_CE: + count = ext_error_code == 6 ? count : 0ULL; + ret = aca_error_cache_log_bank_error(handle, &info, ACA_ERROR_TYPE_CE, count); + break; + default: + return -EINVAL; + } + + return ret; +} + +static const struct aca_bank_ops xgmi_v6_4_0_aca_bank_ops = { + .aca_bank_parser = xgmi_v6_4_0_aca_bank_parser, +}; + +static const struct aca_info xgmi_v6_4_0_aca_info = { + .hwip = ACA_HWIP_TYPE_PCS_XGMI, + .mask = ACA_ERROR_UE_MASK | ACA_ERROR_CE_MASK, + .bank_ops = &xgmi_v6_4_0_aca_bank_ops, +}; + static int amdgpu_xgmi_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block) { + int r; + if (!adev->gmc.xgmi.supported || adev->gmc.xgmi.num_physical_nodes == 0) return 0; amdgpu_ras_reset_error_count(adev, AMDGPU_RAS_BLOCK__XGMI_WAFL); - return amdgpu_ras_block_late_init(adev, ras_block); + r = amdgpu_ras_block_late_init(adev, ras_block); + if (r) + return r; + + switch (amdgpu_ip_version(adev, XGMI_HWIP, 0)) { + case IP_VERSION(6, 4, 0): + r = amdgpu_ras_bind_aca(adev, AMDGPU_RAS_BLOCK__XGMI_WAFL, + &xgmi_v6_4_0_aca_info, NULL); + if (r) + goto late_fini; + break; + default: + break; + } + + return 0; + +late_fini: + amdgpu_ras_block_late_fini(adev, ras_block); + + return r; } uint64_t amdgpu_xgmi_get_relative_phy_addr(struct amdgpu_device *adev, @@ -1099,7 +1172,7 @@ static void amdgpu_xgmi_legacy_reset_ras_error_count(struct amdgpu_device *adev) static void __xgmi_v6_4_0_reset_error_count(struct amdgpu_device *adev, int xgmi_inst, u64 mca_base) { - WREG64_MCA(xgmi_inst, mca_base, MCA_REG_IDX_STATUS, 0ULL); + WREG64_MCA(xgmi_inst, mca_base, ACA_REG_IDX_STATUS, 0ULL); } static void xgmi_v6_4_0_reset_error_count(struct amdgpu_device *adev, int xgmi_inst) @@ -1277,12 +1350,12 @@ static void amdgpu_xgmi_legacy_query_ras_error_count(struct amdgpu_device *adev, err_data->ce_count += ce_cnt; } -static enum amdgpu_mca_error_type xgmi_v6_4_0_pcs_mca_get_error_type(struct amdgpu_device *adev, u64 status) +static enum aca_error_type xgmi_v6_4_0_pcs_mca_get_error_type(struct amdgpu_device *adev, u64 status) { const char *error_str; int ext_error_code; - ext_error_code = MCA_REG__STATUS__ERRORCODEEXT(status); + ext_error_code = ACA_REG__STATUS__ERRORCODEEXT(status); error_str = ext_error_code < ARRAY_SIZE(xgmi_v6_4_0_ras_error_code_ext) ? xgmi_v6_4_0_ras_error_code_ext[ext_error_code] : NULL; @@ -1291,9 +1364,9 @@ static enum amdgpu_mca_error_type xgmi_v6_4_0_pcs_mca_get_error_type(struct amdg switch (ext_error_code) { case 0: - return AMDGPU_MCA_ERROR_TYPE_UE; + return ACA_ERROR_TYPE_UE; case 6: - return AMDGPU_MCA_ERROR_TYPE_CE; + return ACA_ERROR_TYPE_CE; default: return -EINVAL; } @@ -1307,22 +1380,22 @@ static void __xgmi_v6_4_0_query_error_count(struct amdgpu_device *adev, struct a int xgmi_inst = mcm_info->die_id; u64 status = 0; - status = RREG64_MCA(xgmi_inst, mca_base, MCA_REG_IDX_STATUS); - if (!MCA_REG__STATUS__VAL(status)) + status = RREG64_MCA(xgmi_inst, mca_base, ACA_REG_IDX_STATUS); + if (!ACA_REG__STATUS__VAL(status)) return; switch (xgmi_v6_4_0_pcs_mca_get_error_type(adev, status)) { - case AMDGPU_MCA_ERROR_TYPE_UE: + case ACA_ERROR_TYPE_UE: amdgpu_ras_error_statistic_ue_count(err_data, mcm_info, NULL, 1ULL); break; - case AMDGPU_MCA_ERROR_TYPE_CE: + case ACA_ERROR_TYPE_CE: amdgpu_ras_error_statistic_ce_count(err_data, mcm_info, NULL, 1ULL); break; default: break; } - WREG64_MCA(xgmi_inst, mca_base, MCA_REG_IDX_STATUS, 0ULL); + WREG64_MCA(xgmi_inst, mca_base, ACA_REG_IDX_STATUS, 0ULL); } static void xgmi_v6_4_0_query_error_count(struct amdgpu_device *adev, int xgmi_inst, struct ras_err_data *err_data) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h index 1592c63b3099..a3bfc16de6d4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h @@ -44,6 +44,7 @@ struct amdgpu_hive_info { struct amdgpu_reset_domain *reset_domain; atomic_t ras_recovery; + struct ras_event_manager event_mgr; }; struct amdgpu_pcs_ras_field { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h index 51a14f6d93bd..fb2b394bb9c5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h @@ -94,7 +94,8 @@ union amd_sriov_msg_feature_flags { uint32_t reg_indirect_acc : 1; uint32_t av1_support : 1; uint32_t vcn_rb_decouple : 1; - uint32_t reserved : 24; + uint32_t mes_info_enable : 1; + uint32_t reserved : 23; } flags; uint32_t all; }; @@ -157,7 +158,7 @@ struct amd_sriov_msg_pf2vf_info_header { uint32_t reserved[2]; }; -#define AMD_SRIOV_MSG_PF2VF_INFO_FILLED_SIZE (48) +#define AMD_SRIOV_MSG_PF2VF_INFO_FILLED_SIZE (49) struct amd_sriov_msg_pf2vf_info { /* header contains size and version */ struct amd_sriov_msg_pf2vf_info_header header; @@ -208,6 +209,8 @@ struct amd_sriov_msg_pf2vf_info { struct amd_sriov_msg_uuid_info uuid_info; /* PCIE atomic ops support flag */ uint32_t pcie_atomic_ops_support_flags; + /* Portion of GPU memory occupied by VF. MAX value is 65535, but set to uint32_t to maintain alignment with reserved size */ + uint32_t gpu_capacity; /* reserved */ uint32_t reserved[256 - AMD_SRIOV_MSG_PF2VF_INFO_FILLED_SIZE]; }; @@ -221,7 +224,7 @@ struct amd_sriov_msg_vf2pf_info_header { uint32_t reserved[2]; }; -#define AMD_SRIOV_MSG_VF2PF_INFO_FILLED_SIZE (70) +#define AMD_SRIOV_MSG_VF2PF_INFO_FILLED_SIZE (73) struct amd_sriov_msg_vf2pf_info { /* header contains size and version */ struct amd_sriov_msg_vf2pf_info_header header; @@ -265,7 +268,9 @@ struct amd_sriov_msg_vf2pf_info { uint32_t version; } ucode_info[AMD_SRIOV_MSG_RESERVE_UCODE]; uint64_t dummy_page_addr; - + /* FB allocated for guest MES to record UQ info */ + uint64_t mes_info_addr; + uint32_t mes_info_size; /* reserved */ uint32_t reserved[256 - AMD_SRIOV_MSG_VF2PF_INFO_FILLED_SIZE]; }; diff --git a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c index d6f808acfb17..414ea3f560a7 100644 --- a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c +++ b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c @@ -62,6 +62,11 @@ void aqua_vanjaram_doorbell_index_init(struct amdgpu_device *adev) adev->doorbell_index.max_assignment = AMDGPU_DOORBELL_LAYOUT1_MAX_ASSIGNMENT << 1; } +static bool aqua_vanjaram_xcp_vcn_shared(struct amdgpu_device *adev) +{ + return (adev->xcp_mgr->num_xcps > adev->vcn.num_vcn_inst); +} + static void aqua_vanjaram_set_xcp_id(struct amdgpu_device *adev, uint32_t inst_idx, struct amdgpu_ring *ring) { @@ -87,7 +92,7 @@ static void aqua_vanjaram_set_xcp_id(struct amdgpu_device *adev, case AMDGPU_RING_TYPE_VCN_ENC: case AMDGPU_RING_TYPE_VCN_JPEG: ip_blk = AMDGPU_XCP_VCN; - if (adev->xcp_mgr->mode == AMDGPU_CPX_PARTITION_MODE) + if (aqua_vanjaram_xcp_vcn_shared(adev)) inst_mask = 1 << (inst_idx * 2); break; default: @@ -140,10 +145,12 @@ static int aqua_vanjaram_xcp_sched_list_update( aqua_vanjaram_xcp_gpu_sched_update(adev, ring, ring->xcp_id); - /* VCN is shared by two partitions under CPX MODE */ + /* VCN may be shared by two partitions under CPX MODE in certain + * configs. + */ if ((ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC || - ring->funcs->type == AMDGPU_RING_TYPE_VCN_JPEG) && - adev->xcp_mgr->mode == AMDGPU_CPX_PARTITION_MODE) + ring->funcs->type == AMDGPU_RING_TYPE_VCN_JPEG) && + aqua_vanjaram_xcp_vcn_shared(adev)) aqua_vanjaram_xcp_gpu_sched_update(adev, ring, ring->xcp_id + 1); } @@ -623,7 +630,7 @@ static int aqua_vanjaram_xcp_mgr_init(struct amdgpu_device *adev) int aqua_vanjaram_init_soc_config(struct amdgpu_device *adev) { - u32 mask, inst_mask = adev->sdma.sdma_mask; + u32 mask, avail_inst, inst_mask = adev->sdma.sdma_mask; int ret, i; /* generally 1 AID supports 4 instances */ @@ -635,7 +642,9 @@ int aqua_vanjaram_init_soc_config(struct amdgpu_device *adev) for (mask = (1 << adev->sdma.num_inst_per_aid) - 1; inst_mask; inst_mask >>= adev->sdma.num_inst_per_aid, ++i) { - if ((inst_mask & mask) == mask) + avail_inst = inst_mask & mask; + if (avail_inst == mask || avail_inst == 0x3 || + avail_inst == 0xc) adev->aid_mask |= (1 << i); } diff --git a/drivers/gpu/drm/amd/amdgpu/athub_v4_1_0.c b/drivers/gpu/drm/amd/amdgpu/athub_v4_1_0.c new file mode 100644 index 000000000000..8a0773b80864 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/athub_v4_1_0.c @@ -0,0 +1,122 @@ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "amdgpu.h" +#include "athub_v4_1_0.h" +#include "athub/athub_4_1_0_offset.h" +#include "athub/athub_4_1_0_sh_mask.h" +#include "soc15_common.h" + +static uint32_t athub_v4_1_0_get_cg_cntl(struct amdgpu_device *adev) +{ + uint32_t data; + + switch (amdgpu_ip_version(adev, ATHUB_HWIP, 0)) { + case IP_VERSION(4, 1, 0): + data = RREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL); + break; + default: + data = 0; + break; + } + return data; +} + +static void athub_v4_1_0_set_cg_cntl(struct amdgpu_device *adev, uint32_t data) +{ + switch (amdgpu_ip_version(adev, ATHUB_HWIP, 0)) { + case IP_VERSION(4, 1, 0): + WREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL, data); + break; + default: + break; + } +} + +static void +athub_v4_1_0_update_medium_grain_clock_gating(struct amdgpu_device *adev, + bool enable) +{ + uint32_t def, data; + + def = data = athub_v4_1_0_get_cg_cntl(adev); + + if (enable && (adev->cg_flags & AMD_CG_SUPPORT_ATHUB_MGCG)) + data |= ATHUB_MISC_CNTL__CG_ENABLE_MASK; + else + data &= ~ATHUB_MISC_CNTL__CG_ENABLE_MASK; + + if (def != data) + athub_v4_1_0_set_cg_cntl(adev, data); +} + +static void +athub_v4_1_0_update_medium_grain_light_sleep(struct amdgpu_device *adev, + bool enable) +{ + uint32_t def, data; + + def = data = athub_v4_1_0_get_cg_cntl(adev); + + if (enable && (adev->cg_flags & AMD_CG_SUPPORT_ATHUB_LS)) + data |= ATHUB_MISC_CNTL__CG_MEM_LS_ENABLE_MASK; + else + data &= ~ATHUB_MISC_CNTL__CG_MEM_LS_ENABLE_MASK; + + if (def != data) + athub_v4_1_0_set_cg_cntl(adev, data); +} + +int athub_v4_1_0_set_clockgating(struct amdgpu_device *adev, + enum amd_clockgating_state state) +{ + if (amdgpu_sriov_vf(adev)) + return 0; + + switch (amdgpu_ip_version(adev, ATHUB_HWIP, 0)) { + case IP_VERSION(4, 1, 0): + athub_v4_1_0_update_medium_grain_clock_gating(adev, + state == AMD_CG_STATE_GATE); + athub_v4_1_0_update_medium_grain_light_sleep(adev, + state == AMD_CG_STATE_GATE); + break; + default: + break; + } + + return 0; +} + +void athub_v4_1_0_get_clockgating(struct amdgpu_device *adev, u64 *flags) +{ + int data; + + /* AMD_CG_SUPPORT_ATHUB_MGCG */ + data = athub_v4_1_0_get_cg_cntl(adev); + if (data & ATHUB_MISC_CNTL__CG_ENABLE_MASK) + *flags |= AMD_CG_SUPPORT_ATHUB_MGCG; + + /* AMD_CG_SUPPORT_ATHUB_LS */ + if (data & ATHUB_MISC_CNTL__CG_MEM_LS_ENABLE_MASK) + *flags |= AMD_CG_SUPPORT_ATHUB_LS; +} diff --git a/drivers/gpu/drm/amd/amdgpu/athub_v4_1_0.h b/drivers/gpu/drm/amd/amdgpu/athub_v4_1_0.h new file mode 100644 index 000000000000..4d18d0998fa8 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/athub_v4_1_0.h @@ -0,0 +1,30 @@ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#ifndef __ATHUB_V4_1_0_H__ +#define __ATHUB_V4_1_0_H__ + +int athub_v4_1_0_set_clockgating(struct amdgpu_device *adev, + enum amd_clockgating_state state); +void athub_v4_1_0_get_clockgating(struct amdgpu_device *adev, u64 *flags); + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/atom.c b/drivers/gpu/drm/amd/amdgpu/atom.c index a33e890c70d9..d552e013354c 100644 --- a/drivers/gpu/drm/amd/amdgpu/atom.c +++ b/drivers/gpu/drm/amd/amdgpu/atom.c @@ -62,6 +62,7 @@ typedef struct { struct atom_context *ctx; uint32_t *ps, *ws; + int ps_size, ws_size; int ps_shift; uint16_t start; unsigned last_jump; @@ -70,8 +71,8 @@ typedef struct { } atom_exec_context; int amdgpu_atom_debug; -static int amdgpu_atom_execute_table_locked(struct atom_context *ctx, int index, uint32_t *params); -int amdgpu_atom_execute_table(struct atom_context *ctx, int index, uint32_t *params); +static int amdgpu_atom_execute_table_locked(struct atom_context *ctx, int index, uint32_t *params, int params_size); +int amdgpu_atom_execute_table(struct atom_context *ctx, int index, uint32_t *params, int params_size); static uint32_t atom_arg_mask[8] = { 0xFFFFFFFF, 0xFFFF, 0xFFFF00, 0xFFFF0000, 0xFF, 0xFF00, 0xFF0000, @@ -223,7 +224,10 @@ static uint32_t atom_get_src_int(atom_exec_context *ctx, uint8_t attr, (*ptr)++; /* get_unaligned_le32 avoids unaligned accesses from atombios * tables, noticed on a DEC Alpha. */ - val = get_unaligned_le32((u32 *)&ctx->ps[idx]); + if (idx < ctx->ps_size) + val = get_unaligned_le32((u32 *)&ctx->ps[idx]); + else + pr_info("PS index out of range: %i > %i\n", idx, ctx->ps_size); if (print) DEBUG("PS[0x%02X,0x%04X]", idx, val); break; @@ -261,7 +265,10 @@ static uint32_t atom_get_src_int(atom_exec_context *ctx, uint8_t attr, val = gctx->reg_block; break; default: - val = ctx->ws[idx]; + if (idx < ctx->ws_size) + val = ctx->ws[idx]; + else + pr_info("WS index out of range: %i > %i\n", idx, ctx->ws_size); } break; case ATOM_ARG_ID: @@ -313,7 +320,7 @@ static uint32_t atom_get_src_int(atom_exec_context *ctx, uint8_t attr, DEBUG("IMM 0x%02X\n", val); return val; } - return 0; + break; case ATOM_ARG_PLL: idx = U8(*ptr); (*ptr)++; @@ -495,6 +502,10 @@ static void atom_put_dst(atom_exec_context *ctx, int arg, uint8_t attr, idx = U8(*ptr); (*ptr)++; DEBUG("PS[0x%02X]", idx); + if (idx >= ctx->ps_size) { + pr_info("PS index out of range: %i > %i\n", idx, ctx->ps_size); + return; + } ctx->ps[idx] = cpu_to_le32(val); break; case ATOM_ARG_WS: @@ -527,6 +538,10 @@ static void atom_put_dst(atom_exec_context *ctx, int arg, uint8_t attr, gctx->reg_block = val; break; default: + if (idx >= ctx->ws_size) { + pr_info("WS index out of range: %i > %i\n", idx, ctx->ws_size); + return; + } ctx->ws[idx] = val; } break; @@ -624,7 +639,7 @@ static void atom_op_calltable(atom_exec_context *ctx, int *ptr, int arg) else SDEBUG(" table: %d\n", idx); if (U16(ctx->ctx->cmd_table + 4 + 2 * idx)) - r = amdgpu_atom_execute_table_locked(ctx->ctx, idx, ctx->ps + ctx->ps_shift); + r = amdgpu_atom_execute_table_locked(ctx->ctx, idx, ctx->ps + ctx->ps_shift, ctx->ps_size - ctx->ps_shift); if (r) { ctx->abort = true; } @@ -1203,7 +1218,7 @@ static struct { atom_op_div32, ATOM_ARG_WS}, }; -static int amdgpu_atom_execute_table_locked(struct atom_context *ctx, int index, uint32_t *params) +static int amdgpu_atom_execute_table_locked(struct atom_context *ctx, int index, uint32_t *params, int params_size) { int base = CU16(ctx->cmd_table + 4 + 2 * index); int len, ws, ps, ptr; @@ -1225,12 +1240,17 @@ static int amdgpu_atom_execute_table_locked(struct atom_context *ctx, int index, ectx.ps_shift = ps / 4; ectx.start = base; ectx.ps = params; + ectx.ps_size = params_size; ectx.abort = false; ectx.last_jump = 0; - if (ws) + ectx.last_jump_jiffies = 0; + if (ws) { ectx.ws = kcalloc(4, ws, GFP_KERNEL); - else + ectx.ws_size = ws; + } else { ectx.ws = NULL; + ectx.ws_size = 0; + } debug_depth++; while (1) { @@ -1264,7 +1284,7 @@ free: return ret; } -int amdgpu_atom_execute_table(struct atom_context *ctx, int index, uint32_t *params) +int amdgpu_atom_execute_table(struct atom_context *ctx, int index, uint32_t *params, int params_size) { int r; @@ -1280,7 +1300,7 @@ int amdgpu_atom_execute_table(struct atom_context *ctx, int index, uint32_t *par /* reset divmul */ ctx->divmul[0] = 0; ctx->divmul[1] = 0; - r = amdgpu_atom_execute_table_locked(ctx, index, params); + r = amdgpu_atom_execute_table_locked(ctx, index, params, params_size); mutex_unlock(&ctx->mutex); return r; } @@ -1552,7 +1572,7 @@ int amdgpu_atom_asic_init(struct atom_context *ctx) if (!CU16(ctx->cmd_table + 4 + 2 * ATOM_CMD_INIT)) return 1; - ret = amdgpu_atom_execute_table(ctx, ATOM_CMD_INIT, ps); + ret = amdgpu_atom_execute_table(ctx, ATOM_CMD_INIT, ps, 16); if (ret) return ret; diff --git a/drivers/gpu/drm/amd/amdgpu/atom.h b/drivers/gpu/drm/amd/amdgpu/atom.h index c11cf18a0f18..b807f6639a4c 100644 --- a/drivers/gpu/drm/amd/amdgpu/atom.h +++ b/drivers/gpu/drm/amd/amdgpu/atom.h @@ -156,7 +156,7 @@ struct atom_context { extern int amdgpu_atom_debug; struct atom_context *amdgpu_atom_parse(struct card_info *card, void *bios); -int amdgpu_atom_execute_table(struct atom_context *ctx, int index, uint32_t *params); +int amdgpu_atom_execute_table(struct atom_context *ctx, int index, uint32_t *params, int params_size); int amdgpu_atom_asic_init(struct atom_context *ctx); void amdgpu_atom_destroy(struct atom_context *ctx); bool amdgpu_atom_parse_data_header(struct atom_context *ctx, int index, uint16_t *size, diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_crtc.c b/drivers/gpu/drm/amd/amdgpu/atombios_crtc.c index 10098fdd33fc..3dfc28840a7d 100644 --- a/drivers/gpu/drm/amd/amdgpu/atombios_crtc.c +++ b/drivers/gpu/drm/amd/amdgpu/atombios_crtc.c @@ -77,7 +77,7 @@ void amdgpu_atombios_crtc_overscan_setup(struct drm_crtc *crtc, args.usOverscanTop = cpu_to_le16(amdgpu_crtc->v_border); break; } - amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args); + amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args)); } void amdgpu_atombios_crtc_scaler_setup(struct drm_crtc *crtc) @@ -106,7 +106,7 @@ void amdgpu_atombios_crtc_scaler_setup(struct drm_crtc *crtc) args.ucEnable = ATOM_SCALER_DISABLE; break; } - amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args); + amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args)); } void amdgpu_atombios_crtc_lock(struct drm_crtc *crtc, int lock) @@ -123,7 +123,7 @@ void amdgpu_atombios_crtc_lock(struct drm_crtc *crtc, int lock) args.ucCRTC = amdgpu_crtc->crtc_id; args.ucEnable = lock; - amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args); + amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args)); } void amdgpu_atombios_crtc_enable(struct drm_crtc *crtc, int state) @@ -139,7 +139,7 @@ void amdgpu_atombios_crtc_enable(struct drm_crtc *crtc, int state) args.ucCRTC = amdgpu_crtc->crtc_id; args.ucEnable = state; - amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args); + amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args)); } void amdgpu_atombios_crtc_blank(struct drm_crtc *crtc, int state) @@ -155,7 +155,7 @@ void amdgpu_atombios_crtc_blank(struct drm_crtc *crtc, int state) args.ucCRTC = amdgpu_crtc->crtc_id; args.ucBlanking = state; - amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args); + amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args)); } void amdgpu_atombios_crtc_powergate(struct drm_crtc *crtc, int state) @@ -171,7 +171,7 @@ void amdgpu_atombios_crtc_powergate(struct drm_crtc *crtc, int state) args.ucDispPipeId = amdgpu_crtc->crtc_id; args.ucEnable = state; - amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args); + amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args)); } void amdgpu_atombios_crtc_powergate_init(struct amdgpu_device *adev) @@ -183,7 +183,7 @@ void amdgpu_atombios_crtc_powergate_init(struct amdgpu_device *adev) args.ucEnable = ATOM_INIT; - amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args); + amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args)); } void amdgpu_atombios_crtc_set_dtd_timing(struct drm_crtc *crtc, @@ -228,7 +228,7 @@ void amdgpu_atombios_crtc_set_dtd_timing(struct drm_crtc *crtc, args.susModeMiscInfo.usAccess = cpu_to_le16(misc); args.ucCRTC = amdgpu_crtc->crtc_id; - amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args); + amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args)); } union atom_enable_ss { @@ -293,7 +293,7 @@ static void amdgpu_atombios_crtc_program_ss(struct amdgpu_device *adev, args.v3.usSpreadSpectrumStep = cpu_to_le16(ss->step); args.v3.ucEnable = enable; - amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args); + amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args)); } union adjust_pixel_clock { @@ -395,7 +395,7 @@ static u32 amdgpu_atombios_crtc_adjust_pll(struct drm_crtc *crtc, ADJUST_DISPLAY_CONFIG_SS_ENABLE; amdgpu_atom_execute_table(adev->mode_info.atom_context, - index, (uint32_t *)&args); + index, (uint32_t *)&args, sizeof(args)); adjusted_clock = le16_to_cpu(args.v1.usPixelClock) * 10; break; case 3: @@ -428,7 +428,7 @@ static u32 amdgpu_atombios_crtc_adjust_pll(struct drm_crtc *crtc, args.v3.sInput.ucExtTransmitterID = 0; amdgpu_atom_execute_table(adev->mode_info.atom_context, - index, (uint32_t *)&args); + index, (uint32_t *)&args, sizeof(args)); adjusted_clock = le32_to_cpu(args.v3.sOutput.ulDispPllFreq) * 10; if (args.v3.sOutput.ucRefDiv) { amdgpu_crtc->pll_flags |= AMDGPU_PLL_USE_FRAC_FB_DIV; @@ -514,7 +514,7 @@ void amdgpu_atombios_crtc_set_disp_eng_pll(struct amdgpu_device *adev, DRM_ERROR("Unknown table version %d %d\n", frev, crev); return; } - amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args); + amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args)); } union set_dce_clock { @@ -544,7 +544,7 @@ u32 amdgpu_atombios_crtc_set_dce_clock(struct amdgpu_device *adev, args.v2_1.asParam.ulDCEClkFreq = cpu_to_le32(freq); /* 10kHz units */ args.v2_1.asParam.ucDCEClkType = clk_type; args.v2_1.asParam.ucDCEClkSrc = clk_src; - amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args); + amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args)); ret_freq = le32_to_cpu(args.v2_1.asParam.ulDCEClkFreq) * 10; break; default: @@ -740,7 +740,7 @@ void amdgpu_atombios_crtc_program_pll(struct drm_crtc *crtc, return; } - amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args); + amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args)); } int amdgpu_atombios_crtc_prepare_pll(struct drm_crtc *crtc, diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_dp.c b/drivers/gpu/drm/amd/amdgpu/atombios_dp.c index 87c41e0e9b7c..622634c08c7b 100644 --- a/drivers/gpu/drm/amd/amdgpu/atombios_dp.c +++ b/drivers/gpu/drm/amd/amdgpu/atombios_dp.c @@ -83,7 +83,7 @@ static int amdgpu_atombios_dp_process_aux_ch(struct amdgpu_i2c_chan *chan, args.v2.ucDelay = delay / 10; args.v2.ucHPD_ID = chan->rec.hpd; - amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args); + amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args)); *ack = args.v2.ucReplyStatus; @@ -301,7 +301,7 @@ static u8 amdgpu_atombios_dp_encoder_service(struct amdgpu_device *adev, args.ucLaneNum = lane_num; args.ucStatus = 0; - amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args); + amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args)); return args.ucStatus; } diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c b/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c index 7672abe6c140..25feab188dfe 100644 --- a/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c +++ b/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c @@ -335,7 +335,7 @@ amdgpu_atombios_encoder_setup_dac(struct drm_encoder *encoder, int action) args.ucDacStandard = ATOM_DAC1_PS2; args.usPixelClock = cpu_to_le16(amdgpu_encoder->pixel_clock / 10); - amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args); + amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args)); } @@ -432,7 +432,7 @@ amdgpu_atombios_encoder_setup_dvo(struct drm_encoder *encoder, int action) break; } - amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args); + amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args)); } int amdgpu_atombios_encoder_get_encoder_mode(struct drm_encoder *encoder) @@ -732,7 +732,7 @@ amdgpu_atombios_encoder_setup_dig_encoder(struct drm_encoder *encoder, break; } - amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args); + amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args)); } @@ -1136,7 +1136,7 @@ amdgpu_atombios_encoder_setup_dig_transmitter(struct drm_encoder *encoder, int a break; } - amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args); + amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args)); } bool @@ -1164,7 +1164,7 @@ amdgpu_atombios_encoder_set_edp_panel_power(struct drm_connector *connector, args.v1.ucAction = action; - amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args); + amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args)); /* wait for the panel to power up */ if (action == ATOM_TRANSMITTER_ACTION_POWER_ON) { @@ -1288,7 +1288,7 @@ amdgpu_atombios_encoder_setup_external_encoder(struct drm_encoder *encoder, DRM_ERROR("Unknown table version: %d, %d\n", frev, crev); return; } - amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args); + amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args)); } static void @@ -1633,7 +1633,7 @@ amdgpu_atombios_encoder_set_crtc_source(struct drm_encoder *encoder) return; } - amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args); + amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args)); } /* This only needs to be called once at startup */ @@ -1706,7 +1706,7 @@ amdgpu_atombios_encoder_dac_load_detect(struct drm_encoder *encoder, args.sDacload.ucMisc = DAC_LOAD_MISC_YPrPb; } - amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args); + amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args)); return true; } else diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_i2c.c b/drivers/gpu/drm/amd/amdgpu/atombios_i2c.c index af0335535f82..a6501114322f 100644 --- a/drivers/gpu/drm/amd/amdgpu/atombios_i2c.c +++ b/drivers/gpu/drm/amd/amdgpu/atombios_i2c.c @@ -86,7 +86,7 @@ static int amdgpu_atombios_i2c_process_i2c_ch(struct amdgpu_i2c_chan *chan, args.ucSlaveAddr = slave_addr << 1; args.ucLineNumber = chan->rec.i2c_id; - amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args); + amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args)); /* error */ if (args.ucStatus != HW_ASSISTED_I2C_STATUS_SUCCESS) { @@ -172,5 +172,5 @@ void amdgpu_atombios_i2c_channel_trans(struct amdgpu_device *adev, u8 slave_addr args.ucSlaveAddr = slave_addr; args.ucLineNumber = line_number; - amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args); + amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args)); } diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c index 4dfaa017cf7f..cf1d5d462b67 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik.c +++ b/drivers/gpu/drm/amd/amdgpu/cik.c @@ -1375,14 +1375,14 @@ static int cik_asic_pci_config_reset(struct amdgpu_device *adev) return r; } -static bool cik_asic_supports_baco(struct amdgpu_device *adev) +static int cik_asic_supports_baco(struct amdgpu_device *adev) { switch (adev->asic_type) { case CHIP_BONAIRE: case CHIP_HAWAII: return amdgpu_dpm_is_baco_supported(adev); default: - return false; + return 0; } } @@ -1638,28 +1638,18 @@ static void cik_pcie_gen3_enable(struct amdgpu_device *adev) PCI_EXP_LNKCTL_HAWD); /* linkctl2 */ - pcie_capability_read_word(root, PCI_EXP_LNKCTL2, - &tmp16); - tmp16 &= ~(PCI_EXP_LNKCTL2_ENTER_COMP | - PCI_EXP_LNKCTL2_TX_MARGIN); - tmp16 |= (bridge_cfg2 & - (PCI_EXP_LNKCTL2_ENTER_COMP | - PCI_EXP_LNKCTL2_TX_MARGIN)); - pcie_capability_write_word(root, - PCI_EXP_LNKCTL2, - tmp16); - - pcie_capability_read_word(adev->pdev, - PCI_EXP_LNKCTL2, - &tmp16); - tmp16 &= ~(PCI_EXP_LNKCTL2_ENTER_COMP | - PCI_EXP_LNKCTL2_TX_MARGIN); - tmp16 |= (gpu_cfg2 & - (PCI_EXP_LNKCTL2_ENTER_COMP | - PCI_EXP_LNKCTL2_TX_MARGIN)); - pcie_capability_write_word(adev->pdev, - PCI_EXP_LNKCTL2, - tmp16); + pcie_capability_clear_and_set_word(root, PCI_EXP_LNKCTL2, + PCI_EXP_LNKCTL2_ENTER_COMP | + PCI_EXP_LNKCTL2_TX_MARGIN, + bridge_cfg2 & + (PCI_EXP_LNKCTL2_ENTER_COMP | + PCI_EXP_LNKCTL2_TX_MARGIN)); + pcie_capability_clear_and_set_word(adev->pdev, PCI_EXP_LNKCTL2, + PCI_EXP_LNKCTL2_ENTER_COMP | + PCI_EXP_LNKCTL2_TX_MARGIN, + gpu_cfg2 & + (PCI_EXP_LNKCTL2_ENTER_COMP | + PCI_EXP_LNKCTL2_TX_MARGIN)); tmp = RREG32_PCIE(ixPCIE_LC_CNTL4); tmp &= ~PCIE_LC_CNTL4__LC_SET_QUIESCE_MASK; @@ -1674,16 +1664,15 @@ static void cik_pcie_gen3_enable(struct amdgpu_device *adev) speed_cntl &= ~PCIE_LC_SPEED_CNTL__LC_FORCE_DIS_SW_SPEED_CHANGE_MASK; WREG32_PCIE(ixPCIE_LC_SPEED_CNTL, speed_cntl); - pcie_capability_read_word(adev->pdev, PCI_EXP_LNKCTL2, &tmp16); - tmp16 &= ~PCI_EXP_LNKCTL2_TLS; - + tmp16 = 0; if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3) tmp16 |= PCI_EXP_LNKCTL2_TLS_8_0GT; /* gen3 */ else if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2) tmp16 |= PCI_EXP_LNKCTL2_TLS_5_0GT; /* gen2 */ else tmp16 |= PCI_EXP_LNKCTL2_TLS_2_5GT; /* gen1 */ - pcie_capability_write_word(adev->pdev, PCI_EXP_LNKCTL2, tmp16); + pcie_capability_clear_and_set_word(adev->pdev, PCI_EXP_LNKCTL2, + PCI_EXP_LNKCTL2_TLS, tmp16); speed_cntl = RREG32_PCIE(ixPCIE_LC_SPEED_CNTL); speed_cntl |= PCIE_LC_SPEED_CNTL__LC_INITIATE_LINK_SPEED_CHANGE_MASK; @@ -2221,6 +2210,8 @@ static const struct amd_ip_funcs cik_common_ip_funcs = { .soft_reset = cik_common_soft_reset, .set_clockgating_state = cik_common_set_clockgating_state, .set_powergating_state = cik_common_set_powergating_state, + .dump_ip_state = NULL, + .print_ip_state = NULL, }; static const struct amdgpu_ip_block_version cik_common_ip_block = diff --git a/drivers/gpu/drm/amd/amdgpu/cik_ih.c b/drivers/gpu/drm/amd/amdgpu/cik_ih.c index f24e34dc33d1..576baa9dbb0e 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/cik_ih.c @@ -435,6 +435,8 @@ static const struct amd_ip_funcs cik_ih_ip_funcs = { .soft_reset = cik_ih_soft_reset, .set_clockgating_state = cik_ih_set_clockgating_state, .set_powergating_state = cik_ih_set_powergating_state, + .dump_ip_state = NULL, + .print_ip_state = NULL, }; static const struct amdgpu_ih_funcs cik_ih_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c index a3fccc4c1f43..6948ebda0fa2 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c @@ -1228,6 +1228,8 @@ static const struct amd_ip_funcs cik_sdma_ip_funcs = { .soft_reset = cik_sdma_soft_reset, .set_clockgating_state = cik_sdma_set_clockgating_state, .set_powergating_state = cik_sdma_set_powergating_state, + .dump_ip_state = NULL, + .print_ip_state = NULL, }; static const struct amdgpu_ring_funcs cik_sdma_ring_funcs = { @@ -1290,7 +1292,7 @@ static void cik_sdma_set_irq_funcs(struct amdgpu_device *adev) * @src_offset: src GPU address * @dst_offset: dst GPU address * @byte_count: number of bytes to xfer - * @tmz: is this a secure operation + * @copy_flags: unused * * Copy GPU buffers using the DMA engine (CIK). * Used by the amdgpu ttm implementation to move pages if @@ -1300,7 +1302,7 @@ static void cik_sdma_emit_copy_buffer(struct amdgpu_ib *ib, uint64_t src_offset, uint64_t dst_offset, uint32_t byte_count, - bool tmz) + uint32_t copy_flags) { ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0); ib->ptr[ib->length_dw++] = byte_count; diff --git a/drivers/gpu/drm/amd/amdgpu/clearstate_gfx9.h b/drivers/gpu/drm/amd/amdgpu/clearstate_gfx9.h index 567a904804bc..9c85ca6358c1 100644 --- a/drivers/gpu/drm/amd/amdgpu/clearstate_gfx9.h +++ b/drivers/gpu/drm/amd/amdgpu/clearstate_gfx9.h @@ -21,8 +21,7 @@ * */ -static const unsigned int gfx9_SECT_CONTEXT_def_1[] = -{ +static const unsigned int gfx9_SECT_CONTEXT_def_1[] = { 0x00000000, // DB_RENDER_CONTROL 0x00000000, // DB_COUNT_CONTROL 0x00000000, // DB_DEPTH_VIEW @@ -236,8 +235,7 @@ static const unsigned int gfx9_SECT_CONTEXT_def_1[] = 0x00000000, // PA_SC_VPORT_ZMIN_15 0x3f800000, // PA_SC_VPORT_ZMAX_15 }; -static const unsigned int gfx9_SECT_CONTEXT_def_2[] = -{ +static const unsigned int gfx9_SECT_CONTEXT_def_2[] = { 0x00000000, // PA_SC_SCREEN_EXTENT_CONTROL 0x00000000, // PA_SC_TILE_STEERING_OVERRIDE 0x00000000, // CP_PERFMON_CNTX_CNTL @@ -521,15 +519,13 @@ static const unsigned int gfx9_SECT_CONTEXT_def_2[] = 0x00000000, // CB_MRT6_EPITCH 0x00000000, // CB_MRT7_EPITCH }; -static const unsigned int gfx9_SECT_CONTEXT_def_3[] = -{ +static const unsigned int gfx9_SECT_CONTEXT_def_3[] = { 0x00000000, // PA_CL_POINT_X_RAD 0x00000000, // PA_CL_POINT_Y_RAD 0x00000000, // PA_CL_POINT_SIZE 0x00000000, // PA_CL_POINT_CULL_RAD }; -static const unsigned int gfx9_SECT_CONTEXT_def_4[] = -{ +static const unsigned int gfx9_SECT_CONTEXT_def_4[] = { 0x00000000, // DB_DEPTH_CONTROL 0x00000000, // DB_EQAA 0x00000000, // CB_COLOR_CONTROL @@ -688,17 +684,14 @@ static const unsigned int gfx9_SECT_CONTEXT_def_4[] = 0x00000000, // VGT_GS_OUT_PRIM_TYPE 0x00000000, // IA_ENHANCE }; -static const unsigned int gfx9_SECT_CONTEXT_def_5[] = -{ +static const unsigned int gfx9_SECT_CONTEXT_def_5[] = { 0x00000000, // WD_ENHANCE 0x00000000, // VGT_PRIMITIVEID_EN }; -static const unsigned int gfx9_SECT_CONTEXT_def_6[] = -{ +static const unsigned int gfx9_SECT_CONTEXT_def_6[] = { 0x00000000, // VGT_PRIMITIVEID_RESET }; -static const unsigned int gfx9_SECT_CONTEXT_def_7[] = -{ +static const unsigned int gfx9_SECT_CONTEXT_def_7[] = { 0x00000000, // VGT_GS_MAX_PRIMS_PER_SUBGROUP 0x00000000, // VGT_DRAW_PAYLOAD_CNTL 0, // HOLE @@ -766,8 +759,7 @@ static const unsigned int gfx9_SECT_CONTEXT_def_7[] = 0x00000000, // VGT_STRMOUT_CONFIG 0x00000000, // VGT_STRMOUT_BUFFER_CONFIG }; -static const unsigned int gfx9_SECT_CONTEXT_def_8[] = -{ +static const unsigned int gfx9_SECT_CONTEXT_def_8[] = { 0x00000000, // PA_SC_CENTROID_PRIORITY_0 0x00000000, // PA_SC_CENTROID_PRIORITY_1 0x00001000, // PA_SC_LINE_CNTL @@ -924,8 +916,7 @@ static const unsigned int gfx9_SECT_CONTEXT_def_8[] = 0x00000000, // CB_COLOR7_DCC_BASE 0x00000000, // CB_COLOR7_DCC_BASE_EXT }; -static const struct cs_extent_def gfx9_SECT_CONTEXT_defs[] = -{ +static const struct cs_extent_def gfx9_SECT_CONTEXT_defs[] = { {gfx9_SECT_CONTEXT_def_1, 0x0000a000, 212 }, {gfx9_SECT_CONTEXT_def_2, 0x0000a0d6, 282 }, {gfx9_SECT_CONTEXT_def_3, 0x0000a1f5, 4 }, diff --git a/drivers/gpu/drm/amd/amdgpu/clearstate_si.h b/drivers/gpu/drm/amd/amdgpu/clearstate_si.h index 66e39cdb5cb0..5fd96ddd7f0f 100644 --- a/drivers/gpu/drm/amd/amdgpu/clearstate_si.h +++ b/drivers/gpu/drm/amd/amdgpu/clearstate_si.h @@ -21,8 +21,7 @@ * */ -static const u32 si_SECT_CONTEXT_def_1[] = -{ +static const u32 si_SECT_CONTEXT_def_1[] = { 0x00000000, // DB_RENDER_CONTROL 0x00000000, // DB_COUNT_CONTROL 0x00000000, // DB_DEPTH_VIEW @@ -236,8 +235,7 @@ static const u32 si_SECT_CONTEXT_def_1[] = 0x00000000, // PA_SC_VPORT_ZMIN_15 0x3f800000, // PA_SC_VPORT_ZMAX_15 }; -static const u32 si_SECT_CONTEXT_def_2[] = -{ +static const u32 si_SECT_CONTEXT_def_2[] = { 0x00000000, // CP_PERFMON_CNTX_CNTL 0x00000000, // CP_RINGID 0x00000000, // CP_VMID @@ -511,8 +509,7 @@ static const u32 si_SECT_CONTEXT_def_2[] = 0x00000000, // CB_BLEND6_CONTROL 0x00000000, // CB_BLEND7_CONTROL }; -static const u32 si_SECT_CONTEXT_def_3[] = -{ +static const u32 si_SECT_CONTEXT_def_3[] = { 0x00000000, // PA_CL_POINT_X_RAD 0x00000000, // PA_CL_POINT_Y_RAD 0x00000000, // PA_CL_POINT_SIZE @@ -520,8 +517,7 @@ static const u32 si_SECT_CONTEXT_def_3[] = 0x00000000, // VGT_DMA_BASE_HI 0x00000000, // VGT_DMA_BASE }; -static const u32 si_SECT_CONTEXT_def_4[] = -{ +static const u32 si_SECT_CONTEXT_def_4[] = { 0x00000000, // DB_DEPTH_CONTROL 0x00000000, // DB_EQAA 0x00000000, // CB_COLOR_CONTROL @@ -680,16 +676,13 @@ static const u32 si_SECT_CONTEXT_def_4[] = 0x00000000, // VGT_GS_OUT_PRIM_TYPE 0x00000000, // IA_ENHANCE }; -static const u32 si_SECT_CONTEXT_def_5[] = -{ +static const u32 si_SECT_CONTEXT_def_5[] = { 0x00000000, // VGT_PRIMITIVEID_EN }; -static const u32 si_SECT_CONTEXT_def_6[] = -{ +static const u32 si_SECT_CONTEXT_def_6[] = { 0x00000000, // VGT_PRIMITIVEID_RESET }; -static const u32 si_SECT_CONTEXT_def_7[] = -{ +static const u32 si_SECT_CONTEXT_def_7[] = { 0x00000000, // VGT_MULTI_PRIM_IB_RESET_EN 0, // HOLE 0, // HOLE @@ -924,8 +917,7 @@ static const u32 si_SECT_CONTEXT_def_7[] = 0x00000000, // CB_COLOR7_CLEAR_WORD0 0x00000000, // CB_COLOR7_CLEAR_WORD1 }; -static const struct cs_extent_def si_SECT_CONTEXT_defs[] = -{ +static const struct cs_extent_def si_SECT_CONTEXT_defs[] = { {si_SECT_CONTEXT_def_1, 0x0000a000, 212 }, {si_SECT_CONTEXT_def_2, 0x0000a0d8, 272 }, {si_SECT_CONTEXT_def_3, 0x0000a1f5, 6 }, diff --git a/drivers/gpu/drm/amd/amdgpu/cz_ih.c b/drivers/gpu/drm/amd/amdgpu/cz_ih.c index c19681492efa..072643787384 100644 --- a/drivers/gpu/drm/amd/amdgpu/cz_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/cz_ih.c @@ -433,6 +433,8 @@ static const struct amd_ip_funcs cz_ih_ip_funcs = { .soft_reset = cz_ih_soft_reset, .set_clockgating_state = cz_ih_set_clockgating_state, .set_powergating_state = cz_ih_set_powergating_state, + .dump_ip_state = NULL, + .print_ip_state = NULL, }; static const struct amdgpu_ih_funcs cz_ih_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c index 587ee632a3b8..b44fce44c066 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c @@ -52,6 +52,7 @@ static void dce_v10_0_set_display_funcs(struct amdgpu_device *adev); static void dce_v10_0_set_irq_funcs(struct amdgpu_device *adev); +static void dce_v10_0_hpd_int_ack(struct amdgpu_device *adev, int hpd); static const u32 crtc_offsets[] = { CRTC0_REGISTER_OFFSET, @@ -364,6 +365,7 @@ static void dce_v10_0_hpd_init(struct amdgpu_device *adev) AMDGPU_HPD_DISCONNECT_INT_DELAY_IN_MS); WREG32(mmDC_HPD_TOGGLE_FILT_CNTL + hpd_offsets[amdgpu_connector->hpd.hpd], tmp); + dce_v10_0_hpd_int_ack(adev, amdgpu_connector->hpd.hpd); dce_v10_0_hpd_set_polarity(adev, amdgpu_connector->hpd.hpd); amdgpu_irq_get(adev, &adev->hpd_irq, amdgpu_connector->hpd.hpd); @@ -3331,6 +3333,8 @@ static const struct amd_ip_funcs dce_v10_0_ip_funcs = { .soft_reset = dce_v10_0_soft_reset, .set_clockgating_state = dce_v10_0_set_clockgating_state, .set_powergating_state = dce_v10_0_set_powergating_state, + .dump_ip_state = NULL, + .print_ip_state = NULL, }; static void diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c index f22ec27365bd..80b2e7f79acf 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c @@ -52,6 +52,7 @@ static void dce_v11_0_set_display_funcs(struct amdgpu_device *adev); static void dce_v11_0_set_irq_funcs(struct amdgpu_device *adev); +static void dce_v11_0_hpd_int_ack(struct amdgpu_device *adev, int hpd); static const u32 crtc_offsets[] = { @@ -388,6 +389,7 @@ static void dce_v11_0_hpd_init(struct amdgpu_device *adev) AMDGPU_HPD_DISCONNECT_INT_DELAY_IN_MS); WREG32(mmDC_HPD_TOGGLE_FILT_CNTL + hpd_offsets[amdgpu_connector->hpd.hpd], tmp); + dce_v11_0_hpd_int_ack(adev, amdgpu_connector->hpd.hpd); dce_v11_0_hpd_set_polarity(adev, amdgpu_connector->hpd.hpd); amdgpu_irq_get(adev, &adev->hpd_irq, amdgpu_connector->hpd.hpd); } @@ -3462,6 +3464,8 @@ static const struct amd_ip_funcs dce_v11_0_ip_funcs = { .soft_reset = dce_v11_0_soft_reset, .set_clockgating_state = dce_v11_0_set_clockgating_state, .set_powergating_state = dce_v11_0_set_powergating_state, + .dump_ip_state = NULL, + .print_ip_state = NULL, }; static void diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c index 4dbe9b3259b5..db20012600f5 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c @@ -273,6 +273,21 @@ static void dce_v6_0_hpd_set_polarity(struct amdgpu_device *adev, WREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[hpd], tmp); } +static void dce_v6_0_hpd_int_ack(struct amdgpu_device *adev, + int hpd) +{ + u32 tmp; + + if (hpd >= adev->mode_info.num_hpd) { + DRM_DEBUG("invalid hdp %d\n", hpd); + return; + } + + tmp = RREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[hpd]); + tmp |= DC_HPD1_INT_CONTROL__DC_HPD1_INT_ACK_MASK; + WREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[hpd], tmp); +} + /** * dce_v6_0_hpd_init - hpd setup callback. * @@ -312,6 +327,7 @@ static void dce_v6_0_hpd_init(struct amdgpu_device *adev) continue; } + dce_v6_0_hpd_int_ack(adev, amdgpu_connector->hpd.hpd); dce_v6_0_hpd_set_polarity(adev, amdgpu_connector->hpd.hpd); amdgpu_irq_get(adev, &adev->hpd_irq, amdgpu_connector->hpd.hpd); } @@ -3089,7 +3105,7 @@ static int dce_v6_0_hpd_irq(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry) { - uint32_t disp_int, mask, tmp; + uint32_t disp_int, mask; unsigned hpd; if (entry->src_data[0] >= adev->mode_info.num_hpd) { @@ -3102,9 +3118,7 @@ static int dce_v6_0_hpd_irq(struct amdgpu_device *adev, mask = interrupt_status_offsets[hpd].hpd; if (disp_int & mask) { - tmp = RREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[hpd]); - tmp |= DC_HPD1_INT_CONTROL__DC_HPD1_INT_ACK_MASK; - WREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[hpd], tmp); + dce_v6_0_hpd_int_ack(adev, hpd); schedule_delayed_work(&adev->hotplug_work, 0); DRM_DEBUG("IH: HPD%d\n", hpd + 1); } @@ -3140,6 +3154,8 @@ static const struct amd_ip_funcs dce_v6_0_ip_funcs = { .soft_reset = dce_v6_0_soft_reset, .set_clockgating_state = dce_v6_0_set_clockgating_state, .set_powergating_state = dce_v6_0_set_powergating_state, + .dump_ip_state = NULL, + .print_ip_state = NULL, }; static void diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c index 05bcce23385e..5b56100ec902 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c @@ -265,6 +265,21 @@ static void dce_v8_0_hpd_set_polarity(struct amdgpu_device *adev, WREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[hpd], tmp); } +static void dce_v8_0_hpd_int_ack(struct amdgpu_device *adev, + int hpd) +{ + u32 tmp; + + if (hpd >= adev->mode_info.num_hpd) { + DRM_DEBUG("invalid hdp %d\n", hpd); + return; + } + + tmp = RREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[hpd]); + tmp |= DC_HPD1_INT_CONTROL__DC_HPD1_INT_ACK_MASK; + WREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[hpd], tmp); +} + /** * dce_v8_0_hpd_init - hpd setup callback. * @@ -304,6 +319,7 @@ static void dce_v8_0_hpd_init(struct amdgpu_device *adev) continue; } + dce_v8_0_hpd_int_ack(adev, amdgpu_connector->hpd.hpd); dce_v8_0_hpd_set_polarity(adev, amdgpu_connector->hpd.hpd); amdgpu_irq_get(adev, &adev->hpd_irq, amdgpu_connector->hpd.hpd); } @@ -3177,7 +3193,7 @@ static int dce_v8_0_hpd_irq(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry) { - uint32_t disp_int, mask, tmp; + uint32_t disp_int, mask; unsigned hpd; if (entry->src_data[0] >= adev->mode_info.num_hpd) { @@ -3190,9 +3206,7 @@ static int dce_v8_0_hpd_irq(struct amdgpu_device *adev, mask = interrupt_status_offsets[hpd].hpd; if (disp_int & mask) { - tmp = RREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[hpd]); - tmp |= DC_HPD1_INT_CONTROL__DC_HPD1_INT_ACK_MASK; - WREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[hpd], tmp); + dce_v8_0_hpd_int_ack(adev, hpd); schedule_delayed_work(&adev->hotplug_work, 0); DRM_DEBUG("IH: HPD%d\n", hpd + 1); } @@ -3228,6 +3242,8 @@ static const struct amd_ip_funcs dce_v8_0_ip_funcs = { .soft_reset = dce_v8_0_soft_reset, .set_clockgating_state = dce_v8_0_set_clockgating_state, .set_powergating_state = dce_v8_0_set_powergating_state, + .dump_ip_state = NULL, + .print_ip_state = NULL, }; static void diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index dcdecb18b230..536287ddd2ec 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -276,6 +276,99 @@ MODULE_FIRMWARE("amdgpu/gc_10_3_7_mec.bin"); MODULE_FIRMWARE("amdgpu/gc_10_3_7_mec2.bin"); MODULE_FIRMWARE("amdgpu/gc_10_3_7_rlc.bin"); +static const struct amdgpu_hwip_reg_entry gc_reg_list_10_1[] = { + SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS2), + SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS3), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_STALLED_STAT1), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_STALLED_STAT2), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPC_STALLED_STAT1), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPF_STALLED_STAT1), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_BUSY_STAT), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPC_BUSY_STAT), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPF_BUSY_STAT), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPC_BUSY_STAT2), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPF_BUSY_STAT2), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPF_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_ERROR), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_GFX_HPD_STATUS0), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB_BASE), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB_WPTR), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB0_BASE), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB0_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB0_WPTR), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB1_BASE), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB1_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB1_WPTR), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB2_BASE), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB2_WPTR), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_RB2_WPTR), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB1_CMD_BUFSZ), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB2_CMD_BUFSZ), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB1_CMD_BUFSZ), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB2_CMD_BUFSZ), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB1_BASE_LO), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB1_BASE_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB1_BUFSZ), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB2_BASE_LO), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB2_BASE_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_IB2_BUFSZ), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB1_BASE_LO), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB1_BASE_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB1_BUFSZ), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB2_BASE_LO), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB2_BASE_HI), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_IB2_BUFSZ), + SOC15_REG_ENTRY_STR(GC, 0, mmCPF_UTCL1_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, mmCPC_UTCL1_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, mmCPG_UTCL1_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, mmGDS_PROTECTION_FAULT), + SOC15_REG_ENTRY_STR(GC, 0, mmGDS_VM_PROTECTION_FAULT), + SOC15_REG_ENTRY_STR(GC, 0, mmIA_UTCL1_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, mmIA_UTCL1_STATUS_2), + SOC15_REG_ENTRY_STR(GC, 0, mmPA_CL_CNTL_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, mmRLC_UTCL1_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, mmRMI_UTCL1_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, mmSQC_DCACHE_UTCL0_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, mmSQC_ICACHE_UTCL0_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, mmSQG_UTCL0_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, mmTCP_UTCL0_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, mmWD_UTCL1_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, mmGCVM_L2_PROTECTION_FAULT_CNTL), + SOC15_REG_ENTRY_STR(GC, 0, mmGCVM_L2_PROTECTION_FAULT_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_DEBUG), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_CNTL), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_MES_CNTL), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_INSTR_PNTR), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC1_INSTR_PNTR), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC2_INSTR_PNTR), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_MES_DEBUG_INTERRUPT_INSTR_PNTR), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_MES_INSTR_PNTR), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_INSTR_PNTR), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_INSTR_PNTR), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CPC_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, mmRLC_STAT), + SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_COMMAND), + SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_MESSAGE), + SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_ARGUMENT_1), + SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_ARGUMENT_2), + SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_ARGUMENT_3), + SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_ARGUMENT_4), + SOC15_REG_ENTRY_STR(GC, 0, mmSMU_RLC_RESPONSE), + SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SAFE_MODE), + SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_SAFE_MODE), + SOC15_REG_ENTRY_STR(GC, 0, mmRLC_RLCS_GPM_STAT_2), + SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SPP_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, mmRLC_RLCS_BOOTLOAD_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, mmRLC_INT_STAT), + SOC15_REG_ENTRY_STR(GC, 0, mmRLC_GPM_GENERAL_6), + SOC15_REG_ENTRY_STR(GC, 0, mmRLC_GPM_DEBUG_INST_A), + SOC15_REG_ENTRY_STR(GC, 0, mmRLC_GPM_DEBUG_INST_B), + SOC15_REG_ENTRY_STR(GC, 0, mmRLC_GPM_DEBUG_INST_ADDR), + SOC15_REG_ENTRY_STR(GC, 0, mmRLC_LX6_CORE_PDEBUG_INST) +}; + static const struct soc15_reg_golden golden_settings_gc_10_1[] = { SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_4, 0xffffffff, 0x00400014), SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_CPF_CLK_CTRL, 0xfcff8fff, 0xf8000100), @@ -3657,6 +3750,9 @@ static void gfx_v10_0_init_spm_golden_registers(struct amdgpu_device *adev) static void gfx_v10_0_init_golden_registers(struct amdgpu_device *adev) { + if (amdgpu_sriov_vf(adev)) + return; + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { case IP_VERSION(10, 1, 10): soc15_program_register_sequence(adev, @@ -3961,7 +4057,7 @@ static void gfx_v10_0_check_gfxoff_flag(struct amdgpu_device *adev) static int gfx_v10_0_init_microcode(struct amdgpu_device *adev) { - char fw_name[40]; + char fw_name[53]; char ucode_prefix[30]; const char *wks = ""; int err; @@ -4487,10 +4583,26 @@ static int gfx_v10_0_compute_ring_init(struct amdgpu_device *adev, int ring_id, hw_prio, NULL); } +static void gfx_v10_0_alloc_dump_mem(struct amdgpu_device *adev) +{ + uint32_t reg_count = ARRAY_SIZE(gc_reg_list_10_1); + uint32_t *ptr; + + ptr = kcalloc(reg_count, sizeof(uint32_t), GFP_KERNEL); + if (ptr == NULL) { + DRM_ERROR("Failed to allocate memory for IP Dump\n"); + adev->gfx.ip_dump = NULL; + adev->gfx.reg_count = 0; + } else { + adev->gfx.ip_dump = ptr; + adev->gfx.reg_count = reg_count; + } +} + static int gfx_v10_0_sw_init(void *handle) { int i, j, k, r, ring_id = 0; - struct amdgpu_kiq *kiq; + int xcc_id = 0; struct amdgpu_device *adev = (struct amdgpu_device *)handle; switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { @@ -4515,7 +4627,7 @@ static int gfx_v10_0_sw_init(void *handle) case IP_VERSION(10, 3, 3): case IP_VERSION(10, 3, 7): adev->gfx.me.num_me = 1; - adev->gfx.me.num_pipe_per_me = 1; + adev->gfx.me.num_pipe_per_me = 2; adev->gfx.me.num_queue_per_pipe = 1; adev->gfx.mec.num_mec = 2; adev->gfx.mec.num_pipe_per_mec = 4; @@ -4619,8 +4731,7 @@ static int gfx_v10_0_sw_init(void *handle) return r; } - kiq = &adev->gfx.kiq[0]; - r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq, 0); + r = amdgpu_gfx_kiq_init_ring(adev, xcc_id); if (r) return r; } @@ -4640,6 +4751,8 @@ static int gfx_v10_0_sw_init(void *handle) gfx_v10_0_gpu_early_init(adev); + gfx_v10_0_alloc_dump_mem(adev); + return 0; } @@ -4692,6 +4805,8 @@ static int gfx_v10_0_sw_fini(void *handle) gfx_v10_0_free_microcode(adev); + kfree(adev->gfx.ip_dump); + return 0; } @@ -4983,7 +5098,8 @@ static void gfx_v10_0_constants_init(struct amdgpu_device *adev) u32 tmp; int i; - WREG32_FIELD15(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff); + if (!amdgpu_sriov_vf(adev)) + WREG32_FIELD15(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff); gfx_v10_0_setup_rb(adev); gfx_v10_0_get_cu_info(adev, &adev->gfx.cu_info); @@ -7164,7 +7280,7 @@ static int gfx_v10_0_hw_init(void *handle) if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(10, 3, 0)) gfx_v10_3_program_pbb_mode(adev); - if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(10, 3, 0)) + if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(10, 3, 0) && !amdgpu_sriov_vf(adev)) gfx_v10_3_set_power_brake_sequence(adev); return r; @@ -7947,7 +8063,7 @@ static void gfx_v10_0_update_spm_vmid_internal(struct amdgpu_device *adev, WREG32_SOC15_NO_KIQ(GC, 0, mmRLC_SPM_MC_CNTL, data); } -static void gfx_v10_0_update_spm_vmid(struct amdgpu_device *adev, unsigned int vmid) +static void gfx_v10_0_update_spm_vmid(struct amdgpu_device *adev, struct amdgpu_ring *ring, unsigned int vmid) { amdgpu_gfx_off_ctrl(adev, false); @@ -8314,7 +8430,7 @@ static void gfx_v10_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) } reg_mem_engine = 0; } else { - ref_and_mask = nbio_hf_reg->ref_and_mask_cp0; + ref_and_mask = nbio_hf_reg->ref_and_mask_cp0 << ring->pipe; reg_mem_engine = 1; /* pfp */ } @@ -8543,34 +8659,23 @@ static void gfx_v10_0_ring_emit_cntxcntl(struct amdgpu_ring *ring, amdgpu_ring_write(ring, 0); } -static unsigned int gfx_v10_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring) +static unsigned int gfx_v10_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring, + uint64_t addr) { unsigned int ret; amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3)); - amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr)); - amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr)); - amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */ + amdgpu_ring_write(ring, lower_32_bits(addr)); + amdgpu_ring_write(ring, upper_32_bits(addr)); + /* discard following DWs if *cond_exec_gpu_addr==0 */ + amdgpu_ring_write(ring, 0); ret = ring->wptr & ring->buf_mask; - amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */ + /* patch dummy value later */ + amdgpu_ring_write(ring, 0); return ret; } -static void gfx_v10_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned int offset) -{ - unsigned int cur; - - BUG_ON(offset > ring->buf_mask); - BUG_ON(ring->ring[offset] != 0x55aa55aa); - - cur = (ring->wptr - 1) & ring->buf_mask; - if (likely(cur > offset)) - ring->ring[offset] = cur - offset; - else - ring->ring[offset] = (ring->buf_mask + 1) - offset + cur; -} - static int gfx_v10_0_ring_preempt_ib(struct amdgpu_ring *ring) { int i, r = 0; @@ -9162,6 +9267,36 @@ static void gfx_v10_0_emit_mem_sync(struct amdgpu_ring *ring) amdgpu_ring_write(ring, gcr_cntl); /* GCR_CNTL */ } +static void gfx_v10_ip_print(void *handle, struct drm_printer *p) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + uint32_t i; + uint32_t reg_count = ARRAY_SIZE(gc_reg_list_10_1); + + if (!adev->gfx.ip_dump) + return; + + for (i = 0; i < reg_count; i++) + drm_printf(p, "%-50s \t 0x%08x\n", + gc_reg_list_10_1[i].reg_name, + adev->gfx.ip_dump[i]); +} + +static void gfx_v10_ip_dump(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + uint32_t i; + uint32_t reg_count = ARRAY_SIZE(gc_reg_list_10_1); + + if (!adev->gfx.ip_dump) + return; + + amdgpu_gfx_off_ctrl(adev, false); + for (i = 0; i < reg_count; i++) + adev->gfx.ip_dump[i] = RREG32(SOC15_REG_ENTRY_OFFSET(gc_reg_list_10_1[i])); + amdgpu_gfx_off_ctrl(adev, true); +} + static const struct amd_ip_funcs gfx_v10_0_ip_funcs = { .name = "gfx_v10_0", .early_init = gfx_v10_0_early_init, @@ -9178,6 +9313,8 @@ static const struct amd_ip_funcs gfx_v10_0_ip_funcs = { .set_clockgating_state = gfx_v10_0_set_clockgating_state, .set_powergating_state = gfx_v10_0_set_powergating_state, .get_clockgating_state = gfx_v10_0_get_clockgating_state, + .dump_ip_state = gfx_v10_ip_dump, + .print_ip_state = gfx_v10_ip_print, }; static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = { @@ -9194,7 +9331,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = { 7 + /* PIPELINE_SYNC */ SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + - 2 + /* VM_FLUSH */ + 4 + /* VM_FLUSH */ 8 + /* FENCE for VM_FLUSH */ 20 + /* GDS switch */ 4 + /* double SWITCH_BUFFER, @@ -9225,7 +9362,6 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = { .emit_switch_buffer = gfx_v10_0_ring_emit_sb, .emit_cntxcntl = gfx_v10_0_ring_emit_cntxcntl, .init_cond_exec = gfx_v10_0_ring_emit_init_cond_exec, - .patch_cond_exec = gfx_v10_0_ring_emit_patch_cond_exec, .preempt_ib = gfx_v10_0_ring_preempt_ib, .emit_frame_cntl = gfx_v10_0_ring_emit_frame_cntl, .emit_wreg = gfx_v10_0_ring_emit_wreg, @@ -9285,7 +9421,6 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_kiq = { 7 + /* gfx_v10_0_ring_emit_pipeline_sync */ SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + - 2 + /* gfx_v10_0_ring_emit_vm_flush */ 8 + 8 + 8, /* gfx_v10_0_ring_emit_fence_kiq x3 for user fence, vm fence */ .emit_ib_size = 7, /* gfx_v10_0_ring_emit_ib_compute */ .emit_ib = gfx_v10_0_ring_emit_ib_compute, diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index 4f3bfdc75b37..ad6431013c73 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -89,6 +89,10 @@ MODULE_FIRMWARE("amdgpu/gc_11_5_0_pfp.bin"); MODULE_FIRMWARE("amdgpu/gc_11_5_0_me.bin"); MODULE_FIRMWARE("amdgpu/gc_11_5_0_mec.bin"); MODULE_FIRMWARE("amdgpu/gc_11_5_0_rlc.bin"); +MODULE_FIRMWARE("amdgpu/gc_11_5_1_pfp.bin"); +MODULE_FIRMWARE("amdgpu/gc_11_5_1_me.bin"); +MODULE_FIRMWARE("amdgpu/gc_11_5_1_mec.bin"); +MODULE_FIRMWARE("amdgpu/gc_11_5_1_rlc.bin"); static const struct soc15_reg_golden golden_settings_gc_11_0[] = { SOC15_REG_GOLDEN_VALUE(GC, 0, regTCP_CNTL, 0x20000000, 0x20000000) @@ -506,7 +510,7 @@ static void gfx_v11_0_check_fw_cp_gfx_shadow(struct amdgpu_device *adev) static int gfx_v11_0_init_microcode(struct amdgpu_device *adev) { char fw_name[40]; - char ucode_prefix[30]; + char ucode_prefix[25]; int err; const struct rlc_firmware_header_v2_0 *rlc_hdr; uint16_t version_major; @@ -727,7 +731,7 @@ static int gfx_v11_0_rlc_init(struct amdgpu_device *adev) /* init spm vmid with 0xf */ if (adev->gfx.rlc.funcs->update_spm_vmid) - adev->gfx.rlc.funcs->update_spm_vmid(adev, 0xf); + adev->gfx.rlc.funcs->update_spm_vmid(adev, NULL, 0xf); return 0; } @@ -907,6 +911,7 @@ static int gfx_v11_0_gpu_early_init(struct amdgpu_device *adev) case IP_VERSION(11, 0, 1): case IP_VERSION(11, 0, 4): case IP_VERSION(11, 5, 0): + case IP_VERSION(11, 5, 1): adev->gfx.config.max_hw_contexts = 8; adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; adev->gfx.config.sc_prim_fifo_size_backend = 0x100; @@ -1329,7 +1334,7 @@ static int gfx_v11_0_rlc_backdoor_autoload_enable(struct amdgpu_device *adev) static int gfx_v11_0_sw_init(void *handle) { int i, j, k, r, ring_id = 0; - struct amdgpu_kiq *kiq; + int xcc_id = 0; struct amdgpu_device *adev = (struct amdgpu_device *)handle; switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { @@ -1346,6 +1351,7 @@ static int gfx_v11_0_sw_init(void *handle) case IP_VERSION(11, 0, 1): case IP_VERSION(11, 0, 4): case IP_VERSION(11, 5, 0): + case IP_VERSION(11, 5, 1): adev->gfx.me.num_me = 1; adev->gfx.me.num_pipe_per_me = 1; adev->gfx.me.num_queue_per_pipe = 1; @@ -1454,8 +1460,7 @@ static int gfx_v11_0_sw_init(void *handle) return r; } - kiq = &adev->gfx.kiq[0]; - r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq, 0); + r = amdgpu_gfx_kiq_init_ring(adev, xcc_id); if (r) return r; } @@ -1630,7 +1635,7 @@ static void gfx_v11_0_setup_rb(struct amdgpu_device *adev) active_rb_bitmap |= (0x3 << (i * rb_bitmap_width_per_sa)); } - active_rb_bitmap |= global_active_rb_bitmap; + active_rb_bitmap &= global_active_rb_bitmap; adev->gfx.config.backend_enable_mask = active_rb_bitmap; adev->gfx.config.num_rbs = hweight32(active_rb_bitmap); } @@ -2588,7 +2593,8 @@ static int gfx_v11_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev) IP_VERSION(11, 0, 1) || amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 0, 4) || - amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 5, 0)) + amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 5, 0) || + amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 5, 1)) bootload_status = RREG32_SOC15(GC, 0, regRLC_RLCS_BOOTLOAD_STATUS_gc_11_0_1); else @@ -4500,14 +4506,11 @@ static int gfx_v11_0_soft_reset(void *handle) gfx_v11_0_set_safe_mode(adev, 0); + mutex_lock(&adev->srbm_mutex); for (i = 0; i < adev->gfx.mec.num_mec; ++i) { for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) { for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) { - tmp = RREG32_SOC15(GC, 0, regGRBM_GFX_CNTL); - tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, MEID, i); - tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, QUEUEID, j); - tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, PIPEID, k); - WREG32_SOC15(GC, 0, regGRBM_GFX_CNTL, tmp); + soc21_grbm_select(adev, i, k, j, 0); WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 0x2); WREG32_SOC15(GC, 0, regSPI_COMPUTE_QUEUE_RESET, 0x1); @@ -4517,16 +4520,14 @@ static int gfx_v11_0_soft_reset(void *handle) for (i = 0; i < adev->gfx.me.num_me; ++i) { for (j = 0; j < adev->gfx.me.num_queue_per_pipe; j++) { for (k = 0; k < adev->gfx.me.num_pipe_per_me; k++) { - tmp = RREG32_SOC15(GC, 0, regGRBM_GFX_CNTL); - tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, MEID, i); - tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, QUEUEID, j); - tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, PIPEID, k); - WREG32_SOC15(GC, 0, regGRBM_GFX_CNTL, tmp); + soc21_grbm_select(adev, i, k, j, 0); WREG32_SOC15(GC, 0, regCP_GFX_HQD_DEQUEUE_REQUEST, 0x1); } } } + soc21_grbm_select(adev, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); /* Try to acquire the gfx mutex before access to CP_VMID_RESET */ r = gfx_v11_0_request_gfx_index_mutex(adev, 1); @@ -5027,7 +5028,7 @@ static int gfx_v11_0_update_gfx_clock_gating(struct amdgpu_device *adev, return 0; } -static void gfx_v11_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid) +static void gfx_v11_0_update_spm_vmid(struct amdgpu_device *adev, struct amdgpu_ring *ring, unsigned vmid) { u32 data; @@ -5041,6 +5042,14 @@ static void gfx_v11_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid) WREG32_SOC15_NO_KIQ(GC, 0, regRLC_SPM_MC_CNTL, data); amdgpu_gfx_off_ctrl(adev, true); + + if (ring + && amdgpu_sriov_is_pp_one_vf(adev) + && ((ring->funcs->type == AMDGPU_RING_TYPE_GFX) + || (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE))) { + uint32_t reg = SOC15_REG_OFFSET(GC, 0, regRLC_SPM_MC_CNTL); + amdgpu_ring_emit_wreg(ring, reg, data); + } } static const struct amdgpu_rlc_funcs gfx_v11_0_rlc_funcs = { @@ -5074,6 +5083,7 @@ static void gfx_v11_cntl_power_gating(struct amdgpu_device *adev, bool enable) case IP_VERSION(11, 0, 1): case IP_VERSION(11, 0, 4): case IP_VERSION(11, 5, 0): + case IP_VERSION(11, 5, 1): WREG32_SOC15(GC, 0, regRLC_PG_DELAY_3, RLC_PG_DELAY_3_DEFAULT_GC_11_0_1); break; default: @@ -5109,6 +5119,7 @@ static int gfx_v11_0_set_powergating_state(void *handle, case IP_VERSION(11, 0, 1): case IP_VERSION(11, 0, 4): case IP_VERSION(11, 5, 0): + case IP_VERSION(11, 5, 1): if (!enable) amdgpu_gfx_off_ctrl(adev, false); @@ -5140,6 +5151,7 @@ static int gfx_v11_0_set_clockgating_state(void *handle, case IP_VERSION(11, 0, 3): case IP_VERSION(11, 0, 4): case IP_VERSION(11, 5, 0): + case IP_VERSION(11, 5, 1): gfx_v11_0_update_gfx_clock_gating(adev, state == AMD_CG_STATE_GATE); break; @@ -5444,6 +5456,12 @@ static void gfx_v11_0_ring_emit_vm_flush(struct amdgpu_ring *ring, amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0)); amdgpu_ring_write(ring, 0x0); } + + /* Make sure that we can't skip the SET_Q_MODE packets when the VM + * changed in any way. + */ + ring->set_q_mode_offs = 0; + ring->set_q_mode_ptr = NULL; } static void gfx_v11_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr, @@ -5493,16 +5511,81 @@ static void gfx_v11_0_ring_emit_cntxcntl(struct amdgpu_ring *ring, amdgpu_ring_write(ring, 0); } +static unsigned gfx_v11_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring, + uint64_t addr) +{ + unsigned ret; + + amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3)); + amdgpu_ring_write(ring, lower_32_bits(addr)); + amdgpu_ring_write(ring, upper_32_bits(addr)); + /* discard following DWs if *cond_exec_gpu_addr==0 */ + amdgpu_ring_write(ring, 0); + ret = ring->wptr & ring->buf_mask; + /* patch dummy value later */ + amdgpu_ring_write(ring, 0); + + return ret; +} + static void gfx_v11_0_ring_emit_gfx_shadow(struct amdgpu_ring *ring, u64 shadow_va, u64 csa_va, u64 gds_va, bool init_shadow, int vmid) { struct amdgpu_device *adev = ring->adev; + unsigned int offs, end; - if (!adev->gfx.cp_gfx_shadow) + if (!adev->gfx.cp_gfx_shadow || !ring->ring_obj) return; + /* + * The logic here isn't easy to understand because we need to keep state + * accross multiple executions of the function as well as between the + * CPU and GPU. The general idea is that the newly written GPU command + * has a condition on the previous one and only executed if really + * necessary. + */ + + /* + * The dw in the NOP controls if the next SET_Q_MODE packet should be + * executed or not. Reserve 64bits just to be on the save side. + */ + amdgpu_ring_write(ring, PACKET3(PACKET3_NOP, 1)); + offs = ring->wptr & ring->buf_mask; + + /* + * We start with skipping the prefix SET_Q_MODE and always executing + * the postfix SET_Q_MODE packet. This is changed below with a + * WRITE_DATA command when the postfix executed. + */ + amdgpu_ring_write(ring, shadow_va ? 1 : 0); + amdgpu_ring_write(ring, 0); + + if (ring->set_q_mode_offs) { + uint64_t addr; + + addr = amdgpu_bo_gpu_offset(ring->ring_obj); + addr += ring->set_q_mode_offs << 2; + end = gfx_v11_0_ring_emit_init_cond_exec(ring, addr); + } + + /* + * When the postfix SET_Q_MODE packet executes we need to make sure that the + * next prefix SET_Q_MODE packet executes as well. + */ + if (!shadow_va) { + uint64_t addr; + + addr = amdgpu_bo_gpu_offset(ring->ring_obj); + addr += offs << 2; + amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); + amdgpu_ring_write(ring, WRITE_DATA_DST_SEL(5) | WR_CONFIRM); + amdgpu_ring_write(ring, lower_32_bits(addr)); + amdgpu_ring_write(ring, upper_32_bits(addr)); + amdgpu_ring_write(ring, 0x1); + } + amdgpu_ring_write(ring, PACKET3(PACKET3_SET_Q_PREEMPTION_MODE, 7)); amdgpu_ring_write(ring, lower_32_bits(shadow_va)); amdgpu_ring_write(ring, upper_32_bits(shadow_va)); @@ -5514,33 +5597,26 @@ static void gfx_v11_0_ring_emit_gfx_shadow(struct amdgpu_ring *ring, PACKET3_SET_Q_PREEMPTION_MODE_IB_VMID(vmid) : 0); amdgpu_ring_write(ring, init_shadow ? PACKET3_SET_Q_PREEMPTION_MODE_INIT_SHADOW_MEM : 0); -} -static unsigned gfx_v11_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring) -{ - unsigned ret; + if (ring->set_q_mode_offs) + amdgpu_ring_patch_cond_exec(ring, end); - amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3)); - amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr)); - amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr)); - amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */ - ret = ring->wptr & ring->buf_mask; - amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */ + if (shadow_va) { + uint64_t token = shadow_va ^ csa_va ^ gds_va ^ vmid; - return ret; -} + /* + * If the tokens match try to skip the last postfix SET_Q_MODE + * packet to avoid saving/restoring the state all the time. + */ + if (ring->set_q_mode_ptr && ring->set_q_mode_token == token) + *ring->set_q_mode_ptr = 0; -static void gfx_v11_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset) -{ - unsigned cur; - BUG_ON(offset > ring->buf_mask); - BUG_ON(ring->ring[offset] != 0x55aa55aa); + ring->set_q_mode_token = token; + } else { + ring->set_q_mode_ptr = &ring->ring[ring->set_q_mode_offs]; + } - cur = (ring->wptr - 1) & ring->buf_mask; - if (likely(cur > offset)) - ring->ring[offset] = cur - offset; - else - ring->ring[offset] = (ring->buf_mask + 1) - offset + cur; + ring->set_q_mode_offs = offs; } static int gfx_v11_0_ring_preempt_ib(struct amdgpu_ring *ring) @@ -6093,6 +6169,8 @@ static const struct amd_ip_funcs gfx_v11_0_ip_funcs = { .set_clockgating_state = gfx_v11_0_set_clockgating_state, .set_powergating_state = gfx_v11_0_set_powergating_state, .get_clockgating_state = gfx_v11_0_get_clockgating_state, + .dump_ip_state = NULL, + .print_ip_state = NULL, }; static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_gfx = { @@ -6104,13 +6182,14 @@ static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_gfx = { .get_rptr = gfx_v11_0_ring_get_rptr_gfx, .get_wptr = gfx_v11_0_ring_get_wptr_gfx, .set_wptr = gfx_v11_0_ring_set_wptr_gfx, - .emit_frame_size = /* totally 242 maximum if 16 IBs */ + .emit_frame_size = /* totally 247 maximum if 16 IBs */ + 5 + /* update_spm_vmid */ 5 + /* COND_EXEC */ - 9 + /* SET_Q_PREEMPTION_MODE */ + 22 + /* SET_Q_PREEMPTION_MODE */ 7 + /* PIPELINE_SYNC */ SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + - 2 + /* VM_FLUSH */ + 4 + /* VM_FLUSH */ 8 + /* FENCE for VM_FLUSH */ 20 + /* GDS switch */ 5 + /* COND_EXEC */ @@ -6119,6 +6198,7 @@ static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_gfx = { 31 + /* DE_META */ 3 + /* CNTX_CTRL */ 5 + /* HDP_INVL */ + 22 + /* SET_Q_PREEMPTION_MODE */ 8 + 8 + /* FENCE x2 */ 8, /* gfx_v11_0_emit_mem_sync */ .emit_ib_size = 4, /* gfx_v11_0_ring_emit_ib_gfx */ @@ -6135,7 +6215,6 @@ static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_gfx = { .emit_cntxcntl = gfx_v11_0_ring_emit_cntxcntl, .emit_gfx_shadow = gfx_v11_0_ring_emit_gfx_shadow, .init_cond_exec = gfx_v11_0_ring_emit_init_cond_exec, - .patch_cond_exec = gfx_v11_0_ring_emit_patch_cond_exec, .preempt_ib = gfx_v11_0_ring_preempt_ib, .emit_frame_cntl = gfx_v11_0_ring_emit_frame_cntl, .emit_wreg = gfx_v11_0_ring_emit_wreg, @@ -6154,6 +6233,7 @@ static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_compute = { .get_wptr = gfx_v11_0_ring_get_wptr_compute, .set_wptr = gfx_v11_0_ring_set_wptr_compute, .emit_frame_size = + 5 + /* update_spm_vmid */ 20 + /* gfx_v11_0_ring_emit_gds_switch */ 7 + /* gfx_v11_0_ring_emit_hdp_flush */ 5 + /* hdp invalidate */ @@ -6195,7 +6275,6 @@ static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_kiq = { 7 + /* gfx_v11_0_ring_emit_pipeline_sync */ SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + - 2 + /* gfx_v11_0_ring_emit_vm_flush */ 8 + 8 + 8, /* gfx_v11_0_ring_emit_fence_kiq x3 for user fence, vm fence */ .emit_ib_size = 7, /* gfx_v11_0_ring_emit_ib_compute */ .emit_ib = gfx_v11_0_ring_emit_ib_compute, diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0_3.c index 26d6286d86c9..9e7ce1e6bc06 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0_3.c @@ -69,7 +69,7 @@ static int gfx_v11_0_3_rlc_gc_fed_irq(struct amdgpu_device *adev, amdgpu_ras_interrupt_dispatch(adev, &ih_data); } else { if (adev->virt.ops && adev->virt.ops->ras_poison_handler) - adev->virt.ops->ras_poison_handler(adev); + adev->virt.ops->ras_poison_handler(adev, ras_if->block); else dev_warn(adev->dev, "No ras_poison_handler interface in SRIOV for %s!\n", ras_if->name); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c index 34f9211b2679..d0992ce9fb47 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c @@ -3457,6 +3457,8 @@ static const struct amd_ip_funcs gfx_v6_0_ip_funcs = { .soft_reset = gfx_v6_0_soft_reset, .set_clockgating_state = gfx_v6_0_set_clockgating_state, .set_powergating_state = gfx_v6_0_set_powergating_state, + .dump_ip_state = NULL, + .print_ip_state = NULL, }; static const struct amdgpu_ring_funcs gfx_v6_0_ring_funcs_gfx = { diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c index c2faf6b4c2fc..541dbd70d8c7 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c @@ -3274,7 +3274,7 @@ static int gfx_v7_0_rlc_init(struct amdgpu_device *adev) /* init spm vmid with 0xf */ if (adev->gfx.rlc.funcs->update_spm_vmid) - adev->gfx.rlc.funcs->update_spm_vmid(adev, 0xf); + adev->gfx.rlc.funcs->update_spm_vmid(adev, NULL, 0xf); return 0; } @@ -3500,7 +3500,7 @@ static int gfx_v7_0_rlc_resume(struct amdgpu_device *adev) return 0; } -static void gfx_v7_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid) +static void gfx_v7_0_update_spm_vmid(struct amdgpu_device *adev, struct amdgpu_ring *ring, unsigned vmid) { u32 data; @@ -4977,6 +4977,8 @@ static const struct amd_ip_funcs gfx_v7_0_ip_funcs = { .soft_reset = gfx_v7_0_soft_reset, .set_clockgating_state = gfx_v7_0_set_clockgating_state, .set_powergating_state = gfx_v7_0_set_powergating_state, + .dump_ip_state = NULL, + .print_ip_state = NULL, }; static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_gfx = { diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 1943beb135c4..2f0e72caee1a 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -1288,7 +1288,7 @@ static int gfx_v8_0_rlc_init(struct amdgpu_device *adev) /* init spm vmid with 0xf */ if (adev->gfx.rlc.funcs->update_spm_vmid) - adev->gfx.rlc.funcs->update_spm_vmid(adev, 0xf); + adev->gfx.rlc.funcs->update_spm_vmid(adev, NULL, 0xf); return 0; } @@ -1900,8 +1900,8 @@ static void gfx_v8_0_sq_irq_work_func(struct work_struct *work); static int gfx_v8_0_sw_init(void *handle) { int i, j, k, r, ring_id; + int xcc_id = 0; struct amdgpu_ring *ring; - struct amdgpu_kiq *kiq; struct amdgpu_device *adev = (struct amdgpu_device *)handle; switch (adev->asic_type) { @@ -2022,8 +2022,7 @@ static int gfx_v8_0_sw_init(void *handle) return r; } - kiq = &adev->gfx.kiq[0]; - r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq, 0); + r = amdgpu_gfx_kiq_init_ring(adev, xcc_id); if (r) return r; @@ -5579,7 +5578,7 @@ static void gfx_v8_0_unset_safe_mode(struct amdgpu_device *adev, int xcc_id) } } -static void gfx_v8_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid) +static void gfx_v8_0_update_spm_vmid(struct amdgpu_device *adev, struct amdgpu_ring *ring, unsigned vmid) { u32 data; @@ -6327,33 +6326,22 @@ static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags) amdgpu_ring_write(ring, 0); } -static unsigned gfx_v8_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring) +static unsigned gfx_v8_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring, + uint64_t addr) { unsigned ret; amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3)); - amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr)); - amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr)); - amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */ + amdgpu_ring_write(ring, lower_32_bits(addr)); + amdgpu_ring_write(ring, upper_32_bits(addr)); + /* discard following DWs if *cond_exec_gpu_addr==0 */ + amdgpu_ring_write(ring, 0); ret = ring->wptr & ring->buf_mask; - amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */ + /* patch dummy value later */ + amdgpu_ring_write(ring, 0); return ret; } -static void gfx_v8_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset) -{ - unsigned cur; - - BUG_ON(offset > ring->buf_mask); - BUG_ON(ring->ring[offset] != 0x55aa55aa); - - cur = (ring->wptr & ring->buf_mask) - 1; - if (likely(cur > offset)) - ring->ring[offset] = cur - offset; - else - ring->ring[offset] = (ring->ring_size >> 2) - offset + cur; -} - static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t reg_val_offs) { @@ -6890,6 +6878,8 @@ static const struct amd_ip_funcs gfx_v8_0_ip_funcs = { .set_clockgating_state = gfx_v8_0_set_clockgating_state, .set_powergating_state = gfx_v8_0_set_powergating_state, .get_clockgating_state = gfx_v8_0_get_clockgating_state, + .dump_ip_state = NULL, + .print_ip_state = NULL, }; static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = { @@ -6933,7 +6923,6 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = { .emit_switch_buffer = gfx_v8_ring_emit_sb, .emit_cntxcntl = gfx_v8_ring_emit_cntxcntl, .init_cond_exec = gfx_v8_0_ring_emit_init_cond_exec, - .patch_cond_exec = gfx_v8_0_ring_emit_patch_cond_exec, .emit_wreg = gfx_v8_0_ring_emit_wreg, .soft_recovery = gfx_v8_0_ring_soft_recovery, .emit_mem_sync = gfx_v8_0_emit_mem_sync, diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 3bc6943365a4..3c8c5abf35ab 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -1249,7 +1249,7 @@ static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev) static int gfx_v9_0_init_cp_gfx_microcode(struct amdgpu_device *adev, char *chip_name) { - char fw_name[30]; + char fw_name[50]; int err; snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name); @@ -1282,7 +1282,7 @@ out: static int gfx_v9_0_init_rlc_microcode(struct amdgpu_device *adev, char *chip_name) { - char fw_name[30]; + char fw_name[53]; int err; const struct rlc_firmware_header_v2_0 *rlc_hdr; uint16_t version_major; @@ -1337,7 +1337,7 @@ static bool gfx_v9_0_load_mec2_fw_bin_support(struct amdgpu_device *adev) static int gfx_v9_0_init_cp_compute_microcode(struct amdgpu_device *adev, char *chip_name) { - char fw_name[30]; + char fw_name[50]; int err; if (amdgpu_sriov_vf(adev) && (adev->asic_type == CHIP_ALDEBARAN)) @@ -1997,8 +1997,8 @@ static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id, static int gfx_v9_0_sw_init(void *handle) { int i, j, k, r, ring_id; + int xcc_id = 0; struct amdgpu_ring *ring; - struct amdgpu_kiq *kiq; struct amdgpu_device *adev = (struct amdgpu_device *)handle; unsigned int hw_prio; @@ -2080,7 +2080,7 @@ static int gfx_v9_0_sw_init(void *handle) ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1; /* disable scheduler on the real ring */ - ring->no_scheduler = true; + ring->no_scheduler = adev->gfx.mcbp; ring->vm_hub = AMDGPU_GFXHUB(0); r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP, @@ -2090,7 +2090,7 @@ static int gfx_v9_0_sw_init(void *handle) } /* set up the software rings */ - if (adev->gfx.num_gfx_rings) { + if (adev->gfx.mcbp && adev->gfx.num_gfx_rings) { for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++) { ring = &adev->gfx.sw_gfx_ring[i]; ring->ring_obj = NULL; @@ -2151,8 +2151,7 @@ static int gfx_v9_0_sw_init(void *handle) return r; } - kiq = &adev->gfx.kiq[0]; - r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq, 0); + r = amdgpu_gfx_kiq_init_ring(adev, xcc_id); if (r) return r; @@ -2181,7 +2180,7 @@ static int gfx_v9_0_sw_fini(void *handle) int i; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - if (adev->gfx.num_gfx_rings) { + if (adev->gfx.mcbp && adev->gfx.num_gfx_rings) { for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++) amdgpu_ring_fini(&adev->gfx.sw_gfx_ring[i]); amdgpu_ring_mux_fini(&adev->gfx.muxer); @@ -4902,7 +4901,7 @@ static void gfx_v9_0_update_spm_vmid_internal(struct amdgpu_device *adev, WREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL, data); } -static void gfx_v9_0_update_spm_vmid(struct amdgpu_device *adev, unsigned int vmid) +static void gfx_v9_0_update_spm_vmid(struct amdgpu_device *adev, struct amdgpu_ring *ring, unsigned int vmid) { amdgpu_gfx_off_ctrl(adev, false); @@ -5611,31 +5610,21 @@ static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags) amdgpu_ring_write(ring, 0); } -static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring) +static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring, + uint64_t addr) { unsigned ret; amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3)); - amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr)); - amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr)); - amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */ + amdgpu_ring_write(ring, lower_32_bits(addr)); + amdgpu_ring_write(ring, upper_32_bits(addr)); + /* discard following DWs if *cond_exec_gpu_addr==0 */ + amdgpu_ring_write(ring, 0); ret = ring->wptr & ring->buf_mask; - amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */ + /* patch dummy value later */ + amdgpu_ring_write(ring, 0); return ret; } -static void gfx_v9_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset) -{ - unsigned cur; - BUG_ON(offset > ring->buf_mask); - BUG_ON(ring->ring[offset] != 0x55aa55aa); - - cur = (ring->wptr - 1) & ring->buf_mask; - if (likely(cur > offset)) - ring->ring[offset] = cur - offset; - else - ring->ring[offset] = (ring->ring_size>>2) - offset + cur; -} - static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t reg_val_offs) { @@ -5910,11 +5899,14 @@ static int gfx_v9_0_eop_irq(struct amdgpu_device *adev, switch (me_id) { case 0: - if (adev->gfx.num_gfx_rings && - !amdgpu_mcbp_handle_trailing_fence_irq(&adev->gfx.muxer)) { - /* Fence signals are handled on the software rings*/ - for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++) - amdgpu_fence_process(&adev->gfx.sw_gfx_ring[i]); + if (adev->gfx.num_gfx_rings) { + if (!adev->gfx.mcbp) { + amdgpu_fence_process(&adev->gfx.gfx_ring[0]); + } else if (!amdgpu_mcbp_handle_trailing_fence_irq(&adev->gfx.muxer)) { + /* Fence signals are handled on the software rings*/ + for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++) + amdgpu_fence_process(&adev->gfx.sw_gfx_ring[i]); + } } break; case 1: @@ -6864,6 +6856,8 @@ static const struct amd_ip_funcs gfx_v9_0_ip_funcs = { .set_clockgating_state = gfx_v9_0_set_clockgating_state, .set_powergating_state = gfx_v9_0_set_powergating_state, .get_clockgating_state = gfx_v9_0_get_clockgating_state, + .dump_ip_state = NULL, + .print_ip_state = NULL, }; static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = { @@ -6909,7 +6903,6 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = { .emit_switch_buffer = gfx_v9_ring_emit_sb, .emit_cntxcntl = gfx_v9_ring_emit_cntxcntl, .init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec, - .patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec, .preempt_ib = gfx_v9_0_ring_preempt_ib, .emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl, .emit_wreg = gfx_v9_0_ring_emit_wreg, @@ -6964,7 +6957,6 @@ static const struct amdgpu_ring_funcs gfx_v9_0_sw_ring_funcs_gfx = { .emit_switch_buffer = gfx_v9_ring_emit_sb, .emit_cntxcntl = gfx_v9_ring_emit_cntxcntl, .init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec, - .patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec, .emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl, .emit_wreg = gfx_v9_0_ring_emit_wreg, .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait, @@ -6991,7 +6983,6 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = { 7 + /* gfx_v9_0_ring_emit_pipeline_sync */ SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + - 2 + /* gfx_v9_0_ring_emit_vm_flush */ 8 + 8 + 8 + /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */ 7 + /* gfx_v9_0_emit_mem_sync */ 5 + /* gfx_v9_0_emit_wave_limit for updating mmSPI_WCL_PIPE_PERCENT_GFX register */ @@ -7029,7 +7020,6 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = { 7 + /* gfx_v9_0_ring_emit_pipeline_sync */ SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + - 2 + /* gfx_v9_0_ring_emit_vm_flush */ 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */ .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */ .emit_fence = gfx_v9_0_ring_emit_fence_kiq, @@ -7051,7 +7041,7 @@ static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev) for (i = 0; i < adev->gfx.num_gfx_rings; i++) adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx; - if (adev->gfx.num_gfx_rings) { + if (adev->gfx.mcbp && adev->gfx.num_gfx_rings) { for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++) adev->gfx.sw_gfx_ring[i].funcs = &gfx_v9_0_sw_ring_funcs_gfx; } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4.c index bc8416afb62c..f53b379d8971 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4.c @@ -970,8 +970,9 @@ static void gfx_v9_4_reset_ras_error_count(struct amdgpu_device *adev) WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_DSM_INDEX, 255); } -static const struct soc15_reg_entry gfx_v9_4_ea_err_status_regs = - { SOC15_REG_ENTRY(GC, 0, mmGCEA_ERR_STATUS), 0, 1, 32 }; +static const struct soc15_reg_entry gfx_v9_4_ea_err_status_regs = { + SOC15_REG_ENTRY(GC, 0, mmGCEA_ERR_STATUS), 0, 1, 32 +}; static void gfx_v9_4_query_ras_error_status(struct amdgpu_device *adev) { diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c index 065b2bd5f5a6..3f4fd2f08163 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c @@ -1909,18 +1909,7 @@ static void gfx_v9_4_2_reset_sq_timeout_status(struct amdgpu_device *adev) mutex_unlock(&adev->grbm_idx_mutex); } -static bool gfx_v9_4_2_query_uctl2_poison_status(struct amdgpu_device *adev) -{ - u32 status = 0; - struct amdgpu_vmhub *hub; - - hub = &adev->vmhub[AMDGPU_GFXHUB(0)]; - status = RREG32(hub->vm_l2_pro_fault_status); - /* reset page fault status */ - WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1); - return REG_GET_FIELD(status, VM_L2_PROTECTION_FAULT_STATUS, FED); -} struct amdgpu_ras_block_hw_ops gfx_v9_4_2_ras_ops = { .query_ras_error_count = &gfx_v9_4_2_query_ras_error_count, @@ -1934,5 +1923,4 @@ struct amdgpu_gfx_ras gfx_v9_4_2_ras = { .hw_ops = &gfx_v9_4_2_ras_ops, }, .enable_watchdog_timer = &gfx_v9_4_2_enable_watchdog_timer, - .query_utcl2_poison_status = gfx_v9_4_2_query_uctl2_poison_status, }; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index 131cddbdda0d..7b16e8cca86a 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -38,6 +38,7 @@ #include "gfx_v9_4_3.h" #include "amdgpu_xcp.h" +#include "amdgpu_aca.h" MODULE_FIRMWARE("amdgpu/gc_9_4_3_mec.bin"); MODULE_FIRMWARE("amdgpu/gc_9_4_3_rlc.bin"); @@ -48,6 +49,10 @@ MODULE_FIRMWARE("amdgpu/gc_9_4_3_rlc.bin"); #define GOLDEN_GB_ADDR_CONFIG 0x2a114042 #define CP_HQD_PERSISTENT_STATE_DEFAULT 0xbe05301 +#define mmSMNAID_XCD0_MCA_SMU 0x36430400 /* SMN AID XCD0 */ +#define mmSMNAID_XCD1_MCA_SMU 0x38430400 /* SMN AID XCD1 */ +#define mmSMNXCD_XCD0_MCA_SMU 0x40430400 /* SMN XCD XCD0 */ + struct amdgpu_gfx_ras gfx_v9_4_3_ras; static void gfx_v9_4_3_set_ring_funcs(struct amdgpu_device *adev); @@ -426,16 +431,16 @@ out: static int gfx_v9_4_3_init_microcode(struct amdgpu_device *adev) { - const char *chip_name; + char ucode_prefix[15]; int r; - chip_name = "gc_9_4_3"; + amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix)); - r = gfx_v9_4_3_init_rlc_microcode(adev, chip_name); + r = gfx_v9_4_3_init_rlc_microcode(adev, ucode_prefix); if (r) return r; - r = gfx_v9_4_3_init_cp_compute_microcode(adev, chip_name); + r = gfx_v9_4_3_init_cp_compute_microcode(adev, ucode_prefix); if (r) return r; @@ -675,6 +680,72 @@ static const struct amdgpu_gfx_funcs gfx_v9_4_3_gfx_funcs = { .ih_node_to_logical_xcc = &gfx_v9_4_3_ih_to_xcc_inst, }; +static int gfx_v9_4_3_aca_bank_parser(struct aca_handle *handle, + struct aca_bank *bank, enum aca_smu_type type, + void *data) +{ + struct aca_bank_info info; + u64 misc0; + u32 instlo; + int ret; + + ret = aca_bank_info_decode(bank, &info); + if (ret) + return ret; + + /* NOTE: overwrite info.die_id with xcd id for gfx */ + instlo = ACA_REG__IPID__INSTANCEIDLO(bank->regs[ACA_REG_IDX_IPID]); + instlo &= GENMASK(31, 1); + info.die_id = instlo == mmSMNAID_XCD0_MCA_SMU ? 0 : 1; + + misc0 = bank->regs[ACA_REG_IDX_MISC0]; + + switch (type) { + case ACA_SMU_TYPE_UE: + ret = aca_error_cache_log_bank_error(handle, &info, + ACA_ERROR_TYPE_UE, 1ULL); + break; + case ACA_SMU_TYPE_CE: + ret = aca_error_cache_log_bank_error(handle, &info, + ACA_ERROR_TYPE_CE, ACA_REG__MISC0__ERRCNT(misc0)); + break; + default: + return -EINVAL; + } + + return ret; +} + +static bool gfx_v9_4_3_aca_bank_is_valid(struct aca_handle *handle, struct aca_bank *bank, + enum aca_smu_type type, void *data) +{ + u32 instlo; + + instlo = ACA_REG__IPID__INSTANCEIDLO(bank->regs[ACA_REG_IDX_IPID]); + instlo &= GENMASK(31, 1); + switch (instlo) { + case mmSMNAID_XCD0_MCA_SMU: + case mmSMNAID_XCD1_MCA_SMU: + case mmSMNXCD_XCD0_MCA_SMU: + return true; + default: + break; + } + + return false; +} + +static const struct aca_bank_ops gfx_v9_4_3_aca_bank_ops = { + .aca_bank_parser = gfx_v9_4_3_aca_bank_parser, + .aca_bank_is_valid = gfx_v9_4_3_aca_bank_is_valid, +}; + +static const struct aca_info gfx_v9_4_3_aca_info = { + .hwip = ACA_HWIP_TYPE_SMU, + .mask = ACA_ERROR_UE_MASK | ACA_ERROR_CE_MASK, + .bank_ops = &gfx_v9_4_3_aca_bank_ops, +}; + static int gfx_v9_4_3_gpu_early_init(struct amdgpu_device *adev) { u32 gb_addr_config; @@ -778,7 +849,6 @@ static int gfx_v9_4_3_compute_ring_init(struct amdgpu_device *adev, int ring_id, static int gfx_v9_4_3_sw_init(void *handle) { int i, j, k, r, ring_id, xcc_id, num_xcc; - struct amdgpu_kiq *kiq; struct amdgpu_device *adev = (struct amdgpu_device *)handle; adev->gfx.mec.num_mec = 2; @@ -847,8 +917,7 @@ static int gfx_v9_4_3_sw_init(void *handle) return r; } - kiq = &adev->gfx.kiq[xcc_id]; - r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq, xcc_id); + r = amdgpu_gfx_kiq_init_ring(adev, xcc_id); if (r) return r; @@ -1109,7 +1178,7 @@ static int gfx_v9_4_3_rlc_init(struct amdgpu_device *adev) { /* init spm vmid with 0xf */ if (adev->gfx.rlc.funcs->update_spm_vmid) - adev->gfx.rlc.funcs->update_spm_vmid(adev, 0xf); + adev->gfx.rlc.funcs->update_spm_vmid(adev, NULL, 0xf); return 0; } @@ -1320,7 +1389,7 @@ static int gfx_v9_4_3_rlc_resume(struct amdgpu_device *adev) return 0; } -static void gfx_v9_4_3_update_spm_vmid(struct amdgpu_device *adev, +static void gfx_v9_4_3_update_spm_vmid(struct amdgpu_device *adev, struct amdgpu_ring *ring, unsigned vmid) { u32 reg, data; @@ -2335,10 +2404,10 @@ gfx_v9_4_3_xcc_update_coarse_grain_clock_gating(struct amdgpu_device *adev, if (def != data) WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CGTT_MGCG_OVERRIDE, data); - /* enable cgcg FSM(0x0000363F) */ + /* CGCG Hysteresis: 400us */ def = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CGCG_CGLS_CTRL); - data = (0x36 + data = (0x2710 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK; if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) @@ -2347,10 +2416,10 @@ gfx_v9_4_3_xcc_update_coarse_grain_clock_gating(struct amdgpu_device *adev, if (def != data) WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CGCG_CGLS_CTRL, data); - /* set IDLE_POLL_COUNT(0x00900100) */ + /* set IDLE_POLL_COUNT(0x33450100)*/ def = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_RB_WPTR_POLL_CNTL); data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) | - (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT); + (0x3345 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT); if (def != data) WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_RB_WPTR_POLL_CNTL, data); } else { @@ -3888,6 +3957,9 @@ static void gfx_v9_4_3_inst_enable_watchdog_timer(struct amdgpu_device *adev, uint32_t i; uint32_t data; + if (amdgpu_sriov_vf(adev)) + return; + data = RREG32_SOC15(GC, GET_INST(GC, 0), regSQ_TIMEOUT_CONFIG); data = REG_SET_FIELD(data, SQ_TIMEOUT_CONFIG, TIMEOUT_FATAL_DISABLE, amdgpu_watchdog_timer.timeout_fatal_disable ? 1 : 0); @@ -3944,6 +4016,8 @@ static const struct amd_ip_funcs gfx_v9_4_3_ip_funcs = { .set_clockgating_state = gfx_v9_4_3_set_clockgating_state, .set_powergating_state = gfx_v9_4_3_set_powergating_state, .get_clockgating_state = gfx_v9_4_3_get_clockgating_state, + .dump_ip_state = NULL, + .print_ip_state = NULL, }; static const struct amdgpu_ring_funcs gfx_v9_4_3_ring_funcs_compute = { @@ -4242,9 +4316,32 @@ struct amdgpu_ras_block_hw_ops gfx_v9_4_3_ras_ops = { .reset_ras_error_count = &gfx_v9_4_3_reset_ras_error_count, }; +static int gfx_v9_4_3_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block) +{ + int r; + + r = amdgpu_ras_block_late_init(adev, ras_block); + if (r) + return r; + + r = amdgpu_ras_bind_aca(adev, AMDGPU_RAS_BLOCK__GFX, + &gfx_v9_4_3_aca_info, + NULL); + if (r) + goto late_fini; + + return 0; + +late_fini: + amdgpu_ras_block_late_fini(adev, ras_block); + + return r; +} + struct amdgpu_gfx_ras gfx_v9_4_3_ras = { .ras_block = { .hw_ops = &gfx_v9_4_3_ras_ops, + .ras_late_init = &gfx_v9_4_3_ras_late_init, }, .enable_watchdog_timer = &gfx_v9_4_3_enable_watchdog_timer, }; diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c index 22175da0e16a..d200310d1731 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c @@ -443,6 +443,22 @@ static void gfxhub_v1_0_init(struct amdgpu_device *adev) mmVM_INVALIDATE_ENG0_ADDR_RANGE_LO32; } +static bool gfxhub_v1_0_query_utcl2_poison_status(struct amdgpu_device *adev, + int xcc_id) +{ + u32 status = 0; + struct amdgpu_vmhub *hub; + + if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 2)) + return false; + + hub = &adev->vmhub[AMDGPU_GFXHUB(0)]; + status = RREG32(hub->vm_l2_pro_fault_status); + /* reset page fault status */ + WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1); + + return REG_GET_FIELD(status, VM_L2_PROTECTION_FAULT_STATUS, FED); +} const struct amdgpu_gfxhub_funcs gfxhub_v1_0_funcs = { .get_mc_fb_offset = gfxhub_v1_0_get_mc_fb_offset, @@ -452,4 +468,5 @@ const struct amdgpu_gfxhub_funcs gfxhub_v1_0_funcs = { .set_fault_enable_default = gfxhub_v1_0_set_fault_enable_default, .init = gfxhub_v1_0_init, .get_xgmi_info = gfxhub_v1_1_get_xgmi_info, + .query_utcl2_poison_status = gfxhub_v1_0_query_utcl2_poison_status, }; diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c index 49aecdcee006..77df8c9cbad2 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c @@ -620,6 +620,20 @@ static int gfxhub_v1_2_get_xgmi_info(struct amdgpu_device *adev) return 0; } +static bool gfxhub_v1_2_query_utcl2_poison_status(struct amdgpu_device *adev, + int xcc_id) +{ + u32 fed, status; + + status = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regVM_L2_PROTECTION_FAULT_STATUS); + fed = REG_GET_FIELD(status, VM_L2_PROTECTION_FAULT_STATUS, FED); + /* reset page fault status */ + WREG32_P(SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), + regVM_L2_PROTECTION_FAULT_STATUS), 1, ~1); + + return fed; +} + const struct amdgpu_gfxhub_funcs gfxhub_v1_2_funcs = { .get_mc_fb_offset = gfxhub_v1_2_get_mc_fb_offset, .setup_vm_pt_regs = gfxhub_v1_2_setup_vm_pt_regs, @@ -628,6 +642,7 @@ const struct amdgpu_gfxhub_funcs gfxhub_v1_2_funcs = { .set_fault_enable_default = gfxhub_v1_2_set_fault_enable_default, .init = gfxhub_v1_2_init, .get_xgmi_info = gfxhub_v1_2_get_xgmi_info, + .query_utcl2_poison_status = gfxhub_v1_2_query_utcl2_poison_status, }; static int gfxhub_v1_2_xcp_resume(void *handle, uint32_t inst_mask) diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c index cd0e8a321e46..17509f32f61a 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c @@ -155,6 +155,9 @@ static void gfxhub_v2_1_init_system_aperture_regs(struct amdgpu_device *adev) { uint64_t value; + if (amdgpu_sriov_vf(adev)) + return; + /* Program the AGP BAR */ WREG32_SOC15(GC, 0, mmGCMC_VM_AGP_BASE, 0); WREG32_SOC15(GC, 0, mmGCMC_VM_AGP_BOT, adev->gmc.agp_start >> 24); diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c index 6c5185608854..d933e19e0cf5 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c @@ -105,7 +105,7 @@ static int gmc_v10_0_process_interrupt(struct amdgpu_device *adev, struct amdgpu_vmhub *hub = &adev->vmhub[vmhub_index]; bool retry_fault = !!(entry->src_data[1] & 0x80); bool write_fault = !!(entry->src_data[1] & 0x20); - struct amdgpu_task_info task_info; + struct amdgpu_task_info *task_info; uint32_t status = 0; u64 addr; @@ -157,18 +157,22 @@ static int gmc_v10_0_process_interrupt(struct amdgpu_device *adev, if (!printk_ratelimit()) return 0; - memset(&task_info, 0, sizeof(struct amdgpu_task_info)); - amdgpu_vm_get_task_info(adev, entry->pasid, &task_info); - dev_err(adev->dev, - "[%s] page fault (src_id:%u ring:%u vmid:%u pasid:%u, for process %s pid %d thread %s pid %d)\n", + "[%s] page fault (src_id:%u ring:%u vmid:%u pasid:%u)\n", entry->vmid_src ? "mmhub" : "gfxhub", - entry->src_id, entry->ring_id, entry->vmid, - entry->pasid, task_info.process_name, task_info.tgid, - task_info.task_name, task_info.pid); + entry->src_id, entry->ring_id, entry->vmid, entry->pasid); + task_info = amdgpu_vm_get_task_info_pasid(adev, entry->pasid); + if (task_info) { + dev_err(adev->dev, + " in process %s pid %d thread %s pid %d\n", + task_info->process_name, task_info->tgid, + task_info->task_name, task_info->pid); + amdgpu_vm_put_task_info(task_info); + } + dev_err(adev->dev, " in page starting at address 0x%016llx from client 0x%x (%s)\n", - addr, entry->client_id, - soc15_ih_clientid_name[entry->client_id]); + addr, entry->client_id, + soc15_ih_clientid_name[entry->client_id]); if (!amdgpu_sriov_vf(adev)) hub->vmhub_funcs->print_l2_protection_fault_status(adev, @@ -262,16 +266,17 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, /* flush hdp cache */ adev->hdp.funcs->flush_hdp(adev, NULL); - /* For SRIOV run time, driver shouldn't access the register through MMIO - * Directly use kiq to do the vm invalidation instead + /* This is necessary for SRIOV as well as for GFXOFF to function + * properly under bare metal */ if (adev->gfx.kiq[0].ring.sched.ready && !adev->enable_mes && (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev))) { - amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, inv_req, - 1 << vmid, GET_INST(GC, 0)); + amdgpu_gmc_fw_reg_write_reg_wait(adev, req, ack, inv_req, + 1 << vmid, GET_INST(GC, 0)); return; } + /* This path is needed before KIQ/MES/GFXOFF are set up */ hub_ip = (vmhub == AMDGPU_GFXHUB(0)) ? GC_HWIP : MMHUB_HWIP; spin_lock(&adev->gmc.invalidate_lock); diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c index c9c653cfc765..527dc917e049 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c @@ -126,19 +126,24 @@ static int gmc_v11_0_process_interrupt(struct amdgpu_device *adev, } if (printk_ratelimit()) { - struct amdgpu_task_info task_info; - - memset(&task_info, 0, sizeof(struct amdgpu_task_info)); - amdgpu_vm_get_task_info(adev, entry->pasid, &task_info); + struct amdgpu_task_info *task_info; dev_err(adev->dev, - "[%s] page fault (src_id:%u ring:%u vmid:%u pasid:%u, for process %s pid %d thread %s pid %d)\n", + "[%s] page fault (src_id:%u ring:%u vmid:%u pasid:%u)\n", entry->vmid_src ? "mmhub" : "gfxhub", - entry->src_id, entry->ring_id, entry->vmid, - entry->pasid, task_info.process_name, task_info.tgid, - task_info.task_name, task_info.pid); + entry->src_id, entry->ring_id, entry->vmid, entry->pasid); + task_info = amdgpu_vm_get_task_info_pasid(adev, entry->pasid); + if (task_info) { + dev_err(adev->dev, + " in process %s pid %d thread %s pid %d)\n", + task_info->process_name, task_info->tgid, + task_info->task_name, task_info->pid); + amdgpu_vm_put_task_info(task_info); + } + dev_err(adev->dev, " in page starting at address 0x%016llx from client %d\n", - addr, entry->client_id); + addr, entry->client_id); + if (!amdgpu_sriov_vf(adev)) hub->vmhub_funcs->print_l2_protection_fault_status(adev, status); } @@ -223,16 +228,17 @@ static void gmc_v11_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, /* flush hdp cache */ adev->hdp.funcs->flush_hdp(adev, NULL); - /* For SRIOV run time, driver shouldn't access the register through MMIO - * Directly use kiq to do the vm invalidation instead + /* This is necessary for SRIOV as well as for GFXOFF to function + * properly under bare metal */ if ((adev->gfx.kiq[0].ring.sched.ready || adev->mes.ring.sched.ready) && (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev))) { - amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, inv_req, - 1 << vmid, GET_INST(GC, 0)); + amdgpu_gmc_fw_reg_write_reg_wait(adev, req, ack, inv_req, + 1 << vmid, GET_INST(GC, 0)); return; } + /* This path is needed before KIQ/MES/GFXOFF are set up */ hub_ip = (vmhub == AMDGPU_GFXHUB(0)) ? GC_HWIP : MMHUB_HWIP; spin_lock(&adev->gmc.invalidate_lock); @@ -570,6 +576,7 @@ static void gmc_v11_0_set_mmhub_funcs(struct amdgpu_device *adev) adev->mmhub.funcs = &mmhub_v3_0_2_funcs; break; case IP_VERSION(3, 3, 0): + case IP_VERSION(3, 3, 1): adev->mmhub.funcs = &mmhub_v3_3_funcs; break; default: @@ -585,6 +592,7 @@ static void gmc_v11_0_set_gfxhub_funcs(struct amdgpu_device *adev) adev->gfxhub.funcs = &gfxhub_v3_0_3_funcs; break; case IP_VERSION(11, 5, 0): + case IP_VERSION(11, 5, 1): adev->gfxhub.funcs = &gfxhub_v11_5_0_funcs; break; default: @@ -746,6 +754,7 @@ static int gmc_v11_0_sw_init(void *handle) case IP_VERSION(11, 0, 3): case IP_VERSION(11, 0, 4): case IP_VERSION(11, 5, 0): + case IP_VERSION(11, 5, 1): set_bit(AMDGPU_GFXHUB(0), adev->vmhubs_mask); set_bit(AMDGPU_MMHUB0(0), adev->vmhubs_mask); /* diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c index 59d9215e5556..3e38d8bfcb69 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c @@ -435,9 +435,10 @@ static void gmc_v6_0_set_prt(struct amdgpu_device *adev, bool enable) WREG32(mmVM_PRT_CNTL, tmp); if (enable) { - uint32_t low = AMDGPU_VA_RESERVED_SIZE >> AMDGPU_GPU_PAGE_SHIFT; + uint32_t low = AMDGPU_VA_RESERVED_BOTTOM >> + AMDGPU_GPU_PAGE_SHIFT; uint32_t high = adev->vm_manager.max_pfn - - (AMDGPU_VA_RESERVED_SIZE >> AMDGPU_GPU_PAGE_SHIFT); + (AMDGPU_VA_RESERVED_TOP >> AMDGPU_GPU_PAGE_SHIFT); WREG32(mmVM_PRT_APERTURE0_LOW_ADDR, low); WREG32(mmVM_PRT_APERTURE1_LOW_ADDR, low); @@ -1114,6 +1115,8 @@ static const struct amd_ip_funcs gmc_v6_0_ip_funcs = { .soft_reset = gmc_v6_0_soft_reset, .set_clockgating_state = gmc_v6_0_set_clockgating_state, .set_powergating_state = gmc_v6_0_set_powergating_state, + .dump_ip_state = NULL, + .print_ip_state = NULL, }; static const struct amdgpu_gmc_funcs gmc_v6_0_gmc_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c index 45a2f8e031a2..85df8fc81065 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c @@ -563,9 +563,10 @@ static void gmc_v7_0_set_prt(struct amdgpu_device *adev, bool enable) WREG32(mmVM_PRT_CNTL, tmp); if (enable) { - uint32_t low = AMDGPU_VA_RESERVED_SIZE >> AMDGPU_GPU_PAGE_SHIFT; + uint32_t low = AMDGPU_VA_RESERVED_BOTTOM >> + AMDGPU_GPU_PAGE_SHIFT; uint32_t high = adev->vm_manager.max_pfn - - (AMDGPU_VA_RESERVED_SIZE >> AMDGPU_GPU_PAGE_SHIFT); + (AMDGPU_VA_RESERVED_TOP >> AMDGPU_GPU_PAGE_SHIFT); WREG32(mmVM_PRT_APERTURE0_LOW_ADDR, low); WREG32(mmVM_PRT_APERTURE1_LOW_ADDR, low); @@ -1353,6 +1354,8 @@ static const struct amd_ip_funcs gmc_v7_0_ip_funcs = { .soft_reset = gmc_v7_0_soft_reset, .set_clockgating_state = gmc_v7_0_set_clockgating_state, .set_powergating_state = gmc_v7_0_set_powergating_state, + .dump_ip_state = NULL, + .print_ip_state = NULL, }; static const struct amdgpu_gmc_funcs gmc_v7_0_gmc_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c index 4422b27a3cc2..fc97757e33d9 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c @@ -777,9 +777,10 @@ static void gmc_v8_0_set_prt(struct amdgpu_device *adev, bool enable) WREG32(mmVM_PRT_CNTL, tmp); if (enable) { - uint32_t low = AMDGPU_VA_RESERVED_SIZE >> AMDGPU_GPU_PAGE_SHIFT; + uint32_t low = AMDGPU_VA_RESERVED_BOTTOM >> + AMDGPU_GPU_PAGE_SHIFT; uint32_t high = adev->vm_manager.max_pfn - - (AMDGPU_VA_RESERVED_SIZE >> AMDGPU_GPU_PAGE_SHIFT); + (AMDGPU_VA_RESERVED_TOP >> AMDGPU_GPU_PAGE_SHIFT); WREG32(mmVM_PRT_APERTURE0_LOW_ADDR, low); WREG32(mmVM_PRT_APERTURE1_LOW_ADDR, low); @@ -1444,18 +1445,24 @@ static int gmc_v8_0_process_interrupt(struct amdgpu_device *adev, gmc_v8_0_set_fault_enable_default(adev, false); if (printk_ratelimit()) { - struct amdgpu_task_info task_info; + struct amdgpu_task_info *task_info; - memset(&task_info, 0, sizeof(struct amdgpu_task_info)); - amdgpu_vm_get_task_info(adev, entry->pasid, &task_info); + dev_err(adev->dev, "GPU fault detected: %d 0x%08x\n", + entry->src_id, entry->src_data[0]); + + task_info = amdgpu_vm_get_task_info_pasid(adev, entry->pasid); + if (task_info) { + dev_err(adev->dev, " for process %s pid %d thread %s pid %d\n", + task_info->process_name, task_info->tgid, + task_info->task_name, task_info->pid); + amdgpu_vm_put_task_info(task_info); + } - dev_err(adev->dev, "GPU fault detected: %d 0x%08x for process %s pid %d thread %s pid %d\n", - entry->src_id, entry->src_data[0], task_info.process_name, - task_info.tgid, task_info.task_name, task_info.pid); dev_err(adev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n", - addr); + addr); dev_err(adev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n", status); + gmc_v8_0_vm_decode_fault(adev, status, addr, mc_client, entry->pasid); } @@ -1710,6 +1717,8 @@ static const struct amd_ip_funcs gmc_v8_0_ip_funcs = { .set_clockgating_state = gmc_v8_0_set_clockgating_state, .set_powergating_state = gmc_v8_0_set_powergating_state, .get_clockgating_state = gmc_v8_0_get_clockgating_state, + .dump_ip_state = NULL, + .print_ip_state = NULL, }; static const struct amdgpu_gmc_funcs gmc_v8_0_gmc_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index e67a62db9e12..c4ec1358f3aa 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -496,14 +496,14 @@ static int gmc_v9_0_vm_fault_interrupt_state(struct amdgpu_device *adev, if (j >= AMDGPU_MMHUB0(0)) tmp = RREG32_SOC15_IP(MMHUB, reg); else - tmp = RREG32_SOC15_IP(GC, reg); + tmp = RREG32_XCC(reg, j); tmp &= ~bits; if (j >= AMDGPU_MMHUB0(0)) WREG32_SOC15_IP(MMHUB, reg, tmp); else - WREG32_SOC15_IP(GC, reg, tmp); + WREG32_XCC(reg, tmp, j); } } break; @@ -524,14 +524,14 @@ static int gmc_v9_0_vm_fault_interrupt_state(struct amdgpu_device *adev, if (j >= AMDGPU_MMHUB0(0)) tmp = RREG32_SOC15_IP(MMHUB, reg); else - tmp = RREG32_SOC15_IP(GC, reg); + tmp = RREG32_XCC(reg, j); tmp |= bits; if (j >= AMDGPU_MMHUB0(0)) WREG32_SOC15_IP(MMHUB, reg, tmp); else - WREG32_SOC15_IP(GC, reg, tmp); + WREG32_XCC(reg, tmp, j); } } break; @@ -548,8 +548,8 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev, { bool retry_fault = !!(entry->src_data[1] & 0x80); bool write_fault = !!(entry->src_data[1] & 0x20); - uint32_t status = 0, cid = 0, rw = 0; - struct amdgpu_task_info task_info; + uint32_t status = 0, cid = 0, rw = 0, fed = 0; + struct amdgpu_task_info *task_info; struct amdgpu_vmhub *hub; const char *mmhub_cid; const char *hub_name; @@ -626,15 +626,20 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev, if (!printk_ratelimit()) return 0; - memset(&task_info, 0, sizeof(struct amdgpu_task_info)); - amdgpu_vm_get_task_info(adev, entry->pasid, &task_info); - dev_err(adev->dev, - "[%s] %s page fault (src_id:%u ring:%u vmid:%u pasid:%u, for process %s pid %d thread %s pid %d)\n", - hub_name, retry_fault ? "retry" : "no-retry", - entry->src_id, entry->ring_id, entry->vmid, - entry->pasid, task_info.process_name, task_info.tgid, - task_info.task_name, task_info.pid); + "[%s] %s page fault (src_id:%u ring:%u vmid:%u pasid:%u)\n", hub_name, + retry_fault ? "retry" : "no-retry", + entry->src_id, entry->ring_id, entry->vmid, entry->pasid); + + task_info = amdgpu_vm_get_task_info_pasid(adev, entry->pasid); + if (task_info) { + dev_err(adev->dev, + " for process %s pid %d thread %s pid %d)\n", + task_info->process_name, task_info->tgid, + task_info->task_name, task_info->pid); + amdgpu_vm_put_task_info(task_info); + } + dev_err(adev->dev, " in page starting at address 0x%016llx from IH client 0x%x (%s)\n", addr, entry->client_id, soc15_ih_clientid_name[entry->client_id]); @@ -659,6 +664,13 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev, status = RREG32(hub->vm_l2_pro_fault_status); cid = REG_GET_FIELD(status, VM_L2_PROTECTION_FAULT_STATUS, CID); rw = REG_GET_FIELD(status, VM_L2_PROTECTION_FAULT_STATUS, RW); + fed = REG_GET_FIELD(status, VM_L2_PROTECTION_FAULT_STATUS, FED); + + /* for fed error, kfd will handle it, return directly */ + if (fed && amdgpu_ras_is_poison_mode_supported(adev) && + (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(9, 4, 2))) + return 0; + WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1); amdgpu_vm_update_fault_cache(adev, entry->pasid, addr, status, vmhub); @@ -829,23 +841,25 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, req = hub->vm_inv_eng0_req + hub->eng_distance * eng; ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng; - /* This is necessary for a HW workaround under SRIOV as well - * as GFXOFF under bare metal - */ if (vmhub >= AMDGPU_MMHUB0(0)) inst = GET_INST(GC, 0); else inst = vmhub; + + /* This is necessary for SRIOV as well as for GFXOFF to function + * properly under bare metal + */ if (adev->gfx.kiq[inst].ring.sched.ready && (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev))) { uint32_t req = hub->vm_inv_eng0_req + hub->eng_distance * eng; uint32_t ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng; - amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, inv_req, - 1 << vmid, inst); + amdgpu_gmc_fw_reg_write_reg_wait(adev, req, ack, inv_req, + 1 << vmid, inst); return; } + /* This path is needed before KIQ/MES/GFXOFF are set up */ spin_lock(&adev->gmc.invalidate_lock); /* @@ -1443,7 +1457,6 @@ static void gmc_v9_0_set_umc_funcs(struct amdgpu_device *adev) adev->umc.channel_offs = UMC_V12_0_PER_CHANNEL_OFFSET; adev->umc.active_mask = adev->aid_mask; adev->umc.retire_unit = UMC_V12_0_BAD_PAGE_NUM_PER_CHANNEL; - adev->umc.channel_idx_tbl = &umc_v12_0_channel_idx_tbl[0][0][0]; if (!adev->gmc.xgmi.connected_to_cpu && !adev->gmc.is_app_apu) adev->umc.ras = &umc_v12_0_ras; break; diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v7_0.c b/drivers/gpu/drm/amd/amdgpu/hdp_v7_0.c new file mode 100644 index 000000000000..8d7d0813e331 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/hdp_v7_0.c @@ -0,0 +1,142 @@ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#include "amdgpu.h" +#include "amdgpu_atombios.h" +#include "hdp_v7_0.h" + +#include "hdp/hdp_7_0_0_offset.h" +#include "hdp/hdp_7_0_0_sh_mask.h" +#include <uapi/linux/kfd_ioctl.h> + +static void hdp_v7_0_flush_hdp(struct amdgpu_device *adev, + struct amdgpu_ring *ring) +{ + if (!ring || !ring->funcs->emit_wreg) + WREG32_NO_KIQ((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); + else + amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); +} + +static void hdp_v7_0_update_clock_gating(struct amdgpu_device *adev, + bool enable) +{ + uint32_t hdp_clk_cntl, hdp_clk_cntl1; + uint32_t hdp_mem_pwr_cntl; + + if (!(adev->cg_flags & (AMD_CG_SUPPORT_HDP_LS | + AMD_CG_SUPPORT_HDP_DS | + AMD_CG_SUPPORT_HDP_SD))) + return; + + hdp_clk_cntl = hdp_clk_cntl1 = RREG32_SOC15(HDP, 0,regHDP_CLK_CNTL); + hdp_mem_pwr_cntl = RREG32_SOC15(HDP, 0, regHDP_MEM_POWER_CTRL); + + /* Before doing clock/power mode switch, + * forced on IPH & RC clock */ + hdp_clk_cntl = REG_SET_FIELD(hdp_clk_cntl, HDP_CLK_CNTL, + RC_MEM_CLK_SOFT_OVERRIDE, 1); + WREG32_SOC15(HDP, 0, regHDP_CLK_CNTL, hdp_clk_cntl); + + /* disable clock and power gating before any changing */ + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL, + ATOMIC_MEM_POWER_CTRL_EN, 0); + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL, + ATOMIC_MEM_POWER_LS_EN, 0); + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL, + ATOMIC_MEM_POWER_DS_EN, 0); + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL, + ATOMIC_MEM_POWER_SD_EN, 0); + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL, + RC_MEM_POWER_CTRL_EN, 0); + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL, + RC_MEM_POWER_LS_EN, 0); + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL, + RC_MEM_POWER_DS_EN, 0); + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL, + RC_MEM_POWER_SD_EN, 0); + WREG32_SOC15(HDP, 0, regHDP_MEM_POWER_CTRL, hdp_mem_pwr_cntl); + + /* Already disabled above. The actions below are for "enabled" only */ + if (enable) { + /* only one clock gating mode (LS/DS/SD) can be enabled */ + if (adev->cg_flags & AMD_CG_SUPPORT_HDP_SD) { + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, + HDP_MEM_POWER_CTRL, + ATOMIC_MEM_POWER_SD_EN, 1); + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, + HDP_MEM_POWER_CTRL, + RC_MEM_POWER_SD_EN, 1); + } else if (adev->cg_flags & AMD_CG_SUPPORT_HDP_LS) { + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, + HDP_MEM_POWER_CTRL, + ATOMIC_MEM_POWER_LS_EN, 1); + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, + HDP_MEM_POWER_CTRL, + RC_MEM_POWER_LS_EN, 1); + } else if (adev->cg_flags & AMD_CG_SUPPORT_HDP_DS) { + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, + HDP_MEM_POWER_CTRL, + ATOMIC_MEM_POWER_DS_EN, 1); + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, + HDP_MEM_POWER_CTRL, + RC_MEM_POWER_DS_EN, 1); + } + + /* confirmed that IPH_MEM_POWER_CTRL_EN and RC_MEM_POWER_CTRL_EN have to + * be set for SRAM LS/DS/SD */ + if (adev->cg_flags & (AMD_CG_SUPPORT_HDP_LS | AMD_CG_SUPPORT_HDP_DS | + AMD_CG_SUPPORT_HDP_SD)) { + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL, + ATOMIC_MEM_POWER_CTRL_EN, 1); + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL, + RC_MEM_POWER_CTRL_EN, 1); + WREG32_SOC15(HDP, 0, regHDP_MEM_POWER_CTRL, hdp_mem_pwr_cntl); + } + } + + /* disable IPH & RC clock override after clock/power mode changing */ + hdp_clk_cntl = REG_SET_FIELD(hdp_clk_cntl, HDP_CLK_CNTL, + RC_MEM_CLK_SOFT_OVERRIDE, 0); + WREG32_SOC15(HDP, 0, regHDP_CLK_CNTL, hdp_clk_cntl); +} + +static void hdp_v7_0_get_clockgating_state(struct amdgpu_device *adev, + u64 *flags) +{ + uint32_t tmp; + + /* AMD_CG_SUPPORT_HDP_LS/DS/SD */ + tmp = RREG32_SOC15(HDP, 0, regHDP_MEM_POWER_CTRL); + if (tmp & HDP_MEM_POWER_CTRL__ATOMIC_MEM_POWER_LS_EN_MASK) + *flags |= AMD_CG_SUPPORT_HDP_LS; + else if (tmp & HDP_MEM_POWER_CTRL__ATOMIC_MEM_POWER_DS_EN_MASK) + *flags |= AMD_CG_SUPPORT_HDP_DS; + else if (tmp & HDP_MEM_POWER_CTRL__ATOMIC_MEM_POWER_SD_EN_MASK) + *flags |= AMD_CG_SUPPORT_HDP_SD; +} + +const struct amdgpu_hdp_funcs hdp_v7_0_funcs = { + .flush_hdp = hdp_v7_0_flush_hdp, + .update_clock_gating = hdp_v7_0_update_clock_gating, + .get_clock_gating_state = hdp_v7_0_get_clockgating_state, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v7_0.h b/drivers/gpu/drm/amd/amdgpu/hdp_v7_0.h new file mode 100644 index 000000000000..25b69201402d --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/hdp_v7_0.h @@ -0,0 +1,31 @@ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __HDP_V7_0_H__ +#define __HDP_V7_0_H__ + +#include "soc15_common.h" + +extern const struct amdgpu_hdp_funcs hdp_v7_0_funcs; + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/iceland_ih.c b/drivers/gpu/drm/amd/amdgpu/iceland_ih.c index 2c02ae69883d..07984f7c3ae7 100644 --- a/drivers/gpu/drm/amd/amdgpu/iceland_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/iceland_ih.c @@ -425,6 +425,8 @@ static const struct amd_ip_funcs iceland_ih_ip_funcs = { .soft_reset = iceland_ih_soft_reset, .set_clockgating_state = iceland_ih_set_clockgating_state, .set_powergating_state = iceland_ih_set_powergating_state, + .dump_ip_state = NULL, + .print_ip_state = NULL, }; static const struct amdgpu_ih_funcs iceland_ih_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c b/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c index ad4ad39f128f..3cb64c8f7175 100644 --- a/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c @@ -346,6 +346,21 @@ static int ih_v6_0_irq_init(struct amdgpu_device *adev) DELAY, 3); WREG32_SOC15(OSSSYS, 0, regIH_MSI_STORM_CTRL, tmp); + /* Redirect the interrupts to IH RB1 for dGPU */ + if (adev->irq.ih1.ring_size) { + tmp = RREG32_SOC15(OSSSYS, 0, regIH_RING1_CLIENT_CFG_INDEX); + tmp = REG_SET_FIELD(tmp, IH_RING1_CLIENT_CFG_INDEX, INDEX, 0); + WREG32_SOC15(OSSSYS, 0, regIH_RING1_CLIENT_CFG_INDEX, tmp); + + tmp = RREG32_SOC15(OSSSYS, 0, regIH_RING1_CLIENT_CFG_DATA); + tmp = REG_SET_FIELD(tmp, IH_RING1_CLIENT_CFG_DATA, CLIENT_ID, 0xa); + tmp = REG_SET_FIELD(tmp, IH_RING1_CLIENT_CFG_DATA, SOURCE_ID, 0x0); + tmp = REG_SET_FIELD(tmp, IH_RING1_CLIENT_CFG_DATA, + SOURCE_ID_MATCH_ENABLE, 0x1); + + WREG32_SOC15(OSSSYS, 0, regIH_RING1_CLIENT_CFG_DATA, tmp); + } + pci_set_master(adev->pdev); /* enable interrupts */ @@ -549,8 +564,15 @@ static int ih_v6_0_sw_init(void *handle) adev->irq.ih.use_doorbell = true; adev->irq.ih.doorbell_index = adev->doorbell_index.ih << 1; - adev->irq.ih1.ring_size = 0; - adev->irq.ih2.ring_size = 0; + if (!(adev->flags & AMD_IS_APU)) { + r = amdgpu_ih_ring_init(adev, &adev->irq.ih1, IH_RING_SIZE, + use_bus_addr); + if (r) + return r; + + adev->irq.ih1.use_doorbell = true; + adev->irq.ih1.doorbell_index = (adev->doorbell_index.ih + 1) << 1; + } /* initialize ih control register offset */ ih_v6_0_init_register_offset(adev); @@ -748,6 +770,8 @@ static const struct amd_ip_funcs ih_v6_0_ip_funcs = { .set_clockgating_state = ih_v6_0_set_clockgating_state, .set_powergating_state = ih_v6_0_set_powergating_state, .get_clockgating_state = ih_v6_0_get_clockgating_state, + .dump_ip_state = NULL, + .print_ip_state = NULL, }; static const struct amdgpu_ih_funcs ih_v6_0_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/ih_v6_1.c b/drivers/gpu/drm/amd/amdgpu/ih_v6_1.c index b8da0fc29378..0fbf5fa7b0f8 100644 --- a/drivers/gpu/drm/amd/amdgpu/ih_v6_1.c +++ b/drivers/gpu/drm/amd/amdgpu/ih_v6_1.c @@ -346,6 +346,21 @@ static int ih_v6_1_irq_init(struct amdgpu_device *adev) DELAY, 3); WREG32_SOC15(OSSSYS, 0, regIH_MSI_STORM_CTRL, tmp); + /* Redirect the interrupts to IH RB1 for dGPU */ + if (adev->irq.ih1.ring_size) { + tmp = RREG32_SOC15(OSSSYS, 0, regIH_RING1_CLIENT_CFG_INDEX); + tmp = REG_SET_FIELD(tmp, IH_RING1_CLIENT_CFG_INDEX, INDEX, 0); + WREG32_SOC15(OSSSYS, 0, regIH_RING1_CLIENT_CFG_INDEX, tmp); + + tmp = RREG32_SOC15(OSSSYS, 0, regIH_RING1_CLIENT_CFG_DATA); + tmp = REG_SET_FIELD(tmp, IH_RING1_CLIENT_CFG_DATA, CLIENT_ID, 0xa); + tmp = REG_SET_FIELD(tmp, IH_RING1_CLIENT_CFG_DATA, SOURCE_ID, 0x0); + tmp = REG_SET_FIELD(tmp, IH_RING1_CLIENT_CFG_DATA, + SOURCE_ID_MATCH_ENABLE, 0x1); + + WREG32_SOC15(OSSSYS, 0, regIH_RING1_CLIENT_CFG_DATA, tmp); + } + pci_set_master(adev->pdev); /* enable interrupts */ @@ -550,8 +565,15 @@ static int ih_v6_1_sw_init(void *handle) adev->irq.ih.use_doorbell = true; adev->irq.ih.doorbell_index = adev->doorbell_index.ih << 1; - adev->irq.ih1.ring_size = 0; - adev->irq.ih2.ring_size = 0; + if (!(adev->flags & AMD_IS_APU)) { + r = amdgpu_ih_ring_init(adev, &adev->irq.ih1, IH_RING_SIZE, + use_bus_addr); + if (r) + return r; + + adev->irq.ih1.use_doorbell = true; + adev->irq.ih1.doorbell_index = (adev->doorbell_index.ih + 1) << 1; + } /* initialize ih control register offset */ ih_v6_1_init_register_offset(adev); @@ -753,6 +775,8 @@ static const struct amd_ip_funcs ih_v6_1_ip_funcs = { .set_clockgating_state = ih_v6_1_set_clockgating_state, .set_powergating_state = ih_v6_1_set_powergating_state, .get_clockgating_state = ih_v6_1_get_clockgating_state, + .dump_ip_state = NULL, + .print_ip_state = NULL, }; static const struct amdgpu_ih_funcs ih_v6_1_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/ih_v7_0.c b/drivers/gpu/drm/amd/amdgpu/ih_v7_0.c new file mode 100644 index 000000000000..aa6235dd4f2b --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/ih_v7_0.c @@ -0,0 +1,775 @@ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include <linux/pci.h> + +#include "amdgpu.h" +#include "amdgpu_ih.h" + +#include "oss/osssys_7_0_0_offset.h" +#include "oss/osssys_7_0_0_sh_mask.h" + +#include "soc15_common.h" +#include "ih_v7_0.h" + +#define MAX_REARM_RETRY 10 + +static void ih_v7_0_set_interrupt_funcs(struct amdgpu_device *adev); + +/** + * ih_v7_0_init_register_offset - Initialize register offset for ih rings + * + * @adev: amdgpu_device pointer + * + * Initialize register offset ih rings (IH_V7_0). + */ +static void ih_v7_0_init_register_offset(struct amdgpu_device *adev) +{ + struct amdgpu_ih_regs *ih_regs; + + /* ih ring 2 is removed + * ih ring and ih ring 1 are available */ + if (adev->irq.ih.ring_size) { + ih_regs = &adev->irq.ih.ih_regs; + ih_regs->ih_rb_base = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_BASE); + ih_regs->ih_rb_base_hi = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_BASE_HI); + ih_regs->ih_rb_cntl = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_CNTL); + ih_regs->ih_rb_wptr = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_WPTR); + ih_regs->ih_rb_rptr = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_RPTR); + ih_regs->ih_doorbell_rptr = SOC15_REG_OFFSET(OSSSYS, 0, regIH_DOORBELL_RPTR); + ih_regs->ih_rb_wptr_addr_lo = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_WPTR_ADDR_LO); + ih_regs->ih_rb_wptr_addr_hi = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_WPTR_ADDR_HI); + ih_regs->psp_reg_id = PSP_REG_IH_RB_CNTL; + } + + if (adev->irq.ih1.ring_size) { + ih_regs = &adev->irq.ih1.ih_regs; + ih_regs->ih_rb_base = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_BASE_RING1); + ih_regs->ih_rb_base_hi = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_BASE_HI_RING1); + ih_regs->ih_rb_cntl = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_CNTL_RING1); + ih_regs->ih_rb_wptr = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_WPTR_RING1); + ih_regs->ih_rb_rptr = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_RPTR_RING1); + ih_regs->ih_doorbell_rptr = SOC15_REG_OFFSET(OSSSYS, 0, regIH_DOORBELL_RPTR_RING1); + ih_regs->psp_reg_id = PSP_REG_IH_RB_CNTL_RING1; + } +} + +/** + * force_update_wptr_for_self_int - Force update the wptr for self interrupt + * + * @adev: amdgpu_device pointer + * @threshold: threshold to trigger the wptr reporting + * @timeout: timeout to trigger the wptr reporting + * @enabled: Enable/disable timeout flush mechanism + * + * threshold input range: 0 ~ 15, default 0, + * real_threshold = 2^threshold + * timeout input range: 0 ~ 20, default 8, + * real_timeout = (2^timeout) * 1024 / (socclk_freq) + * + * Force update wptr for self interrupt ( >= SIENNA_CICHLID). + */ +static void +force_update_wptr_for_self_int(struct amdgpu_device *adev, + u32 threshold, u32 timeout, bool enabled) +{ + u32 ih_cntl, ih_rb_cntl; + + ih_cntl = RREG32_SOC15(OSSSYS, 0, regIH_CNTL2); + ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, regIH_RB_CNTL_RING1); + + ih_cntl = REG_SET_FIELD(ih_cntl, IH_CNTL2, + SELF_IV_FORCE_WPTR_UPDATE_TIMEOUT, timeout); + ih_cntl = REG_SET_FIELD(ih_cntl, IH_CNTL2, + SELF_IV_FORCE_WPTR_UPDATE_ENABLE, enabled); + ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING1, + RB_USED_INT_THRESHOLD, threshold); + + if (amdgpu_sriov_vf(adev) && amdgpu_sriov_reg_indirect_ih(adev)) { + if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING1, ih_rb_cntl)) + return; + } else { + WREG32_SOC15(OSSSYS, 0, regIH_RB_CNTL_RING1, ih_rb_cntl); + } + + WREG32_SOC15(OSSSYS, 0, regIH_CNTL2, ih_cntl); +} + +/** + * ih_v7_0_toggle_ring_interrupts - toggle the interrupt ring buffer + * + * @adev: amdgpu_device pointer + * @ih: amdgpu_ih_ring pointet + * @enable: true - enable the interrupts, false - disable the interrupts + * + * Toggle the interrupt ring buffer (IH_V7_0) + */ +static int ih_v7_0_toggle_ring_interrupts(struct amdgpu_device *adev, + struct amdgpu_ih_ring *ih, + bool enable) +{ + struct amdgpu_ih_regs *ih_regs; + uint32_t tmp; + + ih_regs = &ih->ih_regs; + + tmp = RREG32(ih_regs->ih_rb_cntl); + tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, RB_ENABLE, (enable ? 1 : 0)); + /* enable_intr field is only valid in ring0 */ + if (ih == &adev->irq.ih) + tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, ENABLE_INTR, (enable ? 1 : 0)); + + if (amdgpu_sriov_vf(adev) && amdgpu_sriov_reg_indirect_ih(adev)) { + if (psp_reg_program(&adev->psp, ih_regs->psp_reg_id, tmp)) + return -ETIMEDOUT; + } else { + WREG32(ih_regs->ih_rb_cntl, tmp); + } + + if (enable) { + ih->enabled = true; + } else { + /* set rptr, wptr to 0 */ + WREG32(ih_regs->ih_rb_rptr, 0); + WREG32(ih_regs->ih_rb_wptr, 0); + ih->enabled = false; + ih->rptr = 0; + } + + return 0; +} + +/** + * ih_v7_0_toggle_interrupts - Toggle all the available interrupt ring buffers + * + * @adev: amdgpu_device pointer + * @enable: enable or disable interrupt ring buffers + * + * Toggle all the available interrupt ring buffers (IH_V7_0). + */ +static int ih_v7_0_toggle_interrupts(struct amdgpu_device *adev, bool enable) +{ + struct amdgpu_ih_ring *ih[] = {&adev->irq.ih, &adev->irq.ih1}; + int i; + int r; + + for (i = 0; i < ARRAY_SIZE(ih); i++) { + if (ih[i]->ring_size) { + r = ih_v7_0_toggle_ring_interrupts(adev, ih[i], enable); + if (r) + return r; + } + } + + return 0; +} + +static uint32_t ih_v7_0_rb_cntl(struct amdgpu_ih_ring *ih, uint32_t ih_rb_cntl) +{ + int rb_bufsz = order_base_2(ih->ring_size / 4); + + ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, + MC_SPACE, ih->use_bus_addr ? 2 : 4); + ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, + WPTR_OVERFLOW_CLEAR, 1); + ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, + WPTR_OVERFLOW_ENABLE, 1); + ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, RB_SIZE, rb_bufsz); + /* Ring Buffer write pointer writeback. If enabled, IH_RB_WPTR register + * value is written to memory + */ + ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, + WPTR_WRITEBACK_ENABLE, 1); + ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, MC_SNOOP, 1); + ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, MC_RO, 0); + ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, MC_VMID, 0); + + return ih_rb_cntl; +} + +static uint32_t ih_v7_0_doorbell_rptr(struct amdgpu_ih_ring *ih) +{ + u32 ih_doorbell_rtpr = 0; + + if (ih->use_doorbell) { + ih_doorbell_rtpr = REG_SET_FIELD(ih_doorbell_rtpr, + IH_DOORBELL_RPTR, OFFSET, + ih->doorbell_index); + ih_doorbell_rtpr = REG_SET_FIELD(ih_doorbell_rtpr, + IH_DOORBELL_RPTR, + ENABLE, 1); + } else { + ih_doorbell_rtpr = REG_SET_FIELD(ih_doorbell_rtpr, + IH_DOORBELL_RPTR, + ENABLE, 0); + } + return ih_doorbell_rtpr; +} + +/** + * ih_v7_0_enable_ring - enable an ih ring buffer + * + * @adev: amdgpu_device pointer + * @ih: amdgpu_ih_ring pointer + * + * Enable an ih ring buffer (IH_V7_0) + */ +static int ih_v7_0_enable_ring(struct amdgpu_device *adev, + struct amdgpu_ih_ring *ih) +{ + struct amdgpu_ih_regs *ih_regs; + uint32_t tmp; + + ih_regs = &ih->ih_regs; + + /* Ring Buffer base. [39:8] of 40-bit address of the beginning of the ring buffer*/ + WREG32(ih_regs->ih_rb_base, ih->gpu_addr >> 8); + WREG32(ih_regs->ih_rb_base_hi, (ih->gpu_addr >> 40) & 0xff); + + tmp = RREG32(ih_regs->ih_rb_cntl); + tmp = ih_v7_0_rb_cntl(ih, tmp); + if (ih == &adev->irq.ih) + tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, RPTR_REARM, !!adev->irq.msi_enabled); + if (ih == &adev->irq.ih1) { + tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_ENABLE, 0); + tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, RB_FULL_DRAIN_ENABLE, 1); + } + + if (amdgpu_sriov_vf(adev) && amdgpu_sriov_reg_indirect_ih(adev)) { + if (psp_reg_program(&adev->psp, ih_regs->psp_reg_id, tmp)) { + DRM_ERROR("PSP program IH_RB_CNTL failed!\n"); + return -ETIMEDOUT; + } + } else { + WREG32(ih_regs->ih_rb_cntl, tmp); + } + + if (ih == &adev->irq.ih) { + /* set the ih ring 0 writeback address whether it's enabled or not */ + WREG32(ih_regs->ih_rb_wptr_addr_lo, lower_32_bits(ih->wptr_addr)); + WREG32(ih_regs->ih_rb_wptr_addr_hi, upper_32_bits(ih->wptr_addr) & 0xFFFF); + } + + /* set rptr, wptr to 0 */ + WREG32(ih_regs->ih_rb_wptr, 0); + WREG32(ih_regs->ih_rb_rptr, 0); + + WREG32(ih_regs->ih_doorbell_rptr, ih_v7_0_doorbell_rptr(ih)); + + return 0; +} + +/** + * ih_v7_0_irq_init - init and enable the interrupt ring + * + * @adev: amdgpu_device pointer + * + * Allocate a ring buffer for the interrupt controller, + * enable the RLC, disable interrupts, enable the IH + * ring buffer and enable it. + * Called at device load and reume. + * Returns 0 for success, errors for failure. + */ +static int ih_v7_0_irq_init(struct amdgpu_device *adev) +{ + struct amdgpu_ih_ring *ih[] = {&adev->irq.ih, &adev->irq.ih1}; + u32 ih_chicken; + u32 tmp; + int ret; + int i; + + /* disable irqs */ + ret = ih_v7_0_toggle_interrupts(adev, false); + if (ret) + return ret; + + adev->nbio.funcs->ih_control(adev); + + if (unlikely((adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) || + (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO))) { + if (ih[0]->use_bus_addr) { + ih_chicken = RREG32_SOC15(OSSSYS, 0, regIH_CHICKEN); + ih_chicken = REG_SET_FIELD(ih_chicken, + IH_CHICKEN, MC_SPACE_GPA_ENABLE, 1); + WREG32_SOC15(OSSSYS, 0, regIH_CHICKEN, ih_chicken); + } + } + + for (i = 0; i < ARRAY_SIZE(ih); i++) { + if (ih[i]->ring_size) { + ret = ih_v7_0_enable_ring(adev, ih[i]); + if (ret) + return ret; + } + } + + /* update doorbell range for ih ring 0 */ + adev->nbio.funcs->ih_doorbell_range(adev, ih[0]->use_doorbell, + ih[0]->doorbell_index); + + tmp = RREG32_SOC15(OSSSYS, 0, regIH_STORM_CLIENT_LIST_CNTL); + tmp = REG_SET_FIELD(tmp, IH_STORM_CLIENT_LIST_CNTL, + CLIENT18_IS_STORM_CLIENT, 1); + WREG32_SOC15(OSSSYS, 0, regIH_STORM_CLIENT_LIST_CNTL, tmp); + + tmp = RREG32_SOC15(OSSSYS, 0, regIH_INT_FLOOD_CNTL); + tmp = REG_SET_FIELD(tmp, IH_INT_FLOOD_CNTL, FLOOD_CNTL_ENABLE, 1); + WREG32_SOC15(OSSSYS, 0, regIH_INT_FLOOD_CNTL, tmp); + + /* GC/MMHUB UTCL2 page fault interrupts are configured as + * MSI storm capable interrupts by deafult. The delay is + * used to avoid ISR being called too frequently + * when page fault happens on several continuous page + * and thus avoid MSI storm */ + tmp = RREG32_SOC15(OSSSYS, 0, regIH_MSI_STORM_CTRL); + tmp = REG_SET_FIELD(tmp, IH_MSI_STORM_CTRL, + DELAY, 3); + WREG32_SOC15(OSSSYS, 0, regIH_MSI_STORM_CTRL, tmp); + + pci_set_master(adev->pdev); + + /* enable interrupts */ + ret = ih_v7_0_toggle_interrupts(adev, true); + if (ret) + return ret; + /* enable wptr force update for self int */ + force_update_wptr_for_self_int(adev, 0, 8, true); + + if (adev->irq.ih_soft.ring_size) + adev->irq.ih_soft.enabled = true; + + return 0; +} + +/** + * ih_v7_0_irq_disable - disable interrupts + * + * @adev: amdgpu_device pointer + * + * Disable interrupts on the hw. + */ +static void ih_v7_0_irq_disable(struct amdgpu_device *adev) +{ + force_update_wptr_for_self_int(adev, 0, 8, false); + ih_v7_0_toggle_interrupts(adev, false); + + /* Wait and acknowledge irq */ + mdelay(1); +} + +/** + * ih_v7_0_get_wptr() - get the IH ring buffer wptr + * + * @adev: amdgpu_device pointer + * @ih: IH ring buffer to fetch wptr + * + * Get the IH ring buffer wptr from either the register + * or the writeback memory buffer. Also check for + * ring buffer overflow and deal with it. + * Returns the value of the wptr. + */ +static u32 ih_v7_0_get_wptr(struct amdgpu_device *adev, + struct amdgpu_ih_ring *ih) +{ + u32 wptr, tmp; + struct amdgpu_ih_regs *ih_regs; + + wptr = le32_to_cpu(*ih->wptr_cpu); + ih_regs = &ih->ih_regs; + + if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW)) + goto out; + + wptr = RREG32_NO_KIQ(ih_regs->ih_rb_wptr); + if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW)) + goto out; + wptr = REG_SET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW, 0); + + /* When a ring buffer overflow happen start parsing interrupt + * from the last not overwritten vector (wptr + 32). Hopefully + * this should allow us to catch up. + */ + tmp = (wptr + 32) & ih->ptr_mask; + dev_warn(adev->dev, "IH ring buffer overflow " + "(0x%08X, 0x%08X, 0x%08X)\n", + wptr, ih->rptr, tmp); + ih->rptr = tmp; + + tmp = RREG32_NO_KIQ(ih_regs->ih_rb_cntl); + tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1); + WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp); + + /* Unset the CLEAR_OVERFLOW bit immediately so new overflows + * can be detected. + */ + tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0); + WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp); +out: + return (wptr & ih->ptr_mask); +} + +/** + * ih_v7_0_irq_rearm - rearm IRQ if lost + * + * @adev: amdgpu_device pointer + * @ih: IH ring to match + * + */ +static void ih_v7_0_irq_rearm(struct amdgpu_device *adev, + struct amdgpu_ih_ring *ih) +{ + uint32_t v = 0; + uint32_t i = 0; + struct amdgpu_ih_regs *ih_regs; + + ih_regs = &ih->ih_regs; + + /* Rearm IRQ / re-write doorbell if doorbell write is lost */ + for (i = 0; i < MAX_REARM_RETRY; i++) { + v = RREG32_NO_KIQ(ih_regs->ih_rb_rptr); + if ((v < ih->ring_size) && (v != ih->rptr)) + WDOORBELL32(ih->doorbell_index, ih->rptr); + else + break; + } +} + +/** + * ih_v7_0_set_rptr - set the IH ring buffer rptr + * + * @adev: amdgpu_device pointer + * @ih: IH ring buffer to set rptr + */ +static void ih_v7_0_set_rptr(struct amdgpu_device *adev, + struct amdgpu_ih_ring *ih) +{ + struct amdgpu_ih_regs *ih_regs; + + if (ih->use_doorbell) { + /* XXX check if swapping is necessary on BE */ + *ih->rptr_cpu = ih->rptr; + WDOORBELL32(ih->doorbell_index, ih->rptr); + + if (amdgpu_sriov_vf(adev)) + ih_v7_0_irq_rearm(adev, ih); + } else { + ih_regs = &ih->ih_regs; + WREG32(ih_regs->ih_rb_rptr, ih->rptr); + } +} + +/** + * ih_v7_0_self_irq - dispatch work for ring 1 + * + * @adev: amdgpu_device pointer + * @source: irq source + * @entry: IV with WPTR update + * + * Update the WPTR from the IV and schedule work to handle the entries. + */ +static int ih_v7_0_self_irq(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + uint32_t wptr = cpu_to_le32(entry->src_data[0]); + + switch (entry->ring_id) { + case 1: + *adev->irq.ih1.wptr_cpu = wptr; + schedule_work(&adev->irq.ih1_work); + break; + default: break; + } + return 0; +} + +static const struct amdgpu_irq_src_funcs ih_v7_0_self_irq_funcs = { + .process = ih_v7_0_self_irq, +}; + +static void ih_v7_0_set_self_irq_funcs(struct amdgpu_device *adev) +{ + adev->irq.self_irq.num_types = 0; + adev->irq.self_irq.funcs = &ih_v7_0_self_irq_funcs; +} + +static int ih_v7_0_early_init(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + ih_v7_0_set_interrupt_funcs(adev); + ih_v7_0_set_self_irq_funcs(adev); + return 0; +} + +static int ih_v7_0_sw_init(void *handle) +{ + int r; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + bool use_bus_addr; + + r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_IH, 0, + &adev->irq.self_irq); + + if (r) + return r; + + /* use gpu virtual address for ih ring + * until ih_checken is programmed to allow + * use bus address for ih ring by psp bl */ + use_bus_addr = + (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) ? false : true; + r = amdgpu_ih_ring_init(adev, &adev->irq.ih, 256 * 1024, use_bus_addr); + if (r) + return r; + + adev->irq.ih.use_doorbell = true; + adev->irq.ih.doorbell_index = adev->doorbell_index.ih << 1; + + adev->irq.ih1.ring_size = 0; + adev->irq.ih2.ring_size = 0; + + /* initialize ih control register offset */ + ih_v7_0_init_register_offset(adev); + + r = amdgpu_ih_ring_init(adev, &adev->irq.ih_soft, PAGE_SIZE, true); + if (r) + return r; + + r = amdgpu_irq_init(adev); + + return r; +} + +static int ih_v7_0_sw_fini(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + amdgpu_irq_fini_sw(adev); + + return 0; +} + +static int ih_v7_0_hw_init(void *handle) +{ + int r; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + r = ih_v7_0_irq_init(adev); + if (r) + return r; + + return 0; +} + +static int ih_v7_0_hw_fini(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + ih_v7_0_irq_disable(adev); + + return 0; +} + +static int ih_v7_0_suspend(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + return ih_v7_0_hw_fini(adev); +} + +static int ih_v7_0_resume(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + return ih_v7_0_hw_init(adev); +} + +static bool ih_v7_0_is_idle(void *handle) +{ + /* todo */ + return true; +} + +static int ih_v7_0_wait_for_idle(void *handle) +{ + /* todo */ + return -ETIMEDOUT; +} + +static int ih_v7_0_soft_reset(void *handle) +{ + /* todo */ + return 0; +} + +static void ih_v7_0_update_clockgating_state(struct amdgpu_device *adev, + bool enable) +{ + uint32_t data, def, field_val; + + if (adev->cg_flags & AMD_CG_SUPPORT_IH_CG) { + def = data = RREG32_SOC15(OSSSYS, 0, regIH_CLK_CTRL); + field_val = enable ? 0 : 1; + data = REG_SET_FIELD(data, IH_CLK_CTRL, + DBUS_MUX_CLK_SOFT_OVERRIDE, field_val); + data = REG_SET_FIELD(data, IH_CLK_CTRL, + OSSSYS_SHARE_CLK_SOFT_OVERRIDE, field_val); + data = REG_SET_FIELD(data, IH_CLK_CTRL, + LIMIT_SMN_CLK_SOFT_OVERRIDE, field_val); + data = REG_SET_FIELD(data, IH_CLK_CTRL, + DYN_CLK_SOFT_OVERRIDE, field_val); + data = REG_SET_FIELD(data, IH_CLK_CTRL, + REG_CLK_SOFT_OVERRIDE, field_val); + if (def != data) + WREG32_SOC15(OSSSYS, 0, regIH_CLK_CTRL, data); + } + + return; +} + +static int ih_v7_0_set_clockgating_state(void *handle, + enum amd_clockgating_state state) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + ih_v7_0_update_clockgating_state(adev, + state == AMD_CG_STATE_GATE); + return 0; +} + +static void ih_v7_0_update_ih_mem_power_gating(struct amdgpu_device *adev, + bool enable) +{ + uint32_t ih_mem_pwr_cntl; + + /* Disable ih sram power cntl before switch powergating mode */ + ih_mem_pwr_cntl = RREG32_SOC15(OSSSYS, 0, regIH_MEM_POWER_CTRL); + ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL, + IH_BUFFER_MEM_POWER_CTRL_EN, 0); + WREG32_SOC15(OSSSYS, 0, regIH_MEM_POWER_CTRL, ih_mem_pwr_cntl); + + /* It is recommended to set mem powergating mode to DS mode */ + if (enable) { + /* mem power mode */ + ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL, + IH_BUFFER_MEM_POWER_LS_EN, 0); + ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL, + IH_BUFFER_MEM_POWER_DS_EN, 1); + ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL, + IH_BUFFER_MEM_POWER_SD_EN, 0); + /* cam mem power mode */ + ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL, + IH_RETRY_INT_CAM_MEM_POWER_LS_EN, 0); + ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL, + IH_RETRY_INT_CAM_MEM_POWER_DS_EN, 1); + ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL, + IH_RETRY_INT_CAM_MEM_POWER_SD_EN, 0); + /* re-enable power cntl */ + ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL, + IH_BUFFER_MEM_POWER_CTRL_EN, 1); + } else { + /* mem power mode */ + ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL, + IH_BUFFER_MEM_POWER_LS_EN, 0); + ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL, + IH_BUFFER_MEM_POWER_DS_EN, 0); + ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL, + IH_BUFFER_MEM_POWER_SD_EN, 0); + /* cam mem power mode */ + ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL, + IH_RETRY_INT_CAM_MEM_POWER_LS_EN, 0); + ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL, + IH_RETRY_INT_CAM_MEM_POWER_DS_EN, 0); + ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL, + IH_RETRY_INT_CAM_MEM_POWER_SD_EN, 0); + /* re-enable power cntl*/ + ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL, + IH_BUFFER_MEM_POWER_CTRL_EN, 1); + } + + WREG32_SOC15(OSSSYS, 0, regIH_MEM_POWER_CTRL, ih_mem_pwr_cntl); +} + +static int ih_v7_0_set_powergating_state(void *handle, + enum amd_powergating_state state) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + bool enable = (state == AMD_PG_STATE_GATE); + + if (adev->pg_flags & AMD_PG_SUPPORT_IH_SRAM_PG) + ih_v7_0_update_ih_mem_power_gating(adev, enable); + + return 0; +} + +static void ih_v7_0_get_clockgating_state(void *handle, u64 *flags) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + if (!RREG32_SOC15(OSSSYS, 0, regIH_CLK_CTRL)) + *flags |= AMD_CG_SUPPORT_IH_CG; + + return; +} + +static const struct amd_ip_funcs ih_v7_0_ip_funcs = { + .name = "ih_v7_0", + .early_init = ih_v7_0_early_init, + .late_init = NULL, + .sw_init = ih_v7_0_sw_init, + .sw_fini = ih_v7_0_sw_fini, + .hw_init = ih_v7_0_hw_init, + .hw_fini = ih_v7_0_hw_fini, + .suspend = ih_v7_0_suspend, + .resume = ih_v7_0_resume, + .is_idle = ih_v7_0_is_idle, + .wait_for_idle = ih_v7_0_wait_for_idle, + .soft_reset = ih_v7_0_soft_reset, + .set_clockgating_state = ih_v7_0_set_clockgating_state, + .set_powergating_state = ih_v7_0_set_powergating_state, + .get_clockgating_state = ih_v7_0_get_clockgating_state, + .dump_ip_state = NULL, + .print_ip_state = NULL, +}; + +static const struct amdgpu_ih_funcs ih_v7_0_funcs = { + .get_wptr = ih_v7_0_get_wptr, + .decode_iv = amdgpu_ih_decode_iv_helper, + .decode_iv_ts = amdgpu_ih_decode_iv_ts_helper, + .set_rptr = ih_v7_0_set_rptr +}; + +static void ih_v7_0_set_interrupt_funcs(struct amdgpu_device *adev) +{ + adev->irq.ih_funcs = &ih_v7_0_funcs; +} + +const struct amdgpu_ip_block_version ih_v7_0_ip_block = +{ + .type = AMD_IP_BLOCK_TYPE_IH, + .major = 7, + .minor = 0, + .rev = 0, + .funcs = &ih_v7_0_ip_funcs, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/ih_v7_0.h b/drivers/gpu/drm/amd/amdgpu/ih_v7_0.h new file mode 100644 index 000000000000..af9dcbc451fd --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/ih_v7_0.h @@ -0,0 +1,28 @@ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#ifndef __IH_V7_0_IH_H__ +#define __IH_V7_0_IH_H__ + +extern const struct amdgpu_ip_block_version ih_v7_0_ip_block; + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c b/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c index c0bdab3bf0e4..3e91a8e42c21 100644 --- a/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c @@ -37,6 +37,7 @@ MODULE_FIRMWARE("amdgpu/gc_11_0_2_imu.bin"); MODULE_FIRMWARE("amdgpu/gc_11_0_3_imu.bin"); MODULE_FIRMWARE("amdgpu/gc_11_0_4_imu.bin"); MODULE_FIRMWARE("amdgpu/gc_11_5_0_imu.bin"); +MODULE_FIRMWARE("amdgpu/gc_11_5_1_imu.bin"); static int imu_v11_0_init_microcode(struct amdgpu_device *adev) { diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c index 1c8116d75f63..ef3e42f6b841 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c @@ -759,6 +759,8 @@ static const struct amd_ip_funcs jpeg_v2_0_ip_funcs = { .post_soft_reset = NULL, .set_clockgating_state = jpeg_v2_0_set_clockgating_state, .set_powergating_state = jpeg_v2_0_set_powergating_state, + .dump_ip_state = NULL, + .print_ip_state = NULL, }; static const struct amdgpu_ring_funcs jpeg_v2_0_dec_ring_vm_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c index e67a337457ed..afeaf3c64e27 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c @@ -551,7 +551,7 @@ static int jpeg_v2_5_set_powergating_state(void *handle, struct amdgpu_device *adev = (struct amdgpu_device *)handle; int ret; - if(state == adev->jpeg.cur_state) + if (state == adev->jpeg.cur_state) return 0; if (state == AMD_PG_STATE_GATE) @@ -559,7 +559,7 @@ static int jpeg_v2_5_set_powergating_state(void *handle, else ret = jpeg_v2_5_start(adev); - if(!ret) + if (!ret) adev->jpeg.cur_state = state; return ret; @@ -632,6 +632,8 @@ static const struct amd_ip_funcs jpeg_v2_5_ip_funcs = { .post_soft_reset = NULL, .set_clockgating_state = jpeg_v2_5_set_clockgating_state, .set_powergating_state = jpeg_v2_5_set_powergating_state, + .dump_ip_state = NULL, + .print_ip_state = NULL, }; static const struct amd_ip_funcs jpeg_v2_6_ip_funcs = { @@ -652,6 +654,8 @@ static const struct amd_ip_funcs jpeg_v2_6_ip_funcs = { .post_soft_reset = NULL, .set_clockgating_state = jpeg_v2_5_set_clockgating_state, .set_powergating_state = jpeg_v2_5_set_powergating_state, + .dump_ip_state = NULL, + .print_ip_state = NULL, }; static const struct amdgpu_ring_funcs jpeg_v2_5_dec_ring_vm_funcs = { @@ -754,8 +758,7 @@ static void jpeg_v2_5_set_irq_funcs(struct amdgpu_device *adev) } } -const struct amdgpu_ip_block_version jpeg_v2_5_ip_block = -{ +const struct amdgpu_ip_block_version jpeg_v2_5_ip_block = { .type = AMD_IP_BLOCK_TYPE_JPEG, .major = 2, .minor = 5, @@ -763,8 +766,7 @@ const struct amdgpu_ip_block_version jpeg_v2_5_ip_block = .funcs = &jpeg_v2_5_ip_funcs, }; -const struct amdgpu_ip_block_version jpeg_v2_6_ip_block = -{ +const struct amdgpu_ip_block_version jpeg_v2_6_ip_block = { .type = AMD_IP_BLOCK_TYPE_JPEG, .major = 2, .minor = 6, diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c index a92481da60cd..1c7cf4800bf7 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c @@ -557,6 +557,8 @@ static const struct amd_ip_funcs jpeg_v3_0_ip_funcs = { .post_soft_reset = NULL, .set_clockgating_state = jpeg_v3_0_set_clockgating_state, .set_powergating_state = jpeg_v3_0_set_powergating_state, + .dump_ip_state = NULL, + .print_ip_state = NULL, }; static const struct amdgpu_ring_funcs jpeg_v3_0_dec_ring_vm_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c index 88ea58d5c4ab..237fe5df5a8f 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c @@ -719,6 +719,8 @@ static const struct amd_ip_funcs jpeg_v4_0_ip_funcs = { .post_soft_reset = NULL, .set_clockgating_state = jpeg_v4_0_set_clockgating_state, .set_powergating_state = jpeg_v4_0_set_powergating_state, + .dump_ip_state = NULL, + .print_ip_state = NULL, }; static const struct amdgpu_ring_funcs jpeg_v4_0_dec_ring_vm_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c index 82b6b62c170b..d66af11aa66c 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c @@ -652,7 +652,7 @@ static void jpeg_v4_0_3_dec_ring_set_wptr(struct amdgpu_ring *ring) * * Write a start command to the ring. */ -static void jpeg_v4_0_3_dec_ring_insert_start(struct amdgpu_ring *ring) +void jpeg_v4_0_3_dec_ring_insert_start(struct amdgpu_ring *ring) { if (!amdgpu_sriov_vf(ring->adev)) { amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET, @@ -672,7 +672,7 @@ static void jpeg_v4_0_3_dec_ring_insert_start(struct amdgpu_ring *ring) * * Write a end command to the ring. */ -static void jpeg_v4_0_3_dec_ring_insert_end(struct amdgpu_ring *ring) +void jpeg_v4_0_3_dec_ring_insert_end(struct amdgpu_ring *ring) { if (!amdgpu_sriov_vf(ring->adev)) { amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET, @@ -695,7 +695,7 @@ static void jpeg_v4_0_3_dec_ring_insert_end(struct amdgpu_ring *ring) * * Write a fence and a trap command to the ring. */ -static void jpeg_v4_0_3_dec_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq, +void jpeg_v4_0_3_dec_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq, unsigned int flags) { WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT); @@ -764,7 +764,7 @@ static void jpeg_v4_0_3_dec_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, * * Write ring commands to execute the indirect buffer. */ -static void jpeg_v4_0_3_dec_ring_emit_ib(struct amdgpu_ring *ring, +void jpeg_v4_0_3_dec_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_job *job, struct amdgpu_ib *ib, uint32_t flags) @@ -815,7 +815,7 @@ static void jpeg_v4_0_3_dec_ring_emit_ib(struct amdgpu_ring *ring, amdgpu_ring_write(ring, 0x2); } -static void jpeg_v4_0_3_dec_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, +void jpeg_v4_0_3_dec_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, uint32_t val, uint32_t mask) { uint32_t reg_offset = (reg << 2); @@ -842,7 +842,7 @@ static void jpeg_v4_0_3_dec_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_ amdgpu_ring_write(ring, mask); } -static void jpeg_v4_0_3_dec_ring_emit_vm_flush(struct amdgpu_ring *ring, +void jpeg_v4_0_3_dec_ring_emit_vm_flush(struct amdgpu_ring *ring, unsigned int vmid, uint64_t pd_addr) { struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->vm_hub]; @@ -857,7 +857,7 @@ static void jpeg_v4_0_3_dec_ring_emit_vm_flush(struct amdgpu_ring *ring, jpeg_v4_0_3_dec_ring_emit_reg_wait(ring, data0, data1, mask); } -static void jpeg_v4_0_3_dec_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t val) +void jpeg_v4_0_3_dec_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t val) { uint32_t reg_offset = (reg << 2); @@ -875,7 +875,7 @@ static void jpeg_v4_0_3_dec_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t re amdgpu_ring_write(ring, val); } -static void jpeg_v4_0_3_dec_ring_nop(struct amdgpu_ring *ring, uint32_t count) +void jpeg_v4_0_3_dec_ring_nop(struct amdgpu_ring *ring, uint32_t count) { int i; @@ -1053,6 +1053,8 @@ static const struct amd_ip_funcs jpeg_v4_0_3_ip_funcs = { .post_soft_reset = NULL, .set_clockgating_state = jpeg_v4_0_3_set_clockgating_state, .set_powergating_state = jpeg_v4_0_3_set_powergating_state, + .dump_ip_state = NULL, + .print_ip_state = NULL, }; static const struct amdgpu_ring_funcs jpeg_v4_0_3_dec_ring_vm_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.h b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.h index 22483dc66351..747a3e5f6856 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.h +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.h @@ -48,4 +48,19 @@ extern const struct amdgpu_ip_block_version jpeg_v4_0_3_ip_block; +void jpeg_v4_0_3_dec_ring_emit_ib(struct amdgpu_ring *ring, + struct amdgpu_job *job, + struct amdgpu_ib *ib, + uint32_t flags); +void jpeg_v4_0_3_dec_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq, + unsigned int flags); +void jpeg_v4_0_3_dec_ring_emit_vm_flush(struct amdgpu_ring *ring, + unsigned int vmid, uint64_t pd_addr); +void jpeg_v4_0_3_dec_ring_nop(struct amdgpu_ring *ring, uint32_t count); +void jpeg_v4_0_3_dec_ring_insert_start(struct amdgpu_ring *ring); +void jpeg_v4_0_3_dec_ring_insert_end(struct amdgpu_ring *ring); +void jpeg_v4_0_3_dec_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t val); +void jpeg_v4_0_3_dec_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, + uint32_t val, uint32_t mask); + #endif /* __JPEG_V4_0_3_H__ */ diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c index 78b74daf4eeb..da6bb9022b80 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c @@ -34,7 +34,17 @@ #include "vcn/vcn_4_0_5_sh_mask.h" #include "ivsrcid/vcn/irqsrcs_vcn_4_0.h" -#define regUVD_JPEG_PITCH_INTERNAL_OFFSET 0x401f +#define mmUVD_DPG_LMA_CTL regUVD_DPG_LMA_CTL +#define mmUVD_DPG_LMA_CTL_BASE_IDX regUVD_DPG_LMA_CTL_BASE_IDX +#define mmUVD_DPG_LMA_DATA regUVD_DPG_LMA_DATA +#define mmUVD_DPG_LMA_DATA_BASE_IDX regUVD_DPG_LMA_DATA_BASE_IDX + +#define regUVD_JPEG_PITCH_INTERNAL_OFFSET 0x401f +#define regJPEG_DEC_GFX10_ADDR_CONFIG_INTERNAL_OFFSET 0x4026 +#define regJPEG_SYS_INT_EN_INTERNAL_OFFSET 0x4141 +#define regJPEG_CGC_CTRL_INTERNAL_OFFSET 0x4161 +#define regJPEG_CGC_GATE_INTERNAL_OFFSET 0x4160 +#define regUVD_NO_OP_INTERNAL_OFFSET 0x0029 static void jpeg_v4_0_5_set_dec_ring_funcs(struct amdgpu_device *adev); static void jpeg_v4_0_5_set_irq_funcs(struct amdgpu_device *adev); @@ -43,6 +53,11 @@ static int jpeg_v4_0_5_set_powergating_state(void *handle, static void jpeg_v4_0_5_dec_ring_set_wptr(struct amdgpu_ring *ring); +static int amdgpu_ih_clientid_jpeg[] = { + SOC15_IH_CLIENTID_VCN, + SOC15_IH_CLIENTID_VCN1 +}; + /** * jpeg_v4_0_5_early_init - set function pointers * @@ -54,8 +69,20 @@ static int jpeg_v4_0_5_early_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + switch (amdgpu_ip_version(adev, UVD_HWIP, 0)) { + case IP_VERSION(4, 0, 5): + adev->jpeg.num_jpeg_inst = 1; + break; + case IP_VERSION(4, 0, 6): + adev->jpeg.num_jpeg_inst = 2; + break; + default: + DRM_DEV_ERROR(adev->dev, + "Failed to init vcn ip block(UVD_HWIP:0x%x)\n", + amdgpu_ip_version(adev, UVD_HWIP, 0)); + return -EINVAL; + } - adev->jpeg.num_jpeg_inst = 1; adev->jpeg.num_jpeg_rings = 1; jpeg_v4_0_5_set_dec_ring_funcs(adev); @@ -75,25 +102,30 @@ static int jpeg_v4_0_5_sw_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; struct amdgpu_ring *ring; - int r; - - /* JPEG TRAP */ - r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, - VCN_4_0__SRCID__JPEG_DECODE, &adev->jpeg.inst->irq); - if (r) - return r; - - /* JPEG DJPEG POISON EVENT */ - r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, - VCN_4_0__SRCID_DJPEG0_POISON, &adev->jpeg.inst->irq); - if (r) - return r; - - /* JPEG EJPEG POISON EVENT */ - r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, - VCN_4_0__SRCID_EJPEG0_POISON, &adev->jpeg.inst->irq); - if (r) - return r; + int r, i; + + for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { + if (adev->jpeg.harvest_config & (1 << i)) + continue; + + /* JPEG TRAP */ + r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_jpeg[i], + VCN_4_0__SRCID__JPEG_DECODE, &adev->jpeg.inst[i].irq); + if (r) + return r; + + /* JPEG DJPEG POISON EVENT */ + r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_jpeg[i], + VCN_4_0__SRCID_DJPEG0_POISON, &adev->jpeg.inst[i].irq); + if (r) + return r; + + /* JPEG EJPEG POISON EVENT */ + r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_jpeg[i], + VCN_4_0__SRCID_EJPEG0_POISON, &adev->jpeg.inst[i].irq); + if (r) + return r; + } r = amdgpu_jpeg_sw_init(adev); if (r) @@ -103,21 +135,23 @@ static int jpeg_v4_0_5_sw_init(void *handle) if (r) return r; - ring = adev->jpeg.inst->ring_dec; - ring->use_doorbell = true; - ring->doorbell_index = amdgpu_sriov_vf(adev) ? - (((adev->doorbell_index.vcn.vcn_ring0_1) << 1) + 4) : - ((adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 1); - ring->vm_hub = AMDGPU_MMHUB0(0); - - sprintf(ring->name, "jpeg_dec"); - r = amdgpu_ring_init(adev, ring, 512, &adev->jpeg.inst->irq, 0, - AMDGPU_RING_PRIO_DEFAULT, NULL); - if (r) - return r; - - adev->jpeg.internal.jpeg_pitch[0] = regUVD_JPEG_PITCH_INTERNAL_OFFSET; - adev->jpeg.inst->external.jpeg_pitch[0] = SOC15_REG_OFFSET(JPEG, 0, regUVD_JPEG_PITCH); + for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { + if (adev->jpeg.harvest_config & (1 << i)) + continue; + + ring = adev->jpeg.inst[i].ring_dec; + ring->use_doorbell = true; + ring->vm_hub = AMDGPU_MMHUB0(0); + ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 1 + 8 * i; + sprintf(ring->name, "jpeg_dec_%d", i); + r = amdgpu_ring_init(adev, ring, 512, &adev->jpeg.inst[i].irq, + 0, AMDGPU_RING_PRIO_DEFAULT, NULL); + if (r) + return r; + + adev->jpeg.internal.jpeg_pitch[0] = regUVD_JPEG_PITCH_INTERNAL_OFFSET; + adev->jpeg.inst[i].external.jpeg_pitch[0] = SOC15_REG_OFFSET(JPEG, i, regUVD_JPEG_PITCH); + } return 0; } @@ -152,14 +186,27 @@ static int jpeg_v4_0_5_sw_fini(void *handle) static int jpeg_v4_0_5_hw_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - struct amdgpu_ring *ring = adev->jpeg.inst->ring_dec; - int r; + struct amdgpu_ring *ring; + int r, i; - r = amdgpu_ring_test_helper(ring); - if (r) - return r; + // TODO: Enable ring test with DPG support + if (adev->pg_flags & AMD_PG_SUPPORT_JPEG_DPG) { + DRM_DEV_INFO(adev->dev, "JPEG decode initialized successfully under DPG Mode"); + return 0; + } + + for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { + if (adev->jpeg.harvest_config & (1 << i)) + continue; - DRM_DEV_INFO(adev->dev, "JPEG decode initialized successfully.\n"); + ring = adev->jpeg.inst[i].ring_dec; + r = amdgpu_ring_test_helper(ring); + if (r) + return r; + } + + if (!r) + DRM_INFO("JPEG decode initialized successfully under SPG Mode\n"); return 0; } @@ -174,14 +221,20 @@ static int jpeg_v4_0_5_hw_init(void *handle) static int jpeg_v4_0_5_hw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i; cancel_delayed_work_sync(&adev->vcn.idle_work); - if (!amdgpu_sriov_vf(adev)) { - if (adev->jpeg.cur_state != AMD_PG_STATE_GATE && - RREG32_SOC15(JPEG, 0, regUVD_JRBC_STATUS)) - jpeg_v4_0_5_set_powergating_state(adev, AMD_PG_STATE_GATE); - } + for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { + if (adev->jpeg.harvest_config & (1 << i)) + continue; + + if (!amdgpu_sriov_vf(adev)) { + if (adev->jpeg.cur_state != AMD_PG_STATE_GATE && + RREG32_SOC15(JPEG, i, regUVD_JRBC_STATUS)) + jpeg_v4_0_5_set_powergating_state(adev, AMD_PG_STATE_GATE); + } + } return 0; } @@ -227,11 +280,11 @@ static int jpeg_v4_0_5_resume(void *handle) return r; } -static void jpeg_v4_0_5_disable_clock_gating(struct amdgpu_device *adev) +static void jpeg_v4_0_5_disable_clock_gating(struct amdgpu_device *adev, int inst) { uint32_t data = 0; - data = RREG32_SOC15(JPEG, 0, regJPEG_CGC_CTRL); + data = RREG32_SOC15(JPEG, inst, regJPEG_CGC_CTRL); if (adev->cg_flags & AMD_CG_SUPPORT_JPEG_MGCG) { data |= 1 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; data &= (~JPEG_CGC_CTRL__JPEG_DEC_MODE_MASK); @@ -241,21 +294,21 @@ static void jpeg_v4_0_5_disable_clock_gating(struct amdgpu_device *adev) data |= 1 << JPEG_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT; data |= 4 << JPEG_CGC_CTRL__CLK_OFF_DELAY__SHIFT; - WREG32_SOC15(JPEG, 0, regJPEG_CGC_CTRL, data); + WREG32_SOC15(JPEG, inst, regJPEG_CGC_CTRL, data); - data = RREG32_SOC15(JPEG, 0, regJPEG_CGC_GATE); + data = RREG32_SOC15(JPEG, inst, regJPEG_CGC_GATE); data &= ~(JPEG_CGC_GATE__JPEG_DEC_MASK | JPEG_CGC_GATE__JPEG2_DEC_MASK | JPEG_CGC_GATE__JMCIF_MASK | JPEG_CGC_GATE__JRBBM_MASK); - WREG32_SOC15(JPEG, 0, regJPEG_CGC_GATE, data); + WREG32_SOC15(JPEG, inst, regJPEG_CGC_GATE, data); } -static void jpeg_v4_0_5_enable_clock_gating(struct amdgpu_device *adev) +static void jpeg_v4_0_5_enable_clock_gating(struct amdgpu_device *adev, int inst) { uint32_t data = 0; - data = RREG32_SOC15(JPEG, 0, regJPEG_CGC_CTRL); + data = RREG32_SOC15(JPEG, inst, regJPEG_CGC_CTRL); if (adev->cg_flags & AMD_CG_SUPPORT_JPEG_MGCG) { data |= 1 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; data |= JPEG_CGC_CTRL__JPEG_DEC_MODE_MASK; @@ -265,47 +318,66 @@ static void jpeg_v4_0_5_enable_clock_gating(struct amdgpu_device *adev) data |= 1 << JPEG_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT; data |= 4 << JPEG_CGC_CTRL__CLK_OFF_DELAY__SHIFT; - WREG32_SOC15(JPEG, 0, regJPEG_CGC_CTRL, data); + WREG32_SOC15(JPEG, inst, regJPEG_CGC_CTRL, data); - data = RREG32_SOC15(JPEG, 0, regJPEG_CGC_GATE); + data = RREG32_SOC15(JPEG, inst, regJPEG_CGC_GATE); data |= (JPEG_CGC_GATE__JPEG_DEC_MASK |JPEG_CGC_GATE__JPEG2_DEC_MASK |JPEG_CGC_GATE__JMCIF_MASK |JPEG_CGC_GATE__JRBBM_MASK); - WREG32_SOC15(JPEG, 0, regJPEG_CGC_GATE, data); + WREG32_SOC15(JPEG, inst, regJPEG_CGC_GATE, data); } -static int jpeg_v4_0_5_disable_static_power_gating(struct amdgpu_device *adev) +static void jpeg_engine_4_0_5_dpg_clock_gating_mode(struct amdgpu_device *adev, + int inst_idx, uint8_t indirect) +{ + uint32_t data = 0; + + if (adev->cg_flags & AMD_CG_SUPPORT_JPEG_MGCG) + data |= 1 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; + else + data |= 0 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; + + data |= 1 << JPEG_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT; + data |= 4 << JPEG_CGC_CTRL__CLK_OFF_DELAY__SHIFT; + WREG32_SOC15_JPEG_DPG_MODE(inst_idx, regJPEG_CGC_CTRL_INTERNAL_OFFSET, data, indirect); + + data = 0; + WREG32_SOC15_JPEG_DPG_MODE(inst_idx, regJPEG_CGC_GATE_INTERNAL_OFFSET, + data, indirect); +} + +static int jpeg_v4_0_5_disable_static_power_gating(struct amdgpu_device *adev, int inst) { if (adev->pg_flags & AMD_PG_SUPPORT_JPEG) { - WREG32(SOC15_REG_OFFSET(JPEG, 0, regUVD_IPX_DLDO_CONFIG), + WREG32(SOC15_REG_OFFSET(JPEG, inst, regUVD_IPX_DLDO_CONFIG), 1 << UVD_IPX_DLDO_CONFIG__ONO1_PWR_CONFIG__SHIFT); - SOC15_WAIT_ON_RREG(JPEG, 0, regUVD_IPX_DLDO_STATUS, + SOC15_WAIT_ON_RREG(JPEG, inst, regUVD_IPX_DLDO_STATUS, 0, UVD_IPX_DLDO_STATUS__ONO1_PWR_STATUS_MASK); } /* disable anti hang mechanism */ - WREG32_P(SOC15_REG_OFFSET(JPEG, 0, regUVD_JPEG_POWER_STATUS), 0, + WREG32_P(SOC15_REG_OFFSET(JPEG, inst, regUVD_JPEG_POWER_STATUS), 0, ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK); /* keep the JPEG in static PG mode */ - WREG32_P(SOC15_REG_OFFSET(JPEG, 0, regUVD_JPEG_POWER_STATUS), 0, + WREG32_P(SOC15_REG_OFFSET(JPEG, inst, regUVD_JPEG_POWER_STATUS), 0, ~UVD_JPEG_POWER_STATUS__JPEG_PG_MODE_MASK); return 0; } -static int jpeg_v4_0_5_enable_static_power_gating(struct amdgpu_device *adev) +static int jpeg_v4_0_5_enable_static_power_gating(struct amdgpu_device *adev, int inst) { /* enable anti hang mechanism */ - WREG32_P(SOC15_REG_OFFSET(JPEG, 0, regUVD_JPEG_POWER_STATUS), + WREG32_P(SOC15_REG_OFFSET(JPEG, inst, regUVD_JPEG_POWER_STATUS), UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK, ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK); if (adev->pg_flags & AMD_PG_SUPPORT_JPEG) { - WREG32(SOC15_REG_OFFSET(JPEG, 0, regUVD_IPX_DLDO_CONFIG), + WREG32(SOC15_REG_OFFSET(JPEG, inst, regUVD_IPX_DLDO_CONFIG), 2 << UVD_IPX_DLDO_CONFIG__ONO1_PWR_CONFIG__SHIFT); - SOC15_WAIT_ON_RREG(JPEG, 0, regUVD_IPX_DLDO_STATUS, + SOC15_WAIT_ON_RREG(JPEG, inst, regUVD_IPX_DLDO_STATUS, 1 << UVD_IPX_DLDO_STATUS__ONO1_PWR_STATUS__SHIFT, UVD_IPX_DLDO_STATUS__ONO1_PWR_STATUS_MASK); } @@ -314,61 +386,153 @@ static int jpeg_v4_0_5_enable_static_power_gating(struct amdgpu_device *adev) } /** - * jpeg_v4_0_5_start - start JPEG block + * jpeg_v4_0_5_start_dpg_mode - Jpeg start with dpg mode * * @adev: amdgpu_device pointer + * @inst_idx: instance number index + * @indirect: indirectly write sram * - * Setup and start the JPEG block + * Start JPEG block with dpg mode */ -static int jpeg_v4_0_5_start(struct amdgpu_device *adev) +static void jpeg_v4_0_5_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect) { - struct amdgpu_ring *ring = adev->jpeg.inst->ring_dec; - int r; + struct amdgpu_ring *ring = adev->jpeg.inst[inst_idx].ring_dec; + uint32_t reg_data = 0; - if (adev->pm.dpm_enabled) - amdgpu_dpm_enable_jpeg(adev, true); + /* enable anti hang mechanism */ + reg_data = RREG32_SOC15(JPEG, inst_idx, regUVD_JPEG_POWER_STATUS); + reg_data &= ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK; + reg_data |= 0x1; + WREG32_SOC15(JPEG, inst_idx, regUVD_JPEG_POWER_STATUS, reg_data); - /* doorbell programming is done for every playback */ - adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell, - (adev->doorbell_index.vcn.vcn_ring0_1 << 1), 0); + if (adev->pg_flags & AMD_PG_SUPPORT_JPEG) { + WREG32(SOC15_REG_OFFSET(JPEG, inst_idx, regUVD_IPX_DLDO_CONFIG), + 2 << UVD_IPX_DLDO_CONFIG__ONO1_PWR_CONFIG__SHIFT); + SOC15_WAIT_ON_RREG(JPEG, inst_idx, regUVD_IPX_DLDO_STATUS, + 1 << UVD_IPX_DLDO_STATUS__ONO1_PWR_STATUS__SHIFT, + UVD_IPX_DLDO_STATUS__ONO1_PWR_STATUS_MASK); + } - WREG32_SOC15(VCN, 0, regVCN_JPEG_DB_CTRL, - ring->doorbell_index << VCN_JPEG_DB_CTRL__OFFSET__SHIFT | - VCN_JPEG_DB_CTRL__EN_MASK); + reg_data = RREG32_SOC15(JPEG, inst_idx, regUVD_JPEG_POWER_STATUS); + reg_data |= UVD_JPEG_POWER_STATUS__JPEG_PG_MODE_MASK; + WREG32_SOC15(JPEG, inst_idx, regUVD_JPEG_POWER_STATUS, reg_data); - /* disable power gating */ - r = jpeg_v4_0_5_disable_static_power_gating(adev); - if (r) - return r; + if (indirect) + adev->jpeg.inst[inst_idx].dpg_sram_curr_addr = + (uint32_t *)adev->jpeg.inst[inst_idx].dpg_sram_cpu_addr; - /* JPEG disable CGC */ - jpeg_v4_0_5_disable_clock_gating(adev); + jpeg_engine_4_0_5_dpg_clock_gating_mode(adev, inst_idx, indirect); /* MJPEG global tiling registers */ - WREG32_SOC15(JPEG, 0, regJPEG_DEC_GFX10_ADDR_CONFIG, - adev->gfx.config.gb_addr_config); - + WREG32_SOC15_JPEG_DPG_MODE(inst_idx, regJPEG_DEC_GFX10_ADDR_CONFIG_INTERNAL_OFFSET, + adev->gfx.config.gb_addr_config, indirect); + /* enable System Interrupt for JRBC */ + WREG32_SOC15_JPEG_DPG_MODE(inst_idx, regJPEG_SYS_INT_EN_INTERNAL_OFFSET, + JPEG_SYS_INT_EN__DJRBC_MASK, indirect); - /* enable JMI channel */ - WREG32_P(SOC15_REG_OFFSET(JPEG, 0, regUVD_JMI_CNTL), 0, - ~UVD_JMI_CNTL__SOFT_RESET_MASK); + /* add nop to workaround PSP size check */ + WREG32_SOC15_JPEG_DPG_MODE(inst_idx, regUVD_NO_OP_INTERNAL_OFFSET, 0, indirect); - /* enable System Interrupt for JRBC */ - WREG32_P(SOC15_REG_OFFSET(JPEG, 0, regJPEG_SYS_INT_EN), - JPEG_SYS_INT_EN__DJRBC_MASK, - ~JPEG_SYS_INT_EN__DJRBC_MASK); + if (indirect) + amdgpu_jpeg_psp_update_sram(adev, inst_idx, 0); - WREG32_SOC15(JPEG, 0, regUVD_LMI_JRBC_RB_VMID, 0); - WREG32_SOC15(JPEG, 0, regUVD_JRBC_RB_CNTL, (0x00000001L | 0x00000002L)); - WREG32_SOC15(JPEG, 0, regUVD_LMI_JRBC_RB_64BIT_BAR_LOW, + WREG32_SOC15(JPEG, inst_idx, regUVD_LMI_JRBC_RB_VMID, 0); + WREG32_SOC15(JPEG, inst_idx, regUVD_JRBC_RB_CNTL, (0x00000001L | 0x00000002L)); + WREG32_SOC15(JPEG, inst_idx, regUVD_LMI_JRBC_RB_64BIT_BAR_LOW, lower_32_bits(ring->gpu_addr)); - WREG32_SOC15(JPEG, 0, regUVD_LMI_JRBC_RB_64BIT_BAR_HIGH, + WREG32_SOC15(JPEG, inst_idx, regUVD_LMI_JRBC_RB_64BIT_BAR_HIGH, upper_32_bits(ring->gpu_addr)); - WREG32_SOC15(JPEG, 0, regUVD_JRBC_RB_RPTR, 0); - WREG32_SOC15(JPEG, 0, regUVD_JRBC_RB_WPTR, 0); - WREG32_SOC15(JPEG, 0, regUVD_JRBC_RB_CNTL, 0x00000002L); - WREG32_SOC15(JPEG, 0, regUVD_JRBC_RB_SIZE, ring->ring_size / 4); - ring->wptr = RREG32_SOC15(JPEG, 0, regUVD_JRBC_RB_WPTR); + WREG32_SOC15(JPEG, inst_idx, regUVD_JRBC_RB_RPTR, 0); + WREG32_SOC15(JPEG, inst_idx, regUVD_JRBC_RB_WPTR, 0); + WREG32_SOC15(JPEG, inst_idx, regUVD_JRBC_RB_CNTL, 0x00000002L); + WREG32_SOC15(JPEG, inst_idx, regUVD_JRBC_RB_SIZE, ring->ring_size / 4); + ring->wptr = RREG32_SOC15(JPEG, inst_idx, regUVD_JRBC_RB_WPTR); +} + +/** + * jpeg_v4_0_5_stop_dpg_mode - Jpeg stop with dpg mode + * + * @adev: amdgpu_device pointer + * @inst_idx: instance number index + * + * Stop JPEG block with dpg mode + */ +static void jpeg_v4_0_5_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx) +{ + uint32_t reg_data = 0; + + reg_data = RREG32_SOC15(JPEG, inst_idx, regUVD_JPEG_POWER_STATUS); + reg_data &= ~UVD_JPEG_POWER_STATUS__JPEG_PG_MODE_MASK; + WREG32_SOC15(JPEG, inst_idx, regUVD_JPEG_POWER_STATUS, reg_data); + +} + +/** + * jpeg_v4_0_5_start - start JPEG block + * + * @adev: amdgpu_device pointer + * + * Setup and start the JPEG block + */ +static int jpeg_v4_0_5_start(struct amdgpu_device *adev) +{ + struct amdgpu_ring *ring; + int r, i; + + if (adev->pm.dpm_enabled) + amdgpu_dpm_enable_jpeg(adev, true); + + for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { + if (adev->jpeg.harvest_config & (1 << i)) + continue; + + ring = adev->jpeg.inst[i].ring_dec; + /* doorbell programming is done for every playback */ + adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell, + (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 8 * i, i); + + WREG32_SOC15(VCN, i, regVCN_JPEG_DB_CTRL, + ring->doorbell_index << VCN_JPEG_DB_CTRL__OFFSET__SHIFT | + VCN_JPEG_DB_CTRL__EN_MASK); + + if (adev->pg_flags & AMD_PG_SUPPORT_JPEG_DPG) { + jpeg_v4_0_5_start_dpg_mode(adev, i, adev->jpeg.indirect_sram); + continue; + } + + /* disable power gating */ + r = jpeg_v4_0_5_disable_static_power_gating(adev, i); + if (r) + return r; + + /* JPEG disable CGC */ + jpeg_v4_0_5_disable_clock_gating(adev, i); + + /* MJPEG global tiling registers */ + WREG32_SOC15(JPEG, i, regJPEG_DEC_GFX10_ADDR_CONFIG, + adev->gfx.config.gb_addr_config); + + /* enable JMI channel */ + WREG32_P(SOC15_REG_OFFSET(JPEG, i, regUVD_JMI_CNTL), 0, + ~UVD_JMI_CNTL__SOFT_RESET_MASK); + + /* enable System Interrupt for JRBC */ + WREG32_P(SOC15_REG_OFFSET(JPEG, i, regJPEG_SYS_INT_EN), + JPEG_SYS_INT_EN__DJRBC_MASK, + ~JPEG_SYS_INT_EN__DJRBC_MASK); + + WREG32_SOC15(JPEG, i, regUVD_LMI_JRBC_RB_VMID, 0); + WREG32_SOC15(JPEG, i, regUVD_JRBC_RB_CNTL, (0x00000001L | 0x00000002L)); + WREG32_SOC15(JPEG, i, regUVD_LMI_JRBC_RB_64BIT_BAR_LOW, + lower_32_bits(ring->gpu_addr)); + WREG32_SOC15(JPEG, i, regUVD_LMI_JRBC_RB_64BIT_BAR_HIGH, + upper_32_bits(ring->gpu_addr)); + WREG32_SOC15(JPEG, i, regUVD_JRBC_RB_RPTR, 0); + WREG32_SOC15(JPEG, i, regUVD_JRBC_RB_WPTR, 0); + WREG32_SOC15(JPEG, i, regUVD_JRBC_RB_CNTL, 0x00000002L); + WREG32_SOC15(JPEG, i, regUVD_JRBC_RB_SIZE, ring->ring_size / 4); + ring->wptr = RREG32_SOC15(JPEG, i, regUVD_JRBC_RB_WPTR); + } return 0; } @@ -382,20 +546,29 @@ static int jpeg_v4_0_5_start(struct amdgpu_device *adev) */ static int jpeg_v4_0_5_stop(struct amdgpu_device *adev) { - int r; + int r, i; - /* reset JMI */ - WREG32_P(SOC15_REG_OFFSET(JPEG, 0, regUVD_JMI_CNTL), - UVD_JMI_CNTL__SOFT_RESET_MASK, - ~UVD_JMI_CNTL__SOFT_RESET_MASK); + for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { + if (adev->jpeg.harvest_config & (1 << i)) + continue; - jpeg_v4_0_5_enable_clock_gating(adev); + if (adev->pg_flags & AMD_PG_SUPPORT_JPEG_DPG) { + jpeg_v4_0_5_stop_dpg_mode(adev, i); + continue; + } - /* enable power gating */ - r = jpeg_v4_0_5_enable_static_power_gating(adev); - if (r) - return r; + /* reset JMI */ + WREG32_P(SOC15_REG_OFFSET(JPEG, i, regUVD_JMI_CNTL), + UVD_JMI_CNTL__SOFT_RESET_MASK, + ~UVD_JMI_CNTL__SOFT_RESET_MASK); + + jpeg_v4_0_5_enable_clock_gating(adev, i); + /* enable power gating */ + r = jpeg_v4_0_5_enable_static_power_gating(adev, i); + if (r) + return r; + } if (adev->pm.dpm_enabled) amdgpu_dpm_enable_jpeg(adev, false); @@ -413,7 +586,7 @@ static uint64_t jpeg_v4_0_5_dec_ring_get_rptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - return RREG32_SOC15(JPEG, 0, regUVD_JRBC_RB_RPTR); + return RREG32_SOC15(JPEG, ring->me, regUVD_JRBC_RB_RPTR); } /** @@ -430,7 +603,7 @@ static uint64_t jpeg_v4_0_5_dec_ring_get_wptr(struct amdgpu_ring *ring) if (ring->use_doorbell) return *ring->wptr_cpu_addr; else - return RREG32_SOC15(JPEG, 0, regUVD_JRBC_RB_WPTR); + return RREG32_SOC15(JPEG, ring->me, regUVD_JRBC_RB_WPTR); } /** @@ -448,29 +621,41 @@ static void jpeg_v4_0_5_dec_ring_set_wptr(struct amdgpu_ring *ring) *ring->wptr_cpu_addr = lower_32_bits(ring->wptr); WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr)); } else { - WREG32_SOC15(JPEG, 0, regUVD_JRBC_RB_WPTR, lower_32_bits(ring->wptr)); + WREG32_SOC15(JPEG, ring->me, regUVD_JRBC_RB_WPTR, lower_32_bits(ring->wptr)); } } static bool jpeg_v4_0_5_is_idle(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - int ret = 1; + int i, ret = 1; - ret &= (((RREG32_SOC15(JPEG, 0, regUVD_JRBC_STATUS) & - UVD_JRBC_STATUS__RB_JOB_DONE_MASK) == - UVD_JRBC_STATUS__RB_JOB_DONE_MASK)); + for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { + if (adev->jpeg.harvest_config & (1 << i)) + continue; + ret &= (((RREG32_SOC15(JPEG, i, regUVD_JRBC_STATUS) & + UVD_JRBC_STATUS__RB_JOB_DONE_MASK) == + UVD_JRBC_STATUS__RB_JOB_DONE_MASK)); + } return ret; } static int jpeg_v4_0_5_wait_for_idle(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i; + + for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { + if (adev->jpeg.harvest_config & (1 << i)) + continue; + + return SOC15_WAIT_ON_RREG(JPEG, i, regUVD_JRBC_STATUS, + UVD_JRBC_STATUS__RB_JOB_DONE_MASK, + UVD_JRBC_STATUS__RB_JOB_DONE_MASK); + } - return SOC15_WAIT_ON_RREG(JPEG, 0, regUVD_JRBC_STATUS, - UVD_JRBC_STATUS__RB_JOB_DONE_MASK, - UVD_JRBC_STATUS__RB_JOB_DONE_MASK); + return 0; } static int jpeg_v4_0_5_set_clockgating_state(void *handle, @@ -478,13 +663,20 @@ static int jpeg_v4_0_5_set_clockgating_state(void *handle, { struct amdgpu_device *adev = (struct amdgpu_device *)handle; bool enable = (state == AMD_CG_STATE_GATE) ? true : false; + int i; - if (enable) { - if (!jpeg_v4_0_5_is_idle(handle)) - return -EBUSY; - jpeg_v4_0_5_enable_clock_gating(adev); - } else { - jpeg_v4_0_5_disable_clock_gating(adev); + for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { + if (adev->jpeg.harvest_config & (1 << i)) + continue; + + if (enable) { + if (!jpeg_v4_0_5_is_idle(handle)) + return -EBUSY; + + jpeg_v4_0_5_enable_clock_gating(adev, i); + } else { + jpeg_v4_0_5_disable_clock_gating(adev, i); + } } return 0; @@ -519,11 +711,25 @@ static int jpeg_v4_0_5_process_interrupt(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry) { + uint32_t ip_instance; + DRM_DEBUG("IH: JPEG TRAP\n"); + switch (entry->client_id) { + case SOC15_IH_CLIENTID_VCN: + ip_instance = 0; + break; + case SOC15_IH_CLIENTID_VCN1: + ip_instance = 1; + break; + default: + DRM_ERROR("Unhandled client id: %d\n", entry->client_id); + return 0; + } + switch (entry->src_id) { case VCN_4_0__SRCID__JPEG_DECODE: - amdgpu_fence_process(adev->jpeg.inst->ring_dec); + amdgpu_fence_process(adev->jpeg.inst[ip_instance].ring_dec); break; case VCN_4_0__SRCID_DJPEG0_POISON: case VCN_4_0__SRCID_EJPEG0_POISON: @@ -556,6 +762,8 @@ static const struct amd_ip_funcs jpeg_v4_0_5_ip_funcs = { .post_soft_reset = NULL, .set_clockgating_state = jpeg_v4_0_5_set_clockgating_state, .set_powergating_state = jpeg_v4_0_5_set_powergating_state, + .dump_ip_state = NULL, + .print_ip_state = NULL, }; static const struct amdgpu_ring_funcs jpeg_v4_0_5_dec_ring_vm_funcs = { @@ -589,8 +797,16 @@ static const struct amdgpu_ring_funcs jpeg_v4_0_5_dec_ring_vm_funcs = { static void jpeg_v4_0_5_set_dec_ring_funcs(struct amdgpu_device *adev) { - adev->jpeg.inst->ring_dec->funcs = &jpeg_v4_0_5_dec_ring_vm_funcs; - DRM_DEV_INFO(adev->dev, "JPEG decode is enabled in VM mode\n"); + int i; + + for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { + if (adev->jpeg.harvest_config & (1 << i)) + continue; + + adev->jpeg.inst[i].ring_dec->funcs = &jpeg_v4_0_5_dec_ring_vm_funcs; + adev->jpeg.inst[i].ring_dec->me = i; + DRM_DEV_INFO(adev->dev, "JPEG%d decode is enabled in VM mode\n", i); + } } static const struct amdgpu_irq_src_funcs jpeg_v4_0_5_irq_funcs = { @@ -599,8 +815,15 @@ static const struct amdgpu_irq_src_funcs jpeg_v4_0_5_irq_funcs = { static void jpeg_v4_0_5_set_irq_funcs(struct amdgpu_device *adev) { - adev->jpeg.inst->irq.num_types = 1; - adev->jpeg.inst->irq.funcs = &jpeg_v4_0_5_irq_funcs; + int i; + + for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { + if (adev->jpeg.harvest_config & (1 << i)) + continue; + + adev->jpeg.inst[i].irq.num_types = 1; + adev->jpeg.inst[i].irq.funcs = &jpeg_v4_0_5_irq_funcs; + } } const struct amdgpu_ip_block_version jpeg_v4_0_5_ip_block = { diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c new file mode 100644 index 000000000000..64c856bfe0cb --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c @@ -0,0 +1,572 @@ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "amdgpu.h" +#include "amdgpu_jpeg.h" +#include "amdgpu_pm.h" +#include "soc15.h" +#include "soc15d.h" +#include "jpeg_v4_0_3.h" + +#include "vcn/vcn_5_0_0_offset.h" +#include "vcn/vcn_5_0_0_sh_mask.h" +#include "ivsrcid/vcn/irqsrcs_vcn_4_0.h" + +static void jpeg_v5_0_0_set_dec_ring_funcs(struct amdgpu_device *adev); +static void jpeg_v5_0_0_set_irq_funcs(struct amdgpu_device *adev); +static int jpeg_v5_0_0_set_powergating_state(void *handle, + enum amd_powergating_state state); + +/** + * jpeg_v5_0_0_early_init - set function pointers + * + * @handle: amdgpu_device pointer + * + * Set ring and irq function pointers + */ +static int jpeg_v5_0_0_early_init(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + adev->jpeg.num_jpeg_inst = 1; + adev->jpeg.num_jpeg_rings = 1; + + jpeg_v5_0_0_set_dec_ring_funcs(adev); + jpeg_v5_0_0_set_irq_funcs(adev); + + return 0; +} + +/** + * jpeg_v5_0_0_sw_init - sw init for JPEG block + * + * @handle: amdgpu_device pointer + * + * Load firmware and sw initialization + */ +static int jpeg_v5_0_0_sw_init(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_ring *ring; + int r; + + /* JPEG TRAP */ + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, + VCN_4_0__SRCID__JPEG_DECODE, &adev->jpeg.inst->irq); + if (r) + return r; + + r = amdgpu_jpeg_sw_init(adev); + if (r) + return r; + + r = amdgpu_jpeg_resume(adev); + if (r) + return r; + + ring = adev->jpeg.inst->ring_dec; + ring->use_doorbell = true; + ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 1; + ring->vm_hub = AMDGPU_MMHUB0(0); + + sprintf(ring->name, "jpeg_dec"); + r = amdgpu_ring_init(adev, ring, 512, &adev->jpeg.inst->irq, 0, + AMDGPU_RING_PRIO_DEFAULT, NULL); + if (r) + return r; + + adev->jpeg.internal.jpeg_pitch[0] = regUVD_JPEG_PITCH_INTERNAL_OFFSET; + adev->jpeg.inst->external.jpeg_pitch[0] = SOC15_REG_OFFSET(JPEG, 0, regUVD_JPEG_PITCH); + + return 0; +} + +/** + * jpeg_v5_0_0_sw_fini - sw fini for JPEG block + * + * @handle: amdgpu_device pointer + * + * JPEG suspend and free up sw allocation + */ +static int jpeg_v5_0_0_sw_fini(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int r; + + r = amdgpu_jpeg_suspend(adev); + if (r) + return r; + + r = amdgpu_jpeg_sw_fini(adev); + + return r; +} + +/** + * jpeg_v5_0_0_hw_init - start and test JPEG block + * + * @handle: amdgpu_device pointer + * + */ +static int jpeg_v5_0_0_hw_init(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_ring *ring = adev->jpeg.inst->ring_dec; + int r; + + adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell, + (adev->doorbell_index.vcn.vcn_ring0_1 << 1), 0); + + WREG32_SOC15(VCN, 0, regVCN_JPEG_DB_CTRL, + ring->doorbell_index << VCN_JPEG_DB_CTRL__OFFSET__SHIFT | + VCN_JPEG_DB_CTRL__EN_MASK); + + r = amdgpu_ring_test_helper(ring); + if (r) + return r; + + DRM_DEV_INFO(adev->dev, "JPEG decode initialized successfully.\n"); + + return 0; +} + +/** + * jpeg_v5_0_0_hw_fini - stop the hardware block + * + * @handle: amdgpu_device pointer + * + * Stop the JPEG block, mark ring as not ready any more + */ +static int jpeg_v5_0_0_hw_fini(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + cancel_delayed_work_sync(&adev->vcn.idle_work); + + if (adev->jpeg.cur_state != AMD_PG_STATE_GATE && + RREG32_SOC15(JPEG, 0, regUVD_JRBC_STATUS)) + jpeg_v5_0_0_set_powergating_state(adev, AMD_PG_STATE_GATE); + + return 0; +} + +/** + * jpeg_v5_0_0_suspend - suspend JPEG block + * + * @handle: amdgpu_device pointer + * + * HW fini and suspend JPEG block + */ +static int jpeg_v5_0_0_suspend(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int r; + + r = jpeg_v5_0_0_hw_fini(adev); + if (r) + return r; + + r = amdgpu_jpeg_suspend(adev); + + return r; +} + +/** + * jpeg_v5_0_0_resume - resume JPEG block + * + * @handle: amdgpu_device pointer + * + * Resume firmware and hw init JPEG block + */ +static int jpeg_v5_0_0_resume(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int r; + + r = amdgpu_jpeg_resume(adev); + if (r) + return r; + + r = jpeg_v5_0_0_hw_init(adev); + + return r; +} + +static void jpeg_v5_0_0_disable_clock_gating(struct amdgpu_device *adev) +{ + uint32_t data = 0; + + WREG32_SOC15(JPEG, 0, regJPEG_CGC_GATE, data); + + data = RREG32_SOC15(JPEG, 0, regJPEG_CGC_CTRL); + data &= ~(JPEG_CGC_CTRL__JPEG0_DEC_MODE_MASK + | JPEG_CGC_CTRL__JPEG_ENC_MODE_MASK); + WREG32_SOC15(JPEG, 0, regJPEG_CGC_CTRL, data); +} + +static void jpeg_v5_0_0_enable_clock_gating(struct amdgpu_device *adev) +{ + uint32_t data = 0; + + data = RREG32_SOC15(JPEG, 0, regJPEG_CGC_CTRL); + + data |= 1 << JPEG_CGC_CTRL__JPEG0_DEC_MODE__SHIFT; + WREG32_SOC15(JPEG, 0, regJPEG_CGC_CTRL, data); + + data = RREG32_SOC15(JPEG, 0, regJPEG_CGC_GATE); + data |= (JPEG_CGC_GATE__JPEG0_DEC_MASK + |JPEG_CGC_GATE__JPEG_ENC_MASK + |JPEG_CGC_GATE__JMCIF_MASK + |JPEG_CGC_GATE__JRBBM_MASK); + WREG32_SOC15(JPEG, 0, regJPEG_CGC_GATE, data); +} + +static int jpeg_v5_0_0_disable_static_power_gating(struct amdgpu_device *adev) +{ + uint32_t data = 0; + + data = 1 << UVD_IPX_DLDO_CONFIG__ONO1_PWR_CONFIG__SHIFT; + WREG32_SOC15(JPEG, 0, regUVD_IPX_DLDO_CONFIG, data); + SOC15_WAIT_ON_RREG(JPEG, 0, regUVD_IPX_DLDO_STATUS, 0, + UVD_IPX_DLDO_STATUS__ONO1_PWR_STATUS_MASK); + + /* disable anti hang mechanism */ + WREG32_P(SOC15_REG_OFFSET(JPEG, 0, regUVD_JPEG_POWER_STATUS), 0, + ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK); + + /* keep the JPEG in static PG mode */ + WREG32_P(SOC15_REG_OFFSET(JPEG, 0, regUVD_JPEG_POWER_STATUS), 0, + ~UVD_JPEG_POWER_STATUS__JPEG_PG_MODE_MASK); + + return 0; +} + +static int jpeg_v5_0_0_enable_static_power_gating(struct amdgpu_device *adev) +{ + /* enable anti hang mechanism */ + WREG32_P(SOC15_REG_OFFSET(JPEG, 0, regUVD_JPEG_POWER_STATUS), + UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK, + ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK); + + if (adev->pg_flags & AMD_PG_SUPPORT_JPEG) { + WREG32(SOC15_REG_OFFSET(JPEG, 0, regUVD_IPX_DLDO_CONFIG), + 2 << UVD_IPX_DLDO_CONFIG__ONO1_PWR_CONFIG__SHIFT); + SOC15_WAIT_ON_RREG(JPEG, 0, regUVD_IPX_DLDO_STATUS, + 1 << UVD_IPX_DLDO_STATUS__ONO1_PWR_STATUS__SHIFT, + UVD_IPX_DLDO_STATUS__ONO1_PWR_STATUS_MASK); + } + + return 0; +} + +/** + * jpeg_v5_0_0_start - start JPEG block + * + * @adev: amdgpu_device pointer + * + * Setup and start the JPEG block + */ +static int jpeg_v5_0_0_start(struct amdgpu_device *adev) +{ + struct amdgpu_ring *ring = adev->jpeg.inst->ring_dec; + int r; + + if (adev->pm.dpm_enabled) + amdgpu_dpm_enable_jpeg(adev, true); + + /* disable power gating */ + r = jpeg_v5_0_0_disable_static_power_gating(adev); + if (r) + return r; + + /* JPEG disable CGC */ + jpeg_v5_0_0_disable_clock_gating(adev); + + /* MJPEG global tiling registers */ + WREG32_SOC15(JPEG, 0, regJPEG_DEC_GFX10_ADDR_CONFIG, + adev->gfx.config.gb_addr_config); + + + /* enable JMI channel */ + WREG32_P(SOC15_REG_OFFSET(JPEG, 0, regUVD_JMI_CNTL), 0, + ~UVD_JMI_CNTL__SOFT_RESET_MASK); + + /* enable System Interrupt for JRBC */ + WREG32_P(SOC15_REG_OFFSET(JPEG, 0, regJPEG_SYS_INT_EN), + JPEG_SYS_INT_EN__DJRBC0_MASK, + ~JPEG_SYS_INT_EN__DJRBC0_MASK); + + WREG32_SOC15(JPEG, 0, regUVD_LMI_JRBC_RB_VMID, 0); + WREG32_SOC15(JPEG, 0, regUVD_JRBC_RB_CNTL, (0x00000001L | 0x00000002L)); + WREG32_SOC15(JPEG, 0, regUVD_LMI_JRBC_RB_64BIT_BAR_LOW, + lower_32_bits(ring->gpu_addr)); + WREG32_SOC15(JPEG, 0, regUVD_LMI_JRBC_RB_64BIT_BAR_HIGH, + upper_32_bits(ring->gpu_addr)); + WREG32_SOC15(JPEG, 0, regUVD_JRBC_RB_RPTR, 0); + WREG32_SOC15(JPEG, 0, regUVD_JRBC_RB_WPTR, 0); + WREG32_SOC15(JPEG, 0, regUVD_JRBC_RB_CNTL, 0x00000002L); + WREG32_SOC15(JPEG, 0, regUVD_JRBC_RB_SIZE, ring->ring_size / 4); + ring->wptr = RREG32_SOC15(JPEG, 0, regUVD_JRBC_RB_WPTR); + + return 0; +} + +/** + * jpeg_v5_0_0_stop - stop JPEG block + * + * @adev: amdgpu_device pointer + * + * stop the JPEG block + */ +static int jpeg_v5_0_0_stop(struct amdgpu_device *adev) +{ + int r; + + /* reset JMI */ + WREG32_P(SOC15_REG_OFFSET(JPEG, 0, regUVD_JMI_CNTL), + UVD_JMI_CNTL__SOFT_RESET_MASK, + ~UVD_JMI_CNTL__SOFT_RESET_MASK); + + jpeg_v5_0_0_enable_clock_gating(adev); + + /* enable power gating */ + r = jpeg_v5_0_0_enable_static_power_gating(adev); + if (r) + return r; + + if (adev->pm.dpm_enabled) + amdgpu_dpm_enable_jpeg(adev, false); + + return 0; +} + +/** + * jpeg_v5_0_0_dec_ring_get_rptr - get read pointer + * + * @ring: amdgpu_ring pointer + * + * Returns the current hardware read pointer + */ +static uint64_t jpeg_v5_0_0_dec_ring_get_rptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + return RREG32_SOC15(JPEG, 0, regUVD_JRBC_RB_RPTR); +} + +/** + * jpeg_v5_0_0_dec_ring_get_wptr - get write pointer + * + * @ring: amdgpu_ring pointer + * + * Returns the current hardware write pointer + */ +static uint64_t jpeg_v5_0_0_dec_ring_get_wptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + if (ring->use_doorbell) + return *ring->wptr_cpu_addr; + else + return RREG32_SOC15(JPEG, 0, regUVD_JRBC_RB_WPTR); +} + +/** + * jpeg_v5_0_0_dec_ring_set_wptr - set write pointer + * + * @ring: amdgpu_ring pointer + * + * Commits the write pointer to the hardware + */ +static void jpeg_v5_0_0_dec_ring_set_wptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + if (ring->use_doorbell) { + *ring->wptr_cpu_addr = lower_32_bits(ring->wptr); + WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr)); + } else { + WREG32_SOC15(JPEG, 0, regUVD_JRBC_RB_WPTR, lower_32_bits(ring->wptr)); + } +} + +static bool jpeg_v5_0_0_is_idle(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int ret = 1; + + ret &= (((RREG32_SOC15(JPEG, 0, regUVD_JRBC_STATUS) & + UVD_JRBC_STATUS__RB_JOB_DONE_MASK) == + UVD_JRBC_STATUS__RB_JOB_DONE_MASK)); + + return ret; +} + +static int jpeg_v5_0_0_wait_for_idle(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + return SOC15_WAIT_ON_RREG(JPEG, 0, regUVD_JRBC_STATUS, + UVD_JRBC_STATUS__RB_JOB_DONE_MASK, + UVD_JRBC_STATUS__RB_JOB_DONE_MASK); +} + +static int jpeg_v5_0_0_set_clockgating_state(void *handle, + enum amd_clockgating_state state) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + bool enable = (state == AMD_CG_STATE_GATE) ? true : false; + + if (enable) { + if (!jpeg_v5_0_0_is_idle(handle)) + return -EBUSY; + jpeg_v5_0_0_enable_clock_gating(adev); + } else { + jpeg_v5_0_0_disable_clock_gating(adev); + } + + return 0; +} + +static int jpeg_v5_0_0_set_powergating_state(void *handle, + enum amd_powergating_state state) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int ret; + + if (state == adev->jpeg.cur_state) + return 0; + + if (state == AMD_PG_STATE_GATE) + ret = jpeg_v5_0_0_stop(adev); + else + ret = jpeg_v5_0_0_start(adev); + + if (!ret) + adev->jpeg.cur_state = state; + + return ret; +} + +static int jpeg_v5_0_0_set_interrupt_state(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + unsigned int type, + enum amdgpu_interrupt_state state) +{ + return 0; +} + +static int jpeg_v5_0_0_process_interrupt(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + DRM_DEBUG("IH: JPEG TRAP\n"); + + switch (entry->src_id) { + case VCN_4_0__SRCID__JPEG_DECODE: + amdgpu_fence_process(adev->jpeg.inst->ring_dec); + break; + default: + DRM_DEV_ERROR(adev->dev, "Unhandled interrupt: %d %d\n", + entry->src_id, entry->src_data[0]); + break; + } + + return 0; +} + +static const struct amd_ip_funcs jpeg_v5_0_0_ip_funcs = { + .name = "jpeg_v5_0_0", + .early_init = jpeg_v5_0_0_early_init, + .late_init = NULL, + .sw_init = jpeg_v5_0_0_sw_init, + .sw_fini = jpeg_v5_0_0_sw_fini, + .hw_init = jpeg_v5_0_0_hw_init, + .hw_fini = jpeg_v5_0_0_hw_fini, + .suspend = jpeg_v5_0_0_suspend, + .resume = jpeg_v5_0_0_resume, + .is_idle = jpeg_v5_0_0_is_idle, + .wait_for_idle = jpeg_v5_0_0_wait_for_idle, + .check_soft_reset = NULL, + .pre_soft_reset = NULL, + .soft_reset = NULL, + .post_soft_reset = NULL, + .set_clockgating_state = jpeg_v5_0_0_set_clockgating_state, + .set_powergating_state = jpeg_v5_0_0_set_powergating_state, + .dump_ip_state = NULL, + .print_ip_state = NULL, +}; + +static const struct amdgpu_ring_funcs jpeg_v5_0_0_dec_ring_vm_funcs = { + .type = AMDGPU_RING_TYPE_VCN_JPEG, + .align_mask = 0xf, + .get_rptr = jpeg_v5_0_0_dec_ring_get_rptr, + .get_wptr = jpeg_v5_0_0_dec_ring_get_wptr, + .set_wptr = jpeg_v5_0_0_dec_ring_set_wptr, + .emit_frame_size = + SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 + + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 + + 8 + /* jpeg_v5_0_0_dec_ring_emit_vm_flush */ + 22 + 22 + /* jpeg_v5_0_0_dec_ring_emit_fence x2 vm fence */ + 8 + 16, + .emit_ib_size = 22, /* jpeg_v5_0_0_dec_ring_emit_ib */ + .emit_ib = jpeg_v4_0_3_dec_ring_emit_ib, + .emit_fence = jpeg_v4_0_3_dec_ring_emit_fence, + .emit_vm_flush = jpeg_v4_0_3_dec_ring_emit_vm_flush, + .test_ring = amdgpu_jpeg_dec_ring_test_ring, + .test_ib = amdgpu_jpeg_dec_ring_test_ib, + .insert_nop = jpeg_v4_0_3_dec_ring_nop, + .insert_start = jpeg_v4_0_3_dec_ring_insert_start, + .insert_end = jpeg_v4_0_3_dec_ring_insert_end, + .pad_ib = amdgpu_ring_generic_pad_ib, + .begin_use = amdgpu_jpeg_ring_begin_use, + .end_use = amdgpu_jpeg_ring_end_use, + .emit_wreg = jpeg_v4_0_3_dec_ring_emit_wreg, + .emit_reg_wait = jpeg_v4_0_3_dec_ring_emit_reg_wait, + .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, +}; + +static void jpeg_v5_0_0_set_dec_ring_funcs(struct amdgpu_device *adev) +{ + adev->jpeg.inst->ring_dec->funcs = &jpeg_v5_0_0_dec_ring_vm_funcs; + DRM_DEV_INFO(adev->dev, "JPEG decode is enabled in VM mode\n"); +} + +static const struct amdgpu_irq_src_funcs jpeg_v5_0_0_irq_funcs = { + .set = jpeg_v5_0_0_set_interrupt_state, + .process = jpeg_v5_0_0_process_interrupt, +}; + +static void jpeg_v5_0_0_set_irq_funcs(struct amdgpu_device *adev) +{ + adev->jpeg.inst->irq.num_types = 1; + adev->jpeg.inst->irq.funcs = &jpeg_v5_0_0_irq_funcs; +} + +const struct amdgpu_ip_block_version jpeg_v5_0_0_ip_block = { + .type = AMD_IP_BLOCK_TYPE_JPEG, + .major = 5, + .minor = 0, + .rev = 0, + .funcs = &jpeg_v5_0_0_ip_funcs, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.h b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.h new file mode 100644 index 000000000000..bd348336b215 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.h @@ -0,0 +1,29 @@ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __JPEG_V5_0_0_H__ +#define __JPEG_V5_0_0_H__ + +extern const struct amdgpu_ip_block_version jpeg_v5_0_0_ip_block; + +#endif /* __JPEG_V5_0_0_H__ */ diff --git a/drivers/gpu/drm/amd/amdgpu/lsdma_v7_0.c b/drivers/gpu/drm/amd/amdgpu/lsdma_v7_0.c new file mode 100644 index 000000000000..396262044ea8 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/lsdma_v7_0.c @@ -0,0 +1,121 @@ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include <linux/delay.h> +#include "amdgpu.h" +#include "lsdma_v7_0.h" +#include "amdgpu_lsdma.h" + +#include "lsdma/lsdma_7_0_0_offset.h" +#include "lsdma/lsdma_7_0_0_sh_mask.h" + +static int lsdma_v7_0_wait_pio_status(struct amdgpu_device *adev) +{ + return amdgpu_lsdma_wait_for(adev, SOC15_REG_OFFSET(LSDMA, 0, regLSDMA_PIO_STATUS), + LSDMA_PIO_STATUS__PIO_IDLE_MASK | LSDMA_PIO_STATUS__PIO_FIFO_EMPTY_MASK, + LSDMA_PIO_STATUS__PIO_IDLE_MASK | LSDMA_PIO_STATUS__PIO_FIFO_EMPTY_MASK); +} + +static int lsdma_v7_0_copy_mem(struct amdgpu_device *adev, + uint64_t src_addr, + uint64_t dst_addr, + uint64_t size) +{ + int ret; + uint32_t tmp; + + WREG32_SOC15(LSDMA, 0, regLSDMA_PIO_SRC_ADDR_LO, lower_32_bits(src_addr)); + WREG32_SOC15(LSDMA, 0, regLSDMA_PIO_SRC_ADDR_HI, upper_32_bits(src_addr)); + + WREG32_SOC15(LSDMA, 0, regLSDMA_PIO_DST_ADDR_LO, lower_32_bits(dst_addr)); + WREG32_SOC15(LSDMA, 0, regLSDMA_PIO_DST_ADDR_HI, upper_32_bits(dst_addr)); + + WREG32_SOC15(LSDMA, 0, regLSDMA_PIO_CONTROL, 0x0); + + tmp = RREG32_SOC15(LSDMA, 0, regLSDMA_PIO_COMMAND); + tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, BYTE_COUNT, size); + tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, SRC_LOCATION, 0); + tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, DST_LOCATION, 0); + tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, SRC_ADDR_INC, 0); + tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, DST_ADDR_INC, 0); + tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, OVERLAP_DISABLE, 0); + tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, CONSTANT_FILL, 0); + WREG32_SOC15(LSDMA, 0, regLSDMA_PIO_COMMAND, tmp); + + ret = lsdma_v7_0_wait_pio_status(adev); + if (ret) + dev_err(adev->dev, "LSDMA PIO failed to copy memory!\n"); + + return ret; +} + +static int lsdma_v7_0_fill_mem(struct amdgpu_device *adev, + uint64_t dst_addr, + uint32_t data, + uint64_t size) +{ + int ret; + uint32_t tmp; + + WREG32_SOC15(LSDMA, 0, regLSDMA_PIO_CONSTFILL_DATA, data); + + WREG32_SOC15(LSDMA, 0, regLSDMA_PIO_DST_ADDR_LO, lower_32_bits(dst_addr)); + WREG32_SOC15(LSDMA, 0, regLSDMA_PIO_DST_ADDR_HI, upper_32_bits(dst_addr)); + + WREG32_SOC15(LSDMA, 0, regLSDMA_PIO_CONTROL, 0x0); + + tmp = RREG32_SOC15(LSDMA, 0, regLSDMA_PIO_COMMAND); + tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, BYTE_COUNT, size); + tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, SRC_LOCATION, 0); + tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, DST_LOCATION, 0); + tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, SRC_ADDR_INC, 0); + tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, DST_ADDR_INC, 0); + tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, OVERLAP_DISABLE, 0); + tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, CONSTANT_FILL, 1); + WREG32_SOC15(LSDMA, 0, regLSDMA_PIO_COMMAND, tmp); + + ret = lsdma_v7_0_wait_pio_status(adev); + if (ret) + dev_err(adev->dev, "LSDMA PIO failed to fill memory!\n"); + + return ret; +} + +static void lsdma_v7_0_update_memory_power_gating(struct amdgpu_device *adev, + bool enable) +{ + uint32_t tmp; + + tmp = RREG32_SOC15(LSDMA, 0, regLSDMA_MEM_POWER_CTRL); + tmp = REG_SET_FIELD(tmp, LSDMA_MEM_POWER_CTRL, MEM_POWER_CTRL_EN, 0); + WREG32_SOC15(LSDMA, 0, regLSDMA_MEM_POWER_CTRL, tmp); + + tmp = REG_SET_FIELD(tmp, LSDMA_MEM_POWER_CTRL, MEM_POWER_CTRL_EN, enable); + WREG32_SOC15(LSDMA, 0, regLSDMA_MEM_POWER_CTRL, tmp); +} + +const struct amdgpu_lsdma_funcs lsdma_v7_0_funcs = { + .copy_mem = lsdma_v7_0_copy_mem, + .fill_mem = lsdma_v7_0_fill_mem, + .update_memory_power_gating = lsdma_v7_0_update_memory_power_gating +}; diff --git a/drivers/gpu/drm/amd/amdgpu/lsdma_v7_0.h b/drivers/gpu/drm/amd/amdgpu/lsdma_v7_0.h new file mode 100644 index 000000000000..52b4485cdd98 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/lsdma_v7_0.h @@ -0,0 +1,31 @@ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __LSDMA_V7_0_H__ +#define __LSDMA_V7_0_H__ + +#include "soc15_common.h" + +extern const struct amdgpu_lsdma_funcs lsdma_v7_0_funcs; + +#endif /* __LSDMA_V7_0_H__ */ diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c b/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c index 1e5ad1e08d2a..a626bf904926 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c @@ -1176,6 +1176,8 @@ static const struct amd_ip_funcs mes_v10_1_ip_funcs = { .hw_fini = mes_v10_1_hw_fini, .suspend = mes_v10_1_suspend, .resume = mes_v10_1_resume, + .dump_ip_state = NULL, + .print_ip_state = NULL, }; const struct amdgpu_ip_block_version mes_v10_1_ip_block = { diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c index 26d71a22395d..0d1407f25005 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c @@ -49,6 +49,8 @@ MODULE_FIRMWARE("amdgpu/gc_11_0_4_mes_2.bin"); MODULE_FIRMWARE("amdgpu/gc_11_0_4_mes1.bin"); MODULE_FIRMWARE("amdgpu/gc_11_5_0_mes_2.bin"); MODULE_FIRMWARE("amdgpu/gc_11_5_0_mes1.bin"); +MODULE_FIRMWARE("amdgpu/gc_11_5_1_mes_2.bin"); +MODULE_FIRMWARE("amdgpu/gc_11_5_1_mes1.bin"); static int mes_v11_0_hw_fini(void *handle); @@ -56,6 +58,7 @@ static int mes_v11_0_kiq_hw_init(struct amdgpu_device *adev); static int mes_v11_0_kiq_hw_fini(struct amdgpu_device *adev); #define MES_EOP_SIZE 2048 +#define GFX_MES_DRAM_SIZE 0x80000 static void mes_v11_0_ring_set_wptr(struct amdgpu_ring *ring) { @@ -97,18 +100,76 @@ static const struct amdgpu_ring_funcs mes_v11_0_ring_funcs = { .insert_nop = amdgpu_ring_insert_nop, }; +static const char *mes_v11_0_opcodes[] = { + "SET_HW_RSRC", + "SET_SCHEDULING_CONFIG", + "ADD_QUEUE", + "REMOVE_QUEUE", + "PERFORM_YIELD", + "SET_GANG_PRIORITY_LEVEL", + "SUSPEND", + "RESUME", + "RESET", + "SET_LOG_BUFFER", + "CHANGE_GANG_PRORITY", + "QUERY_SCHEDULER_STATUS", + "PROGRAM_GDS", + "SET_DEBUG_VMID", + "MISC", + "UPDATE_ROOT_PAGE_TABLE", + "AMD_LOG", +}; + +static const char *mes_v11_0_misc_opcodes[] = { + "WRITE_REG", + "INV_GART", + "QUERY_STATUS", + "READ_REG", + "WAIT_REG_MEM", + "SET_SHADER_DEBUGGER", +}; + +static const char *mes_v11_0_get_op_string(union MESAPI__MISC *x_pkt) +{ + const char *op_str = NULL; + + if (x_pkt->header.opcode < ARRAY_SIZE(mes_v11_0_opcodes)) + op_str = mes_v11_0_opcodes[x_pkt->header.opcode]; + + return op_str; +} + +static const char *mes_v11_0_get_misc_op_string(union MESAPI__MISC *x_pkt) +{ + const char *op_str = NULL; + + if ((x_pkt->header.opcode == MES_SCH_API_MISC) && + (x_pkt->opcode < ARRAY_SIZE(mes_v11_0_misc_opcodes))) + op_str = mes_v11_0_misc_opcodes[x_pkt->opcode]; + + return op_str; +} + static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, void *pkt, int size, int api_status_off) { int ndw = size / 4; signed long r; - union MESAPI__ADD_QUEUE *x_pkt = pkt; + union MESAPI__MISC *x_pkt = pkt; struct MES_API_STATUS *api_status; struct amdgpu_device *adev = mes->adev; struct amdgpu_ring *ring = &mes->ring; unsigned long flags; - signed long timeout = adev->usec_timeout; + signed long timeout = 3000000; /* 3000 ms */ + const char *op_str, *misc_op_str; + u32 fence_offset; + u64 fence_gpu_addr; + u64 *fence_ptr; + int ret; + + if (x_pkt->header.opcode >= MES_SCH_API_MAX) + return -EINVAL; if (amdgpu_emu_mode) { timeout *= 100; @@ -118,27 +179,52 @@ static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, } BUG_ON(size % 4 != 0); + ret = amdgpu_device_wb_get(adev, &fence_offset); + if (ret) + return ret; + fence_gpu_addr = + adev->wb.gpu_addr + (fence_offset * 4); + fence_ptr = (u64 *)&adev->wb.wb[fence_offset]; + *fence_ptr = 0; + spin_lock_irqsave(&mes->ring_lock, flags); if (amdgpu_ring_alloc(ring, ndw)) { spin_unlock_irqrestore(&mes->ring_lock, flags); + amdgpu_device_wb_free(adev, fence_offset); return -ENOMEM; } api_status = (struct MES_API_STATUS *)((char *)pkt + api_status_off); - api_status->api_completion_fence_addr = mes->ring.fence_drv.gpu_addr; - api_status->api_completion_fence_value = ++mes->ring.fence_drv.sync_seq; + api_status->api_completion_fence_addr = fence_gpu_addr; + api_status->api_completion_fence_value = 1; amdgpu_ring_write_multiple(ring, pkt, ndw); amdgpu_ring_commit(ring); spin_unlock_irqrestore(&mes->ring_lock, flags); - DRM_DEBUG("MES msg=%d was emitted\n", x_pkt->header.opcode); + op_str = mes_v11_0_get_op_string(x_pkt); + misc_op_str = mes_v11_0_get_misc_op_string(x_pkt); - r = amdgpu_fence_wait_polling(ring, ring->fence_drv.sync_seq, - timeout); + if (misc_op_str) + dev_dbg(adev->dev, "MES msg=%s (%s) was emitted\n", op_str, misc_op_str); + else if (op_str) + dev_dbg(adev->dev, "MES msg=%s was emitted\n", op_str); + else + dev_dbg(adev->dev, "MES msg=%d was emitted\n", x_pkt->header.opcode); + + r = amdgpu_mes_fence_wait_polling(fence_ptr, (u64)1, timeout); + amdgpu_device_wb_free(adev, fence_offset); if (r < 1) { - DRM_ERROR("MES failed to response msg=%d\n", - x_pkt->header.opcode); + + if (misc_op_str) + dev_err(adev->dev, "MES failed to respond to msg=%s (%s)\n", + op_str, misc_op_str); + else if (op_str) + dev_err(adev->dev, "MES failed to respond to msg=%s\n", + op_str); + else + dev_err(adev->dev, "MES failed to respond to msg=%d\n", + x_pkt->header.opcode); while (halt_if_hws_hang) schedule(); @@ -408,14 +494,47 @@ static int mes_v11_0_set_hw_resources(struct amdgpu_mes *mes) mes_set_hw_res_pkt.enable_reg_active_poll = 1; mes_set_hw_res_pkt.enable_level_process_quantum_check = 1; mes_set_hw_res_pkt.oversubscription_timer = 50; - mes_set_hw_res_pkt.enable_mes_event_int_logging = 1; - mes_set_hw_res_pkt.event_intr_history_gpu_mc_ptr = mes->event_log_gpu_addr; + if (amdgpu_mes_log_enable) { + mes_set_hw_res_pkt.enable_mes_event_int_logging = 1; + mes_set_hw_res_pkt.event_intr_history_gpu_mc_ptr = + mes->event_log_gpu_addr; + } return mes_v11_0_submit_pkt_and_poll_completion(mes, &mes_set_hw_res_pkt, sizeof(mes_set_hw_res_pkt), offsetof(union MESAPI_SET_HW_RESOURCES, api_status)); } +static int mes_v11_0_set_hw_resources_1(struct amdgpu_mes *mes) +{ + int size = 128 * PAGE_SIZE; + int ret = 0; + struct amdgpu_device *adev = mes->adev; + union MESAPI_SET_HW_RESOURCES_1 mes_set_hw_res_pkt; + memset(&mes_set_hw_res_pkt, 0, sizeof(mes_set_hw_res_pkt)); + + mes_set_hw_res_pkt.header.type = MES_API_TYPE_SCHEDULER; + mes_set_hw_res_pkt.header.opcode = MES_SCH_API_SET_HW_RSRC_1; + mes_set_hw_res_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; + mes_set_hw_res_pkt.enable_mes_info_ctx = 1; + + ret = amdgpu_bo_create_kernel(adev, size, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM, + &mes->resource_1, + &mes->resource_1_gpu_addr, + &mes->resource_1_addr); + if (ret) { + dev_err(adev->dev, "(%d) failed to create mes resource_1 bo\n", ret); + return ret; + } + + mes_set_hw_res_pkt.mes_info_ctx_mc_addr = mes->resource_1_gpu_addr; + mes_set_hw_res_pkt.mes_info_ctx_size = mes->resource_1->tbo.base.size; + return mes_v11_0_submit_pkt_and_poll_completion(mes, + &mes_set_hw_res_pkt, sizeof(mes_set_hw_res_pkt), + offsetof(union MESAPI_SET_HW_RESOURCES_1, api_status)); +} + static const struct amdgpu_mes_funcs mes_v11_0_funcs = { .add_hw_queue = mes_v11_0_add_hw_queue, .remove_hw_queue = mes_v11_0_remove_hw_queue, @@ -475,7 +594,13 @@ static int mes_v11_0_allocate_ucode_data_buffer(struct amdgpu_device *adev, le32_to_cpu(mes_hdr->mes_ucode_data_offset_bytes)); fw_size = le32_to_cpu(mes_hdr->mes_ucode_data_size_bytes); - r = amdgpu_bo_create_reserved(adev, fw_size, + if (fw_size > GFX_MES_DRAM_SIZE) { + dev_err(adev->dev, "PIPE%d ucode data fw size (%d) is greater than dram size (%d)\n", + pipe, fw_size, GFX_MES_DRAM_SIZE); + return -EINVAL; + } + + r = amdgpu_bo_create_reserved(adev, GFX_MES_DRAM_SIZE, 64 * 1024, AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT, @@ -611,8 +736,8 @@ static int mes_v11_0_load_microcode(struct amdgpu_device *adev, WREG32_SOC15(GC, 0, regCP_MES_MDBASE_HI, upper_32_bits(adev->mes.data_fw_gpu_addr[pipe])); - /* Set 0x3FFFF (256K-1) to CP_MES_MDBOUND_LO */ - WREG32_SOC15(GC, 0, regCP_MES_MDBOUND_LO, 0x3FFFF); + /* Set 0x7FFFF (512K-1) to CP_MES_MDBOUND_LO */ + WREG32_SOC15(GC, 0, regCP_MES_MDBOUND_LO, 0x7FFFF); if (prime_icache) { /* invalidate ICACHE */ @@ -1191,6 +1316,14 @@ static int mes_v11_0_hw_init(void *handle) if (r) goto failure; + if (amdgpu_sriov_is_mes_info_enable(adev)) { + r = mes_v11_0_set_hw_resources_1(&adev->mes); + if (r) { + DRM_ERROR("failed mes_v11_0_set_hw_resources_1, r=%d\n", r); + goto failure; + } + } + r = mes_v11_0_query_sched_status(&adev->mes); if (r) { DRM_ERROR("MES is busy\n"); @@ -1214,6 +1347,11 @@ failure: static int mes_v11_0_hw_fini(void *handle) { + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + if (amdgpu_sriov_is_mes_info_enable(adev)) { + amdgpu_bo_free_kernel(&adev->mes.resource_1, &adev->mes.resource_1_gpu_addr, + &adev->mes.resource_1_addr); + } return 0; } @@ -1279,6 +1417,8 @@ static const struct amd_ip_funcs mes_v11_0_ip_funcs = { .hw_fini = mes_v11_0_hw_fini, .suspend = mes_v11_0_suspend, .resume = mes_v11_0_resume, + .dump_ip_state = NULL, + .print_ip_state = NULL, }; const struct amdgpu_ip_block_version mes_v11_0_ip_block = { diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c index fb53aacdcba2..7a1ff298417a 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c @@ -33,6 +33,7 @@ #define regVM_L2_CNTL3_DEFAULT 0x80100007 #define regVM_L2_CNTL4_DEFAULT 0x000000c1 +#define mmSMNAID_AID0_MCA_SMU 0x03b30400 static u64 mmhub_v1_8_get_fb_location(struct amdgpu_device *adev) { @@ -558,6 +559,20 @@ static void mmhub_v1_8_get_clockgating(struct amdgpu_device *adev, u64 *flags) } +static bool mmhub_v1_8_query_utcl2_poison_status(struct amdgpu_device *adev, + int hub_inst) +{ + u32 fed, status; + + status = RREG32_SOC15(MMHUB, hub_inst, regVM_L2_PROTECTION_FAULT_STATUS); + fed = REG_GET_FIELD(status, VM_L2_PROTECTION_FAULT_STATUS, FED); + /* reset page fault status */ + WREG32_P(SOC15_REG_OFFSET(MMHUB, hub_inst, + regVM_L2_PROTECTION_FAULT_STATUS), 1, ~1); + + return fed; +} + const struct amdgpu_mmhub_funcs mmhub_v1_8_funcs = { .get_fb_location = mmhub_v1_8_get_fb_location, .init = mmhub_v1_8_init, @@ -567,6 +582,7 @@ const struct amdgpu_mmhub_funcs mmhub_v1_8_funcs = { .setup_vm_pt_regs = mmhub_v1_8_setup_vm_pt_regs, .set_clockgating = mmhub_v1_8_set_clockgating, .get_clockgating = mmhub_v1_8_get_clockgating, + .query_utcl2_poison_status = mmhub_v1_8_query_utcl2_poison_status, }; static const struct amdgpu_ras_err_status_reg_entry mmhub_v1_8_ce_reg_list[] = { @@ -705,8 +721,98 @@ static const struct amdgpu_ras_block_hw_ops mmhub_v1_8_ras_hw_ops = { .reset_ras_error_count = mmhub_v1_8_reset_ras_error_count, }; +static int mmhub_v1_8_aca_bank_parser(struct aca_handle *handle, struct aca_bank *bank, + enum aca_smu_type type, void *data) +{ + struct aca_bank_info info; + u64 misc0; + int ret; + + ret = aca_bank_info_decode(bank, &info); + if (ret) + return ret; + + misc0 = bank->regs[ACA_REG_IDX_MISC0]; + switch (type) { + case ACA_SMU_TYPE_UE: + ret = aca_error_cache_log_bank_error(handle, &info, ACA_ERROR_TYPE_UE, + 1ULL); + break; + case ACA_SMU_TYPE_CE: + ret = aca_error_cache_log_bank_error(handle, &info, ACA_ERROR_TYPE_CE, + ACA_REG__MISC0__ERRCNT(misc0)); + break; + default: + return -EINVAL; + } + + return ret; +} + +/* reference to smu driver if header file */ +static int mmhub_v1_8_err_codes[] = { + 0, 1, 2, 3, 4, /* CODE_DAGB0 - 4 */ + 5, 6, 7, 8, 9, /* CODE_EA0 - 4 */ + 10, /* CODE_UTCL2_ROUTER */ + 11, /* CODE_VML2 */ + 12, /* CODE_VML2_WALKER */ + 13, /* CODE_MMCANE */ +}; + +static bool mmhub_v1_8_aca_bank_is_valid(struct aca_handle *handle, struct aca_bank *bank, + enum aca_smu_type type, void *data) +{ + u32 instlo; + + instlo = ACA_REG__IPID__INSTANCEIDLO(bank->regs[ACA_REG_IDX_IPID]); + instlo &= GENMASK(31, 1); + + if (instlo != mmSMNAID_AID0_MCA_SMU) + return false; + + if (aca_bank_check_error_codes(handle->adev, bank, + mmhub_v1_8_err_codes, + ARRAY_SIZE(mmhub_v1_8_err_codes))) + return false; + + return true; +} + +static const struct aca_bank_ops mmhub_v1_8_aca_bank_ops = { + .aca_bank_parser = mmhub_v1_8_aca_bank_parser, + .aca_bank_is_valid = mmhub_v1_8_aca_bank_is_valid, +}; + +static const struct aca_info mmhub_v1_8_aca_info = { + .hwip = ACA_HWIP_TYPE_SMU, + .mask = ACA_ERROR_UE_MASK, + .bank_ops = &mmhub_v1_8_aca_bank_ops, +}; + +static int mmhub_v1_8_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block) +{ + int r; + + r = amdgpu_ras_block_late_init(adev, ras_block); + if (r) + return r; + + r = amdgpu_ras_bind_aca(adev, AMDGPU_RAS_BLOCK__MMHUB, + &mmhub_v1_8_aca_info, NULL); + if (r) + goto late_fini; + + return 0; + +late_fini: + amdgpu_ras_block_late_fini(adev, ras_block); + + return r; +} + struct amdgpu_mmhub_ras mmhub_v1_8_ras = { .ras_block = { .hw_ops = &mmhub_v1_8_ras_hw_ops, + .ras_late_init = mmhub_v1_8_ras_late_init, }, }; diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_3.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_3.c index dc4812ecc98d..238ea40c2450 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_3.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_3.c @@ -98,16 +98,16 @@ mmhub_v3_3_print_l2_protection_fault_status(struct amdgpu_device *adev, switch (amdgpu_ip_version(adev, MMHUB_HWIP, 0)) { case IP_VERSION(3, 3, 0): - mmhub_cid = mmhub_client_ids_v3_3[cid][rw]; + case IP_VERSION(3, 3, 1): + mmhub_cid = cid < ARRAY_SIZE(mmhub_client_ids_v3_3) ? + mmhub_client_ids_v3_3[cid][rw] : + cid == 0x140 ? "UMSCH" : NULL; break; default: mmhub_cid = NULL; break; } - if (!mmhub_cid && cid == 0x140) - mmhub_cid = "UMSCH"; - dev_err(adev->dev, "\t Faulty UTCL2 client ID: %s (0x%x)\n", mmhub_cid ? mmhub_cid : "unknown", cid); dev_err(adev->dev, "\t MORE_FAULTS: 0x%lx\n", diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c index 63725b2ebc03..0c7275bca8f7 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c @@ -276,6 +276,8 @@ static void xgpu_ai_mailbox_flr_work(struct work_struct *work) timeout -= 10; } while (timeout > 1); + dev_warn(adev->dev, "waiting IDH_FLR_NOTIFICATION_CMPL timeout\n"); + flr_done: atomic_set(&adev->reset_domain->in_gpu_reset, 0); up_write(&adev->reset_domain->sem); @@ -404,7 +406,8 @@ static int xgpu_ai_request_init_data(struct amdgpu_device *adev) return xgpu_ai_send_access_requests(adev, IDH_REQ_GPU_INIT_DATA); } -static void xgpu_ai_ras_poison_handler(struct amdgpu_device *adev) +static void xgpu_ai_ras_poison_handler(struct amdgpu_device *adev, + enum amdgpu_ras_block block) { xgpu_ai_send_access_requests(adev, IDH_RAS_POISON); } diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c index 6a68ee946f1c..aba00d961627 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c @@ -152,14 +152,14 @@ static void xgpu_nv_mailbox_trans_msg (struct amdgpu_device *adev, xgpu_nv_mailbox_set_valid(adev, false); } -static int xgpu_nv_send_access_requests(struct amdgpu_device *adev, - enum idh_request req) +static int xgpu_nv_send_access_requests_with_param(struct amdgpu_device *adev, + enum idh_request req, u32 data1, u32 data2, u32 data3) { int r, retry = 1; enum idh_event event = -1; send_request: - xgpu_nv_mailbox_trans_msg(adev, req, 0, 0, 0); + xgpu_nv_mailbox_trans_msg(adev, req, data1, data2, data3); switch (req) { case IDH_REQ_GPU_INIT_ACCESS: @@ -170,6 +170,10 @@ send_request: case IDH_REQ_GPU_INIT_DATA: event = IDH_REQ_GPU_INIT_DATA_READY; break; + case IDH_RAS_POISON: + if (data1 != 0) + event = IDH_RAS_POISON_READY; + break; default: break; } @@ -206,6 +210,13 @@ send_request: return 0; } +static int xgpu_nv_send_access_requests(struct amdgpu_device *adev, + enum idh_request req) +{ + return xgpu_nv_send_access_requests_with_param(adev, + req, 0, 0, 0); +} + static int xgpu_nv_request_reset(struct amdgpu_device *adev) { int ret, i = 0; @@ -298,6 +309,8 @@ static void xgpu_nv_mailbox_flr_work(struct work_struct *work) timeout -= 10; } while (timeout > 1); + dev_warn(adev->dev, "waiting IDH_FLR_NOTIFICATION_CMPL timeout\n"); + flr_done: atomic_set(&adev->reset_domain->in_gpu_reset, 0); up_write(&adev->reset_domain->sem); @@ -424,9 +437,16 @@ void xgpu_nv_mailbox_put_irq(struct amdgpu_device *adev) amdgpu_irq_put(adev, &adev->virt.rcv_irq, 0); } -static void xgpu_nv_ras_poison_handler(struct amdgpu_device *adev) +static void xgpu_nv_ras_poison_handler(struct amdgpu_device *adev, + enum amdgpu_ras_block block) { - xgpu_nv_send_access_requests(adev, IDH_RAS_POISON); + if (amdgpu_ip_version(adev, UMC_HWIP, 0) < IP_VERSION(12, 0, 0)) { + xgpu_nv_send_access_requests(adev, IDH_RAS_POISON); + } else { + amdgpu_virt_fini_data_exchange(adev); + xgpu_nv_send_access_requests_with_param(adev, + IDH_RAS_POISON, block, 0, 0); + } } const struct amdgpu_virt_ops xgpu_nv_virt_ops = { diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h index d0221ce08769..1e8fd90cab43 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h @@ -51,6 +51,7 @@ enum idh_event { IDH_FAIL, IDH_QUERY_ALIVE, IDH_REQ_GPU_INIT_DATA_READY, + IDH_RAS_POISON_READY, IDH_TEXT_MESSAGE = 255, }; diff --git a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c index de93614726c9..b281462093f1 100644 --- a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c @@ -713,6 +713,8 @@ static const struct amd_ip_funcs navi10_ih_ip_funcs = { .set_clockgating_state = navi10_ih_set_clockgating_state, .set_powergating_state = navi10_ih_set_powergating_state, .get_clockgating_state = navi10_ih_get_clockgating_state, + .dump_ip_state = NULL, + .print_ip_state = NULL, }; static const struct amdgpu_ih_funcs navi10_ih_funcs = { @@ -728,8 +730,7 @@ static void navi10_ih_set_interrupt_funcs(struct amdgpu_device *adev) adev->irq.ih_funcs = &navi10_ih_funcs; } -const struct amdgpu_ip_block_version navi10_ih_ip_block = -{ +const struct amdgpu_ip_block_version navi10_ih_ip_block = { .type = AMD_IP_BLOCK_TYPE_IH, .major = 5, .minor = 0, diff --git a/drivers/gpu/drm/amd/amdgpu/nbif_v6_3_1.c b/drivers/gpu/drm/amd/amdgpu/nbif_v6_3_1.c new file mode 100644 index 000000000000..96ed00ac81ac --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/nbif_v6_3_1.c @@ -0,0 +1,495 @@ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#include "amdgpu.h" +#include "amdgpu_atombios.h" +#include "nbif_v6_3_1.h" + +#include "nbif/nbif_6_3_1_offset.h" +#include "nbif/nbif_6_3_1_sh_mask.h" +#include "pcie/pcie_6_1_0_offset.h" +#include "pcie/pcie_6_1_0_sh_mask.h" +#include <uapi/linux/kfd_ioctl.h> + +static void nbif_v6_3_1_remap_hdp_registers(struct amdgpu_device *adev) +{ + WREG32_SOC15(NBIO, 0, regBIF_BX0_REMAP_HDP_MEM_FLUSH_CNTL, + adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL); + WREG32_SOC15(NBIO, 0, regBIF_BX0_REMAP_HDP_REG_FLUSH_CNTL, + adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_REG_FLUSH_CNTL); +} + +static u32 nbif_v6_3_1_get_rev_id(struct amdgpu_device *adev) +{ + u32 tmp = RREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_DEV0_EPF0_STRAP0); + + tmp &= RCC_STRAP0_RCC_DEV0_EPF0_STRAP0__STRAP_ATI_REV_ID_DEV0_F0_MASK; + tmp >>= RCC_STRAP0_RCC_DEV0_EPF0_STRAP0__STRAP_ATI_REV_ID_DEV0_F0__SHIFT; + + return tmp; +} + +static void nbif_v6_3_1_mc_access_enable(struct amdgpu_device *adev, bool enable) +{ + if (enable) + WREG32_SOC15(NBIO, 0, regBIF_BX0_BIF_FB_EN, + BIF_BX0_BIF_FB_EN__FB_READ_EN_MASK | + BIF_BX0_BIF_FB_EN__FB_WRITE_EN_MASK); + else + WREG32_SOC15(NBIO, 0, regBIF_BX0_BIF_FB_EN, 0); +} + +static u32 nbif_v6_3_1_get_memsize(struct amdgpu_device *adev) +{ + return RREG32_SOC15(NBIO, 0, regRCC_DEV0_EPF0_RCC_CONFIG_MEMSIZE); +} + +static void nbif_v6_3_1_sdma_doorbell_range(struct amdgpu_device *adev, + int instance, bool use_doorbell, + int doorbell_index, + int doorbell_size) +{ + if (instance == 0) { + u32 doorbell_range = RREG32_SOC15(NBIO, 0, regGDC_S2A0_S2A_DOORBELL_ENTRY_2_CTRL); + + if (use_doorbell) { + doorbell_range = REG_SET_FIELD(doorbell_range, + GDC_S2A0_S2A_DOORBELL_ENTRY_2_CTRL, + S2A_DOORBELL_PORT2_ENABLE, + 0x1); + doorbell_range = REG_SET_FIELD(doorbell_range, + GDC_S2A0_S2A_DOORBELL_ENTRY_2_CTRL, + S2A_DOORBELL_PORT2_AWID, + 0xe); + doorbell_range = REG_SET_FIELD(doorbell_range, + GDC_S2A0_S2A_DOORBELL_ENTRY_2_CTRL, + S2A_DOORBELL_PORT2_RANGE_OFFSET, + doorbell_index); + doorbell_range = REG_SET_FIELD(doorbell_range, + GDC_S2A0_S2A_DOORBELL_ENTRY_2_CTRL, + S2A_DOORBELL_PORT2_RANGE_SIZE, + doorbell_size); + doorbell_range = REG_SET_FIELD(doorbell_range, + GDC_S2A0_S2A_DOORBELL_ENTRY_2_CTRL, + S2A_DOORBELL_PORT2_AWADDR_31_28_VALUE, + 0x3); + } else + doorbell_range = REG_SET_FIELD(doorbell_range, + GDC_S2A0_S2A_DOORBELL_ENTRY_2_CTRL, + S2A_DOORBELL_PORT2_RANGE_SIZE, + 0); + + WREG32_SOC15(NBIO, 0, regGDC_S2A0_S2A_DOORBELL_ENTRY_2_CTRL, doorbell_range); + } +} + +static void nbif_v6_3_1_vcn_doorbell_range(struct amdgpu_device *adev, + bool use_doorbell, int doorbell_index, + int instance) +{ + u32 doorbell_range; + + if (instance) + doorbell_range = RREG32_SOC15(NBIO, 0, regGDC_S2A0_S2A_DOORBELL_ENTRY_5_CTRL); + else + doorbell_range = RREG32_SOC15(NBIO, 0, regGDC_S2A0_S2A_DOORBELL_ENTRY_4_CTRL); + + if (use_doorbell) { + doorbell_range = REG_SET_FIELD(doorbell_range, + GDC_S2A0_S2A_DOORBELL_ENTRY_4_CTRL, + S2A_DOORBELL_PORT4_ENABLE, + 0x1); + doorbell_range = REG_SET_FIELD(doorbell_range, + GDC_S2A0_S2A_DOORBELL_ENTRY_4_CTRL, + S2A_DOORBELL_PORT4_AWID, + instance ? 0x7 : 0x4); + doorbell_range = REG_SET_FIELD(doorbell_range, + GDC_S2A0_S2A_DOORBELL_ENTRY_4_CTRL, + S2A_DOORBELL_PORT4_RANGE_OFFSET, + doorbell_index); + doorbell_range = REG_SET_FIELD(doorbell_range, + GDC_S2A0_S2A_DOORBELL_ENTRY_4_CTRL, + S2A_DOORBELL_PORT4_RANGE_SIZE, + 8); + doorbell_range = REG_SET_FIELD(doorbell_range, + GDC_S2A0_S2A_DOORBELL_ENTRY_4_CTRL, + S2A_DOORBELL_PORT4_AWADDR_31_28_VALUE, + instance ? 0x7 : 0x4); + } else + doorbell_range = REG_SET_FIELD(doorbell_range, + GDC_S2A0_S2A_DOORBELL_ENTRY_4_CTRL, + S2A_DOORBELL_PORT4_RANGE_SIZE, + 0); + + if (instance) + WREG32_SOC15(NBIO, 0, regGDC_S2A0_S2A_DOORBELL_ENTRY_5_CTRL, doorbell_range); + else + WREG32_SOC15(NBIO, 0, regGDC_S2A0_S2A_DOORBELL_ENTRY_4_CTRL, doorbell_range); +} + +static void nbif_v6_3_1_gc_doorbell_init(struct amdgpu_device *adev) +{ + WREG32_SOC15(NBIO, 0, regGDC_S2A0_S2A_DOORBELL_ENTRY_0_CTRL, 0x30000007); + WREG32_SOC15(NBIO, 0, regGDC_S2A0_S2A_DOORBELL_ENTRY_3_CTRL, 0x3000000d); +} + +static void nbif_v6_3_1_enable_doorbell_aperture(struct amdgpu_device *adev, + bool enable) +{ + WREG32_FIELD15_PREREG(NBIO, 0, RCC_DEV0_EPF0_RCC_DOORBELL_APER_EN, + BIF_DOORBELL_APER_EN, enable ? 1 : 0); +} + +static void +nbif_v6_3_1_enable_doorbell_selfring_aperture(struct amdgpu_device *adev, + bool enable) +{ + u32 tmp = 0; + + if (enable) { + tmp = REG_SET_FIELD(tmp, BIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_CNTL, + DOORBELL_SELFRING_GPA_APER_EN, 1) | + REG_SET_FIELD(tmp, BIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_CNTL, + DOORBELL_SELFRING_GPA_APER_MODE, 1) | + REG_SET_FIELD(tmp, BIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_CNTL, + DOORBELL_SELFRING_GPA_APER_SIZE, 0); + + WREG32_SOC15(NBIO, 0, regBIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_BASE_LOW, + lower_32_bits(adev->doorbell.base)); + WREG32_SOC15(NBIO, 0, regBIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_BASE_HIGH, + upper_32_bits(adev->doorbell.base)); + } + + WREG32_SOC15(NBIO, 0, regBIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_CNTL, tmp); +} + +static void nbif_v6_3_1_ih_doorbell_range(struct amdgpu_device *adev, + bool use_doorbell, int doorbell_index) +{ + u32 ih_doorbell_range = RREG32_SOC15(NBIO, 0, regGDC_S2A0_S2A_DOORBELL_ENTRY_1_CTRL); + + if (use_doorbell) { + ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range, + GDC_S2A0_S2A_DOORBELL_ENTRY_1_CTRL, + S2A_DOORBELL_PORT1_ENABLE, + 0x1); + ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range, + GDC_S2A0_S2A_DOORBELL_ENTRY_1_CTRL, + S2A_DOORBELL_PORT1_AWID, + 0x0); + ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range, + GDC_S2A0_S2A_DOORBELL_ENTRY_1_CTRL, + S2A_DOORBELL_PORT1_RANGE_OFFSET, + doorbell_index); + ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range, + GDC_S2A0_S2A_DOORBELL_ENTRY_1_CTRL, + S2A_DOORBELL_PORT1_RANGE_SIZE, + 2); + ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range, + GDC_S2A0_S2A_DOORBELL_ENTRY_1_CTRL, + S2A_DOORBELL_PORT1_AWADDR_31_28_VALUE, + 0x0); + } else + ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range, + GDC_S2A0_S2A_DOORBELL_ENTRY_1_CTRL, + S2A_DOORBELL_PORT1_RANGE_SIZE, + 0); + + WREG32_SOC15(NBIO, 0, regGDC_S2A0_S2A_DOORBELL_ENTRY_1_CTRL, ih_doorbell_range); +} + +static void nbif_v6_3_1_ih_control(struct amdgpu_device *adev) +{ + u32 interrupt_cntl; + + /* setup interrupt control */ + WREG32_SOC15(NBIO, 0, regBIF_BX0_INTERRUPT_CNTL2, adev->dummy_page_addr >> 8); + + interrupt_cntl = RREG32_SOC15(NBIO, 0, regBIF_BX0_INTERRUPT_CNTL); + /* + * BIF_BX0_INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=0 - dummy read disabled with msi, enabled without msi + * BIF_BX0_INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=1 - dummy read controlled by IH_DUMMY_RD_EN + */ + interrupt_cntl = REG_SET_FIELD(interrupt_cntl, BIF_BX0_INTERRUPT_CNTL, + IH_DUMMY_RD_OVERRIDE, 0); + + /* BIF_BX0_INTERRUPT_CNTL__IH_REQ_NONSNOOP_EN_MASK=1 if ring is in non-cacheable memory, e.g., vram */ + interrupt_cntl = REG_SET_FIELD(interrupt_cntl, BIF_BX0_INTERRUPT_CNTL, + IH_REQ_NONSNOOP_EN, 0); + + WREG32_SOC15(NBIO, 0, regBIF_BX0_INTERRUPT_CNTL, interrupt_cntl); +} + +static void +nbif_v6_3_1_update_medium_grain_clock_gating(struct amdgpu_device *adev, + bool enable) +{ +} + +static void +nbif_v6_3_1_update_medium_grain_light_sleep(struct amdgpu_device *adev, + bool enable) +{ +} + +static void +nbif_v6_3_1_get_clockgating_state(struct amdgpu_device *adev, + u64 *flags) +{ +} + +static u32 nbif_v6_3_1_get_hdp_flush_req_offset(struct amdgpu_device *adev) +{ + return SOC15_REG_OFFSET(NBIO, 0, regBIF_BX_PF0_GPU_HDP_FLUSH_REQ); +} + +static u32 nbif_v6_3_1_get_hdp_flush_done_offset(struct amdgpu_device *adev) +{ + return SOC15_REG_OFFSET(NBIO, 0, regBIF_BX_PF0_GPU_HDP_FLUSH_DONE); +} + +static u32 nbif_v6_3_1_get_pcie_index_offset(struct amdgpu_device *adev) +{ + return SOC15_REG_OFFSET(NBIO, 0, regBIF_BX_PF0_RSMU_INDEX); +} + +static u32 nbif_v6_3_1_get_pcie_data_offset(struct amdgpu_device *adev) +{ + return SOC15_REG_OFFSET(NBIO, 0, regBIF_BX_PF0_RSMU_DATA); +} + +const struct nbio_hdp_flush_reg nbif_v6_3_1_hdp_flush_reg = { + .ref_and_mask_cp0 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP0_MASK, + .ref_and_mask_cp1 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP1_MASK, + .ref_and_mask_cp2 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP2_MASK, + .ref_and_mask_cp3 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP3_MASK, + .ref_and_mask_cp4 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP4_MASK, + .ref_and_mask_cp5 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP5_MASK, + .ref_and_mask_cp6 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP6_MASK, + .ref_and_mask_cp7 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP7_MASK, + .ref_and_mask_cp8 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP8_MASK, + .ref_and_mask_cp9 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP9_MASK, + .ref_and_mask_sdma0 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__SDMA0_MASK, + .ref_and_mask_sdma1 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__SDMA1_MASK, +}; + +static void nbif_v6_3_1_init_registers(struct amdgpu_device *adev) +{ + uint32_t data; + + data = RREG32_SOC15(NBIO, 0, regRCC_DEV0_EPF2_STRAP2); + data &= ~RCC_DEV0_EPF2_STRAP2__STRAP_NO_SOFT_RESET_DEV0_F2_MASK; + WREG32_SOC15(NBIO, 0, regRCC_DEV0_EPF2_STRAP2, data); +} + +static u32 nbif_v6_3_1_get_rom_offset(struct amdgpu_device *adev) +{ + u32 data, rom_offset; + + data = RREG32_SOC15(NBIO, 0, regREGS_ROM_OFFSET_CTRL); + rom_offset = REG_GET_FIELD(data, REGS_ROM_OFFSET_CTRL, ROM_OFFSET); + + return rom_offset; +} + +#ifdef CONFIG_PCIEASPM +static void nbif_v6_3_1_program_ltr(struct amdgpu_device *adev) +{ + uint32_t def, data; + + def = RREG32_SOC15(NBIO, 0, regRCC_EP_DEV0_0_EP_PCIE_TX_LTR_CNTL); + data = 0x35EB; + data &= ~RCC_EP_DEV0_0_EP_PCIE_TX_LTR_CNTL__LTR_PRIV_MSG_DIS_IN_PM_NON_D0_MASK; + data &= ~RCC_EP_DEV0_0_EP_PCIE_TX_LTR_CNTL__LTR_PRIV_RST_LTR_IN_DL_DOWN_MASK; + if (def != data) + WREG32_SOC15(NBIO, 0, regRCC_EP_DEV0_0_EP_PCIE_TX_LTR_CNTL, data); + + def = data = RREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_BIF_STRAP2); + data &= ~RCC_STRAP0_RCC_BIF_STRAP2__STRAP_LTR_IN_ASPML1_DIS_MASK; + if (def != data) + WREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_BIF_STRAP2, data); + + def = data = RREG32_SOC15(NBIO, 0, regBIF_CFG_DEV0_EPF0_DEVICE_CNTL2); + if (adev->pdev->ltr_path) + data |= BIF_CFG_DEV0_EPF0_DEVICE_CNTL2__LTR_EN_MASK; + else + data &= ~BIF_CFG_DEV0_EPF0_DEVICE_CNTL2__LTR_EN_MASK; + if (def != data) + WREG32_SOC15(NBIO, 0, regBIF_CFG_DEV0_EPF0_DEVICE_CNTL2, data); +} +#endif + +static void nbif_v6_3_1_program_aspm(struct amdgpu_device *adev) +{ +#ifdef CONFIG_PCIEASPM + uint32_t def, data; + + def = data = RREG32_SOC15(PCIE, 0, regPCIE_LC_CNTL); + data &= ~PCIE_LC_CNTL__LC_L1_INACTIVITY_MASK; + data &= ~PCIE_LC_CNTL__LC_L0S_INACTIVITY_MASK; + data |= PCIE_LC_CNTL__LC_PMI_TO_L1_DIS_MASK; + if (def != data) + WREG32_SOC15(PCIE, 0, regPCIE_LC_CNTL, data); + + def = data = RREG32_SOC15(PCIE, 0, regPCIE_LC_CNTL7); + data |= PCIE_LC_CNTL7__LC_NBIF_ASPM_INPUT_EN_MASK; + if (def != data) + WREG32_SOC15(PCIE, 0, regPCIE_LC_CNTL7, data); + + def = data = RREG32_SOC15(PCIE, 0, regPCIE_LC_CNTL3); + data |= PCIE_LC_CNTL3__LC_DSC_DONT_ENTER_L23_AFTER_PME_ACK_MASK; + if (def != data) + WREG32_SOC15(PCIE, 0, regPCIE_LC_CNTL3, data); + + def = data = RREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_BIF_STRAP3); + data &= ~RCC_STRAP0_RCC_BIF_STRAP3__STRAP_VLINK_ASPM_IDLE_TIMER_MASK; + data &= ~RCC_STRAP0_RCC_BIF_STRAP3__STRAP_VLINK_PM_L1_ENTRY_TIMER_MASK; + if (def != data) + WREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_BIF_STRAP3, data); + + def = data = RREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_BIF_STRAP5); + data &= ~RCC_STRAP0_RCC_BIF_STRAP5__STRAP_VLINK_LDN_ENTRY_TIMER_MASK; + if (def != data) + WREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_BIF_STRAP5, data); + + def = data = RREG32_SOC15(NBIO, 0, regBIF_CFG_DEV0_EPF0_DEVICE_CNTL2); + data &= ~BIF_CFG_DEV0_EPF0_DEVICE_CNTL2__LTR_EN_MASK; + if (def != data) + WREG32_SOC15(NBIO, 0, regBIF_CFG_DEV0_EPF0_DEVICE_CNTL2, data); + + WREG32_SOC15(NBIO, 0, regBIF_CFG_DEV0_EPF0_PCIE_LTR_CAP, 0x10011001); + +#if 0 + /* regPSWUSP0_PCIE_LC_CNTL2 should be replace by PCIE_LC_CNTL2 or someone else ? */ + def = data = RREG32_SOC15(NBIO, 0, regPSWUSP0_PCIE_LC_CNTL2); + data |= PSWUSP0_PCIE_LC_CNTL2__LC_ALLOW_PDWN_IN_L1_MASK | + PSWUSP0_PCIE_LC_CNTL2__LC_ALLOW_PDWN_IN_L23_MASK; + data &= ~PSWUSP0_PCIE_LC_CNTL2__LC_RCV_L0_TO_RCV_L0S_DIS_MASK; + if (def != data) + WREG32_SOC15(NBIO, 0, regPSWUSP0_PCIE_LC_CNTL2, data); +#endif + def = data = RREG32_SOC15(PCIE, 0, regPCIE_LC_CNTL4); + data |= PCIE_LC_CNTL4__LC_L1_POWERDOWN_MASK; + if (def != data) + WREG32_SOC15(PCIE, 0, regPCIE_LC_CNTL4, data); + + def = data = RREG32_SOC15(PCIE, 0, regPCIE_LC_RXRECOVER_RXSTANDBY_CNTL); + data |= PCIE_LC_RXRECOVER_RXSTANDBY_CNTL__LC_RX_L0S_STANDBY_EN_MASK; + if (def != data) + WREG32_SOC15(PCIE, 0, regPCIE_LC_RXRECOVER_RXSTANDBY_CNTL, data); + + nbif_v6_3_1_program_ltr(adev); + + def = data = RREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_BIF_STRAP3); + data |= 0x5DE0 << RCC_STRAP0_RCC_BIF_STRAP3__STRAP_VLINK_ASPM_IDLE_TIMER__SHIFT; + data |= 0x0010 << RCC_STRAP0_RCC_BIF_STRAP3__STRAP_VLINK_PM_L1_ENTRY_TIMER__SHIFT; + if (def != data) + WREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_BIF_STRAP3, data); + + def = data = RREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_BIF_STRAP5); + data |= 0x0010 << RCC_STRAP0_RCC_BIF_STRAP5__STRAP_VLINK_LDN_ENTRY_TIMER__SHIFT; + if (def != data) + WREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_BIF_STRAP5, data); + + def = data = RREG32_SOC15(PCIE, 0, regPCIE_LC_CNTL); + data |= 0x0 << PCIE_LC_CNTL__LC_L0S_INACTIVITY__SHIFT; + data |= 0x9 << PCIE_LC_CNTL__LC_L1_INACTIVITY__SHIFT; + data &= ~PCIE_LC_CNTL__LC_PMI_TO_L1_DIS_MASK; + if (def != data) + WREG32_SOC15(PCIE, 0, regPCIE_LC_CNTL, data); + + def = data = RREG32_SOC15(PCIE, 0, regPCIE_LC_CNTL3); + data &= ~PCIE_LC_CNTL3__LC_DSC_DONT_ENTER_L23_AFTER_PME_ACK_MASK; + if (def != data) + WREG32_SOC15(PCIE, 0, regPCIE_LC_CNTL3, data); +#endif +} + +const struct amdgpu_nbio_funcs nbif_v6_3_1_funcs = { + .get_hdp_flush_req_offset = nbif_v6_3_1_get_hdp_flush_req_offset, + .get_hdp_flush_done_offset = nbif_v6_3_1_get_hdp_flush_done_offset, + .get_pcie_index_offset = nbif_v6_3_1_get_pcie_index_offset, + .get_pcie_data_offset = nbif_v6_3_1_get_pcie_data_offset, + .get_rev_id = nbif_v6_3_1_get_rev_id, + .mc_access_enable = nbif_v6_3_1_mc_access_enable, + .get_memsize = nbif_v6_3_1_get_memsize, + .sdma_doorbell_range = nbif_v6_3_1_sdma_doorbell_range, + .vcn_doorbell_range = nbif_v6_3_1_vcn_doorbell_range, + .gc_doorbell_init = nbif_v6_3_1_gc_doorbell_init, + .enable_doorbell_aperture = nbif_v6_3_1_enable_doorbell_aperture, + .enable_doorbell_selfring_aperture = nbif_v6_3_1_enable_doorbell_selfring_aperture, + .ih_doorbell_range = nbif_v6_3_1_ih_doorbell_range, + .update_medium_grain_clock_gating = nbif_v6_3_1_update_medium_grain_clock_gating, + .update_medium_grain_light_sleep = nbif_v6_3_1_update_medium_grain_light_sleep, + .get_clockgating_state = nbif_v6_3_1_get_clockgating_state, + .ih_control = nbif_v6_3_1_ih_control, + .init_registers = nbif_v6_3_1_init_registers, + .remap_hdp_registers = nbif_v6_3_1_remap_hdp_registers, + .get_rom_offset = nbif_v6_3_1_get_rom_offset, + .program_aspm = nbif_v6_3_1_program_aspm, +}; + + +static void nbif_v6_3_1_sriov_ih_doorbell_range(struct amdgpu_device *adev, + bool use_doorbell, int doorbell_index) +{ +} + +static void nbif_v6_3_1_sriov_sdma_doorbell_range(struct amdgpu_device *adev, + int instance, bool use_doorbell, + int doorbell_index, + int doorbell_size) +{ +} + +static void nbif_v6_3_1_sriov_vcn_doorbell_range(struct amdgpu_device *adev, + bool use_doorbell, + int doorbell_index, int instance) +{ +} + +static void nbif_v6_3_1_sriov_gc_doorbell_init(struct amdgpu_device *adev) +{ +} + +const struct amdgpu_nbio_funcs nbif_v6_3_1_sriov_funcs = { + .get_hdp_flush_req_offset = nbif_v6_3_1_get_hdp_flush_req_offset, + .get_hdp_flush_done_offset = nbif_v6_3_1_get_hdp_flush_done_offset, + .get_pcie_index_offset = nbif_v6_3_1_get_pcie_index_offset, + .get_pcie_data_offset = nbif_v6_3_1_get_pcie_data_offset, + .get_rev_id = nbif_v6_3_1_get_rev_id, + .mc_access_enable = nbif_v6_3_1_mc_access_enable, + .get_memsize = nbif_v6_3_1_get_memsize, + .sdma_doorbell_range = nbif_v6_3_1_sriov_sdma_doorbell_range, + .vcn_doorbell_range = nbif_v6_3_1_sriov_vcn_doorbell_range, + .gc_doorbell_init = nbif_v6_3_1_sriov_gc_doorbell_init, + .enable_doorbell_aperture = nbif_v6_3_1_enable_doorbell_aperture, + .enable_doorbell_selfring_aperture = nbif_v6_3_1_enable_doorbell_selfring_aperture, + .ih_doorbell_range = nbif_v6_3_1_sriov_ih_doorbell_range, + .update_medium_grain_clock_gating = nbif_v6_3_1_update_medium_grain_clock_gating, + .update_medium_grain_light_sleep = nbif_v6_3_1_update_medium_grain_light_sleep, + .get_clockgating_state = nbif_v6_3_1_get_clockgating_state, + .ih_control = nbif_v6_3_1_ih_control, + .init_registers = nbif_v6_3_1_init_registers, + .remap_hdp_registers = nbif_v6_3_1_remap_hdp_registers, + .get_rom_offset = nbif_v6_3_1_get_rom_offset, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/nbif_v6_3_1.h b/drivers/gpu/drm/amd/amdgpu/nbif_v6_3_1.h new file mode 100644 index 000000000000..b7f2e0d88905 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/nbif_v6_3_1.h @@ -0,0 +1,33 @@ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __NBIO_V6_3_1_H__ +#define __NBIO_V6_3_1_H__ + +#include "soc15_common.h" + +extern const struct nbio_hdp_flush_reg nbif_v6_3_1_hdp_flush_reg; +extern const struct amdgpu_nbio_funcs nbif_v6_3_1_funcs; +extern const struct amdgpu_nbio_funcs nbif_v6_3_1_sriov_funcs; + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_11.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_11.c index 1f52b4b1db03..05020141c0ae 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_11.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_11.c @@ -89,7 +89,9 @@ static void nbio_v7_11_vpe_doorbell_range(struct amdgpu_device *adev, int instan bool use_doorbell, int doorbell_index, int doorbell_size) { - u32 reg = SOC15_REG_OFFSET(NBIO, 0, regGDC0_BIF_VPE_DOORBELL_RANGE); + u32 reg = instance == 0 ? + SOC15_REG_OFFSET(NBIO, 0, regGDC0_BIF_VPE_DOORBELL_RANGE) : + SOC15_REG_OFFSET(NBIO, 0, regGDC0_BIF_VPE1_DOORBELL_RANGE); u32 doorbell_range = RREG32_PCIE_PORT(reg); if (use_doorbell) { @@ -112,7 +114,10 @@ static void nbio_v7_11_vcn_doorbell_range(struct amdgpu_device *adev, bool use_doorbell, int doorbell_index, int instance) { - u32 reg = SOC15_REG_OFFSET(NBIO, 0, regGDC0_BIF_VCN0_DOORBELL_RANGE); + u32 reg = instance == 0 ? + SOC15_REG_OFFSET(NBIO, 0, regGDC0_BIF_VCN0_DOORBELL_RANGE): + SOC15_REG_OFFSET(NBIO, 0, regGDC0_BIF_VCN1_DOORBELL_RANGE); + u32 doorbell_range = RREG32_PCIE_PORT(reg); if (use_doorbell) { diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c index b4723d68eab0..40d1e209eab7 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c @@ -35,15 +35,6 @@ /* Core 0 Port 0 counter */ #define smnPCIEP_NAK_COUNTER 0x1A340218 -#define smnPCIE_PERF_CNTL_TXCLK3 0x1A38021c -#define smnPCIE_PERF_CNTL_TXCLK7 0x1A380888 -#define smnPCIE_PERF_COUNT_CNTL 0x1A380200 -#define smnPCIE_PERF_COUNT0_TXCLK3 0x1A380220 -#define smnPCIE_PERF_COUNT0_TXCLK7 0x1A38088C -#define smnPCIE_PERF_COUNT0_UPVAL_TXCLK3 0x1A3808F8 -#define smnPCIE_PERF_COUNT0_UPVAL_TXCLK7 0x1A380918 - - static void nbio_v7_9_remap_hdp_registers(struct amdgpu_device *adev) { WREG32_SOC15(NBIO, 0, regBIF_BX0_REMAP_HDP_MEM_FLUSH_CNTL, @@ -484,59 +475,6 @@ static u64 nbio_v7_9_get_pcie_replay_count(struct amdgpu_device *adev) return (nak_r + nak_g); } -static void nbio_v7_9_get_pcie_usage(struct amdgpu_device *adev, uint64_t *count0, - uint64_t *count1) -{ - uint32_t perfctrrx = 0; - uint32_t perfctrtx = 0; - - /* This reports 0 on APUs, so return to avoid writing/reading registers - * that may or may not be different from their GPU counterparts - */ - if (adev->flags & AMD_IS_APU) - return; - - /* Use TXCLK3 counter group for rx event */ - /* Use TXCLK7 counter group for tx event */ - /* Set the 2 events that we wish to watch, defined above */ - /* 40 is event# for received msgs */ - /* 2 is event# of posted requests sent */ - perfctrrx = REG_SET_FIELD(perfctrrx, PCIE_PERF_CNTL_TXCLK3, EVENT0_SEL, 40); - perfctrtx = REG_SET_FIELD(perfctrtx, PCIE_PERF_CNTL_TXCLK7, EVENT0_SEL, 2); - - /* Write to enable desired perf counters */ - WREG32_PCIE(smnPCIE_PERF_CNTL_TXCLK3, perfctrrx); - WREG32_PCIE(smnPCIE_PERF_CNTL_TXCLK7, perfctrtx); - - /* Zero out and enable SHADOW_WR - * Write 0x6: - * Bit 1 = Global Shadow wr(1) - * Bit 2 = Global counter reset enable(1) - */ - WREG32_PCIE(smnPCIE_PERF_COUNT_CNTL, 0x00000006); - - /* Enable Gloabl Counter - * Write 0x1: - * Bit 0 = Global Counter Enable(1) - */ - WREG32_PCIE(smnPCIE_PERF_COUNT_CNTL, 0x00000001); - - msleep(1000); - - /* Disable Global Counter, Reset and enable SHADOW_WR - * Write 0x6: - * Bit 1 = Global Shadow wr(1) - * Bit 2 = Global counter reset enable(1) - */ - WREG32_PCIE(smnPCIE_PERF_COUNT_CNTL, 0x00000006); - - /* Get the upper and lower count */ - *count0 = RREG32_PCIE(smnPCIE_PERF_COUNT0_TXCLK3) | - ((uint64_t)RREG32_PCIE(smnPCIE_PERF_COUNT0_UPVAL_TXCLK3) << 32); - *count1 = RREG32_PCIE(smnPCIE_PERF_COUNT0_TXCLK7) | - ((uint64_t)RREG32_PCIE(smnPCIE_PERF_COUNT0_UPVAL_TXCLK7) << 32); -} - const struct amdgpu_nbio_funcs nbio_v7_9_funcs = { .get_hdp_flush_req_offset = nbio_v7_9_get_hdp_flush_req_offset, .get_hdp_flush_done_offset = nbio_v7_9_get_hdp_flush_done_offset, @@ -561,7 +499,6 @@ const struct amdgpu_nbio_funcs nbio_v7_9_funcs = { .get_memory_partition_mode = nbio_v7_9_get_memory_partition_mode, .init_registers = nbio_v7_9_init_registers, .get_pcie_replay_count = nbio_v7_9_get_pcie_replay_count, - .get_pcie_usage = nbio_v7_9_get_pcie_usage, }; static void nbio_v7_9_query_ras_error_count(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c index 4d7976b77767..12e54047bf79 100644 --- a/drivers/gpu/drm/amd/amdgpu/nv.c +++ b/drivers/gpu/drm/amd/amdgpu/nv.c @@ -110,7 +110,7 @@ static const struct amdgpu_video_codec_info sc_video_codecs_decode_array_vcn0[] {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4096, 4)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)}, - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 16384, 16384, 0)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1, 8192, 4352, 0)}, }; @@ -121,7 +121,7 @@ static const struct amdgpu_video_codec_info sc_video_codecs_decode_array_vcn1[] {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4096, 4)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)}, - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 16384, 16384, 0)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)}, }; @@ -199,7 +199,7 @@ static const struct amdgpu_video_codec_info yc_video_codecs_decode_array[] = { {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)}, - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 16384, 16384, 0)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1, 8192, 4352, 0)}, }; @@ -1131,4 +1131,6 @@ static const struct amd_ip_funcs nv_common_ip_funcs = { .set_clockgating_state = nv_common_set_clockgating_state, .set_powergating_state = nv_common_set_powergating_state, .get_clockgating_state = nv_common_get_clockgating_state, + .dump_ip_state = NULL, + .print_ip_state = NULL, }; diff --git a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h index 4bb5e10217bb..7566973ed8f5 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h +++ b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h @@ -296,6 +296,7 @@ enum psp_gfx_fw_type { GFX_FW_TYPE_VPEC_FW1 = 100, /* VPEC FW1 To Save VPE */ GFX_FW_TYPE_VPEC_FW2 = 101, /* VPEC FW2 To Save VPE */ GFX_FW_TYPE_VPE = 102, + GFX_FW_TYPE_JPEG_RAM = 128, /**< JPEG Command buffer */ GFX_FW_TYPE_P2S_TABLE = 129, GFX_FW_TYPE_MAX }; diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c index efa37e3b7931..2395f1856962 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c @@ -506,7 +506,7 @@ static int psp_v11_0_memory_training(struct psp_context *psp, uint32_t ops) * before training, and restore it after training to avoid * VRAM corruption. */ - sz = GDDR6_MEM_TRAINING_ENCROACHED_SIZE; + sz = BIST_MEM_TRAINING_ENCROACHED_SIZE; if (adev->gmc.visible_vram_size < sz || !adev->mman.aper_base_kaddr) { DRM_ERROR("visible_vram_size %llx or aper_base_kaddr %p is not initialized.\n", diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c index df1844d0800f..0da50ea46eaf 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c @@ -27,6 +27,7 @@ #include "amdgpu_ucode.h" #include "soc15_common.h" #include "psp_v13_0.h" +#include "amdgpu_ras.h" #include "mp/mp_13_0_2_offset.h" #include "mp/mp_13_0_2_sh_mask.h" @@ -52,6 +53,8 @@ MODULE_FIRMWARE("amdgpu/psp_13_0_6_sos.bin"); MODULE_FIRMWARE("amdgpu/psp_13_0_6_ta.bin"); MODULE_FIRMWARE("amdgpu/psp_14_0_0_toc.bin"); MODULE_FIRMWARE("amdgpu/psp_14_0_0_ta.bin"); +MODULE_FIRMWARE("amdgpu/psp_14_0_1_toc.bin"); +MODULE_FIRMWARE("amdgpu/psp_14_0_1_ta.bin"); /* For large FW files the time to complete can be very long */ #define USBC_PD_POLLING_LIMIT_S 240 @@ -100,6 +103,7 @@ static int psp_v13_0_init_microcode(struct psp_context *psp) case IP_VERSION(13, 0, 8): case IP_VERSION(13, 0, 11): case IP_VERSION(14, 0, 0): + case IP_VERSION(14, 0, 1): err = psp_init_toc_microcode(psp, ucode_prefix); if (err) return err; @@ -187,11 +191,18 @@ static int psp_v13_0_wait_for_bootloader(struct psp_context *psp) static int psp_v13_0_wait_for_bootloader_steady_state(struct psp_context *psp) { struct amdgpu_device *adev = psp->adev; + int ret; if (amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 6)) { - psp_v13_0_wait_for_vmbx_ready(psp); + ret = psp_v13_0_wait_for_vmbx_ready(psp); + if (ret) + amdgpu_ras_query_boot_status(adev, 4); + + ret = psp_v13_0_wait_for_bootloader(psp); + if (ret) + amdgpu_ras_query_boot_status(adev, 4); - return psp_v13_0_wait_for_bootloader(psp); + return ret; } return 0; @@ -553,7 +564,7 @@ static int psp_v13_0_memory_training(struct psp_context *psp, uint32_t ops) * before training, and restore it after training to avoid * VRAM corruption. */ - sz = GDDR6_MEM_TRAINING_ENCROACHED_SIZE; + sz = BIST_MEM_TRAINING_ENCROACHED_SIZE; if (adev->gmc.visible_vram_size < sz || !adev->mman.aper_base_kaddr) { dev_err(adev->dev, "visible_vram_size %llx or aper_base_kaddr %p is not initialized.\n", @@ -763,81 +774,28 @@ static int psp_v13_0_fatal_error_recovery_quirk(struct psp_context *psp) return 0; } - -static void psp_v13_0_boot_error_reporting(struct amdgpu_device *adev, - uint32_t inst, - uint32_t boot_error) -{ - uint32_t socket_id; - uint32_t aid_id; - uint32_t hbm_id; - uint32_t reg_data; - - socket_id = REG_GET_FIELD(boot_error, MP0_SMN_C2PMSG_126, SOCKET_ID); - aid_id = REG_GET_FIELD(boot_error, MP0_SMN_C2PMSG_126, AID_ID); - hbm_id = REG_GET_FIELD(boot_error, MP0_SMN_C2PMSG_126, HBM_ID); - - reg_data = RREG32_SOC15(MP0, inst, regMP0_SMN_C2PMSG_109); - dev_info(adev->dev, "socket: %d, aid: %d, firmware boot failed, fw status is 0x%x\n", - socket_id, aid_id, reg_data); - - if (REG_GET_FIELD(boot_error, MP0_SMN_C2PMSG_126, GPU_ERR_MEM_TRAINING)) - dev_info(adev->dev, "socket: %d, aid: %d, hbm: %d, memory training failed\n", - socket_id, aid_id, hbm_id); - - if (REG_GET_FIELD(boot_error, MP0_SMN_C2PMSG_126, GPU_ERR_FW_LOAD)) - dev_info(adev->dev, "socket: %d, aid: %d, firmware load failed at boot time\n", - socket_id, aid_id); - - if (REG_GET_FIELD(boot_error, MP0_SMN_C2PMSG_126, GPU_ERR_WAFL_LINK_TRAINING)) - dev_info(adev->dev, "socket: %d, aid: %d, wafl link training failed\n", - socket_id, aid_id); - - if (REG_GET_FIELD(boot_error, MP0_SMN_C2PMSG_126, GPU_ERR_XGMI_LINK_TRAINING)) - dev_info(adev->dev, "socket: %d, aid: %d, xgmi link training failed\n", - socket_id, aid_id); - - if (REG_GET_FIELD(boot_error, MP0_SMN_C2PMSG_126, GPU_ERR_USR_CP_LINK_TRAINING)) - dev_info(adev->dev, "socket: %d, aid: %d, usr cp link training failed\n", - socket_id, aid_id); - - if (REG_GET_FIELD(boot_error, MP0_SMN_C2PMSG_126, GPU_ERR_USR_DP_LINK_TRAINING)) - dev_info(adev->dev, "socket: %d, aid: %d, usr dp link training failed\n", - socket_id, aid_id); - - if (REG_GET_FIELD(boot_error, MP0_SMN_C2PMSG_126, GPU_ERR_HBM_MEM_TEST)) - dev_info(adev->dev, "socket: %d, aid: %d, hbm: %d, hbm memory test failed\n", - socket_id, aid_id, hbm_id); - - if (REG_GET_FIELD(boot_error, MP0_SMN_C2PMSG_126, GPU_ERR_HBM_BIST_TEST)) - dev_info(adev->dev, "socket: %d, aid: %d, hbm: %d, hbm bist test failed\n", - socket_id, aid_id, hbm_id); -} - -static int psp_v13_0_query_boot_status(struct psp_context *psp) +static bool psp_v13_0_get_ras_capability(struct psp_context *psp) { struct amdgpu_device *adev = psp->adev; - int inst_mask = adev->aid_mask; - uint32_t reg_data; - uint32_t i; - int ret = 0; + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + u32 reg_data; - if (amdgpu_ip_version(adev, MP0_HWIP, 0) != IP_VERSION(13, 0, 6)) - return 0; + /* query ras cap should be done from host side */ + if (amdgpu_sriov_vf(adev)) + return false; - if (RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_59) < 0x00a10109) - return 0; + if (!con) + return false; - for_each_inst(i, inst_mask) { - reg_data = RREG32_SOC15(MP0, i, regMP0_SMN_C2PMSG_126); - if (!REG_GET_FIELD(reg_data, MP0_SMN_C2PMSG_126, BOOT_STATUS)) { - psp_v13_0_boot_error_reporting(adev, i, reg_data); - ret = -EINVAL; - break; - } + if ((amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 6)) && + (!(adev->flags & AMD_IS_APU))) { + reg_data = RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_127); + adev->ras_hw_enabled = (reg_data & GENMASK_ULL(23, 0)); + con->poison_supported = ((reg_data & GENMASK_ULL(24, 24)) >> 24) ? true : false; + return true; + } else { + return false; } - - return ret; } static const struct psp_funcs psp_v13_0_funcs = { @@ -862,7 +820,7 @@ static const struct psp_funcs psp_v13_0_funcs = { .update_spirom = psp_v13_0_update_spirom, .vbflash_stat = psp_v13_0_vbflash_status, .fatal_error_recovery_quirk = psp_v13_0_fatal_error_recovery_quirk, - .query_boot_status = psp_v13_0_query_boot_status, + .get_ras_capability = psp_v13_0_get_ras_capability, }; void psp_v13_0_set_psp_funcs(struct psp_context *psp) diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v14_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v14_0.c new file mode 100644 index 000000000000..f08a32c18694 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/psp_v14_0.c @@ -0,0 +1,678 @@ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#include <drm/drm_drv.h> +#include <linux/vmalloc.h> +#include "amdgpu.h" +#include "amdgpu_psp.h" +#include "amdgpu_ucode.h" +#include "soc15_common.h" +#include "psp_v14_0.h" + +#include "mp/mp_14_0_2_offset.h" +#include "mp/mp_14_0_2_sh_mask.h" + +MODULE_FIRMWARE("amdgpu/psp_14_0_2_sos.bin"); +MODULE_FIRMWARE("amdgpu/psp_14_0_3_sos.bin"); + +/* For large FW files the time to complete can be very long */ +#define USBC_PD_POLLING_LIMIT_S 240 + +/* Read USB-PD from LFB */ +#define GFX_CMD_USB_PD_USE_LFB 0x480 + +/* VBIOS gfl defines */ +#define MBOX_READY_MASK 0x80000000 +#define MBOX_STATUS_MASK 0x0000FFFF +#define MBOX_COMMAND_MASK 0x00FF0000 +#define MBOX_READY_FLAG 0x80000000 +#define C2PMSG_CMD_SPI_UPDATE_ROM_IMAGE_ADDR_LO 0x2 +#define C2PMSG_CMD_SPI_UPDATE_ROM_IMAGE_ADDR_HI 0x3 +#define C2PMSG_CMD_SPI_UPDATE_FLASH_IMAGE 0x4 + +/* memory training timeout define */ +#define MEM_TRAIN_SEND_MSG_TIMEOUT_US 3000000 + +static int psp_v14_0_init_microcode(struct psp_context *psp) +{ + struct amdgpu_device *adev = psp->adev; + char ucode_prefix[30]; + int err = 0; + + amdgpu_ucode_ip_version_decode(adev, MP0_HWIP, ucode_prefix, sizeof(ucode_prefix)); + + switch (amdgpu_ip_version(adev, MP0_HWIP, 0)) { + case IP_VERSION(14, 0, 2): + case IP_VERSION(14, 0, 3): + err = psp_init_sos_microcode(psp, ucode_prefix); + if (err) + return err; + break; + default: + BUG(); + } + + return 0; +} + +static bool psp_v14_0_is_sos_alive(struct psp_context *psp) +{ + struct amdgpu_device *adev = psp->adev; + uint32_t sol_reg; + + sol_reg = RREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_81); + + return sol_reg != 0x0; +} + +static int psp_v14_0_wait_for_bootloader(struct psp_context *psp) +{ + struct amdgpu_device *adev = psp->adev; + + int ret; + int retry_loop; + + for (retry_loop = 0; retry_loop < 10; retry_loop++) { + /* Wait for bootloader to signify that is + ready having bit 31 of C2PMSG_35 set to 1 */ + ret = psp_wait_for(psp, + SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_35), + 0x80000000, + 0x80000000, + false); + + if (ret == 0) + return 0; + } + + return ret; +} + +static int psp_v14_0_bootloader_load_component(struct psp_context *psp, + struct psp_bin_desc *bin_desc, + enum psp_bootloader_cmd bl_cmd) +{ + int ret; + uint32_t psp_gfxdrv_command_reg = 0; + struct amdgpu_device *adev = psp->adev; + + /* Check tOS sign of life register to confirm sys driver and sOS + * are already been loaded. + */ + if (psp_v14_0_is_sos_alive(psp)) + return 0; + + ret = psp_v14_0_wait_for_bootloader(psp); + if (ret) + return ret; + + memset(psp->fw_pri_buf, 0, PSP_1_MEG); + + /* Copy PSP KDB binary to memory */ + memcpy(psp->fw_pri_buf, bin_desc->start_addr, bin_desc->size_bytes); + + /* Provide the PSP KDB to bootloader */ + WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_36, + (uint32_t)(psp->fw_pri_mc_addr >> 20)); + psp_gfxdrv_command_reg = bl_cmd; + WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_35, + psp_gfxdrv_command_reg); + + ret = psp_v14_0_wait_for_bootloader(psp); + + return ret; +} + +static int psp_v14_0_bootloader_load_kdb(struct psp_context *psp) +{ + return psp_v14_0_bootloader_load_component(psp, &psp->kdb, PSP_BL__LOAD_KEY_DATABASE); +} + +static int psp_v14_0_bootloader_load_spl(struct psp_context *psp) +{ + return psp_v14_0_bootloader_load_component(psp, &psp->kdb, PSP_BL__LOAD_TOS_SPL_TABLE); +} + +static int psp_v14_0_bootloader_load_sysdrv(struct psp_context *psp) +{ + return psp_v14_0_bootloader_load_component(psp, &psp->sys, PSP_BL__LOAD_SYSDRV); +} + +static int psp_v14_0_bootloader_load_soc_drv(struct psp_context *psp) +{ + return psp_v14_0_bootloader_load_component(psp, &psp->soc_drv, PSP_BL__LOAD_SOCDRV); +} + +static int psp_v14_0_bootloader_load_intf_drv(struct psp_context *psp) +{ + return psp_v14_0_bootloader_load_component(psp, &psp->intf_drv, PSP_BL__LOAD_INTFDRV); +} + +static int psp_v14_0_bootloader_load_dbg_drv(struct psp_context *psp) +{ + /* dbg_drv was renamed to had_drv in psp v14 */ + return psp_v14_0_bootloader_load_component(psp, &psp->dbg_drv, PSP_BL__LOAD_HADDRV); +} + +static int psp_v14_0_bootloader_load_ras_drv(struct psp_context *psp) +{ + return psp_v14_0_bootloader_load_component(psp, &psp->ras_drv, PSP_BL__LOAD_RASDRV); +} + +static int psp_v14_0_bootloader_load_ipkeymgr_drv(struct psp_context *psp) +{ + return psp_v14_0_bootloader_load_component(psp, &psp->ipkeymgr_drv, PSP_BL__LOAD_IPKEYMGRDRV); +} + +static int psp_v14_0_bootloader_load_sos(struct psp_context *psp) +{ + int ret; + unsigned int psp_gfxdrv_command_reg = 0; + struct amdgpu_device *adev = psp->adev; + + /* Check sOS sign of life register to confirm sys driver and sOS + * are already been loaded. + */ + if (psp_v14_0_is_sos_alive(psp)) + return 0; + + ret = psp_v14_0_wait_for_bootloader(psp); + if (ret) + return ret; + + memset(psp->fw_pri_buf, 0, PSP_1_MEG); + + /* Copy Secure OS binary to PSP memory */ + memcpy(psp->fw_pri_buf, psp->sos.start_addr, psp->sos.size_bytes); + + /* Provide the PSP secure OS to bootloader */ + WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_36, + (uint32_t)(psp->fw_pri_mc_addr >> 20)); + psp_gfxdrv_command_reg = PSP_BL__LOAD_SOSDRV; + WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_35, + psp_gfxdrv_command_reg); + + /* there might be handshake issue with hardware which needs delay */ + mdelay(20); + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_81), + RREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_81), + 0, true); + + return ret; +} + +static int psp_v14_0_ring_stop(struct psp_context *psp, + enum psp_ring_type ring_type) +{ + int ret = 0; + struct amdgpu_device *adev = psp->adev; + + if (amdgpu_sriov_vf(adev)) { + /* Write the ring destroy command*/ + WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_101, + GFX_CTRL_CMD_ID_DESTROY_GPCOM_RING); + /* there might be handshake issue with hardware which needs delay */ + mdelay(20); + /* Wait for response flag (bit 31) */ + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_101), + 0x80000000, 0x80000000, false); + } else { + /* Write the ring destroy command*/ + WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_64, + GFX_CTRL_CMD_ID_DESTROY_RINGS); + /* there might be handshake issue with hardware which needs delay */ + mdelay(20); + /* Wait for response flag (bit 31) */ + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_64), + 0x80000000, 0x80000000, false); + } + + return ret; +} + +static int psp_v14_0_ring_create(struct psp_context *psp, + enum psp_ring_type ring_type) +{ + int ret = 0; + unsigned int psp_ring_reg = 0; + struct psp_ring *ring = &psp->km_ring; + struct amdgpu_device *adev = psp->adev; + + if (amdgpu_sriov_vf(adev)) { + ret = psp_v14_0_ring_stop(psp, ring_type); + if (ret) { + DRM_ERROR("psp_v14_0_ring_stop_sriov failed!\n"); + return ret; + } + + /* Write low address of the ring to C2PMSG_102 */ + psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr); + WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_102, psp_ring_reg); + /* Write high address of the ring to C2PMSG_103 */ + psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr); + WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_103, psp_ring_reg); + + /* Write the ring initialization command to C2PMSG_101 */ + WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_101, + GFX_CTRL_CMD_ID_INIT_GPCOM_RING); + + /* there might be handshake issue with hardware which needs delay */ + mdelay(20); + + /* Wait for response flag (bit 31) in C2PMSG_101 */ + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_101), + 0x80000000, 0x8000FFFF, false); + + } else { + /* Wait for sOS ready for ring creation */ + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_64), + 0x80000000, 0x80000000, false); + if (ret) { + DRM_ERROR("Failed to wait for trust OS ready for ring creation\n"); + return ret; + } + + /* Write low address of the ring to C2PMSG_69 */ + psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr); + WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_69, psp_ring_reg); + /* Write high address of the ring to C2PMSG_70 */ + psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr); + WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_70, psp_ring_reg); + /* Write size of ring to C2PMSG_71 */ + psp_ring_reg = ring->ring_size; + WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_71, psp_ring_reg); + /* Write the ring initialization command to C2PMSG_64 */ + psp_ring_reg = ring_type; + psp_ring_reg = psp_ring_reg << 16; + WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_64, psp_ring_reg); + + /* there might be handshake issue with hardware which needs delay */ + mdelay(20); + + /* Wait for response flag (bit 31) in C2PMSG_64 */ + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_64), + 0x80000000, 0x8000FFFF, false); + } + + return ret; +} + +static int psp_v14_0_ring_destroy(struct psp_context *psp, + enum psp_ring_type ring_type) +{ + int ret = 0; + struct psp_ring *ring = &psp->km_ring; + struct amdgpu_device *adev = psp->adev; + + ret = psp_v14_0_ring_stop(psp, ring_type); + if (ret) + DRM_ERROR("Fail to stop psp ring\n"); + + amdgpu_bo_free_kernel(&adev->firmware.rbuf, + &ring->ring_mem_mc_addr, + (void **)&ring->ring_mem); + + return ret; +} + +static uint32_t psp_v14_0_ring_get_wptr(struct psp_context *psp) +{ + uint32_t data; + struct amdgpu_device *adev = psp->adev; + + if (amdgpu_sriov_vf(adev)) + data = RREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_102); + else + data = RREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_67); + + return data; +} + +static void psp_v14_0_ring_set_wptr(struct psp_context *psp, uint32_t value) +{ + struct amdgpu_device *adev = psp->adev; + + if (amdgpu_sriov_vf(adev)) { + WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_102, value); + WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_101, + GFX_CTRL_CMD_ID_CONSUME_CMD); + } else + WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_67, value); +} + +static int psp_v14_0_memory_training_send_msg(struct psp_context *psp, int msg) +{ + int ret; + int i; + uint32_t data_32; + int max_wait; + struct amdgpu_device *adev = psp->adev; + + data_32 = (psp->mem_train_ctx.c2p_train_data_offset >> 20); + WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_36, data_32); + WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_35, msg); + + max_wait = MEM_TRAIN_SEND_MSG_TIMEOUT_US / adev->usec_timeout; + for (i = 0; i < max_wait; i++) { + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_35), + 0x80000000, 0x80000000, false); + if (ret == 0) + break; + } + if (i < max_wait) + ret = 0; + else + ret = -ETIME; + + dev_dbg(adev->dev, "training %s %s, cost %d @ %d ms\n", + (msg == PSP_BL__DRAM_SHORT_TRAIN) ? "short" : "long", + (ret == 0) ? "succeed" : "failed", + i, adev->usec_timeout/1000); + return ret; +} + + +static int psp_v14_0_memory_training(struct psp_context *psp, uint32_t ops) +{ + struct psp_memory_training_context *ctx = &psp->mem_train_ctx; + uint32_t *pcache = (uint32_t *)ctx->sys_cache; + struct amdgpu_device *adev = psp->adev; + uint32_t p2c_header[4]; + uint32_t sz; + void *buf; + int ret, idx; + + if (ctx->init == PSP_MEM_TRAIN_NOT_SUPPORT) { + dev_dbg(adev->dev, "Memory training is not supported.\n"); + return 0; + } else if (ctx->init != PSP_MEM_TRAIN_INIT_SUCCESS) { + dev_err(adev->dev, "Memory training initialization failure.\n"); + return -EINVAL; + } + + if (psp_v14_0_is_sos_alive(psp)) { + dev_dbg(adev->dev, "SOS is alive, skip memory training.\n"); + return 0; + } + + amdgpu_device_vram_access(adev, ctx->p2c_train_data_offset, p2c_header, sizeof(p2c_header), false); + dev_dbg(adev->dev, "sys_cache[%08x,%08x,%08x,%08x] p2c_header[%08x,%08x,%08x,%08x]\n", + pcache[0], pcache[1], pcache[2], pcache[3], + p2c_header[0], p2c_header[1], p2c_header[2], p2c_header[3]); + + if (ops & PSP_MEM_TRAIN_SEND_SHORT_MSG) { + dev_dbg(adev->dev, "Short training depends on restore.\n"); + ops |= PSP_MEM_TRAIN_RESTORE; + } + + if ((ops & PSP_MEM_TRAIN_RESTORE) && + pcache[0] != MEM_TRAIN_SYSTEM_SIGNATURE) { + dev_dbg(adev->dev, "sys_cache[0] is invalid, restore depends on save.\n"); + ops |= PSP_MEM_TRAIN_SAVE; + } + + if (p2c_header[0] == MEM_TRAIN_SYSTEM_SIGNATURE && + !(pcache[0] == MEM_TRAIN_SYSTEM_SIGNATURE && + pcache[3] == p2c_header[3])) { + dev_dbg(adev->dev, "sys_cache is invalid or out-of-date, need save training data to sys_cache.\n"); + ops |= PSP_MEM_TRAIN_SAVE; + } + + if ((ops & PSP_MEM_TRAIN_SAVE) && + p2c_header[0] != MEM_TRAIN_SYSTEM_SIGNATURE) { + dev_dbg(adev->dev, "p2c_header[0] is invalid, save depends on long training.\n"); + ops |= PSP_MEM_TRAIN_SEND_LONG_MSG; + } + + if (ops & PSP_MEM_TRAIN_SEND_LONG_MSG) { + ops &= ~PSP_MEM_TRAIN_SEND_SHORT_MSG; + ops |= PSP_MEM_TRAIN_SAVE; + } + + dev_dbg(adev->dev, "Memory training ops:%x.\n", ops); + + if (ops & PSP_MEM_TRAIN_SEND_LONG_MSG) { + /* + * Long training will encroach a certain amount on the bottom of VRAM; + * save the content from the bottom of VRAM to system memory + * before training, and restore it after training to avoid + * VRAM corruption. + */ + sz = BIST_MEM_TRAINING_ENCROACHED_SIZE; + + if (adev->gmc.visible_vram_size < sz || !adev->mman.aper_base_kaddr) { + dev_err(adev->dev, "visible_vram_size %llx or aper_base_kaddr %p is not initialized.\n", + adev->gmc.visible_vram_size, + adev->mman.aper_base_kaddr); + return -EINVAL; + } + + buf = vmalloc(sz); + if (!buf) { + dev_err(adev->dev, "failed to allocate system memory.\n"); + return -ENOMEM; + } + + if (drm_dev_enter(adev_to_drm(adev), &idx)) { + memcpy_fromio(buf, adev->mman.aper_base_kaddr, sz); + ret = psp_v14_0_memory_training_send_msg(psp, PSP_BL__DRAM_LONG_TRAIN); + if (ret) { + DRM_ERROR("Send long training msg failed.\n"); + vfree(buf); + drm_dev_exit(idx); + return ret; + } + + memcpy_toio(adev->mman.aper_base_kaddr, buf, sz); + adev->hdp.funcs->flush_hdp(adev, NULL); + vfree(buf); + drm_dev_exit(idx); + } else { + vfree(buf); + return -ENODEV; + } + } + + if (ops & PSP_MEM_TRAIN_SAVE) { + amdgpu_device_vram_access(psp->adev, ctx->p2c_train_data_offset, ctx->sys_cache, ctx->train_data_size, false); + } + + if (ops & PSP_MEM_TRAIN_RESTORE) { + amdgpu_device_vram_access(psp->adev, ctx->c2p_train_data_offset, ctx->sys_cache, ctx->train_data_size, true); + } + + if (ops & PSP_MEM_TRAIN_SEND_SHORT_MSG) { + ret = psp_v14_0_memory_training_send_msg(psp, (amdgpu_force_long_training > 0) ? + PSP_BL__DRAM_LONG_TRAIN : PSP_BL__DRAM_SHORT_TRAIN); + if (ret) { + dev_err(adev->dev, "send training msg failed.\n"); + return ret; + } + } + ctx->training_cnt++; + return 0; +} + +static int psp_v14_0_load_usbc_pd_fw(struct psp_context *psp, uint64_t fw_pri_mc_addr) +{ + struct amdgpu_device *adev = psp->adev; + uint32_t reg_status; + int ret, i = 0; + + /* + * LFB address which is aligned to 1MB address and has to be + * right-shifted by 20 so that LFB address can be passed on a 32-bit C2P + * register + */ + WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_36, (fw_pri_mc_addr >> 20)); + + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_35), + 0x80000000, 0x80000000, false); + if (ret) + return ret; + + /* Fireup interrupt so PSP can pick up the address */ + WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_35, (GFX_CMD_USB_PD_USE_LFB << 16)); + + /* FW load takes very long time */ + do { + msleep(1000); + reg_status = RREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_35); + + if (reg_status & 0x80000000) + goto done; + + } while (++i < USBC_PD_POLLING_LIMIT_S); + + return -ETIME; +done: + + if ((reg_status & 0xFFFF) != 0) { + DRM_ERROR("Address load failed - MP0_SMN_C2PMSG_35.Bits [15:0] = %04x\n", + reg_status & 0xFFFF); + return -EIO; + } + + return 0; +} + +static int psp_v14_0_read_usbc_pd_fw(struct psp_context *psp, uint32_t *fw_ver) +{ + struct amdgpu_device *adev = psp->adev; + int ret; + + WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_35, C2PMSG_CMD_GFX_USB_PD_FW_VER); + + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_35), + 0x80000000, 0x80000000, false); + if (!ret) + *fw_ver = RREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_36); + + return ret; +} + +static int psp_v14_0_exec_spi_cmd(struct psp_context *psp, int cmd) +{ + uint32_t reg_status = 0, reg_val = 0; + struct amdgpu_device *adev = psp->adev; + int ret; + + /* clear MBX ready (MBOX_READY_MASK bit is 0) and set update command */ + reg_val |= (cmd << 16); + WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_115, reg_val); + + /* Ring the doorbell */ + WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_73, 1); + + if (cmd == C2PMSG_CMD_SPI_UPDATE_FLASH_IMAGE) + ret = psp_wait_for_spirom_update(psp, SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_115), + MBOX_READY_FLAG, MBOX_READY_MASK, PSP_SPIROM_UPDATE_TIMEOUT); + else + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_115), + MBOX_READY_FLAG, MBOX_READY_MASK, false); + + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_115), + MBOX_READY_FLAG, MBOX_READY_MASK, false); + if (ret) { + dev_err(adev->dev, "SPI cmd %x timed out, ret = %d", cmd, ret); + return ret; + } + + reg_status = RREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_115); + if ((reg_status & 0xFFFF) != 0) { + dev_err(adev->dev, "SPI cmd %x failed, fail status = %04x\n", + cmd, reg_status & 0xFFFF); + return -EIO; + } + + return 0; +} + +static int psp_v14_0_update_spirom(struct psp_context *psp, + uint64_t fw_pri_mc_addr) +{ + struct amdgpu_device *adev = psp->adev; + int ret; + + /* Confirm PSP is ready to start */ + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_115), + MBOX_READY_FLAG, MBOX_READY_MASK, false); + if (ret) { + dev_err(adev->dev, "PSP Not ready to start processing, ret = %d", ret); + return ret; + } + + WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_116, lower_32_bits(fw_pri_mc_addr)); + + ret = psp_v14_0_exec_spi_cmd(psp, C2PMSG_CMD_SPI_UPDATE_ROM_IMAGE_ADDR_LO); + if (ret) + return ret; + + WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_116, upper_32_bits(fw_pri_mc_addr)); + + ret = psp_v14_0_exec_spi_cmd(psp, C2PMSG_CMD_SPI_UPDATE_ROM_IMAGE_ADDR_HI); + if (ret) + return ret; + + psp->vbflash_done = true; + + ret = psp_v14_0_exec_spi_cmd(psp, C2PMSG_CMD_SPI_UPDATE_FLASH_IMAGE); + if (ret) + return ret; + + return 0; +} + +static int psp_v14_0_vbflash_status(struct psp_context *psp) +{ + struct amdgpu_device *adev = psp->adev; + + return RREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_115); +} + +static const struct psp_funcs psp_v14_0_funcs = { + .init_microcode = psp_v14_0_init_microcode, + .bootloader_load_kdb = psp_v14_0_bootloader_load_kdb, + .bootloader_load_spl = psp_v14_0_bootloader_load_spl, + .bootloader_load_sysdrv = psp_v14_0_bootloader_load_sysdrv, + .bootloader_load_soc_drv = psp_v14_0_bootloader_load_soc_drv, + .bootloader_load_intf_drv = psp_v14_0_bootloader_load_intf_drv, + .bootloader_load_dbg_drv = psp_v14_0_bootloader_load_dbg_drv, + .bootloader_load_ras_drv = psp_v14_0_bootloader_load_ras_drv, + .bootloader_load_ipkeymgr_drv = psp_v14_0_bootloader_load_ipkeymgr_drv, + .bootloader_load_sos = psp_v14_0_bootloader_load_sos, + .ring_create = psp_v14_0_ring_create, + .ring_stop = psp_v14_0_ring_stop, + .ring_destroy = psp_v14_0_ring_destroy, + .ring_get_wptr = psp_v14_0_ring_get_wptr, + .ring_set_wptr = psp_v14_0_ring_set_wptr, + .mem_training = psp_v14_0_memory_training, + .load_usbc_pd_fw = psp_v14_0_load_usbc_pd_fw, + .read_usbc_pd_fw = psp_v14_0_read_usbc_pd_fw, + .update_spirom = psp_v14_0_update_spirom, + .vbflash_stat = psp_v14_0_vbflash_status +}; + +void psp_v14_0_set_psp_funcs(struct psp_context *psp) +{ + psp->funcs = &psp_v14_0_funcs; +} diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v14_0.h b/drivers/gpu/drm/amd/amdgpu/psp_v14_0.h new file mode 100644 index 000000000000..dd18ba2cfad5 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/psp_v14_0.h @@ -0,0 +1,32 @@ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#ifndef __PSP_V14_0_H__ +#define __PSP_V14_0_H__ + +#include "amdgpu_psp.h" + +#define PSP_SPIROM_UPDATE_TIMEOUT 60000 /* 60s */ + +void psp_v14_0_set_psp_funcs(struct psp_context *psp); + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c index 8d5d86675a7f..ac8a9b9b3e52 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c @@ -57,22 +57,19 @@ static void sdma_v2_4_set_irq_funcs(struct amdgpu_device *adev); MODULE_FIRMWARE("amdgpu/topaz_sdma.bin"); MODULE_FIRMWARE("amdgpu/topaz_sdma1.bin"); -static const u32 sdma_offsets[SDMA_MAX_INSTANCE] = -{ +static const u32 sdma_offsets[SDMA_MAX_INSTANCE] = { SDMA0_REGISTER_OFFSET, SDMA1_REGISTER_OFFSET }; -static const u32 golden_settings_iceland_a11[] = -{ +static const u32 golden_settings_iceland_a11[] = { mmSDMA0_CHICKEN_BITS, 0xfc910007, 0x00810007, mmSDMA0_CLK_CTRL, 0xff000fff, 0x00000000, mmSDMA1_CHICKEN_BITS, 0xfc910007, 0x00810007, mmSDMA1_CLK_CTRL, 0xff000fff, 0x00000000, }; -static const u32 iceland_mgcg_cgcg_init[] = -{ +static const u32 iceland_mgcg_cgcg_init[] = { mmSDMA0_CLK_CTRL, 0xff000ff0, 0x00000100, mmSDMA1_CLK_CTRL, 0xff000ff0, 0x00000100 }; @@ -142,7 +139,8 @@ static int sdma_v2_4_init_microcode(struct amdgpu_device *adev) case CHIP_TOPAZ: chip_name = "topaz"; break; - default: BUG(); + default: + BUG(); } for (i = 0; i < adev->sdma.num_instances; i++) { @@ -1115,6 +1113,8 @@ static const struct amd_ip_funcs sdma_v2_4_ip_funcs = { .soft_reset = sdma_v2_4_soft_reset, .set_clockgating_state = sdma_v2_4_set_clockgating_state, .set_powergating_state = sdma_v2_4_set_powergating_state, + .dump_ip_state = NULL, + .print_ip_state = NULL, }; static const struct amdgpu_ring_funcs sdma_v2_4_ring_funcs = { @@ -1178,7 +1178,7 @@ static void sdma_v2_4_set_irq_funcs(struct amdgpu_device *adev) * @src_offset: src GPU address * @dst_offset: dst GPU address * @byte_count: number of bytes to xfer - * @tmz: unused + * @copy_flags: unused * * Copy GPU buffers using the DMA engine (VI). * Used by the amdgpu ttm implementation to move pages if @@ -1188,7 +1188,7 @@ static void sdma_v2_4_emit_copy_buffer(struct amdgpu_ib *ib, uint64_t src_offset, uint64_t dst_offset, uint32_t byte_count, - bool tmz) + uint32_t copy_flags) { ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) | SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR); @@ -1258,8 +1258,7 @@ static void sdma_v2_4_set_vm_pte_funcs(struct amdgpu_device *adev) adev->vm_manager.vm_pte_num_scheds = adev->sdma.num_instances; } -const struct amdgpu_ip_block_version sdma_v2_4_ip_block = -{ +const struct amdgpu_ip_block_version sdma_v2_4_ip_block = { .type = AMD_IP_BLOCK_TYPE_SDMA, .major = 2, .minor = 4, diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c index 2ad615be4bb3..b8ebdc4ae6f6 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c @@ -1553,6 +1553,8 @@ static const struct amd_ip_funcs sdma_v3_0_ip_funcs = { .set_clockgating_state = sdma_v3_0_set_clockgating_state, .set_powergating_state = sdma_v3_0_set_powergating_state, .get_clockgating_state = sdma_v3_0_get_clockgating_state, + .dump_ip_state = NULL, + .print_ip_state = NULL, }; static const struct amdgpu_ring_funcs sdma_v3_0_ring_funcs = { @@ -1616,7 +1618,7 @@ static void sdma_v3_0_set_irq_funcs(struct amdgpu_device *adev) * @src_offset: src GPU address * @dst_offset: dst GPU address * @byte_count: number of bytes to xfer - * @tmz: unused + * @copy_flags: unused * * Copy GPU buffers using the DMA engine (VI). * Used by the amdgpu ttm implementation to move pages if @@ -1626,7 +1628,7 @@ static void sdma_v3_0_emit_copy_buffer(struct amdgpu_ib *ib, uint64_t src_offset, uint64_t dst_offset, uint32_t byte_count, - bool tmz) + uint32_t copy_flags) { ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) | SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR); diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index 3d68dd5523c6..101038395c3b 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c @@ -2021,6 +2021,9 @@ static int sdma_v4_0_process_trap_irq(struct amdgpu_device *adev, DRM_DEBUG("IH: SDMA trap\n"); instance = sdma_v4_0_irq_id_to_seq(entry->client_id); + if (instance < 0) + return instance; + switch (entry->ring_id) { case 0: amdgpu_fence_process(&adev->sdma.instance[instance].ring); @@ -2104,7 +2107,7 @@ static int sdma_v4_0_print_iv_entry(struct amdgpu_device *adev, struct amdgpu_iv_entry *entry) { int instance; - struct amdgpu_task_info task_info; + struct amdgpu_task_info *task_info; u64 addr; instance = sdma_v4_0_irq_id_to_seq(entry->client_id); @@ -2116,15 +2119,20 @@ static int sdma_v4_0_print_iv_entry(struct amdgpu_device *adev, addr = (u64)entry->src_data[0] << 12; addr |= ((u64)entry->src_data[1] & 0xf) << 44; - memset(&task_info, 0, sizeof(struct amdgpu_task_info)); - amdgpu_vm_get_task_info(adev, entry->pasid, &task_info); - dev_dbg_ratelimited(adev->dev, - "[sdma%d] address:0x%016llx src_id:%u ring:%u vmid:%u " - "pasid:%u, for process %s pid %d thread %s pid %d\n", - instance, addr, entry->src_id, entry->ring_id, entry->vmid, - entry->pasid, task_info.process_name, task_info.tgid, - task_info.task_name, task_info.pid); + "[sdma%d] address:0x%016llx src_id:%u ring:%u vmid:%u pasid:%u\n", + instance, addr, entry->src_id, entry->ring_id, entry->vmid, + entry->pasid); + + task_info = amdgpu_vm_get_task_info_pasid(adev, entry->pasid); + if (task_info) { + dev_dbg_ratelimited(adev->dev, + " for process %s pid %d thread %s pid %d\n", + task_info->process_name, task_info->tgid, + task_info->task_name, task_info->pid); + amdgpu_vm_put_task_info(task_info); + } + return 0; } @@ -2443,7 +2451,7 @@ static void sdma_v4_0_set_irq_funcs(struct amdgpu_device *adev) * @src_offset: src GPU address * @dst_offset: dst GPU address * @byte_count: number of bytes to xfer - * @tmz: if a secure copy should be used + * @copy_flags: copy flags for the buffers * * Copy GPU buffers using the DMA engine (VEGA10/12). * Used by the amdgpu ttm implementation to move pages if @@ -2453,11 +2461,11 @@ static void sdma_v4_0_emit_copy_buffer(struct amdgpu_ib *ib, uint64_t src_offset, uint64_t dst_offset, uint32_t byte_count, - bool tmz) + uint32_t copy_flags) { ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) | SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR) | - SDMA_PKT_COPY_LINEAR_HEADER_TMZ(tmz ? 1 : 0); + SDMA_PKT_COPY_LINEAR_HEADER_TMZ((copy_flags & AMDGPU_COPY_FLAGS_TMZ) ? 1 : 0); ib->ptr[ib->length_dw++] = byte_count - 1; ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */ ib->ptr[ib->length_dw++] = lower_32_bits(src_offset); diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c index 2d688dca26be..341b24d8320b 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c @@ -45,6 +45,8 @@ MODULE_FIRMWARE("amdgpu/sdma_4_4_2.bin"); +#define mmSMNAID_AID0_MCA_SMU 0x03b30400 + #define WREG32_SDMA(instance, offset, value) \ WREG32(sdma_v4_4_2_get_reg_offset(adev, (instance), (offset)), value) #define RREG32_SDMA(instance, offset) \ @@ -366,7 +368,8 @@ static void sdma_v4_4_2_ring_emit_hdp_flush(struct amdgpu_ring *ring) u32 ref_and_mask = 0; const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg; - ref_and_mask = nbio_hf_reg->ref_and_mask_sdma0 << ring->me; + ref_and_mask = nbio_hf_reg->ref_and_mask_sdma0 + << (ring->me % adev->sdma.num_inst_per_aid); sdma_v4_4_2_wait_reg_mem(ring, 0, 1, adev->nbio.funcs->get_hdp_flush_done_offset(adev), @@ -429,16 +432,11 @@ static void sdma_v4_4_2_inst_gfx_stop(struct amdgpu_device *adev, struct amdgpu_ring *sdma[AMDGPU_MAX_SDMA_INSTANCES]; u32 doorbell_offset, doorbell; u32 rb_cntl, ib_cntl; - int i, unset = 0; + int i; for_each_inst(i, inst_mask) { sdma[i] = &adev->sdma.instance[i].ring; - if ((adev->mman.buffer_funcs_ring == sdma[i]) && unset != 1) { - amdgpu_ttm_set_buffer_funcs_status(adev, false); - unset = 1; - } - rb_cntl = RREG32_SDMA(i, regSDMA_GFX_RB_CNTL); rb_cntl = REG_SET_FIELD(rb_cntl, SDMA_GFX_RB_CNTL, RB_ENABLE, 0); WREG32_SDMA(i, regSDMA_GFX_RB_CNTL, rb_cntl); @@ -485,20 +483,10 @@ static void sdma_v4_4_2_inst_rlc_stop(struct amdgpu_device *adev, static void sdma_v4_4_2_inst_page_stop(struct amdgpu_device *adev, uint32_t inst_mask) { - struct amdgpu_ring *sdma[AMDGPU_MAX_SDMA_INSTANCES]; u32 rb_cntl, ib_cntl; int i; - bool unset = false; for_each_inst(i, inst_mask) { - sdma[i] = &adev->sdma.instance[i].page; - - if ((adev->mman.buffer_funcs_ring == sdma[i]) && - (!unset)) { - amdgpu_ttm_set_buffer_funcs_status(adev, false); - unset = true; - } - rb_cntl = RREG32_SDMA(i, regSDMA_PAGE_RB_CNTL); rb_cntl = REG_SET_FIELD(rb_cntl, SDMA_PAGE_RB_CNTL, RB_ENABLE, 0); @@ -610,7 +598,7 @@ static uint32_t sdma_v4_4_2_rb_cntl(struct amdgpu_ring *ring, uint32_t rb_cntl) /* Set ring buffer size in dwords */ uint32_t rb_bufsz = order_base_2(ring->ring_size / 4); - barrier(); /* work around https://bugs.llvm.org/show_bug.cgi?id=42576 */ + barrier(); /* work around https://llvm.org/pr42576 */ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA_GFX_RB_CNTL, RB_SIZE, rb_bufsz); #ifdef __BIG_ENDIAN rb_cntl = REG_SET_FIELD(rb_cntl, SDMA_GFX_RB_CNTL, RB_SWAP_ENABLE, 1); @@ -948,13 +936,7 @@ static int sdma_v4_4_2_inst_start(struct amdgpu_device *adev, r = amdgpu_ring_test_helper(page); if (r) return r; - - if (adev->mman.buffer_funcs_ring == page) - amdgpu_ttm_set_buffer_funcs_status(adev, true); } - - if (adev->mman.buffer_funcs_ring == ring) - amdgpu_ttm_set_buffer_funcs_status(adev, true); } return r; @@ -1621,19 +1603,9 @@ static int sdma_v4_4_2_set_ecc_irq_state(struct amdgpu_device *adev, u32 sdma_cntl; sdma_cntl = RREG32_SDMA(type, regSDMA_CNTL); - switch (state) { - case AMDGPU_IRQ_STATE_DISABLE: - sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA_CNTL, - DRAM_ECC_INT_ENABLE, 0); - WREG32_SDMA(type, regSDMA_CNTL, sdma_cntl); - break; - /* sdma ecc interrupt is enabled by default - * driver doesn't need to do anything to - * enable the interrupt */ - case AMDGPU_IRQ_STATE_ENABLE: - default: - break; - } + sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA_CNTL, DRAM_ECC_INT_ENABLE, + state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); + WREG32_SDMA(type, regSDMA_CNTL, sdma_cntl); return 0; } @@ -1642,7 +1614,7 @@ static int sdma_v4_4_2_print_iv_entry(struct amdgpu_device *adev, struct amdgpu_iv_entry *entry) { int instance; - struct amdgpu_task_info task_info; + struct amdgpu_task_info *task_info; u64 addr; instance = sdma_v4_4_2_irq_id_to_seq(entry->client_id); @@ -1654,15 +1626,19 @@ static int sdma_v4_4_2_print_iv_entry(struct amdgpu_device *adev, addr = (u64)entry->src_data[0] << 12; addr |= ((u64)entry->src_data[1] & 0xf) << 44; - memset(&task_info, 0, sizeof(struct amdgpu_task_info)); - amdgpu_vm_get_task_info(adev, entry->pasid, &task_info); - dev_dbg_ratelimited(adev->dev, - "[sdma%d] address:0x%016llx src_id:%u ring:%u vmid:%u " - "pasid:%u, for process %s pid %d thread %s pid %d\n", - instance, addr, entry->src_id, entry->ring_id, entry->vmid, - entry->pasid, task_info.process_name, task_info.tgid, - task_info.task_name, task_info.pid); + "[sdma%d] address:0x%016llx src_id:%u ring:%u vmid:%u pasid:%u\n", + instance, addr, entry->src_id, entry->ring_id, entry->vmid, + entry->pasid); + + task_info = amdgpu_vm_get_task_info_pasid(adev, entry->pasid); + if (task_info) { + dev_dbg_ratelimited(adev->dev, " for process %s pid %d thread %s pid %d\n", + task_info->process_name, task_info->tgid, + task_info->task_name, task_info->pid); + amdgpu_vm_put_task_info(task_info); + } + return 0; } @@ -1969,7 +1945,7 @@ static void sdma_v4_4_2_set_irq_funcs(struct amdgpu_device *adev) * @src_offset: src GPU address * @dst_offset: dst GPU address * @byte_count: number of bytes to xfer - * @tmz: if a secure copy should be used + * @copy_flags: copy flags for the buffers * * Copy GPU buffers using the DMA engine. * Used by the amdgpu ttm implementation to move pages if @@ -1979,11 +1955,11 @@ static void sdma_v4_4_2_emit_copy_buffer(struct amdgpu_ib *ib, uint64_t src_offset, uint64_t dst_offset, uint32_t byte_count, - bool tmz) + uint32_t copy_flags) { ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) | SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR) | - SDMA_PKT_COPY_LINEAR_HEADER_TMZ(tmz ? 1 : 0); + SDMA_PKT_COPY_LINEAR_HEADER_TMZ((copy_flags & AMDGPU_COPY_FLAGS_TMZ) ? 1 : 0); ib->ptr[ib->length_dw++] = byte_count - 1; ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */ ib->ptr[ib->length_dw++] = lower_32_bits(src_offset); @@ -2204,9 +2180,83 @@ static const struct amdgpu_ras_block_hw_ops sdma_v4_4_2_ras_hw_ops = { .reset_ras_error_count = sdma_v4_4_2_reset_ras_error_count, }; +static int sdma_v4_4_2_aca_bank_parser(struct aca_handle *handle, struct aca_bank *bank, + enum aca_smu_type type, void *data) +{ + struct aca_bank_info info; + u64 misc0; + int ret; + + ret = aca_bank_info_decode(bank, &info); + if (ret) + return ret; + + misc0 = bank->regs[ACA_REG_IDX_MISC0]; + switch (type) { + case ACA_SMU_TYPE_UE: + ret = aca_error_cache_log_bank_error(handle, &info, ACA_ERROR_TYPE_UE, + 1ULL); + break; + case ACA_SMU_TYPE_CE: + ret = aca_error_cache_log_bank_error(handle, &info, ACA_ERROR_TYPE_CE, + ACA_REG__MISC0__ERRCNT(misc0)); + break; + default: + return -EINVAL; + } + + return ret; +} + +/* CODE_SDMA0 - CODE_SDMA4, reference to smu driver if header file */ +static int sdma_v4_4_2_err_codes[] = { 33, 34, 35, 36 }; + +static bool sdma_v4_4_2_aca_bank_is_valid(struct aca_handle *handle, struct aca_bank *bank, + enum aca_smu_type type, void *data) +{ + u32 instlo; + + instlo = ACA_REG__IPID__INSTANCEIDLO(bank->regs[ACA_REG_IDX_IPID]); + instlo &= GENMASK(31, 1); + + if (instlo != mmSMNAID_AID0_MCA_SMU) + return false; + + if (aca_bank_check_error_codes(handle->adev, bank, + sdma_v4_4_2_err_codes, + ARRAY_SIZE(sdma_v4_4_2_err_codes))) + return false; + + return true; +} + +static const struct aca_bank_ops sdma_v4_4_2_aca_bank_ops = { + .aca_bank_parser = sdma_v4_4_2_aca_bank_parser, + .aca_bank_is_valid = sdma_v4_4_2_aca_bank_is_valid, +}; + +static const struct aca_info sdma_v4_4_2_aca_info = { + .hwip = ACA_HWIP_TYPE_SMU, + .mask = ACA_ERROR_UE_MASK, + .bank_ops = &sdma_v4_4_2_aca_bank_ops, +}; + +static int sdma_v4_4_2_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block) +{ + int r; + + r = amdgpu_sdma_ras_late_init(adev, ras_block); + if (r) + return r; + + return amdgpu_ras_bind_aca(adev, AMDGPU_RAS_BLOCK__SDMA, + &sdma_v4_4_2_aca_info, NULL); +} + static struct amdgpu_sdma_ras sdma_v4_4_2_ras = { .ras_block = { .hw_ops = &sdma_v4_4_2_ras_hw_ops, + .ras_late_init = sdma_v4_4_2_ras_late_init, }, }; diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c index 3c485e5a531a..b7d33d78bce0 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c @@ -249,35 +249,23 @@ static int sdma_v5_0_init_microcode(struct amdgpu_device *adev) return ret; } -static unsigned sdma_v5_0_ring_init_cond_exec(struct amdgpu_ring *ring) +static unsigned sdma_v5_0_ring_init_cond_exec(struct amdgpu_ring *ring, + uint64_t addr) { unsigned ret; amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_COND_EXE)); - amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr)); - amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr)); + amdgpu_ring_write(ring, lower_32_bits(addr)); + amdgpu_ring_write(ring, upper_32_bits(addr)); amdgpu_ring_write(ring, 1); - ret = ring->wptr & ring->buf_mask;/* this is the offset we need patch later */ - amdgpu_ring_write(ring, 0x55aa55aa);/* insert dummy here and patch it later */ + /* this is the offset we need patch later */ + ret = ring->wptr & ring->buf_mask; + /* insert dummy here and patch it later */ + amdgpu_ring_write(ring, 0); return ret; } -static void sdma_v5_0_ring_patch_cond_exec(struct amdgpu_ring *ring, - unsigned offset) -{ - unsigned cur; - - BUG_ON(offset > ring->buf_mask); - BUG_ON(ring->ring[offset] != 0x55aa55aa); - - cur = (ring->wptr - 1) & ring->buf_mask; - if (cur > offset) - ring->ring[offset] = cur - offset; - else - ring->ring[offset] = (ring->buf_mask + 1) - offset + cur; -} - /** * sdma_v5_0_ring_get_rptr - get the current read pointer * @@ -1011,7 +999,8 @@ static int sdma_v5_0_ring_test_ring(struct amdgpu_ring *ring) r = amdgpu_ring_alloc(ring, 20); if (r) { DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r); - amdgpu_device_wb_free(adev, index); + if (!ring->is_mes_queue) + amdgpu_device_wb_free(adev, index); return r; } @@ -1780,7 +1769,6 @@ static const struct amdgpu_ring_funcs sdma_v5_0_ring_funcs = { .emit_reg_wait = sdma_v5_0_ring_emit_reg_wait, .emit_reg_write_reg_wait = sdma_v5_0_ring_emit_reg_write_reg_wait, .init_cond_exec = sdma_v5_0_ring_init_cond_exec, - .patch_cond_exec = sdma_v5_0_ring_patch_cond_exec, .preempt_ib = sdma_v5_0_ring_preempt_ib, }; @@ -1818,7 +1806,7 @@ static void sdma_v5_0_set_irq_funcs(struct amdgpu_device *adev) * @src_offset: src GPU address * @dst_offset: dst GPU address * @byte_count: number of bytes to xfer - * @tmz: if a secure copy should be used + * @copy_flags: copy flags for the buffers * * Copy GPU buffers using the DMA engine (NAVI10). * Used by the amdgpu ttm implementation to move pages if @@ -1828,11 +1816,11 @@ static void sdma_v5_0_emit_copy_buffer(struct amdgpu_ib *ib, uint64_t src_offset, uint64_t dst_offset, uint32_t byte_count, - bool tmz) + uint32_t copy_flags) { ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) | SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR) | - SDMA_PKT_COPY_LINEAR_HEADER_TMZ(tmz ? 1 : 0); + SDMA_PKT_COPY_LINEAR_HEADER_TMZ((copy_flags & AMDGPU_COPY_FLAGS_TMZ) ? 1 : 0); ib->ptr[ib->length_dw++] = byte_count - 1; ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */ ib->ptr[ib->length_dw++] = lower_32_bits(src_offset); diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c index 0058f3f7cf6e..cc9e961f0078 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c @@ -89,35 +89,23 @@ static u32 sdma_v5_2_get_reg_offset(struct amdgpu_device *adev, u32 instance, u3 return base + internal_offset; } -static unsigned sdma_v5_2_ring_init_cond_exec(struct amdgpu_ring *ring) +static unsigned sdma_v5_2_ring_init_cond_exec(struct amdgpu_ring *ring, + uint64_t addr) { unsigned ret; amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_COND_EXE)); - amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr)); - amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr)); + amdgpu_ring_write(ring, lower_32_bits(addr)); + amdgpu_ring_write(ring, upper_32_bits(addr)); amdgpu_ring_write(ring, 1); - ret = ring->wptr & ring->buf_mask;/* this is the offset we need patch later */ - amdgpu_ring_write(ring, 0x55aa55aa);/* insert dummy here and patch it later */ + /* this is the offset we need patch later */ + ret = ring->wptr & ring->buf_mask; + /* insert dummy here and patch it later */ + amdgpu_ring_write(ring, 0); return ret; } -static void sdma_v5_2_ring_patch_cond_exec(struct amdgpu_ring *ring, - unsigned offset) -{ - unsigned cur; - - BUG_ON(offset > ring->buf_mask); - BUG_ON(ring->ring[offset] != 0x55aa55aa); - - cur = (ring->wptr - 1) & ring->buf_mask; - if (cur > offset) - ring->ring[offset] = cur - offset; - else - ring->ring[offset] = (ring->buf_mask + 1) - offset + cur; -} - /** * sdma_v5_2_ring_get_rptr - get the current read pointer * @@ -292,17 +280,21 @@ static void sdma_v5_2_ring_emit_hdp_flush(struct amdgpu_ring *ring) u32 ref_and_mask = 0; const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg; - ref_and_mask = nbio_hf_reg->ref_and_mask_sdma0 << ring->me; - - amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) | - SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(1) | - SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* == */ - amdgpu_ring_write(ring, (adev->nbio.funcs->get_hdp_flush_done_offset(adev)) << 2); - amdgpu_ring_write(ring, (adev->nbio.funcs->get_hdp_flush_req_offset(adev)) << 2); - amdgpu_ring_write(ring, ref_and_mask); /* reference */ - amdgpu_ring_write(ring, ref_and_mask); /* mask */ - amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) | - SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); /* retry count, poll interval */ + if (ring->me > 1) { + amdgpu_asic_flush_hdp(adev, ring); + } else { + ref_and_mask = nbio_hf_reg->ref_and_mask_sdma0 << ring->me; + + amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) | + SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(1) | + SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* == */ + amdgpu_ring_write(ring, (adev->nbio.funcs->get_hdp_flush_done_offset(adev)) << 2); + amdgpu_ring_write(ring, (adev->nbio.funcs->get_hdp_flush_req_offset(adev)) << 2); + amdgpu_ring_write(ring, ref_and_mask); /* reference */ + amdgpu_ring_write(ring, ref_and_mask); /* mask */ + amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) | + SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); /* retry count, poll interval */ + } } /** @@ -847,7 +839,8 @@ static int sdma_v5_2_ring_test_ring(struct amdgpu_ring *ring) r = amdgpu_ring_alloc(ring, 20); if (r) { DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r); - amdgpu_device_wb_free(adev, index); + if (!ring->is_mes_queue) + amdgpu_device_wb_free(adev, index); return r; } @@ -1722,7 +1715,6 @@ static const struct amdgpu_ring_funcs sdma_v5_2_ring_funcs = { .emit_reg_wait = sdma_v5_2_ring_emit_reg_wait, .emit_reg_write_reg_wait = sdma_v5_2_ring_emit_reg_write_reg_wait, .init_cond_exec = sdma_v5_2_ring_init_cond_exec, - .patch_cond_exec = sdma_v5_2_ring_patch_cond_exec, .preempt_ib = sdma_v5_2_ring_preempt_ib, }; @@ -1760,7 +1752,7 @@ static void sdma_v5_2_set_irq_funcs(struct amdgpu_device *adev) * @src_offset: src GPU address * @dst_offset: dst GPU address * @byte_count: number of bytes to xfer - * @tmz: if a secure copy should be used + * @copy_flags: copy flags for the buffers * * Copy GPU buffers using the DMA engine. * Used by the amdgpu ttm implementation to move pages if @@ -1770,11 +1762,11 @@ static void sdma_v5_2_emit_copy_buffer(struct amdgpu_ib *ib, uint64_t src_offset, uint64_t dst_offset, uint32_t byte_count, - bool tmz) + uint32_t copy_flags) { ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) | SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR) | - SDMA_PKT_COPY_LINEAR_HEADER_TMZ(tmz ? 1 : 0); + SDMA_PKT_COPY_LINEAR_HEADER_TMZ((copy_flags & AMDGPU_COPY_FLAGS_TMZ) ? 1 : 0); ib->ptr[ib->length_dw++] = byte_count - 1; ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */ ib->ptr[ib->length_dw++] = lower_32_bits(src_offset); diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c index 3c7ddd219de8..c833b6b8373b 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c @@ -49,6 +49,7 @@ MODULE_FIRMWARE("amdgpu/sdma_6_0_1.bin"); MODULE_FIRMWARE("amdgpu/sdma_6_0_2.bin"); MODULE_FIRMWARE("amdgpu/sdma_6_0_3.bin"); MODULE_FIRMWARE("amdgpu/sdma_6_1_0.bin"); +MODULE_FIRMWARE("amdgpu/sdma_6_1_1.bin"); #define SDMA1_REG_OFFSET 0x600 #define SDMA0_HYP_DEC_REG_START 0x5880 @@ -79,35 +80,23 @@ static u32 sdma_v6_0_get_reg_offset(struct amdgpu_device *adev, u32 instance, u3 return base + internal_offset; } -static unsigned sdma_v6_0_ring_init_cond_exec(struct amdgpu_ring *ring) +static unsigned sdma_v6_0_ring_init_cond_exec(struct amdgpu_ring *ring, + uint64_t addr) { unsigned ret; amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_COND_EXE)); - amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr)); - amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr)); + amdgpu_ring_write(ring, lower_32_bits(addr)); + amdgpu_ring_write(ring, upper_32_bits(addr)); amdgpu_ring_write(ring, 1); - ret = ring->wptr & ring->buf_mask;/* this is the offset we need patch later */ - amdgpu_ring_write(ring, 0x55aa55aa);/* insert dummy here and patch it later */ + /* this is the offset we need patch later */ + ret = ring->wptr & ring->buf_mask; + /* insert dummy here and patch it later */ + amdgpu_ring_write(ring, 0); return ret; } -static void sdma_v6_0_ring_patch_cond_exec(struct amdgpu_ring *ring, - unsigned offset) -{ - unsigned cur; - - BUG_ON(offset > ring->buf_mask); - BUG_ON(ring->ring[offset] != 0x55aa55aa); - - cur = (ring->wptr - 1) & ring->buf_mask; - if (cur > offset) - ring->ring[offset] = cur - offset; - else - ring->ring[offset] = (ring->buf_mask + 1) - offset + cur; -} - /** * sdma_v6_0_ring_get_rptr - get the current read pointer * @@ -518,6 +507,13 @@ static int sdma_v6_0_gfx_resume(struct amdgpu_device *adev) /* set minor_ptr_update to 0 after wptr programed */ WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_MINOR_PTR_UPDATE), 0); + /* Set up sdma hang watchdog */ + temp = RREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_WATCHDOG_CNTL)); + /* 100ms per unit */ + temp = REG_SET_FIELD(temp, SDMA0_WATCHDOG_CNTL, QUEUE_HANG_COUNT, + max(adev->usec_timeout/100000, 1)); + WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_WATCHDOG_CNTL), temp); + /* Set up RESP_MODE to non-copy addresses */ temp = RREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_UTCL1_CNTL)); temp = REG_SET_FIELD(temp, SDMA0_UTCL1_CNTL, RESP_MODE, 3); @@ -865,7 +861,8 @@ static int sdma_v6_0_ring_test_ring(struct amdgpu_ring *ring) r = amdgpu_ring_alloc(ring, 5); if (r) { DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r); - amdgpu_device_wb_free(adev, index); + if (!ring->is_mes_queue) + amdgpu_device_wb_free(adev, index); return r; } @@ -1541,7 +1538,6 @@ static const struct amdgpu_ring_funcs sdma_v6_0_ring_funcs = { .emit_reg_wait = sdma_v6_0_ring_emit_reg_wait, .emit_reg_write_reg_wait = sdma_v6_0_ring_emit_reg_write_reg_wait, .init_cond_exec = sdma_v6_0_ring_init_cond_exec, - .patch_cond_exec = sdma_v6_0_ring_patch_cond_exec, .preempt_ib = sdma_v6_0_ring_preempt_ib, }; @@ -1579,7 +1575,7 @@ static void sdma_v6_0_set_irq_funcs(struct amdgpu_device *adev) * @src_offset: src GPU address * @dst_offset: dst GPU address * @byte_count: number of bytes to xfer - * @tmz: if a secure copy should be used + * @copy_flags: copy flags for the buffers * * Copy GPU buffers using the DMA engine. * Used by the amdgpu ttm implementation to move pages if @@ -1589,11 +1585,11 @@ static void sdma_v6_0_emit_copy_buffer(struct amdgpu_ib *ib, uint64_t src_offset, uint64_t dst_offset, uint32_t byte_count, - bool tmz) + uint32_t copy_flags) { ib->ptr[ib->length_dw++] = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_COPY) | SDMA_PKT_COPY_LINEAR_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR) | - SDMA_PKT_COPY_LINEAR_HEADER_TMZ(tmz ? 1 : 0); + SDMA_PKT_COPY_LINEAR_HEADER_TMZ((copy_flags & AMDGPU_COPY_FLAGS_TMZ) ? 1 : 0); ib->ptr[ib->length_dw++] = byte_count - 1; ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */ ib->ptr[ib->length_dw++] = lower_32_bits(src_offset); diff --git a/drivers/gpu/drm/amd/amdgpu/si.c b/drivers/gpu/drm/amd/amdgpu/si.c index a757526153e5..85235470e872 100644 --- a/drivers/gpu/drm/amd/amdgpu/si.c +++ b/drivers/gpu/drm/amd/amdgpu/si.c @@ -1409,9 +1409,9 @@ static int si_gpu_pci_config_reset(struct amdgpu_device *adev) return r; } -static bool si_asic_supports_baco(struct amdgpu_device *adev) +static int si_asic_supports_baco(struct amdgpu_device *adev) { - return false; + return 0; } static enum amd_reset_method @@ -2331,28 +2331,18 @@ static void si_pcie_gen3_enable(struct amdgpu_device *adev) gpu_cfg & PCI_EXP_LNKCTL_HAWD); - pcie_capability_read_word(root, PCI_EXP_LNKCTL2, - &tmp16); - tmp16 &= ~(PCI_EXP_LNKCTL2_ENTER_COMP | - PCI_EXP_LNKCTL2_TX_MARGIN); - tmp16 |= (bridge_cfg2 & - (PCI_EXP_LNKCTL2_ENTER_COMP | - PCI_EXP_LNKCTL2_TX_MARGIN)); - pcie_capability_write_word(root, - PCI_EXP_LNKCTL2, - tmp16); - - pcie_capability_read_word(adev->pdev, - PCI_EXP_LNKCTL2, - &tmp16); - tmp16 &= ~(PCI_EXP_LNKCTL2_ENTER_COMP | - PCI_EXP_LNKCTL2_TX_MARGIN); - tmp16 |= (gpu_cfg2 & - (PCI_EXP_LNKCTL2_ENTER_COMP | - PCI_EXP_LNKCTL2_TX_MARGIN)); - pcie_capability_write_word(adev->pdev, - PCI_EXP_LNKCTL2, - tmp16); + pcie_capability_clear_and_set_word(root, PCI_EXP_LNKCTL2, + PCI_EXP_LNKCTL2_ENTER_COMP | + PCI_EXP_LNKCTL2_TX_MARGIN, + bridge_cfg2 & + (PCI_EXP_LNKCTL2_ENTER_COMP | + PCI_EXP_LNKCTL2_TX_MARGIN)); + pcie_capability_clear_and_set_word(adev->pdev, PCI_EXP_LNKCTL2, + PCI_EXP_LNKCTL2_ENTER_COMP | + PCI_EXP_LNKCTL2_TX_MARGIN, + gpu_cfg2 & + (PCI_EXP_LNKCTL2_ENTER_COMP | + PCI_EXP_LNKCTL2_TX_MARGIN)); tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4); tmp &= ~LC_SET_QUIESCE; @@ -2365,16 +2355,15 @@ static void si_pcie_gen3_enable(struct amdgpu_device *adev) speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE; WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl); - pcie_capability_read_word(adev->pdev, PCI_EXP_LNKCTL2, &tmp16); - tmp16 &= ~PCI_EXP_LNKCTL2_TLS; - + tmp16 = 0; if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3) tmp16 |= PCI_EXP_LNKCTL2_TLS_8_0GT; /* gen3 */ else if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2) tmp16 |= PCI_EXP_LNKCTL2_TLS_5_0GT; /* gen2 */ else tmp16 |= PCI_EXP_LNKCTL2_TLS_2_5GT; /* gen1 */ - pcie_capability_write_word(adev->pdev, PCI_EXP_LNKCTL2, tmp16); + pcie_capability_clear_and_set_word(adev->pdev, PCI_EXP_LNKCTL2, + PCI_EXP_LNKCTL2_TLS, tmp16); speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL); speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE; @@ -2717,6 +2706,8 @@ static const struct amd_ip_funcs si_common_ip_funcs = { .soft_reset = si_common_soft_reset, .set_clockgating_state = si_common_set_clockgating_state, .set_powergating_state = si_common_set_powergating_state, + .dump_ip_state = NULL, + .print_ip_state = NULL, }; static const struct amdgpu_ip_block_version si_common_ip_block = diff --git a/drivers/gpu/drm/amd/amdgpu/si_dma.c b/drivers/gpu/drm/amd/amdgpu/si_dma.c index 9aa0e11ee673..11db5b755832 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_dma.c +++ b/drivers/gpu/drm/amd/amdgpu/si_dma.c @@ -708,6 +708,8 @@ static const struct amd_ip_funcs si_dma_ip_funcs = { .soft_reset = si_dma_soft_reset, .set_clockgating_state = si_dma_set_clockgating_state, .set_powergating_state = si_dma_set_powergating_state, + .dump_ip_state = NULL, + .print_ip_state = NULL, }; static const struct amdgpu_ring_funcs si_dma_ring_funcs = { @@ -761,7 +763,7 @@ static void si_dma_set_irq_funcs(struct amdgpu_device *adev) * @src_offset: src GPU address * @dst_offset: dst GPU address * @byte_count: number of bytes to xfer - * @tmz: is this a secure operation + * @copy_flags: unused * * Copy GPU buffers using the DMA engine (VI). * Used by the amdgpu ttm implementation to move pages if @@ -771,7 +773,7 @@ static void si_dma_emit_copy_buffer(struct amdgpu_ib *ib, uint64_t src_offset, uint64_t dst_offset, uint32_t byte_count, - bool tmz) + uint32_t copy_flags) { ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_COPY, 1, 0, 0, byte_count); diff --git a/drivers/gpu/drm/amd/amdgpu/si_ih.c b/drivers/gpu/drm/amd/amdgpu/si_ih.c index cada9f300a7f..5237395e4fab 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/si_ih.c @@ -296,6 +296,8 @@ static const struct amd_ip_funcs si_ih_ip_funcs = { .soft_reset = si_ih_soft_reset, .set_clockgating_state = si_ih_set_clockgating_state, .set_powergating_state = si_ih_set_powergating_state, + .dump_ip_state = NULL, + .print_ip_state = NULL, }; static const struct amdgpu_ih_funcs si_ih_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/sienna_cichlid.c b/drivers/gpu/drm/amd/amdgpu/sienna_cichlid.c index 93f6772d1b24..481217c32d85 100644 --- a/drivers/gpu/drm/amd/amdgpu/sienna_cichlid.c +++ b/drivers/gpu/drm/amd/amdgpu/sienna_cichlid.c @@ -92,7 +92,7 @@ static int sienna_cichlid_mode2_suspend_ip(struct amdgpu_device *adev) adev->ip_blocks[i].status.hw = false; } - return r; + return 0; } static int diff --git a/drivers/gpu/drm/amd/amdgpu/smuio_v14_0_2.c b/drivers/gpu/drm/amd/amdgpu/smuio_v14_0_2.c new file mode 100644 index 000000000000..2a51a70d4846 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/smuio_v14_0_2.c @@ -0,0 +1,62 @@ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#include "amdgpu.h" +#include "smuio_v14_0_2.h" +#include "smuio/smuio_14_0_2_offset.h" +#include "smuio/smuio_14_0_2_sh_mask.h" +#include <linux/preempt.h> + +static u32 smuio_v14_0_2_get_rom_index_offset(struct amdgpu_device *adev) +{ + return SOC15_REG_OFFSET(SMUIO, 0, regROM_INDEX); +} + +static u32 smuio_v14_0_2_get_rom_data_offset(struct amdgpu_device *adev) +{ + return SOC15_REG_OFFSET(SMUIO, 0, regROM_DATA); +} + +static u64 smuio_v14_0_2_get_gpu_clock_counter(struct amdgpu_device *adev) +{ + u64 clock; + u64 clock_counter_lo, clock_counter_hi_pre, clock_counter_hi_after; + + preempt_disable(); + clock_counter_hi_pre = (u64)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_UPPER); + clock_counter_lo = (u64)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_LOWER); + /* the clock counter may be udpated during polling the counters */ + clock_counter_hi_after = (u64)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_UPPER); + if (clock_counter_hi_pre != clock_counter_hi_after) + clock_counter_lo = (u64)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_LOWER); + preempt_enable(); + + clock = clock_counter_lo | (clock_counter_hi_after << 32ULL); + + return clock; +} + +const struct amdgpu_smuio_funcs smuio_v14_0_2_funcs = { + .get_rom_index_offset = smuio_v14_0_2_get_rom_index_offset, + .get_rom_data_offset = smuio_v14_0_2_get_rom_data_offset, + .get_gpu_clock_counter = smuio_v14_0_2_get_gpu_clock_counter, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/smuio_v14_0_2.h b/drivers/gpu/drm/amd/amdgpu/smuio_v14_0_2.h new file mode 100644 index 000000000000..6e617f832d90 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/smuio_v14_0_2.h @@ -0,0 +1,30 @@ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#ifndef __SMUIO_V14_0_2_H__ +#define __SMUIO_V14_0_2_H__ + +#include "soc15_common.h" + +extern const struct amdgpu_smuio_funcs smuio_v14_0_2_funcs; + +#endif /* __SMUIO_V14_0_2_H__ */ diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index 1c614451dead..170f02e96717 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -143,7 +143,7 @@ static const struct amdgpu_video_codec_info rn_video_codecs_decode_array[] = {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4096, 4)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)}, - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 16384, 16384, 0)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)}, }; @@ -156,7 +156,7 @@ static const struct amdgpu_video_codecs rn_video_codecs_decode = static const struct amdgpu_video_codec_info vcn_4_0_3_video_codecs_decode_array[] = { {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)}, - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 16384, 16384, 0)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1, 8192, 4352, 0)}, }; @@ -502,7 +502,7 @@ static int soc15_asic_baco_reset(struct amdgpu_device *adev) static enum amd_reset_method soc15_asic_reset_method(struct amdgpu_device *adev) { - bool baco_reset = false; + int baco_reset = 0; bool connected_to_cpu = false; struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); @@ -540,7 +540,7 @@ soc15_asic_reset_method(struct amdgpu_device *adev) */ if (ras && adev->ras_enabled && adev->pm.fw_version <= 0x283400) - baco_reset = false; + baco_reset = 0; } else { baco_reset = amdgpu_dpm_is_baco_supported(adev); } @@ -620,7 +620,7 @@ static int soc15_asic_reset(struct amdgpu_device *adev) } } -static bool soc15_supports_baco(struct amdgpu_device *adev) +static int soc15_supports_baco(struct amdgpu_device *adev) { switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) { case IP_VERSION(9, 0, 0): @@ -628,13 +628,13 @@ static bool soc15_supports_baco(struct amdgpu_device *adev) if (adev->asic_type == CHIP_VEGA20) { if (adev->psp.sos.fw_version >= 0x80067) return amdgpu_dpm_is_baco_supported(adev); - return false; + return 0; } else { return amdgpu_dpm_is_baco_supported(adev); } break; default: - return false; + return 0; } } @@ -918,7 +918,6 @@ static const struct amdgpu_asic_funcs aqua_vanjaram_asic_funcs = .get_config_memsize = &soc15_get_config_memsize, .need_full_reset = &soc15_need_full_reset, .init_doorbell_index = &aqua_vanjaram_doorbell_index_init, - .get_pcie_usage = &amdgpu_nbio_get_pcie_usage, .need_reset_on_init = &soc15_need_reset_on_init, .get_pcie_replay_count = &amdgpu_nbio_get_pcie_replay_count, .supports_baco = &soc15_supports_baco, @@ -1301,7 +1300,8 @@ static int soc15_common_hw_fini(void *handle) if (amdgpu_sriov_vf(adev)) xgpu_ai_mailbox_put_irq(adev); - if (adev->nbio.ras_if && + if ((!amdgpu_sriov_vf(adev)) && + adev->nbio.ras_if && amdgpu_ras_is_supported(adev, adev->nbio.ras_if->block)) { if (adev->nbio.ras && adev->nbio.ras->init_ras_controller_interrupt) @@ -1501,4 +1501,6 @@ static const struct amd_ip_funcs soc15_common_ip_funcs = { .set_clockgating_state = soc15_common_set_clockgating_state, .set_powergating_state = soc15_common_set_powergating_state, .get_clockgating_state= soc15_common_get_clockgating_state, + .dump_ip_state = NULL, + .print_ip_state = NULL, }; diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.h b/drivers/gpu/drm/amd/amdgpu/soc15.h index 1444b7765e4b..282584a48be0 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.h +++ b/drivers/gpu/drm/amd/amdgpu/soc15.h @@ -88,6 +88,8 @@ struct soc15_ras_field_entry { }; #define SOC15_REG_ENTRY(ip, inst, reg) ip##_HWIP, inst, reg##_BASE_IDX, reg +#define SOC15_REG_ENTRY_STR(ip, inst, reg) \ + { ip##_HWIP, inst, reg##_BASE_IDX, reg, #reg } #define SOC15_REG_ENTRY_OFFSET(entry) (adev->reg_offset[entry.hwip][entry.inst][entry.seg] + entry.reg_offset) diff --git a/drivers/gpu/drm/amd/amdgpu/soc21.c b/drivers/gpu/drm/amd/amdgpu/soc21.c index 4d7188912edf..fb6797467571 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc21.c +++ b/drivers/gpu/drm/amd/amdgpu/soc21.c @@ -72,7 +72,7 @@ static const struct amdgpu_video_codecs vcn_4_0_0_video_codecs_encode_vcn1 = { static const struct amdgpu_video_codec_info vcn_4_0_0_video_codecs_decode_array_vcn0[] = { {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)}, - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 16384, 16384, 0)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1, 8192, 4352, 0)}, }; @@ -80,7 +80,7 @@ static const struct amdgpu_video_codec_info vcn_4_0_0_video_codecs_decode_array_ static const struct amdgpu_video_codec_info vcn_4_0_0_video_codecs_decode_array_vcn1[] = { {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)}, - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 16384, 16384, 0)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)}, }; @@ -185,6 +185,12 @@ static int soc21_query_video_codecs(struct amdgpu_device *adev, bool encode, } } return 0; + case IP_VERSION(4, 0, 6): + if (encode) + *codecs = &vcn_4_0_0_video_codecs_encode_vcn0; + else + *codecs = &vcn_4_0_0_video_codecs_decode_vcn0; + return 0; default: return -EINVAL; } @@ -382,6 +388,7 @@ soc21_asic_reset_method(struct amdgpu_device *adev) case IP_VERSION(13, 0, 4): case IP_VERSION(13, 0, 11): case IP_VERSION(14, 0, 0): + case IP_VERSION(14, 0, 1): return AMD_RESET_METHOD_MODE2; default: if (amdgpu_dpm_is_baco_supported(adev)) @@ -450,10 +457,8 @@ static bool soc21_need_full_reset(struct amdgpu_device *adev) { switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { case IP_VERSION(11, 0, 0): - return amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__UMC); case IP_VERSION(11, 0, 2): case IP_VERSION(11, 0, 3): - return false; default: return true; } @@ -711,10 +716,43 @@ static int soc21_common_early_init(void *handle) AMD_CG_SUPPORT_BIF_MGCG | AMD_CG_SUPPORT_BIF_LS; adev->pg_flags = AMD_PG_SUPPORT_VCN_DPG | + AMD_PG_SUPPORT_JPEG_DPG | AMD_PG_SUPPORT_VCN | AMD_PG_SUPPORT_JPEG | AMD_PG_SUPPORT_GFX_PG; - adev->external_rev_id = adev->rev_id + 0x1; + if (adev->rev_id == 0) + adev->external_rev_id = 0x1; + else + adev->external_rev_id = adev->rev_id + 0x10; + break; + case IP_VERSION(11, 5, 1): + adev->cg_flags = + AMD_CG_SUPPORT_GFX_CGCG | + AMD_CG_SUPPORT_GFX_CGLS | + AMD_CG_SUPPORT_GFX_MGCG | + AMD_CG_SUPPORT_GFX_FGCG | + AMD_CG_SUPPORT_REPEATER_FGCG | + AMD_CG_SUPPORT_GFX_PERF_CLK | + AMD_CG_SUPPORT_GFX_3D_CGCG | + AMD_CG_SUPPORT_GFX_3D_CGLS | + AMD_CG_SUPPORT_MC_MGCG | + AMD_CG_SUPPORT_MC_LS | + AMD_CG_SUPPORT_HDP_LS | + AMD_CG_SUPPORT_HDP_DS | + AMD_CG_SUPPORT_HDP_SD | + AMD_CG_SUPPORT_ATHUB_MGCG | + AMD_CG_SUPPORT_ATHUB_LS | + AMD_CG_SUPPORT_IH_CG | + AMD_CG_SUPPORT_BIF_MGCG | + AMD_CG_SUPPORT_BIF_LS | + AMD_CG_SUPPORT_VCN_MGCG | + AMD_CG_SUPPORT_JPEG_MGCG; + adev->pg_flags = + AMD_PG_SUPPORT_GFX_PG | + AMD_PG_SUPPORT_VCN | + AMD_PG_SUPPORT_VCN_DPG | + AMD_PG_SUPPORT_JPEG; + adev->external_rev_id = adev->rev_id + 0xc1; break; default: /* FIXME: not supported yet */ @@ -832,10 +870,35 @@ static int soc21_common_suspend(void *handle) return soc21_common_hw_fini(adev); } +static bool soc21_need_reset_on_resume(struct amdgpu_device *adev) +{ + u32 sol_reg1, sol_reg2; + + /* Will reset for the following suspend abort cases. + * 1) Only reset dGPU side. + * 2) S3 suspend got aborted and TOS is active. + */ + if (!(adev->flags & AMD_IS_APU) && adev->in_s3 && + !adev->suspend_complete) { + sol_reg1 = RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_81); + msleep(100); + sol_reg2 = RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_81); + + return (sol_reg1 != sol_reg2); + } + + return false; +} + static int soc21_common_resume(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + if (soc21_need_reset_on_resume(adev)) { + dev_info(adev->dev, "S3 suspend aborted, resetting..."); + soc21_asic_reset(adev); + } + return soc21_common_hw_init(adev); } @@ -865,6 +928,7 @@ static int soc21_common_set_clockgating_state(void *handle, case IP_VERSION(7, 7, 0): case IP_VERSION(7, 7, 1): case IP_VERSION(7, 11, 0): + case IP_VERSION(7, 11, 1): adev->nbio.funcs->update_medium_grain_clock_gating(adev, state == AMD_CG_STATE_GATE); adev->nbio.funcs->update_medium_grain_light_sleep(adev, @@ -921,4 +985,6 @@ static const struct amd_ip_funcs soc21_common_ip_funcs = { .set_clockgating_state = soc21_common_set_clockgating_state, .set_powergating_state = soc21_common_set_powergating_state, .get_clockgating_state = soc21_common_get_clockgating_state, + .dump_ip_state = NULL, + .print_ip_state = NULL, }; diff --git a/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h b/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h index 879bb7af297c..3ac56a9645eb 100644 --- a/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h +++ b/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h @@ -36,6 +36,9 @@ enum ras_command { TA_RAS_COMMAND__ENABLE_FEATURES = 0, TA_RAS_COMMAND__DISABLE_FEATURES, TA_RAS_COMMAND__TRIGGER_ERROR, + TA_RAS_COMMAND__QUERY_BLOCK_INFO, + TA_RAS_COMMAND__QUERY_SUB_BLOCK_INFO, + TA_RAS_COMMAND__QUERY_ADDRESS, }; enum ta_ras_status { @@ -105,6 +108,11 @@ enum ta_ras_error_type { TA_RAS_ERROR__POISON = 8, }; +enum ta_ras_address_type { + TA_RAS_MCA_TO_PA, + TA_RAS_PA_TO_MCA, +}; + /* Input/output structures for RAS commands */ /**********************************************************/ @@ -133,12 +141,39 @@ struct ta_ras_init_flags { uint8_t channel_dis_num; }; +struct ta_ras_mca_addr { + uint64_t err_addr; + uint32_t ch_inst; + uint32_t umc_inst; + uint32_t node_inst; + uint32_t socket_id; +}; + +struct ta_ras_phy_addr { + uint64_t pa; + uint32_t bank; + uint32_t channel_idx; +}; + +struct ta_ras_query_address_input { + enum ta_ras_address_type addr_type; + struct ta_ras_mca_addr ma; + struct ta_ras_phy_addr pa; +}; + struct ta_ras_output_flags { uint8_t ras_init_success_flag; uint8_t err_inject_switch_disable_flag; uint8_t reg_access_failure_flag; }; +struct ta_ras_query_address_output { + /* don't use the flags here */ + struct ta_ras_output_flags flags; + struct ta_ras_mca_addr ma; + struct ta_ras_phy_addr pa; +}; + /* Common input structure for RAS callbacks */ /**********************************************************/ union ta_ras_cmd_input { @@ -146,12 +181,14 @@ union ta_ras_cmd_input { struct ta_ras_enable_features_input enable_features; struct ta_ras_disable_features_input disable_features; struct ta_ras_trigger_error_input trigger_error; + struct ta_ras_query_address_input address; uint32_t reserve_pad[256]; }; union ta_ras_cmd_output { struct ta_ras_output_flags flags; + struct ta_ras_query_address_output address; uint32_t reserve_pad[256]; }; diff --git a/drivers/gpu/drm/amd/amdgpu/tonga_ih.c b/drivers/gpu/drm/amd/amdgpu/tonga_ih.c index 450b6e831509..24d49d813607 100644 --- a/drivers/gpu/drm/amd/amdgpu/tonga_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/tonga_ih.c @@ -486,6 +486,8 @@ static const struct amd_ip_funcs tonga_ih_ip_funcs = { .post_soft_reset = tonga_ih_post_soft_reset, .set_clockgating_state = tonga_ih_set_clockgating_state, .set_powergating_state = tonga_ih_set_powergating_state, + .dump_ip_state = NULL, + .print_ip_state = NULL, }; static const struct amdgpu_ih_funcs tonga_ih_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c index 7458a218e89d..bfe61d86ee6c 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c @@ -28,27 +28,7 @@ #include "umc/umc_12_0_0_sh_mask.h" #include "mp/mp_13_0_6_sh_mask.h" -const uint32_t - umc_v12_0_channel_idx_tbl[] - [UMC_V12_0_UMC_INSTANCE_NUM] - [UMC_V12_0_CHANNEL_INSTANCE_NUM] = { - {{3, 7, 11, 15, 2, 6, 10, 14}, {1, 5, 9, 13, 0, 4, 8, 12}, - {19, 23, 27, 31, 18, 22, 26, 30}, {17, 21, 25, 29, 16, 20, 24, 28}}, - {{47, 43, 39, 35, 46, 42, 38, 34}, {45, 41, 37, 33, 44, 40, 36, 32}, - {63, 59, 55, 51, 62, 58, 54, 50}, {61, 57, 53, 49, 60, 56, 52, 48}}, - {{79, 75, 71, 67, 78, 74, 70, 66}, {77, 73, 69, 65, 76, 72, 68, 64}, - {95, 91, 87, 83, 94, 90, 86, 82}, {93, 89, 85, 81, 92, 88, 84, 80}}, - {{99, 103, 107, 111, 98, 102, 106, 110}, {97, 101, 105, 109, 96, 100, 104, 108}, - {115, 119, 123, 127, 114, 118, 122, 126}, {113, 117, 121, 125, 112, 116, 120, 124}} - }; - -/* mapping of MCA error address to normalized address */ -static const uint32_t umc_v12_0_ma2na_mapping[] = { - 0, 5, 6, 8, 9, 14, 12, 13, - 10, 11, 15, 16, 17, 18, 19, 20, - 21, 22, 23, 24, 25, 26, 27, 28, - 24, 7, 29, 30, -}; +#define MAX_ECC_NUM_PER_RETIREMENT 32 static inline uint64_t get_umc_v12_0_reg_offset(struct amdgpu_device *adev, uint32_t node_inst, @@ -89,12 +69,28 @@ static void umc_v12_0_reset_error_count(struct amdgpu_device *adev) umc_v12_0_reset_error_count_per_channel, NULL); } +bool umc_v12_0_is_deferred_error(struct amdgpu_device *adev, uint64_t mc_umc_status) +{ + dev_info(adev->dev, + "MCA_UMC_STATUS(0x%llx): Val:%llu, Poison:%llu, Deferred:%llu, PCC:%llu, UC:%llu, TCC:%llu\n", + mc_umc_status, + REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val), + REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Poison), + REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred), + REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, PCC), + REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UC), + REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, TCC) + ); + + return (amdgpu_ras_is_poison_mode_supported(adev) && + (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1) && + (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1)); +} + bool umc_v12_0_is_uncorrectable_error(struct amdgpu_device *adev, uint64_t mc_umc_status) { - if (amdgpu_ras_is_poison_mode_supported(adev) && - (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1) && - (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1)) - return true; + if (umc_v12_0_is_deferred_error(adev, mc_umc_status)) + return false; return ((REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1) && (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, PCC) == 1 || @@ -104,9 +100,7 @@ bool umc_v12_0_is_uncorrectable_error(struct amdgpu_device *adev, uint64_t mc_um bool umc_v12_0_is_correctable_error(struct amdgpu_device *adev, uint64_t mc_umc_status) { - if (amdgpu_ras_is_poison_mode_supported(adev) && - (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1) && - (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1)) + if (umc_v12_0_is_deferred_error(adev, mc_umc_status)) return false; return (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 && @@ -119,9 +113,10 @@ bool umc_v12_0_is_correctable_error(struct amdgpu_device *adev, uint64_t mc_umc_ !(umc_v12_0_is_uncorrectable_error(adev, mc_umc_status))))); } -static void umc_v12_0_query_correctable_error_count(struct amdgpu_device *adev, +static void umc_v12_0_query_error_count_per_type(struct amdgpu_device *adev, uint64_t umc_reg_offset, - unsigned long *error_count) + unsigned long *error_count, + check_error_type_func error_type_func) { uint64_t mc_umc_status; uint64_t mc_umc_status_addr; @@ -129,31 +124,11 @@ static void umc_v12_0_query_correctable_error_count(struct amdgpu_device *adev, mc_umc_status_addr = SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_STATUST0); - /* Rely on MCUMC_STATUS for correctable error counter - * MCUMC_STATUS is a 64 bit register - */ + /* Check MCUMC_STATUS */ mc_umc_status = RREG64_PCIE_EXT((mc_umc_status_addr + umc_reg_offset) * 4); - if (umc_v12_0_is_correctable_error(adev, mc_umc_status)) - *error_count += 1; -} - -static void umc_v12_0_query_uncorrectable_error_count(struct amdgpu_device *adev, - uint64_t umc_reg_offset, - unsigned long *error_count) -{ - uint64_t mc_umc_status; - uint64_t mc_umc_status_addr; - - mc_umc_status_addr = - SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_STATUST0); - - /* Check the MCUMC_STATUS. */ - mc_umc_status = - RREG64_PCIE_EXT((mc_umc_status_addr + umc_reg_offset) * 4); - - if (umc_v12_0_is_uncorrectable_error(adev, mc_umc_status)) + if (error_type_func(adev, mc_umc_status)) *error_count += 1; } @@ -162,7 +137,7 @@ static int umc_v12_0_query_error_count(struct amdgpu_device *adev, uint32_t ch_inst, void *data) { struct ras_err_data *err_data = (struct ras_err_data *)data; - unsigned long ue_count = 0, ce_count = 0; + unsigned long ue_count = 0, ce_count = 0, de_count = 0; /* NOTE: node_inst is converted by adev->umc.active_mask and the range is [0-3], * which can be used as die ID directly */ @@ -174,11 +149,16 @@ static int umc_v12_0_query_error_count(struct amdgpu_device *adev, uint64_t umc_reg_offset = get_umc_v12_0_reg_offset(adev, node_inst, umc_inst, ch_inst); - umc_v12_0_query_correctable_error_count(adev, umc_reg_offset, &ce_count); - umc_v12_0_query_uncorrectable_error_count(adev, umc_reg_offset, &ue_count); + umc_v12_0_query_error_count_per_type(adev, umc_reg_offset, + &ce_count, umc_v12_0_is_correctable_error); + umc_v12_0_query_error_count_per_type(adev, umc_reg_offset, + &ue_count, umc_v12_0_is_uncorrectable_error); + umc_v12_0_query_error_count_per_type(adev, umc_reg_offset, + &de_count, umc_v12_0_is_deferred_error); amdgpu_ras_error_statistic_ue_count(err_data, &mcm_info, NULL, ue_count); amdgpu_ras_error_statistic_ce_count(err_data, &mcm_info, NULL, ce_count); + amdgpu_ras_error_statistic_de_count(err_data, &mcm_info, NULL, de_count); return 0; } @@ -192,80 +172,35 @@ static void umc_v12_0_query_ras_error_count(struct amdgpu_device *adev, umc_v12_0_reset_error_count(adev); } -static bool umc_v12_0_bit_wise_xor(uint32_t val) +static void umc_v12_0_convert_error_address(struct amdgpu_device *adev, + struct ras_err_data *err_data, + struct ta_ras_query_address_input *addr_in) { - bool result = 0; - int i; + uint32_t col, row, row_xor, bank, channel_index; + uint64_t soc_pa, retired_page, column, err_addr; + struct ta_ras_query_address_output addr_out; - for (i = 0; i < 32; i++) - result = result ^ ((val >> i) & 0x1); + err_addr = addr_in->ma.err_addr; + addr_in->addr_type = TA_RAS_MCA_TO_PA; + if (psp_ras_query_address(&adev->psp, addr_in, &addr_out)) { + dev_warn(adev->dev, "Failed to query RAS physical address for 0x%llx", + err_addr); - return result; -} + return; + } + + soc_pa = addr_out.pa.pa; + bank = addr_out.pa.bank; + channel_index = addr_out.pa.channel_idx; -static void umc_v12_0_convert_error_address(struct amdgpu_device *adev, - struct ras_err_data *err_data, uint64_t err_addr, - uint32_t ch_inst, uint32_t umc_inst, - uint32_t node_inst) -{ - uint32_t channel_index, i; - uint64_t soc_pa, na, retired_page, column; - uint32_t bank_hash0, bank_hash1, bank_hash2, bank_hash3, col, row, row_xor; - uint32_t bank0, bank1, bank2, bank3, bank; - - bank_hash0 = (err_addr >> UMC_V12_0_MCA_B0_BIT) & 0x1ULL; - bank_hash1 = (err_addr >> UMC_V12_0_MCA_B1_BIT) & 0x1ULL; - bank_hash2 = (err_addr >> UMC_V12_0_MCA_B2_BIT) & 0x1ULL; - bank_hash3 = (err_addr >> UMC_V12_0_MCA_B3_BIT) & 0x1ULL; col = (err_addr >> 1) & 0x1fULL; row = (err_addr >> 10) & 0x3fffULL; - - /* apply bank hash algorithm */ - bank0 = - bank_hash0 ^ (UMC_V12_0_XOR_EN0 & - (umc_v12_0_bit_wise_xor(col & UMC_V12_0_COL_XOR0) ^ - (umc_v12_0_bit_wise_xor(row & UMC_V12_0_ROW_XOR0)))); - bank1 = - bank_hash1 ^ (UMC_V12_0_XOR_EN1 & - (umc_v12_0_bit_wise_xor(col & UMC_V12_0_COL_XOR1) ^ - (umc_v12_0_bit_wise_xor(row & UMC_V12_0_ROW_XOR1)))); - bank2 = - bank_hash2 ^ (UMC_V12_0_XOR_EN2 & - (umc_v12_0_bit_wise_xor(col & UMC_V12_0_COL_XOR2) ^ - (umc_v12_0_bit_wise_xor(row & UMC_V12_0_ROW_XOR2)))); - bank3 = - bank_hash3 ^ (UMC_V12_0_XOR_EN3 & - (umc_v12_0_bit_wise_xor(col & UMC_V12_0_COL_XOR3) ^ - (umc_v12_0_bit_wise_xor(row & UMC_V12_0_ROW_XOR3)))); - - bank = bank0 | (bank1 << 1) | (bank2 << 2) | (bank3 << 3); - err_addr &= ~0x3c0ULL; - err_addr |= (bank << UMC_V12_0_MCA_B0_BIT); - - na = 0x0; - /* convert mca error address to normalized address */ - for (i = 1; i < ARRAY_SIZE(umc_v12_0_ma2na_mapping); i++) - na |= ((err_addr >> i) & 0x1ULL) << umc_v12_0_ma2na_mapping[i]; - - channel_index = - adev->umc.channel_idx_tbl[node_inst * adev->umc.umc_inst_num * - adev->umc.channel_inst_num + - umc_inst * adev->umc.channel_inst_num + - ch_inst]; - /* translate umc channel address to soc pa, 3 parts are included */ - soc_pa = ADDR_OF_32KB_BLOCK(na) | - ADDR_OF_256B_BLOCK(channel_index) | - OFFSET_IN_256B_BLOCK(na); - - /* the umc channel bits are not original values, they are hashed */ - UMC_V12_0_SET_CHANNEL_HASH(channel_index, soc_pa); - + row_xor = row ^ (0x1ULL << 13); /* clear [C3 C2] in soc physical address */ soc_pa &= ~(0x3ULL << UMC_V12_0_PA_C2_BIT); /* clear [C4] in soc physical address */ soc_pa &= ~(0x1ULL << UMC_V12_0_PA_C4_BIT); - row_xor = row ^ (0x1ULL << 13); /* loop for all possibilities of [C4 C3 C2] */ for (column = 0; column < UMC_V12_0_NA_MAP_PA_NUM; column++) { retired_page = soc_pa | ((column & 0x3) << UMC_V12_0_PA_C2_BIT); @@ -277,7 +212,7 @@ static void umc_v12_0_convert_error_address(struct amdgpu_device *adev, "Error Address(PA):0x%-10llx Row:0x%-4x Col:0x%-2x Bank:0x%x Channel:0x%x\n", retired_page, row, col, bank, channel_index); amdgpu_umc_fill_error_record(err_data, err_addr, - retired_page, channel_index, umc_inst); + retired_page, channel_index, addr_in->ma.umc_inst); /* shift R13 bit */ retired_page ^= (0x1ULL << UMC_V12_0_PA_R13_BIT); @@ -285,18 +220,79 @@ static void umc_v12_0_convert_error_address(struct amdgpu_device *adev, "Error Address(PA):0x%-10llx Row:0x%-4x Col:0x%-2x Bank:0x%x Channel:0x%x\n", retired_page, row_xor, col, bank, channel_index); amdgpu_umc_fill_error_record(err_data, err_addr, - retired_page, channel_index, umc_inst); + retired_page, channel_index, addr_in->ma.umc_inst); + } +} + +static int umc_v12_0_convert_err_addr(struct amdgpu_device *adev, + struct ta_ras_query_address_input *addr_in, + uint64_t *pfns, int len) +{ + uint32_t col, row, row_xor, bank, channel_index; + uint64_t soc_pa, retired_page, column, err_addr; + struct ta_ras_query_address_output addr_out; + uint32_t pos = 0; + + err_addr = addr_in->ma.err_addr; + addr_in->addr_type = TA_RAS_MCA_TO_PA; + if (psp_ras_query_address(&adev->psp, addr_in, &addr_out)) { + dev_warn(adev->dev, "Failed to query RAS physical address for 0x%llx", + err_addr); + return 0; } + + soc_pa = addr_out.pa.pa; + bank = addr_out.pa.bank; + channel_index = addr_out.pa.channel_idx; + + col = (err_addr >> 1) & 0x1fULL; + row = (err_addr >> 10) & 0x3fffULL; + row_xor = row ^ (0x1ULL << 13); + /* clear [C3 C2] in soc physical address */ + soc_pa &= ~(0x3ULL << UMC_V12_0_PA_C2_BIT); + /* clear [C4] in soc physical address */ + soc_pa &= ~(0x1ULL << UMC_V12_0_PA_C4_BIT); + + /* loop for all possibilities of [C4 C3 C2] */ + for (column = 0; column < UMC_V12_0_NA_MAP_PA_NUM; column++) { + retired_page = soc_pa | ((column & 0x3) << UMC_V12_0_PA_C2_BIT); + retired_page |= (((column & 0x4) >> 2) << UMC_V12_0_PA_C4_BIT); + + if (pos >= len) + return 0; + pfns[pos++] = retired_page >> AMDGPU_GPU_PAGE_SHIFT; + + /* include column bit 0 and 1 */ + col &= 0x3; + col |= (column << 2); + dev_info(adev->dev, + "Error Address(PA):0x%-10llx Row:0x%-4x Col:0x%-2x Bank:0x%x Channel:0x%x\n", + retired_page, row, col, bank, channel_index); + + /* shift R13 bit */ + retired_page ^= (0x1ULL << UMC_V12_0_PA_R13_BIT); + + if (pos >= len) + return 0; + pfns[pos++] = retired_page >> AMDGPU_GPU_PAGE_SHIFT; + + dev_info(adev->dev, + "Error Address(PA):0x%-10llx Row:0x%-4x Col:0x%-2x Bank:0x%x Channel:0x%x\n", + retired_page, row_xor, col, bank, channel_index); + } + + return pos; } static int umc_v12_0_query_error_address(struct amdgpu_device *adev, uint32_t node_inst, uint32_t umc_inst, uint32_t ch_inst, void *data) { + struct ras_err_data *err_data = (struct ras_err_data *)data; + struct ta_ras_query_address_input addr_in; uint64_t mc_umc_status_addr; uint64_t mc_umc_status, err_addr; uint64_t mc_umc_addrt0; - struct ras_err_data *err_data = (struct ras_err_data *)data; uint64_t umc_reg_offset = get_umc_v12_0_reg_offset(adev, node_inst, umc_inst, ch_inst); @@ -316,10 +312,8 @@ static int umc_v12_0_query_error_address(struct amdgpu_device *adev, } /* calculate error address if ue error is detected */ - if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 && - REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, AddrV) == 1 && - REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UC) == 1) { - + if (umc_v12_0_is_uncorrectable_error(adev, mc_umc_status) || + umc_v12_0_is_deferred_error(adev, mc_umc_status)) { mc_umc_addrt0 = SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_ADDRT0); @@ -327,8 +321,19 @@ static int umc_v12_0_query_error_address(struct amdgpu_device *adev, err_addr = REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr); - umc_v12_0_convert_error_address(adev, err_data, err_addr, - ch_inst, umc_inst, node_inst); + if (!adev->aid_mask && + adev->smuio.funcs && + adev->smuio.funcs->get_socket_id) + addr_in.ma.socket_id = adev->smuio.funcs->get_socket_id(adev); + else + addr_in.ma.socket_id = 0; + + addr_in.ma.err_addr = err_addr; + addr_in.ma.ch_inst = ch_inst; + addr_in.ma.umc_inst = umc_inst; + addr_in.ma.node_inst = node_inst; + + umc_v12_0_convert_error_address(adev, err_data, &addr_in); } /* clear umc status */ @@ -371,13 +376,20 @@ static int umc_v12_0_err_cnt_init_per_channel(struct amdgpu_device *adev, return 0; } +#ifdef TO_BE_REMOVED static void umc_v12_0_ecc_info_query_ras_error_count(struct amdgpu_device *adev, void *ras_error_status) { + struct ras_query_context qctx; + + memset(&qctx, 0, sizeof(qctx)); + qctx.event_id = amdgpu_ras_acquire_event_id(adev, amdgpu_ras_intr_triggered() ? + RAS_EVENT_TYPE_ISR : RAS_EVENT_TYPE_INVALID); + amdgpu_mca_smu_log_ras_error(adev, - AMDGPU_RAS_BLOCK__UMC, AMDGPU_MCA_ERROR_TYPE_CE, ras_error_status); + AMDGPU_RAS_BLOCK__UMC, AMDGPU_MCA_ERROR_TYPE_CE, ras_error_status, &qctx); amdgpu_mca_smu_log_ras_error(adev, - AMDGPU_RAS_BLOCK__UMC, AMDGPU_MCA_ERROR_TYPE_UE, ras_error_status); + AMDGPU_RAS_BLOCK__UMC, AMDGPU_MCA_ERROR_TYPE_UE, ras_error_status, &qctx); } static void umc_v12_0_ecc_info_query_ras_error_address(struct amdgpu_device *adev, @@ -385,44 +397,74 @@ static void umc_v12_0_ecc_info_query_ras_error_address(struct amdgpu_device *ade { struct ras_err_node *err_node; uint64_t mc_umc_status; + struct ras_err_info *err_info; + struct ras_err_addr *mca_err_addr, *tmp; struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; + struct ta_ras_query_address_input addr_in; for_each_ras_error(err_node, err_data) { - mc_umc_status = err_node->err_info.err_addr.err_status; - if (!mc_umc_status) + err_info = &err_node->err_info; + if (list_empty(&err_info->err_addr_list)) continue; - if (umc_v12_0_is_uncorrectable_error(adev, mc_umc_status)) { - uint64_t mca_addr, err_addr, mca_ipid; - uint32_t InstanceIdLo; - struct amdgpu_smuio_mcm_config_info *mcm_info; - - mcm_info = &err_node->err_info.mcm_info; - mca_addr = err_node->err_info.err_addr.err_addr; - mca_ipid = err_node->err_info.err_addr.err_ipid; - - err_addr = REG_GET_FIELD(mca_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr); - InstanceIdLo = REG_GET_FIELD(mca_ipid, MCMP1_IPIDT0, InstanceIdLo); - - dev_info(adev->dev, "UMC:IPID:0x%llx, aid:%d, inst:%d, ch:%d, err_addr:0x%llx\n", - mca_ipid, - mcm_info->die_id, - MCA_IPID_LO_2_UMC_INST(InstanceIdLo), - MCA_IPID_LO_2_UMC_CH(InstanceIdLo), - err_addr); - - umc_v12_0_convert_error_address(adev, - err_data, err_addr, - MCA_IPID_LO_2_UMC_CH(InstanceIdLo), - MCA_IPID_LO_2_UMC_INST(InstanceIdLo), - mcm_info->die_id); - - /* Clear umc error address content */ - memset(&err_node->err_info.err_addr, - 0, sizeof(err_node->err_info.err_addr)); + addr_in.ma.node_inst = err_info->mcm_info.die_id; + addr_in.ma.socket_id = err_info->mcm_info.socket_id; + + list_for_each_entry_safe(mca_err_addr, tmp, &err_info->err_addr_list, node) { + mc_umc_status = mca_err_addr->err_status; + if (mc_umc_status && + (umc_v12_0_is_uncorrectable_error(adev, mc_umc_status) || + umc_v12_0_is_deferred_error(adev, mc_umc_status))) { + uint64_t mca_addr, err_addr, mca_ipid; + uint32_t InstanceIdLo; + + mca_addr = mca_err_addr->err_addr; + mca_ipid = mca_err_addr->err_ipid; + + err_addr = REG_GET_FIELD(mca_addr, + MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr); + InstanceIdLo = REG_GET_FIELD(mca_ipid, MCMP1_IPIDT0, InstanceIdLo); + + addr_in.ma.err_addr = err_addr; + addr_in.ma.ch_inst = MCA_IPID_LO_2_UMC_CH(InstanceIdLo); + addr_in.ma.umc_inst = MCA_IPID_LO_2_UMC_INST(InstanceIdLo); + + dev_info(adev->dev, "UMC:IPID:0x%llx, aid:%d, inst:%d, ch:%d, err_addr:0x%llx\n", + mca_ipid, + err_info->mcm_info.die_id, + MCA_IPID_LO_2_UMC_INST(InstanceIdLo), + MCA_IPID_LO_2_UMC_CH(InstanceIdLo), + err_addr); + + umc_v12_0_convert_error_address(adev, + err_data, &addr_in); + } + + /* Delete error address node from list and free memory */ + amdgpu_ras_del_mca_err_addr(err_info, mca_err_addr); } } } +#endif + +static bool umc_v12_0_check_ecc_err_status(struct amdgpu_device *adev, + enum amdgpu_mca_error_type type, void *ras_error_status) +{ + uint64_t mc_umc_status = *(uint64_t *)ras_error_status; + + switch (type) { + case AMDGPU_MCA_ERROR_TYPE_UE: + return umc_v12_0_is_uncorrectable_error(adev, mc_umc_status); + case AMDGPU_MCA_ERROR_TYPE_CE: + return umc_v12_0_is_correctable_error(adev, mc_umc_status); + case AMDGPU_MCA_ERROR_TYPE_DE: + return umc_v12_0_is_deferred_error(adev, mc_umc_status); + default: + return false; + } + + return false; +} static void umc_v12_0_err_cnt_init(struct amdgpu_device *adev) { @@ -444,12 +486,223 @@ const struct amdgpu_ras_block_hw_ops umc_v12_0_ras_hw_ops = { .query_ras_error_address = umc_v12_0_query_ras_error_address, }; +static int umc_v12_0_aca_bank_parser(struct aca_handle *handle, struct aca_bank *bank, + enum aca_smu_type type, void *data) +{ + struct amdgpu_device *adev = handle->adev; + struct aca_bank_info info; + enum aca_error_type err_type; + u64 status, count; + u32 ext_error_code; + int ret; + + status = bank->regs[ACA_REG_IDX_STATUS]; + if (umc_v12_0_is_deferred_error(adev, status)) + err_type = ACA_ERROR_TYPE_DEFERRED; + else if (umc_v12_0_is_uncorrectable_error(adev, status)) + err_type = ACA_ERROR_TYPE_UE; + else if (umc_v12_0_is_correctable_error(adev, status)) + err_type = ACA_ERROR_TYPE_CE; + else + return 0; + + ret = aca_bank_info_decode(bank, &info); + if (ret) + return ret; + + amdgpu_umc_update_ecc_status(adev, + bank->regs[ACA_REG_IDX_STATUS], + bank->regs[ACA_REG_IDX_IPID], + bank->regs[ACA_REG_IDX_ADDR]); + + ext_error_code = ACA_REG__STATUS__ERRORCODEEXT(status); + count = ext_error_code == 0 ? + ACA_REG__MISC0__ERRCNT(bank->regs[ACA_REG_IDX_MISC0]) : 1ULL; + + return aca_error_cache_log_bank_error(handle, &info, err_type, count); +} + +static const struct aca_bank_ops umc_v12_0_aca_bank_ops = { + .aca_bank_parser = umc_v12_0_aca_bank_parser, +}; + +const struct aca_info umc_v12_0_aca_info = { + .hwip = ACA_HWIP_TYPE_UMC, + .mask = ACA_ERROR_UE_MASK | ACA_ERROR_CE_MASK | ACA_ERROR_DEFERRED_MASK, + .bank_ops = &umc_v12_0_aca_bank_ops, +}; + +static int umc_v12_0_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block) +{ + int ret; + + ret = amdgpu_umc_ras_late_init(adev, ras_block); + if (ret) + return ret; + + ret = amdgpu_ras_bind_aca(adev, AMDGPU_RAS_BLOCK__UMC, + &umc_v12_0_aca_info, NULL); + if (ret) + return ret; + + return 0; +} + +static int umc_v12_0_update_ecc_status(struct amdgpu_device *adev, + uint64_t status, uint64_t ipid, uint64_t addr) +{ + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + uint16_t hwid, mcatype; + struct ta_ras_query_address_input addr_in; + uint64_t page_pfn[UMC_V12_0_BAD_PAGE_NUM_PER_CHANNEL]; + uint64_t err_addr, hash_val = 0; + struct ras_ecc_err *ecc_err; + int count; + int ret; + + hwid = REG_GET_FIELD(ipid, MCMP1_IPIDT0, HardwareID); + mcatype = REG_GET_FIELD(ipid, MCMP1_IPIDT0, McaType); + + if ((hwid != MCA_UMC_HWID_V12_0) || (mcatype != MCA_UMC_MCATYPE_V12_0)) + return 0; + + if (!status) + return 0; + + if (!umc_v12_0_is_deferred_error(adev, status)) + return 0; + + err_addr = REG_GET_FIELD(addr, + MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr); + + dev_info(adev->dev, + "UMC:IPID:0x%llx, socket:%llu, aid:%llu, inst:%llu, ch:%llu, err_addr:0x%llx\n", + ipid, + MCA_IPID_2_SOCKET_ID(ipid), + MCA_IPID_2_DIE_ID(ipid), + MCA_IPID_2_UMC_INST(ipid), + MCA_IPID_2_UMC_CH(ipid), + err_addr); + + memset(page_pfn, 0, sizeof(page_pfn)); + + memset(&addr_in, 0, sizeof(addr_in)); + addr_in.ma.err_addr = err_addr; + addr_in.ma.ch_inst = MCA_IPID_2_UMC_CH(ipid); + addr_in.ma.umc_inst = MCA_IPID_2_UMC_INST(ipid); + addr_in.ma.node_inst = MCA_IPID_2_DIE_ID(ipid); + addr_in.ma.socket_id = MCA_IPID_2_SOCKET_ID(ipid); + + count = umc_v12_0_convert_err_addr(adev, + &addr_in, page_pfn, ARRAY_SIZE(page_pfn)); + if (count <= 0) { + dev_warn(adev->dev, "Fail to convert error address! count:%d\n", count); + return 0; + } + + ret = amdgpu_umc_build_pages_hash(adev, + page_pfn, count, &hash_val); + if (ret) { + dev_err(adev->dev, "Fail to build error pages hash\n"); + return ret; + } + + ecc_err = kzalloc(sizeof(*ecc_err), GFP_KERNEL); + if (!ecc_err) + return -ENOMEM; + + ecc_err->err_pages.pfn = kcalloc(count, sizeof(*ecc_err->err_pages.pfn), GFP_KERNEL); + if (!ecc_err->err_pages.pfn) { + kfree(ecc_err); + return -ENOMEM; + } + + memcpy(ecc_err->err_pages.pfn, page_pfn, count * sizeof(*ecc_err->err_pages.pfn)); + ecc_err->err_pages.count = count; + + ecc_err->hash_index = hash_val; + ecc_err->status = status; + ecc_err->ipid = ipid; + ecc_err->addr = addr; + + ret = amdgpu_umc_logs_ecc_err(adev, &con->umc_ecc_log.de_page_tree, ecc_err); + if (ret) { + if (ret == -EEXIST) + con->umc_ecc_log.de_updated = true; + else + dev_err(adev->dev, "Fail to log ecc error! ret:%d\n", ret); + + kfree(ecc_err->err_pages.pfn); + kfree(ecc_err); + return ret; + } + + con->umc_ecc_log.de_updated = true; + + return 0; +} + +static int umc_v12_0_fill_error_record(struct amdgpu_device *adev, + struct ras_ecc_err *ecc_err, void *ras_error_status) +{ + struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; + uint32_t i = 0; + int ret = 0; + + if (!err_data || !ecc_err) + return -EINVAL; + + for (i = 0; i < ecc_err->err_pages.count; i++) { + ret = amdgpu_umc_fill_error_record(err_data, + ecc_err->addr, + ecc_err->err_pages.pfn[i] << AMDGPU_GPU_PAGE_SHIFT, + MCA_IPID_2_UMC_CH(ecc_err->ipid), + MCA_IPID_2_UMC_INST(ecc_err->ipid)); + if (ret) + break; + } + + err_data->de_count++; + + return ret; +} + +static void umc_v12_0_query_ras_ecc_err_addr(struct amdgpu_device *adev, + void *ras_error_status) +{ + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + struct ras_ecc_err *entries[MAX_ECC_NUM_PER_RETIREMENT]; + struct radix_tree_root *ecc_tree; + int new_detected, ret, i; + + ecc_tree = &con->umc_ecc_log.de_page_tree; + + mutex_lock(&con->umc_ecc_log.lock); + new_detected = radix_tree_gang_lookup_tag(ecc_tree, (void **)entries, + 0, ARRAY_SIZE(entries), UMC_ECC_NEW_DETECTED_TAG); + for (i = 0; i < new_detected; i++) { + if (!entries[i]) + continue; + + ret = umc_v12_0_fill_error_record(adev, entries[i], ras_error_status); + if (ret) { + dev_err(adev->dev, "Fail to fill umc error record, ret:%d\n", ret); + break; + } + radix_tree_tag_clear(ecc_tree, entries[i]->hash_index, UMC_ECC_NEW_DETECTED_TAG); + } + mutex_unlock(&con->umc_ecc_log.lock); +} + struct amdgpu_umc_ras umc_v12_0_ras = { .ras_block = { .hw_ops = &umc_v12_0_ras_hw_ops, + .ras_late_init = umc_v12_0_ras_late_init, }, .err_cnt_init = umc_v12_0_err_cnt_init, .query_ras_poison_mode = umc_v12_0_query_ras_poison_mode, - .ecc_info_query_ras_error_count = umc_v12_0_ecc_info_query_ras_error_count, - .ecc_info_query_ras_error_address = umc_v12_0_ecc_info_query_ras_error_address, + .ecc_info_query_ras_error_address = umc_v12_0_query_ras_ecc_err_addr, + .check_ecc_err_status = umc_v12_0_check_ecc_err_status, + .update_ecc_status = umc_v12_0_update_ecc_status, }; + diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h index e8de3a92251a..b4974793850b 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h +++ b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h @@ -55,79 +55,37 @@ #define UMC_V12_0_NA_MAP_PA_NUM 8 /* R13 bit shift should be considered, double the number */ #define UMC_V12_0_BAD_PAGE_NUM_PER_CHANNEL (UMC_V12_0_NA_MAP_PA_NUM * 2) -/* bank bits in MCA error address */ -#define UMC_V12_0_MCA_B0_BIT 6 -#define UMC_V12_0_MCA_B1_BIT 7 -#define UMC_V12_0_MCA_B2_BIT 8 -#define UMC_V12_0_MCA_B3_BIT 9 + /* column bits in SOC physical address */ #define UMC_V12_0_PA_C2_BIT 15 #define UMC_V12_0_PA_C4_BIT 21 /* row bits in SOC physical address */ #define UMC_V12_0_PA_R13_BIT 35 -/* channel index bits in SOC physical address */ -#define UMC_V12_0_PA_CH4_BIT 12 -#define UMC_V12_0_PA_CH5_BIT 13 -#define UMC_V12_0_PA_CH6_BIT 14 - -/* bank hash settings */ -#define UMC_V12_0_XOR_EN0 1 -#define UMC_V12_0_XOR_EN1 1 -#define UMC_V12_0_XOR_EN2 1 -#define UMC_V12_0_XOR_EN3 1 -#define UMC_V12_0_COL_XOR0 0x0 -#define UMC_V12_0_COL_XOR1 0x0 -#define UMC_V12_0_COL_XOR2 0x800 -#define UMC_V12_0_COL_XOR3 0x1000 -#define UMC_V12_0_ROW_XOR0 0x11111 -#define UMC_V12_0_ROW_XOR1 0x22222 -#define UMC_V12_0_ROW_XOR2 0x4444 -#define UMC_V12_0_ROW_XOR3 0x8888 - -/* channel hash settings */ -#define UMC_V12_0_HASH_4K 0 -#define UMC_V12_0_HASH_64K 1 -#define UMC_V12_0_HASH_2M 1 -#define UMC_V12_0_HASH_1G 1 -#define UMC_V12_0_HASH_1T 1 - -/* XOR some bits of PA into CH4~CH6 bits (bits 12~14 of PA), - * hash bit is only effective when related setting is enabled - */ -#define UMC_V12_0_CHANNEL_HASH_CH4(channel_idx, pa) ((((channel_idx) >> 5) & 0x1) ^ \ - (((pa) >> 20) & 0x1ULL & UMC_V12_0_HASH_64K) ^ \ - (((pa) >> 27) & 0x1ULL & UMC_V12_0_HASH_2M) ^ \ - (((pa) >> 34) & 0x1ULL & UMC_V12_0_HASH_1G) ^ \ - (((pa) >> 41) & 0x1ULL & UMC_V12_0_HASH_1T)) -#define UMC_V12_0_CHANNEL_HASH_CH5(channel_idx, pa) ((((channel_idx) >> 6) & 0x1) ^ \ - (((pa) >> 21) & 0x1ULL & UMC_V12_0_HASH_64K) ^ \ - (((pa) >> 28) & 0x1ULL & UMC_V12_0_HASH_2M) ^ \ - (((pa) >> 35) & 0x1ULL & UMC_V12_0_HASH_1G) ^ \ - (((pa) >> 42) & 0x1ULL & UMC_V12_0_HASH_1T)) -#define UMC_V12_0_CHANNEL_HASH_CH6(channel_idx, pa) ((((channel_idx) >> 4) & 0x1) ^ \ - (((pa) >> 19) & 0x1ULL & UMC_V12_0_HASH_64K) ^ \ - (((pa) >> 26) & 0x1ULL & UMC_V12_0_HASH_2M) ^ \ - (((pa) >> 33) & 0x1ULL & UMC_V12_0_HASH_1G) ^ \ - (((pa) >> 40) & 0x1ULL & UMC_V12_0_HASH_1T) ^ \ - (((pa) >> 47) & 0x1ULL & UMC_V12_0_HASH_4K)) -#define UMC_V12_0_SET_CHANNEL_HASH(channel_idx, pa) do { \ - (pa) &= ~(0x7ULL << UMC_V12_0_PA_CH4_BIT); \ - (pa) |= (UMC_V12_0_CHANNEL_HASH_CH4(channel_idx, pa) << UMC_V12_0_PA_CH4_BIT); \ - (pa) |= (UMC_V12_0_CHANNEL_HASH_CH5(channel_idx, pa) << UMC_V12_0_PA_CH5_BIT); \ - (pa) |= (UMC_V12_0_CHANNEL_HASH_CH6(channel_idx, pa) << UMC_V12_0_PA_CH6_BIT); \ - } while (0) + +#define MCA_UMC_HWID_V12_0 0x96 +#define MCA_UMC_MCATYPE_V12_0 0x0 #define MCA_IPID_LO_2_UMC_CH(_ipid_lo) (((((_ipid_lo) >> 20) & 0x1) * 4) + \ (((_ipid_lo) >> 12) & 0xF)) #define MCA_IPID_LO_2_UMC_INST(_ipid_lo) (((_ipid_lo) >> 21) & 0x7) +#define MCA_IPID_2_DIE_ID(ipid) ((REG_GET_FIELD(ipid, MCMP1_IPIDT0, InstanceIdHi) >> 2) & 0x03) + +#define MCA_IPID_2_UMC_CH(ipid) \ + (MCA_IPID_LO_2_UMC_CH(REG_GET_FIELD(ipid, MCMP1_IPIDT0, InstanceIdLo))) + +#define MCA_IPID_2_UMC_INST(ipid) \ + (MCA_IPID_LO_2_UMC_INST(REG_GET_FIELD(ipid, MCMP1_IPIDT0, InstanceIdLo))) + +#define MCA_IPID_2_SOCKET_ID(ipid) \ + (((REG_GET_FIELD(ipid, MCMP1_IPIDT0, InstanceIdLo) & 0x1) << 2) | \ + (REG_GET_FIELD(ipid, MCMP1_IPIDT0, InstanceIdHi) & 0x03)) + +bool umc_v12_0_is_deferred_error(struct amdgpu_device *adev, uint64_t mc_umc_status); bool umc_v12_0_is_uncorrectable_error(struct amdgpu_device *adev, uint64_t mc_umc_status); bool umc_v12_0_is_correctable_error(struct amdgpu_device *adev, uint64_t mc_umc_status); -extern const uint32_t - umc_v12_0_channel_idx_tbl[] - [UMC_V12_0_UMC_INSTANCE_NUM] - [UMC_V12_0_CHANNEL_INSTANCE_NUM]; +typedef bool (*check_error_type_func)(struct amdgpu_device *adev, uint64_t mc_umc_status); extern struct amdgpu_umc_ras umc_v12_0_ras; diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/umc_v6_0.c index 0d6b50528d76..97fa88ed770c 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_0.c @@ -25,7 +25,7 @@ static void umc_v6_0_init_registers(struct amdgpu_device *adev) { - unsigned i,j; + unsigned i, j; for (i = 0; i < 4; i++) for (j = 0; j < 4; j++) diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v8_10.c b/drivers/gpu/drm/amd/amdgpu/umc_v8_10.c index c4c77257710c..a32f87992f20 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v8_10.c +++ b/drivers/gpu/drm/amd/amdgpu/umc_v8_10.c @@ -442,11 +442,6 @@ static void umc_v8_10_ecc_info_query_ras_error_address(struct amdgpu_device *ade umc_v8_10_ecc_info_query_error_address, ras_error_status); } -static void umc_v8_10_set_eeprom_table_version(struct amdgpu_ras_eeprom_table_header *hdr) -{ - hdr->version = RAS_TABLE_VER_V2_1; -} - const struct amdgpu_ras_block_hw_ops umc_v8_10_ras_hw_ops = { .query_ras_error_count = umc_v8_10_query_ras_error_count, .query_ras_error_address = umc_v8_10_query_ras_error_address, @@ -460,5 +455,4 @@ struct amdgpu_umc_ras umc_v8_10_ras = { .query_ras_poison_mode = umc_v8_10_query_ras_poison_mode, .ecc_info_query_ras_error_count = umc_v8_10_ecc_info_query_ras_error_count, .ecc_info_query_ras_error_address = umc_v8_10_ecc_info_query_ras_error_address, - .set_eeprom_table_version = umc_v8_10_set_eeprom_table_version, }; diff --git a/drivers/gpu/drm/amd/amdgpu/umsch_mm_v4_0.c b/drivers/gpu/drm/amd/amdgpu/umsch_mm_v4_0.c index 8e7b763cfdb7..bd57896ab85d 100644 --- a/drivers/gpu/drm/amd/amdgpu/umsch_mm_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/umsch_mm_v4_0.c @@ -60,7 +60,7 @@ static int umsch_mm_v4_0_load_microcode(struct amdgpu_umsch_mm *umsch) umsch->cmd_buf_curr_ptr = umsch->cmd_buf_ptr; - if (amdgpu_ip_version(adev, VCN_HWIP, 0) == IP_VERSION(4, 0, 5)) { + if (amdgpu_ip_version(adev, VCN_HWIP, 0) >= IP_VERSION(4, 0, 5)) { WREG32_SOC15(VCN, 0, regUVD_IPX_DLDO_CONFIG, 1 << UVD_IPX_DLDO_CONFIG__ONO0_PWR_CONFIG__SHIFT); SOC15_WAIT_ON_RREG(VCN, 0, regUVD_IPX_DLDO_STATUS, @@ -225,6 +225,8 @@ static int umsch_mm_v4_0_ring_start(struct amdgpu_umsch_mm *umsch) WREG32_SOC15(VCN, 0, regVCN_UMSCH_RB_SIZE, ring->ring_size); + ring->wptr = 0; + data = RREG32_SOC15(VCN, 0, regVCN_RB_ENABLE); data &= ~(VCN_RB_ENABLE__AUDIO_RB_EN_MASK); WREG32_SOC15(VCN, 0, regVCN_RB_ENABLE, data); @@ -248,7 +250,7 @@ static int umsch_mm_v4_0_ring_stop(struct amdgpu_umsch_mm *umsch) data = REG_SET_FIELD(data, VCN_UMSCH_RB_DB_CTRL, EN, 0); WREG32_SOC15(VCN, 0, regVCN_UMSCH_RB_DB_CTRL, data); - if (amdgpu_ip_version(adev, VCN_HWIP, 0) == IP_VERSION(4, 0, 5)) { + if (amdgpu_ip_version(adev, VCN_HWIP, 0) >= IP_VERSION(4, 0, 5)) { WREG32_SOC15(VCN, 0, regUVD_IPX_DLDO_CONFIG, 2 << UVD_IPX_DLDO_CONFIG__ONO0_PWR_CONFIG__SHIFT); SOC15_WAIT_ON_RREG(VCN, 0, regUVD_IPX_DLDO_STATUS, @@ -271,6 +273,8 @@ static int umsch_mm_v4_0_set_hw_resources(struct amdgpu_umsch_mm *umsch) set_hw_resources.vmid_mask_mm_vcn = umsch->vmid_mask_mm_vcn; set_hw_resources.vmid_mask_mm_vpe = umsch->vmid_mask_mm_vpe; + set_hw_resources.collaboration_mask_vpe = + adev->vpe.collaborate_mode ? 0x3 : 0x0; set_hw_resources.engine_mask = umsch->engine_mask; set_hw_resources.vcn0_hqd_mask[0] = umsch->vcn0_hqd_mask; @@ -346,6 +350,7 @@ static int umsch_mm_v4_0_add_queue(struct amdgpu_umsch_mm *umsch, add_queue.h_queue = input_ptr->h_queue; add_queue.vm_context_cntl = input_ptr->vm_context_cntl; add_queue.is_context_suspended = input_ptr->is_context_suspended; + add_queue.collaboration_mode = adev->vpe.collaborate_mode ? 1 : 0; add_queue.api_status.api_completion_fence_addr = umsch->ring.fence_drv.gpu_addr; add_queue.api_status.api_completion_fence_value = ++umsch->ring.fence_drv.sync_seq; diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c b/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c index a6006f231c65..805d6662c88b 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c @@ -819,6 +819,8 @@ static const struct amd_ip_funcs uvd_v3_1_ip_funcs = { .soft_reset = uvd_v3_1_soft_reset, .set_clockgating_state = uvd_v3_1_set_clockgating_state, .set_powergating_state = uvd_v3_1_set_powergating_state, + .dump_ip_state = NULL, + .print_ip_state = NULL, }; const struct amdgpu_ip_block_version uvd_v3_1_ip_block = { diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c index 1aa09ad7bbe3..3f19c606f4de 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c @@ -769,6 +769,8 @@ static const struct amd_ip_funcs uvd_v4_2_ip_funcs = { .soft_reset = uvd_v4_2_soft_reset, .set_clockgating_state = uvd_v4_2_set_clockgating_state, .set_powergating_state = uvd_v4_2_set_powergating_state, + .dump_ip_state = NULL, + .print_ip_state = NULL, }; static const struct amdgpu_ring_funcs uvd_v4_2_ring_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c index f8b229b75435..efd903c21d48 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c @@ -877,6 +877,8 @@ static const struct amd_ip_funcs uvd_v5_0_ip_funcs = { .set_clockgating_state = uvd_v5_0_set_clockgating_state, .set_powergating_state = uvd_v5_0_set_powergating_state, .get_clockgating_state = uvd_v5_0_get_clockgating_state, + .dump_ip_state = NULL, + .print_ip_state = NULL, }; static const struct amdgpu_ring_funcs uvd_v5_0_ring_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c index a9a6880f44e3..495de5068455 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c @@ -1545,6 +1545,8 @@ static const struct amd_ip_funcs uvd_v6_0_ip_funcs = { .set_clockgating_state = uvd_v6_0_set_clockgating_state, .set_powergating_state = uvd_v6_0_set_powergating_state, .get_clockgating_state = uvd_v6_0_get_clockgating_state, + .dump_ip_state = NULL, + .print_ip_state = NULL, }; static const struct amdgpu_ring_funcs uvd_v6_0_ring_phys_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c index a08e7abca423..66fada199bda 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c @@ -626,6 +626,8 @@ static const struct amd_ip_funcs vce_v2_0_ip_funcs = { .soft_reset = vce_v2_0_soft_reset, .set_clockgating_state = vce_v2_0_set_clockgating_state, .set_powergating_state = vce_v2_0_set_powergating_state, + .dump_ip_state = NULL, + .print_ip_state = NULL, }; static const struct amdgpu_ring_funcs vce_v2_0_ring_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c index f4760748d349..32517c364cf7 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c @@ -913,6 +913,8 @@ static const struct amd_ip_funcs vce_v3_0_ip_funcs = { .set_clockgating_state = vce_v3_0_set_clockgating_state, .set_powergating_state = vce_v3_0_set_powergating_state, .get_clockgating_state = vce_v3_0_get_clockgating_state, + .dump_ip_state = NULL, + .print_ip_state = NULL, }; static const struct amdgpu_ring_funcs vce_v3_0_ring_phys_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c index 25ba27151ac0..cb253bd3a2a2 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c @@ -304,7 +304,7 @@ static int vcn_v1_0_resume(void *handle) */ static void vcn_v1_0_mc_resume_spg_mode(struct amdgpu_device *adev) { - uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4); + uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw[0]->size + 4); uint32_t offset; /* cache window 0: fw */ @@ -371,7 +371,7 @@ static void vcn_v1_0_mc_resume_spg_mode(struct amdgpu_device *adev) static void vcn_v1_0_mc_resume_dpg_mode(struct amdgpu_device *adev) { - uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4); + uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw[0]->size + 4); uint32_t offset; /* cache window 0: fw */ @@ -1902,6 +1902,8 @@ static const struct amd_ip_funcs vcn_v1_0_ip_funcs = { .post_soft_reset = NULL /* vcn_v1_0_post_soft_reset */, .set_clockgating_state = vcn_v1_0_set_clockgating_state, .set_powergating_state = vcn_v1_0_set_powergating_state, + .dump_ip_state = NULL, + .print_ip_state = NULL, }; /* diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c index 18794394c5a0..f18fd61c435e 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c @@ -330,7 +330,7 @@ static int vcn_v2_0_resume(void *handle) */ static void vcn_v2_0_mc_resume(struct amdgpu_device *adev) { - uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4); + uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw[0]->size + 4); uint32_t offset; if (amdgpu_sriov_vf(adev)) @@ -386,7 +386,7 @@ static void vcn_v2_0_mc_resume(struct amdgpu_device *adev) static void vcn_v2_0_mc_resume_dpg_mode(struct amdgpu_device *adev, bool indirect) { - uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4); + uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw[0]->size + 4); uint32_t offset; /* cache window 0: fw */ @@ -1878,7 +1878,7 @@ static int vcn_v2_0_start_sriov(struct amdgpu_device *adev) init_table += header->vcn_table_offset; - size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4); + size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw[0]->size + 4); MMSCH_V2_0_INSERT_DIRECT_RD_MOD_WT( SOC15_REG_OFFSET(UVD, i, mmUVD_STATUS), @@ -2008,6 +2008,8 @@ static const struct amd_ip_funcs vcn_v2_0_ip_funcs = { .post_soft_reset = NULL, .set_clockgating_state = vcn_v2_0_set_clockgating_state, .set_powergating_state = vcn_v2_0_set_powergating_state, + .dump_ip_state = NULL, + .print_ip_state = NULL, }; static const struct amdgpu_ring_funcs vcn_v2_0_dec_ring_vm_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c index aba403d71806..baec14bde2a2 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c @@ -414,13 +414,15 @@ static int vcn_v2_5_resume(void *handle) */ static void vcn_v2_5_mc_resume(struct amdgpu_device *adev) { - uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4); + uint32_t size; uint32_t offset; int i; for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { if (adev->vcn.harvest_config & (1 << i)) continue; + + size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw[i]->size + 4); /* cache window 0: fw */ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW, @@ -469,7 +471,7 @@ static void vcn_v2_5_mc_resume(struct amdgpu_device *adev) static void vcn_v2_5_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect) { - uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4); + uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw[inst_idx]->size + 4); uint32_t offset; /* cache window 0: fw */ @@ -1240,7 +1242,7 @@ static int vcn_v2_5_sriov_start(struct amdgpu_device *adev) SOC15_REG_OFFSET(VCN, i, mmUVD_STATUS), ~UVD_STATUS__UVD_BUSY, UVD_STATUS__UVD_BUSY); - size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4); + size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw[i]->size + 4); /* mc resume*/ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { MMSCH_V1_0_INSERT_DIRECT_WT( @@ -1899,6 +1901,8 @@ static const struct amd_ip_funcs vcn_v2_5_ip_funcs = { .post_soft_reset = NULL, .set_clockgating_state = vcn_v2_5_set_clockgating_state, .set_powergating_state = vcn_v2_5_set_powergating_state, + .dump_ip_state = NULL, + .print_ip_state = NULL, }; static const struct amd_ip_funcs vcn_v2_6_ip_funcs = { @@ -1919,6 +1923,8 @@ static const struct amd_ip_funcs vcn_v2_6_ip_funcs = { .post_soft_reset = NULL, .set_clockgating_state = vcn_v2_5_set_clockgating_state, .set_powergating_state = vcn_v2_5_set_powergating_state, + .dump_ip_state = NULL, + .print_ip_state = NULL, }; const struct amdgpu_ip_block_version vcn_v2_5_ip_block = diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c index e02af4de521c..6b31cf4b8aac 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c @@ -359,6 +359,7 @@ static int vcn_v3_0_hw_init(void *handle) } } + return 0; done: if (!r) DRM_INFO("VCN decode and encode initialized successfully(under %s).\n", @@ -449,7 +450,7 @@ static int vcn_v3_0_resume(void *handle) */ static void vcn_v3_0_mc_resume(struct amdgpu_device *adev, int inst) { - uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4); + uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw[inst]->size + 4); uint32_t offset; /* cache window 0: fw */ @@ -499,7 +500,7 @@ static void vcn_v3_0_mc_resume(struct amdgpu_device *adev, int inst) static void vcn_v3_0_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect) { - uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4); + uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw[inst_idx]->size + 4); uint32_t offset; /* cache window 0: fw */ @@ -1332,7 +1333,7 @@ static int vcn_v3_0_start_sriov(struct amdgpu_device *adev) mmUVD_STATUS), ~UVD_STATUS__UVD_BUSY, UVD_STATUS__UVD_BUSY); - cache_size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4); + cache_size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw[i]->size + 4); if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i, @@ -2230,6 +2231,8 @@ static const struct amd_ip_funcs vcn_v3_0_ip_funcs = { .post_soft_reset = NULL, .set_clockgating_state = vcn_v3_0_set_clockgating_state, .set_powergating_state = vcn_v3_0_set_powergating_state, + .dump_ip_state = NULL, + .print_ip_state = NULL, }; const struct amdgpu_ip_block_version vcn_v3_0_ip_block = { diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c index 8ab01ae919d2..ac1b8ead03b3 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c @@ -288,6 +288,7 @@ static int vcn_v4_0_hw_init(void *handle) } } + return 0; done: if (!r) DRM_INFO("VCN decode and encode initialized successfully(under %s).\n", @@ -382,7 +383,7 @@ static void vcn_v4_0_mc_resume(struct amdgpu_device *adev, int inst) uint32_t offset, size; const struct common_firmware_header *hdr; - hdr = (const struct common_firmware_header *)adev->vcn.fw->data; + hdr = (const struct common_firmware_header *)adev->vcn.fw[inst]->data; size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8); /* cache window 0: fw */ @@ -442,7 +443,7 @@ static void vcn_v4_0_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_idx { uint32_t offset, size; const struct common_firmware_header *hdr; - hdr = (const struct common_firmware_header *)adev->vcn.fw->data; + hdr = (const struct common_firmware_header *)adev->vcn.fw[inst_idx]->data; size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8); /* cache window 0: fw */ @@ -1289,7 +1290,7 @@ static int vcn_v4_0_start_sriov(struct amdgpu_device *adev) regUVD_STATUS), ~UVD_STATUS__UVD_BUSY, UVD_STATUS__UVD_BUSY); - cache_size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4); + cache_size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw[i]->size + 4); if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i, @@ -2130,6 +2131,8 @@ static const struct amd_ip_funcs vcn_v4_0_ip_funcs = { .post_soft_reset = NULL, .set_clockgating_state = vcn_v4_0_set_clockgating_state, .set_powergating_state = vcn_v4_0_set_powergating_state, + .dump_ip_state = NULL, + .print_ip_state = NULL, }; const struct amdgpu_ip_block_version vcn_v4_0_ip_block = { diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c index 810bbfccd6f2..2279d8fce03d 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c @@ -332,7 +332,7 @@ static void vcn_v4_0_3_mc_resume(struct amdgpu_device *adev, int inst_idx) uint32_t offset, size, vcn_inst; const struct common_firmware_header *hdr; - hdr = (const struct common_firmware_header *)adev->vcn.fw->data; + hdr = (const struct common_firmware_header *)adev->vcn.fw[inst_idx]->data; size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8); vcn_inst = GET_INST(VCN, inst_idx); @@ -407,7 +407,7 @@ static void vcn_v4_0_3_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_i uint32_t offset, size; const struct common_firmware_header *hdr; - hdr = (const struct common_firmware_header *)adev->vcn.fw->data; + hdr = (const struct common_firmware_header *)adev->vcn.fw[inst_idx]->data; size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8); /* cache window 0: fw */ @@ -894,7 +894,7 @@ static int vcn_v4_0_3_start_sriov(struct amdgpu_device *adev) MMSCH_V4_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCN, 0, regUVD_STATUS), ~UVD_STATUS__UVD_BUSY, UVD_STATUS__UVD_BUSY); - cache_size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4); + cache_size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw[i]->size + 4); if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0, @@ -1660,6 +1660,8 @@ static const struct amd_ip_funcs vcn_v4_0_3_ip_funcs = { .post_soft_reset = NULL, .set_clockgating_state = vcn_v4_0_3_set_clockgating_state, .set_powergating_state = vcn_v4_0_3_set_powergating_state, + .dump_ip_state = NULL, + .print_ip_state = NULL, }; const struct amdgpu_ip_block_version vcn_v4_0_3_ip_block = { diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c index 49e4c3c09aca..81fb99729f37 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c @@ -45,7 +45,7 @@ #define mmUVD_DPG_LMA_DATA_BASE_IDX regUVD_DPG_LMA_DATA_BASE_IDX #define VCN_VID_SOC_ADDRESS_2_0 0x1fb00 -#define VCN1_VID_SOC_ADDRESS_3_0 0x48300 +#define VCN1_VID_SOC_ADDRESS_3_0 (0x48300 + 0x38000) #define VCN_HARVEST_MMSCH 0 @@ -237,6 +237,7 @@ static int vcn_v4_0_5_hw_init(void *handle) goto done; } + return 0; done: if (!r) DRM_INFO("VCN decode and encode initialized successfully(under %s).\n", @@ -329,7 +330,7 @@ static void vcn_v4_0_5_mc_resume(struct amdgpu_device *adev, int inst) uint32_t offset, size; const struct common_firmware_header *hdr; - hdr = (const struct common_firmware_header *)adev->vcn.fw->data; + hdr = (const struct common_firmware_header *)adev->vcn.fw[inst]->data; size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8); /* cache window 0: fw */ @@ -390,7 +391,7 @@ static void vcn_v4_0_5_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_i uint32_t offset, size; const struct common_firmware_header *hdr; - hdr = (const struct common_firmware_header *)adev->vcn.fw->data; + hdr = (const struct common_firmware_header *)adev->vcn.fw[inst_idx]->data; size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8); /* cache window 0: fw */ @@ -486,7 +487,8 @@ static void vcn_v4_0_5_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_i /* VCN global tiling registers */ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( - VCN, 0, regUVD_GFX10_ADDR_CONFIG), adev->gfx.config.gb_addr_config, 0, indirect); + VCN, inst_idx, regUVD_GFX10_ADDR_CONFIG), + adev->gfx.config.gb_addr_config, 0, indirect); } /** @@ -911,7 +913,6 @@ static int vcn_v4_0_5_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, b VCN, inst_idx, regUVD_MASTINT_EN), UVD_MASTINT_EN__VCPU_EN_MASK, 0, indirect); - if (indirect) amdgpu_vcn_psp_update_sram(adev, inst_idx, 0); @@ -1684,6 +1685,9 @@ static int vcn_v4_0_5_process_interrupt(struct amdgpu_device *adev, struct amdgp case SOC15_IH_CLIENTID_VCN: ip_instance = 0; break; + case SOC15_IH_CLIENTID_VCN1: + ip_instance = 1; + break; default: DRM_ERROR("Unhandled client id: %d\n", entry->client_id); return 0; @@ -1749,6 +1753,8 @@ static const struct amd_ip_funcs vcn_v4_0_5_ip_funcs = { .post_soft_reset = NULL, .set_clockgating_state = vcn_v4_0_5_set_clockgating_state, .set_powergating_state = vcn_v4_0_5_set_powergating_state, + .dump_ip_state = NULL, + .print_ip_state = NULL, }; const struct amdgpu_ip_block_version vcn_v4_0_5_ip_block = { diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c new file mode 100644 index 000000000000..851975b5ce29 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c @@ -0,0 +1,1342 @@ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include <linux/firmware.h> +#include "amdgpu.h" +#include "amdgpu_vcn.h" +#include "amdgpu_pm.h" +#include "soc15.h" +#include "soc15d.h" +#include "soc15_hw_ip.h" +#include "vcn_v2_0.h" + +#include "vcn/vcn_5_0_0_offset.h" +#include "vcn/vcn_5_0_0_sh_mask.h" +#include "ivsrcid/vcn/irqsrcs_vcn_4_0.h" +#include "vcn_v5_0_0.h" + +#include <drm/drm_drv.h> + +static int amdgpu_ih_clientid_vcns[] = { + SOC15_IH_CLIENTID_VCN, + SOC15_IH_CLIENTID_VCN1 +}; + +static void vcn_v5_0_0_set_unified_ring_funcs(struct amdgpu_device *adev); +static void vcn_v5_0_0_set_irq_funcs(struct amdgpu_device *adev); +static int vcn_v5_0_0_set_powergating_state(void *handle, + enum amd_powergating_state state); +static int vcn_v5_0_0_pause_dpg_mode(struct amdgpu_device *adev, + int inst_idx, struct dpg_pause_state *new_state); +static void vcn_v5_0_0_unified_ring_set_wptr(struct amdgpu_ring *ring); + +/** + * vcn_v5_0_0_early_init - set function pointers and load microcode + * + * @handle: amdgpu_device pointer + * + * Set ring and irq function pointers + * Load microcode from filesystem + */ +static int vcn_v5_0_0_early_init(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + /* re-use enc ring as unified ring */ + adev->vcn.num_enc_rings = 1; + + vcn_v5_0_0_set_unified_ring_funcs(adev); + vcn_v5_0_0_set_irq_funcs(adev); + + return amdgpu_vcn_early_init(adev); +} + +/** + * vcn_v5_0_0_sw_init - sw init for VCN block + * + * @handle: amdgpu_device pointer + * + * Load firmware and sw initialization + */ +static int vcn_v5_0_0_sw_init(void *handle) +{ + struct amdgpu_ring *ring; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i, r; + + r = amdgpu_vcn_sw_init(adev); + if (r) + return r; + + amdgpu_vcn_setup_ucode(adev); + + r = amdgpu_vcn_resume(adev); + if (r) + return r; + + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + volatile struct amdgpu_vcn5_fw_shared *fw_shared; + + if (adev->vcn.harvest_config & (1 << i)) + continue; + + atomic_set(&adev->vcn.inst[i].sched_score, 0); + + /* VCN UNIFIED TRAP */ + r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_vcns[i], + VCN_4_0__SRCID__UVD_ENC_GENERAL_PURPOSE, &adev->vcn.inst[i].irq); + if (r) + return r; + + /* VCN POISON TRAP */ + r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_vcns[i], + VCN_4_0__SRCID_UVD_POISON, &adev->vcn.inst[i].irq); + if (r) + return r; + + ring = &adev->vcn.inst[i].ring_enc[0]; + ring->use_doorbell = true; + ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 2 + 8 * i; + + ring->vm_hub = AMDGPU_MMHUB0(0); + sprintf(ring->name, "vcn_unified_%d", i); + + r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[i].irq, 0, + AMDGPU_RING_PRIO_0, &adev->vcn.inst[i].sched_score); + if (r) + return r; + + fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; + fw_shared->present_flag_0 = cpu_to_le32(AMDGPU_FW_SHARED_FLAG_0_UNIFIED_QUEUE); + fw_shared->sq.is_enabled = 1; + + if (amdgpu_vcnfw_log) + amdgpu_vcn_fwlog_init(&adev->vcn.inst[i]); + } + + if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) + adev->vcn.pause_dpg_mode = vcn_v5_0_0_pause_dpg_mode; + + return 0; +} + +/** + * vcn_v5_0_0_sw_fini - sw fini for VCN block + * + * @handle: amdgpu_device pointer + * + * VCN suspend and free up sw allocation + */ +static int vcn_v5_0_0_sw_fini(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i, r, idx; + + if (drm_dev_enter(adev_to_drm(adev), &idx)) { + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + volatile struct amdgpu_vcn5_fw_shared *fw_shared; + + if (adev->vcn.harvest_config & (1 << i)) + continue; + + fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; + fw_shared->present_flag_0 = 0; + fw_shared->sq.is_enabled = 0; + } + + drm_dev_exit(idx); + } + + r = amdgpu_vcn_suspend(adev); + if (r) + return r; + + r = amdgpu_vcn_sw_fini(adev); + + return r; +} + +/** + * vcn_v5_0_0_hw_init - start and test VCN block + * + * @handle: amdgpu_device pointer + * + * Initialize the hardware, boot up the VCPU and do some testing + */ +static int vcn_v5_0_0_hw_init(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_ring *ring; + int i, r; + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + + ring = &adev->vcn.inst[i].ring_enc[0]; + + adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell, + ((adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 8 * i), i); + + r = amdgpu_ring_test_helper(ring); + if (r) + goto done; + } + + return 0; +done: + if (!r) + DRM_INFO("VCN decode and encode initialized successfully(under %s).\n", + (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)?"DPG Mode":"SPG Mode"); + + return r; +} + +/** + * vcn_v5_0_0_hw_fini - stop the hardware block + * + * @handle: amdgpu_device pointer + * + * Stop the VCN block, mark ring as not ready any more + */ +static int vcn_v5_0_0_hw_fini(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i; + + cancel_delayed_work_sync(&adev->vcn.idle_work); + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + + amdgpu_irq_put(adev, &adev->vcn.inst[i].irq, 0); + } + + return 0; +} + +/** + * vcn_v5_0_0_suspend - suspend VCN block + * + * @handle: amdgpu_device pointer + * + * HW fini and suspend VCN block + */ +static int vcn_v5_0_0_suspend(void *handle) +{ + int r; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + r = vcn_v5_0_0_hw_fini(adev); + if (r) + return r; + + r = amdgpu_vcn_suspend(adev); + + return r; +} + +/** + * vcn_v5_0_0_resume - resume VCN block + * + * @handle: amdgpu_device pointer + * + * Resume firmware and hw init VCN block + */ +static int vcn_v5_0_0_resume(void *handle) +{ + int r; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + r = amdgpu_vcn_resume(adev); + if (r) + return r; + + r = vcn_v5_0_0_hw_init(adev); + + return r; +} + +/** + * vcn_v5_0_0_mc_resume - memory controller programming + * + * @adev: amdgpu_device pointer + * @inst: instance number + * + * Let the VCN memory controller know it's offsets + */ +static void vcn_v5_0_0_mc_resume(struct amdgpu_device *adev, int inst) +{ + uint32_t offset, size; + const struct common_firmware_header *hdr; + + hdr = (const struct common_firmware_header *)adev->vcn.fw[inst]->data; + size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8); + + /* cache window 0: fw */ + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { + WREG32_SOC15(VCN, inst, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW, + (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst].tmr_mc_addr_lo)); + WREG32_SOC15(VCN, inst, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH, + (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst].tmr_mc_addr_hi)); + WREG32_SOC15(VCN, inst, regUVD_VCPU_CACHE_OFFSET0, 0); + offset = 0; + } else { + WREG32_SOC15(VCN, inst, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW, + lower_32_bits(adev->vcn.inst[inst].gpu_addr)); + WREG32_SOC15(VCN, inst, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH, + upper_32_bits(adev->vcn.inst[inst].gpu_addr)); + offset = size; + WREG32_SOC15(VCN, inst, regUVD_VCPU_CACHE_OFFSET0, AMDGPU_UVD_FIRMWARE_OFFSET >> 3); + } + WREG32_SOC15(VCN, inst, regUVD_VCPU_CACHE_SIZE0, size); + + /* cache window 1: stack */ + WREG32_SOC15(VCN, inst, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW, + lower_32_bits(adev->vcn.inst[inst].gpu_addr + offset)); + WREG32_SOC15(VCN, inst, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH, + upper_32_bits(adev->vcn.inst[inst].gpu_addr + offset)); + WREG32_SOC15(VCN, inst, regUVD_VCPU_CACHE_OFFSET1, 0); + WREG32_SOC15(VCN, inst, regUVD_VCPU_CACHE_SIZE1, AMDGPU_VCN_STACK_SIZE); + + /* cache window 2: context */ + WREG32_SOC15(VCN, inst, regUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW, + lower_32_bits(adev->vcn.inst[inst].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE)); + WREG32_SOC15(VCN, inst, regUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH, + upper_32_bits(adev->vcn.inst[inst].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE)); + WREG32_SOC15(VCN, inst, regUVD_VCPU_CACHE_OFFSET2, 0); + WREG32_SOC15(VCN, inst, regUVD_VCPU_CACHE_SIZE2, AMDGPU_VCN_CONTEXT_SIZE); + + /* non-cache window */ + WREG32_SOC15(VCN, inst, regUVD_LMI_VCPU_NC0_64BIT_BAR_LOW, + lower_32_bits(adev->vcn.inst[inst].fw_shared.gpu_addr)); + WREG32_SOC15(VCN, inst, regUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH, + upper_32_bits(adev->vcn.inst[inst].fw_shared.gpu_addr)); + WREG32_SOC15(VCN, inst, regUVD_VCPU_NONCACHE_OFFSET0, 0); + WREG32_SOC15(VCN, inst, regUVD_VCPU_NONCACHE_SIZE0, + AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_vcn5_fw_shared))); +} + +/** + * vcn_v5_0_0_mc_resume_dpg_mode - memory controller programming for dpg mode + * + * @adev: amdgpu_device pointer + * @inst_idx: instance number index + * @indirect: indirectly write sram + * + * Let the VCN memory controller know it's offsets with dpg mode + */ +static void vcn_v5_0_0_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect) +{ + uint32_t offset, size; + const struct common_firmware_header *hdr; + + hdr = (const struct common_firmware_header *)adev->vcn.fw[inst_idx]->data; + size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8); + + /* cache window 0: fw */ + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { + if (!indirect) { + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, inst_idx, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), + (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst_idx].tmr_mc_addr_lo), 0, indirect); + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, inst_idx, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), + (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst_idx].tmr_mc_addr_hi), 0, indirect); + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, inst_idx, regUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect); + } else { + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, inst_idx, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), 0, 0, indirect); + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, inst_idx, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), 0, 0, indirect); + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, inst_idx, regUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect); + } + offset = 0; + } else { + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, inst_idx, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), + lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr), 0, indirect); + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, inst_idx, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), + upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr), 0, indirect); + offset = size; + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, inst_idx, regUVD_VCPU_CACHE_OFFSET0), + AMDGPU_UVD_FIRMWARE_OFFSET >> 3, 0, indirect); + } + + if (!indirect) + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, inst_idx, regUVD_VCPU_CACHE_SIZE0), size, 0, indirect); + else + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, inst_idx, regUVD_VCPU_CACHE_SIZE0), 0, 0, indirect); + + /* cache window 1: stack */ + if (!indirect) { + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, inst_idx, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), + lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset), 0, indirect); + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, inst_idx, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), + upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset), 0, indirect); + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, inst_idx, regUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect); + } else { + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, inst_idx, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), 0, 0, indirect); + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, inst_idx, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), 0, 0, indirect); + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, inst_idx, regUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect); + } + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, inst_idx, regUVD_VCPU_CACHE_SIZE1), AMDGPU_VCN_STACK_SIZE, 0, indirect); + + /* cache window 2: context */ + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, inst_idx, regUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW), + lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), 0, indirect); + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, inst_idx, regUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH), + upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), 0, indirect); + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, inst_idx, regUVD_VCPU_CACHE_OFFSET2), 0, 0, indirect); + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, inst_idx, regUVD_VCPU_CACHE_SIZE2), AMDGPU_VCN_CONTEXT_SIZE, 0, indirect); + + /* non-cache window */ + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, inst_idx, regUVD_LMI_VCPU_NC0_64BIT_BAR_LOW), + lower_32_bits(adev->vcn.inst[inst_idx].fw_shared.gpu_addr), 0, indirect); + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, inst_idx, regUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH), + upper_32_bits(adev->vcn.inst[inst_idx].fw_shared.gpu_addr), 0, indirect); + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, inst_idx, regUVD_VCPU_NONCACHE_OFFSET0), 0, 0, indirect); + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, inst_idx, regUVD_VCPU_NONCACHE_SIZE0), + AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_vcn5_fw_shared)), 0, indirect); + + /* VCN global tiling registers */ + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_GFX10_ADDR_CONFIG), adev->gfx.config.gb_addr_config, 0, indirect); + + return; +} + +/** + * vcn_v5_0_0_disable_static_power_gating - disable VCN static power gating + * + * @adev: amdgpu_device pointer + * @inst: instance number + * + * Disable static power gating for VCN block + */ +static void vcn_v5_0_0_disable_static_power_gating(struct amdgpu_device *adev, int inst) +{ + uint32_t data = 0; + + if (adev->pg_flags & AMD_PG_SUPPORT_VCN) { + data = 1 << UVD_IPX_DLDO_CONFIG__ONO2_PWR_CONFIG__SHIFT; + WREG32_SOC15(VCN, inst, regUVD_IPX_DLDO_CONFIG, data); + SOC15_WAIT_ON_RREG(VCN, inst, regUVD_IPX_DLDO_STATUS, 0, + UVD_IPX_DLDO_STATUS__ONO2_PWR_STATUS_MASK); + + data = 2 << UVD_IPX_DLDO_CONFIG__ONO3_PWR_CONFIG__SHIFT; + WREG32_SOC15(VCN, inst, regUVD_IPX_DLDO_CONFIG, data); + SOC15_WAIT_ON_RREG(VCN, inst, regUVD_IPX_DLDO_STATUS, + 1 << UVD_IPX_DLDO_STATUS__ONO3_PWR_STATUS__SHIFT, + UVD_IPX_DLDO_STATUS__ONO3_PWR_STATUS_MASK); + + data = 2 << UVD_IPX_DLDO_CONFIG__ONO4_PWR_CONFIG__SHIFT; + WREG32_SOC15(VCN, inst, regUVD_IPX_DLDO_CONFIG, data); + SOC15_WAIT_ON_RREG(VCN, inst, regUVD_IPX_DLDO_STATUS, + 1 << UVD_IPX_DLDO_STATUS__ONO4_PWR_STATUS__SHIFT, + UVD_IPX_DLDO_STATUS__ONO4_PWR_STATUS_MASK); + + data = 2 << UVD_IPX_DLDO_CONFIG__ONO5_PWR_CONFIG__SHIFT; + WREG32_SOC15(VCN, inst, regUVD_IPX_DLDO_CONFIG, data); + SOC15_WAIT_ON_RREG(VCN, inst, regUVD_IPX_DLDO_STATUS, + 1 << UVD_IPX_DLDO_STATUS__ONO5_PWR_STATUS__SHIFT, + UVD_IPX_DLDO_STATUS__ONO5_PWR_STATUS_MASK); + } else { + data = 1 << UVD_IPX_DLDO_CONFIG__ONO2_PWR_CONFIG__SHIFT; + WREG32_SOC15(VCN, inst, regUVD_IPX_DLDO_CONFIG, data); + SOC15_WAIT_ON_RREG(VCN, inst, regUVD_IPX_DLDO_STATUS, 0, + UVD_IPX_DLDO_STATUS__ONO2_PWR_STATUS_MASK); + + data = 1 << UVD_IPX_DLDO_CONFIG__ONO3_PWR_CONFIG__SHIFT; + WREG32_SOC15(VCN, inst, regUVD_IPX_DLDO_CONFIG, data); + SOC15_WAIT_ON_RREG(VCN, inst, regUVD_IPX_DLDO_STATUS, 0, + UVD_IPX_DLDO_STATUS__ONO3_PWR_STATUS_MASK); + + data = 1 << UVD_IPX_DLDO_CONFIG__ONO4_PWR_CONFIG__SHIFT; + WREG32_SOC15(VCN, inst, regUVD_IPX_DLDO_CONFIG, data); + SOC15_WAIT_ON_RREG(VCN, inst, regUVD_IPX_DLDO_STATUS, 0, + UVD_IPX_DLDO_STATUS__ONO4_PWR_STATUS_MASK); + + data = 1 << UVD_IPX_DLDO_CONFIG__ONO5_PWR_CONFIG__SHIFT; + WREG32_SOC15(VCN, inst, regUVD_IPX_DLDO_CONFIG, data); + SOC15_WAIT_ON_RREG(VCN, inst, regUVD_IPX_DLDO_STATUS, 0, + UVD_IPX_DLDO_STATUS__ONO5_PWR_STATUS_MASK); + } + + data = RREG32_SOC15(VCN, inst, regUVD_POWER_STATUS); + data &= ~0x103; + if (adev->pg_flags & AMD_PG_SUPPORT_VCN) + data |= UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON | + UVD_POWER_STATUS__UVD_PG_EN_MASK; + + WREG32_SOC15(VCN, inst, regUVD_POWER_STATUS, data); + return; +} + +/** + * vcn_v5_0_0_enable_static_power_gating - enable VCN static power gating + * + * @adev: amdgpu_device pointer + * @inst: instance number + * + * Enable static power gating for VCN block + */ +static void vcn_v5_0_0_enable_static_power_gating(struct amdgpu_device *adev, int inst) +{ + uint32_t data; + + if (adev->pg_flags & AMD_PG_SUPPORT_VCN) { + /* Before power off, this indicator has to be turned on */ + data = RREG32_SOC15(VCN, inst, regUVD_POWER_STATUS); + data &= ~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK; + data |= UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF; + WREG32_SOC15(VCN, inst, regUVD_POWER_STATUS, data); + + data = 2 << UVD_IPX_DLDO_CONFIG__ONO5_PWR_CONFIG__SHIFT; + WREG32_SOC15(VCN, inst, regUVD_IPX_DLDO_CONFIG, data); + SOC15_WAIT_ON_RREG(VCN, inst, regUVD_IPX_DLDO_STATUS, + 1 << UVD_IPX_DLDO_STATUS__ONO5_PWR_STATUS__SHIFT, + UVD_IPX_DLDO_STATUS__ONO5_PWR_STATUS_MASK); + + data = 2 << UVD_IPX_DLDO_CONFIG__ONO4_PWR_CONFIG__SHIFT; + WREG32_SOC15(VCN, inst, regUVD_IPX_DLDO_CONFIG, data); + SOC15_WAIT_ON_RREG(VCN, inst, regUVD_IPX_DLDO_STATUS, + 1 << UVD_IPX_DLDO_STATUS__ONO4_PWR_STATUS__SHIFT, + UVD_IPX_DLDO_STATUS__ONO4_PWR_STATUS_MASK); + + data = 2 << UVD_IPX_DLDO_CONFIG__ONO3_PWR_CONFIG__SHIFT; + WREG32_SOC15(VCN, inst, regUVD_IPX_DLDO_CONFIG, data); + SOC15_WAIT_ON_RREG(VCN, inst, regUVD_IPX_DLDO_STATUS, + 1 << UVD_IPX_DLDO_STATUS__ONO3_PWR_STATUS__SHIFT, + UVD_IPX_DLDO_STATUS__ONO3_PWR_STATUS_MASK); + + data = 2 << UVD_IPX_DLDO_CONFIG__ONO2_PWR_CONFIG__SHIFT; + WREG32_SOC15(VCN, inst, regUVD_IPX_DLDO_CONFIG, data); + SOC15_WAIT_ON_RREG(VCN, inst, regUVD_IPX_DLDO_STATUS, + 1 << UVD_IPX_DLDO_STATUS__ONO2_PWR_STATUS__SHIFT, + UVD_IPX_DLDO_STATUS__ONO2_PWR_STATUS_MASK); + } + return; +} + +/** + * vcn_v5_0_0_disable_clock_gating - disable VCN clock gating + * + * @adev: amdgpu_device pointer + * @inst: instance number + * + * Disable clock gating for VCN block + */ +static void vcn_v5_0_0_disable_clock_gating(struct amdgpu_device *adev, int inst) +{ + return; +} + +#if 0 +/** + * vcn_v5_0_0_disable_clock_gating_dpg_mode - disable VCN clock gating dpg mode + * + * @adev: amdgpu_device pointer + * @sram_sel: sram select + * @inst_idx: instance number index + * @indirect: indirectly write sram + * + * Disable clock gating for VCN block with dpg mode + */ +static void vcn_v5_0_0_disable_clock_gating_dpg_mode(struct amdgpu_device *adev, uint8_t sram_sel, + int inst_idx, uint8_t indirect) +{ + return; +} +#endif + +/** + * vcn_v5_0_0_enable_clock_gating - enable VCN clock gating + * + * @adev: amdgpu_device pointer + * @inst: instance number + * + * Enable clock gating for VCN block + */ +static void vcn_v5_0_0_enable_clock_gating(struct amdgpu_device *adev, int inst) +{ + return; +} + +/** + * vcn_v5_0_0_start_dpg_mode - VCN start with dpg mode + * + * @adev: amdgpu_device pointer + * @inst_idx: instance number index + * @indirect: indirectly write sram + * + * Start VCN block with dpg mode + */ +static int vcn_v5_0_0_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect) +{ + volatile struct amdgpu_vcn5_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr; + struct amdgpu_ring *ring; + uint32_t tmp; + + /* disable register anti-hang mechanism */ + WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, regUVD_POWER_STATUS), 1, + ~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK); + + /* enable dynamic power gating mode */ + tmp = RREG32_SOC15(VCN, inst_idx, regUVD_POWER_STATUS); + tmp |= UVD_POWER_STATUS__UVD_PG_MODE_MASK; + tmp |= UVD_POWER_STATUS__UVD_PG_EN_MASK; + WREG32_SOC15(VCN, inst_idx, regUVD_POWER_STATUS, tmp); + + if (indirect) + adev->vcn.inst[inst_idx].dpg_sram_curr_addr = (uint32_t *)adev->vcn.inst[inst_idx].dpg_sram_cpu_addr; + + /* enable VCPU clock */ + tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT); + tmp |= UVD_VCPU_CNTL__CLK_EN_MASK | UVD_VCPU_CNTL__BLK_RST_MASK; + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, inst_idx, regUVD_VCPU_CNTL), tmp, 0, indirect); + + /* disable master interrupt */ + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, inst_idx, regUVD_MASTINT_EN), 0, 0, indirect); + + /* setup regUVD_LMI_CTRL */ + tmp = (UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK | + UVD_LMI_CTRL__REQ_MODE_MASK | + UVD_LMI_CTRL__CRC_RESET_MASK | + UVD_LMI_CTRL__MASK_MC_URGENT_MASK | + UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK | + UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK | + (8 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) | + 0x00100000L); + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, inst_idx, regUVD_LMI_CTRL), tmp, 0, indirect); + + vcn_v5_0_0_mc_resume_dpg_mode(adev, inst_idx, indirect); + + tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT); + tmp |= UVD_VCPU_CNTL__CLK_EN_MASK; + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, inst_idx, regUVD_VCPU_CNTL), tmp, 0, indirect); + + /* enable LMI MC and UMC channels */ + tmp = 0x1f << UVD_LMI_CTRL2__RE_OFLD_MIF_WR_REQ_NUM__SHIFT; + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, inst_idx, regUVD_LMI_CTRL2), tmp, 0, indirect); + + /* enable master interrupt */ + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, inst_idx, regUVD_MASTINT_EN), + UVD_MASTINT_EN__VCPU_EN_MASK, 0, indirect); + + if (indirect) + amdgpu_vcn_psp_update_sram(adev, inst_idx, 0); + + ring = &adev->vcn.inst[inst_idx].ring_enc[0]; + + WREG32_SOC15(VCN, inst_idx, regUVD_RB_BASE_LO, ring->gpu_addr); + WREG32_SOC15(VCN, inst_idx, regUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); + WREG32_SOC15(VCN, inst_idx, regUVD_RB_SIZE, ring->ring_size / 4); + + tmp = RREG32_SOC15(VCN, inst_idx, regVCN_RB_ENABLE); + tmp &= ~(VCN_RB_ENABLE__RB1_EN_MASK); + WREG32_SOC15(VCN, inst_idx, regVCN_RB_ENABLE, tmp); + fw_shared->sq.queue_mode |= FW_QUEUE_RING_RESET; + WREG32_SOC15(VCN, inst_idx, regUVD_RB_RPTR, 0); + WREG32_SOC15(VCN, inst_idx, regUVD_RB_WPTR, 0); + + tmp = RREG32_SOC15(VCN, inst_idx, regUVD_RB_RPTR); + WREG32_SOC15(VCN, inst_idx, regUVD_RB_WPTR, tmp); + ring->wptr = RREG32_SOC15(VCN, inst_idx, regUVD_RB_WPTR); + + tmp = RREG32_SOC15(VCN, inst_idx, regVCN_RB_ENABLE); + tmp |= VCN_RB_ENABLE__RB1_EN_MASK; + WREG32_SOC15(VCN, inst_idx, regVCN_RB_ENABLE, tmp); + fw_shared->sq.queue_mode &= ~(FW_QUEUE_RING_RESET | FW_QUEUE_DPG_HOLD_OFF); + + WREG32_SOC15(VCN, inst_idx, regVCN_RB1_DB_CTRL, + ring->doorbell_index << VCN_RB1_DB_CTRL__OFFSET__SHIFT | + VCN_RB1_DB_CTRL__EN_MASK); + + return 0; +} + +/** + * vcn_v5_0_0_start - VCN start + * + * @adev: amdgpu_device pointer + * + * Start VCN block + */ +static int vcn_v5_0_0_start(struct amdgpu_device *adev) +{ + volatile struct amdgpu_vcn5_fw_shared *fw_shared; + struct amdgpu_ring *ring; + uint32_t tmp; + int i, j, k, r; + + if (adev->pm.dpm_enabled) + amdgpu_dpm_enable_uvd(adev, true); + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; + + if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) { + r = vcn_v5_0_0_start_dpg_mode(adev, i, adev->vcn.indirect_sram); + continue; + } + + /* disable VCN power gating */ + vcn_v5_0_0_disable_static_power_gating(adev, i); + + /* set VCN status busy */ + tmp = RREG32_SOC15(VCN, i, regUVD_STATUS) | UVD_STATUS__UVD_BUSY; + WREG32_SOC15(VCN, i, regUVD_STATUS, tmp); + + /* enable VCPU clock */ + WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL), + UVD_VCPU_CNTL__CLK_EN_MASK, ~UVD_VCPU_CNTL__CLK_EN_MASK); + + /* disable master interrupt */ + WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_MASTINT_EN), 0, + ~UVD_MASTINT_EN__VCPU_EN_MASK); + + /* enable LMI MC and UMC channels */ + WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_LMI_CTRL2), 0, + ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK); + + tmp = RREG32_SOC15(VCN, i, regUVD_SOFT_RESET); + tmp &= ~UVD_SOFT_RESET__LMI_SOFT_RESET_MASK; + tmp &= ~UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK; + WREG32_SOC15(VCN, i, regUVD_SOFT_RESET, tmp); + + /* setup regUVD_LMI_CTRL */ + tmp = RREG32_SOC15(VCN, i, regUVD_LMI_CTRL); + WREG32_SOC15(VCN, i, regUVD_LMI_CTRL, tmp | + UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK | + UVD_LMI_CTRL__MASK_MC_URGENT_MASK | + UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK | + UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK); + + vcn_v5_0_0_mc_resume(adev, i); + + /* VCN global tiling registers */ + WREG32_SOC15(VCN, i, regUVD_GFX10_ADDR_CONFIG, + adev->gfx.config.gb_addr_config); + + /* unblock VCPU register access */ + WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_RB_ARB_CTRL), 0, + ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK); + + /* release VCPU reset to boot */ + WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL), 0, + ~UVD_VCPU_CNTL__BLK_RST_MASK); + + for (j = 0; j < 10; ++j) { + uint32_t status; + + for (k = 0; k < 100; ++k) { + status = RREG32_SOC15(VCN, i, regUVD_STATUS); + if (status & 2) + break; + mdelay(10); + if (amdgpu_emu_mode == 1) + msleep(1); + } + + if (amdgpu_emu_mode == 1) { + r = -1; + if (status & 2) { + r = 0; + break; + } + } else { + r = 0; + if (status & 2) + break; + + dev_err(adev->dev, + "VCN[%d] is not responding, trying to reset the VCPU!!!\n", i); + WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL), + UVD_VCPU_CNTL__BLK_RST_MASK, + ~UVD_VCPU_CNTL__BLK_RST_MASK); + mdelay(10); + WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL), 0, + ~UVD_VCPU_CNTL__BLK_RST_MASK); + + mdelay(10); + r = -1; + } + } + + if (r) { + dev_err(adev->dev, "VCN[%d] is not responding, giving up!!!\n", i); + return r; + } + + /* enable master interrupt */ + WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_MASTINT_EN), + UVD_MASTINT_EN__VCPU_EN_MASK, + ~UVD_MASTINT_EN__VCPU_EN_MASK); + + /* clear the busy bit of VCN_STATUS */ + WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_STATUS), 0, + ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT)); + + ring = &adev->vcn.inst[i].ring_enc[0]; + WREG32_SOC15(VCN, i, regVCN_RB1_DB_CTRL, + ring->doorbell_index << VCN_RB1_DB_CTRL__OFFSET__SHIFT | + VCN_RB1_DB_CTRL__EN_MASK); + + WREG32_SOC15(VCN, i, regUVD_RB_BASE_LO, ring->gpu_addr); + WREG32_SOC15(VCN, i, regUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); + WREG32_SOC15(VCN, i, regUVD_RB_SIZE, ring->ring_size / 4); + + tmp = RREG32_SOC15(VCN, i, regVCN_RB_ENABLE); + tmp &= ~(VCN_RB_ENABLE__RB1_EN_MASK); + WREG32_SOC15(VCN, i, regVCN_RB_ENABLE, tmp); + fw_shared->sq.queue_mode |= FW_QUEUE_RING_RESET; + WREG32_SOC15(VCN, i, regUVD_RB_RPTR, 0); + WREG32_SOC15(VCN, i, regUVD_RB_WPTR, 0); + + tmp = RREG32_SOC15(VCN, i, regUVD_RB_RPTR); + WREG32_SOC15(VCN, i, regUVD_RB_WPTR, tmp); + ring->wptr = RREG32_SOC15(VCN, i, regUVD_RB_WPTR); + + tmp = RREG32_SOC15(VCN, i, regVCN_RB_ENABLE); + tmp |= VCN_RB_ENABLE__RB1_EN_MASK; + WREG32_SOC15(VCN, i, regVCN_RB_ENABLE, tmp); + fw_shared->sq.queue_mode &= ~(FW_QUEUE_RING_RESET | FW_QUEUE_DPG_HOLD_OFF); + } + + return 0; +} + +/** + * vcn_v5_0_0_stop_dpg_mode - VCN stop with dpg mode + * + * @adev: amdgpu_device pointer + * @inst_idx: instance number index + * + * Stop VCN block with dpg mode + */ +static void vcn_v5_0_0_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx) +{ + struct dpg_pause_state state = {.fw_based = VCN_DPG_STATE__UNPAUSE}; + uint32_t tmp; + + vcn_v5_0_0_pause_dpg_mode(adev, inst_idx, &state); + + /* Wait for power status to be 1 */ + SOC15_WAIT_ON_RREG(VCN, inst_idx, regUVD_POWER_STATUS, 1, + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK); + + /* wait for read ptr to be equal to write ptr */ + tmp = RREG32_SOC15(VCN, inst_idx, regUVD_RB_WPTR); + SOC15_WAIT_ON_RREG(VCN, inst_idx, regUVD_RB_RPTR, tmp, 0xFFFFFFFF); + + /* disable dynamic power gating mode */ + WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, regUVD_POWER_STATUS), 0, + ~UVD_POWER_STATUS__UVD_PG_MODE_MASK); + + return; +} + +/** + * vcn_v5_0_0_stop - VCN stop + * + * @adev: amdgpu_device pointer + * + * Stop VCN block + */ +static int vcn_v5_0_0_stop(struct amdgpu_device *adev) +{ + volatile struct amdgpu_vcn5_fw_shared *fw_shared; + uint32_t tmp; + int i, r = 0; + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; + fw_shared->sq.queue_mode |= FW_QUEUE_DPG_HOLD_OFF; + + if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) { + vcn_v5_0_0_stop_dpg_mode(adev, i); + continue; + } + + /* wait for vcn idle */ + r = SOC15_WAIT_ON_RREG(VCN, i, regUVD_STATUS, UVD_STATUS__IDLE, 0x7); + if (r) + return r; + + tmp = UVD_LMI_STATUS__VCPU_LMI_WRITE_CLEAN_MASK | + UVD_LMI_STATUS__READ_CLEAN_MASK | + UVD_LMI_STATUS__WRITE_CLEAN_MASK | + UVD_LMI_STATUS__WRITE_CLEAN_RAW_MASK; + r = SOC15_WAIT_ON_RREG(VCN, i, regUVD_LMI_STATUS, tmp, tmp); + if (r) + return r; + + /* disable LMI UMC channel */ + tmp = RREG32_SOC15(VCN, i, regUVD_LMI_CTRL2); + tmp |= UVD_LMI_CTRL2__STALL_ARB_UMC_MASK; + WREG32_SOC15(VCN, i, regUVD_LMI_CTRL2, tmp); + tmp = UVD_LMI_STATUS__UMC_READ_CLEAN_RAW_MASK | + UVD_LMI_STATUS__UMC_WRITE_CLEAN_RAW_MASK; + r = SOC15_WAIT_ON_RREG(VCN, i, regUVD_LMI_STATUS, tmp, tmp); + if (r) + return r; + + /* block VCPU register access */ + WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_RB_ARB_CTRL), + UVD_RB_ARB_CTRL__VCPU_DIS_MASK, + ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK); + + /* reset VCPU */ + WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL), + UVD_VCPU_CNTL__BLK_RST_MASK, + ~UVD_VCPU_CNTL__BLK_RST_MASK); + + /* disable VCPU clock */ + WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL), 0, + ~(UVD_VCPU_CNTL__CLK_EN_MASK)); + + /* apply soft reset */ + tmp = RREG32_SOC15(VCN, i, regUVD_SOFT_RESET); + tmp |= UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK; + WREG32_SOC15(VCN, i, regUVD_SOFT_RESET, tmp); + tmp = RREG32_SOC15(VCN, i, regUVD_SOFT_RESET); + tmp |= UVD_SOFT_RESET__LMI_SOFT_RESET_MASK; + WREG32_SOC15(VCN, i, regUVD_SOFT_RESET, tmp); + + /* clear status */ + WREG32_SOC15(VCN, i, regUVD_STATUS, 0); + + /* enable VCN power gating */ + vcn_v5_0_0_enable_static_power_gating(adev, i); + } + + if (adev->pm.dpm_enabled) + amdgpu_dpm_enable_uvd(adev, false); + + return 0; +} + +/** + * vcn_v5_0_0_pause_dpg_mode - VCN pause with dpg mode + * + * @adev: amdgpu_device pointer + * @inst_idx: instance number index + * @new_state: pause state + * + * Pause dpg mode for VCN block + */ +static int vcn_v5_0_0_pause_dpg_mode(struct amdgpu_device *adev, int inst_idx, + struct dpg_pause_state *new_state) +{ + uint32_t reg_data = 0; + int ret_code; + + /* pause/unpause if state is changed */ + if (adev->vcn.inst[inst_idx].pause_state.fw_based != new_state->fw_based) { + DRM_DEV_DEBUG(adev->dev, "dpg pause state changed %d -> %d", + adev->vcn.inst[inst_idx].pause_state.fw_based, new_state->fw_based); + reg_data = RREG32_SOC15(VCN, inst_idx, regUVD_DPG_PAUSE) & + (~UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK); + + if (new_state->fw_based == VCN_DPG_STATE__PAUSE) { + ret_code = SOC15_WAIT_ON_RREG(VCN, inst_idx, regUVD_POWER_STATUS, 0x1, + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK); + + if (!ret_code) { + /* pause DPG */ + reg_data |= UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK; + WREG32_SOC15(VCN, inst_idx, regUVD_DPG_PAUSE, reg_data); + + /* wait for ACK */ + SOC15_WAIT_ON_RREG(VCN, inst_idx, regUVD_DPG_PAUSE, + UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK, + UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK); + } + } else { + /* unpause dpg, no need to wait */ + reg_data &= ~UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK; + WREG32_SOC15(VCN, inst_idx, regUVD_DPG_PAUSE, reg_data); + } + adev->vcn.inst[inst_idx].pause_state.fw_based = new_state->fw_based; + } + + return 0; +} + +/** + * vcn_v5_0_0_unified_ring_get_rptr - get unified read pointer + * + * @ring: amdgpu_ring pointer + * + * Returns the current hardware unified read pointer + */ +static uint64_t vcn_v5_0_0_unified_ring_get_rptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + if (ring != &adev->vcn.inst[ring->me].ring_enc[0]) + DRM_ERROR("wrong ring id is identified in %s", __func__); + + return RREG32_SOC15(VCN, ring->me, regUVD_RB_RPTR); +} + +/** + * vcn_v5_0_0_unified_ring_get_wptr - get unified write pointer + * + * @ring: amdgpu_ring pointer + * + * Returns the current hardware unified write pointer + */ +static uint64_t vcn_v5_0_0_unified_ring_get_wptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + if (ring != &adev->vcn.inst[ring->me].ring_enc[0]) + DRM_ERROR("wrong ring id is identified in %s", __func__); + + if (ring->use_doorbell) + return *ring->wptr_cpu_addr; + else + return RREG32_SOC15(VCN, ring->me, regUVD_RB_WPTR); +} + +/** + * vcn_v5_0_0_unified_ring_set_wptr - set enc write pointer + * + * @ring: amdgpu_ring pointer + * + * Commits the enc write pointer to the hardware + */ +static void vcn_v5_0_0_unified_ring_set_wptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + if (ring != &adev->vcn.inst[ring->me].ring_enc[0]) + DRM_ERROR("wrong ring id is identified in %s", __func__); + + if (ring->use_doorbell) { + *ring->wptr_cpu_addr = lower_32_bits(ring->wptr); + WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr)); + } else { + WREG32_SOC15(VCN, ring->me, regUVD_RB_WPTR, lower_32_bits(ring->wptr)); + } +} + +static const struct amdgpu_ring_funcs vcn_v5_0_0_unified_ring_vm_funcs = { + .type = AMDGPU_RING_TYPE_VCN_ENC, + .align_mask = 0x3f, + .nop = VCN_ENC_CMD_NO_OP, + .get_rptr = vcn_v5_0_0_unified_ring_get_rptr, + .get_wptr = vcn_v5_0_0_unified_ring_get_wptr, + .set_wptr = vcn_v5_0_0_unified_ring_set_wptr, + .emit_frame_size = + SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 + + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 + + 4 + /* vcn_v2_0_enc_ring_emit_vm_flush */ + 5 + 5 + /* vcn_v2_0_enc_ring_emit_fence x2 vm fence */ + 1, /* vcn_v2_0_enc_ring_insert_end */ + .emit_ib_size = 5, /* vcn_v2_0_enc_ring_emit_ib */ + .emit_ib = vcn_v2_0_enc_ring_emit_ib, + .emit_fence = vcn_v2_0_enc_ring_emit_fence, + .emit_vm_flush = vcn_v2_0_enc_ring_emit_vm_flush, + .test_ring = amdgpu_vcn_enc_ring_test_ring, + .test_ib = amdgpu_vcn_unified_ring_test_ib, + .insert_nop = amdgpu_ring_insert_nop, + .insert_end = vcn_v2_0_enc_ring_insert_end, + .pad_ib = amdgpu_ring_generic_pad_ib, + .begin_use = amdgpu_vcn_ring_begin_use, + .end_use = amdgpu_vcn_ring_end_use, + .emit_wreg = vcn_v2_0_enc_ring_emit_wreg, + .emit_reg_wait = vcn_v2_0_enc_ring_emit_reg_wait, + .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, +}; + +/** + * vcn_v5_0_0_set_unified_ring_funcs - set unified ring functions + * + * @adev: amdgpu_device pointer + * + * Set unified ring functions + */ +static void vcn_v5_0_0_set_unified_ring_funcs(struct amdgpu_device *adev) +{ + int i; + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + + adev->vcn.inst[i].ring_enc[0].funcs = &vcn_v5_0_0_unified_ring_vm_funcs; + adev->vcn.inst[i].ring_enc[0].me = i; + + DRM_INFO("VCN(%d) encode/decode are enabled in VM mode\n", i); + } +} + +/** + * vcn_v5_0_0_is_idle - check VCN block is idle + * + * @handle: amdgpu_device pointer + * + * Check whether VCN block is idle + */ +static bool vcn_v5_0_0_is_idle(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i, ret = 1; + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + + ret &= (RREG32_SOC15(VCN, i, regUVD_STATUS) == UVD_STATUS__IDLE); + } + + return ret; +} + +/** + * vcn_v5_0_0_wait_for_idle - wait for VCN block idle + * + * @handle: amdgpu_device pointer + * + * Wait for VCN block idle + */ +static int vcn_v5_0_0_wait_for_idle(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i, ret = 0; + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + + ret = SOC15_WAIT_ON_RREG(VCN, i, regUVD_STATUS, UVD_STATUS__IDLE, + UVD_STATUS__IDLE); + if (ret) + return ret; + } + + return ret; +} + +/** + * vcn_v5_0_0_set_clockgating_state - set VCN block clockgating state + * + * @handle: amdgpu_device pointer + * @state: clock gating state + * + * Set VCN block clockgating state + */ +static int vcn_v5_0_0_set_clockgating_state(void *handle, enum amd_clockgating_state state) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + bool enable = (state == AMD_CG_STATE_GATE) ? true : false; + int i; + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + + if (enable) { + if (RREG32_SOC15(VCN, i, regUVD_STATUS) != UVD_STATUS__IDLE) + return -EBUSY; + vcn_v5_0_0_enable_clock_gating(adev, i); + } else { + vcn_v5_0_0_disable_clock_gating(adev, i); + } + } + + return 0; +} + +/** + * vcn_v5_0_0_set_powergating_state - set VCN block powergating state + * + * @handle: amdgpu_device pointer + * @state: power gating state + * + * Set VCN block powergating state + */ +static int vcn_v5_0_0_set_powergating_state(void *handle, enum amd_powergating_state state) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int ret; + + if (state == adev->vcn.cur_state) + return 0; + + if (state == AMD_PG_STATE_GATE) + ret = vcn_v5_0_0_stop(adev); + else + ret = vcn_v5_0_0_start(adev); + + if (!ret) + adev->vcn.cur_state = state; + + return ret; +} + +/** + * vcn_v5_0_0_set_interrupt_state - set VCN block interrupt state + * + * @adev: amdgpu_device pointer + * @source: interrupt sources + * @type: interrupt types + * @state: interrupt states + * + * Set VCN block interrupt state + */ +static int vcn_v5_0_0_set_interrupt_state(struct amdgpu_device *adev, struct amdgpu_irq_src *source, + unsigned type, enum amdgpu_interrupt_state state) +{ + return 0; +} + +/** + * vcn_v5_0_0_process_interrupt - process VCN block interrupt + * + * @adev: amdgpu_device pointer + * @source: interrupt sources + * @entry: interrupt entry from clients and sources + * + * Process VCN block interrupt + */ +static int vcn_v5_0_0_process_interrupt(struct amdgpu_device *adev, struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + uint32_t ip_instance; + + switch (entry->client_id) { + case SOC15_IH_CLIENTID_VCN: + ip_instance = 0; + break; + case SOC15_IH_CLIENTID_VCN1: + ip_instance = 1; + break; + default: + DRM_ERROR("Unhandled client id: %d\n", entry->client_id); + return 0; + } + + DRM_DEBUG("IH: VCN TRAP\n"); + + switch (entry->src_id) { + case VCN_4_0__SRCID__UVD_ENC_GENERAL_PURPOSE: + amdgpu_fence_process(&adev->vcn.inst[ip_instance].ring_enc[0]); + break; + case VCN_4_0__SRCID_UVD_POISON: + amdgpu_vcn_process_poison_irq(adev, source, entry); + break; + default: + DRM_ERROR("Unhandled interrupt: %d %d\n", + entry->src_id, entry->src_data[0]); + break; + } + + return 0; +} + +static const struct amdgpu_irq_src_funcs vcn_v5_0_0_irq_funcs = { + .set = vcn_v5_0_0_set_interrupt_state, + .process = vcn_v5_0_0_process_interrupt, +}; + +/** + * vcn_v5_0_0_set_irq_funcs - set VCN block interrupt irq functions + * + * @adev: amdgpu_device pointer + * + * Set VCN block interrupt irq functions + */ +static void vcn_v5_0_0_set_irq_funcs(struct amdgpu_device *adev) +{ + int i; + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + + adev->vcn.inst[i].irq.num_types = adev->vcn.num_enc_rings + 1; + adev->vcn.inst[i].irq.funcs = &vcn_v5_0_0_irq_funcs; + } +} + +static const struct amd_ip_funcs vcn_v5_0_0_ip_funcs = { + .name = "vcn_v5_0_0", + .early_init = vcn_v5_0_0_early_init, + .late_init = NULL, + .sw_init = vcn_v5_0_0_sw_init, + .sw_fini = vcn_v5_0_0_sw_fini, + .hw_init = vcn_v5_0_0_hw_init, + .hw_fini = vcn_v5_0_0_hw_fini, + .suspend = vcn_v5_0_0_suspend, + .resume = vcn_v5_0_0_resume, + .is_idle = vcn_v5_0_0_is_idle, + .wait_for_idle = vcn_v5_0_0_wait_for_idle, + .check_soft_reset = NULL, + .pre_soft_reset = NULL, + .soft_reset = NULL, + .post_soft_reset = NULL, + .set_clockgating_state = vcn_v5_0_0_set_clockgating_state, + .set_powergating_state = vcn_v5_0_0_set_powergating_state, + .dump_ip_state = NULL, + .print_ip_state = NULL, +}; + +const struct amdgpu_ip_block_version vcn_v5_0_0_ip_block = { + .type = AMD_IP_BLOCK_TYPE_VCN, + .major = 5, + .minor = 0, + .rev = 0, + .funcs = &vcn_v5_0_0_ip_funcs, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.h b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.h new file mode 100644 index 000000000000..51bbccd4360f --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.h @@ -0,0 +1,37 @@ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __VCN_V5_0_0_H__ +#define __VCN_V5_0_0_H__ + +#define VCN_VID_SOC_ADDRESS 0x1FC00 +#define VCN_AON_SOC_ADDRESS 0x1F800 +#define VCN1_VID_SOC_ADDRESS 0x48300 +#define VCN1_AON_SOC_ADDRESS 0x48000 + +#define VCN_VID_IP_ADDRESS 0x0 +#define VCN_AON_IP_ADDRESS 0x30000 + +extern const struct amdgpu_ip_block_version vcn_v5_0_0_ip_block; + +#endif /* __VCN_V5_0_0_H__ */ diff --git a/drivers/gpu/drm/amd/amdgpu/vega20_ih.c b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c index db66e6cccaf2..b9e785846637 100644 --- a/drivers/gpu/drm/amd/amdgpu/vega20_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c @@ -291,27 +291,29 @@ static int vega20_ih_irq_init(struct amdgpu_device *adev) adev->nbio.funcs->ih_control(adev); - if ((amdgpu_ip_version(adev, OSSSYS_HWIP, 0) == IP_VERSION(4, 2, 1)) && - adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { - ih_chicken = RREG32_SOC15(OSSSYS, 0, mmIH_CHICKEN); - if (adev->irq.ih.use_bus_addr) { - ih_chicken = REG_SET_FIELD(ih_chicken, IH_CHICKEN, - MC_SPACE_GPA_ENABLE, 1); + if (!amdgpu_sriov_vf(adev)) { + if ((amdgpu_ip_version(adev, OSSSYS_HWIP, 0) == IP_VERSION(4, 2, 1)) && + adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { + ih_chicken = RREG32_SOC15(OSSSYS, 0, mmIH_CHICKEN); + if (adev->irq.ih.use_bus_addr) { + ih_chicken = REG_SET_FIELD(ih_chicken, IH_CHICKEN, + MC_SPACE_GPA_ENABLE, 1); + } + WREG32_SOC15(OSSSYS, 0, mmIH_CHICKEN, ih_chicken); } - WREG32_SOC15(OSSSYS, 0, mmIH_CHICKEN, ih_chicken); - } - /* psp firmware won't program IH_CHICKEN for aldebaran - * driver needs to program it properly according to - * MC_SPACE type in IH_RB_CNTL */ - if ((amdgpu_ip_version(adev, OSSSYS_HWIP, 0) == IP_VERSION(4, 4, 0)) || - (amdgpu_ip_version(adev, OSSSYS_HWIP, 0) == IP_VERSION(4, 4, 2))) { - ih_chicken = RREG32_SOC15(OSSSYS, 0, mmIH_CHICKEN_ALDEBARAN); - if (adev->irq.ih.use_bus_addr) { - ih_chicken = REG_SET_FIELD(ih_chicken, IH_CHICKEN, - MC_SPACE_GPA_ENABLE, 1); + /* psp firmware won't program IH_CHICKEN for aldebaran + * driver needs to program it properly according to + * MC_SPACE type in IH_RB_CNTL */ + if ((amdgpu_ip_version(adev, OSSSYS_HWIP, 0) == IP_VERSION(4, 4, 0)) || + (amdgpu_ip_version(adev, OSSSYS_HWIP, 0) == IP_VERSION(4, 4, 2))) { + ih_chicken = RREG32_SOC15(OSSSYS, 0, mmIH_CHICKEN_ALDEBARAN); + if (adev->irq.ih.use_bus_addr) { + ih_chicken = REG_SET_FIELD(ih_chicken, IH_CHICKEN, + MC_SPACE_GPA_ENABLE, 1); + } + WREG32_SOC15(OSSSYS, 0, mmIH_CHICKEN_ALDEBARAN, ih_chicken); } - WREG32_SOC15(OSSSYS, 0, mmIH_CHICKEN_ALDEBARAN, ih_chicken); } for (i = 0; i < ARRAY_SIZE(ih); i++) { diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c index 1a98812981f4..d39c670f6220 100644 --- a/drivers/gpu/drm/amd/amdgpu/vi.c +++ b/drivers/gpu/drm/amd/amdgpu/vi.c @@ -897,7 +897,7 @@ static int vi_asic_pci_config_reset(struct amdgpu_device *adev) return r; } -static bool vi_asic_supports_baco(struct amdgpu_device *adev) +static int vi_asic_supports_baco(struct amdgpu_device *adev) { switch (adev->asic_type) { case CHIP_FIJI: @@ -908,14 +908,14 @@ static bool vi_asic_supports_baco(struct amdgpu_device *adev) case CHIP_TOPAZ: return amdgpu_dpm_is_baco_supported(adev); default: - return false; + return 0; } } static enum amd_reset_method vi_asic_reset_method(struct amdgpu_device *adev) { - bool baco_reset; + int baco_reset; if (amdgpu_reset_method == AMD_RESET_METHOD_LEGACY || amdgpu_reset_method == AMD_RESET_METHOD_BACO) @@ -935,7 +935,7 @@ vi_asic_reset_method(struct amdgpu_device *adev) baco_reset = amdgpu_dpm_is_baco_supported(adev); break; default: - baco_reset = false; + baco_reset = 0; break; } @@ -2058,6 +2058,8 @@ static const struct amd_ip_funcs vi_common_ip_funcs = { .set_clockgating_state = vi_common_set_clockgating_state, .set_powergating_state = vi_common_set_powergating_state, .get_clockgating_state = vi_common_get_clockgating_state, + .dump_ip_state = NULL, + .print_ip_state = NULL, }; static const struct amdgpu_ip_block_version vi_common_ip_block = diff --git a/drivers/gpu/drm/amd/amdgpu/vpe_6_1_fw_if.h b/drivers/gpu/drm/amd/amdgpu/vpe_6_1_fw_if.h index 9b550deb48d3..47534dbbd137 100644 --- a/drivers/gpu/drm/amd/amdgpu/vpe_6_1_fw_if.h +++ b/drivers/gpu/drm/amd/amdgpu/vpe_6_1_fw_if.h @@ -40,7 +40,8 @@ enum VPE_CMD_OPCODE { VPE_CMD_OPCODE_POLL_REGMEM = 0x8, VPE_CMD_OPCODE_COND_EXE = 0x9, VPE_CMD_OPCODE_ATOMIC = 0xA, - VPE_CMD_OPCODE_PLANE_FILL = 0xB, + VPE_CMD_OPCODE_PRED_EXE = 0xB, + VPE_CMD_OPCODE_COLLAB_SYNC = 0xC, VPE_CMD_OPCODE_TIMESTAMP = 0xD }; diff --git a/drivers/gpu/drm/amd/amdgpu/vpe_v6_1.c b/drivers/gpu/drm/amd/amdgpu/vpe_v6_1.c index d20060a51e05..09315dd5a1ec 100644 --- a/drivers/gpu/drm/amd/amdgpu/vpe_v6_1.c +++ b/drivers/gpu/drm/amd/amdgpu/vpe_v6_1.c @@ -33,14 +33,38 @@ #include "vpe/vpe_6_1_0_sh_mask.h" MODULE_FIRMWARE("amdgpu/vpe_6_1_0.bin"); +MODULE_FIRMWARE("amdgpu/vpe_6_1_1.bin"); #define VPE_THREAD1_UCODE_OFFSET 0x8000 +#define regVPEC_COLLABORATE_CNTL 0x0013 +#define regVPEC_COLLABORATE_CNTL_BASE_IDX 0 +#define VPEC_COLLABORATE_CNTL__COLLABORATE_MODE_EN__SHIFT 0x0 +#define VPEC_COLLABORATE_CNTL__COLLABORATE_MODE_EN_MASK 0x00000001L + +#define regVPEC_COLLABORATE_CFG 0x0014 +#define regVPEC_COLLABORATE_CFG_BASE_IDX 0 +#define VPEC_COLLABORATE_CFG__MASTER_ID__SHIFT 0x0 +#define VPEC_COLLABORATE_CFG__MASTER_EN__SHIFT 0x3 +#define VPEC_COLLABORATE_CFG__SLAVE0_ID__SHIFT 0x4 +#define VPEC_COLLABORATE_CFG__SLAVE0_EN__SHIFT 0x7 +#define VPEC_COLLABORATE_CFG__MASTER_ID_MASK 0x00000007L +#define VPEC_COLLABORATE_CFG__MASTER_EN_MASK 0x00000008L +#define VPEC_COLLABORATE_CFG__SLAVE0_ID_MASK 0x00000070L +#define VPEC_COLLABORATE_CFG__SLAVE0_EN_MASK 0x00000080L + +#define regVPEC_CNTL_6_1_1 0x0016 +#define regVPEC_CNTL_6_1_1_BASE_IDX 0 +#define regVPEC_QUEUE_RESET_REQ_6_1_1 0x002c +#define regVPEC_QUEUE_RESET_REQ_6_1_1_BASE_IDX 0 +#define regVPEC_PUB_DUMMY2_6_1_1 0x004c +#define regVPEC_PUB_DUMMY2_6_1_1_BASE_IDX 0 + static uint32_t vpe_v6_1_get_reg_offset(struct amdgpu_vpe *vpe, uint32_t inst, uint32_t offset) { uint32_t base; - base = vpe->ring.adev->reg_offset[VPE_HWIP][0][0]; + base = vpe->ring.adev->reg_offset[VPE_HWIP][inst][0]; return base + offset; } @@ -48,12 +72,14 @@ static uint32_t vpe_v6_1_get_reg_offset(struct amdgpu_vpe *vpe, uint32_t inst, u static void vpe_v6_1_halt(struct amdgpu_vpe *vpe, bool halt) { struct amdgpu_device *adev = vpe->ring.adev; - uint32_t f32_cntl; + uint32_t i, f32_cntl; - f32_cntl = RREG32(vpe_get_reg_offset(vpe, 0, regVPEC_F32_CNTL)); - f32_cntl = REG_SET_FIELD(f32_cntl, VPEC_F32_CNTL, HALT, halt ? 1 : 0); - f32_cntl = REG_SET_FIELD(f32_cntl, VPEC_F32_CNTL, TH1_RESET, halt ? 1 : 0); - WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_F32_CNTL), f32_cntl); + for (i = 0; i < vpe->num_instances; i++) { + f32_cntl = RREG32(vpe_get_reg_offset(vpe, i, regVPEC_F32_CNTL)); + f32_cntl = REG_SET_FIELD(f32_cntl, VPEC_F32_CNTL, HALT, halt ? 1 : 0); + f32_cntl = REG_SET_FIELD(f32_cntl, VPEC_F32_CNTL, TH1_RESET, halt ? 1 : 0); + WREG32(vpe_get_reg_offset(vpe, i, regVPEC_F32_CNTL), f32_cntl); + } } static int vpe_v6_1_irq_init(struct amdgpu_vpe *vpe) @@ -70,20 +96,64 @@ static int vpe_v6_1_irq_init(struct amdgpu_vpe *vpe) return 0; } +static void vpe_v6_1_set_collaborate_mode(struct amdgpu_vpe *vpe, bool enable) +{ + struct amdgpu_device *adev = vpe->ring.adev; + uint32_t vpe_colla_cntl, vpe_colla_cfg, i; + + if (!vpe->collaborate_mode) + return; + + for (i = 0; i < vpe->num_instances; i++) { + vpe_colla_cntl = RREG32(vpe_get_reg_offset(vpe, i, regVPEC_COLLABORATE_CNTL)); + vpe_colla_cntl = REG_SET_FIELD(vpe_colla_cntl, VPEC_COLLABORATE_CNTL, + COLLABORATE_MODE_EN, enable ? 1 : 0); + WREG32(vpe_get_reg_offset(vpe, i, regVPEC_COLLABORATE_CNTL), vpe_colla_cntl); + + vpe_colla_cfg = RREG32(vpe_get_reg_offset(vpe, i, regVPEC_COLLABORATE_CFG)); + vpe_colla_cfg = REG_SET_FIELD(vpe_colla_cfg, VPEC_COLLABORATE_CFG, MASTER_ID, 0); + vpe_colla_cfg = REG_SET_FIELD(vpe_colla_cfg, VPEC_COLLABORATE_CFG, MASTER_EN, enable ? 1 : 0); + vpe_colla_cfg = REG_SET_FIELD(vpe_colla_cfg, VPEC_COLLABORATE_CFG, SLAVE0_ID, 1); + vpe_colla_cfg = REG_SET_FIELD(vpe_colla_cfg, VPEC_COLLABORATE_CFG, SLAVE0_EN, enable ? 1 : 0); + WREG32(vpe_get_reg_offset(vpe, i, regVPEC_COLLABORATE_CFG), vpe_colla_cfg); + } +} + static int vpe_v6_1_load_microcode(struct amdgpu_vpe *vpe) { struct amdgpu_device *adev = vpe->ring.adev; const struct vpe_firmware_header_v1_0 *vpe_hdr; const __le32 *data; uint32_t ucode_offset[2], ucode_size[2]; - uint32_t i, size_dw; + uint32_t i, j, size_dw; uint32_t ret; - // disable UMSCH_INT_ENABLE - ret = RREG32(vpe_get_reg_offset(vpe, 0, regVPEC_CNTL)); - ret = REG_SET_FIELD(ret, VPEC_CNTL, UMSCH_INT_ENABLE, 0); - WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_CNTL), ret); + /* disable UMSCH_INT_ENABLE */ + for (j = 0; j < vpe->num_instances; j++) { + + if (amdgpu_ip_version(adev, VPE_HWIP, 0) == IP_VERSION(6, 1, 1)) + ret = RREG32(vpe_get_reg_offset(vpe, j, regVPEC_CNTL_6_1_1)); + else + ret = RREG32(vpe_get_reg_offset(vpe, j, regVPEC_CNTL)); + + ret = REG_SET_FIELD(ret, VPEC_CNTL, UMSCH_INT_ENABLE, 0); + + if (amdgpu_ip_version(adev, VPE_HWIP, 0) == IP_VERSION(6, 1, 1)) + WREG32(vpe_get_reg_offset(vpe, j, regVPEC_CNTL_6_1_1), ret); + else + WREG32(vpe_get_reg_offset(vpe, j, regVPEC_CNTL), ret); + } + + /* setup collaborate mode */ + vpe_v6_1_set_collaborate_mode(vpe, true); + /* setup DPM */ + if (amdgpu_vpe_configure_dpm(vpe)) + dev_warn(adev->dev, "VPE failed to enable DPM\n"); + /* + * For VPE 6.1.1, still only need to add master's offset, and psp will apply it to slave as well. + * Here use instance 0 as master. + */ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { uint32_t f32_offset, f32_cntl; @@ -95,12 +165,7 @@ static int vpe_v6_1_load_microcode(struct amdgpu_vpe *vpe) adev->vpe.cmdbuf_cpu_addr[0] = f32_offset; adev->vpe.cmdbuf_cpu_addr[1] = f32_cntl; - amdgpu_vpe_psp_update_sram(adev); - - /* Config DPM */ - amdgpu_vpe_configure_dpm(vpe); - - return 0; + return amdgpu_vpe_psp_update_sram(adev); } vpe_hdr = (const struct vpe_firmware_header_v1_0 *)adev->vpe.fw->data; @@ -114,26 +179,25 @@ static int vpe_v6_1_load_microcode(struct amdgpu_vpe *vpe) vpe_v6_1_halt(vpe, true); - for (i = 0; i < 2; i++) { - if (i > 0) - WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_UCODE_ADDR), VPE_THREAD1_UCODE_OFFSET); - else - WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_UCODE_ADDR), 0); - - data = (const __le32 *)(adev->vpe.fw->data + ucode_offset[i]); - size_dw = ucode_size[i] / sizeof(__le32); - - while (size_dw--) { - if (amdgpu_emu_mode && size_dw % 500 == 0) - msleep(1); - WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_UCODE_DATA), le32_to_cpup(data++)); + for (j = 0; j < vpe->num_instances; j++) { + for (i = 0; i < 2; i++) { + if (i > 0) + WREG32(vpe_get_reg_offset(vpe, j, regVPEC_UCODE_ADDR), VPE_THREAD1_UCODE_OFFSET); + else + WREG32(vpe_get_reg_offset(vpe, j, regVPEC_UCODE_ADDR), 0); + + data = (const __le32 *)(adev->vpe.fw->data + ucode_offset[i]); + size_dw = ucode_size[i] / sizeof(__le32); + + while (size_dw--) { + if (amdgpu_emu_mode && size_dw % 500 == 0) + msleep(1); + WREG32(vpe_get_reg_offset(vpe, j, regVPEC_UCODE_DATA), le32_to_cpup(data++)); + } } - } vpe_v6_1_halt(vpe, false); - /* Config DPM */ - amdgpu_vpe_configure_dpm(vpe); return 0; } @@ -142,68 +206,68 @@ static int vpe_v6_1_ring_start(struct amdgpu_vpe *vpe) { struct amdgpu_ring *ring = &vpe->ring; struct amdgpu_device *adev = ring->adev; - uint32_t rb_bufsz, rb_cntl; - uint32_t ib_cntl; uint32_t doorbell, doorbell_offset; + uint32_t rb_bufsz, rb_cntl; + uint32_t ib_cntl, i; int ret; - rb_bufsz = order_base_2(ring->ring_size / 4); - rb_cntl = RREG32(vpe_get_reg_offset(vpe, 0, regVPEC_QUEUE0_RB_CNTL)); - rb_cntl = REG_SET_FIELD(rb_cntl, VPEC_QUEUE0_RB_CNTL, RB_SIZE, rb_bufsz); - rb_cntl = REG_SET_FIELD(rb_cntl, VPEC_QUEUE0_RB_CNTL, RB_PRIV, 1); - rb_cntl = REG_SET_FIELD(rb_cntl, VPEC_QUEUE0_RB_CNTL, RB_VMID, 0); - WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_QUEUE0_RB_CNTL), rb_cntl); - - WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_QUEUE0_RB_RPTR), 0); - WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_QUEUE0_RB_RPTR_HI), 0); - WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_QUEUE0_RB_WPTR), 0); - WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_QUEUE0_RB_WPTR_HI), 0); - - WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_QUEUE0_RB_RPTR_ADDR_LO), - lower_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFC); - WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_QUEUE0_RB_RPTR_ADDR_HI), - upper_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFF); - - WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_QUEUE0_RB_BASE), ring->gpu_addr >> 8); - WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_QUEUE0_RB_BASE_HI), ring->gpu_addr >> 40); - - ring->wptr = 0; - - /* before programing wptr to a less value, need set minor_ptr_update first */ - WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_QUEUE0_MINOR_PTR_UPDATE), 1); - - WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_QUEUE0_RB_WPTR), lower_32_bits(ring->wptr) << 2); - WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_QUEUE0_RB_WPTR_HI), upper_32_bits(ring->wptr) << 2); - - /* set minor_ptr_update to 0 after wptr programed */ - WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_QUEUE0_MINOR_PTR_UPDATE), 0); - - doorbell = RREG32(vpe_get_reg_offset(vpe, 0, regVPEC_QUEUE0_DOORBELL)); - doorbell_offset = RREG32(vpe_get_reg_offset(vpe, 0, regVPEC_QUEUE0_DOORBELL_OFFSET)); - - doorbell = REG_SET_FIELD(doorbell, VPEC_QUEUE0_DOORBELL, ENABLE, ring->use_doorbell ? 1 : 0); - doorbell_offset = REG_SET_FIELD(doorbell_offset, VPEC_QUEUE0_DOORBELL_OFFSET, OFFSET, ring->doorbell_index); - - WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_QUEUE0_DOORBELL), doorbell); - WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_QUEUE0_DOORBELL_OFFSET), doorbell_offset); - - adev->nbio.funcs->vpe_doorbell_range(adev, 0, ring->use_doorbell, ring->doorbell_index, 2); - - rb_cntl = REG_SET_FIELD(rb_cntl, VPEC_QUEUE0_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1); - rb_cntl = REG_SET_FIELD(rb_cntl, VPEC_QUEUE0_RB_CNTL, RB_ENABLE, 1); - WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_QUEUE0_RB_CNTL), rb_cntl); - - ib_cntl = RREG32(vpe_get_reg_offset(vpe, 0, regVPEC_QUEUE0_IB_CNTL)); - ib_cntl = REG_SET_FIELD(ib_cntl, VPEC_QUEUE0_IB_CNTL, IB_ENABLE, 1); - WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_QUEUE0_IB_CNTL), ib_cntl); - - ring->sched.ready = true; + for (i = 0; i < vpe->num_instances; i++) { + /* Set ring buffer size in dwords */ + rb_bufsz = order_base_2(ring->ring_size / 4); + rb_cntl = RREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE0_RB_CNTL)); + rb_cntl = REG_SET_FIELD(rb_cntl, VPEC_QUEUE0_RB_CNTL, RB_SIZE, rb_bufsz); + rb_cntl = REG_SET_FIELD(rb_cntl, VPEC_QUEUE0_RB_CNTL, RB_PRIV, 1); + rb_cntl = REG_SET_FIELD(rb_cntl, VPEC_QUEUE0_RB_CNTL, RB_VMID, 0); + WREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE0_RB_CNTL), rb_cntl); + + /* Initialize the ring buffer's read and write pointers */ + WREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE0_RB_RPTR), 0); + WREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE0_RB_RPTR_HI), 0); + WREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE0_RB_WPTR), 0); + WREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE0_RB_WPTR_HI), 0); + + /* set the wb address whether it's enabled or not */ + WREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE0_RB_RPTR_ADDR_LO), + lower_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFC); + WREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE0_RB_RPTR_ADDR_HI), + upper_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFF); + + rb_cntl = REG_SET_FIELD(rb_cntl, VPEC_QUEUE0_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1); + + WREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE0_RB_BASE), ring->gpu_addr >> 8); + WREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE0_RB_BASE_HI), ring->gpu_addr >> 40); + + ring->wptr = 0; + + /* before programing wptr to a less value, need set minor_ptr_update first */ + WREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE0_MINOR_PTR_UPDATE), 1); + WREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE0_RB_WPTR), lower_32_bits(ring->wptr) << 2); + WREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE0_RB_WPTR_HI), upper_32_bits(ring->wptr) << 2); + /* set minor_ptr_update to 0 after wptr programed */ + WREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE0_MINOR_PTR_UPDATE), 0); + + doorbell_offset = RREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE0_DOORBELL_OFFSET)); + doorbell_offset = REG_SET_FIELD(doorbell_offset, VPEC_QUEUE0_DOORBELL_OFFSET, OFFSET, ring->doorbell_index + i*4); + WREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE0_DOORBELL_OFFSET), doorbell_offset); + + doorbell = RREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE0_DOORBELL)); + doorbell = REG_SET_FIELD(doorbell, VPEC_QUEUE0_DOORBELL, ENABLE, ring->use_doorbell ? 1 : 0); + WREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE0_DOORBELL), doorbell); + + adev->nbio.funcs->vpe_doorbell_range(adev, i, ring->use_doorbell, ring->doorbell_index + i*4, 4); + + rb_cntl = REG_SET_FIELD(rb_cntl, VPEC_QUEUE0_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1); + rb_cntl = REG_SET_FIELD(rb_cntl, VPEC_QUEUE0_RB_CNTL, RB_ENABLE, 1); + WREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE0_RB_CNTL), rb_cntl); + + ib_cntl = RREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE0_IB_CNTL)); + ib_cntl = REG_SET_FIELD(ib_cntl, VPEC_QUEUE0_IB_CNTL, IB_ENABLE, 1); + WREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE0_IB_CNTL), ib_cntl); + } ret = amdgpu_ring_test_helper(ring); - if (ret) { - ring->sched.ready = false; + if (ret) return ret; - } return 0; } @@ -211,17 +275,30 @@ static int vpe_v6_1_ring_start(struct amdgpu_vpe *vpe) static int vpe_v_6_1_ring_stop(struct amdgpu_vpe *vpe) { struct amdgpu_device *adev = vpe->ring.adev; - uint32_t queue_reset; + uint32_t queue_reset, i; int ret; - queue_reset = RREG32(vpe_get_reg_offset(vpe, 0, regVPEC_QUEUE_RESET_REQ)); - queue_reset = REG_SET_FIELD(queue_reset, VPEC_QUEUE_RESET_REQ, QUEUE0_RESET, 1); - WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_QUEUE_RESET_REQ), queue_reset); + for (i = 0; i < vpe->num_instances; i++) { + if (amdgpu_ip_version(adev, VPE_HWIP, 0) == IP_VERSION(6, 1, 1)) + queue_reset = RREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE_RESET_REQ_6_1_1)); + else + queue_reset = RREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE_RESET_REQ)); + + queue_reset = REG_SET_FIELD(queue_reset, VPEC_QUEUE_RESET_REQ, QUEUE0_RESET, 1); + + if (amdgpu_ip_version(adev, VPE_HWIP, 0) == IP_VERSION(6, 1, 1)) { + WREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE_RESET_REQ_6_1_1), queue_reset); + ret = SOC15_WAIT_ON_RREG(VPE, i, regVPEC_QUEUE_RESET_REQ_6_1_1, 0, + VPEC_QUEUE_RESET_REQ__QUEUE0_RESET_MASK); + } else { + WREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE_RESET_REQ), queue_reset); + ret = SOC15_WAIT_ON_RREG(VPE, i, regVPEC_QUEUE_RESET_REQ, 0, + VPEC_QUEUE_RESET_REQ__QUEUE0_RESET_MASK); + } - ret = SOC15_WAIT_ON_RREG(VPE, 0, regVPEC_QUEUE_RESET_REQ, 0, - VPEC_QUEUE_RESET_REQ__QUEUE0_RESET_MASK); - if (ret) - dev_err(adev->dev, "VPE queue reset failed\n"); + if (ret) + dev_err(adev->dev, "VPE queue reset failed\n"); + } vpe->ring.sched.ready = false; @@ -236,10 +313,18 @@ static int vpe_v6_1_set_trap_irq_state(struct amdgpu_device *adev, struct amdgpu_vpe *vpe = &adev->vpe; uint32_t vpe_cntl; - vpe_cntl = RREG32(vpe_get_reg_offset(vpe, 0, regVPEC_CNTL)); + if (amdgpu_ip_version(adev, VPE_HWIP, 0) == IP_VERSION(6, 1, 1)) + vpe_cntl = RREG32(vpe_get_reg_offset(vpe, 0, regVPEC_CNTL_6_1_1)); + else + vpe_cntl = RREG32(vpe_get_reg_offset(vpe, 0, regVPEC_CNTL)); + vpe_cntl = REG_SET_FIELD(vpe_cntl, VPEC_CNTL, TRAP_ENABLE, state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); - WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_CNTL), vpe_cntl); + + if (amdgpu_ip_version(adev, VPE_HWIP, 0) == IP_VERSION(6, 1, 1)) + WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_CNTL_6_1_1), vpe_cntl); + else + WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_CNTL), vpe_cntl); return 0; } @@ -264,13 +349,19 @@ static int vpe_v6_1_process_trap_irq(struct amdgpu_device *adev, static int vpe_v6_1_set_regs(struct amdgpu_vpe *vpe) { + struct amdgpu_device *adev = container_of(vpe, struct amdgpu_device, vpe); + vpe->regs.queue0_rb_rptr_lo = regVPEC_QUEUE0_RB_RPTR; vpe->regs.queue0_rb_rptr_hi = regVPEC_QUEUE0_RB_RPTR_HI; vpe->regs.queue0_rb_wptr_lo = regVPEC_QUEUE0_RB_WPTR; vpe->regs.queue0_rb_wptr_hi = regVPEC_QUEUE0_RB_WPTR_HI; vpe->regs.queue0_preempt = regVPEC_QUEUE0_PREEMPT; - vpe->regs.dpm_enable = regVPEC_PUB_DUMMY2; + if (amdgpu_ip_version(adev, VPE_HWIP, 0) == IP_VERSION(6, 1, 1)) + vpe->regs.dpm_enable = regVPEC_PUB_DUMMY2_6_1_1; + else + vpe->regs.dpm_enable = regVPEC_PUB_DUMMY2; + vpe->regs.dpm_pratio = regVPEC_QUEUE6_DUMMY4; vpe->regs.dpm_request_interval = regVPEC_QUEUE5_DUMMY3; vpe->regs.dpm_decision_threshold = regVPEC_QUEUE5_DUMMY4; |