diff options
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu')
239 files changed, 15362 insertions, 3921 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/Kconfig b/drivers/gpu/drm/amd/amdgpu/Kconfig index 22d88f8ef527..b0365cc1374e 100644 --- a/drivers/gpu/drm/amd/amdgpu/Kconfig +++ b/drivers/gpu/drm/amd/amdgpu/Kconfig @@ -2,13 +2,15 @@ config DRM_AMDGPU tristate "AMD GPU" - depends on DRM && PCI && MMU + depends on DRM + depends on DRM_DISPLAY_DP_HELPER + depends on DRM_DISPLAY_HDCP_HELPER + depends on DRM_DISPLAY_HDMI_HELPER + depends on DRM_DISPLAY_HELPER + depends on MMU + depends on PCI depends on !UML select FW_LOADER - select DRM_DISPLAY_DP_HELPER - select DRM_DISPLAY_HDMI_HELPER - select DRM_DISPLAY_HDCP_HELPER - select DRM_DISPLAY_HELPER select DRM_KMS_HELPER select DRM_SCHED select DRM_TTM diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index d58e74ae8ade..1f6b56ec99f6 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -70,7 +70,8 @@ amdgpu-y += amdgpu_device.o amdgpu_doorbell_mgr.o amdgpu_kms.o \ amdgpu_cs.o amdgpu_bios.o amdgpu_benchmark.o \ atombios_dp.o amdgpu_afmt.o amdgpu_trace_points.o \ atombios_encoders.o amdgpu_sa.o atombios_i2c.o \ - amdgpu_dma_buf.o amdgpu_vm.o amdgpu_vm_pt.o amdgpu_ib.o amdgpu_pll.o \ + amdgpu_dma_buf.o amdgpu_vm.o amdgpu_vm_pt.o amdgpu_vm_tlb_fence.o \ + amdgpu_ib.o amdgpu_pll.o \ amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o \ amdgpu_gtt_mgr.o amdgpu_preempt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o \ amdgpu_atomfirmware.o amdgpu_vf_error.o amdgpu_sched.o \ @@ -80,7 +81,7 @@ amdgpu-y += amdgpu_device.o amdgpu_doorbell_mgr.o amdgpu_kms.o \ amdgpu_umc.o smu_v11_0_i2c.o amdgpu_fru_eeprom.o amdgpu_rap.o \ amdgpu_fw_attestation.o amdgpu_securedisplay.o \ amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o amdgpu_lsdma.o \ - amdgpu_ring_mux.o amdgpu_xcp.o + amdgpu_ring_mux.o amdgpu_xcp.o amdgpu_seq64.o amdgpu_aca.o amdgpu_dev_coredump.o amdgpu-$(CONFIG_PROC_FS) += amdgpu_fdinfo.o @@ -98,13 +99,14 @@ amdgpu-y += \ vega20_reg_init.o nbio_v7_4.o nbio_v2_3.o nv.o arct_reg_init.o mxgpu_nv.o \ nbio_v7_2.o hdp_v4_0.o hdp_v5_0.o aldebaran_reg_init.o aldebaran.o soc21.o \ sienna_cichlid.o smu_v13_0_10.o nbio_v4_3.o hdp_v6_0.o nbio_v7_7.o hdp_v5_2.o lsdma_v6_0.o \ - nbio_v7_9.o aqua_vanjaram.o nbio_v7_11.o + nbio_v7_9.o aqua_vanjaram.o nbio_v7_11.o lsdma_v7_0.o hdp_v7_0.o nbif_v6_3_1.o # add DF block amdgpu-y += \ df_v1_7.o \ df_v3_6.o \ - df_v4_3.o + df_v4_3.o \ + df_v4_6_2.o # add GMC block amdgpu-y += \ @@ -131,7 +133,8 @@ amdgpu-y += \ vega20_ih.o \ navi10_ih.o \ ih_v6_0.o \ - ih_v6_1.o + ih_v6_1.o \ + ih_v7_0.o # add PSP block amdgpu-y += \ @@ -142,7 +145,8 @@ amdgpu-y += \ psp_v11_0_8.o \ psp_v12_0.o \ psp_v13_0.o \ - psp_v13_0_4.o + psp_v13_0_4.o \ + psp_v14_0.o # add DCE block amdgpu-y += \ @@ -207,6 +211,7 @@ amdgpu-y += \ vcn_v4_0.o \ vcn_v4_0_3.o \ vcn_v4_0_5.o \ + vcn_v5_0_0.o \ amdgpu_jpeg.o \ jpeg_v1_0.o \ jpeg_v2_0.o \ @@ -214,7 +219,8 @@ amdgpu-y += \ jpeg_v3_0.o \ jpeg_v4_0.o \ jpeg_v4_0_3.o \ - jpeg_v4_0_5.o + jpeg_v4_0_5.o \ + jpeg_v5_0_0.o # add VPE block amdgpu-y += \ @@ -232,7 +238,8 @@ amdgpu-y += \ athub_v1_0.o \ athub_v2_0.o \ athub_v2_1.o \ - athub_v3_0.o + athub_v3_0.o \ + athub_v4_1_0.o # add SMUIO block amdgpu-y += \ @@ -241,7 +248,8 @@ amdgpu-y += \ smuio_v11_0_6.o \ smuio_v13_0.o \ smuio_v13_0_3.o \ - smuio_v13_0_6.o + smuio_v13_0_6.o \ + smuio_v14_0_2.o # add reset block amdgpu-y += \ diff --git a/drivers/gpu/drm/amd/amdgpu/aldebaran.c b/drivers/gpu/drm/amd/amdgpu/aldebaran.c index 02f4c6f9d4f6..576067d66bb9 100644 --- a/drivers/gpu/drm/amd/amdgpu/aldebaran.c +++ b/drivers/gpu/drm/amd/amdgpu/aldebaran.c @@ -330,6 +330,7 @@ aldebaran_mode2_restore_hwcontext(struct amdgpu_reset_control *reset_ctl, { struct list_head *reset_device_list = reset_context->reset_device_list; struct amdgpu_device *tmp_adev = NULL; + struct amdgpu_ras *con; int r; if (reset_device_list == NULL) @@ -355,7 +356,30 @@ aldebaran_mode2_restore_hwcontext(struct amdgpu_reset_control *reset_ctl, */ amdgpu_register_gpu_instance(tmp_adev); - /* Resume RAS */ + /* Resume RAS, ecc_irq */ + con = amdgpu_ras_get_context(tmp_adev); + if (!amdgpu_sriov_vf(tmp_adev) && con) { + if (tmp_adev->sdma.ras && + tmp_adev->sdma.ras->ras_block.ras_late_init) { + r = tmp_adev->sdma.ras->ras_block.ras_late_init(tmp_adev, + &tmp_adev->sdma.ras->ras_block.ras_comm); + if (r) { + dev_err(tmp_adev->dev, "SDMA failed to execute ras_late_init! ret:%d\n", r); + goto end; + } + } + + if (tmp_adev->gfx.ras && + tmp_adev->gfx.ras->ras_block.ras_late_init) { + r = tmp_adev->gfx.ras->ras_block.ras_late_init(tmp_adev, + &tmp_adev->gfx.ras->ras_block.ras_comm); + if (r) { + dev_err(tmp_adev->dev, "GFX failed to execute ras_late_init! ret:%d\n", r); + goto end; + } + } + } + amdgpu_ras_resume(tmp_adev); /* Update PSP FW topology after reset */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 17c4872a0287..e0d7f4ee7e16 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -107,19 +107,20 @@ #include "amdgpu_smuio.h" #include "amdgpu_fdinfo.h" #include "amdgpu_mca.h" +#include "amdgpu_aca.h" #include "amdgpu_ras.h" #include "amdgpu_xcp.h" +#include "amdgpu_seq64.h" +#include "amdgpu_reg_state.h" #define MAX_GPU_INSTANCE 64 -struct amdgpu_gpu_instance -{ +struct amdgpu_gpu_instance { struct amdgpu_device *adev; int mgpu_fan_enabled; }; -struct amdgpu_mgpu_info -{ +struct amdgpu_mgpu_info { struct amdgpu_gpu_instance gpu_ins[MAX_GPU_INSTANCE]; struct mutex mutex; uint32_t num_gpu; @@ -138,8 +139,7 @@ enum amdgpu_ss { AMDGPU_SS_DRV_UNLOAD }; -struct amdgpu_watchdog_timer -{ +struct amdgpu_watchdog_timer { bool timeout_fatal_disable; uint32_t period; /* maxCycles = (1 << period), the number of cycles before a timeout */ }; @@ -194,10 +194,12 @@ extern int amdgpu_emu_mode; extern uint amdgpu_smu_memory_pool_size; extern int amdgpu_smu_pptable_id; extern uint amdgpu_dc_feature_mask; +extern uint amdgpu_freesync_vid_mode; extern uint amdgpu_dc_debug_mask; extern uint amdgpu_dc_visual_confirm; -extern uint amdgpu_dm_abm_level; +extern int amdgpu_dm_abm_level; extern int amdgpu_backlight; +extern int amdgpu_damage_clips; extern struct amdgpu_mgpu_info mgpu_info; extern int amdgpu_ras_enable; extern uint amdgpu_ras_mask; @@ -208,6 +210,7 @@ extern int amdgpu_async_gfx_ring; extern int amdgpu_mcbp; extern int amdgpu_discovery; extern int amdgpu_mes; +extern int amdgpu_mes_log_enable; extern int amdgpu_mes_kiq; extern int amdgpu_noretry; extern int amdgpu_force_asic_type; @@ -248,6 +251,9 @@ extern int amdgpu_umsch_mm; extern int amdgpu_seamless; extern int amdgpu_user_partt_mode; +extern int amdgpu_agp; + +extern int amdgpu_wbrf; #define AMDGPU_VM_MAX_NUM_CTX 4096 #define AMDGPU_SG_THRESHOLD (256*1024*1024) @@ -363,9 +369,6 @@ struct amdgpu_ip_block_version { const struct amd_ip_funcs *funcs; }; -#define HW_REV(_Major, _Minor, _Rev) \ - ((((uint32_t) (_Major)) << 16) | ((uint32_t) (_Minor) << 8) | ((uint32_t) (_Rev))) - struct amdgpu_ip_block { struct amdgpu_ip_block_status status; const struct amdgpu_ip_block_version *version; @@ -470,6 +473,7 @@ struct amdgpu_fpriv { struct amdgpu_vm vm; struct amdgpu_bo_va *prt_va; struct amdgpu_bo_va *csa_va; + struct amdgpu_bo_va *seq64_va; struct mutex bo_list_lock; struct idr bo_list_handles; struct amdgpu_ctx_mgr ctx_mgr; @@ -508,6 +512,31 @@ struct amdgpu_allowed_register_entry { bool grbm_indexed; }; +/** + * enum amd_reset_method - Methods for resetting AMD GPU devices + * + * @AMD_RESET_METHOD_NONE: The device will not be reset. + * @AMD_RESET_LEGACY: Method reserved for SI, CIK and VI ASICs. + * @AMD_RESET_MODE0: Reset the entire ASIC. Not currently available for the + * any device. + * @AMD_RESET_MODE1: Resets all IP blocks on the ASIC (SDMA, GFX, VCN, etc.) + * individually. Suitable only for some discrete GPU, not + * available for all ASICs. + * @AMD_RESET_MODE2: Resets a lesser level of IPs compared to MODE1. Which IPs + * are reset depends on the ASIC. Notably doesn't reset IPs + * shared with the CPU on APUs or the memory controllers (so + * VRAM is not lost). Not available on all ASICs. + * @AMD_RESET_BACO: BACO (Bus Alive, Chip Off) method powers off and on the card + * but without powering off the PCI bus. Suitable only for + * discrete GPUs. + * @AMD_RESET_PCI: Does a full bus reset using core Linux subsystem PCI reset + * and does a secondary bus reset or FLR, depending on what the + * underlying hardware supports. + * + * Methods available for AMD GPU driver for resetting the device. Not all + * methods are suitable for every device. User can override the method using + * module parameter `reset_method`. + */ enum amd_reset_method { AMD_RESET_METHOD_NONE = -1, AMD_RESET_METHOD_LEGACY = 0, @@ -577,7 +606,7 @@ struct amdgpu_asic_funcs { /* PCIe replay counter */ uint64_t (*get_pcie_replay_count)(struct amdgpu_device *adev); /* device supports BACO */ - bool (*supports_baco)(struct amdgpu_device *adev); + int (*supports_baco)(struct amdgpu_device *adev); /* pre asic_init quirks */ void (*pre_asic_init)(struct amdgpu_device *adev); /* enter/exit umd stable pstate */ @@ -587,6 +616,10 @@ struct amdgpu_asic_funcs { const struct amdgpu_video_codecs **codecs); /* encode "> 32bits" smn addressing */ u64 (*encode_ext_smn_addressing)(int ext_id); + + ssize_t (*get_reg_state)(struct amdgpu_device *adev, + enum amdgpu_reg_state reg_state, void *buf, + size_t max_size); }; /* @@ -759,6 +792,7 @@ struct amdgpu_mqd_prop { uint64_t eop_gpu_addr; uint32_t hqd_pipe_priority; uint32_t hqd_queue_priority; + bool allow_tunneling; bool hqd_active; }; @@ -773,6 +807,17 @@ struct amdgpu_mqd { struct amdgpu_reset_domain; struct amdgpu_fru_info; +struct amdgpu_reset_info { + /* reset dump register */ + u32 *reset_dump_reg_list; + u32 *reset_dump_reg_value; + int num_regs; + +#ifdef CONFIG_DEV_COREDUMP + struct amdgpu_coredump_info *coredump_info; +#endif +}; + /* * Non-zero (true) if the GPU has VRAM. Zero (false) otherwise. */ @@ -977,6 +1022,9 @@ struct amdgpu_device { /* GDS */ struct amdgpu_gds gds; + /* for userq and VM fences */ + struct amdgpu_seq64 seq64; + /* KFD */ struct amdgpu_kfd_dev kfd; @@ -998,6 +1046,9 @@ struct amdgpu_device { /* MCA */ struct amdgpu_mca mca; + /* ACA */ + struct amdgpu_aca aca; + struct amdgpu_ip_block ip_blocks[AMDGPU_MAX_IP_NUM]; uint32_t harvest_ip_mask; int num_ip_blocks; @@ -1031,6 +1082,8 @@ struct amdgpu_device { bool in_s3; bool in_s4; bool in_s0ix; + /* indicate amdgpu suspension status */ + bool suspend_complete; enum pp_mp1_state mp1_state; struct amdgpu_doorbell_index doorbell_index; @@ -1045,6 +1098,7 @@ struct amdgpu_device { long sdma_timeout; long video_timeout; long compute_timeout; + long psp_timeout; uint64_t unique_id; uint64_t df_perfmon_config_assign_mask[AMDGPU_MAX_DF_PERFMONS]; @@ -1081,10 +1135,7 @@ struct amdgpu_device { struct mutex benchmark_mutex; - /* reset dump register */ - uint32_t *reset_dump_reg_list; - uint32_t *reset_dump_reg_value; - int num_regs; + struct amdgpu_reset_info reset_info; bool scpm_enabled; uint32_t scpm_status; @@ -1100,6 +1151,7 @@ struct amdgpu_device { bool debug_vm; bool debug_largebar; bool debug_disable_soft_recovery; + bool debug_use_vram_fw_buf; }; static inline uint32_t amdgpu_ip_version(const struct amdgpu_device *adev, @@ -1111,14 +1163,12 @@ static inline uint32_t amdgpu_ip_version(const struct amdgpu_device *adev, return adev->ip_versions[ip][inst] & ~0xFFU; } -#ifdef CONFIG_DEV_COREDUMP -struct amdgpu_coredump_info { - struct amdgpu_device *adev; - struct amdgpu_task_info reset_task_info; - struct timespec64 reset_time; - bool reset_vram_lost; -}; -#endif +static inline uint32_t amdgpu_ip_version_full(const struct amdgpu_device *adev, + uint8_t ip, uint8_t inst) +{ + /* This returns full version - major/minor/rev/variant/subrevision */ + return adev->ip_versions[ip][inst]; +} static inline struct amdgpu_device *drm_to_adev(struct drm_device *ddev) { @@ -1156,11 +1206,18 @@ uint32_t amdgpu_device_rreg(struct amdgpu_device *adev, uint32_t reg, uint32_t acc_flags); u32 amdgpu_device_indirect_rreg_ext(struct amdgpu_device *adev, u64 reg_addr); +uint32_t amdgpu_device_xcc_rreg(struct amdgpu_device *adev, + uint32_t reg, uint32_t acc_flags, + uint32_t xcc_id); void amdgpu_device_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v, uint32_t acc_flags); void amdgpu_device_indirect_wreg_ext(struct amdgpu_device *adev, u64 reg_addr, u32 reg_data); +void amdgpu_device_xcc_wreg(struct amdgpu_device *adev, + uint32_t reg, uint32_t v, + uint32_t acc_flags, + uint32_t xcc_id); void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev, uint32_t reg, uint32_t v, uint32_t xcc_id); void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value); @@ -1201,8 +1258,8 @@ int emu_soc_asic_init(struct amdgpu_device *adev); #define RREG32_NO_KIQ(reg) amdgpu_device_rreg(adev, (reg), AMDGPU_REGS_NO_KIQ) #define WREG32_NO_KIQ(reg, v) amdgpu_device_wreg(adev, (reg), (v), AMDGPU_REGS_NO_KIQ) -#define RREG32_KIQ(reg) amdgpu_kiq_rreg(adev, (reg)) -#define WREG32_KIQ(reg, v) amdgpu_kiq_wreg(adev, (reg), (v)) +#define RREG32_KIQ(reg) amdgpu_kiq_rreg(adev, (reg), 0) +#define WREG32_KIQ(reg, v) amdgpu_kiq_wreg(adev, (reg), (v), 0) #define RREG8(reg) amdgpu_mm_rreg8(adev, (reg)) #define WREG8(reg, v) amdgpu_mm_wreg8(adev, (reg), (v)) @@ -1212,6 +1269,8 @@ int emu_soc_asic_init(struct amdgpu_device *adev); #define WREG32(reg, v) amdgpu_device_wreg(adev, (reg), (v), 0) #define REG_SET(FIELD, v) (((v) << FIELD##_SHIFT) & FIELD##_MASK) #define REG_GET(FIELD, v) (((v) << FIELD##_SHIFT) & FIELD##_MASK) +#define RREG32_XCC(reg, inst) amdgpu_device_xcc_rreg(adev, (reg), 0, inst) +#define WREG32_XCC(reg, v, inst) amdgpu_device_xcc_wreg(adev, (reg), (v), 0, inst) #define RREG32_PCIE(reg) adev->pcie_rreg(adev, (reg)) #define WREG32_PCIE(reg, v) adev->pcie_wreg(adev, (reg), (v)) #define RREG32_PCIE_PORT(reg) adev->pciep_rreg(adev, (reg)) @@ -1277,6 +1336,7 @@ int emu_soc_asic_init(struct amdgpu_device *adev); #define WREG32_FIELD_OFFSET(reg, offset, field, val) \ WREG32(mm##reg + offset, (RREG32(mm##reg + offset) & ~REG_FIELD_MASK(reg, field)) | (val) << REG_FIELD_SHIFT(reg, field)) +#define AMDGPU_GET_REG_FIELD(x, h, l) (((x) & GENMASK_ULL(h, l)) >> (l)) /* * BIOS helpers. */ @@ -1334,9 +1394,7 @@ void amdgpu_device_pci_config_reset(struct amdgpu_device *adev); int amdgpu_device_pci_reset(struct amdgpu_device *adev); bool amdgpu_device_need_post(struct amdgpu_device *adev); bool amdgpu_device_seamless_boot_supported(struct amdgpu_device *adev); -bool amdgpu_device_pcie_dynamic_switching_supported(void); bool amdgpu_device_should_use_aspm(struct amdgpu_device *adev); -bool amdgpu_device_aspm_support_quirk(void); void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev, u64 num_bytes, u64 num_vis_bytes); @@ -1350,7 +1408,8 @@ bool amdgpu_device_supports_atpx(struct drm_device *dev); bool amdgpu_device_supports_px(struct drm_device *dev); bool amdgpu_device_supports_boco(struct drm_device *dev); bool amdgpu_device_supports_smart_shift(struct drm_device *dev); -bool amdgpu_device_supports_baco(struct drm_device *dev); +int amdgpu_device_supports_baco(struct drm_device *dev); +void amdgpu_device_detect_runtime_pm_mode(struct amdgpu_device *adev); bool amdgpu_device_is_peer_accessible(struct amdgpu_device *adev, struct amdgpu_device *peer_adev); int amdgpu_device_baco_enter(struct drm_device *dev); @@ -1497,9 +1556,11 @@ static inline int amdgpu_acpi_smart_shift_update(struct drm_device *dev, #if defined(CONFIG_ACPI) && defined(CONFIG_SUSPEND) bool amdgpu_acpi_is_s3_active(struct amdgpu_device *adev); bool amdgpu_acpi_is_s0ix_active(struct amdgpu_device *adev); +void amdgpu_choose_low_power_state(struct amdgpu_device *adev); #else static inline bool amdgpu_acpi_is_s0ix_active(struct amdgpu_device *adev) { return false; } static inline bool amdgpu_acpi_is_s3_active(struct amdgpu_device *adev) { return false; } +static inline void amdgpu_choose_low_power_state(struct amdgpu_device *adev) { } #endif #if defined(CONFIG_DRM_AMD_DC) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c new file mode 100644 index 000000000000..c50202215f6b --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c @@ -0,0 +1,902 @@ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include <linux/list.h> +#include "amdgpu.h" +#include "amdgpu_aca.h" +#include "amdgpu_ras.h" + +#define ACA_BANK_HWID(type, hwid, mcatype) [ACA_HWIP_TYPE_##type] = {hwid, mcatype} + +typedef int bank_handler_t(struct aca_handle *handle, struct aca_bank *bank, enum aca_smu_type type, void *data); + +struct aca_banks { + int nr_banks; + struct list_head list; +}; + +struct aca_hwip { + int hwid; + int mcatype; +}; + +static struct aca_hwip aca_hwid_mcatypes[ACA_HWIP_TYPE_COUNT] = { + ACA_BANK_HWID(SMU, 0x01, 0x01), + ACA_BANK_HWID(PCS_XGMI, 0x50, 0x00), + ACA_BANK_HWID(UMC, 0x96, 0x00), +}; + +static void aca_banks_init(struct aca_banks *banks) +{ + if (!banks) + return; + + memset(banks, 0, sizeof(*banks)); + INIT_LIST_HEAD(&banks->list); +} + +static int aca_banks_add_bank(struct aca_banks *banks, struct aca_bank *bank) +{ + struct aca_bank_node *node; + + if (!bank) + return -EINVAL; + + node = kvzalloc(sizeof(*node), GFP_KERNEL); + if (!node) + return -ENOMEM; + + memcpy(&node->bank, bank, sizeof(*bank)); + + INIT_LIST_HEAD(&node->node); + list_add_tail(&node->node, &banks->list); + + banks->nr_banks++; + + return 0; +} + +static void aca_banks_release(struct aca_banks *banks) +{ + struct aca_bank_node *node, *tmp; + + list_for_each_entry_safe(node, tmp, &banks->list, node) { + list_del(&node->node); + kvfree(node); + } +} + +static int aca_smu_get_valid_aca_count(struct amdgpu_device *adev, enum aca_smu_type type, u32 *count) +{ + struct amdgpu_aca *aca = &adev->aca; + const struct aca_smu_funcs *smu_funcs = aca->smu_funcs; + + if (!count) + return -EINVAL; + + if (!smu_funcs || !smu_funcs->get_valid_aca_count) + return -EOPNOTSUPP; + + return smu_funcs->get_valid_aca_count(adev, type, count); +} + +static struct aca_regs_dump { + const char *name; + int reg_idx; +} aca_regs[] = { + {"CONTROL", ACA_REG_IDX_CTL}, + {"STATUS", ACA_REG_IDX_STATUS}, + {"ADDR", ACA_REG_IDX_ADDR}, + {"MISC", ACA_REG_IDX_MISC0}, + {"CONFIG", ACA_REG_IDX_CONFG}, + {"IPID", ACA_REG_IDX_IPID}, + {"SYND", ACA_REG_IDX_SYND}, + {"DESTAT", ACA_REG_IDX_DESTAT}, + {"DEADDR", ACA_REG_IDX_DEADDR}, + {"CONTROL_MASK", ACA_REG_IDX_CTL_MASK}, +}; + +static void aca_smu_bank_dump(struct amdgpu_device *adev, int idx, int total, struct aca_bank *bank, + struct ras_query_context *qctx) +{ + u64 event_id = qctx ? qctx->event_id : 0ULL; + int i; + + RAS_EVENT_LOG(adev, event_id, HW_ERR "Accelerator Check Architecture events logged\n"); + /* plus 1 for output format, e.g: ACA[08/08]: xxxx */ + for (i = 0; i < ARRAY_SIZE(aca_regs); i++) + RAS_EVENT_LOG(adev, event_id, HW_ERR "ACA[%02d/%02d].%s=0x%016llx\n", + idx + 1, total, aca_regs[i].name, bank->regs[aca_regs[i].reg_idx]); +} + +static int aca_smu_get_valid_aca_banks(struct amdgpu_device *adev, enum aca_smu_type type, + int start, int count, + struct aca_banks *banks, struct ras_query_context *qctx) +{ + struct amdgpu_aca *aca = &adev->aca; + const struct aca_smu_funcs *smu_funcs = aca->smu_funcs; + struct aca_bank bank; + int i, max_count, ret; + + if (!count) + return 0; + + if (!smu_funcs || !smu_funcs->get_valid_aca_bank) + return -EOPNOTSUPP; + + switch (type) { + case ACA_SMU_TYPE_UE: + max_count = smu_funcs->max_ue_bank_count; + break; + case ACA_SMU_TYPE_CE: + max_count = smu_funcs->max_ce_bank_count; + break; + default: + return -EINVAL; + } + + if (start + count >= max_count) + return -EINVAL; + + count = min_t(int, count, max_count); + for (i = 0; i < count; i++) { + memset(&bank, 0, sizeof(bank)); + ret = smu_funcs->get_valid_aca_bank(adev, type, start + i, &bank); + if (ret) + return ret; + + bank.type = type; + + aca_smu_bank_dump(adev, i, count, &bank, qctx); + + ret = aca_banks_add_bank(banks, &bank); + if (ret) + return ret; + } + + return 0; +} + +static bool aca_bank_hwip_is_matched(struct aca_bank *bank, enum aca_hwip_type type) +{ + + struct aca_hwip *hwip; + int hwid, mcatype; + u64 ipid; + + if (!bank || type == ACA_HWIP_TYPE_UNKNOW) + return false; + + hwip = &aca_hwid_mcatypes[type]; + if (!hwip->hwid) + return false; + + ipid = bank->regs[ACA_REG_IDX_IPID]; + hwid = ACA_REG__IPID__HARDWAREID(ipid); + mcatype = ACA_REG__IPID__MCATYPE(ipid); + + return hwip->hwid == hwid && hwip->mcatype == mcatype; +} + +static bool aca_bank_is_valid(struct aca_handle *handle, struct aca_bank *bank, enum aca_smu_type type) +{ + const struct aca_bank_ops *bank_ops = handle->bank_ops; + + if (!aca_bank_hwip_is_matched(bank, handle->hwip)) + return false; + + if (!bank_ops->aca_bank_is_valid) + return true; + + return bank_ops->aca_bank_is_valid(handle, bank, type, handle->data); +} + +static struct aca_bank_error *new_bank_error(struct aca_error *aerr, struct aca_bank_info *info) +{ + struct aca_bank_error *bank_error; + + bank_error = kvzalloc(sizeof(*bank_error), GFP_KERNEL); + if (!bank_error) + return NULL; + + INIT_LIST_HEAD(&bank_error->node); + memcpy(&bank_error->info, info, sizeof(*info)); + + mutex_lock(&aerr->lock); + list_add_tail(&bank_error->node, &aerr->list); + mutex_unlock(&aerr->lock); + + return bank_error; +} + +static struct aca_bank_error *find_bank_error(struct aca_error *aerr, struct aca_bank_info *info) +{ + struct aca_bank_error *bank_error = NULL; + struct aca_bank_info *tmp_info; + bool found = false; + + mutex_lock(&aerr->lock); + list_for_each_entry(bank_error, &aerr->list, node) { + tmp_info = &bank_error->info; + if (tmp_info->socket_id == info->socket_id && + tmp_info->die_id == info->die_id) { + found = true; + goto out_unlock; + } + } + +out_unlock: + mutex_unlock(&aerr->lock); + + return found ? bank_error : NULL; +} + +static void aca_bank_error_remove(struct aca_error *aerr, struct aca_bank_error *bank_error) +{ + if (!aerr || !bank_error) + return; + + list_del(&bank_error->node); + aerr->nr_errors--; + + kvfree(bank_error); +} + +static struct aca_bank_error *get_bank_error(struct aca_error *aerr, struct aca_bank_info *info) +{ + struct aca_bank_error *bank_error; + + if (!aerr || !info) + return NULL; + + bank_error = find_bank_error(aerr, info); + if (bank_error) + return bank_error; + + return new_bank_error(aerr, info); +} + +int aca_error_cache_log_bank_error(struct aca_handle *handle, struct aca_bank_info *info, + enum aca_error_type type, u64 count) +{ + struct aca_error_cache *error_cache = &handle->error_cache; + struct aca_bank_error *bank_error; + struct aca_error *aerr; + + if (!handle || !info || type >= ACA_ERROR_TYPE_COUNT) + return -EINVAL; + + if (!count) + return 0; + + aerr = &error_cache->errors[type]; + bank_error = get_bank_error(aerr, info); + if (!bank_error) + return -ENOMEM; + + bank_error->count += count; + + return 0; +} + +static int aca_bank_parser(struct aca_handle *handle, struct aca_bank *bank, enum aca_smu_type type) +{ + const struct aca_bank_ops *bank_ops = handle->bank_ops; + + if (!bank) + return -EINVAL; + + if (!bank_ops->aca_bank_parser) + return -EOPNOTSUPP; + + return bank_ops->aca_bank_parser(handle, bank, type, + handle->data); +} + +static int handler_aca_log_bank_error(struct aca_handle *handle, struct aca_bank *bank, + enum aca_smu_type type, void *data) +{ + int ret; + + ret = aca_bank_parser(handle, bank, type); + if (ret) + return ret; + + return 0; +} + +static int aca_dispatch_bank(struct aca_handle_manager *mgr, struct aca_bank *bank, + enum aca_smu_type type, bank_handler_t handler, void *data) +{ + struct aca_handle *handle; + int ret; + + if (list_empty(&mgr->list)) + return 0; + + list_for_each_entry(handle, &mgr->list, node) { + if (!aca_bank_is_valid(handle, bank, type)) + continue; + + ret = handler(handle, bank, type, data); + if (ret) + return ret; + } + + return 0; +} + +static int aca_dispatch_banks(struct aca_handle_manager *mgr, struct aca_banks *banks, + enum aca_smu_type type, bank_handler_t handler, void *data) +{ + struct aca_bank_node *node; + struct aca_bank *bank; + int ret; + + if (!mgr || !banks) + return -EINVAL; + + /* pre check to avoid unnecessary operations */ + if (list_empty(&mgr->list) || list_empty(&banks->list)) + return 0; + + list_for_each_entry(node, &banks->list, node) { + bank = &node->bank; + + ret = aca_dispatch_bank(mgr, bank, type, handler, data); + if (ret) + return ret; + } + + return 0; +} + +static bool aca_bank_should_update(struct amdgpu_device *adev, enum aca_smu_type type) +{ + struct amdgpu_aca *aca = &adev->aca; + bool ret = true; + + /* + * Because the UE Valid MCA count will only be cleared after reset, + * in order to avoid repeated counting of the error count, + * the aca bank is only updated once during the gpu recovery stage. + */ + if (type == ACA_SMU_TYPE_UE) { + if (amdgpu_ras_intr_triggered()) + ret = atomic_cmpxchg(&aca->ue_update_flag, 0, 1) == 0; + else + atomic_set(&aca->ue_update_flag, 0); + } + + return ret; +} + +static int aca_banks_update(struct amdgpu_device *adev, enum aca_smu_type type, + bank_handler_t handler, struct ras_query_context *qctx, void *data) +{ + struct amdgpu_aca *aca = &adev->aca; + struct aca_banks banks; + u32 count = 0; + int ret; + + if (list_empty(&aca->mgr.list)) + return 0; + + if (!aca_bank_should_update(adev, type)) + return 0; + + ret = aca_smu_get_valid_aca_count(adev, type, &count); + if (ret) + return ret; + + if (!count) + return 0; + + aca_banks_init(&banks); + + ret = aca_smu_get_valid_aca_banks(adev, type, 0, count, &banks, qctx); + if (ret) + goto err_release_banks; + + if (list_empty(&banks.list)) { + ret = 0; + goto err_release_banks; + } + + ret = aca_dispatch_banks(&aca->mgr, &banks, type, + handler, data); + if (ret) + goto err_release_banks; + +err_release_banks: + aca_banks_release(&banks); + + return ret; +} + +static int aca_log_aca_error_data(struct aca_bank_error *bank_error, enum aca_error_type type, struct ras_err_data *err_data) +{ + struct aca_bank_info *info; + struct amdgpu_smuio_mcm_config_info mcm_info; + u64 count; + + if (type >= ACA_ERROR_TYPE_COUNT) + return -EINVAL; + + count = bank_error->count; + if (!count) + return 0; + + info = &bank_error->info; + mcm_info.die_id = info->die_id; + mcm_info.socket_id = info->socket_id; + + switch (type) { + case ACA_ERROR_TYPE_UE: + amdgpu_ras_error_statistic_ue_count(err_data, &mcm_info, NULL, count); + break; + case ACA_ERROR_TYPE_CE: + amdgpu_ras_error_statistic_ce_count(err_data, &mcm_info, NULL, count); + break; + case ACA_ERROR_TYPE_DEFERRED: + amdgpu_ras_error_statistic_de_count(err_data, &mcm_info, NULL, count); + break; + default: + break; + } + + return 0; +} + +static int aca_log_aca_error(struct aca_handle *handle, enum aca_error_type type, struct ras_err_data *err_data) +{ + struct aca_error_cache *error_cache = &handle->error_cache; + struct aca_error *aerr = &error_cache->errors[type]; + struct aca_bank_error *bank_error, *tmp; + + mutex_lock(&aerr->lock); + + if (list_empty(&aerr->list)) + goto out_unlock; + + list_for_each_entry_safe(bank_error, tmp, &aerr->list, node) { + aca_log_aca_error_data(bank_error, type, err_data); + aca_bank_error_remove(aerr, bank_error); + } + +out_unlock: + mutex_unlock(&aerr->lock); + + return 0; +} + +static int __aca_get_error_data(struct amdgpu_device *adev, struct aca_handle *handle, enum aca_error_type type, + struct ras_err_data *err_data, struct ras_query_context *qctx) +{ + enum aca_smu_type smu_type; + int ret; + + switch (type) { + case ACA_ERROR_TYPE_UE: + smu_type = ACA_SMU_TYPE_UE; + break; + case ACA_ERROR_TYPE_CE: + case ACA_ERROR_TYPE_DEFERRED: + smu_type = ACA_SMU_TYPE_CE; + break; + default: + return -EINVAL; + } + + /* udpate aca bank to aca source error_cache first */ + ret = aca_banks_update(adev, smu_type, handler_aca_log_bank_error, qctx, NULL); + if (ret) + return ret; + + return aca_log_aca_error(handle, type, err_data); +} + +static bool aca_handle_is_valid(struct aca_handle *handle) +{ + if (!handle->mask || !list_empty(&handle->node)) + return false; + + return true; +} + +int amdgpu_aca_get_error_data(struct amdgpu_device *adev, struct aca_handle *handle, + enum aca_error_type type, struct ras_err_data *err_data, + struct ras_query_context *qctx) +{ + if (!handle || !err_data) + return -EINVAL; + + if (aca_handle_is_valid(handle)) + return -EOPNOTSUPP; + + if (!(BIT(type) & handle->mask)) + return 0; + + return __aca_get_error_data(adev, handle, type, err_data, qctx); +} + +static void aca_error_init(struct aca_error *aerr, enum aca_error_type type) +{ + mutex_init(&aerr->lock); + INIT_LIST_HEAD(&aerr->list); + aerr->type = type; + aerr->nr_errors = 0; +} + +static void aca_init_error_cache(struct aca_handle *handle) +{ + struct aca_error_cache *error_cache = &handle->error_cache; + int type; + + for (type = ACA_ERROR_TYPE_UE; type < ACA_ERROR_TYPE_COUNT; type++) + aca_error_init(&error_cache->errors[type], type); +} + +static void aca_error_fini(struct aca_error *aerr) +{ + struct aca_bank_error *bank_error, *tmp; + + mutex_lock(&aerr->lock); + list_for_each_entry_safe(bank_error, tmp, &aerr->list, node) + aca_bank_error_remove(aerr, bank_error); + + mutex_destroy(&aerr->lock); +} + +static void aca_fini_error_cache(struct aca_handle *handle) +{ + struct aca_error_cache *error_cache = &handle->error_cache; + int type; + + for (type = ACA_ERROR_TYPE_UE; type < ACA_ERROR_TYPE_COUNT; type++) + aca_error_fini(&error_cache->errors[type]); +} + +static int add_aca_handle(struct amdgpu_device *adev, struct aca_handle_manager *mgr, struct aca_handle *handle, + const char *name, const struct aca_info *ras_info, void *data) +{ + memset(handle, 0, sizeof(*handle)); + + handle->adev = adev; + handle->mgr = mgr; + handle->name = name; + handle->hwip = ras_info->hwip; + handle->mask = ras_info->mask; + handle->bank_ops = ras_info->bank_ops; + handle->data = data; + aca_init_error_cache(handle); + + INIT_LIST_HEAD(&handle->node); + list_add_tail(&handle->node, &mgr->list); + mgr->nr_handles++; + + return 0; +} + +static ssize_t aca_sysfs_read(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct aca_handle *handle = container_of(attr, struct aca_handle, aca_attr); + + /* NOTE: the aca cache will be auto cleared once read, + * So the driver should unify the query entry point, forward request to ras query interface directly */ + return amdgpu_ras_aca_sysfs_read(dev, attr, handle, buf, handle->data); +} + +static int add_aca_sysfs(struct amdgpu_device *adev, struct aca_handle *handle) +{ + struct device_attribute *aca_attr = &handle->aca_attr; + + snprintf(handle->attr_name, sizeof(handle->attr_name) - 1, "aca_%s", handle->name); + aca_attr->show = aca_sysfs_read; + aca_attr->attr.name = handle->attr_name; + aca_attr->attr.mode = S_IRUGO; + sysfs_attr_init(&aca_attr->attr); + + return sysfs_add_file_to_group(&adev->dev->kobj, + &aca_attr->attr, + "ras"); +} + +int amdgpu_aca_add_handle(struct amdgpu_device *adev, struct aca_handle *handle, + const char *name, const struct aca_info *ras_info, void *data) +{ + struct amdgpu_aca *aca = &adev->aca; + int ret; + + if (!amdgpu_aca_is_enabled(adev)) + return 0; + + ret = add_aca_handle(adev, &aca->mgr, handle, name, ras_info, data); + if (ret) + return ret; + + return add_aca_sysfs(adev, handle); +} + +static void remove_aca_handle(struct aca_handle *handle) +{ + struct aca_handle_manager *mgr = handle->mgr; + + aca_fini_error_cache(handle); + list_del(&handle->node); + mgr->nr_handles--; +} + +static void remove_aca_sysfs(struct aca_handle *handle) +{ + struct amdgpu_device *adev = handle->adev; + struct device_attribute *aca_attr = &handle->aca_attr; + + if (adev->dev->kobj.sd) + sysfs_remove_file_from_group(&adev->dev->kobj, + &aca_attr->attr, + "ras"); +} + +void amdgpu_aca_remove_handle(struct aca_handle *handle) +{ + if (!handle || list_empty(&handle->node)) + return; + + remove_aca_sysfs(handle); + remove_aca_handle(handle); +} + +static int aca_manager_init(struct aca_handle_manager *mgr) +{ + INIT_LIST_HEAD(&mgr->list); + mgr->nr_handles = 0; + + return 0; +} + +static void aca_manager_fini(struct aca_handle_manager *mgr) +{ + struct aca_handle *handle, *tmp; + + list_for_each_entry_safe(handle, tmp, &mgr->list, node) + amdgpu_aca_remove_handle(handle); +} + +bool amdgpu_aca_is_enabled(struct amdgpu_device *adev) +{ + return adev->aca.is_enabled; +} + +int amdgpu_aca_init(struct amdgpu_device *adev) +{ + struct amdgpu_aca *aca = &adev->aca; + int ret; + + atomic_set(&aca->ue_update_flag, 0); + + ret = aca_manager_init(&aca->mgr); + if (ret) + return ret; + + return 0; +} + +void amdgpu_aca_fini(struct amdgpu_device *adev) +{ + struct amdgpu_aca *aca = &adev->aca; + + aca_manager_fini(&aca->mgr); + + atomic_set(&aca->ue_update_flag, 0); +} + +int amdgpu_aca_reset(struct amdgpu_device *adev) +{ + amdgpu_aca_fini(adev); + + return amdgpu_aca_init(adev); +} + +void amdgpu_aca_set_smu_funcs(struct amdgpu_device *adev, const struct aca_smu_funcs *smu_funcs) +{ + struct amdgpu_aca *aca = &adev->aca; + + WARN_ON(aca->smu_funcs); + aca->smu_funcs = smu_funcs; +} + +int aca_bank_info_decode(struct aca_bank *bank, struct aca_bank_info *info) +{ + u64 ipid; + u32 instidhi, instidlo; + + if (!bank || !info) + return -EINVAL; + + ipid = bank->regs[ACA_REG_IDX_IPID]; + info->hwid = ACA_REG__IPID__HARDWAREID(ipid); + info->mcatype = ACA_REG__IPID__MCATYPE(ipid); + /* + * Unfied DieID Format: SAASS. A:AID, S:Socket. + * Unfied DieID[4:4] = InstanceId[0:0] + * Unfied DieID[0:3] = InstanceIdHi[0:3] + */ + instidhi = ACA_REG__IPID__INSTANCEIDHI(ipid); + instidlo = ACA_REG__IPID__INSTANCEIDLO(ipid); + info->die_id = ((instidhi >> 2) & 0x03); + info->socket_id = ((instidlo & 0x1) << 2) | (instidhi & 0x03); + + return 0; +} + +static int aca_bank_get_error_code(struct amdgpu_device *adev, struct aca_bank *bank) +{ + struct amdgpu_aca *aca = &adev->aca; + const struct aca_smu_funcs *smu_funcs = aca->smu_funcs; + + if (!smu_funcs || !smu_funcs->parse_error_code) + return -EOPNOTSUPP; + + return smu_funcs->parse_error_code(adev, bank); +} + +int aca_bank_check_error_codes(struct amdgpu_device *adev, struct aca_bank *bank, int *err_codes, int size) +{ + int i, error_code; + + if (!bank || !err_codes) + return -EINVAL; + + error_code = aca_bank_get_error_code(adev, bank); + if (error_code < 0) + return error_code; + + for (i = 0; i < size; i++) { + if (err_codes[i] == error_code) + return 0; + } + + return -EINVAL; +} + +int amdgpu_aca_smu_set_debug_mode(struct amdgpu_device *adev, bool en) +{ + struct amdgpu_aca *aca = &adev->aca; + const struct aca_smu_funcs *smu_funcs = aca->smu_funcs; + + if (!smu_funcs || !smu_funcs->set_debug_mode) + return -EOPNOTSUPP; + + return smu_funcs->set_debug_mode(adev, en); +} + +#if defined(CONFIG_DEBUG_FS) +static int amdgpu_aca_smu_debug_mode_set(void *data, u64 val) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)data; + int ret; + + ret = amdgpu_ras_set_aca_debug_mode(adev, val ? true : false); + if (ret) + return ret; + + dev_info(adev->dev, "amdgpu set smu aca debug mode %s success\n", val ? "on" : "off"); + + return 0; +} + +static void aca_dump_entry(struct seq_file *m, struct aca_bank *bank, enum aca_smu_type type, int idx) +{ + struct aca_bank_info info; + int i, ret; + + ret = aca_bank_info_decode(bank, &info); + if (ret) + return; + + seq_printf(m, "aca entry[%d].type: %s\n", idx, type == ACA_SMU_TYPE_UE ? "UE" : "CE"); + seq_printf(m, "aca entry[%d].info: socketid:%d aid:%d hwid:0x%03x mcatype:0x%04x\n", + idx, info.socket_id, info.die_id, info.hwid, info.mcatype); + + for (i = 0; i < ARRAY_SIZE(aca_regs); i++) + seq_printf(m, "aca entry[%d].regs[%d]: 0x%016llx\n", idx, aca_regs[i].reg_idx, bank->regs[aca_regs[i].reg_idx]); +} + +struct aca_dump_context { + struct seq_file *m; + int idx; +}; + +static int handler_aca_bank_dump(struct aca_handle *handle, struct aca_bank *bank, + enum aca_smu_type type, void *data) +{ + struct aca_dump_context *ctx = (struct aca_dump_context *)data; + + aca_dump_entry(ctx->m, bank, type, ctx->idx++); + + return handler_aca_log_bank_error(handle, bank, type, NULL); +} + +static int aca_dump_show(struct seq_file *m, enum aca_smu_type type) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)m->private; + struct aca_dump_context context = { + .m = m, + .idx = 0, + }; + + return aca_banks_update(adev, type, handler_aca_bank_dump, NULL, (void *)&context); +} + +static int aca_dump_ce_show(struct seq_file *m, void *unused) +{ + return aca_dump_show(m, ACA_SMU_TYPE_CE); +} + +static int aca_dump_ce_open(struct inode *inode, struct file *file) +{ + return single_open(file, aca_dump_ce_show, inode->i_private); +} + +static const struct file_operations aca_ce_dump_debug_fops = { + .owner = THIS_MODULE, + .open = aca_dump_ce_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int aca_dump_ue_show(struct seq_file *m, void *unused) +{ + return aca_dump_show(m, ACA_SMU_TYPE_UE); +} + +static int aca_dump_ue_open(struct inode *inode, struct file *file) +{ + return single_open(file, aca_dump_ue_show, inode->i_private); +} + +static const struct file_operations aca_ue_dump_debug_fops = { + .owner = THIS_MODULE, + .open = aca_dump_ue_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +DEFINE_DEBUGFS_ATTRIBUTE(aca_debug_mode_fops, NULL, amdgpu_aca_smu_debug_mode_set, "%llu\n"); +#endif + +void amdgpu_aca_smu_debugfs_init(struct amdgpu_device *adev, struct dentry *root) +{ +#if defined(CONFIG_DEBUG_FS) + if (!root || adev->ip_versions[MP1_HWIP][0] != IP_VERSION(13, 0, 6)) + return; + + debugfs_create_file("aca_debug_mode", 0200, root, adev, &aca_debug_mode_fops); + debugfs_create_file("aca_ue_dump", 0400, root, adev, &aca_ue_dump_debug_fops); + debugfs_create_file("aca_ce_dump", 0400, root, adev, &aca_ce_dump_debug_fops); +#endif +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h new file mode 100644 index 000000000000..5ef6b745f222 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h @@ -0,0 +1,211 @@ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __AMDGPU_ACA_H__ +#define __AMDGPU_ACA_H__ + +#include <linux/list.h> + +struct ras_err_data; +struct ras_query_context; + +#define ACA_MAX_REGS_COUNT (16) + +#define ACA_REG_FIELD(x, h, l) (((x) & GENMASK_ULL(h, l)) >> l) +#define ACA_REG__STATUS__VAL(x) ACA_REG_FIELD(x, 63, 63) +#define ACA_REG__STATUS__OVERFLOW(x) ACA_REG_FIELD(x, 62, 62) +#define ACA_REG__STATUS__UC(x) ACA_REG_FIELD(x, 61, 61) +#define ACA_REG__STATUS__EN(x) ACA_REG_FIELD(x, 60, 60) +#define ACA_REG__STATUS__MISCV(x) ACA_REG_FIELD(x, 59, 59) +#define ACA_REG__STATUS__ADDRV(x) ACA_REG_FIELD(x, 58, 58) +#define ACA_REG__STATUS__PCC(x) ACA_REG_FIELD(x, 57, 57) +#define ACA_REG__STATUS__ERRCOREIDVAL(x) ACA_REG_FIELD(x, 56, 56) +#define ACA_REG__STATUS__TCC(x) ACA_REG_FIELD(x, 55, 55) +#define ACA_REG__STATUS__SYNDV(x) ACA_REG_FIELD(x, 53, 53) +#define ACA_REG__STATUS__CECC(x) ACA_REG_FIELD(x, 46, 46) +#define ACA_REG__STATUS__UECC(x) ACA_REG_FIELD(x, 45, 45) +#define ACA_REG__STATUS__DEFERRED(x) ACA_REG_FIELD(x, 44, 44) +#define ACA_REG__STATUS__POISON(x) ACA_REG_FIELD(x, 43, 43) +#define ACA_REG__STATUS__SCRUB(x) ACA_REG_FIELD(x, 40, 40) +#define ACA_REG__STATUS__ERRCOREID(x) ACA_REG_FIELD(x, 37, 32) +#define ACA_REG__STATUS__ADDRLSB(x) ACA_REG_FIELD(x, 29, 24) +#define ACA_REG__STATUS__ERRORCODEEXT(x) ACA_REG_FIELD(x, 21, 16) +#define ACA_REG__STATUS__ERRORCODE(x) ACA_REG_FIELD(x, 15, 0) + +#define ACA_REG__IPID__MCATYPE(x) ACA_REG_FIELD(x, 63, 48) +#define ACA_REG__IPID__INSTANCEIDHI(x) ACA_REG_FIELD(x, 47, 44) +#define ACA_REG__IPID__HARDWAREID(x) ACA_REG_FIELD(x, 43, 32) +#define ACA_REG__IPID__INSTANCEIDLO(x) ACA_REG_FIELD(x, 31, 0) + +#define ACA_REG__MISC0__VALID(x) ACA_REG_FIELD(x, 63, 63) +#define ACA_REG__MISC0__OVRFLW(x) ACA_REG_FIELD(x, 48, 48) +#define ACA_REG__MISC0__ERRCNT(x) ACA_REG_FIELD(x, 43, 32) + +#define ACA_REG__SYND__ERRORINFORMATION(x) ACA_REG_FIELD(x, 17, 0) + +/* NOTE: The following codes refers to the smu header file */ +#define ACA_EXTERROR_CODE_CE 0x3a +#define ACA_EXTERROR_CODE_FAULT 0x3b + +#define ACA_ERROR_UE_MASK BIT_MASK(ACA_ERROR_TYPE_UE) +#define ACA_ERROR_CE_MASK BIT_MASK(ACA_ERROR_TYPE_CE) +#define ACA_ERROR_DEFERRED_MASK BIT_MASK(ACA_ERROR_TYPE_DEFERRED) + +enum aca_reg_idx { + ACA_REG_IDX_CTL = 0, + ACA_REG_IDX_STATUS = 1, + ACA_REG_IDX_ADDR = 2, + ACA_REG_IDX_MISC0 = 3, + ACA_REG_IDX_CONFG = 4, + ACA_REG_IDX_IPID = 5, + ACA_REG_IDX_SYND = 6, + ACA_REG_IDX_DESTAT = 8, + ACA_REG_IDX_DEADDR = 9, + ACA_REG_IDX_CTL_MASK = 10, + ACA_REG_IDX_COUNT = 16, +}; + +enum aca_hwip_type { + ACA_HWIP_TYPE_UNKNOW = -1, + ACA_HWIP_TYPE_PSP = 0, + ACA_HWIP_TYPE_UMC, + ACA_HWIP_TYPE_SMU, + ACA_HWIP_TYPE_PCS_XGMI, + ACA_HWIP_TYPE_COUNT, +}; + +enum aca_error_type { + ACA_ERROR_TYPE_INVALID = -1, + ACA_ERROR_TYPE_UE = 0, + ACA_ERROR_TYPE_CE, + ACA_ERROR_TYPE_DEFERRED, + ACA_ERROR_TYPE_COUNT +}; + +enum aca_smu_type { + ACA_SMU_TYPE_UE = 0, + ACA_SMU_TYPE_CE, + ACA_SMU_TYPE_COUNT, +}; + +struct aca_bank { + enum aca_smu_type type; + u64 regs[ACA_MAX_REGS_COUNT]; +}; + +struct aca_bank_node { + struct aca_bank bank; + struct list_head node; +}; + +struct aca_bank_info { + int die_id; + int socket_id; + int hwid; + int mcatype; +}; + +struct aca_bank_error { + struct list_head node; + struct aca_bank_info info; + u64 count; +}; + +struct aca_error { + struct list_head list; + struct mutex lock; + enum aca_error_type type; + int nr_errors; +}; + +struct aca_handle_manager { + struct list_head list; + int nr_handles; +}; + +struct aca_error_cache { + struct aca_error errors[ACA_ERROR_TYPE_COUNT]; +}; + +struct aca_handle { + struct list_head node; + enum aca_hwip_type hwip; + struct amdgpu_device *adev; + struct aca_handle_manager *mgr; + struct aca_error_cache error_cache; + const struct aca_bank_ops *bank_ops; + struct device_attribute aca_attr; + char attr_name[64]; + const char *name; + u32 mask; + void *data; +}; + +struct aca_bank_ops { + int (*aca_bank_parser)(struct aca_handle *handle, struct aca_bank *bank, enum aca_smu_type type, void *data); + bool (*aca_bank_is_valid)(struct aca_handle *handle, struct aca_bank *bank, enum aca_smu_type type, + void *data); +}; + +struct aca_smu_funcs { + int max_ue_bank_count; + int max_ce_bank_count; + int (*set_debug_mode)(struct amdgpu_device *adev, bool enable); + int (*get_valid_aca_count)(struct amdgpu_device *adev, enum aca_smu_type type, u32 *count); + int (*get_valid_aca_bank)(struct amdgpu_device *adev, enum aca_smu_type type, int idx, struct aca_bank *bank); + int (*parse_error_code)(struct amdgpu_device *adev, struct aca_bank *bank); +}; + +struct amdgpu_aca { + struct aca_handle_manager mgr; + const struct aca_smu_funcs *smu_funcs; + atomic_t ue_update_flag; + bool is_enabled; +}; + +struct aca_info { + enum aca_hwip_type hwip; + const struct aca_bank_ops *bank_ops; + u32 mask; +}; + +int amdgpu_aca_init(struct amdgpu_device *adev); +void amdgpu_aca_fini(struct amdgpu_device *adev); +int amdgpu_aca_reset(struct amdgpu_device *adev); +void amdgpu_aca_set_smu_funcs(struct amdgpu_device *adev, const struct aca_smu_funcs *smu_funcs); +bool amdgpu_aca_is_enabled(struct amdgpu_device *adev); + +int aca_bank_info_decode(struct aca_bank *bank, struct aca_bank_info *info); +int aca_bank_check_error_codes(struct amdgpu_device *adev, struct aca_bank *bank, int *err_codes, int size); + +int amdgpu_aca_add_handle(struct amdgpu_device *adev, struct aca_handle *handle, + const char *name, const struct aca_info *aca_info, void *data); +void amdgpu_aca_remove_handle(struct aca_handle *handle); +int amdgpu_aca_get_error_data(struct amdgpu_device *adev, struct aca_handle *handle, + enum aca_error_type type, struct ras_err_data *err_data, + struct ras_query_context *qctx); +int amdgpu_aca_smu_set_debug_mode(struct amdgpu_device *adev, bool en); +void amdgpu_aca_smu_debugfs_init(struct amdgpu_device *adev, struct dentry *root); +int aca_error_cache_log_bank_error(struct aca_handle *handle, struct aca_bank_info *info, + enum aca_error_type type, u64 count); +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c index 2bca37044ad0..7099ff9cf8c5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c @@ -68,7 +68,7 @@ struct amdgpu_acpi_xcc_info { struct amdgpu_acpi_dev_info { struct list_head list; struct list_head xcc_list; - uint16_t bdf; + uint32_t sbdf; uint16_t supp_xcp_mode; uint16_t xcp_mode; uint16_t mem_mode; @@ -927,7 +927,7 @@ static acpi_status amdgpu_acpi_get_node_id(acpi_handle handle, #endif } -static struct amdgpu_acpi_dev_info *amdgpu_acpi_get_dev(u16 bdf) +static struct amdgpu_acpi_dev_info *amdgpu_acpi_get_dev(u32 sbdf) { struct amdgpu_acpi_dev_info *acpi_dev; @@ -935,14 +935,14 @@ static struct amdgpu_acpi_dev_info *amdgpu_acpi_get_dev(u16 bdf) return NULL; list_for_each_entry(acpi_dev, &amdgpu_acpi_dev_list, list) - if (acpi_dev->bdf == bdf) + if (acpi_dev->sbdf == sbdf) return acpi_dev; return NULL; } static int amdgpu_acpi_dev_init(struct amdgpu_acpi_dev_info **dev_info, - struct amdgpu_acpi_xcc_info *xcc_info, u16 bdf) + struct amdgpu_acpi_xcc_info *xcc_info, u32 sbdf) { struct amdgpu_acpi_dev_info *tmp; union acpi_object *obj; @@ -955,7 +955,7 @@ static int amdgpu_acpi_dev_init(struct amdgpu_acpi_dev_info **dev_info, INIT_LIST_HEAD(&tmp->xcc_list); INIT_LIST_HEAD(&tmp->list); - tmp->bdf = bdf; + tmp->sbdf = sbdf; obj = acpi_evaluate_dsm_typed(xcc_info->handle, &amd_xcc_dsm_guid, 0, AMD_XCC_DSM_GET_SUPP_MODE, NULL, @@ -1007,7 +1007,7 @@ static int amdgpu_acpi_dev_init(struct amdgpu_acpi_dev_info **dev_info, DRM_DEBUG_DRIVER( "New dev(%x): Supported xcp mode: %x curr xcp_mode : %x mem mode : %x, tmr base: %llx tmr size: %llx ", - tmp->bdf, tmp->supp_xcp_mode, tmp->xcp_mode, tmp->mem_mode, + tmp->sbdf, tmp->supp_xcp_mode, tmp->xcp_mode, tmp->mem_mode, tmp->tmr_base, tmp->tmr_size); list_add_tail(&tmp->list, &amdgpu_acpi_dev_list); *dev_info = tmp; @@ -1023,7 +1023,7 @@ out: } static int amdgpu_acpi_get_xcc_info(struct amdgpu_acpi_xcc_info *xcc_info, - u16 *bdf) + u32 *sbdf) { union acpi_object *obj; acpi_status status; @@ -1054,8 +1054,10 @@ static int amdgpu_acpi_get_xcc_info(struct amdgpu_acpi_xcc_info *xcc_info, xcc_info->phy_id = (obj->integer.value >> 32) & 0xFF; /* xcp node of this xcc [47:40] */ xcc_info->xcp_node = (obj->integer.value >> 40) & 0xFF; + /* PF domain of this xcc [31:16] */ + *sbdf = (obj->integer.value) & 0xFFFF0000; /* PF bus/dev/fn of this xcc [63:48] */ - *bdf = (obj->integer.value >> 48) & 0xFFFF; + *sbdf |= (obj->integer.value >> 48) & 0xFFFF; ACPI_FREE(obj); obj = NULL; @@ -1079,7 +1081,7 @@ static int amdgpu_acpi_enumerate_xcc(void) struct acpi_device *acpi_dev; char hid[ACPI_ID_LEN]; int ret, id; - u16 bdf; + u32 sbdf; INIT_LIST_HEAD(&amdgpu_acpi_dev_list); xa_init(&numa_info_xa); @@ -1107,16 +1109,16 @@ static int amdgpu_acpi_enumerate_xcc(void) xcc_info->handle = acpi_device_handle(acpi_dev); acpi_dev_put(acpi_dev); - ret = amdgpu_acpi_get_xcc_info(xcc_info, &bdf); + ret = amdgpu_acpi_get_xcc_info(xcc_info, &sbdf); if (ret) { kfree(xcc_info); continue; } - dev_info = amdgpu_acpi_get_dev(bdf); + dev_info = amdgpu_acpi_get_dev(sbdf); if (!dev_info) - ret = amdgpu_acpi_dev_init(&dev_info, xcc_info, bdf); + ret = amdgpu_acpi_dev_init(&dev_info, xcc_info, sbdf); if (ret == -ENOMEM) return ret; @@ -1136,13 +1138,14 @@ int amdgpu_acpi_get_tmr_info(struct amdgpu_device *adev, u64 *tmr_offset, u64 *tmr_size) { struct amdgpu_acpi_dev_info *dev_info; - u16 bdf; + u32 sbdf; if (!tmr_offset || !tmr_size) return -EINVAL; - bdf = pci_dev_id(adev->pdev); - dev_info = amdgpu_acpi_get_dev(bdf); + sbdf = (pci_domain_nr(adev->pdev->bus) << 16); + sbdf |= pci_dev_id(adev->pdev); + dev_info = amdgpu_acpi_get_dev(sbdf); if (!dev_info) return -ENOENT; @@ -1157,13 +1160,14 @@ int amdgpu_acpi_get_mem_info(struct amdgpu_device *adev, int xcc_id, { struct amdgpu_acpi_dev_info *dev_info; struct amdgpu_acpi_xcc_info *xcc_info; - u16 bdf; + u32 sbdf; if (!numa_info) return -EINVAL; - bdf = pci_dev_id(adev->pdev); - dev_info = amdgpu_acpi_get_dev(bdf); + sbdf = (pci_domain_nr(adev->pdev->bus) << 16); + sbdf |= pci_dev_id(adev->pdev); + dev_info = amdgpu_acpi_get_dev(sbdf); if (!dev_info) return -ENOENT; @@ -1389,14 +1393,11 @@ void amdgpu_acpi_detect(void) struct pci_dev *pdev = NULL; int ret; - while ((pdev = pci_get_class(PCI_CLASS_DISPLAY_VGA << 8, pdev)) != NULL) { - if (!atif->handle) - amdgpu_atif_pci_probe_handle(pdev); - if (!atcs->handle) - amdgpu_atcs_pci_probe_handle(pdev); - } + while ((pdev = pci_get_base_class(PCI_BASE_CLASS_DISPLAY, pdev))) { + if ((pdev->class != PCI_CLASS_DISPLAY_VGA << 8) && + (pdev->class != PCI_CLASS_DISPLAY_OTHER << 8)) + continue; - while ((pdev = pci_get_class(PCI_CLASS_DISPLAY_OTHER << 8, pdev)) != NULL) { if (!atif->handle) amdgpu_atif_pci_probe_handle(pdev); if (!atcs->handle) @@ -1493,6 +1494,9 @@ bool amdgpu_acpi_is_s0ix_active(struct amdgpu_device *adev) if (adev->asic_type < CHIP_RAVEN) return false; + if (!(adev->pm.pp_feature & PP_GFXOFF_MASK)) + return false; + /* * If ACPI_FADT_LOW_POWER_S0 is not set in the FADT, it is generally * risky to do any special firmware-related preparations for entering @@ -1515,4 +1519,22 @@ bool amdgpu_acpi_is_s0ix_active(struct amdgpu_device *adev) #endif /* CONFIG_AMD_PMC */ } +/** + * amdgpu_choose_low_power_state + * + * @adev: amdgpu_device_pointer + * + * Choose the target low power state for the GPU + */ +void amdgpu_choose_low_power_state(struct amdgpu_device *adev) +{ + if (adev->in_runpm) + return; + + if (amdgpu_acpi_is_s0ix_active(adev)) + adev->in_s0ix = true; + else if (amdgpu_acpi_is_s3_active(adev)) + adev->in_s3 = true; +} + #endif /* CONFIG_SUSPEND */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index b8412202a1b0..3b4591f554f1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -138,6 +138,30 @@ static void amdgpu_amdkfd_reset_work(struct work_struct *work) amdgpu_device_gpu_recover(adev, NULL, &reset_context); } +static const struct drm_client_funcs kfd_client_funcs = { + .unregister = drm_client_release, +}; + +int amdgpu_amdkfd_drm_client_create(struct amdgpu_device *adev) +{ + int ret; + + if (!adev->kfd.init_complete || adev->kfd.client.dev) + return 0; + + ret = drm_client_init(&adev->ddev, &adev->kfd.client, "kfd", + &kfd_client_funcs); + if (ret) { + dev_err(adev->dev, "Failed to init DRM client: %d\n", + ret); + return ret; + } + + drm_client_register(&adev->kfd.client); + + return 0; +} + void amdgpu_amdkfd_device_init(struct amdgpu_device *adev) { int i; @@ -547,7 +571,7 @@ int amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(struct amdgpu_device *dst, struct amdgpu_device *adev = dst, *peer_adev; int num_links; - if (adev->asic_type != CHIP_ALDEBARAN) + if (amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(9, 4, 2)) return 0; if (src) @@ -684,10 +708,8 @@ err: void amdgpu_amdkfd_set_compute_idle(struct amdgpu_device *adev, bool idle) { enum amd_powergating_state state = idle ? AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE; - /* Temporary workaround to fix issues observed in some - * compute applications when GFXOFF is enabled on GFX11. - */ - if (IP_VERSION_MAJ(amdgpu_ip_version(adev, GC_HWIP, 0)) == 11) { + if (IP_VERSION_MAJ(amdgpu_ip_version(adev, GC_HWIP, 0)) == 11 && + ((adev->mes.kiq_version & AMDGPU_MES_VERSION_MASK) <= 64)) { pr_debug("GFXOFF is %s\n", idle ? "enabled" : "disabled"); amdgpu_gfx_off_ctrl(adev, idle); } else if ((IP_VERSION_MAJ(amdgpu_ip_version(adev, GC_HWIP, 0)) == 9) && @@ -710,35 +732,6 @@ bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid) return false; } -int amdgpu_amdkfd_flush_gpu_tlb_vmid(struct amdgpu_device *adev, - uint16_t vmid) -{ - if (adev->family == AMDGPU_FAMILY_AI) { - int i; - - for_each_set_bit(i, adev->vmhubs_mask, AMDGPU_MAX_VMHUBS) - amdgpu_gmc_flush_gpu_tlb(adev, vmid, i, 0); - } else { - amdgpu_gmc_flush_gpu_tlb(adev, vmid, AMDGPU_GFXHUB(0), 0); - } - - return 0; -} - -int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct amdgpu_device *adev, - uint16_t pasid, - enum TLB_FLUSH_TYPE flush_type, - uint32_t inst) -{ - bool all_hub = false; - - if (adev->family == AMDGPU_FAMILY_AI || - adev->family == AMDGPU_FAMILY_RV) - all_hub = true; - - return amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, flush_type, all_hub, inst); -} - bool amdgpu_amdkfd_have_atomics_support(struct amdgpu_device *adev) { return adev->have_atomics_support; @@ -749,9 +742,15 @@ void amdgpu_amdkfd_debug_mem_fence(struct amdgpu_device *adev) amdgpu_device_flush_hdp(adev, NULL); } -void amdgpu_amdkfd_ras_poison_consumption_handler(struct amdgpu_device *adev, bool reset) +bool amdgpu_amdkfd_is_fed(struct amdgpu_device *adev) { - amdgpu_umc_poison_handler(adev, reset); + return amdgpu_ras_get_fed_status(adev); +} + +void amdgpu_amdkfd_ras_poison_consumption_handler(struct amdgpu_device *adev, + enum amdgpu_ras_block block, uint32_t reset) +{ + amdgpu_umc_poison_handler(adev, block, reset); } int amdgpu_amdkfd_send_close_event_drain_irq(struct amdgpu_device *adev, @@ -770,12 +769,20 @@ int amdgpu_amdkfd_send_close_event_drain_irq(struct amdgpu_device *adev, return 0; } -bool amdgpu_amdkfd_ras_query_utcl2_poison_status(struct amdgpu_device *adev) +bool amdgpu_amdkfd_ras_query_utcl2_poison_status(struct amdgpu_device *adev, + int hub_inst, int hub_type) { - if (adev->gfx.ras && adev->gfx.ras->query_utcl2_poison_status) - return adev->gfx.ras->query_utcl2_poison_status(adev); - else - return false; + if (!hub_type) { + if (adev->gfxhub.funcs->query_utcl2_poison_status) + return adev->gfxhub.funcs->query_utcl2_poison_status(adev, hub_inst); + else + return false; + } else { + if (adev->mmhub.funcs->query_utcl2_poison_status) + return adev->mmhub.funcs->query_utcl2_poison_status(adev, hub_inst); + else + return false; + } } int amdgpu_amdkfd_check_and_lock_kfd(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index dac983da961d..c51954c9052e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -33,6 +33,7 @@ #include <linux/mmu_notifier.h> #include <linux/memremap.h> #include <kgd_kfd_interface.h> +#include <drm/drm_client.h> #include "amdgpu_sync.h" #include "amdgpu_vm.h" #include "amdgpu_xcp.h" @@ -83,6 +84,7 @@ struct kgd_mem { struct amdgpu_sync sync; + uint32_t gem_handle; bool aql_queue; bool is_imported; }; @@ -105,6 +107,9 @@ struct amdgpu_kfd_dev { /* HMM page migration MEMORY_DEVICE_PRIVATE mapping */ struct dev_pagemap pgmap; + + /* Client for KFD BO GEM handle allocations */ + struct drm_client_dev client; }; enum kgd_engine_type { @@ -162,11 +167,6 @@ int amdgpu_amdkfd_submit_ib(struct amdgpu_device *adev, uint32_t *ib_cmd, uint32_t ib_len); void amdgpu_amdkfd_set_compute_idle(struct amdgpu_device *adev, bool idle); bool amdgpu_amdkfd_have_atomics_support(struct amdgpu_device *adev); -int amdgpu_amdkfd_flush_gpu_tlb_vmid(struct amdgpu_device *adev, - uint16_t vmid); -int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct amdgpu_device *adev, - uint16_t pasid, enum TLB_FLUSH_TYPE flush_type, - uint32_t inst); bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid); @@ -182,6 +182,8 @@ int amdgpu_queue_mask_bit_to_set_resource_bit(struct amdgpu_device *adev, struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 context, struct mm_struct *mm, struct svm_range_bo *svm_bo); + +int amdgpu_amdkfd_drm_client_create(struct amdgpu_device *adev); #if defined(CONFIG_DEBUG_FS) int kfd_debugfs_kfd_mem_limits(struct seq_file *m, void *data); #endif @@ -191,6 +193,9 @@ struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f); int amdgpu_amdkfd_remove_fence_on_pt_pd_bos(struct amdgpu_bo *bo); int amdgpu_amdkfd_evict_userptr(struct mmu_interval_notifier *mni, unsigned long cur_seq, struct kgd_mem *mem); +int amdgpu_amdkfd_bo_validate_and_fence(struct amdgpu_bo *bo, + uint32_t domain, + struct dma_fence *fence); #else static inline bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm) @@ -216,6 +221,13 @@ int amdgpu_amdkfd_evict_userptr(struct mmu_interval_notifier *mni, { return 0; } +static inline +int amdgpu_amdkfd_bo_validate_and_fence(struct amdgpu_bo *bo, + uint32_t domain, + struct dma_fence *fence) +{ + return 0; +} #endif /* Shared API */ int amdgpu_amdkfd_alloc_gtt_mem(struct amdgpu_device *adev, size_t size, @@ -301,35 +313,36 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(struct amdgpu_device *adev, struct kgd_mem *mem, void *drm_priv); int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu( struct amdgpu_device *adev, struct kgd_mem *mem, void *drm_priv); -void amdgpu_amdkfd_gpuvm_dmaunmap_mem(struct kgd_mem *mem, void *drm_priv); +int amdgpu_amdkfd_gpuvm_dmaunmap_mem(struct kgd_mem *mem, void *drm_priv); int amdgpu_amdkfd_gpuvm_sync_memory( struct amdgpu_device *adev, struct kgd_mem *mem, bool intr); int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_mem *mem, void **kptr, uint64_t *size); void amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(struct kgd_mem *mem); -int amdgpu_amdkfd_map_gtt_bo_to_gart(struct amdgpu_device *adev, struct amdgpu_bo *bo); +int amdgpu_amdkfd_map_gtt_bo_to_gart(struct amdgpu_bo *bo); int amdgpu_amdkfd_gpuvm_restore_process_bos(void *process_info, - struct dma_fence **ef); + struct dma_fence __rcu **ef); int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct amdgpu_device *adev, struct kfd_vm_fault_info *info); -int amdgpu_amdkfd_gpuvm_import_dmabuf(struct amdgpu_device *adev, - struct dma_buf *dmabuf, - uint64_t va, void *drm_priv, - struct kgd_mem **mem, uint64_t *size, - uint64_t *mmap_offset); +int amdgpu_amdkfd_gpuvm_import_dmabuf_fd(struct amdgpu_device *adev, int fd, + uint64_t va, void *drm_priv, + struct kgd_mem **mem, uint64_t *size, + uint64_t *mmap_offset); int amdgpu_amdkfd_gpuvm_export_dmabuf(struct kgd_mem *mem, struct dma_buf **dmabuf); void amdgpu_amdkfd_debug_mem_fence(struct amdgpu_device *adev); int amdgpu_amdkfd_get_tile_config(struct amdgpu_device *adev, struct tile_config *config); void amdgpu_amdkfd_ras_poison_consumption_handler(struct amdgpu_device *adev, - bool reset); + enum amdgpu_ras_block block, uint32_t reset); +bool amdgpu_amdkfd_is_fed(struct amdgpu_device *adev); bool amdgpu_amdkfd_bo_mapped_to_dev(struct amdgpu_device *adev, struct kgd_mem *mem); void amdgpu_amdkfd_block_mmu_notifications(void *p); int amdgpu_amdkfd_criu_resume(void *p); -bool amdgpu_amdkfd_ras_query_utcl2_poison_status(struct amdgpu_device *adev); +bool amdgpu_amdkfd_ras_query_utcl2_poison_status(struct amdgpu_device *adev, + int hub_inst, int hub_type); int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev, uint64_t size, u32 alloc_flag, int8_t xcp_id); void amdgpu_amdkfd_unreserve_mem_limit(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c index 625db444df1c..3a3f3ce09f00 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c @@ -200,7 +200,7 @@ int kgd_arcturus_hqd_sdma_dump(struct amdgpu_device *adev, #undef HQD_N_REGS #define HQD_N_REGS (19+6+7+10) - *dump = kmalloc_array(HQD_N_REGS * 2, sizeof(uint32_t), GFP_KERNEL); + *dump = kmalloc_array(HQD_N_REGS, sizeof(**dump), GFP_KERNEL); if (*dump == NULL) return -ENOMEM; @@ -290,7 +290,7 @@ static int suspend_resume_compute_scheduler(struct amdgpu_device *adev, bool sus for (i = 0; i < adev->gfx.num_compute_rings; i++) { struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; - if (!(ring && ring->sched.thread)) + if (!amdgpu_ring_sched_ready(ring)) continue; /* stop secheduler and drain ring. */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c index 469785d33791..1ef758ac5076 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c @@ -90,7 +90,7 @@ struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f) return NULL; fence = container_of(f, struct amdgpu_amdkfd_fence, base); - if (fence && f->ops == &amdkfd_fence_ops) + if (f->ops == &amdkfd_fence_ops) return fence; return NULL; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c index 490c8f5ddb60..a5c7259cf2a3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c @@ -141,7 +141,7 @@ static int kgd_gfx_v9_4_3_hqd_sdma_dump(struct amdgpu_device *adev, (*dump)[i++][1] = RREG32(addr); \ } while (0) - *dump = kmalloc_array(HQD_N_REGS * 2, sizeof(uint32_t), GFP_KERNEL); + *dump = kmalloc_array(HQD_N_REGS, sizeof(**dump), GFP_KERNEL); if (*dump == NULL) return -ENOMEM; @@ -300,14 +300,13 @@ static int kgd_gfx_v9_4_3_hqd_load(struct amdgpu_device *adev, void *mqd, hqd_end = SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_HQD_AQL_DISPATCH_ID_HI); for (reg = hqd_base; reg <= hqd_end; reg++) - WREG32_RLC(reg, mqd_hqd[reg - hqd_base]); + WREG32_XCC(reg, mqd_hqd[reg - hqd_base], inst); /* Activate doorbell logic before triggering WPTR poll. */ data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1); - WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_HQD_PQ_DOORBELL_CONTROL), - data); + WREG32_SOC15_RLC(GC, GET_INST(GC, inst), regCP_HQD_PQ_DOORBELL_CONTROL, data); if (wptr) { /* Don't read wptr with get_user because the user @@ -336,27 +335,24 @@ static int kgd_gfx_v9_4_3_hqd_load(struct amdgpu_device *adev, void *mqd, guessed_wptr += m->cp_hqd_pq_wptr_lo & ~(queue_size - 1); guessed_wptr += (uint64_t)m->cp_hqd_pq_wptr_hi << 32; - WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_HQD_PQ_WPTR_LO), - lower_32_bits(guessed_wptr)); - WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_HQD_PQ_WPTR_HI), - upper_32_bits(guessed_wptr)); - WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_HQD_PQ_WPTR_POLL_ADDR), - lower_32_bits((uintptr_t)wptr)); - WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), - regCP_HQD_PQ_WPTR_POLL_ADDR_HI), + WREG32_SOC15_RLC(GC, GET_INST(GC, inst), regCP_HQD_PQ_WPTR_LO, + lower_32_bits(guessed_wptr)); + WREG32_SOC15_RLC(GC, GET_INST(GC, inst), regCP_HQD_PQ_WPTR_HI, + upper_32_bits(guessed_wptr)); + WREG32_SOC15_RLC(GC, GET_INST(GC, inst), regCP_HQD_PQ_WPTR_POLL_ADDR, + lower_32_bits((uintptr_t)wptr)); + WREG32_SOC15_RLC(GC, GET_INST(GC, inst), regCP_HQD_PQ_WPTR_POLL_ADDR_HI, upper_32_bits((uintptr_t)wptr)); - WREG32(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_PQ_WPTR_POLL_CNTL1), - (uint32_t)kgd_gfx_v9_get_queue_mask(adev, pipe_id, - queue_id)); + WREG32_SOC15_RLC(GC, GET_INST(GC, inst), regCP_PQ_WPTR_POLL_CNTL1, + (uint32_t)kgd_gfx_v9_get_queue_mask(adev, pipe_id, queue_id)); } /* Start the EOP fetcher */ - WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_HQD_EOP_RPTR), - REG_SET_FIELD(m->cp_hqd_eop_rptr, - CP_HQD_EOP_RPTR, INIT_FETCHER, 1)); + WREG32_SOC15_RLC(GC, GET_INST(GC, inst), regCP_HQD_EOP_RPTR, + REG_SET_FIELD(m->cp_hqd_eop_rptr, CP_HQD_EOP_RPTR, INIT_FETCHER, 1)); data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1); - WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_HQD_ACTIVE), data); + WREG32_SOC15_RLC(GC, GET_INST(GC, inst), regCP_HQD_ACTIVE, data); kgd_gfx_v9_release_queue(adev, inst); @@ -494,15 +490,15 @@ static uint32_t kgd_gfx_v9_4_3_set_address_watch( VALID, 1); - WREG32_RLC((SOC15_REG_OFFSET(GC, GET_INST(GC, inst), + WREG32_XCC((SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regTCP_WATCH0_ADDR_H) + (watch_id * TCP_WATCH_STRIDE)), - watch_address_high); + watch_address_high, inst); - WREG32_RLC((SOC15_REG_OFFSET(GC, GET_INST(GC, inst), + WREG32_XCC((SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regTCP_WATCH0_ADDR_L) + (watch_id * TCP_WATCH_STRIDE)), - watch_address_low); + watch_address_low, inst); return watch_address_cntl; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c index 6bf448ab3dff..ca4a6b82817f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c @@ -214,7 +214,7 @@ static int kgd_hqd_dump(struct amdgpu_device *adev, (*dump)[i++][1] = RREG32(addr); \ } while (0) - *dump = kmalloc_array(HQD_N_REGS * 2, sizeof(uint32_t), GFP_KERNEL); + *dump = kmalloc_array(HQD_N_REGS, sizeof(**dump), GFP_KERNEL); if (*dump == NULL) return -ENOMEM; @@ -301,7 +301,7 @@ static int kgd_hqd_sdma_dump(struct amdgpu_device *adev, #undef HQD_N_REGS #define HQD_N_REGS (19+4) - *dump = kmalloc_array(HQD_N_REGS * 2, sizeof(uint32_t), GFP_KERNEL); + *dump = kmalloc_array(HQD_N_REGS, sizeof(**dump), GFP_KERNEL); if (*dump == NULL) return -ENOMEM; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c index cd06e4a6d1da..0f3e2944edd7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c @@ -238,7 +238,7 @@ static int kgd_hqd_dump(struct amdgpu_device *adev, (*dump)[i++][1] = RREG32(addr); \ } while (0) - *dump = kmalloc_array(HQD_N_REGS * 2, sizeof(uint32_t), GFP_KERNEL); + *dump = kmalloc_array(HQD_N_REGS, sizeof(**dump), GFP_KERNEL); if (*dump == NULL) return -ENOMEM; @@ -324,7 +324,7 @@ static int kgd_hqd_sdma_dump(struct amdgpu_device *adev, #undef HQD_N_REGS #define HQD_N_REGS (19+4+2+3+7) - *dump = kmalloc_array(HQD_N_REGS * 2, sizeof(uint32_t), GFP_KERNEL); + *dump = kmalloc_array(HQD_N_REGS, sizeof(**dump), GFP_KERNEL); if (*dump == NULL) return -ENOMEM; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c index 51011e8ee90d..5a35a8ca8922 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c @@ -91,8 +91,8 @@ void kgd_gfx_v9_program_sh_mem_settings(struct amdgpu_device *adev, uint32_t vmi { kgd_gfx_v9_lock_srbm(adev, 0, 0, 0, vmid, inst); - WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmSH_MEM_CONFIG), sh_mem_config); - WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmSH_MEM_BASES), sh_mem_bases); + WREG32_SOC15_RLC(GC, GET_INST(GC, inst), mmSH_MEM_CONFIG, sh_mem_config); + WREG32_SOC15_RLC(GC, GET_INST(GC, inst), mmSH_MEM_BASES, sh_mem_bases); /* APE1 no longer exists on GFX9 */ kgd_gfx_v9_unlock_srbm(adev, inst); @@ -239,14 +239,13 @@ int kgd_gfx_v9_hqd_load(struct amdgpu_device *adev, void *mqd, for (reg = hqd_base; reg <= SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmCP_HQD_PQ_WPTR_HI); reg++) - WREG32_RLC(reg, mqd_hqd[reg - hqd_base]); + WREG32_XCC(reg, mqd_hqd[reg - hqd_base], inst); /* Activate doorbell logic before triggering WPTR poll. */ data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1); - WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmCP_HQD_PQ_DOORBELL_CONTROL), - data); + WREG32_SOC15_RLC(GC, GET_INST(GC, inst), mmCP_HQD_PQ_DOORBELL_CONTROL, data); if (wptr) { /* Don't read wptr with get_user because the user @@ -275,25 +274,24 @@ int kgd_gfx_v9_hqd_load(struct amdgpu_device *adev, void *mqd, guessed_wptr += m->cp_hqd_pq_wptr_lo & ~(queue_size - 1); guessed_wptr += (uint64_t)m->cp_hqd_pq_wptr_hi << 32; - WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmCP_HQD_PQ_WPTR_LO), - lower_32_bits(guessed_wptr)); - WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmCP_HQD_PQ_WPTR_HI), - upper_32_bits(guessed_wptr)); - WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmCP_HQD_PQ_WPTR_POLL_ADDR), - lower_32_bits((uintptr_t)wptr)); - WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmCP_HQD_PQ_WPTR_POLL_ADDR_HI), - upper_32_bits((uintptr_t)wptr)); - WREG32_SOC15(GC, GET_INST(GC, inst), mmCP_PQ_WPTR_POLL_CNTL1, - (uint32_t)kgd_gfx_v9_get_queue_mask(adev, pipe_id, queue_id)); + WREG32_SOC15_RLC(GC, GET_INST(GC, inst), mmCP_HQD_PQ_WPTR_LO, + lower_32_bits(guessed_wptr)); + WREG32_SOC15_RLC(GC, GET_INST(GC, inst), mmCP_HQD_PQ_WPTR_HI, + upper_32_bits(guessed_wptr)); + WREG32_SOC15_RLC(GC, GET_INST(GC, inst), mmCP_HQD_PQ_WPTR_POLL_ADDR, + lower_32_bits((uintptr_t)wptr)); + WREG32_SOC15_RLC(GC, GET_INST(GC, inst), mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, + upper_32_bits((uintptr_t)wptr)); + WREG32_SOC15_RLC(GC, GET_INST(GC, inst), mmCP_PQ_WPTR_POLL_CNTL1, + (uint32_t)kgd_gfx_v9_get_queue_mask(adev, pipe_id, queue_id)); } /* Start the EOP fetcher */ - WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmCP_HQD_EOP_RPTR), - REG_SET_FIELD(m->cp_hqd_eop_rptr, - CP_HQD_EOP_RPTR, INIT_FETCHER, 1)); + WREG32_SOC15_RLC(GC, GET_INST(GC, inst), mmCP_HQD_EOP_RPTR, + REG_SET_FIELD(m->cp_hqd_eop_rptr, CP_HQD_EOP_RPTR, INIT_FETCHER, 1)); data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1); - WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmCP_HQD_ACTIVE), data); + WREG32_SOC15_RLC(GC, GET_INST(GC, inst), mmCP_HQD_ACTIVE, data); kgd_gfx_v9_release_queue(adev, inst); @@ -365,7 +363,7 @@ int kgd_gfx_v9_hqd_dump(struct amdgpu_device *adev, (*dump)[i++][1] = RREG32(addr); \ } while (0) - *dump = kmalloc_array(HQD_N_REGS * 2, sizeof(uint32_t), GFP_KERNEL); + *dump = kmalloc_array(HQD_N_REGS, sizeof(**dump), GFP_KERNEL); if (*dump == NULL) return -ENOMEM; @@ -462,7 +460,7 @@ static int kgd_hqd_sdma_dump(struct amdgpu_device *adev, #undef HQD_N_REGS #define HQD_N_REGS (19+6+7+10) - *dump = kmalloc_array(HQD_N_REGS * 2, sizeof(uint32_t), GFP_KERNEL); + *dump = kmalloc_array(HQD_N_REGS, sizeof(**dump), GFP_KERNEL); if (*dump == NULL) return -ENOMEM; @@ -556,7 +554,7 @@ int kgd_gfx_v9_hqd_destroy(struct amdgpu_device *adev, void *mqd, break; } - WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmCP_HQD_DEQUEUE_REQUEST), type); + WREG32_SOC15_RLC(GC, GET_INST(GC, inst), mmCP_HQD_DEQUEUE_REQUEST, type); end_jiffies = (utimeout * HZ / 1000) + jiffies; while (true) { @@ -908,8 +906,8 @@ void kgd_gfx_v9_get_iq_wait_times(struct amdgpu_device *adev, uint32_t inst) { - *wait_times = RREG32(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), - mmCP_IQ_WAIT_TIME2)); + *wait_times = RREG32_SOC15_RLC(GC, GET_INST(GC, inst), + mmCP_IQ_WAIT_TIME2); } void kgd_gfx_v9_set_vm_context_page_table_base(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index bd1d4b2b6b16..2131de36e3da 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -25,6 +25,7 @@ #include <linux/pagemap.h> #include <linux/sched/mm.h> #include <linux/sched/task.h> +#include <linux/fdtable.h> #include <drm/ttm/ttm_tt.h> #include <drm/drm_exec.h> @@ -425,6 +426,32 @@ validate_fail: return ret; } +int amdgpu_amdkfd_bo_validate_and_fence(struct amdgpu_bo *bo, + uint32_t domain, + struct dma_fence *fence) +{ + int ret = amdgpu_bo_reserve(bo, false); + + if (ret) + return ret; + + ret = amdgpu_amdkfd_bo_validate(bo, domain, true); + if (ret) + goto unreserve_out; + + ret = dma_resv_reserve_fences(bo->tbo.base.resv, 1); + if (ret) + goto unreserve_out; + + dma_resv_add_fence(bo->tbo.base.resv, fence, + DMA_RESV_USAGE_BOOKKEEP); + +unreserve_out: + amdgpu_bo_unreserve(bo); + + return ret; +} + static int amdgpu_amdkfd_validate_vm_bo(void *_unused, struct amdgpu_bo *bo) { return amdgpu_amdkfd_bo_validate(bo, bo->allowed_domains, false); @@ -437,13 +464,15 @@ static int amdgpu_amdkfd_validate_vm_bo(void *_unused, struct amdgpu_bo *bo) * again. Page directories are only updated after updating page * tables. */ -static int vm_validate_pt_pd_bos(struct amdgpu_vm *vm) +static int vm_validate_pt_pd_bos(struct amdgpu_vm *vm, + struct ww_acquire_ctx *ticket) { struct amdgpu_bo *pd = vm->root.bo; struct amdgpu_device *adev = amdgpu_ttm_adev(pd->tbo.bdev); int ret; - ret = amdgpu_vm_validate_pt_bos(adev, vm, amdgpu_amdkfd_validate_vm_bo, NULL); + ret = amdgpu_vm_validate(adev, vm, ticket, + amdgpu_amdkfd_validate_vm_bo, NULL); if (ret) { pr_err("failed to validate PT BOs\n"); return ret; @@ -780,13 +809,22 @@ kfd_mem_dmaunmap_attachment(struct kgd_mem *mem, static int kfd_mem_export_dmabuf(struct kgd_mem *mem) { if (!mem->dmabuf) { - struct dma_buf *ret = amdgpu_gem_prime_export( - &mem->bo->tbo.base, + struct amdgpu_device *bo_adev; + struct dma_buf *dmabuf; + int r, fd; + + bo_adev = amdgpu_ttm_adev(mem->bo->tbo.bdev); + r = drm_gem_prime_handle_to_fd(&bo_adev->ddev, bo_adev->kfd.client.file, + mem->gem_handle, mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE ? - DRM_RDWR : 0); - if (IS_ERR(ret)) - return PTR_ERR(ret); - mem->dmabuf = ret; + DRM_RDWR : 0, &fd); + if (r) + return r; + dmabuf = dma_buf_get(fd); + close_fd(fd); + if (WARN_ON_ONCE(IS_ERR(dmabuf))) + return PTR_ERR(dmabuf); + mem->dmabuf = dmabuf; } return 0; @@ -1111,14 +1149,14 @@ static int reserve_bo_and_vm(struct kgd_mem *mem, ctx->n_vms = 1; ctx->sync = &mem->sync; - drm_exec_init(&ctx->exec, DRM_EXEC_INTERRUPTIBLE_WAIT); + drm_exec_init(&ctx->exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0); drm_exec_until_all_locked(&ctx->exec) { ret = amdgpu_vm_lock_pd(vm, &ctx->exec, 2); drm_exec_retry_on_contention(&ctx->exec); if (unlikely(ret)) goto error; - ret = drm_exec_lock_obj(&ctx->exec, &bo->tbo.base); + ret = drm_exec_prepare_obj(&ctx->exec, &bo->tbo.base, 1); drm_exec_retry_on_contention(&ctx->exec); if (unlikely(ret)) goto error; @@ -1150,7 +1188,7 @@ static int reserve_bo_and_cond_vms(struct kgd_mem *mem, int ret; ctx->sync = &mem->sync; - drm_exec_init(&ctx->exec, DRM_EXEC_INTERRUPTIBLE_WAIT); + drm_exec_init(&ctx->exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0); drm_exec_until_all_locked(&ctx->exec) { ctx->n_vms = 0; list_for_each_entry(entry, &mem->attachments, list) { @@ -1274,14 +1312,15 @@ update_gpuvm_pte_failed: return ret; } -static int process_validate_vms(struct amdkfd_process_info *process_info) +static int process_validate_vms(struct amdkfd_process_info *process_info, + struct ww_acquire_ctx *ticket) { struct amdgpu_vm *peer_vm; int ret; list_for_each_entry(peer_vm, &process_info->vm_list_head, vm_list_node) { - ret = vm_validate_pt_pd_bos(peer_vm); + ret = vm_validate_pt_pd_bos(peer_vm, ticket); if (ret) return ret; } @@ -1358,7 +1397,6 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info, amdgpu_amdkfd_restore_userptr_worker); *process_info = info; - *ef = dma_fence_get(&info->eviction_fence->base); } vm->process_info = *process_info; @@ -1367,7 +1405,7 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info, ret = amdgpu_bo_reserve(vm->root.bo, true); if (ret) goto reserve_pd_fail; - ret = vm_validate_pt_pd_bos(vm); + ret = vm_validate_pt_pd_bos(vm, NULL); if (ret) { pr_err("validate_pt_pd_bos() failed\n"); goto validate_pd_fail; @@ -1389,6 +1427,8 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info, list_add_tail(&vm->vm_list_node, &(vm->process_info->vm_list_head)); vm->process_info->n_vms++; + + *ef = dma_fence_get(&vm->process_info->eviction_fence->base); mutex_unlock(&vm->process_info->lock); return 0; @@ -1400,10 +1440,7 @@ validate_pd_fail: reserve_pd_fail: vm->process_info = NULL; if (info) { - /* Two fence references: one in info and one in *ef */ dma_fence_put(&info->eviction_fence->base); - dma_fence_put(*ef); - *ef = NULL; *process_info = NULL; put_pid(info->pid); create_evict_fence_fail: @@ -1597,7 +1634,8 @@ int amdgpu_amdkfd_criu_resume(void *p) goto out_unlock; } WRITE_ONCE(pinfo->block_mmu_notifications, false); - schedule_delayed_work(&pinfo->restore_userptr_work, 0); + queue_delayed_work(system_freezable_wq, + &pinfo->restore_userptr_work, 0); out_unlock: mutex_unlock(&pinfo->lock); @@ -1753,6 +1791,9 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( pr_debug("Failed to allow vma node access. ret %d\n", ret); goto err_node_allow; } + ret = drm_gem_handle_create(adev->kfd.client.file, gobj, &(*mem)->gem_handle); + if (ret) + goto err_gem_handle_create; bo = gem_to_amdgpu_bo(gobj); if (bo_type == ttm_bo_type_sg) { bo->tbo.sg = sg; @@ -1784,6 +1825,15 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( } bo->allowed_domains = AMDGPU_GEM_DOMAIN_GTT; bo->preferred_domains = AMDGPU_GEM_DOMAIN_GTT; + } else { + mutex_lock(&avm->process_info->lock); + if (avm->process_info->eviction_fence && + !dma_fence_is_signaled(&avm->process_info->eviction_fence->base)) + ret = amdgpu_amdkfd_bo_validate_and_fence(bo, domain, + &avm->process_info->eviction_fence->base); + mutex_unlock(&avm->process_info->lock); + if (ret) + goto err_validate_bo; } if (offset) @@ -1793,7 +1843,10 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( allocate_init_user_pages_failed: err_pin_bo: +err_validate_bo: remove_kgd_mem_from_kfd_bo_list(*mem, avm->process_info); + drm_gem_handle_delete(adev->kfd.client.file, (*mem)->gem_handle); +err_gem_handle_create: drm_vma_node_revoke(&gobj->vma_node, drm_priv); err_node_allow: /* Don't unreserve system mem limit twice */ @@ -1801,6 +1854,7 @@ err_node_allow: err_bo_create: amdgpu_amdkfd_unreserve_mem_limit(adev, aligned_size, flags, xcp_id); err_reserve_limit: + amdgpu_sync_free(&(*mem)->sync); mutex_destroy(&(*mem)->lock); if (gobj) drm_gem_object_put(gobj); @@ -1866,10 +1920,6 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( if (unlikely(ret)) return ret; - /* The eviction fence should be removed by the last unmap. - * TODO: Log an error condition if the bo still has the eviction fence - * attached - */ amdgpu_amdkfd_remove_eviction_fence(mem->bo, process_info->eviction_fence); pr_debug("Release VA 0x%llx - 0x%llx\n", mem->va, @@ -1910,8 +1960,11 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( /* Free the BO*/ drm_vma_node_revoke(&mem->bo->tbo.base.vma_node, drm_priv); - if (mem->dmabuf) + drm_gem_handle_delete(adev->kfd.client.file, mem->gem_handle); + if (mem->dmabuf) { dma_buf_put(mem->dmabuf); + mem->dmabuf = NULL; + } mutex_destroy(&mem->lock); /* If this releases the last reference, it will end up calling @@ -1994,23 +2047,10 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( bo->tbo.resource->mem_type == TTM_PL_SYSTEM) is_invalid_userptr = true; - ret = vm_validate_pt_pd_bos(avm); + ret = vm_validate_pt_pd_bos(avm, NULL); if (unlikely(ret)) goto out_unreserve; - if (mem->mapped_to_gpu_memory == 0 && - !amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) { - /* Validate BO only once. The eviction fence gets added to BO - * the first time it is mapped. Validate will wait for all - * background evictions to complete. - */ - ret = amdgpu_amdkfd_bo_validate(bo, domain, true); - if (ret) { - pr_debug("Validate failed\n"); - goto out_unreserve; - } - } - list_for_each_entry(entry, &mem->attachments, list) { if (entry->bo_va->base.vm != avm || entry->is_mapped) continue; @@ -2037,10 +2077,6 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( mem->mapped_to_gpu_memory); } - if (!amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) && !bo->tbo.pin_count) - dma_resv_add_fence(bo->tbo.base.resv, - &avm->process_info->eviction_fence->base, - DMA_RESV_USAGE_BOOKKEEP); ret = unreserve_bo_and_vms(&ctx, false, false); goto out; @@ -2053,28 +2089,41 @@ out: return ret; } -void amdgpu_amdkfd_gpuvm_dmaunmap_mem(struct kgd_mem *mem, void *drm_priv) +int amdgpu_amdkfd_gpuvm_dmaunmap_mem(struct kgd_mem *mem, void *drm_priv) { struct kfd_mem_attachment *entry; struct amdgpu_vm *vm; + int ret; vm = drm_priv_to_vm(drm_priv); mutex_lock(&mem->lock); + ret = amdgpu_bo_reserve(mem->bo, true); + if (ret) + goto out; + list_for_each_entry(entry, &mem->attachments, list) { - if (entry->bo_va->base.vm == vm) - kfd_mem_dmaunmap_attachment(mem, entry); + if (entry->bo_va->base.vm != vm) + continue; + if (entry->bo_va->base.bo->tbo.ttm && + !entry->bo_va->base.bo->tbo.ttm->sg) + continue; + + kfd_mem_dmaunmap_attachment(mem, entry); } + amdgpu_bo_unreserve(mem->bo); +out: mutex_unlock(&mem->lock); + + return ret; } int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu( struct amdgpu_device *adev, struct kgd_mem *mem, void *drm_priv) { struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv); - struct amdkfd_process_info *process_info = avm->process_info; unsigned long bo_size = mem->bo->tbo.base.size; struct kfd_mem_attachment *entry; struct bo_vm_reservation_context ctx; @@ -2091,7 +2140,7 @@ int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu( goto unreserve_out; } - ret = vm_validate_pt_pd_bos(avm); + ret = vm_validate_pt_pd_bos(avm, NULL); if (unlikely(ret)) goto unreserve_out; @@ -2115,15 +2164,6 @@ int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu( mem->mapped_to_gpu_memory); } - /* If BO is unmapped from all VMs, unfence it. It can be evicted if - * required. - */ - if (mem->mapped_to_gpu_memory == 0 && - !amdgpu_ttm_tt_get_usermm(mem->bo->tbo.ttm) && - !mem->bo->tbo.pin_count) - amdgpu_amdkfd_remove_eviction_fence(mem->bo, - process_info->eviction_fence); - unreserve_out: unreserve_bo_and_vms(&ctx, false, false); out: @@ -2150,13 +2190,12 @@ int amdgpu_amdkfd_gpuvm_sync_memory( /** * amdgpu_amdkfd_map_gtt_bo_to_gart - Map BO to GART and increment reference count - * @adev: Device to which allocated BO belongs * @bo: Buffer object to be mapped * * Before return, bo reference count is incremented. To release the reference and unpin/ * unmap the BO, call amdgpu_amdkfd_free_gtt_mem. */ -int amdgpu_amdkfd_map_gtt_bo_to_gart(struct amdgpu_device *adev, struct amdgpu_bo *bo) +int amdgpu_amdkfd_map_gtt_bo_to_gart(struct amdgpu_bo *bo) { int ret; @@ -2290,34 +2329,26 @@ int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct amdgpu_device *adev, return 0; } -int amdgpu_amdkfd_gpuvm_import_dmabuf(struct amdgpu_device *adev, - struct dma_buf *dma_buf, - uint64_t va, void *drm_priv, - struct kgd_mem **mem, uint64_t *size, - uint64_t *mmap_offset) +static int import_obj_create(struct amdgpu_device *adev, + struct dma_buf *dma_buf, + struct drm_gem_object *obj, + uint64_t va, void *drm_priv, + struct kgd_mem **mem, uint64_t *size, + uint64_t *mmap_offset) { struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv); - struct drm_gem_object *obj; struct amdgpu_bo *bo; int ret; - obj = amdgpu_gem_prime_import(adev_to_drm(adev), dma_buf); - if (IS_ERR(obj)) - return PTR_ERR(obj); - bo = gem_to_amdgpu_bo(obj); if (!(bo->preferred_domains & (AMDGPU_GEM_DOMAIN_VRAM | - AMDGPU_GEM_DOMAIN_GTT))) { + AMDGPU_GEM_DOMAIN_GTT))) /* Only VRAM and GTT BOs are supported */ - ret = -EINVAL; - goto err_put_obj; - } + return -EINVAL; *mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL); - if (!*mem) { - ret = -ENOMEM; - goto err_put_obj; - } + if (!*mem) + return -ENOMEM; ret = drm_vma_node_allow(&obj->vma_node, drm_priv); if (ret) @@ -2351,12 +2382,57 @@ int amdgpu_amdkfd_gpuvm_import_dmabuf(struct amdgpu_device *adev, amdgpu_sync_create(&(*mem)->sync); (*mem)->is_imported = true; + mutex_lock(&avm->process_info->lock); + if (avm->process_info->eviction_fence && + !dma_fence_is_signaled(&avm->process_info->eviction_fence->base)) + ret = amdgpu_amdkfd_bo_validate_and_fence(bo, (*mem)->domain, + &avm->process_info->eviction_fence->base); + mutex_unlock(&avm->process_info->lock); + if (ret) + goto err_remove_mem; + return 0; +err_remove_mem: + remove_kgd_mem_from_kfd_bo_list(*mem, avm->process_info); + drm_vma_node_revoke(&obj->vma_node, drm_priv); err_free_mem: kfree(*mem); + return ret; +} + +int amdgpu_amdkfd_gpuvm_import_dmabuf_fd(struct amdgpu_device *adev, int fd, + uint64_t va, void *drm_priv, + struct kgd_mem **mem, uint64_t *size, + uint64_t *mmap_offset) +{ + struct drm_gem_object *obj; + uint32_t handle; + int ret; + + ret = drm_gem_prime_fd_to_handle(&adev->ddev, adev->kfd.client.file, fd, + &handle); + if (ret) + return ret; + obj = drm_gem_object_lookup(adev->kfd.client.file, handle); + if (!obj) { + ret = -EINVAL; + goto err_release_handle; + } + + ret = import_obj_create(adev, obj->dma_buf, obj, va, drm_priv, mem, size, + mmap_offset); + if (ret) + goto err_put_obj; + + (*mem)->gem_handle = handle; + + return 0; + err_put_obj: drm_gem_object_put(obj); +err_release_handle: + drm_gem_handle_delete(adev->kfd.client.file, handle); return ret; } @@ -2409,7 +2485,8 @@ int amdgpu_amdkfd_evict_userptr(struct mmu_interval_notifier *mni, KFD_QUEUE_EVICTION_TRIGGER_USERPTR); if (r) pr_err("Failed to quiesce KFD\n"); - schedule_delayed_work(&process_info->restore_userptr_work, + queue_delayed_work(system_freezable_wq, + &process_info->restore_userptr_work, msecs_to_jiffies(AMDGPU_USERPTR_RESTORE_DELAY_MS)); } mutex_unlock(&process_info->notifier_lock); @@ -2535,7 +2612,7 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info) amdgpu_sync_create(&sync); - drm_exec_init(&exec, 0); + drm_exec_init(&exec, 0, 0); /* Reserve all BOs and page tables for validation */ drm_exec_until_all_locked(&exec) { /* Reserve all the page directories */ @@ -2560,7 +2637,7 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info) } } - ret = process_validate_vms(process_info); + ret = process_validate_vms(process_info, NULL); if (ret) goto unreserve_out; @@ -2732,7 +2809,8 @@ unlock_out: /* If validation failed, reschedule another attempt */ if (evicted_bos) { - schedule_delayed_work(&process_info->restore_userptr_work, + queue_delayed_work(system_freezable_wq, + &process_info->restore_userptr_work, msecs_to_jiffies(AMDGPU_USERPTR_RESTORE_DELAY_MS)); kfd_smi_event_queue_restore_rescheduled(mm); @@ -2741,6 +2819,23 @@ unlock_out: put_task_struct(usertask); } +static void replace_eviction_fence(struct dma_fence __rcu **ef, + struct dma_fence *new_ef) +{ + struct dma_fence *old_ef = rcu_replace_pointer(*ef, new_ef, true + /* protected by process_info->lock */); + + /* If we're replacing an unsignaled eviction fence, that fence will + * never be signaled, and if anyone is still waiting on that fence, + * they will hang forever. This should never happen. We should only + * replace the fence in restore_work that only gets scheduled after + * eviction work signaled the fence. + */ + WARN_ONCE(!dma_fence_is_signaled(old_ef), + "Replacing unsignaled eviction fence"); + dma_fence_put(old_ef); +} + /** amdgpu_amdkfd_gpuvm_restore_process_bos - Restore all BOs for the given * KFD process identified by process_info * @@ -2759,12 +2854,11 @@ unlock_out: * 7. Add fence to all PD and PT BOs. * 8. Unreserve all BOs */ -int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef) +int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence __rcu **ef) { struct amdkfd_process_info *process_info = info; struct amdgpu_vm *peer_vm; struct kgd_mem *mem; - struct amdgpu_amdkfd_fence *new_fence; struct list_head duplicate_save; struct amdgpu_sync sync_obj; unsigned long failed_size = 0; @@ -2776,14 +2870,16 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef) mutex_lock(&process_info->lock); - drm_exec_init(&exec, 0); + drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES, 0); drm_exec_until_all_locked(&exec) { list_for_each_entry(peer_vm, &process_info->vm_list_head, vm_list_node) { ret = amdgpu_vm_lock_pd(peer_vm, &exec, 2); drm_exec_retry_on_contention(&exec); - if (unlikely(ret)) + if (unlikely(ret)) { + pr_err("Locking VM PD failed, ret: %d\n", ret); goto ttm_reserve_fail; + } } /* Reserve all BOs and page tables/directory. Add all BOs from @@ -2796,31 +2892,21 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef) gobj = &mem->bo->tbo.base; ret = drm_exec_prepare_obj(&exec, gobj, 1); drm_exec_retry_on_contention(&exec); - if (unlikely(ret)) + if (unlikely(ret)) { + pr_err("drm_exec_prepare_obj failed, ret: %d\n", ret); goto ttm_reserve_fail; + } } } amdgpu_sync_create(&sync_obj); - /* Validate PDs and PTs */ - ret = process_validate_vms(process_info); - if (ret) - goto validate_map_fail; - - ret = process_sync_pds_resv(process_info, &sync_obj); - if (ret) { - pr_debug("Memory eviction: Failed to sync to PD BO moving fence. Try again\n"); - goto validate_map_fail; - } - - /* Validate BOs and map them to GPUVM (update VM page tables). */ + /* Validate BOs managed by KFD */ list_for_each_entry(mem, &process_info->kfd_bo_list, validate_list) { struct amdgpu_bo *bo = mem->bo; uint32_t domain = mem->domain; - struct kfd_mem_attachment *attachment; struct dma_resv_iter cursor; struct dma_fence *fence; @@ -2845,6 +2931,25 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef) goto validate_map_fail; } } + } + + if (failed_size) + pr_debug("0x%lx/0x%lx in system\n", failed_size, total_size); + + /* Validate PDs, PTs and evicted DMABuf imports last. Otherwise BO + * validations above would invalidate DMABuf imports again. + */ + ret = process_validate_vms(process_info, &exec.ticket); + if (ret) { + pr_debug("Validating VMs failed, ret: %d\n", ret); + goto validate_map_fail; + } + + /* Update mappings managed by KFD. */ + list_for_each_entry(mem, &process_info->kfd_bo_list, + validate_list) { + struct kfd_mem_attachment *attachment; + list_for_each_entry(attachment, &mem->attachments, list) { if (!attachment->is_mapped) continue; @@ -2861,8 +2966,18 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef) } } - if (failed_size) - pr_debug("0x%lx/0x%lx in system\n", failed_size, total_size); + /* Update mappings not managed by KFD */ + list_for_each_entry(peer_vm, &process_info->vm_list_head, + vm_list_node) { + struct amdgpu_device *adev = amdgpu_ttm_adev( + peer_vm->root.bo->tbo.bdev); + + ret = amdgpu_vm_handle_moved(adev, peer_vm, &exec.ticket); + if (ret) { + pr_debug("Memory eviction: handle moved failed. Try again\n"); + goto validate_map_fail; + } + } /* Update page directories */ ret = process_update_pds(process_info, &sync_obj); @@ -2871,25 +2986,47 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef) goto validate_map_fail; } + /* Sync with fences on all the page tables. They implicitly depend on any + * move fences from amdgpu_vm_handle_moved above. + */ + ret = process_sync_pds_resv(process_info, &sync_obj); + if (ret) { + pr_debug("Memory eviction: Failed to sync to PD BO moving fence. Try again\n"); + goto validate_map_fail; + } + /* Wait for validate and PT updates to finish */ amdgpu_sync_wait(&sync_obj, false); - /* Release old eviction fence and create new one, because fence only - * goes from unsignaled to signaled, fence cannot be reused. - * Use context and mm from the old fence. + /* The old eviction fence may be unsignaled if restore happens + * after a GPU reset or suspend/resume. Keep the old fence in that + * case. Otherwise release the old eviction fence and create new + * one, because fence only goes from unsignaled to signaled once + * and cannot be reused. Use context and mm from the old fence. + * + * If an old eviction fence signals after this check, that's OK. + * Anyone signaling an eviction fence must stop the queues first + * and schedule another restore worker. */ - new_fence = amdgpu_amdkfd_fence_create( + if (dma_fence_is_signaled(&process_info->eviction_fence->base)) { + struct amdgpu_amdkfd_fence *new_fence = + amdgpu_amdkfd_fence_create( process_info->eviction_fence->base.context, process_info->eviction_fence->mm, NULL); - if (!new_fence) { - pr_err("Failed to create eviction fence\n"); - ret = -ENOMEM; - goto validate_map_fail; + + if (!new_fence) { + pr_err("Failed to create eviction fence\n"); + ret = -ENOMEM; + goto validate_map_fail; + } + dma_fence_put(&process_info->eviction_fence->base); + process_info->eviction_fence = new_fence; + replace_eviction_fence(ef, dma_fence_get(&new_fence->base)); + } else { + WARN_ONCE(*ef != &process_info->eviction_fence->base, + "KFD eviction fence doesn't match KGD process_info"); } - dma_fence_put(&process_info->eviction_fence->base); - process_info->eviction_fence = new_fence; - *ef = dma_fence_get(&new_fence->base); /* Attach new eviction fence to all BOs except pinned ones */ list_for_each_entry(mem, &process_info->kfd_bo_list, validate_list) { @@ -2900,7 +3037,7 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef) &process_info->eviction_fence->base, DMA_RESV_USAGE_BOOKKEEP); } - /* Attach eviction fence to PD / PT BOs */ + /* Attach eviction fence to PD / PT BOs and DMABuf imports */ list_for_each_entry(peer_vm, &process_info->vm_list_head, vm_list_node) { struct amdgpu_bo *bo = peer_vm->root.bo; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c index dce9e7d5e4ec..52b12c1718eb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c @@ -1018,7 +1018,8 @@ int amdgpu_atombios_get_clock_dividers(struct amdgpu_device *adev, if (clock_type == COMPUTE_ENGINE_PLL_PARAM) { args.v3.ulClockParams = cpu_to_le32((clock_type << 24) | clock); - amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args); + amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, + sizeof(args)); dividers->post_div = args.v3.ucPostDiv; dividers->enable_post_div = (args.v3.ucCntlFlag & @@ -1038,7 +1039,8 @@ int amdgpu_atombios_get_clock_dividers(struct amdgpu_device *adev, if (strobe_mode) args.v5.ucInputFlag = ATOM_PLL_INPUT_FLAG_PLL_STROBE_MODE_EN; - amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args); + amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, + sizeof(args)); dividers->post_div = args.v5.ucPostDiv; dividers->enable_post_div = (args.v5.ucCntlFlag & @@ -1056,7 +1058,8 @@ int amdgpu_atombios_get_clock_dividers(struct amdgpu_device *adev, /* fusion */ args.v4.ulClock = cpu_to_le32(clock); /* 10 khz */ - amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args); + amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, + sizeof(args)); dividers->post_divider = dividers->post_div = args.v4.ucPostDiv; dividers->real_clock = le32_to_cpu(args.v4.ulClock); @@ -1067,7 +1070,8 @@ int amdgpu_atombios_get_clock_dividers(struct amdgpu_device *adev, args.v6_in.ulClock.ulComputeClockFlag = clock_type; args.v6_in.ulClock.ulClockFreq = cpu_to_le32(clock); /* 10 khz */ - amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args); + amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, + sizeof(args)); dividers->whole_fb_div = le16_to_cpu(args.v6_out.ulFbDiv.usFbDiv); dividers->frac_fb_div = le16_to_cpu(args.v6_out.ulFbDiv.usFbDivFrac); @@ -1109,7 +1113,8 @@ int amdgpu_atombios_get_memory_pll_dividers(struct amdgpu_device *adev, if (strobe_mode) args.ucInputFlag |= MPLL_INPUT_FLAG_STROBE_MODE_EN; - amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args); + amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, + sizeof(args)); mpll_param->clkfrac = le16_to_cpu(args.ulFbDiv.usFbDivFrac); mpll_param->clkf = le16_to_cpu(args.ulFbDiv.usFbDiv); @@ -1151,7 +1156,8 @@ void amdgpu_atombios_set_engine_dram_timings(struct amdgpu_device *adev, if (mem_clock) args.sReserved.ulClock = cpu_to_le32(mem_clock & SET_CLOCK_FREQ_MASK); - amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args); + amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, + sizeof(args)); } void amdgpu_atombios_get_default_voltages(struct amdgpu_device *adev, @@ -1205,7 +1211,8 @@ int amdgpu_atombios_get_max_vddc(struct amdgpu_device *adev, u8 voltage_type, args.v2.ucVoltageMode = 0; args.v2.usVoltageLevel = 0; - amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args); + amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, + sizeof(args)); *voltage = le16_to_cpu(args.v2.usVoltageLevel); break; @@ -1214,7 +1221,8 @@ int amdgpu_atombios_get_max_vddc(struct amdgpu_device *adev, u8 voltage_type, args.v3.ucVoltageMode = ATOM_GET_VOLTAGE_LEVEL; args.v3.usVoltageLevel = cpu_to_le16(voltage_id); - amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args); + amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, + sizeof(args)); *voltage = le16_to_cpu(args.v3.usVoltageLevel); break; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c index fb2681dd6b33..a6d64bdbbb14 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c @@ -34,6 +34,7 @@ union firmware_info { struct atom_firmware_info_v3_2 v32; struct atom_firmware_info_v3_3 v33; struct atom_firmware_info_v3_4 v34; + struct atom_firmware_info_v3_5 v35; }; /* @@ -872,6 +873,10 @@ int amdgpu_atomfirmware_get_fw_reserved_fb_size(struct amdgpu_device *adev) fw_reserved_fb_size = (firmware_info->v34.fw_reserved_size_in_kb << 10); break; + case 5: + fw_reserved_fb_size = + (firmware_info->v35.fw_reserved_size_in_kb << 10); + break; default: fw_reserved_fb_size = 0; break; @@ -941,5 +946,6 @@ int amdgpu_atomfirmware_asic_init(struct amdgpu_device *adev, bool fb_reset) return -EINVAL; } - return amdgpu_atom_execute_table(ctx, ATOM_CMD_INIT, (uint32_t *)&asic_init_ps_v2_1); + return amdgpu_atom_execute_table(ctx, ATOM_CMD_INIT, (uint32_t *)&asic_init_ps_v2_1, + sizeof(asic_init_ps_v2_1)); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h index c7eb2caec65a..649b5530d8ae 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h @@ -36,7 +36,7 @@ int amdgpu_atomfirmware_get_clock_info(struct amdgpu_device *adev); int amdgpu_atomfirmware_get_gfx_info(struct amdgpu_device *adev); bool amdgpu_atomfirmware_mem_ecc_supported(struct amdgpu_device *adev); bool amdgpu_atomfirmware_sram_ecc_supported(struct amdgpu_device *adev); -bool amdgpu_atomfirmware_ras_rom_addr(struct amdgpu_device *adev, uint8_t* i2c_address); +bool amdgpu_atomfirmware_ras_rom_addr(struct amdgpu_device *adev, uint8_t *i2c_address); bool amdgpu_atomfirmware_mem_training_supported(struct amdgpu_device *adev); bool amdgpu_atomfirmware_dynamic_boot_config_supported(struct amdgpu_device *adev); int amdgpu_atomfirmware_get_fw_reserved_fb_size(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c index 38ccec913f00..618e469e3622 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c @@ -29,6 +29,7 @@ #include "amdgpu.h" #include "atom.h" +#include <linux/device.h> #include <linux/pci.h> #include <linux/slab.h> #include <linux/acpi.h> @@ -287,7 +288,15 @@ static bool amdgpu_atrm_get_bios(struct amdgpu_device *adev) if (adev->flags & AMD_IS_APU) return false; - while ((pdev = pci_get_class(PCI_CLASS_DISPLAY_VGA << 8, pdev)) != NULL) { + /* ATRM is for on-platform devices only */ + if (dev_is_removable(&adev->pdev->dev)) + return false; + + while ((pdev = pci_get_base_class(PCI_BASE_CLASS_DISPLAY, pdev))) { + if ((pdev->class != PCI_CLASS_DISPLAY_VGA << 8) && + (pdev->class != PCI_CLASS_DISPLAY_OTHER << 8)) + continue; + dhandle = ACPI_HANDLE(&pdev->dev); if (!dhandle) continue; @@ -299,20 +308,6 @@ static bool amdgpu_atrm_get_bios(struct amdgpu_device *adev) } } - if (!found) { - while ((pdev = pci_get_class(PCI_CLASS_DISPLAY_OTHER << 8, pdev)) != NULL) { - dhandle = ACPI_HANDLE(&pdev->dev); - if (!dhandle) - continue; - - status = acpi_get_handle(dhandle, "ATRM", &atrm_handle); - if (ACPI_SUCCESS(status)) { - found = true; - break; - } - } - } - if (!found) return false; pci_dev_put(pdev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c index 781e5c5ce04d..702f6610d024 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c @@ -172,6 +172,7 @@ int amdgpu_bo_list_get(struct amdgpu_fpriv *fpriv, int id, } rcu_read_unlock(); + *result = NULL; return -ENOENT; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c index 7473a42f7d45..9caba10315a8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c @@ -103,7 +103,7 @@ int amdgpu_connector_get_monitor_bpc(struct drm_connector *connector) struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector); struct amdgpu_connector_atom_dig *dig_connector; int bpc = 8; - unsigned mode_clock, max_tmds_clock; + unsigned int mode_clock, max_tmds_clock; switch (connector->connector_type) { case DRM_MODE_CONNECTOR_DVII: @@ -255,6 +255,7 @@ struct edid *amdgpu_connector_edid(struct drm_connector *connector) return amdgpu_connector->edid; } else if (edid_blob) { struct edid *edid = kmemdup(edid_blob->data, edid_blob->length, GFP_KERNEL); + if (edid) amdgpu_connector->edid = edid; } @@ -581,6 +582,7 @@ static int amdgpu_connector_set_property(struct drm_connector *connector, amdgpu_encoder = to_amdgpu_encoder(connector->encoder); } else { const struct drm_connector_helper_funcs *connector_funcs = connector->helper_private; + amdgpu_encoder = to_amdgpu_encoder(connector_funcs->best_encoder(connector)); } @@ -797,6 +799,7 @@ static int amdgpu_connector_set_lcd_property(struct drm_connector *connector, amdgpu_encoder = to_amdgpu_encoder(connector->encoder); else { const struct drm_connector_helper_funcs *connector_funcs = connector->helper_private; + amdgpu_encoder = to_amdgpu_encoder(connector_funcs->best_encoder(connector)); } @@ -979,6 +982,41 @@ amdgpu_connector_check_hpd_status_unchanged(struct drm_connector *connector) return false; } +static void amdgpu_connector_shared_ddc(enum drm_connector_status *status, + struct drm_connector *connector, + struct amdgpu_connector *amdgpu_connector) +{ + struct drm_connector *list_connector; + struct drm_connector_list_iter iter; + struct amdgpu_connector *list_amdgpu_connector; + struct drm_device *dev = connector->dev; + struct amdgpu_device *adev = drm_to_adev(dev); + + if (amdgpu_connector->shared_ddc && *status == connector_status_connected) { + drm_connector_list_iter_begin(dev, &iter); + drm_for_each_connector_iter(list_connector, + &iter) { + if (connector == list_connector) + continue; + list_amdgpu_connector = to_amdgpu_connector(list_connector); + if (list_amdgpu_connector->shared_ddc && + list_amdgpu_connector->ddc_bus->rec.i2c_id == + amdgpu_connector->ddc_bus->rec.i2c_id) { + /* cases where both connectors are digital */ + if (list_connector->connector_type != DRM_MODE_CONNECTOR_VGA) { + /* hpd is our only option in this case */ + if (!amdgpu_display_hpd_sense(adev, + amdgpu_connector->hpd.hpd)) { + amdgpu_connector_free_edid(connector); + *status = connector_status_disconnected; + } + } + } + } + drm_connector_list_iter_end(&iter); + } +} + /* * DVI is complicated * Do a DDC probe, if DDC probe passes, get the full EDID so @@ -1065,32 +1103,7 @@ amdgpu_connector_dvi_detect(struct drm_connector *connector, bool force) * DDC line. The latter is more complex because with DVI<->HDMI adapters * you don't really know what's connected to which port as both are digital. */ - if (amdgpu_connector->shared_ddc && (ret == connector_status_connected)) { - struct drm_connector *list_connector; - struct drm_connector_list_iter iter; - struct amdgpu_connector *list_amdgpu_connector; - - drm_connector_list_iter_begin(dev, &iter); - drm_for_each_connector_iter(list_connector, - &iter) { - if (connector == list_connector) - continue; - list_amdgpu_connector = to_amdgpu_connector(list_connector); - if (list_amdgpu_connector->shared_ddc && - (list_amdgpu_connector->ddc_bus->rec.i2c_id == - amdgpu_connector->ddc_bus->rec.i2c_id)) { - /* cases where both connectors are digital */ - if (list_connector->connector_type != DRM_MODE_CONNECTOR_VGA) { - /* hpd is our only option in this case */ - if (!amdgpu_display_hpd_sense(adev, amdgpu_connector->hpd.hpd)) { - amdgpu_connector_free_edid(connector); - ret = connector_status_disconnected; - } - } - } - } - drm_connector_list_iter_end(&iter); - } + amdgpu_connector_shared_ddc(&ret, connector, amdgpu_connector); } } @@ -1192,6 +1205,7 @@ amdgpu_connector_dvi_encoder(struct drm_connector *connector) static void amdgpu_connector_dvi_force(struct drm_connector *connector) { struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector); + if (connector->force == DRM_FORCE_ON) amdgpu_connector->use_digital = false; if (connector->force == DRM_FORCE_ON_DIGITAL) @@ -1426,6 +1440,7 @@ amdgpu_connector_dp_detect(struct drm_connector *connector, bool force) ret = connector_status_connected; else if (amdgpu_connector->dac_load_detect) { /* try load detection */ const struct drm_encoder_helper_funcs *encoder_funcs = encoder->helper_private; + ret = encoder_funcs->detect(encoder, connector); } } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 122472d88756..ec888fc6ead8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -65,7 +65,8 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, } amdgpu_sync_create(&p->sync); - drm_exec_init(&p->exec, DRM_EXEC_INTERRUPTIBLE_WAIT); + drm_exec_init(&p->exec, DRM_EXEC_INTERRUPTIBLE_WAIT | + DRM_EXEC_IGNORE_DUPLICATES, 0); return 0; } @@ -206,7 +207,7 @@ static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p, } for (i = 0; i < p->nchunks; i++) { - struct drm_amdgpu_cs_chunk __user **chunk_ptr = NULL; + struct drm_amdgpu_cs_chunk __user *chunk_ptr = NULL; struct drm_amdgpu_cs_chunk user_chunk; uint32_t __user *cdata; @@ -818,7 +819,7 @@ retry: p->bytes_moved += ctx.bytes_moved; if (!amdgpu_gmc_vram_full_visible(&adev->gmc) && - amdgpu_bo_in_cpu_visible_vram(bo)) + amdgpu_res_cpu_visible(adev, bo->tbo.resource)) p->bytes_moved_vis += ctx.bytes_moved; if (unlikely(r == -ENOMEM) && domain != bo->allowed_domains) { @@ -869,9 +870,9 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, struct amdgpu_bo *bo = e->bo; int i; - e->user_pages = kvmalloc_array(bo->tbo.ttm->num_pages, - sizeof(struct page *), - GFP_KERNEL | __GFP_ZERO); + e->user_pages = kvcalloc(bo->tbo.ttm->num_pages, + sizeof(struct page *), + GFP_KERNEL); if (!e->user_pages) { DRM_ERROR("kvmalloc_array failure\n"); r = -ENOMEM; @@ -951,10 +952,10 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, p->bytes_moved = 0; p->bytes_moved_vis = 0; - r = amdgpu_vm_validate_pt_bos(p->adev, &fpriv->vm, - amdgpu_cs_bo_validate, p); + r = amdgpu_vm_validate(p->adev, &fpriv->vm, NULL, + amdgpu_cs_bo_validate, p); if (r) { - DRM_ERROR("amdgpu_vm_validate_pt_bos() failed.\n"); + DRM_ERROR("amdgpu_vm_validate() failed.\n"); goto out_free_user_pages; } @@ -1116,6 +1117,11 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p) return r; } + /* FIXME: In theory this loop shouldn't be needed any more when + * amdgpu_vm_handle_moved handles all moved BOs that are reserved + * with p->ticket. But removing it caused test regressions, so I'm + * leaving it here for now. + */ amdgpu_bo_list_for_each_entry(e, p->bo_list) { bo_va = e->bo_va; if (bo_va == NULL) @@ -1130,7 +1136,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p) return r; } - r = amdgpu_vm_handle_moved(adev, vm); + r = amdgpu_vm_handle_moved(adev, vm, &p->exec.ticket); if (r) return r; @@ -1409,7 +1415,7 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) if (r == -ENOMEM) DRM_ERROR("Not enough memory for command submission!\n"); else if (r != -ERESTARTSYS && r != -EAGAIN) - DRM_ERROR("Failed to process the buffer list %d!\n", r); + DRM_DEBUG("Failed to process the buffer list %d!\n", r); goto error_fini; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c index 720011019741..cfdf558b48b6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c @@ -28,9 +28,8 @@ uint64_t amdgpu_csa_vaddr(struct amdgpu_device *adev) { - uint64_t addr = adev->vm_manager.max_pfn << AMDGPU_GPU_PAGE_SHIFT; + uint64_t addr = AMDGPU_VA_RESERVED_CSA_START(adev); - addr -= AMDGPU_VA_RESERVED_SIZE; addr = amdgpu_gmc_sign_extend(addr); return addr; @@ -70,7 +69,7 @@ int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm, struct drm_exec exec; int r; - drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT); + drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0); drm_exec_until_all_locked(&exec) { r = amdgpu_vm_lock_pd(vm, &exec, 0); if (likely(!r)) @@ -110,7 +109,7 @@ int amdgpu_unmap_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm, struct drm_exec exec; int r; - drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT); + drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0); drm_exec_until_all_locked(&exec) { r = amdgpu_vm_lock_pd(vm, &exec, 0); if (likely(!r)) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index 1835d86f6fdc..5cb33ac99f70 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -56,6 +56,10 @@ bool amdgpu_ctx_priority_is_valid(int32_t ctx_prio) return true; default: case AMDGPU_CTX_PRIORITY_UNSET: + /* UNSET priority is not valid and we don't carry that + * around, but set it to NORMAL in the only place this + * function is called, amdgpu_ctx_ioctl(). + */ return false; } } @@ -69,10 +73,10 @@ amdgpu_ctx_to_drm_sched_prio(int32_t ctx_prio) return DRM_SCHED_PRIORITY_NORMAL; case AMDGPU_CTX_PRIORITY_VERY_LOW: - return DRM_SCHED_PRIORITY_MIN; + return DRM_SCHED_PRIORITY_LOW; case AMDGPU_CTX_PRIORITY_LOW: - return DRM_SCHED_PRIORITY_MIN; + return DRM_SCHED_PRIORITY_LOW; case AMDGPU_CTX_PRIORITY_NORMAL: return DRM_SCHED_PRIORITY_NORMAL; @@ -96,9 +100,6 @@ amdgpu_ctx_to_drm_sched_prio(int32_t ctx_prio) static int amdgpu_ctx_priority_permit(struct drm_file *filp, int32_t priority) { - if (!amdgpu_ctx_priority_is_valid(priority)) - return -EINVAL; - /* NORMAL and below are accessible by everyone */ if (priority <= AMDGPU_CTX_PRIORITY_NORMAL) return 0; @@ -633,8 +634,6 @@ static int amdgpu_ctx_query2(struct amdgpu_device *adev, return 0; } - - static int amdgpu_ctx_stable_pstate(struct amdgpu_device *adev, struct amdgpu_fpriv *fpriv, uint32_t id, bool set, u32 *stable_pstate) @@ -677,8 +676,10 @@ int amdgpu_ctx_ioctl(struct drm_device *dev, void *data, id = args->in.ctx_id; priority = args->in.priority; - /* For backwards compatibility reasons, we need to accept - * ioctls with garbage in the priority field */ + /* For backwards compatibility, we need to accept ioctls with garbage + * in the priority field. Garbage values in the priority field, result + * in the priority being set to NORMAL. + */ if (!amdgpu_ctx_priority_is_valid(priority)) priority = AMDGPU_CTX_PRIORITY_NORMAL; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c index a4faea4fa0b5..f5d0fa207a88 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c @@ -540,7 +540,11 @@ static ssize_t amdgpu_debugfs_regs_pcie_read(struct file *f, char __user *buf, while (size) { uint32_t value; - value = RREG32_PCIE(*pos); + if (upper_32_bits(*pos)) + value = RREG32_PCIE_EXT(*pos); + else + value = RREG32_PCIE(*pos); + r = put_user(value, (uint32_t *)buf); if (r) goto out; @@ -600,7 +604,10 @@ static ssize_t amdgpu_debugfs_regs_pcie_write(struct file *f, const char __user if (r) goto out; - WREG32_PCIE(*pos, value); + if (upper_32_bits(*pos)) + WREG32_PCIE_EXT(*pos, value); + else + WREG32_PCIE(*pos, value); result += 4; buf += 4; @@ -638,6 +645,9 @@ static ssize_t amdgpu_debugfs_regs_didt_read(struct file *f, char __user *buf, if (size & 0x3 || *pos & 0x3) return -EINVAL; + if (!adev->didt_rreg) + return -EOPNOTSUPP; + r = pm_runtime_get_sync(adev_to_drm(adev)->dev); if (r < 0) { pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); @@ -694,6 +704,9 @@ static ssize_t amdgpu_debugfs_regs_didt_write(struct file *f, const char __user if (size & 0x3 || *pos & 0x3) return -EINVAL; + if (!adev->didt_wreg) + return -EOPNOTSUPP; + r = pm_runtime_get_sync(adev_to_drm(adev)->dev); if (r < 0) { pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); @@ -748,6 +761,9 @@ static ssize_t amdgpu_debugfs_regs_smc_read(struct file *f, char __user *buf, ssize_t result = 0; int r; + if (!adev->smc_rreg) + return -EOPNOTSUPP; + if (size & 0x3 || *pos & 0x3) return -EINVAL; @@ -804,6 +820,9 @@ static ssize_t amdgpu_debugfs_regs_smc_write(struct file *f, const char __user * ssize_t result = 0; int r; + if (!adev->smc_wreg) + return -EOPNOTSUPP; + if (size & 0x3 || *pos & 0x3) return -EINVAL; @@ -1659,9 +1678,9 @@ static int amdgpu_debugfs_test_ib_show(struct seq_file *m, void *unused) for (i = 0; i < AMDGPU_MAX_RINGS; i++) { struct amdgpu_ring *ring = adev->rings[i]; - if (!ring || !ring->sched.thread) + if (!amdgpu_ring_sched_ready(ring)) continue; - kthread_park(ring->sched.thread); + drm_sched_wqueue_stop(&ring->sched); } seq_puts(m, "run ib test:\n"); @@ -1675,9 +1694,9 @@ static int amdgpu_debugfs_test_ib_show(struct seq_file *m, void *unused) for (i = 0; i < AMDGPU_MAX_RINGS; i++) { struct amdgpu_ring *ring = adev->rings[i]; - if (!ring || !ring->sched.thread) + if (!amdgpu_ring_sched_ready(ring)) continue; - kthread_unpark(ring->sched.thread); + drm_sched_wqueue_start(&ring->sched); } up_write(&adev->reset_domain->sem); @@ -1763,9 +1782,14 @@ static int amdgpu_debugfs_vm_info_show(struct seq_file *m, void *unused) list_for_each_entry(file, &dev->filelist, lhead) { struct amdgpu_fpriv *fpriv = file->driver_priv; struct amdgpu_vm *vm = &fpriv->vm; + struct amdgpu_task_info *ti; + + ti = amdgpu_vm_get_task_info_vm(vm); + if (ti) { + seq_printf(m, "pid:%d\tProcess:%s ----------\n", ti->pid, ti->process_name); + amdgpu_vm_put_task_info(ti); + } - seq_printf(m, "pid:%d\tProcess:%s ----------\n", - vm->task_info.pid, vm->task_info.process_name); r = amdgpu_bo_reserve(vm->root.bo, true); if (r) break; @@ -1897,7 +1921,8 @@ static int amdgpu_debugfs_ib_preempt(void *data, u64 val) ring = adev->rings[val]; - if (!ring || !ring->funcs->preempt_ib || !ring->sched.thread) + if (!amdgpu_ring_sched_ready(ring) || + !ring->funcs->preempt_ib) return -EINVAL; /* the last preemption failed */ @@ -1915,7 +1940,7 @@ static int amdgpu_debugfs_ib_preempt(void *data, u64 val) goto pro_end; /* stop the scheduler */ - kthread_park(ring->sched.thread); + drm_sched_wqueue_stop(&ring->sched); /* preempt the IB */ r = amdgpu_ring_preempt_ib(ring); @@ -1949,7 +1974,7 @@ static int amdgpu_debugfs_ib_preempt(void *data, u64 val) failure: /* restart the scheduler */ - kthread_unpark(ring->sched.thread); + drm_sched_wqueue_start(&ring->sched); up_read(&adev->reset_domain->sem); @@ -2016,8 +2041,8 @@ static ssize_t amdgpu_reset_dump_register_list_read(struct file *f, if (ret) return ret; - for (i = 0; i < adev->num_regs; i++) { - sprintf(reg_offset, "0x%x\n", adev->reset_dump_reg_list[i]); + for (i = 0; i < adev->reset_info.num_regs; i++) { + sprintf(reg_offset, "0x%x\n", adev->reset_info.reset_dump_reg_list[i]); up_read(&adev->reset_domain->sem); if (copy_to_user(buf + len, reg_offset, strlen(reg_offset))) return -EFAULT; @@ -2074,9 +2099,9 @@ static ssize_t amdgpu_reset_dump_register_list_write(struct file *f, if (ret) goto error_free; - swap(adev->reset_dump_reg_list, tmp); - swap(adev->reset_dump_reg_value, new); - adev->num_regs = i; + swap(adev->reset_info.reset_dump_reg_list, tmp); + swap(adev->reset_info.reset_dump_reg_value, new); + adev->reset_info.num_regs = i; up_write(&adev->reset_domain->sem); ret = size; @@ -2134,6 +2159,8 @@ int amdgpu_debugfs_init(struct amdgpu_device *adev) amdgpu_debugfs_firmware_init(adev); amdgpu_ta_if_debugfs_init(adev); + amdgpu_debugfs_mes_event_log_init(adev); + #if defined(CONFIG_DRM_AMD_DC) if (adev->dc_enabled) dtn_debugfs_init(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h index 371a6f0deb29..0425432d8659 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h @@ -32,3 +32,5 @@ void amdgpu_debugfs_fini(struct amdgpu_device *adev); void amdgpu_debugfs_fence_init(struct amdgpu_device *adev); void amdgpu_debugfs_firmware_init(struct amdgpu_device *adev); void amdgpu_debugfs_gem_init(struct amdgpu_device *adev); +void amdgpu_debugfs_mes_event_log_init(struct amdgpu_device *adev); + diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c new file mode 100644 index 000000000000..64fe564b8036 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c @@ -0,0 +1,346 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright 2024 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include <generated/utsrelease.h> +#include <linux/devcoredump.h> +#include "amdgpu_dev_coredump.h" +#include "atom.h" + +#ifndef CONFIG_DEV_COREDUMP +void amdgpu_coredump(struct amdgpu_device *adev, bool vram_lost, + struct amdgpu_reset_context *reset_context) +{ +} +#else + +const char *hw_ip_names[MAX_HWIP] = { + [GC_HWIP] = "GC", + [HDP_HWIP] = "HDP", + [SDMA0_HWIP] = "SDMA0", + [SDMA1_HWIP] = "SDMA1", + [SDMA2_HWIP] = "SDMA2", + [SDMA3_HWIP] = "SDMA3", + [SDMA4_HWIP] = "SDMA4", + [SDMA5_HWIP] = "SDMA5", + [SDMA6_HWIP] = "SDMA6", + [SDMA7_HWIP] = "SDMA7", + [LSDMA_HWIP] = "LSDMA", + [MMHUB_HWIP] = "MMHUB", + [ATHUB_HWIP] = "ATHUB", + [NBIO_HWIP] = "NBIO", + [MP0_HWIP] = "MP0", + [MP1_HWIP] = "MP1", + [UVD_HWIP] = "UVD/JPEG/VCN", + [VCN1_HWIP] = "VCN1", + [VCE_HWIP] = "VCE", + [VPE_HWIP] = "VPE", + [DF_HWIP] = "DF", + [DCE_HWIP] = "DCE", + [OSSSYS_HWIP] = "OSSSYS", + [SMUIO_HWIP] = "SMUIO", + [PWR_HWIP] = "PWR", + [NBIF_HWIP] = "NBIF", + [THM_HWIP] = "THM", + [CLK_HWIP] = "CLK", + [UMC_HWIP] = "UMC", + [RSMU_HWIP] = "RSMU", + [XGMI_HWIP] = "XGMI", + [DCI_HWIP] = "DCI", + [PCIE_HWIP] = "PCIE", +}; + +static void amdgpu_devcoredump_fw_info(struct amdgpu_device *adev, + struct drm_printer *p) +{ + uint32_t version; + uint32_t feature; + uint8_t smu_program, smu_major, smu_minor, smu_debug; + struct atom_context *ctx = adev->mode_info.atom_context; + + drm_printf(p, "VCE feature version: %u, fw version: 0x%08x\n", + adev->vce.fb_version, adev->vce.fw_version); + drm_printf(p, "UVD feature version: %u, fw version: 0x%08x\n", 0, + adev->uvd.fw_version); + drm_printf(p, "GMC feature version: %u, fw version: 0x%08x\n", 0, + adev->gmc.fw_version); + drm_printf(p, "ME feature version: %u, fw version: 0x%08x\n", + adev->gfx.me_feature_version, adev->gfx.me_fw_version); + drm_printf(p, "PFP feature version: %u, fw version: 0x%08x\n", + adev->gfx.pfp_feature_version, adev->gfx.pfp_fw_version); + drm_printf(p, "CE feature version: %u, fw version: 0x%08x\n", + adev->gfx.ce_feature_version, adev->gfx.ce_fw_version); + drm_printf(p, "RLC feature version: %u, fw version: 0x%08x\n", + adev->gfx.rlc_feature_version, adev->gfx.rlc_fw_version); + + drm_printf(p, "RLC SRLC feature version: %u, fw version: 0x%08x\n", + adev->gfx.rlc_srlc_feature_version, + adev->gfx.rlc_srlc_fw_version); + drm_printf(p, "RLC SRLG feature version: %u, fw version: 0x%08x\n", + adev->gfx.rlc_srlg_feature_version, + adev->gfx.rlc_srlg_fw_version); + drm_printf(p, "RLC SRLS feature version: %u, fw version: 0x%08x\n", + adev->gfx.rlc_srls_feature_version, + adev->gfx.rlc_srls_fw_version); + drm_printf(p, "RLCP feature version: %u, fw version: 0x%08x\n", + adev->gfx.rlcp_ucode_feature_version, + adev->gfx.rlcp_ucode_version); + drm_printf(p, "RLCV feature version: %u, fw version: 0x%08x\n", + adev->gfx.rlcv_ucode_feature_version, + adev->gfx.rlcv_ucode_version); + drm_printf(p, "MEC feature version: %u, fw version: 0x%08x\n", + adev->gfx.mec_feature_version, adev->gfx.mec_fw_version); + + if (adev->gfx.mec2_fw) + drm_printf(p, "MEC2 feature version: %u, fw version: 0x%08x\n", + adev->gfx.mec2_feature_version, + adev->gfx.mec2_fw_version); + + drm_printf(p, "IMU feature version: %u, fw version: 0x%08x\n", 0, + adev->gfx.imu_fw_version); + drm_printf(p, "PSP SOS feature version: %u, fw version: 0x%08x\n", + adev->psp.sos.feature_version, adev->psp.sos.fw_version); + drm_printf(p, "PSP ASD feature version: %u, fw version: 0x%08x\n", + adev->psp.asd_context.bin_desc.feature_version, + adev->psp.asd_context.bin_desc.fw_version); + + drm_printf(p, "TA XGMI feature version: 0x%08x, fw version: 0x%08x\n", + adev->psp.xgmi_context.context.bin_desc.feature_version, + adev->psp.xgmi_context.context.bin_desc.fw_version); + drm_printf(p, "TA RAS feature version: 0x%08x, fw version: 0x%08x\n", + adev->psp.ras_context.context.bin_desc.feature_version, + adev->psp.ras_context.context.bin_desc.fw_version); + drm_printf(p, "TA HDCP feature version: 0x%08x, fw version: 0x%08x\n", + adev->psp.hdcp_context.context.bin_desc.feature_version, + adev->psp.hdcp_context.context.bin_desc.fw_version); + drm_printf(p, "TA DTM feature version: 0x%08x, fw version: 0x%08x\n", + adev->psp.dtm_context.context.bin_desc.feature_version, + adev->psp.dtm_context.context.bin_desc.fw_version); + drm_printf(p, "TA RAP feature version: 0x%08x, fw version: 0x%08x\n", + adev->psp.rap_context.context.bin_desc.feature_version, + adev->psp.rap_context.context.bin_desc.fw_version); + drm_printf(p, + "TA SECURE DISPLAY feature version: 0x%08x, fw version: 0x%08x\n", + adev->psp.securedisplay_context.context.bin_desc.feature_version, + adev->psp.securedisplay_context.context.bin_desc.fw_version); + + /* SMC firmware */ + version = adev->pm.fw_version; + + smu_program = (version >> 24) & 0xff; + smu_major = (version >> 16) & 0xff; + smu_minor = (version >> 8) & 0xff; + smu_debug = (version >> 0) & 0xff; + drm_printf(p, + "SMC feature version: %u, program: %d, fw version: 0x%08x (%d.%d.%d)\n", + 0, smu_program, version, smu_major, smu_minor, smu_debug); + + /* SDMA firmware */ + for (int i = 0; i < adev->sdma.num_instances; i++) { + drm_printf(p, + "SDMA%d feature version: %u, firmware version: 0x%08x\n", + i, adev->sdma.instance[i].feature_version, + adev->sdma.instance[i].fw_version); + } + + drm_printf(p, "VCN feature version: %u, fw version: 0x%08x\n", 0, + adev->vcn.fw_version); + drm_printf(p, "DMCU feature version: %u, fw version: 0x%08x\n", 0, + adev->dm.dmcu_fw_version); + drm_printf(p, "DMCUB feature version: %u, fw version: 0x%08x\n", 0, + adev->dm.dmcub_fw_version); + drm_printf(p, "PSP TOC feature version: %u, fw version: 0x%08x\n", + adev->psp.toc.feature_version, adev->psp.toc.fw_version); + + version = adev->mes.kiq_version & AMDGPU_MES_VERSION_MASK; + feature = (adev->mes.kiq_version & AMDGPU_MES_FEAT_VERSION_MASK) >> + AMDGPU_MES_FEAT_VERSION_SHIFT; + drm_printf(p, "MES_KIQ feature version: %u, fw version: 0x%08x\n", + feature, version); + + version = adev->mes.sched_version & AMDGPU_MES_VERSION_MASK; + feature = (adev->mes.sched_version & AMDGPU_MES_FEAT_VERSION_MASK) >> + AMDGPU_MES_FEAT_VERSION_SHIFT; + drm_printf(p, "MES feature version: %u, fw version: 0x%08x\n", feature, + version); + + drm_printf(p, "VPE feature version: %u, fw version: 0x%08x\n", + adev->vpe.feature_version, adev->vpe.fw_version); + + drm_printf(p, "\nVBIOS Information\n"); + drm_printf(p, "vbios name : %s\n", ctx->name); + drm_printf(p, "vbios pn : %s\n", ctx->vbios_pn); + drm_printf(p, "vbios version : %d\n", ctx->version); + drm_printf(p, "vbios ver_str : %s\n", ctx->vbios_ver_str); + drm_printf(p, "vbios date : %s\n", ctx->date); +} + +static ssize_t +amdgpu_devcoredump_read(char *buffer, loff_t offset, size_t count, + void *data, size_t datalen) +{ + struct drm_printer p; + struct amdgpu_coredump_info *coredump = data; + struct drm_print_iterator iter; + struct amdgpu_vm_fault_info *fault_info; + int i, ver; + + iter.data = buffer; + iter.offset = 0; + iter.start = offset; + iter.remain = count; + + p = drm_coredump_printer(&iter); + + drm_printf(&p, "**** AMDGPU Device Coredump ****\n"); + drm_printf(&p, "version: " AMDGPU_COREDUMP_VERSION "\n"); + drm_printf(&p, "kernel: " UTS_RELEASE "\n"); + drm_printf(&p, "module: " KBUILD_MODNAME "\n"); + drm_printf(&p, "time: %lld.%09ld\n", coredump->reset_time.tv_sec, + coredump->reset_time.tv_nsec); + + if (coredump->reset_task_info.pid) + drm_printf(&p, "process_name: %s PID: %d\n", + coredump->reset_task_info.process_name, + coredump->reset_task_info.pid); + + /* GPU IP's information of the SOC */ + drm_printf(&p, "\nIP Information\n"); + drm_printf(&p, "SOC Family: %d\n", coredump->adev->family); + drm_printf(&p, "SOC Revision id: %d\n", coredump->adev->rev_id); + drm_printf(&p, "SOC External Revision id: %d\n", coredump->adev->external_rev_id); + + for (int i = 1; i < MAX_HWIP; i++) { + for (int j = 0; j < HWIP_MAX_INSTANCE; j++) { + ver = coredump->adev->ip_versions[i][j]; + if (ver) + drm_printf(&p, "HWIP: %s[%d][%d]: v%d.%d.%d.%d.%d\n", + hw_ip_names[i], i, j, + IP_VERSION_MAJ(ver), + IP_VERSION_MIN(ver), + IP_VERSION_REV(ver), + IP_VERSION_VARIANT(ver), + IP_VERSION_SUBREV(ver)); + } + } + + /* IP firmware information */ + drm_printf(&p, "\nIP Firmwares\n"); + amdgpu_devcoredump_fw_info(coredump->adev, &p); + + if (coredump->ring) { + drm_printf(&p, "\nRing timed out details\n"); + drm_printf(&p, "IP Type: %d Ring Name: %s\n", + coredump->ring->funcs->type, + coredump->ring->name); + } + + /* Add page fault information */ + fault_info = &coredump->adev->vm_manager.fault_info; + drm_printf(&p, "\n[%s] Page fault observed\n", + fault_info->vmhub ? "mmhub" : "gfxhub"); + drm_printf(&p, "Faulty page starting at address: 0x%016llx\n", fault_info->addr); + drm_printf(&p, "Protection fault status register: 0x%x\n\n", fault_info->status); + + /* Add ring buffer information */ + drm_printf(&p, "Ring buffer information\n"); + for (int i = 0; i < coredump->adev->num_rings; i++) { + int j = 0; + struct amdgpu_ring *ring = coredump->adev->rings[i]; + + drm_printf(&p, "ring name: %s\n", ring->name); + drm_printf(&p, "Rptr: 0x%llx Wptr: 0x%llx RB mask: %x\n", + amdgpu_ring_get_rptr(ring), + amdgpu_ring_get_wptr(ring), + ring->buf_mask); + drm_printf(&p, "Ring size in dwords: %d\n", + ring->ring_size / 4); + drm_printf(&p, "Ring contents\n"); + drm_printf(&p, "Offset \t Value\n"); + + while (j < ring->ring_size) { + drm_printf(&p, "0x%x \t 0x%x\n", j, ring->ring[j / 4]); + j += 4; + } + } + + if (coredump->reset_vram_lost) + drm_printf(&p, "VRAM is lost due to GPU reset!\n"); + if (coredump->adev->reset_info.num_regs) { + drm_printf(&p, "AMDGPU register dumps:\nOffset: Value:\n"); + + for (i = 0; i < coredump->adev->reset_info.num_regs; i++) + drm_printf(&p, "0x%08x: 0x%08x\n", + coredump->adev->reset_info.reset_dump_reg_list[i], + coredump->adev->reset_info.reset_dump_reg_value[i]); + } + + return count - iter.remain; +} + +static void amdgpu_devcoredump_free(void *data) +{ + kfree(data); +} + +void amdgpu_coredump(struct amdgpu_device *adev, bool vram_lost, + struct amdgpu_reset_context *reset_context) +{ + struct amdgpu_coredump_info *coredump; + struct drm_device *dev = adev_to_drm(adev); + struct amdgpu_job *job = reset_context->job; + struct drm_sched_job *s_job; + + coredump = kzalloc(sizeof(*coredump), GFP_NOWAIT); + + if (!coredump) { + DRM_ERROR("%s: failed to allocate memory for coredump\n", __func__); + return; + } + + coredump->reset_vram_lost = vram_lost; + + if (reset_context->job && reset_context->job->vm) { + struct amdgpu_task_info *ti; + struct amdgpu_vm *vm = reset_context->job->vm; + + ti = amdgpu_vm_get_task_info_vm(vm); + if (ti) { + coredump->reset_task_info = *ti; + amdgpu_vm_put_task_info(ti); + } + } + + if (job) { + s_job = &job->base; + coredump->ring = to_amdgpu_ring(s_job->sched); + } + + coredump->adev = adev; + + ktime_get_ts64(&coredump->reset_time); + + dev_coredumpm(dev->dev, THIS_MODULE, coredump, 0, GFP_NOWAIT, + amdgpu_devcoredump_read, amdgpu_devcoredump_free); +} +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.h new file mode 100644 index 000000000000..52459512cb2b --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.h @@ -0,0 +1,47 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2024 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __AMDGPU_DEV_COREDUMP_H__ +#define __AMDGPU_DEV_COREDUMP_H__ + +#include "amdgpu.h" +#include "amdgpu_reset.h" + +#ifdef CONFIG_DEV_COREDUMP + +#define AMDGPU_COREDUMP_VERSION "1" + +struct amdgpu_coredump_info { + struct amdgpu_device *adev; + struct amdgpu_task_info reset_task_info; + struct timespec64 reset_time; + bool reset_vram_lost; + struct amdgpu_ring *ring; +}; +#endif + +void amdgpu_coredump(struct amdgpu_device *adev, bool vram_lost, + struct amdgpu_reset_context *reset_context); + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 0f98f720d9ca..f3b7cb18fd46 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -32,8 +32,6 @@ #include <linux/slab.h> #include <linux/iommu.h> #include <linux/pci.h> -#include <linux/devcoredump.h> -#include <generated/utsrelease.h> #include <linux/pci-p2pdma.h> #include <linux/apple-gmux.h> @@ -43,6 +41,7 @@ #include <drm/drm_fb_helper.h> #include <drm/drm_probe_helper.h> #include <drm/amdgpu_drm.h> +#include <linux/device.h> #include <linux/vgaarb.h> #include <linux/vga_switcheroo.h> #include <linux/efi.h> @@ -74,6 +73,8 @@ #include "amdgpu_pmu.h" #include "amdgpu_fru_eeprom.h" #include "amdgpu_reset.h" +#include "amdgpu_virt.h" +#include "amdgpu_dev_coredump.h" #include <linux/suspend.h> #include <drm/task_barrier.h> @@ -96,6 +97,9 @@ MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin"); #define AMDGPU_RESUME_MS 2000 #define AMDGPU_MAX_RETRY_LIMIT 2 #define AMDGPU_RETRY_SRIOV_RESET(r) ((r) == -EBUSY || (r) == -ETIMEDOUT || (r) == -EINVAL) +#define AMDGPU_PCIE_INDEX_FALLBACK (0x38 >> 2) +#define AMDGPU_PCIE_INDEX_HI_FALLBACK (0x44 >> 2) +#define AMDGPU_PCIE_DATA_FALLBACK (0x3C >> 2) static const struct drm_driver amdgpu_kms_driver; @@ -140,6 +144,8 @@ const char *amdgpu_asic_name[] = { "LAST", }; +static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev); + /** * DOC: pcie_replay_count * @@ -162,6 +168,65 @@ static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev, static DEVICE_ATTR(pcie_replay_count, 0444, amdgpu_device_get_pcie_replay_count, NULL); +static ssize_t amdgpu_sysfs_reg_state_get(struct file *f, struct kobject *kobj, + struct bin_attribute *attr, char *buf, + loff_t ppos, size_t count) +{ + struct device *dev = kobj_to_dev(kobj); + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = drm_to_adev(ddev); + ssize_t bytes_read; + + switch (ppos) { + case AMDGPU_SYS_REG_STATE_XGMI: + bytes_read = amdgpu_asic_get_reg_state( + adev, AMDGPU_REG_STATE_TYPE_XGMI, buf, count); + break; + case AMDGPU_SYS_REG_STATE_WAFL: + bytes_read = amdgpu_asic_get_reg_state( + adev, AMDGPU_REG_STATE_TYPE_WAFL, buf, count); + break; + case AMDGPU_SYS_REG_STATE_PCIE: + bytes_read = amdgpu_asic_get_reg_state( + adev, AMDGPU_REG_STATE_TYPE_PCIE, buf, count); + break; + case AMDGPU_SYS_REG_STATE_USR: + bytes_read = amdgpu_asic_get_reg_state( + adev, AMDGPU_REG_STATE_TYPE_USR, buf, count); + break; + case AMDGPU_SYS_REG_STATE_USR_1: + bytes_read = amdgpu_asic_get_reg_state( + adev, AMDGPU_REG_STATE_TYPE_USR_1, buf, count); + break; + default: + return -EINVAL; + } + + return bytes_read; +} + +BIN_ATTR(reg_state, 0444, amdgpu_sysfs_reg_state_get, NULL, + AMDGPU_SYS_REG_STATE_END); + +int amdgpu_reg_state_sysfs_init(struct amdgpu_device *adev) +{ + int ret; + + if (!amdgpu_asic_get_reg_state_supported(adev)) + return 0; + + ret = sysfs_create_bin_file(&adev->dev->kobj, &bin_attr_reg_state); + + return ret; +} + +void amdgpu_reg_state_sysfs_fini(struct amdgpu_device *adev) +{ + if (!amdgpu_asic_get_reg_state_supported(adev)) + return; + sysfs_remove_bin_file(&adev->dev->kobj, &bin_attr_reg_state); +} + /** * DOC: board_info * @@ -273,16 +338,93 @@ bool amdgpu_device_supports_boco(struct drm_device *dev) * * @dev: drm_device pointer * - * Returns true if the device supporte BACO, - * otherwise return false. + * Return: + * 1 if the device supporte BACO; + * 3 if the device support MACO (only works if BACO is supported) + * otherwise return 0. */ -bool amdgpu_device_supports_baco(struct drm_device *dev) +int amdgpu_device_supports_baco(struct drm_device *dev) { struct amdgpu_device *adev = drm_to_adev(dev); return amdgpu_asic_supports_baco(adev); } +void amdgpu_device_detect_runtime_pm_mode(struct amdgpu_device *adev) +{ + struct drm_device *dev; + int bamaco_support; + + dev = adev_to_drm(adev); + + adev->pm.rpm_mode = AMDGPU_RUNPM_NONE; + bamaco_support = amdgpu_device_supports_baco(dev); + + switch (amdgpu_runtime_pm) { + case 2: + if (bamaco_support & MACO_SUPPORT) { + adev->pm.rpm_mode = AMDGPU_RUNPM_BAMACO; + dev_info(adev->dev, "Forcing BAMACO for runtime pm\n"); + } else if (bamaco_support == BACO_SUPPORT) { + adev->pm.rpm_mode = AMDGPU_RUNPM_BACO; + dev_info(adev->dev, "Requested mode BAMACO not available,fallback to use BACO\n"); + } + break; + case 1: + if (bamaco_support & BACO_SUPPORT) { + adev->pm.rpm_mode = AMDGPU_RUNPM_BACO; + dev_info(adev->dev, "Forcing BACO for runtime pm\n"); + } + break; + case -1: + case -2: + if (amdgpu_device_supports_px(dev)) { /* enable PX as runtime mode */ + adev->pm.rpm_mode = AMDGPU_RUNPM_PX; + dev_info(adev->dev, "Using ATPX for runtime pm\n"); + } else if (amdgpu_device_supports_boco(dev)) { /* enable boco as runtime mode */ + adev->pm.rpm_mode = AMDGPU_RUNPM_BOCO; + dev_info(adev->dev, "Using BOCO for runtime pm\n"); + } else { + if (!bamaco_support) + goto no_runtime_pm; + + switch (adev->asic_type) { + case CHIP_VEGA20: + case CHIP_ARCTURUS: + /* BACO are not supported on vega20 and arctrus */ + break; + case CHIP_VEGA10: + /* enable BACO as runpm mode if noretry=0 */ + if (!adev->gmc.noretry) + adev->pm.rpm_mode = AMDGPU_RUNPM_BACO; + break; + default: + /* enable BACO as runpm mode on CI+ */ + adev->pm.rpm_mode = AMDGPU_RUNPM_BACO; + break; + } + + if (adev->pm.rpm_mode == AMDGPU_RUNPM_BACO) { + if (bamaco_support & MACO_SUPPORT) { + adev->pm.rpm_mode = AMDGPU_RUNPM_BAMACO; + dev_info(adev->dev, "Using BAMACO for runtime pm\n"); + } else { + dev_info(adev->dev, "Using BACO for runtime pm\n"); + } + } + } + break; + case 0: + dev_info(adev->dev, "runtime pm is manually disabled\n"); + break; + default: + break; + } + +no_runtime_pm: + if (adev->pm.rpm_mode == AMDGPU_RUNPM_NONE) + dev_info(adev->dev, "Runtime PM not available\n"); +} /** * amdgpu_device_supports_smart_shift - Is the device dGPU with * smart shift support @@ -473,7 +615,7 @@ uint32_t amdgpu_device_rreg(struct amdgpu_device *adev, if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev) && down_read_trylock(&adev->reset_domain->sem)) { - ret = amdgpu_kiq_rreg(adev, reg); + ret = amdgpu_kiq_rreg(adev, reg, 0); up_read(&adev->reset_domain->sem); } else { ret = readl(((void __iomem *)adev->rmmio) + (reg * 4)); @@ -510,6 +652,49 @@ uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset) BUG(); } + +/** + * amdgpu_device_xcc_rreg - read a memory mapped IO or indirect register with specific XCC + * + * @adev: amdgpu_device pointer + * @reg: dword aligned register offset + * @acc_flags: access flags which require special behavior + * @xcc_id: xcc accelerated compute core id + * + * Returns the 32 bit value from the offset specified. + */ +uint32_t amdgpu_device_xcc_rreg(struct amdgpu_device *adev, + uint32_t reg, uint32_t acc_flags, + uint32_t xcc_id) +{ + uint32_t ret, rlcg_flag; + + if (amdgpu_device_skip_hw_access(adev)) + return 0; + + if ((reg * 4) < adev->rmmio_size) { + if (amdgpu_sriov_vf(adev) && + !amdgpu_sriov_runtime(adev) && + adev->gfx.rlc.rlcg_reg_access_supported && + amdgpu_virt_get_rlcg_reg_access_flag(adev, acc_flags, + GC_HWIP, false, + &rlcg_flag)) { + ret = amdgpu_virt_rlcg_reg_rw(adev, reg, 0, rlcg_flag, xcc_id); + } else if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && + amdgpu_sriov_runtime(adev) && + down_read_trylock(&adev->reset_domain->sem)) { + ret = amdgpu_kiq_rreg(adev, reg, xcc_id); + up_read(&adev->reset_domain->sem); + } else { + ret = readl(((void __iomem *)adev->rmmio) + (reg * 4)); + } + } else { + ret = adev->pcie_rreg(adev, reg * 4); + } + + return ret; +} + /* * MMIO register write with bytes helper functions * @offset:bytes offset from MMIO start @@ -557,7 +742,7 @@ void amdgpu_device_wreg(struct amdgpu_device *adev, if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev) && down_read_trylock(&adev->reset_domain->sem)) { - amdgpu_kiq_wreg(adev, reg, v); + amdgpu_kiq_wreg(adev, reg, v, 0); up_read(&adev->reset_domain->sem); } else { writel(v, ((void __iomem *)adev->rmmio) + (reg * 4)); @@ -599,6 +784,47 @@ void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev, } /** + * amdgpu_device_xcc_wreg - write to a memory mapped IO or indirect register with specific XCC + * + * @adev: amdgpu_device pointer + * @reg: dword aligned register offset + * @v: 32 bit value to write to the register + * @acc_flags: access flags which require special behavior + * @xcc_id: xcc accelerated compute core id + * + * Writes the value specified to the offset specified. + */ +void amdgpu_device_xcc_wreg(struct amdgpu_device *adev, + uint32_t reg, uint32_t v, + uint32_t acc_flags, uint32_t xcc_id) +{ + uint32_t rlcg_flag; + + if (amdgpu_device_skip_hw_access(adev)) + return; + + if ((reg * 4) < adev->rmmio_size) { + if (amdgpu_sriov_vf(adev) && + !amdgpu_sriov_runtime(adev) && + adev->gfx.rlc.rlcg_reg_access_supported && + amdgpu_virt_get_rlcg_reg_access_flag(adev, acc_flags, + GC_HWIP, true, + &rlcg_flag)) { + amdgpu_virt_rlcg_reg_rw(adev, reg, v, rlcg_flag, xcc_id); + } else if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && + amdgpu_sriov_runtime(adev) && + down_read_trylock(&adev->reset_domain->sem)) { + amdgpu_kiq_wreg(adev, reg, v, xcc_id); + up_read(&adev->reset_domain->sem); + } else { + writel(v, ((void __iomem *)adev->rmmio) + (reg * 4)); + } + } else { + adev->pcie_wreg(adev, reg * 4, v); + } +} + +/** * amdgpu_device_indirect_rreg - read an indirect register * * @adev: amdgpu_device pointer @@ -638,12 +864,22 @@ u32 amdgpu_device_indirect_rreg_ext(struct amdgpu_device *adev, void __iomem *pcie_index_hi_offset; void __iomem *pcie_data_offset; - pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev); - pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev); - if ((reg_addr >> 32) && (adev->nbio.funcs->get_pcie_index_hi_offset)) - pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev); - else + if (unlikely(!adev->nbio.funcs)) { + pcie_index = AMDGPU_PCIE_INDEX_FALLBACK; + pcie_data = AMDGPU_PCIE_DATA_FALLBACK; + } else { + pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev); + pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev); + } + + if (reg_addr >> 32) { + if (unlikely(!adev->nbio.funcs)) + pcie_index_hi = AMDGPU_PCIE_INDEX_HI_FALLBACK; + else + pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev); + } else { pcie_index_hi = 0; + } spin_lock_irqsave(&adev->pcie_idx_lock, flags); pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4; @@ -1297,6 +1533,10 @@ int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev) if (amdgpu_sriov_vf(adev)) return 0; + /* PCI_EXT_CAP_ID_VNDR extended capability is located at 0x100 */ + if (!pci_find_ext_capability(adev->pdev, PCI_EXT_CAP_ID_VNDR)) + DRM_WARN("System can't access extended configuration space, please check!!\n"); + /* skip if the bios has already enabled large BAR */ if (adev->gmc.real_vram_size && (pci_resource_len(adev->pdev, 0) >= adev->gmc.real_vram_size)) @@ -1399,6 +1639,7 @@ bool amdgpu_device_need_post(struct amdgpu_device *adev) return true; fw_ver = *((uint32_t *)adev->pm.fw->data + 69); + release_firmware(adev->pm.fw); if (fw_ver < 0x00160e00) return true; } @@ -1454,22 +1695,26 @@ bool amdgpu_device_seamless_boot_supported(struct amdgpu_device *adev) if (adev->mman.keep_stolen_vga_memory) return false; - return adev->ip_versions[DCE_HWIP][0] >= IP_VERSION(3, 0, 0); + return amdgpu_ip_version(adev, DCE_HWIP, 0) >= IP_VERSION(3, 0, 0); } /* - * Intel hosts such as Raptor Lake and Sapphire Rapids don't support dynamic - * speed switching. Until we have confirmation from Intel that a specific host - * supports it, it's safer that we keep it disabled for all. + * Intel hosts such as Rocket Lake, Alder Lake, Raptor Lake and Sapphire Rapids + * don't support dynamic speed switching. Until we have confirmation from Intel + * that a specific host supports it, it's safer that we keep it disabled for all. * * https://edc.intel.com/content/www/us/en/design/products/platforms/details/raptor-lake-s/13th-generation-core-processors-datasheet-volume-1-of-2/005/pci-express-support/ * https://gitlab.freedesktop.org/drm/amd/-/issues/2663 */ -bool amdgpu_device_pcie_dynamic_switching_supported(void) +static bool amdgpu_device_pcie_dynamic_switching_supported(struct amdgpu_device *adev) { #if IS_ENABLED(CONFIG_X86) struct cpuinfo_x86 *c = &cpu_data(0); + /* eGPU change speeds based on USB4 fabric conditions */ + if (dev_is_removable(adev->dev)) + return true; + if (c->x86_vendor == X86_VENDOR_INTEL) return false; #endif @@ -1498,20 +1743,13 @@ bool amdgpu_device_should_use_aspm(struct amdgpu_device *adev) default: return false; } + if (adev->flags & AMD_IS_APU) + return false; + if (!(adev->pm.pp_feature & PP_PCIE_DPM_MASK)) + return false; return pcie_aspm_enabled(adev->pdev); } -bool amdgpu_device_aspm_support_quirk(void) -{ -#if IS_ENABLED(CONFIG_X86) - struct cpuinfo_x86 *c = &cpu_data(0); - - return !(c->x86 == 6 && c->x86_model == INTEL_FAM6_ALDERLAKE); -#else - return true; -#endif -} - /* if we get transitioned to only one device, take VGA back */ /** * amdgpu_device_vga_set_decode - enable/disable vga decode @@ -2109,15 +2347,8 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev) adev->firmware.gpu_info_fw = NULL; - if (adev->mman.discovery_bin) { - /* - * FIXME: The bounding box is still needed by Navi12, so - * temporarily read it from gpu_info firmware. Should be dropped - * when DAL no longer needs it. - */ - if (adev->asic_type != CHIP_NAVI12) - return 0; - } + if (adev->mman.discovery_bin) + return 0; switch (adev->asic_type) { default: @@ -2232,7 +2463,6 @@ out: */ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev) { - struct drm_device *dev = adev_to_drm(adev); struct pci_dev *parent; int i, r; bool total; @@ -2303,7 +2533,7 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev) (amdgpu_is_atpx_hybrid() || amdgpu_has_atpx_dgpu_power_cntl()) && ((adev->flags & AMD_IS_APU) == 0) && - !pci_is_thunderbolt_attached(to_pci_dev(dev->dev))) + !dev_is_removable(&adev->pdev->dev)) adev->flags |= AMD_IS_PX; if (!(adev->flags & AMD_IS_APU)) { @@ -2317,6 +2547,8 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev) adev->pm.pp_feature &= ~PP_GFXOFF_MASK; if (amdgpu_sriov_vf(adev) && adev->asic_type == CHIP_SIENNA_CICHLID) adev->pm.pp_feature &= ~PP_OVERDRIVE_MASK; + if (!amdgpu_device_pcie_dynamic_switching_supported(adev)) + adev->pm.pp_feature &= ~PP_PCIE_DPM_MASK; total = true; for (i = 0; i < adev->num_ip_blocks; i++) { @@ -2493,7 +2725,8 @@ static int amdgpu_device_init_schedulers(struct amdgpu_device *adev) break; } - r = drm_sched_init(&ring->sched, &amdgpu_sched_ops, + r = drm_sched_init(&ring->sched, &amdgpu_sched_ops, NULL, + DRM_SCHED_PRIORITY_COUNT, ring->num_hw_submission, 0, timeout, adev->reset_domain->wq, ring->sched_score, ring->name, @@ -2503,6 +2736,18 @@ static int amdgpu_device_init_schedulers(struct amdgpu_device *adev) ring->name); return r; } + r = amdgpu_uvd_entity_init(adev, ring); + if (r) { + DRM_ERROR("Failed to create UVD scheduling entity on ring %s.\n", + ring->name); + return r; + } + r = amdgpu_vce_entity_init(adev, ring); + if (r) { + DRM_ERROR("Failed to create VCE scheduling entity on ring %s.\n", + ring->name); + return r; + } } amdgpu_xcp_update_partition_sched_list(adev); @@ -2583,6 +2828,12 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev) goto init_failed; } } + + r = amdgpu_seq64_init(adev); + if (r) { + DRM_ERROR("allocate seq64 failed %d\n", r); + goto init_failed; + } } } @@ -2663,6 +2914,9 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev) if (r) goto init_failed; + if (adev->mman.buffer_funcs_ring->sched.ready) + amdgpu_ttm_set_buffer_funcs_status(adev, true); + /* Don't init kfd if whole hive need to be reset during init */ if (!adev->gmc.xgmi.pending_reset) { kgd2kfd_init_zone_device(adev); @@ -3042,6 +3296,7 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev) amdgpu_device_wb_fini(adev); amdgpu_device_mem_scratch_fini(adev); amdgpu_ib_pool_fini(adev); + amdgpu_seq64_fini(adev); } r = adev->ip_blocks[i].version->funcs->sw_fini((void *)adev); @@ -3260,6 +3515,8 @@ int amdgpu_device_ip_suspend(struct amdgpu_device *adev) amdgpu_virt_request_full_gpu(adev, false); } + amdgpu_ttm_set_buffer_funcs_status(adev, false); + r = amdgpu_device_ip_suspend_phase1(adev); if (r) return r; @@ -3449,6 +3706,9 @@ static int amdgpu_device_ip_resume(struct amdgpu_device *adev) r = amdgpu_device_ip_resume_phase2(adev); + if (adev->mman.buffer_funcs_ring->sched.ready) + amdgpu_ttm_set_buffer_funcs_status(adev, true); + return r; } @@ -3578,9 +3838,7 @@ static void amdgpu_device_xgmi_reset_func(struct work_struct *__work) if (adev->asic_reset_res) goto fail; - if (adev->mmhub.ras && adev->mmhub.ras->ras_block.hw_ops && - adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count) - adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count(adev); + amdgpu_ras_reset_error_count(adev, AMDGPU_RAS_BLOCK__MMHUB); } else { task_barrier_full(&hive->tb); @@ -3692,10 +3950,6 @@ static void amdgpu_device_set_mcbp(struct amdgpu_device *adev) adev->gfx.mcbp = true; else if (amdgpu_mcbp == 0) adev->gfx.mcbp = false; - else if ((amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(9, 0, 0)) && - (amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(10, 0, 0)) && - adev->gfx.num_gfx_rings) - adev->gfx.mcbp = true; if (amdgpu_sriov_vf(adev)) adev->gfx.mcbp = true; @@ -3880,13 +4134,13 @@ int amdgpu_device_init(struct amdgpu_device *adev, return r; } + amdgpu_device_set_mcbp(adev); + /* early init functions */ r = amdgpu_device_ip_early_init(adev); if (r) return r; - amdgpu_device_set_mcbp(adev); - /* Get rid of things like offb */ r = drm_aperture_remove_conflicting_pci_framebuffers(adev->pdev, &amdgpu_kms_driver); if (r) @@ -3895,6 +4149,13 @@ int amdgpu_device_init(struct amdgpu_device *adev, /* Enable TMZ based on IP_VERSION */ amdgpu_gmc_tmz_set(adev); + if (amdgpu_sriov_vf(adev) && + amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(10, 3, 0)) + /* VF MMIO access (except mailbox range) from CPU + * will be blocked during sriov runtime + */ + adev->virt.caps |= AMDGPU_VF_MMIO_ACCESS_PROTECT; + amdgpu_gmc_noretry_set(adev); /* Need to get xgmi info early to decide the reset behavior*/ if (adev->gmc.xgmi.supported) { @@ -3961,18 +4222,22 @@ int amdgpu_device_init(struct amdgpu_device *adev, adev->ip_blocks[i].status.hw = true; } } + } else if (amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 10) && + !amdgpu_device_has_display_hardware(adev)) { + r = psp_gpu_reset(adev); } else { - tmp = amdgpu_reset_method; - /* It should do a default reset when loading or reloading the driver, - * regardless of the module parameter reset_method. - */ - amdgpu_reset_method = AMD_RESET_METHOD_NONE; - r = amdgpu_asic_reset(adev); - amdgpu_reset_method = tmp; - if (r) { - dev_err(adev->dev, "asic reset on init failed\n"); - goto failed; - } + tmp = amdgpu_reset_method; + /* It should do a default reset when loading or reloading the driver, + * regardless of the module parameter reset_method. + */ + amdgpu_reset_method = AMD_RESET_METHOD_NONE; + r = amdgpu_asic_reset(adev); + amdgpu_reset_method = tmp; + } + + if (r) { + dev_err(adev->dev, "asic reset on init failed\n"); + goto failed; } } @@ -4113,6 +4378,7 @@ fence_driver_init: "Could not create amdgpu board attributes\n"); amdgpu_fru_sysfs_init(adev); + amdgpu_reg_state_sysfs_init(adev); if (IS_ENABLED(CONFIG_PERF_EVENTS)) r = amdgpu_pmu_init(adev); @@ -4132,7 +4398,7 @@ fence_driver_init: px = amdgpu_device_supports_px(ddev); - if (px || (!pci_is_thunderbolt_attached(adev->pdev) && + if (px || (!dev_is_removable(&adev->pdev->dev) && apple_gmux_detect(NULL, NULL))) vga_switcheroo_register_client(adev->pdev, &amdgpu_switcheroo_ops, px); @@ -4235,9 +4501,13 @@ void amdgpu_device_fini_hw(struct amdgpu_device *adev) sysfs_remove_files(&adev->dev->kobj, amdgpu_dev_attributes); amdgpu_fru_sysfs_fini(adev); + amdgpu_reg_state_sysfs_fini(adev); + /* disable ras feature must before hw fini */ amdgpu_ras_pre_fini(adev); + amdgpu_ttm_set_buffer_funcs_status(adev, false); + amdgpu_device_ip_fini_early(adev); amdgpu_irq_fini_hw(adev); @@ -4280,7 +4550,7 @@ void amdgpu_device_fini_sw(struct amdgpu_device *adev) px = amdgpu_device_supports_px(adev_to_drm(adev)); - if (px || (!pci_is_thunderbolt_attached(adev->pdev) && + if (px || (!dev_is_removable(&adev->pdev->dev) && apple_gmux_detect(NULL, NULL))) vga_switcheroo_unregister_client(adev->pdev); @@ -4350,13 +4620,17 @@ int amdgpu_device_prepare(struct drm_device *dev) struct amdgpu_device *adev = drm_to_adev(dev); int i, r; + amdgpu_choose_low_power_state(adev); + if (dev->switch_power_state == DRM_SWITCH_POWER_OFF) return 0; /* Evict the majority of BOs before starting suspend sequence */ r = amdgpu_device_evict_resources(adev); if (r) - return r; + goto unprepare; + + flush_delayed_work(&adev->gfx.gfx_off_delay_work); for (i = 0; i < adev->num_ip_blocks; i++) { if (!adev->ip_blocks[i].status.valid) @@ -4365,10 +4639,15 @@ int amdgpu_device_prepare(struct drm_device *dev) continue; r = adev->ip_blocks[i].version->funcs->prepare_suspend((void *)adev); if (r) - return r; + goto unprepare; } return 0; + +unprepare: + adev->in_s0ix = adev->in_s3 = false; + + return r; } /** @@ -4405,7 +4684,6 @@ int amdgpu_device_suspend(struct drm_device *dev, bool fbcon) drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, true); cancel_delayed_work_sync(&adev->delayed_init_work); - flush_delayed_work(&adev->gfx.gfx_off_delay_work); amdgpu_ras_suspend(adev); @@ -4418,6 +4696,8 @@ int amdgpu_device_suspend(struct drm_device *dev, bool fbcon) if (r) return r; + amdgpu_ttm_set_buffer_funcs_status(adev, false); + amdgpu_fence_driver_hw_fini(adev); amdgpu_device_ip_suspend_phase2(adev); @@ -4425,6 +4705,10 @@ int amdgpu_device_suspend(struct drm_device *dev, bool fbcon) if (amdgpu_sriov_vf(adev)) amdgpu_virt_release_full_gpu(adev, false); + r = amdgpu_dpm_notify_rlc_state(adev, false); + if (r) + return r; + return 0; } @@ -4470,19 +4754,18 @@ int amdgpu_device_resume(struct drm_device *dev, bool fbcon) } amdgpu_fence_driver_hw_init(adev); - r = amdgpu_device_ip_late_init(adev); - if (r) - goto exit; - - queue_delayed_work(system_wq, &adev->delayed_init_work, - msecs_to_jiffies(AMDGPU_RESUME_MS)); - if (!adev->in_s0ix) { r = amdgpu_amdkfd_resume(adev, adev->in_runpm); if (r) goto exit; } + r = amdgpu_device_ip_late_init(adev); + if (r) + goto exit; + + queue_delayed_work(system_wq, &adev->delayed_init_work, + msecs_to_jiffies(AMDGPU_RESUME_MS)); exit: if (amdgpu_sriov_vf(adev)) { amdgpu_virt_init_data_exchange(adev); @@ -4778,12 +5061,15 @@ static int amdgpu_device_reset_sriov(struct amdgpu_device *adev, retry: amdgpu_amdkfd_pre_reset(adev); + amdgpu_device_stop_pending_resets(adev); + if (from_hypervisor) r = amdgpu_virt_request_full_gpu(adev, true); else r = amdgpu_virt_reset_gpu(adev); if (r) return r; + amdgpu_ras_set_fed(adev, false); amdgpu_irq_gpu_reset_resume_helper(adev); /* some sw clean up VF needs to do before recover */ @@ -4852,7 +5138,7 @@ bool amdgpu_device_has_job_running(struct amdgpu_device *adev) for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { struct amdgpu_ring *ring = adev->rings[i]; - if (!ring || !ring->sched.thread) + if (!amdgpu_ring_sched_ready(ring)) continue; spin_lock(&ring->sched.job_list_lock); @@ -4991,7 +5277,7 @@ int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev, for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { struct amdgpu_ring *ring = adev->rings[i]; - if (!ring || !ring->sched.thread) + if (!amdgpu_ring_sched_ready(ring)) continue; /* Clear job fence from fence drv to avoid force_completion @@ -5050,90 +5336,16 @@ static int amdgpu_reset_reg_dumps(struct amdgpu_device *adev) lockdep_assert_held(&adev->reset_domain->sem); - for (i = 0; i < adev->num_regs; i++) { - adev->reset_dump_reg_value[i] = RREG32(adev->reset_dump_reg_list[i]); - trace_amdgpu_reset_reg_dumps(adev->reset_dump_reg_list[i], - adev->reset_dump_reg_value[i]); - } + for (i = 0; i < adev->reset_info.num_regs; i++) { + adev->reset_info.reset_dump_reg_value[i] = + RREG32(adev->reset_info.reset_dump_reg_list[i]); - return 0; -} - -#ifndef CONFIG_DEV_COREDUMP -static void amdgpu_coredump(struct amdgpu_device *adev, bool vram_lost, - struct amdgpu_reset_context *reset_context) -{ -} -#else -static ssize_t amdgpu_devcoredump_read(char *buffer, loff_t offset, - size_t count, void *data, size_t datalen) -{ - struct drm_printer p; - struct amdgpu_coredump_info *coredump = data; - struct drm_print_iterator iter; - int i; - - iter.data = buffer; - iter.offset = 0; - iter.start = offset; - iter.remain = count; - - p = drm_coredump_printer(&iter); - - drm_printf(&p, "**** AMDGPU Device Coredump ****\n"); - drm_printf(&p, "kernel: " UTS_RELEASE "\n"); - drm_printf(&p, "module: " KBUILD_MODNAME "\n"); - drm_printf(&p, "time: %lld.%09ld\n", coredump->reset_time.tv_sec, coredump->reset_time.tv_nsec); - if (coredump->reset_task_info.pid) - drm_printf(&p, "process_name: %s PID: %d\n", - coredump->reset_task_info.process_name, - coredump->reset_task_info.pid); - - if (coredump->reset_vram_lost) - drm_printf(&p, "VRAM is lost due to GPU reset!\n"); - if (coredump->adev->num_regs) { - drm_printf(&p, "AMDGPU register dumps:\nOffset: Value:\n"); - - for (i = 0; i < coredump->adev->num_regs; i++) - drm_printf(&p, "0x%08x: 0x%08x\n", - coredump->adev->reset_dump_reg_list[i], - coredump->adev->reset_dump_reg_value[i]); - } - - return count - iter.remain; -} - -static void amdgpu_devcoredump_free(void *data) -{ - kfree(data); -} - -static void amdgpu_coredump(struct amdgpu_device *adev, bool vram_lost, - struct amdgpu_reset_context *reset_context) -{ - struct amdgpu_coredump_info *coredump; - struct drm_device *dev = adev_to_drm(adev); - - coredump = kzalloc(sizeof(*coredump), GFP_NOWAIT); - - if (!coredump) { - DRM_ERROR("%s: failed to allocate memory for coredump\n", __func__); - return; + trace_amdgpu_reset_reg_dumps(adev->reset_info.reset_dump_reg_list[i], + adev->reset_info.reset_dump_reg_value[i]); } - coredump->reset_vram_lost = vram_lost; - - if (reset_context->job && reset_context->job->vm) - coredump->reset_task_info = reset_context->job->vm->task_info; - - coredump->adev = adev; - - ktime_get_ts64(&coredump->reset_time); - - dev_coredumpm(dev->dev, THIS_MODULE, coredump, 0, GFP_NOWAIT, - amdgpu_devcoredump_read, amdgpu_devcoredump_free); + return 0; } -#endif int amdgpu_do_asic_reset(struct list_head *device_list_handle, struct amdgpu_reset_context *reset_context) @@ -5141,12 +5353,13 @@ int amdgpu_do_asic_reset(struct list_head *device_list_handle, struct amdgpu_device *tmp_adev = NULL; bool need_full_reset, skip_hw_reset, vram_lost = false; int r = 0; - bool gpu_reset_for_dev_remove = 0; /* Try reset handler method first */ tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device, reset_list); - amdgpu_reset_reg_dumps(tmp_adev); + + if (!test_bit(AMDGPU_SKIP_COREDUMP, &reset_context->flags)) + amdgpu_reset_reg_dumps(tmp_adev); reset_context->reset_device_list = device_list_handle; r = amdgpu_reset_perform_reset(tmp_adev, reset_context); @@ -5161,10 +5374,6 @@ int amdgpu_do_asic_reset(struct list_head *device_list_handle, test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags); skip_hw_reset = test_bit(AMDGPU_SKIP_HW_RESET, &reset_context->flags); - gpu_reset_for_dev_remove = - test_bit(AMDGPU_RESET_FOR_DEVICE_REMOVE, &reset_context->flags) && - test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags); - /* * ASIC reset has to be done on all XGMI hive nodes ASAP * to allow proper links negotiation in FW (within 1 sec) @@ -5201,29 +5410,16 @@ int amdgpu_do_asic_reset(struct list_head *device_list_handle, if (!r && amdgpu_ras_intr_triggered()) { list_for_each_entry(tmp_adev, device_list_handle, reset_list) { - if (tmp_adev->mmhub.ras && tmp_adev->mmhub.ras->ras_block.hw_ops && - tmp_adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count) - tmp_adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count(tmp_adev); + amdgpu_ras_reset_error_count(tmp_adev, AMDGPU_RAS_BLOCK__MMHUB); } amdgpu_ras_intr_cleared(); } - /* Since the mode1 reset affects base ip blocks, the - * phase1 ip blocks need to be resumed. Otherwise there - * will be a BIOS signature error and the psp bootloader - * can't load kdb on the next amdgpu install. - */ - if (gpu_reset_for_dev_remove) { - list_for_each_entry(tmp_adev, device_list_handle, reset_list) - amdgpu_device_ip_resume_phase1(tmp_adev); - - goto end; - } - list_for_each_entry(tmp_adev, device_list_handle, reset_list) { if (need_full_reset) { /* post card */ + amdgpu_ras_set_fed(tmp_adev, false); r = amdgpu_device_asic_init(tmp_adev); if (r) { dev_warn(tmp_adev->dev, "asic atom init failed!"); @@ -5236,7 +5432,8 @@ int amdgpu_do_asic_reset(struct list_head *device_list_handle, vram_lost = amdgpu_device_check_vram_lost(tmp_adev); - amdgpu_coredump(tmp_adev, vram_lost, reset_context); + if (!test_bit(AMDGPU_SKIP_COREDUMP, &reset_context->flags)) + amdgpu_coredump(tmp_adev, vram_lost, reset_context); if (vram_lost) { DRM_INFO("VRAM is lost due to GPU reset!\n"); @@ -5256,6 +5453,9 @@ int amdgpu_do_asic_reset(struct list_head *device_list_handle, if (r) goto out; + if (tmp_adev->mman.buffer_funcs_ring->sched.ready) + amdgpu_ttm_set_buffer_funcs_status(tmp_adev, true); + if (vram_lost) amdgpu_device_fill_reset_magic(tmp_adev); @@ -5431,6 +5631,23 @@ static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev) } +static int amdgpu_device_health_check(struct list_head *device_list_handle) +{ + struct amdgpu_device *tmp_adev; + int ret = 0; + u32 status; + + list_for_each_entry(tmp_adev, device_list_handle, reset_list) { + pci_read_config_dword(tmp_adev->pdev, PCI_COMMAND, &status); + if (PCI_POSSIBLE_ERROR(status)) { + dev_err(tmp_adev->dev, "device lost from bus!"); + ret = -ENODEV; + } + } + + return ret; +} + /** * amdgpu_device_gpu_recover - reset the asic and recover scheduler * @@ -5454,11 +5671,6 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, int i, r = 0; bool need_emergency_restart = false; bool audio_suspended = false; - bool gpu_reset_for_dev_remove = false; - - gpu_reset_for_dev_remove = - test_bit(AMDGPU_RESET_FOR_DEVICE_REMOVE, &reset_context->flags) && - test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags); /* * Special case: RAS triggered and full reset isn't supported @@ -5496,7 +5708,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, if (!amdgpu_sriov_vf(adev) && (adev->gmc.xgmi.num_physical_nodes > 1)) { list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) { list_add_tail(&tmp_adev->reset_list, &device_list); - if (gpu_reset_for_dev_remove && adev->shutdown) + if (adev->shutdown) tmp_adev->shutdown = true; } if (!list_is_first(&adev->reset_list, &device_list)) @@ -5507,6 +5719,12 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, device_list_handle = &device_list; } + if (!amdgpu_sriov_vf(adev)) { + r = amdgpu_device_health_check(device_list_handle); + if (r) + goto end_reset; + } + /* We need to lock reset domain only once both for XGMI and single device */ tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device, reset_list); @@ -5553,7 +5771,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { struct amdgpu_ring *ring = tmp_adev->rings[i]; - if (!ring || !ring->sched.thread) + if (!amdgpu_ring_sched_ready(ring)) continue; drm_sched_stop(&ring->sched, job ? &job->base : NULL); @@ -5581,10 +5799,6 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, retry: /* Rest of adevs pre asic reset from XGMI hive. */ list_for_each_entry(tmp_adev, device_list_handle, reset_list) { - if (gpu_reset_for_dev_remove) { - /* Workaroud for ASICs need to disable SMC first */ - amdgpu_device_smu_fini_early(tmp_adev); - } r = amdgpu_device_pre_asic_reset(tmp_adev, reset_context); /*TODO Should we stop ?*/ if (r) { @@ -5593,11 +5807,12 @@ retry: /* Rest of adevs pre asic reset from XGMI hive. */ tmp_adev->asic_reset_res = r; } - /* - * Drop all pending non scheduler resets. Scheduler resets - * were already dropped during drm_sched_stop - */ - amdgpu_device_stop_pending_resets(tmp_adev); + if (!amdgpu_sriov_vf(tmp_adev)) + /* + * Drop all pending non scheduler resets. Scheduler resets + * were already dropped during drm_sched_stop + */ + amdgpu_device_stop_pending_resets(tmp_adev); } /* Actual ASIC resets if needed.*/ @@ -5610,15 +5825,13 @@ retry: /* Rest of adevs pre asic reset from XGMI hive. */ /* Aldebaran and gfx_11_0_3 support ras in SRIOV, so need resume ras during reset */ if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2) || + amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) || amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 0, 3)) amdgpu_ras_resume(adev); } else { r = amdgpu_do_asic_reset(device_list_handle, reset_context); if (r && r == -EAGAIN) goto retry; - - if (!r && gpu_reset_for_dev_remove) - goto recover_end; } skip_hw_reset: @@ -5629,16 +5842,12 @@ skip_hw_reset: for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { struct amdgpu_ring *ring = tmp_adev->rings[i]; - if (!ring || !ring->sched.thread) + if (!amdgpu_ring_sched_ready(ring)) continue; drm_sched_start(&ring->sched, true); } - if (adev->enable_mes && - amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(11, 0, 3)) - amdgpu_mes_self_test(tmp_adev); - if (!drm_drv_uses_atomic_modeset(adev_to_drm(tmp_adev)) && !job_signaled) drm_helper_resume_force_mode(adev_to_drm(tmp_adev)); @@ -5678,11 +5887,11 @@ skip_sched_resume: amdgpu_ras_set_error_query_ready(tmp_adev, true); } -recover_end: tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device, reset_list); amdgpu_device_unlock_reset_domain(tmp_adev->reset_domain); +end_reset: if (hive) { mutex_unlock(&hive->hive_lock); amdgpu_put_xgmi_hive(hive); @@ -5696,6 +5905,39 @@ recover_end: } /** + * amdgpu_device_partner_bandwidth - find the bandwidth of appropriate partner + * + * @adev: amdgpu_device pointer + * @speed: pointer to the speed of the link + * @width: pointer to the width of the link + * + * Evaluate the hierarchy to find the speed and bandwidth capabilities of the + * first physical partner to an AMD dGPU. + * This will exclude any virtual switches and links. + */ +static void amdgpu_device_partner_bandwidth(struct amdgpu_device *adev, + enum pci_bus_speed *speed, + enum pcie_link_width *width) +{ + struct pci_dev *parent = adev->pdev; + + if (!speed || !width) + return; + + *speed = PCI_SPEED_UNKNOWN; + *width = PCIE_LNK_WIDTH_UNKNOWN; + + while ((parent = pci_upstream_bridge(parent))) { + /* skip upstream/downstream switches internal to dGPU*/ + if (parent->vendor == PCI_VENDOR_ID_ATI) + continue; + *speed = pcie_get_speed_cap(parent); + *width = pcie_get_width_cap(parent); + break; + } +} + +/** * amdgpu_device_get_pcie_info - fence pcie info about the PCIE slot * * @adev: amdgpu_device pointer @@ -5728,8 +5970,8 @@ static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev) if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask) return; - pcie_bandwidth_available(adev->pdev, NULL, - &platform_speed_cap, &platform_link_width); + amdgpu_device_partner_bandwidth(adev, &platform_speed_cap, + &platform_link_width); if (adev->pm.pcie_gen_mask == 0) { /* asic caps */ @@ -5956,7 +6198,7 @@ pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_sta for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { struct amdgpu_ring *ring = adev->rings[i]; - if (!ring || !ring->sched.thread) + if (!amdgpu_ring_sched_ready(ring)) continue; drm_sched_stop(&ring->sched, NULL); @@ -6006,6 +6248,20 @@ pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev) struct amdgpu_reset_context reset_context; u32 memsize; struct list_head device_list; + struct amdgpu_hive_info *hive; + int hive_ras_recovery = 0; + struct amdgpu_ras *ras; + + /* PCI error slot reset should be skipped During RAS recovery */ + hive = amdgpu_get_xgmi_hive(adev); + if (hive) { + hive_ras_recovery = atomic_read(&hive->ras_recovery); + amdgpu_put_xgmi_hive(hive); + } + ras = amdgpu_ras_get_context(adev); + if ((amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3)) && + ras && (atomic_read(&ras->in_recovery) || hive_ras_recovery)) + return PCI_ERS_RESULT_RECOVERED; DRM_INFO("PCI error: slot reset callback!!\n"); @@ -6084,7 +6340,7 @@ void amdgpu_pci_resume(struct pci_dev *pdev) for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { struct amdgpu_ring *ring = adev->rings[i]; - if (!ring || !ring->sched.thread) + if (!amdgpu_ring_sched_ready(ring)) continue; drm_sched_start(&ring->sched, true); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c index b6cddcad122f..0e31bdb4b7cb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c @@ -27,6 +27,7 @@ #include "amdgpu_discovery.h" #include "soc15_hw_ip.h" #include "discovery.h" +#include "amdgpu_ras.h" #include "soc15.h" #include "gfx_v9_0.h" @@ -35,6 +36,7 @@ #include "df_v1_7.h" #include "df_v3_6.h" #include "df_v4_3.h" +#include "df_v4_6_2.h" #include "nbio_v6_1.h" #include "nbio_v7_0.h" #include "nbio_v7_4.h" @@ -59,20 +61,24 @@ #include "nbio_v4_3.h" #include "nbio_v7_2.h" #include "nbio_v7_7.h" +#include "nbif_v6_3_1.h" #include "hdp_v5_0.h" #include "hdp_v5_2.h" #include "hdp_v6_0.h" +#include "hdp_v7_0.h" #include "nv.h" #include "soc21.h" #include "navi10_ih.h" #include "ih_v6_0.h" #include "ih_v6_1.h" +#include "ih_v7_0.h" #include "gfx_v10_0.h" #include "gfx_v11_0.h" #include "sdma_v5_0.h" #include "sdma_v5_2.h" #include "sdma_v6_0.h" #include "lsdma_v6_0.h" +#include "lsdma_v7_0.h" #include "vcn_v2_0.h" #include "jpeg_v2_0.h" #include "vcn_v3_0.h" @@ -91,13 +97,18 @@ #include "smuio_v13_0.h" #include "smuio_v13_0_3.h" #include "smuio_v13_0_6.h" +#include "smuio_v14_0_2.h" +#include "vcn_v5_0_0.h" +#include "jpeg_v5_0_0.h" #include "amdgpu_vpe.h" #define FIRMWARE_IP_DISCOVERY "amdgpu/ip_discovery.bin" MODULE_FIRMWARE(FIRMWARE_IP_DISCOVERY); +#define mmIP_DISCOVERY_VERSION 0x16A00 #define mmRCC_CONFIG_MEMSIZE 0xde3 +#define mmMP0_SMN_C2PMSG_33 0x16061 #define mmMM_INDEX 0x0 #define mmMM_INDEX_HI 0x6 #define mmMM_DATA 0x1 @@ -235,11 +246,32 @@ static int amdgpu_discovery_read_binary_from_sysmem(struct amdgpu_device *adev, return -ENOENT; } +#define IP_DISCOVERY_V2 2 +#define IP_DISCOVERY_V4 4 + static int amdgpu_discovery_read_binary_from_mem(struct amdgpu_device *adev, uint8_t *binary) { - uint64_t vram_size = (uint64_t)RREG32(mmRCC_CONFIG_MEMSIZE) << 20; - int ret = 0; + uint64_t vram_size; + u32 msg; + int i, ret = 0; + + /* It can take up to a second for IFWI init to complete on some dGPUs, + * but generally it should be in the 60-100ms range. Normally this starts + * as soon as the device gets power so by the time the OS loads this has long + * completed. However, when a card is hotplugged via e.g., USB4, we need to + * wait for this to complete. Once the C2PMSG is updated, we can + * continue. + */ + + for (i = 0; i < 1000; i++) { + msg = RREG32(mmMP0_SMN_C2PMSG_33); + if (msg & 0x80000000) + break; + usleep_range(1000, 1100); + } + + vram_size = (uint64_t)RREG32(mmRCC_CONFIG_MEMSIZE) << 20; if (vram_size) { uint64_t pos = vram_size - DISCOVERY_TMR_OFFSET; @@ -498,7 +530,9 @@ static int amdgpu_discovery_init(struct amdgpu_device *adev) out: kfree(adev->mman.discovery_bin); adev->mman.discovery_bin = NULL; - + if ((amdgpu_discovery != 2) && + (RREG32(mmIP_DISCOVERY_VERSION) == 4)) + amdgpu_ras_query_boot_status(adev, 4); return r; } @@ -1258,11 +1292,10 @@ static int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev) * 0b10 : encode is disabled * 0b01 : decode is disabled */ - adev->vcn.vcn_config[adev->vcn.num_vcn_inst] = - ip->revision & 0xc0; - ip->revision &= ~0xc0; if (adev->vcn.num_vcn_inst < AMDGPU_MAX_VCN_INSTANCES) { + adev->vcn.vcn_config[adev->vcn.num_vcn_inst] = + ip->revision & 0xc0; adev->vcn.num_vcn_inst++; adev->vcn.inst_mask |= (1U << ip->instance_number); @@ -1273,6 +1306,7 @@ static int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev) adev->vcn.num_vcn_inst + 1, AMDGPU_MAX_VCN_INSTANCES); } + ip->revision &= ~0xc0; } if (le16_to_cpu(ip->hw_id) == SDMA0_HWID || le16_to_cpu(ip->hw_id) == SDMA1_HWID || @@ -1290,6 +1324,15 @@ static int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev) } } + if (le16_to_cpu(ip->hw_id) == VPE_HWID) { + if (adev->vpe.num_instances < AMDGPU_MAX_VPE_INSTANCES) + adev->vpe.num_instances++; + else + dev_err(adev->dev, "Too many VPE instances: %d vs %d\n", + adev->vpe.num_instances + 1, + AMDGPU_MAX_VPE_INSTANCES); + } + if (le16_to_cpu(ip->hw_id) == UMC_HWID) { adev->gmc.num_umc++; adev->umc.node_inst_num++; @@ -1487,7 +1530,7 @@ static int amdgpu_discovery_get_gfx_info(struct amdgpu_device *adev) adev->gfx.config.num_sc_per_sh = le32_to_cpu(gc_info->v2.gc_num_sc_per_se) / le32_to_cpu(gc_info->v2.gc_num_sh_per_se); adev->gfx.config.num_packer_per_sc = le32_to_cpu(gc_info->v2.gc_num_packer_per_sc); - if (le16_to_cpu(gc_info->v2.header.version_minor == 1)) { + if (le16_to_cpu(gc_info->v2.header.version_minor) == 1) { adev->gfx.config.gc_num_tcp_per_sa = le32_to_cpu(gc_info->v2_1.gc_num_tcp_per_sh); adev->gfx.config.gc_tcp_size_per_cu = le32_to_cpu(gc_info->v2_1.gc_tcp_size_per_cu); adev->gfx.config.gc_num_sdp_interface = le32_to_cpu(gc_info->v2_1.gc_num_sdp_interface); /* per XCD */ @@ -1654,6 +1697,7 @@ static int amdgpu_discovery_set_common_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(11, 0, 3): case IP_VERSION(11, 0, 4): case IP_VERSION(11, 5, 0): + case IP_VERSION(11, 5, 1): amdgpu_device_ip_block_add(adev, &soc21_common_ip_block); break; default: @@ -1701,6 +1745,7 @@ static int amdgpu_discovery_set_gmc_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(11, 0, 3): case IP_VERSION(11, 0, 4): case IP_VERSION(11, 5, 0): + case IP_VERSION(11, 5, 1): amdgpu_device_ip_block_add(adev, &gmc_v11_0_ip_block); break; default: @@ -1743,6 +1788,9 @@ static int amdgpu_discovery_set_ih_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(6, 1, 0): amdgpu_device_ip_block_add(adev, &ih_v6_1_ip_block); break; + case IP_VERSION(7, 0, 0): + amdgpu_device_ip_block_add(adev, &ih_v7_0_ip_block); + break; default: dev_err(adev->dev, "Failed to add ih ip block(OSSSYS_HWIP:0x%x)\n", @@ -1792,11 +1840,16 @@ static int amdgpu_discovery_set_psp_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(13, 0, 10): case IP_VERSION(13, 0, 11): case IP_VERSION(14, 0, 0): + case IP_VERSION(14, 0, 1): amdgpu_device_ip_block_add(adev, &psp_v13_0_ip_block); break; case IP_VERSION(13, 0, 4): amdgpu_device_ip_block_add(adev, &psp_v13_0_4_ip_block); break; + case IP_VERSION(14, 0, 2): + case IP_VERSION(14, 0, 3): + amdgpu_device_ip_block_add(adev, &psp_v14_0_ip_block); + break; default: dev_err(adev->dev, "Failed to add psp ip block(MP0_HWIP:0x%x)\n", @@ -1847,6 +1900,9 @@ static int amdgpu_discovery_set_smu_ip_blocks(struct amdgpu_device *adev) amdgpu_device_ip_block_add(adev, &smu_v13_0_ip_block); break; case IP_VERSION(14, 0, 0): + case IP_VERSION(14, 0, 1): + case IP_VERSION(14, 0, 2): + case IP_VERSION(14, 0, 3): amdgpu_device_ip_block_add(adev, &smu_v14_0_ip_block); break; default: @@ -1897,6 +1953,7 @@ static int amdgpu_discovery_set_display_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(3, 2, 0): case IP_VERSION(3, 2, 1): case IP_VERSION(3, 5, 0): + case IP_VERSION(3, 5, 1): if (amdgpu_sriov_vf(adev)) amdgpu_discovery_set_sriov_display(adev); else @@ -1943,8 +2000,6 @@ static int amdgpu_discovery_set_gc_ip_blocks(struct amdgpu_device *adev) amdgpu_device_ip_block_add(adev, &gfx_v9_0_ip_block); break; case IP_VERSION(9, 4, 3): - if (!amdgpu_exp_hw_support) - return -EINVAL; amdgpu_device_ip_block_add(adev, &gfx_v9_4_3_ip_block); break; case IP_VERSION(10, 1, 10): @@ -1968,6 +2023,7 @@ static int amdgpu_discovery_set_gc_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(11, 0, 3): case IP_VERSION(11, 0, 4): case IP_VERSION(11, 5, 0): + case IP_VERSION(11, 5, 1): amdgpu_device_ip_block_add(adev, &gfx_v11_0_ip_block); break; default: @@ -2015,6 +2071,7 @@ static int amdgpu_discovery_set_sdma_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(6, 0, 2): case IP_VERSION(6, 0, 3): case IP_VERSION(6, 1, 0): + case IP_VERSION(6, 1, 1): amdgpu_device_ip_block_add(adev, &sdma_v6_0_ip_block); break; default: @@ -2101,9 +2158,14 @@ static int amdgpu_discovery_set_mm_ip_blocks(struct amdgpu_device *adev) amdgpu_device_ip_block_add(adev, &jpeg_v4_0_3_ip_block); break; case IP_VERSION(4, 0, 5): + case IP_VERSION(4, 0, 6): amdgpu_device_ip_block_add(adev, &vcn_v4_0_5_ip_block); amdgpu_device_ip_block_add(adev, &jpeg_v4_0_5_ip_block); break; + case IP_VERSION(5, 0, 0): + amdgpu_device_ip_block_add(adev, &vcn_v5_0_0_ip_block); + amdgpu_device_ip_block_add(adev, &jpeg_v5_0_0_ip_block); + break; default: dev_err(adev->dev, "Failed to add vcn/jpeg ip block(UVD_HWIP:0x%x)\n", @@ -2142,6 +2204,7 @@ static int amdgpu_discovery_set_mes_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(11, 0, 3): case IP_VERSION(11, 0, 4): case IP_VERSION(11, 5, 0): + case IP_VERSION(11, 5, 1): amdgpu_device_ip_block_add(adev, &mes_v11_0_ip_block); adev->enable_mes = true; adev->enable_mes_kiq = true; @@ -2167,6 +2230,7 @@ static int amdgpu_discovery_set_vpe_ip_blocks(struct amdgpu_device *adev) { switch (amdgpu_ip_version(adev, VPE_HWIP, 0)) { case IP_VERSION(6, 1, 0): + case IP_VERSION(6, 1, 1): amdgpu_device_ip_block_add(adev, &vpe_v6_1_ip_block); break; default: @@ -2180,6 +2244,7 @@ static int amdgpu_discovery_set_umsch_mm_ip_blocks(struct amdgpu_device *adev) { switch (amdgpu_ip_version(adev, VCN_HWIP, 0)) { case IP_VERSION(4, 0, 5): + case IP_VERSION(4, 0, 6): if (amdgpu_umsch_mm & 0x1) { amdgpu_device_ip_block_add(adev, &umsch_mm_v4_0_ip_block); adev->enable_umsch_mm = true; @@ -2420,6 +2485,7 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) adev->family = AMDGPU_FAMILY_GC_11_0_1; break; case IP_VERSION(11, 5, 0): + case IP_VERSION(11, 5, 1): adev->family = AMDGPU_FAMILY_GC_11_5_0; break; default: @@ -2439,6 +2505,7 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(11, 0, 1): case IP_VERSION(11, 0, 4): case IP_VERSION(11, 5, 0): + case IP_VERSION(11, 5, 1): adev->flags |= AMD_IS_APU; break; default: @@ -2448,6 +2515,9 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) if (amdgpu_ip_version(adev, XGMI_HWIP, 0) == IP_VERSION(4, 8, 0)) adev->gmc.xgmi.supported = true; + if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3)) + adev->ip_versions[XGMI_HWIP][0] = IP_VERSION(6, 4, 0); + /* set NBIO version */ switch (amdgpu_ip_version(adev, NBIO_HWIP, 0)) { case IP_VERSION(6, 1, 0): @@ -2472,6 +2542,7 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) adev->nbio.hdp_flush_reg = &nbio_v7_9_hdp_flush_reg; break; case IP_VERSION(7, 11, 0): + case IP_VERSION(7, 11, 1): adev->nbio.funcs = &nbio_v7_11_funcs; adev->nbio.hdp_flush_reg = &nbio_v7_11_hdp_flush_reg; break; @@ -2507,6 +2578,10 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) adev->nbio.funcs = &nbio_v7_7_funcs; adev->nbio.hdp_flush_reg = &nbio_v7_7_hdp_flush_reg; break; + case IP_VERSION(6, 3, 1): + adev->nbio.funcs = &nbif_v6_3_1_funcs; + adev->nbio.hdp_flush_reg = &nbif_v6_3_1_hdp_flush_reg; + break; default: break; } @@ -2539,6 +2614,9 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(6, 1, 0): adev->hdp.funcs = &hdp_v6_0_funcs; break; + case IP_VERSION(7, 0, 0): + adev->hdp.funcs = &hdp_v7_0_funcs; + break; default: break; } @@ -2559,6 +2637,9 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(4, 3, 0): adev->df.funcs = &df_v4_3_funcs; break; + case IP_VERSION(4, 6, 2): + adev->df.funcs = &df_v4_6_2_funcs; + break; default: break; } @@ -2600,8 +2681,12 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(13, 0, 6): case IP_VERSION(13, 0, 8): case IP_VERSION(14, 0, 0): + case IP_VERSION(14, 0, 1): adev->smuio.funcs = &smuio_v13_0_6_funcs; break; + case IP_VERSION(14, 0, 2): + adev->smuio.funcs = &smuio_v14_0_2_funcs; + break; default: break; } @@ -2613,6 +2698,10 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(6, 0, 3): adev->lsdma.funcs = &lsdma_v6_0_funcs; break; + case IP_VERSION(7, 0, 0): + case IP_VERSION(7, 0, 1): + adev->lsdma.funcs = &lsdma_v7_0_funcs; + break; default: break; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c index 0cacd0b9f8be..3ecc7ef95172 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c @@ -340,14 +340,11 @@ int amdgpu_display_crtc_set_config(struct drm_mode_set *set, adev->have_disp_power_ref = true; return ret; } - /* if we have no active crtcs, then drop the power ref - * we got before + /* if we have no active crtcs, then go to + * drop the power ref we got before */ - if (!active && adev->have_disp_power_ref) { - pm_runtime_put_autosuspend(dev->dev); + if (!active && adev->have_disp_power_ref) adev->have_disp_power_ref = false; - } - out: /* drop the power reference we got coming in here */ pm_runtime_put_autosuspend(dev->dev); @@ -1353,14 +1350,6 @@ int amdgpu_display_modeset_create_props(struct amdgpu_device *adev) "dither", amdgpu_dither_enum_list, sz); - if (adev->dc_enabled) { - adev->mode_info.abm_level_property = - drm_property_create_range(adev_to_drm(adev), 0, - "abm level", 0, 4); - if (!adev->mode_info.abm_level_property) - return -ENOMEM; - } - return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c index b5e28fa3f414..055ba2ea4c12 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c @@ -42,6 +42,7 @@ #include <linux/dma-fence-array.h> #include <linux/pci-p2pdma.h> #include <linux/pm_runtime.h> +#include "amdgpu_trace.h" /** * amdgpu_dma_buf_attach - &dma_buf_ops.attach implementation @@ -63,6 +64,7 @@ static int amdgpu_dma_buf_attach(struct dma_buf *dmabuf, attach->peer2peer = false; r = pm_runtime_get_sync(adev_to_drm(adev)->dev); + trace_amdgpu_runpm_reference_dumps(1, __func__); if (r < 0) goto out; @@ -70,6 +72,7 @@ static int amdgpu_dma_buf_attach(struct dma_buf *dmabuf, out: pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); + trace_amdgpu_runpm_reference_dumps(0, __func__); return r; } @@ -90,6 +93,7 @@ static void amdgpu_dma_buf_detach(struct dma_buf *dmabuf, pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); + trace_amdgpu_runpm_reference_dumps(0, __func__); } /** @@ -373,6 +377,10 @@ amdgpu_dma_buf_move_notify(struct dma_buf_attachment *attach) struct amdgpu_vm_bo_base *bo_base; int r; + /* FIXME: This should be after the "if", but needs a fix to make sure + * DMABuf imports are initialized in the right VM list. + */ + amdgpu_vm_bo_invalidate(adev, bo, false); if (!bo->tbo.resource || bo->tbo.resource->mem_type == TTM_PL_SYSTEM) return; @@ -409,7 +417,7 @@ amdgpu_dma_buf_move_notify(struct dma_buf_attachment *attach) if (!r) r = amdgpu_vm_clear_freed(adev, vm, NULL); if (!r) - r = amdgpu_vm_handle_moved(adev, vm); + r = amdgpu_vm_handle_moved(adev, vm, ticket); if (r && r != -EBUSY) DRM_ERROR("Failed to invalidate VM page tables (%d))\n", diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 6cc6e3991410..c512f70b8272 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -115,9 +115,10 @@ * 3.54.0 - Add AMDGPU_CTX_QUERY2_FLAGS_RESET_IN_PROGRESS support * - 3.55.0 - Add AMDGPU_INFO_GPUVM_FAULT query * - 3.56.0 - Update IB start address and size alignment for decode and encode + * - 3.57.0 - Compute tunneling on GFX10+ */ #define KMS_DRIVER_MAJOR 3 -#define KMS_DRIVER_MINOR 56 +#define KMS_DRIVER_MINOR 57 #define KMS_DRIVER_PATCHLEVEL 0 /* @@ -127,6 +128,7 @@ enum AMDGPU_DEBUG_MASK { AMDGPU_DEBUG_VM = BIT(0), AMDGPU_DEBUG_LARGEBAR = BIT(1), AMDGPU_DEBUG_DISABLE_GPU_SOFT_RECOVERY = BIT(2), + AMDGPU_DEBUG_USE_VRAM_FW_BUF = BIT(3), }; unsigned int amdgpu_vram_limit = UINT_MAX; @@ -193,10 +195,12 @@ int amdgpu_async_gfx_ring = 1; int amdgpu_mcbp = -1; int amdgpu_discovery = -1; int amdgpu_mes; +int amdgpu_mes_log_enable = 0; int amdgpu_mes_kiq; int amdgpu_noretry = -1; int amdgpu_force_asic_type = -1; int amdgpu_tmz = -1; /* auto */ +uint amdgpu_freesync_vid_mode; int amdgpu_reset_method = -1; /* auto */ int amdgpu_num_kcq = -1; int amdgpu_smartshift_bias; @@ -207,6 +211,9 @@ int amdgpu_user_partt_mode = AMDGPU_AUTO_COMPUTE_PARTITION_MODE; int amdgpu_umsch_mm; int amdgpu_seamless = -1; /* auto */ uint amdgpu_debug_mask; +int amdgpu_agp = -1; /* auto */ +int amdgpu_wbrf = -1; +int amdgpu_damage_clips = -1; /* auto */ static void amdgpu_drv_delayed_reset_work_handler(struct work_struct *work); @@ -362,7 +369,7 @@ module_param_named(aspm, amdgpu_aspm, int, 0444); * Setting the value to 0 disables this functionality. * Setting the value to -2 is auto enabled with power down when displays are attached. */ -MODULE_PARM_DESC(runpm, "PX runtime pm (2 = force enable with BAMACO, 1 = force enable with BACO, 0 = disable, -1 = auto, -2 = autowith displays)"); +MODULE_PARM_DESC(runpm, "PX runtime pm (2 = force enable with BAMACO, 1 = force enable with BACO, 0 = disable, -1 = auto, -2 = auto with displays)"); module_param_named(runpm, amdgpu_runtime_pm, int, 0444); /** @@ -589,7 +596,7 @@ module_param_named(timeout_period, amdgpu_watchdog_timer.period, uint, 0644); #ifdef CONFIG_DRM_AMDGPU_SI #if IS_ENABLED(CONFIG_DRM_RADEON) || IS_ENABLED(CONFIG_DRM_RADEON_MODULE) -int amdgpu_si_support = 0; +int amdgpu_si_support; MODULE_PARM_DESC(si_support, "SI support (1 = enabled, 0 = disabled (default))"); #else int amdgpu_si_support = 1; @@ -608,7 +615,7 @@ module_param_named(si_support, amdgpu_si_support, int, 0444); #ifdef CONFIG_DRM_AMDGPU_CIK #if IS_ENABLED(CONFIG_DRM_RADEON) || IS_ENABLED(CONFIG_DRM_RADEON_MODULE) -int amdgpu_cik_support = 0; +int amdgpu_cik_support; MODULE_PARM_DESC(cik_support, "CIK support (1 = enabled, 0 = disabled (default))"); #else int amdgpu_cik_support = 1; @@ -662,6 +669,15 @@ MODULE_PARM_DESC(mes, module_param_named(mes, amdgpu_mes, int, 0444); /** + * DOC: mes_log_enable (int) + * Enable Micro Engine Scheduler log. This is used to enable/disable MES internal log. + * (0 = disabled (default), 1 = enabled) + */ +MODULE_PARM_DESC(mes_log_enable, + "Enable Micro Engine Scheduler log (0 = disabled (default), 1 = enabled)"); +module_param_named(mes_log_enable, amdgpu_mes_log_enable, int, 0444); + +/** * DOC: mes_kiq (int) * Enable Micro Engine Scheduler KIQ. This is a new engine pipe for kiq. * (0 = disabled (default), 1 = enabled) @@ -844,18 +860,31 @@ module_param_named(visualconfirm, amdgpu_dc_visual_confirm, uint, 0444); * the ABM algorithm, with 1 being the least reduction and 4 being the most * reduction. * - * Defaults to 0, or disabled. Userspace can still override this level later - * after boot. + * Defaults to -1, or disabled. Userspace can only override this level after + * boot if it's set to auto. */ -uint amdgpu_dm_abm_level; -MODULE_PARM_DESC(abmlevel, "ABM level (0 = off (default), 1-4 = backlight reduction level) "); -module_param_named(abmlevel, amdgpu_dm_abm_level, uint, 0444); +int amdgpu_dm_abm_level = -1; +MODULE_PARM_DESC(abmlevel, + "ABM level (0 = off, 1-4 = backlight reduction level, -1 auto (default))"); +module_param_named(abmlevel, amdgpu_dm_abm_level, int, 0444); int amdgpu_backlight = -1; MODULE_PARM_DESC(backlight, "Backlight control (0 = pwm, 1 = aux, -1 auto (default))"); module_param_named(backlight, amdgpu_backlight, bint, 0444); /** + * DOC: damageclips (int) + * Enable or disable damage clips support. If damage clips support is disabled, + * we will force full frame updates, irrespective of what user space sends to + * us. + * + * Defaults to -1 (where it is enabled unless a PSR-SU display is detected). + */ +MODULE_PARM_DESC(damageclips, + "Damage clips support (0 = disable, 1 = enable, -1 auto (default))"); +module_param_named(damageclips, amdgpu_damage_clips, int, 0444); + +/** * DOC: tmz (int) * Trusted Memory Zone (TMZ) is a method to protect data being written * to or read from memory. @@ -866,6 +895,32 @@ MODULE_PARM_DESC(tmz, "Enable TMZ feature (-1 = auto (default), 0 = off, 1 = on) module_param_named(tmz, amdgpu_tmz, int, 0444); /** + * DOC: freesync_video (uint) + * Enable the optimization to adjust front porch timing to achieve seamless + * mode change experience when setting a freesync supported mode for which full + * modeset is not needed. + * + * The Display Core will add a set of modes derived from the base FreeSync + * video mode into the corresponding connector's mode list based on commonly + * used refresh rates and VRR range of the connected display, when users enable + * this feature. From the userspace perspective, they can see a seamless mode + * change experience when the change between different refresh rates under the + * same resolution. Additionally, userspace applications such as Video playback + * can read this modeset list and change the refresh rate based on the video + * frame rate. Finally, the userspace can also derive an appropriate mode for a + * particular refresh rate based on the FreeSync Mode and add it to the + * connector's mode list. + * + * Note: This is an experimental feature. + * + * The default value: 0 (off). + */ +MODULE_PARM_DESC( + freesync_video, + "Enable freesync modesetting optimization feature (0 = off (default), 1 = on)"); +module_param_named(freesync_video, amdgpu_freesync_vid_mode, uint, 0444); + +/** * DOC: reset_method (int) * GPU reset method (-1 = auto (default), 0 = legacy, 1 = mode0, 2 = mode1, 3 = mode2, 4 = baco) */ @@ -961,6 +1016,31 @@ module_param_named(seamless, amdgpu_seamless, int, 0444); MODULE_PARM_DESC(debug_mask, "debug options for amdgpu, disabled by default"); module_param_named(debug_mask, amdgpu_debug_mask, uint, 0444); +/** + * DOC: agp (int) + * Enable the AGP aperture. This provides an aperture in the GPU's internal + * address space for direct access to system memory. Note that these accesses + * are non-snooped, so they are only used for access to uncached memory. + */ +MODULE_PARM_DESC(agp, "AGP (-1 = auto (default), 0 = disable, 1 = enable)"); +module_param_named(agp, amdgpu_agp, int, 0444); + +/** + * DOC: wbrf (int) + * Enable Wifi RFI interference mitigation feature. + * Due to electrical and mechanical constraints there may be likely interference of + * relatively high-powered harmonics of the (G-)DDR memory clocks with local radio + * module frequency bands used by Wifi 6/6e/7. To mitigate the possible RFI interference, + * with this feature enabled, PMFW will use either “shadowed P-State” or “P-State” based + * on active list of frequencies in-use (to be avoided) as part of initial setting or + * P-state transition. However, there may be potential performance impact with this + * feature enabled. + * (0 = disabled, 1 = enabled, -1 = auto (default setting, will be enabled if supported)) + */ +MODULE_PARM_DESC(wbrf, + "Enable Wifi RFI interference mitigation (0 = disabled, 1 = enabled, -1 = auto(default)"); +module_param_named(wbrf, amdgpu_wbrf, int, 0444); + /* These devices are not supported by amdgpu. * They are supported by the mach64, r128, radeon drivers */ @@ -2041,6 +2121,14 @@ static const struct pci_device_id pciidlist[] = { MODULE_DEVICE_TABLE(pci, pciidlist); +static const struct amdgpu_asic_type_quirk asic_type_quirks[] = { + /* differentiate between P10 and P11 asics with the same DID */ + {0x67FF, 0xE3, CHIP_POLARIS10}, + {0x67FF, 0xE7, CHIP_POLARIS10}, + {0x67FF, 0xF3, CHIP_POLARIS10}, + {0x67FF, 0xF7, CHIP_POLARIS10}, +}; + static const struct drm_driver amdgpu_kms_driver; static void amdgpu_get_secondary_funcs(struct amdgpu_device *adev) @@ -2081,6 +2169,27 @@ static void amdgpu_init_debug_options(struct amdgpu_device *adev) pr_info("debug: soft reset for GPU recovery disabled\n"); adev->debug_disable_soft_recovery = true; } + + if (amdgpu_debug_mask & AMDGPU_DEBUG_USE_VRAM_FW_BUF) { + pr_info("debug: place fw in vram for frontdoor loading\n"); + adev->debug_use_vram_fw_buf = true; + } +} + +static unsigned long amdgpu_fix_asic_type(struct pci_dev *pdev, unsigned long flags) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(asic_type_quirks); i++) { + if (pdev->device == asic_type_quirks[i].device && + pdev->revision == asic_type_quirks[i].revision) { + flags &= ~AMD_ASIC_MASK; + flags |= asic_type_quirks[i].type; + break; + } + } + + return flags; } static int amdgpu_pci_probe(struct pci_dev *pdev, @@ -2110,15 +2219,8 @@ static int amdgpu_pci_probe(struct pci_dev *pdev, "See modparam exp_hw_support\n"); return -ENODEV; } - /* differentiate between P10 and P11 asics with the same DID */ - if (pdev->device == 0x67FF && - (pdev->revision == 0xE3 || - pdev->revision == 0xE7 || - pdev->revision == 0xF3 || - pdev->revision == 0xF7)) { - flags &= ~AMD_ASIC_MASK; - flags |= CHIP_POLARIS10; - } + + flags = amdgpu_fix_asic_type(pdev, flags); /* Due to hardware bugs, S/G Display on raven requires a 1:1 IOMMU mapping, * however, SME requires an indirect IOMMU mapping because the encryption @@ -2183,6 +2285,8 @@ static int amdgpu_pci_probe(struct pci_dev *pdev, pci_set_drvdata(pdev, ddev); + amdgpu_init_debug_options(adev); + ret = amdgpu_driver_load_kms(adev, flags); if (ret) goto err_pci; @@ -2202,6 +2306,10 @@ retry_init: if (ret) goto err_pci; + ret = amdgpu_amdkfd_drm_client_create(adev); + if (ret) + goto err_pci; + /* * 1. don't init fbdev on hw without DCE * 2. don't init fbdev if there are no connectors @@ -2236,6 +2344,8 @@ retry_init: pm_runtime_mark_last_busy(ddev->dev); pm_runtime_put_autosuspend(ddev->dev); + pci_wake_from_d3(pdev, TRUE); + /* * For runpm implemented via BACO, PMFW will handle the * timing for BACO in and out: @@ -2261,8 +2371,6 @@ retry_init: amdgpu_get_secondary_funcs(adev); } - amdgpu_init_debug_options(adev); - return 0; err_pci: @@ -2284,38 +2392,6 @@ amdgpu_pci_remove(struct pci_dev *pdev) pm_runtime_forbid(dev->dev); } - if (amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 2) && - !amdgpu_sriov_vf(adev)) { - bool need_to_reset_gpu = false; - - if (adev->gmc.xgmi.num_physical_nodes > 1) { - struct amdgpu_hive_info *hive; - - hive = amdgpu_get_xgmi_hive(adev); - if (hive->device_remove_count == 0) - need_to_reset_gpu = true; - hive->device_remove_count++; - amdgpu_put_xgmi_hive(hive); - } else { - need_to_reset_gpu = true; - } - - /* Workaround for ASICs need to reset SMU. - * Called only when the first device is removed. - */ - if (need_to_reset_gpu) { - struct amdgpu_reset_context reset_context; - - adev->shutdown = true; - memset(&reset_context, 0, sizeof(reset_context)); - reset_context.method = AMD_RESET_METHOD_NONE; - reset_context.reset_req_dev = adev; - set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags); - set_bit(AMDGPU_RESET_FOR_DEVICE_REMOVE, &reset_context.flags); - amdgpu_device_gpu_recover(adev, NULL, &reset_context); - } - } - amdgpu_driver_unload_kms(dev); /* @@ -2405,6 +2481,7 @@ static void amdgpu_drv_delayed_reset_work_handler(struct work_struct *work) /* Use a common context, just need to make sure full reset is done */ set_bit(AMDGPU_SKIP_HW_RESET, &reset_context.flags); + set_bit(AMDGPU_SKIP_COREDUMP, &reset_context.flags); r = amdgpu_do_asic_reset(&device_list, &reset_context); if (r) { @@ -2413,8 +2490,11 @@ static void amdgpu_drv_delayed_reset_work_handler(struct work_struct *work) } for (i = 0; i < mgpu_info.num_dgpu; i++) { adev = mgpu_info.gpu_ins[i].adev; - if (!adev->kfd.init_complete) + if (!adev->kfd.init_complete) { + kgd2kfd_init_zone_device(adev); amdgpu_amdkfd_device_init(adev); + amdgpu_amdkfd_drm_client_create(adev); + } amdgpu_ttm_set_buffer_funcs_status(adev, true); } } @@ -2451,6 +2531,7 @@ static int amdgpu_pmops_suspend(struct device *dev) struct drm_device *drm_dev = dev_get_drvdata(dev); struct amdgpu_device *adev = drm_to_adev(drm_dev); + adev->suspend_complete = false; if (amdgpu_acpi_is_s0ix_active(adev)) adev->in_s0ix = true; else if (amdgpu_acpi_is_s3_active(adev)) @@ -2465,6 +2546,7 @@ static int amdgpu_pmops_suspend_noirq(struct device *dev) struct drm_device *drm_dev = dev_get_drvdata(dev); struct amdgpu_device *adev = drm_to_adev(drm_dev); + adev->suspend_complete = true; if (amdgpu_acpi_should_gpu_reset(adev)) return amdgpu_asic_reset(adev); @@ -2625,7 +2707,7 @@ static int amdgpu_pmops_runtime_suspend(struct device *dev) } adev->in_runpm = true; - if (amdgpu_device_supports_px(drm_dev)) + if (adev->pm.rpm_mode == AMDGPU_RUNPM_PX) drm_dev->switch_power_state = DRM_SWITCH_POWER_CHANGING; /* @@ -2635,7 +2717,7 @@ static int amdgpu_pmops_runtime_suspend(struct device *dev) * platforms. * TODO: this may be also needed for PX capable platform. */ - if (amdgpu_device_supports_boco(drm_dev)) + if (adev->pm.rpm_mode == AMDGPU_RUNPM_BOCO) adev->mp1_state = PP_MP1_STATE_UNLOAD; ret = amdgpu_device_prepare(drm_dev); @@ -2644,15 +2726,15 @@ static int amdgpu_pmops_runtime_suspend(struct device *dev) ret = amdgpu_device_suspend(drm_dev, false); if (ret) { adev->in_runpm = false; - if (amdgpu_device_supports_boco(drm_dev)) + if (adev->pm.rpm_mode == AMDGPU_RUNPM_BOCO) adev->mp1_state = PP_MP1_STATE_NONE; return ret; } - if (amdgpu_device_supports_boco(drm_dev)) + if (adev->pm.rpm_mode == AMDGPU_RUNPM_BOCO) adev->mp1_state = PP_MP1_STATE_NONE; - if (amdgpu_device_supports_px(drm_dev)) { + if (adev->pm.rpm_mode == AMDGPU_RUNPM_PX) { /* Only need to handle PCI state in the driver for ATPX * PCI core handles it for _PR3. */ @@ -2661,9 +2743,10 @@ static int amdgpu_pmops_runtime_suspend(struct device *dev) pci_ignore_hotplug(pdev); pci_set_power_state(pdev, PCI_D3cold); drm_dev->switch_power_state = DRM_SWITCH_POWER_DYNAMIC_OFF; - } else if (amdgpu_device_supports_boco(drm_dev)) { + } else if (adev->pm.rpm_mode == AMDGPU_RUNPM_BOCO) { /* nothing to do */ - } else if (amdgpu_device_supports_baco(drm_dev)) { + } else if ((adev->pm.rpm_mode == AMDGPU_RUNPM_BACO) || + (adev->pm.rpm_mode == AMDGPU_RUNPM_BAMACO)) { amdgpu_device_baco_enter(drm_dev); } @@ -2686,7 +2769,7 @@ static int amdgpu_pmops_runtime_resume(struct device *dev) if (!pci_device_is_present(adev->pdev)) adev->no_hw_access = true; - if (amdgpu_device_supports_px(drm_dev)) { + if (adev->pm.rpm_mode == AMDGPU_RUNPM_PX) { drm_dev->switch_power_state = DRM_SWITCH_POWER_CHANGING; /* Only need to handle PCI state in the driver for ATPX @@ -2698,22 +2781,23 @@ static int amdgpu_pmops_runtime_resume(struct device *dev) if (ret) return ret; pci_set_master(pdev); - } else if (amdgpu_device_supports_boco(drm_dev)) { + } else if (adev->pm.rpm_mode == AMDGPU_RUNPM_BOCO) { /* Only need to handle PCI state in the driver for ATPX * PCI core handles it for _PR3. */ pci_set_master(pdev); - } else if (amdgpu_device_supports_baco(drm_dev)) { + } else if ((adev->pm.rpm_mode == AMDGPU_RUNPM_BACO) || + (adev->pm.rpm_mode == AMDGPU_RUNPM_BAMACO)) { amdgpu_device_baco_exit(drm_dev); } ret = amdgpu_device_resume(drm_dev, false); if (ret) { - if (amdgpu_device_supports_px(drm_dev)) + if (adev->pm.rpm_mode == AMDGPU_RUNPM_PX) pci_disable_device(pdev); return ret; } - if (amdgpu_device_supports_px(drm_dev)) + if (adev->pm.rpm_mode == AMDGPU_RUNPM_PX) drm_dev->switch_power_state = DRM_SWITCH_POWER_ON; adev->in_runpm = false; return 0; @@ -2723,8 +2807,7 @@ static int amdgpu_pmops_runtime_idle(struct device *dev) { struct drm_device *drm_dev = dev_get_drvdata(dev); struct amdgpu_device *adev = drm_to_adev(drm_dev); - /* we don't want the main rpm_idle to call suspend - we want to autosuspend */ - int ret = 1; + int ret; if (adev->pm.rpm_mode == AMDGPU_RUNPM_NONE) { pm_runtime_forbid(dev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c index cba7e6cdc7cc..c7df7fa3459f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c @@ -56,21 +56,15 @@ static const char *amdgpu_ip_name[AMDGPU_HW_IP_NUM] = { void amdgpu_show_fdinfo(struct drm_printer *p, struct drm_file *file) { - struct amdgpu_device *adev = drm_to_adev(file->minor->dev); struct amdgpu_fpriv *fpriv = file->driver_priv; struct amdgpu_vm *vm = &fpriv->vm; struct amdgpu_mem_stats stats; ktime_t usage[AMDGPU_HW_IP_NUM]; - uint32_t bus, dev, fn, domain; unsigned int hw_ip; int ret; memset(&stats, 0, sizeof(stats)); - bus = adev->pdev->bus->number; - domain = pci_domain_nr(adev->pdev->bus); - dev = PCI_SLOT(adev->pdev->devfn); - fn = PCI_FUNC(adev->pdev->devfn); ret = amdgpu_bo_reserve(vm->root.bo, false); if (ret) @@ -88,9 +82,6 @@ void amdgpu_show_fdinfo(struct drm_printer *p, struct drm_file *file) */ drm_printf(p, "pasid:\t%u\n", fpriv->vm.pasid); - drm_printf(p, "drm-driver:\t%s\n", file->minor->dev->driver->name); - drm_printf(p, "drm-pdev:\t%04x:%02x:%02x.%d\n", domain, bus, dev, fn); - drm_printf(p, "drm-client-id:\t%llu\n", vm->immediate.fence_context); drm_printf(p, "drm-memory-vram:\t%llu KiB\n", stats.vram/1024UL); drm_printf(p, "drm-memory-gtt: \t%llu KiB\n", stats.gtt/1024UL); drm_printf(p, "drm-memory-cpu: \t%llu KiB\n", stats.cpu/1024UL); @@ -106,6 +97,10 @@ void amdgpu_show_fdinfo(struct drm_printer *p, struct drm_file *file) stats.requested_visible_vram/1024UL); drm_printf(p, "amd-requested-gtt:\t%llu KiB\n", stats.requested_gtt/1024UL); + drm_printf(p, "drm-shared-vram:\t%llu KiB\n", stats.vram_shared/1024UL); + drm_printf(p, "drm-shared-gtt:\t%llu KiB\n", stats.gtt_shared/1024UL); + drm_printf(p, "drm-shared-cpu:\t%llu KiB\n", stats.cpu_shared/1024UL); + for (hw_ip = 0; hw_ip < AMDGPU_HW_IP_NUM; ++hw_ip) { if (!usage[hw_ip]) continue; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index dc230212746a..10832b470448 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c @@ -61,9 +61,7 @@ static struct kmem_cache *amdgpu_fence_slab; int amdgpu_fence_slab_init(void) { - amdgpu_fence_slab = kmem_cache_create( - "amdgpu_fence", sizeof(struct amdgpu_fence), 0, - SLAB_HWCACHE_ALIGN, NULL); + amdgpu_fence_slab = KMEM_CACHE(amdgpu_fence, SLAB_HWCACHE_ALIGN); if (!amdgpu_fence_slab) return -ENOMEM; return 0; @@ -183,6 +181,7 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f, struct amd amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr, seq, flags | AMDGPU_FENCE_FLAG_INT); pm_runtime_get_noresume(adev_to_drm(adev)->dev); + trace_amdgpu_runpm_reference_dumps(1, __func__); ptr = &ring->fence_drv.fences[seq & ring->fence_drv.num_fences_mask]; if (unlikely(rcu_dereference_protected(*ptr, 1))) { struct dma_fence *old; @@ -310,6 +309,7 @@ bool amdgpu_fence_process(struct amdgpu_ring *ring) dma_fence_put(fence); pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); + trace_amdgpu_runpm_reference_dumps(0, __func__); } while (last_seq != seq); return true; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c index 73b8cca35bab..c623e23049d1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c @@ -121,6 +121,7 @@ int amdgpu_gart_table_ram_alloc(struct amdgpu_device *adev) struct amdgpu_bo_param bp; dma_addr_t dma_addr; struct page *p; + unsigned long x; int ret; if (adev->gart.bo != NULL) @@ -130,6 +131,10 @@ int amdgpu_gart_table_ram_alloc(struct amdgpu_device *adev) if (!p) return -ENOMEM; + /* assign pages to this device */ + for (x = 0; x < (1UL << order); x++) + p[x].mapping = adev->mman.bdev.dev_mapping; + /* If the hardware does not support UTCL2 snooping of the CPU caches * then set_memory_wc() could be used as a workaround to mark the pages * as write combine memory. @@ -223,6 +228,7 @@ void amdgpu_gart_table_ram_free(struct amdgpu_device *adev) unsigned int order = get_order(adev->gart.table_size); struct sg_table *sg = adev->gart.bo->tbo.sg; struct page *p; + unsigned long x; int ret; ret = amdgpu_bo_reserve(adev->gart.bo, false); @@ -234,6 +240,8 @@ void amdgpu_gart_table_ram_free(struct amdgpu_device *adev) sg_free_table(sg); kfree(sg); p = virt_to_page(adev->gart.ptr); + for (x = 0; x < (1UL << order); x++) + p[x].mapping = NULL; __free_pages(p, order); adev->gart.ptr = NULL; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index 84beeaa4d21c..67c234bcf89f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -187,7 +187,40 @@ static int amdgpu_gem_object_open(struct drm_gem_object *obj, else ++bo_va->ref_count; amdgpu_bo_unreserve(abo); - return 0; + + /* Validate and add eviction fence to DMABuf imports with dynamic + * attachment in compute VMs. Re-validation will be done by + * amdgpu_vm_validate. Fences are on the reservation shared with the + * export, which is currently required to be validated and fenced + * already by amdgpu_amdkfd_gpuvm_restore_process_bos. + * + * Nested locking below for the case that a GEM object is opened in + * kfd_mem_export_dmabuf. Since the lock below is only taken for imports, + * but not for export, this is a different lock class that cannot lead to + * circular lock dependencies. + */ + if (!vm->is_compute_context || !vm->process_info) + return 0; + if (!obj->import_attach || + !dma_buf_is_dynamic(obj->import_attach->dmabuf)) + return 0; + mutex_lock_nested(&vm->process_info->lock, 1); + if (!WARN_ON(!vm->process_info->eviction_fence)) { + r = amdgpu_amdkfd_bo_validate_and_fence(abo, AMDGPU_GEM_DOMAIN_GTT, + &vm->process_info->eviction_fence->base); + if (r) { + struct amdgpu_task_info *ti = amdgpu_vm_get_task_info_vm(vm); + + dev_warn(adev->dev, "validate_and_fence failed: %d\n", r); + if (ti) { + dev_warn(adev->dev, "pid %d\n", ti->pid); + amdgpu_vm_put_task_info(ti); + } + } + } + mutex_unlock(&vm->process_info->lock); + + return r; } static void amdgpu_gem_object_close(struct drm_gem_object *obj, @@ -203,7 +236,7 @@ static void amdgpu_gem_object_close(struct drm_gem_object *obj, struct drm_exec exec; long r; - drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES); + drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES, 0); drm_exec_until_all_locked(&exec) { r = drm_exec_prepare_obj(&exec, &bo->tbo.base, 1); drm_exec_retry_on_contention(&exec); @@ -682,10 +715,10 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, uint64_t vm_size; int r = 0; - if (args->va_address < AMDGPU_VA_RESERVED_SIZE) { + if (args->va_address < AMDGPU_VA_RESERVED_BOTTOM) { dev_dbg(dev->dev, "va_address 0x%llx is in reserved area 0x%llx\n", - args->va_address, AMDGPU_VA_RESERVED_SIZE); + args->va_address, AMDGPU_VA_RESERVED_BOTTOM); return -EINVAL; } @@ -701,7 +734,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, args->va_address &= AMDGPU_GMC_HOLE_MASK; vm_size = adev->vm_manager.max_pfn * AMDGPU_GPU_PAGE_SIZE; - vm_size -= AMDGPU_VA_RESERVED_SIZE; + vm_size -= AMDGPU_VA_RESERVED_TOP; if (args->va_address + args->map_size > vm_size) { dev_dbg(dev->dev, "va_address 0x%llx is in top reserved area 0x%llx\n", @@ -739,7 +772,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, } drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT | - DRM_EXEC_IGNORE_DUPLICATES); + DRM_EXEC_IGNORE_DUPLICATES, 0); drm_exec_until_all_locked(&exec) { if (gobj) { r = drm_exec_lock_obj(&exec, gobj); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index 9a158018ae16..55d5508987ff 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -29,6 +29,7 @@ #include "amdgpu_rlc.h" #include "amdgpu_ras.h" #include "amdgpu_xcp.h" +#include "amdgpu_xgmi.h" /* delay 0.1 second to enable gfx off feature */ #define GFX_OFF_DELAY_ENABLE msecs_to_jiffies(100) @@ -303,11 +304,11 @@ static int amdgpu_gfx_kiq_acquire(struct amdgpu_device *adev, return -EINVAL; } -int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev, - struct amdgpu_ring *ring, - struct amdgpu_irq_src *irq, int xcc_id) +int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev, int xcc_id) { struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id]; + struct amdgpu_irq_src *irq = &kiq->irq; + struct amdgpu_ring *ring = &kiq->ring; int r = 0; spin_lock_init(&kiq->ring_lock); @@ -328,7 +329,8 @@ int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev, ring->eop_gpu_addr = kiq->eop_gpu_addr; ring->no_scheduler = true; - sprintf(ring->name, "kiq_%d.%d.%d.%d", xcc_id, ring->me, ring->pipe, ring->queue); + snprintf(ring->name, sizeof(ring->name), "kiq_%d.%d.%d.%d", + xcc_id, ring->me, ring->pipe, ring->queue); r = amdgpu_ring_init(adev, ring, 1024, irq, AMDGPU_CP_KIQ_IRQ_DRIVER0, AMDGPU_RING_PRIO_DEFAULT, NULL); if (r) @@ -384,9 +386,11 @@ int amdgpu_gfx_mqd_sw_init(struct amdgpu_device *adev, struct amdgpu_ring *ring = &kiq->ring; u32 domain = AMDGPU_GEM_DOMAIN_GTT; +#if !defined(CONFIG_ARM) && !defined(CONFIG_ARM64) /* Only enable on gfx10 and 11 for now to avoid changing behavior on older chips */ if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(10, 0, 0)) domain |= AMDGPU_GEM_DOMAIN_VRAM; +#endif /* create MQD for KIQ */ if (!adev->enable_mes_kiq && !ring->mqd_obj) { @@ -501,6 +505,9 @@ int amdgpu_gfx_disable_kcq(struct amdgpu_device *adev, int xcc_id) { struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id]; struct amdgpu_ring *kiq_ring = &kiq->ring; + struct amdgpu_hive_info *hive; + struct amdgpu_ras *ras; + int hive_ras_recovery = 0; int i, r = 0; int j; @@ -521,6 +528,23 @@ int amdgpu_gfx_disable_kcq(struct amdgpu_device *adev, int xcc_id) RESET_QUEUES, 0, 0); } + /** + * This is workaround: only skip kiq_ring test + * during ras recovery in suspend stage for gfx9.4.3 + */ + hive = amdgpu_get_xgmi_hive(adev); + if (hive) { + hive_ras_recovery = atomic_read(&hive->ras_recovery); + amdgpu_put_xgmi_hive(hive); + } + + ras = amdgpu_ras_get_context(adev); + if ((amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3)) && + ras && (atomic_read(&ras->in_recovery) || hive_ras_recovery)) { + spin_unlock(&kiq->ring_lock); + return 0; + } + if (kiq_ring->sched.ready && !adev->job_hang) r = amdgpu_ring_test_helper(kiq_ring); spin_unlock(&kiq->ring_lock); @@ -619,8 +643,8 @@ int amdgpu_gfx_enable_kcq(struct amdgpu_device *adev, int xcc_id) kiq->pmf->kiq_set_resources(kiq_ring, queue_mask); for (i = 0; i < adev->gfx.num_compute_rings; i++) { j = i + xcc_id * adev->gfx.num_compute_rings; - kiq->pmf->kiq_map_queues(kiq_ring, - &adev->gfx.compute_ring[j]); + kiq->pmf->kiq_map_queues(kiq_ring, + &adev->gfx.compute_ring[j]); } r = amdgpu_ring_test_helper(kiq_ring); @@ -663,7 +687,7 @@ int amdgpu_gfx_enable_kgq(struct amdgpu_device *adev, int xcc_id) r = amdgpu_ring_test_helper(kiq_ring); spin_unlock(&kiq->ring_lock); if (r) - DRM_ERROR("KCQ enable failed\n"); + DRM_ERROR("KGQ enable failed\n"); return r; } @@ -700,8 +724,15 @@ void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable) if (adev->gfx.gfx_off_req_count == 0 && !adev->gfx.gfx_off_state) { - schedule_delayed_work(&adev->gfx.gfx_off_delay_work, + /* If going to s2idle, no need to wait */ + if (adev->in_s0ix) { + if (!amdgpu_dpm_set_powergating_by_smu(adev, + AMD_IP_BLOCK_TYPE_GFX, true)) + adev->gfx.gfx_off_state = true; + } else { + schedule_delayed_work(&adev->gfx.gfx_off_delay_work, delay); + } } } else { if (adev->gfx.gfx_off_req_count == 0) { @@ -908,12 +939,12 @@ void amdgpu_gfx_ras_error_func(struct amdgpu_device *adev, func(adev, ras_error_status, i); } -uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg) +uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg, uint32_t xcc_id) { signed long r, cnt = 0; unsigned long flags; uint32_t seq, reg_val_offs = 0, value = 0; - struct amdgpu_kiq *kiq = &adev->gfx.kiq[0]; + struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id]; struct amdgpu_ring *ring = &kiq->ring; if (amdgpu_device_skip_hw_access(adev)) @@ -976,12 +1007,12 @@ failed_kiq_read: return ~0; } -void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v) +void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v, uint32_t xcc_id) { signed long r, cnt = 0; unsigned long flags; uint32_t seq; - struct amdgpu_kiq *kiq = &adev->gfx.kiq[0]; + struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id]; struct amdgpu_ring *ring = &kiq->ring; BUG_ON(!ring->funcs->emit_wreg); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index 7088c5015675..04a86dff71e6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -259,7 +259,6 @@ struct amdgpu_cu_info { struct amdgpu_gfx_ras { struct amdgpu_ras_block_object ras_block; void (*enable_watchdog_timer)(struct amdgpu_device *adev); - bool (*query_utcl2_poison_status)(struct amdgpu_device *adev); int (*rlc_gc_fed_irq)(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry); @@ -471,9 +470,7 @@ static inline u32 amdgpu_gfx_create_bitmask(u32 bit_width) void amdgpu_gfx_parse_disable_cu(unsigned *mask, unsigned max_se, unsigned max_sh); -int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev, - struct amdgpu_ring *ring, - struct amdgpu_irq_src *irq, int xcc_id); +int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev, int xcc_id); void amdgpu_gfx_kiq_free_ring(struct amdgpu_ring *ring); @@ -521,8 +518,8 @@ int amdgpu_gfx_process_ras_data_cb(struct amdgpu_device *adev, int amdgpu_gfx_cp_ecc_error_irq(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry); -uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg); -void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v); +uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg, uint32_t xcc_id); +void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v, uint32_t xcc_id); int amdgpu_gfx_get_num_kcq(struct amdgpu_device *adev); void amdgpu_gfx_cp_init_microcode(struct amdgpu_device *adev, uint32_t ucode_id); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfxhub.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfxhub.h index c7b44aeb671b..103a837ccc71 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfxhub.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfxhub.h @@ -38,6 +38,8 @@ struct amdgpu_gfxhub_funcs { void (*mode2_save_regs)(struct amdgpu_device *adev); void (*mode2_restore_regs)(struct amdgpu_device *adev); void (*halt)(struct amdgpu_device *adev); + bool (*query_utcl2_poison_status)(struct amdgpu_device *adev, + int xcc_id); }; struct amdgpu_gfxhub { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c index a02992bff6af..be4629cdac04 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c @@ -52,7 +52,7 @@ int amdgpu_gmc_pdb0_alloc(struct amdgpu_device *adev) struct amdgpu_bo_param bp; u64 vram_size = adev->gmc.xgmi.node_segment_size * adev->gmc.xgmi.num_physical_nodes; uint32_t pde0_page_shift = adev->gmc.vmid0_page_table_block_size + 21; - uint32_t npdes = (vram_size + (1ULL << pde0_page_shift) -1) >> pde0_page_shift; + uint32_t npdes = (vram_size + (1ULL << pde0_page_shift) - 1) >> pde0_page_shift; memset(&bp, 0, sizeof(bp)); bp.size = PAGE_ALIGN((npdes + 1) * 8); @@ -181,6 +181,9 @@ uint64_t amdgpu_gmc_agp_addr(struct ttm_buffer_object *bo) { struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev); + if (!bo->ttm) + return AMDGPU_BO_INVALID_OFFSET; + if (bo->ttm->num_pages != 1 || bo->ttm->caching == ttm_cached) return AMDGPU_BO_INVALID_OFFSET; @@ -743,6 +746,59 @@ error_unlock_reset: return r; } +void amdgpu_gmc_fw_reg_write_reg_wait(struct amdgpu_device *adev, + uint32_t reg0, uint32_t reg1, + uint32_t ref, uint32_t mask, + uint32_t xcc_inst) +{ + struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_inst]; + struct amdgpu_ring *ring = &kiq->ring; + signed long r, cnt = 0; + unsigned long flags; + uint32_t seq; + + if (adev->mes.ring.sched.ready) { + amdgpu_mes_reg_write_reg_wait(adev, reg0, reg1, + ref, mask); + return; + } + + spin_lock_irqsave(&kiq->ring_lock, flags); + amdgpu_ring_alloc(ring, 32); + amdgpu_ring_emit_reg_write_reg_wait(ring, reg0, reg1, + ref, mask); + r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT); + if (r) + goto failed_undo; + + amdgpu_ring_commit(ring); + spin_unlock_irqrestore(&kiq->ring_lock, flags); + + r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT); + + /* don't wait anymore for IRQ context */ + if (r < 1 && in_interrupt()) + goto failed_kiq; + + might_sleep(); + while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) { + + msleep(MAX_KIQ_REG_BAILOUT_INTERVAL); + r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT); + } + + if (cnt > MAX_KIQ_REG_TRY) + goto failed_kiq; + + return; + +failed_undo: + amdgpu_ring_undo(ring); + spin_unlock_irqrestore(&kiq->ring_lock, flags); +failed_kiq: + dev_err(adev->dev, "failed to write reg %x wait reg %x\n", reg0, reg1); +} + /** * amdgpu_gmc_tmz_set -- check and set if a device supports TMZ * @adev: amdgpu_device pointer @@ -786,6 +842,8 @@ void amdgpu_gmc_tmz_set(struct amdgpu_device *adev) /* YELLOW_CARP*/ case IP_VERSION(10, 3, 3): case IP_VERSION(11, 0, 4): + case IP_VERSION(11, 5, 0): + case IP_VERSION(11, 5, 1): /* Don't enable it by default yet. */ if (amdgpu_tmz < 1) { @@ -825,7 +883,10 @@ void amdgpu_gmc_noretry_set(struct amdgpu_device *adev) gc_ver == IP_VERSION(9, 4, 3) || gc_ver >= IP_VERSION(10, 3, 0)); - gmc->noretry = (amdgpu_noretry == -1) ? noretry_default : amdgpu_noretry; + if (!amdgpu_sriov_xnack_support(adev)) + gmc->noretry = 1; + else + gmc->noretry = (amdgpu_noretry == -1) ? noretry_default : amdgpu_noretry; } void amdgpu_gmc_set_vm_fault_masks(struct amdgpu_device *adev, int hub_type, @@ -1038,21 +1099,28 @@ int amdgpu_gmc_vram_checking(struct amdgpu_device *adev) * seconds, so here, we just pick up three parts for emulation. */ ret = memcmp(vram_ptr, cptr, 10); - if (ret) - return ret; + if (ret) { + ret = -EIO; + goto release_buffer; + } ret = memcmp(vram_ptr + (size / 2), cptr, 10); - if (ret) - return ret; + if (ret) { + ret = -EIO; + goto release_buffer; + } ret = memcmp(vram_ptr + size - 10, cptr, 10); - if (ret) - return ret; + if (ret) { + ret = -EIO; + goto release_buffer; + } +release_buffer: amdgpu_bo_free_kernel(&vram_bo, &vram_gpu, &vram_ptr); - return 0; + return ret; } static ssize_t current_memory_partition_show( diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h index e699d1ca8deb..17f40ea1104b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h @@ -417,6 +417,10 @@ void amdgpu_gmc_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, int amdgpu_gmc_flush_gpu_tlb_pasid(struct amdgpu_device *adev, uint16_t pasid, uint32_t flush_type, bool all_hub, uint32_t inst); +void amdgpu_gmc_fw_reg_write_reg_wait(struct amdgpu_device *adev, + uint32_t reg0, uint32_t reg1, + uint32_t ref, uint32_t mask, + uint32_t xcc_inst); extern void amdgpu_gmc_tmz_set(struct amdgpu_device *adev); extern void amdgpu_gmc_noretry_set(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c index 081267161d40..431ec72655ec 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c @@ -129,13 +129,25 @@ static const struct mmu_interval_notifier_ops amdgpu_hmm_hsa_ops = { */ int amdgpu_hmm_register(struct amdgpu_bo *bo, unsigned long addr) { + int r; + if (bo->kfd_bo) - return mmu_interval_notifier_insert(&bo->notifier, current->mm, + r = mmu_interval_notifier_insert(&bo->notifier, current->mm, addr, amdgpu_bo_size(bo), &amdgpu_hmm_hsa_ops); - return mmu_interval_notifier_insert(&bo->notifier, current->mm, addr, - amdgpu_bo_size(bo), - &amdgpu_hmm_gfx_ops); + else + r = mmu_interval_notifier_insert(&bo->notifier, current->mm, addr, + amdgpu_bo_size(bo), + &amdgpu_hmm_gfx_ops); + if (r) + /* + * Make sure amdgpu_hmm_unregister() doesn't call + * mmu_interval_notifier_remove() when the notifier isn't properly + * initialized. + */ + bo->notifier.mm = NULL; + + return r; } /** @@ -190,8 +202,8 @@ int amdgpu_hmm_range_get_pages(struct mmu_interval_notifier *notifier, pr_debug("hmm range: start = 0x%lx, end = 0x%lx", hmm_range->start, hmm_range->end); - /* Assuming 128MB takes maximum 1 second to fault page address */ - timeout = max((hmm_range->end - hmm_range->start) >> 27, 1UL); + /* Assuming 64MB takes maximum 1 second to fault page address */ + timeout = max((hmm_range->end - hmm_range->start) >> 26, 1UL); timeout *= HMM_RANGE_DEFAULT_TIMEOUT; timeout = jiffies + msecs_to_jiffies(timeout); @@ -199,6 +211,7 @@ retry: hmm_range->notifier_seq = mmu_interval_read_begin(notifier); r = hmm_range_fault(hmm_range); if (unlikely(r)) { + schedule(); /* * FIXME: This timeout should encompass the retry from * mmu_interval_read_retry() as well. @@ -212,7 +225,6 @@ retry: break; hmm_range->hmm_pfns += MAX_WALK_BYTE >> PAGE_SHIFT; hmm_range->start = hmm_range->end; - schedule(); } while (hmm_range->end < end); hmm_range->start = start; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c index 82608df43396..d79cb13e1aa8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c @@ -175,7 +175,6 @@ struct amdgpu_i2c_chan *amdgpu_i2c_create(struct drm_device *dev, i2c->rec = *rec; i2c->adapter.owner = THIS_MODULE; - i2c->adapter.class = I2C_CLASS_DDC; i2c->adapter.dev.parent = dev->dev; i2c->dev = dev; i2c_set_adapdata(&i2c->adapter, i2c); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index 6aa3b1d845ab..8b512dc28df8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -131,7 +131,6 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs, struct amdgpu_ib *ib = &ibs[0]; struct dma_fence *tmp = NULL; bool need_ctx_switch; - unsigned int patch_offset = ~0; struct amdgpu_vm *vm; uint64_t fence_ctx; uint32_t status = 0, alloc_size; @@ -139,10 +138,11 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs, bool secure, init_shadow; u64 shadow_va, csa_va, gds_va; int vmid = AMDGPU_JOB_GET_VMID(job); + bool need_pipe_sync = false; + unsigned int cond_exec; unsigned int i; int r = 0; - bool need_pipe_sync = false; if (num_ibs == 0) return -EINVAL; @@ -228,7 +228,8 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs, init_shadow, vmid); if (ring->funcs->init_cond_exec) - patch_offset = amdgpu_ring_init_cond_exec(ring); + cond_exec = amdgpu_ring_init_cond_exec(ring, + ring->cond_exe_gpu_addr); amdgpu_device_flush_hdp(adev, ring); @@ -278,16 +279,9 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs, fence_flags | AMDGPU_FENCE_FLAG_64BIT); } - if (ring->funcs->emit_gfx_shadow) { + if (ring->funcs->emit_gfx_shadow && ring->funcs->init_cond_exec) { amdgpu_ring_emit_gfx_shadow(ring, 0, 0, 0, false, 0); - - if (ring->funcs->init_cond_exec) { - unsigned int ce_offset = ~0; - - ce_offset = amdgpu_ring_init_cond_exec(ring); - if (ce_offset != ~0 && ring->funcs->patch_cond_exec) - amdgpu_ring_patch_cond_exec(ring, ce_offset); - } + amdgpu_ring_init_cond_exec(ring, ring->cond_exe_gpu_addr); } r = amdgpu_fence_emit(ring, f, job, fence_flags); @@ -302,8 +296,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs, if (ring->funcs->insert_end) ring->funcs->insert_end(ring); - if (patch_offset != ~0 && ring->funcs->patch_cond_exec) - amdgpu_ring_patch_cond_exec(ring, patch_offset); + amdgpu_ring_patch_cond_exec(ring, cond_exec); ring->current_ctx = fence_ctx; if (vm && ring->funcs->emit_switch_buffer) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c index ddd0891da116..3d7fcdeaf8cf 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c @@ -62,9 +62,8 @@ int amdgpu_pasid_alloc(unsigned int bits) int pasid = -EINVAL; for (bits = min(bits, 31U); bits > 0; bits--) { - pasid = ida_simple_get(&amdgpu_pasid_ida, - 1U << (bits - 1), 1U << bits, - GFP_KERNEL); + pasid = ida_alloc_range(&amdgpu_pasid_ida, 1U << (bits - 1), + (1U << bits) - 1, GFP_KERNEL); if (pasid != -ENOSPC) break; } @@ -82,7 +81,7 @@ int amdgpu_pasid_alloc(unsigned int bits) void amdgpu_pasid_free(u32 pasid) { trace_amdgpu_pasid_freed(pasid); - ida_simple_remove(&amdgpu_pasid_ida, pasid); + ida_free(&amdgpu_pasid_ida, pasid); } static void amdgpu_pasid_free_cb(struct dma_fence *fence, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index 78476bc75b4e..e4742b65032d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -35,7 +35,7 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job) { struct amdgpu_ring *ring = to_amdgpu_ring(s_job->sched); struct amdgpu_job *job = to_amdgpu_job(s_job); - struct amdgpu_task_info ti; + struct amdgpu_task_info *ti; struct amdgpu_device *adev = ring->adev; int idx; int r; @@ -48,7 +48,7 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job) return DRM_GPU_SCHED_STAT_ENODEV; } - memset(&ti, 0, sizeof(struct amdgpu_task_info)); + adev->job_hang = true; if (amdgpu_gpu_recovery && @@ -58,12 +58,16 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job) goto exit; } - amdgpu_vm_get_task_info(ring->adev, job->pasid, &ti); DRM_ERROR("ring %s timeout, signaled seq=%u, emitted seq=%u\n", - job->base.sched->name, atomic_read(&ring->fence_drv.last_seq), - ring->fence_drv.sync_seq); - DRM_ERROR("Process information: process %s pid %d thread %s pid %d\n", - ti.process_name, ti.tgid, ti.task_name, ti.pid); + job->base.sched->name, atomic_read(&ring->fence_drv.last_seq), + ring->fence_drv.sync_seq); + + ti = amdgpu_vm_get_task_info_pasid(ring->adev, job->pasid); + if (ti) { + DRM_ERROR("Process information: process %s pid %d thread %s pid %d\n", + ti->process_name, ti->tgid, ti->task_name, ti->pid); + amdgpu_vm_put_task_info(ti); + } dma_fence_set_error(&s_job->s_fence->finished, -ETIME); @@ -115,7 +119,7 @@ int amdgpu_job_alloc(struct amdgpu_device *adev, struct amdgpu_vm *vm, if (!entity) return 0; - return drm_sched_job_init(&(*job)->base, entity, owner); + return drm_sched_job_init(&(*job)->base, entity, 1, owner); } int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev, @@ -300,12 +304,15 @@ static struct dma_fence *amdgpu_job_run(struct drm_sched_job *sched_job) dma_fence_set_error(finished, -ECANCELED); if (finished->error < 0) { - DRM_INFO("Skip scheduling IBs!\n"); + dev_dbg(adev->dev, "Skip scheduling IBs in ring(%s)", + ring->name); } else { r = amdgpu_ib_schedule(ring, job->num_ibs, job->ibs, job, &fence); if (r) - DRM_ERROR("Error scheduling IBs (%d)\n", r); + dev_err(adev->dev, + "Error scheduling IBs (%d) in ring(%s)", r, + ring->name); } job->job_run_counter++; @@ -325,8 +332,8 @@ void amdgpu_job_stop_all_jobs_on_sched(struct drm_gpu_scheduler *sched) int i; /* Signal all jobs not yet scheduled */ - for (i = DRM_SCHED_PRIORITY_COUNT - 1; i >= DRM_SCHED_PRIORITY_MIN; i--) { - struct drm_sched_rq *rq = &sched->sched_rq[i]; + for (i = DRM_SCHED_PRIORITY_KERNEL; i < sched->num_rqs; i++) { + struct drm_sched_rq *rq = sched->sched_rq[i]; spin_lock(&rq->lock); list_for_each_entry(s_entity, &rq->entities, list) { while ((s_job = to_drm_sched_job(spsc_queue_pop(&s_entity->job_queue)))) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c index 2ff2897fd1db..6df99cb00d9a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c @@ -36,10 +36,35 @@ static void amdgpu_jpeg_idle_work_handler(struct work_struct *work); int amdgpu_jpeg_sw_init(struct amdgpu_device *adev) { + int i, r; + INIT_DELAYED_WORK(&adev->jpeg.idle_work, amdgpu_jpeg_idle_work_handler); mutex_init(&adev->jpeg.jpeg_pg_lock); atomic_set(&adev->jpeg.total_submission_cnt, 0); + if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) && + (adev->pg_flags & AMD_PG_SUPPORT_JPEG_DPG)) + adev->jpeg.indirect_sram = true; + + for (i = 0; i < adev->jpeg.num_jpeg_inst; i++) { + if (adev->jpeg.harvest_config & (1 << i)) + continue; + + if (adev->jpeg.indirect_sram) { + r = amdgpu_bo_create_kernel(adev, 64 * 2 * 4, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM | + AMDGPU_GEM_DOMAIN_GTT, + &adev->jpeg.inst[i].dpg_sram_bo, + &adev->jpeg.inst[i].dpg_sram_gpu_addr, + &adev->jpeg.inst[i].dpg_sram_cpu_addr); + if (r) { + dev_err(adev->dev, + "JPEG %d (%d) failed to allocate DPG bo\n", i, r); + return r; + } + } + } + return 0; } @@ -51,6 +76,11 @@ int amdgpu_jpeg_sw_fini(struct amdgpu_device *adev) if (adev->jpeg.harvest_config & (1 << i)) continue; + amdgpu_bo_free_kernel( + &adev->jpeg.inst[i].dpg_sram_bo, + &adev->jpeg.inst[i].dpg_sram_gpu_addr, + (void **)&adev->jpeg.inst[i].dpg_sram_cpu_addr); + for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) amdgpu_ring_fini(&adev->jpeg.inst[i].ring_dec[j]); } @@ -210,12 +240,15 @@ int amdgpu_jpeg_dec_ring_test_ib(struct amdgpu_ring *ring, long timeout) } else { r = 0; } + if (!amdgpu_sriov_vf(adev)) { for (i = 0; i < adev->usec_timeout; i++) { tmp = RREG32(adev->jpeg.inst[ring->me].external.jpeg_pitch[ring->pipe]); if (tmp == 0xDEADBEEF) break; udelay(1); + if (amdgpu_emu_mode == 1) + udelay(10); } if (i >= adev->usec_timeout) @@ -296,3 +329,16 @@ int amdgpu_jpeg_ras_sw_init(struct amdgpu_device *adev) return 0; } + +int amdgpu_jpeg_psp_update_sram(struct amdgpu_device *adev, int inst_idx, + enum AMDGPU_UCODE_ID ucode_id) +{ + struct amdgpu_firmware_info ucode = { + .ucode_id = AMDGPU_UCODE_ID_JPEG_RAM, + .mc_addr = adev->jpeg.inst[inst_idx].dpg_sram_gpu_addr, + .ucode_size = ((uintptr_t)adev->jpeg.inst[inst_idx].dpg_sram_curr_addr - + (uintptr_t)adev->jpeg.inst[inst_idx].dpg_sram_cpu_addr), + }; + + return psp_execute_ip_fw_load(&adev->psp, &ucode); +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h index ffe47e9f5bf2..aea31d61d991 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h @@ -32,6 +32,34 @@ #define AMDGPU_JPEG_HARVEST_JPEG0 (1 << 0) #define AMDGPU_JPEG_HARVEST_JPEG1 (1 << 1) +#define WREG32_SOC15_JPEG_DPG_MODE(inst_idx, offset, value, indirect) \ + do { \ + if (!indirect) { \ + WREG32_SOC15(JPEG, GET_INST(JPEG, inst_idx), \ + mmUVD_DPG_LMA_DATA, value); \ + WREG32_SOC15( \ + JPEG, GET_INST(JPEG, inst_idx), \ + mmUVD_DPG_LMA_CTL, \ + (UVD_DPG_LMA_CTL__READ_WRITE_MASK | \ + offset << UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT | \ + indirect << UVD_DPG_LMA_CTL__SRAM_SEL__SHIFT)); \ + } else { \ + *adev->jpeg.inst[inst_idx].dpg_sram_curr_addr++ = \ + offset; \ + *adev->jpeg.inst[inst_idx].dpg_sram_curr_addr++ = \ + value; \ + } \ + } while (0) + +#define RREG32_SOC15_JPEG_DPG_MODE(inst_idx, offset, mask_en) \ + ({ \ + WREG32_SOC15(JPEG, inst_idx, mmUVD_DPG_LMA_CTL, \ + (0x0 << UVD_DPG_LMA_CTL__READ_WRITE__SHIFT | \ + mask_en << UVD_DPG_LMA_CTL__MASK_EN__SHIFT | \ + offset << UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT)); \ + RREG32_SOC15(JPEG, inst_idx, mmUVD_DPG_LMA_DATA); \ + }) + struct amdgpu_jpeg_reg{ unsigned jpeg_pitch[AMDGPU_MAX_JPEG_RINGS]; }; @@ -41,6 +69,11 @@ struct amdgpu_jpeg_inst { struct amdgpu_irq_src irq; struct amdgpu_irq_src ras_poison_irq; struct amdgpu_jpeg_reg external; + struct amdgpu_bo *dpg_sram_bo; + struct dpg_pause_state pause_state; + void *dpg_sram_cpu_addr; + uint64_t dpg_sram_gpu_addr; + uint32_t *dpg_sram_curr_addr; uint8_t aid_id; }; @@ -63,6 +96,7 @@ struct amdgpu_jpeg { uint16_t inst_mask; uint8_t num_inst_per_aid; + bool indirect_sram; }; int amdgpu_jpeg_sw_init(struct amdgpu_device *adev); @@ -82,5 +116,7 @@ int amdgpu_jpeg_process_poison_irq(struct amdgpu_device *adev, int amdgpu_jpeg_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block); int amdgpu_jpeg_ras_sw_init(struct amdgpu_device *adev); +int amdgpu_jpeg_psp_update_sram(struct amdgpu_device *adev, int inst_idx, + enum AMDGPU_UCODE_ID ucode_id); #endif /*__AMDGPU_JPEG_H__*/ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index 583cf03950cd..a0ea6fe8d060 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -149,38 +149,7 @@ int amdgpu_driver_load_kms(struct amdgpu_device *adev, unsigned long flags) goto out; } - adev->pm.rpm_mode = AMDGPU_RUNPM_NONE; - if (amdgpu_device_supports_px(dev) && - (amdgpu_runtime_pm != 0)) { /* enable PX as runtime mode */ - adev->pm.rpm_mode = AMDGPU_RUNPM_PX; - dev_info(adev->dev, "Using ATPX for runtime pm\n"); - } else if (amdgpu_device_supports_boco(dev) && - (amdgpu_runtime_pm != 0)) { /* enable boco as runtime mode */ - adev->pm.rpm_mode = AMDGPU_RUNPM_BOCO; - dev_info(adev->dev, "Using BOCO for runtime pm\n"); - } else if (amdgpu_device_supports_baco(dev) && - (amdgpu_runtime_pm != 0)) { - switch (adev->asic_type) { - case CHIP_VEGA20: - case CHIP_ARCTURUS: - /* enable BACO as runpm mode if runpm=1 */ - if (amdgpu_runtime_pm > 0) - adev->pm.rpm_mode = AMDGPU_RUNPM_BACO; - break; - case CHIP_VEGA10: - /* enable BACO as runpm mode if noretry=0 */ - if (!adev->gmc.noretry) - adev->pm.rpm_mode = AMDGPU_RUNPM_BACO; - break; - default: - /* enable BACO as runpm mode on CI+ */ - adev->pm.rpm_mode = AMDGPU_RUNPM_BACO; - break; - } - - if (adev->pm.rpm_mode == AMDGPU_RUNPM_BACO) - dev_info(adev->dev, "Using BACO for runtime pm\n"); - } + amdgpu_device_detect_runtime_pm_mode(adev); /* Call ACPI methods: require modeset init * but failure is not fatal @@ -894,14 +863,14 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) dev_info->ids_flags |= AMDGPU_IDS_FLAGS_CONFORMANT_TRUNC_COORD; vm_size = adev->vm_manager.max_pfn * AMDGPU_GPU_PAGE_SIZE; - vm_size -= AMDGPU_VA_RESERVED_SIZE; + vm_size -= AMDGPU_VA_RESERVED_TOP; /* Older VCE FW versions are buggy and can handle only 40bits */ if (adev->vce.fw_version && adev->vce.fw_version < AMDGPU_VCE_FW_53_45) vm_size = min(vm_size, 1ULL << 40); - dev_info->virtual_address_offset = AMDGPU_VA_RESERVED_SIZE; + dev_info->virtual_address_offset = AMDGPU_VA_RESERVED_BOTTOM; dev_info->virtual_address_max = min(vm_size, AMDGPU_GMC_HOLE_START); @@ -1105,6 +1074,20 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) if (amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_AVG_POWER, (void *)&ui32, &ui32_size)) { + /* fall back to input power for backwards compat */ + if (amdgpu_dpm_read_sensor(adev, + AMDGPU_PP_SENSOR_GPU_INPUT_POWER, + (void *)&ui32, &ui32_size)) { + return -EINVAL; + } + } + ui32 >>= 8; + break; + case AMDGPU_INFO_SENSOR_GPU_INPUT_POWER: + /* get input GPU power */ + if (amdgpu_dpm_read_sensor(adev, + AMDGPU_PP_SENSOR_GPU_INPUT_POWER, + (void *)&ui32, &ui32_size)) { return -EINVAL; } ui32 >>= 8; @@ -1365,6 +1348,10 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv) goto error_vm; } + r = amdgpu_seq64_map(adev, &fpriv->vm, &fpriv->seq64_va); + if (r) + goto error_vm; + mutex_init(&fpriv->bo_list_lock); idr_init_base(&fpriv->bo_list_handles, 1); @@ -1428,6 +1415,8 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev, fpriv->csa_va = NULL; } + amdgpu_seq64_unmap(adev, fpriv); + pasid = fpriv->vm.pasid; pd = amdgpu_bo_ref(fpriv->vm.root.bo); if (!WARN_ON(amdgpu_bo_reserve(pd, true))) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c index 5a828c175e3a..0734490347db 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c @@ -27,6 +27,16 @@ #include "umc/umc_6_7_0_offset.h" #include "umc/umc_6_7_0_sh_mask.h" +static bool amdgpu_mca_is_deferred_error(struct amdgpu_device *adev, + uint64_t mc_status) +{ + if (adev->umc.ras->check_ecc_err_status) + return adev->umc.ras->check_ecc_err_status(adev, + AMDGPU_MCA_ERROR_TYPE_DE, &mc_status); + + return false; +} + void amdgpu_mca_query_correctable_error_count(struct amdgpu_device *adev, uint64_t mc_status_addr, unsigned long *error_count) @@ -143,6 +153,46 @@ int amdgpu_mca_mpio_ras_sw_init(struct amdgpu_device *adev) return 0; } +void amdgpu_mca_bank_set_init(struct mca_bank_set *mca_set) +{ + if (!mca_set) + return; + + memset(mca_set, 0, sizeof(*mca_set)); + INIT_LIST_HEAD(&mca_set->list); +} + +int amdgpu_mca_bank_set_add_entry(struct mca_bank_set *mca_set, struct mca_bank_entry *entry) +{ + struct mca_bank_node *node; + + if (!entry) + return -EINVAL; + + node = kvzalloc(sizeof(*node), GFP_KERNEL); + if (!node) + return -ENOMEM; + + memcpy(&node->entry, entry, sizeof(*entry)); + + INIT_LIST_HEAD(&node->node); + list_add_tail(&node->node, &mca_set->list); + + mca_set->nr_entries++; + + return 0; +} + +void amdgpu_mca_bank_set_release(struct mca_bank_set *mca_set) +{ + struct mca_bank_node *node, *tmp; + + list_for_each_entry_safe(node, tmp, &mca_set->list, node) { + list_del(&node->node); + kvfree(node); + } +} + void amdgpu_mca_smu_init_funcs(struct amdgpu_device *adev, const struct amdgpu_mca_smu_funcs *mca_funcs) { struct amdgpu_mca *mca = &adev->mca; @@ -160,6 +210,83 @@ int amdgpu_mca_smu_set_debug_mode(struct amdgpu_device *adev, bool enable) return -EOPNOTSUPP; } +static void amdgpu_mca_smu_mca_bank_dump(struct amdgpu_device *adev, int idx, struct mca_bank_entry *entry, + struct ras_query_context *qctx) +{ + u64 event_id = qctx->event_id; + + RAS_EVENT_LOG(adev, event_id, HW_ERR "Accelerator Check Architecture events logged\n"); + RAS_EVENT_LOG(adev, event_id, HW_ERR "aca entry[%02d].STATUS=0x%016llx\n", + idx, entry->regs[MCA_REG_IDX_STATUS]); + RAS_EVENT_LOG(adev, event_id, HW_ERR "aca entry[%02d].ADDR=0x%016llx\n", + idx, entry->regs[MCA_REG_IDX_ADDR]); + RAS_EVENT_LOG(adev, event_id, HW_ERR "aca entry[%02d].MISC0=0x%016llx\n", + idx, entry->regs[MCA_REG_IDX_MISC0]); + RAS_EVENT_LOG(adev, event_id, HW_ERR "aca entry[%02d].IPID=0x%016llx\n", + idx, entry->regs[MCA_REG_IDX_IPID]); + RAS_EVENT_LOG(adev, event_id, HW_ERR "aca entry[%02d].SYND=0x%016llx\n", + idx, entry->regs[MCA_REG_IDX_SYND]); +} + +int amdgpu_mca_smu_log_ras_error(struct amdgpu_device *adev, enum amdgpu_ras_block blk, enum amdgpu_mca_error_type type, + struct ras_err_data *err_data, struct ras_query_context *qctx) +{ + struct amdgpu_smuio_mcm_config_info mcm_info; + struct ras_err_addr err_addr = {0}; + struct mca_bank_set mca_set; + struct mca_bank_node *node; + struct mca_bank_entry *entry; + uint32_t count; + int ret, i = 0; + + amdgpu_mca_bank_set_init(&mca_set); + + ret = amdgpu_mca_smu_get_mca_set(adev, blk, type, &mca_set); + if (ret) + goto out_mca_release; + + list_for_each_entry(node, &mca_set.list, node) { + entry = &node->entry; + + amdgpu_mca_smu_mca_bank_dump(adev, i++, entry, qctx); + + count = 0; + ret = amdgpu_mca_smu_parse_mca_error_count(adev, blk, type, entry, &count); + if (ret) + goto out_mca_release; + + if (!count) + continue; + + mcm_info.socket_id = entry->info.socket_id; + mcm_info.die_id = entry->info.aid; + + if (blk == AMDGPU_RAS_BLOCK__UMC) { + err_addr.err_status = entry->regs[MCA_REG_IDX_STATUS]; + err_addr.err_ipid = entry->regs[MCA_REG_IDX_IPID]; + err_addr.err_addr = entry->regs[MCA_REG_IDX_ADDR]; + } + + if (type == AMDGPU_MCA_ERROR_TYPE_UE) + amdgpu_ras_error_statistic_ue_count(err_data, + &mcm_info, &err_addr, (uint64_t)count); + else { + if (amdgpu_mca_is_deferred_error(adev, entry->regs[MCA_REG_IDX_STATUS])) + amdgpu_ras_error_statistic_de_count(err_data, + &mcm_info, &err_addr, (uint64_t)count); + else + amdgpu_ras_error_statistic_ce_count(err_data, + &mcm_info, &err_addr, (uint64_t)count); + } + } + +out_mca_release: + amdgpu_mca_bank_set_release(&mca_set); + + return ret; +} + + int amdgpu_mca_smu_get_valid_mca_count(struct amdgpu_device *adev, enum amdgpu_mca_error_type type, uint32_t *count) { const struct amdgpu_mca_smu_funcs *mca_funcs = adev->mca.mca_funcs; @@ -173,17 +300,77 @@ int amdgpu_mca_smu_get_valid_mca_count(struct amdgpu_device *adev, enum amdgpu_m return -EOPNOTSUPP; } -int amdgpu_mca_smu_get_error_count(struct amdgpu_device *adev, enum amdgpu_ras_block blk, - enum amdgpu_mca_error_type type, uint32_t *count) +int amdgpu_mca_smu_get_mca_set_error_count(struct amdgpu_device *adev, enum amdgpu_ras_block blk, + enum amdgpu_mca_error_type type, uint32_t *total) { const struct amdgpu_mca_smu_funcs *mca_funcs = adev->mca.mca_funcs; - if (!count) + struct mca_bank_set mca_set; + struct mca_bank_node *node; + struct mca_bank_entry *entry; + uint32_t count; + int ret; + + if (!total) return -EINVAL; - if (mca_funcs && mca_funcs->mca_get_error_count) - return mca_funcs->mca_get_error_count(adev, blk, type, count); + if (!mca_funcs) + return -EOPNOTSUPP; - return -EOPNOTSUPP; + if (!mca_funcs->mca_get_ras_mca_set || !mca_funcs->mca_get_valid_mca_count) + return -EOPNOTSUPP; + + amdgpu_mca_bank_set_init(&mca_set); + + ret = mca_funcs->mca_get_ras_mca_set(adev, blk, type, &mca_set); + if (ret) + goto err_mca_set_release; + + *total = 0; + list_for_each_entry(node, &mca_set.list, node) { + entry = &node->entry; + + count = 0; + ret = mca_funcs->mca_parse_mca_error_count(adev, blk, type, entry, &count); + if (ret) + goto err_mca_set_release; + + *total += count; + } + +err_mca_set_release: + amdgpu_mca_bank_set_release(&mca_set); + + return ret; +} + +int amdgpu_mca_smu_parse_mca_error_count(struct amdgpu_device *adev, enum amdgpu_ras_block blk, + enum amdgpu_mca_error_type type, struct mca_bank_entry *entry, uint32_t *count) +{ + const struct amdgpu_mca_smu_funcs *mca_funcs = adev->mca.mca_funcs; + if (!count || !entry) + return -EINVAL; + + if (!mca_funcs || !mca_funcs->mca_parse_mca_error_count) + return -EOPNOTSUPP; + + + return mca_funcs->mca_parse_mca_error_count(adev, blk, type, entry, count); +} + +int amdgpu_mca_smu_get_mca_set(struct amdgpu_device *adev, enum amdgpu_ras_block blk, + enum amdgpu_mca_error_type type, struct mca_bank_set *mca_set) +{ + const struct amdgpu_mca_smu_funcs *mca_funcs = adev->mca.mca_funcs; + + if (!mca_set) + return -EINVAL; + + if (!mca_funcs || !mca_funcs->mca_get_ras_mca_set) + return -EOPNOTSUPP; + + WARN_ON(!list_empty(&mca_set->list)); + + return mca_funcs->mca_get_ras_mca_set(adev, blk, type, mca_set); } int amdgpu_mca_smu_get_mca_entry(struct amdgpu_device *adev, enum amdgpu_mca_error_type type, @@ -192,6 +379,9 @@ int amdgpu_mca_smu_get_mca_entry(struct amdgpu_device *adev, enum amdgpu_mca_err const struct amdgpu_mca_smu_funcs *mca_funcs = adev->mca.mca_funcs; int count; + if (!mca_funcs || !mca_funcs->mca_get_mca_entry) + return -EOPNOTSUPP; + switch (type) { case AMDGPU_MCA_ERROR_TYPE_UE: count = mca_funcs->max_ue_count; @@ -206,10 +396,7 @@ int amdgpu_mca_smu_get_mca_entry(struct amdgpu_device *adev, enum amdgpu_mca_err if (idx >= count) return -EINVAL; - if (mca_funcs && mca_funcs->mca_get_mca_entry) - return mca_funcs->mca_get_mca_entry(adev, type, idx, entry); - - return -EOPNOTSUPP; + return mca_funcs->mca_get_mca_entry(adev, type, idx, entry); } #if defined(CONFIG_DEBUG_FS) @@ -218,7 +405,7 @@ static int amdgpu_mca_smu_debug_mode_set(void *data, u64 val) struct amdgpu_device *adev = (struct amdgpu_device *)data; int ret; - ret = amdgpu_mca_smu_set_debug_mode(adev, val ? true : false); + ret = amdgpu_ras_set_mca_debug_mode(adev, val ? true : false); if (ret) return ret; @@ -230,14 +417,21 @@ static int amdgpu_mca_smu_debug_mode_set(void *data, u64 val) static void mca_dump_entry(struct seq_file *m, struct mca_bank_entry *entry) { int i, idx = entry->idx; + int reg_idx_array[] = { + MCA_REG_IDX_STATUS, + MCA_REG_IDX_ADDR, + MCA_REG_IDX_MISC0, + MCA_REG_IDX_IPID, + MCA_REG_IDX_SYND, + }; seq_printf(m, "mca entry[%d].type: %s\n", idx, entry->type == AMDGPU_MCA_ERROR_TYPE_UE ? "UE" : "CE"); seq_printf(m, "mca entry[%d].ip: %d\n", idx, entry->ip); seq_printf(m, "mca entry[%d].info: socketid:%d aid:%d hwid:0x%03x mcatype:0x%04x\n", idx, entry->info.socket_id, entry->info.aid, entry->info.hwid, entry->info.mcatype); - for (i = 0; i < ARRAY_SIZE(entry->regs); i++) - seq_printf(m, "mca entry[%d].regs[%d]: 0x%016llx\n", idx, i, entry->regs[i]); + for (i = 0; i < ARRAY_SIZE(reg_idx_array); i++) + seq_printf(m, "mca entry[%d].regs[%d]: 0x%016llx\n", idx, reg_idx_array[i], entry->regs[reg_idx_array[i]]); } static int mca_dump_show(struct seq_file *m, enum amdgpu_mca_error_type type) @@ -319,7 +513,7 @@ DEFINE_DEBUGFS_ATTRIBUTE(mca_debug_mode_fops, NULL, amdgpu_mca_smu_debug_mode_se void amdgpu_mca_smu_debugfs_init(struct amdgpu_device *adev, struct dentry *root) { #if defined(CONFIG_DEBUG_FS) - if (!root || adev->ip_versions[MP1_HWIP][0] != IP_VERSION(13, 0, 6)) + if (!root || amdgpu_ip_version(adev, MP1_HWIP, 0) != IP_VERSION(13, 0, 6)) return; debugfs_create_file("mca_debug_mode", 0200, root, adev, &mca_debug_mode_fops); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h index 28ad463cf5c9..e5bf07ce3451 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h @@ -25,6 +25,31 @@ #define MCA_MAX_REGS_COUNT (16) +#define MCA_REG_FIELD(x, h, l) (((x) & GENMASK_ULL(h, l)) >> l) +#define MCA_REG__STATUS__VAL(x) MCA_REG_FIELD(x, 63, 63) +#define MCA_REG__STATUS__OVERFLOW(x) MCA_REG_FIELD(x, 62, 62) +#define MCA_REG__STATUS__UC(x) MCA_REG_FIELD(x, 61, 61) +#define MCA_REG__STATUS__EN(x) MCA_REG_FIELD(x, 60, 60) +#define MCA_REG__STATUS__MISCV(x) MCA_REG_FIELD(x, 59, 59) +#define MCA_REG__STATUS__ADDRV(x) MCA_REG_FIELD(x, 58, 58) +#define MCA_REG__STATUS__PCC(x) MCA_REG_FIELD(x, 57, 57) +#define MCA_REG__STATUS__ERRCOREIDVAL(x) MCA_REG_FIELD(x, 56, 56) +#define MCA_REG__STATUS__TCC(x) MCA_REG_FIELD(x, 55, 55) +#define MCA_REG__STATUS__SYNDV(x) MCA_REG_FIELD(x, 53, 53) +#define MCA_REG__STATUS__CECC(x) MCA_REG_FIELD(x, 46, 46) +#define MCA_REG__STATUS__UECC(x) MCA_REG_FIELD(x, 45, 45) +#define MCA_REG__STATUS__DEFERRED(x) MCA_REG_FIELD(x, 44, 44) +#define MCA_REG__STATUS__POISON(x) MCA_REG_FIELD(x, 43, 43) +#define MCA_REG__STATUS__SCRUB(x) MCA_REG_FIELD(x, 40, 40) +#define MCA_REG__STATUS__ERRCOREID(x) MCA_REG_FIELD(x, 37, 32) +#define MCA_REG__STATUS__ADDRLSB(x) MCA_REG_FIELD(x, 29, 24) +#define MCA_REG__STATUS__ERRORCODEEXT(x) MCA_REG_FIELD(x, 21, 16) +#define MCA_REG__STATUS__ERRORCODE(x) MCA_REG_FIELD(x, 15, 0) + +#define MCA_REG__MISC0__ERRCNT(x) MCA_REG_FIELD(x, 43, 32) + +#define MCA_REG__SYND__ERRORINFORMATION(x) MCA_REG_FIELD(x, 17, 0) + enum amdgpu_mca_ip { AMDGPU_MCA_IP_UNKNOW = -1, AMDGPU_MCA_IP_PSP = 0, @@ -33,12 +58,14 @@ enum amdgpu_mca_ip { AMDGPU_MCA_IP_SMU, AMDGPU_MCA_IP_MP5, AMDGPU_MCA_IP_UMC, + AMDGPU_MCA_IP_PCS_XGMI, AMDGPU_MCA_IP_COUNT, }; enum amdgpu_mca_error_type { AMDGPU_MCA_ERROR_TYPE_UE = 0, AMDGPU_MCA_ERROR_TYPE_CE, + AMDGPU_MCA_ERROR_TYPE_DE, }; struct amdgpu_mca_ras_block { @@ -57,6 +84,15 @@ struct amdgpu_mca { const struct amdgpu_mca_smu_funcs *mca_funcs; }; +enum mca_reg_idx { + MCA_REG_IDX_STATUS = 1, + MCA_REG_IDX_ADDR = 2, + MCA_REG_IDX_MISC0 = 3, + MCA_REG_IDX_IPID = 5, + MCA_REG_IDX_SYND = 6, + MCA_REG_IDX_COUNT = 16, +}; + struct mca_bank_info { int socket_id; int aid; @@ -72,18 +108,28 @@ struct mca_bank_entry { uint64_t regs[MCA_MAX_REGS_COUNT]; }; +struct mca_bank_node { + struct mca_bank_entry entry; + struct list_head node; +}; + +struct mca_bank_set { + int nr_entries; + struct list_head list; +}; + struct amdgpu_mca_smu_funcs { int max_ue_count; int max_ce_count; int (*mca_set_debug_mode)(struct amdgpu_device *adev, bool enable); - int (*mca_get_error_count)(struct amdgpu_device *adev, enum amdgpu_ras_block blk, - enum amdgpu_mca_error_type type, uint32_t *count); + int (*mca_get_ras_mca_set)(struct amdgpu_device *adev, enum amdgpu_ras_block blk, enum amdgpu_mca_error_type type, + struct mca_bank_set *mca_set); + int (*mca_parse_mca_error_count)(struct amdgpu_device *adev, enum amdgpu_ras_block blk, enum amdgpu_mca_error_type type, + struct mca_bank_entry *entry, uint32_t *count); int (*mca_get_valid_mca_count)(struct amdgpu_device *adev, enum amdgpu_mca_error_type type, uint32_t *count); int (*mca_get_mca_entry)(struct amdgpu_device *adev, enum amdgpu_mca_error_type type, int idx, struct mca_bank_entry *entry); - int (*mca_get_ras_mca_idx_array)(struct amdgpu_device *adev, enum amdgpu_ras_block blk, - enum amdgpu_mca_error_type type, int *idx_array, int *idx_array_size); }; void amdgpu_mca_query_correctable_error_count(struct amdgpu_device *adev, @@ -107,11 +153,23 @@ int amdgpu_mca_mpio_ras_sw_init(struct amdgpu_device *adev); void amdgpu_mca_smu_init_funcs(struct amdgpu_device *adev, const struct amdgpu_mca_smu_funcs *mca_funcs); int amdgpu_mca_smu_set_debug_mode(struct amdgpu_device *adev, bool enable); int amdgpu_mca_smu_get_valid_mca_count(struct amdgpu_device *adev, enum amdgpu_mca_error_type type, uint32_t *count); +int amdgpu_mca_smu_get_mca_set_error_count(struct amdgpu_device *adev, enum amdgpu_ras_block blk, + enum amdgpu_mca_error_type type, uint32_t *total); int amdgpu_mca_smu_get_error_count(struct amdgpu_device *adev, enum amdgpu_ras_block blk, enum amdgpu_mca_error_type type, uint32_t *count); +int amdgpu_mca_smu_parse_mca_error_count(struct amdgpu_device *adev, enum amdgpu_ras_block blk, + enum amdgpu_mca_error_type type, struct mca_bank_entry *entry, uint32_t *count); +int amdgpu_mca_smu_get_mca_set(struct amdgpu_device *adev, enum amdgpu_ras_block blk, + enum amdgpu_mca_error_type type, struct mca_bank_set *mca_set); int amdgpu_mca_smu_get_mca_entry(struct amdgpu_device *adev, enum amdgpu_mca_error_type type, int idx, struct mca_bank_entry *entry); void amdgpu_mca_smu_debugfs_init(struct amdgpu_device *adev, struct dentry *root); +void amdgpu_mca_bank_set_init(struct mca_bank_set *mca_set); +int amdgpu_mca_bank_set_add_entry(struct mca_bank_set *mca_set, struct mca_bank_entry *entry); +void amdgpu_mca_bank_set_release(struct mca_bank_set *mca_set); +int amdgpu_mca_smu_log_ras_error(struct amdgpu_device *adev, enum amdgpu_ras_block blk, enum amdgpu_mca_error_type type, + struct ras_err_data *err_data, struct ras_query_context *qctx); + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c index 59f10b353b3a..310c8f0c21da 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c @@ -40,7 +40,6 @@ int amdgpu_mes_doorbell_process_slice(struct amdgpu_device *adev) } static int amdgpu_mes_kernel_doorbell_get(struct amdgpu_device *adev, - struct amdgpu_mes_process *process, int ip_type, uint64_t *doorbell_index) { unsigned int offset, found; @@ -65,7 +64,6 @@ static int amdgpu_mes_kernel_doorbell_get(struct amdgpu_device *adev, } static void amdgpu_mes_kernel_doorbell_free(struct amdgpu_device *adev, - struct amdgpu_mes_process *process, uint32_t doorbell_index) { unsigned int old, rel_index; @@ -98,6 +96,29 @@ static int amdgpu_mes_doorbell_init(struct amdgpu_device *adev) return 0; } +static int amdgpu_mes_event_log_init(struct amdgpu_device *adev) +{ + int r; + + if (!amdgpu_mes_log_enable) + return 0; + + r = amdgpu_bo_create_kernel(adev, AMDGPU_MES_LOG_BUFFER_SIZE, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_GTT, + &adev->mes.event_log_gpu_obj, + &adev->mes.event_log_gpu_addr, + &adev->mes.event_log_cpu_addr); + if (r) { + dev_warn(adev->dev, "failed to create MES event log buffer (%d)", r); + return r; + } + + memset(adev->mes.event_log_cpu_addr, 0, PAGE_SIZE); + + return 0; + +} + static void amdgpu_mes_doorbell_free(struct amdgpu_device *adev) { bitmap_free(adev->mes.doorbell_bitmap); @@ -182,8 +203,14 @@ int amdgpu_mes_init(struct amdgpu_device *adev) if (r) goto error; + r = amdgpu_mes_event_log_init(adev); + if (r) + goto error_doorbell; + return 0; +error_doorbell: + amdgpu_mes_doorbell_free(adev); error: amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs); amdgpu_device_wb_free(adev, adev->mes.query_status_fence_offs); @@ -199,6 +226,10 @@ error_ids: void amdgpu_mes_fini(struct amdgpu_device *adev) { + amdgpu_bo_free_kernel(&adev->mes.event_log_gpu_obj, + &adev->mes.event_log_gpu_addr, + &adev->mes.event_log_cpu_addr); + amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs); amdgpu_device_wb_free(adev, adev->mes.query_status_fence_offs); amdgpu_device_wb_free(adev, adev->mes.read_val_offs); @@ -557,8 +588,20 @@ static void amdgpu_mes_queue_init_mqd(struct amdgpu_device *adev, mqd_prop.hqd_queue_priority = p->hqd_queue_priority; mqd_prop.hqd_active = false; + if (p->queue_type == AMDGPU_RING_TYPE_GFX || + p->queue_type == AMDGPU_RING_TYPE_COMPUTE) { + mutex_lock(&adev->srbm_mutex); + amdgpu_gfx_select_me_pipe_q(adev, p->ring->me, p->ring->pipe, 0, 0, 0); + } + mqd_mgr->init_mqd(adev, q->mqd_cpu_ptr, &mqd_prop); + if (p->queue_type == AMDGPU_RING_TYPE_GFX || + p->queue_type == AMDGPU_RING_TYPE_COMPUTE) { + amdgpu_gfx_select_me_pipe_q(adev, 0, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); + } + amdgpu_bo_unreserve(q->mqd_obj); } @@ -611,7 +654,7 @@ int amdgpu_mes_add_hw_queue(struct amdgpu_device *adev, int gang_id, *queue_id = queue->queue_id = r; /* allocate a doorbell index for the queue */ - r = amdgpu_mes_kernel_doorbell_get(adev, gang->process, + r = amdgpu_mes_kernel_doorbell_get(adev, qprops->queue_type, &qprops->doorbell_off); if (r) @@ -669,8 +712,7 @@ int amdgpu_mes_add_hw_queue(struct amdgpu_device *adev, int gang_id, return 0; clean_up_doorbell: - amdgpu_mes_kernel_doorbell_free(adev, gang->process, - qprops->doorbell_off); + amdgpu_mes_kernel_doorbell_free(adev, qprops->doorbell_off); clean_up_queue_id: spin_lock_irqsave(&adev->mes.queue_id_lock, flags); idr_remove(&adev->mes.queue_id_idr, queue->queue_id); @@ -724,8 +766,7 @@ int amdgpu_mes_remove_hw_queue(struct amdgpu_device *adev, int queue_id) queue_id); list_del(&queue->list); - amdgpu_mes_kernel_doorbell_free(adev, gang->process, - queue->doorbell_off); + amdgpu_mes_kernel_doorbell_free(adev, queue->doorbell_off); amdgpu_mes_unlock(&adev->mes); amdgpu_mes_queue_free_mqd(queue); @@ -874,6 +915,11 @@ int amdgpu_mes_set_shader_debugger(struct amdgpu_device *adev, op_input.op = MES_MISC_OP_SET_SHADER_DEBUGGER; op_input.set_shader_debugger.process_context_addr = process_context_addr; op_input.set_shader_debugger.flags.u32all = flags; + + /* use amdgpu mes_flush_shader_debugger instead */ + if (op_input.set_shader_debugger.flags.process_ctx_flush) + return -EINVAL; + op_input.set_shader_debugger.spi_gdbg_per_vmid_cntl = spi_gdbg_per_vmid_cntl; memcpy(op_input.set_shader_debugger.tcp_watch_cntl, tcp_watch_cntl, sizeof(op_input.set_shader_debugger.tcp_watch_cntl)); @@ -893,6 +939,32 @@ int amdgpu_mes_set_shader_debugger(struct amdgpu_device *adev, return r; } +int amdgpu_mes_flush_shader_debugger(struct amdgpu_device *adev, + uint64_t process_context_addr) +{ + struct mes_misc_op_input op_input = {0}; + int r; + + if (!adev->mes.funcs->misc_op) { + DRM_ERROR("mes flush shader debugger is not supported!\n"); + return -EINVAL; + } + + op_input.op = MES_MISC_OP_SET_SHADER_DEBUGGER; + op_input.set_shader_debugger.process_context_addr = process_context_addr; + op_input.set_shader_debugger.flags.process_ctx_flush = true; + + amdgpu_mes_lock(&adev->mes); + + r = adev->mes.funcs->misc_op(&adev->mes, &op_input); + if (r) + DRM_ERROR("failed to set_shader_debugger\n"); + + amdgpu_mes_unlock(&adev->mes); + + return r; +} + static void amdgpu_mes_ring_to_queue_props(struct amdgpu_device *adev, struct amdgpu_ring *ring, @@ -994,9 +1066,13 @@ int amdgpu_mes_add_ring(struct amdgpu_device *adev, int gang_id, switch (queue_type) { case AMDGPU_RING_TYPE_GFX: ring->funcs = adev->gfx.gfx_ring[0].funcs; + ring->me = adev->gfx.gfx_ring[0].me; + ring->pipe = adev->gfx.gfx_ring[0].pipe; break; case AMDGPU_RING_TYPE_COMPUTE: ring->funcs = adev->gfx.compute_ring[0].funcs; + ring->me = adev->gfx.compute_ring[0].me; + ring->pipe = adev->gfx.compute_ring[0].pipe; break; case AMDGPU_RING_TYPE_SDMA: ring->funcs = adev->sdma.instance[0].ring.funcs; @@ -1052,6 +1128,7 @@ void amdgpu_mes_remove_ring(struct amdgpu_device *adev, return; amdgpu_mes_remove_hw_queue(adev, ring->hw_queue_id); + del_timer_sync(&ring->fence_drv.fallback_timer); amdgpu_ring_fini(ring); kfree(ring); } @@ -1106,7 +1183,7 @@ int amdgpu_mes_ctx_map_meta_data(struct amdgpu_device *adev, amdgpu_sync_create(&sync); - drm_exec_init(&exec, 0); + drm_exec_init(&exec, 0, 0); drm_exec_until_all_locked(&exec) { r = drm_exec_lock_obj(&exec, &ctx_data->meta_data_obj->tbo.base); @@ -1177,7 +1254,7 @@ int amdgpu_mes_ctx_unmap_meta_data(struct amdgpu_device *adev, struct drm_exec exec; long r; - drm_exec_init(&exec, 0); + drm_exec_init(&exec, 0, 0); drm_exec_until_all_locked(&exec) { r = drm_exec_lock_obj(&exec, &ctx_data->meta_data_obj->tbo.base); @@ -1321,7 +1398,7 @@ int amdgpu_mes_self_test(struct amdgpu_device *adev) goto error_fini; } - ctx_data.meta_data_gpu_addr = AMDGPU_VA_RESERVED_SIZE; + ctx_data.meta_data_gpu_addr = AMDGPU_VA_RESERVED_BOTTOM; r = amdgpu_mes_ctx_map_meta_data(adev, vm, &ctx_data); if (r) { DRM_ERROR("failed to map ctx meta data\n"); @@ -1394,7 +1471,7 @@ int amdgpu_mes_init_microcode(struct amdgpu_device *adev, int pipe) const struct mes_firmware_header_v1_0 *mes_hdr; struct amdgpu_firmware_info *info; char ucode_prefix[30]; - char fw_name[40]; + char fw_name[50]; bool need_retry = false; int r; @@ -1463,3 +1540,33 @@ out: amdgpu_ucode_release(&adev->mes.fw[pipe]); return r; } + +#if defined(CONFIG_DEBUG_FS) + +static int amdgpu_debugfs_mes_event_log_show(struct seq_file *m, void *unused) +{ + struct amdgpu_device *adev = m->private; + uint32_t *mem = (uint32_t *)(adev->mes.event_log_cpu_addr); + + seq_hex_dump(m, "", DUMP_PREFIX_OFFSET, 32, 4, + mem, AMDGPU_MES_LOG_BUFFER_SIZE, false); + + return 0; +} + +DEFINE_SHOW_ATTRIBUTE(amdgpu_debugfs_mes_event_log); + +#endif + +void amdgpu_debugfs_mes_event_log_init(struct amdgpu_device *adev) +{ + +#if defined(CONFIG_DEBUG_FS) + struct drm_minor *minor = adev_to_drm(adev)->primary; + struct dentry *root = minor->debugfs_root; + if (adev->enable_mes && amdgpu_mes_log_enable) + debugfs_create_file("amdgpu_mes_event_log", 0444, root, + adev, &amdgpu_debugfs_mes_event_log_fops); + +#endif +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h index a27b424ffe00..6b3e1844eac5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h @@ -52,6 +52,7 @@ enum amdgpu_mes_priority_level { #define AMDGPU_MES_PROC_CTX_SIZE 0x1000 /* one page area */ #define AMDGPU_MES_GANG_CTX_SIZE 0x1000 /* one page area */ +#define AMDGPU_MES_LOG_BUFFER_SIZE 0x4000 /* Maximu log buffer size for MES */ struct amdgpu_mes_funcs; @@ -133,8 +134,19 @@ struct amdgpu_mes { uint32_t num_mes_dbs; unsigned long *doorbell_bitmap; + /* MES event log buffer */ + struct amdgpu_bo *event_log_gpu_obj; + uint64_t event_log_gpu_addr; + void *event_log_cpu_addr; + /* ip specific functions */ const struct amdgpu_mes_funcs *funcs; + + /* mes resource_1 bo*/ + struct amdgpu_bo *resource_1; + uint64_t resource_1_gpu_addr; + void *resource_1_addr; + }; struct amdgpu_mes_process { @@ -291,9 +303,10 @@ struct mes_misc_op_input { uint64_t process_context_addr; union { struct { - uint64_t single_memop : 1; - uint64_t single_alu_op : 1; - uint64_t reserved: 30; + uint32_t single_memop : 1; + uint32_t single_alu_op : 1; + uint32_t reserved: 29; + uint32_t process_ctx_flush: 1; }; uint32_t u32all; } flags; @@ -369,7 +382,8 @@ int amdgpu_mes_set_shader_debugger(struct amdgpu_device *adev, const uint32_t *tcp_watch_cntl, uint32_t flags, bool trap_en); - +int amdgpu_mes_flush_shader_debugger(struct amdgpu_device *adev, + uint64_t process_context_addr); int amdgpu_mes_add_ring(struct amdgpu_device *adev, int gang_id, int queue_type, int idx, struct amdgpu_mes_ctx_data *ctx_data, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h index 1ca9d4ed8063..95d676ee207f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h @@ -63,6 +63,8 @@ struct amdgpu_mmhub_funcs { uint64_t page_table_base); void (*update_power_gating)(struct amdgpu_device *adev, bool enable); + bool (*query_utcl2_poison_status)(struct amdgpu_device *adev, + int hub_inst); }; struct amdgpu_mmhub { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h index 32fe05c810c6..1fe21a70ddd0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h @@ -32,7 +32,6 @@ #include <drm/display/drm_dp_helper.h> #include <drm/drm_crtc.h> -#include <drm/drm_edid.h> #include <drm/drm_encoder.h> #include <drm/drm_fixed.h> #include <drm/drm_framebuffer.h> @@ -51,6 +50,7 @@ struct amdgpu_device; struct amdgpu_encoder; struct amdgpu_router; struct amdgpu_hpd; +struct edid; #define to_amdgpu_crtc(x) container_of(x, struct amdgpu_crtc, base) #define to_amdgpu_connector(x) container_of(x, struct amdgpu_connector, base) @@ -324,8 +324,6 @@ struct amdgpu_mode_info { struct drm_property *audio_property; /* FMT dithering */ struct drm_property *dither_property; - /* Adaptive Backlight Modulation (power feature) */ - struct drm_property *abm_level_property; /* hardcoded DFP edid from BIOS */ struct edid *bios_hardcoded_edid; int bios_hardcoded_edid_size; @@ -343,6 +341,97 @@ struct amdgpu_mode_info { int disp_priority; const struct amdgpu_display_funcs *funcs; const enum drm_plane_type *plane_type; + + /* Driver-private color mgmt props */ + + /* @plane_degamma_lut_property: Plane property to set a degamma LUT to + * convert encoded values to light linear values before sampling or + * blending. + */ + struct drm_property *plane_degamma_lut_property; + /* @plane_degamma_lut_size_property: Plane property to define the max + * size of degamma LUT as supported by the driver (read-only). + */ + struct drm_property *plane_degamma_lut_size_property; + /** + * @plane_degamma_tf_property: Plane pre-defined transfer function to + * to go from scanout/encoded values to linear values. + */ + struct drm_property *plane_degamma_tf_property; + /** + * @plane_hdr_mult_property: + */ + struct drm_property *plane_hdr_mult_property; + + struct drm_property *plane_ctm_property; + /** + * @shaper_lut_property: Plane property to set pre-blending shaper LUT + * that converts color content before 3D LUT. If + * plane_shaper_tf_property != Identity TF, AMD color module will + * combine the user LUT values with pre-defined TF into the LUT + * parameters to be programmed. + */ + struct drm_property *plane_shaper_lut_property; + /** + * @shaper_lut_size_property: Plane property for the size of + * pre-blending shaper LUT as supported by the driver (read-only). + */ + struct drm_property *plane_shaper_lut_size_property; + /** + * @plane_shaper_tf_property: Plane property to set a predefined + * transfer function for pre-blending shaper (before applying 3D LUT) + * with or without LUT. There is no shaper ROM, but we can use AMD + * color modules to program LUT parameters from predefined TF (or + * from a combination of pre-defined TF and the custom 1D LUT). + */ + struct drm_property *plane_shaper_tf_property; + /** + * @plane_lut3d_property: Plane property for color transformation using + * a 3D LUT (pre-blending), a three-dimensional array where each + * element is an RGB triplet. Each dimension has the size of + * lut3d_size. The array contains samples from the approximated + * function. On AMD, values between samples are estimated by + * tetrahedral interpolation. The array is accessed with three indices, + * one for each input dimension (color channel), blue being the + * outermost dimension, red the innermost. + */ + struct drm_property *plane_lut3d_property; + /** + * @plane_degamma_lut_size_property: Plane property to define the max + * size of 3D LUT as supported by the driver (read-only). The max size + * is the max size of one dimension and, therefore, the max number of + * entries for 3D LUT array is the 3D LUT size cubed; + */ + struct drm_property *plane_lut3d_size_property; + /** + * @plane_blend_lut_property: Plane property for output gamma before + * blending. Userspace set a blend LUT to convert colors after 3D LUT + * conversion. It works as a post-3DLUT 1D LUT. With shaper LUT, they + * are sandwiching 3D LUT with two 1D LUT. If plane_blend_tf_property + * != Identity TF, AMD color module will combine the user LUT values + * with pre-defined TF into the LUT parameters to be programmed. + */ + struct drm_property *plane_blend_lut_property; + /** + * @plane_blend_lut_size_property: Plane property to define the max + * size of blend LUT as supported by the driver (read-only). + */ + struct drm_property *plane_blend_lut_size_property; + /** + * @plane_blend_tf_property: Plane property to set a predefined + * transfer function for pre-blending blend/out_gamma (after applying + * 3D LUT) with or without LUT. There is no blend ROM, but we can use + * AMD color modules to program LUT parameters from predefined TF (or + * from a combination of pre-defined TF and the custom 1D LUT). + */ + struct drm_property *plane_blend_tf_property; + /* @regamma_tf_property: Transfer function for CRTC regamma + * (post-blending). Possible values are defined by `enum + * amdgpu_transfer_function`. There is no regamma ROM, but we can use + * AMD color modules to program LUT parameters from predefined TF (or + * from a combination of pre-defined TF and the custom 1D LUT). + */ + struct drm_property *regamma_tf_property; }; #define AMDGPU_MAX_BL_LEVEL 0xFF @@ -416,6 +505,10 @@ struct amdgpu_crtc { int otg_inst; struct drm_pending_vblank_event *event; + + bool wb_pending; + bool wb_enabled; + struct drm_writeback_connector *wb_conn; }; struct amdgpu_encoder_atom_dig { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c index 51ca544a7094..d085687a47ea 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c @@ -53,14 +53,6 @@ u64 amdgpu_nbio_get_pcie_replay_count(struct amdgpu_device *adev) return 0; } -void amdgpu_nbio_get_pcie_usage(struct amdgpu_device *adev, uint64_t *count0, - uint64_t *count1) -{ - if (adev->nbio.funcs->get_pcie_usage) - adev->nbio.funcs->get_pcie_usage(adev, count0, count1); - -} - int amdgpu_nbio_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block) { int r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h index 65e35059de40..7b8c03be1d9e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h @@ -102,8 +102,6 @@ struct amdgpu_nbio_funcs { u32 (*get_memory_partition_mode)(struct amdgpu_device *adev, u32 *supp_modes); u64 (*get_pcie_replay_count)(struct amdgpu_device *adev); - void (*get_pcie_usage)(struct amdgpu_device *adev, uint64_t *count0, - uint64_t *count1); }; struct amdgpu_nbio { @@ -116,7 +114,6 @@ struct amdgpu_nbio { }; int amdgpu_nbio_ras_sw_init(struct amdgpu_device *adev); -void amdgpu_nbio_get_pcie_usage(struct amdgpu_device *adev, uint64_t *count0, uint64_t *count1); int amdgpu_nbio_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block); u64 amdgpu_nbio_get_pcie_replay_count(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 0dcb6c36b02c..83c499fda14c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -39,6 +39,7 @@ #include "amdgpu.h" #include "amdgpu_trace.h" #include "amdgpu_amdkfd.h" +#include "amdgpu_vram_mgr.h" /** * DOC: amdgpu_object @@ -173,6 +174,12 @@ void amdgpu_bo_placement_from_domain(struct amdgpu_bo *abo, u32 domain) abo->flags & AMDGPU_GEM_CREATE_PREEMPTIBLE ? AMDGPU_PL_PREEMPT : TTM_PL_TT; places[c].flags = 0; + /* + * When GTT is just an alternative to VRAM make sure that we + * only use it as fallback and still try to fill up VRAM first. + */ + if (domain & abo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) + places[c].flags |= TTM_PL_FLAG_FALLBACK; c++; } @@ -220,9 +227,6 @@ void amdgpu_bo_placement_from_domain(struct amdgpu_bo *abo, u32 domain) placement->num_placement = c; placement->placement = places; - - placement->num_busy_placement = c; - placement->busy_placement = places; } /** @@ -598,8 +602,7 @@ int amdgpu_bo_create(struct amdgpu_device *adev, if (!amdgpu_bo_support_uswc(bo->flags)) bo->flags &= ~AMDGPU_GEM_CREATE_CPU_GTT_USWC; - if (adev->ras_enabled) - bo->flags |= AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE; + bo->flags |= AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE; bo->tbo.bdev = &adev->mman.bdev; if (bp->domain & (AMDGPU_GEM_DOMAIN_GWS | AMDGPU_GEM_DOMAIN_OA | @@ -608,6 +611,8 @@ int amdgpu_bo_create(struct amdgpu_device *adev, else amdgpu_bo_placement_from_domain(bo, bp->domain); if (bp->type == ttm_bo_type_kernel) + bo->tbo.priority = 2; + else if (!(bp->flags & AMDGPU_GEM_CREATE_DISCARDABLE)) bo->tbo.priority = 1; if (!bp->destroy) @@ -620,8 +625,7 @@ int amdgpu_bo_create(struct amdgpu_device *adev, return r; if (!amdgpu_gmc_vram_full_visible(&adev->gmc) && - bo->tbo.resource->mem_type == TTM_PL_VRAM && - amdgpu_bo_in_cpu_visible_vram(bo)) + amdgpu_res_cpu_visible(adev, bo->tbo.resource)) amdgpu_cs_report_moved_bytes(adev, ctx.bytes_moved, ctx.bytes_moved); else @@ -631,7 +635,7 @@ int amdgpu_bo_create(struct amdgpu_device *adev, bo->tbo.resource->mem_type == TTM_PL_VRAM) { struct dma_fence *fence; - r = amdgpu_fill_buffer(bo, 0, bo->tbo.base.resv, &fence, true); + r = amdgpu_ttm_clear_buffer(bo, bo->tbo.base.resv, &fence); if (unlikely(r)) goto fail_unreserve; @@ -1062,9 +1066,6 @@ static const char * const amdgpu_vram_names[] = { */ int amdgpu_bo_init(struct amdgpu_device *adev) { - /* set the default AGP aperture state */ - amdgpu_gmc_set_agp_default(adev, &adev->gmc); - /* On A+A platform, VRAM can be mapped as WB */ if (!adev->gmc.xgmi.connected_to_cpu && !adev->gmc.is_app_apu) { /* reserve PAT memory space to WC for VRAM */ @@ -1248,19 +1249,15 @@ int amdgpu_bo_get_metadata(struct amdgpu_bo *bo, void *buffer, * amdgpu_bo_move_notify - notification about a memory move * @bo: pointer to a buffer object * @evict: if this move is evicting the buffer from the graphics address space - * @new_mem: new information of the bufer object * * Marks the corresponding &amdgpu_bo buffer object as invalid, also performs * bookkeeping. * TTM driver callback which is called when ttm moves a buffer. */ -void amdgpu_bo_move_notify(struct ttm_buffer_object *bo, - bool evict, - struct ttm_resource *new_mem) +void amdgpu_bo_move_notify(struct ttm_buffer_object *bo, bool evict) { struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev); struct amdgpu_bo *abo; - struct ttm_resource *old_mem = bo->resource; if (!amdgpu_bo_is_amdgpu_bo(bo)) return; @@ -1277,38 +1274,44 @@ void amdgpu_bo_move_notify(struct ttm_buffer_object *bo, /* remember the eviction */ if (evict) atomic64_inc(&adev->num_evictions); - - /* update statistics */ - if (!new_mem) - return; - - /* move_notify is called before move happens */ - trace_amdgpu_bo_move(abo, new_mem->mem_type, old_mem->mem_type); } void amdgpu_bo_get_memory(struct amdgpu_bo *bo, struct amdgpu_mem_stats *stats) { + struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); + struct ttm_resource *res = bo->tbo.resource; uint64_t size = amdgpu_bo_size(bo); + struct drm_gem_object *obj; unsigned int domain; + bool shared; /* Abort if the BO doesn't currently have a backing store */ - if (!bo->tbo.resource) + if (!res) return; - domain = amdgpu_mem_type_to_domain(bo->tbo.resource->mem_type); + obj = &bo->tbo.base; + shared = drm_gem_object_is_shared_for_memory_stats(obj); + + domain = amdgpu_mem_type_to_domain(res->mem_type); switch (domain) { case AMDGPU_GEM_DOMAIN_VRAM: stats->vram += size; - if (amdgpu_bo_in_cpu_visible_vram(bo)) + if (amdgpu_res_cpu_visible(adev, bo->tbo.resource)) stats->visible_vram += size; + if (shared) + stats->vram_shared += size; break; case AMDGPU_GEM_DOMAIN_GTT: stats->gtt += size; + if (shared) + stats->gtt_shared += size; break; case AMDGPU_GEM_DOMAIN_CPU: default: stats->cpu += size; + if (shared) + stats->cpu_shared += size; break; } @@ -1346,6 +1349,8 @@ void amdgpu_bo_release_notify(struct ttm_buffer_object *bo) abo = ttm_to_amdgpu_bo(bo); + WARN_ON(abo->vm_bo); + if (abo->kfd_bo) amdgpu_amdkfd_release_notify(abo); @@ -1363,8 +1368,9 @@ void amdgpu_bo_release_notify(struct ttm_buffer_object *bo) if (WARN_ON_ONCE(!dma_resv_trylock(bo->base.resv))) return; - r = amdgpu_fill_buffer(abo, AMDGPU_POISON, bo->base.resv, &fence, true); + r = amdgpu_fill_buffer(abo, 0, bo->base.resv, &fence, true); if (!WARN_ON(r)) { + amdgpu_vram_mgr_set_cleared(bo->resource); amdgpu_bo_fence(abo, fence, false); dma_fence_put(fence); } @@ -1393,10 +1399,7 @@ vm_fault_t amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo) /* Remember that this BO was accessed by the CPU */ abo->flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; - if (bo->resource->mem_type != TTM_PL_VRAM) - return 0; - - if (amdgpu_bo_in_cpu_visible_vram(abo)) + if (amdgpu_res_cpu_visible(adev, bo->resource)) return 0; /* Can't move a pinned BO to visible VRAM */ @@ -1409,8 +1412,7 @@ vm_fault_t amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo) AMDGPU_GEM_DOMAIN_GTT); /* Avoid costly evictions; only set GTT as a busy placement */ - abo->placement.num_busy_placement = 1; - abo->placement.busy_placement = &abo->placements[1]; + abo->placements[0].flags |= TTM_PL_FLAG_DESIRED; r = ttm_bo_validate(bo, &abo->placement, &ctx); if (unlikely(r == -EBUSY || r == -ERESTARTSYS)) @@ -1420,7 +1422,7 @@ vm_fault_t amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo) /* this should never happen */ if (bo->resource->mem_type == TTM_PL_VRAM && - !amdgpu_bo_in_cpu_visible_vram(abo)) + !amdgpu_res_cpu_visible(adev, bo->resource)) return VM_FAULT_SIGBUS; ttm_bo_move_to_lru_tail_unlocked(bo); @@ -1530,10 +1532,14 @@ u64 amdgpu_bo_gpu_offset(struct amdgpu_bo *bo) u64 amdgpu_bo_gpu_offset_no_check(struct amdgpu_bo *bo) { struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); - uint64_t offset; + uint64_t offset = AMDGPU_BO_INVALID_OFFSET; - offset = (bo->tbo.resource->start << PAGE_SHIFT) + - amdgpu_ttm_domain_start(adev, bo->tbo.resource->mem_type); + if (bo->tbo.resource->mem_type == TTM_PL_TT) + offset = amdgpu_gmc_agp_addr(&bo->tbo); + + if (offset == AMDGPU_BO_INVALID_OFFSET) + offset = (bo->tbo.resource->start << PAGE_SHIFT) + + amdgpu_ttm_domain_start(adev, bo->tbo.resource->mem_type); return amdgpu_gmc_sign_extend(offset); } @@ -1580,6 +1586,7 @@ uint32_t amdgpu_bo_get_preferred_domain(struct amdgpu_device *adev, */ u64 amdgpu_bo_print_info(int id, struct amdgpu_bo *bo, struct seq_file *m) { + struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); struct dma_buf_attachment *attachment; struct dma_buf *dma_buf; const char *placement; @@ -1588,10 +1595,11 @@ u64 amdgpu_bo_print_info(int id, struct amdgpu_bo *bo, struct seq_file *m) if (dma_resv_trylock(bo->tbo.base.resv)) { unsigned int domain; + domain = amdgpu_mem_type_to_domain(bo->tbo.resource->mem_type); switch (domain) { case AMDGPU_GEM_DOMAIN_VRAM: - if (amdgpu_bo_in_cpu_visible_vram(bo)) + if (amdgpu_res_cpu_visible(adev, bo->tbo.resource)) placement = "VRAM VISIBLE"; else placement = "VRAM"; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h index d28e21baef16..fa03d9e4874c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h @@ -138,12 +138,18 @@ struct amdgpu_bo_vm { struct amdgpu_mem_stats { /* current VRAM usage, includes visible VRAM */ uint64_t vram; + /* current shared VRAM usage, includes visible VRAM */ + uint64_t vram_shared; /* current visible VRAM usage */ uint64_t visible_vram; /* current GTT usage */ uint64_t gtt; + /* current shared GTT usage */ + uint64_t gtt_shared; /* current system memory usage */ uint64_t cpu; + /* current shared system memory usage */ + uint64_t cpu_shared; /* sum of evicted buffers, includes visible VRAM */ uint64_t evicted_vram; /* sum of evicted buffers due to CPU access */ @@ -245,28 +251,6 @@ static inline u64 amdgpu_bo_mmap_offset(struct amdgpu_bo *bo) } /** - * amdgpu_bo_in_cpu_visible_vram - check if BO is (partly) in visible VRAM - */ -static inline bool amdgpu_bo_in_cpu_visible_vram(struct amdgpu_bo *bo) -{ - struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); - struct amdgpu_res_cursor cursor; - - if (!bo->tbo.resource || bo->tbo.resource->mem_type != TTM_PL_VRAM) - return false; - - amdgpu_res_first(bo->tbo.resource, 0, amdgpu_bo_size(bo), &cursor); - while (cursor.remaining) { - if (cursor.start < adev->gmc.visible_vram_size) - return true; - - amdgpu_res_next(&cursor, cursor.size); - } - - return false; -} - -/** * amdgpu_bo_explicit_sync - return whether the bo is explicitly synced */ static inline bool amdgpu_bo_explicit_sync(struct amdgpu_bo *bo) @@ -344,9 +328,7 @@ int amdgpu_bo_set_metadata (struct amdgpu_bo *bo, void *metadata, int amdgpu_bo_get_metadata(struct amdgpu_bo *bo, void *buffer, size_t buffer_size, uint32_t *metadata_size, uint64_t *flags); -void amdgpu_bo_move_notify(struct ttm_buffer_object *bo, - bool evict, - struct ttm_resource *new_mem); +void amdgpu_bo_move_notify(struct ttm_buffer_object *bo, bool evict); void amdgpu_bo_release_notify(struct ttm_buffer_object *bo); vm_fault_t amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo); void amdgpu_bo_fence(struct amdgpu_bo *bo, struct dma_fence *fence, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index b153acf9ed05..4bd4602d11b1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -38,6 +38,7 @@ #include "psp_v12_0.h" #include "psp_v13_0.h" #include "psp_v13_0_4.h" +#include "psp_v14_0.h" #include "amdgpu_ras.h" #include "amdgpu_securedisplay.h" @@ -162,20 +163,26 @@ static int psp_early_init(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; struct psp_context *psp = &adev->psp; + psp->autoload_supported = true; + psp->boot_time_tmr = true; + switch (amdgpu_ip_version(adev, MP0_HWIP, 0)) { case IP_VERSION(9, 0, 0): psp_v3_1_set_psp_funcs(psp); psp->autoload_supported = false; + psp->boot_time_tmr = false; break; case IP_VERSION(10, 0, 0): case IP_VERSION(10, 0, 1): psp_v10_0_set_psp_funcs(psp); psp->autoload_supported = false; + psp->boot_time_tmr = false; break; case IP_VERSION(11, 0, 2): case IP_VERSION(11, 0, 4): psp_v11_0_set_psp_funcs(psp); psp->autoload_supported = false; + psp->boot_time_tmr = false; break; case IP_VERSION(11, 0, 0): case IP_VERSION(11, 0, 7): @@ -188,15 +195,20 @@ static int psp_early_init(void *handle) case IP_VERSION(11, 0, 12): case IP_VERSION(11, 0, 13): psp_v11_0_set_psp_funcs(psp); - psp->autoload_supported = true; + psp->boot_time_tmr = false; break; case IP_VERSION(11, 0, 3): case IP_VERSION(12, 0, 1): psp_v12_0_set_psp_funcs(psp); + psp->autoload_supported = false; + psp->boot_time_tmr = false; break; case IP_VERSION(13, 0, 2): + psp->boot_time_tmr = false; + fallthrough; case IP_VERSION(13, 0, 6): psp_v13_0_set_psp_funcs(psp); + psp->autoload_supported = false; break; case IP_VERSION(13, 0, 1): case IP_VERSION(13, 0, 3): @@ -204,25 +216,31 @@ static int psp_early_init(void *handle) case IP_VERSION(13, 0, 8): case IP_VERSION(13, 0, 11): case IP_VERSION(14, 0, 0): + case IP_VERSION(14, 0, 1): psp_v13_0_set_psp_funcs(psp); - psp->autoload_supported = true; + psp->boot_time_tmr = false; break; case IP_VERSION(11, 0, 8): if (adev->apu_flags & AMD_APU_IS_CYAN_SKILLFISH2) { psp_v11_0_8_set_psp_funcs(psp); - psp->autoload_supported = false; } + psp->autoload_supported = false; + psp->boot_time_tmr = false; break; case IP_VERSION(13, 0, 0): case IP_VERSION(13, 0, 7): case IP_VERSION(13, 0, 10): psp_v13_0_set_psp_funcs(psp); - psp->autoload_supported = true; adev->psp.sup_ifwi_up = !amdgpu_sriov_vf(adev); + psp->boot_time_tmr = false; break; case IP_VERSION(13, 0, 4): psp_v13_0_4_set_psp_funcs(psp); - psp->autoload_supported = true; + psp->boot_time_tmr = false; + break; + case IP_VERSION(14, 0, 2): + case IP_VERSION(14, 0, 3): + psp_v14_0_set_psp_funcs(psp); break; default: return -EINVAL; @@ -230,6 +248,8 @@ static int psp_early_init(void *handle) psp->adev = adev; + adev->psp_timeout = 20000; + psp_check_pmfw_centralized_cstate_management(psp); if (amdgpu_sriov_vf(adev)) @@ -291,21 +311,22 @@ static int psp_memory_training_init(struct psp_context *psp) struct psp_memory_training_context *ctx = &psp->mem_train_ctx; if (ctx->init != PSP_MEM_TRAIN_RESERVE_SUCCESS) { - DRM_DEBUG("memory training is not supported!\n"); + dev_dbg(psp->adev->dev, "memory training is not supported!\n"); return 0; } ctx->sys_cache = kzalloc(ctx->train_data_size, GFP_KERNEL); if (ctx->sys_cache == NULL) { - DRM_ERROR("alloc mem_train_ctx.sys_cache failed!\n"); + dev_err(psp->adev->dev, "alloc mem_train_ctx.sys_cache failed!\n"); ret = -ENOMEM; goto Err_out; } - DRM_DEBUG("train_data_size:%llx,p2c_train_data_offset:%llx,c2p_train_data_offset:%llx.\n", - ctx->train_data_size, - ctx->p2c_train_data_offset, - ctx->c2p_train_data_offset); + dev_dbg(psp->adev->dev, + "train_data_size:%llx,p2c_train_data_offset:%llx,c2p_train_data_offset:%llx.\n", + ctx->train_data_size, + ctx->p2c_train_data_offset, + ctx->c2p_train_data_offset); ctx->init = PSP_MEM_TRAIN_INIT_SUCCESS; return 0; @@ -407,7 +428,7 @@ static int psp_sw_init(void *handle) psp->cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); if (!psp->cmd) { - DRM_ERROR("Failed to allocate memory to command buffer!\n"); + dev_err(adev->dev, "Failed to allocate memory to command buffer!\n"); ret = -ENOMEM; } @@ -454,19 +475,19 @@ static int psp_sw_init(void *handle) if (mem_training_ctx->enable_mem_training) { ret = psp_memory_training_init(psp); if (ret) { - DRM_ERROR("Failed to initialize memory training!\n"); + dev_err(adev->dev, "Failed to initialize memory training!\n"); return ret; } ret = psp_mem_training(psp, PSP_MEM_TRAIN_COLD_BOOT); if (ret) { - DRM_ERROR("Failed to process memory training!\n"); + dev_err(adev->dev, "Failed to process memory training!\n"); return ret; } } ret = amdgpu_bo_create_kernel(adev, PSP_1_MEG, PSP_1_MEG, - amdgpu_sriov_vf(adev) ? + (amdgpu_sriov_vf(adev) || adev->debug_use_vram_fw_buf) ? AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT, &psp->fw_pri_bo, &psp->fw_pri_mc_addr, @@ -626,7 +647,7 @@ psp_cmd_submit_buf(struct psp_context *psp, { int ret; int index; - int timeout = 20000; + int timeout = psp->adev->psp_timeout; bool ras_intr = false; bool skip_unsupport = false; @@ -675,9 +696,11 @@ psp_cmd_submit_buf(struct psp_context *psp, */ if (!skip_unsupport && (psp->cmd_buf_mem->resp.status || !timeout) && !ras_intr) { if (ucode) - DRM_WARN("failed to load ucode %s(0x%X) ", - amdgpu_ucode_name(ucode->ucode_id), ucode->ucode_id); - DRM_WARN("psp gfx command %s(0x%X) failed and response status is (0x%X)\n", + dev_warn(psp->adev->dev, + "failed to load ucode %s(0x%X) ", + amdgpu_ucode_name(ucode->ucode_id), ucode->ucode_id); + dev_warn(psp->adev->dev, + "psp gfx command %s(0x%X) failed and response status is (0x%X)\n", psp_gfx_cmd_name(psp->cmd_buf_mem->cmd_id), psp->cmd_buf_mem->cmd_id, psp->cmd_buf_mem->resp.status); /* If any firmware (including CAP) load fails under SRIOV, it should @@ -771,16 +794,6 @@ static int psp_load_toc(struct psp_context *psp, return ret; } -static bool psp_boottime_tmr(struct psp_context *psp) -{ - switch (amdgpu_ip_version(psp->adev, MP0_HWIP, 0)) { - case IP_VERSION(13, 0, 6): - return true; - default: - return false; - } -} - /* Set up Trusted Memory Region */ static int psp_tmr_init(struct psp_context *psp) { @@ -807,12 +820,12 @@ static int psp_tmr_init(struct psp_context *psp) psp->fw_pri_buf) { ret = psp_load_toc(psp, &tmr_size); if (ret) { - DRM_ERROR("Failed to load toc\n"); + dev_err(psp->adev->dev, "Failed to load toc\n"); return ret; } } - if (!psp->tmr_bo) { + if (!psp->tmr_bo && !psp->boot_time_tmr) { pptr = amdgpu_sriov_vf(psp->adev) ? &tmr_buf : NULL; ret = amdgpu_bo_create_kernel(psp->adev, tmr_size, PSP_TMR_ALIGNMENT, @@ -855,7 +868,7 @@ static int psp_tmr_load(struct psp_context *psp) psp_prep_tmr_cmd_buf(psp, cmd, psp->tmr_mc_addr, psp->tmr_bo); if (psp->tmr_bo) - DRM_INFO("reserve 0x%lx from 0x%llx for PSP TMR\n", + dev_info(psp->adev->dev, "reserve 0x%lx from 0x%llx for PSP TMR\n", amdgpu_bo_size(psp->tmr_bo), psp->tmr_mc_addr); ret = psp_cmd_submit_buf(psp, NULL, cmd, @@ -1040,6 +1053,11 @@ static int psp_asd_initialize(struct psp_context *psp) if (amdgpu_sriov_vf(psp->adev) || !psp->asd_context.bin_desc.size_bytes) return 0; + /* bypass asd if display hardware is not available */ + if (!amdgpu_device_has_display_hardware(psp->adev) && + amdgpu_ip_version(psp->adev, MP0_HWIP, 0) >= IP_VERSION(13, 0, 10)) + return 0; + psp->asd_context.mem_context.shared_mc_addr = 0; psp->asd_context.mem_context.shared_mem_size = PSP_ASD_SHARED_MEM_SIZE; psp->asd_context.ta_load_type = GFX_CMD_ID_LOAD_ASD; @@ -1113,7 +1131,7 @@ int psp_reg_program(struct psp_context *psp, enum psp_reg_prog_id reg, psp_prep_reg_prog_cmd_buf(cmd, reg, value); ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr); if (ret) - DRM_ERROR("PSP failed to program reg id %d", reg); + dev_err(psp->adev->dev, "PSP failed to program reg id %d\n", reg); release_psp_cmd_buf(psp); @@ -1267,6 +1285,8 @@ invoke: xgmi_cmd->cmd_id = TA_COMMAND_XGMI__INITIALIZE; ret = psp_xgmi_invoke(psp, xgmi_cmd->cmd_id); + /* note down the capbility flag for XGMI TA */ + psp->xgmi_context.xgmi_ta_caps = xgmi_cmd->caps_flag; return ret; } @@ -1388,7 +1408,7 @@ int psp_xgmi_get_topology_info(struct psp_context *psp, /* Fill in the shared memory with topology information as input */ topology_info_input = &xgmi_cmd->xgmi_in_message.get_topology_info; - xgmi_cmd->cmd_id = TA_COMMAND_XGMI__GET_GET_TOPOLOGY_INFO; + xgmi_cmd->cmd_id = TA_COMMAND_XGMI__GET_TOPOLOGY_INFO; topology_info_input->num_nodes = number_devices; for (i = 0; i < topology_info_input->num_nodes; i++) { @@ -1399,7 +1419,7 @@ int psp_xgmi_get_topology_info(struct psp_context *psp, } /* Invoke xgmi ta to get the topology information */ - ret = psp_xgmi_invoke(psp, TA_COMMAND_XGMI__GET_GET_TOPOLOGY_INFO); + ret = psp_xgmi_invoke(psp, TA_COMMAND_XGMI__GET_TOPOLOGY_INFO); if (ret) return ret; @@ -1424,28 +1444,58 @@ int psp_xgmi_get_topology_info(struct psp_context *psp, /* Invoke xgmi ta again to get the link information */ if (psp_xgmi_peer_link_info_supported(psp)) { - struct ta_xgmi_cmd_get_peer_link_info_output *link_info_output; + struct ta_xgmi_cmd_get_peer_link_info *link_info_output; + struct ta_xgmi_cmd_get_extend_peer_link_info *link_extend_info_output; bool requires_reflection = (psp->xgmi_context.supports_extended_data && get_extended_data) || amdgpu_ip_version(psp->adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 6); + bool ta_port_num_support = amdgpu_sriov_vf(psp->adev) ? 0 : + psp->xgmi_context.xgmi_ta_caps & EXTEND_PEER_LINK_INFO_CMD_FLAG; - xgmi_cmd->cmd_id = TA_COMMAND_XGMI__GET_PEER_LINKS; + /* popluate the shared output buffer rather than the cmd input buffer + * with node_ids as the input for GET_PEER_LINKS command execution. + * This is required for GET_PEER_LINKS per xgmi ta implementation. + * The same requirement for GET_EXTEND_PEER_LINKS command. + */ + if (ta_port_num_support) { + link_extend_info_output = &xgmi_cmd->xgmi_out_message.get_extend_link_info; - ret = psp_xgmi_invoke(psp, TA_COMMAND_XGMI__GET_PEER_LINKS); + for (i = 0; i < topology->num_nodes; i++) + link_extend_info_output->nodes[i].node_id = topology->nodes[i].node_id; + link_extend_info_output->num_nodes = topology->num_nodes; + xgmi_cmd->cmd_id = TA_COMMAND_XGMI__GET_EXTEND_PEER_LINKS; + } else { + link_info_output = &xgmi_cmd->xgmi_out_message.get_link_info; + + for (i = 0; i < topology->num_nodes; i++) + link_info_output->nodes[i].node_id = topology->nodes[i].node_id; + + link_info_output->num_nodes = topology->num_nodes; + xgmi_cmd->cmd_id = TA_COMMAND_XGMI__GET_PEER_LINKS; + } + + ret = psp_xgmi_invoke(psp, xgmi_cmd->cmd_id); if (ret) return ret; - link_info_output = &xgmi_cmd->xgmi_out_message.get_link_info; for (i = 0; i < topology->num_nodes; i++) { + uint8_t node_num_links = ta_port_num_support ? + link_extend_info_output->nodes[i].num_links : link_info_output->nodes[i].num_links; /* accumulate num_links on extended data */ - topology->nodes[i].num_links = get_extended_data ? - topology->nodes[i].num_links + - link_info_output->nodes[i].num_links : - ((requires_reflection && topology->nodes[i].num_links) ? topology->nodes[i].num_links : - link_info_output->nodes[i].num_links); + if (get_extended_data) { + topology->nodes[i].num_links = topology->nodes[i].num_links + node_num_links; + } else { + topology->nodes[i].num_links = (requires_reflection && topology->nodes[i].num_links) ? + topology->nodes[i].num_links : node_num_links; + } + /* popluate the connected port num info if supported and available */ + if (ta_port_num_support && topology->nodes[i].num_links) { + memcpy(topology->nodes[i].port_num, link_extend_info_output->nodes[i].port_num, + sizeof(struct xgmi_connected_port_num) * TA_XGMI__MAX_PORT_NUM); + } /* reflect the topology information for bi-directionality */ if (requires_reflection && topology->nodes[i].num_hops) @@ -1494,22 +1544,22 @@ static void psp_ras_ta_check_status(struct psp_context *psp) switch (ras_cmd->ras_status) { case TA_RAS_STATUS__ERROR_UNSUPPORTED_IP: dev_warn(psp->adev->dev, - "RAS WARNING: cmd failed due to unsupported ip\n"); + "RAS WARNING: cmd failed due to unsupported ip\n"); break; case TA_RAS_STATUS__ERROR_UNSUPPORTED_ERROR_INJ: dev_warn(psp->adev->dev, - "RAS WARNING: cmd failed due to unsupported error injection\n"); + "RAS WARNING: cmd failed due to unsupported error injection\n"); break; case TA_RAS_STATUS__SUCCESS: break; case TA_RAS_STATUS__TEE_ERROR_ACCESS_DENIED: if (ras_cmd->cmd_id == TA_RAS_COMMAND__TRIGGER_ERROR) dev_warn(psp->adev->dev, - "RAS WARNING: Inject error to critical region is not allowed\n"); + "RAS WARNING: Inject error to critical region is not allowed\n"); break; default: dev_warn(psp->adev->dev, - "RAS WARNING: ras status = 0x%X\n", ras_cmd->ras_status); + "RAS WARNING: ras status = 0x%X\n", ras_cmd->ras_status); break; } } @@ -1533,7 +1583,7 @@ int psp_ras_invoke(struct psp_context *psp, uint32_t ta_cmd_id) return ret; if (ras_cmd->if_version > RAS_TA_HOST_IF_VER) { - DRM_WARN("RAS: Unsupported Interface"); + dev_warn(psp->adev->dev, "RAS: Unsupported Interface\n"); return -EINVAL; } @@ -1683,7 +1733,7 @@ int psp_ras_initialize(struct psp_context *psp) psp->ras_context.context.initialized = true; else { if (ras_cmd->ras_status) - dev_warn(psp->adev->dev, "RAS Init Status: 0x%X\n", ras_cmd->ras_status); + dev_warn(adev->dev, "RAS Init Status: 0x%X\n", ras_cmd->ras_status); /* fail to load RAS TA */ psp->ras_context.context.initialized = false; @@ -1747,6 +1797,31 @@ int psp_ras_trigger_error(struct psp_context *psp, return 0; } + +int psp_ras_query_address(struct psp_context *psp, + struct ta_ras_query_address_input *addr_in, + struct ta_ras_query_address_output *addr_out) +{ + struct ta_ras_shared_memory *ras_cmd; + int ret; + + if (!psp->ras_context.context.initialized) + return -EINVAL; + + ras_cmd = (struct ta_ras_shared_memory *)psp->ras_context.context.mem_context.shared_buf; + memset(ras_cmd, 0, sizeof(struct ta_ras_shared_memory)); + + ras_cmd->cmd_id = TA_RAS_COMMAND__QUERY_ADDRESS; + ras_cmd->ras_in_message.address = *addr_in; + + ret = psp_ras_invoke(psp, ras_cmd->cmd_id); + if (ret || ras_cmd->ras_status || psp->cmd_buf_mem->resp.status) + return -EINVAL; + + *addr_out = ras_cmd->ras_out_message.address; + + return 0; +} // ras end // HDCP start @@ -1760,6 +1835,10 @@ static int psp_hdcp_initialize(struct psp_context *psp) if (amdgpu_sriov_vf(psp->adev)) return 0; + /* bypass hdcp initialization if dmu is harvested */ + if (!amdgpu_device_has_display_hardware(psp->adev)) + return 0; + if (!psp->hdcp_context.context.bin_desc.size_bytes || !psp->hdcp_context.context.bin_desc.start_addr) { dev_info(psp->adev->dev, "HDCP: optional hdcp ta ucode is not available\n"); @@ -1792,6 +1871,9 @@ int psp_hdcp_invoke(struct psp_context *psp, uint32_t ta_cmd_id) if (amdgpu_sriov_vf(psp->adev)) return 0; + if (!psp->hdcp_context.context.initialized) + return 0; + return psp_ta_invoke(psp, ta_cmd_id, &psp->hdcp_context.context); } @@ -1827,6 +1909,10 @@ static int psp_dtm_initialize(struct psp_context *psp) if (amdgpu_sriov_vf(psp->adev)) return 0; + /* bypass dtm initialization if dmu is harvested */ + if (!amdgpu_device_has_display_hardware(psp->adev)) + return 0; + if (!psp->dtm_context.context.bin_desc.size_bytes || !psp->dtm_context.context.bin_desc.start_addr) { dev_info(psp->adev->dev, "DTM: optional dtm ta ucode is not available\n"); @@ -1859,6 +1945,9 @@ int psp_dtm_invoke(struct psp_context *psp, uint32_t ta_cmd_id) if (amdgpu_sriov_vf(psp->adev)) return 0; + if (!psp->dtm_context.context.initialized) + return 0; + return psp_ta_invoke(psp, ta_cmd_id, &psp->dtm_context.context); } @@ -1993,6 +2082,10 @@ static int psp_securedisplay_initialize(struct psp_context *psp) if (amdgpu_sriov_vf(psp->adev)) return 0; + /* bypass securedisplay initialization if dmu is harvested */ + if (!amdgpu_device_has_display_hardware(psp->adev)) + return 0; + if (!psp->securedisplay_context.context.bin_desc.size_bytes || !psp->securedisplay_context.context.bin_desc.start_addr) { dev_info(psp->adev->dev, "SECUREDISPLAY: securedisplay ta ucode is not available\n"); @@ -2093,6 +2186,16 @@ int amdgpu_psp_wait_for_bootloader(struct amdgpu_device *adev) return ret; } +bool amdgpu_psp_get_ras_capability(struct psp_context *psp) +{ + if (psp->funcs && + psp->funcs->get_ras_capability) { + return psp->funcs->get_ras_capability(psp); + } else { + return false; + } +} + static int psp_hw_start(struct psp_context *psp) { struct amdgpu_device *adev = psp->adev; @@ -2103,7 +2206,7 @@ static int psp_hw_start(struct psp_context *psp) (psp->funcs->bootloader_load_kdb != NULL)) { ret = psp_bootloader_load_kdb(psp); if (ret) { - DRM_ERROR("PSP load kdb failed!\n"); + dev_err(adev->dev, "PSP load kdb failed!\n"); return ret; } } @@ -2112,7 +2215,7 @@ static int psp_hw_start(struct psp_context *psp) (psp->funcs->bootloader_load_spl != NULL)) { ret = psp_bootloader_load_spl(psp); if (ret) { - DRM_ERROR("PSP load spl failed!\n"); + dev_err(adev->dev, "PSP load spl failed!\n"); return ret; } } @@ -2121,7 +2224,7 @@ static int psp_hw_start(struct psp_context *psp) (psp->funcs->bootloader_load_sysdrv != NULL)) { ret = psp_bootloader_load_sysdrv(psp); if (ret) { - DRM_ERROR("PSP load sys drv failed!\n"); + dev_err(adev->dev, "PSP load sys drv failed!\n"); return ret; } } @@ -2130,7 +2233,7 @@ static int psp_hw_start(struct psp_context *psp) (psp->funcs->bootloader_load_soc_drv != NULL)) { ret = psp_bootloader_load_soc_drv(psp); if (ret) { - DRM_ERROR("PSP load soc drv failed!\n"); + dev_err(adev->dev, "PSP load soc drv failed!\n"); return ret; } } @@ -2139,7 +2242,7 @@ static int psp_hw_start(struct psp_context *psp) (psp->funcs->bootloader_load_intf_drv != NULL)) { ret = psp_bootloader_load_intf_drv(psp); if (ret) { - DRM_ERROR("PSP load intf drv failed!\n"); + dev_err(adev->dev, "PSP load intf drv failed!\n"); return ret; } } @@ -2148,7 +2251,7 @@ static int psp_hw_start(struct psp_context *psp) (psp->funcs->bootloader_load_dbg_drv != NULL)) { ret = psp_bootloader_load_dbg_drv(psp); if (ret) { - DRM_ERROR("PSP load dbg drv failed!\n"); + dev_err(adev->dev, "PSP load dbg drv failed!\n"); return ret; } } @@ -2157,7 +2260,16 @@ static int psp_hw_start(struct psp_context *psp) (psp->funcs->bootloader_load_ras_drv != NULL)) { ret = psp_bootloader_load_ras_drv(psp); if (ret) { - DRM_ERROR("PSP load ras_drv failed!\n"); + dev_err(adev->dev, "PSP load ras_drv failed!\n"); + return ret; + } + } + + if ((is_psp_fw_valid(psp->ipkeymgr_drv)) && + (psp->funcs->bootloader_load_ipkeymgr_drv != NULL)) { + ret = psp_bootloader_load_ipkeymgr_drv(psp); + if (ret) { + dev_err(adev->dev, "PSP load ipkeymgr_drv failed!\n"); return ret; } } @@ -2166,7 +2278,7 @@ static int psp_hw_start(struct psp_context *psp) (psp->funcs->bootloader_load_sos != NULL)) { ret = psp_bootloader_load_sos(psp); if (ret) { - DRM_ERROR("PSP load sos failed!\n"); + dev_err(adev->dev, "PSP load sos failed!\n"); return ret; } } @@ -2174,17 +2286,17 @@ static int psp_hw_start(struct psp_context *psp) ret = psp_ring_create(psp, PSP_RING_TYPE__KM); if (ret) { - DRM_ERROR("PSP create ring failed!\n"); + dev_err(adev->dev, "PSP create ring failed!\n"); return ret; } if (amdgpu_sriov_vf(adev) && amdgpu_in_reset(adev)) goto skip_pin_bo; - if (!psp_boottime_tmr(psp)) { + if (!psp->boot_time_tmr || psp->autoload_supported) { ret = psp_tmr_init(psp); if (ret) { - DRM_ERROR("PSP tmr init failed!\n"); + dev_err(adev->dev, "PSP tmr init failed!\n"); return ret; } } @@ -2201,10 +2313,12 @@ skip_pin_bo: return ret; } - ret = psp_tmr_load(psp); - if (ret) { - DRM_ERROR("PSP load tmr failed!\n"); - return ret; + if (!psp->boot_time_tmr || !psp->autoload_supported) { + ret = psp_tmr_load(psp); + if (ret) { + dev_err(adev->dev, "PSP load tmr failed!\n"); + return ret; + } } return 0; @@ -2415,6 +2529,9 @@ static int psp_get_fw_type(struct amdgpu_firmware_info *ucode, case AMDGPU_UCODE_ID_P2S_TABLE: *type = GFX_FW_TYPE_P2S_TABLE; break; + case AMDGPU_UCODE_ID_JPEG_RAM: + *type = GFX_FW_TYPE_JPEG_RAM; + break; case AMDGPU_UCODE_ID_MAXIMUM: default: return -EINVAL; @@ -2471,7 +2588,8 @@ static void psp_print_fw_hdr(struct psp_context *psp, } } -static int psp_prep_load_ip_fw_cmd_buf(struct amdgpu_firmware_info *ucode, +static int psp_prep_load_ip_fw_cmd_buf(struct psp_context *psp, + struct amdgpu_firmware_info *ucode, struct psp_gfx_cmd_resp *cmd) { int ret; @@ -2484,7 +2602,7 @@ static int psp_prep_load_ip_fw_cmd_buf(struct amdgpu_firmware_info *ucode, ret = psp_get_fw_type(ucode, &cmd->cmd.cmd_load_ip_fw.fw_type); if (ret) - DRM_ERROR("Unknown firmware type\n"); + dev_err(psp->adev->dev, "Unknown firmware type\n"); return ret; } @@ -2495,7 +2613,7 @@ int psp_execute_ip_fw_load(struct psp_context *psp, int ret = 0; struct psp_gfx_cmd_resp *cmd = acquire_psp_cmd_buf(psp); - ret = psp_prep_load_ip_fw_cmd_buf(ucode, cmd); + ret = psp_prep_load_ip_fw_cmd_buf(psp, ucode, cmd); if (!ret) { ret = psp_cmd_submit_buf(psp, ucode, cmd, psp->fence_buf_mc_addr); @@ -2513,7 +2631,8 @@ static int psp_load_p2s_table(struct psp_context *psp) struct amdgpu_firmware_info *ucode = &adev->firmware.ucode[AMDGPU_UCODE_ID_P2S_TABLE]; - if (adev->in_runpm && (adev->pm.rpm_mode == AMDGPU_RUNPM_BACO)) + if (adev->in_runpm && ((adev->pm.rpm_mode == AMDGPU_RUNPM_BACO) || + (adev->pm.rpm_mode == AMDGPU_RUNPM_BAMACO))) return 0; if (amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 6)) { @@ -2543,7 +2662,8 @@ static int psp_load_smu_fw(struct psp_context *psp) * Skip SMU FW reloading in case of using BACO for runpm only, * as SMU is always alive. */ - if (adev->in_runpm && (adev->pm.rpm_mode == AMDGPU_RUNPM_BACO)) + if (adev->in_runpm && ((adev->pm.rpm_mode == AMDGPU_RUNPM_BACO) || + (adev->pm.rpm_mode == AMDGPU_RUNPM_BAMACO))) return 0; if (!ucode->fw || amdgpu_sriov_vf(psp->adev)) @@ -2554,13 +2674,13 @@ static int psp_load_smu_fw(struct psp_context *psp) amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(11, 0, 2)))) { ret = amdgpu_dpm_set_mp1_state(adev, PP_MP1_STATE_UNLOAD); if (ret) - DRM_WARN("Failed to set MP1 state prepare for reload\n"); + dev_err(adev->dev, "Failed to set MP1 state prepare for reload\n"); } ret = psp_execute_ip_fw_load(psp, ucode); if (ret) - DRM_ERROR("PSP load smu failed!\n"); + dev_err(adev->dev, "PSP load smu failed!\n"); return ret; } @@ -2665,7 +2785,7 @@ static int psp_load_non_psp_fw(struct psp_context *psp) adev->virt.autoload_ucode_id : AMDGPU_UCODE_ID_RLC_G)) { ret = psp_rlc_autoload_start(psp); if (ret) { - DRM_ERROR("Failed to start rlc autoload\n"); + dev_err(adev->dev, "Failed to start rlc autoload\n"); return ret; } } @@ -2687,7 +2807,7 @@ static int psp_load_fw(struct amdgpu_device *adev) ret = psp_ring_init(psp, PSP_RING_TYPE__KM); if (ret) { - DRM_ERROR("PSP ring init failed!\n"); + dev_err(adev->dev, "PSP ring init failed!\n"); goto failed; } } @@ -2702,13 +2822,13 @@ static int psp_load_fw(struct amdgpu_device *adev) ret = psp_asd_initialize(psp); if (ret) { - DRM_ERROR("PSP load asd failed!\n"); + dev_err(adev->dev, "PSP load asd failed!\n"); goto failed1; } ret = psp_rl_load(adev); if (ret) { - DRM_ERROR("PSP load RL failed!\n"); + dev_err(adev->dev, "PSP load RL failed!\n"); goto failed1; } @@ -2728,7 +2848,7 @@ static int psp_load_fw(struct amdgpu_device *adev) ret = psp_ras_initialize(psp); if (ret) dev_err(psp->adev->dev, - "RAS: Failed to initialize RAS\n"); + "RAS: Failed to initialize RAS\n"); ret = psp_hdcp_initialize(psp); if (ret) @@ -2781,7 +2901,7 @@ static int psp_hw_init(void *handle) ret = psp_load_fw(adev); if (ret) { - DRM_ERROR("PSP firmware loading failed\n"); + dev_err(adev->dev, "PSP firmware loading failed\n"); goto failed; } @@ -2828,7 +2948,7 @@ static int psp_suspend(void *handle) psp->xgmi_context.context.initialized) { ret = psp_xgmi_terminate(psp); if (ret) { - DRM_ERROR("Failed to terminate xgmi ta\n"); + dev_err(adev->dev, "Failed to terminate xgmi ta\n"); goto out; } } @@ -2836,46 +2956,46 @@ static int psp_suspend(void *handle) if (psp->ta_fw) { ret = psp_ras_terminate(psp); if (ret) { - DRM_ERROR("Failed to terminate ras ta\n"); + dev_err(adev->dev, "Failed to terminate ras ta\n"); goto out; } ret = psp_hdcp_terminate(psp); if (ret) { - DRM_ERROR("Failed to terminate hdcp ta\n"); + dev_err(adev->dev, "Failed to terminate hdcp ta\n"); goto out; } ret = psp_dtm_terminate(psp); if (ret) { - DRM_ERROR("Failed to terminate dtm ta\n"); + dev_err(adev->dev, "Failed to terminate dtm ta\n"); goto out; } ret = psp_rap_terminate(psp); if (ret) { - DRM_ERROR("Failed to terminate rap ta\n"); + dev_err(adev->dev, "Failed to terminate rap ta\n"); goto out; } ret = psp_securedisplay_terminate(psp); if (ret) { - DRM_ERROR("Failed to terminate securedisplay ta\n"); + dev_err(adev->dev, "Failed to terminate securedisplay ta\n"); goto out; } } ret = psp_asd_terminate(psp); if (ret) { - DRM_ERROR("Failed to terminate asd\n"); + dev_err(adev->dev, "Failed to terminate asd\n"); goto out; } ret = psp_tmr_terminate(psp); if (ret) { - DRM_ERROR("Failed to terminate tmr\n"); + dev_err(adev->dev, "Failed to terminate tmr\n"); goto out; } ret = psp_ring_stop(psp, PSP_RING_TYPE__KM); if (ret) - DRM_ERROR("PSP ring stop failed\n"); + dev_err(adev->dev, "PSP ring stop failed\n"); out: return ret; @@ -2887,12 +3007,12 @@ static int psp_resume(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; struct psp_context *psp = &adev->psp; - DRM_INFO("PSP is resuming...\n"); + dev_info(adev->dev, "PSP is resuming...\n"); if (psp->mem_train_ctx.enable_mem_training) { ret = psp_mem_training(psp, PSP_MEM_TRAIN_RESUME); if (ret) { - DRM_ERROR("Failed to process memory training!\n"); + dev_err(adev->dev, "Failed to process memory training!\n"); return ret; } } @@ -2909,7 +3029,7 @@ static int psp_resume(void *handle) ret = psp_asd_initialize(psp); if (ret) { - DRM_ERROR("PSP load asd failed!\n"); + dev_err(adev->dev, "PSP load asd failed!\n"); goto failed; } @@ -2933,7 +3053,7 @@ static int psp_resume(void *handle) ret = psp_ras_initialize(psp); if (ret) dev_err(psp->adev->dev, - "RAS: Failed to initialize RAS\n"); + "RAS: Failed to initialize RAS\n"); ret = psp_hdcp_initialize(psp); if (ret) @@ -2961,7 +3081,7 @@ static int psp_resume(void *handle) return 0; failed: - DRM_ERROR("PSP resume failed\n"); + dev_err(adev->dev, "PSP resume failed\n"); mutex_unlock(&adev->firmware.mutex); return ret; } @@ -3022,9 +3142,11 @@ int psp_ring_cmd_submit(struct psp_context *psp, write_frame = ring_buffer_start + (psp_write_ptr_reg / rb_frame_size_dw); /* Check invalid write_frame ptr address */ if ((write_frame < ring_buffer_start) || (ring_buffer_end < write_frame)) { - DRM_ERROR("ring_buffer_start = %p; ring_buffer_end = %p; write_frame = %p\n", - ring_buffer_start, ring_buffer_end, write_frame); - DRM_ERROR("write_frame is pointing to address out of bounds\n"); + dev_err(adev->dev, + "ring_buffer_start = %p; ring_buffer_end = %p; write_frame = %p\n", + ring_buffer_start, ring_buffer_end, write_frame); + dev_err(adev->dev, + "write_frame is pointing to address out of bounds\n"); return -EINVAL; } @@ -3167,6 +3289,12 @@ static int parse_sos_bin_descriptor(struct psp_context *psp, psp->ras_drv.size_bytes = le32_to_cpu(desc->size_bytes); psp->ras_drv.start_addr = ucode_start_addr; break; + case PSP_FW_TYPE_PSP_IPKEYMGR_DRV: + psp->ipkeymgr_drv.fw_version = le32_to_cpu(desc->fw_version); + psp->ipkeymgr_drv.feature_version = le32_to_cpu(desc->fw_version); + psp->ipkeymgr_drv.size_bytes = le32_to_cpu(desc->size_bytes); + psp->ipkeymgr_drv.start_addr = ucode_start_addr; + break; default: dev_warn(psp->adev->dev, "Unsupported PSP FW type: %d\n", desc->fw_type); break; @@ -3550,7 +3678,7 @@ static ssize_t psp_usbc_pd_fw_sysfs_read(struct device *dev, int ret; if (!adev->ip_blocks[AMD_IP_BLOCK_TYPE_PSP].status.late_initialized) { - DRM_INFO("PSP block is not ready yet."); + dev_info(adev->dev, "PSP block is not ready yet\n."); return -EBUSY; } @@ -3559,7 +3687,7 @@ static ssize_t psp_usbc_pd_fw_sysfs_read(struct device *dev, mutex_unlock(&adev->psp.mutex); if (ret) { - DRM_ERROR("Failed to read USBC PD FW, err = %d", ret); + dev_err(adev->dev, "Failed to read USBC PD FW, err = %d\n", ret); return ret; } @@ -3581,7 +3709,7 @@ static ssize_t psp_usbc_pd_fw_sysfs_write(struct device *dev, void *fw_pri_cpu_addr; if (!adev->ip_blocks[AMD_IP_BLOCK_TYPE_PSP].status.late_initialized) { - DRM_INFO("PSP block is not ready yet."); + dev_err(adev->dev, "PSP block is not ready yet."); return -EBUSY; } @@ -3614,7 +3742,7 @@ rel_buf: release_firmware(usbc_pd_fw); fail: if (ret) { - DRM_ERROR("Failed to load USBC PD FW, err = %d", ret); + dev_err(adev->dev, "Failed to load USBC PD FW, err = %d", ret); count = ret; } @@ -3661,7 +3789,7 @@ static ssize_t amdgpu_psp_vbflash_write(struct file *filp, struct kobject *kobj, /* Safeguard against memory drain */ if (adev->psp.vbflash_image_size > AMD_VBIOS_FILE_MAX_SIZE_B) { - dev_err(adev->dev, "File size cannot exceed %u", AMD_VBIOS_FILE_MAX_SIZE_B); + dev_err(adev->dev, "File size cannot exceed %u\n", AMD_VBIOS_FILE_MAX_SIZE_B); kvfree(adev->psp.vbflash_tmp_buf); adev->psp.vbflash_tmp_buf = NULL; adev->psp.vbflash_image_size = 0; @@ -3680,7 +3808,7 @@ static ssize_t amdgpu_psp_vbflash_write(struct file *filp, struct kobject *kobj, adev->psp.vbflash_image_size += count; mutex_unlock(&adev->psp.mutex); - dev_dbg(adev->dev, "IFWI staged for update"); + dev_dbg(adev->dev, "IFWI staged for update\n"); return count; } @@ -3700,7 +3828,7 @@ static ssize_t amdgpu_psp_vbflash_read(struct file *filp, struct kobject *kobj, if (adev->psp.vbflash_image_size == 0) return -EINVAL; - dev_dbg(adev->dev, "PSP IFWI flash process initiated"); + dev_dbg(adev->dev, "PSP IFWI flash process initiated\n"); ret = amdgpu_bo_create_kernel(adev, adev->psp.vbflash_image_size, AMDGPU_GPU_PAGE_SIZE, @@ -3725,11 +3853,11 @@ rel_buf: adev->psp.vbflash_image_size = 0; if (ret) { - dev_err(adev->dev, "Failed to load IFWI, err = %d", ret); + dev_err(adev->dev, "Failed to load IFWI, err = %d\n", ret); return ret; } - dev_dbg(adev->dev, "PSP IFWI flash process done"); + dev_dbg(adev->dev, "PSP IFWI flash process done\n"); return 0; } @@ -3883,3 +4011,11 @@ const struct amdgpu_ip_block_version psp_v13_0_4_ip_block = { .rev = 4, .funcs = &psp_ip_funcs, }; + +const struct amdgpu_ip_block_version psp_v14_0_ip_block = { + .type = AMD_IP_BLOCK_TYPE_PSP, + .major = 14, + .minor = 0, + .rev = 0, + .funcs = &psp_ip_funcs, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h index 3e67ed63e638..3635303e6548 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h @@ -73,8 +73,10 @@ enum psp_bootloader_cmd { PSP_BL__LOAD_KEY_DATABASE = 0x80000, PSP_BL__LOAD_SOCDRV = 0xB0000, PSP_BL__LOAD_DBGDRV = 0xC0000, + PSP_BL__LOAD_HADDRV = PSP_BL__LOAD_DBGDRV, PSP_BL__LOAD_INTFDRV = 0xD0000, - PSP_BL__LOAD_RASDRV = 0xE0000, + PSP_BL__LOAD_RASDRV = 0xE0000, + PSP_BL__LOAD_IPKEYMGRDRV = 0xF0000, PSP_BL__DRAM_LONG_TRAIN = 0x100000, PSP_BL__DRAM_SHORT_TRAIN = 0x200000, PSP_BL__LOAD_TOS_SPL_TABLE = 0x10000000, @@ -117,6 +119,7 @@ struct psp_funcs { int (*bootloader_load_intf_drv)(struct psp_context *psp); int (*bootloader_load_dbg_drv)(struct psp_context *psp); int (*bootloader_load_ras_drv)(struct psp_context *psp); + int (*bootloader_load_ipkeymgr_drv)(struct psp_context *psp); int (*bootloader_load_sos)(struct psp_context *psp); int (*ring_create)(struct psp_context *psp, enum psp_ring_type ring_type); @@ -134,6 +137,7 @@ struct psp_funcs { int (*update_spirom)(struct psp_context *psp, uint64_t fw_pri_mc_addr); int (*vbflash_stat)(struct psp_context *psp); int (*fatal_error_recovery_quirk)(struct psp_context *psp); + bool (*get_ras_capability)(struct psp_context *psp); }; struct ta_funcs { @@ -149,6 +153,7 @@ struct psp_xgmi_node_info { uint8_t is_sharing_enabled; enum ta_xgmi_assigned_sdma_engine sdma_engine; uint8_t num_links; + struct xgmi_connected_port_num port_num[TA_XGMI__MAX_PORT_NUM]; }; struct psp_xgmi_topology_info { @@ -189,6 +194,7 @@ struct psp_xgmi_context { struct ta_context context; struct psp_xgmi_topology_info top_info; bool supports_extended_data; + uint8_t xgmi_ta_caps; }; struct psp_ras_context { @@ -200,7 +206,7 @@ struct psp_ras_context { #define GDDR6_MEM_TRAINING_DATA_SIZE_IN_BYTES 0x1000 #define GDDR6_MEM_TRAINING_OFFSET 0x8000 /*Define the VRAM size that will be encroached by BIST training.*/ -#define GDDR6_MEM_TRAINING_ENCROACHED_SIZE 0x2000000 +#define BIST_MEM_TRAINING_ENCROACHED_SIZE 0x2000000 enum psp_memory_training_init_flag { PSP_MEM_TRAIN_NOT_SUPPORT = 0x0, @@ -333,6 +339,7 @@ struct psp_context { struct psp_bin_desc intf_drv; struct psp_bin_desc dbg_drv; struct psp_bin_desc ras_drv; + struct psp_bin_desc ipkeymgr_drv; /* tmr buffer */ struct amdgpu_bo *tmr_bo; @@ -361,6 +368,8 @@ struct psp_context { atomic_t fence_value; /* flag to mark whether gfx fw autoload is supported or not */ bool autoload_supported; + /* flag to mark whether psp use runtime TMR or boottime TMR */ + bool boot_time_tmr; /* flag to mark whether df cstate management centralized to PMFW */ bool pmfw_centralized_cstate_management; @@ -419,6 +428,9 @@ struct amdgpu_psp_funcs { #define psp_bootloader_load_ras_drv(psp) \ ((psp)->funcs->bootloader_load_ras_drv ? \ (psp)->funcs->bootloader_load_ras_drv((psp)) : 0) +#define psp_bootloader_load_ipkeymgr_drv(psp) \ + ((psp)->funcs->bootloader_load_ipkeymgr_drv ? \ + (psp)->funcs->bootloader_load_ipkeymgr_drv((psp)) : 0) #define psp_bootloader_load_sos(psp) \ ((psp)->funcs->bootloader_load_sos ? (psp)->funcs->bootloader_load_sos((psp)) : 0) #define psp_smu_reload_quirk(psp) \ @@ -460,6 +472,7 @@ extern const struct amdgpu_ip_block_version psp_v11_0_8_ip_block; extern const struct amdgpu_ip_block_version psp_v12_0_ip_block; extern const struct amdgpu_ip_block_version psp_v13_0_ip_block; extern const struct amdgpu_ip_block_version psp_v13_0_4_ip_block; +extern const struct amdgpu_ip_block_version psp_v14_0_ip_block; extern int psp_wait_for(struct psp_context *psp, uint32_t reg_index, uint32_t field_val, uint32_t mask, bool check_changed); @@ -499,6 +512,9 @@ int psp_ras_enable_features(struct psp_context *psp, int psp_ras_trigger_error(struct psp_context *psp, struct ta_ras_trigger_error_input *info, uint32_t instance_mask); int psp_ras_terminate(struct psp_context *psp); +int psp_ras_query_address(struct psp_context *psp, + struct ta_ras_query_address_input *addr_in, + struct ta_ras_query_address_output *addr_out); int psp_hdcp_invoke(struct psp_context *psp, uint32_t ta_cmd_id); int psp_dtm_invoke(struct psp_context *psp, uint32_t ta_cmd_id); @@ -535,5 +551,5 @@ int psp_spatial_partition(struct psp_context *psp, int mode); int is_psp_fw_valid(struct psp_bin_desc bin); int amdgpu_psp_wait_for_bootloader(struct amdgpu_device *adev); - +bool amdgpu_psp_get_ras_capability(struct psp_context *psp); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c index 468a67b302d4..ca5c86e5f7cd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c @@ -362,7 +362,7 @@ static ssize_t ta_if_invoke_debugfs_write(struct file *fp, const char *buf, size } } - if (copy_to_user((char *)buf, context->mem_context.shared_buf, shared_buf_len)) + if (copy_to_user((char *)&buf[copy_pos], context->mem_context.shared_buf, shared_buf_len)) ret = -EFAULT; err_free_shared_buf: diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 973073e07b2a..352ce16a0963 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -28,6 +28,7 @@ #include <linux/reboot.h> #include <linux/syscalls.h> #include <linux/pm_runtime.h> +#include <linux/list_sort.h> #include "amdgpu.h" #include "amdgpu_ras.h" @@ -38,6 +39,7 @@ #include "nbio_v7_9.h" #include "atom.h" #include "amdgpu_reset.h" +#include "amdgpu_psp.h" #ifdef CONFIG_X86_MCE_AMD #include <asm/mce.h> @@ -72,6 +74,8 @@ const char *ras_block_string[] = { "mca", "vcn", "jpeg", + "ih", + "mpio", }; const char *ras_mca_block_string[] = { @@ -93,7 +97,8 @@ const char *get_ras_block_str(struct ras_common_if *ras_block) if (!ras_block) return "NULL"; - if (ras_block->block >= AMDGPU_RAS_BLOCK_COUNT) + if (ras_block->block >= AMDGPU_RAS_BLOCK_COUNT || + ras_block->block >= ARRAY_SIZE(ras_block_string)) return "OUT OF RANGE"; if (ras_block->block == AMDGPU_RAS_BLOCK__MCA) @@ -115,6 +120,8 @@ const char *get_ras_block_str(struct ras_common_if *ras_block) /* typical ECC bad page rate is 1 bad page per 100MB VRAM */ #define RAS_BAD_PAGE_COVER (100 * 1024 * 1024ULL) +#define MAX_UMC_POISON_POLLING_TIME_ASYNC 100 //ms + enum amdgpu_ras_retire_page_reservation { AMDGPU_RAS_RETIRE_PAGE_RESERVED, AMDGPU_RAS_RETIRE_PAGE_PENDING, @@ -304,11 +311,13 @@ static int amdgpu_ras_debugfs_ctrl_parse_data(struct file *f, return -EINVAL; data->head.block = block_id; - /* only ue and ce errors are supported */ + /* only ue, ce and poison errors are supported */ if (!memcmp("ue", err, 2)) data->head.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE; else if (!memcmp("ce", err, 2)) data->head.type = AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE; + else if (!memcmp("poison", err, 6)) + data->head.type = AMDGPU_RAS_ERROR__POISON; else return -EINVAL; @@ -430,9 +439,10 @@ static void amdgpu_ras_instance_mask_check(struct amdgpu_device *adev, * The block is one of: umc, sdma, gfx, etc. * see ras_block_string[] for details * - * The error type is one of: ue, ce, where, + * The error type is one of: ue, ce and poison where, * ue is multi-uncorrectable * ce is single-correctable + * poison is poison * * The sub-block is a the sub-block index, pass 0 if there is no sub-block. * The address and value are hexadecimal numbers, leading 0x is optional. @@ -624,8 +634,12 @@ static ssize_t amdgpu_ras_sysfs_read(struct device *dev, dev_warn(obj->adev->dev, "Failed to reset error counter and error status"); } - return sysfs_emit(buf, "%s: %lu\n%s: %lu\n", "ue", info.ue_count, - "ce", info.ce_count); + if (info.head.block == AMDGPU_RAS_BLOCK__UMC) + return sysfs_emit(buf, "%s: %lu\n%s: %lu\n%s: %lu\n", "ue", info.ue_count, + "ce", info.ce_count, "de", info.de_count); + else + return sysfs_emit(buf, "%s: %lu\n%s: %lu\n", "ue", info.ue_count, + "ce", info.ce_count); } /* obj begin */ @@ -635,8 +649,11 @@ static ssize_t amdgpu_ras_sysfs_read(struct device *dev, static inline void put_obj(struct ras_manager *obj) { - if (obj && (--obj->use == 0)) + if (obj && (--obj->use == 0)) { list_del(&obj->node); + amdgpu_ras_error_data_fini(&obj->err_data); + } + if (obj && (obj->use < 0)) DRM_ERROR("RAS ERROR: Unbalance obj(%s) use\n", get_ras_block_str(&obj->head)); } @@ -666,6 +683,9 @@ static struct ras_manager *amdgpu_ras_create_obj(struct amdgpu_device *adev, if (alive_obj(obj)) return NULL; + if (amdgpu_ras_error_data_init(&obj->err_data)) + return NULL; + obj->head = *head; obj->adev = adev; list_add(&obj->node, &con->head); @@ -1023,108 +1043,326 @@ static void amdgpu_ras_get_ecc_info(struct amdgpu_device *adev, struct ras_err_d } static void amdgpu_ras_error_print_error_data(struct amdgpu_device *adev, - struct ras_query_if *query_if, + struct ras_manager *ras_mgr, struct ras_err_data *err_data, - bool is_ue) + struct ras_query_context *qctx, + const char *blk_name, + bool is_ue, + bool is_de) { - struct ras_manager *ras_mgr = amdgpu_ras_find_obj(adev, &query_if->head); - const char *blk_name = get_ras_block_str(&query_if->head); struct amdgpu_smuio_mcm_config_info *mcm_info; struct ras_err_node *err_node; struct ras_err_info *err_info; + u64 event_id = qctx->event_id; + + if (is_ue) { + for_each_ras_error(err_node, err_data) { + err_info = &err_node->err_info; + mcm_info = &err_info->mcm_info; + if (err_info->ue_count) { + RAS_EVENT_LOG(adev, event_id, "socket: %d, die: %d, " + "%lld new uncorrectable hardware errors detected in %s block\n", + mcm_info->socket_id, + mcm_info->die_id, + err_info->ue_count, + blk_name); + } + } - if (is_ue) - dev_info(adev->dev, "%ld uncorrectable hardware errors detected in %s block\n", - ras_mgr->err_data.ue_count, blk_name); - else - dev_info(adev->dev, "%ld correctable hardware errors detected in %s block\n", - ras_mgr->err_data.ue_count, blk_name); + for_each_ras_error(err_node, &ras_mgr->err_data) { + err_info = &err_node->err_info; + mcm_info = &err_info->mcm_info; + RAS_EVENT_LOG(adev, event_id, "socket: %d, die: %d, " + "%lld uncorrectable hardware errors detected in total in %s block\n", + mcm_info->socket_id, mcm_info->die_id, err_info->ue_count, blk_name); + } - for_each_ras_error(err_node, err_data) { - err_info = &err_node->err_info; - mcm_info = &err_info->mcm_info; - if (is_ue && err_info->ue_count) { - dev_info(adev->dev, "socket: %d, die: %d " - "%lld uncorrectable hardware errors detected in %s block\n", - mcm_info->socket_id, - mcm_info->die_id, - err_info->ue_count, - blk_name); - } else if (!is_ue && err_info->ce_count) { - dev_info(adev->dev, "socket: %d, die: %d " - "%lld correctable hardware errors detected in %s block\n", - mcm_info->socket_id, - mcm_info->die_id, - err_info->ue_count, - blk_name); + } else { + if (is_de) { + for_each_ras_error(err_node, err_data) { + err_info = &err_node->err_info; + mcm_info = &err_info->mcm_info; + if (err_info->de_count) { + RAS_EVENT_LOG(adev, event_id, "socket: %d, die: %d, " + "%lld new deferred hardware errors detected in %s block\n", + mcm_info->socket_id, + mcm_info->die_id, + err_info->de_count, + blk_name); + } + } + + for_each_ras_error(err_node, &ras_mgr->err_data) { + err_info = &err_node->err_info; + mcm_info = &err_info->mcm_info; + RAS_EVENT_LOG(adev, event_id, "socket: %d, die: %d, " + "%lld deferred hardware errors detected in total in %s block\n", + mcm_info->socket_id, mcm_info->die_id, + err_info->de_count, blk_name); + } + } else { + for_each_ras_error(err_node, err_data) { + err_info = &err_node->err_info; + mcm_info = &err_info->mcm_info; + if (err_info->ce_count) { + RAS_EVENT_LOG(adev, event_id, "socket: %d, die: %d, " + "%lld new correctable hardware errors detected in %s block\n", + mcm_info->socket_id, + mcm_info->die_id, + err_info->ce_count, + blk_name); + } + } + + for_each_ras_error(err_node, &ras_mgr->err_data) { + err_info = &err_node->err_info; + mcm_info = &err_info->mcm_info; + RAS_EVENT_LOG(adev, event_id, "socket: %d, die: %d, " + "%lld correctable hardware errors detected in total in %s block\n", + mcm_info->socket_id, mcm_info->die_id, + err_info->ce_count, blk_name); + } } } } +static inline bool err_data_has_source_info(struct ras_err_data *data) +{ + return !list_empty(&data->err_node_list); +} + static void amdgpu_ras_error_generate_report(struct amdgpu_device *adev, struct ras_query_if *query_if, - struct ras_err_data *err_data) + struct ras_err_data *err_data, + struct ras_query_context *qctx) { struct ras_manager *ras_mgr = amdgpu_ras_find_obj(adev, &query_if->head); const char *blk_name = get_ras_block_str(&query_if->head); + u64 event_id = qctx->event_id; if (err_data->ce_count) { - if (!list_empty(&err_data->err_node_list)) { - amdgpu_ras_error_print_error_data(adev, query_if, - err_data, false); + if (err_data_has_source_info(err_data)) { + amdgpu_ras_error_print_error_data(adev, ras_mgr, err_data, qctx, + blk_name, false, false); } else if (!adev->aid_mask && adev->smuio.funcs && adev->smuio.funcs->get_socket_id && adev->smuio.funcs->get_die_id) { - dev_info(adev->dev, "socket: %d, die: %d " - "%ld correctable hardware errors " - "detected in %s block, no user " - "action is needed.\n", - adev->smuio.funcs->get_socket_id(adev), - adev->smuio.funcs->get_die_id(adev), - ras_mgr->err_data.ce_count, - blk_name); + RAS_EVENT_LOG(adev, event_id, "socket: %d, die: %d " + "%ld correctable hardware errors " + "detected in %s block\n", + adev->smuio.funcs->get_socket_id(adev), + adev->smuio.funcs->get_die_id(adev), + ras_mgr->err_data.ce_count, + blk_name); } else { - dev_info(adev->dev, "%ld correctable hardware errors " - "detected in %s block, no user " - "action is needed.\n", - ras_mgr->err_data.ce_count, - blk_name); + RAS_EVENT_LOG(adev, event_id, "%ld correctable hardware errors " + "detected in %s block\n", + ras_mgr->err_data.ce_count, + blk_name); } } if (err_data->ue_count) { - if (!list_empty(&err_data->err_node_list)) { - amdgpu_ras_error_print_error_data(adev, query_if, - err_data, true); + if (err_data_has_source_info(err_data)) { + amdgpu_ras_error_print_error_data(adev, ras_mgr, err_data, qctx, + blk_name, true, false); + } else if (!adev->aid_mask && + adev->smuio.funcs && + adev->smuio.funcs->get_socket_id && + adev->smuio.funcs->get_die_id) { + RAS_EVENT_LOG(adev, event_id, "socket: %d, die: %d " + "%ld uncorrectable hardware errors " + "detected in %s block\n", + adev->smuio.funcs->get_socket_id(adev), + adev->smuio.funcs->get_die_id(adev), + ras_mgr->err_data.ue_count, + blk_name); + } else { + RAS_EVENT_LOG(adev, event_id, "%ld uncorrectable hardware errors " + "detected in %s block\n", + ras_mgr->err_data.ue_count, + blk_name); + } + } + + if (err_data->de_count) { + if (err_data_has_source_info(err_data)) { + amdgpu_ras_error_print_error_data(adev, ras_mgr, err_data, qctx, + blk_name, false, true); } else if (!adev->aid_mask && adev->smuio.funcs && adev->smuio.funcs->get_socket_id && adev->smuio.funcs->get_die_id) { - dev_info(adev->dev, "socket: %d, die: %d " - "%ld uncorrectable hardware errors " - "detected in %s block\n", - adev->smuio.funcs->get_socket_id(adev), - adev->smuio.funcs->get_die_id(adev), - ras_mgr->err_data.ue_count, - blk_name); + RAS_EVENT_LOG(adev, event_id, "socket: %d, die: %d " + "%ld deferred hardware errors " + "detected in %s block\n", + adev->smuio.funcs->get_socket_id(adev), + adev->smuio.funcs->get_die_id(adev), + ras_mgr->err_data.de_count, + blk_name); } else { - dev_info(adev->dev, "%ld uncorrectable hardware errors " - "detected in %s block\n", - ras_mgr->err_data.ue_count, - blk_name); + RAS_EVENT_LOG(adev, event_id, "%ld deferred hardware errors " + "detected in %s block\n", + ras_mgr->err_data.de_count, + blk_name); } } +} +static void amdgpu_rasmgr_error_data_statistic_update(struct ras_manager *obj, struct ras_err_data *err_data) +{ + struct ras_err_node *err_node; + struct ras_err_info *err_info; + + if (err_data_has_source_info(err_data)) { + for_each_ras_error(err_node, err_data) { + err_info = &err_node->err_info; + amdgpu_ras_error_statistic_de_count(&obj->err_data, + &err_info->mcm_info, NULL, err_info->de_count); + amdgpu_ras_error_statistic_ce_count(&obj->err_data, + &err_info->mcm_info, NULL, err_info->ce_count); + amdgpu_ras_error_statistic_ue_count(&obj->err_data, + &err_info->mcm_info, NULL, err_info->ue_count); + } + } else { + /* for legacy asic path which doesn't has error source info */ + obj->err_data.ue_count += err_data->ue_count; + obj->err_data.ce_count += err_data->ce_count; + obj->err_data.de_count += err_data->de_count; + } } -/* query/inject/cure begin */ -int amdgpu_ras_query_error_status(struct amdgpu_device *adev, - struct ras_query_if *info) +static struct ras_manager *get_ras_manager(struct amdgpu_device *adev, enum amdgpu_ras_block blk) +{ + struct ras_common_if head; + + memset(&head, 0, sizeof(head)); + head.block = blk; + + return amdgpu_ras_find_obj(adev, &head); +} + +int amdgpu_ras_bind_aca(struct amdgpu_device *adev, enum amdgpu_ras_block blk, + const struct aca_info *aca_info, void *data) +{ + struct ras_manager *obj; + + obj = get_ras_manager(adev, blk); + if (!obj) + return -EINVAL; + + return amdgpu_aca_add_handle(adev, &obj->aca_handle, ras_block_str(blk), aca_info, data); +} + +int amdgpu_ras_unbind_aca(struct amdgpu_device *adev, enum amdgpu_ras_block blk) +{ + struct ras_manager *obj; + + obj = get_ras_manager(adev, blk); + if (!obj) + return -EINVAL; + + amdgpu_aca_remove_handle(&obj->aca_handle); + + return 0; +} + +static int amdgpu_aca_log_ras_error_data(struct amdgpu_device *adev, enum amdgpu_ras_block blk, + enum aca_error_type type, struct ras_err_data *err_data, + struct ras_query_context *qctx) +{ + struct ras_manager *obj; + + obj = get_ras_manager(adev, blk); + if (!obj) + return -EINVAL; + + return amdgpu_aca_get_error_data(adev, &obj->aca_handle, type, err_data, qctx); +} + +ssize_t amdgpu_ras_aca_sysfs_read(struct device *dev, struct device_attribute *attr, + struct aca_handle *handle, char *buf, void *data) +{ + struct ras_manager *obj = container_of(handle, struct ras_manager, aca_handle); + struct ras_query_if info = { + .head = obj->head, + }; + + if (amdgpu_ras_query_error_status(obj->adev, &info)) + return -EINVAL; + + return sysfs_emit(buf, "%s: %lu\n%s: %lu\n%s: %lu\n", "ue", info.ue_count, + "ce", info.ce_count, "de", info.ue_count); +} + +static int amdgpu_ras_query_error_status_helper(struct amdgpu_device *adev, + struct ras_query_if *info, + struct ras_err_data *err_data, + struct ras_query_context *qctx, + unsigned int error_query_mode) { + enum amdgpu_ras_block blk = info ? info->head.block : AMDGPU_RAS_BLOCK_COUNT; struct amdgpu_ras_block_object *block_obj = NULL; + int ret; + + if (blk == AMDGPU_RAS_BLOCK_COUNT) + return -EINVAL; + + if (error_query_mode == AMDGPU_RAS_INVALID_ERROR_QUERY) + return -EINVAL; + + if (error_query_mode == AMDGPU_RAS_DIRECT_ERROR_QUERY) { + if (info->head.block == AMDGPU_RAS_BLOCK__UMC) { + amdgpu_ras_get_ecc_info(adev, err_data); + } else { + block_obj = amdgpu_ras_get_ras_block(adev, info->head.block, 0); + if (!block_obj || !block_obj->hw_ops) { + dev_dbg_once(adev->dev, "%s doesn't config RAS function\n", + get_ras_block_str(&info->head)); + return -EINVAL; + } + + if (block_obj->hw_ops->query_ras_error_count) + block_obj->hw_ops->query_ras_error_count(adev, err_data); + + if ((info->head.block == AMDGPU_RAS_BLOCK__SDMA) || + (info->head.block == AMDGPU_RAS_BLOCK__GFX) || + (info->head.block == AMDGPU_RAS_BLOCK__MMHUB)) { + if (block_obj->hw_ops->query_ras_error_status) + block_obj->hw_ops->query_ras_error_status(adev); + } + } + } else { + if (amdgpu_aca_is_enabled(adev)) { + ret = amdgpu_aca_log_ras_error_data(adev, blk, ACA_ERROR_TYPE_UE, err_data, qctx); + if (ret) + return ret; + + ret = amdgpu_aca_log_ras_error_data(adev, blk, ACA_ERROR_TYPE_CE, err_data, qctx); + if (ret) + return ret; + + ret = amdgpu_aca_log_ras_error_data(adev, blk, ACA_ERROR_TYPE_DEFERRED, err_data, qctx); + if (ret) + return ret; + } else { + /* FIXME: add code to check return value later */ + amdgpu_mca_smu_log_ras_error(adev, blk, AMDGPU_MCA_ERROR_TYPE_UE, err_data, qctx); + amdgpu_mca_smu_log_ras_error(adev, blk, AMDGPU_MCA_ERROR_TYPE_CE, err_data, qctx); + } + } + + return 0; +} + +/* query/inject/cure begin */ +int amdgpu_ras_query_error_status(struct amdgpu_device *adev, struct ras_query_if *info) +{ struct ras_manager *obj = amdgpu_ras_find_obj(adev, &info->head); struct ras_err_data err_data; + struct ras_query_context qctx; + unsigned int error_query_mode; int ret; if (!obj) @@ -1134,35 +1372,26 @@ int amdgpu_ras_query_error_status(struct amdgpu_device *adev, if (ret) return ret; - if (info->head.block == AMDGPU_RAS_BLOCK__UMC) { - amdgpu_ras_get_ecc_info(adev, &err_data); - } else { - block_obj = amdgpu_ras_get_ras_block(adev, info->head.block, 0); - if (!block_obj || !block_obj->hw_ops) { - dev_dbg_once(adev->dev, "%s doesn't config RAS function\n", - get_ras_block_str(&info->head)); - ret = -EINVAL; - goto out_fini_err_data; - } - - if (block_obj->hw_ops->query_ras_error_count) - block_obj->hw_ops->query_ras_error_count(adev, &err_data); + if (!amdgpu_ras_get_error_query_mode(adev, &error_query_mode)) + return -EINVAL; - if ((info->head.block == AMDGPU_RAS_BLOCK__SDMA) || - (info->head.block == AMDGPU_RAS_BLOCK__GFX) || - (info->head.block == AMDGPU_RAS_BLOCK__MMHUB)) { - if (block_obj->hw_ops->query_ras_error_status) - block_obj->hw_ops->query_ras_error_status(adev); - } - } + memset(&qctx, 0, sizeof(qctx)); + qctx.event_id = amdgpu_ras_acquire_event_id(adev, amdgpu_ras_intr_triggered() ? + RAS_EVENT_TYPE_ISR : RAS_EVENT_TYPE_INVALID); + ret = amdgpu_ras_query_error_status_helper(adev, info, + &err_data, + &qctx, + error_query_mode); + if (ret) + goto out_fini_err_data; - obj->err_data.ue_count += err_data.ue_count; - obj->err_data.ce_count += err_data.ce_count; + amdgpu_rasmgr_error_data_statistic_update(obj, &err_data); info->ue_count = obj->err_data.ue_count; info->ce_count = obj->err_data.ce_count; + info->de_count = obj->err_data.de_count; - amdgpu_ras_error_generate_report(adev, info, &err_data); + amdgpu_ras_error_generate_report(adev, info, &err_data, &qctx); out_fini_err_data: amdgpu_ras_error_data_fini(&err_data); @@ -1170,23 +1399,53 @@ out_fini_err_data: return ret; } -int amdgpu_ras_reset_error_status(struct amdgpu_device *adev, +int amdgpu_ras_reset_error_count(struct amdgpu_device *adev, enum amdgpu_ras_block block) { struct amdgpu_ras_block_object *block_obj = amdgpu_ras_get_ras_block(adev, block, 0); + struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); + const struct amdgpu_mca_smu_funcs *mca_funcs = adev->mca.mca_funcs; + const struct aca_smu_funcs *smu_funcs = adev->aca.smu_funcs; + struct amdgpu_hive_info *hive; + int hive_ras_recovery = 0; if (!block_obj || !block_obj->hw_ops) { dev_dbg_once(adev->dev, "%s doesn't config RAS function\n", - ras_block_str(block)); - return 0; + ras_block_str(block)); + return -EOPNOTSUPP; } - if (!amdgpu_ras_is_supported(adev, block)) - return 0; + if (!amdgpu_ras_is_supported(adev, block) || + !amdgpu_ras_get_aca_debug_mode(adev)) + return -EOPNOTSUPP; + + hive = amdgpu_get_xgmi_hive(adev); + if (hive) { + hive_ras_recovery = atomic_read(&hive->ras_recovery); + amdgpu_put_xgmi_hive(hive); + } + + /* skip ras error reset in gpu reset */ + if ((amdgpu_in_reset(adev) || atomic_read(&ras->in_recovery) || + hive_ras_recovery) && + ((smu_funcs && smu_funcs->set_debug_mode) || + (mca_funcs && mca_funcs->mca_set_debug_mode))) + return -EOPNOTSUPP; if (block_obj->hw_ops->reset_ras_error_count) block_obj->hw_ops->reset_ras_error_count(adev); + return 0; +} + +int amdgpu_ras_reset_error_status(struct amdgpu_device *adev, + enum amdgpu_ras_block block) +{ + struct amdgpu_ras_block_object *block_obj = amdgpu_ras_get_ras_block(adev, block, 0); + + if (amdgpu_ras_reset_error_count(adev, block) == -EOPNOTSUPP) + return 0; + if ((block == AMDGPU_RAS_BLOCK__GFX) || (block == AMDGPU_RAS_BLOCK__MMHUB)) { if (block_obj->hw_ops->reset_ras_error_status) @@ -1463,7 +1722,8 @@ static void amdgpu_ras_sysfs_remove_bad_page_node(struct amdgpu_device *adev) { struct amdgpu_ras *con = amdgpu_ras_get_context(adev); - sysfs_remove_file_from_group(&adev->dev->kobj, + if (adev->dev->kobj.sd) + sysfs_remove_file_from_group(&adev->dev->kobj, &con->badpages_attr.attr, RAS_FS_NAME); } @@ -1482,7 +1742,8 @@ static int amdgpu_ras_sysfs_remove_dev_attr_node(struct amdgpu_device *adev) .attrs = attrs, }; - sysfs_remove_group(&adev->dev->kobj, &group); + if (adev->dev->kobj.sd) + sysfs_remove_group(&adev->dev->kobj, &group); return 0; } @@ -1529,7 +1790,8 @@ int amdgpu_ras_sysfs_remove(struct amdgpu_device *adev, if (!obj || !obj->attr_inuse) return -EINVAL; - sysfs_remove_file_from_group(&adev->dev->kobj, + if (adev->dev->kobj.sd) + sysfs_remove_file_from_group(&adev->dev->kobj, &obj->sysfs_attr.attr, RAS_FS_NAME); obj->attr_inuse = 0; @@ -1663,7 +1925,10 @@ void amdgpu_ras_debugfs_create_all(struct amdgpu_device *adev) } } - amdgpu_mca_smu_debugfs_init(adev, dir); + if (amdgpu_aca_is_enabled(adev)) + amdgpu_aca_smu_debugfs_init(adev, dir); + else + amdgpu_mca_smu_debugfs_init(adev, dir); } /* debugfs end */ @@ -1791,7 +2056,7 @@ static void amdgpu_ras_interrupt_poison_consumption_handler(struct ras_manager * } } - amdgpu_umc_poison_handler(adev, false); + amdgpu_umc_poison_handler(adev, obj->head.block, 0); if (block_obj->hw_ops && block_obj->hw_ops->handle_poison_consumption) poison_stat = block_obj->hw_ops->handle_poison_consumption(adev); @@ -1810,7 +2075,7 @@ static void amdgpu_ras_interrupt_poison_creation_handler(struct ras_manager *obj struct amdgpu_iv_entry *entry) { dev_info(obj->adev->dev, - "Poison is created, no user action is needed.\n"); + "Poison is created\n"); } static void amdgpu_ras_interrupt_umc_handler(struct ras_manager *obj, @@ -1842,6 +2107,7 @@ static void amdgpu_ras_interrupt_umc_handler(struct ras_manager *obj, */ obj->err_data.ue_count += err_data.ue_count; obj->err_data.ce_count += err_data.ce_count; + obj->err_data.de_count += err_data.de_count; } amdgpu_ras_error_data_fini(&err_data); @@ -2133,6 +2399,19 @@ out: return ret; } +static void amdgpu_ras_set_fed_all(struct amdgpu_device *adev, + struct amdgpu_hive_info *hive, bool status) +{ + struct amdgpu_device *tmp_adev; + + if (hive) { + list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) + amdgpu_ras_set_fed(tmp_adev, status); + } else { + amdgpu_ras_set_fed(adev, status); + } +} + static void amdgpu_ras_do_recovery(struct work_struct *work) { struct amdgpu_ras *ras = @@ -2140,9 +2419,24 @@ static void amdgpu_ras_do_recovery(struct work_struct *work) struct amdgpu_device *remote_adev = NULL; struct amdgpu_device *adev = ras->adev; struct list_head device_list, *device_list_handle = NULL; + struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev); + + if (hive) { + atomic_set(&hive->ras_recovery, 1); + /* If any device which is part of the hive received RAS fatal + * error interrupt, set fatal error status on all. This + * condition will need a recovery, and flag will be cleared + * as part of recovery. + */ + list_for_each_entry(remote_adev, &hive->device_list, + gmc.xgmi.head) + if (amdgpu_ras_get_fed_status(remote_adev)) { + amdgpu_ras_set_fed_all(adev, hive, true); + break; + } + } if (!ras->disable_ras_err_cnt_harvest) { - struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev); /* Build list of devices to query RAS related errors */ if (hive && adev->gmc.xgmi.num_physical_nodes > 1) { @@ -2159,7 +2453,6 @@ static void amdgpu_ras_do_recovery(struct work_struct *work) amdgpu_ras_log_on_err_counter(remote_adev); } - amdgpu_put_xgmi_hive(hive); } if (amdgpu_device_should_recover_gpu(ras->adev)) { @@ -2194,6 +2487,10 @@ static void amdgpu_ras_do_recovery(struct work_struct *work) amdgpu_device_gpu_recover(ras->adev, NULL, &reset_context); } atomic_set(&ras->in_recovery, 0); + if (hive) { + atomic_set(&hive->ras_recovery, 0); + amdgpu_put_xgmi_hive(hive); + } } /* alloc/realloc bps array */ @@ -2406,6 +2703,32 @@ static void amdgpu_ras_validate_threshold(struct amdgpu_device *adev, } } +static int amdgpu_ras_page_retirement_thread(void *param) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)param; + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + + while (!kthread_should_stop()) { + + wait_event_interruptible(con->page_retirement_wq, + kthread_should_stop() || + atomic_read(&con->page_retirement_req_cnt)); + + if (kthread_should_stop()) + break; + + dev_info(adev->dev, "Start processing page retirement. request:%d\n", + atomic_read(&con->page_retirement_req_cnt)); + + atomic_dec(&con->page_retirement_req_cnt); + + amdgpu_umc_bad_page_polling_timeout(adev, + 0, MAX_UMC_POISON_POLLING_TIME_ASYNC); + } + + return 0; +} + int amdgpu_ras_recovery_init(struct amdgpu_device *adev) { struct amdgpu_ras *con = amdgpu_ras_get_context(adev); @@ -2428,7 +2751,7 @@ int amdgpu_ras_recovery_init(struct amdgpu_device *adev) return 0; data = &con->eh_data; - *data = kmalloc(sizeof(**data), GFP_KERNEL | __GFP_ZERO); + *data = kzalloc(sizeof(**data), GFP_KERNEL); if (!*data) { ret = -ENOMEM; goto out; @@ -2469,6 +2792,16 @@ int amdgpu_ras_recovery_init(struct amdgpu_device *adev) } } + mutex_init(&con->page_retirement_lock); + init_waitqueue_head(&con->page_retirement_wq); + atomic_set(&con->page_retirement_req_cnt, 0); + con->page_retirement_thread = + kthread_run(amdgpu_ras_page_retirement_thread, adev, "umc_page_retirement"); + if (IS_ERR(con->page_retirement_thread)) { + con->page_retirement_thread = NULL; + dev_warn(adev->dev, "Failed to create umc_page_retirement thread!!!\n"); + } + #ifdef CONFIG_X86_MCE_AMD if ((adev->asic_type == CHIP_ALDEBARAN) && (adev->gmc.xgmi.connected_to_cpu)) @@ -2504,6 +2837,11 @@ static int amdgpu_ras_recovery_fini(struct amdgpu_device *adev) if (!data) return 0; + if (con->page_retirement_thread) + kthread_stop(con->page_retirement_thread); + + atomic_set(&con->page_retirement_req_cnt, 0); + cancel_work_sync(&con->recovery_work); mutex_lock(&con->recovery_lock); @@ -2565,6 +2903,87 @@ static void amdgpu_ras_get_quirks(struct amdgpu_device *adev) adev->ras_hw_enabled |= (1 << AMDGPU_RAS_BLOCK__GFX); } +/* Query ras capablity via atomfirmware interface */ +static void amdgpu_ras_query_ras_capablity_from_vbios(struct amdgpu_device *adev) +{ + /* mem_ecc cap */ + if (amdgpu_atomfirmware_mem_ecc_supported(adev)) { + dev_info(adev->dev, "MEM ECC is active.\n"); + adev->ras_hw_enabled |= (1 << AMDGPU_RAS_BLOCK__UMC | + 1 << AMDGPU_RAS_BLOCK__DF); + } else { + dev_info(adev->dev, "MEM ECC is not presented.\n"); + } + + /* sram_ecc cap */ + if (amdgpu_atomfirmware_sram_ecc_supported(adev)) { + dev_info(adev->dev, "SRAM ECC is active.\n"); + if (!amdgpu_sriov_vf(adev)) + adev->ras_hw_enabled |= ~(1 << AMDGPU_RAS_BLOCK__UMC | + 1 << AMDGPU_RAS_BLOCK__DF); + else + adev->ras_hw_enabled |= (1 << AMDGPU_RAS_BLOCK__PCIE_BIF | + 1 << AMDGPU_RAS_BLOCK__SDMA | + 1 << AMDGPU_RAS_BLOCK__GFX); + + /* + * VCN/JPEG RAS can be supported on both bare metal and + * SRIOV environment + */ + if (amdgpu_ip_version(adev, VCN_HWIP, 0) == IP_VERSION(2, 6, 0) || + amdgpu_ip_version(adev, VCN_HWIP, 0) == IP_VERSION(4, 0, 0) || + amdgpu_ip_version(adev, VCN_HWIP, 0) == IP_VERSION(4, 0, 3)) + adev->ras_hw_enabled |= (1 << AMDGPU_RAS_BLOCK__VCN | + 1 << AMDGPU_RAS_BLOCK__JPEG); + else + adev->ras_hw_enabled &= ~(1 << AMDGPU_RAS_BLOCK__VCN | + 1 << AMDGPU_RAS_BLOCK__JPEG); + + /* + * XGMI RAS is not supported if xgmi num physical nodes + * is zero + */ + if (!adev->gmc.xgmi.num_physical_nodes) + adev->ras_hw_enabled &= ~(1 << AMDGPU_RAS_BLOCK__XGMI_WAFL); + } else { + dev_info(adev->dev, "SRAM ECC is not presented.\n"); + } +} + +/* Query poison mode from umc/df IP callbacks */ +static void amdgpu_ras_query_poison_mode(struct amdgpu_device *adev) +{ + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + bool df_poison, umc_poison; + + /* poison setting is useless on SRIOV guest */ + if (amdgpu_sriov_vf(adev) || !con) + return; + + /* Init poison supported flag, the default value is false */ + if (adev->gmc.xgmi.connected_to_cpu || + adev->gmc.is_app_apu) { + /* enabled by default when GPU is connected to CPU */ + con->poison_supported = true; + } else if (adev->df.funcs && + adev->df.funcs->query_ras_poison_mode && + adev->umc.ras && + adev->umc.ras->query_ras_poison_mode) { + df_poison = + adev->df.funcs->query_ras_poison_mode(adev); + umc_poison = + adev->umc.ras->query_ras_poison_mode(adev); + + /* Only poison is set in both DF and UMC, we can support it */ + if (df_poison && umc_poison) + con->poison_supported = true; + else if (df_poison != umc_poison) + dev_warn(adev->dev, + "Poison setting is inconsistent in DF/UMC(%d:%d)!\n", + df_poison, umc_poison); + } +} + /* * check hardware's ras ability which will be saved in hw_supported. * if hardware does not support ras, we can skip some ras initializtion and @@ -2581,47 +3000,13 @@ static void amdgpu_ras_check_supported(struct amdgpu_device *adev) if (!amdgpu_ras_asic_supported(adev)) return; - if (!adev->gmc.xgmi.connected_to_cpu && !adev->gmc.is_app_apu) { - if (amdgpu_atomfirmware_mem_ecc_supported(adev)) { - dev_info(adev->dev, "MEM ECC is active.\n"); - adev->ras_hw_enabled |= (1 << AMDGPU_RAS_BLOCK__UMC | - 1 << AMDGPU_RAS_BLOCK__DF); - } else { - dev_info(adev->dev, "MEM ECC is not presented.\n"); - } - - if (amdgpu_atomfirmware_sram_ecc_supported(adev)) { - dev_info(adev->dev, "SRAM ECC is active.\n"); - if (!amdgpu_sriov_vf(adev)) - adev->ras_hw_enabled |= ~(1 << AMDGPU_RAS_BLOCK__UMC | - 1 << AMDGPU_RAS_BLOCK__DF); - else - adev->ras_hw_enabled |= (1 << AMDGPU_RAS_BLOCK__PCIE_BIF | - 1 << AMDGPU_RAS_BLOCK__SDMA | - 1 << AMDGPU_RAS_BLOCK__GFX); + /* query ras capability from psp */ + if (amdgpu_psp_get_ras_capability(&adev->psp)) + goto init_ras_enabled_flag; - /* VCN/JPEG RAS can be supported on both bare metal and - * SRIOV environment - */ - if (amdgpu_ip_version(adev, VCN_HWIP, 0) == - IP_VERSION(2, 6, 0) || - amdgpu_ip_version(adev, VCN_HWIP, 0) == - IP_VERSION(4, 0, 0)) - adev->ras_hw_enabled |= (1 << AMDGPU_RAS_BLOCK__VCN | - 1 << AMDGPU_RAS_BLOCK__JPEG); - else - adev->ras_hw_enabled &= ~(1 << AMDGPU_RAS_BLOCK__VCN | - 1 << AMDGPU_RAS_BLOCK__JPEG); - - /* - * XGMI RAS is not supported if xgmi num physical nodes - * is zero - */ - if (!adev->gmc.xgmi.num_physical_nodes) - adev->ras_hw_enabled &= ~(1 << AMDGPU_RAS_BLOCK__XGMI_WAFL); - } else { - dev_info(adev->dev, "SRAM ECC is not presented.\n"); - } + /* query ras capablity from bios */ + if (!adev->gmc.xgmi.connected_to_cpu && !adev->gmc.is_app_apu) { + amdgpu_ras_query_ras_capablity_from_vbios(adev); } else { /* driver only manages a few IP blocks RAS feature * when GPU is connected cpu through XGMI */ @@ -2630,23 +3015,21 @@ static void amdgpu_ras_check_supported(struct amdgpu_device *adev) 1 << AMDGPU_RAS_BLOCK__MMHUB); } + /* apply asic specific settings (vega20 only for now) */ amdgpu_ras_get_quirks(adev); + /* query poison mode from umc/df ip callback */ + amdgpu_ras_query_poison_mode(adev); + +init_ras_enabled_flag: /* hw_supported needs to be aligned with RAS block mask. */ adev->ras_hw_enabled &= AMDGPU_RAS_BLOCK_MASK; + adev->ras_enabled = amdgpu_ras_enable == 0 ? 0 : + adev->ras_hw_enabled & amdgpu_ras_mask; - /* - * Disable ras feature for aqua vanjaram - * by default on apu platform. - */ - if (amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 6) && - adev->gmc.is_app_apu) - adev->ras_enabled = amdgpu_ras_enable != 1 ? 0 : - adev->ras_hw_enabled & amdgpu_ras_mask; - else - adev->ras_enabled = amdgpu_ras_enable == 0 ? 0 : - adev->ras_hw_enabled & amdgpu_ras_mask; + /* aca is disabled by default */ + adev->aca.is_enabled = false; } static void amdgpu_ras_counte_dw(struct work_struct *work) @@ -2674,44 +3057,41 @@ Out: pm_runtime_put_autosuspend(dev->dev); } -static void amdgpu_ras_query_poison_mode(struct amdgpu_device *adev) +static int amdgpu_get_ras_schema(struct amdgpu_device *adev) { - struct amdgpu_ras *con = amdgpu_ras_get_context(adev); - bool df_poison, umc_poison; + return amdgpu_ras_is_poison_mode_supported(adev) ? AMDGPU_RAS_ERROR__POISON : 0 | + AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE | + AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE | + AMDGPU_RAS_ERROR__PARITY; +} - /* poison setting is useless on SRIOV guest */ - if (amdgpu_sriov_vf(adev) || !con) +static void ras_event_mgr_init(struct ras_event_manager *mgr) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(mgr->seqnos); i++) + atomic64_set(&mgr->seqnos[i], 0); +} + +static void amdgpu_ras_event_mgr_init(struct amdgpu_device *adev) +{ + struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); + struct amdgpu_hive_info *hive; + + if (!ras) return; - /* Init poison supported flag, the default value is false */ - if (adev->gmc.xgmi.connected_to_cpu) { - /* enabled by default when GPU is connected to CPU */ - con->poison_supported = true; - } else if (adev->df.funcs && - adev->df.funcs->query_ras_poison_mode && - adev->umc.ras && - adev->umc.ras->query_ras_poison_mode) { - df_poison = - adev->df.funcs->query_ras_poison_mode(adev); - umc_poison = - adev->umc.ras->query_ras_poison_mode(adev); + hive = amdgpu_get_xgmi_hive(adev); + ras->event_mgr = hive ? &hive->event_mgr : &ras->__event_mgr; - /* Only poison is set in both DF and UMC, we can support it */ - if (df_poison && umc_poison) - con->poison_supported = true; - else if (df_poison != umc_poison) - dev_warn(adev->dev, - "Poison setting is inconsistent in DF/UMC(%d:%d)!\n", - df_poison, umc_poison); + /* init event manager with node 0 on xgmi system */ + if (!amdgpu_in_reset(adev)) { + if (!hive || adev->gmc.xgmi.node_id == 0) + ras_event_mgr_init(ras->event_mgr); } -} -static int amdgpu_get_ras_schema(struct amdgpu_device *adev) -{ - return amdgpu_ras_is_poison_mode_supported(adev) ? AMDGPU_RAS_ERROR__POISON : 0 | - AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE | - AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE | - AMDGPU_RAS_ERROR__PARITY; + if (hive) + amdgpu_put_xgmi_hive(hive); } int amdgpu_ras_init(struct amdgpu_device *adev) @@ -2722,10 +3102,10 @@ int amdgpu_ras_init(struct amdgpu_device *adev) if (con) return 0; - con = kmalloc(sizeof(struct amdgpu_ras) + + con = kzalloc(sizeof(*con) + sizeof(struct ras_manager) * AMDGPU_RAS_BLOCK_COUNT + sizeof(struct ras_manager) * AMDGPU_RAS_MCA_BLOCK_COUNT, - GFP_KERNEL|__GFP_ZERO); + GFP_KERNEL); if (!con) return -ENOMEM; @@ -2810,7 +3190,11 @@ int amdgpu_ras_init(struct amdgpu_device *adev) goto release_con; } - amdgpu_ras_query_poison_mode(adev); + /* Packed socket_id to ras feature mask bits[31:29] */ + if (adev->smuio.funcs && + adev->smuio.funcs->get_socket_id) + con->features |= ((adev->smuio.funcs->get_socket_id(adev)) << + AMDGPU_RAS_FEATURES_SOCKETID_SHIFT); /* Get RAS schema for particular SOC */ con->schema = amdgpu_get_ras_schema(adev); @@ -3016,7 +3400,7 @@ void amdgpu_ras_suspend(struct amdgpu_device *adev) amdgpu_ras_disable_all_features(adev, 0); /* Make sure all ras objects are disabled. */ - if (con->features) + if (AMDGPU_RAS_GET_FEATURES(con->features)) amdgpu_ras_disable_all_features(adev, 1); } @@ -3030,13 +3414,31 @@ int amdgpu_ras_late_init(struct amdgpu_device *adev) if (amdgpu_sriov_vf(adev)) return 0; + amdgpu_ras_event_mgr_init(adev); + + if (amdgpu_aca_is_enabled(adev)) { + if (amdgpu_in_reset(adev)) + r = amdgpu_aca_reset(adev); + else + r = amdgpu_aca_init(adev); + if (r) + return r; + + amdgpu_ras_set_aca_debug_mode(adev, false); + } else { + amdgpu_ras_set_mca_debug_mode(adev, false); + } + list_for_each_entry_safe(node, tmp, &adev->ras_list, node) { - if (!node->ras_obj) { + obj = node->ras_obj; + if (!obj) { dev_warn(adev->dev, "Warning: abnormal ras list node.\n"); continue; } - obj = node->ras_obj; + if (!amdgpu_ras_is_supported(adev, obj->ras_comm.block)) + continue; + if (obj->ras_late_init) { r = obj->ras_late_init(adev, &obj->ras_comm); if (r) { @@ -3061,7 +3463,7 @@ int amdgpu_ras_pre_fini(struct amdgpu_device *adev) /* Need disable ras on all IPs here before ip [hw/sw]fini */ - if (con->features) + if (AMDGPU_RAS_GET_FEATURES(con->features)) amdgpu_ras_disable_all_features(adev, 0); amdgpu_ras_recovery_fini(adev); return 0; @@ -3094,10 +3496,13 @@ int amdgpu_ras_fini(struct amdgpu_device *adev) amdgpu_ras_fs_fini(adev); amdgpu_ras_interrupt_remove_all(adev); - WARN(con->features, "Feature mask is not cleared"); + if (amdgpu_aca_is_enabled(adev)) + amdgpu_aca_fini(adev); - if (con->features) - amdgpu_ras_disable_all_features(adev, 1); + WARN(AMDGPU_RAS_GET_FEATURES(con->features), "Feature mask is not cleared"); + + if (AMDGPU_RAS_GET_FEATURES(con->features)) + amdgpu_ras_disable_all_features(adev, 0); cancel_delayed_work_sync(&con->ras_counte_delay_work); @@ -3107,14 +3512,59 @@ int amdgpu_ras_fini(struct amdgpu_device *adev) return 0; } +bool amdgpu_ras_get_fed_status(struct amdgpu_device *adev) +{ + struct amdgpu_ras *ras; + + ras = amdgpu_ras_get_context(adev); + if (!ras) + return false; + + return atomic_read(&ras->fed); +} + +void amdgpu_ras_set_fed(struct amdgpu_device *adev, bool status) +{ + struct amdgpu_ras *ras; + + ras = amdgpu_ras_get_context(adev); + if (ras) + atomic_set(&ras->fed, !!status); +} + +bool amdgpu_ras_event_id_is_valid(struct amdgpu_device *adev, u64 id) +{ + return !(id & BIT_ULL(63)); +} + +u64 amdgpu_ras_acquire_event_id(struct amdgpu_device *adev, enum ras_event_type type) +{ + struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); + u64 id; + + switch (type) { + case RAS_EVENT_TYPE_ISR: + id = (u64)atomic64_read(&ras->event_mgr->seqnos[type]); + break; + case RAS_EVENT_TYPE_INVALID: + default: + id = BIT_ULL(63) | 0ULL; + break; + } + + return id; +} + void amdgpu_ras_global_ras_isr(struct amdgpu_device *adev) { if (atomic_cmpxchg(&amdgpu_ras_in_intr, 0, 1) == 0) { struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); + u64 event_id = (u64)atomic64_inc_return(&ras->event_mgr->seqnos[RAS_EVENT_TYPE_ISR]); - dev_info(adev->dev, "uncorrectable hardware error" - "(ERREVENT_ATHUB_INTERRUPT) detected!\n"); + RAS_EVENT_LOG(adev, event_id, "uncorrectable hardware error" + "(ERREVENT_ATHUB_INTERRUPT) detected!\n"); + amdgpu_ras_set_fed(adev, true); ras->gpu_reset_flags |= AMDGPU_RAS_GPU_RESET_MODE1_RESET; amdgpu_ras_reset_gpu(adev); } @@ -3287,6 +3737,7 @@ int amdgpu_ras_is_supported(struct amdgpu_device *adev, block == AMDGPU_RAS_BLOCK__SDMA || block == AMDGPU_RAS_BLOCK__VCN || block == AMDGPU_RAS_BLOCK__JPEG) && + (amdgpu_ras_mask & (1 << block)) && amdgpu_ras_is_poison_mode_supported(adev) && amdgpu_ras_get_ras_block(adev, block, 0)) ret = 1; @@ -3303,6 +3754,73 @@ int amdgpu_ras_reset_gpu(struct amdgpu_device *adev) return 0; } +int amdgpu_ras_set_mca_debug_mode(struct amdgpu_device *adev, bool enable) +{ + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + int ret = 0; + + if (con) { + ret = amdgpu_mca_smu_set_debug_mode(adev, enable); + if (!ret) + con->is_aca_debug_mode = enable; + } + + return ret; +} + +int amdgpu_ras_set_aca_debug_mode(struct amdgpu_device *adev, bool enable) +{ + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + int ret = 0; + + if (con) { + if (amdgpu_aca_is_enabled(adev)) + ret = amdgpu_aca_smu_set_debug_mode(adev, enable); + else + ret = amdgpu_mca_smu_set_debug_mode(adev, enable); + if (!ret) + con->is_aca_debug_mode = enable; + } + + return ret; +} + +bool amdgpu_ras_get_aca_debug_mode(struct amdgpu_device *adev) +{ + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + const struct aca_smu_funcs *smu_funcs = adev->aca.smu_funcs; + const struct amdgpu_mca_smu_funcs *mca_funcs = adev->mca.mca_funcs; + + if (!con) + return false; + + if ((amdgpu_aca_is_enabled(adev) && smu_funcs && smu_funcs->set_debug_mode) || + (!amdgpu_aca_is_enabled(adev) && mca_funcs && mca_funcs->mca_set_debug_mode)) + return con->is_aca_debug_mode; + else + return true; +} + +bool amdgpu_ras_get_error_query_mode(struct amdgpu_device *adev, + unsigned int *error_query_mode) +{ + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + const struct amdgpu_mca_smu_funcs *mca_funcs = adev->mca.mca_funcs; + const struct aca_smu_funcs *smu_funcs = adev->aca.smu_funcs; + + if (!con) { + *error_query_mode = AMDGPU_RAS_INVALID_ERROR_QUERY; + return false; + } + + if ((smu_funcs && smu_funcs->set_debug_mode) || (mca_funcs && mca_funcs->mca_set_debug_mode)) + *error_query_mode = + (con->is_aca_debug_mode) ? AMDGPU_RAS_DIRECT_ERROR_QUERY : AMDGPU_RAS_FIRMWARE_ERROR_QUERY; + else + *error_query_mode = AMDGPU_RAS_DIRECT_ERROR_QUERY; + + return true; +} /* Register each ip ras block into amdgpu ras */ int amdgpu_ras_register_ras_block(struct amdgpu_device *adev, @@ -3485,10 +4003,8 @@ void amdgpu_ras_error_data_fini(struct ras_err_data *err_data) { struct ras_err_node *err_node, *tmp; - list_for_each_entry_safe(err_node, tmp, &err_data->err_node_list, node) { + list_for_each_entry_safe(err_node, tmp, &err_data->err_node_list, node) amdgpu_ras_error_node_release(err_node); - list_del(&err_node->node); - } } static struct ras_err_node *amdgpu_ras_error_find_node_by_id(struct ras_err_data *err_data, @@ -3502,11 +4018,10 @@ static struct ras_err_node *amdgpu_ras_error_find_node_by_id(struct ras_err_data for_each_ras_error(err_node, err_data) { ref_id = &err_node->err_info.mcm_info; - if ((mcm_info->socket_id >= 0 && mcm_info->socket_id != ref_id->socket_id) || - (mcm_info->die_id >= 0 && mcm_info->die_id != ref_id->die_id)) - continue; - return err_node; + if (mcm_info->socket_id == ref_id->socket_id && + mcm_info->die_id == ref_id->die_id) + return err_node; } return NULL; @@ -3525,8 +4040,23 @@ static struct ras_err_node *amdgpu_ras_error_node_new(void) return err_node; } +static int ras_err_info_cmp(void *priv, const struct list_head *a, const struct list_head *b) +{ + struct ras_err_node *nodea = container_of(a, struct ras_err_node, node); + struct ras_err_node *nodeb = container_of(b, struct ras_err_node, node); + struct amdgpu_smuio_mcm_config_info *infoa = &nodea->err_info.mcm_info; + struct amdgpu_smuio_mcm_config_info *infob = &nodeb->err_info.mcm_info; + + if (unlikely(infoa->socket_id != infob->socket_id)) + return infoa->socket_id - infob->socket_id; + else + return infoa->die_id - infob->die_id; + + return 0; +} + static struct ras_err_info *amdgpu_ras_error_get_info(struct ras_err_data *err_data, - struct amdgpu_smuio_mcm_config_info *mcm_info) + struct amdgpu_smuio_mcm_config_info *mcm_info) { struct ras_err_node *err_node; @@ -3538,16 +4068,43 @@ static struct ras_err_info *amdgpu_ras_error_get_info(struct ras_err_data *err_d if (!err_node) return NULL; + INIT_LIST_HEAD(&err_node->err_info.err_addr_list); + memcpy(&err_node->err_info.mcm_info, mcm_info, sizeof(*mcm_info)); err_data->err_list_count++; list_add_tail(&err_node->node, &err_data->err_node_list); + list_sort(NULL, &err_data->err_node_list, ras_err_info_cmp); return &err_node->err_info; } +void amdgpu_ras_add_mca_err_addr(struct ras_err_info *err_info, struct ras_err_addr *err_addr) +{ + struct ras_err_addr *mca_err_addr; + + mca_err_addr = kzalloc(sizeof(*mca_err_addr), GFP_KERNEL); + if (!mca_err_addr) + return; + + INIT_LIST_HEAD(&mca_err_addr->node); + + mca_err_addr->err_status = err_addr->err_status; + mca_err_addr->err_ipid = err_addr->err_ipid; + mca_err_addr->err_addr = err_addr->err_addr; + + list_add_tail(&mca_err_addr->node, &err_info->err_addr_list); +} + +void amdgpu_ras_del_mca_err_addr(struct ras_err_info *err_info, struct ras_err_addr *mca_err_addr) +{ + list_del(&mca_err_addr->node); + kfree(mca_err_addr); +} + int amdgpu_ras_error_statistic_ue_count(struct ras_err_data *err_data, - struct amdgpu_smuio_mcm_config_info *mcm_info, u64 count) + struct amdgpu_smuio_mcm_config_info *mcm_info, + struct ras_err_addr *err_addr, u64 count) { struct ras_err_info *err_info; @@ -3561,6 +4118,9 @@ int amdgpu_ras_error_statistic_ue_count(struct ras_err_data *err_data, if (!err_info) return -EINVAL; + if (err_addr && err_addr->err_status) + amdgpu_ras_add_mca_err_addr(err_info, err_addr); + err_info->ue_count += count; err_data->ue_count += count; @@ -3568,7 +4128,8 @@ int amdgpu_ras_error_statistic_ue_count(struct ras_err_data *err_data, } int amdgpu_ras_error_statistic_ce_count(struct ras_err_data *err_data, - struct amdgpu_smuio_mcm_config_info *mcm_info, u64 count) + struct amdgpu_smuio_mcm_config_info *mcm_info, + struct ras_err_addr *err_addr, u64 count) { struct ras_err_info *err_info; @@ -3587,3 +4148,135 @@ int amdgpu_ras_error_statistic_ce_count(struct ras_err_data *err_data, return 0; } + +int amdgpu_ras_error_statistic_de_count(struct ras_err_data *err_data, + struct amdgpu_smuio_mcm_config_info *mcm_info, + struct ras_err_addr *err_addr, u64 count) +{ + struct ras_err_info *err_info; + + if (!err_data || !mcm_info) + return -EINVAL; + + if (!count) + return 0; + + err_info = amdgpu_ras_error_get_info(err_data, mcm_info); + if (!err_info) + return -EINVAL; + + if (err_addr && err_addr->err_status) + amdgpu_ras_add_mca_err_addr(err_info, err_addr); + + err_info->de_count += count; + err_data->de_count += count; + + return 0; +} + +#define mmMP0_SMN_C2PMSG_92 0x1609C +#define mmMP0_SMN_C2PMSG_126 0x160BE +static void amdgpu_ras_boot_time_error_reporting(struct amdgpu_device *adev, + u32 instance, u32 boot_error) +{ + u32 socket_id, aid_id, hbm_id; + u32 reg_data; + u64 reg_addr; + + socket_id = AMDGPU_RAS_GPU_ERR_SOCKET_ID(boot_error); + aid_id = AMDGPU_RAS_GPU_ERR_AID_ID(boot_error); + hbm_id = AMDGPU_RAS_GPU_ERR_HBM_ID(boot_error); + + /* The pattern for smn addressing in other SOC could be different from + * the one for aqua_vanjaram. We should revisit the code if the pattern + * is changed. In such case, replace the aqua_vanjaram implementation + * with more common helper */ + reg_addr = (mmMP0_SMN_C2PMSG_92 << 2) + + aqua_vanjaram_encode_ext_smn_addressing(instance); + + reg_data = amdgpu_device_indirect_rreg_ext(adev, reg_addr); + dev_err(adev->dev, "socket: %d, aid: %d, firmware boot failed, fw status is 0x%x\n", + socket_id, aid_id, reg_data); + + if (AMDGPU_RAS_GPU_ERR_MEM_TRAINING(boot_error)) + dev_info(adev->dev, "socket: %d, aid: %d, hbm: %d, memory training failed\n", + socket_id, aid_id, hbm_id); + + if (AMDGPU_RAS_GPU_ERR_FW_LOAD(boot_error)) + dev_info(adev->dev, "socket: %d, aid: %d, firmware load failed at boot time\n", + socket_id, aid_id); + + if (AMDGPU_RAS_GPU_ERR_WAFL_LINK_TRAINING(boot_error)) + dev_info(adev->dev, "socket: %d, aid: %d, wafl link training failed\n", + socket_id, aid_id); + + if (AMDGPU_RAS_GPU_ERR_XGMI_LINK_TRAINING(boot_error)) + dev_info(adev->dev, "socket: %d, aid: %d, xgmi link training failed\n", + socket_id, aid_id); + + if (AMDGPU_RAS_GPU_ERR_USR_CP_LINK_TRAINING(boot_error)) + dev_info(adev->dev, "socket: %d, aid: %d, usr cp link training failed\n", + socket_id, aid_id); + + if (AMDGPU_RAS_GPU_ERR_USR_DP_LINK_TRAINING(boot_error)) + dev_info(adev->dev, "socket: %d, aid: %d, usr dp link training failed\n", + socket_id, aid_id); + + if (AMDGPU_RAS_GPU_ERR_HBM_MEM_TEST(boot_error)) + dev_info(adev->dev, "socket: %d, aid: %d, hbm: %d, hbm memory test failed\n", + socket_id, aid_id, hbm_id); + + if (AMDGPU_RAS_GPU_ERR_HBM_BIST_TEST(boot_error)) + dev_info(adev->dev, "socket: %d, aid: %d, hbm: %d, hbm bist test failed\n", + socket_id, aid_id, hbm_id); +} + +static int amdgpu_ras_wait_for_boot_complete(struct amdgpu_device *adev, + u32 instance, u32 *boot_error) +{ + u32 reg_addr; + u32 reg_data; + int retry_loop; + + reg_addr = (mmMP0_SMN_C2PMSG_92 << 2) + + aqua_vanjaram_encode_ext_smn_addressing(instance); + + for (retry_loop = 0; retry_loop < AMDGPU_RAS_BOOT_STATUS_POLLING_LIMIT; retry_loop++) { + reg_data = amdgpu_device_indirect_rreg_ext(adev, reg_addr); + if ((reg_data & AMDGPU_RAS_BOOT_STATUS_MASK) == AMDGPU_RAS_BOOT_STEADY_STATUS) { + *boot_error = AMDGPU_RAS_BOOT_SUCEESS; + return 0; + } + msleep(1); + } + + /* The pattern for smn addressing in other SOC could be different from + * the one for aqua_vanjaram. We should revisit the code if the pattern + * is changed. In such case, replace the aqua_vanjaram implementation + * with more common helper */ + reg_addr = (mmMP0_SMN_C2PMSG_126 << 2) + + aqua_vanjaram_encode_ext_smn_addressing(instance); + + for (retry_loop = 0; retry_loop < AMDGPU_RAS_BOOT_STATUS_POLLING_LIMIT; retry_loop++) { + reg_data = amdgpu_device_indirect_rreg_ext(adev, reg_addr); + if (AMDGPU_RAS_GPU_ERR_BOOT_STATUS(reg_data)) { + *boot_error = reg_data; + return 0; + } + msleep(1); + } + + *boot_error = reg_data; + return -ETIME; +} + +void amdgpu_ras_query_boot_status(struct amdgpu_device *adev, u32 num_instances) +{ + u32 boot_error = 0; + u32 i; + + for (i = 0; i < num_instances; i++) { + if (amdgpu_ras_wait_for_boot_complete(adev, i, &boot_error)) + amdgpu_ras_boot_time_error_reporting(adev, i, boot_error); + } +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h index 0a5c8a107fb2..8d26989c75c8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h @@ -29,9 +29,28 @@ #include "ta_ras_if.h" #include "amdgpu_ras_eeprom.h" #include "amdgpu_smuio.h" +#include "amdgpu_aca.h" struct amdgpu_iv_entry; +#define AMDGPU_RAS_GPU_ERR_MEM_TRAINING(x) AMDGPU_GET_REG_FIELD(x, 0, 0) +#define AMDGPU_RAS_GPU_ERR_FW_LOAD(x) AMDGPU_GET_REG_FIELD(x, 1, 1) +#define AMDGPU_RAS_GPU_ERR_WAFL_LINK_TRAINING(x) AMDGPU_GET_REG_FIELD(x, 2, 2) +#define AMDGPU_RAS_GPU_ERR_XGMI_LINK_TRAINING(x) AMDGPU_GET_REG_FIELD(x, 3, 3) +#define AMDGPU_RAS_GPU_ERR_USR_CP_LINK_TRAINING(x) AMDGPU_GET_REG_FIELD(x, 4, 4) +#define AMDGPU_RAS_GPU_ERR_USR_DP_LINK_TRAINING(x) AMDGPU_GET_REG_FIELD(x, 5, 5) +#define AMDGPU_RAS_GPU_ERR_HBM_MEM_TEST(x) AMDGPU_GET_REG_FIELD(x, 6, 6) +#define AMDGPU_RAS_GPU_ERR_HBM_BIST_TEST(x) AMDGPU_GET_REG_FIELD(x, 7, 7) +#define AMDGPU_RAS_GPU_ERR_SOCKET_ID(x) AMDGPU_GET_REG_FIELD(x, 10, 8) +#define AMDGPU_RAS_GPU_ERR_AID_ID(x) AMDGPU_GET_REG_FIELD(x, 12, 11) +#define AMDGPU_RAS_GPU_ERR_HBM_ID(x) AMDGPU_GET_REG_FIELD(x, 13, 13) +#define AMDGPU_RAS_GPU_ERR_BOOT_STATUS(x) AMDGPU_GET_REG_FIELD(x, 31, 31) + +#define AMDGPU_RAS_BOOT_STATUS_POLLING_LIMIT 1000 +#define AMDGPU_RAS_BOOT_STEADY_STATUS 0xBA +#define AMDGPU_RAS_BOOT_STATUS_MASK 0xFF +#define AMDGPU_RAS_BOOT_SUCEESS 0x80000000 + #define AMDGPU_RAS_FLAG_INIT_BY_VBIOS (0x1 << 0) /* position of instance value in sub_block_index of * ta_ras_trigger_error_input, the sub block uses lower 12 bits @@ -39,6 +58,20 @@ struct amdgpu_iv_entry; #define AMDGPU_RAS_INST_MASK 0xfffff000 #define AMDGPU_RAS_INST_SHIFT 0xc +#define AMDGPU_RAS_FEATURES_SOCKETID_SHIFT 29 +#define AMDGPU_RAS_FEATURES_SOCKETID_MASK 0xe0000000 + +/* The high three bits indicates socketid */ +#define AMDGPU_RAS_GET_FEATURES(val) ((val) & ~AMDGPU_RAS_FEATURES_SOCKETID_MASK) + +#define RAS_EVENT_LOG(_adev, _id, _fmt, ...) \ +do { \ + if (amdgpu_ras_event_id_is_valid((_adev), (_id))) \ + dev_info((_adev)->dev, "{%llu}" _fmt, (_id), ##__VA_ARGS__); \ + else \ + dev_info((_adev)->dev, _fmt, ##__VA_ARGS__); \ +} while (0) + enum amdgpu_ras_block { AMDGPU_RAS_BLOCK__UMC = 0, AMDGPU_RAS_BLOCK__SDMA, @@ -57,6 +90,8 @@ enum amdgpu_ras_block { AMDGPU_RAS_BLOCK__MCA, AMDGPU_RAS_BLOCK__VCN, AMDGPU_RAS_BLOCK__JPEG, + AMDGPU_RAS_BLOCK__IH, + AMDGPU_RAS_BLOCK__MPIO, AMDGPU_RAS_BLOCK__LAST }; @@ -320,6 +355,12 @@ enum amdgpu_ras_ret { AMDGPU_RAS_PT, }; +enum amdgpu_ras_error_query_mode { + AMDGPU_RAS_INVALID_ERROR_QUERY = 0, + AMDGPU_RAS_DIRECT_ERROR_QUERY = 1, + AMDGPU_RAS_FIRMWARE_ERROR_QUERY = 2, +}; + /* ras error status reisger fields */ #define ERR_STATUS_LO__ERR_STATUS_VALID_FLAG__SHIFT 0x0 #define ERR_STATUS_LO__ERR_STATUS_VALID_FLAG_MASK 0x00000001L @@ -386,6 +427,21 @@ struct umc_ecc_info { int record_ce_addr_supported; }; +enum ras_event_type { + RAS_EVENT_TYPE_INVALID = -1, + RAS_EVENT_TYPE_ISR = 0, + RAS_EVENT_TYPE_COUNT, +}; + +struct ras_event_manager { + atomic64_t seqnos[RAS_EVENT_TYPE_COUNT]; +}; + +struct ras_query_context { + enum ras_event_type type; + u64 event_id; +}; + struct amdgpu_ras { /* ras infrastructure */ /* for ras itself. */ @@ -434,9 +490,23 @@ struct amdgpu_ras { /* Indicates smu whether need update bad channel info */ bool update_channel_flag; + /* Record status of smu mca debug mode */ + bool is_aca_debug_mode; /* Record special requirements of gpu reset caller */ uint32_t gpu_reset_flags; + + struct task_struct *page_retirement_thread; + wait_queue_head_t page_retirement_wq; + struct mutex page_retirement_lock; + atomic_t page_retirement_req_cnt; + /* Fatal error detected flag */ + atomic_t fed; + + /* RAS event manager */ + struct ras_event_manager __event_mgr; + struct ras_event_manager *event_mgr; + }; struct ras_fs_data { @@ -444,10 +514,19 @@ struct ras_fs_data { char debugfs_name[32]; }; +struct ras_err_addr { + struct list_head node; + uint64_t err_status; + uint64_t err_ipid; + uint64_t err_addr; +}; + struct ras_err_info { struct amdgpu_smuio_mcm_config_info mcm_info; u64 ce_count; u64 ue_count; + u64 de_count; + struct list_head err_addr_list; }; struct ras_err_node { @@ -458,6 +537,7 @@ struct ras_err_node { struct ras_err_data { unsigned long ue_count; unsigned long ce_count; + unsigned long de_count; unsigned long err_addr_cnt; struct eeprom_table_record *err_addr; u32 err_list_count; @@ -513,10 +593,9 @@ struct ras_manager { /* IH data */ struct ras_ih_data ih_data; - struct { - unsigned long ue_count; - unsigned long ce_count; - } err_data; + struct ras_err_data err_data; + + struct aca_handle aca_handle; }; struct ras_badpage { @@ -536,6 +615,7 @@ struct ras_query_if { struct ras_common_if head; unsigned long ue_count; unsigned long ce_count; + unsigned long de_count; }; struct ras_inject_if { @@ -714,6 +794,8 @@ void amdgpu_ras_debugfs_create_all(struct amdgpu_device *adev); int amdgpu_ras_query_error_status(struct amdgpu_device *adev, struct ras_query_if *info); +int amdgpu_ras_reset_error_count(struct amdgpu_device *adev, + enum amdgpu_ras_block block); int amdgpu_ras_reset_error_status(struct amdgpu_device *adev, enum amdgpu_ras_block block); @@ -766,6 +848,12 @@ struct amdgpu_ras* amdgpu_ras_get_context(struct amdgpu_device *adev); int amdgpu_ras_set_context(struct amdgpu_device *adev, struct amdgpu_ras *ras_con); +int amdgpu_ras_set_mca_debug_mode(struct amdgpu_device *adev, bool enable); +int amdgpu_ras_set_aca_debug_mode(struct amdgpu_device *adev, bool enable); +bool amdgpu_ras_get_aca_debug_mode(struct amdgpu_device *adev); +bool amdgpu_ras_get_error_query_mode(struct amdgpu_device *adev, + unsigned int *mode); + int amdgpu_ras_register_ras_block(struct amdgpu_device *adev, struct amdgpu_ras_block_object *ras_block_obj); void amdgpu_ras_interrupt_fatal_error_handler(struct amdgpu_device *adev); @@ -794,8 +882,31 @@ void amdgpu_ras_inst_reset_ras_error_count(struct amdgpu_device *adev, int amdgpu_ras_error_data_init(struct ras_err_data *err_data); void amdgpu_ras_error_data_fini(struct ras_err_data *err_data); int amdgpu_ras_error_statistic_ce_count(struct ras_err_data *err_data, - struct amdgpu_smuio_mcm_config_info *mcm_info, u64 count); + struct amdgpu_smuio_mcm_config_info *mcm_info, + struct ras_err_addr *err_addr, u64 count); int amdgpu_ras_error_statistic_ue_count(struct ras_err_data *err_data, - struct amdgpu_smuio_mcm_config_info *mcm_info, u64 count); - + struct amdgpu_smuio_mcm_config_info *mcm_info, + struct ras_err_addr *err_addr, u64 count); +int amdgpu_ras_error_statistic_de_count(struct ras_err_data *err_data, + struct amdgpu_smuio_mcm_config_info *mcm_info, + struct ras_err_addr *err_addr, u64 count); +void amdgpu_ras_query_boot_status(struct amdgpu_device *adev, u32 num_instances); +int amdgpu_ras_bind_aca(struct amdgpu_device *adev, enum amdgpu_ras_block blk, + const struct aca_info *aca_info, void *data); +int amdgpu_ras_unbind_aca(struct amdgpu_device *adev, enum amdgpu_ras_block blk); + +ssize_t amdgpu_ras_aca_sysfs_read(struct device *dev, struct device_attribute *attr, + struct aca_handle *handle, char *buf, void *data); + +void amdgpu_ras_add_mca_err_addr(struct ras_err_info *err_info, + struct ras_err_addr *err_addr); + +void amdgpu_ras_del_mca_err_addr(struct ras_err_info *err_info, + struct ras_err_addr *mca_err_addr); + +void amdgpu_ras_set_fed(struct amdgpu_device *adev, bool status); +bool amdgpu_ras_get_fed_status(struct amdgpu_device *adev); + +bool amdgpu_ras_event_id_is_valid(struct amdgpu_device *adev, u64 id); +u64 amdgpu_ras_acquire_event_id(struct amdgpu_device *adev, enum ras_event_type type); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c index 65aa218380be..06a62a8a992e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c @@ -214,6 +214,12 @@ static bool __get_eeprom_i2c_addr(struct amdgpu_device *adev, control->i2c_address = EEPROM_I2C_MADDR_0; return true; case IP_VERSION(13, 0, 0): + if (strnstr(atom_ctx->vbios_pn, "D707", + sizeof(atom_ctx->vbios_pn))) + control->i2c_address = EEPROM_I2C_MADDR_0; + else + control->i2c_address = EEPROM_I2C_MADDR_4; + return true; case IP_VERSION(13, 0, 6): case IP_VERSION(13, 0, 10): control->i2c_address = EEPROM_I2C_MADDR_4; @@ -398,6 +404,22 @@ static int amdgpu_ras_eeprom_correct_header_tag( return res; } +static void amdgpu_ras_set_eeprom_table_version(struct amdgpu_ras_eeprom_control *control) +{ + struct amdgpu_device *adev = to_amdgpu_device(control); + struct amdgpu_ras_eeprom_table_header *hdr = &control->tbl_hdr; + + switch (amdgpu_ip_version(adev, UMC_HWIP, 0)) { + case IP_VERSION(8, 10, 0): + case IP_VERSION(12, 0, 0): + hdr->version = RAS_TABLE_VER_V2_1; + return; + default: + hdr->version = RAS_TABLE_VER_V1; + return; + } +} + /** * amdgpu_ras_eeprom_reset_table -- Reset the RAS EEPROM table * @control: pointer to control structure @@ -417,11 +439,7 @@ int amdgpu_ras_eeprom_reset_table(struct amdgpu_ras_eeprom_control *control) mutex_lock(&control->ras_tbl_mutex); hdr->header = RAS_TABLE_HDR_VAL; - if (adev->umc.ras && - adev->umc.ras->set_eeprom_table_version) - adev->umc.ras->set_eeprom_table_version(hdr); - else - hdr->version = RAS_TABLE_VER_V1; + amdgpu_ras_set_eeprom_table_version(control); if (hdr->version == RAS_TABLE_VER_V2_1) { hdr->first_rec_offset = RAS_RECORD_START_V2_1; @@ -729,6 +747,9 @@ amdgpu_ras_eeprom_update_header(struct amdgpu_ras_eeprom_control *control) control->tbl_rai.rma_status = GPU_RETIRED__ECC_REACH_THRESHOLD; control->tbl_rai.health_percent = 0; } + + /* ignore the -ENOTSUPP return value */ + amdgpu_dpm_send_rma_reason(adev); } if (control->tbl_hdr.version == RAS_TABLE_VER_V2_1) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h index 381101d2bf05..50fcd86e1033 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h @@ -164,4 +164,29 @@ static inline void amdgpu_res_next(struct amdgpu_res_cursor *cur, uint64_t size) } } +/** + * amdgpu_res_cleared - check if blocks are cleared + * + * @cur: the cursor to extract the block + * + * Check if the @cur block is cleared + */ +static inline bool amdgpu_res_cleared(struct amdgpu_res_cursor *cur) +{ + struct drm_buddy_block *block; + + switch (cur->mem_type) { + case TTM_PL_VRAM: + block = cur->node; + + if (!amdgpu_vram_mgr_is_cleared(block)) + return false; + break; + default: + return false; + } + + return true; +} + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c index 970bfece775c..ea4873f6ccd1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c @@ -158,6 +158,3 @@ void amdgpu_device_unlock_reset_domain(struct amdgpu_reset_domain *reset_domain) atomic_set(&reset_domain->in_gpu_reset, 0); up_write(&reset_domain->sem); } - - - diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h index 471d789b33a5..b11d190ece53 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h @@ -32,7 +32,7 @@ enum AMDGPU_RESET_FLAGS { AMDGPU_NEED_FULL_RESET = 0, AMDGPU_SKIP_HW_RESET = 1, - AMDGPU_RESET_FOR_DEVICE_REMOVE = 2, + AMDGPU_SKIP_COREDUMP = 2, }; struct amdgpu_reset_context { @@ -89,7 +89,6 @@ struct amdgpu_reset_domain { atomic_t reset_res; }; - int amdgpu_reset_init(struct amdgpu_device *adev); int amdgpu_reset_fini(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c index 6a80d3ec887e..06f0a6534a94 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c @@ -524,46 +524,58 @@ static ssize_t amdgpu_debugfs_mqd_read(struct file *f, char __user *buf, { struct amdgpu_ring *ring = file_inode(f)->i_private; volatile u32 *mqd; - int r; + u32 *kbuf; + int r, i; uint32_t value, result; if (*pos & 3 || size & 3) return -EINVAL; - result = 0; + kbuf = kmalloc(ring->mqd_size, GFP_KERNEL); + if (!kbuf) + return -ENOMEM; r = amdgpu_bo_reserve(ring->mqd_obj, false); if (unlikely(r != 0)) - return r; + goto err_free; r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&mqd); - if (r) { - amdgpu_bo_unreserve(ring->mqd_obj); - return r; - } + if (r) + goto err_unreserve; + + /* + * Copy to local buffer to avoid put_user(), which might fault + * and acquire mmap_sem, under reservation_ww_class_mutex. + */ + for (i = 0; i < ring->mqd_size/sizeof(u32); i++) + kbuf[i] = mqd[i]; + + amdgpu_bo_kunmap(ring->mqd_obj); + amdgpu_bo_unreserve(ring->mqd_obj); + result = 0; while (size) { if (*pos >= ring->mqd_size) - goto done; + break; - value = mqd[*pos/4]; + value = kbuf[*pos/4]; r = put_user(value, (uint32_t *)buf); if (r) - goto done; + goto err_free; buf += 4; result += 4; size -= 4; *pos += 4; } -done: - amdgpu_bo_kunmap(ring->mqd_obj); - mqd = NULL; - amdgpu_bo_unreserve(ring->mqd_obj); - if (r) - return r; - + kfree(kbuf); return result; + +err_unreserve: + amdgpu_bo_unreserve(ring->mqd_obj); +err_free: + kfree(kbuf); + return r; } static const struct file_operations amdgpu_debugfs_mqd_fops = { @@ -635,6 +647,7 @@ int amdgpu_ring_test_helper(struct amdgpu_ring *ring) ring->name); ring->sched.ready = !r; + return r; } @@ -642,6 +655,10 @@ static void amdgpu_ring_to_mqd_prop(struct amdgpu_ring *ring, struct amdgpu_mqd_prop *prop) { struct amdgpu_device *adev = ring->adev; + bool is_high_prio_compute = ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE && + amdgpu_gfx_is_high_priority_compute_queue(adev, ring); + bool is_high_prio_gfx = ring->funcs->type == AMDGPU_RING_TYPE_GFX && + amdgpu_gfx_is_high_priority_graphics_queue(adev, ring); memset(prop, 0, sizeof(*prop)); @@ -659,10 +676,8 @@ static void amdgpu_ring_to_mqd_prop(struct amdgpu_ring *ring, */ prop->hqd_active = ring->funcs->type == AMDGPU_RING_TYPE_KIQ; - if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE && - amdgpu_gfx_is_high_priority_compute_queue(adev, ring)) || - (ring->funcs->type == AMDGPU_RING_TYPE_GFX && - amdgpu_gfx_is_high_priority_graphics_queue(adev, ring))) { + prop->allow_tunneling = is_high_prio_compute; + if (is_high_prio_compute || is_high_prio_gfx) { prop->hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH; prop->hqd_queue_priority = AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM; } @@ -715,3 +730,14 @@ void amdgpu_ring_ib_on_emit_de(struct amdgpu_ring *ring) if (ring->is_sw_ring) amdgpu_sw_ring_ib_mark_offset(ring, AMDGPU_MUX_OFFSET_TYPE_DE); } + +bool amdgpu_ring_sched_ready(struct amdgpu_ring *ring) +{ + if (!ring) + return false; + + if (ring->no_scheduler || !drm_sched_wqueue_ready(&ring->sched)) + return false; + + return true; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index bbb53720a018..582053f1cd56 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -209,8 +209,7 @@ struct amdgpu_ring_funcs { void (*insert_end)(struct amdgpu_ring *ring); /* pad the indirect buffer to the necessary number of dw */ void (*pad_ib)(struct amdgpu_ring *ring, struct amdgpu_ib *ib); - unsigned (*init_cond_exec)(struct amdgpu_ring *ring); - void (*patch_cond_exec)(struct amdgpu_ring *ring, unsigned offset); + unsigned (*init_cond_exec)(struct amdgpu_ring *ring, uint64_t addr); /* note usage for clock and power gating */ void (*begin_use)(struct amdgpu_ring *ring); void (*end_use)(struct amdgpu_ring *ring); @@ -286,6 +285,9 @@ struct amdgpu_ring { unsigned cond_exe_offs; u64 cond_exe_gpu_addr; volatile u32 *cond_exe_cpu_addr; + unsigned int set_q_mode_offs; + volatile u32 *set_q_mode_ptr; + u64 set_q_mode_token; unsigned vm_hub; unsigned vm_inv_eng; struct dma_fence *vmid_wait; @@ -327,8 +329,7 @@ struct amdgpu_ring { #define amdgpu_ring_emit_reg_write_reg_wait(r, d0, d1, v, m) (r)->funcs->emit_reg_write_reg_wait((r), (d0), (d1), (v), (m)) #define amdgpu_ring_emit_frame_cntl(r, b, s) (r)->funcs->emit_frame_cntl((r), (b), (s)) #define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib))) -#define amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r)) -#define amdgpu_ring_patch_cond_exec(r,o) (r)->funcs->patch_cond_exec((r),(o)) +#define amdgpu_ring_init_cond_exec(r, a) (r)->funcs->init_cond_exec((r), (a)) #define amdgpu_ring_preempt_ib(r) (r)->funcs->preempt_ib(r) #define amdgpu_ring_patch_cntl(r, o) ((r)->funcs->patch_cntl((r), (o))) #define amdgpu_ring_patch_ce(r, o) ((r)->funcs->patch_ce((r), (o))) @@ -411,6 +412,30 @@ static inline void amdgpu_ring_write_multiple(struct amdgpu_ring *ring, ring->count_dw -= count_dw; } +/** + * amdgpu_ring_patch_cond_exec - patch dw count of conditional execute + * @ring: amdgpu_ring structure + * @offset: offset returned by amdgpu_ring_init_cond_exec + * + * Calculate the dw count and patch it into a cond_exec command. + */ +static inline void amdgpu_ring_patch_cond_exec(struct amdgpu_ring *ring, + unsigned int offset) +{ + unsigned cur; + + if (!ring->funcs->init_cond_exec) + return; + + WARN_ON(offset > ring->buf_mask); + WARN_ON(ring->ring[offset] != 0); + + cur = (ring->wptr - 1) & ring->buf_mask; + if (cur < offset) + cur += ring->ring_size >> 2; + ring->ring[offset] = cur - offset; +} + #define amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset) \ (ring->is_mes_queue && ring->mes_ctx ? \ (ring->mes_ctx->meta_data_gpu_addr + offset) : 0) @@ -450,5 +475,5 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, int amdgpu_ib_pool_init(struct amdgpu_device *adev); void amdgpu_ib_pool_fini(struct amdgpu_device *adev); int amdgpu_ib_ring_tests(struct amdgpu_device *adev); - +bool amdgpu_ring_sched_ready(struct amdgpu_ring *ring); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c index e1ee1c7117fb..d234b7ccfaaf 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c @@ -159,9 +159,7 @@ int amdgpu_ring_mux_init(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring, mux->ring_entry_size = entry_size; mux->s_resubmit = false; - amdgpu_mux_chunk_slab = kmem_cache_create("amdgpu_mux_chunk", - sizeof(struct amdgpu_mux_chunk), 0, - SLAB_HWCACHE_ALIGN, NULL); + amdgpu_mux_chunk_slab = KMEM_CACHE(amdgpu_mux_chunk, SLAB_HWCACHE_ALIGN); if (!amdgpu_mux_chunk_slab) { DRM_ERROR("create amdgpu_mux_chunk cache failed\n"); return -ENOMEM; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c index 35e0ae9acadc..db5791e1a7ce 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c @@ -241,7 +241,7 @@ void amdgpu_gfx_rlc_setup_cp_table(struct amdgpu_device *adev) table_size = le32_to_cpu(hdr->jt_size); } - for (i = 0; i < table_size; i ++) { + for (i = 0; i < table_size; i++) { dst_ptr[bo_offset + i] = cpu_to_le32(le32_to_cpu(fw_data[table_offset + i])); } @@ -531,13 +531,12 @@ int amdgpu_gfx_rlc_init_microcode(struct amdgpu_device *adev, if (version_major == 2 && version_minor == 1) adev->gfx.rlc.is_rlc_v2_1 = true; - if (version_minor >= 0) { - err = amdgpu_gfx_rlc_init_microcode_v2_0(adev); - if (err) { - dev_err(adev->dev, "fail to init rlc v2_0 microcode\n"); - return err; - } + err = amdgpu_gfx_rlc_init_microcode_v2_0(adev); + if (err) { + dev_err(adev->dev, "fail to init rlc v2_0 microcode\n"); + return err; } + if (version_minor >= 1) amdgpu_gfx_rlc_init_microcode_v2_1(adev); if (version_minor >= 2) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h index b591d33af264..5a17e0ff2ab8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h @@ -169,7 +169,7 @@ struct amdgpu_rlc_funcs { void (*stop)(struct amdgpu_device *adev); void (*reset)(struct amdgpu_device *adev); void (*start)(struct amdgpu_device *adev); - void (*update_spm_vmid)(struct amdgpu_device *adev, unsigned vmid); + void (*update_spm_vmid)(struct amdgpu_device *adev, struct amdgpu_ring *ring, unsigned vmid); bool (*is_rlcg_access_range)(struct amdgpu_device *adev, uint32_t reg); }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c index e8cbc4142d80..1d9d187de6ee 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c @@ -292,27 +292,6 @@ out: return err; } -void amdgpu_sdma_unset_buffer_funcs_helper(struct amdgpu_device *adev) -{ - struct amdgpu_ring *sdma; - int i; - - for (i = 0; i < adev->sdma.num_instances; i++) { - if (adev->sdma.has_page_queue) { - sdma = &adev->sdma.instance[i].page; - if (adev->mman.buffer_funcs_ring == sdma) { - amdgpu_ttm_set_buffer_funcs_status(adev, false); - break; - } - } - sdma = &adev->sdma.instance[i].ring; - if (adev->mman.buffer_funcs_ring == sdma) { - amdgpu_ttm_set_buffer_funcs_status(adev, false); - break; - } - } -} - int amdgpu_sdma_ras_sw_init(struct amdgpu_device *adev) { int err = 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h index 513ac22120c1..173a2a308078 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h @@ -169,7 +169,6 @@ int amdgpu_sdma_init_microcode(struct amdgpu_device *adev, u32 instance, bool duplicate); void amdgpu_sdma_destroy_inst_ctx(struct amdgpu_device *adev, bool duplicate); -void amdgpu_sdma_unset_buffer_funcs_helper(struct amdgpu_device *adev); int amdgpu_sdma_ras_sw_init(struct amdgpu_device *adev); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c new file mode 100644 index 000000000000..e22cb2b5cd92 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.c @@ -0,0 +1,249 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "amdgpu.h" +#include "amdgpu_seq64.h" + +#include <drm/drm_exec.h> + +/** + * DOC: amdgpu_seq64 + * + * amdgpu_seq64 allocates a 64bit memory on each request in sequence order. + * seq64 driver is required for user queue fence memory allocation, TLB + * counters and VM updates. It has maximum count of 32768 64 bit slots. + */ + +/** + * amdgpu_seq64_get_va_base - Get the seq64 va base address + * + * @adev: amdgpu_device pointer + * + * Returns: + * va base address on success + */ +static inline u64 amdgpu_seq64_get_va_base(struct amdgpu_device *adev) +{ + return AMDGPU_VA_RESERVED_SEQ64_START(adev); +} + +/** + * amdgpu_seq64_map - Map the seq64 memory to VM + * + * @adev: amdgpu_device pointer + * @vm: vm pointer + * @bo_va: bo_va pointer + * + * Map the seq64 memory to the given VM. + * + * Returns: + * 0 on success or a negative error code on failure + */ +int amdgpu_seq64_map(struct amdgpu_device *adev, struct amdgpu_vm *vm, + struct amdgpu_bo_va **bo_va) +{ + struct amdgpu_bo *bo; + struct drm_exec exec; + u64 seq64_addr; + int r; + + bo = adev->seq64.sbo; + if (!bo) + return -EINVAL; + + drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0); + drm_exec_until_all_locked(&exec) { + r = amdgpu_vm_lock_pd(vm, &exec, 0); + if (likely(!r)) + r = drm_exec_lock_obj(&exec, &bo->tbo.base); + drm_exec_retry_on_contention(&exec); + if (unlikely(r)) + goto error; + } + + *bo_va = amdgpu_vm_bo_add(adev, vm, bo); + if (!*bo_va) { + r = -ENOMEM; + goto error; + } + + seq64_addr = amdgpu_seq64_get_va_base(adev); + r = amdgpu_vm_bo_map(adev, *bo_va, seq64_addr, 0, AMDGPU_VA_RESERVED_SEQ64_SIZE, + AMDGPU_PTE_READABLE); + if (r) { + DRM_ERROR("failed to do bo_map on userq sem, err=%d\n", r); + amdgpu_vm_bo_del(adev, *bo_va); + goto error; + } + + r = amdgpu_vm_bo_update(adev, *bo_va, false); + if (r) { + DRM_ERROR("failed to do vm_bo_update on userq sem\n"); + amdgpu_vm_bo_del(adev, *bo_va); + goto error; + } + +error: + drm_exec_fini(&exec); + return r; +} + +/** + * amdgpu_seq64_unmap - Unmap the seq64 memory + * + * @adev: amdgpu_device pointer + * @fpriv: DRM file private + * + * Unmap the seq64 memory from the given VM. + */ +void amdgpu_seq64_unmap(struct amdgpu_device *adev, struct amdgpu_fpriv *fpriv) +{ + struct amdgpu_vm *vm; + struct amdgpu_bo *bo; + struct drm_exec exec; + int r; + + if (!fpriv->seq64_va) + return; + + bo = adev->seq64.sbo; + if (!bo) + return; + + vm = &fpriv->vm; + + drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0); + drm_exec_until_all_locked(&exec) { + r = amdgpu_vm_lock_pd(vm, &exec, 0); + if (likely(!r)) + r = drm_exec_lock_obj(&exec, &bo->tbo.base); + drm_exec_retry_on_contention(&exec); + if (unlikely(r)) + goto error; + } + + amdgpu_vm_bo_del(adev, fpriv->seq64_va); + + fpriv->seq64_va = NULL; + +error: + drm_exec_fini(&exec); +} + +/** + * amdgpu_seq64_alloc - Allocate a 64 bit memory + * + * @adev: amdgpu_device pointer + * @va: VA to access the seq in process address space + * @cpu_addr: CPU address to access the seq + * + * Alloc a 64 bit memory from seq64 pool. + * + * Returns: + * 0 on success or a negative error code on failure + */ +int amdgpu_seq64_alloc(struct amdgpu_device *adev, u64 *va, u64 **cpu_addr) +{ + unsigned long bit_pos; + + bit_pos = find_first_zero_bit(adev->seq64.used, adev->seq64.num_sem); + if (bit_pos >= adev->seq64.num_sem) + return -ENOSPC; + + __set_bit(bit_pos, adev->seq64.used); + *va = bit_pos * sizeof(u64) + amdgpu_seq64_get_va_base(adev); + *cpu_addr = bit_pos + adev->seq64.cpu_base_addr; + + return 0; +} + +/** + * amdgpu_seq64_free - Free the given 64 bit memory + * + * @adev: amdgpu_device pointer + * @va: gpu start address to be freed + * + * Free the given 64 bit memory from seq64 pool. + */ +void amdgpu_seq64_free(struct amdgpu_device *adev, u64 va) +{ + unsigned long bit_pos; + + bit_pos = (va - amdgpu_seq64_get_va_base(adev)) / sizeof(u64); + if (bit_pos < adev->seq64.num_sem) + __clear_bit(bit_pos, adev->seq64.used); +} + +/** + * amdgpu_seq64_fini - Cleanup seq64 driver + * + * @adev: amdgpu_device pointer + * + * Free the memory space allocated for seq64. + * + */ +void amdgpu_seq64_fini(struct amdgpu_device *adev) +{ + amdgpu_bo_free_kernel(&adev->seq64.sbo, + NULL, + (void **)&adev->seq64.cpu_base_addr); +} + +/** + * amdgpu_seq64_init - Initialize seq64 driver + * + * @adev: amdgpu_device pointer + * + * Allocate the required memory space for seq64. + * + * Returns: + * 0 on success or a negative error code on failure + */ +int amdgpu_seq64_init(struct amdgpu_device *adev) +{ + int r; + + if (adev->seq64.sbo) + return 0; + + /* + * AMDGPU_MAX_SEQ64_SLOTS * sizeof(u64) * 8 = AMDGPU_MAX_SEQ64_SLOTS + * 64bit slots + */ + r = amdgpu_bo_create_kernel(adev, AMDGPU_VA_RESERVED_SEQ64_SIZE, + PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, + &adev->seq64.sbo, NULL, + (void **)&adev->seq64.cpu_base_addr); + if (r) { + dev_warn(adev->dev, "(%d) create seq64 failed\n", r); + return r; + } + + memset(adev->seq64.cpu_base_addr, 0, AMDGPU_VA_RESERVED_SEQ64_SIZE); + + adev->seq64.num_sem = AMDGPU_MAX_SEQ64_SLOTS; + memset(&adev->seq64.used, 0, sizeof(adev->seq64.used)); + + return 0; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.h new file mode 100644 index 000000000000..4203b2ab318d --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_seq64.h @@ -0,0 +1,48 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __AMDGPU_SEQ64_H__ +#define __AMDGPU_SEQ64_H__ + +#include "amdgpu_vm.h" + +#define AMDGPU_MAX_SEQ64_SLOTS (AMDGPU_VA_RESERVED_SEQ64_SIZE / sizeof(u64)) + +struct amdgpu_seq64 { + struct amdgpu_bo *sbo; + u32 num_sem; + u64 *cpu_base_addr; + DECLARE_BITMAP(used, AMDGPU_MAX_SEQ64_SLOTS); +}; + +void amdgpu_seq64_fini(struct amdgpu_device *adev); +int amdgpu_seq64_init(struct amdgpu_device *adev); +int amdgpu_seq64_alloc(struct amdgpu_device *adev, u64 *gpu_addr, u64 **cpu_addr); +void amdgpu_seq64_free(struct amdgpu_device *adev, u64 gpu_addr); +int amdgpu_seq64_map(struct amdgpu_device *adev, struct amdgpu_vm *vm, + struct amdgpu_bo_va **bo_va); +void amdgpu_seq64_unmap(struct amdgpu_device *adev, struct amdgpu_fpriv *fpriv); + +#endif + diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_smuio.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_smuio.h index ff4435181055..ec9d12f85f39 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_smuio.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_smuio.h @@ -44,6 +44,7 @@ struct amdgpu_smuio_funcs { u32 (*get_socket_id)(struct amdgpu_device *adev); enum amdgpu_pkg_type (*get_pkg_type)(struct amdgpu_device *adev); bool (*is_host_gpu_xgmi_supported)(struct amdgpu_device *adev); + u64 (*get_gpu_clock_counter)(struct amdgpu_device *adev); }; struct amdgpu_smuio { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c index dcd8c066bc1f..bdf1ef825d89 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c @@ -191,7 +191,8 @@ static bool amdgpu_sync_test_fence(struct amdgpu_device *adev, /* Never sync to VM updates either. */ if (fence_owner == AMDGPU_FENCE_OWNER_VM && - owner != AMDGPU_FENCE_OWNER_UNDEFINED) + owner != AMDGPU_FENCE_OWNER_UNDEFINED && + owner != AMDGPU_FENCE_OWNER_KFD) return false; /* Ignore fences depending on the sync mode */ @@ -440,9 +441,7 @@ void amdgpu_sync_free(struct amdgpu_sync *sync) */ int amdgpu_sync_init(void) { - amdgpu_sync_slab = kmem_cache_create( - "amdgpu_sync", sizeof(struct amdgpu_sync_entry), 0, - SLAB_HWCACHE_ALIGN, NULL); + amdgpu_sync_slab = KMEM_CACHE(amdgpu_sync_entry, SLAB_HWCACHE_ALIGN); if (!amdgpu_sync_slab) return -ENOMEM; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h index 2fd1bfb35916..f539b1d00234 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h @@ -554,6 +554,21 @@ TRACE_EVENT(amdgpu_reset_reg_dumps, __entry->value) ); +TRACE_EVENT(amdgpu_runpm_reference_dumps, + TP_PROTO(uint32_t index, const char *func), + TP_ARGS(index, func), + TP_STRUCT__entry( + __field(uint32_t, index) + __string(func, func) + ), + TP_fast_assign( + __entry->index = index; + __assign_str(func, func); + ), + TP_printk("amdgpu runpm reference dump 0x%x: 0x%s\n", + __entry->index, + __get_str(func)) +); #undef AMDGPU_JOB_GET_TIMELINE_NAME #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 05991c5c8ddb..6b48bcf53ce9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -102,23 +102,19 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo, /* Don't handle scatter gather BOs */ if (bo->type == ttm_bo_type_sg) { placement->num_placement = 0; - placement->num_busy_placement = 0; return; } /* Object isn't an AMDGPU object so ignore */ if (!amdgpu_bo_is_amdgpu_bo(bo)) { placement->placement = &placements; - placement->busy_placement = &placements; placement->num_placement = 1; - placement->num_busy_placement = 1; return; } abo = ttm_to_amdgpu_bo(bo); if (abo->flags & AMDGPU_GEM_CREATE_DISCARDABLE) { placement->num_placement = 0; - placement->num_busy_placement = 0; return; } @@ -128,16 +124,16 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo, case AMDGPU_PL_OA: case AMDGPU_PL_DOORBELL: placement->num_placement = 0; - placement->num_busy_placement = 0; return; case TTM_PL_VRAM: if (!adev->mman.buffer_funcs_enabled) { /* Move to system memory */ amdgpu_bo_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_CPU); + } else if (!amdgpu_gmc_vram_full_visible(&adev->gmc) && !(abo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) && - amdgpu_bo_in_cpu_visible_vram(abo)) { + amdgpu_res_cpu_visible(adev, bo->resource)) { /* Try evicting to the CPU inaccessible part of VRAM * first, but only set GTT as busy placement, so this @@ -149,8 +145,7 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo, AMDGPU_GEM_DOMAIN_CPU); abo->placements[0].fpfn = adev->gmc.visible_vram_size >> PAGE_SHIFT; abo->placements[0].lpfn = 0; - abo->placement.busy_placement = &abo->placements[1]; - abo->placement.num_busy_placement = 1; + abo->placements[0].flags |= TTM_PL_FLAG_DESIRED; } else { /* Move to GTT memory */ amdgpu_bo_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_GTT | @@ -383,11 +378,12 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo, (abo->flags & AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE)) { struct dma_fence *wipe_fence = NULL; - r = amdgpu_fill_buffer(abo, AMDGPU_POISON, NULL, &wipe_fence, - false); + r = amdgpu_fill_buffer(abo, 0, NULL, &wipe_fence, + false); if (r) { goto error; } else if (wipe_fence) { + amdgpu_vram_mgr_set_cleared(bo->resource); dma_fence_put(fence); fence = wipe_fence; } @@ -408,40 +404,55 @@ error: return r; } -/* - * amdgpu_mem_visible - Check that memory can be accessed by ttm_bo_move_memcpy +/** + * amdgpu_res_cpu_visible - Check that resource can be accessed by CPU + * @adev: amdgpu device + * @res: the resource to check * - * Called by amdgpu_bo_move() + * Returns: true if the full resource is CPU visible, false otherwise. */ -static bool amdgpu_mem_visible(struct amdgpu_device *adev, - struct ttm_resource *mem) +bool amdgpu_res_cpu_visible(struct amdgpu_device *adev, + struct ttm_resource *res) { - u64 mem_size = (u64)mem->size; struct amdgpu_res_cursor cursor; - u64 end; - if (mem->mem_type == TTM_PL_SYSTEM || - mem->mem_type == TTM_PL_TT) + if (!res) + return false; + + if (res->mem_type == TTM_PL_SYSTEM || res->mem_type == TTM_PL_TT || + res->mem_type == AMDGPU_PL_PREEMPT) return true; - if (mem->mem_type != TTM_PL_VRAM) + + if (res->mem_type != TTM_PL_VRAM) return false; - amdgpu_res_first(mem, 0, mem_size, &cursor); - end = cursor.start + cursor.size; + amdgpu_res_first(res, 0, res->size, &cursor); while (cursor.remaining) { + if ((cursor.start + cursor.size) >= adev->gmc.visible_vram_size) + return false; amdgpu_res_next(&cursor, cursor.size); + } - if (!cursor.remaining) - break; + return true; +} - /* ttm_resource_ioremap only supports contiguous memory */ - if (end != cursor.start) - return false; +/* + * amdgpu_res_copyable - Check that memory can be accessed by ttm_bo_move_memcpy + * + * Called by amdgpu_bo_move() + */ +static bool amdgpu_res_copyable(struct amdgpu_device *adev, + struct ttm_resource *mem) +{ + if (!amdgpu_res_cpu_visible(adev, mem)) + return false; - end = cursor.start + cursor.size; - } + /* ttm_resource_ioremap only supports contiguous memory */ + if (mem->mem_type == TTM_PL_VRAM && + !(mem->placement & TTM_PL_FLAG_CONTIGUOUS)) + return false; - return end <= adev->gmc.visible_vram_size; + return true; } /* @@ -534,8 +545,8 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict, if (r) { /* Check that all memory is CPU accessible */ - if (!amdgpu_mem_visible(adev, old_mem) || - !amdgpu_mem_visible(adev, new_mem)) { + if (!amdgpu_res_copyable(adev, old_mem) || + !amdgpu_res_copyable(adev, new_mem)) { pr_err("Move buffer fallback to memcpy unavailable\n"); return r; } @@ -545,10 +556,11 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict, return r; } + trace_amdgpu_bo_move(abo, new_mem->mem_type, old_mem->mem_type); out: /* update statistics */ atomic64_add(bo->base.size, &adev->num_bytes_moved); - amdgpu_bo_move_notify(bo, evict, new_mem); + amdgpu_bo_move_notify(bo, evict); return 0; } @@ -561,7 +573,6 @@ static int amdgpu_ttm_io_mem_reserve(struct ttm_device *bdev, struct ttm_resource *mem) { struct amdgpu_device *adev = amdgpu_ttm_adev(bdev); - size_t bus_size = (size_t)mem->size; switch (mem->mem_type) { case TTM_PL_SYSTEM: @@ -572,9 +583,6 @@ static int amdgpu_ttm_io_mem_reserve(struct ttm_device *bdev, break; case TTM_PL_VRAM: mem->bus.offset = mem->start << PAGE_SHIFT; - /* check if it's visible */ - if ((mem->bus.offset + bus_size) > adev->gmc.visible_vram_size) - return -EINVAL; if (adev->mman.aper_base_kaddr && mem->placement & TTM_PL_FLAG_CONTIGUOUS) @@ -868,6 +876,7 @@ static void amdgpu_ttm_gart_bind(struct amdgpu_device *adev, amdgpu_gart_bind(adev, gtt->offset, ttm->num_pages, gtt->ttm.dma_address, flags); } + gtt->bound = true; } /* @@ -959,16 +968,12 @@ int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo) return 0; addr = amdgpu_gmc_agp_addr(bo); - if (addr != AMDGPU_BO_INVALID_OFFSET) { - bo->resource->start = addr >> PAGE_SHIFT; + if (addr != AMDGPU_BO_INVALID_OFFSET) return 0; - } /* allocate GART space */ placement.num_placement = 1; placement.placement = &placements; - placement.num_busy_placement = 1; - placement.busy_placement = &placements; placements.fpfn = 0; placements.lpfn = adev->gmc.gart_size >> PAGE_SHIFT; placements.mem_type = TTM_PL_TT; @@ -1555,7 +1560,7 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo, static void amdgpu_bo_delete_mem_notify(struct ttm_buffer_object *bo) { - amdgpu_bo_move_notify(bo, false, NULL); + amdgpu_bo_move_notify(bo, false); } static struct ttm_device_funcs amdgpu_bo_driver = { @@ -2222,6 +2227,71 @@ static int amdgpu_ttm_fill_mem(struct amdgpu_ring *ring, uint32_t src_data, return 0; } +/** + * amdgpu_ttm_clear_buffer - clear memory buffers + * @bo: amdgpu buffer object + * @resv: reservation object + * @fence: dma_fence associated with the operation + * + * Clear the memory buffer resource. + * + * Returns: + * 0 for success or a negative error code on failure. + */ +int amdgpu_ttm_clear_buffer(struct amdgpu_bo *bo, + struct dma_resv *resv, + struct dma_fence **fence) +{ + struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); + struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring; + struct amdgpu_res_cursor cursor; + u64 addr; + int r; + + if (!adev->mman.buffer_funcs_enabled) + return -EINVAL; + + if (!fence) + return -EINVAL; + + *fence = dma_fence_get_stub(); + + amdgpu_res_first(bo->tbo.resource, 0, amdgpu_bo_size(bo), &cursor); + + mutex_lock(&adev->mman.gtt_window_lock); + while (cursor.remaining) { + struct dma_fence *next = NULL; + u64 size; + + if (amdgpu_res_cleared(&cursor)) { + amdgpu_res_next(&cursor, cursor.size); + continue; + } + + /* Never clear more than 256MiB at once to avoid timeouts */ + size = min(cursor.size, 256ULL << 20); + + r = amdgpu_ttm_map_buffer(&bo->tbo, bo->tbo.resource, &cursor, + 1, ring, false, &size, &addr); + if (r) + goto err; + + r = amdgpu_ttm_fill_mem(ring, 0, addr, size, resv, + &next, true, true); + if (r) + goto err; + + dma_fence_put(*fence); + *fence = next; + + amdgpu_res_next(&cursor, size); + } +err: + mutex_unlock(&adev->mman.gtt_window_lock); + + return r; +} + int amdgpu_fill_buffer(struct amdgpu_bo *bo, uint32_t src_data, struct dma_resv *resv, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h index 65ec82141a8e..4f5e70ee9ad0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h @@ -38,8 +38,6 @@ #define AMDGPU_GTT_MAX_TRANSFER_SIZE 512 #define AMDGPU_GTT_NUM_TRANSFER_WINDOWS 2 -#define AMDGPU_POISON 0xd0bed0be - extern const struct attribute_group amdgpu_vram_mgr_attr_group; extern const struct attribute_group amdgpu_gtt_mgr_attr_group; @@ -139,6 +137,9 @@ int amdgpu_vram_mgr_reserve_range(struct amdgpu_vram_mgr *mgr, int amdgpu_vram_mgr_query_page_status(struct amdgpu_vram_mgr *mgr, uint64_t start); +bool amdgpu_res_cpu_visible(struct amdgpu_device *adev, + struct ttm_resource *res); + int amdgpu_ttm_init(struct amdgpu_device *adev); void amdgpu_ttm_fini(struct amdgpu_device *adev); void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, @@ -155,6 +156,9 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev, uint64_t size, bool tmz, struct dma_resv *resv, struct dma_fence **f); +int amdgpu_ttm_clear_buffer(struct amdgpu_bo *bo, + struct dma_resv *resv, + struct dma_fence **fence); int amdgpu_fill_buffer(struct amdgpu_bo *bo, uint32_t src_data, struct dma_resv *resv, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c index b14127429f30..0867fd9e15ba 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c @@ -556,6 +556,8 @@ amdgpu_ucode_get_load_type(struct amdgpu_device *adev, int load_type) default: if (!load_type) return AMDGPU_FW_LOAD_DIRECT; + else if (load_type == 3) + return AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO; else return AMDGPU_FW_LOAD_PSP; } @@ -678,6 +680,8 @@ const char *amdgpu_ucode_name(enum AMDGPU_UCODE_ID ucode_id) return "UMSCH_MM_DATA"; case AMDGPU_UCODE_ID_UMSCH_MM_CMD_BUFFER: return "UMSCH_MM_CMD_BUFFER"; + case AMDGPU_UCODE_ID_JPEG_RAM: + return "JPEG"; default: return "UNKNOWN UCODE"; } @@ -1060,9 +1064,11 @@ static int amdgpu_ucode_patch_jt(struct amdgpu_firmware_info *ucode, int amdgpu_ucode_create_bo(struct amdgpu_device *adev) { - if (adev->firmware.load_type != AMDGPU_FW_LOAD_DIRECT) { + if ((adev->firmware.load_type != AMDGPU_FW_LOAD_DIRECT) && + (adev->firmware.load_type != AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO)) { amdgpu_bo_create_kernel(adev, adev->firmware.fw_size, PAGE_SIZE, - amdgpu_sriov_vf(adev) ? AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT, + (amdgpu_sriov_vf(adev) || adev->debug_use_vram_fw_buf) ? + AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT, &adev->firmware.fw_buf, &adev->firmware.fw_buf_mc, &adev->firmware.fw_buf_ptr); @@ -1397,9 +1403,13 @@ int amdgpu_ucode_request(struct amdgpu_device *adev, const struct firmware **fw, if (err) return -ENODEV; + err = amdgpu_ucode_validate(*fw); - if (err) + if (err) { dev_dbg(adev->dev, "\"%s\" failed to validate\n", fw_name); + release_firmware(*fw); + *fw = NULL; + } return err; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h index 4244a13f9f22..105d4de0613a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h @@ -125,6 +125,7 @@ enum psp_fw_type { PSP_FW_TYPE_PSP_INTF_DRV, PSP_FW_TYPE_PSP_DBG_DRV, PSP_FW_TYPE_PSP_RAS_DRV, + PSP_FW_TYPE_PSP_IPKEYMGR_DRV, PSP_FW_TYPE_MAX_INDEX, }; @@ -511,6 +512,7 @@ enum AMDGPU_UCODE_ID { AMDGPU_UCODE_ID_UMSCH_MM_DATA, AMDGPU_UCODE_ID_UMSCH_MM_CMD_BUFFER, AMDGPU_UCODE_ID_P2S_TABLE, + AMDGPU_UCODE_ID_JPEG_RAM, AMDGPU_UCODE_ID_MAXIMUM, }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c index f74347cc087a..f486510fc94c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c @@ -23,6 +23,7 @@ #include "amdgpu.h" #include "umc_v6_7.h" +#define MAX_UMC_POISON_POLLING_TIME_SYNC 20 //ms static int amdgpu_umc_convert_error_address(struct amdgpu_device *adev, struct ras_err_data *err_data, uint64_t err_addr, @@ -85,18 +86,21 @@ out_fini_err_data: return ret; } -static int amdgpu_umc_do_page_retirement(struct amdgpu_device *adev, - void *ras_error_status, - struct amdgpu_iv_entry *entry, - bool reset) +static void amdgpu_umc_handle_bad_pages(struct amdgpu_device *adev, + void *ras_error_status) { struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + unsigned int error_query_mode; int ret = 0; + unsigned long err_count; - kgd2kfd_set_sram_ecc_flag(adev->kfd.dev); + amdgpu_ras_get_error_query_mode(adev, &error_query_mode); + + mutex_lock(&con->page_retirement_lock); ret = amdgpu_dpm_get_ecc_info(adev, (void *)&(con->umc_ecc)); - if (ret == -EOPNOTSUPP) { + if (ret == -EOPNOTSUPP && + error_query_mode == AMDGPU_RAS_DIRECT_ERROR_QUERY) { if (adev->umc.ras && adev->umc.ras->ras_block.hw_ops && adev->umc.ras->ras_block.hw_ops->query_ras_error_count) adev->umc.ras->ras_block.hw_ops->query_ras_error_count(adev, ras_error_status); @@ -120,7 +124,8 @@ static int amdgpu_umc_do_page_retirement(struct amdgpu_device *adev, */ adev->umc.ras->ras_block.hw_ops->query_ras_error_address(adev, ras_error_status); } - } else if (!ret) { + } else if (error_query_mode == AMDGPU_RAS_FIRMWARE_ERROR_QUERY || + (!ret && error_query_mode == AMDGPU_RAS_DIRECT_ERROR_QUERY)) { if (adev->umc.ras && adev->umc.ras->ecc_info_query_ras_error_count) adev->umc.ras->ecc_info_query_ras_error_count(adev, ras_error_status); @@ -147,16 +152,13 @@ static int amdgpu_umc_do_page_retirement(struct amdgpu_device *adev, } /* only uncorrectable error needs gpu reset */ - if (err_data->ue_count) { - dev_info(adev->dev, "%ld uncorrectable hardware errors " - "detected in UMC block\n", - err_data->ue_count); - + if (err_data->ue_count || err_data->de_count) { + err_count = err_data->ue_count + err_data->de_count; if ((amdgpu_bad_page_threshold != 0) && err_data->err_addr_cnt) { amdgpu_ras_add_bad_pages(adev, err_data->err_addr, err_data->err_addr_cnt); - amdgpu_ras_save_bad_pages(adev, &(err_data->ue_count)); + amdgpu_ras_save_bad_pages(adev, &err_count); amdgpu_dpm_send_hbm_bad_pages_num(adev, con->eeprom_control.ras_num_recs); @@ -165,16 +167,84 @@ static int amdgpu_umc_do_page_retirement(struct amdgpu_device *adev, con->update_channel_flag = false; } } - - if (reset) - amdgpu_ras_reset_gpu(adev); } kfree(err_data->err_addr); + + mutex_unlock(&con->page_retirement_lock); +} + +static int amdgpu_umc_do_page_retirement(struct amdgpu_device *adev, + void *ras_error_status, + struct amdgpu_iv_entry *entry, + uint32_t reset) +{ + struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + + kgd2kfd_set_sram_ecc_flag(adev->kfd.dev); + amdgpu_umc_handle_bad_pages(adev, ras_error_status); + + if (err_data->ue_count && reset) { + con->gpu_reset_flags |= reset; + amdgpu_ras_reset_gpu(adev); + } + return AMDGPU_RAS_SUCCESS; } -int amdgpu_umc_poison_handler(struct amdgpu_device *adev, bool reset) +int amdgpu_umc_bad_page_polling_timeout(struct amdgpu_device *adev, + uint32_t reset, uint32_t timeout_ms) +{ + struct ras_err_data err_data; + struct ras_common_if head = { + .block = AMDGPU_RAS_BLOCK__UMC, + }; + struct ras_manager *obj = amdgpu_ras_find_obj(adev, &head); + uint32_t timeout = timeout_ms; + + memset(&err_data, 0, sizeof(err_data)); + amdgpu_ras_error_data_init(&err_data); + + do { + + amdgpu_umc_handle_bad_pages(adev, &err_data); + + if (timeout && !err_data.de_count) { + msleep(1); + timeout--; + } + + } while (timeout && !err_data.de_count); + + if (!timeout) + dev_warn(adev->dev, "Can't find bad pages\n"); + + if (err_data.de_count) + dev_info(adev->dev, "%ld new deferred hardware errors detected\n", err_data.de_count); + + if (obj) { + obj->err_data.ue_count += err_data.ue_count; + obj->err_data.ce_count += err_data.ce_count; + obj->err_data.de_count += err_data.de_count; + } + + amdgpu_ras_error_data_fini(&err_data); + + kgd2kfd_set_sram_ecc_flag(adev->kfd.dev); + + if (reset) { + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + + con->gpu_reset_flags |= reset; + amdgpu_ras_reset_gpu(adev); + } + + return 0; +} + +int amdgpu_umc_poison_handler(struct amdgpu_device *adev, + enum amdgpu_ras_block block, uint32_t reset) { int ret = AMDGPU_RAS_SUCCESS; @@ -191,27 +261,41 @@ int amdgpu_umc_poison_handler(struct amdgpu_device *adev, bool reset) } if (!amdgpu_sriov_vf(adev)) { - struct ras_err_data err_data; - struct ras_common_if head = { - .block = AMDGPU_RAS_BLOCK__UMC, - }; - struct ras_manager *obj = amdgpu_ras_find_obj(adev, &head); + if (amdgpu_ip_version(adev, UMC_HWIP, 0) < IP_VERSION(12, 0, 0)) { + struct ras_err_data err_data; + struct ras_common_if head = { + .block = AMDGPU_RAS_BLOCK__UMC, + }; + struct ras_manager *obj = amdgpu_ras_find_obj(adev, &head); + + ret = amdgpu_ras_error_data_init(&err_data); + if (ret) + return ret; - ret = amdgpu_ras_error_data_init(&err_data); - if (ret) - return ret; + ret = amdgpu_umc_do_page_retirement(adev, &err_data, NULL, reset); - ret = amdgpu_umc_do_page_retirement(adev, &err_data, NULL, reset); + if (ret == AMDGPU_RAS_SUCCESS && obj) { + obj->err_data.ue_count += err_data.ue_count; + obj->err_data.ce_count += err_data.ce_count; + obj->err_data.de_count += err_data.de_count; + } - if (ret == AMDGPU_RAS_SUCCESS && obj) { - obj->err_data.ue_count += err_data.ue_count; - obj->err_data.ce_count += err_data.ce_count; - } + amdgpu_ras_error_data_fini(&err_data); + } else { + if (reset) { + amdgpu_umc_bad_page_polling_timeout(adev, + reset, MAX_UMC_POISON_POLLING_TIME_SYNC); + } else { + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); - amdgpu_ras_error_data_fini(&err_data); + atomic_inc(&con->page_retirement_req_cnt); + + wake_up(&con->page_retirement_wq); + } + } } else { if (adev->virt.ops && adev->virt.ops->ras_poison_handler) - adev->virt.ops->ras_poison_handler(adev); + adev->virt.ops->ras_poison_handler(adev, block); else dev_warn(adev->dev, "No ras_poison_handler interface in SRIOV!\n"); @@ -224,7 +308,8 @@ int amdgpu_umc_process_ras_data_cb(struct amdgpu_device *adev, void *ras_error_status, struct amdgpu_iv_entry *entry) { - return amdgpu_umc_do_page_retirement(adev, ras_error_status, entry, true); + return amdgpu_umc_do_page_retirement(adev, ras_error_status, entry, + AMDGPU_RAS_GPU_RESET_MODE1_RESET); } int amdgpu_umc_ras_sw_init(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h index 417a6726c71b..563b0249247e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h @@ -21,7 +21,7 @@ #ifndef __AMDGPU_UMC_H__ #define __AMDGPU_UMC_H__ #include "amdgpu_ras.h" - +#include "amdgpu_mca.h" /* * (addr / 256) * 4096, the higher 26 bits in ErrorAddr * is the index of 4KB block @@ -64,8 +64,8 @@ struct amdgpu_umc_ras { void *ras_error_status); void (*ecc_info_query_ras_error_address)(struct amdgpu_device *adev, void *ras_error_status); - /* support different eeprom table version for different asic */ - void (*set_eeprom_table_version)(struct amdgpu_ras_eeprom_table_header *hdr); + bool (*check_ecc_err_status)(struct amdgpu_device *adev, + enum amdgpu_mca_error_type type, void *ras_error_status); }; struct amdgpu_umc_funcs { @@ -100,7 +100,8 @@ struct amdgpu_umc { int amdgpu_umc_ras_sw_init(struct amdgpu_device *adev); int amdgpu_umc_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block); -int amdgpu_umc_poison_handler(struct amdgpu_device *adev, bool reset); +int amdgpu_umc_poison_handler(struct amdgpu_device *adev, + enum amdgpu_ras_block block, uint32_t reset); int amdgpu_umc_process_ecc_irq(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry); @@ -118,4 +119,7 @@ int amdgpu_umc_page_retirement_mca(struct amdgpu_device *adev, int amdgpu_umc_loop_channels(struct amdgpu_device *adev, umc_func func, void *data); + +int amdgpu_umc_bad_page_polling_timeout(struct amdgpu_device *adev, + uint32_t reset, uint32_t timeout_ms); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umr.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_umr.h index 107f9bb0e24f..5b27fc41ffbf 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umr.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umr.h @@ -69,12 +69,12 @@ struct amdgpu_debugfs_gprwave_data { }; enum AMDGPU_DEBUGFS_REGS2_CMDS { - AMDGPU_DEBUGFS_REGS2_CMD_SET_STATE=0, + AMDGPU_DEBUGFS_REGS2_CMD_SET_STATE = 0, AMDGPU_DEBUGFS_REGS2_CMD_SET_STATE_V2, }; enum AMDGPU_DEBUGFS_GPRWAVE_CMDS { - AMDGPU_DEBUGFS_GPRWAVE_CMD_SET_STATE=0, + AMDGPU_DEBUGFS_GPRWAVE_CMD_SET_STATE = 0, }; //reg2 interface diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c index 1c523eb5f342..f7c73533e336 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c @@ -86,7 +86,7 @@ static int map_ring_data(struct amdgpu_device *adev, struct amdgpu_vm *vm, amdgpu_sync_create(&sync); - drm_exec_init(&exec, 0); + drm_exec_init(&exec, 0, 0); drm_exec_until_all_locked(&exec) { r = drm_exec_lock_obj(&exec, &bo->tbo.base); drm_exec_retry_on_contention(&exec); @@ -149,7 +149,7 @@ static int unmap_ring_data(struct amdgpu_device *adev, struct amdgpu_vm *vm, struct drm_exec exec; long r; - drm_exec_init(&exec, 0); + drm_exec_init(&exec, 0, 0); drm_exec_until_all_locked(&exec) { r = drm_exec_lock_obj(&exec, &bo->tbo.base); drm_exec_retry_on_contention(&exec); @@ -189,10 +189,13 @@ static void setup_vpe_queue(struct amdgpu_device *adev, mqd->rptr_val = 0; mqd->unmapped = 1; + if (adev->vpe.collaborate_mode) + memcpy(++mqd, test->mqd_data_cpu_addr, sizeof(struct MQD_INFO)); + qinfo->mqd_addr = test->mqd_data_gpu_addr; qinfo->csa_addr = test->ctx_data_gpu_addr + offsetof(struct umsch_mm_test_ctx_data, vpe_ctx_csa); - qinfo->doorbell_offset_0 = (adev->doorbell_index.vpe_ring + 1) << 1; + qinfo->doorbell_offset_0 = 0; qinfo->doorbell_offset_1 = 0; } @@ -287,7 +290,10 @@ static int submit_vpe_queue(struct amdgpu_device *adev, struct umsch_mm_test *te ring[5] = 0; mqd->wptr_val = (6 << 2); - // WDOORBELL32(adev->umsch_mm.agdb_index[CONTEXT_PRIORITY_LEVEL_NORMAL], mqd->wptr_val); + if (adev->vpe.collaborate_mode) + (++mqd)->wptr_val = (6 << 2); + + WDOORBELL32(adev->umsch_mm.agdb_index[CONTEXT_PRIORITY_LEVEL_NORMAL], mqd->wptr_val); for (i = 0; i < adev->usec_timeout; i++) { if (*fence == test_pattern) @@ -358,7 +364,7 @@ static int setup_umsch_mm_test(struct amdgpu_device *adev, memset(test->ring_data_cpu_addr, 0, sizeof(struct umsch_mm_test_ring_data)); - test->ring_data_gpu_addr = AMDGPU_VA_RESERVED_SIZE; + test->ring_data_gpu_addr = AMDGPU_VA_RESERVED_BOTTOM; r = map_ring_data(adev, test->vm, test->ring_data_obj, &test->bo_va, test->ring_data_gpu_addr, sizeof(struct umsch_mm_test_ring_data)); if (r) @@ -571,6 +577,7 @@ int amdgpu_umsch_mm_init_microcode(struct amdgpu_umsch_mm *umsch) switch (amdgpu_ip_version(adev, VCN_HWIP, 0)) { case IP_VERSION(4, 0, 5): + case IP_VERSION(4, 0, 6): fw_name = "amdgpu/umsch_mm_4_0_0.bin"; break; default: @@ -750,6 +757,7 @@ static int umsch_mm_early_init(void *handle) switch (amdgpu_ip_version(adev, VCN_HWIP, 0)) { case IP_VERSION(4, 0, 5): + case IP_VERSION(4, 0, 6): umsch_mm_v4_0_set_funcs(&adev->umsch_mm); break; default: @@ -766,6 +774,9 @@ static int umsch_mm_late_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + if (amdgpu_in_reset(adev) || adev->in_s0ix || adev->in_suspend) + return 0; + return umsch_mm_test(adev); } @@ -843,6 +854,20 @@ static int umsch_mm_hw_fini(void *handle) return 0; } +static int umsch_mm_suspend(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + return umsch_mm_hw_fini(adev); +} + +static int umsch_mm_resume(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + return umsch_mm_hw_init(adev); +} + static const struct amd_ip_funcs umsch_mm_v4_0_ip_funcs = { .name = "umsch_mm_v4_0", .early_init = umsch_mm_early_init, @@ -851,6 +876,8 @@ static const struct amd_ip_funcs umsch_mm_v4_0_ip_funcs = { .sw_fini = umsch_mm_sw_fini, .hw_init = umsch_mm_hw_init, .hw_fini = umsch_mm_hw_fini, + .suspend = umsch_mm_suspend, + .resume = umsch_mm_resume, }; const struct amdgpu_ip_block_version umsch_mm_v4_0_ip_block = { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.h index 8258a43a6236..5014b5af95fd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.h @@ -33,13 +33,6 @@ enum UMSCH_SWIP_ENGINE_TYPE { UMSCH_SWIP_ENGINE_TYPE_MAX }; -enum UMSCH_SWIP_AFFINITY_TYPE { - UMSCH_SWIP_AFFINITY_TYPE_ANY = 0, - UMSCH_SWIP_AFFINITY_TYPE_VCN0 = 1, - UMSCH_SWIP_AFFINITY_TYPE_VCN1 = 2, - UMSCH_SWIP_AFFINITY_TYPE_MAX -}; - enum UMSCH_CONTEXT_PRIORITY_LEVEL { CONTEXT_PRIORITY_LEVEL_IDLE = 0, CONTEXT_PRIORITY_LEVEL_NORMAL = 1, @@ -51,13 +44,15 @@ enum UMSCH_CONTEXT_PRIORITY_LEVEL { struct umsch_mm_set_resource_input { uint32_t vmid_mask_mm_vcn; uint32_t vmid_mask_mm_vpe; + uint32_t collaboration_mask_vpe; uint32_t logging_vmid; uint32_t engine_mask; union { struct { uint32_t disable_reset : 1; uint32_t disable_umsch_mm_log : 1; - uint32_t reserved : 30; + uint32_t use_rs64mem_for_proc_ctx_csa : 1; + uint32_t reserved : 29; }; uint32_t uint32_all; }; @@ -78,15 +73,18 @@ struct umsch_mm_add_queue_input { uint32_t doorbell_offset_1; enum UMSCH_SWIP_ENGINE_TYPE engine_type; uint32_t affinity; - enum UMSCH_SWIP_AFFINITY_TYPE affinity_type; uint64_t mqd_addr; uint64_t h_context; uint64_t h_queue; uint32_t vm_context_cntl; + uint32_t process_csa_array_index; + uint32_t context_csa_array_index; + struct { uint32_t is_context_suspended : 1; - uint32_t reserved : 31; + uint32_t collaboration_mode : 1; + uint32_t reserved : 30; }; }; @@ -94,6 +92,7 @@ struct umsch_mm_remove_queue_input { uint32_t doorbell_offset_0; uint32_t doorbell_offset_1; uint64_t context_csa_addr; + uint32_t context_csa_array_index; }; struct MQD_INFO { @@ -103,6 +102,7 @@ struct MQD_INFO { uint32_t wptr_val; uint32_t rptr_val; uint32_t unmapped; + uint32_t vmid; }; struct amdgpu_umsch_mm; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c index 815b7c34ed33..07d930339b07 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c @@ -398,21 +398,22 @@ int amdgpu_uvd_sw_fini(struct amdgpu_device *adev) * amdgpu_uvd_entity_init - init entity * * @adev: amdgpu_device pointer + * @ring: amdgpu_ring pointer to check * + * Initialize the entity used for handle management in the kernel driver. */ -int amdgpu_uvd_entity_init(struct amdgpu_device *adev) +int amdgpu_uvd_entity_init(struct amdgpu_device *adev, struct amdgpu_ring *ring) { - struct amdgpu_ring *ring; - struct drm_gpu_scheduler *sched; - int r; + if (ring == &adev->uvd.inst[0].ring) { + struct drm_gpu_scheduler *sched = &ring->sched; + int r; - ring = &adev->uvd.inst[0].ring; - sched = &ring->sched; - r = drm_sched_entity_init(&adev->uvd.entity, DRM_SCHED_PRIORITY_NORMAL, - &sched, 1, NULL); - if (r) { - DRM_ERROR("Failed setting up UVD kernel entity.\n"); - return r; + r = drm_sched_entity_init(&adev->uvd.entity, DRM_SCHED_PRIORITY_NORMAL, + &sched, 1, NULL); + if (r) { + DRM_ERROR("Failed setting up UVD kernel entity.\n"); + return r; + } } return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h index a9f342537c68..9dfad2f48ef4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h @@ -73,7 +73,7 @@ struct amdgpu_uvd { int amdgpu_uvd_sw_init(struct amdgpu_device *adev); int amdgpu_uvd_sw_fini(struct amdgpu_device *adev); -int amdgpu_uvd_entity_init(struct amdgpu_device *adev); +int amdgpu_uvd_entity_init(struct amdgpu_device *adev, struct amdgpu_ring *ring); int amdgpu_uvd_prepare_suspend(struct amdgpu_device *adev); int amdgpu_uvd_suspend(struct amdgpu_device *adev); int amdgpu_uvd_resume(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c index 1904edf68407..59acf424a078 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c @@ -230,21 +230,22 @@ int amdgpu_vce_sw_fini(struct amdgpu_device *adev) * amdgpu_vce_entity_init - init entity * * @adev: amdgpu_device pointer + * @ring: amdgpu_ring pointer to check * + * Initialize the entity used for handle management in the kernel driver. */ -int amdgpu_vce_entity_init(struct amdgpu_device *adev) +int amdgpu_vce_entity_init(struct amdgpu_device *adev, struct amdgpu_ring *ring) { - struct amdgpu_ring *ring; - struct drm_gpu_scheduler *sched; - int r; - - ring = &adev->vce.ring[0]; - sched = &ring->sched; - r = drm_sched_entity_init(&adev->vce.entity, DRM_SCHED_PRIORITY_NORMAL, - &sched, 1, NULL); - if (r != 0) { - DRM_ERROR("Failed setting up VCE run queue.\n"); - return r; + if (ring == &adev->vce.ring[0]) { + struct drm_gpu_scheduler *sched = &ring->sched; + int r; + + r = drm_sched_entity_init(&adev->vce.entity, DRM_SCHED_PRIORITY_NORMAL, + &sched, 1, NULL); + if (r != 0) { + DRM_ERROR("Failed setting up VCE run queue.\n"); + return r; + } } return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h index ea680fc9a6c3..6e53f872d084 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h @@ -55,7 +55,7 @@ struct amdgpu_vce { int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size); int amdgpu_vce_sw_fini(struct amdgpu_device *adev); -int amdgpu_vce_entity_init(struct amdgpu_device *adev); +int amdgpu_vce_entity_init(struct amdgpu_device *adev, struct amdgpu_ring *ring); int amdgpu_vce_suspend(struct amdgpu_device *adev); int amdgpu_vce_resume(struct amdgpu_device *adev); void amdgpu_vce_free_handles(struct amdgpu_device *adev, struct drm_file *filp); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c index f4963330c772..bb85772b1374 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c @@ -59,6 +59,9 @@ #define FIRMWARE_VCN4_0_3 "amdgpu/vcn_4_0_3.bin" #define FIRMWARE_VCN4_0_4 "amdgpu/vcn_4_0_4.bin" #define FIRMWARE_VCN4_0_5 "amdgpu/vcn_4_0_5.bin" +#define FIRMWARE_VCN4_0_6 "amdgpu/vcn_4_0_6.bin" +#define FIRMWARE_VCN4_0_6_1 "amdgpu/vcn_4_0_6_1.bin" +#define FIRMWARE_VCN5_0_0 "amdgpu/vcn_5_0_0.bin" MODULE_FIRMWARE(FIRMWARE_RAVEN); MODULE_FIRMWARE(FIRMWARE_PICASSO); @@ -82,21 +85,32 @@ MODULE_FIRMWARE(FIRMWARE_VCN4_0_2); MODULE_FIRMWARE(FIRMWARE_VCN4_0_3); MODULE_FIRMWARE(FIRMWARE_VCN4_0_4); MODULE_FIRMWARE(FIRMWARE_VCN4_0_5); +MODULE_FIRMWARE(FIRMWARE_VCN4_0_6); +MODULE_FIRMWARE(FIRMWARE_VCN4_0_6_1); +MODULE_FIRMWARE(FIRMWARE_VCN5_0_0); static void amdgpu_vcn_idle_work_handler(struct work_struct *work); int amdgpu_vcn_early_init(struct amdgpu_device *adev) { - char ucode_prefix[30]; + char ucode_prefix[25]; char fw_name[40]; - int r; + int r, i; - amdgpu_ucode_ip_version_decode(adev, UVD_HWIP, ucode_prefix, sizeof(ucode_prefix)); - snprintf(fw_name, sizeof(fw_name), "amdgpu/%s.bin", ucode_prefix); - r = amdgpu_ucode_request(adev, &adev->vcn.fw, fw_name); - if (r) - amdgpu_ucode_release(&adev->vcn.fw); + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + amdgpu_ucode_ip_version_decode(adev, UVD_HWIP, ucode_prefix, sizeof(ucode_prefix)); + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s.bin", ucode_prefix); + if (amdgpu_ip_version(adev, UVD_HWIP, 0) == IP_VERSION(4, 0, 6) && + i == 1) { + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_%d.bin", ucode_prefix, i); + } + r = amdgpu_ucode_request(adev, &adev->vcn.fw[i], fw_name); + if (r) { + amdgpu_ucode_release(&adev->vcn.fw[i]); + return r; + } + } return r; } @@ -137,7 +151,7 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev) } } - hdr = (const struct common_firmware_header *)adev->vcn.fw->data; + hdr = (const struct common_firmware_header *)adev->vcn.fw[0]->data; adev->vcn.fw_version = le32_to_cpu(hdr->ucode_version); /* Bit 20-23, it is encode major and non-zero for new naming convention. @@ -252,9 +266,10 @@ int amdgpu_vcn_sw_fini(struct amdgpu_device *adev) for (i = 0; i < adev->vcn.num_enc_rings; ++i) amdgpu_ring_fini(&adev->vcn.inst[j].ring_enc[i]); + + amdgpu_ucode_release(&adev->vcn.fw[j]); } - amdgpu_ucode_release(&adev->vcn.fw); mutex_destroy(&adev->vcn.vcn1_jpeg1_workaround); mutex_destroy(&adev->vcn.vcn_pg_lock); @@ -350,11 +365,12 @@ int amdgpu_vcn_resume(struct amdgpu_device *adev) const struct common_firmware_header *hdr; unsigned int offset; - hdr = (const struct common_firmware_header *)adev->vcn.fw->data; + hdr = (const struct common_firmware_header *)adev->vcn.fw[i]->data; if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { offset = le32_to_cpu(hdr->ucode_array_offset_bytes); if (drm_dev_enter(adev_to_drm(adev), &idx)) { - memcpy_toio(adev->vcn.inst[i].cpu_addr, adev->vcn.fw->data + offset, + memcpy_toio(adev->vcn.inst[i].cpu_addr, + adev->vcn.fw[i]->data + offset, le32_to_cpu(hdr->ucode_size_bytes)); drm_dev_exit(idx); } @@ -1039,11 +1055,11 @@ void amdgpu_vcn_setup_ucode(struct amdgpu_device *adev) if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { const struct common_firmware_header *hdr; - hdr = (const struct common_firmware_header *)adev->vcn.fw->data; - for (i = 0; i < adev->vcn.num_vcn_inst; i++) { if (adev->vcn.harvest_config & (1 << i)) continue; + + hdr = (const struct common_firmware_header *)adev->vcn.fw[i]->data; /* currently only support 2 FW instances */ if (i >= 2) { dev_info(adev->dev, "More then 2 VCN FW instances!\n"); @@ -1051,7 +1067,7 @@ void amdgpu_vcn_setup_ucode(struct amdgpu_device *adev) } idx = AMDGPU_UCODE_ID_VCN + i; adev->firmware.ucode[idx].ucode_id = idx; - adev->firmware.ucode[idx].fw = adev->vcn.fw; + adev->firmware.ucode[idx].fw = adev->vcn.fw[i]; adev->firmware.fw_size += ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE); @@ -1189,7 +1205,7 @@ int amdgpu_vcn_process_poison_irq(struct amdgpu_device *adev, amdgpu_ras_interrupt_dispatch(adev, &ih_data); } else { if (adev->virt.ops && adev->virt.ops->ras_poison_handler) - adev->virt.ops->ras_poison_handler(adev); + adev->virt.ops->ras_poison_handler(adev, ras_if->block); else dev_warn(adev->dev, "No ras_poison_handler interface in SRIOV for VCN!\n"); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h index 0815c5a97564..a418393d89ec 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h @@ -160,6 +160,48 @@ } \ } while (0) +#define SOC24_DPG_MODE_OFFSET(ip, inst_idx, reg) \ + ({ \ + uint32_t internal_reg_offset, addr; \ + bool video_range, aon_range; \ + \ + addr = (adev->reg_offset[ip##_HWIP][inst_idx][reg##_BASE_IDX] + reg); \ + addr <<= 2; \ + video_range = ((((0xFFFFF & addr) >= (VCN_VID_SOC_ADDRESS)) && \ + ((0xFFFFF & addr) < ((VCN_VID_SOC_ADDRESS + 0x2600))))); \ + aon_range = ((((0xFFFFF & addr) >= (VCN_AON_SOC_ADDRESS)) && \ + ((0xFFFFF & addr) < ((VCN_AON_SOC_ADDRESS + 0x600))))); \ + if (video_range) \ + internal_reg_offset = ((0xFFFFF & addr) - (VCN_VID_SOC_ADDRESS) + \ + (VCN_VID_IP_ADDRESS)); \ + else if (aon_range) \ + internal_reg_offset = ((0xFFFFF & addr) - (VCN_AON_SOC_ADDRESS) + \ + (VCN_AON_IP_ADDRESS)); \ + else \ + internal_reg_offset = (0xFFFFF & addr); \ + \ + internal_reg_offset >>= 2; \ + }) + +#define WREG32_SOC24_DPG_MODE(inst_idx, offset, value, mask_en, indirect) \ + do { \ + if (!indirect) { \ + WREG32_SOC15(VCN, GET_INST(VCN, inst_idx), \ + regUVD_DPG_LMA_DATA, value); \ + WREG32_SOC15( \ + VCN, GET_INST(VCN, inst_idx), \ + regUVD_DPG_LMA_CTL, \ + (0x1 << UVD_DPG_LMA_CTL__READ_WRITE__SHIFT | \ + mask_en << UVD_DPG_LMA_CTL__MASK_EN__SHIFT | \ + offset << UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT)); \ + } else { \ + *adev->vcn.inst[inst_idx].dpg_sram_curr_addr++ = \ + offset; \ + *adev->vcn.inst[inst_idx].dpg_sram_curr_addr++ = \ + value; \ + } \ + } while (0) + #define AMDGPU_FW_SHARED_FLAG_0_UNIFIED_QUEUE (1 << 2) #define AMDGPU_FW_SHARED_FLAG_0_DRM_KEY_INJECT (1 << 4) #define AMDGPU_VCN_FW_SHARED_FLAG_0_RB (1 << 6) @@ -169,6 +211,9 @@ #define AMDGPU_VCN_SMU_VERSION_INFO_FLAG (1 << 11) #define AMDGPU_VCN_SMU_DPM_INTERFACE_FLAG (1 << 11) #define AMDGPU_VCN_VF_RB_SETUP_FLAG (1 << 14) +#define AMDGPU_VCN_VF_RB_DECOUPLE_FLAG (1 << 15) + +#define MAX_NUM_VCN_RB_SETUP 4 #define AMDGPU_VCN_IB_FLAG_DECODE_BUFFER 0x00000001 #define AMDGPU_VCN_CMD_FLAG_MSG_BUFFER 0x00000001 @@ -261,7 +306,7 @@ struct amdgpu_vcn_ras { struct amdgpu_vcn { unsigned fw_version; struct delayed_work idle_work; - const struct firmware *fw; /* VCN firmware */ + const struct firmware *fw[AMDGPU_MAX_VCN_INSTANCES]; /* VCN firmware */ unsigned num_enc_rings; enum amd_powergating_state cur_state; bool indirect_sram; @@ -335,15 +380,30 @@ struct amdgpu_fw_shared { struct amdgpu_fw_shared_smu_interface_info smu_interface_info; }; +struct amdgpu_vcn_rb_setup_info { + uint32_t rb_addr_lo; + uint32_t rb_addr_hi; + uint32_t rb_size; +}; + struct amdgpu_fw_shared_rb_setup { uint32_t is_rb_enabled_flags; - uint32_t rb_addr_lo; - uint32_t rb_addr_hi; - uint32_t rb_size; - uint32_t rb4_addr_lo; - uint32_t rb4_addr_hi; - uint32_t rb4_size; - uint32_t reserved[6]; + + union { + struct { + uint32_t rb_addr_lo; + uint32_t rb_addr_hi; + uint32_t rb_size; + uint32_t rb4_addr_lo; + uint32_t rb4_addr_hi; + uint32_t rb4_size; + uint32_t reserved[6]; + }; + + struct { + struct amdgpu_vcn_rb_setup_info rb_info[MAX_NUM_VCN_RB_SETUP]; + }; + }; }; struct amdgpu_fw_shared_drm_key_wa { @@ -351,6 +411,11 @@ struct amdgpu_fw_shared_drm_key_wa { uint8_t reserved[3]; }; +struct amdgpu_fw_shared_queue_decouple { + uint8_t is_enabled; + uint8_t reserved[7]; +}; + struct amdgpu_vcn4_fw_shared { uint32_t present_flag_0; uint8_t pad[12]; @@ -361,6 +426,8 @@ struct amdgpu_vcn4_fw_shared { struct amdgpu_fw_shared_rb_setup rb_setup; struct amdgpu_fw_shared_smu_interface_info smu_dpm_interface; struct amdgpu_fw_shared_drm_key_wa drm_key_wa; + uint8_t pad3[9]; + struct amdgpu_fw_shared_queue_decouple decouple; }; struct amdgpu_vcn_fwlog { @@ -378,6 +445,15 @@ struct amdgpu_vcn_decode_buffer { uint32_t pad[30]; }; +struct amdgpu_vcn_rb_metadata { + uint32_t size; + uint32_t present_flag_0; + + uint8_t version; + uint8_t ring_id; + uint8_t pad[26]; +}; + #define VCN_BLOCK_ENCODE_DISABLE_MASK 0x80 #define VCN_BLOCK_DECODE_DISABLE_MASK 0x40 #define VCN_BLOCK_QUEUE_DISABLE_MASK 0xC0 diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c index a0aa624f5a92..54ab51a4ada7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c @@ -32,6 +32,7 @@ #include "amdgpu.h" #include "amdgpu_ras.h" +#include "amdgpu_reset.h" #include "vi.h" #include "soc15.h" #include "nv.h" @@ -71,58 +72,6 @@ void amdgpu_virt_init_setting(struct amdgpu_device *adev) amdgpu_num_kcq = 2; } -void amdgpu_virt_kiq_reg_write_reg_wait(struct amdgpu_device *adev, - uint32_t reg0, uint32_t reg1, - uint32_t ref, uint32_t mask) -{ - struct amdgpu_kiq *kiq = &adev->gfx.kiq[0]; - struct amdgpu_ring *ring = &kiq->ring; - signed long r, cnt = 0; - unsigned long flags; - uint32_t seq; - - if (adev->mes.ring.sched.ready) { - amdgpu_mes_reg_write_reg_wait(adev, reg0, reg1, - ref, mask); - return; - } - - spin_lock_irqsave(&kiq->ring_lock, flags); - amdgpu_ring_alloc(ring, 32); - amdgpu_ring_emit_reg_write_reg_wait(ring, reg0, reg1, - ref, mask); - r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT); - if (r) - goto failed_undo; - - amdgpu_ring_commit(ring); - spin_unlock_irqrestore(&kiq->ring_lock, flags); - - r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT); - - /* don't wait anymore for IRQ context */ - if (r < 1 && in_interrupt()) - goto failed_kiq; - - might_sleep(); - while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) { - - msleep(MAX_KIQ_REG_BAILOUT_INTERVAL); - r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT); - } - - if (cnt > MAX_KIQ_REG_TRY) - goto failed_kiq; - - return; - -failed_undo: - amdgpu_ring_undo(ring); - spin_unlock_irqrestore(&kiq->ring_lock, flags); -failed_kiq: - dev_err(adev->dev, "failed to write reg %x wait reg %x\n", reg0, reg1); -} - /** * amdgpu_virt_request_full_gpu() - request full gpu access * @adev: amdgpu device. @@ -302,11 +251,11 @@ static int amdgpu_virt_init_ras_err_handler_data(struct amdgpu_device *adev) if (!*data) goto data_failure; - bps = kmalloc_array(align_space, sizeof((*data)->bps), GFP_KERNEL); + bps = kmalloc_array(align_space, sizeof(*(*data)->bps), GFP_KERNEL); if (!bps) goto bps_failure; - bps_bo = kmalloc_array(align_space, sizeof((*data)->bps_bo), GFP_KERNEL); + bps_bo = kmalloc_array(align_space, sizeof(*(*data)->bps_bo), GFP_KERNEL); if (!bps_bo) goto bps_bo_failure; @@ -339,8 +288,10 @@ static void amdgpu_virt_ras_release_bp(struct amdgpu_device *adev) for (i = data->last_reserved - 1; i >= 0; i--) { bo = data->bps_bo[i]; - amdgpu_bo_free_kernel(&bo, NULL, NULL); - data->bps_bo[i] = bo; + if (bo) { + amdgpu_bo_free_kernel(&bo, NULL, NULL); + data->bps_bo[i] = bo; + } data->last_reserved = i; } } @@ -380,6 +331,8 @@ static void amdgpu_virt_ras_reserve_bps(struct amdgpu_device *adev) { struct amdgpu_virt *virt = &adev->virt; struct amdgpu_virt_ras_err_handler_data *data = virt->virt_eh_data; + struct amdgpu_vram_mgr *mgr = &adev->mman.vram_mgr; + struct ttm_resource_manager *man = &mgr->manager; struct amdgpu_bo *bo = NULL; uint64_t bp; int i; @@ -395,12 +348,18 @@ static void amdgpu_virt_ras_reserve_bps(struct amdgpu_device *adev) * 2) a ras bad page has been reserved (duplicate error injection * for one page); */ - if (amdgpu_bo_create_kernel_at(adev, bp << AMDGPU_GPU_PAGE_SHIFT, - AMDGPU_GPU_PAGE_SIZE, - &bo, NULL)) - DRM_DEBUG("RAS WARN: reserve vram for retired page %llx fail\n", bp); - - data->bps_bo[i] = bo; + if (ttm_resource_manager_used(man)) { + amdgpu_vram_mgr_reserve_range(&adev->mman.vram_mgr, + bp << AMDGPU_GPU_PAGE_SHIFT, + AMDGPU_GPU_PAGE_SIZE); + data->bps_bo[i] = NULL; + } else { + if (amdgpu_bo_create_kernel_at(adev, bp << AMDGPU_GPU_PAGE_SHIFT, + AMDGPU_GPU_PAGE_SIZE, + &bo, NULL)) + DRM_DEBUG("RAS WARN: reserve vram for retired page %llx fail\n", bp); + data->bps_bo[i] = bo; + } data->last_reserved = i + 1; bo = NULL; } @@ -466,7 +425,7 @@ static int amdgpu_virt_read_pf2vf_data(struct amdgpu_device *adev) return -EINVAL; if (pf2vf_info->size > 1024) { - DRM_ERROR("invalid pf2vf message size\n"); + dev_err(adev->dev, "invalid pf2vf message size: 0x%x\n", pf2vf_info->size); return -EINVAL; } @@ -477,7 +436,9 @@ static int amdgpu_virt_read_pf2vf_data(struct amdgpu_device *adev) adev->virt.fw_reserve.p_pf2vf, pf2vf_info->size, adev->virt.fw_reserve.checksum_key, checksum); if (checksum != checkval) { - DRM_ERROR("invalid pf2vf message\n"); + dev_err(adev->dev, + "invalid pf2vf message: header checksum=0x%x calculated checksum=0x%x\n", + checksum, checkval); return -EINVAL; } @@ -491,7 +452,9 @@ static int amdgpu_virt_read_pf2vf_data(struct amdgpu_device *adev) adev->virt.fw_reserve.p_pf2vf, pf2vf_info->size, 0, checksum); if (checksum != checkval) { - DRM_ERROR("invalid pf2vf message\n"); + dev_err(adev->dev, + "invalid pf2vf message: header checksum=0x%x calculated checksum=0x%x\n", + checksum, checkval); return -EINVAL; } @@ -527,7 +490,7 @@ static int amdgpu_virt_read_pf2vf_data(struct amdgpu_device *adev) ((struct amd_sriov_msg_pf2vf_info *)pf2vf_info)->uuid; break; default: - DRM_ERROR("invalid pf2vf version\n"); + dev_err(adev->dev, "invalid pf2vf version: 0x%x\n", pf2vf_info->version); return -EINVAL; } @@ -613,6 +576,11 @@ static int amdgpu_virt_write_vf2pf_data(struct amdgpu_device *adev) vf2pf_info->decode_usage = 0; vf2pf_info->dummy_page_addr = (uint64_t)adev->dummy_page_addr; + vf2pf_info->mes_info_addr = (uint64_t)adev->mes.resource_1_gpu_addr; + + if (adev->mes.resource_1) { + vf2pf_info->mes_info_size = adev->mes.resource_1->tbo.base.size; + } vf2pf_info->checksum = amd_sriov_msg_checksum( vf2pf_info, vf2pf_info->header.size, 0, 0); @@ -626,8 +594,22 @@ static void amdgpu_virt_update_vf2pf_work_item(struct work_struct *work) int ret; ret = amdgpu_virt_read_pf2vf_data(adev); - if (ret) + if (ret) { + adev->virt.vf2pf_update_retry_cnt++; + if ((adev->virt.vf2pf_update_retry_cnt >= AMDGPU_VF2PF_UPDATE_MAX_RETRY_LIMIT) && + amdgpu_sriov_runtime(adev) && !amdgpu_in_reset(adev)) { + amdgpu_ras_set_fed(adev, true); + if (amdgpu_reset_domain_schedule(adev->reset_domain, + &adev->virt.flr_work)) + return; + else + dev_err(adev->dev, "Failed to queue work! at %s", __func__); + } + goto out; + } + + adev->virt.vf2pf_update_retry_cnt = 0; amdgpu_virt_write_vf2pf_data(adev); out: @@ -648,6 +630,7 @@ void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev) adev->virt.fw_reserve.p_pf2vf = NULL; adev->virt.fw_reserve.p_vf2pf = NULL; adev->virt.vf2pf_update_interval_ms = 0; + adev->virt.vf2pf_update_retry_cnt = 0; if (adev->mman.fw_vram_usage_va && adev->mman.drv_vram_usage_va) { DRM_WARN("Currently fw_vram and drv_vram should not have values at the same time!"); @@ -747,12 +730,6 @@ void amdgpu_detect_virtualization(struct amdgpu_device *adev) adev->virt.caps |= AMDGPU_PASSTHROUGH_MODE; } - if (amdgpu_sriov_vf(adev) && adev->asic_type == CHIP_SIENNA_CICHLID) - /* VF MMIO access (except mailbox range) from CPU - * will be blocked during sriov runtime - */ - adev->virt.caps |= AMDGPU_VF_MMIO_ACCESS_PROTECT; - /* we have the ability to check now */ if (amdgpu_sriov_vf(adev)) { switch (adev->asic_type) { @@ -942,7 +919,7 @@ void amdgpu_virt_update_sriov_video_codec(struct amdgpu_device *adev, } } -static bool amdgpu_virt_get_rlcg_reg_access_flag(struct amdgpu_device *adev, +bool amdgpu_virt_get_rlcg_reg_access_flag(struct amdgpu_device *adev, u32 acc_flags, u32 hwip, bool write, u32 *rlcg_flag) { @@ -975,7 +952,7 @@ static bool amdgpu_virt_get_rlcg_reg_access_flag(struct amdgpu_device *adev, return ret; } -static u32 amdgpu_virt_rlcg_reg_rw(struct amdgpu_device *adev, u32 offset, u32 v, u32 flag, u32 xcc_id) +u32 amdgpu_virt_rlcg_reg_rw(struct amdgpu_device *adev, u32 offset, u32 v, u32 flag, u32 xcc_id) { struct amdgpu_rlcg_reg_access_ctrl *reg_access_ctrl; uint32_t timeout = 50000; @@ -1021,7 +998,7 @@ static u32 amdgpu_virt_rlcg_reg_rw(struct amdgpu_device *adev, u32 offset, u32 v * SCRATCH_REG0 = read/write value * SCRATCH_REG1[30:28] = command * SCRATCH_REG1[19:0] = address in dword - * SCRATCH_REG1[26:24] = Error reporting + * SCRATCH_REG1[27:24] = Error reporting */ writel(v, scratch_reg0); writel((offset | flag), scratch_reg1); @@ -1035,7 +1012,8 @@ static u32 amdgpu_virt_rlcg_reg_rw(struct amdgpu_device *adev, u32 offset, u32 v udelay(10); } - if (i >= timeout) { + tmp = readl(scratch_reg1); + if (i >= timeout || (tmp & AMDGPU_RLCG_SCRATCH1_ERROR_MASK) != 0) { if (amdgpu_sriov_rlcg_error_report_enabled(adev)) { if (tmp & AMDGPU_RLCG_VFGATE_DISABLED) { dev_err(adev->dev, @@ -1093,3 +1071,14 @@ u32 amdgpu_sriov_rreg(struct amdgpu_device *adev, else return RREG32(offset); } + +bool amdgpu_sriov_xnack_support(struct amdgpu_device *adev) +{ + bool xnack_mode = true; + + if (amdgpu_sriov_vf(adev) && + amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2)) + xnack_mode = false; + + return xnack_mode; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h index fabb83e9d9ae..642f1fd287d8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h @@ -45,12 +45,15 @@ #define AMDGPU_RLCG_REG_NOT_IN_RANGE 0x1000000 #define AMDGPU_RLCG_SCRATCH1_ADDRESS_MASK 0xFFFFF +#define AMDGPU_RLCG_SCRATCH1_ERROR_MASK 0xF000000 /* all asic after AI use this offset */ #define mmRCC_IOV_FUNC_IDENTIFIER 0xDE5 /* tonga/fiji use this offset */ #define mmBIF_IOV_FUNC_IDENTIFIER 0x1503 +#define AMDGPU_VF2PF_UPDATE_MAX_RETRY_LIMIT 5 + enum amdgpu_sriov_vf_mode { SRIOV_VF_MODE_BARE_METAL = 0, SRIOV_VF_MODE_ONE_VF, @@ -88,7 +91,8 @@ struct amdgpu_virt_ops { int (*wait_reset)(struct amdgpu_device *adev); void (*trans_msg)(struct amdgpu_device *adev, enum idh_request req, u32 data1, u32 data2, u32 data3); - void (*ras_poison_handler)(struct amdgpu_device *adev); + void (*ras_poison_handler)(struct amdgpu_device *adev, + enum amdgpu_ras_block block); }; /* @@ -126,6 +130,10 @@ enum AMDGIM_FEATURE_FLAG { AMDGIM_FEATURE_INDIRECT_REG_ACCESS = (1 << 5), /* AV1 Support MODE*/ AMDGIM_FEATURE_AV1_SUPPORT = (1 << 6), + /* VCN RB decouple */ + AMDGIM_FEATURE_VCN_RB_DECOUPLE = (1 << 7), + /* MES info */ + AMDGIM_FEATURE_MES_INFO_ENABLE = (1 << 8), }; enum AMDGIM_REG_ACCESS_FLAG { @@ -253,6 +261,7 @@ struct amdgpu_virt { /* vf2pf message */ struct delayed_work vf2pf_work; uint32_t vf2pf_update_interval_ms; + int vf2pf_update_retry_cnt; /* multimedia bandwidth config */ bool is_mm_bw_enabled; @@ -326,11 +335,12 @@ static inline bool is_virtual_machine(void) ((!amdgpu_in_reset(adev)) && (!adev->virt.tdr_debug)) #define amdgpu_sriov_is_av1_support(adev) \ ((adev)->virt.gim_feature & AMDGIM_FEATURE_AV1_SUPPORT) +#define amdgpu_sriov_is_vcn_rb_decouple(adev) \ + ((adev)->virt.gim_feature & AMDGIM_FEATURE_VCN_RB_DECOUPLE) +#define amdgpu_sriov_is_mes_info_enable(adev) \ + ((adev)->virt.gim_feature & AMDGIM_FEATURE_MES_INFO_ENABLE) bool amdgpu_virt_mmio_blocked(struct amdgpu_device *adev); void amdgpu_virt_init_setting(struct amdgpu_device *adev); -void amdgpu_virt_kiq_reg_write_reg_wait(struct amdgpu_device *adev, - uint32_t reg0, uint32_t rreg1, - uint32_t ref, uint32_t mask); int amdgpu_virt_request_full_gpu(struct amdgpu_device *adev, bool init); int amdgpu_virt_release_full_gpu(struct amdgpu_device *adev, bool init); int amdgpu_virt_reset_gpu(struct amdgpu_device *adev); @@ -361,4 +371,9 @@ u32 amdgpu_sriov_rreg(struct amdgpu_device *adev, bool amdgpu_virt_fw_load_skip_check(struct amdgpu_device *adev, uint32_t ucode_id); void amdgpu_virt_post_reset(struct amdgpu_device *adev); +bool amdgpu_sriov_xnack_support(struct amdgpu_device *adev); +bool amdgpu_virt_get_rlcg_reg_access_flag(struct amdgpu_device *adev, + u32 acc_flags, u32 hwip, + bool write, u32 *rlcg_flag); +u32 amdgpu_virt_rlcg_reg_rw(struct amdgpu_device *adev, u32 offset, u32 v, u32 flag, u32 xcc_id); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c index db6fc0cb18eb..8baa2e0935cc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0+ #include <drm/drm_atomic_helper.h> +#include <drm/drm_edid.h> #include <drm/drm_simple_kms_helper.h> #include <drm/drm_vblank.h> @@ -659,8 +660,7 @@ static const struct amd_ip_funcs amdgpu_vkms_ip_funcs = { .set_powergating_state = amdgpu_vkms_set_powergating_state, }; -const struct amdgpu_ip_block_version amdgpu_vkms_ip_block = -{ +const struct amdgpu_ip_block_version amdgpu_vkms_ip_block = { .type = AMD_IP_BLOCK_TYPE_DCE, .major = 1, .minor = 0, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index f3c9f93d8899..4e2391c83d7c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -234,6 +234,22 @@ static void amdgpu_vm_bo_invalidated(struct amdgpu_vm_bo_base *vm_bo) } /** + * amdgpu_vm_bo_evicted_user - vm_bo is evicted + * + * @vm_bo: vm_bo which is evicted + * + * State for BOs used by user mode queues which are not at the location they + * should be. + */ +static void amdgpu_vm_bo_evicted_user(struct amdgpu_vm_bo_base *vm_bo) +{ + vm_bo->moved = true; + spin_lock(&vm_bo->vm->status_lock); + list_move(&vm_bo->vm_status, &vm_bo->vm->evicted_user); + spin_unlock(&vm_bo->vm->status_lock); +} + +/** * amdgpu_vm_bo_relocated - vm_bo is reloacted * * @vm_bo: vm_bo which is relocated @@ -285,6 +301,7 @@ static void amdgpu_vm_bo_reset_state_machine(struct amdgpu_vm *vm) list_for_each_entry_safe(vm_bo, tmp, &vm->idle, vm_status) { struct amdgpu_bo *bo = vm_bo->bo; + vm_bo->moved = true; if (!bo || bo->tbo.type != ttm_bo_type_kernel) list_move(&vm_bo->vm_status, &vm_bo->vm->moved); else if (bo->parent) @@ -426,21 +443,25 @@ uint64_t amdgpu_vm_generation(struct amdgpu_device *adev, struct amdgpu_vm *vm) } /** - * amdgpu_vm_validate_pt_bos - validate the page table BOs + * amdgpu_vm_validate - validate evicted BOs tracked in the VM * * @adev: amdgpu device pointer * @vm: vm providing the BOs + * @ticket: optional reservation ticket used to reserve the VM * @validate: callback to do the validation * @param: parameter for the validation callback * - * Validate the page table BOs on command submission if neccessary. + * Validate the page table BOs and per-VM BOs on command submission if + * necessary. If a ticket is given, also try to validate evicted user queue + * BOs. They must already be reserved with the given ticket. * * Returns: * Validation result. */ -int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm, - int (*validate)(void *p, struct amdgpu_bo *bo), - void *param) +int amdgpu_vm_validate(struct amdgpu_device *adev, struct amdgpu_vm *vm, + struct ww_acquire_ctx *ticket, + int (*validate)(void *p, struct amdgpu_bo *bo), + void *param) { struct amdgpu_vm_bo_base *bo_base; struct amdgpu_bo *shadow; @@ -483,6 +504,34 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm, } spin_lock(&vm->status_lock); } + while (ticket && !list_empty(&vm->evicted_user)) { + bo_base = list_first_entry(&vm->evicted_user, + struct amdgpu_vm_bo_base, + vm_status); + spin_unlock(&vm->status_lock); + + bo = bo_base->bo; + + if (dma_resv_locking_ctx(bo->tbo.base.resv) != ticket) { + struct amdgpu_task_info *ti = amdgpu_vm_get_task_info_vm(vm); + + pr_warn_ratelimited("Evicted user BO is not reserved\n"); + if (ti) { + pr_warn_ratelimited("pid %d\n", ti->pid); + amdgpu_vm_put_task_info(ti); + } + + return -EINVAL; + } + + r = validate(param, bo); + if (r) + return r; + + amdgpu_vm_bo_invalidated(bo_base); + + spin_lock(&vm->status_lock); + } spin_unlock(&vm->status_lock); amdgpu_vm_eviction_lock(vm); @@ -609,7 +658,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool vm_flush_needed = job->vm_needs_flush; struct dma_fence *fence = NULL; bool pasid_mapping_needed = false; - unsigned patch_offset = 0; + unsigned int patch; int r; if (amdgpu_vmid_had_gpu_reset(adev, id)) { @@ -636,7 +685,8 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, amdgpu_ring_ib_begin(ring); if (ring->funcs->init_cond_exec) - patch_offset = amdgpu_ring_init_cond_exec(ring); + patch = amdgpu_ring_init_cond_exec(ring, + ring->cond_exe_gpu_addr); if (need_pipe_sync) amdgpu_ring_emit_pipeline_sync(ring); @@ -650,7 +700,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, amdgpu_gmc_emit_pasid_mapping(ring, job->vmid, job->pasid); if (spm_update_needed && adev->gfx.rlc.funcs->update_spm_vmid) - adev->gfx.rlc.funcs->update_spm_vmid(adev, job->vmid); + adev->gfx.rlc.funcs->update_spm_vmid(adev, ring, job->vmid); if (!ring->is_mes_queue && ring->funcs->emit_gds_switch && gds_switch_needed) { @@ -684,8 +734,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, } dma_fence_put(fence); - if (ring->funcs->patch_cond_exec) - amdgpu_ring_patch_cond_exec(ring, patch_offset); + amdgpu_ring_patch_cond_exec(ring, patch); /* the double SWITCH_BUFFER here *cannot* be skipped by COND_EXEC */ if (ring->funcs->emit_switch_buffer) { @@ -837,6 +886,44 @@ static void amdgpu_vm_tlb_seq_cb(struct dma_fence *fence, } /** + * amdgpu_vm_tlb_flush - prepare TLB flush + * + * @params: parameters for update + * @fence: input fence to sync TLB flush with + * @tlb_cb: the callback structure + * + * Increments the tlb sequence to make sure that future CS execute a VM flush. + */ +static void +amdgpu_vm_tlb_flush(struct amdgpu_vm_update_params *params, + struct dma_fence **fence, + struct amdgpu_vm_tlb_seq_struct *tlb_cb) +{ + struct amdgpu_vm *vm = params->vm; + + if (!fence || !*fence) + return; + + tlb_cb->vm = vm; + if (!dma_fence_add_callback(*fence, &tlb_cb->cb, + amdgpu_vm_tlb_seq_cb)) { + dma_fence_put(vm->last_tlb_flush); + vm->last_tlb_flush = dma_fence_get(*fence); + } else { + amdgpu_vm_tlb_seq_cb(NULL, &tlb_cb->cb); + } + + /* Prepare a TLB flush fence to be attached to PTs */ + if (!params->unlocked && vm->is_compute_context) { + amdgpu_vm_tlb_fence_create(params->adev, vm, fence); + + /* Makes sure no PD/PT is freed before the flush */ + dma_resv_add_fence(vm->root.bo->tbo.base.resv, *fence, + DMA_RESV_USAGE_BOOKKEEP); + } +} + +/** * amdgpu_vm_update_range - update a range in the vm page table * * @adev: amdgpu_device pointer to use for commands @@ -844,6 +931,7 @@ static void amdgpu_vm_tlb_seq_cb(struct dma_fence *fence, * @immediate: immediate submission in a page fault * @unlocked: unlocked invalidation during MM callback * @flush_tlb: trigger tlb invalidation after update completed + * @allow_override: change MTYPE for local NUMA nodes * @resv: fences we need to sync to * @start: start of mapped range * @last: last mapped entry @@ -860,14 +948,14 @@ static void amdgpu_vm_tlb_seq_cb(struct dma_fence *fence, * 0 for success, negative erro code for failure. */ int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm, - bool immediate, bool unlocked, bool flush_tlb, + bool immediate, bool unlocked, bool flush_tlb, bool allow_override, struct dma_resv *resv, uint64_t start, uint64_t last, uint64_t flags, uint64_t offset, uint64_t vram_base, struct ttm_resource *res, dma_addr_t *pages_addr, struct dma_fence **fence) { - struct amdgpu_vm_update_params params; struct amdgpu_vm_tlb_seq_struct *tlb_cb; + struct amdgpu_vm_update_params params; struct amdgpu_res_cursor cursor; enum amdgpu_sync_mode sync_mode; int r, idx; @@ -877,8 +965,8 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm, tlb_cb = kmalloc(sizeof(*tlb_cb), GFP_KERNEL); if (!tlb_cb) { - r = -ENOMEM; - goto error_unlock; + drm_dev_exit(idx); + return -ENOMEM; } /* Vega20+XGMI where PTEs get inadvertently cached in L2 texture cache, @@ -898,6 +986,9 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm, params.immediate = immediate; params.pages_addr = pages_addr; params.unlocked = unlocked; + params.needs_flush = flush_tlb; + params.allow_override = allow_override; + INIT_LIST_HEAD(¶ms.tlb_flush_waitlist); /* Implicitly sync to command submissions in the same VM before * unmapping. Sync to moving fences before mapping. @@ -980,24 +1071,18 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm, } r = vm->update_funcs->commit(¶ms, fence); + if (r) + goto error_free; - if (flush_tlb || params.table_freed) { - tlb_cb->vm = vm; - if (fence && *fence && - !dma_fence_add_callback(*fence, &tlb_cb->cb, - amdgpu_vm_tlb_seq_cb)) { - dma_fence_put(vm->last_tlb_flush); - vm->last_tlb_flush = dma_fence_get(*fence); - } else { - amdgpu_vm_tlb_seq_cb(NULL, &tlb_cb->cb); - } + if (params.needs_flush) { + amdgpu_vm_tlb_flush(¶ms, fence, tlb_cb); tlb_cb = NULL; } + amdgpu_vm_pt_free_list(adev, ¶ms); + error_free: kfree(tlb_cb); - -error_unlock: amdgpu_vm_eviction_unlock(vm); drm_dev_exit(idx); return r; @@ -1073,6 +1158,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, struct ttm_resource *mem; struct dma_fence **last_update; bool flush_tlb = clear; + bool uncached; struct dma_resv *resv; uint64_t vram_base; uint64_t flags; @@ -1095,8 +1181,8 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, bo = gem_to_amdgpu_bo(gobj); } mem = bo->tbo.resource; - if (mem->mem_type == TTM_PL_TT || - mem->mem_type == AMDGPU_PL_PREEMPT) + if (mem && (mem->mem_type == TTM_PL_TT || + mem->mem_type == AMDGPU_PL_PREEMPT)) pages_addr = bo->tbo.ttm->dma_address; } @@ -1110,9 +1196,11 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, bo_adev = amdgpu_ttm_adev(bo->tbo.bdev); vram_base = bo_adev->vm_manager.vram_base_offset; + uncached = (bo->flags & AMDGPU_GEM_CREATE_UNCACHED) != 0; } else { flags = 0x0; vram_base = 0; + uncached = false; } if (clear || (bo && bo->tbo.base.resv == @@ -1146,7 +1234,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, trace_amdgpu_vm_bo_update(mapping); r = amdgpu_vm_update_range(adev, vm, false, false, flush_tlb, - resv, mapping->start, mapping->last, + !uncached, resv, mapping->start, mapping->last, update_flags, mapping->offset, vram_base, mem, pages_addr, last_update); @@ -1337,12 +1425,8 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev, struct amdgpu_bo_va_mapping, list); list_del(&mapping->list); - if (vm->pte_support_ats && - mapping->start < AMDGPU_GMC_HOLE_START) - init_pte_value = AMDGPU_PTE_DEFAULT_ATC; - - r = amdgpu_vm_update_range(adev, vm, false, false, true, resv, - mapping->start, mapping->last, + r = amdgpu_vm_update_range(adev, vm, false, false, true, false, + resv, mapping->start, mapping->last, init_pte_value, 0, 0, NULL, NULL, &f); amdgpu_vm_free_mapping(adev, vm, mapping, f); @@ -1368,6 +1452,7 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev, * * @adev: amdgpu_device pointer * @vm: requested vm + * @ticket: optional reservation ticket used to reserve the VM * * Make sure all BOs which are moved are updated in the PTs. * @@ -1377,11 +1462,12 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev, * PTs have to be reserved! */ int amdgpu_vm_handle_moved(struct amdgpu_device *adev, - struct amdgpu_vm *vm) + struct amdgpu_vm *vm, + struct ww_acquire_ctx *ticket) { struct amdgpu_bo_va *bo_va; struct dma_resv *resv; - bool clear; + bool clear, unlock; int r; spin_lock(&vm->status_lock); @@ -1404,18 +1490,35 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev, spin_unlock(&vm->status_lock); /* Try to reserve the BO to avoid clearing its ptes */ - if (!adev->debug_vm && dma_resv_trylock(resv)) + if (!adev->debug_vm && dma_resv_trylock(resv)) { clear = false; + unlock = true; + /* The caller is already holding the reservation lock */ + } else if (ticket && dma_resv_locking_ctx(resv) == ticket) { + clear = false; + unlock = false; /* Somebody else is using the BO right now */ - else + } else { clear = true; + unlock = false; + } r = amdgpu_vm_bo_update(adev, bo_va, clear); + + if (unlock) + dma_resv_unlock(resv); if (r) return r; - if (!clear) - dma_resv_unlock(resv); + /* Remember evicted DMABuf imports in compute VMs for later + * validation + */ + if (vm->is_compute_context && + bo_va->base.bo->tbo.base.import_attach && + (!bo_va->base.bo->tbo.resource || + bo_va->base.bo->tbo.resource->mem_type == TTM_PL_SYSTEM)) + amdgpu_vm_bo_evicted_user(&bo_va->base); + spin_lock(&vm->status_lock); } spin_unlock(&vm->status_lock); @@ -1424,6 +1527,51 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev, } /** + * amdgpu_vm_flush_compute_tlb - Flush TLB on compute VM + * + * @adev: amdgpu_device pointer + * @vm: requested vm + * @flush_type: flush type + * @xcc_mask: mask of XCCs that belong to the compute partition in need of a TLB flush. + * + * Flush TLB if needed for a compute VM. + * + * Returns: + * 0 for success. + */ +int amdgpu_vm_flush_compute_tlb(struct amdgpu_device *adev, + struct amdgpu_vm *vm, + uint32_t flush_type, + uint32_t xcc_mask) +{ + uint64_t tlb_seq = amdgpu_vm_tlb_seq(vm); + bool all_hub = false; + int xcc = 0, r = 0; + + WARN_ON_ONCE(!vm->is_compute_context); + + /* + * It can be that we race and lose here, but that is extremely unlikely + * and the worst thing which could happen is that we flush the changes + * into the TLB once more which is harmless. + */ + if (atomic64_xchg(&vm->kfd_last_flushed_seq, tlb_seq) == tlb_seq) + return 0; + + if (adev->family == AMDGPU_FAMILY_AI || + adev->family == AMDGPU_FAMILY_RV) + all_hub = true; + + for_each_inst(xcc, xcc_mask) { + r = amdgpu_gmc_flush_gpu_tlb_pasid(adev, vm->pasid, flush_type, + all_hub, xcc); + if (r) + break; + } + return r; +} + +/** * amdgpu_vm_bo_add - add a bo to a specific vm * * @adev: amdgpu_device pointer @@ -1499,6 +1647,37 @@ static void amdgpu_vm_bo_insert_map(struct amdgpu_device *adev, trace_amdgpu_vm_bo_map(bo_va, mapping); } +/* Validate operation parameters to prevent potential abuse */ +static int amdgpu_vm_verify_parameters(struct amdgpu_device *adev, + struct amdgpu_bo *bo, + uint64_t saddr, + uint64_t offset, + uint64_t size) +{ + uint64_t tmp, lpfn; + + if (saddr & AMDGPU_GPU_PAGE_MASK + || offset & AMDGPU_GPU_PAGE_MASK + || size & AMDGPU_GPU_PAGE_MASK) + return -EINVAL; + + if (check_add_overflow(saddr, size, &tmp) + || check_add_overflow(offset, size, &tmp) + || size == 0 /* which also leads to end < begin */) + return -EINVAL; + + /* make sure object fit at this offset */ + if (bo && offset + size > amdgpu_bo_size(bo)) + return -EINVAL; + + /* Ensure last pfn not exceed max_pfn */ + lpfn = (saddr + size - 1) >> AMDGPU_GPU_PAGE_SHIFT; + if (lpfn >= adev->vm_manager.max_pfn) + return -EINVAL; + + return 0; +} + /** * amdgpu_vm_bo_map - map bo inside a vm * @@ -1525,21 +1704,14 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev, struct amdgpu_bo *bo = bo_va->base.bo; struct amdgpu_vm *vm = bo_va->base.vm; uint64_t eaddr; + int r; - /* validate the parameters */ - if (saddr & ~PAGE_MASK || offset & ~PAGE_MASK || size & ~PAGE_MASK) - return -EINVAL; - if (saddr + size <= saddr || offset + size <= offset) - return -EINVAL; - - /* make sure object fit at this offset */ - eaddr = saddr + size - 1; - if ((bo && offset + size > amdgpu_bo_size(bo)) || - (eaddr >= adev->vm_manager.max_pfn << AMDGPU_GPU_PAGE_SHIFT)) - return -EINVAL; + r = amdgpu_vm_verify_parameters(adev, bo, saddr, offset, size); + if (r) + return r; saddr /= AMDGPU_GPU_PAGE_SIZE; - eaddr /= AMDGPU_GPU_PAGE_SIZE; + eaddr = saddr + (size - 1) / AMDGPU_GPU_PAGE_SIZE; tmp = amdgpu_vm_it_iter_first(&vm->va, saddr, eaddr); if (tmp) { @@ -1592,17 +1764,9 @@ int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev, uint64_t eaddr; int r; - /* validate the parameters */ - if (saddr & ~PAGE_MASK || offset & ~PAGE_MASK || size & ~PAGE_MASK) - return -EINVAL; - if (saddr + size <= saddr || offset + size <= offset) - return -EINVAL; - - /* make sure object fit at this offset */ - eaddr = saddr + size - 1; - if ((bo && offset + size > amdgpu_bo_size(bo)) || - (eaddr >= adev->vm_manager.max_pfn << AMDGPU_GPU_PAGE_SHIFT)) - return -EINVAL; + r = amdgpu_vm_verify_parameters(adev, bo, saddr, offset, size); + if (r) + return r; /* Allocate all the needed memory */ mapping = kmalloc(sizeof(*mapping), GFP_KERNEL); @@ -1616,7 +1780,7 @@ int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev, } saddr /= AMDGPU_GPU_PAGE_SIZE; - eaddr /= AMDGPU_GPU_PAGE_SIZE; + eaddr = saddr + (size - 1) / AMDGPU_GPU_PAGE_SIZE; mapping->start = saddr; mapping->last = eaddr; @@ -1703,10 +1867,14 @@ int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev, struct amdgpu_bo_va_mapping *before, *after, *tmp, *next; LIST_HEAD(removed); uint64_t eaddr; + int r; + + r = amdgpu_vm_verify_parameters(adev, NULL, saddr, 0, size); + if (r) + return r; - eaddr = saddr + size - 1; saddr /= AMDGPU_GPU_PAGE_SIZE; - eaddr /= AMDGPU_GPU_PAGE_SIZE; + eaddr = saddr + (size - 1) / AMDGPU_GPU_PAGE_SIZE; /* Allocate all the needed memory */ before = kzalloc(sizeof(*before), GFP_KERNEL); @@ -2113,6 +2281,108 @@ long amdgpu_vm_wait_idle(struct amdgpu_vm *vm, long timeout) return dma_fence_wait_timeout(vm->last_unlocked, true, timeout); } +static void amdgpu_vm_destroy_task_info(struct kref *kref) +{ + struct amdgpu_task_info *ti = container_of(kref, struct amdgpu_task_info, refcount); + + kfree(ti); +} + +static inline struct amdgpu_vm * +amdgpu_vm_get_vm_from_pasid(struct amdgpu_device *adev, u32 pasid) +{ + struct amdgpu_vm *vm; + unsigned long flags; + + xa_lock_irqsave(&adev->vm_manager.pasids, flags); + vm = xa_load(&adev->vm_manager.pasids, pasid); + xa_unlock_irqrestore(&adev->vm_manager.pasids, flags); + + return vm; +} + +/** + * amdgpu_vm_put_task_info - reference down the vm task_info ptr + * + * @task_info: task_info struct under discussion. + * + * frees the vm task_info ptr at the last put + */ +void amdgpu_vm_put_task_info(struct amdgpu_task_info *task_info) +{ + kref_put(&task_info->refcount, amdgpu_vm_destroy_task_info); +} + +/** + * amdgpu_vm_get_task_info_vm - Extracts task info for a vm. + * + * @vm: VM to get info from + * + * Returns the reference counted task_info structure, which must be + * referenced down with amdgpu_vm_put_task_info. + */ +struct amdgpu_task_info * +amdgpu_vm_get_task_info_vm(struct amdgpu_vm *vm) +{ + struct amdgpu_task_info *ti = NULL; + + if (vm) { + ti = vm->task_info; + kref_get(&vm->task_info->refcount); + } + + return ti; +} + +/** + * amdgpu_vm_get_task_info_pasid - Extracts task info for a PASID. + * + * @adev: drm device pointer + * @pasid: PASID identifier for VM + * + * Returns the reference counted task_info structure, which must be + * referenced down with amdgpu_vm_put_task_info. + */ +struct amdgpu_task_info * +amdgpu_vm_get_task_info_pasid(struct amdgpu_device *adev, u32 pasid) +{ + return amdgpu_vm_get_task_info_vm( + amdgpu_vm_get_vm_from_pasid(adev, pasid)); +} + +static int amdgpu_vm_create_task_info(struct amdgpu_vm *vm) +{ + vm->task_info = kzalloc(sizeof(struct amdgpu_task_info), GFP_KERNEL); + if (!vm->task_info) + return -ENOMEM; + + kref_init(&vm->task_info->refcount); + return 0; +} + +/** + * amdgpu_vm_set_task_info - Sets VMs task info. + * + * @vm: vm for which to set the info + */ +void amdgpu_vm_set_task_info(struct amdgpu_vm *vm) +{ + if (!vm->task_info) + return; + + if (vm->task_info->pid == current->pid) + return; + + vm->task_info->pid = current->pid; + get_task_comm(vm->task_info->task_name, current); + + if (current->group_leader->mm != current->mm) + return; + + vm->task_info->tgid = current->group_leader->pid; + get_task_comm(vm->task_info->process_name, current->group_leader); +} + /** * amdgpu_vm_init - initialize a vm instance * @@ -2125,7 +2395,8 @@ long amdgpu_vm_wait_idle(struct amdgpu_vm *vm, long timeout) * Returns: * 0 for success, error for failure. */ -int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, int32_t xcp_id) +int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, + int32_t xcp_id) { struct amdgpu_bo *root_bo; struct amdgpu_bo_vm *root; @@ -2135,6 +2406,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, int32_t xcp for (i = 0; i < AMDGPU_MAX_VMHUBS; i++) vm->reserved_vmid[i] = NULL; INIT_LIST_HEAD(&vm->evicted); + INIT_LIST_HEAD(&vm->evicted_user); INIT_LIST_HEAD(&vm->relocated); INIT_LIST_HEAD(&vm->moved); INIT_LIST_HEAD(&vm->idle); @@ -2144,12 +2416,12 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, int32_t xcp INIT_LIST_HEAD(&vm->done); INIT_LIST_HEAD(&vm->pt_freed); INIT_WORK(&vm->pt_free_work, amdgpu_vm_pt_free_work); + INIT_KFIFO(vm->faults); r = amdgpu_vm_init_entities(adev, vm); if (r) return r; - vm->pte_support_ats = false; vm->is_compute_context = false; vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode & @@ -2173,39 +2445,43 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, int32_t xcp mutex_init(&vm->eviction_lock); vm->evicting = false; + vm->tlb_fence_context = dma_fence_context_alloc(1); r = amdgpu_vm_pt_create(adev, vm, adev->vm_manager.root_level, false, &root, xcp_id); if (r) goto error_free_delayed; - root_bo = &root->bo; + + root_bo = amdgpu_bo_ref(&root->bo); r = amdgpu_bo_reserve(root_bo, true); - if (r) - goto error_free_root; + if (r) { + amdgpu_bo_unref(&root->shadow); + amdgpu_bo_unref(&root_bo); + goto error_free_delayed; + } + amdgpu_vm_bo_base_init(&vm->root, vm, root_bo); r = dma_resv_reserve_fences(root_bo->tbo.base.resv, 1); if (r) - goto error_unreserve; - - amdgpu_vm_bo_base_init(&vm->root, vm, root_bo); + goto error_free_root; r = amdgpu_vm_pt_clear(adev, vm, root, false); if (r) - goto error_unreserve; + goto error_free_root; - amdgpu_bo_unreserve(vm->root.bo); + r = amdgpu_vm_create_task_info(vm); + if (r) + DRM_DEBUG("Failed to create task info for VM\n"); - INIT_KFIFO(vm->faults); + amdgpu_bo_unreserve(vm->root.bo); + amdgpu_bo_unref(&root_bo); return 0; -error_unreserve: - amdgpu_bo_unreserve(vm->root.bo); - error_free_root: - amdgpu_bo_unref(&root->shadow); + amdgpu_vm_pt_free_root(adev, vm); + amdgpu_bo_unreserve(vm->root.bo); amdgpu_bo_unref(&root_bo); - vm->root.bo = NULL; error_free_delayed: dma_fence_put(vm->last_tlb_flush); @@ -2236,30 +2512,12 @@ error_free_delayed: */ int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm) { - bool pte_support_ats = (adev->asic_type == CHIP_RAVEN); int r; r = amdgpu_bo_reserve(vm->root.bo, true); if (r) return r; - /* Check if PD needs to be reinitialized and do it before - * changing any other state, in case it fails. - */ - if (pte_support_ats != vm->pte_support_ats) { - /* Sanity checks */ - if (!amdgpu_vm_pt_is_root_clean(adev, vm)) { - r = -EINVAL; - goto unreserve_bo; - } - - vm->pte_support_ats = pte_support_ats; - r = amdgpu_vm_pt_clear(adev, vm, to_amdgpu_bo_vm(vm->root.bo), - false); - if (r) - goto unreserve_bo; - } - /* Update VM state */ vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode & AMDGPU_VM_USE_CPU_FOR_COMPUTE); @@ -2336,6 +2594,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) root = amdgpu_bo_ref(vm->root.bo); amdgpu_bo_reserve(root, true); + amdgpu_vm_put_task_info(vm->task_info); amdgpu_vm_set_pasid(adev, vm, 0); dma_fence_wait(vm->last_unlocked, false); dma_fence_put(vm->last_unlocked); @@ -2493,48 +2752,6 @@ int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) } /** - * amdgpu_vm_get_task_info - Extracts task info for a PASID. - * - * @adev: drm device pointer - * @pasid: PASID identifier for VM - * @task_info: task_info to fill. - */ -void amdgpu_vm_get_task_info(struct amdgpu_device *adev, u32 pasid, - struct amdgpu_task_info *task_info) -{ - struct amdgpu_vm *vm; - unsigned long flags; - - xa_lock_irqsave(&adev->vm_manager.pasids, flags); - - vm = xa_load(&adev->vm_manager.pasids, pasid); - if (vm) - *task_info = vm->task_info; - - xa_unlock_irqrestore(&adev->vm_manager.pasids, flags); -} - -/** - * amdgpu_vm_set_task_info - Sets VMs task info. - * - * @vm: vm for which to set the info - */ -void amdgpu_vm_set_task_info(struct amdgpu_vm *vm) -{ - if (vm->task_info.pid) - return; - - vm->task_info.pid = current->pid; - get_task_comm(vm->task_info.task_name, current); - - if (current->group_leader->mm != current->mm) - return; - - vm->task_info.tgid = current->group_leader->pid; - get_task_comm(vm->task_info.process_name, current->group_leader); -} - -/** * amdgpu_vm_handle_fault - graceful handling of VM faults. * @adev: amdgpu device pointer * @pasid: PASID of the VM @@ -2618,8 +2835,8 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid, goto error_unlock; } - r = amdgpu_vm_update_range(adev, vm, true, false, false, NULL, addr, - addr, flags, value, 0, NULL, NULL, NULL); + r = amdgpu_vm_update_range(adev, vm, true, false, false, false, + NULL, addr, addr, flags, value, 0, NULL, NULL, NULL); if (r) goto error_unlock; @@ -2762,6 +2979,14 @@ void amdgpu_vm_update_fault_cache(struct amdgpu_device *adev, if (vm && status) { vm->fault_info.addr = addr; vm->fault_info.status = status; + /* + * Update the fault information globally for later usage + * when vm could be stale or freed. + */ + adev->vm_manager.fault_info.addr = addr; + adev->vm_manager.fault_info.vmhub = vmhub; + adev->vm_manager.fault_info.status = status; + if (AMDGPU_IS_GFXHUB(vmhub)) { vm->fault_info.vmhub = AMDGPU_VMHUB_TYPE_GFX; vm->fault_info.vmhub |= diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index 411d42fecfb6..54d7da396de0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -116,7 +116,7 @@ struct amdgpu_mem_stats; #define AMDGPU_VM_FAULT_STOP_FIRST 1 #define AMDGPU_VM_FAULT_STOP_ALWAYS 2 -/* Reserve 4MB VRAM for page tables */ +/* How much VRAM be reserved for page tables */ #define AMDGPU_VM_RESERVED_VRAM (8ULL << 20) /* @@ -135,8 +135,21 @@ struct amdgpu_mem_stats; #define AMDGPU_IS_MMHUB0(x) ((x) >= AMDGPU_MMHUB0_START && (x) < AMDGPU_MMHUB1_START) #define AMDGPU_IS_MMHUB1(x) ((x) >= AMDGPU_MMHUB1_START && (x) < AMDGPU_MAX_VMHUBS) -/* Reserve 2MB at top/bottom of address space for kernel use */ -#define AMDGPU_VA_RESERVED_SIZE (2ULL << 20) +/* Reserve space at top/bottom of address space for kernel use */ +#define AMDGPU_VA_RESERVED_CSA_SIZE (2ULL << 20) +#define AMDGPU_VA_RESERVED_CSA_START(adev) (((adev)->vm_manager.max_pfn \ + << AMDGPU_GPU_PAGE_SHIFT) \ + - AMDGPU_VA_RESERVED_CSA_SIZE) +#define AMDGPU_VA_RESERVED_SEQ64_SIZE (2ULL << 20) +#define AMDGPU_VA_RESERVED_SEQ64_START(adev) (AMDGPU_VA_RESERVED_CSA_START(adev) \ + - AMDGPU_VA_RESERVED_SEQ64_SIZE) +#define AMDGPU_VA_RESERVED_TRAP_SIZE (2ULL << 12) +#define AMDGPU_VA_RESERVED_TRAP_START(adev) (AMDGPU_VA_RESERVED_SEQ64_START(adev) \ + - AMDGPU_VA_RESERVED_TRAP_SIZE) +#define AMDGPU_VA_RESERVED_BOTTOM (1ULL << 16) +#define AMDGPU_VA_RESERVED_TOP (AMDGPU_VA_RESERVED_TRAP_SIZE + \ + AMDGPU_VA_RESERVED_SEQ64_SIZE + \ + AMDGPU_VA_RESERVED_CSA_SIZE) /* See vm_update_mode */ #define AMDGPU_VM_USE_CPU_FOR_GFX (1 << 0) @@ -190,10 +203,11 @@ struct amdgpu_vm_pte_funcs { }; struct amdgpu_task_info { - char process_name[TASK_COMM_LEN]; - char task_name[TASK_COMM_LEN]; - pid_t pid; - pid_t tgid; + char process_name[TASK_COMM_LEN]; + char task_name[TASK_COMM_LEN]; + pid_t pid; + pid_t tgid; + struct kref refcount; }; /** @@ -243,9 +257,20 @@ struct amdgpu_vm_update_params { unsigned int num_dw_left; /** - * @table_freed: return true if page table is freed when updating + * @needs_flush: true whenever we need to invalidate the TLB */ - bool table_freed; + bool needs_flush; + + /** + * @allow_override: true for memory that is not uncached: allows MTYPE + * to be overridden for NUMA local memory. + */ + bool allow_override; + + /** + * @tlb_flush_waitlist: temporary storage for BOs until tlb_flush + */ + struct list_head tlb_flush_waitlist; }; struct amdgpu_vm_update_funcs { @@ -282,9 +307,12 @@ struct amdgpu_vm { /* Lock to protect vm_bo add/del/move on all lists of vm */ spinlock_t status_lock; - /* BOs who needs a validation */ + /* Per-VM and PT BOs who needs a validation */ struct list_head evicted; + /* BOs for user mode queues that need a validation */ + struct list_head evicted_user; + /* PT BOs which relocated and their parent need an update */ struct list_head relocated; @@ -318,6 +346,8 @@ struct amdgpu_vm { /* Last finished delayed update */ atomic64_t tlb_seq; struct dma_fence *last_tlb_flush; + atomic64_t kfd_last_flushed_seq; + uint64_t tlb_fence_context; /* How many times we had to re-generate the page tables */ uint64_t generation; @@ -334,9 +364,6 @@ struct amdgpu_vm { /* Functions to use for VM table updates */ const struct amdgpu_vm_update_funcs *update_funcs; - /* Flag to indicate ATS support from PTE for GFX9 */ - bool pte_support_ats; - /* Up to 128 pending retry page faults */ DECLARE_KFIFO(faults, u64, 128); @@ -350,7 +377,7 @@ struct amdgpu_vm { uint64_t pd_phys_addr; /* Some basic info about the task */ - struct amdgpu_task_info task_info; + struct amdgpu_task_info *task_info; /* Store positions of group of BOs */ struct ttm_lru_bulk_move lru_bulk_move; @@ -401,6 +428,8 @@ struct amdgpu_vm_manager { * look up VM of a page fault */ struct xarray pasids; + /* Global registration of recent page fault information */ + struct amdgpu_vm_fault_info fault_info; }; struct amdgpu_bo_va_mapping; @@ -427,9 +456,10 @@ int amdgpu_vm_lock_pd(struct amdgpu_vm *vm, struct drm_exec *exec, unsigned int num_fences); bool amdgpu_vm_ready(struct amdgpu_vm *vm); uint64_t amdgpu_vm_generation(struct amdgpu_device *adev, struct amdgpu_vm *vm); -int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm, - int (*callback)(void *p, struct amdgpu_bo *bo), - void *param); +int amdgpu_vm_validate(struct amdgpu_device *adev, struct amdgpu_vm *vm, + struct ww_acquire_ctx *ticket, + int (*callback)(void *p, struct amdgpu_bo *bo), + void *param); int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_pipe_sync); int amdgpu_vm_update_pdes(struct amdgpu_device *adev, struct amdgpu_vm *vm, bool immediate); @@ -437,11 +467,16 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev, struct amdgpu_vm *vm, struct dma_fence **fence); int amdgpu_vm_handle_moved(struct amdgpu_device *adev, - struct amdgpu_vm *vm); + struct amdgpu_vm *vm, + struct ww_acquire_ctx *ticket); +int amdgpu_vm_flush_compute_tlb(struct amdgpu_device *adev, + struct amdgpu_vm *vm, + uint32_t flush_type, + uint32_t xcc_mask); void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base, struct amdgpu_vm *vm, struct amdgpu_bo *bo); int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm, - bool immediate, bool unlocked, bool flush_tlb, + bool immediate, bool unlocked, bool flush_tlb, bool allow_override, struct dma_resv *resv, uint64_t start, uint64_t last, uint64_t flags, uint64_t offset, uint64_t vram_base, struct ttm_resource *res, dma_addr_t *pages_addr, @@ -485,8 +520,14 @@ bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring, struct amdgpu_job *job); void amdgpu_vm_check_compute_bug(struct amdgpu_device *adev); -void amdgpu_vm_get_task_info(struct amdgpu_device *adev, u32 pasid, - struct amdgpu_task_info *task_info); +struct amdgpu_task_info * +amdgpu_vm_get_task_info_pasid(struct amdgpu_device *adev, u32 pasid); + +struct amdgpu_task_info * +amdgpu_vm_get_task_info_vm(struct amdgpu_vm *vm); + +void amdgpu_vm_put_task_info(struct amdgpu_task_info *task_info); + bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid, u32 vmid, u32 node_id, uint64_t addr, bool write_fault); @@ -504,8 +545,6 @@ int amdgpu_vm_pt_create(struct amdgpu_device *adev, struct amdgpu_vm *vm, int level, bool immediate, struct amdgpu_bo_vm **vmbo, int32_t xcp_id); void amdgpu_vm_pt_free_root(struct amdgpu_device *adev, struct amdgpu_vm *vm); -bool amdgpu_vm_pt_is_root_clean(struct amdgpu_device *adev, - struct amdgpu_vm *vm); int amdgpu_vm_pde_update(struct amdgpu_vm_update_params *params, struct amdgpu_vm_bo_base *entry); @@ -513,6 +552,8 @@ int amdgpu_vm_ptes_update(struct amdgpu_vm_update_params *params, uint64_t start, uint64_t end, uint64_t dst, uint64_t flags); void amdgpu_vm_pt_free_work(struct work_struct *work); +void amdgpu_vm_pt_free_list(struct amdgpu_device *adev, + struct amdgpu_vm_update_params *params); #if defined(CONFIG_DEBUG_FS) void amdgpu_debugfs_vm_bo_info(struct amdgpu_vm *vm, struct seq_file *m); @@ -578,5 +619,8 @@ void amdgpu_vm_update_fault_cache(struct amdgpu_device *adev, uint64_t addr, uint32_t status, unsigned int vmhub); +void amdgpu_vm_tlb_fence_create(struct amdgpu_device *adev, + struct amdgpu_vm *vm, + struct dma_fence **fence); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c index 6e31621452de..3895bd7d176a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c @@ -108,7 +108,9 @@ static int amdgpu_vm_cpu_update(struct amdgpu_vm_update_params *p, static int amdgpu_vm_cpu_commit(struct amdgpu_vm_update_params *p, struct dma_fence **fence) { - /* Flush HDP */ + if (p->needs_flush) + atomic64_inc(&p->vm->tlb_seq); + mb(); amdgpu_device_flush_hdp(p->adev, NULL); return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c index 9b025fd17b84..7fdd306a48a0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c @@ -90,22 +90,6 @@ static unsigned int amdgpu_vm_pt_num_entries(struct amdgpu_device *adev, } /** - * amdgpu_vm_pt_num_ats_entries - return the number of ATS entries in the root PD - * - * @adev: amdgpu_device pointer - * - * Returns: - * The number of entries in the root page directory which needs the ATS setting. - */ -static unsigned int amdgpu_vm_pt_num_ats_entries(struct amdgpu_device *adev) -{ - unsigned int shift; - - shift = amdgpu_vm_pt_level_shift(adev, adev->vm_manager.root_level); - return AMDGPU_GMC_HOLE_START >> (shift + AMDGPU_GPU_PAGE_SHIFT); -} - -/** * amdgpu_vm_pt_entries_mask - the mask to get the entry number of a PD/PT * * @adev: amdgpu_device pointer @@ -379,7 +363,7 @@ int amdgpu_vm_pt_clear(struct amdgpu_device *adev, struct amdgpu_vm *vm, struct ttm_operation_ctx ctx = { true, false }; struct amdgpu_vm_update_params params; struct amdgpu_bo *ancestor = &vmbo->bo; - unsigned int entries, ats_entries; + unsigned int entries; struct amdgpu_bo *bo = &vmbo->bo; uint64_t addr; int r, idx; @@ -394,27 +378,6 @@ int amdgpu_vm_pt_clear(struct amdgpu_device *adev, struct amdgpu_vm *vm, } entries = amdgpu_bo_size(bo) / 8; - if (!vm->pte_support_ats) { - ats_entries = 0; - - } else if (!bo->parent) { - ats_entries = amdgpu_vm_pt_num_ats_entries(adev); - ats_entries = min(ats_entries, entries); - entries -= ats_entries; - - } else { - struct amdgpu_vm_bo_base *pt; - - pt = ancestor->vm_bo; - ats_entries = amdgpu_vm_pt_num_ats_entries(adev); - if ((pt - to_amdgpu_bo_vm(vm->root.bo)->entries) >= - ats_entries) { - ats_entries = 0; - } else { - ats_entries = entries; - entries = 0; - } - } r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); if (r) @@ -445,44 +408,24 @@ int amdgpu_vm_pt_clear(struct amdgpu_device *adev, struct amdgpu_vm *vm, goto exit; addr = 0; - if (ats_entries) { - uint64_t value = 0, flags; - flags = AMDGPU_PTE_DEFAULT_ATC; + uint64_t value = 0, flags = 0; + if (adev->asic_type >= CHIP_VEGA10) { if (level != AMDGPU_VM_PTB) { /* Handle leaf PDEs as PTEs */ flags |= AMDGPU_PDE_PTE; - amdgpu_gmc_get_vm_pde(adev, level, &value, &flags); + amdgpu_gmc_get_vm_pde(adev, level, + &value, &flags); + } else { + /* Workaround for fault priority problem on GMC9 */ + flags = AMDGPU_PTE_EXECUTABLE; } - - r = vm->update_funcs->update(¶ms, vmbo, addr, 0, - ats_entries, value, flags); - if (r) - goto exit; - - addr += ats_entries * 8; } - if (entries) { - uint64_t value = 0, flags = 0; - - if (adev->asic_type >= CHIP_VEGA10) { - if (level != AMDGPU_VM_PTB) { - /* Handle leaf PDEs as PTEs */ - flags |= AMDGPU_PDE_PTE; - amdgpu_gmc_get_vm_pde(adev, level, - &value, &flags); - } else { - /* Workaround for fault priority problem on GMC9 */ - flags = AMDGPU_PTE_EXECUTABLE; - } - } - - r = vm->update_funcs->update(¶ms, vmbo, addr, 0, entries, - value, flags); - if (r) - goto exit; - } + r = vm->update_funcs->update(¶ms, vmbo, addr, 0, entries, + value, flags); + if (r) + goto exit; r = vm->update_funcs->commit(¶ms, NULL); exit: @@ -642,13 +585,14 @@ static void amdgpu_vm_pt_free(struct amdgpu_vm_bo_base *entry) if (!entry->bo) return; + + entry->bo->vm_bo = NULL; shadow = amdgpu_bo_shadowed(entry->bo); if (shadow) { ttm_bo_set_bulk_move(&shadow->tbo, NULL); amdgpu_bo_unref(&shadow); } ttm_bo_set_bulk_move(&entry->bo->tbo, NULL); - entry->bo->vm_bo = NULL; spin_lock(&entry->vm->status_lock); list_del(&entry->vm_status); @@ -678,79 +622,76 @@ void amdgpu_vm_pt_free_work(struct work_struct *work) } /** - * amdgpu_vm_pt_free_dfs - free PD/PT levels + * amdgpu_vm_pt_free_list - free PD/PT levels * * @adev: amdgpu device structure - * @vm: amdgpu vm structure - * @start: optional cursor where to start freeing PDs/PTs - * @unlocked: vm resv unlock status + * @params: see amdgpu_vm_update_params definition * - * Free the page directory or page table level and all sub levels. + * Free the page directory objects saved in the flush list */ -static void amdgpu_vm_pt_free_dfs(struct amdgpu_device *adev, - struct amdgpu_vm *vm, - struct amdgpu_vm_pt_cursor *start, - bool unlocked) +void amdgpu_vm_pt_free_list(struct amdgpu_device *adev, + struct amdgpu_vm_update_params *params) { - struct amdgpu_vm_pt_cursor cursor; - struct amdgpu_vm_bo_base *entry; + struct amdgpu_vm_bo_base *entry, *next; + struct amdgpu_vm *vm = params->vm; + bool unlocked = params->unlocked; + + if (list_empty(¶ms->tlb_flush_waitlist)) + return; if (unlocked) { spin_lock(&vm->status_lock); - for_each_amdgpu_vm_pt_dfs_safe(adev, vm, start, cursor, entry) - list_move(&entry->vm_status, &vm->pt_freed); - - if (start) - list_move(&start->entry->vm_status, &vm->pt_freed); + list_splice_init(¶ms->tlb_flush_waitlist, &vm->pt_freed); spin_unlock(&vm->status_lock); schedule_work(&vm->pt_free_work); return; } - for_each_amdgpu_vm_pt_dfs_safe(adev, vm, start, cursor, entry) + list_for_each_entry_safe(entry, next, ¶ms->tlb_flush_waitlist, vm_status) amdgpu_vm_pt_free(entry); - - if (start) - amdgpu_vm_pt_free(start->entry); } /** - * amdgpu_vm_pt_free_root - free root PD - * @adev: amdgpu device structure - * @vm: amdgpu vm structure + * amdgpu_vm_pt_add_list - add PD/PT level to the flush list * - * Free the root page directory and everything below it. + * @params: parameters for the update + * @cursor: first PT entry to start DF search from, non NULL + * + * This list will be freed after TLB flush. */ -void amdgpu_vm_pt_free_root(struct amdgpu_device *adev, struct amdgpu_vm *vm) +static void amdgpu_vm_pt_add_list(struct amdgpu_vm_update_params *params, + struct amdgpu_vm_pt_cursor *cursor) { - amdgpu_vm_pt_free_dfs(adev, vm, NULL, false); + struct amdgpu_vm_pt_cursor seek; + struct amdgpu_vm_bo_base *entry; + + spin_lock(¶ms->vm->status_lock); + for_each_amdgpu_vm_pt_dfs_safe(params->adev, params->vm, cursor, seek, entry) { + if (entry && entry->bo) + list_move(&entry->vm_status, ¶ms->tlb_flush_waitlist); + } + + /* enter start node now */ + list_move(&cursor->entry->vm_status, ¶ms->tlb_flush_waitlist); + spin_unlock(¶ms->vm->status_lock); } /** - * amdgpu_vm_pt_is_root_clean - check if a root PD is clean - * - * @adev: amdgpu_device pointer - * @vm: the VM to check - * - * Check all entries of the root PD, if any subsequent PDs are allocated, - * it means there are page table creating and filling, and is no a clean - * VM + * amdgpu_vm_pt_free_root - free root PD + * @adev: amdgpu device structure + * @vm: amdgpu vm structure * - * Returns: - * 0 if this VM is clean + * Free the root page directory and everything below it. */ -bool amdgpu_vm_pt_is_root_clean(struct amdgpu_device *adev, - struct amdgpu_vm *vm) +void amdgpu_vm_pt_free_root(struct amdgpu_device *adev, struct amdgpu_vm *vm) { - enum amdgpu_vm_level root = adev->vm_manager.root_level; - unsigned int entries = amdgpu_vm_pt_num_entries(adev, root); - unsigned int i = 0; + struct amdgpu_vm_pt_cursor cursor; + struct amdgpu_vm_bo_base *entry; - for (i = 0; i < entries; i++) { - if (to_amdgpu_bo_vm(vm->root.bo)->entries[i].bo) - return false; + for_each_amdgpu_vm_pt_dfs_safe(adev, vm, NULL, cursor, entry) { + if (entry) + amdgpu_vm_pt_free(entry); } - return true; } /** @@ -843,7 +784,7 @@ static void amdgpu_vm_pte_update_flags(struct amdgpu_vm_update_params *params, */ if ((flags & AMDGPU_PTE_SYSTEM) && (adev->flags & AMD_IS_APU) && adev->gmc.gmc_funcs->override_vm_pte_flags && - num_possible_nodes() > 1 && !params->pages_addr) + num_possible_nodes() > 1 && !params->pages_addr && params->allow_override) amdgpu_gmc_override_vm_pte_flags(adev, params->vm, addr, &flags); params->vm->update_funcs->update(params, pt, pe, addr, count, incr, @@ -1026,7 +967,7 @@ int amdgpu_vm_ptes_update(struct amdgpu_vm_update_params *params, trace_amdgpu_vm_update_ptes(params, frag_start, upd_end, min(nptes, 32u), dst, incr, upd_flags, - vm->task_info.tgid, + vm->task_info ? vm->task_info->tgid : 0, vm->immediate.fence_context); amdgpu_vm_pte_update_flags(params, to_amdgpu_bo_vm(pt), cursor.level, pe_start, dst, @@ -1055,10 +996,8 @@ int amdgpu_vm_ptes_update(struct amdgpu_vm_update_params *params, while (cursor.pfn < frag_start) { /* Make sure previous mapping is freed */ if (cursor.entry->bo) { - params->table_freed = true; - amdgpu_vm_pt_free_dfs(adev, params->vm, - &cursor, - params->unlocked); + params->needs_flush = true; + amdgpu_vm_pt_add_list(params, &cursor); } amdgpu_vm_pt_next(adev, &cursor); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c index 349416e176a1..66e8a016126b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c @@ -126,6 +126,10 @@ static int amdgpu_vm_sdma_commit(struct amdgpu_vm_update_params *p, WARN_ON(ib->length_dw == 0); amdgpu_ring_pad_ib(ring, ib); + + if (p->needs_flush) + atomic64_inc(&p->vm->tlb_seq); + WARN_ON(ib->length_dw > p->num_dw_left); f = amdgpu_job_submit(p->job); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_tlb_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_tlb_fence.c new file mode 100644 index 000000000000..51cddfa3f1e8 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_tlb_fence.c @@ -0,0 +1,112 @@ +// SPDX-License-Identifier: GPL-2.0 OR MIT +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include <linux/dma-fence.h> +#include <linux/workqueue.h> + +#include "amdgpu.h" +#include "amdgpu_vm.h" +#include "amdgpu_gmc.h" + +struct amdgpu_tlb_fence { + struct dma_fence base; + struct amdgpu_device *adev; + struct dma_fence *dependency; + struct work_struct work; + spinlock_t lock; + uint16_t pasid; + +}; + +static const char *amdgpu_tlb_fence_get_driver_name(struct dma_fence *fence) +{ + return "amdgpu tlb fence"; +} + +static const char *amdgpu_tlb_fence_get_timeline_name(struct dma_fence *f) +{ + return "amdgpu tlb timeline"; +} + +static void amdgpu_tlb_fence_work(struct work_struct *work) +{ + struct amdgpu_tlb_fence *f = container_of(work, typeof(*f), work); + int r; + + if (f->dependency) { + dma_fence_wait(f->dependency, false); + dma_fence_put(f->dependency); + f->dependency = NULL; + } + + r = amdgpu_gmc_flush_gpu_tlb_pasid(f->adev, f->pasid, 2, true, 0); + if (r) { + dev_err(f->adev->dev, "TLB flush failed for PASID %d.\n", + f->pasid); + dma_fence_set_error(&f->base, r); + } + + dma_fence_signal(&f->base); + dma_fence_put(&f->base); +} + +static const struct dma_fence_ops amdgpu_tlb_fence_ops = { + .use_64bit_seqno = true, + .get_driver_name = amdgpu_tlb_fence_get_driver_name, + .get_timeline_name = amdgpu_tlb_fence_get_timeline_name +}; + +void amdgpu_vm_tlb_fence_create(struct amdgpu_device *adev, struct amdgpu_vm *vm, + struct dma_fence **fence) +{ + struct amdgpu_tlb_fence *f; + + f = kmalloc(sizeof(*f), GFP_KERNEL); + if (!f) { + /* + * We can't fail since the PDEs and PTEs are already updated, so + * just block for the dependency and execute the TLB flush + */ + if (*fence) + dma_fence_wait(*fence, false); + + amdgpu_gmc_flush_gpu_tlb_pasid(adev, vm->pasid, 2, true, 0); + *fence = dma_fence_get_stub(); + return; + } + + f->adev = adev; + f->dependency = *fence; + f->pasid = vm->pasid; + INIT_WORK(&f->work, amdgpu_tlb_fence_work); + spin_lock_init(&f->lock); + + dma_fence_init(&f->base, &amdgpu_tlb_fence_ops, &f->lock, + vm->tlb_fence_context, atomic64_read(&vm->tlb_seq)); + + /* TODO: We probably need a separate wq here */ + dma_fence_get(&f->base); + schedule_work(&f->work); + + *fence = &f->base; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c index e81579708e96..c23d97d34b7e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c @@ -26,6 +26,7 @@ #include "amdgpu.h" #include "amdgpu_ucode.h" #include "amdgpu_vpe.h" +#include "amdgpu_smu.h" #include "soc15_common.h" #include "vpe_v6_1.h" @@ -33,8 +34,180 @@ /* VPE CSA resides in the 4th page of CSA */ #define AMDGPU_CSA_VPE_OFFSET (4096 * 3) +/* 1 second timeout */ +#define VPE_IDLE_TIMEOUT msecs_to_jiffies(1000) + +#define VPE_MAX_DPM_LEVEL 4 +#define FIXED1_8_BITS_PER_FRACTIONAL_PART 8 +#define GET_PRATIO_INTEGER_PART(x) ((x) >> FIXED1_8_BITS_PER_FRACTIONAL_PART) + static void vpe_set_ring_funcs(struct amdgpu_device *adev); +static inline uint16_t div16_u16_rem(uint16_t dividend, uint16_t divisor, uint16_t *remainder) +{ + *remainder = dividend % divisor; + return dividend / divisor; +} + +static inline uint16_t complete_integer_division_u16( + uint16_t dividend, + uint16_t divisor, + uint16_t *remainder) +{ + return div16_u16_rem(dividend, divisor, (uint16_t *)remainder); +} + +static uint16_t vpe_u1_8_from_fraction(uint16_t numerator, uint16_t denominator) +{ + u16 arg1_value = numerator; + u16 arg2_value = denominator; + + uint16_t remainder; + + /* determine integer part */ + uint16_t res_value = complete_integer_division_u16( + arg1_value, arg2_value, &remainder); + + if (res_value > 127 /* CHAR_MAX */) + return 0; + + /* determine fractional part */ + { + unsigned int i = FIXED1_8_BITS_PER_FRACTIONAL_PART; + + do { + remainder <<= 1; + + res_value <<= 1; + + if (remainder >= arg2_value) { + res_value |= 1; + remainder -= arg2_value; + } + } while (--i != 0); + } + + /* round up LSB */ + { + uint16_t summand = (remainder << 1) >= arg2_value; + + if ((res_value + summand) > 32767 /* SHRT_MAX */) + return 0; + + res_value += summand; + } + + return res_value; +} + +static uint16_t vpe_internal_get_pratio(uint16_t from_frequency, uint16_t to_frequency) +{ + uint16_t pratio = vpe_u1_8_from_fraction(from_frequency, to_frequency); + + if (GET_PRATIO_INTEGER_PART(pratio) > 1) + pratio = 0; + + return pratio; +} + +/* + * VPE has 4 DPM levels from level 0 (lowerest) to 3 (highest), + * VPE FW will dynamically decide which level should be used according to current loading. + * + * Get VPE and SOC clocks from PM, and select the appropriate four clock values, + * calculate the ratios of adjusting from one clock to another. + * The VPE FW can then request the appropriate frequency from the PMFW. + */ +int amdgpu_vpe_configure_dpm(struct amdgpu_vpe *vpe) +{ + struct amdgpu_device *adev = vpe->ring.adev; + uint32_t dpm_ctl; + + if (adev->pm.dpm_enabled) { + struct dpm_clocks clock_table = { 0 }; + struct dpm_clock *VPEClks; + struct dpm_clock *SOCClks; + uint32_t idx; + uint32_t pratio_vmax_vnorm = 0, pratio_vnorm_vmid = 0, pratio_vmid_vmin = 0; + uint16_t pratio_vmin_freq = 0, pratio_vmid_freq = 0, pratio_vnorm_freq = 0, pratio_vmax_freq = 0; + + dpm_ctl = RREG32(vpe_get_reg_offset(vpe, 0, vpe->regs.dpm_enable)); + dpm_ctl |= 1; /* DPM enablement */ + WREG32(vpe_get_reg_offset(vpe, 0, vpe->regs.dpm_enable), dpm_ctl); + + /* Get VPECLK and SOCCLK */ + if (amdgpu_dpm_get_dpm_clock_table(adev, &clock_table)) { + dev_dbg(adev->dev, "%s: get clock failed!\n", __func__); + goto disable_dpm; + } + + SOCClks = clock_table.SocClocks; + VPEClks = clock_table.VPEClocks; + + /* vpe dpm only cares 4 levels. */ + for (idx = 0; idx < VPE_MAX_DPM_LEVEL; idx++) { + uint32_t soc_dpm_level; + uint32_t min_freq; + + if (idx == 0) + soc_dpm_level = 0; + else + soc_dpm_level = (idx * 2) + 1; + + /* clamp the max level */ + if (soc_dpm_level > PP_SMU_NUM_VPECLK_DPM_LEVELS - 1) + soc_dpm_level = PP_SMU_NUM_VPECLK_DPM_LEVELS - 1; + + min_freq = (SOCClks[soc_dpm_level].Freq < VPEClks[soc_dpm_level].Freq) ? + SOCClks[soc_dpm_level].Freq : VPEClks[soc_dpm_level].Freq; + + switch (idx) { + case 0: + pratio_vmin_freq = min_freq; + break; + case 1: + pratio_vmid_freq = min_freq; + break; + case 2: + pratio_vnorm_freq = min_freq; + break; + case 3: + pratio_vmax_freq = min_freq; + break; + default: + break; + } + } + + if (pratio_vmin_freq && pratio_vmid_freq && pratio_vnorm_freq && pratio_vmax_freq) { + uint32_t pratio_ctl; + + pratio_vmax_vnorm = (uint32_t)vpe_internal_get_pratio(pratio_vmax_freq, pratio_vnorm_freq); + pratio_vnorm_vmid = (uint32_t)vpe_internal_get_pratio(pratio_vnorm_freq, pratio_vmid_freq); + pratio_vmid_vmin = (uint32_t)vpe_internal_get_pratio(pratio_vmid_freq, pratio_vmin_freq); + + pratio_ctl = pratio_vmax_vnorm | (pratio_vnorm_vmid << 9) | (pratio_vmid_vmin << 18); + WREG32(vpe_get_reg_offset(vpe, 0, vpe->regs.dpm_pratio), pratio_ctl); /* PRatio */ + WREG32(vpe_get_reg_offset(vpe, 0, vpe->regs.dpm_request_interval), 24000); /* 1ms, unit=1/24MHz */ + WREG32(vpe_get_reg_offset(vpe, 0, vpe->regs.dpm_decision_threshold), 1200000); /* 50ms */ + WREG32(vpe_get_reg_offset(vpe, 0, vpe->regs.dpm_busy_clamp_threshold), 1200000);/* 50ms */ + WREG32(vpe_get_reg_offset(vpe, 0, vpe->regs.dpm_idle_clamp_threshold), 1200000);/* 50ms */ + dev_dbg(adev->dev, "%s: configure vpe dpm pratio done!\n", __func__); + } else { + dev_dbg(adev->dev, "%s: invalid pratio parameters!\n", __func__); + goto disable_dpm; + } + } + return 0; + +disable_dpm: + dpm_ctl = RREG32(vpe_get_reg_offset(vpe, 0, vpe->regs.dpm_enable)); + dpm_ctl &= 0xfffffffe; /* Disable DPM */ + WREG32(vpe_get_reg_offset(vpe, 0, vpe->regs.dpm_enable), dpm_ctl); + dev_dbg(adev->dev, "%s: disable vpe dpm\n", __func__); + return -EINVAL; +} + int amdgpu_vpe_psp_update_sram(struct amdgpu_device *adev) { struct amdgpu_firmware_info ucode = { @@ -124,6 +297,10 @@ static int vpe_early_init(void *handle) case IP_VERSION(6, 1, 0): vpe_v6_1_set_funcs(vpe); break; + case IP_VERSION(6, 1, 1): + vpe_v6_1_set_funcs(vpe); + vpe->collaborate_mode = true; + break; default: return -EINVAL; } @@ -131,9 +308,24 @@ static int vpe_early_init(void *handle) vpe_set_ring_funcs(adev); vpe_set_regs(vpe); + dev_info(adev->dev, "VPE: collaborate mode %s", vpe->collaborate_mode ? "true" : "false"); + return 0; } +static void vpe_idle_work_handler(struct work_struct *work) +{ + struct amdgpu_device *adev = + container_of(work, struct amdgpu_device, vpe.idle_work.work); + unsigned int fences = 0; + + fences += amdgpu_fence_count_emitted(&adev->vpe.ring); + + if (fences == 0) + amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VPE, AMD_PG_STATE_GATE); + else + schedule_delayed_work(&adev->vpe.idle_work, VPE_IDLE_TIMEOUT); +} static int vpe_common_init(struct amdgpu_vpe *vpe) { @@ -150,6 +342,9 @@ static int vpe_common_init(struct amdgpu_vpe *vpe) return r; } + vpe->context_started = false; + INIT_DELAYED_WORK(&adev->vpe.idle_work, vpe_idle_work_handler); + return 0; } @@ -201,6 +396,12 @@ static int vpe_hw_init(void *handle) struct amdgpu_vpe *vpe = &adev->vpe; int ret; + /* Power on VPE */ + ret = amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VPE, + AMD_PG_STATE_UNGATE); + if (ret) + return ret; + ret = vpe_load_microcode(vpe); if (ret) return ret; @@ -219,6 +420,9 @@ static int vpe_hw_fini(void *handle) vpe_ring_stop(vpe); + /* Power off VPE */ + amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VPE, AMD_PG_STATE_GATE); + return 0; } @@ -226,6 +430,8 @@ static int vpe_suspend(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + cancel_delayed_work_sync(&adev->vpe.idle_work); + return vpe_hw_fini(adev); } @@ -263,6 +469,18 @@ static uint64_t vpe_get_csa_mc_addr(struct amdgpu_ring *ring, uint32_t vmid) return csa_mc_addr; } +static void vpe_ring_emit_pred_exec(struct amdgpu_ring *ring, + uint32_t device_select, + uint32_t exec_count) +{ + if (!ring->adev->vpe.collaborate_mode) + return; + + amdgpu_ring_write(ring, VPE_CMD_HEADER(VPE_CMD_OPCODE_PRED_EXE, 0) | + (device_select << 16)); + amdgpu_ring_write(ring, exec_count & 0x1fff); +} + static void vpe_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_job *job, struct amdgpu_ib *ib, @@ -311,6 +529,8 @@ static void vpe_ring_emit_pipeline_sync(struct amdgpu_ring *ring) uint32_t seq = ring->fence_drv.sync_seq; uint64_t addr = ring->fence_drv.gpu_addr; + vpe_ring_emit_pred_exec(ring, 0, 6); + /* wait for idle */ amdgpu_ring_write(ring, VPE_CMD_HEADER(VPE_CMD_OPCODE_POLL_REGMEM, VPE_POLL_REGMEM_SUBOP_REGMEM) | @@ -326,6 +546,8 @@ static void vpe_ring_emit_pipeline_sync(struct amdgpu_ring *ring) static void vpe_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t val) { + vpe_ring_emit_pred_exec(ring, 0, 3); + amdgpu_ring_write(ring, VPE_CMD_HEADER(VPE_CMD_OPCODE_REG_WRITE, 0)); amdgpu_ring_write(ring, reg << 2); amdgpu_ring_write(ring, val); @@ -334,6 +556,8 @@ static void vpe_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t static void vpe_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, uint32_t val, uint32_t mask) { + vpe_ring_emit_pred_exec(ring, 0, 6); + amdgpu_ring_write(ring, VPE_CMD_HEADER(VPE_CMD_OPCODE_POLL_REGMEM, VPE_POLL_REGMEM_SUBOP_REGMEM) | VPE_CMD_POLL_REGMEM_HEADER_FUNC(3) | /* equal */ @@ -352,34 +576,21 @@ static void vpe_ring_emit_vm_flush(struct amdgpu_ring *ring, unsigned int vmid, amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); } -static unsigned int vpe_ring_init_cond_exec(struct amdgpu_ring *ring) +static unsigned int vpe_ring_init_cond_exec(struct amdgpu_ring *ring, + uint64_t addr) { unsigned int ret; amdgpu_ring_write(ring, VPE_CMD_HEADER(VPE_CMD_OPCODE_COND_EXE, 0)); - amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr)); - amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr)); + amdgpu_ring_write(ring, lower_32_bits(addr)); + amdgpu_ring_write(ring, upper_32_bits(addr)); amdgpu_ring_write(ring, 1); - ret = ring->wptr & ring->buf_mask;/* this is the offset we need patch later */ - amdgpu_ring_write(ring, 0x55aa55aa);/* insert dummy here and patch it later */ + ret = ring->wptr & ring->buf_mask; + amdgpu_ring_write(ring, 0); return ret; } -static void vpe_ring_patch_cond_exec(struct amdgpu_ring *ring, unsigned int offset) -{ - unsigned int cur; - - WARN_ON_ONCE(offset > ring->buf_mask); - WARN_ON_ONCE(ring->ring[offset] != 0x55aa55aa); - - cur = (ring->wptr - 1) & ring->buf_mask; - if (cur > offset) - ring->ring[offset] = cur - offset; - else - ring->ring[offset] = (ring->buf_mask + 1) - offset + cur; -} - static int vpe_ring_preempt_ib(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; @@ -430,6 +641,21 @@ static int vpe_set_clockgating_state(void *handle, static int vpe_set_powergating_state(void *handle, enum amd_powergating_state state) { + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_vpe *vpe = &adev->vpe; + + if (!adev->pm.dpm_enabled) + dev_err(adev->dev, "Without PM, cannot support powergating\n"); + + dev_dbg(adev->dev, "%s: %s!\n", __func__, (state == AMD_PG_STATE_GATE) ? "GATE":"UNGATE"); + + if (state == AMD_PG_STATE_GATE) { + amdgpu_dpm_enable_vpe(adev, false); + vpe->context_started = false; + } else { + amdgpu_dpm_enable_vpe(adev, true); + } + return 0; } @@ -486,16 +712,22 @@ static void vpe_ring_set_wptr(struct amdgpu_ring *ring) upper_32_bits(ring->wptr << 2)); atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr << 2); WDOORBELL64(ring->doorbell_index, ring->wptr << 2); + if (vpe->collaborate_mode) + WDOORBELL64(ring->doorbell_index + 4, ring->wptr << 2); } else { - dev_dbg(adev->dev, "Not using doorbell, \ - regVPEC_QUEUE0_RB_WPTR == 0x%08x, \ - regVPEC_QUEUE0_RB_WPTR_HI == 0x%08x\n", - lower_32_bits(ring->wptr << 2), - upper_32_bits(ring->wptr << 2)); - WREG32(vpe_get_reg_offset(vpe, ring->me, vpe->regs.queue0_rb_wptr_lo), - lower_32_bits(ring->wptr << 2)); - WREG32(vpe_get_reg_offset(vpe, ring->me, vpe->regs.queue0_rb_wptr_hi), - upper_32_bits(ring->wptr << 2)); + int i; + + for (i = 0; i < vpe->num_instances; i++) { + dev_dbg(adev->dev, "Not using doorbell, \ + regVPEC_QUEUE0_RB_WPTR == 0x%08x, \ + regVPEC_QUEUE0_RB_WPTR_HI == 0x%08x\n", + lower_32_bits(ring->wptr << 2), + upper_32_bits(ring->wptr << 2)); + WREG32(vpe_get_reg_offset(vpe, i, vpe->regs.queue0_rb_wptr_lo), + lower_32_bits(ring->wptr << 2)); + WREG32(vpe_get_reg_offset(vpe, i, vpe->regs.queue0_rb_wptr_hi), + upper_32_bits(ring->wptr << 2)); + } } } @@ -595,6 +827,38 @@ err0: return ret; } +static void vpe_ring_begin_use(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + struct amdgpu_vpe *vpe = &adev->vpe; + + cancel_delayed_work_sync(&adev->vpe.idle_work); + + /* Power on VPE and notify VPE of new context */ + if (!vpe->context_started) { + uint32_t context_notify; + + /* Power on VPE */ + amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VPE, AMD_PG_STATE_UNGATE); + + /* Indicates that a job from a new context has been submitted. */ + context_notify = RREG32(vpe_get_reg_offset(vpe, 0, vpe->regs.context_indicator)); + if ((context_notify & 0x1) == 0) + context_notify |= 0x1; + else + context_notify &= ~(0x1); + WREG32(vpe_get_reg_offset(vpe, 0, vpe->regs.context_indicator), context_notify); + vpe->context_started = true; + } +} + +static void vpe_ring_end_use(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + schedule_delayed_work(&adev->vpe.idle_work, VPE_IDLE_TIMEOUT); +} + static const struct amdgpu_ring_funcs vpe_ring_funcs = { .type = AMDGPU_RING_TYPE_VPE, .align_mask = 0xf, @@ -623,8 +887,9 @@ static const struct amdgpu_ring_funcs vpe_ring_funcs = { .test_ring = vpe_ring_test_ring, .test_ib = vpe_ring_test_ib, .init_cond_exec = vpe_ring_init_cond_exec, - .patch_cond_exec = vpe_ring_patch_cond_exec, .preempt_ib = vpe_ring_preempt_ib, + .begin_use = vpe_ring_begin_use, + .end_use = vpe_ring_end_use, }; static void vpe_set_ring_funcs(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.h index 29d56f7ae4a9..231d86d0953e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.h @@ -27,6 +27,8 @@ #include "amdgpu_irq.h" #include "vpe_6_1_fw_if.h" +#define AMDGPU_MAX_VPE_INSTANCES 2 + struct amdgpu_vpe; struct vpe_funcs { @@ -47,6 +49,15 @@ struct vpe_regs { uint32_t queue0_rb_wptr_lo; uint32_t queue0_rb_wptr_hi; uint32_t queue0_preempt; + + uint32_t dpm_enable; + uint32_t dpm_pratio; + uint32_t dpm_request_interval; + uint32_t dpm_decision_threshold; + uint32_t dpm_busy_clamp_threshold; + uint32_t dpm_idle_clamp_threshold; + uint32_t dpm_request_lv; + uint32_t context_indicator; }; struct amdgpu_vpe { @@ -63,12 +74,18 @@ struct amdgpu_vpe { struct amdgpu_bo *cmdbuf_obj; uint64_t cmdbuf_gpu_addr; uint32_t *cmdbuf_cpu_addr; + struct delayed_work idle_work; + bool context_started; + + uint32_t num_instances; + bool collaborate_mode; }; int amdgpu_vpe_psp_update_sram(struct amdgpu_device *adev); int amdgpu_vpe_init_microcode(struct amdgpu_vpe *vpe); int amdgpu_vpe_ring_init(struct amdgpu_vpe *vpe); int amdgpu_vpe_ring_fini(struct amdgpu_vpe *vpe); +int amdgpu_vpe_configure_dpm(struct amdgpu_vpe *vpe); #define vpe_ring_init(vpe) ((vpe)->funcs->ring_init ? (vpe)->funcs->ring_init((vpe)) : 0) #define vpe_ring_start(vpe) ((vpe)->funcs->ring_start ? (vpe)->funcs->ring_start((vpe)) : 0) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c index 18f58efc9dc7..e494f5bf136a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c @@ -77,7 +77,16 @@ static inline bool amdgpu_is_vram_mgr_blocks_contiguous(struct list_head *head) return true; } +static inline u64 amdgpu_vram_mgr_blocks_size(struct list_head *head) +{ + struct drm_buddy_block *block; + u64 size = 0; + list_for_each_entry(block, head, link) + size += amdgpu_vram_mgr_block_size(block); + + return size; +} /** * DOC: mem_info_vram_total @@ -212,8 +221,23 @@ static struct attribute *amdgpu_vram_mgr_attributes[] = { NULL }; +static umode_t amdgpu_vram_attrs_is_visible(struct kobject *kobj, + struct attribute *attr, int i) +{ + struct device *dev = kobj_to_dev(kobj); + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = drm_to_adev(ddev); + + if (attr == &dev_attr_mem_info_vram_vendor.attr && + !adev->gmc.vram_vendor) + return 0; + + return attr->mode; +} + const struct attribute_group amdgpu_vram_mgr_attr_group = { - .attrs = amdgpu_vram_mgr_attributes + .attrs = amdgpu_vram_mgr_attributes, + .is_visible = amdgpu_vram_attrs_is_visible }; /** @@ -426,6 +450,7 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man, { struct amdgpu_vram_mgr *mgr = to_vram_mgr(man); struct amdgpu_device *adev = to_amdgpu_device(mgr); + struct amdgpu_bo *bo = ttm_to_amdgpu_bo(tbo); u64 vis_usage = 0, max_bytes, min_block_size; struct amdgpu_vram_mgr_resource *vres; u64 size, remaining_size, lpfn, fpfn; @@ -477,6 +502,9 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man, if (place->flags & TTM_PL_FLAG_CONTIGUOUS) vres->flags |= DRM_BUDDY_CONTIGUOUS_ALLOCATION; + if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CLEARED) + vres->flags |= DRM_BUDDY_CLEAR_ALLOCATION; + if (fpfn || lpfn != mgr->mm.size) /* Allocate blocks in desired range */ vres->flags |= DRM_BUDDY_RANGE_ALLOCATION; @@ -516,6 +544,8 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man, mutex_unlock(&mgr->lock); vres->base.start = 0; + size = max_t(u64, amdgpu_vram_mgr_blocks_size(&vres->blocks), + vres->base.size); list_for_each_entry(block, &vres->blocks, link) { unsigned long start; @@ -523,8 +553,8 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man, amdgpu_vram_mgr_block_size(block); start >>= PAGE_SHIFT; - if (start > PFN_UP(vres->base.size)) - start -= PFN_UP(vres->base.size); + if (start > PFN_UP(size)) + start -= PFN_UP(size); else start = 0; vres->base.start = max(vres->base.start, start); @@ -545,7 +575,7 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man, return 0; error_free_blocks: - drm_buddy_free_list(mm, &vres->blocks); + drm_buddy_free_list(mm, &vres->blocks, 0); mutex_unlock(&mgr->lock); error_fini: ttm_resource_fini(man, &vres->base); @@ -578,7 +608,7 @@ static void amdgpu_vram_mgr_del(struct ttm_resource_manager *man, amdgpu_vram_mgr_do_reserve(man); - drm_buddy_free_list(mm, &vres->blocks); + drm_buddy_free_list(mm, &vres->blocks, vres->flags); mutex_unlock(&mgr->lock); atomic64_sub(vis_usage, &mgr->vis_usage); @@ -886,7 +916,7 @@ void amdgpu_vram_mgr_fini(struct amdgpu_device *adev) kfree(rsv); list_for_each_entry_safe(rsv, temp, &mgr->reserved_pages, blocks) { - drm_buddy_free_list(&mgr->mm, &rsv->allocated); + drm_buddy_free_list(&mgr->mm, &rsv->allocated, 0); kfree(rsv); } if (!adev->gmc.is_app_apu) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h index 0e04e42cf809..b256cbc2bc27 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h @@ -53,10 +53,20 @@ static inline u64 amdgpu_vram_mgr_block_size(struct drm_buddy_block *block) return (u64)PAGE_SIZE << drm_buddy_block_order(block); } +static inline bool amdgpu_vram_mgr_is_cleared(struct drm_buddy_block *block) +{ + return drm_buddy_block_is_clear(block); +} + static inline struct amdgpu_vram_mgr_resource * to_amdgpu_vram_mgr_resource(struct ttm_resource *res) { return container_of(res, struct amdgpu_vram_mgr_resource, base); } +static inline void amdgpu_vram_mgr_set_cleared(struct ttm_resource *res) +{ + to_amdgpu_vram_mgr_resource(res)->flags |= DRM_BUDDY_CLEARED; +} + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c index 19aa6b7b16fb..dd2ec48cf5c2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c @@ -103,6 +103,53 @@ static const int walf_pcs_err_noncorrectable_mask_reg_aldebaran[] = { smnPCS_GOPX1_PCS_ERROR_NONCORRECTABLE_MASK + 0x100000 }; +static const int xgmi3x16_pcs_err_status_reg_v6_4[] = { + smnPCS_XGMI3X16_PCS_ERROR_STATUS, + smnPCS_XGMI3X16_PCS_ERROR_STATUS + 0x100000 +}; + +static const int xgmi3x16_pcs_err_noncorrectable_mask_reg_v6_4[] = { + smnPCS_XGMI3X16_PCS_ERROR_NONCORRECTABLE_MASK, + smnPCS_XGMI3X16_PCS_ERROR_NONCORRECTABLE_MASK + 0x100000 +}; + +static const u64 xgmi_v6_4_0_mca_base_array[] = { + 0x11a09200, + 0x11b09200, +}; + +static const char *xgmi_v6_4_0_ras_error_code_ext[32] = { + [0x00] = "XGMI PCS DataLossErr", + [0x01] = "XGMI PCS TrainingErr", + [0x02] = "XGMI PCS FlowCtrlAckErr", + [0x03] = "XGMI PCS RxFifoUnderflowErr", + [0x04] = "XGMI PCS RxFifoOverflowErr", + [0x05] = "XGMI PCS CRCErr", + [0x06] = "XGMI PCS BERExceededErr", + [0x07] = "XGMI PCS TxMetaDataErr", + [0x08] = "XGMI PCS ReplayBufParityErr", + [0x09] = "XGMI PCS DataParityErr", + [0x0a] = "XGMI PCS ReplayFifoOverflowErr", + [0x0b] = "XGMI PCS ReplayFifoUnderflowErr", + [0x0c] = "XGMI PCS ElasticFifoOverflowErr", + [0x0d] = "XGMI PCS DeskewErr", + [0x0e] = "XGMI PCS FlowCtrlCRCErr", + [0x0f] = "XGMI PCS DataStartupLimitErr", + [0x10] = "XGMI PCS FCInitTimeoutErr", + [0x11] = "XGMI PCS RecoveryTimeoutErr", + [0x12] = "XGMI PCS ReadySerialTimeoutErr", + [0x13] = "XGMI PCS ReadySerialAttemptErr", + [0x14] = "XGMI PCS RecoveryAttemptErr", + [0x15] = "XGMI PCS RecoveryRelockAttemptErr", + [0x16] = "XGMI PCS ReplayAttemptErr", + [0x17] = "XGMI PCS SyncHdrErr", + [0x18] = "XGMI PCS TxReplayTimeoutErr", + [0x19] = "XGMI PCS RxReplayTimeoutErr", + [0x1a] = "XGMI PCS LinkSubTxTimeoutErr", + [0x1b] = "XGMI PCS LinkSubRxTimeoutErr", + [0x1c] = "XGMI PCS RxCMDPktErr", +}; + static const struct amdgpu_pcs_ras_field xgmi_pcs_ras_fields[] = { {"XGMI PCS DataLossErr", SOC15_REG_FIELD(XGMI0_PCS_GOPX16_PCS_ERROR_STATUS, DataLossErr)}, @@ -366,6 +413,38 @@ static ssize_t amdgpu_xgmi_show_num_links(struct device *dev, return sysfs_emit(buf, "%s\n", buf); } +static ssize_t amdgpu_xgmi_show_connected_port_num(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = drm_to_adev(ddev); + struct psp_xgmi_topology_info *top = &adev->psp.xgmi_context.top_info; + int i, j, size = 0; + int current_node; + /* + * get the node id in the sysfs for the current socket and show + * it in the port num info output in the sysfs for easy reading. + * it is NOT the one retrieved from xgmi ta. + */ + for (i = 0; i < top->num_nodes; i++) { + if (top->nodes[i].node_id == adev->gmc.xgmi.node_id) { + current_node = i; + break; + } + } + + for (i = 0; i < top->num_nodes; i++) { + for (j = 0; j < top->nodes[i].num_links; j++) + /* node id in sysfs starts from 1 rather than 0 so +1 here */ + size += sysfs_emit_at(buf, size, "%02x:%02x -> %02x:%02x\n", current_node + 1, + top->nodes[i].port_num[j].src_xgmi_port_num, i + 1, + top->nodes[i].port_num[j].dst_xgmi_port_num); + } + + return size; +} + #define AMDGPU_XGMI_SET_FICAA(o) ((o) | 0x456801) static ssize_t amdgpu_xgmi_show_error(struct device *dev, struct device_attribute *attr, @@ -405,6 +484,7 @@ static DEVICE_ATTR(xgmi_physical_id, 0444, amdgpu_xgmi_show_physical_id, NULL); static DEVICE_ATTR(xgmi_error, S_IRUGO, amdgpu_xgmi_show_error, NULL); static DEVICE_ATTR(xgmi_num_hops, S_IRUGO, amdgpu_xgmi_show_num_hops, NULL); static DEVICE_ATTR(xgmi_num_links, S_IRUGO, amdgpu_xgmi_show_num_links, NULL); +static DEVICE_ATTR(xgmi_port_num, S_IRUGO, amdgpu_xgmi_show_connected_port_num, NULL); static int amdgpu_xgmi_sysfs_add_dev_info(struct amdgpu_device *adev, struct amdgpu_hive_info *hive) @@ -440,6 +520,13 @@ static int amdgpu_xgmi_sysfs_add_dev_info(struct amdgpu_device *adev, if (ret) pr_err("failed to create xgmi_num_links\n"); + /* Create xgmi port num file if supported */ + if (adev->psp.xgmi_context.xgmi_ta_caps & EXTEND_PEER_LINK_INFO_CMD_FLAG) { + ret = device_create_file(adev->dev, &dev_attr_xgmi_port_num); + if (ret) + dev_err(adev->dev, "failed to create xgmi_port_num\n"); + } + /* Create sysfs link to hive info folder on the first device */ if (hive->kobj.parent != (&adev->dev->kobj)) { ret = sysfs_create_link(&adev->dev->kobj, &hive->kobj, @@ -470,6 +557,8 @@ remove_file: device_remove_file(adev->dev, &dev_attr_xgmi_error); device_remove_file(adev->dev, &dev_attr_xgmi_num_hops); device_remove_file(adev->dev, &dev_attr_xgmi_num_links); + if (adev->psp.xgmi_context.xgmi_ta_caps & EXTEND_PEER_LINK_INFO_CMD_FLAG) + device_remove_file(adev->dev, &dev_attr_xgmi_port_num); success: return ret; @@ -486,6 +575,8 @@ static void amdgpu_xgmi_sysfs_rem_dev_info(struct amdgpu_device *adev, device_remove_file(adev->dev, &dev_attr_xgmi_error); device_remove_file(adev->dev, &dev_attr_xgmi_num_hops); device_remove_file(adev->dev, &dev_attr_xgmi_num_links); + if (adev->psp.xgmi_context.xgmi_ta_caps & EXTEND_PEER_LINK_INFO_CMD_FLAG) + device_remove_file(adev->dev, &dev_attr_xgmi_port_num); if (hive->kobj.parent != (&adev->dev->kobj)) sysfs_remove_link(&adev->dev->kobj,"xgmi_hive_info"); @@ -732,6 +823,28 @@ static int amdgpu_xgmi_initialize_hive_get_data_partition(struct amdgpu_hive_inf return 0; } +static void amdgpu_xgmi_fill_topology_info(struct amdgpu_device *adev, + struct amdgpu_device *peer_adev) +{ + struct psp_xgmi_topology_info *top_info = &adev->psp.xgmi_context.top_info; + struct psp_xgmi_topology_info *peer_info = &peer_adev->psp.xgmi_context.top_info; + + for (int i = 0; i < peer_info->num_nodes; i++) { + if (peer_info->nodes[i].node_id == adev->gmc.xgmi.node_id) { + for (int j = 0; j < top_info->num_nodes; j++) { + if (top_info->nodes[j].node_id == peer_adev->gmc.xgmi.node_id) { + peer_info->nodes[i].num_hops = top_info->nodes[j].num_hops; + peer_info->nodes[i].is_sharing_enabled = + top_info->nodes[j].is_sharing_enabled; + peer_info->nodes[i].num_links = + top_info->nodes[j].num_links; + return; + } + } + } + } +} + int amdgpu_xgmi_add_device(struct amdgpu_device *adev) { struct psp_xgmi_topology_info *top_info; @@ -806,18 +919,38 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev) goto exit_unlock; } - /* get latest topology info for each device from psp */ - list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) { - ret = psp_xgmi_get_topology_info(&tmp_adev->psp, count, - &tmp_adev->psp.xgmi_context.top_info, false); + if (amdgpu_sriov_vf(adev) && + adev->psp.xgmi_context.xgmi_ta_caps & EXTEND_PEER_LINK_INFO_CMD_FLAG) { + /* only get topology for VF being init if it can support full duplex */ + ret = psp_xgmi_get_topology_info(&adev->psp, count, + &adev->psp.xgmi_context.top_info, false); if (ret) { - dev_err(tmp_adev->dev, + dev_err(adev->dev, "XGMI: Get topology failure on device %llx, hive %llx, ret %d", - tmp_adev->gmc.xgmi.node_id, - tmp_adev->gmc.xgmi.hive_id, ret); - /* To do : continue with some node failed or disable the whole hive */ + adev->gmc.xgmi.node_id, + adev->gmc.xgmi.hive_id, ret); + /* To do: continue with some node failed or disable the whole hive*/ goto exit_unlock; } + + /* fill the topology info for peers instead of getting from PSP */ + list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) { + amdgpu_xgmi_fill_topology_info(adev, tmp_adev); + } + } else { + /* get latest topology info for each device from psp */ + list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) { + ret = psp_xgmi_get_topology_info(&tmp_adev->psp, count, + &tmp_adev->psp.xgmi_context.top_info, false); + if (ret) { + dev_err(tmp_adev->dev, + "XGMI: Get topology failure on device %llx, hive %llx, ret %d", + tmp_adev->gmc.xgmi.node_id, + tmp_adev->gmc.xgmi.hive_id, ret); + /* To do : continue with some node failed or disable the whole hive */ + goto exit_unlock; + } + } } /* get topology again for hives that support extended data */ @@ -902,15 +1035,88 @@ int amdgpu_xgmi_remove_device(struct amdgpu_device *adev) return 0; } +static int xgmi_v6_4_0_aca_bank_parser(struct aca_handle *handle, struct aca_bank *bank, + enum aca_smu_type type, void *data) +{ + struct amdgpu_device *adev = handle->adev; + struct aca_bank_info info; + const char *error_str; + u64 status, count; + int ret, ext_error_code; + + ret = aca_bank_info_decode(bank, &info); + if (ret) + return ret; + + status = bank->regs[ACA_REG_IDX_STATUS]; + ext_error_code = ACA_REG__STATUS__ERRORCODEEXT(status); + + error_str = ext_error_code < ARRAY_SIZE(xgmi_v6_4_0_ras_error_code_ext) ? + xgmi_v6_4_0_ras_error_code_ext[ext_error_code] : NULL; + if (error_str) + dev_info(adev->dev, "%s detected\n", error_str); + + count = ACA_REG__MISC0__ERRCNT(bank->regs[ACA_REG_IDX_MISC0]); + + switch (type) { + case ACA_SMU_TYPE_UE: + if (ext_error_code != 0 && ext_error_code != 9) + count = 0ULL; + + ret = aca_error_cache_log_bank_error(handle, &info, ACA_ERROR_TYPE_UE, count); + break; + case ACA_SMU_TYPE_CE: + count = ext_error_code == 6 ? count : 0ULL; + ret = aca_error_cache_log_bank_error(handle, &info, ACA_ERROR_TYPE_CE, count); + break; + default: + return -EINVAL; + } + + return ret; +} + +static const struct aca_bank_ops xgmi_v6_4_0_aca_bank_ops = { + .aca_bank_parser = xgmi_v6_4_0_aca_bank_parser, +}; + +static const struct aca_info xgmi_v6_4_0_aca_info = { + .hwip = ACA_HWIP_TYPE_PCS_XGMI, + .mask = ACA_ERROR_UE_MASK | ACA_ERROR_CE_MASK, + .bank_ops = &xgmi_v6_4_0_aca_bank_ops, +}; + static int amdgpu_xgmi_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block) { + int r; + if (!adev->gmc.xgmi.supported || adev->gmc.xgmi.num_physical_nodes == 0) return 0; - adev->gmc.xgmi.ras->ras_block.hw_ops->reset_ras_error_count(adev); + amdgpu_ras_reset_error_count(adev, AMDGPU_RAS_BLOCK__XGMI_WAFL); - return amdgpu_ras_block_late_init(adev, ras_block); + r = amdgpu_ras_block_late_init(adev, ras_block); + if (r) + return r; + + switch (amdgpu_ip_version(adev, XGMI_HWIP, 0)) { + case IP_VERSION(6, 4, 0): + r = amdgpu_ras_bind_aca(adev, AMDGPU_RAS_BLOCK__XGMI_WAFL, + &xgmi_v6_4_0_aca_info, NULL); + if (r) + goto late_fini; + break; + default: + break; + } + + return 0; + +late_fini: + amdgpu_ras_block_late_fini(adev, ras_block); + + return r; } uint64_t amdgpu_xgmi_get_relative_phy_addr(struct amdgpu_device *adev, @@ -926,7 +1132,7 @@ static void pcs_clear_status(struct amdgpu_device *adev, uint32_t pcs_status_reg WREG32_PCIE(pcs_status_reg, 0); } -static void amdgpu_xgmi_reset_ras_error_count(struct amdgpu_device *adev) +static void amdgpu_xgmi_legacy_reset_ras_error_count(struct amdgpu_device *adev) { uint32_t i; @@ -952,6 +1158,49 @@ static void amdgpu_xgmi_reset_ras_error_count(struct amdgpu_device *adev) default: break; } + + switch (amdgpu_ip_version(adev, XGMI_HWIP, 0)) { + case IP_VERSION(6, 4, 0): + for (i = 0; i < ARRAY_SIZE(xgmi3x16_pcs_err_status_reg_v6_4); i++) + pcs_clear_status(adev, + xgmi3x16_pcs_err_status_reg_v6_4[i]); + break; + default: + break; + } +} + +static void __xgmi_v6_4_0_reset_error_count(struct amdgpu_device *adev, int xgmi_inst, u64 mca_base) +{ + WREG64_MCA(xgmi_inst, mca_base, ACA_REG_IDX_STATUS, 0ULL); +} + +static void xgmi_v6_4_0_reset_error_count(struct amdgpu_device *adev, int xgmi_inst) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(xgmi_v6_4_0_mca_base_array); i++) + __xgmi_v6_4_0_reset_error_count(adev, xgmi_inst, xgmi_v6_4_0_mca_base_array[i]); +} + +static void xgmi_v6_4_0_reset_ras_error_count(struct amdgpu_device *adev) +{ + int i; + + for_each_inst(i, adev->aid_mask) + xgmi_v6_4_0_reset_error_count(adev, i); +} + +static void amdgpu_xgmi_reset_ras_error_count(struct amdgpu_device *adev) +{ + switch (amdgpu_ip_version(adev, XGMI_HWIP, 0)) { + case IP_VERSION(6, 4, 0): + xgmi_v6_4_0_reset_ras_error_count(adev); + break; + default: + amdgpu_xgmi_legacy_reset_ras_error_count(adev); + break; + } } static int amdgpu_xgmi_query_pcs_error_status(struct amdgpu_device *adev, @@ -969,7 +1218,9 @@ static int amdgpu_xgmi_query_pcs_error_status(struct amdgpu_device *adev, if (is_xgmi_pcs) { if (amdgpu_ip_version(adev, XGMI_HWIP, 0) == - IP_VERSION(6, 1, 0)) { + IP_VERSION(6, 1, 0) || + amdgpu_ip_version(adev, XGMI_HWIP, 0) == + IP_VERSION(6, 4, 0)) { pcs_ras_fields = &xgmi3x16_pcs_ras_fields[0]; field_array_size = ARRAY_SIZE(xgmi3x16_pcs_ras_fields); } else { @@ -1003,11 +1254,11 @@ static int amdgpu_xgmi_query_pcs_error_status(struct amdgpu_device *adev, return 0; } -static void amdgpu_xgmi_query_ras_error_count(struct amdgpu_device *adev, - void *ras_error_status) +static void amdgpu_xgmi_legacy_query_ras_error_count(struct amdgpu_device *adev, + void *ras_error_status) { struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; - int i; + int i, supported = 1; uint32_t data, mask_data = 0; uint32_t ue_cnt = 0, ce_cnt = 0; @@ -1071,42 +1322,144 @@ static void amdgpu_xgmi_query_ras_error_count(struct amdgpu_device *adev, } break; default: - dev_warn(adev->dev, "XGMI RAS error query not supported"); + supported = 0; break; } - adev->gmc.xgmi.ras->ras_block.hw_ops->reset_ras_error_count(adev); + switch (amdgpu_ip_version(adev, XGMI_HWIP, 0)) { + case IP_VERSION(6, 4, 0): + /* check xgmi3x16 pcs error */ + for (i = 0; i < ARRAY_SIZE(xgmi3x16_pcs_err_status_reg_v6_4); i++) { + data = RREG32_PCIE(xgmi3x16_pcs_err_status_reg_v6_4[i]); + mask_data = + RREG32_PCIE(xgmi3x16_pcs_err_noncorrectable_mask_reg_v6_4[i]); + if (data) + amdgpu_xgmi_query_pcs_error_status(adev, data, + mask_data, &ue_cnt, &ce_cnt, true, true); + } + break; + default: + if (!supported) + dev_warn(adev->dev, "XGMI RAS error query not supported"); + break; + } + + amdgpu_ras_reset_error_count(adev, AMDGPU_RAS_BLOCK__XGMI_WAFL); err_data->ue_count += ue_cnt; err_data->ce_count += ce_cnt; } +static enum aca_error_type xgmi_v6_4_0_pcs_mca_get_error_type(struct amdgpu_device *adev, u64 status) +{ + const char *error_str; + int ext_error_code; + + ext_error_code = ACA_REG__STATUS__ERRORCODEEXT(status); + + error_str = ext_error_code < ARRAY_SIZE(xgmi_v6_4_0_ras_error_code_ext) ? + xgmi_v6_4_0_ras_error_code_ext[ext_error_code] : NULL; + if (error_str) + dev_info(adev->dev, "%s detected\n", error_str); + + switch (ext_error_code) { + case 0: + return ACA_ERROR_TYPE_UE; + case 6: + return ACA_ERROR_TYPE_CE; + default: + return -EINVAL; + } + + return -EINVAL; +} + +static void __xgmi_v6_4_0_query_error_count(struct amdgpu_device *adev, struct amdgpu_smuio_mcm_config_info *mcm_info, + u64 mca_base, struct ras_err_data *err_data) +{ + int xgmi_inst = mcm_info->die_id; + u64 status = 0; + + status = RREG64_MCA(xgmi_inst, mca_base, ACA_REG_IDX_STATUS); + if (!ACA_REG__STATUS__VAL(status)) + return; + + switch (xgmi_v6_4_0_pcs_mca_get_error_type(adev, status)) { + case ACA_ERROR_TYPE_UE: + amdgpu_ras_error_statistic_ue_count(err_data, mcm_info, NULL, 1ULL); + break; + case ACA_ERROR_TYPE_CE: + amdgpu_ras_error_statistic_ce_count(err_data, mcm_info, NULL, 1ULL); + break; + default: + break; + } + + WREG64_MCA(xgmi_inst, mca_base, ACA_REG_IDX_STATUS, 0ULL); +} + +static void xgmi_v6_4_0_query_error_count(struct amdgpu_device *adev, int xgmi_inst, struct ras_err_data *err_data) +{ + struct amdgpu_smuio_mcm_config_info mcm_info = { + .socket_id = adev->smuio.funcs->get_socket_id(adev), + .die_id = xgmi_inst, + }; + int i; + + for (i = 0; i < ARRAY_SIZE(xgmi_v6_4_0_mca_base_array); i++) + __xgmi_v6_4_0_query_error_count(adev, &mcm_info, xgmi_v6_4_0_mca_base_array[i], err_data); +} + +static void xgmi_v6_4_0_query_ras_error_count(struct amdgpu_device *adev, void *ras_error_status) +{ + struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; + int i; + + for_each_inst(i, adev->aid_mask) + xgmi_v6_4_0_query_error_count(adev, i, err_data); +} + +static void amdgpu_xgmi_query_ras_error_count(struct amdgpu_device *adev, + void *ras_error_status) +{ + switch (amdgpu_ip_version(adev, XGMI_HWIP, 0)) { + case IP_VERSION(6, 4, 0): + xgmi_v6_4_0_query_ras_error_count(adev, ras_error_status); + break; + default: + amdgpu_xgmi_legacy_query_ras_error_count(adev, ras_error_status); + break; + } +} + /* Trigger XGMI/WAFL error */ static int amdgpu_ras_error_inject_xgmi(struct amdgpu_device *adev, void *inject_if, uint32_t instance_mask) { - int ret = 0; + int ret1, ret2; struct ta_ras_trigger_error_input *block_info = (struct ta_ras_trigger_error_input *)inject_if; if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_DISALLOW)) dev_warn(adev->dev, "Failed to disallow df cstate"); - if (amdgpu_dpm_set_xgmi_plpd_mode(adev, XGMI_PLPD_DISALLOW)) + ret1 = amdgpu_dpm_set_xgmi_plpd_mode(adev, XGMI_PLPD_DISALLOW); + if (ret1 && ret1 != -EOPNOTSUPP) dev_warn(adev->dev, "Failed to disallow XGMI power down"); - ret = psp_ras_trigger_error(&adev->psp, block_info, instance_mask); + ret2 = psp_ras_trigger_error(&adev->psp, block_info, instance_mask); if (amdgpu_ras_intr_triggered()) - return ret; + return ret2; - if (amdgpu_dpm_set_xgmi_plpd_mode(adev, XGMI_PLPD_DEFAULT)) + ret1 = amdgpu_dpm_set_xgmi_plpd_mode(adev, XGMI_PLPD_DEFAULT); + if (ret1 && ret1 != -EOPNOTSUPP) dev_warn(adev->dev, "Failed to allow XGMI power down"); if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_ALLOW)) dev_warn(adev->dev, "Failed to allow df cstate"); - return ret; + return ret2; } struct amdgpu_ras_block_hw_ops xgmi_ras_hw_ops = { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h index 86fbf56938f4..a3bfc16de6d4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h @@ -43,7 +43,8 @@ struct amdgpu_hive_info { } pstate; struct amdgpu_reset_domain *reset_domain; - uint32_t device_remove_count; + atomic_t ras_recovery; + struct ras_event_manager event_mgr; }; struct amdgpu_pcs_ras_field { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h index 104a5ad8397d..fb2b394bb9c5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h @@ -90,10 +90,12 @@ union amd_sriov_msg_feature_flags { uint32_t host_load_ucodes : 1; uint32_t host_flr_vramlost : 1; uint32_t mm_bw_management : 1; - uint32_t pp_one_vf_mode : 1; + uint32_t pp_one_vf_mode : 1; uint32_t reg_indirect_acc : 1; uint32_t av1_support : 1; - uint32_t reserved : 25; + uint32_t vcn_rb_decouple : 1; + uint32_t mes_info_enable : 1; + uint32_t reserved : 23; } flags; uint32_t all; }; @@ -156,7 +158,7 @@ struct amd_sriov_msg_pf2vf_info_header { uint32_t reserved[2]; }; -#define AMD_SRIOV_MSG_PF2VF_INFO_FILLED_SIZE (48) +#define AMD_SRIOV_MSG_PF2VF_INFO_FILLED_SIZE (49) struct amd_sriov_msg_pf2vf_info { /* header contains size and version */ struct amd_sriov_msg_pf2vf_info_header header; @@ -207,6 +209,8 @@ struct amd_sriov_msg_pf2vf_info { struct amd_sriov_msg_uuid_info uuid_info; /* PCIE atomic ops support flag */ uint32_t pcie_atomic_ops_support_flags; + /* Portion of GPU memory occupied by VF. MAX value is 65535, but set to uint32_t to maintain alignment with reserved size */ + uint32_t gpu_capacity; /* reserved */ uint32_t reserved[256 - AMD_SRIOV_MSG_PF2VF_INFO_FILLED_SIZE]; }; @@ -220,7 +224,7 @@ struct amd_sriov_msg_vf2pf_info_header { uint32_t reserved[2]; }; -#define AMD_SRIOV_MSG_VF2PF_INFO_FILLED_SIZE (70) +#define AMD_SRIOV_MSG_VF2PF_INFO_FILLED_SIZE (73) struct amd_sriov_msg_vf2pf_info { /* header contains size and version */ struct amd_sriov_msg_vf2pf_info_header header; @@ -264,7 +268,9 @@ struct amd_sriov_msg_vf2pf_info { uint32_t version; } ucode_info[AMD_SRIOV_MSG_RESERVE_UCODE]; uint64_t dummy_page_addr; - + /* FB allocated for guest MES to record UQ info */ + uint64_t mes_info_addr; + uint32_t mes_info_size; /* reserved */ uint32_t reserved[256 - AMD_SRIOV_MSG_VF2PF_INFO_FILLED_SIZE]; }; diff --git a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c index 3f715e7fe1a9..414ea3f560a7 100644 --- a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c +++ b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c @@ -24,6 +24,7 @@ #include "soc15.h" #include "soc15_common.h" +#include "amdgpu_reg_state.h" #include "amdgpu_xcp.h" #include "gfx_v9_4_3.h" #include "gfxhub_v1_2.h" @@ -61,6 +62,11 @@ void aqua_vanjaram_doorbell_index_init(struct amdgpu_device *adev) adev->doorbell_index.max_assignment = AMDGPU_DOORBELL_LAYOUT1_MAX_ASSIGNMENT << 1; } +static bool aqua_vanjaram_xcp_vcn_shared(struct amdgpu_device *adev) +{ + return (adev->xcp_mgr->num_xcps > adev->vcn.num_vcn_inst); +} + static void aqua_vanjaram_set_xcp_id(struct amdgpu_device *adev, uint32_t inst_idx, struct amdgpu_ring *ring) { @@ -86,7 +92,7 @@ static void aqua_vanjaram_set_xcp_id(struct amdgpu_device *adev, case AMDGPU_RING_TYPE_VCN_ENC: case AMDGPU_RING_TYPE_VCN_JPEG: ip_blk = AMDGPU_XCP_VCN; - if (adev->xcp_mgr->mode == AMDGPU_CPX_PARTITION_MODE) + if (aqua_vanjaram_xcp_vcn_shared(adev)) inst_mask = 1 << (inst_idx * 2); break; default: @@ -139,10 +145,12 @@ static int aqua_vanjaram_xcp_sched_list_update( aqua_vanjaram_xcp_gpu_sched_update(adev, ring, ring->xcp_id); - /* VCN is shared by two partitions under CPX MODE */ + /* VCN may be shared by two partitions under CPX MODE in certain + * configs. + */ if ((ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC || - ring->funcs->type == AMDGPU_RING_TYPE_VCN_JPEG) && - adev->xcp_mgr->mode == AMDGPU_CPX_PARTITION_MODE) + ring->funcs->type == AMDGPU_RING_TYPE_VCN_JPEG) && + aqua_vanjaram_xcp_vcn_shared(adev)) aqua_vanjaram_xcp_gpu_sched_update(adev, ring, ring->xcp_id + 1); } @@ -622,7 +630,7 @@ static int aqua_vanjaram_xcp_mgr_init(struct amdgpu_device *adev) int aqua_vanjaram_init_soc_config(struct amdgpu_device *adev) { - u32 mask, inst_mask = adev->sdma.sdma_mask; + u32 mask, avail_inst, inst_mask = adev->sdma.sdma_mask; int ret, i; /* generally 1 AID supports 4 instances */ @@ -634,7 +642,9 @@ int aqua_vanjaram_init_soc_config(struct amdgpu_device *adev) for (mask = (1 << adev->sdma.num_inst_per_aid) - 1; inst_mask; inst_mask >>= adev->sdma.num_inst_per_aid, ++i) { - if ((inst_mask & mask) == mask) + avail_inst = inst_mask & mask; + if (avail_inst == mask || avail_inst == 0x3 || + avail_inst == 0xc) adev->aid_mask |= (1 << i); } @@ -656,3 +666,416 @@ int aqua_vanjaram_init_soc_config(struct amdgpu_device *adev) return 0; } + +static void aqua_read_smn(struct amdgpu_device *adev, + struct amdgpu_smn_reg_data *regdata, + uint64_t smn_addr) +{ + regdata->addr = smn_addr; + regdata->value = RREG32_PCIE(smn_addr); +} + +struct aqua_reg_list { + uint64_t start_addr; + uint32_t num_regs; + uint32_t incrx; +}; + +#define DW_ADDR_INCR 4 + +static void aqua_read_smn_ext(struct amdgpu_device *adev, + struct amdgpu_smn_reg_data *regdata, + uint64_t smn_addr, int i) +{ + regdata->addr = + smn_addr + adev->asic_funcs->encode_ext_smn_addressing(i); + regdata->value = RREG32_PCIE_EXT(regdata->addr); +} + +#define smnreg_0x1A340218 0x1A340218 +#define smnreg_0x1A3402E4 0x1A3402E4 +#define smnreg_0x1A340294 0x1A340294 +#define smreg_0x1A380088 0x1A380088 + +#define NUM_PCIE_SMN_REGS 14 + +static struct aqua_reg_list pcie_reg_addrs[] = { + { smnreg_0x1A340218, 1, 0 }, + { smnreg_0x1A3402E4, 1, 0 }, + { smnreg_0x1A340294, 6, DW_ADDR_INCR }, + { smreg_0x1A380088, 6, DW_ADDR_INCR }, +}; + +static ssize_t aqua_vanjaram_read_pcie_state(struct amdgpu_device *adev, + void *buf, size_t max_size) +{ + struct amdgpu_reg_state_pcie_v1_0 *pcie_reg_state; + uint32_t start_addr, incrx, num_regs, szbuf; + struct amdgpu_regs_pcie_v1_0 *pcie_regs; + struct amdgpu_smn_reg_data *reg_data; + struct pci_dev *us_pdev, *ds_pdev; + int aer_cap, r, n; + + if (!buf || !max_size) + return -EINVAL; + + pcie_reg_state = (struct amdgpu_reg_state_pcie_v1_0 *)buf; + + szbuf = sizeof(*pcie_reg_state) + + amdgpu_reginst_size(1, sizeof(*pcie_regs), NUM_PCIE_SMN_REGS); + /* Only one instance of pcie regs */ + if (max_size < szbuf) + return -EOVERFLOW; + + pcie_regs = (struct amdgpu_regs_pcie_v1_0 *)((uint8_t *)buf + + sizeof(*pcie_reg_state)); + pcie_regs->inst_header.instance = 0; + pcie_regs->inst_header.state = AMDGPU_INST_S_OK; + pcie_regs->inst_header.num_smn_regs = NUM_PCIE_SMN_REGS; + + reg_data = pcie_regs->smn_reg_values; + + for (r = 0; r < ARRAY_SIZE(pcie_reg_addrs); r++) { + start_addr = pcie_reg_addrs[r].start_addr; + incrx = pcie_reg_addrs[r].incrx; + num_regs = pcie_reg_addrs[r].num_regs; + for (n = 0; n < num_regs; n++) { + aqua_read_smn(adev, reg_data, start_addr + n * incrx); + ++reg_data; + } + } + + ds_pdev = pci_upstream_bridge(adev->pdev); + us_pdev = pci_upstream_bridge(ds_pdev); + + pcie_capability_read_word(us_pdev, PCI_EXP_DEVSTA, + &pcie_regs->device_status); + pcie_capability_read_word(us_pdev, PCI_EXP_LNKSTA, + &pcie_regs->link_status); + + aer_cap = pci_find_ext_capability(us_pdev, PCI_EXT_CAP_ID_ERR); + if (aer_cap) { + pci_read_config_dword(us_pdev, aer_cap + PCI_ERR_COR_STATUS, + &pcie_regs->pcie_corr_err_status); + pci_read_config_dword(us_pdev, aer_cap + PCI_ERR_UNCOR_STATUS, + &pcie_regs->pcie_uncorr_err_status); + } + + pci_read_config_dword(us_pdev, PCI_PRIMARY_BUS, + &pcie_regs->sub_bus_number_latency); + + pcie_reg_state->common_header.structure_size = szbuf; + pcie_reg_state->common_header.format_revision = 1; + pcie_reg_state->common_header.content_revision = 0; + pcie_reg_state->common_header.state_type = AMDGPU_REG_STATE_TYPE_PCIE; + pcie_reg_state->common_header.num_instances = 1; + + return pcie_reg_state->common_header.structure_size; +} + +#define smnreg_0x11A00050 0x11A00050 +#define smnreg_0x11A00180 0x11A00180 +#define smnreg_0x11A00070 0x11A00070 +#define smnreg_0x11A00200 0x11A00200 +#define smnreg_0x11A0020C 0x11A0020C +#define smnreg_0x11A00210 0x11A00210 +#define smnreg_0x11A00108 0x11A00108 + +#define XGMI_LINK_REG(smnreg, l) ((smnreg) | (l << 20)) + +#define NUM_XGMI_SMN_REGS 25 + +static struct aqua_reg_list xgmi_reg_addrs[] = { + { smnreg_0x11A00050, 1, 0 }, + { smnreg_0x11A00180, 16, DW_ADDR_INCR }, + { smnreg_0x11A00070, 4, DW_ADDR_INCR }, + { smnreg_0x11A00200, 1, 0 }, + { smnreg_0x11A0020C, 1, 0 }, + { smnreg_0x11A00210, 1, 0 }, + { smnreg_0x11A00108, 1, 0 }, +}; + +static ssize_t aqua_vanjaram_read_xgmi_state(struct amdgpu_device *adev, + void *buf, size_t max_size) +{ + struct amdgpu_reg_state_xgmi_v1_0 *xgmi_reg_state; + uint32_t start_addr, incrx, num_regs, szbuf; + struct amdgpu_regs_xgmi_v1_0 *xgmi_regs; + struct amdgpu_smn_reg_data *reg_data; + const int max_xgmi_instances = 8; + int inst = 0, i, j, r, n; + const int xgmi_inst = 2; + void *p; + + if (!buf || !max_size) + return -EINVAL; + + xgmi_reg_state = (struct amdgpu_reg_state_xgmi_v1_0 *)buf; + + szbuf = sizeof(*xgmi_reg_state) + + amdgpu_reginst_size(max_xgmi_instances, sizeof(*xgmi_regs), + NUM_XGMI_SMN_REGS); + /* Only one instance of pcie regs */ + if (max_size < szbuf) + return -EOVERFLOW; + + p = &xgmi_reg_state->xgmi_state_regs[0]; + for_each_inst(i, adev->aid_mask) { + for (j = 0; j < xgmi_inst; ++j) { + xgmi_regs = (struct amdgpu_regs_xgmi_v1_0 *)p; + xgmi_regs->inst_header.instance = inst++; + + xgmi_regs->inst_header.state = AMDGPU_INST_S_OK; + xgmi_regs->inst_header.num_smn_regs = NUM_XGMI_SMN_REGS; + + reg_data = xgmi_regs->smn_reg_values; + + for (r = 0; r < ARRAY_SIZE(xgmi_reg_addrs); r++) { + start_addr = xgmi_reg_addrs[r].start_addr; + incrx = xgmi_reg_addrs[r].incrx; + num_regs = xgmi_reg_addrs[r].num_regs; + + for (n = 0; n < num_regs; n++) { + aqua_read_smn_ext( + adev, reg_data, + XGMI_LINK_REG(start_addr, j) + + n * incrx, + i); + ++reg_data; + } + } + p = reg_data; + } + } + + xgmi_reg_state->common_header.structure_size = szbuf; + xgmi_reg_state->common_header.format_revision = 1; + xgmi_reg_state->common_header.content_revision = 0; + xgmi_reg_state->common_header.state_type = AMDGPU_REG_STATE_TYPE_XGMI; + xgmi_reg_state->common_header.num_instances = max_xgmi_instances; + + return xgmi_reg_state->common_header.structure_size; +} + +#define smnreg_0x11C00070 0x11C00070 +#define smnreg_0x11C00210 0x11C00210 + +static struct aqua_reg_list wafl_reg_addrs[] = { + { smnreg_0x11C00070, 4, DW_ADDR_INCR }, + { smnreg_0x11C00210, 1, 0 }, +}; + +#define WAFL_LINK_REG(smnreg, l) ((smnreg) | (l << 20)) + +#define NUM_WAFL_SMN_REGS 5 + +static ssize_t aqua_vanjaram_read_wafl_state(struct amdgpu_device *adev, + void *buf, size_t max_size) +{ + struct amdgpu_reg_state_wafl_v1_0 *wafl_reg_state; + uint32_t start_addr, incrx, num_regs, szbuf; + struct amdgpu_regs_wafl_v1_0 *wafl_regs; + struct amdgpu_smn_reg_data *reg_data; + const int max_wafl_instances = 8; + int inst = 0, i, j, r, n; + const int wafl_inst = 2; + void *p; + + if (!buf || !max_size) + return -EINVAL; + + wafl_reg_state = (struct amdgpu_reg_state_wafl_v1_0 *)buf; + + szbuf = sizeof(*wafl_reg_state) + + amdgpu_reginst_size(max_wafl_instances, sizeof(*wafl_regs), + NUM_WAFL_SMN_REGS); + + if (max_size < szbuf) + return -EOVERFLOW; + + p = &wafl_reg_state->wafl_state_regs[0]; + for_each_inst(i, adev->aid_mask) { + for (j = 0; j < wafl_inst; ++j) { + wafl_regs = (struct amdgpu_regs_wafl_v1_0 *)p; + wafl_regs->inst_header.instance = inst++; + + wafl_regs->inst_header.state = AMDGPU_INST_S_OK; + wafl_regs->inst_header.num_smn_regs = NUM_WAFL_SMN_REGS; + + reg_data = wafl_regs->smn_reg_values; + + for (r = 0; r < ARRAY_SIZE(wafl_reg_addrs); r++) { + start_addr = wafl_reg_addrs[r].start_addr; + incrx = wafl_reg_addrs[r].incrx; + num_regs = wafl_reg_addrs[r].num_regs; + for (n = 0; n < num_regs; n++) { + aqua_read_smn_ext( + adev, reg_data, + WAFL_LINK_REG(start_addr, j) + + n * incrx, + i); + ++reg_data; + } + } + p = reg_data; + } + } + + wafl_reg_state->common_header.structure_size = szbuf; + wafl_reg_state->common_header.format_revision = 1; + wafl_reg_state->common_header.content_revision = 0; + wafl_reg_state->common_header.state_type = AMDGPU_REG_STATE_TYPE_WAFL; + wafl_reg_state->common_header.num_instances = max_wafl_instances; + + return wafl_reg_state->common_header.structure_size; +} + +#define smnreg_0x1B311060 0x1B311060 +#define smnreg_0x1B411060 0x1B411060 +#define smnreg_0x1B511060 0x1B511060 +#define smnreg_0x1B611060 0x1B611060 + +#define smnreg_0x1C307120 0x1C307120 +#define smnreg_0x1C317120 0x1C317120 + +#define smnreg_0x1C320830 0x1C320830 +#define smnreg_0x1C380830 0x1C380830 +#define smnreg_0x1C3D0830 0x1C3D0830 +#define smnreg_0x1C420830 0x1C420830 + +#define smnreg_0x1C320100 0x1C320100 +#define smnreg_0x1C380100 0x1C380100 +#define smnreg_0x1C3D0100 0x1C3D0100 +#define smnreg_0x1C420100 0x1C420100 + +#define smnreg_0x1B310500 0x1B310500 +#define smnreg_0x1C300400 0x1C300400 + +#define USR_CAKE_INCR 0x11000 +#define USR_LINK_INCR 0x100000 +#define USR_CP_INCR 0x10000 + +#define NUM_USR_SMN_REGS 20 + +struct aqua_reg_list usr_reg_addrs[] = { + { smnreg_0x1B311060, 4, DW_ADDR_INCR }, + { smnreg_0x1B411060, 4, DW_ADDR_INCR }, + { smnreg_0x1B511060, 4, DW_ADDR_INCR }, + { smnreg_0x1B611060, 4, DW_ADDR_INCR }, + { smnreg_0x1C307120, 2, DW_ADDR_INCR }, + { smnreg_0x1C317120, 2, DW_ADDR_INCR }, +}; + +#define NUM_USR1_SMN_REGS 46 +struct aqua_reg_list usr1_reg_addrs[] = { + { smnreg_0x1C320830, 6, USR_CAKE_INCR }, + { smnreg_0x1C380830, 5, USR_CAKE_INCR }, + { smnreg_0x1C3D0830, 5, USR_CAKE_INCR }, + { smnreg_0x1C420830, 4, USR_CAKE_INCR }, + { smnreg_0x1C320100, 6, USR_CAKE_INCR }, + { smnreg_0x1C380100, 5, USR_CAKE_INCR }, + { smnreg_0x1C3D0100, 5, USR_CAKE_INCR }, + { smnreg_0x1C420100, 4, USR_CAKE_INCR }, + { smnreg_0x1B310500, 4, USR_LINK_INCR }, + { smnreg_0x1C300400, 2, USR_CP_INCR }, +}; + +static ssize_t aqua_vanjaram_read_usr_state(struct amdgpu_device *adev, + void *buf, size_t max_size, + int reg_state) +{ + uint32_t start_addr, incrx, num_regs, szbuf, num_smn; + struct amdgpu_reg_state_usr_v1_0 *usr_reg_state; + struct amdgpu_regs_usr_v1_0 *usr_regs; + struct amdgpu_smn_reg_data *reg_data; + const int max_usr_instances = 4; + struct aqua_reg_list *reg_addrs; + int inst = 0, i, n, r, arr_size; + void *p; + + if (!buf || !max_size) + return -EINVAL; + + switch (reg_state) { + case AMDGPU_REG_STATE_TYPE_USR: + arr_size = ARRAY_SIZE(usr_reg_addrs); + reg_addrs = usr_reg_addrs; + num_smn = NUM_USR_SMN_REGS; + break; + case AMDGPU_REG_STATE_TYPE_USR_1: + arr_size = ARRAY_SIZE(usr1_reg_addrs); + reg_addrs = usr1_reg_addrs; + num_smn = NUM_USR1_SMN_REGS; + break; + default: + return -EINVAL; + } + + usr_reg_state = (struct amdgpu_reg_state_usr_v1_0 *)buf; + + szbuf = sizeof(*usr_reg_state) + amdgpu_reginst_size(max_usr_instances, + sizeof(*usr_regs), + num_smn); + if (max_size < szbuf) + return -EOVERFLOW; + + p = &usr_reg_state->usr_state_regs[0]; + for_each_inst(i, adev->aid_mask) { + usr_regs = (struct amdgpu_regs_usr_v1_0 *)p; + usr_regs->inst_header.instance = inst++; + usr_regs->inst_header.state = AMDGPU_INST_S_OK; + usr_regs->inst_header.num_smn_regs = num_smn; + reg_data = usr_regs->smn_reg_values; + + for (r = 0; r < arr_size; r++) { + start_addr = reg_addrs[r].start_addr; + incrx = reg_addrs[r].incrx; + num_regs = reg_addrs[r].num_regs; + for (n = 0; n < num_regs; n++) { + aqua_read_smn_ext(adev, reg_data, + start_addr + n * incrx, i); + reg_data++; + } + } + p = reg_data; + } + + usr_reg_state->common_header.structure_size = szbuf; + usr_reg_state->common_header.format_revision = 1; + usr_reg_state->common_header.content_revision = 0; + usr_reg_state->common_header.state_type = AMDGPU_REG_STATE_TYPE_USR; + usr_reg_state->common_header.num_instances = max_usr_instances; + + return usr_reg_state->common_header.structure_size; +} + +ssize_t aqua_vanjaram_get_reg_state(struct amdgpu_device *adev, + enum amdgpu_reg_state reg_state, void *buf, + size_t max_size) +{ + ssize_t size; + + switch (reg_state) { + case AMDGPU_REG_STATE_TYPE_PCIE: + size = aqua_vanjaram_read_pcie_state(adev, buf, max_size); + break; + case AMDGPU_REG_STATE_TYPE_XGMI: + size = aqua_vanjaram_read_xgmi_state(adev, buf, max_size); + break; + case AMDGPU_REG_STATE_TYPE_WAFL: + size = aqua_vanjaram_read_wafl_state(adev, buf, max_size); + break; + case AMDGPU_REG_STATE_TYPE_USR: + size = aqua_vanjaram_read_usr_state(adev, buf, max_size, + AMDGPU_REG_STATE_TYPE_USR); + break; + case AMDGPU_REG_STATE_TYPE_USR_1: + size = aqua_vanjaram_read_usr_state( + adev, buf, max_size, AMDGPU_REG_STATE_TYPE_USR_1); + break; + default: + return -EINVAL; + } + + return size; +} diff --git a/drivers/gpu/drm/amd/amdgpu/athub_v3_0.c b/drivers/gpu/drm/amd/amdgpu/athub_v3_0.c index f0737fb3a999..d1bba9c64e16 100644 --- a/drivers/gpu/drm/amd/amdgpu/athub_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/athub_v3_0.c @@ -30,6 +30,8 @@ #define regATHUB_MISC_CNTL_V3_0_1 0x00d7 #define regATHUB_MISC_CNTL_V3_0_1_BASE_IDX 0 +#define regATHUB_MISC_CNTL_V3_3_0 0x00d8 +#define regATHUB_MISC_CNTL_V3_3_0_BASE_IDX 0 static uint32_t athub_v3_0_get_cg_cntl(struct amdgpu_device *adev) @@ -40,6 +42,9 @@ static uint32_t athub_v3_0_get_cg_cntl(struct amdgpu_device *adev) case IP_VERSION(3, 0, 1): data = RREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL_V3_0_1); break; + case IP_VERSION(3, 3, 0): + data = RREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL_V3_3_0); + break; default: data = RREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL); break; @@ -53,6 +58,9 @@ static void athub_v3_0_set_cg_cntl(struct amdgpu_device *adev, uint32_t data) case IP_VERSION(3, 0, 1): WREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL_V3_0_1, data); break; + case IP_VERSION(3, 3, 0): + WREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL_V3_3_0, data); + break; default: WREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL, data); break; diff --git a/drivers/gpu/drm/amd/amdgpu/athub_v4_1_0.c b/drivers/gpu/drm/amd/amdgpu/athub_v4_1_0.c new file mode 100644 index 000000000000..8a0773b80864 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/athub_v4_1_0.c @@ -0,0 +1,122 @@ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "amdgpu.h" +#include "athub_v4_1_0.h" +#include "athub/athub_4_1_0_offset.h" +#include "athub/athub_4_1_0_sh_mask.h" +#include "soc15_common.h" + +static uint32_t athub_v4_1_0_get_cg_cntl(struct amdgpu_device *adev) +{ + uint32_t data; + + switch (amdgpu_ip_version(adev, ATHUB_HWIP, 0)) { + case IP_VERSION(4, 1, 0): + data = RREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL); + break; + default: + data = 0; + break; + } + return data; +} + +static void athub_v4_1_0_set_cg_cntl(struct amdgpu_device *adev, uint32_t data) +{ + switch (amdgpu_ip_version(adev, ATHUB_HWIP, 0)) { + case IP_VERSION(4, 1, 0): + WREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL, data); + break; + default: + break; + } +} + +static void +athub_v4_1_0_update_medium_grain_clock_gating(struct amdgpu_device *adev, + bool enable) +{ + uint32_t def, data; + + def = data = athub_v4_1_0_get_cg_cntl(adev); + + if (enable && (adev->cg_flags & AMD_CG_SUPPORT_ATHUB_MGCG)) + data |= ATHUB_MISC_CNTL__CG_ENABLE_MASK; + else + data &= ~ATHUB_MISC_CNTL__CG_ENABLE_MASK; + + if (def != data) + athub_v4_1_0_set_cg_cntl(adev, data); +} + +static void +athub_v4_1_0_update_medium_grain_light_sleep(struct amdgpu_device *adev, + bool enable) +{ + uint32_t def, data; + + def = data = athub_v4_1_0_get_cg_cntl(adev); + + if (enable && (adev->cg_flags & AMD_CG_SUPPORT_ATHUB_LS)) + data |= ATHUB_MISC_CNTL__CG_MEM_LS_ENABLE_MASK; + else + data &= ~ATHUB_MISC_CNTL__CG_MEM_LS_ENABLE_MASK; + + if (def != data) + athub_v4_1_0_set_cg_cntl(adev, data); +} + +int athub_v4_1_0_set_clockgating(struct amdgpu_device *adev, + enum amd_clockgating_state state) +{ + if (amdgpu_sriov_vf(adev)) + return 0; + + switch (amdgpu_ip_version(adev, ATHUB_HWIP, 0)) { + case IP_VERSION(4, 1, 0): + athub_v4_1_0_update_medium_grain_clock_gating(adev, + state == AMD_CG_STATE_GATE); + athub_v4_1_0_update_medium_grain_light_sleep(adev, + state == AMD_CG_STATE_GATE); + break; + default: + break; + } + + return 0; +} + +void athub_v4_1_0_get_clockgating(struct amdgpu_device *adev, u64 *flags) +{ + int data; + + /* AMD_CG_SUPPORT_ATHUB_MGCG */ + data = athub_v4_1_0_get_cg_cntl(adev); + if (data & ATHUB_MISC_CNTL__CG_ENABLE_MASK) + *flags |= AMD_CG_SUPPORT_ATHUB_MGCG; + + /* AMD_CG_SUPPORT_ATHUB_LS */ + if (data & ATHUB_MISC_CNTL__CG_MEM_LS_ENABLE_MASK) + *flags |= AMD_CG_SUPPORT_ATHUB_LS; +} diff --git a/drivers/gpu/drm/amd/amdgpu/athub_v4_1_0.h b/drivers/gpu/drm/amd/amdgpu/athub_v4_1_0.h new file mode 100644 index 000000000000..4d18d0998fa8 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/athub_v4_1_0.h @@ -0,0 +1,30 @@ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#ifndef __ATHUB_V4_1_0_H__ +#define __ATHUB_V4_1_0_H__ + +int athub_v4_1_0_set_clockgating(struct amdgpu_device *adev, + enum amd_clockgating_state state); +void athub_v4_1_0_get_clockgating(struct amdgpu_device *adev, u64 *flags); + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/atom.c b/drivers/gpu/drm/amd/amdgpu/atom.c index 2c221000782c..72362df352f6 100644 --- a/drivers/gpu/drm/amd/amdgpu/atom.c +++ b/drivers/gpu/drm/amd/amdgpu/atom.c @@ -62,6 +62,7 @@ typedef struct { struct atom_context *ctx; uint32_t *ps, *ws; + int ps_size, ws_size; int ps_shift; uint16_t start; unsigned last_jump; @@ -70,8 +71,8 @@ typedef struct { } atom_exec_context; int amdgpu_atom_debug; -static int amdgpu_atom_execute_table_locked(struct atom_context *ctx, int index, uint32_t *params); -int amdgpu_atom_execute_table(struct atom_context *ctx, int index, uint32_t *params); +static int amdgpu_atom_execute_table_locked(struct atom_context *ctx, int index, uint32_t *params, int params_size); +int amdgpu_atom_execute_table(struct atom_context *ctx, int index, uint32_t *params, int params_size); static uint32_t atom_arg_mask[8] = { 0xFFFFFFFF, 0xFFFF, 0xFFFF00, 0xFFFF0000, 0xFF, 0xFF00, 0xFF0000, @@ -223,7 +224,10 @@ static uint32_t atom_get_src_int(atom_exec_context *ctx, uint8_t attr, (*ptr)++; /* get_unaligned_le32 avoids unaligned accesses from atombios * tables, noticed on a DEC Alpha. */ - val = get_unaligned_le32((u32 *)&ctx->ps[idx]); + if (idx < ctx->ps_size) + val = get_unaligned_le32((u32 *)&ctx->ps[idx]); + else + pr_info("PS index out of range: %i > %i\n", idx, ctx->ps_size); if (print) DEBUG("PS[0x%02X,0x%04X]", idx, val); break; @@ -261,7 +265,10 @@ static uint32_t atom_get_src_int(atom_exec_context *ctx, uint8_t attr, val = gctx->reg_block; break; default: - val = ctx->ws[idx]; + if (idx < ctx->ws_size) + val = ctx->ws[idx]; + else + pr_info("WS index out of range: %i > %i\n", idx, ctx->ws_size); } break; case ATOM_ARG_ID: @@ -313,7 +320,7 @@ static uint32_t atom_get_src_int(atom_exec_context *ctx, uint8_t attr, DEBUG("IMM 0x%02X\n", val); return val; } - return 0; + break; case ATOM_ARG_PLL: idx = U8(*ptr); (*ptr)++; @@ -395,7 +402,6 @@ static void atom_skip_src_int(atom_exec_context *ctx, uint8_t attr, int *ptr) (*ptr)++; return; } - return; } } @@ -496,6 +502,10 @@ static void atom_put_dst(atom_exec_context *ctx, int arg, uint8_t attr, idx = U8(*ptr); (*ptr)++; DEBUG("PS[0x%02X]", idx); + if (idx >= ctx->ps_size) { + pr_info("PS index out of range: %i > %i\n", idx, ctx->ps_size); + return; + } ctx->ps[idx] = cpu_to_le32(val); break; case ATOM_ARG_WS: @@ -528,6 +538,10 @@ static void atom_put_dst(atom_exec_context *ctx, int arg, uint8_t attr, gctx->reg_block = val; break; default: + if (idx >= ctx->ws_size) { + pr_info("WS index out of range: %i > %i\n", idx, ctx->ws_size); + return; + } ctx->ws[idx] = val; } break; @@ -625,7 +639,7 @@ static void atom_op_calltable(atom_exec_context *ctx, int *ptr, int arg) else SDEBUG(" table: %d\n", idx); if (U16(ctx->ctx->cmd_table + 4 + 2 * idx)) - r = amdgpu_atom_execute_table_locked(ctx->ctx, idx, ctx->ps + ctx->ps_shift); + r = amdgpu_atom_execute_table_locked(ctx->ctx, idx, ctx->ps + ctx->ps_shift, ctx->ps_size - ctx->ps_shift); if (r) { ctx->abort = true; } @@ -1204,7 +1218,7 @@ static struct { atom_op_div32, ATOM_ARG_WS}, }; -static int amdgpu_atom_execute_table_locked(struct atom_context *ctx, int index, uint32_t *params) +static int amdgpu_atom_execute_table_locked(struct atom_context *ctx, int index, uint32_t *params, int params_size) { int base = CU16(ctx->cmd_table + 4 + 2 * index); int len, ws, ps, ptr; @@ -1226,12 +1240,16 @@ static int amdgpu_atom_execute_table_locked(struct atom_context *ctx, int index, ectx.ps_shift = ps / 4; ectx.start = base; ectx.ps = params; + ectx.ps_size = params_size; ectx.abort = false; ectx.last_jump = 0; - if (ws) + if (ws) { ectx.ws = kcalloc(4, ws, GFP_KERNEL); - else + ectx.ws_size = ws; + } else { ectx.ws = NULL; + ectx.ws_size = 0; + } debug_depth++; while (1) { @@ -1265,7 +1283,7 @@ free: return ret; } -int amdgpu_atom_execute_table(struct atom_context *ctx, int index, uint32_t *params) +int amdgpu_atom_execute_table(struct atom_context *ctx, int index, uint32_t *params, int params_size) { int r; @@ -1281,7 +1299,7 @@ int amdgpu_atom_execute_table(struct atom_context *ctx, int index, uint32_t *par /* reset divmul */ ctx->divmul[0] = 0; ctx->divmul[1] = 0; - r = amdgpu_atom_execute_table_locked(ctx, index, params); + r = amdgpu_atom_execute_table_locked(ctx, index, params, params_size); mutex_unlock(&ctx->mutex); return r; } @@ -1553,7 +1571,7 @@ int amdgpu_atom_asic_init(struct atom_context *ctx) if (!CU16(ctx->cmd_table + 4 + 2 * ATOM_CMD_INIT)) return 1; - ret = amdgpu_atom_execute_table(ctx, ATOM_CMD_INIT, ps); + ret = amdgpu_atom_execute_table(ctx, ATOM_CMD_INIT, ps, 16); if (ret) return ret; diff --git a/drivers/gpu/drm/amd/amdgpu/atom.h b/drivers/gpu/drm/amd/amdgpu/atom.h index c11cf18a0f18..b807f6639a4c 100644 --- a/drivers/gpu/drm/amd/amdgpu/atom.h +++ b/drivers/gpu/drm/amd/amdgpu/atom.h @@ -156,7 +156,7 @@ struct atom_context { extern int amdgpu_atom_debug; struct atom_context *amdgpu_atom_parse(struct card_info *card, void *bios); -int amdgpu_atom_execute_table(struct atom_context *ctx, int index, uint32_t *params); +int amdgpu_atom_execute_table(struct atom_context *ctx, int index, uint32_t *params, int params_size); int amdgpu_atom_asic_init(struct atom_context *ctx); void amdgpu_atom_destroy(struct atom_context *ctx); bool amdgpu_atom_parse_data_header(struct atom_context *ctx, int index, uint16_t *size, diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_crtc.c b/drivers/gpu/drm/amd/amdgpu/atombios_crtc.c index 10098fdd33fc..3dfc28840a7d 100644 --- a/drivers/gpu/drm/amd/amdgpu/atombios_crtc.c +++ b/drivers/gpu/drm/amd/amdgpu/atombios_crtc.c @@ -77,7 +77,7 @@ void amdgpu_atombios_crtc_overscan_setup(struct drm_crtc *crtc, args.usOverscanTop = cpu_to_le16(amdgpu_crtc->v_border); break; } - amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args); + amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args)); } void amdgpu_atombios_crtc_scaler_setup(struct drm_crtc *crtc) @@ -106,7 +106,7 @@ void amdgpu_atombios_crtc_scaler_setup(struct drm_crtc *crtc) args.ucEnable = ATOM_SCALER_DISABLE; break; } - amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args); + amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args)); } void amdgpu_atombios_crtc_lock(struct drm_crtc *crtc, int lock) @@ -123,7 +123,7 @@ void amdgpu_atombios_crtc_lock(struct drm_crtc *crtc, int lock) args.ucCRTC = amdgpu_crtc->crtc_id; args.ucEnable = lock; - amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args); + amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args)); } void amdgpu_atombios_crtc_enable(struct drm_crtc *crtc, int state) @@ -139,7 +139,7 @@ void amdgpu_atombios_crtc_enable(struct drm_crtc *crtc, int state) args.ucCRTC = amdgpu_crtc->crtc_id; args.ucEnable = state; - amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args); + amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args)); } void amdgpu_atombios_crtc_blank(struct drm_crtc *crtc, int state) @@ -155,7 +155,7 @@ void amdgpu_atombios_crtc_blank(struct drm_crtc *crtc, int state) args.ucCRTC = amdgpu_crtc->crtc_id; args.ucBlanking = state; - amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args); + amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args)); } void amdgpu_atombios_crtc_powergate(struct drm_crtc *crtc, int state) @@ -171,7 +171,7 @@ void amdgpu_atombios_crtc_powergate(struct drm_crtc *crtc, int state) args.ucDispPipeId = amdgpu_crtc->crtc_id; args.ucEnable = state; - amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args); + amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args)); } void amdgpu_atombios_crtc_powergate_init(struct amdgpu_device *adev) @@ -183,7 +183,7 @@ void amdgpu_atombios_crtc_powergate_init(struct amdgpu_device *adev) args.ucEnable = ATOM_INIT; - amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args); + amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args)); } void amdgpu_atombios_crtc_set_dtd_timing(struct drm_crtc *crtc, @@ -228,7 +228,7 @@ void amdgpu_atombios_crtc_set_dtd_timing(struct drm_crtc *crtc, args.susModeMiscInfo.usAccess = cpu_to_le16(misc); args.ucCRTC = amdgpu_crtc->crtc_id; - amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args); + amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args)); } union atom_enable_ss { @@ -293,7 +293,7 @@ static void amdgpu_atombios_crtc_program_ss(struct amdgpu_device *adev, args.v3.usSpreadSpectrumStep = cpu_to_le16(ss->step); args.v3.ucEnable = enable; - amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args); + amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args)); } union adjust_pixel_clock { @@ -395,7 +395,7 @@ static u32 amdgpu_atombios_crtc_adjust_pll(struct drm_crtc *crtc, ADJUST_DISPLAY_CONFIG_SS_ENABLE; amdgpu_atom_execute_table(adev->mode_info.atom_context, - index, (uint32_t *)&args); + index, (uint32_t *)&args, sizeof(args)); adjusted_clock = le16_to_cpu(args.v1.usPixelClock) * 10; break; case 3: @@ -428,7 +428,7 @@ static u32 amdgpu_atombios_crtc_adjust_pll(struct drm_crtc *crtc, args.v3.sInput.ucExtTransmitterID = 0; amdgpu_atom_execute_table(adev->mode_info.atom_context, - index, (uint32_t *)&args); + index, (uint32_t *)&args, sizeof(args)); adjusted_clock = le32_to_cpu(args.v3.sOutput.ulDispPllFreq) * 10; if (args.v3.sOutput.ucRefDiv) { amdgpu_crtc->pll_flags |= AMDGPU_PLL_USE_FRAC_FB_DIV; @@ -514,7 +514,7 @@ void amdgpu_atombios_crtc_set_disp_eng_pll(struct amdgpu_device *adev, DRM_ERROR("Unknown table version %d %d\n", frev, crev); return; } - amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args); + amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args)); } union set_dce_clock { @@ -544,7 +544,7 @@ u32 amdgpu_atombios_crtc_set_dce_clock(struct amdgpu_device *adev, args.v2_1.asParam.ulDCEClkFreq = cpu_to_le32(freq); /* 10kHz units */ args.v2_1.asParam.ucDCEClkType = clk_type; args.v2_1.asParam.ucDCEClkSrc = clk_src; - amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args); + amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args)); ret_freq = le32_to_cpu(args.v2_1.asParam.ulDCEClkFreq) * 10; break; default: @@ -740,7 +740,7 @@ void amdgpu_atombios_crtc_program_pll(struct drm_crtc *crtc, return; } - amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args); + amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args)); } int amdgpu_atombios_crtc_prepare_pll(struct drm_crtc *crtc, diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_dp.c b/drivers/gpu/drm/amd/amdgpu/atombios_dp.c index 87c41e0e9b7c..622634c08c7b 100644 --- a/drivers/gpu/drm/amd/amdgpu/atombios_dp.c +++ b/drivers/gpu/drm/amd/amdgpu/atombios_dp.c @@ -83,7 +83,7 @@ static int amdgpu_atombios_dp_process_aux_ch(struct amdgpu_i2c_chan *chan, args.v2.ucDelay = delay / 10; args.v2.ucHPD_ID = chan->rec.hpd; - amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args); + amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args)); *ack = args.v2.ucReplyStatus; @@ -301,7 +301,7 @@ static u8 amdgpu_atombios_dp_encoder_service(struct amdgpu_device *adev, args.ucLaneNum = lane_num; args.ucStatus = 0; - amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args); + amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args)); return args.ucStatus; } diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c b/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c index 3ee219aa2891..25feab188dfe 100644 --- a/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c +++ b/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c @@ -28,6 +28,7 @@ #include <acpi/video.h> +#include <drm/drm_edid.h> #include <drm/amdgpu_drm.h> #include "amdgpu.h" #include "amdgpu_connectors.h" @@ -334,7 +335,7 @@ amdgpu_atombios_encoder_setup_dac(struct drm_encoder *encoder, int action) args.ucDacStandard = ATOM_DAC1_PS2; args.usPixelClock = cpu_to_le16(amdgpu_encoder->pixel_clock / 10); - amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args); + amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args)); } @@ -431,7 +432,7 @@ amdgpu_atombios_encoder_setup_dvo(struct drm_encoder *encoder, int action) break; } - amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args); + amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args)); } int amdgpu_atombios_encoder_get_encoder_mode(struct drm_encoder *encoder) @@ -731,7 +732,7 @@ amdgpu_atombios_encoder_setup_dig_encoder(struct drm_encoder *encoder, break; } - amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args); + amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args)); } @@ -1135,7 +1136,7 @@ amdgpu_atombios_encoder_setup_dig_transmitter(struct drm_encoder *encoder, int a break; } - amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args); + amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args)); } bool @@ -1163,7 +1164,7 @@ amdgpu_atombios_encoder_set_edp_panel_power(struct drm_connector *connector, args.v1.ucAction = action; - amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args); + amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args)); /* wait for the panel to power up */ if (action == ATOM_TRANSMITTER_ACTION_POWER_ON) { @@ -1287,7 +1288,7 @@ amdgpu_atombios_encoder_setup_external_encoder(struct drm_encoder *encoder, DRM_ERROR("Unknown table version: %d, %d\n", frev, crev); return; } - amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args); + amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args)); } static void @@ -1632,7 +1633,7 @@ amdgpu_atombios_encoder_set_crtc_source(struct drm_encoder *encoder) return; } - amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args); + amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args)); } /* This only needs to be called once at startup */ @@ -1705,7 +1706,7 @@ amdgpu_atombios_encoder_dac_load_detect(struct drm_encoder *encoder, args.sDacload.ucMisc = DAC_LOAD_MISC_YPrPb; } - amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args); + amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args)); return true; } else diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_i2c.c b/drivers/gpu/drm/amd/amdgpu/atombios_i2c.c index af0335535f82..a6501114322f 100644 --- a/drivers/gpu/drm/amd/amdgpu/atombios_i2c.c +++ b/drivers/gpu/drm/amd/amdgpu/atombios_i2c.c @@ -86,7 +86,7 @@ static int amdgpu_atombios_i2c_process_i2c_ch(struct amdgpu_i2c_chan *chan, args.ucSlaveAddr = slave_addr << 1; args.ucLineNumber = chan->rec.i2c_id; - amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args); + amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args)); /* error */ if (args.ucStatus != HW_ASSISTED_I2C_STATUS_SUCCESS) { @@ -172,5 +172,5 @@ void amdgpu_atombios_i2c_channel_trans(struct amdgpu_device *adev, u8 slave_addr args.ucSlaveAddr = slave_addr; args.ucLineNumber = line_number; - amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args); + amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args, sizeof(args)); } diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c index e63abdf52b6c..fdbc26346b54 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik.c +++ b/drivers/gpu/drm/amd/amdgpu/cik.c @@ -1375,14 +1375,14 @@ static int cik_asic_pci_config_reset(struct amdgpu_device *adev) return r; } -static bool cik_asic_supports_baco(struct amdgpu_device *adev) +static int cik_asic_supports_baco(struct amdgpu_device *adev) { switch (adev->asic_type) { case CHIP_BONAIRE: case CHIP_HAWAII: return amdgpu_dpm_is_baco_supported(adev); default: - return false; + return 0; } } @@ -1638,28 +1638,18 @@ static void cik_pcie_gen3_enable(struct amdgpu_device *adev) PCI_EXP_LNKCTL_HAWD); /* linkctl2 */ - pcie_capability_read_word(root, PCI_EXP_LNKCTL2, - &tmp16); - tmp16 &= ~(PCI_EXP_LNKCTL2_ENTER_COMP | - PCI_EXP_LNKCTL2_TX_MARGIN); - tmp16 |= (bridge_cfg2 & - (PCI_EXP_LNKCTL2_ENTER_COMP | - PCI_EXP_LNKCTL2_TX_MARGIN)); - pcie_capability_write_word(root, - PCI_EXP_LNKCTL2, - tmp16); - - pcie_capability_read_word(adev->pdev, - PCI_EXP_LNKCTL2, - &tmp16); - tmp16 &= ~(PCI_EXP_LNKCTL2_ENTER_COMP | - PCI_EXP_LNKCTL2_TX_MARGIN); - tmp16 |= (gpu_cfg2 & - (PCI_EXP_LNKCTL2_ENTER_COMP | - PCI_EXP_LNKCTL2_TX_MARGIN)); - pcie_capability_write_word(adev->pdev, - PCI_EXP_LNKCTL2, - tmp16); + pcie_capability_clear_and_set_word(root, PCI_EXP_LNKCTL2, + PCI_EXP_LNKCTL2_ENTER_COMP | + PCI_EXP_LNKCTL2_TX_MARGIN, + bridge_cfg2 & + (PCI_EXP_LNKCTL2_ENTER_COMP | + PCI_EXP_LNKCTL2_TX_MARGIN)); + pcie_capability_clear_and_set_word(adev->pdev, PCI_EXP_LNKCTL2, + PCI_EXP_LNKCTL2_ENTER_COMP | + PCI_EXP_LNKCTL2_TX_MARGIN, + gpu_cfg2 & + (PCI_EXP_LNKCTL2_ENTER_COMP | + PCI_EXP_LNKCTL2_TX_MARGIN)); tmp = RREG32_PCIE(ixPCIE_LC_CNTL4); tmp &= ~PCIE_LC_CNTL4__LC_SET_QUIESCE_MASK; @@ -1674,16 +1664,15 @@ static void cik_pcie_gen3_enable(struct amdgpu_device *adev) speed_cntl &= ~PCIE_LC_SPEED_CNTL__LC_FORCE_DIS_SW_SPEED_CHANGE_MASK; WREG32_PCIE(ixPCIE_LC_SPEED_CNTL, speed_cntl); - pcie_capability_read_word(adev->pdev, PCI_EXP_LNKCTL2, &tmp16); - tmp16 &= ~PCI_EXP_LNKCTL2_TLS; - + tmp16 = 0; if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3) tmp16 |= PCI_EXP_LNKCTL2_TLS_8_0GT; /* gen3 */ else if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2) tmp16 |= PCI_EXP_LNKCTL2_TLS_5_0GT; /* gen2 */ else tmp16 |= PCI_EXP_LNKCTL2_TLS_2_5GT; /* gen1 */ - pcie_capability_write_word(adev->pdev, PCI_EXP_LNKCTL2, tmp16); + pcie_capability_clear_and_set_word(adev->pdev, PCI_EXP_LNKCTL2, + PCI_EXP_LNKCTL2_TLS, tmp16); speed_cntl = RREG32_PCIE(ixPCIE_LC_SPEED_CNTL); speed_cntl |= PCIE_LC_SPEED_CNTL__LC_INITIATE_LINK_SPEED_CHANGE_MASK; @@ -1709,10 +1698,6 @@ static void cik_program_aspm(struct amdgpu_device *adev) if (pci_is_root_bus(adev->pdev->bus)) return; - /* XXX double check APUs */ - if (adev->flags & AMD_IS_APU) - return; - orig = data = RREG32_PCIE(ixPCIE_LC_N_FTS_CNTL); data &= ~PCIE_LC_N_FTS_CNTL__LC_XMIT_N_FTS_MASK; data |= (0x24 << PCIE_LC_N_FTS_CNTL__LC_XMIT_N_FTS__SHIFT) | diff --git a/drivers/gpu/drm/amd/amdgpu/cik_ih.c b/drivers/gpu/drm/amd/amdgpu/cik_ih.c index 6f7c031dd197..f24e34dc33d1 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/cik_ih.c @@ -204,6 +204,12 @@ static u32 cik_ih_get_wptr(struct amdgpu_device *adev, tmp = RREG32(mmIH_RB_CNTL); tmp |= IH_RB_CNTL__WPTR_OVERFLOW_CLEAR_MASK; WREG32(mmIH_RB_CNTL, tmp); + + /* Unset the CLEAR_OVERFLOW bit immediately so new overflows + * can be detected. + */ + tmp &= ~IH_RB_CNTL__WPTR_OVERFLOW_CLEAR_MASK; + WREG32(mmIH_RB_CNTL, tmp); } return (wptr & ih->ptr_mask); } diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c index ee5dce6f6043..a3fccc4c1f43 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c @@ -308,8 +308,6 @@ static void cik_sdma_gfx_stop(struct amdgpu_device *adev) u32 rb_cntl; int i; - amdgpu_sdma_unset_buffer_funcs_helper(adev); - for (i = 0; i < adev->sdma.num_instances; i++) { rb_cntl = RREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i]); rb_cntl &= ~SDMA0_GFX_RB_CNTL__RB_ENABLE_MASK; @@ -498,9 +496,6 @@ static int cik_sdma_gfx_resume(struct amdgpu_device *adev) r = amdgpu_ring_test_helper(ring); if (r) return r; - - if (adev->mman.buffer_funcs_ring == ring) - amdgpu_ttm_set_buffer_funcs_status(adev, true); } return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/clearstate_gfx9.h b/drivers/gpu/drm/amd/amdgpu/clearstate_gfx9.h index 567a904804bc..9c85ca6358c1 100644 --- a/drivers/gpu/drm/amd/amdgpu/clearstate_gfx9.h +++ b/drivers/gpu/drm/amd/amdgpu/clearstate_gfx9.h @@ -21,8 +21,7 @@ * */ -static const unsigned int gfx9_SECT_CONTEXT_def_1[] = -{ +static const unsigned int gfx9_SECT_CONTEXT_def_1[] = { 0x00000000, // DB_RENDER_CONTROL 0x00000000, // DB_COUNT_CONTROL 0x00000000, // DB_DEPTH_VIEW @@ -236,8 +235,7 @@ static const unsigned int gfx9_SECT_CONTEXT_def_1[] = 0x00000000, // PA_SC_VPORT_ZMIN_15 0x3f800000, // PA_SC_VPORT_ZMAX_15 }; -static const unsigned int gfx9_SECT_CONTEXT_def_2[] = -{ +static const unsigned int gfx9_SECT_CONTEXT_def_2[] = { 0x00000000, // PA_SC_SCREEN_EXTENT_CONTROL 0x00000000, // PA_SC_TILE_STEERING_OVERRIDE 0x00000000, // CP_PERFMON_CNTX_CNTL @@ -521,15 +519,13 @@ static const unsigned int gfx9_SECT_CONTEXT_def_2[] = 0x00000000, // CB_MRT6_EPITCH 0x00000000, // CB_MRT7_EPITCH }; -static const unsigned int gfx9_SECT_CONTEXT_def_3[] = -{ +static const unsigned int gfx9_SECT_CONTEXT_def_3[] = { 0x00000000, // PA_CL_POINT_X_RAD 0x00000000, // PA_CL_POINT_Y_RAD 0x00000000, // PA_CL_POINT_SIZE 0x00000000, // PA_CL_POINT_CULL_RAD }; -static const unsigned int gfx9_SECT_CONTEXT_def_4[] = -{ +static const unsigned int gfx9_SECT_CONTEXT_def_4[] = { 0x00000000, // DB_DEPTH_CONTROL 0x00000000, // DB_EQAA 0x00000000, // CB_COLOR_CONTROL @@ -688,17 +684,14 @@ static const unsigned int gfx9_SECT_CONTEXT_def_4[] = 0x00000000, // VGT_GS_OUT_PRIM_TYPE 0x00000000, // IA_ENHANCE }; -static const unsigned int gfx9_SECT_CONTEXT_def_5[] = -{ +static const unsigned int gfx9_SECT_CONTEXT_def_5[] = { 0x00000000, // WD_ENHANCE 0x00000000, // VGT_PRIMITIVEID_EN }; -static const unsigned int gfx9_SECT_CONTEXT_def_6[] = -{ +static const unsigned int gfx9_SECT_CONTEXT_def_6[] = { 0x00000000, // VGT_PRIMITIVEID_RESET }; -static const unsigned int gfx9_SECT_CONTEXT_def_7[] = -{ +static const unsigned int gfx9_SECT_CONTEXT_def_7[] = { 0x00000000, // VGT_GS_MAX_PRIMS_PER_SUBGROUP 0x00000000, // VGT_DRAW_PAYLOAD_CNTL 0, // HOLE @@ -766,8 +759,7 @@ static const unsigned int gfx9_SECT_CONTEXT_def_7[] = 0x00000000, // VGT_STRMOUT_CONFIG 0x00000000, // VGT_STRMOUT_BUFFER_CONFIG }; -static const unsigned int gfx9_SECT_CONTEXT_def_8[] = -{ +static const unsigned int gfx9_SECT_CONTEXT_def_8[] = { 0x00000000, // PA_SC_CENTROID_PRIORITY_0 0x00000000, // PA_SC_CENTROID_PRIORITY_1 0x00001000, // PA_SC_LINE_CNTL @@ -924,8 +916,7 @@ static const unsigned int gfx9_SECT_CONTEXT_def_8[] = 0x00000000, // CB_COLOR7_DCC_BASE 0x00000000, // CB_COLOR7_DCC_BASE_EXT }; -static const struct cs_extent_def gfx9_SECT_CONTEXT_defs[] = -{ +static const struct cs_extent_def gfx9_SECT_CONTEXT_defs[] = { {gfx9_SECT_CONTEXT_def_1, 0x0000a000, 212 }, {gfx9_SECT_CONTEXT_def_2, 0x0000a0d6, 282 }, {gfx9_SECT_CONTEXT_def_3, 0x0000a1f5, 4 }, diff --git a/drivers/gpu/drm/amd/amdgpu/clearstate_si.h b/drivers/gpu/drm/amd/amdgpu/clearstate_si.h index 66e39cdb5cb0..5fd96ddd7f0f 100644 --- a/drivers/gpu/drm/amd/amdgpu/clearstate_si.h +++ b/drivers/gpu/drm/amd/amdgpu/clearstate_si.h @@ -21,8 +21,7 @@ * */ -static const u32 si_SECT_CONTEXT_def_1[] = -{ +static const u32 si_SECT_CONTEXT_def_1[] = { 0x00000000, // DB_RENDER_CONTROL 0x00000000, // DB_COUNT_CONTROL 0x00000000, // DB_DEPTH_VIEW @@ -236,8 +235,7 @@ static const u32 si_SECT_CONTEXT_def_1[] = 0x00000000, // PA_SC_VPORT_ZMIN_15 0x3f800000, // PA_SC_VPORT_ZMAX_15 }; -static const u32 si_SECT_CONTEXT_def_2[] = -{ +static const u32 si_SECT_CONTEXT_def_2[] = { 0x00000000, // CP_PERFMON_CNTX_CNTL 0x00000000, // CP_RINGID 0x00000000, // CP_VMID @@ -511,8 +509,7 @@ static const u32 si_SECT_CONTEXT_def_2[] = 0x00000000, // CB_BLEND6_CONTROL 0x00000000, // CB_BLEND7_CONTROL }; -static const u32 si_SECT_CONTEXT_def_3[] = -{ +static const u32 si_SECT_CONTEXT_def_3[] = { 0x00000000, // PA_CL_POINT_X_RAD 0x00000000, // PA_CL_POINT_Y_RAD 0x00000000, // PA_CL_POINT_SIZE @@ -520,8 +517,7 @@ static const u32 si_SECT_CONTEXT_def_3[] = 0x00000000, // VGT_DMA_BASE_HI 0x00000000, // VGT_DMA_BASE }; -static const u32 si_SECT_CONTEXT_def_4[] = -{ +static const u32 si_SECT_CONTEXT_def_4[] = { 0x00000000, // DB_DEPTH_CONTROL 0x00000000, // DB_EQAA 0x00000000, // CB_COLOR_CONTROL @@ -680,16 +676,13 @@ static const u32 si_SECT_CONTEXT_def_4[] = 0x00000000, // VGT_GS_OUT_PRIM_TYPE 0x00000000, // IA_ENHANCE }; -static const u32 si_SECT_CONTEXT_def_5[] = -{ +static const u32 si_SECT_CONTEXT_def_5[] = { 0x00000000, // VGT_PRIMITIVEID_EN }; -static const u32 si_SECT_CONTEXT_def_6[] = -{ +static const u32 si_SECT_CONTEXT_def_6[] = { 0x00000000, // VGT_PRIMITIVEID_RESET }; -static const u32 si_SECT_CONTEXT_def_7[] = -{ +static const u32 si_SECT_CONTEXT_def_7[] = { 0x00000000, // VGT_MULTI_PRIM_IB_RESET_EN 0, // HOLE 0, // HOLE @@ -924,8 +917,7 @@ static const u32 si_SECT_CONTEXT_def_7[] = 0x00000000, // CB_COLOR7_CLEAR_WORD0 0x00000000, // CB_COLOR7_CLEAR_WORD1 }; -static const struct cs_extent_def si_SECT_CONTEXT_defs[] = -{ +static const struct cs_extent_def si_SECT_CONTEXT_defs[] = { {si_SECT_CONTEXT_def_1, 0x0000a000, 212 }, {si_SECT_CONTEXT_def_2, 0x0000a0d8, 272 }, {si_SECT_CONTEXT_def_3, 0x0000a1f5, 6 }, diff --git a/drivers/gpu/drm/amd/amdgpu/cz_ih.c b/drivers/gpu/drm/amd/amdgpu/cz_ih.c index b8c47e0cf37a..c19681492efa 100644 --- a/drivers/gpu/drm/amd/amdgpu/cz_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/cz_ih.c @@ -216,6 +216,11 @@ static u32 cz_ih_get_wptr(struct amdgpu_device *adev, tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1); WREG32(mmIH_RB_CNTL, tmp); + /* Unset the CLEAR_OVERFLOW bit immediately so new overflows + * can be detected. + */ + tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0); + WREG32(mmIH_RB_CNTL, tmp); out: return (wptr & ih->ptr_mask); diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c index bb666cb7522e..221af054d874 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c @@ -21,6 +21,7 @@ * */ +#include <drm/drm_edid.h> #include <drm/drm_fourcc.h> #include <drm/drm_modeset_helper.h> #include <drm/drm_modeset_helper_vtables.h> @@ -51,6 +52,7 @@ static void dce_v10_0_set_display_funcs(struct amdgpu_device *adev); static void dce_v10_0_set_irq_funcs(struct amdgpu_device *adev); +static void dce_v10_0_hpd_int_ack(struct amdgpu_device *adev, int hpd); static const u32 crtc_offsets[] = { CRTC0_REGISTER_OFFSET, @@ -363,6 +365,7 @@ static void dce_v10_0_hpd_init(struct amdgpu_device *adev) AMDGPU_HPD_DISCONNECT_INT_DELAY_IN_MS); WREG32(mmDC_HPD_TOGGLE_FILT_CNTL + hpd_offsets[amdgpu_connector->hpd.hpd], tmp); + dce_v10_0_hpd_int_ack(adev, amdgpu_connector->hpd.hpd); dce_v10_0_hpd_set_polarity(adev, amdgpu_connector->hpd.hpd); amdgpu_irq_get(adev, &adev->hpd_irq, amdgpu_connector->hpd.hpd); diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c index 7af277f61cca..69e8b0db6cf7 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c @@ -21,6 +21,7 @@ * */ +#include <drm/drm_edid.h> #include <drm/drm_fourcc.h> #include <drm/drm_modeset_helper.h> #include <drm/drm_modeset_helper_vtables.h> @@ -51,6 +52,7 @@ static void dce_v11_0_set_display_funcs(struct amdgpu_device *adev); static void dce_v11_0_set_irq_funcs(struct amdgpu_device *adev); +static void dce_v11_0_hpd_int_ack(struct amdgpu_device *adev, int hpd); static const u32 crtc_offsets[] = { @@ -387,6 +389,7 @@ static void dce_v11_0_hpd_init(struct amdgpu_device *adev) AMDGPU_HPD_DISCONNECT_INT_DELAY_IN_MS); WREG32(mmDC_HPD_TOGGLE_FILT_CNTL + hpd_offsets[amdgpu_connector->hpd.hpd], tmp); + dce_v11_0_hpd_int_ack(adev, amdgpu_connector->hpd.hpd); dce_v11_0_hpd_set_polarity(adev, amdgpu_connector->hpd.hpd); amdgpu_irq_get(adev, &adev->hpd_irq, amdgpu_connector->hpd.hpd); } diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c index 143efc37a17f..60d40201fdd1 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c @@ -23,6 +23,7 @@ #include <linux/pci.h> +#include <drm/drm_edid.h> #include <drm/drm_fourcc.h> #include <drm/drm_modeset_helper.h> #include <drm/drm_modeset_helper_vtables.h> @@ -272,6 +273,21 @@ static void dce_v6_0_hpd_set_polarity(struct amdgpu_device *adev, WREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[hpd], tmp); } +static void dce_v6_0_hpd_int_ack(struct amdgpu_device *adev, + int hpd) +{ + u32 tmp; + + if (hpd >= adev->mode_info.num_hpd) { + DRM_DEBUG("invalid hdp %d\n", hpd); + return; + } + + tmp = RREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[hpd]); + tmp |= DC_HPD1_INT_CONTROL__DC_HPD1_INT_ACK_MASK; + WREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[hpd], tmp); +} + /** * dce_v6_0_hpd_init - hpd setup callback. * @@ -311,6 +327,7 @@ static void dce_v6_0_hpd_init(struct amdgpu_device *adev) continue; } + dce_v6_0_hpd_int_ack(adev, amdgpu_connector->hpd.hpd); dce_v6_0_hpd_set_polarity(adev, amdgpu_connector->hpd.hpd); amdgpu_irq_get(adev, &adev->hpd_irq, amdgpu_connector->hpd.hpd); } @@ -3088,7 +3105,7 @@ static int dce_v6_0_hpd_irq(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry) { - uint32_t disp_int, mask, tmp; + uint32_t disp_int, mask; unsigned hpd; if (entry->src_data[0] >= adev->mode_info.num_hpd) { @@ -3101,9 +3118,7 @@ static int dce_v6_0_hpd_irq(struct amdgpu_device *adev, mask = interrupt_status_offsets[hpd].hpd; if (disp_int & mask) { - tmp = RREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[hpd]); - tmp |= DC_HPD1_INT_CONTROL__DC_HPD1_INT_ACK_MASK; - WREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[hpd], tmp); + dce_v6_0_hpd_int_ack(adev, hpd); schedule_delayed_work(&adev->hotplug_work, 0); DRM_DEBUG("IH: HPD%d\n", hpd + 1); } diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c index adeddfb7ff12..5a5fcc45e452 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c @@ -21,6 +21,7 @@ * */ +#include <drm/drm_edid.h> #include <drm/drm_fourcc.h> #include <drm/drm_modeset_helper.h> #include <drm/drm_modeset_helper_vtables.h> @@ -264,6 +265,21 @@ static void dce_v8_0_hpd_set_polarity(struct amdgpu_device *adev, WREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[hpd], tmp); } +static void dce_v8_0_hpd_int_ack(struct amdgpu_device *adev, + int hpd) +{ + u32 tmp; + + if (hpd >= adev->mode_info.num_hpd) { + DRM_DEBUG("invalid hdp %d\n", hpd); + return; + } + + tmp = RREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[hpd]); + tmp |= DC_HPD1_INT_CONTROL__DC_HPD1_INT_ACK_MASK; + WREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[hpd], tmp); +} + /** * dce_v8_0_hpd_init - hpd setup callback. * @@ -303,6 +319,7 @@ static void dce_v8_0_hpd_init(struct amdgpu_device *adev) continue; } + dce_v8_0_hpd_int_ack(adev, amdgpu_connector->hpd.hpd); dce_v8_0_hpd_set_polarity(adev, amdgpu_connector->hpd.hpd); amdgpu_irq_get(adev, &adev->hpd_irq, amdgpu_connector->hpd.hpd); } @@ -3176,7 +3193,7 @@ static int dce_v8_0_hpd_irq(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry) { - uint32_t disp_int, mask, tmp; + uint32_t disp_int, mask; unsigned hpd; if (entry->src_data[0] >= adev->mode_info.num_hpd) { @@ -3189,9 +3206,7 @@ static int dce_v8_0_hpd_irq(struct amdgpu_device *adev, mask = interrupt_status_offsets[hpd].hpd; if (disp_int & mask) { - tmp = RREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[hpd]); - tmp |= DC_HPD1_INT_CONTROL__DC_HPD1_INT_ACK_MASK; - WREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[hpd], tmp); + dce_v8_0_hpd_int_ack(adev, hpd); schedule_delayed_work(&adev->hotplug_work, 0); DRM_DEBUG("IH: HPD%d\n", hpd + 1); } diff --git a/drivers/gpu/drm/amd/amdgpu/df_v4_6_2.c b/drivers/gpu/drm/amd/amdgpu/df_v4_6_2.c new file mode 100644 index 000000000000..a47960a0babd --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/df_v4_6_2.c @@ -0,0 +1,34 @@ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#include "amdgpu.h" +#include "df_v4_6_2.h" + +static bool df_v4_6_2_query_ras_poison_mode(struct amdgpu_device *adev) +{ + /* return true since related regs are inaccessible */ + return true; +} + +const struct amdgpu_df_funcs df_v4_6_2_funcs = { + .query_ras_poison_mode = df_v4_6_2_query_ras_poison_mode, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/df_v4_6_2.h b/drivers/gpu/drm/amd/amdgpu/df_v4_6_2.h new file mode 100644 index 000000000000..3bc3e6d216e2 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/df_v4_6_2.h @@ -0,0 +1,31 @@ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __DF_V4_6_2_H__ +#define __DF_V4_6_2_H__ + +#include "soc15_common.h" + +extern const struct amdgpu_df_funcs df_v4_6_2_funcs; + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index d9ccacd06fba..591e8ed4b491 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -3498,6 +3498,8 @@ static void gfx_v10_0_ring_invalidate_tlbs(struct amdgpu_ring *ring, static void gfx_v10_0_update_spm_vmid_internal(struct amdgpu_device *adev, unsigned int vmid); +static int gfx_v10_0_set_powergating_state(void *handle, + enum amd_powergating_state state); static void gfx10_kiq_set_resources(struct amdgpu_ring *kiq_ring, uint64_t queue_mask) { amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6)); @@ -3655,6 +3657,9 @@ static void gfx_v10_0_init_spm_golden_registers(struct amdgpu_device *adev) static void gfx_v10_0_init_golden_registers(struct amdgpu_device *adev) { + if (amdgpu_sriov_vf(adev)) + return; + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { case IP_VERSION(10, 1, 10): soc15_program_register_sequence(adev, @@ -3959,7 +3964,7 @@ static void gfx_v10_0_check_gfxoff_flag(struct amdgpu_device *adev) static int gfx_v10_0_init_microcode(struct amdgpu_device *adev) { - char fw_name[40]; + char fw_name[53]; char ucode_prefix[30]; const char *wks = ""; int err; @@ -3994,16 +3999,13 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev) if (!amdgpu_sriov_vf(adev)) { snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", ucode_prefix); - err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, fw_name); - /* don't check this. There are apparently firmwares in the wild with - * incorrect size in the header - */ - if (err == -ENODEV) - goto out; + err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev); if (err) - dev_dbg(adev->dev, - "gfx10: amdgpu_ucode_request() failed \"%s\"\n", - fw_name); + goto out; + + /* don't validate this firmware. There are apparently firmwares + * in the wild with incorrect size in the header + */ rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; version_major = le16_to_cpu(rlc_hdr->header.header_version_major); version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor); @@ -4028,8 +4030,6 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev) err = 0; adev->gfx.mec2_fw = NULL; } - amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC2); - amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC2_JT); gfx_v10_0_check_fw_write_wait(adev); out: @@ -4493,7 +4493,7 @@ static int gfx_v10_0_compute_ring_init(struct amdgpu_device *adev, int ring_id, static int gfx_v10_0_sw_init(void *handle) { int i, j, k, r, ring_id = 0; - struct amdgpu_kiq *kiq; + int xcc_id = 0; struct amdgpu_device *adev = (struct amdgpu_device *)handle; switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { @@ -4518,7 +4518,7 @@ static int gfx_v10_0_sw_init(void *handle) case IP_VERSION(10, 3, 3): case IP_VERSION(10, 3, 7): adev->gfx.me.num_me = 1; - adev->gfx.me.num_pipe_per_me = 1; + adev->gfx.me.num_pipe_per_me = 2; adev->gfx.me.num_queue_per_pipe = 1; adev->gfx.mec.num_mec = 2; adev->gfx.mec.num_pipe_per_mec = 4; @@ -4622,8 +4622,7 @@ static int gfx_v10_0_sw_init(void *handle) return r; } - kiq = &adev->gfx.kiq[0]; - r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq, 0); + r = amdgpu_gfx_kiq_init_ring(adev, xcc_id); if (r) return r; } @@ -4986,7 +4985,8 @@ static void gfx_v10_0_constants_init(struct amdgpu_device *adev) u32 tmp; int i; - WREG32_FIELD15(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff); + if (!amdgpu_sriov_vf(adev)) + WREG32_FIELD15(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff); gfx_v10_0_setup_rb(adev); gfx_v10_0_get_cu_info(adev, &adev->gfx.cu_info); @@ -6465,11 +6465,18 @@ static int gfx_v10_0_gfx_init_queue(struct amdgpu_ring *ring) nv_grbm_select(adev, 0, 0, 0, 0); mutex_unlock(&adev->srbm_mutex); if (adev->gfx.me.mqd_backup[mqd_idx]) - memcpy(adev->gfx.me.mqd_backup[mqd_idx], mqd, sizeof(*mqd)); + memcpy_fromio(adev->gfx.me.mqd_backup[mqd_idx], mqd, sizeof(*mqd)); } else { + mutex_lock(&adev->srbm_mutex); + nv_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); + if (ring->doorbell_index == adev->doorbell_index.gfx_ring0 << 1) + gfx_v10_0_cp_gfx_set_doorbell(adev, ring); + + nv_grbm_select(adev, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); /* restore mqd with the backup copy */ if (adev->gfx.me.mqd_backup[mqd_idx]) - memcpy(mqd, adev->gfx.me.mqd_backup[mqd_idx], sizeof(*mqd)); + memcpy_toio(mqd, adev->gfx.me.mqd_backup[mqd_idx], sizeof(*mqd)); /* reset the ring */ ring->wptr = 0; *ring->wptr_cpu_addr = 0; @@ -6583,8 +6590,9 @@ static int gfx_v10_0_compute_mqd_init(struct amdgpu_device *adev, void *m, #ifdef __BIG_ENDIAN tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1); #endif - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0); - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, 0); + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 1); + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, + prop->allow_tunneling); tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1); tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1); mqd->cp_hqd_pq_control = tmp; @@ -6743,7 +6751,7 @@ static int gfx_v10_0_kiq_init_queue(struct amdgpu_ring *ring) if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */ /* reset MQD to a clean status */ if (adev->gfx.kiq[0].mqd_backup) - memcpy(mqd, adev->gfx.kiq[0].mqd_backup, sizeof(*mqd)); + memcpy_toio(mqd, adev->gfx.kiq[0].mqd_backup, sizeof(*mqd)); /* reset ring buffer */ ring->wptr = 0; @@ -6766,7 +6774,7 @@ static int gfx_v10_0_kiq_init_queue(struct amdgpu_ring *ring) mutex_unlock(&adev->srbm_mutex); if (adev->gfx.kiq[0].mqd_backup) - memcpy(adev->gfx.kiq[0].mqd_backup, mqd, sizeof(*mqd)); + memcpy_fromio(adev->gfx.kiq[0].mqd_backup, mqd, sizeof(*mqd)); } return 0; @@ -6787,11 +6795,11 @@ static int gfx_v10_0_kcq_init_queue(struct amdgpu_ring *ring) mutex_unlock(&adev->srbm_mutex); if (adev->gfx.mec.mqd_backup[mqd_idx]) - memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd)); + memcpy_fromio(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd)); } else { /* restore MQD to a clean status */ if (adev->gfx.mec.mqd_backup[mqd_idx]) - memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd)); + memcpy_toio(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd)); /* reset ring buffer */ ring->wptr = 0; atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 0); @@ -7159,7 +7167,7 @@ static int gfx_v10_0_hw_init(void *handle) if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(10, 3, 0)) gfx_v10_3_program_pbb_mode(adev); - if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(10, 3, 0)) + if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(10, 3, 0) && !amdgpu_sriov_vf(adev)) gfx_v10_3_set_power_brake_sequence(adev); return r; @@ -7172,6 +7180,13 @@ static int gfx_v10_0_hw_fini(void *handle) amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); + /* WA added for Vangogh asic fixing the SMU suspend failure + * It needs to set power gating again during gfxoff control + * otherwise the gfxoff disallowing will be failed to set. + */ + if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(10, 3, 1)) + gfx_v10_0_set_powergating_state(handle, AMD_PG_STATE_UNGATE); + if (!adev->no_hw_access) { if (amdgpu_async_gfx_ring) { if (amdgpu_gfx_disable_kgq(adev, 0)) @@ -7935,7 +7950,7 @@ static void gfx_v10_0_update_spm_vmid_internal(struct amdgpu_device *adev, WREG32_SOC15_NO_KIQ(GC, 0, mmRLC_SPM_MC_CNTL, data); } -static void gfx_v10_0_update_spm_vmid(struct amdgpu_device *adev, unsigned int vmid) +static void gfx_v10_0_update_spm_vmid(struct amdgpu_device *adev, struct amdgpu_ring *ring, unsigned int vmid) { amdgpu_gfx_off_ctrl(adev, false); @@ -8302,7 +8317,7 @@ static void gfx_v10_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) } reg_mem_engine = 0; } else { - ref_and_mask = nbio_hf_reg->ref_and_mask_cp0; + ref_and_mask = nbio_hf_reg->ref_and_mask_cp0 << ring->pipe; reg_mem_engine = 1; /* pfp */ } @@ -8531,34 +8546,23 @@ static void gfx_v10_0_ring_emit_cntxcntl(struct amdgpu_ring *ring, amdgpu_ring_write(ring, 0); } -static unsigned int gfx_v10_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring) +static unsigned int gfx_v10_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring, + uint64_t addr) { unsigned int ret; amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3)); - amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr)); - amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr)); - amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */ + amdgpu_ring_write(ring, lower_32_bits(addr)); + amdgpu_ring_write(ring, upper_32_bits(addr)); + /* discard following DWs if *cond_exec_gpu_addr==0 */ + amdgpu_ring_write(ring, 0); ret = ring->wptr & ring->buf_mask; - amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */ + /* patch dummy value later */ + amdgpu_ring_write(ring, 0); return ret; } -static void gfx_v10_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned int offset) -{ - unsigned int cur; - - BUG_ON(offset > ring->buf_mask); - BUG_ON(ring->ring[offset] != 0x55aa55aa); - - cur = (ring->wptr - 1) & ring->buf_mask; - if (likely(cur > offset)) - ring->ring[offset] = cur - offset; - else - ring->ring[offset] = (ring->buf_mask + 1) - offset + cur; -} - static int gfx_v10_0_ring_preempt_ib(struct amdgpu_ring *ring) { int i, r = 0; @@ -9182,7 +9186,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = { 7 + /* PIPELINE_SYNC */ SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + - 2 + /* VM_FLUSH */ + 4 + /* VM_FLUSH */ 8 + /* FENCE for VM_FLUSH */ 20 + /* GDS switch */ 4 + /* double SWITCH_BUFFER, @@ -9213,7 +9217,6 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = { .emit_switch_buffer = gfx_v10_0_ring_emit_sb, .emit_cntxcntl = gfx_v10_0_ring_emit_cntxcntl, .init_cond_exec = gfx_v10_0_ring_emit_init_cond_exec, - .patch_cond_exec = gfx_v10_0_ring_emit_patch_cond_exec, .preempt_ib = gfx_v10_0_ring_preempt_ib, .emit_frame_cntl = gfx_v10_0_ring_emit_frame_cntl, .emit_wreg = gfx_v10_0_ring_emit_wreg, @@ -9273,7 +9276,6 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_kiq = { 7 + /* gfx_v10_0_ring_emit_pipeline_sync */ SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + - 2 + /* gfx_v10_0_ring_emit_vm_flush */ 8 + 8 + 8, /* gfx_v10_0_ring_emit_fence_kiq x3 for user fence, vm fence */ .emit_ib_size = 7, /* gfx_v10_0_ring_emit_ib_compute */ .emit_ib = gfx_v10_0_ring_emit_ib_compute, diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index 27b224b0688a..3c01d89e7a1b 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -67,6 +67,7 @@ MODULE_FIRMWARE("amdgpu/gc_11_0_0_pfp.bin"); MODULE_FIRMWARE("amdgpu/gc_11_0_0_me.bin"); MODULE_FIRMWARE("amdgpu/gc_11_0_0_mec.bin"); MODULE_FIRMWARE("amdgpu/gc_11_0_0_rlc.bin"); +MODULE_FIRMWARE("amdgpu/gc_11_0_0_rlc_1.bin"); MODULE_FIRMWARE("amdgpu/gc_11_0_0_toc.bin"); MODULE_FIRMWARE("amdgpu/gc_11_0_1_pfp.bin"); MODULE_FIRMWARE("amdgpu/gc_11_0_1_me.bin"); @@ -88,6 +89,14 @@ MODULE_FIRMWARE("amdgpu/gc_11_5_0_pfp.bin"); MODULE_FIRMWARE("amdgpu/gc_11_5_0_me.bin"); MODULE_FIRMWARE("amdgpu/gc_11_5_0_mec.bin"); MODULE_FIRMWARE("amdgpu/gc_11_5_0_rlc.bin"); +MODULE_FIRMWARE("amdgpu/gc_11_5_1_pfp.bin"); +MODULE_FIRMWARE("amdgpu/gc_11_5_1_me.bin"); +MODULE_FIRMWARE("amdgpu/gc_11_5_1_mec.bin"); +MODULE_FIRMWARE("amdgpu/gc_11_5_1_rlc.bin"); + +static const struct soc15_reg_golden golden_settings_gc_11_0[] = { + SOC15_REG_GOLDEN_VALUE(GC, 0, regTCP_CNTL, 0x20000000, 0x20000000) +}; static const struct soc15_reg_golden golden_settings_gc_11_0_1[] = { @@ -102,23 +111,6 @@ static const struct soc15_reg_golden golden_settings_gc_11_0_1[] = SOC15_REG_GOLDEN_VALUE(GC, 0, regTCP_CNTL2, 0xfcffffff, 0x0000000a) }; -static const struct soc15_reg_golden golden_settings_gc_11_5_0[] = { - SOC15_REG_GOLDEN_VALUE(GC, 0, regDB_DEBUG5, 0xffffffff, 0x00000800), - SOC15_REG_GOLDEN_VALUE(GC, 0, regGB_ADDR_CONFIG, 0x0c1807ff, 0x00000242), - SOC15_REG_GOLDEN_VALUE(GC, 0, regGCR_GENERAL_CNTL, 0x1ff1ffff, 0x00000500), - SOC15_REG_GOLDEN_VALUE(GC, 0, regGL2A_ADDR_MATCH_MASK, 0xffffffff, 0xfffffff3), - SOC15_REG_GOLDEN_VALUE(GC, 0, regGL2C_ADDR_MATCH_MASK, 0xffffffff, 0xfffffff3), - SOC15_REG_GOLDEN_VALUE(GC, 0, regGL2C_CTRL, 0xffffffff, 0xf37fff3f), - SOC15_REG_GOLDEN_VALUE(GC, 0, regGL2C_CTRL3, 0xfffffffb, 0x00f40188), - SOC15_REG_GOLDEN_VALUE(GC, 0, regGL2C_CTRL4, 0xf0ffffff, 0x8000b007), - SOC15_REG_GOLDEN_VALUE(GC, 0, regPA_CL_ENHANCE, 0xf1ffffff, 0x00880007), - SOC15_REG_GOLDEN_VALUE(GC, 0, regPC_CONFIG_CNTL_1, 0xffffffff, 0x00010000), - SOC15_REG_GOLDEN_VALUE(GC, 0, regTA_CNTL_AUX, 0xf7f7ffff, 0x01030000), - SOC15_REG_GOLDEN_VALUE(GC, 0, regTA_CNTL2, 0x007f0000, 0x00000000), - SOC15_REG_GOLDEN_VALUE(GC, 0, regTCP_CNTL2, 0xffcfffff, 0x0000200a), - SOC15_REG_GOLDEN_VALUE(GC, 0, regUTCL1_CTRL_2, 0xffffffff, 0x0000048f) -}; - #define DEFAULT_SH_MEM_CONFIG \ ((SH_MEM_ADDRESS_MODE_64 << SH_MEM_CONFIG__ADDRESS_MODE__SHIFT) | \ (SH_MEM_ALIGNMENT_MODE_UNALIGNED << SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) | \ @@ -155,6 +147,7 @@ static void gfx11_kiq_set_resources(struct amdgpu_ring *kiq_ring, uint64_t queue { amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6)); amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) | + PACKET3_SET_RESOURCES_UNMAP_LATENTY(0xa) | /* unmap_latency: 0xa (~ 1s) */ PACKET3_SET_RESOURCES_QUEUE_TYPE(0)); /* vmid_mask:0 queue_type:0 (KIQ) */ amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */ amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */ @@ -288,6 +281,9 @@ static void gfx_v11_0_set_kiq_pm4_funcs(struct amdgpu_device *adev) static void gfx_v11_0_init_golden_registers(struct amdgpu_device *adev) { + if (amdgpu_sriov_vf(adev)) + return; + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { case IP_VERSION(11, 0, 1): case IP_VERSION(11, 0, 4): @@ -295,14 +291,13 @@ static void gfx_v11_0_init_golden_registers(struct amdgpu_device *adev) golden_settings_gc_11_0_1, (const u32)ARRAY_SIZE(golden_settings_gc_11_0_1)); break; - case IP_VERSION(11, 5, 0): - soc15_program_register_sequence(adev, - golden_settings_gc_11_5_0, - (const u32)ARRAY_SIZE(golden_settings_gc_11_5_0)); - break; default: break; } + soc15_program_register_sequence(adev, + golden_settings_gc_11_0, + (const u32)ARRAY_SIZE(golden_settings_gc_11_0)); + } static void gfx_v11_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel, @@ -418,7 +413,7 @@ static int gfx_v11_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD); cpu_ptr = &adev->wb.wb[index]; - r = amdgpu_ib_get(adev, NULL, 16, AMDGPU_IB_POOL_DIRECT, &ib); + r = amdgpu_ib_get(adev, NULL, 20, AMDGPU_IB_POOL_DIRECT, &ib); if (r) { DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r); goto err1; @@ -515,7 +510,7 @@ static void gfx_v11_0_check_fw_cp_gfx_shadow(struct amdgpu_device *adev) static int gfx_v11_0_init_microcode(struct amdgpu_device *adev) { char fw_name[40]; - char ucode_prefix[30]; + char ucode_prefix[25]; int err; const struct rlc_firmware_header_v2_0 *rlc_hdr; uint16_t version_major; @@ -555,7 +550,11 @@ static int gfx_v11_0_init_microcode(struct amdgpu_device *adev) } if (!amdgpu_sriov_vf(adev)) { - snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", ucode_prefix); + if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 0, 0) && + adev->pdev->revision == 0xCE) + snprintf(fw_name, sizeof(fw_name), "amdgpu/gc_11_0_0_rlc_1.bin"); + else + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", ucode_prefix); err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, fw_name); if (err) goto out; @@ -589,6 +588,14 @@ static int gfx_v11_0_init_microcode(struct amdgpu_device *adev) adev->gfx.mec2_fw = NULL; gfx_v11_0_check_fw_cp_gfx_shadow(adev); + + if (adev->gfx.imu.funcs && adev->gfx.imu.funcs->init_microcode) { + err = adev->gfx.imu.funcs->init_microcode(adev); + if (err) + DRM_ERROR("Failed to init imu firmware!\n"); + return err; + } + out: if (err) { amdgpu_ucode_release(&adev->gfx.pfp_fw); @@ -724,7 +731,7 @@ static int gfx_v11_0_rlc_init(struct amdgpu_device *adev) /* init spm vmid with 0xf */ if (adev->gfx.rlc.funcs->update_spm_vmid) - adev->gfx.rlc.funcs->update_spm_vmid(adev, 0xf); + adev->gfx.rlc.funcs->update_spm_vmid(adev, NULL, 0xf); return 0; } @@ -904,6 +911,7 @@ static int gfx_v11_0_gpu_early_init(struct amdgpu_device *adev) case IP_VERSION(11, 0, 1): case IP_VERSION(11, 0, 4): case IP_VERSION(11, 5, 0): + case IP_VERSION(11, 5, 1): adev->gfx.config.max_hw_contexts = 8; adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; adev->gfx.config.sc_prim_fifo_size_backend = 0x100; @@ -1326,7 +1334,7 @@ static int gfx_v11_0_rlc_backdoor_autoload_enable(struct amdgpu_device *adev) static int gfx_v11_0_sw_init(void *handle) { int i, j, k, r, ring_id = 0; - struct amdgpu_kiq *kiq; + int xcc_id = 0; struct amdgpu_device *adev = (struct amdgpu_device *)handle; switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { @@ -1343,6 +1351,7 @@ static int gfx_v11_0_sw_init(void *handle) case IP_VERSION(11, 0, 1): case IP_VERSION(11, 0, 4): case IP_VERSION(11, 5, 0): + case IP_VERSION(11, 5, 1): adev->gfx.me.num_me = 1; adev->gfx.me.num_pipe_per_me = 1; adev->gfx.me.num_queue_per_pipe = 1; @@ -1395,14 +1404,6 @@ static int gfx_v11_0_sw_init(void *handle) adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE; - if (adev->gfx.imu.funcs) { - if (adev->gfx.imu.funcs->init_microcode) { - r = adev->gfx.imu.funcs->init_microcode(adev); - if (r) - DRM_ERROR("Failed to load imu firmware!\n"); - } - } - gfx_v11_0_me_init(adev); r = gfx_v11_0_rlc_init(adev); @@ -1459,8 +1460,7 @@ static int gfx_v11_0_sw_init(void *handle) return r; } - kiq = &adev->gfx.kiq[0]; - r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq, 0); + r = amdgpu_gfx_kiq_init_ring(adev, xcc_id); if (r) return r; } @@ -1635,7 +1635,7 @@ static void gfx_v11_0_setup_rb(struct amdgpu_device *adev) active_rb_bitmap |= (0x3 << (i * rb_bitmap_width_per_sa)); } - active_rb_bitmap |= global_active_rb_bitmap; + active_rb_bitmap &= global_active_rb_bitmap; adev->gfx.config.backend_enable_mask = active_rb_bitmap; adev->gfx.config.num_rbs = hweight32(active_rb_bitmap); } @@ -2593,7 +2593,8 @@ static int gfx_v11_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev) IP_VERSION(11, 0, 1) || amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 0, 4) || - amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 5, 0)) + amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 5, 0) || + amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 5, 1)) bootload_status = RREG32_SOC15(GC, 0, regRLC_RLCS_BOOTLOAD_STATUS_gc_11_0_1); else @@ -3714,11 +3715,11 @@ static int gfx_v11_0_gfx_init_queue(struct amdgpu_ring *ring) soc21_grbm_select(adev, 0, 0, 0, 0); mutex_unlock(&adev->srbm_mutex); if (adev->gfx.me.mqd_backup[mqd_idx]) - memcpy(adev->gfx.me.mqd_backup[mqd_idx], mqd, sizeof(*mqd)); + memcpy_fromio(adev->gfx.me.mqd_backup[mqd_idx], mqd, sizeof(*mqd)); } else { /* restore mqd with the backup copy */ if (adev->gfx.me.mqd_backup[mqd_idx]) - memcpy(mqd, adev->gfx.me.mqd_backup[mqd_idx], sizeof(*mqd)); + memcpy_toio(mqd, adev->gfx.me.mqd_backup[mqd_idx], sizeof(*mqd)); /* reset the ring */ ring->wptr = 0; *ring->wptr_cpu_addr = 0; @@ -3829,8 +3830,9 @@ static int gfx_v11_0_compute_mqd_init(struct amdgpu_device *adev, void *m, (order_base_2(prop->queue_size / 4) - 1)); tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE, (order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1)); - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0); - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, 0); + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 1); + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, + prop->allow_tunneling); tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1); tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1); mqd->cp_hqd_pq_control = tmp; @@ -4007,7 +4009,7 @@ static int gfx_v11_0_kiq_init_queue(struct amdgpu_ring *ring) if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */ /* reset MQD to a clean status */ if (adev->gfx.kiq[0].mqd_backup) - memcpy(mqd, adev->gfx.kiq[0].mqd_backup, sizeof(*mqd)); + memcpy_toio(mqd, adev->gfx.kiq[0].mqd_backup, sizeof(*mqd)); /* reset ring buffer */ ring->wptr = 0; @@ -4030,7 +4032,7 @@ static int gfx_v11_0_kiq_init_queue(struct amdgpu_ring *ring) mutex_unlock(&adev->srbm_mutex); if (adev->gfx.kiq[0].mqd_backup) - memcpy(adev->gfx.kiq[0].mqd_backup, mqd, sizeof(*mqd)); + memcpy_fromio(adev->gfx.kiq[0].mqd_backup, mqd, sizeof(*mqd)); } return 0; @@ -4051,11 +4053,11 @@ static int gfx_v11_0_kcq_init_queue(struct amdgpu_ring *ring) mutex_unlock(&adev->srbm_mutex); if (adev->gfx.mec.mqd_backup[mqd_idx]) - memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd)); + memcpy_fromio(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd)); } else { /* restore MQD to a clean status */ if (adev->gfx.mec.mqd_backup[mqd_idx]) - memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd)); + memcpy_toio(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd)); /* reset ring buffer */ ring->wptr = 0; atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 0); @@ -4373,6 +4375,10 @@ static int gfx_v11_0_hw_init(void *handle) if (r) return r; + /* get IMU version from HW if it's not set */ + if (!adev->gfx.imu_fw_version) + adev->gfx.imu_fw_version = RREG32_SOC15(GC, 0, regGFX_IMU_SCRATCH_0); + return r; } @@ -4452,11 +4458,43 @@ static int gfx_v11_0_wait_for_idle(void *handle) return -ETIMEDOUT; } +static int gfx_v11_0_request_gfx_index_mutex(struct amdgpu_device *adev, + int req) +{ + u32 i, tmp, val; + + for (i = 0; i < adev->usec_timeout; i++) { + /* Request with MeId=2, PipeId=0 */ + tmp = REG_SET_FIELD(0, CP_GFX_INDEX_MUTEX, REQUEST, req); + tmp = REG_SET_FIELD(tmp, CP_GFX_INDEX_MUTEX, CLIENTID, 4); + WREG32_SOC15(GC, 0, regCP_GFX_INDEX_MUTEX, tmp); + + val = RREG32_SOC15(GC, 0, regCP_GFX_INDEX_MUTEX); + if (req) { + if (val == tmp) + break; + } else { + tmp = REG_SET_FIELD(tmp, CP_GFX_INDEX_MUTEX, + REQUEST, 1); + + /* unlocked or locked by firmware */ + if (val != tmp) + break; + } + udelay(1); + } + + if (i >= adev->usec_timeout) + return -EINVAL; + + return 0; +} + static int gfx_v11_0_soft_reset(void *handle) { u32 grbm_soft_reset = 0; u32 tmp; - int i, j, k; + int r, i, j, k; struct amdgpu_device *adev = (struct amdgpu_device *)handle; tmp = RREG32_SOC15(GC, 0, regCP_INT_CNTL); @@ -4468,14 +4506,11 @@ static int gfx_v11_0_soft_reset(void *handle) gfx_v11_0_set_safe_mode(adev, 0); + mutex_lock(&adev->srbm_mutex); for (i = 0; i < adev->gfx.mec.num_mec; ++i) { for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) { for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) { - tmp = RREG32_SOC15(GC, 0, regGRBM_GFX_CNTL); - tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, MEID, i); - tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, QUEUEID, j); - tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, PIPEID, k); - WREG32_SOC15(GC, 0, regGRBM_GFX_CNTL, tmp); + soc21_grbm_select(adev, i, k, j, 0); WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 0x2); WREG32_SOC15(GC, 0, regSPI_COMPUTE_QUEUE_RESET, 0x1); @@ -4485,16 +4520,21 @@ static int gfx_v11_0_soft_reset(void *handle) for (i = 0; i < adev->gfx.me.num_me; ++i) { for (j = 0; j < adev->gfx.me.num_queue_per_pipe; j++) { for (k = 0; k < adev->gfx.me.num_pipe_per_me; k++) { - tmp = RREG32_SOC15(GC, 0, regGRBM_GFX_CNTL); - tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, MEID, i); - tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, QUEUEID, j); - tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, PIPEID, k); - WREG32_SOC15(GC, 0, regGRBM_GFX_CNTL, tmp); + soc21_grbm_select(adev, i, k, j, 0); WREG32_SOC15(GC, 0, regCP_GFX_HQD_DEQUEUE_REQUEST, 0x1); } } } + soc21_grbm_select(adev, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); + + /* Try to acquire the gfx mutex before access to CP_VMID_RESET */ + r = gfx_v11_0_request_gfx_index_mutex(adev, 1); + if (r) { + DRM_ERROR("Failed to acquire the gfx mutex during soft reset\n"); + return r; + } WREG32_SOC15(GC, 0, regCP_VMID_RESET, 0xfffffffe); @@ -4504,6 +4544,13 @@ static int gfx_v11_0_soft_reset(void *handle) RREG32_SOC15(GC, 0, regCP_VMID_RESET); RREG32_SOC15(GC, 0, regCP_VMID_RESET); + /* release the gfx mutex */ + r = gfx_v11_0_request_gfx_index_mutex(adev, 0); + if (r) { + DRM_ERROR("Failed to release the gfx mutex during soft reset\n"); + return r; + } + for (i = 0; i < adev->usec_timeout; i++) { if (!RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) && !RREG32_SOC15(GC, 0, regCP_GFX_HQD_ACTIVE)) @@ -4981,7 +5028,7 @@ static int gfx_v11_0_update_gfx_clock_gating(struct amdgpu_device *adev, return 0; } -static void gfx_v11_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid) +static void gfx_v11_0_update_spm_vmid(struct amdgpu_device *adev, struct amdgpu_ring *ring, unsigned vmid) { u32 data; @@ -4995,6 +5042,14 @@ static void gfx_v11_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid) WREG32_SOC15_NO_KIQ(GC, 0, regRLC_SPM_MC_CNTL, data); amdgpu_gfx_off_ctrl(adev, true); + + if (ring + && amdgpu_sriov_is_pp_one_vf(adev) + && ((ring->funcs->type == AMDGPU_RING_TYPE_GFX) + || (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE))) { + uint32_t reg = SOC15_REG_OFFSET(GC, 0, regRLC_SPM_MC_CNTL); + amdgpu_ring_emit_wreg(ring, reg, data); + } } static const struct amdgpu_rlc_funcs gfx_v11_0_rlc_funcs = { @@ -5028,6 +5083,7 @@ static void gfx_v11_cntl_power_gating(struct amdgpu_device *adev, bool enable) case IP_VERSION(11, 0, 1): case IP_VERSION(11, 0, 4): case IP_VERSION(11, 5, 0): + case IP_VERSION(11, 5, 1): WREG32_SOC15(GC, 0, regRLC_PG_DELAY_3, RLC_PG_DELAY_3_DEFAULT_GC_11_0_1); break; default: @@ -5063,6 +5119,7 @@ static int gfx_v11_0_set_powergating_state(void *handle, case IP_VERSION(11, 0, 1): case IP_VERSION(11, 0, 4): case IP_VERSION(11, 5, 0): + case IP_VERSION(11, 5, 1): if (!enable) amdgpu_gfx_off_ctrl(adev, false); @@ -5094,6 +5151,7 @@ static int gfx_v11_0_set_clockgating_state(void *handle, case IP_VERSION(11, 0, 3): case IP_VERSION(11, 0, 4): case IP_VERSION(11, 5, 0): + case IP_VERSION(11, 5, 1): gfx_v11_0_update_gfx_clock_gating(adev, state == AMD_CG_STATE_GATE); break; @@ -5170,45 +5228,17 @@ static u64 gfx_v11_0_ring_get_wptr_gfx(struct amdgpu_ring *ring) static void gfx_v11_0_ring_set_wptr_gfx(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - uint32_t *wptr_saved; - uint32_t *is_queue_unmap; - uint64_t aggregated_db_index; - uint32_t mqd_size = adev->mqds[AMDGPU_HW_IP_GFX].mqd_size; - uint64_t wptr_tmp; - if (ring->is_mes_queue) { - wptr_saved = (uint32_t *)(ring->mqd_ptr + mqd_size); - is_queue_unmap = (uint32_t *)(ring->mqd_ptr + mqd_size + - sizeof(uint32_t)); - aggregated_db_index = - amdgpu_mes_get_aggregated_doorbell_index(adev, - ring->hw_prio); - - wptr_tmp = ring->wptr & ring->buf_mask; - atomic64_set((atomic64_t *)ring->wptr_cpu_addr, wptr_tmp); - *wptr_saved = wptr_tmp; - /* assume doorbell always being used by mes mapped queue */ - if (*is_queue_unmap) { - WDOORBELL64(aggregated_db_index, wptr_tmp); - WDOORBELL64(ring->doorbell_index, wptr_tmp); - } else { - WDOORBELL64(ring->doorbell_index, wptr_tmp); - - if (*is_queue_unmap) - WDOORBELL64(aggregated_db_index, wptr_tmp); - } + if (ring->use_doorbell) { + /* XXX check if swapping is necessary on BE */ + atomic64_set((atomic64_t *)ring->wptr_cpu_addr, + ring->wptr); + WDOORBELL64(ring->doorbell_index, ring->wptr); } else { - if (ring->use_doorbell) { - /* XXX check if swapping is necessary on BE */ - atomic64_set((atomic64_t *)ring->wptr_cpu_addr, - ring->wptr); - WDOORBELL64(ring->doorbell_index, ring->wptr); - } else { - WREG32_SOC15(GC, 0, regCP_RB0_WPTR, - lower_32_bits(ring->wptr)); - WREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI, - upper_32_bits(ring->wptr)); - } + WREG32_SOC15(GC, 0, regCP_RB0_WPTR, + lower_32_bits(ring->wptr)); + WREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI, + upper_32_bits(ring->wptr)); } } @@ -5233,42 +5263,14 @@ static u64 gfx_v11_0_ring_get_wptr_compute(struct amdgpu_ring *ring) static void gfx_v11_0_ring_set_wptr_compute(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - uint32_t *wptr_saved; - uint32_t *is_queue_unmap; - uint64_t aggregated_db_index; - uint32_t mqd_size = adev->mqds[AMDGPU_HW_IP_COMPUTE].mqd_size; - uint64_t wptr_tmp; - - if (ring->is_mes_queue) { - wptr_saved = (uint32_t *)(ring->mqd_ptr + mqd_size); - is_queue_unmap = (uint32_t *)(ring->mqd_ptr + mqd_size + - sizeof(uint32_t)); - aggregated_db_index = - amdgpu_mes_get_aggregated_doorbell_index(adev, - ring->hw_prio); - - wptr_tmp = ring->wptr & ring->buf_mask; - atomic64_set((atomic64_t *)ring->wptr_cpu_addr, wptr_tmp); - *wptr_saved = wptr_tmp; - /* assume doorbell always used by mes mapped queue */ - if (*is_queue_unmap) { - WDOORBELL64(aggregated_db_index, wptr_tmp); - WDOORBELL64(ring->doorbell_index, wptr_tmp); - } else { - WDOORBELL64(ring->doorbell_index, wptr_tmp); - if (*is_queue_unmap) - WDOORBELL64(aggregated_db_index, wptr_tmp); - } + /* XXX check if swapping is necessary on BE */ + if (ring->use_doorbell) { + atomic64_set((atomic64_t *)ring->wptr_cpu_addr, + ring->wptr); + WDOORBELL64(ring->doorbell_index, ring->wptr); } else { - /* XXX check if swapping is necessary on BE */ - if (ring->use_doorbell) { - atomic64_set((atomic64_t *)ring->wptr_cpu_addr, - ring->wptr); - WDOORBELL64(ring->doorbell_index, ring->wptr); - } else { - BUG(); /* only DOORBELL method supported on gfx11 now */ - } + BUG(); /* only DOORBELL method supported on gfx11 now */ } } @@ -5454,6 +5456,12 @@ static void gfx_v11_0_ring_emit_vm_flush(struct amdgpu_ring *ring, amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0)); amdgpu_ring_write(ring, 0x0); } + + /* Make sure that we can't skip the SET_Q_MODE packets when the VM + * changed in any way. + */ + ring->set_q_mode_offs = 0; + ring->set_q_mode_ptr = NULL; } static void gfx_v11_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr, @@ -5503,16 +5511,81 @@ static void gfx_v11_0_ring_emit_cntxcntl(struct amdgpu_ring *ring, amdgpu_ring_write(ring, 0); } +static unsigned gfx_v11_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring, + uint64_t addr) +{ + unsigned ret; + + amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3)); + amdgpu_ring_write(ring, lower_32_bits(addr)); + amdgpu_ring_write(ring, upper_32_bits(addr)); + /* discard following DWs if *cond_exec_gpu_addr==0 */ + amdgpu_ring_write(ring, 0); + ret = ring->wptr & ring->buf_mask; + /* patch dummy value later */ + amdgpu_ring_write(ring, 0); + + return ret; +} + static void gfx_v11_0_ring_emit_gfx_shadow(struct amdgpu_ring *ring, u64 shadow_va, u64 csa_va, u64 gds_va, bool init_shadow, int vmid) { struct amdgpu_device *adev = ring->adev; + unsigned int offs, end; - if (!adev->gfx.cp_gfx_shadow) + if (!adev->gfx.cp_gfx_shadow || !ring->ring_obj) return; + /* + * The logic here isn't easy to understand because we need to keep state + * accross multiple executions of the function as well as between the + * CPU and GPU. The general idea is that the newly written GPU command + * has a condition on the previous one and only executed if really + * necessary. + */ + + /* + * The dw in the NOP controls if the next SET_Q_MODE packet should be + * executed or not. Reserve 64bits just to be on the save side. + */ + amdgpu_ring_write(ring, PACKET3(PACKET3_NOP, 1)); + offs = ring->wptr & ring->buf_mask; + + /* + * We start with skipping the prefix SET_Q_MODE and always executing + * the postfix SET_Q_MODE packet. This is changed below with a + * WRITE_DATA command when the postfix executed. + */ + amdgpu_ring_write(ring, shadow_va ? 1 : 0); + amdgpu_ring_write(ring, 0); + + if (ring->set_q_mode_offs) { + uint64_t addr; + + addr = amdgpu_bo_gpu_offset(ring->ring_obj); + addr += ring->set_q_mode_offs << 2; + end = gfx_v11_0_ring_emit_init_cond_exec(ring, addr); + } + + /* + * When the postfix SET_Q_MODE packet executes we need to make sure that the + * next prefix SET_Q_MODE packet executes as well. + */ + if (!shadow_va) { + uint64_t addr; + + addr = amdgpu_bo_gpu_offset(ring->ring_obj); + addr += offs << 2; + amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); + amdgpu_ring_write(ring, WRITE_DATA_DST_SEL(5) | WR_CONFIRM); + amdgpu_ring_write(ring, lower_32_bits(addr)); + amdgpu_ring_write(ring, upper_32_bits(addr)); + amdgpu_ring_write(ring, 0x1); + } + amdgpu_ring_write(ring, PACKET3(PACKET3_SET_Q_PREEMPTION_MODE, 7)); amdgpu_ring_write(ring, lower_32_bits(shadow_va)); amdgpu_ring_write(ring, upper_32_bits(shadow_va)); @@ -5524,33 +5597,26 @@ static void gfx_v11_0_ring_emit_gfx_shadow(struct amdgpu_ring *ring, PACKET3_SET_Q_PREEMPTION_MODE_IB_VMID(vmid) : 0); amdgpu_ring_write(ring, init_shadow ? PACKET3_SET_Q_PREEMPTION_MODE_INIT_SHADOW_MEM : 0); -} -static unsigned gfx_v11_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring) -{ - unsigned ret; + if (ring->set_q_mode_offs) + amdgpu_ring_patch_cond_exec(ring, end); - amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3)); - amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr)); - amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr)); - amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */ - ret = ring->wptr & ring->buf_mask; - amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */ + if (shadow_va) { + uint64_t token = shadow_va ^ csa_va ^ gds_va ^ vmid; - return ret; -} + /* + * If the tokens match try to skip the last postfix SET_Q_MODE + * packet to avoid saving/restoring the state all the time. + */ + if (ring->set_q_mode_ptr && ring->set_q_mode_token == token) + *ring->set_q_mode_ptr = 0; -static void gfx_v11_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset) -{ - unsigned cur; - BUG_ON(offset > ring->buf_mask); - BUG_ON(ring->ring[offset] != 0x55aa55aa); + ring->set_q_mode_token = token; + } else { + ring->set_q_mode_ptr = &ring->ring[ring->set_q_mode_offs]; + } - cur = (ring->wptr - 1) & ring->buf_mask; - if (likely(cur > offset)) - ring->ring[offset] = cur - offset; - else - ring->ring[offset] = (ring->buf_mask + 1) - offset + cur; + ring->set_q_mode_offs = offs; } static int gfx_v11_0_ring_preempt_ib(struct amdgpu_ring *ring) @@ -6114,13 +6180,14 @@ static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_gfx = { .get_rptr = gfx_v11_0_ring_get_rptr_gfx, .get_wptr = gfx_v11_0_ring_get_wptr_gfx, .set_wptr = gfx_v11_0_ring_set_wptr_gfx, - .emit_frame_size = /* totally 242 maximum if 16 IBs */ + .emit_frame_size = /* totally 247 maximum if 16 IBs */ + 5 + /* update_spm_vmid */ 5 + /* COND_EXEC */ - 9 + /* SET_Q_PREEMPTION_MODE */ + 22 + /* SET_Q_PREEMPTION_MODE */ 7 + /* PIPELINE_SYNC */ SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + - 2 + /* VM_FLUSH */ + 4 + /* VM_FLUSH */ 8 + /* FENCE for VM_FLUSH */ 20 + /* GDS switch */ 5 + /* COND_EXEC */ @@ -6129,6 +6196,7 @@ static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_gfx = { 31 + /* DE_META */ 3 + /* CNTX_CTRL */ 5 + /* HDP_INVL */ + 22 + /* SET_Q_PREEMPTION_MODE */ 8 + 8 + /* FENCE x2 */ 8, /* gfx_v11_0_emit_mem_sync */ .emit_ib_size = 4, /* gfx_v11_0_ring_emit_ib_gfx */ @@ -6145,7 +6213,6 @@ static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_gfx = { .emit_cntxcntl = gfx_v11_0_ring_emit_cntxcntl, .emit_gfx_shadow = gfx_v11_0_ring_emit_gfx_shadow, .init_cond_exec = gfx_v11_0_ring_emit_init_cond_exec, - .patch_cond_exec = gfx_v11_0_ring_emit_patch_cond_exec, .preempt_ib = gfx_v11_0_ring_preempt_ib, .emit_frame_cntl = gfx_v11_0_ring_emit_frame_cntl, .emit_wreg = gfx_v11_0_ring_emit_wreg, @@ -6164,6 +6231,7 @@ static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_compute = { .get_wptr = gfx_v11_0_ring_get_wptr_compute, .set_wptr = gfx_v11_0_ring_set_wptr_compute, .emit_frame_size = + 5 + /* update_spm_vmid */ 20 + /* gfx_v11_0_ring_emit_gds_switch */ 7 + /* gfx_v11_0_ring_emit_hdp_flush */ 5 + /* hdp invalidate */ @@ -6205,7 +6273,6 @@ static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_kiq = { 7 + /* gfx_v11_0_ring_emit_pipeline_sync */ SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + - 2 + /* gfx_v11_0_ring_emit_vm_flush */ 8 + 8 + 8, /* gfx_v11_0_ring_emit_fence_kiq x3 for user fence, vm fence */ .emit_ib_size = 7, /* gfx_v11_0_ring_emit_ib_compute */ .emit_ib = gfx_v11_0_ring_emit_ib_compute, @@ -6371,6 +6438,9 @@ static int gfx_v11_0_get_cu_info(struct amdgpu_device *adev, mutex_lock(&adev->grbm_idx_mutex); for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { + bitmap = i * adev->gfx.config.max_sh_per_se + j; + if (!((gfx_v11_0_get_sa_active_bitmap(adev) >> bitmap) & 1)) + continue; mask = 1; counter = 0; gfx_v11_0_select_se_sh(adev, i, j, 0xffffffff, 0); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0_3.c index 26d6286d86c9..9e7ce1e6bc06 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0_3.c @@ -69,7 +69,7 @@ static int gfx_v11_0_3_rlc_gc_fed_irq(struct amdgpu_device *adev, amdgpu_ras_interrupt_dispatch(adev, &ih_data); } else { if (adev->virt.ops && adev->virt.ops->ras_poison_handler) - adev->virt.ops->ras_poison_handler(adev); + adev->virt.ops->ras_poison_handler(adev, ras_if->block); else dev_warn(adev->dev, "No ras_poison_handler interface in SRIOV for %s!\n", ras_if->name); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c index c2faf6b4c2fc..86a4865b1ae5 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c @@ -3274,7 +3274,7 @@ static int gfx_v7_0_rlc_init(struct amdgpu_device *adev) /* init spm vmid with 0xf */ if (adev->gfx.rlc.funcs->update_spm_vmid) - adev->gfx.rlc.funcs->update_spm_vmid(adev, 0xf); + adev->gfx.rlc.funcs->update_spm_vmid(adev, NULL, 0xf); return 0; } @@ -3500,7 +3500,7 @@ static int gfx_v7_0_rlc_resume(struct amdgpu_device *adev) return 0; } -static void gfx_v7_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid) +static void gfx_v7_0_update_spm_vmid(struct amdgpu_device *adev, struct amdgpu_ring *ring, unsigned vmid) { u32 data; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 885ebd703260..202ddda57f98 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -883,8 +883,8 @@ static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) gpu_addr = adev->wb.gpu_addr + (index * 4); adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD); memset(&ib, 0, sizeof(ib)); - r = amdgpu_ib_get(adev, NULL, 16, - AMDGPU_IB_POOL_DIRECT, &ib); + + r = amdgpu_ib_get(adev, NULL, 20, AMDGPU_IB_POOL_DIRECT, &ib); if (r) goto err1; @@ -1288,7 +1288,7 @@ static int gfx_v8_0_rlc_init(struct amdgpu_device *adev) /* init spm vmid with 0xf */ if (adev->gfx.rlc.funcs->update_spm_vmid) - adev->gfx.rlc.funcs->update_spm_vmid(adev, 0xf); + adev->gfx.rlc.funcs->update_spm_vmid(adev, NULL, 0xf); return 0; } @@ -1900,8 +1900,8 @@ static void gfx_v8_0_sq_irq_work_func(struct work_struct *work); static int gfx_v8_0_sw_init(void *handle) { int i, j, k, r, ring_id; + int xcc_id = 0; struct amdgpu_ring *ring; - struct amdgpu_kiq *kiq; struct amdgpu_device *adev = (struct amdgpu_device *)handle; switch (adev->asic_type) { @@ -2022,8 +2022,7 @@ static int gfx_v8_0_sw_init(void *handle) return r; } - kiq = &adev->gfx.kiq[0]; - r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq, 0); + r = amdgpu_gfx_kiq_init_ring(adev, xcc_id); if (r) return r; @@ -5579,7 +5578,7 @@ static void gfx_v8_0_unset_safe_mode(struct amdgpu_device *adev, int xcc_id) } } -static void gfx_v8_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid) +static void gfx_v8_0_update_spm_vmid(struct amdgpu_device *adev, struct amdgpu_ring *ring, unsigned vmid) { u32 data; @@ -6327,33 +6326,22 @@ static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags) amdgpu_ring_write(ring, 0); } -static unsigned gfx_v8_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring) +static unsigned gfx_v8_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring, + uint64_t addr) { unsigned ret; amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3)); - amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr)); - amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr)); - amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */ + amdgpu_ring_write(ring, lower_32_bits(addr)); + amdgpu_ring_write(ring, upper_32_bits(addr)); + /* discard following DWs if *cond_exec_gpu_addr==0 */ + amdgpu_ring_write(ring, 0); ret = ring->wptr & ring->buf_mask; - amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */ + /* patch dummy value later */ + amdgpu_ring_write(ring, 0); return ret; } -static void gfx_v8_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset) -{ - unsigned cur; - - BUG_ON(offset > ring->buf_mask); - BUG_ON(ring->ring[offset] != 0x55aa55aa); - - cur = (ring->wptr & ring->buf_mask) - 1; - if (likely(cur > offset)) - ring->ring[offset] = cur - offset; - else - ring->ring[offset] = (ring->ring_size >> 2) - offset + cur; -} - static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t reg_val_offs) { @@ -6933,7 +6921,6 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = { .emit_switch_buffer = gfx_v8_ring_emit_sb, .emit_cntxcntl = gfx_v8_ring_emit_cntxcntl, .init_cond_exec = gfx_v8_0_ring_emit_init_cond_exec, - .patch_cond_exec = gfx_v8_0_ring_emit_patch_cond_exec, .emit_wreg = gfx_v8_0_ring_emit_wreg, .soft_recovery = gfx_v8_0_ring_soft_recovery, .emit_mem_sync = gfx_v8_0_emit_mem_sync, diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index e3ff6e46f3f7..eff33569b300 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -1039,8 +1039,8 @@ static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) gpu_addr = adev->wb.gpu_addr + (index * 4); adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD); memset(&ib, 0, sizeof(ib)); - r = amdgpu_ib_get(adev, NULL, 16, - AMDGPU_IB_POOL_DIRECT, &ib); + + r = amdgpu_ib_get(adev, NULL, 20, AMDGPU_IB_POOL_DIRECT, &ib); if (r) goto err1; @@ -1249,7 +1249,7 @@ static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev) static int gfx_v9_0_init_cp_gfx_microcode(struct amdgpu_device *adev, char *chip_name) { - char fw_name[30]; + char fw_name[50]; int err; snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name); @@ -1282,7 +1282,7 @@ out: static int gfx_v9_0_init_rlc_microcode(struct amdgpu_device *adev, char *chip_name) { - char fw_name[30]; + char fw_name[53]; int err; const struct rlc_firmware_header_v2_0 *rlc_hdr; uint16_t version_major; @@ -1337,7 +1337,7 @@ static bool gfx_v9_0_load_mec2_fw_bin_support(struct amdgpu_device *adev) static int gfx_v9_0_init_cp_compute_microcode(struct amdgpu_device *adev, char *chip_name) { - char fw_name[30]; + char fw_name[50]; int err; if (amdgpu_sriov_vf(adev) && (adev->asic_type == CHIP_ALDEBARAN)) @@ -1997,8 +1997,8 @@ static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id, static int gfx_v9_0_sw_init(void *handle) { int i, j, k, r, ring_id; + int xcc_id = 0; struct amdgpu_ring *ring; - struct amdgpu_kiq *kiq; struct amdgpu_device *adev = (struct amdgpu_device *)handle; unsigned int hw_prio; @@ -2080,7 +2080,7 @@ static int gfx_v9_0_sw_init(void *handle) ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1; /* disable scheduler on the real ring */ - ring->no_scheduler = true; + ring->no_scheduler = adev->gfx.mcbp; ring->vm_hub = AMDGPU_GFXHUB(0); r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP, @@ -2090,7 +2090,7 @@ static int gfx_v9_0_sw_init(void *handle) } /* set up the software rings */ - if (adev->gfx.num_gfx_rings) { + if (adev->gfx.mcbp && adev->gfx.num_gfx_rings) { for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++) { ring = &adev->gfx.sw_gfx_ring[i]; ring->ring_obj = NULL; @@ -2151,8 +2151,7 @@ static int gfx_v9_0_sw_init(void *handle) return r; } - kiq = &adev->gfx.kiq[0]; - r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq, 0); + r = amdgpu_gfx_kiq_init_ring(adev, xcc_id); if (r) return r; @@ -2181,7 +2180,7 @@ static int gfx_v9_0_sw_fini(void *handle) int i; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - if (adev->gfx.num_gfx_rings) { + if (adev->gfx.mcbp && adev->gfx.num_gfx_rings) { for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++) amdgpu_ring_fini(&adev->gfx.sw_gfx_ring[i]); amdgpu_ring_mux_fini(&adev->gfx.muxer); @@ -3034,6 +3033,14 @@ static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev) gfx_v9_0_cp_gfx_enable(adev, true); + /* Now only limit the quirk on the APU gfx9 series and already + * confirmed that the APU gfx10/gfx11 needn't such update. + */ + if (adev->flags & AMD_IS_APU && + adev->in_s3 && !adev->suspend_complete) { + DRM_INFO(" Will skip the CSB packet resubmit\n"); + return 0; + } r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3); if (r) { DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r); @@ -4894,7 +4901,7 @@ static void gfx_v9_0_update_spm_vmid_internal(struct amdgpu_device *adev, WREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL, data); } -static void gfx_v9_0_update_spm_vmid(struct amdgpu_device *adev, unsigned int vmid) +static void gfx_v9_0_update_spm_vmid(struct amdgpu_device *adev, struct amdgpu_ring *ring, unsigned int vmid) { amdgpu_gfx_off_ctrl(adev, false); @@ -5603,31 +5610,21 @@ static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags) amdgpu_ring_write(ring, 0); } -static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring) +static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring, + uint64_t addr) { unsigned ret; amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3)); - amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr)); - amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr)); - amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */ + amdgpu_ring_write(ring, lower_32_bits(addr)); + amdgpu_ring_write(ring, upper_32_bits(addr)); + /* discard following DWs if *cond_exec_gpu_addr==0 */ + amdgpu_ring_write(ring, 0); ret = ring->wptr & ring->buf_mask; - amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */ + /* patch dummy value later */ + amdgpu_ring_write(ring, 0); return ret; } -static void gfx_v9_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset) -{ - unsigned cur; - BUG_ON(offset > ring->buf_mask); - BUG_ON(ring->ring[offset] != 0x55aa55aa); - - cur = (ring->wptr - 1) & ring->buf_mask; - if (likely(cur > offset)) - ring->ring[offset] = cur - offset; - else - ring->ring[offset] = (ring->ring_size>>2) - offset + cur; -} - static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t reg_val_offs) { @@ -5902,11 +5899,14 @@ static int gfx_v9_0_eop_irq(struct amdgpu_device *adev, switch (me_id) { case 0: - if (adev->gfx.num_gfx_rings && - !amdgpu_mcbp_handle_trailing_fence_irq(&adev->gfx.muxer)) { - /* Fence signals are handled on the software rings*/ - for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++) - amdgpu_fence_process(&adev->gfx.sw_gfx_ring[i]); + if (adev->gfx.num_gfx_rings) { + if (!adev->gfx.mcbp) { + amdgpu_fence_process(&adev->gfx.gfx_ring[0]); + } else if (!amdgpu_mcbp_handle_trailing_fence_irq(&adev->gfx.muxer)) { + /* Fence signals are handled on the software rings*/ + for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++) + amdgpu_fence_process(&adev->gfx.sw_gfx_ring[i]); + } } break; case 1: @@ -6901,7 +6901,6 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = { .emit_switch_buffer = gfx_v9_ring_emit_sb, .emit_cntxcntl = gfx_v9_ring_emit_cntxcntl, .init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec, - .patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec, .preempt_ib = gfx_v9_0_ring_preempt_ib, .emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl, .emit_wreg = gfx_v9_0_ring_emit_wreg, @@ -6956,7 +6955,6 @@ static const struct amdgpu_ring_funcs gfx_v9_0_sw_ring_funcs_gfx = { .emit_switch_buffer = gfx_v9_ring_emit_sb, .emit_cntxcntl = gfx_v9_ring_emit_cntxcntl, .init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec, - .patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec, .emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl, .emit_wreg = gfx_v9_0_ring_emit_wreg, .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait, @@ -6983,7 +6981,6 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = { 7 + /* gfx_v9_0_ring_emit_pipeline_sync */ SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + - 2 + /* gfx_v9_0_ring_emit_vm_flush */ 8 + 8 + 8 + /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */ 7 + /* gfx_v9_0_emit_mem_sync */ 5 + /* gfx_v9_0_emit_wave_limit for updating mmSPI_WCL_PIPE_PERCENT_GFX register */ @@ -7021,7 +7018,6 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = { 7 + /* gfx_v9_0_ring_emit_pipeline_sync */ SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + - 2 + /* gfx_v9_0_ring_emit_vm_flush */ 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */ .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */ .emit_fence = gfx_v9_0_ring_emit_fence_kiq, @@ -7043,7 +7039,7 @@ static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev) for (i = 0; i < adev->gfx.num_gfx_rings; i++) adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx; - if (adev->gfx.num_gfx_rings) { + if (adev->gfx.mcbp && adev->gfx.num_gfx_rings) { for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++) adev->gfx.sw_gfx_ring[i].funcs = &gfx_v9_0_sw_ring_funcs_gfx; } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4.c index bc8416afb62c..f53b379d8971 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4.c @@ -970,8 +970,9 @@ static void gfx_v9_4_reset_ras_error_count(struct amdgpu_device *adev) WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_DSM_INDEX, 255); } -static const struct soc15_reg_entry gfx_v9_4_ea_err_status_regs = - { SOC15_REG_ENTRY(GC, 0, mmGCEA_ERR_STATUS), 0, 1, 32 }; +static const struct soc15_reg_entry gfx_v9_4_ea_err_status_regs = { + SOC15_REG_ENTRY(GC, 0, mmGCEA_ERR_STATUS), 0, 1, 32 +}; static void gfx_v9_4_query_ras_error_status(struct amdgpu_device *adev) { diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c index 065b2bd5f5a6..3f4fd2f08163 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c @@ -1909,18 +1909,7 @@ static void gfx_v9_4_2_reset_sq_timeout_status(struct amdgpu_device *adev) mutex_unlock(&adev->grbm_idx_mutex); } -static bool gfx_v9_4_2_query_uctl2_poison_status(struct amdgpu_device *adev) -{ - u32 status = 0; - struct amdgpu_vmhub *hub; - - hub = &adev->vmhub[AMDGPU_GFXHUB(0)]; - status = RREG32(hub->vm_l2_pro_fault_status); - /* reset page fault status */ - WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1); - return REG_GET_FIELD(status, VM_L2_PROTECTION_FAULT_STATUS, FED); -} struct amdgpu_ras_block_hw_ops gfx_v9_4_2_ras_ops = { .query_ras_error_count = &gfx_v9_4_2_query_ras_error_count, @@ -1934,5 +1923,4 @@ struct amdgpu_gfx_ras gfx_v9_4_2_ras = { .hw_ops = &gfx_v9_4_2_ras_ops, }, .enable_watchdog_timer = &gfx_v9_4_2_enable_watchdog_timer, - .query_utcl2_poison_status = gfx_v9_4_2_query_uctl2_poison_status, }; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index db179d085efa..fc33354f1d3d 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -38,6 +38,7 @@ #include "gfx_v9_4_3.h" #include "amdgpu_xcp.h" +#include "amdgpu_aca.h" MODULE_FIRMWARE("amdgpu/gc_9_4_3_mec.bin"); MODULE_FIRMWARE("amdgpu/gc_9_4_3_rlc.bin"); @@ -48,6 +49,10 @@ MODULE_FIRMWARE("amdgpu/gc_9_4_3_rlc.bin"); #define GOLDEN_GB_ADDR_CONFIG 0x2a114042 #define CP_HQD_PERSISTENT_STATE_DEFAULT 0xbe05301 +#define mmSMNAID_XCD0_MCA_SMU 0x36430400 /* SMN AID XCD0 */ +#define mmSMNAID_XCD1_MCA_SMU 0x38430400 /* SMN AID XCD1 */ +#define mmSMNXCD_XCD0_MCA_SMU 0x40430400 /* SMN XCD XCD0 */ + struct amdgpu_gfx_ras gfx_v9_4_3_ras; static void gfx_v9_4_3_set_ring_funcs(struct amdgpu_device *adev); @@ -256,6 +261,7 @@ static int gfx_v9_4_3_ring_test_ring(struct amdgpu_ring *ring) xcc_offset = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG0); scratch_reg0_offset = SOC15_REG_OFFSET(GC, GET_INST(GC, ring->xcc_id), regSCRATCH_REG0); WREG32(scratch_reg0_offset, 0xCAFEDEAD); + tmp = RREG32(scratch_reg0_offset); r = amdgpu_ring_alloc(ring, 3); if (r) @@ -296,8 +302,8 @@ static int gfx_v9_4_3_ring_test_ib(struct amdgpu_ring *ring, long timeout) gpu_addr = adev->wb.gpu_addr + (index * 4); adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD); memset(&ib, 0, sizeof(ib)); - r = amdgpu_ib_get(adev, NULL, 16, - AMDGPU_IB_POOL_DIRECT, &ib); + + r = amdgpu_ib_get(adev, NULL, 20, AMDGPU_IB_POOL_DIRECT, &ib); if (r) goto err1; @@ -623,7 +629,7 @@ static int gfx_v9_4_3_switch_compute_partition(struct amdgpu_device *adev, int num_xccs_per_xcp) { int ret, i, num_xcc; - u32 tmp = 0, regval; + u32 tmp = 0; if (adev->psp.funcs) { ret = psp_spatial_partition(&adev->psp, @@ -631,24 +637,23 @@ static int gfx_v9_4_3_switch_compute_partition(struct amdgpu_device *adev, num_xccs_per_xcp); if (ret) return ret; - } - - num_xcc = NUM_XCC(adev->gfx.xcc_mask); + } else { + num_xcc = NUM_XCC(adev->gfx.xcc_mask); - for (i = 0; i < num_xcc; i++) { - tmp = REG_SET_FIELD(tmp, CP_HYP_XCP_CTL, NUM_XCC_IN_XCP, - num_xccs_per_xcp); - tmp = REG_SET_FIELD(tmp, CP_HYP_XCP_CTL, VIRTUAL_XCC_ID, - i % num_xccs_per_xcp); - regval = RREG32_SOC15(GC, GET_INST(GC, i), regCP_HYP_XCP_CTL); - if (regval != tmp) + for (i = 0; i < num_xcc; i++) { + tmp = REG_SET_FIELD(tmp, CP_HYP_XCP_CTL, NUM_XCC_IN_XCP, + num_xccs_per_xcp); + tmp = REG_SET_FIELD(tmp, CP_HYP_XCP_CTL, VIRTUAL_XCC_ID, + i % num_xccs_per_xcp); WREG32_SOC15(GC, GET_INST(GC, i), regCP_HYP_XCP_CTL, tmp); + } + ret = 0; } adev->gfx.num_xcc_per_xcp = num_xccs_per_xcp; - return 0; + return ret; } static int gfx_v9_4_3_ih_to_xcc_inst(struct amdgpu_device *adev, int ih_node) @@ -675,6 +680,72 @@ static const struct amdgpu_gfx_funcs gfx_v9_4_3_gfx_funcs = { .ih_node_to_logical_xcc = &gfx_v9_4_3_ih_to_xcc_inst, }; +static int gfx_v9_4_3_aca_bank_parser(struct aca_handle *handle, + struct aca_bank *bank, enum aca_smu_type type, + void *data) +{ + struct aca_bank_info info; + u64 misc0; + u32 instlo; + int ret; + + ret = aca_bank_info_decode(bank, &info); + if (ret) + return ret; + + /* NOTE: overwrite info.die_id with xcd id for gfx */ + instlo = ACA_REG__IPID__INSTANCEIDLO(bank->regs[ACA_REG_IDX_IPID]); + instlo &= GENMASK(31, 1); + info.die_id = instlo == mmSMNAID_XCD0_MCA_SMU ? 0 : 1; + + misc0 = bank->regs[ACA_REG_IDX_MISC0]; + + switch (type) { + case ACA_SMU_TYPE_UE: + ret = aca_error_cache_log_bank_error(handle, &info, + ACA_ERROR_TYPE_UE, 1ULL); + break; + case ACA_SMU_TYPE_CE: + ret = aca_error_cache_log_bank_error(handle, &info, + ACA_ERROR_TYPE_CE, ACA_REG__MISC0__ERRCNT(misc0)); + break; + default: + return -EINVAL; + } + + return ret; +} + +static bool gfx_v9_4_3_aca_bank_is_valid(struct aca_handle *handle, struct aca_bank *bank, + enum aca_smu_type type, void *data) +{ + u32 instlo; + + instlo = ACA_REG__IPID__INSTANCEIDLO(bank->regs[ACA_REG_IDX_IPID]); + instlo &= GENMASK(31, 1); + switch (instlo) { + case mmSMNAID_XCD0_MCA_SMU: + case mmSMNAID_XCD1_MCA_SMU: + case mmSMNXCD_XCD0_MCA_SMU: + return true; + default: + break; + } + + return false; +} + +static const struct aca_bank_ops gfx_v9_4_3_aca_bank_ops = { + .aca_bank_parser = gfx_v9_4_3_aca_bank_parser, + .aca_bank_is_valid = gfx_v9_4_3_aca_bank_is_valid, +}; + +static const struct aca_info gfx_v9_4_3_aca_info = { + .hwip = ACA_HWIP_TYPE_SMU, + .mask = ACA_ERROR_UE_MASK | ACA_ERROR_CE_MASK, + .bank_ops = &gfx_v9_4_3_aca_bank_ops, +}; + static int gfx_v9_4_3_gpu_early_init(struct amdgpu_device *adev) { u32 gb_addr_config; @@ -778,7 +849,6 @@ static int gfx_v9_4_3_compute_ring_init(struct amdgpu_device *adev, int ring_id, static int gfx_v9_4_3_sw_init(void *handle) { int i, j, k, r, ring_id, xcc_id, num_xcc; - struct amdgpu_kiq *kiq; struct amdgpu_device *adev = (struct amdgpu_device *)handle; adev->gfx.mec.num_mec = 2; @@ -847,8 +917,7 @@ static int gfx_v9_4_3_sw_init(void *handle) return r; } - kiq = &adev->gfx.kiq[xcc_id]; - r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq, xcc_id); + r = amdgpu_gfx_kiq_init_ring(adev, xcc_id); if (r) return r; @@ -1102,13 +1171,14 @@ static void gfx_v9_4_3_init_rlcg_reg_access_ctrl(struct amdgpu_device *adev) reg_access_ctrl->grbm_idx = SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regGRBM_GFX_INDEX); reg_access_ctrl->spare_int = SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regRLC_SPARE_INT); } + adev->gfx.rlc.rlcg_reg_access_supported = true; } static int gfx_v9_4_3_rlc_init(struct amdgpu_device *adev) { /* init spm vmid with 0xf */ if (adev->gfx.rlc.funcs->update_spm_vmid) - adev->gfx.rlc.funcs->update_spm_vmid(adev, 0xf); + adev->gfx.rlc.funcs->update_spm_vmid(adev, NULL, 0xf); return 0; } @@ -1319,7 +1389,7 @@ static int gfx_v9_4_3_rlc_resume(struct amdgpu_device *adev) return 0; } -static void gfx_v9_4_3_update_spm_vmid(struct amdgpu_device *adev, +static void gfx_v9_4_3_update_spm_vmid(struct amdgpu_device *adev, struct amdgpu_ring *ring, unsigned vmid) { u32 reg, data; @@ -2738,16 +2808,16 @@ static void gfx_v9_4_3_xcc_set_compute_eop_interrupt_state( switch (state) { case AMDGPU_IRQ_STATE_DISABLE: - mec_int_cntl = RREG32(mec_int_cntl_reg); + mec_int_cntl = RREG32_XCC(mec_int_cntl_reg, xcc_id); mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, TIME_STAMP_INT_ENABLE, 0); - WREG32(mec_int_cntl_reg, mec_int_cntl); + WREG32_XCC(mec_int_cntl_reg, mec_int_cntl, xcc_id); break; case AMDGPU_IRQ_STATE_ENABLE: - mec_int_cntl = RREG32(mec_int_cntl_reg); + mec_int_cntl = RREG32_XCC(mec_int_cntl_reg, xcc_id); mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, TIME_STAMP_INT_ENABLE, 1); - WREG32(mec_int_cntl_reg, mec_int_cntl); + WREG32_XCC(mec_int_cntl_reg, mec_int_cntl, xcc_id); break; default: break; @@ -3755,10 +3825,6 @@ static const struct amdgpu_gfx_ras_reg_entry gfx_v9_4_3_ue_reg_list[] = { AMDGPU_GFX_LDS_MEM, 4}, }; -static const struct soc15_reg_entry gfx_v9_4_3_ea_err_status_regs = { - SOC15_REG_ENTRY(GC, 0, regGCEA_ERR_STATUS), 0, 1, 16 -}; - static void gfx_v9_4_3_inst_query_ras_err_count(struct amdgpu_device *adev, void *ras_error_status, int xcc_id) { @@ -3803,6 +3869,27 @@ static void gfx_v9_4_3_inst_query_ras_err_count(struct amdgpu_device *adev, } } + /* handle extra register entries of UE */ + for (; i < ARRAY_SIZE(gfx_v9_4_3_ue_reg_list); i++) { + for (j = 0; j < gfx_v9_4_3_ue_reg_list[i].se_num; j++) { + for (k = 0; k < gfx_v9_4_3_ue_reg_list[i].reg_entry.reg_inst; k++) { + /* no need to select if instance number is 1 */ + if (gfx_v9_4_3_ue_reg_list[i].se_num > 1 || + gfx_v9_4_3_ue_reg_list[i].reg_entry.reg_inst > 1) + gfx_v9_4_3_xcc_select_se_sh(adev, j, 0, k, xcc_id); + + amdgpu_ras_inst_query_ras_error_count(adev, + &(gfx_v9_4_3_ue_reg_list[i].reg_entry), + 1, + gfx_v9_4_3_ras_mem_list_array[gfx_v9_4_3_ue_reg_list[i].mem_id_type].mem_id_ent, + gfx_v9_4_3_ras_mem_list_array[gfx_v9_4_3_ue_reg_list[i].mem_id_type].size, + GET_INST(GC, xcc_id), + AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE, + &ue_count); + } + } + } + gfx_v9_4_3_xcc_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, xcc_id); mutex_unlock(&adev->grbm_idx_mutex); @@ -3810,8 +3897,8 @@ static void gfx_v9_4_3_inst_query_ras_err_count(struct amdgpu_device *adev, /* the caller should make sure initialize value of * err_data->ue_count and err_data->ce_count */ - amdgpu_ras_error_statistic_ue_count(err_data, &mcm_info, ue_count); - amdgpu_ras_error_statistic_ce_count(err_data, &mcm_info, ce_count); + amdgpu_ras_error_statistic_ue_count(err_data, &mcm_info, NULL, ue_count); + amdgpu_ras_error_statistic_ce_count(err_data, &mcm_info, NULL, ce_count); } static void gfx_v9_4_3_inst_reset_ras_err_count(struct amdgpu_device *adev, @@ -3842,217 +3929,37 @@ static void gfx_v9_4_3_inst_reset_ras_err_count(struct amdgpu_device *adev, } } - gfx_v9_4_3_xcc_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, - xcc_id); - mutex_unlock(&adev->grbm_idx_mutex); -} - -static void gfx_v9_4_3_inst_query_ea_err_status(struct amdgpu_device *adev, - int xcc_id) -{ - uint32_t i, j; - uint32_t reg_value; - - mutex_lock(&adev->grbm_idx_mutex); - - for (i = 0; i < gfx_v9_4_3_ea_err_status_regs.se_num; i++) { - for (j = 0; j < gfx_v9_4_3_ea_err_status_regs.instance; j++) { - gfx_v9_4_3_xcc_select_se_sh(adev, i, 0, j, xcc_id); - reg_value = RREG32_SOC15(GC, GET_INST(GC, xcc_id), - regGCEA_ERR_STATUS); - if (REG_GET_FIELD(reg_value, GCEA_ERR_STATUS, SDP_RDRSP_STATUS) || - REG_GET_FIELD(reg_value, GCEA_ERR_STATUS, SDP_WRRSP_STATUS) || - REG_GET_FIELD(reg_value, GCEA_ERR_STATUS, SDP_RDRSP_DATAPARITY_ERROR)) { - dev_warn(adev->dev, - "GCEA err detected at instance: %d, status: 0x%x!\n", - j, reg_value); - } - /* clear after read */ - reg_value = REG_SET_FIELD(reg_value, GCEA_ERR_STATUS, - CLEAR_ERROR_STATUS, 0x1); - WREG32_SOC15(GC, GET_INST(GC, xcc_id), regGCEA_ERR_STATUS, - reg_value); - } - } - - gfx_v9_4_3_xcc_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, - xcc_id); - mutex_unlock(&adev->grbm_idx_mutex); -} - -static void gfx_v9_4_3_inst_query_utc_err_status(struct amdgpu_device *adev, - int xcc_id) -{ - uint32_t data; - - data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regUTCL2_MEM_ECC_STATUS); - if (data) { - dev_warn(adev->dev, "GFX UTCL2 Mem Ecc Status: 0x%x!\n", data); - WREG32_SOC15(GC, GET_INST(GC, xcc_id), regUTCL2_MEM_ECC_STATUS, 0x3); - } - - data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regVML2_MEM_ECC_STATUS); - if (data) { - dev_warn(adev->dev, "GFX VML2 Mem Ecc Status: 0x%x!\n", data); - WREG32_SOC15(GC, GET_INST(GC, xcc_id), regVML2_MEM_ECC_STATUS, 0x3); - } - - data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), - regVML2_WALKER_MEM_ECC_STATUS); - if (data) { - dev_warn(adev->dev, "GFX VML2 Walker Mem Ecc Status: 0x%x!\n", data); - WREG32_SOC15(GC, GET_INST(GC, xcc_id), regVML2_WALKER_MEM_ECC_STATUS, - 0x3); - } -} - -static void gfx_v9_4_3_log_cu_timeout_status(struct amdgpu_device *adev, - uint32_t status, int xcc_id) -{ - struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info; - uint32_t i, simd, wave; - uint32_t wave_status; - uint32_t wave_pc_lo, wave_pc_hi; - uint32_t wave_exec_lo, wave_exec_hi; - uint32_t wave_inst_dw0, wave_inst_dw1; - uint32_t wave_ib_sts; - - for (i = 0; i < 32; i++) { - if (!((i << 1) & status)) - continue; - - simd = i / cu_info->max_waves_per_simd; - wave = i % cu_info->max_waves_per_simd; - - wave_status = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_STATUS); - wave_pc_lo = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_PC_LO); - wave_pc_hi = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_PC_HI); - wave_exec_lo = - wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_EXEC_LO); - wave_exec_hi = - wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_EXEC_HI); - wave_inst_dw0 = - wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_INST_DW0); - wave_inst_dw1 = - wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_INST_DW1); - wave_ib_sts = wave_read_ind(adev, xcc_id, simd, wave, ixSQ_WAVE_IB_STS); - - dev_info( - adev->dev, - "\t SIMD %d, Wave %d: status 0x%x, pc 0x%llx, exec 0x%llx, inst 0x%llx, ib_sts 0x%x\n", - simd, wave, wave_status, - ((uint64_t)wave_pc_hi << 32 | wave_pc_lo), - ((uint64_t)wave_exec_hi << 32 | wave_exec_lo), - ((uint64_t)wave_inst_dw1 << 32 | wave_inst_dw0), - wave_ib_sts); - } -} - -static void gfx_v9_4_3_inst_query_sq_timeout_status(struct amdgpu_device *adev, - int xcc_id) -{ - uint32_t se_idx, sh_idx, cu_idx; - uint32_t status; + /* handle extra register entries of UE */ + for (; i < ARRAY_SIZE(gfx_v9_4_3_ue_reg_list); i++) { + for (j = 0; j < gfx_v9_4_3_ue_reg_list[i].se_num; j++) { + for (k = 0; k < gfx_v9_4_3_ue_reg_list[i].reg_entry.reg_inst; k++) { + /* no need to select if instance number is 1 */ + if (gfx_v9_4_3_ue_reg_list[i].se_num > 1 || + gfx_v9_4_3_ue_reg_list[i].reg_entry.reg_inst > 1) + gfx_v9_4_3_xcc_select_se_sh(adev, j, 0, k, xcc_id); - mutex_lock(&adev->grbm_idx_mutex); - for (se_idx = 0; se_idx < adev->gfx.config.max_shader_engines; se_idx++) { - for (sh_idx = 0; sh_idx < adev->gfx.config.max_sh_per_se; sh_idx++) { - for (cu_idx = 0; cu_idx < adev->gfx.config.max_cu_per_sh; cu_idx++) { - gfx_v9_4_3_xcc_select_se_sh(adev, se_idx, sh_idx, - cu_idx, xcc_id); - status = RREG32_SOC15(GC, GET_INST(GC, xcc_id), - regSQ_TIMEOUT_STATUS); - if (status != 0) { - dev_info( - adev->dev, - "GFX Watchdog Timeout: SE %d, SH %d, CU %d\n", - se_idx, sh_idx, cu_idx); - gfx_v9_4_3_log_cu_timeout_status( - adev, status, xcc_id); - } - /* clear old status */ - WREG32_SOC15(GC, GET_INST(GC, xcc_id), - regSQ_TIMEOUT_STATUS, 0); + amdgpu_ras_inst_reset_ras_error_count(adev, + &(gfx_v9_4_3_ue_reg_list[i].reg_entry), + 1, + GET_INST(GC, xcc_id)); } } } - gfx_v9_4_3_xcc_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, - xcc_id); - mutex_unlock(&adev->grbm_idx_mutex); -} - -static void gfx_v9_4_3_inst_query_ras_err_status(struct amdgpu_device *adev, - void *ras_error_status, int xcc_id) -{ - gfx_v9_4_3_inst_query_ea_err_status(adev, xcc_id); - gfx_v9_4_3_inst_query_utc_err_status(adev, xcc_id); - gfx_v9_4_3_inst_query_sq_timeout_status(adev, xcc_id); -} - -static void gfx_v9_4_3_inst_reset_utc_err_status(struct amdgpu_device *adev, - int xcc_id) -{ - WREG32_SOC15(GC, GET_INST(GC, xcc_id), regUTCL2_MEM_ECC_STATUS, 0x3); - WREG32_SOC15(GC, GET_INST(GC, xcc_id), regVML2_MEM_ECC_STATUS, 0x3); - WREG32_SOC15(GC, GET_INST(GC, xcc_id), regVML2_WALKER_MEM_ECC_STATUS, 0x3); -} -static void gfx_v9_4_3_inst_reset_ea_err_status(struct amdgpu_device *adev, - int xcc_id) -{ - uint32_t i, j; - uint32_t value; - - mutex_lock(&adev->grbm_idx_mutex); - for (i = 0; i < gfx_v9_4_3_ea_err_status_regs.se_num; i++) { - for (j = 0; j < gfx_v9_4_3_ea_err_status_regs.instance; j++) { - gfx_v9_4_3_xcc_select_se_sh(adev, i, 0, j, xcc_id); - value = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regGCEA_ERR_STATUS); - value = REG_SET_FIELD(value, GCEA_ERR_STATUS, - CLEAR_ERROR_STATUS, 0x1); - WREG32_SOC15(GC, GET_INST(GC, xcc_id), regGCEA_ERR_STATUS, value); - } - } gfx_v9_4_3_xcc_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, xcc_id); mutex_unlock(&adev->grbm_idx_mutex); } -static void gfx_v9_4_3_inst_reset_sq_timeout_status(struct amdgpu_device *adev, - int xcc_id) -{ - uint32_t se_idx, sh_idx, cu_idx; - - mutex_lock(&adev->grbm_idx_mutex); - for (se_idx = 0; se_idx < adev->gfx.config.max_shader_engines; se_idx++) { - for (sh_idx = 0; sh_idx < adev->gfx.config.max_sh_per_se; sh_idx++) { - for (cu_idx = 0; cu_idx < adev->gfx.config.max_cu_per_sh; cu_idx++) { - gfx_v9_4_3_xcc_select_se_sh(adev, se_idx, sh_idx, - cu_idx, xcc_id); - WREG32_SOC15(GC, GET_INST(GC, xcc_id), - regSQ_TIMEOUT_STATUS, 0); - } - } - } - gfx_v9_4_3_xcc_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, - xcc_id); - mutex_unlock(&adev->grbm_idx_mutex); -} - -static void gfx_v9_4_3_inst_reset_ras_err_status(struct amdgpu_device *adev, - void *ras_error_status, int xcc_id) -{ - gfx_v9_4_3_inst_reset_utc_err_status(adev, xcc_id); - gfx_v9_4_3_inst_reset_ea_err_status(adev, xcc_id); - gfx_v9_4_3_inst_reset_sq_timeout_status(adev, xcc_id); -} - static void gfx_v9_4_3_inst_enable_watchdog_timer(struct amdgpu_device *adev, void *ras_error_status, int xcc_id) { uint32_t i; uint32_t data; + if (amdgpu_sriov_vf(adev)) + return; + data = RREG32_SOC15(GC, GET_INST(GC, 0), regSQ_TIMEOUT_CONFIG); data = REG_SET_FIELD(data, SQ_TIMEOUT_CONFIG, TIMEOUT_FATAL_DISABLE, amdgpu_watchdog_timer.timeout_fatal_disable ? 1 : 0); @@ -4088,16 +3995,6 @@ static void gfx_v9_4_3_reset_ras_error_count(struct amdgpu_device *adev) amdgpu_gfx_ras_error_func(adev, NULL, gfx_v9_4_3_inst_reset_ras_err_count); } -static void gfx_v9_4_3_query_ras_error_status(struct amdgpu_device *adev) -{ - amdgpu_gfx_ras_error_func(adev, NULL, gfx_v9_4_3_inst_query_ras_err_status); -} - -static void gfx_v9_4_3_reset_ras_error_status(struct amdgpu_device *adev) -{ - amdgpu_gfx_ras_error_func(adev, NULL, gfx_v9_4_3_inst_reset_ras_err_status); -} - static void gfx_v9_4_3_enable_watchdog_timer(struct amdgpu_device *adev) { amdgpu_gfx_ras_error_func(adev, NULL, gfx_v9_4_3_inst_enable_watchdog_timer); @@ -4360,7 +4257,7 @@ const struct amdgpu_ip_block_version gfx_v9_4_3_ip_block = { .type = AMD_IP_BLOCK_TYPE_GFX, .major = 9, .minor = 4, - .rev = 0, + .rev = 3, .funcs = &gfx_v9_4_3_ip_funcs, }; @@ -4415,13 +4312,34 @@ struct amdgpu_xcp_ip_funcs gfx_v9_4_3_xcp_funcs = { struct amdgpu_ras_block_hw_ops gfx_v9_4_3_ras_ops = { .query_ras_error_count = &gfx_v9_4_3_query_ras_error_count, .reset_ras_error_count = &gfx_v9_4_3_reset_ras_error_count, - .query_ras_error_status = &gfx_v9_4_3_query_ras_error_status, - .reset_ras_error_status = &gfx_v9_4_3_reset_ras_error_status, }; +static int gfx_v9_4_3_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block) +{ + int r; + + r = amdgpu_ras_block_late_init(adev, ras_block); + if (r) + return r; + + r = amdgpu_ras_bind_aca(adev, AMDGPU_RAS_BLOCK__GFX, + &gfx_v9_4_3_aca_info, + NULL); + if (r) + goto late_fini; + + return 0; + +late_fini: + amdgpu_ras_block_late_fini(adev, ras_block); + + return r; +} + struct amdgpu_gfx_ras gfx_v9_4_3_ras = { .ras_block = { .hw_ops = &gfx_v9_4_3_ras_ops, + .ras_late_init = &gfx_v9_4_3_ras_late_init, }, .enable_watchdog_timer = &gfx_v9_4_3_enable_watchdog_timer, }; diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c index 53a2ba5fcf4b..d200310d1731 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c @@ -102,7 +102,9 @@ static void gfxhub_v1_0_init_system_aperture_regs(struct amdgpu_device *adev) WREG32_SOC15_RLC(GC, 0, mmMC_VM_SYSTEM_APERTURE_LOW_ADDR, min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18); - if (adev->apu_flags & AMD_APU_IS_RAVEN2) + if (adev->apu_flags & (AMD_APU_IS_RAVEN2 | + AMD_APU_IS_RENOIR | + AMD_APU_IS_GREEN_SARDINE)) /* * Raven2 has a HW issue that it is unable to use the * vram which is out of MC_VM_SYSTEM_APERTURE_HIGH_ADDR. @@ -441,6 +443,22 @@ static void gfxhub_v1_0_init(struct amdgpu_device *adev) mmVM_INVALIDATE_ENG0_ADDR_RANGE_LO32; } +static bool gfxhub_v1_0_query_utcl2_poison_status(struct amdgpu_device *adev, + int xcc_id) +{ + u32 status = 0; + struct amdgpu_vmhub *hub; + + if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 2)) + return false; + + hub = &adev->vmhub[AMDGPU_GFXHUB(0)]; + status = RREG32(hub->vm_l2_pro_fault_status); + /* reset page fault status */ + WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1); + + return REG_GET_FIELD(status, VM_L2_PROTECTION_FAULT_STATUS, FED); +} const struct amdgpu_gfxhub_funcs gfxhub_v1_0_funcs = { .get_mc_fb_offset = gfxhub_v1_0_get_mc_fb_offset, @@ -450,4 +468,5 @@ const struct amdgpu_gfxhub_funcs gfxhub_v1_0_funcs = { .set_fault_enable_default = gfxhub_v1_0_set_fault_enable_default, .init = gfxhub_v1_0_init, .get_xgmi_info = gfxhub_v1_1_get_xgmi_info, + .query_utcl2_poison_status = gfxhub_v1_0_query_utcl2_poison_status, }; diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c index 55423ff1bb49..77df8c9cbad2 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c @@ -139,7 +139,9 @@ gfxhub_v1_2_xcc_init_system_aperture_regs(struct amdgpu_device *adev, WREG32_SOC15_RLC(GC, GET_INST(GC, i), regMC_VM_SYSTEM_APERTURE_LOW_ADDR, min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18); - if (adev->apu_flags & AMD_APU_IS_RAVEN2) + if (adev->apu_flags & (AMD_APU_IS_RAVEN2 | + AMD_APU_IS_RENOIR | + AMD_APU_IS_GREEN_SARDINE)) /* * Raven2 has a HW issue that it is unable to use the * vram which is out of MC_VM_SYSTEM_APERTURE_HIGH_ADDR. @@ -454,10 +456,12 @@ static void gfxhub_v1_2_xcc_gart_disable(struct amdgpu_device *adev, WREG32_SOC15_RLC(GC, GET_INST(GC, j), regMC_VM_MX_L1_TLB_CNTL, tmp); /* Setup L2 cache */ - tmp = RREG32_SOC15(GC, GET_INST(GC, j), regVM_L2_CNTL); - tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 0); - WREG32_SOC15(GC, GET_INST(GC, j), regVM_L2_CNTL, tmp); - WREG32_SOC15(GC, GET_INST(GC, j), regVM_L2_CNTL3, 0); + if (!amdgpu_sriov_vf(adev)) { + tmp = RREG32_SOC15(GC, GET_INST(GC, j), regVM_L2_CNTL); + tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 0); + WREG32_SOC15(GC, GET_INST(GC, j), regVM_L2_CNTL, tmp); + WREG32_SOC15(GC, GET_INST(GC, j), regVM_L2_CNTL3, 0); + } } } @@ -616,6 +620,20 @@ static int gfxhub_v1_2_get_xgmi_info(struct amdgpu_device *adev) return 0; } +static bool gfxhub_v1_2_query_utcl2_poison_status(struct amdgpu_device *adev, + int xcc_id) +{ + u32 fed, status; + + status = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regVM_L2_PROTECTION_FAULT_STATUS); + fed = REG_GET_FIELD(status, VM_L2_PROTECTION_FAULT_STATUS, FED); + /* reset page fault status */ + WREG32_P(SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), + regVM_L2_PROTECTION_FAULT_STATUS), 1, ~1); + + return fed; +} + const struct amdgpu_gfxhub_funcs gfxhub_v1_2_funcs = { .get_mc_fb_offset = gfxhub_v1_2_get_mc_fb_offset, .setup_vm_pt_regs = gfxhub_v1_2_setup_vm_pt_regs, @@ -624,6 +642,7 @@ const struct amdgpu_gfxhub_funcs gfxhub_v1_2_funcs = { .set_fault_enable_default = gfxhub_v1_2_set_fault_enable_default, .init = gfxhub_v1_2_init, .get_xgmi_info = gfxhub_v1_2_get_xgmi_info, + .query_utcl2_poison_status = gfxhub_v1_2_query_utcl2_poison_status, }; static int gfxhub_v1_2_xcp_resume(void *handle, uint32_t inst_mask) diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c index cd0e8a321e46..17509f32f61a 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c @@ -155,6 +155,9 @@ static void gfxhub_v2_1_init_system_aperture_regs(struct amdgpu_device *adev) { uint64_t value; + if (amdgpu_sriov_vf(adev)) + return; + /* Program the AGP BAR */ WREG32_SOC15(GC, 0, mmGCMC_VM_AGP_BASE, 0); WREG32_SOC15(GC, 0, mmGCMC_VM_AGP_BOT, adev->gmc.agp_start >> 24); diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c index d8a4fddab9c1..d933e19e0cf5 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c @@ -105,7 +105,7 @@ static int gmc_v10_0_process_interrupt(struct amdgpu_device *adev, struct amdgpu_vmhub *hub = &adev->vmhub[vmhub_index]; bool retry_fault = !!(entry->src_data[1] & 0x80); bool write_fault = !!(entry->src_data[1] & 0x20); - struct amdgpu_task_info task_info; + struct amdgpu_task_info *task_info; uint32_t status = 0; u64 addr; @@ -157,18 +157,22 @@ static int gmc_v10_0_process_interrupt(struct amdgpu_device *adev, if (!printk_ratelimit()) return 0; - memset(&task_info, 0, sizeof(struct amdgpu_task_info)); - amdgpu_vm_get_task_info(adev, entry->pasid, &task_info); - dev_err(adev->dev, - "[%s] page fault (src_id:%u ring:%u vmid:%u pasid:%u, for process %s pid %d thread %s pid %d)\n", + "[%s] page fault (src_id:%u ring:%u vmid:%u pasid:%u)\n", entry->vmid_src ? "mmhub" : "gfxhub", - entry->src_id, entry->ring_id, entry->vmid, - entry->pasid, task_info.process_name, task_info.tgid, - task_info.task_name, task_info.pid); + entry->src_id, entry->ring_id, entry->vmid, entry->pasid); + task_info = amdgpu_vm_get_task_info_pasid(adev, entry->pasid); + if (task_info) { + dev_err(adev->dev, + " in process %s pid %d thread %s pid %d\n", + task_info->process_name, task_info->tgid, + task_info->task_name, task_info->pid); + amdgpu_vm_put_task_info(task_info); + } + dev_err(adev->dev, " in page starting at address 0x%016llx from client 0x%x (%s)\n", - addr, entry->client_id, - soc15_ih_clientid_name[entry->client_id]); + addr, entry->client_id, + soc15_ih_clientid_name[entry->client_id]); if (!amdgpu_sriov_vf(adev)) hub->vmhub_funcs->print_l2_protection_fault_status(adev, @@ -262,16 +266,17 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, /* flush hdp cache */ adev->hdp.funcs->flush_hdp(adev, NULL); - /* For SRIOV run time, driver shouldn't access the register through MMIO - * Directly use kiq to do the vm invalidation instead + /* This is necessary for SRIOV as well as for GFXOFF to function + * properly under bare metal */ if (adev->gfx.kiq[0].ring.sched.ready && !adev->enable_mes && (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev))) { - amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, inv_req, - 1 << vmid); + amdgpu_gmc_fw_reg_write_reg_wait(adev, req, ack, inv_req, + 1 << vmid, GET_INST(GC, 0)); return; } + /* This path is needed before KIQ/MES/GFXOFF are set up */ hub_ip = (vmhub == AMDGPU_GFXHUB(0)) ? GC_HWIP : MMHUB_HWIP; spin_lock(&adev->gmc.invalidate_lock); @@ -672,9 +677,10 @@ static void gmc_v10_0_vram_gtt_location(struct amdgpu_device *adev, /* add the xgmi offset of the physical node */ base += adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size; + amdgpu_gmc_set_agp_default(adev, mc); amdgpu_gmc_vram_location(adev, &adev->gmc, base); amdgpu_gmc_gart_location(adev, mc, AMDGPU_GART_PLACEMENT_BEST_FIT); - if (!amdgpu_sriov_vf(adev)) + if (!amdgpu_sriov_vf(adev) && (amdgpu_agp == 1)) amdgpu_gmc_agp_location(adev, mc); /* base offset of vram pages */ @@ -1040,6 +1046,10 @@ static int gmc_v10_0_hw_fini(void *handle) amdgpu_irq_put(adev, &adev->gmc.vm_fault, 0); + if (adev->gmc.ecc_irq.funcs && + amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__UMC)) + amdgpu_irq_put(adev, &adev->gmc.ecc_irq, 0); + return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c index 19eaada35ede..527dc917e049 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c @@ -73,7 +73,8 @@ gmc_v11_0_vm_fault_interrupt_state(struct amdgpu_device *adev, * fini/suspend, so the overall state doesn't * change over the course of suspend/resume. */ - if (!adev->in_s0ix) + if (!adev->in_s0ix && (adev->in_runpm || adev->in_suspend || + amdgpu_in_reset(adev))) amdgpu_gmc_set_vm_fault_masks(adev, AMDGPU_GFXHUB(0), false); break; case AMDGPU_IRQ_STATE_ENABLE: @@ -125,19 +126,24 @@ static int gmc_v11_0_process_interrupt(struct amdgpu_device *adev, } if (printk_ratelimit()) { - struct amdgpu_task_info task_info; - - memset(&task_info, 0, sizeof(struct amdgpu_task_info)); - amdgpu_vm_get_task_info(adev, entry->pasid, &task_info); + struct amdgpu_task_info *task_info; dev_err(adev->dev, - "[%s] page fault (src_id:%u ring:%u vmid:%u pasid:%u, for process %s pid %d thread %s pid %d)\n", + "[%s] page fault (src_id:%u ring:%u vmid:%u pasid:%u)\n", entry->vmid_src ? "mmhub" : "gfxhub", - entry->src_id, entry->ring_id, entry->vmid, - entry->pasid, task_info.process_name, task_info.tgid, - task_info.task_name, task_info.pid); + entry->src_id, entry->ring_id, entry->vmid, entry->pasid); + task_info = amdgpu_vm_get_task_info_pasid(adev, entry->pasid); + if (task_info) { + dev_err(adev->dev, + " in process %s pid %d thread %s pid %d)\n", + task_info->process_name, task_info->tgid, + task_info->task_name, task_info->pid); + amdgpu_vm_put_task_info(task_info); + } + dev_err(adev->dev, " in page starting at address 0x%016llx from client %d\n", - addr, entry->client_id); + addr, entry->client_id); + if (!amdgpu_sriov_vf(adev)) hub->vmhub_funcs->print_l2_protection_fault_status(adev, status); } @@ -222,16 +228,17 @@ static void gmc_v11_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, /* flush hdp cache */ adev->hdp.funcs->flush_hdp(adev, NULL); - /* For SRIOV run time, driver shouldn't access the register through MMIO - * Directly use kiq to do the vm invalidation instead + /* This is necessary for SRIOV as well as for GFXOFF to function + * properly under bare metal */ if ((adev->gfx.kiq[0].ring.sched.ready || adev->mes.ring.sched.ready) && (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev))) { - amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, inv_req, - 1 << vmid); + amdgpu_gmc_fw_reg_write_reg_wait(adev, req, ack, inv_req, + 1 << vmid, GET_INST(GC, 0)); return; } + /* This path is needed before KIQ/MES/GFXOFF are set up */ hub_ip = (vmhub == AMDGPU_GFXHUB(0)) ? GC_HWIP : MMHUB_HWIP; spin_lock(&adev->gmc.invalidate_lock); @@ -569,6 +576,7 @@ static void gmc_v11_0_set_mmhub_funcs(struct amdgpu_device *adev) adev->mmhub.funcs = &mmhub_v3_0_2_funcs; break; case IP_VERSION(3, 3, 0): + case IP_VERSION(3, 3, 1): adev->mmhub.funcs = &mmhub_v3_3_funcs; break; default: @@ -584,6 +592,7 @@ static void gmc_v11_0_set_gfxhub_funcs(struct amdgpu_device *adev) adev->gfxhub.funcs = &gfxhub_v3_0_3_funcs; break; case IP_VERSION(11, 5, 0): + case IP_VERSION(11, 5, 1): adev->gfxhub.funcs = &gfxhub_v11_5_0_funcs; break; default: @@ -636,10 +645,12 @@ static void gmc_v11_0_vram_gtt_location(struct amdgpu_device *adev, base = adev->mmhub.funcs->get_fb_location(adev); + amdgpu_gmc_set_agp_default(adev, mc); amdgpu_gmc_vram_location(adev, &adev->gmc, base); amdgpu_gmc_gart_location(adev, mc, AMDGPU_GART_PLACEMENT_HIGH); - if (!amdgpu_sriov_vf(adev) || - (amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(11, 5, 0))) + if (!amdgpu_sriov_vf(adev) && + (amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(11, 5, 0)) && + (amdgpu_agp == 1)) amdgpu_gmc_agp_location(adev, mc); /* base offset of vram pages */ @@ -743,6 +754,7 @@ static int gmc_v11_0_sw_init(void *handle) case IP_VERSION(11, 0, 3): case IP_VERSION(11, 0, 4): case IP_VERSION(11, 5, 0): + case IP_VERSION(11, 5, 1): set_bit(AMDGPU_GFXHUB(0), adev->vmhubs_mask); set_bit(AMDGPU_MMHUB0(0), adev->vmhubs_mask); /* @@ -938,6 +950,11 @@ static int gmc_v11_0_hw_fini(void *handle) } amdgpu_irq_put(adev, &adev->gmc.vm_fault, 0); + + if (adev->gmc.ecc_irq.funcs && + amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__UMC)) + amdgpu_irq_put(adev, &adev->gmc.ecc_irq, 0); + gmc_v11_0_gart_disable(adev); return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c index 7f66954fd302..23b478639921 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c @@ -211,6 +211,7 @@ static void gmc_v6_0_vram_gtt_location(struct amdgpu_device *adev, base <<= 24; + amdgpu_gmc_set_agp_default(adev, mc); amdgpu_gmc_vram_location(adev, mc, base); amdgpu_gmc_gart_location(adev, mc, AMDGPU_GART_PLACEMENT_BEST_FIT); } @@ -434,9 +435,10 @@ static void gmc_v6_0_set_prt(struct amdgpu_device *adev, bool enable) WREG32(mmVM_PRT_CNTL, tmp); if (enable) { - uint32_t low = AMDGPU_VA_RESERVED_SIZE >> AMDGPU_GPU_PAGE_SHIFT; + uint32_t low = AMDGPU_VA_RESERVED_BOTTOM >> + AMDGPU_GPU_PAGE_SHIFT; uint32_t high = adev->vm_manager.max_pfn - - (AMDGPU_VA_RESERVED_SIZE >> AMDGPU_GPU_PAGE_SHIFT); + (AMDGPU_VA_RESERVED_TOP >> AMDGPU_GPU_PAGE_SHIFT); WREG32(mmVM_PRT_APERTURE0_LOW_ADDR, low); WREG32(mmVM_PRT_APERTURE1_LOW_ADDR, low); @@ -914,8 +916,8 @@ static int gmc_v6_0_hw_init(void *handle) if (amdgpu_emu_mode == 1) return amdgpu_gmc_vram_checking(adev); - else - return r; + + return 0; } static int gmc_v6_0_hw_fini(void *handle) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c index 61ca1a82b651..3da7b6a2b00d 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c @@ -239,6 +239,7 @@ static void gmc_v7_0_vram_gtt_location(struct amdgpu_device *adev, base <<= 24; + amdgpu_gmc_set_agp_default(adev, mc); amdgpu_gmc_vram_location(adev, mc, base); amdgpu_gmc_gart_location(adev, mc, AMDGPU_GART_PLACEMENT_BEST_FIT); } @@ -562,9 +563,10 @@ static void gmc_v7_0_set_prt(struct amdgpu_device *adev, bool enable) WREG32(mmVM_PRT_CNTL, tmp); if (enable) { - uint32_t low = AMDGPU_VA_RESERVED_SIZE >> AMDGPU_GPU_PAGE_SHIFT; + uint32_t low = AMDGPU_VA_RESERVED_BOTTOM >> + AMDGPU_GPU_PAGE_SHIFT; uint32_t high = adev->vm_manager.max_pfn - - (AMDGPU_VA_RESERVED_SIZE >> AMDGPU_GPU_PAGE_SHIFT); + (AMDGPU_VA_RESERVED_TOP >> AMDGPU_GPU_PAGE_SHIFT); WREG32(mmVM_PRT_APERTURE0_LOW_ADDR, low); WREG32(mmVM_PRT_APERTURE1_LOW_ADDR, low); @@ -1098,8 +1100,8 @@ static int gmc_v7_0_hw_init(void *handle) if (amdgpu_emu_mode == 1) return amdgpu_gmc_vram_checking(adev); - else - return r; + + return 0; } static int gmc_v7_0_hw_fini(void *handle) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c index fa59749c2aef..d20e5f20ee31 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c @@ -413,6 +413,7 @@ static void gmc_v8_0_vram_gtt_location(struct amdgpu_device *adev, base = RREG32(mmMC_VM_FB_LOCATION) & 0xFFFF; base <<= 24; + amdgpu_gmc_set_agp_default(adev, mc); amdgpu_gmc_vram_location(adev, mc, base); amdgpu_gmc_gart_location(adev, mc, AMDGPU_GART_PLACEMENT_BEST_FIT); } @@ -776,9 +777,10 @@ static void gmc_v8_0_set_prt(struct amdgpu_device *adev, bool enable) WREG32(mmVM_PRT_CNTL, tmp); if (enable) { - uint32_t low = AMDGPU_VA_RESERVED_SIZE >> AMDGPU_GPU_PAGE_SHIFT; + uint32_t low = AMDGPU_VA_RESERVED_BOTTOM >> + AMDGPU_GPU_PAGE_SHIFT; uint32_t high = adev->vm_manager.max_pfn - - (AMDGPU_VA_RESERVED_SIZE >> AMDGPU_GPU_PAGE_SHIFT); + (AMDGPU_VA_RESERVED_TOP >> AMDGPU_GPU_PAGE_SHIFT); WREG32(mmVM_PRT_APERTURE0_LOW_ADDR, low); WREG32(mmVM_PRT_APERTURE1_LOW_ADDR, low); @@ -1218,8 +1220,8 @@ static int gmc_v8_0_hw_init(void *handle) if (amdgpu_emu_mode == 1) return amdgpu_gmc_vram_checking(adev); - else - return r; + + return 0; } static int gmc_v8_0_hw_fini(void *handle) @@ -1443,18 +1445,24 @@ static int gmc_v8_0_process_interrupt(struct amdgpu_device *adev, gmc_v8_0_set_fault_enable_default(adev, false); if (printk_ratelimit()) { - struct amdgpu_task_info task_info; + struct amdgpu_task_info *task_info; - memset(&task_info, 0, sizeof(struct amdgpu_task_info)); - amdgpu_vm_get_task_info(adev, entry->pasid, &task_info); + dev_err(adev->dev, "GPU fault detected: %d 0x%08x\n", + entry->src_id, entry->src_data[0]); + + task_info = amdgpu_vm_get_task_info_pasid(adev, entry->pasid); + if (task_info) { + dev_err(adev->dev, " for process %s pid %d thread %s pid %d\n", + task_info->process_name, task_info->tgid, + task_info->task_name, task_info->pid); + amdgpu_vm_put_task_info(task_info); + } - dev_err(adev->dev, "GPU fault detected: %d 0x%08x for process %s pid %d thread %s pid %d\n", - entry->src_id, entry->src_data[0], task_info.process_name, - task_info.tgid, task_info.task_name, task_info.pid); dev_err(adev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n", - addr); + addr); dev_err(adev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n", status); + gmc_v8_0_vm_decode_fault(adev, status, addr, mc_client, entry->pasid); } diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 3a1050344b59..c4ec1358f3aa 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -496,14 +496,14 @@ static int gmc_v9_0_vm_fault_interrupt_state(struct amdgpu_device *adev, if (j >= AMDGPU_MMHUB0(0)) tmp = RREG32_SOC15_IP(MMHUB, reg); else - tmp = RREG32_SOC15_IP(GC, reg); + tmp = RREG32_XCC(reg, j); tmp &= ~bits; if (j >= AMDGPU_MMHUB0(0)) WREG32_SOC15_IP(MMHUB, reg, tmp); else - WREG32_SOC15_IP(GC, reg, tmp); + WREG32_XCC(reg, tmp, j); } } break; @@ -524,14 +524,14 @@ static int gmc_v9_0_vm_fault_interrupt_state(struct amdgpu_device *adev, if (j >= AMDGPU_MMHUB0(0)) tmp = RREG32_SOC15_IP(MMHUB, reg); else - tmp = RREG32_SOC15_IP(GC, reg); + tmp = RREG32_XCC(reg, j); tmp |= bits; if (j >= AMDGPU_MMHUB0(0)) WREG32_SOC15_IP(MMHUB, reg, tmp); else - WREG32_SOC15_IP(GC, reg, tmp); + WREG32_XCC(reg, tmp, j); } } break; @@ -548,8 +548,8 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev, { bool retry_fault = !!(entry->src_data[1] & 0x80); bool write_fault = !!(entry->src_data[1] & 0x20); - uint32_t status = 0, cid = 0, rw = 0; - struct amdgpu_task_info task_info; + uint32_t status = 0, cid = 0, rw = 0, fed = 0; + struct amdgpu_task_info *task_info; struct amdgpu_vmhub *hub; const char *mmhub_cid; const char *hub_name; @@ -626,15 +626,20 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev, if (!printk_ratelimit()) return 0; - memset(&task_info, 0, sizeof(struct amdgpu_task_info)); - amdgpu_vm_get_task_info(adev, entry->pasid, &task_info); - dev_err(adev->dev, - "[%s] %s page fault (src_id:%u ring:%u vmid:%u pasid:%u, for process %s pid %d thread %s pid %d)\n", - hub_name, retry_fault ? "retry" : "no-retry", - entry->src_id, entry->ring_id, entry->vmid, - entry->pasid, task_info.process_name, task_info.tgid, - task_info.task_name, task_info.pid); + "[%s] %s page fault (src_id:%u ring:%u vmid:%u pasid:%u)\n", hub_name, + retry_fault ? "retry" : "no-retry", + entry->src_id, entry->ring_id, entry->vmid, entry->pasid); + + task_info = amdgpu_vm_get_task_info_pasid(adev, entry->pasid); + if (task_info) { + dev_err(adev->dev, + " for process %s pid %d thread %s pid %d)\n", + task_info->process_name, task_info->tgid, + task_info->task_name, task_info->pid); + amdgpu_vm_put_task_info(task_info); + } + dev_err(adev->dev, " in page starting at address 0x%016llx from IH client 0x%x (%s)\n", addr, entry->client_id, soc15_ih_clientid_name[entry->client_id]); @@ -659,6 +664,13 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev, status = RREG32(hub->vm_l2_pro_fault_status); cid = REG_GET_FIELD(status, VM_L2_PROTECTION_FAULT_STATUS, CID); rw = REG_GET_FIELD(status, VM_L2_PROTECTION_FAULT_STATUS, RW); + fed = REG_GET_FIELD(status, VM_L2_PROTECTION_FAULT_STATUS, FED); + + /* for fed error, kfd will handle it, return directly */ + if (fed && amdgpu_ras_is_poison_mode_supported(adev) && + (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(9, 4, 2))) + return 0; + WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1); amdgpu_vm_update_fault_cache(adev, entry->pasid, addr, status, vmhub); @@ -817,7 +829,7 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, uint32_t vmhub, uint32_t flush_type) { bool use_semaphore = gmc_v9_0_use_invalidate_semaphore(adev, vmhub); - u32 j, inv_req, tmp, sem, req, ack; + u32 j, inv_req, tmp, sem, req, ack, inst; const unsigned int eng = 17; struct amdgpu_vmhub *hub; @@ -829,19 +841,25 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, req = hub->vm_inv_eng0_req + hub->eng_distance * eng; ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng; - /* This is necessary for a HW workaround under SRIOV as well - * as GFXOFF under bare metal + if (vmhub >= AMDGPU_MMHUB0(0)) + inst = GET_INST(GC, 0); + else + inst = vmhub; + + /* This is necessary for SRIOV as well as for GFXOFF to function + * properly under bare metal */ - if (adev->gfx.kiq[0].ring.sched.ready && + if (adev->gfx.kiq[inst].ring.sched.ready && (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev))) { uint32_t req = hub->vm_inv_eng0_req + hub->eng_distance * eng; uint32_t ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng; - amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, inv_req, - 1 << vmid); + amdgpu_gmc_fw_reg_write_reg_wait(adev, req, ack, inv_req, + 1 << vmid, inst); return; } + /* This path is needed before KIQ/MES/GFXOFF are set up */ spin_lock(&adev->gmc.invalidate_lock); /* @@ -856,9 +874,9 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, for (j = 0; j < adev->usec_timeout; j++) { /* a read return value of 1 means semaphore acquire */ if (vmhub >= AMDGPU_MMHUB0(0)) - tmp = RREG32_SOC15_IP_NO_KIQ(MMHUB, sem); + tmp = RREG32_SOC15_IP_NO_KIQ(MMHUB, sem, inst); else - tmp = RREG32_SOC15_IP_NO_KIQ(GC, sem); + tmp = RREG32_SOC15_IP_NO_KIQ(GC, sem, inst); if (tmp & 0x1) break; udelay(1); @@ -869,9 +887,9 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, } if (vmhub >= AMDGPU_MMHUB0(0)) - WREG32_SOC15_IP_NO_KIQ(MMHUB, req, inv_req); + WREG32_SOC15_IP_NO_KIQ(MMHUB, req, inv_req, inst); else - WREG32_SOC15_IP_NO_KIQ(GC, req, inv_req); + WREG32_SOC15_IP_NO_KIQ(GC, req, inv_req, inst); /* * Issue a dummy read to wait for the ACK register to @@ -879,14 +897,14 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, * GRBM interface. */ if ((vmhub == AMDGPU_GFXHUB(0)) && - (adev->ip_versions[GC_HWIP][0] < IP_VERSION(9, 4, 2))) + (amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(9, 4, 2))) RREG32_NO_KIQ(req); for (j = 0; j < adev->usec_timeout; j++) { if (vmhub >= AMDGPU_MMHUB0(0)) - tmp = RREG32_SOC15_IP_NO_KIQ(MMHUB, ack); + tmp = RREG32_SOC15_IP_NO_KIQ(MMHUB, ack, inst); else - tmp = RREG32_SOC15_IP_NO_KIQ(GC, ack); + tmp = RREG32_SOC15_IP_NO_KIQ(GC, ack, inst); if (tmp & (1 << vmid)) break; udelay(1); @@ -899,9 +917,9 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, * write with 0 means semaphore release */ if (vmhub >= AMDGPU_MMHUB0(0)) - WREG32_SOC15_IP_NO_KIQ(MMHUB, sem, 0); + WREG32_SOC15_IP_NO_KIQ(MMHUB, sem, 0, inst); else - WREG32_SOC15_IP_NO_KIQ(GC, sem, 0); + WREG32_SOC15_IP_NO_KIQ(GC, sem, 0, inst); } spin_unlock(&adev->gmc.invalidate_lock); @@ -1176,7 +1194,10 @@ static void gmc_v9_0_get_coherence_flags(struct amdgpu_device *adev, if (uncached) { mtype = MTYPE_UC; } else if (ext_coherent) { - mtype = is_local ? MTYPE_CC : MTYPE_UC; + if (adev->rev_id) + mtype = is_local ? MTYPE_CC : MTYPE_UC; + else + mtype = MTYPE_UC; } else if (adev->flags & AMD_IS_APU) { mtype = is_local ? mtype_local : MTYPE_NC; } else { @@ -1251,12 +1272,15 @@ static void gmc_v9_0_override_vm_pte_flags(struct amdgpu_device *adev, return; } - /* Only override mappings with MTYPE_NC, which is the safe default for - * cacheable memory. + /* MTYPE_NC is the same default and can be overridden. + * MTYPE_UC will be present if the memory is extended-coherent + * and can also be overridden. */ if ((*flags & AMDGPU_PTE_MTYPE_VG10_MASK) != - AMDGPU_PTE_MTYPE_VG10(MTYPE_NC)) { - dev_dbg_ratelimited(adev->dev, "MTYPE is not NC\n"); + AMDGPU_PTE_MTYPE_VG10(MTYPE_NC) && + (*flags & AMDGPU_PTE_MTYPE_VG10_MASK) != + AMDGPU_PTE_MTYPE_VG10(MTYPE_UC)) { + dev_dbg_ratelimited(adev->dev, "MTYPE is not NC or UC\n"); return; } @@ -1283,15 +1307,23 @@ static void gmc_v9_0_override_vm_pte_flags(struct amdgpu_device *adev, vm->mem_id, local_node, nid); if (nid == local_node) { uint64_t old_flags = *flags; - unsigned int mtype_local = MTYPE_RW; - - if (amdgpu_mtype_local == 1) - mtype_local = MTYPE_NC; - else if (amdgpu_mtype_local == 2) - mtype_local = MTYPE_CC; + if ((*flags & AMDGPU_PTE_MTYPE_VG10_MASK) == + AMDGPU_PTE_MTYPE_VG10(MTYPE_NC)) { + unsigned int mtype_local = MTYPE_RW; + + if (amdgpu_mtype_local == 1) + mtype_local = MTYPE_NC; + else if (amdgpu_mtype_local == 2) + mtype_local = MTYPE_CC; + + *flags = (*flags & ~AMDGPU_PTE_MTYPE_VG10_MASK) | + AMDGPU_PTE_MTYPE_VG10(mtype_local); + } else if (adev->rev_id) { + /* MTYPE_UC case */ + *flags = (*flags & ~AMDGPU_PTE_MTYPE_VG10_MASK) | + AMDGPU_PTE_MTYPE_VG10(MTYPE_CC); + } - *flags = (*flags & ~AMDGPU_PTE_MTYPE_VG10_MASK) | - AMDGPU_PTE_MTYPE_VG10(mtype_local); dev_dbg_ratelimited(adev->dev, "flags updated from %llx to %llx\n", old_flags, *flags); } @@ -1425,7 +1457,6 @@ static void gmc_v9_0_set_umc_funcs(struct amdgpu_device *adev) adev->umc.channel_offs = UMC_V12_0_PER_CHANNEL_OFFSET; adev->umc.active_mask = adev->aid_mask; adev->umc.retire_unit = UMC_V12_0_BAD_PAGE_NUM_PER_CHANNEL; - adev->umc.channel_idx_tbl = &umc_v12_0_channel_idx_tbl[0][0][0]; if (!adev->gmc.xgmi.connected_to_cpu && !adev->gmc.is_app_apu) adev->umc.ras = &umc_v12_0_ras; break; @@ -1587,13 +1618,8 @@ static int gmc_v9_0_late_init(void *handle) } if (!amdgpu_persistent_edc_harvesting_supported(adev)) { - if (adev->mmhub.ras && adev->mmhub.ras->ras_block.hw_ops && - adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count) - adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count(adev); - - if (adev->hdp.ras && adev->hdp.ras->ras_block.hw_ops && - adev->hdp.ras->ras_block.hw_ops->reset_ras_error_count) - adev->hdp.ras->ras_block.hw_ops->reset_ras_error_count(adev); + amdgpu_ras_reset_error_count(adev, AMDGPU_RAS_BLOCK__MMHUB); + amdgpu_ras_reset_error_count(adev, AMDGPU_RAS_BLOCK__HDP); } r = amdgpu_gmc_ras_late_init(adev); @@ -1608,6 +1634,8 @@ static void gmc_v9_0_vram_gtt_location(struct amdgpu_device *adev, { u64 base = adev->mmhub.funcs->get_fb_location(adev); + amdgpu_gmc_set_agp_default(adev, mc); + /* add the xgmi offset of the physical node */ base += adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size; if (adev->gmc.xgmi.connected_to_cpu) { @@ -1615,7 +1643,7 @@ static void gmc_v9_0_vram_gtt_location(struct amdgpu_device *adev, } else { amdgpu_gmc_vram_location(adev, mc, base); amdgpu_gmc_gart_location(adev, mc, AMDGPU_GART_PLACEMENT_BEST_FIT); - if (!amdgpu_sriov_vf(adev)) + if (!amdgpu_sriov_vf(adev) && (amdgpu_agp == 1)) amdgpu_gmc_agp_location(adev, mc); } /* base offset of vram pages */ @@ -1932,13 +1960,6 @@ static int gmc_v9_0_init_mem_ranges(struct amdgpu_device *adev) static void gmc_v9_4_3_init_vram_info(struct amdgpu_device *adev) { - static const u32 regBIF_BIOS_SCRATCH_4 = 0x50; - u32 vram_info; - - if (!amdgpu_sriov_vf(adev)) { - vram_info = RREG32(regBIF_BIOS_SCRATCH_4); - adev->gmc.vram_vendor = vram_info & 0xF; - } adev->gmc.vram_type = AMDGPU_VRAM_TYPE_HBM; adev->gmc.vram_width = 128 * 64; } @@ -2023,11 +2044,8 @@ static int gmc_v9_0_sw_init(void *handle) * vm size is 256TB (48bit), maximum size of Vega10, * block size 512 (9bit) */ - /* sriov restrict max_pfn below AMDGPU_GMC_HOLE */ - if (amdgpu_sriov_vf(adev)) - amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 47); - else - amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48); + + amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48); if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2)) adev->gmc.translate_further = adev->vm_manager.num_level > 1; break; @@ -2158,8 +2176,6 @@ static int gmc_v9_0_sw_fini(void *handle) if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3)) amdgpu_gmc_sysfs_fini(adev); - adev->gmc.num_mem_partitions = 0; - kfree(adev->gmc.mem_partitions); amdgpu_gmc_ras_fini(adev); amdgpu_gem_force_release(adev); @@ -2173,6 +2189,9 @@ static int gmc_v9_0_sw_fini(void *handle) amdgpu_bo_free_kernel(&adev->gmc.pdb0_bo, NULL, &adev->gmc.ptr_pdb0); amdgpu_bo_fini(adev); + adev->gmc.num_mem_partitions = 0; + kfree(adev->gmc.mem_partitions); + return 0; } @@ -2327,8 +2346,8 @@ static int gmc_v9_0_hw_init(void *handle) if (amdgpu_emu_mode == 1) return amdgpu_gmc_vram_checking(adev); - else - return r; + + return 0; } /** @@ -2367,6 +2386,10 @@ static int gmc_v9_0_hw_fini(void *handle) amdgpu_irq_put(adev, &adev->gmc.vm_fault, 0); + if (adev->gmc.ecc_irq.funcs && + amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__UMC)) + amdgpu_irq_put(adev, &adev->gmc.ecc_irq, 0); + return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c b/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c index 3f3a6445c006..4db6bb73ead4 100644 --- a/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c @@ -129,6 +129,11 @@ static void hdp_v4_0_get_clockgating_state(struct amdgpu_device *adev, { int data; + if (amdgpu_ip_version(adev, HDP_HWIP, 0) == IP_VERSION(4, 4, 2)) { + /* Default enabled */ + *flags |= AMD_CG_SUPPORT_HDP_MGCG; + return; + } /* AMD_CG_SUPPORT_HDP_LS */ data = RREG32(SOC15_REG_OFFSET(HDP, 0, mmHDP_MEM_POWER_LS)); if (data & HDP_MEM_POWER_LS__LS_ENABLE_MASK) @@ -145,6 +150,10 @@ static void hdp_v4_0_init_registers(struct amdgpu_device *adev) break; } + /* Do not program registers if VF */ + if (amdgpu_sriov_vf(adev)) + return; + WREG32_FIELD15(HDP, 0, HDP_MISC_CNTL, FLUSH_INVALIDATE_CACHE, 1); if (amdgpu_ip_version(adev, HDP_HWIP, 0) == IP_VERSION(4, 4, 0)) diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v7_0.c b/drivers/gpu/drm/amd/amdgpu/hdp_v7_0.c new file mode 100644 index 000000000000..8d7d0813e331 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/hdp_v7_0.c @@ -0,0 +1,142 @@ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#include "amdgpu.h" +#include "amdgpu_atombios.h" +#include "hdp_v7_0.h" + +#include "hdp/hdp_7_0_0_offset.h" +#include "hdp/hdp_7_0_0_sh_mask.h" +#include <uapi/linux/kfd_ioctl.h> + +static void hdp_v7_0_flush_hdp(struct amdgpu_device *adev, + struct amdgpu_ring *ring) +{ + if (!ring || !ring->funcs->emit_wreg) + WREG32_NO_KIQ((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); + else + amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); +} + +static void hdp_v7_0_update_clock_gating(struct amdgpu_device *adev, + bool enable) +{ + uint32_t hdp_clk_cntl, hdp_clk_cntl1; + uint32_t hdp_mem_pwr_cntl; + + if (!(adev->cg_flags & (AMD_CG_SUPPORT_HDP_LS | + AMD_CG_SUPPORT_HDP_DS | + AMD_CG_SUPPORT_HDP_SD))) + return; + + hdp_clk_cntl = hdp_clk_cntl1 = RREG32_SOC15(HDP, 0,regHDP_CLK_CNTL); + hdp_mem_pwr_cntl = RREG32_SOC15(HDP, 0, regHDP_MEM_POWER_CTRL); + + /* Before doing clock/power mode switch, + * forced on IPH & RC clock */ + hdp_clk_cntl = REG_SET_FIELD(hdp_clk_cntl, HDP_CLK_CNTL, + RC_MEM_CLK_SOFT_OVERRIDE, 1); + WREG32_SOC15(HDP, 0, regHDP_CLK_CNTL, hdp_clk_cntl); + + /* disable clock and power gating before any changing */ + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL, + ATOMIC_MEM_POWER_CTRL_EN, 0); + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL, + ATOMIC_MEM_POWER_LS_EN, 0); + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL, + ATOMIC_MEM_POWER_DS_EN, 0); + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL, + ATOMIC_MEM_POWER_SD_EN, 0); + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL, + RC_MEM_POWER_CTRL_EN, 0); + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL, + RC_MEM_POWER_LS_EN, 0); + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL, + RC_MEM_POWER_DS_EN, 0); + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL, + RC_MEM_POWER_SD_EN, 0); + WREG32_SOC15(HDP, 0, regHDP_MEM_POWER_CTRL, hdp_mem_pwr_cntl); + + /* Already disabled above. The actions below are for "enabled" only */ + if (enable) { + /* only one clock gating mode (LS/DS/SD) can be enabled */ + if (adev->cg_flags & AMD_CG_SUPPORT_HDP_SD) { + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, + HDP_MEM_POWER_CTRL, + ATOMIC_MEM_POWER_SD_EN, 1); + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, + HDP_MEM_POWER_CTRL, + RC_MEM_POWER_SD_EN, 1); + } else if (adev->cg_flags & AMD_CG_SUPPORT_HDP_LS) { + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, + HDP_MEM_POWER_CTRL, + ATOMIC_MEM_POWER_LS_EN, 1); + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, + HDP_MEM_POWER_CTRL, + RC_MEM_POWER_LS_EN, 1); + } else if (adev->cg_flags & AMD_CG_SUPPORT_HDP_DS) { + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, + HDP_MEM_POWER_CTRL, + ATOMIC_MEM_POWER_DS_EN, 1); + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, + HDP_MEM_POWER_CTRL, + RC_MEM_POWER_DS_EN, 1); + } + + /* confirmed that IPH_MEM_POWER_CTRL_EN and RC_MEM_POWER_CTRL_EN have to + * be set for SRAM LS/DS/SD */ + if (adev->cg_flags & (AMD_CG_SUPPORT_HDP_LS | AMD_CG_SUPPORT_HDP_DS | + AMD_CG_SUPPORT_HDP_SD)) { + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL, + ATOMIC_MEM_POWER_CTRL_EN, 1); + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL, + RC_MEM_POWER_CTRL_EN, 1); + WREG32_SOC15(HDP, 0, regHDP_MEM_POWER_CTRL, hdp_mem_pwr_cntl); + } + } + + /* disable IPH & RC clock override after clock/power mode changing */ + hdp_clk_cntl = REG_SET_FIELD(hdp_clk_cntl, HDP_CLK_CNTL, + RC_MEM_CLK_SOFT_OVERRIDE, 0); + WREG32_SOC15(HDP, 0, regHDP_CLK_CNTL, hdp_clk_cntl); +} + +static void hdp_v7_0_get_clockgating_state(struct amdgpu_device *adev, + u64 *flags) +{ + uint32_t tmp; + + /* AMD_CG_SUPPORT_HDP_LS/DS/SD */ + tmp = RREG32_SOC15(HDP, 0, regHDP_MEM_POWER_CTRL); + if (tmp & HDP_MEM_POWER_CTRL__ATOMIC_MEM_POWER_LS_EN_MASK) + *flags |= AMD_CG_SUPPORT_HDP_LS; + else if (tmp & HDP_MEM_POWER_CTRL__ATOMIC_MEM_POWER_DS_EN_MASK) + *flags |= AMD_CG_SUPPORT_HDP_DS; + else if (tmp & HDP_MEM_POWER_CTRL__ATOMIC_MEM_POWER_SD_EN_MASK) + *flags |= AMD_CG_SUPPORT_HDP_SD; +} + +const struct amdgpu_hdp_funcs hdp_v7_0_funcs = { + .flush_hdp = hdp_v7_0_flush_hdp, + .update_clock_gating = hdp_v7_0_update_clock_gating, + .get_clock_gating_state = hdp_v7_0_get_clockgating_state, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v7_0.h b/drivers/gpu/drm/amd/amdgpu/hdp_v7_0.h new file mode 100644 index 000000000000..25b69201402d --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/hdp_v7_0.h @@ -0,0 +1,31 @@ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __HDP_V7_0_H__ +#define __HDP_V7_0_H__ + +#include "soc15_common.h" + +extern const struct amdgpu_hdp_funcs hdp_v7_0_funcs; + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/iceland_ih.c b/drivers/gpu/drm/amd/amdgpu/iceland_ih.c index aecad530b10a..2c02ae69883d 100644 --- a/drivers/gpu/drm/amd/amdgpu/iceland_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/iceland_ih.c @@ -215,6 +215,11 @@ static u32 iceland_ih_get_wptr(struct amdgpu_device *adev, tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1); WREG32(mmIH_RB_CNTL, tmp); + /* Unset the CLEAR_OVERFLOW bit immediately so new overflows + * can be detected. + */ + tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0); + WREG32(mmIH_RB_CNTL, tmp); out: return (wptr & ih->ptr_mask); diff --git a/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c b/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c index d9ed7332d805..c757ef99e3c5 100644 --- a/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c @@ -346,6 +346,21 @@ static int ih_v6_0_irq_init(struct amdgpu_device *adev) DELAY, 3); WREG32_SOC15(OSSSYS, 0, regIH_MSI_STORM_CTRL, tmp); + /* Redirect the interrupts to IH RB1 for dGPU */ + if (adev->irq.ih1.ring_size) { + tmp = RREG32_SOC15(OSSSYS, 0, regIH_RING1_CLIENT_CFG_INDEX); + tmp = REG_SET_FIELD(tmp, IH_RING1_CLIENT_CFG_INDEX, INDEX, 0); + WREG32_SOC15(OSSSYS, 0, regIH_RING1_CLIENT_CFG_INDEX, tmp); + + tmp = RREG32_SOC15(OSSSYS, 0, regIH_RING1_CLIENT_CFG_DATA); + tmp = REG_SET_FIELD(tmp, IH_RING1_CLIENT_CFG_DATA, CLIENT_ID, 0xa); + tmp = REG_SET_FIELD(tmp, IH_RING1_CLIENT_CFG_DATA, SOURCE_ID, 0x0); + tmp = REG_SET_FIELD(tmp, IH_RING1_CLIENT_CFG_DATA, + SOURCE_ID_MATCH_ENABLE, 0x1); + + WREG32_SOC15(OSSSYS, 0, regIH_RING1_CLIENT_CFG_DATA, tmp); + } + pci_set_master(adev->pdev); /* enable interrupts */ @@ -418,6 +433,12 @@ static u32 ih_v6_0_get_wptr(struct amdgpu_device *adev, tmp = RREG32_NO_KIQ(ih_regs->ih_rb_cntl); tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1); WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp); + + /* Unset the CLEAR_OVERFLOW bit immediately so new overflows + * can be detected. + */ + tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0); + WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp); out: return (wptr & ih->ptr_mask); } @@ -543,8 +564,15 @@ static int ih_v6_0_sw_init(void *handle) adev->irq.ih.use_doorbell = true; adev->irq.ih.doorbell_index = adev->doorbell_index.ih << 1; - adev->irq.ih1.ring_size = 0; - adev->irq.ih2.ring_size = 0; + if (!(adev->flags & AMD_IS_APU)) { + r = amdgpu_ih_ring_init(adev, &adev->irq.ih1, IH_RING_SIZE, + use_bus_addr); + if (r) + return r; + + adev->irq.ih1.use_doorbell = true; + adev->irq.ih1.doorbell_index = (adev->doorbell_index.ih + 1) << 1; + } /* initialize ih control register offset */ ih_v6_0_init_register_offset(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/ih_v6_1.c b/drivers/gpu/drm/amd/amdgpu/ih_v6_1.c index 8fb05eae340a..29ed78798070 100644 --- a/drivers/gpu/drm/amd/amdgpu/ih_v6_1.c +++ b/drivers/gpu/drm/amd/amdgpu/ih_v6_1.c @@ -346,6 +346,21 @@ static int ih_v6_1_irq_init(struct amdgpu_device *adev) DELAY, 3); WREG32_SOC15(OSSSYS, 0, regIH_MSI_STORM_CTRL, tmp); + /* Redirect the interrupts to IH RB1 for dGPU */ + if (adev->irq.ih1.ring_size) { + tmp = RREG32_SOC15(OSSSYS, 0, regIH_RING1_CLIENT_CFG_INDEX); + tmp = REG_SET_FIELD(tmp, IH_RING1_CLIENT_CFG_INDEX, INDEX, 0); + WREG32_SOC15(OSSSYS, 0, regIH_RING1_CLIENT_CFG_INDEX, tmp); + + tmp = RREG32_SOC15(OSSSYS, 0, regIH_RING1_CLIENT_CFG_DATA); + tmp = REG_SET_FIELD(tmp, IH_RING1_CLIENT_CFG_DATA, CLIENT_ID, 0xa); + tmp = REG_SET_FIELD(tmp, IH_RING1_CLIENT_CFG_DATA, SOURCE_ID, 0x0); + tmp = REG_SET_FIELD(tmp, IH_RING1_CLIENT_CFG_DATA, + SOURCE_ID_MATCH_ENABLE, 0x1); + + WREG32_SOC15(OSSSYS, 0, regIH_RING1_CLIENT_CFG_DATA, tmp); + } + pci_set_master(adev->pdev); /* enable interrupts */ @@ -418,6 +433,13 @@ static u32 ih_v6_1_get_wptr(struct amdgpu_device *adev, tmp = RREG32_NO_KIQ(ih_regs->ih_rb_cntl); tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1); WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp); + + /* Unset the CLEAR_OVERFLOW bit immediately so new overflows + * can be detected. + */ + tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0); + WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp); + out: return (wptr & ih->ptr_mask); } @@ -543,8 +565,15 @@ static int ih_v6_1_sw_init(void *handle) adev->irq.ih.use_doorbell = true; adev->irq.ih.doorbell_index = adev->doorbell_index.ih << 1; - adev->irq.ih1.ring_size = 0; - adev->irq.ih2.ring_size = 0; + if (!(adev->flags & AMD_IS_APU)) { + r = amdgpu_ih_ring_init(adev, &adev->irq.ih1, IH_RING_SIZE, + use_bus_addr); + if (r) + return r; + + adev->irq.ih1.use_doorbell = true; + adev->irq.ih1.doorbell_index = (adev->doorbell_index.ih + 1) << 1; + } /* initialize ih control register offset */ ih_v6_1_init_register_offset(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/ih_v7_0.c b/drivers/gpu/drm/amd/amdgpu/ih_v7_0.c new file mode 100644 index 000000000000..7aed96fa10a9 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/ih_v7_0.c @@ -0,0 +1,773 @@ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include <linux/pci.h> + +#include "amdgpu.h" +#include "amdgpu_ih.h" + +#include "oss/osssys_7_0_0_offset.h" +#include "oss/osssys_7_0_0_sh_mask.h" + +#include "soc15_common.h" +#include "ih_v7_0.h" + +#define MAX_REARM_RETRY 10 + +static void ih_v7_0_set_interrupt_funcs(struct amdgpu_device *adev); + +/** + * ih_v7_0_init_register_offset - Initialize register offset for ih rings + * + * @adev: amdgpu_device pointer + * + * Initialize register offset ih rings (IH_V7_0). + */ +static void ih_v7_0_init_register_offset(struct amdgpu_device *adev) +{ + struct amdgpu_ih_regs *ih_regs; + + /* ih ring 2 is removed + * ih ring and ih ring 1 are available */ + if (adev->irq.ih.ring_size) { + ih_regs = &adev->irq.ih.ih_regs; + ih_regs->ih_rb_base = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_BASE); + ih_regs->ih_rb_base_hi = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_BASE_HI); + ih_regs->ih_rb_cntl = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_CNTL); + ih_regs->ih_rb_wptr = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_WPTR); + ih_regs->ih_rb_rptr = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_RPTR); + ih_regs->ih_doorbell_rptr = SOC15_REG_OFFSET(OSSSYS, 0, regIH_DOORBELL_RPTR); + ih_regs->ih_rb_wptr_addr_lo = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_WPTR_ADDR_LO); + ih_regs->ih_rb_wptr_addr_hi = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_WPTR_ADDR_HI); + ih_regs->psp_reg_id = PSP_REG_IH_RB_CNTL; + } + + if (adev->irq.ih1.ring_size) { + ih_regs = &adev->irq.ih1.ih_regs; + ih_regs->ih_rb_base = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_BASE_RING1); + ih_regs->ih_rb_base_hi = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_BASE_HI_RING1); + ih_regs->ih_rb_cntl = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_CNTL_RING1); + ih_regs->ih_rb_wptr = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_WPTR_RING1); + ih_regs->ih_rb_rptr = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_RPTR_RING1); + ih_regs->ih_doorbell_rptr = SOC15_REG_OFFSET(OSSSYS, 0, regIH_DOORBELL_RPTR_RING1); + ih_regs->psp_reg_id = PSP_REG_IH_RB_CNTL_RING1; + } +} + +/** + * force_update_wptr_for_self_int - Force update the wptr for self interrupt + * + * @adev: amdgpu_device pointer + * @threshold: threshold to trigger the wptr reporting + * @timeout: timeout to trigger the wptr reporting + * @enabled: Enable/disable timeout flush mechanism + * + * threshold input range: 0 ~ 15, default 0, + * real_threshold = 2^threshold + * timeout input range: 0 ~ 20, default 8, + * real_timeout = (2^timeout) * 1024 / (socclk_freq) + * + * Force update wptr for self interrupt ( >= SIENNA_CICHLID). + */ +static void +force_update_wptr_for_self_int(struct amdgpu_device *adev, + u32 threshold, u32 timeout, bool enabled) +{ + u32 ih_cntl, ih_rb_cntl; + + ih_cntl = RREG32_SOC15(OSSSYS, 0, regIH_CNTL2); + ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, regIH_RB_CNTL_RING1); + + ih_cntl = REG_SET_FIELD(ih_cntl, IH_CNTL2, + SELF_IV_FORCE_WPTR_UPDATE_TIMEOUT, timeout); + ih_cntl = REG_SET_FIELD(ih_cntl, IH_CNTL2, + SELF_IV_FORCE_WPTR_UPDATE_ENABLE, enabled); + ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING1, + RB_USED_INT_THRESHOLD, threshold); + + if (amdgpu_sriov_vf(adev) && amdgpu_sriov_reg_indirect_ih(adev)) { + if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING1, ih_rb_cntl)) + return; + } else { + WREG32_SOC15(OSSSYS, 0, regIH_RB_CNTL_RING1, ih_rb_cntl); + } + + WREG32_SOC15(OSSSYS, 0, regIH_CNTL2, ih_cntl); +} + +/** + * ih_v7_0_toggle_ring_interrupts - toggle the interrupt ring buffer + * + * @adev: amdgpu_device pointer + * @ih: amdgpu_ih_ring pointet + * @enable: true - enable the interrupts, false - disable the interrupts + * + * Toggle the interrupt ring buffer (IH_V7_0) + */ +static int ih_v7_0_toggle_ring_interrupts(struct amdgpu_device *adev, + struct amdgpu_ih_ring *ih, + bool enable) +{ + struct amdgpu_ih_regs *ih_regs; + uint32_t tmp; + + ih_regs = &ih->ih_regs; + + tmp = RREG32(ih_regs->ih_rb_cntl); + tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, RB_ENABLE, (enable ? 1 : 0)); + /* enable_intr field is only valid in ring0 */ + if (ih == &adev->irq.ih) + tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, ENABLE_INTR, (enable ? 1 : 0)); + + if (amdgpu_sriov_vf(adev) && amdgpu_sriov_reg_indirect_ih(adev)) { + if (psp_reg_program(&adev->psp, ih_regs->psp_reg_id, tmp)) + return -ETIMEDOUT; + } else { + WREG32(ih_regs->ih_rb_cntl, tmp); + } + + if (enable) { + ih->enabled = true; + } else { + /* set rptr, wptr to 0 */ + WREG32(ih_regs->ih_rb_rptr, 0); + WREG32(ih_regs->ih_rb_wptr, 0); + ih->enabled = false; + ih->rptr = 0; + } + + return 0; +} + +/** + * ih_v7_0_toggle_interrupts - Toggle all the available interrupt ring buffers + * + * @adev: amdgpu_device pointer + * @enable: enable or disable interrupt ring buffers + * + * Toggle all the available interrupt ring buffers (IH_V7_0). + */ +static int ih_v7_0_toggle_interrupts(struct amdgpu_device *adev, bool enable) +{ + struct amdgpu_ih_ring *ih[] = {&adev->irq.ih, &adev->irq.ih1}; + int i; + int r; + + for (i = 0; i < ARRAY_SIZE(ih); i++) { + if (ih[i]->ring_size) { + r = ih_v7_0_toggle_ring_interrupts(adev, ih[i], enable); + if (r) + return r; + } + } + + return 0; +} + +static uint32_t ih_v7_0_rb_cntl(struct amdgpu_ih_ring *ih, uint32_t ih_rb_cntl) +{ + int rb_bufsz = order_base_2(ih->ring_size / 4); + + ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, + MC_SPACE, ih->use_bus_addr ? 2 : 4); + ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, + WPTR_OVERFLOW_CLEAR, 1); + ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, + WPTR_OVERFLOW_ENABLE, 1); + ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, RB_SIZE, rb_bufsz); + /* Ring Buffer write pointer writeback. If enabled, IH_RB_WPTR register + * value is written to memory + */ + ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, + WPTR_WRITEBACK_ENABLE, 1); + ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, MC_SNOOP, 1); + ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, MC_RO, 0); + ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, MC_VMID, 0); + + return ih_rb_cntl; +} + +static uint32_t ih_v7_0_doorbell_rptr(struct amdgpu_ih_ring *ih) +{ + u32 ih_doorbell_rtpr = 0; + + if (ih->use_doorbell) { + ih_doorbell_rtpr = REG_SET_FIELD(ih_doorbell_rtpr, + IH_DOORBELL_RPTR, OFFSET, + ih->doorbell_index); + ih_doorbell_rtpr = REG_SET_FIELD(ih_doorbell_rtpr, + IH_DOORBELL_RPTR, + ENABLE, 1); + } else { + ih_doorbell_rtpr = REG_SET_FIELD(ih_doorbell_rtpr, + IH_DOORBELL_RPTR, + ENABLE, 0); + } + return ih_doorbell_rtpr; +} + +/** + * ih_v7_0_enable_ring - enable an ih ring buffer + * + * @adev: amdgpu_device pointer + * @ih: amdgpu_ih_ring pointer + * + * Enable an ih ring buffer (IH_V7_0) + */ +static int ih_v7_0_enable_ring(struct amdgpu_device *adev, + struct amdgpu_ih_ring *ih) +{ + struct amdgpu_ih_regs *ih_regs; + uint32_t tmp; + + ih_regs = &ih->ih_regs; + + /* Ring Buffer base. [39:8] of 40-bit address of the beginning of the ring buffer*/ + WREG32(ih_regs->ih_rb_base, ih->gpu_addr >> 8); + WREG32(ih_regs->ih_rb_base_hi, (ih->gpu_addr >> 40) & 0xff); + + tmp = RREG32(ih_regs->ih_rb_cntl); + tmp = ih_v7_0_rb_cntl(ih, tmp); + if (ih == &adev->irq.ih) + tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, RPTR_REARM, !!adev->irq.msi_enabled); + if (ih == &adev->irq.ih1) { + tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_ENABLE, 0); + tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, RB_FULL_DRAIN_ENABLE, 1); + } + + if (amdgpu_sriov_vf(adev) && amdgpu_sriov_reg_indirect_ih(adev)) { + if (psp_reg_program(&adev->psp, ih_regs->psp_reg_id, tmp)) { + DRM_ERROR("PSP program IH_RB_CNTL failed!\n"); + return -ETIMEDOUT; + } + } else { + WREG32(ih_regs->ih_rb_cntl, tmp); + } + + if (ih == &adev->irq.ih) { + /* set the ih ring 0 writeback address whether it's enabled or not */ + WREG32(ih_regs->ih_rb_wptr_addr_lo, lower_32_bits(ih->wptr_addr)); + WREG32(ih_regs->ih_rb_wptr_addr_hi, upper_32_bits(ih->wptr_addr) & 0xFFFF); + } + + /* set rptr, wptr to 0 */ + WREG32(ih_regs->ih_rb_wptr, 0); + WREG32(ih_regs->ih_rb_rptr, 0); + + WREG32(ih_regs->ih_doorbell_rptr, ih_v7_0_doorbell_rptr(ih)); + + return 0; +} + +/** + * ih_v7_0_irq_init - init and enable the interrupt ring + * + * @adev: amdgpu_device pointer + * + * Allocate a ring buffer for the interrupt controller, + * enable the RLC, disable interrupts, enable the IH + * ring buffer and enable it. + * Called at device load and reume. + * Returns 0 for success, errors for failure. + */ +static int ih_v7_0_irq_init(struct amdgpu_device *adev) +{ + struct amdgpu_ih_ring *ih[] = {&adev->irq.ih, &adev->irq.ih1}; + u32 ih_chicken; + u32 tmp; + int ret; + int i; + + /* disable irqs */ + ret = ih_v7_0_toggle_interrupts(adev, false); + if (ret) + return ret; + + adev->nbio.funcs->ih_control(adev); + + if (unlikely((adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) || + (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO))) { + if (ih[0]->use_bus_addr) { + ih_chicken = RREG32_SOC15(OSSSYS, 0, regIH_CHICKEN); + ih_chicken = REG_SET_FIELD(ih_chicken, + IH_CHICKEN, MC_SPACE_GPA_ENABLE, 1); + WREG32_SOC15(OSSSYS, 0, regIH_CHICKEN, ih_chicken); + } + } + + for (i = 0; i < ARRAY_SIZE(ih); i++) { + if (ih[i]->ring_size) { + ret = ih_v7_0_enable_ring(adev, ih[i]); + if (ret) + return ret; + } + } + + /* update doorbell range for ih ring 0 */ + adev->nbio.funcs->ih_doorbell_range(adev, ih[0]->use_doorbell, + ih[0]->doorbell_index); + + tmp = RREG32_SOC15(OSSSYS, 0, regIH_STORM_CLIENT_LIST_CNTL); + tmp = REG_SET_FIELD(tmp, IH_STORM_CLIENT_LIST_CNTL, + CLIENT18_IS_STORM_CLIENT, 1); + WREG32_SOC15(OSSSYS, 0, regIH_STORM_CLIENT_LIST_CNTL, tmp); + + tmp = RREG32_SOC15(OSSSYS, 0, regIH_INT_FLOOD_CNTL); + tmp = REG_SET_FIELD(tmp, IH_INT_FLOOD_CNTL, FLOOD_CNTL_ENABLE, 1); + WREG32_SOC15(OSSSYS, 0, regIH_INT_FLOOD_CNTL, tmp); + + /* GC/MMHUB UTCL2 page fault interrupts are configured as + * MSI storm capable interrupts by deafult. The delay is + * used to avoid ISR being called too frequently + * when page fault happens on several continuous page + * and thus avoid MSI storm */ + tmp = RREG32_SOC15(OSSSYS, 0, regIH_MSI_STORM_CTRL); + tmp = REG_SET_FIELD(tmp, IH_MSI_STORM_CTRL, + DELAY, 3); + WREG32_SOC15(OSSSYS, 0, regIH_MSI_STORM_CTRL, tmp); + + pci_set_master(adev->pdev); + + /* enable interrupts */ + ret = ih_v7_0_toggle_interrupts(adev, true); + if (ret) + return ret; + /* enable wptr force update for self int */ + force_update_wptr_for_self_int(adev, 0, 8, true); + + if (adev->irq.ih_soft.ring_size) + adev->irq.ih_soft.enabled = true; + + return 0; +} + +/** + * ih_v7_0_irq_disable - disable interrupts + * + * @adev: amdgpu_device pointer + * + * Disable interrupts on the hw. + */ +static void ih_v7_0_irq_disable(struct amdgpu_device *adev) +{ + force_update_wptr_for_self_int(adev, 0, 8, false); + ih_v7_0_toggle_interrupts(adev, false); + + /* Wait and acknowledge irq */ + mdelay(1); +} + +/** + * ih_v7_0_get_wptr() - get the IH ring buffer wptr + * + * @adev: amdgpu_device pointer + * @ih: IH ring buffer to fetch wptr + * + * Get the IH ring buffer wptr from either the register + * or the writeback memory buffer. Also check for + * ring buffer overflow and deal with it. + * Returns the value of the wptr. + */ +static u32 ih_v7_0_get_wptr(struct amdgpu_device *adev, + struct amdgpu_ih_ring *ih) +{ + u32 wptr, tmp; + struct amdgpu_ih_regs *ih_regs; + + wptr = le32_to_cpu(*ih->wptr_cpu); + ih_regs = &ih->ih_regs; + + if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW)) + goto out; + + wptr = RREG32_NO_KIQ(ih_regs->ih_rb_wptr); + if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW)) + goto out; + wptr = REG_SET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW, 0); + + /* When a ring buffer overflow happen start parsing interrupt + * from the last not overwritten vector (wptr + 32). Hopefully + * this should allow us to catch up. + */ + tmp = (wptr + 32) & ih->ptr_mask; + dev_warn(adev->dev, "IH ring buffer overflow " + "(0x%08X, 0x%08X, 0x%08X)\n", + wptr, ih->rptr, tmp); + ih->rptr = tmp; + + tmp = RREG32_NO_KIQ(ih_regs->ih_rb_cntl); + tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1); + WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp); + + /* Unset the CLEAR_OVERFLOW bit immediately so new overflows + * can be detected. + */ + tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0); + WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp); +out: + return (wptr & ih->ptr_mask); +} + +/** + * ih_v7_0_irq_rearm - rearm IRQ if lost + * + * @adev: amdgpu_device pointer + * @ih: IH ring to match + * + */ +static void ih_v7_0_irq_rearm(struct amdgpu_device *adev, + struct amdgpu_ih_ring *ih) +{ + uint32_t v = 0; + uint32_t i = 0; + struct amdgpu_ih_regs *ih_regs; + + ih_regs = &ih->ih_regs; + + /* Rearm IRQ / re-write doorbell if doorbell write is lost */ + for (i = 0; i < MAX_REARM_RETRY; i++) { + v = RREG32_NO_KIQ(ih_regs->ih_rb_rptr); + if ((v < ih->ring_size) && (v != ih->rptr)) + WDOORBELL32(ih->doorbell_index, ih->rptr); + else + break; + } +} + +/** + * ih_v7_0_set_rptr - set the IH ring buffer rptr + * + * @adev: amdgpu_device pointer + * @ih: IH ring buffer to set rptr + */ +static void ih_v7_0_set_rptr(struct amdgpu_device *adev, + struct amdgpu_ih_ring *ih) +{ + struct amdgpu_ih_regs *ih_regs; + + if (ih->use_doorbell) { + /* XXX check if swapping is necessary on BE */ + *ih->rptr_cpu = ih->rptr; + WDOORBELL32(ih->doorbell_index, ih->rptr); + + if (amdgpu_sriov_vf(adev)) + ih_v7_0_irq_rearm(adev, ih); + } else { + ih_regs = &ih->ih_regs; + WREG32(ih_regs->ih_rb_rptr, ih->rptr); + } +} + +/** + * ih_v7_0_self_irq - dispatch work for ring 1 + * + * @adev: amdgpu_device pointer + * @source: irq source + * @entry: IV with WPTR update + * + * Update the WPTR from the IV and schedule work to handle the entries. + */ +static int ih_v7_0_self_irq(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + uint32_t wptr = cpu_to_le32(entry->src_data[0]); + + switch (entry->ring_id) { + case 1: + *adev->irq.ih1.wptr_cpu = wptr; + schedule_work(&adev->irq.ih1_work); + break; + default: break; + } + return 0; +} + +static const struct amdgpu_irq_src_funcs ih_v7_0_self_irq_funcs = { + .process = ih_v7_0_self_irq, +}; + +static void ih_v7_0_set_self_irq_funcs(struct amdgpu_device *adev) +{ + adev->irq.self_irq.num_types = 0; + adev->irq.self_irq.funcs = &ih_v7_0_self_irq_funcs; +} + +static int ih_v7_0_early_init(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + ih_v7_0_set_interrupt_funcs(adev); + ih_v7_0_set_self_irq_funcs(adev); + return 0; +} + +static int ih_v7_0_sw_init(void *handle) +{ + int r; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + bool use_bus_addr; + + r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_IH, 0, + &adev->irq.self_irq); + + if (r) + return r; + + /* use gpu virtual address for ih ring + * until ih_checken is programmed to allow + * use bus address for ih ring by psp bl */ + use_bus_addr = + (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) ? false : true; + r = amdgpu_ih_ring_init(adev, &adev->irq.ih, 256 * 1024, use_bus_addr); + if (r) + return r; + + adev->irq.ih.use_doorbell = true; + adev->irq.ih.doorbell_index = adev->doorbell_index.ih << 1; + + adev->irq.ih1.ring_size = 0; + adev->irq.ih2.ring_size = 0; + + /* initialize ih control register offset */ + ih_v7_0_init_register_offset(adev); + + r = amdgpu_ih_ring_init(adev, &adev->irq.ih_soft, PAGE_SIZE, true); + if (r) + return r; + + r = amdgpu_irq_init(adev); + + return r; +} + +static int ih_v7_0_sw_fini(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + amdgpu_irq_fini_sw(adev); + + return 0; +} + +static int ih_v7_0_hw_init(void *handle) +{ + int r; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + r = ih_v7_0_irq_init(adev); + if (r) + return r; + + return 0; +} + +static int ih_v7_0_hw_fini(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + ih_v7_0_irq_disable(adev); + + return 0; +} + +static int ih_v7_0_suspend(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + return ih_v7_0_hw_fini(adev); +} + +static int ih_v7_0_resume(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + return ih_v7_0_hw_init(adev); +} + +static bool ih_v7_0_is_idle(void *handle) +{ + /* todo */ + return true; +} + +static int ih_v7_0_wait_for_idle(void *handle) +{ + /* todo */ + return -ETIMEDOUT; +} + +static int ih_v7_0_soft_reset(void *handle) +{ + /* todo */ + return 0; +} + +static void ih_v7_0_update_clockgating_state(struct amdgpu_device *adev, + bool enable) +{ + uint32_t data, def, field_val; + + if (adev->cg_flags & AMD_CG_SUPPORT_IH_CG) { + def = data = RREG32_SOC15(OSSSYS, 0, regIH_CLK_CTRL); + field_val = enable ? 0 : 1; + data = REG_SET_FIELD(data, IH_CLK_CTRL, + DBUS_MUX_CLK_SOFT_OVERRIDE, field_val); + data = REG_SET_FIELD(data, IH_CLK_CTRL, + OSSSYS_SHARE_CLK_SOFT_OVERRIDE, field_val); + data = REG_SET_FIELD(data, IH_CLK_CTRL, + LIMIT_SMN_CLK_SOFT_OVERRIDE, field_val); + data = REG_SET_FIELD(data, IH_CLK_CTRL, + DYN_CLK_SOFT_OVERRIDE, field_val); + data = REG_SET_FIELD(data, IH_CLK_CTRL, + REG_CLK_SOFT_OVERRIDE, field_val); + if (def != data) + WREG32_SOC15(OSSSYS, 0, regIH_CLK_CTRL, data); + } + + return; +} + +static int ih_v7_0_set_clockgating_state(void *handle, + enum amd_clockgating_state state) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + ih_v7_0_update_clockgating_state(adev, + state == AMD_CG_STATE_GATE); + return 0; +} + +static void ih_v7_0_update_ih_mem_power_gating(struct amdgpu_device *adev, + bool enable) +{ + uint32_t ih_mem_pwr_cntl; + + /* Disable ih sram power cntl before switch powergating mode */ + ih_mem_pwr_cntl = RREG32_SOC15(OSSSYS, 0, regIH_MEM_POWER_CTRL); + ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL, + IH_BUFFER_MEM_POWER_CTRL_EN, 0); + WREG32_SOC15(OSSSYS, 0, regIH_MEM_POWER_CTRL, ih_mem_pwr_cntl); + + /* It is recommended to set mem powergating mode to DS mode */ + if (enable) { + /* mem power mode */ + ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL, + IH_BUFFER_MEM_POWER_LS_EN, 0); + ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL, + IH_BUFFER_MEM_POWER_DS_EN, 1); + ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL, + IH_BUFFER_MEM_POWER_SD_EN, 0); + /* cam mem power mode */ + ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL, + IH_RETRY_INT_CAM_MEM_POWER_LS_EN, 0); + ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL, + IH_RETRY_INT_CAM_MEM_POWER_DS_EN, 1); + ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL, + IH_RETRY_INT_CAM_MEM_POWER_SD_EN, 0); + /* re-enable power cntl */ + ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL, + IH_BUFFER_MEM_POWER_CTRL_EN, 1); + } else { + /* mem power mode */ + ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL, + IH_BUFFER_MEM_POWER_LS_EN, 0); + ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL, + IH_BUFFER_MEM_POWER_DS_EN, 0); + ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL, + IH_BUFFER_MEM_POWER_SD_EN, 0); + /* cam mem power mode */ + ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL, + IH_RETRY_INT_CAM_MEM_POWER_LS_EN, 0); + ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL, + IH_RETRY_INT_CAM_MEM_POWER_DS_EN, 0); + ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL, + IH_RETRY_INT_CAM_MEM_POWER_SD_EN, 0); + /* re-enable power cntl*/ + ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL, + IH_BUFFER_MEM_POWER_CTRL_EN, 1); + } + + WREG32_SOC15(OSSSYS, 0, regIH_MEM_POWER_CTRL, ih_mem_pwr_cntl); +} + +static int ih_v7_0_set_powergating_state(void *handle, + enum amd_powergating_state state) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + bool enable = (state == AMD_PG_STATE_GATE); + + if (adev->pg_flags & AMD_PG_SUPPORT_IH_SRAM_PG) + ih_v7_0_update_ih_mem_power_gating(adev, enable); + + return 0; +} + +static void ih_v7_0_get_clockgating_state(void *handle, u64 *flags) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + if (!RREG32_SOC15(OSSSYS, 0, regIH_CLK_CTRL)) + *flags |= AMD_CG_SUPPORT_IH_CG; + + return; +} + +static const struct amd_ip_funcs ih_v7_0_ip_funcs = { + .name = "ih_v7_0", + .early_init = ih_v7_0_early_init, + .late_init = NULL, + .sw_init = ih_v7_0_sw_init, + .sw_fini = ih_v7_0_sw_fini, + .hw_init = ih_v7_0_hw_init, + .hw_fini = ih_v7_0_hw_fini, + .suspend = ih_v7_0_suspend, + .resume = ih_v7_0_resume, + .is_idle = ih_v7_0_is_idle, + .wait_for_idle = ih_v7_0_wait_for_idle, + .soft_reset = ih_v7_0_soft_reset, + .set_clockgating_state = ih_v7_0_set_clockgating_state, + .set_powergating_state = ih_v7_0_set_powergating_state, + .get_clockgating_state = ih_v7_0_get_clockgating_state, +}; + +static const struct amdgpu_ih_funcs ih_v7_0_funcs = { + .get_wptr = ih_v7_0_get_wptr, + .decode_iv = amdgpu_ih_decode_iv_helper, + .decode_iv_ts = amdgpu_ih_decode_iv_ts_helper, + .set_rptr = ih_v7_0_set_rptr +}; + +static void ih_v7_0_set_interrupt_funcs(struct amdgpu_device *adev) +{ + adev->irq.ih_funcs = &ih_v7_0_funcs; +} + +const struct amdgpu_ip_block_version ih_v7_0_ip_block = +{ + .type = AMD_IP_BLOCK_TYPE_IH, + .major = 7, + .minor = 0, + .rev = 0, + .funcs = &ih_v7_0_ip_funcs, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/ih_v7_0.h b/drivers/gpu/drm/amd/amdgpu/ih_v7_0.h new file mode 100644 index 000000000000..af9dcbc451fd --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/ih_v7_0.h @@ -0,0 +1,28 @@ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#ifndef __IH_V7_0_IH_H__ +#define __IH_V7_0_IH_H__ + +extern const struct amdgpu_ip_block_version ih_v7_0_ip_block; + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c b/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c index 875fb5ac70b5..3e91a8e42c21 100644 --- a/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c @@ -37,6 +37,7 @@ MODULE_FIRMWARE("amdgpu/gc_11_0_2_imu.bin"); MODULE_FIRMWARE("amdgpu/gc_11_0_3_imu.bin"); MODULE_FIRMWARE("amdgpu/gc_11_0_4_imu.bin"); MODULE_FIRMWARE("amdgpu/gc_11_5_0_imu.bin"); +MODULE_FIRMWARE("amdgpu/gc_11_5_1_imu.bin"); static int imu_v11_0_init_microcode(struct amdgpu_device *adev) { @@ -55,7 +56,6 @@ static int imu_v11_0_init_microcode(struct amdgpu_device *adev) if (err) goto out; imu_hdr = (const struct imu_firmware_header_v1_0 *)adev->gfx.imu_fw->data; - adev->gfx.imu_fw_version = le32_to_cpu(imu_hdr->header.ucode_version); //adev->gfx.imu_feature_version = le32_to_cpu(imu_hdr->ucode_feature_version); if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { @@ -69,7 +69,8 @@ static int imu_v11_0_init_microcode(struct amdgpu_device *adev) info->fw = adev->gfx.imu_fw; adev->firmware.fw_size += ALIGN(le32_to_cpu(imu_hdr->imu_dram_ucode_size_bytes), PAGE_SIZE); - } + } else + adev->gfx.imu_fw_version = le32_to_cpu(imu_hdr->header.ucode_version); out: if (err) { diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c index e67a337457ed..99cd49ee8ef6 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c @@ -551,7 +551,7 @@ static int jpeg_v2_5_set_powergating_state(void *handle, struct amdgpu_device *adev = (struct amdgpu_device *)handle; int ret; - if(state == adev->jpeg.cur_state) + if (state == adev->jpeg.cur_state) return 0; if (state == AMD_PG_STATE_GATE) @@ -559,7 +559,7 @@ static int jpeg_v2_5_set_powergating_state(void *handle, else ret = jpeg_v2_5_start(adev); - if(!ret) + if (!ret) adev->jpeg.cur_state = state; return ret; @@ -754,8 +754,7 @@ static void jpeg_v2_5_set_irq_funcs(struct amdgpu_device *adev) } } -const struct amdgpu_ip_block_version jpeg_v2_5_ip_block = -{ +const struct amdgpu_ip_block_version jpeg_v2_5_ip_block = { .type = AMD_IP_BLOCK_TYPE_JPEG, .major = 2, .minor = 5, @@ -763,8 +762,7 @@ const struct amdgpu_ip_block_version jpeg_v2_5_ip_block = .funcs = &jpeg_v2_5_ip_funcs, }; -const struct amdgpu_ip_block_version jpeg_v2_6_ip_block = -{ +const struct amdgpu_ip_block_version jpeg_v2_6_ip_block = { .type = AMD_IP_BLOCK_TYPE_JPEG, .major = 2, .minor = 6, diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c index bc38b90f8cf8..88ea58d5c4ab 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c @@ -674,14 +674,6 @@ static int jpeg_v4_0_set_powergating_state(void *handle, return ret; } -static int jpeg_v4_0_set_interrupt_state(struct amdgpu_device *adev, - struct amdgpu_irq_src *source, - unsigned type, - enum amdgpu_interrupt_state state) -{ - return 0; -} - static int jpeg_v4_0_set_ras_interrupt_state(struct amdgpu_device *adev, struct amdgpu_irq_src *source, unsigned int type, @@ -765,7 +757,6 @@ static void jpeg_v4_0_set_dec_ring_funcs(struct amdgpu_device *adev) } static const struct amdgpu_irq_src_funcs jpeg_v4_0_irq_funcs = { - .set = jpeg_v4_0_set_interrupt_state, .process = jpeg_v4_0_process_interrupt, }; diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c index 9a8ec4d7e333..32caeb37cef9 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c @@ -652,11 +652,13 @@ static void jpeg_v4_0_3_dec_ring_set_wptr(struct amdgpu_ring *ring) * * Write a start command to the ring. */ -static void jpeg_v4_0_3_dec_ring_insert_start(struct amdgpu_ring *ring) +void jpeg_v4_0_3_dec_ring_insert_start(struct amdgpu_ring *ring) { - amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET, - 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, 0x62a04); /* PCTL0_MMHUB_DEEPSLEEP_IB */ + if (!amdgpu_sriov_vf(ring->adev)) { + amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET, + 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, 0x62a04); /* PCTL0_MMHUB_DEEPSLEEP_IB */ + } amdgpu_ring_write(ring, PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR, 0, 0, PACKETJ_TYPE0)); @@ -670,11 +672,13 @@ static void jpeg_v4_0_3_dec_ring_insert_start(struct amdgpu_ring *ring) * * Write a end command to the ring. */ -static void jpeg_v4_0_3_dec_ring_insert_end(struct amdgpu_ring *ring) +void jpeg_v4_0_3_dec_ring_insert_end(struct amdgpu_ring *ring) { - amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET, - 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, 0x62a04); + if (!amdgpu_sriov_vf(ring->adev)) { + amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET, + 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, 0x62a04); + } amdgpu_ring_write(ring, PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR, 0, 0, PACKETJ_TYPE0)); @@ -691,7 +695,7 @@ static void jpeg_v4_0_3_dec_ring_insert_end(struct amdgpu_ring *ring) * * Write a fence and a trap command to the ring. */ -static void jpeg_v4_0_3_dec_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq, +void jpeg_v4_0_3_dec_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq, unsigned int flags) { WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT); @@ -760,7 +764,7 @@ static void jpeg_v4_0_3_dec_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, * * Write ring commands to execute the indirect buffer. */ -static void jpeg_v4_0_3_dec_ring_emit_ib(struct amdgpu_ring *ring, +void jpeg_v4_0_3_dec_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_job *job, struct amdgpu_ib *ib, uint32_t flags) @@ -811,7 +815,7 @@ static void jpeg_v4_0_3_dec_ring_emit_ib(struct amdgpu_ring *ring, amdgpu_ring_write(ring, 0x2); } -static void jpeg_v4_0_3_dec_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, +void jpeg_v4_0_3_dec_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, uint32_t val, uint32_t mask) { uint32_t reg_offset = (reg << 2); @@ -838,7 +842,7 @@ static void jpeg_v4_0_3_dec_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_ amdgpu_ring_write(ring, mask); } -static void jpeg_v4_0_3_dec_ring_emit_vm_flush(struct amdgpu_ring *ring, +void jpeg_v4_0_3_dec_ring_emit_vm_flush(struct amdgpu_ring *ring, unsigned int vmid, uint64_t pd_addr) { struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->vm_hub]; @@ -853,7 +857,7 @@ static void jpeg_v4_0_3_dec_ring_emit_vm_flush(struct amdgpu_ring *ring, jpeg_v4_0_3_dec_ring_emit_reg_wait(ring, data0, data1, mask); } -static void jpeg_v4_0_3_dec_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t val) +void jpeg_v4_0_3_dec_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t val) { uint32_t reg_offset = (reg << 2); @@ -871,7 +875,7 @@ static void jpeg_v4_0_3_dec_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t re amdgpu_ring_write(ring, val); } -static void jpeg_v4_0_3_dec_ring_nop(struct amdgpu_ring *ring, uint32_t count) +void jpeg_v4_0_3_dec_ring_nop(struct amdgpu_ring *ring, uint32_t count) { int i; diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.h b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.h index 22483dc66351..747a3e5f6856 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.h +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.h @@ -48,4 +48,19 @@ extern const struct amdgpu_ip_block_version jpeg_v4_0_3_ip_block; +void jpeg_v4_0_3_dec_ring_emit_ib(struct amdgpu_ring *ring, + struct amdgpu_job *job, + struct amdgpu_ib *ib, + uint32_t flags); +void jpeg_v4_0_3_dec_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq, + unsigned int flags); +void jpeg_v4_0_3_dec_ring_emit_vm_flush(struct amdgpu_ring *ring, + unsigned int vmid, uint64_t pd_addr); +void jpeg_v4_0_3_dec_ring_nop(struct amdgpu_ring *ring, uint32_t count); +void jpeg_v4_0_3_dec_ring_insert_start(struct amdgpu_ring *ring); +void jpeg_v4_0_3_dec_ring_insert_end(struct amdgpu_ring *ring); +void jpeg_v4_0_3_dec_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t val); +void jpeg_v4_0_3_dec_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, + uint32_t val, uint32_t mask); + #endif /* __JPEG_V4_0_3_H__ */ diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c index 9df011323d4b..edf5bcdd2bc9 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c @@ -34,7 +34,17 @@ #include "vcn/vcn_4_0_5_sh_mask.h" #include "ivsrcid/vcn/irqsrcs_vcn_4_0.h" -#define regUVD_JPEG_PITCH_INTERNAL_OFFSET 0x401f +#define mmUVD_DPG_LMA_CTL regUVD_DPG_LMA_CTL +#define mmUVD_DPG_LMA_CTL_BASE_IDX regUVD_DPG_LMA_CTL_BASE_IDX +#define mmUVD_DPG_LMA_DATA regUVD_DPG_LMA_DATA +#define mmUVD_DPG_LMA_DATA_BASE_IDX regUVD_DPG_LMA_DATA_BASE_IDX + +#define regUVD_JPEG_PITCH_INTERNAL_OFFSET 0x401f +#define regJPEG_DEC_GFX10_ADDR_CONFIG_INTERNAL_OFFSET 0x4026 +#define regJPEG_SYS_INT_EN_INTERNAL_OFFSET 0x4141 +#define regJPEG_CGC_CTRL_INTERNAL_OFFSET 0x4161 +#define regJPEG_CGC_GATE_INTERNAL_OFFSET 0x4160 +#define regUVD_NO_OP_INTERNAL_OFFSET 0x0029 static void jpeg_v4_0_5_set_dec_ring_funcs(struct amdgpu_device *adev); static void jpeg_v4_0_5_set_irq_funcs(struct amdgpu_device *adev); @@ -43,6 +53,11 @@ static int jpeg_v4_0_5_set_powergating_state(void *handle, static void jpeg_v4_0_5_dec_ring_set_wptr(struct amdgpu_ring *ring); +static int amdgpu_ih_clientid_jpeg[] = { + SOC15_IH_CLIENTID_VCN, + SOC15_IH_CLIENTID_VCN1 +}; + /** * jpeg_v4_0_5_early_init - set function pointers * @@ -54,8 +69,20 @@ static int jpeg_v4_0_5_early_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + switch (amdgpu_ip_version(adev, UVD_HWIP, 0)) { + case IP_VERSION(4, 0, 5): + adev->jpeg.num_jpeg_inst = 1; + break; + case IP_VERSION(4, 0, 6): + adev->jpeg.num_jpeg_inst = 2; + break; + default: + DRM_DEV_ERROR(adev->dev, + "Failed to init vcn ip block(UVD_HWIP:0x%x)\n", + amdgpu_ip_version(adev, UVD_HWIP, 0)); + return -EINVAL; + } - adev->jpeg.num_jpeg_inst = 1; adev->jpeg.num_jpeg_rings = 1; jpeg_v4_0_5_set_dec_ring_funcs(adev); @@ -75,25 +102,30 @@ static int jpeg_v4_0_5_sw_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; struct amdgpu_ring *ring; - int r; - - /* JPEG TRAP */ - r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, - VCN_4_0__SRCID__JPEG_DECODE, &adev->jpeg.inst->irq); - if (r) - return r; - - /* JPEG DJPEG POISON EVENT */ - r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, - VCN_4_0__SRCID_DJPEG0_POISON, &adev->jpeg.inst->irq); - if (r) - return r; - - /* JPEG EJPEG POISON EVENT */ - r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, - VCN_4_0__SRCID_EJPEG0_POISON, &adev->jpeg.inst->irq); - if (r) - return r; + int r, i; + + for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { + if (adev->jpeg.harvest_config & (1 << i)) + continue; + + /* JPEG TRAP */ + r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_jpeg[i], + VCN_4_0__SRCID__JPEG_DECODE, &adev->jpeg.inst[i].irq); + if (r) + return r; + + /* JPEG DJPEG POISON EVENT */ + r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_jpeg[i], + VCN_4_0__SRCID_DJPEG0_POISON, &adev->jpeg.inst[i].irq); + if (r) + return r; + + /* JPEG EJPEG POISON EVENT */ + r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_jpeg[i], + VCN_4_0__SRCID_EJPEG0_POISON, &adev->jpeg.inst[i].irq); + if (r) + return r; + } r = amdgpu_jpeg_sw_init(adev); if (r) @@ -103,21 +135,23 @@ static int jpeg_v4_0_5_sw_init(void *handle) if (r) return r; - ring = adev->jpeg.inst->ring_dec; - ring->use_doorbell = true; - ring->doorbell_index = amdgpu_sriov_vf(adev) ? - (((adev->doorbell_index.vcn.vcn_ring0_1) << 1) + 4) : - ((adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 1); - ring->vm_hub = AMDGPU_MMHUB0(0); - - sprintf(ring->name, "jpeg_dec"); - r = amdgpu_ring_init(adev, ring, 512, &adev->jpeg.inst->irq, 0, - AMDGPU_RING_PRIO_DEFAULT, NULL); - if (r) - return r; - - adev->jpeg.internal.jpeg_pitch[0] = regUVD_JPEG_PITCH_INTERNAL_OFFSET; - adev->jpeg.inst->external.jpeg_pitch[0] = SOC15_REG_OFFSET(JPEG, 0, regUVD_JPEG_PITCH); + for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { + if (adev->jpeg.harvest_config & (1 << i)) + continue; + + ring = adev->jpeg.inst[i].ring_dec; + ring->use_doorbell = true; + ring->vm_hub = AMDGPU_MMHUB0(0); + ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 1 + 8 * i; + sprintf(ring->name, "jpeg_dec_%d", i); + r = amdgpu_ring_init(adev, ring, 512, &adev->jpeg.inst[i].irq, + 0, AMDGPU_RING_PRIO_DEFAULT, NULL); + if (r) + return r; + + adev->jpeg.internal.jpeg_pitch[0] = regUVD_JPEG_PITCH_INTERNAL_OFFSET; + adev->jpeg.inst[i].external.jpeg_pitch[0] = SOC15_REG_OFFSET(JPEG, i, regUVD_JPEG_PITCH); + } return 0; } @@ -152,21 +186,27 @@ static int jpeg_v4_0_5_sw_fini(void *handle) static int jpeg_v4_0_5_hw_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - struct amdgpu_ring *ring = adev->jpeg.inst->ring_dec; - int r; + struct amdgpu_ring *ring; + int r, i; - adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell, - (adev->doorbell_index.vcn.vcn_ring0_1 << 1), 0); + // TODO: Enable ring test with DPG support + if (adev->pg_flags & AMD_PG_SUPPORT_JPEG_DPG) { + DRM_DEV_INFO(adev->dev, "JPEG decode initialized successfully under DPG Mode"); + return 0; + } - WREG32_SOC15(VCN, 0, regVCN_JPEG_DB_CTRL, - ring->doorbell_index << VCN_JPEG_DB_CTRL__OFFSET__SHIFT | - VCN_JPEG_DB_CTRL__EN_MASK); + for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { + if (adev->jpeg.harvest_config & (1 << i)) + continue; - r = amdgpu_ring_test_helper(ring); - if (r) - return r; + ring = adev->jpeg.inst[i].ring_dec; + r = amdgpu_ring_test_helper(ring); + if (r) + return r; + } - DRM_DEV_INFO(adev->dev, "JPEG decode initialized successfully.\n"); + if (!r) + DRM_INFO("JPEG decode initialized successfully under SPG Mode\n"); return 0; } @@ -181,15 +221,20 @@ static int jpeg_v4_0_5_hw_init(void *handle) static int jpeg_v4_0_5_hw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i; cancel_delayed_work_sync(&adev->vcn.idle_work); - if (!amdgpu_sriov_vf(adev)) { - if (adev->jpeg.cur_state != AMD_PG_STATE_GATE && - RREG32_SOC15(JPEG, 0, regUVD_JRBC_STATUS)) - jpeg_v4_0_5_set_powergating_state(adev, AMD_PG_STATE_GATE); - } - amdgpu_irq_put(adev, &adev->jpeg.inst->irq, 0); + for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { + if (adev->jpeg.harvest_config & (1 << i)) + continue; + + if (!amdgpu_sriov_vf(adev)) { + if (adev->jpeg.cur_state != AMD_PG_STATE_GATE && + RREG32_SOC15(JPEG, i, regUVD_JRBC_STATUS)) + jpeg_v4_0_5_set_powergating_state(adev, AMD_PG_STATE_GATE); + } + } return 0; } @@ -235,11 +280,11 @@ static int jpeg_v4_0_5_resume(void *handle) return r; } -static void jpeg_v4_0_5_disable_clock_gating(struct amdgpu_device *adev) +static void jpeg_v4_0_5_disable_clock_gating(struct amdgpu_device *adev, int inst) { uint32_t data = 0; - data = RREG32_SOC15(JPEG, 0, regJPEG_CGC_CTRL); + data = RREG32_SOC15(JPEG, inst, regJPEG_CGC_CTRL); if (adev->cg_flags & AMD_CG_SUPPORT_JPEG_MGCG) { data |= 1 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; data &= (~JPEG_CGC_CTRL__JPEG_DEC_MODE_MASK); @@ -249,21 +294,21 @@ static void jpeg_v4_0_5_disable_clock_gating(struct amdgpu_device *adev) data |= 1 << JPEG_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT; data |= 4 << JPEG_CGC_CTRL__CLK_OFF_DELAY__SHIFT; - WREG32_SOC15(JPEG, 0, regJPEG_CGC_CTRL, data); + WREG32_SOC15(JPEG, inst, regJPEG_CGC_CTRL, data); - data = RREG32_SOC15(JPEG, 0, regJPEG_CGC_GATE); + data = RREG32_SOC15(JPEG, inst, regJPEG_CGC_GATE); data &= ~(JPEG_CGC_GATE__JPEG_DEC_MASK | JPEG_CGC_GATE__JPEG2_DEC_MASK | JPEG_CGC_GATE__JMCIF_MASK | JPEG_CGC_GATE__JRBBM_MASK); - WREG32_SOC15(JPEG, 0, regJPEG_CGC_GATE, data); + WREG32_SOC15(JPEG, inst, regJPEG_CGC_GATE, data); } -static void jpeg_v4_0_5_enable_clock_gating(struct amdgpu_device *adev) +static void jpeg_v4_0_5_enable_clock_gating(struct amdgpu_device *adev, int inst) { uint32_t data = 0; - data = RREG32_SOC15(JPEG, 0, regJPEG_CGC_CTRL); + data = RREG32_SOC15(JPEG, inst, regJPEG_CGC_CTRL); if (adev->cg_flags & AMD_CG_SUPPORT_JPEG_MGCG) { data |= 1 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; data |= JPEG_CGC_CTRL__JPEG_DEC_MODE_MASK; @@ -273,47 +318,66 @@ static void jpeg_v4_0_5_enable_clock_gating(struct amdgpu_device *adev) data |= 1 << JPEG_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT; data |= 4 << JPEG_CGC_CTRL__CLK_OFF_DELAY__SHIFT; - WREG32_SOC15(JPEG, 0, regJPEG_CGC_CTRL, data); + WREG32_SOC15(JPEG, inst, regJPEG_CGC_CTRL, data); - data = RREG32_SOC15(JPEG, 0, regJPEG_CGC_GATE); + data = RREG32_SOC15(JPEG, inst, regJPEG_CGC_GATE); data |= (JPEG_CGC_GATE__JPEG_DEC_MASK |JPEG_CGC_GATE__JPEG2_DEC_MASK |JPEG_CGC_GATE__JMCIF_MASK |JPEG_CGC_GATE__JRBBM_MASK); - WREG32_SOC15(JPEG, 0, regJPEG_CGC_GATE, data); + WREG32_SOC15(JPEG, inst, regJPEG_CGC_GATE, data); } -static int jpeg_v4_0_5_disable_static_power_gating(struct amdgpu_device *adev) +static void jpeg_engine_4_0_5_dpg_clock_gating_mode(struct amdgpu_device *adev, + int inst_idx, uint8_t indirect) +{ + uint32_t data = 0; + + if (adev->cg_flags & AMD_CG_SUPPORT_JPEG_MGCG) + data |= 1 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; + else + data |= 0 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; + + data |= 1 << JPEG_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT; + data |= 4 << JPEG_CGC_CTRL__CLK_OFF_DELAY__SHIFT; + WREG32_SOC15_JPEG_DPG_MODE(inst_idx, regJPEG_CGC_CTRL_INTERNAL_OFFSET, data, indirect); + + data = 0; + WREG32_SOC15_JPEG_DPG_MODE(inst_idx, regJPEG_CGC_GATE_INTERNAL_OFFSET, + data, indirect); +} + +static int jpeg_v4_0_5_disable_static_power_gating(struct amdgpu_device *adev, int inst) { if (adev->pg_flags & AMD_PG_SUPPORT_JPEG) { - WREG32(SOC15_REG_OFFSET(JPEG, 0, regUVD_IPX_DLDO_CONFIG), + WREG32(SOC15_REG_OFFSET(JPEG, inst, regUVD_IPX_DLDO_CONFIG), 1 << UVD_IPX_DLDO_CONFIG__ONO1_PWR_CONFIG__SHIFT); - SOC15_WAIT_ON_RREG(JPEG, 0, regUVD_IPX_DLDO_STATUS, + SOC15_WAIT_ON_RREG(JPEG, inst, regUVD_IPX_DLDO_STATUS, 0, UVD_IPX_DLDO_STATUS__ONO1_PWR_STATUS_MASK); } /* disable anti hang mechanism */ - WREG32_P(SOC15_REG_OFFSET(JPEG, 0, regUVD_JPEG_POWER_STATUS), 0, + WREG32_P(SOC15_REG_OFFSET(JPEG, inst, regUVD_JPEG_POWER_STATUS), 0, ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK); /* keep the JPEG in static PG mode */ - WREG32_P(SOC15_REG_OFFSET(JPEG, 0, regUVD_JPEG_POWER_STATUS), 0, + WREG32_P(SOC15_REG_OFFSET(JPEG, inst, regUVD_JPEG_POWER_STATUS), 0, ~UVD_JPEG_POWER_STATUS__JPEG_PG_MODE_MASK); return 0; } -static int jpeg_v4_0_5_enable_static_power_gating(struct amdgpu_device *adev) +static int jpeg_v4_0_5_enable_static_power_gating(struct amdgpu_device *adev, int inst) { /* enable anti hang mechanism */ - WREG32_P(SOC15_REG_OFFSET(JPEG, 0, regUVD_JPEG_POWER_STATUS), + WREG32_P(SOC15_REG_OFFSET(JPEG, inst, regUVD_JPEG_POWER_STATUS), UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK, ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK); if (adev->pg_flags & AMD_PG_SUPPORT_JPEG) { - WREG32(SOC15_REG_OFFSET(JPEG, 0, regUVD_IPX_DLDO_CONFIG), + WREG32(SOC15_REG_OFFSET(JPEG, inst, regUVD_IPX_DLDO_CONFIG), 2 << UVD_IPX_DLDO_CONFIG__ONO1_PWR_CONFIG__SHIFT); - SOC15_WAIT_ON_RREG(JPEG, 0, regUVD_IPX_DLDO_STATUS, + SOC15_WAIT_ON_RREG(JPEG, inst, regUVD_IPX_DLDO_STATUS, 1 << UVD_IPX_DLDO_STATUS__ONO1_PWR_STATUS__SHIFT, UVD_IPX_DLDO_STATUS__ONO1_PWR_STATUS_MASK); } @@ -322,53 +386,153 @@ static int jpeg_v4_0_5_enable_static_power_gating(struct amdgpu_device *adev) } /** - * jpeg_v4_0_5_start - start JPEG block + * jpeg_v4_0_5_start_dpg_mode - Jpeg start with dpg mode * * @adev: amdgpu_device pointer + * @inst_idx: instance number index + * @indirect: indirectly write sram * - * Setup and start the JPEG block + * Start JPEG block with dpg mode */ -static int jpeg_v4_0_5_start(struct amdgpu_device *adev) +static void jpeg_v4_0_5_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect) { - struct amdgpu_ring *ring = adev->jpeg.inst->ring_dec; - int r; - - if (adev->pm.dpm_enabled) - amdgpu_dpm_enable_jpeg(adev, true); + struct amdgpu_ring *ring = adev->jpeg.inst[inst_idx].ring_dec; + uint32_t reg_data = 0; - /* disable power gating */ - r = jpeg_v4_0_5_disable_static_power_gating(adev); - if (r) - return r; + /* enable anti hang mechanism */ + reg_data = RREG32_SOC15(JPEG, inst_idx, regUVD_JPEG_POWER_STATUS); + reg_data &= ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK; + reg_data |= 0x1; + WREG32_SOC15(JPEG, inst_idx, regUVD_JPEG_POWER_STATUS, reg_data); - /* JPEG disable CGC */ - jpeg_v4_0_5_disable_clock_gating(adev); + if (adev->pg_flags & AMD_PG_SUPPORT_JPEG) { + WREG32(SOC15_REG_OFFSET(JPEG, inst_idx, regUVD_IPX_DLDO_CONFIG), + 2 << UVD_IPX_DLDO_CONFIG__ONO1_PWR_CONFIG__SHIFT); + SOC15_WAIT_ON_RREG(JPEG, inst_idx, regUVD_IPX_DLDO_STATUS, + 1 << UVD_IPX_DLDO_STATUS__ONO1_PWR_STATUS__SHIFT, + UVD_IPX_DLDO_STATUS__ONO1_PWR_STATUS_MASK); + } - /* MJPEG global tiling registers */ - WREG32_SOC15(JPEG, 0, regJPEG_DEC_GFX10_ADDR_CONFIG, - adev->gfx.config.gb_addr_config); + reg_data = RREG32_SOC15(JPEG, inst_idx, regUVD_JPEG_POWER_STATUS); + reg_data |= UVD_JPEG_POWER_STATUS__JPEG_PG_MODE_MASK; + WREG32_SOC15(JPEG, inst_idx, regUVD_JPEG_POWER_STATUS, reg_data); + if (indirect) + adev->jpeg.inst[inst_idx].dpg_sram_curr_addr = + (uint32_t *)adev->jpeg.inst[inst_idx].dpg_sram_cpu_addr; - /* enable JMI channel */ - WREG32_P(SOC15_REG_OFFSET(JPEG, 0, regUVD_JMI_CNTL), 0, - ~UVD_JMI_CNTL__SOFT_RESET_MASK); + jpeg_engine_4_0_5_dpg_clock_gating_mode(adev, inst_idx, indirect); + /* MJPEG global tiling registers */ + WREG32_SOC15_JPEG_DPG_MODE(inst_idx, regJPEG_DEC_GFX10_ADDR_CONFIG_INTERNAL_OFFSET, + adev->gfx.config.gb_addr_config, indirect); /* enable System Interrupt for JRBC */ - WREG32_P(SOC15_REG_OFFSET(JPEG, 0, regJPEG_SYS_INT_EN), - JPEG_SYS_INT_EN__DJRBC_MASK, - ~JPEG_SYS_INT_EN__DJRBC_MASK); + WREG32_SOC15_JPEG_DPG_MODE(inst_idx, regJPEG_SYS_INT_EN_INTERNAL_OFFSET, + JPEG_SYS_INT_EN__DJRBC_MASK, indirect); - WREG32_SOC15(JPEG, 0, regUVD_LMI_JRBC_RB_VMID, 0); - WREG32_SOC15(JPEG, 0, regUVD_JRBC_RB_CNTL, (0x00000001L | 0x00000002L)); - WREG32_SOC15(JPEG, 0, regUVD_LMI_JRBC_RB_64BIT_BAR_LOW, + /* add nop to workaround PSP size check */ + WREG32_SOC15_JPEG_DPG_MODE(inst_idx, regUVD_NO_OP_INTERNAL_OFFSET, 0, indirect); + + if (indirect) + amdgpu_jpeg_psp_update_sram(adev, inst_idx, 0); + + WREG32_SOC15(JPEG, inst_idx, regUVD_LMI_JRBC_RB_VMID, 0); + WREG32_SOC15(JPEG, inst_idx, regUVD_JRBC_RB_CNTL, (0x00000001L | 0x00000002L)); + WREG32_SOC15(JPEG, inst_idx, regUVD_LMI_JRBC_RB_64BIT_BAR_LOW, lower_32_bits(ring->gpu_addr)); - WREG32_SOC15(JPEG, 0, regUVD_LMI_JRBC_RB_64BIT_BAR_HIGH, + WREG32_SOC15(JPEG, inst_idx, regUVD_LMI_JRBC_RB_64BIT_BAR_HIGH, upper_32_bits(ring->gpu_addr)); - WREG32_SOC15(JPEG, 0, regUVD_JRBC_RB_RPTR, 0); - WREG32_SOC15(JPEG, 0, regUVD_JRBC_RB_WPTR, 0); - WREG32_SOC15(JPEG, 0, regUVD_JRBC_RB_CNTL, 0x00000002L); - WREG32_SOC15(JPEG, 0, regUVD_JRBC_RB_SIZE, ring->ring_size / 4); - ring->wptr = RREG32_SOC15(JPEG, 0, regUVD_JRBC_RB_WPTR); + WREG32_SOC15(JPEG, inst_idx, regUVD_JRBC_RB_RPTR, 0); + WREG32_SOC15(JPEG, inst_idx, regUVD_JRBC_RB_WPTR, 0); + WREG32_SOC15(JPEG, inst_idx, regUVD_JRBC_RB_CNTL, 0x00000002L); + WREG32_SOC15(JPEG, inst_idx, regUVD_JRBC_RB_SIZE, ring->ring_size / 4); + ring->wptr = RREG32_SOC15(JPEG, inst_idx, regUVD_JRBC_RB_WPTR); +} + +/** + * jpeg_v4_0_5_stop_dpg_mode - Jpeg stop with dpg mode + * + * @adev: amdgpu_device pointer + * @inst_idx: instance number index + * + * Stop JPEG block with dpg mode + */ +static void jpeg_v4_0_5_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx) +{ + uint32_t reg_data = 0; + + reg_data = RREG32_SOC15(JPEG, inst_idx, regUVD_JPEG_POWER_STATUS); + reg_data &= ~UVD_JPEG_POWER_STATUS__JPEG_PG_MODE_MASK; + WREG32_SOC15(JPEG, inst_idx, regUVD_JPEG_POWER_STATUS, reg_data); + +} + +/** + * jpeg_v4_0_5_start - start JPEG block + * + * @adev: amdgpu_device pointer + * + * Setup and start the JPEG block + */ +static int jpeg_v4_0_5_start(struct amdgpu_device *adev) +{ + struct amdgpu_ring *ring; + int r, i; + + if (adev->pm.dpm_enabled) + amdgpu_dpm_enable_jpeg(adev, true); + + for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { + if (adev->jpeg.harvest_config & (1 << i)) + continue; + + ring = adev->jpeg.inst[i].ring_dec; + /* doorbell programming is done for every playback */ + adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell, + (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 8 * i, i); + + WREG32_SOC15(VCN, i, regVCN_JPEG_DB_CTRL, + ring->doorbell_index << VCN_JPEG_DB_CTRL__OFFSET__SHIFT | + VCN_JPEG_DB_CTRL__EN_MASK); + + if (adev->pg_flags & AMD_PG_SUPPORT_JPEG_DPG) { + jpeg_v4_0_5_start_dpg_mode(adev, i, adev->jpeg.indirect_sram); + continue; + } + + /* disable power gating */ + r = jpeg_v4_0_5_disable_static_power_gating(adev, i); + if (r) + return r; + + /* JPEG disable CGC */ + jpeg_v4_0_5_disable_clock_gating(adev, i); + + /* MJPEG global tiling registers */ + WREG32_SOC15(JPEG, i, regJPEG_DEC_GFX10_ADDR_CONFIG, + adev->gfx.config.gb_addr_config); + + /* enable JMI channel */ + WREG32_P(SOC15_REG_OFFSET(JPEG, i, regUVD_JMI_CNTL), 0, + ~UVD_JMI_CNTL__SOFT_RESET_MASK); + + /* enable System Interrupt for JRBC */ + WREG32_P(SOC15_REG_OFFSET(JPEG, i, regJPEG_SYS_INT_EN), + JPEG_SYS_INT_EN__DJRBC_MASK, + ~JPEG_SYS_INT_EN__DJRBC_MASK); + + WREG32_SOC15(JPEG, i, regUVD_LMI_JRBC_RB_VMID, 0); + WREG32_SOC15(JPEG, i, regUVD_JRBC_RB_CNTL, (0x00000001L | 0x00000002L)); + WREG32_SOC15(JPEG, i, regUVD_LMI_JRBC_RB_64BIT_BAR_LOW, + lower_32_bits(ring->gpu_addr)); + WREG32_SOC15(JPEG, i, regUVD_LMI_JRBC_RB_64BIT_BAR_HIGH, + upper_32_bits(ring->gpu_addr)); + WREG32_SOC15(JPEG, i, regUVD_JRBC_RB_RPTR, 0); + WREG32_SOC15(JPEG, i, regUVD_JRBC_RB_WPTR, 0); + WREG32_SOC15(JPEG, i, regUVD_JRBC_RB_CNTL, 0x00000002L); + WREG32_SOC15(JPEG, i, regUVD_JRBC_RB_SIZE, ring->ring_size / 4); + ring->wptr = RREG32_SOC15(JPEG, i, regUVD_JRBC_RB_WPTR); + } return 0; } @@ -382,20 +546,29 @@ static int jpeg_v4_0_5_start(struct amdgpu_device *adev) */ static int jpeg_v4_0_5_stop(struct amdgpu_device *adev) { - int r; + int r, i; - /* reset JMI */ - WREG32_P(SOC15_REG_OFFSET(JPEG, 0, regUVD_JMI_CNTL), - UVD_JMI_CNTL__SOFT_RESET_MASK, - ~UVD_JMI_CNTL__SOFT_RESET_MASK); + for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { + if (adev->jpeg.harvest_config & (1 << i)) + continue; - jpeg_v4_0_5_enable_clock_gating(adev); + if (adev->pg_flags & AMD_PG_SUPPORT_JPEG_DPG) { + jpeg_v4_0_5_stop_dpg_mode(adev, i); + continue; + } - /* enable power gating */ - r = jpeg_v4_0_5_enable_static_power_gating(adev); - if (r) - return r; + /* reset JMI */ + WREG32_P(SOC15_REG_OFFSET(JPEG, i, regUVD_JMI_CNTL), + UVD_JMI_CNTL__SOFT_RESET_MASK, + ~UVD_JMI_CNTL__SOFT_RESET_MASK); + jpeg_v4_0_5_enable_clock_gating(adev, i); + + /* enable power gating */ + r = jpeg_v4_0_5_enable_static_power_gating(adev, i); + if (r) + return r; + } if (adev->pm.dpm_enabled) amdgpu_dpm_enable_jpeg(adev, false); @@ -413,7 +586,7 @@ static uint64_t jpeg_v4_0_5_dec_ring_get_rptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - return RREG32_SOC15(JPEG, 0, regUVD_JRBC_RB_RPTR); + return RREG32_SOC15(JPEG, ring->me, regUVD_JRBC_RB_RPTR); } /** @@ -430,7 +603,7 @@ static uint64_t jpeg_v4_0_5_dec_ring_get_wptr(struct amdgpu_ring *ring) if (ring->use_doorbell) return *ring->wptr_cpu_addr; else - return RREG32_SOC15(JPEG, 0, regUVD_JRBC_RB_WPTR); + return RREG32_SOC15(JPEG, ring->me, regUVD_JRBC_RB_WPTR); } /** @@ -448,29 +621,41 @@ static void jpeg_v4_0_5_dec_ring_set_wptr(struct amdgpu_ring *ring) *ring->wptr_cpu_addr = lower_32_bits(ring->wptr); WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr)); } else { - WREG32_SOC15(JPEG, 0, regUVD_JRBC_RB_WPTR, lower_32_bits(ring->wptr)); + WREG32_SOC15(JPEG, ring->me, regUVD_JRBC_RB_WPTR, lower_32_bits(ring->wptr)); } } static bool jpeg_v4_0_5_is_idle(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - int ret = 1; + int i, ret = 1; - ret &= (((RREG32_SOC15(JPEG, 0, regUVD_JRBC_STATUS) & - UVD_JRBC_STATUS__RB_JOB_DONE_MASK) == - UVD_JRBC_STATUS__RB_JOB_DONE_MASK)); + for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { + if (adev->jpeg.harvest_config & (1 << i)) + continue; + ret &= (((RREG32_SOC15(JPEG, i, regUVD_JRBC_STATUS) & + UVD_JRBC_STATUS__RB_JOB_DONE_MASK) == + UVD_JRBC_STATUS__RB_JOB_DONE_MASK)); + } return ret; } static int jpeg_v4_0_5_wait_for_idle(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i; + + for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { + if (adev->jpeg.harvest_config & (1 << i)) + continue; + + return SOC15_WAIT_ON_RREG(JPEG, i, regUVD_JRBC_STATUS, + UVD_JRBC_STATUS__RB_JOB_DONE_MASK, + UVD_JRBC_STATUS__RB_JOB_DONE_MASK); + } - return SOC15_WAIT_ON_RREG(JPEG, 0, regUVD_JRBC_STATUS, - UVD_JRBC_STATUS__RB_JOB_DONE_MASK, - UVD_JRBC_STATUS__RB_JOB_DONE_MASK); + return 0; } static int jpeg_v4_0_5_set_clockgating_state(void *handle, @@ -478,13 +663,20 @@ static int jpeg_v4_0_5_set_clockgating_state(void *handle, { struct amdgpu_device *adev = (struct amdgpu_device *)handle; bool enable = (state == AMD_CG_STATE_GATE) ? true : false; + int i; - if (enable) { - if (!jpeg_v4_0_5_is_idle(handle)) - return -EBUSY; - jpeg_v4_0_5_enable_clock_gating(adev); - } else { - jpeg_v4_0_5_disable_clock_gating(adev); + for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { + if (adev->jpeg.harvest_config & (1 << i)) + continue; + + if (enable) { + if (!jpeg_v4_0_5_is_idle(handle)) + return -EBUSY; + + jpeg_v4_0_5_enable_clock_gating(adev, i); + } else { + jpeg_v4_0_5_disable_clock_gating(adev, i); + } } return 0; @@ -515,23 +707,29 @@ static int jpeg_v4_0_5_set_powergating_state(void *handle, return ret; } -static int jpeg_v4_0_5_set_interrupt_state(struct amdgpu_device *adev, - struct amdgpu_irq_src *source, - unsigned type, - enum amdgpu_interrupt_state state) -{ - return 0; -} - static int jpeg_v4_0_5_process_interrupt(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry) { + uint32_t ip_instance; + DRM_DEBUG("IH: JPEG TRAP\n"); + switch (entry->client_id) { + case SOC15_IH_CLIENTID_VCN: + ip_instance = 0; + break; + case SOC15_IH_CLIENTID_VCN1: + ip_instance = 1; + break; + default: + DRM_ERROR("Unhandled client id: %d\n", entry->client_id); + return 0; + } + switch (entry->src_id) { case VCN_4_0__SRCID__JPEG_DECODE: - amdgpu_fence_process(adev->jpeg.inst->ring_dec); + amdgpu_fence_process(adev->jpeg.inst[ip_instance].ring_dec); break; case VCN_4_0__SRCID_DJPEG0_POISON: case VCN_4_0__SRCID_EJPEG0_POISON: @@ -597,19 +795,33 @@ static const struct amdgpu_ring_funcs jpeg_v4_0_5_dec_ring_vm_funcs = { static void jpeg_v4_0_5_set_dec_ring_funcs(struct amdgpu_device *adev) { - adev->jpeg.inst->ring_dec->funcs = &jpeg_v4_0_5_dec_ring_vm_funcs; - DRM_DEV_INFO(adev->dev, "JPEG decode is enabled in VM mode\n"); + int i; + + for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { + if (adev->jpeg.harvest_config & (1 << i)) + continue; + + adev->jpeg.inst[i].ring_dec->funcs = &jpeg_v4_0_5_dec_ring_vm_funcs; + adev->jpeg.inst[i].ring_dec->me = i; + DRM_DEV_INFO(adev->dev, "JPEG%d decode is enabled in VM mode\n", i); + } } static const struct amdgpu_irq_src_funcs jpeg_v4_0_5_irq_funcs = { - .set = jpeg_v4_0_5_set_interrupt_state, .process = jpeg_v4_0_5_process_interrupt, }; static void jpeg_v4_0_5_set_irq_funcs(struct amdgpu_device *adev) { - adev->jpeg.inst->irq.num_types = 1; - adev->jpeg.inst->irq.funcs = &jpeg_v4_0_5_irq_funcs; + int i; + + for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { + if (adev->jpeg.harvest_config & (1 << i)) + continue; + + adev->jpeg.inst[i].irq.num_types = 1; + adev->jpeg.inst[i].irq.funcs = &jpeg_v4_0_5_irq_funcs; + } } const struct amdgpu_ip_block_version jpeg_v4_0_5_ip_block = { diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c new file mode 100644 index 000000000000..e70200f97555 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c @@ -0,0 +1,570 @@ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "amdgpu.h" +#include "amdgpu_jpeg.h" +#include "amdgpu_pm.h" +#include "soc15.h" +#include "soc15d.h" +#include "jpeg_v4_0_3.h" + +#include "vcn/vcn_5_0_0_offset.h" +#include "vcn/vcn_5_0_0_sh_mask.h" +#include "ivsrcid/vcn/irqsrcs_vcn_4_0.h" + +static void jpeg_v5_0_0_set_dec_ring_funcs(struct amdgpu_device *adev); +static void jpeg_v5_0_0_set_irq_funcs(struct amdgpu_device *adev); +static int jpeg_v5_0_0_set_powergating_state(void *handle, + enum amd_powergating_state state); + +/** + * jpeg_v5_0_0_early_init - set function pointers + * + * @handle: amdgpu_device pointer + * + * Set ring and irq function pointers + */ +static int jpeg_v5_0_0_early_init(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + adev->jpeg.num_jpeg_inst = 1; + adev->jpeg.num_jpeg_rings = 1; + + jpeg_v5_0_0_set_dec_ring_funcs(adev); + jpeg_v5_0_0_set_irq_funcs(adev); + + return 0; +} + +/** + * jpeg_v5_0_0_sw_init - sw init for JPEG block + * + * @handle: amdgpu_device pointer + * + * Load firmware and sw initialization + */ +static int jpeg_v5_0_0_sw_init(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_ring *ring; + int r; + + /* JPEG TRAP */ + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, + VCN_4_0__SRCID__JPEG_DECODE, &adev->jpeg.inst->irq); + if (r) + return r; + + r = amdgpu_jpeg_sw_init(adev); + if (r) + return r; + + r = amdgpu_jpeg_resume(adev); + if (r) + return r; + + ring = adev->jpeg.inst->ring_dec; + ring->use_doorbell = true; + ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 1; + ring->vm_hub = AMDGPU_MMHUB0(0); + + sprintf(ring->name, "jpeg_dec"); + r = amdgpu_ring_init(adev, ring, 512, &adev->jpeg.inst->irq, 0, + AMDGPU_RING_PRIO_DEFAULT, NULL); + if (r) + return r; + + adev->jpeg.internal.jpeg_pitch[0] = regUVD_JPEG_PITCH_INTERNAL_OFFSET; + adev->jpeg.inst->external.jpeg_pitch[0] = SOC15_REG_OFFSET(JPEG, 0, regUVD_JPEG_PITCH); + + return 0; +} + +/** + * jpeg_v5_0_0_sw_fini - sw fini for JPEG block + * + * @handle: amdgpu_device pointer + * + * JPEG suspend and free up sw allocation + */ +static int jpeg_v5_0_0_sw_fini(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int r; + + r = amdgpu_jpeg_suspend(adev); + if (r) + return r; + + r = amdgpu_jpeg_sw_fini(adev); + + return r; +} + +/** + * jpeg_v5_0_0_hw_init - start and test JPEG block + * + * @handle: amdgpu_device pointer + * + */ +static int jpeg_v5_0_0_hw_init(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_ring *ring = adev->jpeg.inst->ring_dec; + int r; + + adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell, + (adev->doorbell_index.vcn.vcn_ring0_1 << 1), 0); + + WREG32_SOC15(VCN, 0, regVCN_JPEG_DB_CTRL, + ring->doorbell_index << VCN_JPEG_DB_CTRL__OFFSET__SHIFT | + VCN_JPEG_DB_CTRL__EN_MASK); + + r = amdgpu_ring_test_helper(ring); + if (r) + return r; + + DRM_DEV_INFO(adev->dev, "JPEG decode initialized successfully.\n"); + + return 0; +} + +/** + * jpeg_v5_0_0_hw_fini - stop the hardware block + * + * @handle: amdgpu_device pointer + * + * Stop the JPEG block, mark ring as not ready any more + */ +static int jpeg_v5_0_0_hw_fini(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + cancel_delayed_work_sync(&adev->vcn.idle_work); + + if (adev->jpeg.cur_state != AMD_PG_STATE_GATE && + RREG32_SOC15(JPEG, 0, regUVD_JRBC_STATUS)) + jpeg_v5_0_0_set_powergating_state(adev, AMD_PG_STATE_GATE); + + return 0; +} + +/** + * jpeg_v5_0_0_suspend - suspend JPEG block + * + * @handle: amdgpu_device pointer + * + * HW fini and suspend JPEG block + */ +static int jpeg_v5_0_0_suspend(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int r; + + r = jpeg_v5_0_0_hw_fini(adev); + if (r) + return r; + + r = amdgpu_jpeg_suspend(adev); + + return r; +} + +/** + * jpeg_v5_0_0_resume - resume JPEG block + * + * @handle: amdgpu_device pointer + * + * Resume firmware and hw init JPEG block + */ +static int jpeg_v5_0_0_resume(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int r; + + r = amdgpu_jpeg_resume(adev); + if (r) + return r; + + r = jpeg_v5_0_0_hw_init(adev); + + return r; +} + +static void jpeg_v5_0_0_disable_clock_gating(struct amdgpu_device *adev) +{ + uint32_t data = 0; + + WREG32_SOC15(JPEG, 0, regJPEG_CGC_GATE, data); + + data = RREG32_SOC15(JPEG, 0, regJPEG_CGC_CTRL); + data &= ~(JPEG_CGC_CTRL__JPEG0_DEC_MODE_MASK + | JPEG_CGC_CTRL__JPEG_ENC_MODE_MASK); + WREG32_SOC15(JPEG, 0, regJPEG_CGC_CTRL, data); +} + +static void jpeg_v5_0_0_enable_clock_gating(struct amdgpu_device *adev) +{ + uint32_t data = 0; + + data = RREG32_SOC15(JPEG, 0, regJPEG_CGC_CTRL); + + data |= 1 << JPEG_CGC_CTRL__JPEG0_DEC_MODE__SHIFT; + WREG32_SOC15(JPEG, 0, regJPEG_CGC_CTRL, data); + + data = RREG32_SOC15(JPEG, 0, regJPEG_CGC_GATE); + data |= (JPEG_CGC_GATE__JPEG0_DEC_MASK + |JPEG_CGC_GATE__JPEG_ENC_MASK + |JPEG_CGC_GATE__JMCIF_MASK + |JPEG_CGC_GATE__JRBBM_MASK); + WREG32_SOC15(JPEG, 0, regJPEG_CGC_GATE, data); +} + +static int jpeg_v5_0_0_disable_static_power_gating(struct amdgpu_device *adev) +{ + uint32_t data = 0; + + data = 1 << UVD_IPX_DLDO_CONFIG__ONO1_PWR_CONFIG__SHIFT; + WREG32_SOC15(JPEG, 0, regUVD_IPX_DLDO_CONFIG, data); + SOC15_WAIT_ON_RREG(JPEG, 0, regUVD_IPX_DLDO_STATUS, 0, + UVD_IPX_DLDO_STATUS__ONO1_PWR_STATUS_MASK); + + /* disable anti hang mechanism */ + WREG32_P(SOC15_REG_OFFSET(JPEG, 0, regUVD_JPEG_POWER_STATUS), 0, + ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK); + + /* keep the JPEG in static PG mode */ + WREG32_P(SOC15_REG_OFFSET(JPEG, 0, regUVD_JPEG_POWER_STATUS), 0, + ~UVD_JPEG_POWER_STATUS__JPEG_PG_MODE_MASK); + + return 0; +} + +static int jpeg_v5_0_0_enable_static_power_gating(struct amdgpu_device *adev) +{ + /* enable anti hang mechanism */ + WREG32_P(SOC15_REG_OFFSET(JPEG, 0, regUVD_JPEG_POWER_STATUS), + UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK, + ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK); + + if (adev->pg_flags & AMD_PG_SUPPORT_JPEG) { + WREG32(SOC15_REG_OFFSET(JPEG, 0, regUVD_IPX_DLDO_CONFIG), + 2 << UVD_IPX_DLDO_CONFIG__ONO1_PWR_CONFIG__SHIFT); + SOC15_WAIT_ON_RREG(JPEG, 0, regUVD_IPX_DLDO_STATUS, + 1 << UVD_IPX_DLDO_STATUS__ONO1_PWR_STATUS__SHIFT, + UVD_IPX_DLDO_STATUS__ONO1_PWR_STATUS_MASK); + } + + return 0; +} + +/** + * jpeg_v5_0_0_start - start JPEG block + * + * @adev: amdgpu_device pointer + * + * Setup and start the JPEG block + */ +static int jpeg_v5_0_0_start(struct amdgpu_device *adev) +{ + struct amdgpu_ring *ring = adev->jpeg.inst->ring_dec; + int r; + + if (adev->pm.dpm_enabled) + amdgpu_dpm_enable_jpeg(adev, true); + + /* disable power gating */ + r = jpeg_v5_0_0_disable_static_power_gating(adev); + if (r) + return r; + + /* JPEG disable CGC */ + jpeg_v5_0_0_disable_clock_gating(adev); + + /* MJPEG global tiling registers */ + WREG32_SOC15(JPEG, 0, regJPEG_DEC_GFX10_ADDR_CONFIG, + adev->gfx.config.gb_addr_config); + + + /* enable JMI channel */ + WREG32_P(SOC15_REG_OFFSET(JPEG, 0, regUVD_JMI_CNTL), 0, + ~UVD_JMI_CNTL__SOFT_RESET_MASK); + + /* enable System Interrupt for JRBC */ + WREG32_P(SOC15_REG_OFFSET(JPEG, 0, regJPEG_SYS_INT_EN), + JPEG_SYS_INT_EN__DJRBC0_MASK, + ~JPEG_SYS_INT_EN__DJRBC0_MASK); + + WREG32_SOC15(JPEG, 0, regUVD_LMI_JRBC_RB_VMID, 0); + WREG32_SOC15(JPEG, 0, regUVD_JRBC_RB_CNTL, (0x00000001L | 0x00000002L)); + WREG32_SOC15(JPEG, 0, regUVD_LMI_JRBC_RB_64BIT_BAR_LOW, + lower_32_bits(ring->gpu_addr)); + WREG32_SOC15(JPEG, 0, regUVD_LMI_JRBC_RB_64BIT_BAR_HIGH, + upper_32_bits(ring->gpu_addr)); + WREG32_SOC15(JPEG, 0, regUVD_JRBC_RB_RPTR, 0); + WREG32_SOC15(JPEG, 0, regUVD_JRBC_RB_WPTR, 0); + WREG32_SOC15(JPEG, 0, regUVD_JRBC_RB_CNTL, 0x00000002L); + WREG32_SOC15(JPEG, 0, regUVD_JRBC_RB_SIZE, ring->ring_size / 4); + ring->wptr = RREG32_SOC15(JPEG, 0, regUVD_JRBC_RB_WPTR); + + return 0; +} + +/** + * jpeg_v5_0_0_stop - stop JPEG block + * + * @adev: amdgpu_device pointer + * + * stop the JPEG block + */ +static int jpeg_v5_0_0_stop(struct amdgpu_device *adev) +{ + int r; + + /* reset JMI */ + WREG32_P(SOC15_REG_OFFSET(JPEG, 0, regUVD_JMI_CNTL), + UVD_JMI_CNTL__SOFT_RESET_MASK, + ~UVD_JMI_CNTL__SOFT_RESET_MASK); + + jpeg_v5_0_0_enable_clock_gating(adev); + + /* enable power gating */ + r = jpeg_v5_0_0_enable_static_power_gating(adev); + if (r) + return r; + + if (adev->pm.dpm_enabled) + amdgpu_dpm_enable_jpeg(adev, false); + + return 0; +} + +/** + * jpeg_v5_0_0_dec_ring_get_rptr - get read pointer + * + * @ring: amdgpu_ring pointer + * + * Returns the current hardware read pointer + */ +static uint64_t jpeg_v5_0_0_dec_ring_get_rptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + return RREG32_SOC15(JPEG, 0, regUVD_JRBC_RB_RPTR); +} + +/** + * jpeg_v5_0_0_dec_ring_get_wptr - get write pointer + * + * @ring: amdgpu_ring pointer + * + * Returns the current hardware write pointer + */ +static uint64_t jpeg_v5_0_0_dec_ring_get_wptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + if (ring->use_doorbell) + return *ring->wptr_cpu_addr; + else + return RREG32_SOC15(JPEG, 0, regUVD_JRBC_RB_WPTR); +} + +/** + * jpeg_v5_0_0_dec_ring_set_wptr - set write pointer + * + * @ring: amdgpu_ring pointer + * + * Commits the write pointer to the hardware + */ +static void jpeg_v5_0_0_dec_ring_set_wptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + if (ring->use_doorbell) { + *ring->wptr_cpu_addr = lower_32_bits(ring->wptr); + WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr)); + } else { + WREG32_SOC15(JPEG, 0, regUVD_JRBC_RB_WPTR, lower_32_bits(ring->wptr)); + } +} + +static bool jpeg_v5_0_0_is_idle(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int ret = 1; + + ret &= (((RREG32_SOC15(JPEG, 0, regUVD_JRBC_STATUS) & + UVD_JRBC_STATUS__RB_JOB_DONE_MASK) == + UVD_JRBC_STATUS__RB_JOB_DONE_MASK)); + + return ret; +} + +static int jpeg_v5_0_0_wait_for_idle(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + return SOC15_WAIT_ON_RREG(JPEG, 0, regUVD_JRBC_STATUS, + UVD_JRBC_STATUS__RB_JOB_DONE_MASK, + UVD_JRBC_STATUS__RB_JOB_DONE_MASK); +} + +static int jpeg_v5_0_0_set_clockgating_state(void *handle, + enum amd_clockgating_state state) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + bool enable = (state == AMD_CG_STATE_GATE) ? true : false; + + if (enable) { + if (!jpeg_v5_0_0_is_idle(handle)) + return -EBUSY; + jpeg_v5_0_0_enable_clock_gating(adev); + } else { + jpeg_v5_0_0_disable_clock_gating(adev); + } + + return 0; +} + +static int jpeg_v5_0_0_set_powergating_state(void *handle, + enum amd_powergating_state state) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int ret; + + if (state == adev->jpeg.cur_state) + return 0; + + if (state == AMD_PG_STATE_GATE) + ret = jpeg_v5_0_0_stop(adev); + else + ret = jpeg_v5_0_0_start(adev); + + if (!ret) + adev->jpeg.cur_state = state; + + return ret; +} + +static int jpeg_v5_0_0_set_interrupt_state(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + unsigned int type, + enum amdgpu_interrupt_state state) +{ + return 0; +} + +static int jpeg_v5_0_0_process_interrupt(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + DRM_DEBUG("IH: JPEG TRAP\n"); + + switch (entry->src_id) { + case VCN_4_0__SRCID__JPEG_DECODE: + amdgpu_fence_process(adev->jpeg.inst->ring_dec); + break; + default: + DRM_DEV_ERROR(adev->dev, "Unhandled interrupt: %d %d\n", + entry->src_id, entry->src_data[0]); + break; + } + + return 0; +} + +static const struct amd_ip_funcs jpeg_v5_0_0_ip_funcs = { + .name = "jpeg_v5_0_0", + .early_init = jpeg_v5_0_0_early_init, + .late_init = NULL, + .sw_init = jpeg_v5_0_0_sw_init, + .sw_fini = jpeg_v5_0_0_sw_fini, + .hw_init = jpeg_v5_0_0_hw_init, + .hw_fini = jpeg_v5_0_0_hw_fini, + .suspend = jpeg_v5_0_0_suspend, + .resume = jpeg_v5_0_0_resume, + .is_idle = jpeg_v5_0_0_is_idle, + .wait_for_idle = jpeg_v5_0_0_wait_for_idle, + .check_soft_reset = NULL, + .pre_soft_reset = NULL, + .soft_reset = NULL, + .post_soft_reset = NULL, + .set_clockgating_state = jpeg_v5_0_0_set_clockgating_state, + .set_powergating_state = jpeg_v5_0_0_set_powergating_state, +}; + +static const struct amdgpu_ring_funcs jpeg_v5_0_0_dec_ring_vm_funcs = { + .type = AMDGPU_RING_TYPE_VCN_JPEG, + .align_mask = 0xf, + .get_rptr = jpeg_v5_0_0_dec_ring_get_rptr, + .get_wptr = jpeg_v5_0_0_dec_ring_get_wptr, + .set_wptr = jpeg_v5_0_0_dec_ring_set_wptr, + .emit_frame_size = + SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 + + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 + + 8 + /* jpeg_v5_0_0_dec_ring_emit_vm_flush */ + 22 + 22 + /* jpeg_v5_0_0_dec_ring_emit_fence x2 vm fence */ + 8 + 16, + .emit_ib_size = 22, /* jpeg_v5_0_0_dec_ring_emit_ib */ + .emit_ib = jpeg_v4_0_3_dec_ring_emit_ib, + .emit_fence = jpeg_v4_0_3_dec_ring_emit_fence, + .emit_vm_flush = jpeg_v4_0_3_dec_ring_emit_vm_flush, + .test_ring = amdgpu_jpeg_dec_ring_test_ring, + .test_ib = amdgpu_jpeg_dec_ring_test_ib, + .insert_nop = jpeg_v4_0_3_dec_ring_nop, + .insert_start = jpeg_v4_0_3_dec_ring_insert_start, + .insert_end = jpeg_v4_0_3_dec_ring_insert_end, + .pad_ib = amdgpu_ring_generic_pad_ib, + .begin_use = amdgpu_jpeg_ring_begin_use, + .end_use = amdgpu_jpeg_ring_end_use, + .emit_wreg = jpeg_v4_0_3_dec_ring_emit_wreg, + .emit_reg_wait = jpeg_v4_0_3_dec_ring_emit_reg_wait, + .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, +}; + +static void jpeg_v5_0_0_set_dec_ring_funcs(struct amdgpu_device *adev) +{ + adev->jpeg.inst->ring_dec->funcs = &jpeg_v5_0_0_dec_ring_vm_funcs; + DRM_DEV_INFO(adev->dev, "JPEG decode is enabled in VM mode\n"); +} + +static const struct amdgpu_irq_src_funcs jpeg_v5_0_0_irq_funcs = { + .set = jpeg_v5_0_0_set_interrupt_state, + .process = jpeg_v5_0_0_process_interrupt, +}; + +static void jpeg_v5_0_0_set_irq_funcs(struct amdgpu_device *adev) +{ + adev->jpeg.inst->irq.num_types = 1; + adev->jpeg.inst->irq.funcs = &jpeg_v5_0_0_irq_funcs; +} + +const struct amdgpu_ip_block_version jpeg_v5_0_0_ip_block = { + .type = AMD_IP_BLOCK_TYPE_JPEG, + .major = 5, + .minor = 0, + .rev = 0, + .funcs = &jpeg_v5_0_0_ip_funcs, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.h b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.h new file mode 100644 index 000000000000..bd348336b215 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.h @@ -0,0 +1,29 @@ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __JPEG_V5_0_0_H__ +#define __JPEG_V5_0_0_H__ + +extern const struct amdgpu_ip_block_version jpeg_v5_0_0_ip_block; + +#endif /* __JPEG_V5_0_0_H__ */ diff --git a/drivers/gpu/drm/amd/amdgpu/lsdma_v7_0.c b/drivers/gpu/drm/amd/amdgpu/lsdma_v7_0.c new file mode 100644 index 000000000000..396262044ea8 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/lsdma_v7_0.c @@ -0,0 +1,121 @@ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include <linux/delay.h> +#include "amdgpu.h" +#include "lsdma_v7_0.h" +#include "amdgpu_lsdma.h" + +#include "lsdma/lsdma_7_0_0_offset.h" +#include "lsdma/lsdma_7_0_0_sh_mask.h" + +static int lsdma_v7_0_wait_pio_status(struct amdgpu_device *adev) +{ + return amdgpu_lsdma_wait_for(adev, SOC15_REG_OFFSET(LSDMA, 0, regLSDMA_PIO_STATUS), + LSDMA_PIO_STATUS__PIO_IDLE_MASK | LSDMA_PIO_STATUS__PIO_FIFO_EMPTY_MASK, + LSDMA_PIO_STATUS__PIO_IDLE_MASK | LSDMA_PIO_STATUS__PIO_FIFO_EMPTY_MASK); +} + +static int lsdma_v7_0_copy_mem(struct amdgpu_device *adev, + uint64_t src_addr, + uint64_t dst_addr, + uint64_t size) +{ + int ret; + uint32_t tmp; + + WREG32_SOC15(LSDMA, 0, regLSDMA_PIO_SRC_ADDR_LO, lower_32_bits(src_addr)); + WREG32_SOC15(LSDMA, 0, regLSDMA_PIO_SRC_ADDR_HI, upper_32_bits(src_addr)); + + WREG32_SOC15(LSDMA, 0, regLSDMA_PIO_DST_ADDR_LO, lower_32_bits(dst_addr)); + WREG32_SOC15(LSDMA, 0, regLSDMA_PIO_DST_ADDR_HI, upper_32_bits(dst_addr)); + + WREG32_SOC15(LSDMA, 0, regLSDMA_PIO_CONTROL, 0x0); + + tmp = RREG32_SOC15(LSDMA, 0, regLSDMA_PIO_COMMAND); + tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, BYTE_COUNT, size); + tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, SRC_LOCATION, 0); + tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, DST_LOCATION, 0); + tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, SRC_ADDR_INC, 0); + tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, DST_ADDR_INC, 0); + tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, OVERLAP_DISABLE, 0); + tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, CONSTANT_FILL, 0); + WREG32_SOC15(LSDMA, 0, regLSDMA_PIO_COMMAND, tmp); + + ret = lsdma_v7_0_wait_pio_status(adev); + if (ret) + dev_err(adev->dev, "LSDMA PIO failed to copy memory!\n"); + + return ret; +} + +static int lsdma_v7_0_fill_mem(struct amdgpu_device *adev, + uint64_t dst_addr, + uint32_t data, + uint64_t size) +{ + int ret; + uint32_t tmp; + + WREG32_SOC15(LSDMA, 0, regLSDMA_PIO_CONSTFILL_DATA, data); + + WREG32_SOC15(LSDMA, 0, regLSDMA_PIO_DST_ADDR_LO, lower_32_bits(dst_addr)); + WREG32_SOC15(LSDMA, 0, regLSDMA_PIO_DST_ADDR_HI, upper_32_bits(dst_addr)); + + WREG32_SOC15(LSDMA, 0, regLSDMA_PIO_CONTROL, 0x0); + + tmp = RREG32_SOC15(LSDMA, 0, regLSDMA_PIO_COMMAND); + tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, BYTE_COUNT, size); + tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, SRC_LOCATION, 0); + tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, DST_LOCATION, 0); + tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, SRC_ADDR_INC, 0); + tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, DST_ADDR_INC, 0); + tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, OVERLAP_DISABLE, 0); + tmp = REG_SET_FIELD(tmp, LSDMA_PIO_COMMAND, CONSTANT_FILL, 1); + WREG32_SOC15(LSDMA, 0, regLSDMA_PIO_COMMAND, tmp); + + ret = lsdma_v7_0_wait_pio_status(adev); + if (ret) + dev_err(adev->dev, "LSDMA PIO failed to fill memory!\n"); + + return ret; +} + +static void lsdma_v7_0_update_memory_power_gating(struct amdgpu_device *adev, + bool enable) +{ + uint32_t tmp; + + tmp = RREG32_SOC15(LSDMA, 0, regLSDMA_MEM_POWER_CTRL); + tmp = REG_SET_FIELD(tmp, LSDMA_MEM_POWER_CTRL, MEM_POWER_CTRL_EN, 0); + WREG32_SOC15(LSDMA, 0, regLSDMA_MEM_POWER_CTRL, tmp); + + tmp = REG_SET_FIELD(tmp, LSDMA_MEM_POWER_CTRL, MEM_POWER_CTRL_EN, enable); + WREG32_SOC15(LSDMA, 0, regLSDMA_MEM_POWER_CTRL, tmp); +} + +const struct amdgpu_lsdma_funcs lsdma_v7_0_funcs = { + .copy_mem = lsdma_v7_0_copy_mem, + .fill_mem = lsdma_v7_0_fill_mem, + .update_memory_power_gating = lsdma_v7_0_update_memory_power_gating +}; diff --git a/drivers/gpu/drm/amd/amdgpu/lsdma_v7_0.h b/drivers/gpu/drm/amd/amdgpu/lsdma_v7_0.h new file mode 100644 index 000000000000..52b4485cdd98 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/lsdma_v7_0.h @@ -0,0 +1,31 @@ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __LSDMA_V7_0_H__ +#define __LSDMA_V7_0_H__ + +#include "soc15_common.h" + +extern const struct amdgpu_lsdma_funcs lsdma_v7_0_funcs; + +#endif /* __LSDMA_V7_0_H__ */ diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c index 31b26e6f0b30..81833395324a 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c @@ -49,6 +49,8 @@ MODULE_FIRMWARE("amdgpu/gc_11_0_4_mes_2.bin"); MODULE_FIRMWARE("amdgpu/gc_11_0_4_mes1.bin"); MODULE_FIRMWARE("amdgpu/gc_11_5_0_mes_2.bin"); MODULE_FIRMWARE("amdgpu/gc_11_5_0_mes1.bin"); +MODULE_FIRMWARE("amdgpu/gc_11_5_1_mes_2.bin"); +MODULE_FIRMWARE("amdgpu/gc_11_5_1_mes1.bin"); static int mes_v11_0_hw_fini(void *handle); @@ -56,6 +58,7 @@ static int mes_v11_0_kiq_hw_init(struct amdgpu_device *adev); static int mes_v11_0_kiq_hw_fini(struct amdgpu_device *adev); #define MES_EOP_SIZE 2048 +#define GFX_MES_DRAM_SIZE 0x80000 static void mes_v11_0_ring_set_wptr(struct amdgpu_ring *ring) { @@ -108,7 +111,7 @@ static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, struct amdgpu_device *adev = mes->adev; struct amdgpu_ring *ring = &mes->ring; unsigned long flags; - signed long timeout = adev->usec_timeout; + signed long timeout = 3000000; /* 3000 ms */ if (amdgpu_emu_mode) { timeout *= 100; @@ -408,64 +411,45 @@ static int mes_v11_0_set_hw_resources(struct amdgpu_mes *mes) mes_set_hw_res_pkt.enable_reg_active_poll = 1; mes_set_hw_res_pkt.enable_level_process_quantum_check = 1; mes_set_hw_res_pkt.oversubscription_timer = 50; + if (amdgpu_mes_log_enable) { + mes_set_hw_res_pkt.enable_mes_event_int_logging = 1; + mes_set_hw_res_pkt.event_intr_history_gpu_mc_ptr = + mes->event_log_gpu_addr; + } return mes_v11_0_submit_pkt_and_poll_completion(mes, &mes_set_hw_res_pkt, sizeof(mes_set_hw_res_pkt), offsetof(union MESAPI_SET_HW_RESOURCES, api_status)); } -static void mes_v11_0_init_aggregated_doorbell(struct amdgpu_mes *mes) +static int mes_v11_0_set_hw_resources_1(struct amdgpu_mes *mes) { + int size = 128 * PAGE_SIZE; + int ret = 0; struct amdgpu_device *adev = mes->adev; - uint32_t data; + union MESAPI_SET_HW_RESOURCES_1 mes_set_hw_res_pkt; + memset(&mes_set_hw_res_pkt, 0, sizeof(mes_set_hw_res_pkt)); - data = RREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL1); - data &= ~(CP_MES_DOORBELL_CONTROL1__DOORBELL_OFFSET_MASK | - CP_MES_DOORBELL_CONTROL1__DOORBELL_EN_MASK | - CP_MES_DOORBELL_CONTROL1__DOORBELL_HIT_MASK); - data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_LOW] << - CP_MES_DOORBELL_CONTROL1__DOORBELL_OFFSET__SHIFT; - data |= 1 << CP_MES_DOORBELL_CONTROL1__DOORBELL_EN__SHIFT; - WREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL1, data); - - data = RREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL2); - data &= ~(CP_MES_DOORBELL_CONTROL2__DOORBELL_OFFSET_MASK | - CP_MES_DOORBELL_CONTROL2__DOORBELL_EN_MASK | - CP_MES_DOORBELL_CONTROL2__DOORBELL_HIT_MASK); - data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_NORMAL] << - CP_MES_DOORBELL_CONTROL2__DOORBELL_OFFSET__SHIFT; - data |= 1 << CP_MES_DOORBELL_CONTROL2__DOORBELL_EN__SHIFT; - WREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL2, data); - - data = RREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL3); - data &= ~(CP_MES_DOORBELL_CONTROL3__DOORBELL_OFFSET_MASK | - CP_MES_DOORBELL_CONTROL3__DOORBELL_EN_MASK | - CP_MES_DOORBELL_CONTROL3__DOORBELL_HIT_MASK); - data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_MEDIUM] << - CP_MES_DOORBELL_CONTROL3__DOORBELL_OFFSET__SHIFT; - data |= 1 << CP_MES_DOORBELL_CONTROL3__DOORBELL_EN__SHIFT; - WREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL3, data); - - data = RREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL4); - data &= ~(CP_MES_DOORBELL_CONTROL4__DOORBELL_OFFSET_MASK | - CP_MES_DOORBELL_CONTROL4__DOORBELL_EN_MASK | - CP_MES_DOORBELL_CONTROL4__DOORBELL_HIT_MASK); - data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_HIGH] << - CP_MES_DOORBELL_CONTROL4__DOORBELL_OFFSET__SHIFT; - data |= 1 << CP_MES_DOORBELL_CONTROL4__DOORBELL_EN__SHIFT; - WREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL4, data); - - data = RREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL5); - data &= ~(CP_MES_DOORBELL_CONTROL5__DOORBELL_OFFSET_MASK | - CP_MES_DOORBELL_CONTROL5__DOORBELL_EN_MASK | - CP_MES_DOORBELL_CONTROL5__DOORBELL_HIT_MASK); - data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_REALTIME] << - CP_MES_DOORBELL_CONTROL5__DOORBELL_OFFSET__SHIFT; - data |= 1 << CP_MES_DOORBELL_CONTROL5__DOORBELL_EN__SHIFT; - WREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL5, data); - - data = 1 << CP_HQD_GFX_CONTROL__DB_UPDATED_MSG_EN__SHIFT; - WREG32_SOC15(GC, 0, regCP_HQD_GFX_CONTROL, data); + mes_set_hw_res_pkt.header.type = MES_API_TYPE_SCHEDULER; + mes_set_hw_res_pkt.header.opcode = MES_SCH_API_SET_HW_RSRC_1; + mes_set_hw_res_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; + mes_set_hw_res_pkt.enable_mes_info_ctx = 1; + + ret = amdgpu_bo_create_kernel(adev, size, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM, + &mes->resource_1, + &mes->resource_1_gpu_addr, + &mes->resource_1_addr); + if (ret) { + dev_err(adev->dev, "(%d) failed to create mes resource_1 bo\n", ret); + return ret; + } + + mes_set_hw_res_pkt.mes_info_ctx_mc_addr = mes->resource_1_gpu_addr; + mes_set_hw_res_pkt.mes_info_ctx_size = mes->resource_1->tbo.base.size; + return mes_v11_0_submit_pkt_and_poll_completion(mes, + &mes_set_hw_res_pkt, sizeof(mes_set_hw_res_pkt), + offsetof(union MESAPI_SET_HW_RESOURCES_1, api_status)); } static const struct amdgpu_mes_funcs mes_v11_0_funcs = { @@ -527,7 +511,13 @@ static int mes_v11_0_allocate_ucode_data_buffer(struct amdgpu_device *adev, le32_to_cpu(mes_hdr->mes_ucode_data_offset_bytes)); fw_size = le32_to_cpu(mes_hdr->mes_ucode_data_size_bytes); - r = amdgpu_bo_create_reserved(adev, fw_size, + if (fw_size > GFX_MES_DRAM_SIZE) { + dev_err(adev->dev, "PIPE%d ucode data fw size (%d) is greater than dram size (%d)\n", + pipe, fw_size, GFX_MES_DRAM_SIZE); + return -EINVAL; + } + + r = amdgpu_bo_create_reserved(adev, GFX_MES_DRAM_SIZE, 64 * 1024, AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT, @@ -663,8 +653,8 @@ static int mes_v11_0_load_microcode(struct amdgpu_device *adev, WREG32_SOC15(GC, 0, regCP_MES_MDBASE_HI, upper_32_bits(adev->mes.data_fw_gpu_addr[pipe])); - /* Set 0x3FFFF (256K-1) to CP_MES_MDBOUND_LO */ - WREG32_SOC15(GC, 0, regCP_MES_MDBOUND_LO, 0x3FFFF); + /* Set 0x7FFFF (512K-1) to CP_MES_MDBOUND_LO */ + WREG32_SOC15(GC, 0, regCP_MES_MDBOUND_LO, 0x7FFFF); if (prime_icache) { /* invalidate ICACHE */ @@ -1243,7 +1233,13 @@ static int mes_v11_0_hw_init(void *handle) if (r) goto failure; - mes_v11_0_init_aggregated_doorbell(&adev->mes); + if (amdgpu_sriov_is_mes_info_enable(adev)) { + r = mes_v11_0_set_hw_resources_1(&adev->mes); + if (r) { + DRM_ERROR("failed mes_v11_0_set_hw_resources_1, r=%d\n", r); + goto failure; + } + } r = mes_v11_0_query_sched_status(&adev->mes); if (r) { @@ -1268,6 +1264,11 @@ failure: static int mes_v11_0_hw_fini(void *handle) { + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + if (amdgpu_sriov_is_mes_info_enable(adev)) { + amdgpu_bo_free_kernel(&adev->mes.resource_1, &adev->mes.resource_1_gpu_addr, + &adev->mes.resource_1_addr); + } return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c index 843219a91736..e3ddd22aa172 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c @@ -96,7 +96,9 @@ static void mmhub_v1_0_init_system_aperture_regs(struct amdgpu_device *adev) WREG32_SOC15(MMHUB, 0, mmMC_VM_SYSTEM_APERTURE_LOW_ADDR, min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18); - if (adev->apu_flags & AMD_APU_IS_RAVEN2) + if (adev->apu_flags & (AMD_APU_IS_RAVEN2 | + AMD_APU_IS_RENOIR | + AMD_APU_IS_GREEN_SARDINE)) /* * Raven2 has a HW issue that it is unable to use the vram which * is out of MC_VM_SYSTEM_APERTURE_HIGH_ADDR. So here is the diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c index aa00483e7b37..7a1ff298417a 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c @@ -33,6 +33,7 @@ #define regVM_L2_CNTL3_DEFAULT 0x80100007 #define regVM_L2_CNTL4_DEFAULT 0x000000c1 +#define mmSMNAID_AID0_MCA_SMU 0x03b30400 static u64 mmhub_v1_8_get_fb_location(struct amdgpu_device *adev) { @@ -130,6 +131,9 @@ static void mmhub_v1_8_init_system_aperture_regs(struct amdgpu_device *adev) uint64_t value; int i; + if (amdgpu_sriov_vf(adev)) + return; + inst_mask = adev->aid_mask; for_each_inst(i, inst_mask) { /* Program the AGP BAR */ @@ -139,9 +143,6 @@ static void mmhub_v1_8_init_system_aperture_regs(struct amdgpu_device *adev) WREG32_SOC15(MMHUB, i, regMC_VM_AGP_TOP, adev->gmc.agp_end >> 24); - if (amdgpu_sriov_vf(adev)) - return; - /* Program the system aperture low logical page number. */ WREG32_SOC15(MMHUB, i, regMC_VM_SYSTEM_APERTURE_LOW_ADDR, min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18); @@ -558,6 +559,20 @@ static void mmhub_v1_8_get_clockgating(struct amdgpu_device *adev, u64 *flags) } +static bool mmhub_v1_8_query_utcl2_poison_status(struct amdgpu_device *adev, + int hub_inst) +{ + u32 fed, status; + + status = RREG32_SOC15(MMHUB, hub_inst, regVM_L2_PROTECTION_FAULT_STATUS); + fed = REG_GET_FIELD(status, VM_L2_PROTECTION_FAULT_STATUS, FED); + /* reset page fault status */ + WREG32_P(SOC15_REG_OFFSET(MMHUB, hub_inst, + regVM_L2_PROTECTION_FAULT_STATUS), 1, ~1); + + return fed; +} + const struct amdgpu_mmhub_funcs mmhub_v1_8_funcs = { .get_fb_location = mmhub_v1_8_get_fb_location, .init = mmhub_v1_8_init, @@ -567,6 +582,7 @@ const struct amdgpu_mmhub_funcs mmhub_v1_8_funcs = { .setup_vm_pt_regs = mmhub_v1_8_setup_vm_pt_regs, .set_clockgating = mmhub_v1_8_set_clockgating, .get_clockgating = mmhub_v1_8_get_clockgating, + .query_utcl2_poison_status = mmhub_v1_8_query_utcl2_poison_status, }; static const struct amdgpu_ras_err_status_reg_entry mmhub_v1_8_ce_reg_list[] = { @@ -652,8 +668,8 @@ static void mmhub_v1_8_inst_query_ras_error_count(struct amdgpu_device *adev, AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE, &ue_count); - amdgpu_ras_error_statistic_ce_count(err_data, &mcm_info, ce_count); - amdgpu_ras_error_statistic_ue_count(err_data, &mcm_info, ue_count); + amdgpu_ras_error_statistic_ce_count(err_data, &mcm_info, NULL, ce_count); + amdgpu_ras_error_statistic_ue_count(err_data, &mcm_info, NULL, ue_count); } static void mmhub_v1_8_query_ras_error_count(struct amdgpu_device *adev, @@ -700,156 +716,103 @@ static void mmhub_v1_8_reset_ras_error_count(struct amdgpu_device *adev) mmhub_v1_8_inst_reset_ras_error_count(adev, i); } -static const u32 mmhub_v1_8_mmea_err_status_reg[] __maybe_unused = { - regMMEA0_ERR_STATUS, - regMMEA1_ERR_STATUS, - regMMEA2_ERR_STATUS, - regMMEA3_ERR_STATUS, - regMMEA4_ERR_STATUS, +static const struct amdgpu_ras_block_hw_ops mmhub_v1_8_ras_hw_ops = { + .query_ras_error_count = mmhub_v1_8_query_ras_error_count, + .reset_ras_error_count = mmhub_v1_8_reset_ras_error_count, }; -static void mmhub_v1_8_inst_query_ras_err_status(struct amdgpu_device *adev, - uint32_t mmhub_inst) +static int mmhub_v1_8_aca_bank_parser(struct aca_handle *handle, struct aca_bank *bank, + enum aca_smu_type type, void *data) { - uint32_t reg_value; - uint32_t mmea_err_status_addr_dist; - uint32_t i; - - /* query mmea ras err status */ - mmea_err_status_addr_dist = regMMEA1_ERR_STATUS - regMMEA0_ERR_STATUS; - for (i = 0; i < ARRAY_SIZE(mmhub_v1_8_mmea_err_status_reg); i++) { - reg_value = RREG32_SOC15_OFFSET(MMHUB, mmhub_inst, - regMMEA0_ERR_STATUS, - i * mmea_err_status_addr_dist); - if (REG_GET_FIELD(reg_value, MMEA0_ERR_STATUS, SDP_RDRSP_STATUS) || - REG_GET_FIELD(reg_value, MMEA0_ERR_STATUS, SDP_WRRSP_STATUS) || - REG_GET_FIELD(reg_value, MMEA0_ERR_STATUS, SDP_RDRSP_DATAPARITY_ERROR)) { - dev_warn(adev->dev, - "Detected MMEA%d err in MMHUB%d, status: 0x%x\n", - i, mmhub_inst, reg_value); - } + struct aca_bank_info info; + u64 misc0; + int ret; + + ret = aca_bank_info_decode(bank, &info); + if (ret) + return ret; + + misc0 = bank->regs[ACA_REG_IDX_MISC0]; + switch (type) { + case ACA_SMU_TYPE_UE: + ret = aca_error_cache_log_bank_error(handle, &info, ACA_ERROR_TYPE_UE, + 1ULL); + break; + case ACA_SMU_TYPE_CE: + ret = aca_error_cache_log_bank_error(handle, &info, ACA_ERROR_TYPE_CE, + ACA_REG__MISC0__ERRCNT(misc0)); + break; + default: + return -EINVAL; } - /* query mm_cane ras err status */ - reg_value = RREG32_SOC15(MMHUB, mmhub_inst, regMM_CANE_ERR_STATUS); - if (REG_GET_FIELD(reg_value, MM_CANE_ERR_STATUS, SDPM_RDRSP_STATUS) || - REG_GET_FIELD(reg_value, MM_CANE_ERR_STATUS, SDPM_WRRSP_STATUS) || - REG_GET_FIELD(reg_value, MM_CANE_ERR_STATUS, SDPM_RDRSP_DATAPARITY_ERROR)) { - dev_warn(adev->dev, - "Detected MM CANE err in MMHUB%d, status: 0x%x\n", - mmhub_inst, reg_value); - } + return ret; } -static void mmhub_v1_8_query_ras_error_status(struct amdgpu_device *adev) +/* reference to smu driver if header file */ +static int mmhub_v1_8_err_codes[] = { + 0, 1, 2, 3, 4, /* CODE_DAGB0 - 4 */ + 5, 6, 7, 8, 9, /* CODE_EA0 - 4 */ + 10, /* CODE_UTCL2_ROUTER */ + 11, /* CODE_VML2 */ + 12, /* CODE_VML2_WALKER */ + 13, /* CODE_MMCANE */ +}; + +static bool mmhub_v1_8_aca_bank_is_valid(struct aca_handle *handle, struct aca_bank *bank, + enum aca_smu_type type, void *data) { - uint32_t inst_mask; - uint32_t i; + u32 instlo; - if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__MMHUB)) { - dev_warn(adev->dev, "MMHUB RAS is not supported\n"); - return; - } + instlo = ACA_REG__IPID__INSTANCEIDLO(bank->regs[ACA_REG_IDX_IPID]); + instlo &= GENMASK(31, 1); - inst_mask = adev->aid_mask; - for_each_inst(i, inst_mask) - mmhub_v1_8_inst_query_ras_err_status(adev, i); + if (instlo != mmSMNAID_AID0_MCA_SMU) + return false; + + if (aca_bank_check_error_codes(handle->adev, bank, + mmhub_v1_8_err_codes, + ARRAY_SIZE(mmhub_v1_8_err_codes))) + return false; + + return true; } -static void mmhub_v1_8_inst_reset_ras_err_status(struct amdgpu_device *adev, - uint32_t mmhub_inst) +static const struct aca_bank_ops mmhub_v1_8_aca_bank_ops = { + .aca_bank_parser = mmhub_v1_8_aca_bank_parser, + .aca_bank_is_valid = mmhub_v1_8_aca_bank_is_valid, +}; + +static const struct aca_info mmhub_v1_8_aca_info = { + .hwip = ACA_HWIP_TYPE_SMU, + .mask = ACA_ERROR_UE_MASK, + .bank_ops = &mmhub_v1_8_aca_bank_ops, +}; + +static int mmhub_v1_8_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block) { - uint32_t mmea_cgtt_clk_cntl_addr_dist; - uint32_t mmea_err_status_addr_dist; - uint32_t reg_value; - uint32_t i; + int r; - /* reset mmea ras err status */ - mmea_cgtt_clk_cntl_addr_dist = regMMEA1_CGTT_CLK_CTRL - regMMEA0_CGTT_CLK_CTRL; - mmea_err_status_addr_dist = regMMEA1_ERR_STATUS - regMMEA0_ERR_STATUS; - for (i = 0; i < ARRAY_SIZE(mmhub_v1_8_mmea_err_status_reg); i++) { - /* force clk branch on for response path - * set MMEA0_CGTT_CLK_CTRL.SOFT_OVERRIDE_RETURN = 1 - */ - reg_value = RREG32_SOC15_OFFSET(MMHUB, mmhub_inst, - regMMEA0_CGTT_CLK_CTRL, - i * mmea_cgtt_clk_cntl_addr_dist); - reg_value = REG_SET_FIELD(reg_value, MMEA0_CGTT_CLK_CTRL, - SOFT_OVERRIDE_RETURN, 1); - WREG32_SOC15_OFFSET(MMHUB, mmhub_inst, - regMMEA0_CGTT_CLK_CTRL, - i * mmea_cgtt_clk_cntl_addr_dist, - reg_value); - - /* set MMEA0_ERR_STATUS.CLEAR_ERROR_STATUS = 1 */ - reg_value = RREG32_SOC15_OFFSET(MMHUB, mmhub_inst, - regMMEA0_ERR_STATUS, - i * mmea_err_status_addr_dist); - reg_value = REG_SET_FIELD(reg_value, MMEA0_ERR_STATUS, - CLEAR_ERROR_STATUS, 1); - WREG32_SOC15_OFFSET(MMHUB, mmhub_inst, - regMMEA0_ERR_STATUS, - i * mmea_err_status_addr_dist, - reg_value); - - /* set MMEA0_CGTT_CLK_CTRL.SOFT_OVERRIDE_RETURN = 0 */ - reg_value = RREG32_SOC15_OFFSET(MMHUB, mmhub_inst, - regMMEA0_CGTT_CLK_CTRL, - i * mmea_cgtt_clk_cntl_addr_dist); - reg_value = REG_SET_FIELD(reg_value, MMEA0_CGTT_CLK_CTRL, - SOFT_OVERRIDE_RETURN, 0); - WREG32_SOC15_OFFSET(MMHUB, mmhub_inst, - regMMEA0_CGTT_CLK_CTRL, - i * mmea_cgtt_clk_cntl_addr_dist, - reg_value); - } + r = amdgpu_ras_block_late_init(adev, ras_block); + if (r) + return r; - /* reset mm_cane ras err status - * force clk branch on for response path - * set MM_CANE_ICG_CTRL.SOFT_OVERRIDE_ATRET = 1 - */ - reg_value = RREG32_SOC15(MMHUB, mmhub_inst, regMM_CANE_ICG_CTRL); - reg_value = REG_SET_FIELD(reg_value, MM_CANE_ICG_CTRL, - SOFT_OVERRIDE_ATRET, 1); - WREG32_SOC15(MMHUB, mmhub_inst, regMM_CANE_ICG_CTRL, reg_value); - - /* set MM_CANE_ERR_STATUS.CLEAR_ERROR_STATUS = 1 */ - reg_value = RREG32_SOC15(MMHUB, mmhub_inst, regMM_CANE_ERR_STATUS); - reg_value = REG_SET_FIELD(reg_value, MM_CANE_ERR_STATUS, - CLEAR_ERROR_STATUS, 1); - WREG32_SOC15(MMHUB, mmhub_inst, regMM_CANE_ERR_STATUS, reg_value); - - /* set MM_CANE_ICG_CTRL.SOFT_OVERRIDE_ATRET = 0 */ - reg_value = RREG32_SOC15(MMHUB, mmhub_inst, regMM_CANE_ICG_CTRL); - reg_value = REG_SET_FIELD(reg_value, MM_CANE_ICG_CTRL, - SOFT_OVERRIDE_ATRET, 0); - WREG32_SOC15(MMHUB, mmhub_inst, regMM_CANE_ICG_CTRL, reg_value); -} + r = amdgpu_ras_bind_aca(adev, AMDGPU_RAS_BLOCK__MMHUB, + &mmhub_v1_8_aca_info, NULL); + if (r) + goto late_fini; -static void mmhub_v1_8_reset_ras_error_status(struct amdgpu_device *adev) -{ - uint32_t inst_mask; - uint32_t i; + return 0; - if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__MMHUB)) { - dev_warn(adev->dev, "MMHUB RAS is not supported\n"); - return; - } +late_fini: + amdgpu_ras_block_late_fini(adev, ras_block); - inst_mask = adev->aid_mask; - for_each_inst(i, inst_mask) - mmhub_v1_8_inst_reset_ras_err_status(adev, i); + return r; } -static const struct amdgpu_ras_block_hw_ops mmhub_v1_8_ras_hw_ops = { - .query_ras_error_count = mmhub_v1_8_query_ras_error_count, - .reset_ras_error_count = mmhub_v1_8_reset_ras_error_count, - .query_ras_error_status = mmhub_v1_8_query_ras_error_status, - .reset_ras_error_status = mmhub_v1_8_reset_ras_error_status, -}; - struct amdgpu_mmhub_ras mmhub_v1_8_ras = { .ras_block = { .hw_ops = &mmhub_v1_8_ras_hw_ops, + .ras_late_init = mmhub_v1_8_ras_late_init, }, }; diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_3.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_3.c index dc4812ecc98d..238ea40c2450 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_3.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_3.c @@ -98,16 +98,16 @@ mmhub_v3_3_print_l2_protection_fault_status(struct amdgpu_device *adev, switch (amdgpu_ip_version(adev, MMHUB_HWIP, 0)) { case IP_VERSION(3, 3, 0): - mmhub_cid = mmhub_client_ids_v3_3[cid][rw]; + case IP_VERSION(3, 3, 1): + mmhub_cid = cid < ARRAY_SIZE(mmhub_client_ids_v3_3) ? + mmhub_client_ids_v3_3[cid][rw] : + cid == 0x140 ? "UMSCH" : NULL; break; default: mmhub_cid = NULL; break; } - if (!mmhub_cid && cid == 0x140) - mmhub_cid = "UMSCH"; - dev_err(adev->dev, "\t Faulty UTCL2 client ID: %s (0x%x)\n", mmhub_cid ? mmhub_cid : "unknown", cid); dev_err(adev->dev, "\t MORE_FAULTS: 0x%lx\n", diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c index 63725b2ebc03..0c7275bca8f7 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c @@ -276,6 +276,8 @@ static void xgpu_ai_mailbox_flr_work(struct work_struct *work) timeout -= 10; } while (timeout > 1); + dev_warn(adev->dev, "waiting IDH_FLR_NOTIFICATION_CMPL timeout\n"); + flr_done: atomic_set(&adev->reset_domain->in_gpu_reset, 0); up_write(&adev->reset_domain->sem); @@ -404,7 +406,8 @@ static int xgpu_ai_request_init_data(struct amdgpu_device *adev) return xgpu_ai_send_access_requests(adev, IDH_REQ_GPU_INIT_DATA); } -static void xgpu_ai_ras_poison_handler(struct amdgpu_device *adev) +static void xgpu_ai_ras_poison_handler(struct amdgpu_device *adev, + enum amdgpu_ras_block block) { xgpu_ai_send_access_requests(adev, IDH_RAS_POISON); } diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c index 6a68ee946f1c..aba00d961627 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c @@ -152,14 +152,14 @@ static void xgpu_nv_mailbox_trans_msg (struct amdgpu_device *adev, xgpu_nv_mailbox_set_valid(adev, false); } -static int xgpu_nv_send_access_requests(struct amdgpu_device *adev, - enum idh_request req) +static int xgpu_nv_send_access_requests_with_param(struct amdgpu_device *adev, + enum idh_request req, u32 data1, u32 data2, u32 data3) { int r, retry = 1; enum idh_event event = -1; send_request: - xgpu_nv_mailbox_trans_msg(adev, req, 0, 0, 0); + xgpu_nv_mailbox_trans_msg(adev, req, data1, data2, data3); switch (req) { case IDH_REQ_GPU_INIT_ACCESS: @@ -170,6 +170,10 @@ send_request: case IDH_REQ_GPU_INIT_DATA: event = IDH_REQ_GPU_INIT_DATA_READY; break; + case IDH_RAS_POISON: + if (data1 != 0) + event = IDH_RAS_POISON_READY; + break; default: break; } @@ -206,6 +210,13 @@ send_request: return 0; } +static int xgpu_nv_send_access_requests(struct amdgpu_device *adev, + enum idh_request req) +{ + return xgpu_nv_send_access_requests_with_param(adev, + req, 0, 0, 0); +} + static int xgpu_nv_request_reset(struct amdgpu_device *adev) { int ret, i = 0; @@ -298,6 +309,8 @@ static void xgpu_nv_mailbox_flr_work(struct work_struct *work) timeout -= 10; } while (timeout > 1); + dev_warn(adev->dev, "waiting IDH_FLR_NOTIFICATION_CMPL timeout\n"); + flr_done: atomic_set(&adev->reset_domain->in_gpu_reset, 0); up_write(&adev->reset_domain->sem); @@ -424,9 +437,16 @@ void xgpu_nv_mailbox_put_irq(struct amdgpu_device *adev) amdgpu_irq_put(adev, &adev->virt.rcv_irq, 0); } -static void xgpu_nv_ras_poison_handler(struct amdgpu_device *adev) +static void xgpu_nv_ras_poison_handler(struct amdgpu_device *adev, + enum amdgpu_ras_block block) { - xgpu_nv_send_access_requests(adev, IDH_RAS_POISON); + if (amdgpu_ip_version(adev, UMC_HWIP, 0) < IP_VERSION(12, 0, 0)) { + xgpu_nv_send_access_requests(adev, IDH_RAS_POISON); + } else { + amdgpu_virt_fini_data_exchange(adev); + xgpu_nv_send_access_requests_with_param(adev, + IDH_RAS_POISON, block, 0, 0); + } } const struct amdgpu_virt_ops xgpu_nv_virt_ops = { diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h index d0221ce08769..1e8fd90cab43 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h @@ -51,6 +51,7 @@ enum idh_event { IDH_FAIL, IDH_QUERY_ALIVE, IDH_REQ_GPU_INIT_DATA_READY, + IDH_RAS_POISON_READY, IDH_TEXT_MESSAGE = 255, }; diff --git a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c index e64b33115848..4178f4e5dad7 100644 --- a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c @@ -442,6 +442,12 @@ static u32 navi10_ih_get_wptr(struct amdgpu_device *adev, tmp = RREG32_NO_KIQ(ih_regs->ih_rb_cntl); tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1); WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp); + + /* Unset the CLEAR_OVERFLOW bit immediately so new overflows + * can be detected. + */ + tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0); + WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp); out: return (wptr & ih->ptr_mask); } @@ -722,8 +728,7 @@ static void navi10_ih_set_interrupt_funcs(struct amdgpu_device *adev) adev->irq.ih_funcs = &navi10_ih_funcs; } -const struct amdgpu_ip_block_version navi10_ih_ip_block = -{ +const struct amdgpu_ip_block_version navi10_ih_ip_block = { .type = AMD_IP_BLOCK_TYPE_IH, .major = 5, .minor = 0, diff --git a/drivers/gpu/drm/amd/amdgpu/nbif_v6_3_1.c b/drivers/gpu/drm/amd/amdgpu/nbif_v6_3_1.c new file mode 100644 index 000000000000..96ed00ac81ac --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/nbif_v6_3_1.c @@ -0,0 +1,495 @@ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#include "amdgpu.h" +#include "amdgpu_atombios.h" +#include "nbif_v6_3_1.h" + +#include "nbif/nbif_6_3_1_offset.h" +#include "nbif/nbif_6_3_1_sh_mask.h" +#include "pcie/pcie_6_1_0_offset.h" +#include "pcie/pcie_6_1_0_sh_mask.h" +#include <uapi/linux/kfd_ioctl.h> + +static void nbif_v6_3_1_remap_hdp_registers(struct amdgpu_device *adev) +{ + WREG32_SOC15(NBIO, 0, regBIF_BX0_REMAP_HDP_MEM_FLUSH_CNTL, + adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL); + WREG32_SOC15(NBIO, 0, regBIF_BX0_REMAP_HDP_REG_FLUSH_CNTL, + adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_REG_FLUSH_CNTL); +} + +static u32 nbif_v6_3_1_get_rev_id(struct amdgpu_device *adev) +{ + u32 tmp = RREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_DEV0_EPF0_STRAP0); + + tmp &= RCC_STRAP0_RCC_DEV0_EPF0_STRAP0__STRAP_ATI_REV_ID_DEV0_F0_MASK; + tmp >>= RCC_STRAP0_RCC_DEV0_EPF0_STRAP0__STRAP_ATI_REV_ID_DEV0_F0__SHIFT; + + return tmp; +} + +static void nbif_v6_3_1_mc_access_enable(struct amdgpu_device *adev, bool enable) +{ + if (enable) + WREG32_SOC15(NBIO, 0, regBIF_BX0_BIF_FB_EN, + BIF_BX0_BIF_FB_EN__FB_READ_EN_MASK | + BIF_BX0_BIF_FB_EN__FB_WRITE_EN_MASK); + else + WREG32_SOC15(NBIO, 0, regBIF_BX0_BIF_FB_EN, 0); +} + +static u32 nbif_v6_3_1_get_memsize(struct amdgpu_device *adev) +{ + return RREG32_SOC15(NBIO, 0, regRCC_DEV0_EPF0_RCC_CONFIG_MEMSIZE); +} + +static void nbif_v6_3_1_sdma_doorbell_range(struct amdgpu_device *adev, + int instance, bool use_doorbell, + int doorbell_index, + int doorbell_size) +{ + if (instance == 0) { + u32 doorbell_range = RREG32_SOC15(NBIO, 0, regGDC_S2A0_S2A_DOORBELL_ENTRY_2_CTRL); + + if (use_doorbell) { + doorbell_range = REG_SET_FIELD(doorbell_range, + GDC_S2A0_S2A_DOORBELL_ENTRY_2_CTRL, + S2A_DOORBELL_PORT2_ENABLE, + 0x1); + doorbell_range = REG_SET_FIELD(doorbell_range, + GDC_S2A0_S2A_DOORBELL_ENTRY_2_CTRL, + S2A_DOORBELL_PORT2_AWID, + 0xe); + doorbell_range = REG_SET_FIELD(doorbell_range, + GDC_S2A0_S2A_DOORBELL_ENTRY_2_CTRL, + S2A_DOORBELL_PORT2_RANGE_OFFSET, + doorbell_index); + doorbell_range = REG_SET_FIELD(doorbell_range, + GDC_S2A0_S2A_DOORBELL_ENTRY_2_CTRL, + S2A_DOORBELL_PORT2_RANGE_SIZE, + doorbell_size); + doorbell_range = REG_SET_FIELD(doorbell_range, + GDC_S2A0_S2A_DOORBELL_ENTRY_2_CTRL, + S2A_DOORBELL_PORT2_AWADDR_31_28_VALUE, + 0x3); + } else + doorbell_range = REG_SET_FIELD(doorbell_range, + GDC_S2A0_S2A_DOORBELL_ENTRY_2_CTRL, + S2A_DOORBELL_PORT2_RANGE_SIZE, + 0); + + WREG32_SOC15(NBIO, 0, regGDC_S2A0_S2A_DOORBELL_ENTRY_2_CTRL, doorbell_range); + } +} + +static void nbif_v6_3_1_vcn_doorbell_range(struct amdgpu_device *adev, + bool use_doorbell, int doorbell_index, + int instance) +{ + u32 doorbell_range; + + if (instance) + doorbell_range = RREG32_SOC15(NBIO, 0, regGDC_S2A0_S2A_DOORBELL_ENTRY_5_CTRL); + else + doorbell_range = RREG32_SOC15(NBIO, 0, regGDC_S2A0_S2A_DOORBELL_ENTRY_4_CTRL); + + if (use_doorbell) { + doorbell_range = REG_SET_FIELD(doorbell_range, + GDC_S2A0_S2A_DOORBELL_ENTRY_4_CTRL, + S2A_DOORBELL_PORT4_ENABLE, + 0x1); + doorbell_range = REG_SET_FIELD(doorbell_range, + GDC_S2A0_S2A_DOORBELL_ENTRY_4_CTRL, + S2A_DOORBELL_PORT4_AWID, + instance ? 0x7 : 0x4); + doorbell_range = REG_SET_FIELD(doorbell_range, + GDC_S2A0_S2A_DOORBELL_ENTRY_4_CTRL, + S2A_DOORBELL_PORT4_RANGE_OFFSET, + doorbell_index); + doorbell_range = REG_SET_FIELD(doorbell_range, + GDC_S2A0_S2A_DOORBELL_ENTRY_4_CTRL, + S2A_DOORBELL_PORT4_RANGE_SIZE, + 8); + doorbell_range = REG_SET_FIELD(doorbell_range, + GDC_S2A0_S2A_DOORBELL_ENTRY_4_CTRL, + S2A_DOORBELL_PORT4_AWADDR_31_28_VALUE, + instance ? 0x7 : 0x4); + } else + doorbell_range = REG_SET_FIELD(doorbell_range, + GDC_S2A0_S2A_DOORBELL_ENTRY_4_CTRL, + S2A_DOORBELL_PORT4_RANGE_SIZE, + 0); + + if (instance) + WREG32_SOC15(NBIO, 0, regGDC_S2A0_S2A_DOORBELL_ENTRY_5_CTRL, doorbell_range); + else + WREG32_SOC15(NBIO, 0, regGDC_S2A0_S2A_DOORBELL_ENTRY_4_CTRL, doorbell_range); +} + +static void nbif_v6_3_1_gc_doorbell_init(struct amdgpu_device *adev) +{ + WREG32_SOC15(NBIO, 0, regGDC_S2A0_S2A_DOORBELL_ENTRY_0_CTRL, 0x30000007); + WREG32_SOC15(NBIO, 0, regGDC_S2A0_S2A_DOORBELL_ENTRY_3_CTRL, 0x3000000d); +} + +static void nbif_v6_3_1_enable_doorbell_aperture(struct amdgpu_device *adev, + bool enable) +{ + WREG32_FIELD15_PREREG(NBIO, 0, RCC_DEV0_EPF0_RCC_DOORBELL_APER_EN, + BIF_DOORBELL_APER_EN, enable ? 1 : 0); +} + +static void +nbif_v6_3_1_enable_doorbell_selfring_aperture(struct amdgpu_device *adev, + bool enable) +{ + u32 tmp = 0; + + if (enable) { + tmp = REG_SET_FIELD(tmp, BIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_CNTL, + DOORBELL_SELFRING_GPA_APER_EN, 1) | + REG_SET_FIELD(tmp, BIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_CNTL, + DOORBELL_SELFRING_GPA_APER_MODE, 1) | + REG_SET_FIELD(tmp, BIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_CNTL, + DOORBELL_SELFRING_GPA_APER_SIZE, 0); + + WREG32_SOC15(NBIO, 0, regBIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_BASE_LOW, + lower_32_bits(adev->doorbell.base)); + WREG32_SOC15(NBIO, 0, regBIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_BASE_HIGH, + upper_32_bits(adev->doorbell.base)); + } + + WREG32_SOC15(NBIO, 0, regBIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_CNTL, tmp); +} + +static void nbif_v6_3_1_ih_doorbell_range(struct amdgpu_device *adev, + bool use_doorbell, int doorbell_index) +{ + u32 ih_doorbell_range = RREG32_SOC15(NBIO, 0, regGDC_S2A0_S2A_DOORBELL_ENTRY_1_CTRL); + + if (use_doorbell) { + ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range, + GDC_S2A0_S2A_DOORBELL_ENTRY_1_CTRL, + S2A_DOORBELL_PORT1_ENABLE, + 0x1); + ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range, + GDC_S2A0_S2A_DOORBELL_ENTRY_1_CTRL, + S2A_DOORBELL_PORT1_AWID, + 0x0); + ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range, + GDC_S2A0_S2A_DOORBELL_ENTRY_1_CTRL, + S2A_DOORBELL_PORT1_RANGE_OFFSET, + doorbell_index); + ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range, + GDC_S2A0_S2A_DOORBELL_ENTRY_1_CTRL, + S2A_DOORBELL_PORT1_RANGE_SIZE, + 2); + ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range, + GDC_S2A0_S2A_DOORBELL_ENTRY_1_CTRL, + S2A_DOORBELL_PORT1_AWADDR_31_28_VALUE, + 0x0); + } else + ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range, + GDC_S2A0_S2A_DOORBELL_ENTRY_1_CTRL, + S2A_DOORBELL_PORT1_RANGE_SIZE, + 0); + + WREG32_SOC15(NBIO, 0, regGDC_S2A0_S2A_DOORBELL_ENTRY_1_CTRL, ih_doorbell_range); +} + +static void nbif_v6_3_1_ih_control(struct amdgpu_device *adev) +{ + u32 interrupt_cntl; + + /* setup interrupt control */ + WREG32_SOC15(NBIO, 0, regBIF_BX0_INTERRUPT_CNTL2, adev->dummy_page_addr >> 8); + + interrupt_cntl = RREG32_SOC15(NBIO, 0, regBIF_BX0_INTERRUPT_CNTL); + /* + * BIF_BX0_INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=0 - dummy read disabled with msi, enabled without msi + * BIF_BX0_INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=1 - dummy read controlled by IH_DUMMY_RD_EN + */ + interrupt_cntl = REG_SET_FIELD(interrupt_cntl, BIF_BX0_INTERRUPT_CNTL, + IH_DUMMY_RD_OVERRIDE, 0); + + /* BIF_BX0_INTERRUPT_CNTL__IH_REQ_NONSNOOP_EN_MASK=1 if ring is in non-cacheable memory, e.g., vram */ + interrupt_cntl = REG_SET_FIELD(interrupt_cntl, BIF_BX0_INTERRUPT_CNTL, + IH_REQ_NONSNOOP_EN, 0); + + WREG32_SOC15(NBIO, 0, regBIF_BX0_INTERRUPT_CNTL, interrupt_cntl); +} + +static void +nbif_v6_3_1_update_medium_grain_clock_gating(struct amdgpu_device *adev, + bool enable) +{ +} + +static void +nbif_v6_3_1_update_medium_grain_light_sleep(struct amdgpu_device *adev, + bool enable) +{ +} + +static void +nbif_v6_3_1_get_clockgating_state(struct amdgpu_device *adev, + u64 *flags) +{ +} + +static u32 nbif_v6_3_1_get_hdp_flush_req_offset(struct amdgpu_device *adev) +{ + return SOC15_REG_OFFSET(NBIO, 0, regBIF_BX_PF0_GPU_HDP_FLUSH_REQ); +} + +static u32 nbif_v6_3_1_get_hdp_flush_done_offset(struct amdgpu_device *adev) +{ + return SOC15_REG_OFFSET(NBIO, 0, regBIF_BX_PF0_GPU_HDP_FLUSH_DONE); +} + +static u32 nbif_v6_3_1_get_pcie_index_offset(struct amdgpu_device *adev) +{ + return SOC15_REG_OFFSET(NBIO, 0, regBIF_BX_PF0_RSMU_INDEX); +} + +static u32 nbif_v6_3_1_get_pcie_data_offset(struct amdgpu_device *adev) +{ + return SOC15_REG_OFFSET(NBIO, 0, regBIF_BX_PF0_RSMU_DATA); +} + +const struct nbio_hdp_flush_reg nbif_v6_3_1_hdp_flush_reg = { + .ref_and_mask_cp0 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP0_MASK, + .ref_and_mask_cp1 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP1_MASK, + .ref_and_mask_cp2 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP2_MASK, + .ref_and_mask_cp3 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP3_MASK, + .ref_and_mask_cp4 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP4_MASK, + .ref_and_mask_cp5 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP5_MASK, + .ref_and_mask_cp6 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP6_MASK, + .ref_and_mask_cp7 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP7_MASK, + .ref_and_mask_cp8 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP8_MASK, + .ref_and_mask_cp9 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__CP9_MASK, + .ref_and_mask_sdma0 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__SDMA0_MASK, + .ref_and_mask_sdma1 = BIF_BX_PF0_GPU_HDP_FLUSH_DONE__SDMA1_MASK, +}; + +static void nbif_v6_3_1_init_registers(struct amdgpu_device *adev) +{ + uint32_t data; + + data = RREG32_SOC15(NBIO, 0, regRCC_DEV0_EPF2_STRAP2); + data &= ~RCC_DEV0_EPF2_STRAP2__STRAP_NO_SOFT_RESET_DEV0_F2_MASK; + WREG32_SOC15(NBIO, 0, regRCC_DEV0_EPF2_STRAP2, data); +} + +static u32 nbif_v6_3_1_get_rom_offset(struct amdgpu_device *adev) +{ + u32 data, rom_offset; + + data = RREG32_SOC15(NBIO, 0, regREGS_ROM_OFFSET_CTRL); + rom_offset = REG_GET_FIELD(data, REGS_ROM_OFFSET_CTRL, ROM_OFFSET); + + return rom_offset; +} + +#ifdef CONFIG_PCIEASPM +static void nbif_v6_3_1_program_ltr(struct amdgpu_device *adev) +{ + uint32_t def, data; + + def = RREG32_SOC15(NBIO, 0, regRCC_EP_DEV0_0_EP_PCIE_TX_LTR_CNTL); + data = 0x35EB; + data &= ~RCC_EP_DEV0_0_EP_PCIE_TX_LTR_CNTL__LTR_PRIV_MSG_DIS_IN_PM_NON_D0_MASK; + data &= ~RCC_EP_DEV0_0_EP_PCIE_TX_LTR_CNTL__LTR_PRIV_RST_LTR_IN_DL_DOWN_MASK; + if (def != data) + WREG32_SOC15(NBIO, 0, regRCC_EP_DEV0_0_EP_PCIE_TX_LTR_CNTL, data); + + def = data = RREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_BIF_STRAP2); + data &= ~RCC_STRAP0_RCC_BIF_STRAP2__STRAP_LTR_IN_ASPML1_DIS_MASK; + if (def != data) + WREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_BIF_STRAP2, data); + + def = data = RREG32_SOC15(NBIO, 0, regBIF_CFG_DEV0_EPF0_DEVICE_CNTL2); + if (adev->pdev->ltr_path) + data |= BIF_CFG_DEV0_EPF0_DEVICE_CNTL2__LTR_EN_MASK; + else + data &= ~BIF_CFG_DEV0_EPF0_DEVICE_CNTL2__LTR_EN_MASK; + if (def != data) + WREG32_SOC15(NBIO, 0, regBIF_CFG_DEV0_EPF0_DEVICE_CNTL2, data); +} +#endif + +static void nbif_v6_3_1_program_aspm(struct amdgpu_device *adev) +{ +#ifdef CONFIG_PCIEASPM + uint32_t def, data; + + def = data = RREG32_SOC15(PCIE, 0, regPCIE_LC_CNTL); + data &= ~PCIE_LC_CNTL__LC_L1_INACTIVITY_MASK; + data &= ~PCIE_LC_CNTL__LC_L0S_INACTIVITY_MASK; + data |= PCIE_LC_CNTL__LC_PMI_TO_L1_DIS_MASK; + if (def != data) + WREG32_SOC15(PCIE, 0, regPCIE_LC_CNTL, data); + + def = data = RREG32_SOC15(PCIE, 0, regPCIE_LC_CNTL7); + data |= PCIE_LC_CNTL7__LC_NBIF_ASPM_INPUT_EN_MASK; + if (def != data) + WREG32_SOC15(PCIE, 0, regPCIE_LC_CNTL7, data); + + def = data = RREG32_SOC15(PCIE, 0, regPCIE_LC_CNTL3); + data |= PCIE_LC_CNTL3__LC_DSC_DONT_ENTER_L23_AFTER_PME_ACK_MASK; + if (def != data) + WREG32_SOC15(PCIE, 0, regPCIE_LC_CNTL3, data); + + def = data = RREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_BIF_STRAP3); + data &= ~RCC_STRAP0_RCC_BIF_STRAP3__STRAP_VLINK_ASPM_IDLE_TIMER_MASK; + data &= ~RCC_STRAP0_RCC_BIF_STRAP3__STRAP_VLINK_PM_L1_ENTRY_TIMER_MASK; + if (def != data) + WREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_BIF_STRAP3, data); + + def = data = RREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_BIF_STRAP5); + data &= ~RCC_STRAP0_RCC_BIF_STRAP5__STRAP_VLINK_LDN_ENTRY_TIMER_MASK; + if (def != data) + WREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_BIF_STRAP5, data); + + def = data = RREG32_SOC15(NBIO, 0, regBIF_CFG_DEV0_EPF0_DEVICE_CNTL2); + data &= ~BIF_CFG_DEV0_EPF0_DEVICE_CNTL2__LTR_EN_MASK; + if (def != data) + WREG32_SOC15(NBIO, 0, regBIF_CFG_DEV0_EPF0_DEVICE_CNTL2, data); + + WREG32_SOC15(NBIO, 0, regBIF_CFG_DEV0_EPF0_PCIE_LTR_CAP, 0x10011001); + +#if 0 + /* regPSWUSP0_PCIE_LC_CNTL2 should be replace by PCIE_LC_CNTL2 or someone else ? */ + def = data = RREG32_SOC15(NBIO, 0, regPSWUSP0_PCIE_LC_CNTL2); + data |= PSWUSP0_PCIE_LC_CNTL2__LC_ALLOW_PDWN_IN_L1_MASK | + PSWUSP0_PCIE_LC_CNTL2__LC_ALLOW_PDWN_IN_L23_MASK; + data &= ~PSWUSP0_PCIE_LC_CNTL2__LC_RCV_L0_TO_RCV_L0S_DIS_MASK; + if (def != data) + WREG32_SOC15(NBIO, 0, regPSWUSP0_PCIE_LC_CNTL2, data); +#endif + def = data = RREG32_SOC15(PCIE, 0, regPCIE_LC_CNTL4); + data |= PCIE_LC_CNTL4__LC_L1_POWERDOWN_MASK; + if (def != data) + WREG32_SOC15(PCIE, 0, regPCIE_LC_CNTL4, data); + + def = data = RREG32_SOC15(PCIE, 0, regPCIE_LC_RXRECOVER_RXSTANDBY_CNTL); + data |= PCIE_LC_RXRECOVER_RXSTANDBY_CNTL__LC_RX_L0S_STANDBY_EN_MASK; + if (def != data) + WREG32_SOC15(PCIE, 0, regPCIE_LC_RXRECOVER_RXSTANDBY_CNTL, data); + + nbif_v6_3_1_program_ltr(adev); + + def = data = RREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_BIF_STRAP3); + data |= 0x5DE0 << RCC_STRAP0_RCC_BIF_STRAP3__STRAP_VLINK_ASPM_IDLE_TIMER__SHIFT; + data |= 0x0010 << RCC_STRAP0_RCC_BIF_STRAP3__STRAP_VLINK_PM_L1_ENTRY_TIMER__SHIFT; + if (def != data) + WREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_BIF_STRAP3, data); + + def = data = RREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_BIF_STRAP5); + data |= 0x0010 << RCC_STRAP0_RCC_BIF_STRAP5__STRAP_VLINK_LDN_ENTRY_TIMER__SHIFT; + if (def != data) + WREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_BIF_STRAP5, data); + + def = data = RREG32_SOC15(PCIE, 0, regPCIE_LC_CNTL); + data |= 0x0 << PCIE_LC_CNTL__LC_L0S_INACTIVITY__SHIFT; + data |= 0x9 << PCIE_LC_CNTL__LC_L1_INACTIVITY__SHIFT; + data &= ~PCIE_LC_CNTL__LC_PMI_TO_L1_DIS_MASK; + if (def != data) + WREG32_SOC15(PCIE, 0, regPCIE_LC_CNTL, data); + + def = data = RREG32_SOC15(PCIE, 0, regPCIE_LC_CNTL3); + data &= ~PCIE_LC_CNTL3__LC_DSC_DONT_ENTER_L23_AFTER_PME_ACK_MASK; + if (def != data) + WREG32_SOC15(PCIE, 0, regPCIE_LC_CNTL3, data); +#endif +} + +const struct amdgpu_nbio_funcs nbif_v6_3_1_funcs = { + .get_hdp_flush_req_offset = nbif_v6_3_1_get_hdp_flush_req_offset, + .get_hdp_flush_done_offset = nbif_v6_3_1_get_hdp_flush_done_offset, + .get_pcie_index_offset = nbif_v6_3_1_get_pcie_index_offset, + .get_pcie_data_offset = nbif_v6_3_1_get_pcie_data_offset, + .get_rev_id = nbif_v6_3_1_get_rev_id, + .mc_access_enable = nbif_v6_3_1_mc_access_enable, + .get_memsize = nbif_v6_3_1_get_memsize, + .sdma_doorbell_range = nbif_v6_3_1_sdma_doorbell_range, + .vcn_doorbell_range = nbif_v6_3_1_vcn_doorbell_range, + .gc_doorbell_init = nbif_v6_3_1_gc_doorbell_init, + .enable_doorbell_aperture = nbif_v6_3_1_enable_doorbell_aperture, + .enable_doorbell_selfring_aperture = nbif_v6_3_1_enable_doorbell_selfring_aperture, + .ih_doorbell_range = nbif_v6_3_1_ih_doorbell_range, + .update_medium_grain_clock_gating = nbif_v6_3_1_update_medium_grain_clock_gating, + .update_medium_grain_light_sleep = nbif_v6_3_1_update_medium_grain_light_sleep, + .get_clockgating_state = nbif_v6_3_1_get_clockgating_state, + .ih_control = nbif_v6_3_1_ih_control, + .init_registers = nbif_v6_3_1_init_registers, + .remap_hdp_registers = nbif_v6_3_1_remap_hdp_registers, + .get_rom_offset = nbif_v6_3_1_get_rom_offset, + .program_aspm = nbif_v6_3_1_program_aspm, +}; + + +static void nbif_v6_3_1_sriov_ih_doorbell_range(struct amdgpu_device *adev, + bool use_doorbell, int doorbell_index) +{ +} + +static void nbif_v6_3_1_sriov_sdma_doorbell_range(struct amdgpu_device *adev, + int instance, bool use_doorbell, + int doorbell_index, + int doorbell_size) +{ +} + +static void nbif_v6_3_1_sriov_vcn_doorbell_range(struct amdgpu_device *adev, + bool use_doorbell, + int doorbell_index, int instance) +{ +} + +static void nbif_v6_3_1_sriov_gc_doorbell_init(struct amdgpu_device *adev) +{ +} + +const struct amdgpu_nbio_funcs nbif_v6_3_1_sriov_funcs = { + .get_hdp_flush_req_offset = nbif_v6_3_1_get_hdp_flush_req_offset, + .get_hdp_flush_done_offset = nbif_v6_3_1_get_hdp_flush_done_offset, + .get_pcie_index_offset = nbif_v6_3_1_get_pcie_index_offset, + .get_pcie_data_offset = nbif_v6_3_1_get_pcie_data_offset, + .get_rev_id = nbif_v6_3_1_get_rev_id, + .mc_access_enable = nbif_v6_3_1_mc_access_enable, + .get_memsize = nbif_v6_3_1_get_memsize, + .sdma_doorbell_range = nbif_v6_3_1_sriov_sdma_doorbell_range, + .vcn_doorbell_range = nbif_v6_3_1_sriov_vcn_doorbell_range, + .gc_doorbell_init = nbif_v6_3_1_sriov_gc_doorbell_init, + .enable_doorbell_aperture = nbif_v6_3_1_enable_doorbell_aperture, + .enable_doorbell_selfring_aperture = nbif_v6_3_1_enable_doorbell_selfring_aperture, + .ih_doorbell_range = nbif_v6_3_1_sriov_ih_doorbell_range, + .update_medium_grain_clock_gating = nbif_v6_3_1_update_medium_grain_clock_gating, + .update_medium_grain_light_sleep = nbif_v6_3_1_update_medium_grain_light_sleep, + .get_clockgating_state = nbif_v6_3_1_get_clockgating_state, + .ih_control = nbif_v6_3_1_ih_control, + .init_registers = nbif_v6_3_1_init_registers, + .remap_hdp_registers = nbif_v6_3_1_remap_hdp_registers, + .get_rom_offset = nbif_v6_3_1_get_rom_offset, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/nbif_v6_3_1.h b/drivers/gpu/drm/amd/amdgpu/nbif_v6_3_1.h new file mode 100644 index 000000000000..b7f2e0d88905 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/nbif_v6_3_1.h @@ -0,0 +1,33 @@ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __NBIO_V6_3_1_H__ +#define __NBIO_V6_3_1_H__ + +#include "soc15_common.h" + +extern const struct nbio_hdp_flush_reg nbif_v6_3_1_hdp_flush_reg; +extern const struct amdgpu_nbio_funcs nbif_v6_3_1_funcs; +extern const struct amdgpu_nbio_funcs nbif_v6_3_1_sriov_funcs; + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c index e523627cfe25..df218d5ca775 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c @@ -28,6 +28,7 @@ #include "nbio/nbio_2_3_offset.h" #include "nbio/nbio_2_3_sh_mask.h" #include <uapi/linux/kfd_ioctl.h> +#include <linux/device.h> #include <linux/pci.h> #define smnPCIE_CONFIG_CNTL 0x11180044 @@ -361,7 +362,7 @@ static void nbio_v2_3_enable_aspm(struct amdgpu_device *adev, data |= NAVI10_PCIE__LC_L0S_INACTIVITY_DEFAULT << PCIE_LC_CNTL__LC_L0S_INACTIVITY__SHIFT; - if (pci_is_thunderbolt_attached(adev->pdev)) + if (dev_is_removable(&adev->pdev->dev)) data |= NAVI10_PCIE__LC_L1_INACTIVITY_TBT_DEFAULT << PCIE_LC_CNTL__LC_L1_INACTIVITY__SHIFT; else data |= NAVI10_PCIE__LC_L1_INACTIVITY_DEFAULT << PCIE_LC_CNTL__LC_L1_INACTIVITY__SHIFT; @@ -480,7 +481,7 @@ static void nbio_v2_3_program_aspm(struct amdgpu_device *adev) def = data = RREG32_PCIE(smnPCIE_LC_CNTL); data |= NAVI10_PCIE__LC_L0S_INACTIVITY_DEFAULT << PCIE_LC_CNTL__LC_L0S_INACTIVITY__SHIFT; - if (pci_is_thunderbolt_attached(adev->pdev)) + if (dev_is_removable(&adev->pdev->dev)) data |= NAVI10_PCIE__LC_L1_INACTIVITY_TBT_DEFAULT << PCIE_LC_CNTL__LC_L1_INACTIVITY__SHIFT; else data |= NAVI10_PCIE__LC_L1_INACTIVITY_DEFAULT << PCIE_LC_CNTL__LC_L1_INACTIVITY__SHIFT; diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_11.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_11.c index 3a94f249929e..05020141c0ae 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_11.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_11.c @@ -89,7 +89,9 @@ static void nbio_v7_11_vpe_doorbell_range(struct amdgpu_device *adev, int instan bool use_doorbell, int doorbell_index, int doorbell_size) { - u32 reg = SOC15_REG_OFFSET(NBIO, 0, regGDC0_BIF_VPE_DOORBELL_RANGE); + u32 reg = instance == 0 ? + SOC15_REG_OFFSET(NBIO, 0, regGDC0_BIF_VPE_DOORBELL_RANGE) : + SOC15_REG_OFFSET(NBIO, 0, regGDC0_BIF_VPE1_DOORBELL_RANGE); u32 doorbell_range = RREG32_PCIE_PORT(reg); if (use_doorbell) { @@ -112,7 +114,10 @@ static void nbio_v7_11_vcn_doorbell_range(struct amdgpu_device *adev, bool use_doorbell, int doorbell_index, int instance) { - u32 reg = SOC15_REG_OFFSET(NBIO, 0, regGDC0_BIF_VCN0_DOORBELL_RANGE); + u32 reg = instance == 0 ? + SOC15_REG_OFFSET(NBIO, 0, regGDC0_BIF_VCN0_DOORBELL_RANGE): + SOC15_REG_OFFSET(NBIO, 0, regGDC0_BIF_VCN1_DOORBELL_RANGE); + u32 doorbell_range = RREG32_PCIE_PORT(reg); if (use_doorbell) { @@ -259,17 +264,92 @@ const struct nbio_hdp_flush_reg nbio_v7_11_hdp_flush_reg = { static void nbio_v7_11_init_registers(struct amdgpu_device *adev) { -/* uint32_t def, data; + uint32_t def, data; + + def = data = RREG32_SOC15(NBIO, 0, regBIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3); + data = REG_SET_FIELD(data, BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3, + CI_SWUS_MAX_READ_REQUEST_SIZE_MODE, 1); + data = REG_SET_FIELD(data, BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3, + CI_SWUS_MAX_READ_REQUEST_SIZE_PRIV, 1); + + if (def != data) + WREG32_SOC15(NBIO, 0, regBIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3, data); + +} + +static void nbio_v7_11_update_medium_grain_clock_gating(struct amdgpu_device *adev, + bool enable) +{ + uint32_t def, data; + + if (!(adev->cg_flags & AMD_CG_SUPPORT_BIF_MGCG)) + return; + + def = data = RREG32_SOC15(NBIO, 0, regBIF_BIF256_CI256_RC3X4_USB4_CPM_CONTROL); + if (enable) { + data |= (BIF_BIF256_CI256_RC3X4_USB4_CPM_CONTROL__LCLK_DYN_GATE_ENABLE_MASK | + BIF_BIF256_CI256_RC3X4_USB4_CPM_CONTROL__TXCLK_DYN_GATE_ENABLE_MASK | + BIF_BIF256_CI256_RC3X4_USB4_CPM_CONTROL__TXCLK_LCNT_GATE_ENABLE_MASK | + BIF_BIF256_CI256_RC3X4_USB4_CPM_CONTROL__TXCLK_REGS_GATE_ENABLE_MASK | + BIF_BIF256_CI256_RC3X4_USB4_CPM_CONTROL__TXCLK_PRBS_GATE_ENABLE_MASK | + BIF_BIF256_CI256_RC3X4_USB4_CPM_CONTROL__REFCLK_REGS_GATE_ENABLE_MASK); + } else { + data &= ~(BIF_BIF256_CI256_RC3X4_USB4_CPM_CONTROL__LCLK_DYN_GATE_ENABLE_MASK | + BIF_BIF256_CI256_RC3X4_USB4_CPM_CONTROL__TXCLK_DYN_GATE_ENABLE_MASK | + BIF_BIF256_CI256_RC3X4_USB4_CPM_CONTROL__TXCLK_LCNT_GATE_ENABLE_MASK | + BIF_BIF256_CI256_RC3X4_USB4_CPM_CONTROL__TXCLK_REGS_GATE_ENABLE_MASK | + BIF_BIF256_CI256_RC3X4_USB4_CPM_CONTROL__TXCLK_PRBS_GATE_ENABLE_MASK | + BIF_BIF256_CI256_RC3X4_USB4_CPM_CONTROL__REFCLK_REGS_GATE_ENABLE_MASK); + } + + if (def != data) + WREG32_SOC15(NBIO, 0, regBIF_BIF256_CI256_RC3X4_USB4_CPM_CONTROL, data); +} + +static void nbio_v7_11_update_medium_grain_light_sleep(struct amdgpu_device *adev, + bool enable) +{ + uint32_t def, data; + + if (!(adev->cg_flags & AMD_CG_SUPPORT_BIF_LS)) + return; + + def = data = RREG32_SOC15(NBIO, 0, regBIF_BIF256_CI256_RC3X4_USB4_PCIE_CNTL2); + if (enable) + data |= BIF_BIF256_CI256_RC3X4_USB4_PCIE_CNTL2__SLV_MEM_LS_EN_MASK; + else + data &= ~BIF_BIF256_CI256_RC3X4_USB4_PCIE_CNTL2__SLV_MEM_LS_EN_MASK; + + if (def != data) + WREG32_SOC15(NBIO, 0, regBIF_BIF256_CI256_RC3X4_USB4_PCIE_CNTL2, data); + + def = data = RREG32_SOC15(NBIO, 0, regBIF_BIF256_CI256_RC3X4_USB4_PCIE_TX_POWER_CTRL_1); + if (enable) { + data |= (BIF_BIF256_CI256_RC3X4_USB4_PCIE_TX_POWER_CTRL_1__MST_MEM_LS_EN_MASK | + BIF_BIF256_CI256_RC3X4_USB4_PCIE_TX_POWER_CTRL_1__REPLAY_MEM_LS_EN_MASK); + } else { + data &= ~(BIF_BIF256_CI256_RC3X4_USB4_PCIE_TX_POWER_CTRL_1__MST_MEM_LS_EN_MASK | + BIF_BIF256_CI256_RC3X4_USB4_PCIE_TX_POWER_CTRL_1__REPLAY_MEM_LS_EN_MASK); + } + + if (def != data) + WREG32_SOC15(NBIO, 0, regBIF_BIF256_CI256_RC3X4_USB4_PCIE_TX_POWER_CTRL_1, data); +} + +static void nbio_v7_11_get_clockgating_state(struct amdgpu_device *adev, + u64 *flags) +{ + uint32_t data; - def = data = RREG32_SOC15(NBIO, 0, regBIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3); - data = REG_SET_FIELD(data, BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3, - CI_SWUS_MAX_READ_REQUEST_SIZE_MODE, 1); - data = REG_SET_FIELD(data, BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3, - CI_SWUS_MAX_READ_REQUEST_SIZE_PRIV, 1); + /* AMD_CG_SUPPORT_BIF_MGCG */ + data = RREG32_SOC15(NBIO, 0, regBIF_BIF256_CI256_RC3X4_USB4_CPM_CONTROL); + if (data & BIF_BIF256_CI256_RC3X4_USB4_CPM_CONTROL__LCLK_DYN_GATE_ENABLE_MASK) + *flags |= AMD_CG_SUPPORT_BIF_MGCG; - if (def != data) - WREG32_SOC15(NBIO, 0, regBIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3, data); -*/ + /* AMD_CG_SUPPORT_BIF_LS */ + data = RREG32_SOC15(NBIO, 0, regBIF_BIF256_CI256_RC3X4_USB4_PCIE_CNTL2); + if (data & BIF_BIF256_CI256_RC3X4_USB4_PCIE_CNTL2__SLV_MEM_LS_EN_MASK) + *flags |= AMD_CG_SUPPORT_BIF_LS; } const struct amdgpu_nbio_funcs nbio_v7_11_funcs = { @@ -288,6 +368,9 @@ const struct amdgpu_nbio_funcs nbio_v7_11_funcs = { .enable_doorbell_aperture = nbio_v7_11_enable_doorbell_aperture, .enable_doorbell_selfring_aperture = nbio_v7_11_enable_doorbell_selfring_aperture, .ih_doorbell_range = nbio_v7_11_ih_doorbell_range, + .update_medium_grain_clock_gating = nbio_v7_11_update_medium_grain_clock_gating, + .update_medium_grain_light_sleep = nbio_v7_11_update_medium_grain_light_sleep, + .get_clockgating_state = nbio_v7_11_get_clockgating_state, .ih_control = nbio_v7_11_ih_control, .init_registers = nbio_v7_11_init_registers, .remap_hdp_registers = nbio_v7_11_remap_hdp_registers, diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c index 6d24c84924cb..19986ff6a48d 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c @@ -401,8 +401,7 @@ static void nbio_v7_4_handle_ras_controller_intr_no_bifring(struct amdgpu_device if (err_data.ce_count) dev_info(adev->dev, "%ld correctable hardware " - "errors detected in %s block, " - "no user action is needed.\n", + "errors detected in %s block\n", obj->err_data.ce_count, get_ras_block_str(adev->nbio.ras_if)); diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_7.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_7.c index def89379b51a..4df1055e640a 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_7.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_7.c @@ -254,7 +254,7 @@ static void nbio_v7_7_update_medium_grain_clock_gating(struct amdgpu_device *ade { uint32_t def, data; - if (enable && !(adev->cg_flags & AMD_CG_SUPPORT_BIF_MGCG)) + if (!(adev->cg_flags & AMD_CG_SUPPORT_BIF_MGCG)) return; def = data = RREG32_SOC15(NBIO, 0, regBIF0_CPM_CONTROL); @@ -283,7 +283,7 @@ static void nbio_v7_7_update_medium_grain_light_sleep(struct amdgpu_device *adev { uint32_t def, data; - if (enable && !(adev->cg_flags & AMD_CG_SUPPORT_BIF_LS)) + if (!(adev->cg_flags & AMD_CG_SUPPORT_BIF_LS)) return; def = data = RREG32_SOC15(NBIO, 0, regBIF0_PCIE_CNTL2); diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c index eccb006e78aa..40d1e209eab7 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c @@ -35,15 +35,6 @@ /* Core 0 Port 0 counter */ #define smnPCIEP_NAK_COUNTER 0x1A340218 -#define smnPCIE_PERF_CNTL_TXCLK3 0x1A38021c -#define smnPCIE_PERF_CNTL_TXCLK7 0x1A380888 -#define smnPCIE_PERF_COUNT_CNTL 0x1A380200 -#define smnPCIE_PERF_COUNT0_TXCLK3 0x1A380220 -#define smnPCIE_PERF_COUNT0_TXCLK7 0x1A38088C -#define smnPCIE_PERF_COUNT0_UPVAL_TXCLK3 0x1A3808F8 -#define smnPCIE_PERF_COUNT0_UPVAL_TXCLK7 0x1A380918 - - static void nbio_v7_9_remap_hdp_registers(struct amdgpu_device *adev) { WREG32_SOC15(NBIO, 0, regBIF_BX0_REMAP_HDP_MEM_FLUSH_CNTL, @@ -56,8 +47,15 @@ static u32 nbio_v7_9_get_rev_id(struct amdgpu_device *adev) { u32 tmp; + tmp = IP_VERSION_SUBREV(amdgpu_ip_version_full(adev, NBIO_HWIP, 0)); + /* If it is VF or subrevision holds a non-zero value, that should be used */ + if (tmp || amdgpu_sriov_vf(adev)) + return tmp; + + /* If discovery subrev is not updated, use register version */ tmp = RREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_DEV0_EPF0_STRAP0); - tmp = REG_GET_FIELD(tmp, RCC_STRAP0_RCC_DEV0_EPF0_STRAP0, STRAP_ATI_REV_ID_DEV0_F0); + tmp = REG_GET_FIELD(tmp, RCC_STRAP0_RCC_DEV0_EPF0_STRAP0, + STRAP_ATI_REV_ID_DEV0_F0); return tmp; } @@ -424,6 +422,12 @@ static void nbio_v7_9_init_registers(struct amdgpu_device *adev) u32 inst_mask; int i; + if (amdgpu_sriov_vf(adev)) + adev->rmmio_remap.reg_offset = + SOC15_REG_OFFSET( + NBIO, 0, + regBIF_BX_DEV0_EPF0_VF0_HDP_MEM_COHERENCY_FLUSH_CNTL) + << 2; WREG32_SOC15(NBIO, 0, regXCC_DOORBELL_FENCE, 0xff & ~(adev->gfx.xcc_mask)); @@ -471,59 +475,6 @@ static u64 nbio_v7_9_get_pcie_replay_count(struct amdgpu_device *adev) return (nak_r + nak_g); } -static void nbio_v7_9_get_pcie_usage(struct amdgpu_device *adev, uint64_t *count0, - uint64_t *count1) -{ - uint32_t perfctrrx = 0; - uint32_t perfctrtx = 0; - - /* This reports 0 on APUs, so return to avoid writing/reading registers - * that may or may not be different from their GPU counterparts - */ - if (adev->flags & AMD_IS_APU) - return; - - /* Use TXCLK3 counter group for rx event */ - /* Use TXCLK7 counter group for tx event */ - /* Set the 2 events that we wish to watch, defined above */ - /* 40 is event# for received msgs */ - /* 2 is event# of posted requests sent */ - perfctrrx = REG_SET_FIELD(perfctrrx, PCIE_PERF_CNTL_TXCLK3, EVENT0_SEL, 40); - perfctrtx = REG_SET_FIELD(perfctrtx, PCIE_PERF_CNTL_TXCLK7, EVENT0_SEL, 2); - - /* Write to enable desired perf counters */ - WREG32_PCIE(smnPCIE_PERF_CNTL_TXCLK3, perfctrrx); - WREG32_PCIE(smnPCIE_PERF_CNTL_TXCLK7, perfctrtx); - - /* Zero out and enable SHADOW_WR - * Write 0x6: - * Bit 1 = Global Shadow wr(1) - * Bit 2 = Global counter reset enable(1) - */ - WREG32_PCIE(smnPCIE_PERF_COUNT_CNTL, 0x00000006); - - /* Enable Gloabl Counter - * Write 0x1: - * Bit 0 = Global Counter Enable(1) - */ - WREG32_PCIE(smnPCIE_PERF_COUNT_CNTL, 0x00000001); - - msleep(1000); - - /* Disable Global Counter, Reset and enable SHADOW_WR - * Write 0x6: - * Bit 1 = Global Shadow wr(1) - * Bit 2 = Global counter reset enable(1) - */ - WREG32_PCIE(smnPCIE_PERF_COUNT_CNTL, 0x00000006); - - /* Get the upper and lower count */ - *count0 = RREG32_PCIE(smnPCIE_PERF_COUNT0_TXCLK3) | - ((uint64_t)RREG32_PCIE(smnPCIE_PERF_COUNT0_UPVAL_TXCLK3) << 32); - *count1 = RREG32_PCIE(smnPCIE_PERF_COUNT0_TXCLK7) | - ((uint64_t)RREG32_PCIE(smnPCIE_PERF_COUNT0_UPVAL_TXCLK7) << 32); -} - const struct amdgpu_nbio_funcs nbio_v7_9_funcs = { .get_hdp_flush_req_offset = nbio_v7_9_get_hdp_flush_req_offset, .get_hdp_flush_done_offset = nbio_v7_9_get_hdp_flush_done_offset, @@ -548,7 +499,6 @@ const struct amdgpu_nbio_funcs nbio_v7_9_funcs = { .get_memory_partition_mode = nbio_v7_9_get_memory_partition_mode, .init_registers = nbio_v7_9_init_registers, .get_pcie_replay_count = nbio_v7_9_get_pcie_replay_count, - .get_pcie_usage = nbio_v7_9_get_pcie_usage, }; static void nbio_v7_9_query_ras_error_count(struct amdgpu_device *adev, @@ -590,8 +540,7 @@ static void nbio_v7_9_handle_ras_controller_intr_no_bifring(struct amdgpu_device if (err_data.ce_count) dev_info(adev->dev, "%ld correctable hardware " - "errors detected in %s block, " - "no user action is needed.\n", + "errors detected in %s block\n", obj->err_data.ce_count, get_ras_block_str(adev->nbio.ras_if)); @@ -604,11 +553,6 @@ static void nbio_v7_9_handle_ras_controller_intr_no_bifring(struct amdgpu_device dev_info(adev->dev, "RAS controller interrupt triggered " "by NBIF error\n"); - - /* ras_controller_int is dedicated for nbif ras error, - * not the global interrupt for sync flood - */ - amdgpu_ras_reset_gpu(adev); } amdgpu_ras_error_data_fini(&err_data); diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c index 1995c7459f20..4d7976b77767 100644 --- a/drivers/gpu/drm/amd/amdgpu/nv.c +++ b/drivers/gpu/drm/amd/amdgpu/nv.c @@ -513,11 +513,10 @@ static int nv_set_vce_clocks(struct amdgpu_device *adev, u32 evclk, u32 ecclk) static void nv_program_aspm(struct amdgpu_device *adev) { - if (!amdgpu_device_should_use_aspm(adev) || !amdgpu_device_aspm_support_quirk()) + if (!amdgpu_device_should_use_aspm(adev)) return; - if (!(adev->flags & AMD_IS_APU) && - (adev->nbio.funcs->program_aspm)) + if (adev->nbio.funcs->program_aspm) adev->nbio.funcs->program_aspm(adev); } @@ -609,9 +608,8 @@ static int nv_update_umd_stable_pstate(struct amdgpu_device *adev, if (adev->gfx.funcs->update_perfmon_mgcg) adev->gfx.funcs->update_perfmon_mgcg(adev, !enter); - if (!(adev->flags & AMD_IS_APU) && - (adev->nbio.funcs->enable_aspm) && - amdgpu_device_should_use_aspm(adev)) + if (adev->nbio.funcs->enable_aspm && + amdgpu_device_should_use_aspm(adev)) adev->nbio.funcs->enable_aspm(adev, !enter); return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h index 4bb5e10217bb..7566973ed8f5 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h +++ b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h @@ -296,6 +296,7 @@ enum psp_gfx_fw_type { GFX_FW_TYPE_VPEC_FW1 = 100, /* VPEC FW1 To Save VPE */ GFX_FW_TYPE_VPEC_FW2 = 101, /* VPEC FW2 To Save VPE */ GFX_FW_TYPE_VPE = 102, + GFX_FW_TYPE_JPEG_RAM = 128, /**< JPEG Command buffer */ GFX_FW_TYPE_P2S_TABLE = 129, GFX_FW_TYPE_MAX }; diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c index efa37e3b7931..2395f1856962 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c @@ -506,7 +506,7 @@ static int psp_v11_0_memory_training(struct psp_context *psp, uint32_t ops) * before training, and restore it after training to avoid * VRAM corruption. */ - sz = GDDR6_MEM_TRAINING_ENCROACHED_SIZE; + sz = BIST_MEM_TRAINING_ENCROACHED_SIZE; if (adev->gmc.visible_vram_size < sz || !adev->mman.aper_base_kaddr) { DRM_ERROR("visible_vram_size %llx or aper_base_kaddr %p is not initialized.\n", diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c index 573046702861..0da50ea46eaf 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c @@ -27,6 +27,7 @@ #include "amdgpu_ucode.h" #include "soc15_common.h" #include "psp_v13_0.h" +#include "amdgpu_ras.h" #include "mp/mp_13_0_2_offset.h" #include "mp/mp_13_0_2_sh_mask.h" @@ -52,6 +53,8 @@ MODULE_FIRMWARE("amdgpu/psp_13_0_6_sos.bin"); MODULE_FIRMWARE("amdgpu/psp_13_0_6_ta.bin"); MODULE_FIRMWARE("amdgpu/psp_14_0_0_toc.bin"); MODULE_FIRMWARE("amdgpu/psp_14_0_0_ta.bin"); +MODULE_FIRMWARE("amdgpu/psp_14_0_1_toc.bin"); +MODULE_FIRMWARE("amdgpu/psp_14_0_1_ta.bin"); /* For large FW files the time to complete can be very long */ #define USBC_PD_POLLING_LIMIT_S 240 @@ -60,7 +63,7 @@ MODULE_FIRMWARE("amdgpu/psp_14_0_0_ta.bin"); #define GFX_CMD_USB_PD_USE_LFB 0x480 /* Retry times for vmbx ready wait */ -#define PSP_VMBX_POLLING_LIMIT 20000 +#define PSP_VMBX_POLLING_LIMIT 3000 /* VBIOS gfl defines */ #define MBOX_READY_MASK 0x80000000 @@ -100,6 +103,7 @@ static int psp_v13_0_init_microcode(struct psp_context *psp) case IP_VERSION(13, 0, 8): case IP_VERSION(13, 0, 11): case IP_VERSION(14, 0, 0): + case IP_VERSION(14, 0, 1): err = psp_init_toc_microcode(psp, ucode_prefix); if (err) return err; @@ -161,14 +165,18 @@ static int psp_v13_0_wait_for_vmbx_ready(struct psp_context *psp) static int psp_v13_0_wait_for_bootloader(struct psp_context *psp) { struct amdgpu_device *adev = psp->adev; - int retry_loop, ret; + int retry_loop, retry_cnt, ret; + retry_cnt = + (amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 6)) ? + PSP_VMBX_POLLING_LIMIT : + 10; /* Wait for bootloader to signify that it is ready having bit 31 of * C2PMSG_35 set to 1. All other bits are expected to be cleared. * If there is an error in processing command, bits[7:0] will be set. * This is applicable for PSP v13.0.6 and newer. */ - for (retry_loop = 0; retry_loop < 10; retry_loop++) { + for (retry_loop = 0; retry_loop < retry_cnt; retry_loop++) { ret = psp_wait_for( psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_35), 0x80000000, 0xffffffff, false); @@ -183,11 +191,18 @@ static int psp_v13_0_wait_for_bootloader(struct psp_context *psp) static int psp_v13_0_wait_for_bootloader_steady_state(struct psp_context *psp) { struct amdgpu_device *adev = psp->adev; + int ret; if (amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 6)) { - psp_v13_0_wait_for_vmbx_ready(psp); + ret = psp_v13_0_wait_for_vmbx_ready(psp); + if (ret) + amdgpu_ras_query_boot_status(adev, 4); - return psp_v13_0_wait_for_bootloader(psp); + ret = psp_v13_0_wait_for_bootloader(psp); + if (ret) + amdgpu_ras_query_boot_status(adev, 4); + + return ret; } return 0; @@ -549,7 +564,7 @@ static int psp_v13_0_memory_training(struct psp_context *psp, uint32_t ops) * before training, and restore it after training to avoid * VRAM corruption. */ - sz = GDDR6_MEM_TRAINING_ENCROACHED_SIZE; + sz = BIST_MEM_TRAINING_ENCROACHED_SIZE; if (adev->gmc.visible_vram_size < sz || !adev->mman.aper_base_kaddr) { dev_err(adev->dev, "visible_vram_size %llx or aper_base_kaddr %p is not initialized.\n", @@ -759,6 +774,30 @@ static int psp_v13_0_fatal_error_recovery_quirk(struct psp_context *psp) return 0; } +static bool psp_v13_0_get_ras_capability(struct psp_context *psp) +{ + struct amdgpu_device *adev = psp->adev; + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + u32 reg_data; + + /* query ras cap should be done from host side */ + if (amdgpu_sriov_vf(adev)) + return false; + + if (!con) + return false; + + if ((amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 6)) && + (!(adev->flags & AMD_IS_APU))) { + reg_data = RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_127); + adev->ras_hw_enabled = (reg_data & GENMASK_ULL(23, 0)); + con->poison_supported = ((reg_data & GENMASK_ULL(24, 24)) >> 24) ? true : false; + return true; + } else { + return false; + } +} + static const struct psp_funcs psp_v13_0_funcs = { .init_microcode = psp_v13_0_init_microcode, .wait_for_bootloader = psp_v13_0_wait_for_bootloader_steady_state, @@ -781,6 +820,7 @@ static const struct psp_funcs psp_v13_0_funcs = { .update_spirom = psp_v13_0_update_spirom, .vbflash_stat = psp_v13_0_vbflash_status, .fatal_error_recovery_quirk = psp_v13_0_fatal_error_recovery_quirk, + .get_ras_capability = psp_v13_0_get_ras_capability, }; void psp_v13_0_set_psp_funcs(struct psp_context *psp) diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v14_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v14_0.c new file mode 100644 index 000000000000..f08a32c18694 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/psp_v14_0.c @@ -0,0 +1,678 @@ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#include <drm/drm_drv.h> +#include <linux/vmalloc.h> +#include "amdgpu.h" +#include "amdgpu_psp.h" +#include "amdgpu_ucode.h" +#include "soc15_common.h" +#include "psp_v14_0.h" + +#include "mp/mp_14_0_2_offset.h" +#include "mp/mp_14_0_2_sh_mask.h" + +MODULE_FIRMWARE("amdgpu/psp_14_0_2_sos.bin"); +MODULE_FIRMWARE("amdgpu/psp_14_0_3_sos.bin"); + +/* For large FW files the time to complete can be very long */ +#define USBC_PD_POLLING_LIMIT_S 240 + +/* Read USB-PD from LFB */ +#define GFX_CMD_USB_PD_USE_LFB 0x480 + +/* VBIOS gfl defines */ +#define MBOX_READY_MASK 0x80000000 +#define MBOX_STATUS_MASK 0x0000FFFF +#define MBOX_COMMAND_MASK 0x00FF0000 +#define MBOX_READY_FLAG 0x80000000 +#define C2PMSG_CMD_SPI_UPDATE_ROM_IMAGE_ADDR_LO 0x2 +#define C2PMSG_CMD_SPI_UPDATE_ROM_IMAGE_ADDR_HI 0x3 +#define C2PMSG_CMD_SPI_UPDATE_FLASH_IMAGE 0x4 + +/* memory training timeout define */ +#define MEM_TRAIN_SEND_MSG_TIMEOUT_US 3000000 + +static int psp_v14_0_init_microcode(struct psp_context *psp) +{ + struct amdgpu_device *adev = psp->adev; + char ucode_prefix[30]; + int err = 0; + + amdgpu_ucode_ip_version_decode(adev, MP0_HWIP, ucode_prefix, sizeof(ucode_prefix)); + + switch (amdgpu_ip_version(adev, MP0_HWIP, 0)) { + case IP_VERSION(14, 0, 2): + case IP_VERSION(14, 0, 3): + err = psp_init_sos_microcode(psp, ucode_prefix); + if (err) + return err; + break; + default: + BUG(); + } + + return 0; +} + +static bool psp_v14_0_is_sos_alive(struct psp_context *psp) +{ + struct amdgpu_device *adev = psp->adev; + uint32_t sol_reg; + + sol_reg = RREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_81); + + return sol_reg != 0x0; +} + +static int psp_v14_0_wait_for_bootloader(struct psp_context *psp) +{ + struct amdgpu_device *adev = psp->adev; + + int ret; + int retry_loop; + + for (retry_loop = 0; retry_loop < 10; retry_loop++) { + /* Wait for bootloader to signify that is + ready having bit 31 of C2PMSG_35 set to 1 */ + ret = psp_wait_for(psp, + SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_35), + 0x80000000, + 0x80000000, + false); + + if (ret == 0) + return 0; + } + + return ret; +} + +static int psp_v14_0_bootloader_load_component(struct psp_context *psp, + struct psp_bin_desc *bin_desc, + enum psp_bootloader_cmd bl_cmd) +{ + int ret; + uint32_t psp_gfxdrv_command_reg = 0; + struct amdgpu_device *adev = psp->adev; + + /* Check tOS sign of life register to confirm sys driver and sOS + * are already been loaded. + */ + if (psp_v14_0_is_sos_alive(psp)) + return 0; + + ret = psp_v14_0_wait_for_bootloader(psp); + if (ret) + return ret; + + memset(psp->fw_pri_buf, 0, PSP_1_MEG); + + /* Copy PSP KDB binary to memory */ + memcpy(psp->fw_pri_buf, bin_desc->start_addr, bin_desc->size_bytes); + + /* Provide the PSP KDB to bootloader */ + WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_36, + (uint32_t)(psp->fw_pri_mc_addr >> 20)); + psp_gfxdrv_command_reg = bl_cmd; + WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_35, + psp_gfxdrv_command_reg); + + ret = psp_v14_0_wait_for_bootloader(psp); + + return ret; +} + +static int psp_v14_0_bootloader_load_kdb(struct psp_context *psp) +{ + return psp_v14_0_bootloader_load_component(psp, &psp->kdb, PSP_BL__LOAD_KEY_DATABASE); +} + +static int psp_v14_0_bootloader_load_spl(struct psp_context *psp) +{ + return psp_v14_0_bootloader_load_component(psp, &psp->kdb, PSP_BL__LOAD_TOS_SPL_TABLE); +} + +static int psp_v14_0_bootloader_load_sysdrv(struct psp_context *psp) +{ + return psp_v14_0_bootloader_load_component(psp, &psp->sys, PSP_BL__LOAD_SYSDRV); +} + +static int psp_v14_0_bootloader_load_soc_drv(struct psp_context *psp) +{ + return psp_v14_0_bootloader_load_component(psp, &psp->soc_drv, PSP_BL__LOAD_SOCDRV); +} + +static int psp_v14_0_bootloader_load_intf_drv(struct psp_context *psp) +{ + return psp_v14_0_bootloader_load_component(psp, &psp->intf_drv, PSP_BL__LOAD_INTFDRV); +} + +static int psp_v14_0_bootloader_load_dbg_drv(struct psp_context *psp) +{ + /* dbg_drv was renamed to had_drv in psp v14 */ + return psp_v14_0_bootloader_load_component(psp, &psp->dbg_drv, PSP_BL__LOAD_HADDRV); +} + +static int psp_v14_0_bootloader_load_ras_drv(struct psp_context *psp) +{ + return psp_v14_0_bootloader_load_component(psp, &psp->ras_drv, PSP_BL__LOAD_RASDRV); +} + +static int psp_v14_0_bootloader_load_ipkeymgr_drv(struct psp_context *psp) +{ + return psp_v14_0_bootloader_load_component(psp, &psp->ipkeymgr_drv, PSP_BL__LOAD_IPKEYMGRDRV); +} + +static int psp_v14_0_bootloader_load_sos(struct psp_context *psp) +{ + int ret; + unsigned int psp_gfxdrv_command_reg = 0; + struct amdgpu_device *adev = psp->adev; + + /* Check sOS sign of life register to confirm sys driver and sOS + * are already been loaded. + */ + if (psp_v14_0_is_sos_alive(psp)) + return 0; + + ret = psp_v14_0_wait_for_bootloader(psp); + if (ret) + return ret; + + memset(psp->fw_pri_buf, 0, PSP_1_MEG); + + /* Copy Secure OS binary to PSP memory */ + memcpy(psp->fw_pri_buf, psp->sos.start_addr, psp->sos.size_bytes); + + /* Provide the PSP secure OS to bootloader */ + WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_36, + (uint32_t)(psp->fw_pri_mc_addr >> 20)); + psp_gfxdrv_command_reg = PSP_BL__LOAD_SOSDRV; + WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_35, + psp_gfxdrv_command_reg); + + /* there might be handshake issue with hardware which needs delay */ + mdelay(20); + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_81), + RREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_81), + 0, true); + + return ret; +} + +static int psp_v14_0_ring_stop(struct psp_context *psp, + enum psp_ring_type ring_type) +{ + int ret = 0; + struct amdgpu_device *adev = psp->adev; + + if (amdgpu_sriov_vf(adev)) { + /* Write the ring destroy command*/ + WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_101, + GFX_CTRL_CMD_ID_DESTROY_GPCOM_RING); + /* there might be handshake issue with hardware which needs delay */ + mdelay(20); + /* Wait for response flag (bit 31) */ + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_101), + 0x80000000, 0x80000000, false); + } else { + /* Write the ring destroy command*/ + WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_64, + GFX_CTRL_CMD_ID_DESTROY_RINGS); + /* there might be handshake issue with hardware which needs delay */ + mdelay(20); + /* Wait for response flag (bit 31) */ + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_64), + 0x80000000, 0x80000000, false); + } + + return ret; +} + +static int psp_v14_0_ring_create(struct psp_context *psp, + enum psp_ring_type ring_type) +{ + int ret = 0; + unsigned int psp_ring_reg = 0; + struct psp_ring *ring = &psp->km_ring; + struct amdgpu_device *adev = psp->adev; + + if (amdgpu_sriov_vf(adev)) { + ret = psp_v14_0_ring_stop(psp, ring_type); + if (ret) { + DRM_ERROR("psp_v14_0_ring_stop_sriov failed!\n"); + return ret; + } + + /* Write low address of the ring to C2PMSG_102 */ + psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr); + WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_102, psp_ring_reg); + /* Write high address of the ring to C2PMSG_103 */ + psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr); + WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_103, psp_ring_reg); + + /* Write the ring initialization command to C2PMSG_101 */ + WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_101, + GFX_CTRL_CMD_ID_INIT_GPCOM_RING); + + /* there might be handshake issue with hardware which needs delay */ + mdelay(20); + + /* Wait for response flag (bit 31) in C2PMSG_101 */ + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_101), + 0x80000000, 0x8000FFFF, false); + + } else { + /* Wait for sOS ready for ring creation */ + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_64), + 0x80000000, 0x80000000, false); + if (ret) { + DRM_ERROR("Failed to wait for trust OS ready for ring creation\n"); + return ret; + } + + /* Write low address of the ring to C2PMSG_69 */ + psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr); + WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_69, psp_ring_reg); + /* Write high address of the ring to C2PMSG_70 */ + psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr); + WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_70, psp_ring_reg); + /* Write size of ring to C2PMSG_71 */ + psp_ring_reg = ring->ring_size; + WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_71, psp_ring_reg); + /* Write the ring initialization command to C2PMSG_64 */ + psp_ring_reg = ring_type; + psp_ring_reg = psp_ring_reg << 16; + WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_64, psp_ring_reg); + + /* there might be handshake issue with hardware which needs delay */ + mdelay(20); + + /* Wait for response flag (bit 31) in C2PMSG_64 */ + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_64), + 0x80000000, 0x8000FFFF, false); + } + + return ret; +} + +static int psp_v14_0_ring_destroy(struct psp_context *psp, + enum psp_ring_type ring_type) +{ + int ret = 0; + struct psp_ring *ring = &psp->km_ring; + struct amdgpu_device *adev = psp->adev; + + ret = psp_v14_0_ring_stop(psp, ring_type); + if (ret) + DRM_ERROR("Fail to stop psp ring\n"); + + amdgpu_bo_free_kernel(&adev->firmware.rbuf, + &ring->ring_mem_mc_addr, + (void **)&ring->ring_mem); + + return ret; +} + +static uint32_t psp_v14_0_ring_get_wptr(struct psp_context *psp) +{ + uint32_t data; + struct amdgpu_device *adev = psp->adev; + + if (amdgpu_sriov_vf(adev)) + data = RREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_102); + else + data = RREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_67); + + return data; +} + +static void psp_v14_0_ring_set_wptr(struct psp_context *psp, uint32_t value) +{ + struct amdgpu_device *adev = psp->adev; + + if (amdgpu_sriov_vf(adev)) { + WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_102, value); + WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_101, + GFX_CTRL_CMD_ID_CONSUME_CMD); + } else + WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_67, value); +} + +static int psp_v14_0_memory_training_send_msg(struct psp_context *psp, int msg) +{ + int ret; + int i; + uint32_t data_32; + int max_wait; + struct amdgpu_device *adev = psp->adev; + + data_32 = (psp->mem_train_ctx.c2p_train_data_offset >> 20); + WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_36, data_32); + WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_35, msg); + + max_wait = MEM_TRAIN_SEND_MSG_TIMEOUT_US / adev->usec_timeout; + for (i = 0; i < max_wait; i++) { + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_35), + 0x80000000, 0x80000000, false); + if (ret == 0) + break; + } + if (i < max_wait) + ret = 0; + else + ret = -ETIME; + + dev_dbg(adev->dev, "training %s %s, cost %d @ %d ms\n", + (msg == PSP_BL__DRAM_SHORT_TRAIN) ? "short" : "long", + (ret == 0) ? "succeed" : "failed", + i, adev->usec_timeout/1000); + return ret; +} + + +static int psp_v14_0_memory_training(struct psp_context *psp, uint32_t ops) +{ + struct psp_memory_training_context *ctx = &psp->mem_train_ctx; + uint32_t *pcache = (uint32_t *)ctx->sys_cache; + struct amdgpu_device *adev = psp->adev; + uint32_t p2c_header[4]; + uint32_t sz; + void *buf; + int ret, idx; + + if (ctx->init == PSP_MEM_TRAIN_NOT_SUPPORT) { + dev_dbg(adev->dev, "Memory training is not supported.\n"); + return 0; + } else if (ctx->init != PSP_MEM_TRAIN_INIT_SUCCESS) { + dev_err(adev->dev, "Memory training initialization failure.\n"); + return -EINVAL; + } + + if (psp_v14_0_is_sos_alive(psp)) { + dev_dbg(adev->dev, "SOS is alive, skip memory training.\n"); + return 0; + } + + amdgpu_device_vram_access(adev, ctx->p2c_train_data_offset, p2c_header, sizeof(p2c_header), false); + dev_dbg(adev->dev, "sys_cache[%08x,%08x,%08x,%08x] p2c_header[%08x,%08x,%08x,%08x]\n", + pcache[0], pcache[1], pcache[2], pcache[3], + p2c_header[0], p2c_header[1], p2c_header[2], p2c_header[3]); + + if (ops & PSP_MEM_TRAIN_SEND_SHORT_MSG) { + dev_dbg(adev->dev, "Short training depends on restore.\n"); + ops |= PSP_MEM_TRAIN_RESTORE; + } + + if ((ops & PSP_MEM_TRAIN_RESTORE) && + pcache[0] != MEM_TRAIN_SYSTEM_SIGNATURE) { + dev_dbg(adev->dev, "sys_cache[0] is invalid, restore depends on save.\n"); + ops |= PSP_MEM_TRAIN_SAVE; + } + + if (p2c_header[0] == MEM_TRAIN_SYSTEM_SIGNATURE && + !(pcache[0] == MEM_TRAIN_SYSTEM_SIGNATURE && + pcache[3] == p2c_header[3])) { + dev_dbg(adev->dev, "sys_cache is invalid or out-of-date, need save training data to sys_cache.\n"); + ops |= PSP_MEM_TRAIN_SAVE; + } + + if ((ops & PSP_MEM_TRAIN_SAVE) && + p2c_header[0] != MEM_TRAIN_SYSTEM_SIGNATURE) { + dev_dbg(adev->dev, "p2c_header[0] is invalid, save depends on long training.\n"); + ops |= PSP_MEM_TRAIN_SEND_LONG_MSG; + } + + if (ops & PSP_MEM_TRAIN_SEND_LONG_MSG) { + ops &= ~PSP_MEM_TRAIN_SEND_SHORT_MSG; + ops |= PSP_MEM_TRAIN_SAVE; + } + + dev_dbg(adev->dev, "Memory training ops:%x.\n", ops); + + if (ops & PSP_MEM_TRAIN_SEND_LONG_MSG) { + /* + * Long training will encroach a certain amount on the bottom of VRAM; + * save the content from the bottom of VRAM to system memory + * before training, and restore it after training to avoid + * VRAM corruption. + */ + sz = BIST_MEM_TRAINING_ENCROACHED_SIZE; + + if (adev->gmc.visible_vram_size < sz || !adev->mman.aper_base_kaddr) { + dev_err(adev->dev, "visible_vram_size %llx or aper_base_kaddr %p is not initialized.\n", + adev->gmc.visible_vram_size, + adev->mman.aper_base_kaddr); + return -EINVAL; + } + + buf = vmalloc(sz); + if (!buf) { + dev_err(adev->dev, "failed to allocate system memory.\n"); + return -ENOMEM; + } + + if (drm_dev_enter(adev_to_drm(adev), &idx)) { + memcpy_fromio(buf, adev->mman.aper_base_kaddr, sz); + ret = psp_v14_0_memory_training_send_msg(psp, PSP_BL__DRAM_LONG_TRAIN); + if (ret) { + DRM_ERROR("Send long training msg failed.\n"); + vfree(buf); + drm_dev_exit(idx); + return ret; + } + + memcpy_toio(adev->mman.aper_base_kaddr, buf, sz); + adev->hdp.funcs->flush_hdp(adev, NULL); + vfree(buf); + drm_dev_exit(idx); + } else { + vfree(buf); + return -ENODEV; + } + } + + if (ops & PSP_MEM_TRAIN_SAVE) { + amdgpu_device_vram_access(psp->adev, ctx->p2c_train_data_offset, ctx->sys_cache, ctx->train_data_size, false); + } + + if (ops & PSP_MEM_TRAIN_RESTORE) { + amdgpu_device_vram_access(psp->adev, ctx->c2p_train_data_offset, ctx->sys_cache, ctx->train_data_size, true); + } + + if (ops & PSP_MEM_TRAIN_SEND_SHORT_MSG) { + ret = psp_v14_0_memory_training_send_msg(psp, (amdgpu_force_long_training > 0) ? + PSP_BL__DRAM_LONG_TRAIN : PSP_BL__DRAM_SHORT_TRAIN); + if (ret) { + dev_err(adev->dev, "send training msg failed.\n"); + return ret; + } + } + ctx->training_cnt++; + return 0; +} + +static int psp_v14_0_load_usbc_pd_fw(struct psp_context *psp, uint64_t fw_pri_mc_addr) +{ + struct amdgpu_device *adev = psp->adev; + uint32_t reg_status; + int ret, i = 0; + + /* + * LFB address which is aligned to 1MB address and has to be + * right-shifted by 20 so that LFB address can be passed on a 32-bit C2P + * register + */ + WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_36, (fw_pri_mc_addr >> 20)); + + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_35), + 0x80000000, 0x80000000, false); + if (ret) + return ret; + + /* Fireup interrupt so PSP can pick up the address */ + WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_35, (GFX_CMD_USB_PD_USE_LFB << 16)); + + /* FW load takes very long time */ + do { + msleep(1000); + reg_status = RREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_35); + + if (reg_status & 0x80000000) + goto done; + + } while (++i < USBC_PD_POLLING_LIMIT_S); + + return -ETIME; +done: + + if ((reg_status & 0xFFFF) != 0) { + DRM_ERROR("Address load failed - MP0_SMN_C2PMSG_35.Bits [15:0] = %04x\n", + reg_status & 0xFFFF); + return -EIO; + } + + return 0; +} + +static int psp_v14_0_read_usbc_pd_fw(struct psp_context *psp, uint32_t *fw_ver) +{ + struct amdgpu_device *adev = psp->adev; + int ret; + + WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_35, C2PMSG_CMD_GFX_USB_PD_FW_VER); + + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_35), + 0x80000000, 0x80000000, false); + if (!ret) + *fw_ver = RREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_36); + + return ret; +} + +static int psp_v14_0_exec_spi_cmd(struct psp_context *psp, int cmd) +{ + uint32_t reg_status = 0, reg_val = 0; + struct amdgpu_device *adev = psp->adev; + int ret; + + /* clear MBX ready (MBOX_READY_MASK bit is 0) and set update command */ + reg_val |= (cmd << 16); + WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_115, reg_val); + + /* Ring the doorbell */ + WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_73, 1); + + if (cmd == C2PMSG_CMD_SPI_UPDATE_FLASH_IMAGE) + ret = psp_wait_for_spirom_update(psp, SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_115), + MBOX_READY_FLAG, MBOX_READY_MASK, PSP_SPIROM_UPDATE_TIMEOUT); + else + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_115), + MBOX_READY_FLAG, MBOX_READY_MASK, false); + + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_115), + MBOX_READY_FLAG, MBOX_READY_MASK, false); + if (ret) { + dev_err(adev->dev, "SPI cmd %x timed out, ret = %d", cmd, ret); + return ret; + } + + reg_status = RREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_115); + if ((reg_status & 0xFFFF) != 0) { + dev_err(adev->dev, "SPI cmd %x failed, fail status = %04x\n", + cmd, reg_status & 0xFFFF); + return -EIO; + } + + return 0; +} + +static int psp_v14_0_update_spirom(struct psp_context *psp, + uint64_t fw_pri_mc_addr) +{ + struct amdgpu_device *adev = psp->adev; + int ret; + + /* Confirm PSP is ready to start */ + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, regMPASP_SMN_C2PMSG_115), + MBOX_READY_FLAG, MBOX_READY_MASK, false); + if (ret) { + dev_err(adev->dev, "PSP Not ready to start processing, ret = %d", ret); + return ret; + } + + WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_116, lower_32_bits(fw_pri_mc_addr)); + + ret = psp_v14_0_exec_spi_cmd(psp, C2PMSG_CMD_SPI_UPDATE_ROM_IMAGE_ADDR_LO); + if (ret) + return ret; + + WREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_116, upper_32_bits(fw_pri_mc_addr)); + + ret = psp_v14_0_exec_spi_cmd(psp, C2PMSG_CMD_SPI_UPDATE_ROM_IMAGE_ADDR_HI); + if (ret) + return ret; + + psp->vbflash_done = true; + + ret = psp_v14_0_exec_spi_cmd(psp, C2PMSG_CMD_SPI_UPDATE_FLASH_IMAGE); + if (ret) + return ret; + + return 0; +} + +static int psp_v14_0_vbflash_status(struct psp_context *psp) +{ + struct amdgpu_device *adev = psp->adev; + + return RREG32_SOC15(MP0, 0, regMPASP_SMN_C2PMSG_115); +} + +static const struct psp_funcs psp_v14_0_funcs = { + .init_microcode = psp_v14_0_init_microcode, + .bootloader_load_kdb = psp_v14_0_bootloader_load_kdb, + .bootloader_load_spl = psp_v14_0_bootloader_load_spl, + .bootloader_load_sysdrv = psp_v14_0_bootloader_load_sysdrv, + .bootloader_load_soc_drv = psp_v14_0_bootloader_load_soc_drv, + .bootloader_load_intf_drv = psp_v14_0_bootloader_load_intf_drv, + .bootloader_load_dbg_drv = psp_v14_0_bootloader_load_dbg_drv, + .bootloader_load_ras_drv = psp_v14_0_bootloader_load_ras_drv, + .bootloader_load_ipkeymgr_drv = psp_v14_0_bootloader_load_ipkeymgr_drv, + .bootloader_load_sos = psp_v14_0_bootloader_load_sos, + .ring_create = psp_v14_0_ring_create, + .ring_stop = psp_v14_0_ring_stop, + .ring_destroy = psp_v14_0_ring_destroy, + .ring_get_wptr = psp_v14_0_ring_get_wptr, + .ring_set_wptr = psp_v14_0_ring_set_wptr, + .mem_training = psp_v14_0_memory_training, + .load_usbc_pd_fw = psp_v14_0_load_usbc_pd_fw, + .read_usbc_pd_fw = psp_v14_0_read_usbc_pd_fw, + .update_spirom = psp_v14_0_update_spirom, + .vbflash_stat = psp_v14_0_vbflash_status +}; + +void psp_v14_0_set_psp_funcs(struct psp_context *psp) +{ + psp->funcs = &psp_v14_0_funcs; +} diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v14_0.h b/drivers/gpu/drm/amd/amdgpu/psp_v14_0.h new file mode 100644 index 000000000000..dd18ba2cfad5 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/psp_v14_0.h @@ -0,0 +1,32 @@ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#ifndef __PSP_V14_0_H__ +#define __PSP_V14_0_H__ + +#include "amdgpu_psp.h" + +#define PSP_SPIROM_UPDATE_TIMEOUT 60000 /* 60s */ + +void psp_v14_0_set_psp_funcs(struct psp_context *psp); + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c index b58a13bd75db..07e19caf2bc1 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c @@ -57,22 +57,19 @@ static void sdma_v2_4_set_irq_funcs(struct amdgpu_device *adev); MODULE_FIRMWARE("amdgpu/topaz_sdma.bin"); MODULE_FIRMWARE("amdgpu/topaz_sdma1.bin"); -static const u32 sdma_offsets[SDMA_MAX_INSTANCE] = -{ +static const u32 sdma_offsets[SDMA_MAX_INSTANCE] = { SDMA0_REGISTER_OFFSET, SDMA1_REGISTER_OFFSET }; -static const u32 golden_settings_iceland_a11[] = -{ +static const u32 golden_settings_iceland_a11[] = { mmSDMA0_CHICKEN_BITS, 0xfc910007, 0x00810007, mmSDMA0_CLK_CTRL, 0xff000fff, 0x00000000, mmSDMA1_CHICKEN_BITS, 0xfc910007, 0x00810007, mmSDMA1_CLK_CTRL, 0xff000fff, 0x00000000, }; -static const u32 iceland_mgcg_cgcg_init[] = -{ +static const u32 iceland_mgcg_cgcg_init[] = { mmSDMA0_CLK_CTRL, 0xff000ff0, 0x00000100, mmSDMA1_CLK_CTRL, 0xff000ff0, 0x00000100 }; @@ -142,7 +139,8 @@ static int sdma_v2_4_init_microcode(struct amdgpu_device *adev) case CHIP_TOPAZ: chip_name = "topaz"; break; - default: BUG(); + default: + BUG(); } for (i = 0; i < adev->sdma.num_instances; i++) { @@ -339,8 +337,6 @@ static void sdma_v2_4_gfx_stop(struct amdgpu_device *adev) u32 rb_cntl, ib_cntl; int i; - amdgpu_sdma_unset_buffer_funcs_helper(adev); - for (i = 0; i < adev->sdma.num_instances; i++) { rb_cntl = RREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i]); rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 0); @@ -474,9 +470,6 @@ static int sdma_v2_4_gfx_resume(struct amdgpu_device *adev) r = amdgpu_ring_test_helper(ring); if (r) return r; - - if (adev->mman.buffer_funcs_ring == ring) - amdgpu_ttm_set_buffer_funcs_status(adev, true); } return 0; @@ -818,12 +811,12 @@ static int sdma_v2_4_early_init(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; int r; + adev->sdma.num_instances = SDMA_MAX_INSTANCE; + r = sdma_v2_4_init_microcode(adev); if (r) return r; - adev->sdma.num_instances = SDMA_MAX_INSTANCE; - sdma_v2_4_set_ring_funcs(adev); sdma_v2_4_set_buffer_funcs(adev); sdma_v2_4_set_vm_pte_funcs(adev); @@ -1263,8 +1256,7 @@ static void sdma_v2_4_set_vm_pte_funcs(struct amdgpu_device *adev) adev->vm_manager.vm_pte_num_scheds = adev->sdma.num_instances; } -const struct amdgpu_ip_block_version sdma_v2_4_ip_block = -{ +const struct amdgpu_ip_block_version sdma_v2_4_ip_block = { .type = AMD_IP_BLOCK_TYPE_SDMA, .major = 2, .minor = 4, diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c index c5ea32687eb5..2ad615be4bb3 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c @@ -513,8 +513,6 @@ static void sdma_v3_0_gfx_stop(struct amdgpu_device *adev) u32 rb_cntl, ib_cntl; int i; - amdgpu_sdma_unset_buffer_funcs_helper(adev); - for (i = 0; i < adev->sdma.num_instances; i++) { rb_cntl = RREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i]); rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 0); @@ -746,9 +744,6 @@ static int sdma_v3_0_gfx_resume(struct amdgpu_device *adev) r = amdgpu_ring_test_helper(ring); if (r) return r; - - if (adev->mman.buffer_funcs_ring == ring) - amdgpu_ttm_set_buffer_funcs_status(adev, true); } return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index dff66e1ae7ea..43775cb67ff5 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c @@ -877,8 +877,6 @@ static void sdma_v4_0_gfx_enable(struct amdgpu_device *adev, bool enable) u32 rb_cntl, ib_cntl; int i; - amdgpu_sdma_unset_buffer_funcs_helper(adev); - for (i = 0; i < adev->sdma.num_instances; i++) { rb_cntl = RREG32_SDMA(i, mmSDMA0_GFX_RB_CNTL); rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, enable ? 1 : 0); @@ -913,8 +911,6 @@ static void sdma_v4_0_page_stop(struct amdgpu_device *adev) u32 rb_cntl, ib_cntl; int i; - amdgpu_sdma_unset_buffer_funcs_helper(adev); - for (i = 0; i < adev->sdma.num_instances; i++) { rb_cntl = RREG32_SDMA(i, mmSDMA0_PAGE_RB_CNTL); rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_PAGE_RB_CNTL, @@ -1402,13 +1398,7 @@ static int sdma_v4_0_start(struct amdgpu_device *adev) r = amdgpu_ring_test_helper(page); if (r) return r; - - if (adev->mman.buffer_funcs_ring == page) - amdgpu_ttm_set_buffer_funcs_status(adev, true); } - - if (adev->mman.buffer_funcs_ring == ring) - amdgpu_ttm_set_buffer_funcs_status(adev, true); } return r; @@ -1749,11 +1739,8 @@ static int sdma_v4_0_late_init(void *handle) sdma_v4_0_setup_ulv(adev); - if (!amdgpu_persistent_edc_harvesting_supported(adev)) { - if (adev->sdma.ras && adev->sdma.ras->ras_block.hw_ops && - adev->sdma.ras->ras_block.hw_ops->reset_ras_error_count) - adev->sdma.ras->ras_block.hw_ops->reset_ras_error_count(adev); - } + if (!amdgpu_persistent_edc_harvesting_supported(adev)) + amdgpu_ras_reset_error_count(adev, AMDGPU_RAS_BLOCK__SDMA); return 0; } @@ -1924,11 +1911,8 @@ static int sdma_v4_0_hw_fini(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; int i; - if (amdgpu_sriov_vf(adev)) { - /* disable the scheduler for SDMA */ - amdgpu_sdma_unset_buffer_funcs_helper(adev); + if (amdgpu_sriov_vf(adev)) return 0; - } if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__SDMA)) { for (i = 0; i < adev->sdma.num_instances; i++) { @@ -1967,7 +1951,6 @@ static int sdma_v4_0_resume(void *handle) if (adev->in_s0ix) { sdma_v4_0_enable(adev, true); sdma_v4_0_gfx_enable(adev, true); - amdgpu_ttm_set_buffer_funcs_status(adev, true); return 0; } @@ -2121,7 +2104,7 @@ static int sdma_v4_0_print_iv_entry(struct amdgpu_device *adev, struct amdgpu_iv_entry *entry) { int instance; - struct amdgpu_task_info task_info; + struct amdgpu_task_info *task_info; u64 addr; instance = sdma_v4_0_irq_id_to_seq(entry->client_id); @@ -2133,15 +2116,20 @@ static int sdma_v4_0_print_iv_entry(struct amdgpu_device *adev, addr = (u64)entry->src_data[0] << 12; addr |= ((u64)entry->src_data[1] & 0xf) << 44; - memset(&task_info, 0, sizeof(struct amdgpu_task_info)); - amdgpu_vm_get_task_info(adev, entry->pasid, &task_info); - dev_dbg_ratelimited(adev->dev, - "[sdma%d] address:0x%016llx src_id:%u ring:%u vmid:%u " - "pasid:%u, for process %s pid %d thread %s pid %d\n", - instance, addr, entry->src_id, entry->ring_id, entry->vmid, - entry->pasid, task_info.process_name, task_info.tgid, - task_info.task_name, task_info.pid); + "[sdma%d] address:0x%016llx src_id:%u ring:%u vmid:%u pasid:%u\n", + instance, addr, entry->src_id, entry->ring_id, entry->vmid, + entry->pasid); + + task_info = amdgpu_vm_get_task_info_pasid(adev, entry->pasid); + if (task_info) { + dev_dbg_ratelimited(adev->dev, + " for process %s pid %d thread %s pid %d\n", + task_info->process_name, task_info->tgid, + task_info->task_name, task_info->pid); + amdgpu_vm_put_task_info(task_info); + } + return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c index 31aa245552d6..20909843bcd2 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c @@ -45,6 +45,8 @@ MODULE_FIRMWARE("amdgpu/sdma_4_4_2.bin"); +#define mmSMNAID_AID0_MCA_SMU 0x03b30400 + #define WREG32_SDMA(instance, offset, value) \ WREG32(sdma_v4_4_2_get_reg_offset(adev, (instance), (offset)), value) #define RREG32_SDMA(instance, offset) \ @@ -366,7 +368,8 @@ static void sdma_v4_4_2_ring_emit_hdp_flush(struct amdgpu_ring *ring) u32 ref_and_mask = 0; const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg; - ref_and_mask = nbio_hf_reg->ref_and_mask_sdma0 << ring->me; + ref_and_mask = nbio_hf_reg->ref_and_mask_sdma0 + << (ring->me % adev->sdma.num_inst_per_aid); sdma_v4_4_2_wait_reg_mem(ring, 0, 1, adev->nbio.funcs->get_hdp_flush_done_offset(adev), @@ -427,23 +430,31 @@ static void sdma_v4_4_2_inst_gfx_stop(struct amdgpu_device *adev, uint32_t inst_mask) { struct amdgpu_ring *sdma[AMDGPU_MAX_SDMA_INSTANCES]; + u32 doorbell_offset, doorbell; u32 rb_cntl, ib_cntl; - int i, unset = 0; + int i; for_each_inst(i, inst_mask) { sdma[i] = &adev->sdma.instance[i].ring; - if ((adev->mman.buffer_funcs_ring == sdma[i]) && unset != 1) { - amdgpu_ttm_set_buffer_funcs_status(adev, false); - unset = 1; - } - rb_cntl = RREG32_SDMA(i, regSDMA_GFX_RB_CNTL); rb_cntl = REG_SET_FIELD(rb_cntl, SDMA_GFX_RB_CNTL, RB_ENABLE, 0); WREG32_SDMA(i, regSDMA_GFX_RB_CNTL, rb_cntl); ib_cntl = RREG32_SDMA(i, regSDMA_GFX_IB_CNTL); ib_cntl = REG_SET_FIELD(ib_cntl, SDMA_GFX_IB_CNTL, IB_ENABLE, 0); WREG32_SDMA(i, regSDMA_GFX_IB_CNTL, ib_cntl); + + if (sdma[i]->use_doorbell) { + doorbell = RREG32_SDMA(i, regSDMA_GFX_DOORBELL); + doorbell_offset = RREG32_SDMA(i, regSDMA_GFX_DOORBELL_OFFSET); + + doorbell = REG_SET_FIELD(doorbell, SDMA_GFX_DOORBELL, ENABLE, 0); + doorbell_offset = REG_SET_FIELD(doorbell_offset, + SDMA_GFX_DOORBELL_OFFSET, + OFFSET, 0); + WREG32_SDMA(i, regSDMA_GFX_DOORBELL, doorbell); + WREG32_SDMA(i, regSDMA_GFX_DOORBELL_OFFSET, doorbell_offset); + } } } @@ -472,20 +483,10 @@ static void sdma_v4_4_2_inst_rlc_stop(struct amdgpu_device *adev, static void sdma_v4_4_2_inst_page_stop(struct amdgpu_device *adev, uint32_t inst_mask) { - struct amdgpu_ring *sdma[AMDGPU_MAX_SDMA_INSTANCES]; u32 rb_cntl, ib_cntl; int i; - bool unset = false; for_each_inst(i, inst_mask) { - sdma[i] = &adev->sdma.instance[i].page; - - if ((adev->mman.buffer_funcs_ring == sdma[i]) && - (!unset)) { - amdgpu_ttm_set_buffer_funcs_status(adev, false); - unset = true; - } - rb_cntl = RREG32_SDMA(i, regSDMA_PAGE_RB_CNTL); rb_cntl = REG_SET_FIELD(rb_cntl, SDMA_PAGE_RB_CNTL, RB_ENABLE, 0); @@ -597,7 +598,7 @@ static uint32_t sdma_v4_4_2_rb_cntl(struct amdgpu_ring *ring, uint32_t rb_cntl) /* Set ring buffer size in dwords */ uint32_t rb_bufsz = order_base_2(ring->ring_size / 4); - barrier(); /* work around https://bugs.llvm.org/show_bug.cgi?id=42576 */ + barrier(); /* work around https://llvm.org/pr42576 */ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA_GFX_RB_CNTL, RB_SIZE, rb_bufsz); #ifdef __BIG_ENDIAN rb_cntl = REG_SET_FIELD(rb_cntl, SDMA_GFX_RB_CNTL, RB_SWAP_ENABLE, 1); @@ -631,12 +632,6 @@ static void sdma_v4_4_2_gfx_resume(struct amdgpu_device *adev, unsigned int i) rb_cntl = sdma_v4_4_2_rb_cntl(ring, rb_cntl); WREG32_SDMA(i, regSDMA_GFX_RB_CNTL, rb_cntl); - /* Initialize the ring buffer's read and write pointers */ - WREG32_SDMA(i, regSDMA_GFX_RB_RPTR, 0); - WREG32_SDMA(i, regSDMA_GFX_RB_RPTR_HI, 0); - WREG32_SDMA(i, regSDMA_GFX_RB_WPTR, 0); - WREG32_SDMA(i, regSDMA_GFX_RB_WPTR_HI, 0); - /* set the wb address whether it's enabled or not */ WREG32_SDMA(i, regSDMA_GFX_RB_RPTR_ADDR_HI, upper_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF); @@ -654,6 +649,12 @@ static void sdma_v4_4_2_gfx_resume(struct amdgpu_device *adev, unsigned int i) /* before programing wptr to a less value, need set minor_ptr_update first */ WREG32_SDMA(i, regSDMA_GFX_MINOR_PTR_UPDATE, 1); + /* Initialize the ring buffer's read and write pointers */ + WREG32_SDMA(i, regSDMA_GFX_RB_RPTR, 0); + WREG32_SDMA(i, regSDMA_GFX_RB_RPTR_HI, 0); + WREG32_SDMA(i, regSDMA_GFX_RB_WPTR, 0); + WREG32_SDMA(i, regSDMA_GFX_RB_WPTR_HI, 0); + doorbell = RREG32_SDMA(i, regSDMA_GFX_DOORBELL); doorbell_offset = RREG32_SDMA(i, regSDMA_GFX_DOORBELL_OFFSET); @@ -935,13 +936,7 @@ static int sdma_v4_4_2_inst_start(struct amdgpu_device *adev, r = amdgpu_ring_test_helper(page); if (r) return r; - - if (adev->mman.buffer_funcs_ring == page) - amdgpu_ttm_set_buffer_funcs_status(adev, true); } - - if (adev->mman.buffer_funcs_ring == ring) - amdgpu_ttm_set_buffer_funcs_status(adev, true); } return r; @@ -1276,11 +1271,8 @@ static int sdma_v4_4_2_late_init(void *handle) .cb = sdma_v4_4_2_process_ras_data_cb, }; #endif - if (!amdgpu_persistent_edc_harvesting_supported(adev)) { - if (adev->sdma.ras && adev->sdma.ras->ras_block.hw_ops && - adev->sdma.ras->ras_block.hw_ops->reset_ras_error_count) - adev->sdma.ras->ras_block.hw_ops->reset_ras_error_count(adev); - } + if (!amdgpu_persistent_edc_harvesting_supported(adev)) + amdgpu_ras_reset_error_count(adev, AMDGPU_RAS_BLOCK__SDMA); return 0; } @@ -1611,19 +1603,9 @@ static int sdma_v4_4_2_set_ecc_irq_state(struct amdgpu_device *adev, u32 sdma_cntl; sdma_cntl = RREG32_SDMA(type, regSDMA_CNTL); - switch (state) { - case AMDGPU_IRQ_STATE_DISABLE: - sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA_CNTL, - DRAM_ECC_INT_ENABLE, 0); - WREG32_SDMA(type, regSDMA_CNTL, sdma_cntl); - break; - /* sdma ecc interrupt is enabled by default - * driver doesn't need to do anything to - * enable the interrupt */ - case AMDGPU_IRQ_STATE_ENABLE: - default: - break; - } + sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA_CNTL, DRAM_ECC_INT_ENABLE, + state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); + WREG32_SDMA(type, regSDMA_CNTL, sdma_cntl); return 0; } @@ -1632,7 +1614,7 @@ static int sdma_v4_4_2_print_iv_entry(struct amdgpu_device *adev, struct amdgpu_iv_entry *entry) { int instance; - struct amdgpu_task_info task_info; + struct amdgpu_task_info *task_info; u64 addr; instance = sdma_v4_4_2_irq_id_to_seq(entry->client_id); @@ -1644,15 +1626,19 @@ static int sdma_v4_4_2_print_iv_entry(struct amdgpu_device *adev, addr = (u64)entry->src_data[0] << 12; addr |= ((u64)entry->src_data[1] & 0xf) << 44; - memset(&task_info, 0, sizeof(struct amdgpu_task_info)); - amdgpu_vm_get_task_info(adev, entry->pasid, &task_info); - dev_dbg_ratelimited(adev->dev, - "[sdma%d] address:0x%016llx src_id:%u ring:%u vmid:%u " - "pasid:%u, for process %s pid %d thread %s pid %d\n", - instance, addr, entry->src_id, entry->ring_id, entry->vmid, - entry->pasid, task_info.process_name, task_info.tgid, - task_info.task_name, task_info.pid); + "[sdma%d] address:0x%016llx src_id:%u ring:%u vmid:%u pasid:%u\n", + instance, addr, entry->src_id, entry->ring_id, entry->vmid, + entry->pasid); + + task_info = amdgpu_vm_get_task_info_pasid(adev, entry->pasid); + if (task_info) { + dev_dbg_ratelimited(adev->dev, " for process %s pid %d thread %s pid %d\n", + task_info->process_name, task_info->tgid, + task_info->task_name, task_info->pid); + amdgpu_vm_put_task_info(task_info); + } + return 0; } @@ -2051,7 +2037,7 @@ const struct amdgpu_ip_block_version sdma_v4_4_2_ip_block = { .type = AMD_IP_BLOCK_TYPE_SDMA, .major = 4, .minor = 4, - .rev = 0, + .rev = 2, .funcs = &sdma_v4_4_2_ip_funcs, }; @@ -2146,7 +2132,7 @@ static void sdma_v4_4_2_inst_query_ras_error_count(struct amdgpu_device *adev, AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE, &ue_count); - amdgpu_ras_error_statistic_ue_count(err_data, &mcm_info, ue_count); + amdgpu_ras_error_statistic_ue_count(err_data, &mcm_info, NULL, ue_count); } static void sdma_v4_4_2_query_ras_error_count(struct amdgpu_device *adev, @@ -2194,9 +2180,83 @@ static const struct amdgpu_ras_block_hw_ops sdma_v4_4_2_ras_hw_ops = { .reset_ras_error_count = sdma_v4_4_2_reset_ras_error_count, }; +static int sdma_v4_4_2_aca_bank_parser(struct aca_handle *handle, struct aca_bank *bank, + enum aca_smu_type type, void *data) +{ + struct aca_bank_info info; + u64 misc0; + int ret; + + ret = aca_bank_info_decode(bank, &info); + if (ret) + return ret; + + misc0 = bank->regs[ACA_REG_IDX_MISC0]; + switch (type) { + case ACA_SMU_TYPE_UE: + ret = aca_error_cache_log_bank_error(handle, &info, ACA_ERROR_TYPE_UE, + 1ULL); + break; + case ACA_SMU_TYPE_CE: + ret = aca_error_cache_log_bank_error(handle, &info, ACA_ERROR_TYPE_CE, + ACA_REG__MISC0__ERRCNT(misc0)); + break; + default: + return -EINVAL; + } + + return ret; +} + +/* CODE_SDMA0 - CODE_SDMA4, reference to smu driver if header file */ +static int sdma_v4_4_2_err_codes[] = { 33, 34, 35, 36 }; + +static bool sdma_v4_4_2_aca_bank_is_valid(struct aca_handle *handle, struct aca_bank *bank, + enum aca_smu_type type, void *data) +{ + u32 instlo; + + instlo = ACA_REG__IPID__INSTANCEIDLO(bank->regs[ACA_REG_IDX_IPID]); + instlo &= GENMASK(31, 1); + + if (instlo != mmSMNAID_AID0_MCA_SMU) + return false; + + if (aca_bank_check_error_codes(handle->adev, bank, + sdma_v4_4_2_err_codes, + ARRAY_SIZE(sdma_v4_4_2_err_codes))) + return false; + + return true; +} + +static const struct aca_bank_ops sdma_v4_4_2_aca_bank_ops = { + .aca_bank_parser = sdma_v4_4_2_aca_bank_parser, + .aca_bank_is_valid = sdma_v4_4_2_aca_bank_is_valid, +}; + +static const struct aca_info sdma_v4_4_2_aca_info = { + .hwip = ACA_HWIP_TYPE_SMU, + .mask = ACA_ERROR_UE_MASK, + .bank_ops = &sdma_v4_4_2_aca_bank_ops, +}; + +static int sdma_v4_4_2_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block) +{ + int r; + + r = amdgpu_sdma_ras_late_init(adev, ras_block); + if (r) + return r; + + return amdgpu_ras_bind_aca(adev, AMDGPU_RAS_BLOCK__SDMA, + &sdma_v4_4_2_aca_info, NULL); +} + static struct amdgpu_sdma_ras sdma_v4_4_2_ras = { .ras_block = { .hw_ops = &sdma_v4_4_2_ras_hw_ops, + .ras_late_init = sdma_v4_4_2_ras_late_init, }, }; diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c index c1ff5eda8961..883e8a1b8a40 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c @@ -249,35 +249,23 @@ static int sdma_v5_0_init_microcode(struct amdgpu_device *adev) return ret; } -static unsigned sdma_v5_0_ring_init_cond_exec(struct amdgpu_ring *ring) +static unsigned sdma_v5_0_ring_init_cond_exec(struct amdgpu_ring *ring, + uint64_t addr) { unsigned ret; amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_COND_EXE)); - amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr)); - amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr)); + amdgpu_ring_write(ring, lower_32_bits(addr)); + amdgpu_ring_write(ring, upper_32_bits(addr)); amdgpu_ring_write(ring, 1); - ret = ring->wptr & ring->buf_mask;/* this is the offset we need patch later */ - amdgpu_ring_write(ring, 0x55aa55aa);/* insert dummy here and patch it later */ + /* this is the offset we need patch later */ + ret = ring->wptr & ring->buf_mask; + /* insert dummy here and patch it later */ + amdgpu_ring_write(ring, 0); return ret; } -static void sdma_v5_0_ring_patch_cond_exec(struct amdgpu_ring *ring, - unsigned offset) -{ - unsigned cur; - - BUG_ON(offset > ring->buf_mask); - BUG_ON(ring->ring[offset] != 0x55aa55aa); - - cur = (ring->wptr - 1) & ring->buf_mask; - if (cur > offset) - ring->ring[offset] = cur - offset; - else - ring->ring[offset] = (ring->buf_mask + 1) - offset + cur; -} - /** * sdma_v5_0_ring_get_rptr - get the current read pointer * @@ -559,8 +547,6 @@ static void sdma_v5_0_gfx_stop(struct amdgpu_device *adev) u32 rb_cntl, ib_cntl; int i; - amdgpu_sdma_unset_buffer_funcs_helper(adev); - for (i = 0; i < adev->sdma.num_instances; i++) { rb_cntl = RREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL)); rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 0); @@ -825,9 +811,6 @@ static int sdma_v5_0_gfx_resume(struct amdgpu_device *adev) r = amdgpu_ring_test_helper(ring); if (r) return r; - - if (adev->mman.buffer_funcs_ring == ring) - amdgpu_ttm_set_buffer_funcs_status(adev, true); } return 0; @@ -1426,11 +1409,8 @@ static int sdma_v5_0_hw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - if (amdgpu_sriov_vf(adev)) { - /* disable the scheduler for SDMA */ - amdgpu_sdma_unset_buffer_funcs_helper(adev); + if (amdgpu_sriov_vf(adev)) return 0; - } sdma_v5_0_ctx_switch_enable(adev, false); sdma_v5_0_enable(adev, false); @@ -1788,7 +1768,6 @@ static const struct amdgpu_ring_funcs sdma_v5_0_ring_funcs = { .emit_reg_wait = sdma_v5_0_ring_emit_reg_wait, .emit_reg_write_reg_wait = sdma_v5_0_ring_emit_reg_write_reg_wait, .init_cond_exec = sdma_v5_0_ring_init_cond_exec, - .patch_cond_exec = sdma_v5_0_ring_patch_cond_exec, .preempt_ib = sdma_v5_0_ring_preempt_ib, }; diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c index 7d1e57189c8c..da01b524b9f2 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c @@ -89,35 +89,23 @@ static u32 sdma_v5_2_get_reg_offset(struct amdgpu_device *adev, u32 instance, u3 return base + internal_offset; } -static unsigned sdma_v5_2_ring_init_cond_exec(struct amdgpu_ring *ring) +static unsigned sdma_v5_2_ring_init_cond_exec(struct amdgpu_ring *ring, + uint64_t addr) { unsigned ret; amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_COND_EXE)); - amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr)); - amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr)); + amdgpu_ring_write(ring, lower_32_bits(addr)); + amdgpu_ring_write(ring, upper_32_bits(addr)); amdgpu_ring_write(ring, 1); - ret = ring->wptr & ring->buf_mask;/* this is the offset we need patch later */ - amdgpu_ring_write(ring, 0x55aa55aa);/* insert dummy here and patch it later */ + /* this is the offset we need patch later */ + ret = ring->wptr & ring->buf_mask; + /* insert dummy here and patch it later */ + amdgpu_ring_write(ring, 0); return ret; } -static void sdma_v5_2_ring_patch_cond_exec(struct amdgpu_ring *ring, - unsigned offset) -{ - unsigned cur; - - BUG_ON(offset > ring->buf_mask); - BUG_ON(ring->ring[offset] != 0x55aa55aa); - - cur = (ring->wptr - 1) & ring->buf_mask; - if (cur > offset) - ring->ring[offset] = cur - offset; - else - ring->ring[offset] = (ring->buf_mask + 1) - offset + cur; -} - /** * sdma_v5_2_ring_get_rptr - get the current read pointer * @@ -292,17 +280,21 @@ static void sdma_v5_2_ring_emit_hdp_flush(struct amdgpu_ring *ring) u32 ref_and_mask = 0; const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg; - ref_and_mask = nbio_hf_reg->ref_and_mask_sdma0 << ring->me; - - amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) | - SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(1) | - SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* == */ - amdgpu_ring_write(ring, (adev->nbio.funcs->get_hdp_flush_done_offset(adev)) << 2); - amdgpu_ring_write(ring, (adev->nbio.funcs->get_hdp_flush_req_offset(adev)) << 2); - amdgpu_ring_write(ring, ref_and_mask); /* reference */ - amdgpu_ring_write(ring, ref_and_mask); /* mask */ - amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) | - SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); /* retry count, poll interval */ + if (ring->me > 1) { + amdgpu_asic_flush_hdp(adev, ring); + } else { + ref_and_mask = nbio_hf_reg->ref_and_mask_sdma0 << ring->me; + + amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) | + SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(1) | + SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* == */ + amdgpu_ring_write(ring, (adev->nbio.funcs->get_hdp_flush_done_offset(adev)) << 2); + amdgpu_ring_write(ring, (adev->nbio.funcs->get_hdp_flush_req_offset(adev)) << 2); + amdgpu_ring_write(ring, ref_and_mask); /* reference */ + amdgpu_ring_write(ring, ref_and_mask); /* mask */ + amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) | + SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); /* retry count, poll interval */ + } } /** @@ -364,8 +356,6 @@ static void sdma_v5_2_gfx_stop(struct amdgpu_device *adev) u32 rb_cntl, ib_cntl; int i; - amdgpu_sdma_unset_buffer_funcs_helper(adev); - for (i = 0; i < adev->sdma.num_instances; i++) { rb_cntl = RREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL)); rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 0); @@ -625,9 +615,6 @@ static int sdma_v5_2_gfx_resume(struct amdgpu_device *adev) r = amdgpu_ring_test_helper(ring); if (r) return r; - - if (adev->mman.buffer_funcs_ring == ring) - amdgpu_ttm_set_buffer_funcs_status(adev, true); } return 0; @@ -1284,11 +1271,8 @@ static int sdma_v5_2_hw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - if (amdgpu_sriov_vf(adev)) { - /* disable the scheduler for SDMA */ - amdgpu_sdma_unset_buffer_funcs_helper(adev); + if (amdgpu_sriov_vf(adev)) return 0; - } sdma_v5_2_ctx_switch_enable(adev, false); sdma_v5_2_enable(adev, false); @@ -1651,6 +1635,32 @@ static void sdma_v5_2_get_clockgating_state(void *handle, u64 *flags) *flags |= AMD_CG_SUPPORT_SDMA_LS; } +static void sdma_v5_2_ring_begin_use(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + /* SDMA 5.2.3 (RMB) FW doesn't seem to properly + * disallow GFXOFF in some cases leading to + * hangs in SDMA. Disallow GFXOFF while SDMA is active. + * We can probably just limit this to 5.2.3, + * but it shouldn't hurt for other parts since + * this GFXOFF will be disallowed anyway when SDMA is + * active, this just makes it explicit. + */ + amdgpu_gfx_off_ctrl(adev, false); +} + +static void sdma_v5_2_ring_end_use(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + /* SDMA 5.2.3 (RMB) FW doesn't seem to properly + * disallow GFXOFF in some cases leading to + * hangs in SDMA. Allow GFXOFF when SDMA is complete. + */ + amdgpu_gfx_off_ctrl(adev, true); +} + const struct amd_ip_funcs sdma_v5_2_ip_funcs = { .name = "sdma_v5_2", .early_init = sdma_v5_2_early_init, @@ -1698,11 +1708,12 @@ static const struct amdgpu_ring_funcs sdma_v5_2_ring_funcs = { .test_ib = sdma_v5_2_ring_test_ib, .insert_nop = sdma_v5_2_ring_insert_nop, .pad_ib = sdma_v5_2_ring_pad_ib, + .begin_use = sdma_v5_2_ring_begin_use, + .end_use = sdma_v5_2_ring_end_use, .emit_wreg = sdma_v5_2_ring_emit_wreg, .emit_reg_wait = sdma_v5_2_ring_emit_reg_wait, .emit_reg_write_reg_wait = sdma_v5_2_ring_emit_reg_write_reg_wait, .init_cond_exec = sdma_v5_2_ring_init_cond_exec, - .patch_cond_exec = sdma_v5_2_ring_patch_cond_exec, .preempt_ib = sdma_v5_2_ring_preempt_ib, }; diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c index 1f8122fd1ad7..67a4d8b1512b 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c @@ -49,6 +49,7 @@ MODULE_FIRMWARE("amdgpu/sdma_6_0_1.bin"); MODULE_FIRMWARE("amdgpu/sdma_6_0_2.bin"); MODULE_FIRMWARE("amdgpu/sdma_6_0_3.bin"); MODULE_FIRMWARE("amdgpu/sdma_6_1_0.bin"); +MODULE_FIRMWARE("amdgpu/sdma_6_1_1.bin"); #define SDMA1_REG_OFFSET 0x600 #define SDMA0_HYP_DEC_REG_START 0x5880 @@ -79,35 +80,23 @@ static u32 sdma_v6_0_get_reg_offset(struct amdgpu_device *adev, u32 instance, u3 return base + internal_offset; } -static unsigned sdma_v6_0_ring_init_cond_exec(struct amdgpu_ring *ring) +static unsigned sdma_v6_0_ring_init_cond_exec(struct amdgpu_ring *ring, + uint64_t addr) { unsigned ret; amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_COND_EXE)); - amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr)); - amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr)); + amdgpu_ring_write(ring, lower_32_bits(addr)); + amdgpu_ring_write(ring, upper_32_bits(addr)); amdgpu_ring_write(ring, 1); - ret = ring->wptr & ring->buf_mask;/* this is the offset we need patch later */ - amdgpu_ring_write(ring, 0x55aa55aa);/* insert dummy here and patch it later */ + /* this is the offset we need patch later */ + ret = ring->wptr & ring->buf_mask; + /* insert dummy here and patch it later */ + amdgpu_ring_write(ring, 0); return ret; } -static void sdma_v6_0_ring_patch_cond_exec(struct amdgpu_ring *ring, - unsigned offset) -{ - unsigned cur; - - BUG_ON(offset > ring->buf_mask); - BUG_ON(ring->ring[offset] != 0x55aa55aa); - - cur = (ring->wptr - 1) & ring->buf_mask; - if (cur > offset) - ring->ring[offset] = cur - offset; - else - ring->ring[offset] = (ring->buf_mask + 1) - offset + cur; -} - /** * sdma_v6_0_ring_get_rptr - get the current read pointer * @@ -156,68 +145,35 @@ static uint64_t sdma_v6_0_ring_get_wptr(struct amdgpu_ring *ring) static void sdma_v6_0_ring_set_wptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - uint32_t *wptr_saved; - uint32_t *is_queue_unmap; - uint64_t aggregated_db_index; - uint32_t mqd_size = adev->mqds[AMDGPU_HW_IP_DMA].mqd_size; - - DRM_DEBUG("Setting write pointer\n"); - - if (ring->is_mes_queue) { - wptr_saved = (uint32_t *)(ring->mqd_ptr + mqd_size); - is_queue_unmap = (uint32_t *)(ring->mqd_ptr + mqd_size + - sizeof(uint32_t)); - aggregated_db_index = - amdgpu_mes_get_aggregated_doorbell_index(adev, - ring->hw_prio); + if (ring->use_doorbell) { + DRM_DEBUG("Using doorbell -- " + "wptr_offs == 0x%08x " + "lower_32_bits(ring->wptr) << 2 == 0x%08x " + "upper_32_bits(ring->wptr) << 2 == 0x%08x\n", + ring->wptr_offs, + lower_32_bits(ring->wptr << 2), + upper_32_bits(ring->wptr << 2)); + /* XXX check if swapping is necessary on BE */ atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr << 2); - *wptr_saved = ring->wptr << 2; - if (*is_queue_unmap) { - WDOORBELL64(aggregated_db_index, ring->wptr << 2); - DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n", - ring->doorbell_index, ring->wptr << 2); - WDOORBELL64(ring->doorbell_index, ring->wptr << 2); - } else { - DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n", - ring->doorbell_index, ring->wptr << 2); - WDOORBELL64(ring->doorbell_index, ring->wptr << 2); - - if (*is_queue_unmap) - WDOORBELL64(aggregated_db_index, - ring->wptr << 2); - } + DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n", + ring->doorbell_index, ring->wptr << 2); + WDOORBELL64(ring->doorbell_index, ring->wptr << 2); } else { - if (ring->use_doorbell) { - DRM_DEBUG("Using doorbell -- " - "wptr_offs == 0x%08x " - "lower_32_bits(ring->wptr) << 2 == 0x%08x " - "upper_32_bits(ring->wptr) << 2 == 0x%08x\n", - ring->wptr_offs, - lower_32_bits(ring->wptr << 2), - upper_32_bits(ring->wptr << 2)); - /* XXX check if swapping is necessary on BE */ - atomic64_set((atomic64_t *)ring->wptr_cpu_addr, - ring->wptr << 2); - DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n", - ring->doorbell_index, ring->wptr << 2); - WDOORBELL64(ring->doorbell_index, ring->wptr << 2); - } else { - DRM_DEBUG("Not using doorbell -- " - "regSDMA%i_GFX_RB_WPTR == 0x%08x " - "regSDMA%i_GFX_RB_WPTR_HI == 0x%08x\n", - ring->me, - lower_32_bits(ring->wptr << 2), - ring->me, - upper_32_bits(ring->wptr << 2)); - WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, - ring->me, regSDMA0_QUEUE0_RB_WPTR), - lower_32_bits(ring->wptr << 2)); - WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, - ring->me, regSDMA0_QUEUE0_RB_WPTR_HI), - upper_32_bits(ring->wptr << 2)); - } + DRM_DEBUG("Not using doorbell -- " + "regSDMA%i_GFX_RB_WPTR == 0x%08x " + "regSDMA%i_GFX_RB_WPTR_HI == 0x%08x\n", + ring->me, + lower_32_bits(ring->wptr << 2), + ring->me, + upper_32_bits(ring->wptr << 2)); + WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, + ring->me, regSDMA0_QUEUE0_RB_WPTR), + lower_32_bits(ring->wptr << 2)); + WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, + ring->me, regSDMA0_QUEUE0_RB_WPTR_HI), + upper_32_bits(ring->wptr << 2)); } } @@ -381,8 +337,6 @@ static void sdma_v6_0_gfx_stop(struct amdgpu_device *adev) u32 rb_cntl, ib_cntl; int i; - amdgpu_sdma_unset_buffer_funcs_helper(adev); - for (i = 0; i < adev->sdma.num_instances; i++) { rb_cntl = RREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_CNTL)); rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, RB_ENABLE, 0); @@ -553,6 +507,13 @@ static int sdma_v6_0_gfx_resume(struct amdgpu_device *adev) /* set minor_ptr_update to 0 after wptr programed */ WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_MINOR_PTR_UPDATE), 0); + /* Set up sdma hang watchdog */ + temp = RREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_WATCHDOG_CNTL)); + /* 100ms per unit */ + temp = REG_SET_FIELD(temp, SDMA0_WATCHDOG_CNTL, QUEUE_HANG_COUNT, + max(adev->usec_timeout/100000, 1)); + WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_WATCHDOG_CNTL), temp); + /* Set up RESP_MODE to non-copy addresses */ temp = RREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_UTCL1_CNTL)); temp = REG_SET_FIELD(temp, SDMA0_UTCL1_CNTL, RESP_MODE, 3); @@ -594,9 +555,6 @@ static int sdma_v6_0_gfx_resume(struct amdgpu_device *adev) r = amdgpu_ring_test_helper(ring); if (r) return r; - - if (adev->mman.buffer_funcs_ring == ring) - amdgpu_ttm_set_buffer_funcs_status(adev, true); } return 0; @@ -1341,11 +1299,8 @@ static int sdma_v6_0_hw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - if (amdgpu_sriov_vf(adev)) { - /* disable the scheduler for SDMA */ - amdgpu_sdma_unset_buffer_funcs_helper(adev); + if (amdgpu_sriov_vf(adev)) return 0; - } sdma_v6_0_ctxempty_int_enable(adev, false); sdma_v6_0_enable(adev, false); @@ -1582,7 +1537,6 @@ static const struct amdgpu_ring_funcs sdma_v6_0_ring_funcs = { .emit_reg_wait = sdma_v6_0_ring_emit_reg_wait, .emit_reg_write_reg_wait = sdma_v6_0_ring_emit_reg_write_reg_wait, .init_cond_exec = sdma_v6_0_ring_init_cond_exec, - .patch_cond_exec = sdma_v6_0_ring_patch_cond_exec, .preempt_ib = sdma_v6_0_ring_preempt_ib, }; diff --git a/drivers/gpu/drm/amd/amdgpu/si.c b/drivers/gpu/drm/amd/amdgpu/si.c index 4b81f29e5fd5..67e179c7e347 100644 --- a/drivers/gpu/drm/amd/amdgpu/si.c +++ b/drivers/gpu/drm/amd/amdgpu/si.c @@ -1409,9 +1409,9 @@ static int si_gpu_pci_config_reset(struct amdgpu_device *adev) return r; } -static bool si_asic_supports_baco(struct amdgpu_device *adev) +static int si_asic_supports_baco(struct amdgpu_device *adev) { - return false; + return 0; } static enum amd_reset_method @@ -2331,28 +2331,18 @@ static void si_pcie_gen3_enable(struct amdgpu_device *adev) gpu_cfg & PCI_EXP_LNKCTL_HAWD); - pcie_capability_read_word(root, PCI_EXP_LNKCTL2, - &tmp16); - tmp16 &= ~(PCI_EXP_LNKCTL2_ENTER_COMP | - PCI_EXP_LNKCTL2_TX_MARGIN); - tmp16 |= (bridge_cfg2 & - (PCI_EXP_LNKCTL2_ENTER_COMP | - PCI_EXP_LNKCTL2_TX_MARGIN)); - pcie_capability_write_word(root, - PCI_EXP_LNKCTL2, - tmp16); - - pcie_capability_read_word(adev->pdev, - PCI_EXP_LNKCTL2, - &tmp16); - tmp16 &= ~(PCI_EXP_LNKCTL2_ENTER_COMP | - PCI_EXP_LNKCTL2_TX_MARGIN); - tmp16 |= (gpu_cfg2 & - (PCI_EXP_LNKCTL2_ENTER_COMP | - PCI_EXP_LNKCTL2_TX_MARGIN)); - pcie_capability_write_word(adev->pdev, - PCI_EXP_LNKCTL2, - tmp16); + pcie_capability_clear_and_set_word(root, PCI_EXP_LNKCTL2, + PCI_EXP_LNKCTL2_ENTER_COMP | + PCI_EXP_LNKCTL2_TX_MARGIN, + bridge_cfg2 & + (PCI_EXP_LNKCTL2_ENTER_COMP | + PCI_EXP_LNKCTL2_TX_MARGIN)); + pcie_capability_clear_and_set_word(adev->pdev, PCI_EXP_LNKCTL2, + PCI_EXP_LNKCTL2_ENTER_COMP | + PCI_EXP_LNKCTL2_TX_MARGIN, + gpu_cfg2 & + (PCI_EXP_LNKCTL2_ENTER_COMP | + PCI_EXP_LNKCTL2_TX_MARGIN)); tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4); tmp &= ~LC_SET_QUIESCE; @@ -2365,16 +2355,15 @@ static void si_pcie_gen3_enable(struct amdgpu_device *adev) speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE; WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl); - pcie_capability_read_word(adev->pdev, PCI_EXP_LNKCTL2, &tmp16); - tmp16 &= ~PCI_EXP_LNKCTL2_TLS; - + tmp16 = 0; if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3) tmp16 |= PCI_EXP_LNKCTL2_TLS_8_0GT; /* gen3 */ else if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2) tmp16 |= PCI_EXP_LNKCTL2_TLS_5_0GT; /* gen2 */ else tmp16 |= PCI_EXP_LNKCTL2_TLS_2_5GT; /* gen1 */ - pcie_capability_write_word(adev->pdev, PCI_EXP_LNKCTL2, tmp16); + pcie_capability_clear_and_set_word(adev->pdev, PCI_EXP_LNKCTL2, + PCI_EXP_LNKCTL2_TLS, tmp16); speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL); speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE; @@ -2440,8 +2429,6 @@ static void si_program_aspm(struct amdgpu_device *adev) if (!amdgpu_device_should_use_aspm(adev)) return; - if (adev->flags & AMD_IS_APU) - return; orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL); data &= ~LC_XMIT_N_FTS_MASK; data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN; diff --git a/drivers/gpu/drm/amd/amdgpu/si_dma.c b/drivers/gpu/drm/amd/amdgpu/si_dma.c index 42c4547f32ec..9aa0e11ee673 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_dma.c +++ b/drivers/gpu/drm/amd/amdgpu/si_dma.c @@ -115,8 +115,6 @@ static void si_dma_stop(struct amdgpu_device *adev) u32 rb_cntl; unsigned i; - amdgpu_sdma_unset_buffer_funcs_helper(adev); - for (i = 0; i < adev->sdma.num_instances; i++) { /* dma0 */ rb_cntl = RREG32(DMA_RB_CNTL + sdma_offsets[i]); @@ -177,9 +175,6 @@ static int si_dma_start(struct amdgpu_device *adev) r = amdgpu_ring_test_helper(ring); if (r) return r; - - if (adev->mman.buffer_funcs_ring == ring) - amdgpu_ttm_set_buffer_funcs_status(adev, true); } return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/si_ih.c b/drivers/gpu/drm/amd/amdgpu/si_ih.c index 9a24f17a5750..cada9f300a7f 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/si_ih.c @@ -119,6 +119,12 @@ static u32 si_ih_get_wptr(struct amdgpu_device *adev, tmp = RREG32(IH_RB_CNTL); tmp |= IH_RB_CNTL__WPTR_OVERFLOW_CLEAR_MASK; WREG32(IH_RB_CNTL, tmp); + + /* Unset the CLEAR_OVERFLOW bit immediately so new overflows + * can be detected. + */ + tmp &= ~IH_RB_CNTL__WPTR_OVERFLOW_CLEAR_MASK; + WREG32(IH_RB_CNTL, tmp); } return (wptr & ih->ptr_mask); } diff --git a/drivers/gpu/drm/amd/amdgpu/smuio_v14_0_2.c b/drivers/gpu/drm/amd/amdgpu/smuio_v14_0_2.c new file mode 100644 index 000000000000..2a51a70d4846 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/smuio_v14_0_2.c @@ -0,0 +1,62 @@ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#include "amdgpu.h" +#include "smuio_v14_0_2.h" +#include "smuio/smuio_14_0_2_offset.h" +#include "smuio/smuio_14_0_2_sh_mask.h" +#include <linux/preempt.h> + +static u32 smuio_v14_0_2_get_rom_index_offset(struct amdgpu_device *adev) +{ + return SOC15_REG_OFFSET(SMUIO, 0, regROM_INDEX); +} + +static u32 smuio_v14_0_2_get_rom_data_offset(struct amdgpu_device *adev) +{ + return SOC15_REG_OFFSET(SMUIO, 0, regROM_DATA); +} + +static u64 smuio_v14_0_2_get_gpu_clock_counter(struct amdgpu_device *adev) +{ + u64 clock; + u64 clock_counter_lo, clock_counter_hi_pre, clock_counter_hi_after; + + preempt_disable(); + clock_counter_hi_pre = (u64)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_UPPER); + clock_counter_lo = (u64)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_LOWER); + /* the clock counter may be udpated during polling the counters */ + clock_counter_hi_after = (u64)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_UPPER); + if (clock_counter_hi_pre != clock_counter_hi_after) + clock_counter_lo = (u64)RREG32_SOC15(SMUIO, 0, regGOLDEN_TSC_COUNT_LOWER); + preempt_enable(); + + clock = clock_counter_lo | (clock_counter_hi_after << 32ULL); + + return clock; +} + +const struct amdgpu_smuio_funcs smuio_v14_0_2_funcs = { + .get_rom_index_offset = smuio_v14_0_2_get_rom_index_offset, + .get_rom_data_offset = smuio_v14_0_2_get_rom_data_offset, + .get_gpu_clock_counter = smuio_v14_0_2_get_gpu_clock_counter, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/smuio_v14_0_2.h b/drivers/gpu/drm/amd/amdgpu/smuio_v14_0_2.h new file mode 100644 index 000000000000..6e617f832d90 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/smuio_v14_0_2.h @@ -0,0 +1,30 @@ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#ifndef __SMUIO_V14_0_2_H__ +#define __SMUIO_V14_0_2_H__ + +#include "soc15_common.h" + +extern const struct amdgpu_smuio_funcs smuio_v14_0_2_funcs; + +#endif /* __SMUIO_V14_0_2_H__ */ diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index 66ed28136bc8..c8abbf5da736 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -502,7 +502,7 @@ static int soc15_asic_baco_reset(struct amdgpu_device *adev) static enum amd_reset_method soc15_asic_reset_method(struct amdgpu_device *adev) { - bool baco_reset = false; + int baco_reset = 0; bool connected_to_cpu = false; struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); @@ -540,7 +540,7 @@ soc15_asic_reset_method(struct amdgpu_device *adev) */ if (ras && adev->ras_enabled && adev->pm.fw_version <= 0x283400) - baco_reset = false; + baco_reset = 0; } else { baco_reset = amdgpu_dpm_is_baco_supported(adev); } @@ -574,11 +574,34 @@ soc15_asic_reset_method(struct amdgpu_device *adev) return AMD_RESET_METHOD_MODE1; } +static bool soc15_need_reset_on_resume(struct amdgpu_device *adev) +{ + u32 sol_reg; + + sol_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81); + + /* Will reset for the following suspend abort cases. + * 1) Only reset limit on APU side, dGPU hasn't checked yet. + * 2) S3 suspend abort and TOS already launched. + */ + if (adev->flags & AMD_IS_APU && adev->in_s3 && + !adev->suspend_complete && + sol_reg) + return true; + + return false; +} + static int soc15_asic_reset(struct amdgpu_device *adev) { /* original raven doesn't have full asic reset */ - if ((adev->apu_flags & AMD_APU_IS_RAVEN) || - (adev->apu_flags & AMD_APU_IS_RAVEN2)) + /* On the latest Raven, the GPU reset can be performed + * successfully. So now, temporarily enable it for the + * S3 suspend abort case. + */ + if (((adev->apu_flags & AMD_APU_IS_RAVEN) || + (adev->apu_flags & AMD_APU_IS_RAVEN2)) && + !soc15_need_reset_on_resume(adev)) return 0; switch (soc15_asic_reset_method(adev)) { @@ -597,7 +620,7 @@ static int soc15_asic_reset(struct amdgpu_device *adev) } } -static bool soc15_supports_baco(struct amdgpu_device *adev) +static int soc15_supports_baco(struct amdgpu_device *adev) { switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) { case IP_VERSION(9, 0, 0): @@ -605,13 +628,13 @@ static bool soc15_supports_baco(struct amdgpu_device *adev) if (adev->asic_type == CHIP_VEGA20) { if (adev->psp.sos.fw_version >= 0x80067) return amdgpu_dpm_is_baco_supported(adev); - return false; + return 0; } else { return amdgpu_dpm_is_baco_supported(adev); } break; default: - return false; + return 0; } } @@ -646,8 +669,7 @@ static void soc15_program_aspm(struct amdgpu_device *adev) if (!amdgpu_device_should_use_aspm(adev)) return; - if (!(adev->flags & AMD_IS_APU) && - (adev->nbio.funcs->program_aspm)) + if (adev->nbio.funcs->program_aspm) adev->nbio.funcs->program_aspm(adev); } @@ -896,13 +918,13 @@ static const struct amdgpu_asic_funcs aqua_vanjaram_asic_funcs = .get_config_memsize = &soc15_get_config_memsize, .need_full_reset = &soc15_need_full_reset, .init_doorbell_index = &aqua_vanjaram_doorbell_index_init, - .get_pcie_usage = &amdgpu_nbio_get_pcie_usage, .need_reset_on_init = &soc15_need_reset_on_init, .get_pcie_replay_count = &amdgpu_nbio_get_pcie_replay_count, .supports_baco = &soc15_supports_baco, .pre_asic_init = &soc15_pre_asic_init, .query_video_codecs = &soc15_query_video_codecs, .encode_ext_smn_addressing = &aqua_vanjaram_encode_ext_smn_addressing, + .get_reg_state = &aqua_vanjaram_get_reg_state, }; static int soc15_common_early_init(void *handle) @@ -1162,6 +1184,11 @@ static int soc15_common_early_init(void *handle) AMD_PG_SUPPORT_VCN_DPG | AMD_PG_SUPPORT_JPEG; adev->external_rev_id = adev->rev_id + 0x46; + /* GC 9.4.3 uses MMIO register region hole at a different offset */ + if (!amdgpu_sriov_vf(adev)) { + adev->rmmio_remap.reg_offset = 0x1A000; + adev->rmmio_remap.bus_addr = adev->rmmio_base + 0x1A000; + } break; default: /* FIXME: not supported yet */ @@ -1273,7 +1300,8 @@ static int soc15_common_hw_fini(void *handle) if (amdgpu_sriov_vf(adev)) xgpu_ai_mailbox_put_irq(adev); - if (adev->nbio.ras_if && + if ((!amdgpu_sriov_vf(adev)) && + adev->nbio.ras_if && amdgpu_ras_is_supported(adev, adev->nbio.ras_if->block)) { if (adev->nbio.ras && adev->nbio.ras->init_ras_controller_interrupt) @@ -1297,6 +1325,10 @@ static int soc15_common_resume(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + if (soc15_need_reset_on_resume(adev)) { + dev_info(adev->dev, "S3 suspend abort case, let's reset ASIC.\n"); + soc15_asic_reset(adev); + } return soc15_common_hw_init(adev); } @@ -1419,11 +1451,14 @@ static void soc15_common_get_clockgating_state(void *handle, u64 *flags) if (amdgpu_sriov_vf(adev)) *flags = 0; - adev->nbio.funcs->get_clockgating_state(adev, flags); + if (adev->nbio.funcs && adev->nbio.funcs->get_clockgating_state) + adev->nbio.funcs->get_clockgating_state(adev, flags); - adev->hdp.funcs->get_clock_gating_state(adev, flags); + if (adev->hdp.funcs && adev->hdp.funcs->get_clock_gating_state) + adev->hdp.funcs->get_clock_gating_state(adev, flags); - if (amdgpu_ip_version(adev, MP0_HWIP, 0) != IP_VERSION(13, 0, 2)) { + if ((amdgpu_ip_version(adev, MP0_HWIP, 0) != IP_VERSION(13, 0, 2)) && + (amdgpu_ip_version(adev, MP0_HWIP, 0) != IP_VERSION(13, 0, 6))) { /* AMD_CG_SUPPORT_DRM_MGCG */ data = RREG32(SOC15_REG_OFFSET(MP0, 0, mmMP0_MISC_CGTT_CTRL0)); if (!(data & 0x01000000)) @@ -1436,9 +1471,11 @@ static void soc15_common_get_clockgating_state(void *handle, u64 *flags) } /* AMD_CG_SUPPORT_ROM_MGCG */ - adev->smuio.funcs->get_clock_gating_state(adev, flags); + if (adev->smuio.funcs && adev->smuio.funcs->get_clock_gating_state) + adev->smuio.funcs->get_clock_gating_state(adev, flags); - adev->df.funcs->get_clockgating_state(adev, flags); + if (adev->df.funcs && adev->df.funcs->get_clockgating_state) + adev->df.funcs->get_clockgating_state(adev, flags); } static int soc15_common_set_powergating_state(void *handle, diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.h b/drivers/gpu/drm/amd/amdgpu/soc15.h index eac54042c6c0..1444b7765e4b 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.h +++ b/drivers/gpu/drm/amd/amdgpu/soc15.h @@ -27,6 +27,7 @@ #include "nbio_v6_1.h" #include "nbio_v7_0.h" #include "nbio_v7_4.h" +#include "amdgpu_reg_state.h" extern const struct amdgpu_ip_block_version vega10_common_ip_block; @@ -114,6 +115,9 @@ int aldebaran_reg_base_init(struct amdgpu_device *adev); void aqua_vanjaram_ip_map_init(struct amdgpu_device *adev); u64 aqua_vanjaram_encode_ext_smn_addressing(int ext_id); int aqua_vanjaram_init_soc_config(struct amdgpu_device *adev); +ssize_t aqua_vanjaram_get_reg_state(struct amdgpu_device *adev, + enum amdgpu_reg_state reg_state, void *buf, + size_t max_size); void vega10_doorbell_index_init(struct amdgpu_device *adev); void vega20_doorbell_index_init(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/soc15_common.h b/drivers/gpu/drm/amd/amdgpu/soc15_common.h index da683afa0222..242b24f73c17 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15_common.h +++ b/drivers/gpu/drm/amd/amdgpu/soc15_common.h @@ -69,7 +69,7 @@ #define RREG32_SOC15_IP(ip, reg) __RREG32_SOC15_RLC__(reg, 0, ip##_HWIP, 0) -#define RREG32_SOC15_IP_NO_KIQ(ip, reg) __RREG32_SOC15_RLC__(reg, AMDGPU_REGS_NO_KIQ, ip##_HWIP, 0) +#define RREG32_SOC15_IP_NO_KIQ(ip, reg, inst) __RREG32_SOC15_RLC__(reg, AMDGPU_REGS_NO_KIQ, ip##_HWIP, inst) #define RREG32_SOC15_NO_KIQ(ip, inst, reg) \ __RREG32_SOC15_RLC__(adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg, \ @@ -86,8 +86,8 @@ #define WREG32_SOC15_IP(ip, reg, value) \ __WREG32_SOC15_RLC__(reg, value, 0, ip##_HWIP, 0) -#define WREG32_SOC15_IP_NO_KIQ(ip, reg, value) \ - __WREG32_SOC15_RLC__(reg, value, AMDGPU_REGS_NO_KIQ, ip##_HWIP, 0) +#define WREG32_SOC15_IP_NO_KIQ(ip, reg, value, inst) \ + __WREG32_SOC15_RLC__(reg, value, AMDGPU_REGS_NO_KIQ, ip##_HWIP, inst) #define WREG32_SOC15_NO_KIQ(ip, inst, reg, value) \ __WREG32_SOC15_RLC__(adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg, \ @@ -140,7 +140,7 @@ /* for GC only */ #define RREG32_RLC(reg) \ - __RREG32_SOC15_RLC__(reg, AMDGPU_REGS_RLC, GC_HWIP) + __RREG32_SOC15_RLC__(reg, AMDGPU_REGS_RLC, GC_HWIP, 0) #define WREG32_RLC_NO_KIQ(reg, value, hwip) \ __WREG32_SOC15_RLC__(reg, value, AMDGPU_REGS_NO_KIQ | AMDGPU_REGS_RLC, hwip, 0) @@ -204,4 +204,10 @@ + adev->asic_funcs->encode_ext_smn_addressing(ext), \ value) \ +#define RREG64_MCA(ext, mca_base, idx) \ + RREG64_PCIE_EXT(adev->asic_funcs->encode_ext_smn_addressing(ext) + mca_base + (idx * 8)) + +#define WREG64_MCA(ext, mca_base, idx, val) \ + WREG64_PCIE_EXT(adev->asic_funcs->encode_ext_smn_addressing(ext) + mca_base + (idx * 8), val) + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/soc21.c b/drivers/gpu/drm/amd/amdgpu/soc21.c index df7462cec6ab..43ca63fe85ac 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc21.c +++ b/drivers/gpu/drm/amd/amdgpu/soc21.c @@ -50,13 +50,13 @@ static const struct amd_ip_funcs soc21_common_ip_funcs; /* SOC21 */ static const struct amdgpu_video_codec_info vcn_4_0_0_video_codecs_encode_array_vcn0[] = { {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 2304, 0)}, - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 4096, 2304, 0)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 0)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1, 8192, 4352, 0)}, }; static const struct amdgpu_video_codec_info vcn_4_0_0_video_codecs_encode_array_vcn1[] = { {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 2304, 0)}, - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 4096, 2304, 0)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 0)}, }; static const struct amdgpu_video_codecs vcn_4_0_0_video_codecs_encode_vcn0 = { @@ -185,6 +185,12 @@ static int soc21_query_video_codecs(struct amdgpu_device *adev, bool encode, } } return 0; + case IP_VERSION(4, 0, 6): + if (encode) + *codecs = &vcn_4_0_0_video_codecs_encode_vcn0; + else + *codecs = &vcn_4_0_0_video_codecs_decode_vcn0; + return 0; default: return -EINVAL; } @@ -381,6 +387,8 @@ soc21_asic_reset_method(struct amdgpu_device *adev) return AMD_RESET_METHOD_MODE1; case IP_VERSION(13, 0, 4): case IP_VERSION(13, 0, 11): + case IP_VERSION(14, 0, 0): + case IP_VERSION(14, 0, 1): return AMD_RESET_METHOD_MODE2; default: if (amdgpu_dpm_is_baco_supported(adev)) @@ -433,8 +441,7 @@ static void soc21_program_aspm(struct amdgpu_device *adev) if (!amdgpu_device_should_use_aspm(adev)) return; - if (!(adev->flags & AMD_IS_APU) && - (adev->nbio.funcs->program_aspm)) + if (adev->nbio.funcs->program_aspm) adev->nbio.funcs->program_aspm(adev); } @@ -450,10 +457,8 @@ static bool soc21_need_full_reset(struct amdgpu_device *adev) { switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { case IP_VERSION(11, 0, 0): - return amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__UMC); case IP_VERSION(11, 0, 2): case IP_VERSION(11, 0, 3): - return false; default: return true; } @@ -711,10 +716,43 @@ static int soc21_common_early_init(void *handle) AMD_CG_SUPPORT_BIF_MGCG | AMD_CG_SUPPORT_BIF_LS; adev->pg_flags = AMD_PG_SUPPORT_VCN_DPG | + AMD_PG_SUPPORT_JPEG_DPG | AMD_PG_SUPPORT_VCN | AMD_PG_SUPPORT_JPEG | AMD_PG_SUPPORT_GFX_PG; - adev->external_rev_id = adev->rev_id + 0x1; + if (adev->rev_id == 0) + adev->external_rev_id = 0x1; + else + adev->external_rev_id = adev->rev_id + 0x10; + break; + case IP_VERSION(11, 5, 1): + adev->cg_flags = + AMD_CG_SUPPORT_GFX_CGCG | + AMD_CG_SUPPORT_GFX_CGLS | + AMD_CG_SUPPORT_GFX_MGCG | + AMD_CG_SUPPORT_GFX_FGCG | + AMD_CG_SUPPORT_REPEATER_FGCG | + AMD_CG_SUPPORT_GFX_PERF_CLK | + AMD_CG_SUPPORT_GFX_3D_CGCG | + AMD_CG_SUPPORT_GFX_3D_CGLS | + AMD_CG_SUPPORT_MC_MGCG | + AMD_CG_SUPPORT_MC_LS | + AMD_CG_SUPPORT_HDP_LS | + AMD_CG_SUPPORT_HDP_DS | + AMD_CG_SUPPORT_HDP_SD | + AMD_CG_SUPPORT_ATHUB_MGCG | + AMD_CG_SUPPORT_ATHUB_LS | + AMD_CG_SUPPORT_IH_CG | + AMD_CG_SUPPORT_BIF_MGCG | + AMD_CG_SUPPORT_BIF_LS | + AMD_CG_SUPPORT_VCN_MGCG | + AMD_CG_SUPPORT_JPEG_MGCG; + adev->pg_flags = + AMD_PG_SUPPORT_GFX_PG | + AMD_PG_SUPPORT_VCN | + AMD_PG_SUPPORT_VCN_DPG | + AMD_PG_SUPPORT_JPEG; + adev->external_rev_id = adev->rev_id + 0xc1; break; default: /* FIXME: not supported yet */ @@ -832,10 +870,35 @@ static int soc21_common_suspend(void *handle) return soc21_common_hw_fini(adev); } +static bool soc21_need_reset_on_resume(struct amdgpu_device *adev) +{ + u32 sol_reg1, sol_reg2; + + /* Will reset for the following suspend abort cases. + * 1) Only reset dGPU side. + * 2) S3 suspend got aborted and TOS is active. + */ + if (!(adev->flags & AMD_IS_APU) && adev->in_s3 && + !adev->suspend_complete) { + sol_reg1 = RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_81); + msleep(100); + sol_reg2 = RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_81); + + return (sol_reg1 != sol_reg2); + } + + return false; +} + static int soc21_common_resume(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + if (soc21_need_reset_on_resume(adev)) { + dev_info(adev->dev, "S3 suspend aborted, resetting..."); + soc21_asic_reset(adev); + } + return soc21_common_hw_init(adev); } @@ -863,6 +926,9 @@ static int soc21_common_set_clockgating_state(void *handle, case IP_VERSION(4, 3, 0): case IP_VERSION(4, 3, 1): case IP_VERSION(7, 7, 0): + case IP_VERSION(7, 7, 1): + case IP_VERSION(7, 11, 0): + case IP_VERSION(7, 11, 1): adev->nbio.funcs->update_medium_grain_clock_gating(adev, state == AMD_CG_STATE_GATE); adev->nbio.funcs->update_medium_grain_light_sleep(adev, diff --git a/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h b/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h index 879bb7af297c..3ac56a9645eb 100644 --- a/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h +++ b/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h @@ -36,6 +36,9 @@ enum ras_command { TA_RAS_COMMAND__ENABLE_FEATURES = 0, TA_RAS_COMMAND__DISABLE_FEATURES, TA_RAS_COMMAND__TRIGGER_ERROR, + TA_RAS_COMMAND__QUERY_BLOCK_INFO, + TA_RAS_COMMAND__QUERY_SUB_BLOCK_INFO, + TA_RAS_COMMAND__QUERY_ADDRESS, }; enum ta_ras_status { @@ -105,6 +108,11 @@ enum ta_ras_error_type { TA_RAS_ERROR__POISON = 8, }; +enum ta_ras_address_type { + TA_RAS_MCA_TO_PA, + TA_RAS_PA_TO_MCA, +}; + /* Input/output structures for RAS commands */ /**********************************************************/ @@ -133,12 +141,39 @@ struct ta_ras_init_flags { uint8_t channel_dis_num; }; +struct ta_ras_mca_addr { + uint64_t err_addr; + uint32_t ch_inst; + uint32_t umc_inst; + uint32_t node_inst; + uint32_t socket_id; +}; + +struct ta_ras_phy_addr { + uint64_t pa; + uint32_t bank; + uint32_t channel_idx; +}; + +struct ta_ras_query_address_input { + enum ta_ras_address_type addr_type; + struct ta_ras_mca_addr ma; + struct ta_ras_phy_addr pa; +}; + struct ta_ras_output_flags { uint8_t ras_init_success_flag; uint8_t err_inject_switch_disable_flag; uint8_t reg_access_failure_flag; }; +struct ta_ras_query_address_output { + /* don't use the flags here */ + struct ta_ras_output_flags flags; + struct ta_ras_mca_addr ma; + struct ta_ras_phy_addr pa; +}; + /* Common input structure for RAS callbacks */ /**********************************************************/ union ta_ras_cmd_input { @@ -146,12 +181,14 @@ union ta_ras_cmd_input { struct ta_ras_enable_features_input enable_features; struct ta_ras_disable_features_input disable_features; struct ta_ras_trigger_error_input trigger_error; + struct ta_ras_query_address_input address; uint32_t reserve_pad[256]; }; union ta_ras_cmd_output { struct ta_ras_output_flags flags; + struct ta_ras_query_address_output address; uint32_t reserve_pad[256]; }; diff --git a/drivers/gpu/drm/amd/amdgpu/ta_xgmi_if.h b/drivers/gpu/drm/amd/amdgpu/ta_xgmi_if.h index da815a93d46e..d5748032674e 100644 --- a/drivers/gpu/drm/amd/amdgpu/ta_xgmi_if.h +++ b/drivers/gpu/drm/amd/amdgpu/ta_xgmi_if.h @@ -1,5 +1,5 @@ /* - * Copyright 2018 Advanced Micro Devices, Inc. + * Copyright 2018-2022 Advanced Micro Devices, Inc. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -20,7 +20,6 @@ * OTHER DEALINGS IN THE SOFTWARE. * */ - #ifndef _TA_XGMI_IF_H #define _TA_XGMI_IF_H @@ -28,20 +27,31 @@ #define RSP_ID_MASK (1U << 31) #define RSP_ID(cmdId) (((uint32_t)(cmdId)) | RSP_ID_MASK) +#define EXTEND_PEER_LINK_INFO_CMD_FLAG 1 + enum ta_command_xgmi { + /* Initialize the Context and Session Topology */ TA_COMMAND_XGMI__INITIALIZE = 0x00, + /* Gets the current GPU's node ID */ TA_COMMAND_XGMI__GET_NODE_ID = 0x01, + /* Gets the current GPU's hive ID */ TA_COMMAND_XGMI__GET_HIVE_ID = 0x02, - TA_COMMAND_XGMI__GET_GET_TOPOLOGY_INFO = 0x03, + /* Gets the Peer's topology Information */ + TA_COMMAND_XGMI__GET_TOPOLOGY_INFO = 0x03, + /* Sets the Peer's topology Information */ TA_COMMAND_XGMI__SET_TOPOLOGY_INFO = 0x04, - TA_COMMAND_XGMI__GET_PEER_LINKS = 0x0B + /* Gets the total links between adjacent peer dies in hive */ + TA_COMMAND_XGMI__GET_PEER_LINKS = 0x0B, + /* Gets the total links and connected port numbers between adjacent peer dies in hive */ + TA_COMMAND_XGMI__GET_EXTEND_PEER_LINKS = 0x0C }; /* XGMI related enumerations */ /**********************************************************/; -enum ta_xgmi_connected_nodes { - TA_XGMI__MAX_CONNECTED_NODES = 64 -}; +enum { TA_XGMI__MAX_CONNECTED_NODES = 64 }; +enum { TA_XGMI__MAX_INTERNAL_STATE = 32 }; +enum { TA_XGMI__MAX_INTERNAL_STATE_BUFFER = 128 }; +enum { TA_XGMI__MAX_PORT_NUM = 8 }; enum ta_xgmi_status { TA_XGMI_STATUS__SUCCESS = 0x00, @@ -81,6 +91,18 @@ struct ta_xgmi_peer_link_info { uint8_t num_links; }; +struct xgmi_connected_port_num { + uint8_t dst_xgmi_port_num; + uint8_t src_xgmi_port_num; +}; + +/* support both the port num and num_links */ +struct ta_xgmi_extend_peer_link_info { + uint64_t node_id; + uint8_t num_links; + struct xgmi_connected_port_num port_num[TA_XGMI__MAX_PORT_NUM]; +}; + struct ta_xgmi_cmd_initialize_output { uint32_t status; }; @@ -103,16 +125,21 @@ struct ta_xgmi_cmd_get_topology_info_output { struct ta_xgmi_node_info nodes[TA_XGMI__MAX_CONNECTED_NODES]; }; -struct ta_xgmi_cmd_get_peer_link_info_output { +struct ta_xgmi_cmd_set_topology_info_input { uint32_t num_nodes; - struct ta_xgmi_peer_link_info nodes[TA_XGMI__MAX_CONNECTED_NODES]; + struct ta_xgmi_node_info nodes[TA_XGMI__MAX_CONNECTED_NODES]; }; -struct ta_xgmi_cmd_set_topology_info_input { +/* support XGMI TA w/ and w/o port_num both so two similar structs defined */ +struct ta_xgmi_cmd_get_peer_link_info { uint32_t num_nodes; - struct ta_xgmi_node_info nodes[TA_XGMI__MAX_CONNECTED_NODES]; + struct ta_xgmi_peer_link_info nodes[TA_XGMI__MAX_CONNECTED_NODES]; }; +struct ta_xgmi_cmd_get_extend_peer_link_info { + uint32_t num_nodes; + struct ta_xgmi_extend_peer_link_info nodes[TA_XGMI__MAX_CONNECTED_NODES]; +}; /**********************************************************/ /* Common input structure for XGMI callbacks */ union ta_xgmi_cmd_input { @@ -126,16 +153,23 @@ union ta_xgmi_cmd_output { struct ta_xgmi_cmd_get_node_id_output get_node_id; struct ta_xgmi_cmd_get_hive_id_output get_hive_id; struct ta_xgmi_cmd_get_topology_info_output get_topology_info; - struct ta_xgmi_cmd_get_peer_link_info_output get_link_info; + struct ta_xgmi_cmd_get_peer_link_info get_link_info; + struct ta_xgmi_cmd_get_extend_peer_link_info get_extend_link_info; }; -/**********************************************************/ struct ta_xgmi_shared_memory { uint32_t cmd_id; uint32_t resp_id; enum ta_xgmi_status xgmi_status; + + /* if the number of xgmi link record is more than 128, driver will set the + * flag 0 to get the first 128 of the link records and will set to 1, to get + * the second set + */ uint8_t flag_extend_link_record; - uint8_t reserved0[3]; + /* bit0: port_num info support flag for GET_EXTEND_PEER_LINKS commmand */ + uint8_t caps_flag; + uint8_t reserved[2]; union ta_xgmi_cmd_input xgmi_in_message; union ta_xgmi_cmd_output xgmi_out_message; }; diff --git a/drivers/gpu/drm/amd/amdgpu/tonga_ih.c b/drivers/gpu/drm/amd/amdgpu/tonga_ih.c index 917707bba7f3..450b6e831509 100644 --- a/drivers/gpu/drm/amd/amdgpu/tonga_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/tonga_ih.c @@ -219,6 +219,12 @@ static u32 tonga_ih_get_wptr(struct amdgpu_device *adev, tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1); WREG32(mmIH_RB_CNTL, tmp); + /* Unset the CLEAR_OVERFLOW bit immediately so new overflows + * can be detected. + */ + tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0); + WREG32(mmIH_RB_CNTL, tmp); + out: return (wptr & ih->ptr_mask); } diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c index b664ee3ee92d..a0122b22eda4 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c @@ -26,28 +26,7 @@ #include "amdgpu.h" #include "umc/umc_12_0_0_offset.h" #include "umc/umc_12_0_0_sh_mask.h" - -const uint32_t - umc_v12_0_channel_idx_tbl[] - [UMC_V12_0_UMC_INSTANCE_NUM] - [UMC_V12_0_CHANNEL_INSTANCE_NUM] = { - {{3, 7, 11, 15, 2, 6, 10, 14}, {1, 5, 9, 13, 0, 4, 8, 12}, - {19, 23, 27, 31, 18, 22, 26, 30}, {17, 21, 25, 29, 16, 20, 24, 28}}, - {{47, 43, 39, 35, 46, 42, 38, 34}, {45, 41, 37, 33, 44, 40, 36, 32}, - {63, 59, 55, 51, 62, 58, 54, 50}, {61, 57, 53, 49, 60, 56, 52, 48}}, - {{79, 75, 71, 67, 78, 74, 70, 66}, {77, 73, 69, 65, 76, 72, 68, 64}, - {95, 91, 87, 83, 94, 90, 86, 82}, {93, 89, 85, 81, 92, 88, 84, 80}}, - {{99, 103, 107, 111, 98, 102, 106, 110}, {97, 101, 105, 109, 96, 100, 104, 108}, - {115, 119, 123, 127, 114, 118, 122, 126}, {113, 117, 121, 125, 112, 116, 120, 124}} - }; - -/* mapping of MCA error address to normalized address */ -static const uint32_t umc_v12_0_ma2na_mapping[] = { - 0, 5, 6, 8, 9, 14, 12, 13, - 10, 11, 15, 16, 17, 18, 19, 20, - 21, 22, 23, 24, 25, 26, 27, 28, - 24, 7, 29, 30, -}; +#include "mp/mp_13_0_6_sh_mask.h" static inline uint64_t get_umc_v12_0_reg_offset(struct amdgpu_device *adev, uint32_t node_inst, @@ -88,30 +67,54 @@ static void umc_v12_0_reset_error_count(struct amdgpu_device *adev) umc_v12_0_reset_error_count_per_channel, NULL); } -static void umc_v12_0_query_correctable_error_count(struct amdgpu_device *adev, - uint64_t umc_reg_offset, - unsigned long *error_count) +bool umc_v12_0_is_deferred_error(struct amdgpu_device *adev, uint64_t mc_umc_status) { - uint64_t mc_umc_status; - uint64_t mc_umc_status_addr; + dev_info(adev->dev, + "MCA_UMC_STATUS(0x%llx): Val:%llu, Poison:%llu, Deferred:%llu, PCC:%llu, UC:%llu, TCC:%llu\n", + mc_umc_status, + REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val), + REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Poison), + REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred), + REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, PCC), + REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UC), + REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, TCC) + ); + + return (amdgpu_ras_is_poison_mode_supported(adev) && + (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1) && + (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1)); +} - mc_umc_status_addr = - SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_STATUST0); +bool umc_v12_0_is_uncorrectable_error(struct amdgpu_device *adev, uint64_t mc_umc_status) +{ + if (umc_v12_0_is_deferred_error(adev, mc_umc_status)) + return false; - /* Rely on MCUMC_STATUS for correctable error counter - * MCUMC_STATUS is a 64 bit register - */ - mc_umc_status = - RREG64_PCIE_EXT((mc_umc_status_addr + umc_reg_offset) * 4); + return ((REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1) && + (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, PCC) == 1 || + REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UC) == 1 || + REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, TCC) == 1)); +} - if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 && - REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1) - *error_count += 1; +bool umc_v12_0_is_correctable_error(struct amdgpu_device *adev, uint64_t mc_umc_status) +{ + if (umc_v12_0_is_deferred_error(adev, mc_umc_status)) + return false; + + return (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 && + (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1 || + (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 && + REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UC) == 0) || + /* Identify data parity error in replay mode */ + ((REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, ErrorCodeExt) == 0x5 || + REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, ErrorCodeExt) == 0xb) && + !(umc_v12_0_is_uncorrectable_error(adev, mc_umc_status))))); } -static void umc_v12_0_query_uncorrectable_error_count(struct amdgpu_device *adev, - uint64_t umc_reg_offset, - unsigned long *error_count) +static void umc_v12_0_query_error_count_per_type(struct amdgpu_device *adev, + uint64_t umc_reg_offset, + unsigned long *error_count, + check_error_type_func error_type_func) { uint64_t mc_umc_status; uint64_t mc_umc_status_addr; @@ -119,16 +122,11 @@ static void umc_v12_0_query_uncorrectable_error_count(struct amdgpu_device *adev mc_umc_status_addr = SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_STATUST0); - /* Check the MCUMC_STATUS. */ + /* Check MCUMC_STATUS */ mc_umc_status = RREG64_PCIE_EXT((mc_umc_status_addr + umc_reg_offset) * 4); - if ((REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1) && - (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1 || - REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 || - REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, PCC) == 1 || - REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UC) == 1 || - REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, TCC) == 1)) + if (error_type_func(adev, mc_umc_status)) *error_count += 1; } @@ -137,7 +135,7 @@ static int umc_v12_0_query_error_count(struct amdgpu_device *adev, uint32_t ch_inst, void *data) { struct ras_err_data *err_data = (struct ras_err_data *)data; - unsigned long ue_count = 0, ce_count = 0; + unsigned long ue_count = 0, ce_count = 0, de_count = 0; /* NOTE: node_inst is converted by adev->umc.active_mask and the range is [0-3], * which can be used as die ID directly */ @@ -149,11 +147,16 @@ static int umc_v12_0_query_error_count(struct amdgpu_device *adev, uint64_t umc_reg_offset = get_umc_v12_0_reg_offset(adev, node_inst, umc_inst, ch_inst); - umc_v12_0_query_correctable_error_count(adev, umc_reg_offset, &ce_count); - umc_v12_0_query_uncorrectable_error_count(adev, umc_reg_offset, &ue_count); + umc_v12_0_query_error_count_per_type(adev, umc_reg_offset, + &ce_count, umc_v12_0_is_correctable_error); + umc_v12_0_query_error_count_per_type(adev, umc_reg_offset, + &ue_count, umc_v12_0_is_uncorrectable_error); + umc_v12_0_query_error_count_per_type(adev, umc_reg_offset, + &de_count, umc_v12_0_is_deferred_error); - amdgpu_ras_error_statistic_ue_count(err_data, &mcm_info, ue_count); - amdgpu_ras_error_statistic_ce_count(err_data, &mcm_info, ce_count); + amdgpu_ras_error_statistic_ue_count(err_data, &mcm_info, NULL, ue_count); + amdgpu_ras_error_statistic_ce_count(err_data, &mcm_info, NULL, ce_count); + amdgpu_ras_error_statistic_de_count(err_data, &mcm_info, NULL, de_count); return 0; } @@ -167,80 +170,35 @@ static void umc_v12_0_query_ras_error_count(struct amdgpu_device *adev, umc_v12_0_reset_error_count(adev); } -static bool umc_v12_0_bit_wise_xor(uint32_t val) +static void umc_v12_0_convert_error_address(struct amdgpu_device *adev, + struct ras_err_data *err_data, + struct ta_ras_query_address_input *addr_in) { - bool result = 0; - int i; + uint32_t col, row, row_xor, bank, channel_index; + uint64_t soc_pa, retired_page, column, err_addr; + struct ta_ras_query_address_output addr_out; - for (i = 0; i < 32; i++) - result = result ^ ((val >> i) & 0x1); + err_addr = addr_in->ma.err_addr; + addr_in->addr_type = TA_RAS_MCA_TO_PA; + if (psp_ras_query_address(&adev->psp, addr_in, &addr_out)) { + dev_warn(adev->dev, "Failed to query RAS physical address for 0x%llx", + err_addr); - return result; -} + return; + } + + soc_pa = addr_out.pa.pa; + bank = addr_out.pa.bank; + channel_index = addr_out.pa.channel_idx; -static void umc_v12_0_convert_error_address(struct amdgpu_device *adev, - struct ras_err_data *err_data, uint64_t err_addr, - uint32_t ch_inst, uint32_t umc_inst, - uint32_t node_inst) -{ - uint32_t channel_index, i; - uint64_t soc_pa, na, retired_page, column; - uint32_t bank_hash0, bank_hash1, bank_hash2, bank_hash3, col, row, row_xor; - uint32_t bank0, bank1, bank2, bank3, bank; - - bank_hash0 = (err_addr >> UMC_V12_0_MCA_B0_BIT) & 0x1ULL; - bank_hash1 = (err_addr >> UMC_V12_0_MCA_B1_BIT) & 0x1ULL; - bank_hash2 = (err_addr >> UMC_V12_0_MCA_B2_BIT) & 0x1ULL; - bank_hash3 = (err_addr >> UMC_V12_0_MCA_B3_BIT) & 0x1ULL; col = (err_addr >> 1) & 0x1fULL; row = (err_addr >> 10) & 0x3fffULL; - - /* apply bank hash algorithm */ - bank0 = - bank_hash0 ^ (UMC_V12_0_XOR_EN0 & - (umc_v12_0_bit_wise_xor(col & UMC_V12_0_COL_XOR0) ^ - (umc_v12_0_bit_wise_xor(row & UMC_V12_0_ROW_XOR0)))); - bank1 = - bank_hash1 ^ (UMC_V12_0_XOR_EN1 & - (umc_v12_0_bit_wise_xor(col & UMC_V12_0_COL_XOR1) ^ - (umc_v12_0_bit_wise_xor(row & UMC_V12_0_ROW_XOR1)))); - bank2 = - bank_hash2 ^ (UMC_V12_0_XOR_EN2 & - (umc_v12_0_bit_wise_xor(col & UMC_V12_0_COL_XOR2) ^ - (umc_v12_0_bit_wise_xor(row & UMC_V12_0_ROW_XOR2)))); - bank3 = - bank_hash3 ^ (UMC_V12_0_XOR_EN3 & - (umc_v12_0_bit_wise_xor(col & UMC_V12_0_COL_XOR3) ^ - (umc_v12_0_bit_wise_xor(row & UMC_V12_0_ROW_XOR3)))); - - bank = bank0 | (bank1 << 1) | (bank2 << 2) | (bank3 << 3); - err_addr &= ~0x3c0ULL; - err_addr |= (bank << UMC_V12_0_MCA_B0_BIT); - - na = 0x0; - /* convert mca error address to normalized address */ - for (i = 1; i < ARRAY_SIZE(umc_v12_0_ma2na_mapping); i++) - na |= ((err_addr >> i) & 0x1ULL) << umc_v12_0_ma2na_mapping[i]; - - channel_index = - adev->umc.channel_idx_tbl[node_inst * adev->umc.umc_inst_num * - adev->umc.channel_inst_num + - umc_inst * adev->umc.channel_inst_num + - ch_inst]; - /* translate umc channel address to soc pa, 3 parts are included */ - soc_pa = ADDR_OF_32KB_BLOCK(na) | - ADDR_OF_256B_BLOCK(channel_index) | - OFFSET_IN_256B_BLOCK(na); - - /* the umc channel bits are not original values, they are hashed */ - UMC_V12_0_SET_CHANNEL_HASH(channel_index, soc_pa); - + row_xor = row ^ (0x1ULL << 13); /* clear [C3 C2] in soc physical address */ soc_pa &= ~(0x3ULL << UMC_V12_0_PA_C2_BIT); /* clear [C4] in soc physical address */ soc_pa &= ~(0x1ULL << UMC_V12_0_PA_C4_BIT); - row_xor = row ^ (0x1ULL << 13); /* loop for all possibilities of [C4 C3 C2] */ for (column = 0; column < UMC_V12_0_NA_MAP_PA_NUM; column++) { retired_page = soc_pa | ((column & 0x3) << UMC_V12_0_PA_C2_BIT); @@ -252,7 +210,7 @@ static void umc_v12_0_convert_error_address(struct amdgpu_device *adev, "Error Address(PA):0x%-10llx Row:0x%-4x Col:0x%-2x Bank:0x%x Channel:0x%x\n", retired_page, row, col, bank, channel_index); amdgpu_umc_fill_error_record(err_data, err_addr, - retired_page, channel_index, umc_inst); + retired_page, channel_index, addr_in->ma.umc_inst); /* shift R13 bit */ retired_page ^= (0x1ULL << UMC_V12_0_PA_R13_BIT); @@ -260,7 +218,7 @@ static void umc_v12_0_convert_error_address(struct amdgpu_device *adev, "Error Address(PA):0x%-10llx Row:0x%-4x Col:0x%-2x Bank:0x%x Channel:0x%x\n", retired_page, row_xor, col, bank, channel_index); amdgpu_umc_fill_error_record(err_data, err_addr, - retired_page, channel_index, umc_inst); + retired_page, channel_index, addr_in->ma.umc_inst); } } @@ -268,10 +226,11 @@ static int umc_v12_0_query_error_address(struct amdgpu_device *adev, uint32_t node_inst, uint32_t umc_inst, uint32_t ch_inst, void *data) { + struct ras_err_data *err_data = (struct ras_err_data *)data; + struct ta_ras_query_address_input addr_in; uint64_t mc_umc_status_addr; uint64_t mc_umc_status, err_addr; uint64_t mc_umc_addrt0; - struct ras_err_data *err_data = (struct ras_err_data *)data; uint64_t umc_reg_offset = get_umc_v12_0_reg_offset(adev, node_inst, umc_inst, ch_inst); @@ -291,10 +250,8 @@ static int umc_v12_0_query_error_address(struct amdgpu_device *adev, } /* calculate error address if ue error is detected */ - if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 && - REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, AddrV) == 1 && - REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1) { - + if (umc_v12_0_is_uncorrectable_error(adev, mc_umc_status) || + umc_v12_0_is_deferred_error(adev, mc_umc_status)) { mc_umc_addrt0 = SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_ADDRT0); @@ -302,8 +259,19 @@ static int umc_v12_0_query_error_address(struct amdgpu_device *adev, err_addr = REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr); - umc_v12_0_convert_error_address(adev, err_data, err_addr, - ch_inst, umc_inst, node_inst); + if (!adev->aid_mask && + adev->smuio.funcs && + adev->smuio.funcs->get_socket_id) + addr_in.ma.socket_id = adev->smuio.funcs->get_socket_id(adev); + else + addr_in.ma.socket_id = 0; + + addr_in.ma.err_addr = err_addr; + addr_in.ma.ch_inst = ch_inst; + addr_in.ma.umc_inst = umc_inst; + addr_in.ma.node_inst = node_inst; + + umc_v12_0_convert_error_address(adev, err_data, &addr_in); } /* clear umc status */ @@ -346,6 +314,94 @@ static int umc_v12_0_err_cnt_init_per_channel(struct amdgpu_device *adev, return 0; } +static void umc_v12_0_ecc_info_query_ras_error_count(struct amdgpu_device *adev, + void *ras_error_status) +{ + struct ras_query_context qctx; + + memset(&qctx, 0, sizeof(qctx)); + qctx.event_id = amdgpu_ras_acquire_event_id(adev, amdgpu_ras_intr_triggered() ? + RAS_EVENT_TYPE_ISR : RAS_EVENT_TYPE_INVALID); + + amdgpu_mca_smu_log_ras_error(adev, + AMDGPU_RAS_BLOCK__UMC, AMDGPU_MCA_ERROR_TYPE_CE, ras_error_status, &qctx); + amdgpu_mca_smu_log_ras_error(adev, + AMDGPU_RAS_BLOCK__UMC, AMDGPU_MCA_ERROR_TYPE_UE, ras_error_status, &qctx); +} + +static void umc_v12_0_ecc_info_query_ras_error_address(struct amdgpu_device *adev, + void *ras_error_status) +{ + struct ras_err_node *err_node; + uint64_t mc_umc_status; + struct ras_err_info *err_info; + struct ras_err_addr *mca_err_addr, *tmp; + struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; + struct ta_ras_query_address_input addr_in; + + for_each_ras_error(err_node, err_data) { + err_info = &err_node->err_info; + if (list_empty(&err_info->err_addr_list)) + continue; + + addr_in.ma.node_inst = err_info->mcm_info.die_id; + addr_in.ma.socket_id = err_info->mcm_info.socket_id; + + list_for_each_entry_safe(mca_err_addr, tmp, &err_info->err_addr_list, node) { + mc_umc_status = mca_err_addr->err_status; + if (mc_umc_status && + (umc_v12_0_is_uncorrectable_error(adev, mc_umc_status) || + umc_v12_0_is_deferred_error(adev, mc_umc_status))) { + uint64_t mca_addr, err_addr, mca_ipid; + uint32_t InstanceIdLo; + + mca_addr = mca_err_addr->err_addr; + mca_ipid = mca_err_addr->err_ipid; + + err_addr = REG_GET_FIELD(mca_addr, + MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr); + InstanceIdLo = REG_GET_FIELD(mca_ipid, MCMP1_IPIDT0, InstanceIdLo); + + addr_in.ma.err_addr = err_addr; + addr_in.ma.ch_inst = MCA_IPID_LO_2_UMC_CH(InstanceIdLo); + addr_in.ma.umc_inst = MCA_IPID_LO_2_UMC_INST(InstanceIdLo); + + dev_info(adev->dev, "UMC:IPID:0x%llx, aid:%d, inst:%d, ch:%d, err_addr:0x%llx\n", + mca_ipid, + err_info->mcm_info.die_id, + MCA_IPID_LO_2_UMC_INST(InstanceIdLo), + MCA_IPID_LO_2_UMC_CH(InstanceIdLo), + err_addr); + + umc_v12_0_convert_error_address(adev, + err_data, &addr_in); + } + + /* Delete error address node from list and free memory */ + amdgpu_ras_del_mca_err_addr(err_info, mca_err_addr); + } + } +} + +static bool umc_v12_0_check_ecc_err_status(struct amdgpu_device *adev, + enum amdgpu_mca_error_type type, void *ras_error_status) +{ + uint64_t mc_umc_status = *(uint64_t *)ras_error_status; + + switch (type) { + case AMDGPU_MCA_ERROR_TYPE_UE: + return umc_v12_0_is_uncorrectable_error(adev, mc_umc_status); + case AMDGPU_MCA_ERROR_TYPE_CE: + return umc_v12_0_is_correctable_error(adev, mc_umc_status); + case AMDGPU_MCA_ERROR_TYPE_DE: + return umc_v12_0_is_deferred_error(adev, mc_umc_status); + default: + return false; + } + + return false; +} + static void umc_v12_0_err_cnt_init(struct amdgpu_device *adev) { amdgpu_umc_loop_channels(adev, @@ -366,10 +422,72 @@ const struct amdgpu_ras_block_hw_ops umc_v12_0_ras_hw_ops = { .query_ras_error_address = umc_v12_0_query_ras_error_address, }; +static int umc_v12_0_aca_bank_parser(struct aca_handle *handle, struct aca_bank *bank, + enum aca_smu_type type, void *data) +{ + struct amdgpu_device *adev = handle->adev; + struct aca_bank_info info; + enum aca_error_type err_type; + u64 status, count; + u32 ext_error_code; + int ret; + + status = bank->regs[ACA_REG_IDX_STATUS]; + if (umc_v12_0_is_deferred_error(adev, status)) + err_type = ACA_ERROR_TYPE_DEFERRED; + else if (umc_v12_0_is_uncorrectable_error(adev, status)) + err_type = ACA_ERROR_TYPE_UE; + else if (umc_v12_0_is_correctable_error(adev, status)) + err_type = ACA_ERROR_TYPE_CE; + else + return 0; + + ret = aca_bank_info_decode(bank, &info); + if (ret) + return ret; + + ext_error_code = ACA_REG__STATUS__ERRORCODEEXT(status); + count = ext_error_code == 0 ? + ACA_REG__MISC0__ERRCNT(bank->regs[ACA_REG_IDX_MISC0]) : 1ULL; + + return aca_error_cache_log_bank_error(handle, &info, err_type, count); +} + +static const struct aca_bank_ops umc_v12_0_aca_bank_ops = { + .aca_bank_parser = umc_v12_0_aca_bank_parser, +}; + +const struct aca_info umc_v12_0_aca_info = { + .hwip = ACA_HWIP_TYPE_UMC, + .mask = ACA_ERROR_UE_MASK | ACA_ERROR_CE_MASK | ACA_ERROR_DEFERRED_MASK, + .bank_ops = &umc_v12_0_aca_bank_ops, +}; + +static int umc_v12_0_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block) +{ + int ret; + + ret = amdgpu_umc_ras_late_init(adev, ras_block); + if (ret) + return ret; + + ret = amdgpu_ras_bind_aca(adev, AMDGPU_RAS_BLOCK__UMC, + &umc_v12_0_aca_info, NULL); + if (ret) + return ret; + + return 0; +} + struct amdgpu_umc_ras umc_v12_0_ras = { .ras_block = { .hw_ops = &umc_v12_0_ras_hw_ops, + .ras_late_init = umc_v12_0_ras_late_init, }, .err_cnt_init = umc_v12_0_err_cnt_init, .query_ras_poison_mode = umc_v12_0_query_ras_poison_mode, + .ecc_info_query_ras_error_count = umc_v12_0_ecc_info_query_ras_error_count, + .ecc_info_query_ras_error_address = umc_v12_0_ecc_info_query_ras_error_address, + .check_ecc_err_status = umc_v12_0_check_ecc_err_status, }; + diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h index 4885b9fff272..1d5f44dcffdd 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h +++ b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h @@ -55,72 +55,22 @@ #define UMC_V12_0_NA_MAP_PA_NUM 8 /* R13 bit shift should be considered, double the number */ #define UMC_V12_0_BAD_PAGE_NUM_PER_CHANNEL (UMC_V12_0_NA_MAP_PA_NUM * 2) -/* bank bits in MCA error address */ -#define UMC_V12_0_MCA_B0_BIT 6 -#define UMC_V12_0_MCA_B1_BIT 7 -#define UMC_V12_0_MCA_B2_BIT 8 -#define UMC_V12_0_MCA_B3_BIT 9 + /* column bits in SOC physical address */ #define UMC_V12_0_PA_C2_BIT 15 #define UMC_V12_0_PA_C4_BIT 21 /* row bits in SOC physical address */ #define UMC_V12_0_PA_R13_BIT 35 -/* channel index bits in SOC physical address */ -#define UMC_V12_0_PA_CH4_BIT 12 -#define UMC_V12_0_PA_CH5_BIT 13 -#define UMC_V12_0_PA_CH6_BIT 14 - -/* bank hash settings */ -#define UMC_V12_0_XOR_EN0 1 -#define UMC_V12_0_XOR_EN1 1 -#define UMC_V12_0_XOR_EN2 1 -#define UMC_V12_0_XOR_EN3 1 -#define UMC_V12_0_COL_XOR0 0x0 -#define UMC_V12_0_COL_XOR1 0x0 -#define UMC_V12_0_COL_XOR2 0x800 -#define UMC_V12_0_COL_XOR3 0x1000 -#define UMC_V12_0_ROW_XOR0 0x11111 -#define UMC_V12_0_ROW_XOR1 0x22222 -#define UMC_V12_0_ROW_XOR2 0x4444 -#define UMC_V12_0_ROW_XOR3 0x8888 -/* channel hash settings */ -#define UMC_V12_0_HASH_4K 0 -#define UMC_V12_0_HASH_64K 1 -#define UMC_V12_0_HASH_2M 1 -#define UMC_V12_0_HASH_1G 1 -#define UMC_V12_0_HASH_1T 1 +#define MCA_IPID_LO_2_UMC_CH(_ipid_lo) (((((_ipid_lo) >> 20) & 0x1) * 4) + \ + (((_ipid_lo) >> 12) & 0xF)) +#define MCA_IPID_LO_2_UMC_INST(_ipid_lo) (((_ipid_lo) >> 21) & 0x7) -/* XOR some bits of PA into CH4~CH6 bits (bits 12~14 of PA), - * hash bit is only effective when related setting is enabled - */ -#define UMC_V12_0_CHANNEL_HASH_CH4(channel_idx, pa) ((((channel_idx) >> 5) & 0x1) ^ \ - (((pa) >> 20) & 0x1ULL & UMC_V12_0_HASH_64K) ^ \ - (((pa) >> 27) & 0x1ULL & UMC_V12_0_HASH_2M) ^ \ - (((pa) >> 34) & 0x1ULL & UMC_V12_0_HASH_1G) ^ \ - (((pa) >> 41) & 0x1ULL & UMC_V12_0_HASH_1T)) -#define UMC_V12_0_CHANNEL_HASH_CH5(channel_idx, pa) ((((channel_idx) >> 6) & 0x1) ^ \ - (((pa) >> 21) & 0x1ULL & UMC_V12_0_HASH_64K) ^ \ - (((pa) >> 28) & 0x1ULL & UMC_V12_0_HASH_2M) ^ \ - (((pa) >> 35) & 0x1ULL & UMC_V12_0_HASH_1G) ^ \ - (((pa) >> 42) & 0x1ULL & UMC_V12_0_HASH_1T)) -#define UMC_V12_0_CHANNEL_HASH_CH6(channel_idx, pa) ((((channel_idx) >> 4) & 0x1) ^ \ - (((pa) >> 19) & 0x1ULL & UMC_V12_0_HASH_64K) ^ \ - (((pa) >> 26) & 0x1ULL & UMC_V12_0_HASH_2M) ^ \ - (((pa) >> 33) & 0x1ULL & UMC_V12_0_HASH_1G) ^ \ - (((pa) >> 40) & 0x1ULL & UMC_V12_0_HASH_1T) ^ \ - (((pa) >> 47) & 0x1ULL & UMC_V12_0_HASH_4K)) -#define UMC_V12_0_SET_CHANNEL_HASH(channel_idx, pa) do { \ - (pa) &= ~(0x7ULL << UMC_V12_0_PA_CH4_BIT); \ - (pa) |= (UMC_V12_0_CHANNEL_HASH_CH4(channel_idx, pa) << UMC_V12_0_PA_CH4_BIT); \ - (pa) |= (UMC_V12_0_CHANNEL_HASH_CH5(channel_idx, pa) << UMC_V12_0_PA_CH5_BIT); \ - (pa) |= (UMC_V12_0_CHANNEL_HASH_CH6(channel_idx, pa) << UMC_V12_0_PA_CH6_BIT); \ - } while (0) +bool umc_v12_0_is_deferred_error(struct amdgpu_device *adev, uint64_t mc_umc_status); +bool umc_v12_0_is_uncorrectable_error(struct amdgpu_device *adev, uint64_t mc_umc_status); +bool umc_v12_0_is_correctable_error(struct amdgpu_device *adev, uint64_t mc_umc_status); -extern const uint32_t - umc_v12_0_channel_idx_tbl[] - [UMC_V12_0_UMC_INSTANCE_NUM] - [UMC_V12_0_CHANNEL_INSTANCE_NUM]; +typedef bool (*check_error_type_func)(struct amdgpu_device *adev, uint64_t mc_umc_status); extern struct amdgpu_umc_ras umc_v12_0_ras; diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/umc_v6_0.c index 0d6b50528d76..97fa88ed770c 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_0.c @@ -25,7 +25,7 @@ static void umc_v6_0_init_registers(struct amdgpu_device *adev) { - unsigned i,j; + unsigned i, j; for (i = 0; i < 4; i++) for (j = 0; j < 4; j++) diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c b/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c index 530549314ce4..a3ee3c4c650f 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c +++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c @@ -64,7 +64,7 @@ static void umc_v6_7_query_error_status_helper(struct amdgpu_device *adev, uint64_t reg_value; if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1) - dev_info(adev->dev, "Deferred error, no user action is needed.\n"); + dev_info(adev->dev, "Deferred error\n"); if (mc_umc_status) dev_info(adev->dev, "MCA STATUS 0x%llx, umc_reg_offset 0x%x\n", mc_umc_status, umc_reg_offset); diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v8_10.c b/drivers/gpu/drm/amd/amdgpu/umc_v8_10.c index 46bfdee79bfd..a32f87992f20 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v8_10.c +++ b/drivers/gpu/drm/amd/amdgpu/umc_v8_10.c @@ -336,7 +336,7 @@ static void umc_v8_10_ecc_info_query_correctable_error_count(struct amdgpu_devic uint32_t node_inst, uint32_t umc_inst, uint32_t ch_inst, unsigned long *error_count) { - uint64_t mc_umc_status; + uint16_t ecc_ce_cnt; uint32_t eccinfo_table_idx; struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); @@ -345,12 +345,10 @@ static void umc_v8_10_ecc_info_query_correctable_error_count(struct amdgpu_devic umc_inst * adev->umc.channel_inst_num + ch_inst; - /* check the MCUMC_STATUS */ - mc_umc_status = ras->umc_ecc.ecc[eccinfo_table_idx].mca_umc_status; - if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 && - REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1) { - *error_count += 1; - } + /* Retrieve CE count */ + ecc_ce_cnt = ras->umc_ecc.ecc[eccinfo_table_idx].ce_count_lo_chip; + if (ecc_ce_cnt) + *error_count += ecc_ce_cnt; } static void umc_v8_10_ecc_info_query_uncorrectable_error_count(struct amdgpu_device *adev, @@ -444,11 +442,6 @@ static void umc_v8_10_ecc_info_query_ras_error_address(struct amdgpu_device *ade umc_v8_10_ecc_info_query_error_address, ras_error_status); } -static void umc_v8_10_set_eeprom_table_version(struct amdgpu_ras_eeprom_table_header *hdr) -{ - hdr->version = RAS_TABLE_VER_V2_1; -} - const struct amdgpu_ras_block_hw_ops umc_v8_10_ras_hw_ops = { .query_ras_error_count = umc_v8_10_query_ras_error_count, .query_ras_error_address = umc_v8_10_query_ras_error_address, @@ -462,5 +455,4 @@ struct amdgpu_umc_ras umc_v8_10_ras = { .query_ras_poison_mode = umc_v8_10_query_ras_poison_mode, .ecc_info_query_ras_error_count = umc_v8_10_ecc_info_query_ras_error_count, .ecc_info_query_ras_error_address = umc_v8_10_ecc_info_query_ras_error_address, - .set_eeprom_table_version = umc_v8_10_set_eeprom_table_version, }; diff --git a/drivers/gpu/drm/amd/amdgpu/umsch_mm_v4_0.c b/drivers/gpu/drm/amd/amdgpu/umsch_mm_v4_0.c index 8e7b763cfdb7..bd57896ab85d 100644 --- a/drivers/gpu/drm/amd/amdgpu/umsch_mm_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/umsch_mm_v4_0.c @@ -60,7 +60,7 @@ static int umsch_mm_v4_0_load_microcode(struct amdgpu_umsch_mm *umsch) umsch->cmd_buf_curr_ptr = umsch->cmd_buf_ptr; - if (amdgpu_ip_version(adev, VCN_HWIP, 0) == IP_VERSION(4, 0, 5)) { + if (amdgpu_ip_version(adev, VCN_HWIP, 0) >= IP_VERSION(4, 0, 5)) { WREG32_SOC15(VCN, 0, regUVD_IPX_DLDO_CONFIG, 1 << UVD_IPX_DLDO_CONFIG__ONO0_PWR_CONFIG__SHIFT); SOC15_WAIT_ON_RREG(VCN, 0, regUVD_IPX_DLDO_STATUS, @@ -225,6 +225,8 @@ static int umsch_mm_v4_0_ring_start(struct amdgpu_umsch_mm *umsch) WREG32_SOC15(VCN, 0, regVCN_UMSCH_RB_SIZE, ring->ring_size); + ring->wptr = 0; + data = RREG32_SOC15(VCN, 0, regVCN_RB_ENABLE); data &= ~(VCN_RB_ENABLE__AUDIO_RB_EN_MASK); WREG32_SOC15(VCN, 0, regVCN_RB_ENABLE, data); @@ -248,7 +250,7 @@ static int umsch_mm_v4_0_ring_stop(struct amdgpu_umsch_mm *umsch) data = REG_SET_FIELD(data, VCN_UMSCH_RB_DB_CTRL, EN, 0); WREG32_SOC15(VCN, 0, regVCN_UMSCH_RB_DB_CTRL, data); - if (amdgpu_ip_version(adev, VCN_HWIP, 0) == IP_VERSION(4, 0, 5)) { + if (amdgpu_ip_version(adev, VCN_HWIP, 0) >= IP_VERSION(4, 0, 5)) { WREG32_SOC15(VCN, 0, regUVD_IPX_DLDO_CONFIG, 2 << UVD_IPX_DLDO_CONFIG__ONO0_PWR_CONFIG__SHIFT); SOC15_WAIT_ON_RREG(VCN, 0, regUVD_IPX_DLDO_STATUS, @@ -271,6 +273,8 @@ static int umsch_mm_v4_0_set_hw_resources(struct amdgpu_umsch_mm *umsch) set_hw_resources.vmid_mask_mm_vcn = umsch->vmid_mask_mm_vcn; set_hw_resources.vmid_mask_mm_vpe = umsch->vmid_mask_mm_vpe; + set_hw_resources.collaboration_mask_vpe = + adev->vpe.collaborate_mode ? 0x3 : 0x0; set_hw_resources.engine_mask = umsch->engine_mask; set_hw_resources.vcn0_hqd_mask[0] = umsch->vcn0_hqd_mask; @@ -346,6 +350,7 @@ static int umsch_mm_v4_0_add_queue(struct amdgpu_umsch_mm *umsch, add_queue.h_queue = input_ptr->h_queue; add_queue.vm_context_cntl = input_ptr->vm_context_cntl; add_queue.is_context_suspended = input_ptr->is_context_suspended; + add_queue.collaboration_mode = adev->vpe.collaborate_mode ? 1 : 0; add_queue.api_status.api_completion_fence_addr = umsch->ring.fence_drv.gpu_addr; add_queue.api_status.api_completion_fence_value = ++umsch->ring.fence_drv.sync_seq; diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c b/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c index 58a8f78c003c..a6006f231c65 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c @@ -577,8 +577,6 @@ static int uvd_v3_1_sw_init(void *handle) ptr += ucode_len; memcpy(&adev->uvd.keyselect, ptr, 4); - r = amdgpu_uvd_entity_init(adev); - return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c index d3b1e31f5450..1aa09ad7bbe3 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c @@ -127,8 +127,6 @@ static int uvd_v4_2_sw_init(void *handle) if (r) return r; - r = amdgpu_uvd_entity_init(adev); - return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c index 5a8116437abf..f8b229b75435 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c @@ -125,8 +125,6 @@ static int uvd_v5_0_sw_init(void *handle) if (r) return r; - r = amdgpu_uvd_entity_init(adev); - return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c index 74c09230aeb3..a9a6880f44e3 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c @@ -432,8 +432,6 @@ static int uvd_v6_0_sw_init(void *handle) } } - r = amdgpu_uvd_entity_init(adev); - return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c index 1c42cf10cc29..6068b784dc69 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c @@ -480,10 +480,6 @@ static int uvd_v7_0_sw_init(void *handle) if (r) return r; - r = amdgpu_uvd_entity_init(adev); - if (r) - return r; - r = amdgpu_virt_alloc_mm_table(adev); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c index 67eb01fef789..a08e7abca423 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c @@ -441,8 +441,6 @@ static int vce_v2_0_sw_init(void *handle) return r; } - r = amdgpu_vce_entity_init(adev); - return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c index 18f6e62af339..f4760748d349 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c @@ -450,8 +450,6 @@ static int vce_v3_0_sw_init(void *handle) return r; } - r = amdgpu_vce_entity_init(adev); - return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c index e0b70cd3b697..06d787385ad4 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c @@ -486,11 +486,6 @@ static int vce_v4_0_sw_init(void *handle) return r; } - - r = amdgpu_vce_entity_init(adev); - if (r) - return r; - r = amdgpu_virt_alloc_mm_table(adev); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c index 25ba27151ac0..aaceecd558cf 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c @@ -304,7 +304,7 @@ static int vcn_v1_0_resume(void *handle) */ static void vcn_v1_0_mc_resume_spg_mode(struct amdgpu_device *adev) { - uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4); + uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw[0]->size + 4); uint32_t offset; /* cache window 0: fw */ @@ -371,7 +371,7 @@ static void vcn_v1_0_mc_resume_spg_mode(struct amdgpu_device *adev) static void vcn_v1_0_mc_resume_dpg_mode(struct amdgpu_device *adev) { - uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4); + uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw[0]->size + 4); uint32_t offset; /* cache window 0: fw */ diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c index 18794394c5a0..e357d8cf0c01 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c @@ -330,7 +330,7 @@ static int vcn_v2_0_resume(void *handle) */ static void vcn_v2_0_mc_resume(struct amdgpu_device *adev) { - uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4); + uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw[0]->size + 4); uint32_t offset; if (amdgpu_sriov_vf(adev)) @@ -386,7 +386,7 @@ static void vcn_v2_0_mc_resume(struct amdgpu_device *adev) static void vcn_v2_0_mc_resume_dpg_mode(struct amdgpu_device *adev, bool indirect) { - uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4); + uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw[0]->size + 4); uint32_t offset; /* cache window 0: fw */ @@ -1878,7 +1878,7 @@ static int vcn_v2_0_start_sriov(struct amdgpu_device *adev) init_table += header->vcn_table_offset; - size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4); + size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw[0]->size + 4); MMSCH_V2_0_INSERT_DIRECT_RD_MOD_WT( SOC15_REG_OFFSET(UVD, i, mmUVD_STATUS), diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c index aba403d71806..1cd8a94b0fbc 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c @@ -414,13 +414,15 @@ static int vcn_v2_5_resume(void *handle) */ static void vcn_v2_5_mc_resume(struct amdgpu_device *adev) { - uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4); + uint32_t size; uint32_t offset; int i; for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { if (adev->vcn.harvest_config & (1 << i)) continue; + + size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw[i]->size + 4); /* cache window 0: fw */ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW, @@ -469,7 +471,7 @@ static void vcn_v2_5_mc_resume(struct amdgpu_device *adev) static void vcn_v2_5_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect) { - uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4); + uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw[inst_idx]->size + 4); uint32_t offset; /* cache window 0: fw */ @@ -1240,7 +1242,7 @@ static int vcn_v2_5_sriov_start(struct amdgpu_device *adev) SOC15_REG_OFFSET(VCN, i, mmUVD_STATUS), ~UVD_STATUS__UVD_BUSY, UVD_STATUS__UVD_BUSY); - size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4); + size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw[i]->size + 4); /* mc resume*/ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { MMSCH_V1_0_INSERT_DIRECT_WT( diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c index e02af4de521c..8f82fb887e9c 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c @@ -449,7 +449,7 @@ static int vcn_v3_0_resume(void *handle) */ static void vcn_v3_0_mc_resume(struct amdgpu_device *adev, int inst) { - uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4); + uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw[inst]->size + 4); uint32_t offset; /* cache window 0: fw */ @@ -499,7 +499,7 @@ static void vcn_v3_0_mc_resume(struct amdgpu_device *adev, int inst) static void vcn_v3_0_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect) { - uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4); + uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw[inst_idx]->size + 4); uint32_t offset; /* cache window 0: fw */ @@ -1332,7 +1332,7 @@ static int vcn_v3_0_start_sriov(struct amdgpu_device *adev) mmUVD_STATUS), ~UVD_STATUS__UVD_BUSY, UVD_STATUS__UVD_BUSY); - cache_size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4); + cache_size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw[i]->size + 4); if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i, diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c index 7ab9263d3e19..832d15f7b5f6 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c @@ -100,6 +100,31 @@ static int vcn_v4_0_early_init(void *handle) return amdgpu_vcn_early_init(adev); } +static int vcn_v4_0_fw_shared_init(struct amdgpu_device *adev, int inst_idx) +{ + volatile struct amdgpu_vcn4_fw_shared *fw_shared; + + fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr; + fw_shared->present_flag_0 = cpu_to_le32(AMDGPU_FW_SHARED_FLAG_0_UNIFIED_QUEUE); + fw_shared->sq.is_enabled = 1; + + fw_shared->present_flag_0 |= cpu_to_le32(AMDGPU_VCN_SMU_DPM_INTERFACE_FLAG); + fw_shared->smu_dpm_interface.smu_interface_type = (adev->flags & AMD_IS_APU) ? + AMDGPU_VCN_SMU_DPM_INTERFACE_APU : AMDGPU_VCN_SMU_DPM_INTERFACE_DGPU; + + if (amdgpu_ip_version(adev, VCN_HWIP, 0) == + IP_VERSION(4, 0, 2)) { + fw_shared->present_flag_0 |= AMDGPU_FW_SHARED_FLAG_0_DRM_KEY_INJECT; + fw_shared->drm_key_wa.method = + AMDGPU_DRM_KEY_INJECT_WORKAROUND_VCNFW_ASD_HANDSHAKING; + } + + if (amdgpu_vcnfw_log) + amdgpu_vcn_fwlog_init(&adev->vcn.inst[inst_idx]); + + return 0; +} + /** * vcn_v4_0_sw_init - sw init for VCN block * @@ -124,8 +149,6 @@ static int vcn_v4_0_sw_init(void *handle) return r; for (i = 0; i < adev->vcn.num_vcn_inst; i++) { - volatile struct amdgpu_vcn4_fw_shared *fw_shared; - if (adev->vcn.harvest_config & (1 << i)) continue; @@ -161,26 +184,7 @@ static int vcn_v4_0_sw_init(void *handle) if (r) return r; - fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; - fw_shared->present_flag_0 = cpu_to_le32(AMDGPU_FW_SHARED_FLAG_0_UNIFIED_QUEUE); - fw_shared->sq.is_enabled = 1; - - fw_shared->present_flag_0 |= cpu_to_le32(AMDGPU_VCN_SMU_DPM_INTERFACE_FLAG); - fw_shared->smu_dpm_interface.smu_interface_type = (adev->flags & AMD_IS_APU) ? - AMDGPU_VCN_SMU_DPM_INTERFACE_APU : AMDGPU_VCN_SMU_DPM_INTERFACE_DGPU; - - if (amdgpu_ip_version(adev, VCN_HWIP, 0) == - IP_VERSION(4, 0, 2)) { - fw_shared->present_flag_0 |= AMDGPU_FW_SHARED_FLAG_0_DRM_KEY_INJECT; - fw_shared->drm_key_wa.method = - AMDGPU_DRM_KEY_INJECT_WORKAROUND_VCNFW_ASD_HANDSHAKING; - } - - if (amdgpu_sriov_vf(adev)) - fw_shared->present_flag_0 |= cpu_to_le32(AMDGPU_VCN_VF_RB_SETUP_FLAG); - - if (amdgpu_vcnfw_log) - amdgpu_vcn_fwlog_init(&adev->vcn.inst[i]); + vcn_v4_0_fw_shared_init(adev, i); } if (amdgpu_sriov_vf(adev)) { @@ -378,7 +382,7 @@ static void vcn_v4_0_mc_resume(struct amdgpu_device *adev, int inst) uint32_t offset, size; const struct common_firmware_header *hdr; - hdr = (const struct common_firmware_header *)adev->vcn.fw->data; + hdr = (const struct common_firmware_header *)adev->vcn.fw[inst]->data; size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8); /* cache window 0: fw */ @@ -438,7 +442,7 @@ static void vcn_v4_0_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_idx { uint32_t offset, size; const struct common_firmware_header *hdr; - hdr = (const struct common_firmware_header *)adev->vcn.fw->data; + hdr = (const struct common_firmware_header *)adev->vcn.fw[inst_idx]->data; size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8); /* cache window 0: fw */ @@ -1209,6 +1213,24 @@ static int vcn_v4_0_start(struct amdgpu_device *adev) return 0; } +static int vcn_v4_0_init_ring_metadata(struct amdgpu_device *adev, uint32_t vcn_inst, struct amdgpu_ring *ring_enc) +{ + struct amdgpu_vcn_rb_metadata *rb_metadata = NULL; + uint8_t *rb_ptr = (uint8_t *)ring_enc->ring; + + rb_ptr += ring_enc->ring_size; + rb_metadata = (struct amdgpu_vcn_rb_metadata *)rb_ptr; + + memset(rb_metadata, 0, sizeof(struct amdgpu_vcn_rb_metadata)); + rb_metadata->size = sizeof(struct amdgpu_vcn_rb_metadata); + rb_metadata->present_flag_0 |= cpu_to_le32(AMDGPU_VCN_VF_RB_SETUP_FLAG); + rb_metadata->present_flag_0 |= cpu_to_le32(AMDGPU_VCN_VF_RB_DECOUPLE_FLAG); + rb_metadata->version = 1; + rb_metadata->ring_id = vcn_inst & 0xFF; + + return 0; +} + static int vcn_v4_0_start_sriov(struct amdgpu_device *adev) { int i; @@ -1258,13 +1280,16 @@ static int vcn_v4_0_start_sriov(struct amdgpu_device *adev) if (adev->vcn.harvest_config & (1 << i)) continue; + // Must re/init fw_shared at beginning + vcn_v4_0_fw_shared_init(adev, i); + table_size = 0; MMSCH_V4_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCN, i, regUVD_STATUS), ~UVD_STATUS__UVD_BUSY, UVD_STATUS__UVD_BUSY); - cache_size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4); + cache_size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw[i]->size + 4); if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i, @@ -1331,11 +1356,30 @@ static int vcn_v4_0_start_sriov(struct amdgpu_device *adev) rb_enc_addr = ring_enc->gpu_addr; rb_setup->is_rb_enabled_flags |= RB_ENABLED; - rb_setup->rb_addr_lo = lower_32_bits(rb_enc_addr); - rb_setup->rb_addr_hi = upper_32_bits(rb_enc_addr); - rb_setup->rb_size = ring_enc->ring_size / 4; fw_shared->present_flag_0 |= cpu_to_le32(AMDGPU_VCN_VF_RB_SETUP_FLAG); + if (amdgpu_sriov_is_vcn_rb_decouple(adev)) { + vcn_v4_0_init_ring_metadata(adev, i, ring_enc); + + memset((void *)&rb_setup->rb_info, 0, sizeof(struct amdgpu_vcn_rb_setup_info) * MAX_NUM_VCN_RB_SETUP); + if (!(adev->vcn.harvest_config & (1 << 0))) { + rb_setup->rb_info[0].rb_addr_lo = lower_32_bits(adev->vcn.inst[0].ring_enc[0].gpu_addr); + rb_setup->rb_info[0].rb_addr_hi = upper_32_bits(adev->vcn.inst[0].ring_enc[0].gpu_addr); + rb_setup->rb_info[0].rb_size = adev->vcn.inst[0].ring_enc[0].ring_size / 4; + } + if (!(adev->vcn.harvest_config & (1 << 1))) { + rb_setup->rb_info[2].rb_addr_lo = lower_32_bits(adev->vcn.inst[1].ring_enc[0].gpu_addr); + rb_setup->rb_info[2].rb_addr_hi = upper_32_bits(adev->vcn.inst[1].ring_enc[0].gpu_addr); + rb_setup->rb_info[2].rb_size = adev->vcn.inst[1].ring_enc[0].ring_size / 4; + } + fw_shared->decouple.is_enabled = 1; + fw_shared->present_flag_0 |= cpu_to_le32(AMDGPU_VCN_VF_RB_DECOUPLE_FLAG); + } else { + rb_setup->rb_addr_lo = lower_32_bits(rb_enc_addr); + rb_setup->rb_addr_hi = upper_32_bits(rb_enc_addr); + rb_setup->rb_size = ring_enc->ring_size / 4; + } + MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i, regUVD_LMI_VCPU_NC0_64BIT_BAR_LOW), lower_32_bits(adev->vcn.inst[i].fw_shared.gpu_addr)); @@ -1807,6 +1851,7 @@ static struct amdgpu_ring_funcs vcn_v4_0_unified_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_ENC, .align_mask = 0x3f, .nop = VCN_ENC_CMD_NO_OP, + .extra_dw = sizeof(struct amdgpu_vcn_rb_metadata), .get_rptr = vcn_v4_0_unified_ring_get_rptr, .get_wptr = vcn_v4_0_unified_ring_get_wptr, .set_wptr = vcn_v4_0_unified_ring_set_wptr, @@ -1973,22 +2018,6 @@ static int vcn_v4_0_set_powergating_state(void *handle, enum amd_powergating_sta } /** - * vcn_v4_0_set_interrupt_state - set VCN block interrupt state - * - * @adev: amdgpu_device pointer - * @source: interrupt sources - * @type: interrupt types - * @state: interrupt states - * - * Set VCN block interrupt state - */ -static int vcn_v4_0_set_interrupt_state(struct amdgpu_device *adev, struct amdgpu_irq_src *source, - unsigned type, enum amdgpu_interrupt_state state) -{ - return 0; -} - -/** * vcn_v4_0_set_ras_interrupt_state - set VCN block RAS interrupt state * * @adev: amdgpu_device pointer @@ -2020,16 +2049,20 @@ static int vcn_v4_0_process_interrupt(struct amdgpu_device *adev, struct amdgpu_ { uint32_t ip_instance; - switch (entry->client_id) { - case SOC15_IH_CLIENTID_VCN: - ip_instance = 0; - break; - case SOC15_IH_CLIENTID_VCN1: - ip_instance = 1; - break; - default: - DRM_ERROR("Unhandled client id: %d\n", entry->client_id); - return 0; + if (amdgpu_sriov_is_vcn_rb_decouple(adev)) { + ip_instance = entry->ring_id; + } else { + switch (entry->client_id) { + case SOC15_IH_CLIENTID_VCN: + ip_instance = 0; + break; + case SOC15_IH_CLIENTID_VCN1: + ip_instance = 1; + break; + default: + DRM_ERROR("Unhandled client id: %d\n", entry->client_id); + return 0; + } } DRM_DEBUG("IH: VCN TRAP\n"); @@ -2048,7 +2081,6 @@ static int vcn_v4_0_process_interrupt(struct amdgpu_device *adev, struct amdgpu_ } static const struct amdgpu_irq_src_funcs vcn_v4_0_irq_funcs = { - .set = vcn_v4_0_set_interrupt_state, .process = vcn_v4_0_process_interrupt, }; diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c index f85d18cd74ec..203fa988322b 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c @@ -332,7 +332,7 @@ static void vcn_v4_0_3_mc_resume(struct amdgpu_device *adev, int inst_idx) uint32_t offset, size, vcn_inst; const struct common_firmware_header *hdr; - hdr = (const struct common_firmware_header *)adev->vcn.fw->data; + hdr = (const struct common_firmware_header *)adev->vcn.fw[inst_idx]->data; size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8); vcn_inst = GET_INST(VCN, inst_idx); @@ -407,7 +407,7 @@ static void vcn_v4_0_3_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_i uint32_t offset, size; const struct common_firmware_header *hdr; - hdr = (const struct common_firmware_header *)adev->vcn.fw->data; + hdr = (const struct common_firmware_header *)adev->vcn.fw[inst_idx]->data; size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8); /* cache window 0: fw */ @@ -894,7 +894,7 @@ static int vcn_v4_0_3_start_sriov(struct amdgpu_device *adev) MMSCH_V4_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCN, 0, regUVD_STATUS), ~UVD_STATUS__UVD_BUSY, UVD_STATUS__UVD_BUSY); - cache_size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4); + cache_size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw[i]->size + 4); if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0, @@ -1760,6 +1760,11 @@ static void vcn_v4_0_3_enable_ras(struct amdgpu_device *adev, SOC15_DPG_MODE_OFFSET(VCN, 0, regVCN_RAS_CNTL), tmp, 0, indirect); + tmp = UVD_VCPU_INT_EN2__RASCNTL_VCPU_VCODEC_EN_MASK; + WREG32_SOC15_DPG_MODE(inst_idx, + SOC15_DPG_MODE_OFFSET(VCN, 0, regUVD_VCPU_INT_EN2), + tmp, 0, indirect); + tmp = UVD_SYS_INT_EN__RASCNTL_VCPU_VCODEC_EN_MASK; WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(VCN, 0, regUVD_SYS_INT_EN), diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c index 2eda30e78f61..501e53e69f2a 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c @@ -45,7 +45,7 @@ #define mmUVD_DPG_LMA_DATA_BASE_IDX regUVD_DPG_LMA_DATA_BASE_IDX #define VCN_VID_SOC_ADDRESS_2_0 0x1fb00 -#define VCN1_VID_SOC_ADDRESS_3_0 0x48300 +#define VCN1_VID_SOC_ADDRESS_3_0 (0x48300 + 0x38000) #define VCN_HARVEST_MMSCH 0 @@ -269,8 +269,6 @@ static int vcn_v4_0_5_hw_fini(void *handle) vcn_v4_0_5_set_powergating_state(adev, AMD_PG_STATE_GATE); } } - - amdgpu_irq_put(adev, &adev->vcn.inst[i].irq, 0); } return 0; @@ -331,7 +329,7 @@ static void vcn_v4_0_5_mc_resume(struct amdgpu_device *adev, int inst) uint32_t offset, size; const struct common_firmware_header *hdr; - hdr = (const struct common_firmware_header *)adev->vcn.fw->data; + hdr = (const struct common_firmware_header *)adev->vcn.fw[inst]->data; size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8); /* cache window 0: fw */ @@ -392,7 +390,7 @@ static void vcn_v4_0_5_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_i uint32_t offset, size; const struct common_firmware_header *hdr; - hdr = (const struct common_firmware_header *)adev->vcn.fw->data; + hdr = (const struct common_firmware_header *)adev->vcn.fw[inst_idx]->data; size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8); /* cache window 0: fw */ @@ -488,7 +486,8 @@ static void vcn_v4_0_5_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_i /* VCN global tiling registers */ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( - VCN, 0, regUVD_GFX10_ADDR_CONFIG), adev->gfx.config.gb_addr_config, 0, indirect); + VCN, inst_idx, regUVD_GFX10_ADDR_CONFIG), + adev->gfx.config.gb_addr_config, 0, indirect); } /** @@ -913,7 +912,6 @@ static int vcn_v4_0_5_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, b VCN, inst_idx, regUVD_MASTINT_EN), UVD_MASTINT_EN__VCPU_EN_MASK, 0, indirect); - if (indirect) amdgpu_vcn_psp_update_sram(adev, inst_idx, 0); @@ -1669,22 +1667,6 @@ static int vcn_v4_0_5_set_powergating_state(void *handle, enum amd_powergating_s } /** - * vcn_v4_0_5_set_interrupt_state - set VCN block interrupt state - * - * @adev: amdgpu_device pointer - * @source: interrupt sources - * @type: interrupt types - * @state: interrupt states - * - * Set VCN block interrupt state - */ -static int vcn_v4_0_5_set_interrupt_state(struct amdgpu_device *adev, struct amdgpu_irq_src *source, - unsigned type, enum amdgpu_interrupt_state state) -{ - return 0; -} - -/** * vcn_v4_0_5_process_interrupt - process VCN block interrupt * * @adev: amdgpu_device pointer @@ -1702,6 +1684,9 @@ static int vcn_v4_0_5_process_interrupt(struct amdgpu_device *adev, struct amdgp case SOC15_IH_CLIENTID_VCN: ip_instance = 0; break; + case SOC15_IH_CLIENTID_VCN1: + ip_instance = 1; + break; default: DRM_ERROR("Unhandled client id: %d\n", entry->client_id); return 0; @@ -1726,7 +1711,6 @@ static int vcn_v4_0_5_process_interrupt(struct amdgpu_device *adev, struct amdgp } static const struct amdgpu_irq_src_funcs vcn_v4_0_5_irq_funcs = { - .set = vcn_v4_0_5_set_interrupt_state, .process = vcn_v4_0_5_process_interrupt, }; diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c new file mode 100644 index 000000000000..bc60c554eb32 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c @@ -0,0 +1,1339 @@ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include <linux/firmware.h> +#include "amdgpu.h" +#include "amdgpu_vcn.h" +#include "amdgpu_pm.h" +#include "soc15.h" +#include "soc15d.h" +#include "soc15_hw_ip.h" +#include "vcn_v2_0.h" + +#include "vcn/vcn_5_0_0_offset.h" +#include "vcn/vcn_5_0_0_sh_mask.h" +#include "ivsrcid/vcn/irqsrcs_vcn_4_0.h" +#include "vcn_v5_0_0.h" + +#include <drm/drm_drv.h> + +static int amdgpu_ih_clientid_vcns[] = { + SOC15_IH_CLIENTID_VCN, + SOC15_IH_CLIENTID_VCN1 +}; + +static void vcn_v5_0_0_set_unified_ring_funcs(struct amdgpu_device *adev); +static void vcn_v5_0_0_set_irq_funcs(struct amdgpu_device *adev); +static int vcn_v5_0_0_set_powergating_state(void *handle, + enum amd_powergating_state state); +static int vcn_v5_0_0_pause_dpg_mode(struct amdgpu_device *adev, + int inst_idx, struct dpg_pause_state *new_state); +static void vcn_v5_0_0_unified_ring_set_wptr(struct amdgpu_ring *ring); + +/** + * vcn_v5_0_0_early_init - set function pointers and load microcode + * + * @handle: amdgpu_device pointer + * + * Set ring and irq function pointers + * Load microcode from filesystem + */ +static int vcn_v5_0_0_early_init(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + /* re-use enc ring as unified ring */ + adev->vcn.num_enc_rings = 1; + + vcn_v5_0_0_set_unified_ring_funcs(adev); + vcn_v5_0_0_set_irq_funcs(adev); + + return amdgpu_vcn_early_init(adev); +} + +/** + * vcn_v5_0_0_sw_init - sw init for VCN block + * + * @handle: amdgpu_device pointer + * + * Load firmware and sw initialization + */ +static int vcn_v5_0_0_sw_init(void *handle) +{ + struct amdgpu_ring *ring; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i, r; + + r = amdgpu_vcn_sw_init(adev); + if (r) + return r; + + amdgpu_vcn_setup_ucode(adev); + + r = amdgpu_vcn_resume(adev); + if (r) + return r; + + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + volatile struct amdgpu_vcn4_fw_shared *fw_shared; + + if (adev->vcn.harvest_config & (1 << i)) + continue; + + atomic_set(&adev->vcn.inst[i].sched_score, 0); + + /* VCN UNIFIED TRAP */ + r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_vcns[i], + VCN_4_0__SRCID__UVD_ENC_GENERAL_PURPOSE, &adev->vcn.inst[i].irq); + if (r) + return r; + + /* VCN POISON TRAP */ + r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_vcns[i], + VCN_4_0__SRCID_UVD_POISON, &adev->vcn.inst[i].irq); + if (r) + return r; + + ring = &adev->vcn.inst[i].ring_enc[0]; + ring->use_doorbell = true; + ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 2 + 8 * i; + + ring->vm_hub = AMDGPU_MMHUB0(0); + sprintf(ring->name, "vcn_unified_%d", i); + + r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[i].irq, 0, + AMDGPU_RING_PRIO_0, &adev->vcn.inst[i].sched_score); + if (r) + return r; + + fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; + fw_shared->present_flag_0 = cpu_to_le32(AMDGPU_FW_SHARED_FLAG_0_UNIFIED_QUEUE); + fw_shared->sq.is_enabled = 1; + + if (amdgpu_vcnfw_log) + amdgpu_vcn_fwlog_init(&adev->vcn.inst[i]); + } + + if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) + adev->vcn.pause_dpg_mode = vcn_v5_0_0_pause_dpg_mode; + + return 0; +} + +/** + * vcn_v5_0_0_sw_fini - sw fini for VCN block + * + * @handle: amdgpu_device pointer + * + * VCN suspend and free up sw allocation + */ +static int vcn_v5_0_0_sw_fini(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i, r, idx; + + if (drm_dev_enter(adev_to_drm(adev), &idx)) { + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + volatile struct amdgpu_vcn4_fw_shared *fw_shared; + + if (adev->vcn.harvest_config & (1 << i)) + continue; + + fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; + fw_shared->present_flag_0 = 0; + fw_shared->sq.is_enabled = 0; + } + + drm_dev_exit(idx); + } + + r = amdgpu_vcn_suspend(adev); + if (r) + return r; + + r = amdgpu_vcn_sw_fini(adev); + + return r; +} + +/** + * vcn_v5_0_0_hw_init - start and test VCN block + * + * @handle: amdgpu_device pointer + * + * Initialize the hardware, boot up the VCPU and do some testing + */ +static int vcn_v5_0_0_hw_init(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_ring *ring; + int i, r; + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + + ring = &adev->vcn.inst[i].ring_enc[0]; + + adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell, + ((adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 8 * i), i); + + r = amdgpu_ring_test_helper(ring); + if (r) + goto done; + } + +done: + if (!r) + DRM_INFO("VCN decode and encode initialized successfully(under %s).\n", + (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)?"DPG Mode":"SPG Mode"); + + return r; +} + +/** + * vcn_v5_0_0_hw_fini - stop the hardware block + * + * @handle: amdgpu_device pointer + * + * Stop the VCN block, mark ring as not ready any more + */ +static int vcn_v5_0_0_hw_fini(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i; + + cancel_delayed_work_sync(&adev->vcn.idle_work); + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + + amdgpu_irq_put(adev, &adev->vcn.inst[i].irq, 0); + } + + return 0; +} + +/** + * vcn_v5_0_0_suspend - suspend VCN block + * + * @handle: amdgpu_device pointer + * + * HW fini and suspend VCN block + */ +static int vcn_v5_0_0_suspend(void *handle) +{ + int r; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + r = vcn_v5_0_0_hw_fini(adev); + if (r) + return r; + + r = amdgpu_vcn_suspend(adev); + + return r; +} + +/** + * vcn_v5_0_0_resume - resume VCN block + * + * @handle: amdgpu_device pointer + * + * Resume firmware and hw init VCN block + */ +static int vcn_v5_0_0_resume(void *handle) +{ + int r; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + r = amdgpu_vcn_resume(adev); + if (r) + return r; + + r = vcn_v5_0_0_hw_init(adev); + + return r; +} + +/** + * vcn_v5_0_0_mc_resume - memory controller programming + * + * @adev: amdgpu_device pointer + * @inst: instance number + * + * Let the VCN memory controller know it's offsets + */ +static void vcn_v5_0_0_mc_resume(struct amdgpu_device *adev, int inst) +{ + uint32_t offset, size; + const struct common_firmware_header *hdr; + + hdr = (const struct common_firmware_header *)adev->vcn.fw[inst]->data; + size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8); + + /* cache window 0: fw */ + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { + WREG32_SOC15(VCN, inst, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW, + (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst].tmr_mc_addr_lo)); + WREG32_SOC15(VCN, inst, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH, + (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst].tmr_mc_addr_hi)); + WREG32_SOC15(VCN, inst, regUVD_VCPU_CACHE_OFFSET0, 0); + offset = 0; + } else { + WREG32_SOC15(VCN, inst, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW, + lower_32_bits(adev->vcn.inst[inst].gpu_addr)); + WREG32_SOC15(VCN, inst, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH, + upper_32_bits(adev->vcn.inst[inst].gpu_addr)); + offset = size; + WREG32_SOC15(VCN, inst, regUVD_VCPU_CACHE_OFFSET0, AMDGPU_UVD_FIRMWARE_OFFSET >> 3); + } + WREG32_SOC15(VCN, inst, regUVD_VCPU_CACHE_SIZE0, size); + + /* cache window 1: stack */ + WREG32_SOC15(VCN, inst, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW, + lower_32_bits(adev->vcn.inst[inst].gpu_addr + offset)); + WREG32_SOC15(VCN, inst, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH, + upper_32_bits(adev->vcn.inst[inst].gpu_addr + offset)); + WREG32_SOC15(VCN, inst, regUVD_VCPU_CACHE_OFFSET1, 0); + WREG32_SOC15(VCN, inst, regUVD_VCPU_CACHE_SIZE1, AMDGPU_VCN_STACK_SIZE); + + /* cache window 2: context */ + WREG32_SOC15(VCN, inst, regUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW, + lower_32_bits(adev->vcn.inst[inst].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE)); + WREG32_SOC15(VCN, inst, regUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH, + upper_32_bits(adev->vcn.inst[inst].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE)); + WREG32_SOC15(VCN, inst, regUVD_VCPU_CACHE_OFFSET2, 0); + WREG32_SOC15(VCN, inst, regUVD_VCPU_CACHE_SIZE2, AMDGPU_VCN_CONTEXT_SIZE); + + /* non-cache window */ + WREG32_SOC15(VCN, inst, regUVD_LMI_VCPU_NC0_64BIT_BAR_LOW, + lower_32_bits(adev->vcn.inst[inst].fw_shared.gpu_addr)); + WREG32_SOC15(VCN, inst, regUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH, + upper_32_bits(adev->vcn.inst[inst].fw_shared.gpu_addr)); + WREG32_SOC15(VCN, inst, regUVD_VCPU_NONCACHE_OFFSET0, 0); + WREG32_SOC15(VCN, inst, regUVD_VCPU_NONCACHE_SIZE0, + AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_vcn4_fw_shared))); +} + +/** + * vcn_v5_0_0_mc_resume_dpg_mode - memory controller programming for dpg mode + * + * @adev: amdgpu_device pointer + * @inst_idx: instance number index + * @indirect: indirectly write sram + * + * Let the VCN memory controller know it's offsets with dpg mode + */ +static void vcn_v5_0_0_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect) +{ + uint32_t offset, size; + const struct common_firmware_header *hdr; + + hdr = (const struct common_firmware_header *)adev->vcn.fw[inst_idx]->data; + size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8); + + /* cache window 0: fw */ + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { + if (!indirect) { + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, inst_idx, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), + (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst_idx].tmr_mc_addr_lo), 0, indirect); + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, inst_idx, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), + (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst_idx].tmr_mc_addr_hi), 0, indirect); + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, inst_idx, regUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect); + } else { + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, inst_idx, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), 0, 0, indirect); + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, inst_idx, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), 0, 0, indirect); + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, inst_idx, regUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect); + } + offset = 0; + } else { + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, inst_idx, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), + lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr), 0, indirect); + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, inst_idx, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), + upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr), 0, indirect); + offset = size; + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, inst_idx, regUVD_VCPU_CACHE_OFFSET0), + AMDGPU_UVD_FIRMWARE_OFFSET >> 3, 0, indirect); + } + + if (!indirect) + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, inst_idx, regUVD_VCPU_CACHE_SIZE0), size, 0, indirect); + else + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, inst_idx, regUVD_VCPU_CACHE_SIZE0), 0, 0, indirect); + + /* cache window 1: stack */ + if (!indirect) { + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, inst_idx, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), + lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset), 0, indirect); + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, inst_idx, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), + upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset), 0, indirect); + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, inst_idx, regUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect); + } else { + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, inst_idx, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), 0, 0, indirect); + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, inst_idx, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), 0, 0, indirect); + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, inst_idx, regUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect); + } + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, inst_idx, regUVD_VCPU_CACHE_SIZE1), AMDGPU_VCN_STACK_SIZE, 0, indirect); + + /* cache window 2: context */ + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, inst_idx, regUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW), + lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), 0, indirect); + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, inst_idx, regUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH), + upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), 0, indirect); + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, inst_idx, regUVD_VCPU_CACHE_OFFSET2), 0, 0, indirect); + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, inst_idx, regUVD_VCPU_CACHE_SIZE2), AMDGPU_VCN_CONTEXT_SIZE, 0, indirect); + + /* non-cache window */ + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, inst_idx, regUVD_LMI_VCPU_NC0_64BIT_BAR_LOW), + lower_32_bits(adev->vcn.inst[inst_idx].fw_shared.gpu_addr), 0, indirect); + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, inst_idx, regUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH), + upper_32_bits(adev->vcn.inst[inst_idx].fw_shared.gpu_addr), 0, indirect); + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, inst_idx, regUVD_VCPU_NONCACHE_OFFSET0), 0, 0, indirect); + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, inst_idx, regUVD_VCPU_NONCACHE_SIZE0), + AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_vcn4_fw_shared)), 0, indirect); + + /* VCN global tiling registers */ + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_GFX10_ADDR_CONFIG), adev->gfx.config.gb_addr_config, 0, indirect); + + return; +} + +/** + * vcn_v5_0_0_disable_static_power_gating - disable VCN static power gating + * + * @adev: amdgpu_device pointer + * @inst: instance number + * + * Disable static power gating for VCN block + */ +static void vcn_v5_0_0_disable_static_power_gating(struct amdgpu_device *adev, int inst) +{ + uint32_t data = 0; + + if (adev->pg_flags & AMD_PG_SUPPORT_VCN) { + data = 1 << UVD_IPX_DLDO_CONFIG__ONO2_PWR_CONFIG__SHIFT; + WREG32_SOC15(VCN, inst, regUVD_IPX_DLDO_CONFIG, data); + SOC15_WAIT_ON_RREG(VCN, inst, regUVD_IPX_DLDO_STATUS, 0, + UVD_IPX_DLDO_STATUS__ONO2_PWR_STATUS_MASK); + + data = 2 << UVD_IPX_DLDO_CONFIG__ONO3_PWR_CONFIG__SHIFT; + WREG32_SOC15(VCN, inst, regUVD_IPX_DLDO_CONFIG, data); + SOC15_WAIT_ON_RREG(VCN, inst, regUVD_IPX_DLDO_STATUS, + 1 << UVD_IPX_DLDO_STATUS__ONO3_PWR_STATUS__SHIFT, + UVD_IPX_DLDO_STATUS__ONO3_PWR_STATUS_MASK); + + data = 2 << UVD_IPX_DLDO_CONFIG__ONO4_PWR_CONFIG__SHIFT; + WREG32_SOC15(VCN, inst, regUVD_IPX_DLDO_CONFIG, data); + SOC15_WAIT_ON_RREG(VCN, inst, regUVD_IPX_DLDO_STATUS, + 1 << UVD_IPX_DLDO_STATUS__ONO4_PWR_STATUS__SHIFT, + UVD_IPX_DLDO_STATUS__ONO4_PWR_STATUS_MASK); + + data = 2 << UVD_IPX_DLDO_CONFIG__ONO5_PWR_CONFIG__SHIFT; + WREG32_SOC15(VCN, inst, regUVD_IPX_DLDO_CONFIG, data); + SOC15_WAIT_ON_RREG(VCN, inst, regUVD_IPX_DLDO_STATUS, + 1 << UVD_IPX_DLDO_STATUS__ONO5_PWR_STATUS__SHIFT, + UVD_IPX_DLDO_STATUS__ONO5_PWR_STATUS_MASK); + } else { + data = 1 << UVD_IPX_DLDO_CONFIG__ONO2_PWR_CONFIG__SHIFT; + WREG32_SOC15(VCN, inst, regUVD_IPX_DLDO_CONFIG, data); + SOC15_WAIT_ON_RREG(VCN, inst, regUVD_IPX_DLDO_STATUS, 0, + UVD_IPX_DLDO_STATUS__ONO2_PWR_STATUS_MASK); + + data = 1 << UVD_IPX_DLDO_CONFIG__ONO3_PWR_CONFIG__SHIFT; + WREG32_SOC15(VCN, inst, regUVD_IPX_DLDO_CONFIG, data); + SOC15_WAIT_ON_RREG(VCN, inst, regUVD_IPX_DLDO_STATUS, 0, + UVD_IPX_DLDO_STATUS__ONO3_PWR_STATUS_MASK); + + data = 1 << UVD_IPX_DLDO_CONFIG__ONO4_PWR_CONFIG__SHIFT; + WREG32_SOC15(VCN, inst, regUVD_IPX_DLDO_CONFIG, data); + SOC15_WAIT_ON_RREG(VCN, inst, regUVD_IPX_DLDO_STATUS, 0, + UVD_IPX_DLDO_STATUS__ONO4_PWR_STATUS_MASK); + + data = 1 << UVD_IPX_DLDO_CONFIG__ONO5_PWR_CONFIG__SHIFT; + WREG32_SOC15(VCN, inst, regUVD_IPX_DLDO_CONFIG, data); + SOC15_WAIT_ON_RREG(VCN, inst, regUVD_IPX_DLDO_STATUS, 0, + UVD_IPX_DLDO_STATUS__ONO5_PWR_STATUS_MASK); + } + + data = RREG32_SOC15(VCN, inst, regUVD_POWER_STATUS); + data &= ~0x103; + if (adev->pg_flags & AMD_PG_SUPPORT_VCN) + data |= UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON | + UVD_POWER_STATUS__UVD_PG_EN_MASK; + + WREG32_SOC15(VCN, inst, regUVD_POWER_STATUS, data); + return; +} + +/** + * vcn_v5_0_0_enable_static_power_gating - enable VCN static power gating + * + * @adev: amdgpu_device pointer + * @inst: instance number + * + * Enable static power gating for VCN block + */ +static void vcn_v5_0_0_enable_static_power_gating(struct amdgpu_device *adev, int inst) +{ + uint32_t data; + + if (adev->pg_flags & AMD_PG_SUPPORT_VCN) { + /* Before power off, this indicator has to be turned on */ + data = RREG32_SOC15(VCN, inst, regUVD_POWER_STATUS); + data &= ~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK; + data |= UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF; + WREG32_SOC15(VCN, inst, regUVD_POWER_STATUS, data); + + data = 2 << UVD_IPX_DLDO_CONFIG__ONO5_PWR_CONFIG__SHIFT; + WREG32_SOC15(VCN, inst, regUVD_IPX_DLDO_CONFIG, data); + SOC15_WAIT_ON_RREG(VCN, inst, regUVD_IPX_DLDO_STATUS, + 1 << UVD_IPX_DLDO_STATUS__ONO5_PWR_STATUS__SHIFT, + UVD_IPX_DLDO_STATUS__ONO5_PWR_STATUS_MASK); + + data = 2 << UVD_IPX_DLDO_CONFIG__ONO4_PWR_CONFIG__SHIFT; + WREG32_SOC15(VCN, inst, regUVD_IPX_DLDO_CONFIG, data); + SOC15_WAIT_ON_RREG(VCN, inst, regUVD_IPX_DLDO_STATUS, + 1 << UVD_IPX_DLDO_STATUS__ONO4_PWR_STATUS__SHIFT, + UVD_IPX_DLDO_STATUS__ONO4_PWR_STATUS_MASK); + + data = 2 << UVD_IPX_DLDO_CONFIG__ONO3_PWR_CONFIG__SHIFT; + WREG32_SOC15(VCN, inst, regUVD_IPX_DLDO_CONFIG, data); + SOC15_WAIT_ON_RREG(VCN, inst, regUVD_IPX_DLDO_STATUS, + 1 << UVD_IPX_DLDO_STATUS__ONO3_PWR_STATUS__SHIFT, + UVD_IPX_DLDO_STATUS__ONO3_PWR_STATUS_MASK); + + data = 2 << UVD_IPX_DLDO_CONFIG__ONO2_PWR_CONFIG__SHIFT; + WREG32_SOC15(VCN, inst, regUVD_IPX_DLDO_CONFIG, data); + SOC15_WAIT_ON_RREG(VCN, inst, regUVD_IPX_DLDO_STATUS, + 1 << UVD_IPX_DLDO_STATUS__ONO2_PWR_STATUS__SHIFT, + UVD_IPX_DLDO_STATUS__ONO2_PWR_STATUS_MASK); + } + return; +} + +/** + * vcn_v5_0_0_disable_clock_gating - disable VCN clock gating + * + * @adev: amdgpu_device pointer + * @inst: instance number + * + * Disable clock gating for VCN block + */ +static void vcn_v5_0_0_disable_clock_gating(struct amdgpu_device *adev, int inst) +{ + return; +} + +#if 0 +/** + * vcn_v5_0_0_disable_clock_gating_dpg_mode - disable VCN clock gating dpg mode + * + * @adev: amdgpu_device pointer + * @sram_sel: sram select + * @inst_idx: instance number index + * @indirect: indirectly write sram + * + * Disable clock gating for VCN block with dpg mode + */ +static void vcn_v5_0_0_disable_clock_gating_dpg_mode(struct amdgpu_device *adev, uint8_t sram_sel, + int inst_idx, uint8_t indirect) +{ + return; +} +#endif + +/** + * vcn_v5_0_0_enable_clock_gating - enable VCN clock gating + * + * @adev: amdgpu_device pointer + * @inst: instance number + * + * Enable clock gating for VCN block + */ +static void vcn_v5_0_0_enable_clock_gating(struct amdgpu_device *adev, int inst) +{ + return; +} + +/** + * vcn_v5_0_0_start_dpg_mode - VCN start with dpg mode + * + * @adev: amdgpu_device pointer + * @inst_idx: instance number index + * @indirect: indirectly write sram + * + * Start VCN block with dpg mode + */ +static int vcn_v5_0_0_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect) +{ + volatile struct amdgpu_vcn4_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr; + struct amdgpu_ring *ring; + uint32_t tmp; + + /* disable register anti-hang mechanism */ + WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, regUVD_POWER_STATUS), 1, + ~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK); + + /* enable dynamic power gating mode */ + tmp = RREG32_SOC15(VCN, inst_idx, regUVD_POWER_STATUS); + tmp |= UVD_POWER_STATUS__UVD_PG_MODE_MASK; + tmp |= UVD_POWER_STATUS__UVD_PG_EN_MASK; + WREG32_SOC15(VCN, inst_idx, regUVD_POWER_STATUS, tmp); + + if (indirect) + adev->vcn.inst[inst_idx].dpg_sram_curr_addr = (uint32_t *)adev->vcn.inst[inst_idx].dpg_sram_cpu_addr; + + /* enable VCPU clock */ + tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT); + tmp |= UVD_VCPU_CNTL__CLK_EN_MASK | UVD_VCPU_CNTL__BLK_RST_MASK; + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, inst_idx, regUVD_VCPU_CNTL), tmp, 0, indirect); + + /* disable master interrupt */ + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, inst_idx, regUVD_MASTINT_EN), 0, 0, indirect); + + /* setup regUVD_LMI_CTRL */ + tmp = (UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK | + UVD_LMI_CTRL__REQ_MODE_MASK | + UVD_LMI_CTRL__CRC_RESET_MASK | + UVD_LMI_CTRL__MASK_MC_URGENT_MASK | + UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK | + UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK | + (8 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) | + 0x00100000L); + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, inst_idx, regUVD_LMI_CTRL), tmp, 0, indirect); + + vcn_v5_0_0_mc_resume_dpg_mode(adev, inst_idx, indirect); + + tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT); + tmp |= UVD_VCPU_CNTL__CLK_EN_MASK; + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, inst_idx, regUVD_VCPU_CNTL), tmp, 0, indirect); + + /* enable LMI MC and UMC channels */ + tmp = 0x1f << UVD_LMI_CTRL2__RE_OFLD_MIF_WR_REQ_NUM__SHIFT; + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, inst_idx, regUVD_LMI_CTRL2), tmp, 0, indirect); + + /* enable master interrupt */ + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, inst_idx, regUVD_MASTINT_EN), + UVD_MASTINT_EN__VCPU_EN_MASK, 0, indirect); + + if (indirect) + amdgpu_vcn_psp_update_sram(adev, inst_idx, 0); + + ring = &adev->vcn.inst[inst_idx].ring_enc[0]; + + WREG32_SOC15(VCN, inst_idx, regUVD_RB_BASE_LO, ring->gpu_addr); + WREG32_SOC15(VCN, inst_idx, regUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); + WREG32_SOC15(VCN, inst_idx, regUVD_RB_SIZE, ring->ring_size / 4); + + tmp = RREG32_SOC15(VCN, inst_idx, regVCN_RB_ENABLE); + tmp &= ~(VCN_RB_ENABLE__RB1_EN_MASK); + WREG32_SOC15(VCN, inst_idx, regVCN_RB_ENABLE, tmp); + fw_shared->sq.queue_mode |= FW_QUEUE_RING_RESET; + WREG32_SOC15(VCN, inst_idx, regUVD_RB_RPTR, 0); + WREG32_SOC15(VCN, inst_idx, regUVD_RB_WPTR, 0); + + tmp = RREG32_SOC15(VCN, inst_idx, regUVD_RB_RPTR); + WREG32_SOC15(VCN, inst_idx, regUVD_RB_WPTR, tmp); + ring->wptr = RREG32_SOC15(VCN, inst_idx, regUVD_RB_WPTR); + + tmp = RREG32_SOC15(VCN, inst_idx, regVCN_RB_ENABLE); + tmp |= VCN_RB_ENABLE__RB1_EN_MASK; + WREG32_SOC15(VCN, inst_idx, regVCN_RB_ENABLE, tmp); + fw_shared->sq.queue_mode &= ~(FW_QUEUE_RING_RESET | FW_QUEUE_DPG_HOLD_OFF); + + WREG32_SOC15(VCN, inst_idx, regVCN_RB1_DB_CTRL, + ring->doorbell_index << VCN_RB1_DB_CTRL__OFFSET__SHIFT | + VCN_RB1_DB_CTRL__EN_MASK); + + return 0; +} + +/** + * vcn_v5_0_0_start - VCN start + * + * @adev: amdgpu_device pointer + * + * Start VCN block + */ +static int vcn_v5_0_0_start(struct amdgpu_device *adev) +{ + volatile struct amdgpu_vcn4_fw_shared *fw_shared; + struct amdgpu_ring *ring; + uint32_t tmp; + int i, j, k, r; + + if (adev->pm.dpm_enabled) + amdgpu_dpm_enable_uvd(adev, true); + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; + + if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) { + r = vcn_v5_0_0_start_dpg_mode(adev, i, adev->vcn.indirect_sram); + continue; + } + + /* disable VCN power gating */ + vcn_v5_0_0_disable_static_power_gating(adev, i); + + /* set VCN status busy */ + tmp = RREG32_SOC15(VCN, i, regUVD_STATUS) | UVD_STATUS__UVD_BUSY; + WREG32_SOC15(VCN, i, regUVD_STATUS, tmp); + + /* enable VCPU clock */ + WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL), + UVD_VCPU_CNTL__CLK_EN_MASK, ~UVD_VCPU_CNTL__CLK_EN_MASK); + + /* disable master interrupt */ + WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_MASTINT_EN), 0, + ~UVD_MASTINT_EN__VCPU_EN_MASK); + + /* enable LMI MC and UMC channels */ + WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_LMI_CTRL2), 0, + ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK); + + tmp = RREG32_SOC15(VCN, i, regUVD_SOFT_RESET); + tmp &= ~UVD_SOFT_RESET__LMI_SOFT_RESET_MASK; + tmp &= ~UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK; + WREG32_SOC15(VCN, i, regUVD_SOFT_RESET, tmp); + + /* setup regUVD_LMI_CTRL */ + tmp = RREG32_SOC15(VCN, i, regUVD_LMI_CTRL); + WREG32_SOC15(VCN, i, regUVD_LMI_CTRL, tmp | + UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK | + UVD_LMI_CTRL__MASK_MC_URGENT_MASK | + UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK | + UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK); + + vcn_v5_0_0_mc_resume(adev, i); + + /* VCN global tiling registers */ + WREG32_SOC15(VCN, i, regUVD_GFX10_ADDR_CONFIG, + adev->gfx.config.gb_addr_config); + + /* unblock VCPU register access */ + WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_RB_ARB_CTRL), 0, + ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK); + + /* release VCPU reset to boot */ + WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL), 0, + ~UVD_VCPU_CNTL__BLK_RST_MASK); + + for (j = 0; j < 10; ++j) { + uint32_t status; + + for (k = 0; k < 100; ++k) { + status = RREG32_SOC15(VCN, i, regUVD_STATUS); + if (status & 2) + break; + mdelay(10); + if (amdgpu_emu_mode == 1) + msleep(1); + } + + if (amdgpu_emu_mode == 1) { + r = -1; + if (status & 2) { + r = 0; + break; + } + } else { + r = 0; + if (status & 2) + break; + + dev_err(adev->dev, + "VCN[%d] is not responding, trying to reset the VCPU!!!\n", i); + WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL), + UVD_VCPU_CNTL__BLK_RST_MASK, + ~UVD_VCPU_CNTL__BLK_RST_MASK); + mdelay(10); + WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL), 0, + ~UVD_VCPU_CNTL__BLK_RST_MASK); + + mdelay(10); + r = -1; + } + } + + if (r) { + dev_err(adev->dev, "VCN[%d] is not responding, giving up!!!\n", i); + return r; + } + + /* enable master interrupt */ + WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_MASTINT_EN), + UVD_MASTINT_EN__VCPU_EN_MASK, + ~UVD_MASTINT_EN__VCPU_EN_MASK); + + /* clear the busy bit of VCN_STATUS */ + WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_STATUS), 0, + ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT)); + + ring = &adev->vcn.inst[i].ring_enc[0]; + WREG32_SOC15(VCN, i, regVCN_RB1_DB_CTRL, + ring->doorbell_index << VCN_RB1_DB_CTRL__OFFSET__SHIFT | + VCN_RB1_DB_CTRL__EN_MASK); + + WREG32_SOC15(VCN, i, regUVD_RB_BASE_LO, ring->gpu_addr); + WREG32_SOC15(VCN, i, regUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); + WREG32_SOC15(VCN, i, regUVD_RB_SIZE, ring->ring_size / 4); + + tmp = RREG32_SOC15(VCN, i, regVCN_RB_ENABLE); + tmp &= ~(VCN_RB_ENABLE__RB1_EN_MASK); + WREG32_SOC15(VCN, i, regVCN_RB_ENABLE, tmp); + fw_shared->sq.queue_mode |= FW_QUEUE_RING_RESET; + WREG32_SOC15(VCN, i, regUVD_RB_RPTR, 0); + WREG32_SOC15(VCN, i, regUVD_RB_WPTR, 0); + + tmp = RREG32_SOC15(VCN, i, regUVD_RB_RPTR); + WREG32_SOC15(VCN, i, regUVD_RB_WPTR, tmp); + ring->wptr = RREG32_SOC15(VCN, i, regUVD_RB_WPTR); + + tmp = RREG32_SOC15(VCN, i, regVCN_RB_ENABLE); + tmp |= VCN_RB_ENABLE__RB1_EN_MASK; + WREG32_SOC15(VCN, i, regVCN_RB_ENABLE, tmp); + fw_shared->sq.queue_mode &= ~(FW_QUEUE_RING_RESET | FW_QUEUE_DPG_HOLD_OFF); + } + + return 0; +} + +/** + * vcn_v5_0_0_stop_dpg_mode - VCN stop with dpg mode + * + * @adev: amdgpu_device pointer + * @inst_idx: instance number index + * + * Stop VCN block with dpg mode + */ +static void vcn_v5_0_0_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx) +{ + struct dpg_pause_state state = {.fw_based = VCN_DPG_STATE__UNPAUSE}; + uint32_t tmp; + + vcn_v5_0_0_pause_dpg_mode(adev, inst_idx, &state); + + /* Wait for power status to be 1 */ + SOC15_WAIT_ON_RREG(VCN, inst_idx, regUVD_POWER_STATUS, 1, + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK); + + /* wait for read ptr to be equal to write ptr */ + tmp = RREG32_SOC15(VCN, inst_idx, regUVD_RB_WPTR); + SOC15_WAIT_ON_RREG(VCN, inst_idx, regUVD_RB_RPTR, tmp, 0xFFFFFFFF); + + /* disable dynamic power gating mode */ + WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, regUVD_POWER_STATUS), 0, + ~UVD_POWER_STATUS__UVD_PG_MODE_MASK); + + return; +} + +/** + * vcn_v5_0_0_stop - VCN stop + * + * @adev: amdgpu_device pointer + * + * Stop VCN block + */ +static int vcn_v5_0_0_stop(struct amdgpu_device *adev) +{ + volatile struct amdgpu_vcn4_fw_shared *fw_shared; + uint32_t tmp; + int i, r = 0; + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; + fw_shared->sq.queue_mode |= FW_QUEUE_DPG_HOLD_OFF; + + if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) { + vcn_v5_0_0_stop_dpg_mode(adev, i); + continue; + } + + /* wait for vcn idle */ + r = SOC15_WAIT_ON_RREG(VCN, i, regUVD_STATUS, UVD_STATUS__IDLE, 0x7); + if (r) + return r; + + tmp = UVD_LMI_STATUS__VCPU_LMI_WRITE_CLEAN_MASK | + UVD_LMI_STATUS__READ_CLEAN_MASK | + UVD_LMI_STATUS__WRITE_CLEAN_MASK | + UVD_LMI_STATUS__WRITE_CLEAN_RAW_MASK; + r = SOC15_WAIT_ON_RREG(VCN, i, regUVD_LMI_STATUS, tmp, tmp); + if (r) + return r; + + /* disable LMI UMC channel */ + tmp = RREG32_SOC15(VCN, i, regUVD_LMI_CTRL2); + tmp |= UVD_LMI_CTRL2__STALL_ARB_UMC_MASK; + WREG32_SOC15(VCN, i, regUVD_LMI_CTRL2, tmp); + tmp = UVD_LMI_STATUS__UMC_READ_CLEAN_RAW_MASK | + UVD_LMI_STATUS__UMC_WRITE_CLEAN_RAW_MASK; + r = SOC15_WAIT_ON_RREG(VCN, i, regUVD_LMI_STATUS, tmp, tmp); + if (r) + return r; + + /* block VCPU register access */ + WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_RB_ARB_CTRL), + UVD_RB_ARB_CTRL__VCPU_DIS_MASK, + ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK); + + /* reset VCPU */ + WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL), + UVD_VCPU_CNTL__BLK_RST_MASK, + ~UVD_VCPU_CNTL__BLK_RST_MASK); + + /* disable VCPU clock */ + WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL), 0, + ~(UVD_VCPU_CNTL__CLK_EN_MASK)); + + /* apply soft reset */ + tmp = RREG32_SOC15(VCN, i, regUVD_SOFT_RESET); + tmp |= UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK; + WREG32_SOC15(VCN, i, regUVD_SOFT_RESET, tmp); + tmp = RREG32_SOC15(VCN, i, regUVD_SOFT_RESET); + tmp |= UVD_SOFT_RESET__LMI_SOFT_RESET_MASK; + WREG32_SOC15(VCN, i, regUVD_SOFT_RESET, tmp); + + /* clear status */ + WREG32_SOC15(VCN, i, regUVD_STATUS, 0); + + /* enable VCN power gating */ + vcn_v5_0_0_enable_static_power_gating(adev, i); + } + + if (adev->pm.dpm_enabled) + amdgpu_dpm_enable_uvd(adev, false); + + return 0; +} + +/** + * vcn_v5_0_0_pause_dpg_mode - VCN pause with dpg mode + * + * @adev: amdgpu_device pointer + * @inst_idx: instance number index + * @new_state: pause state + * + * Pause dpg mode for VCN block + */ +static int vcn_v5_0_0_pause_dpg_mode(struct amdgpu_device *adev, int inst_idx, + struct dpg_pause_state *new_state) +{ + uint32_t reg_data = 0; + int ret_code; + + /* pause/unpause if state is changed */ + if (adev->vcn.inst[inst_idx].pause_state.fw_based != new_state->fw_based) { + DRM_DEV_DEBUG(adev->dev, "dpg pause state changed %d -> %d", + adev->vcn.inst[inst_idx].pause_state.fw_based, new_state->fw_based); + reg_data = RREG32_SOC15(VCN, inst_idx, regUVD_DPG_PAUSE) & + (~UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK); + + if (new_state->fw_based == VCN_DPG_STATE__PAUSE) { + ret_code = SOC15_WAIT_ON_RREG(VCN, inst_idx, regUVD_POWER_STATUS, 0x1, + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK); + + if (!ret_code) { + /* pause DPG */ + reg_data |= UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK; + WREG32_SOC15(VCN, inst_idx, regUVD_DPG_PAUSE, reg_data); + + /* wait for ACK */ + SOC15_WAIT_ON_RREG(VCN, inst_idx, regUVD_DPG_PAUSE, + UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK, + UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK); + } + } else { + /* unpause dpg, no need to wait */ + reg_data &= ~UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK; + WREG32_SOC15(VCN, inst_idx, regUVD_DPG_PAUSE, reg_data); + } + adev->vcn.inst[inst_idx].pause_state.fw_based = new_state->fw_based; + } + + return 0; +} + +/** + * vcn_v5_0_0_unified_ring_get_rptr - get unified read pointer + * + * @ring: amdgpu_ring pointer + * + * Returns the current hardware unified read pointer + */ +static uint64_t vcn_v5_0_0_unified_ring_get_rptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + if (ring != &adev->vcn.inst[ring->me].ring_enc[0]) + DRM_ERROR("wrong ring id is identified in %s", __func__); + + return RREG32_SOC15(VCN, ring->me, regUVD_RB_RPTR); +} + +/** + * vcn_v5_0_0_unified_ring_get_wptr - get unified write pointer + * + * @ring: amdgpu_ring pointer + * + * Returns the current hardware unified write pointer + */ +static uint64_t vcn_v5_0_0_unified_ring_get_wptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + if (ring != &adev->vcn.inst[ring->me].ring_enc[0]) + DRM_ERROR("wrong ring id is identified in %s", __func__); + + if (ring->use_doorbell) + return *ring->wptr_cpu_addr; + else + return RREG32_SOC15(VCN, ring->me, regUVD_RB_WPTR); +} + +/** + * vcn_v5_0_0_unified_ring_set_wptr - set enc write pointer + * + * @ring: amdgpu_ring pointer + * + * Commits the enc write pointer to the hardware + */ +static void vcn_v5_0_0_unified_ring_set_wptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + if (ring != &adev->vcn.inst[ring->me].ring_enc[0]) + DRM_ERROR("wrong ring id is identified in %s", __func__); + + if (ring->use_doorbell) { + *ring->wptr_cpu_addr = lower_32_bits(ring->wptr); + WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr)); + } else { + WREG32_SOC15(VCN, ring->me, regUVD_RB_WPTR, lower_32_bits(ring->wptr)); + } +} + +static const struct amdgpu_ring_funcs vcn_v5_0_0_unified_ring_vm_funcs = { + .type = AMDGPU_RING_TYPE_VCN_ENC, + .align_mask = 0x3f, + .nop = VCN_ENC_CMD_NO_OP, + .get_rptr = vcn_v5_0_0_unified_ring_get_rptr, + .get_wptr = vcn_v5_0_0_unified_ring_get_wptr, + .set_wptr = vcn_v5_0_0_unified_ring_set_wptr, + .emit_frame_size = + SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 + + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 + + 4 + /* vcn_v2_0_enc_ring_emit_vm_flush */ + 5 + 5 + /* vcn_v2_0_enc_ring_emit_fence x2 vm fence */ + 1, /* vcn_v2_0_enc_ring_insert_end */ + .emit_ib_size = 5, /* vcn_v2_0_enc_ring_emit_ib */ + .emit_ib = vcn_v2_0_enc_ring_emit_ib, + .emit_fence = vcn_v2_0_enc_ring_emit_fence, + .emit_vm_flush = vcn_v2_0_enc_ring_emit_vm_flush, + .test_ring = amdgpu_vcn_enc_ring_test_ring, + .test_ib = amdgpu_vcn_unified_ring_test_ib, + .insert_nop = amdgpu_ring_insert_nop, + .insert_end = vcn_v2_0_enc_ring_insert_end, + .pad_ib = amdgpu_ring_generic_pad_ib, + .begin_use = amdgpu_vcn_ring_begin_use, + .end_use = amdgpu_vcn_ring_end_use, + .emit_wreg = vcn_v2_0_enc_ring_emit_wreg, + .emit_reg_wait = vcn_v2_0_enc_ring_emit_reg_wait, + .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, +}; + +/** + * vcn_v5_0_0_set_unified_ring_funcs - set unified ring functions + * + * @adev: amdgpu_device pointer + * + * Set unified ring functions + */ +static void vcn_v5_0_0_set_unified_ring_funcs(struct amdgpu_device *adev) +{ + int i; + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + + adev->vcn.inst[i].ring_enc[0].funcs = &vcn_v5_0_0_unified_ring_vm_funcs; + adev->vcn.inst[i].ring_enc[0].me = i; + + DRM_INFO("VCN(%d) encode/decode are enabled in VM mode\n", i); + } +} + +/** + * vcn_v5_0_0_is_idle - check VCN block is idle + * + * @handle: amdgpu_device pointer + * + * Check whether VCN block is idle + */ +static bool vcn_v5_0_0_is_idle(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i, ret = 1; + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + + ret &= (RREG32_SOC15(VCN, i, regUVD_STATUS) == UVD_STATUS__IDLE); + } + + return ret; +} + +/** + * vcn_v5_0_0_wait_for_idle - wait for VCN block idle + * + * @handle: amdgpu_device pointer + * + * Wait for VCN block idle + */ +static int vcn_v5_0_0_wait_for_idle(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i, ret = 0; + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + + ret = SOC15_WAIT_ON_RREG(VCN, i, regUVD_STATUS, UVD_STATUS__IDLE, + UVD_STATUS__IDLE); + if (ret) + return ret; + } + + return ret; +} + +/** + * vcn_v5_0_0_set_clockgating_state - set VCN block clockgating state + * + * @handle: amdgpu_device pointer + * @state: clock gating state + * + * Set VCN block clockgating state + */ +static int vcn_v5_0_0_set_clockgating_state(void *handle, enum amd_clockgating_state state) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + bool enable = (state == AMD_CG_STATE_GATE) ? true : false; + int i; + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + + if (enable) { + if (RREG32_SOC15(VCN, i, regUVD_STATUS) != UVD_STATUS__IDLE) + return -EBUSY; + vcn_v5_0_0_enable_clock_gating(adev, i); + } else { + vcn_v5_0_0_disable_clock_gating(adev, i); + } + } + + return 0; +} + +/** + * vcn_v5_0_0_set_powergating_state - set VCN block powergating state + * + * @handle: amdgpu_device pointer + * @state: power gating state + * + * Set VCN block powergating state + */ +static int vcn_v5_0_0_set_powergating_state(void *handle, enum amd_powergating_state state) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int ret; + + if (state == adev->vcn.cur_state) + return 0; + + if (state == AMD_PG_STATE_GATE) + ret = vcn_v5_0_0_stop(adev); + else + ret = vcn_v5_0_0_start(adev); + + if (!ret) + adev->vcn.cur_state = state; + + return ret; +} + +/** + * vcn_v5_0_0_set_interrupt_state - set VCN block interrupt state + * + * @adev: amdgpu_device pointer + * @source: interrupt sources + * @type: interrupt types + * @state: interrupt states + * + * Set VCN block interrupt state + */ +static int vcn_v5_0_0_set_interrupt_state(struct amdgpu_device *adev, struct amdgpu_irq_src *source, + unsigned type, enum amdgpu_interrupt_state state) +{ + return 0; +} + +/** + * vcn_v5_0_0_process_interrupt - process VCN block interrupt + * + * @adev: amdgpu_device pointer + * @source: interrupt sources + * @entry: interrupt entry from clients and sources + * + * Process VCN block interrupt + */ +static int vcn_v5_0_0_process_interrupt(struct amdgpu_device *adev, struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + uint32_t ip_instance; + + switch (entry->client_id) { + case SOC15_IH_CLIENTID_VCN: + ip_instance = 0; + break; + case SOC15_IH_CLIENTID_VCN1: + ip_instance = 1; + break; + default: + DRM_ERROR("Unhandled client id: %d\n", entry->client_id); + return 0; + } + + DRM_DEBUG("IH: VCN TRAP\n"); + + switch (entry->src_id) { + case VCN_4_0__SRCID__UVD_ENC_GENERAL_PURPOSE: + amdgpu_fence_process(&adev->vcn.inst[ip_instance].ring_enc[0]); + break; + case VCN_4_0__SRCID_UVD_POISON: + amdgpu_vcn_process_poison_irq(adev, source, entry); + break; + default: + DRM_ERROR("Unhandled interrupt: %d %d\n", + entry->src_id, entry->src_data[0]); + break; + } + + return 0; +} + +static const struct amdgpu_irq_src_funcs vcn_v5_0_0_irq_funcs = { + .set = vcn_v5_0_0_set_interrupt_state, + .process = vcn_v5_0_0_process_interrupt, +}; + +/** + * vcn_v5_0_0_set_irq_funcs - set VCN block interrupt irq functions + * + * @adev: amdgpu_device pointer + * + * Set VCN block interrupt irq functions + */ +static void vcn_v5_0_0_set_irq_funcs(struct amdgpu_device *adev) +{ + int i; + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + + adev->vcn.inst[i].irq.num_types = adev->vcn.num_enc_rings + 1; + adev->vcn.inst[i].irq.funcs = &vcn_v5_0_0_irq_funcs; + } +} + +static const struct amd_ip_funcs vcn_v5_0_0_ip_funcs = { + .name = "vcn_v5_0_0", + .early_init = vcn_v5_0_0_early_init, + .late_init = NULL, + .sw_init = vcn_v5_0_0_sw_init, + .sw_fini = vcn_v5_0_0_sw_fini, + .hw_init = vcn_v5_0_0_hw_init, + .hw_fini = vcn_v5_0_0_hw_fini, + .suspend = vcn_v5_0_0_suspend, + .resume = vcn_v5_0_0_resume, + .is_idle = vcn_v5_0_0_is_idle, + .wait_for_idle = vcn_v5_0_0_wait_for_idle, + .check_soft_reset = NULL, + .pre_soft_reset = NULL, + .soft_reset = NULL, + .post_soft_reset = NULL, + .set_clockgating_state = vcn_v5_0_0_set_clockgating_state, + .set_powergating_state = vcn_v5_0_0_set_powergating_state, +}; + +const struct amdgpu_ip_block_version vcn_v5_0_0_ip_block = { + .type = AMD_IP_BLOCK_TYPE_VCN, + .major = 5, + .minor = 0, + .rev = 0, + .funcs = &vcn_v5_0_0_ip_funcs, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.h b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.h new file mode 100644 index 000000000000..51bbccd4360f --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.h @@ -0,0 +1,37 @@ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __VCN_V5_0_0_H__ +#define __VCN_V5_0_0_H__ + +#define VCN_VID_SOC_ADDRESS 0x1FC00 +#define VCN_AON_SOC_ADDRESS 0x1F800 +#define VCN1_VID_SOC_ADDRESS 0x48300 +#define VCN1_AON_SOC_ADDRESS 0x48000 + +#define VCN_VID_IP_ADDRESS 0x0 +#define VCN_AON_IP_ADDRESS 0x30000 + +extern const struct amdgpu_ip_block_version vcn_v5_0_0_ip_block; + +#endif /* __VCN_V5_0_0_H__ */ diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c index d364c6dd152c..bf68e18e3824 100644 --- a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c @@ -373,6 +373,12 @@ static u32 vega10_ih_get_wptr(struct amdgpu_device *adev, tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1); WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp); + /* Unset the CLEAR_OVERFLOW bit immediately so new overflows + * can be detected. + */ + tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0); + WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp); + out: return (wptr & ih->ptr_mask); } diff --git a/drivers/gpu/drm/amd/amdgpu/vega20_ih.c b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c index ddfc6941f9d5..b9e785846637 100644 --- a/drivers/gpu/drm/amd/amdgpu/vega20_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c @@ -291,27 +291,29 @@ static int vega20_ih_irq_init(struct amdgpu_device *adev) adev->nbio.funcs->ih_control(adev); - if ((amdgpu_ip_version(adev, OSSSYS_HWIP, 0) == IP_VERSION(4, 2, 1)) && - adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { - ih_chicken = RREG32_SOC15(OSSSYS, 0, mmIH_CHICKEN); - if (adev->irq.ih.use_bus_addr) { - ih_chicken = REG_SET_FIELD(ih_chicken, IH_CHICKEN, - MC_SPACE_GPA_ENABLE, 1); + if (!amdgpu_sriov_vf(adev)) { + if ((amdgpu_ip_version(adev, OSSSYS_HWIP, 0) == IP_VERSION(4, 2, 1)) && + adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { + ih_chicken = RREG32_SOC15(OSSSYS, 0, mmIH_CHICKEN); + if (adev->irq.ih.use_bus_addr) { + ih_chicken = REG_SET_FIELD(ih_chicken, IH_CHICKEN, + MC_SPACE_GPA_ENABLE, 1); + } + WREG32_SOC15(OSSSYS, 0, mmIH_CHICKEN, ih_chicken); } - WREG32_SOC15(OSSSYS, 0, mmIH_CHICKEN, ih_chicken); - } - /* psp firmware won't program IH_CHICKEN for aldebaran - * driver needs to program it properly according to - * MC_SPACE type in IH_RB_CNTL */ - if ((amdgpu_ip_version(adev, OSSSYS_HWIP, 0) == IP_VERSION(4, 4, 0)) || - (amdgpu_ip_version(adev, OSSSYS_HWIP, 0) == IP_VERSION(4, 4, 2))) { - ih_chicken = RREG32_SOC15(OSSSYS, 0, mmIH_CHICKEN_ALDEBARAN); - if (adev->irq.ih.use_bus_addr) { - ih_chicken = REG_SET_FIELD(ih_chicken, IH_CHICKEN, - MC_SPACE_GPA_ENABLE, 1); + /* psp firmware won't program IH_CHICKEN for aldebaran + * driver needs to program it properly according to + * MC_SPACE type in IH_RB_CNTL */ + if ((amdgpu_ip_version(adev, OSSSYS_HWIP, 0) == IP_VERSION(4, 4, 0)) || + (amdgpu_ip_version(adev, OSSSYS_HWIP, 0) == IP_VERSION(4, 4, 2))) { + ih_chicken = RREG32_SOC15(OSSSYS, 0, mmIH_CHICKEN_ALDEBARAN); + if (adev->irq.ih.use_bus_addr) { + ih_chicken = REG_SET_FIELD(ih_chicken, IH_CHICKEN, + MC_SPACE_GPA_ENABLE, 1); + } + WREG32_SOC15(OSSSYS, 0, mmIH_CHICKEN_ALDEBARAN, ih_chicken); } - WREG32_SOC15(OSSSYS, 0, mmIH_CHICKEN_ALDEBARAN, ih_chicken); } for (i = 0; i < ARRAY_SIZE(ih); i++) { @@ -421,6 +423,12 @@ static u32 vega20_ih_get_wptr(struct amdgpu_device *adev, tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1); WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp); + /* Unset the CLEAR_OVERFLOW bit immediately so new overflows + * can be detected. + */ + tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 0); + WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp); + out: return (wptr & ih->ptr_mask); } diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c index 6a8494f98d3e..2415355b037c 100644 --- a/drivers/gpu/drm/amd/amdgpu/vi.c +++ b/drivers/gpu/drm/amd/amdgpu/vi.c @@ -897,7 +897,7 @@ static int vi_asic_pci_config_reset(struct amdgpu_device *adev) return r; } -static bool vi_asic_supports_baco(struct amdgpu_device *adev) +static int vi_asic_supports_baco(struct amdgpu_device *adev) { switch (adev->asic_type) { case CHIP_FIJI: @@ -908,14 +908,14 @@ static bool vi_asic_supports_baco(struct amdgpu_device *adev) case CHIP_TOPAZ: return amdgpu_dpm_is_baco_supported(adev); default: - return false; + return 0; } } static enum amd_reset_method vi_asic_reset_method(struct amdgpu_device *adev) { - bool baco_reset; + int baco_reset; if (amdgpu_reset_method == AMD_RESET_METHOD_LEGACY || amdgpu_reset_method == AMD_RESET_METHOD_BACO) @@ -935,7 +935,7 @@ vi_asic_reset_method(struct amdgpu_device *adev) baco_reset = amdgpu_dpm_is_baco_supported(adev); break; default: - baco_reset = false; + baco_reset = 0; break; } @@ -1124,11 +1124,10 @@ static void vi_program_aspm(struct amdgpu_device *adev) bool bL1SS = false; bool bClkReqSupport = true; - if (!amdgpu_device_should_use_aspm(adev) || !amdgpu_device_aspm_support_quirk()) + if (!amdgpu_device_should_use_aspm(adev)) return; - if (adev->flags & AMD_IS_APU || - adev->asic_type < CHIP_POLARIS10) + if (adev->asic_type < CHIP_POLARIS10) return; orig = data = RREG32_PCIE(ixPCIE_LC_CNTL); diff --git a/drivers/gpu/drm/amd/amdgpu/vpe_6_1_fw_if.h b/drivers/gpu/drm/amd/amdgpu/vpe_6_1_fw_if.h index 9b550deb48d3..47534dbbd137 100644 --- a/drivers/gpu/drm/amd/amdgpu/vpe_6_1_fw_if.h +++ b/drivers/gpu/drm/amd/amdgpu/vpe_6_1_fw_if.h @@ -40,7 +40,8 @@ enum VPE_CMD_OPCODE { VPE_CMD_OPCODE_POLL_REGMEM = 0x8, VPE_CMD_OPCODE_COND_EXE = 0x9, VPE_CMD_OPCODE_ATOMIC = 0xA, - VPE_CMD_OPCODE_PLANE_FILL = 0xB, + VPE_CMD_OPCODE_PRED_EXE = 0xB, + VPE_CMD_OPCODE_COLLAB_SYNC = 0xC, VPE_CMD_OPCODE_TIMESTAMP = 0xD }; diff --git a/drivers/gpu/drm/amd/amdgpu/vpe_v6_1.c b/drivers/gpu/drm/amd/amdgpu/vpe_v6_1.c index 756f39348dd9..09315dd5a1ec 100644 --- a/drivers/gpu/drm/amd/amdgpu/vpe_v6_1.c +++ b/drivers/gpu/drm/amd/amdgpu/vpe_v6_1.c @@ -33,14 +33,38 @@ #include "vpe/vpe_6_1_0_sh_mask.h" MODULE_FIRMWARE("amdgpu/vpe_6_1_0.bin"); +MODULE_FIRMWARE("amdgpu/vpe_6_1_1.bin"); #define VPE_THREAD1_UCODE_OFFSET 0x8000 +#define regVPEC_COLLABORATE_CNTL 0x0013 +#define regVPEC_COLLABORATE_CNTL_BASE_IDX 0 +#define VPEC_COLLABORATE_CNTL__COLLABORATE_MODE_EN__SHIFT 0x0 +#define VPEC_COLLABORATE_CNTL__COLLABORATE_MODE_EN_MASK 0x00000001L + +#define regVPEC_COLLABORATE_CFG 0x0014 +#define regVPEC_COLLABORATE_CFG_BASE_IDX 0 +#define VPEC_COLLABORATE_CFG__MASTER_ID__SHIFT 0x0 +#define VPEC_COLLABORATE_CFG__MASTER_EN__SHIFT 0x3 +#define VPEC_COLLABORATE_CFG__SLAVE0_ID__SHIFT 0x4 +#define VPEC_COLLABORATE_CFG__SLAVE0_EN__SHIFT 0x7 +#define VPEC_COLLABORATE_CFG__MASTER_ID_MASK 0x00000007L +#define VPEC_COLLABORATE_CFG__MASTER_EN_MASK 0x00000008L +#define VPEC_COLLABORATE_CFG__SLAVE0_ID_MASK 0x00000070L +#define VPEC_COLLABORATE_CFG__SLAVE0_EN_MASK 0x00000080L + +#define regVPEC_CNTL_6_1_1 0x0016 +#define regVPEC_CNTL_6_1_1_BASE_IDX 0 +#define regVPEC_QUEUE_RESET_REQ_6_1_1 0x002c +#define regVPEC_QUEUE_RESET_REQ_6_1_1_BASE_IDX 0 +#define regVPEC_PUB_DUMMY2_6_1_1 0x004c +#define regVPEC_PUB_DUMMY2_6_1_1_BASE_IDX 0 + static uint32_t vpe_v6_1_get_reg_offset(struct amdgpu_vpe *vpe, uint32_t inst, uint32_t offset) { uint32_t base; - base = vpe->ring.adev->reg_offset[VPE_HWIP][0][0]; + base = vpe->ring.adev->reg_offset[VPE_HWIP][inst][0]; return base + offset; } @@ -48,12 +72,14 @@ static uint32_t vpe_v6_1_get_reg_offset(struct amdgpu_vpe *vpe, uint32_t inst, u static void vpe_v6_1_halt(struct amdgpu_vpe *vpe, bool halt) { struct amdgpu_device *adev = vpe->ring.adev; - uint32_t f32_cntl; + uint32_t i, f32_cntl; - f32_cntl = RREG32(vpe_get_reg_offset(vpe, 0, regVPEC_F32_CNTL)); - f32_cntl = REG_SET_FIELD(f32_cntl, VPEC_F32_CNTL, HALT, halt ? 1 : 0); - f32_cntl = REG_SET_FIELD(f32_cntl, VPEC_F32_CNTL, TH1_RESET, halt ? 1 : 0); - WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_F32_CNTL), f32_cntl); + for (i = 0; i < vpe->num_instances; i++) { + f32_cntl = RREG32(vpe_get_reg_offset(vpe, i, regVPEC_F32_CNTL)); + f32_cntl = REG_SET_FIELD(f32_cntl, VPEC_F32_CNTL, HALT, halt ? 1 : 0); + f32_cntl = REG_SET_FIELD(f32_cntl, VPEC_F32_CNTL, TH1_RESET, halt ? 1 : 0); + WREG32(vpe_get_reg_offset(vpe, i, regVPEC_F32_CNTL), f32_cntl); + } } static int vpe_v6_1_irq_init(struct amdgpu_vpe *vpe) @@ -70,20 +96,64 @@ static int vpe_v6_1_irq_init(struct amdgpu_vpe *vpe) return 0; } +static void vpe_v6_1_set_collaborate_mode(struct amdgpu_vpe *vpe, bool enable) +{ + struct amdgpu_device *adev = vpe->ring.adev; + uint32_t vpe_colla_cntl, vpe_colla_cfg, i; + + if (!vpe->collaborate_mode) + return; + + for (i = 0; i < vpe->num_instances; i++) { + vpe_colla_cntl = RREG32(vpe_get_reg_offset(vpe, i, regVPEC_COLLABORATE_CNTL)); + vpe_colla_cntl = REG_SET_FIELD(vpe_colla_cntl, VPEC_COLLABORATE_CNTL, + COLLABORATE_MODE_EN, enable ? 1 : 0); + WREG32(vpe_get_reg_offset(vpe, i, regVPEC_COLLABORATE_CNTL), vpe_colla_cntl); + + vpe_colla_cfg = RREG32(vpe_get_reg_offset(vpe, i, regVPEC_COLLABORATE_CFG)); + vpe_colla_cfg = REG_SET_FIELD(vpe_colla_cfg, VPEC_COLLABORATE_CFG, MASTER_ID, 0); + vpe_colla_cfg = REG_SET_FIELD(vpe_colla_cfg, VPEC_COLLABORATE_CFG, MASTER_EN, enable ? 1 : 0); + vpe_colla_cfg = REG_SET_FIELD(vpe_colla_cfg, VPEC_COLLABORATE_CFG, SLAVE0_ID, 1); + vpe_colla_cfg = REG_SET_FIELD(vpe_colla_cfg, VPEC_COLLABORATE_CFG, SLAVE0_EN, enable ? 1 : 0); + WREG32(vpe_get_reg_offset(vpe, i, regVPEC_COLLABORATE_CFG), vpe_colla_cfg); + } +} + static int vpe_v6_1_load_microcode(struct amdgpu_vpe *vpe) { struct amdgpu_device *adev = vpe->ring.adev; const struct vpe_firmware_header_v1_0 *vpe_hdr; const __le32 *data; uint32_t ucode_offset[2], ucode_size[2]; - uint32_t i, size_dw; + uint32_t i, j, size_dw; uint32_t ret; - // disable UMSCH_INT_ENABLE - ret = RREG32(vpe_get_reg_offset(vpe, 0, regVPEC_CNTL)); - ret = REG_SET_FIELD(ret, VPEC_CNTL, UMSCH_INT_ENABLE, 0); - WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_CNTL), ret); + /* disable UMSCH_INT_ENABLE */ + for (j = 0; j < vpe->num_instances; j++) { + + if (amdgpu_ip_version(adev, VPE_HWIP, 0) == IP_VERSION(6, 1, 1)) + ret = RREG32(vpe_get_reg_offset(vpe, j, regVPEC_CNTL_6_1_1)); + else + ret = RREG32(vpe_get_reg_offset(vpe, j, regVPEC_CNTL)); + + ret = REG_SET_FIELD(ret, VPEC_CNTL, UMSCH_INT_ENABLE, 0); + + if (amdgpu_ip_version(adev, VPE_HWIP, 0) == IP_VERSION(6, 1, 1)) + WREG32(vpe_get_reg_offset(vpe, j, regVPEC_CNTL_6_1_1), ret); + else + WREG32(vpe_get_reg_offset(vpe, j, regVPEC_CNTL), ret); + } + + /* setup collaborate mode */ + vpe_v6_1_set_collaborate_mode(vpe, true); + /* setup DPM */ + if (amdgpu_vpe_configure_dpm(vpe)) + dev_warn(adev->dev, "VPE failed to enable DPM\n"); + /* + * For VPE 6.1.1, still only need to add master's offset, and psp will apply it to slave as well. + * Here use instance 0 as master. + */ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { uint32_t f32_offset, f32_cntl; @@ -95,8 +165,7 @@ static int vpe_v6_1_load_microcode(struct amdgpu_vpe *vpe) adev->vpe.cmdbuf_cpu_addr[0] = f32_offset; adev->vpe.cmdbuf_cpu_addr[1] = f32_cntl; - amdgpu_vpe_psp_update_sram(adev); - return 0; + return amdgpu_vpe_psp_update_sram(adev); } vpe_hdr = (const struct vpe_firmware_header_v1_0 *)adev->vpe.fw->data; @@ -110,21 +179,22 @@ static int vpe_v6_1_load_microcode(struct amdgpu_vpe *vpe) vpe_v6_1_halt(vpe, true); - for (i = 0; i < 2; i++) { - if (i > 0) - WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_UCODE_ADDR), VPE_THREAD1_UCODE_OFFSET); - else - WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_UCODE_ADDR), 0); - - data = (const __le32 *)(adev->vpe.fw->data + ucode_offset[i]); - size_dw = ucode_size[i] / sizeof(__le32); - - while (size_dw--) { - if (amdgpu_emu_mode && size_dw % 500 == 0) - msleep(1); - WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_UCODE_DATA), le32_to_cpup(data++)); + for (j = 0; j < vpe->num_instances; j++) { + for (i = 0; i < 2; i++) { + if (i > 0) + WREG32(vpe_get_reg_offset(vpe, j, regVPEC_UCODE_ADDR), VPE_THREAD1_UCODE_OFFSET); + else + WREG32(vpe_get_reg_offset(vpe, j, regVPEC_UCODE_ADDR), 0); + + data = (const __le32 *)(adev->vpe.fw->data + ucode_offset[i]); + size_dw = ucode_size[i] / sizeof(__le32); + + while (size_dw--) { + if (amdgpu_emu_mode && size_dw % 500 == 0) + msleep(1); + WREG32(vpe_get_reg_offset(vpe, j, regVPEC_UCODE_DATA), le32_to_cpup(data++)); + } } - } vpe_v6_1_halt(vpe, false); @@ -136,68 +206,68 @@ static int vpe_v6_1_ring_start(struct amdgpu_vpe *vpe) { struct amdgpu_ring *ring = &vpe->ring; struct amdgpu_device *adev = ring->adev; - uint32_t rb_bufsz, rb_cntl; - uint32_t ib_cntl; uint32_t doorbell, doorbell_offset; + uint32_t rb_bufsz, rb_cntl; + uint32_t ib_cntl, i; int ret; - rb_bufsz = order_base_2(ring->ring_size / 4); - rb_cntl = RREG32(vpe_get_reg_offset(vpe, 0, regVPEC_QUEUE0_RB_CNTL)); - rb_cntl = REG_SET_FIELD(rb_cntl, VPEC_QUEUE0_RB_CNTL, RB_SIZE, rb_bufsz); - rb_cntl = REG_SET_FIELD(rb_cntl, VPEC_QUEUE0_RB_CNTL, RB_PRIV, 1); - rb_cntl = REG_SET_FIELD(rb_cntl, VPEC_QUEUE0_RB_CNTL, RB_VMID, 0); - WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_QUEUE0_RB_CNTL), rb_cntl); - - WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_QUEUE0_RB_RPTR), 0); - WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_QUEUE0_RB_RPTR_HI), 0); - WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_QUEUE0_RB_WPTR), 0); - WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_QUEUE0_RB_WPTR_HI), 0); - - WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_QUEUE0_RB_RPTR_ADDR_LO), - lower_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFC); - WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_QUEUE0_RB_RPTR_ADDR_HI), - upper_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFF); - - WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_QUEUE0_RB_BASE), ring->gpu_addr >> 8); - WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_QUEUE0_RB_BASE_HI), ring->gpu_addr >> 40); - - ring->wptr = 0; - - /* before programing wptr to a less value, need set minor_ptr_update first */ - WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_QUEUE0_MINOR_PTR_UPDATE), 1); - - WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_QUEUE0_RB_WPTR), lower_32_bits(ring->wptr) << 2); - WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_QUEUE0_RB_WPTR_HI), upper_32_bits(ring->wptr) << 2); - - /* set minor_ptr_update to 0 after wptr programed */ - WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_QUEUE0_MINOR_PTR_UPDATE), 0); - - doorbell = RREG32(vpe_get_reg_offset(vpe, 0, regVPEC_QUEUE0_DOORBELL)); - doorbell_offset = RREG32(vpe_get_reg_offset(vpe, 0, regVPEC_QUEUE0_DOORBELL_OFFSET)); - - doorbell = REG_SET_FIELD(doorbell, VPEC_QUEUE0_DOORBELL, ENABLE, ring->use_doorbell ? 1 : 0); - doorbell_offset = REG_SET_FIELD(doorbell_offset, VPEC_QUEUE0_DOORBELL_OFFSET, OFFSET, ring->doorbell_index); - - WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_QUEUE0_DOORBELL), doorbell); - WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_QUEUE0_DOORBELL_OFFSET), doorbell_offset); - - adev->nbio.funcs->vpe_doorbell_range(adev, 0, ring->use_doorbell, ring->doorbell_index, 2); - - rb_cntl = REG_SET_FIELD(rb_cntl, VPEC_QUEUE0_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1); - rb_cntl = REG_SET_FIELD(rb_cntl, VPEC_QUEUE0_RB_CNTL, RB_ENABLE, 1); - WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_QUEUE0_RB_CNTL), rb_cntl); - - ib_cntl = RREG32(vpe_get_reg_offset(vpe, 0, regVPEC_QUEUE0_IB_CNTL)); - ib_cntl = REG_SET_FIELD(ib_cntl, VPEC_QUEUE0_IB_CNTL, IB_ENABLE, 1); - WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_QUEUE0_IB_CNTL), ib_cntl); - - ring->sched.ready = true; + for (i = 0; i < vpe->num_instances; i++) { + /* Set ring buffer size in dwords */ + rb_bufsz = order_base_2(ring->ring_size / 4); + rb_cntl = RREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE0_RB_CNTL)); + rb_cntl = REG_SET_FIELD(rb_cntl, VPEC_QUEUE0_RB_CNTL, RB_SIZE, rb_bufsz); + rb_cntl = REG_SET_FIELD(rb_cntl, VPEC_QUEUE0_RB_CNTL, RB_PRIV, 1); + rb_cntl = REG_SET_FIELD(rb_cntl, VPEC_QUEUE0_RB_CNTL, RB_VMID, 0); + WREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE0_RB_CNTL), rb_cntl); + + /* Initialize the ring buffer's read and write pointers */ + WREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE0_RB_RPTR), 0); + WREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE0_RB_RPTR_HI), 0); + WREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE0_RB_WPTR), 0); + WREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE0_RB_WPTR_HI), 0); + + /* set the wb address whether it's enabled or not */ + WREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE0_RB_RPTR_ADDR_LO), + lower_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFC); + WREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE0_RB_RPTR_ADDR_HI), + upper_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFF); + + rb_cntl = REG_SET_FIELD(rb_cntl, VPEC_QUEUE0_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1); + + WREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE0_RB_BASE), ring->gpu_addr >> 8); + WREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE0_RB_BASE_HI), ring->gpu_addr >> 40); + + ring->wptr = 0; + + /* before programing wptr to a less value, need set minor_ptr_update first */ + WREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE0_MINOR_PTR_UPDATE), 1); + WREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE0_RB_WPTR), lower_32_bits(ring->wptr) << 2); + WREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE0_RB_WPTR_HI), upper_32_bits(ring->wptr) << 2); + /* set minor_ptr_update to 0 after wptr programed */ + WREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE0_MINOR_PTR_UPDATE), 0); + + doorbell_offset = RREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE0_DOORBELL_OFFSET)); + doorbell_offset = REG_SET_FIELD(doorbell_offset, VPEC_QUEUE0_DOORBELL_OFFSET, OFFSET, ring->doorbell_index + i*4); + WREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE0_DOORBELL_OFFSET), doorbell_offset); + + doorbell = RREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE0_DOORBELL)); + doorbell = REG_SET_FIELD(doorbell, VPEC_QUEUE0_DOORBELL, ENABLE, ring->use_doorbell ? 1 : 0); + WREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE0_DOORBELL), doorbell); + + adev->nbio.funcs->vpe_doorbell_range(adev, i, ring->use_doorbell, ring->doorbell_index + i*4, 4); + + rb_cntl = REG_SET_FIELD(rb_cntl, VPEC_QUEUE0_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1); + rb_cntl = REG_SET_FIELD(rb_cntl, VPEC_QUEUE0_RB_CNTL, RB_ENABLE, 1); + WREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE0_RB_CNTL), rb_cntl); + + ib_cntl = RREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE0_IB_CNTL)); + ib_cntl = REG_SET_FIELD(ib_cntl, VPEC_QUEUE0_IB_CNTL, IB_ENABLE, 1); + WREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE0_IB_CNTL), ib_cntl); + } ret = amdgpu_ring_test_helper(ring); - if (ret) { - ring->sched.ready = false; + if (ret) return ret; - } return 0; } @@ -205,19 +275,34 @@ static int vpe_v6_1_ring_start(struct amdgpu_vpe *vpe) static int vpe_v_6_1_ring_stop(struct amdgpu_vpe *vpe) { struct amdgpu_device *adev = vpe->ring.adev; - uint32_t rb_cntl, ib_cntl; + uint32_t queue_reset, i; + int ret; - rb_cntl = RREG32(vpe_get_reg_offset(vpe, 0, regVPEC_QUEUE0_RB_CNTL)); - rb_cntl = REG_SET_FIELD(rb_cntl, VPEC_QUEUE0_RB_CNTL, RB_ENABLE, 0); - WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_QUEUE0_RB_CNTL), rb_cntl); + for (i = 0; i < vpe->num_instances; i++) { + if (amdgpu_ip_version(adev, VPE_HWIP, 0) == IP_VERSION(6, 1, 1)) + queue_reset = RREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE_RESET_REQ_6_1_1)); + else + queue_reset = RREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE_RESET_REQ)); + + queue_reset = REG_SET_FIELD(queue_reset, VPEC_QUEUE_RESET_REQ, QUEUE0_RESET, 1); + + if (amdgpu_ip_version(adev, VPE_HWIP, 0) == IP_VERSION(6, 1, 1)) { + WREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE_RESET_REQ_6_1_1), queue_reset); + ret = SOC15_WAIT_ON_RREG(VPE, i, regVPEC_QUEUE_RESET_REQ_6_1_1, 0, + VPEC_QUEUE_RESET_REQ__QUEUE0_RESET_MASK); + } else { + WREG32(vpe_get_reg_offset(vpe, i, regVPEC_QUEUE_RESET_REQ), queue_reset); + ret = SOC15_WAIT_ON_RREG(VPE, i, regVPEC_QUEUE_RESET_REQ, 0, + VPEC_QUEUE_RESET_REQ__QUEUE0_RESET_MASK); + } - ib_cntl = RREG32(vpe_get_reg_offset(vpe, 0, regVPEC_QUEUE0_IB_CNTL)); - ib_cntl = REG_SET_FIELD(ib_cntl, VPEC_QUEUE0_IB_CNTL, IB_ENABLE, 0); - WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_QUEUE0_IB_CNTL), ib_cntl); + if (ret) + dev_err(adev->dev, "VPE queue reset failed\n"); + } vpe->ring.sched.ready = false; - return 0; + return ret; } static int vpe_v6_1_set_trap_irq_state(struct amdgpu_device *adev, @@ -228,10 +313,18 @@ static int vpe_v6_1_set_trap_irq_state(struct amdgpu_device *adev, struct amdgpu_vpe *vpe = &adev->vpe; uint32_t vpe_cntl; - vpe_cntl = RREG32(vpe_get_reg_offset(vpe, 0, regVPEC_CNTL)); + if (amdgpu_ip_version(adev, VPE_HWIP, 0) == IP_VERSION(6, 1, 1)) + vpe_cntl = RREG32(vpe_get_reg_offset(vpe, 0, regVPEC_CNTL_6_1_1)); + else + vpe_cntl = RREG32(vpe_get_reg_offset(vpe, 0, regVPEC_CNTL)); + vpe_cntl = REG_SET_FIELD(vpe_cntl, VPEC_CNTL, TRAP_ENABLE, state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); - WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_CNTL), vpe_cntl); + + if (amdgpu_ip_version(adev, VPE_HWIP, 0) == IP_VERSION(6, 1, 1)) + WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_CNTL_6_1_1), vpe_cntl); + else + WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_CNTL), vpe_cntl); return 0; } @@ -256,12 +349,27 @@ static int vpe_v6_1_process_trap_irq(struct amdgpu_device *adev, static int vpe_v6_1_set_regs(struct amdgpu_vpe *vpe) { + struct amdgpu_device *adev = container_of(vpe, struct amdgpu_device, vpe); + vpe->regs.queue0_rb_rptr_lo = regVPEC_QUEUE0_RB_RPTR; vpe->regs.queue0_rb_rptr_hi = regVPEC_QUEUE0_RB_RPTR_HI; vpe->regs.queue0_rb_wptr_lo = regVPEC_QUEUE0_RB_WPTR; vpe->regs.queue0_rb_wptr_hi = regVPEC_QUEUE0_RB_WPTR_HI; vpe->regs.queue0_preempt = regVPEC_QUEUE0_PREEMPT; + if (amdgpu_ip_version(adev, VPE_HWIP, 0) == IP_VERSION(6, 1, 1)) + vpe->regs.dpm_enable = regVPEC_PUB_DUMMY2_6_1_1; + else + vpe->regs.dpm_enable = regVPEC_PUB_DUMMY2; + + vpe->regs.dpm_pratio = regVPEC_QUEUE6_DUMMY4; + vpe->regs.dpm_request_interval = regVPEC_QUEUE5_DUMMY3; + vpe->regs.dpm_decision_threshold = regVPEC_QUEUE5_DUMMY4; + vpe->regs.dpm_busy_clamp_threshold = regVPEC_QUEUE7_DUMMY2; + vpe->regs.dpm_idle_clamp_threshold = regVPEC_QUEUE7_DUMMY3; + vpe->regs.dpm_request_lv = regVPEC_QUEUE7_DUMMY1; + vpe->regs.context_indicator = regVPEC_QUEUE6_DUMMY3; + return 0; } |