diff options
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu')
242 files changed, 16946 insertions, 5044 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/Kconfig b/drivers/gpu/drm/amd/amdgpu/Kconfig index b91e79c721e2..22d88f8ef527 100644 --- a/drivers/gpu/drm/amd/amdgpu/Kconfig +++ b/drivers/gpu/drm/amd/amdgpu/Kconfig @@ -21,6 +21,7 @@ config DRM_AMDGPU select INTERVAL_TREE select DRM_BUDDY select DRM_SUBALLOC_HELPER + select DRM_EXEC # amdgpu depends on ACPI_VIDEO when ACPI is enabled, for select to work # ACPI_VIDEO's dependencies must also be selected. select INPUT if ACPI diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index 8d16f280b695..2afecc55090f 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -62,7 +62,7 @@ subdir-ccflags-$(CONFIG_DRM_AMDGPU_WERROR) += -Werror amdgpu-y := amdgpu_drv.o # add KMS driver -amdgpu-y += amdgpu_device.o amdgpu_kms.o \ +amdgpu-y += amdgpu_device.o amdgpu_doorbell_mgr.o amdgpu_kms.o \ amdgpu_atombios.o atombios_crtc.o amdgpu_connectors.o \ atom.o amdgpu_fence.o amdgpu_ttm.o amdgpu_object.o amdgpu_gart.o \ amdgpu_encoders.o amdgpu_display.o amdgpu_i2c.o \ @@ -98,13 +98,14 @@ amdgpu-y += \ vega20_reg_init.o nbio_v7_4.o nbio_v2_3.o nv.o arct_reg_init.o mxgpu_nv.o \ nbio_v7_2.o hdp_v4_0.o hdp_v5_0.o aldebaran_reg_init.o aldebaran.o soc21.o \ sienna_cichlid.o smu_v13_0_10.o nbio_v4_3.o hdp_v6_0.o nbio_v7_7.o hdp_v5_2.o lsdma_v6_0.o \ - nbio_v7_9.o aqua_vanjaram_reg_init.o + nbio_v7_9.o aqua_vanjaram.o nbio_v7_11.o # add DF block amdgpu-y += \ df_v1_7.o \ df_v3_6.o \ - df_v4_3.o + df_v4_3.o \ + df_v4_6_2.o # add GMC block amdgpu-y += \ @@ -113,11 +114,12 @@ amdgpu-y += \ gfxhub_v1_0.o mmhub_v1_0.o gmc_v9_0.o gfxhub_v1_1.o mmhub_v9_4.o \ gfxhub_v2_0.o mmhub_v2_0.o gmc_v10_0.o gfxhub_v2_1.o mmhub_v2_3.o \ mmhub_v1_7.o gfxhub_v3_0.o mmhub_v3_0.o mmhub_v3_0_2.o gmc_v11_0.o \ - mmhub_v3_0_1.o gfxhub_v3_0_3.o gfxhub_v1_2.o mmhub_v1_8.o + mmhub_v3_0_1.o gfxhub_v3_0_3.o gfxhub_v1_2.o mmhub_v1_8.o mmhub_v3_3.o \ + gfxhub_v11_5_0.o # add UMC block amdgpu-y += \ - umc_v6_0.o umc_v6_1.o umc_v6_7.o umc_v8_7.o umc_v8_10.o + umc_v6_0.o umc_v6_1.o umc_v6_7.o umc_v8_7.o umc_v8_10.o umc_v12_0.o # add IH block amdgpu-y += \ @@ -129,7 +131,8 @@ amdgpu-y += \ vega10_ih.o \ vega20_ih.o \ navi10_ih.o \ - ih_v6_0.o + ih_v6_0.o \ + ih_v6_1.o # add PSP block amdgpu-y += \ @@ -204,14 +207,27 @@ amdgpu-y += \ vcn_v3_0.o \ vcn_v4_0.o \ vcn_v4_0_3.o \ + vcn_v4_0_5.o \ amdgpu_jpeg.o \ jpeg_v1_0.o \ jpeg_v2_0.o \ jpeg_v2_5.o \ jpeg_v3_0.o \ jpeg_v4_0.o \ - jpeg_v4_0_3.o + jpeg_v4_0_3.o \ + jpeg_v4_0_5.o +# add VPE block +amdgpu-y += \ + amdgpu_vpe.o \ + vpe_v6_1.o + +# add UMSCH block +amdgpu-y += \ + amdgpu_umsch_mm.o \ + umsch_mm_v4_0.o + +# # add ATHUB block amdgpu-y += \ athub_v1_0.o \ diff --git a/drivers/gpu/drm/amd/amdgpu/aldebaran.c b/drivers/gpu/drm/amd/amdgpu/aldebaran.c index 2b97b8a96fb4..02f4c6f9d4f6 100644 --- a/drivers/gpu/drm/amd/amdgpu/aldebaran.c +++ b/drivers/gpu/drm/amd/amdgpu/aldebaran.c @@ -35,7 +35,7 @@ static bool aldebaran_is_mode2_default(struct amdgpu_reset_control *reset_ctl) { struct amdgpu_device *adev = (struct amdgpu_device *)reset_ctl->handle; - if ((adev->ip_versions[MP1_HWIP][0] == IP_VERSION(13, 0, 2) && + if ((amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 2) && adev->gmc.xgmi.connected_to_cpu)) return true; @@ -48,27 +48,24 @@ aldebaran_get_reset_handler(struct amdgpu_reset_control *reset_ctl, { struct amdgpu_reset_handler *handler; struct amdgpu_device *adev = (struct amdgpu_device *)reset_ctl->handle; + int i; + + if (reset_context->method == AMD_RESET_METHOD_NONE) { + if (aldebaran_is_mode2_default(reset_ctl)) + reset_context->method = AMD_RESET_METHOD_MODE2; + else + reset_context->method = amdgpu_asic_reset_method(adev); + } if (reset_context->method != AMD_RESET_METHOD_NONE) { dev_dbg(adev->dev, "Getting reset handler for method %d\n", reset_context->method); - list_for_each_entry(handler, &reset_ctl->reset_handlers, - handler_list) { + for_each_handler(i, handler, reset_ctl) { if (handler->reset_method == reset_context->method) return handler; } } - if (aldebaran_is_mode2_default(reset_ctl)) { - list_for_each_entry(handler, &reset_ctl->reset_handlers, - handler_list) { - if (handler->reset_method == AMD_RESET_METHOD_MODE2) { - reset_context->method = AMD_RESET_METHOD_MODE2; - return handler; - } - } - } - dev_dbg(adev->dev, "Reset handler not found!\n"); return NULL; @@ -124,9 +121,9 @@ static void aldebaran_async_reset(struct work_struct *work) struct amdgpu_reset_control *reset_ctl = container_of(work, struct amdgpu_reset_control, reset_work); struct amdgpu_device *adev = (struct amdgpu_device *)reset_ctl->handle; + int i; - list_for_each_entry(handler, &reset_ctl->reset_handlers, - handler_list) { + for_each_handler(i, handler, reset_ctl) { if (handler->reset_method == reset_ctl->active_reset) { dev_dbg(adev->dev, "Resetting device\n"); handler->do_reset(adev); @@ -157,7 +154,7 @@ aldebaran_mode2_perform_reset(struct amdgpu_reset_control *reset_ctl, if (reset_device_list == NULL) return -EINVAL; - if (adev->ip_versions[MP1_HWIP][0] == IP_VERSION(13, 0, 2) && + if (amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 2) && reset_context->hive == NULL) { /* Wrong context, return error */ return -EINVAL; @@ -338,7 +335,7 @@ aldebaran_mode2_restore_hwcontext(struct amdgpu_reset_control *reset_ctl, if (reset_device_list == NULL) return -EINVAL; - if (reset_context->reset_req_dev->ip_versions[MP1_HWIP][0] == + if (amdgpu_ip_version(reset_context->reset_req_dev, MP1_HWIP, 0) == IP_VERSION(13, 0, 2) && reset_context->hive == NULL) { /* Wrong context, return error */ @@ -395,6 +392,11 @@ static struct amdgpu_reset_handler aldebaran_mode2_handler = { .do_reset = aldebaran_mode2_reset, }; +static struct amdgpu_reset_handler + *aldebaran_rst_handlers[AMDGPU_RESET_MAX_HANDLERS] = { + &aldebaran_mode2_handler, + }; + int aldebaran_reset_init(struct amdgpu_device *adev) { struct amdgpu_reset_control *reset_ctl; @@ -408,10 +410,9 @@ int aldebaran_reset_init(struct amdgpu_device *adev) reset_ctl->active_reset = AMD_RESET_METHOD_NONE; reset_ctl->get_reset_handler = aldebaran_get_reset_handler; - INIT_LIST_HEAD(&reset_ctl->reset_handlers); INIT_WORK(&reset_ctl->reset_work, reset_ctl->async_reset); /* Only mode2 is handled through reset control now */ - amdgpu_reset_add_handler(reset_ctl, &aldebaran_mode2_handler); + reset_ctl->reset_handlers = &aldebaran_rst_handlers; adev->reset_cntl = reset_ctl; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 6dc950c1b689..9d92ca157677 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -53,7 +53,6 @@ #include <drm/ttm/ttm_bo.h> #include <drm/ttm/ttm_placement.h> -#include <drm/ttm/ttm_execbuf_util.h> #include <drm/amdgpu_drm.h> #include <drm/drm_gem.h> @@ -80,6 +79,8 @@ #include "amdgpu_vce.h" #include "amdgpu_vcn.h" #include "amdgpu_jpeg.h" +#include "amdgpu_vpe.h" +#include "amdgpu_umsch_mm.h" #include "amdgpu_gmc.h" #include "amdgpu_gfx.h" #include "amdgpu_sdma.h" @@ -193,7 +194,6 @@ extern int amdgpu_emu_mode; extern uint amdgpu_smu_memory_pool_size; extern int amdgpu_smu_pptable_id; extern uint amdgpu_dc_feature_mask; -extern uint amdgpu_freesync_vid_mode; extern uint amdgpu_dc_debug_mask; extern uint amdgpu_dc_visual_confirm; extern uint amdgpu_dm_abm_level; @@ -244,8 +244,11 @@ extern int amdgpu_num_kcq; #define AMDGPU_VCNFW_LOG_SIZE (32 * 1024) extern int amdgpu_vcnfw_log; extern int amdgpu_sg_display; +extern int amdgpu_umsch_mm; +extern int amdgpu_seamless; extern int amdgpu_user_partt_mode; +extern int amdgpu_agp; #define AMDGPU_VM_MAX_NUM_CTX 4096 #define AMDGPU_SG_THRESHOLD (256*1024*1024) @@ -361,9 +364,6 @@ struct amdgpu_ip_block_version { const struct amd_ip_funcs *funcs; }; -#define HW_REV(_Major, _Minor, _Rev) \ - ((((uint32_t) (_Major)) << 16) | ((uint32_t) (_Minor) << 8) | ((uint32_t) (_Rev))) - struct amdgpu_ip_block { struct amdgpu_ip_block_status status; const struct amdgpu_ip_block_version *version; @@ -625,6 +625,9 @@ typedef void (*amdgpu_wreg_ext_t)(struct amdgpu_device*, uint64_t, uint32_t); typedef uint64_t (*amdgpu_rreg64_t)(struct amdgpu_device*, uint32_t); typedef void (*amdgpu_wreg64_t)(struct amdgpu_device*, uint32_t, uint64_t); +typedef uint64_t (*amdgpu_rreg64_ext_t)(struct amdgpu_device*, uint64_t); +typedef void (*amdgpu_wreg64_ext_t)(struct amdgpu_device*, uint64_t, uint64_t); + typedef uint32_t (*amdgpu_block_rreg_t)(struct amdgpu_device*, uint32_t, uint32_t); typedef void (*amdgpu_block_wreg_t)(struct amdgpu_device*, uint32_t, uint32_t, uint32_t); @@ -656,6 +659,7 @@ enum amd_hw_ip_block_type { JPEG_HWIP = VCN_HWIP, VCN1_HWIP, VCE_HWIP, + VPE_HWIP, DF_HWIP, DCE_HWIP, OSSSYS_HWIP, @@ -675,10 +679,15 @@ enum amd_hw_ip_block_type { #define HWIP_MAX_INSTANCE 44 #define HW_ID_MAX 300 -#define IP_VERSION(mj, mn, rv) (((mj) << 16) | ((mn) << 8) | (rv)) -#define IP_VERSION_MAJ(ver) ((ver) >> 16) -#define IP_VERSION_MIN(ver) (((ver) >> 8) & 0xFF) -#define IP_VERSION_REV(ver) ((ver) & 0xFF) +#define IP_VERSION_FULL(mj, mn, rv, var, srev) \ + (((mj) << 24) | ((mn) << 16) | ((rv) << 8) | ((var) << 4) | (srev)) +#define IP_VERSION(mj, mn, rv) IP_VERSION_FULL(mj, mn, rv, 0, 0) +#define IP_VERSION_MAJ(ver) ((ver) >> 24) +#define IP_VERSION_MIN(ver) (((ver) >> 16) & 0xFF) +#define IP_VERSION_REV(ver) (((ver) >> 8) & 0xFF) +#define IP_VERSION_VARIANT(ver) (((ver) >> 4) & 0xF) +#define IP_VERSION_SUBREV(ver) ((ver) & 0xF) +#define IP_VERSION_MAJ_MIN_REV(ver) ((ver) >> 8) struct amdgpu_ip_map_info { /* Map of logical to actual dev instances/mask */ @@ -759,8 +768,19 @@ struct amdgpu_mqd { #define AMDGPU_RESET_MAGIC_NUM 64 #define AMDGPU_MAX_DF_PERFMONS 4 -#define AMDGPU_PRODUCT_NAME_LEN 64 struct amdgpu_reset_domain; +struct amdgpu_fru_info; + +struct amdgpu_reset_info { + /* reset dump register */ + u32 *reset_dump_reg_list; + u32 *reset_dump_reg_value; + int num_regs; + +#ifdef CONFIG_DEV_COREDUMP + struct amdgpu_coredump_info *coredump_info; +#endif +}; /* * Non-zero (true) if the GPU has VRAM. Zero (false) otherwise. @@ -828,6 +848,8 @@ struct amdgpu_device { amdgpu_wreg_ext_t pcie_wreg_ext; amdgpu_rreg64_t pcie_rreg64; amdgpu_wreg64_t pcie_wreg64; + amdgpu_rreg64_ext_t pcie_rreg64_ext; + amdgpu_wreg64_ext_t pcie_wreg64_ext; /* protects concurrent UVD register access */ spinlock_t uvd_ctx_idx_lock; amdgpu_rreg_t uvd_ctx_rreg; @@ -948,6 +970,13 @@ struct amdgpu_device { /* jpeg */ struct amdgpu_jpeg jpeg; + /* vpe */ + struct amdgpu_vpe vpe; + + /* umsch */ + struct amdgpu_umsch_mm umsch_mm; + bool enable_umsch_mm; + /* firmwares */ struct amdgpu_firmware firmware; @@ -1034,13 +1063,8 @@ struct amdgpu_device { bool has_pr3; bool ucode_sysfs_en; - bool psp_sysfs_en; - - /* Chip product information */ - char product_number[20]; - char product_name[AMDGPU_PRODUCT_NAME_LEN]; - char serial[20]; + struct amdgpu_fru_info *fru_info; atomic_t throttling_logging_enabled; struct ratelimit_state throttling_logging_rs; uint32_t ras_hw_enabled; @@ -1066,15 +1090,7 @@ struct amdgpu_device { struct mutex benchmark_mutex; - /* reset dump register */ - uint32_t *reset_dump_reg_list; - uint32_t *reset_dump_reg_value; - int num_regs; -#ifdef CONFIG_DEV_COREDUMP - struct amdgpu_task_info reset_task_info; - bool reset_vram_lost; - struct timespec64 reset_time; -#endif + struct amdgpu_reset_info reset_info; bool scpm_enabled; uint32_t scpm_status; @@ -1085,8 +1101,29 @@ struct amdgpu_device { bool dc_enabled; /* Mask of active clusters */ uint32_t aid_mask; + + /* Debug */ + bool debug_vm; + bool debug_largebar; + bool debug_disable_soft_recovery; }; +static inline uint32_t amdgpu_ip_version(const struct amdgpu_device *adev, + uint8_t ip, uint8_t inst) +{ + /* This considers only major/minor/rev and ignores + * subrevision/variant fields. + */ + return adev->ip_versions[ip][inst] & ~0xFFU; +} + +static inline uint32_t amdgpu_ip_version_full(const struct amdgpu_device *adev, + uint8_t ip, uint8_t inst) +{ + /* This returns full version - major/minor/rev/variant/subrevision */ + return adev->ip_versions[ip][inst]; +} + static inline struct amdgpu_device *drm_to_adev(struct drm_device *ddev) { return container_of(ddev, struct amdgpu_device, ddev); @@ -1123,13 +1160,20 @@ uint32_t amdgpu_device_rreg(struct amdgpu_device *adev, uint32_t reg, uint32_t acc_flags); u32 amdgpu_device_indirect_rreg_ext(struct amdgpu_device *adev, u64 reg_addr); +uint32_t amdgpu_device_xcc_rreg(struct amdgpu_device *adev, + uint32_t reg, uint32_t acc_flags, + uint32_t xcc_id); void amdgpu_device_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v, uint32_t acc_flags); void amdgpu_device_indirect_wreg_ext(struct amdgpu_device *adev, u64 reg_addr, u32 reg_data); +void amdgpu_device_xcc_wreg(struct amdgpu_device *adev, + uint32_t reg, uint32_t v, + uint32_t acc_flags, + uint32_t xcc_id); void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev, - uint32_t reg, uint32_t v); + uint32_t reg, uint32_t v, uint32_t xcc_id); void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value); uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset); @@ -1137,10 +1181,14 @@ u32 amdgpu_device_indirect_rreg(struct amdgpu_device *adev, u32 reg_addr); u64 amdgpu_device_indirect_rreg64(struct amdgpu_device *adev, u32 reg_addr); +u64 amdgpu_device_indirect_rreg64_ext(struct amdgpu_device *adev, + u64 reg_addr); void amdgpu_device_indirect_wreg(struct amdgpu_device *adev, u32 reg_addr, u32 reg_data); void amdgpu_device_indirect_wreg64(struct amdgpu_device *adev, u32 reg_addr, u64 reg_data); +void amdgpu_device_indirect_wreg64_ext(struct amdgpu_device *adev, + u64 reg_addr, u64 reg_data); u32 amdgpu_device_get_rev_id(struct amdgpu_device *adev); bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type); bool amdgpu_device_has_dc_support(struct amdgpu_device *adev); @@ -1164,8 +1212,8 @@ int emu_soc_asic_init(struct amdgpu_device *adev); #define RREG32_NO_KIQ(reg) amdgpu_device_rreg(adev, (reg), AMDGPU_REGS_NO_KIQ) #define WREG32_NO_KIQ(reg, v) amdgpu_device_wreg(adev, (reg), (v), AMDGPU_REGS_NO_KIQ) -#define RREG32_KIQ(reg) amdgpu_kiq_rreg(adev, (reg)) -#define WREG32_KIQ(reg, v) amdgpu_kiq_wreg(adev, (reg), (v)) +#define RREG32_KIQ(reg) amdgpu_kiq_rreg(adev, (reg), 0) +#define WREG32_KIQ(reg, v) amdgpu_kiq_wreg(adev, (reg), (v), 0) #define RREG8(reg) amdgpu_mm_rreg8(adev, (reg)) #define WREG8(reg, v) amdgpu_mm_wreg8(adev, (reg), (v)) @@ -1175,6 +1223,8 @@ int emu_soc_asic_init(struct amdgpu_device *adev); #define WREG32(reg, v) amdgpu_device_wreg(adev, (reg), (v), 0) #define REG_SET(FIELD, v) (((v) << FIELD##_SHIFT) & FIELD##_MASK) #define REG_GET(FIELD, v) (((v) << FIELD##_SHIFT) & FIELD##_MASK) +#define RREG32_XCC(reg, inst) amdgpu_device_xcc_rreg(adev, (reg), 0, inst) +#define WREG32_XCC(reg, v, inst) amdgpu_device_xcc_wreg(adev, (reg), (v), 0, inst) #define RREG32_PCIE(reg) adev->pcie_rreg(adev, (reg)) #define WREG32_PCIE(reg, v) adev->pcie_wreg(adev, (reg), (v)) #define RREG32_PCIE_PORT(reg) adev->pciep_rreg(adev, (reg)) @@ -1183,6 +1233,8 @@ int emu_soc_asic_init(struct amdgpu_device *adev); #define WREG32_PCIE_EXT(reg, v) adev->pcie_wreg_ext(adev, (reg), (v)) #define RREG64_PCIE(reg) adev->pcie_rreg64(adev, (reg)) #define WREG64_PCIE(reg, v) adev->pcie_wreg64(adev, (reg), (v)) +#define RREG64_PCIE_EXT(reg) adev->pcie_rreg64_ext(adev, (reg)) +#define WREG64_PCIE_EXT(reg, v) adev->pcie_wreg64_ext(adev, (reg), (v)) #define RREG32_SMC(reg) adev->smc_rreg(adev, (reg)) #define WREG32_SMC(reg, v) adev->smc_wreg(adev, (reg), (v)) #define RREG32_UVD_CTX(reg) adev->uvd_ctx_rreg(adev, (reg)) @@ -1278,15 +1330,13 @@ int emu_soc_asic_init(struct amdgpu_device *adev); ((adev)->asic_funcs->update_umd_stable_pstate ? (adev)->asic_funcs->update_umd_stable_pstate((adev), (enter)) : 0) #define amdgpu_asic_query_video_codecs(adev, e, c) (adev)->asic_funcs->query_video_codecs((adev), (e), (c)) -#define amdgpu_inc_vram_lost(adev) atomic_inc(&((adev)->vram_lost_counter)); +#define amdgpu_inc_vram_lost(adev) atomic_inc(&((adev)->vram_lost_counter)) #define BIT_MASK_UPPER(i) ((i) >= BITS_PER_LONG ? 0 : ~0UL << (i)) #define for_each_inst(i, inst_mask) \ for (i = ffs(inst_mask); i-- != 0; \ i = ffs(inst_mask & BIT_MASK_UPPER(i + 1))) -#define MIN(X, Y) ((X) < (Y) ? (X) : (Y)) - /* Common functions */ bool amdgpu_device_has_job_running(struct amdgpu_device *adev); bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev); @@ -1296,10 +1346,8 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, void amdgpu_device_pci_config_reset(struct amdgpu_device *adev); int amdgpu_device_pci_reset(struct amdgpu_device *adev); bool amdgpu_device_need_post(struct amdgpu_device *adev); -bool amdgpu_sg_display_supported(struct amdgpu_device *adev); -bool amdgpu_device_pcie_dynamic_switching_supported(void); +bool amdgpu_device_seamless_boot_supported(struct amdgpu_device *adev); bool amdgpu_device_should_use_aspm(struct amdgpu_device *adev); -bool amdgpu_device_aspm_support_quirk(void); void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev, u64 num_bytes, u64 num_vis_bytes); @@ -1371,6 +1419,7 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev, void amdgpu_driver_release_kms(struct drm_device *dev); int amdgpu_device_ip_suspend(struct amdgpu_device *adev); +int amdgpu_device_prepare(struct drm_device *dev); int amdgpu_device_suspend(struct drm_device *dev, bool fbcon); int amdgpu_device_resume(struct drm_device *dev, bool fbcon); u32 amdgpu_get_vblank_counter_kms(struct drm_crtc *crtc); @@ -1508,4 +1557,8 @@ static inline bool amdgpu_is_tmz(struct amdgpu_device *adev) int amdgpu_in_reset(struct amdgpu_device *adev); +extern const struct attribute_group amdgpu_vram_mgr_attr_group; +extern const struct attribute_group amdgpu_gtt_mgr_attr_group; +extern const struct attribute_group amdgpu_flash_attr_group; + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c index 385c6acb5728..2deebece810e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c @@ -68,7 +68,7 @@ struct amdgpu_acpi_xcc_info { struct amdgpu_acpi_dev_info { struct list_head list; struct list_head xcc_list; - uint16_t bdf; + uint32_t sbdf; uint16_t supp_xcp_mode; uint16_t xcp_mode; uint16_t mem_mode; @@ -706,7 +706,7 @@ int amdgpu_acpi_pcie_performance_request(struct amdgpu_device *adev, atcs_input.size = sizeof(struct atcs_pref_req_input); /* client id (bit 2-0: func num, 7-3: dev num, 15-8: bus num) */ - atcs_input.client_id = adev->pdev->devfn | (adev->pdev->bus->number << 8); + atcs_input.client_id = pci_dev_id(adev->pdev); atcs_input.valid_flags_mask = ATCS_VALID_FLAGS_MASK; atcs_input.flags = ATCS_WAIT_FOR_COMPLETION; if (advertise) @@ -776,7 +776,7 @@ int amdgpu_acpi_power_shift_control(struct amdgpu_device *adev, atcs_input.size = sizeof(struct atcs_pwr_shift_input); /* dGPU id (bit 2-0: func num, 7-3: dev num, 15-8: bus num) */ - atcs_input.dgpu_id = adev->pdev->devfn | (adev->pdev->bus->number << 8); + atcs_input.dgpu_id = pci_dev_id(adev->pdev); atcs_input.dev_acpi_state = dev_state; atcs_input.drv_state = drv_state; @@ -868,7 +868,7 @@ static struct amdgpu_numa_info *amdgpu_acpi_get_numa_info(uint32_t pxm) if (!numa_info) { struct sysinfo info; - numa_info = kzalloc(sizeof *numa_info, GFP_KERNEL); + numa_info = kzalloc(sizeof(*numa_info), GFP_KERNEL); if (!numa_info) return NULL; @@ -927,7 +927,7 @@ static acpi_status amdgpu_acpi_get_node_id(acpi_handle handle, #endif } -static struct amdgpu_acpi_dev_info *amdgpu_acpi_get_dev(u16 bdf) +static struct amdgpu_acpi_dev_info *amdgpu_acpi_get_dev(u32 sbdf) { struct amdgpu_acpi_dev_info *acpi_dev; @@ -935,14 +935,14 @@ static struct amdgpu_acpi_dev_info *amdgpu_acpi_get_dev(u16 bdf) return NULL; list_for_each_entry(acpi_dev, &amdgpu_acpi_dev_list, list) - if (acpi_dev->bdf == bdf) + if (acpi_dev->sbdf == sbdf) return acpi_dev; return NULL; } static int amdgpu_acpi_dev_init(struct amdgpu_acpi_dev_info **dev_info, - struct amdgpu_acpi_xcc_info *xcc_info, u16 bdf) + struct amdgpu_acpi_xcc_info *xcc_info, u32 sbdf) { struct amdgpu_acpi_dev_info *tmp; union acpi_object *obj; @@ -955,7 +955,7 @@ static int amdgpu_acpi_dev_init(struct amdgpu_acpi_dev_info **dev_info, INIT_LIST_HEAD(&tmp->xcc_list); INIT_LIST_HEAD(&tmp->list); - tmp->bdf = bdf; + tmp->sbdf = sbdf; obj = acpi_evaluate_dsm_typed(xcc_info->handle, &amd_xcc_dsm_guid, 0, AMD_XCC_DSM_GET_SUPP_MODE, NULL, @@ -1007,7 +1007,7 @@ static int amdgpu_acpi_dev_init(struct amdgpu_acpi_dev_info **dev_info, DRM_DEBUG_DRIVER( "New dev(%x): Supported xcp mode: %x curr xcp_mode : %x mem mode : %x, tmr base: %llx tmr size: %llx ", - tmp->bdf, tmp->supp_xcp_mode, tmp->xcp_mode, tmp->mem_mode, + tmp->sbdf, tmp->supp_xcp_mode, tmp->xcp_mode, tmp->mem_mode, tmp->tmr_base, tmp->tmr_size); list_add_tail(&tmp->list, &amdgpu_acpi_dev_list); *dev_info = tmp; @@ -1023,7 +1023,7 @@ out: } static int amdgpu_acpi_get_xcc_info(struct amdgpu_acpi_xcc_info *xcc_info, - u16 *bdf) + u32 *sbdf) { union acpi_object *obj; acpi_status status; @@ -1054,8 +1054,10 @@ static int amdgpu_acpi_get_xcc_info(struct amdgpu_acpi_xcc_info *xcc_info, xcc_info->phy_id = (obj->integer.value >> 32) & 0xFF; /* xcp node of this xcc [47:40] */ xcc_info->xcp_node = (obj->integer.value >> 40) & 0xFF; + /* PF domain of this xcc [31:16] */ + *sbdf = (obj->integer.value) & 0xFFFF0000; /* PF bus/dev/fn of this xcc [63:48] */ - *bdf = (obj->integer.value >> 48) & 0xFFFF; + *sbdf |= (obj->integer.value >> 48) & 0xFFFF; ACPI_FREE(obj); obj = NULL; @@ -1079,7 +1081,7 @@ static int amdgpu_acpi_enumerate_xcc(void) struct acpi_device *acpi_dev; char hid[ACPI_ID_LEN]; int ret, id; - u16 bdf; + u32 sbdf; INIT_LIST_HEAD(&amdgpu_acpi_dev_list); xa_init(&numa_info_xa); @@ -1107,16 +1109,16 @@ static int amdgpu_acpi_enumerate_xcc(void) xcc_info->handle = acpi_device_handle(acpi_dev); acpi_dev_put(acpi_dev); - ret = amdgpu_acpi_get_xcc_info(xcc_info, &bdf); + ret = amdgpu_acpi_get_xcc_info(xcc_info, &sbdf); if (ret) { kfree(xcc_info); continue; } - dev_info = amdgpu_acpi_get_dev(bdf); + dev_info = amdgpu_acpi_get_dev(sbdf); if (!dev_info) - ret = amdgpu_acpi_dev_init(&dev_info, xcc_info, bdf); + ret = amdgpu_acpi_dev_init(&dev_info, xcc_info, sbdf); if (ret == -ENOMEM) return ret; @@ -1136,13 +1138,14 @@ int amdgpu_acpi_get_tmr_info(struct amdgpu_device *adev, u64 *tmr_offset, u64 *tmr_size) { struct amdgpu_acpi_dev_info *dev_info; - u16 bdf; + u32 sbdf; if (!tmr_offset || !tmr_size) return -EINVAL; - bdf = (adev->pdev->bus->number << 8) | adev->pdev->devfn; - dev_info = amdgpu_acpi_get_dev(bdf); + sbdf = (pci_domain_nr(adev->pdev->bus) << 16); + sbdf |= pci_dev_id(adev->pdev); + dev_info = amdgpu_acpi_get_dev(sbdf); if (!dev_info) return -ENOENT; @@ -1157,13 +1160,14 @@ int amdgpu_acpi_get_mem_info(struct amdgpu_device *adev, int xcc_id, { struct amdgpu_acpi_dev_info *dev_info; struct amdgpu_acpi_xcc_info *xcc_info; - u16 bdf; + u32 sbdf; if (!numa_info) return -EINVAL; - bdf = (adev->pdev->bus->number << 8) | adev->pdev->devfn; - dev_info = amdgpu_acpi_get_dev(bdf); + sbdf = (pci_domain_nr(adev->pdev->bus) << 16); + sbdf |= pci_dev_id(adev->pdev); + dev_info = amdgpu_acpi_get_dev(sbdf); if (!dev_info) return -ENOENT; @@ -1389,14 +1393,11 @@ void amdgpu_acpi_detect(void) struct pci_dev *pdev = NULL; int ret; - while ((pdev = pci_get_class(PCI_CLASS_DISPLAY_VGA << 8, pdev)) != NULL) { - if (!atif->handle) - amdgpu_atif_pci_probe_handle(pdev); - if (!atcs->handle) - amdgpu_atcs_pci_probe_handle(pdev); - } + while ((pdev = pci_get_base_class(PCI_BASE_CLASS_DISPLAY, pdev))) { + if ((pdev->class != PCI_CLASS_DISPLAY_VGA << 8) && + (pdev->class != PCI_CLASS_DISPLAY_OTHER << 8)) + continue; - while ((pdev = pci_get_class(PCI_CLASS_DISPLAY_OTHER << 8, pdev)) != NULL) { if (!atif->handle) amdgpu_atif_pci_probe_handle(pdev); if (!atcs->handle) @@ -1493,6 +1494,9 @@ bool amdgpu_acpi_is_s0ix_active(struct amdgpu_device *adev) if (adev->asic_type < CHIP_RAVEN) return false; + if (!(adev->pm.pp_feature & PP_GFXOFF_MASK)) + return false; + /* * If ACPI_FADT_LOW_POWER_S0 is not set in the FADT, it is generally * risky to do any special firmware-related preparations for entering diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index b4fcad0e62f7..75dc58470393 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -28,6 +28,7 @@ #include "amdgpu.h" #include "amdgpu_gfx.h" #include "amdgpu_dma_buf.h" +#include <drm/ttm/ttm_tt.h> #include <linux/module.h> #include <linux/dma-buf.h> #include "amdgpu_xgmi.h" @@ -164,7 +165,7 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev) */ bitmap_complement(gpu_resources.cp_queue_bitmap, adev->gfx.mec_bitmap[0].queue_bitmap, - KGD_MAX_QUEUES); + AMDGPU_MAX_QUEUES); /* According to linux/bitmap.h we shouldn't use bitmap_clear if * nbits is not compile time constant @@ -172,7 +173,7 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev) last_valid_bit = 1 /* only first MEC can have compute queues */ * adev->gfx.mec.num_pipe_per_mec * adev->gfx.mec.num_queue_per_pipe; - for (i = last_valid_bit; i < KGD_MAX_QUEUES; ++i) + for (i = last_valid_bit; i < AMDGPU_MAX_QUEUES; ++i) clear_bit(i, gpu_resources.cp_queue_bitmap); amdgpu_doorbell_get_kfd_info(adev, @@ -226,16 +227,6 @@ void amdgpu_amdkfd_suspend(struct amdgpu_device *adev, bool run_pm) kgd2kfd_suspend(adev->kfd.dev, run_pm); } -int amdgpu_amdkfd_resume_iommu(struct amdgpu_device *adev) -{ - int r = 0; - - if (adev->kfd.dev) - r = kgd2kfd_resume_iommu(adev->kfd.dev); - - return r; -} - int amdgpu_amdkfd_resume(struct amdgpu_device *adev, bool run_pm) { int r = 0; @@ -452,9 +443,7 @@ void amdgpu_amdkfd_get_local_mem_info(struct amdgpu_device *adev, mem_info->local_mem_size_public, mem_info->local_mem_size_private); - if (amdgpu_sriov_vf(adev)) - mem_info->mem_clk_max = adev->clock.default_mclk / 100; - else if (adev->pm.dpm_enabled) { + if (adev->pm.dpm_enabled) { if (amdgpu_emu_mode == 1) mem_info->mem_clk_max = 0; else @@ -473,36 +462,12 @@ uint64_t amdgpu_amdkfd_get_gpu_clock_counter(struct amdgpu_device *adev) uint32_t amdgpu_amdkfd_get_max_engine_clock_in_mhz(struct amdgpu_device *adev) { /* the sclk is in quantas of 10kHz */ - if (amdgpu_sriov_vf(adev)) - return adev->clock.default_sclk / 100; - else if (adev->pm.dpm_enabled) + if (adev->pm.dpm_enabled) return amdgpu_dpm_get_sclk(adev, false) / 100; else return 100; } -void amdgpu_amdkfd_get_cu_info(struct amdgpu_device *adev, struct kfd_cu_info *cu_info) -{ - struct amdgpu_cu_info acu_info = adev->gfx.cu_info; - - memset(cu_info, 0, sizeof(*cu_info)); - if (sizeof(cu_info->cu_bitmap) != sizeof(acu_info.bitmap)) - return; - - cu_info->cu_active_number = acu_info.number; - cu_info->cu_ao_mask = acu_info.ao_cu_mask; - memcpy(&cu_info->cu_bitmap[0], &acu_info.bitmap[0], - sizeof(acu_info.bitmap)); - cu_info->num_shader_engines = adev->gfx.config.max_shader_engines; - cu_info->num_shader_arrays_per_engine = adev->gfx.config.max_sh_per_se; - cu_info->num_cu_per_sh = adev->gfx.config.max_cu_per_sh; - cu_info->simd_per_cu = acu_info.simd_per_cu; - cu_info->max_waves_per_simd = acu_info.max_waves_per_simd; - cu_info->wave_front_size = acu_info.wave_front_size; - cu_info->max_scratch_slots_per_cu = acu_info.max_scratch_slots_per_cu; - cu_info->lds_size = acu_info.lds_size; -} - int amdgpu_amdkfd_get_dmabuf_info(struct amdgpu_device *adev, int dma_buf_fd, struct amdgpu_device **dmabuf_adev, uint64_t *bo_size, void *metadata_buffer, @@ -582,7 +547,7 @@ int amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(struct amdgpu_device *dst, struct amdgpu_device *adev = dst, *peer_adev; int num_links; - if (adev->asic_type != CHIP_ALDEBARAN) + if (amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(9, 4, 2)) return 0; if (src) @@ -718,12 +683,19 @@ err: void amdgpu_amdkfd_set_compute_idle(struct amdgpu_device *adev, bool idle) { + enum amd_powergating_state state = idle ? AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE; /* Temporary workaround to fix issues observed in some * compute applications when GFXOFF is enabled on GFX11. */ - if (IP_VERSION_MAJ(adev->ip_versions[GC_HWIP][0]) == 11) { + if (IP_VERSION_MAJ(amdgpu_ip_version(adev, GC_HWIP, 0)) == 11) { pr_debug("GFXOFF is %s\n", idle ? "enabled" : "disabled"); amdgpu_gfx_off_ctrl(adev, idle); + } else if ((IP_VERSION_MAJ(amdgpu_ip_version(adev, GC_HWIP, 0)) == 9) && + (adev->flags & AMD_IS_APU)) { + /* Disable GFXOFF and PG. Temporary workaround + * to fix some compute applications issue on GFX9. + */ + adev->ip_blocks[AMD_IP_BLOCK_TYPE_GFX].version->funcs->set_powergating_state((void *)adev, state); } amdgpu_dpm_switch_power_profile(adev, PP_SMC_POWER_PROFILE_COMPUTE, @@ -819,14 +791,76 @@ void amdgpu_amdkfd_unlock_kfd(struct amdgpu_device *adev) u64 amdgpu_amdkfd_xcp_memory_size(struct amdgpu_device *adev, int xcp_id) { - u64 tmp; s8 mem_id = KFD_XCP_MEM_ID(adev, xcp_id); + u64 tmp; if (adev->gmc.num_mem_partitions && xcp_id >= 0 && mem_id >= 0) { - tmp = adev->gmc.mem_partitions[mem_id].size; + if (adev->gmc.is_app_apu && adev->gmc.num_mem_partitions == 1) { + /* In NPS1 mode, we should restrict the vram reporting + * tied to the ttm_pages_limit which is 1/2 of the system + * memory. For other partition modes, the HBM is uniformly + * divided already per numa node reported. If user wants to + * go beyond the default ttm limit and maximize the ROCm + * allocations, they can go up to max ttm and sysmem limits. + */ + + tmp = (ttm_tt_pages_limit() << PAGE_SHIFT) / num_online_nodes(); + } else { + tmp = adev->gmc.mem_partitions[mem_id].size; + } do_div(tmp, adev->xcp_mgr->num_xcp_per_mem_partition); return ALIGN_DOWN(tmp, PAGE_SIZE); } else { return adev->gmc.real_vram_size; } } + +int amdgpu_amdkfd_unmap_hiq(struct amdgpu_device *adev, u32 doorbell_off, + u32 inst) +{ + struct amdgpu_kiq *kiq = &adev->gfx.kiq[inst]; + struct amdgpu_ring *kiq_ring = &kiq->ring; + struct amdgpu_ring_funcs *ring_funcs; + struct amdgpu_ring *ring; + int r = 0; + + if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) + return -EINVAL; + + ring_funcs = kzalloc(sizeof(*ring_funcs), GFP_KERNEL); + if (!ring_funcs) + return -ENOMEM; + + ring = kzalloc(sizeof(*ring), GFP_KERNEL); + if (!ring) { + r = -ENOMEM; + goto free_ring_funcs; + } + + ring_funcs->type = AMDGPU_RING_TYPE_COMPUTE; + ring->doorbell_index = doorbell_off; + ring->funcs = ring_funcs; + + spin_lock(&kiq->ring_lock); + + if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) { + spin_unlock(&kiq->ring_lock); + r = -ENOMEM; + goto free_ring; + } + + kiq->pmf->kiq_unmap_queues(kiq_ring, ring, RESET_QUEUES, 0, 0); + + if (kiq_ring->sched.ready && !adev->job_hang) + r = amdgpu_ring_test_helper(kiq_ring); + + spin_unlock(&kiq->ring_lock); + +free_ring: + kfree(ring); + +free_ring_funcs: + kfree(ring_funcs); + + return r; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index 2d0406bff84e..dac983da961d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -25,6 +25,7 @@ #ifndef AMDGPU_AMDKFD_H_INCLUDED #define AMDGPU_AMDKFD_H_INCLUDED +#include <linux/list.h> #include <linux/types.h> #include <linux/mm.h> #include <linux/kthread.h> @@ -32,7 +33,6 @@ #include <linux/mmu_notifier.h> #include <linux/memremap.h> #include <kgd_kfd_interface.h> -#include <drm/ttm/ttm_execbuf_util.h> #include "amdgpu_sync.h" #include "amdgpu_vm.h" #include "amdgpu_xcp.h" @@ -71,8 +71,7 @@ struct kgd_mem { struct hmm_range *range; struct list_head attachments; /* protected by amdkfd_process_info.lock */ - struct ttm_validate_buffer validate_list; - struct ttm_validate_buffer resv_list; + struct list_head validate_list; uint32_t domain; unsigned int mapped_to_gpu_memory; uint64_t va; @@ -149,7 +148,6 @@ int amdgpu_amdkfd_init(void); void amdgpu_amdkfd_fini(void); void amdgpu_amdkfd_suspend(struct amdgpu_device *adev, bool run_pm); -int amdgpu_amdkfd_resume_iommu(struct amdgpu_device *adev); int amdgpu_amdkfd_resume(struct amdgpu_device *adev, bool run_pm); void amdgpu_amdkfd_interrupt(struct amdgpu_device *adev, const void *ih_ring_entry); @@ -237,8 +235,6 @@ void amdgpu_amdkfd_get_local_mem_info(struct amdgpu_device *adev, uint64_t amdgpu_amdkfd_get_gpu_clock_counter(struct amdgpu_device *adev); uint32_t amdgpu_amdkfd_get_max_engine_clock_in_mhz(struct amdgpu_device *adev); -void amdgpu_amdkfd_get_cu_info(struct amdgpu_device *adev, - struct kfd_cu_info *cu_info); int amdgpu_amdkfd_get_dmabuf_info(struct amdgpu_device *adev, int dma_buf_fd, struct amdgpu_device **dmabuf_adev, uint64_t *bo_size, void *metadata_buffer, @@ -252,6 +248,8 @@ int amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(struct amdgpu_device *dst, int amdgpu_amdkfd_get_pcie_bandwidth_mbytes(struct amdgpu_device *adev, bool is_min); int amdgpu_amdkfd_send_close_event_drain_irq(struct amdgpu_device *adev, uint32_t *payload); +int amdgpu_amdkfd_unmap_hiq(struct amdgpu_device *adev, u32 doorbell_off, + u32 inst); /* Read user wptr from a specified user address space with page fault * disabled. The memory must be pinned and mapped to the hardware when @@ -303,6 +301,7 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(struct amdgpu_device *adev, struct kgd_mem *mem, void *drm_priv); int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu( struct amdgpu_device *adev, struct kgd_mem *mem, void *drm_priv); +void amdgpu_amdkfd_gpuvm_dmaunmap_mem(struct kgd_mem *mem, void *drm_priv); int amdgpu_amdkfd_gpuvm_sync_memory( struct amdgpu_device *adev, struct kgd_mem *mem, bool intr); int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_mem *mem, @@ -398,7 +397,6 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, const struct kgd2kfd_shared_resources *gpu_resources); void kgd2kfd_device_exit(struct kfd_dev *kfd); void kgd2kfd_suspend(struct kfd_dev *kfd, bool run_pm); -int kgd2kfd_resume_iommu(struct kfd_dev *kfd); int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm); int kgd2kfd_pre_reset(struct kfd_dev *kfd); int kgd2kfd_post_reset(struct kfd_dev *kfd); @@ -438,11 +436,6 @@ static inline void kgd2kfd_suspend(struct kfd_dev *kfd, bool run_pm) { } -static int __maybe_unused kgd2kfd_resume_iommu(struct kfd_dev *kfd) -{ - return 0; -} - static inline int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm) { return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c index 60f9e027fb66..aff08321e976 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c @@ -23,6 +23,7 @@ #include "amdgpu_amdkfd.h" #include "amdgpu_amdkfd_arcturus.h" #include "amdgpu_amdkfd_gfx_v9.h" +#include "amdgpu_amdkfd_aldebaran.h" #include "gc/gc_9_4_2_offset.h" #include "gc/gc_9_4_2_sh_mask.h" #include <uapi/linux/kfd_ioctl.h> @@ -36,7 +37,7 @@ * initialize the debug mode registers after it has disabled GFX off during the * debug session. */ -static uint32_t kgd_aldebaran_enable_debug_trap(struct amdgpu_device *adev, +uint32_t kgd_aldebaran_enable_debug_trap(struct amdgpu_device *adev, bool restore_dbg_registers, uint32_t vmid) { @@ -107,7 +108,7 @@ static uint32_t kgd_aldebaran_set_wave_launch_trap_override(struct amdgpu_device return data; } -static uint32_t kgd_aldebaran_set_wave_launch_mode(struct amdgpu_device *adev, +uint32_t kgd_aldebaran_set_wave_launch_mode(struct amdgpu_device *adev, uint8_t wave_launch_mode, uint32_t vmid) { @@ -125,7 +126,8 @@ static uint32_t kgd_gfx_aldebaran_set_address_watch( uint32_t watch_address_mask, uint32_t watch_id, uint32_t watch_mode, - uint32_t debug_vmid) + uint32_t debug_vmid, + uint32_t inst) { uint32_t watch_address_high; uint32_t watch_address_low; @@ -161,12 +163,6 @@ static uint32_t kgd_gfx_aldebaran_set_address_watch( return watch_address_cntl; } -static uint32_t kgd_gfx_aldebaran_clear_address_watch(struct amdgpu_device *adev, - uint32_t watch_id) -{ - return 0; -} - const struct kfd2kgd_calls aldebaran_kfd2kgd = { .program_sh_mem_settings = kgd_gfx_v9_program_sh_mem_settings, .set_pasid_vmid_mapping = kgd_gfx_v9_set_pasid_vmid_mapping, @@ -191,7 +187,7 @@ const struct kfd2kgd_calls aldebaran_kfd2kgd = { .set_wave_launch_trap_override = kgd_aldebaran_set_wave_launch_trap_override, .set_wave_launch_mode = kgd_aldebaran_set_wave_launch_mode, .set_address_watch = kgd_gfx_aldebaran_set_address_watch, - .clear_address_watch = kgd_gfx_aldebaran_clear_address_watch, + .clear_address_watch = kgd_gfx_v9_clear_address_watch, .get_iq_wait_times = kgd_gfx_v9_get_iq_wait_times, .build_grace_period_packet_info = kgd_gfx_v9_build_grace_period_packet_info, .program_trap_handler_settings = kgd_gfx_v9_program_trap_handler_settings, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.h new file mode 100644 index 000000000000..a7bdaf8d82dd --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.h @@ -0,0 +1,27 @@ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +uint32_t kgd_aldebaran_enable_debug_trap(struct amdgpu_device *adev, + bool restore_dbg_registers, + uint32_t vmid); +uint32_t kgd_aldebaran_set_wave_launch_mode(struct amdgpu_device *adev, + uint8_t wave_launch_mode, + uint32_t vmid); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c index 5b4b7f8b92a5..f6598b9e4faa 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c @@ -22,6 +22,7 @@ #include "amdgpu.h" #include "amdgpu_amdkfd.h" #include "amdgpu_amdkfd_gfx_v9.h" +#include "amdgpu_amdkfd_aldebaran.h" #include "gc/gc_9_4_3_offset.h" #include "gc/gc_9_4_3_sh_mask.h" #include "athub/athub_1_8_0_offset.h" @@ -32,6 +33,7 @@ #include "soc15.h" #include "sdma/sdma_4_4_2_offset.h" #include "sdma/sdma_4_4_2_sh_mask.h" +#include <uapi/linux/kfd_ioctl.h> static inline struct v9_sdma_mqd *get_sdma_mqd(void *mqd) { @@ -298,14 +300,13 @@ static int kgd_gfx_v9_4_3_hqd_load(struct amdgpu_device *adev, void *mqd, hqd_end = SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_HQD_AQL_DISPATCH_ID_HI); for (reg = hqd_base; reg <= hqd_end; reg++) - WREG32_RLC(reg, mqd_hqd[reg - hqd_base]); + WREG32_XCC(reg, mqd_hqd[reg - hqd_base], inst); /* Activate doorbell logic before triggering WPTR poll. */ data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1); - WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_HQD_PQ_DOORBELL_CONTROL), - data); + WREG32_SOC15_RLC(GC, GET_INST(GC, inst), regCP_HQD_PQ_DOORBELL_CONTROL, data); if (wptr) { /* Don't read wptr with get_user because the user @@ -334,33 +335,180 @@ static int kgd_gfx_v9_4_3_hqd_load(struct amdgpu_device *adev, void *mqd, guessed_wptr += m->cp_hqd_pq_wptr_lo & ~(queue_size - 1); guessed_wptr += (uint64_t)m->cp_hqd_pq_wptr_hi << 32; - WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_HQD_PQ_WPTR_LO), - lower_32_bits(guessed_wptr)); - WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_HQD_PQ_WPTR_HI), - upper_32_bits(guessed_wptr)); - WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_HQD_PQ_WPTR_POLL_ADDR), - lower_32_bits((uintptr_t)wptr)); - WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), - regCP_HQD_PQ_WPTR_POLL_ADDR_HI), + WREG32_SOC15_RLC(GC, GET_INST(GC, inst), regCP_HQD_PQ_WPTR_LO, + lower_32_bits(guessed_wptr)); + WREG32_SOC15_RLC(GC, GET_INST(GC, inst), regCP_HQD_PQ_WPTR_HI, + upper_32_bits(guessed_wptr)); + WREG32_SOC15_RLC(GC, GET_INST(GC, inst), regCP_HQD_PQ_WPTR_POLL_ADDR, + lower_32_bits((uintptr_t)wptr)); + WREG32_SOC15_RLC(GC, GET_INST(GC, inst), regCP_HQD_PQ_WPTR_POLL_ADDR_HI, upper_32_bits((uintptr_t)wptr)); - WREG32(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_PQ_WPTR_POLL_CNTL1), - (uint32_t)kgd_gfx_v9_get_queue_mask(adev, pipe_id, - queue_id)); + WREG32_SOC15_RLC(GC, GET_INST(GC, inst), regCP_PQ_WPTR_POLL_CNTL1, + (uint32_t)kgd_gfx_v9_get_queue_mask(adev, pipe_id, queue_id)); } /* Start the EOP fetcher */ - WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_HQD_EOP_RPTR), - REG_SET_FIELD(m->cp_hqd_eop_rptr, - CP_HQD_EOP_RPTR, INIT_FETCHER, 1)); + WREG32_SOC15_RLC(GC, GET_INST(GC, inst), regCP_HQD_EOP_RPTR, + REG_SET_FIELD(m->cp_hqd_eop_rptr, CP_HQD_EOP_RPTR, INIT_FETCHER, 1)); data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1); - WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), regCP_HQD_ACTIVE), data); + WREG32_SOC15_RLC(GC, GET_INST(GC, inst), regCP_HQD_ACTIVE, data); kgd_gfx_v9_release_queue(adev, inst); return 0; } +/* returns TRAP_EN, EXCP_EN and EXCP_REPLACE. */ +static uint32_t kgd_gfx_v9_4_3_disable_debug_trap(struct amdgpu_device *adev, + bool keep_trap_enabled, + uint32_t vmid) +{ + uint32_t data = 0; + + data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, 1); + data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_EN, 0); + data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_REPLACE, 0); + + return data; +} + +static int kgd_gfx_v9_4_3_validate_trap_override_request( + struct amdgpu_device *adev, + uint32_t trap_override, + uint32_t *trap_mask_supported) +{ + *trap_mask_supported &= KFD_DBG_TRAP_MASK_FP_INVALID | + KFD_DBG_TRAP_MASK_FP_INPUT_DENORMAL | + KFD_DBG_TRAP_MASK_FP_DIVIDE_BY_ZERO | + KFD_DBG_TRAP_MASK_FP_OVERFLOW | + KFD_DBG_TRAP_MASK_FP_UNDERFLOW | + KFD_DBG_TRAP_MASK_FP_INEXACT | + KFD_DBG_TRAP_MASK_INT_DIVIDE_BY_ZERO | + KFD_DBG_TRAP_MASK_DBG_ADDRESS_WATCH | + KFD_DBG_TRAP_MASK_DBG_MEMORY_VIOLATION | + KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_START | + KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_END; + + if (trap_override != KFD_DBG_TRAP_OVERRIDE_OR && + trap_override != KFD_DBG_TRAP_OVERRIDE_REPLACE) + return -EPERM; + + return 0; +} + +static uint32_t trap_mask_map_sw_to_hw(uint32_t mask) +{ + uint32_t trap_on_start = (mask & KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_START) ? 1 : 0; + uint32_t trap_on_end = (mask & KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_END) ? 1 : 0; + uint32_t excp_en = mask & (KFD_DBG_TRAP_MASK_FP_INVALID | + KFD_DBG_TRAP_MASK_FP_INPUT_DENORMAL | + KFD_DBG_TRAP_MASK_FP_DIVIDE_BY_ZERO | + KFD_DBG_TRAP_MASK_FP_OVERFLOW | + KFD_DBG_TRAP_MASK_FP_UNDERFLOW | + KFD_DBG_TRAP_MASK_FP_INEXACT | + KFD_DBG_TRAP_MASK_INT_DIVIDE_BY_ZERO | + KFD_DBG_TRAP_MASK_DBG_ADDRESS_WATCH | + KFD_DBG_TRAP_MASK_DBG_MEMORY_VIOLATION); + uint32_t ret; + + ret = REG_SET_FIELD(0, SPI_GDBG_PER_VMID_CNTL, EXCP_EN, excp_en); + ret = REG_SET_FIELD(ret, SPI_GDBG_PER_VMID_CNTL, TRAP_ON_START, trap_on_start); + ret = REG_SET_FIELD(ret, SPI_GDBG_PER_VMID_CNTL, TRAP_ON_END, trap_on_end); + + return ret; +} + +static uint32_t trap_mask_map_hw_to_sw(uint32_t mask) +{ + uint32_t ret = REG_GET_FIELD(mask, SPI_GDBG_PER_VMID_CNTL, EXCP_EN); + + if (REG_GET_FIELD(mask, SPI_GDBG_PER_VMID_CNTL, TRAP_ON_START)) + ret |= KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_START; + + if (REG_GET_FIELD(mask, SPI_GDBG_PER_VMID_CNTL, TRAP_ON_END)) + ret |= KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_END; + + return ret; +} + +/* returns TRAP_EN, EXCP_EN and EXCP_REPLACE. */ +static uint32_t kgd_gfx_v9_4_3_set_wave_launch_trap_override( + struct amdgpu_device *adev, + uint32_t vmid, + uint32_t trap_override, + uint32_t trap_mask_bits, + uint32_t trap_mask_request, + uint32_t *trap_mask_prev, + uint32_t kfd_dbg_trap_cntl_prev) + +{ + uint32_t data = 0; + + *trap_mask_prev = trap_mask_map_hw_to_sw(kfd_dbg_trap_cntl_prev); + + data = (trap_mask_bits & trap_mask_request) | + (*trap_mask_prev & ~trap_mask_request); + data = trap_mask_map_sw_to_hw(data); + + data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, 1); + data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_REPLACE, trap_override); + + return data; +} + +#define TCP_WATCH_STRIDE (regTCP_WATCH1_ADDR_H - regTCP_WATCH0_ADDR_H) +static uint32_t kgd_gfx_v9_4_3_set_address_watch( + struct amdgpu_device *adev, + uint64_t watch_address, + uint32_t watch_address_mask, + uint32_t watch_id, + uint32_t watch_mode, + uint32_t debug_vmid, + uint32_t inst) +{ + uint32_t watch_address_high; + uint32_t watch_address_low; + uint32_t watch_address_cntl; + + watch_address_cntl = 0; + watch_address_low = lower_32_bits(watch_address); + watch_address_high = upper_32_bits(watch_address) & 0xffff; + + watch_address_cntl = REG_SET_FIELD(watch_address_cntl, + TCP_WATCH0_CNTL, + MODE, + watch_mode); + + watch_address_cntl = REG_SET_FIELD(watch_address_cntl, + TCP_WATCH0_CNTL, + MASK, + watch_address_mask >> 7); + + watch_address_cntl = REG_SET_FIELD(watch_address_cntl, + TCP_WATCH0_CNTL, + VALID, + 1); + + WREG32_XCC((SOC15_REG_OFFSET(GC, GET_INST(GC, inst), + regTCP_WATCH0_ADDR_H) + + (watch_id * TCP_WATCH_STRIDE)), + watch_address_high, inst); + + WREG32_XCC((SOC15_REG_OFFSET(GC, GET_INST(GC, inst), + regTCP_WATCH0_ADDR_L) + + (watch_id * TCP_WATCH_STRIDE)), + watch_address_low, inst); + + return watch_address_cntl; +} + +static uint32_t kgd_gfx_v9_4_3_clear_address_watch(struct amdgpu_device *adev, + uint32_t watch_id) +{ + return 0; +} + const struct kfd2kgd_calls gc_9_4_3_kfd2kgd = { .program_sh_mem_settings = kgd_gfx_v9_program_sh_mem_settings, .set_pasid_vmid_mapping = kgd_gfx_v9_4_3_set_pasid_vmid_mapping, @@ -379,6 +527,19 @@ const struct kfd2kgd_calls gc_9_4_3_kfd2kgd = { kgd_gfx_v9_get_atc_vmid_pasid_mapping_info, .set_vm_context_page_table_base = kgd_gfx_v9_set_vm_context_page_table_base, + .get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy, .program_trap_handler_settings = - kgd_gfx_v9_program_trap_handler_settings + kgd_gfx_v9_program_trap_handler_settings, + .build_grace_period_packet_info = + kgd_gfx_v9_build_grace_period_packet_info, + .get_iq_wait_times = kgd_gfx_v9_get_iq_wait_times, + .enable_debug_trap = kgd_aldebaran_enable_debug_trap, + .disable_debug_trap = kgd_gfx_v9_4_3_disable_debug_trap, + .validate_trap_override_request = + kgd_gfx_v9_4_3_validate_trap_override_request, + .set_wave_launch_trap_override = + kgd_gfx_v9_4_3_set_wave_launch_trap_override, + .set_wave_launch_mode = kgd_aldebaran_set_wave_launch_mode, + .set_address_watch = kgd_gfx_v9_4_3_set_address_watch, + .clear_address_watch = kgd_gfx_v9_4_3_clear_address_watch }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c index 8ad7a7779e14..69810b3f1c63 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c @@ -886,7 +886,8 @@ uint32_t kgd_gfx_v10_set_address_watch(struct amdgpu_device *adev, uint32_t watch_address_mask, uint32_t watch_id, uint32_t watch_mode, - uint32_t debug_vmid) + uint32_t debug_vmid, + uint32_t inst) { uint32_t watch_address_high; uint32_t watch_address_low; @@ -968,7 +969,8 @@ uint32_t kgd_gfx_v10_clear_address_watch(struct amdgpu_device *adev, * deq_retry_wait_time -- Wait Count for Global Wave Syncs. */ void kgd_gfx_v10_get_iq_wait_times(struct amdgpu_device *adev, - uint32_t *wait_times) + uint32_t *wait_times, + uint32_t inst) { *wait_times = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_IQ_WAIT_TIME2)); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h index e6b70196071a..67bcaa3d4226 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h @@ -44,10 +44,13 @@ uint32_t kgd_gfx_v10_set_address_watch(struct amdgpu_device *adev, uint32_t watch_address_mask, uint32_t watch_id, uint32_t watch_mode, - uint32_t debug_vmid); + uint32_t debug_vmid, + uint32_t inst); uint32_t kgd_gfx_v10_clear_address_watch(struct amdgpu_device *adev, uint32_t watch_id); -void kgd_gfx_v10_get_iq_wait_times(struct amdgpu_device *adev, uint32_t *wait_times); +void kgd_gfx_v10_get_iq_wait_times(struct amdgpu_device *adev, + uint32_t *wait_times, + uint32_t inst); void kgd_gfx_v10_build_grace_period_packet_info(struct amdgpu_device *adev, uint32_t wait_times, uint32_t grace_period, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c index 91c3574ebed3..b61a32d6af4b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c @@ -637,7 +637,7 @@ static uint32_t kgd_gfx_v11_disable_debug_trap(struct amdgpu_device *adev, { uint32_t data = 0; - data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, keep_trap_enabled); + data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, 1); data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_EN, 0); data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, EXCP_REPLACE, 0); @@ -658,7 +658,7 @@ static int kgd_gfx_v11_validate_trap_override_request(struct amdgpu_device *adev KFD_DBG_TRAP_MASK_DBG_ADDRESS_WATCH | KFD_DBG_TRAP_MASK_DBG_MEMORY_VIOLATION; - if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(11, 0, 4)) + if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(11, 0, 4)) *trap_mask_supported |= KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_START | KFD_DBG_TRAP_MASK_TRAP_ON_WAVE_END; @@ -743,7 +743,8 @@ static uint32_t kgd_gfx_v11_set_address_watch(struct amdgpu_device *adev, uint32_t watch_address_mask, uint32_t watch_id, uint32_t watch_mode, - uint32_t debug_vmid) + uint32_t debug_vmid, + uint32_t inst) { uint32_t watch_address_high; uint32_t watch_address_low; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c index 51d93fb13ea3..00fbc0f44c92 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c @@ -91,8 +91,8 @@ void kgd_gfx_v9_program_sh_mem_settings(struct amdgpu_device *adev, uint32_t vmi { kgd_gfx_v9_lock_srbm(adev, 0, 0, 0, vmid, inst); - WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmSH_MEM_CONFIG), sh_mem_config); - WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmSH_MEM_BASES), sh_mem_bases); + WREG32_SOC15_RLC(GC, GET_INST(GC, inst), mmSH_MEM_CONFIG, sh_mem_config); + WREG32_SOC15_RLC(GC, GET_INST(GC, inst), mmSH_MEM_BASES, sh_mem_bases); /* APE1 no longer exists on GFX9 */ kgd_gfx_v9_unlock_srbm(adev, inst); @@ -239,14 +239,13 @@ int kgd_gfx_v9_hqd_load(struct amdgpu_device *adev, void *mqd, for (reg = hqd_base; reg <= SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmCP_HQD_PQ_WPTR_HI); reg++) - WREG32_RLC(reg, mqd_hqd[reg - hqd_base]); + WREG32_XCC(reg, mqd_hqd[reg - hqd_base], inst); /* Activate doorbell logic before triggering WPTR poll. */ data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1); - WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmCP_HQD_PQ_DOORBELL_CONTROL), - data); + WREG32_SOC15_RLC(GC, GET_INST(GC, inst), mmCP_HQD_PQ_DOORBELL_CONTROL, data); if (wptr) { /* Don't read wptr with get_user because the user @@ -275,25 +274,24 @@ int kgd_gfx_v9_hqd_load(struct amdgpu_device *adev, void *mqd, guessed_wptr += m->cp_hqd_pq_wptr_lo & ~(queue_size - 1); guessed_wptr += (uint64_t)m->cp_hqd_pq_wptr_hi << 32; - WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmCP_HQD_PQ_WPTR_LO), - lower_32_bits(guessed_wptr)); - WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmCP_HQD_PQ_WPTR_HI), - upper_32_bits(guessed_wptr)); - WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmCP_HQD_PQ_WPTR_POLL_ADDR), - lower_32_bits((uintptr_t)wptr)); - WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmCP_HQD_PQ_WPTR_POLL_ADDR_HI), - upper_32_bits((uintptr_t)wptr)); - WREG32_SOC15(GC, GET_INST(GC, inst), mmCP_PQ_WPTR_POLL_CNTL1, - (uint32_t)kgd_gfx_v9_get_queue_mask(adev, pipe_id, queue_id)); + WREG32_SOC15_RLC(GC, GET_INST(GC, inst), mmCP_HQD_PQ_WPTR_LO, + lower_32_bits(guessed_wptr)); + WREG32_SOC15_RLC(GC, GET_INST(GC, inst), mmCP_HQD_PQ_WPTR_HI, + upper_32_bits(guessed_wptr)); + WREG32_SOC15_RLC(GC, GET_INST(GC, inst), mmCP_HQD_PQ_WPTR_POLL_ADDR, + lower_32_bits((uintptr_t)wptr)); + WREG32_SOC15_RLC(GC, GET_INST(GC, inst), mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, + upper_32_bits((uintptr_t)wptr)); + WREG32_SOC15_RLC(GC, GET_INST(GC, inst), mmCP_PQ_WPTR_POLL_CNTL1, + (uint32_t)kgd_gfx_v9_get_queue_mask(adev, pipe_id, queue_id)); } /* Start the EOP fetcher */ - WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmCP_HQD_EOP_RPTR), - REG_SET_FIELD(m->cp_hqd_eop_rptr, - CP_HQD_EOP_RPTR, INIT_FETCHER, 1)); + WREG32_SOC15_RLC(GC, GET_INST(GC, inst), mmCP_HQD_EOP_RPTR, + REG_SET_FIELD(m->cp_hqd_eop_rptr, CP_HQD_EOP_RPTR, INIT_FETCHER, 1)); data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1); - WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmCP_HQD_ACTIVE), data); + WREG32_SOC15_RLC(GC, GET_INST(GC, inst), mmCP_HQD_ACTIVE, data); kgd_gfx_v9_release_queue(adev, inst); @@ -556,7 +554,7 @@ int kgd_gfx_v9_hqd_destroy(struct amdgpu_device *adev, void *mqd, break; } - WREG32_RLC(SOC15_REG_OFFSET(GC, GET_INST(GC, inst), mmCP_HQD_DEQUEUE_REQUEST), type); + WREG32_SOC15_RLC(GC, GET_INST(GC, inst), mmCP_HQD_DEQUEUE_REQUEST, type); end_jiffies = (utimeout * HZ / 1000) + jiffies; while (true) { @@ -677,7 +675,7 @@ void kgd_gfx_v9_set_wave_launch_stall(struct amdgpu_device *adev, int i; uint32_t data = RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_WAVE_CNTL)); - if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 1)) + if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 1)) data = REG_SET_FIELD(data, SPI_GDBG_WAVE_CNTL, STALL_VMID, stall ? 1 << vmid : 0); else @@ -822,7 +820,8 @@ uint32_t kgd_gfx_v9_set_address_watch(struct amdgpu_device *adev, uint32_t watch_address_mask, uint32_t watch_id, uint32_t watch_mode, - uint32_t debug_vmid) + uint32_t debug_vmid, + uint32_t inst) { uint32_t watch_address_high; uint32_t watch_address_low; @@ -903,10 +902,12 @@ uint32_t kgd_gfx_v9_clear_address_watch(struct amdgpu_device *adev, * deq_retry_wait_time -- Wait Count for Global Wave Syncs. */ void kgd_gfx_v9_get_iq_wait_times(struct amdgpu_device *adev, - uint32_t *wait_times) + uint32_t *wait_times, + uint32_t inst) { - *wait_times = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_IQ_WAIT_TIME2)); + *wait_times = RREG32_SOC15_RLC(GC, GET_INST(GC, inst), + mmCP_IQ_WAIT_TIME2); } void kgd_gfx_v9_set_vm_context_page_table_base(struct amdgpu_device *adev, @@ -1034,7 +1035,7 @@ void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device *adev, int pasid, int pasid_tmp; int max_queue_cnt; int vmid_wave_cnt = 0; - DECLARE_BITMAP(cp_queue_bitmap, KGD_MAX_QUEUES); + DECLARE_BITMAP(cp_queue_bitmap, AMDGPU_MAX_QUEUES); lock_spi_csq_mutexes(adev); soc15_grbm_select(adev, 1, 0, 0, 0, inst); @@ -1044,7 +1045,7 @@ void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device *adev, int pasid, * to get number of waves in flight */ bitmap_complement(cp_queue_bitmap, adev->gfx.mec_bitmap[0].queue_bitmap, - KGD_MAX_QUEUES); + AMDGPU_MAX_QUEUES); max_queue_cnt = adev->gfx.mec.num_pipe_per_mec * adev->gfx.mec.num_queue_per_pipe; sh_cnt = adev->gfx.config.max_sh_per_se; @@ -1105,7 +1106,7 @@ void kgd_gfx_v9_build_grace_period_packet_info(struct amdgpu_device *adev, *reg_data = wait_times; /* - * The CP cannont handle a 0 grace period input and will result in + * The CP cannot handle a 0 grace period input and will result in * an infinite grace period being set so set to 1 to prevent this. */ if (grace_period == 0) @@ -1128,9 +1129,9 @@ void kgd_gfx_v9_program_trap_handler_settings(struct amdgpu_device *adev, * Program TBA registers */ WREG32_SOC15(GC, GET_INST(GC, inst), mmSQ_SHADER_TBA_LO, - lower_32_bits(tba_addr >> 8)); + lower_32_bits(tba_addr >> 8)); WREG32_SOC15(GC, GET_INST(GC, inst), mmSQ_SHADER_TBA_HI, - upper_32_bits(tba_addr >> 8)); + upper_32_bits(tba_addr >> 8)); /* * Program TMA registers diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h index 5f54bff0db49..ce424615f59b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h @@ -89,10 +89,13 @@ uint32_t kgd_gfx_v9_set_address_watch(struct amdgpu_device *adev, uint32_t watch_address_mask, uint32_t watch_id, uint32_t watch_mode, - uint32_t debug_vmid); + uint32_t debug_vmid, + uint32_t inst); uint32_t kgd_gfx_v9_clear_address_watch(struct amdgpu_device *adev, uint32_t watch_id); -void kgd_gfx_v9_get_iq_wait_times(struct amdgpu_device *adev, uint32_t *wait_times); +void kgd_gfx_v9_get_iq_wait_times(struct amdgpu_device *adev, + uint32_t *wait_times, + uint32_t inst); void kgd_gfx_v9_build_grace_period_packet_info(struct amdgpu_device *adev, uint32_t wait_times, uint32_t grace_period, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index d34c3ef8f3ed..41fbc4fd0fac 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -27,6 +27,8 @@ #include <linux/sched/task.h> #include <drm/ttm/ttm_tt.h> +#include <drm/drm_exec.h> + #include "amdgpu_object.h" #include "amdgpu_gem.h" #include "amdgpu_vm.h" @@ -37,12 +39,12 @@ #include "amdgpu_xgmi.h" #include "kfd_priv.h" #include "kfd_smi_events.h" -#include <drm/ttm/ttm_tt.h> /* Userptr restore delay, just long enough to allow consecutive VM * changes to accumulate */ #define AMDGPU_USERPTR_RESTORE_DELAY_MS 1 +#define AMDGPU_RESERVE_MEM_LIMIT (3UL << 29) /* * Align VRAM availability to 2MB to avoid fragmentation caused by 4K allocations in the tail 2MB @@ -116,11 +118,16 @@ void amdgpu_amdkfd_gpuvm_init_mem_limits(void) return; si_meminfo(&si); - mem = si.freeram - si.freehigh; + mem = si.totalram - si.totalhigh; mem *= si.mem_unit; spin_lock_init(&kfd_mem_limit.mem_limit_lock); - kfd_mem_limit.max_system_mem_limit = mem - (mem >> 4); + kfd_mem_limit.max_system_mem_limit = mem - (mem >> 6); + if (kfd_mem_limit.max_system_mem_limit < 2 * AMDGPU_RESERVE_MEM_LIMIT) + kfd_mem_limit.max_system_mem_limit >>= 1; + else + kfd_mem_limit.max_system_mem_limit -= AMDGPU_RESERVE_MEM_LIMIT; + kfd_mem_limit.max_ttm_mem_limit = ttm_tt_pages_limit() << PAGE_SHIFT; pr_debug("Kernel memory limit %lluM, TTM limit %lluM\n", (kfd_mem_limit.max_system_mem_limit >> 20), @@ -418,6 +425,32 @@ validate_fail: return ret; } +static int amdgpu_amdkfd_bo_validate_and_fence(struct amdgpu_bo *bo, + uint32_t domain, + struct dma_fence *fence) +{ + int ret = amdgpu_bo_reserve(bo, false); + + if (ret) + return ret; + + ret = amdgpu_amdkfd_bo_validate(bo, domain, true); + if (ret) + goto unreserve_out; + + ret = dma_resv_reserve_fences(bo->tbo.base.resv, 1); + if (ret) + goto unreserve_out; + + dma_resv_add_fence(bo->tbo.base.resv, fence, + DMA_RESV_USAGE_BOOKKEEP); + +unreserve_out: + amdgpu_bo_unreserve(bo); + + return ret; +} + static int amdgpu_amdkfd_validate_vm_bo(void *_unused, struct amdgpu_bo *bo) { return amdgpu_amdkfd_bo_validate(bo, bo->allowed_domains, false); @@ -732,7 +765,7 @@ kfd_mem_dmaunmap_sg_bo(struct kgd_mem *mem, enum dma_data_direction dir; if (unlikely(!ttm->sg)) { - pr_err("SG Table of BO is UNEXPECTEDLY NULL"); + pr_debug("SG Table of BO is NULL"); return; } @@ -827,6 +860,7 @@ static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem, uint64_t va = mem->va; struct kfd_mem_attachment *attachment[2] = {NULL, NULL}; struct amdgpu_bo *bo[2] = {NULL, NULL}; + struct amdgpu_bo_va *bo_va; bool same_hive = false; int i, ret; @@ -865,9 +899,10 @@ static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem, if ((adev == bo_adev && !(mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)) || (amdgpu_ttm_tt_get_usermm(mem->bo->tbo.ttm) && reuse_dmamap(adev, bo_adev)) || - same_hive) { + (mem->domain == AMDGPU_GEM_DOMAIN_GTT && reuse_dmamap(adev, bo_adev)) || + same_hive) { /* Mappings on the local GPU, or VRAM mappings in the - * local hive, or userptr mapping can reuse dma map + * local hive, or userptr, or GTT mapping can reuse dma map * address space share the original BO */ attachment[i]->type = KFD_MEM_ATT_SHARED; @@ -913,7 +948,12 @@ static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem, pr_debug("Unable to reserve BO during memory attach"); goto unwind; } - attachment[i]->bo_va = amdgpu_vm_bo_add(adev, vm, bo[i]); + bo_va = amdgpu_vm_bo_find(vm, bo[i]); + if (!bo_va) + bo_va = amdgpu_vm_bo_add(adev, vm, bo[i]); + else + ++bo_va->ref_count; + attachment[i]->bo_va = bo_va; amdgpu_bo_unreserve(bo[i]); if (unlikely(!attachment[i]->bo_va)) { ret = -ENOMEM; @@ -937,7 +977,8 @@ unwind: continue; if (attachment[i]->bo_va) { amdgpu_bo_reserve(bo[i], true); - amdgpu_vm_bo_del(adev, attachment[i]->bo_va); + if (--attachment[i]->bo_va->ref_count == 0) + amdgpu_vm_bo_del(adev, attachment[i]->bo_va); amdgpu_bo_unreserve(bo[i]); list_del(&attachment[i]->list); } @@ -954,7 +995,8 @@ static void kfd_mem_detach(struct kfd_mem_attachment *attachment) pr_debug("\t remove VA 0x%llx in entry %p\n", attachment->va, attachment); - amdgpu_vm_bo_del(attachment->adev, attachment->bo_va); + if (--attachment->bo_va->ref_count == 0) + amdgpu_vm_bo_del(attachment->adev, attachment->bo_va); drm_gem_object_put(&bo->tbo.base); list_del(&attachment->list); kfree(attachment); @@ -964,28 +1006,20 @@ static void add_kgd_mem_to_kfd_bo_list(struct kgd_mem *mem, struct amdkfd_process_info *process_info, bool userptr) { - struct ttm_validate_buffer *entry = &mem->validate_list; - struct amdgpu_bo *bo = mem->bo; - - INIT_LIST_HEAD(&entry->head); - entry->num_shared = 1; - entry->bo = &bo->tbo; mutex_lock(&process_info->lock); if (userptr) - list_add_tail(&entry->head, &process_info->userptr_valid_list); + list_add_tail(&mem->validate_list, + &process_info->userptr_valid_list); else - list_add_tail(&entry->head, &process_info->kfd_bo_list); + list_add_tail(&mem->validate_list, &process_info->kfd_bo_list); mutex_unlock(&process_info->lock); } static void remove_kgd_mem_from_kfd_bo_list(struct kgd_mem *mem, struct amdkfd_process_info *process_info) { - struct ttm_validate_buffer *bo_list_entry; - - bo_list_entry = &mem->validate_list; mutex_lock(&process_info->lock); - list_del(&bo_list_entry->head); + list_del(&mem->validate_list); mutex_unlock(&process_info->lock); } @@ -1072,13 +1106,12 @@ out: * object can track VM updates. */ struct bo_vm_reservation_context { - struct amdgpu_bo_list_entry kfd_bo; /* BO list entry for the KFD BO */ - unsigned int n_vms; /* Number of VMs reserved */ - struct amdgpu_bo_list_entry *vm_pd; /* Array of VM BO list entries */ - struct ww_acquire_ctx ticket; /* Reservation ticket */ - struct list_head list, duplicates; /* BO lists */ - struct amdgpu_sync *sync; /* Pointer to sync object */ - bool reserved; /* Whether BOs are reserved */ + /* DRM execution context for the reservation */ + struct drm_exec exec; + /* Number of VMs reserved */ + unsigned int n_vms; + /* Pointer to sync object */ + struct amdgpu_sync *sync; }; enum bo_vm_match { @@ -1102,35 +1135,26 @@ static int reserve_bo_and_vm(struct kgd_mem *mem, WARN_ON(!vm); - ctx->reserved = false; ctx->n_vms = 1; ctx->sync = &mem->sync; - - INIT_LIST_HEAD(&ctx->list); - INIT_LIST_HEAD(&ctx->duplicates); - - ctx->vm_pd = kcalloc(ctx->n_vms, sizeof(*ctx->vm_pd), GFP_KERNEL); - if (!ctx->vm_pd) - return -ENOMEM; - - ctx->kfd_bo.priority = 0; - ctx->kfd_bo.tv.bo = &bo->tbo; - ctx->kfd_bo.tv.num_shared = 1; - list_add(&ctx->kfd_bo.tv.head, &ctx->list); - - amdgpu_vm_get_pd_bo(vm, &ctx->list, &ctx->vm_pd[0]); - - ret = ttm_eu_reserve_buffers(&ctx->ticket, &ctx->list, - false, &ctx->duplicates); - if (ret) { - pr_err("Failed to reserve buffers in ttm.\n"); - kfree(ctx->vm_pd); - ctx->vm_pd = NULL; - return ret; + drm_exec_init(&ctx->exec, DRM_EXEC_INTERRUPTIBLE_WAIT); + drm_exec_until_all_locked(&ctx->exec) { + ret = amdgpu_vm_lock_pd(vm, &ctx->exec, 2); + drm_exec_retry_on_contention(&ctx->exec); + if (unlikely(ret)) + goto error; + + ret = drm_exec_prepare_obj(&ctx->exec, &bo->tbo.base, 1); + drm_exec_retry_on_contention(&ctx->exec); + if (unlikely(ret)) + goto error; } - - ctx->reserved = true; return 0; + +error: + pr_err("Failed to reserve buffers in ttm.\n"); + drm_exec_fini(&ctx->exec); + return ret; } /** @@ -1147,63 +1171,39 @@ static int reserve_bo_and_cond_vms(struct kgd_mem *mem, struct amdgpu_vm *vm, enum bo_vm_match map_type, struct bo_vm_reservation_context *ctx) { - struct amdgpu_bo *bo = mem->bo; struct kfd_mem_attachment *entry; - unsigned int i; + struct amdgpu_bo *bo = mem->bo; int ret; - ctx->reserved = false; - ctx->n_vms = 0; - ctx->vm_pd = NULL; ctx->sync = &mem->sync; + drm_exec_init(&ctx->exec, DRM_EXEC_INTERRUPTIBLE_WAIT); + drm_exec_until_all_locked(&ctx->exec) { + ctx->n_vms = 0; + list_for_each_entry(entry, &mem->attachments, list) { + if ((vm && vm != entry->bo_va->base.vm) || + (entry->is_mapped != map_type + && map_type != BO_VM_ALL)) + continue; - INIT_LIST_HEAD(&ctx->list); - INIT_LIST_HEAD(&ctx->duplicates); - - list_for_each_entry(entry, &mem->attachments, list) { - if ((vm && vm != entry->bo_va->base.vm) || - (entry->is_mapped != map_type - && map_type != BO_VM_ALL)) - continue; - - ctx->n_vms++; - } - - if (ctx->n_vms != 0) { - ctx->vm_pd = kcalloc(ctx->n_vms, sizeof(*ctx->vm_pd), - GFP_KERNEL); - if (!ctx->vm_pd) - return -ENOMEM; - } - - ctx->kfd_bo.priority = 0; - ctx->kfd_bo.tv.bo = &bo->tbo; - ctx->kfd_bo.tv.num_shared = 1; - list_add(&ctx->kfd_bo.tv.head, &ctx->list); - - i = 0; - list_for_each_entry(entry, &mem->attachments, list) { - if ((vm && vm != entry->bo_va->base.vm) || - (entry->is_mapped != map_type - && map_type != BO_VM_ALL)) - continue; - - amdgpu_vm_get_pd_bo(entry->bo_va->base.vm, &ctx->list, - &ctx->vm_pd[i]); - i++; - } + ret = amdgpu_vm_lock_pd(entry->bo_va->base.vm, + &ctx->exec, 2); + drm_exec_retry_on_contention(&ctx->exec); + if (unlikely(ret)) + goto error; + ++ctx->n_vms; + } - ret = ttm_eu_reserve_buffers(&ctx->ticket, &ctx->list, - false, &ctx->duplicates); - if (ret) { - pr_err("Failed to reserve buffers in ttm.\n"); - kfree(ctx->vm_pd); - ctx->vm_pd = NULL; - return ret; + ret = drm_exec_prepare_obj(&ctx->exec, &bo->tbo.base, 1); + drm_exec_retry_on_contention(&ctx->exec); + if (unlikely(ret)) + goto error; } - - ctx->reserved = true; return 0; + +error: + pr_err("Failed to reserve buffers in ttm.\n"); + drm_exec_fini(&ctx->exec); + return ret; } /** @@ -1224,15 +1224,8 @@ static int unreserve_bo_and_vms(struct bo_vm_reservation_context *ctx, if (wait) ret = amdgpu_sync_wait(ctx->sync, intr); - if (ctx->reserved) - ttm_eu_backoff_reservation(&ctx->ticket, &ctx->list); - kfree(ctx->vm_pd); - + drm_exec_fini(&ctx->exec); ctx->sync = NULL; - - ctx->reserved = false; - ctx->vm_pd = NULL; - return ret; } @@ -1249,8 +1242,6 @@ static void unmap_bo_from_gpuvm(struct kgd_mem *mem, amdgpu_vm_clear_freed(adev, vm, &bo_va->last_pt_update); amdgpu_sync_fence(sync, bo_va->last_pt_update); - - kfd_mem_dmaunmap_attachment(mem, entry); } static int update_gpuvm_pte(struct kgd_mem *mem, @@ -1305,6 +1296,7 @@ static int map_bo_to_gpuvm(struct kgd_mem *mem, update_gpuvm_pte_failed: unmap_bo_from_gpuvm(mem, entry, sync); + kfd_mem_dmaunmap_attachment(mem, entry); return ret; } @@ -1738,6 +1730,8 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( if (flags & KFD_IOC_ALLOC_MEM_FLAGS_COHERENT) alloc_flags |= AMDGPU_GEM_CREATE_COHERENT; + if (flags & KFD_IOC_ALLOC_MEM_FLAGS_EXT_COHERENT) + alloc_flags |= AMDGPU_GEM_CREATE_EXT_COHERENT; if (flags & KFD_IOC_ALLOC_MEM_FLAGS_UNCACHED) alloc_flags |= AMDGPU_GEM_CREATE_UNCACHED; @@ -1816,6 +1810,15 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( } bo->allowed_domains = AMDGPU_GEM_DOMAIN_GTT; bo->preferred_domains = AMDGPU_GEM_DOMAIN_GTT; + } else { + mutex_lock(&avm->process_info->lock); + if (avm->process_info->eviction_fence && + !dma_fence_is_signaled(&avm->process_info->eviction_fence->base)) + ret = amdgpu_amdkfd_bo_validate_and_fence(bo, domain, + &avm->process_info->eviction_fence->base); + mutex_unlock(&avm->process_info->lock); + if (ret) + goto err_validate_bo; } if (offset) @@ -1825,6 +1828,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( allocate_init_user_pages_failed: err_pin_bo: +err_validate_bo: remove_kgd_mem_from_kfd_bo_list(*mem, avm->process_info); drm_vma_node_revoke(&gobj->vma_node, drm_priv); err_node_allow: @@ -1855,7 +1859,6 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( bool use_release_notifier = (mem->bo->kfd_bo == mem); struct kfd_mem_attachment *entry, *tmp; struct bo_vm_reservation_context ctx; - struct ttm_validate_buffer *bo_list_entry; unsigned int mapped_to_gpu_memory; int ret; bool is_imported = false; @@ -1883,9 +1886,8 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( } /* Make sure restore workers don't access the BO any more */ - bo_list_entry = &mem->validate_list; mutex_lock(&process_info->lock); - list_del(&bo_list_entry->head); + list_del(&mem->validate_list); mutex_unlock(&process_info->lock); /* Cleanup user pages and MMU notifiers */ @@ -1900,18 +1902,16 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( if (unlikely(ret)) return ret; - /* The eviction fence should be removed by the last unmap. - * TODO: Log an error condition if the bo still has the eviction fence - * attached - */ amdgpu_amdkfd_remove_eviction_fence(mem->bo, process_info->eviction_fence); pr_debug("Release VA 0x%llx - 0x%llx\n", mem->va, mem->va + bo_size * (1 + mem->aql_queue)); /* Remove from VM internal data structures */ - list_for_each_entry_safe(entry, tmp, &mem->attachments, list) + list_for_each_entry_safe(entry, tmp, &mem->attachments, list) { + kfd_mem_dmaunmap_attachment(mem, entry); kfd_mem_detach(entry); + } ret = unreserve_bo_and_vms(&ctx, false, false); @@ -2030,19 +2030,6 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( if (unlikely(ret)) goto out_unreserve; - if (mem->mapped_to_gpu_memory == 0 && - !amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) { - /* Validate BO only once. The eviction fence gets added to BO - * the first time it is mapped. Validate will wait for all - * background evictions to complete. - */ - ret = amdgpu_amdkfd_bo_validate(bo, domain, true); - if (ret) { - pr_debug("Validate failed\n"); - goto out_unreserve; - } - } - list_for_each_entry(entry, &mem->attachments, list) { if (entry->bo_va->base.vm != avm || entry->is_mapped) continue; @@ -2069,10 +2056,6 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( mem->mapped_to_gpu_memory); } - if (!amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) && !bo->tbo.pin_count) - dma_resv_add_fence(bo->tbo.base.resv, - &avm->process_info->eviction_fence->base, - DMA_RESV_USAGE_BOOKKEEP); ret = unreserve_bo_and_vms(&ctx, false, false); goto out; @@ -2085,11 +2068,27 @@ out: return ret; } +void amdgpu_amdkfd_gpuvm_dmaunmap_mem(struct kgd_mem *mem, void *drm_priv) +{ + struct kfd_mem_attachment *entry; + struct amdgpu_vm *vm; + + vm = drm_priv_to_vm(drm_priv); + + mutex_lock(&mem->lock); + + list_for_each_entry(entry, &mem->attachments, list) { + if (entry->bo_va->base.vm == vm) + kfd_mem_dmaunmap_attachment(mem, entry); + } + + mutex_unlock(&mem->lock); +} + int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu( struct amdgpu_device *adev, struct kgd_mem *mem, void *drm_priv) { struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv); - struct amdkfd_process_info *process_info = avm->process_info; unsigned long bo_size = mem->bo->tbo.base.size; struct kfd_mem_attachment *entry; struct bo_vm_reservation_context ctx; @@ -2130,15 +2129,6 @@ int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu( mem->mapped_to_gpu_memory); } - /* If BO is unmapped from all VMs, unfence it. It can be evicted if - * required. - */ - if (mem->mapped_to_gpu_memory == 0 && - !amdgpu_ttm_tt_get_usermm(mem->bo->tbo.ttm) && - !mem->bo->tbo.pin_count) - amdgpu_amdkfd_remove_eviction_fence(mem->bo, - process_info->eviction_fence); - unreserve_out: unreserve_bo_and_vms(&ctx, false, false); out: @@ -2366,8 +2356,20 @@ int amdgpu_amdkfd_gpuvm_import_dmabuf(struct amdgpu_device *adev, amdgpu_sync_create(&(*mem)->sync); (*mem)->is_imported = true; + mutex_lock(&avm->process_info->lock); + if (avm->process_info->eviction_fence && + !dma_fence_is_signaled(&avm->process_info->eviction_fence->base)) + ret = amdgpu_amdkfd_bo_validate_and_fence(bo, (*mem)->domain, + &avm->process_info->eviction_fence->base); + mutex_unlock(&avm->process_info->lock); + if (ret) + goto err_remove_mem; + return 0; +err_remove_mem: + remove_kgd_mem_from_kfd_bo_list(*mem, avm->process_info); + drm_vma_node_revoke(&obj->vma_node, drm_priv); err_free_mem: kfree(*mem); err_put_obj: @@ -2452,14 +2454,14 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info, /* Move all invalidated BOs to the userptr_inval_list */ list_for_each_entry_safe(mem, tmp_mem, &process_info->userptr_valid_list, - validate_list.head) + validate_list) if (mem->invalid) - list_move_tail(&mem->validate_list.head, + list_move_tail(&mem->validate_list, &process_info->userptr_inval_list); /* Go through userptr_inval_list and update any invalid user_pages */ list_for_each_entry(mem, &process_info->userptr_inval_list, - validate_list.head) { + validate_list) { invalid = mem->invalid; if (!invalid) /* BO hasn't been invalidated since the last @@ -2539,50 +2541,41 @@ unlock_out: */ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info) { - struct amdgpu_bo_list_entry *pd_bo_list_entries; - struct list_head resv_list, duplicates; - struct ww_acquire_ctx ticket; + struct ttm_operation_ctx ctx = { false, false }; struct amdgpu_sync sync; + struct drm_exec exec; struct amdgpu_vm *peer_vm; struct kgd_mem *mem, *tmp_mem; struct amdgpu_bo *bo; - struct ttm_operation_ctx ctx = { false, false }; - int i, ret; - - pd_bo_list_entries = kcalloc(process_info->n_vms, - sizeof(struct amdgpu_bo_list_entry), - GFP_KERNEL); - if (!pd_bo_list_entries) { - pr_err("%s: Failed to allocate PD BO list entries\n", __func__); - ret = -ENOMEM; - goto out_no_mem; - } - - INIT_LIST_HEAD(&resv_list); - INIT_LIST_HEAD(&duplicates); + int ret; - /* Get all the page directory BOs that need to be reserved */ - i = 0; - list_for_each_entry(peer_vm, &process_info->vm_list_head, - vm_list_node) - amdgpu_vm_get_pd_bo(peer_vm, &resv_list, - &pd_bo_list_entries[i++]); - /* Add the userptr_inval_list entries to resv_list */ - list_for_each_entry(mem, &process_info->userptr_inval_list, - validate_list.head) { - list_add_tail(&mem->resv_list.head, &resv_list); - mem->resv_list.bo = mem->validate_list.bo; - mem->resv_list.num_shared = mem->validate_list.num_shared; - } + amdgpu_sync_create(&sync); + drm_exec_init(&exec, 0); /* Reserve all BOs and page tables for validation */ - ret = ttm_eu_reserve_buffers(&ticket, &resv_list, false, &duplicates); - WARN(!list_empty(&duplicates), "Duplicates should be empty"); - if (ret) - goto out_free; + drm_exec_until_all_locked(&exec) { + /* Reserve all the page directories */ + list_for_each_entry(peer_vm, &process_info->vm_list_head, + vm_list_node) { + ret = amdgpu_vm_lock_pd(peer_vm, &exec, 2); + drm_exec_retry_on_contention(&exec); + if (unlikely(ret)) + goto unreserve_out; + } - amdgpu_sync_create(&sync); + /* Reserve the userptr_inval_list entries to resv_list */ + list_for_each_entry(mem, &process_info->userptr_inval_list, + validate_list) { + struct drm_gem_object *gobj; + + gobj = &mem->bo->tbo.base; + ret = drm_exec_prepare_obj(&exec, gobj, 1); + drm_exec_retry_on_contention(&exec); + if (unlikely(ret)) + goto unreserve_out; + } + } ret = process_validate_vms(process_info); if (ret) @@ -2591,7 +2584,7 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info) /* Validate BOs and update GPUVM page tables */ list_for_each_entry_safe(mem, tmp_mem, &process_info->userptr_inval_list, - validate_list.head) { + validate_list) { struct kfd_mem_attachment *attachment; bo = mem->bo; @@ -2633,12 +2626,9 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info) ret = process_update_pds(process_info, &sync); unreserve_out: - ttm_eu_backoff_reservation(&ticket, &resv_list); + drm_exec_fini(&exec); amdgpu_sync_wait(&sync, false); amdgpu_sync_free(&sync); -out_free: - kfree(pd_bo_list_entries); -out_no_mem: return ret; } @@ -2654,7 +2644,7 @@ static int confirm_valid_user_pages_locked(struct amdkfd_process_info *process_i list_for_each_entry_safe(mem, tmp_mem, &process_info->userptr_inval_list, - validate_list.head) { + validate_list) { bool valid; /* keep mem without hmm range at userptr_inval_list */ @@ -2678,7 +2668,7 @@ static int confirm_valid_user_pages_locked(struct amdkfd_process_info *process_i continue; } - list_move_tail(&mem->validate_list.head, + list_move_tail(&mem->validate_list, &process_info->userptr_valid_list); } @@ -2788,50 +2778,44 @@ unlock_out: */ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef) { - struct amdgpu_bo_list_entry *pd_bo_list; struct amdkfd_process_info *process_info = info; struct amdgpu_vm *peer_vm; struct kgd_mem *mem; - struct bo_vm_reservation_context ctx; struct amdgpu_amdkfd_fence *new_fence; - int ret = 0, i; struct list_head duplicate_save; struct amdgpu_sync sync_obj; unsigned long failed_size = 0; unsigned long total_size = 0; + struct drm_exec exec; + int ret; INIT_LIST_HEAD(&duplicate_save); - INIT_LIST_HEAD(&ctx.list); - INIT_LIST_HEAD(&ctx.duplicates); - pd_bo_list = kcalloc(process_info->n_vms, - sizeof(struct amdgpu_bo_list_entry), - GFP_KERNEL); - if (!pd_bo_list) - return -ENOMEM; - - i = 0; mutex_lock(&process_info->lock); - list_for_each_entry(peer_vm, &process_info->vm_list_head, - vm_list_node) - amdgpu_vm_get_pd_bo(peer_vm, &ctx.list, &pd_bo_list[i++]); - - /* Reserve all BOs and page tables/directory. Add all BOs from - * kfd_bo_list to ctx.list - */ - list_for_each_entry(mem, &process_info->kfd_bo_list, - validate_list.head) { - list_add_tail(&mem->resv_list.head, &ctx.list); - mem->resv_list.bo = mem->validate_list.bo; - mem->resv_list.num_shared = mem->validate_list.num_shared; - } + drm_exec_init(&exec, 0); + drm_exec_until_all_locked(&exec) { + list_for_each_entry(peer_vm, &process_info->vm_list_head, + vm_list_node) { + ret = amdgpu_vm_lock_pd(peer_vm, &exec, 2); + drm_exec_retry_on_contention(&exec); + if (unlikely(ret)) + goto ttm_reserve_fail; + } - ret = ttm_eu_reserve_buffers(&ctx.ticket, &ctx.list, - false, &duplicate_save); - if (ret) { - pr_debug("Memory eviction: TTM Reserve Failed. Try again\n"); - goto ttm_reserve_fail; + /* Reserve all BOs and page tables/directory. Add all BOs from + * kfd_bo_list to ctx.list + */ + list_for_each_entry(mem, &process_info->kfd_bo_list, + validate_list) { + struct drm_gem_object *gobj; + + gobj = &mem->bo->tbo.base; + ret = drm_exec_prepare_obj(&exec, gobj, 1); + drm_exec_retry_on_contention(&exec); + if (unlikely(ret)) + goto ttm_reserve_fail; + } } amdgpu_sync_create(&sync_obj); @@ -2849,7 +2833,7 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef) /* Validate BOs and map them to GPUVM (update VM page tables). */ list_for_each_entry(mem, &process_info->kfd_bo_list, - validate_list.head) { + validate_list) { struct amdgpu_bo *bo = mem->bo; uint32_t domain = mem->domain; @@ -2925,8 +2909,7 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef) *ef = dma_fence_get(&new_fence->base); /* Attach new eviction fence to all BOs except pinned ones */ - list_for_each_entry(mem, &process_info->kfd_bo_list, - validate_list.head) { + list_for_each_entry(mem, &process_info->kfd_bo_list, validate_list) { if (mem->bo->tbo.pin_count) continue; @@ -2945,11 +2928,10 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef) } validate_map_fail: - ttm_eu_backoff_reservation(&ctx.ticket, &ctx.list); amdgpu_sync_free(&sync_obj); ttm_reserve_fail: + drm_exec_fini(&exec); mutex_unlock(&process_info->lock); - kfree(pd_bo_list); return ret; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c index f4e3c133a16c..dce9e7d5e4ec 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c @@ -1776,7 +1776,7 @@ static ssize_t amdgpu_atombios_get_vbios_version(struct device *dev, struct amdgpu_device *adev = drm_to_adev(ddev); struct atom_context *ctx = adev->mode_info.atom_context; - return sysfs_emit(buf, "%s\n", ctx->vbios_version); + return sysfs_emit(buf, "%s\n", ctx->vbios_pn); } static DEVICE_ATTR(vbios_version, 0444, amdgpu_atombios_get_vbios_version, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h index b639a80ee3fc..0811474e8fd3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h @@ -89,8 +89,7 @@ struct atom_memory_info { #define MAX_AC_TIMING_ENTRIES 16 -struct atom_memory_clock_range_table -{ +struct atom_memory_clock_range_table { u8 num_entries; u8 rsv[3]; u32 mclk[MAX_AC_TIMING_ENTRIES]; @@ -118,14 +117,12 @@ struct atom_mc_reg_table { #define MAX_VOLTAGE_ENTRIES 32 -struct atom_voltage_table_entry -{ +struct atom_voltage_table_entry { u16 value; u32 smio_low; }; -struct atom_voltage_table -{ +struct atom_voltage_table { u32 count; u32 mask_low; u32 phase_delay; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c index 0b7f4c4d58e5..fb2681dd6b33 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c @@ -58,7 +58,7 @@ uint32_t amdgpu_atomfirmware_query_firmware_capability(struct amdgpu_device *ade if (amdgpu_atom_parse_data_header(adev->mode_info.atom_context, index, &size, &frev, &crev, &data_offset)) { /* support firmware_info 3.1 + */ - if ((frev == 3 && crev >=1) || (frev > 3)) { + if ((frev == 3 && crev >= 1) || (frev > 3)) { firmware_info = (union firmware_info *) (mode_info->atom_context->bios + data_offset); fw_cap = le32_to_cpu(firmware_info->v31.firmware_capability); @@ -217,6 +217,7 @@ union umc_info { struct atom_umc_info_v3_1 v31; struct atom_umc_info_v3_2 v32; struct atom_umc_info_v3_3 v33; + struct atom_umc_info_v4_0 v40; }; union vram_info { @@ -508,9 +509,8 @@ bool amdgpu_atomfirmware_mem_ecc_supported(struct amdgpu_device *adev) if (amdgpu_atom_parse_data_header(mode_info->atom_context, index, &size, &frev, &crev, &data_offset)) { + umc_info = (union umc_info *)(mode_info->atom_context->bios + data_offset); if (frev == 3) { - umc_info = (union umc_info *) - (mode_info->atom_context->bios + data_offset); switch (crev) { case 1: umc_config = le32_to_cpu(umc_info->v31.umc_config); @@ -533,6 +533,20 @@ bool amdgpu_atomfirmware_mem_ecc_supported(struct amdgpu_device *adev) /* unsupported crev */ return false; } + } else if (frev == 4) { + switch (crev) { + case 0: + umc_config1 = le32_to_cpu(umc_info->v40.umc_config1); + ecc_default_enabled = + (umc_config1 & UMC_CONFIG1__ENABLE_ECC_CAPABLE) ? true : false; + break; + default: + /* unsupported crev */ + return false; + } + } else { + /* unsupported frev */ + return false; } } @@ -597,7 +611,7 @@ bool amdgpu_atomfirmware_ras_rom_addr(struct amdgpu_device *adev, index, &size, &frev, &crev, &data_offset)) { /* support firmware_info 3.4 + */ - if ((frev == 3 && crev >=4) || (frev > 3)) { + if ((frev == 3 && crev >= 4) || (frev > 3)) { firmware_info = (union firmware_info *) (mode_info->atom_context->bios + data_offset); /* The ras_rom_i2c_slave_addr should ideally @@ -850,7 +864,7 @@ int amdgpu_atomfirmware_get_fw_reserved_fb_size(struct amdgpu_device *adev) firmware_info = (union firmware_info *)(ctx->bios + data_offset); - if (frev !=3) + if (frev != 3) return -EINVAL; switch (crev) { @@ -909,7 +923,7 @@ int amdgpu_atomfirmware_asic_init(struct amdgpu_device *adev, bool fb_reset) } index = get_index_into_master_table(atom_master_list_of_command_functions_v2_1, - asic_init); + asic_init); if (amdgpu_atom_parse_cmd_header(mode_info->atom_context, index, &frev, &crev)) { if (frev == 2 && crev >= 1) { memset(&asic_init_ps_v2_1, 0, sizeof(asic_init_ps_v2_1)); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c index d6d986be906a..375f02002579 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c @@ -74,24 +74,29 @@ struct atpx_mux { u16 mux; } __packed; -bool amdgpu_has_atpx(void) { +bool amdgpu_has_atpx(void) +{ return amdgpu_atpx_priv.atpx_detected; } -bool amdgpu_has_atpx_dgpu_power_cntl(void) { +bool amdgpu_has_atpx_dgpu_power_cntl(void) +{ return amdgpu_atpx_priv.atpx.functions.power_cntl; } -bool amdgpu_is_atpx_hybrid(void) { +bool amdgpu_is_atpx_hybrid(void) +{ return amdgpu_atpx_priv.atpx.is_hybrid; } -bool amdgpu_atpx_dgpu_req_power_for_displays(void) { +bool amdgpu_atpx_dgpu_req_power_for_displays(void) +{ return amdgpu_atpx_priv.atpx.dgpu_req_power_for_displays; } #if defined(CONFIG_ACPI) -void *amdgpu_atpx_get_dhandle(void) { +void *amdgpu_atpx_get_dhandle(void) +{ return amdgpu_atpx_priv.dhandle; } #endif @@ -134,7 +139,7 @@ static union acpi_object *amdgpu_atpx_call(acpi_handle handle, int function, /* Fail only if calling the method fails and ATPX is supported */ if (ACPI_FAILURE(status) && status != AE_NOT_FOUND) { - printk("failed to evaluate ATPX got %s\n", + pr_err("failed to evaluate ATPX got %s\n", acpi_format_exception(status)); kfree(buffer.pointer); return NULL; @@ -190,7 +195,7 @@ static int amdgpu_atpx_validate(struct amdgpu_atpx *atpx) size = *(u16 *) info->buffer.pointer; if (size < 10) { - printk("ATPX buffer is too small: %zu\n", size); + pr_err("ATPX buffer is too small: %zu\n", size); kfree(info); return -EINVAL; } @@ -223,11 +228,11 @@ static int amdgpu_atpx_validate(struct amdgpu_atpx *atpx) atpx->is_hybrid = false; if (valid_bits & ATPX_MS_HYBRID_GFX_SUPPORTED) { if (amdgpu_atpx_priv.quirks & AMDGPU_PX_QUIRK_FORCE_ATPX) { - printk("ATPX Hybrid Graphics, forcing to ATPX\n"); + pr_warn("ATPX Hybrid Graphics, forcing to ATPX\n"); atpx->functions.power_cntl = true; atpx->is_hybrid = false; } else { - printk("ATPX Hybrid Graphics\n"); + pr_notice("ATPX Hybrid Graphics\n"); /* * Disable legacy PM methods only when pcie port PM is usable, * otherwise the device might fail to power off or power on. @@ -269,7 +274,7 @@ static int amdgpu_atpx_verify_interface(struct amdgpu_atpx *atpx) size = *(u16 *) info->buffer.pointer; if (size < 8) { - printk("ATPX buffer is too small: %zu\n", size); + pr_err("ATPX buffer is too small: %zu\n", size); err = -EINVAL; goto out; } @@ -278,8 +283,8 @@ static int amdgpu_atpx_verify_interface(struct amdgpu_atpx *atpx) memcpy(&output, info->buffer.pointer, size); /* TODO: check version? */ - printk("ATPX version %u, functions 0x%08x\n", - output.version, output.function_bits); + pr_notice("ATPX version %u, functions 0x%08x\n", + output.version, output.function_bits); amdgpu_atpx_parse_functions(&atpx->functions, output.function_bits); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c index b582b83c4984..618e469e3622 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c @@ -29,6 +29,7 @@ #include "amdgpu.h" #include "atom.h" +#include <linux/device.h> #include <linux/pci.h> #include <linux/slab.h> #include <linux/acpi.h> @@ -287,7 +288,15 @@ static bool amdgpu_atrm_get_bios(struct amdgpu_device *adev) if (adev->flags & AMD_IS_APU) return false; - while ((pdev = pci_get_class(PCI_CLASS_DISPLAY_VGA << 8, pdev)) != NULL) { + /* ATRM is for on-platform devices only */ + if (dev_is_removable(&adev->pdev->dev)) + return false; + + while ((pdev = pci_get_base_class(PCI_BASE_CLASS_DISPLAY, pdev))) { + if ((pdev->class != PCI_CLASS_DISPLAY_VGA << 8) && + (pdev->class != PCI_CLASS_DISPLAY_OTHER << 8)) + continue; + dhandle = ACPI_HANDLE(&pdev->dev); if (!dhandle) continue; @@ -299,20 +308,6 @@ static bool amdgpu_atrm_get_bios(struct amdgpu_device *adev) } } - if (!found) { - while ((pdev = pci_get_class(PCI_CLASS_DISPLAY_OTHER << 8, pdev)) != NULL) { - dhandle = ACPI_HANDLE(&pdev->dev); - if (!dhandle) - continue; - - status = acpi_get_handle(dhandle, "ATRM", &atrm_handle); - if (ACPI_SUCCESS(status)) { - found = true; - break; - } - } - } - if (!found) return false; pci_dev_put(pdev); @@ -460,7 +455,7 @@ bool amdgpu_get_bios(struct amdgpu_device *adev) return false; success: - adev->is_atom_fw = (adev->asic_type >= CHIP_VEGA10) ? true : false; + adev->is_atom_fw = adev->asic_type >= CHIP_VEGA10; return true; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c index 252a876b0725..702f6610d024 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c @@ -28,6 +28,7 @@ * Christian König <deathsimple@vodafone.de> */ +#include <linux/sort.h> #include <linux/uaccess.h> #include "amdgpu.h" @@ -50,13 +51,20 @@ static void amdgpu_bo_list_free(struct kref *ref) refcount); struct amdgpu_bo_list_entry *e; - amdgpu_bo_list_for_each_entry(e, list) { - struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo); + amdgpu_bo_list_for_each_entry(e, list) + amdgpu_bo_unref(&e->bo); + call_rcu(&list->rhead, amdgpu_bo_list_free_rcu); +} - amdgpu_bo_unref(&bo); - } +static int amdgpu_bo_list_entry_cmp(const void *_a, const void *_b) +{ + const struct amdgpu_bo_list_entry *a = _a, *b = _b; - call_rcu(&list->rhead, amdgpu_bo_list_free_rcu); + if (a->priority > b->priority) + return 1; + if (a->priority < b->priority) + return -1; + return 0; } int amdgpu_bo_list_create(struct amdgpu_device *adev, struct drm_file *filp, @@ -67,27 +75,17 @@ int amdgpu_bo_list_create(struct amdgpu_device *adev, struct drm_file *filp, struct amdgpu_bo_list_entry *array; struct amdgpu_bo_list *list; uint64_t total_size = 0; - size_t size; unsigned i; int r; - if (num_entries > (SIZE_MAX - sizeof(struct amdgpu_bo_list)) - / sizeof(struct amdgpu_bo_list_entry)) - return -EINVAL; - - size = sizeof(struct amdgpu_bo_list); - size += num_entries * sizeof(struct amdgpu_bo_list_entry); - list = kvmalloc(size, GFP_KERNEL); + list = kvzalloc(struct_size(list, entries, num_entries), GFP_KERNEL); if (!list) return -ENOMEM; kref_init(&list->refcount); - list->gds_obj = NULL; - list->gws_obj = NULL; - list->oa_obj = NULL; - array = amdgpu_bo_list_array_entry(list, 0); - memset(array, 0, num_entries * sizeof(struct amdgpu_bo_list_entry)); + list->num_entries = num_entries; + array = list->entries; for (i = 0; i < num_entries; ++i) { struct amdgpu_bo_list_entry *entry; @@ -118,7 +116,7 @@ int amdgpu_bo_list_create(struct amdgpu_device *adev, struct drm_file *filp, entry->priority = min(info[i].bo_priority, AMDGPU_BO_LIST_MAX_PRIORITY); - entry->tv.bo = &bo->tbo; + entry->bo = bo; if (bo->preferred_domains == AMDGPU_GEM_DOMAIN_GDS) list->gds_obj = bo; @@ -132,7 +130,8 @@ int amdgpu_bo_list_create(struct amdgpu_device *adev, struct drm_file *filp, } list->first_userptr = first_userptr; - list->num_entries = num_entries; + sort(array, last_entry, sizeof(struct amdgpu_bo_list_entry), + amdgpu_bo_list_entry_cmp, NULL); trace_amdgpu_cs_bo_status(list->num_entries, total_size); @@ -141,16 +140,10 @@ int amdgpu_bo_list_create(struct amdgpu_device *adev, struct drm_file *filp, return 0; error_free: - for (i = 0; i < last_entry; ++i) { - struct amdgpu_bo *bo = ttm_to_amdgpu_bo(array[i].tv.bo); - - amdgpu_bo_unref(&bo); - } - for (i = first_userptr; i < num_entries; ++i) { - struct amdgpu_bo *bo = ttm_to_amdgpu_bo(array[i].tv.bo); - - amdgpu_bo_unref(&bo); - } + for (i = 0; i < last_entry; ++i) + amdgpu_bo_unref(&array[i].bo); + for (i = first_userptr; i < num_entries; ++i) + amdgpu_bo_unref(&array[i].bo); kvfree(list); return r; @@ -179,44 +172,10 @@ int amdgpu_bo_list_get(struct amdgpu_fpriv *fpriv, int id, } rcu_read_unlock(); + *result = NULL; return -ENOENT; } -void amdgpu_bo_list_get_list(struct amdgpu_bo_list *list, - struct list_head *validated) -{ - /* This is based on the bucket sort with O(n) time complexity. - * An item with priority "i" is added to bucket[i]. The lists are then - * concatenated in descending order. - */ - struct list_head bucket[AMDGPU_BO_LIST_NUM_BUCKETS]; - struct amdgpu_bo_list_entry *e; - unsigned i; - - for (i = 0; i < AMDGPU_BO_LIST_NUM_BUCKETS; i++) - INIT_LIST_HEAD(&bucket[i]); - - /* Since buffers which appear sooner in the relocation list are - * likely to be used more often than buffers which appear later - * in the list, the sort mustn't change the ordering of buffers - * with the same priority, i.e. it must be stable. - */ - amdgpu_bo_list_for_each_entry(e, list) { - struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo); - unsigned priority = e->priority; - - if (!bo->parent) - list_add_tail(&e->tv.head, &bucket[priority]); - - e->user_pages = NULL; - e->range = NULL; - } - - /* Connect the sorted buckets in the output list. */ - for (i = 0; i < AMDGPU_BO_LIST_NUM_BUCKETS; i++) - list_splice(&bucket[i], validated); -} - void amdgpu_bo_list_put(struct amdgpu_bo_list *list) { kref_put(&list->refcount, amdgpu_bo_list_free); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h index ededdc01ca28..555cd6d877c3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h @@ -23,7 +23,6 @@ #ifndef __AMDGPU_BO_LIST_H__ #define __AMDGPU_BO_LIST_H__ -#include <drm/ttm/ttm_execbuf_util.h> #include <drm/amdgpu_drm.h> struct hmm_range; @@ -36,7 +35,7 @@ struct amdgpu_bo_va; struct amdgpu_fpriv; struct amdgpu_bo_list_entry { - struct ttm_validate_buffer tv; + struct amdgpu_bo *bo; struct amdgpu_bo_va *bo_va; uint32_t priority; struct page **user_pages; @@ -56,12 +55,12 @@ struct amdgpu_bo_list { /* Protect access during command submission. */ struct mutex bo_list_mutex; + + struct amdgpu_bo_list_entry entries[] __counted_by(num_entries); }; int amdgpu_bo_list_get(struct amdgpu_fpriv *fpriv, int id, struct amdgpu_bo_list **result); -void amdgpu_bo_list_get_list(struct amdgpu_bo_list *list, - struct list_head *validated); void amdgpu_bo_list_put(struct amdgpu_bo_list *list); int amdgpu_bo_create_list_entry_array(struct drm_amdgpu_bo_list_in *in, struct drm_amdgpu_bo_list_entry **info_param); @@ -72,22 +71,14 @@ int amdgpu_bo_list_create(struct amdgpu_device *adev, size_t num_entries, struct amdgpu_bo_list **list); -static inline struct amdgpu_bo_list_entry * -amdgpu_bo_list_array_entry(struct amdgpu_bo_list *list, unsigned index) -{ - struct amdgpu_bo_list_entry *array = (void *)&list[1]; - - return &array[index]; -} - #define amdgpu_bo_list_for_each_entry(e, list) \ - for (e = amdgpu_bo_list_array_entry(list, 0); \ - e != amdgpu_bo_list_array_entry(list, (list)->num_entries); \ + for (e = list->entries; \ + e != &list->entries[list->num_entries]; \ ++e) #define amdgpu_bo_list_for_each_userptr_entry(e, list) \ - for (e = amdgpu_bo_list_array_entry(list, (list)->first_userptr); \ - e != amdgpu_bo_list_array_entry(list, (list)->num_entries); \ + for (e = &list->entries[list->first_userptr]; \ + e != &list->entries[list->num_entries]; \ ++e) #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c index 456e385333b6..b8280be6225d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c @@ -41,13 +41,13 @@ struct amdgpu_cgs_device { ((struct amdgpu_cgs_device *)cgs_device)->adev -static uint32_t amdgpu_cgs_read_register(struct cgs_device *cgs_device, unsigned offset) +static uint32_t amdgpu_cgs_read_register(struct cgs_device *cgs_device, unsigned int offset) { CGS_FUNC_ADEV; return RREG32(offset); } -static void amdgpu_cgs_write_register(struct cgs_device *cgs_device, unsigned offset, +static void amdgpu_cgs_write_register(struct cgs_device *cgs_device, unsigned int offset, uint32_t value) { CGS_FUNC_ADEV; @@ -56,7 +56,7 @@ static void amdgpu_cgs_write_register(struct cgs_device *cgs_device, unsigned of static uint32_t amdgpu_cgs_read_ind_register(struct cgs_device *cgs_device, enum cgs_ind_reg space, - unsigned index) + unsigned int index) { CGS_FUNC_ADEV; switch (space) { @@ -84,7 +84,7 @@ static uint32_t amdgpu_cgs_read_ind_register(struct cgs_device *cgs_device, static void amdgpu_cgs_write_ind_register(struct cgs_device *cgs_device, enum cgs_ind_reg space, - unsigned index, uint32_t value) + unsigned int index, uint32_t value) { CGS_FUNC_ADEV; switch (space) { @@ -163,38 +163,38 @@ static uint16_t amdgpu_get_firmware_version(struct cgs_device *cgs_device, uint16_t fw_version = 0; switch (type) { - case CGS_UCODE_ID_SDMA0: - fw_version = adev->sdma.instance[0].fw_version; - break; - case CGS_UCODE_ID_SDMA1: - fw_version = adev->sdma.instance[1].fw_version; - break; - case CGS_UCODE_ID_CP_CE: - fw_version = adev->gfx.ce_fw_version; - break; - case CGS_UCODE_ID_CP_PFP: - fw_version = adev->gfx.pfp_fw_version; - break; - case CGS_UCODE_ID_CP_ME: - fw_version = adev->gfx.me_fw_version; - break; - case CGS_UCODE_ID_CP_MEC: - fw_version = adev->gfx.mec_fw_version; - break; - case CGS_UCODE_ID_CP_MEC_JT1: - fw_version = adev->gfx.mec_fw_version; - break; - case CGS_UCODE_ID_CP_MEC_JT2: - fw_version = adev->gfx.mec_fw_version; - break; - case CGS_UCODE_ID_RLC_G: - fw_version = adev->gfx.rlc_fw_version; - break; - case CGS_UCODE_ID_STORAGE: - break; - default: - DRM_ERROR("firmware type %d do not have version\n", type); - break; + case CGS_UCODE_ID_SDMA0: + fw_version = adev->sdma.instance[0].fw_version; + break; + case CGS_UCODE_ID_SDMA1: + fw_version = adev->sdma.instance[1].fw_version; + break; + case CGS_UCODE_ID_CP_CE: + fw_version = adev->gfx.ce_fw_version; + break; + case CGS_UCODE_ID_CP_PFP: + fw_version = adev->gfx.pfp_fw_version; + break; + case CGS_UCODE_ID_CP_ME: + fw_version = adev->gfx.me_fw_version; + break; + case CGS_UCODE_ID_CP_MEC: + fw_version = adev->gfx.mec_fw_version; + break; + case CGS_UCODE_ID_CP_MEC_JT1: + fw_version = adev->gfx.mec_fw_version; + break; + case CGS_UCODE_ID_CP_MEC_JT2: + fw_version = adev->gfx.mec_fw_version; + break; + case CGS_UCODE_ID_RLC_G: + fw_version = adev->gfx.rlc_fw_version; + break; + case CGS_UCODE_ID_STORAGE: + break; + default: + DRM_ERROR("firmware type %d do not have version\n", type); + break; } return fw_version; } @@ -205,7 +205,7 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device, { CGS_FUNC_ADEV; - if ((CGS_UCODE_ID_SMU != type) && (CGS_UCODE_ID_SMU_SK != type)) { + if (type != CGS_UCODE_ID_SMU && type != CGS_UCODE_ID_SMU_SK) { uint64_t gpu_addr; uint32_t data_size; const struct gfx_firmware_header_v1_0 *header; @@ -232,7 +232,7 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device, info->mc_addr = gpu_addr; info->version = (uint16_t)le32_to_cpu(header->header.ucode_version); - if (CGS_UCODE_ID_CP_MEC == type) + if (type == CGS_UCODE_ID_CP_MEC) info->image_size = le32_to_cpu(header->jt_offset) << 2; info->fw_version = amdgpu_get_firmware_version(cgs_device, type); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c index d34037b85cf8..7473a42f7d45 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c @@ -264,16 +264,9 @@ struct edid *amdgpu_connector_edid(struct drm_connector *connector) static struct edid * amdgpu_connector_get_hardcoded_edid(struct amdgpu_device *adev) { - struct edid *edid; - if (adev->mode_info.bios_hardcoded_edid) { - edid = kmalloc(adev->mode_info.bios_hardcoded_edid_size, GFP_KERNEL); - if (edid) { - memcpy((unsigned char *)edid, - (unsigned char *)adev->mode_info.bios_hardcoded_edid, - adev->mode_info.bios_hardcoded_edid_size); - return edid; - } + return kmemdup((unsigned char *)adev->mode_info.bios_hardcoded_edid, + adev->mode_info.bios_hardcoded_edid_size, GFP_KERNEL); } return NULL; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index fb78a8f47587..e50be6500030 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -65,6 +65,8 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, } amdgpu_sync_create(&p->sync); + drm_exec_init(&p->exec, DRM_EXEC_INTERRUPTIBLE_WAIT | + DRM_EXEC_IGNORE_DUPLICATES); return 0; } @@ -125,37 +127,24 @@ static int amdgpu_cs_p1_user_fence(struct amdgpu_cs_parser *p, uint32_t *offset) { struct drm_gem_object *gobj; - struct amdgpu_bo *bo; unsigned long size; - int r; gobj = drm_gem_object_lookup(p->filp, data->handle); if (gobj == NULL) return -EINVAL; - bo = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj)); - p->uf_entry.priority = 0; - p->uf_entry.tv.bo = &bo->tbo; + p->uf_bo = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj)); drm_gem_object_put(gobj); - size = amdgpu_bo_size(bo); - if (size != PAGE_SIZE || (data->offset + 8) > size) { - r = -EINVAL; - goto error_unref; - } + size = amdgpu_bo_size(p->uf_bo); + if (size != PAGE_SIZE || data->offset > (size - 8)) + return -EINVAL; - if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) { - r = -EINVAL; - goto error_unref; - } + if (amdgpu_ttm_tt_get_usermm(p->uf_bo->tbo.ttm)) + return -EINVAL; *offset = data->offset; - return 0; - -error_unref: - amdgpu_bo_unref(&bo); - return r; } static int amdgpu_cs_p1_bo_handles(struct amdgpu_cs_parser *p, @@ -218,7 +207,7 @@ static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p, } for (i = 0; i < p->nchunks; i++) { - struct drm_amdgpu_cs_chunk __user **chunk_ptr = NULL; + struct drm_amdgpu_cs_chunk __user *chunk_ptr = NULL; struct drm_amdgpu_cs_chunk user_chunk; uint32_t __user *cdata; @@ -311,7 +300,7 @@ static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p, goto free_all_kdata; } - if (p->uf_entry.tv.bo) + if (p->uf_bo) p->gang_leader->uf_addr = uf_offset; kvfree(chunk_array); @@ -356,7 +345,7 @@ static int amdgpu_cs_p2_ib(struct amdgpu_cs_parser *p, ib = &job->ibs[job->num_ibs++]; /* MM engine doesn't support user fences */ - if (p->uf_entry.tv.bo && ring->funcs->no_user_fence) + if (p->uf_bo && ring->funcs->no_user_fence) return -EINVAL; if (chunk_ib->ip_type == AMDGPU_HW_IP_GFX && @@ -841,55 +830,18 @@ retry: return r; } -static int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p, - struct list_head *validated) -{ - struct ttm_operation_ctx ctx = { true, false }; - struct amdgpu_bo_list_entry *lobj; - int r; - - list_for_each_entry(lobj, validated, tv.head) { - struct amdgpu_bo *bo = ttm_to_amdgpu_bo(lobj->tv.bo); - struct mm_struct *usermm; - - usermm = amdgpu_ttm_tt_get_usermm(bo->tbo.ttm); - if (usermm && usermm != current->mm) - return -EPERM; - - if (amdgpu_ttm_tt_is_userptr(bo->tbo.ttm) && - lobj->user_invalidated && lobj->user_pages) { - amdgpu_bo_placement_from_domain(bo, - AMDGPU_GEM_DOMAIN_CPU); - r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); - if (r) - return r; - - amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, - lobj->user_pages); - } - - r = amdgpu_cs_bo_validate(p, bo); - if (r) - return r; - - kvfree(lobj->user_pages); - lobj->user_pages = NULL; - } - return 0; -} - static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, union drm_amdgpu_cs *cs) { struct amdgpu_fpriv *fpriv = p->filp->driver_priv; + struct ttm_operation_ctx ctx = { true, false }; struct amdgpu_vm *vm = &fpriv->vm; struct amdgpu_bo_list_entry *e; - struct list_head duplicates; + struct drm_gem_object *obj; + unsigned long index; unsigned int i; int r; - INIT_LIST_HEAD(&p->validated); - /* p->bo_list could already be assigned if AMDGPU_CHUNK_ID_BO_HANDLES is present */ if (cs->in.bo_list_handle) { if (p->bo_list) @@ -909,29 +861,13 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, mutex_lock(&p->bo_list->bo_list_mutex); - /* One for TTM and one for each CS job */ - amdgpu_bo_list_for_each_entry(e, p->bo_list) - e->tv.num_shared = 1 + p->gang_size; - p->uf_entry.tv.num_shared = 1 + p->gang_size; - - amdgpu_bo_list_get_list(p->bo_list, &p->validated); - - INIT_LIST_HEAD(&duplicates); - amdgpu_vm_get_pd_bo(&fpriv->vm, &p->validated, &p->vm_pd); - - /* Two for VM updates, one for TTM and one for each CS job */ - p->vm_pd.tv.num_shared = 3 + p->gang_size; - - if (p->uf_entry.tv.bo && !ttm_to_amdgpu_bo(p->uf_entry.tv.bo)->parent) - list_add(&p->uf_entry.tv.head, &p->validated); - /* Get userptr backing pages. If pages are updated after registered * in amdgpu_gem_userptr_ioctl(), amdgpu_cs_list_validate() will do * amdgpu_ttm_backend_bind() to flush and invalidate new pages */ amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) { - struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo); bool userpage_invalidated = false; + struct amdgpu_bo *bo = e->bo; int i; e->user_pages = kvmalloc_array(bo->tbo.ttm->num_pages, @@ -959,18 +895,56 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, e->user_invalidated = userpage_invalidated; } - r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true, - &duplicates); - if (unlikely(r != 0)) { - if (r != -ERESTARTSYS) - DRM_ERROR("ttm_eu_reserve_buffers failed.\n"); - goto out_free_user_pages; + drm_exec_until_all_locked(&p->exec) { + r = amdgpu_vm_lock_pd(&fpriv->vm, &p->exec, 1 + p->gang_size); + drm_exec_retry_on_contention(&p->exec); + if (unlikely(r)) + goto out_free_user_pages; + + amdgpu_bo_list_for_each_entry(e, p->bo_list) { + /* One fence for TTM and one for each CS job */ + r = drm_exec_prepare_obj(&p->exec, &e->bo->tbo.base, + 1 + p->gang_size); + drm_exec_retry_on_contention(&p->exec); + if (unlikely(r)) + goto out_free_user_pages; + + e->bo_va = amdgpu_vm_bo_find(vm, e->bo); + } + + if (p->uf_bo) { + r = drm_exec_prepare_obj(&p->exec, &p->uf_bo->tbo.base, + 1 + p->gang_size); + drm_exec_retry_on_contention(&p->exec); + if (unlikely(r)) + goto out_free_user_pages; + } } - amdgpu_bo_list_for_each_entry(e, p->bo_list) { - struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo); + amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) { + struct mm_struct *usermm; + + usermm = amdgpu_ttm_tt_get_usermm(e->bo->tbo.ttm); + if (usermm && usermm != current->mm) { + r = -EPERM; + goto out_free_user_pages; + } - e->bo_va = amdgpu_vm_bo_find(vm, bo); + if (amdgpu_ttm_tt_is_userptr(e->bo->tbo.ttm) && + e->user_invalidated && e->user_pages) { + amdgpu_bo_placement_from_domain(e->bo, + AMDGPU_GEM_DOMAIN_CPU); + r = ttm_bo_validate(&e->bo->tbo, &e->bo->placement, + &ctx); + if (r) + goto out_free_user_pages; + + amdgpu_ttm_tt_set_user_pages(e->bo->tbo.ttm, + e->user_pages); + } + + kvfree(e->user_pages); + e->user_pages = NULL; } amdgpu_cs_get_threshold_for_moves(p->adev, &p->bytes_moved_threshold, @@ -982,25 +956,21 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, amdgpu_cs_bo_validate, p); if (r) { DRM_ERROR("amdgpu_vm_validate_pt_bos() failed.\n"); - goto error_validate; + goto out_free_user_pages; } - r = amdgpu_cs_list_validate(p, &duplicates); - if (r) - goto error_validate; - - r = amdgpu_cs_list_validate(p, &p->validated); - if (r) - goto error_validate; - - if (p->uf_entry.tv.bo) { - struct amdgpu_bo *uf = ttm_to_amdgpu_bo(p->uf_entry.tv.bo); + drm_exec_for_each_locked_object(&p->exec, index, obj) { + r = amdgpu_cs_bo_validate(p, gem_to_amdgpu_bo(obj)); + if (unlikely(r)) + goto out_free_user_pages; + } - r = amdgpu_ttm_alloc_gart(&uf->tbo); - if (r) - goto error_validate; + if (p->uf_bo) { + r = amdgpu_ttm_alloc_gart(&p->uf_bo->tbo); + if (unlikely(r)) + goto out_free_user_pages; - p->gang_leader->uf_addr += amdgpu_bo_gpu_offset(uf); + p->gang_leader->uf_addr += amdgpu_bo_gpu_offset(p->uf_bo); } amdgpu_cs_report_moved_bytes(p->adev, p->bytes_moved, @@ -1012,12 +982,9 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, p->bo_list->oa_obj); return 0; -error_validate: - ttm_eu_backoff_reservation(&p->ticket, &p->validated); - out_free_user_pages: amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) { - struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo); + struct amdgpu_bo *bo = e->bo; if (!e->user_pages) continue; @@ -1123,7 +1090,6 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p) struct amdgpu_vm *vm = &fpriv->vm; struct amdgpu_bo_list_entry *e; struct amdgpu_bo_va *bo_va; - struct amdgpu_bo *bo; unsigned int i; int r; @@ -1151,12 +1117,12 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p) return r; } + /* FIXME: In theory this loop shouldn't be needed any more when + * amdgpu_vm_handle_moved handles all moved BOs that are reserved + * with p->ticket. But removing it caused test regressions, so I'm + * leaving it here for now. + */ amdgpu_bo_list_for_each_entry(e, p->bo_list) { - /* ignore duplicates */ - bo = ttm_to_amdgpu_bo(e->tv.bo); - if (!bo) - continue; - bo_va = e->bo_va; if (bo_va == NULL) continue; @@ -1170,7 +1136,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p) return r; } - r = amdgpu_vm_handle_moved(adev, vm); + r = amdgpu_vm_handle_moved(adev, vm, &p->exec.ticket); if (r) return r; @@ -1191,10 +1157,10 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p) job->vm_pd_addr = amdgpu_gmc_pd_addr(vm->root.bo); } - if (amdgpu_vm_debug) { + if (adev->debug_vm) { /* Invalidate all BOs to test for userspace bugs */ amdgpu_bo_list_for_each_entry(e, p->bo_list) { - struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo); + struct amdgpu_bo *bo = e->bo; /* ignore duplicates */ if (!bo) @@ -1211,8 +1177,9 @@ static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p) { struct amdgpu_fpriv *fpriv = p->filp->driver_priv; struct drm_gpu_scheduler *sched; - struct amdgpu_bo_list_entry *e; + struct drm_gem_object *obj; struct dma_fence *fence; + unsigned long index; unsigned int i; int r; @@ -1223,8 +1190,9 @@ static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p) return r; } - list_for_each_entry(e, &p->validated, tv.head) { - struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo); + drm_exec_for_each_locked_object(&p->exec, index, obj) { + struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj); + struct dma_resv *resv = bo->tbo.base.resv; enum amdgpu_sync_mode sync_mode; @@ -1288,6 +1256,8 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, struct amdgpu_fpriv *fpriv = p->filp->driver_priv; struct amdgpu_job *leader = p->gang_leader; struct amdgpu_bo_list_entry *e; + struct drm_gem_object *gobj; + unsigned long index; unsigned int i; uint64_t seq; int r; @@ -1326,9 +1296,8 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, */ r = 0; amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) { - struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo); - - r |= !amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm, e->range); + r |= !amdgpu_ttm_tt_get_user_pages_done(e->bo->tbo.ttm, + e->range); e->range = NULL; } if (r) { @@ -1338,20 +1307,22 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, } p->fence = dma_fence_get(&leader->base.s_fence->finished); - list_for_each_entry(e, &p->validated, tv.head) { + drm_exec_for_each_locked_object(&p->exec, index, gobj) { + + ttm_bo_move_to_lru_tail_unlocked(&gem_to_amdgpu_bo(gobj)->tbo); /* Everybody except for the gang leader uses READ */ for (i = 0; i < p->gang_size; ++i) { if (p->jobs[i] == leader) continue; - dma_resv_add_fence(e->tv.bo->base.resv, + dma_resv_add_fence(gobj->resv, &p->jobs[i]->base.s_fence->finished, DMA_RESV_USAGE_READ); } - /* The gang leader is remembered as writer */ - e->tv.num_shared = 0; + /* The gang leader as remembered as writer */ + dma_resv_add_fence(gobj->resv, p->fence, DMA_RESV_USAGE_WRITE); } seq = amdgpu_ctx_add_fence(p->ctx, p->entities[p->gang_leader_idx], @@ -1367,7 +1338,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, cs->out.handle = seq; leader->uf_sequence = seq; - amdgpu_vm_bo_trace_cs(&fpriv->vm, &p->ticket); + amdgpu_vm_bo_trace_cs(&fpriv->vm, &p->exec.ticket); for (i = 0; i < p->gang_size; ++i) { amdgpu_job_free_resources(p->jobs[i]); trace_amdgpu_cs_ioctl(p->jobs[i]); @@ -1376,7 +1347,6 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, } amdgpu_vm_move_to_lru_tail(p->adev, &fpriv->vm); - ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence); mutex_unlock(&p->adev->notifier_lock); mutex_unlock(&p->bo_list->bo_list_mutex); @@ -1389,6 +1359,8 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser) unsigned int i; amdgpu_sync_free(&parser->sync); + drm_exec_fini(&parser->exec); + for (i = 0; i < parser->num_post_deps; i++) { drm_syncobj_put(parser->post_deps[i].syncobj); kfree(parser->post_deps[i].chain); @@ -1409,11 +1381,7 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser) if (parser->jobs[i]) amdgpu_job_free(parser->jobs[i]); } - if (parser->uf_entry.tv.bo) { - struct amdgpu_bo *uf = ttm_to_amdgpu_bo(parser->uf_entry.tv.bo); - - amdgpu_bo_unref(&uf); - } + amdgpu_bo_unref(&parser->uf_bo); } int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) @@ -1430,8 +1398,7 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) r = amdgpu_cs_parser_init(&parser, adev, filp, data); if (r) { - if (printk_ratelimit()) - DRM_ERROR("Failed to initialize parser %d!\n", r); + DRM_ERROR_RATELIMITED("Failed to initialize parser %d!\n", r); return r; } @@ -1448,7 +1415,7 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) if (r == -ENOMEM) DRM_ERROR("Not enough memory for command submission!\n"); else if (r != -ERESTARTSYS && r != -EAGAIN) - DRM_ERROR("Failed to process the buffer list %d!\n", r); + DRM_DEBUG("Failed to process the buffer list %d!\n", r); goto error_fini; } @@ -1474,7 +1441,6 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) return 0; error_backoff: - ttm_eu_backoff_reservation(&parser.ticket, &parser.validated); mutex_unlock(&parser.bo_list->bo_list_mutex); error_fini: @@ -1809,7 +1775,7 @@ int amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser, *map = mapping; /* Double check that the BO is reserved by this CS */ - if (dma_resv_locking_ctx((*bo)->tbo.base.resv) != &parser->ticket) + if (dma_resv_locking_ctx((*bo)->tbo.base.resv) != &parser->exec.ticket) return -EINVAL; if (!((*bo)->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h index fb3e3d56d427..39c33ad100cb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h @@ -24,6 +24,7 @@ #define __AMDGPU_CS_H__ #include <linux/ww_mutex.h> +#include <drm/drm_exec.h> #include "amdgpu_job.h" #include "amdgpu_bo_list.h" @@ -62,11 +63,9 @@ struct amdgpu_cs_parser { struct amdgpu_job *gang_leader; /* buffer objects */ - struct ww_acquire_ctx ticket; + struct drm_exec exec; struct amdgpu_bo_list *bo_list; struct amdgpu_mn *mn; - struct amdgpu_bo_list_entry vm_pd; - struct list_head validated; struct dma_fence *fence; uint64_t bytes_moved_threshold; uint64_t bytes_moved_vis_threshold; @@ -74,7 +73,7 @@ struct amdgpu_cs_parser { uint64_t bytes_moved_vis; /* user fence */ - struct amdgpu_bo_list_entry uf_entry; + struct amdgpu_bo *uf_bo; unsigned num_post_deps; struct amdgpu_cs_post_dep *post_deps; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c index 23d054526e7c..720011019741 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c @@ -22,6 +22,8 @@ * * Author: Monk.liu@amd.com */ +#include <drm/drm_exec.h> + #include "amdgpu.h" uint64_t amdgpu_csa_vaddr(struct amdgpu_device *adev) @@ -65,31 +67,25 @@ int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm, struct amdgpu_bo *bo, struct amdgpu_bo_va **bo_va, uint64_t csa_addr, uint32_t size) { - struct ww_acquire_ctx ticket; - struct list_head list; - struct amdgpu_bo_list_entry pd; - struct ttm_validate_buffer csa_tv; + struct drm_exec exec; int r; - INIT_LIST_HEAD(&list); - INIT_LIST_HEAD(&csa_tv.head); - csa_tv.bo = &bo->tbo; - csa_tv.num_shared = 1; - - list_add(&csa_tv.head, &list); - amdgpu_vm_get_pd_bo(vm, &list, &pd); - - r = ttm_eu_reserve_buffers(&ticket, &list, true, NULL); - if (r) { - DRM_ERROR("failed to reserve CSA,PD BOs: err=%d\n", r); - return r; + drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT); + drm_exec_until_all_locked(&exec) { + r = amdgpu_vm_lock_pd(vm, &exec, 0); + if (likely(!r)) + r = drm_exec_lock_obj(&exec, &bo->tbo.base); + drm_exec_retry_on_contention(&exec); + if (unlikely(r)) { + DRM_ERROR("failed to reserve CSA,PD BOs: err=%d\n", r); + goto error; + } } *bo_va = amdgpu_vm_bo_add(adev, vm, bo); if (!*bo_va) { - ttm_eu_backoff_reservation(&ticket, &list); - DRM_ERROR("failed to create bo_va for static CSA\n"); - return -ENOMEM; + r = -ENOMEM; + goto error; } r = amdgpu_vm_bo_map(adev, *bo_va, csa_addr, 0, size, @@ -99,48 +95,42 @@ int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm, if (r) { DRM_ERROR("failed to do bo_map on static CSA, err=%d\n", r); amdgpu_vm_bo_del(adev, *bo_va); - ttm_eu_backoff_reservation(&ticket, &list); - return r; + goto error; } - ttm_eu_backoff_reservation(&ticket, &list); - return 0; +error: + drm_exec_fini(&exec); + return r; } int amdgpu_unmap_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm, struct amdgpu_bo *bo, struct amdgpu_bo_va *bo_va, uint64_t csa_addr) { - struct ww_acquire_ctx ticket; - struct list_head list; - struct amdgpu_bo_list_entry pd; - struct ttm_validate_buffer csa_tv; + struct drm_exec exec; int r; - INIT_LIST_HEAD(&list); - INIT_LIST_HEAD(&csa_tv.head); - csa_tv.bo = &bo->tbo; - csa_tv.num_shared = 1; - - list_add(&csa_tv.head, &list); - amdgpu_vm_get_pd_bo(vm, &list, &pd); - - r = ttm_eu_reserve_buffers(&ticket, &list, true, NULL); - if (r) { - DRM_ERROR("failed to reserve CSA,PD BOs: err=%d\n", r); - return r; + drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT); + drm_exec_until_all_locked(&exec) { + r = amdgpu_vm_lock_pd(vm, &exec, 0); + if (likely(!r)) + r = drm_exec_lock_obj(&exec, &bo->tbo.base); + drm_exec_retry_on_contention(&exec); + if (unlikely(r)) { + DRM_ERROR("failed to reserve CSA,PD BOs: err=%d\n", r); + goto error; + } } r = amdgpu_vm_bo_unmap(adev, bo_va, csa_addr); if (r) { DRM_ERROR("failed to do bo_unmap on static CSA, err=%d\n", r); - ttm_eu_backoff_reservation(&ticket, &list); - return r; + goto error; } amdgpu_vm_bo_del(adev, bo_va); - ttm_eu_backoff_reservation(&ticket, &list); - - return 0; +error: + drm_exec_fini(&exec); + return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index 0dc9c655c4fb..e2ae9ba147ba 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -42,12 +42,12 @@ const unsigned int amdgpu_ctx_num_entities[AMDGPU_HW_IP_NUM] = { [AMDGPU_HW_IP_VCN_DEC] = 1, [AMDGPU_HW_IP_VCN_ENC] = 1, [AMDGPU_HW_IP_VCN_JPEG] = 1, + [AMDGPU_HW_IP_VPE] = 1, }; bool amdgpu_ctx_priority_is_valid(int32_t ctx_prio) { switch (ctx_prio) { - case AMDGPU_CTX_PRIORITY_UNSET: case AMDGPU_CTX_PRIORITY_VERY_LOW: case AMDGPU_CTX_PRIORITY_LOW: case AMDGPU_CTX_PRIORITY_NORMAL: @@ -55,6 +55,11 @@ bool amdgpu_ctx_priority_is_valid(int32_t ctx_prio) case AMDGPU_CTX_PRIORITY_VERY_HIGH: return true; default: + case AMDGPU_CTX_PRIORITY_UNSET: + /* UNSET priority is not valid and we don't carry that + * around, but set it to NORMAL in the only place this + * function is called, amdgpu_ctx_ioctl(). + */ return false; } } @@ -64,7 +69,8 @@ amdgpu_ctx_to_drm_sched_prio(int32_t ctx_prio) { switch (ctx_prio) { case AMDGPU_CTX_PRIORITY_UNSET: - return DRM_SCHED_PRIORITY_UNSET; + pr_warn_once("AMD-->DRM context priority value UNSET-->NORMAL"); + return DRM_SCHED_PRIORITY_NORMAL; case AMDGPU_CTX_PRIORITY_VERY_LOW: return DRM_SCHED_PRIORITY_MIN; @@ -94,9 +100,6 @@ amdgpu_ctx_to_drm_sched_prio(int32_t ctx_prio) static int amdgpu_ctx_priority_permit(struct drm_file *filp, int32_t priority) { - if (!amdgpu_ctx_priority_is_valid(priority)) - return -EINVAL; - /* NORMAL and below are accessible by everyone */ if (priority <= AMDGPU_CTX_PRIORITY_NORMAL) return 0; @@ -631,8 +634,6 @@ static int amdgpu_ctx_query2(struct amdgpu_device *adev, return 0; } - - static int amdgpu_ctx_stable_pstate(struct amdgpu_device *adev, struct amdgpu_fpriv *fpriv, uint32_t id, bool set, u32 *stable_pstate) @@ -675,8 +676,10 @@ int amdgpu_ctx_ioctl(struct drm_device *dev, void *data, id = args->in.ctx_id; priority = args->in.priority; - /* For backwards compatibility reasons, we need to accept - * ioctls with garbage in the priority field */ + /* For backwards compatibility, we need to accept ioctls with garbage + * in the priority field. Garbage values in the priority field, result + * in the priority being set to NORMAL. + */ if (!amdgpu_ctx_priority_is_valid(priority)) priority = AMDGPU_CTX_PRIORITY_NORMAL; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c index 56e89e76ff17..0e61ebdb3f3e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c @@ -154,7 +154,7 @@ static int amdgpu_debugfs_process_reg_op(bool read, struct file *f, } else { r = get_user(value, (uint32_t *)buf); if (!r) - amdgpu_mm_wreg_mmio_rlc(adev, *pos >> 2, value); + amdgpu_mm_wreg_mmio_rlc(adev, *pos >> 2, value, 0); } if (r) { result = r; @@ -283,7 +283,7 @@ static ssize_t amdgpu_debugfs_regs2_op(struct file *f, char __user *buf, u32 off } else { r = get_user(value, (uint32_t *)buf); if (!r) - amdgpu_mm_wreg_mmio_rlc(adev, offset >> 2, value); + amdgpu_mm_wreg_mmio_rlc(adev, offset >> 2, value, rd->id.xcc_id); } if (r) { result = r; @@ -375,7 +375,7 @@ static int amdgpu_debugfs_gprwave_open(struct inode *inode, struct file *file) { struct amdgpu_debugfs_gprwave_data *rd; - rd = kzalloc(sizeof *rd, GFP_KERNEL); + rd = kzalloc(sizeof(*rd), GFP_KERNEL); if (!rd) return -ENOMEM; rd->adev = file_inode(file)->i_private; @@ -388,6 +388,7 @@ static int amdgpu_debugfs_gprwave_open(struct inode *inode, struct file *file) static int amdgpu_debugfs_gprwave_release(struct inode *inode, struct file *file) { struct amdgpu_debugfs_gprwave_data *rd = file->private_data; + mutex_destroy(&rd->lock); kfree(file->private_data); return 0; @@ -637,6 +638,9 @@ static ssize_t amdgpu_debugfs_regs_didt_read(struct file *f, char __user *buf, if (size & 0x3 || *pos & 0x3) return -EINVAL; + if (!adev->didt_rreg) + return -EOPNOTSUPP; + r = pm_runtime_get_sync(adev_to_drm(adev)->dev); if (r < 0) { pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); @@ -693,6 +697,9 @@ static ssize_t amdgpu_debugfs_regs_didt_write(struct file *f, const char __user if (size & 0x3 || *pos & 0x3) return -EINVAL; + if (!adev->didt_wreg) + return -EOPNOTSUPP; + r = pm_runtime_get_sync(adev_to_drm(adev)->dev); if (r < 0) { pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); @@ -747,6 +754,9 @@ static ssize_t amdgpu_debugfs_regs_smc_read(struct file *f, char __user *buf, ssize_t result = 0; int r; + if (!adev->smc_rreg) + return -EPERM; + if (size & 0x3 || *pos & 0x3) return -EINVAL; @@ -803,6 +813,9 @@ static ssize_t amdgpu_debugfs_regs_smc_write(struct file *f, const char __user * ssize_t result = 0; int r; + if (!adev->smc_wreg) + return -EPERM; + if (size & 0x3 || *pos & 0x3) return -EINVAL; @@ -2015,8 +2028,8 @@ static ssize_t amdgpu_reset_dump_register_list_read(struct file *f, if (ret) return ret; - for (i = 0; i < adev->num_regs; i++) { - sprintf(reg_offset, "0x%x\n", adev->reset_dump_reg_list[i]); + for (i = 0; i < adev->reset_info.num_regs; i++) { + sprintf(reg_offset, "0x%x\n", adev->reset_info.reset_dump_reg_list[i]); up_read(&adev->reset_domain->sem); if (copy_to_user(buf + len, reg_offset, strlen(reg_offset))) return -EFAULT; @@ -2073,9 +2086,9 @@ static ssize_t amdgpu_reset_dump_register_list_write(struct file *f, if (ret) goto error_free; - swap(adev->reset_dump_reg_list, tmp); - swap(adev->reset_dump_reg_value, new); - adev->num_regs = i; + swap(adev->reset_info.reset_dump_reg_list, tmp); + swap(adev->reset_info.reset_dump_reg_value, new); + adev->reset_info.num_regs = i; up_write(&adev->reset_domain->sem); ret = size; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 6238701cde23..8dee52ce26d0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -32,8 +32,6 @@ #include <linux/slab.h> #include <linux/iommu.h> #include <linux/pci.h> -#include <linux/devcoredump.h> -#include <generated/utsrelease.h> #include <linux/pci-p2pdma.h> #include <linux/apple-gmux.h> @@ -43,6 +41,7 @@ #include <drm/drm_fb_helper.h> #include <drm/drm_probe_helper.h> #include <drm/amdgpu_drm.h> +#include <linux/device.h> #include <linux/vgaarb.h> #include <linux/vga_switcheroo.h> #include <linux/efi.h> @@ -74,6 +73,7 @@ #include "amdgpu_pmu.h" #include "amdgpu_fru_eeprom.h" #include "amdgpu_reset.h" +#include "amdgpu_virt.h" #include <linux/suspend.h> #include <drm/task_barrier.h> @@ -159,76 +159,79 @@ static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev, return sysfs_emit(buf, "%llu\n", cnt); } -static DEVICE_ATTR(pcie_replay_count, S_IRUGO, +static DEVICE_ATTR(pcie_replay_count, 0444, amdgpu_device_get_pcie_replay_count, NULL); -static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev); - /** - * DOC: product_name + * DOC: board_info + * + * The amdgpu driver provides a sysfs API for giving board related information. + * It provides the form factor information in the format + * + * type : form factor + * + * Possible form factor values + * + * - "cem" - PCIE CEM card + * - "oam" - Open Compute Accelerator Module + * - "unknown" - Not known * - * The amdgpu driver provides a sysfs API for reporting the product name - * for the device - * The file product_name is used for this and returns the product name - * as returned from the FRU. - * NOTE: This is only available for certain server cards */ -static ssize_t amdgpu_device_get_product_name(struct device *dev, - struct device_attribute *attr, char *buf) +static ssize_t amdgpu_device_get_board_info(struct device *dev, + struct device_attribute *attr, + char *buf) { struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = drm_to_adev(ddev); + enum amdgpu_pkg_type pkg_type = AMDGPU_PKG_TYPE_CEM; + const char *pkg; - return sysfs_emit(buf, "%s\n", adev->product_name); -} + if (adev->smuio.funcs && adev->smuio.funcs->get_pkg_type) + pkg_type = adev->smuio.funcs->get_pkg_type(adev); -static DEVICE_ATTR(product_name, S_IRUGO, - amdgpu_device_get_product_name, NULL); - -/** - * DOC: product_number - * - * The amdgpu driver provides a sysfs API for reporting the part number - * for the device - * The file product_number is used for this and returns the part number - * as returned from the FRU. - * NOTE: This is only available for certain server cards - */ - -static ssize_t amdgpu_device_get_product_number(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct drm_device *ddev = dev_get_drvdata(dev); - struct amdgpu_device *adev = drm_to_adev(ddev); + switch (pkg_type) { + case AMDGPU_PKG_TYPE_CEM: + pkg = "cem"; + break; + case AMDGPU_PKG_TYPE_OAM: + pkg = "oam"; + break; + default: + pkg = "unknown"; + break; + } - return sysfs_emit(buf, "%s\n", adev->product_number); + return sysfs_emit(buf, "%s : %s\n", "type", pkg); } -static DEVICE_ATTR(product_number, S_IRUGO, - amdgpu_device_get_product_number, NULL); +static DEVICE_ATTR(board_info, 0444, amdgpu_device_get_board_info, NULL); -/** - * DOC: serial_number - * - * The amdgpu driver provides a sysfs API for reporting the serial number - * for the device - * The file serial_number is used for this and returns the serial number - * as returned from the FRU. - * NOTE: This is only available for certain server cards - */ +static struct attribute *amdgpu_board_attrs[] = { + &dev_attr_board_info.attr, + NULL, +}; -static ssize_t amdgpu_device_get_serial_number(struct device *dev, - struct device_attribute *attr, char *buf) +static umode_t amdgpu_board_attrs_is_visible(struct kobject *kobj, + struct attribute *attr, int n) { + struct device *dev = kobj_to_dev(kobj); struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = drm_to_adev(ddev); - return sysfs_emit(buf, "%s\n", adev->serial); + if (adev->flags & AMD_IS_APU) + return 0; + + return attr->mode; } -static DEVICE_ATTR(serial_number, S_IRUGO, - amdgpu_device_get_serial_number, NULL); +static const struct attribute_group amdgpu_board_attrs_group = { + .attrs = amdgpu_board_attrs, + .is_visible = amdgpu_board_attrs_is_visible +}; + +static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev); + /** * amdgpu_device_supports_px - Is the device a dGPU with ATPX power control @@ -370,10 +373,16 @@ size_t amdgpu_device_aper_access(struct amdgpu_device *adev, loff_t pos, if (write) { memcpy_toio(addr, buf, count); + /* Make sure HDP write cache flush happens without any reordering + * after the system memory contents are sent over PCIe device + */ mb(); amdgpu_device_flush_hdp(adev, NULL); } else { amdgpu_device_invalidate_hdp(adev, NULL); + /* Make sure HDP read cache is invalidated before issuing a read + * to the PCIe device + */ mb(); memcpy_fromio(buf, addr, count); } @@ -464,7 +473,7 @@ uint32_t amdgpu_device_rreg(struct amdgpu_device *adev, if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev) && down_read_trylock(&adev->reset_domain->sem)) { - ret = amdgpu_kiq_rreg(adev, reg); + ret = amdgpu_kiq_rreg(adev, reg, 0); up_read(&adev->reset_domain->sem); } else { ret = readl(((void __iomem *)adev->rmmio) + (reg * 4)); @@ -481,8 +490,7 @@ uint32_t amdgpu_device_rreg(struct amdgpu_device *adev, /* * MMIO register read with bytes helper functions * @offset:bytes offset from MMIO start - * -*/ + */ /** * amdgpu_mm_rreg8 - read a memory mapped IO register @@ -502,12 +510,55 @@ uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset) BUG(); } + +/** + * amdgpu_device_xcc_rreg - read a memory mapped IO or indirect register with specific XCC + * + * @adev: amdgpu_device pointer + * @reg: dword aligned register offset + * @acc_flags: access flags which require special behavior + * @xcc_id: xcc accelerated compute core id + * + * Returns the 32 bit value from the offset specified. + */ +uint32_t amdgpu_device_xcc_rreg(struct amdgpu_device *adev, + uint32_t reg, uint32_t acc_flags, + uint32_t xcc_id) +{ + uint32_t ret, rlcg_flag; + + if (amdgpu_device_skip_hw_access(adev)) + return 0; + + if ((reg * 4) < adev->rmmio_size) { + if (amdgpu_sriov_vf(adev) && + !amdgpu_sriov_runtime(adev) && + adev->gfx.rlc.rlcg_reg_access_supported && + amdgpu_virt_get_rlcg_reg_access_flag(adev, acc_flags, + GC_HWIP, false, + &rlcg_flag)) { + ret = amdgpu_virt_rlcg_reg_rw(adev, reg, 0, rlcg_flag, xcc_id); + } else if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && + amdgpu_sriov_runtime(adev) && + down_read_trylock(&adev->reset_domain->sem)) { + ret = amdgpu_kiq_rreg(adev, reg, xcc_id); + up_read(&adev->reset_domain->sem); + } else { + ret = readl(((void __iomem *)adev->rmmio) + (reg * 4)); + } + } else { + ret = adev->pcie_rreg(adev, reg * 4); + } + + return ret; +} + /* * MMIO register write with bytes helper functions * @offset:bytes offset from MMIO start * @value: the value want to be written to the register - * -*/ + */ + /** * amdgpu_mm_wreg8 - read a memory mapped IO register * @@ -549,7 +600,7 @@ void amdgpu_device_wreg(struct amdgpu_device *adev, if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev) && down_read_trylock(&adev->reset_domain->sem)) { - amdgpu_kiq_wreg(adev, reg, v); + amdgpu_kiq_wreg(adev, reg, v, 0); up_read(&adev->reset_domain->sem); } else { writel(v, ((void __iomem *)adev->rmmio) + (reg * 4)); @@ -567,11 +618,13 @@ void amdgpu_device_wreg(struct amdgpu_device *adev, * @adev: amdgpu_device pointer * @reg: mmio/rlc register * @v: value to write + * @xcc_id: xcc accelerated compute core id * * this function is invoked only for the debugfs register access */ void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev, - uint32_t reg, uint32_t v) + uint32_t reg, uint32_t v, + uint32_t xcc_id) { if (amdgpu_device_skip_hw_access(adev)) return; @@ -580,7 +633,7 @@ void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev, adev->gfx.rlc.funcs && adev->gfx.rlc.funcs->is_rlcg_access_range) { if (adev->gfx.rlc.funcs->is_rlcg_access_range(adev, reg)) - return amdgpu_sriov_wreg(adev, reg, v, 0, 0); + return amdgpu_sriov_wreg(adev, reg, v, 0, 0, xcc_id); } else if ((reg * 4) >= adev->rmmio_size) { adev->pcie_wreg(adev, reg * 4, v); } else { @@ -589,90 +642,43 @@ void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev, } /** - * amdgpu_mm_rdoorbell - read a doorbell dword + * amdgpu_device_xcc_wreg - write to a memory mapped IO or indirect register with specific XCC * * @adev: amdgpu_device pointer - * @index: doorbell index - * - * Returns the value in the doorbell aperture at the - * requested doorbell index (CIK). - */ -u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index) -{ - if (amdgpu_device_skip_hw_access(adev)) - return 0; - - if (index < adev->doorbell.num_kernel_doorbells) { - return readl(adev->doorbell.ptr + index); - } else { - DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index); - return 0; - } -} - -/** - * amdgpu_mm_wdoorbell - write a doorbell dword - * - * @adev: amdgpu_device pointer - * @index: doorbell index - * @v: value to write - * - * Writes @v to the doorbell aperture at the - * requested doorbell index (CIK). - */ -void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v) -{ - if (amdgpu_device_skip_hw_access(adev)) - return; - - if (index < adev->doorbell.num_kernel_doorbells) { - writel(v, adev->doorbell.ptr + index); - } else { - DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index); - } -} - -/** - * amdgpu_mm_rdoorbell64 - read a doorbell Qword - * - * @adev: amdgpu_device pointer - * @index: doorbell index + * @reg: dword aligned register offset + * @v: 32 bit value to write to the register + * @acc_flags: access flags which require special behavior + * @xcc_id: xcc accelerated compute core id * - * Returns the value in the doorbell aperture at the - * requested doorbell index (VEGA10+). + * Writes the value specified to the offset specified. */ -u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index) +void amdgpu_device_xcc_wreg(struct amdgpu_device *adev, + uint32_t reg, uint32_t v, + uint32_t acc_flags, uint32_t xcc_id) { - if (amdgpu_device_skip_hw_access(adev)) - return 0; - - if (index < adev->doorbell.num_kernel_doorbells) { - return atomic64_read((atomic64_t *)(adev->doorbell.ptr + index)); - } else { - DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index); - return 0; - } -} + uint32_t rlcg_flag; -/** - * amdgpu_mm_wdoorbell64 - write a doorbell Qword - * - * @adev: amdgpu_device pointer - * @index: doorbell index - * @v: value to write - * - * Writes @v to the doorbell aperture at the - * requested doorbell index (VEGA10+). - */ -void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v) -{ if (amdgpu_device_skip_hw_access(adev)) return; - if (index < adev->doorbell.num_kernel_doorbells) { - atomic64_set((atomic64_t *)(adev->doorbell.ptr + index), v); + if ((reg * 4) < adev->rmmio_size) { + if (amdgpu_sriov_vf(adev) && + !amdgpu_sriov_runtime(adev) && + adev->gfx.rlc.rlcg_reg_access_supported && + amdgpu_virt_get_rlcg_reg_access_flag(adev, acc_flags, + GC_HWIP, true, + &rlcg_flag)) { + amdgpu_virt_rlcg_reg_rw(adev, reg, v, rlcg_flag, xcc_id); + } else if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && + amdgpu_sriov_runtime(adev) && + down_read_trylock(&adev->reset_domain->sem)) { + amdgpu_kiq_wreg(adev, reg, v, xcc_id); + up_read(&adev->reset_domain->sem); + } else { + writel(v, ((void __iomem *)adev->rmmio) + (reg * 4)); + } } else { - DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index); + adev->pcie_wreg(adev, reg * 4, v); } } @@ -718,7 +724,7 @@ u32 amdgpu_device_indirect_rreg_ext(struct amdgpu_device *adev, pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev); pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev); - if (adev->nbio.funcs->get_pcie_index_hi_offset) + if ((reg_addr >> 32) && (adev->nbio.funcs->get_pcie_index_hi_offset)) pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev); else pcie_index_hi = 0; @@ -785,6 +791,56 @@ u64 amdgpu_device_indirect_rreg64(struct amdgpu_device *adev, return r; } +u64 amdgpu_device_indirect_rreg64_ext(struct amdgpu_device *adev, + u64 reg_addr) +{ + unsigned long flags, pcie_index, pcie_data; + unsigned long pcie_index_hi = 0; + void __iomem *pcie_index_offset; + void __iomem *pcie_index_hi_offset; + void __iomem *pcie_data_offset; + u64 r; + + pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev); + pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev); + if ((reg_addr >> 32) && (adev->nbio.funcs->get_pcie_index_hi_offset)) + pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev); + + spin_lock_irqsave(&adev->pcie_idx_lock, flags); + pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4; + pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4; + if (pcie_index_hi != 0) + pcie_index_hi_offset = (void __iomem *)adev->rmmio + + pcie_index_hi * 4; + + /* read low 32 bits */ + writel(reg_addr, pcie_index_offset); + readl(pcie_index_offset); + if (pcie_index_hi != 0) { + writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset); + readl(pcie_index_hi_offset); + } + r = readl(pcie_data_offset); + /* read high 32 bits */ + writel(reg_addr + 4, pcie_index_offset); + readl(pcie_index_offset); + if (pcie_index_hi != 0) { + writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset); + readl(pcie_index_hi_offset); + } + r |= ((u64)readl(pcie_data_offset) << 32); + + /* clear the high bits */ + if (pcie_index_hi != 0) { + writel(0, pcie_index_hi_offset); + readl(pcie_index_hi_offset); + } + + spin_unlock_irqrestore(&adev->pcie_idx_lock, flags); + + return r; +} + /** * amdgpu_device_indirect_wreg - write an indirect register address * @@ -824,7 +880,7 @@ void amdgpu_device_indirect_wreg_ext(struct amdgpu_device *adev, pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev); pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev); - if (adev->nbio.funcs->get_pcie_index_hi_offset) + if ((reg_addr >> 32) && (adev->nbio.funcs->get_pcie_index_hi_offset)) pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev); else pcie_index_hi = 0; @@ -889,6 +945,55 @@ void amdgpu_device_indirect_wreg64(struct amdgpu_device *adev, spin_unlock_irqrestore(&adev->pcie_idx_lock, flags); } +void amdgpu_device_indirect_wreg64_ext(struct amdgpu_device *adev, + u64 reg_addr, u64 reg_data) +{ + unsigned long flags, pcie_index, pcie_data; + unsigned long pcie_index_hi = 0; + void __iomem *pcie_index_offset; + void __iomem *pcie_index_hi_offset; + void __iomem *pcie_data_offset; + + pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev); + pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev); + if ((reg_addr >> 32) && (adev->nbio.funcs->get_pcie_index_hi_offset)) + pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev); + + spin_lock_irqsave(&adev->pcie_idx_lock, flags); + pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4; + pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4; + if (pcie_index_hi != 0) + pcie_index_hi_offset = (void __iomem *)adev->rmmio + + pcie_index_hi * 4; + + /* write low 32 bits */ + writel(reg_addr, pcie_index_offset); + readl(pcie_index_offset); + if (pcie_index_hi != 0) { + writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset); + readl(pcie_index_hi_offset); + } + writel((u32)(reg_data & 0xffffffffULL), pcie_data_offset); + readl(pcie_data_offset); + /* write high 32 bits */ + writel(reg_addr + 4, pcie_index_offset); + readl(pcie_index_offset); + if (pcie_index_hi != 0) { + writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset); + readl(pcie_index_hi_offset); + } + writel((u32)(reg_data >> 32), pcie_data_offset); + readl(pcie_data_offset); + + /* clear the high bits */ + if (pcie_index_hi != 0) { + writel(0, pcie_index_hi_offset); + readl(pcie_index_hi_offset); + } + + spin_unlock_irqrestore(&adev->pcie_idx_lock, flags); +} + /** * amdgpu_device_get_rev_id - query device rev_id * @@ -966,6 +1071,13 @@ static uint64_t amdgpu_invalid_rreg64(struct amdgpu_device *adev, uint32_t reg) return 0; } +static uint64_t amdgpu_invalid_rreg64_ext(struct amdgpu_device *adev, uint64_t reg) +{ + DRM_ERROR("Invalid callback to read register 0x%llX\n", reg); + BUG(); + return 0; +} + /** * amdgpu_invalid_wreg64 - dummy reg write function * @@ -983,6 +1095,13 @@ static void amdgpu_invalid_wreg64(struct amdgpu_device *adev, uint32_t reg, uint BUG(); } +static void amdgpu_invalid_wreg64_ext(struct amdgpu_device *adev, uint64_t reg, uint64_t v) +{ + DRM_ERROR("Invalid callback to write 64 bit register 0x%llX with 0x%08llX\n", + reg, v); + BUG(); +} + /** * amdgpu_block_invalid_rreg - dummy reg read function * @@ -1032,13 +1151,22 @@ static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev, */ static int amdgpu_device_asic_init(struct amdgpu_device *adev) { + int ret; + amdgpu_asic_pre_asic_init(adev); - if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3) || - adev->ip_versions[GC_HWIP][0] >= IP_VERSION(11, 0, 0)) - return amdgpu_atomfirmware_asic_init(adev, true); - else + if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) || + amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(11, 0, 0)) { + amdgpu_psp_wait_for_bootloader(adev); + ret = amdgpu_atomfirmware_asic_init(adev, true); + /* TODO: check the return val and stop device initialization if boot fails */ + amdgpu_psp_query_boot_status(adev); + return ret; + } else { return amdgpu_atom_asic_init(adev->mode_info.atom_context); + } + + return 0; } /** @@ -1078,7 +1206,7 @@ static void amdgpu_device_mem_scratch_fini(struct amdgpu_device *adev) * @registers: pointer to the register array * @array_size: size of the register array * - * Programs an array or registers with and and or masks. + * Programs an array or registers with and or masks. * This is a helper for setting golden registers. */ void amdgpu_device_program_register_sequence(struct amdgpu_device *adev, @@ -1136,83 +1264,6 @@ int amdgpu_device_pci_reset(struct amdgpu_device *adev) } /* - * GPU doorbell aperture helpers function. - */ -/** - * amdgpu_device_doorbell_init - Init doorbell driver information. - * - * @adev: amdgpu_device pointer - * - * Init doorbell driver information (CIK) - * Returns 0 on success, error on failure. - */ -static int amdgpu_device_doorbell_init(struct amdgpu_device *adev) -{ - - /* No doorbell on SI hardware generation */ - if (adev->asic_type < CHIP_BONAIRE) { - adev->doorbell.base = 0; - adev->doorbell.size = 0; - adev->doorbell.num_kernel_doorbells = 0; - adev->doorbell.ptr = NULL; - return 0; - } - - if (pci_resource_flags(adev->pdev, 2) & IORESOURCE_UNSET) - return -EINVAL; - - amdgpu_asic_init_doorbell_index(adev); - - /* doorbell bar mapping */ - adev->doorbell.base = pci_resource_start(adev->pdev, 2); - adev->doorbell.size = pci_resource_len(adev->pdev, 2); - - if (adev->enable_mes) { - adev->doorbell.num_kernel_doorbells = - adev->doorbell.size / sizeof(u32); - } else { - adev->doorbell.num_kernel_doorbells = - min_t(u32, adev->doorbell.size / sizeof(u32), - adev->doorbell_index.max_assignment+1); - if (adev->doorbell.num_kernel_doorbells == 0) - return -EINVAL; - - /* For Vega, reserve and map two pages on doorbell BAR since SDMA - * paging queue doorbell use the second page. The - * AMDGPU_DOORBELL64_MAX_ASSIGNMENT definition assumes all the - * doorbells are in the first page. So with paging queue enabled, - * the max num_kernel_doorbells should + 1 page (0x400 in dword) - */ - if (adev->ip_versions[SDMA0_HWIP][0] >= IP_VERSION(4, 0, 0) && - adev->ip_versions[SDMA0_HWIP][0] < IP_VERSION(4, 2, 0)) - adev->doorbell.num_kernel_doorbells += 0x400; - } - - adev->doorbell.ptr = ioremap(adev->doorbell.base, - adev->doorbell.num_kernel_doorbells * - sizeof(u32)); - if (adev->doorbell.ptr == NULL) - return -ENOMEM; - - return 0; -} - -/** - * amdgpu_device_doorbell_fini - Tear down doorbell driver information. - * - * @adev: amdgpu_device pointer - * - * Tear down doorbell driver information (CIK) - */ -static void amdgpu_device_doorbell_fini(struct amdgpu_device *adev) -{ - iounmap(adev->doorbell.ptr); - adev->doorbell.ptr = NULL; -} - - - -/* * amdgpu_device_wb_*() * Writeback is the method by which the GPU updates special pages in memory * with the status of certain GPU events (fences, ring pointers,etc.). @@ -1321,10 +1372,13 @@ int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev) int rbar_size = pci_rebar_bytes_to_size(adev->gmc.real_vram_size); struct pci_bus *root; struct resource *res; - unsigned i; + unsigned int i; u16 cmd; int r; + if (!IS_ENABLED(CONFIG_PHYS_ADDR_T_64BIT)) + return 0; + /* Bypass for VF */ if (amdgpu_sriov_vf(adev)) return 0; @@ -1359,7 +1413,7 @@ int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev) cmd & ~PCI_COMMAND_MEMORY); /* Free the VRAM and doorbell BAR, we most likely need to move both. */ - amdgpu_device_doorbell_fini(adev); + amdgpu_doorbell_fini(adev); if (adev->asic_type >= CHIP_BONAIRE) pci_release_resource(adev->pdev, 2); @@ -1376,7 +1430,7 @@ int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev) /* When the doorbell or fb BAR isn't available we have no chance of * using the device. */ - r = amdgpu_device_doorbell_init(adev); + r = amdgpu_doorbell_init(adev); if (r || (pci_resource_flags(adev->pdev, 0) & IORESOURCE_UNSET)) return -ENODEV; @@ -1387,9 +1441,8 @@ int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev) static bool amdgpu_device_read_bios(struct amdgpu_device *adev) { - if (hweight32(adev->aid_mask) && (adev->flags & AMD_IS_APU)) { + if (hweight32(adev->aid_mask) && (adev->flags & AMD_IS_APU)) return false; - } return true; } @@ -1425,6 +1478,7 @@ bool amdgpu_device_need_post(struct amdgpu_device *adev) if (adev->asic_type == CHIP_FIJI) { int err; uint32_t fw_ver; + err = request_firmware(&adev->pm.fw, "amdgpu/fiji_smc.bin", adev->dev); /* force vPost if error occured */ if (err) @@ -1459,40 +1513,45 @@ bool amdgpu_device_need_post(struct amdgpu_device *adev) } /* - * On APUs with >= 64GB white flickering has been observed w/ SG enabled. - * Disable S/G on such systems until we have a proper fix. - * https://gitlab.freedesktop.org/drm/amd/-/issues/2354 - * https://gitlab.freedesktop.org/drm/amd/-/issues/2735 + * Check whether seamless boot is supported. + * + * So far we only support seamless boot on DCE 3.0 or later. + * If users report that it works on older ASICS as well, we may + * loosen this. */ -bool amdgpu_sg_display_supported(struct amdgpu_device *adev) +bool amdgpu_device_seamless_boot_supported(struct amdgpu_device *adev) { - switch (amdgpu_sg_display) { + switch (amdgpu_seamless) { case -1: break; - case 0: - return false; case 1: return true; + case 0: + return false; default: + DRM_ERROR("Invalid value for amdgpu.seamless: %d\n", + amdgpu_seamless); return false; } - if ((totalram_pages() << (PAGE_SHIFT - 10)) + - (adev->gmc.real_vram_size / 1024) >= 64000000) { - DRM_WARN("Disabling S/G due to >=64GB RAM\n"); + + if (!(adev->flags & AMD_IS_APU)) return false; - } - return true; + + if (adev->mman.keep_stolen_vga_memory) + return false; + + return adev->ip_versions[DCE_HWIP][0] >= IP_VERSION(3, 0, 0); } /* - * Intel hosts such as Raptor Lake and Sapphire Rapids don't support dynamic - * speed switching. Until we have confirmation from Intel that a specific host - * supports it, it's safer that we keep it disabled for all. + * Intel hosts such as Rocket Lake, Alder Lake, Raptor Lake and Sapphire Rapids + * don't support dynamic speed switching. Until we have confirmation from Intel + * that a specific host supports it, it's safer that we keep it disabled for all. * * https://edc.intel.com/content/www/us/en/design/products/platforms/details/raptor-lake-s/13th-generation-core-processors-datasheet-volume-1-of-2/005/pci-express-support/ * https://gitlab.freedesktop.org/drm/amd/-/issues/2663 */ -bool amdgpu_device_pcie_dynamic_switching_supported(void) +static bool amdgpu_device_pcie_dynamic_switching_supported(void) { #if IS_ENABLED(CONFIG_X86) struct cpuinfo_x86 *c = &cpu_data(0); @@ -1525,20 +1584,13 @@ bool amdgpu_device_should_use_aspm(struct amdgpu_device *adev) default: return false; } + if (adev->flags & AMD_IS_APU) + return false; + if (!(adev->pm.pp_feature & PP_PCIE_DPM_MASK)) + return false; return pcie_aspm_enabled(adev->pdev); } -bool amdgpu_device_aspm_support_quirk(void) -{ -#if IS_ENABLED(CONFIG_X86) - struct cpuinfo_x86 *c = &cpu_data(0); - - return !(c->x86 == 6 && c->x86_model == INTEL_FAM6_ALDERLAKE); -#else - return true; -#endif -} - /* if we get transitioned to only one device, take VGA back */ /** * amdgpu_device_vga_set_decode - enable/disable vga decode @@ -1553,6 +1605,7 @@ static unsigned int amdgpu_device_vga_set_decode(struct pci_dev *pdev, bool state) { struct amdgpu_device *adev = drm_to_adev(pci_get_drvdata(pdev)); + amdgpu_asic_set_vga_state(adev, state); if (state) return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM | @@ -1575,7 +1628,8 @@ static void amdgpu_device_check_block_size(struct amdgpu_device *adev) { /* defines number of bits in page table versus page directory, * a page is 4KB so we have 12 bits offset, minimum 9 bits in the - * page table and the remaining bits are in the page directory */ + * page table and the remaining bits are in the page directory + */ if (amdgpu_vm_block_size == -1) return; @@ -1785,6 +1839,7 @@ static void amdgpu_switcheroo_set_state(struct pci_dev *pdev, } else { pr_info("switched off\n"); dev->switch_power_state = DRM_SWITCH_POWER_CHANGING; + amdgpu_device_prepare(dev); amdgpu_device_suspend(dev, true); amdgpu_device_cache_pci_state(pdev); /* Shut down the device */ @@ -1807,7 +1862,7 @@ static bool amdgpu_switcheroo_can_switch(struct pci_dev *pdev) { struct drm_device *dev = pci_get_drvdata(pdev); - /* + /* * FIXME: open_count is protected by drm_global_mutex but that would lead to * locking inversion with the driver load path. And the access here is * completely racy anyway. So don't bother with locking for now. @@ -2256,7 +2311,6 @@ out: */ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev) { - struct drm_device *dev = adev_to_drm(adev); struct pci_dev *parent; int i, r; bool total; @@ -2327,11 +2381,11 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev) (amdgpu_is_atpx_hybrid() || amdgpu_has_atpx_dgpu_power_cntl()) && ((adev->flags & AMD_IS_APU) == 0) && - !pci_is_thunderbolt_attached(to_pci_dev(dev->dev))) + !dev_is_removable(&adev->pdev->dev)) adev->flags |= AMD_IS_PX; if (!(adev->flags & AMD_IS_APU)) { - parent = pci_upstream_bridge(adev->pdev); + parent = pcie_find_root_port(adev->pdev); adev->has_pr3 = parent ? pci_pr3_present(parent) : false; } @@ -2341,6 +2395,8 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev) adev->pm.pp_feature &= ~PP_GFXOFF_MASK; if (amdgpu_sriov_vf(adev) && adev->asic_type == CHIP_SIENNA_CICHLID) adev->pm.pp_feature &= ~PP_OVERDRIVE_MASK; + if (!amdgpu_device_pcie_dynamic_switching_supported()) + adev->pm.pp_feature &= ~PP_PCIE_DPM_MASK; total = true; for (i = 0; i < adev->num_ip_blocks; i++) { @@ -2518,6 +2574,7 @@ static int amdgpu_device_init_schedulers(struct amdgpu_device *adev) } r = drm_sched_init(&ring->sched, &amdgpu_sched_ops, + DRM_SCHED_PRIORITY_COUNT, ring->num_hw_submission, 0, timeout, adev->reset_domain->wq, ring->sched_score, ring->name, @@ -2527,6 +2584,18 @@ static int amdgpu_device_init_schedulers(struct amdgpu_device *adev) ring->name); return r; } + r = amdgpu_uvd_entity_init(adev, ring); + if (r) { + DRM_ERROR("Failed to create UVD scheduling entity on ring %s.\n", + ring->name); + return r; + } + r = amdgpu_vce_entity_init(adev, ring); + if (r) { + DRM_ERROR("Failed to create VCE scheduling entity on ring %s.\n", + ring->name); + return r; + } } amdgpu_xcp_update_partition_sched_list(adev); @@ -2687,6 +2756,9 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev) if (r) goto init_failed; + if (adev->mman.buffer_funcs_ring->sched.ready) + amdgpu_ttm_set_buffer_funcs_status(adev, true); + /* Don't init kfd if whole hive need to be reset during init */ if (!adev->gmc.xgmi.pending_reset) { kgd2kfd_init_zone_device(adev); @@ -2969,7 +3041,7 @@ static void amdgpu_device_smu_fini_early(struct amdgpu_device *adev) { int i, r; - if (adev->ip_versions[GC_HWIP][0] > IP_VERSION(9, 0, 0)) + if (amdgpu_ip_version(adev, GC_HWIP, 0) > IP_VERSION(9, 0, 0)) return; for (i = 0; i < adev->num_ip_blocks; i++) { @@ -3222,8 +3294,10 @@ static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev) /* SDMA 5.x+ is part of GFX power domain so it's covered by GFXOFF */ if (adev->in_s0ix && - (adev->ip_versions[SDMA0_HWIP][0] >= IP_VERSION(5, 0, 0)) && - (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA)) + (amdgpu_ip_version(adev, SDMA0_HWIP, 0) >= + IP_VERSION(5, 0, 0)) && + (adev->ip_blocks[i].version->type == + AMD_IP_BLOCK_TYPE_SDMA)) continue; /* Once swPSP provides the IMU, RLC FW binaries to TOS during cold-boot. @@ -3282,6 +3356,8 @@ int amdgpu_device_ip_suspend(struct amdgpu_device *adev) amdgpu_virt_request_full_gpu(adev, false); } + amdgpu_ttm_set_buffer_funcs_status(adev, false); + r = amdgpu_device_ip_suspend_phase1(adev); if (r) return r; @@ -3452,7 +3528,7 @@ static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev) * * Main resume function for hardware IPs. The hardware IPs * are split into two resume functions because they are - * are also used in in recovering from a GPU reset and some additional + * also used in recovering from a GPU reset and some additional * steps need to be take between them. In this case (S3/S4) they are * run sequentially. * Returns 0 on success, negative error code on failure. @@ -3461,12 +3537,6 @@ static int amdgpu_device_ip_resume(struct amdgpu_device *adev) { int r; - if (!adev->in_s0ix) { - r = amdgpu_amdkfd_resume_iommu(adev); - if (r) - return r; - } - r = amdgpu_device_ip_resume_phase1(adev); if (r) return r; @@ -3477,6 +3547,9 @@ static int amdgpu_device_ip_resume(struct amdgpu_device *adev) r = amdgpu_device_ip_resume_phase2(adev); + if (adev->mman.buffer_funcs_ring->sched.ready) + amdgpu_ttm_set_buffer_funcs_status(adev, true); + return r; } @@ -3554,8 +3627,7 @@ bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type) #else default: if (amdgpu_dc > 0) - DRM_INFO_ONCE("Display Core has been requested via kernel parameter " - "but isn't supported by ASIC, ignoring\n"); + DRM_INFO_ONCE("Display Core has been requested via kernel parameter but isn't supported by ASIC, ignoring\n"); return false; #endif } @@ -3607,9 +3679,7 @@ static void amdgpu_device_xgmi_reset_func(struct work_struct *__work) if (adev->asic_reset_res) goto fail; - if (adev->mmhub.ras && adev->mmhub.ras->ras_block.hw_ops && - adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count) - adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count(adev); + amdgpu_ras_reset_error_count(adev, AMDGPU_RAS_BLOCK__MMHUB); } else { task_barrier_full(&hive->tb); @@ -3711,9 +3781,6 @@ static void amdgpu_device_check_iommu_direct_map(struct amdgpu_device *adev) } static const struct attribute *amdgpu_dev_attributes[] = { - &dev_attr_product_name.attr, - &dev_attr_product_number.attr, - &dev_attr_serial_number.attr, &dev_attr_pcie_replay_count.attr, NULL }; @@ -3724,10 +3791,6 @@ static void amdgpu_device_set_mcbp(struct amdgpu_device *adev) adev->gfx.mcbp = true; else if (amdgpu_mcbp == 0) adev->gfx.mcbp = false; - else if ((adev->ip_versions[GC_HWIP][0] >= IP_VERSION(9, 0, 0)) && - (adev->ip_versions[GC_HWIP][0] < IP_VERSION(10, 0, 0)) && - adev->gfx.num_gfx_rings) - adev->gfx.mcbp = true; if (amdgpu_sriov_vf(adev)) adev->gfx.mcbp = true; @@ -3790,6 +3853,8 @@ int amdgpu_device_init(struct amdgpu_device *adev, adev->pciep_wreg = &amdgpu_invalid_wreg; adev->pcie_rreg64 = &amdgpu_invalid_rreg64; adev->pcie_wreg64 = &amdgpu_invalid_wreg64; + adev->pcie_rreg64_ext = &amdgpu_invalid_rreg64_ext; + adev->pcie_wreg64_ext = &amdgpu_invalid_wreg64_ext; adev->uvd_ctx_rreg = &amdgpu_invalid_rreg; adev->uvd_ctx_wreg = &amdgpu_invalid_wreg; adev->didt_rreg = &amdgpu_invalid_rreg; @@ -3804,7 +3869,8 @@ int amdgpu_device_init(struct amdgpu_device *adev, pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision); /* mutex initialization are all done here so we - * can recall function without having locking issues */ + * can recall function without having locking issues + */ mutex_init(&adev->firmware.mutex); mutex_init(&adev->pm.mutex); mutex_init(&adev->gfx.gpu_clock_mutex); @@ -3844,6 +3910,8 @@ int amdgpu_device_init(struct amdgpu_device *adev, INIT_LIST_HEAD(&adev->ras_list); + INIT_LIST_HEAD(&adev->pm.od_kobj_list); + INIT_DELAYED_WORK(&adev->delayed_init_work, amdgpu_device_delayed_init_work_handler); INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work, @@ -3881,11 +3949,11 @@ int amdgpu_device_init(struct amdgpu_device *adev, atomic_set(&adev->pm.pwr_state[i], POWER_STATE_UNKNOWN); adev->rmmio = ioremap(adev->rmmio_base, adev->rmmio_size); - if (adev->rmmio == NULL) { + if (!adev->rmmio) return -ENOMEM; - } + DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base); - DRM_INFO("register mmio size: %u\n", (unsigned)adev->rmmio_size); + DRM_INFO("register mmio size: %u\n", (unsigned int)adev->rmmio_size); /* * Reset domain needs to be present early, before XGMI hive discovered @@ -3940,7 +4008,8 @@ int amdgpu_device_init(struct amdgpu_device *adev, * internal path natively support atomics, set have_atomics_support to true. */ } else if ((adev->flags & AMD_IS_APU) && - (adev->ip_versions[GC_HWIP][0] > IP_VERSION(9, 0, 0))) { + (amdgpu_ip_version(adev, GC_HWIP, 0) > + IP_VERSION(9, 0, 0))) { adev->have_atomics_support = true; } else { adev->have_atomics_support = @@ -3953,7 +4022,7 @@ int amdgpu_device_init(struct amdgpu_device *adev, dev_info(adev->dev, "PCIE atomic ops is not supported\n"); /* doorbell bar mapping and doorbell index init*/ - amdgpu_device_doorbell_init(adev); + amdgpu_doorbell_init(adev); if (amdgpu_emu_mode == 1) { /* post the asic on emulation mode */ @@ -3988,13 +4057,23 @@ int amdgpu_device_init(struct amdgpu_device *adev, } } } else { - tmp = amdgpu_reset_method; - /* It should do a default reset when loading or reloading the driver, - * regardless of the module parameter reset_method. - */ - amdgpu_reset_method = AMD_RESET_METHOD_NONE; - r = amdgpu_asic_reset(adev); - amdgpu_reset_method = tmp; + switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) { + case IP_VERSION(13, 0, 0): + case IP_VERSION(13, 0, 7): + case IP_VERSION(13, 0, 10): + r = psp_gpu_reset(adev); + break; + default: + tmp = amdgpu_reset_method; + /* It should do a default reset when loading or reloading the driver, + * regardless of the module parameter reset_method. + */ + amdgpu_reset_method = AMD_RESET_METHOD_NONE; + r = amdgpu_asic_reset(adev); + amdgpu_reset_method = tmp; + break; + } + if (r) { dev_err(adev->dev, "asic reset on init failed\n"); goto failed; @@ -4080,30 +4159,6 @@ fence_driver_init: /* Get a log2 for easy divisions. */ adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps)); - r = amdgpu_atombios_sysfs_init(adev); - if (r) - drm_err(&adev->ddev, - "registering atombios sysfs failed (%d).\n", r); - - r = amdgpu_pm_sysfs_init(adev); - if (r) - DRM_ERROR("registering pm sysfs failed (%d).\n", r); - - r = amdgpu_ucode_sysfs_init(adev); - if (r) { - adev->ucode_sysfs_en = false; - DRM_ERROR("Creating firmware sysfs failed (%d).\n", r); - } else - adev->ucode_sysfs_en = true; - - r = amdgpu_psp_sysfs_init(adev); - if (r) { - adev->psp_sysfs_en = false; - if (!amdgpu_sriov_vf(adev)) - DRM_ERROR("Creating psp sysfs failed\n"); - } else - adev->psp_sysfs_en = true; - /* * Register gpu instance before amdgpu_device_enable_mgpu_fan_boost. * Otherwise the mgpu fan boost feature will be skipped due to the @@ -4132,10 +4187,38 @@ fence_driver_init: flush_delayed_work(&adev->delayed_init_work); } + /* + * Place those sysfs registering after `late_init`. As some of those + * operations performed in `late_init` might affect the sysfs + * interfaces creating. + */ + r = amdgpu_atombios_sysfs_init(adev); + if (r) + drm_err(&adev->ddev, + "registering atombios sysfs failed (%d).\n", r); + + r = amdgpu_pm_sysfs_init(adev); + if (r) + DRM_ERROR("registering pm sysfs failed (%d).\n", r); + + r = amdgpu_ucode_sysfs_init(adev); + if (r) { + adev->ucode_sysfs_en = false; + DRM_ERROR("Creating firmware sysfs failed (%d).\n", r); + } else + adev->ucode_sysfs_en = true; + r = sysfs_create_files(&adev->dev->kobj, amdgpu_dev_attributes); if (r) dev_err(adev->dev, "Could not create amdgpu device attr\n"); + r = devm_device_add_group(adev->dev, &amdgpu_board_attrs_group); + if (r) + dev_err(adev->dev, + "Could not create amdgpu board attributes\n"); + + amdgpu_fru_sysfs_init(adev); + if (IS_ENABLED(CONFIG_PERF_EVENTS)) r = amdgpu_pmu_init(adev); if (r) @@ -4147,13 +4230,14 @@ fence_driver_init: /* if we have > 1 VGA cards, then disable the amdgpu VGA resources */ /* this will fail for cards that aren't VGA class devices, just - * ignore it */ + * ignore it + */ if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA) vga_client_register(adev->pdev, amdgpu_device_vga_set_decode); px = amdgpu_device_supports_px(ddev); - if (px || (!pci_is_thunderbolt_attached(adev->pdev) && + if (px || (!dev_is_removable(&adev->pdev->dev) && apple_gmux_detect(NULL, NULL))) vga_switcheroo_register_client(adev->pdev, &amdgpu_switcheroo_ops, px); @@ -4199,7 +4283,7 @@ static void amdgpu_device_unmap_mmio(struct amdgpu_device *adev) unmap_mapping_range(adev->ddev.anon_inode->i_mapping, 0, 0, 1); /* Unmap all mapped bars - Doorbell, registers and VRAM */ - amdgpu_device_doorbell_fini(adev); + amdgpu_doorbell_fini(adev); iounmap(adev->rmmio); adev->rmmio = NULL; @@ -4230,7 +4314,7 @@ void amdgpu_device_fini_hw(struct amdgpu_device *adev) /* make sure IB test finished before entering exclusive mode * to avoid preemption on IB test - * */ + */ if (amdgpu_sriov_vf(adev)) { amdgpu_virt_request_full_gpu(adev, false); amdgpu_virt_fini_data_exchange(adev); @@ -4253,13 +4337,14 @@ void amdgpu_device_fini_hw(struct amdgpu_device *adev) amdgpu_pm_sysfs_fini(adev); if (adev->ucode_sysfs_en) amdgpu_ucode_sysfs_fini(adev); - if (adev->psp_sysfs_en) - amdgpu_psp_sysfs_fini(adev); sysfs_remove_files(&adev->dev->kobj, amdgpu_dev_attributes); + amdgpu_fru_sysfs_fini(adev); /* disable ras feature must before hw fini */ amdgpu_ras_pre_fini(adev); + amdgpu_ttm_set_buffer_funcs_status(adev, false); + amdgpu_device_ip_fini_early(adev); amdgpu_irq_fini_hw(adev); @@ -4297,9 +4382,12 @@ void amdgpu_device_fini_sw(struct amdgpu_device *adev) kfree(adev->bios); adev->bios = NULL; + kfree(adev->fru_info); + adev->fru_info = NULL; + px = amdgpu_device_supports_px(adev_to_drm(adev)); - if (px || (!pci_is_thunderbolt_attached(adev->pdev) && + if (px || (!dev_is_removable(&adev->pdev->dev) && apple_gmux_detect(NULL, NULL))) vga_switcheroo_unregister_client(adev->pdev); @@ -4313,7 +4401,7 @@ void amdgpu_device_fini_sw(struct amdgpu_device *adev) iounmap(adev->rmmio); adev->rmmio = NULL; - amdgpu_device_doorbell_fini(adev); + amdgpu_doorbell_fini(adev); drm_dev_exit(idx); } @@ -4356,6 +4444,41 @@ static int amdgpu_device_evict_resources(struct amdgpu_device *adev) * Suspend & resume. */ /** + * amdgpu_device_prepare - prepare for device suspend + * + * @dev: drm dev pointer + * + * Prepare to put the hw in the suspend state (all asics). + * Returns 0 for success or an error on failure. + * Called at driver suspend. + */ +int amdgpu_device_prepare(struct drm_device *dev) +{ + struct amdgpu_device *adev = drm_to_adev(dev); + int i, r; + + if (dev->switch_power_state == DRM_SWITCH_POWER_OFF) + return 0; + + /* Evict the majority of BOs before starting suspend sequence */ + r = amdgpu_device_evict_resources(adev); + if (r) + return r; + + for (i = 0; i < adev->num_ip_blocks; i++) { + if (!adev->ip_blocks[i].status.valid) + continue; + if (!adev->ip_blocks[i].version->funcs->prepare_suspend) + continue; + r = adev->ip_blocks[i].version->funcs->prepare_suspend((void *)adev); + if (r) + return r; + } + + return 0; +} + +/** * amdgpu_device_suspend - initiate device suspend * * @dev: drm dev pointer @@ -4375,11 +4498,6 @@ int amdgpu_device_suspend(struct drm_device *dev, bool fbcon) adev->in_suspend = true; - /* Evict the majority of BOs before grabbing the full access */ - r = amdgpu_device_evict_resources(adev); - if (r) - return r; - if (amdgpu_sriov_vf(adev)) { amdgpu_virt_fini_data_exchange(adev); r = amdgpu_virt_request_full_gpu(adev, false); @@ -4407,6 +4525,8 @@ int amdgpu_device_suspend(struct drm_device *dev, bool fbcon) if (r) return r; + amdgpu_ttm_set_buffer_funcs_status(adev, false); + amdgpu_fence_driver_hw_fini(adev); amdgpu_device_ip_suspend_phase2(adev); @@ -4414,6 +4534,10 @@ int amdgpu_device_suspend(struct drm_device *dev, bool fbcon) if (amdgpu_sriov_vf(adev)) amdgpu_virt_release_full_gpu(adev, false); + r = amdgpu_dpm_notify_rlc_state(adev, false); + if (r) + return r; + return 0; } @@ -4459,19 +4583,18 @@ int amdgpu_device_resume(struct drm_device *dev, bool fbcon) } amdgpu_fence_driver_hw_init(adev); - r = amdgpu_device_ip_late_init(adev); - if (r) - goto exit; - - queue_delayed_work(system_wq, &adev->delayed_init_work, - msecs_to_jiffies(AMDGPU_RESUME_MS)); - if (!adev->in_s0ix) { r = amdgpu_amdkfd_resume(adev, adev->in_runpm); if (r) goto exit; } + r = amdgpu_device_ip_late_init(adev); + if (r) + goto exit; + + queue_delayed_work(system_wq, &adev->delayed_init_work, + msecs_to_jiffies(AMDGPU_RESUME_MS)); exit: if (amdgpu_sriov_vf(adev)) { amdgpu_virt_init_data_exchange(adev); @@ -4773,6 +4896,10 @@ retry: r = amdgpu_virt_reset_gpu(adev); if (r) return r; + amdgpu_irq_gpu_reset_resume_helper(adev); + + /* some sw clean up VF needs to do before recover */ + amdgpu_virt_post_reset(adev); /* Resume IP prior to SMC */ r = amdgpu_device_ip_reinit_early_sriov(adev); @@ -4799,7 +4926,6 @@ retry: amdgpu_put_xgmi_hive(hive); if (!r) { - amdgpu_irq_gpu_reset_resume_helper(adev); r = amdgpu_ib_ring_tests(adev); amdgpu_amdkfd_post_reset(adev); @@ -4925,9 +5051,12 @@ int amdgpu_device_mode1_reset(struct amdgpu_device *adev) } if (ret) - dev_err(adev->dev, "GPU mode1 reset failed\n"); + goto mode1_reset_failed; amdgpu_device_load_pci_state(adev->pdev); + ret = amdgpu_psp_wait_for_bootloader(adev); + if (ret) + goto mode1_reset_failed; /* wait for asic to come out of reset */ for (i = 0; i < adev->usec_timeout; i++) { @@ -4938,7 +5067,17 @@ int amdgpu_device_mode1_reset(struct amdgpu_device *adev) udelay(1); } + if (i >= adev->usec_timeout) { + ret = -ETIMEDOUT; + goto mode1_reset_failed; + } + amdgpu_atombios_scratch_regs_engine_hung(adev, false); + + return 0; + +mode1_reset_failed: + dev_err(adev->dev, "GPU mode1 reset failed\n"); return ret; } @@ -4967,8 +5106,9 @@ int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev, if (!ring || !ring->sched.thread) continue; - /*clear job fence from fence drv to avoid force_completion - *leave NULL and vm flush fence in fence drv */ + /* Clear job fence from fence drv to avoid force_completion + * leave NULL and vm flush fence in fence drv + */ amdgpu_fence_driver_clear_job_fences(ring); /* after all hw jobs are reset, hw fence is meaningless, so force_completion */ @@ -4982,7 +5122,7 @@ int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev, r = amdgpu_reset_prepare_hwcontext(adev, reset_context); /* If reset handler not implemented, continue; otherwise return */ - if (r == -ENOSYS) + if (r == -EOPNOTSUPP) r = 0; else return r; @@ -5022,67 +5162,16 @@ static int amdgpu_reset_reg_dumps(struct amdgpu_device *adev) lockdep_assert_held(&adev->reset_domain->sem); - for (i = 0; i < adev->num_regs; i++) { - adev->reset_dump_reg_value[i] = RREG32(adev->reset_dump_reg_list[i]); - trace_amdgpu_reset_reg_dumps(adev->reset_dump_reg_list[i], - adev->reset_dump_reg_value[i]); - } - - return 0; -} + for (i = 0; i < adev->reset_info.num_regs; i++) { + adev->reset_info.reset_dump_reg_value[i] = + RREG32(adev->reset_info.reset_dump_reg_list[i]); -#ifdef CONFIG_DEV_COREDUMP -static ssize_t amdgpu_devcoredump_read(char *buffer, loff_t offset, - size_t count, void *data, size_t datalen) -{ - struct drm_printer p; - struct amdgpu_device *adev = data; - struct drm_print_iterator iter; - int i; - - iter.data = buffer; - iter.offset = 0; - iter.start = offset; - iter.remain = count; - - p = drm_coredump_printer(&iter); - - drm_printf(&p, "**** AMDGPU Device Coredump ****\n"); - drm_printf(&p, "kernel: " UTS_RELEASE "\n"); - drm_printf(&p, "module: " KBUILD_MODNAME "\n"); - drm_printf(&p, "time: %lld.%09ld\n", adev->reset_time.tv_sec, adev->reset_time.tv_nsec); - if (adev->reset_task_info.pid) - drm_printf(&p, "process_name: %s PID: %d\n", - adev->reset_task_info.process_name, - adev->reset_task_info.pid); - - if (adev->reset_vram_lost) - drm_printf(&p, "VRAM is lost due to GPU reset!\n"); - if (adev->num_regs) { - drm_printf(&p, "AMDGPU register dumps:\nOffset: Value:\n"); - - for (i = 0; i < adev->num_regs; i++) - drm_printf(&p, "0x%08x: 0x%08x\n", - adev->reset_dump_reg_list[i], - adev->reset_dump_reg_value[i]); + trace_amdgpu_reset_reg_dumps(adev->reset_info.reset_dump_reg_list[i], + adev->reset_info.reset_dump_reg_value[i]); } - return count - iter.remain; -} - -static void amdgpu_devcoredump_free(void *data) -{ -} - -static void amdgpu_reset_capture_coredumpm(struct amdgpu_device *adev) -{ - struct drm_device *dev = adev_to_drm(adev); - - ktime_get_ts64(&adev->reset_time); - dev_coredumpm(dev->dev, THIS_MODULE, adev, 0, GFP_KERNEL, - amdgpu_devcoredump_read, amdgpu_devcoredump_free); + return 0; } -#endif int amdgpu_do_asic_reset(struct list_head *device_list_handle, struct amdgpu_reset_context *reset_context) @@ -5100,7 +5189,7 @@ int amdgpu_do_asic_reset(struct list_head *device_list_handle, reset_context->reset_device_list = device_list_handle; r = amdgpu_reset_perform_reset(tmp_adev, reset_context); /* If reset handler not implemented, continue; otherwise return */ - if (r == -ENOSYS) + if (r == -EOPNOTSUPP) r = 0; else return r; @@ -5131,7 +5220,7 @@ int amdgpu_do_asic_reset(struct list_head *device_list_handle, if (r) { dev_err(tmp_adev->dev, "ASIC reset failed with error, %d for drm dev, %s", r, adev_to_drm(tmp_adev)->unique); - break; + goto out; } } @@ -5150,9 +5239,7 @@ int amdgpu_do_asic_reset(struct list_head *device_list_handle, if (!r && amdgpu_ras_intr_triggered()) { list_for_each_entry(tmp_adev, device_list_handle, reset_list) { - if (tmp_adev->mmhub.ras && tmp_adev->mmhub.ras->ras_block.hw_ops && - tmp_adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count) - tmp_adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count(tmp_adev); + amdgpu_ras_reset_error_count(tmp_adev, AMDGPU_RAS_BLOCK__MMHUB); } amdgpu_ras_intr_cleared(); @@ -5178,24 +5265,15 @@ int amdgpu_do_asic_reset(struct list_head *device_list_handle, dev_warn(tmp_adev->dev, "asic atom init failed!"); } else { dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n"); - r = amdgpu_amdkfd_resume_iommu(tmp_adev); - if (r) - goto out; r = amdgpu_device_ip_resume_phase1(tmp_adev); if (r) goto out; vram_lost = amdgpu_device_check_vram_lost(tmp_adev); -#ifdef CONFIG_DEV_COREDUMP - tmp_adev->reset_vram_lost = vram_lost; - memset(&tmp_adev->reset_task_info, 0, - sizeof(tmp_adev->reset_task_info)); - if (reset_context->job && reset_context->job->vm) - tmp_adev->reset_task_info = - reset_context->job->vm->task_info; - amdgpu_reset_capture_coredumpm(tmp_adev); -#endif + + amdgpu_coredump(tmp_adev, vram_lost, reset_context); + if (vram_lost) { DRM_INFO("VRAM is lost due to GPU reset!\n"); amdgpu_inc_vram_lost(tmp_adev); @@ -5205,10 +5283,18 @@ int amdgpu_do_asic_reset(struct list_head *device_list_handle, if (r) return r; + r = amdgpu_xcp_restore_partition_mode( + tmp_adev->xcp_mgr); + if (r) + goto out; + r = amdgpu_device_ip_resume_phase2(tmp_adev); if (r) goto out; + if (tmp_adev->mman.buffer_funcs_ring->sched.ready) + amdgpu_ttm_set_buffer_funcs_status(tmp_adev, true); + if (vram_lost) amdgpu_device_fill_reset_magic(tmp_adev); @@ -5422,7 +5508,8 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, * Flush RAM to disk so that after reboot * the user can read log and see why the system rebooted. */ - if (need_emergency_restart && amdgpu_ras_get_context(adev)->reboot) { + if (need_emergency_restart && amdgpu_ras_get_context(adev) && + amdgpu_ras_get_context(adev)->reboot) { DRM_WARN("Emergency reboot."); ksys_sync_helper(); @@ -5560,8 +5647,9 @@ retry: /* Rest of adevs pre asic reset from XGMI hive. */ adev->asic_reset_res = r; /* Aldebaran and gfx_11_0_3 support ras in SRIOV, so need resume ras during reset */ - if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2) || - adev->ip_versions[GC_HWIP][0] == IP_VERSION(11, 0, 3)) + if (amdgpu_ip_version(adev, GC_HWIP, 0) == + IP_VERSION(9, 4, 2) || + amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 0, 3)) amdgpu_ras_resume(adev); } else { r = amdgpu_do_asic_reset(device_list_handle, reset_context); @@ -5586,12 +5674,8 @@ skip_hw_reset: drm_sched_start(&ring->sched, true); } - if (adev->enable_mes && adev->ip_versions[GC_HWIP][0] != IP_VERSION(11, 0, 3)) - amdgpu_mes_self_test(tmp_adev); - - if (!drm_drv_uses_atomic_modeset(adev_to_drm(tmp_adev)) && !job_signaled) { + if (!drm_drv_uses_atomic_modeset(adev_to_drm(tmp_adev)) && !job_signaled) drm_helper_resume_force_mode(adev_to_drm(tmp_adev)); - } if (tmp_adev->asic_reset_res) r = tmp_adev->asic_reset_res; @@ -6264,7 +6348,7 @@ bool amdgpu_device_has_display_hardware(struct amdgpu_device *adev) return true; default: /* IP discovery */ - if (!adev->ip_versions[DCE_HWIP][0] || + if (!amdgpu_ip_version(adev, DCE_HWIP, 0) || (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK)) return false; return true; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c index 8e1cfc87122d..0431eafa86b5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c @@ -35,10 +35,12 @@ #include "df_v1_7.h" #include "df_v3_6.h" #include "df_v4_3.h" +#include "df_v4_6_2.h" #include "nbio_v6_1.h" #include "nbio_v7_0.h" #include "nbio_v7_4.h" #include "nbio_v7_9.h" +#include "nbio_v7_11.h" #include "hdp_v4_0.h" #include "vega10_ih.h" #include "vega20_ih.h" @@ -65,6 +67,7 @@ #include "soc21.h" #include "navi10_ih.h" #include "ih_v6_0.h" +#include "ih_v6_1.h" #include "gfx_v10_0.h" #include "gfx_v11_0.h" #include "sdma_v5_0.h" @@ -79,6 +82,8 @@ #include "jpeg_v4_0.h" #include "vcn_v4_0_3.h" #include "jpeg_v4_0_3.h" +#include "vcn_v4_0_5.h" +#include "jpeg_v4_0_5.h" #include "amdgpu_vkms.h" #include "mes_v10_1.h" #include "mes_v11_0.h" @@ -88,10 +93,13 @@ #include "smuio_v13_0_3.h" #include "smuio_v13_0_6.h" +#include "amdgpu_vpe.h" + #define FIRMWARE_IP_DISCOVERY "amdgpu/ip_discovery.bin" MODULE_FIRMWARE(FIRMWARE_IP_DISCOVERY); #define mmRCC_CONFIG_MEMSIZE 0xde3 +#define mmMP0_SMN_C2PMSG_33 0x16061 #define mmMM_INDEX 0x0 #define mmMM_INDEX_HI 0x6 #define mmMM_DATA 0x1 @@ -173,6 +181,7 @@ static const char *hw_id_names[HW_ID_MAX] = { [XGMI_HWID] = "XGMI", [XGBE_HWID] = "XGBE", [MP0_HWID] = "MP0", + [VPE_HWID] = "VPE", }; static int hw_id_map[MAX_HWIP] = { @@ -202,6 +211,7 @@ static int hw_id_map[MAX_HWIP] = { [XGMI_HWIP] = XGMI_HWID, [DCI_HWIP] = DCI_HWID, [PCIE_HWIP] = PCIE_HWID, + [VPE_HWIP] = VPE_HWID, }; static int amdgpu_discovery_read_binary_from_sysmem(struct amdgpu_device *adev, uint8_t *binary) @@ -230,8 +240,26 @@ static int amdgpu_discovery_read_binary_from_sysmem(struct amdgpu_device *adev, static int amdgpu_discovery_read_binary_from_mem(struct amdgpu_device *adev, uint8_t *binary) { - uint64_t vram_size = (uint64_t)RREG32(mmRCC_CONFIG_MEMSIZE) << 20; - int ret = 0; + uint64_t vram_size; + u32 msg; + int i, ret = 0; + + /* It can take up to a second for IFWI init to complete on some dGPUs, + * but generally it should be in the 60-100ms range. Normally this starts + * as soon as the device gets power so by the time the OS loads this has long + * completed. However, when a card is hotplugged via e.g., USB4, we need to + * wait for this to complete. Once the C2PMSG is updated, we can + * continue. + */ + if (dev_is_removable(&adev->pdev->dev)) { + for (i = 0; i < 1000; i++) { + msg = RREG32(mmMP0_SMN_C2PMSG_33); + if (msg & 0x80000000) + break; + msleep(1); + } + } + vram_size = (uint64_t)RREG32(mmRCC_CONFIG_MEMSIZE) << 20; if (vram_size) { uint64_t pos = vram_size - DISCOVERY_TMR_OFFSET; @@ -303,8 +331,8 @@ static void amdgpu_discovery_harvest_config_quirk(struct amdgpu_device *adev) * So far, apply this quirk only on those Navy Flounder boards which * have a bad harvest table of VCN config. */ - if ((adev->ip_versions[UVD_HWIP][1] == IP_VERSION(3, 0, 1)) && - (adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 3, 2))) { + if ((amdgpu_ip_version(adev, UVD_HWIP, 1) == IP_VERSION(3, 0, 1)) && + (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(10, 3, 2))) { switch (adev->pdev->revision) { case 0xC1: case 0xC2: @@ -654,7 +682,7 @@ struct ip_hw_instance { u8 harvest; int num_base_addresses; - u32 base_addr[]; + u32 base_addr[] __counted_by(num_base_addresses); }; struct ip_hw_id { @@ -1183,6 +1211,7 @@ static void amdgpu_discovery_sysfs_fini(struct amdgpu_device *adev) static int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev) { + uint8_t num_base_address, subrev, variant; struct binary_header *bhdr; struct ip_discovery_header *ihdr; struct die_header *dhdr; @@ -1191,7 +1220,6 @@ static int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev) uint16_t ip_offset; uint16_t num_dies; uint16_t num_ips; - uint8_t num_base_address; int hw_ip; int i, j, k; int r; @@ -1329,8 +1357,22 @@ static int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev) * example. On most chips there are multiple instances * with the same HWID. */ - adev->ip_versions[hw_ip][ip->instance_number] = - IP_VERSION(ip->major, ip->minor, ip->revision); + + if (ihdr->version < 3) { + subrev = 0; + variant = 0; + } else { + subrev = ip->sub_revision; + variant = ip->variant; + } + + adev->ip_versions[hw_ip] + [ip->instance_number] = + IP_VERSION_FULL(ip->major, + ip->minor, + ip->revision, + variant, + subrev); } } @@ -1355,8 +1397,8 @@ static void amdgpu_discovery_harvest_ip(struct amdgpu_device *adev) * so read harvest bit per IP data structure to set * harvest configuration. */ - if (adev->ip_versions[GC_HWIP][0] < IP_VERSION(10, 2, 0) && - adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 3)) { + if (amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(10, 2, 0) && + amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 3)) { if ((adev->pdev->device == 0x731E && (adev->pdev->revision == 0xC6 || adev->pdev->revision == 0xC7)) || @@ -1389,6 +1431,7 @@ union gc_info { struct gc_info_v1_1 v1_1; struct gc_info_v1_2 v1_2; struct gc_info_v2_0 v2; + struct gc_info_v2_1 v2_1; }; static int amdgpu_discovery_get_gfx_info(struct amdgpu_device *adev) @@ -1430,12 +1473,12 @@ static int amdgpu_discovery_get_gfx_info(struct amdgpu_device *adev) adev->gfx.config.num_sc_per_sh = le32_to_cpu(gc_info->v1.gc_num_sc_per_se) / le32_to_cpu(gc_info->v1.gc_num_sa_per_se); adev->gfx.config.num_packer_per_sc = le32_to_cpu(gc_info->v1.gc_num_packer_per_sc); - if (gc_info->v1.header.version_minor >= 1) { + if (le16_to_cpu(gc_info->v1.header.version_minor) >= 1) { adev->gfx.config.gc_num_tcp_per_sa = le32_to_cpu(gc_info->v1_1.gc_num_tcp_per_sa); adev->gfx.config.gc_num_sdp_interface = le32_to_cpu(gc_info->v1_1.gc_num_sdp_interface); adev->gfx.config.gc_num_tcps = le32_to_cpu(gc_info->v1_1.gc_num_tcps); } - if (gc_info->v1.header.version_minor >= 2) { + if (le16_to_cpu(gc_info->v1.header.version_minor) >= 2) { adev->gfx.config.gc_num_tcp_per_wpg = le32_to_cpu(gc_info->v1_2.gc_num_tcp_per_wpg); adev->gfx.config.gc_tcp_l1_size = le32_to_cpu(gc_info->v1_2.gc_tcp_l1_size); adev->gfx.config.gc_num_sqc_per_wgp = le32_to_cpu(gc_info->v1_2.gc_num_sqc_per_wgp); @@ -1464,6 +1507,15 @@ static int amdgpu_discovery_get_gfx_info(struct amdgpu_device *adev) adev->gfx.config.num_sc_per_sh = le32_to_cpu(gc_info->v2.gc_num_sc_per_se) / le32_to_cpu(gc_info->v2.gc_num_sh_per_se); adev->gfx.config.num_packer_per_sc = le32_to_cpu(gc_info->v2.gc_num_packer_per_sc); + if (le16_to_cpu(gc_info->v2.header.version_minor) == 1) { + adev->gfx.config.gc_num_tcp_per_sa = le32_to_cpu(gc_info->v2_1.gc_num_tcp_per_sh); + adev->gfx.config.gc_tcp_size_per_cu = le32_to_cpu(gc_info->v2_1.gc_tcp_size_per_cu); + adev->gfx.config.gc_num_sdp_interface = le32_to_cpu(gc_info->v2_1.gc_num_sdp_interface); /* per XCD */ + adev->gfx.config.gc_num_cu_per_sqc = le32_to_cpu(gc_info->v2_1.gc_num_cu_per_sqc); + adev->gfx.config.gc_l1_instruction_cache_size_per_sqc = le32_to_cpu(gc_info->v2_1.gc_instruction_cache_size_per_sqc); + adev->gfx.config.gc_l1_data_cache_size_per_sqc = le32_to_cpu(gc_info->v2_1.gc_scalar_data_cache_size_per_sqc); + adev->gfx.config.gc_tcc_size = le32_to_cpu(gc_info->v2_1.gc_tcc_size); /* per XCD */ + } break; default: dev_err(adev->dev, @@ -1477,6 +1529,7 @@ static int amdgpu_discovery_get_gfx_info(struct amdgpu_device *adev) union mall_info { struct mall_info_v1_0 v1; + struct mall_info_v2_0 v2; }; static int amdgpu_discovery_get_mall_info(struct amdgpu_device *adev) @@ -1517,6 +1570,10 @@ static int amdgpu_discovery_get_mall_info(struct amdgpu_device *adev) adev->gmc.mall_size = mall_size; adev->gmc.m_half_use = half_use; break; + case 2: + mall_size_per_umc = le32_to_cpu(mall_info->v2.mall_size_per_umc); + adev->gmc.mall_size = mall_size_per_umc * adev->gmc.num_umc; + break; default: dev_err(adev->dev, "Unhandled MALL info table %d.%d\n", @@ -1584,7 +1641,7 @@ static int amdgpu_discovery_get_vcn_info(struct amdgpu_device *adev) static int amdgpu_discovery_set_common_ip_blocks(struct amdgpu_device *adev) { /* what IP to use for this? */ - switch (adev->ip_versions[GC_HWIP][0]) { + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { case IP_VERSION(9, 0, 1): case IP_VERSION(9, 1, 0): case IP_VERSION(9, 2, 1): @@ -1616,12 +1673,13 @@ static int amdgpu_discovery_set_common_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(11, 0, 2): case IP_VERSION(11, 0, 3): case IP_VERSION(11, 0, 4): + case IP_VERSION(11, 5, 0): amdgpu_device_ip_block_add(adev, &soc21_common_ip_block); break; default: dev_err(adev->dev, "Failed to add common ip block(GC_HWIP:0x%x)\n", - adev->ip_versions[GC_HWIP][0]); + amdgpu_ip_version(adev, GC_HWIP, 0)); return -EINVAL; } return 0; @@ -1630,7 +1688,7 @@ static int amdgpu_discovery_set_common_ip_blocks(struct amdgpu_device *adev) static int amdgpu_discovery_set_gmc_ip_blocks(struct amdgpu_device *adev) { /* use GC or MMHUB IP version */ - switch (adev->ip_versions[GC_HWIP][0]) { + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { case IP_VERSION(9, 0, 1): case IP_VERSION(9, 1, 0): case IP_VERSION(9, 2, 1): @@ -1662,12 +1720,12 @@ static int amdgpu_discovery_set_gmc_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(11, 0, 2): case IP_VERSION(11, 0, 3): case IP_VERSION(11, 0, 4): + case IP_VERSION(11, 5, 0): amdgpu_device_ip_block_add(adev, &gmc_v11_0_ip_block); break; default: - dev_err(adev->dev, - "Failed to add gmc ip block(GC_HWIP:0x%x)\n", - adev->ip_versions[GC_HWIP][0]); + dev_err(adev->dev, "Failed to add gmc ip block(GC_HWIP:0x%x)\n", + amdgpu_ip_version(adev, GC_HWIP, 0)); return -EINVAL; } return 0; @@ -1675,7 +1733,7 @@ static int amdgpu_discovery_set_gmc_ip_blocks(struct amdgpu_device *adev) static int amdgpu_discovery_set_ih_ip_blocks(struct amdgpu_device *adev) { - switch (adev->ip_versions[OSSSYS_HWIP][0]) { + switch (amdgpu_ip_version(adev, OSSSYS_HWIP, 0)) { case IP_VERSION(4, 0, 0): case IP_VERSION(4, 0, 1): case IP_VERSION(4, 1, 0): @@ -1702,10 +1760,13 @@ static int amdgpu_discovery_set_ih_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(6, 0, 2): amdgpu_device_ip_block_add(adev, &ih_v6_0_ip_block); break; + case IP_VERSION(6, 1, 0): + amdgpu_device_ip_block_add(adev, &ih_v6_1_ip_block); + break; default: dev_err(adev->dev, "Failed to add ih ip block(OSSSYS_HWIP:0x%x)\n", - adev->ip_versions[OSSSYS_HWIP][0]); + amdgpu_ip_version(adev, OSSSYS_HWIP, 0)); return -EINVAL; } return 0; @@ -1713,7 +1774,7 @@ static int amdgpu_discovery_set_ih_ip_blocks(struct amdgpu_device *adev) static int amdgpu_discovery_set_psp_ip_blocks(struct amdgpu_device *adev) { - switch (adev->ip_versions[MP0_HWIP][0]) { + switch (amdgpu_ip_version(adev, MP0_HWIP, 0)) { case IP_VERSION(9, 0, 0): amdgpu_device_ip_block_add(adev, &psp_v3_1_ip_block); break; @@ -1750,6 +1811,7 @@ static int amdgpu_discovery_set_psp_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(13, 0, 8): case IP_VERSION(13, 0, 10): case IP_VERSION(13, 0, 11): + case IP_VERSION(14, 0, 0): amdgpu_device_ip_block_add(adev, &psp_v13_0_ip_block); break; case IP_VERSION(13, 0, 4): @@ -1758,7 +1820,7 @@ static int amdgpu_discovery_set_psp_ip_blocks(struct amdgpu_device *adev) default: dev_err(adev->dev, "Failed to add psp ip block(MP0_HWIP:0x%x)\n", - adev->ip_versions[MP0_HWIP][0]); + amdgpu_ip_version(adev, MP0_HWIP, 0)); return -EINVAL; } return 0; @@ -1766,7 +1828,7 @@ static int amdgpu_discovery_set_psp_ip_blocks(struct amdgpu_device *adev) static int amdgpu_discovery_set_smu_ip_blocks(struct amdgpu_device *adev) { - switch (adev->ip_versions[MP1_HWIP][0]) { + switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) { case IP_VERSION(9, 0, 0): case IP_VERSION(10, 0, 0): case IP_VERSION(10, 0, 1): @@ -1804,10 +1866,13 @@ static int amdgpu_discovery_set_smu_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(13, 0, 11): amdgpu_device_ip_block_add(adev, &smu_v13_0_ip_block); break; + case IP_VERSION(14, 0, 0): + amdgpu_device_ip_block_add(adev, &smu_v14_0_ip_block); + break; default: dev_err(adev->dev, "Failed to add smu ip block(MP1_HWIP:0x%x)\n", - adev->ip_versions[MP1_HWIP][0]); + amdgpu_ip_version(adev, MP1_HWIP, 0)); return -EINVAL; } return 0; @@ -1832,8 +1897,8 @@ static int amdgpu_discovery_set_display_ip_blocks(struct amdgpu_device *adev) return 0; #if defined(CONFIG_DRM_AMD_DC) - if (adev->ip_versions[DCE_HWIP][0]) { - switch (adev->ip_versions[DCE_HWIP][0]) { + if (amdgpu_ip_version(adev, DCE_HWIP, 0)) { + switch (amdgpu_ip_version(adev, DCE_HWIP, 0)) { case IP_VERSION(1, 0, 0): case IP_VERSION(1, 0, 1): case IP_VERSION(2, 0, 2): @@ -1851,6 +1916,7 @@ static int amdgpu_discovery_set_display_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(3, 1, 6): case IP_VERSION(3, 2, 0): case IP_VERSION(3, 2, 1): + case IP_VERSION(3, 5, 0): if (amdgpu_sriov_vf(adev)) amdgpu_discovery_set_sriov_display(adev); else @@ -1859,11 +1925,11 @@ static int amdgpu_discovery_set_display_ip_blocks(struct amdgpu_device *adev) default: dev_err(adev->dev, "Failed to add dm ip block(DCE_HWIP:0x%x)\n", - adev->ip_versions[DCE_HWIP][0]); + amdgpu_ip_version(adev, DCE_HWIP, 0)); return -EINVAL; } - } else if (adev->ip_versions[DCI_HWIP][0]) { - switch (adev->ip_versions[DCI_HWIP][0]) { + } else if (amdgpu_ip_version(adev, DCI_HWIP, 0)) { + switch (amdgpu_ip_version(adev, DCI_HWIP, 0)) { case IP_VERSION(12, 0, 0): case IP_VERSION(12, 0, 1): case IP_VERSION(12, 1, 0): @@ -1875,7 +1941,7 @@ static int amdgpu_discovery_set_display_ip_blocks(struct amdgpu_device *adev) default: dev_err(adev->dev, "Failed to add dm ip block(DCI_HWIP:0x%x)\n", - adev->ip_versions[DCI_HWIP][0]); + amdgpu_ip_version(adev, DCI_HWIP, 0)); return -EINVAL; } } @@ -1885,7 +1951,7 @@ static int amdgpu_discovery_set_display_ip_blocks(struct amdgpu_device *adev) static int amdgpu_discovery_set_gc_ip_blocks(struct amdgpu_device *adev) { - switch (adev->ip_versions[GC_HWIP][0]) { + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { case IP_VERSION(9, 0, 1): case IP_VERSION(9, 1, 0): case IP_VERSION(9, 2, 1): @@ -1921,12 +1987,12 @@ static int amdgpu_discovery_set_gc_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(11, 0, 2): case IP_VERSION(11, 0, 3): case IP_VERSION(11, 0, 4): + case IP_VERSION(11, 5, 0): amdgpu_device_ip_block_add(adev, &gfx_v11_0_ip_block); break; default: - dev_err(adev->dev, - "Failed to add gfx ip block(GC_HWIP:0x%x)\n", - adev->ip_versions[GC_HWIP][0]); + dev_err(adev->dev, "Failed to add gfx ip block(GC_HWIP:0x%x)\n", + amdgpu_ip_version(adev, GC_HWIP, 0)); return -EINVAL; } return 0; @@ -1934,7 +2000,7 @@ static int amdgpu_discovery_set_gc_ip_blocks(struct amdgpu_device *adev) static int amdgpu_discovery_set_sdma_ip_blocks(struct amdgpu_device *adev) { - switch (adev->ip_versions[SDMA0_HWIP][0]) { + switch (amdgpu_ip_version(adev, SDMA0_HWIP, 0)) { case IP_VERSION(4, 0, 0): case IP_VERSION(4, 0, 1): case IP_VERSION(4, 1, 0): @@ -1968,12 +2034,13 @@ static int amdgpu_discovery_set_sdma_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(6, 0, 1): case IP_VERSION(6, 0, 2): case IP_VERSION(6, 0, 3): + case IP_VERSION(6, 1, 0): amdgpu_device_ip_block_add(adev, &sdma_v6_0_ip_block); break; default: dev_err(adev->dev, "Failed to add sdma ip block(SDMA0_HWIP:0x%x)\n", - adev->ip_versions[SDMA0_HWIP][0]); + amdgpu_ip_version(adev, SDMA0_HWIP, 0)); return -EINVAL; } return 0; @@ -1981,8 +2048,8 @@ static int amdgpu_discovery_set_sdma_ip_blocks(struct amdgpu_device *adev) static int amdgpu_discovery_set_mm_ip_blocks(struct amdgpu_device *adev) { - if (adev->ip_versions[VCE_HWIP][0]) { - switch (adev->ip_versions[UVD_HWIP][0]) { + if (amdgpu_ip_version(adev, VCE_HWIP, 0)) { + switch (amdgpu_ip_version(adev, UVD_HWIP, 0)) { case IP_VERSION(7, 0, 0): case IP_VERSION(7, 2, 0): /* UVD is not supported on vega20 SR-IOV */ @@ -1992,10 +2059,10 @@ static int amdgpu_discovery_set_mm_ip_blocks(struct amdgpu_device *adev) default: dev_err(adev->dev, "Failed to add uvd v7 ip block(UVD_HWIP:0x%x)\n", - adev->ip_versions[UVD_HWIP][0]); + amdgpu_ip_version(adev, UVD_HWIP, 0)); return -EINVAL; } - switch (adev->ip_versions[VCE_HWIP][0]) { + switch (amdgpu_ip_version(adev, VCE_HWIP, 0)) { case IP_VERSION(4, 0, 0): case IP_VERSION(4, 1, 0): /* VCE is not supported on vega20 SR-IOV */ @@ -2005,11 +2072,11 @@ static int amdgpu_discovery_set_mm_ip_blocks(struct amdgpu_device *adev) default: dev_err(adev->dev, "Failed to add VCE v4 ip block(VCE_HWIP:0x%x)\n", - adev->ip_versions[VCE_HWIP][0]); + amdgpu_ip_version(adev, VCE_HWIP, 0)); return -EINVAL; } } else { - switch (adev->ip_versions[UVD_HWIP][0]) { + switch (amdgpu_ip_version(adev, UVD_HWIP, 0)) { case IP_VERSION(1, 0, 0): case IP_VERSION(1, 0, 1): amdgpu_device_ip_block_add(adev, &vcn_v1_0_ip_block); @@ -2053,10 +2120,14 @@ static int amdgpu_discovery_set_mm_ip_blocks(struct amdgpu_device *adev) amdgpu_device_ip_block_add(adev, &vcn_v4_0_3_ip_block); amdgpu_device_ip_block_add(adev, &jpeg_v4_0_3_ip_block); break; + case IP_VERSION(4, 0, 5): + amdgpu_device_ip_block_add(adev, &vcn_v4_0_5_ip_block); + amdgpu_device_ip_block_add(adev, &jpeg_v4_0_5_ip_block); + break; default: dev_err(adev->dev, "Failed to add vcn/jpeg ip block(UVD_HWIP:0x%x)\n", - adev->ip_versions[UVD_HWIP][0]); + amdgpu_ip_version(adev, UVD_HWIP, 0)); return -EINVAL; } } @@ -2065,7 +2136,7 @@ static int amdgpu_discovery_set_mm_ip_blocks(struct amdgpu_device *adev) static int amdgpu_discovery_set_mes_ip_blocks(struct amdgpu_device *adev) { - switch (adev->ip_versions[GC_HWIP][0]) { + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { case IP_VERSION(10, 1, 10): case IP_VERSION(10, 1, 1): case IP_VERSION(10, 1, 2): @@ -2090,6 +2161,7 @@ static int amdgpu_discovery_set_mes_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(11, 0, 2): case IP_VERSION(11, 0, 3): case IP_VERSION(11, 0, 4): + case IP_VERSION(11, 5, 0): amdgpu_device_ip_block_add(adev, &mes_v11_0_ip_block); adev->enable_mes = true; adev->enable_mes_kiq = true; @@ -2102,7 +2174,7 @@ static int amdgpu_discovery_set_mes_ip_blocks(struct amdgpu_device *adev) static void amdgpu_discovery_init_soc_config(struct amdgpu_device *adev) { - switch (adev->ip_versions[GC_HWIP][0]) { + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { case IP_VERSION(9, 4, 3): aqua_vanjaram_init_soc_config(adev); break; @@ -2111,6 +2183,35 @@ static void amdgpu_discovery_init_soc_config(struct amdgpu_device *adev) } } +static int amdgpu_discovery_set_vpe_ip_blocks(struct amdgpu_device *adev) +{ + switch (amdgpu_ip_version(adev, VPE_HWIP, 0)) { + case IP_VERSION(6, 1, 0): + amdgpu_device_ip_block_add(adev, &vpe_v6_1_ip_block); + break; + default: + break; + } + + return 0; +} + +static int amdgpu_discovery_set_umsch_mm_ip_blocks(struct amdgpu_device *adev) +{ + switch (amdgpu_ip_version(adev, VCN_HWIP, 0)) { + case IP_VERSION(4, 0, 5): + if (amdgpu_umsch_mm & 0x1) { + amdgpu_device_ip_block_add(adev, &umsch_mm_v4_0_ip_block); + adev->enable_umsch_mm = true; + } + break; + default: + break; + } + + return 0; +} + int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) { int r; @@ -2291,7 +2392,7 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) amdgpu_discovery_init_soc_config(adev); amdgpu_discovery_sysfs_init(adev); - switch (adev->ip_versions[GC_HWIP][0]) { + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { case IP_VERSION(9, 0, 1): case IP_VERSION(9, 2, 1): case IP_VERSION(9, 4, 0): @@ -2338,11 +2439,14 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(11, 0, 4): adev->family = AMDGPU_FAMILY_GC_11_0_1; break; + case IP_VERSION(11, 5, 0): + adev->family = AMDGPU_FAMILY_GC_11_5_0; + break; default: return -EINVAL; } - switch (adev->ip_versions[GC_HWIP][0]) { + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { case IP_VERSION(9, 1, 0): case IP_VERSION(9, 2, 2): case IP_VERSION(9, 3, 0): @@ -2354,17 +2458,21 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(10, 3, 7): case IP_VERSION(11, 0, 1): case IP_VERSION(11, 0, 4): + case IP_VERSION(11, 5, 0): adev->flags |= AMD_IS_APU; break; default: break; } - if (adev->ip_versions[XGMI_HWIP][0] == IP_VERSION(4, 8, 0)) + if (amdgpu_ip_version(adev, XGMI_HWIP, 0) == IP_VERSION(4, 8, 0)) adev->gmc.xgmi.supported = true; + if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3)) + adev->ip_versions[XGMI_HWIP][0] = IP_VERSION(6, 4, 0); + /* set NBIO version */ - switch (adev->ip_versions[NBIO_HWIP][0]) { + switch (amdgpu_ip_version(adev, NBIO_HWIP, 0)) { case IP_VERSION(6, 1, 0): case IP_VERSION(6, 2, 0): adev->nbio.funcs = &nbio_v6_1_funcs; @@ -2386,6 +2494,10 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) adev->nbio.funcs = &nbio_v7_9_funcs; adev->nbio.hdp_flush_reg = &nbio_v7_9_hdp_flush_reg; break; + case IP_VERSION(7, 11, 0): + adev->nbio.funcs = &nbio_v7_11_funcs; + adev->nbio.hdp_flush_reg = &nbio_v7_11_hdp_flush_reg; + break; case IP_VERSION(7, 2, 0): case IP_VERSION(7, 2, 1): case IP_VERSION(7, 3, 0): @@ -2422,7 +2534,7 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) break; } - switch (adev->ip_versions[HDP_HWIP][0]) { + switch (amdgpu_ip_version(adev, HDP_HWIP, 0)) { case IP_VERSION(4, 0, 0): case IP_VERSION(4, 0, 1): case IP_VERSION(4, 1, 0): @@ -2447,13 +2559,14 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) break; case IP_VERSION(6, 0, 0): case IP_VERSION(6, 0, 1): + case IP_VERSION(6, 1, 0): adev->hdp.funcs = &hdp_v6_0_funcs; break; default: break; } - switch (adev->ip_versions[DF_HWIP][0]) { + switch (amdgpu_ip_version(adev, DF_HWIP, 0)) { case IP_VERSION(3, 6, 0): case IP_VERSION(3, 6, 1): case IP_VERSION(3, 6, 2): @@ -2469,11 +2582,14 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(4, 3, 0): adev->df.funcs = &df_v4_3_funcs; break; + case IP_VERSION(4, 6, 2): + adev->df.funcs = &df_v4_6_2_funcs; + break; default: break; } - switch (adev->ip_versions[SMUIO_HWIP][0]) { + switch (amdgpu_ip_version(adev, SMUIO_HWIP, 0)) { case IP_VERSION(9, 0, 0): case IP_VERSION(9, 0, 1): case IP_VERSION(10, 0, 0): @@ -2509,13 +2625,14 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) break; case IP_VERSION(13, 0, 6): case IP_VERSION(13, 0, 8): + case IP_VERSION(14, 0, 0): adev->smuio.funcs = &smuio_v13_0_6_funcs; break; default: break; } - switch (adev->ip_versions[LSDMA_HWIP][0]) { + switch (amdgpu_ip_version(adev, LSDMA_HWIP, 0)) { case IP_VERSION(6, 0, 0): case IP_VERSION(6, 0, 1): case IP_VERSION(6, 0, 2): @@ -2588,6 +2705,14 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) if (r) return r; + r = amdgpu_discovery_set_vpe_ip_blocks(adev); + if (r) + return r; + + r = amdgpu_discovery_set_umsch_mm_ip_blocks(adev); + if (r) + return r; + return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h index 3a2f347bd50d..4d03cd5b3410 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h @@ -24,7 +24,7 @@ #ifndef __AMDGPU_DISCOVERY__ #define __AMDGPU_DISCOVERY__ -#define DISCOVERY_TMR_SIZE (8 << 10) +#define DISCOVERY_TMR_SIZE (10 << 10) #define DISCOVERY_TMR_OFFSET (64 << 10) void amdgpu_discovery_fini(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c index b702f499f5fb..b8fbe97efe1d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c @@ -38,6 +38,8 @@ #include <linux/pci.h> #include <linux/pm_runtime.h> #include <drm/drm_crtc_helper.h> +#include <drm/drm_damage_helper.h> +#include <drm/drm_drv.h> #include <drm/drm_edid.h> #include <drm/drm_fb_helper.h> #include <drm/drm_gem_framebuffer_helper.h> @@ -124,7 +126,7 @@ static void amdgpu_display_flip_work_func(struct work_struct *__work) struct drm_crtc *crtc = &amdgpu_crtc->base; unsigned long flags; - unsigned i; + unsigned int i; int vpos, hpos; for (i = 0; i < work->shared_count; ++i) @@ -201,7 +203,7 @@ int amdgpu_display_crtc_page_flip_target(struct drm_crtc *crtc, u64 tiling_flags; int i, r; - work = kzalloc(sizeof *work, GFP_KERNEL); + work = kzalloc(sizeof(*work), GFP_KERNEL); if (work == NULL) return -ENOMEM; @@ -332,18 +334,17 @@ int amdgpu_display_crtc_set_config(struct drm_mode_set *set, adev = drm_to_adev(dev); /* if we have active crtcs and we don't have a power ref, - take the current one */ + * take the current one + */ if (active && !adev->have_disp_power_ref) { adev->have_disp_power_ref = true; return ret; } - /* if we have no active crtcs, then drop the power ref - we got before */ - if (!active && adev->have_disp_power_ref) { - pm_runtime_put_autosuspend(dev->dev); + /* if we have no active crtcs, then go to + * drop the power ref we got before + */ + if (!active && adev->have_disp_power_ref) adev->have_disp_power_ref = false; - } - out: /* drop the power reference we got coming in here */ pm_runtime_put_autosuspend(dev->dev); @@ -507,11 +508,10 @@ bool amdgpu_display_ddc_probe(struct amdgpu_connector *amdgpu_connector, if (amdgpu_connector->router.ddc_valid) amdgpu_i2c_router_select_ddc_port(amdgpu_connector); - if (use_aux) { + if (use_aux) ret = i2c_transfer(&amdgpu_connector->ddc_bus->aux.ddc, msgs, 2); - } else { + else ret = i2c_transfer(&amdgpu_connector->ddc_bus->adapter, msgs, 2); - } if (ret != 2) /* Couldn't find an accessible DDC on this connector */ @@ -520,20 +520,40 @@ bool amdgpu_display_ddc_probe(struct amdgpu_connector *amdgpu_connector, * EDID header starts with: * 0x00,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0x00. * Only the first 6 bytes must be valid as - * drm_edid_block_valid() can fix the last 2 bytes */ + * drm_edid_block_valid() can fix the last 2 bytes + */ if (drm_edid_header_is_valid(buf) < 6) { /* Couldn't find an accessible EDID on this - * connector */ + * connector + */ return false; } return true; } +static int amdgpu_dirtyfb(struct drm_framebuffer *fb, struct drm_file *file, + unsigned int flags, unsigned int color, + struct drm_clip_rect *clips, unsigned int num_clips) +{ + + if (file) + return -ENOSYS; + + return drm_atomic_helper_dirtyfb(fb, file, flags, color, clips, + num_clips); +} + static const struct drm_framebuffer_funcs amdgpu_fb_funcs = { .destroy = drm_gem_fb_destroy, .create_handle = drm_gem_fb_create_handle, }; +static const struct drm_framebuffer_funcs amdgpu_fb_funcs_atomic = { + .destroy = drm_gem_fb_destroy, + .create_handle = drm_gem_fb_create_handle, + .dirty = amdgpu_dirtyfb +}; + uint32_t amdgpu_display_supported_domains(struct amdgpu_device *adev, uint64_t bo_flags) { @@ -743,11 +763,13 @@ static int convert_tiling_flags_to_modifier(struct amdgpu_framebuffer *afb) return -EINVAL; } - if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(11, 0, 0)) + if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(11, 0, 0)) version = AMD_FMT_MOD_TILE_VER_GFX11; - else if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(10, 3, 0)) + else if (amdgpu_ip_version(adev, GC_HWIP, 0) >= + IP_VERSION(10, 3, 0)) version = AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS; - else if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(10, 0, 0)) + else if (amdgpu_ip_version(adev, GC_HWIP, 0) >= + IP_VERSION(10, 0, 0)) version = AMD_FMT_MOD_TILE_VER_GFX10; else version = AMD_FMT_MOD_TILE_VER_GFX9; @@ -756,13 +778,15 @@ static int convert_tiling_flags_to_modifier(struct amdgpu_framebuffer *afb) case 0: /* Z microtiling */ return -EINVAL; case 1: /* S microtiling */ - if (adev->ip_versions[GC_HWIP][0] < IP_VERSION(11, 0, 0)) { + if (amdgpu_ip_version(adev, GC_HWIP, 0) < + IP_VERSION(11, 0, 0)) { if (!has_xor) version = AMD_FMT_MOD_TILE_VER_GFX9; } break; case 2: - if (adev->ip_versions[GC_HWIP][0] < IP_VERSION(11, 0, 0)) { + if (amdgpu_ip_version(adev, GC_HWIP, 0) < + IP_VERSION(11, 0, 0)) { if (!has_xor && afb->base.format->cpp[0] != 4) version = AMD_FMT_MOD_TILE_VER_GFX9; } @@ -815,10 +839,12 @@ static int convert_tiling_flags_to_modifier(struct amdgpu_framebuffer *afb) u64 render_dcc_offset; /* Enable constant encode on RAVEN2 and later. */ - bool dcc_constant_encode = (adev->asic_type > CHIP_RAVEN || - (adev->asic_type == CHIP_RAVEN && - adev->external_rev_id >= 0x81)) && - adev->ip_versions[GC_HWIP][0] < IP_VERSION(11, 0, 0); + bool dcc_constant_encode = + (adev->asic_type > CHIP_RAVEN || + (adev->asic_type == CHIP_RAVEN && + adev->external_rev_id >= 0x81)) && + amdgpu_ip_version(adev, GC_HWIP, 0) < + IP_VERSION(11, 0, 0); int max_cblock_size = dcc_i64b ? AMD_FMT_MOD_DCC_BLOCK_64B : dcc_i128b ? AMD_FMT_MOD_DCC_BLOCK_128B : @@ -855,7 +881,9 @@ static int convert_tiling_flags_to_modifier(struct amdgpu_framebuffer *afb) if (adev->family >= AMDGPU_FAMILY_NV) { int extra_pipe = 0; - if ((adev->ip_versions[GC_HWIP][0] >= IP_VERSION(10, 3, 0)) && + if ((amdgpu_ip_version(adev, GC_HWIP, + 0) >= + IP_VERSION(10, 3, 0)) && pipes == packers && pipes > 1) extra_pipe = 1; @@ -1136,7 +1164,11 @@ static int amdgpu_display_gem_fb_verify_and_init(struct drm_device *dev, if (ret) goto err; - ret = drm_framebuffer_init(dev, &rfb->base, &amdgpu_fb_funcs); + if (drm_drv_uses_atomic_modeset(dev)) + ret = drm_framebuffer_init(dev, &rfb->base, + &amdgpu_fb_funcs_atomic); + else + ret = drm_framebuffer_init(dev, &rfb->base, &amdgpu_fb_funcs); if (ret) goto err; @@ -1216,8 +1248,10 @@ amdgpu_display_user_framebuffer_create(struct drm_device *dev, obj = drm_gem_object_lookup(file_priv, mode_cmd->handles[0]); if (obj == NULL) { - drm_dbg_kms(dev, "No GEM object associated to handle 0x%08X, " - "can't create framebuffer\n", mode_cmd->handles[0]); + drm_dbg_kms(dev, + "No GEM object associated to handle 0x%08X, can't create framebuffer\n", + mode_cmd->handles[0]); + return ERR_PTR(-ENOENT); } @@ -1410,6 +1444,7 @@ bool amdgpu_display_crtc_scaling_mode_fixup(struct drm_crtc *crtc, } if (amdgpu_crtc->rmx_type != RMX_OFF) { fixed20_12 a, b; + a.full = dfixed_const(src_v); b.full = dfixed_const(dst_v); amdgpu_crtc->vsc.full = dfixed_div(a, b); @@ -1429,7 +1464,7 @@ bool amdgpu_display_crtc_scaling_mode_fixup(struct drm_crtc *crtc, * * \param dev Device to query. * \param pipe Crtc to query. - * \param flags Flags from caller (DRM_CALLED_FROM_VBLIRQ or 0). + * \param flags from caller (DRM_CALLED_FROM_VBLIRQ or 0). * For driver internal use only also supports these flags: * * USE_REAL_VBLANKSTART to use the real start of vblank instead @@ -1504,8 +1539,8 @@ int amdgpu_display_get_crtc_scanoutpos(struct drm_device *dev, /* Called from driver internal vblank counter query code? */ if (flags & GET_DISTANCE_TO_VBLANKSTART) { - /* Caller wants distance from real vbl_start in *hpos */ - *hpos = *vpos - vbl_start; + /* Caller wants distance from real vbl_start in *hpos */ + *hpos = *vpos - vbl_start; } /* Fudge vblank to start a few scanlines earlier to handle the @@ -1527,7 +1562,7 @@ int amdgpu_display_get_crtc_scanoutpos(struct drm_device *dev, /* In vblank? */ if (in_vbl) - ret |= DRM_SCANOUTPOS_IN_VBLANK; + ret |= DRM_SCANOUTPOS_IN_VBLANK; /* Called from driver internal vblank counter query code? */ if (flags & GET_DISTANCE_TO_VBLANKSTART) { @@ -1635,6 +1670,7 @@ int amdgpu_display_suspend_helper(struct amdgpu_device *adev) if (amdgpu_crtc->cursor_bo && !adev->enable_virtual_display) { struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo); + r = amdgpu_bo_reserve(aobj, true); if (r == 0) { amdgpu_bo_unpin(aobj); @@ -1642,9 +1678,9 @@ int amdgpu_display_suspend_helper(struct amdgpu_device *adev) } } - if (fb == NULL || fb->obj[0] == NULL) { + if (!fb || !fb->obj[0]) continue; - } + robj = gem_to_amdgpu_bo(fb->obj[0]); if (!amdgpu_display_robj_is_fb(adev, robj)) { r = amdgpu_bo_reserve(robj, true); @@ -1671,6 +1707,7 @@ int amdgpu_display_resume_helper(struct amdgpu_device *adev) if (amdgpu_crtc->cursor_bo && !adev->enable_virtual_display) { struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo); + r = amdgpu_bo_reserve(aobj, true); if (r == 0) { r = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c index 12210598e5b8..e7e87a3b2601 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c @@ -331,6 +331,7 @@ amdgpu_dma_buf_create_obj(struct drm_device *dev, struct dma_buf *dma_buf) flags |= other->flags & (AMDGPU_GEM_CREATE_CPU_GTT_USWC | AMDGPU_GEM_CREATE_COHERENT | + AMDGPU_GEM_CREATE_EXT_COHERENT | AMDGPU_GEM_CREATE_UNCACHED); } @@ -403,9 +404,12 @@ amdgpu_dma_buf_move_notify(struct dma_buf_attachment *attach) continue; } - r = amdgpu_vm_clear_freed(adev, vm, NULL); + /* Reserve fences for two SDMA page table updates */ + r = dma_resv_reserve_fences(resv, 2); if (!r) - r = amdgpu_vm_handle_moved(adev, vm); + r = amdgpu_vm_clear_freed(adev, vm, NULL); + if (!r) + r = amdgpu_vm_handle_moved(adev, vm, ticket); if (r && r != -EBUSY) DRM_ERROR("Failed to invalidate VM page tables (%d))\n", diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h index f637574644c0..2675689ef70f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h @@ -31,10 +31,15 @@ struct amdgpu_doorbell { /* doorbell mmio */ resource_size_t base; resource_size_t size; - u32 __iomem *ptr; /* Number of doorbells reserved for amdgpu kernel driver */ u32 num_kernel_doorbells; + + /* Kernel doorbells */ + struct amdgpu_bo *kernel_doorbells; + + /* For CPU access of doorbells */ + uint32_t *cpu_addr; }; /* Reserved doorbells for amdgpu (including multimedia). @@ -81,6 +86,7 @@ struct amdgpu_doorbell_index { uint32_t vce_ring6_7; } uvd_vce; }; + uint32_t vpe_ring; uint32_t first_non_cp; uint32_t last_non_cp; uint32_t max_assignment; @@ -90,8 +96,7 @@ struct amdgpu_doorbell_index { uint32_t xcc_doorbell_range; }; -typedef enum _AMDGPU_DOORBELL_ASSIGNMENT -{ +enum AMDGPU_DOORBELL_ASSIGNMENT { AMDGPU_DOORBELL_KIQ = 0x000, AMDGPU_DOORBELL_HIQ = 0x001, AMDGPU_DOORBELL_DIQ = 0x002, @@ -109,10 +114,10 @@ typedef enum _AMDGPU_DOORBELL_ASSIGNMENT AMDGPU_DOORBELL_IH = 0x1E8, AMDGPU_DOORBELL_MAX_ASSIGNMENT = 0x3FF, AMDGPU_DOORBELL_INVALID = 0xFFFF -} AMDGPU_DOORBELL_ASSIGNMENT; +}; + +enum AMDGPU_VEGA20_DOORBELL_ASSIGNMENT { -typedef enum _AMDGPU_VEGA20_DOORBELL_ASSIGNMENT -{ /* Compute + GFX: 0~255 */ AMDGPU_VEGA20_DOORBELL_KIQ = 0x000, AMDGPU_VEGA20_DOORBELL_HIQ = 0x001, @@ -176,10 +181,10 @@ typedef enum _AMDGPU_VEGA20_DOORBELL_ASSIGNMENT AMDGPU_VEGA20_DOORBELL_MAX_ASSIGNMENT = 0x1F7, AMDGPU_VEGA20_DOORBELL_INVALID = 0xFFFF -} AMDGPU_VEGA20_DOORBELL_ASSIGNMENT; +}; + +enum AMDGPU_NAVI10_DOORBELL_ASSIGNMENT { -typedef enum _AMDGPU_NAVI10_DOORBELL_ASSIGNMENT -{ /* Compute + GFX: 0~255 */ AMDGPU_NAVI10_DOORBELL_KIQ = 0x000, AMDGPU_NAVI10_DOORBELL_HIQ = 0x001, @@ -222,18 +227,19 @@ typedef enum _AMDGPU_NAVI10_DOORBELL_ASSIGNMENT AMDGPU_NAVI10_DOORBELL64_VCNc_d = 0x18E, AMDGPU_NAVI10_DOORBELL64_VCNe_f = 0x18F, + AMDGPU_NAVI10_DOORBELL64_VPE = 0x190, + AMDGPU_NAVI10_DOORBELL64_FIRST_NON_CP = AMDGPU_NAVI10_DOORBELL_sDMA_ENGINE0, - AMDGPU_NAVI10_DOORBELL64_LAST_NON_CP = AMDGPU_NAVI10_DOORBELL64_VCNe_f, + AMDGPU_NAVI10_DOORBELL64_LAST_NON_CP = AMDGPU_NAVI10_DOORBELL64_VPE, - AMDGPU_NAVI10_DOORBELL_MAX_ASSIGNMENT = 0x18F, + AMDGPU_NAVI10_DOORBELL_MAX_ASSIGNMENT = AMDGPU_NAVI10_DOORBELL64_VPE, AMDGPU_NAVI10_DOORBELL_INVALID = 0xFFFF -} AMDGPU_NAVI10_DOORBELL_ASSIGNMENT; +}; /* * 64bit doorbell, offset are in QWORD, occupy 2KB doorbell space */ -typedef enum _AMDGPU_DOORBELL64_ASSIGNMENT -{ +enum AMDGPU_DOORBELL64_ASSIGNMENT { /* * All compute related doorbells: kiq, hiq, diq, traditional compute queue, user queue, should locate in * a continues range so that programming CP_MEC_DOORBELL_RANGE_LOWER/UPPER can cover this range. @@ -309,9 +315,10 @@ typedef enum _AMDGPU_DOORBELL64_ASSIGNMENT AMDGPU_DOORBELL64_MAX_ASSIGNMENT = 0xFF, AMDGPU_DOORBELL64_INVALID = 0xFFFF -} AMDGPU_DOORBELL64_ASSIGNMENT; +}; + +enum AMDGPU_DOORBELL_ASSIGNMENT_LAYOUT1 { -typedef enum _AMDGPU_DOORBELL_ASSIGNMENT_LAYOUT1 { /* XCC0: 0x00 ~20, XCC1: 20 ~ 2F ... */ /* KIQ/HIQ/DIQ */ @@ -330,22 +337,33 @@ typedef enum _AMDGPU_DOORBELL_ASSIGNMENT_LAYOUT1 { AMDGPU_DOORBELL_LAYOUT1_sDMA_ENGINE_END = 0x19F, /* IH: 0x1A0 ~ 0x1AF */ AMDGPU_DOORBELL_LAYOUT1_IH = 0x1A0, - /* VCN: 0x1B0 ~ 0x1D4 */ + /* VCN: 0x1B0 ~ 0x1E8 */ AMDGPU_DOORBELL_LAYOUT1_VCN_START = 0x1B0, - AMDGPU_DOORBELL_LAYOUT1_VCN_END = 0x1D4, + AMDGPU_DOORBELL_LAYOUT1_VCN_END = 0x1E8, AMDGPU_DOORBELL_LAYOUT1_FIRST_NON_CP = AMDGPU_DOORBELL_LAYOUT1_sDMA_ENGINE_START, AMDGPU_DOORBELL_LAYOUT1_LAST_NON_CP = AMDGPU_DOORBELL_LAYOUT1_VCN_END, - AMDGPU_DOORBELL_LAYOUT1_MAX_ASSIGNMENT = 0x1D4, + AMDGPU_DOORBELL_LAYOUT1_MAX_ASSIGNMENT = 0x1E8, AMDGPU_DOORBELL_LAYOUT1_INVALID = 0xFFFF -} AMDGPU_DOORBELL_ASSIGNMENT_LAYOUT1; +}; u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index); void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v); u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index); void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v); +/* + * GPU doorbell aperture helpers function. + */ +int amdgpu_doorbell_init(struct amdgpu_device *adev); +void amdgpu_doorbell_fini(struct amdgpu_device *adev); +int amdgpu_doorbell_create_kernel_doorbells(struct amdgpu_device *adev); +uint32_t amdgpu_doorbell_index_on_bar(struct amdgpu_device *adev, + struct amdgpu_bo *db_bo, + uint32_t doorbell_index, + uint32_t db_size); + #define RDOORBELL32(index) amdgpu_mm_rdoorbell(adev, (index)) #define WDOORBELL32(index, v) amdgpu_mm_wdoorbell(adev, (index), (v)) #define RDOORBELL64(index) amdgpu_mm_rdoorbell64(adev, (index)) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell_mgr.c new file mode 100644 index 000000000000..3f3662e8b871 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell_mgr.c @@ -0,0 +1,239 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright 2022 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "amdgpu.h" + +/** + * amdgpu_mm_rdoorbell - read a doorbell dword + * + * @adev: amdgpu_device pointer + * @index: doorbell index + * + * Returns the value in the doorbell aperture at the + * requested doorbell index (CIK). + */ +u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index) +{ + if (amdgpu_device_skip_hw_access(adev)) + return 0; + + if (index < adev->doorbell.num_kernel_doorbells) + return readl(adev->doorbell.cpu_addr + index); + + DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index); + return 0; +} + +/** + * amdgpu_mm_wdoorbell - write a doorbell dword + * + * @adev: amdgpu_device pointer + * @index: doorbell index + * @v: value to write + * + * Writes @v to the doorbell aperture at the + * requested doorbell index (CIK). + */ +void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v) +{ + if (amdgpu_device_skip_hw_access(adev)) + return; + + if (index < adev->doorbell.num_kernel_doorbells) + writel(v, adev->doorbell.cpu_addr + index); + else + DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index); +} + +/** + * amdgpu_mm_rdoorbell64 - read a doorbell Qword + * + * @adev: amdgpu_device pointer + * @index: doorbell index + * + * Returns the value in the doorbell aperture at the + * requested doorbell index (VEGA10+). + */ +u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index) +{ + if (amdgpu_device_skip_hw_access(adev)) + return 0; + + if (index < adev->doorbell.num_kernel_doorbells) + return atomic64_read((atomic64_t *)(adev->doorbell.cpu_addr + index)); + + DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index); + return 0; +} + +/** + * amdgpu_mm_wdoorbell64 - write a doorbell Qword + * + * @adev: amdgpu_device pointer + * @index: doorbell index + * @v: value to write + * + * Writes @v to the doorbell aperture at the + * requested doorbell index (VEGA10+). + */ +void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v) +{ + if (amdgpu_device_skip_hw_access(adev)) + return; + + if (index < adev->doorbell.num_kernel_doorbells) + atomic64_set((atomic64_t *)(adev->doorbell.cpu_addr + index), v); + else + DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index); +} + +/** + * amdgpu_doorbell_index_on_bar - Find doorbell's absolute offset in BAR + * + * @adev: amdgpu_device pointer + * @db_bo: doorbell object's bo + * @doorbell_index: doorbell relative index in this doorbell object + * @db_size: doorbell size is in byte + * + * returns doorbell's absolute index in BAR + */ +uint32_t amdgpu_doorbell_index_on_bar(struct amdgpu_device *adev, + struct amdgpu_bo *db_bo, + uint32_t doorbell_index, + uint32_t db_size) +{ + int db_bo_offset; + + db_bo_offset = amdgpu_bo_gpu_offset_no_check(db_bo); + + /* doorbell index is 32 bit but doorbell's size can be 32 bit + * or 64 bit, so *db_size(in byte)/4 for alignment. + */ + return db_bo_offset / sizeof(u32) + doorbell_index * + DIV_ROUND_UP(db_size, 4); +} + +/** + * amdgpu_doorbell_create_kernel_doorbells - Create kernel doorbells for graphics + * + * @adev: amdgpu_device pointer + * + * Creates doorbells for graphics driver usages. + * returns 0 on success, error otherwise. + */ +int amdgpu_doorbell_create_kernel_doorbells(struct amdgpu_device *adev) +{ + int r; + int size; + + /* SI HW does not have doorbells, skip allocation */ + if (adev->doorbell.num_kernel_doorbells == 0) + return 0; + + /* Reserve first num_kernel_doorbells (page-aligned) for kernel ops */ + size = ALIGN(adev->doorbell.num_kernel_doorbells * sizeof(u32), PAGE_SIZE); + + /* Allocate an extra page for MES kernel usages (ring test) */ + adev->mes.db_start_dw_offset = size / sizeof(u32); + size += PAGE_SIZE; + + r = amdgpu_bo_create_kernel(adev, + size, + PAGE_SIZE, + AMDGPU_GEM_DOMAIN_DOORBELL, + &adev->doorbell.kernel_doorbells, + NULL, + (void **)&adev->doorbell.cpu_addr); + if (r) { + DRM_ERROR("Failed to allocate kernel doorbells, err=%d\n", r); + return r; + } + + adev->doorbell.num_kernel_doorbells = size / sizeof(u32); + return 0; +} + +/* + * GPU doorbell aperture helpers function. + */ +/** + * amdgpu_doorbell_init - Init doorbell driver information. + * + * @adev: amdgpu_device pointer + * + * Init doorbell driver information (CIK) + * Returns 0 on success, error on failure. + */ +int amdgpu_doorbell_init(struct amdgpu_device *adev) +{ + + /* No doorbell on SI hardware generation */ + if (adev->asic_type < CHIP_BONAIRE) { + adev->doorbell.base = 0; + adev->doorbell.size = 0; + adev->doorbell.num_kernel_doorbells = 0; + return 0; + } + + if (pci_resource_flags(adev->pdev, 2) & IORESOURCE_UNSET) + return -EINVAL; + + amdgpu_asic_init_doorbell_index(adev); + + /* doorbell bar mapping */ + adev->doorbell.base = pci_resource_start(adev->pdev, 2); + adev->doorbell.size = pci_resource_len(adev->pdev, 2); + + adev->doorbell.num_kernel_doorbells = + min_t(u32, adev->doorbell.size / sizeof(u32), + adev->doorbell_index.max_assignment + 1); + if (adev->doorbell.num_kernel_doorbells == 0) + return -EINVAL; + + /* + * For Vega, reserve and map two pages on doorbell BAR since SDMA + * paging queue doorbell use the second page. The + * AMDGPU_DOORBELL64_MAX_ASSIGNMENT definition assumes all the + * doorbells are in the first page. So with paging queue enabled, + * the max num_kernel_doorbells should + 1 page (0x400 in dword) + */ + if (adev->asic_type >= CHIP_VEGA10) + adev->doorbell.num_kernel_doorbells += 0x400; + + return 0; +} + +/** + * amdgpu_doorbell_fini - Tear down doorbell driver information. + * + * @adev: amdgpu_device pointer + * + * Tear down doorbell driver information (CIK) + */ +void amdgpu_doorbell_fini(struct amdgpu_device *adev) +{ + amdgpu_bo_free_kernel(&adev->doorbell.kernel_doorbells, + NULL, + (void **)&adev->doorbell.cpu_addr); +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 0593ef8fe0a6..8b33b130ea36 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -26,30 +26,30 @@ #include <drm/drm_drv.h> #include <drm/drm_fbdev_generic.h> #include <drm/drm_gem.h> -#include <drm/drm_vblank.h> #include <drm/drm_managed.h> -#include "amdgpu_drv.h" - #include <drm/drm_pciids.h> -#include <linux/module.h> -#include <linux/pm_runtime.h> -#include <linux/vga_switcheroo.h> #include <drm/drm_probe_helper.h> -#include <linux/mmu_notifier.h> -#include <linux/suspend.h> +#include <drm/drm_vblank.h> + #include <linux/cc_platform.h> #include <linux/dynamic_debug.h> +#include <linux/module.h> +#include <linux/mmu_notifier.h> +#include <linux/pm_runtime.h> +#include <linux/suspend.h> +#include <linux/vga_switcheroo.h> #include "amdgpu.h" -#include "amdgpu_irq.h" +#include "amdgpu_amdkfd.h" #include "amdgpu_dma_buf.h" -#include "amdgpu_sched.h" +#include "amdgpu_drv.h" #include "amdgpu_fdinfo.h" -#include "amdgpu_amdkfd.h" - +#include "amdgpu_irq.h" +#include "amdgpu_psp.h" #include "amdgpu_ras.h" -#include "amdgpu_xgmi.h" #include "amdgpu_reset.h" +#include "amdgpu_sched.h" +#include "amdgpu_xgmi.h" #include "../amdxcp/amdgpu_xcp_drv.h" /* @@ -113,11 +113,22 @@ * gl1c_cache_size, gl2c_cache_size, mall_size, enabled_rb_pipes_mask_hi * 3.53.0 - Support for GFX11 CP GFX shadowing * 3.54.0 - Add AMDGPU_CTX_QUERY2_FLAGS_RESET_IN_PROGRESS support + * - 3.55.0 - Add AMDGPU_INFO_GPUVM_FAULT query + * - 3.56.0 - Update IB start address and size alignment for decode and encode */ #define KMS_DRIVER_MAJOR 3 -#define KMS_DRIVER_MINOR 54 +#define KMS_DRIVER_MINOR 56 #define KMS_DRIVER_PATCHLEVEL 0 +/* + * amdgpu.debug module options. Are all disabled by default + */ +enum AMDGPU_DEBUG_MASK { + AMDGPU_DEBUG_VM = BIT(0), + AMDGPU_DEBUG_LARGEBAR = BIT(1), + AMDGPU_DEBUG_DISABLE_GPU_SOFT_RECOVERY = BIT(2), +}; + unsigned int amdgpu_vram_limit = UINT_MAX; int amdgpu_vis_vram_limit; int amdgpu_gart_size = -1; /* auto */ @@ -140,7 +151,6 @@ int amdgpu_vm_size = -1; int amdgpu_vm_fragment_size = -1; int amdgpu_vm_block_size = -1; int amdgpu_vm_fault_stop; -int amdgpu_vm_debug; int amdgpu_vm_update_mode = -1; int amdgpu_exp_hw_support; int amdgpu_dc = -1; @@ -187,7 +197,6 @@ int amdgpu_mes_kiq; int amdgpu_noretry = -1; int amdgpu_force_asic_type = -1; int amdgpu_tmz = -1; /* auto */ -uint amdgpu_freesync_vid_mode; int amdgpu_reset_method = -1; /* auto */ int amdgpu_num_kcq = -1; int amdgpu_smartshift_bias; @@ -195,6 +204,10 @@ int amdgpu_use_xgmi_p2p = 1; int amdgpu_vcnfw_log; int amdgpu_sg_display = -1; /* auto */ int amdgpu_user_partt_mode = AMDGPU_AUTO_COMPUTE_PARTITION_MODE; +int amdgpu_umsch_mm; +int amdgpu_seamless = -1; /* auto */ +uint amdgpu_debug_mask; +int amdgpu_agp = -1; /* auto */ static void amdgpu_drv_delayed_reset_work_handler(struct work_struct *work); @@ -313,9 +326,7 @@ module_param_named(msi, amdgpu_msi, int, 0444); * jobs is 10000. The timeout for compute is 60000. */ MODULE_PARM_DESC(lockup_timeout, "GPU lockup timeout in ms (default: for bare metal 10000 for non-compute jobs and 60000 for compute jobs; " - "for passthrough or sriov, 10000 for all jobs." - " 0: keep default value. negative: infinity timeout), " - "format: for bare metal [Non-Compute] or [GFX,Compute,SDMA,Video]; " + "for passthrough or sriov, 10000 for all jobs. 0: keep default value. negative: infinity timeout), format: for bare metal [Non-Compute] or [GFX,Compute,SDMA,Video]; " "for passthrough or sriov [all jobs] or [GFX,Compute,SDMA,Video]."); module_param_string(lockup_timeout, amdgpu_lockup_timeout, sizeof(amdgpu_lockup_timeout), 0444); @@ -350,8 +361,9 @@ module_param_named(aspm, amdgpu_aspm, int, 0444); * Override for runtime power management control for dGPUs. The amdgpu driver can dynamically power down * the dGPUs when they are idle if supported. The default is -1 (auto enable). * Setting the value to 0 disables this functionality. + * Setting the value to -2 is auto enabled with power down when displays are attached. */ -MODULE_PARM_DESC(runpm, "PX runtime pm (2 = force enable with BAMACO, 1 = force enable with BACO, 0 = disable, -1 = auto)"); +MODULE_PARM_DESC(runpm, "PX runtime pm (2 = force enable with BAMACO, 1 = force enable with BACO, 0 = disable, -1 = auto, -2 = autowith displays)"); module_param_named(runpm, amdgpu_runtime_pm, int, 0444); /** @@ -408,13 +420,6 @@ MODULE_PARM_DESC(vm_fault_stop, "Stop on VM fault (0 = never (default), 1 = prin module_param_named(vm_fault_stop, amdgpu_vm_fault_stop, int, 0444); /** - * DOC: vm_debug (int) - * Debug VM handling (0 = disabled, 1 = enabled). The default is 0 (Disabled). - */ -MODULE_PARM_DESC(vm_debug, "Debug VM handling (0 = disabled (default), 1 = enabled)"); -module_param_named(vm_debug, amdgpu_vm_debug, int, 0644); - -/** * DOC: vm_update_mode (int) * Override VM update mode. VM updated by using CPU (0 = never, 1 = Graphics only, 2 = Compute only, 3 = Both). The default * is -1 (Only in large BAR(LB) systems Compute VM tables will be updated by CPU, otherwise 0, never). @@ -584,7 +589,7 @@ module_param_named(timeout_period, amdgpu_watchdog_timer.period, uint, 0644); */ #ifdef CONFIG_DRM_AMDGPU_SI -#if defined(CONFIG_DRM_RADEON) || defined(CONFIG_DRM_RADEON_MODULE) +#if IS_ENABLED(CONFIG_DRM_RADEON) || IS_ENABLED(CONFIG_DRM_RADEON_MODULE) int amdgpu_si_support = 0; MODULE_PARM_DESC(si_support, "SI support (1 = enabled, 0 = disabled (default))"); #else @@ -603,7 +608,7 @@ module_param_named(si_support, amdgpu_si_support, int, 0444); */ #ifdef CONFIG_DRM_AMDGPU_CIK -#if defined(CONFIG_DRM_RADEON) || defined(CONFIG_DRM_RADEON_MODULE) +#if IS_ENABLED(CONFIG_DRM_RADEON) || IS_ENABLED(CONFIG_DRM_RADEON_MODULE) int amdgpu_cik_support = 0; MODULE_PARM_DESC(cik_support, "CIK support (1 = enabled, 0 = disabled (default))"); #else @@ -620,8 +625,7 @@ module_param_named(cik_support, amdgpu_cik_support, int, 0444); * E.g. 0x1 = 256Mbyte, 0x2 = 512Mbyte, 0x4 = 1 Gbyte, 0x8 = 2GByte. The default is 0 (disabled). */ MODULE_PARM_DESC(smu_memory_pool_size, - "reserve gtt for smu debug usage, 0 = disable," - "0x1 = 256Mbyte, 0x2 = 512Mbyte, 0x4 = 1 Gbyte, 0x8 = 2GByte"); + "reserve gtt for smu debug usage, 0 = disable,0x1 = 256Mbyte, 0x2 = 512Mbyte, 0x4 = 1 Gbyte, 0x8 = 2GByte"); module_param_named(smu_memory_pool_size, amdgpu_smu_memory_pool_size, uint, 0444); /** @@ -747,32 +751,6 @@ MODULE_PARM_DESC(send_sigterm, "Send sigterm to HSA process on unhandled exception (0 = disable, 1 = enable)"); /** - * DOC: debug_largebar (int) - * Set debug_largebar as 1 to enable simulating large-bar capability on non-large bar - * system. This limits the VRAM size reported to ROCm applications to the visible - * size, usually 256MB. - * Default value is 0, diabled. - */ -int debug_largebar; -module_param(debug_largebar, int, 0444); -MODULE_PARM_DESC(debug_largebar, - "Debug large-bar flag used to simulate large-bar capability on non-large bar machine (0 = disable, 1 = enable)"); - -/** - * DOC: ignore_crat (int) - * Ignore CRAT table during KFD initialization. By default, KFD uses the ACPI CRAT - * table to get information about AMD APUs. This option can serve as a workaround on - * systems with a broken CRAT table. - * - * Default is auto (according to asic type, iommu_v2, and crat table, to decide - * whether use CRAT) - */ -int ignore_crat; -module_param(ignore_crat, int, 0444); -MODULE_PARM_DESC(ignore_crat, - "Ignore CRAT table during KFD initialization (0 = auto (default), 1 = ignore CRAT)"); - -/** * DOC: halt_if_hws_hang (int) * Halt if HWS hang is detected. Default value, 0, disables the halt on hang. * Setting 1 enables halt on hang. @@ -791,9 +769,9 @@ module_param(hws_gws_support, bool, 0444); MODULE_PARM_DESC(hws_gws_support, "Assume MEC2 FW supports GWS barriers (false = rely on FW version check (Default), true = force supported)"); /** - * DOC: queue_preemption_timeout_ms (int) - * queue preemption timeout in ms (1 = Minimum, 9000 = default) - */ + * DOC: queue_preemption_timeout_ms (int) + * queue preemption timeout in ms (1 = Minimum, 9000 = default) + */ int queue_preemption_timeout_ms = 9000; module_param(queue_preemption_timeout_ms, int, 0644); MODULE_PARM_DESC(queue_preemption_timeout_ms, "queue preemption timeout in ms (1 = Minimum, 9000 = default)"); @@ -889,32 +867,6 @@ MODULE_PARM_DESC(tmz, "Enable TMZ feature (-1 = auto (default), 0 = off, 1 = on) module_param_named(tmz, amdgpu_tmz, int, 0444); /** - * DOC: freesync_video (uint) - * Enable the optimization to adjust front porch timing to achieve seamless - * mode change experience when setting a freesync supported mode for which full - * modeset is not needed. - * - * The Display Core will add a set of modes derived from the base FreeSync - * video mode into the corresponding connector's mode list based on commonly - * used refresh rates and VRR range of the connected display, when users enable - * this feature. From the userspace perspective, they can see a seamless mode - * change experience when the change between different refresh rates under the - * same resolution. Additionally, userspace applications such as Video playback - * can read this modeset list and change the refresh rate based on the video - * frame rate. Finally, the userspace can also derive an appropriate mode for a - * particular refresh rate based on the FreeSync Mode and add it to the - * connector's mode list. - * - * Note: This is an experimental feature. - * - * The default value: 0 (off). - */ -MODULE_PARM_DESC( - freesync_video, - "Enable freesync modesetting optimization feature (0 = off (default), 1 = on)"); -module_param_named(freesync_video, amdgpu_freesync_vid_mode, uint, 0444); - -/** * DOC: reset_method (int) * GPU reset method (-1 = auto (default), 0 = legacy, 1 = mode0, 2 = mode1, 3 = mode2, 4 = baco) */ @@ -951,6 +903,15 @@ MODULE_PARM_DESC(sg_display, "S/G Display (-1 = auto (default), 0 = disable)"); module_param_named(sg_display, amdgpu_sg_display, int, 0444); /** + * DOC: umsch_mm (int) + * Enable Multi Media User Mode Scheduler. This is a HW scheduling engine for VCN and VPE. + * (0 = disabled (default), 1 = enabled) + */ +MODULE_PARM_DESC(umsch_mm, + "Enable Multi Media User Mode Scheduler (0 = disabled (default), 1 = enabled)"); +module_param_named(umsch_mm, amdgpu_umsch_mm, int, 0444); + +/** * DOC: smu_pptable_id (int) * Used to override pptable id. id = 0 use VBIOS pptable. * id > 0 use the soft pptable with specicfied id. @@ -981,6 +942,35 @@ module_param_named(user_partt_mode, amdgpu_user_partt_mode, uint, 0444); module_param(enforce_isolation, bool, 0444); MODULE_PARM_DESC(enforce_isolation, "enforce process isolation between graphics and compute . enforce_isolation = on"); +/** + * DOC: seamless (int) + * Seamless boot will keep the image on the screen during the boot process. + */ +MODULE_PARM_DESC(seamless, "Seamless boot (-1 = auto (default), 0 = disable, 1 = enable)"); +module_param_named(seamless, amdgpu_seamless, int, 0444); + +/** + * DOC: debug_mask (uint) + * Debug options for amdgpu, work as a binary mask with the following options: + * + * - 0x1: Debug VM handling + * - 0x2: Enable simulating large-bar capability on non-large bar system. This + * limits the VRAM size reported to ROCm applications to the visible + * size, usually 256MB. + * - 0x4: Disable GPU soft recovery, always do a full reset + */ +MODULE_PARM_DESC(debug_mask, "debug options for amdgpu, disabled by default"); +module_param_named(debug_mask, amdgpu_debug_mask, uint, 0444); + +/** + * DOC: agp (int) + * Enable the AGP aperture. This provides an aperture in the GPU's internal + * address space for direct access to system memory. Note that these accesses + * are non-snooped, so they are only used for access to uncached memory. + */ +MODULE_PARM_DESC(agp, "AGP (-1 = auto (default), 0 = disable, 1 = enable)"); +module_param_named(agp, amdgpu_agp, int, 0444); + /* These devices are not supported by amdgpu. * They are supported by the mach64, r128, radeon drivers */ @@ -2061,6 +2051,14 @@ static const struct pci_device_id pciidlist[] = { MODULE_DEVICE_TABLE(pci, pciidlist); +static const struct amdgpu_asic_type_quirk asic_type_quirks[] = { + /* differentiate between P10 and P11 asics with the same DID */ + {0x67FF, 0xE3, CHIP_POLARIS10}, + {0x67FF, 0xE7, CHIP_POLARIS10}, + {0x67FF, 0xF3, CHIP_POLARIS10}, + {0x67FF, 0xF7, CHIP_POLARIS10}, +}; + static const struct drm_driver amdgpu_kms_driver; static void amdgpu_get_secondary_funcs(struct amdgpu_device *adev) @@ -2085,6 +2083,40 @@ static void amdgpu_get_secondary_funcs(struct amdgpu_device *adev) } } +static void amdgpu_init_debug_options(struct amdgpu_device *adev) +{ + if (amdgpu_debug_mask & AMDGPU_DEBUG_VM) { + pr_info("debug: VM handling debug enabled\n"); + adev->debug_vm = true; + } + + if (amdgpu_debug_mask & AMDGPU_DEBUG_LARGEBAR) { + pr_info("debug: enabled simulating large-bar capability on non-large bar system\n"); + adev->debug_largebar = true; + } + + if (amdgpu_debug_mask & AMDGPU_DEBUG_DISABLE_GPU_SOFT_RECOVERY) { + pr_info("debug: soft reset for GPU recovery disabled\n"); + adev->debug_disable_soft_recovery = true; + } +} + +static unsigned long amdgpu_fix_asic_type(struct pci_dev *pdev, unsigned long flags) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(asic_type_quirks); i++) { + if (pdev->device == asic_type_quirks[i].device && + pdev->revision == asic_type_quirks[i].revision) { + flags &= ~AMD_ASIC_MASK; + flags |= asic_type_quirks[i].type; + break; + } + } + + return flags; +} + static int amdgpu_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) { @@ -2112,15 +2144,8 @@ static int amdgpu_pci_probe(struct pci_dev *pdev, "See modparam exp_hw_support\n"); return -ENODEV; } - /* differentiate between P10 and P11 asics with the same DID */ - if (pdev->device == 0x67FF && - (pdev->revision == 0xE3 || - pdev->revision == 0xE7 || - pdev->revision == 0xF3 || - pdev->revision == 0xF7)) { - flags &= ~AMD_ASIC_MASK; - flags |= CHIP_POLARIS10; - } + + flags = amdgpu_fix_asic_type(pdev, flags); /* Due to hardware bugs, S/G Display on raven requires a 1:1 IOMMU mapping, * however, SME requires an indirect IOMMU mapping because the encryption @@ -2238,6 +2263,8 @@ retry_init: pm_runtime_mark_last_busy(ddev->dev); pm_runtime_put_autosuspend(ddev->dev); + pci_wake_from_d3(pdev, TRUE); + /* * For runpm implemented via BACO, PMFW will handle the * timing for BACO in and out: @@ -2263,6 +2290,8 @@ retry_init: amdgpu_get_secondary_funcs(adev); } + amdgpu_init_debug_options(adev); + return 0; err_pci: @@ -2284,7 +2313,7 @@ amdgpu_pci_remove(struct pci_dev *pdev) pm_runtime_forbid(dev->dev); } - if (adev->ip_versions[MP1_HWIP][0] == IP_VERSION(13, 0, 2) && + if (amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 2) && !amdgpu_sriov_vf(adev)) { bool need_to_reset_gpu = false; @@ -2417,7 +2446,6 @@ static void amdgpu_drv_delayed_reset_work_handler(struct work_struct *work) amdgpu_amdkfd_device_init(adev); amdgpu_ttm_set_buffer_funcs_status(adev, true); } - return; } static int amdgpu_pmops_prepare(struct device *dev) @@ -2428,8 +2456,9 @@ static int amdgpu_pmops_prepare(struct device *dev) /* Return a positive number here so * DPM_FLAG_SMART_SUSPEND works properly */ - if (amdgpu_device_supports_boco(drm_dev)) - return pm_runtime_suspended(dev); + if (amdgpu_device_supports_boco(drm_dev) && + pm_runtime_suspended(dev)) + return 1; /* if we will not support s3 or s2i for the device * then skip suspend @@ -2438,7 +2467,7 @@ static int amdgpu_pmops_prepare(struct device *dev) !amdgpu_acpi_is_s3_active(adev)) return 1; - return 0; + return amdgpu_device_prepare(drm_dev); } static void amdgpu_pmops_complete(struct device *dev) @@ -2541,24 +2570,26 @@ static int amdgpu_runtime_idle_check_display(struct device *dev) struct drm_connector_list_iter iter; int ret = 0; - /* XXX: Return busy if any displays are connected to avoid - * possible display wakeups after runtime resume due to - * hotplug events in case any displays were connected while - * the GPU was in suspend. Remove this once that is fixed. - */ - mutex_lock(&drm_dev->mode_config.mutex); - drm_connector_list_iter_begin(drm_dev, &iter); - drm_for_each_connector_iter(list_connector, &iter) { - if (list_connector->status == connector_status_connected) { - ret = -EBUSY; - break; + if (amdgpu_runtime_pm != -2) { + /* XXX: Return busy if any displays are connected to avoid + * possible display wakeups after runtime resume due to + * hotplug events in case any displays were connected while + * the GPU was in suspend. Remove this once that is fixed. + */ + mutex_lock(&drm_dev->mode_config.mutex); + drm_connector_list_iter_begin(drm_dev, &iter); + drm_for_each_connector_iter(list_connector, &iter) { + if (list_connector->status == connector_status_connected) { + ret = -EBUSY; + break; + } } - } - drm_connector_list_iter_end(&iter); - mutex_unlock(&drm_dev->mode_config.mutex); + drm_connector_list_iter_end(&iter); + mutex_unlock(&drm_dev->mode_config.mutex); - if (ret) - return ret; + if (ret) + return ret; + } if (adev->dc_enabled) { struct drm_crtc *crtc; @@ -2614,6 +2645,7 @@ static int amdgpu_pmops_runtime_suspend(struct device *dev) /* wait for all rings to drain before suspending */ for (i = 0; i < AMDGPU_MAX_RINGS; i++) { struct amdgpu_ring *ring = adev->rings[i]; + if (ring && ring->sched.ready) { ret = amdgpu_fence_wait_empty(ring); if (ret) @@ -2635,6 +2667,9 @@ static int amdgpu_pmops_runtime_suspend(struct device *dev) if (amdgpu_device_supports_boco(drm_dev)) adev->mp1_state = PP_MP1_STATE_UNLOAD; + ret = amdgpu_device_prepare(drm_dev); + if (ret) + return ret; ret = amdgpu_device_suspend(drm_dev, false); if (ret) { adev->in_runpm = false; @@ -2738,6 +2773,7 @@ long amdgpu_drm_ioctl(struct file *filp, struct drm_file *file_priv = filp->private_data; struct drm_device *dev; long ret; + dev = file_priv->minor->dev; ret = pm_runtime_get_sync(dev->dev); if (ret < 0) @@ -2802,9 +2838,8 @@ int amdgpu_file_to_fpriv(struct file *filp, struct amdgpu_fpriv **fpriv) if (!filp) return -EINVAL; - if (filp->f_op != &amdgpu_driver_kms_fops) { + if (filp->f_op != &amdgpu_driver_kms_fops) return -EINVAL; - } file = filp->private_data; *fpriv = file->driver_priv; @@ -2850,10 +2885,7 @@ static const struct drm_driver amdgpu_kms_driver = { .show_fdinfo = amdgpu_show_fdinfo, #endif - .prime_handle_to_fd = drm_gem_prime_handle_to_fd, - .prime_fd_to_handle = drm_gem_prime_fd_to_handle, .gem_prime_import = amdgpu_gem_prime_import, - .gem_prime_mmap = drm_gem_prime_mmap, .name = DRIVER_NAME, .desc = DRIVER_DESC, @@ -2877,10 +2909,7 @@ const struct drm_driver amdgpu_partition_driver = { .fops = &amdgpu_driver_kms_fops, .release = &amdgpu_driver_release_kms, - .prime_handle_to_fd = drm_gem_prime_handle_to_fd, - .prime_fd_to_handle = drm_gem_prime_fd_to_handle, .gem_prime_import = amdgpu_gem_prime_import, - .gem_prime_mmap = drm_gem_prime_mmap, .name = DRIVER_NAME, .desc = DRIVER_DESC, @@ -2897,16 +2926,13 @@ static struct pci_error_handlers amdgpu_pci_err_handler = { .resume = amdgpu_pci_resume, }; -extern const struct attribute_group amdgpu_vram_mgr_attr_group; -extern const struct attribute_group amdgpu_gtt_mgr_attr_group; - static const struct attribute_group *amdgpu_sysfs_groups[] = { &amdgpu_vram_mgr_attr_group, &amdgpu_gtt_mgr_attr_group, + &amdgpu_flash_attr_group, NULL, }; - static struct pci_driver amdgpu_kms_pci_driver = { .name = DRIVER_NAME, .id_table = pciidlist, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_eeprom.c index 7d2a908438e9..e71768661ca8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_eeprom.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_eeprom.c @@ -183,6 +183,8 @@ static int amdgpu_eeprom_xfer(struct i2c_adapter *i2c_adap, u32 eeprom_addr, { const struct i2c_adapter_quirks *quirks = i2c_adap->quirks; u16 limit; + u16 ps; /* Partial size */ + int res = 0, r; if (!quirks) limit = 0; @@ -200,28 +202,25 @@ static int amdgpu_eeprom_xfer(struct i2c_adapter *i2c_adap, u32 eeprom_addr, eeprom_addr, buf_size, read ? "read" : "write", EEPROM_OFFSET_SIZE); return -EINVAL; - } else { - u16 ps; /* Partial size */ - int res = 0, r; - - /* The "limit" includes all data bytes sent/received, - * which would include the EEPROM_OFFSET_SIZE bytes. - * Account for them here. - */ - limit -= EEPROM_OFFSET_SIZE; - for ( ; buf_size > 0; - buf_size -= ps, eeprom_addr += ps, eeprom_buf += ps) { - ps = min(limit, buf_size); - - r = __amdgpu_eeprom_xfer(i2c_adap, eeprom_addr, - eeprom_buf, ps, read); - if (r < 0) - return r; - res += r; - } + } - return res; + /* The "limit" includes all data bytes sent/received, + * which would include the EEPROM_OFFSET_SIZE bytes. + * Account for them here. + */ + limit -= EEPROM_OFFSET_SIZE; + for ( ; buf_size > 0; + buf_size -= ps, eeprom_addr += ps, eeprom_buf += ps) { + ps = min(limit, buf_size); + + r = __amdgpu_eeprom_xfer(i2c_adap, eeprom_addr, + eeprom_buf, ps, read); + if (r < 0) + return r; + res += r; } + + return res; } int amdgpu_eeprom_read(struct i2c_adapter *i2c_adap, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c index 13d7413d4ca3..5706b282a0c7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c @@ -51,25 +51,20 @@ static const char *amdgpu_ip_name[AMDGPU_HW_IP_NUM] = { [AMDGPU_HW_IP_VCN_DEC] = "dec", [AMDGPU_HW_IP_VCN_ENC] = "enc", [AMDGPU_HW_IP_VCN_JPEG] = "jpeg", + [AMDGPU_HW_IP_VPE] = "vpe", }; void amdgpu_show_fdinfo(struct drm_printer *p, struct drm_file *file) { - struct amdgpu_device *adev = drm_to_adev(file->minor->dev); struct amdgpu_fpriv *fpriv = file->driver_priv; struct amdgpu_vm *vm = &fpriv->vm; struct amdgpu_mem_stats stats; ktime_t usage[AMDGPU_HW_IP_NUM]; - uint32_t bus, dev, fn, domain; unsigned int hw_ip; int ret; memset(&stats, 0, sizeof(stats)); - bus = adev->pdev->bus->number; - domain = pci_domain_nr(adev->pdev->bus); - dev = PCI_SLOT(adev->pdev->devfn); - fn = PCI_FUNC(adev->pdev->devfn); ret = amdgpu_bo_reserve(vm->root.bo, false); if (ret) @@ -87,9 +82,6 @@ void amdgpu_show_fdinfo(struct drm_printer *p, struct drm_file *file) */ drm_printf(p, "pasid:\t%u\n", fpriv->vm.pasid); - drm_printf(p, "drm-driver:\t%s\n", file->minor->dev->driver->name); - drm_printf(p, "drm-pdev:\t%04x:%02x:%02x.%d\n", domain, bus, dev, fn); - drm_printf(p, "drm-client-id:\t%Lu\n", vm->immediate.fence_context); drm_printf(p, "drm-memory-vram:\t%llu KiB\n", stats.vram/1024UL); drm_printf(p, "drm-memory-gtt: \t%llu KiB\n", stats.gtt/1024UL); drm_printf(p, "drm-memory-cpu: \t%llu KiB\n", stats.cpu/1024UL); @@ -109,7 +101,7 @@ void amdgpu_show_fdinfo(struct drm_printer *p, struct drm_file *file) if (!usage[hw_ip]) continue; - drm_printf(p, "drm-engine-%s:\t%Ld ns\n", amdgpu_ip_name[hw_ip], + drm_printf(p, "drm-engine-%s:\t%lld ns\n", amdgpu_ip_name[hw_ip], ktime_to_ns(usage[hw_ip])); } } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index 7537f5aa76f0..dc230212746a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c @@ -570,7 +570,8 @@ static bool amdgpu_fence_need_ring_interrupt_restore(struct amdgpu_ring *ring) switch (ring->funcs->type) { case AMDGPU_RING_TYPE_SDMA: /* SDMA 5.x+ is part of GFX power domain so it's covered by GFXOFF */ - if (adev->ip_versions[SDMA0_HWIP][0] >= IP_VERSION(5, 0, 0)) + if (amdgpu_ip_version(adev, SDMA0_HWIP, 0) >= + IP_VERSION(5, 0, 0)) is_gfx_power_domain = true; break; case AMDGPU_RING_TYPE_GFX: diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c index 4620c4712ce3..a08c148b13f9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c @@ -42,8 +42,9 @@ static bool is_fru_eeprom_supported(struct amdgpu_device *adev, u32 *fru_addr) /* The i2c access is blocked on VF * TODO: Need other way to get the info + * Also, FRU not valid for APU devices. */ - if (amdgpu_sriov_vf(adev)) + if (amdgpu_sriov_vf(adev) || (adev->flags & AMD_IS_APU)) return false; /* The default I2C EEPROM address of the FRU. @@ -57,40 +58,51 @@ static bool is_fru_eeprom_supported(struct amdgpu_device *adev, u32 *fru_addr) * for ease/speed/readability. For now, 2 string comparisons are * reasonable and not too expensive */ - switch (adev->asic_type) { - case CHIP_VEGA20: - /* D161 and D163 are the VG20 server SKUs */ - if (strnstr(atom_ctx->vbios_version, "D161", - sizeof(atom_ctx->vbios_version)) || - strnstr(atom_ctx->vbios_version, "D163", - sizeof(atom_ctx->vbios_version))) { + switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) { + case IP_VERSION(11, 0, 2): + switch (adev->asic_type) { + case CHIP_VEGA20: + /* D161 and D163 are the VG20 server SKUs */ + if (strnstr(atom_ctx->vbios_pn, "D161", + sizeof(atom_ctx->vbios_pn)) || + strnstr(atom_ctx->vbios_pn, "D163", + sizeof(atom_ctx->vbios_pn))) { + if (fru_addr) + *fru_addr = FRU_EEPROM_MADDR_6; + return true; + } else { + return false; + } + case CHIP_ARCTURUS: + default: + return false; + } + case IP_VERSION(11, 0, 7): + if (strnstr(atom_ctx->vbios_pn, "D603", + sizeof(atom_ctx->vbios_pn))) { + if (strnstr(atom_ctx->vbios_pn, "D603GLXE", + sizeof(atom_ctx->vbios_pn))) { + return false; + } + if (fru_addr) *fru_addr = FRU_EEPROM_MADDR_6; return true; + } else { return false; } - case CHIP_ALDEBARAN: + case IP_VERSION(13, 0, 2): /* All Aldebaran SKUs have an FRU */ - if (!strnstr(atom_ctx->vbios_version, "D673", - sizeof(atom_ctx->vbios_version))) + if (!strnstr(atom_ctx->vbios_pn, "D673", + sizeof(atom_ctx->vbios_pn))) if (fru_addr) *fru_addr = FRU_EEPROM_MADDR_6; return true; - case CHIP_SIENNA_CICHLID: - if (strnstr(atom_ctx->vbios_version, "D603", - sizeof(atom_ctx->vbios_version))) { - if (strnstr(atom_ctx->vbios_version, "D603GLXE", - sizeof(atom_ctx->vbios_version))) { - return false; - } else { - if (fru_addr) - *fru_addr = FRU_EEPROM_MADDR_6; - return true; - } - } else { - return false; - } + case IP_VERSION(13, 0, 6): + if (fru_addr) + *fru_addr = FRU_EEPROM_MADDR_8; + return true; default: return false; } @@ -98,6 +110,7 @@ static bool is_fru_eeprom_supported(struct amdgpu_device *adev, u32 *fru_addr) int amdgpu_fru_get_product_info(struct amdgpu_device *adev) { + struct amdgpu_fru_info *fru_info; unsigned char buf[8], *pia; u32 addr, fru_addr; int size, len; @@ -106,6 +119,19 @@ int amdgpu_fru_get_product_info(struct amdgpu_device *adev) if (!is_fru_eeprom_supported(adev, &fru_addr)) return 0; + if (!adev->fru_info) { + adev->fru_info = kzalloc(sizeof(*adev->fru_info), GFP_KERNEL); + if (!adev->fru_info) + return -ENOMEM; + } + + fru_info = adev->fru_info; + /* For Arcturus-and-later, default value of serial_number is unique_id + * so convert it to a 16-digit HEX string for convenience and + * backwards-compatibility. + */ + sprintf(fru_info->serial, "%llx", adev->unique_id); + /* If algo exists, it means that the i2c_adapter's initialized */ if (!adev->pm.fru_eeprom_i2c_bus || !adev->pm.fru_eeprom_i2c_bus->algo) { DRM_WARN("Cannot access FRU, EEPROM accessor not initialized"); @@ -169,32 +195,39 @@ int amdgpu_fru_get_product_info(struct amdgpu_device *adev) csum += pia[size - 1]; if (csum) { DRM_ERROR("Bad Product Info Area checksum: 0x%02x", csum); + kfree(pia); return -EIO; } /* Now extract useful information from the PIA. * - * Skip the Manufacturer Name at [3] and go directly to - * the Product Name field. + * Read Manufacturer Name field whose length is [3]. */ - addr = 3 + 1 + (pia[3] & 0x3F); + addr = 3; if (addr + 1 >= len) goto Out; - memcpy(adev->product_name, pia + addr + 1, - min_t(size_t, - sizeof(adev->product_name), + memcpy(fru_info->manufacturer_name, pia + addr + 1, + min_t(size_t, sizeof(fru_info->manufacturer_name), pia[addr] & 0x3F)); - adev->product_name[sizeof(adev->product_name) - 1] = '\0'; + fru_info->manufacturer_name[sizeof(fru_info->manufacturer_name) - 1] = + '\0'; + + /* Read Product Name field. */ + addr += 1 + (pia[addr] & 0x3F); + if (addr + 1 >= len) + goto Out; + memcpy(fru_info->product_name, pia + addr + 1, + min_t(size_t, sizeof(fru_info->product_name), pia[addr] & 0x3F)); + fru_info->product_name[sizeof(fru_info->product_name) - 1] = '\0'; /* Go to the Product Part/Model Number field. */ addr += 1 + (pia[addr] & 0x3F); if (addr + 1 >= len) goto Out; - memcpy(adev->product_number, pia + addr + 1, - min_t(size_t, - sizeof(adev->product_number), + memcpy(fru_info->product_number, pia + addr + 1, + min_t(size_t, sizeof(fru_info->product_number), pia[addr] & 0x3F)); - adev->product_number[sizeof(adev->product_number) - 1] = '\0'; + fru_info->product_number[sizeof(fru_info->product_number) - 1] = '\0'; /* Go to the Product Version field. */ addr += 1 + (pia[addr] & 0x3F); @@ -203,11 +236,155 @@ int amdgpu_fru_get_product_info(struct amdgpu_device *adev) addr += 1 + (pia[addr] & 0x3F); if (addr + 1 >= len) goto Out; - memcpy(adev->serial, pia + addr + 1, min_t(size_t, - sizeof(adev->serial), - pia[addr] & 0x3F)); - adev->serial[sizeof(adev->serial) - 1] = '\0'; + memcpy(fru_info->serial, pia + addr + 1, + min_t(size_t, sizeof(fru_info->serial), pia[addr] & 0x3F)); + fru_info->serial[sizeof(fru_info->serial) - 1] = '\0'; + + /* Asset Tag field */ + addr += 1 + (pia[addr] & 0x3F); + + /* FRU File Id field. This could be 'null'. */ + addr += 1 + (pia[addr] & 0x3F); + if ((addr + 1 >= len) || !(pia[addr] & 0x3F)) + goto Out; + memcpy(fru_info->fru_id, pia + addr + 1, + min_t(size_t, sizeof(fru_info->fru_id), pia[addr] & 0x3F)); + fru_info->fru_id[sizeof(fru_info->fru_id) - 1] = '\0'; + Out: kfree(pia); return 0; } + +/** + * DOC: product_name + * + * The amdgpu driver provides a sysfs API for reporting the product name + * for the device + * The file product_name is used for this and returns the product name + * as returned from the FRU. + * NOTE: This is only available for certain server cards + */ + +static ssize_t amdgpu_fru_product_name_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = drm_to_adev(ddev); + + return sysfs_emit(buf, "%s\n", adev->fru_info->product_name); +} + +static DEVICE_ATTR(product_name, 0444, amdgpu_fru_product_name_show, NULL); + +/** + * DOC: product_number + * + * The amdgpu driver provides a sysfs API for reporting the part number + * for the device + * The file product_number is used for this and returns the part number + * as returned from the FRU. + * NOTE: This is only available for certain server cards + */ + +static ssize_t amdgpu_fru_product_number_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = drm_to_adev(ddev); + + return sysfs_emit(buf, "%s\n", adev->fru_info->product_number); +} + +static DEVICE_ATTR(product_number, 0444, amdgpu_fru_product_number_show, NULL); + +/** + * DOC: serial_number + * + * The amdgpu driver provides a sysfs API for reporting the serial number + * for the device + * The file serial_number is used for this and returns the serial number + * as returned from the FRU. + * NOTE: This is only available for certain server cards + */ + +static ssize_t amdgpu_fru_serial_number_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = drm_to_adev(ddev); + + return sysfs_emit(buf, "%s\n", adev->fru_info->serial); +} + +static DEVICE_ATTR(serial_number, 0444, amdgpu_fru_serial_number_show, NULL); + +/** + * DOC: fru_id + * + * The amdgpu driver provides a sysfs API for reporting FRU File Id + * for the device. + * The file fru_id is used for this and returns the File Id value + * as returned from the FRU. + * NOTE: This is only available for certain server cards + */ + +static ssize_t amdgpu_fru_id_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = drm_to_adev(ddev); + + return sysfs_emit(buf, "%s\n", adev->fru_info->fru_id); +} + +static DEVICE_ATTR(fru_id, 0444, amdgpu_fru_id_show, NULL); + +/** + * DOC: manufacturer + * + * The amdgpu driver provides a sysfs API for reporting manufacturer name from + * FRU information. + * The file manufacturer returns the value as returned from the FRU. + * NOTE: This is only available for certain server cards + */ + +static ssize_t amdgpu_fru_manufacturer_name_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = drm_to_adev(ddev); + + return sysfs_emit(buf, "%s\n", adev->fru_info->manufacturer_name); +} + +static DEVICE_ATTR(manufacturer, 0444, amdgpu_fru_manufacturer_name_show, NULL); + +static const struct attribute *amdgpu_fru_attributes[] = { + &dev_attr_product_name.attr, + &dev_attr_product_number.attr, + &dev_attr_serial_number.attr, + &dev_attr_fru_id.attr, + &dev_attr_manufacturer.attr, + NULL +}; + +int amdgpu_fru_sysfs_init(struct amdgpu_device *adev) +{ + if (!is_fru_eeprom_supported(adev, NULL) || !adev->fru_info) + return 0; + + return sysfs_create_files(&adev->dev->kobj, amdgpu_fru_attributes); +} + +void amdgpu_fru_sysfs_fini(struct amdgpu_device *adev) +{ + if (!is_fru_eeprom_supported(adev, NULL) || !adev->fru_info) + return; + + sysfs_remove_files(&adev->dev->kobj, amdgpu_fru_attributes); +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.h index 1308d976d60e..bc58dca18035 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.h @@ -24,6 +24,19 @@ #ifndef __AMDGPU_FRU_EEPROM_H__ #define __AMDGPU_FRU_EEPROM_H__ +#define AMDGPU_PRODUCT_NAME_LEN 64 + +/* FRU product information */ +struct amdgpu_fru_info { + char product_number[20]; + char product_name[AMDGPU_PRODUCT_NAME_LEN]; + char serial[20]; + char manufacturer_name[32]; + char fru_id[32]; +}; + int amdgpu_fru_get_product_info(struct amdgpu_device *adev); +int amdgpu_fru_sysfs_init(struct amdgpu_device *adev); +void amdgpu_fru_sysfs_fini(struct amdgpu_device *adev); #endif // __AMDGPU_FRU_EEPROM_H__ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fw_attestation.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fw_attestation.c index 2ca3c329de6d..2d4b67175b55 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fw_attestation.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fw_attestation.c @@ -32,17 +32,15 @@ #include "soc15_common.h" #define FW_ATTESTATION_DB_COOKIE 0x143b6a37 -#define FW_ATTESTATION_RECORD_VALID 1 +#define FW_ATTESTATION_RECORD_VALID 1 #define FW_ATTESTATION_MAX_SIZE 4096 -typedef struct FW_ATT_DB_HEADER -{ +struct FW_ATT_DB_HEADER { uint32_t AttDbVersion; /* version of the fwar feature */ uint32_t AttDbCookie; /* cookie as an extra check for corrupt data */ -} FW_ATT_DB_HEADER; +}; -typedef struct FW_ATT_RECORD -{ +struct FW_ATT_RECORD { uint16_t AttFwIdV1; /* Legacy FW Type field */ uint16_t AttFwIdV2; /* V2 FW ID field */ uint32_t AttFWVersion; /* FW Version */ @@ -50,7 +48,7 @@ typedef struct FW_ATT_RECORD uint8_t AttSource; /* FW source indicator */ uint8_t RecordValid; /* Indicates whether the record is a valid entry */ uint32_t AttFwTaId; /* Ta ID (only in TA Attestation Table) */ -} FW_ATT_RECORD; +}; static ssize_t amdgpu_fw_attestation_debugfs_read(struct file *f, char __user *buf, @@ -60,15 +58,15 @@ static ssize_t amdgpu_fw_attestation_debugfs_read(struct file *f, struct amdgpu_device *adev = (struct amdgpu_device *)file_inode(f)->i_private; uint64_t records_addr = 0; uint64_t vram_pos = 0; - FW_ATT_DB_HEADER fw_att_hdr = {0}; - FW_ATT_RECORD fw_att_record = {0}; + struct FW_ATT_DB_HEADER fw_att_hdr = {0}; + struct FW_ATT_RECORD fw_att_record = {0}; - if (size < sizeof(FW_ATT_RECORD)) { + if (size < sizeof(struct FW_ATT_RECORD)) { DRM_WARN("FW attestation input buffer not enough memory"); return -EINVAL; } - if ((*pos + sizeof(FW_ATT_DB_HEADER)) >= FW_ATTESTATION_MAX_SIZE) { + if ((*pos + sizeof(struct FW_ATT_DB_HEADER)) >= FW_ATTESTATION_MAX_SIZE) { DRM_WARN("FW attestation out of bounds"); return 0; } @@ -83,8 +81,8 @@ static ssize_t amdgpu_fw_attestation_debugfs_read(struct file *f, if (*pos == 0) { amdgpu_device_vram_access(adev, vram_pos, - (uint32_t*)&fw_att_hdr, - sizeof(FW_ATT_DB_HEADER), + (uint32_t *)&fw_att_hdr, + sizeof(struct FW_ATT_DB_HEADER), false); if (fw_att_hdr.AttDbCookie != FW_ATTESTATION_DB_COOKIE) { @@ -96,20 +94,20 @@ static ssize_t amdgpu_fw_attestation_debugfs_read(struct file *f, } amdgpu_device_vram_access(adev, - vram_pos + sizeof(FW_ATT_DB_HEADER) + *pos, - (uint32_t*)&fw_att_record, - sizeof(FW_ATT_RECORD), + vram_pos + sizeof(struct FW_ATT_DB_HEADER) + *pos, + (uint32_t *)&fw_att_record, + sizeof(struct FW_ATT_RECORD), false); if (fw_att_record.RecordValid != FW_ATTESTATION_RECORD_VALID) return 0; - if (copy_to_user(buf, (void*)&fw_att_record, sizeof(FW_ATT_RECORD))) + if (copy_to_user(buf, (void *)&fw_att_record, sizeof(struct FW_ATT_RECORD))) return -EINVAL; - *pos += sizeof(FW_ATT_RECORD); + *pos += sizeof(struct FW_ATT_RECORD); - return sizeof(FW_ATT_RECORD); + return sizeof(struct FW_ATT_RECORD); } static const struct file_operations amdgpu_fw_attestation_debugfs_ops = { @@ -136,7 +134,7 @@ void amdgpu_fw_attestation_debugfs_init(struct amdgpu_device *adev) return; debugfs_create_file("amdgpu_fw_attestation", - S_IRUSR, + 0400, adev_to_drm(adev)->primary->debugfs_root, adev, &amdgpu_fw_attestation_debugfs_ops); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index 74055cba3dc9..84beeaa4d21c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -33,6 +33,7 @@ #include <drm/amdgpu_drm.h> #include <drm/drm_drv.h> +#include <drm/drm_exec.h> #include <drm/drm_gem_ttm_helper.h> #include <drm/ttm/ttm_tt.h> @@ -181,11 +182,10 @@ static int amdgpu_gem_object_open(struct drm_gem_object *obj, return r; bo_va = amdgpu_vm_bo_find(vm, abo); - if (!bo_va) { + if (!bo_va) bo_va = amdgpu_vm_bo_add(adev, vm, abo); - } else { + else ++bo_va->ref_count; - } amdgpu_bo_unreserve(abo); return 0; } @@ -198,29 +198,24 @@ static void amdgpu_gem_object_close(struct drm_gem_object *obj, struct amdgpu_fpriv *fpriv = file_priv->driver_priv; struct amdgpu_vm *vm = &fpriv->vm; - struct amdgpu_bo_list_entry vm_pd; - struct list_head list, duplicates; struct dma_fence *fence = NULL; - struct ttm_validate_buffer tv; - struct ww_acquire_ctx ticket; struct amdgpu_bo_va *bo_va; + struct drm_exec exec; long r; - INIT_LIST_HEAD(&list); - INIT_LIST_HEAD(&duplicates); - - tv.bo = &bo->tbo; - tv.num_shared = 2; - list_add(&tv.head, &list); - - amdgpu_vm_get_pd_bo(vm, &list, &vm_pd); - - r = ttm_eu_reserve_buffers(&ticket, &list, false, &duplicates); - if (r) { - dev_err(adev->dev, "leaking bo va because " - "we fail to reserve bo (%ld)\n", r); - return; + drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES); + drm_exec_until_all_locked(&exec) { + r = drm_exec_prepare_obj(&exec, &bo->tbo.base, 1); + drm_exec_retry_on_contention(&exec); + if (unlikely(r)) + goto out_unlock; + + r = amdgpu_vm_lock_pd(vm, &exec, 0); + drm_exec_retry_on_contention(&exec); + if (unlikely(r)) + goto out_unlock; } + bo_va = amdgpu_vm_bo_find(vm, bo); if (!bo_va || --bo_va->ref_count) goto out_unlock; @@ -230,6 +225,9 @@ static void amdgpu_gem_object_close(struct drm_gem_object *obj, goto out_unlock; r = amdgpu_vm_clear_freed(adev, vm, &fence); + if (unlikely(r < 0)) + dev_err(adev->dev, "failed to clear page " + "tables on GEM object close (%ld)\n", r); if (r || !fence) goto out_unlock; @@ -237,10 +235,9 @@ static void amdgpu_gem_object_close(struct drm_gem_object *obj, dma_fence_put(fence); out_unlock: - if (unlikely(r < 0)) - dev_err(adev->dev, "failed to clear page " - "tables on GEM object close (%ld)\n", r); - ttm_eu_backoff_reservation(&ticket, &list); + if (r) + dev_err(adev->dev, "leaking bo va (%ld)\n", r); + drm_exec_fini(&exec); } static int amdgpu_gem_object_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma) @@ -292,6 +289,10 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data, uint32_t handle, initial_domain; int r; + /* reject DOORBELLs until userspace code to use it is available */ + if (args->in.domains & AMDGPU_GEM_DOMAIN_DOORBELL) + return -EINVAL; + /* reject invalid gem flags */ if (flags & ~(AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | AMDGPU_GEM_CREATE_NO_CPU_ACCESS | @@ -463,9 +464,9 @@ int amdgpu_mode_dumb_mmap(struct drm_file *filp, struct amdgpu_bo *robj; gobj = drm_gem_object_lookup(filp, handle); - if (gobj == NULL) { + if (!gobj) return -ENOENT; - } + robj = gem_to_amdgpu_bo(gobj); if (amdgpu_ttm_tt_get_usermm(robj->tbo.ttm) || (robj->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS)) { @@ -482,6 +483,7 @@ int amdgpu_gem_mmap_ioctl(struct drm_device *dev, void *data, { union drm_amdgpu_gem_mmap *args = data; uint32_t handle = args->in.handle; + memset(args, 0, sizeof(*args)); return amdgpu_mode_dumb_mmap(filp, dev, handle, &args->out.addr_ptr); } @@ -508,7 +510,7 @@ unsigned long amdgpu_gem_timeout(uint64_t timeout_ns) timeout_jiffies = nsecs_to_jiffies(ktime_to_ns(timeout)); /* clamp timeout to avoid unsigned-> signed overflow */ - if (timeout_jiffies > MAX_SCHEDULE_TIMEOUT ) + if (timeout_jiffies > MAX_SCHEDULE_TIMEOUT) return MAX_SCHEDULE_TIMEOUT - 1; return timeout_jiffies; @@ -526,9 +528,9 @@ int amdgpu_gem_wait_idle_ioctl(struct drm_device *dev, void *data, long ret; gobj = drm_gem_object_lookup(filp, handle); - if (gobj == NULL) { + if (!gobj) return -ENOENT; - } + robj = gem_to_amdgpu_bo(gobj); ret = dma_resv_wait_timeout(robj->tbo.base.resv, DMA_RESV_USAGE_READ, true, timeout); @@ -555,7 +557,7 @@ int amdgpu_gem_metadata_ioctl(struct drm_device *dev, void *data, struct amdgpu_bo *robj; int r = -1; - DRM_DEBUG("%d \n", args->handle); + DRM_DEBUG("%d\n", args->handle); gobj = drm_gem_object_lookup(filp, args->handle); if (gobj == NULL) return -ENOENT; @@ -675,17 +677,14 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, struct amdgpu_fpriv *fpriv = filp->driver_priv; struct amdgpu_bo *abo; struct amdgpu_bo_va *bo_va; - struct amdgpu_bo_list_entry vm_pd; - struct ttm_validate_buffer tv; - struct ww_acquire_ctx ticket; - struct list_head list, duplicates; + struct drm_exec exec; uint64_t va_flags; uint64_t vm_size; int r = 0; if (args->va_address < AMDGPU_VA_RESERVED_SIZE) { dev_dbg(dev->dev, - "va_address 0x%LX is in reserved area 0x%LX\n", + "va_address 0x%llx is in reserved area 0x%llx\n", args->va_address, AMDGPU_VA_RESERVED_SIZE); return -EINVAL; } @@ -693,7 +692,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, if (args->va_address >= AMDGPU_GMC_HOLE_START && args->va_address < AMDGPU_GMC_HOLE_END) { dev_dbg(dev->dev, - "va_address 0x%LX is in VA hole 0x%LX-0x%LX\n", + "va_address 0x%llx is in VA hole 0x%llx-0x%llx\n", args->va_address, AMDGPU_GMC_HOLE_START, AMDGPU_GMC_HOLE_END); return -EINVAL; @@ -728,36 +727,38 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, return -EINVAL; } - INIT_LIST_HEAD(&list); - INIT_LIST_HEAD(&duplicates); if ((args->operation != AMDGPU_VA_OP_CLEAR) && !(args->flags & AMDGPU_VM_PAGE_PRT)) { gobj = drm_gem_object_lookup(filp, args->handle); if (gobj == NULL) return -ENOENT; abo = gem_to_amdgpu_bo(gobj); - tv.bo = &abo->tbo; - if (abo->flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID) - tv.num_shared = 1; - else - tv.num_shared = 0; - list_add(&tv.head, &list); } else { gobj = NULL; abo = NULL; } - amdgpu_vm_get_pd_bo(&fpriv->vm, &list, &vm_pd); + drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT | + DRM_EXEC_IGNORE_DUPLICATES); + drm_exec_until_all_locked(&exec) { + if (gobj) { + r = drm_exec_lock_obj(&exec, gobj); + drm_exec_retry_on_contention(&exec); + if (unlikely(r)) + goto error; + } - r = ttm_eu_reserve_buffers(&ticket, &list, true, &duplicates); - if (r) - goto error_unref; + r = amdgpu_vm_lock_pd(&fpriv->vm, &exec, 2); + drm_exec_retry_on_contention(&exec); + if (unlikely(r)) + goto error; + } if (abo) { bo_va = amdgpu_vm_bo_find(&fpriv->vm, abo); if (!bo_va) { r = -ENOENT; - goto error_backoff; + goto error; } } else if (args->operation != AMDGPU_VA_OP_CLEAR) { bo_va = fpriv->prt_va; @@ -790,14 +791,12 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, default: break; } - if (!r && !(args->flags & AMDGPU_VM_DELAY_UPDATE) && !amdgpu_vm_debug) + if (!r && !(args->flags & AMDGPU_VM_DELAY_UPDATE) && !adev->debug_vm) amdgpu_gem_va_update_vm(adev, &fpriv->vm, bo_va, args->operation); -error_backoff: - ttm_eu_backoff_reservation(&ticket, &list); - -error_unref: +error: + drm_exec_fini(&exec); drm_gem_object_put(gobj); return r; } @@ -813,9 +812,9 @@ int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data, int r; gobj = drm_gem_object_lookup(filp, args->handle); - if (gobj == NULL) { + if (!gobj) return -ENOENT; - } + robj = gem_to_amdgpu_bo(gobj); r = amdgpu_bo_reserve(robj, false); @@ -941,9 +940,9 @@ int amdgpu_mode_dumb_create(struct drm_file *file_priv, r = drm_gem_handle_create(file_priv, gobj, &handle); /* drop reference from allocate - handle holds it now */ drm_gem_object_put(gobj); - if (r) { + if (r) return r; - } + args->handle = handle; return 0; } @@ -963,6 +962,7 @@ static int amdgpu_debugfs_gem_info_show(struct seq_file *m, void *unused) list_for_each_entry(file, &dev->filelist, lhead) { struct task_struct *task; struct drm_gem_object *gobj; + struct pid *pid; int id; /* @@ -972,8 +972,9 @@ static int amdgpu_debugfs_gem_info_show(struct seq_file *m, void *unused) * Therefore, we need to protect this ->comm access using RCU. */ rcu_read_lock(); - task = pid_task(file->pid, PIDTYPE_TGID); - seq_printf(m, "pid %8d command %s:\n", pid_nr(file->pid), + pid = rcu_dereference(file->pid); + task = pid_task(pid, PIDTYPE_TGID); + seq_printf(m, "pid %8d command %s:\n", pid_nr(pid), task ? task->comm : "<unknown>"); rcu_read_unlock(); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index fd81b04559d4..b9674c57c436 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -29,6 +29,7 @@ #include "amdgpu_rlc.h" #include "amdgpu_ras.h" #include "amdgpu_xcp.h" +#include "amdgpu_xgmi.h" /* delay 0.1 second to enable gfx off feature */ #define GFX_OFF_DELAY_ENABLE msecs_to_jiffies(100) @@ -110,9 +111,9 @@ bool amdgpu_gfx_is_me_queue_enabled(struct amdgpu_device *adev, * The bitmask of CUs to be disabled in the shader array determined by se and * sh is stored in mask[se * max_sh + sh]. */ -void amdgpu_gfx_parse_disable_cu(unsigned *mask, unsigned max_se, unsigned max_sh) +void amdgpu_gfx_parse_disable_cu(unsigned int *mask, unsigned int max_se, unsigned int max_sh) { - unsigned se, sh, cu; + unsigned int se, sh, cu; const char *p; memset(mask, 0, sizeof(*mask) * max_se * max_sh); @@ -124,6 +125,7 @@ void amdgpu_gfx_parse_disable_cu(unsigned *mask, unsigned max_se, unsigned max_s for (;;) { char *next; int ret = sscanf(p, "%u.%u.%u", &se, &sh, &cu); + if (ret < 3) { DRM_ERROR("amdgpu: could not parse disable_cu\n"); return; @@ -157,7 +159,7 @@ static bool amdgpu_gfx_is_compute_multipipe_capable(struct amdgpu_device *adev) return amdgpu_compute_multipipe == 1; } - if (adev->ip_versions[GC_HWIP][0] > IP_VERSION(9, 0, 0)) + if (amdgpu_ip_version(adev, GC_HWIP, 0) > IP_VERSION(9, 0, 0)) return true; /* FIXME: spreading the queues across pipes causes perf regressions @@ -349,7 +351,7 @@ void amdgpu_gfx_kiq_fini(struct amdgpu_device *adev, int xcc_id) } int amdgpu_gfx_kiq_init(struct amdgpu_device *adev, - unsigned hpd_size, int xcc_id) + unsigned int hpd_size, int xcc_id) { int r; u32 *hpd; @@ -376,16 +378,18 @@ int amdgpu_gfx_kiq_init(struct amdgpu_device *adev, /* create MQD for each compute/gfx queue */ int amdgpu_gfx_mqd_sw_init(struct amdgpu_device *adev, - unsigned mqd_size, int xcc_id) + unsigned int mqd_size, int xcc_id) { int r, i, j; struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id]; struct amdgpu_ring *ring = &kiq->ring; u32 domain = AMDGPU_GEM_DOMAIN_GTT; +#if !defined(CONFIG_ARM) && !defined(CONFIG_ARM64) /* Only enable on gfx10 and 11 for now to avoid changing behavior on older chips */ - if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(10, 0, 0)) + if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(10, 0, 0)) domain |= AMDGPU_GEM_DOMAIN_VRAM; +#endif /* create MQD for KIQ */ if (!adev->enable_mes_kiq && !ring->mqd_obj) { @@ -407,8 +411,11 @@ int amdgpu_gfx_mqd_sw_init(struct amdgpu_device *adev, /* prepare MQD backup */ kiq->mqd_backup = kmalloc(mqd_size, GFP_KERNEL); - if (!kiq->mqd_backup) - dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name); + if (!kiq->mqd_backup) { + dev_warn(adev->dev, + "no memory to create MQD backup for ring %s\n", ring->name); + return -ENOMEM; + } } if (adev->asic_type >= CHIP_NAVI10 && amdgpu_async_gfx_ring) { @@ -427,8 +434,10 @@ int amdgpu_gfx_mqd_sw_init(struct amdgpu_device *adev, ring->mqd_size = mqd_size; /* prepare MQD backup */ adev->gfx.me.mqd_backup[i] = kmalloc(mqd_size, GFP_KERNEL); - if (!adev->gfx.me.mqd_backup[i]) + if (!adev->gfx.me.mqd_backup[i]) { dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name); + return -ENOMEM; + } } } } @@ -449,8 +458,10 @@ int amdgpu_gfx_mqd_sw_init(struct amdgpu_device *adev, ring->mqd_size = mqd_size; /* prepare MQD backup */ adev->gfx.mec.mqd_backup[j] = kmalloc(mqd_size, GFP_KERNEL); - if (!adev->gfx.mec.mqd_backup[j]) + if (!adev->gfx.mec.mqd_backup[j]) { dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name); + return -ENOMEM; + } } } @@ -493,6 +504,9 @@ int amdgpu_gfx_disable_kcq(struct amdgpu_device *adev, int xcc_id) { struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id]; struct amdgpu_ring *kiq_ring = &kiq->ring; + struct amdgpu_hive_info *hive; + struct amdgpu_ras *ras; + int hive_ras_recovery = 0; int i, r = 0; int j; @@ -513,6 +527,23 @@ int amdgpu_gfx_disable_kcq(struct amdgpu_device *adev, int xcc_id) RESET_QUEUES, 0, 0); } + /** + * This is workaround: only skip kiq_ring test + * during ras recovery in suspend stage for gfx9.4.3 + */ + hive = amdgpu_get_xgmi_hive(adev); + if (hive) { + hive_ras_recovery = atomic_read(&hive->ras_recovery); + amdgpu_put_xgmi_hive(hive); + } + + ras = amdgpu_ras_get_context(adev); + if ((amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3)) && + ras && (atomic_read(&ras->in_recovery) || hive_ras_recovery)) { + spin_unlock(&kiq->ring_lock); + return 0; + } + if (kiq_ring->sched.ready && !adev->job_hang) r = amdgpu_ring_test_helper(kiq_ring); spin_unlock(&kiq->ring_lock); @@ -900,12 +931,12 @@ void amdgpu_gfx_ras_error_func(struct amdgpu_device *adev, func(adev, ras_error_status, i); } -uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg) +uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg, uint32_t xcc_id) { signed long r, cnt = 0; unsigned long flags; uint32_t seq, reg_val_offs = 0, value = 0; - struct amdgpu_kiq *kiq = &adev->gfx.kiq[0]; + struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id]; struct amdgpu_ring *ring = &kiq->ring; if (amdgpu_device_skip_hw_access(adev)) @@ -968,12 +999,12 @@ failed_kiq_read: return ~0; } -void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v) +void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v, uint32_t xcc_id) { signed long r, cnt = 0; unsigned long flags; uint32_t seq; - struct amdgpu_kiq *kiq = &adev->gfx.kiq[0]; + struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_id]; struct amdgpu_ring *ring = &kiq->ring; BUG_ON(!ring->funcs->emit_wreg); @@ -1274,11 +1305,11 @@ static ssize_t amdgpu_gfx_get_available_compute_partition(struct device *dev, return sysfs_emit(buf, "%s\n", supported_partition); } -static DEVICE_ATTR(current_compute_partition, S_IRUGO | S_IWUSR, +static DEVICE_ATTR(current_compute_partition, 0644, amdgpu_gfx_get_current_compute_partition, amdgpu_gfx_set_compute_partition); -static DEVICE_ATTR(available_compute_partition, S_IRUGO, +static DEVICE_ATTR(available_compute_partition, 0444, amdgpu_gfx_get_available_compute_partition, NULL); int amdgpu_gfx_sysfs_init(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index a4ff515ce896..f23bafec71c5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -43,9 +43,10 @@ #define AMDGPU_GFX_LBPW_DISABLED_MODE 0x00000008L #define AMDGPU_MAX_GC_INSTANCES 8 +#define AMDGPU_MAX_QUEUES 128 -#define AMDGPU_MAX_GFX_QUEUES KGD_MAX_QUEUES -#define AMDGPU_MAX_COMPUTE_QUEUES KGD_MAX_QUEUES +#define AMDGPU_MAX_GFX_QUEUES AMDGPU_MAX_QUEUES +#define AMDGPU_MAX_COMPUTE_QUEUES AMDGPU_MAX_QUEUES enum amdgpu_gfx_pipe_priority { AMDGPU_GFX_PIPE_PRIO_NORMAL = AMDGPU_RING_PRIO_1, @@ -68,11 +69,6 @@ enum amdgpu_gfx_partition { #define NUM_XCC(x) hweight16(x) -enum amdgpu_pkg_type { - AMDGPU_PKG_TYPE_APU = 2, - AMDGPU_PKG_TYPE_UNKNOWN, -}; - enum amdgpu_gfx_ras_mem_id_type { AMDGPU_GFX_CP_MEM = 0, AMDGPU_GFX_GCEA_MEM, @@ -241,6 +237,9 @@ struct amdgpu_gfx_config { uint32_t gc_gl1c_per_sa; uint32_t gc_gl1c_size_per_instance; uint32_t gc_gl2c_per_gpu; + uint32_t gc_tcp_size_per_cu; + uint32_t gc_num_cu_per_sqc; + uint32_t gc_tcc_size; }; struct amdgpu_cu_info { @@ -254,7 +253,7 @@ struct amdgpu_cu_info { uint32_t number; uint32_t ao_cu_mask; uint32_t ao_cu_bitmap[4][4]; - uint32_t bitmap[4][4]; + uint32_t bitmap[AMDGPU_MAX_GC_INSTANCES][4][4]; }; struct amdgpu_gfx_ras { @@ -522,8 +521,8 @@ int amdgpu_gfx_process_ras_data_cb(struct amdgpu_device *adev, int amdgpu_gfx_cp_ecc_error_irq(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry); -uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg); -void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v); +uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg, uint32_t xcc_id); +void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v, uint32_t xcc_id); int amdgpu_gfx_get_num_kcq(struct amdgpu_device *adev); void amdgpu_gfx_cp_init_microcode(struct amdgpu_device *adev, uint32_t ucode_id); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c index d78bd9732543..d2f273d77e59 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c @@ -32,6 +32,7 @@ #include "amdgpu.h" #include "amdgpu_gmc.h" #include "amdgpu_ras.h" +#include "amdgpu_reset.h" #include "amdgpu_xgmi.h" #include <drm/drm_drv.h> @@ -180,6 +181,9 @@ uint64_t amdgpu_gmc_agp_addr(struct ttm_buffer_object *bo) { struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev); + if (!bo->ttm) + return AMDGPU_BO_INVALID_OFFSET; + if (bo->ttm->num_pages != 1 || bo->ttm->caching == ttm_cached) return AMDGPU_BO_INVALID_OFFSET; @@ -263,12 +267,14 @@ void amdgpu_gmc_sysvm_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc * * @adev: amdgpu device structure holding all necessary information * @mc: memory controller structure holding memory information + * @gart_placement: GART placement policy with respect to VRAM * * Function will place try to place GART before or after VRAM. * If GART size is bigger than space left then we ajust GART size. * Thus function will never fails. */ -void amdgpu_gmc_gart_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc) +void amdgpu_gmc_gart_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc, + enum amdgpu_gart_placement gart_placement) { const uint64_t four_gb = 0x100000000ULL; u64 size_af, size_bf; @@ -286,11 +292,22 @@ void amdgpu_gmc_gart_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc) mc->gart_size = max(size_bf, size_af); } - if ((size_bf >= mc->gart_size && size_bf < size_af) || - (size_af < mc->gart_size)) - mc->gart_start = 0; - else + switch (gart_placement) { + case AMDGPU_GART_PLACEMENT_HIGH: mc->gart_start = max_mc_address - mc->gart_size + 1; + break; + case AMDGPU_GART_PLACEMENT_LOW: + mc->gart_start = 0; + break; + case AMDGPU_GART_PLACEMENT_BEST_FIT: + default: + if ((size_bf >= mc->gart_size && size_bf < size_af) || + (size_af < mc->gart_size)) + mc->gart_start = 0; + else + mc->gart_start = max_mc_address - mc->gart_size + 1; + break; + } mc->gart_start &= ~(four_gb - 1); mc->gart_end = mc->gart_start + mc->gart_size - 1; @@ -315,14 +332,6 @@ void amdgpu_gmc_agp_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc) const uint64_t sixteen_gb_mask = ~(sixteen_gb - 1); u64 size_af, size_bf; - if (amdgpu_sriov_vf(adev)) { - mc->agp_start = 0xffffffffffff; - mc->agp_end = 0x0; - mc->agp_size = 0; - - return; - } - if (mc->fb_start > mc->gart_start) { size_bf = (mc->fb_start & sixteen_gb_mask) - ALIGN(mc->gart_end + 1, sixteen_gb); @@ -347,6 +356,25 @@ void amdgpu_gmc_agp_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc) } /** + * amdgpu_gmc_set_agp_default - Set the default AGP aperture value. + * @adev: amdgpu device structure holding all necessary information + * @mc: memory controller structure holding memory information + * + * To disable the AGP aperture, you need to set the start to a larger + * value than the end. This function sets the default value which + * can then be overridden using amdgpu_gmc_agp_location() if you want + * to enable the AGP aperture on a specific chip. + * + */ +void amdgpu_gmc_set_agp_default(struct amdgpu_device *adev, + struct amdgpu_gmc *mc) +{ + mc->agp_start = 0xffffffffffff; + mc->agp_end = 0; + mc->agp_size = 0; +} + +/** * amdgpu_gmc_fault_key - get hask key from vm fault address and pasid * * @addr: 48 bit physical address, page aligned (36 significant bits) @@ -452,7 +480,10 @@ void amdgpu_gmc_filter_faults_remove(struct amdgpu_device *adev, uint64_t addr, uint32_t hash; uint64_t tmp; - ih = adev->irq.retry_cam_enabled ? &adev->irq.ih_soft : &adev->irq.ih1; + if (adev->irq.retry_cam_enabled) + return; + + ih = &adev->irq.ih1; /* Get the WPTR of the last entry in IH ring */ last_wptr = amdgpu_ih_get_wptr(adev, ih); /* Order wptr with ring data. */ @@ -549,13 +580,17 @@ int amdgpu_gmc_allocate_vm_inv_eng(struct amdgpu_device *adev) /* reserve engine 5 for firmware */ if (adev->enable_mes) vm_inv_engs[i] &= ~(1 << 5); + /* reserve mmhub engine 3 for firmware */ + if (adev->enable_umsch_mm) + vm_inv_engs[i] &= ~(1 << 3); } for (i = 0; i < adev->num_rings; ++i) { ring = adev->rings[i]; vmhub = ring->vm_hub; - if (ring == &adev->mes.ring) + if (ring == &adev->mes.ring || + ring == &adev->umsch_mm.ring) continue; inv_eng = ffs(vm_inv_engs[vmhub]); @@ -575,6 +610,142 @@ int amdgpu_gmc_allocate_vm_inv_eng(struct amdgpu_device *adev) return 0; } +void amdgpu_gmc_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, + uint32_t vmhub, uint32_t flush_type) +{ + struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring; + struct amdgpu_vmhub *hub = &adev->vmhub[vmhub]; + struct dma_fence *fence; + struct amdgpu_job *job; + int r; + + if (!hub->sdma_invalidation_workaround || vmid || + !adev->mman.buffer_funcs_enabled || + !adev->ib_pool_ready || amdgpu_in_reset(adev) || + !ring->sched.ready) { + + /* + * A GPU reset should flush all TLBs anyway, so no need to do + * this while one is ongoing. + */ + if (!down_read_trylock(&adev->reset_domain->sem)) + return; + + if (adev->gmc.flush_tlb_needs_extra_type_2) + adev->gmc.gmc_funcs->flush_gpu_tlb(adev, vmid, + vmhub, 2); + + if (adev->gmc.flush_tlb_needs_extra_type_0 && flush_type == 2) + adev->gmc.gmc_funcs->flush_gpu_tlb(adev, vmid, + vmhub, 0); + + adev->gmc.gmc_funcs->flush_gpu_tlb(adev, vmid, vmhub, + flush_type); + up_read(&adev->reset_domain->sem); + return; + } + + /* The SDMA on Navi 1x has a bug which can theoretically result in memory + * corruption if an invalidation happens at the same time as an VA + * translation. Avoid this by doing the invalidation from the SDMA + * itself at least for GART. + */ + mutex_lock(&adev->mman.gtt_window_lock); + r = amdgpu_job_alloc_with_ib(ring->adev, &adev->mman.high_pr, + AMDGPU_FENCE_OWNER_UNDEFINED, + 16 * 4, AMDGPU_IB_POOL_IMMEDIATE, + &job); + if (r) + goto error_alloc; + + job->vm_pd_addr = amdgpu_gmc_pd_addr(adev->gart.bo); + job->vm_needs_flush = true; + job->ibs->ptr[job->ibs->length_dw++] = ring->funcs->nop; + amdgpu_ring_pad_ib(ring, &job->ibs[0]); + fence = amdgpu_job_submit(job); + mutex_unlock(&adev->mman.gtt_window_lock); + + dma_fence_wait(fence, false); + dma_fence_put(fence); + + return; + +error_alloc: + mutex_unlock(&adev->mman.gtt_window_lock); + dev_err(adev->dev, "Error flushing GPU TLB using the SDMA (%d)!\n", r); +} + +int amdgpu_gmc_flush_gpu_tlb_pasid(struct amdgpu_device *adev, uint16_t pasid, + uint32_t flush_type, bool all_hub, + uint32_t inst) +{ + u32 usec_timeout = amdgpu_sriov_vf(adev) ? SRIOV_USEC_TIMEOUT : + adev->usec_timeout; + struct amdgpu_ring *ring = &adev->gfx.kiq[inst].ring; + struct amdgpu_kiq *kiq = &adev->gfx.kiq[inst]; + unsigned int ndw; + signed long r; + uint32_t seq; + + if (!adev->gmc.flush_pasid_uses_kiq || !ring->sched.ready || + !down_read_trylock(&adev->reset_domain->sem)) { + + if (adev->gmc.flush_tlb_needs_extra_type_2) + adev->gmc.gmc_funcs->flush_gpu_tlb_pasid(adev, pasid, + 2, all_hub, + inst); + + if (adev->gmc.flush_tlb_needs_extra_type_0 && flush_type == 2) + adev->gmc.gmc_funcs->flush_gpu_tlb_pasid(adev, pasid, + 0, all_hub, + inst); + + adev->gmc.gmc_funcs->flush_gpu_tlb_pasid(adev, pasid, + flush_type, all_hub, + inst); + return 0; + } + + /* 2 dwords flush + 8 dwords fence */ + ndw = kiq->pmf->invalidate_tlbs_size + 8; + + if (adev->gmc.flush_tlb_needs_extra_type_2) + ndw += kiq->pmf->invalidate_tlbs_size; + + if (adev->gmc.flush_tlb_needs_extra_type_0) + ndw += kiq->pmf->invalidate_tlbs_size; + + spin_lock(&adev->gfx.kiq[inst].ring_lock); + amdgpu_ring_alloc(ring, ndw); + if (adev->gmc.flush_tlb_needs_extra_type_2) + kiq->pmf->kiq_invalidate_tlbs(ring, pasid, 2, all_hub); + + if (flush_type == 2 && adev->gmc.flush_tlb_needs_extra_type_0) + kiq->pmf->kiq_invalidate_tlbs(ring, pasid, 0, all_hub); + + kiq->pmf->kiq_invalidate_tlbs(ring, pasid, flush_type, all_hub); + r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT); + if (r) { + amdgpu_ring_undo(ring); + spin_unlock(&adev->gfx.kiq[inst].ring_lock); + goto error_unlock_reset; + } + + amdgpu_ring_commit(ring); + spin_unlock(&adev->gfx.kiq[inst].ring_lock); + r = amdgpu_fence_wait_polling(ring, seq, usec_timeout); + if (r < 1) { + dev_err(adev->dev, "wait for kiq fence error: %ld.\n", r); + r = -ETIME; + goto error_unlock_reset; + } + r = 0; + +error_unlock_reset: + up_read(&adev->reset_domain->sem); + return r; +} + /** * amdgpu_gmc_tmz_set -- check and set if a device supports TMZ * @adev: amdgpu_device pointer @@ -584,7 +755,7 @@ int amdgpu_gmc_allocate_vm_inv_eng(struct amdgpu_device *adev) */ void amdgpu_gmc_tmz_set(struct amdgpu_device *adev) { - switch (adev->ip_versions[GC_HWIP][0]) { + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { /* RAVEN */ case IP_VERSION(9, 2, 2): case IP_VERSION(9, 1, 0): @@ -618,6 +789,7 @@ void amdgpu_gmc_tmz_set(struct amdgpu_device *adev) /* YELLOW_CARP*/ case IP_VERSION(10, 3, 3): case IP_VERSION(11, 0, 4): + case IP_VERSION(11, 5, 0): /* Don't enable it by default yet. */ if (amdgpu_tmz < 1) { @@ -648,7 +820,7 @@ void amdgpu_gmc_tmz_set(struct amdgpu_device *adev) void amdgpu_gmc_noretry_set(struct amdgpu_device *adev) { struct amdgpu_gmc *gmc = &adev->gmc; - uint32_t gc_ver = adev->ip_versions[GC_HWIP][0]; + uint32_t gc_ver = amdgpu_ip_version(adev, GC_HWIP, 0); bool noretry_default = (gc_ver == IP_VERSION(9, 0, 1) || gc_ver == IP_VERSION(9, 3, 0) || gc_ver == IP_VERSION(9, 4, 0) || @@ -657,7 +829,10 @@ void amdgpu_gmc_noretry_set(struct amdgpu_device *adev) gc_ver == IP_VERSION(9, 4, 3) || gc_ver >= IP_VERSION(10, 3, 0)); - gmc->noretry = (amdgpu_noretry == -1) ? noretry_default : amdgpu_noretry; + if (!amdgpu_sriov_xnack_support(adev)) + gmc->noretry = 1; + else + gmc->noretry = (amdgpu_noretry == -1) ? noretry_default : amdgpu_noretry; } void amdgpu_gmc_set_vm_fault_masks(struct amdgpu_device *adev, int hub_type, @@ -721,12 +896,6 @@ void amdgpu_gmc_get_vbios_allocations(struct amdgpu_device *adev) case CHIP_RENOIR: adev->mman.keep_stolen_vga_memory = true; break; - case CHIP_YELLOW_CARP: - if (amdgpu_discovery == 0) { - adev->mman.stolen_reserved_offset = 0x1ffb0000; - adev->mman.stolen_reserved_size = 64 * PAGE_SIZE; - } - break; default: adev->mman.keep_stolen_vga_memory = false; break; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h index 56d73fade568..e699d1ca8deb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h @@ -117,6 +117,8 @@ struct amdgpu_vmhub { uint32_t vm_contexts_disable; + bool sdma_invalidation_workaround; + const struct amdgpu_vmhub_funcs *vmhub_funcs; }; @@ -128,9 +130,9 @@ struct amdgpu_gmc_funcs { void (*flush_gpu_tlb)(struct amdgpu_device *adev, uint32_t vmid, uint32_t vmhub, uint32_t flush_type); /* flush the vm tlb via pasid */ - int (*flush_gpu_tlb_pasid)(struct amdgpu_device *adev, uint16_t pasid, - uint32_t flush_type, bool all_hub, - uint32_t inst); + void (*flush_gpu_tlb_pasid)(struct amdgpu_device *adev, uint16_t pasid, + uint32_t flush_type, bool all_hub, + uint32_t inst); /* flush the vm tlb via ring */ uint64_t (*emit_flush_gpu_tlb)(struct amdgpu_ring *ring, unsigned vmid, uint64_t pd_addr); @@ -197,6 +199,12 @@ struct amdgpu_mem_partition_info { #define INVALID_PFN -1 +enum amdgpu_gart_placement { + AMDGPU_GART_PLACEMENT_BEST_FIT = 0, + AMDGPU_GART_PLACEMENT_HIGH, + AMDGPU_GART_PLACEMENT_LOW, +}; + struct amdgpu_gmc { /* FB's physical address in MMIO space (for CPU to * map FB). This is different compared to the agp/ @@ -331,12 +339,14 @@ struct amdgpu_gmc { u64 VM_CONTEXT_PAGE_TABLE_END_ADDR_LO32[16]; u64 VM_CONTEXT_PAGE_TABLE_END_ADDR_HI32[16]; u64 MC_VM_MX_L1_TLB_CNTL; + + u64 noretry_flags; + + bool flush_tlb_needs_extra_type_0; + bool flush_tlb_needs_extra_type_2; + bool flush_pasid_uses_kiq; }; -#define amdgpu_gmc_flush_gpu_tlb(adev, vmid, vmhub, type) ((adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid), (vmhub), (type))) -#define amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, type, allhub, inst) \ - ((adev)->gmc.gmc_funcs->flush_gpu_tlb_pasid \ - ((adev), (pasid), (type), (allhub), (inst))) #define amdgpu_gmc_emit_flush_gpu_tlb(r, vmid, addr) (r)->adev->gmc.gmc_funcs->emit_flush_gpu_tlb((r), (vmid), (addr)) #define amdgpu_gmc_emit_pasid_mapping(r, vmid, pasid) (r)->adev->gmc.gmc_funcs->emit_pasid_mapping((r), (vmid), (pasid)) #define amdgpu_gmc_map_mtype(adev, flags) (adev)->gmc.gmc_funcs->map_mtype((adev),(flags)) @@ -387,9 +397,12 @@ void amdgpu_gmc_sysvm_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc void amdgpu_gmc_vram_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc, u64 base); void amdgpu_gmc_gart_location(struct amdgpu_device *adev, - struct amdgpu_gmc *mc); + struct amdgpu_gmc *mc, + enum amdgpu_gart_placement gart_placement); void amdgpu_gmc_agp_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc); +void amdgpu_gmc_set_agp_default(struct amdgpu_device *adev, + struct amdgpu_gmc *mc); bool amdgpu_gmc_filter_faults(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih, uint64_t addr, uint16_t pasid, uint64_t timestamp); @@ -399,6 +412,11 @@ int amdgpu_gmc_ras_sw_init(struct amdgpu_device *adev); int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev); void amdgpu_gmc_ras_fini(struct amdgpu_device *adev); int amdgpu_gmc_allocate_vm_inv_eng(struct amdgpu_device *adev); +void amdgpu_gmc_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, + uint32_t vmhub, uint32_t flush_type); +int amdgpu_gmc_flush_gpu_tlb_pasid(struct amdgpu_device *adev, uint16_t pasid, + uint32_t flush_type, bool all_hub, + uint32_t inst); extern void amdgpu_gmc_tmz_set(struct amdgpu_device *adev); extern void amdgpu_gmc_noretry_set(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index ebeddc9a37e9..6aa3b1d845ab 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -62,7 +62,7 @@ * Returns 0 on success, error on failure. */ int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm, - unsigned size, enum amdgpu_ib_pool_type pool_type, + unsigned int size, enum amdgpu_ib_pool_type pool_type, struct amdgpu_ib *ib) { int r; @@ -123,7 +123,7 @@ void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib, * a CONST_IB), it will be put on the ring prior to the DE IB. Prior * to SI there was just a DE IB. */ -int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, +int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs, struct amdgpu_ib *ibs, struct amdgpu_job *job, struct dma_fence **f) { @@ -131,16 +131,16 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, struct amdgpu_ib *ib = &ibs[0]; struct dma_fence *tmp = NULL; bool need_ctx_switch; - unsigned patch_offset = ~0; + unsigned int patch_offset = ~0; struct amdgpu_vm *vm; uint64_t fence_ctx; uint32_t status = 0, alloc_size; - unsigned fence_flags = 0; + unsigned int fence_flags = 0; bool secure, init_shadow; u64 shadow_va, csa_va, gds_va; int vmid = AMDGPU_JOB_GET_VMID(job); - unsigned i; + unsigned int i; int r = 0; bool need_pipe_sync = false; @@ -282,7 +282,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, amdgpu_ring_emit_gfx_shadow(ring, 0, 0, 0, false, 0); if (ring->funcs->init_cond_exec) { - unsigned ce_offset = ~0; + unsigned int ce_offset = ~0; ce_offset = amdgpu_ring_init_cond_exec(ring); if (ce_offset != ~0 && ring->funcs->patch_cond_exec) @@ -385,7 +385,7 @@ int amdgpu_ib_ring_tests(struct amdgpu_device *adev) { long tmo_gfx, tmo_mm; int r, ret = 0; - unsigned i; + unsigned int i; tmo_mm = tmo_gfx = AMDGPU_IB_TEST_TIMEOUT; if (amdgpu_sriov_vf(adev)) { @@ -402,7 +402,7 @@ int amdgpu_ib_ring_tests(struct amdgpu_device *adev) /* for CP & SDMA engines since they are scheduled together so * need to make the timeout width enough to cover the time * cost waiting for it coming back under RUNTIME only - */ + */ tmo_gfx = 8 * AMDGPU_IB_TEST_TIMEOUT; } else if (adev->gmc.xgmi.hive_id) { tmo_gfx = AMDGPU_IB_TEST_GFX_XGMI_TIMEOUT; @@ -465,13 +465,13 @@ static int amdgpu_debugfs_sa_info_show(struct seq_file *m, void *unused) { struct amdgpu_device *adev = m->private; - seq_printf(m, "--------------------- DELAYED --------------------- \n"); + seq_puts(m, "--------------------- DELAYED ---------------------\n"); amdgpu_sa_bo_dump_debug_info(&adev->ib_pools[AMDGPU_IB_POOL_DELAYED], m); - seq_printf(m, "-------------------- IMMEDIATE -------------------- \n"); + seq_puts(m, "-------------------- IMMEDIATE --------------------\n"); amdgpu_sa_bo_dump_debug_info(&adev->ib_pools[AMDGPU_IB_POOL_IMMEDIATE], m); - seq_printf(m, "--------------------- DIRECT ---------------------- \n"); + seq_puts(m, "--------------------- DIRECT ----------------------\n"); amdgpu_sa_bo_dump_debug_info(&adev->ib_pools[AMDGPU_IB_POOL_DIRECT], m); return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c index ff1ea99292fb..ddd0891da116 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c @@ -188,7 +188,6 @@ static bool amdgpu_vmid_compatible(struct amdgpu_vmid *id, /** * amdgpu_vmid_grab_idle - grab idle VMID * - * @vm: vm to allocate id for * @ring: ring we want to submit job to * @idle: resulting idle VMID * @fence: fence to wait for if no id could be grabbed @@ -196,8 +195,7 @@ static bool amdgpu_vmid_compatible(struct amdgpu_vmid *id, * Try to find an idle VMID, if none is idle add a fence to wait to the sync * object. Returns -ENOMEM when we are out of memory. */ -static int amdgpu_vmid_grab_idle(struct amdgpu_vm *vm, - struct amdgpu_ring *ring, +static int amdgpu_vmid_grab_idle(struct amdgpu_ring *ring, struct amdgpu_vmid **idle, struct dma_fence **fence) { @@ -405,7 +403,7 @@ int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring, int r = 0; mutex_lock(&id_mgr->lock); - r = amdgpu_vmid_grab_idle(vm, ring, &idle, fence); + r = amdgpu_vmid_grab_idle(ring, &idle, fence); if (r || !idle) goto error; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c index fceb3b384955..f3b0aaf3ebc6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c @@ -138,6 +138,7 @@ void amdgpu_ih_ring_fini(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih) /** * amdgpu_ih_ring_write - write IV to the ring buffer * + * @adev: amdgpu_device pointer * @ih: ih ring to write to * @iv: the iv to write * @num_dw: size of the iv in dw @@ -145,8 +146,8 @@ void amdgpu_ih_ring_fini(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih) * Writes an IV to the ring buffer using the CPU and increment the wptr. * Used for testing and delegating IVs to a software ring. */ -void amdgpu_ih_ring_write(struct amdgpu_ih_ring *ih, const uint32_t *iv, - unsigned int num_dw) +void amdgpu_ih_ring_write(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih, + const uint32_t *iv, unsigned int num_dw) { uint32_t wptr = le32_to_cpu(*ih->wptr_cpu) >> 2; unsigned int i; @@ -161,6 +162,9 @@ void amdgpu_ih_ring_write(struct amdgpu_ih_ring *ih, const uint32_t *iv, if (wptr != READ_ONCE(ih->rptr)) { wmb(); WRITE_ONCE(*ih->wptr_cpu, cpu_to_le32(wptr)); + } else if (adev->irq.retry_cam_enabled) { + dev_warn_once(adev->dev, "IH soft ring buffer overflow 0x%X, 0x%X\n", + wptr, ih->rptr); } } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h index dd1c2eded6b9..508f02eb0cf8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h @@ -27,6 +27,9 @@ /* Maximum number of IVs processed at once */ #define AMDGPU_IH_MAX_NUM_IVS 32 +#define IH_RING_SIZE (256 * 1024) +#define IH_SW_RING_SIZE (16 * 1024) /* enough for 512 CAM entries */ + struct amdgpu_device; struct amdgpu_iv_entry; @@ -97,8 +100,8 @@ struct amdgpu_ih_funcs { int amdgpu_ih_ring_init(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih, unsigned ring_size, bool use_bus_addr); void amdgpu_ih_ring_fini(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih); -void amdgpu_ih_ring_write(struct amdgpu_ih_ring *ih, const uint32_t *iv, - unsigned int num_dw); +void amdgpu_ih_ring_write(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih, + const uint32_t *iv, unsigned int num_dw); int amdgpu_ih_wait_on_checkpoint_process_ts(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih); int amdgpu_ih_process(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c index 5273decc5753..7e6d09730e6d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c @@ -270,29 +270,29 @@ static void amdgpu_restore_msix(struct amdgpu_device *adev) */ int amdgpu_irq_init(struct amdgpu_device *adev) { - int r = 0; - unsigned int irq; + unsigned int irq, flags; + int r; spin_lock_init(&adev->irq.lock); /* Enable MSI if not disabled by module parameter */ adev->irq.msi_enabled = false; + if (!amdgpu_msi_ok(adev)) + flags = PCI_IRQ_LEGACY; + else + flags = PCI_IRQ_ALL_TYPES; + + /* we only need one vector */ + r = pci_alloc_irq_vectors(adev->pdev, 1, 1, flags); + if (r < 0) { + dev_err(adev->dev, "Failed to alloc msi vectors\n"); + return r; + } + if (amdgpu_msi_ok(adev)) { - int nvec = pci_msix_vec_count(adev->pdev); - unsigned int flags; - - if (nvec <= 0) - flags = PCI_IRQ_MSI; - else - flags = PCI_IRQ_MSI | PCI_IRQ_MSIX; - - /* we only need one vector */ - nvec = pci_alloc_irq_vectors(adev->pdev, 1, 1, flags); - if (nvec > 0) { - adev->irq.msi_enabled = true; - dev_dbg(adev->dev, "using MSI/MSI-X.\n"); - } + adev->irq.msi_enabled = true; + dev_dbg(adev->dev, "using MSI/MSI-X.\n"); } INIT_WORK(&adev->irq.ih1_work, amdgpu_irq_handle_ih1); @@ -302,22 +302,29 @@ int amdgpu_irq_init(struct amdgpu_device *adev) /* Use vector 0 for MSI-X. */ r = pci_irq_vector(adev->pdev, 0); if (r < 0) - return r; + goto free_vectors; irq = r; /* PCI devices require shared interrupts. */ r = request_irq(irq, amdgpu_irq_handler, IRQF_SHARED, adev_to_drm(adev)->driver->name, adev_to_drm(adev)); if (r) - return r; + goto free_vectors; + adev->irq.installed = true; adev->irq.irq = irq; adev_to_drm(adev)->max_vblank_count = 0x00ffffff; DRM_DEBUG("amdgpu: irq initialized.\n"); return 0; -} +free_vectors: + if (adev->irq.msi_enabled) + pci_free_irq_vectors(adev->pdev); + + adev->irq.msi_enabled = false; + return r; +} void amdgpu_irq_fini_hw(struct amdgpu_device *adev) { @@ -493,7 +500,7 @@ void amdgpu_irq_delegate(struct amdgpu_device *adev, struct amdgpu_iv_entry *entry, unsigned int num_dw) { - amdgpu_ih_ring_write(&adev->irq.ih_soft, entry->iv_entry, num_dw); + amdgpu_ih_ring_write(adev, &adev->irq.ih_soft, entry->iv_entry, num_dw); schedule_work(&adev->irq.ih_soft_work); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index 78476bc75b4e..1f357198533f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -325,8 +325,8 @@ void amdgpu_job_stop_all_jobs_on_sched(struct drm_gpu_scheduler *sched) int i; /* Signal all jobs not yet scheduled */ - for (i = DRM_SCHED_PRIORITY_COUNT - 1; i >= DRM_SCHED_PRIORITY_MIN; i--) { - struct drm_sched_rq *rq = &sched->sched_rq[i]; + for (i = sched->num_rqs - 1; i >= DRM_SCHED_PRIORITY_MIN; i--) { + struct drm_sched_rq *rq = sched->sched_rq[i]; spin_lock(&rq->lock); list_for_each_entry(s_entity, &rq->entities, list) { while ((s_job = to_drm_sched_job(spsc_queue_pop(&s_entity->job_queue)))) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index 12414a713256..583cf03950cd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -200,6 +200,44 @@ out: return r; } +static enum amd_ip_block_type + amdgpu_ip_get_block_type(struct amdgpu_device *adev, uint32_t ip) +{ + enum amd_ip_block_type type; + + switch (ip) { + case AMDGPU_HW_IP_GFX: + type = AMD_IP_BLOCK_TYPE_GFX; + break; + case AMDGPU_HW_IP_COMPUTE: + type = AMD_IP_BLOCK_TYPE_GFX; + break; + case AMDGPU_HW_IP_DMA: + type = AMD_IP_BLOCK_TYPE_SDMA; + break; + case AMDGPU_HW_IP_UVD: + case AMDGPU_HW_IP_UVD_ENC: + type = AMD_IP_BLOCK_TYPE_UVD; + break; + case AMDGPU_HW_IP_VCE: + type = AMD_IP_BLOCK_TYPE_VCE; + break; + case AMDGPU_HW_IP_VCN_DEC: + case AMDGPU_HW_IP_VCN_ENC: + type = AMD_IP_BLOCK_TYPE_VCN; + break; + case AMDGPU_HW_IP_VCN_JPEG: + type = (amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_JPEG)) ? + AMD_IP_BLOCK_TYPE_JPEG : AMD_IP_BLOCK_TYPE_VCN; + break; + default: + type = AMD_IP_BLOCK_TYPE_NUM; + break; + } + + return type; +} + static int amdgpu_firmware_info(struct drm_amdgpu_info_firmware *fw_info, struct drm_amdgpu_query_fw *query_fw, struct amdgpu_device *adev) @@ -352,6 +390,10 @@ static int amdgpu_firmware_info(struct drm_amdgpu_info_firmware *fw_info, fw_info->ver = adev->gfx.imu_fw_version; fw_info->feature = 0; break; + case AMDGPU_INFO_FW_VPE: + fw_info->ver = adev->vpe.fw_version; + fw_info->feature = adev->vpe.feature_version; + break; default: return -EINVAL; } @@ -405,7 +447,7 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev, if (adev->uvd.inst[i].ring.sched.ready) ++num_rings; } - ib_start_alignment = 64; + ib_start_alignment = 256; ib_size_alignment = 64; break; case AMDGPU_HW_IP_VCE: @@ -413,8 +455,8 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev, for (i = 0; i < adev->vce.num_rings; i++) if (adev->vce.ring[i].sched.ready) ++num_rings; - ib_start_alignment = 4; - ib_size_alignment = 1; + ib_start_alignment = 256; + ib_size_alignment = 4; break; case AMDGPU_HW_IP_UVD_ENC: type = AMD_IP_BLOCK_TYPE_UVD; @@ -426,8 +468,8 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev, if (adev->uvd.inst[i].ring_enc[j].sched.ready) ++num_rings; } - ib_start_alignment = 64; - ib_size_alignment = 64; + ib_start_alignment = 256; + ib_size_alignment = 4; break; case AMDGPU_HW_IP_VCN_DEC: type = AMD_IP_BLOCK_TYPE_VCN; @@ -438,8 +480,8 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev, if (adev->vcn.inst[i].ring_dec.sched.ready) ++num_rings; } - ib_start_alignment = 16; - ib_size_alignment = 16; + ib_start_alignment = 256; + ib_size_alignment = 64; break; case AMDGPU_HW_IP_VCN_ENC: type = AMD_IP_BLOCK_TYPE_VCN; @@ -451,8 +493,8 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev, if (adev->vcn.inst[i].ring_enc[j].sched.ready) ++num_rings; } - ib_start_alignment = 64; - ib_size_alignment = 1; + ib_start_alignment = 256; + ib_size_alignment = 4; break; case AMDGPU_HW_IP_VCN_JPEG: type = (amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_JPEG)) ? @@ -466,8 +508,15 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev, if (adev->jpeg.inst[i].ring_dec[j].sched.ready) ++num_rings; } - ib_start_alignment = 16; - ib_size_alignment = 16; + ib_start_alignment = 256; + ib_size_alignment = 64; + break; + case AMDGPU_HW_IP_VPE: + type = AMD_IP_BLOCK_TYPE_VPE; + if (adev->vpe.ring.sched.ready) + ++num_rings; + ib_start_alignment = 256; + ib_size_alignment = 4; break; default: return -EINVAL; @@ -490,18 +539,26 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev, if (adev->asic_type >= CHIP_VEGA10) { switch (type) { case AMD_IP_BLOCK_TYPE_GFX: - result->ip_discovery_version = adev->ip_versions[GC_HWIP][0]; + result->ip_discovery_version = + IP_VERSION_MAJ_MIN_REV(amdgpu_ip_version(adev, GC_HWIP, 0)); break; case AMD_IP_BLOCK_TYPE_SDMA: - result->ip_discovery_version = adev->ip_versions[SDMA0_HWIP][0]; + result->ip_discovery_version = + IP_VERSION_MAJ_MIN_REV(amdgpu_ip_version(adev, SDMA0_HWIP, 0)); break; case AMD_IP_BLOCK_TYPE_UVD: case AMD_IP_BLOCK_TYPE_VCN: case AMD_IP_BLOCK_TYPE_JPEG: - result->ip_discovery_version = adev->ip_versions[UVD_HWIP][0]; + result->ip_discovery_version = + IP_VERSION_MAJ_MIN_REV(amdgpu_ip_version(adev, UVD_HWIP, 0)); break; case AMD_IP_BLOCK_TYPE_VCE: - result->ip_discovery_version = adev->ip_versions[VCE_HWIP][0]; + result->ip_discovery_version = + IP_VERSION_MAJ_MIN_REV(amdgpu_ip_version(adev, VCE_HWIP, 0)); + break; + case AMD_IP_BLOCK_TYPE_VPE: + result->ip_discovery_version = + IP_VERSION_MAJ_MIN_REV(amdgpu_ip_version(adev, VPE_HWIP, 0)); break; default: result->ip_discovery_version = 0; @@ -538,11 +595,16 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) struct drm_amdgpu_info *info = data; struct amdgpu_mode_info *minfo = &adev->mode_info; void __user *out = (void __user *)(uintptr_t)info->return_pointer; + struct amdgpu_fpriv *fpriv; + struct amdgpu_ip_block *ip_block; + enum amd_ip_block_type type; + struct amdgpu_xcp *xcp; + u32 count, inst_mask; uint32_t size = info->return_size; struct drm_crtc *crtc; uint32_t ui32 = 0; uint64_t ui64 = 0; - int i, found; + int i, found, ret; int ui32_size = sizeof(ui32); if (!info->return_size || !info->return_pointer) @@ -557,6 +619,7 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) crtc = (struct drm_crtc *)minfo->crtcs[i]; if (crtc && crtc->base.id == info->mode_crtc.id) { struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); + ui32 = amdgpu_crtc->crtc_id; found = 1; break; @@ -569,56 +632,75 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) return copy_to_user(out, &ui32, min(size, 4u)) ? -EFAULT : 0; case AMDGPU_INFO_HW_IP_INFO: { struct drm_amdgpu_info_hw_ip ip = {}; - int ret; ret = amdgpu_hw_ip_info(adev, info, &ip); if (ret) return ret; - ret = copy_to_user(out, &ip, min((size_t)size, sizeof(ip))); + ret = copy_to_user(out, &ip, min_t(size_t, size, sizeof(ip))); return ret ? -EFAULT : 0; } case AMDGPU_INFO_HW_IP_COUNT: { - enum amd_ip_block_type type; - uint32_t count = 0; + fpriv = (struct amdgpu_fpriv *)filp->driver_priv; + type = amdgpu_ip_get_block_type(adev, info->query_hw_ip.type); + ip_block = amdgpu_device_ip_get_ip_block(adev, type); - switch (info->query_hw_ip.type) { - case AMDGPU_HW_IP_GFX: - type = AMD_IP_BLOCK_TYPE_GFX; - break; - case AMDGPU_HW_IP_COMPUTE: - type = AMD_IP_BLOCK_TYPE_GFX; - break; - case AMDGPU_HW_IP_DMA: - type = AMD_IP_BLOCK_TYPE_SDMA; - break; - case AMDGPU_HW_IP_UVD: - type = AMD_IP_BLOCK_TYPE_UVD; + if (!ip_block || !ip_block->status.valid) + return -EINVAL; + + if (adev->xcp_mgr && adev->xcp_mgr->num_xcps > 0 && + fpriv->xcp_id >= 0 && fpriv->xcp_id < adev->xcp_mgr->num_xcps) { + xcp = &adev->xcp_mgr->xcp[fpriv->xcp_id]; + switch (type) { + case AMD_IP_BLOCK_TYPE_GFX: + ret = amdgpu_xcp_get_inst_details(xcp, AMDGPU_XCP_GFX, &inst_mask); + count = hweight32(inst_mask); + break; + case AMD_IP_BLOCK_TYPE_SDMA: + ret = amdgpu_xcp_get_inst_details(xcp, AMDGPU_XCP_SDMA, &inst_mask); + count = hweight32(inst_mask); + break; + case AMD_IP_BLOCK_TYPE_JPEG: + ret = amdgpu_xcp_get_inst_details(xcp, AMDGPU_XCP_VCN, &inst_mask); + count = hweight32(inst_mask) * adev->jpeg.num_jpeg_rings; + break; + case AMD_IP_BLOCK_TYPE_VCN: + ret = amdgpu_xcp_get_inst_details(xcp, AMDGPU_XCP_VCN, &inst_mask); + count = hweight32(inst_mask); + break; + default: + return -EINVAL; + } + if (ret) + return ret; + return copy_to_user(out, &count, min(size, 4u)) ? -EFAULT : 0; + } + + switch (type) { + case AMD_IP_BLOCK_TYPE_GFX: + case AMD_IP_BLOCK_TYPE_VCE: + count = 1; break; - case AMDGPU_HW_IP_VCE: - type = AMD_IP_BLOCK_TYPE_VCE; + case AMD_IP_BLOCK_TYPE_SDMA: + count = adev->sdma.num_instances; break; - case AMDGPU_HW_IP_UVD_ENC: - type = AMD_IP_BLOCK_TYPE_UVD; + case AMD_IP_BLOCK_TYPE_JPEG: + count = adev->jpeg.num_jpeg_inst * adev->jpeg.num_jpeg_rings; break; - case AMDGPU_HW_IP_VCN_DEC: - case AMDGPU_HW_IP_VCN_ENC: - type = AMD_IP_BLOCK_TYPE_VCN; + case AMD_IP_BLOCK_TYPE_VCN: + count = adev->vcn.num_vcn_inst; break; - case AMDGPU_HW_IP_VCN_JPEG: - type = (amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_JPEG)) ? - AMD_IP_BLOCK_TYPE_JPEG : AMD_IP_BLOCK_TYPE_VCN; + case AMD_IP_BLOCK_TYPE_UVD: + count = adev->uvd.num_uvd_inst; break; + /* For all other IP block types not listed in the switch statement + * the ip status is valid here and the instance count is one. + */ default: - return -EINVAL; + count = 1; + break; } - for (i = 0; i < adev->num_ip_blocks; i++) - if (adev->ip_blocks[i].version->type == type && - adev->ip_blocks[i].status.valid && - count < AMDGPU_HW_IP_INSTANCE_MAX_COUNT) - count++; - return copy_to_user(out, &count, min(size, 4u)) ? -EFAULT : 0; } case AMDGPU_INFO_TIMESTAMP: @@ -626,7 +708,6 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) return copy_to_user(out, &ui64, min(size, 8u)) ? -EFAULT : 0; case AMDGPU_INFO_FW_VERSION: { struct drm_amdgpu_info_firmware fw_info; - int ret; /* We only support one instance of each IP block right now. */ if (info->query_fw.ip_instance != 0) @@ -721,17 +802,18 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) ? -EFAULT : 0; } case AMDGPU_INFO_READ_MMR_REG: { - unsigned n, alloc_size; + unsigned int n, alloc_size; uint32_t *regs; - unsigned se_num = (info->read_mmr_reg.instance >> + unsigned int se_num = (info->read_mmr_reg.instance >> AMDGPU_INFO_MMR_SE_INDEX_SHIFT) & AMDGPU_INFO_MMR_SE_INDEX_MASK; - unsigned sh_num = (info->read_mmr_reg.instance >> + unsigned int sh_num = (info->read_mmr_reg.instance >> AMDGPU_INFO_MMR_SH_INDEX_SHIFT) & AMDGPU_INFO_MMR_SH_INDEX_MASK; /* set full masks if the userspace set all bits - * in the bitfields */ + * in the bitfields + */ if (se_num == AMDGPU_INFO_MMR_SE_INDEX_MASK) se_num = 0xffffffff; else if (se_num >= AMDGPU_GFX_MAX_SE) @@ -770,7 +852,6 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) struct drm_amdgpu_info_device *dev_info; uint64_t vm_size; uint32_t pcie_gen_mask; - int ret; dev_info = kzalloc(sizeof(*dev_info), GFP_KERNEL); if (!dev_info) @@ -837,7 +918,7 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) memcpy(&dev_info->cu_ao_bitmap[0], &adev->gfx.cu_info.ao_cu_bitmap[0], sizeof(adev->gfx.cu_info.ao_cu_bitmap)); memcpy(&dev_info->cu_bitmap[0], &adev->gfx.cu_info.bitmap[0], - sizeof(adev->gfx.cu_info.bitmap)); + sizeof(dev_info->cu_bitmap)); dev_info->vram_type = adev->gmc.vram_type; dev_info->vram_bit_width = adev->gmc.vram_width; dev_info->vce_harvest_config = adev->vce.harvest_config; @@ -896,7 +977,7 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) return ret; } case AMDGPU_INFO_VCE_CLOCK_TABLE: { - unsigned i; + unsigned int i; struct drm_amdgpu_info_vce_clock_table vce_clk_table = {}; struct amd_vce_state *vce_state; @@ -938,12 +1019,17 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) struct atom_context *atom_context; atom_context = adev->mode_info.atom_context; - memcpy(vbios_info.name, atom_context->name, sizeof(atom_context->name)); - memcpy(vbios_info.vbios_pn, atom_context->vbios_pn, sizeof(atom_context->vbios_pn)); - vbios_info.version = atom_context->version; - memcpy(vbios_info.vbios_ver_str, atom_context->vbios_ver_str, - sizeof(atom_context->vbios_ver_str)); - memcpy(vbios_info.date, atom_context->date, sizeof(atom_context->date)); + if (atom_context) { + memcpy(vbios_info.name, atom_context->name, + sizeof(atom_context->name)); + memcpy(vbios_info.vbios_pn, atom_context->vbios_pn, + sizeof(atom_context->vbios_pn)); + vbios_info.version = atom_context->version; + memcpy(vbios_info.vbios_ver_str, atom_context->vbios_ver_str, + sizeof(atom_context->vbios_ver_str)); + memcpy(vbios_info.date, atom_context->date, + sizeof(atom_context->date)); + } return copy_to_user(out, &vbios_info, min((size_t)size, sizeof(vbios_info))) ? -EFAULT : 0; @@ -1017,7 +1103,7 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) case AMDGPU_INFO_SENSOR_GPU_AVG_POWER: /* get average GPU power */ if (amdgpu_dpm_read_sensor(adev, - AMDGPU_PP_SENSOR_GPU_POWER, + AMDGPU_PP_SENSOR_GPU_AVG_POWER, (void *)&ui32, &ui32_size)) { return -EINVAL; } @@ -1102,6 +1188,9 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) struct drm_amdgpu_info_video_caps *caps; int r; + if (!adev->asic_funcs->query_video_codecs) + return -EINVAL; + switch (info->video_cap.type) { case AMDGPU_INFO_VIDEO_CAPS_DECODE: r = amdgpu_asic_query_video_codecs(adev, false, &codecs); @@ -1163,6 +1252,26 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) return copy_to_user(out, max_ibs, min((size_t)size, sizeof(max_ibs))) ? -EFAULT : 0; } + case AMDGPU_INFO_GPUVM_FAULT: { + struct amdgpu_fpriv *fpriv = filp->driver_priv; + struct amdgpu_vm *vm = &fpriv->vm; + struct drm_amdgpu_info_gpuvm_fault gpuvm_fault; + unsigned long flags; + + if (!vm) + return -EINVAL; + + memset(&gpuvm_fault, 0, sizeof(gpuvm_fault)); + + xa_lock_irqsave(&adev->vm_manager.pasids, flags); + gpuvm_fault.addr = vm->fault_info.addr; + gpuvm_fault.status = vm->fault_info.status; + gpuvm_fault.vmhub = vm->fault_info.vmhub; + xa_unlock_irqrestore(&adev->vm_manager.pasids, flags); + + return copy_to_user(out, &gpuvm_fault, + min((size_t)size, sizeof(gpuvm_fault))) ? -EFAULT : 0; + } default: DRM_DEBUG_KMS("Invalid request %d\n", info->query); return -EINVAL; @@ -1719,7 +1828,15 @@ static int amdgpu_debugfs_firmware_info_show(struct seq_file *m, void *unused) seq_printf(m, "MES feature version: %u, firmware version: 0x%08x\n", fw_info.feature, fw_info.ver); - seq_printf(m, "VBIOS version: %s\n", ctx->vbios_version); + /* VPE */ + query_fw.fw_type = AMDGPU_INFO_FW_VPE; + ret = amdgpu_firmware_info(&fw_info, &query_fw, adev); + if (ret) + return ret; + seq_printf(m, "VPE feature version: %u, firmware version: 0x%08x\n", + fw_info.feature, fw_info.ver); + + seq_printf(m, "VBIOS version: %s\n", ctx->vbios_pn); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c index 8d9ff9e151de..cf33eb219e25 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c @@ -142,3 +142,355 @@ int amdgpu_mca_mpio_ras_sw_init(struct amdgpu_device *adev) return 0; } + +void amdgpu_mca_bank_set_init(struct mca_bank_set *mca_set) +{ + if (!mca_set) + return; + + memset(mca_set, 0, sizeof(*mca_set)); + INIT_LIST_HEAD(&mca_set->list); +} + +int amdgpu_mca_bank_set_add_entry(struct mca_bank_set *mca_set, struct mca_bank_entry *entry) +{ + struct mca_bank_node *node; + + if (!entry) + return -EINVAL; + + node = kvzalloc(sizeof(*node), GFP_KERNEL); + if (!node) + return -ENOMEM; + + memcpy(&node->entry, entry, sizeof(*entry)); + + INIT_LIST_HEAD(&node->node); + list_add_tail(&node->node, &mca_set->list); + + mca_set->nr_entries++; + + return 0; +} + +void amdgpu_mca_bank_set_release(struct mca_bank_set *mca_set) +{ + struct mca_bank_node *node, *tmp; + + list_for_each_entry_safe(node, tmp, &mca_set->list, node) { + list_del(&node->node); + kvfree(node); + } +} + +void amdgpu_mca_smu_init_funcs(struct amdgpu_device *adev, const struct amdgpu_mca_smu_funcs *mca_funcs) +{ + struct amdgpu_mca *mca = &adev->mca; + + mca->mca_funcs = mca_funcs; +} + +int amdgpu_mca_smu_set_debug_mode(struct amdgpu_device *adev, bool enable) +{ + const struct amdgpu_mca_smu_funcs *mca_funcs = adev->mca.mca_funcs; + + if (mca_funcs && mca_funcs->mca_set_debug_mode) + return mca_funcs->mca_set_debug_mode(adev, enable); + + return -EOPNOTSUPP; +} + +static void amdgpu_mca_smu_mca_bank_dump(struct amdgpu_device *adev, int idx, struct mca_bank_entry *entry) +{ + dev_info(adev->dev, "[Hardware error] Accelerator Check Architecture events logged\n"); + dev_info(adev->dev, "[Hardware error] aca entry[%02d].STATUS=0x%016llx\n", + idx, entry->regs[MCA_REG_IDX_STATUS]); + dev_info(adev->dev, "[Hardware error] aca entry[%02d].ADDR=0x%016llx\n", + idx, entry->regs[MCA_REG_IDX_ADDR]); + dev_info(adev->dev, "[Hardware error] aca entry[%02d].MISC0=0x%016llx\n", + idx, entry->regs[MCA_REG_IDX_MISC0]); + dev_info(adev->dev, "[Hardware error] aca entry[%02d].IPID=0x%016llx\n", + idx, entry->regs[MCA_REG_IDX_IPID]); + dev_info(adev->dev, "[Hardware error] aca entry[%02d].SYND=0x%016llx\n", + idx, entry->regs[MCA_REG_IDX_SYND]); +} + +int amdgpu_mca_smu_log_ras_error(struct amdgpu_device *adev, enum amdgpu_ras_block blk, enum amdgpu_mca_error_type type, struct ras_err_data *err_data) +{ + struct amdgpu_smuio_mcm_config_info mcm_info; + struct mca_bank_set mca_set; + struct mca_bank_node *node; + struct mca_bank_entry *entry; + uint32_t count; + int ret, i = 0; + + amdgpu_mca_bank_set_init(&mca_set); + + ret = amdgpu_mca_smu_get_mca_set(adev, blk, type, &mca_set); + if (ret) + goto out_mca_release; + + list_for_each_entry(node, &mca_set.list, node) { + entry = &node->entry; + + amdgpu_mca_smu_mca_bank_dump(adev, i++, entry); + + count = 0; + ret = amdgpu_mca_smu_parse_mca_error_count(adev, blk, type, entry, &count); + if (ret) + goto out_mca_release; + + if (!count) + continue; + + mcm_info.socket_id = entry->info.socket_id; + mcm_info.die_id = entry->info.aid; + + if (type == AMDGPU_MCA_ERROR_TYPE_UE) + amdgpu_ras_error_statistic_ue_count(err_data, &mcm_info, (uint64_t)count); + else + amdgpu_ras_error_statistic_ce_count(err_data, &mcm_info, (uint64_t)count); + } + +out_mca_release: + amdgpu_mca_bank_set_release(&mca_set); + + return ret; +} + + +int amdgpu_mca_smu_get_valid_mca_count(struct amdgpu_device *adev, enum amdgpu_mca_error_type type, uint32_t *count) +{ + const struct amdgpu_mca_smu_funcs *mca_funcs = adev->mca.mca_funcs; + + if (!count) + return -EINVAL; + + if (mca_funcs && mca_funcs->mca_get_valid_mca_count) + return mca_funcs->mca_get_valid_mca_count(adev, type, count); + + return -EOPNOTSUPP; +} + +int amdgpu_mca_smu_get_mca_set_error_count(struct amdgpu_device *adev, enum amdgpu_ras_block blk, + enum amdgpu_mca_error_type type, uint32_t *total) +{ + const struct amdgpu_mca_smu_funcs *mca_funcs = adev->mca.mca_funcs; + struct mca_bank_set mca_set; + struct mca_bank_node *node; + struct mca_bank_entry *entry; + uint32_t count; + int ret; + + if (!total) + return -EINVAL; + + if (!mca_funcs) + return -EOPNOTSUPP; + + if (!mca_funcs->mca_get_ras_mca_set || !mca_funcs->mca_get_valid_mca_count) + return -EOPNOTSUPP; + + amdgpu_mca_bank_set_init(&mca_set); + + ret = mca_funcs->mca_get_ras_mca_set(adev, blk, type, &mca_set); + if (ret) + goto err_mca_set_release; + + *total = 0; + list_for_each_entry(node, &mca_set.list, node) { + entry = &node->entry; + + count = 0; + ret = mca_funcs->mca_parse_mca_error_count(adev, blk, type, entry, &count); + if (ret) + goto err_mca_set_release; + + *total += count; + } + +err_mca_set_release: + amdgpu_mca_bank_set_release(&mca_set); + + return ret; +} + +int amdgpu_mca_smu_parse_mca_error_count(struct amdgpu_device *adev, enum amdgpu_ras_block blk, + enum amdgpu_mca_error_type type, struct mca_bank_entry *entry, uint32_t *count) +{ + const struct amdgpu_mca_smu_funcs *mca_funcs = adev->mca.mca_funcs; + if (!count || !entry) + return -EINVAL; + + if (!mca_funcs || !mca_funcs->mca_parse_mca_error_count) + return -EOPNOTSUPP; + + + return mca_funcs->mca_parse_mca_error_count(adev, blk, type, entry, count); +} + +int amdgpu_mca_smu_get_mca_set(struct amdgpu_device *adev, enum amdgpu_ras_block blk, + enum amdgpu_mca_error_type type, struct mca_bank_set *mca_set) +{ + const struct amdgpu_mca_smu_funcs *mca_funcs = adev->mca.mca_funcs; + + if (!mca_set) + return -EINVAL; + + if (!mca_funcs || !mca_funcs->mca_get_ras_mca_set) + return -EOPNOTSUPP; + + WARN_ON(!list_empty(&mca_set->list)); + + return mca_funcs->mca_get_ras_mca_set(adev, blk, type, mca_set); +} + +int amdgpu_mca_smu_get_mca_entry(struct amdgpu_device *adev, enum amdgpu_mca_error_type type, + int idx, struct mca_bank_entry *entry) +{ + const struct amdgpu_mca_smu_funcs *mca_funcs = adev->mca.mca_funcs; + int count; + + switch (type) { + case AMDGPU_MCA_ERROR_TYPE_UE: + count = mca_funcs->max_ue_count; + break; + case AMDGPU_MCA_ERROR_TYPE_CE: + count = mca_funcs->max_ce_count; + break; + default: + return -EINVAL; + } + + if (idx >= count) + return -EINVAL; + + if (mca_funcs && mca_funcs->mca_get_mca_entry) + return mca_funcs->mca_get_mca_entry(adev, type, idx, entry); + + return -EOPNOTSUPP; +} + +#if defined(CONFIG_DEBUG_FS) +static int amdgpu_mca_smu_debug_mode_set(void *data, u64 val) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)data; + int ret; + + ret = amdgpu_mca_smu_set_debug_mode(adev, val ? true : false); + if (ret) + return ret; + + dev_info(adev->dev, "amdgpu set smu mca debug mode %s success\n", val ? "on" : "off"); + + return 0; +} + +static void mca_dump_entry(struct seq_file *m, struct mca_bank_entry *entry) +{ + int i, idx = entry->idx; + int reg_idx_array[] = { + MCA_REG_IDX_STATUS, + MCA_REG_IDX_ADDR, + MCA_REG_IDX_MISC0, + MCA_REG_IDX_IPID, + MCA_REG_IDX_SYND, + }; + + seq_printf(m, "mca entry[%d].type: %s\n", idx, entry->type == AMDGPU_MCA_ERROR_TYPE_UE ? "UE" : "CE"); + seq_printf(m, "mca entry[%d].ip: %d\n", idx, entry->ip); + seq_printf(m, "mca entry[%d].info: socketid:%d aid:%d hwid:0x%03x mcatype:0x%04x\n", + idx, entry->info.socket_id, entry->info.aid, entry->info.hwid, entry->info.mcatype); + + for (i = 0; i < ARRAY_SIZE(reg_idx_array); i++) + seq_printf(m, "mca entry[%d].regs[%d]: 0x%016llx\n", idx, reg_idx_array[i], entry->regs[reg_idx_array[i]]); +} + +static int mca_dump_show(struct seq_file *m, enum amdgpu_mca_error_type type) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)m->private; + struct mca_bank_entry *entry; + uint32_t count = 0; + int i, ret; + + ret = amdgpu_mca_smu_get_valid_mca_count(adev, type, &count); + if (ret) + return ret; + + seq_printf(m, "amdgpu smu %s valid mca count: %d\n", + type == AMDGPU_MCA_ERROR_TYPE_UE ? "UE" : "CE", count); + + if (!count) + return 0; + + entry = kmalloc(sizeof(*entry), GFP_KERNEL); + if (!entry) + return -ENOMEM; + + for (i = 0; i < count; i++) { + memset(entry, 0, sizeof(*entry)); + + ret = amdgpu_mca_smu_get_mca_entry(adev, type, i, entry); + if (ret) + goto err_free_entry; + + mca_dump_entry(m, entry); + } + +err_free_entry: + kfree(entry); + + return ret; +} + +static int mca_dump_ce_show(struct seq_file *m, void *unused) +{ + return mca_dump_show(m, AMDGPU_MCA_ERROR_TYPE_CE); +} + +static int mca_dump_ce_open(struct inode *inode, struct file *file) +{ + return single_open(file, mca_dump_ce_show, inode->i_private); +} + +static const struct file_operations mca_ce_dump_debug_fops = { + .owner = THIS_MODULE, + .open = mca_dump_ce_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int mca_dump_ue_show(struct seq_file *m, void *unused) +{ + return mca_dump_show(m, AMDGPU_MCA_ERROR_TYPE_UE); +} + +static int mca_dump_ue_open(struct inode *inode, struct file *file) +{ + return single_open(file, mca_dump_ue_show, inode->i_private); +} + +static const struct file_operations mca_ue_dump_debug_fops = { + .owner = THIS_MODULE, + .open = mca_dump_ue_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +DEFINE_DEBUGFS_ATTRIBUTE(mca_debug_mode_fops, NULL, amdgpu_mca_smu_debug_mode_set, "%llu\n"); +#endif + +void amdgpu_mca_smu_debugfs_init(struct amdgpu_device *adev, struct dentry *root) +{ +#if defined(CONFIG_DEBUG_FS) + if (!root || adev->ip_versions[MP1_HWIP][0] != IP_VERSION(13, 0, 6)) + return; + + debugfs_create_file("mca_debug_mode", 0200, root, adev, &mca_debug_mode_fops); + debugfs_create_file("mca_ue_dump", 0400, root, adev, &mca_ue_dump_debug_fops); + debugfs_create_file("mca_ce_dump", 0400, root, adev, &mca_ce_dump_debug_fops); +#endif +} + diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h index 997a073e2409..e51e8918e667 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h @@ -21,6 +21,50 @@ #ifndef __AMDGPU_MCA_H__ #define __AMDGPU_MCA_H__ +#include "amdgpu_ras.h" + +#define MCA_MAX_REGS_COUNT (16) + +#define MCA_REG_FIELD(x, h, l) (((x) & GENMASK_ULL(h, l)) >> l) +#define MCA_REG__STATUS__VAL(x) MCA_REG_FIELD(x, 63, 63) +#define MCA_REG__STATUS__OVERFLOW(x) MCA_REG_FIELD(x, 62, 62) +#define MCA_REG__STATUS__UC(x) MCA_REG_FIELD(x, 61, 61) +#define MCA_REG__STATUS__EN(x) MCA_REG_FIELD(x, 60, 60) +#define MCA_REG__STATUS__MISCV(x) MCA_REG_FIELD(x, 59, 59) +#define MCA_REG__STATUS__ADDRV(x) MCA_REG_FIELD(x, 58, 58) +#define MCA_REG__STATUS__PCC(x) MCA_REG_FIELD(x, 57, 57) +#define MCA_REG__STATUS__ERRCOREIDVAL(x) MCA_REG_FIELD(x, 56, 56) +#define MCA_REG__STATUS__TCC(x) MCA_REG_FIELD(x, 55, 55) +#define MCA_REG__STATUS__SYNDV(x) MCA_REG_FIELD(x, 53, 53) +#define MCA_REG__STATUS__CECC(x) MCA_REG_FIELD(x, 46, 46) +#define MCA_REG__STATUS__UECC(x) MCA_REG_FIELD(x, 45, 45) +#define MCA_REG__STATUS__DEFERRED(x) MCA_REG_FIELD(x, 44, 44) +#define MCA_REG__STATUS__POISON(x) MCA_REG_FIELD(x, 43, 43) +#define MCA_REG__STATUS__SCRUB(x) MCA_REG_FIELD(x, 40, 40) +#define MCA_REG__STATUS__ERRCOREID(x) MCA_REG_FIELD(x, 37, 32) +#define MCA_REG__STATUS__ADDRLSB(x) MCA_REG_FIELD(x, 29, 24) +#define MCA_REG__STATUS__ERRORCODEEXT(x) MCA_REG_FIELD(x, 21, 16) +#define MCA_REG__STATUS__ERRORCODE(x) MCA_REG_FIELD(x, 15, 0) + +#define MCA_REG__SYND__ERRORINFORMATION(x) MCA_REG_FIELD(x, 17, 0) + +enum amdgpu_mca_ip { + AMDGPU_MCA_IP_UNKNOW = -1, + AMDGPU_MCA_IP_PSP = 0, + AMDGPU_MCA_IP_SDMA, + AMDGPU_MCA_IP_GC, + AMDGPU_MCA_IP_SMU, + AMDGPU_MCA_IP_MP5, + AMDGPU_MCA_IP_UMC, + AMDGPU_MCA_IP_PCS_XGMI, + AMDGPU_MCA_IP_COUNT, +}; + +enum amdgpu_mca_error_type { + AMDGPU_MCA_ERROR_TYPE_UE = 0, + AMDGPU_MCA_ERROR_TYPE_CE, +}; + struct amdgpu_mca_ras_block { struct amdgpu_ras_block_object ras_block; }; @@ -34,6 +78,55 @@ struct amdgpu_mca { struct amdgpu_mca_ras mp0; struct amdgpu_mca_ras mp1; struct amdgpu_mca_ras mpio; + const struct amdgpu_mca_smu_funcs *mca_funcs; +}; + +enum mca_reg_idx { + MCA_REG_IDX_STATUS = 1, + MCA_REG_IDX_ADDR = 2, + MCA_REG_IDX_MISC0 = 3, + MCA_REG_IDX_IPID = 5, + MCA_REG_IDX_SYND = 6, + MCA_REG_IDX_COUNT = 16, +}; + +struct mca_bank_info { + int socket_id; + int aid; + int hwid; + int mcatype; +}; + +struct mca_bank_entry { + int idx; + enum amdgpu_mca_error_type type; + enum amdgpu_mca_ip ip; + struct mca_bank_info info; + uint64_t regs[MCA_MAX_REGS_COUNT]; +}; + +struct mca_bank_node { + struct mca_bank_entry entry; + struct list_head node; +}; + +struct mca_bank_set { + int nr_entries; + struct list_head list; +}; + +struct amdgpu_mca_smu_funcs { + int max_ue_count; + int max_ce_count; + int (*mca_set_debug_mode)(struct amdgpu_device *adev, bool enable); + int (*mca_get_ras_mca_set)(struct amdgpu_device *adev, enum amdgpu_ras_block blk, enum amdgpu_mca_error_type type, + struct mca_bank_set *mca_set); + int (*mca_parse_mca_error_count)(struct amdgpu_device *adev, enum amdgpu_ras_block blk, enum amdgpu_mca_error_type type, + struct mca_bank_entry *entry, uint32_t *count); + int (*mca_get_valid_mca_count)(struct amdgpu_device *adev, enum amdgpu_mca_error_type type, + uint32_t *count); + int (*mca_get_mca_entry)(struct amdgpu_device *adev, enum amdgpu_mca_error_type type, + int idx, struct mca_bank_entry *entry); }; void amdgpu_mca_query_correctable_error_count(struct amdgpu_device *adev, @@ -53,4 +146,26 @@ void amdgpu_mca_query_ras_error_count(struct amdgpu_device *adev, int amdgpu_mca_mp0_ras_sw_init(struct amdgpu_device *adev); int amdgpu_mca_mp1_ras_sw_init(struct amdgpu_device *adev); int amdgpu_mca_mpio_ras_sw_init(struct amdgpu_device *adev); + +void amdgpu_mca_smu_init_funcs(struct amdgpu_device *adev, const struct amdgpu_mca_smu_funcs *mca_funcs); +int amdgpu_mca_smu_set_debug_mode(struct amdgpu_device *adev, bool enable); +int amdgpu_mca_smu_get_valid_mca_count(struct amdgpu_device *adev, enum amdgpu_mca_error_type type, uint32_t *count); +int amdgpu_mca_smu_get_mca_set_error_count(struct amdgpu_device *adev, enum amdgpu_ras_block blk, + enum amdgpu_mca_error_type type, uint32_t *total); +int amdgpu_mca_smu_get_error_count(struct amdgpu_device *adev, enum amdgpu_ras_block blk, + enum amdgpu_mca_error_type type, uint32_t *count); +int amdgpu_mca_smu_parse_mca_error_count(struct amdgpu_device *adev, enum amdgpu_ras_block blk, + enum amdgpu_mca_error_type type, struct mca_bank_entry *entry, uint32_t *count); +int amdgpu_mca_smu_get_mca_set(struct amdgpu_device *adev, enum amdgpu_ras_block blk, + enum amdgpu_mca_error_type type, struct mca_bank_set *mca_set); +int amdgpu_mca_smu_get_mca_entry(struct amdgpu_device *adev, enum amdgpu_mca_error_type type, + int idx, struct mca_bank_entry *entry); + +void amdgpu_mca_smu_debugfs_init(struct amdgpu_device *adev, struct dentry *root); + +void amdgpu_mca_bank_set_init(struct mca_bank_set *mca_set); +int amdgpu_mca_bank_set_add_entry(struct mca_bank_set *mca_set, struct mca_bank_entry *entry); +void amdgpu_mca_bank_set_release(struct mca_bank_set *mca_set); +int amdgpu_mca_smu_log_ras_error(struct amdgpu_device *adev, enum amdgpu_ras_block blk, enum amdgpu_mca_error_type type, struct ras_err_data *err_data); + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c index f808841310fd..9ddbf1494326 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c @@ -22,6 +22,7 @@ */ #include <linux/firmware.h> +#include <drm/drm_exec.h> #include "amdgpu_mes.h" #include "amdgpu.h" @@ -38,120 +39,70 @@ int amdgpu_mes_doorbell_process_slice(struct amdgpu_device *adev) PAGE_SIZE); } -int amdgpu_mes_alloc_process_doorbells(struct amdgpu_device *adev, - unsigned int *doorbell_index) -{ - int r = ida_simple_get(&adev->mes.doorbell_ida, 2, - adev->mes.max_doorbell_slices, - GFP_KERNEL); - if (r > 0) - *doorbell_index = r; - - return r; -} - -void amdgpu_mes_free_process_doorbells(struct amdgpu_device *adev, - unsigned int doorbell_index) -{ - if (doorbell_index) - ida_simple_remove(&adev->mes.doorbell_ida, doorbell_index); -} - -unsigned int amdgpu_mes_get_doorbell_dw_offset_in_bar( - struct amdgpu_device *adev, - uint32_t doorbell_index, - unsigned int doorbell_id) -{ - return ((doorbell_index * - amdgpu_mes_doorbell_process_slice(adev)) / sizeof(u32) + - doorbell_id * 2); -} - -static int amdgpu_mes_queue_doorbell_get(struct amdgpu_device *adev, +static int amdgpu_mes_kernel_doorbell_get(struct amdgpu_device *adev, struct amdgpu_mes_process *process, int ip_type, uint64_t *doorbell_index) { unsigned int offset, found; + struct amdgpu_mes *mes = &adev->mes; - if (ip_type == AMDGPU_RING_TYPE_SDMA) { + if (ip_type == AMDGPU_RING_TYPE_SDMA) offset = adev->doorbell_index.sdma_engine[0]; - found = find_next_zero_bit(process->doorbell_bitmap, - AMDGPU_MES_MAX_NUM_OF_QUEUES_PER_PROCESS, - offset); - } else { - found = find_first_zero_bit(process->doorbell_bitmap, - AMDGPU_MES_MAX_NUM_OF_QUEUES_PER_PROCESS); - } + else + offset = 0; - if (found >= AMDGPU_MES_MAX_NUM_OF_QUEUES_PER_PROCESS) { + found = find_next_zero_bit(mes->doorbell_bitmap, mes->num_mes_dbs, offset); + if (found >= mes->num_mes_dbs) { DRM_WARN("No doorbell available\n"); return -ENOSPC; } - set_bit(found, process->doorbell_bitmap); - - *doorbell_index = amdgpu_mes_get_doorbell_dw_offset_in_bar(adev, - process->doorbell_index, found); + set_bit(found, mes->doorbell_bitmap); + /* Get the absolute doorbell index on BAR */ + *doorbell_index = mes->db_start_dw_offset + found * 2; return 0; } -static void amdgpu_mes_queue_doorbell_free(struct amdgpu_device *adev, +static void amdgpu_mes_kernel_doorbell_free(struct amdgpu_device *adev, struct amdgpu_mes_process *process, uint32_t doorbell_index) { - unsigned int old, doorbell_id; + unsigned int old, rel_index; + struct amdgpu_mes *mes = &adev->mes; - doorbell_id = doorbell_index - - (process->doorbell_index * - amdgpu_mes_doorbell_process_slice(adev)) / sizeof(u32); - doorbell_id /= 2; - - old = test_and_clear_bit(doorbell_id, process->doorbell_bitmap); + /* Find the relative index of the doorbell in this object */ + rel_index = (doorbell_index - mes->db_start_dw_offset) / 2; + old = test_and_clear_bit(rel_index, mes->doorbell_bitmap); WARN_ON(!old); } static int amdgpu_mes_doorbell_init(struct amdgpu_device *adev) { - size_t doorbell_start_offset; - size_t doorbell_aperture_size; - size_t doorbell_process_limit; - size_t aggregated_doorbell_start; int i; + struct amdgpu_mes *mes = &adev->mes; - aggregated_doorbell_start = (adev->doorbell_index.max_assignment + 1) * sizeof(u32); - aggregated_doorbell_start = - roundup(aggregated_doorbell_start, PAGE_SIZE); - - doorbell_start_offset = aggregated_doorbell_start + PAGE_SIZE; - doorbell_start_offset = - roundup(doorbell_start_offset, - amdgpu_mes_doorbell_process_slice(adev)); - - doorbell_aperture_size = adev->doorbell.size; - doorbell_aperture_size = - rounddown(doorbell_aperture_size, - amdgpu_mes_doorbell_process_slice(adev)); - - if (doorbell_aperture_size > doorbell_start_offset) - doorbell_process_limit = - (doorbell_aperture_size - doorbell_start_offset) / - amdgpu_mes_doorbell_process_slice(adev); - else - return -ENOSPC; - - adev->mes.doorbell_id_offset = doorbell_start_offset / sizeof(u32); - adev->mes.max_doorbell_slices = doorbell_process_limit; + /* Bitmap for dynamic allocation of kernel doorbells */ + mes->doorbell_bitmap = bitmap_zalloc(PAGE_SIZE / sizeof(u32), GFP_KERNEL); + if (!mes->doorbell_bitmap) { + DRM_ERROR("Failed to allocate MES doorbell bitmap\n"); + return -ENOMEM; + } - /* allocate Qword range for aggregated doorbell */ - for (i = 0; i < AMDGPU_MES_PRIORITY_NUM_LEVELS; i++) - adev->mes.aggregated_doorbells[i] = - aggregated_doorbell_start / sizeof(u32) + i * 2; + mes->num_mes_dbs = PAGE_SIZE / AMDGPU_ONE_DOORBELL_SIZE; + for (i = 0; i < AMDGPU_MES_PRIORITY_NUM_LEVELS; i++) { + adev->mes.aggregated_doorbells[i] = mes->db_start_dw_offset + i * 2; + set_bit(i, mes->doorbell_bitmap); + } - DRM_INFO("max_doorbell_slices=%zu\n", doorbell_process_limit); return 0; } +static void amdgpu_mes_doorbell_free(struct amdgpu_device *adev) +{ + bitmap_free(adev->mes.doorbell_bitmap); +} + int amdgpu_mes_init(struct amdgpu_device *adev) { int i, r; @@ -181,7 +132,8 @@ int amdgpu_mes_init(struct amdgpu_device *adev) adev->mes.gfx_hqd_mask[i] = i ? 0 : 0xfffffffe; for (i = 0; i < AMDGPU_MES_MAX_SDMA_PIPES; i++) { - if (adev->ip_versions[SDMA0_HWIP][0] < IP_VERSION(6, 0, 0)) + if (amdgpu_ip_version(adev, SDMA0_HWIP, 0) < + IP_VERSION(6, 0, 0)) adev->mes.sdma_hqd_mask[i] = i ? 0 : 0x3fc; /* zero sdma_hqd_mask for non-existent engine */ else if (adev->sdma.num_instances == 1) @@ -250,6 +202,7 @@ void amdgpu_mes_fini(struct amdgpu_device *adev) amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs); amdgpu_device_wb_free(adev, adev->mes.query_status_fence_offs); amdgpu_device_wb_free(adev, adev->mes.read_val_offs); + amdgpu_mes_doorbell_free(adev); idr_destroy(&adev->mes.pasid_idr); idr_destroy(&adev->mes.gang_id_idr); @@ -278,15 +231,6 @@ int amdgpu_mes_create_process(struct amdgpu_device *adev, int pasid, return -ENOMEM; } - process->doorbell_bitmap = - kzalloc(DIV_ROUND_UP(AMDGPU_MES_MAX_NUM_OF_QUEUES_PER_PROCESS, - BITS_PER_BYTE), GFP_KERNEL); - if (!process->doorbell_bitmap) { - DRM_ERROR("failed to allocate doorbell bitmap\n"); - kfree(process); - return -ENOMEM; - } - /* allocate the process context bo and map it */ r = amdgpu_bo_create_kernel(adev, AMDGPU_MES_PROC_CTX_SIZE, PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, @@ -313,15 +257,6 @@ int amdgpu_mes_create_process(struct amdgpu_device *adev, int pasid, goto clean_up_ctx; } - /* allocate the starting doorbell index of the process */ - r = amdgpu_mes_alloc_process_doorbells(adev, &process->doorbell_index); - if (r < 0) { - DRM_ERROR("failed to allocate doorbell for process\n"); - goto clean_up_pasid; - } - - DRM_DEBUG("process doorbell index = %d\n", process->doorbell_index); - INIT_LIST_HEAD(&process->gang_list); process->vm = vm; process->pasid = pasid; @@ -331,15 +266,12 @@ int amdgpu_mes_create_process(struct amdgpu_device *adev, int pasid, amdgpu_mes_unlock(&adev->mes); return 0; -clean_up_pasid: - idr_remove(&adev->mes.pasid_idr, pasid); - amdgpu_mes_unlock(&adev->mes); clean_up_ctx: + amdgpu_mes_unlock(&adev->mes); amdgpu_bo_free_kernel(&process->proc_ctx_bo, &process->proc_ctx_gpu_addr, &process->proc_ctx_cpu_ptr); clean_up_memory: - kfree(process->doorbell_bitmap); kfree(process); return r; } @@ -385,7 +317,6 @@ void amdgpu_mes_destroy_process(struct amdgpu_device *adev, int pasid) idr_remove(&adev->mes.gang_id_idr, gang->gang_id); } - amdgpu_mes_free_process_doorbells(adev, process->doorbell_index); idr_remove(&adev->mes.pasid_idr, pasid); amdgpu_mes_unlock(&adev->mes); @@ -407,7 +338,6 @@ void amdgpu_mes_destroy_process(struct amdgpu_device *adev, int pasid) amdgpu_bo_free_kernel(&process->proc_ctx_bo, &process->proc_ctx_gpu_addr, &process->proc_ctx_cpu_ptr); - kfree(process->doorbell_bitmap); kfree(process); } @@ -627,8 +557,20 @@ static void amdgpu_mes_queue_init_mqd(struct amdgpu_device *adev, mqd_prop.hqd_queue_priority = p->hqd_queue_priority; mqd_prop.hqd_active = false; + if (p->queue_type == AMDGPU_RING_TYPE_GFX || + p->queue_type == AMDGPU_RING_TYPE_COMPUTE) { + mutex_lock(&adev->srbm_mutex); + amdgpu_gfx_select_me_pipe_q(adev, p->ring->me, p->ring->pipe, 0, 0, 0); + } + mqd_mgr->init_mqd(adev, q->mqd_cpu_ptr, &mqd_prop); + if (p->queue_type == AMDGPU_RING_TYPE_GFX || + p->queue_type == AMDGPU_RING_TYPE_COMPUTE) { + amdgpu_gfx_select_me_pipe_q(adev, 0, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); + } + amdgpu_bo_unreserve(q->mqd_obj); } @@ -642,6 +584,8 @@ int amdgpu_mes_add_hw_queue(struct amdgpu_device *adev, int gang_id, unsigned long flags; int r; + memset(&queue_input, 0, sizeof(struct mes_add_queue_input)); + /* allocate the mes queue buffer */ queue = kzalloc(sizeof(struct amdgpu_mes_queue), GFP_KERNEL); if (!queue) { @@ -679,7 +623,7 @@ int amdgpu_mes_add_hw_queue(struct amdgpu_device *adev, int gang_id, *queue_id = queue->queue_id = r; /* allocate a doorbell index for the queue */ - r = amdgpu_mes_queue_doorbell_get(adev, gang->process, + r = amdgpu_mes_kernel_doorbell_get(adev, gang->process, qprops->queue_type, &qprops->doorbell_off); if (r) @@ -737,7 +681,7 @@ int amdgpu_mes_add_hw_queue(struct amdgpu_device *adev, int gang_id, return 0; clean_up_doorbell: - amdgpu_mes_queue_doorbell_free(adev, gang->process, + amdgpu_mes_kernel_doorbell_free(adev, gang->process, qprops->doorbell_off); clean_up_queue_id: spin_lock_irqsave(&adev->mes.queue_id_lock, flags); @@ -792,7 +736,7 @@ int amdgpu_mes_remove_hw_queue(struct amdgpu_device *adev, int queue_id) queue_id); list_del(&queue->list); - amdgpu_mes_queue_doorbell_free(adev, gang->process, + amdgpu_mes_kernel_doorbell_free(adev, gang->process, queue->doorbell_off); amdgpu_mes_unlock(&adev->mes); @@ -1062,9 +1006,13 @@ int amdgpu_mes_add_ring(struct amdgpu_device *adev, int gang_id, switch (queue_type) { case AMDGPU_RING_TYPE_GFX: ring->funcs = adev->gfx.gfx_ring[0].funcs; + ring->me = adev->gfx.gfx_ring[0].me; + ring->pipe = adev->gfx.gfx_ring[0].pipe; break; case AMDGPU_RING_TYPE_COMPUTE: ring->funcs = adev->gfx.compute_ring[0].funcs; + ring->me = adev->gfx.compute_ring[0].me; + ring->pipe = adev->gfx.compute_ring[0].pipe; break; case AMDGPU_RING_TYPE_SDMA: ring->funcs = adev->sdma.instance[0].ring.funcs; @@ -1168,34 +1116,31 @@ int amdgpu_mes_ctx_map_meta_data(struct amdgpu_device *adev, struct amdgpu_mes_ctx_data *ctx_data) { struct amdgpu_bo_va *bo_va; - struct ww_acquire_ctx ticket; - struct list_head list; - struct amdgpu_bo_list_entry pd; - struct ttm_validate_buffer csa_tv; struct amdgpu_sync sync; + struct drm_exec exec; int r; amdgpu_sync_create(&sync); - INIT_LIST_HEAD(&list); - INIT_LIST_HEAD(&csa_tv.head); - - csa_tv.bo = &ctx_data->meta_data_obj->tbo; - csa_tv.num_shared = 1; - - list_add(&csa_tv.head, &list); - amdgpu_vm_get_pd_bo(vm, &list, &pd); - r = ttm_eu_reserve_buffers(&ticket, &list, true, NULL); - if (r) { - DRM_ERROR("failed to reserve meta data BO: err=%d\n", r); - return r; + drm_exec_init(&exec, 0); + drm_exec_until_all_locked(&exec) { + r = drm_exec_lock_obj(&exec, + &ctx_data->meta_data_obj->tbo.base); + drm_exec_retry_on_contention(&exec); + if (unlikely(r)) + goto error_fini_exec; + + r = amdgpu_vm_lock_pd(vm, &exec, 0); + drm_exec_retry_on_contention(&exec); + if (unlikely(r)) + goto error_fini_exec; } bo_va = amdgpu_vm_bo_add(adev, vm, ctx_data->meta_data_obj); if (!bo_va) { - ttm_eu_backoff_reservation(&ticket, &list); DRM_ERROR("failed to create bo_va for meta data BO\n"); - return -ENOMEM; + r = -ENOMEM; + goto error_fini_exec; } r = amdgpu_vm_bo_map(adev, bo_va, ctx_data->meta_data_gpu_addr, 0, @@ -1205,33 +1150,35 @@ int amdgpu_mes_ctx_map_meta_data(struct amdgpu_device *adev, if (r) { DRM_ERROR("failed to do bo_map on meta data, err=%d\n", r); - goto error; + goto error_del_bo_va; } r = amdgpu_vm_bo_update(adev, bo_va, false); if (r) { DRM_ERROR("failed to do vm_bo_update on meta data\n"); - goto error; + goto error_del_bo_va; } amdgpu_sync_fence(&sync, bo_va->last_pt_update); r = amdgpu_vm_update_pdes(adev, vm, false); if (r) { DRM_ERROR("failed to update pdes on meta data\n"); - goto error; + goto error_del_bo_va; } amdgpu_sync_fence(&sync, vm->last_update); amdgpu_sync_wait(&sync, false); - ttm_eu_backoff_reservation(&ticket, &list); + drm_exec_fini(&exec); amdgpu_sync_free(&sync); ctx_data->meta_data_va = bo_va; return 0; -error: +error_del_bo_va: amdgpu_vm_bo_del(adev, bo_va); - ttm_eu_backoff_reservation(&ticket, &list); + +error_fini_exec: + drm_exec_fini(&exec); amdgpu_sync_free(&sync); return r; } @@ -1242,34 +1189,30 @@ int amdgpu_mes_ctx_unmap_meta_data(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va = ctx_data->meta_data_va; struct amdgpu_bo *bo = ctx_data->meta_data_obj; struct amdgpu_vm *vm = bo_va->base.vm; - struct amdgpu_bo_list_entry vm_pd; - struct list_head list, duplicates; - struct dma_fence *fence = NULL; - struct ttm_validate_buffer tv; - struct ww_acquire_ctx ticket; - long r = 0; - - INIT_LIST_HEAD(&list); - INIT_LIST_HEAD(&duplicates); - - tv.bo = &bo->tbo; - tv.num_shared = 2; - list_add(&tv.head, &list); - - amdgpu_vm_get_pd_bo(vm, &list, &vm_pd); - - r = ttm_eu_reserve_buffers(&ticket, &list, false, &duplicates); - if (r) { - dev_err(adev->dev, "leaking bo va because " - "we fail to reserve bo (%ld)\n", r); - return r; + struct dma_fence *fence; + struct drm_exec exec; + long r; + + drm_exec_init(&exec, 0); + drm_exec_until_all_locked(&exec) { + r = drm_exec_lock_obj(&exec, + &ctx_data->meta_data_obj->tbo.base); + drm_exec_retry_on_contention(&exec); + if (unlikely(r)) + goto out_unlock; + + r = amdgpu_vm_lock_pd(vm, &exec, 0); + drm_exec_retry_on_contention(&exec); + if (unlikely(r)) + goto out_unlock; } amdgpu_vm_bo_del(adev, bo_va); if (!amdgpu_vm_ready(vm)) goto out_unlock; - r = dma_resv_get_singleton(bo->tbo.base.resv, DMA_RESV_USAGE_BOOKKEEP, &fence); + r = dma_resv_get_singleton(bo->tbo.base.resv, DMA_RESV_USAGE_BOOKKEEP, + &fence); if (r) goto out_unlock; if (fence) { @@ -1288,7 +1231,7 @@ int amdgpu_mes_ctx_unmap_meta_data(struct amdgpu_device *adev, out_unlock: if (unlikely(r < 0)) dev_err(adev->dev, "failed to clear page tables (%ld)\n", r); - ttm_eu_backoff_reservation(&ticket, &list); + drm_exec_fini(&exec); return r; } @@ -1409,8 +1352,10 @@ int amdgpu_mes_self_test(struct amdgpu_device *adev) for (i = 0; i < ARRAY_SIZE(queue_types); i++) { /* On GFX v10.3, fw hasn't supported to map sdma queue. */ - if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(10, 3, 0) && - adev->ip_versions[GC_HWIP][0] < IP_VERSION(11, 0, 0) && + if (amdgpu_ip_version(adev, GC_HWIP, 0) >= + IP_VERSION(10, 3, 0) && + amdgpu_ip_version(adev, GC_HWIP, 0) < + IP_VERSION(11, 0, 0) && queue_types[i][0] == AMDGPU_RING_TYPE_SDMA) continue; @@ -1471,7 +1416,7 @@ int amdgpu_mes_init_microcode(struct amdgpu_device *adev, int pipe) amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix)); - if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(11, 0, 0)) { + if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(11, 0, 0)) { snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mes%s.bin", ucode_prefix, pipe == AMDGPU_MES_SCHED_PIPE ? "_2" : "1"); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h index 2d6ac30b7135..a27b424ffe00 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h @@ -27,6 +27,7 @@ #include "amdgpu_irq.h" #include "kgd_kfd_interface.h" #include "amdgpu_gfx.h" +#include "amdgpu_doorbell.h" #include <linux/sched/mm.h> #define AMDGPU_MES_MAX_COMPUTE_PIPES 8 @@ -76,7 +77,6 @@ struct amdgpu_mes { uint32_t kiq_version; uint32_t total_max_queue; - uint32_t doorbell_id_offset; uint32_t max_doorbell_slices; uint64_t default_process_quantum; @@ -128,6 +128,11 @@ struct amdgpu_mes { int (*kiq_hw_init)(struct amdgpu_device *adev); int (*kiq_hw_fini)(struct amdgpu_device *adev); + /* MES doorbells */ + uint32_t db_start_dw_offset; + uint32_t num_mes_dbs; + unsigned long *doorbell_bitmap; + /* ip specific functions */ const struct amdgpu_mes_funcs *funcs; }; @@ -142,7 +147,6 @@ struct amdgpu_mes_process { uint64_t process_quantum; struct list_head gang_list; uint32_t doorbell_index; - unsigned long *doorbell_bitmap; struct mutex doorbell_lock; }; @@ -224,6 +228,7 @@ struct mes_add_queue_input { uint32_t is_kfd_process; uint32_t is_aql_queue; uint32_t queue_size; + uint32_t exclusively_scheduled; }; struct mes_remove_queue_input { @@ -386,14 +391,6 @@ int amdgpu_mes_ctx_unmap_meta_data(struct amdgpu_device *adev, int amdgpu_mes_self_test(struct amdgpu_device *adev); -int amdgpu_mes_alloc_process_doorbells(struct amdgpu_device *adev, - unsigned int *doorbell_index); -void amdgpu_mes_free_process_doorbells(struct amdgpu_device *adev, - unsigned int doorbell_index); -unsigned int amdgpu_mes_get_doorbell_dw_offset_in_bar( - struct amdgpu_device *adev, - uint32_t doorbell_index, - unsigned int doorbell_id); int amdgpu_mes_doorbell_process_slice(struct amdgpu_device *adev); /* diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c index a3bc00577a7c..51ca544a7094 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c @@ -45,6 +45,22 @@ int amdgpu_nbio_ras_sw_init(struct amdgpu_device *adev) return 0; } +u64 amdgpu_nbio_get_pcie_replay_count(struct amdgpu_device *adev) +{ + if (adev->nbio.funcs && adev->nbio.funcs->get_pcie_replay_count) + return adev->nbio.funcs->get_pcie_replay_count(adev); + + return 0; +} + +void amdgpu_nbio_get_pcie_usage(struct amdgpu_device *adev, uint64_t *count0, + uint64_t *count1) +{ + if (adev->nbio.funcs->get_pcie_usage) + adev->nbio.funcs->get_pcie_usage(adev, count0, count1); + +} + int amdgpu_nbio_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block) { int r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h index 8ab8ae01f87c..65e35059de40 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h @@ -69,6 +69,8 @@ struct amdgpu_nbio_funcs { u32 (*get_memsize)(struct amdgpu_device *adev); void (*sdma_doorbell_range)(struct amdgpu_device *adev, int instance, bool use_doorbell, int doorbell_index, int doorbell_size); + void (*vpe_doorbell_range)(struct amdgpu_device *adev, int instance, + bool use_doorbell, int doorbell_index, int doorbell_size); void (*vcn_doorbell_range)(struct amdgpu_device *adev, bool use_doorbell, int doorbell_index, int instance); void (*gc_doorbell_init)(struct amdgpu_device *adev); @@ -99,6 +101,9 @@ struct amdgpu_nbio_funcs { int (*get_compute_partition_mode)(struct amdgpu_device *adev); u32 (*get_memory_partition_mode)(struct amdgpu_device *adev, u32 *supp_modes); + u64 (*get_pcie_replay_count)(struct amdgpu_device *adev); + void (*get_pcie_usage)(struct amdgpu_device *adev, uint64_t *count0, + uint64_t *count1); }; struct amdgpu_nbio { @@ -111,5 +116,8 @@ struct amdgpu_nbio { }; int amdgpu_nbio_ras_sw_init(struct amdgpu_device *adev); +void amdgpu_nbio_get_pcie_usage(struct amdgpu_device *adev, uint64_t *count0, uint64_t *count1); int amdgpu_nbio_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block); +u64 amdgpu_nbio_get_pcie_replay_count(struct amdgpu_device *adev); + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index f7905bce0de1..5ad03f2afdb4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -158,6 +158,14 @@ void amdgpu_bo_placement_from_domain(struct amdgpu_bo *abo, u32 domain) c++; } + if (domain & AMDGPU_GEM_DOMAIN_DOORBELL) { + places[c].fpfn = 0; + places[c].lpfn = 0; + places[c].mem_type = AMDGPU_PL_DOORBELL; + places[c].flags = 0; + c++; + } + if (domain & AMDGPU_GEM_DOMAIN_GTT) { places[c].fpfn = 0; places[c].lpfn = 0; @@ -451,7 +459,7 @@ void amdgpu_bo_free_kernel(struct amdgpu_bo **bo, u64 *gpu_addr, *cpu_addr = NULL; } -/* Validate bo size is bit bigger then the request domain */ +/* Validate bo size is bit bigger than the request domain */ static bool amdgpu_bo_validate_size(struct amdgpu_device *adev, unsigned long size, u32 domain) { @@ -461,29 +469,24 @@ static bool amdgpu_bo_validate_size(struct amdgpu_device *adev, * If GTT is part of requested domains the check must succeed to * allow fall back to GTT. */ - if (domain & AMDGPU_GEM_DOMAIN_GTT) { + if (domain & AMDGPU_GEM_DOMAIN_GTT) man = ttm_manager_type(&adev->mman.bdev, TTM_PL_TT); - - if (man && size < man->size) - return true; - else if (!man) - WARN_ON_ONCE("GTT domain requested but GTT mem manager uninitialized"); - goto fail; - } else if (domain & AMDGPU_GEM_DOMAIN_VRAM) { + else if (domain & AMDGPU_GEM_DOMAIN_VRAM) man = ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM); + else + return true; - if (man && size < man->size) - return true; - goto fail; + if (!man) { + if (domain & AMDGPU_GEM_DOMAIN_GTT) + WARN_ON_ONCE("GTT domain requested but GTT mem manager uninitialized"); + return false; } - /* TODO add more domains checks, such as AMDGPU_GEM_DOMAIN_CPU */ - return true; + /* TODO add more domains checks, such as AMDGPU_GEM_DOMAIN_CPU, _DOMAIN_DOORBELL */ + if (size < man->size) + return true; -fail: - if (man) - DRM_DEBUG("BO size %lu > total memory in domain: %llu\n", size, - man->size); + DRM_DEBUG("BO size %lu > total memory in domain: %llu\n", size, man->size); return false; } @@ -1029,6 +1032,7 @@ void amdgpu_bo_unpin(struct amdgpu_bo *bo) } else if (bo->tbo.resource->mem_type == TTM_PL_TT) { atomic64_sub(amdgpu_bo_size(bo), &adev->gart_pin_size); } + } static const char * const amdgpu_vram_names[] = { @@ -1339,6 +1343,8 @@ void amdgpu_bo_release_notify(struct ttm_buffer_object *bo) abo = ttm_to_amdgpu_bo(bo); + WARN_ON(abo->vm_bo); + if (abo->kfd_bo) amdgpu_amdkfd_release_notify(abo); @@ -1523,10 +1529,14 @@ u64 amdgpu_bo_gpu_offset(struct amdgpu_bo *bo) u64 amdgpu_bo_gpu_offset_no_check(struct amdgpu_bo *bo) { struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); - uint64_t offset; + uint64_t offset = AMDGPU_BO_INVALID_OFFSET; - offset = (bo->tbo.resource->start << PAGE_SHIFT) + - amdgpu_ttm_domain_start(adev, bo->tbo.resource->mem_type); + if (bo->tbo.resource->mem_type == TTM_PL_TT) + offset = amdgpu_gmc_agp_addr(&bo->tbo); + + if (offset == AMDGPU_BO_INVALID_OFFSET) + offset = (bo->tbo.resource->start << PAGE_SHIFT) + + amdgpu_ttm_domain_start(adev, bo->tbo.resource->mem_type); return amdgpu_gmc_sign_extend(offset); } @@ -1575,23 +1585,31 @@ u64 amdgpu_bo_print_info(int id, struct amdgpu_bo *bo, struct seq_file *m) { struct dma_buf_attachment *attachment; struct dma_buf *dma_buf; - unsigned int domain; const char *placement; unsigned int pin_count; u64 size; - domain = amdgpu_mem_type_to_domain(bo->tbo.resource->mem_type); - switch (domain) { - case AMDGPU_GEM_DOMAIN_VRAM: - placement = "VRAM"; - break; - case AMDGPU_GEM_DOMAIN_GTT: - placement = " GTT"; - break; - case AMDGPU_GEM_DOMAIN_CPU: - default: - placement = " CPU"; - break; + if (dma_resv_trylock(bo->tbo.base.resv)) { + unsigned int domain; + domain = amdgpu_mem_type_to_domain(bo->tbo.resource->mem_type); + switch (domain) { + case AMDGPU_GEM_DOMAIN_VRAM: + if (amdgpu_bo_in_cpu_visible_vram(bo)) + placement = "VRAM VISIBLE"; + else + placement = "VRAM"; + break; + case AMDGPU_GEM_DOMAIN_GTT: + placement = "GTT"; + break; + case AMDGPU_GEM_DOMAIN_CPU: + default: + placement = "CPU"; + break; + } + dma_resv_unlock(bo->tbo.base.resv); + } else { + placement = "UNKNOWN"; } size = amdgpu_bo_size(bo); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h index 05496b97ef93..d28e21baef16 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h @@ -182,6 +182,8 @@ static inline unsigned amdgpu_mem_type_to_domain(u32 mem_type) return AMDGPU_GEM_DOMAIN_GWS; case AMDGPU_PL_OA: return AMDGPU_GEM_DOMAIN_OA; + case AMDGPU_PL_DOORBELL: + return AMDGPU_GEM_DOMAIN_DOORBELL; default: break; } @@ -250,7 +252,7 @@ static inline bool amdgpu_bo_in_cpu_visible_vram(struct amdgpu_bo *bo) struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); struct amdgpu_res_cursor cursor; - if (bo->tbo.resource->mem_type != TTM_PL_VRAM) + if (!bo->tbo.resource || bo->tbo.resource->mem_type != TTM_PL_VRAM) return false; amdgpu_res_first(bo->tbo.resource, 0, amdgpu_bo_size(bo), &cursor); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.c index 71ee361d0972..6e91ea1de5aa 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.c @@ -276,9 +276,8 @@ static void amdgpu_perf_read(struct perf_event *event) (!pe->adev->df.funcs->pmc_get_count)) return; + prev = local64_read(&hwc->prev_count); do { - prev = local64_read(&hwc->prev_count); - switch (hwc->config_base) { case AMDGPU_PMU_EVENT_CONFIG_TYPE_DF: case AMDGPU_PMU_EVENT_CONFIG_TYPE_XGMI: @@ -289,7 +288,7 @@ static void amdgpu_perf_read(struct perf_event *event) count = 0; break; } - } while (local64_cmpxchg(&hwc->prev_count, prev, count) != prev); + } while (!local64_try_cmpxchg(&hwc->prev_count, &prev, count)); local64_add(count - prev, &event->count); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index 78d1ee71f3f4..a21045d018f2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -45,9 +45,6 @@ #define AMD_VBIOS_FILE_MAX_SIZE_B (1024*1024*3) -static int psp_sysfs_init(struct amdgpu_device *adev); -static void psp_sysfs_fini(struct amdgpu_device *adev); - static int psp_load_smu_fw(struct psp_context *psp); static int psp_rap_terminate(struct psp_context *psp); static int psp_securedisplay_terminate(struct psp_context *psp); @@ -103,7 +100,7 @@ static void psp_check_pmfw_centralized_cstate_management(struct psp_context *psp return; } - switch (adev->ip_versions[MP0_HWIP][0]) { + switch (amdgpu_ip_version(adev, MP0_HWIP, 0)) { case IP_VERSION(11, 0, 0): case IP_VERSION(11, 0, 4): case IP_VERSION(11, 0, 5): @@ -131,7 +128,7 @@ static int psp_init_sriov_microcode(struct psp_context *psp) amdgpu_ucode_ip_version_decode(adev, MP0_HWIP, ucode_prefix, sizeof(ucode_prefix)); - switch (adev->ip_versions[MP0_HWIP][0]) { + switch (amdgpu_ip_version(adev, MP0_HWIP, 0)) { case IP_VERSION(9, 0, 0): case IP_VERSION(11, 0, 7): case IP_VERSION(11, 0, 9): @@ -148,6 +145,7 @@ static int psp_init_sriov_microcode(struct psp_context *psp) break; case IP_VERSION(13, 0, 6): ret = psp_init_cap_microcode(psp, ucode_prefix); + ret &= psp_init_ta_microcode(psp, ucode_prefix); break; case IP_VERSION(13, 0, 10): adev->virt.autoload_ucode_id = AMDGPU_UCODE_ID_CP_MES1_DATA; @@ -164,7 +162,7 @@ static int psp_early_init(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; struct psp_context *psp = &adev->psp; - switch (adev->ip_versions[MP0_HWIP][0]) { + switch (amdgpu_ip_version(adev, MP0_HWIP, 0)) { case IP_VERSION(9, 0, 0): psp_v3_1_set_psp_funcs(psp); psp->autoload_supported = false; @@ -180,9 +178,11 @@ static int psp_early_init(void *handle) psp->autoload_supported = false; break; case IP_VERSION(11, 0, 0): + case IP_VERSION(11, 0, 7): + adev->psp.sup_pd_fw_up = !amdgpu_sriov_vf(adev); + fallthrough; case IP_VERSION(11, 0, 5): case IP_VERSION(11, 0, 9): - case IP_VERSION(11, 0, 7): case IP_VERSION(11, 0, 11): case IP_VERSION(11, 5, 0): case IP_VERSION(11, 0, 12): @@ -202,8 +202,8 @@ static int psp_early_init(void *handle) case IP_VERSION(13, 0, 3): case IP_VERSION(13, 0, 5): case IP_VERSION(13, 0, 8): - case IP_VERSION(13, 0, 10): case IP_VERSION(13, 0, 11): + case IP_VERSION(14, 0, 0): psp_v13_0_set_psp_funcs(psp); psp->autoload_supported = true; break; @@ -215,8 +215,10 @@ static int psp_early_init(void *handle) break; case IP_VERSION(13, 0, 0): case IP_VERSION(13, 0, 7): + case IP_VERSION(13, 0, 10): psp_v13_0_set_psp_funcs(psp); psp->autoload_supported = true; + adev->psp.sup_ifwi_up = !amdgpu_sriov_vf(adev); break; case IP_VERSION(13, 0, 4): psp_v13_0_4_set_psp_funcs(psp); @@ -332,7 +334,7 @@ static bool psp_get_runtime_db_entry(struct amdgpu_device *adev, bool ret = false; int i; - if (adev->ip_versions[MP0_HWIP][0] == IP_VERSION(13, 0, 6)) + if (amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 6)) return false; db_header_pos = adev->gmc.mc_vram_size - PSP_RUNTIME_DB_OFFSET; @@ -411,7 +413,7 @@ static int psp_sw_init(void *handle) adev->psp.xgmi_context.supports_extended_data = !adev->gmc.xgmi.connected_to_cpu && - adev->ip_versions[MP0_HWIP][0] == IP_VERSION(13, 0, 2); + amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 2); memset(&scpm_entry, 0, sizeof(scpm_entry)); if ((psp_get_runtime_db_entry(adev, @@ -437,14 +439,15 @@ static int psp_sw_init(void *handle) /* If psp runtime database exists, then * only enable two stage memory training * when TWO_STAGE_DRAM_TRAINING bit is set - * in runtime database */ + * in runtime database + */ mem_training_ctx->enable_mem_training = true; } } else { - /* If psp runtime database doesn't exist or - * is invalid, force enable two stage memory - * training */ + /* If psp runtime database doesn't exist or is + * invalid, force enable two stage memory training + */ mem_training_ctx->enable_mem_training = true; } @@ -462,13 +465,6 @@ static int psp_sw_init(void *handle) } } - if (adev->ip_versions[MP0_HWIP][0] == IP_VERSION(11, 0, 0) || - adev->ip_versions[MP0_HWIP][0] == IP_VERSION(11, 0, 7)) { - ret = psp_sysfs_init(adev); - if (ret) - return ret; - } - ret = amdgpu_bo_create_kernel(adev, PSP_1_MEG, PSP_1_MEG, amdgpu_sriov_vf(adev) ? AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT, @@ -520,10 +516,6 @@ static int psp_sw_fini(void *handle) amdgpu_ucode_release(&psp->cap_fw); amdgpu_ucode_release(&psp->toc_fw); - if (adev->ip_versions[MP0_HWIP][0] == IP_VERSION(11, 0, 0) || - adev->ip_versions[MP0_HWIP][0] == IP_VERSION(11, 0, 7)) - psp_sysfs_fini(adev); - kfree(cmd); cmd = NULL; @@ -781,7 +773,7 @@ static int psp_load_toc(struct psp_context *psp, static bool psp_boottime_tmr(struct psp_context *psp) { - switch (psp->adev->ip_versions[MP0_HWIP][0]) { + switch (amdgpu_ip_version(psp->adev, MP0_HWIP, 0)) { case IP_VERSION(13, 0, 6): return true; default: @@ -807,7 +799,8 @@ static int psp_tmr_init(struct psp_context *psp) tmr_size = PSP_TMR_SIZE(psp->adev); /* For ASICs support RLC autoload, psp will parse the toc - * and calculate the total size of TMR needed */ + * and calculate the total size of TMR needed + */ if (!amdgpu_sriov_vf(psp->adev) && psp->toc.start_addr && psp->toc.size_bytes && @@ -835,7 +828,7 @@ static int psp_tmr_init(struct psp_context *psp) static bool psp_skip_tmr(struct psp_context *psp) { - switch (psp->adev->ip_versions[MP0_HWIP][0]) { + switch (amdgpu_ip_version(psp->adev, MP0_HWIP, 0)) { case IP_VERSION(11, 0, 9): case IP_VERSION(11, 0, 7): case IP_VERSION(13, 0, 2): @@ -1147,9 +1140,9 @@ int psp_ta_init_shared_buf(struct psp_context *psp, struct ta_mem_context *mem_ctx) { /* - * Allocate 16k memory aligned to 4k from Frame Buffer (local - * physical) for ta to host memory - */ + * Allocate 16k memory aligned to 4k from Frame Buffer (local + * physical) for ta to host memory + */ return amdgpu_bo_create_kernel(psp->adev, mem_ctx->shared_mem_size, PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT, @@ -1222,8 +1215,8 @@ int psp_xgmi_terminate(struct psp_context *psp) struct amdgpu_device *adev = psp->adev; /* XGMI TA unload currently is not supported on Arcturus/Aldebaran A+A */ - if (adev->ip_versions[MP0_HWIP][0] == IP_VERSION(11, 0, 4) || - (adev->ip_versions[MP0_HWIP][0] == IP_VERSION(13, 0, 2) && + if (amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(11, 0, 4) || + (amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 2) && adev->gmc.xgmi.connected_to_cpu)) return 0; @@ -1274,6 +1267,8 @@ invoke: xgmi_cmd->cmd_id = TA_COMMAND_XGMI__INITIALIZE; ret = psp_xgmi_invoke(psp, xgmi_cmd->cmd_id); + /* note down the capbility flag for XGMI TA */ + psp->xgmi_context.xgmi_ta_caps = xgmi_cmd->caps_flag; return ret; } @@ -1320,9 +1315,11 @@ int psp_xgmi_get_node_id(struct psp_context *psp, uint64_t *node_id) static bool psp_xgmi_peer_link_info_supported(struct psp_context *psp) { - return (psp->adev->ip_versions[MP0_HWIP][0] == IP_VERSION(13, 0, 2) && + return (amdgpu_ip_version(psp->adev, MP0_HWIP, 0) == + IP_VERSION(13, 0, 2) && psp->xgmi_context.context.bin_desc.fw_version >= 0x2000000b) || - psp->adev->ip_versions[MP0_HWIP][0] >= IP_VERSION(13, 0, 6); + amdgpu_ip_version(psp->adev, MP0_HWIP, 0) >= + IP_VERSION(13, 0, 6); } /* @@ -1393,7 +1390,7 @@ int psp_xgmi_get_topology_info(struct psp_context *psp, /* Fill in the shared memory with topology information as input */ topology_info_input = &xgmi_cmd->xgmi_in_message.get_topology_info; - xgmi_cmd->cmd_id = TA_COMMAND_XGMI__GET_GET_TOPOLOGY_INFO; + xgmi_cmd->cmd_id = TA_COMMAND_XGMI__GET_TOPOLOGY_INFO; topology_info_input->num_nodes = number_devices; for (i = 0; i < topology_info_input->num_nodes; i++) { @@ -1404,7 +1401,7 @@ int psp_xgmi_get_topology_info(struct psp_context *psp, } /* Invoke xgmi ta to get the topology information */ - ret = psp_xgmi_invoke(psp, TA_COMMAND_XGMI__GET_GET_TOPOLOGY_INFO); + ret = psp_xgmi_invoke(psp, TA_COMMAND_XGMI__GET_TOPOLOGY_INFO); if (ret) return ret; @@ -1429,26 +1426,58 @@ int psp_xgmi_get_topology_info(struct psp_context *psp, /* Invoke xgmi ta again to get the link information */ if (psp_xgmi_peer_link_info_supported(psp)) { - struct ta_xgmi_cmd_get_peer_link_info_output *link_info_output; + struct ta_xgmi_cmd_get_peer_link_info *link_info_output; + struct ta_xgmi_cmd_get_extend_peer_link_info *link_extend_info_output; bool requires_reflection = - (psp->xgmi_context.supports_extended_data && get_extended_data) || - psp->adev->ip_versions[MP0_HWIP][0] == IP_VERSION(13, 0, 6); + (psp->xgmi_context.supports_extended_data && + get_extended_data) || + amdgpu_ip_version(psp->adev, MP0_HWIP, 0) == + IP_VERSION(13, 0, 6); + bool ta_port_num_support = psp->xgmi_context.xgmi_ta_caps & + EXTEND_PEER_LINK_INFO_CMD_FLAG; + + /* popluate the shared output buffer rather than the cmd input buffer + * with node_ids as the input for GET_PEER_LINKS command execution. + * This is required for GET_PEER_LINKS per xgmi ta implementation. + * The same requirement for GET_EXTEND_PEER_LINKS command. + */ + if (ta_port_num_support) { + link_extend_info_output = &xgmi_cmd->xgmi_out_message.get_extend_link_info; - xgmi_cmd->cmd_id = TA_COMMAND_XGMI__GET_PEER_LINKS; + for (i = 0; i < topology->num_nodes; i++) + link_extend_info_output->nodes[i].node_id = topology->nodes[i].node_id; - ret = psp_xgmi_invoke(psp, TA_COMMAND_XGMI__GET_PEER_LINKS); + link_extend_info_output->num_nodes = topology->num_nodes; + xgmi_cmd->cmd_id = TA_COMMAND_XGMI__GET_EXTEND_PEER_LINKS; + } else { + link_info_output = &xgmi_cmd->xgmi_out_message.get_link_info; + for (i = 0; i < topology->num_nodes; i++) + link_info_output->nodes[i].node_id = topology->nodes[i].node_id; + + link_info_output->num_nodes = topology->num_nodes; + xgmi_cmd->cmd_id = TA_COMMAND_XGMI__GET_PEER_LINKS; + } + + ret = psp_xgmi_invoke(psp, xgmi_cmd->cmd_id); if (ret) return ret; - link_info_output = &xgmi_cmd->xgmi_out_message.get_link_info; for (i = 0; i < topology->num_nodes; i++) { + uint8_t node_num_links = ta_port_num_support ? + link_extend_info_output->nodes[i].num_links : link_info_output->nodes[i].num_links; /* accumulate num_links on extended data */ - topology->nodes[i].num_links = get_extended_data ? - topology->nodes[i].num_links + - link_info_output->nodes[i].num_links : - ((requires_reflection && topology->nodes[i].num_links) ? topology->nodes[i].num_links : - link_info_output->nodes[i].num_links); + if (get_extended_data) { + topology->nodes[i].num_links = topology->nodes[i].num_links + node_num_links; + } else { + topology->nodes[i].num_links = (requires_reflection && topology->nodes[i].num_links) ? + topology->nodes[i].num_links : node_num_links; + } + /* popluate the connected port num info if supported and available */ + if (ta_port_num_support && topology->nodes[i].num_links) { + memcpy(topology->nodes[i].port_num, link_extend_info_output->nodes[i].port_num, + sizeof(struct xgmi_connected_port_num) * TA_XGMI__MAX_PORT_NUM); + } /* reflect the topology information for bi-directionality */ if (requires_reflection && topology->nodes[i].num_hops) @@ -1738,7 +1767,8 @@ int psp_ras_trigger_error(struct psp_context *psp, return -EINVAL; /* If err_event_athub occurs error inject was successful, however - return status from TA is no long reliable */ + * return status from TA is no long reliable + */ if (amdgpu_ras_intr_triggered()) return 0; @@ -2084,6 +2114,32 @@ int psp_securedisplay_invoke(struct psp_context *psp, uint32_t ta_cmd_id) } /* SECUREDISPLAY end */ +int amdgpu_psp_wait_for_bootloader(struct amdgpu_device *adev) +{ + struct psp_context *psp = &adev->psp; + int ret = 0; + + if (!amdgpu_sriov_vf(adev) && psp->funcs && psp->funcs->wait_for_bootloader != NULL) + ret = psp->funcs->wait_for_bootloader(psp); + + return ret; +} + +int amdgpu_psp_query_boot_status(struct amdgpu_device *adev) +{ + struct psp_context *psp = &adev->psp; + int ret = 0; + + if (amdgpu_sriov_vf(adev) || (adev->flags & AMD_IS_APU)) + return 0; + + if (psp->funcs && + psp->funcs->query_boot_status) + ret = psp->funcs->query_boot_status(psp); + + return ret; +} + static int psp_hw_start(struct psp_context *psp) { struct amdgpu_device *adev = psp->adev; @@ -2385,6 +2441,27 @@ static int psp_get_fw_type(struct amdgpu_firmware_info *ucode, case AMDGPU_UCODE_ID_CP_RS64_MEC_P3_STACK: *type = GFX_FW_TYPE_RS64_MEC_P3_STACK; break; + case AMDGPU_UCODE_ID_VPE_CTX: + *type = GFX_FW_TYPE_VPEC_FW1; + break; + case AMDGPU_UCODE_ID_VPE_CTL: + *type = GFX_FW_TYPE_VPEC_FW2; + break; + case AMDGPU_UCODE_ID_VPE: + *type = GFX_FW_TYPE_VPE; + break; + case AMDGPU_UCODE_ID_UMSCH_MM_UCODE: + *type = GFX_FW_TYPE_UMSCH_UCODE; + break; + case AMDGPU_UCODE_ID_UMSCH_MM_DATA: + *type = GFX_FW_TYPE_UMSCH_DATA; + break; + case AMDGPU_UCODE_ID_UMSCH_MM_CMD_BUFFER: + *type = GFX_FW_TYPE_UMSCH_CMD_BUFFER; + break; + case AMDGPU_UCODE_ID_P2S_TABLE: + *type = GFX_FW_TYPE_P2S_TABLE; + break; case AMDGPU_UCODE_ID_MAXIMUM: default: return -EINVAL; @@ -2459,8 +2536,8 @@ static int psp_prep_load_ip_fw_cmd_buf(struct amdgpu_firmware_info *ucode, return ret; } -static int psp_execute_non_psp_fw_load(struct psp_context *psp, - struct amdgpu_firmware_info *ucode) +int psp_execute_ip_fw_load(struct psp_context *psp, + struct amdgpu_firmware_info *ucode) { int ret = 0; struct psp_gfx_cmd_resp *cmd = acquire_psp_cmd_buf(psp); @@ -2476,6 +2553,31 @@ static int psp_execute_non_psp_fw_load(struct psp_context *psp, return ret; } +static int psp_load_p2s_table(struct psp_context *psp) +{ + int ret; + struct amdgpu_device *adev = psp->adev; + struct amdgpu_firmware_info *ucode = + &adev->firmware.ucode[AMDGPU_UCODE_ID_P2S_TABLE]; + + if (adev->in_runpm && (adev->pm.rpm_mode == AMDGPU_RUNPM_BACO)) + return 0; + + if (amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 6)) { + uint32_t supp_vers = adev->flags & AMD_IS_APU ? 0x0036013D : + 0x0036003C; + if (psp->sos.fw_version < supp_vers) + return 0; + } + + if (!ucode->fw || amdgpu_sriov_vf(psp->adev)) + return 0; + + ret = psp_execute_ip_fw_load(psp, ucode); + + return ret; +} + static int psp_load_smu_fw(struct psp_context *psp) { int ret; @@ -2494,16 +2596,15 @@ static int psp_load_smu_fw(struct psp_context *psp) if (!ucode->fw || amdgpu_sriov_vf(psp->adev)) return 0; - if ((amdgpu_in_reset(adev) && - ras && adev->ras_enabled && - (adev->ip_versions[MP0_HWIP][0] == IP_VERSION(11, 0, 4) || - adev->ip_versions[MP0_HWIP][0] == IP_VERSION(11, 0, 2)))) { + if ((amdgpu_in_reset(adev) && ras && adev->ras_enabled && + (amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(11, 0, 4) || + amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(11, 0, 2)))) { ret = amdgpu_dpm_set_mp1_state(adev, PP_MP1_STATE_UNLOAD); if (ret) DRM_WARN("Failed to set MP1 state prepare for reload\n"); } - ret = psp_execute_non_psp_fw_load(psp, ucode); + ret = psp_execute_ip_fw_load(psp, ucode); if (ret) DRM_ERROR("PSP load smu failed!\n"); @@ -2517,6 +2618,9 @@ static bool fw_load_skip_check(struct psp_context *psp, if (!ucode->fw || !ucode->ucode_size) return true; + if (ucode->ucode_id == AMDGPU_UCODE_ID_P2S_TABLE) + return true; + if (ucode->ucode_id == AMDGPU_UCODE_ID_SMC && (psp_smu_reload_quirk(psp) || psp->autoload_supported || @@ -2545,7 +2649,7 @@ int psp_load_fw_list(struct psp_context *psp, for (i = 0; i < ucode_count; ++i) { ucode = ucode_list[i]; psp_print_fw_hdr(psp, ucode); - ret = psp_execute_non_psp_fw_load(psp, ucode); + ret = psp_execute_ip_fw_load(psp, ucode); if (ret) return ret; } @@ -2565,6 +2669,9 @@ static int psp_load_non_psp_fw(struct psp_context *psp) return ret; } + /* Load P2S table first if it's available */ + psp_load_p2s_table(psp); + for (i = 0; i < adev->firmware.max_ucodes; i++) { ucode = &adev->firmware.ucode[i]; @@ -2580,19 +2687,23 @@ static int psp_load_non_psp_fw(struct psp_context *psp) continue; if (psp->autoload_supported && - (adev->ip_versions[MP0_HWIP][0] == IP_VERSION(11, 0, 7) || - adev->ip_versions[MP0_HWIP][0] == IP_VERSION(11, 0, 11) || - adev->ip_versions[MP0_HWIP][0] == IP_VERSION(11, 0, 12)) && + (amdgpu_ip_version(adev, MP0_HWIP, 0) == + IP_VERSION(11, 0, 7) || + amdgpu_ip_version(adev, MP0_HWIP, 0) == + IP_VERSION(11, 0, 11) || + amdgpu_ip_version(adev, MP0_HWIP, 0) == + IP_VERSION(11, 0, 12)) && (ucode->ucode_id == AMDGPU_UCODE_ID_SDMA1 || ucode->ucode_id == AMDGPU_UCODE_ID_SDMA2 || ucode->ucode_id == AMDGPU_UCODE_ID_SDMA3)) /* PSP only receive one SDMA fw for sienna_cichlid, - * as all four sdma fw are same */ + * as all four sdma fw are same + */ continue; psp_print_fw_hdr(psp, ucode); - ret = psp_execute_non_psp_fw_load(psp, ucode); + ret = psp_execute_ip_fw_load(psp, ucode); if (ret) return ret; @@ -2652,8 +2763,8 @@ static int psp_load_fw(struct amdgpu_device *adev) if (adev->gmc.xgmi.num_physical_nodes > 1) { ret = psp_xgmi_initialize(psp, false, true); /* Warning the XGMI seesion initialize failure - * Instead of stop driver initialization - */ + * Instead of stop driver initialization + */ if (ret) dev_err(psp->adev->dev, "XGMI: Failed to initialize XGMI session\n"); @@ -2931,19 +3042,6 @@ int psp_rlc_autoload_start(struct psp_context *psp) return ret; } -int psp_update_vcn_sram(struct amdgpu_device *adev, int inst_idx, - uint64_t cmd_gpu_addr, int cmd_size) -{ - struct amdgpu_firmware_info ucode = {0}; - - ucode.ucode_id = inst_idx ? AMDGPU_UCODE_ID_VCN1_RAM : - AMDGPU_UCODE_ID_VCN0_RAM; - ucode.mc_addr = cmd_gpu_addr; - ucode.ucode_size = cmd_size; - - return psp_execute_non_psp_fw_load(&adev->psp, &ucode); -} - int psp_ring_cmd_submit(struct psp_context *psp, uint64_t cmd_buf_mc_addr, uint64_t fence_mc_addr, @@ -3135,7 +3233,7 @@ static int psp_init_sos_base_fw(struct amdgpu_device *adev) le32_to_cpu(sos_hdr->header.ucode_array_offset_bytes); if (adev->gmc.xgmi.connected_to_cpu || - (adev->ip_versions[MP0_HWIP][0] != IP_VERSION(13, 0, 2))) { + (amdgpu_ip_version(adev, MP0_HWIP, 0) != IP_VERSION(13, 0, 2))) { adev->psp.sos.fw_version = le32_to_cpu(sos_hdr->header.ucode_version); adev->psp.sos.feature_version = le32_to_cpu(sos_hdr->sos.fw_version); @@ -3584,6 +3682,11 @@ void psp_copy_fw(struct psp_context *psp, uint8_t *start_addr, uint32_t bin_size drm_dev_exit(idx); } +/** + * DOC: usbc_pd_fw + * Reading from this file will retrieve the USB-C PD firmware version. Writing to + * this file will trigger the update process. + */ static DEVICE_ATTR(usbc_pd_fw, 0644, psp_usbc_pd_fw_sysfs_read, psp_usbc_pd_fw_sysfs_write); @@ -3624,7 +3727,7 @@ static ssize_t amdgpu_psp_vbflash_write(struct file *filp, struct kobject *kobj, adev->psp.vbflash_image_size += count; mutex_unlock(&adev->psp.mutex); - dev_info(adev->dev, "VBIOS flash write PSP done"); + dev_dbg(adev->dev, "IFWI staged for update"); return count; } @@ -3644,7 +3747,7 @@ static ssize_t amdgpu_psp_vbflash_read(struct file *filp, struct kobject *kobj, if (adev->psp.vbflash_image_size == 0) return -EINVAL; - dev_info(adev->dev, "VBIOS flash to PSP started"); + dev_dbg(adev->dev, "PSP IFWI flash process initiated"); ret = amdgpu_bo_create_kernel(adev, adev->psp.vbflash_image_size, AMDGPU_GPU_PAGE_SIZE, @@ -3669,14 +3772,32 @@ rel_buf: adev->psp.vbflash_image_size = 0; if (ret) { - dev_err(adev->dev, "Failed to load VBIOS FW, err = %d", ret); + dev_err(adev->dev, "Failed to load IFWI, err = %d", ret); return ret; } - dev_info(adev->dev, "VBIOS flash to PSP done"); + dev_dbg(adev->dev, "PSP IFWI flash process done"); return 0; } +/** + * DOC: psp_vbflash + * Writing to this file will stage an IFWI for update. Reading from this file + * will trigger the update process. + */ +static struct bin_attribute psp_vbflash_bin_attr = { + .attr = {.name = "psp_vbflash", .mode = 0660}, + .size = 0, + .write = amdgpu_psp_vbflash_write, + .read = amdgpu_psp_vbflash_read, +}; + +/** + * DOC: psp_vbflash_status + * The status of the flash process. + * 0: IFWI flash not complete. + * 1: IFWI flash complete. + */ static ssize_t amdgpu_psp_vbflash_status(struct device *dev, struct device_attribute *attr, char *buf) @@ -3693,39 +3814,49 @@ static ssize_t amdgpu_psp_vbflash_status(struct device *dev, return sysfs_emit(buf, "0x%x\n", vbflash_status); } +static DEVICE_ATTR(psp_vbflash_status, 0440, amdgpu_psp_vbflash_status, NULL); -static const struct bin_attribute psp_vbflash_bin_attr = { - .attr = {.name = "psp_vbflash", .mode = 0660}, - .size = 0, - .write = amdgpu_psp_vbflash_write, - .read = amdgpu_psp_vbflash_read, +static struct bin_attribute *bin_flash_attrs[] = { + &psp_vbflash_bin_attr, + NULL }; -static DEVICE_ATTR(psp_vbflash_status, 0440, amdgpu_psp_vbflash_status, NULL); +static struct attribute *flash_attrs[] = { + &dev_attr_psp_vbflash_status.attr, + &dev_attr_usbc_pd_fw.attr, + NULL +}; -int amdgpu_psp_sysfs_init(struct amdgpu_device *adev) +static umode_t amdgpu_flash_attr_is_visible(struct kobject *kobj, struct attribute *attr, int idx) { - int ret = 0; + struct device *dev = kobj_to_dev(kobj); + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = drm_to_adev(ddev); - if (amdgpu_sriov_vf(adev)) - return -EINVAL; + if (attr == &dev_attr_usbc_pd_fw.attr) + return adev->psp.sup_pd_fw_up ? 0660 : 0; - switch (adev->ip_versions[MP0_HWIP][0]) { - case IP_VERSION(13, 0, 0): - case IP_VERSION(13, 0, 7): - case IP_VERSION(13, 0, 10): - ret = sysfs_create_bin_file(&adev->dev->kobj, &psp_vbflash_bin_attr); - if (ret) - dev_err(adev->dev, "Failed to create device file psp_vbflash"); - ret = device_create_file(adev->dev, &dev_attr_psp_vbflash_status); - if (ret) - dev_err(adev->dev, "Failed to create device file psp_vbflash_status"); - return ret; - default: - return 0; - } + return adev->psp.sup_ifwi_up ? 0440 : 0; +} + +static umode_t amdgpu_bin_flash_attr_is_visible(struct kobject *kobj, + struct bin_attribute *attr, + int idx) +{ + struct device *dev = kobj_to_dev(kobj); + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = drm_to_adev(ddev); + + return adev->psp.sup_ifwi_up ? 0660 : 0; } +const struct attribute_group amdgpu_flash_attr_group = { + .attrs = flash_attrs, + .bin_attrs = bin_flash_attrs, + .is_bin_visible = amdgpu_bin_flash_attr_is_visible, + .is_visible = amdgpu_flash_attr_is_visible, +}; + const struct amd_ip_funcs psp_ip_funcs = { .name = "psp", .early_init = psp_early_init, @@ -3744,27 +3875,6 @@ const struct amd_ip_funcs psp_ip_funcs = { .set_powergating_state = psp_set_powergating_state, }; -static int psp_sysfs_init(struct amdgpu_device *adev) -{ - int ret = device_create_file(adev->dev, &dev_attr_usbc_pd_fw); - - if (ret) - DRM_ERROR("Failed to create USBC PD FW control file!"); - - return ret; -} - -void amdgpu_psp_sysfs_fini(struct amdgpu_device *adev) -{ - sysfs_remove_bin_file(&adev->dev->kobj, &psp_vbflash_bin_attr); - device_remove_file(adev->dev, &dev_attr_psp_vbflash_status); -} - -static void psp_sysfs_fini(struct amdgpu_device *adev) -{ - device_remove_file(adev->dev, &dev_attr_usbc_pd_fw); -} - const struct amdgpu_ip_block_version psp_v3_1_ip_block = { .type = AMD_IP_BLOCK_TYPE_PSP, .major = 3, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h index 2cae0b1a0b8a..c4d9cbde55b9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h @@ -39,6 +39,8 @@ #define PSP_TMR_ALIGNMENT 0x100000 #define PSP_FW_NAME_LEN 0x24 +extern const struct attribute_group amdgpu_flash_attr_group; + enum psp_shared_mem_size { PSP_ASD_SHARED_MEM_SIZE = 0x0, PSP_XGMI_SHARED_MEM_SIZE = 0x4000, @@ -78,8 +80,7 @@ enum psp_bootloader_cmd { PSP_BL__LOAD_TOS_SPL_TABLE = 0x10000000, }; -enum psp_ring_type -{ +enum psp_ring_type { PSP_RING_TYPE__INVALID = 0, /* * These values map to the way the PSP kernel identifies the @@ -89,8 +90,7 @@ enum psp_ring_type PSP_RING_TYPE__KM = 2 /* Kernel mode ring (formerly called GPCOM) */ }; -struct psp_ring -{ +struct psp_ring { enum psp_ring_type ring_type; struct psp_gfx_rb_frame *ring_mem; uint64_t ring_mem_mc_addr; @@ -107,9 +107,9 @@ enum psp_reg_prog_id { PSP_REG_LAST }; -struct psp_funcs -{ +struct psp_funcs { int (*init_microcode)(struct psp_context *psp); + int (*wait_for_bootloader)(struct psp_context *psp); int (*bootloader_load_kdb)(struct psp_context *psp); int (*bootloader_load_spl)(struct psp_context *psp); int (*bootloader_load_sysdrv)(struct psp_context *psp); @@ -133,6 +133,8 @@ struct psp_funcs int (*read_usbc_pd_fw)(struct psp_context *psp, uint32_t *fw_ver); int (*update_spirom)(struct psp_context *psp, uint64_t fw_pri_mc_addr); int (*vbflash_stat)(struct psp_context *psp); + int (*fatal_error_recovery_quirk)(struct psp_context *psp); + int (*query_boot_status)(struct psp_context *psp); }; struct ta_funcs { @@ -148,6 +150,7 @@ struct psp_xgmi_node_info { uint8_t is_sharing_enabled; enum ta_xgmi_assigned_sdma_engine sdma_engine; uint8_t num_links; + struct xgmi_connected_port_num port_num[TA_XGMI__MAX_PORT_NUM]; }; struct psp_xgmi_topology_info { @@ -188,6 +191,7 @@ struct psp_xgmi_context { struct ta_context context; struct psp_xgmi_topology_info top_info; bool supports_extended_data; + uint8_t xgmi_ta_caps; }; struct psp_ras_context { @@ -307,10 +311,9 @@ struct psp_runtime_scpm_entry { enum psp_runtime_scpm_authentication scpm_status; }; -struct psp_context -{ - struct amdgpu_device *adev; - struct psp_ring km_ring; +struct psp_context { + struct amdgpu_device *adev; + struct psp_ring km_ring; struct psp_gfx_cmd_resp *cmd; const struct psp_funcs *funcs; @@ -339,7 +342,7 @@ struct psp_context uint64_t tmr_mc_addr; /* asd firmware */ - const struct firmware *asd_fw; + const struct firmware *asd_fw; /* toc firmware */ const struct firmware *toc_fw; @@ -384,9 +387,13 @@ struct psp_context uint32_t boot_cfg_bitmask; - char *vbflash_tmp_buf; - size_t vbflash_image_size; - bool vbflash_done; + /* firmware upgrades supported */ + bool sup_pd_fw_up; + bool sup_ifwi_up; + + char *vbflash_tmp_buf; + size_t vbflash_image_size; + bool vbflash_done; }; struct amdgpu_psp_funcs { @@ -443,6 +450,10 @@ struct amdgpu_psp_funcs { ((psp)->funcs->vbflash_stat ? \ (psp)->funcs->vbflash_stat((psp)) : -EINVAL) +#define psp_fatal_error_recovery_quirk(psp) \ + ((psp)->funcs->fatal_error_recovery_quirk ? \ + (psp)->funcs->fatal_error_recovery_quirk((psp)) : 0) + extern const struct amd_ip_funcs psp_ip_funcs; extern const struct amdgpu_ip_block_version psp_v3_1_ip_block; @@ -458,9 +469,10 @@ extern int psp_wait_for(struct psp_context *psp, uint32_t reg_index, extern int psp_wait_for_spirom_update(struct psp_context *psp, uint32_t reg_index, uint32_t field_val, uint32_t mask, uint32_t msec_timeout); +int psp_execute_ip_fw_load(struct psp_context *psp, + struct amdgpu_firmware_info *ucode); + int psp_gpu_reset(struct amdgpu_device *adev); -int psp_update_vcn_sram(struct amdgpu_device *adev, int inst_idx, - uint64_t cmd_gpu_addr, int cmd_size); int psp_ta_init_shared_buf(struct psp_context *psp, struct ta_mem_context *mem_ctx); @@ -525,6 +537,8 @@ int psp_spatial_partition(struct psp_context *psp, int mode); int is_psp_fw_valid(struct psp_bin_desc bin); -int amdgpu_psp_sysfs_init(struct amdgpu_device *adev); -void amdgpu_psp_sysfs_fini(struct amdgpu_device *adev); +int amdgpu_psp_wait_for_bootloader(struct amdgpu_device *adev); + +int amdgpu_psp_query_boot_status(struct amdgpu_device *adev); + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 8aaa427f8c0f..63fb4cd85e53 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -28,6 +28,7 @@ #include <linux/reboot.h> #include <linux/syscalls.h> #include <linux/pm_runtime.h> +#include <linux/list_sort.h> #include "amdgpu.h" #include "amdgpu_ras.h" @@ -35,6 +36,7 @@ #include "amdgpu_xgmi.h" #include "ivsrcid/nbio/irqsrcs_nbif_7_4.h" #include "nbio_v4_3.h" +#include "nbio_v7_9.h" #include "atom.h" #include "amdgpu_reset.h" @@ -151,8 +153,9 @@ static bool amdgpu_ras_get_error_query_ready(struct amdgpu_device *adev) static int amdgpu_reserve_page_direct(struct amdgpu_device *adev, uint64_t address) { - struct ras_err_data err_data = {0, 0, 0, NULL}; + struct ras_err_data err_data; struct eeprom_table_record err_rec; + int ret; if ((address >= adev->gmc.mc_vram_size) || (address >= RAS_UMC_INJECT_ADDR_LIMIT)) { @@ -169,6 +172,10 @@ static int amdgpu_reserve_page_direct(struct amdgpu_device *adev, uint64_t addre return 0; } + ret = amdgpu_ras_error_data_init(&err_data); + if (ret) + return ret; + memset(&err_rec, 0x0, sizeof(struct eeprom_table_record)); err_data.err_addr = &err_rec; amdgpu_umc_fill_error_record(&err_data, address, address, 0, 0); @@ -179,6 +186,8 @@ static int amdgpu_reserve_page_direct(struct amdgpu_device *adev, uint64_t addre amdgpu_ras_save_bad_pages(adev, NULL); } + amdgpu_ras_error_data_fini(&err_data); + dev_warn(adev->dev, "WARNING: THIS IS ONLY FOR TEST PURPOSES AND WILL CORRUPT RAS EEPROM\n"); dev_warn(adev->dev, "Clear EEPROM:\n"); dev_warn(adev->dev, " echo 1 > /sys/kernel/debug/dri/0/ras/ras_eeprom_reset\n"); @@ -200,8 +209,8 @@ static ssize_t amdgpu_ras_debugfs_read(struct file *f, char __user *buf, return -EINVAL; /* Hardware counter will be reset automatically after the query on Vega20 and Arcturus */ - if (obj->adev->ip_versions[MP0_HWIP][0] != IP_VERSION(11, 0, 2) && - obj->adev->ip_versions[MP0_HWIP][0] != IP_VERSION(11, 0, 4)) { + if (amdgpu_ip_version(obj->adev, MP0_HWIP, 0) != IP_VERSION(11, 0, 2) && + amdgpu_ip_version(obj->adev, MP0_HWIP, 0) != IP_VERSION(11, 0, 4)) { if (amdgpu_ras_reset_error_status(obj->adev, info.head.block)) dev_warn(obj->adev->dev, "Failed to reset error counter and error status"); } @@ -610,8 +619,8 @@ static ssize_t amdgpu_ras_sysfs_read(struct device *dev, if (amdgpu_ras_query_error_status(obj->adev, &info)) return -EINVAL; - if (obj->adev->ip_versions[MP0_HWIP][0] != IP_VERSION(11, 0, 2) && - obj->adev->ip_versions[MP0_HWIP][0] != IP_VERSION(11, 0, 4)) { + if (amdgpu_ip_version(obj->adev, MP0_HWIP, 0) != IP_VERSION(11, 0, 2) && + amdgpu_ip_version(obj->adev, MP0_HWIP, 0) != IP_VERSION(11, 0, 4)) { if (amdgpu_ras_reset_error_status(obj->adev, info.head.block)) dev_warn(obj->adev->dev, "Failed to reset error counter and error status"); } @@ -627,8 +636,11 @@ static ssize_t amdgpu_ras_sysfs_read(struct device *dev, static inline void put_obj(struct ras_manager *obj) { - if (obj && (--obj->use == 0)) + if (obj && (--obj->use == 0)) { list_del(&obj->node); + amdgpu_ras_error_data_fini(&obj->err_data); + } + if (obj && (obj->use < 0)) DRM_ERROR("RAS ERROR: Unbalance obj(%s) use\n", get_ras_block_str(&obj->head)); } @@ -658,6 +670,9 @@ static struct ras_manager *amdgpu_ras_create_obj(struct amdgpu_device *adev, if (alive_obj(obj)) return NULL; + if (amdgpu_ras_error_data_init(&obj->err_data)) + return NULL; + obj->head = *head; obj->adev = adev; list_add(&obj->node, &con->head); @@ -757,28 +772,28 @@ static int __amdgpu_ras_feature_enable(struct amdgpu_device *adev, return 0; } -static int amdgpu_ras_check_feature_allowed(struct amdgpu_device *adev, - struct ras_common_if *head) -{ - if (amdgpu_ras_is_feature_allowed(adev, head) || - amdgpu_ras_is_poison_mode_supported(adev)) - return 1; - else - return 0; -} - /* wrapper of psp_ras_enable_features */ int amdgpu_ras_feature_enable(struct amdgpu_device *adev, struct ras_common_if *head, bool enable) { struct amdgpu_ras *con = amdgpu_ras_get_context(adev); union ta_ras_cmd_input *info; - int ret = 0; + int ret; if (!con) return -EINVAL; - if (head->block == AMDGPU_RAS_BLOCK__GFX) { + /* For non-gfx ip, do not enable ras feature if it is not allowed */ + /* For gfx ip, regardless of feature support status, */ + /* Force issue enable or disable ras feature commands */ + if (head->block != AMDGPU_RAS_BLOCK__GFX && + !amdgpu_ras_is_feature_allowed(adev, head)) + return 0; + + /* Only enable gfx ras feature from host side */ + if (head->block == AMDGPU_RAS_BLOCK__GFX && + !amdgpu_sriov_vf(adev) && + !amdgpu_ras_intr_triggered()) { info = kzalloc(sizeof(union ta_ras_cmd_input), GFP_KERNEL); if (!info) return -ENOMEM; @@ -794,32 +809,24 @@ int amdgpu_ras_feature_enable(struct amdgpu_device *adev, .error_type = amdgpu_ras_error_to_ta(head->type), }; } - } - /* Do not enable if it is not allowed. */ - if (enable && !amdgpu_ras_check_feature_allowed(adev, head)) - goto out; - - /* Only enable ras feature operation handle on host side */ - if (head->block == AMDGPU_RAS_BLOCK__GFX && - !amdgpu_sriov_vf(adev) && - !amdgpu_ras_intr_triggered()) { ret = psp_ras_enable_features(&adev->psp, info, enable); if (ret) { dev_err(adev->dev, "ras %s %s failed poison:%d ret:%d\n", enable ? "enable":"disable", get_ras_block_str(head), amdgpu_ras_is_poison_mode_supported(adev), ret); - goto out; + kfree(info); + return ret; } + + kfree(info); } /* setup the obj */ __amdgpu_ras_feature_enable(adev, head, enable); -out: - if (head->block == AMDGPU_RAS_BLOCK__GFX) - kfree(info); - return ret; + + return 0; } /* Only used in device probe stage and called only once. */ @@ -1022,103 +1029,266 @@ static void amdgpu_ras_get_ecc_info(struct amdgpu_device *adev, struct ras_err_d } } -/* query/inject/cure begin */ -int amdgpu_ras_query_error_status(struct amdgpu_device *adev, - struct ras_query_if *info) -{ - struct amdgpu_ras_block_object *block_obj = NULL; - struct ras_manager *obj = amdgpu_ras_find_obj(adev, &info->head); - struct ras_err_data err_data = {0, 0, 0, NULL}; +static void amdgpu_ras_error_print_error_data(struct amdgpu_device *adev, + struct ras_manager *ras_mgr, + struct ras_err_data *err_data, + const char *blk_name, + bool is_ue) +{ + struct amdgpu_smuio_mcm_config_info *mcm_info; + struct ras_err_node *err_node; + struct ras_err_info *err_info; + + if (is_ue) { + for_each_ras_error(err_node, err_data) { + err_info = &err_node->err_info; + mcm_info = &err_info->mcm_info; + if (err_info->ue_count) { + dev_info(adev->dev, "socket: %d, die: %d, " + "%lld new uncorrectable hardware errors detected in %s block\n", + mcm_info->socket_id, + mcm_info->die_id, + err_info->ue_count, + blk_name); + } + } - if (!obj) - return -EINVAL; + for_each_ras_error(err_node, &ras_mgr->err_data) { + err_info = &err_node->err_info; + mcm_info = &err_info->mcm_info; + dev_info(adev->dev, "socket: %d, die: %d, " + "%lld uncorrectable hardware errors detected in total in %s block\n", + mcm_info->socket_id, mcm_info->die_id, err_info->ue_count, blk_name); + } - if (info->head.block == AMDGPU_RAS_BLOCK__UMC) { - amdgpu_ras_get_ecc_info(adev, &err_data); } else { - block_obj = amdgpu_ras_get_ras_block(adev, info->head.block, 0); - if (!block_obj || !block_obj->hw_ops) { - dev_dbg_once(adev->dev, "%s doesn't config RAS function\n", - get_ras_block_str(&info->head)); - return -EINVAL; + for_each_ras_error(err_node, err_data) { + err_info = &err_node->err_info; + mcm_info = &err_info->mcm_info; + if (err_info->ce_count) { + dev_info(adev->dev, "socket: %d, die: %d, " + "%lld new correctable hardware errors detected in %s block, " + "no user action is needed\n", + mcm_info->socket_id, + mcm_info->die_id, + err_info->ce_count, + blk_name); + } } - if (block_obj->hw_ops->query_ras_error_count) - block_obj->hw_ops->query_ras_error_count(adev, &err_data); - - if ((info->head.block == AMDGPU_RAS_BLOCK__SDMA) || - (info->head.block == AMDGPU_RAS_BLOCK__GFX) || - (info->head.block == AMDGPU_RAS_BLOCK__MMHUB)) { - if (block_obj->hw_ops->query_ras_error_status) - block_obj->hw_ops->query_ras_error_status(adev); - } + for_each_ras_error(err_node, &ras_mgr->err_data) { + err_info = &err_node->err_info; + mcm_info = &err_info->mcm_info; + dev_info(adev->dev, "socket: %d, die: %d, " + "%lld correctable hardware errors detected in total in %s block, " + "no user action is needed\n", + mcm_info->socket_id, mcm_info->die_id, err_info->ce_count, blk_name); + } } +} - obj->err_data.ue_count += err_data.ue_count; - obj->err_data.ce_count += err_data.ce_count; +static inline bool err_data_has_source_info(struct ras_err_data *data) +{ + return !list_empty(&data->err_node_list); +} - info->ue_count = obj->err_data.ue_count; - info->ce_count = obj->err_data.ce_count; +static void amdgpu_ras_error_generate_report(struct amdgpu_device *adev, + struct ras_query_if *query_if, + struct ras_err_data *err_data) +{ + struct ras_manager *ras_mgr = amdgpu_ras_find_obj(adev, &query_if->head); + const char *blk_name = get_ras_block_str(&query_if->head); - if (err_data.ce_count) { - if (adev->smuio.funcs && - adev->smuio.funcs->get_socket_id && - adev->smuio.funcs->get_die_id) { + if (err_data->ce_count) { + if (err_data_has_source_info(err_data)) { + amdgpu_ras_error_print_error_data(adev, ras_mgr, err_data, blk_name, false); + } else if (!adev->aid_mask && + adev->smuio.funcs && + adev->smuio.funcs->get_socket_id && + adev->smuio.funcs->get_die_id) { dev_info(adev->dev, "socket: %d, die: %d " - "%ld correctable hardware errors " - "detected in %s block, no user " - "action is needed.\n", - adev->smuio.funcs->get_socket_id(adev), - adev->smuio.funcs->get_die_id(adev), - obj->err_data.ce_count, - get_ras_block_str(&info->head)); + "%ld correctable hardware errors " + "detected in %s block, no user " + "action is needed.\n", + adev->smuio.funcs->get_socket_id(adev), + adev->smuio.funcs->get_die_id(adev), + ras_mgr->err_data.ce_count, + blk_name); } else { dev_info(adev->dev, "%ld correctable hardware errors " - "detected in %s block, no user " - "action is needed.\n", - obj->err_data.ce_count, - get_ras_block_str(&info->head)); + "detected in %s block, no user " + "action is needed.\n", + ras_mgr->err_data.ce_count, + blk_name); } } - if (err_data.ue_count) { - if (adev->smuio.funcs && - adev->smuio.funcs->get_socket_id && - adev->smuio.funcs->get_die_id) { + + if (err_data->ue_count) { + if (err_data_has_source_info(err_data)) { + amdgpu_ras_error_print_error_data(adev, ras_mgr, err_data, blk_name, true); + } else if (!adev->aid_mask && + adev->smuio.funcs && + adev->smuio.funcs->get_socket_id && + adev->smuio.funcs->get_die_id) { dev_info(adev->dev, "socket: %d, die: %d " - "%ld uncorrectable hardware errors " - "detected in %s block\n", - adev->smuio.funcs->get_socket_id(adev), - adev->smuio.funcs->get_die_id(adev), - obj->err_data.ue_count, - get_ras_block_str(&info->head)); + "%ld uncorrectable hardware errors " + "detected in %s block\n", + adev->smuio.funcs->get_socket_id(adev), + adev->smuio.funcs->get_die_id(adev), + ras_mgr->err_data.ue_count, + blk_name); } else { dev_info(adev->dev, "%ld uncorrectable hardware errors " - "detected in %s block\n", - obj->err_data.ue_count, - get_ras_block_str(&info->head)); + "detected in %s block\n", + ras_mgr->err_data.ue_count, + blk_name); } } +} + +static void amdgpu_rasmgr_error_data_statistic_update(struct ras_manager *obj, struct ras_err_data *err_data) +{ + struct ras_err_node *err_node; + struct ras_err_info *err_info; + + if (err_data_has_source_info(err_data)) { + for_each_ras_error(err_node, err_data) { + err_info = &err_node->err_info; + + amdgpu_ras_error_statistic_ce_count(&obj->err_data, &err_info->mcm_info, err_info->ce_count); + amdgpu_ras_error_statistic_ue_count(&obj->err_data, &err_info->mcm_info, err_info->ue_count); + } + } else { + /* for legacy asic path which doesn't has error source info */ + obj->err_data.ue_count += err_data->ue_count; + obj->err_data.ce_count += err_data->ce_count; + } +} + +static int amdgpu_ras_query_error_status_helper(struct amdgpu_device *adev, + struct ras_query_if *info, + struct ras_err_data *err_data, + unsigned int error_query_mode) +{ + enum amdgpu_ras_block blk = info ? info->head.block : AMDGPU_RAS_BLOCK_COUNT; + struct amdgpu_ras_block_object *block_obj = NULL; + + if (error_query_mode == AMDGPU_RAS_INVALID_ERROR_QUERY) + return -EINVAL; + + if (error_query_mode == AMDGPU_RAS_DIRECT_ERROR_QUERY) { + if (info->head.block == AMDGPU_RAS_BLOCK__UMC) { + amdgpu_ras_get_ecc_info(adev, err_data); + } else { + block_obj = amdgpu_ras_get_ras_block(adev, info->head.block, 0); + if (!block_obj || !block_obj->hw_ops) { + dev_dbg_once(adev->dev, "%s doesn't config RAS function\n", + get_ras_block_str(&info->head)); + return -EINVAL; + } + + if (block_obj->hw_ops->query_ras_error_count) + block_obj->hw_ops->query_ras_error_count(adev, err_data); + + if ((info->head.block == AMDGPU_RAS_BLOCK__SDMA) || + (info->head.block == AMDGPU_RAS_BLOCK__GFX) || + (info->head.block == AMDGPU_RAS_BLOCK__MMHUB)) { + if (block_obj->hw_ops->query_ras_error_status) + block_obj->hw_ops->query_ras_error_status(adev); + } + } + } else { + /* FIXME: add code to check return value later */ + amdgpu_mca_smu_log_ras_error(adev, blk, AMDGPU_MCA_ERROR_TYPE_UE, err_data); + amdgpu_mca_smu_log_ras_error(adev, blk, AMDGPU_MCA_ERROR_TYPE_CE, err_data); + } + return 0; } -int amdgpu_ras_reset_error_status(struct amdgpu_device *adev, - enum amdgpu_ras_block block) +/* query/inject/cure begin */ +int amdgpu_ras_query_error_status(struct amdgpu_device *adev, struct ras_query_if *info) { - struct amdgpu_ras_block_object *block_obj = amdgpu_ras_get_ras_block(adev, block, 0); + struct ras_manager *obj = amdgpu_ras_find_obj(adev, &info->head); + struct ras_err_data err_data; + unsigned int error_query_mode; + int ret; - if (!amdgpu_ras_is_supported(adev, block)) + if (!obj) return -EINVAL; - if (!block_obj || !block_obj->hw_ops) { - dev_dbg_once(adev->dev, "%s doesn't config RAS function\n", - ras_block_str(block)); + ret = amdgpu_ras_error_data_init(&err_data); + if (ret) + return ret; + + if (!amdgpu_ras_get_error_query_mode(adev, &error_query_mode)) return -EINVAL; + + ret = amdgpu_ras_query_error_status_helper(adev, info, + &err_data, + error_query_mode); + if (ret) + goto out_fini_err_data; + + amdgpu_rasmgr_error_data_statistic_update(obj, &err_data); + + info->ue_count = obj->err_data.ue_count; + info->ce_count = obj->err_data.ce_count; + + amdgpu_ras_error_generate_report(adev, info, &err_data); + +out_fini_err_data: + amdgpu_ras_error_data_fini(&err_data); + + return ret; +} + +int amdgpu_ras_reset_error_count(struct amdgpu_device *adev, + enum amdgpu_ras_block block) +{ + struct amdgpu_ras_block_object *block_obj = amdgpu_ras_get_ras_block(adev, block, 0); + struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); + const struct amdgpu_mca_smu_funcs *mca_funcs = adev->mca.mca_funcs; + struct amdgpu_hive_info *hive; + int hive_ras_recovery = 0; + + if (!block_obj || !block_obj->hw_ops) { + dev_dbg_once(adev->dev, "%s doesn't config RAS function\n", + ras_block_str(block)); + return -EOPNOTSUPP; + } + + if (!amdgpu_ras_is_supported(adev, block) || + !amdgpu_ras_get_mca_debug_mode(adev)) + return -EOPNOTSUPP; + + hive = amdgpu_get_xgmi_hive(adev); + if (hive) { + hive_ras_recovery = atomic_read(&hive->ras_recovery); + amdgpu_put_xgmi_hive(hive); } + /* skip ras error reset in gpu reset */ + if ((amdgpu_in_reset(adev) || atomic_read(&ras->in_recovery) || + hive_ras_recovery) && + mca_funcs && mca_funcs->mca_set_debug_mode) + return -EOPNOTSUPP; + if (block_obj->hw_ops->reset_ras_error_count) block_obj->hw_ops->reset_ras_error_count(adev); + return 0; +} + +int amdgpu_ras_reset_error_status(struct amdgpu_device *adev, + enum amdgpu_ras_block block) +{ + struct amdgpu_ras_block_object *block_obj = amdgpu_ras_get_ras_block(adev, block, 0); + + if (amdgpu_ras_reset_error_count(adev, block) == -EOPNOTSUPP) + return 0; + if ((block == AMDGPU_RAS_BLOCK__GFX) || (block == AMDGPU_RAS_BLOCK__MMHUB)) { if (block_obj->hw_ops->reset_ras_error_status) @@ -1159,7 +1329,8 @@ int amdgpu_ras_error_inject(struct amdgpu_device *adev, } /* Calculate XGMI relative offset */ - if (adev->gmc.xgmi.num_physical_nodes > 1) { + if (adev->gmc.xgmi.num_physical_nodes > 1 && + info->head.block != AMDGPU_RAS_BLOCK__GFX) { block_info.address = amdgpu_xgmi_get_relative_phy_addr(adev, block_info.address); @@ -1213,8 +1384,8 @@ static int amdgpu_ras_query_error_count_helper(struct amdgpu_device *adev, /* some hardware/IP supports read to clear * no need to explictly reset the err status after the query call */ - if (adev->ip_versions[MP0_HWIP][0] != IP_VERSION(11, 0, 2) && - adev->ip_versions[MP0_HWIP][0] != IP_VERSION(11, 0, 4)) { + if (amdgpu_ip_version(adev, MP0_HWIP, 0) != IP_VERSION(11, 0, 2) && + amdgpu_ip_version(adev, MP0_HWIP, 0) != IP_VERSION(11, 0, 4)) { if (amdgpu_ras_reset_error_status(adev, query_info->head.block)) dev_warn(adev->dev, "Failed to reset error counter and error status\n"); @@ -1374,20 +1545,39 @@ static ssize_t amdgpu_ras_sysfs_features_read(struct device *dev, return sysfs_emit(buf, "feature mask: 0x%x\n", con->features); } +static ssize_t amdgpu_ras_sysfs_version_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct amdgpu_ras *con = + container_of(attr, struct amdgpu_ras, version_attr); + return sysfs_emit(buf, "table version: 0x%x\n", con->eeprom_control.tbl_hdr.version); +} + +static ssize_t amdgpu_ras_sysfs_schema_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct amdgpu_ras *con = + container_of(attr, struct amdgpu_ras, schema_attr); + return sysfs_emit(buf, "schema: 0x%x\n", con->schema); +} + static void amdgpu_ras_sysfs_remove_bad_page_node(struct amdgpu_device *adev) { struct amdgpu_ras *con = amdgpu_ras_get_context(adev); - sysfs_remove_file_from_group(&adev->dev->kobj, + if (adev->dev->kobj.sd) + sysfs_remove_file_from_group(&adev->dev->kobj, &con->badpages_attr.attr, RAS_FS_NAME); } -static int amdgpu_ras_sysfs_remove_feature_node(struct amdgpu_device *adev) +static int amdgpu_ras_sysfs_remove_dev_attr_node(struct amdgpu_device *adev) { struct amdgpu_ras *con = amdgpu_ras_get_context(adev); struct attribute *attrs[] = { &con->features_attr.attr, + &con->version_attr.attr, + &con->schema_attr.attr, NULL }; struct attribute_group group = { @@ -1395,7 +1585,8 @@ static int amdgpu_ras_sysfs_remove_feature_node(struct amdgpu_device *adev) .attrs = attrs, }; - sysfs_remove_group(&adev->dev->kobj, &group); + if (adev->dev->kobj.sd) + sysfs_remove_group(&adev->dev->kobj, &group); return 0; } @@ -1442,7 +1633,8 @@ int amdgpu_ras_sysfs_remove(struct amdgpu_device *adev, if (!obj || !obj->attr_inuse) return -EINVAL; - sysfs_remove_file_from_group(&adev->dev->kobj, + if (adev->dev->kobj.sd) + sysfs_remove_file_from_group(&adev->dev->kobj, &obj->sysfs_attr.attr, RAS_FS_NAME); obj->attr_inuse = 0; @@ -1463,7 +1655,7 @@ static int amdgpu_ras_sysfs_remove_all(struct amdgpu_device *adev) if (amdgpu_bad_page_threshold != 0) amdgpu_ras_sysfs_remove_bad_page_node(adev); - amdgpu_ras_sysfs_remove_feature_node(adev); + amdgpu_ras_sysfs_remove_dev_attr_node(adev); return 0; } @@ -1575,6 +1767,8 @@ void amdgpu_ras_debugfs_create_all(struct amdgpu_device *adev) amdgpu_ras_debugfs_create(adev, &fs_info, dir); } } + + amdgpu_mca_smu_debugfs_init(adev, dir); } /* debugfs end */ @@ -1584,6 +1778,10 @@ static BIN_ATTR(gpu_vram_bad_pages, S_IRUGO, amdgpu_ras_sysfs_badpages_read, NULL, 0); static DEVICE_ATTR(features, S_IRUGO, amdgpu_ras_sysfs_features_read, NULL); +static DEVICE_ATTR(version, 0444, + amdgpu_ras_sysfs_version_show, NULL); +static DEVICE_ATTR(schema, 0444, + amdgpu_ras_sysfs_schema_show, NULL); static int amdgpu_ras_fs_init(struct amdgpu_device *adev) { struct amdgpu_ras *con = amdgpu_ras_get_context(adev); @@ -1592,6 +1790,8 @@ static int amdgpu_ras_fs_init(struct amdgpu_device *adev) }; struct attribute *attrs[] = { &con->features_attr.attr, + &con->version_attr.attr, + &con->schema_attr.attr, NULL }; struct bin_attribute *bin_attrs[] = { @@ -1600,11 +1800,20 @@ static int amdgpu_ras_fs_init(struct amdgpu_device *adev) }; int r; + group.attrs = attrs; + /* add features entry */ con->features_attr = dev_attr_features; - group.attrs = attrs; sysfs_attr_init(attrs[0]); + /* add version entry */ + con->version_attr = dev_attr_version; + sysfs_attr_init(attrs[1]); + + /* add schema entry */ + con->schema_attr = dev_attr_schema; + sysfs_attr_init(attrs[2]); + if (amdgpu_bad_page_threshold != 0) { /* add bad_page_features entry */ bin_attr_gpu_vram_bad_pages.private = NULL; @@ -1713,12 +1922,16 @@ static void amdgpu_ras_interrupt_umc_handler(struct ras_manager *obj, struct amdgpu_iv_entry *entry) { struct ras_ih_data *data = &obj->ih_data; - struct ras_err_data err_data = {0, 0, 0, NULL}; + struct ras_err_data err_data; int ret; if (!data->cb) return; + ret = amdgpu_ras_error_data_init(&err_data); + if (ret) + return; + /* Let IP handle its data, maybe we need get the output * from the callback to update the error type/count, etc */ @@ -1735,6 +1948,8 @@ static void amdgpu_ras_interrupt_umc_handler(struct ras_manager *obj, obj->err_data.ue_count += err_data.ue_count; obj->err_data.ce_count += err_data.ce_count; } + + amdgpu_ras_error_data_fini(&err_data); } static void amdgpu_ras_interrupt_handler(struct ras_manager *obj) @@ -1910,14 +2125,18 @@ static void amdgpu_ras_log_on_err_counter(struct amdgpu_device *adev) * should be removed until smu fix handle ecc_info table. */ if ((info.head.block == AMDGPU_RAS_BLOCK__UMC) && - (adev->ip_versions[MP1_HWIP][0] == IP_VERSION(13, 0, 2))) + (amdgpu_ip_version(adev, MP1_HWIP, 0) == + IP_VERSION(13, 0, 2))) continue; amdgpu_ras_query_error_status(adev, &info); - if (adev->ip_versions[MP0_HWIP][0] != IP_VERSION(11, 0, 2) && - adev->ip_versions[MP0_HWIP][0] != IP_VERSION(11, 0, 4) && - adev->ip_versions[MP0_HWIP][0] != IP_VERSION(13, 0, 0)) { + if (amdgpu_ip_version(adev, MP0_HWIP, 0) != + IP_VERSION(11, 0, 2) && + amdgpu_ip_version(adev, MP0_HWIP, 0) != + IP_VERSION(11, 0, 4) && + amdgpu_ip_version(adev, MP0_HWIP, 0) != + IP_VERSION(13, 0, 0)) { if (amdgpu_ras_reset_error_status(adev, info.head.block)) dev_warn(adev->dev, "Failed to reset error counter and error status"); } @@ -2026,9 +2245,11 @@ static void amdgpu_ras_do_recovery(struct work_struct *work) struct amdgpu_device *remote_adev = NULL; struct amdgpu_device *adev = ras->adev; struct list_head device_list, *device_list_handle = NULL; + struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev); + if (hive) + atomic_set(&hive->ras_recovery, 1); if (!ras->disable_ras_err_cnt_harvest) { - struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev); /* Build list of devices to query RAS related errors */ if (hive && adev->gmc.xgmi.num_physical_nodes > 1) { @@ -2045,7 +2266,6 @@ static void amdgpu_ras_do_recovery(struct work_struct *work) amdgpu_ras_log_on_err_counter(remote_adev); } - amdgpu_put_xgmi_hive(hive); } if (amdgpu_device_should_recover_gpu(ras->adev)) { @@ -2072,12 +2292,18 @@ static void amdgpu_ras_do_recovery(struct work_struct *work) if (ras->gpu_reset_flags & AMDGPU_RAS_GPU_RESET_MODE1_RESET) { ras->gpu_reset_flags &= ~AMDGPU_RAS_GPU_RESET_MODE1_RESET; set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags); + + psp_fatal_error_recovery_quirk(&adev->psp); } } amdgpu_device_gpu_recover(ras->adev, NULL, &reset_context); } atomic_set(&ras->in_recovery, 0); + if (hive) { + atomic_set(&hive->ras_recovery, 0); + amdgpu_put_xgmi_hive(hive); + } } /* alloc/realloc bps array */ @@ -2403,8 +2629,9 @@ static int amdgpu_ras_recovery_fini(struct amdgpu_device *adev) static bool amdgpu_ras_asic_supported(struct amdgpu_device *adev) { if (amdgpu_sriov_vf(adev)) { - switch (adev->ip_versions[MP0_HWIP][0]) { + switch (amdgpu_ip_version(adev, MP0_HWIP, 0)) { case IP_VERSION(13, 0, 2): + case IP_VERSION(13, 0, 6): return true; default: return false; @@ -2412,8 +2639,9 @@ static bool amdgpu_ras_asic_supported(struct amdgpu_device *adev) } if (adev->asic_type == CHIP_IP_DISCOVERY) { - switch (adev->ip_versions[MP0_HWIP][0]) { + switch (amdgpu_ip_version(adev, MP0_HWIP, 0)) { case IP_VERSION(13, 0, 0): + case IP_VERSION(13, 0, 6): case IP_VERSION(13, 0, 10): return true; default: @@ -2440,10 +2668,10 @@ static void amdgpu_ras_get_quirks(struct amdgpu_device *adev) if (!ctx) return; - if (strnstr(ctx->vbios_version, "D16406", - sizeof(ctx->vbios_version)) || - strnstr(ctx->vbios_version, "D36002", - sizeof(ctx->vbios_version))) + if (strnstr(ctx->vbios_pn, "D16406", + sizeof(ctx->vbios_pn)) || + strnstr(ctx->vbios_pn, "D36002", + sizeof(ctx->vbios_pn))) adev->ras_hw_enabled |= (1 << AMDGPU_RAS_BLOCK__GFX); } @@ -2485,8 +2713,12 @@ static void amdgpu_ras_check_supported(struct amdgpu_device *adev) /* VCN/JPEG RAS can be supported on both bare metal and * SRIOV environment */ - if (adev->ip_versions[VCN_HWIP][0] == IP_VERSION(2, 6, 0) || - adev->ip_versions[VCN_HWIP][0] == IP_VERSION(4, 0, 0)) + if (amdgpu_ip_version(adev, VCN_HWIP, 0) == + IP_VERSION(2, 6, 0) || + amdgpu_ip_version(adev, VCN_HWIP, 0) == + IP_VERSION(4, 0, 0) || + amdgpu_ip_version(adev, VCN_HWIP, 0) == + IP_VERSION(4, 0, 3)) adev->ras_hw_enabled |= (1 << AMDGPU_RAS_BLOCK__VCN | 1 << AMDGPU_RAS_BLOCK__JPEG); else @@ -2554,7 +2786,8 @@ static void amdgpu_ras_query_poison_mode(struct amdgpu_device *adev) return; /* Init poison supported flag, the default value is false */ - if (adev->gmc.xgmi.connected_to_cpu) { + if (adev->gmc.xgmi.connected_to_cpu || + adev->gmc.is_app_apu) { /* enabled by default when GPU is connected to CPU */ con->poison_supported = true; } else if (adev->df.funcs && @@ -2576,6 +2809,14 @@ static void amdgpu_ras_query_poison_mode(struct amdgpu_device *adev) } } +static int amdgpu_get_ras_schema(struct amdgpu_device *adev) +{ + return amdgpu_ras_is_poison_mode_supported(adev) ? AMDGPU_RAS_ERROR__POISON : 0 | + AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE | + AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE | + AMDGPU_RAS_ERROR__PARITY; +} + int amdgpu_ras_init(struct amdgpu_device *adev) { struct amdgpu_ras *con = amdgpu_ras_get_context(adev); @@ -2618,6 +2859,7 @@ int amdgpu_ras_init(struct amdgpu_device *adev) con->update_channel_flag = false; con->features = 0; + con->schema = 0; INIT_LIST_HEAD(&con->head); /* Might need get this flag from vbios. */ con->flags = RAS_DEFAULT_FLAGS; @@ -2625,7 +2867,7 @@ int amdgpu_ras_init(struct amdgpu_device *adev) /* initialize nbio ras function ahead of any other * ras functions so hardware fatal error interrupt * can be enabled as early as possible */ - switch (adev->ip_versions[NBIO_HWIP][0]) { + switch (amdgpu_ip_version(adev, NBIO_HWIP, 0)) { case IP_VERSION(7, 4, 0): case IP_VERSION(7, 4, 1): case IP_VERSION(7, 4, 4): @@ -2642,6 +2884,10 @@ int amdgpu_ras_init(struct amdgpu_device *adev) * check DF RAS */ adev->nbio.ras = &nbio_v4_3_ras; break; + case IP_VERSION(7, 9, 0): + if (!adev->gmc.is_app_apu) + adev->nbio.ras = &nbio_v7_9_ras; + break; default: /* nbio ras is not available */ break; @@ -2669,6 +2915,9 @@ int amdgpu_ras_init(struct amdgpu_device *adev) amdgpu_ras_query_poison_mode(adev); + /* Get RAS schema for particular SOC */ + con->schema = amdgpu_get_ras_schema(adev); + if (amdgpu_ras_fs_init(adev)) { r = -EINVAL; goto release_con; @@ -2765,23 +3014,28 @@ int amdgpu_ras_block_late_init(struct amdgpu_device *adev, goto cleanup; } - r = amdgpu_ras_sysfs_create(adev, ras_block); - if (r) - goto interrupt; + if (ras_obj->hw_ops && + (ras_obj->hw_ops->query_ras_error_count || + ras_obj->hw_ops->query_ras_error_status)) { + r = amdgpu_ras_sysfs_create(adev, ras_block); + if (r) + goto interrupt; - /* Those are the cached values at init. - */ - query_info = kzalloc(sizeof(struct ras_query_if), GFP_KERNEL); - if (!query_info) - return -ENOMEM; - memcpy(&query_info->head, ras_block, sizeof(struct ras_common_if)); + /* Those are the cached values at init. + */ + query_info = kzalloc(sizeof(*query_info), GFP_KERNEL); + if (!query_info) + return -ENOMEM; + memcpy(&query_info->head, ras_block, sizeof(struct ras_common_if)); - if (amdgpu_ras_query_error_count(adev, &ce_count, &ue_count, query_info) == 0) { - atomic_set(&con->ras_ce_count, ce_count); - atomic_set(&con->ras_ue_count, ue_count); + if (amdgpu_ras_query_error_count(adev, &ce_count, &ue_count, query_info) == 0) { + atomic_set(&con->ras_ce_count, ce_count); + atomic_set(&con->ras_ue_count, ue_count); + } + + kfree(query_info); } - kfree(query_info); return 0; interrupt: @@ -2958,10 +3212,6 @@ int amdgpu_ras_fini(struct amdgpu_device *adev) void amdgpu_ras_global_ras_isr(struct amdgpu_device *adev) { - amdgpu_ras_check_supported(adev); - if (!adev->ras_hw_enabled) - return; - if (atomic_cmpxchg(&amdgpu_ras_in_intr, 0, 1) == 0) { struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); @@ -3136,6 +3386,10 @@ int amdgpu_ras_is_supported(struct amdgpu_device *adev, * that the ras block supports ras function. */ if (!ret && + (block == AMDGPU_RAS_BLOCK__GFX || + block == AMDGPU_RAS_BLOCK__SDMA || + block == AMDGPU_RAS_BLOCK__VCN || + block == AMDGPU_RAS_BLOCK__JPEG) && amdgpu_ras_is_poison_mode_supported(adev) && amdgpu_ras_get_ras_block(adev, block, 0)) ret = 1; @@ -3152,6 +3406,47 @@ int amdgpu_ras_reset_gpu(struct amdgpu_device *adev) return 0; } +void amdgpu_ras_set_mca_debug_mode(struct amdgpu_device *adev, bool enable) +{ + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + + if (con) + con->is_mca_debug_mode = enable; +} + +bool amdgpu_ras_get_mca_debug_mode(struct amdgpu_device *adev) +{ + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + const struct amdgpu_mca_smu_funcs *mca_funcs = adev->mca.mca_funcs; + + if (!con) + return false; + + if (mca_funcs && mca_funcs->mca_set_debug_mode) + return con->is_mca_debug_mode; + else + return true; +} + +bool amdgpu_ras_get_error_query_mode(struct amdgpu_device *adev, + unsigned int *error_query_mode) +{ + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + const struct amdgpu_mca_smu_funcs *mca_funcs = adev->mca.mca_funcs; + + if (!con) { + *error_query_mode = AMDGPU_RAS_INVALID_ERROR_QUERY; + return false; + } + + if (mca_funcs && mca_funcs->mca_set_debug_mode) + *error_query_mode = + (con->is_mca_debug_mode) ? AMDGPU_RAS_DIRECT_ERROR_QUERY : AMDGPU_RAS_FIRMWARE_ERROR_QUERY; + else + *error_query_mode = AMDGPU_RAS_DIRECT_ERROR_QUERY; + + return true; +} /* Register each ip ras block into amdgpu ras */ int amdgpu_ras_register_ras_block(struct amdgpu_device *adev, @@ -3311,3 +3606,141 @@ void amdgpu_ras_inst_reset_ras_error_count(struct amdgpu_device *adev, WREG32(err_status_hi_offset, 0); } } + +int amdgpu_ras_error_data_init(struct ras_err_data *err_data) +{ + memset(err_data, 0, sizeof(*err_data)); + + INIT_LIST_HEAD(&err_data->err_node_list); + + return 0; +} + +static void amdgpu_ras_error_node_release(struct ras_err_node *err_node) +{ + if (!err_node) + return; + + list_del(&err_node->node); + kvfree(err_node); +} + +void amdgpu_ras_error_data_fini(struct ras_err_data *err_data) +{ + struct ras_err_node *err_node, *tmp; + + list_for_each_entry_safe(err_node, tmp, &err_data->err_node_list, node) + amdgpu_ras_error_node_release(err_node); +} + +static struct ras_err_node *amdgpu_ras_error_find_node_by_id(struct ras_err_data *err_data, + struct amdgpu_smuio_mcm_config_info *mcm_info) +{ + struct ras_err_node *err_node; + struct amdgpu_smuio_mcm_config_info *ref_id; + + if (!err_data || !mcm_info) + return NULL; + + for_each_ras_error(err_node, err_data) { + ref_id = &err_node->err_info.mcm_info; + + if (mcm_info->socket_id == ref_id->socket_id && + mcm_info->die_id == ref_id->die_id) + return err_node; + } + + return NULL; +} + +static struct ras_err_node *amdgpu_ras_error_node_new(void) +{ + struct ras_err_node *err_node; + + err_node = kvzalloc(sizeof(*err_node), GFP_KERNEL); + if (!err_node) + return NULL; + + INIT_LIST_HEAD(&err_node->node); + + return err_node; +} + +static int ras_err_info_cmp(void *priv, const struct list_head *a, const struct list_head *b) +{ + struct ras_err_node *nodea = container_of(a, struct ras_err_node, node); + struct ras_err_node *nodeb = container_of(b, struct ras_err_node, node); + struct amdgpu_smuio_mcm_config_info *infoa = &nodea->err_info.mcm_info; + struct amdgpu_smuio_mcm_config_info *infob = &nodeb->err_info.mcm_info; + + if (unlikely(infoa->socket_id != infob->socket_id)) + return infoa->socket_id - infob->socket_id; + else + return infoa->die_id - infob->die_id; + + return 0; +} + +static struct ras_err_info *amdgpu_ras_error_get_info(struct ras_err_data *err_data, + struct amdgpu_smuio_mcm_config_info *mcm_info) +{ + struct ras_err_node *err_node; + + err_node = amdgpu_ras_error_find_node_by_id(err_data, mcm_info); + if (err_node) + return &err_node->err_info; + + err_node = amdgpu_ras_error_node_new(); + if (!err_node) + return NULL; + + memcpy(&err_node->err_info.mcm_info, mcm_info, sizeof(*mcm_info)); + + err_data->err_list_count++; + list_add_tail(&err_node->node, &err_data->err_node_list); + list_sort(NULL, &err_data->err_node_list, ras_err_info_cmp); + + return &err_node->err_info; +} + +int amdgpu_ras_error_statistic_ue_count(struct ras_err_data *err_data, + struct amdgpu_smuio_mcm_config_info *mcm_info, u64 count) +{ + struct ras_err_info *err_info; + + if (!err_data || !mcm_info) + return -EINVAL; + + if (!count) + return 0; + + err_info = amdgpu_ras_error_get_info(err_data, mcm_info); + if (!err_info) + return -EINVAL; + + err_info->ue_count += count; + err_data->ue_count += count; + + return 0; +} + +int amdgpu_ras_error_statistic_ce_count(struct ras_err_data *err_data, + struct amdgpu_smuio_mcm_config_info *mcm_info, u64 count) +{ + struct ras_err_info *err_info; + + if (!err_data || !mcm_info) + return -EINVAL; + + if (!count) + return 0; + + err_info = amdgpu_ras_error_get_info(err_data, mcm_info); + if (!err_info) + return -EINVAL; + + err_info->ce_count += count; + err_data->ce_count += count; + + return 0; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h index ffb49b2d533a..19161916ac46 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h @@ -28,6 +28,7 @@ #include <linux/list.h> #include "ta_ras_if.h" #include "amdgpu_ras_eeprom.h" +#include "amdgpu_smuio.h" struct amdgpu_iv_entry; @@ -319,6 +320,12 @@ enum amdgpu_ras_ret { AMDGPU_RAS_PT, }; +enum amdgpu_ras_error_query_mode { + AMDGPU_RAS_INVALID_ERROR_QUERY = 0, + AMDGPU_RAS_DIRECT_ERROR_QUERY = 1, + AMDGPU_RAS_FIRMWARE_ERROR_QUERY = 2, +}; + /* ras error status reisger fields */ #define ERR_STATUS_LO__ERR_STATUS_VALID_FLAG__SHIFT 0x0 #define ERR_STATUS_LO__ERR_STATUS_VALID_FLAG_MASK 0x00000001L @@ -389,9 +396,12 @@ struct amdgpu_ras { /* ras infrastructure */ /* for ras itself. */ uint32_t features; + uint32_t schema; struct list_head head; /* sysfs */ struct device_attribute features_attr; + struct device_attribute version_attr; + struct device_attribute schema_attr; struct bin_attribute badpages_attr; struct dentry *de_ras_eeprom_table; /* block array */ @@ -430,23 +440,41 @@ struct amdgpu_ras { /* Indicates smu whether need update bad channel info */ bool update_channel_flag; + /* Record status of smu mca debug mode */ + bool is_mca_debug_mode; /* Record special requirements of gpu reset caller */ uint32_t gpu_reset_flags; }; struct ras_fs_data { - char sysfs_name[32]; + char sysfs_name[48]; char debugfs_name[32]; }; +struct ras_err_info { + struct amdgpu_smuio_mcm_config_info mcm_info; + u64 ce_count; + u64 ue_count; +}; + +struct ras_err_node { + struct list_head node; + struct ras_err_info err_info; +}; + struct ras_err_data { unsigned long ue_count; unsigned long ce_count; unsigned long err_addr_cnt; struct eeprom_table_record *err_addr; + u32 err_list_count; + struct list_head err_node_list; }; +#define for_each_ras_error(err_node, err_data) \ + list_for_each_entry(err_node, &(err_data)->err_node_list, node) + struct ras_err_handler_data { /* point to bad page records array */ struct eeprom_table_record *bps; @@ -691,6 +719,8 @@ void amdgpu_ras_debugfs_create_all(struct amdgpu_device *adev); int amdgpu_ras_query_error_status(struct amdgpu_device *adev, struct ras_query_if *info); +int amdgpu_ras_reset_error_count(struct amdgpu_device *adev, + enum amdgpu_ras_block block); int amdgpu_ras_reset_error_status(struct amdgpu_device *adev, enum amdgpu_ras_block block); @@ -743,6 +773,11 @@ struct amdgpu_ras* amdgpu_ras_get_context(struct amdgpu_device *adev); int amdgpu_ras_set_context(struct amdgpu_device *adev, struct amdgpu_ras *ras_con); +void amdgpu_ras_set_mca_debug_mode(struct amdgpu_device *adev, bool enable); +bool amdgpu_ras_get_mca_debug_mode(struct amdgpu_device *adev); +bool amdgpu_ras_get_error_query_mode(struct amdgpu_device *adev, + unsigned int *mode); + int amdgpu_ras_register_ras_block(struct amdgpu_device *adev, struct amdgpu_ras_block_object *ras_block_obj); void amdgpu_ras_interrupt_fatal_error_handler(struct amdgpu_device *adev); @@ -767,4 +802,12 @@ void amdgpu_ras_inst_reset_ras_error_count(struct amdgpu_device *adev, const struct amdgpu_ras_err_status_reg_entry *reg_list, uint32_t reg_list_size, uint32_t instance); + +int amdgpu_ras_error_data_init(struct ras_err_data *err_data); +void amdgpu_ras_error_data_fini(struct ras_err_data *err_data); +int amdgpu_ras_error_statistic_ce_count(struct ras_err_data *err_data, + struct amdgpu_smuio_mcm_config_info *mcm_info, u64 count); +int amdgpu_ras_error_statistic_ue_count(struct ras_err_data *err_data, + struct amdgpu_smuio_mcm_config_info *mcm_info, u64 count); + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c index 0648dfe559af..2fde93b00cab 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c @@ -149,17 +149,19 @@ RAS_TABLE_HEADER_SIZE - \ RAS_TABLE_V2_1_INFO_SIZE) / RAS_TABLE_RECORD_SIZE) -#define to_amdgpu_device(x) (container_of(x, struct amdgpu_ras, eeprom_control))->adev +#define to_amdgpu_device(x) ((container_of(x, struct amdgpu_ras, eeprom_control))->adev) static bool __is_ras_eeprom_supported(struct amdgpu_device *adev) { - switch (adev->ip_versions[MP1_HWIP][0]) { + switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) { case IP_VERSION(11, 0, 2): /* VEGA20 and ARCTURUS */ case IP_VERSION(11, 0, 7): /* Sienna cichlid */ case IP_VERSION(13, 0, 0): case IP_VERSION(13, 0, 2): /* Aldebaran */ case IP_VERSION(13, 0, 10): return true; + case IP_VERSION(13, 0, 6): + return (adev->gmc.is_app_apu) ? false : true; default: return false; } @@ -189,14 +191,14 @@ static bool __get_eeprom_i2c_addr(struct amdgpu_device *adev, return true; } - switch (adev->ip_versions[MP1_HWIP][0]) { + switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) { case IP_VERSION(11, 0, 2): /* VEGA20 and ARCTURUS */ if (adev->asic_type == CHIP_VEGA20) control->i2c_address = EEPROM_I2C_MADDR_0; - else if (strnstr(atom_ctx->vbios_version, + else if (strnstr(atom_ctx->vbios_pn, "D342", - sizeof(atom_ctx->vbios_version))) + sizeof(atom_ctx->vbios_pn))) control->i2c_address = EEPROM_I2C_MADDR_0; else control->i2c_address = EEPROM_I2C_MADDR_4; @@ -205,13 +207,20 @@ static bool __get_eeprom_i2c_addr(struct amdgpu_device *adev, control->i2c_address = EEPROM_I2C_MADDR_0; return true; case IP_VERSION(13, 0, 2): - if (strnstr(atom_ctx->vbios_version, "D673", - sizeof(atom_ctx->vbios_version))) + if (strnstr(atom_ctx->vbios_pn, "D673", + sizeof(atom_ctx->vbios_pn))) control->i2c_address = EEPROM_I2C_MADDR_4; else control->i2c_address = EEPROM_I2C_MADDR_0; return true; case IP_VERSION(13, 0, 0): + if (strnstr(atom_ctx->vbios_pn, "D707", + sizeof(atom_ctx->vbios_pn))) + control->i2c_address = EEPROM_I2C_MADDR_0; + else + control->i2c_address = EEPROM_I2C_MADDR_4; + return true; + case IP_VERSION(13, 0, 6): case IP_VERSION(13, 0, 10): control->i2c_address = EEPROM_I2C_MADDR_4; return true; @@ -613,7 +622,8 @@ amdgpu_ras_eeprom_append_table(struct amdgpu_ras_eeprom_control *control, __encode_table_record_to_buf(control, &record[i], pp); /* update bad channel bitmap */ - if (!(control->bad_channel_bitmap & (1 << record[i].mem_channel))) { + if ((record[i].mem_channel < BITS_PER_TYPE(control->bad_channel_bitmap)) && + !(control->bad_channel_bitmap & (1 << record[i].mem_channel))) { control->bad_channel_bitmap |= 1 << record[i].mem_channel; con->update_channel_flag = true; } @@ -966,7 +976,8 @@ int amdgpu_ras_eeprom_read(struct amdgpu_ras_eeprom_control *control, __decode_table_record_from_buf(control, &record[i], pp); /* update bad channel bitmap */ - if (!(control->bad_channel_bitmap & (1 << record[i].mem_channel))) { + if ((record[i].mem_channel < BITS_PER_TYPE(control->bad_channel_bitmap)) && + !(control->bad_channel_bitmap & (1 << record[i].mem_channel))) { control->bad_channel_bitmap |= 1 << record[i].mem_channel; con->update_channel_flag = true; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h index 5c4f93ee0c57..381101d2bf05 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h @@ -90,6 +90,7 @@ static inline void amdgpu_res_first(struct ttm_resource *res, cur->node = block; break; case TTM_PL_TT: + case AMDGPU_PL_DOORBELL: node = to_ttm_range_mgr_node(res)->mm_nodes; while (start >= node->size << PAGE_SHIFT) start -= node++->size << PAGE_SHIFT; @@ -111,7 +112,6 @@ fallback: cur->remaining = size; cur->node = NULL; WARN_ON(res && start + size > res->size); - return; } /** @@ -152,6 +152,7 @@ static inline void amdgpu_res_next(struct amdgpu_res_cursor *cur, uint64_t size) cur->size = min(amdgpu_vram_mgr_block_size(block), cur->remaining); break; case TTM_PL_TT: + case AMDGPU_PL_DOORBELL: node = cur->node; cur->node = ++node; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c index eec41ad30406..4baa300121d8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c @@ -21,24 +21,19 @@ * */ +#include <linux/devcoredump.h> +#include <generated/utsrelease.h> + #include "amdgpu_reset.h" #include "aldebaran.h" #include "sienna_cichlid.h" #include "smu_v13_0_10.h" -int amdgpu_reset_add_handler(struct amdgpu_reset_control *reset_ctl, - struct amdgpu_reset_handler *handler) -{ - /* TODO: Check if handler exists? */ - list_add_tail(&handler->handler_list, &reset_ctl->reset_handlers); - return 0; -} - int amdgpu_reset_init(struct amdgpu_device *adev) { int ret = 0; - switch (adev->ip_versions[MP1_HWIP][0]) { + switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) { case IP_VERSION(13, 0, 2): case IP_VERSION(13, 0, 6): ret = aldebaran_reset_init(adev); @@ -60,7 +55,7 @@ int amdgpu_reset_fini(struct amdgpu_device *adev) { int ret = 0; - switch (adev->ip_versions[MP1_HWIP][0]) { + switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) { case IP_VERSION(13, 0, 2): case IP_VERSION(13, 0, 6): ret = aldebaran_reset_fini(adev); @@ -87,7 +82,7 @@ int amdgpu_reset_prepare_hwcontext(struct amdgpu_device *adev, reset_handler = adev->reset_cntl->get_reset_handler( adev->reset_cntl, reset_context); if (!reset_handler) - return -ENOSYS; + return -EOPNOTSUPP; return reset_handler->prepare_hwcontext(adev->reset_cntl, reset_context); @@ -103,7 +98,7 @@ int amdgpu_reset_perform_reset(struct amdgpu_device *adev, reset_handler = adev->reset_cntl->get_reset_handler( adev->reset_cntl, reset_context); if (!reset_handler) - return -ENOSYS; + return -EOPNOTSUPP; ret = reset_handler->perform_reset(adev->reset_cntl, reset_context); if (ret) @@ -167,5 +162,82 @@ void amdgpu_device_unlock_reset_domain(struct amdgpu_reset_domain *reset_domain) up_write(&reset_domain->sem); } +#ifndef CONFIG_DEV_COREDUMP +void amdgpu_coredump(struct amdgpu_device *adev, bool vram_lost, + struct amdgpu_reset_context *reset_context) +{ +} +#else +static ssize_t +amdgpu_devcoredump_read(char *buffer, loff_t offset, size_t count, + void *data, size_t datalen) +{ + struct drm_printer p; + struct amdgpu_coredump_info *coredump = data; + struct drm_print_iterator iter; + int i; + + iter.data = buffer; + iter.offset = 0; + iter.start = offset; + iter.remain = count; + + p = drm_coredump_printer(&iter); + + drm_printf(&p, "**** AMDGPU Device Coredump ****\n"); + drm_printf(&p, "version: " AMDGPU_COREDUMP_VERSION "\n"); + drm_printf(&p, "kernel: " UTS_RELEASE "\n"); + drm_printf(&p, "module: " KBUILD_MODNAME "\n"); + drm_printf(&p, "time: %lld.%09ld\n", coredump->reset_time.tv_sec, + coredump->reset_time.tv_nsec); + + if (coredump->reset_task_info.pid) + drm_printf(&p, "process_name: %s PID: %d\n", + coredump->reset_task_info.process_name, + coredump->reset_task_info.pid); + + if (coredump->reset_vram_lost) + drm_printf(&p, "VRAM is lost due to GPU reset!\n"); + if (coredump->adev->reset_info.num_regs) { + drm_printf(&p, "AMDGPU register dumps:\nOffset: Value:\n"); + + for (i = 0; i < coredump->adev->reset_info.num_regs; i++) + drm_printf(&p, "0x%08x: 0x%08x\n", + coredump->adev->reset_info.reset_dump_reg_list[i], + coredump->adev->reset_info.reset_dump_reg_value[i]); + } + + return count - iter.remain; +} + +static void amdgpu_devcoredump_free(void *data) +{ + kfree(data); +} + +void amdgpu_coredump(struct amdgpu_device *adev, bool vram_lost, + struct amdgpu_reset_context *reset_context) +{ + struct amdgpu_coredump_info *coredump; + struct drm_device *dev = adev_to_drm(adev); + + coredump = kzalloc(sizeof(*coredump), GFP_NOWAIT); + if (!coredump) { + DRM_ERROR("%s: failed to allocate memory for coredump\n", __func__); + return; + } + coredump->reset_vram_lost = vram_lost; + + if (reset_context->job && reset_context->job->vm) + coredump->reset_task_info = reset_context->job->vm->task_info; + + coredump->adev = adev; + + ktime_get_ts64(&coredump->reset_time); + + dev_coredumpm(dev->dev, THIS_MODULE, coredump, 0, GFP_NOWAIT, + amdgpu_devcoredump_read, amdgpu_devcoredump_free); +} +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h index f4a501ff87d9..b0335a1c5e90 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h @@ -26,6 +26,8 @@ #include "amdgpu.h" +#define AMDGPU_RESET_MAX_HANDLERS 5 + enum AMDGPU_RESET_FLAGS { AMDGPU_NEED_FULL_RESET = 0, @@ -44,7 +46,6 @@ struct amdgpu_reset_context { struct amdgpu_reset_handler { enum amd_reset_method reset_method; - struct list_head handler_list; int (*prepare_env)(struct amdgpu_reset_control *reset_ctl, struct amdgpu_reset_context *context); int (*prepare_hwcontext)(struct amdgpu_reset_control *reset_ctl, @@ -63,7 +64,8 @@ struct amdgpu_reset_control { void *handle; struct work_struct reset_work; struct mutex reset_lock; - struct list_head reset_handlers; + struct amdgpu_reset_handler *( + *reset_handlers)[AMDGPU_RESET_MAX_HANDLERS]; atomic_t in_reset; enum amd_reset_method active_reset; struct amdgpu_reset_handler *(*get_reset_handler)( @@ -87,6 +89,17 @@ struct amdgpu_reset_domain { atomic_t reset_res; }; +#ifdef CONFIG_DEV_COREDUMP + +#define AMDGPU_COREDUMP_VERSION "1" + +struct amdgpu_coredump_info { + struct amdgpu_device *adev; + struct amdgpu_task_info reset_task_info; + struct timespec64 reset_time; + bool reset_vram_lost; +}; +#endif int amdgpu_reset_init(struct amdgpu_device *adev); int amdgpu_reset_fini(struct amdgpu_device *adev); @@ -97,8 +110,10 @@ int amdgpu_reset_prepare_hwcontext(struct amdgpu_device *adev, int amdgpu_reset_perform_reset(struct amdgpu_device *adev, struct amdgpu_reset_context *reset_context); -int amdgpu_reset_add_handler(struct amdgpu_reset_control *reset_ctl, - struct amdgpu_reset_handler *handler); +int amdgpu_reset_prepare_env(struct amdgpu_device *adev, + struct amdgpu_reset_context *reset_context); +int amdgpu_reset_restore_env(struct amdgpu_device *adev, + struct amdgpu_reset_context *reset_context); struct amdgpu_reset_domain *amdgpu_reset_create_reset_domain(enum amdgpu_reset_domain_type type, char *wq_name); @@ -126,4 +141,11 @@ void amdgpu_device_lock_reset_domain(struct amdgpu_reset_domain *reset_domain); void amdgpu_device_unlock_reset_domain(struct amdgpu_reset_domain *reset_domain); +void amdgpu_coredump(struct amdgpu_device *adev, bool vram_lost, + struct amdgpu_reset_context *reset_context); + +#define for_each_handler(i, handler, reset_ctl) \ + for (i = 0; (i < AMDGPU_RESET_MAX_HANDLERS) && \ + (handler = (*reset_ctl->reset_handlers)[i]); \ + ++i) #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c index 80d6e132e409..6a80d3ec887e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c @@ -434,8 +434,12 @@ bool amdgpu_ring_soft_recovery(struct amdgpu_ring *ring, unsigned int vmid, struct dma_fence *fence) { unsigned long flags; + ktime_t deadline; - ktime_t deadline = ktime_add_us(ktime_get(), 10000); + if (unlikely(ring->adev->debug_disable_soft_recovery)) + return false; + + deadline = ktime_add_us(ktime_get(), 10000); if (amdgpu_sriov_vf(ring->adev) || !ring->funcs->soft_recovery || !fence) return false; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index 028ff075db51..bbb53720a018 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -44,6 +44,7 @@ struct amdgpu_vm; #define AMDGPU_MAX_COMPUTE_RINGS 8 #define AMDGPU_MAX_VCE_RINGS 3 #define AMDGPU_MAX_UVD_ENC_RINGS 2 +#define AMDGPU_MAX_VPE_RINGS 2 enum amdgpu_ring_priority_level { AMDGPU_RING_PRIO_0, @@ -77,8 +78,10 @@ enum amdgpu_ring_type { AMDGPU_RING_TYPE_VCN_DEC = AMDGPU_HW_IP_VCN_DEC, AMDGPU_RING_TYPE_VCN_ENC = AMDGPU_HW_IP_VCN_ENC, AMDGPU_RING_TYPE_VCN_JPEG = AMDGPU_HW_IP_VCN_JPEG, + AMDGPU_RING_TYPE_VPE = AMDGPU_HW_IP_VPE, AMDGPU_RING_TYPE_KIQ, - AMDGPU_RING_TYPE_MES + AMDGPU_RING_TYPE_MES, + AMDGPU_RING_TYPE_UMSCH_MM, }; enum amdgpu_ib_pool_type { @@ -389,7 +392,7 @@ static inline void amdgpu_ring_write_multiple(struct amdgpu_ring *ring, occupied = ring->wptr & ring->buf_mask; dst = (void *)&ring->ring[occupied]; chunk1 = ring->buf_mask + 1 - occupied; - chunk1 = (chunk1 >= count_dw) ? count_dw: chunk1; + chunk1 = (chunk1 >= count_dw) ? count_dw : chunk1; chunk2 = count_dw - chunk1; chunk1 <<= 2; chunk2 <<= 2; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h index b22d4fb2a847..d3186b570b82 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h @@ -56,6 +56,15 @@ enum amdgpu_ring_mux_offset_type { AMDGPU_MUX_OFFSET_TYPE_CE, }; +enum ib_complete_status { + /* IB not started/reset value, default value. */ + IB_COMPLETION_STATUS_DEFAULT = 0, + /* IB preempted, started but not completed. */ + IB_COMPLETION_STATUS_PREEMPTED = 1, + /* IB completed. */ + IB_COMPLETION_STATUS_COMPLETED = 2, +}; + struct amdgpu_ring_mux { struct amdgpu_ring *real_ring; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h index 80b263646966..b591d33af264 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h @@ -26,6 +26,8 @@ #include "clearstate_defs.h" +#define AMDGPU_MAX_RLC_INSTANCES 8 + /* firmware ID used in rlc toc */ typedef enum _FIRMWARE_ID_ { FIRMWARE_ID_INVALID = 0, @@ -201,7 +203,7 @@ struct amdgpu_rlc { u32 cp_table_size; /* safe mode for updating CG/PG state */ - bool in_safe_mode[8]; + bool in_safe_mode[AMDGPU_MAX_RLC_INSTANCES]; const struct amdgpu_rlc_funcs *funcs; /* for firmware data */ @@ -257,7 +259,7 @@ struct amdgpu_rlc { bool rlcg_reg_access_supported; /* registers for rlcg indirect reg access */ - struct amdgpu_rlcg_reg_access_ctrl reg_access_ctrl; + struct amdgpu_rlcg_reg_access_ctrl reg_access_ctrl[AMDGPU_MAX_RLC_INSTANCES]; }; void amdgpu_gfx_rlc_enter_safe_mode(struct amdgpu_device *adev, int xcc_id); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c index c6b4337eb20c..10df731998b2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c @@ -81,7 +81,7 @@ int amdgpu_sa_bo_new(struct amdgpu_sa_manager *sa_manager, unsigned int size) { struct drm_suballoc *sa = drm_suballoc_new(&sa_manager->base, size, - GFP_KERNEL, true, 0); + GFP_KERNEL, false, 0); if (IS_ERR(sa)) { *sa_bo = NULL; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c index dacf281d2b21..1d9d187de6ee 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c @@ -208,7 +208,7 @@ int amdgpu_sdma_init_microcode(struct amdgpu_device *adev, const struct sdma_firmware_header_v2_0 *sdma_hdr; uint16_t version_major; char ucode_prefix[30]; - char fw_name[40]; + char fw_name[52]; amdgpu_ucode_ip_version_decode(adev, SDMA0_HWIP, ucode_prefix, sizeof(ucode_prefix)); if (instance == 0) @@ -239,9 +239,6 @@ int amdgpu_sdma_init_microcode(struct amdgpu_device *adev, sizeof(struct amdgpu_sdma_instance)); } - if (amdgpu_sriov_vf(adev)) - return 0; - DRM_DEBUG("psp_load == '%s'\n", adev->firmware.load_type == AMDGPU_FW_LOAD_PSP ? "true" : "false"); @@ -254,8 +251,11 @@ int amdgpu_sdma_init_microcode(struct amdgpu_device *adev, else { /* Use a single copy per SDMA firmware type. PSP uses the same instance for all * groups of SDMAs */ - if (adev->ip_versions[SDMA0_HWIP][0] == IP_VERSION(4, 4, 2) && - adev->firmware.load_type == AMDGPU_FW_LOAD_PSP && + if (amdgpu_ip_version(adev, SDMA0_HWIP, + 0) == + IP_VERSION(4, 4, 2) && + adev->firmware.load_type == + AMDGPU_FW_LOAD_PSP && adev->sdma.num_inst_per_aid == i) { break; } @@ -292,27 +292,6 @@ out: return err; } -void amdgpu_sdma_unset_buffer_funcs_helper(struct amdgpu_device *adev) -{ - struct amdgpu_ring *sdma; - int i; - - for (i = 0; i < adev->sdma.num_instances; i++) { - if (adev->sdma.has_page_queue) { - sdma = &adev->sdma.instance[i].page; - if (adev->mman.buffer_funcs_ring == sdma) { - amdgpu_ttm_set_buffer_funcs_status(adev, false); - break; - } - } - sdma = &adev->sdma.instance[i].ring; - if (adev->mman.buffer_funcs_ring == sdma) { - amdgpu_ttm_set_buffer_funcs_status(adev, false); - break; - } - } -} - int amdgpu_sdma_ras_sw_init(struct amdgpu_device *adev) { int err = 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h index 513ac22120c1..173a2a308078 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h @@ -169,7 +169,6 @@ int amdgpu_sdma_init_microcode(struct amdgpu_device *adev, u32 instance, bool duplicate); void amdgpu_sdma_destroy_inst_ctx(struct amdgpu_device *adev, bool duplicate); -void amdgpu_sdma_unset_buffer_funcs_helper(struct amdgpu_device *adev); int amdgpu_sdma_ras_sw_init(struct amdgpu_device *adev); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_smuio.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_smuio.h index 89c38d864471..ff4435181055 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_smuio.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_smuio.h @@ -23,6 +23,18 @@ #ifndef __AMDGPU_SMUIO_H__ #define __AMDGPU_SMUIO_H__ +enum amdgpu_pkg_type { + AMDGPU_PKG_TYPE_APU = 2, + AMDGPU_PKG_TYPE_CEM = 3, + AMDGPU_PKG_TYPE_OAM = 4, + AMDGPU_PKG_TYPE_UNKNOWN, +}; + +struct amdgpu_smuio_mcm_config_info { + int socket_id; + int die_id; +}; + struct amdgpu_smuio_funcs { u32 (*get_rom_index_offset)(struct amdgpu_device *adev); u32 (*get_rom_data_offset)(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h index 525dffbe046a..2fd1bfb35916 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h @@ -432,7 +432,7 @@ TRACE_EVENT(amdgpu_vm_flush, ), TP_printk("ring=%s, id=%u, hub=%u, pd_addr=%010Lx", __get_str(ring), __entry->vmid, - __entry->vm_hub,__entry->pd_addr) + __entry->vm_hub, __entry->pd_addr) ); DECLARE_EVENT_CLASS(amdgpu_pasid, @@ -494,7 +494,7 @@ TRACE_EVENT(amdgpu_cs_bo_status, ); TRACE_EVENT(amdgpu_bo_move, - TP_PROTO(struct amdgpu_bo* bo, uint32_t new_placement, uint32_t old_placement), + TP_PROTO(struct amdgpu_bo *bo, uint32_t new_placement, uint32_t old_placement), TP_ARGS(bo, new_placement, old_placement), TP_STRUCT__entry( __field(struct amdgpu_bo *, bo) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 0534ab716809..ab4a762aed5b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -49,7 +49,6 @@ #include <drm/ttm/ttm_tt.h> #include <drm/amdgpu_drm.h> -#include <drm/drm_drv.h> #include "amdgpu.h" #include "amdgpu_object.h" @@ -127,6 +126,7 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo, case AMDGPU_PL_GDS: case AMDGPU_PL_GWS: case AMDGPU_PL_OA: + case AMDGPU_PL_DOORBELL: placement->num_placement = 0; placement->num_busy_placement = 0; return; @@ -496,9 +496,11 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict, if (old_mem->mem_type == AMDGPU_PL_GDS || old_mem->mem_type == AMDGPU_PL_GWS || old_mem->mem_type == AMDGPU_PL_OA || + old_mem->mem_type == AMDGPU_PL_DOORBELL || new_mem->mem_type == AMDGPU_PL_GDS || new_mem->mem_type == AMDGPU_PL_GWS || - new_mem->mem_type == AMDGPU_PL_OA) { + new_mem->mem_type == AMDGPU_PL_OA || + new_mem->mem_type == AMDGPU_PL_DOORBELL) { /* Nothing to save here */ ttm_bo_move_null(bo, new_mem); goto out; @@ -582,6 +584,12 @@ static int amdgpu_ttm_io_mem_reserve(struct ttm_device *bdev, mem->bus.offset += adev->gmc.aper_base; mem->bus.is_iomem = true; break; + case AMDGPU_PL_DOORBELL: + mem->bus.offset = mem->start << PAGE_SHIFT; + mem->bus.offset += adev->doorbell.base; + mem->bus.is_iomem = true; + mem->bus.caching = ttm_uncached; + break; default: return -EINVAL; } @@ -596,6 +604,10 @@ static unsigned long amdgpu_ttm_io_mem_pfn(struct ttm_buffer_object *bo, amdgpu_res_first(bo->resource, (u64)page_offset << PAGE_SHIFT, 0, &cursor); + + if (bo->resource->mem_type == AMDGPU_PL_DOORBELL) + return ((uint64_t)(adev->doorbell.base + cursor.start)) >> PAGE_SHIFT; + return (adev->gmc.aper_base + cursor.start) >> PAGE_SHIFT; } @@ -947,10 +959,8 @@ int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo) return 0; addr = amdgpu_gmc_agp_addr(bo); - if (addr != AMDGPU_BO_INVALID_OFFSET) { - bo->resource->start = addr >> PAGE_SHIFT; + if (addr != AMDGPU_BO_INVALID_OFFSET) return 0; - } /* allocate GART space */ placement.num_placement = 1; @@ -1305,6 +1315,7 @@ uint64_t amdgpu_ttm_tt_pde_flags(struct ttm_tt *ttm, struct ttm_resource *mem) flags |= AMDGPU_PTE_VALID; if (mem && (mem->mem_type == TTM_PL_TT || + mem->mem_type == AMDGPU_PL_DOORBELL || mem->mem_type == AMDGPU_PL_PREEMPT)) { flags |= AMDGPU_PTE_SYSTEM; @@ -1714,7 +1725,8 @@ static int amdgpu_ttm_reserve_tmr(struct amdgpu_device *adev) reserve_size = amdgpu_atomfirmware_get_fw_reserved_fb_size(adev); - if (!adev->bios && adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3)) + if (!adev->bios && + amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3)) reserve_size = max(reserve_size, (uint32_t)280 << 20); else if (!reserve_size) reserve_size = DISCOVERY_TMR_OFFSET; @@ -1924,6 +1936,20 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) DRM_INFO("amdgpu: %uM of GTT memory ready.\n", (unsigned int)(gtt_size / (1024 * 1024))); + /* Initiailize doorbell pool on PCI BAR */ + r = amdgpu_ttm_init_on_chip(adev, AMDGPU_PL_DOORBELL, adev->doorbell.size / PAGE_SIZE); + if (r) { + DRM_ERROR("Failed initializing doorbell heap.\n"); + return r; + } + + /* Create a boorbell page for kernel usages */ + r = amdgpu_doorbell_create_kernel_doorbells(adev); + if (r) { + DRM_ERROR("Failed to initialize kernel doorbells.\n"); + return r; + } + /* Initialize preemptible memory pool */ r = amdgpu_preempt_mgr_init(adev); if (r) { @@ -2392,7 +2418,7 @@ static ssize_t amdgpu_iomem_read(struct file *f, char __user *buf, struct page *p; void *ptr; - bytes = bytes < size ? bytes : size; + bytes = min(bytes, size); /* Translate the bus address to a physical address. If * the domain is NULL it means there is no IOMMU active @@ -2447,7 +2473,7 @@ static ssize_t amdgpu_iomem_write(struct file *f, const char __user *buf, struct page *p; void *ptr; - bytes = bytes < size ? bytes : size; + bytes = min(bytes, size); addr = dom ? iommu_iova_to_phys(dom, addr) : addr; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h index 6d0d66e40db9..65ec82141a8e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h @@ -33,12 +33,16 @@ #define AMDGPU_PL_GWS (TTM_PL_PRIV + 1) #define AMDGPU_PL_OA (TTM_PL_PRIV + 2) #define AMDGPU_PL_PREEMPT (TTM_PL_PRIV + 3) +#define AMDGPU_PL_DOORBELL (TTM_PL_PRIV + 4) #define AMDGPU_GTT_MAX_TRANSFER_SIZE 512 #define AMDGPU_GTT_NUM_TRANSFER_WINDOWS 2 #define AMDGPU_POISON 0xd0bed0be +extern const struct attribute_group amdgpu_vram_mgr_attr_group; +extern const struct attribute_group amdgpu_gtt_mgr_attr_group; + struct hmm_range; struct amdgpu_gtt_mgr { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c index 16807ff96dc9..b14127429f30 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c @@ -642,6 +642,8 @@ const char *amdgpu_ucode_name(enum AMDGPU_UCODE_ID ucode_id) return "SMC"; case AMDGPU_UCODE_ID_PPTABLE: return "PPTABLE"; + case AMDGPU_UCODE_ID_P2S_TABLE: + return "P2STABLE"; case AMDGPU_UCODE_ID_UVD: return "UVD"; case AMDGPU_UCODE_ID_UVD1: @@ -664,20 +666,42 @@ const char *amdgpu_ucode_name(enum AMDGPU_UCODE_ID ucode_id) return "DMCUB"; case AMDGPU_UCODE_ID_CAP: return "CAP"; + case AMDGPU_UCODE_ID_VPE_CTX: + return "VPE_CTX"; + case AMDGPU_UCODE_ID_VPE_CTL: + return "VPE_CTL"; + case AMDGPU_UCODE_ID_VPE: + return "VPE"; + case AMDGPU_UCODE_ID_UMSCH_MM_UCODE: + return "UMSCH_MM_UCODE"; + case AMDGPU_UCODE_ID_UMSCH_MM_DATA: + return "UMSCH_MM_DATA"; + case AMDGPU_UCODE_ID_UMSCH_MM_CMD_BUFFER: + return "UMSCH_MM_CMD_BUFFER"; default: return "UNKNOWN UCODE"; } } +static inline int amdgpu_ucode_is_valid(uint32_t fw_version) +{ + if (!fw_version) + return -EINVAL; + + return 0; +} + #define FW_VERSION_ATTR(name, mode, field) \ static ssize_t show_##name(struct device *dev, \ - struct device_attribute *attr, \ - char *buf) \ + struct device_attribute *attr, char *buf) \ { \ struct drm_device *ddev = dev_get_drvdata(dev); \ struct amdgpu_device *adev = drm_to_adev(ddev); \ \ - return sysfs_emit(buf, "0x%08x\n", adev->field); \ + if (!buf) \ + return amdgpu_ucode_is_valid(adev->field); \ + \ + return sysfs_emit(buf, "0x%08x\n", adev->field); \ } \ static DEVICE_ATTR(name, mode, show_##name, NULL) @@ -703,6 +727,8 @@ FW_VERSION_ATTR(sdma_fw_version, 0444, sdma.instance[0].fw_version); FW_VERSION_ATTR(sdma2_fw_version, 0444, sdma.instance[1].fw_version); FW_VERSION_ATTR(vcn_fw_version, 0444, vcn.fw_version); FW_VERSION_ATTR(dmcu_fw_version, 0444, dm.dmcu_fw_version); +FW_VERSION_ATTR(mes_fw_version, 0444, mes.sched_version & AMDGPU_MES_VERSION_MASK); +FW_VERSION_ATTR(mes_kiq_fw_version, 0444, mes.kiq_version & AMDGPU_MES_VERSION_MASK); static struct attribute *fw_attrs[] = { &dev_attr_vce_fw_version.attr, &dev_attr_uvd_fw_version.attr, @@ -716,12 +742,28 @@ static struct attribute *fw_attrs[] = { &dev_attr_smc_fw_version.attr, &dev_attr_sdma_fw_version.attr, &dev_attr_sdma2_fw_version.attr, &dev_attr_vcn_fw_version.attr, &dev_attr_dmcu_fw_version.attr, &dev_attr_imu_fw_version.attr, + &dev_attr_mes_fw_version.attr, &dev_attr_mes_kiq_fw_version.attr, NULL }; +#define to_dev_attr(x) container_of(x, struct device_attribute, attr) + +static umode_t amdgpu_ucode_sys_visible(struct kobject *kobj, + struct attribute *attr, int idx) +{ + struct device_attribute *dev_attr = to_dev_attr(attr); + struct device *dev = kobj_to_dev(kobj); + + if (dev_attr->show(dev, dev_attr, NULL) == -EINVAL) + return 0; + + return attr->mode; +} + static const struct attribute_group fw_attr_group = { .name = "fw_version", - .attrs = fw_attrs + .attrs = fw_attrs, + .is_visible = amdgpu_ucode_sys_visible }; int amdgpu_ucode_sysfs_init(struct amdgpu_device *adev) @@ -746,6 +788,8 @@ static int amdgpu_ucode_init_single_fw(struct amdgpu_device *adev, const struct mes_firmware_header_v1_0 *mes_hdr = NULL; const struct sdma_firmware_header_v2_0 *sdma_hdr = NULL; const struct imu_firmware_header_v1_0 *imu_hdr = NULL; + const struct vpe_firmware_header_v1_0 *vpe_hdr = NULL; + const struct umsch_mm_firmware_header_v1_0 *umsch_mm_hdr = NULL; u8 *ucode_addr; if (!ucode->fw) @@ -765,6 +809,8 @@ static int amdgpu_ucode_init_single_fw(struct amdgpu_device *adev, mes_hdr = (const struct mes_firmware_header_v1_0 *)ucode->fw->data; sdma_hdr = (const struct sdma_firmware_header_v2_0 *)ucode->fw->data; imu_hdr = (const struct imu_firmware_header_v1_0 *)ucode->fw->data; + vpe_hdr = (const struct vpe_firmware_header_v1_0 *)ucode->fw->data; + umsch_mm_hdr = (const struct umsch_mm_firmware_header_v1_0 *)ucode->fw->data; if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { switch (ucode->ucode_id) { @@ -881,6 +927,10 @@ static int amdgpu_ucode_init_single_fw(struct amdgpu_device *adev, ucode->ucode_size = ucode->fw->size; ucode_addr = (u8 *)ucode->fw->data; break; + case AMDGPU_UCODE_ID_P2S_TABLE: + ucode->ucode_size = ucode->fw->size; + ucode_addr = (u8 *)ucode->fw->data; + break; case AMDGPU_UCODE_ID_IMU_I: ucode->ucode_size = le32_to_cpu(imu_hdr->imu_iram_ucode_size_bytes); ucode_addr = (u8 *)ucode->fw->data + @@ -947,6 +997,26 @@ static int amdgpu_ucode_init_single_fw(struct amdgpu_device *adev, ucode_addr = (u8 *)ucode->fw->data + le32_to_cpu(cpv2_hdr->data_offset_bytes); break; + case AMDGPU_UCODE_ID_VPE_CTX: + ucode->ucode_size = le32_to_cpu(vpe_hdr->ctx_ucode_size_bytes); + ucode_addr = (u8 *)ucode->fw->data + + le32_to_cpu(vpe_hdr->header.ucode_array_offset_bytes); + break; + case AMDGPU_UCODE_ID_VPE_CTL: + ucode->ucode_size = le32_to_cpu(vpe_hdr->ctl_ucode_size_bytes); + ucode_addr = (u8 *)ucode->fw->data + + le32_to_cpu(vpe_hdr->ctl_ucode_offset); + break; + case AMDGPU_UCODE_ID_UMSCH_MM_UCODE: + ucode->ucode_size = le32_to_cpu(umsch_mm_hdr->umsch_mm_ucode_size_bytes); + ucode_addr = (u8 *)ucode->fw->data + + le32_to_cpu(umsch_mm_hdr->header.ucode_array_offset_bytes); + break; + case AMDGPU_UCODE_ID_UMSCH_MM_DATA: + ucode->ucode_size = le32_to_cpu(umsch_mm_hdr->umsch_mm_ucode_data_size_bytes); + ucode_addr = (u8 *)ucode->fw->data + + le32_to_cpu(umsch_mm_hdr->umsch_mm_ucode_data_offset_bytes); + break; default: ucode->ucode_size = le32_to_cpu(header->ucode_size_bytes); ucode_addr = (u8 *)ucode->fw->data + @@ -1058,7 +1128,7 @@ int amdgpu_ucode_init_bo(struct amdgpu_device *adev) static const char *amdgpu_ucode_legacy_naming(struct amdgpu_device *adev, int block_type) { if (block_type == MP0_HWIP) { - switch (adev->ip_versions[MP0_HWIP][0]) { + switch (amdgpu_ip_version(adev, MP0_HWIP, 0)) { case IP_VERSION(9, 0, 0): switch (adev->asic_type) { case CHIP_VEGA10: @@ -1109,7 +1179,7 @@ static const char *amdgpu_ucode_legacy_naming(struct amdgpu_device *adev, int bl return "yellow_carp"; } } else if (block_type == MP1_HWIP) { - switch (adev->ip_versions[MP1_HWIP][0]) { + switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) { case IP_VERSION(9, 0, 0): case IP_VERSION(10, 0, 0): case IP_VERSION(10, 0, 1): @@ -1135,7 +1205,7 @@ static const char *amdgpu_ucode_legacy_naming(struct amdgpu_device *adev, int bl return "aldebaran_smc"; } } else if (block_type == SDMA0_HWIP) { - switch (adev->ip_versions[SDMA0_HWIP][0]) { + switch (amdgpu_ip_version(adev, SDMA0_HWIP, 0)) { case IP_VERSION(4, 0, 0): return "vega10_sdma"; case IP_VERSION(4, 0, 1): @@ -1179,7 +1249,7 @@ static const char *amdgpu_ucode_legacy_naming(struct amdgpu_device *adev, int bl return "vangogh_sdma"; } } else if (block_type == UVD_HWIP) { - switch (adev->ip_versions[UVD_HWIP][0]) { + switch (amdgpu_ip_version(adev, UVD_HWIP, 0)) { case IP_VERSION(1, 0, 0): case IP_VERSION(1, 0, 1): if (adev->apu_flags & AMD_APU_IS_RAVEN2) @@ -1204,7 +1274,8 @@ static const char *amdgpu_ucode_legacy_naming(struct amdgpu_device *adev, int bl case IP_VERSION(3, 0, 0): case IP_VERSION(3, 0, 64): case IP_VERSION(3, 0, 192): - if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 3, 0)) + if (amdgpu_ip_version(adev, GC_HWIP, 0) == + IP_VERSION(10, 3, 0)) return "sienna_cichlid_vcn"; return "navy_flounder_vcn"; case IP_VERSION(3, 0, 2): @@ -1217,7 +1288,7 @@ static const char *amdgpu_ucode_legacy_naming(struct amdgpu_device *adev, int bl return "yellow_carp_vcn"; } } else if (block_type == GC_HWIP) { - switch (adev->ip_versions[GC_HWIP][0]) { + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { case IP_VERSION(9, 0, 1): return "vega10"; case IP_VERSION(9, 2, 1): @@ -1270,7 +1341,7 @@ void amdgpu_ucode_ip_version_decode(struct amdgpu_device *adev, int block_type, int maj, min, rev; char *ip_name; const char *legacy; - uint32_t version = adev->ip_versions[block_type][0]; + uint32_t version = amdgpu_ip_version(adev, block_type, 0); legacy = amdgpu_ucode_legacy_naming(adev, block_type); if (legacy) { @@ -1294,6 +1365,9 @@ void amdgpu_ucode_ip_version_decode(struct amdgpu_device *adev, int block_type, case UVD_HWIP: ip_name = "vcn"; break; + case VPE_HWIP: + ip_name = "vpe"; + break; default: BUG(); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h index b03321e7d2d8..4244a13f9f22 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h @@ -315,6 +315,36 @@ struct sdma_firmware_header_v2_0 { uint32_t ctl_jt_size; /* control thread size of jt */ }; +/* version_major=1, version_minor=0 */ +struct vpe_firmware_header_v1_0 { + struct common_firmware_header header; + uint32_t ucode_feature_version; + uint32_t ctx_ucode_size_bytes; /* context thread ucode size */ + uint32_t ctx_jt_offset; /* context thread jt location */ + uint32_t ctx_jt_size; /* context thread size of jt */ + uint32_t ctl_ucode_offset; + uint32_t ctl_ucode_size_bytes; /* control thread ucode size */ + uint32_t ctl_jt_offset; /* control thread jt location */ + uint32_t ctl_jt_size; /* control thread size of jt */ +}; + +/* version_major=1, version_minor=0 */ +struct umsch_mm_firmware_header_v1_0 { + struct common_firmware_header header; + uint32_t umsch_mm_ucode_version; + uint32_t umsch_mm_ucode_size_bytes; + uint32_t umsch_mm_ucode_offset_bytes; + uint32_t umsch_mm_ucode_data_version; + uint32_t umsch_mm_ucode_data_size_bytes; + uint32_t umsch_mm_ucode_data_offset_bytes; + uint32_t umsch_mm_irq_start_addr_lo; + uint32_t umsch_mm_irq_start_addr_hi; + uint32_t umsch_mm_uc_start_addr_lo; + uint32_t umsch_mm_uc_start_addr_hi; + uint32_t umsch_mm_data_start_addr_lo; + uint32_t umsch_mm_data_start_addr_hi; +}; + /* gpu info payload */ struct gpu_info_firmware_v1_0 { uint32_t gc_num_se; @@ -474,6 +504,13 @@ enum AMDGPU_UCODE_ID { AMDGPU_UCODE_ID_VCN0_RAM, AMDGPU_UCODE_ID_VCN1_RAM, AMDGPU_UCODE_ID_DMCUB, + AMDGPU_UCODE_ID_VPE_CTX, + AMDGPU_UCODE_ID_VPE_CTL, + AMDGPU_UCODE_ID_VPE, + AMDGPU_UCODE_ID_UMSCH_MM_UCODE, + AMDGPU_UCODE_ID_UMSCH_MM_DATA, + AMDGPU_UCODE_ID_UMSCH_MM_CMD_BUFFER, + AMDGPU_UCODE_ID_P2S_TABLE, AMDGPU_UCODE_ID_MAXIMUM, }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c index db0d94ca4ffc..d65e21914d8c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c @@ -28,7 +28,7 @@ static int amdgpu_umc_convert_error_address(struct amdgpu_device *adev, struct ras_err_data *err_data, uint64_t err_addr, uint32_t ch_inst, uint32_t umc_inst) { - switch (adev->ip_versions[UMC_HWIP][0]) { + switch (amdgpu_ip_version(adev, UMC_HWIP, 0)) { case IP_VERSION(6, 7, 0): umc_v6_7_convert_error_address(adev, err_data, err_addr, ch_inst, umc_inst); @@ -45,8 +45,12 @@ static int amdgpu_umc_convert_error_address(struct amdgpu_device *adev, int amdgpu_umc_page_retirement_mca(struct amdgpu_device *adev, uint64_t err_addr, uint32_t ch_inst, uint32_t umc_inst) { - struct ras_err_data err_data = {0, 0, 0, NULL}; - int ret = AMDGPU_RAS_FAIL; + struct ras_err_data err_data; + int ret; + + ret = amdgpu_ras_error_data_init(&err_data); + if (ret) + return ret; err_data.err_addr = kcalloc(adev->umc.max_ras_err_cnt_per_query, @@ -54,7 +58,8 @@ int amdgpu_umc_page_retirement_mca(struct amdgpu_device *adev, if (!err_data.err_addr) { dev_warn(adev->dev, "Failed to alloc memory for umc error record in MCA notifier!\n"); - return AMDGPU_RAS_FAIL; + ret = AMDGPU_RAS_FAIL; + goto out_fini_err_data; } /* @@ -63,7 +68,7 @@ int amdgpu_umc_page_retirement_mca(struct amdgpu_device *adev, ret = amdgpu_umc_convert_error_address(adev, &err_data, err_addr, ch_inst, umc_inst); if (ret) - goto out; + goto out_free_err_addr; if (amdgpu_bad_page_threshold != 0) { amdgpu_ras_add_bad_pages(adev, err_data.err_addr, @@ -71,8 +76,12 @@ int amdgpu_umc_page_retirement_mca(struct amdgpu_device *adev, amdgpu_ras_save_bad_pages(adev, NULL); } -out: +out_free_err_addr: kfree(err_data.err_addr); + +out_fini_err_data: + amdgpu_ras_error_data_fini(&err_data); + return ret; } @@ -157,8 +166,12 @@ static int amdgpu_umc_do_page_retirement(struct amdgpu_device *adev, } } - if (reset) + if (reset) { + /* use mode-2 reset for poison consumption */ + if (!entry) + con->gpu_reset_flags |= AMDGPU_RAS_GPU_RESET_MODE2_RESET; amdgpu_ras_reset_gpu(adev); + } } kfree(err_data->err_addr); @@ -182,18 +195,24 @@ int amdgpu_umc_poison_handler(struct amdgpu_device *adev, bool reset) } if (!amdgpu_sriov_vf(adev)) { - struct ras_err_data err_data = {0, 0, 0, NULL}; + struct ras_err_data err_data; struct ras_common_if head = { .block = AMDGPU_RAS_BLOCK__UMC, }; struct ras_manager *obj = amdgpu_ras_find_obj(adev, &head); + ret = amdgpu_ras_error_data_init(&err_data); + if (ret) + return ret; + ret = amdgpu_umc_do_page_retirement(adev, &err_data, NULL, reset); if (ret == AMDGPU_RAS_SUCCESS && obj) { obj->err_data.ue_count += err_data.ue_count; obj->err_data.ce_count += err_data.ce_count; } + + amdgpu_ras_error_data_fini(&err_data); } else { if (adev->virt.ops && adev->virt.ops->ras_poison_handler) adev->virt.ops->ras_poison_handler(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h index 43321f57f557..417a6726c71b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h @@ -32,6 +32,11 @@ * is the index of 8KB block */ #define ADDR_OF_8KB_BLOCK(addr) (((addr) & ~0xffULL) << 5) +/* + * (addr / 256) * 32768, the higher 26 bits in ErrorAddr + * is the index of 8KB block + */ +#define ADDR_OF_32KB_BLOCK(addr) (((addr) & ~0xffULL) << 7) /* channel index is the index of 256B block */ #define ADDR_OF_256B_BLOCK(channel_index) ((channel_index) << 8) /* offset in 256B block */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c new file mode 100644 index 000000000000..ca45ba8ac171 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c @@ -0,0 +1,878 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include <linux/firmware.h> +#include <drm/drm_exec.h> + +#include "amdgpu.h" +#include "amdgpu_umsch_mm.h" +#include "umsch_mm_v4_0.h" + +struct umsch_mm_test_ctx_data { + uint8_t process_csa[PAGE_SIZE]; + uint8_t vpe_ctx_csa[PAGE_SIZE]; + uint8_t vcn_ctx_csa[PAGE_SIZE]; +}; + +struct umsch_mm_test_mqd_data { + uint8_t vpe_mqd[PAGE_SIZE]; + uint8_t vcn_mqd[PAGE_SIZE]; +}; + +struct umsch_mm_test_ring_data { + uint8_t vpe_ring[PAGE_SIZE]; + uint8_t vpe_ib[PAGE_SIZE]; + uint8_t vcn_ring[PAGE_SIZE]; + uint8_t vcn_ib[PAGE_SIZE]; +}; + +struct umsch_mm_test_queue_info { + uint64_t mqd_addr; + uint64_t csa_addr; + uint32_t doorbell_offset_0; + uint32_t doorbell_offset_1; + enum UMSCH_SWIP_ENGINE_TYPE engine; +}; + +struct umsch_mm_test { + struct amdgpu_bo *ctx_data_obj; + uint64_t ctx_data_gpu_addr; + uint32_t *ctx_data_cpu_addr; + + struct amdgpu_bo *mqd_data_obj; + uint64_t mqd_data_gpu_addr; + uint32_t *mqd_data_cpu_addr; + + struct amdgpu_bo *ring_data_obj; + uint64_t ring_data_gpu_addr; + uint32_t *ring_data_cpu_addr; + + + struct amdgpu_vm *vm; + struct amdgpu_bo_va *bo_va; + uint32_t pasid; + uint32_t vm_cntx_cntl; + uint32_t num_queues; +}; + +static int map_ring_data(struct amdgpu_device *adev, struct amdgpu_vm *vm, + struct amdgpu_bo *bo, struct amdgpu_bo_va **bo_va, + uint64_t addr, uint32_t size) +{ + struct amdgpu_sync sync; + struct drm_exec exec; + int r; + + amdgpu_sync_create(&sync); + + drm_exec_init(&exec, 0); + drm_exec_until_all_locked(&exec) { + r = drm_exec_lock_obj(&exec, &bo->tbo.base); + drm_exec_retry_on_contention(&exec); + if (unlikely(r)) + goto error_fini_exec; + + r = amdgpu_vm_lock_pd(vm, &exec, 0); + drm_exec_retry_on_contention(&exec); + if (unlikely(r)) + goto error_fini_exec; + } + + *bo_va = amdgpu_vm_bo_add(adev, vm, bo); + if (!*bo_va) { + r = -ENOMEM; + goto error_fini_exec; + } + + r = amdgpu_vm_bo_map(adev, *bo_va, addr, 0, size, + AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE | + AMDGPU_PTE_EXECUTABLE); + + if (r) + goto error_del_bo_va; + + + r = amdgpu_vm_bo_update(adev, *bo_va, false); + if (r) + goto error_del_bo_va; + + amdgpu_sync_fence(&sync, (*bo_va)->last_pt_update); + + r = amdgpu_vm_update_pdes(adev, vm, false); + if (r) + goto error_del_bo_va; + + amdgpu_sync_fence(&sync, vm->last_update); + + amdgpu_sync_wait(&sync, false); + drm_exec_fini(&exec); + + amdgpu_sync_free(&sync); + + return 0; + +error_del_bo_va: + amdgpu_vm_bo_del(adev, *bo_va); + amdgpu_sync_free(&sync); + +error_fini_exec: + drm_exec_fini(&exec); + amdgpu_sync_free(&sync); + return r; +} + +static int unmap_ring_data(struct amdgpu_device *adev, struct amdgpu_vm *vm, + struct amdgpu_bo *bo, struct amdgpu_bo_va *bo_va, + uint64_t addr) +{ + struct drm_exec exec; + long r; + + drm_exec_init(&exec, 0); + drm_exec_until_all_locked(&exec) { + r = drm_exec_lock_obj(&exec, &bo->tbo.base); + drm_exec_retry_on_contention(&exec); + if (unlikely(r)) + goto out_unlock; + + r = amdgpu_vm_lock_pd(vm, &exec, 0); + drm_exec_retry_on_contention(&exec); + if (unlikely(r)) + goto out_unlock; + } + + + r = amdgpu_vm_bo_unmap(adev, bo_va, addr); + if (r) + goto out_unlock; + + amdgpu_vm_bo_del(adev, bo_va); + +out_unlock: + drm_exec_fini(&exec); + + return r; +} + +static void setup_vpe_queue(struct amdgpu_device *adev, + struct umsch_mm_test *test, + struct umsch_mm_test_queue_info *qinfo) +{ + struct MQD_INFO *mqd = (struct MQD_INFO *)test->mqd_data_cpu_addr; + uint64_t ring_gpu_addr = test->ring_data_gpu_addr; + + mqd->rb_base_lo = (ring_gpu_addr >> 8); + mqd->rb_base_hi = (ring_gpu_addr >> 40); + mqd->rb_size = PAGE_SIZE / 4; + mqd->wptr_val = 0; + mqd->rptr_val = 0; + mqd->unmapped = 1; + + qinfo->mqd_addr = test->mqd_data_gpu_addr; + qinfo->csa_addr = test->ctx_data_gpu_addr + + offsetof(struct umsch_mm_test_ctx_data, vpe_ctx_csa); + qinfo->doorbell_offset_0 = (adev->doorbell_index.vpe_ring + 1) << 1; + qinfo->doorbell_offset_1 = 0; +} + +static void setup_vcn_queue(struct amdgpu_device *adev, + struct umsch_mm_test *test, + struct umsch_mm_test_queue_info *qinfo) +{ +} + +static int add_test_queue(struct amdgpu_device *adev, + struct umsch_mm_test *test, + struct umsch_mm_test_queue_info *qinfo) +{ + struct umsch_mm_add_queue_input queue_input = {}; + int r; + + queue_input.process_id = test->pasid; + queue_input.page_table_base_addr = amdgpu_gmc_pd_addr(test->vm->root.bo); + + queue_input.process_va_start = 0; + queue_input.process_va_end = (adev->vm_manager.max_pfn - 1) << AMDGPU_GPU_PAGE_SHIFT; + + queue_input.process_quantum = 100000; /* 10ms */ + queue_input.process_csa_addr = test->ctx_data_gpu_addr + + offsetof(struct umsch_mm_test_ctx_data, process_csa); + + queue_input.context_quantum = 10000; /* 1ms */ + queue_input.context_csa_addr = qinfo->csa_addr; + + queue_input.inprocess_context_priority = CONTEXT_PRIORITY_LEVEL_NORMAL; + queue_input.context_global_priority_level = CONTEXT_PRIORITY_LEVEL_NORMAL; + queue_input.doorbell_offset_0 = qinfo->doorbell_offset_0; + queue_input.doorbell_offset_1 = qinfo->doorbell_offset_1; + + queue_input.engine_type = qinfo->engine; + queue_input.mqd_addr = qinfo->mqd_addr; + queue_input.vm_context_cntl = test->vm_cntx_cntl; + + amdgpu_umsch_mm_lock(&adev->umsch_mm); + r = adev->umsch_mm.funcs->add_queue(&adev->umsch_mm, &queue_input); + amdgpu_umsch_mm_unlock(&adev->umsch_mm); + if (r) + return r; + + return 0; +} + +static int remove_test_queue(struct amdgpu_device *adev, + struct umsch_mm_test *test, + struct umsch_mm_test_queue_info *qinfo) +{ + struct umsch_mm_remove_queue_input queue_input = {}; + int r; + + queue_input.doorbell_offset_0 = qinfo->doorbell_offset_0; + queue_input.doorbell_offset_1 = qinfo->doorbell_offset_1; + queue_input.context_csa_addr = qinfo->csa_addr; + + amdgpu_umsch_mm_lock(&adev->umsch_mm); + r = adev->umsch_mm.funcs->remove_queue(&adev->umsch_mm, &queue_input); + amdgpu_umsch_mm_unlock(&adev->umsch_mm); + if (r) + return r; + + return 0; +} + +static int submit_vpe_queue(struct amdgpu_device *adev, struct umsch_mm_test *test) +{ + struct MQD_INFO *mqd = (struct MQD_INFO *)test->mqd_data_cpu_addr; + uint32_t *ring = test->ring_data_cpu_addr + + offsetof(struct umsch_mm_test_ring_data, vpe_ring) / 4; + uint32_t *ib = test->ring_data_cpu_addr + + offsetof(struct umsch_mm_test_ring_data, vpe_ib) / 4; + uint64_t ib_gpu_addr = test->ring_data_gpu_addr + + offsetof(struct umsch_mm_test_ring_data, vpe_ib); + uint32_t *fence = ib + 2048 / 4; + uint64_t fence_gpu_addr = ib_gpu_addr + 2048; + const uint32_t test_pattern = 0xdeadbeef; + int i; + + ib[0] = VPE_CMD_HEADER(VPE_CMD_OPCODE_FENCE, 0); + ib[1] = lower_32_bits(fence_gpu_addr); + ib[2] = upper_32_bits(fence_gpu_addr); + ib[3] = test_pattern; + + ring[0] = VPE_CMD_HEADER(VPE_CMD_OPCODE_INDIRECT, 0); + ring[1] = (ib_gpu_addr & 0xffffffe0); + ring[2] = upper_32_bits(ib_gpu_addr); + ring[3] = 4; + ring[4] = 0; + ring[5] = 0; + + mqd->wptr_val = (6 << 2); + // WDOORBELL32(adev->umsch_mm.agdb_index[CONTEXT_PRIORITY_LEVEL_NORMAL], mqd->wptr_val); + + for (i = 0; i < adev->usec_timeout; i++) { + if (*fence == test_pattern) + return 0; + udelay(1); + } + + dev_err(adev->dev, "vpe queue submission timeout\n"); + + return -ETIMEDOUT; +} + +static int submit_vcn_queue(struct amdgpu_device *adev, struct umsch_mm_test *test) +{ + return 0; +} + +static int setup_umsch_mm_test(struct amdgpu_device *adev, + struct umsch_mm_test *test) +{ + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)]; + int r; + + test->vm_cntx_cntl = hub->vm_cntx_cntl; + + test->vm = kzalloc(sizeof(*test->vm), GFP_KERNEL); + if (!test->vm) { + r = -ENOMEM; + return r; + } + + r = amdgpu_vm_init(adev, test->vm, -1); + if (r) + goto error_free_vm; + + r = amdgpu_pasid_alloc(16); + if (r < 0) + goto error_fini_vm; + test->pasid = r; + + r = amdgpu_bo_create_kernel(adev, sizeof(struct umsch_mm_test_ctx_data), + PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, + &test->ctx_data_obj, + &test->ctx_data_gpu_addr, + (void **)&test->ctx_data_cpu_addr); + if (r) + goto error_free_pasid; + + memset(test->ctx_data_cpu_addr, 0, sizeof(struct umsch_mm_test_ctx_data)); + + r = amdgpu_bo_create_kernel(adev, PAGE_SIZE, + PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, + &test->mqd_data_obj, + &test->mqd_data_gpu_addr, + (void **)&test->mqd_data_cpu_addr); + if (r) + goto error_free_ctx_data_obj; + + memset(test->mqd_data_cpu_addr, 0, PAGE_SIZE); + + r = amdgpu_bo_create_kernel(adev, sizeof(struct umsch_mm_test_ring_data), + PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, + &test->ring_data_obj, + NULL, + (void **)&test->ring_data_cpu_addr); + if (r) + goto error_free_mqd_data_obj; + + memset(test->ring_data_cpu_addr, 0, sizeof(struct umsch_mm_test_ring_data)); + + test->ring_data_gpu_addr = AMDGPU_VA_RESERVED_SIZE; + r = map_ring_data(adev, test->vm, test->ring_data_obj, &test->bo_va, + test->ring_data_gpu_addr, sizeof(struct umsch_mm_test_ring_data)); + if (r) + goto error_free_ring_data_obj; + + return 0; + +error_free_ring_data_obj: + amdgpu_bo_free_kernel(&test->ring_data_obj, NULL, + (void **)&test->ring_data_cpu_addr); +error_free_mqd_data_obj: + amdgpu_bo_free_kernel(&test->mqd_data_obj, &test->mqd_data_gpu_addr, + (void **)&test->mqd_data_cpu_addr); +error_free_ctx_data_obj: + amdgpu_bo_free_kernel(&test->ctx_data_obj, &test->ctx_data_gpu_addr, + (void **)&test->ctx_data_cpu_addr); +error_free_pasid: + amdgpu_pasid_free(test->pasid); +error_fini_vm: + amdgpu_vm_fini(adev, test->vm); +error_free_vm: + kfree(test->vm); + + return r; +} + +static void cleanup_umsch_mm_test(struct amdgpu_device *adev, + struct umsch_mm_test *test) +{ + unmap_ring_data(adev, test->vm, test->ring_data_obj, + test->bo_va, test->ring_data_gpu_addr); + amdgpu_bo_free_kernel(&test->mqd_data_obj, &test->mqd_data_gpu_addr, + (void **)&test->mqd_data_cpu_addr); + amdgpu_bo_free_kernel(&test->ring_data_obj, NULL, + (void **)&test->ring_data_cpu_addr); + amdgpu_bo_free_kernel(&test->ctx_data_obj, &test->ctx_data_gpu_addr, + (void **)&test->ctx_data_cpu_addr); + amdgpu_pasid_free(test->pasid); + amdgpu_vm_fini(adev, test->vm); + kfree(test->vm); +} + +static int setup_test_queues(struct amdgpu_device *adev, + struct umsch_mm_test *test, + struct umsch_mm_test_queue_info *qinfo) +{ + int i, r; + + for (i = 0; i < test->num_queues; i++) { + if (qinfo[i].engine == UMSCH_SWIP_ENGINE_TYPE_VPE) + setup_vpe_queue(adev, test, &qinfo[i]); + else + setup_vcn_queue(adev, test, &qinfo[i]); + + r = add_test_queue(adev, test, &qinfo[i]); + if (r) + return r; + } + + return 0; +} + +static int submit_test_queues(struct amdgpu_device *adev, + struct umsch_mm_test *test, + struct umsch_mm_test_queue_info *qinfo) +{ + int i, r; + + for (i = 0; i < test->num_queues; i++) { + if (qinfo[i].engine == UMSCH_SWIP_ENGINE_TYPE_VPE) + r = submit_vpe_queue(adev, test); + else + r = submit_vcn_queue(adev, test); + if (r) + return r; + } + + return 0; +} + +static void cleanup_test_queues(struct amdgpu_device *adev, + struct umsch_mm_test *test, + struct umsch_mm_test_queue_info *qinfo) +{ + int i; + + for (i = 0; i < test->num_queues; i++) + remove_test_queue(adev, test, &qinfo[i]); +} + +static int umsch_mm_test(struct amdgpu_device *adev) +{ + struct umsch_mm_test_queue_info qinfo[] = { + { .engine = UMSCH_SWIP_ENGINE_TYPE_VPE }, + }; + struct umsch_mm_test test = { .num_queues = ARRAY_SIZE(qinfo) }; + int r; + + r = setup_umsch_mm_test(adev, &test); + if (r) + return r; + + r = setup_test_queues(adev, &test, qinfo); + if (r) + goto cleanup; + + r = submit_test_queues(adev, &test, qinfo); + if (r) + goto cleanup; + + cleanup_test_queues(adev, &test, qinfo); + cleanup_umsch_mm_test(adev, &test); + + return 0; + +cleanup: + cleanup_test_queues(adev, &test, qinfo); + cleanup_umsch_mm_test(adev, &test); + return r; +} + +int amdgpu_umsch_mm_submit_pkt(struct amdgpu_umsch_mm *umsch, void *pkt, int ndws) +{ + struct amdgpu_ring *ring = &umsch->ring; + + if (amdgpu_ring_alloc(ring, ndws)) + return -ENOMEM; + + amdgpu_ring_write_multiple(ring, pkt, ndws); + amdgpu_ring_commit(ring); + + return 0; +} + +int amdgpu_umsch_mm_query_fence(struct amdgpu_umsch_mm *umsch) +{ + struct amdgpu_ring *ring = &umsch->ring; + struct amdgpu_device *adev = ring->adev; + int r; + + r = amdgpu_fence_wait_polling(ring, ring->fence_drv.sync_seq, adev->usec_timeout); + if (r < 1) { + dev_err(adev->dev, "ring umsch timeout, emitted fence %u\n", + ring->fence_drv.sync_seq); + return -ETIMEDOUT; + } + + return 0; +} + +static void umsch_mm_ring_set_wptr(struct amdgpu_ring *ring) +{ + struct amdgpu_umsch_mm *umsch = (struct amdgpu_umsch_mm *)ring; + struct amdgpu_device *adev = ring->adev; + + if (ring->use_doorbell) + WDOORBELL32(ring->doorbell_index, ring->wptr << 2); + else + WREG32(umsch->rb_wptr, ring->wptr << 2); +} + +static u64 umsch_mm_ring_get_rptr(struct amdgpu_ring *ring) +{ + struct amdgpu_umsch_mm *umsch = (struct amdgpu_umsch_mm *)ring; + struct amdgpu_device *adev = ring->adev; + + return RREG32(umsch->rb_rptr); +} + +static u64 umsch_mm_ring_get_wptr(struct amdgpu_ring *ring) +{ + struct amdgpu_umsch_mm *umsch = (struct amdgpu_umsch_mm *)ring; + struct amdgpu_device *adev = ring->adev; + + return RREG32(umsch->rb_wptr); +} + +static const struct amdgpu_ring_funcs umsch_v4_0_ring_funcs = { + .type = AMDGPU_RING_TYPE_UMSCH_MM, + .align_mask = 0, + .nop = 0, + .support_64bit_ptrs = false, + .get_rptr = umsch_mm_ring_get_rptr, + .get_wptr = umsch_mm_ring_get_wptr, + .set_wptr = umsch_mm_ring_set_wptr, + .insert_nop = amdgpu_ring_insert_nop, +}; + +int amdgpu_umsch_mm_ring_init(struct amdgpu_umsch_mm *umsch) +{ + struct amdgpu_device *adev = container_of(umsch, struct amdgpu_device, umsch_mm); + struct amdgpu_ring *ring = &umsch->ring; + + ring->vm_hub = AMDGPU_MMHUB0(0); + ring->use_doorbell = true; + ring->no_scheduler = true; + ring->doorbell_index = (AMDGPU_NAVI10_DOORBELL64_VCN0_1 << 1) + 6; + + snprintf(ring->name, sizeof(ring->name), "umsch"); + + return amdgpu_ring_init(adev, ring, 1024, NULL, 0, AMDGPU_RING_PRIO_DEFAULT, NULL); +} + +int amdgpu_umsch_mm_init_microcode(struct amdgpu_umsch_mm *umsch) +{ + const struct umsch_mm_firmware_header_v1_0 *umsch_mm_hdr; + struct amdgpu_device *adev = umsch->ring.adev; + const char *fw_name = NULL; + int r; + + switch (amdgpu_ip_version(adev, VCN_HWIP, 0)) { + case IP_VERSION(4, 0, 5): + fw_name = "amdgpu/umsch_mm_4_0_0.bin"; + break; + default: + break; + } + + r = amdgpu_ucode_request(adev, &adev->umsch_mm.fw, fw_name); + if (r) { + release_firmware(adev->umsch_mm.fw); + adev->umsch_mm.fw = NULL; + return r; + } + + umsch_mm_hdr = (const struct umsch_mm_firmware_header_v1_0 *)adev->umsch_mm.fw->data; + + adev->umsch_mm.ucode_size = le32_to_cpu(umsch_mm_hdr->umsch_mm_ucode_size_bytes); + adev->umsch_mm.data_size = le32_to_cpu(umsch_mm_hdr->umsch_mm_ucode_data_size_bytes); + + adev->umsch_mm.irq_start_addr = + le32_to_cpu(umsch_mm_hdr->umsch_mm_irq_start_addr_lo) | + ((uint64_t)(le32_to_cpu(umsch_mm_hdr->umsch_mm_irq_start_addr_hi)) << 32); + adev->umsch_mm.uc_start_addr = + le32_to_cpu(umsch_mm_hdr->umsch_mm_uc_start_addr_lo) | + ((uint64_t)(le32_to_cpu(umsch_mm_hdr->umsch_mm_uc_start_addr_hi)) << 32); + adev->umsch_mm.data_start_addr = + le32_to_cpu(umsch_mm_hdr->umsch_mm_data_start_addr_lo) | + ((uint64_t)(le32_to_cpu(umsch_mm_hdr->umsch_mm_data_start_addr_hi)) << 32); + + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { + struct amdgpu_firmware_info *info; + + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_UMSCH_MM_UCODE]; + info->ucode_id = AMDGPU_UCODE_ID_UMSCH_MM_UCODE; + info->fw = adev->umsch_mm.fw; + adev->firmware.fw_size += + ALIGN(le32_to_cpu(umsch_mm_hdr->umsch_mm_ucode_size_bytes), PAGE_SIZE); + + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_UMSCH_MM_DATA]; + info->ucode_id = AMDGPU_UCODE_ID_UMSCH_MM_DATA; + info->fw = adev->umsch_mm.fw; + adev->firmware.fw_size += + ALIGN(le32_to_cpu(umsch_mm_hdr->umsch_mm_ucode_data_size_bytes), PAGE_SIZE); + } + + return 0; +} + +int amdgpu_umsch_mm_allocate_ucode_buffer(struct amdgpu_umsch_mm *umsch) +{ + const struct umsch_mm_firmware_header_v1_0 *umsch_mm_hdr; + struct amdgpu_device *adev = umsch->ring.adev; + const __le32 *fw_data; + uint32_t fw_size; + int r; + + umsch_mm_hdr = (const struct umsch_mm_firmware_header_v1_0 *) + adev->umsch_mm.fw->data; + + fw_data = (const __le32 *)(adev->umsch_mm.fw->data + + le32_to_cpu(umsch_mm_hdr->umsch_mm_ucode_offset_bytes)); + fw_size = le32_to_cpu(umsch_mm_hdr->umsch_mm_ucode_size_bytes); + + r = amdgpu_bo_create_reserved(adev, fw_size, + 4 * 1024, AMDGPU_GEM_DOMAIN_VRAM, + &adev->umsch_mm.ucode_fw_obj, + &adev->umsch_mm.ucode_fw_gpu_addr, + (void **)&adev->umsch_mm.ucode_fw_ptr); + if (r) { + dev_err(adev->dev, "(%d) failed to create umsch_mm fw ucode bo\n", r); + return r; + } + + memcpy(adev->umsch_mm.ucode_fw_ptr, fw_data, fw_size); + + amdgpu_bo_kunmap(adev->umsch_mm.ucode_fw_obj); + amdgpu_bo_unreserve(adev->umsch_mm.ucode_fw_obj); + return 0; +} + +int amdgpu_umsch_mm_allocate_ucode_data_buffer(struct amdgpu_umsch_mm *umsch) +{ + const struct umsch_mm_firmware_header_v1_0 *umsch_mm_hdr; + struct amdgpu_device *adev = umsch->ring.adev; + const __le32 *fw_data; + uint32_t fw_size; + int r; + + umsch_mm_hdr = (const struct umsch_mm_firmware_header_v1_0 *) + adev->umsch_mm.fw->data; + + fw_data = (const __le32 *)(adev->umsch_mm.fw->data + + le32_to_cpu(umsch_mm_hdr->umsch_mm_ucode_data_offset_bytes)); + fw_size = le32_to_cpu(umsch_mm_hdr->umsch_mm_ucode_data_size_bytes); + + r = amdgpu_bo_create_reserved(adev, fw_size, + 64 * 1024, AMDGPU_GEM_DOMAIN_VRAM, + &adev->umsch_mm.data_fw_obj, + &adev->umsch_mm.data_fw_gpu_addr, + (void **)&adev->umsch_mm.data_fw_ptr); + if (r) { + dev_err(adev->dev, "(%d) failed to create umsch_mm fw data bo\n", r); + return r; + } + + memcpy(adev->umsch_mm.data_fw_ptr, fw_data, fw_size); + + amdgpu_bo_kunmap(adev->umsch_mm.data_fw_obj); + amdgpu_bo_unreserve(adev->umsch_mm.data_fw_obj); + return 0; +} + +int amdgpu_umsch_mm_psp_execute_cmd_buf(struct amdgpu_umsch_mm *umsch) +{ + struct amdgpu_device *adev = umsch->ring.adev; + struct amdgpu_firmware_info ucode = { + .ucode_id = AMDGPU_UCODE_ID_UMSCH_MM_CMD_BUFFER, + .mc_addr = adev->umsch_mm.cmd_buf_gpu_addr, + .ucode_size = ((uintptr_t)adev->umsch_mm.cmd_buf_curr_ptr - + (uintptr_t)adev->umsch_mm.cmd_buf_ptr), + }; + + return psp_execute_ip_fw_load(&adev->psp, &ucode); +} + +static void umsch_mm_agdb_index_init(struct amdgpu_device *adev) +{ + uint32_t umsch_mm_agdb_start; + int i; + + umsch_mm_agdb_start = adev->doorbell_index.max_assignment + 1; + umsch_mm_agdb_start = roundup(umsch_mm_agdb_start, 1024); + umsch_mm_agdb_start += (AMDGPU_NAVI10_DOORBELL64_VCN0_1 << 1); + + for (i = 0; i < CONTEXT_PRIORITY_NUM_LEVELS; i++) + adev->umsch_mm.agdb_index[i] = umsch_mm_agdb_start + i; +} + +static int umsch_mm_init(struct amdgpu_device *adev) +{ + int r; + + adev->umsch_mm.vmid_mask_mm_vpe = 0xf00; + adev->umsch_mm.engine_mask = (1 << UMSCH_SWIP_ENGINE_TYPE_VPE); + adev->umsch_mm.vpe_hqd_mask = 0xfe; + + r = amdgpu_device_wb_get(adev, &adev->umsch_mm.wb_index); + if (r) { + dev_err(adev->dev, "failed to alloc wb for umsch: %d\n", r); + return r; + } + + adev->umsch_mm.sch_ctx_gpu_addr = adev->wb.gpu_addr + + (adev->umsch_mm.wb_index * 4); + + r = amdgpu_bo_create_kernel(adev, PAGE_SIZE, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_GTT, + &adev->umsch_mm.cmd_buf_obj, + &adev->umsch_mm.cmd_buf_gpu_addr, + (void **)&adev->umsch_mm.cmd_buf_ptr); + if (r) { + dev_err(adev->dev, "failed to allocate cmdbuf bo %d\n", r); + amdgpu_device_wb_free(adev, adev->umsch_mm.wb_index); + return r; + } + + mutex_init(&adev->umsch_mm.mutex_hidden); + + umsch_mm_agdb_index_init(adev); + + return 0; +} + + +static int umsch_mm_early_init(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + switch (amdgpu_ip_version(adev, VCN_HWIP, 0)) { + case IP_VERSION(4, 0, 5): + umsch_mm_v4_0_set_funcs(&adev->umsch_mm); + break; + default: + return -EINVAL; + } + + adev->umsch_mm.ring.funcs = &umsch_v4_0_ring_funcs; + umsch_mm_set_regs(&adev->umsch_mm); + + return 0; +} + +static int umsch_mm_late_init(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + return umsch_mm_test(adev); +} + +static int umsch_mm_sw_init(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int r; + + r = umsch_mm_init(adev); + if (r) + return r; + + r = umsch_mm_ring_init(&adev->umsch_mm); + if (r) + return r; + + r = umsch_mm_init_microcode(&adev->umsch_mm); + if (r) + return r; + + return 0; +} + +static int umsch_mm_sw_fini(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + release_firmware(adev->umsch_mm.fw); + adev->umsch_mm.fw = NULL; + + amdgpu_ring_fini(&adev->umsch_mm.ring); + + mutex_destroy(&adev->umsch_mm.mutex_hidden); + + amdgpu_bo_free_kernel(&adev->umsch_mm.cmd_buf_obj, + &adev->umsch_mm.cmd_buf_gpu_addr, + (void **)&adev->umsch_mm.cmd_buf_ptr); + + amdgpu_device_wb_free(adev, adev->umsch_mm.wb_index); + + return 0; +} + +static int umsch_mm_hw_init(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int r; + + r = umsch_mm_load_microcode(&adev->umsch_mm); + if (r) + return r; + + umsch_mm_ring_start(&adev->umsch_mm); + + r = umsch_mm_set_hw_resources(&adev->umsch_mm); + if (r) + return r; + + return 0; +} + +static int umsch_mm_hw_fini(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + umsch_mm_ring_stop(&adev->umsch_mm); + + amdgpu_bo_free_kernel(&adev->umsch_mm.data_fw_obj, + &adev->umsch_mm.data_fw_gpu_addr, + (void **)&adev->umsch_mm.data_fw_ptr); + + amdgpu_bo_free_kernel(&adev->umsch_mm.ucode_fw_obj, + &adev->umsch_mm.ucode_fw_gpu_addr, + (void **)&adev->umsch_mm.ucode_fw_ptr); + return 0; +} + +static int umsch_mm_suspend(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + return umsch_mm_hw_fini(adev); +} + +static int umsch_mm_resume(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + return umsch_mm_hw_init(adev); +} + +static const struct amd_ip_funcs umsch_mm_v4_0_ip_funcs = { + .name = "umsch_mm_v4_0", + .early_init = umsch_mm_early_init, + .late_init = umsch_mm_late_init, + .sw_init = umsch_mm_sw_init, + .sw_fini = umsch_mm_sw_fini, + .hw_init = umsch_mm_hw_init, + .hw_fini = umsch_mm_hw_fini, + .suspend = umsch_mm_suspend, + .resume = umsch_mm_resume, +}; + +const struct amdgpu_ip_block_version umsch_mm_v4_0_ip_block = { + .type = AMD_IP_BLOCK_TYPE_UMSCH_MM, + .major = 4, + .minor = 0, + .rev = 0, + .funcs = &umsch_mm_v4_0_ip_funcs, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.h new file mode 100644 index 000000000000..8258a43a6236 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.h @@ -0,0 +1,228 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __AMDGPU_UMSCH_MM_H__ +#define __AMDGPU_UMSCH_MM_H__ + +enum UMSCH_SWIP_ENGINE_TYPE { + UMSCH_SWIP_ENGINE_TYPE_VCN0 = 0, + UMSCH_SWIP_ENGINE_TYPE_VCN1 = 1, + UMSCH_SWIP_ENGINE_TYPE_VCN = 2, + UMSCH_SWIP_ENGINE_TYPE_VPE = 3, + UMSCH_SWIP_ENGINE_TYPE_MAX +}; + +enum UMSCH_SWIP_AFFINITY_TYPE { + UMSCH_SWIP_AFFINITY_TYPE_ANY = 0, + UMSCH_SWIP_AFFINITY_TYPE_VCN0 = 1, + UMSCH_SWIP_AFFINITY_TYPE_VCN1 = 2, + UMSCH_SWIP_AFFINITY_TYPE_MAX +}; + +enum UMSCH_CONTEXT_PRIORITY_LEVEL { + CONTEXT_PRIORITY_LEVEL_IDLE = 0, + CONTEXT_PRIORITY_LEVEL_NORMAL = 1, + CONTEXT_PRIORITY_LEVEL_FOCUS = 2, + CONTEXT_PRIORITY_LEVEL_REALTIME = 3, + CONTEXT_PRIORITY_NUM_LEVELS +}; + +struct umsch_mm_set_resource_input { + uint32_t vmid_mask_mm_vcn; + uint32_t vmid_mask_mm_vpe; + uint32_t logging_vmid; + uint32_t engine_mask; + union { + struct { + uint32_t disable_reset : 1; + uint32_t disable_umsch_mm_log : 1; + uint32_t reserved : 30; + }; + uint32_t uint32_all; + }; +}; + +struct umsch_mm_add_queue_input { + uint32_t process_id; + uint64_t page_table_base_addr; + uint64_t process_va_start; + uint64_t process_va_end; + uint64_t process_quantum; + uint64_t process_csa_addr; + uint64_t context_quantum; + uint64_t context_csa_addr; + uint32_t inprocess_context_priority; + enum UMSCH_CONTEXT_PRIORITY_LEVEL context_global_priority_level; + uint32_t doorbell_offset_0; + uint32_t doorbell_offset_1; + enum UMSCH_SWIP_ENGINE_TYPE engine_type; + uint32_t affinity; + enum UMSCH_SWIP_AFFINITY_TYPE affinity_type; + uint64_t mqd_addr; + uint64_t h_context; + uint64_t h_queue; + uint32_t vm_context_cntl; + + struct { + uint32_t is_context_suspended : 1; + uint32_t reserved : 31; + }; +}; + +struct umsch_mm_remove_queue_input { + uint32_t doorbell_offset_0; + uint32_t doorbell_offset_1; + uint64_t context_csa_addr; +}; + +struct MQD_INFO { + uint32_t rb_base_hi; + uint32_t rb_base_lo; + uint32_t rb_size; + uint32_t wptr_val; + uint32_t rptr_val; + uint32_t unmapped; +}; + +struct amdgpu_umsch_mm; + +struct umsch_mm_funcs { + int (*set_hw_resources)(struct amdgpu_umsch_mm *umsch); + int (*add_queue)(struct amdgpu_umsch_mm *umsch, + struct umsch_mm_add_queue_input *input); + int (*remove_queue)(struct amdgpu_umsch_mm *umsch, + struct umsch_mm_remove_queue_input *input); + int (*set_regs)(struct amdgpu_umsch_mm *umsch); + int (*init_microcode)(struct amdgpu_umsch_mm *umsch); + int (*load_microcode)(struct amdgpu_umsch_mm *umsch); + int (*ring_init)(struct amdgpu_umsch_mm *umsch); + int (*ring_start)(struct amdgpu_umsch_mm *umsch); + int (*ring_stop)(struct amdgpu_umsch_mm *umsch); + int (*ring_fini)(struct amdgpu_umsch_mm *umsch); +}; + +struct amdgpu_umsch_mm { + struct amdgpu_ring ring; + + uint32_t rb_wptr; + uint32_t rb_rptr; + + const struct umsch_mm_funcs *funcs; + + const struct firmware *fw; + uint32_t fw_version; + uint32_t feature_version; + + struct amdgpu_bo *ucode_fw_obj; + uint64_t ucode_fw_gpu_addr; + uint32_t *ucode_fw_ptr; + uint64_t irq_start_addr; + uint64_t uc_start_addr; + uint32_t ucode_size; + + struct amdgpu_bo *data_fw_obj; + uint64_t data_fw_gpu_addr; + uint32_t *data_fw_ptr; + uint64_t data_start_addr; + uint32_t data_size; + + struct amdgpu_bo *cmd_buf_obj; + uint64_t cmd_buf_gpu_addr; + uint32_t *cmd_buf_ptr; + uint32_t *cmd_buf_curr_ptr; + + uint32_t wb_index; + uint64_t sch_ctx_gpu_addr; + uint32_t *sch_ctx_cpu_addr; + + uint32_t vmid_mask_mm_vcn; + uint32_t vmid_mask_mm_vpe; + uint32_t engine_mask; + uint32_t vcn0_hqd_mask; + uint32_t vcn1_hqd_mask; + uint32_t vcn_hqd_mask[2]; + uint32_t vpe_hqd_mask; + uint32_t agdb_index[CONTEXT_PRIORITY_NUM_LEVELS]; + + struct mutex mutex_hidden; +}; + +int amdgpu_umsch_mm_submit_pkt(struct amdgpu_umsch_mm *umsch, void *pkt, int ndws); +int amdgpu_umsch_mm_query_fence(struct amdgpu_umsch_mm *umsch); + +int amdgpu_umsch_mm_init_microcode(struct amdgpu_umsch_mm *umsch); +int amdgpu_umsch_mm_allocate_ucode_buffer(struct amdgpu_umsch_mm *umsch); +int amdgpu_umsch_mm_allocate_ucode_data_buffer(struct amdgpu_umsch_mm *umsch); + +int amdgpu_umsch_mm_psp_execute_cmd_buf(struct amdgpu_umsch_mm *umsch); + +int amdgpu_umsch_mm_ring_init(struct amdgpu_umsch_mm *umsch); + +#define WREG32_SOC15_UMSCH(reg, value) \ + do { \ + uint32_t reg_offset = adev->reg_offset[VCN_HWIP][0][reg##_BASE_IDX] + reg; \ + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { \ + *adev->umsch_mm.cmd_buf_curr_ptr++ = (reg_offset << 2); \ + *adev->umsch_mm.cmd_buf_curr_ptr++ = value; \ + } else { \ + WREG32(reg_offset, value); \ + } \ + } while (0) + +#define umsch_mm_set_hw_resources(umsch) \ + ((umsch)->funcs->set_hw_resources ? (umsch)->funcs->set_hw_resources((umsch)) : 0) +#define umsch_mm_add_queue(umsch, input) \ + ((umsch)->funcs->add_queue ? (umsch)->funcs->add_queue((umsch), (input)) : 0) +#define umsch_mm_remove_queue(umsch, input) \ + ((umsch)->funcs->remove_queue ? (umsch)->funcs->remove_queue((umsch), (input)) : 0) + +#define umsch_mm_set_regs(umsch) \ + ((umsch)->funcs->set_regs ? (umsch)->funcs->set_regs((umsch)) : 0) +#define umsch_mm_init_microcode(umsch) \ + ((umsch)->funcs->init_microcode ? (umsch)->funcs->init_microcode((umsch)) : 0) +#define umsch_mm_load_microcode(umsch) \ + ((umsch)->funcs->load_microcode ? (umsch)->funcs->load_microcode((umsch)) : 0) + +#define umsch_mm_ring_init(umsch) \ + ((umsch)->funcs->ring_init ? (umsch)->funcs->ring_init((umsch)) : 0) +#define umsch_mm_ring_start(umsch) \ + ((umsch)->funcs->ring_start ? (umsch)->funcs->ring_start((umsch)) : 0) +#define umsch_mm_ring_stop(umsch) \ + ((umsch)->funcs->ring_stop ? (umsch)->funcs->ring_stop((umsch)) : 0) +#define umsch_mm_ring_fini(umsch) \ + ((umsch)->funcs->ring_fini ? (umsch)->funcs->ring_fini((umsch)) : 0) + +static inline void amdgpu_umsch_mm_lock(struct amdgpu_umsch_mm *umsch) +{ + mutex_lock(&umsch->mutex_hidden); +} + +static inline void amdgpu_umsch_mm_unlock(struct amdgpu_umsch_mm *umsch) +{ + mutex_unlock(&umsch->mutex_hidden); +} + +extern const struct amdgpu_ip_block_version umsch_mm_v4_0_ip_block; + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c index b7441654e6fa..07d930339b07 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c @@ -398,32 +398,32 @@ int amdgpu_uvd_sw_fini(struct amdgpu_device *adev) * amdgpu_uvd_entity_init - init entity * * @adev: amdgpu_device pointer + * @ring: amdgpu_ring pointer to check * + * Initialize the entity used for handle management in the kernel driver. */ -int amdgpu_uvd_entity_init(struct amdgpu_device *adev) +int amdgpu_uvd_entity_init(struct amdgpu_device *adev, struct amdgpu_ring *ring) { - struct amdgpu_ring *ring; - struct drm_gpu_scheduler *sched; - int r; + if (ring == &adev->uvd.inst[0].ring) { + struct drm_gpu_scheduler *sched = &ring->sched; + int r; - ring = &adev->uvd.inst[0].ring; - sched = &ring->sched; - r = drm_sched_entity_init(&adev->uvd.entity, DRM_SCHED_PRIORITY_NORMAL, - &sched, 1, NULL); - if (r) { - DRM_ERROR("Failed setting up UVD kernel entity.\n"); - return r; + r = drm_sched_entity_init(&adev->uvd.entity, DRM_SCHED_PRIORITY_NORMAL, + &sched, 1, NULL); + if (r) { + DRM_ERROR("Failed setting up UVD kernel entity.\n"); + return r; + } } return 0; } -int amdgpu_uvd_suspend(struct amdgpu_device *adev) +int amdgpu_uvd_prepare_suspend(struct amdgpu_device *adev) { unsigned int size; void *ptr; int i, j, idx; - bool in_ras_intr = amdgpu_ras_intr_triggered(); cancel_delayed_work_sync(&adev->uvd.idle_work); @@ -452,7 +452,7 @@ int amdgpu_uvd_suspend(struct amdgpu_device *adev) if (drm_dev_enter(adev_to_drm(adev), &idx)) { /* re-write 0 since err_event_athub will corrupt VCPU buffer */ - if (in_ras_intr) + if (amdgpu_ras_intr_triggered()) memset(adev->uvd.inst[j].saved_bo, 0, size); else memcpy_fromio(adev->uvd.inst[j].saved_bo, ptr, size); @@ -461,7 +461,12 @@ int amdgpu_uvd_suspend(struct amdgpu_device *adev) } } - if (in_ras_intr) + return 0; +} + +int amdgpu_uvd_suspend(struct amdgpu_device *adev) +{ + if (amdgpu_ras_intr_triggered()) DRM_WARN("UVD VCPU state may lost due to RAS ERREVENT_ATHUB_INTERRUPT\n"); return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h index 9f89bb7cd60b..9dfad2f48ef4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h @@ -73,7 +73,8 @@ struct amdgpu_uvd { int amdgpu_uvd_sw_init(struct amdgpu_device *adev); int amdgpu_uvd_sw_fini(struct amdgpu_device *adev); -int amdgpu_uvd_entity_init(struct amdgpu_device *adev); +int amdgpu_uvd_entity_init(struct amdgpu_device *adev, struct amdgpu_ring *ring); +int amdgpu_uvd_prepare_suspend(struct amdgpu_device *adev); int amdgpu_uvd_suspend(struct amdgpu_device *adev); int amdgpu_uvd_resume(struct amdgpu_device *adev); int amdgpu_uvd_get_create_msg(struct amdgpu_ring *ring, uint32_t handle, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c index 1904edf68407..59acf424a078 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c @@ -230,21 +230,22 @@ int amdgpu_vce_sw_fini(struct amdgpu_device *adev) * amdgpu_vce_entity_init - init entity * * @adev: amdgpu_device pointer + * @ring: amdgpu_ring pointer to check * + * Initialize the entity used for handle management in the kernel driver. */ -int amdgpu_vce_entity_init(struct amdgpu_device *adev) +int amdgpu_vce_entity_init(struct amdgpu_device *adev, struct amdgpu_ring *ring) { - struct amdgpu_ring *ring; - struct drm_gpu_scheduler *sched; - int r; - - ring = &adev->vce.ring[0]; - sched = &ring->sched; - r = drm_sched_entity_init(&adev->vce.entity, DRM_SCHED_PRIORITY_NORMAL, - &sched, 1, NULL); - if (r != 0) { - DRM_ERROR("Failed setting up VCE run queue.\n"); - return r; + if (ring == &adev->vce.ring[0]) { + struct drm_gpu_scheduler *sched = &ring->sched; + int r; + + r = drm_sched_entity_init(&adev->vce.entity, DRM_SCHED_PRIORITY_NORMAL, + &sched, 1, NULL); + if (r != 0) { + DRM_ERROR("Failed setting up VCE run queue.\n"); + return r; + } } return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h index ea680fc9a6c3..6e53f872d084 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h @@ -55,7 +55,7 @@ struct amdgpu_vce { int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size); int amdgpu_vce_sw_fini(struct amdgpu_device *adev); -int amdgpu_vce_entity_init(struct amdgpu_device *adev); +int amdgpu_vce_entity_init(struct amdgpu_device *adev, struct amdgpu_ring *ring); int amdgpu_vce_suspend(struct amdgpu_device *adev); int amdgpu_vce_resume(struct amdgpu_device *adev); void amdgpu_vce_free_handles(struct amdgpu_device *adev, struct drm_file *filp); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c index ae455aab5d29..f4963330c772 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c @@ -58,6 +58,7 @@ #define FIRMWARE_VCN4_0_2 "amdgpu/vcn_4_0_2.bin" #define FIRMWARE_VCN4_0_3 "amdgpu/vcn_4_0_3.bin" #define FIRMWARE_VCN4_0_4 "amdgpu/vcn_4_0_4.bin" +#define FIRMWARE_VCN4_0_5 "amdgpu/vcn_4_0_5.bin" MODULE_FIRMWARE(FIRMWARE_RAVEN); MODULE_FIRMWARE(FIRMWARE_PICASSO); @@ -80,6 +81,7 @@ MODULE_FIRMWARE(FIRMWARE_VCN4_0_0); MODULE_FIRMWARE(FIRMWARE_VCN4_0_2); MODULE_FIRMWARE(FIRMWARE_VCN4_0_3); MODULE_FIRMWARE(FIRMWARE_VCN4_0_4); +MODULE_FIRMWARE(FIRMWARE_VCN4_0_5); static void amdgpu_vcn_idle_work_handler(struct work_struct *work); @@ -124,7 +126,7 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev) * Hence, check for these versions here - notice this is * restricted to Vangogh (Deck's APU). */ - if (adev->ip_versions[UVD_HWIP][0] == IP_VERSION(3, 0, 2)) { + if (amdgpu_ip_version(adev, UVD_HWIP, 0) == IP_VERSION(3, 0, 2)) { const char *bios_ver = dmi_get_system_info(DMI_BIOS_VERSION); if (bios_ver && (!strncmp("F7A0113", bios_ver, 7) || @@ -169,7 +171,7 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev) if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) bo_size += AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8); - if (adev->ip_versions[UVD_HWIP][0] >= IP_VERSION(4, 0, 0)) { + if (amdgpu_ip_version(adev, UVD_HWIP, 0) >= IP_VERSION(4, 0, 0)) { fw_shared_size = AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_vcn4_fw_shared)); log_offset = offsetof(struct amdgpu_vcn4_fw_shared, fw_log); } else { @@ -265,7 +267,7 @@ static bool amdgpu_vcn_using_unified_queue(struct amdgpu_ring *ring) struct amdgpu_device *adev = ring->adev; bool ret = false; - if (adev->ip_versions[UVD_HWIP][0] >= IP_VERSION(4, 0, 0)) + if (amdgpu_ip_version(adev, UVD_HWIP, 0) >= IP_VERSION(4, 0, 0)) ret = true; return ret; @@ -292,8 +294,15 @@ int amdgpu_vcn_suspend(struct amdgpu_device *adev) void *ptr; int i, idx; + bool in_ras_intr = amdgpu_ras_intr_triggered(); + cancel_delayed_work_sync(&adev->vcn.idle_work); + /* err_event_athub will corrupt VCPU buffer, so we need to + * restore fw data and clear buffer in amdgpu_vcn_resume() */ + if (in_ras_intr) + return 0; + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { if (adev->vcn.harvest_config & (1 << i)) continue; @@ -996,7 +1005,7 @@ int amdgpu_vcn_unified_ring_test_ib(struct amdgpu_ring *ring, long timeout) struct amdgpu_device *adev = ring->adev; long r; - if (adev->ip_versions[UVD_HWIP][0] != IP_VERSION(4, 0, 3)) { + if (amdgpu_ip_version(adev, UVD_HWIP, 0) != IP_VERSION(4, 0, 3)) { r = amdgpu_vcn_enc_ring_test_ib(ring, timeout); if (r) goto error; @@ -1046,7 +1055,8 @@ void amdgpu_vcn_setup_ucode(struct amdgpu_device *adev) adev->firmware.fw_size += ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE); - if (adev->ip_versions[UVD_HWIP][0] == IP_VERSION(4, 0, 3)) + if (amdgpu_ip_version(adev, UVD_HWIP, 0) == + IP_VERSION(4, 0, 3)) break; } dev_info(adev->dev, "Will use PSP to load VCN firmware\n"); @@ -1087,7 +1097,7 @@ static ssize_t amdgpu_debugfs_vcn_fwlog_read(struct file *f, char __user *buf, if (write_pos > read_pos) { available = write_pos - read_pos; - read_num[0] = min(size, (size_t)available); + read_num[0] = min_t(size_t, size, available); } else { read_num[0] = AMDGPU_VCNFW_LOG_SIZE - read_pos; available = read_num[0] + write_pos - plog->header_size; @@ -1239,3 +1249,18 @@ int amdgpu_vcn_ras_sw_init(struct amdgpu_device *adev) return 0; } + +int amdgpu_vcn_psp_update_sram(struct amdgpu_device *adev, int inst_idx, + enum AMDGPU_UCODE_ID ucode_id) +{ + struct amdgpu_firmware_info ucode = { + .ucode_id = (ucode_id ? ucode_id : + (inst_idx ? AMDGPU_UCODE_ID_VCN1_RAM : + AMDGPU_UCODE_ID_VCN0_RAM)), + .mc_addr = adev->vcn.inst[inst_idx].dpg_sram_gpu_addr, + .ucode_size = ((uintptr_t)adev->vcn.inst[inst_idx].dpg_sram_curr_addr - + (uintptr_t)adev->vcn.inst[inst_idx].dpg_sram_cpu_addr), + }; + + return psp_execute_ip_fw_load(&adev->psp, &ucode); +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h index 92d5534df5f4..514c98ea144f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h @@ -33,7 +33,7 @@ #define AMDGPU_VCN_MAX_ENC_RINGS 3 #define AMDGPU_MAX_VCN_INSTANCES 4 -#define AMDGPU_MAX_VCN_ENC_RINGS AMDGPU_VCN_MAX_ENC_RINGS * AMDGPU_MAX_VCN_INSTANCES +#define AMDGPU_MAX_VCN_ENC_RINGS (AMDGPU_VCN_MAX_ENC_RINGS * AMDGPU_MAX_VCN_INSTANCES) #define AMDGPU_VCN_HARVEST_VCN0 (1 << 0) #define AMDGPU_VCN_HARVEST_VCN1 (1 << 1) @@ -161,6 +161,7 @@ } while (0) #define AMDGPU_FW_SHARED_FLAG_0_UNIFIED_QUEUE (1 << 2) +#define AMDGPU_FW_SHARED_FLAG_0_DRM_KEY_INJECT (1 << 4) #define AMDGPU_VCN_FW_SHARED_FLAG_0_RB (1 << 6) #define AMDGPU_VCN_MULTI_QUEUE_FLAG (1 << 8) #define AMDGPU_VCN_SW_RING_FLAG (1 << 9) @@ -168,6 +169,9 @@ #define AMDGPU_VCN_SMU_VERSION_INFO_FLAG (1 << 11) #define AMDGPU_VCN_SMU_DPM_INTERFACE_FLAG (1 << 11) #define AMDGPU_VCN_VF_RB_SETUP_FLAG (1 << 14) +#define AMDGPU_VCN_VF_RB_DECOUPLE_FLAG (1 << 15) + +#define MAX_NUM_VCN_RB_SETUP 4 #define AMDGPU_VCN_IB_FLAG_DECODE_BUFFER 0x00000001 #define AMDGPU_VCN_CMD_FLAG_MSG_BUFFER 0x00000001 @@ -180,6 +184,8 @@ #define AMDGPU_VCN_SMU_DPM_INTERFACE_DGPU (0) #define AMDGPU_VCN_SMU_DPM_INTERFACE_APU (1) +#define AMDGPU_DRM_KEY_INJECT_WORKAROUND_VCNFW_ASD_HANDSHAKING 2 + enum fw_queue_mode { FW_QUEUE_RING_RESET = 1, FW_QUEUE_DPG_HOLD_OFF = 2, @@ -332,15 +338,40 @@ struct amdgpu_fw_shared { struct amdgpu_fw_shared_smu_interface_info smu_interface_info; }; +struct amdgpu_vcn_rb_setup_info { + uint32_t rb_addr_lo; + uint32_t rb_addr_hi; + uint32_t rb_size; +}; + struct amdgpu_fw_shared_rb_setup { uint32_t is_rb_enabled_flags; - uint32_t rb_addr_lo; - uint32_t rb_addr_hi; - uint32_t rb_size; - uint32_t rb4_addr_lo; - uint32_t rb4_addr_hi; - uint32_t rb4_size; - uint32_t reserved[6]; + + union { + struct { + uint32_t rb_addr_lo; + uint32_t rb_addr_hi; + uint32_t rb_size; + uint32_t rb4_addr_lo; + uint32_t rb4_addr_hi; + uint32_t rb4_size; + uint32_t reserved[6]; + }; + + struct { + struct amdgpu_vcn_rb_setup_info rb_info[MAX_NUM_VCN_RB_SETUP]; + }; + }; +}; + +struct amdgpu_fw_shared_drm_key_wa { + uint8_t method; + uint8_t reserved[3]; +}; + +struct amdgpu_fw_shared_queue_decouple { + uint8_t is_enabled; + uint8_t reserved[7]; }; struct amdgpu_vcn4_fw_shared { @@ -352,6 +383,9 @@ struct amdgpu_vcn4_fw_shared { uint8_t pad2[20]; struct amdgpu_fw_shared_rb_setup rb_setup; struct amdgpu_fw_shared_smu_interface_info smu_dpm_interface; + struct amdgpu_fw_shared_drm_key_wa drm_key_wa; + uint8_t pad3[9]; + struct amdgpu_fw_shared_queue_decouple decouple; }; struct amdgpu_vcn_fwlog { @@ -369,6 +403,15 @@ struct amdgpu_vcn_decode_buffer { uint32_t pad[30]; }; +struct amdgpu_vcn_rb_metadata { + uint32_t size; + uint32_t present_flag_0; + + uint8_t version; + uint8_t ring_id; + uint8_t pad[26]; +}; + #define VCN_BLOCK_ENCODE_DISABLE_MASK 0x80 #define VCN_BLOCK_DECODE_DISABLE_MASK 0x40 #define VCN_BLOCK_QUEUE_DISABLE_MASK 0xC0 @@ -414,4 +457,7 @@ int amdgpu_vcn_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block); int amdgpu_vcn_ras_sw_init(struct amdgpu_device *adev); +int amdgpu_vcn_psp_update_sram(struct amdgpu_device *adev, int inst_idx, + enum AMDGPU_UCODE_ID ucode_id); + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c index 41aa853a07d2..3a632c3b1a2c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c @@ -73,9 +73,10 @@ void amdgpu_virt_init_setting(struct amdgpu_device *adev) void amdgpu_virt_kiq_reg_write_reg_wait(struct amdgpu_device *adev, uint32_t reg0, uint32_t reg1, - uint32_t ref, uint32_t mask) + uint32_t ref, uint32_t mask, + uint32_t xcc_inst) { - struct amdgpu_kiq *kiq = &adev->gfx.kiq[0]; + struct amdgpu_kiq *kiq = &adev->gfx.kiq[xcc_inst]; struct amdgpu_ring *ring = &kiq->ring; signed long r, cnt = 0; unsigned long flags; @@ -520,7 +521,7 @@ static int amdgpu_virt_read_pf2vf_data(struct amdgpu_device *adev) tmp = ((struct amd_sriov_msg_pf2vf_info *)pf2vf_info)->mm_bw_management[i].encode_max_frame_pixels; adev->virt.encode_max_frame_pixels = max(tmp, adev->virt.encode_max_frame_pixels); } - if((adev->virt.decode_max_dimension_pixels > 0) || (adev->virt.encode_max_dimension_pixels > 0)) + if ((adev->virt.decode_max_dimension_pixels > 0) || (adev->virt.encode_max_dimension_pixels > 0)) adev->virt.is_mm_bw_enabled = true; adev->unique_id = @@ -835,9 +836,19 @@ enum amdgpu_sriov_vf_mode amdgpu_virt_get_sriov_vf_mode(struct amdgpu_device *ad return mode; } +void amdgpu_virt_post_reset(struct amdgpu_device *adev) +{ + if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 0, 3)) { + /* force set to GFXOFF state after reset, + * to avoid some invalid operation before GC enable + */ + adev->gfx.is_poweron = false; + } +} + bool amdgpu_virt_fw_load_skip_check(struct amdgpu_device *adev, uint32_t ucode_id) { - switch (adev->ip_versions[MP0_HWIP][0]) { + switch (amdgpu_ip_version(adev, MP0_HWIP, 0)) { case IP_VERSION(13, 0, 0): /* no vf autoload, white list */ if (ucode_id == AMDGPU_UCODE_ID_VCN1 || @@ -845,6 +856,17 @@ bool amdgpu_virt_fw_load_skip_check(struct amdgpu_device *adev, uint32_t ucode_i return false; else return true; + case IP_VERSION(11, 0, 9): + case IP_VERSION(11, 0, 7): + /* black list for CHIP_NAVI12 and CHIP_SIENNA_CICHLID */ + if (ucode_id == AMDGPU_UCODE_ID_RLC_G + || ucode_id == AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL + || ucode_id == AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM + || ucode_id == AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM + || ucode_id == AMDGPU_UCODE_ID_SMC) + return true; + else + return false; case IP_VERSION(13, 0, 10): /* white list */ if (ucode_id == AMDGPU_UCODE_ID_CAP @@ -921,7 +943,7 @@ void amdgpu_virt_update_sriov_video_codec(struct amdgpu_device *adev, } } -static bool amdgpu_virt_get_rlcg_reg_access_flag(struct amdgpu_device *adev, +bool amdgpu_virt_get_rlcg_reg_access_flag(struct amdgpu_device *adev, u32 acc_flags, u32 hwip, bool write, u32 *rlcg_flag) { @@ -954,7 +976,7 @@ static bool amdgpu_virt_get_rlcg_reg_access_flag(struct amdgpu_device *adev, return ret; } -static u32 amdgpu_virt_rlcg_reg_rw(struct amdgpu_device *adev, u32 offset, u32 v, u32 flag) +u32 amdgpu_virt_rlcg_reg_rw(struct amdgpu_device *adev, u32 offset, u32 v, u32 flag, u32 xcc_id) { struct amdgpu_rlcg_reg_access_ctrl *reg_access_ctrl; uint32_t timeout = 50000; @@ -972,7 +994,12 @@ static u32 amdgpu_virt_rlcg_reg_rw(struct amdgpu_device *adev, u32 offset, u32 v return 0; } - reg_access_ctrl = &adev->gfx.rlc.reg_access_ctrl; + if (adev->gfx.xcc_mask && (((1 << xcc_id) & adev->gfx.xcc_mask) == 0)) { + dev_err(adev->dev, "invalid xcc\n"); + return 0; + } + + reg_access_ctrl = &adev->gfx.rlc.reg_access_ctrl[xcc_id]; scratch_reg0 = (void __iomem *)adev->rmmio + 4 * reg_access_ctrl->scratch_reg0; scratch_reg1 = (void __iomem *)adev->rmmio + 4 * reg_access_ctrl->scratch_reg1; scratch_reg2 = (void __iomem *)adev->rmmio + 4 * reg_access_ctrl->scratch_reg2; @@ -1037,13 +1064,13 @@ static u32 amdgpu_virt_rlcg_reg_rw(struct amdgpu_device *adev, u32 offset, u32 v void amdgpu_sriov_wreg(struct amdgpu_device *adev, u32 offset, u32 value, - u32 acc_flags, u32 hwip) + u32 acc_flags, u32 hwip, u32 xcc_id) { u32 rlcg_flag; if (!amdgpu_sriov_runtime(adev) && amdgpu_virt_get_rlcg_reg_access_flag(adev, acc_flags, hwip, true, &rlcg_flag)) { - amdgpu_virt_rlcg_reg_rw(adev, offset, value, rlcg_flag); + amdgpu_virt_rlcg_reg_rw(adev, offset, value, rlcg_flag, xcc_id); return; } @@ -1054,16 +1081,26 @@ void amdgpu_sriov_wreg(struct amdgpu_device *adev, } u32 amdgpu_sriov_rreg(struct amdgpu_device *adev, - u32 offset, u32 acc_flags, u32 hwip) + u32 offset, u32 acc_flags, u32 hwip, u32 xcc_id) { u32 rlcg_flag; if (!amdgpu_sriov_runtime(adev) && amdgpu_virt_get_rlcg_reg_access_flag(adev, acc_flags, hwip, false, &rlcg_flag)) - return amdgpu_virt_rlcg_reg_rw(adev, offset, 0, rlcg_flag); + return amdgpu_virt_rlcg_reg_rw(adev, offset, 0, rlcg_flag, xcc_id); if (acc_flags & AMDGPU_REGS_NO_KIQ) return RREG32_NO_KIQ(offset); else return RREG32(offset); } + +bool amdgpu_sriov_xnack_support(struct amdgpu_device *adev) +{ + bool xnack_mode = true; + + if (amdgpu_sriov_vf(adev) && adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2)) + xnack_mode = false; + + return xnack_mode; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h index 4f7bab52282a..d4207e44141f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h @@ -126,6 +126,8 @@ enum AMDGIM_FEATURE_FLAG { AMDGIM_FEATURE_INDIRECT_REG_ACCESS = (1 << 5), /* AV1 Support MODE*/ AMDGIM_FEATURE_AV1_SUPPORT = (1 << 6), + /* VCN RB decouple */ + AMDGIM_FEATURE_VCN_RB_DECOUPLE = (1 << 7), }; enum AMDGIM_REG_ACCESS_FLAG { @@ -326,11 +328,14 @@ static inline bool is_virtual_machine(void) ((!amdgpu_in_reset(adev)) && (!adev->virt.tdr_debug)) #define amdgpu_sriov_is_av1_support(adev) \ ((adev)->virt.gim_feature & AMDGIM_FEATURE_AV1_SUPPORT) +#define amdgpu_sriov_is_vcn_rb_decouple(adev) \ + ((adev)->virt.gim_feature & AMDGIM_FEATURE_VCN_RB_DECOUPLE) bool amdgpu_virt_mmio_blocked(struct amdgpu_device *adev); void amdgpu_virt_init_setting(struct amdgpu_device *adev); void amdgpu_virt_kiq_reg_write_reg_wait(struct amdgpu_device *adev, uint32_t reg0, uint32_t rreg1, - uint32_t ref, uint32_t mask); + uint32_t ref, uint32_t mask, + uint32_t xcc_inst); int amdgpu_virt_request_full_gpu(struct amdgpu_device *adev, bool init); int amdgpu_virt_release_full_gpu(struct amdgpu_device *adev, bool init); int amdgpu_virt_reset_gpu(struct amdgpu_device *adev); @@ -355,9 +360,15 @@ void amdgpu_virt_update_sriov_video_codec(struct amdgpu_device *adev, struct amdgpu_video_codec_info *decode, uint32_t decode_array_size); void amdgpu_sriov_wreg(struct amdgpu_device *adev, u32 offset, u32 value, - u32 acc_flags, u32 hwip); + u32 acc_flags, u32 hwip, u32 xcc_id); u32 amdgpu_sriov_rreg(struct amdgpu_device *adev, - u32 offset, u32 acc_flags, u32 hwip); + u32 offset, u32 acc_flags, u32 hwip, u32 xcc_id); bool amdgpu_virt_fw_load_skip_check(struct amdgpu_device *adev, uint32_t ucode_id); +void amdgpu_virt_post_reset(struct amdgpu_device *adev); +bool amdgpu_sriov_xnack_support(struct amdgpu_device *adev); +bool amdgpu_virt_get_rlcg_reg_access_flag(struct amdgpu_device *adev, + u32 acc_flags, u32 hwip, + bool write, u32 *rlcg_flag); +u32 amdgpu_virt_rlcg_reg_rw(struct amdgpu_device *adev, u32 offset, u32 v, u32 flag, u32 xcc_id); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c index d0748bcfad16..db6fc0cb18eb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c @@ -239,6 +239,8 @@ static int amdgpu_vkms_conn_get_modes(struct drm_connector *connector) for (i = 0; i < ARRAY_SIZE(common_modes); i++) { mode = drm_cvt_mode(dev, common_modes[i].w, common_modes[i].h, 60, false, false, false); + if (!mode) + continue; drm_mode_probed_add(connector, mode); } @@ -501,8 +503,6 @@ static int amdgpu_vkms_sw_init(void *handle) adev_to_drm(adev)->mode_config.fb_modifiers_not_supported = true; - adev_to_drm(adev)->mode_config.fb_modifiers_not_supported = true; - r = amdgpu_display_modeset_create_props(adev); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index ec1ec08d4058..5baefb548a29 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -34,6 +34,7 @@ #include <drm/amdgpu_drm.h> #include <drm/drm_drv.h> #include <drm/ttm/ttm_tt.h> +#include <drm/drm_exec.h> #include "amdgpu.h" #include "amdgpu_trace.h" #include "amdgpu_amdkfd.h" @@ -111,9 +112,9 @@ struct amdgpu_prt_cb { }; /** - * struct amdgpu_vm_tlb_seq_cb - Helper to increment the TLB flush sequence + * struct amdgpu_vm_tlb_seq_struct - Helper to increment the TLB flush sequence */ -struct amdgpu_vm_tlb_seq_cb { +struct amdgpu_vm_tlb_seq_struct { /** * @vm: pointer to the amdgpu_vm structure to set the fence sequence on */ @@ -284,6 +285,7 @@ static void amdgpu_vm_bo_reset_state_machine(struct amdgpu_vm *vm) list_for_each_entry_safe(vm_bo, tmp, &vm->idle, vm_status) { struct amdgpu_bo *bo = vm_bo->bo; + vm_bo->moved = true; if (!bo || bo->tbo.type != ttm_bo_type_kernel) list_move(&vm_bo->vm_status, &vm_bo->vm->moved); else if (bo->parent) @@ -339,25 +341,20 @@ void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base, } /** - * amdgpu_vm_get_pd_bo - add the VM PD to a validation list + * amdgpu_vm_lock_pd - lock PD in drm_exec * * @vm: vm providing the BOs - * @validated: head of validation list - * @entry: entry to add + * @exec: drm execution context + * @num_fences: number of extra fences to reserve * - * Add the page directory to the list of BOs to - * validate for command submission. + * Lock the VM root PD in the DRM execution context. */ -void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm, - struct list_head *validated, - struct amdgpu_bo_list_entry *entry) +int amdgpu_vm_lock_pd(struct amdgpu_vm *vm, struct drm_exec *exec, + unsigned int num_fences) { - entry->priority = 0; - entry->tv.bo = &vm->root.bo->tbo; - /* Two for VM updates, one for TTM and one for the CS job */ - entry->tv.num_shared = 4; - entry->user_pages = NULL; - list_add(&entry->tv.head, validated); + /* We need at least two fences for the VM PD/PT updates */ + return drm_exec_prepare_obj(exec, &vm->root.bo->tbo.base, + 2 + num_fences); } /** @@ -833,7 +830,7 @@ error: static void amdgpu_vm_tlb_seq_cb(struct dma_fence *fence, struct dma_fence_cb *cb) { - struct amdgpu_vm_tlb_seq_cb *tlb_cb; + struct amdgpu_vm_tlb_seq_struct *tlb_cb; tlb_cb = container_of(cb, typeof(*tlb_cb), cb); atomic64_inc(&tlb_cb->vm->tlb_seq); @@ -848,6 +845,7 @@ static void amdgpu_vm_tlb_seq_cb(struct dma_fence *fence, * @immediate: immediate submission in a page fault * @unlocked: unlocked invalidation during MM callback * @flush_tlb: trigger tlb invalidation after update completed + * @allow_override: change MTYPE for local NUMA nodes * @resv: fences we need to sync to * @start: start of mapped range * @last: last mapped entry @@ -864,14 +862,14 @@ static void amdgpu_vm_tlb_seq_cb(struct dma_fence *fence, * 0 for success, negative erro code for failure. */ int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm, - bool immediate, bool unlocked, bool flush_tlb, + bool immediate, bool unlocked, bool flush_tlb, bool allow_override, struct dma_resv *resv, uint64_t start, uint64_t last, uint64_t flags, uint64_t offset, uint64_t vram_base, struct ttm_resource *res, dma_addr_t *pages_addr, struct dma_fence **fence) { struct amdgpu_vm_update_params params; - struct amdgpu_vm_tlb_seq_cb *tlb_cb; + struct amdgpu_vm_tlb_seq_struct *tlb_cb; struct amdgpu_res_cursor cursor; enum amdgpu_sync_mode sync_mode; int r, idx; @@ -889,12 +887,12 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm, * heavy-weight flush TLB unconditionally. */ flush_tlb |= adev->gmc.xgmi.num_physical_nodes && - adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 0); + amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 0); /* * On GFX8 and older any 8 PTE block with a valid bit set enters the TLB */ - flush_tlb |= adev->ip_versions[GC_HWIP][0] < IP_VERSION(9, 0, 0); + flush_tlb |= amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(9, 0, 0); memset(¶ms, 0, sizeof(params)); params.adev = adev; @@ -902,6 +900,7 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm, params.immediate = immediate; params.pages_addr = pages_addr; params.unlocked = unlocked; + params.allow_override = allow_override; /* Implicitly sync to command submissions in the same VM before * unmapping. Sync to moving fences before mapping. @@ -1077,6 +1076,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, struct ttm_resource *mem; struct dma_fence **last_update; bool flush_tlb = clear; + bool uncached; struct dma_resv *resv; uint64_t vram_base; uint64_t flags; @@ -1094,12 +1094,13 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, struct drm_gem_object *gobj = dma_buf->priv; struct amdgpu_bo *abo = gem_to_amdgpu_bo(gobj); - if (abo->tbo.resource->mem_type == TTM_PL_VRAM) + if (abo->tbo.resource && + abo->tbo.resource->mem_type == TTM_PL_VRAM) bo = gem_to_amdgpu_bo(gobj); } mem = bo->tbo.resource; - if (mem->mem_type == TTM_PL_TT || - mem->mem_type == AMDGPU_PL_PREEMPT) + if (mem && (mem->mem_type == TTM_PL_TT || + mem->mem_type == AMDGPU_PL_PREEMPT)) pages_addr = bo->tbo.ttm->dma_address; } @@ -1113,9 +1114,11 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, bo_adev = amdgpu_ttm_adev(bo->tbo.bdev); vram_base = bo_adev->vm_manager.vram_base_offset; + uncached = (bo->flags & AMDGPU_GEM_CREATE_UNCACHED) != 0; } else { flags = 0x0; vram_base = 0; + uncached = false; } if (clear || (bo && bo->tbo.base.resv == @@ -1149,7 +1152,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, trace_amdgpu_vm_bo_update(mapping); r = amdgpu_vm_update_range(adev, vm, false, false, flush_tlb, - resv, mapping->start, mapping->last, + !uncached, resv, mapping->start, mapping->last, update_flags, mapping->offset, vram_base, mem, pages_addr, last_update); @@ -1344,8 +1347,8 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev, mapping->start < AMDGPU_GMC_HOLE_START) init_pte_value = AMDGPU_PTE_DEFAULT_ATC; - r = amdgpu_vm_update_range(adev, vm, false, false, true, resv, - mapping->start, mapping->last, + r = amdgpu_vm_update_range(adev, vm, false, false, true, false, + resv, mapping->start, mapping->last, init_pte_value, 0, 0, NULL, NULL, &f); amdgpu_vm_free_mapping(adev, vm, mapping, f); @@ -1371,6 +1374,7 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev, * * @adev: amdgpu_device pointer * @vm: requested vm + * @ticket: optional reservation ticket used to reserve the VM * * Make sure all BOs which are moved are updated in the PTs. * @@ -1380,11 +1384,12 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev, * PTs have to be reserved! */ int amdgpu_vm_handle_moved(struct amdgpu_device *adev, - struct amdgpu_vm *vm) + struct amdgpu_vm *vm, + struct ww_acquire_ctx *ticket) { struct amdgpu_bo_va *bo_va; struct dma_resv *resv; - bool clear; + bool clear, unlock; int r; spin_lock(&vm->status_lock); @@ -1407,17 +1412,24 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev, spin_unlock(&vm->status_lock); /* Try to reserve the BO to avoid clearing its ptes */ - if (!amdgpu_vm_debug && dma_resv_trylock(resv)) + if (!adev->debug_vm && dma_resv_trylock(resv)) { + clear = false; + unlock = true; + /* The caller is already holding the reservation lock */ + } else if (ticket && dma_resv_locking_ctx(resv) == ticket) { clear = false; + unlock = false; /* Somebody else is using the BO right now */ - else + } else { clear = true; + unlock = false; + } r = amdgpu_vm_bo_update(adev, bo_va, clear); if (r) return r; - if (!clear) + if (unlock) dma_resv_unlock(resv); spin_lock(&vm->status_lock); } @@ -2063,7 +2075,7 @@ void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint32_t min_vm_size, if (amdgpu_vm_block_size != -1) tmp >>= amdgpu_vm_block_size - 9; tmp = DIV_ROUND_UP(fls64(tmp) - 1, 9) - 1; - adev->vm_manager.num_level = min(max_level, (unsigned)tmp); + adev->vm_manager.num_level = min_t(unsigned int, max_level, tmp); switch (adev->vm_manager.num_level) { case 3: adev->vm_manager.root_level = AMDGPU_VM_PDB2; @@ -2128,7 +2140,8 @@ long amdgpu_vm_wait_idle(struct amdgpu_vm *vm, long timeout) * Returns: * 0 for success, error for failure. */ -int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, int32_t xcp_id) +int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, + int32_t xcp_id) { struct amdgpu_bo *root_bo; struct amdgpu_bo_vm *root; @@ -2147,6 +2160,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, int32_t xcp INIT_LIST_HEAD(&vm->done); INIT_LIST_HEAD(&vm->pt_freed); INIT_WORK(&vm->pt_free_work, amdgpu_vm_pt_free_work); + INIT_KFIFO(vm->faults); r = amdgpu_vm_init_entities(adev, vm); if (r) @@ -2181,34 +2195,33 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, int32_t xcp false, &root, xcp_id); if (r) goto error_free_delayed; - root_bo = &root->bo; + + root_bo = amdgpu_bo_ref(&root->bo); r = amdgpu_bo_reserve(root_bo, true); - if (r) - goto error_free_root; + if (r) { + amdgpu_bo_unref(&root->shadow); + amdgpu_bo_unref(&root_bo); + goto error_free_delayed; + } + amdgpu_vm_bo_base_init(&vm->root, vm, root_bo); r = dma_resv_reserve_fences(root_bo->tbo.base.resv, 1); if (r) - goto error_unreserve; - - amdgpu_vm_bo_base_init(&vm->root, vm, root_bo); + goto error_free_root; r = amdgpu_vm_pt_clear(adev, vm, root, false); if (r) - goto error_unreserve; + goto error_free_root; amdgpu_bo_unreserve(vm->root.bo); - - INIT_KFIFO(vm->faults); + amdgpu_bo_unref(&root_bo); return 0; -error_unreserve: - amdgpu_bo_unreserve(vm->root.bo); - error_free_root: - amdgpu_bo_unref(&root->shadow); + amdgpu_vm_pt_free_root(adev, vm); + amdgpu_bo_unreserve(vm->root.bo); amdgpu_bo_unref(&root_bo); - vm->root.bo = NULL; error_free_delayed: dma_fence_put(vm->last_tlb_flush); @@ -2280,16 +2293,13 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm) goto unreserve_bo; vm->update_funcs = &amdgpu_vm_cpu_funcs; + r = amdgpu_vm_pt_map_tables(adev, vm); + if (r) + goto unreserve_bo; + } else { vm->update_funcs = &amdgpu_vm_sdma_funcs; } - /* - * Make sure root PD gets mapped. As vm_update_mode could be changed - * when turning a GFX VM into a compute VM. - */ - r = vm->update_funcs->map_table(to_amdgpu_bo_vm(vm->root.bo)); - if (r) - goto unreserve_bo; dma_fence_put(vm->last_update); vm->last_update = dma_fence_get_stub(); @@ -2605,7 +2615,7 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid, /* Intentionally setting invalid PTE flag * combination to force a no-retry-fault */ - flags = AMDGPU_PTE_SNOOPED | AMDGPU_PTE_PRT; + flags = AMDGPU_VM_NORETRY_FLAGS; value = 0; } else if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_NEVER) { /* Redirect the access to the dummy page */ @@ -2624,8 +2634,8 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid, goto error_unlock; } - r = amdgpu_vm_update_range(adev, vm, true, false, false, NULL, addr, - addr, flags, value, 0, NULL, NULL, NULL); + r = amdgpu_vm_update_range(adev, vm, true, false, false, false, + NULL, addr, addr, flags, value, 0, NULL, NULL, NULL); if (r) goto error_unlock; @@ -2737,3 +2747,53 @@ void amdgpu_debugfs_vm_bo_info(struct amdgpu_vm *vm, struct seq_file *m) total_done_objs); } #endif + +/** + * amdgpu_vm_update_fault_cache - update cached fault into. + * @adev: amdgpu device pointer + * @pasid: PASID of the VM + * @addr: Address of the fault + * @status: GPUVM fault status register + * @vmhub: which vmhub got the fault + * + * Cache the fault info for later use by userspace in debugging. + */ +void amdgpu_vm_update_fault_cache(struct amdgpu_device *adev, + unsigned int pasid, + uint64_t addr, + uint32_t status, + unsigned int vmhub) +{ + struct amdgpu_vm *vm; + unsigned long flags; + + xa_lock_irqsave(&adev->vm_manager.pasids, flags); + + vm = xa_load(&adev->vm_manager.pasids, pasid); + /* Don't update the fault cache if status is 0. In the multiple + * fault case, subsequent faults will return a 0 status which is + * useless for userspace and replaces the useful fault status, so + * only update if status is non-0. + */ + if (vm && status) { + vm->fault_info.addr = addr; + vm->fault_info.status = status; + if (AMDGPU_IS_GFXHUB(vmhub)) { + vm->fault_info.vmhub = AMDGPU_VMHUB_TYPE_GFX; + vm->fault_info.vmhub |= + (vmhub - AMDGPU_GFXHUB_START) << AMDGPU_VMHUB_IDX_SHIFT; + } else if (AMDGPU_IS_MMHUB0(vmhub)) { + vm->fault_info.vmhub = AMDGPU_VMHUB_TYPE_MM0; + vm->fault_info.vmhub |= + (vmhub - AMDGPU_MMHUB0_START) << AMDGPU_VMHUB_IDX_SHIFT; + } else if (AMDGPU_IS_MMHUB1(vmhub)) { + vm->fault_info.vmhub = AMDGPU_VMHUB_TYPE_MM1; + vm->fault_info.vmhub |= + (vmhub - AMDGPU_MMHUB1_START) << AMDGPU_VMHUB_IDX_SHIFT; + } else { + WARN_ONCE(1, "Invalid vmhub %u\n", vmhub); + } + } + xa_unlock_irqrestore(&adev->vm_manager.pasids, flags); +} + diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index ffac7413c657..2cd86d2bf73f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -36,6 +36,8 @@ #include "amdgpu_ring.h" #include "amdgpu_ids.h" +struct drm_exec; + struct amdgpu_bo_va; struct amdgpu_job; struct amdgpu_bo_list_entry; @@ -84,7 +86,13 @@ struct amdgpu_mem_stats; /* PDE Block Fragment Size for VEGA10 */ #define AMDGPU_PDE_BFS(a) ((uint64_t)a << 59) +/* Flag combination to set no-retry with TF disabled */ +#define AMDGPU_VM_NORETRY_FLAGS (AMDGPU_PTE_EXECUTABLE | AMDGPU_PDE_PTE | \ + AMDGPU_PTE_TF) +/* Flag combination to set no-retry with TF enabled */ +#define AMDGPU_VM_NORETRY_FLAGS_TF (AMDGPU_PTE_VALID | AMDGPU_PTE_SYSTEM | \ + AMDGPU_PTE_PRT) /* For GFX9 */ #define AMDGPU_PTE_MTYPE_VG10(a) ((uint64_t)(a) << 57) #define AMDGPU_PTE_MTYPE_VG10_MASK AMDGPU_PTE_MTYPE_VG10(3ULL) @@ -116,9 +124,16 @@ struct amdgpu_mem_stats; * layout: max 8 GFXHUB + 4 MMHUB0 + 1 MMHUB1 */ #define AMDGPU_MAX_VMHUBS 13 -#define AMDGPU_GFXHUB(x) (x) -#define AMDGPU_MMHUB0(x) (8 + x) -#define AMDGPU_MMHUB1(x) (8 + 4 + x) +#define AMDGPU_GFXHUB_START 0 +#define AMDGPU_MMHUB0_START 8 +#define AMDGPU_MMHUB1_START 12 +#define AMDGPU_GFXHUB(x) (AMDGPU_GFXHUB_START + (x)) +#define AMDGPU_MMHUB0(x) (AMDGPU_MMHUB0_START + (x)) +#define AMDGPU_MMHUB1(x) (AMDGPU_MMHUB1_START + (x)) + +#define AMDGPU_IS_GFXHUB(x) ((x) >= AMDGPU_GFXHUB_START && (x) < AMDGPU_MMHUB0_START) +#define AMDGPU_IS_MMHUB0(x) ((x) >= AMDGPU_MMHUB0_START && (x) < AMDGPU_MMHUB1_START) +#define AMDGPU_IS_MMHUB1(x) ((x) >= AMDGPU_MMHUB1_START && (x) < AMDGPU_MAX_VMHUBS) /* Reserve 2MB at top/bottom of address space for kernel use */ #define AMDGPU_VA_RESERVED_SIZE (2ULL << 20) @@ -231,6 +246,12 @@ struct amdgpu_vm_update_params { * @table_freed: return true if page table is freed when updating */ bool table_freed; + + /** + * @allow_override: true for memory that is not uncached: allows MTYPE + * to be overridden for NUMA local memory. + */ + bool allow_override; }; struct amdgpu_vm_update_funcs { @@ -244,6 +265,15 @@ struct amdgpu_vm_update_funcs { struct dma_fence **fence); }; +struct amdgpu_vm_fault_info { + /* fault address */ + uint64_t addr; + /* fault status register */ + uint32_t status; + /* which vmhub? gfxhub, mmhub, etc. */ + unsigned int vmhub; +}; + struct amdgpu_vm { /* tree of virtual addresses mapped */ struct rb_root_cached va; @@ -335,6 +365,9 @@ struct amdgpu_vm { /* Memory partition number, -1 means any partition */ int8_t mem_id; + + /* cached fault info */ + struct amdgpu_vm_fault_info fault_info; }; struct amdgpu_vm_manager { @@ -396,9 +429,8 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, int32_t xcp int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm); void amdgpu_vm_release_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm); void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm); -void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm, - struct list_head *validated, - struct amdgpu_bo_list_entry *entry); +int amdgpu_vm_lock_pd(struct amdgpu_vm *vm, struct drm_exec *exec, + unsigned int num_fences); bool amdgpu_vm_ready(struct amdgpu_vm *vm); uint64_t amdgpu_vm_generation(struct amdgpu_device *adev, struct amdgpu_vm *vm); int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm, @@ -411,11 +443,12 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev, struct amdgpu_vm *vm, struct dma_fence **fence); int amdgpu_vm_handle_moved(struct amdgpu_device *adev, - struct amdgpu_vm *vm); + struct amdgpu_vm *vm, + struct ww_acquire_ctx *ticket); void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base, struct amdgpu_vm *vm, struct amdgpu_bo *bo); int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm, - bool immediate, bool unlocked, bool flush_tlb, + bool immediate, bool unlocked, bool flush_tlb, bool allow_override, struct dma_resv *resv, uint64_t start, uint64_t last, uint64_t flags, uint64_t offset, uint64_t vram_base, struct ttm_resource *res, dma_addr_t *pages_addr, @@ -492,6 +525,8 @@ void amdgpu_vm_pt_free_work(struct work_struct *work); void amdgpu_debugfs_vm_bo_info(struct amdgpu_vm *vm, struct seq_file *m); #endif +int amdgpu_vm_pt_map_tables(struct amdgpu_device *adev, struct amdgpu_vm *vm); + /** * amdgpu_vm_tlb_seq - return tlb flush sequence number * @vm: the amdgpu_vm structure to query @@ -545,4 +580,10 @@ static inline void amdgpu_vm_eviction_unlock(struct amdgpu_vm *vm) mutex_unlock(&vm->eviction_lock); } +void amdgpu_vm_update_fault_cache(struct amdgpu_device *adev, + unsigned int pasid, + uint64_t addr, + uint32_t status, + unsigned int vmhub); + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c index 31913ae86de6..6e31621452de 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c @@ -31,6 +31,7 @@ */ static int amdgpu_vm_cpu_map_table(struct amdgpu_bo_vm *table) { + table->bo.flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; return amdgpu_bo_kmap(&table->bo, NULL); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c index 5431332bbdb8..a160265ddc07 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c @@ -642,13 +642,14 @@ static void amdgpu_vm_pt_free(struct amdgpu_vm_bo_base *entry) if (!entry->bo) return; + + entry->bo->vm_bo = NULL; shadow = amdgpu_bo_shadowed(entry->bo); if (shadow) { ttm_bo_set_bulk_move(&shadow->tbo, NULL); amdgpu_bo_unref(&shadow); } ttm_bo_set_bulk_move(&entry->bo->tbo, NULL); - entry->bo->vm_bo = NULL; spin_lock(&entry->vm->status_lock); list_del(&entry->vm_status); @@ -780,6 +781,27 @@ int amdgpu_vm_pde_update(struct amdgpu_vm_update_params *params, 1, 0, flags); } +/** + * amdgpu_vm_pte_update_noretry_flags - Update PTE no-retry flags + * + * @adev: amdgpu_device pointer + * @flags: pointer to PTE flags + * + * Update PTE no-retry flags when TF is enabled. + */ +static void amdgpu_vm_pte_update_noretry_flags(struct amdgpu_device *adev, + uint64_t *flags) +{ + /* + * Update no-retry flags with the corresponding TF + * no-retry combination. + */ + if ((*flags & AMDGPU_VM_NORETRY_FLAGS) == AMDGPU_VM_NORETRY_FLAGS) { + *flags &= ~AMDGPU_VM_NORETRY_FLAGS; + *flags |= adev->gmc.noretry_flags; + } +} + /* * amdgpu_vm_pte_update_flags - figure out flags for PTE updates * @@ -806,20 +828,24 @@ static void amdgpu_vm_pte_update_flags(struct amdgpu_vm_update_params *params, flags |= AMDGPU_PTE_EXECUTABLE; } + /* + * Update no-retry flags to use the no-retry flag combination + * with TF enabled. The AMDGPU_VM_NORETRY_FLAGS flag combination + * does not work when TF is enabled. So, replace them with + * AMDGPU_VM_NORETRY_FLAGS_TF flag combination which works for + * all cases. + */ + if (level == AMDGPU_VM_PTB) + amdgpu_vm_pte_update_noretry_flags(adev, &flags); + /* APUs mapping system memory may need different MTYPEs on different * NUMA nodes. Only do this for contiguous ranges that can be assumed * to be on the same NUMA node. */ if ((flags & AMDGPU_PTE_SYSTEM) && (adev->flags & AMD_IS_APU) && adev->gmc.gmc_funcs->override_vm_pte_flags && - num_possible_nodes() > 1) { - if (!params->pages_addr) - amdgpu_gmc_override_vm_pte_flags(adev, params->vm, - addr, &flags); - else - dev_dbg(adev->dev, - "override_vm_pte_flags skipped: non-contiguous\n"); - } + num_possible_nodes() > 1 && !params->pages_addr && params->allow_override) + amdgpu_gmc_override_vm_pte_flags(adev, params->vm, addr, &flags); params->vm->update_funcs->update(params, pt, pe, addr, count, incr, flags); @@ -1046,3 +1072,31 @@ int amdgpu_vm_ptes_update(struct amdgpu_vm_update_params *params, return 0; } + +/** + * amdgpu_vm_pt_map_tables - have bo of root PD cpu accessible + * @adev: amdgpu device structure + * @vm: amdgpu vm structure + * + * make root page directory and everything below it cpu accessible. + */ +int amdgpu_vm_pt_map_tables(struct amdgpu_device *adev, struct amdgpu_vm *vm) +{ + struct amdgpu_vm_pt_cursor cursor; + struct amdgpu_vm_bo_base *entry; + + for_each_amdgpu_vm_pt_dfs_safe(adev, vm, NULL, cursor, entry) { + + struct amdgpu_bo_vm *bo; + int r; + + if (entry->bo) { + bo = to_amdgpu_bo_vm(entry->bo); + r = vm->update_funcs->map_table(bo); + if (r) + return r; + } + } + + return 0; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c new file mode 100644 index 000000000000..e81579708e96 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c @@ -0,0 +1,656 @@ +/* + * Copyright 2022 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include <linux/firmware.h> +#include <drm/drm_drv.h> + +#include "amdgpu.h" +#include "amdgpu_ucode.h" +#include "amdgpu_vpe.h" +#include "soc15_common.h" +#include "vpe_v6_1.h" + +#define AMDGPU_CSA_VPE_SIZE 64 +/* VPE CSA resides in the 4th page of CSA */ +#define AMDGPU_CSA_VPE_OFFSET (4096 * 3) + +static void vpe_set_ring_funcs(struct amdgpu_device *adev); + +int amdgpu_vpe_psp_update_sram(struct amdgpu_device *adev) +{ + struct amdgpu_firmware_info ucode = { + .ucode_id = AMDGPU_UCODE_ID_VPE, + .mc_addr = adev->vpe.cmdbuf_gpu_addr, + .ucode_size = 8, + }; + + return psp_execute_ip_fw_load(&adev->psp, &ucode); +} + +int amdgpu_vpe_init_microcode(struct amdgpu_vpe *vpe) +{ + struct amdgpu_device *adev = vpe->ring.adev; + const struct vpe_firmware_header_v1_0 *vpe_hdr; + char fw_prefix[32], fw_name[64]; + int ret; + + amdgpu_ucode_ip_version_decode(adev, VPE_HWIP, fw_prefix, sizeof(fw_prefix)); + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s.bin", fw_prefix); + + ret = amdgpu_ucode_request(adev, &adev->vpe.fw, fw_name); + if (ret) + goto out; + + vpe_hdr = (const struct vpe_firmware_header_v1_0 *)adev->vpe.fw->data; + adev->vpe.fw_version = le32_to_cpu(vpe_hdr->header.ucode_version); + adev->vpe.feature_version = le32_to_cpu(vpe_hdr->ucode_feature_version); + + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { + struct amdgpu_firmware_info *info; + + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_VPE_CTX]; + info->ucode_id = AMDGPU_UCODE_ID_VPE_CTX; + info->fw = adev->vpe.fw; + adev->firmware.fw_size += + ALIGN(le32_to_cpu(vpe_hdr->ctx_ucode_size_bytes), PAGE_SIZE); + + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_VPE_CTL]; + info->ucode_id = AMDGPU_UCODE_ID_VPE_CTL; + info->fw = adev->vpe.fw; + adev->firmware.fw_size += + ALIGN(le32_to_cpu(vpe_hdr->ctl_ucode_size_bytes), PAGE_SIZE); + } + + return 0; +out: + dev_err(adev->dev, "fail to initialize vpe microcode\n"); + release_firmware(adev->vpe.fw); + adev->vpe.fw = NULL; + return ret; +} + +int amdgpu_vpe_ring_init(struct amdgpu_vpe *vpe) +{ + struct amdgpu_device *adev = container_of(vpe, struct amdgpu_device, vpe); + struct amdgpu_ring *ring = &vpe->ring; + int ret; + + ring->ring_obj = NULL; + ring->use_doorbell = true; + ring->vm_hub = AMDGPU_MMHUB0(0); + ring->doorbell_index = (adev->doorbell_index.vpe_ring << 1); + snprintf(ring->name, 4, "vpe"); + + ret = amdgpu_ring_init(adev, ring, 1024, &vpe->trap_irq, 0, + AMDGPU_RING_PRIO_DEFAULT, NULL); + if (ret) + return ret; + + return 0; +} + +int amdgpu_vpe_ring_fini(struct amdgpu_vpe *vpe) +{ + amdgpu_ring_fini(&vpe->ring); + + return 0; +} + +static int vpe_early_init(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_vpe *vpe = &adev->vpe; + + switch (amdgpu_ip_version(adev, VPE_HWIP, 0)) { + case IP_VERSION(6, 1, 0): + vpe_v6_1_set_funcs(vpe); + break; + default: + return -EINVAL; + } + + vpe_set_ring_funcs(adev); + vpe_set_regs(vpe); + + return 0; +} + + +static int vpe_common_init(struct amdgpu_vpe *vpe) +{ + struct amdgpu_device *adev = container_of(vpe, struct amdgpu_device, vpe); + int r; + + r = amdgpu_bo_create_kernel(adev, PAGE_SIZE, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_GTT, + &adev->vpe.cmdbuf_obj, + &adev->vpe.cmdbuf_gpu_addr, + (void **)&adev->vpe.cmdbuf_cpu_addr); + if (r) { + dev_err(adev->dev, "VPE: failed to allocate cmdbuf bo %d\n", r); + return r; + } + + return 0; +} + +static int vpe_sw_init(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_vpe *vpe = &adev->vpe; + int ret; + + ret = vpe_common_init(vpe); + if (ret) + goto out; + + ret = vpe_irq_init(vpe); + if (ret) + goto out; + + ret = vpe_ring_init(vpe); + if (ret) + goto out; + + ret = vpe_init_microcode(vpe); + if (ret) + goto out; +out: + return ret; +} + +static int vpe_sw_fini(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_vpe *vpe = &adev->vpe; + + release_firmware(vpe->fw); + vpe->fw = NULL; + + vpe_ring_fini(vpe); + + amdgpu_bo_free_kernel(&adev->vpe.cmdbuf_obj, + &adev->vpe.cmdbuf_gpu_addr, + (void **)&adev->vpe.cmdbuf_cpu_addr); + + return 0; +} + +static int vpe_hw_init(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_vpe *vpe = &adev->vpe; + int ret; + + ret = vpe_load_microcode(vpe); + if (ret) + return ret; + + ret = vpe_ring_start(vpe); + if (ret) + return ret; + + return 0; +} + +static int vpe_hw_fini(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_vpe *vpe = &adev->vpe; + + vpe_ring_stop(vpe); + + return 0; +} + +static int vpe_suspend(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + return vpe_hw_fini(adev); +} + +static int vpe_resume(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + return vpe_hw_init(adev); +} + +static void vpe_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) +{ + int i; + + for (i = 0; i < count; i++) + if (i == 0) + amdgpu_ring_write(ring, ring->funcs->nop | + VPE_CMD_NOP_HEADER_COUNT(count - 1)); + else + amdgpu_ring_write(ring, ring->funcs->nop); +} + +static uint64_t vpe_get_csa_mc_addr(struct amdgpu_ring *ring, uint32_t vmid) +{ + struct amdgpu_device *adev = ring->adev; + uint32_t index = 0; + uint64_t csa_mc_addr; + + if (amdgpu_sriov_vf(adev) || vmid == 0 || !adev->gfx.mcbp) + return 0; + + csa_mc_addr = amdgpu_csa_vaddr(adev) + AMDGPU_CSA_VPE_OFFSET + + index * AMDGPU_CSA_VPE_SIZE; + + return csa_mc_addr; +} + +static void vpe_ring_emit_ib(struct amdgpu_ring *ring, + struct amdgpu_job *job, + struct amdgpu_ib *ib, + uint32_t flags) +{ + uint32_t vmid = AMDGPU_JOB_GET_VMID(job); + uint64_t csa_mc_addr = vpe_get_csa_mc_addr(ring, vmid); + + amdgpu_ring_write(ring, VPE_CMD_HEADER(VPE_CMD_OPCODE_INDIRECT, 0) | + VPE_CMD_INDIRECT_HEADER_VMID(vmid & 0xf)); + + /* base must be 32 byte aligned */ + amdgpu_ring_write(ring, ib->gpu_addr & 0xffffffe0); + amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); + amdgpu_ring_write(ring, ib->length_dw); + amdgpu_ring_write(ring, lower_32_bits(csa_mc_addr)); + amdgpu_ring_write(ring, upper_32_bits(csa_mc_addr)); +} + +static void vpe_ring_emit_fence(struct amdgpu_ring *ring, uint64_t addr, + uint64_t seq, unsigned int flags) +{ + int i = 0; + + do { + /* write the fence */ + amdgpu_ring_write(ring, VPE_CMD_HEADER(VPE_CMD_OPCODE_FENCE, 0)); + /* zero in first two bits */ + WARN_ON_ONCE(addr & 0x3); + amdgpu_ring_write(ring, lower_32_bits(addr)); + amdgpu_ring_write(ring, upper_32_bits(addr)); + amdgpu_ring_write(ring, i == 0 ? lower_32_bits(seq) : upper_32_bits(seq)); + addr += 4; + } while ((flags & AMDGPU_FENCE_FLAG_64BIT) && (i++ < 1)); + + if (flags & AMDGPU_FENCE_FLAG_INT) { + /* generate an interrupt */ + amdgpu_ring_write(ring, VPE_CMD_HEADER(VPE_CMD_OPCODE_TRAP, 0)); + amdgpu_ring_write(ring, 0); + } + +} + +static void vpe_ring_emit_pipeline_sync(struct amdgpu_ring *ring) +{ + uint32_t seq = ring->fence_drv.sync_seq; + uint64_t addr = ring->fence_drv.gpu_addr; + + /* wait for idle */ + amdgpu_ring_write(ring, VPE_CMD_HEADER(VPE_CMD_OPCODE_POLL_REGMEM, + VPE_POLL_REGMEM_SUBOP_REGMEM) | + VPE_CMD_POLL_REGMEM_HEADER_FUNC(3) | /* equal */ + VPE_CMD_POLL_REGMEM_HEADER_MEM(1)); + amdgpu_ring_write(ring, addr & 0xfffffffc); + amdgpu_ring_write(ring, upper_32_bits(addr)); + amdgpu_ring_write(ring, seq); /* reference */ + amdgpu_ring_write(ring, 0xffffffff); /* mask */ + amdgpu_ring_write(ring, VPE_CMD_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) | + VPE_CMD_POLL_REGMEM_DW5_INTERVAL(4)); +} + +static void vpe_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t val) +{ + amdgpu_ring_write(ring, VPE_CMD_HEADER(VPE_CMD_OPCODE_REG_WRITE, 0)); + amdgpu_ring_write(ring, reg << 2); + amdgpu_ring_write(ring, val); +} + +static void vpe_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, + uint32_t val, uint32_t mask) +{ + amdgpu_ring_write(ring, VPE_CMD_HEADER(VPE_CMD_OPCODE_POLL_REGMEM, + VPE_POLL_REGMEM_SUBOP_REGMEM) | + VPE_CMD_POLL_REGMEM_HEADER_FUNC(3) | /* equal */ + VPE_CMD_POLL_REGMEM_HEADER_MEM(0)); + amdgpu_ring_write(ring, reg << 2); + amdgpu_ring_write(ring, 0); + amdgpu_ring_write(ring, val); /* reference */ + amdgpu_ring_write(ring, mask); /* mask */ + amdgpu_ring_write(ring, VPE_CMD_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) | + VPE_CMD_POLL_REGMEM_DW5_INTERVAL(10)); +} + +static void vpe_ring_emit_vm_flush(struct amdgpu_ring *ring, unsigned int vmid, + uint64_t pd_addr) +{ + amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); +} + +static unsigned int vpe_ring_init_cond_exec(struct amdgpu_ring *ring) +{ + unsigned int ret; + + amdgpu_ring_write(ring, VPE_CMD_HEADER(VPE_CMD_OPCODE_COND_EXE, 0)); + amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr)); + amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr)); + amdgpu_ring_write(ring, 1); + ret = ring->wptr & ring->buf_mask;/* this is the offset we need patch later */ + amdgpu_ring_write(ring, 0x55aa55aa);/* insert dummy here and patch it later */ + + return ret; +} + +static void vpe_ring_patch_cond_exec(struct amdgpu_ring *ring, unsigned int offset) +{ + unsigned int cur; + + WARN_ON_ONCE(offset > ring->buf_mask); + WARN_ON_ONCE(ring->ring[offset] != 0x55aa55aa); + + cur = (ring->wptr - 1) & ring->buf_mask; + if (cur > offset) + ring->ring[offset] = cur - offset; + else + ring->ring[offset] = (ring->buf_mask + 1) - offset + cur; +} + +static int vpe_ring_preempt_ib(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + struct amdgpu_vpe *vpe = &adev->vpe; + uint32_t preempt_reg = vpe->regs.queue0_preempt; + int i, r = 0; + + /* assert preemption condition */ + amdgpu_ring_set_preempt_cond_exec(ring, false); + + /* emit the trailing fence */ + ring->trail_seq += 1; + amdgpu_ring_alloc(ring, 10); + vpe_ring_emit_fence(ring, ring->trail_fence_gpu_addr, ring->trail_seq, 0); + amdgpu_ring_commit(ring); + + /* assert IB preemption */ + WREG32(vpe_get_reg_offset(vpe, ring->me, preempt_reg), 1); + + /* poll the trailing fence */ + for (i = 0; i < adev->usec_timeout; i++) { + if (ring->trail_seq == + le32_to_cpu(*(ring->trail_fence_cpu_addr))) + break; + udelay(1); + } + + if (i >= adev->usec_timeout) { + r = -EINVAL; + dev_err(adev->dev, "ring %d failed to be preempted\n", ring->idx); + } + + /* deassert IB preemption */ + WREG32(vpe_get_reg_offset(vpe, ring->me, preempt_reg), 0); + + /* deassert the preemption condition */ + amdgpu_ring_set_preempt_cond_exec(ring, true); + + return r; +} + +static int vpe_set_clockgating_state(void *handle, + enum amd_clockgating_state state) +{ + return 0; +} + +static int vpe_set_powergating_state(void *handle, + enum amd_powergating_state state) +{ + return 0; +} + +static uint64_t vpe_ring_get_rptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + struct amdgpu_vpe *vpe = &adev->vpe; + uint64_t rptr; + + if (ring->use_doorbell) { + rptr = atomic64_read((atomic64_t *)ring->rptr_cpu_addr); + dev_dbg(adev->dev, "rptr/doorbell before shift == 0x%016llx\n", rptr); + } else { + rptr = RREG32(vpe_get_reg_offset(vpe, ring->me, vpe->regs.queue0_rb_rptr_hi)); + rptr = rptr << 32; + rptr |= RREG32(vpe_get_reg_offset(vpe, ring->me, vpe->regs.queue0_rb_rptr_lo)); + dev_dbg(adev->dev, "rptr before shift [%i] == 0x%016llx\n", ring->me, rptr); + } + + return (rptr >> 2); +} + +static uint64_t vpe_ring_get_wptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + struct amdgpu_vpe *vpe = &adev->vpe; + uint64_t wptr; + + if (ring->use_doorbell) { + wptr = atomic64_read((atomic64_t *)ring->wptr_cpu_addr); + dev_dbg(adev->dev, "wptr/doorbell before shift == 0x%016llx\n", wptr); + } else { + wptr = RREG32(vpe_get_reg_offset(vpe, ring->me, vpe->regs.queue0_rb_wptr_hi)); + wptr = wptr << 32; + wptr |= RREG32(vpe_get_reg_offset(vpe, ring->me, vpe->regs.queue0_rb_wptr_lo)); + dev_dbg(adev->dev, "wptr before shift [%i] == 0x%016llx\n", ring->me, wptr); + } + + return (wptr >> 2); +} + +static void vpe_ring_set_wptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + struct amdgpu_vpe *vpe = &adev->vpe; + + if (ring->use_doorbell) { + dev_dbg(adev->dev, "Using doorbell, \ + wptr_offs == 0x%08x, \ + lower_32_bits(ring->wptr) << 2 == 0x%08x, \ + upper_32_bits(ring->wptr) << 2 == 0x%08x\n", + ring->wptr_offs, + lower_32_bits(ring->wptr << 2), + upper_32_bits(ring->wptr << 2)); + atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr << 2); + WDOORBELL64(ring->doorbell_index, ring->wptr << 2); + } else { + dev_dbg(adev->dev, "Not using doorbell, \ + regVPEC_QUEUE0_RB_WPTR == 0x%08x, \ + regVPEC_QUEUE0_RB_WPTR_HI == 0x%08x\n", + lower_32_bits(ring->wptr << 2), + upper_32_bits(ring->wptr << 2)); + WREG32(vpe_get_reg_offset(vpe, ring->me, vpe->regs.queue0_rb_wptr_lo), + lower_32_bits(ring->wptr << 2)); + WREG32(vpe_get_reg_offset(vpe, ring->me, vpe->regs.queue0_rb_wptr_hi), + upper_32_bits(ring->wptr << 2)); + } +} + +static int vpe_ring_test_ring(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + const uint32_t test_pattern = 0xdeadbeef; + uint32_t index, i; + uint64_t wb_addr; + int ret; + + ret = amdgpu_device_wb_get(adev, &index); + if (ret) { + dev_err(adev->dev, "(%d) failed to allocate wb slot\n", ret); + return ret; + } + + adev->wb.wb[index] = 0; + wb_addr = adev->wb.gpu_addr + (index * 4); + + ret = amdgpu_ring_alloc(ring, 4); + if (ret) { + dev_err(adev->dev, "amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, ret); + goto out; + } + + amdgpu_ring_write(ring, VPE_CMD_HEADER(VPE_CMD_OPCODE_FENCE, 0)); + amdgpu_ring_write(ring, lower_32_bits(wb_addr)); + amdgpu_ring_write(ring, upper_32_bits(wb_addr)); + amdgpu_ring_write(ring, test_pattern); + amdgpu_ring_commit(ring); + + for (i = 0; i < adev->usec_timeout; i++) { + if (le32_to_cpu(adev->wb.wb[index]) == test_pattern) + goto out; + udelay(1); + } + + ret = -ETIMEDOUT; +out: + amdgpu_device_wb_free(adev, index); + + return ret; +} + +static int vpe_ring_test_ib(struct amdgpu_ring *ring, long timeout) +{ + struct amdgpu_device *adev = ring->adev; + const uint32_t test_pattern = 0xdeadbeef; + struct amdgpu_ib ib = {}; + struct dma_fence *f = NULL; + uint32_t index; + uint64_t wb_addr; + int ret; + + ret = amdgpu_device_wb_get(adev, &index); + if (ret) { + dev_err(adev->dev, "(%d) failed to allocate wb slot\n", ret); + return ret; + } + + adev->wb.wb[index] = 0; + wb_addr = adev->wb.gpu_addr + (index * 4); + + ret = amdgpu_ib_get(adev, NULL, 256, AMDGPU_IB_POOL_DIRECT, &ib); + if (ret) + goto err0; + + ib.ptr[0] = VPE_CMD_HEADER(VPE_CMD_OPCODE_FENCE, 0); + ib.ptr[1] = lower_32_bits(wb_addr); + ib.ptr[2] = upper_32_bits(wb_addr); + ib.ptr[3] = test_pattern; + ib.ptr[4] = VPE_CMD_HEADER(VPE_CMD_OPCODE_NOP, 0); + ib.ptr[5] = VPE_CMD_HEADER(VPE_CMD_OPCODE_NOP, 0); + ib.ptr[6] = VPE_CMD_HEADER(VPE_CMD_OPCODE_NOP, 0); + ib.ptr[7] = VPE_CMD_HEADER(VPE_CMD_OPCODE_NOP, 0); + ib.length_dw = 8; + + ret = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f); + if (ret) + goto err1; + + ret = dma_fence_wait_timeout(f, false, timeout); + if (ret <= 0) { + ret = ret ? : -ETIMEDOUT; + goto err1; + } + + ret = (le32_to_cpu(adev->wb.wb[index]) == test_pattern) ? 0 : -EINVAL; + +err1: + amdgpu_ib_free(adev, &ib, NULL); + dma_fence_put(f); +err0: + amdgpu_device_wb_free(adev, index); + + return ret; +} + +static const struct amdgpu_ring_funcs vpe_ring_funcs = { + .type = AMDGPU_RING_TYPE_VPE, + .align_mask = 0xf, + .nop = VPE_CMD_HEADER(VPE_CMD_OPCODE_NOP, 0), + .support_64bit_ptrs = true, + .get_rptr = vpe_ring_get_rptr, + .get_wptr = vpe_ring_get_wptr, + .set_wptr = vpe_ring_set_wptr, + .emit_frame_size = + 5 + /* vpe_ring_init_cond_exec */ + 6 + /* vpe_ring_emit_pipeline_sync */ + 10 + 10 + 10 + /* vpe_ring_emit_fence */ + /* vpe_ring_emit_vm_flush */ + SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 + + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6, + .emit_ib_size = 7 + 6, + .emit_ib = vpe_ring_emit_ib, + .emit_pipeline_sync = vpe_ring_emit_pipeline_sync, + .emit_fence = vpe_ring_emit_fence, + .emit_vm_flush = vpe_ring_emit_vm_flush, + .emit_wreg = vpe_ring_emit_wreg, + .emit_reg_wait = vpe_ring_emit_reg_wait, + .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, + .insert_nop = vpe_ring_insert_nop, + .pad_ib = amdgpu_ring_generic_pad_ib, + .test_ring = vpe_ring_test_ring, + .test_ib = vpe_ring_test_ib, + .init_cond_exec = vpe_ring_init_cond_exec, + .patch_cond_exec = vpe_ring_patch_cond_exec, + .preempt_ib = vpe_ring_preempt_ib, +}; + +static void vpe_set_ring_funcs(struct amdgpu_device *adev) +{ + adev->vpe.ring.funcs = &vpe_ring_funcs; +} + +const struct amd_ip_funcs vpe_ip_funcs = { + .name = "vpe_v6_1", + .early_init = vpe_early_init, + .late_init = NULL, + .sw_init = vpe_sw_init, + .sw_fini = vpe_sw_fini, + .hw_init = vpe_hw_init, + .hw_fini = vpe_hw_fini, + .suspend = vpe_suspend, + .resume = vpe_resume, + .soft_reset = NULL, + .set_clockgating_state = vpe_set_clockgating_state, + .set_powergating_state = vpe_set_powergating_state, +}; + +const struct amdgpu_ip_block_version vpe_v6_1_ip_block = { + .type = AMD_IP_BLOCK_TYPE_VPE, + .major = 6, + .minor = 1, + .rev = 0, + .funcs = &vpe_ip_funcs, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.h new file mode 100644 index 000000000000..29d56f7ae4a9 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.h @@ -0,0 +1,91 @@ +/* + * Copyright 2022 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef __AMDGPU_VPE_H__ +#define __AMDGPU_VPE_H__ + +#include "amdgpu_ring.h" +#include "amdgpu_irq.h" +#include "vpe_6_1_fw_if.h" + +struct amdgpu_vpe; + +struct vpe_funcs { + uint32_t (*get_reg_offset)(struct amdgpu_vpe *vpe, uint32_t inst, uint32_t offset); + int (*set_regs)(struct amdgpu_vpe *vpe); + int (*irq_init)(struct amdgpu_vpe *vpe); + int (*init_microcode)(struct amdgpu_vpe *vpe); + int (*load_microcode)(struct amdgpu_vpe *vpe); + int (*ring_init)(struct amdgpu_vpe *vpe); + int (*ring_start)(struct amdgpu_vpe *vpe); + int (*ring_stop)(struct amdgpu_vpe *vpe); + int (*ring_fini)(struct amdgpu_vpe *vpe); +}; + +struct vpe_regs { + uint32_t queue0_rb_rptr_lo; + uint32_t queue0_rb_rptr_hi; + uint32_t queue0_rb_wptr_lo; + uint32_t queue0_rb_wptr_hi; + uint32_t queue0_preempt; +}; + +struct amdgpu_vpe { + struct amdgpu_ring ring; + struct amdgpu_irq_src trap_irq; + + const struct vpe_funcs *funcs; + struct vpe_regs regs; + + const struct firmware *fw; + uint32_t fw_version; + uint32_t feature_version; + + struct amdgpu_bo *cmdbuf_obj; + uint64_t cmdbuf_gpu_addr; + uint32_t *cmdbuf_cpu_addr; +}; + +int amdgpu_vpe_psp_update_sram(struct amdgpu_device *adev); +int amdgpu_vpe_init_microcode(struct amdgpu_vpe *vpe); +int amdgpu_vpe_ring_init(struct amdgpu_vpe *vpe); +int amdgpu_vpe_ring_fini(struct amdgpu_vpe *vpe); + +#define vpe_ring_init(vpe) ((vpe)->funcs->ring_init ? (vpe)->funcs->ring_init((vpe)) : 0) +#define vpe_ring_start(vpe) ((vpe)->funcs->ring_start ? (vpe)->funcs->ring_start((vpe)) : 0) +#define vpe_ring_stop(vpe) ((vpe)->funcs->ring_stop ? (vpe)->funcs->ring_stop((vpe)) : 0) +#define vpe_ring_fini(vpe) ((vpe)->funcs->ring_fini ? (vpe)->funcs->ring_fini((vpe)) : 0) + +#define vpe_get_reg_offset(vpe, inst, offset) \ + ((vpe)->funcs->get_reg_offset ? (vpe)->funcs->get_reg_offset((vpe), (inst), (offset)) : 0) +#define vpe_set_regs(vpe) \ + ((vpe)->funcs->set_regs ? (vpe)->funcs->set_regs((vpe)) : 0) +#define vpe_irq_init(vpe) \ + ((vpe)->funcs->irq_init ? (vpe)->funcs->irq_init((vpe)) : 0) +#define vpe_init_microcode(vpe) \ + ((vpe)->funcs->init_microcode ? (vpe)->funcs->init_microcode((vpe)) : 0) +#define vpe_load_microcode(vpe) \ + ((vpe)->funcs->load_microcode ? (vpe)->funcs->load_microcode((vpe)) : 0) + +extern const struct amdgpu_ip_block_version vpe_v6_1_ip_block; + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c index c7085a747b03..08916538a615 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c @@ -77,7 +77,16 @@ static inline bool amdgpu_is_vram_mgr_blocks_contiguous(struct list_head *head) return true; } +static inline u64 amdgpu_vram_mgr_blocks_size(struct list_head *head) +{ + struct drm_buddy_block *block; + u64 size = 0; + + list_for_each_entry(block, head, link) + size += amdgpu_vram_mgr_block_size(block); + return size; +} /** * DOC: mem_info_vram_total @@ -424,9 +433,9 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man, const struct ttm_place *place, struct ttm_resource **res) { - u64 vis_usage = 0, max_bytes, cur_size, min_block_size; struct amdgpu_vram_mgr *mgr = to_vram_mgr(man); struct amdgpu_device *adev = to_amdgpu_device(mgr); + u64 vis_usage = 0, max_bytes, min_block_size; struct amdgpu_vram_mgr_resource *vres; u64 size, remaining_size, lpfn, fpfn; struct drm_buddy *mm = &mgr->mm; @@ -474,6 +483,9 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man, if (place->flags & TTM_PL_FLAG_TOPDOWN) vres->flags |= DRM_BUDDY_TOPDOWN_ALLOCATION; + if (place->flags & TTM_PL_FLAG_CONTIGUOUS) + vres->flags |= DRM_BUDDY_CONTIGUOUS_ALLOCATION; + if (fpfn || lpfn != mgr->mm.size) /* Allocate blocks in desired range */ vres->flags |= DRM_BUDDY_RANGE_ALLOCATION; @@ -496,25 +508,6 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man, !(size & (((u64)pages_per_block << PAGE_SHIFT) - 1))) min_block_size = (u64)pages_per_block << PAGE_SHIFT; - cur_size = size; - - if (fpfn + size != (u64)place->lpfn << PAGE_SHIFT) { - /* - * Except for actual range allocation, modify the size and - * min_block_size conforming to continuous flag enablement - */ - if (place->flags & TTM_PL_FLAG_CONTIGUOUS) { - size = roundup_pow_of_two(size); - min_block_size = size; - /* - * Modify the size value if size is not - * aligned with min_block_size - */ - } else if (!IS_ALIGNED(size, min_block_size)) { - size = round_up(size, min_block_size); - } - } - r = drm_buddy_alloc_blocks(mm, fpfn, lpfn, size, @@ -531,41 +524,9 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man, } mutex_unlock(&mgr->lock); - if (cur_size != size) { - struct drm_buddy_block *block; - struct list_head *trim_list; - u64 original_size; - LIST_HEAD(temp); - - trim_list = &vres->blocks; - original_size = (u64)vres->base.size; - - /* - * If size value is rounded up to min_block_size, trim the last - * block to the required size - */ - if (!list_is_singular(&vres->blocks)) { - block = list_last_entry(&vres->blocks, typeof(*block), link); - list_move_tail(&block->link, &temp); - trim_list = &temp; - /* - * Compute the original_size value by subtracting the - * last block size with (aligned size - original size) - */ - original_size = amdgpu_vram_mgr_block_size(block) - (size - cur_size); - } - - mutex_lock(&mgr->lock); - drm_buddy_block_trim(mm, - original_size, - trim_list); - mutex_unlock(&mgr->lock); - - if (!list_empty(&temp)) - list_splice_tail(trim_list, &vres->blocks); - } - vres->base.start = 0; + size = max_t(u64, amdgpu_vram_mgr_blocks_size(&vres->blocks), + vres->base.size); list_for_each_entry(block, &vres->blocks, link) { unsigned long start; @@ -573,8 +534,8 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man, amdgpu_vram_mgr_block_size(block); start >>= PAGE_SHIFT; - if (start > PFN_UP(vres->base.size)) - start -= PFN_UP(vres->base.size); + if (start > PFN_UP(size)) + start -= PFN_UP(size); else start = 0; vres->base.start = max(vres->base.start, start); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c index 565a1fa436d4..2b99eed5ba19 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c @@ -163,16 +163,11 @@ int amdgpu_xcp_init(struct amdgpu_xcp_mgr *xcp_mgr, int num_xcps, int mode) return 0; } -int amdgpu_xcp_switch_partition_mode(struct amdgpu_xcp_mgr *xcp_mgr, int mode) +static int __amdgpu_xcp_switch_partition_mode(struct amdgpu_xcp_mgr *xcp_mgr, + int mode) { int ret, curr_mode, num_xcps = 0; - if (!xcp_mgr || mode == AMDGPU_XCP_MODE_NONE) - return -EINVAL; - - if (xcp_mgr->mode == mode) - return 0; - if (!xcp_mgr->funcs || !xcp_mgr->funcs->switch_partition_mode) return 0; @@ -201,6 +196,25 @@ out: return ret; } +int amdgpu_xcp_switch_partition_mode(struct amdgpu_xcp_mgr *xcp_mgr, int mode) +{ + if (!xcp_mgr || mode == AMDGPU_XCP_MODE_NONE) + return -EINVAL; + + if (xcp_mgr->mode == mode) + return 0; + + return __amdgpu_xcp_switch_partition_mode(xcp_mgr, mode); +} + +int amdgpu_xcp_restore_partition_mode(struct amdgpu_xcp_mgr *xcp_mgr) +{ + if (!xcp_mgr || xcp_mgr->mode == AMDGPU_XCP_MODE_NONE) + return 0; + + return __amdgpu_xcp_switch_partition_mode(xcp_mgr, xcp_mgr->mode); +} + int amdgpu_xcp_query_partition_mode(struct amdgpu_xcp_mgr *xcp_mgr, u32 flags) { int mode; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h index 9a1036aeec2a..90138bc5f03d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h @@ -129,6 +129,7 @@ int amdgpu_xcp_mgr_init(struct amdgpu_device *adev, int init_mode, int amdgpu_xcp_init(struct amdgpu_xcp_mgr *xcp_mgr, int num_xcps, int mode); int amdgpu_xcp_query_partition_mode(struct amdgpu_xcp_mgr *xcp_mgr, u32 flags); int amdgpu_xcp_switch_partition_mode(struct amdgpu_xcp_mgr *xcp_mgr, int mode); +int amdgpu_xcp_restore_partition_mode(struct amdgpu_xcp_mgr *xcp_mgr); int amdgpu_xcp_get_partition(struct amdgpu_xcp_mgr *xcp_mgr, enum AMDGPU_XCP_IP_BLOCK ip, int instance); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c index 03dc59cbe8aa..bd20cb3b9819 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c @@ -103,6 +103,53 @@ static const int walf_pcs_err_noncorrectable_mask_reg_aldebaran[] = { smnPCS_GOPX1_PCS_ERROR_NONCORRECTABLE_MASK + 0x100000 }; +static const int xgmi3x16_pcs_err_status_reg_v6_4[] = { + smnPCS_XGMI3X16_PCS_ERROR_STATUS, + smnPCS_XGMI3X16_PCS_ERROR_STATUS + 0x100000 +}; + +static const int xgmi3x16_pcs_err_noncorrectable_mask_reg_v6_4[] = { + smnPCS_XGMI3X16_PCS_ERROR_NONCORRECTABLE_MASK, + smnPCS_XGMI3X16_PCS_ERROR_NONCORRECTABLE_MASK + 0x100000 +}; + +static const u64 xgmi_v6_4_0_mca_base_array[] = { + 0x11a09200, + 0x11b09200, +}; + +static const char *xgmi_v6_4_0_ras_error_code_ext[32] = { + [0x00] = "XGMI PCS DataLossErr", + [0x01] = "XGMI PCS TrainingErr", + [0x02] = "XGMI PCS FlowCtrlAckErr", + [0x03] = "XGMI PCS RxFifoUnderflowErr", + [0x04] = "XGMI PCS RxFifoOverflowErr", + [0x05] = "XGMI PCS CRCErr", + [0x06] = "XGMI PCS BERExceededErr", + [0x07] = "XGMI PCS TxMetaDataErr", + [0x08] = "XGMI PCS ReplayBufParityErr", + [0x09] = "XGMI PCS DataParityErr", + [0x0a] = "XGMI PCS ReplayFifoOverflowErr", + [0x0b] = "XGMI PCS ReplayFifoUnderflowErr", + [0x0c] = "XGMI PCS ElasticFifoOverflowErr", + [0x0d] = "XGMI PCS DeskewErr", + [0x0e] = "XGMI PCS FlowCtrlCRCErr", + [0x0f] = "XGMI PCS DataStartupLimitErr", + [0x10] = "XGMI PCS FCInitTimeoutErr", + [0x11] = "XGMI PCS RecoveryTimeoutErr", + [0x12] = "XGMI PCS ReadySerialTimeoutErr", + [0x13] = "XGMI PCS ReadySerialAttemptErr", + [0x14] = "XGMI PCS RecoveryAttemptErr", + [0x15] = "XGMI PCS RecoveryRelockAttemptErr", + [0x16] = "XGMI PCS ReplayAttemptErr", + [0x17] = "XGMI PCS SyncHdrErr", + [0x18] = "XGMI PCS TxReplayTimeoutErr", + [0x19] = "XGMI PCS RxReplayTimeoutErr", + [0x1a] = "XGMI PCS LinkSubTxTimeoutErr", + [0x1b] = "XGMI PCS LinkSubRxTimeoutErr", + [0x1c] = "XGMI PCS RxCMDPktErr", +}; + static const struct amdgpu_pcs_ras_field xgmi_pcs_ras_fields[] = { {"XGMI PCS DataLossErr", SOC15_REG_FIELD(XGMI0_PCS_GOPX16_PCS_ERROR_STATUS, DataLossErr)}, @@ -325,6 +372,17 @@ static ssize_t amdgpu_xgmi_show_device_id(struct device *dev, } +static ssize_t amdgpu_xgmi_show_physical_id(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = drm_to_adev(ddev); + + return sysfs_emit(buf, "%u\n", adev->gmc.xgmi.physical_node_id); + +} + static ssize_t amdgpu_xgmi_show_num_hops(struct device *dev, struct device_attribute *attr, char *buf) @@ -390,6 +448,7 @@ static ssize_t amdgpu_xgmi_show_error(struct device *dev, static DEVICE_ATTR(xgmi_device_id, S_IRUGO, amdgpu_xgmi_show_device_id, NULL); +static DEVICE_ATTR(xgmi_physical_id, 0444, amdgpu_xgmi_show_physical_id, NULL); static DEVICE_ATTR(xgmi_error, S_IRUGO, amdgpu_xgmi_show_error, NULL); static DEVICE_ATTR(xgmi_num_hops, S_IRUGO, amdgpu_xgmi_show_num_hops, NULL); static DEVICE_ATTR(xgmi_num_links, S_IRUGO, amdgpu_xgmi_show_num_links, NULL); @@ -407,6 +466,12 @@ static int amdgpu_xgmi_sysfs_add_dev_info(struct amdgpu_device *adev, return ret; } + ret = device_create_file(adev->dev, &dev_attr_xgmi_physical_id); + if (ret) { + dev_err(adev->dev, "XGMI: Failed to create device file xgmi_physical_id\n"); + return ret; + } + /* Create xgmi error file */ ret = device_create_file(adev->dev, &dev_attr_xgmi_error); if (ret) @@ -448,6 +513,7 @@ remove_link: remove_file: device_remove_file(adev->dev, &dev_attr_xgmi_device_id); + device_remove_file(adev->dev, &dev_attr_xgmi_physical_id); device_remove_file(adev->dev, &dev_attr_xgmi_error); device_remove_file(adev->dev, &dev_attr_xgmi_num_hops); device_remove_file(adev->dev, &dev_attr_xgmi_num_links); @@ -463,6 +529,7 @@ static void amdgpu_xgmi_sysfs_rem_dev_info(struct amdgpu_device *adev, memset(node, 0, sizeof(node)); device_remove_file(adev->dev, &dev_attr_xgmi_device_id); + device_remove_file(adev->dev, &dev_attr_xgmi_physical_id); device_remove_file(adev->dev, &dev_attr_xgmi_error); device_remove_file(adev->dev, &dev_attr_xgmi_num_hops); device_remove_file(adev->dev, &dev_attr_xgmi_num_links); @@ -500,6 +567,7 @@ struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev) hive = kzalloc(sizeof(*hive), GFP_KERNEL); if (!hive) { dev_err(adev->dev, "XGMI: allocation failed\n"); + ret = -ENOMEM; hive = NULL; goto pro_end; } @@ -887,7 +955,7 @@ static int amdgpu_xgmi_ras_late_init(struct amdgpu_device *adev, struct ras_comm adev->gmc.xgmi.num_physical_nodes == 0) return 0; - adev->gmc.xgmi.ras->ras_block.hw_ops->reset_ras_error_count(adev); + amdgpu_ras_reset_error_count(adev, AMDGPU_RAS_BLOCK__XGMI_WAFL); return amdgpu_ras_block_late_init(adev, ras_block); } @@ -905,7 +973,7 @@ static void pcs_clear_status(struct amdgpu_device *adev, uint32_t pcs_status_reg WREG32_PCIE(pcs_status_reg, 0); } -static void amdgpu_xgmi_reset_ras_error_count(struct amdgpu_device *adev) +static void amdgpu_xgmi_legacy_reset_ras_error_count(struct amdgpu_device *adev) { uint32_t i; @@ -931,6 +999,49 @@ static void amdgpu_xgmi_reset_ras_error_count(struct amdgpu_device *adev) default: break; } + + switch (amdgpu_ip_version(adev, XGMI_HWIP, 0)) { + case IP_VERSION(6, 4, 0): + for (i = 0; i < ARRAY_SIZE(xgmi3x16_pcs_err_status_reg_v6_4); i++) + pcs_clear_status(adev, + xgmi3x16_pcs_err_status_reg_v6_4[i]); + break; + default: + break; + } +} + +static void __xgmi_v6_4_0_reset_error_count(struct amdgpu_device *adev, int xgmi_inst, u64 mca_base) +{ + WREG64_MCA(xgmi_inst, mca_base, MCA_REG_IDX_STATUS, 0ULL); +} + +static void xgmi_v6_4_0_reset_error_count(struct amdgpu_device *adev, int xgmi_inst) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(xgmi_v6_4_0_mca_base_array); i++) + __xgmi_v6_4_0_reset_error_count(adev, xgmi_inst, xgmi_v6_4_0_mca_base_array[i]); +} + +static void xgmi_v6_4_0_reset_ras_error_count(struct amdgpu_device *adev) +{ + int i; + + for_each_inst(i, adev->aid_mask) + xgmi_v6_4_0_reset_error_count(adev, i); +} + +static void amdgpu_xgmi_reset_ras_error_count(struct amdgpu_device *adev) +{ + switch (amdgpu_ip_version(adev, XGMI_HWIP, 0)) { + case IP_VERSION(6, 4, 0): + xgmi_v6_4_0_reset_ras_error_count(adev); + break; + default: + amdgpu_xgmi_legacy_reset_ras_error_count(adev); + break; + } } static int amdgpu_xgmi_query_pcs_error_status(struct amdgpu_device *adev, @@ -947,7 +1058,10 @@ static int amdgpu_xgmi_query_pcs_error_status(struct amdgpu_device *adev, uint32_t field_array_size = 0; if (is_xgmi_pcs) { - if (adev->ip_versions[XGMI_HWIP][0] == IP_VERSION(6, 1, 0)) { + if (amdgpu_ip_version(adev, XGMI_HWIP, 0) == + IP_VERSION(6, 1, 0) || + amdgpu_ip_version(adev, XGMI_HWIP, 0) == + IP_VERSION(6, 4, 0)) { pcs_ras_fields = &xgmi3x16_pcs_ras_fields[0]; field_array_size = ARRAY_SIZE(xgmi3x16_pcs_ras_fields); } else { @@ -981,11 +1095,11 @@ static int amdgpu_xgmi_query_pcs_error_status(struct amdgpu_device *adev, return 0; } -static void amdgpu_xgmi_query_ras_error_count(struct amdgpu_device *adev, - void *ras_error_status) +static void amdgpu_xgmi_legacy_query_ras_error_count(struct amdgpu_device *adev, + void *ras_error_status) { struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; - int i; + int i, supported = 1; uint32_t data, mask_data = 0; uint32_t ue_cnt = 0, ce_cnt = 0; @@ -1049,42 +1163,144 @@ static void amdgpu_xgmi_query_ras_error_count(struct amdgpu_device *adev, } break; default: - dev_warn(adev->dev, "XGMI RAS error query not supported"); + supported = 0; break; } - adev->gmc.xgmi.ras->ras_block.hw_ops->reset_ras_error_count(adev); + switch (amdgpu_ip_version(adev, XGMI_HWIP, 0)) { + case IP_VERSION(6, 4, 0): + /* check xgmi3x16 pcs error */ + for (i = 0; i < ARRAY_SIZE(xgmi3x16_pcs_err_status_reg_v6_4); i++) { + data = RREG32_PCIE(xgmi3x16_pcs_err_status_reg_v6_4[i]); + mask_data = + RREG32_PCIE(xgmi3x16_pcs_err_noncorrectable_mask_reg_v6_4[i]); + if (data) + amdgpu_xgmi_query_pcs_error_status(adev, data, + mask_data, &ue_cnt, &ce_cnt, true, true); + } + break; + default: + if (!supported) + dev_warn(adev->dev, "XGMI RAS error query not supported"); + break; + } + + amdgpu_ras_reset_error_count(adev, AMDGPU_RAS_BLOCK__XGMI_WAFL); err_data->ue_count += ue_cnt; err_data->ce_count += ce_cnt; } +static enum amdgpu_mca_error_type xgmi_v6_4_0_pcs_mca_get_error_type(struct amdgpu_device *adev, u64 status) +{ + const char *error_str; + int ext_error_code; + + ext_error_code = MCA_REG__STATUS__ERRORCODEEXT(status); + + error_str = ext_error_code < ARRAY_SIZE(xgmi_v6_4_0_ras_error_code_ext) ? + xgmi_v6_4_0_ras_error_code_ext[ext_error_code] : NULL; + if (error_str) + dev_info(adev->dev, "%s detected\n", error_str); + + switch (ext_error_code) { + case 0: + return AMDGPU_MCA_ERROR_TYPE_UE; + case 6: + return AMDGPU_MCA_ERROR_TYPE_CE; + default: + return -EINVAL; + } + + return -EINVAL; +} + +static void __xgmi_v6_4_0_query_error_count(struct amdgpu_device *adev, struct amdgpu_smuio_mcm_config_info *mcm_info, + u64 mca_base, struct ras_err_data *err_data) +{ + int xgmi_inst = mcm_info->die_id; + u64 status = 0; + + status = RREG64_MCA(xgmi_inst, mca_base, MCA_REG_IDX_STATUS); + if (!MCA_REG__STATUS__VAL(status)) + return; + + switch (xgmi_v6_4_0_pcs_mca_get_error_type(adev, status)) { + case AMDGPU_MCA_ERROR_TYPE_UE: + amdgpu_ras_error_statistic_ue_count(err_data, mcm_info, 1ULL); + break; + case AMDGPU_MCA_ERROR_TYPE_CE: + amdgpu_ras_error_statistic_ce_count(err_data, mcm_info, 1ULL); + break; + default: + break; + } + + WREG64_MCA(xgmi_inst, mca_base, MCA_REG_IDX_STATUS, 0ULL); +} + +static void xgmi_v6_4_0_query_error_count(struct amdgpu_device *adev, int xgmi_inst, struct ras_err_data *err_data) +{ + struct amdgpu_smuio_mcm_config_info mcm_info = { + .socket_id = adev->smuio.funcs->get_socket_id(adev), + .die_id = xgmi_inst, + }; + int i; + + for (i = 0; i < ARRAY_SIZE(xgmi_v6_4_0_mca_base_array); i++) + __xgmi_v6_4_0_query_error_count(adev, &mcm_info, xgmi_v6_4_0_mca_base_array[i], err_data); +} + +static void xgmi_v6_4_0_query_ras_error_count(struct amdgpu_device *adev, void *ras_error_status) +{ + struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; + int i; + + for_each_inst(i, adev->aid_mask) + xgmi_v6_4_0_query_error_count(adev, i, err_data); +} + +static void amdgpu_xgmi_query_ras_error_count(struct amdgpu_device *adev, + void *ras_error_status) +{ + switch (amdgpu_ip_version(adev, XGMI_HWIP, 0)) { + case IP_VERSION(6, 4, 0): + xgmi_v6_4_0_query_ras_error_count(adev, ras_error_status); + break; + default: + amdgpu_xgmi_legacy_query_ras_error_count(adev, ras_error_status); + break; + } +} + /* Trigger XGMI/WAFL error */ static int amdgpu_ras_error_inject_xgmi(struct amdgpu_device *adev, void *inject_if, uint32_t instance_mask) { - int ret = 0; + int ret1, ret2; struct ta_ras_trigger_error_input *block_info = (struct ta_ras_trigger_error_input *)inject_if; if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_DISALLOW)) dev_warn(adev->dev, "Failed to disallow df cstate"); - if (amdgpu_dpm_allow_xgmi_power_down(adev, false)) + ret1 = amdgpu_dpm_set_xgmi_plpd_mode(adev, XGMI_PLPD_DISALLOW); + if (ret1 && ret1 != -EOPNOTSUPP) dev_warn(adev->dev, "Failed to disallow XGMI power down"); - ret = psp_ras_trigger_error(&adev->psp, block_info, instance_mask); + ret2 = psp_ras_trigger_error(&adev->psp, block_info, instance_mask); if (amdgpu_ras_intr_triggered()) - return ret; + return ret2; - if (amdgpu_dpm_allow_xgmi_power_down(adev, true)) + ret1 = amdgpu_dpm_set_xgmi_plpd_mode(adev, XGMI_PLPD_DEFAULT); + if (ret1 && ret1 != -EOPNOTSUPP) dev_warn(adev->dev, "Failed to allow XGMI power down"); if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_ALLOW)) dev_warn(adev->dev, "Failed to allow df cstate"); - return ret; + return ret2; } struct amdgpu_ras_block_hw_ops xgmi_ras_hw_ops = { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h index 86fbf56938f4..6cab882e8061 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h @@ -44,6 +44,7 @@ struct amdgpu_hive_info { struct amdgpu_reset_domain *reset_domain; uint32_t device_remove_count; + atomic_t ras_recovery; }; struct amdgpu_pcs_ras_field { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h index 104a5ad8397d..51a14f6d93bd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h @@ -90,10 +90,11 @@ union amd_sriov_msg_feature_flags { uint32_t host_load_ucodes : 1; uint32_t host_flr_vramlost : 1; uint32_t mm_bw_management : 1; - uint32_t pp_one_vf_mode : 1; + uint32_t pp_one_vf_mode : 1; uint32_t reg_indirect_acc : 1; uint32_t av1_support : 1; - uint32_t reserved : 25; + uint32_t vcn_rb_decouple : 1; + uint32_t reserved : 24; } flags; uint32_t all; }; diff --git a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram_reg_init.c b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c index 72b629a78c62..3f715e7fe1a9 100644 --- a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram_reg_init.c +++ b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c @@ -134,7 +134,7 @@ static int aqua_vanjaram_xcp_sched_list_update( for (i = 0; i < AMDGPU_MAX_RINGS; i++) { ring = adev->rings[i]; - if (!ring || !ring->sched.ready) + if (!ring || !ring->sched.ready || ring->no_scheduler) continue; aqua_vanjaram_xcp_gpu_sched_update(adev, ring, ring->xcp_id); @@ -500,7 +500,7 @@ static int aqua_vanjaram_switch_partition_mode(struct amdgpu_xcp_mgr *xcp_mgr, return -EINVAL; } - if (adev->kfd.init_complete) + if (adev->kfd.init_complete && !amdgpu_in_reset(adev)) flags |= AMDGPU_XCP_OPS_KFD; if (flags & AMDGPU_XCP_OPS_KFD) { diff --git a/drivers/gpu/drm/amd/amdgpu/athub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/athub_v1_0.c index a13c443ea10f..42f4e163e251 100644 --- a/drivers/gpu/drm/amd/amdgpu/athub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/athub_v1_0.c @@ -68,7 +68,7 @@ int athub_v1_0_set_clockgating(struct amdgpu_device *adev, if (amdgpu_sriov_vf(adev)) return 0; - switch (adev->ip_versions[ATHUB_HWIP][0]) { + switch (amdgpu_ip_version(adev, ATHUB_HWIP, 0)) { case IP_VERSION(9, 0, 0): case IP_VERSION(9, 1, 0): case IP_VERSION(9, 2, 0): diff --git a/drivers/gpu/drm/amd/amdgpu/athub_v2_0.c b/drivers/gpu/drm/amd/amdgpu/athub_v2_0.c index a9521c98e7f7..5a122f50a6e7 100644 --- a/drivers/gpu/drm/amd/amdgpu/athub_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/athub_v2_0.c @@ -77,7 +77,7 @@ int athub_v2_0_set_clockgating(struct amdgpu_device *adev, if (amdgpu_sriov_vf(adev)) return 0; - switch (adev->ip_versions[ATHUB_HWIP][0]) { + switch (amdgpu_ip_version(adev, ATHUB_HWIP, 0)) { case IP_VERSION(1, 3, 1): case IP_VERSION(2, 0, 0): case IP_VERSION(2, 0, 2): diff --git a/drivers/gpu/drm/amd/amdgpu/athub_v2_1.c b/drivers/gpu/drm/amd/amdgpu/athub_v2_1.c index 78508ae6a670..e143fcc46148 100644 --- a/drivers/gpu/drm/amd/amdgpu/athub_v2_1.c +++ b/drivers/gpu/drm/amd/amdgpu/athub_v2_1.c @@ -70,7 +70,7 @@ int athub_v2_1_set_clockgating(struct amdgpu_device *adev, if (amdgpu_sriov_vf(adev)) return 0; - switch (adev->ip_versions[ATHUB_HWIP][0]) { + switch (amdgpu_ip_version(adev, ATHUB_HWIP, 0)) { case IP_VERSION(2, 1, 0): case IP_VERSION(2, 1, 1): case IP_VERSION(2, 1, 2): diff --git a/drivers/gpu/drm/amd/amdgpu/athub_v3_0.c b/drivers/gpu/drm/amd/amdgpu/athub_v3_0.c index f0e235f98afb..f0737fb3a999 100644 --- a/drivers/gpu/drm/amd/amdgpu/athub_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/athub_v3_0.c @@ -36,7 +36,7 @@ static uint32_t athub_v3_0_get_cg_cntl(struct amdgpu_device *adev) { uint32_t data; - switch (adev->ip_versions[ATHUB_HWIP][0]) { + switch (amdgpu_ip_version(adev, ATHUB_HWIP, 0)) { case IP_VERSION(3, 0, 1): data = RREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL_V3_0_1); break; @@ -49,7 +49,7 @@ static uint32_t athub_v3_0_get_cg_cntl(struct amdgpu_device *adev) static void athub_v3_0_set_cg_cntl(struct amdgpu_device *adev, uint32_t data) { - switch (adev->ip_versions[ATHUB_HWIP][0]) { + switch (amdgpu_ip_version(adev, ATHUB_HWIP, 0)) { case IP_VERSION(3, 0, 1): WREG32_SOC15(ATHUB, 0, regATHUB_MISC_CNTL_V3_0_1, data); break; @@ -99,10 +99,11 @@ int athub_v3_0_set_clockgating(struct amdgpu_device *adev, if (amdgpu_sriov_vf(adev)) return 0; - switch (adev->ip_versions[ATHUB_HWIP][0]) { + switch (amdgpu_ip_version(adev, ATHUB_HWIP, 0)) { case IP_VERSION(3, 0, 0): case IP_VERSION(3, 0, 1): case IP_VERSION(3, 0, 2): + case IP_VERSION(3, 3, 0): athub_v3_0_update_medium_grain_clock_gating(adev, state == AMD_CG_STATE_GATE); athub_v3_0_update_medium_grain_light_sleep(adev, diff --git a/drivers/gpu/drm/amd/amdgpu/atom.c b/drivers/gpu/drm/amd/amdgpu/atom.c index 5f610e9a5f0f..2c221000782c 100644 --- a/drivers/gpu/drm/amd/amdgpu/atom.c +++ b/drivers/gpu/drm/amd/amdgpu/atom.c @@ -1438,14 +1438,33 @@ static void atom_get_vbios_pn(struct atom_context *ctx) ctx->vbios_pn[count] = 0; } + + pr_info("ATOM BIOS: %s\n", ctx->vbios_pn); } static void atom_get_vbios_version(struct atom_context *ctx) { + unsigned short start = 3, end; unsigned char *vbios_ver; + unsigned char *p_rom; + + p_rom = ctx->bios; + /* Search from strings offset if it's present */ + start = *(unsigned short *)(p_rom + + OFFSET_TO_GET_ATOMBIOS_STRING_START); + + /* Search till atom rom header start point */ + end = *(unsigned short *)(p_rom + OFFSET_TO_ATOM_ROM_HEADER_POINTER); + + /* Use hardcoded offsets, if the offsets are not populated */ + if (end <= start) { + start = 3; + end = 1024; + } /* find anchor ATOMBIOSBK-AMD */ - vbios_ver = atom_find_str_in_rom(ctx, BIOS_VERSION_PREFIX, 3, 1024, 64); + vbios_ver = + atom_find_str_in_rom(ctx, BIOS_VERSION_PREFIX, start, end, 64); if (vbios_ver != NULL) { /* skip ATOMBIOSBK-AMD VER */ vbios_ver += 18; @@ -1460,11 +1479,9 @@ struct atom_context *amdgpu_atom_parse(struct card_info *card, void *bios) int base; struct atom_context *ctx = kzalloc(sizeof(struct atom_context), GFP_KERNEL); - char *str; struct _ATOM_ROM_HEADER *atom_rom_header; struct _ATOM_MASTER_DATA_TABLE *master_table; struct _ATOM_FIRMWARE_INFO *atom_fw_info; - u16 idx; if (!ctx) return NULL; @@ -1502,16 +1519,6 @@ struct atom_context *amdgpu_atom_parse(struct card_info *card, void *bios) return NULL; } - idx = CU16(ATOM_ROM_PART_NUMBER_PTR); - if (idx == 0) - idx = 0x80; - - str = CSTR(idx); - if (*str != '\0') { - pr_info("ATOM BIOS: %s\n", str); - strscpy(ctx->vbios_version, str, sizeof(ctx->vbios_version)); - } - atom_rom_header = (struct _ATOM_ROM_HEADER *)CSTR(base); if (atom_rom_header->usMasterDataTableOffset != 0) { master_table = (struct _ATOM_MASTER_DATA_TABLE *) diff --git a/drivers/gpu/drm/amd/amdgpu/atom.h b/drivers/gpu/drm/amd/amdgpu/atom.h index 0c1839824520..c11cf18a0f18 100644 --- a/drivers/gpu/drm/amd/amdgpu/atom.h +++ b/drivers/gpu/drm/amd/amdgpu/atom.h @@ -33,7 +33,6 @@ struct drm_device; #define ATOM_ATI_MAGIC_PTR 0x30 #define ATOM_ATI_MAGIC " 761295520" #define ATOM_ROM_TABLE_PTR 0x48 -#define ATOM_ROM_PART_NUMBER_PTR 0x6E #define ATOM_ROM_MAGIC "ATOM" #define ATOM_ROM_MAGIC_PTR 4 @@ -118,12 +117,15 @@ struct drm_device; struct card_info { struct drm_device *dev; - void (* reg_write)(struct card_info *, uint32_t, uint32_t); /* filled by driver */ - uint32_t (* reg_read)(struct card_info *, uint32_t); /* filled by driver */ - void (* mc_write)(struct card_info *, uint32_t, uint32_t); /* filled by driver */ - uint32_t (* mc_read)(struct card_info *, uint32_t); /* filled by driver */ - void (* pll_write)(struct card_info *, uint32_t, uint32_t); /* filled by driver */ - uint32_t (* pll_read)(struct card_info *, uint32_t); /* filled by driver */ + void (*reg_write)(struct card_info *info, + u32 reg, uint32_t val); /* filled by driver */ + uint32_t (*reg_read)(struct card_info *info, uint32_t reg); /* filled by driver */ + void (*mc_write)(struct card_info *info, + u32 reg, uint32_t val); /* filled by driver */ + uint32_t (*mc_read)(struct card_info *info, uint32_t reg); /* filled by driver */ + void (*pll_write)(struct card_info *info, + u32 reg, uint32_t val); /* filled by driver */ + uint32_t (*pll_read)(struct card_info *info, uint32_t reg); /* filled by driver */ }; struct atom_context { @@ -143,7 +145,6 @@ struct atom_context { int io_mode; uint32_t *scratch; int scratch_size_bytes; - char vbios_version[20]; uint8_t name[STRLEN_LONG]; uint8_t vbios_pn[STRLEN_LONG]; @@ -154,10 +155,10 @@ struct atom_context { extern int amdgpu_atom_debug; -struct atom_context *amdgpu_atom_parse(struct card_info *, void *); -int amdgpu_atom_execute_table(struct atom_context *, int, uint32_t *); -int amdgpu_atom_asic_init(struct atom_context *); -void amdgpu_atom_destroy(struct atom_context *); +struct atom_context *amdgpu_atom_parse(struct card_info *card, void *bios); +int amdgpu_atom_execute_table(struct atom_context *ctx, int index, uint32_t *params); +int amdgpu_atom_asic_init(struct atom_context *ctx); +void amdgpu_atom_destroy(struct atom_context *ctx); bool amdgpu_atom_parse_data_header(struct atom_context *ctx, int index, uint16_t *size, uint8_t *frev, uint8_t *crev, uint16_t *data_start); bool amdgpu_atom_parse_cmd_header(struct atom_context *ctx, int index, diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c b/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c index d95b2dc78063..3ee219aa2891 100644 --- a/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c +++ b/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c @@ -228,7 +228,6 @@ error: register_acpi_backlight: /* Try registering an ACPI video backlight device instead. */ acpi_video_register_backlight(); - return; } void diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c index 5641cf05d856..4dfaa017cf7f 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik.c +++ b/drivers/gpu/drm/amd/amdgpu/cik.c @@ -1574,17 +1574,8 @@ static void cik_pcie_gen3_enable(struct amdgpu_device *adev) u16 bridge_cfg2, gpu_cfg2; u32 max_lw, current_lw, tmp; - pcie_capability_read_word(root, PCI_EXP_LNKCTL, - &bridge_cfg); - pcie_capability_read_word(adev->pdev, PCI_EXP_LNKCTL, - &gpu_cfg); - - tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD; - pcie_capability_write_word(root, PCI_EXP_LNKCTL, tmp16); - - tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD; - pcie_capability_write_word(adev->pdev, PCI_EXP_LNKCTL, - tmp16); + pcie_capability_set_word(root, PCI_EXP_LNKCTL, PCI_EXP_LNKCTL_HAWD); + pcie_capability_set_word(adev->pdev, PCI_EXP_LNKCTL, PCI_EXP_LNKCTL_HAWD); tmp = RREG32_PCIE(ixPCIE_LC_STATUS1); max_lw = (tmp & PCIE_LC_STATUS1__LC_DETECTED_LINK_WIDTH_MASK) >> @@ -1637,21 +1628,14 @@ static void cik_pcie_gen3_enable(struct amdgpu_device *adev) msleep(100); /* linkctl */ - pcie_capability_read_word(root, PCI_EXP_LNKCTL, - &tmp16); - tmp16 &= ~PCI_EXP_LNKCTL_HAWD; - tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD); - pcie_capability_write_word(root, PCI_EXP_LNKCTL, - tmp16); - - pcie_capability_read_word(adev->pdev, - PCI_EXP_LNKCTL, - &tmp16); - tmp16 &= ~PCI_EXP_LNKCTL_HAWD; - tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD); - pcie_capability_write_word(adev->pdev, - PCI_EXP_LNKCTL, - tmp16); + pcie_capability_clear_and_set_word(root, PCI_EXP_LNKCTL, + PCI_EXP_LNKCTL_HAWD, + bridge_cfg & + PCI_EXP_LNKCTL_HAWD); + pcie_capability_clear_and_set_word(adev->pdev, PCI_EXP_LNKCTL, + PCI_EXP_LNKCTL_HAWD, + gpu_cfg & + PCI_EXP_LNKCTL_HAWD); /* linkctl2 */ pcie_capability_read_word(root, PCI_EXP_LNKCTL2, @@ -1725,10 +1709,6 @@ static void cik_program_aspm(struct amdgpu_device *adev) if (pci_is_root_bus(adev->pdev->bus)) return; - /* XXX double check APUs */ - if (adev->flags & AMD_IS_APU) - return; - orig = data = RREG32_PCIE(ixPCIE_LC_N_FTS_CNTL); data &= ~PCIE_LC_N_FTS_CNTL__LC_XMIT_N_FTS_MASK; data |= (0x24 << PCIE_LC_N_FTS_CNTL__LC_XMIT_N_FTS__SHIFT) | diff --git a/drivers/gpu/drm/amd/amdgpu/cik_ih.c b/drivers/gpu/drm/amd/amdgpu/cik_ih.c index df385ffc9768..6f7c031dd197 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/cik_ih.c @@ -442,8 +442,7 @@ static void cik_ih_set_interrupt_funcs(struct amdgpu_device *adev) adev->irq.ih_funcs = &cik_ih_funcs; } -const struct amdgpu_ip_block_version cik_ih_ip_block = -{ +const struct amdgpu_ip_block_version cik_ih_ip_block = { .type = AMD_IP_BLOCK_TYPE_IH, .major = 2, .minor = 0, diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c index 52598fbc9b39..a3fccc4c1f43 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c @@ -308,8 +308,6 @@ static void cik_sdma_gfx_stop(struct amdgpu_device *adev) u32 rb_cntl; int i; - amdgpu_sdma_unset_buffer_funcs_helper(adev); - for (i = 0; i < adev->sdma.num_instances; i++) { rb_cntl = RREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i]); rb_cntl &= ~SDMA0_GFX_RB_CNTL__RB_ENABLE_MASK; @@ -498,9 +496,6 @@ static int cik_sdma_gfx_resume(struct amdgpu_device *adev) r = amdgpu_ring_test_helper(ring); if (r) return r; - - if (adev->mman.buffer_funcs_ring == ring) - amdgpu_ttm_set_buffer_funcs_status(adev, true); } return 0; @@ -925,9 +920,14 @@ static void cik_enable_sdma_mgls(struct amdgpu_device *adev, static int cik_sdma_early_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int r; adev->sdma.num_instances = SDMA_MAX_INSTANCE; + r = cik_sdma_init_microcode(adev); + if (r) + return r; + cik_sdma_set_ring_funcs(adev); cik_sdma_set_irq_funcs(adev); cik_sdma_set_buffer_funcs(adev); @@ -942,12 +942,6 @@ static int cik_sdma_sw_init(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; int r, i; - r = cik_sdma_init_microcode(adev); - if (r) { - DRM_ERROR("Failed to load sdma firmware!\n"); - return r; - } - /* SDMA trap event */ r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 224, &adev->sdma.trap_irq); diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c index 9a24ed463abd..bb666cb7522e 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c @@ -52,8 +52,7 @@ static void dce_v10_0_set_display_funcs(struct amdgpu_device *adev); static void dce_v10_0_set_irq_funcs(struct amdgpu_device *adev); -static const u32 crtc_offsets[] = -{ +static const u32 crtc_offsets[] = { CRTC0_REGISTER_OFFSET, CRTC1_REGISTER_OFFSET, CRTC2_REGISTER_OFFSET, @@ -63,8 +62,7 @@ static const u32 crtc_offsets[] = CRTC6_REGISTER_OFFSET }; -static const u32 hpd_offsets[] = -{ +static const u32 hpd_offsets[] = { HPD0_REGISTER_OFFSET, HPD1_REGISTER_OFFSET, HPD2_REGISTER_OFFSET, @@ -121,30 +119,26 @@ static const struct { .hpd = DISP_INTERRUPT_STATUS_CONTINUE5__DC_HPD6_INTERRUPT_MASK } }; -static const u32 golden_settings_tonga_a11[] = -{ +static const u32 golden_settings_tonga_a11[] = { mmDCI_CLK_CNTL, 0x00000080, 0x00000000, mmFBC_DEBUG_COMP, 0x000000f0, 0x00000070, mmFBC_MISC, 0x1f311fff, 0x12300000, mmHDMI_CONTROL, 0x31000111, 0x00000011, }; -static const u32 tonga_mgcg_cgcg_init[] = -{ +static const u32 tonga_mgcg_cgcg_init[] = { mmXDMA_CLOCK_GATING_CNTL, 0xffffffff, 0x00000100, mmXDMA_MEM_POWER_CNTL, 0x00000101, 0x00000000, }; -static const u32 golden_settings_fiji_a10[] = -{ +static const u32 golden_settings_fiji_a10[] = { mmDCI_CLK_CNTL, 0x00000080, 0x00000000, mmFBC_DEBUG_COMP, 0x000000f0, 0x00000070, mmFBC_MISC, 0x1f311fff, 0x12300000, mmHDMI_CONTROL, 0x31000111, 0x00000011, }; -static const u32 fiji_mgcg_cgcg_init[] = -{ +static const u32 fiji_mgcg_cgcg_init[] = { mmXDMA_CLOCK_GATING_CNTL, 0xffffffff, 0x00000100, mmXDMA_MEM_POWER_CNTL, 0x00000101, 0x00000000, }; @@ -1042,7 +1036,7 @@ static void dce_v10_0_program_watermarks(struct amdgpu_device *adev, (u32)mode->clock); line_time = (u32) div_u64((u64)mode->crtc_htotal * 1000000, (u32)mode->clock); - line_time = min(line_time, (u32)65535); + line_time = min_t(u32, line_time, 65535); /* watermark for high clocks */ if (adev->pm.dpm_enabled) { @@ -1072,7 +1066,7 @@ static void dce_v10_0_program_watermarks(struct amdgpu_device *adev, wm_high.num_heads = num_heads; /* set for high clocks */ - latency_watermark_a = min(dce_v10_0_latency_watermark(&wm_high), (u32)65535); + latency_watermark_a = min_t(u32, dce_v10_0_latency_watermark(&wm_high), 65535); /* possibly force display priority to high */ /* should really do this at mode validation time... */ @@ -1111,7 +1105,7 @@ static void dce_v10_0_program_watermarks(struct amdgpu_device *adev, wm_low.num_heads = num_heads; /* set for low clocks */ - latency_watermark_b = min(dce_v10_0_latency_watermark(&wm_low), (u32)65535); + latency_watermark_b = min_t(u32, dce_v10_0_latency_watermark(&wm_low), 65535); /* possibly force display priority to high */ /* should really do this at mode validation time... */ @@ -1425,8 +1419,7 @@ static void dce_v10_0_audio_enable(struct amdgpu_device *adev, enable ? AZALIA_F0_CODEC_PIN_CONTROL_HOT_PLUG_CONTROL__AUDIO_ENABLED_MASK : 0); } -static const u32 pin_offsets[] = -{ +static const u32 pin_offsets[] = { AUD0_REGISTER_OFFSET, AUD1_REGISTER_OFFSET, AUD2_REGISTER_OFFSET, @@ -1811,8 +1804,7 @@ static void dce_v10_0_afmt_fini(struct amdgpu_device *adev) } } -static const u32 vga_control_regs[6] = -{ +static const u32 vga_control_regs[6] = { mmD1VGA_CONTROL, mmD2VGA_CONTROL, mmD3VGA_CONTROL, @@ -3651,8 +3643,7 @@ static void dce_v10_0_set_irq_funcs(struct amdgpu_device *adev) adev->hpd_irq.funcs = &dce_v10_0_hpd_irq_funcs; } -const struct amdgpu_ip_block_version dce_v10_0_ip_block = -{ +const struct amdgpu_ip_block_version dce_v10_0_ip_block = { .type = AMD_IP_BLOCK_TYPE_DCE, .major = 10, .minor = 0, @@ -3660,8 +3651,7 @@ const struct amdgpu_ip_block_version dce_v10_0_ip_block = .funcs = &dce_v10_0_ip_funcs, }; -const struct amdgpu_ip_block_version dce_v10_1_ip_block = -{ +const struct amdgpu_ip_block_version dce_v10_1_ip_block = { .type = AMD_IP_BLOCK_TYPE_DCE, .major = 10, .minor = 1, diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c index c14b70350a51..7af277f61cca 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c @@ -1068,7 +1068,7 @@ static void dce_v11_0_program_watermarks(struct amdgpu_device *adev, (u32)mode->clock); line_time = (u32) div_u64((u64)mode->crtc_htotal * 1000000, (u32)mode->clock); - line_time = min(line_time, (u32)65535); + line_time = min_t(u32, line_time, 65535); /* watermark for high clocks */ if (adev->pm.dpm_enabled) { @@ -1098,7 +1098,7 @@ static void dce_v11_0_program_watermarks(struct amdgpu_device *adev, wm_high.num_heads = num_heads; /* set for high clocks */ - latency_watermark_a = min(dce_v11_0_latency_watermark(&wm_high), (u32)65535); + latency_watermark_a = min_t(u32, dce_v11_0_latency_watermark(&wm_high), 65535); /* possibly force display priority to high */ /* should really do this at mode validation time... */ @@ -1137,7 +1137,7 @@ static void dce_v11_0_program_watermarks(struct amdgpu_device *adev, wm_low.num_heads = num_heads; /* set for low clocks */ - latency_watermark_b = min(dce_v11_0_latency_watermark(&wm_low), (u32)65535); + latency_watermark_b = min_t(u32, dce_v11_0_latency_watermark(&wm_low), 65535); /* possibly force display priority to high */ /* should really do this at mode validation time... */ diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c index 7f85ba5b726f..143efc37a17f 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c @@ -845,7 +845,7 @@ static void dce_v6_0_program_watermarks(struct amdgpu_device *adev, (u32)mode->clock); line_time = (u32) div_u64((u64)mode->crtc_htotal * 1000000, (u32)mode->clock); - line_time = min(line_time, (u32)65535); + line_time = min_t(u32, line_time, 65535); priority_a_cnt = 0; priority_b_cnt = 0; @@ -906,9 +906,9 @@ static void dce_v6_0_program_watermarks(struct amdgpu_device *adev, wm_low.num_heads = num_heads; /* set for high clocks */ - latency_watermark_a = min(dce_v6_0_latency_watermark(&wm_high), (u32)65535); + latency_watermark_a = min_t(u32, dce_v6_0_latency_watermark(&wm_high), 65535); /* set for low clocks */ - latency_watermark_b = min(dce_v6_0_latency_watermark(&wm_low), (u32)65535); + latency_watermark_b = min_t(u32, dce_v6_0_latency_watermark(&wm_low), 65535); /* possibly force display priority to high */ /* should really do this at mode validation time... */ diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c index d421a268c9ff..adeddfb7ff12 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c @@ -53,8 +53,7 @@ static void dce_v8_0_set_display_funcs(struct amdgpu_device *adev); static void dce_v8_0_set_irq_funcs(struct amdgpu_device *adev); -static const u32 crtc_offsets[6] = -{ +static const u32 crtc_offsets[6] = { CRTC0_REGISTER_OFFSET, CRTC1_REGISTER_OFFSET, CRTC2_REGISTER_OFFSET, @@ -63,8 +62,7 @@ static const u32 crtc_offsets[6] = CRTC5_REGISTER_OFFSET }; -static const u32 hpd_offsets[] = -{ +static const u32 hpd_offsets[] = { HPD0_REGISTER_OFFSET, HPD1_REGISTER_OFFSET, HPD2_REGISTER_OFFSET, @@ -977,7 +975,7 @@ static void dce_v8_0_program_watermarks(struct amdgpu_device *adev, (u32)mode->clock); line_time = (u32) div_u64((u64)mode->crtc_htotal * 1000000, (u32)mode->clock); - line_time = min(line_time, (u32)65535); + line_time = min_t(u32, line_time, 65535); /* watermark for high clocks */ if (adev->pm.dpm_enabled) { @@ -1007,7 +1005,7 @@ static void dce_v8_0_program_watermarks(struct amdgpu_device *adev, wm_high.num_heads = num_heads; /* set for high clocks */ - latency_watermark_a = min(dce_v8_0_latency_watermark(&wm_high), (u32)65535); + latency_watermark_a = min_t(u32, dce_v8_0_latency_watermark(&wm_high), 65535); /* possibly force display priority to high */ /* should really do this at mode validation time... */ @@ -1046,7 +1044,7 @@ static void dce_v8_0_program_watermarks(struct amdgpu_device *adev, wm_low.num_heads = num_heads; /* set for low clocks */ - latency_watermark_b = min(dce_v8_0_latency_watermark(&wm_low), (u32)65535); + latency_watermark_b = min_t(u32, dce_v8_0_latency_watermark(&wm_low), 65535); /* possibly force display priority to high */ /* should really do this at mode validation time... */ @@ -1345,9 +1343,9 @@ static void dce_v8_0_audio_write_sad_regs(struct drm_encoder *encoder) if (sad->channels > max_channels) { value = (sad->channels << AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0__MAX_CHANNELS__SHIFT) | - (sad->byte2 << + (sad->byte2 << AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0__DESCRIPTOR_BYTE_2__SHIFT) | - (sad->freq << + (sad->freq << AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0__SUPPORTED_FREQUENCIES__SHIFT); max_channels = sad->channels; } @@ -1379,8 +1377,7 @@ static void dce_v8_0_audio_enable(struct amdgpu_device *adev, enable ? AZALIA_F0_CODEC_PIN_CONTROL_HOT_PLUG_CONTROL__AUDIO_ENABLED_MASK : 0); } -static const u32 pin_offsets[7] = -{ +static const u32 pin_offsets[7] = { (0x1780 - 0x1780), (0x1786 - 0x1780), (0x178c - 0x1780), @@ -1740,8 +1737,7 @@ static void dce_v8_0_afmt_fini(struct amdgpu_device *adev) } } -static const u32 vga_control_regs[6] = -{ +static const u32 vga_control_regs[6] = { mmD1VGA_CONTROL, mmD2VGA_CONTROL, mmD3VGA_CONTROL, @@ -1895,9 +1891,9 @@ static int dce_v8_0_crtc_do_set_base(struct drm_crtc *crtc, case DRM_FORMAT_XBGR8888: case DRM_FORMAT_ABGR8888: fb_format = ((GRPH_DEPTH_32BPP << GRPH_CONTROL__GRPH_DEPTH__SHIFT) | - (GRPH_FORMAT_ARGB8888 << GRPH_CONTROL__GRPH_FORMAT__SHIFT)); + (GRPH_FORMAT_ARGB8888 << GRPH_CONTROL__GRPH_FORMAT__SHIFT)); fb_swap = ((GRPH_RED_SEL_B << GRPH_SWAP_CNTL__GRPH_RED_CROSSBAR__SHIFT) | - (GRPH_BLUE_SEL_R << GRPH_SWAP_CNTL__GRPH_BLUE_CROSSBAR__SHIFT)); + (GRPH_BLUE_SEL_R << GRPH_SWAP_CNTL__GRPH_BLUE_CROSSBAR__SHIFT)); #ifdef __BIG_ENDIAN fb_swap |= (GRPH_ENDIAN_8IN32 << GRPH_SWAP_CNTL__GRPH_ENDIAN_SWAP__SHIFT); #endif @@ -3151,7 +3147,7 @@ static int dce_v8_0_pageflip_irq(struct amdgpu_device *adev, spin_lock_irqsave(&adev_to_drm(adev)->event_lock, flags); works = amdgpu_crtc->pflip_works; - if (amdgpu_crtc->pflip_status != AMDGPU_FLIP_SUBMITTED){ + if (amdgpu_crtc->pflip_status != AMDGPU_FLIP_SUBMITTED) { DRM_DEBUG_DRIVER("amdgpu_crtc->pflip_status = %d != " "AMDGPU_FLIP_SUBMITTED(%d)\n", amdgpu_crtc->pflip_status, @@ -3544,8 +3540,7 @@ static void dce_v8_0_set_irq_funcs(struct amdgpu_device *adev) adev->hpd_irq.funcs = &dce_v8_0_hpd_irq_funcs; } -const struct amdgpu_ip_block_version dce_v8_0_ip_block = -{ +const struct amdgpu_ip_block_version dce_v8_0_ip_block = { .type = AMD_IP_BLOCK_TYPE_DCE, .major = 8, .minor = 0, @@ -3553,8 +3548,7 @@ const struct amdgpu_ip_block_version dce_v8_0_ip_block = .funcs = &dce_v8_0_ip_funcs, }; -const struct amdgpu_ip_block_version dce_v8_1_ip_block = -{ +const struct amdgpu_ip_block_version dce_v8_1_ip_block = { .type = AMD_IP_BLOCK_TYPE_DCE, .major = 8, .minor = 1, @@ -3562,8 +3556,7 @@ const struct amdgpu_ip_block_version dce_v8_1_ip_block = .funcs = &dce_v8_0_ip_funcs, }; -const struct amdgpu_ip_block_version dce_v8_2_ip_block = -{ +const struct amdgpu_ip_block_version dce_v8_2_ip_block = { .type = AMD_IP_BLOCK_TYPE_DCE, .major = 8, .minor = 2, @@ -3571,8 +3564,7 @@ const struct amdgpu_ip_block_version dce_v8_2_ip_block = .funcs = &dce_v8_0_ip_funcs, }; -const struct amdgpu_ip_block_version dce_v8_3_ip_block = -{ +const struct amdgpu_ip_block_version dce_v8_3_ip_block = { .type = AMD_IP_BLOCK_TYPE_DCE, .major = 8, .minor = 3, @@ -3580,8 +3572,7 @@ const struct amdgpu_ip_block_version dce_v8_3_ip_block = .funcs = &dce_v8_0_ip_funcs, }; -const struct amdgpu_ip_block_version dce_v8_5_ip_block = -{ +const struct amdgpu_ip_block_version dce_v8_5_ip_block = { .type = AMD_IP_BLOCK_TYPE_DCE, .major = 8, .minor = 5, diff --git a/drivers/gpu/drm/amd/amdgpu/df_v4_6_2.c b/drivers/gpu/drm/amd/amdgpu/df_v4_6_2.c new file mode 100644 index 000000000000..a47960a0babd --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/df_v4_6_2.c @@ -0,0 +1,34 @@ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#include "amdgpu.h" +#include "df_v4_6_2.h" + +static bool df_v4_6_2_query_ras_poison_mode(struct amdgpu_device *adev) +{ + /* return true since related regs are inaccessible */ + return true; +} + +const struct amdgpu_df_funcs df_v4_6_2_funcs = { + .query_ras_poison_mode = df_v4_6_2_query_ras_poison_mode, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/df_v4_6_2.h b/drivers/gpu/drm/amd/amdgpu/df_v4_6_2.h new file mode 100644 index 000000000000..3bc3e6d216e2 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/df_v4_6_2.h @@ -0,0 +1,31 @@ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __DF_V4_6_2_H__ +#define __DF_V4_6_2_H__ + +#include "soc15_common.h" + +extern const struct amdgpu_df_funcs df_v4_6_2_funcs; + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 44af8022b89f..c8a3bf01743f 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -102,6 +102,11 @@ #define mmGCR_GENERAL_CNTL_Sienna_Cichlid 0x1580 #define mmGCR_GENERAL_CNTL_Sienna_Cichlid_BASE_IDX 0 +#define mmGOLDEN_TSC_COUNT_UPPER_Cyan_Skillfish 0x0105 +#define mmGOLDEN_TSC_COUNT_UPPER_Cyan_Skillfish_BASE_IDX 1 +#define mmGOLDEN_TSC_COUNT_LOWER_Cyan_Skillfish 0x0106 +#define mmGOLDEN_TSC_COUNT_LOWER_Cyan_Skillfish_BASE_IDX 1 + #define mmGOLDEN_TSC_COUNT_UPPER_Vangogh 0x0025 #define mmGOLDEN_TSC_COUNT_UPPER_Vangogh_BASE_IDX 1 #define mmGOLDEN_TSC_COUNT_LOWER_Vangogh 0x0026 @@ -271,8 +276,7 @@ MODULE_FIRMWARE("amdgpu/gc_10_3_7_mec.bin"); MODULE_FIRMWARE("amdgpu/gc_10_3_7_mec2.bin"); MODULE_FIRMWARE("amdgpu/gc_10_3_7_rlc.bin"); -static const struct soc15_reg_golden golden_settings_gc_10_1[] = -{ +static const struct soc15_reg_golden golden_settings_gc_10_1[] = { SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_4, 0xffffffff, 0x00400014), SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_CPF_CLK_CTRL, 0xfcff8fff, 0xf8000100), SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_CLK_CTRL, 0xcd000000, 0x0d000100), @@ -315,13 +319,11 @@ static const struct soc15_reg_golden golden_settings_gc_10_1[] = SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0x00c00000, 0x00c00000) }; -static const struct soc15_reg_golden golden_settings_gc_10_0_nv10[] = -{ +static const struct soc15_reg_golden golden_settings_gc_10_0_nv10[] = { /* Pending on emulation bring up */ }; -static const struct soc15_reg_golden golden_settings_gc_rlc_spm_10_0_nv10[] = -{ +static const struct soc15_reg_golden golden_settings_gc_rlc_spm_10_0_nv10[] = { SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000, 0x0), SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0), SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x28), @@ -1376,8 +1378,7 @@ static const struct soc15_reg_golden golden_settings_gc_rlc_spm_10_0_nv10[] = SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xFFFFFFFF, 0xe0000000) }; -static const struct soc15_reg_golden golden_settings_gc_10_1_1[] = -{ +static const struct soc15_reg_golden golden_settings_gc_10_1_1[] = { SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_4, 0xffffffff, 0x003c0014), SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_GS_NGG_CLK_CTRL, 0xffff8fff, 0xffff8100), SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_IA_CLK_CTRL, 0xffff0fff, 0xffff0100), @@ -1418,8 +1419,7 @@ static const struct soc15_reg_golden golden_settings_gc_10_1_1[] = SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0x00c00000, 0x00c00000), }; -static const struct soc15_reg_golden golden_settings_gc_10_1_2[] = -{ +static const struct soc15_reg_golden golden_settings_gc_10_1_2[] = { SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_4, 0x003e001f, 0x003c0014), SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_GS_NGG_CLK_CTRL, 0xffff8fff, 0xffff8100), SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_IA_CLK_CTRL, 0xffff0fff, 0xffff0100), @@ -1464,13 +1464,11 @@ static const struct soc15_reg_golden golden_settings_gc_10_1_2[] = SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0xffffffff, 0x00c00000) }; -static const struct soc15_reg_golden golden_settings_gc_10_1_nv14[] = -{ +static const struct soc15_reg_golden golden_settings_gc_10_1_nv14[] = { /* Pending on emulation bring up */ }; -static const struct soc15_reg_golden golden_settings_gc_rlc_spm_10_1_nv14[] = -{ +static const struct soc15_reg_golden golden_settings_gc_rlc_spm_10_1_nv14[] = { SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xE0000000L, 0x0), SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0), SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x28), @@ -2093,13 +2091,11 @@ static const struct soc15_reg_golden golden_settings_gc_rlc_spm_10_1_nv14[] = SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xFFFFFFFF, 0xe0000000) }; -static const struct soc15_reg_golden golden_settings_gc_10_1_2_nv12[] = -{ +static const struct soc15_reg_golden golden_settings_gc_10_1_2_nv12[] = { /* Pending on emulation bring up */ }; -static const struct soc15_reg_golden golden_settings_gc_rlc_spm_10_1_2_nv12[] = -{ +static const struct soc15_reg_golden golden_settings_gc_rlc_spm_10_1_2_nv12[] = { SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000L, 0x0), SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xffffff, 0x0), SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_GLB_SAMPLEDELAY_IND_ADDR, 0xFFFFFFFF, 0x28), @@ -3154,8 +3150,7 @@ static const struct soc15_reg_golden golden_settings_gc_rlc_spm_10_1_2_nv12[] = SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_GFX_INDEX, 0xFFFFFFFF, 0xe0000000) }; -static const struct soc15_reg_golden golden_settings_gc_10_3[] = -{ +static const struct soc15_reg_golden golden_settings_gc_10_3[] = { SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_CS_CLK_CTRL, 0x78000000, 0x78000100), SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_PS_CLK_CTRL, 0xff7f0fff, 0x78000100), SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_RA0_CLK_CTRL, 0xff7f0fff, 0x30000100), @@ -3164,7 +3159,7 @@ static const struct soc15_reg_golden golden_settings_gc_10_3[] = SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0xffffffff, 0x00000280), SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG4, 0xffffffff, 0x00800000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_EXCEPTION_CONTROL, 0x7fff0f1f, 0x00b80000), - SOC15_REG_GOLDEN_VALUE(GC, 0 ,mmGCEA_SDP_TAG_RESERVE0, 0xffffffff, 0x10100100), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCEA_SDP_TAG_RESERVE0, 0xffffffff, 0x10100100), SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCEA_SDP_TAG_RESERVE1, 0xffffffff, 0x17000088), SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCR_GENERAL_CNTL_Sienna_Cichlid, 0x1ff1ffff, 0x00000500), SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCUTCL2_CGTT_CLK_CTRL_Sienna_Cichlid, 0xff000000, 0xff008080), @@ -3201,13 +3196,11 @@ static const struct soc15_reg_golden golden_settings_gc_10_3[] = SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0xffbfffff, 0x00a00000) }; -static const struct soc15_reg_golden golden_settings_gc_10_3_sienna_cichlid[] = -{ +static const struct soc15_reg_golden golden_settings_gc_10_3_sienna_cichlid[] = { /* Pending on emulation bring up */ }; -static const struct soc15_reg_golden golden_settings_gc_10_3_2[] = -{ +static const struct soc15_reg_golden golden_settings_gc_10_3_2[] = { SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_CS_CLK_CTRL, 0xff7f0fff, 0x78000100), SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_PS_CLK_CTRL, 0xff7f0fff, 0x78000100), SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_RA0_CLK_CTRL, 0xff7f0fff, 0x30000100), @@ -3254,8 +3247,7 @@ static const struct soc15_reg_golden golden_settings_gc_10_3_2[] = SOC15_REG_GOLDEN_VALUE(GC, 0, mmLDS_CONFIG, 0x00000020, 0x00000020), }; -static const struct soc15_reg_golden golden_settings_gc_10_3_vangogh[] = -{ +static const struct soc15_reg_golden golden_settings_gc_10_3_vangogh[] = { SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_RA0_CLK_CTRL, 0xff7f0fff, 0x30000100), SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_RA1_CLK_CTRL, 0xff7f0fff, 0x7e000100), SOC15_REG_GOLDEN_VALUE(GC, 0, mmCH_PIPE_STEER, 0x000000ff, 0x000000e4), @@ -3285,8 +3277,7 @@ static const struct soc15_reg_golden golden_settings_gc_10_3_vangogh[] = SOC15_REG_GOLDEN_VALUE(GC, 0, mmLDS_CONFIG, 0x00000020, 0x00000020), }; -static const struct soc15_reg_golden golden_settings_gc_10_3_3[] = -{ +static const struct soc15_reg_golden golden_settings_gc_10_3_3[] = { SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_CS_CLK_CTRL, 0xff7f0fff, 0x78000100), SOC15_REG_GOLDEN_VALUE(GC, 0, mmCH_PIPE_STEER, 0x000000ff, 0x000000e4), SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_GCR_CNTL, 0x0007ffff, 0x0000c200), @@ -3309,8 +3300,7 @@ static const struct soc15_reg_golden golden_settings_gc_10_3_3[] = SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0xffffffff, 0x00100000) }; -static const struct soc15_reg_golden golden_settings_gc_10_3_4[] = -{ +static const struct soc15_reg_golden golden_settings_gc_10_3_4[] = { SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_CS_CLK_CTRL, 0x78000000, 0x78000100), SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_RA0_CLK_CTRL, 0x30000000, 0x30000100), SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_RA1_CLK_CTRL, 0x7e000000, 0x7e000100), @@ -3380,7 +3370,7 @@ static const struct soc15_reg_golden golden_settings_gc_10_3_5[] = { SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER7_SELECT, 0xf0f001ff, 0x00000000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER8_SELECT, 0xf0f001ff, 0x00000000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER9_SELECT, 0xf0f001ff, 0x00000000), - SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX,0xfff7ffff, 0x01030000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfff7ffff, 0x01030000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0xffbfffff, 0x00a00000) }; @@ -3421,8 +3411,7 @@ static const struct soc15_reg_golden golden_settings_gc_10_0_cyan_skillfish[] = SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0x00800000, 0x00800000) }; -static const struct soc15_reg_golden golden_settings_gc_10_3_6[] = -{ +static const struct soc15_reg_golden golden_settings_gc_10_3_6[] = { SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_CS_CLK_CTRL, 0xff7f0fff, 0x78000100), SOC15_REG_GOLDEN_VALUE(GC, 0, mmCH_PIPE_STEER, 0x000000ff, 0x00000044), SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_GCR_CNTL, 0x0007ffff, 0x0000c200), @@ -3506,7 +3495,11 @@ static void gfx_v10_3_set_power_brake_sequence(struct amdgpu_device *adev); static void gfx_v10_0_ring_invalidate_tlbs(struct amdgpu_ring *ring, uint16_t pasid, uint32_t flush_type, bool all_hub, uint8_t dst_sel); +static void gfx_v10_0_update_spm_vmid_internal(struct amdgpu_device *adev, + unsigned int vmid); +static int gfx_v10_0_set_powergating_state(void *handle, + enum amd_powergating_state state); static void gfx10_kiq_set_resources(struct amdgpu_ring *kiq_ring, uint64_t queue_mask) { amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6)); @@ -3641,7 +3634,7 @@ static void gfx_v10_0_set_kiq_pm4_funcs(struct amdgpu_device *adev) static void gfx_v10_0_init_spm_golden_registers(struct amdgpu_device *adev) { - switch (adev->ip_versions[GC_HWIP][0]) { + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { case IP_VERSION(10, 1, 10): soc15_program_register_sequence(adev, golden_settings_gc_rlc_spm_10_0_nv10, @@ -3664,7 +3657,7 @@ static void gfx_v10_0_init_spm_golden_registers(struct amdgpu_device *adev) static void gfx_v10_0_init_golden_registers(struct amdgpu_device *adev) { - switch (adev->ip_versions[GC_HWIP][0]) { + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { case IP_VERSION(10, 1, 10): soc15_program_register_sequence(adev, golden_settings_gc_10_1, @@ -3714,8 +3707,8 @@ static void gfx_v10_0_init_golden_registers(struct amdgpu_device *adev) break; case IP_VERSION(10, 3, 4): soc15_program_register_sequence(adev, - golden_settings_gc_10_3_4, - (const u32)ARRAY_SIZE(golden_settings_gc_10_3_4)); + golden_settings_gc_10_3_4, + (const u32)ARRAY_SIZE(golden_settings_gc_10_3_4)); break; case IP_VERSION(10, 3, 5): soc15_program_register_sequence(adev, @@ -3782,7 +3775,7 @@ static int gfx_v10_0_ring_test_ring(struct amdgpu_ring *ring) struct amdgpu_device *adev = ring->adev; uint32_t scratch = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0); uint32_t tmp = 0; - unsigned i; + unsigned int i; int r; WREG32(scratch, 0xCAFEDEAD); @@ -3820,7 +3813,7 @@ static int gfx_v10_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) struct amdgpu_device *adev = ring->adev; struct amdgpu_ib ib; struct dma_fence *f = NULL; - unsigned index; + unsigned int index; uint64_t gpu_addr; volatile uint32_t *cpu_ptr; long r; @@ -3905,7 +3898,7 @@ static void gfx_v10_0_check_fw_write_wait(struct amdgpu_device *adev) { adev->gfx.cp_fw_write_wait = false; - switch (adev->ip_versions[GC_HWIP][0]) { + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { case IP_VERSION(10, 1, 10): case IP_VERSION(10, 1, 2): case IP_VERSION(10, 1, 1): @@ -3951,12 +3944,12 @@ static bool gfx_v10_0_navi10_gfxoff_should_enable(struct amdgpu_device *adev) break; } - return ret ; + return ret; } static void gfx_v10_0_check_gfxoff_flag(struct amdgpu_device *adev) { - switch (adev->ip_versions[GC_HWIP][0]) { + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { case IP_VERSION(10, 1, 10): if (!gfx_v10_0_navi10_gfxoff_should_enable(adev)) adev->pm.pp_feature &= ~PP_GFXOFF_MASK; @@ -3978,8 +3971,8 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev) DRM_DEBUG("\n"); - if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 1, 1) && - (!(adev->pdev->device == 0x7340 && adev->pdev->revision != 0x00))) + if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(10, 1, 1) && + (!(adev->pdev->device == 0x7340 && adev->pdev->revision != 0x00))) wks = "_wks"; amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix)); @@ -4151,22 +4144,22 @@ static void gfx_v10_0_init_rlcg_reg_access_ctrl(struct amdgpu_device *adev) { struct amdgpu_rlcg_reg_access_ctrl *reg_access_ctrl; - reg_access_ctrl = &adev->gfx.rlc.reg_access_ctrl; + reg_access_ctrl = &adev->gfx.rlc.reg_access_ctrl[0]; reg_access_ctrl->scratch_reg0 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0); reg_access_ctrl->scratch_reg1 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG1); reg_access_ctrl->scratch_reg2 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG2); reg_access_ctrl->scratch_reg3 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG3); reg_access_ctrl->grbm_cntl = SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_CNTL); reg_access_ctrl->grbm_idx = SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_INDEX); - switch (adev->ip_versions[GC_HWIP][0]) { - case IP_VERSION(10, 3, 0): - reg_access_ctrl->spare_int = - SOC15_REG_OFFSET(GC, 0, mmRLC_SPARE_INT_0_Sienna_Cichlid); - break; - default: - reg_access_ctrl->spare_int = - SOC15_REG_OFFSET(GC, 0, mmRLC_SPARE_INT); - break; + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { + case IP_VERSION(10, 3, 0): + reg_access_ctrl->spare_int = + SOC15_REG_OFFSET(GC, 0, mmRLC_SPARE_INT_0_Sienna_Cichlid); + break; + default: + reg_access_ctrl->spare_int = + SOC15_REG_OFFSET(GC, 0, mmRLC_SPARE_INT); + break; } adev->gfx.rlc.rlcg_reg_access_supported = true; } @@ -4187,11 +4180,6 @@ static int gfx_v10_0_rlc_init(struct amdgpu_device *adev) return r; } - /* init spm vmid with 0xf */ - if (adev->gfx.rlc.funcs->update_spm_vmid) - adev->gfx.rlc.funcs->update_spm_vmid(adev, 0xf); - - return 0; } @@ -4213,7 +4201,7 @@ static int gfx_v10_0_mec_init(struct amdgpu_device *adev) int r; u32 *hpd; const __le32 *fw_data = NULL; - unsigned fw_size; + unsigned int fw_size; u32 *fw = NULL; size_t mec_hpd_size; @@ -4295,7 +4283,8 @@ static void gfx_v10_0_read_wave_data(struct amdgpu_device *adev, uint32_t xcc_id { /* in gfx10 the SIMD_ID is specified as part of the INSTANCE * field when performing a select_se_sh so it should be - * zero here */ + * zero here + */ WARN_ON(simd != 0); /* type 2 wave data */ @@ -4376,7 +4365,7 @@ static void gfx_v10_0_gpu_early_init(struct amdgpu_device *adev) { u32 gb_addr_config; - switch (adev->ip_versions[GC_HWIP][0]) { + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { case IP_VERSION(10, 1, 10): case IP_VERSION(10, 1, 1): case IP_VERSION(10, 1, 2): @@ -4474,7 +4463,7 @@ static int gfx_v10_0_gfx_ring_init(struct amdgpu_device *adev, int ring_id, static int gfx_v10_0_compute_ring_init(struct amdgpu_device *adev, int ring_id, int mec, int pipe, int queue) { - unsigned irq_type; + unsigned int irq_type; struct amdgpu_ring *ring; unsigned int hw_prio; @@ -4509,7 +4498,7 @@ static int gfx_v10_0_sw_init(void *handle) struct amdgpu_kiq *kiq; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - switch (adev->ip_versions[GC_HWIP][0]) { + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { case IP_VERSION(10, 1, 10): case IP_VERSION(10, 1, 1): case IP_VERSION(10, 1, 2): @@ -4767,9 +4756,12 @@ static void gfx_v10_0_setup_rb(struct amdgpu_device *adev) for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { bitmap = i * adev->gfx.config.max_sh_per_se + j; - if (((adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 3, 0)) || - (adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 3, 3)) || - (adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 3, 6))) && + if (((amdgpu_ip_version(adev, GC_HWIP, 0) == + IP_VERSION(10, 3, 0)) || + (amdgpu_ip_version(adev, GC_HWIP, 0) == + IP_VERSION(10, 3, 3)) || + (amdgpu_ip_version(adev, GC_HWIP, 0) == + IP_VERSION(10, 3, 6))) && ((gfx_v10_3_get_disabled_sa(adev) >> bitmap) & 1)) continue; gfx_v10_0_select_se_sh(adev, i, j, 0xffffffff, 0); @@ -4795,8 +4787,9 @@ static u32 gfx_v10_0_init_pa_sc_tile_steering_override(struct amdgpu_device *ade uint32_t pa_sc_tile_steering_override; /* for ASICs that integrates GFX v10.3 - * pa_sc_tile_steering_override should be set to 0 */ - if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(10, 3, 0)) + * pa_sc_tile_steering_override should be set to 0 + */ + if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(10, 3, 0)) return 0; /* init num_sc */ @@ -4871,8 +4864,10 @@ static void gfx_v10_0_init_compute_vmid(struct amdgpu_device *adev) nv_grbm_select(adev, 0, 0, 0, 0); mutex_unlock(&adev->srbm_mutex); - /* Initialize all compute VMIDs to have no GDS, GWS, or OA - access. These should be enabled by FW for target VMIDs. */ + /* + * Initialize all compute VMIDs to have no GDS, GWS, or OA + * access. These should be enabled by FW for target VMIDs. + */ for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) { WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * i, 0); WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * i, 0); @@ -4975,7 +4970,7 @@ static void gfx_v10_0_get_tcc_info(struct amdgpu_device *adev) /* TCCs are global (not instanced). */ uint32_t tcc_disable; - if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(10, 3, 0)) { + if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(10, 3, 0)) { tcc_disable = RREG32_SOC15(GC, 0, mmCGTS_TCC_DISABLE_gc_10_3) | RREG32_SOC15(GC, 0, mmCGTS_USER_TCC_DISABLE_gc_10_3); } else { @@ -5052,7 +5047,7 @@ static int gfx_v10_0_init_csb(struct amdgpu_device *adev) adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr); /* csib */ - if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 1, 2)) { + if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(10, 1, 2)) { WREG32_SOC15_RLC(GC, 0, mmRLC_CSIB_ADDR_HI, adev->gfx.rlc.clear_state_gpu_addr >> 32); WREG32_SOC15_RLC(GC, 0, mmRLC_CSIB_ADDR_LO, @@ -5108,8 +5103,10 @@ static void gfx_v10_0_rlc_smu_handshake_cntl(struct amdgpu_device *adev, static void gfx_v10_0_rlc_start(struct amdgpu_device *adev) { - /* TODO: enable rlc & smu handshake until smu - * and gfxoff feature works as expected */ + /* + * TODO: enable rlc & smu handshake until smu + * and gfxoff feature works as expected + */ if (!(amdgpu_pp_feature_mask & PP_GFXOFF_MASK)) gfx_v10_0_rlc_smu_handshake_cntl(adev, false); @@ -5132,7 +5129,7 @@ static int gfx_v10_0_rlc_load_microcode(struct amdgpu_device *adev) { const struct rlc_firmware_header_v2_0 *hdr; const __le32 *fw_data; - unsigned i, fw_size; + unsigned int i, fw_size; if (!adev->gfx.rlc_fw) return -EINVAL; @@ -5169,6 +5166,8 @@ static int gfx_v10_0_rlc_resume(struct amdgpu_device *adev) gfx_v10_0_init_csb(adev); + gfx_v10_0_update_spm_vmid_internal(adev, 0xf); + if (!amdgpu_sriov_vf(adev)) /* enable RLC SRM */ gfx_v10_0_rlc_enable_srm(adev); } else { @@ -5199,6 +5198,8 @@ static int gfx_v10_0_rlc_resume(struct amdgpu_device *adev) gfx_v10_0_init_csb(adev); + gfx_v10_0_update_spm_vmid_internal(adev, 0xf); + adev->gfx.rlc.funcs->start(adev); if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) { @@ -5207,6 +5208,7 @@ static int gfx_v10_0_rlc_resume(struct amdgpu_device *adev) return r; } } + return 0; } @@ -5674,11 +5676,10 @@ static int gfx_v10_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable) tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1); tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 0 : 1); - if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 1, 2)) { + if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(10, 1, 2)) WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, tmp); - } else { + else WREG32_SOC15(GC, 0, mmCP_ME_CNTL, tmp); - } if (adev->job_hang && !enable) return 0; @@ -5700,7 +5701,7 @@ static int gfx_v10_0_cp_gfx_load_pfp_microcode(struct amdgpu_device *adev) int r; const struct gfx_firmware_header_v1_0 *pfp_hdr; const __le32 *fw_data; - unsigned i, fw_size; + unsigned int i, fw_size; uint32_t tmp; uint32_t usec_timeout = 50000; /* wait for 50ms */ @@ -5778,7 +5779,7 @@ static int gfx_v10_0_cp_gfx_load_ce_microcode(struct amdgpu_device *adev) int r; const struct gfx_firmware_header_v1_0 *ce_hdr; const __le32 *fw_data; - unsigned i, fw_size; + unsigned int i, fw_size; uint32_t tmp; uint32_t usec_timeout = 50000; /* wait for 50ms */ @@ -5855,7 +5856,7 @@ static int gfx_v10_0_cp_gfx_load_me_microcode(struct amdgpu_device *adev) int r; const struct gfx_firmware_header_v1_0 *me_hdr; const __le32 *fw_data; - unsigned i, fw_size; + unsigned int i, fw_size; uint32_t tmp; uint32_t usec_timeout = 50000; /* wait for 50ms */ @@ -6066,7 +6067,7 @@ static void gfx_v10_0_cp_gfx_set_doorbell(struct amdgpu_device *adev, } WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp); } - switch (adev->ip_versions[GC_HWIP][0]) { + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { case IP_VERSION(10, 3, 0): case IP_VERSION(10, 3, 2): case IP_VERSION(10, 3, 1): @@ -6199,7 +6200,7 @@ static int gfx_v10_0_cp_gfx_resume(struct amdgpu_device *adev) static void gfx_v10_0_cp_compute_enable(struct amdgpu_device *adev, bool enable) { if (enable) { - switch (adev->ip_versions[GC_HWIP][0]) { + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { case IP_VERSION(10, 3, 0): case IP_VERSION(10, 3, 2): case IP_VERSION(10, 3, 1): @@ -6215,7 +6216,7 @@ static void gfx_v10_0_cp_compute_enable(struct amdgpu_device *adev, bool enable) break; } } else { - switch (adev->ip_versions[GC_HWIP][0]) { + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { case IP_VERSION(10, 3, 0): case IP_VERSION(10, 3, 2): case IP_VERSION(10, 3, 1): @@ -6243,7 +6244,7 @@ static int gfx_v10_0_cp_compute_load_microcode(struct amdgpu_device *adev) { const struct gfx_firmware_header_v1_0 *mec_hdr; const __le32 *fw_data; - unsigned i; + unsigned int i; u32 tmp; u32 usec_timeout = 50000; /* Wait for 50 ms */ @@ -6315,7 +6316,7 @@ static void gfx_v10_0_kiq_setting(struct amdgpu_ring *ring) struct amdgpu_device *adev = ring->adev; /* tell RLC which is KIQ queue */ - switch (adev->ip_versions[GC_HWIP][0]) { + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { case IP_VERSION(10, 3, 0): case IP_VERSION(10, 3, 2): case IP_VERSION(10, 3, 1): @@ -6466,11 +6467,18 @@ static int gfx_v10_0_gfx_init_queue(struct amdgpu_ring *ring) nv_grbm_select(adev, 0, 0, 0, 0); mutex_unlock(&adev->srbm_mutex); if (adev->gfx.me.mqd_backup[mqd_idx]) - memcpy(adev->gfx.me.mqd_backup[mqd_idx], mqd, sizeof(*mqd)); + memcpy_fromio(adev->gfx.me.mqd_backup[mqd_idx], mqd, sizeof(*mqd)); } else { + mutex_lock(&adev->srbm_mutex); + nv_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); + if (ring->doorbell_index == adev->doorbell_index.gfx_ring0 << 1) + gfx_v10_0_cp_gfx_set_doorbell(adev, ring); + + nv_grbm_select(adev, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); /* restore mqd with the backup copy */ if (adev->gfx.me.mqd_backup[mqd_idx]) - memcpy(mqd, adev->gfx.me.mqd_backup[mqd_idx], sizeof(*mqd)); + memcpy_toio(mqd, adev->gfx.me.mqd_backup[mqd_idx], sizeof(*mqd)); /* reset the ring */ ring->wptr = 0; *ring->wptr_cpu_addr = 0; @@ -6744,7 +6752,7 @@ static int gfx_v10_0_kiq_init_queue(struct amdgpu_ring *ring) if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */ /* reset MQD to a clean status */ if (adev->gfx.kiq[0].mqd_backup) - memcpy(mqd, adev->gfx.kiq[0].mqd_backup, sizeof(*mqd)); + memcpy_toio(mqd, adev->gfx.kiq[0].mqd_backup, sizeof(*mqd)); /* reset ring buffer */ ring->wptr = 0; @@ -6767,7 +6775,7 @@ static int gfx_v10_0_kiq_init_queue(struct amdgpu_ring *ring) mutex_unlock(&adev->srbm_mutex); if (adev->gfx.kiq[0].mqd_backup) - memcpy(adev->gfx.kiq[0].mqd_backup, mqd, sizeof(*mqd)); + memcpy_fromio(adev->gfx.kiq[0].mqd_backup, mqd, sizeof(*mqd)); } return 0; @@ -6788,11 +6796,11 @@ static int gfx_v10_0_kcq_init_queue(struct amdgpu_ring *ring) mutex_unlock(&adev->srbm_mutex); if (adev->gfx.mec.mqd_backup[mqd_idx]) - memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd)); + memcpy_fromio(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd)); } else { /* restore MQD to a clean status */ if (adev->gfx.mec.mqd_backup[mqd_idx]) - memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd)); + memcpy_toio(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd)); /* reset ring buffer */ ring->wptr = 0; atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 0); @@ -6922,9 +6930,11 @@ static bool gfx_v10_0_check_grbm_cam_remapping(struct amdgpu_device *adev) { uint32_t data, pattern = 0xDEADBEEF; - /* check if mmVGT_ESGS_RING_SIZE_UMD - * has been remapped to mmVGT_ESGS_RING_SIZE */ - switch (adev->ip_versions[GC_HWIP][0]) { + /* + * check if mmVGT_ESGS_RING_SIZE_UMD + * has been remapped to mmVGT_ESGS_RING_SIZE + */ + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { case IP_VERSION(10, 3, 0): case IP_VERSION(10, 3, 2): case IP_VERSION(10, 3, 4): @@ -6934,12 +6944,10 @@ static bool gfx_v10_0_check_grbm_cam_remapping(struct amdgpu_device *adev) WREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE_UMD, pattern); if (RREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE_Sienna_Cichlid) == pattern) { - WREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE_UMD , data); + WREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE_UMD, data); return true; - } else { - WREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE_Sienna_Cichlid, data); - return false; } + WREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE_Sienna_Cichlid, data); break; case IP_VERSION(10, 3, 1): case IP_VERSION(10, 3, 3): @@ -6954,12 +6962,12 @@ static bool gfx_v10_0_check_grbm_cam_remapping(struct amdgpu_device *adev) if (RREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE) == pattern) { WREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE_UMD, data); return true; - } else { - WREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE, data); - return false; } + WREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE, data); break; } + + return false; } static void gfx_v10_0_setup_grbm_cam_remapping(struct amdgpu_device *adev) @@ -6969,11 +6977,13 @@ static void gfx_v10_0_setup_grbm_cam_remapping(struct amdgpu_device *adev) if (amdgpu_sriov_vf(adev)) return; - /* initialize cam_index to 0 - * index will auto-inc after each data writting */ + /* + * Initialize cam_index to 0 + * index will auto-inc after each data writing + */ WREG32_SOC15(GC, 0, mmGRBM_CAM_INDEX, 0); - switch (adev->ip_versions[GC_HWIP][0]) { + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { case IP_VERSION(10, 3, 0): case IP_VERSION(10, 3, 2): case IP_VERSION(10, 3, 1): @@ -7100,6 +7110,7 @@ static void gfx_v10_0_setup_grbm_cam_remapping(struct amdgpu_device *adev) static void gfx_v10_0_disable_gpa_mode(struct amdgpu_device *adev) { uint32_t data; + data = RREG32_SOC15(GC, 0, mmCPC_PSP_DEBUG); data |= CPC_PSP_DEBUG__GPA_OVERRIDE_MASK; WREG32_SOC15(GC, 0, mmCPC_PSP_DEBUG, data); @@ -7145,19 +7156,19 @@ static int gfx_v10_0_hw_init(void *handle) * init golden registers and rlc resume may override some registers, * reconfig them here */ - if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 1, 10) || - adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 1, 1) || - adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 1, 2)) + if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(10, 1, 10) || + amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(10, 1, 1) || + amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(10, 1, 2)) gfx_v10_0_tcp_harvest(adev); r = gfx_v10_0_cp_resume(adev); if (r) return r; - if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 3, 0)) + if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(10, 3, 0)) gfx_v10_3_program_pbb_mode(adev); - if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(10, 3, 0)) + if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(10, 3, 0)) gfx_v10_3_set_power_brake_sequence(adev); return r; @@ -7170,6 +7181,13 @@ static int gfx_v10_0_hw_fini(void *handle) amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); + /* WA added for Vangogh asic fixing the SMU suspend failure + * It needs to set power gating again during gfxoff control + * otherwise the gfxoff disallowing will be failed to set. + */ + if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(10, 3, 1)) + gfx_v10_0_set_powergating_state(handle, AMD_PG_STATE_UNGATE); + if (!adev->no_hw_access) { if (amdgpu_async_gfx_ring) { if (amdgpu_gfx_disable_kgq(adev, 0)) @@ -7216,7 +7234,7 @@ static bool gfx_v10_0_is_idle(void *handle) static int gfx_v10_0_wait_for_idle(void *handle) { - unsigned i; + unsigned int i; u32 tmp; struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@ -7261,7 +7279,7 @@ static int gfx_v10_0_soft_reset(void *handle) /* GRBM_STATUS2 */ tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2); - switch (adev->ip_versions[GC_HWIP][0]) { + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { case IP_VERSION(10, 3, 0): case IP_VERSION(10, 3, 2): case IP_VERSION(10, 3, 1): @@ -7318,7 +7336,23 @@ static uint64_t gfx_v10_0_get_gpu_clock_counter(struct amdgpu_device *adev) { uint64_t clock, clock_lo, clock_hi, hi_check; - switch (adev->ip_versions[GC_HWIP][0]) { + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { + case IP_VERSION(10, 1, 3): + case IP_VERSION(10, 1, 4): + preempt_disable(); + clock_hi = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Cyan_Skillfish); + clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_Cyan_Skillfish); + hi_check = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Cyan_Skillfish); + /* The SMUIO TSC clock frequency is 100MHz, which sets 32-bit carry over + * roughly every 42 seconds. + */ + if (hi_check != clock_hi) { + clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_Cyan_Skillfish); + clock_hi = hi_check; + } + preempt_enable(); + clock = clock_lo | (clock_hi << 32ULL); + break; case IP_VERSION(10, 3, 1): case IP_VERSION(10, 3, 3): case IP_VERSION(10, 3, 7): @@ -7405,7 +7439,7 @@ static int gfx_v10_0_early_init(void *handle) adev->gfx.funcs = &gfx_v10_0_gfx_funcs; - switch (adev->ip_versions[GC_HWIP][0]) { + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { case IP_VERSION(10, 1, 10): case IP_VERSION(10, 1, 1): case IP_VERSION(10, 1, 2): @@ -7471,12 +7505,12 @@ static bool gfx_v10_0_is_rlc_enabled(struct amdgpu_device *adev) static void gfx_v10_0_set_safe_mode(struct amdgpu_device *adev, int xcc_id) { uint32_t data; - unsigned i; + unsigned int i; data = RLC_SAFE_MODE__CMD_MASK; data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT); - switch (adev->ip_versions[GC_HWIP][0]) { + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { case IP_VERSION(10, 3, 0): case IP_VERSION(10, 3, 2): case IP_VERSION(10, 3, 1): @@ -7514,7 +7548,7 @@ static void gfx_v10_0_unset_safe_mode(struct amdgpu_device *adev, int xcc_id) uint32_t data; data = RLC_SAFE_MODE__CMD_MASK; - switch (adev->ip_versions[GC_HWIP][0]) { + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { case IP_VERSION(10, 3, 0): case IP_VERSION(10, 3, 2): case IP_VERSION(10, 3, 1): @@ -7825,7 +7859,7 @@ static void gfx_v10_0_apply_medium_grain_clock_gating_workaround(struct amdgpu_d mmCGTS_SA1_QUAD1_SM_CTRL_REG }; - if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 1, 2)) { + if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(10, 1, 2)) { for (i = 0; i < ARRAY_SIZE(tcp_ctrl_regs_nv12); i++) { reg_idx = adev->reg_offset[GC_HWIP][0][mmCGTS_SA0_WGP00_CU0_TCP_CTRL_REG_BASE_IDX] + tcp_ctrl_regs_nv12[i]; @@ -7870,9 +7904,12 @@ static int gfx_v10_0_update_gfx_clock_gating(struct amdgpu_device *adev, /* === CGCG + CGLS === */ gfx_v10_0_update_coarse_grain_clock_gating(adev, enable); - if ((adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 1, 10)) || - (adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 1, 1)) || - (adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 1, 2))) + if ((amdgpu_ip_version(adev, GC_HWIP, 0) == + IP_VERSION(10, 1, 10)) || + (amdgpu_ip_version(adev, GC_HWIP, 0) == + IP_VERSION(10, 1, 1)) || + (amdgpu_ip_version(adev, GC_HWIP, 0) == + IP_VERSION(10, 1, 2))) gfx_v10_0_apply_medium_grain_clock_gating_workaround(adev); } else { /* CGCG/CGLS should be disabled before MGCG/MGLS @@ -7900,26 +7937,25 @@ static int gfx_v10_0_update_gfx_clock_gating(struct amdgpu_device *adev, return 0; } -static void gfx_v10_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid) +static void gfx_v10_0_update_spm_vmid_internal(struct amdgpu_device *adev, + unsigned int vmid) { - u32 reg, data; - - amdgpu_gfx_off_ctrl(adev, false); + u32 data; /* not for *_SOC15 */ - reg = SOC15_REG_OFFSET(GC, 0, mmRLC_SPM_MC_CNTL); - if (amdgpu_sriov_is_pp_one_vf(adev)) - data = RREG32_NO_KIQ(reg); - else - data = RREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL); + data = RREG32_SOC15_NO_KIQ(GC, 0, mmRLC_SPM_MC_CNTL); data &= ~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK; data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) << RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT; - if (amdgpu_sriov_is_pp_one_vf(adev)) - WREG32_SOC15_NO_KIQ(GC, 0, mmRLC_SPM_MC_CNTL, data); - else - WREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL, data); + WREG32_SOC15_NO_KIQ(GC, 0, mmRLC_SPM_MC_CNTL, data); +} + +static void gfx_v10_0_update_spm_vmid(struct amdgpu_device *adev, unsigned int vmid) +{ + amdgpu_gfx_off_ctrl(adev, false); + + gfx_v10_0_update_spm_vmid_internal(adev, vmid); amdgpu_gfx_off_ctrl(adev, true); } @@ -7973,7 +8009,7 @@ static void gfx_v10_cntl_power_gating(struct amdgpu_device *adev, bool enable) * Power/performance team will optimize it and might give a new value later. */ if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG)) { - switch (adev->ip_versions[GC_HWIP][0]) { + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { case IP_VERSION(10, 3, 1): case IP_VERSION(10, 3, 3): case IP_VERSION(10, 3, 6): @@ -8034,7 +8070,7 @@ static int gfx_v10_0_set_powergating_state(void *handle, if (amdgpu_sriov_vf(adev)) return 0; - switch (adev->ip_versions[GC_HWIP][0]) { + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { case IP_VERSION(10, 1, 10): case IP_VERSION(10, 1, 1): case IP_VERSION(10, 1, 2): @@ -8071,7 +8107,7 @@ static int gfx_v10_0_set_clockgating_state(void *handle, if (amdgpu_sriov_vf(adev)) return 0; - switch (adev->ip_versions[GC_HWIP][0]) { + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { case IP_VERSION(10, 1, 10): case IP_VERSION(10, 1, 1): case IP_VERSION(10, 1, 2): @@ -8297,7 +8333,7 @@ static void gfx_v10_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, struct amdgpu_ib *ib, uint32_t flags) { - unsigned vmid = AMDGPU_JOB_GET_VMID(job); + unsigned int vmid = AMDGPU_JOB_GET_VMID(job); u32 header, control = 0; if (ib->flags & AMDGPU_IB_FLAG_CE) @@ -8338,7 +8374,7 @@ static void gfx_v10_0_ring_emit_ib_compute(struct amdgpu_ring *ring, struct amdgpu_ib *ib, uint32_t flags) { - unsigned vmid = AMDGPU_JOB_GET_VMID(job); + unsigned int vmid = AMDGPU_JOB_GET_VMID(job); u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24); if (ring->is_mes_queue) @@ -8373,7 +8409,7 @@ static void gfx_v10_0_ring_emit_ib_compute(struct amdgpu_ring *ring, } static void gfx_v10_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, - u64 seq, unsigned flags) + u64 seq, unsigned int flags) { bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT; bool int_sel = flags & AMDGPU_FENCE_FLAG_INT; @@ -8429,7 +8465,7 @@ static void gfx_v10_0_ring_invalidate_tlbs(struct amdgpu_ring *ring, } static void gfx_v10_0_ring_emit_vm_flush(struct amdgpu_ring *ring, - unsigned vmid, uint64_t pd_addr) + unsigned int vmid, uint64_t pd_addr) { if (ring->is_mes_queue) gfx_v10_0_ring_invalidate_tlbs(ring, 0, 0, false, 0); @@ -8511,9 +8547,9 @@ static void gfx_v10_0_ring_emit_cntxcntl(struct amdgpu_ring *ring, amdgpu_ring_write(ring, 0); } -static unsigned gfx_v10_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring) +static unsigned int gfx_v10_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring) { - unsigned ret; + unsigned int ret; amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3)); amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr)); @@ -8525,9 +8561,10 @@ static unsigned gfx_v10_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring) return ret; } -static void gfx_v10_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset) +static void gfx_v10_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned int offset) { - unsigned cur; + unsigned int cur; + BUG_ON(offset > ring->buf_mask); BUG_ON(ring->ring[offset] != 0x55aa55aa); @@ -8750,7 +8787,7 @@ static void gfx_v10_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring, } static void gfx_v10_0_ring_soft_recovery(struct amdgpu_ring *ring, - unsigned vmid) + unsigned int vmid) { struct amdgpu_device *adev = ring->adev; uint32_t value = 0; @@ -8859,7 +8896,7 @@ static void gfx_v10_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev static int gfx_v10_0_set_eop_interrupt_state(struct amdgpu_device *adev, struct amdgpu_irq_src *src, - unsigned type, + unsigned int type, enum amdgpu_interrupt_state state) { switch (type) { @@ -8956,7 +8993,7 @@ static int gfx_v10_0_eop_irq(struct amdgpu_device *adev, static int gfx_v10_0_set_priv_reg_fault_state(struct amdgpu_device *adev, struct amdgpu_irq_src *source, - unsigned type, + unsigned int type, enum amdgpu_interrupt_state state) { switch (state) { @@ -8975,7 +9012,7 @@ static int gfx_v10_0_set_priv_reg_fault_state(struct amdgpu_device *adev, static int gfx_v10_0_set_priv_inst_fault_state(struct amdgpu_device *adev, struct amdgpu_irq_src *source, - unsigned type, + unsigned int type, enum amdgpu_interrupt_state state) { switch (state) { @@ -9317,7 +9354,7 @@ static void gfx_v10_0_set_irq_funcs(struct amdgpu_device *adev) static void gfx_v10_0_set_rlc_funcs(struct amdgpu_device *adev) { - switch (adev->ip_versions[GC_HWIP][0]) { + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { case IP_VERSION(10, 1, 10): case IP_VERSION(10, 1, 1): case IP_VERSION(10, 1, 3): @@ -9342,7 +9379,7 @@ static void gfx_v10_0_set_rlc_funcs(struct amdgpu_device *adev) static void gfx_v10_0_set_gds_init(struct amdgpu_device *adev) { - unsigned total_cu = adev->gfx.config.max_cu_per_sh * + unsigned int total_cu = adev->gfx.config.max_cu_per_sh * adev->gfx.config.max_sh_per_se * adev->gfx.config.max_shader_engines; @@ -9423,7 +9460,7 @@ static int gfx_v10_0_get_cu_info(struct amdgpu_device *adev, { int i, j, k, counter, active_cu_number = 0; u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0; - unsigned disable_masks[4 * 2]; + unsigned int disable_masks[4 * 2]; if (!adev || !cu_info) return -EINVAL; @@ -9434,10 +9471,14 @@ static int gfx_v10_0_get_cu_info(struct amdgpu_device *adev, for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { bitmap = i * adev->gfx.config.max_sh_per_se + j; - if (((adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 3, 0)) || - (adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 3, 3)) || - (adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 3, 6)) || - (adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 3, 7))) && + if (((amdgpu_ip_version(adev, GC_HWIP, 0) == + IP_VERSION(10, 3, 0)) || + (amdgpu_ip_version(adev, GC_HWIP, 0) == + IP_VERSION(10, 3, 3)) || + (amdgpu_ip_version(adev, GC_HWIP, 0) == + IP_VERSION(10, 3, 6)) || + (amdgpu_ip_version(adev, GC_HWIP, 0) == + IP_VERSION(10, 3, 7))) && ((gfx_v10_3_get_disabled_sa(adev) >> bitmap) & 1)) continue; mask = 1; @@ -9448,7 +9489,7 @@ static int gfx_v10_0_get_cu_info(struct amdgpu_device *adev, gfx_v10_0_set_user_wgp_inactive_bitmap_per_sh( adev, disable_masks[i * 2 + j]); bitmap = gfx_v10_0_get_cu_active_bitmap_per_sh(adev); - cu_info->bitmap[i][j] = bitmap; + cu_info->bitmap[0][i][j] = bitmap; for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) { if (bitmap & mask) { @@ -9540,8 +9581,7 @@ static void gfx_v10_3_set_power_brake_sequence(struct amdgpu_device *adev) (0x1 << DIDT_SQ_THROTTLE_CTRL__PWRBRK_STALL_EN__SHIFT)); } -const struct amdgpu_ip_block_version gfx_v10_0_ip_block = -{ +const struct amdgpu_ip_block_version gfx_v10_0_ip_block = { .type = AMD_IP_BLOCK_TYPE_GFX, .major = 10, .minor = 0, diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index 0451533ddde4..8ed4a6fb147a 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -60,6 +60,8 @@ #define regCGTT_WD_CLK_CTRL_BASE_IDX 1 #define regRLC_RLCS_BOOTLOAD_STATUS_gc_11_0_1 0x4e7e #define regRLC_RLCS_BOOTLOAD_STATUS_gc_11_0_1_BASE_IDX 1 +#define regPC_CONFIG_CNTL_1 0x194d +#define regPC_CONFIG_CNTL_1_BASE_IDX 1 MODULE_FIRMWARE("amdgpu/gc_11_0_0_pfp.bin"); MODULE_FIRMWARE("amdgpu/gc_11_0_0_me.bin"); @@ -82,6 +84,14 @@ MODULE_FIRMWARE("amdgpu/gc_11_0_4_pfp.bin"); MODULE_FIRMWARE("amdgpu/gc_11_0_4_me.bin"); MODULE_FIRMWARE("amdgpu/gc_11_0_4_mec.bin"); MODULE_FIRMWARE("amdgpu/gc_11_0_4_rlc.bin"); +MODULE_FIRMWARE("amdgpu/gc_11_5_0_pfp.bin"); +MODULE_FIRMWARE("amdgpu/gc_11_5_0_me.bin"); +MODULE_FIRMWARE("amdgpu/gc_11_5_0_mec.bin"); +MODULE_FIRMWARE("amdgpu/gc_11_5_0_rlc.bin"); + +static const struct soc15_reg_golden golden_settings_gc_11_0[] = { + SOC15_REG_GOLDEN_VALUE(GC, 0, regTCP_CNTL, 0x20000000, 0x20000000) +}; static const struct soc15_reg_golden golden_settings_gc_11_0_1[] = { @@ -96,6 +106,23 @@ static const struct soc15_reg_golden golden_settings_gc_11_0_1[] = SOC15_REG_GOLDEN_VALUE(GC, 0, regTCP_CNTL2, 0xfcffffff, 0x0000000a) }; +static const struct soc15_reg_golden golden_settings_gc_11_5_0[] = { + SOC15_REG_GOLDEN_VALUE(GC, 0, regDB_DEBUG5, 0xffffffff, 0x00000800), + SOC15_REG_GOLDEN_VALUE(GC, 0, regGB_ADDR_CONFIG, 0x0c1807ff, 0x00000242), + SOC15_REG_GOLDEN_VALUE(GC, 0, regGCR_GENERAL_CNTL, 0x1ff1ffff, 0x00000500), + SOC15_REG_GOLDEN_VALUE(GC, 0, regGL2A_ADDR_MATCH_MASK, 0xffffffff, 0xfffffff3), + SOC15_REG_GOLDEN_VALUE(GC, 0, regGL2C_ADDR_MATCH_MASK, 0xffffffff, 0xfffffff3), + SOC15_REG_GOLDEN_VALUE(GC, 0, regGL2C_CTRL, 0xffffffff, 0xf37fff3f), + SOC15_REG_GOLDEN_VALUE(GC, 0, regGL2C_CTRL3, 0xfffffffb, 0x00f40188), + SOC15_REG_GOLDEN_VALUE(GC, 0, regGL2C_CTRL4, 0xf0ffffff, 0x8000b007), + SOC15_REG_GOLDEN_VALUE(GC, 0, regPA_CL_ENHANCE, 0xf1ffffff, 0x00880007), + SOC15_REG_GOLDEN_VALUE(GC, 0, regPC_CONFIG_CNTL_1, 0xffffffff, 0x00010000), + SOC15_REG_GOLDEN_VALUE(GC, 0, regTA_CNTL_AUX, 0xf7f7ffff, 0x01030000), + SOC15_REG_GOLDEN_VALUE(GC, 0, regTA_CNTL2, 0x007f0000, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, regTCP_CNTL2, 0xffcfffff, 0x0000200a), + SOC15_REG_GOLDEN_VALUE(GC, 0, regUTCL1_CTRL_2, 0xffffffff, 0x0000048f) +}; + #define DEFAULT_SH_MEM_CONFIG \ ((SH_MEM_ADDRESS_MODE_64 << SH_MEM_CONFIG__ADDRESS_MODE__SHIFT) | \ (SH_MEM_ALIGNMENT_MODE_UNALIGNED << SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) | \ @@ -132,6 +159,7 @@ static void gfx11_kiq_set_resources(struct amdgpu_ring *kiq_ring, uint64_t queue { amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6)); amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) | + PACKET3_SET_RESOURCES_UNMAP_LATENTY(0xa) | /* unmap_latency: 0xa (~ 1s) */ PACKET3_SET_RESOURCES_QUEUE_TYPE(0)); /* vmid_mask:0 queue_type:0 (KIQ) */ amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */ amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */ @@ -265,16 +293,25 @@ static void gfx_v11_0_set_kiq_pm4_funcs(struct amdgpu_device *adev) static void gfx_v11_0_init_golden_registers(struct amdgpu_device *adev) { - switch (adev->ip_versions[GC_HWIP][0]) { + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { case IP_VERSION(11, 0, 1): case IP_VERSION(11, 0, 4): soc15_program_register_sequence(adev, golden_settings_gc_11_0_1, (const u32)ARRAY_SIZE(golden_settings_gc_11_0_1)); break; + case IP_VERSION(11, 5, 0): + soc15_program_register_sequence(adev, + golden_settings_gc_11_5_0, + (const u32)ARRAY_SIZE(golden_settings_gc_11_5_0)); + break; default: break; } + soc15_program_register_sequence(adev, + golden_settings_gc_11_0, + (const u32)ARRAY_SIZE(golden_settings_gc_11_0)); + } static void gfx_v11_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel, @@ -390,7 +427,7 @@ static int gfx_v11_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD); cpu_ptr = &adev->wb.wb[index]; - r = amdgpu_ib_get(adev, NULL, 16, AMDGPU_IB_POOL_DIRECT, &ib); + r = amdgpu_ib_get(adev, NULL, 20, AMDGPU_IB_POOL_DIRECT, &ib); if (r) { DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r); goto err1; @@ -465,7 +502,7 @@ out: static void gfx_v11_0_check_fw_cp_gfx_shadow(struct amdgpu_device *adev) { - switch (adev->ip_versions[GC_HWIP][0]) { + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { case IP_VERSION(11, 0, 0): case IP_VERSION(11, 0, 2): case IP_VERSION(11, 0, 3): @@ -561,6 +598,14 @@ static int gfx_v11_0_init_microcode(struct amdgpu_device *adev) adev->gfx.mec2_fw = NULL; gfx_v11_0_check_fw_cp_gfx_shadow(adev); + + if (adev->gfx.imu.funcs && adev->gfx.imu.funcs->init_microcode) { + err = adev->gfx.imu.funcs->init_microcode(adev); + if (err) + DRM_ERROR("Failed to init imu firmware!\n"); + return err; + } + out: if (err) { amdgpu_ucode_release(&adev->gfx.pfp_fw); @@ -667,7 +712,7 @@ static void gfx_v11_0_init_rlcg_reg_access_ctrl(struct amdgpu_device *adev) { struct amdgpu_rlcg_reg_access_ctrl *reg_access_ctrl; - reg_access_ctrl = &adev->gfx.rlc.reg_access_ctrl; + reg_access_ctrl = &adev->gfx.rlc.reg_access_ctrl[0]; reg_access_ctrl->scratch_reg0 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG0); reg_access_ctrl->scratch_reg1 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG1); reg_access_ctrl->scratch_reg2 = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG2); @@ -856,8 +901,7 @@ static const struct amdgpu_gfx_funcs gfx_v11_0_gfx_funcs = { static int gfx_v11_0_gpu_early_init(struct amdgpu_device *adev) { - - switch (adev->ip_versions[GC_HWIP][0]) { + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { case IP_VERSION(11, 0, 0): case IP_VERSION(11, 0, 2): adev->gfx.config.max_hw_contexts = 8; @@ -876,6 +920,7 @@ static int gfx_v11_0_gpu_early_init(struct amdgpu_device *adev) break; case IP_VERSION(11, 0, 1): case IP_VERSION(11, 0, 4): + case IP_VERSION(11, 5, 0): adev->gfx.config.max_hw_contexts = 8; adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; adev->gfx.config.sc_prim_fifo_size_backend = 0x100; @@ -1301,9 +1346,7 @@ static int gfx_v11_0_sw_init(void *handle) struct amdgpu_kiq *kiq; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - adev->gfxhub.funcs->init(adev); - - switch (adev->ip_versions[GC_HWIP][0]) { + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { case IP_VERSION(11, 0, 0): case IP_VERSION(11, 0, 2): case IP_VERSION(11, 0, 3): @@ -1316,6 +1359,7 @@ static int gfx_v11_0_sw_init(void *handle) break; case IP_VERSION(11, 0, 1): case IP_VERSION(11, 0, 4): + case IP_VERSION(11, 5, 0): adev->gfx.me.num_me = 1; adev->gfx.me.num_pipe_per_me = 1; adev->gfx.me.num_queue_per_pipe = 1; @@ -1334,8 +1378,8 @@ static int gfx_v11_0_sw_init(void *handle) } /* Enable CG flag in one VF mode for enabling RLC safe mode enter/exit */ - if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(11, 0, 3) && - amdgpu_sriov_is_pp_one_vf(adev)) + if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 0, 3) && + amdgpu_sriov_is_pp_one_vf(adev)) adev->cg_flags = AMD_CG_SUPPORT_GFX_CGCG; /* EOP Event */ @@ -1368,14 +1412,6 @@ static int gfx_v11_0_sw_init(void *handle) adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE; - if (adev->gfx.imu.funcs) { - if (adev->gfx.imu.funcs->init_microcode) { - r = adev->gfx.imu.funcs->init_microcode(adev); - if (r) - DRM_ERROR("Failed to load imu firmware!\n"); - } - } - gfx_v11_0_me_init(adev); r = gfx_v11_0_rlc_init(adev); @@ -2562,8 +2598,11 @@ static int gfx_v11_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev) for (i = 0; i < adev->usec_timeout; i++) { cp_status = RREG32_SOC15(GC, 0, regCP_STAT); - if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(11, 0, 1) || - adev->ip_versions[GC_HWIP][0] == IP_VERSION(11, 0, 4)) + if (amdgpu_ip_version(adev, GC_HWIP, 0) == + IP_VERSION(11, 0, 1) || + amdgpu_ip_version(adev, GC_HWIP, 0) == + IP_VERSION(11, 0, 4) || + amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 5, 0)) bootload_status = RREG32_SOC15(GC, 0, regRLC_RLCS_BOOTLOAD_STATUS_gc_11_0_1); else @@ -3684,11 +3723,11 @@ static int gfx_v11_0_gfx_init_queue(struct amdgpu_ring *ring) soc21_grbm_select(adev, 0, 0, 0, 0); mutex_unlock(&adev->srbm_mutex); if (adev->gfx.me.mqd_backup[mqd_idx]) - memcpy(adev->gfx.me.mqd_backup[mqd_idx], mqd, sizeof(*mqd)); + memcpy_fromio(adev->gfx.me.mqd_backup[mqd_idx], mqd, sizeof(*mqd)); } else { /* restore mqd with the backup copy */ if (adev->gfx.me.mqd_backup[mqd_idx]) - memcpy(mqd, adev->gfx.me.mqd_backup[mqd_idx], sizeof(*mqd)); + memcpy_toio(mqd, adev->gfx.me.mqd_backup[mqd_idx], sizeof(*mqd)); /* reset the ring */ ring->wptr = 0; *ring->wptr_cpu_addr = 0; @@ -3977,7 +4016,7 @@ static int gfx_v11_0_kiq_init_queue(struct amdgpu_ring *ring) if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */ /* reset MQD to a clean status */ if (adev->gfx.kiq[0].mqd_backup) - memcpy(mqd, adev->gfx.kiq[0].mqd_backup, sizeof(*mqd)); + memcpy_toio(mqd, adev->gfx.kiq[0].mqd_backup, sizeof(*mqd)); /* reset ring buffer */ ring->wptr = 0; @@ -4000,7 +4039,7 @@ static int gfx_v11_0_kiq_init_queue(struct amdgpu_ring *ring) mutex_unlock(&adev->srbm_mutex); if (adev->gfx.kiq[0].mqd_backup) - memcpy(adev->gfx.kiq[0].mqd_backup, mqd, sizeof(*mqd)); + memcpy_fromio(adev->gfx.kiq[0].mqd_backup, mqd, sizeof(*mqd)); } return 0; @@ -4021,11 +4060,11 @@ static int gfx_v11_0_kcq_init_queue(struct amdgpu_ring *ring) mutex_unlock(&adev->srbm_mutex); if (adev->gfx.mec.mqd_backup[mqd_idx]) - memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd)); + memcpy_fromio(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd)); } else { /* restore MQD to a clean status */ if (adev->gfx.mec.mqd_backup[mqd_idx]) - memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd)); + memcpy_toio(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd)); /* reset ring buffer */ ring->wptr = 0; atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 0); @@ -4343,6 +4382,10 @@ static int gfx_v11_0_hw_init(void *handle) if (r) return r; + /* get IMU version from HW if it's not set */ + if (!adev->gfx.imu_fw_version) + adev->gfx.imu_fw_version = RREG32_SOC15(GC, 0, regGFX_IMU_SCRATCH_0); + return r; } @@ -4654,26 +4697,6 @@ static int gfx_v11_0_early_init(void *handle) return gfx_v11_0_init_microcode(adev); } -static int gfx_v11_0_ras_late_init(void *handle) -{ - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - struct ras_common_if *gfx_common_if; - int ret; - - gfx_common_if = kzalloc(sizeof(struct ras_common_if), GFP_KERNEL); - if (!gfx_common_if) - return -ENOMEM; - - gfx_common_if->block = AMDGPU_RAS_BLOCK__GFX; - - ret = amdgpu_ras_feature_enable(adev, gfx_common_if, true); - if (ret) - dev_warn(adev->dev, "Failed to enable gfx11 ras feature\n"); - - kfree(gfx_common_if); - return 0; -} - static int gfx_v11_0_late_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@ -4687,12 +4710,6 @@ static int gfx_v11_0_late_init(void *handle) if (r) return r; - if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(11, 0, 3)) { - r = gfx_v11_0_ras_late_init(handle); - if (r) - return r; - } - return 0; } @@ -4979,23 +4996,16 @@ static int gfx_v11_0_update_gfx_clock_gating(struct amdgpu_device *adev, static void gfx_v11_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid) { - u32 reg, data; + u32 data; amdgpu_gfx_off_ctrl(adev, false); - reg = SOC15_REG_OFFSET(GC, 0, regRLC_SPM_MC_CNTL); - if (amdgpu_sriov_is_pp_one_vf(adev)) - data = RREG32_NO_KIQ(reg); - else - data = RREG32(reg); + data = RREG32_SOC15_NO_KIQ(GC, 0, regRLC_SPM_MC_CNTL); data &= ~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK; data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) << RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT; - if (amdgpu_sriov_is_pp_one_vf(adev)) - WREG32_SOC15_NO_KIQ(GC, 0, regRLC_SPM_MC_CNTL, data); - else - WREG32_SOC15(GC, 0, regRLC_SPM_MC_CNTL, data); + WREG32_SOC15_NO_KIQ(GC, 0, regRLC_SPM_MC_CNTL, data); amdgpu_gfx_off_ctrl(adev, true); } @@ -5027,9 +5037,10 @@ static void gfx_v11_cntl_power_gating(struct amdgpu_device *adev, bool enable) // Program RLC_PG_DELAY3 for CGPG hysteresis if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG)) { - switch (adev->ip_versions[GC_HWIP][0]) { + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { case IP_VERSION(11, 0, 1): case IP_VERSION(11, 0, 4): + case IP_VERSION(11, 5, 0): WREG32_SOC15(GC, 0, regRLC_PG_DELAY_3, RLC_PG_DELAY_3_DEFAULT_GC_11_0_1); break; default: @@ -5056,7 +5067,7 @@ static int gfx_v11_0_set_powergating_state(void *handle, if (amdgpu_sriov_vf(adev)) return 0; - switch (adev->ip_versions[GC_HWIP][0]) { + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { case IP_VERSION(11, 0, 0): case IP_VERSION(11, 0, 2): case IP_VERSION(11, 0, 3): @@ -5064,6 +5075,7 @@ static int gfx_v11_0_set_powergating_state(void *handle, break; case IP_VERSION(11, 0, 1): case IP_VERSION(11, 0, 4): + case IP_VERSION(11, 5, 0): if (!enable) amdgpu_gfx_off_ctrl(adev, false); @@ -5088,12 +5100,13 @@ static int gfx_v11_0_set_clockgating_state(void *handle, if (amdgpu_sriov_vf(adev)) return 0; - switch (adev->ip_versions[GC_HWIP][0]) { + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { case IP_VERSION(11, 0, 0): case IP_VERSION(11, 0, 1): case IP_VERSION(11, 0, 2): case IP_VERSION(11, 0, 3): case IP_VERSION(11, 0, 4): + case IP_VERSION(11, 5, 0): gfx_v11_0_update_gfx_clock_gating(adev, state == AMD_CG_STATE_GATE); break; @@ -5170,45 +5183,17 @@ static u64 gfx_v11_0_ring_get_wptr_gfx(struct amdgpu_ring *ring) static void gfx_v11_0_ring_set_wptr_gfx(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - uint32_t *wptr_saved; - uint32_t *is_queue_unmap; - uint64_t aggregated_db_index; - uint32_t mqd_size = adev->mqds[AMDGPU_HW_IP_GFX].mqd_size; - uint64_t wptr_tmp; - - if (ring->is_mes_queue) { - wptr_saved = (uint32_t *)(ring->mqd_ptr + mqd_size); - is_queue_unmap = (uint32_t *)(ring->mqd_ptr + mqd_size + - sizeof(uint32_t)); - aggregated_db_index = - amdgpu_mes_get_aggregated_doorbell_index(adev, - ring->hw_prio); - - wptr_tmp = ring->wptr & ring->buf_mask; - atomic64_set((atomic64_t *)ring->wptr_cpu_addr, wptr_tmp); - *wptr_saved = wptr_tmp; - /* assume doorbell always being used by mes mapped queue */ - if (*is_queue_unmap) { - WDOORBELL64(aggregated_db_index, wptr_tmp); - WDOORBELL64(ring->doorbell_index, wptr_tmp); - } else { - WDOORBELL64(ring->doorbell_index, wptr_tmp); - if (*is_queue_unmap) - WDOORBELL64(aggregated_db_index, wptr_tmp); - } + if (ring->use_doorbell) { + /* XXX check if swapping is necessary on BE */ + atomic64_set((atomic64_t *)ring->wptr_cpu_addr, + ring->wptr); + WDOORBELL64(ring->doorbell_index, ring->wptr); } else { - if (ring->use_doorbell) { - /* XXX check if swapping is necessary on BE */ - atomic64_set((atomic64_t *)ring->wptr_cpu_addr, - ring->wptr); - WDOORBELL64(ring->doorbell_index, ring->wptr); - } else { - WREG32_SOC15(GC, 0, regCP_RB0_WPTR, - lower_32_bits(ring->wptr)); - WREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI, - upper_32_bits(ring->wptr)); - } + WREG32_SOC15(GC, 0, regCP_RB0_WPTR, + lower_32_bits(ring->wptr)); + WREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI, + upper_32_bits(ring->wptr)); } } @@ -5233,42 +5218,14 @@ static u64 gfx_v11_0_ring_get_wptr_compute(struct amdgpu_ring *ring) static void gfx_v11_0_ring_set_wptr_compute(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - uint32_t *wptr_saved; - uint32_t *is_queue_unmap; - uint64_t aggregated_db_index; - uint32_t mqd_size = adev->mqds[AMDGPU_HW_IP_COMPUTE].mqd_size; - uint64_t wptr_tmp; - - if (ring->is_mes_queue) { - wptr_saved = (uint32_t *)(ring->mqd_ptr + mqd_size); - is_queue_unmap = (uint32_t *)(ring->mqd_ptr + mqd_size + - sizeof(uint32_t)); - aggregated_db_index = - amdgpu_mes_get_aggregated_doorbell_index(adev, - ring->hw_prio); - - wptr_tmp = ring->wptr & ring->buf_mask; - atomic64_set((atomic64_t *)ring->wptr_cpu_addr, wptr_tmp); - *wptr_saved = wptr_tmp; - /* assume doorbell always used by mes mapped queue */ - if (*is_queue_unmap) { - WDOORBELL64(aggregated_db_index, wptr_tmp); - WDOORBELL64(ring->doorbell_index, wptr_tmp); - } else { - WDOORBELL64(ring->doorbell_index, wptr_tmp); - if (*is_queue_unmap) - WDOORBELL64(aggregated_db_index, wptr_tmp); - } + /* XXX check if swapping is necessary on BE */ + if (ring->use_doorbell) { + atomic64_set((atomic64_t *)ring->wptr_cpu_addr, + ring->wptr); + WDOORBELL64(ring->doorbell_index, ring->wptr); } else { - /* XXX check if swapping is necessary on BE */ - if (ring->use_doorbell) { - atomic64_set((atomic64_t *)ring->wptr_cpu_addr, - ring->wptr); - WDOORBELL64(ring->doorbell_index, ring->wptr); - } else { - BUG(); /* only DOORBELL method supported on gfx11 now */ - } + BUG(); /* only DOORBELL method supported on gfx11 now */ } } @@ -6394,7 +6351,7 @@ static int gfx_v11_0_get_cu_info(struct amdgpu_device *adev, * SE6: {SH0,SH1} --> {bitmap[2][2], bitmap[2][3]} * SE7: {SH0,SH1} --> {bitmap[3][2], bitmap[3][3]} */ - cu_info->bitmap[i % 4][j + (i / 4) * 2] = bitmap; + cu_info->bitmap[0][i % 4][j + (i / 4) * 2] = bitmap; for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) { if (bitmap & mask) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c index da6caff78c22..34f9211b2679 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c @@ -3577,7 +3577,7 @@ static void gfx_v6_0_get_cu_info(struct amdgpu_device *adev) gfx_v6_0_set_user_cu_inactive_bitmap( adev, disable_masks[i * 2 + j]); bitmap = gfx_v6_0_get_cu_enabled(adev); - cu_info->bitmap[i][j] = bitmap; + cu_info->bitmap[0][i][j] = bitmap; for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) { if (bitmap & mask) { diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c index 8c174c11eaee..c2faf6b4c2fc 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c @@ -90,8 +90,7 @@ MODULE_FIRMWARE("amdgpu/mullins_ce.bin"); MODULE_FIRMWARE("amdgpu/mullins_rlc.bin"); MODULE_FIRMWARE("amdgpu/mullins_mec.bin"); -static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] = -{ +static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] = { {mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0}, {mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1}, {mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2}, @@ -110,8 +109,7 @@ static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] = {mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15} }; -static const u32 spectre_rlc_save_restore_register_list[] = -{ +static const u32 spectre_rlc_save_restore_register_list[] = { (0x0e00 << 16) | (0xc12c >> 2), 0x00000000, (0x0e00 << 16) | (0xc140 >> 2), @@ -557,8 +555,7 @@ static const u32 spectre_rlc_save_restore_register_list[] = (0x0e00 << 16) | (0x9600 >> 2), }; -static const u32 kalindi_rlc_save_restore_register_list[] = -{ +static const u32 kalindi_rlc_save_restore_register_list[] = { (0x0e00 << 16) | (0xc12c >> 2), 0x00000000, (0x0e00 << 16) | (0xc140 >> 2), @@ -933,7 +930,8 @@ static int gfx_v7_0_init_microcode(struct amdgpu_device *adev) case CHIP_MULLINS: chip_name = "mullins"; break; - default: BUG(); + default: + BUG(); } snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name); @@ -2759,8 +2757,7 @@ static int gfx_v7_0_mec_init(struct amdgpu_device *adev) return 0; } -struct hqd_registers -{ +struct hqd_registers { u32 cp_mqd_base_addr; u32 cp_mqd_base_addr_hi; u32 cp_hqd_active; @@ -5122,13 +5119,13 @@ static void gfx_v7_0_get_cu_info(struct amdgpu_device *adev) gfx_v7_0_set_user_cu_inactive_bitmap( adev, disable_masks[i * 2 + j]); bitmap = gfx_v7_0_get_cu_active_bitmap(adev); - cu_info->bitmap[i][j] = bitmap; + cu_info->bitmap[0][i][j] = bitmap; - for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) { + for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) { if (bitmap & mask) { if (counter < ao_cu_num) ao_bitmap |= mask; - counter ++; + counter++; } mask <<= 1; } @@ -5150,8 +5147,7 @@ static void gfx_v7_0_get_cu_info(struct amdgpu_device *adev) cu_info->lds_size = 64; } -const struct amdgpu_ip_block_version gfx_v7_1_ip_block = -{ +const struct amdgpu_ip_block_version gfx_v7_1_ip_block = { .type = AMD_IP_BLOCK_TYPE_GFX, .major = 7, .minor = 1, @@ -5159,8 +5155,7 @@ const struct amdgpu_ip_block_version gfx_v7_1_ip_block = .funcs = &gfx_v7_0_ip_funcs, }; -const struct amdgpu_ip_block_version gfx_v7_2_ip_block = -{ +const struct amdgpu_ip_block_version gfx_v7_2_ip_block = { .type = AMD_IP_BLOCK_TYPE_GFX, .major = 7, .minor = 2, @@ -5168,8 +5163,7 @@ const struct amdgpu_ip_block_version gfx_v7_2_ip_block = .funcs = &gfx_v7_0_ip_funcs, }; -const struct amdgpu_ip_block_version gfx_v7_3_ip_block = -{ +const struct amdgpu_ip_block_version gfx_v7_3_ip_block = { .type = AMD_IP_BLOCK_TYPE_GFX, .major = 7, .minor = 3, diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 51c1745c8369..1943beb135c4 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -883,8 +883,8 @@ static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) gpu_addr = adev->wb.gpu_addr + (index * 4); adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD); memset(&ib, 0, sizeof(ib)); - r = amdgpu_ib_get(adev, NULL, 16, - AMDGPU_IB_POOL_DIRECT, &ib); + + r = amdgpu_ib_get(adev, NULL, 20, AMDGPU_IB_POOL_DIRECT, &ib); if (r) goto err1; @@ -7121,7 +7121,7 @@ static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev) gfx_v8_0_set_user_cu_inactive_bitmap( adev, disable_masks[i * 2 + j]); bitmap = gfx_v8_0_get_cu_active_bitmap(adev); - cu_info->bitmap[i][j] = bitmap; + cu_info->bitmap[0][i][j] = bitmap; for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) { if (bitmap & mask) { diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 65577eca58f1..69c500910746 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -762,6 +762,8 @@ static void gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev, static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev, void *inject_if, uint32_t instance_mask); static void gfx_v9_0_reset_ras_error_count(struct amdgpu_device *adev); +static void gfx_v9_0_update_spm_vmid_internal(struct amdgpu_device *adev, + unsigned int vmid); static void gfx_v9_0_kiq_set_resources(struct amdgpu_ring *kiq_ring, uint64_t queue_mask) @@ -893,7 +895,7 @@ static void gfx_v9_0_set_kiq_pm4_funcs(struct amdgpu_device *adev) static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev) { - switch (adev->ip_versions[GC_HWIP][0]) { + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { case IP_VERSION(9, 0, 1): soc15_program_register_sequence(adev, golden_settings_gc_9_0, @@ -949,8 +951,8 @@ static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev) break; } - if ((adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 1)) && - (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 2))) + if ((amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 1)) && + (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 2))) soc15_program_register_sequence(adev, golden_settings_gc_9_x_common, (const u32)ARRAY_SIZE(golden_settings_gc_9_x_common)); } @@ -1037,8 +1039,8 @@ static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) gpu_addr = adev->wb.gpu_addr + (index * 4); adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD); memset(&ib, 0, sizeof(ib)); - r = amdgpu_ib_get(adev, NULL, 16, - AMDGPU_IB_POOL_DIRECT, &ib); + + r = amdgpu_ib_get(adev, NULL, 20, AMDGPU_IB_POOL_DIRECT, &ib); if (r) goto err1; @@ -1093,14 +1095,14 @@ static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev) adev->gfx.me_fw_write_wait = false; adev->gfx.mec_fw_write_wait = false; - if ((adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 1)) && + if ((amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 1)) && ((adev->gfx.mec_fw_version < 0x000001a5) || - (adev->gfx.mec_feature_version < 46) || - (adev->gfx.pfp_fw_version < 0x000000b7) || - (adev->gfx.pfp_feature_version < 46))) + (adev->gfx.mec_feature_version < 46) || + (adev->gfx.pfp_fw_version < 0x000000b7) || + (adev->gfx.pfp_feature_version < 46))) DRM_WARN_ONCE("CP firmware version too old, please update!"); - switch (adev->ip_versions[GC_HWIP][0]) { + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { case IP_VERSION(9, 0, 1): if ((adev->gfx.me_fw_version >= 0x0000009c) && (adev->gfx.me_feature_version >= 42) && @@ -1200,7 +1202,7 @@ static bool is_raven_kicker(struct amdgpu_device *adev) static bool check_if_enlarge_doorbell_range(struct amdgpu_device *adev) { - if ((adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 3, 0)) && + if ((amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 3, 0)) && (adev->gfx.me_fw_version >= 0x000000a5) && (adev->gfx.me_feature_version >= 52)) return true; @@ -1213,7 +1215,7 @@ static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev) if (gfx_v9_0_should_disable_gfxoff(adev->pdev)) adev->pm.pp_feature &= ~PP_GFXOFF_MASK; - switch (adev->ip_versions[GC_HWIP][0]) { + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { case IP_VERSION(9, 0, 1): case IP_VERSION(9, 2, 1): case IP_VERSION(9, 4, 0): @@ -1324,9 +1326,9 @@ out: static bool gfx_v9_0_load_mec2_fw_bin_support(struct amdgpu_device *adev) { - if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2) || - adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 1) || - adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 3, 0)) + if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2) || + amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 1) || + amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 3, 0)) return false; return true; @@ -1483,7 +1485,7 @@ static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev) if (adev->flags & AMD_IS_APU) always_on_cu_num = 4; - else if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 2, 1)) + else if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 2, 1)) always_on_cu_num = 8; else always_on_cu_num = 12; @@ -1497,7 +1499,7 @@ static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev) amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff, 0); for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) { - if (cu_info->bitmap[i][j] & mask) { + if (cu_info->bitmap[0][i][j] & mask) { if (counter == pg_always_on_cu_num) WREG32_SOC15(GC, 0, mmRLC_PG_ALWAYS_ON_CU_MASK, cu_bitmap); if (counter < always_on_cu_num) @@ -1632,7 +1634,7 @@ static void gfx_v9_0_init_rlcg_reg_access_ctrl(struct amdgpu_device *adev) { struct amdgpu_rlcg_reg_access_ctrl *reg_access_ctrl; - reg_access_ctrl = &adev->gfx.rlc.reg_access_ctrl; + reg_access_ctrl = &adev->gfx.rlc.reg_access_ctrl[0]; reg_access_ctrl->scratch_reg0 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0); reg_access_ctrl->scratch_reg1 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG1); reg_access_ctrl->scratch_reg2 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG2); @@ -1667,22 +1669,6 @@ static int gfx_v9_0_rlc_init(struct amdgpu_device *adev) return r; } - switch (adev->ip_versions[GC_HWIP][0]) { - case IP_VERSION(9, 2, 2): - case IP_VERSION(9, 1, 0): - gfx_v9_0_init_lbpw(adev); - break; - case IP_VERSION(9, 4, 0): - gfx_v9_4_init_lbpw(adev); - break; - default: - break; - } - - /* init spm vmid with 0xf */ - if (adev->gfx.rlc.funcs->update_spm_vmid) - adev->gfx.rlc.funcs->update_spm_vmid(adev, 0xf); - return 0; } @@ -1850,7 +1836,7 @@ static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev) u32 gb_addr_config; int err; - switch (adev->ip_versions[GC_HWIP][0]) { + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { case IP_VERSION(9, 0, 1): adev->gfx.config.max_hw_contexts = 8; adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; @@ -2016,7 +2002,7 @@ static int gfx_v9_0_sw_init(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; unsigned int hw_prio; - switch (adev->ip_versions[GC_HWIP][0]) { + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { case IP_VERSION(9, 0, 1): case IP_VERSION(9, 2, 1): case IP_VERSION(9, 4, 0): @@ -2377,7 +2363,7 @@ static void gfx_v9_0_init_sq_config(struct amdgpu_device *adev) { uint32_t tmp; - switch (adev->ip_versions[GC_HWIP][0]) { + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { case IP_VERSION(9, 4, 1): tmp = RREG32_SOC15(GC, 0, mmSQ_CONFIG); tmp = REG_SET_FIELD(tmp, SQ_CONFIG, DISABLE_BARRIER_WAITCNT, @@ -2714,7 +2700,7 @@ static void gfx_v9_0_init_gfx_power_gating(struct amdgpu_device *adev) /* program GRBM_REG_SAVE_GFX_IDLE_THRESHOLD to 0x55f0 */ data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT); WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL), data); - if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 3, 0)) + if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 3, 0)) pwr_10_0_gfxip_control_over_cgpg(adev, true); } } @@ -2826,7 +2812,8 @@ static void gfx_v9_0_init_pg(struct amdgpu_device *adev) * And it's needed by gfxoff feature. */ if (adev->gfx.rlc.is_rlc_v2_1) { - if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 2, 1) || + if (amdgpu_ip_version(adev, GC_HWIP, 0) == + IP_VERSION(9, 2, 1) || (adev->apu_flags & AMD_APU_IS_RAVEN2)) gfx_v9_1_init_rlc_save_restore_list(adev); gfx_v9_0_enable_save_restore_machine(adev); @@ -2939,15 +2926,17 @@ static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev) return r; } - switch (adev->ip_versions[GC_HWIP][0]) { + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { case IP_VERSION(9, 2, 2): case IP_VERSION(9, 1, 0): + gfx_v9_0_init_lbpw(adev); if (amdgpu_lbpw == 0) gfx_v9_0_enable_lbpw(adev, false); else gfx_v9_0_enable_lbpw(adev, true); break; case IP_VERSION(9, 4, 0): + gfx_v9_4_init_lbpw(adev); if (amdgpu_lbpw > 0) gfx_v9_0_enable_lbpw(adev, true); else @@ -2957,6 +2946,8 @@ static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev) break; } + gfx_v9_0_update_spm_vmid_internal(adev, 0xf); + adev->gfx.rlc.funcs->start(adev); return 0; @@ -3723,8 +3714,8 @@ static void gfx_v9_0_init_tcp_config(struct amdgpu_device *adev) { u32 tmp; - if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 1) && - adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 2)) + if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 1) && + amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 2)) return; tmp = RREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG); @@ -3764,7 +3755,7 @@ static int gfx_v9_0_hw_init(void *handle) if (r) return r; - if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2)) + if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2)) gfx_v9_4_2_set_power_brake_sequence(adev); return r; @@ -3812,7 +3803,7 @@ static int gfx_v9_0_hw_fini(void *handle) /* Skip stopping RLC with A+A reset or when RLC controls GFX clock */ if ((adev->gmc.xgmi.connected_to_cpu && amdgpu_in_reset(adev)) || - (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(9, 4, 2))) { + (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(9, 4, 2))) { dev_dbg(adev->dev, "Skipping RLC halt\n"); return 0; } @@ -3996,7 +3987,7 @@ static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev) { uint64_t clock, clock_lo, clock_hi, hi_check; - switch (adev->ip_versions[GC_HWIP][0]) { + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { case IP_VERSION(9, 3, 0): preempt_disable(); clock_hi = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Renoir); @@ -4015,7 +4006,9 @@ static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev) default: amdgpu_gfx_off_ctrl(adev, false); mutex_lock(&adev->gfx.gpu_clock_mutex); - if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 0, 1) && amdgpu_sriov_runtime(adev)) { + if (amdgpu_ip_version(adev, GC_HWIP, 0) == + IP_VERSION(9, 0, 1) && + amdgpu_sriov_runtime(adev)) { clock = gfx_v9_0_kiq_read_clock(adev); } else { WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1); @@ -4367,7 +4360,7 @@ static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev) if (!ring->sched.ready) return 0; - if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 1)) { + if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 1)) { vgpr_init_shader_ptr = vgpr_init_compute_shader_arcturus; vgpr_init_shader_size = sizeof(vgpr_init_compute_shader_arcturus); vgpr_init_regs_ptr = vgpr_init_regs_arcturus; @@ -4519,8 +4512,8 @@ static int gfx_v9_0_early_init(void *handle) adev->gfx.funcs = &gfx_v9_0_gfx_funcs; - if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 1) || - adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2)) + if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 1) || + amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2)) adev->gfx.num_gfx_rings = 0; else adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS; @@ -4558,7 +4551,7 @@ static int gfx_v9_0_ecc_late_init(void *handle) } /* requires IBs so do in late init after IB pool is initialized */ - if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2)) + if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2)) r = gfx_v9_4_2_do_edc_gpr_workarounds(adev); else r = gfx_v9_0_do_edc_gpr_workarounds(adev); @@ -4590,7 +4583,7 @@ static int gfx_v9_0_late_init(void *handle) if (r) return r; - if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2)) + if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2)) gfx_v9_4_2_debug_trap_config_init(adev, adev->vm_manager.first_kfd_vmid, AMDGPU_NUM_VMID); else @@ -4686,7 +4679,7 @@ static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev /* 1 - RLC_CGTT_MGCG_OVERRIDE */ def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); - if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 2, 1)) + if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 2, 1)) data &= ~RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK; data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK | @@ -4720,7 +4713,7 @@ static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev /* 1 - MGCG_OVERRIDE */ def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); - if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 2, 1)) + if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 2, 1)) data |= RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK; data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK | @@ -4826,7 +4819,7 @@ static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev /* enable cgcg FSM(0x0000363F) */ def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL); - if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 1)) + if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 1)) data = (0x2000 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK; else @@ -4881,12 +4874,11 @@ static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev, return 0; } -static void gfx_v9_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid) +static void gfx_v9_0_update_spm_vmid_internal(struct amdgpu_device *adev, + unsigned int vmid) { u32 reg, data; - amdgpu_gfx_off_ctrl(adev, false); - reg = SOC15_REG_OFFSET(GC, 0, mmRLC_SPM_MC_CNTL); if (amdgpu_sriov_is_pp_one_vf(adev)) data = RREG32_NO_KIQ(reg); @@ -4900,6 +4892,13 @@ static void gfx_v9_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid) WREG32_SOC15_NO_KIQ(GC, 0, mmRLC_SPM_MC_CNTL, data); else WREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL, data); +} + +static void gfx_v9_0_update_spm_vmid(struct amdgpu_device *adev, unsigned int vmid) +{ + amdgpu_gfx_off_ctrl(adev, false); + + gfx_v9_0_update_spm_vmid_internal(adev, vmid); amdgpu_gfx_off_ctrl(adev, true); } @@ -4955,7 +4954,7 @@ static int gfx_v9_0_set_powergating_state(void *handle, struct amdgpu_device *adev = (struct amdgpu_device *)handle; bool enable = (state == AMD_PG_STATE_GATE); - switch (adev->ip_versions[GC_HWIP][0]) { + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { case IP_VERSION(9, 2, 2): case IP_VERSION(9, 1, 0): case IP_VERSION(9, 3, 0): @@ -5002,7 +5001,7 @@ static int gfx_v9_0_set_clockgating_state(void *handle, if (amdgpu_sriov_vf(adev)) return 0; - switch (adev->ip_versions[GC_HWIP][0]) { + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { case IP_VERSION(9, 0, 1): case IP_VERSION(9, 2, 1): case IP_VERSION(9, 4, 0): @@ -5052,7 +5051,7 @@ static void gfx_v9_0_get_clockgating_state(void *handle, u64 *flags) if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS; - if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 1)) { + if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 1)) { /* AMD_CG_SUPPORT_GFX_3D_CGCG */ data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D)); if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK) @@ -5230,6 +5229,9 @@ static void gfx_v9_0_ring_patch_de_meta(struct amdgpu_ring *ring, de_payload_cpu_addr = adev->virt.csa_cpu_addr + payload_offset; } + ((struct v9_de_ib_state *)de_payload_cpu_addr)->ib_completion_status = + IB_COMPLETION_STATUS_PREEMPTED; + if (offset + (payload_size >> 2) <= ring->buf_mask + 1) { memcpy((void *)&ring->ring[offset], de_payload_cpu_addr, payload_size); } else { @@ -6455,7 +6457,7 @@ static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev, return ret; } -static const char *vml2_mems[] = { +static const char * const vml2_mems[] = { "UTC_VML2_BANK_CACHE_0_BIGK_MEM0", "UTC_VML2_BANK_CACHE_0_BIGK_MEM1", "UTC_VML2_BANK_CACHE_0_4K_MEM0", @@ -6474,7 +6476,7 @@ static const char *vml2_mems[] = { "UTC_VML2_BANK_CACHE_3_4K_MEM1", }; -static const char *vml2_walker_mems[] = { +static const char * const vml2_walker_mems[] = { "UTC_VML2_CACHE_PDE0_MEM0", "UTC_VML2_CACHE_PDE0_MEM1", "UTC_VML2_CACHE_PDE1_MEM0", @@ -6484,7 +6486,7 @@ static const char *vml2_walker_mems[] = { "UTC_VML2_RDIF_LOG_FIFO", }; -static const char *atc_l2_cache_2m_mems[] = { +static const char * const atc_l2_cache_2m_mems[] = { "UTC_ATCL2_CACHE_2M_BANK0_WAY0_MEM", "UTC_ATCL2_CACHE_2M_BANK0_WAY1_MEM", "UTC_ATCL2_CACHE_2M_BANK1_WAY0_MEM", @@ -7088,7 +7090,7 @@ static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev) static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev) { - switch (adev->ip_versions[GC_HWIP][0]) { + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { case IP_VERSION(9, 0, 1): case IP_VERSION(9, 2, 1): case IP_VERSION(9, 4, 0): @@ -7107,7 +7109,7 @@ static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev) static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev) { /* init asci gds info */ - switch (adev->ip_versions[GC_HWIP][0]) { + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { case IP_VERSION(9, 0, 1): case IP_VERSION(9, 2, 1): case IP_VERSION(9, 4, 0): @@ -7129,7 +7131,7 @@ static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev) break; } - switch (adev->ip_versions[GC_HWIP][0]) { + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { case IP_VERSION(9, 0, 1): case IP_VERSION(9, 4, 0): adev->gds.gds_compute_max_wave_id = 0x7ff; @@ -7234,7 +7236,7 @@ static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev, * SE6,SH0 --> bitmap[2][1] * SE7,SH0 --> bitmap[3][1] */ - cu_info->bitmap[i % 4][j + i / 4] = bitmap; + cu_info->bitmap[0][i % 4][j + i / 4] = bitmap; for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) { if (bitmap & mask) { diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c index 63f6843a069e..065b2bd5f5a6 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c @@ -746,8 +746,6 @@ void gfx_v9_4_2_init_golden_registers(struct amdgpu_device *adev, die_id); break; } - - return; } void gfx_v9_4_2_debug_trap_config_init(struct amdgpu_device *adev, @@ -1548,8 +1546,8 @@ static void gfx_v9_4_2_log_utc_edc_count(struct amdgpu_device *adev, uint32_t ded_cnt) { uint32_t bank, way, mem; - static const char *vml2_way_str[] = { "BIGK", "4K" }; - static const char *utcl2_rounter_str[] = { "VMC", "APT" }; + static const char * const vml2_way_str[] = { "BIGK", "4K" }; + static const char * const utcl2_rounter_str[] = { "VMC", "APT" }; mem = instance % blk->num_mem_blocks; way = (instance / blk->num_mem_blocks) % blk->num_ways; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index 4f883b94f98e..4a09cc0d8ce0 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -196,16 +196,16 @@ static void gfx_v9_4_3_init_golden_registers(struct amdgpu_device *adev) num_xcc = NUM_XCC(adev->gfx.xcc_mask); for (i = 0; i < num_xcc; i++) { dev_inst = GET_INST(GC, i); - if (dev_inst >= 2) - WREG32_SOC15(GC, dev_inst, regGRBM_MCM_ADDR, 0x4); + WREG32_SOC15(GC, dev_inst, regGB_ADDR_CONFIG, + GOLDEN_GB_ADDR_CONFIG); /* Golden settings applied by driver for ASIC with rev_id 0 */ if (adev->rev_id == 0) { - WREG32_SOC15(GC, dev_inst, regGB_ADDR_CONFIG, - GOLDEN_GB_ADDR_CONFIG); - WREG32_FIELD15_PREREG(GC, dev_inst, TCP_UTCL1_CNTL1, REDUCE_FIFO_DEPTH_BY_2, 2); + } else { + WREG32_FIELD15_PREREG(GC, dev_inst, TCP_UTCL1_CNTL2, + SPARE, 0x1); } } } @@ -256,6 +256,7 @@ static int gfx_v9_4_3_ring_test_ring(struct amdgpu_ring *ring) xcc_offset = SOC15_REG_OFFSET(GC, 0, regSCRATCH_REG0); scratch_reg0_offset = SOC15_REG_OFFSET(GC, GET_INST(GC, ring->xcc_id), regSCRATCH_REG0); WREG32(scratch_reg0_offset, 0xCAFEDEAD); + tmp = RREG32(scratch_reg0_offset); r = amdgpu_ring_alloc(ring, 3); if (r) @@ -296,8 +297,8 @@ static int gfx_v9_4_3_ring_test_ib(struct amdgpu_ring *ring, long timeout) gpu_addr = adev->wb.gpu_addr + (index * 4); adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD); memset(&ib, 0, sizeof(ib)); - r = amdgpu_ib_get(adev, NULL, 16, - AMDGPU_IB_POOL_DIRECT, &ib); + + r = amdgpu_ib_get(adev, NULL, 20, AMDGPU_IB_POOL_DIRECT, &ib); if (r) goto err1; @@ -340,13 +341,11 @@ static uint64_t gfx_v9_4_3_get_gpu_clock_counter(struct amdgpu_device *adev) { uint64_t clock; - amdgpu_gfx_off_ctrl(adev, false); mutex_lock(&adev->gfx.gpu_clock_mutex); WREG32_SOC15(GC, GET_INST(GC, 0), regRLC_CAPTURE_GPU_CLOCK_COUNT, 1); clock = (uint64_t)RREG32_SOC15(GC, GET_INST(GC, 0), regRLC_GPU_CLOCK_COUNT_LSB) | ((uint64_t)RREG32_SOC15(GC, GET_INST(GC, 0), regRLC_GPU_CLOCK_COUNT_MSB) << 32ULL); mutex_unlock(&adev->gfx.gpu_clock_mutex); - amdgpu_gfx_off_ctrl(adev, true); return clock; } @@ -683,7 +682,7 @@ static int gfx_v9_4_3_gpu_early_init(struct amdgpu_device *adev) adev->gfx.funcs = &gfx_v9_4_3_gfx_funcs; adev->gfx.ras = &gfx_v9_4_3_ras; - switch (adev->ip_versions[GC_HWIP][0]) { + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { case IP_VERSION(9, 4, 3): adev->gfx.config.max_hw_contexts = 8; adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; @@ -864,11 +863,15 @@ static int gfx_v9_4_3_sw_init(void *handle) if (r) return r; - r = amdgpu_gfx_sysfs_init(adev); + r = amdgpu_gfx_ras_sw_init(adev); if (r) return r; - return amdgpu_gfx_ras_sw_init(adev); + + if (!amdgpu_sriov_vf(adev)) + r = amdgpu_gfx_sysfs_init(adev); + + return r; } static int gfx_v9_4_3_sw_fini(void *handle) @@ -889,7 +892,8 @@ static int gfx_v9_4_3_sw_fini(void *handle) gfx_v9_4_3_mec_fini(adev); amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj); gfx_v9_4_3_free_microcode(adev); - amdgpu_gfx_sysfs_fini(adev); + if (!amdgpu_sriov_vf(adev)) + amdgpu_gfx_sysfs_fini(adev); return 0; } @@ -901,6 +905,7 @@ static void gfx_v9_4_3_xcc_init_compute_vmid(struct amdgpu_device *adev, int i; uint32_t sh_mem_config; uint32_t sh_mem_bases; + uint32_t data; /* * Configure apertures: @@ -920,6 +925,11 @@ static void gfx_v9_4_3_xcc_init_compute_vmid(struct amdgpu_device *adev, /* CP and shaders */ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regSH_MEM_CONFIG, sh_mem_config); WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regSH_MEM_BASES, sh_mem_bases); + + /* Enable trap for each kfd vmid. */ + data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regSPI_GDBG_PER_VMID_CNTL); + data = REG_SET_FIELD(data, SPI_GDBG_PER_VMID_CNTL, TRAP_EN, 1); + WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regSPI_GDBG_PER_VMID_CNTL, data); } soc15_grbm_select(adev, 0, 0, 0, 0, GET_INST(GC, xcc_id)); mutex_unlock(&adev->srbm_mutex); @@ -1038,32 +1048,6 @@ static void gfx_v9_4_3_xcc_disable_gpa_mode(struct amdgpu_device *adev, int xcc_ WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCPC_PSP_DEBUG, data); } -static void gfx_v9_4_3_xcc_program_xcc_id(struct amdgpu_device *adev, - int xcc_id) -{ - uint32_t tmp = 0; - int num_xcc; - - num_xcc = NUM_XCC(adev->gfx.xcc_mask); - switch (num_xcc) { - /* directly config VIRTUAL_XCC_ID to 0 for 1-XCC */ - case 1: - WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HYP_XCP_CTL, 0x8); - break; - case 2: - case 4: - case 6: - case 8: - tmp = (xcc_id % adev->gfx.num_xcc_per_xcp) << REG_FIELD_SHIFT(CP_HYP_XCP_CTL, VIRTUAL_XCC_ID); - tmp = tmp | (adev->gfx.num_xcc_per_xcp << REG_FIELD_SHIFT(CP_HYP_XCP_CTL, NUM_XCC_IN_XCP)); - WREG32_SOC15(GC, GET_INST(GC, xcc_id), regCP_HYP_XCP_CTL, tmp); - - break; - default: - break; - } -} - static bool gfx_v9_4_3_is_rlc_enabled(struct amdgpu_device *adev) { uint32_t rlc_setting; @@ -1102,6 +1086,25 @@ static void gfx_v9_4_3_xcc_unset_safe_mode(struct amdgpu_device *adev, WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_SAFE_MODE, data); } +static void gfx_v9_4_3_init_rlcg_reg_access_ctrl(struct amdgpu_device *adev) +{ + int xcc_id, num_xcc; + struct amdgpu_rlcg_reg_access_ctrl *reg_access_ctrl; + + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + for (xcc_id = 0; xcc_id < num_xcc; xcc_id++) { + reg_access_ctrl = &adev->gfx.rlc.reg_access_ctrl[GET_INST(GC, xcc_id)]; + reg_access_ctrl->scratch_reg0 = SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regSCRATCH_REG0); + reg_access_ctrl->scratch_reg1 = SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regSCRATCH_REG1); + reg_access_ctrl->scratch_reg2 = SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regSCRATCH_REG2); + reg_access_ctrl->scratch_reg3 = SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regSCRATCH_REG3); + reg_access_ctrl->grbm_cntl = SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regGRBM_GFX_CNTL); + reg_access_ctrl->grbm_idx = SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regGRBM_GFX_INDEX); + reg_access_ctrl->spare_int = SOC15_REG_OFFSET(GC, GET_INST(GC, xcc_id), regRLC_SPARE_INT); + } + adev->gfx.rlc.rlcg_reg_access_supported = true; +} + static int gfx_v9_4_3_rlc_init(struct amdgpu_device *adev) { /* init spm vmid with 0xf */ @@ -1921,9 +1924,6 @@ static int gfx_v9_4_3_xcc_cp_resume(struct amdgpu_device *adev, int xcc_id) return r; } - /* set the virtual and physical id based on partition_mode */ - gfx_v9_4_3_xcc_program_xcc_id(adev, xcc_id); - r = gfx_v9_4_3_xcc_kiq_resume(adev, xcc_id); if (r) return r; @@ -2182,6 +2182,9 @@ static int gfx_v9_4_3_early_init(void *handle) gfx_v9_4_3_set_gds_init(adev); gfx_v9_4_3_set_rlc_funcs(adev); + /* init rlcg reg access ctrl */ + gfx_v9_4_3_init_rlcg_reg_access_ctrl(adev); + return gfx_v9_4_3_init_microcode(adev); } @@ -2198,6 +2201,10 @@ static int gfx_v9_4_3_late_init(void *handle) if (r) return r; + if (adev->gfx.ras && + adev->gfx.ras->enable_watchdog_timer) + adev->gfx.ras->enable_watchdog_timer(adev); + return 0; } @@ -2221,15 +2228,6 @@ static void gfx_v9_4_3_xcc_update_sram_fgcg(struct amdgpu_device *adev, WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CGTT_MGCG_OVERRIDE, data); - def = data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CLK_CNTL); - - if (enable) - data &= ~RLC_CLK_CNTL__RLC_SRAM_CLK_GATER_OVERRIDE_MASK; - else - data |= RLC_CLK_CNTL__RLC_SRAM_CLK_GATER_OVERRIDE_MASK; - - if (def != data) - WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CLK_CNTL, data); } static void gfx_v9_4_3_xcc_update_repeater_fgcg(struct amdgpu_device *adev, @@ -2433,7 +2431,7 @@ static int gfx_v9_4_3_set_clockgating_state(void *handle, return 0; num_xcc = NUM_XCC(adev->gfx.xcc_mask); - switch (adev->ip_versions[GC_HWIP][0]) { + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { case IP_VERSION(9, 4, 3): for (i = 0; i < num_xcc; i++) gfx_v9_4_3_xcc_update_gfx_clock_gating( @@ -2741,16 +2739,16 @@ static void gfx_v9_4_3_xcc_set_compute_eop_interrupt_state( switch (state) { case AMDGPU_IRQ_STATE_DISABLE: - mec_int_cntl = RREG32(mec_int_cntl_reg); + mec_int_cntl = RREG32_XCC(mec_int_cntl_reg, xcc_id); mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, TIME_STAMP_INT_ENABLE, 0); - WREG32(mec_int_cntl_reg, mec_int_cntl); + WREG32_XCC(mec_int_cntl_reg, mec_int_cntl, xcc_id); break; case AMDGPU_IRQ_STATE_ENABLE: - mec_int_cntl = RREG32(mec_int_cntl_reg); + mec_int_cntl = RREG32_XCC(mec_int_cntl_reg, xcc_id); mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL, TIME_STAMP_INT_ENABLE, 1); - WREG32(mec_int_cntl_reg, mec_int_cntl); + WREG32_XCC(mec_int_cntl_reg, mec_int_cntl, xcc_id); break; default: break; @@ -3656,19 +3654,19 @@ static const struct amdgpu_gfx_ras_reg_entry gfx_v9_4_3_ce_reg_list[] = { AMDGPU_GFX_GC_CANE_MEM, 1}, {{AMDGPU_RAS_REG_ENTRY(GC, 0, regSPI_CE_ERR_STATUS_LO, regSPI_CE_ERR_STATUS_HI), 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "SPI"}, - AMDGPU_GFX_SPI_MEM, 8}, + AMDGPU_GFX_SPI_MEM, 1}, {{AMDGPU_RAS_REG_ENTRY(GC, 0, regSP0_CE_ERR_STATUS_LO, regSP0_CE_ERR_STATUS_HI), 10, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "SP0"}, - AMDGPU_GFX_SP_MEM, 1}, + AMDGPU_GFX_SP_MEM, 4}, {{AMDGPU_RAS_REG_ENTRY(GC, 0, regSP1_CE_ERR_STATUS_LO, regSP1_CE_ERR_STATUS_HI), 10, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "SP1"}, - AMDGPU_GFX_SP_MEM, 1}, + AMDGPU_GFX_SP_MEM, 4}, {{AMDGPU_RAS_REG_ENTRY(GC, 0, regSQ_CE_ERR_STATUS_LO, regSQ_CE_ERR_STATUS_HI), 10, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "SQ"}, - AMDGPU_GFX_SQ_MEM, 8}, + AMDGPU_GFX_SQ_MEM, 4}, {{AMDGPU_RAS_REG_ENTRY(GC, 0, regSQC_CE_EDC_LO, regSQC_CE_EDC_HI), 5, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "SQC"}, - AMDGPU_GFX_SQC_MEM, 8}, + AMDGPU_GFX_SQC_MEM, 4}, {{AMDGPU_RAS_REG_ENTRY(GC, 0, regTCX_CE_ERR_STATUS_LO, regTCX_CE_ERR_STATUS_HI), 2, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "TCX"}, AMDGPU_GFX_TCX_MEM, 1}, @@ -3677,22 +3675,22 @@ static const struct amdgpu_gfx_ras_reg_entry gfx_v9_4_3_ce_reg_list[] = { AMDGPU_GFX_TCC_MEM, 1}, {{AMDGPU_RAS_REG_ENTRY(GC, 0, regTA_CE_EDC_LO, regTA_CE_EDC_HI), 10, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "TA"}, - AMDGPU_GFX_TA_MEM, 8}, + AMDGPU_GFX_TA_MEM, 4}, {{AMDGPU_RAS_REG_ENTRY(GC, 0, regTCI_CE_EDC_LO_REG, regTCI_CE_EDC_HI_REG), - 31, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "TCI"}, + 27, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "TCI"}, AMDGPU_GFX_TCI_MEM, 1}, {{AMDGPU_RAS_REG_ENTRY(GC, 0, regTCP_CE_EDC_LO_REG, regTCP_CE_EDC_HI_REG), 10, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "TCP"}, - AMDGPU_GFX_TCP_MEM, 8}, + AMDGPU_GFX_TCP_MEM, 4}, {{AMDGPU_RAS_REG_ENTRY(GC, 0, regTD_CE_EDC_LO, regTD_CE_EDC_HI), 10, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "TD"}, - AMDGPU_GFX_TD_MEM, 8}, + AMDGPU_GFX_TD_MEM, 4}, {{AMDGPU_RAS_REG_ENTRY(GC, 0, regGCEA_CE_ERR_STATUS_LO, regGCEA_CE_ERR_STATUS_HI), 16, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "GCEA"}, AMDGPU_GFX_GCEA_MEM, 1}, {{AMDGPU_RAS_REG_ENTRY(GC, 0, regLDS_CE_ERR_STATUS_LO, regLDS_CE_ERR_STATUS_HI), 10, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "LDS"}, - AMDGPU_GFX_LDS_MEM, 1}, + AMDGPU_GFX_LDS_MEM, 4}, }; static const struct amdgpu_gfx_ras_reg_entry gfx_v9_4_3_ue_reg_list[] = { @@ -3716,19 +3714,19 @@ static const struct amdgpu_gfx_ras_reg_entry gfx_v9_4_3_ue_reg_list[] = { AMDGPU_GFX_GC_CANE_MEM, 1}, {{AMDGPU_RAS_REG_ENTRY(GC, 0, regSPI_UE_ERR_STATUS_LO, regSPI_UE_ERR_STATUS_HI), 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "SPI"}, - AMDGPU_GFX_SPI_MEM, 8}, + AMDGPU_GFX_SPI_MEM, 1}, {{AMDGPU_RAS_REG_ENTRY(GC, 0, regSP0_UE_ERR_STATUS_LO, regSP0_UE_ERR_STATUS_HI), 10, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "SP0"}, - AMDGPU_GFX_SP_MEM, 1}, + AMDGPU_GFX_SP_MEM, 4}, {{AMDGPU_RAS_REG_ENTRY(GC, 0, regSP1_UE_ERR_STATUS_LO, regSP1_UE_ERR_STATUS_HI), 10, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "SP1"}, - AMDGPU_GFX_SP_MEM, 1}, + AMDGPU_GFX_SP_MEM, 4}, {{AMDGPU_RAS_REG_ENTRY(GC, 0, regSQ_UE_ERR_STATUS_LO, regSQ_UE_ERR_STATUS_HI), 10, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "SQ"}, - AMDGPU_GFX_SQ_MEM, 8}, + AMDGPU_GFX_SQ_MEM, 4}, {{AMDGPU_RAS_REG_ENTRY(GC, 0, regSQC_UE_EDC_LO, regSQC_UE_EDC_HI), 5, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "SQC"}, - AMDGPU_GFX_SQC_MEM, 8}, + AMDGPU_GFX_SQC_MEM, 4}, {{AMDGPU_RAS_REG_ENTRY(GC, 0, regTCX_UE_ERR_STATUS_LO, regTCX_UE_ERR_STATUS_HI), 2, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "TCX"}, AMDGPU_GFX_TCX_MEM, 1}, @@ -3737,16 +3735,16 @@ static const struct amdgpu_gfx_ras_reg_entry gfx_v9_4_3_ue_reg_list[] = { AMDGPU_GFX_TCC_MEM, 1}, {{AMDGPU_RAS_REG_ENTRY(GC, 0, regTA_UE_EDC_LO, regTA_UE_EDC_HI), 10, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "TA"}, - AMDGPU_GFX_TA_MEM, 8}, + AMDGPU_GFX_TA_MEM, 4}, {{AMDGPU_RAS_REG_ENTRY(GC, 0, regTCI_UE_EDC_LO_REG, regTCI_UE_EDC_HI_REG), - 31, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "TCI"}, + 27, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "TCI"}, AMDGPU_GFX_TCI_MEM, 1}, {{AMDGPU_RAS_REG_ENTRY(GC, 0, regTCP_UE_EDC_LO_REG, regTCP_UE_EDC_HI_REG), 10, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "TCP"}, - AMDGPU_GFX_TCP_MEM, 8}, + AMDGPU_GFX_TCP_MEM, 4}, {{AMDGPU_RAS_REG_ENTRY(GC, 0, regTD_UE_EDC_LO, regTD_UE_EDC_HI), 10, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "TD"}, - AMDGPU_GFX_TD_MEM, 8}, + AMDGPU_GFX_TD_MEM, 4}, {{AMDGPU_RAS_REG_ENTRY(GC, 0, regTCA_UE_ERR_STATUS_LO, regTCA_UE_ERR_STATUS_HI), 2, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "TCA"}, AMDGPU_GFX_TCA_MEM, 1}, @@ -3755,11 +3753,7 @@ static const struct amdgpu_gfx_ras_reg_entry gfx_v9_4_3_ue_reg_list[] = { AMDGPU_GFX_GCEA_MEM, 1}, {{AMDGPU_RAS_REG_ENTRY(GC, 0, regLDS_UE_ERR_STATUS_LO, regLDS_UE_ERR_STATUS_HI), 10, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "LDS"}, - AMDGPU_GFX_LDS_MEM, 1}, -}; - -static const struct soc15_reg_entry gfx_v9_4_3_ea_err_status_regs = { - SOC15_REG_ENTRY(GC, 0, regGCEA_ERR_STATUS), 0, 1, 16 + AMDGPU_GFX_LDS_MEM, 4}, }; static void gfx_v9_4_3_inst_query_ras_err_count(struct amdgpu_device *adev, @@ -3769,6 +3763,12 @@ static void gfx_v9_4_3_inst_query_ras_err_count(struct amdgpu_device *adev, unsigned long ce_count = 0, ue_count = 0; uint32_t i, j, k; + /* NOTE: convert xcc_id to physical XCD ID (XCD0 or XCD1) */ + struct amdgpu_smuio_mcm_config_info mcm_info = { + .socket_id = adev->smuio.funcs->get_socket_id(adev), + .die_id = xcc_id & 0x01 ? 1 : 0, + }; + mutex_lock(&adev->grbm_idx_mutex); for (i = 0; i < ARRAY_SIZE(gfx_v9_4_3_ce_reg_list); i++) { @@ -3800,6 +3800,27 @@ static void gfx_v9_4_3_inst_query_ras_err_count(struct amdgpu_device *adev, } } + /* handle extra register entries of UE */ + for (; i < ARRAY_SIZE(gfx_v9_4_3_ue_reg_list); i++) { + for (j = 0; j < gfx_v9_4_3_ue_reg_list[i].se_num; j++) { + for (k = 0; k < gfx_v9_4_3_ue_reg_list[i].reg_entry.reg_inst; k++) { + /* no need to select if instance number is 1 */ + if (gfx_v9_4_3_ue_reg_list[i].se_num > 1 || + gfx_v9_4_3_ue_reg_list[i].reg_entry.reg_inst > 1) + gfx_v9_4_3_xcc_select_se_sh(adev, j, 0, k, xcc_id); + + amdgpu_ras_inst_query_ras_error_count(adev, + &(gfx_v9_4_3_ue_reg_list[i].reg_entry), + 1, + gfx_v9_4_3_ras_mem_list_array[gfx_v9_4_3_ue_reg_list[i].mem_id_type].mem_id_ent, + gfx_v9_4_3_ras_mem_list_array[gfx_v9_4_3_ue_reg_list[i].mem_id_type].size, + GET_INST(GC, xcc_id), + AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE, + &ue_count); + } + } + } + gfx_v9_4_3_xcc_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, xcc_id); mutex_unlock(&adev->grbm_idx_mutex); @@ -3807,8 +3828,8 @@ static void gfx_v9_4_3_inst_query_ras_err_count(struct amdgpu_device *adev, /* the caller should make sure initialize value of * err_data->ue_count and err_data->ce_count */ - err_data->ce_count += ce_count; - err_data->ue_count += ue_count; + amdgpu_ras_error_statistic_ue_count(err_data, &mcm_info, ue_count); + amdgpu_ras_error_statistic_ce_count(err_data, &mcm_info, ce_count); } static void gfx_v9_4_3_inst_reset_ras_err_count(struct amdgpu_device *adev, @@ -3839,36 +3860,20 @@ static void gfx_v9_4_3_inst_reset_ras_err_count(struct amdgpu_device *adev, } } - gfx_v9_4_3_xcc_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, - xcc_id); - mutex_unlock(&adev->grbm_idx_mutex); -} - -static void gfx_v9_4_3_inst_query_ea_err_status(struct amdgpu_device *adev, - int xcc_id) -{ - uint32_t i, j; - uint32_t reg_value; - - mutex_lock(&adev->grbm_idx_mutex); + /* handle extra register entries of UE */ + for (; i < ARRAY_SIZE(gfx_v9_4_3_ue_reg_list); i++) { + for (j = 0; j < gfx_v9_4_3_ue_reg_list[i].se_num; j++) { + for (k = 0; k < gfx_v9_4_3_ue_reg_list[i].reg_entry.reg_inst; k++) { + /* no need to select if instance number is 1 */ + if (gfx_v9_4_3_ue_reg_list[i].se_num > 1 || + gfx_v9_4_3_ue_reg_list[i].reg_entry.reg_inst > 1) + gfx_v9_4_3_xcc_select_se_sh(adev, j, 0, k, xcc_id); - for (i = 0; i < gfx_v9_4_3_ea_err_status_regs.se_num; i++) { - for (j = 0; j < gfx_v9_4_3_ea_err_status_regs.instance; j++) { - gfx_v9_4_3_xcc_select_se_sh(adev, i, 0, j, xcc_id); - reg_value = RREG32_SOC15(GC, GET_INST(GC, xcc_id), - regGCEA_ERR_STATUS); - if (REG_GET_FIELD(reg_value, GCEA_ERR_STATUS, SDP_RDRSP_STATUS) || - REG_GET_FIELD(reg_value, GCEA_ERR_STATUS, SDP_WRRSP_STATUS) || - REG_GET_FIELD(reg_value, GCEA_ERR_STATUS, SDP_RDRSP_DATAPARITY_ERROR)) { - dev_warn(adev->dev, - "GCEA err detected at instance: %d, status: 0x%x!\n", - j, reg_value); + amdgpu_ras_inst_reset_ras_error_count(adev, + &(gfx_v9_4_3_ue_reg_list[i].reg_entry), + 1, + GET_INST(GC, xcc_id)); } - /* clear after read */ - reg_value = REG_SET_FIELD(reg_value, GCEA_ERR_STATUS, - CLEAR_ERROR_STATUS, 0x1); - WREG32_SOC15(GC, GET_INST(GC, xcc_id), regGCEA_ERR_STATUS, - reg_value); } } @@ -3981,7 +3986,6 @@ static void gfx_v9_4_3_inst_query_sq_timeout_status(struct amdgpu_device *adev, static void gfx_v9_4_3_inst_query_ras_err_status(struct amdgpu_device *adev, void *ras_error_status, int xcc_id) { - gfx_v9_4_3_inst_query_ea_err_status(adev, xcc_id); gfx_v9_4_3_inst_query_utc_err_status(adev, xcc_id); gfx_v9_4_3_inst_query_sq_timeout_status(adev, xcc_id); } @@ -3994,27 +3998,6 @@ static void gfx_v9_4_3_inst_reset_utc_err_status(struct amdgpu_device *adev, WREG32_SOC15(GC, GET_INST(GC, xcc_id), regVML2_WALKER_MEM_ECC_STATUS, 0x3); } -static void gfx_v9_4_3_inst_reset_ea_err_status(struct amdgpu_device *adev, - int xcc_id) -{ - uint32_t i, j; - uint32_t value; - - mutex_lock(&adev->grbm_idx_mutex); - for (i = 0; i < gfx_v9_4_3_ea_err_status_regs.se_num; i++) { - for (j = 0; j < gfx_v9_4_3_ea_err_status_regs.instance; j++) { - gfx_v9_4_3_xcc_select_se_sh(adev, i, 0, j, xcc_id); - value = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regGCEA_ERR_STATUS); - value = REG_SET_FIELD(value, GCEA_ERR_STATUS, - CLEAR_ERROR_STATUS, 0x1); - WREG32_SOC15(GC, GET_INST(GC, xcc_id), regGCEA_ERR_STATUS, value); - } - } - gfx_v9_4_3_xcc_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, - xcc_id); - mutex_unlock(&adev->grbm_idx_mutex); -} - static void gfx_v9_4_3_inst_reset_sq_timeout_status(struct amdgpu_device *adev, int xcc_id) { @@ -4040,10 +4023,38 @@ static void gfx_v9_4_3_inst_reset_ras_err_status(struct amdgpu_device *adev, void *ras_error_status, int xcc_id) { gfx_v9_4_3_inst_reset_utc_err_status(adev, xcc_id); - gfx_v9_4_3_inst_reset_ea_err_status(adev, xcc_id); gfx_v9_4_3_inst_reset_sq_timeout_status(adev, xcc_id); } +static void gfx_v9_4_3_inst_enable_watchdog_timer(struct amdgpu_device *adev, + void *ras_error_status, int xcc_id) +{ + uint32_t i; + uint32_t data; + + data = RREG32_SOC15(GC, GET_INST(GC, 0), regSQ_TIMEOUT_CONFIG); + data = REG_SET_FIELD(data, SQ_TIMEOUT_CONFIG, TIMEOUT_FATAL_DISABLE, + amdgpu_watchdog_timer.timeout_fatal_disable ? 1 : 0); + + if (amdgpu_watchdog_timer.timeout_fatal_disable && + (amdgpu_watchdog_timer.period < 1 || + amdgpu_watchdog_timer.period > 0x23)) { + dev_warn(adev->dev, "Watchdog period range is 1 to 0x23\n"); + amdgpu_watchdog_timer.period = 0x23; + } + data = REG_SET_FIELD(data, SQ_TIMEOUT_CONFIG, PERIOD_SEL, + amdgpu_watchdog_timer.period); + + mutex_lock(&adev->grbm_idx_mutex); + for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { + gfx_v9_4_3_xcc_select_se_sh(adev, i, 0xffffffff, 0xffffffff, xcc_id); + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regSQ_TIMEOUT_CONFIG, data); + } + gfx_v9_4_3_xcc_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, + xcc_id); + mutex_unlock(&adev->grbm_idx_mutex); +} + static void gfx_v9_4_3_query_ras_error_count(struct amdgpu_device *adev, void *ras_error_status) { @@ -4066,6 +4077,11 @@ static void gfx_v9_4_3_reset_ras_error_status(struct amdgpu_device *adev) amdgpu_gfx_ras_error_func(adev, NULL, gfx_v9_4_3_inst_reset_ras_err_status); } +static void gfx_v9_4_3_enable_watchdog_timer(struct amdgpu_device *adev) +{ + amdgpu_gfx_ras_error_func(adev, NULL, gfx_v9_4_3_inst_enable_watchdog_timer); +} + static const struct amd_ip_funcs gfx_v9_4_3_ip_funcs = { .name = "gfx_v9_4_3", .early_init = gfx_v9_4_3_early_init, @@ -4200,7 +4216,7 @@ static void gfx_v9_4_3_set_rlc_funcs(struct amdgpu_device *adev) static void gfx_v9_4_3_set_gds_init(struct amdgpu_device *adev) { /* init asci gds info */ - switch (adev->ip_versions[GC_HWIP][0]) { + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { case IP_VERSION(9, 4, 3): /* 9.4.3 removed all the GDS internal memory, * only support GWS opcode in kernel, like barrier @@ -4212,7 +4228,7 @@ static void gfx_v9_4_3_set_gds_init(struct amdgpu_device *adev) break; } - switch (adev->ip_versions[GC_HWIP][0]) { + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { case IP_VERSION(9, 4, 3): /* deprecated for 9.4.3, no usage at all */ adev->gds.gds_compute_max_wave_id = 0; @@ -4228,7 +4244,7 @@ static void gfx_v9_4_3_set_gds_init(struct amdgpu_device *adev) } static void gfx_v9_4_3_set_user_cu_inactive_bitmap(struct amdgpu_device *adev, - u32 bitmap) + u32 bitmap, int xcc_id) { u32 data; @@ -4238,15 +4254,15 @@ static void gfx_v9_4_3_set_user_cu_inactive_bitmap(struct amdgpu_device *adev, data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT; data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK; - WREG32_SOC15(GC, GET_INST(GC, 0), regGC_USER_SHADER_ARRAY_CONFIG, data); + WREG32_SOC15(GC, GET_INST(GC, xcc_id), regGC_USER_SHADER_ARRAY_CONFIG, data); } -static u32 gfx_v9_4_3_get_cu_active_bitmap(struct amdgpu_device *adev) +static u32 gfx_v9_4_3_get_cu_active_bitmap(struct amdgpu_device *adev, int xcc_id) { u32 data, mask; - data = RREG32_SOC15(GC, GET_INST(GC, 0), regCC_GC_SHADER_ARRAY_CONFIG); - data |= RREG32_SOC15(GC, GET_INST(GC, 0), regGC_USER_SHADER_ARRAY_CONFIG); + data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regCC_GC_SHADER_ARRAY_CONFIG); + data |= RREG32_SOC15(GC, GET_INST(GC, xcc_id), regGC_USER_SHADER_ARRAY_CONFIG); data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK; data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT; @@ -4259,7 +4275,7 @@ static u32 gfx_v9_4_3_get_cu_active_bitmap(struct amdgpu_device *adev) static int gfx_v9_4_3_get_cu_info(struct amdgpu_device *adev, struct amdgpu_cu_info *cu_info) { - int i, j, k, counter, active_cu_number = 0; + int i, j, k, counter, xcc_id, active_cu_number = 0; u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0; unsigned disable_masks[4 * 4]; @@ -4278,46 +4294,38 @@ static int gfx_v9_4_3_get_cu_info(struct amdgpu_device *adev, adev->gfx.config.max_sh_per_se); mutex_lock(&adev->grbm_idx_mutex); - for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { - for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { - mask = 1; - ao_bitmap = 0; - counter = 0; - gfx_v9_4_3_xcc_select_se_sh(adev, i, j, 0xffffffff, 0); - gfx_v9_4_3_set_user_cu_inactive_bitmap( - adev, disable_masks[i * adev->gfx.config.max_sh_per_se + j]); - bitmap = gfx_v9_4_3_get_cu_active_bitmap(adev); - - /* - * The bitmap(and ao_cu_bitmap) in cu_info structure is - * 4x4 size array, and it's usually suitable for Vega - * ASICs which has 4*2 SE/SH layout. - * But for Arcturus, SE/SH layout is changed to 8*1. - * To mostly reduce the impact, we make it compatible - * with current bitmap array as below: - * SE4,SH0 --> bitmap[0][1] - * SE5,SH0 --> bitmap[1][1] - * SE6,SH0 --> bitmap[2][1] - * SE7,SH0 --> bitmap[3][1] - */ - cu_info->bitmap[i % 4][j + i / 4] = bitmap; - - for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) { - if (bitmap & mask) { - if (counter < adev->gfx.config.max_cu_per_sh) - ao_bitmap |= mask; - counter++; + for (xcc_id = 0; xcc_id < NUM_XCC(adev->gfx.xcc_mask); xcc_id++) { + for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { + for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { + mask = 1; + ao_bitmap = 0; + counter = 0; + gfx_v9_4_3_xcc_select_se_sh(adev, i, j, 0xffffffff, xcc_id); + gfx_v9_4_3_set_user_cu_inactive_bitmap( + adev, + disable_masks[i * adev->gfx.config.max_sh_per_se + j], + xcc_id); + bitmap = gfx_v9_4_3_get_cu_active_bitmap(adev, xcc_id); + + cu_info->bitmap[xcc_id][i][j] = bitmap; + + for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) { + if (bitmap & mask) { + if (counter < adev->gfx.config.max_cu_per_sh) + ao_bitmap |= mask; + counter++; + } + mask <<= 1; } - mask <<= 1; + active_cu_number += counter; + if (i < 2 && j < 2) + ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8)); + cu_info->ao_cu_bitmap[i][j] = ao_bitmap; } - active_cu_number += counter; - if (i < 2 && j < 2) - ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8)); - cu_info->ao_cu_bitmap[i % 4][j + i / 4] = ao_bitmap; } + gfx_v9_4_3_xcc_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, + xcc_id); } - gfx_v9_4_3_xcc_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, - 0); mutex_unlock(&adev->grbm_idx_mutex); cu_info->number = active_cu_number; @@ -4331,7 +4339,7 @@ const struct amdgpu_ip_block_version gfx_v9_4_3_ip_block = { .type = AMD_IP_BLOCK_TYPE_GFX, .major = 9, .minor = 4, - .rev = 0, + .rev = 3, .funcs = &gfx_v9_4_3_ip_funcs, }; @@ -4394,4 +4402,5 @@ struct amdgpu_gfx_ras gfx_v9_4_3_ras = { .ras_block = { .hw_ops = &gfx_v9_4_3_ras_ops, }, + .enable_watchdog_timer = &gfx_v9_4_3_enable_watchdog_timer, }; diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v11_5_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v11_5_0.c new file mode 100644 index 000000000000..f9949fedfbb9 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v11_5_0.c @@ -0,0 +1,516 @@ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "amdgpu.h" +#include "gfxhub_v11_5_0.h" + +#include "gc/gc_11_5_0_offset.h" +#include "gc/gc_11_5_0_sh_mask.h" + +#include "navi10_enum.h" +#include "soc15_common.h" + +#define regGCVM_L2_CNTL3_DEFAULT 0x80100007 +#define regGCVM_L2_CNTL4_DEFAULT 0x000000c1 +#define regGCVM_L2_CNTL5_DEFAULT 0x00003fe0 + + +static const char *gfxhub_client_ids[] = { + "CB/DB", + "Reserved", + "GE1", + "GE2", + "CPF", + "CPC", + "CPG", + "RLC", + "TCP", + "SQC (inst)", + "SQC (data)", + "SQG", + "Reserved", + "SDMA0", + "SDMA1", + "GCR", + "SDMA2", + "SDMA3", +}; + +static uint32_t gfxhub_v11_5_0_get_invalidate_req(unsigned int vmid, + uint32_t flush_type) +{ + u32 req = 0; + + /* invalidate using legacy mode on vmid*/ + req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, + PER_VMID_INVALIDATE_REQ, 1 << vmid); + req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, FLUSH_TYPE, flush_type); + req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PTES, 1); + req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE0, 1); + req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE1, 1); + req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE2, 1); + req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, INVALIDATE_L1_PTES, 1); + req = REG_SET_FIELD(req, GCVM_INVALIDATE_ENG0_REQ, + CLEAR_PROTECTION_FAULT_STATUS_ADDR, 0); + + return req; +} + +static void +gfxhub_v11_5_0_print_l2_protection_fault_status(struct amdgpu_device *adev, + uint32_t status) +{ + u32 cid = REG_GET_FIELD(status, + GCVM_L2_PROTECTION_FAULT_STATUS, CID); + + dev_err(adev->dev, + "GCVM_L2_PROTECTION_FAULT_STATUS:0x%08X\n", + status); + dev_err(adev->dev, "\t Faulty UTCL2 client ID: %s (0x%x)\n", + cid >= ARRAY_SIZE(gfxhub_client_ids) ? "unknown" : gfxhub_client_ids[cid], + cid); + dev_err(adev->dev, "\t MORE_FAULTS: 0x%lx\n", + REG_GET_FIELD(status, + GCVM_L2_PROTECTION_FAULT_STATUS, MORE_FAULTS)); + dev_err(adev->dev, "\t WALKER_ERROR: 0x%lx\n", + REG_GET_FIELD(status, + GCVM_L2_PROTECTION_FAULT_STATUS, WALKER_ERROR)); + dev_err(adev->dev, "\t PERMISSION_FAULTS: 0x%lx\n", + REG_GET_FIELD(status, + GCVM_L2_PROTECTION_FAULT_STATUS, PERMISSION_FAULTS)); + dev_err(adev->dev, "\t MAPPING_ERROR: 0x%lx\n", + REG_GET_FIELD(status, + GCVM_L2_PROTECTION_FAULT_STATUS, MAPPING_ERROR)); + dev_err(adev->dev, "\t RW: 0x%lx\n", + REG_GET_FIELD(status, + GCVM_L2_PROTECTION_FAULT_STATUS, RW)); +} + +static u64 gfxhub_v11_5_0_get_fb_location(struct amdgpu_device *adev) +{ + u64 base = RREG32_SOC15(GC, 0, regGCMC_VM_FB_LOCATION_BASE); + + base &= GCMC_VM_FB_LOCATION_BASE__FB_BASE_MASK; + base <<= 24; + + return base; +} + +static u64 gfxhub_v11_5_0_get_mc_fb_offset(struct amdgpu_device *adev) +{ + return (u64)RREG32_SOC15(GC, 0, regGCMC_VM_FB_OFFSET) << 24; +} + +static void gfxhub_v11_5_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid, + uint64_t page_table_base) +{ + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)]; + + WREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32, + hub->ctx_addr_distance * vmid, + lower_32_bits(page_table_base)); + + WREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32, + hub->ctx_addr_distance * vmid, + upper_32_bits(page_table_base)); +} + +static void gfxhub_v11_5_0_init_gart_aperture_regs(struct amdgpu_device *adev) +{ + uint64_t pt_base = amdgpu_gmc_pd_addr(adev->gart.bo); + + gfxhub_v11_5_0_setup_vm_pt_regs(adev, 0, pt_base); + + WREG32_SOC15(GC, 0, regGCVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32, + (u32)(adev->gmc.gart_start >> 12)); + WREG32_SOC15(GC, 0, regGCVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32, + (u32)(adev->gmc.gart_start >> 44)); + + WREG32_SOC15(GC, 0, regGCVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32, + (u32)(adev->gmc.gart_end >> 12)); + WREG32_SOC15(GC, 0, regGCVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32, + (u32)(adev->gmc.gart_end >> 44)); +} + +static void gfxhub_v11_5_0_init_system_aperture_regs(struct amdgpu_device *adev) +{ + uint64_t value; + + WREG32_SOC15(GC, 0, regGCMC_VM_AGP_BASE, 0); + WREG32_SOC15(GC, 0, regGCMC_VM_AGP_BOT, adev->gmc.agp_start >> 24); + WREG32_SOC15(GC, 0, regGCMC_VM_AGP_TOP, adev->gmc.agp_end >> 24); + + /* Program the system aperture low logical page number. */ + WREG32_SOC15(GC, 0, regGCMC_VM_SYSTEM_APERTURE_LOW_ADDR, + min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18); + + WREG32_SOC15(GC, 0, regGCMC_VM_SYSTEM_APERTURE_HIGH_ADDR, + max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18); + + /* Set default page address. */ + value = amdgpu_gmc_vram_mc2pa(adev, adev->mem_scratch.gpu_addr); + WREG32_SOC15(GC, 0, regGCMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB, + (u32)(value >> 12)); + WREG32_SOC15(GC, 0, regGCMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB, + (u32)(value >> 44)); + + /* Program "protection fault". */ + WREG32_SOC15(GC, 0, regGCVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32, + (u32)(adev->dummy_page_addr >> 12)); + WREG32_SOC15(GC, 0, regGCVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32, + (u32)((u64)adev->dummy_page_addr >> 44)); + + WREG32_FIELD15_PREREG(GC, 0, GCVM_L2_PROTECTION_FAULT_CNTL2, + ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY, 1); +} + +static void gfxhub_v11_5_0_init_tlb_regs(struct amdgpu_device *adev) +{ + uint32_t tmp; + + /* Setup TLB control */ + tmp = RREG32_SOC15(GC, 0, regGCMC_VM_MX_L1_TLB_CNTL); + + tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 1); + tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE, 3); + tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL, + ENABLE_ADVANCED_DRIVER_MODEL, 1); + tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL, + SYSTEM_APERTURE_UNMAPPED_ACCESS, 0); + tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL, ECO_BITS, 0); + tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL, + MTYPE, MTYPE_UC); /* UC, uncached */ + + WREG32_SOC15(GC, 0, regGCMC_VM_MX_L1_TLB_CNTL, tmp); +} + +static void gfxhub_v11_5_0_init_cache_regs(struct amdgpu_device *adev) +{ + uint32_t tmp; + + /* These registers are not accessible to VF-SRIOV. + * The PF will program them instead. + */ + if (amdgpu_sriov_vf(adev)) + return; + + /* Setup L2 cache */ + tmp = RREG32_SOC15(GC, 0, regGCVM_L2_CNTL); + tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, ENABLE_L2_CACHE, 1); + tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, ENABLE_L2_FRAGMENT_PROCESSING, 0); + tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, + ENABLE_DEFAULT_PAGE_OUT_TO_SYSTEM_MEMORY, 1); + /* XXX for emulation, Refer to closed source code.*/ + tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, + L2_PDE0_CACHE_TAG_GENERATION_MODE, 0); + tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, PDE_FAULT_CLASSIFICATION, 0); + tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, CONTEXT1_IDENTITY_ACCESS_MODE, 1); + tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, IDENTITY_MODE_FRAGMENT_SIZE, 0); + WREG32_SOC15(GC, 0, regGCVM_L2_CNTL, tmp); + + tmp = RREG32_SOC15(GC, 0, regGCVM_L2_CNTL2); + tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS, 1); + tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL2, INVALIDATE_L2_CACHE, 1); + WREG32_SOC15(GC, 0, regGCVM_L2_CNTL2, tmp); + + tmp = regGCVM_L2_CNTL3_DEFAULT; + if (adev->gmc.translate_further) { + tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL3, BANK_SELECT, 12); + tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL3, + L2_CACHE_BIGK_FRAGMENT_SIZE, 9); + } else { + tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL3, BANK_SELECT, 9); + tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL3, + L2_CACHE_BIGK_FRAGMENT_SIZE, 6); + } + WREG32_SOC15(GC, 0, regGCVM_L2_CNTL3, tmp); + + tmp = regGCVM_L2_CNTL4_DEFAULT; + tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL4, VMC_TAP_PDE_REQUEST_PHYSICAL, 0); + tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL4, VMC_TAP_PTE_REQUEST_PHYSICAL, 0); + WREG32_SOC15(GC, 0, regGCVM_L2_CNTL4, tmp); + + tmp = regGCVM_L2_CNTL5_DEFAULT; + tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL5, L2_CACHE_SMALLK_FRAGMENT_SIZE, 0); + WREG32_SOC15(GC, 0, regGCVM_L2_CNTL5, tmp); +} + +static void gfxhub_v11_5_0_enable_system_domain(struct amdgpu_device *adev) +{ + uint32_t tmp; + + tmp = RREG32_SOC15(GC, 0, regGCVM_CONTEXT0_CNTL); + tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT0_CNTL, ENABLE_CONTEXT, 1); + tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT0_CNTL, PAGE_TABLE_DEPTH, 0); + tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT0_CNTL, + RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0); + WREG32_SOC15(GC, 0, regGCVM_CONTEXT0_CNTL, tmp); +} + +static void gfxhub_v11_5_0_disable_identity_aperture(struct amdgpu_device *adev) +{ + /* These registers are not accessible to VF-SRIOV. + * The PF will program them instead. + */ + if (amdgpu_sriov_vf(adev)) + return; + + WREG32_SOC15(GC, 0, regGCVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_LO32, + 0xFFFFFFFF); + WREG32_SOC15(GC, 0, regGCVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_HI32, + 0x0000000F); + + WREG32_SOC15(GC, 0, regGCVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_LO32, + 0); + WREG32_SOC15(GC, 0, regGCVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_HI32, + 0); + + WREG32_SOC15(GC, 0, regGCVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_LO32, 0); + WREG32_SOC15(GC, 0, regGCVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_HI32, 0); + +} + +static void gfxhub_v11_5_0_setup_vmid_config(struct amdgpu_device *adev) +{ + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)]; + int i; + uint32_t tmp; + + for (i = 0; i <= 14; i++) { + tmp = RREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT1_CNTL, i * hub->ctx_distance); + tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1); + tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH, + adev->vm_manager.num_level); + tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, + RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, + DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, + PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, + VALID_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, + READ_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, + WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, + EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, + PAGE_TABLE_BLOCK_SIZE, + adev->vm_manager.block_size - 9); + /* Send no-retry XNACK on fault to suppress VM fault storm. */ + tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, + RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, + !amdgpu_noretry); + WREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT1_CNTL, + i * hub->ctx_distance, tmp); + WREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32, + i * hub->ctx_addr_distance, 0); + WREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32, + i * hub->ctx_addr_distance, 0); + WREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32, + i * hub->ctx_addr_distance, + lower_32_bits(adev->vm_manager.max_pfn - 1)); + WREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32, + i * hub->ctx_addr_distance, + upper_32_bits(adev->vm_manager.max_pfn - 1)); + } + + hub->vm_cntx_cntl = tmp; +} + +static void gfxhub_v11_5_0_program_invalidation(struct amdgpu_device *adev) +{ + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)]; + unsigned i; + + for (i = 0 ; i < 18; ++i) { + WREG32_SOC15_OFFSET(GC, 0, regGCVM_INVALIDATE_ENG0_ADDR_RANGE_LO32, + i * hub->eng_addr_distance, 0xffffffff); + WREG32_SOC15_OFFSET(GC, 0, regGCVM_INVALIDATE_ENG0_ADDR_RANGE_HI32, + i * hub->eng_addr_distance, 0x1f); + } +} + +static int gfxhub_v11_5_0_gart_enable(struct amdgpu_device *adev) +{ + if (amdgpu_sriov_vf(adev)) { + /* + * GCMC_VM_FB_LOCATION_BASE/TOP is NULL for VF, becuase they are + * VF copy registers so vbios post doesn't program them, for + * SRIOV driver need to program them + */ + WREG32_SOC15(GC, 0, regGCMC_VM_FB_LOCATION_BASE, + adev->gmc.vram_start >> 24); + WREG32_SOC15(GC, 0, regGCMC_VM_FB_LOCATION_TOP, + adev->gmc.vram_end >> 24); + } + + /* GART Enable. */ + gfxhub_v11_5_0_init_gart_aperture_regs(adev); + gfxhub_v11_5_0_init_system_aperture_regs(adev); + gfxhub_v11_5_0_init_tlb_regs(adev); + gfxhub_v11_5_0_init_cache_regs(adev); + + gfxhub_v11_5_0_enable_system_domain(adev); + gfxhub_v11_5_0_disable_identity_aperture(adev); + gfxhub_v11_5_0_setup_vmid_config(adev); + gfxhub_v11_5_0_program_invalidation(adev); + + return 0; +} + +static void gfxhub_v11_5_0_gart_disable(struct amdgpu_device *adev) +{ + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)]; + u32 tmp; + u32 i; + + /* Disable all tables */ + for (i = 0; i < 16; i++) + WREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT0_CNTL, + i * hub->ctx_distance, 0); + + /* Setup TLB control */ + tmp = RREG32_SOC15(GC, 0, regGCMC_VM_MX_L1_TLB_CNTL); + tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 0); + tmp = REG_SET_FIELD(tmp, GCMC_VM_MX_L1_TLB_CNTL, + ENABLE_ADVANCED_DRIVER_MODEL, 0); + WREG32_SOC15(GC, 0, regGCMC_VM_MX_L1_TLB_CNTL, tmp); + + /* Setup L2 cache */ + WREG32_FIELD15_PREREG(GC, 0, GCVM_L2_CNTL, ENABLE_L2_CACHE, 0); + WREG32_SOC15(GC, 0, regGCVM_L2_CNTL3, 0); +} + +/** + * gfxhub_v11_5_0_set_fault_enable_default - update GART/VM fault handling + * + * @adev: amdgpu_device pointer + * @value: true redirects VM faults to the default page + */ +static void gfxhub_v11_5_0_set_fault_enable_default(struct amdgpu_device *adev, + bool value) +{ + u32 tmp; + + /* NO halt CP when page fault */ + tmp = RREG32_SOC15(GC, 0, regCP_DEBUG); + tmp = REG_SET_FIELD(tmp, CP_DEBUG, CPG_UTCL1_ERROR_HALT_DISABLE, 1); + WREG32_SOC15(GC, 0, regCP_DEBUG, tmp); + + /* These registers are not accessible to VF-SRIOV. + * The PF will program them instead. + */ + if (amdgpu_sriov_vf(adev)) + return; + + tmp = RREG32_SOC15(GC, 0, regGCVM_L2_PROTECTION_FAULT_CNTL); + tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL, + RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL, + PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL, + PDE1_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL, + PDE2_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL, + TRANSLATE_FURTHER_PROTECTION_FAULT_ENABLE_DEFAULT, + value); + tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL, + NACK_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL, + DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL, + VALID_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL, + READ_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL, + WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL, + EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, value); + if (!value) { + tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL, + CRASH_ON_NO_RETRY_FAULT, 1); + tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL, + CRASH_ON_RETRY_FAULT, 1); + } + WREG32_SOC15(GC, 0, regGCVM_L2_PROTECTION_FAULT_CNTL, tmp); +} + +static const struct amdgpu_vmhub_funcs gfxhub_v11_5_0_vmhub_funcs = { + .print_l2_protection_fault_status = gfxhub_v11_5_0_print_l2_protection_fault_status, + .get_invalidate_req = gfxhub_v11_5_0_get_invalidate_req, +}; + +static void gfxhub_v11_5_0_init(struct amdgpu_device *adev) +{ + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)]; + + hub->ctx0_ptb_addr_lo32 = + SOC15_REG_OFFSET(GC, 0, + regGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32); + hub->ctx0_ptb_addr_hi32 = + SOC15_REG_OFFSET(GC, 0, + regGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32); + hub->vm_inv_eng0_sem = + SOC15_REG_OFFSET(GC, 0, regGCVM_INVALIDATE_ENG0_SEM); + hub->vm_inv_eng0_req = + SOC15_REG_OFFSET(GC, 0, regGCVM_INVALIDATE_ENG0_REQ); + hub->vm_inv_eng0_ack = + SOC15_REG_OFFSET(GC, 0, regGCVM_INVALIDATE_ENG0_ACK); + hub->vm_context0_cntl = + SOC15_REG_OFFSET(GC, 0, regGCVM_CONTEXT0_CNTL); + hub->vm_l2_pro_fault_status = + SOC15_REG_OFFSET(GC, 0, regGCVM_L2_PROTECTION_FAULT_STATUS); + hub->vm_l2_pro_fault_cntl = + SOC15_REG_OFFSET(GC, 0, regGCVM_L2_PROTECTION_FAULT_CNTL); + + hub->ctx_distance = regGCVM_CONTEXT1_CNTL - regGCVM_CONTEXT0_CNTL; + hub->ctx_addr_distance = regGCVM_CONTEXT1_PAGE_TABLE_BASE_ADDR_LO32 - + regGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32; + hub->eng_distance = regGCVM_INVALIDATE_ENG1_REQ - + regGCVM_INVALIDATE_ENG0_REQ; + hub->eng_addr_distance = regGCVM_INVALIDATE_ENG1_ADDR_RANGE_LO32 - + regGCVM_INVALIDATE_ENG0_ADDR_RANGE_LO32; + + hub->vm_cntx_cntl_vm_fault = GCVM_CONTEXT1_CNTL__RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | + GCVM_CONTEXT1_CNTL__DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | + GCVM_CONTEXT1_CNTL__PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | + GCVM_CONTEXT1_CNTL__VALID_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | + GCVM_CONTEXT1_CNTL__READ_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | + GCVM_CONTEXT1_CNTL__WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | + GCVM_CONTEXT1_CNTL__EXECUTE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK; + + hub->vmhub_funcs = &gfxhub_v11_5_0_vmhub_funcs; +} + +const struct amdgpu_gfxhub_funcs gfxhub_v11_5_0_funcs = { + .get_fb_location = gfxhub_v11_5_0_get_fb_location, + .get_mc_fb_offset = gfxhub_v11_5_0_get_mc_fb_offset, + .setup_vm_pt_regs = gfxhub_v11_5_0_setup_vm_pt_regs, + .gart_enable = gfxhub_v11_5_0_gart_enable, + .gart_disable = gfxhub_v11_5_0_gart_disable, + .set_fault_enable_default = gfxhub_v11_5_0_set_fault_enable_default, + .init = gfxhub_v11_5_0_init, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v11_5_0.h b/drivers/gpu/drm/amd/amdgpu/gfxhub_v11_5_0.h new file mode 100644 index 000000000000..265ab631b3d0 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v11_5_0.h @@ -0,0 +1,29 @@ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __GFXHUB_V11_5_0_H__ +#define __GFXHUB_V11_5_0_H__ + +extern const struct amdgpu_gfxhub_funcs gfxhub_v11_5_0_funcs; + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c index d94cc1ec7242..53a2ba5fcf4b 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c @@ -103,7 +103,7 @@ static void gfxhub_v1_0_init_system_aperture_regs(struct amdgpu_device *adev) min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18); if (adev->apu_flags & AMD_APU_IS_RAVEN2) - /* + /* * Raven2 has a HW issue that it is unable to use the * vram which is out of MC_VM_SYSTEM_APERTURE_HIGH_ADDR. * So here is the workaround that increase system @@ -248,7 +248,7 @@ static void gfxhub_v1_0_disable_identity_aperture(struct amdgpu_device *adev) static void gfxhub_v1_0_setup_vmid_config(struct amdgpu_device *adev) { struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)]; - unsigned num_level, block_size; + unsigned int num_level, block_size; uint32_t tmp; int i; @@ -260,7 +260,7 @@ static void gfxhub_v1_0_setup_vmid_config(struct amdgpu_device *adev) block_size -= 9; for (i = 0; i <= 14; i++) { - tmp = RREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT1_CNTL, i); + tmp = RREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT1_CNTL, i * hub->ctx_distance); tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1); tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH, num_level); @@ -308,7 +308,7 @@ static void gfxhub_v1_0_setup_vmid_config(struct amdgpu_device *adev) static void gfxhub_v1_0_program_invalidation(struct amdgpu_device *adev) { struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)]; - unsigned i; + unsigned int i; for (i = 0 ; i < 18; ++i) { WREG32_SOC15_OFFSET(GC, 0, mmVM_INVALIDATE_ENG0_ADDR_RANGE_LO32, @@ -375,6 +375,7 @@ static void gfxhub_v1_0_set_fault_enable_default(struct amdgpu_device *adev, bool value) { u32 tmp; + tmp = RREG32_SOC15(GC, 0, mmVM_L2_PROTECTION_FAULT_CNTL); tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL, RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, value); diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c index d9f14dc55998..55423ff1bb49 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c @@ -140,7 +140,7 @@ gfxhub_v1_2_xcc_init_system_aperture_regs(struct amdgpu_device *adev, min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18); if (adev->apu_flags & AMD_APU_IS_RAVEN2) - /* + /* * Raven2 has a HW issue that it is unable to use the * vram which is out of MC_VM_SYSTEM_APERTURE_HIGH_ADDR. * So here is the workaround that increase system @@ -315,7 +315,7 @@ static void gfxhub_v1_2_xcc_setup_vmid_config(struct amdgpu_device *adev, uint32_t xcc_mask) { struct amdgpu_vmhub *hub; - unsigned num_level, block_size; + unsigned int num_level, block_size; uint32_t tmp; int i, j; @@ -329,7 +329,8 @@ static void gfxhub_v1_2_xcc_setup_vmid_config(struct amdgpu_device *adev, for_each_inst(j, xcc_mask) { hub = &adev->vmhub[AMDGPU_GFXHUB(j)]; for (i = 0; i <= 14; i++) { - tmp = RREG32_SOC15_OFFSET(GC, GET_INST(GC, j), regVM_CONTEXT1_CNTL, i); + tmp = RREG32_SOC15_OFFSET(GC, GET_INST(GC, j), regVM_CONTEXT1_CNTL, + i * hub->ctx_distance); tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1); tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH, num_level); @@ -356,11 +357,14 @@ static void gfxhub_v1_2_xcc_setup_vmid_config(struct amdgpu_device *adev, * the SQ per-process. * Retry faults need to be enabled for that to work. */ - tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, - RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, - !adev->gmc.noretry || - adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2) || - adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3)); + tmp = REG_SET_FIELD( + tmp, VM_CONTEXT1_CNTL, + RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, + !adev->gmc.noretry || + amdgpu_ip_version(adev, GC_HWIP, 0) == + IP_VERSION(9, 4, 2) || + amdgpu_ip_version(adev, GC_HWIP, 0) == + IP_VERSION(9, 4, 3)); WREG32_SOC15_OFFSET(GC, GET_INST(GC, j), regVM_CONTEXT1_CNTL, i * hub->ctx_distance, tmp); WREG32_SOC15_OFFSET(GC, GET_INST(GC, j), @@ -402,22 +406,6 @@ static void gfxhub_v1_2_xcc_program_invalidation(struct amdgpu_device *adev, static int gfxhub_v1_2_xcc_gart_enable(struct amdgpu_device *adev, uint32_t xcc_mask) { - int i; - - /* - * MC_VM_FB_LOCATION_BASE/TOP is NULL for VF, because they are - * VF copy registers so vbios post doesn't program them, for - * SRIOV driver need to program them - */ - if (amdgpu_sriov_vf(adev)) { - for_each_inst(i, xcc_mask) { - WREG32_SOC15_RLC(GC, GET_INST(GC, i), regMC_VM_FB_LOCATION_BASE, - adev->gmc.vram_start >> 24); - WREG32_SOC15_RLC(GC, GET_INST(GC, i), regMC_VM_FB_LOCATION_TOP, - adev->gmc.vram_end >> 24); - } - } - /* GART Enable. */ gfxhub_v1_2_xcc_init_gart_aperture_regs(adev, xcc_mask); gfxhub_v1_2_xcc_init_system_aperture_regs(adev, xcc_mask); diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c index f173a61c6c15..793faf62cb07 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c @@ -31,7 +31,7 @@ #include "soc15_common.h" -static const char *gfxhub_client_ids[] = { +static const char * const gfxhub_client_ids[] = { "CB/DB", "Reserved", "GE1", @@ -287,7 +287,7 @@ static void gfxhub_v2_0_setup_vmid_config(struct amdgpu_device *adev) uint32_t tmp; for (i = 0; i <= 14; i++) { - tmp = RREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_CNTL, i); + tmp = RREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_CNTL, i * hub->ctx_distance); tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1); tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH, adev->vm_manager.num_level); @@ -332,7 +332,7 @@ static void gfxhub_v2_0_setup_vmid_config(struct amdgpu_device *adev) static void gfxhub_v2_0_program_invalidation(struct amdgpu_device *adev) { struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)]; - unsigned i; + unsigned int i; for (i = 0 ; i < 18; ++i) { WREG32_SOC15_OFFSET(GC, 0, mmGCVM_INVALIDATE_ENG0_ADDR_RANGE_LO32, @@ -393,6 +393,7 @@ static void gfxhub_v2_0_set_fault_enable_default(struct amdgpu_device *adev, bool value) { u32 tmp; + tmp = RREG32_SOC15(GC, 0, mmGCVM_L2_PROTECTION_FAULT_CNTL); tmp = REG_SET_FIELD(tmp, GCVM_L2_PROTECTION_FAULT_CNTL, RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, value); @@ -470,6 +471,9 @@ static void gfxhub_v2_0_init(struct amdgpu_device *adev) GCVM_CONTEXT1_CNTL__WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | GCVM_CONTEXT1_CNTL__EXECUTE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK; + /* TODO: This is only needed on some Navi 1x revisions */ + hub->sdma_invalidation_workaround = true; + hub->vmhub_funcs = &gfxhub_v2_0_vmhub_funcs; } diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c index d8fc3e8088cd..cd0e8a321e46 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c @@ -34,7 +34,7 @@ #define mmGCUTCL2_HARVEST_BYPASS_GROUPS_YELLOW_CARP 0x16f8 #define mmGCUTCL2_HARVEST_BYPASS_GROUPS_YELLOW_CARP_BASE_IDX 0 -static const char *gfxhub_client_ids[] = { +static const char * const gfxhub_client_ids[] = { "CB/DB", "Reserved", "GE1", @@ -296,7 +296,7 @@ static void gfxhub_v2_1_setup_vmid_config(struct amdgpu_device *adev) uint32_t tmp; for (i = 0; i <= 14; i++) { - tmp = RREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_CNTL, i); + tmp = RREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT1_CNTL, i * hub->ctx_distance); tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1); tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH, adev->vm_manager.num_level); @@ -341,7 +341,7 @@ static void gfxhub_v2_1_setup_vmid_config(struct amdgpu_device *adev) static void gfxhub_v2_1_program_invalidation(struct amdgpu_device *adev) { struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)]; - unsigned i; + unsigned int i; for (i = 0 ; i < 18; ++i) { WREG32_SOC15_OFFSET(GC, 0, mmGCVM_INVALIDATE_ENG0_ADDR_RANGE_LO32, @@ -510,7 +510,7 @@ static int gfxhub_v2_1_get_xgmi_info(struct amdgpu_device *adev) u32 max_num_physical_nodes = 0; u32 max_physical_node_id = 0; - switch (adev->ip_versions[XGMI_HWIP][0]) { + switch (amdgpu_ip_version(adev, XGMI_HWIP, 0)) { case IP_VERSION(4, 8, 0): max_num_physical_nodes = 4; max_physical_node_id = 3; @@ -548,7 +548,7 @@ static void gfxhub_v2_1_utcl2_harvest(struct amdgpu_device *adev) adev->gfx.config.max_sh_per_se * adev->gfx.config.max_shader_engines); - switch (adev->ip_versions[GC_HWIP][0]) { + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { case IP_VERSION(10, 3, 1): case IP_VERSION(10, 3, 3): /* Get SA disabled bitmap from eFuse setting */ @@ -582,6 +582,7 @@ static void gfxhub_v2_1_utcl2_harvest(struct amdgpu_device *adev) static void gfxhub_v2_1_save_regs(struct amdgpu_device *adev) { int i; + adev->gmc.VM_L2_CNTL = RREG32_SOC15(GC, 0, mmGCVM_L2_CNTL); adev->gmc.VM_L2_CNTL2 = RREG32_SOC15(GC, 0, mmGCVM_L2_CNTL2); adev->gmc.VM_DUMMY_PAGE_FAULT_CNTL = RREG32_SOC15(GC, 0, mmGCVM_DUMMY_PAGE_FAULT_CNTL); @@ -616,6 +617,7 @@ static void gfxhub_v2_1_save_regs(struct amdgpu_device *adev) static void gfxhub_v2_1_restore_regs(struct amdgpu_device *adev) { int i; + WREG32_SOC15(GC, 0, mmGCVM_L2_CNTL, adev->gmc.VM_L2_CNTL); WREG32_SOC15(GC, 0, mmGCVM_L2_CNTL2, adev->gmc.VM_L2_CNTL2); WREG32_SOC15(GC, 0, mmGCVM_DUMMY_PAGE_FAULT_CNTL, adev->gmc.VM_DUMMY_PAGE_FAULT_CNTL); @@ -679,9 +681,8 @@ static void gfxhub_v2_1_halt(struct amdgpu_device *adev) tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2); } - if (!time) { + if (!time) DRM_WARN("failed to wait for GRBM(EA) idle\n"); - } } const struct amdgpu_gfxhub_funcs gfxhub_v2_1_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v3_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v3_0.c index c53147f9c9fc..abe30c8bd2ba 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v3_0.c @@ -30,7 +30,7 @@ #include "navi10_enum.h" #include "soc15_common.h" -static const char *gfxhub_client_ids[] = { +static const char * const gfxhub_client_ids[] = { "CB/DB", "Reserved", "GE1", @@ -164,8 +164,7 @@ static void gfxhub_v3_0_init_system_aperture_regs(struct amdgpu_device *adev) max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18); /* Set default page address. */ - value = adev->mem_scratch.gpu_addr - adev->gmc.vram_start - + adev->vm_manager.vram_base_offset; + value = amdgpu_gmc_vram_mc2pa(adev, adev->mem_scratch.gpu_addr); WREG32_SOC15(GC, 0, regGCMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB, (u32)(value >> 12)); WREG32_SOC15(GC, 0, regGCMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB, @@ -295,7 +294,7 @@ static void gfxhub_v3_0_setup_vmid_config(struct amdgpu_device *adev) uint32_t tmp; for (i = 0; i <= 14; i++) { - tmp = RREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT1_CNTL, i); + tmp = RREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT1_CNTL, i * hub->ctx_distance); tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1); tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH, adev->vm_manager.num_level); @@ -340,7 +339,7 @@ static void gfxhub_v3_0_setup_vmid_config(struct amdgpu_device *adev) static void gfxhub_v3_0_program_invalidation(struct amdgpu_device *adev) { struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)]; - unsigned i; + unsigned int i; for (i = 0 ; i < 18; ++i) { WREG32_SOC15_OFFSET(GC, 0, regGCVM_INVALIDATE_ENG0_ADDR_RANGE_LO32, diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v3_0_3.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v3_0_3.c index ae777487d72e..b3ef6e71811f 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v3_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v3_0_3.c @@ -33,7 +33,7 @@ #define regGCVM_L2_CNTL4_DEFAULT 0x000000c1 #define regGCVM_L2_CNTL5_DEFAULT 0x00003fe0 -static const char *gfxhub_client_ids[] = { +static const char * const gfxhub_client_ids[] = { "CB/DB", "Reserved", "GE1", @@ -169,8 +169,7 @@ static void gfxhub_v3_0_3_init_system_aperture_regs(struct amdgpu_device *adev) max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18); /* Set default page address. */ - value = adev->mem_scratch.gpu_addr - adev->gmc.vram_start - + adev->vm_manager.vram_base_offset; + value = amdgpu_gmc_vram_mc2pa(adev, adev->mem_scratch.gpu_addr); WREG32_SOC15(GC, 0, regGCMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB, (u32)(value >> 12)); WREG32_SOC15(GC, 0, regGCMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB, @@ -300,7 +299,7 @@ static void gfxhub_v3_0_3_setup_vmid_config(struct amdgpu_device *adev) uint32_t tmp; for (i = 0; i <= 14; i++) { - tmp = RREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT1_CNTL, i); + tmp = RREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT1_CNTL, i * hub->ctx_distance); tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1); tmp = REG_SET_FIELD(tmp, GCVM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH, adev->vm_manager.num_level); @@ -345,7 +344,7 @@ static void gfxhub_v3_0_3_setup_vmid_config(struct amdgpu_device *adev) static void gfxhub_v3_0_3_program_invalidation(struct amdgpu_device *adev) { struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)]; - unsigned i; + unsigned int i; for (i = 0 ; i < 18; ++i) { WREG32_SOC15_OFFSET(GC, 0, regGCVM_INVALIDATE_ENG0_ADDR_RANGE_LO32, diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c index 0c8a47989576..a5a05c16c10d 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c @@ -51,18 +51,9 @@ #include "athub_v2_0.h" #include "athub_v2_1.h" -#include "amdgpu_reset.h" - -#if 0 -static const struct soc15_reg_golden golden_settings_navi10_hdp[] = -{ - /* TODO add golden setting for hdp */ -}; -#endif - static int gmc_v10_0_ecc_interrupt_state(struct amdgpu_device *adev, struct amdgpu_irq_src *src, - unsigned type, + unsigned int type, enum amdgpu_interrupt_state state) { return 0; @@ -70,7 +61,7 @@ static int gmc_v10_0_ecc_interrupt_state(struct amdgpu_device *adev, static int gmc_v10_0_vm_fault_interrupt_state(struct amdgpu_device *adev, - struct amdgpu_irq_src *src, unsigned type, + struct amdgpu_irq_src *src, unsigned int type, enum amdgpu_interrupt_state state) { switch (state) { @@ -109,9 +100,11 @@ static int gmc_v10_0_process_interrupt(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry) { + uint32_t vmhub_index = entry->client_id == SOC15_IH_CLIENTID_VMC ? + AMDGPU_MMHUB0(0) : AMDGPU_GFXHUB(0); + struct amdgpu_vmhub *hub = &adev->vmhub[vmhub_index]; bool retry_fault = !!(entry->src_data[1] & 0x80); bool write_fault = !!(entry->src_data[1] & 0x20); - struct amdgpu_vmhub *hub = &adev->vmhub[entry->vmid_src]; struct amdgpu_task_info task_info; uint32_t status = 0; u64 addr; @@ -150,11 +143,15 @@ static int gmc_v10_0_process_interrupt(struct amdgpu_device *adev, * the new fast GRBM interface. */ if ((entry->vmid_src == AMDGPU_GFXHUB(0)) && - (adev->ip_versions[GC_HWIP][0] < IP_VERSION(10, 3, 0))) + (amdgpu_ip_version(adev, GC_HWIP, 0) < + IP_VERSION(10, 3, 0))) RREG32(hub->vm_l2_pro_fault_status); status = RREG32(hub->vm_l2_pro_fault_status); WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1); + + amdgpu_vm_update_fault_cache(adev, entry->pasid, addr, status, + entry->vmid_src ? AMDGPU_MMHUB0(0) : AMDGPU_GFXHUB(0)); } if (!printk_ratelimit()) @@ -164,8 +161,7 @@ static int gmc_v10_0_process_interrupt(struct amdgpu_device *adev, amdgpu_vm_get_task_info(adev, entry->pasid, &task_info); dev_err(adev->dev, - "[%s] page fault (src_id:%u ring:%u vmid:%u pasid:%u, " - "for process %s pid %d thread %s pid %d)\n", + "[%s] page fault (src_id:%u ring:%u vmid:%u pasid:%u, for process %s pid %d thread %s pid %d)\n", entry->vmid_src ? "mmhub" : "gfxhub", entry->src_id, entry->ring_id, entry->vmid, entry->pasid, task_info.process_name, task_info.tgid, @@ -236,20 +232,47 @@ static bool gmc_v10_0_get_atc_vmid_pasid_mapping_info( * by the amdgpu vm/hsa code. */ -static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid, - unsigned int vmhub, uint32_t flush_type) +/** + * gmc_v10_0_flush_gpu_tlb - gart tlb flush callback + * + * @adev: amdgpu_device pointer + * @vmid: vm instance to flush + * @vmhub: vmhub type + * @flush_type: the flush type + * + * Flush the TLB for the requested page table. + */ +static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, + uint32_t vmhub, uint32_t flush_type) { bool use_semaphore = gmc_v10_0_use_invalidate_semaphore(adev, vmhub); struct amdgpu_vmhub *hub = &adev->vmhub[vmhub]; u32 inv_req = hub->vmhub_funcs->get_invalidate_req(vmid, flush_type); - u32 tmp; /* Use register 17 for GART */ - const unsigned eng = 17; - unsigned int i; + const unsigned int eng = 17; unsigned char hub_ip = 0; + u32 sem, req, ack; + unsigned int i; + u32 tmp; + + sem = hub->vm_inv_eng0_sem + hub->eng_distance * eng; + req = hub->vm_inv_eng0_req + hub->eng_distance * eng; + ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng; - hub_ip = (vmhub == AMDGPU_GFXHUB(0)) ? - GC_HWIP : MMHUB_HWIP; + /* flush hdp cache */ + adev->hdp.funcs->flush_hdp(adev, NULL); + + /* For SRIOV run time, driver shouldn't access the register through MMIO + * Directly use kiq to do the vm invalidation instead + */ + if (adev->gfx.kiq[0].ring.sched.ready && !adev->enable_mes && + (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev))) { + amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, inv_req, + 1 << vmid, GET_INST(GC, 0)); + return; + } + + hub_ip = (vmhub == AMDGPU_GFXHUB(0)) ? GC_HWIP : MMHUB_HWIP; spin_lock(&adev->gmc.invalidate_lock); /* @@ -263,9 +286,7 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid, if (use_semaphore) { for (i = 0; i < adev->usec_timeout; i++) { /* a read return value of 1 means semaphore acuqire */ - tmp = RREG32_RLC_NO_KIQ(hub->vm_inv_eng0_sem + - hub->eng_distance * eng, hub_ip); - + tmp = RREG32_RLC_NO_KIQ(sem, hub_ip); if (tmp & 0x1) break; udelay(1); @@ -275,24 +296,19 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid, DRM_ERROR("Timeout waiting for sem acquire in VM flush!\n"); } - WREG32_RLC_NO_KIQ(hub->vm_inv_eng0_req + - hub->eng_distance * eng, - inv_req, hub_ip); + WREG32_RLC_NO_KIQ(req, inv_req, hub_ip); /* * Issue a dummy read to wait for the ACK register to be cleared * to avoid a false ACK due to the new fast GRBM interface. */ if ((vmhub == AMDGPU_GFXHUB(0)) && - (adev->ip_versions[GC_HWIP][0] < IP_VERSION(10, 3, 0))) - RREG32_RLC_NO_KIQ(hub->vm_inv_eng0_req + - hub->eng_distance * eng, hub_ip); + (amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(10, 3, 0))) + RREG32_RLC_NO_KIQ(req, hub_ip); /* Wait for ACK with a delay.*/ for (i = 0; i < adev->usec_timeout; i++) { - tmp = RREG32_RLC_NO_KIQ(hub->vm_inv_eng0_ack + - hub->eng_distance * eng, hub_ip); - + tmp = RREG32_RLC_NO_KIQ(ack, hub_ip); tmp &= 1 << vmid; if (tmp) break; @@ -302,109 +318,13 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid, /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */ if (use_semaphore) - /* - * add semaphore release after invalidation, - * write with 0 means semaphore release - */ - WREG32_RLC_NO_KIQ(hub->vm_inv_eng0_sem + - hub->eng_distance * eng, 0, hub_ip); + WREG32_RLC_NO_KIQ(sem, 0, hub_ip); spin_unlock(&adev->gmc.invalidate_lock); - if (i < adev->usec_timeout) - return; - - DRM_ERROR("Timeout waiting for VM flush hub: %d!\n", vmhub); -} - -/** - * gmc_v10_0_flush_gpu_tlb - gart tlb flush callback - * - * @adev: amdgpu_device pointer - * @vmid: vm instance to flush - * @vmhub: vmhub type - * @flush_type: the flush type - * - * Flush the TLB for the requested page table. - */ -static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, - uint32_t vmhub, uint32_t flush_type) -{ - struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring; - struct dma_fence *fence; - struct amdgpu_job *job; - - int r; - - /* flush hdp cache */ - adev->hdp.funcs->flush_hdp(adev, NULL); - - /* For SRIOV run time, driver shouldn't access the register through MMIO - * Directly use kiq to do the vm invalidation instead - */ - if (adev->gfx.kiq[0].ring.sched.ready && !adev->enable_mes && - (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev)) && - down_read_trylock(&adev->reset_domain->sem)) { - struct amdgpu_vmhub *hub = &adev->vmhub[vmhub]; - const unsigned eng = 17; - u32 inv_req = hub->vmhub_funcs->get_invalidate_req(vmid, flush_type); - u32 req = hub->vm_inv_eng0_req + hub->eng_distance * eng; - u32 ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng; - - amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, inv_req, - 1 << vmid); - - up_read(&adev->reset_domain->sem); - return; - } - - mutex_lock(&adev->mman.gtt_window_lock); - - if (vmhub == AMDGPU_MMHUB0(0)) { - gmc_v10_0_flush_vm_hub(adev, vmid, AMDGPU_MMHUB0(0), 0); - mutex_unlock(&adev->mman.gtt_window_lock); - return; - } - - BUG_ON(vmhub != AMDGPU_GFXHUB(0)); - - if (!adev->mman.buffer_funcs_enabled || - !adev->ib_pool_ready || - amdgpu_in_reset(adev) || - ring->sched.ready == false) { - gmc_v10_0_flush_vm_hub(adev, vmid, AMDGPU_GFXHUB(0), 0); - mutex_unlock(&adev->mman.gtt_window_lock); - return; - } - - /* The SDMA on Navi has a bug which can theoretically result in memory - * corruption if an invalidation happens at the same time as an VA - * translation. Avoid this by doing the invalidation from the SDMA - * itself. - */ - r = amdgpu_job_alloc_with_ib(ring->adev, &adev->mman.high_pr, - AMDGPU_FENCE_OWNER_UNDEFINED, - 16 * 4, AMDGPU_IB_POOL_IMMEDIATE, - &job); - if (r) - goto error_alloc; - - job->vm_pd_addr = amdgpu_gmc_pd_addr(adev->gart.bo); - job->vm_needs_flush = true; - job->ibs->ptr[job->ibs->length_dw++] = ring->funcs->nop; - amdgpu_ring_pad_ib(ring, &job->ibs[0]); - fence = amdgpu_job_submit(job); - - mutex_unlock(&adev->mman.gtt_window_lock); - - dma_fence_wait(fence, false); - dma_fence_put(fence); - - return; - -error_alloc: - mutex_unlock(&adev->mman.gtt_window_lock); - DRM_ERROR("Error flushing GPU TLB using the SDMA (%d)!\n", r); + if (i >= adev->usec_timeout) + dev_err(adev->dev, "Timeout waiting for VM flush hub: %d!\n", + vmhub); } /** @@ -418,71 +338,40 @@ error_alloc: * * Flush the TLB for the requested pasid. */ -static int gmc_v10_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, - uint16_t pasid, uint32_t flush_type, - bool all_hub, uint32_t inst) +static void gmc_v10_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, + uint16_t pasid, uint32_t flush_type, + bool all_hub, uint32_t inst) { + uint16_t queried; int vmid, i; - signed long r; - uint32_t seq; - uint16_t queried_pasid; - bool ret; - u32 usec_timeout = amdgpu_sriov_vf(adev) ? SRIOV_USEC_TIMEOUT : adev->usec_timeout; - struct amdgpu_ring *ring = &adev->gfx.kiq[0].ring; - struct amdgpu_kiq *kiq = &adev->gfx.kiq[0]; - - if (amdgpu_emu_mode == 0 && ring->sched.ready) { - spin_lock(&adev->gfx.kiq[0].ring_lock); - /* 2 dwords flush + 8 dwords fence */ - amdgpu_ring_alloc(ring, kiq->pmf->invalidate_tlbs_size + 8); - kiq->pmf->kiq_invalidate_tlbs(ring, - pasid, flush_type, all_hub); - r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT); - if (r) { - amdgpu_ring_undo(ring); - spin_unlock(&adev->gfx.kiq[0].ring_lock); - return -ETIME; - } - - amdgpu_ring_commit(ring); - spin_unlock(&adev->gfx.kiq[0].ring_lock); - r = amdgpu_fence_wait_polling(ring, seq, usec_timeout); - if (r < 1) { - dev_err(adev->dev, "wait for kiq fence error: %ld.\n", r); - return -ETIME; - } - - return 0; - } for (vmid = 1; vmid < AMDGPU_NUM_VMID; vmid++) { - - ret = gmc_v10_0_get_atc_vmid_pasid_mapping_info(adev, vmid, - &queried_pasid); - if (ret && queried_pasid == pasid) { - if (all_hub) { - for_each_set_bit(i, adev->vmhubs_mask, AMDGPU_MAX_VMHUBS) - gmc_v10_0_flush_gpu_tlb(adev, vmid, - i, flush_type); - } else { - gmc_v10_0_flush_gpu_tlb(adev, vmid, - AMDGPU_GFXHUB(0), flush_type); - } - if (!adev->enable_mes) - break; + bool valid; + + valid = gmc_v10_0_get_atc_vmid_pasid_mapping_info(adev, vmid, + &queried); + if (!valid || queried != pasid) + continue; + + if (all_hub) { + for_each_set_bit(i, adev->vmhubs_mask, + AMDGPU_MAX_VMHUBS) + gmc_v10_0_flush_gpu_tlb(adev, vmid, i, + flush_type); + } else { + gmc_v10_0_flush_gpu_tlb(adev, vmid, AMDGPU_GFXHUB(0), + flush_type); } } - - return 0; } static uint64_t gmc_v10_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring, - unsigned vmid, uint64_t pd_addr) + unsigned int vmid, uint64_t pd_addr) { bool use_semaphore = gmc_v10_0_use_invalidate_semaphore(ring->adev, ring->vm_hub); struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->vm_hub]; uint32_t req = hub->vmhub_funcs->get_invalidate_req(vmid, 0); - unsigned eng = ring->vm_inv_eng; + unsigned int eng = ring->vm_inv_eng; /* * It may lose gpuvm invalidate acknowldege state across power-gating @@ -524,8 +413,8 @@ static uint64_t gmc_v10_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring, return pd_addr; } -static void gmc_v10_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned vmid, - unsigned pasid) +static void gmc_v10_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned int vmid, + unsigned int pasid) { struct amdgpu_device *adev = ring->adev; uint32_t reg; @@ -640,15 +529,16 @@ static void gmc_v10_0_get_vm_pte(struct amdgpu_device *adev, } if (bo && bo->flags & (AMDGPU_GEM_CREATE_COHERENT | + AMDGPU_GEM_CREATE_EXT_COHERENT | AMDGPU_GEM_CREATE_UNCACHED)) *flags = (*flags & ~AMDGPU_PTE_MTYPE_NV10_MASK) | AMDGPU_PTE_MTYPE_NV10(MTYPE_UC); } -static unsigned gmc_v10_0_get_vbios_fb_size(struct amdgpu_device *adev) +static unsigned int gmc_v10_0_get_vbios_fb_size(struct amdgpu_device *adev) { u32 d1vga_control = RREG32_SOC15(DCE, 0, mmD1VGA_CONTROL); - unsigned size; + unsigned int size; if (REG_GET_FIELD(d1vga_control, D1VGA_CONTROL, D1VGA_MODE_ENABLE)) { size = AMDGPU_VBIOS_VGA_ALLOCATION; @@ -686,7 +576,7 @@ static void gmc_v10_0_set_gmc_funcs(struct amdgpu_device *adev) static void gmc_v10_0_set_umc_funcs(struct amdgpu_device *adev) { - switch (adev->ip_versions[UMC_HWIP][0]) { + switch (amdgpu_ip_version(adev, UMC_HWIP, 0)) { case IP_VERSION(8, 7, 0): adev->umc.max_ras_err_cnt_per_query = UMC_V8_7_TOTAL_CHANNEL_NUM; adev->umc.channel_inst_num = UMC_V8_7_CHANNEL_INSTANCE_NUM; @@ -703,7 +593,7 @@ static void gmc_v10_0_set_umc_funcs(struct amdgpu_device *adev) static void gmc_v10_0_set_mmhub_funcs(struct amdgpu_device *adev) { - switch (adev->ip_versions[MMHUB_HWIP][0]) { + switch (amdgpu_ip_version(adev, MMHUB_HWIP, 0)) { case IP_VERSION(2, 3, 0): case IP_VERSION(2, 4, 0): case IP_VERSION(2, 4, 1): @@ -717,7 +607,7 @@ static void gmc_v10_0_set_mmhub_funcs(struct amdgpu_device *adev) static void gmc_v10_0_set_gfxhub_funcs(struct amdgpu_device *adev) { - switch (adev->ip_versions[GC_HWIP][0]) { + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { case IP_VERSION(10, 3, 0): case IP_VERSION(10, 3, 2): case IP_VERSION(10, 3, 1): @@ -751,6 +641,7 @@ static int gmc_v10_0_early_init(void *handle) adev->gmc.private_aperture_start = 0x1000000000000000ULL; adev->gmc.private_aperture_end = adev->gmc.private_aperture_start + (4ULL << 30) - 1; + adev->gmc.noretry_flags = AMDGPU_VM_NORETRY_FLAGS_TF; return 0; } @@ -781,9 +672,11 @@ static void gmc_v10_0_vram_gtt_location(struct amdgpu_device *adev, /* add the xgmi offset of the physical node */ base += adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size; + amdgpu_gmc_set_agp_default(adev, mc); amdgpu_gmc_vram_location(adev, &adev->gmc, base); - amdgpu_gmc_gart_location(adev, mc); - amdgpu_gmc_agp_location(adev, mc); + amdgpu_gmc_gart_location(adev, mc, AMDGPU_GART_PLACEMENT_BEST_FIT); + if (!amdgpu_sriov_vf(adev) && (amdgpu_agp == 1)) + amdgpu_gmc_agp_location(adev, mc); /* base offset of vram pages */ adev->vm_manager.vram_base_offset = adev->gfxhub.funcs->get_mc_fb_offset(adev); @@ -830,7 +723,7 @@ static int gmc_v10_0_mc_init(struct amdgpu_device *adev) /* set the gart size */ if (amdgpu_gart_size == -1) { - switch (adev->ip_versions[GC_HWIP][0]) { + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { default: adev->gmc.gart_size = 512ULL << 20; break; @@ -897,7 +790,7 @@ static int gmc_v10_0_sw_init(void *handle) adev->gmc.vram_vendor = vram_vendor; } - switch (adev->ip_versions[GC_HWIP][0]) { + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { case IP_VERSION(10, 3, 0): adev->gmc.mall_size = 128 * 1024 * 1024; break; @@ -915,7 +808,7 @@ static int gmc_v10_0_sw_init(void *handle) break; } - switch (adev->ip_versions[GC_HWIP][0]) { + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { case IP_VERSION(10, 1, 10): case IP_VERSION(10, 1, 1): case IP_VERSION(10, 1, 2): @@ -972,7 +865,7 @@ static int gmc_v10_0_sw_init(void *handle) r = dma_set_mask_and_coherent(adev->dev, DMA_BIT_MASK(44)); if (r) { - printk(KERN_WARNING "amdgpu: No suitable DMA available.\n"); + dev_warn(adev->dev, "amdgpu: No suitable DMA available.\n"); return r; } @@ -1081,7 +974,7 @@ static int gmc_v10_0_gart_enable(struct amdgpu_device *adev) gmc_v10_0_flush_gpu_tlb(adev, 0, AMDGPU_GFXHUB(0), 0); DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n", - (unsigned)(adev->gmc.gart_size >> 20), + (unsigned int)(adev->gmc.gart_size >> 20), (unsigned long long)amdgpu_bo_gpu_offset(adev->gart.bo)); return 0; @@ -1089,8 +982,10 @@ static int gmc_v10_0_gart_enable(struct amdgpu_device *adev) static int gmc_v10_0_hw_init(void *handle) { - int r; struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int r; + + adev->gmc.flush_pasid_uses_kiq = !amdgpu_emu_mode; /* The sequence of these two function calls matters.*/ gmc_v10_0_init_golden_registers(adev); @@ -1200,7 +1095,8 @@ static int gmc_v10_0_set_clockgating_state(void *handle, * is a new problem observed at DF 3.0.3, however with the same suspend sequence not * seen any issue on the DF 3.0.2 series platform. */ - if (adev->in_s0ix && adev->ip_versions[DF_HWIP][0] > IP_VERSION(3, 0, 2)) { + if (adev->in_s0ix && + amdgpu_ip_version(adev, DF_HWIP, 0) > IP_VERSION(3, 0, 2)) { dev_dbg(adev->dev, "keep mmhub clock gating being enabled for s0ix\n"); return 0; } @@ -1209,7 +1105,7 @@ static int gmc_v10_0_set_clockgating_state(void *handle, if (r) return r; - if (adev->ip_versions[ATHUB_HWIP][0] >= IP_VERSION(2, 1, 0)) + if (amdgpu_ip_version(adev, ATHUB_HWIP, 0) >= IP_VERSION(2, 1, 0)) return athub_v2_1_set_clockgating(adev, state); else return athub_v2_0_set_clockgating(adev, state); @@ -1219,13 +1115,13 @@ static void gmc_v10_0_get_clockgating_state(void *handle, u64 *flags) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 1, 3) || - adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 1, 4)) + if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(10, 1, 3) || + amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(10, 1, 4)) return; adev->mmhub.funcs->get_clockgating(adev, flags); - if (adev->ip_versions[ATHUB_HWIP][0] >= IP_VERSION(2, 1, 0)) + if (amdgpu_ip_version(adev, ATHUB_HWIP, 0) >= IP_VERSION(2, 1, 0)) athub_v2_1_get_clockgating(adev, flags); else athub_v2_0_get_clockgating(adev, flags); @@ -1255,8 +1151,7 @@ const struct amd_ip_funcs gmc_v10_0_ip_funcs = { .get_clockgating_state = gmc_v10_0_get_clockgating_state, }; -const struct amdgpu_ip_block_version gmc_v10_0_ip_block = -{ +const struct amdgpu_ip_block_version gmc_v10_0_ip_block = { .type = AMD_IP_BLOCK_TYPE_GMC, .major = 10, .minor = 0, diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c index c571f0d95994..23d7b548d13f 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c @@ -42,15 +42,17 @@ #include "nbio_v4_3.h" #include "gfxhub_v3_0.h" #include "gfxhub_v3_0_3.h" +#include "gfxhub_v11_5_0.h" #include "mmhub_v3_0.h" #include "mmhub_v3_0_1.h" #include "mmhub_v3_0_2.h" +#include "mmhub_v3_3.h" #include "athub_v3_0.h" static int gmc_v11_0_ecc_interrupt_state(struct amdgpu_device *adev, struct amdgpu_irq_src *src, - unsigned type, + unsigned int type, enum amdgpu_interrupt_state state) { return 0; @@ -58,7 +60,7 @@ static int gmc_v11_0_ecc_interrupt_state(struct amdgpu_device *adev, static int gmc_v11_0_vm_fault_interrupt_state(struct amdgpu_device *adev, - struct amdgpu_irq_src *src, unsigned type, + struct amdgpu_irq_src *src, unsigned int type, enum amdgpu_interrupt_state state) { switch (state) { @@ -71,7 +73,8 @@ gmc_v11_0_vm_fault_interrupt_state(struct amdgpu_device *adev, * fini/suspend, so the overall state doesn't * change over the course of suspend/resume. */ - if (!adev->in_s0ix) + if (!adev->in_s0ix && (adev->in_runpm || adev->in_suspend || + amdgpu_in_reset(adev))) amdgpu_gmc_set_vm_fault_masks(adev, AMDGPU_GFXHUB(0), false); break; case AMDGPU_IRQ_STATE_ENABLE: @@ -97,7 +100,9 @@ static int gmc_v11_0_process_interrupt(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry) { - struct amdgpu_vmhub *hub = &adev->vmhub[entry->vmid_src]; + uint32_t vmhub_index = entry->client_id == SOC21_IH_CLIENTID_VMC ? + AMDGPU_MMHUB0(0) : AMDGPU_GFXHUB(0); + struct amdgpu_vmhub *hub = &adev->vmhub[vmhub_index]; uint32_t status = 0; u64 addr; @@ -115,6 +120,9 @@ static int gmc_v11_0_process_interrupt(struct amdgpu_device *adev, status = RREG32(hub->vm_l2_pro_fault_status); WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1); + + amdgpu_vm_update_fault_cache(adev, entry->pasid, addr, status, + entry->vmid_src ? AMDGPU_MMHUB0(0) : AMDGPU_GFXHUB(0)); } if (printk_ratelimit()) { @@ -124,8 +132,7 @@ static int gmc_v11_0_process_interrupt(struct amdgpu_device *adev, amdgpu_vm_get_task_info(adev, entry->pasid, &task_info); dev_err(adev->dev, - "[%s] page fault (src_id:%u ring:%u vmid:%u pasid:%u, " - "for process %s pid %d thread %s pid %d)\n", + "[%s] page fault (src_id:%u ring:%u vmid:%u pasid:%u, for process %s pid %d thread %s pid %d)\n", entry->vmid_src ? "mmhub" : "gfxhub", entry->src_id, entry->ring_id, entry->vmid, entry->pasid, task_info.process_name, task_info.tgid, @@ -183,27 +190,50 @@ static bool gmc_v11_0_get_vmid_pasid_mapping_info( return !!(*p_pasid); } -/* - * GART - * VMID 0 is the physical GPU addresses as used by the kernel. - * VMIDs 1-15 are used for userspace clients and are handled - * by the amdgpu vm/hsa code. +/** + * gmc_v11_0_flush_gpu_tlb - gart tlb flush callback + * + * @adev: amdgpu_device pointer + * @vmid: vm instance to flush + * @vmhub: which hub to flush + * @flush_type: the flush type + * + * Flush the TLB for the requested page table. */ - -static void gmc_v11_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid, - unsigned int vmhub, uint32_t flush_type) +static void gmc_v11_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, + uint32_t vmhub, uint32_t flush_type) { bool use_semaphore = gmc_v11_0_use_invalidate_semaphore(adev, vmhub); struct amdgpu_vmhub *hub = &adev->vmhub[vmhub]; u32 inv_req = hub->vmhub_funcs->get_invalidate_req(vmid, flush_type); - u32 tmp; /* Use register 17 for GART */ - const unsigned eng = 17; + const unsigned int eng = 17; + unsigned char hub_ip; + u32 sem, req, ack; unsigned int i; - unsigned char hub_ip = 0; + u32 tmp; - hub_ip = (vmhub == AMDGPU_GFXHUB(0)) ? - GC_HWIP : MMHUB_HWIP; + if ((vmhub == AMDGPU_GFXHUB(0)) && !adev->gfx.is_poweron) + return; + + sem = hub->vm_inv_eng0_sem + hub->eng_distance * eng; + req = hub->vm_inv_eng0_req + hub->eng_distance * eng; + ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng; + + /* flush hdp cache */ + adev->hdp.funcs->flush_hdp(adev, NULL); + + /* For SRIOV run time, driver shouldn't access the register through MMIO + * Directly use kiq to do the vm invalidation instead + */ + if ((adev->gfx.kiq[0].ring.sched.ready || adev->mes.ring.sched.ready) && + (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev))) { + amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, inv_req, + 1 << vmid, GET_INST(GC, 0)); + return; + } + + hub_ip = (vmhub == AMDGPU_GFXHUB(0)) ? GC_HWIP : MMHUB_HWIP; spin_lock(&adev->gmc.invalidate_lock); /* @@ -217,8 +247,7 @@ static void gmc_v11_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid, if (use_semaphore) { for (i = 0; i < adev->usec_timeout; i++) { /* a read return value of 1 means semaphore acuqire */ - tmp = RREG32_RLC_NO_KIQ(hub->vm_inv_eng0_sem + - hub->eng_distance * eng, hub_ip); + tmp = RREG32_RLC_NO_KIQ(sem, hub_ip); if (tmp & 0x1) break; udelay(1); @@ -228,12 +257,11 @@ static void gmc_v11_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid, DRM_ERROR("Timeout waiting for sem acquire in VM flush!\n"); } - WREG32_RLC_NO_KIQ(hub->vm_inv_eng0_req + hub->eng_distance * eng, inv_req, hub_ip); + WREG32_RLC_NO_KIQ(req, inv_req, hub_ip); /* Wait for ACK with a delay.*/ for (i = 0; i < adev->usec_timeout; i++) { - tmp = RREG32_RLC_NO_KIQ(hub->vm_inv_eng0_ack + - hub->eng_distance * eng, hub_ip); + tmp = RREG32_RLC_NO_KIQ(ack, hub_ip); tmp &= 1 << vmid; if (tmp) break; @@ -243,12 +271,7 @@ static void gmc_v11_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid, /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */ if (use_semaphore) - /* - * add semaphore release after invalidation, - * write with 0 means semaphore release - */ - WREG32_RLC_NO_KIQ(hub->vm_inv_eng0_sem + - hub->eng_distance * eng, 0, hub_ip); + WREG32_RLC_NO_KIQ(sem, 0, hub_ip); /* Issue additional private vm invalidation to MMHUB */ if ((vmhub != AMDGPU_GFXHUB(0)) && @@ -265,51 +288,8 @@ static void gmc_v11_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid, spin_unlock(&adev->gmc.invalidate_lock); - if (i < adev->usec_timeout) - return; - - DRM_ERROR("Timeout waiting for VM flush ACK!\n"); -} - -/** - * gmc_v11_0_flush_gpu_tlb - gart tlb flush callback - * - * @adev: amdgpu_device pointer - * @vmid: vm instance to flush - * @vmhub: which hub to flush - * @flush_type: the flush type - * - * Flush the TLB for the requested page table. - */ -static void gmc_v11_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, - uint32_t vmhub, uint32_t flush_type) -{ - if ((vmhub == AMDGPU_GFXHUB(0)) && !adev->gfx.is_poweron) - return; - - /* flush hdp cache */ - adev->hdp.funcs->flush_hdp(adev, NULL); - - /* For SRIOV run time, driver shouldn't access the register through MMIO - * Directly use kiq to do the vm invalidation instead - */ - if ((adev->gfx.kiq[0].ring.sched.ready || adev->mes.ring.sched.ready) && - (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev))) { - struct amdgpu_vmhub *hub = &adev->vmhub[vmhub]; - const unsigned eng = 17; - u32 inv_req = hub->vmhub_funcs->get_invalidate_req(vmid, flush_type); - u32 req = hub->vm_inv_eng0_req + hub->eng_distance * eng; - u32 ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng; - - amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, inv_req, - 1 << vmid); - return; - } - - mutex_lock(&adev->mman.gtt_window_lock); - gmc_v11_0_flush_vm_hub(adev, vmid, vmhub, 0); - mutex_unlock(&adev->mman.gtt_window_lock); - return; + if (i >= adev->usec_timeout) + dev_err(adev->dev, "Timeout waiting for VM flush ACK!\n"); } /** @@ -323,68 +303,40 @@ static void gmc_v11_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, * * Flush the TLB for the requested pasid. */ -static int gmc_v11_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, - uint16_t pasid, uint32_t flush_type, - bool all_hub, uint32_t inst) +static void gmc_v11_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, + uint16_t pasid, uint32_t flush_type, + bool all_hub, uint32_t inst) { + uint16_t queried; int vmid, i; - signed long r; - uint32_t seq; - uint16_t queried_pasid; - bool ret; - struct amdgpu_ring *ring = &adev->gfx.kiq[0].ring; - struct amdgpu_kiq *kiq = &adev->gfx.kiq[0]; - - if (amdgpu_emu_mode == 0 && ring->sched.ready) { - spin_lock(&adev->gfx.kiq[0].ring_lock); - /* 2 dwords flush + 8 dwords fence */ - amdgpu_ring_alloc(ring, kiq->pmf->invalidate_tlbs_size + 8); - kiq->pmf->kiq_invalidate_tlbs(ring, - pasid, flush_type, all_hub); - r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT); - if (r) { - amdgpu_ring_undo(ring); - spin_unlock(&adev->gfx.kiq[0].ring_lock); - return -ETIME; - } - - amdgpu_ring_commit(ring); - spin_unlock(&adev->gfx.kiq[0].ring_lock); - r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout); - if (r < 1) { - dev_err(adev->dev, "wait for kiq fence error: %ld.\n", r); - return -ETIME; - } - - return 0; - } for (vmid = 1; vmid < 16; vmid++) { - - ret = gmc_v11_0_get_vmid_pasid_mapping_info(adev, vmid, - &queried_pasid); - if (ret && queried_pasid == pasid) { - if (all_hub) { - for_each_set_bit(i, adev->vmhubs_mask, AMDGPU_MAX_VMHUBS) - gmc_v11_0_flush_gpu_tlb(adev, vmid, - i, flush_type); - } else { - gmc_v11_0_flush_gpu_tlb(adev, vmid, - AMDGPU_GFXHUB(0), flush_type); - } + bool valid; + + valid = gmc_v11_0_get_vmid_pasid_mapping_info(adev, vmid, + &queried); + if (!valid || queried != pasid) + continue; + + if (all_hub) { + for_each_set_bit(i, adev->vmhubs_mask, + AMDGPU_MAX_VMHUBS) + gmc_v11_0_flush_gpu_tlb(adev, vmid, i, + flush_type); + } else { + gmc_v11_0_flush_gpu_tlb(adev, vmid, AMDGPU_GFXHUB(0), + flush_type); } } - - return 0; } static uint64_t gmc_v11_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring, - unsigned vmid, uint64_t pd_addr) + unsigned int vmid, uint64_t pd_addr) { bool use_semaphore = gmc_v11_0_use_invalidate_semaphore(ring->adev, ring->vm_hub); struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->vm_hub]; uint32_t req = hub->vmhub_funcs->get_invalidate_req(vmid, 0); - unsigned eng = ring->vm_inv_eng; + unsigned int eng = ring->vm_inv_eng; /* * It may lose gpuvm invalidate acknowldege state across power-gating @@ -426,8 +378,8 @@ static uint64_t gmc_v11_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring, return pd_addr; } -static void gmc_v11_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned vmid, - unsigned pasid) +static void gmc_v11_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned int vmid, + unsigned int pasid) { struct amdgpu_device *adev = ring->adev; uint32_t reg; @@ -498,8 +450,7 @@ static void gmc_v11_0_get_vm_pde(struct amdgpu_device *adev, int level, uint64_t *addr, uint64_t *flags) { if (!(*flags & AMDGPU_PDE_PTE) && !(*flags & AMDGPU_PTE_SYSTEM)) - *addr = adev->vm_manager.vram_base_offset + *addr - - adev->gmc.vram_start; + *addr = amdgpu_gmc_vram_mc2pa(adev, *addr); BUG_ON(*addr & 0xFFFF00000000003FULL); if (!adev->gmc.translate_further) @@ -542,15 +493,16 @@ static void gmc_v11_0_get_vm_pte(struct amdgpu_device *adev, } if (bo && bo->flags & (AMDGPU_GEM_CREATE_COHERENT | + AMDGPU_GEM_CREATE_EXT_COHERENT | AMDGPU_GEM_CREATE_UNCACHED)) *flags = (*flags & ~AMDGPU_PTE_MTYPE_NV10_MASK) | AMDGPU_PTE_MTYPE_NV10(MTYPE_UC); } -static unsigned gmc_v11_0_get_vbios_fb_size(struct amdgpu_device *adev) +static unsigned int gmc_v11_0_get_vbios_fb_size(struct amdgpu_device *adev) { u32 d1vga_control = RREG32_SOC15(DCE, 0, regD1VGA_CONTROL); - unsigned size; + unsigned int size; if (REG_GET_FIELD(d1vga_control, D1VGA_CONTROL, D1VGA_MODE_ENABLE)) { size = AMDGPU_VBIOS_VGA_ALLOCATION; @@ -587,7 +539,7 @@ static void gmc_v11_0_set_gmc_funcs(struct amdgpu_device *adev) static void gmc_v11_0_set_umc_funcs(struct amdgpu_device *adev) { - switch (adev->ip_versions[UMC_HWIP][0]) { + switch (amdgpu_ip_version(adev, UMC_HWIP, 0)) { case IP_VERSION(8, 10, 0): adev->umc.channel_inst_num = UMC_V8_10_CHANNEL_INSTANCE_NUM; adev->umc.umc_inst_num = UMC_V8_10_UMC_INSTANCE_NUM; @@ -610,13 +562,16 @@ static void gmc_v11_0_set_umc_funcs(struct amdgpu_device *adev) static void gmc_v11_0_set_mmhub_funcs(struct amdgpu_device *adev) { - switch (adev->ip_versions[MMHUB_HWIP][0]) { + switch (amdgpu_ip_version(adev, MMHUB_HWIP, 0)) { case IP_VERSION(3, 0, 1): adev->mmhub.funcs = &mmhub_v3_0_1_funcs; break; case IP_VERSION(3, 0, 2): adev->mmhub.funcs = &mmhub_v3_0_2_funcs; break; + case IP_VERSION(3, 3, 0): + adev->mmhub.funcs = &mmhub_v3_3_funcs; + break; default: adev->mmhub.funcs = &mmhub_v3_0_funcs; break; @@ -625,10 +580,13 @@ static void gmc_v11_0_set_mmhub_funcs(struct amdgpu_device *adev) static void gmc_v11_0_set_gfxhub_funcs(struct amdgpu_device *adev) { - switch (adev->ip_versions[GC_HWIP][0]) { + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { case IP_VERSION(11, 0, 3): adev->gfxhub.funcs = &gfxhub_v3_0_3_funcs; break; + case IP_VERSION(11, 5, 0): + adev->gfxhub.funcs = &gfxhub_v11_5_0_funcs; + break; default: adev->gfxhub.funcs = &gfxhub_v3_0_funcs; break; @@ -651,6 +609,7 @@ static int gmc_v11_0_early_init(void *handle) adev->gmc.private_aperture_start = 0x1000000000000000ULL; adev->gmc.private_aperture_end = adev->gmc.private_aperture_start + (4ULL << 30) - 1; + adev->gmc.noretry_flags = AMDGPU_VM_NORETRY_FLAGS_TF; return 0; } @@ -678,9 +637,13 @@ static void gmc_v11_0_vram_gtt_location(struct amdgpu_device *adev, base = adev->mmhub.funcs->get_fb_location(adev); + amdgpu_gmc_set_agp_default(adev, mc); amdgpu_gmc_vram_location(adev, &adev->gmc, base); - amdgpu_gmc_gart_location(adev, mc); - amdgpu_gmc_agp_location(adev, mc); + amdgpu_gmc_gart_location(adev, mc, AMDGPU_GART_PLACEMENT_HIGH); + if (!amdgpu_sriov_vf(adev) && + (amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(11, 5, 0)) && + (amdgpu_agp == 1)) + amdgpu_gmc_agp_location(adev, mc); /* base offset of vram pages */ if (amdgpu_sriov_vf(adev)) @@ -727,9 +690,9 @@ static int gmc_v11_0_mc_init(struct amdgpu_device *adev) adev->gmc.visible_vram_size = adev->gmc.real_vram_size; /* set the gart size */ - if (amdgpu_gart_size == -1) { + if (amdgpu_gart_size == -1) adev->gmc.gart_size = 512ULL << 20; - } else + else adev->gmc.gart_size = (u64)amdgpu_gart_size << 20; gmc_v11_0_vram_gtt_location(adev, &adev->gmc); @@ -765,6 +728,8 @@ static int gmc_v11_0_sw_init(void *handle) adev->mmhub.funcs->init(adev); + adev->gfxhub.funcs->init(adev); + spin_lock_init(&adev->gmc.invalidate_lock); r = amdgpu_atomfirmware_get_vram_info(adev, @@ -774,12 +739,13 @@ static int gmc_v11_0_sw_init(void *handle) adev->gmc.vram_type = vram_type; adev->gmc.vram_vendor = vram_vendor; - switch (adev->ip_versions[GC_HWIP][0]) { + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { case IP_VERSION(11, 0, 0): case IP_VERSION(11, 0, 1): case IP_VERSION(11, 0, 2): case IP_VERSION(11, 0, 3): case IP_VERSION(11, 0, 4): + case IP_VERSION(11, 5, 0): set_bit(AMDGPU_GFXHUB(0), adev->vmhubs_mask); set_bit(AMDGPU_MMHUB0(0), adev->vmhubs_mask); /* @@ -823,7 +789,7 @@ static int gmc_v11_0_sw_init(void *handle) r = dma_set_mask_and_coherent(adev->dev, DMA_BIT_MASK(44)); if (r) { - printk(KERN_WARNING "amdgpu: No suitable DMA available.\n"); + dev_warn(adev->dev, "amdgpu: No suitable DMA available.\n"); return r; } @@ -926,7 +892,7 @@ static int gmc_v11_0_gart_enable(struct amdgpu_device *adev) gmc_v11_0_flush_gpu_tlb(adev, 0, AMDGPU_MMHUB0(0), 0); DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n", - (unsigned)(adev->gmc.gart_size >> 20), + (unsigned int)(adev->gmc.gart_size >> 20), (unsigned long long)amdgpu_bo_gpu_offset(adev->gart.bo)); return 0; @@ -934,8 +900,10 @@ static int gmc_v11_0_gart_enable(struct amdgpu_device *adev) static int gmc_v11_0_hw_init(void *handle) { - int r; struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int r; + + adev->gmc.flush_pasid_uses_kiq = !amdgpu_emu_mode; /* The sequence of these two function calls matters.*/ gmc_v11_0_init_golden_registers(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c index aa754c95a0b3..42e103d7077d 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c @@ -120,7 +120,8 @@ static int gmc_v6_0_init_microcode(struct amdgpu_device *adev) case CHIP_HAINAN: chip_name = "hainan"; break; - default: BUG(); + default: + BUG(); } /* this memory configuration requires special firmware */ @@ -178,9 +179,8 @@ static int gmc_v6_0_mc_load_microcode(struct amdgpu_device *adev) WREG32(mmMC_SEQ_IO_DEBUG_DATA, le32_to_cpup(new_io_mc_regs++)); } /* load the MC ucode */ - for (i = 0; i < ucode_size; i++) { + for (i = 0; i < ucode_size; i++) WREG32(mmMC_SEQ_SUP_PGM, le32_to_cpup(new_fw_data++)); - } /* put the engine back into the active state */ WREG32(mmMC_SEQ_SUP_CNTL, 0x00000008); @@ -208,10 +208,12 @@ static void gmc_v6_0_vram_gtt_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc) { u64 base = RREG32(mmMC_VM_FB_LOCATION) & 0xFFFF; + base <<= 24; + amdgpu_gmc_set_agp_default(adev, mc); amdgpu_gmc_vram_location(adev, mc, base); - amdgpu_gmc_gart_location(adev, mc); + amdgpu_gmc_gart_location(adev, mc, AMDGPU_GART_PLACEMENT_BEST_FIT); } static void gmc_v6_0_mc_program(struct amdgpu_device *adev) @@ -228,9 +230,8 @@ static void gmc_v6_0_mc_program(struct amdgpu_device *adev) } WREG32(mmHDP_REG_COHERENCY_FLUSH_CNTL, 0); - if (gmc_v6_0_wait_for_idle((void *)adev)) { + if (gmc_v6_0_wait_for_idle((void *)adev)) dev_warn(adev->dev, "Wait for MC idle timedout !\n"); - } if (adev->mode_info.num_crtc) { u32 tmp; @@ -253,12 +254,11 @@ static void gmc_v6_0_mc_program(struct amdgpu_device *adev) WREG32(mmMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR, adev->mem_scratch.gpu_addr >> 12); WREG32(mmMC_VM_AGP_BASE, 0); - WREG32(mmMC_VM_AGP_TOP, 0x0FFFFFFF); - WREG32(mmMC_VM_AGP_BOT, 0x0FFFFFFF); + WREG32(mmMC_VM_AGP_TOP, adev->gmc.agp_end >> 22); + WREG32(mmMC_VM_AGP_BOT, adev->gmc.agp_start >> 22); - if (gmc_v6_0_wait_for_idle((void *)adev)) { + if (gmc_v6_0_wait_for_idle((void *)adev)) dev_warn(adev->dev, "Wait for MC idle timedout !\n"); - } } static int gmc_v6_0_mc_init(struct amdgpu_device *adev) @@ -269,13 +269,13 @@ static int gmc_v6_0_mc_init(struct amdgpu_device *adev) int r; tmp = RREG32(mmMC_ARB_RAMCFG); - if (tmp & (1 << 11)) { + if (tmp & (1 << 11)) chansize = 16; - } else if (tmp & MC_ARB_RAMCFG__CHANSIZE_MASK) { + else if (tmp & MC_ARB_RAMCFG__CHANSIZE_MASK) chansize = 64; - } else { + else chansize = 32; - } + tmp = RREG32(mmMC_SHARED_CHMAP); switch ((tmp & MC_SHARED_CHMAP__NOOFCHAN_MASK) >> MC_SHARED_CHMAP__NOOFCHAN__SHIFT) { case 0: @@ -352,7 +352,7 @@ static void gmc_v6_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, } static uint64_t gmc_v6_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring, - unsigned vmid, uint64_t pd_addr) + unsigned int vmid, uint64_t pd_addr) { uint32_t reg; @@ -405,11 +405,11 @@ static void gmc_v6_0_set_fault_enable_default(struct amdgpu_device *adev, } /** - + * gmc_v8_0_set_prt - set PRT VM fault - + * - + * @adev: amdgpu_device pointer - + * @enable: enable/disable VM fault handling for PRT - +*/ + * gmc_v8_0_set_prt() - set PRT VM fault + * + * @adev: amdgpu_device pointer + * @enable: enable/disable VM fault handling for PRT + */ static void gmc_v6_0_set_prt(struct amdgpu_device *adev, bool enable) { u32 tmp; @@ -547,7 +547,7 @@ static int gmc_v6_0_gart_enable(struct amdgpu_device *adev) gmc_v6_0_flush_gpu_tlb(adev, 0, 0, 0); dev_info(adev->dev, "PCIE GART of %uM enabled (table at 0x%016llX).\n", - (unsigned)(adev->gmc.gart_size >> 20), + (unsigned int)(adev->gmc.gart_size >> 20), (unsigned long long)table_addr); return 0; } @@ -787,15 +787,16 @@ static int gmc_v6_0_late_init(void *handle) return 0; } -static unsigned gmc_v6_0_get_vbios_fb_size(struct amdgpu_device *adev) +static unsigned int gmc_v6_0_get_vbios_fb_size(struct amdgpu_device *adev) { u32 d1vga_control = RREG32(mmD1VGA_CONTROL); - unsigned size; + unsigned int size; if (REG_GET_FIELD(d1vga_control, D1VGA_CONTROL, D1VGA_MODE_ENABLE)) { size = AMDGPU_VBIOS_VGA_ALLOCATION; } else { u32 viewport = RREG32(mmVIEWPORT_SIZE); + size = (REG_GET_FIELD(viewport, VIEWPORT_SIZE, VIEWPORT_HEIGHT) * REG_GET_FIELD(viewport, VIEWPORT_SIZE, VIEWPORT_WIDTH) * 4); @@ -814,6 +815,7 @@ static int gmc_v6_0_sw_init(void *handle) adev->gmc.vram_type = AMDGPU_VRAM_TYPE_UNKNOWN; } else { u32 tmp = RREG32(mmMC_SEQ_MISC0); + tmp &= MC_SEQ_MISC0__MT__MASK; adev->gmc.vram_type = gmc_v6_0_convert_vram_type(tmp); } @@ -964,7 +966,7 @@ static bool gmc_v6_0_is_idle(void *handle) static int gmc_v6_0_wait_for_idle(void *handle) { - unsigned i; + unsigned int i; struct amdgpu_device *adev = (struct amdgpu_device *)handle; for (i = 0; i < adev->usec_timeout; i++) { @@ -995,10 +997,8 @@ static int gmc_v6_0_soft_reset(void *handle) if (srbm_soft_reset) { gmc_v6_0_mc_stop(adev); - if (gmc_v6_0_wait_for_idle(adev)) { + if (gmc_v6_0_wait_for_idle(adev)) dev_warn(adev->dev, "Wait for GMC idle timed out !\n"); - } - tmp = RREG32(mmSRBM_SOFT_RESET); tmp |= srbm_soft_reset; @@ -1023,7 +1023,7 @@ static int gmc_v6_0_soft_reset(void *handle) static int gmc_v6_0_vm_fault_interrupt_state(struct amdgpu_device *adev, struct amdgpu_irq_src *src, - unsigned type, + unsigned int type, enum amdgpu_interrupt_state state) { u32 tmp; @@ -1141,8 +1141,7 @@ static void gmc_v6_0_set_irq_funcs(struct amdgpu_device *adev) adev->gmc.vm_fault.funcs = &gmc_v6_0_irq_funcs; } -const struct amdgpu_ip_block_version gmc_v6_0_ip_block = -{ +const struct amdgpu_ip_block_version gmc_v6_0_ip_block = { .type = AMD_IP_BLOCK_TYPE_GMC, .major = 6, .minor = 0, diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c index acd2b407860f..efc16e580f1e 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c @@ -58,16 +58,14 @@ MODULE_FIRMWARE("amdgpu/bonaire_mc.bin"); MODULE_FIRMWARE("amdgpu/hawaii_mc.bin"); MODULE_FIRMWARE("amdgpu/topaz_mc.bin"); -static const u32 golden_settings_iceland_a11[] = -{ +static const u32 golden_settings_iceland_a11[] = { mmVM_PRT_APERTURE0_LOW_ADDR, 0x0fffffff, 0x0fffffff, mmVM_PRT_APERTURE1_LOW_ADDR, 0x0fffffff, 0x0fffffff, mmVM_PRT_APERTURE2_LOW_ADDR, 0x0fffffff, 0x0fffffff, mmVM_PRT_APERTURE3_LOW_ADDR, 0x0fffffff, 0x0fffffff }; -static const u32 iceland_mgcg_cgcg_init[] = -{ +static const u32 iceland_mgcg_cgcg_init[] = { mmMC_MEM_POWER_LS, 0xffffffff, 0x00000104 }; @@ -151,7 +149,8 @@ static int gmc_v7_0_init_microcode(struct amdgpu_device *adev) case CHIP_KABINI: case CHIP_MULLINS: return 0; - default: BUG(); + default: + return -EINVAL; } snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mc.bin", chip_name); @@ -237,10 +236,12 @@ static void gmc_v7_0_vram_gtt_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc) { u64 base = RREG32(mmMC_VM_FB_LOCATION) & 0xFFFF; + base <<= 24; + amdgpu_gmc_set_agp_default(adev, mc); amdgpu_gmc_vram_location(adev, mc, base); - amdgpu_gmc_gart_location(adev, mc); + amdgpu_gmc_gart_location(adev, mc, AMDGPU_GART_PLACEMENT_BEST_FIT); } /** @@ -266,9 +267,9 @@ static void gmc_v7_0_mc_program(struct amdgpu_device *adev) } WREG32(mmHDP_REG_COHERENCY_FLUSH_CNTL, 0); - if (gmc_v7_0_wait_for_idle((void *)adev)) { + if (gmc_v7_0_wait_for_idle((void *)adev)) dev_warn(adev->dev, "Wait for MC idle timedout !\n"); - } + if (adev->mode_info.num_crtc) { /* Lockout access through VGA aperture*/ tmp = RREG32(mmVGA_HDP_CONTROL); @@ -288,11 +289,10 @@ static void gmc_v7_0_mc_program(struct amdgpu_device *adev) WREG32(mmMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR, adev->mem_scratch.gpu_addr >> 12); WREG32(mmMC_VM_AGP_BASE, 0); - WREG32(mmMC_VM_AGP_TOP, 0x0FFFFFFF); - WREG32(mmMC_VM_AGP_BOT, 0x0FFFFFFF); - if (gmc_v7_0_wait_for_idle((void *)adev)) { + WREG32(mmMC_VM_AGP_TOP, adev->gmc.agp_end >> 22); + WREG32(mmMC_VM_AGP_BOT, adev->gmc.agp_start >> 22); + if (gmc_v7_0_wait_for_idle((void *)adev)) dev_warn(adev->dev, "Wait for MC idle timedout !\n"); - } WREG32(mmBIF_FB_EN, BIF_FB_EN__FB_READ_EN_MASK | BIF_FB_EN__FB_WRITE_EN_MASK); @@ -324,11 +324,11 @@ static int gmc_v7_0_mc_init(struct amdgpu_device *adev) /* Get VRAM informations */ tmp = RREG32(mmMC_ARB_RAMCFG); - if (REG_GET_FIELD(tmp, MC_ARB_RAMCFG, CHANSIZE)) { + if (REG_GET_FIELD(tmp, MC_ARB_RAMCFG, CHANSIZE)) chansize = 64; - } else { + else chansize = 32; - } + tmp = RREG32(mmMC_SHARED_CHMAP); switch (REG_GET_FIELD(tmp, MC_SHARED_CHMAP, NOOFCHAN)) { case 0: @@ -423,28 +423,23 @@ static int gmc_v7_0_mc_init(struct amdgpu_device *adev) * * Flush the TLB for the requested pasid. */ -static int gmc_v7_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, - uint16_t pasid, uint32_t flush_type, - bool all_hub, uint32_t inst) +static void gmc_v7_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, + uint16_t pasid, uint32_t flush_type, + bool all_hub, uint32_t inst) { + u32 mask = 0x0; int vmid; - unsigned int tmp; - - if (amdgpu_in_reset(adev)) - return -EIO; for (vmid = 1; vmid < 16; vmid++) { + u32 tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid); - tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid); if ((tmp & ATC_VMID0_PASID_MAPPING__VALID_MASK) && - (tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid) { - WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid); - RREG32(mmVM_INVALIDATE_RESPONSE); - break; - } + (tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid) + mask |= 1 << vmid; } - return 0; + WREG32(mmVM_INVALIDATE_REQUEST, mask); + RREG32(mmVM_INVALIDATE_RESPONSE); } /* @@ -472,7 +467,7 @@ static void gmc_v7_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, } static uint64_t gmc_v7_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring, - unsigned vmid, uint64_t pd_addr) + unsigned int vmid, uint64_t pd_addr) { uint32_t reg; @@ -488,8 +483,8 @@ static uint64_t gmc_v7_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring, return pd_addr; } -static void gmc_v7_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned vmid, - unsigned pasid) +static void gmc_v7_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned int vmid, + unsigned int pasid) { amdgpu_ring_emit_wreg(ring, mmIH_VMID_0_LUT + vmid, pasid); } @@ -700,7 +695,7 @@ static int gmc_v7_0_gart_enable(struct amdgpu_device *adev) gmc_v7_0_flush_gpu_tlb(adev, 0, 0, 0); DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n", - (unsigned)(adev->gmc.gart_size >> 20), + (unsigned int)(adev->gmc.gart_size >> 20), (unsigned long long)table_addr); return 0; } @@ -761,7 +756,7 @@ static void gmc_v7_0_gart_disable(struct amdgpu_device *adev) * Print human readable fault information (CIK). */ static void gmc_v7_0_vm_decode_fault(struct amdgpu_device *adev, u32 status, - u32 addr, u32 mc_client, unsigned pasid) + u32 addr, u32 mc_client, unsigned int pasid) { u32 vmid = REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS, VMID); u32 protections = REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS, @@ -942,6 +937,7 @@ static int gmc_v7_0_early_init(void *handle) adev->gmc.shared_aperture_end + 1; adev->gmc.private_aperture_end = adev->gmc.private_aperture_start + (4ULL << 30) - 1; + adev->gmc.noretry_flags = AMDGPU_VM_NORETRY_FLAGS_TF; return 0; } @@ -956,15 +952,16 @@ static int gmc_v7_0_late_init(void *handle) return 0; } -static unsigned gmc_v7_0_get_vbios_fb_size(struct amdgpu_device *adev) +static unsigned int gmc_v7_0_get_vbios_fb_size(struct amdgpu_device *adev) { u32 d1vga_control = RREG32(mmD1VGA_CONTROL); - unsigned size; + unsigned int size; if (REG_GET_FIELD(d1vga_control, D1VGA_CONTROL, D1VGA_MODE_ENABLE)) { size = AMDGPU_VBIOS_VGA_ALLOCATION; } else { u32 viewport = RREG32(mmVIEWPORT_SIZE); + size = (REG_GET_FIELD(viewport, VIEWPORT_SIZE, VIEWPORT_HEIGHT) * REG_GET_FIELD(viewport, VIEWPORT_SIZE, VIEWPORT_WIDTH) * 4); @@ -984,6 +981,7 @@ static int gmc_v7_0_sw_init(void *handle) adev->gmc.vram_type = AMDGPU_VRAM_TYPE_UNKNOWN; } else { u32 tmp = RREG32(mmMC_SEQ_MISC0); + tmp &= MC_SEQ_MISC0__MT__MASK; adev->gmc.vram_type = gmc_v7_0_convert_vram_type(tmp); } @@ -1152,7 +1150,7 @@ static bool gmc_v7_0_is_idle(void *handle) static int gmc_v7_0_wait_for_idle(void *handle) { - unsigned i; + unsigned int i; u32 tmp; struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@ -1190,10 +1188,8 @@ static int gmc_v7_0_soft_reset(void *handle) if (srbm_soft_reset) { gmc_v7_0_mc_stop(adev); - if (gmc_v7_0_wait_for_idle((void *)adev)) { + if (gmc_v7_0_wait_for_idle((void *)adev)) dev_warn(adev->dev, "Wait for GMC idle timed out !\n"); - } - tmp = RREG32(mmSRBM_SOFT_RESET); tmp |= srbm_soft_reset; @@ -1219,7 +1215,7 @@ static int gmc_v7_0_soft_reset(void *handle) static int gmc_v7_0_vm_fault_interrupt_state(struct amdgpu_device *adev, struct amdgpu_irq_src *src, - unsigned type, + unsigned int type, enum amdgpu_interrupt_state state) { u32 tmp; @@ -1273,6 +1269,9 @@ static int gmc_v7_0_process_interrupt(struct amdgpu_device *adev, if (!addr && !status) return 0; + amdgpu_vm_update_fault_cache(adev, entry->pasid, + ((u64)addr) << AMDGPU_GPU_PAGE_SHIFT, status, AMDGPU_GFXHUB(0)); + if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_FIRST) gmc_v7_0_set_fault_enable_default(adev, false); @@ -1383,8 +1382,7 @@ static void gmc_v7_0_set_irq_funcs(struct amdgpu_device *adev) adev->gmc.vm_fault.funcs = &gmc_v7_0_irq_funcs; } -const struct amdgpu_ip_block_version gmc_v7_0_ip_block = -{ +const struct amdgpu_ip_block_version gmc_v7_0_ip_block = { .type = AMD_IP_BLOCK_TYPE_GMC, .major = 7, .minor = 0, @@ -1392,8 +1390,7 @@ const struct amdgpu_ip_block_version gmc_v7_0_ip_block = .funcs = &gmc_v7_0_ip_funcs, }; -const struct amdgpu_ip_block_version gmc_v7_4_ip_block = -{ +const struct amdgpu_ip_block_version gmc_v7_4_ip_block = { .type = AMD_IP_BLOCK_TYPE_GMC, .major = 7, .minor = 4, diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c index 85dead2a5702..ff4ae73d27ec 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c @@ -64,8 +64,7 @@ MODULE_FIRMWARE("amdgpu/polaris11_k_mc.bin"); MODULE_FIRMWARE("amdgpu/polaris10_k_mc.bin"); MODULE_FIRMWARE("amdgpu/polaris12_k_mc.bin"); -static const u32 golden_settings_tonga_a11[] = -{ +static const u32 golden_settings_tonga_a11[] = { mmMC_ARB_WTM_GRPWT_RD, 0x00000003, 0x00000000, mmMC_HUB_RDREQ_DMIF_LIMIT, 0x0000007f, 0x00000028, mmMC_HUB_WDP_UMC, 0x00007fb6, 0x00000991, @@ -75,34 +74,29 @@ static const u32 golden_settings_tonga_a11[] = mmVM_PRT_APERTURE3_LOW_ADDR, 0x0fffffff, 0x0fffffff, }; -static const u32 tonga_mgcg_cgcg_init[] = -{ +static const u32 tonga_mgcg_cgcg_init[] = { mmMC_MEM_POWER_LS, 0xffffffff, 0x00000104 }; -static const u32 golden_settings_fiji_a10[] = -{ +static const u32 golden_settings_fiji_a10[] = { mmVM_PRT_APERTURE0_LOW_ADDR, 0x0fffffff, 0x0fffffff, mmVM_PRT_APERTURE1_LOW_ADDR, 0x0fffffff, 0x0fffffff, mmVM_PRT_APERTURE2_LOW_ADDR, 0x0fffffff, 0x0fffffff, mmVM_PRT_APERTURE3_LOW_ADDR, 0x0fffffff, 0x0fffffff, }; -static const u32 fiji_mgcg_cgcg_init[] = -{ +static const u32 fiji_mgcg_cgcg_init[] = { mmMC_MEM_POWER_LS, 0xffffffff, 0x00000104 }; -static const u32 golden_settings_polaris11_a11[] = -{ +static const u32 golden_settings_polaris11_a11[] = { mmVM_PRT_APERTURE0_LOW_ADDR, 0x0fffffff, 0x0fffffff, mmVM_PRT_APERTURE1_LOW_ADDR, 0x0fffffff, 0x0fffffff, mmVM_PRT_APERTURE2_LOW_ADDR, 0x0fffffff, 0x0fffffff, mmVM_PRT_APERTURE3_LOW_ADDR, 0x0fffffff, 0x0fffffff }; -static const u32 golden_settings_polaris10_a11[] = -{ +static const u32 golden_settings_polaris10_a11[] = { mmMC_ARB_WTM_GRPWT_RD, 0x00000003, 0x00000000, mmVM_PRT_APERTURE0_LOW_ADDR, 0x0fffffff, 0x0fffffff, mmVM_PRT_APERTURE1_LOW_ADDR, 0x0fffffff, 0x0fffffff, @@ -110,19 +104,16 @@ static const u32 golden_settings_polaris10_a11[] = mmVM_PRT_APERTURE3_LOW_ADDR, 0x0fffffff, 0x0fffffff }; -static const u32 cz_mgcg_cgcg_init[] = -{ +static const u32 cz_mgcg_cgcg_init[] = { mmMC_MEM_POWER_LS, 0xffffffff, 0x00000104 }; -static const u32 stoney_mgcg_cgcg_init[] = -{ +static const u32 stoney_mgcg_cgcg_init[] = { mmATC_MISC_CG, 0xffffffff, 0x000c0200, mmMC_MEM_POWER_LS, 0xffffffff, 0x00000104 }; -static const u32 golden_settings_stoney_common[] = -{ +static const u32 golden_settings_stoney_common[] = { mmMC_HUB_RDREQ_UVD, MC_HUB_RDREQ_UVD__PRESCALE_MASK, 0x00000004, mmMC_RD_GRP_OTH, MC_RD_GRP_OTH__UVD_MASK, 0x00600000 }; @@ -260,7 +251,8 @@ static int gmc_v8_0_init_microcode(struct amdgpu_device *adev) case CHIP_STONEY: case CHIP_VEGAM: return 0; - default: BUG(); + default: + return -EINVAL; } snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mc.bin", chip_name); @@ -421,8 +413,9 @@ static void gmc_v8_0_vram_gtt_location(struct amdgpu_device *adev, base = RREG32(mmMC_VM_FB_LOCATION) & 0xFFFF; base <<= 24; + amdgpu_gmc_set_agp_default(adev, mc); amdgpu_gmc_vram_location(adev, mc, base); - amdgpu_gmc_gart_location(adev, mc); + amdgpu_gmc_gart_location(adev, mc, AMDGPU_GART_PLACEMENT_BEST_FIT); } /** @@ -448,9 +441,9 @@ static void gmc_v8_0_mc_program(struct amdgpu_device *adev) } WREG32(mmHDP_REG_COHERENCY_FLUSH_CNTL, 0); - if (gmc_v8_0_wait_for_idle((void *)adev)) { + if (gmc_v8_0_wait_for_idle((void *)adev)) dev_warn(adev->dev, "Wait for MC idle timedout !\n"); - } + if (adev->mode_info.num_crtc) { /* Lockout access through VGA aperture*/ tmp = RREG32(mmVGA_HDP_CONTROL); @@ -481,11 +474,10 @@ static void gmc_v8_0_mc_program(struct amdgpu_device *adev) } WREG32(mmMC_VM_AGP_BASE, 0); - WREG32(mmMC_VM_AGP_TOP, 0x0FFFFFFF); - WREG32(mmMC_VM_AGP_BOT, 0x0FFFFFFF); - if (gmc_v8_0_wait_for_idle((void *)adev)) { + WREG32(mmMC_VM_AGP_TOP, adev->gmc.agp_end >> 22); + WREG32(mmMC_VM_AGP_BOT, adev->gmc.agp_start >> 22); + if (gmc_v8_0_wait_for_idle((void *)adev)) dev_warn(adev->dev, "Wait for MC idle timedout !\n"); - } WREG32(mmBIF_FB_EN, BIF_FB_EN__FB_READ_EN_MASK | BIF_FB_EN__FB_WRITE_EN_MASK); @@ -517,11 +509,11 @@ static int gmc_v8_0_mc_init(struct amdgpu_device *adev) /* Get VRAM informations */ tmp = RREG32(mmMC_ARB_RAMCFG); - if (REG_GET_FIELD(tmp, MC_ARB_RAMCFG, CHANSIZE)) { + if (REG_GET_FIELD(tmp, MC_ARB_RAMCFG, CHANSIZE)) chansize = 64; - } else { + else chansize = 32; - } + tmp = RREG32(mmMC_SHARED_CHMAP); switch (REG_GET_FIELD(tmp, MC_SHARED_CHMAP, NOOFCHAN)) { case 0: @@ -621,29 +613,23 @@ static int gmc_v8_0_mc_init(struct amdgpu_device *adev) * * Flush the TLB for the requested pasid. */ -static int gmc_v8_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, - uint16_t pasid, uint32_t flush_type, - bool all_hub, uint32_t inst) +static void gmc_v8_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, + uint16_t pasid, uint32_t flush_type, + bool all_hub, uint32_t inst) { + u32 mask = 0x0; int vmid; - unsigned int tmp; - - if (amdgpu_in_reset(adev)) - return -EIO; for (vmid = 1; vmid < 16; vmid++) { + u32 tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid); - tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid); if ((tmp & ATC_VMID0_PASID_MAPPING__VALID_MASK) && - (tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid) { - WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid); - RREG32(mmVM_INVALIDATE_RESPONSE); - break; - } + (tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid) + mask |= 1 << vmid; } - return 0; - + WREG32(mmVM_INVALIDATE_REQUEST, mask); + RREG32(mmVM_INVALIDATE_RESPONSE); } /* @@ -671,7 +657,7 @@ static void gmc_v8_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, } static uint64_t gmc_v8_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring, - unsigned vmid, uint64_t pd_addr) + unsigned int vmid, uint64_t pd_addr) { uint32_t reg; @@ -687,8 +673,8 @@ static uint64_t gmc_v8_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring, return pd_addr; } -static void gmc_v8_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned vmid, - unsigned pasid) +static void gmc_v8_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned int vmid, + unsigned int pasid) { amdgpu_ring_emit_wreg(ring, mmIH_VMID_0_LUT + vmid, pasid); } @@ -759,11 +745,11 @@ static void gmc_v8_0_set_fault_enable_default(struct amdgpu_device *adev, } /** - * gmc_v8_0_set_prt - set PRT VM fault + * gmc_v8_0_set_prt() - set PRT VM fault * * @adev: amdgpu_device pointer * @enable: enable/disable VM fault handling for PRT -*/ + */ static void gmc_v8_0_set_prt(struct amdgpu_device *adev, bool enable) { u32 tmp; @@ -940,7 +926,7 @@ static int gmc_v8_0_gart_enable(struct amdgpu_device *adev) gmc_v8_0_flush_gpu_tlb(adev, 0, 0, 0); DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n", - (unsigned)(adev->gmc.gart_size >> 20), + (unsigned int)(adev->gmc.gart_size >> 20), (unsigned long long)table_addr); return 0; } @@ -1001,7 +987,7 @@ static void gmc_v8_0_gart_disable(struct amdgpu_device *adev) * Print human readable fault information (VI). */ static void gmc_v8_0_vm_decode_fault(struct amdgpu_device *adev, u32 status, - u32 addr, u32 mc_client, unsigned pasid) + u32 addr, u32 mc_client, unsigned int pasid) { u32 vmid = REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS, VMID); u32 protections = REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS, @@ -1056,6 +1042,7 @@ static int gmc_v8_0_early_init(void *handle) adev->gmc.shared_aperture_end + 1; adev->gmc.private_aperture_end = adev->gmc.private_aperture_start + (4ULL << 30) - 1; + adev->gmc.noretry_flags = AMDGPU_VM_NORETRY_FLAGS_TF; return 0; } @@ -1070,15 +1057,16 @@ static int gmc_v8_0_late_init(void *handle) return 0; } -static unsigned gmc_v8_0_get_vbios_fb_size(struct amdgpu_device *adev) +static unsigned int gmc_v8_0_get_vbios_fb_size(struct amdgpu_device *adev) { u32 d1vga_control = RREG32(mmD1VGA_CONTROL); - unsigned size; + unsigned int size; if (REG_GET_FIELD(d1vga_control, D1VGA_CONTROL, D1VGA_MODE_ENABLE)) { size = AMDGPU_VBIOS_VGA_ALLOCATION; } else { u32 viewport = RREG32(mmVIEWPORT_SIZE); + size = (REG_GET_FIELD(viewport, VIEWPORT_SIZE, VIEWPORT_HEIGHT) * REG_GET_FIELD(viewport, VIEWPORT_SIZE, VIEWPORT_WIDTH) * 4); @@ -1282,7 +1270,7 @@ static bool gmc_v8_0_is_idle(void *handle) static int gmc_v8_0_wait_for_idle(void *handle) { - unsigned i; + unsigned int i; u32 tmp; struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@ -1318,13 +1306,15 @@ static bool gmc_v8_0_check_soft_reset(void *handle) srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_MC, 1); } + if (srbm_soft_reset) { adev->gmc.srbm_soft_reset = srbm_soft_reset; return true; - } else { - adev->gmc.srbm_soft_reset = 0; - return false; } + + adev->gmc.srbm_soft_reset = 0; + + return false; } static int gmc_v8_0_pre_soft_reset(void *handle) @@ -1335,9 +1325,8 @@ static int gmc_v8_0_pre_soft_reset(void *handle) return 0; gmc_v8_0_mc_stop(adev); - if (gmc_v8_0_wait_for_idle(adev)) { + if (gmc_v8_0_wait_for_idle(adev)) dev_warn(adev->dev, "Wait for GMC idle timed out !\n"); - } return 0; } @@ -1386,7 +1375,7 @@ static int gmc_v8_0_post_soft_reset(void *handle) static int gmc_v8_0_vm_fault_interrupt_state(struct amdgpu_device *adev, struct amdgpu_irq_src *src, - unsigned type, + unsigned int type, enum amdgpu_interrupt_state state) { u32 tmp; @@ -1448,6 +1437,9 @@ static int gmc_v8_0_process_interrupt(struct amdgpu_device *adev, if (!addr && !status) return 0; + amdgpu_vm_update_fault_cache(adev, entry->pasid, + ((u64)addr) << AMDGPU_GPU_PAGE_SHIFT, status, AMDGPU_GFXHUB(0)); + if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_FIRST) gmc_v8_0_set_fault_enable_default(adev, false); @@ -1747,8 +1739,7 @@ static void gmc_v8_0_set_irq_funcs(struct amdgpu_device *adev) adev->gmc.vm_fault.funcs = &gmc_v8_0_irq_funcs; } -const struct amdgpu_ip_block_version gmc_v8_0_ip_block = -{ +const struct amdgpu_ip_block_version gmc_v8_0_ip_block = { .type = AMD_IP_BLOCK_TYPE_GMC, .major = 8, .minor = 0, @@ -1756,8 +1747,7 @@ const struct amdgpu_ip_block_version gmc_v8_0_ip_block = .funcs = &gmc_v8_0_ip_funcs, }; -const struct amdgpu_ip_block_version gmc_v8_1_ip_block = -{ +const struct amdgpu_ip_block_version gmc_v8_1_ip_block = { .type = AMD_IP_BLOCK_TYPE_GMC, .major = 8, .minor = 1, @@ -1765,8 +1755,7 @@ const struct amdgpu_ip_block_version gmc_v8_1_ip_block = .funcs = &gmc_v8_0_ip_funcs, }; -const struct amdgpu_ip_block_version gmc_v8_5_ip_block = -{ +const struct amdgpu_ip_block_version gmc_v8_5_ip_block = { .type = AMD_IP_BLOCK_TYPE_GMC, .major = 8, .minor = 5, diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 67e669e0141c..2ac5820e9c92 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -56,6 +56,7 @@ #include "umc_v6_1.h" #include "umc_v6_0.h" #include "umc_v6_7.h" +#include "umc_v12_0.h" #include "hdp_v4_0.h" #include "mca_v3_0.h" @@ -64,8 +65,6 @@ #include "amdgpu_ras.h" #include "amdgpu_xgmi.h" -#include "amdgpu_reset.h" - /* add these here since we already include dce12 headers and these are for DCN */ #define mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION 0x055d #define mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION_BASE_IDX 2 @@ -81,7 +80,7 @@ #define MAX_MEM_RANGES 8 -static const char *gfxhub_client_ids[] = { +static const char * const gfxhub_client_ids[] = { "CB", "DB", "IA", @@ -332,14 +331,12 @@ static const char *mmhub_client_ids_aldebaran[][2] = { [384+0][1] = "OSS", }; -static const struct soc15_reg_golden golden_settings_mmhub_1_0_0[] = -{ +static const struct soc15_reg_golden golden_settings_mmhub_1_0_0[] = { SOC15_REG_GOLDEN_VALUE(MMHUB, 0, mmDAGB1_WRCLI2, 0x00000007, 0xfe5fe0fa), SOC15_REG_GOLDEN_VALUE(MMHUB, 0, mmMMEA1_DRAM_WR_CLI2GRP_MAP0, 0x00000030, 0x55555565) }; -static const struct soc15_reg_golden golden_settings_athub_1_0_0[] = -{ +static const struct soc15_reg_golden golden_settings_athub_1_0_0[] = { SOC15_REG_GOLDEN_VALUE(ATHUB, 0, mmRPB_ARB_CNTL, 0x0000ff00, 0x00000800), SOC15_REG_GOLDEN_VALUE(ATHUB, 0, mmRPB_ARB_CNTL2, 0x00ff00ff, 0x00080008) }; @@ -416,13 +413,14 @@ static const uint32_t ecc_umc_mcumc_ctrl_mask_addrs[] = { static int gmc_v9_0_ecc_interrupt_state(struct amdgpu_device *adev, struct amdgpu_irq_src *src, - unsigned type, + unsigned int type, enum amdgpu_interrupt_state state) { u32 bits, i, tmp, reg; /* Devices newer then VEGA10/12 shall have these programming - sequences performed by PSP BL */ + * sequences performed by PSP BL + */ if (adev->asic_type >= CHIP_VEGA20) return 0; @@ -466,7 +464,7 @@ static int gmc_v9_0_ecc_interrupt_state(struct amdgpu_device *adev, static int gmc_v9_0_vm_fault_interrupt_state(struct amdgpu_device *adev, struct amdgpu_irq_src *src, - unsigned type, + unsigned int type, enum amdgpu_interrupt_state state) { struct amdgpu_vmhub *hub; @@ -555,6 +553,7 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev, struct amdgpu_vmhub *hub; const char *mmhub_cid; const char *hub_name; + unsigned int vmhub; u64 addr; uint32_t cam_index = 0; int ret, xcc_id = 0; @@ -567,10 +566,10 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev, if (entry->client_id == SOC15_IH_CLIENTID_VMC) { hub_name = "mmhub0"; - hub = &adev->vmhub[AMDGPU_MMHUB0(node_id / 4)]; + vmhub = AMDGPU_MMHUB0(node_id / 4); } else if (entry->client_id == SOC15_IH_CLIENTID_VMC1) { hub_name = "mmhub1"; - hub = &adev->vmhub[AMDGPU_MMHUB1(0)]; + vmhub = AMDGPU_MMHUB1(0); } else { hub_name = "gfxhub0"; if (adev->gfx.funcs->ih_node_to_logical_xcc) { @@ -579,8 +578,9 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev, if (xcc_id < 0) xcc_id = 0; } - hub = &adev->vmhub[xcc_id]; + vmhub = xcc_id; } + hub = &adev->vmhub[vmhub]; if (retry_fault) { if (adev->irq.retry_cam_enabled) { @@ -626,13 +626,11 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev, if (!printk_ratelimit()) return 0; - memset(&task_info, 0, sizeof(struct amdgpu_task_info)); amdgpu_vm_get_task_info(adev, entry->pasid, &task_info); dev_err(adev->dev, - "[%s] %s page fault (src_id:%u ring:%u vmid:%u " - "pasid:%u, for process %s pid %d thread %s pid %d)\n", + "[%s] %s page fault (src_id:%u ring:%u vmid:%u pasid:%u, for process %s pid %d thread %s pid %d)\n", hub_name, retry_fault ? "retry" : "no-retry", entry->src_id, entry->ring_id, entry->vmid, entry->pasid, task_info.process_name, task_info.tgid, @@ -641,7 +639,7 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev, addr, entry->client_id, soc15_ih_clientid_name[entry->client_id]); - if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3)) + if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3)) dev_err(adev->dev, " cookie node_id %d fault from die %s%d%s\n", node_id, node_id % 4 == 3 ? "RSV" : "AID", node_id / 4, node_id % 4 == 1 ? ".XCD0" : node_id % 4 == 2 ? ".XCD1" : ""); @@ -655,7 +653,7 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev, * the new fast GRBM interface. */ if ((entry->vmid_src == AMDGPU_GFXHUB(0)) && - (adev->ip_versions[GC_HWIP][0] < IP_VERSION(9, 4, 2))) + (amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(9, 4, 2))) RREG32(hub->vm_l2_pro_fault_status); status = RREG32(hub->vm_l2_pro_fault_status); @@ -663,6 +661,8 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev, rw = REG_GET_FIELD(status, VM_L2_PROTECTION_FAULT_STATUS, RW); WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1); + amdgpu_vm_update_fault_cache(adev, entry->pasid, addr, status, vmhub); + dev_err(adev->dev, "VM_L2_PROTECTION_FAULT_STATUS:0x%08X\n", status); @@ -672,7 +672,7 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev, gfxhub_client_ids[cid], cid); } else { - switch (adev->ip_versions[MMHUB_HWIP][0]) { + switch (amdgpu_ip_version(adev, MMHUB_HWIP, 0)) { case IP_VERSION(9, 0, 0): mmhub_cid = mmhub_client_ids_vega10[cid][rw]; break; @@ -737,7 +737,8 @@ static void gmc_v9_0_set_irq_funcs(struct amdgpu_device *adev) adev->gmc.vm_fault.funcs = &gmc_v9_0_irq_funcs; if (!amdgpu_sriov_vf(adev) && - !adev->gmc.xgmi.connected_to_cpu) { + !adev->gmc.xgmi.connected_to_cpu && + !adev->gmc.is_app_apu) { adev->gmc.ecc_irq.num_types = 1; adev->gmc.ecc_irq.funcs = &gmc_v9_0_ecc_funcs; } @@ -772,8 +773,8 @@ static uint32_t gmc_v9_0_get_invalidate_req(unsigned int vmid, static bool gmc_v9_0_use_invalidate_semaphore(struct amdgpu_device *adev, uint32_t vmhub) { - if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2) || - adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3)) + if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2) || + amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3)) return false; return ((vmhub == AMDGPU_MMHUB0(0) || @@ -816,45 +817,32 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, uint32_t vmhub, uint32_t flush_type) { bool use_semaphore = gmc_v9_0_use_invalidate_semaphore(adev, vmhub); - const unsigned eng = 17; - u32 j, inv_req, inv_req2, tmp; + u32 j, inv_req, tmp, sem, req, ack, inst; + const unsigned int eng = 17; struct amdgpu_vmhub *hub; BUG_ON(vmhub >= AMDGPU_MAX_VMHUBS); hub = &adev->vmhub[vmhub]; - if (adev->gmc.xgmi.num_physical_nodes && - adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 0)) { - /* Vega20+XGMI caches PTEs in TC and TLB. Add a - * heavy-weight TLB flush (type 2), which flushes - * both. Due to a race condition with concurrent - * memory accesses using the same TLB cache line, we - * still need a second TLB flush after this. - */ - inv_req = gmc_v9_0_get_invalidate_req(vmid, 2); - inv_req2 = gmc_v9_0_get_invalidate_req(vmid, flush_type); - } else if (flush_type == 2 && - adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3) && - adev->rev_id == 0) { - inv_req = gmc_v9_0_get_invalidate_req(vmid, 0); - inv_req2 = gmc_v9_0_get_invalidate_req(vmid, flush_type); - } else { - inv_req = gmc_v9_0_get_invalidate_req(vmid, flush_type); - inv_req2 = 0; - } + inv_req = gmc_v9_0_get_invalidate_req(vmid, flush_type); + sem = hub->vm_inv_eng0_sem + hub->eng_distance * eng; + req = hub->vm_inv_eng0_req + hub->eng_distance * eng; + ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng; /* This is necessary for a HW workaround under SRIOV as well * as GFXOFF under bare metal */ - if (adev->gfx.kiq[0].ring.sched.ready && - (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev)) && - down_read_trylock(&adev->reset_domain->sem)) { + if (vmhub >= AMDGPU_MMHUB0(0)) + inst = GET_INST(GC, 0); + else + inst = vmhub; + if (adev->gfx.kiq[inst].ring.sched.ready && + (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev))) { uint32_t req = hub->vm_inv_eng0_req + hub->eng_distance * eng; uint32_t ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng; amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, inv_req, - 1 << vmid); - up_read(&adev->reset_domain->sem); + 1 << vmid, inst); return; } @@ -872,9 +860,9 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, for (j = 0; j < adev->usec_timeout; j++) { /* a read return value of 1 means semaphore acquire */ if (vmhub >= AMDGPU_MMHUB0(0)) - tmp = RREG32_SOC15_IP_NO_KIQ(MMHUB, hub->vm_inv_eng0_sem + hub->eng_distance * eng); + tmp = RREG32_SOC15_IP_NO_KIQ(MMHUB, sem, inst); else - tmp = RREG32_SOC15_IP_NO_KIQ(GC, hub->vm_inv_eng0_sem + hub->eng_distance * eng); + tmp = RREG32_SOC15_IP_NO_KIQ(GC, sem, inst); if (tmp & 0x1) break; udelay(1); @@ -884,35 +872,29 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, DRM_ERROR("Timeout waiting for sem acquire in VM flush!\n"); } - do { - if (vmhub >= AMDGPU_MMHUB0(0)) - WREG32_SOC15_IP_NO_KIQ(MMHUB, hub->vm_inv_eng0_req + hub->eng_distance * eng, inv_req); - else - WREG32_SOC15_IP_NO_KIQ(GC, hub->vm_inv_eng0_req + hub->eng_distance * eng, inv_req); - - /* - * Issue a dummy read to wait for the ACK register to - * be cleared to avoid a false ACK due to the new fast - * GRBM interface. - */ - if ((vmhub == AMDGPU_GFXHUB(0)) && - (adev->ip_versions[GC_HWIP][0] < IP_VERSION(9, 4, 2))) - RREG32_NO_KIQ(hub->vm_inv_eng0_req + - hub->eng_distance * eng); + if (vmhub >= AMDGPU_MMHUB0(0)) + WREG32_SOC15_IP_NO_KIQ(MMHUB, req, inv_req, inst); + else + WREG32_SOC15_IP_NO_KIQ(GC, req, inv_req, inst); - for (j = 0; j < adev->usec_timeout; j++) { - if (vmhub >= AMDGPU_MMHUB0(0)) - tmp = RREG32_SOC15_IP_NO_KIQ(MMHUB, hub->vm_inv_eng0_ack + hub->eng_distance * eng); - else - tmp = RREG32_SOC15_IP_NO_KIQ(GC, hub->vm_inv_eng0_ack + hub->eng_distance * eng); - if (tmp & (1 << vmid)) - break; - udelay(1); - } + /* + * Issue a dummy read to wait for the ACK register to + * be cleared to avoid a false ACK due to the new fast + * GRBM interface. + */ + if ((vmhub == AMDGPU_GFXHUB(0)) && + (adev->ip_versions[GC_HWIP][0] < IP_VERSION(9, 4, 2))) + RREG32_NO_KIQ(req); - inv_req = inv_req2; - inv_req2 = 0; - } while (inv_req); + for (j = 0; j < adev->usec_timeout; j++) { + if (vmhub >= AMDGPU_MMHUB0(0)) + tmp = RREG32_SOC15_IP_NO_KIQ(MMHUB, ack, inst); + else + tmp = RREG32_SOC15_IP_NO_KIQ(GC, ack, inst); + if (tmp & (1 << vmid)) + break; + udelay(1); + } /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */ if (use_semaphore) { @@ -921,9 +903,9 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, * write with 0 means semaphore release */ if (vmhub >= AMDGPU_MMHUB0(0)) - WREG32_SOC15_IP_NO_KIQ(MMHUB, hub->vm_inv_eng0_sem + hub->eng_distance * eng, 0); + WREG32_SOC15_IP_NO_KIQ(MMHUB, sem, 0, inst); else - WREG32_SOC15_IP_NO_KIQ(GC, hub->vm_inv_eng0_sem + hub->eng_distance * eng, 0); + WREG32_SOC15_IP_NO_KIQ(GC, sem, 0, inst); } spin_unlock(&adev->gmc.invalidate_lock); @@ -945,101 +927,42 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, * * Flush the TLB for the requested pasid. */ -static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, - uint16_t pasid, uint32_t flush_type, - bool all_hub, uint32_t inst) +static void gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, + uint16_t pasid, uint32_t flush_type, + bool all_hub, uint32_t inst) { - int vmid, i; - signed long r; - uint32_t seq; - uint16_t queried_pasid; - bool ret; - u32 usec_timeout = amdgpu_sriov_vf(adev) ? SRIOV_USEC_TIMEOUT : adev->usec_timeout; - struct amdgpu_ring *ring = &adev->gfx.kiq[inst].ring; - struct amdgpu_kiq *kiq = &adev->gfx.kiq[inst]; - - if (amdgpu_in_reset(adev)) - return -EIO; - - if (ring->sched.ready && down_read_trylock(&adev->reset_domain->sem)) { - /* Vega20+XGMI caches PTEs in TC and TLB. Add a - * heavy-weight TLB flush (type 2), which flushes - * both. Due to a race condition with concurrent - * memory accesses using the same TLB cache line, we - * still need a second TLB flush after this. - */ - bool vega20_xgmi_wa = (adev->gmc.xgmi.num_physical_nodes && - adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 0)); - /* 2 dwords flush + 8 dwords fence */ - unsigned int ndw = kiq->pmf->invalidate_tlbs_size + 8; - - if (vega20_xgmi_wa) - ndw += kiq->pmf->invalidate_tlbs_size; - - spin_lock(&adev->gfx.kiq[inst].ring_lock); - /* 2 dwords flush + 8 dwords fence */ - amdgpu_ring_alloc(ring, ndw); - if (vega20_xgmi_wa) - kiq->pmf->kiq_invalidate_tlbs(ring, - pasid, 2, all_hub); - - if (flush_type == 2 && - adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3) && - adev->rev_id == 0) - kiq->pmf->kiq_invalidate_tlbs(ring, - pasid, 0, all_hub); - - kiq->pmf->kiq_invalidate_tlbs(ring, - pasid, flush_type, all_hub); - r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT); - if (r) { - amdgpu_ring_undo(ring); - spin_unlock(&adev->gfx.kiq[inst].ring_lock); - up_read(&adev->reset_domain->sem); - return -ETIME; - } - - amdgpu_ring_commit(ring); - spin_unlock(&adev->gfx.kiq[inst].ring_lock); - r = amdgpu_fence_wait_polling(ring, seq, usec_timeout); - if (r < 1) { - dev_err(adev->dev, "wait for kiq fence error: %ld.\n", r); - up_read(&adev->reset_domain->sem); - return -ETIME; - } - up_read(&adev->reset_domain->sem); - return 0; - } + uint16_t queried; + int i, vmid; for (vmid = 1; vmid < 16; vmid++) { + bool valid; - ret = gmc_v9_0_get_atc_vmid_pasid_mapping_info(adev, vmid, - &queried_pasid); - if (ret && queried_pasid == pasid) { - if (all_hub) { - for_each_set_bit(i, adev->vmhubs_mask, AMDGPU_MAX_VMHUBS) - gmc_v9_0_flush_gpu_tlb(adev, vmid, - i, flush_type); - } else { - gmc_v9_0_flush_gpu_tlb(adev, vmid, - AMDGPU_GFXHUB(0), flush_type); - } - break; + valid = gmc_v9_0_get_atc_vmid_pasid_mapping_info(adev, vmid, + &queried); + if (!valid || queried != pasid) + continue; + + if (all_hub) { + for_each_set_bit(i, adev->vmhubs_mask, + AMDGPU_MAX_VMHUBS) + gmc_v9_0_flush_gpu_tlb(adev, vmid, i, + flush_type); + } else { + gmc_v9_0_flush_gpu_tlb(adev, vmid, + AMDGPU_GFXHUB(0), + flush_type); } } - - return 0; - } static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring, - unsigned vmid, uint64_t pd_addr) + unsigned int vmid, uint64_t pd_addr) { bool use_semaphore = gmc_v9_0_use_invalidate_semaphore(ring->adev, ring->vm_hub); struct amdgpu_device *adev = ring->adev; struct amdgpu_vmhub *hub = &adev->vmhub[ring->vm_hub]; uint32_t req = gmc_v9_0_get_invalidate_req(vmid, 0); - unsigned eng = ring->vm_inv_eng; + unsigned int eng = ring->vm_inv_eng; /* * It may lose gpuvm invalidate acknowldege state across power-gating @@ -1081,8 +1004,8 @@ static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring, return pd_addr; } -static void gmc_v9_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned vmid, - unsigned pasid) +static void gmc_v9_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned int vmid, + unsigned int pasid) { struct amdgpu_device *adev = ring->adev; uint32_t reg; @@ -1185,14 +1108,15 @@ static void gmc_v9_0_get_coherence_flags(struct amdgpu_device *adev, { struct amdgpu_device *bo_adev = amdgpu_ttm_adev(bo->tbo.bdev); bool is_vram = bo->tbo.resource->mem_type == TTM_PL_VRAM; - bool coherent = bo->flags & AMDGPU_GEM_CREATE_COHERENT; + bool coherent = bo->flags & (AMDGPU_GEM_CREATE_COHERENT | AMDGPU_GEM_CREATE_EXT_COHERENT); + bool ext_coherent = bo->flags & AMDGPU_GEM_CREATE_EXT_COHERENT; bool uncached = bo->flags & AMDGPU_GEM_CREATE_UNCACHED; struct amdgpu_vm *vm = mapping->bo_va->base.vm; unsigned int mtype_local, mtype; bool snoop = false; bool is_local; - switch (adev->ip_versions[GC_HWIP][0]) { + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { case IP_VERSION(9, 4, 1): case IP_VERSION(9, 4, 2): if (is_vram) { @@ -1206,8 +1130,10 @@ static void gmc_v9_0_get_coherence_flags(struct amdgpu_device *adev, /* FIXME: is this still needed? Or does * amdgpu_ttm_tt_pde_flags already handle this? */ - if ((adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2) || - adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3)) && + if ((amdgpu_ip_version(adev, GC_HWIP, 0) == + IP_VERSION(9, 4, 2) || + amdgpu_ip_version(adev, GC_HWIP, 0) == + IP_VERSION(9, 4, 3)) && adev->gmc.xgmi.connected_to_cpu) snoop = true; } else { @@ -1253,6 +1179,11 @@ static void gmc_v9_0_get_coherence_flags(struct amdgpu_device *adev, snoop = true; if (uncached) { mtype = MTYPE_UC; + } else if (ext_coherent) { + if (adev->rev_id) + mtype = is_local ? MTYPE_CC : MTYPE_UC; + else + mtype = MTYPE_UC; } else if (adev->flags & AMD_IS_APU) { mtype = is_local ? mtype_local : MTYPE_NC; } else { @@ -1316,23 +1247,26 @@ static void gmc_v9_0_override_vm_pte_flags(struct amdgpu_device *adev, /* Only GFX 9.4.3 APUs associate GPUs with NUMA nodes. Local system * memory can use more efficient MTYPEs. */ - if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 3)) + if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 3)) return; /* Only direct-mapped memory allows us to determine the NUMA node from * the DMA address. */ if (!adev->ram_is_direct_mapped) { - dev_dbg(adev->dev, "RAM is not direct mapped\n"); + dev_dbg_ratelimited(adev->dev, "RAM is not direct mapped\n"); return; } - /* Only override mappings with MTYPE_NC, which is the safe default for - * cacheable memory. + /* MTYPE_NC is the same default and can be overridden. + * MTYPE_UC will be present if the memory is extended-coherent + * and can also be overridden. */ if ((*flags & AMDGPU_PTE_MTYPE_VG10_MASK) != - AMDGPU_PTE_MTYPE_VG10(MTYPE_NC)) { - dev_dbg(adev->dev, "MTYPE is not NC\n"); + AMDGPU_PTE_MTYPE_VG10(MTYPE_NC) && + (*flags & AMDGPU_PTE_MTYPE_VG10_MASK) != + AMDGPU_PTE_MTYPE_VG10(MTYPE_UC)) { + dev_dbg_ratelimited(adev->dev, "MTYPE is not NC or UC\n"); return; } @@ -1343,7 +1277,7 @@ static void gmc_v9_0_override_vm_pte_flags(struct amdgpu_device *adev, if (adev->gmc.is_app_apu && vm->mem_id >= 0) { local_node = adev->gmc.mem_partitions[vm->mem_id].numa.node; } else { - dev_dbg(adev->dev, "Only native mode APU is supported.\n"); + dev_dbg_ratelimited(adev->dev, "Only native mode APU is supported.\n"); return; } @@ -1351,32 +1285,40 @@ static void gmc_v9_0_override_vm_pte_flags(struct amdgpu_device *adev, * page or NUMA nodes. */ if (!page_is_ram(addr >> PAGE_SHIFT)) { - dev_dbg(adev->dev, "Page is not RAM.\n"); + dev_dbg_ratelimited(adev->dev, "Page is not RAM.\n"); return; } nid = pfn_to_nid(addr >> PAGE_SHIFT); - dev_dbg(adev->dev, "vm->mem_id=%d, local_node=%d, nid=%d\n", - vm->mem_id, local_node, nid); + dev_dbg_ratelimited(adev->dev, "vm->mem_id=%d, local_node=%d, nid=%d\n", + vm->mem_id, local_node, nid); if (nid == local_node) { uint64_t old_flags = *flags; - unsigned int mtype_local = MTYPE_RW; - - if (amdgpu_mtype_local == 1) - mtype_local = MTYPE_NC; - else if (amdgpu_mtype_local == 2) - mtype_local = MTYPE_CC; + if ((*flags & AMDGPU_PTE_MTYPE_VG10_MASK) == + AMDGPU_PTE_MTYPE_VG10(MTYPE_NC)) { + unsigned int mtype_local = MTYPE_RW; + + if (amdgpu_mtype_local == 1) + mtype_local = MTYPE_NC; + else if (amdgpu_mtype_local == 2) + mtype_local = MTYPE_CC; + + *flags = (*flags & ~AMDGPU_PTE_MTYPE_VG10_MASK) | + AMDGPU_PTE_MTYPE_VG10(mtype_local); + } else if (adev->rev_id) { + /* MTYPE_UC case */ + *flags = (*flags & ~AMDGPU_PTE_MTYPE_VG10_MASK) | + AMDGPU_PTE_MTYPE_VG10(MTYPE_CC); + } - *flags = (*flags & ~AMDGPU_PTE_MTYPE_VG10_MASK) | - AMDGPU_PTE_MTYPE_VG10(mtype_local); - dev_dbg(adev->dev, "flags updated from %llx to %llx\n", - old_flags, *flags); + dev_dbg_ratelimited(adev->dev, "flags updated from %llx to %llx\n", + old_flags, *flags); } } -static unsigned gmc_v9_0_get_vbios_fb_size(struct amdgpu_device *adev) +static unsigned int gmc_v9_0_get_vbios_fb_size(struct amdgpu_device *adev) { u32 d1vga_control = RREG32_SOC15(DCE, 0, mmD1VGA_CONTROL); - unsigned size; + unsigned int size; /* TODO move to DC so GMC doesn't need to hard-code DCN registers */ @@ -1385,7 +1327,7 @@ static unsigned gmc_v9_0_get_vbios_fb_size(struct amdgpu_device *adev) } else { u32 viewport; - switch (adev->ip_versions[DCE_HWIP][0]) { + switch (amdgpu_ip_version(adev, DCE_HWIP, 0)) { case IP_VERSION(1, 0, 0): case IP_VERSION(1, 0, 1): viewport = RREG32_SOC15(DCE, 0, mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION); @@ -1456,7 +1398,7 @@ static void gmc_v9_0_set_gmc_funcs(struct amdgpu_device *adev) static void gmc_v9_0_set_umc_funcs(struct amdgpu_device *adev) { - switch (adev->ip_versions[UMC_HWIP][0]) { + switch (amdgpu_ip_version(adev, UMC_HWIP, 0)) { case IP_VERSION(6, 0, 0): adev->umc.funcs = &umc_v6_0_funcs; break; @@ -1492,6 +1434,19 @@ static void gmc_v9_0_set_umc_funcs(struct amdgpu_device *adev) else adev->umc.channel_idx_tbl = &umc_v6_7_channel_idx_tbl_second[0][0]; break; + case IP_VERSION(12, 0, 0): + adev->umc.max_ras_err_cnt_per_query = + UMC_V12_0_TOTAL_CHANNEL_NUM(adev) * UMC_V12_0_BAD_PAGE_NUM_PER_CHANNEL; + adev->umc.channel_inst_num = UMC_V12_0_CHANNEL_INSTANCE_NUM; + adev->umc.umc_inst_num = UMC_V12_0_UMC_INSTANCE_NUM; + adev->umc.node_inst_num /= UMC_V12_0_UMC_INSTANCE_NUM; + adev->umc.channel_offs = UMC_V12_0_PER_CHANNEL_OFFSET; + adev->umc.active_mask = adev->aid_mask; + adev->umc.retire_unit = UMC_V12_0_BAD_PAGE_NUM_PER_CHANNEL; + adev->umc.channel_idx_tbl = &umc_v12_0_channel_idx_tbl[0][0][0]; + if (!adev->gmc.xgmi.connected_to_cpu && !adev->gmc.is_app_apu) + adev->umc.ras = &umc_v12_0_ras; + break; default: break; } @@ -1499,7 +1454,7 @@ static void gmc_v9_0_set_umc_funcs(struct amdgpu_device *adev) static void gmc_v9_0_set_mmhub_funcs(struct amdgpu_device *adev) { - switch (adev->ip_versions[MMHUB_HWIP][0]) { + switch (amdgpu_ip_version(adev, MMHUB_HWIP, 0)) { case IP_VERSION(9, 4, 1): adev->mmhub.funcs = &mmhub_v9_4_funcs; break; @@ -1517,7 +1472,7 @@ static void gmc_v9_0_set_mmhub_funcs(struct amdgpu_device *adev) static void gmc_v9_0_set_mmhub_ras_funcs(struct amdgpu_device *adev) { - switch (adev->ip_versions[MMHUB_HWIP][0]) { + switch (amdgpu_ip_version(adev, MMHUB_HWIP, 0)) { case IP_VERSION(9, 4, 0): adev->mmhub.ras = &mmhub_v1_0_ras; break; @@ -1538,7 +1493,7 @@ static void gmc_v9_0_set_mmhub_ras_funcs(struct amdgpu_device *adev) static void gmc_v9_0_set_gfxhub_funcs(struct amdgpu_device *adev) { - if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3)) + if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3)) adev->gfxhub.funcs = &gfxhub_v1_2_funcs; else adev->gfxhub.funcs = &gfxhub_v1_0_funcs; @@ -1554,7 +1509,7 @@ static void gmc_v9_0_set_mca_ras_funcs(struct amdgpu_device *adev) struct amdgpu_mca *mca = &adev->mca; /* is UMC the right IP to check for MCA? Maybe DF? */ - switch (adev->ip_versions[UMC_HWIP][0]) { + switch (amdgpu_ip_version(adev, UMC_HWIP, 0)) { case IP_VERSION(6, 7, 0): if (!adev->gmc.xgmi.connected_to_cpu) { mca->mp0.ras = &mca_v3_0_mp0_ras; @@ -1581,18 +1536,18 @@ static int gmc_v9_0_early_init(void *handle) * 9.4.0, 9.4.1 and 9.4.3 don't have XGMI defined * in their IP discovery tables */ - if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 0) || - adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 1) || - adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3)) + if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 0) || + amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 1) || + amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3)) adev->gmc.xgmi.supported = true; - if (adev->ip_versions[XGMI_HWIP][0] == IP_VERSION(6, 1, 0)) { + if (amdgpu_ip_version(adev, XGMI_HWIP, 0) == IP_VERSION(6, 1, 0)) { adev->gmc.xgmi.supported = true; adev->gmc.xgmi.connected_to_cpu = adev->smuio.funcs->is_host_gpu_xgmi_supported(adev); } - if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3)) { + if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3)) { enum amdgpu_pkg_type pkg_type = adev->smuio.funcs->get_pkg_type(adev); /* On GFXIP 9.4.3. APU, there is no physical VRAM domain present @@ -1622,6 +1577,7 @@ static int gmc_v9_0_early_init(void *handle) adev->gmc.private_aperture_start = 0x1000000000000000ULL; adev->gmc.private_aperture_end = adev->gmc.private_aperture_start + (4ULL << 30) - 1; + adev->gmc.noretry_flags = AMDGPU_VM_NORETRY_FLAGS_TF; return 0; } @@ -1640,7 +1596,7 @@ static int gmc_v9_0_late_init(void *handle) * writes, while disables HBM ECC for vega10. */ if (!amdgpu_sriov_vf(adev) && - (adev->ip_versions[UMC_HWIP][0] == IP_VERSION(6, 0, 0))) { + (amdgpu_ip_version(adev, UMC_HWIP, 0) == IP_VERSION(6, 0, 0))) { if (!(adev->ras_enabled & (1 << AMDGPU_RAS_BLOCK__UMC))) { if (adev->df.funcs && adev->df.funcs->enable_ecc_force_par_wr_rmw) @@ -1649,13 +1605,8 @@ static int gmc_v9_0_late_init(void *handle) } if (!amdgpu_persistent_edc_harvesting_supported(adev)) { - if (adev->mmhub.ras && adev->mmhub.ras->ras_block.hw_ops && - adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count) - adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count(adev); - - if (adev->hdp.ras && adev->hdp.ras->ras_block.hw_ops && - adev->hdp.ras->ras_block.hw_ops->reset_ras_error_count) - adev->hdp.ras->ras_block.hw_ops->reset_ras_error_count(adev); + amdgpu_ras_reset_error_count(adev, AMDGPU_RAS_BLOCK__MMHUB); + amdgpu_ras_reset_error_count(adev, AMDGPU_RAS_BLOCK__HDP); } r = amdgpu_gmc_ras_late_init(adev); @@ -1670,14 +1621,17 @@ static void gmc_v9_0_vram_gtt_location(struct amdgpu_device *adev, { u64 base = adev->mmhub.funcs->get_fb_location(adev); + amdgpu_gmc_set_agp_default(adev, mc); + /* add the xgmi offset of the physical node */ base += adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size; if (adev->gmc.xgmi.connected_to_cpu) { amdgpu_gmc_sysvm_location(adev, mc); } else { amdgpu_gmc_vram_location(adev, mc, base); - amdgpu_gmc_gart_location(adev, mc); - amdgpu_gmc_agp_location(adev, mc); + amdgpu_gmc_gart_location(adev, mc, AMDGPU_GART_PLACEMENT_BEST_FIT); + if (!amdgpu_sriov_vf(adev) && (amdgpu_agp == 1)) + amdgpu_gmc_agp_location(adev, mc); } /* base offset of vram pages */ adev->vm_manager.vram_base_offset = adev->gfxhub.funcs->get_mc_fb_offset(adev); @@ -1748,7 +1702,7 @@ static int gmc_v9_0_mc_init(struct amdgpu_device *adev) /* set the gart size */ if (amdgpu_gart_size == -1) { - switch (adev->ip_versions[GC_HWIP][0]) { + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { case IP_VERSION(9, 0, 1): /* all engines support GPUVM */ case IP_VERSION(9, 2, 1): /* all engines support GPUVM */ case IP_VERSION(9, 4, 0): @@ -1827,8 +1781,8 @@ static int gmc_v9_0_gart_init(struct amdgpu_device *adev) */ static void gmc_v9_0_save_registers(struct amdgpu_device *adev) { - if ((adev->ip_versions[DCE_HWIP][0] == IP_VERSION(1, 0, 0)) || - (adev->ip_versions[DCE_HWIP][0] == IP_VERSION(1, 0, 1))) + if ((amdgpu_ip_version(adev, DCE_HWIP, 0) == IP_VERSION(1, 0, 0)) || + (amdgpu_ip_version(adev, DCE_HWIP, 0) == IP_VERSION(1, 0, 1))) adev->gmc.sdpif_register = RREG32_SOC15(DCE, 0, mmDCHUBBUB_SDPIF_MMIO_CNTRL_0); } @@ -1880,15 +1834,14 @@ static void gmc_v9_0_init_acpi_mem_ranges(struct amdgpu_device *adev, struct amdgpu_mem_partition_info *mem_ranges) { - int num_ranges = 0, ret, mem_groups; struct amdgpu_numa_info numa_info; int node_ids[MAX_MEM_RANGES]; + int num_ranges = 0, ret; int num_xcc, xcc_id; uint32_t xcc_mask; num_xcc = NUM_XCC(adev->gfx.xcc_mask); xcc_mask = (1U << num_xcc) - 1; - mem_groups = hweight32(adev->aid_mask); for_each_inst(xcc_id, xcc_mask) { ret = amdgpu_acpi_get_mem_info(adev, xcc_id, &numa_info); @@ -1913,12 +1866,6 @@ gmc_v9_0_init_acpi_mem_ranges(struct amdgpu_device *adev, } adev->gmc.num_mem_partitions = num_ranges; - - /* If there is only partition, don't use entire size */ - if (adev->gmc.num_mem_partitions == 1) { - mem_ranges[0].size = mem_ranges[0].size * (mem_groups - 1); - do_div(mem_ranges[0].size, mem_groups); - } } static void @@ -1973,10 +1920,9 @@ static int gmc_v9_0_init_mem_ranges(struct amdgpu_device *adev) { bool valid; - adev->gmc.mem_partitions = kzalloc( - MAX_MEM_RANGES * sizeof(struct amdgpu_mem_partition_info), - GFP_KERNEL); - + adev->gmc.mem_partitions = kcalloc(MAX_MEM_RANGES, + sizeof(struct amdgpu_mem_partition_info), + GFP_KERNEL); if (!adev->gmc.mem_partitions) return -ENOMEM; @@ -1999,6 +1945,19 @@ static int gmc_v9_0_init_mem_ranges(struct amdgpu_device *adev) return 0; } +static void gmc_v9_4_3_init_vram_info(struct amdgpu_device *adev) +{ + static const u32 regBIF_BIOS_SCRATCH_4 = 0x50; + u32 vram_info; + + if (!amdgpu_sriov_vf(adev)) { + vram_info = RREG32(regBIF_BIOS_SCRATCH_4); + adev->gmc.vram_vendor = vram_info & 0xF; + } + adev->gmc.vram_type = AMDGPU_VRAM_TYPE_HBM; + adev->gmc.vram_width = 128 * 64; +} + static int gmc_v9_0_sw_init(void *handle) { int r, vram_width = 0, vram_type = 0, vram_vendor = 0, dma_addr_bits; @@ -2011,15 +1970,12 @@ static int gmc_v9_0_sw_init(void *handle) spin_lock_init(&adev->gmc.invalidate_lock); - if (!(adev->bios) || adev->gmc.is_app_apu) { + if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3)) { + gmc_v9_4_3_init_vram_info(adev); + } else if (!adev->bios) { if (adev->flags & AMD_IS_APU) { - if (adev->gmc.is_app_apu) { - adev->gmc.vram_type = AMDGPU_VRAM_TYPE_HBM; - adev->gmc.vram_width = 128 * 64; - } else { - adev->gmc.vram_type = AMDGPU_VRAM_TYPE_DDR4; - adev->gmc.vram_width = 64 * 64; - } + adev->gmc.vram_type = AMDGPU_VRAM_TYPE_DDR4; + adev->gmc.vram_width = 64 * 64; } else { adev->gmc.vram_type = AMDGPU_VRAM_TYPE_HBM; adev->gmc.vram_width = 128 * 64; @@ -2054,7 +2010,7 @@ static int gmc_v9_0_sw_init(void *handle) adev->gmc.vram_type = vram_type; adev->gmc.vram_vendor = vram_vendor; } - switch (adev->ip_versions[GC_HWIP][0]) { + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { case IP_VERSION(9, 1, 0): case IP_VERSION(9, 2, 2): set_bit(AMDGPU_GFXHUB(0), adev->vmhubs_mask); @@ -2082,12 +2038,9 @@ static int gmc_v9_0_sw_init(void *handle) * vm size is 256TB (48bit), maximum size of Vega10, * block size 512 (9bit) */ - /* sriov restrict max_pfn below AMDGPU_GMC_HOLE */ - if (amdgpu_sriov_vf(adev)) - amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 47); - else - amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48); - if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2)) + + amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48); + if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2)) adev->gmc.translate_further = adev->vm_manager.num_level > 1; break; case IP_VERSION(9, 4, 1): @@ -2119,7 +2072,7 @@ static int gmc_v9_0_sw_init(void *handle) if (r) return r; - if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 1)) { + if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 1)) { r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VMC1, VMC_1_0__SRCID__VM_FAULT, &adev->gmc.vm_fault); if (r) @@ -2133,7 +2086,8 @@ static int gmc_v9_0_sw_init(void *handle) return r; if (!amdgpu_sriov_vf(adev) && - !adev->gmc.xgmi.connected_to_cpu) { + !adev->gmc.xgmi.connected_to_cpu && + !adev->gmc.is_app_apu) { /* interrupt sent to DF. */ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_DF, 0, &adev->gmc.ecc_irq); @@ -2147,10 +2101,13 @@ static int gmc_v9_0_sw_init(void *handle) */ adev->gmc.mc_mask = 0xffffffffffffULL; /* 48 bit MC */ - dma_addr_bits = adev->ip_versions[GC_HWIP][0] >= IP_VERSION(9, 4, 2) ? 48:44; + dma_addr_bits = amdgpu_ip_version(adev, GC_HWIP, 0) >= + IP_VERSION(9, 4, 2) ? + 48 : + 44; r = dma_set_mask_and_coherent(adev->dev, DMA_BIT_MASK(dma_addr_bits)); if (r) { - printk(KERN_WARNING "amdgpu: No suitable DMA available.\n"); + dev_warn(adev->dev, "amdgpu: No suitable DMA available.\n"); return r; } adev->need_swiotlb = drm_need_swiotlb(dma_addr_bits); @@ -2161,7 +2118,7 @@ static int gmc_v9_0_sw_init(void *handle) amdgpu_gmc_get_vbios_allocations(adev); - if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3)) { + if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3)) { r = gmc_v9_0_init_mem_ranges(adev); if (r) return r; @@ -2187,9 +2144,11 @@ static int gmc_v9_0_sw_init(void *handle) * for video processing. */ adev->vm_manager.first_kfd_vmid = - (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 1) || - adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2) || - adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3)) ? 3 : 8; + (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 1) || + amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2) || + amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3)) ? + 3 : + 8; amdgpu_vm_manager_init(adev); @@ -2199,7 +2158,7 @@ static int gmc_v9_0_sw_init(void *handle) if (r) return r; - if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3)) + if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3)) amdgpu_gmc_sysfs_init(adev); return 0; @@ -2209,10 +2168,8 @@ static int gmc_v9_0_sw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3)) + if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3)) amdgpu_gmc_sysfs_fini(adev); - adev->gmc.num_mem_partitions = 0; - kfree(adev->gmc.mem_partitions); amdgpu_gmc_ras_fini(adev); amdgpu_gem_force_release(adev); @@ -2226,13 +2183,15 @@ static int gmc_v9_0_sw_fini(void *handle) amdgpu_bo_free_kernel(&adev->gmc.pdb0_bo, NULL, &adev->gmc.ptr_pdb0); amdgpu_bo_fini(adev); + adev->gmc.num_mem_partitions = 0; + kfree(adev->gmc.mem_partitions); + return 0; } static void gmc_v9_0_init_golden_registers(struct amdgpu_device *adev) { - - switch (adev->ip_versions[MMHUB_HWIP][0]) { + switch (amdgpu_ip_version(adev, MMHUB_HWIP, 0)) { case IP_VERSION(9, 0, 0): if (amdgpu_sriov_vf(adev)) break; @@ -2266,8 +2225,8 @@ static void gmc_v9_0_init_golden_registers(struct amdgpu_device *adev) */ void gmc_v9_0_restore_registers(struct amdgpu_device *adev) { - if ((adev->ip_versions[DCE_HWIP][0] == IP_VERSION(1, 0, 0)) || - (adev->ip_versions[DCE_HWIP][0] == IP_VERSION(1, 0, 1))) { + if ((amdgpu_ip_version(adev, DCE_HWIP, 0) == IP_VERSION(1, 0, 0)) || + (amdgpu_ip_version(adev, DCE_HWIP, 0) == IP_VERSION(1, 0, 1))) { WREG32_SOC15(DCE, 0, mmDCHUBBUB_SDPIF_MMIO_CNTRL_0, adev->gmc.sdpif_register); WARN_ON(adev->gmc.sdpif_register != RREG32_SOC15(DCE, 0, mmDCHUBBUB_SDPIF_MMIO_CNTRL_0)); @@ -2304,7 +2263,7 @@ static int gmc_v9_0_gart_enable(struct amdgpu_device *adev) return r; DRM_INFO("PCIE GART of %uM enabled.\n", - (unsigned)(adev->gmc.gart_size >> 20)); + (unsigned int)(adev->gmc.gart_size >> 20)); if (adev->gmc.pdb0_bo) DRM_INFO("PDB0 located at 0x%016llX\n", (unsigned long long)amdgpu_bo_gpu_offset(adev->gmc.pdb0_bo)); @@ -2320,6 +2279,24 @@ static int gmc_v9_0_hw_init(void *handle) bool value; int i, r; + adev->gmc.flush_pasid_uses_kiq = true; + + /* Vega20+XGMI caches PTEs in TC and TLB. Add a heavy-weight TLB flush + * (type 2), which flushes both. Due to a race condition with + * concurrent memory accesses using the same TLB cache line, we still + * need a second TLB flush after this. + */ + adev->gmc.flush_tlb_needs_extra_type_2 = + amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 0) && + adev->gmc.xgmi.num_physical_nodes; + /* + * TODO: This workaround is badly documented and had a buggy + * implementation. We should probably verify what we do here. + */ + adev->gmc.flush_tlb_needs_extra_type_0 = + amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) && + adev->rev_id == 0; + /* The sequence of these two function calls matters.*/ gmc_v9_0_init_golden_registers(adev); @@ -2490,8 +2467,7 @@ const struct amd_ip_funcs gmc_v9_0_ip_funcs = { .get_clockgating_state = gmc_v9_0_get_clockgating_state, }; -const struct amdgpu_ip_block_version gmc_v9_0_ip_block = -{ +const struct amdgpu_ip_block_version gmc_v9_0_ip_block = { .type = AMD_IP_BLOCK_TYPE_GMC, .major = 9, .minor = 0, diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c b/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c index 71d1a2e3bac9..4db6bb73ead4 100644 --- a/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c @@ -49,8 +49,8 @@ static void hdp_v4_0_flush_hdp(struct amdgpu_device *adev, static void hdp_v4_0_invalidate_hdp(struct amdgpu_device *adev, struct amdgpu_ring *ring) { - if (adev->ip_versions[HDP_HWIP][0] == IP_VERSION(4, 4, 0) || - adev->ip_versions[HDP_HWIP][0] == IP_VERSION(4, 4, 2)) + if (amdgpu_ip_version(adev, HDP_HWIP, 0) == IP_VERSION(4, 4, 0) || + amdgpu_ip_version(adev, HDP_HWIP, 0) == IP_VERSION(4, 4, 2)) return; if (!ring || !ring->funcs->emit_wreg) @@ -80,7 +80,7 @@ static void hdp_v4_0_reset_ras_error_count(struct amdgpu_device *adev) if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__HDP)) return; - if (adev->ip_versions[HDP_HWIP][0] >= IP_VERSION(4, 4, 0)) + if (amdgpu_ip_version(adev, HDP_HWIP, 0) >= IP_VERSION(4, 4, 0)) WREG32_SOC15(HDP, 0, mmHDP_EDC_CNT, 0); else /*read back hdp ras counter to reset it to 0 */ @@ -92,10 +92,10 @@ static void hdp_v4_0_update_clock_gating(struct amdgpu_device *adev, { uint32_t def, data; - if (adev->ip_versions[HDP_HWIP][0] == IP_VERSION(4, 0, 0) || - adev->ip_versions[HDP_HWIP][0] == IP_VERSION(4, 0, 1) || - adev->ip_versions[HDP_HWIP][0] == IP_VERSION(4, 1, 1) || - adev->ip_versions[HDP_HWIP][0] == IP_VERSION(4, 1, 0)) { + if (amdgpu_ip_version(adev, HDP_HWIP, 0) == IP_VERSION(4, 0, 0) || + amdgpu_ip_version(adev, HDP_HWIP, 0) == IP_VERSION(4, 0, 1) || + amdgpu_ip_version(adev, HDP_HWIP, 0) == IP_VERSION(4, 1, 1) || + amdgpu_ip_version(adev, HDP_HWIP, 0) == IP_VERSION(4, 1, 0)) { def = data = RREG32(SOC15_REG_OFFSET(HDP, 0, mmHDP_MEM_POWER_LS)); if (enable && (adev->cg_flags & AMD_CG_SUPPORT_HDP_LS)) @@ -129,6 +129,11 @@ static void hdp_v4_0_get_clockgating_state(struct amdgpu_device *adev, { int data; + if (amdgpu_ip_version(adev, HDP_HWIP, 0) == IP_VERSION(4, 4, 2)) { + /* Default enabled */ + *flags |= AMD_CG_SUPPORT_HDP_MGCG; + return; + } /* AMD_CG_SUPPORT_HDP_LS */ data = RREG32(SOC15_REG_OFFSET(HDP, 0, mmHDP_MEM_POWER_LS)); if (data & HDP_MEM_POWER_LS__LS_ENABLE_MASK) @@ -137,7 +142,7 @@ static void hdp_v4_0_get_clockgating_state(struct amdgpu_device *adev, static void hdp_v4_0_init_registers(struct amdgpu_device *adev) { - switch (adev->ip_versions[HDP_HWIP][0]) { + switch (amdgpu_ip_version(adev, HDP_HWIP, 0)) { case IP_VERSION(4, 2, 1): WREG32_FIELD15(HDP, 0, HDP_MMHUB_CNTL, HDP_MMHUB_GCC, 1); break; @@ -145,9 +150,13 @@ static void hdp_v4_0_init_registers(struct amdgpu_device *adev) break; } + /* Do not program registers if VF */ + if (amdgpu_sriov_vf(adev)) + return; + WREG32_FIELD15(HDP, 0, HDP_MISC_CNTL, FLUSH_INVALIDATE_CACHE, 1); - if (adev->ip_versions[HDP_HWIP][0] == IP_VERSION(4, 4, 0)) + if (amdgpu_ip_version(adev, HDP_HWIP, 0) == IP_VERSION(4, 4, 0)) WREG32_FIELD15(HDP, 0, HDP_MISC_CNTL, READ_BUFFER_WATERMARK, 2); WREG32_SOC15(HDP, 0, mmHDP_NONSURFACE_BASE, (adev->gmc.vram_start >> 8)); diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v6_0.c b/drivers/gpu/drm/amd/amdgpu/hdp_v6_0.c index 063eba619f2f..ab06c2b4b20b 100644 --- a/drivers/gpu/drm/amd/amdgpu/hdp_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/hdp_v6_0.c @@ -28,6 +28,9 @@ #include "hdp/hdp_6_0_0_sh_mask.h" #include <uapi/linux/kfd_ioctl.h> +#define regHDP_CLK_CNTL_V6_1 0xd5 +#define regHDP_CLK_CNTL_V6_1_BASE_IDX 0 + static void hdp_v6_0_flush_hdp(struct amdgpu_device *adev, struct amdgpu_ring *ring) { @@ -40,7 +43,7 @@ static void hdp_v6_0_flush_hdp(struct amdgpu_device *adev, static void hdp_v6_0_update_clock_gating(struct amdgpu_device *adev, bool enable) { - uint32_t hdp_clk_cntl, hdp_clk_cntl1; + uint32_t hdp_clk_cntl; uint32_t hdp_mem_pwr_cntl; if (!(adev->cg_flags & (AMD_CG_SUPPORT_HDP_LS | @@ -48,14 +51,20 @@ static void hdp_v6_0_update_clock_gating(struct amdgpu_device *adev, AMD_CG_SUPPORT_HDP_SD))) return; - hdp_clk_cntl = hdp_clk_cntl1 = RREG32_SOC15(HDP, 0,regHDP_CLK_CNTL); + if (amdgpu_ip_version(adev, HDP_HWIP, 0) == IP_VERSION(6, 1, 0)) + hdp_clk_cntl = RREG32_SOC15(HDP, 0, regHDP_CLK_CNTL_V6_1); + else + hdp_clk_cntl = RREG32_SOC15(HDP, 0, regHDP_CLK_CNTL); hdp_mem_pwr_cntl = RREG32_SOC15(HDP, 0, regHDP_MEM_POWER_CTRL); /* Before doing clock/power mode switch, * forced on IPH & RC clock */ hdp_clk_cntl = REG_SET_FIELD(hdp_clk_cntl, HDP_CLK_CNTL, RC_MEM_CLK_SOFT_OVERRIDE, 1); - WREG32_SOC15(HDP, 0, regHDP_CLK_CNTL, hdp_clk_cntl); + if (amdgpu_ip_version(adev, HDP_HWIP, 0) == IP_VERSION(6, 1, 0)) + WREG32_SOC15(HDP, 0, regHDP_CLK_CNTL_V6_1, hdp_clk_cntl); + else + WREG32_SOC15(HDP, 0, regHDP_CLK_CNTL, hdp_clk_cntl); /* disable clock and power gating before any changing */ hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL, @@ -117,7 +126,10 @@ static void hdp_v6_0_update_clock_gating(struct amdgpu_device *adev, /* disable IPH & RC clock override after clock/power mode changing */ hdp_clk_cntl = REG_SET_FIELD(hdp_clk_cntl, HDP_CLK_CNTL, RC_MEM_CLK_SOFT_OVERRIDE, 0); - WREG32_SOC15(HDP, 0, regHDP_CLK_CNTL, hdp_clk_cntl); + if (amdgpu_ip_version(adev, HDP_HWIP, 0) == IP_VERSION(6, 1, 0)) + WREG32_SOC15(HDP, 0, regHDP_CLK_CNTL_V6_1, hdp_clk_cntl); + else + WREG32_SOC15(HDP, 0, regHDP_CLK_CNTL, hdp_clk_cntl); } static void hdp_v6_0_get_clockgating_state(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c b/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c index b02e1cef78a7..d9ed7332d805 100644 --- a/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c @@ -494,7 +494,8 @@ static int ih_v6_0_self_irq(struct amdgpu_device *adev, *adev->irq.ih1.wptr_cpu = wptr; schedule_work(&adev->irq.ih1_work); break; - default: break; + default: + break; } return 0; } @@ -535,7 +536,7 @@ static int ih_v6_0_sw_init(void *handle) * use bus address for ih ring by psp bl */ use_bus_addr = (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) ? false : true; - r = amdgpu_ih_ring_init(adev, &adev->irq.ih, 256 * 1024, use_bus_addr); + r = amdgpu_ih_ring_init(adev, &adev->irq.ih, IH_RING_SIZE, use_bus_addr); if (r) return r; @@ -548,7 +549,7 @@ static int ih_v6_0_sw_init(void *handle) /* initialize ih control register offset */ ih_v6_0_init_register_offset(adev); - r = amdgpu_ih_ring_init(adev, &adev->irq.ih_soft, PAGE_SIZE, true); + r = amdgpu_ih_ring_init(adev, &adev->irq.ih_soft, IH_SW_RING_SIZE, true); if (r) return r; @@ -640,8 +641,6 @@ static void ih_v6_0_update_clockgating_state(struct amdgpu_device *adev, if (def != data) WREG32_SOC15(OSSSYS, 0, regIH_CLK_CTRL, data); } - - return; } static int ih_v6_0_set_clockgating_state(void *handle, @@ -725,8 +724,6 @@ static void ih_v6_0_get_clockgating_state(void *handle, u64 *flags) if (!RREG32_SOC15(OSSSYS, 0, regIH_CLK_CTRL)) *flags |= AMD_CG_SUPPORT_IH_CG; - - return; } static const struct amd_ip_funcs ih_v6_0_ip_funcs = { @@ -759,8 +756,7 @@ static void ih_v6_0_set_interrupt_funcs(struct amdgpu_device *adev) adev->irq.ih_funcs = &ih_v6_0_funcs; } -const struct amdgpu_ip_block_version ih_v6_0_ip_block = -{ +const struct amdgpu_ip_block_version ih_v6_0_ip_block = { .type = AMD_IP_BLOCK_TYPE_IH, .major = 6, .minor = 0, diff --git a/drivers/gpu/drm/amd/amdgpu/ih_v6_1.c b/drivers/gpu/drm/amd/amdgpu/ih_v6_1.c new file mode 100644 index 000000000000..8fb05eae340a --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/ih_v6_1.c @@ -0,0 +1,769 @@ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include <linux/pci.h> + +#include "amdgpu.h" +#include "amdgpu_ih.h" + +#include "oss/osssys_6_1_0_offset.h" +#include "oss/osssys_6_1_0_sh_mask.h" + +#include "soc15_common.h" +#include "ih_v6_1.h" + +#define MAX_REARM_RETRY 10 + +static void ih_v6_1_set_interrupt_funcs(struct amdgpu_device *adev); + +/** + * ih_v6_1_init_register_offset - Initialize register offset for ih rings + * + * @adev: amdgpu_device pointer + * + * Initialize register offset ih rings (IH_V6_0). + */ +static void ih_v6_1_init_register_offset(struct amdgpu_device *adev) +{ + struct amdgpu_ih_regs *ih_regs; + + /* ih ring 2 is removed + * ih ring and ih ring 1 are available */ + if (adev->irq.ih.ring_size) { + ih_regs = &adev->irq.ih.ih_regs; + ih_regs->ih_rb_base = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_BASE); + ih_regs->ih_rb_base_hi = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_BASE_HI); + ih_regs->ih_rb_cntl = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_CNTL); + ih_regs->ih_rb_wptr = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_WPTR); + ih_regs->ih_rb_rptr = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_RPTR); + ih_regs->ih_doorbell_rptr = SOC15_REG_OFFSET(OSSSYS, 0, regIH_DOORBELL_RPTR); + ih_regs->ih_rb_wptr_addr_lo = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_WPTR_ADDR_LO); + ih_regs->ih_rb_wptr_addr_hi = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_WPTR_ADDR_HI); + ih_regs->psp_reg_id = PSP_REG_IH_RB_CNTL; + } + + if (adev->irq.ih1.ring_size) { + ih_regs = &adev->irq.ih1.ih_regs; + ih_regs->ih_rb_base = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_BASE_RING1); + ih_regs->ih_rb_base_hi = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_BASE_HI_RING1); + ih_regs->ih_rb_cntl = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_CNTL_RING1); + ih_regs->ih_rb_wptr = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_WPTR_RING1); + ih_regs->ih_rb_rptr = SOC15_REG_OFFSET(OSSSYS, 0, regIH_RB_RPTR_RING1); + ih_regs->ih_doorbell_rptr = SOC15_REG_OFFSET(OSSSYS, 0, regIH_DOORBELL_RPTR_RING1); + ih_regs->psp_reg_id = PSP_REG_IH_RB_CNTL_RING1; + } +} + +/** + * force_update_wptr_for_self_int - Force update the wptr for self interrupt + * + * @adev: amdgpu_device pointer + * @threshold: threshold to trigger the wptr reporting + * @timeout: timeout to trigger the wptr reporting + * @enabled: Enable/disable timeout flush mechanism + * + * threshold input range: 0 ~ 15, default 0, + * real_threshold = 2^threshold + * timeout input range: 0 ~ 20, default 8, + * real_timeout = (2^timeout) * 1024 / (socclk_freq) + * + * Force update wptr for self interrupt ( >= SIENNA_CICHLID). + */ +static void +force_update_wptr_for_self_int(struct amdgpu_device *adev, + u32 threshold, u32 timeout, bool enabled) +{ + u32 ih_cntl, ih_rb_cntl; + + ih_cntl = RREG32_SOC15(OSSSYS, 0, regIH_CNTL2); + ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, regIH_RB_CNTL_RING1); + + ih_cntl = REG_SET_FIELD(ih_cntl, IH_CNTL2, + SELF_IV_FORCE_WPTR_UPDATE_TIMEOUT, timeout); + ih_cntl = REG_SET_FIELD(ih_cntl, IH_CNTL2, + SELF_IV_FORCE_WPTR_UPDATE_ENABLE, enabled); + ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING1, + RB_USED_INT_THRESHOLD, threshold); + + if (amdgpu_sriov_vf(adev) && amdgpu_sriov_reg_indirect_ih(adev)) { + if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING1, ih_rb_cntl)) + return; + } else { + WREG32_SOC15(OSSSYS, 0, regIH_RB_CNTL_RING1, ih_rb_cntl); + } + + WREG32_SOC15(OSSSYS, 0, regIH_CNTL2, ih_cntl); +} + +/** + * ih_v6_1_toggle_ring_interrupts - toggle the interrupt ring buffer + * + * @adev: amdgpu_device pointer + * @ih: amdgpu_ih_ring pointer + * @enable: true - enable the interrupts, false - disable the interrupts + * + * Toggle the interrupt ring buffer (IH_V6_0) + */ +static int ih_v6_1_toggle_ring_interrupts(struct amdgpu_device *adev, + struct amdgpu_ih_ring *ih, + bool enable) +{ + struct amdgpu_ih_regs *ih_regs; + uint32_t tmp; + + ih_regs = &ih->ih_regs; + + tmp = RREG32(ih_regs->ih_rb_cntl); + tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, RB_ENABLE, (enable ? 1 : 0)); + /* enable_intr field is only valid in ring0 */ + if (ih == &adev->irq.ih) + tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, ENABLE_INTR, (enable ? 1 : 0)); + + if (amdgpu_sriov_vf(adev) && amdgpu_sriov_reg_indirect_ih(adev)) { + if (psp_reg_program(&adev->psp, ih_regs->psp_reg_id, tmp)) + return -ETIMEDOUT; + } else { + WREG32(ih_regs->ih_rb_cntl, tmp); + } + + if (enable) { + ih->enabled = true; + } else { + /* set rptr, wptr to 0 */ + WREG32(ih_regs->ih_rb_rptr, 0); + WREG32(ih_regs->ih_rb_wptr, 0); + ih->enabled = false; + ih->rptr = 0; + } + + return 0; +} + +/** + * ih_v6_1_toggle_interrupts - Toggle all the available interrupt ring buffers + * + * @adev: amdgpu_device pointer + * @enable: enable or disable interrupt ring buffers + * + * Toggle all the available interrupt ring buffers (IH_V6_0). + */ +static int ih_v6_1_toggle_interrupts(struct amdgpu_device *adev, bool enable) +{ + struct amdgpu_ih_ring *ih[] = {&adev->irq.ih, &adev->irq.ih1}; + int i; + int r; + + for (i = 0; i < ARRAY_SIZE(ih); i++) { + if (ih[i]->ring_size) { + r = ih_v6_1_toggle_ring_interrupts(adev, ih[i], enable); + if (r) + return r; + } + } + + return 0; +} + +static uint32_t ih_v6_1_rb_cntl(struct amdgpu_ih_ring *ih, uint32_t ih_rb_cntl) +{ + int rb_bufsz = order_base_2(ih->ring_size / 4); + + ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, + MC_SPACE, ih->use_bus_addr ? 2 : 4); + ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, + WPTR_OVERFLOW_CLEAR, 1); + ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, + WPTR_OVERFLOW_ENABLE, 1); + ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, RB_SIZE, rb_bufsz); + /* Ring Buffer write pointer writeback. If enabled, IH_RB_WPTR register + * value is written to memory + */ + ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, + WPTR_WRITEBACK_ENABLE, 1); + ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, MC_SNOOP, 1); + ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, MC_RO, 0); + ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, MC_VMID, 0); + + return ih_rb_cntl; +} + +static uint32_t ih_v6_1_doorbell_rptr(struct amdgpu_ih_ring *ih) +{ + u32 ih_doorbell_rtpr = 0; + + if (ih->use_doorbell) { + ih_doorbell_rtpr = REG_SET_FIELD(ih_doorbell_rtpr, + IH_DOORBELL_RPTR, OFFSET, + ih->doorbell_index); + ih_doorbell_rtpr = REG_SET_FIELD(ih_doorbell_rtpr, + IH_DOORBELL_RPTR, + ENABLE, 1); + } else { + ih_doorbell_rtpr = REG_SET_FIELD(ih_doorbell_rtpr, + IH_DOORBELL_RPTR, + ENABLE, 0); + } + return ih_doorbell_rtpr; +} + +/** + * ih_v6_1_enable_ring - enable an ih ring buffer + * + * @adev: amdgpu_device pointer + * @ih: amdgpu_ih_ring pointer + * + * Enable an ih ring buffer (IH_V6_0) + */ +static int ih_v6_1_enable_ring(struct amdgpu_device *adev, + struct amdgpu_ih_ring *ih) +{ + struct amdgpu_ih_regs *ih_regs; + uint32_t tmp; + + ih_regs = &ih->ih_regs; + + /* Ring Buffer base. [39:8] of 40-bit address of the beginning of the ring buffer*/ + WREG32(ih_regs->ih_rb_base, ih->gpu_addr >> 8); + WREG32(ih_regs->ih_rb_base_hi, (ih->gpu_addr >> 40) & 0xff); + + tmp = RREG32(ih_regs->ih_rb_cntl); + tmp = ih_v6_1_rb_cntl(ih, tmp); + if (ih == &adev->irq.ih) + tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, RPTR_REARM, !!adev->irq.msi_enabled); + if (ih == &adev->irq.ih1) { + tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_ENABLE, 0); + tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, RB_FULL_DRAIN_ENABLE, 1); + } + + if (amdgpu_sriov_vf(adev) && amdgpu_sriov_reg_indirect_ih(adev)) { + if (psp_reg_program(&adev->psp, ih_regs->psp_reg_id, tmp)) { + DRM_ERROR("PSP program IH_RB_CNTL failed!\n"); + return -ETIMEDOUT; + } + } else { + WREG32(ih_regs->ih_rb_cntl, tmp); + } + + if (ih == &adev->irq.ih) { + /* set the ih ring 0 writeback address whether it's enabled or not */ + WREG32(ih_regs->ih_rb_wptr_addr_lo, lower_32_bits(ih->wptr_addr)); + WREG32(ih_regs->ih_rb_wptr_addr_hi, upper_32_bits(ih->wptr_addr) & 0xFFFF); + } + + /* set rptr, wptr to 0 */ + WREG32(ih_regs->ih_rb_wptr, 0); + WREG32(ih_regs->ih_rb_rptr, 0); + + WREG32(ih_regs->ih_doorbell_rptr, ih_v6_1_doorbell_rptr(ih)); + + return 0; +} + +/** + * ih_v6_1_irq_init - init and enable the interrupt ring + * + * @adev: amdgpu_device pointer + * + * Allocate a ring buffer for the interrupt controller, + * enable the RLC, disable interrupts, enable the IH + * ring buffer and enable it. + * Called at device load and reume. + * Returns 0 for success, errors for failure. + */ +static int ih_v6_1_irq_init(struct amdgpu_device *adev) +{ + struct amdgpu_ih_ring *ih[] = {&adev->irq.ih, &adev->irq.ih1}; + u32 ih_chicken; + u32 tmp; + int ret; + int i; + + /* disable irqs */ + ret = ih_v6_1_toggle_interrupts(adev, false); + if (ret) + return ret; + + adev->nbio.funcs->ih_control(adev); + + if (unlikely((adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) || + (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO))) { + if (ih[0]->use_bus_addr) { + ih_chicken = RREG32_SOC15(OSSSYS, 0, regIH_CHICKEN); + ih_chicken = REG_SET_FIELD(ih_chicken, + IH_CHICKEN, MC_SPACE_GPA_ENABLE, 1); + WREG32_SOC15(OSSSYS, 0, regIH_CHICKEN, ih_chicken); + } + } + + for (i = 0; i < ARRAY_SIZE(ih); i++) { + if (ih[i]->ring_size) { + ret = ih_v6_1_enable_ring(adev, ih[i]); + if (ret) + return ret; + } + } + + /* update doorbell range for ih ring 0 */ + adev->nbio.funcs->ih_doorbell_range(adev, ih[0]->use_doorbell, + ih[0]->doorbell_index); + + tmp = RREG32_SOC15(OSSSYS, 0, regIH_STORM_CLIENT_LIST_CNTL); + tmp = REG_SET_FIELD(tmp, IH_STORM_CLIENT_LIST_CNTL, + CLIENT18_IS_STORM_CLIENT, 1); + WREG32_SOC15(OSSSYS, 0, regIH_STORM_CLIENT_LIST_CNTL, tmp); + + tmp = RREG32_SOC15(OSSSYS, 0, regIH_INT_FLOOD_CNTL); + tmp = REG_SET_FIELD(tmp, IH_INT_FLOOD_CNTL, FLOOD_CNTL_ENABLE, 1); + WREG32_SOC15(OSSSYS, 0, regIH_INT_FLOOD_CNTL, tmp); + + /* GC/MMHUB UTCL2 page fault interrupts are configured as + * MSI storm capable interrupts by deafult. The delay is + * used to avoid ISR being called too frequently + * when page fault happens on several continuous page + * and thus avoid MSI storm */ + tmp = RREG32_SOC15(OSSSYS, 0, regIH_MSI_STORM_CTRL); + tmp = REG_SET_FIELD(tmp, IH_MSI_STORM_CTRL, + DELAY, 3); + WREG32_SOC15(OSSSYS, 0, regIH_MSI_STORM_CTRL, tmp); + + pci_set_master(adev->pdev); + + /* enable interrupts */ + ret = ih_v6_1_toggle_interrupts(adev, true); + if (ret) + return ret; + /* enable wptr force update for self int */ + force_update_wptr_for_self_int(adev, 0, 8, true); + + if (adev->irq.ih_soft.ring_size) + adev->irq.ih_soft.enabled = true; + + return 0; +} + +/** + * ih_v6_1_irq_disable - disable interrupts + * + * @adev: amdgpu_device pointer + * + * Disable interrupts on the hw. + */ +static void ih_v6_1_irq_disable(struct amdgpu_device *adev) +{ + force_update_wptr_for_self_int(adev, 0, 8, false); + ih_v6_1_toggle_interrupts(adev, false); + + /* Wait and acknowledge irq */ + mdelay(1); +} + +/** + * ih_v6_1_get_wptr - get the IH ring buffer wptr + * + * @adev: amdgpu_device pointer + * @ih: amdgpu_ih_ring pointer + * + * Get the IH ring buffer wptr from either the register + * or the writeback memory buffer. Also check for + * ring buffer overflow and deal with it. + * Returns the value of the wptr. + */ +static u32 ih_v6_1_get_wptr(struct amdgpu_device *adev, + struct amdgpu_ih_ring *ih) +{ + u32 wptr, tmp; + struct amdgpu_ih_regs *ih_regs; + + wptr = le32_to_cpu(*ih->wptr_cpu); + ih_regs = &ih->ih_regs; + + if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW)) + goto out; + + wptr = RREG32_NO_KIQ(ih_regs->ih_rb_wptr); + if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW)) + goto out; + wptr = REG_SET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW, 0); + + /* When a ring buffer overflow happen start parsing interrupt + * from the last not overwritten vector (wptr + 32). Hopefully + * this should allow us to catch up. + */ + tmp = (wptr + 32) & ih->ptr_mask; + dev_warn(adev->dev, "IH ring buffer overflow " + "(0x%08X, 0x%08X, 0x%08X)\n", + wptr, ih->rptr, tmp); + ih->rptr = tmp; + + tmp = RREG32_NO_KIQ(ih_regs->ih_rb_cntl); + tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1); + WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp); +out: + return (wptr & ih->ptr_mask); +} + +/** + * ih_v6_1_irq_rearm - rearm IRQ if lost + * + * @adev: amdgpu_device pointer + * @ih: amdgpu_ih_ring pointer + * + */ +static void ih_v6_1_irq_rearm(struct amdgpu_device *adev, + struct amdgpu_ih_ring *ih) +{ + uint32_t v = 0; + uint32_t i = 0; + struct amdgpu_ih_regs *ih_regs; + + ih_regs = &ih->ih_regs; + + /* Rearm IRQ / re-write doorbell if doorbell write is lost */ + for (i = 0; i < MAX_REARM_RETRY; i++) { + v = RREG32_NO_KIQ(ih_regs->ih_rb_rptr); + if ((v < ih->ring_size) && (v != ih->rptr)) + WDOORBELL32(ih->doorbell_index, ih->rptr); + else + break; + } +} + +/** + * ih_v6_1_set_rptr - set the IH ring buffer rptr + * + * @adev: amdgpu_device pointer + * @ih: amdgpu_ih_ring pointer + * + * Set the IH ring buffer rptr. + */ +static void ih_v6_1_set_rptr(struct amdgpu_device *adev, + struct amdgpu_ih_ring *ih) +{ + struct amdgpu_ih_regs *ih_regs; + + if (ih->use_doorbell) { + /* XXX check if swapping is necessary on BE */ + *ih->rptr_cpu = ih->rptr; + WDOORBELL32(ih->doorbell_index, ih->rptr); + + if (amdgpu_sriov_vf(adev)) + ih_v6_1_irq_rearm(adev, ih); + } else { + ih_regs = &ih->ih_regs; + WREG32(ih_regs->ih_rb_rptr, ih->rptr); + } +} + +/** + * ih_v6_1_self_irq - dispatch work for ring 1 + * + * @adev: amdgpu_device pointer + * @source: irq source + * @entry: IV with WPTR update + * + * Update the WPTR from the IV and schedule work to handle the entries. + */ +static int ih_v6_1_self_irq(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + uint32_t wptr = cpu_to_le32(entry->src_data[0]); + + switch (entry->ring_id) { + case 1: + *adev->irq.ih1.wptr_cpu = wptr; + schedule_work(&adev->irq.ih1_work); + break; + default: + break; + } + return 0; +} + +static const struct amdgpu_irq_src_funcs ih_v6_1_self_irq_funcs = { + .process = ih_v6_1_self_irq, +}; + +static void ih_v6_1_set_self_irq_funcs(struct amdgpu_device *adev) +{ + adev->irq.self_irq.num_types = 0; + adev->irq.self_irq.funcs = &ih_v6_1_self_irq_funcs; +} + +static int ih_v6_1_early_init(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + ih_v6_1_set_interrupt_funcs(adev); + ih_v6_1_set_self_irq_funcs(adev); + return 0; +} + +static int ih_v6_1_sw_init(void *handle) +{ + int r; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + bool use_bus_addr; + + r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_IH, 0, + &adev->irq.self_irq); + + if (r) + return r; + + /* use gpu virtual address for ih ring + * until ih_checken is programmed to allow + * use bus address for ih ring by psp bl */ + use_bus_addr = + (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) ? false : true; + r = amdgpu_ih_ring_init(adev, &adev->irq.ih, 256 * 1024, use_bus_addr); + if (r) + return r; + + adev->irq.ih.use_doorbell = true; + adev->irq.ih.doorbell_index = adev->doorbell_index.ih << 1; + + adev->irq.ih1.ring_size = 0; + adev->irq.ih2.ring_size = 0; + + /* initialize ih control register offset */ + ih_v6_1_init_register_offset(adev); + + r = amdgpu_ih_ring_init(adev, &adev->irq.ih_soft, PAGE_SIZE, true); + if (r) + return r; + + r = amdgpu_irq_init(adev); + + return r; +} + +static int ih_v6_1_sw_fini(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + amdgpu_irq_fini_sw(adev); + + return 0; +} + +static int ih_v6_1_hw_init(void *handle) +{ + int r; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + r = ih_v6_1_irq_init(adev); + if (r) + return r; + + return 0; +} + +static int ih_v6_1_hw_fini(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + ih_v6_1_irq_disable(adev); + + return 0; +} + +static int ih_v6_1_suspend(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + return ih_v6_1_hw_fini(adev); +} + +static int ih_v6_1_resume(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + return ih_v6_1_hw_init(adev); +} + +static bool ih_v6_1_is_idle(void *handle) +{ + /* todo */ + return true; +} + +static int ih_v6_1_wait_for_idle(void *handle) +{ + /* todo */ + return -ETIMEDOUT; +} + +static int ih_v6_1_soft_reset(void *handle) +{ + /* todo */ + return 0; +} + +static void ih_v6_1_update_clockgating_state(struct amdgpu_device *adev, + bool enable) +{ + uint32_t data, def, field_val; + + if (adev->cg_flags & AMD_CG_SUPPORT_IH_CG) { + def = data = RREG32_SOC15(OSSSYS, 0, regIH_CLK_CTRL); + field_val = enable ? 0 : 1; + data = REG_SET_FIELD(data, IH_CLK_CTRL, + DBUS_MUX_CLK_SOFT_OVERRIDE, field_val); + data = REG_SET_FIELD(data, IH_CLK_CTRL, + OSSSYS_SHARE_CLK_SOFT_OVERRIDE, field_val); + data = REG_SET_FIELD(data, IH_CLK_CTRL, + LIMIT_SMN_CLK_SOFT_OVERRIDE, field_val); + data = REG_SET_FIELD(data, IH_CLK_CTRL, + DYN_CLK_SOFT_OVERRIDE, field_val); + data = REG_SET_FIELD(data, IH_CLK_CTRL, + REG_CLK_SOFT_OVERRIDE, field_val); + if (def != data) + WREG32_SOC15(OSSSYS, 0, regIH_CLK_CTRL, data); + } + + return; +} + +static int ih_v6_1_set_clockgating_state(void *handle, + enum amd_clockgating_state state) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + ih_v6_1_update_clockgating_state(adev, + state == AMD_CG_STATE_GATE); + return 0; +} + +static void ih_v6_1_update_ih_mem_power_gating(struct amdgpu_device *adev, + bool enable) +{ + uint32_t ih_mem_pwr_cntl; + + /* Disable ih sram power cntl before switch powergating mode */ + ih_mem_pwr_cntl = RREG32_SOC15(OSSSYS, 0, regIH_MEM_POWER_CTRL); + ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL, + IH_BUFFER_MEM_POWER_CTRL_EN, 0); + WREG32_SOC15(OSSSYS, 0, regIH_MEM_POWER_CTRL, ih_mem_pwr_cntl); + + /* It is recommended to set mem powergating mode to DS mode */ + if (enable) { + /* mem power mode */ + ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL, + IH_BUFFER_MEM_POWER_LS_EN, 0); + ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL, + IH_BUFFER_MEM_POWER_DS_EN, 1); + ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL, + IH_BUFFER_MEM_POWER_SD_EN, 0); + /* cam mem power mode */ + ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL, + IH_RETRY_INT_CAM_MEM_POWER_LS_EN, 0); + ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL, + IH_RETRY_INT_CAM_MEM_POWER_DS_EN, 1); + ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL, + IH_RETRY_INT_CAM_MEM_POWER_SD_EN, 0); + /* re-enable power cntl */ + ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL, + IH_BUFFER_MEM_POWER_CTRL_EN, 1); + } else { + /* mem power mode */ + ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL, + IH_BUFFER_MEM_POWER_LS_EN, 0); + ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL, + IH_BUFFER_MEM_POWER_DS_EN, 0); + ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL, + IH_BUFFER_MEM_POWER_SD_EN, 0); + /* cam mem power mode */ + ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL, + IH_RETRY_INT_CAM_MEM_POWER_LS_EN, 0); + ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL, + IH_RETRY_INT_CAM_MEM_POWER_DS_EN, 0); + ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL, + IH_RETRY_INT_CAM_MEM_POWER_SD_EN, 0); + /* re-enable power cntl*/ + ih_mem_pwr_cntl = REG_SET_FIELD(ih_mem_pwr_cntl, IH_MEM_POWER_CTRL, + IH_BUFFER_MEM_POWER_CTRL_EN, 1); + } + + WREG32_SOC15(OSSSYS, 0, regIH_MEM_POWER_CTRL, ih_mem_pwr_cntl); +} + +static int ih_v6_1_set_powergating_state(void *handle, + enum amd_powergating_state state) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + bool enable = (state == AMD_PG_STATE_GATE); + + if (adev->pg_flags & AMD_PG_SUPPORT_IH_SRAM_PG) + ih_v6_1_update_ih_mem_power_gating(adev, enable); + + return 0; +} + +static void ih_v6_1_get_clockgating_state(void *handle, u64 *flags) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + if (!RREG32_SOC15(OSSSYS, 0, regIH_CLK_CTRL)) + *flags |= AMD_CG_SUPPORT_IH_CG; + + return; +} + +static const struct amd_ip_funcs ih_v6_1_ip_funcs = { + .name = "ih_v6_1", + .early_init = ih_v6_1_early_init, + .late_init = NULL, + .sw_init = ih_v6_1_sw_init, + .sw_fini = ih_v6_1_sw_fini, + .hw_init = ih_v6_1_hw_init, + .hw_fini = ih_v6_1_hw_fini, + .suspend = ih_v6_1_suspend, + .resume = ih_v6_1_resume, + .is_idle = ih_v6_1_is_idle, + .wait_for_idle = ih_v6_1_wait_for_idle, + .soft_reset = ih_v6_1_soft_reset, + .set_clockgating_state = ih_v6_1_set_clockgating_state, + .set_powergating_state = ih_v6_1_set_powergating_state, + .get_clockgating_state = ih_v6_1_get_clockgating_state, +}; + +static const struct amdgpu_ih_funcs ih_v6_1_funcs = { + .get_wptr = ih_v6_1_get_wptr, + .decode_iv = amdgpu_ih_decode_iv_helper, + .decode_iv_ts = amdgpu_ih_decode_iv_ts_helper, + .set_rptr = ih_v6_1_set_rptr +}; + +static void ih_v6_1_set_interrupt_funcs(struct amdgpu_device *adev) +{ + adev->irq.ih_funcs = &ih_v6_1_funcs; +} + +const struct amdgpu_ip_block_version ih_v6_1_ip_block = { + .type = AMD_IP_BLOCK_TYPE_IH, + .major = 6, + .minor = 0, + .rev = 0, + .funcs = &ih_v6_1_ip_funcs, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/ih_v6_1.h b/drivers/gpu/drm/amd/amdgpu/ih_v6_1.h new file mode 100644 index 000000000000..2232bc5cbd09 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/ih_v6_1.h @@ -0,0 +1,28 @@ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#ifndef __IH_V6_1_IH_H__ +#define __IH_V6_1_IH_H__ + +extern const struct amdgpu_ip_block_version ih_v6_1_ip_block; + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c b/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c index 4ab90c7852c3..c0bdab3bf0e4 100644 --- a/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c @@ -36,10 +36,11 @@ MODULE_FIRMWARE("amdgpu/gc_11_0_1_imu.bin"); MODULE_FIRMWARE("amdgpu/gc_11_0_2_imu.bin"); MODULE_FIRMWARE("amdgpu/gc_11_0_3_imu.bin"); MODULE_FIRMWARE("amdgpu/gc_11_0_4_imu.bin"); +MODULE_FIRMWARE("amdgpu/gc_11_5_0_imu.bin"); static int imu_v11_0_init_microcode(struct amdgpu_device *adev) { - char fw_name[40]; + char fw_name[45]; char ucode_prefix[30]; int err; const struct imu_firmware_header_v1_0 *imu_hdr; @@ -54,7 +55,6 @@ static int imu_v11_0_init_microcode(struct amdgpu_device *adev) if (err) goto out; imu_hdr = (const struct imu_firmware_header_v1_0 *)adev->gfx.imu_fw->data; - adev->gfx.imu_fw_version = le32_to_cpu(imu_hdr->header.ucode_version); //adev->gfx.imu_feature_version = le32_to_cpu(imu_hdr->ucode_feature_version); if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { @@ -68,7 +68,8 @@ static int imu_v11_0_init_microcode(struct amdgpu_device *adev) info->fw = adev->gfx.imu_fw; adev->firmware.fw_size += ALIGN(le32_to_cpu(imu_hdr->imu_dram_ucode_size_bytes), PAGE_SIZE); - } + } else + adev->gfx.imu_fw_version = le32_to_cpu(imu_hdr->header.ucode_version); out: if (err) { @@ -352,7 +353,7 @@ static void imu_v11_0_program_rlc_ram(struct amdgpu_device *adev) WREG32_SOC15(GC, 0, regGFX_IMU_RLC_RAM_INDEX, 0x2); - switch (adev->ip_versions[GC_HWIP][0]) { + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { case IP_VERSION(11, 0, 0): program_imu_rlc_ram(adev, imu_rlc_ram_golden_11, (const u32)ARRAY_SIZE(imu_rlc_ram_golden_11)); diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c index c25d4a07350b..1c8116d75f63 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c @@ -807,8 +807,7 @@ static void jpeg_v2_0_set_irq_funcs(struct amdgpu_device *adev) adev->jpeg.inst->irq.funcs = &jpeg_v2_0_irq_funcs; } -const struct amdgpu_ip_block_version jpeg_v2_0_ip_block = -{ +const struct amdgpu_ip_block_version jpeg_v2_0_ip_block = { .type = AMD_IP_BLOCK_TYPE_JPEG, .major = 2, .minor = 0, diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c index aadb74de52bc..e67a337457ed 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c @@ -128,7 +128,7 @@ static int jpeg_v2_5_sw_init(void *handle) ring = adev->jpeg.inst[i].ring_dec; ring->use_doorbell = true; - if (adev->ip_versions[UVD_HWIP][0] == IP_VERSION(2, 5, 0)) + if (amdgpu_ip_version(adev, UVD_HWIP, 0) == IP_VERSION(2, 5, 0)) ring->vm_hub = AMDGPU_MMHUB1(0); else ring->vm_hub = AMDGPU_MMHUB0(0); @@ -822,7 +822,7 @@ static struct amdgpu_jpeg_ras jpeg_v2_6_ras = { static void jpeg_v2_5_set_ras_funcs(struct amdgpu_device *adev) { - switch (adev->ip_versions[JPEG_HWIP][0]) { + switch (amdgpu_ip_version(adev, JPEG_HWIP, 0)) { case IP_VERSION(2, 6, 0): adev->jpeg.ras = &jpeg_v2_6_ras; break; diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c index 79791379fc2b..a92481da60cd 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c @@ -52,7 +52,7 @@ static int jpeg_v3_0_early_init(void *handle) u32 harvest; - switch (adev->ip_versions[UVD_HWIP][0]) { + switch (amdgpu_ip_version(adev, UVD_HWIP, 0)) { case IP_VERSION(3, 1, 1): case IP_VERSION(3, 1, 2): break; @@ -479,7 +479,7 @@ static int jpeg_v3_0_set_clockgating_state(void *handle, enum amd_clockgating_state state) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - bool enable = (state == AMD_CG_STATE_GATE) ? true : false; + bool enable = state == AMD_CG_STATE_GATE; if (enable) { if (!jpeg_v3_0_is_idle(handle)) diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c index a707d407fbd0..bc38b90f8cf8 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c @@ -431,6 +431,10 @@ static int jpeg_v4_0_start_sriov(struct amdgpu_device *adev) end.cmd_header.command_type = MMSCH_COMMAND__END; + size = sizeof(struct mmsch_v4_0_init_header); + table_loc = (uint32_t *)table->cpu_addr; + memcpy(&header, (void *)table_loc, size); + header.version = MMSCH_VERSION; header.total_size = RREG32_SOC15(VCN, 0, regMMSCH_VF_CTX_SIZE); @@ -468,6 +472,9 @@ static int jpeg_v4_0_start_sriov(struct amdgpu_device *adev) table_loc = (uint32_t *)table->cpu_addr; memcpy((void *)table_loc, &header, size); + /* Perform HDP flush before writing to MMSCH registers */ + amdgpu_device_flush_hdp(adev, NULL); + /* message MMSCH (in VCN[0]) to initialize this client * 1, write to mmsch_vf_ctx_addr_lo/hi register with GPU mc addr * of memory descriptor location @@ -515,8 +522,11 @@ static int jpeg_v4_0_start_sriov(struct amdgpu_device *adev) return -EBUSY; } } - if (resp != expected && resp != MMSCH_VF_MAILBOX_RESP__INCOMPLETE && init_status != MMSCH_VF_ENGINE_STATUS__PASS) + if (resp != expected && resp != MMSCH_VF_MAILBOX_RESP__INCOMPLETE + && init_status != MMSCH_VF_ENGINE_STATUS__PASS) { DRM_ERROR("MMSCH init status is incorrect! readback=0x%08x, header init status for jpeg: %x\n", resp, init_status); + return -EINVAL; + } return 0; @@ -626,7 +636,7 @@ static int jpeg_v4_0_set_clockgating_state(void *handle, enum amd_clockgating_state state) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - bool enable = (state == AMD_CG_STATE_GATE) ? true : false; + bool enable = state == AMD_CG_STATE_GATE; if (enable) { if (!jpeg_v4_0_is_idle(handle)) @@ -831,7 +841,7 @@ static struct amdgpu_jpeg_ras jpeg_v4_0_ras = { static void jpeg_v4_0_set_ras_funcs(struct amdgpu_device *adev) { - switch (adev->ip_versions[JPEG_HWIP][0]) { + switch (amdgpu_ip_version(adev, JPEG_HWIP, 0)) { case IP_VERSION(4, 0, 0): adev->jpeg.ras = &jpeg_v4_0_ras; break; diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c index ce2b22f7e4e4..82b6b62c170b 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c @@ -26,6 +26,7 @@ #include "soc15.h" #include "soc15d.h" #include "jpeg_v4_0_3.h" +#include "mmsch_v4_0_3.h" #include "vcn/vcn_4_0_3_offset.h" #include "vcn/vcn_4_0_3_sh_mask.h" @@ -41,6 +42,7 @@ static void jpeg_v4_0_3_set_irq_funcs(struct amdgpu_device *adev); static int jpeg_v4_0_3_set_powergating_state(void *handle, enum amd_powergating_state state); static void jpeg_v4_0_3_set_ras_funcs(struct amdgpu_device *adev); +static void jpeg_v4_0_3_dec_ring_set_wptr(struct amdgpu_ring *ring); static int amdgpu_ih_srcid_jpeg[] = { VCN_4_0__SRCID__JPEG_DECODE, @@ -109,9 +111,20 @@ static int jpeg_v4_0_3_sw_init(void *handle) ring = &adev->jpeg.inst[i].ring_dec[j]; ring->use_doorbell = true; ring->vm_hub = AMDGPU_MMHUB0(adev->jpeg.inst[i].aid_id); - ring->doorbell_index = - (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + - 1 + j + 9 * jpeg_inst; + if (!amdgpu_sriov_vf(adev)) { + ring->doorbell_index = + (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + + 1 + j + 9 * jpeg_inst; + } else { + if (j < 4) + ring->doorbell_index = + (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + + 4 + j + 32 * jpeg_inst; + else + ring->doorbell_index = + (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + + 8 + j + 32 * jpeg_inst; + } sprintf(ring->name, "jpeg_dec_%d.%d", adev->jpeg.inst[i].aid_id, j); r = amdgpu_ring_init(adev, ring, 512, &adev->jpeg.inst->irq, 0, AMDGPU_RING_PRIO_DEFAULT, NULL); @@ -160,6 +173,119 @@ static int jpeg_v4_0_3_sw_fini(void *handle) return r; } +static int jpeg_v4_0_3_start_sriov(struct amdgpu_device *adev) +{ + struct amdgpu_ring *ring; + uint64_t ctx_addr; + uint32_t param, resp, expected; + uint32_t tmp, timeout; + + struct amdgpu_mm_table *table = &adev->virt.mm_table; + uint32_t *table_loc; + uint32_t table_size; + uint32_t size, size_dw, item_offset; + uint32_t init_status; + int i, j, jpeg_inst; + + struct mmsch_v4_0_cmd_direct_write + direct_wt = { {0} }; + struct mmsch_v4_0_cmd_end end = { {0} }; + struct mmsch_v4_0_3_init_header header; + + direct_wt.cmd_header.command_type = + MMSCH_COMMAND__DIRECT_REG_WRITE; + end.cmd_header.command_type = + MMSCH_COMMAND__END; + + for (i = 0; i < adev->jpeg.num_jpeg_inst; i++) { + jpeg_inst = GET_INST(JPEG, i); + + memset(&header, 0, sizeof(struct mmsch_v4_0_3_init_header)); + header.version = MMSCH_VERSION; + header.total_size = sizeof(struct mmsch_v4_0_3_init_header) >> 2; + + table_loc = (uint32_t *)table->cpu_addr; + table_loc += header.total_size; + + item_offset = header.total_size; + + for (j = 0; j < adev->jpeg.num_jpeg_rings; j++) { + ring = &adev->jpeg.inst[i].ring_dec[j]; + table_size = 0; + + tmp = SOC15_REG_OFFSET(JPEG, 0, regUVD_JMI0_UVD_LMI_JRBC_RB_64BIT_BAR_LOW); + MMSCH_V4_0_INSERT_DIRECT_WT(tmp, lower_32_bits(ring->gpu_addr)); + tmp = SOC15_REG_OFFSET(JPEG, 0, regUVD_JMI0_UVD_LMI_JRBC_RB_64BIT_BAR_HIGH); + MMSCH_V4_0_INSERT_DIRECT_WT(tmp, upper_32_bits(ring->gpu_addr)); + tmp = SOC15_REG_OFFSET(JPEG, 0, regUVD_JRBC0_UVD_JRBC_RB_SIZE); + MMSCH_V4_0_INSERT_DIRECT_WT(tmp, ring->ring_size / 4); + + if (j <= 3) { + header.mjpegdec0[j].table_offset = item_offset; + header.mjpegdec0[j].init_status = 0; + header.mjpegdec0[j].table_size = table_size; + } else { + header.mjpegdec1[j - 4].table_offset = item_offset; + header.mjpegdec1[j - 4].init_status = 0; + header.mjpegdec1[j - 4].table_size = table_size; + } + header.total_size += table_size; + item_offset += table_size; + } + + MMSCH_V4_0_INSERT_END(); + + /* send init table to MMSCH */ + size = sizeof(struct mmsch_v4_0_3_init_header); + table_loc = (uint32_t *)table->cpu_addr; + memcpy((void *)table_loc, &header, size); + + ctx_addr = table->gpu_addr; + WREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_CTX_ADDR_LO, lower_32_bits(ctx_addr)); + WREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_CTX_ADDR_HI, upper_32_bits(ctx_addr)); + + tmp = RREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_VMID); + tmp &= ~MMSCH_VF_VMID__VF_CTX_VMID_MASK; + tmp |= (0 << MMSCH_VF_VMID__VF_CTX_VMID__SHIFT); + WREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_VMID, tmp); + + size = header.total_size; + WREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_CTX_SIZE, size); + + WREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_MAILBOX_RESP, 0); + + param = 0x00000001; + WREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_MAILBOX_HOST, param); + tmp = 0; + timeout = 1000; + resp = 0; + expected = MMSCH_VF_MAILBOX_RESP__OK; + init_status = + ((struct mmsch_v4_0_3_init_header *)(table_loc))->mjpegdec0[i].init_status; + while (resp != expected) { + resp = RREG32_SOC15(VCN, jpeg_inst, regMMSCH_VF_MAILBOX_RESP); + + if (resp != 0) + break; + udelay(10); + tmp = tmp + 10; + if (tmp >= timeout) { + DRM_ERROR("failed to init MMSCH. TIME-OUT after %d usec"\ + " waiting for regMMSCH_VF_MAILBOX_RESP "\ + "(expected=0x%08x, readback=0x%08x)\n", + tmp, expected, resp); + return -EBUSY; + } + } + if (resp != expected && resp != MMSCH_VF_MAILBOX_RESP__INCOMPLETE && + init_status != MMSCH_VF_ENGINE_STATUS__PASS) + DRM_ERROR("MMSCH init status is incorrect! readback=0x%08x, header init status for jpeg: %x\n", + resp, init_status); + + } + return 0; +} + /** * jpeg_v4_0_3_hw_init - start and test JPEG block * @@ -172,31 +298,47 @@ static int jpeg_v4_0_3_hw_init(void *handle) struct amdgpu_ring *ring; int i, j, r, jpeg_inst; - for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { - jpeg_inst = GET_INST(JPEG, i); + if (amdgpu_sriov_vf(adev)) { + r = jpeg_v4_0_3_start_sriov(adev); + if (r) + return r; - ring = adev->jpeg.inst[i].ring_dec; + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) { + ring = &adev->jpeg.inst[i].ring_dec[j]; + ring->wptr = 0; + ring->wptr_old = 0; + jpeg_v4_0_3_dec_ring_set_wptr(ring); + ring->sched.ready = true; + } + } + } else { + for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { + jpeg_inst = GET_INST(JPEG, i); - if (ring->use_doorbell) - adev->nbio.funcs->vcn_doorbell_range( - adev, ring->use_doorbell, - (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + - 9 * jpeg_inst, - adev->jpeg.inst[i].aid_id); + ring = adev->jpeg.inst[i].ring_dec; - for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) { - ring = &adev->jpeg.inst[i].ring_dec[j]; if (ring->use_doorbell) - WREG32_SOC15_OFFSET( - VCN, GET_INST(VCN, i), - regVCN_JPEG_DB_CTRL, - (ring->pipe ? (ring->pipe - 0x15) : 0), - ring->doorbell_index + adev->nbio.funcs->vcn_doorbell_range( + adev, ring->use_doorbell, + (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + + 9 * jpeg_inst, + adev->jpeg.inst[i].aid_id); + + for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) { + ring = &adev->jpeg.inst[i].ring_dec[j]; + if (ring->use_doorbell) + WREG32_SOC15_OFFSET( + VCN, GET_INST(VCN, i), + regVCN_JPEG_DB_CTRL, + (ring->pipe ? (ring->pipe - 0x15) : 0), + ring->doorbell_index << VCN_JPEG_DB_CTRL__OFFSET__SHIFT | - VCN_JPEG_DB_CTRL__EN_MASK); - r = amdgpu_ring_test_helper(ring); - if (r) - return r; + VCN_JPEG_DB_CTRL__EN_MASK); + r = amdgpu_ring_test_helper(ring); + if (r) + return r; + } } } DRM_DEV_INFO(adev->dev, "JPEG decode initialized successfully.\n"); @@ -218,8 +360,10 @@ static int jpeg_v4_0_3_hw_fini(void *handle) cancel_delayed_work_sync(&adev->jpeg.idle_work); - if (adev->jpeg.cur_state != AMD_PG_STATE_GATE) - ret = jpeg_v4_0_3_set_powergating_state(adev, AMD_PG_STATE_GATE); + if (!amdgpu_sriov_vf(adev)) { + if (adev->jpeg.cur_state != AMD_PG_STATE_GATE) + ret = jpeg_v4_0_3_set_powergating_state(adev, AMD_PG_STATE_GATE); + } return ret; } @@ -510,9 +654,11 @@ static void jpeg_v4_0_3_dec_ring_set_wptr(struct amdgpu_ring *ring) */ static void jpeg_v4_0_3_dec_ring_insert_start(struct amdgpu_ring *ring) { - amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET, - 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, 0x62a04); /* PCTL0_MMHUB_DEEPSLEEP_IB */ + if (!amdgpu_sriov_vf(ring->adev)) { + amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET, + 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, 0x62a04); /* PCTL0_MMHUB_DEEPSLEEP_IB */ + } amdgpu_ring_write(ring, PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR, 0, 0, PACKETJ_TYPE0)); @@ -528,9 +674,11 @@ static void jpeg_v4_0_3_dec_ring_insert_start(struct amdgpu_ring *ring) */ static void jpeg_v4_0_3_dec_ring_insert_end(struct amdgpu_ring *ring) { - amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET, - 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, 0x62a04); + if (!amdgpu_sriov_vf(ring->adev)) { + amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET, + 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, 0x62a04); + } amdgpu_ring_write(ring, PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR, 0, 0, PACKETJ_TYPE0)); @@ -785,7 +933,7 @@ static int jpeg_v4_0_3_set_clockgating_state(void *handle, enum amd_clockgating_state state) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - bool enable = (state == AMD_CG_STATE_GATE) ? true : false; + bool enable = state == AMD_CG_STATE_GATE; int i; for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { @@ -806,6 +954,11 @@ static int jpeg_v4_0_3_set_powergating_state(void *handle, struct amdgpu_device *adev = (struct amdgpu_device *)handle; int ret; + if (amdgpu_sriov_vf(adev)) { + adev->jpeg.cur_state = AMD_PG_STATE_UNGATE; + return 0; + } + if (state == adev->jpeg.cur_state) return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c new file mode 100644 index 000000000000..6ede85b28cc8 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c @@ -0,0 +1,623 @@ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "amdgpu.h" +#include "amdgpu_jpeg.h" +#include "amdgpu_pm.h" +#include "soc15.h" +#include "soc15d.h" +#include "jpeg_v2_0.h" +#include "jpeg_v4_0_5.h" +#include "mmsch_v4_0.h" + +#include "vcn/vcn_4_0_5_offset.h" +#include "vcn/vcn_4_0_5_sh_mask.h" +#include "ivsrcid/vcn/irqsrcs_vcn_4_0.h" + +#define regUVD_JPEG_PITCH_INTERNAL_OFFSET 0x401f + +static void jpeg_v4_0_5_set_dec_ring_funcs(struct amdgpu_device *adev); +static void jpeg_v4_0_5_set_irq_funcs(struct amdgpu_device *adev); +static int jpeg_v4_0_5_set_powergating_state(void *handle, + enum amd_powergating_state state); + +static void jpeg_v4_0_5_dec_ring_set_wptr(struct amdgpu_ring *ring); + +/** + * jpeg_v4_0_5_early_init - set function pointers + * + * @handle: amdgpu_device pointer + * + * Set ring and irq function pointers + */ +static int jpeg_v4_0_5_early_init(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + + adev->jpeg.num_jpeg_inst = 1; + adev->jpeg.num_jpeg_rings = 1; + + jpeg_v4_0_5_set_dec_ring_funcs(adev); + jpeg_v4_0_5_set_irq_funcs(adev); + + return 0; +} + +/** + * jpeg_v4_0_5_sw_init - sw init for JPEG block + * + * @handle: amdgpu_device pointer + * + * Load firmware and sw initialization + */ +static int jpeg_v4_0_5_sw_init(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_ring *ring; + int r; + + /* JPEG TRAP */ + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, + VCN_4_0__SRCID__JPEG_DECODE, &adev->jpeg.inst->irq); + if (r) + return r; + + /* JPEG DJPEG POISON EVENT */ + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, + VCN_4_0__SRCID_DJPEG0_POISON, &adev->jpeg.inst->irq); + if (r) + return r; + + /* JPEG EJPEG POISON EVENT */ + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, + VCN_4_0__SRCID_EJPEG0_POISON, &adev->jpeg.inst->irq); + if (r) + return r; + + r = amdgpu_jpeg_sw_init(adev); + if (r) + return r; + + r = amdgpu_jpeg_resume(adev); + if (r) + return r; + + ring = adev->jpeg.inst->ring_dec; + ring->use_doorbell = true; + ring->doorbell_index = amdgpu_sriov_vf(adev) ? + (((adev->doorbell_index.vcn.vcn_ring0_1) << 1) + 4) : + ((adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 1); + ring->vm_hub = AMDGPU_MMHUB0(0); + + sprintf(ring->name, "jpeg_dec"); + r = amdgpu_ring_init(adev, ring, 512, &adev->jpeg.inst->irq, 0, + AMDGPU_RING_PRIO_DEFAULT, NULL); + if (r) + return r; + + adev->jpeg.internal.jpeg_pitch[0] = regUVD_JPEG_PITCH_INTERNAL_OFFSET; + adev->jpeg.inst->external.jpeg_pitch[0] = SOC15_REG_OFFSET(JPEG, 0, regUVD_JPEG_PITCH); + + return 0; +} + +/** + * jpeg_v4_0_5_sw_fini - sw fini for JPEG block + * + * @handle: amdgpu_device pointer + * + * JPEG suspend and free up sw allocation + */ +static int jpeg_v4_0_5_sw_fini(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int r; + + r = amdgpu_jpeg_suspend(adev); + if (r) + return r; + + r = amdgpu_jpeg_sw_fini(adev); + + return r; +} + +/** + * jpeg_v4_0_5_hw_init - start and test JPEG block + * + * @handle: amdgpu_device pointer + * + */ +static int jpeg_v4_0_5_hw_init(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_ring *ring = adev->jpeg.inst->ring_dec; + int r; + + r = amdgpu_ring_test_helper(ring); + if (r) + return r; + + DRM_DEV_INFO(adev->dev, "JPEG decode initialized successfully.\n"); + + return 0; +} + +/** + * jpeg_v4_0_5_hw_fini - stop the hardware block + * + * @handle: amdgpu_device pointer + * + * Stop the JPEG block, mark ring as not ready any more + */ +static int jpeg_v4_0_5_hw_fini(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + cancel_delayed_work_sync(&adev->vcn.idle_work); + if (!amdgpu_sriov_vf(adev)) { + if (adev->jpeg.cur_state != AMD_PG_STATE_GATE && + RREG32_SOC15(JPEG, 0, regUVD_JRBC_STATUS)) + jpeg_v4_0_5_set_powergating_state(adev, AMD_PG_STATE_GATE); + } + amdgpu_irq_put(adev, &adev->jpeg.inst->irq, 0); + + return 0; +} + +/** + * jpeg_v4_0_5_suspend - suspend JPEG block + * + * @handle: amdgpu_device pointer + * + * HW fini and suspend JPEG block + */ +static int jpeg_v4_0_5_suspend(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int r; + + r = jpeg_v4_0_5_hw_fini(adev); + if (r) + return r; + + r = amdgpu_jpeg_suspend(adev); + + return r; +} + +/** + * jpeg_v4_0_5_resume - resume JPEG block + * + * @handle: amdgpu_device pointer + * + * Resume firmware and hw init JPEG block + */ +static int jpeg_v4_0_5_resume(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int r; + + r = amdgpu_jpeg_resume(adev); + if (r) + return r; + + r = jpeg_v4_0_5_hw_init(adev); + + return r; +} + +static void jpeg_v4_0_5_disable_clock_gating(struct amdgpu_device *adev) +{ + uint32_t data = 0; + + data = RREG32_SOC15(JPEG, 0, regJPEG_CGC_CTRL); + if (adev->cg_flags & AMD_CG_SUPPORT_JPEG_MGCG) { + data |= 1 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; + data &= (~JPEG_CGC_CTRL__JPEG_DEC_MODE_MASK); + } else { + data &= ~JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; + } + + data |= 1 << JPEG_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT; + data |= 4 << JPEG_CGC_CTRL__CLK_OFF_DELAY__SHIFT; + WREG32_SOC15(JPEG, 0, regJPEG_CGC_CTRL, data); + + data = RREG32_SOC15(JPEG, 0, regJPEG_CGC_GATE); + data &= ~(JPEG_CGC_GATE__JPEG_DEC_MASK + | JPEG_CGC_GATE__JPEG2_DEC_MASK + | JPEG_CGC_GATE__JMCIF_MASK + | JPEG_CGC_GATE__JRBBM_MASK); + WREG32_SOC15(JPEG, 0, regJPEG_CGC_GATE, data); +} + +static void jpeg_v4_0_5_enable_clock_gating(struct amdgpu_device *adev) +{ + uint32_t data = 0; + + data = RREG32_SOC15(JPEG, 0, regJPEG_CGC_CTRL); + if (adev->cg_flags & AMD_CG_SUPPORT_JPEG_MGCG) { + data |= 1 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; + data |= JPEG_CGC_CTRL__JPEG_DEC_MODE_MASK; + } else { + data &= ~JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; + } + + data |= 1 << JPEG_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT; + data |= 4 << JPEG_CGC_CTRL__CLK_OFF_DELAY__SHIFT; + WREG32_SOC15(JPEG, 0, regJPEG_CGC_CTRL, data); + + data = RREG32_SOC15(JPEG, 0, regJPEG_CGC_GATE); + data |= (JPEG_CGC_GATE__JPEG_DEC_MASK + |JPEG_CGC_GATE__JPEG2_DEC_MASK + |JPEG_CGC_GATE__JMCIF_MASK + |JPEG_CGC_GATE__JRBBM_MASK); + WREG32_SOC15(JPEG, 0, regJPEG_CGC_GATE, data); +} + +static int jpeg_v4_0_5_disable_static_power_gating(struct amdgpu_device *adev) +{ + if (adev->pg_flags & AMD_PG_SUPPORT_JPEG) { + WREG32(SOC15_REG_OFFSET(JPEG, 0, regUVD_IPX_DLDO_CONFIG), + 1 << UVD_IPX_DLDO_CONFIG__ONO1_PWR_CONFIG__SHIFT); + SOC15_WAIT_ON_RREG(JPEG, 0, regUVD_IPX_DLDO_STATUS, + 0, UVD_IPX_DLDO_STATUS__ONO1_PWR_STATUS_MASK); + } + + /* disable anti hang mechanism */ + WREG32_P(SOC15_REG_OFFSET(JPEG, 0, regUVD_JPEG_POWER_STATUS), 0, + ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK); + + /* keep the JPEG in static PG mode */ + WREG32_P(SOC15_REG_OFFSET(JPEG, 0, regUVD_JPEG_POWER_STATUS), 0, + ~UVD_JPEG_POWER_STATUS__JPEG_PG_MODE_MASK); + + return 0; +} + +static int jpeg_v4_0_5_enable_static_power_gating(struct amdgpu_device *adev) +{ + /* enable anti hang mechanism */ + WREG32_P(SOC15_REG_OFFSET(JPEG, 0, regUVD_JPEG_POWER_STATUS), + UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK, + ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK); + + if (adev->pg_flags & AMD_PG_SUPPORT_JPEG) { + WREG32(SOC15_REG_OFFSET(JPEG, 0, regUVD_IPX_DLDO_CONFIG), + 2 << UVD_IPX_DLDO_CONFIG__ONO1_PWR_CONFIG__SHIFT); + SOC15_WAIT_ON_RREG(JPEG, 0, regUVD_IPX_DLDO_STATUS, + 1 << UVD_IPX_DLDO_STATUS__ONO1_PWR_STATUS__SHIFT, + UVD_IPX_DLDO_STATUS__ONO1_PWR_STATUS_MASK); + } + + return 0; +} + +/** + * jpeg_v4_0_5_start - start JPEG block + * + * @adev: amdgpu_device pointer + * + * Setup and start the JPEG block + */ +static int jpeg_v4_0_5_start(struct amdgpu_device *adev) +{ + struct amdgpu_ring *ring = adev->jpeg.inst->ring_dec; + int r; + + if (adev->pm.dpm_enabled) + amdgpu_dpm_enable_jpeg(adev, true); + + /* doorbell programming is done for every playback */ + adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell, + (adev->doorbell_index.vcn.vcn_ring0_1 << 1), 0); + + WREG32_SOC15(VCN, 0, regVCN_JPEG_DB_CTRL, + ring->doorbell_index << VCN_JPEG_DB_CTRL__OFFSET__SHIFT | + VCN_JPEG_DB_CTRL__EN_MASK); + + /* disable power gating */ + r = jpeg_v4_0_5_disable_static_power_gating(adev); + if (r) + return r; + + /* JPEG disable CGC */ + jpeg_v4_0_5_disable_clock_gating(adev); + + /* MJPEG global tiling registers */ + WREG32_SOC15(JPEG, 0, regJPEG_DEC_GFX10_ADDR_CONFIG, + adev->gfx.config.gb_addr_config); + + + /* enable JMI channel */ + WREG32_P(SOC15_REG_OFFSET(JPEG, 0, regUVD_JMI_CNTL), 0, + ~UVD_JMI_CNTL__SOFT_RESET_MASK); + + /* enable System Interrupt for JRBC */ + WREG32_P(SOC15_REG_OFFSET(JPEG, 0, regJPEG_SYS_INT_EN), + JPEG_SYS_INT_EN__DJRBC_MASK, + ~JPEG_SYS_INT_EN__DJRBC_MASK); + + WREG32_SOC15(JPEG, 0, regUVD_LMI_JRBC_RB_VMID, 0); + WREG32_SOC15(JPEG, 0, regUVD_JRBC_RB_CNTL, (0x00000001L | 0x00000002L)); + WREG32_SOC15(JPEG, 0, regUVD_LMI_JRBC_RB_64BIT_BAR_LOW, + lower_32_bits(ring->gpu_addr)); + WREG32_SOC15(JPEG, 0, regUVD_LMI_JRBC_RB_64BIT_BAR_HIGH, + upper_32_bits(ring->gpu_addr)); + WREG32_SOC15(JPEG, 0, regUVD_JRBC_RB_RPTR, 0); + WREG32_SOC15(JPEG, 0, regUVD_JRBC_RB_WPTR, 0); + WREG32_SOC15(JPEG, 0, regUVD_JRBC_RB_CNTL, 0x00000002L); + WREG32_SOC15(JPEG, 0, regUVD_JRBC_RB_SIZE, ring->ring_size / 4); + ring->wptr = RREG32_SOC15(JPEG, 0, regUVD_JRBC_RB_WPTR); + + return 0; +} + +/** + * jpeg_v4_0_5_stop - stop JPEG block + * + * @adev: amdgpu_device pointer + * + * stop the JPEG block + */ +static int jpeg_v4_0_5_stop(struct amdgpu_device *adev) +{ + int r; + + /* reset JMI */ + WREG32_P(SOC15_REG_OFFSET(JPEG, 0, regUVD_JMI_CNTL), + UVD_JMI_CNTL__SOFT_RESET_MASK, + ~UVD_JMI_CNTL__SOFT_RESET_MASK); + + jpeg_v4_0_5_enable_clock_gating(adev); + + /* enable power gating */ + r = jpeg_v4_0_5_enable_static_power_gating(adev); + if (r) + return r; + + if (adev->pm.dpm_enabled) + amdgpu_dpm_enable_jpeg(adev, false); + + return 0; +} + +/** + * jpeg_v4_0_5_dec_ring_get_rptr - get read pointer + * + * @ring: amdgpu_ring pointer + * + * Returns the current hardware read pointer + */ +static uint64_t jpeg_v4_0_5_dec_ring_get_rptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + return RREG32_SOC15(JPEG, 0, regUVD_JRBC_RB_RPTR); +} + +/** + * jpeg_v4_0_5_dec_ring_get_wptr - get write pointer + * + * @ring: amdgpu_ring pointer + * + * Returns the current hardware write pointer + */ +static uint64_t jpeg_v4_0_5_dec_ring_get_wptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + if (ring->use_doorbell) + return *ring->wptr_cpu_addr; + else + return RREG32_SOC15(JPEG, 0, regUVD_JRBC_RB_WPTR); +} + +/** + * jpeg_v4_0_5_dec_ring_set_wptr - set write pointer + * + * @ring: amdgpu_ring pointer + * + * Commits the write pointer to the hardware + */ +static void jpeg_v4_0_5_dec_ring_set_wptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + if (ring->use_doorbell) { + *ring->wptr_cpu_addr = lower_32_bits(ring->wptr); + WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr)); + } else { + WREG32_SOC15(JPEG, 0, regUVD_JRBC_RB_WPTR, lower_32_bits(ring->wptr)); + } +} + +static bool jpeg_v4_0_5_is_idle(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int ret = 1; + + ret &= (((RREG32_SOC15(JPEG, 0, regUVD_JRBC_STATUS) & + UVD_JRBC_STATUS__RB_JOB_DONE_MASK) == + UVD_JRBC_STATUS__RB_JOB_DONE_MASK)); + + return ret; +} + +static int jpeg_v4_0_5_wait_for_idle(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + return SOC15_WAIT_ON_RREG(JPEG, 0, regUVD_JRBC_STATUS, + UVD_JRBC_STATUS__RB_JOB_DONE_MASK, + UVD_JRBC_STATUS__RB_JOB_DONE_MASK); +} + +static int jpeg_v4_0_5_set_clockgating_state(void *handle, + enum amd_clockgating_state state) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + bool enable = (state == AMD_CG_STATE_GATE) ? true : false; + + if (enable) { + if (!jpeg_v4_0_5_is_idle(handle)) + return -EBUSY; + jpeg_v4_0_5_enable_clock_gating(adev); + } else { + jpeg_v4_0_5_disable_clock_gating(adev); + } + + return 0; +} + +static int jpeg_v4_0_5_set_powergating_state(void *handle, + enum amd_powergating_state state) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int ret; + + if (amdgpu_sriov_vf(adev)) { + adev->jpeg.cur_state = AMD_PG_STATE_UNGATE; + return 0; + } + + if (state == adev->jpeg.cur_state) + return 0; + + if (state == AMD_PG_STATE_GATE) + ret = jpeg_v4_0_5_stop(adev); + else + ret = jpeg_v4_0_5_start(adev); + + if (!ret) + adev->jpeg.cur_state = state; + + return ret; +} + +static int jpeg_v4_0_5_set_interrupt_state(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + unsigned type, + enum amdgpu_interrupt_state state) +{ + return 0; +} + +static int jpeg_v4_0_5_process_interrupt(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + DRM_DEBUG("IH: JPEG TRAP\n"); + + switch (entry->src_id) { + case VCN_4_0__SRCID__JPEG_DECODE: + amdgpu_fence_process(adev->jpeg.inst->ring_dec); + break; + case VCN_4_0__SRCID_DJPEG0_POISON: + case VCN_4_0__SRCID_EJPEG0_POISON: + amdgpu_jpeg_process_poison_irq(adev, source, entry); + break; + default: + DRM_DEV_ERROR(adev->dev, "Unhandled interrupt: %d %d\n", + entry->src_id, entry->src_data[0]); + break; + } + + return 0; +} + +static const struct amd_ip_funcs jpeg_v4_0_5_ip_funcs = { + .name = "jpeg_v4_0_5", + .early_init = jpeg_v4_0_5_early_init, + .late_init = NULL, + .sw_init = jpeg_v4_0_5_sw_init, + .sw_fini = jpeg_v4_0_5_sw_fini, + .hw_init = jpeg_v4_0_5_hw_init, + .hw_fini = jpeg_v4_0_5_hw_fini, + .suspend = jpeg_v4_0_5_suspend, + .resume = jpeg_v4_0_5_resume, + .is_idle = jpeg_v4_0_5_is_idle, + .wait_for_idle = jpeg_v4_0_5_wait_for_idle, + .check_soft_reset = NULL, + .pre_soft_reset = NULL, + .soft_reset = NULL, + .post_soft_reset = NULL, + .set_clockgating_state = jpeg_v4_0_5_set_clockgating_state, + .set_powergating_state = jpeg_v4_0_5_set_powergating_state, +}; + +static const struct amdgpu_ring_funcs jpeg_v4_0_5_dec_ring_vm_funcs = { + .type = AMDGPU_RING_TYPE_VCN_JPEG, + .align_mask = 0xf, + .get_rptr = jpeg_v4_0_5_dec_ring_get_rptr, + .get_wptr = jpeg_v4_0_5_dec_ring_get_wptr, + .set_wptr = jpeg_v4_0_5_dec_ring_set_wptr, + .emit_frame_size = + SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 + + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 + + 8 + /* jpeg_v4_0_5_dec_ring_emit_vm_flush */ + 18 + 18 + /* jpeg_v4_0_5_dec_ring_emit_fence x2 vm fence */ + 8 + 16, + .emit_ib_size = 22, /* jpeg_v4_0_5_dec_ring_emit_ib */ + .emit_ib = jpeg_v2_0_dec_ring_emit_ib, + .emit_fence = jpeg_v2_0_dec_ring_emit_fence, + .emit_vm_flush = jpeg_v2_0_dec_ring_emit_vm_flush, + .test_ring = amdgpu_jpeg_dec_ring_test_ring, + .test_ib = amdgpu_jpeg_dec_ring_test_ib, + .insert_nop = jpeg_v2_0_dec_ring_nop, + .insert_start = jpeg_v2_0_dec_ring_insert_start, + .insert_end = jpeg_v2_0_dec_ring_insert_end, + .pad_ib = amdgpu_ring_generic_pad_ib, + .begin_use = amdgpu_jpeg_ring_begin_use, + .end_use = amdgpu_jpeg_ring_end_use, + .emit_wreg = jpeg_v2_0_dec_ring_emit_wreg, + .emit_reg_wait = jpeg_v2_0_dec_ring_emit_reg_wait, + .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, +}; + +static void jpeg_v4_0_5_set_dec_ring_funcs(struct amdgpu_device *adev) +{ + adev->jpeg.inst->ring_dec->funcs = &jpeg_v4_0_5_dec_ring_vm_funcs; + DRM_DEV_INFO(adev->dev, "JPEG decode is enabled in VM mode\n"); +} + +static const struct amdgpu_irq_src_funcs jpeg_v4_0_5_irq_funcs = { + .set = jpeg_v4_0_5_set_interrupt_state, + .process = jpeg_v4_0_5_process_interrupt, +}; + +static void jpeg_v4_0_5_set_irq_funcs(struct amdgpu_device *adev) +{ + adev->jpeg.inst->irq.num_types = 1; + adev->jpeg.inst->irq.funcs = &jpeg_v4_0_5_irq_funcs; +} + +const struct amdgpu_ip_block_version jpeg_v4_0_5_ip_block = { + .type = AMD_IP_BLOCK_TYPE_JPEG, + .major = 4, + .minor = 0, + .rev = 5, + .funcs = &jpeg_v4_0_5_ip_funcs, +}; + diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.h b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.h new file mode 100644 index 000000000000..c5eee572079c --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.h @@ -0,0 +1,35 @@ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __JPEG_V4_0_5_H__ +#define __JPEG_V4_0_5_H__ + +enum amdgpu_jpeg_v4_0_5_sub_block { + AMDGPU_JPEG_V4_0_5_JPEG0 = 0, + + AMDGPU_JPEG_V4_0_5_MAX_SUB_BLOCK, +}; + +extern const struct amdgpu_ip_block_version jpeg_v4_0_5_ip_block; + +#endif /* __JPEG_V4_0_H__ */ diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c b/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c index 36a123e6c8ee..1e5ad1e08d2a 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c @@ -558,7 +558,7 @@ static int mes_v10_1_load_microcode(struct amdgpu_device *adev, WREG32_SOC15(GC, 0, mmCP_MES_MDBOUND_LO, 0x3FFFF); /* invalidate ICACHE */ - switch (adev->ip_versions[GC_HWIP][0]) { + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { case IP_VERSION(10, 3, 0): data = RREG32_SOC15(GC, 0, mmCP_MES_IC_OP_CNTL_Sienna_Cichlid); break; @@ -568,7 +568,7 @@ static int mes_v10_1_load_microcode(struct amdgpu_device *adev, } data = REG_SET_FIELD(data, CP_MES_IC_OP_CNTL, PRIME_ICACHE, 0); data = REG_SET_FIELD(data, CP_MES_IC_OP_CNTL, INVALIDATE_CACHE, 1); - switch (adev->ip_versions[GC_HWIP][0]) { + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { case IP_VERSION(10, 3, 0): WREG32_SOC15(GC, 0, mmCP_MES_IC_OP_CNTL_Sienna_Cichlid, data); break; @@ -578,7 +578,7 @@ static int mes_v10_1_load_microcode(struct amdgpu_device *adev, } /* prime the ICACHE. */ - switch (adev->ip_versions[GC_HWIP][0]) { + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { case IP_VERSION(10, 3, 0): data = RREG32_SOC15(GC, 0, mmCP_MES_IC_OP_CNTL_Sienna_Cichlid); break; @@ -587,7 +587,7 @@ static int mes_v10_1_load_microcode(struct amdgpu_device *adev, break; } data = REG_SET_FIELD(data, CP_MES_IC_OP_CNTL, PRIME_ICACHE, 1); - switch (adev->ip_versions[GC_HWIP][0]) { + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { case IP_VERSION(10, 3, 0): WREG32_SOC15(GC, 0, mmCP_MES_IC_OP_CNTL_Sienna_Cichlid, data); break; @@ -909,10 +909,12 @@ static int mes_v10_1_mqd_sw_init(struct amdgpu_device *adev, /* prepare MQD backup */ adev->mes.mqd_backup[pipe] = kmalloc(mqd_size, GFP_KERNEL); - if (!adev->mes.mqd_backup[pipe]) + if (!adev->mes.mqd_backup[pipe]) { dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name); + return -ENOMEM; + } return 0; } @@ -993,7 +995,7 @@ static void mes_v10_1_kiq_setting(struct amdgpu_ring *ring) struct amdgpu_device *adev = ring->adev; /* tell RLC which is KIQ queue */ - switch (adev->ip_versions[GC_HWIP][0]) { + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { case IP_VERSION(10, 3, 0): case IP_VERSION(10, 3, 2): case IP_VERSION(10, 3, 1): diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c index 1bdaa00c0b46..4dfec56e1b7f 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c @@ -47,6 +47,9 @@ MODULE_FIRMWARE("amdgpu/gc_11_0_3_mes1.bin"); MODULE_FIRMWARE("amdgpu/gc_11_0_4_mes.bin"); MODULE_FIRMWARE("amdgpu/gc_11_0_4_mes_2.bin"); MODULE_FIRMWARE("amdgpu/gc_11_0_4_mes1.bin"); +MODULE_FIRMWARE("amdgpu/gc_11_5_0_mes_2.bin"); +MODULE_FIRMWARE("amdgpu/gc_11_5_0_mes1.bin"); + static int mes_v11_0_hw_fini(void *handle); static int mes_v11_0_kiq_hw_init(struct amdgpu_device *adev); @@ -210,9 +213,7 @@ static int mes_v11_0_add_hw_queue(struct amdgpu_mes *mes, mes_add_queue_pkt.is_aql_queue = input->is_aql_queue; mes_add_queue_pkt.gds_size = input->queue_size; - /* For KFD, gds_size is re-used for queue size (needed in MES for AQL queues) */ - mes_add_queue_pkt.is_aql_queue = input->is_aql_queue; - mes_add_queue_pkt.gds_size = input->queue_size; + mes_add_queue_pkt.exclusively_scheduled = input->exclusively_scheduled; return mes_v11_0_submit_pkt_and_poll_completion(mes, &mes_add_queue_pkt, sizeof(mes_add_queue_pkt), @@ -405,6 +406,7 @@ static int mes_v11_0_set_hw_resources(struct amdgpu_mes *mes) mes_set_hw_res_pkt.disable_mes_log = 1; mes_set_hw_res_pkt.use_different_vmid_compute = 1; mes_set_hw_res_pkt.enable_reg_active_poll = 1; + mes_set_hw_res_pkt.enable_level_process_quantum_check = 1; mes_set_hw_res_pkt.oversubscription_timer = 50; return mes_v11_0_submit_pkt_and_poll_completion(mes, @@ -412,60 +414,6 @@ static int mes_v11_0_set_hw_resources(struct amdgpu_mes *mes) offsetof(union MESAPI_SET_HW_RESOURCES, api_status)); } -static void mes_v11_0_init_aggregated_doorbell(struct amdgpu_mes *mes) -{ - struct amdgpu_device *adev = mes->adev; - uint32_t data; - - data = RREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL1); - data &= ~(CP_MES_DOORBELL_CONTROL1__DOORBELL_OFFSET_MASK | - CP_MES_DOORBELL_CONTROL1__DOORBELL_EN_MASK | - CP_MES_DOORBELL_CONTROL1__DOORBELL_HIT_MASK); - data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_LOW] << - CP_MES_DOORBELL_CONTROL1__DOORBELL_OFFSET__SHIFT; - data |= 1 << CP_MES_DOORBELL_CONTROL1__DOORBELL_EN__SHIFT; - WREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL1, data); - - data = RREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL2); - data &= ~(CP_MES_DOORBELL_CONTROL2__DOORBELL_OFFSET_MASK | - CP_MES_DOORBELL_CONTROL2__DOORBELL_EN_MASK | - CP_MES_DOORBELL_CONTROL2__DOORBELL_HIT_MASK); - data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_NORMAL] << - CP_MES_DOORBELL_CONTROL2__DOORBELL_OFFSET__SHIFT; - data |= 1 << CP_MES_DOORBELL_CONTROL2__DOORBELL_EN__SHIFT; - WREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL2, data); - - data = RREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL3); - data &= ~(CP_MES_DOORBELL_CONTROL3__DOORBELL_OFFSET_MASK | - CP_MES_DOORBELL_CONTROL3__DOORBELL_EN_MASK | - CP_MES_DOORBELL_CONTROL3__DOORBELL_HIT_MASK); - data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_MEDIUM] << - CP_MES_DOORBELL_CONTROL3__DOORBELL_OFFSET__SHIFT; - data |= 1 << CP_MES_DOORBELL_CONTROL3__DOORBELL_EN__SHIFT; - WREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL3, data); - - data = RREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL4); - data &= ~(CP_MES_DOORBELL_CONTROL4__DOORBELL_OFFSET_MASK | - CP_MES_DOORBELL_CONTROL4__DOORBELL_EN_MASK | - CP_MES_DOORBELL_CONTROL4__DOORBELL_HIT_MASK); - data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_HIGH] << - CP_MES_DOORBELL_CONTROL4__DOORBELL_OFFSET__SHIFT; - data |= 1 << CP_MES_DOORBELL_CONTROL4__DOORBELL_EN__SHIFT; - WREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL4, data); - - data = RREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL5); - data &= ~(CP_MES_DOORBELL_CONTROL5__DOORBELL_OFFSET_MASK | - CP_MES_DOORBELL_CONTROL5__DOORBELL_EN_MASK | - CP_MES_DOORBELL_CONTROL5__DOORBELL_HIT_MASK); - data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_REALTIME] << - CP_MES_DOORBELL_CONTROL5__DOORBELL_OFFSET__SHIFT; - data |= 1 << CP_MES_DOORBELL_CONTROL5__DOORBELL_EN__SHIFT; - WREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL5, data); - - data = 1 << CP_HQD_GFX_CONTROL__DB_UPDATED_MSG_EN__SHIFT; - WREG32_SOC15(GC, 0, regCP_HQD_GFX_CONTROL, data); -} - static const struct amdgpu_mes_funcs mes_v11_0_funcs = { .add_hw_queue = mes_v11_0_add_hw_queue, .remove_hw_queue = mes_v11_0_remove_hw_queue, @@ -790,8 +738,7 @@ static int mes_v11_0_mqd_init(struct amdgpu_ring *ring) DOORBELL_SOURCE, 0); tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_HIT, 0); - } - else + } else tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 0); mqd->cp_hqd_pq_doorbell_control = tmp; @@ -1019,10 +966,12 @@ static int mes_v11_0_mqd_sw_init(struct amdgpu_device *adev, /* prepare MQD backup */ adev->mes.mqd_backup[pipe] = kmalloc(mqd_size, GFP_KERNEL); - if (!adev->mes.mqd_backup[pipe]) + if (!adev->mes.mqd_backup[pipe]) { dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name); + return -ENOMEM; + } return 0; } @@ -1240,8 +1189,6 @@ static int mes_v11_0_hw_init(void *handle) if (r) goto failure; - mes_v11_0_init_aggregated_doorbell(&adev->mes); - r = mes_v11_0_query_sched_status(&adev->mes); if (r) { DRM_ERROR("MES is busy\n"); @@ -1314,7 +1261,7 @@ static int mes_v11_0_late_init(void *handle) /* it's only intended for use in mes_self_test case, not for s0ix and reset */ if (!amdgpu_in_reset(adev) && !adev->in_s0ix && !adev->in_suspend && - (adev->ip_versions[GC_HWIP][0] != IP_VERSION(11, 0, 3))) + (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(11, 0, 3))) amdgpu_mes_self_test(adev); return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c index fb91b31056ca..843219a91736 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c @@ -242,7 +242,7 @@ static void mmhub_v1_0_setup_vmid_config(struct amdgpu_device *adev) block_size -= 9; for (i = 0; i <= 14; i++) { - tmp = RREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT1_CNTL, i); + tmp = RREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT1_CNTL, i * hub->ctx_distance); tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1); tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH, num_level); diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c index 9086f2fdfaf4..92432cd2c0c7 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c @@ -274,7 +274,7 @@ static void mmhub_v1_7_setup_vmid_config(struct amdgpu_device *adev) block_size -= 9; for (i = 0; i <= 14; i++) { - tmp = RREG32_SOC15_OFFSET(MMHUB, 0, regVM_CONTEXT1_CNTL, i); + tmp = RREG32_SOC15_OFFSET(MMHUB, 0, regVM_CONTEXT1_CNTL, i * hub->ctx_distance); tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1); tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH, num_level); diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c index 5e8b493f8699..9b0146732e13 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c @@ -130,6 +130,9 @@ static void mmhub_v1_8_init_system_aperture_regs(struct amdgpu_device *adev) uint64_t value; int i; + if (amdgpu_sriov_vf(adev)) + return; + inst_mask = adev->aid_mask; for_each_inst(i, inst_mask) { /* Program the AGP BAR */ @@ -139,9 +142,6 @@ static void mmhub_v1_8_init_system_aperture_regs(struct amdgpu_device *adev) WREG32_SOC15(MMHUB, i, regMC_VM_AGP_TOP, adev->gmc.agp_end >> 24); - if (amdgpu_sriov_vf(adev)) - return; - /* Program the system aperture low logical page number. */ WREG32_SOC15(MMHUB, i, regMC_VM_SYSTEM_APERTURE_LOW_ADDR, min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18); @@ -344,7 +344,7 @@ static void mmhub_v1_8_setup_vmid_config(struct amdgpu_device *adev) hub = &adev->vmhub[AMDGPU_MMHUB0(j)]; for (i = 0; i <= 14; i++) { tmp = RREG32_SOC15_OFFSET(MMHUB, j, regVM_CONTEXT1_CNTL, - i); + i * hub->ctx_distance); tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1); tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, @@ -413,18 +413,6 @@ static void mmhub_v1_8_program_invalidation(struct amdgpu_device *adev) static int mmhub_v1_8_gart_enable(struct amdgpu_device *adev) { - if (amdgpu_sriov_vf(adev)) { - /* - * MC_VM_FB_LOCATION_BASE/TOP is NULL for VF, becuase they are - * VF copy registers so vbios post doesn't program them, for - * SRIOV driver need to program them - */ - WREG32_SOC15(MMHUB, 0, regMC_VM_FB_LOCATION_BASE, - adev->gmc.vram_start >> 24); - WREG32_SOC15(MMHUB, 0, regMC_VM_FB_LOCATION_TOP, - adev->gmc.vram_end >> 24); - } - /* GART Enable. */ mmhub_v1_8_init_gart_aperture_regs(adev); mmhub_v1_8_init_system_aperture_regs(adev); @@ -638,6 +626,14 @@ static void mmhub_v1_8_inst_query_ras_error_count(struct amdgpu_device *adev, void *ras_err_status) { struct ras_err_data *err_data = (struct ras_err_data *)ras_err_status; + unsigned long ue_count = 0, ce_count = 0; + + /* NOTE: mmhub is converted by aid_mask and the range is 0-3, + * which can be used as die ID directly */ + struct amdgpu_smuio_mcm_config_info mcm_info = { + .socket_id = adev->smuio.funcs->get_socket_id(adev), + .die_id = mmhub_inst, + }; amdgpu_ras_inst_query_ras_error_count(adev, mmhub_v1_8_ce_reg_list, @@ -646,7 +642,7 @@ static void mmhub_v1_8_inst_query_ras_error_count(struct amdgpu_device *adev, ARRAY_SIZE(mmhub_v1_8_ras_memory_list), mmhub_inst, AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE, - &err_data->ce_count); + &ce_count); amdgpu_ras_inst_query_ras_error_count(adev, mmhub_v1_8_ue_reg_list, ARRAY_SIZE(mmhub_v1_8_ue_reg_list), @@ -654,7 +650,10 @@ static void mmhub_v1_8_inst_query_ras_error_count(struct amdgpu_device *adev, ARRAY_SIZE(mmhub_v1_8_ras_memory_list), mmhub_inst, AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE, - &err_data->ue_count); + &ue_count); + + amdgpu_ras_error_statistic_ce_count(err_data, &mcm_info, ce_count); + amdgpu_ras_error_statistic_ue_count(err_data, &mcm_info, ue_count); } static void mmhub_v1_8_query_ras_error_count(struct amdgpu_device *adev, @@ -701,152 +700,9 @@ static void mmhub_v1_8_reset_ras_error_count(struct amdgpu_device *adev) mmhub_v1_8_inst_reset_ras_error_count(adev, i); } -static const u32 mmhub_v1_8_mmea_err_status_reg[] __maybe_unused = { - regMMEA0_ERR_STATUS, - regMMEA1_ERR_STATUS, - regMMEA2_ERR_STATUS, - regMMEA3_ERR_STATUS, - regMMEA4_ERR_STATUS, -}; - -static void mmhub_v1_8_inst_query_ras_err_status(struct amdgpu_device *adev, - uint32_t mmhub_inst) -{ - uint32_t reg_value; - uint32_t mmea_err_status_addr_dist; - uint32_t i; - - /* query mmea ras err status */ - mmea_err_status_addr_dist = regMMEA1_ERR_STATUS - regMMEA0_ERR_STATUS; - for (i = 0; i < ARRAY_SIZE(mmhub_v1_8_mmea_err_status_reg); i++) { - reg_value = RREG32_SOC15_OFFSET(MMHUB, mmhub_inst, - regMMEA0_ERR_STATUS, - i * mmea_err_status_addr_dist); - if (REG_GET_FIELD(reg_value, MMEA0_ERR_STATUS, SDP_RDRSP_STATUS) || - REG_GET_FIELD(reg_value, MMEA0_ERR_STATUS, SDP_WRRSP_STATUS) || - REG_GET_FIELD(reg_value, MMEA0_ERR_STATUS, SDP_RDRSP_DATAPARITY_ERROR)) { - dev_warn(adev->dev, - "Detected MMEA%d err in MMHUB%d, status: 0x%x\n", - i, mmhub_inst, reg_value); - } - } - - /* query mm_cane ras err status */ - reg_value = RREG32_SOC15(MMHUB, mmhub_inst, regMM_CANE_ERR_STATUS); - if (REG_GET_FIELD(reg_value, MM_CANE_ERR_STATUS, SDPM_RDRSP_STATUS) || - REG_GET_FIELD(reg_value, MM_CANE_ERR_STATUS, SDPM_WRRSP_STATUS) || - REG_GET_FIELD(reg_value, MM_CANE_ERR_STATUS, SDPM_RDRSP_DATAPARITY_ERROR)) { - dev_warn(adev->dev, - "Detected MM CANE err in MMHUB%d, status: 0x%x\n", - mmhub_inst, reg_value); - } -} - -static void mmhub_v1_8_query_ras_error_status(struct amdgpu_device *adev) -{ - uint32_t inst_mask; - uint32_t i; - - if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__MMHUB)) { - dev_warn(adev->dev, "MMHUB RAS is not supported\n"); - return; - } - - inst_mask = adev->aid_mask; - for_each_inst(i, inst_mask) - mmhub_v1_8_inst_query_ras_err_status(adev, i); -} - -static void mmhub_v1_8_inst_reset_ras_err_status(struct amdgpu_device *adev, - uint32_t mmhub_inst) -{ - uint32_t mmea_cgtt_clk_cntl_addr_dist; - uint32_t mmea_err_status_addr_dist; - uint32_t reg_value; - uint32_t i; - - /* reset mmea ras err status */ - mmea_cgtt_clk_cntl_addr_dist = regMMEA1_CGTT_CLK_CTRL - regMMEA0_CGTT_CLK_CTRL; - mmea_err_status_addr_dist = regMMEA1_ERR_STATUS - regMMEA0_ERR_STATUS; - for (i = 0; i < ARRAY_SIZE(mmhub_v1_8_mmea_err_status_reg); i++) { - /* force clk branch on for response path - * set MMEA0_CGTT_CLK_CTRL.SOFT_OVERRIDE_RETURN = 1 - */ - reg_value = RREG32_SOC15_OFFSET(MMHUB, mmhub_inst, - regMMEA0_CGTT_CLK_CTRL, - i * mmea_cgtt_clk_cntl_addr_dist); - reg_value = REG_SET_FIELD(reg_value, MMEA0_CGTT_CLK_CTRL, - SOFT_OVERRIDE_RETURN, 1); - WREG32_SOC15_OFFSET(MMHUB, mmhub_inst, - regMMEA0_CGTT_CLK_CTRL, - i * mmea_cgtt_clk_cntl_addr_dist, - reg_value); - - /* set MMEA0_ERR_STATUS.CLEAR_ERROR_STATUS = 1 */ - reg_value = RREG32_SOC15_OFFSET(MMHUB, mmhub_inst, - regMMEA0_ERR_STATUS, - i * mmea_err_status_addr_dist); - reg_value = REG_SET_FIELD(reg_value, MMEA0_ERR_STATUS, - CLEAR_ERROR_STATUS, 1); - WREG32_SOC15_OFFSET(MMHUB, mmhub_inst, - regMMEA0_ERR_STATUS, - i * mmea_err_status_addr_dist, - reg_value); - - /* set MMEA0_CGTT_CLK_CTRL.SOFT_OVERRIDE_RETURN = 0 */ - reg_value = RREG32_SOC15_OFFSET(MMHUB, mmhub_inst, - regMMEA0_CGTT_CLK_CTRL, - i * mmea_cgtt_clk_cntl_addr_dist); - reg_value = REG_SET_FIELD(reg_value, MMEA0_CGTT_CLK_CTRL, - SOFT_OVERRIDE_RETURN, 0); - WREG32_SOC15_OFFSET(MMHUB, mmhub_inst, - regMMEA0_CGTT_CLK_CTRL, - i * mmea_cgtt_clk_cntl_addr_dist, - reg_value); - } - - /* reset mm_cane ras err status - * force clk branch on for response path - * set MM_CANE_ICG_CTRL.SOFT_OVERRIDE_ATRET = 1 - */ - reg_value = RREG32_SOC15(MMHUB, mmhub_inst, regMM_CANE_ICG_CTRL); - reg_value = REG_SET_FIELD(reg_value, MM_CANE_ICG_CTRL, - SOFT_OVERRIDE_ATRET, 1); - WREG32_SOC15(MMHUB, mmhub_inst, regMM_CANE_ICG_CTRL, reg_value); - - /* set MM_CANE_ERR_STATUS.CLEAR_ERROR_STATUS = 1 */ - reg_value = RREG32_SOC15(MMHUB, mmhub_inst, regMM_CANE_ERR_STATUS); - reg_value = REG_SET_FIELD(reg_value, MM_CANE_ERR_STATUS, - CLEAR_ERROR_STATUS, 1); - WREG32_SOC15(MMHUB, mmhub_inst, regMM_CANE_ERR_STATUS, reg_value); - - /* set MM_CANE_ICG_CTRL.SOFT_OVERRIDE_ATRET = 0 */ - reg_value = RREG32_SOC15(MMHUB, mmhub_inst, regMM_CANE_ICG_CTRL); - reg_value = REG_SET_FIELD(reg_value, MM_CANE_ICG_CTRL, - SOFT_OVERRIDE_ATRET, 0); - WREG32_SOC15(MMHUB, mmhub_inst, regMM_CANE_ICG_CTRL, reg_value); -} - -static void mmhub_v1_8_reset_ras_error_status(struct amdgpu_device *adev) -{ - uint32_t inst_mask; - uint32_t i; - - if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__MMHUB)) { - dev_warn(adev->dev, "MMHUB RAS is not supported\n"); - return; - } - - inst_mask = adev->aid_mask; - for_each_inst(i, inst_mask) - mmhub_v1_8_inst_reset_ras_err_status(adev, i); -} - static const struct amdgpu_ras_block_hw_ops mmhub_v1_8_ras_hw_ops = { .query_ras_error_count = mmhub_v1_8_query_ras_error_count, .reset_ras_error_count = mmhub_v1_8_reset_ras_error_count, - .query_ras_error_status = mmhub_v1_8_query_ras_error_status, - .reset_ras_error_status = mmhub_v1_8_reset_ras_error_status, }; struct amdgpu_mmhub_ras mmhub_v1_8_ras = { diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c index 8f76c6ecf50a..02fd45261399 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c @@ -151,7 +151,7 @@ mmhub_v2_0_print_l2_protection_fault_status(struct amdgpu_device *adev, dev_err(adev->dev, "MMVM_L2_PROTECTION_FAULT_STATUS:0x%08X\n", status); - switch (adev->ip_versions[MMHUB_HWIP][0]) { + switch (amdgpu_ip_version(adev, MMHUB_HWIP, 0)) { case IP_VERSION(2, 0, 0): case IP_VERSION(2, 0, 2): mmhub_cid = mmhub_client_ids_navi1x[cid][rw]; @@ -367,7 +367,7 @@ static void mmhub_v2_0_setup_vmid_config(struct amdgpu_device *adev) uint32_t tmp; for (i = 0; i <= 14; i++) { - tmp = RREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT1_CNTL, i); + tmp = RREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT1_CNTL, i * hub->ctx_distance); tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1); tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH, adev->vm_manager.num_level); @@ -568,7 +568,7 @@ static void mmhub_v2_0_update_medium_grain_clock_gating(struct amdgpu_device *ad if (!(adev->cg_flags & AMD_CG_SUPPORT_MC_MGCG)) return; - switch (adev->ip_versions[MMHUB_HWIP][0]) { + switch (amdgpu_ip_version(adev, MMHUB_HWIP, 0)) { case IP_VERSION(2, 1, 0): case IP_VERSION(2, 1, 1): case IP_VERSION(2, 1, 2): @@ -601,7 +601,7 @@ static void mmhub_v2_0_update_medium_grain_clock_gating(struct amdgpu_device *ad DAGB0_CNTL_MISC2__DISABLE_TLBRD_CG_MASK); } - switch (adev->ip_versions[MMHUB_HWIP][0]) { + switch (amdgpu_ip_version(adev, MMHUB_HWIP, 0)) { case IP_VERSION(2, 1, 0): case IP_VERSION(2, 1, 1): case IP_VERSION(2, 1, 2): @@ -625,7 +625,7 @@ static void mmhub_v2_0_update_medium_grain_light_sleep(struct amdgpu_device *ade if (!(adev->cg_flags & AMD_CG_SUPPORT_MC_LS)) return; - switch (adev->ip_versions[MMHUB_HWIP][0]) { + switch (amdgpu_ip_version(adev, MMHUB_HWIP, 0)) { case IP_VERSION(2, 1, 0): case IP_VERSION(2, 1, 1): case IP_VERSION(2, 1, 2): @@ -651,7 +651,7 @@ static int mmhub_v2_0_set_clockgating(struct amdgpu_device *adev, if (amdgpu_sriov_vf(adev)) return 0; - switch (adev->ip_versions[MMHUB_HWIP][0]) { + switch (amdgpu_ip_version(adev, MMHUB_HWIP, 0)) { case IP_VERSION(2, 0, 0): case IP_VERSION(2, 0, 2): case IP_VERSION(2, 1, 0): @@ -676,7 +676,7 @@ static void mmhub_v2_0_get_clockgating(struct amdgpu_device *adev, u64 *flags) if (amdgpu_sriov_vf(adev)) *flags = 0; - switch (adev->ip_versions[MMHUB_HWIP][0]) { + switch (amdgpu_ip_version(adev, MMHUB_HWIP, 0)) { case IP_VERSION(2, 1, 0): case IP_VERSION(2, 1, 1): case IP_VERSION(2, 1, 2): diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c index 8bd0fc8d9d25..5eb8122e2746 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c @@ -90,7 +90,7 @@ mmhub_v2_3_print_l2_protection_fault_status(struct amdgpu_device *adev, dev_err(adev->dev, "MMVM_L2_PROTECTION_FAULT_STATUS:0x%08X\n", status); - switch (adev->ip_versions[MMHUB_HWIP][0]) { + switch (amdgpu_ip_version(adev, MMHUB_HWIP, 0)) { case IP_VERSION(2, 3, 0): case IP_VERSION(2, 4, 0): case IP_VERSION(2, 4, 1): @@ -285,7 +285,7 @@ static void mmhub_v2_3_setup_vmid_config(struct amdgpu_device *adev) uint32_t tmp; for (i = 0; i <= 14; i++) { - tmp = RREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT1_CNTL, i); + tmp = RREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT1_CNTL, i * hub->ctx_distance); tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1); tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH, adev->vm_manager.num_level); @@ -331,7 +331,7 @@ static void mmhub_v2_3_setup_vmid_config(struct amdgpu_device *adev) static void mmhub_v2_3_program_invalidation(struct amdgpu_device *adev) { struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)]; - unsigned i; + unsigned int i; for (i = 0; i < 18; ++i) { WREG32_SOC15_OFFSET(MMHUB, 0, @@ -406,6 +406,7 @@ static void mmhub_v2_3_set_fault_enable_default(struct amdgpu_device *adev, bool value) { u32 tmp; + tmp = RREG32_SOC15(MMHUB, 0, mmMMVM_L2_PROTECTION_FAULT_CNTL); tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL, RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, value); @@ -499,11 +500,11 @@ mmhub_v2_3_update_medium_grain_clock_gating(struct amdgpu_device *adev, if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_MGCG)) { data &= ~MM_ATC_L2_CGTT_CLK_CTRL__SOFT_OVERRIDE_MASK; data1 &= ~(DAGB0_CNTL_MISC2__DISABLE_WRREQ_CG_MASK | - DAGB0_CNTL_MISC2__DISABLE_WRRET_CG_MASK | - DAGB0_CNTL_MISC2__DISABLE_RDREQ_CG_MASK | - DAGB0_CNTL_MISC2__DISABLE_RDRET_CG_MASK | - DAGB0_CNTL_MISC2__DISABLE_TLBWR_CG_MASK | - DAGB0_CNTL_MISC2__DISABLE_TLBRD_CG_MASK); + DAGB0_CNTL_MISC2__DISABLE_WRRET_CG_MASK | + DAGB0_CNTL_MISC2__DISABLE_RDREQ_CG_MASK | + DAGB0_CNTL_MISC2__DISABLE_RDRET_CG_MASK | + DAGB0_CNTL_MISC2__DISABLE_TLBWR_CG_MASK | + DAGB0_CNTL_MISC2__DISABLE_TLBRD_CG_MASK); } else { data |= MM_ATC_L2_CGTT_CLK_CTRL__SOFT_OVERRIDE_MASK; @@ -593,13 +594,13 @@ static void mmhub_v2_3_get_clockgating(struct amdgpu_device *adev, u64 *flags) /* AMD_CG_SUPPORT_MC_MGCG */ if (!(data & (DAGB0_CNTL_MISC2__DISABLE_WRREQ_CG_MASK | - DAGB0_CNTL_MISC2__DISABLE_WRRET_CG_MASK | - DAGB0_CNTL_MISC2__DISABLE_RDREQ_CG_MASK | - DAGB0_CNTL_MISC2__DISABLE_RDRET_CG_MASK | - DAGB0_CNTL_MISC2__DISABLE_TLBWR_CG_MASK | - DAGB0_CNTL_MISC2__DISABLE_TLBRD_CG_MASK)) + DAGB0_CNTL_MISC2__DISABLE_WRRET_CG_MASK | + DAGB0_CNTL_MISC2__DISABLE_RDREQ_CG_MASK | + DAGB0_CNTL_MISC2__DISABLE_RDRET_CG_MASK | + DAGB0_CNTL_MISC2__DISABLE_TLBWR_CG_MASK | + DAGB0_CNTL_MISC2__DISABLE_TLBRD_CG_MASK)) && !(data1 & MM_ATC_L2_CGTT_CLK_CTRL__SOFT_OVERRIDE_MASK)) { - *flags |= AMD_CG_SUPPORT_MC_MGCG; + *flags |= AMD_CG_SUPPORT_MC_MGCG; } /* AMD_CG_SUPPORT_MC_LS */ diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0.c index 441379e91cfa..7d5242df58a5 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0.c @@ -107,7 +107,7 @@ mmhub_v3_0_print_l2_protection_fault_status(struct amdgpu_device *adev, dev_err(adev->dev, "MMVM_L2_PROTECTION_FAULT_STATUS:0x%08X\n", status); - switch (adev->ip_versions[MMHUB_HWIP][0]) { + switch (amdgpu_ip_version(adev, MMHUB_HWIP, 0)) { case IP_VERSION(3, 0, 0): case IP_VERSION(3, 0, 1): mmhub_cid = mmhub_client_ids_v3_0_0[cid][rw]; @@ -189,8 +189,7 @@ static void mmhub_v3_0_init_system_aperture_regs(struct amdgpu_device *adev) max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18); /* Set default page address. */ - value = adev->mem_scratch.gpu_addr - adev->gmc.vram_start + - adev->vm_manager.vram_base_offset; + value = amdgpu_gmc_vram_mc2pa(adev, adev->mem_scratch.gpu_addr); WREG32_SOC15(MMHUB, 0, regMMMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB, (u32)(value >> 12)); WREG32_SOC15(MMHUB, 0, regMMMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB, @@ -324,7 +323,7 @@ static void mmhub_v3_0_setup_vmid_config(struct amdgpu_device *adev) uint32_t tmp; for (i = 0; i <= 14; i++) { - tmp = RREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_CNTL, i); + tmp = RREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_CNTL, i * hub->ctx_distance); tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1); tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH, adev->vm_manager.num_level); diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.c index 12c7f4b46ea9..134c4ec10887 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.c @@ -108,7 +108,7 @@ mmhub_v3_0_1_print_l2_protection_fault_status(struct amdgpu_device *adev, "MMVM_L2_PROTECTION_FAULT_STATUS:0x%08X\n", status); - switch (adev->ip_versions[MMHUB_HWIP][0]) { + switch (amdgpu_ip_version(adev, MMHUB_HWIP, 0)) { case IP_VERSION(3, 0, 1): mmhub_cid = mmhub_client_ids_v3_0_1[cid][rw]; break; @@ -188,8 +188,7 @@ static void mmhub_v3_0_1_init_system_aperture_regs(struct amdgpu_device *adev) max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18); /* Set default page address. */ - value = adev->mem_scratch.gpu_addr - adev->gmc.vram_start + - adev->vm_manager.vram_base_offset; + value = amdgpu_gmc_vram_mc2pa(adev, adev->mem_scratch.gpu_addr); WREG32_SOC15(MMHUB, 0, regMMMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB, (u32)(value >> 12)); WREG32_SOC15(MMHUB, 0, regMMMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB, @@ -311,7 +310,7 @@ static void mmhub_v3_0_1_setup_vmid_config(struct amdgpu_device *adev) uint32_t tmp; for (i = 0; i <= 14; i++) { - tmp = RREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_CNTL, i); + tmp = RREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_CNTL, i * hub->ctx_distance); tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1); tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH, adev->vm_manager.num_level); diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_2.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_2.c index 5dadc85abf7e..f0f182f033b9 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_2.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_2.c @@ -181,8 +181,7 @@ static void mmhub_v3_0_2_init_system_aperture_regs(struct amdgpu_device *adev) } /* Set default page address. */ - value = adev->mem_scratch.gpu_addr - adev->gmc.vram_start + - adev->vm_manager.vram_base_offset; + value = amdgpu_gmc_vram_mc2pa(adev, adev->mem_scratch.gpu_addr); WREG32_SOC15(MMHUB, 0, regMMMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB, (u32)(value >> 12)); WREG32_SOC15(MMHUB, 0, regMMMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB, @@ -316,7 +315,7 @@ static void mmhub_v3_0_2_setup_vmid_config(struct amdgpu_device *adev) uint32_t tmp; for (i = 0; i <= 14; i++) { - tmp = RREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_CNTL, i); + tmp = RREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_CNTL, i * hub->ctx_distance); tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1); tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH, adev->vm_manager.num_level); diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_3.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_3.c new file mode 100644 index 000000000000..dc4812ecc98d --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_3.c @@ -0,0 +1,589 @@ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "amdgpu.h" +#include "mmhub_v3_3.h" + +#include "mmhub/mmhub_3_3_0_offset.h" +#include "mmhub/mmhub_3_3_0_sh_mask.h" + +#include "navi10_enum.h" +#include "soc15_common.h" + +#define regMMVM_L2_CNTL3_DEFAULT 0x80100007 +#define regMMVM_L2_CNTL4_DEFAULT 0x000000c1 +#define regMMVM_L2_CNTL5_DEFAULT 0x00003fe0 + +static const char *mmhub_client_ids_v3_3[][2] = { + [0][0] = "VMC", + [4][0] = "DCEDMC", + [6][0] = "MP0", + [7][0] = "MP1", + [8][0] = "MPM", + [24][0] = "HDP", + [25][0] = "LSDMA", + [26][0] = "JPEG", + [27][0] = "VPE", + [29][0] = "VCNU", + [30][0] = "VCN", + [3][1] = "DCEDWB", + [4][1] = "DCEDMC", + [6][1] = "MP0", + [7][1] = "MP1", + [8][1] = "MPM", + [21][1] = "OSSSYS", + [24][1] = "HDP", + [25][1] = "LSDMA", + [26][1] = "JPEG", + [27][1] = "VPE", + [29][1] = "VCNU", + [30][1] = "VCN", +}; + +static uint32_t mmhub_v3_3_get_invalidate_req(unsigned int vmid, + uint32_t flush_type) +{ + u32 req = 0; + + /* invalidate using legacy mode on vmid*/ + req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, + PER_VMID_INVALIDATE_REQ, 1 << vmid); + req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, FLUSH_TYPE, flush_type ? : 1); + req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PTES, 1); + req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE0, 1); + req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE1, 1); + req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE2, 1); + req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L1_PTES, 1); + req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, + CLEAR_PROTECTION_FAULT_STATUS_ADDR, 0); + + return req; +} + +static void +mmhub_v3_3_print_l2_protection_fault_status(struct amdgpu_device *adev, + uint32_t status) +{ + uint32_t cid, rw; + const char *mmhub_cid = NULL; + + cid = REG_GET_FIELD(status, + MMVM_L2_PROTECTION_FAULT_STATUS, CID); + rw = REG_GET_FIELD(status, + MMVM_L2_PROTECTION_FAULT_STATUS, RW); + + dev_err(adev->dev, + "MMVM_L2_PROTECTION_FAULT_STATUS:0x%08X\n", + status); + + switch (amdgpu_ip_version(adev, MMHUB_HWIP, 0)) { + case IP_VERSION(3, 3, 0): + mmhub_cid = mmhub_client_ids_v3_3[cid][rw]; + break; + default: + mmhub_cid = NULL; + break; + } + + if (!mmhub_cid && cid == 0x140) + mmhub_cid = "UMSCH"; + + dev_err(adev->dev, "\t Faulty UTCL2 client ID: %s (0x%x)\n", + mmhub_cid ? mmhub_cid : "unknown", cid); + dev_err(adev->dev, "\t MORE_FAULTS: 0x%lx\n", + REG_GET_FIELD(status, + MMVM_L2_PROTECTION_FAULT_STATUS, MORE_FAULTS)); + dev_err(adev->dev, "\t WALKER_ERROR: 0x%lx\n", + REG_GET_FIELD(status, + MMVM_L2_PROTECTION_FAULT_STATUS, WALKER_ERROR)); + dev_err(adev->dev, "\t PERMISSION_FAULTS: 0x%lx\n", + REG_GET_FIELD(status, + MMVM_L2_PROTECTION_FAULT_STATUS, PERMISSION_FAULTS)); + dev_err(adev->dev, "\t MAPPING_ERROR: 0x%lx\n", + REG_GET_FIELD(status, + MMVM_L2_PROTECTION_FAULT_STATUS, MAPPING_ERROR)); + dev_err(adev->dev, "\t RW: 0x%x\n", rw); +} + +static void mmhub_v3_3_setup_vm_pt_regs(struct amdgpu_device *adev, + uint32_t vmid, + uint64_t page_table_base) +{ + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)]; + + WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32, + hub->ctx_addr_distance * vmid, + lower_32_bits(page_table_base)); + + WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32, + hub->ctx_addr_distance * vmid, + upper_32_bits(page_table_base)); + +} + +static void mmhub_v3_3_init_gart_aperture_regs(struct amdgpu_device *adev) +{ + uint64_t pt_base = amdgpu_gmc_pd_addr(adev->gart.bo); + + mmhub_v3_3_setup_vm_pt_regs(adev, 0, pt_base); + + WREG32_SOC15(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32, + (u32)(adev->gmc.gart_start >> 12)); + WREG32_SOC15(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32, + (u32)(adev->gmc.gart_start >> 44)); + + WREG32_SOC15(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32, + (u32)(adev->gmc.gart_end >> 12)); + WREG32_SOC15(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32, + (u32)(adev->gmc.gart_end >> 44)); +} + +static void mmhub_v3_3_init_system_aperture_regs(struct amdgpu_device *adev) +{ + uint64_t value; + uint32_t tmp; + + /* Program the AGP BAR */ + WREG32_SOC15(MMHUB, 0, regMMMC_VM_AGP_BASE, 0); + WREG32_SOC15(MMHUB, 0, regMMMC_VM_AGP_BOT, adev->gmc.agp_start >> 24); + WREG32_SOC15(MMHUB, 0, regMMMC_VM_AGP_TOP, adev->gmc.agp_end >> 24); + + /* + * the new L1 policy will block SRIOV guest from writing + * these regs, and they will be programed at host. + * so skip programing these regs. + */ + /* Program the system aperture low logical page number. */ + WREG32_SOC15(MMHUB, 0, regMMMC_VM_SYSTEM_APERTURE_LOW_ADDR, + min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18); + WREG32_SOC15(MMHUB, 0, regMMMC_VM_SYSTEM_APERTURE_HIGH_ADDR, + max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18); + + /* Set default page address. */ + value = amdgpu_gmc_vram_mc2pa(adev, adev->mem_scratch.gpu_addr); + WREG32_SOC15(MMHUB, 0, regMMMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB, + (u32)(value >> 12)); + WREG32_SOC15(MMHUB, 0, regMMMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB, + (u32)(value >> 44)); + + /* Program "protection fault". */ + WREG32_SOC15(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32, + (u32)(adev->dummy_page_addr >> 12)); + WREG32_SOC15(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32, + (u32)((u64)adev->dummy_page_addr >> 44)); + + tmp = RREG32_SOC15(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_CNTL2); + tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL2, + ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY, 1); + WREG32_SOC15(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_CNTL2, tmp); +} + +static void mmhub_v3_3_init_tlb_regs(struct amdgpu_device *adev) +{ + uint32_t tmp; + + /* Setup TLB control */ + tmp = RREG32_SOC15(MMHUB, 0, regMMMC_VM_MX_L1_TLB_CNTL); + + tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 1); + tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE, 3); + tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, + ENABLE_ADVANCED_DRIVER_MODEL, 1); + tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, + SYSTEM_APERTURE_UNMAPPED_ACCESS, 0); + tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, ECO_BITS, 0); + tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, + MTYPE, MTYPE_UC); /* UC, uncached */ + + WREG32_SOC15(MMHUB, 0, regMMMC_VM_MX_L1_TLB_CNTL, tmp); +} + +static void mmhub_v3_3_init_cache_regs(struct amdgpu_device *adev) +{ + uint32_t tmp; + + /* Setup L2 cache */ + tmp = RREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL); + tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, ENABLE_L2_CACHE, 1); + tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, ENABLE_L2_FRAGMENT_PROCESSING, 0); + tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, + ENABLE_DEFAULT_PAGE_OUT_TO_SYSTEM_MEMORY, 1); + /* XXX for emulation, Refer to closed source code.*/ + tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, L2_PDE0_CACHE_TAG_GENERATION_MODE, + 0); + tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, PDE_FAULT_CLASSIFICATION, 0); + tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, CONTEXT1_IDENTITY_ACCESS_MODE, 1); + tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, IDENTITY_MODE_FRAGMENT_SIZE, 0); + WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL, tmp); + + tmp = RREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL2); + tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS, 1); + tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL2, INVALIDATE_L2_CACHE, 1); + WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL2, tmp); + + tmp = regMMVM_L2_CNTL3_DEFAULT; + if (adev->gmc.translate_further) { + tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3, BANK_SELECT, 12); + tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3, + L2_CACHE_BIGK_FRAGMENT_SIZE, 9); + } else { + tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3, BANK_SELECT, 9); + tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL3, + L2_CACHE_BIGK_FRAGMENT_SIZE, 6); + } + WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL3, tmp); + + tmp = regMMVM_L2_CNTL4_DEFAULT; + tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL4, VMC_TAP_PDE_REQUEST_PHYSICAL, 0); + tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL4, VMC_TAP_PTE_REQUEST_PHYSICAL, 0); + WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL4, tmp); + + tmp = regMMVM_L2_CNTL5_DEFAULT; + tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL5, L2_CACHE_SMALLK_FRAGMENT_SIZE, 0); + WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL5, tmp); +} + +static void mmhub_v3_3_enable_system_domain(struct amdgpu_device *adev) +{ + uint32_t tmp; + + tmp = RREG32_SOC15(MMHUB, 0, regMMVM_CONTEXT0_CNTL); + tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT0_CNTL, ENABLE_CONTEXT, 1); + tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT0_CNTL, PAGE_TABLE_DEPTH, 0); + tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT0_CNTL, + RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0); + + WREG32_SOC15(MMHUB, 0, regMMVM_CONTEXT0_CNTL, tmp); +} + +static void mmhub_v3_3_disable_identity_aperture(struct amdgpu_device *adev) +{ + WREG32_SOC15(MMHUB, 0, + regMMVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_LO32, + 0xFFFFFFFF); + WREG32_SOC15(MMHUB, 0, + regMMVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_HI32, + 0x0000000F); + + WREG32_SOC15(MMHUB, 0, + regMMVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_LO32, 0); + WREG32_SOC15(MMHUB, 0, + regMMVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_HI32, 0); + + WREG32_SOC15(MMHUB, 0, regMMVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_LO32, + 0); + WREG32_SOC15(MMHUB, 0, regMMVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_HI32, + 0); +} + +static void mmhub_v3_3_setup_vmid_config(struct amdgpu_device *adev) +{ + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)]; + int i; + uint32_t tmp; + + for (i = 0; i <= 14; i++) { + tmp = RREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_CNTL, i * hub->ctx_distance); + tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1); + tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH, + adev->vm_manager.num_level); + tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, + RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, + DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, + 1); + tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, + PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, + VALID_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, + READ_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, + WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, + EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, + PAGE_TABLE_BLOCK_SIZE, + adev->vm_manager.block_size - 9); + /* Send no-retry XNACK on fault to suppress VM fault storm. */ + tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, + RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, + !amdgpu_noretry); + WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_CNTL, + i * hub->ctx_distance, tmp); + WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32, + i * hub->ctx_addr_distance, 0); + WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32, + i * hub->ctx_addr_distance, 0); + WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32, + i * hub->ctx_addr_distance, + lower_32_bits(adev->vm_manager.max_pfn - 1)); + WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32, + i * hub->ctx_addr_distance, + upper_32_bits(adev->vm_manager.max_pfn - 1)); + } + + hub->vm_cntx_cntl = tmp; +} + +static void mmhub_v3_3_program_invalidation(struct amdgpu_device *adev) +{ + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)]; + unsigned int i; + + for (i = 0; i < 18; ++i) { + WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_INVALIDATE_ENG0_ADDR_RANGE_LO32, + i * hub->eng_addr_distance, 0xffffffff); + WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_INVALIDATE_ENG0_ADDR_RANGE_HI32, + i * hub->eng_addr_distance, 0x1f); + } +} + +static int mmhub_v3_3_gart_enable(struct amdgpu_device *adev) +{ + /* GART Enable. */ + mmhub_v3_3_init_gart_aperture_regs(adev); + mmhub_v3_3_init_system_aperture_regs(adev); + mmhub_v3_3_init_tlb_regs(adev); + mmhub_v3_3_init_cache_regs(adev); + + mmhub_v3_3_enable_system_domain(adev); + mmhub_v3_3_disable_identity_aperture(adev); + mmhub_v3_3_setup_vmid_config(adev); + mmhub_v3_3_program_invalidation(adev); + + return 0; +} + +static void mmhub_v3_3_gart_disable(struct amdgpu_device *adev) +{ + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)]; + u32 tmp; + u32 i; + + /* Disable all tables */ + for (i = 0; i < 16; i++) + WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT0_CNTL, + i * hub->ctx_distance, 0); + + /* Setup TLB control */ + tmp = RREG32_SOC15(MMHUB, 0, regMMMC_VM_MX_L1_TLB_CNTL); + tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 0); + tmp = REG_SET_FIELD(tmp, MMMC_VM_MX_L1_TLB_CNTL, + ENABLE_ADVANCED_DRIVER_MODEL, 0); + WREG32_SOC15(MMHUB, 0, regMMMC_VM_MX_L1_TLB_CNTL, tmp); + + /* Setup L2 cache */ + tmp = RREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL); + tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, ENABLE_L2_CACHE, 0); + WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL, tmp); + WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL3, 0); +} + +/** + * mmhub_v3_3_set_fault_enable_default - update GART/VM fault handling + * + * @adev: amdgpu_device pointer + * @value: true redirects VM faults to the default page + */ +static void mmhub_v3_3_set_fault_enable_default(struct amdgpu_device *adev, + bool value) +{ + u32 tmp; + + tmp = RREG32_SOC15(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_CNTL); + tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL, + RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL, + PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL, + PDE1_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL, + PDE2_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL, + TRANSLATE_FURTHER_PROTECTION_FAULT_ENABLE_DEFAULT, + value); + tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL, + NACK_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL, + DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL, + VALID_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL, + READ_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL, + WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, value); + tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL, + EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, value); + if (!value) { + tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL, + CRASH_ON_NO_RETRY_FAULT, 1); + tmp = REG_SET_FIELD(tmp, MMVM_L2_PROTECTION_FAULT_CNTL, + CRASH_ON_RETRY_FAULT, 1); + } + WREG32_SOC15(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_CNTL, tmp); +} + +static const struct amdgpu_vmhub_funcs mmhub_v3_3_vmhub_funcs = { + .print_l2_protection_fault_status = mmhub_v3_3_print_l2_protection_fault_status, + .get_invalidate_req = mmhub_v3_3_get_invalidate_req, +}; + +static void mmhub_v3_3_init(struct amdgpu_device *adev) +{ + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)]; + + hub->ctx0_ptb_addr_lo32 = + SOC15_REG_OFFSET(MMHUB, 0, + regMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32); + hub->ctx0_ptb_addr_hi32 = + SOC15_REG_OFFSET(MMHUB, 0, + regMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32); + hub->vm_inv_eng0_sem = + SOC15_REG_OFFSET(MMHUB, 0, regMMVM_INVALIDATE_ENG0_SEM); + hub->vm_inv_eng0_req = + SOC15_REG_OFFSET(MMHUB, 0, regMMVM_INVALIDATE_ENG0_REQ); + hub->vm_inv_eng0_ack = + SOC15_REG_OFFSET(MMHUB, 0, regMMVM_INVALIDATE_ENG0_ACK); + hub->vm_context0_cntl = + SOC15_REG_OFFSET(MMHUB, 0, regMMVM_CONTEXT0_CNTL); + hub->vm_l2_pro_fault_status = + SOC15_REG_OFFSET(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_STATUS); + hub->vm_l2_pro_fault_cntl = + SOC15_REG_OFFSET(MMHUB, 0, regMMVM_L2_PROTECTION_FAULT_CNTL); + + hub->ctx_distance = regMMVM_CONTEXT1_CNTL - regMMVM_CONTEXT0_CNTL; + hub->ctx_addr_distance = regMMVM_CONTEXT1_PAGE_TABLE_BASE_ADDR_LO32 - + regMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32; + hub->eng_distance = regMMVM_INVALIDATE_ENG1_REQ - + regMMVM_INVALIDATE_ENG0_REQ; + hub->eng_addr_distance = regMMVM_INVALIDATE_ENG1_ADDR_RANGE_LO32 - + regMMVM_INVALIDATE_ENG0_ADDR_RANGE_LO32; + + hub->vm_cntx_cntl_vm_fault = MMVM_CONTEXT1_CNTL__RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | + MMVM_CONTEXT1_CNTL__DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | + MMVM_CONTEXT1_CNTL__PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | + MMVM_CONTEXT1_CNTL__VALID_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | + MMVM_CONTEXT1_CNTL__READ_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | + MMVM_CONTEXT1_CNTL__WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | + MMVM_CONTEXT1_CNTL__EXECUTE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK; + + hub->vmhub_funcs = &mmhub_v3_3_vmhub_funcs; +} + +static u64 mmhub_v3_3_get_fb_location(struct amdgpu_device *adev) +{ + u64 base; + + base = RREG32_SOC15(MMHUB, 0, regMMMC_VM_FB_LOCATION_BASE); + base &= MMMC_VM_FB_LOCATION_BASE__FB_BASE_MASK; + base <<= 24; + + return base; +} + +static u64 mmhub_v3_3_get_mc_fb_offset(struct amdgpu_device *adev) +{ + u64 offset; + + offset = RREG32_SOC15(MMHUB, 0, regMMMC_VM_FB_OFFSET); + offset &= MMMC_VM_FB_OFFSET__FB_OFFSET_MASK; + offset <<= 24; + + return offset; +} + +static void mmhub_v3_3_update_medium_grain_clock_gating(struct amdgpu_device *adev, + bool enable) +{ + uint32_t def, data; + + def = data = RREG32_SOC15(MMHUB, 0, regMM_ATC_L2_MISC_CG); + + if (enable) + data |= MM_ATC_L2_MISC_CG__ENABLE_MASK; + else + data &= ~MM_ATC_L2_MISC_CG__ENABLE_MASK; + + if (def != data) + WREG32_SOC15(MMHUB, 0, regMM_ATC_L2_MISC_CG, data); +} + +static void mmhub_v3_3_update_medium_grain_light_sleep(struct amdgpu_device *adev, + bool enable) +{ + uint32_t def, data; + + def = data = RREG32_SOC15(MMHUB, 0, regMM_ATC_L2_MISC_CG); + + if (enable) + data |= MM_ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK; + else + data &= ~MM_ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK; + + if (def != data) + WREG32_SOC15(MMHUB, 0, regMM_ATC_L2_MISC_CG, data); +} + +static int mmhub_v3_3_set_clockgating(struct amdgpu_device *adev, + enum amd_clockgating_state state) +{ + if (amdgpu_sriov_vf(adev)) + return 0; + + mmhub_v3_3_update_medium_grain_clock_gating(adev, + state == AMD_CG_STATE_GATE); + mmhub_v3_3_update_medium_grain_light_sleep(adev, + state == AMD_CG_STATE_GATE); + return 0; +} + +static void mmhub_v3_3_get_clockgating(struct amdgpu_device *adev, u64 *flags) +{ + int data; + + if (amdgpu_sriov_vf(adev)) + *flags = 0; + + data = RREG32_SOC15(MMHUB, 0, regMM_ATC_L2_MISC_CG); + + /* AMD_CG_SUPPORT_MC_MGCG */ + if (data & MM_ATC_L2_MISC_CG__ENABLE_MASK) + *flags |= AMD_CG_SUPPORT_MC_MGCG; + + /* AMD_CG_SUPPORT_MC_LS */ + if (data & MM_ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK) + *flags |= AMD_CG_SUPPORT_MC_LS; +} + +const struct amdgpu_mmhub_funcs mmhub_v3_3_funcs = { + .init = mmhub_v3_3_init, + .get_fb_location = mmhub_v3_3_get_fb_location, + .get_mc_fb_offset = mmhub_v3_3_get_mc_fb_offset, + .gart_enable = mmhub_v3_3_gart_enable, + .set_fault_enable_default = mmhub_v3_3_set_fault_enable_default, + .gart_disable = mmhub_v3_3_gart_disable, + .set_clockgating = mmhub_v3_3_set_clockgating, + .get_clockgating = mmhub_v3_3_get_clockgating, + .setup_vm_pt_regs = mmhub_v3_3_setup_vm_pt_regs, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_3.h b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_3.h new file mode 100644 index 000000000000..37b62c7e5a4a --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_3.h @@ -0,0 +1,29 @@ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __MMHUB_V3_3_H__ +#define __MMHUB_V3_3_H__ + +extern const struct amdgpu_mmhub_funcs mmhub_v3_3_funcs; + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c index e790f890aec6..1b7da4aff2b8 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c @@ -108,7 +108,7 @@ static void mmhub_v9_4_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmi } static void mmhub_v9_4_init_system_aperture_regs(struct amdgpu_device *adev, - int hubid) + int hubid) { uint64_t value; uint32_t tmp; @@ -308,7 +308,7 @@ static void mmhub_v9_4_setup_vmid_config(struct amdgpu_device *adev, int hubid) for (i = 0; i <= 14; i++) { tmp = RREG32_SOC15_OFFSET(MMHUB, 0, mmVML2VC0_VM_CONTEXT1_CNTL, - hubid * MMHUB_INSTANCE_REGISTER_OFFSET + i); + hubid * MMHUB_INSTANCE_REGISTER_OFFSET + i * hub->ctx_distance); tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1); tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL, @@ -1568,7 +1568,7 @@ static int mmhub_v9_4_get_ras_error_count(struct amdgpu_device *adev, uint32_t sec_cnt, ded_cnt; for (i = 0; i < ARRAY_SIZE(mmhub_v9_4_ras_fields); i++) { - if(mmhub_v9_4_ras_fields[i].reg_offset != reg->reg_offset) + if (mmhub_v9_4_ras_fields[i].reg_offset != reg->reg_offset) continue; sec_cnt = (value & diff --git a/drivers/gpu/drm/amd/amdgpu/mmsch_v4_0.h b/drivers/gpu/drm/amd/amdgpu/mmsch_v4_0.h index 796d4f8791e5..ced26cc5123a 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmsch_v4_0.h +++ b/drivers/gpu/drm/amd/amdgpu/mmsch_v4_0.h @@ -35,13 +35,11 @@ #define MMSCH_VF_ENGINE_STATUS__PASS 0x1 -#define MMSCH_VF_MAILBOX_RESP__OK 0x1 -#define MMSCH_VF_MAILBOX_RESP__INCOMPLETE 0x2 - -#define MMSCH_VF_ENGINE_STATUS__PASS 0x1 - -#define MMSCH_VF_MAILBOX_RESP__OK 0x1 -#define MMSCH_VF_MAILBOX_RESP__INCOMPLETE 0x2 +#define MMSCH_VF_MAILBOX_RESP__OK 0x1 +#define MMSCH_VF_MAILBOX_RESP__INCOMPLETE 0x2 +#define MMSCH_VF_MAILBOX_RESP__FAILED 0x3 +#define MMSCH_VF_MAILBOX_RESP__FAILED_SMALL_CTX_SIZE 0x4 +#define MMSCH_VF_MAILBOX_RESP__UNKNOWN_CMD 0x5 #define MMSCH_V4_0_VCN_INSTANCES 0x2 diff --git a/drivers/gpu/drm/amd/amdgpu/mmsch_v4_0_3.h b/drivers/gpu/drm/amd/amdgpu/mmsch_v4_0_3.h new file mode 100644 index 000000000000..db7eb5260295 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/mmsch_v4_0_3.h @@ -0,0 +1,37 @@ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __MMSCH_V4_0_3_H__ +#define __MMSCH_V4_0_3_H__ + +#include "amdgpu_vcn.h" +#include "mmsch_v4_0.h" + +struct mmsch_v4_0_3_init_header { + uint32_t version; + uint32_t total_size; + struct mmsch_v4_0_table_info vcn0; + struct mmsch_v4_0_table_info mjpegdec0[4]; + struct mmsch_v4_0_table_info mjpegdec1[4]; +}; +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h index af1a784696bd..c520b2fabfb9 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h @@ -62,7 +62,9 @@ int xgpu_ai_mailbox_add_irq_id(struct amdgpu_device *adev); int xgpu_ai_mailbox_get_irq(struct amdgpu_device *adev); void xgpu_ai_mailbox_put_irq(struct amdgpu_device *adev); -#define AI_MAIBOX_CONTROL_TRN_OFFSET_BYTE SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF0_MAILBOX_CONTROL) * 4 -#define AI_MAIBOX_CONTROL_RCV_OFFSET_BYTE SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF0_MAILBOX_CONTROL) * 4 + 1 +#define AI_MAIBOX_CONTROL_TRN_OFFSET_BYTE \ + (SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF0_MAILBOX_CONTROL) * 4) +#define AI_MAIBOX_CONTROL_RCV_OFFSET_BYTE \ + (SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF0_MAILBOX_CONTROL) * 4 + 1) #endif diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c index cae1aaa4ddb6..6a68ee946f1c 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c @@ -183,12 +183,10 @@ send_request: if (req != IDH_REQ_GPU_INIT_DATA) { pr_err("Doesn't get msg:%d from pf, error=%d\n", event, r); return r; - } - else /* host doesn't support REQ_GPU_INIT_DATA handshake */ + } else /* host doesn't support REQ_GPU_INIT_DATA handshake */ adev->virt.req_init_data_ver = 0; } else { - if (req == IDH_REQ_GPU_INIT_DATA) - { + if (req == IDH_REQ_GPU_INIT_DATA) { adev->virt.req_init_data_ver = RREG32_NO_KIQ(mmMAILBOX_MSGBUF_RCV_DW1); diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c index 288c414babdf..59f53c743362 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c @@ -334,7 +334,7 @@ static void xgpu_vi_mailbox_send_ack(struct amdgpu_device *adev) break; } mdelay(1); - timeout -=1; + timeout -= 1; reg = RREG32_NO_KIQ(mmMAILBOX_CONTROL); } diff --git a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c index eec13cb5bf75..e64b33115848 100644 --- a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c @@ -107,7 +107,7 @@ force_update_wptr_for_self_int(struct amdgpu_device *adev, { u32 ih_cntl, ih_rb_cntl; - if (adev->ip_versions[OSSSYS_HWIP][0] < IP_VERSION(5, 0, 3)) + if (amdgpu_ip_version(adev, OSSSYS_HWIP, 0) < IP_VERSION(5, 0, 3)) return; ih_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_CNTL2); @@ -330,7 +330,7 @@ static int navi10_ih_irq_init(struct amdgpu_device *adev) if (unlikely(adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT)) { if (ih[0]->use_bus_addr) { - switch (adev->ip_versions[OSSSYS_HWIP][0]) { + switch (amdgpu_ip_version(adev, OSSSYS_HWIP, 0)) { case IP_VERSION(5, 0, 3): case IP_VERSION(5, 2, 0): case IP_VERSION(5, 2, 1): @@ -565,7 +565,7 @@ static int navi10_ih_sw_init(void *handle) use_bus_addr = false; else use_bus_addr = true; - r = amdgpu_ih_ring_init(adev, &adev->irq.ih, 256 * 1024, use_bus_addr); + r = amdgpu_ih_ring_init(adev, &adev->irq.ih, IH_RING_SIZE, use_bus_addr); if (r) return r; @@ -578,7 +578,7 @@ static int navi10_ih_sw_init(void *handle) /* initialize ih control registers offset */ navi10_ih_init_register_offset(adev); - r = amdgpu_ih_ring_init(adev, &adev->irq.ih_soft, PAGE_SIZE, true); + r = amdgpu_ih_ring_init(adev, &adev->irq.ih_soft, IH_SW_RING_SIZE, true); if (r) return r; @@ -665,8 +665,6 @@ static void navi10_ih_update_clockgating_state(struct amdgpu_device *adev, if (def != data) WREG32_SOC15(OSSSYS, 0, mmIH_CLK_CTRL, data); } - - return; } static int navi10_ih_set_clockgating_state(void *handle, @@ -691,8 +689,6 @@ static void navi10_ih_get_clockgating_state(void *handle, u64 *flags) if (!RREG32_SOC15(OSSSYS, 0, mmIH_CLK_CTRL)) *flags |= AMD_CG_SUPPORT_IH_CG; - - return; } static const struct amd_ip_funcs navi10_ih_ip_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c index 4038455d7998..df218d5ca775 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c @@ -28,6 +28,7 @@ #include "nbio/nbio_2_3_offset.h" #include "nbio/nbio_2_3_sh_mask.h" #include <uapi/linux/kfd_ioctl.h> +#include <linux/device.h> #include <linux/pci.h> #define smnPCIE_CONFIG_CNTL 0x11180044 @@ -361,7 +362,7 @@ static void nbio_v2_3_enable_aspm(struct amdgpu_device *adev, data |= NAVI10_PCIE__LC_L0S_INACTIVITY_DEFAULT << PCIE_LC_CNTL__LC_L0S_INACTIVITY__SHIFT; - if (pci_is_thunderbolt_attached(adev->pdev)) + if (dev_is_removable(&adev->pdev->dev)) data |= NAVI10_PCIE__LC_L1_INACTIVITY_TBT_DEFAULT << PCIE_LC_CNTL__LC_L1_INACTIVITY__SHIFT; else data |= NAVI10_PCIE__LC_L1_INACTIVITY_DEFAULT << PCIE_LC_CNTL__LC_L1_INACTIVITY__SHIFT; @@ -480,7 +481,7 @@ static void nbio_v2_3_program_aspm(struct amdgpu_device *adev) def = data = RREG32_PCIE(smnPCIE_LC_CNTL); data |= NAVI10_PCIE__LC_L0S_INACTIVITY_DEFAULT << PCIE_LC_CNTL__LC_L0S_INACTIVITY__SHIFT; - if (pci_is_thunderbolt_attached(adev->pdev)) + if (dev_is_removable(&adev->pdev->dev)) data |= NAVI10_PCIE__LC_L1_INACTIVITY_TBT_DEFAULT << PCIE_LC_CNTL__LC_L1_INACTIVITY__SHIFT; else data |= NAVI10_PCIE__LC_L1_INACTIVITY_DEFAULT << PCIE_LC_CNTL__LC_L1_INACTIVITY__SHIFT; @@ -536,7 +537,7 @@ static void nbio_v2_3_clear_doorbell_interrupt(struct amdgpu_device *adev) { uint32_t reg, reg_data; - if (adev->ip_versions[NBIO_HWIP][0] != IP_VERSION(3, 3, 0)) + if (amdgpu_ip_version(adev, NBIO_HWIP, 0) != IP_VERSION(3, 3, 0)) return; reg = RREG32_SOC15(NBIO, 0, mmBIF_RB_CNTL); diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v4_3.c b/drivers/gpu/drm/amd/amdgpu/nbio_v4_3.c index d5ed9e0e1a5f..a3622897e3fe 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v4_3.c @@ -338,13 +338,16 @@ const struct nbio_hdp_flush_reg nbio_v4_3_hdp_flush_reg = { static void nbio_v4_3_init_registers(struct amdgpu_device *adev) { - if (adev->ip_versions[NBIO_HWIP][0] == IP_VERSION(4, 3, 0)) { + if (amdgpu_ip_version(adev, NBIO_HWIP, 0) == IP_VERSION(4, 3, 0)) { uint32_t data; data = RREG32_SOC15(NBIO, 0, regRCC_DEV0_EPF2_STRAP2); data &= ~RCC_DEV0_EPF2_STRAP2__STRAP_NO_SOFT_RESET_DEV0_F2_MASK; WREG32_SOC15(NBIO, 0, regRCC_DEV0_EPF2_STRAP2, data); } + if (amdgpu_sriov_vf(adev)) + adev->rmmio_remap.reg_offset = SOC15_REG_OFFSET(NBIO, 0, + regBIF_BX_DEV0_EPF0_VF0_HDP_MEM_COHERENCY_FLUSH_CNTL) << 2; } static u32 nbio_v4_3_get_rom_offset(struct amdgpu_device *adev) @@ -389,8 +392,8 @@ static void nbio_v4_3_program_aspm(struct amdgpu_device *adev) #ifdef CONFIG_PCIEASPM uint32_t def, data; - if (!(adev->ip_versions[PCIE_HWIP][0] == IP_VERSION(7, 4, 0)) && - !(adev->ip_versions[PCIE_HWIP][0] == IP_VERSION(7, 6, 0))) + if (!(amdgpu_ip_version(adev, PCIE_HWIP, 0) == IP_VERSION(7, 4, 0)) && + !(amdgpu_ip_version(adev, PCIE_HWIP, 0) == IP_VERSION(7, 6, 0))) return; def = data = RREG32_SOC15(NBIO, 0, regPCIE_LC_CNTL); diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_11.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_11.c new file mode 100644 index 000000000000..1f52b4b1db03 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_11.c @@ -0,0 +1,372 @@ +/* + * Copyright 2021 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#include "amdgpu.h" +#include "amdgpu_atombios.h" +#include "nbio_v7_11.h" + +#include "nbio/nbio_7_11_0_offset.h" +#include "nbio/nbio_7_11_0_sh_mask.h" +#include <uapi/linux/kfd_ioctl.h> + +static void nbio_v7_11_remap_hdp_registers(struct amdgpu_device *adev) +{ + WREG32_SOC15(NBIO, 0, regBIF_BX0_REMAP_HDP_MEM_FLUSH_CNTL, + adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL); + WREG32_SOC15(NBIO, 0, regBIF_BX0_REMAP_HDP_REG_FLUSH_CNTL, + adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_REG_FLUSH_CNTL); +} + +static u32 nbio_v7_11_get_rev_id(struct amdgpu_device *adev) +{ + u32 tmp; + + tmp = RREG32_SOC15(NBIO, 0, regRCC_STRAP1_RCC_DEV0_EPF0_STRAP0); + tmp &= RCC_STRAP0_RCC_DEV0_EPF0_STRAP0__STRAP_ATI_REV_ID_DEV0_F0_MASK; + tmp >>= RCC_STRAP0_RCC_DEV0_EPF0_STRAP0__STRAP_ATI_REV_ID_DEV0_F0__SHIFT; + + return tmp; +} + +static void nbio_v7_11_mc_access_enable(struct amdgpu_device *adev, bool enable) +{ + if (enable) + WREG32_SOC15(NBIO, 0, regBIF_BX1_BIF_FB_EN, + BIF_BX1_BIF_FB_EN__FB_READ_EN_MASK | + BIF_BX1_BIF_FB_EN__FB_WRITE_EN_MASK); + else + WREG32_SOC15(NBIO, 0, regBIF_BX1_BIF_FB_EN, 0); +} + +static u32 nbio_v7_11_get_memsize(struct amdgpu_device *adev) +{ + return RREG32_SOC15(NBIO, 0, regRCC_DEV0_EPF0_0_RCC_CONFIG_MEMSIZE); +} + +static void nbio_v7_11_sdma_doorbell_range(struct amdgpu_device *adev, int instance, + bool use_doorbell, int doorbell_index, + int doorbell_size) +{ + u32 reg = SOC15_REG_OFFSET(NBIO, 0, regGDC0_BIF_CSDMA_DOORBELL_RANGE); + u32 doorbell_range = RREG32_PCIE_PORT(reg); + + if (use_doorbell) { + doorbell_range = REG_SET_FIELD(doorbell_range, + GDC0_BIF_CSDMA_DOORBELL_RANGE, + OFFSET, doorbell_index); + doorbell_range = REG_SET_FIELD(doorbell_range, + GDC0_BIF_CSDMA_DOORBELL_RANGE, + SIZE, doorbell_size); + } else { + doorbell_range = REG_SET_FIELD(doorbell_range, + GDC0_BIF_CSDMA_DOORBELL_RANGE, + SIZE, 0); + } + + WREG32_PCIE_PORT(reg, doorbell_range); +} + +static void nbio_v7_11_vpe_doorbell_range(struct amdgpu_device *adev, int instance, + bool use_doorbell, int doorbell_index, + int doorbell_size) +{ + u32 reg = SOC15_REG_OFFSET(NBIO, 0, regGDC0_BIF_VPE_DOORBELL_RANGE); + u32 doorbell_range = RREG32_PCIE_PORT(reg); + + if (use_doorbell) { + doorbell_range = REG_SET_FIELD(doorbell_range, + GDC0_BIF_VPE_DOORBELL_RANGE, + OFFSET, doorbell_index); + doorbell_range = REG_SET_FIELD(doorbell_range, + GDC0_BIF_VPE_DOORBELL_RANGE, + SIZE, doorbell_size); + } else { + doorbell_range = REG_SET_FIELD(doorbell_range, + GDC0_BIF_VPE_DOORBELL_RANGE, + SIZE, 0); + } + + WREG32_PCIE_PORT(reg, doorbell_range); +} + +static void nbio_v7_11_vcn_doorbell_range(struct amdgpu_device *adev, + bool use_doorbell, + int doorbell_index, int instance) +{ + u32 reg = SOC15_REG_OFFSET(NBIO, 0, regGDC0_BIF_VCN0_DOORBELL_RANGE); + u32 doorbell_range = RREG32_PCIE_PORT(reg); + + if (use_doorbell) { + doorbell_range = REG_SET_FIELD(doorbell_range, + GDC0_BIF_VCN0_DOORBELL_RANGE, OFFSET, + doorbell_index); + doorbell_range = REG_SET_FIELD(doorbell_range, + GDC0_BIF_VCN0_DOORBELL_RANGE, SIZE, 8); + } else { + doorbell_range = REG_SET_FIELD(doorbell_range, + GDC0_BIF_VCN0_DOORBELL_RANGE, SIZE, 0); + } + + WREG32_PCIE_PORT(reg, doorbell_range); +} + +static void nbio_v7_11_enable_doorbell_aperture(struct amdgpu_device *adev, + bool enable) +{ + u32 reg; + + + reg = RREG32_SOC15(NBIO, 0, regRCC_DEV0_EPF0_0_RCC_DOORBELL_APER_EN); + reg = REG_SET_FIELD(reg, RCC_DEV0_EPF0_0_RCC_DOORBELL_APER_EN, + BIF_DOORBELL_APER_EN, enable ? 1 : 0); + + WREG32_SOC15(NBIO, 0, regRCC_DEV0_EPF0_0_RCC_DOORBELL_APER_EN, reg); +} + +static void nbio_v7_11_enable_doorbell_selfring_aperture(struct amdgpu_device *adev, + bool enable) +{ + u32 tmp = 0; + + if (enable) { + tmp = REG_SET_FIELD(tmp, BIF_BX_PF1_DOORBELL_SELFRING_GPA_APER_CNTL, + DOORBELL_SELFRING_GPA_APER_EN, 1) | + REG_SET_FIELD(tmp, BIF_BX_PF1_DOORBELL_SELFRING_GPA_APER_CNTL, + DOORBELL_SELFRING_GPA_APER_MODE, 1) | + REG_SET_FIELD(tmp, BIF_BX_PF1_DOORBELL_SELFRING_GPA_APER_CNTL, + DOORBELL_SELFRING_GPA_APER_SIZE, 0); + + WREG32_SOC15(NBIO, 0, + regBIF_BX_PF1_DOORBELL_SELFRING_GPA_APER_BASE_LOW, + lower_32_bits(adev->doorbell.base)); + WREG32_SOC15(NBIO, 0, + regBIF_BX_PF1_DOORBELL_SELFRING_GPA_APER_BASE_HIGH, + upper_32_bits(adev->doorbell.base)); + } + + WREG32_SOC15(NBIO, 0, regBIF_BX_PF1_DOORBELL_SELFRING_GPA_APER_CNTL, tmp); +} + + +static void nbio_v7_11_ih_doorbell_range(struct amdgpu_device *adev, + bool use_doorbell, int doorbell_index) +{ + u32 ih_doorbell_range = RREG32_SOC15(NBIO, 0,regGDC0_BIF_IH_DOORBELL_RANGE); + + if (use_doorbell) { + ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range, + GDC0_BIF_IH_DOORBELL_RANGE, OFFSET, + doorbell_index); + ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range, + GDC0_BIF_IH_DOORBELL_RANGE, SIZE, + 2); + } else { + ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range, + GDC0_BIF_IH_DOORBELL_RANGE, SIZE, + 0); + } + + WREG32_SOC15(NBIO, 0, regGDC0_BIF_IH_DOORBELL_RANGE, + ih_doorbell_range); +} + +static void nbio_v7_11_ih_control(struct amdgpu_device *adev) +{ + u32 interrupt_cntl; + + /* setup interrupt control */ + WREG32_SOC15(NBIO, 0, regBIF_BX1_INTERRUPT_CNTL2, + adev->dummy_page_addr >> 8); + + interrupt_cntl = RREG32_SOC15(NBIO, 0, regBIF_BX1_INTERRUPT_CNTL); + /* + * INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=0 - dummy read disabled with msi, enabled without msi + * INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=1 - dummy read controlled by IH_DUMMY_RD_EN + */ + interrupt_cntl = REG_SET_FIELD(interrupt_cntl, BIF_BX1_INTERRUPT_CNTL, + IH_DUMMY_RD_OVERRIDE, 0); + + /* INTERRUPT_CNTL__IH_REQ_NONSNOOP_EN_MASK=1 if ring is in non-cacheable memory, e.g., vram */ + interrupt_cntl = REG_SET_FIELD(interrupt_cntl, BIF_BX1_INTERRUPT_CNTL, + IH_REQ_NONSNOOP_EN, 0); + + WREG32_SOC15(NBIO, 0, regBIF_BX1_INTERRUPT_CNTL, interrupt_cntl); +} + +static u32 nbio_v7_11_get_hdp_flush_req_offset(struct amdgpu_device *adev) +{ + return SOC15_REG_OFFSET(NBIO, 0, regBIF_BX_PF1_GPU_HDP_FLUSH_REQ); +} + +static u32 nbio_v7_11_get_hdp_flush_done_offset(struct amdgpu_device *adev) +{ + return SOC15_REG_OFFSET(NBIO, 0, regBIF_BX_PF1_GPU_HDP_FLUSH_DONE); +} + +static u32 nbio_v7_11_get_pcie_index_offset(struct amdgpu_device *adev) +{ + return SOC15_REG_OFFSET(NBIO, 0, regBIF_BX1_PCIE_INDEX2); +} + +static u32 nbio_v7_11_get_pcie_data_offset(struct amdgpu_device *adev) +{ + return SOC15_REG_OFFSET(NBIO, 0, regBIF_BX1_PCIE_DATA2); +} + +static u32 nbio_v7_11_get_pcie_port_index_offset(struct amdgpu_device *adev) +{ + return SOC15_REG_OFFSET(NBIO, 0, regBIF_BX_PF1_RSMU_INDEX); +} + +static u32 nbio_v7_11_get_pcie_port_data_offset(struct amdgpu_device *adev) +{ + return SOC15_REG_OFFSET(NBIO, 0, regBIF_BX_PF1_RSMU_DATA); +} + +const struct nbio_hdp_flush_reg nbio_v7_11_hdp_flush_reg = { + .ref_and_mask_cp0 = BIF_BX_PF1_GPU_HDP_FLUSH_DONE__CP0_MASK, + .ref_and_mask_cp1 = BIF_BX_PF1_GPU_HDP_FLUSH_DONE__CP1_MASK, + .ref_and_mask_cp2 = BIF_BX_PF1_GPU_HDP_FLUSH_DONE__CP2_MASK, + .ref_and_mask_cp3 = BIF_BX_PF1_GPU_HDP_FLUSH_DONE__CP3_MASK, + .ref_and_mask_cp4 = BIF_BX_PF1_GPU_HDP_FLUSH_DONE__CP4_MASK, + .ref_and_mask_cp5 = BIF_BX_PF1_GPU_HDP_FLUSH_DONE__CP5_MASK, + .ref_and_mask_cp6 = BIF_BX_PF1_GPU_HDP_FLUSH_DONE__CP6_MASK, + .ref_and_mask_cp7 = BIF_BX_PF1_GPU_HDP_FLUSH_DONE__CP7_MASK, + .ref_and_mask_cp8 = BIF_BX_PF1_GPU_HDP_FLUSH_DONE__CP8_MASK, + .ref_and_mask_cp9 = BIF_BX_PF1_GPU_HDP_FLUSH_DONE__CP9_MASK, + .ref_and_mask_sdma0 = BIF_BX_PF1_GPU_HDP_FLUSH_DONE__SDMA0_MASK, + .ref_and_mask_sdma1 = BIF_BX_PF1_GPU_HDP_FLUSH_DONE__SDMA1_MASK, +}; + +static void nbio_v7_11_init_registers(struct amdgpu_device *adev) +{ + uint32_t def, data; + + def = data = RREG32_SOC15(NBIO, 0, regBIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3); + data = REG_SET_FIELD(data, BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3, + CI_SWUS_MAX_READ_REQUEST_SIZE_MODE, 1); + data = REG_SET_FIELD(data, BIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3, + CI_SWUS_MAX_READ_REQUEST_SIZE_PRIV, 1); + + if (def != data) + WREG32_SOC15(NBIO, 0, regBIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3, data); + +} + +static void nbio_v7_11_update_medium_grain_clock_gating(struct amdgpu_device *adev, + bool enable) +{ + uint32_t def, data; + + if (!(adev->cg_flags & AMD_CG_SUPPORT_BIF_MGCG)) + return; + + def = data = RREG32_SOC15(NBIO, 0, regBIF_BIF256_CI256_RC3X4_USB4_CPM_CONTROL); + if (enable) { + data |= (BIF_BIF256_CI256_RC3X4_USB4_CPM_CONTROL__LCLK_DYN_GATE_ENABLE_MASK | + BIF_BIF256_CI256_RC3X4_USB4_CPM_CONTROL__TXCLK_DYN_GATE_ENABLE_MASK | + BIF_BIF256_CI256_RC3X4_USB4_CPM_CONTROL__TXCLK_LCNT_GATE_ENABLE_MASK | + BIF_BIF256_CI256_RC3X4_USB4_CPM_CONTROL__TXCLK_REGS_GATE_ENABLE_MASK | + BIF_BIF256_CI256_RC3X4_USB4_CPM_CONTROL__TXCLK_PRBS_GATE_ENABLE_MASK | + BIF_BIF256_CI256_RC3X4_USB4_CPM_CONTROL__REFCLK_REGS_GATE_ENABLE_MASK); + } else { + data &= ~(BIF_BIF256_CI256_RC3X4_USB4_CPM_CONTROL__LCLK_DYN_GATE_ENABLE_MASK | + BIF_BIF256_CI256_RC3X4_USB4_CPM_CONTROL__TXCLK_DYN_GATE_ENABLE_MASK | + BIF_BIF256_CI256_RC3X4_USB4_CPM_CONTROL__TXCLK_LCNT_GATE_ENABLE_MASK | + BIF_BIF256_CI256_RC3X4_USB4_CPM_CONTROL__TXCLK_REGS_GATE_ENABLE_MASK | + BIF_BIF256_CI256_RC3X4_USB4_CPM_CONTROL__TXCLK_PRBS_GATE_ENABLE_MASK | + BIF_BIF256_CI256_RC3X4_USB4_CPM_CONTROL__REFCLK_REGS_GATE_ENABLE_MASK); + } + + if (def != data) + WREG32_SOC15(NBIO, 0, regBIF_BIF256_CI256_RC3X4_USB4_CPM_CONTROL, data); +} + +static void nbio_v7_11_update_medium_grain_light_sleep(struct amdgpu_device *adev, + bool enable) +{ + uint32_t def, data; + + if (!(adev->cg_flags & AMD_CG_SUPPORT_BIF_LS)) + return; + + def = data = RREG32_SOC15(NBIO, 0, regBIF_BIF256_CI256_RC3X4_USB4_PCIE_CNTL2); + if (enable) + data |= BIF_BIF256_CI256_RC3X4_USB4_PCIE_CNTL2__SLV_MEM_LS_EN_MASK; + else + data &= ~BIF_BIF256_CI256_RC3X4_USB4_PCIE_CNTL2__SLV_MEM_LS_EN_MASK; + + if (def != data) + WREG32_SOC15(NBIO, 0, regBIF_BIF256_CI256_RC3X4_USB4_PCIE_CNTL2, data); + + def = data = RREG32_SOC15(NBIO, 0, regBIF_BIF256_CI256_RC3X4_USB4_PCIE_TX_POWER_CTRL_1); + if (enable) { + data |= (BIF_BIF256_CI256_RC3X4_USB4_PCIE_TX_POWER_CTRL_1__MST_MEM_LS_EN_MASK | + BIF_BIF256_CI256_RC3X4_USB4_PCIE_TX_POWER_CTRL_1__REPLAY_MEM_LS_EN_MASK); + } else { + data &= ~(BIF_BIF256_CI256_RC3X4_USB4_PCIE_TX_POWER_CTRL_1__MST_MEM_LS_EN_MASK | + BIF_BIF256_CI256_RC3X4_USB4_PCIE_TX_POWER_CTRL_1__REPLAY_MEM_LS_EN_MASK); + } + + if (def != data) + WREG32_SOC15(NBIO, 0, regBIF_BIF256_CI256_RC3X4_USB4_PCIE_TX_POWER_CTRL_1, data); +} + +static void nbio_v7_11_get_clockgating_state(struct amdgpu_device *adev, + u64 *flags) +{ + uint32_t data; + + /* AMD_CG_SUPPORT_BIF_MGCG */ + data = RREG32_SOC15(NBIO, 0, regBIF_BIF256_CI256_RC3X4_USB4_CPM_CONTROL); + if (data & BIF_BIF256_CI256_RC3X4_USB4_CPM_CONTROL__LCLK_DYN_GATE_ENABLE_MASK) + *flags |= AMD_CG_SUPPORT_BIF_MGCG; + + /* AMD_CG_SUPPORT_BIF_LS */ + data = RREG32_SOC15(NBIO, 0, regBIF_BIF256_CI256_RC3X4_USB4_PCIE_CNTL2); + if (data & BIF_BIF256_CI256_RC3X4_USB4_PCIE_CNTL2__SLV_MEM_LS_EN_MASK) + *flags |= AMD_CG_SUPPORT_BIF_LS; +} + +const struct amdgpu_nbio_funcs nbio_v7_11_funcs = { + .get_hdp_flush_req_offset = nbio_v7_11_get_hdp_flush_req_offset, + .get_hdp_flush_done_offset = nbio_v7_11_get_hdp_flush_done_offset, + .get_pcie_index_offset = nbio_v7_11_get_pcie_index_offset, + .get_pcie_data_offset = nbio_v7_11_get_pcie_data_offset, + .get_pcie_port_index_offset = nbio_v7_11_get_pcie_port_index_offset, + .get_pcie_port_data_offset = nbio_v7_11_get_pcie_port_data_offset, + .get_rev_id = nbio_v7_11_get_rev_id, + .mc_access_enable = nbio_v7_11_mc_access_enable, + .get_memsize = nbio_v7_11_get_memsize, + .sdma_doorbell_range = nbio_v7_11_sdma_doorbell_range, + .vcn_doorbell_range = nbio_v7_11_vcn_doorbell_range, + .vpe_doorbell_range = nbio_v7_11_vpe_doorbell_range, + .enable_doorbell_aperture = nbio_v7_11_enable_doorbell_aperture, + .enable_doorbell_selfring_aperture = nbio_v7_11_enable_doorbell_selfring_aperture, + .ih_doorbell_range = nbio_v7_11_ih_doorbell_range, + .update_medium_grain_clock_gating = nbio_v7_11_update_medium_grain_clock_gating, + .update_medium_grain_light_sleep = nbio_v7_11_update_medium_grain_light_sleep, + .get_clockgating_state = nbio_v7_11_get_clockgating_state, + .ih_control = nbio_v7_11_ih_control, + .init_registers = nbio_v7_11_init_registers, + .remap_hdp_registers = nbio_v7_11_remap_hdp_registers, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_11.h b/drivers/gpu/drm/amd/amdgpu/nbio_v7_11.h new file mode 100644 index 000000000000..9d8258ed3f0a --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_11.h @@ -0,0 +1,33 @@ +/* + * Copyright 2021 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __NBIO_V7_11_H__ +#define __NBIO_V7_11_H__ + +#include "soc15_common.h" + +extern const struct nbio_hdp_flush_reg nbio_v7_11_hdp_flush_reg; +extern const struct amdgpu_nbio_funcs nbio_v7_11_funcs; +extern const struct amdgpu_nbio_ras_funcs nbio_v7_11_ras_funcs; + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_2.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_2.c index 4ef1fa4603c8..e962821ae6a1 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_2.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_2.c @@ -59,7 +59,7 @@ static u32 nbio_v7_2_get_rev_id(struct amdgpu_device *adev) { u32 tmp; - switch (adev->ip_versions[NBIO_HWIP][0]) { + switch (amdgpu_ip_version(adev, NBIO_HWIP, 0)) { case IP_VERSION(7, 2, 1): case IP_VERSION(7, 3, 0): case IP_VERSION(7, 5, 0): @@ -78,7 +78,7 @@ static u32 nbio_v7_2_get_rev_id(struct amdgpu_device *adev) static void nbio_v7_2_mc_access_enable(struct amdgpu_device *adev, bool enable) { - switch (adev->ip_versions[NBIO_HWIP][0]) { + switch (amdgpu_ip_version(adev, NBIO_HWIP, 0)) { case IP_VERSION(7, 2, 1): case IP_VERSION(7, 3, 0): case IP_VERSION(7, 5, 0): @@ -262,7 +262,7 @@ static void nbio_v7_2_update_medium_grain_light_sleep(struct amdgpu_device *adev { uint32_t def, data; - switch (adev->ip_versions[NBIO_HWIP][0]) { + switch (amdgpu_ip_version(adev, NBIO_HWIP, 0)) { case IP_VERSION(7, 2, 1): case IP_VERSION(7, 3, 0): case IP_VERSION(7, 5, 0): @@ -369,7 +369,7 @@ const struct nbio_hdp_flush_reg nbio_v7_2_hdp_flush_reg = { static void nbio_v7_2_init_registers(struct amdgpu_device *adev) { uint32_t def, data; - switch (adev->ip_versions[NBIO_HWIP][0]) { + switch (amdgpu_ip_version(adev, NBIO_HWIP, 0)) { case IP_VERSION(7, 2, 1): case IP_VERSION(7, 3, 0): case IP_VERSION(7, 5, 0): @@ -394,7 +394,7 @@ static void nbio_v7_2_init_registers(struct amdgpu_device *adev) break; } - switch (adev->ip_versions[NBIO_HWIP][0]) { + switch (amdgpu_ip_version(adev, NBIO_HWIP, 0)) { case IP_VERSION(7, 3, 0): case IP_VERSION(7, 5, 1): data = RREG32_SOC15(NBIO, 0, regRCC_DEV2_EPF0_STRAP2); diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c index 685abf57ffdd..6d24c84924cb 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c @@ -347,7 +347,7 @@ static void nbio_v7_4_init_registers(struct amdgpu_device *adev) adev->rmmio_remap.reg_offset = SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_DEV0_EPF0_VF0_HDP_MEM_COHERENCY_FLUSH_CNTL) << 2; - if (adev->ip_versions[NBIO_HWIP][0] == IP_VERSION(7, 4, 4) && + if (amdgpu_ip_version(adev, NBIO_HWIP, 0) == IP_VERSION(7, 4, 4) && !amdgpu_sriov_vf(adev)) { baco_cntl = RREG32_SOC15(NBIO, 0, mmBACO_CNTL); if (baco_cntl & @@ -365,9 +365,12 @@ static void nbio_v7_4_handle_ras_controller_intr_no_bifring(struct amdgpu_device { uint32_t bif_doorbell_intr_cntl; struct ras_manager *obj = amdgpu_ras_find_obj(adev, adev->nbio.ras_if); - struct ras_err_data err_data = {0, 0, 0, NULL}; + struct ras_err_data err_data; struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); + if (amdgpu_ras_error_data_init(&err_data)) + return; + if (adev->asic_type == CHIP_ALDEBARAN) bif_doorbell_intr_cntl = RREG32_SOC15(NBIO, 0, mmBIF_DOORBELL_INT_CNTL_ALDE); else @@ -418,6 +421,8 @@ static void nbio_v7_4_handle_ras_controller_intr_no_bifring(struct amdgpu_device */ amdgpu_ras_reset_gpu(adev); } + + amdgpu_ras_error_data_fini(&err_data); } static void nbio_v7_4_handle_ras_err_event_athub_intr_no_bifring(struct amdgpu_device *adev) @@ -702,7 +707,7 @@ static void nbio_v7_4_program_aspm(struct amdgpu_device *adev) #ifdef CONFIG_PCIEASPM uint32_t def, data; - if (adev->ip_versions[NBIO_HWIP][0] == IP_VERSION(7, 4, 4)) + if (amdgpu_ip_version(adev, NBIO_HWIP, 0) == IP_VERSION(7, 4, 4)) return; def = data = RREG32_PCIE(smnPCIE_LC_CNTL); diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_7.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_7.c index def89379b51a..4df1055e640a 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_7.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_7.c @@ -254,7 +254,7 @@ static void nbio_v7_7_update_medium_grain_clock_gating(struct amdgpu_device *ade { uint32_t def, data; - if (enable && !(adev->cg_flags & AMD_CG_SUPPORT_BIF_MGCG)) + if (!(adev->cg_flags & AMD_CG_SUPPORT_BIF_MGCG)) return; def = data = RREG32_SOC15(NBIO, 0, regBIF0_CPM_CONTROL); @@ -283,7 +283,7 @@ static void nbio_v7_7_update_medium_grain_light_sleep(struct amdgpu_device *adev { uint32_t def, data; - if (enable && !(adev->cg_flags & AMD_CG_SUPPORT_BIF_LS)) + if (!(adev->cg_flags & AMD_CG_SUPPORT_BIF_LS)) return; def = data = RREG32_SOC15(NBIO, 0, regBIF0_PCIE_CNTL2); diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c index cd1a02d30420..25a3da83e0fb 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c @@ -32,6 +32,18 @@ #define NPS_MODE_MASK 0x000000FFL +/* Core 0 Port 0 counter */ +#define smnPCIEP_NAK_COUNTER 0x1A340218 + +#define smnPCIE_PERF_CNTL_TXCLK3 0x1A38021c +#define smnPCIE_PERF_CNTL_TXCLK7 0x1A380888 +#define smnPCIE_PERF_COUNT_CNTL 0x1A380200 +#define smnPCIE_PERF_COUNT0_TXCLK3 0x1A380220 +#define smnPCIE_PERF_COUNT0_TXCLK7 0x1A38088C +#define smnPCIE_PERF_COUNT0_UPVAL_TXCLK3 0x1A3808F8 +#define smnPCIE_PERF_COUNT0_UPVAL_TXCLK7 0x1A380918 + + static void nbio_v7_9_remap_hdp_registers(struct amdgpu_device *adev) { WREG32_SOC15(NBIO, 0, regBIF_BX0_REMAP_HDP_MEM_FLUSH_CNTL, @@ -44,8 +56,15 @@ static u32 nbio_v7_9_get_rev_id(struct amdgpu_device *adev) { u32 tmp; + tmp = IP_VERSION_SUBREV(amdgpu_ip_version_full(adev, NBIO_HWIP, 0)); + /* If it is VF or subrevision holds a non-zero value, that should be used */ + if (tmp || amdgpu_sriov_vf(adev)) + return tmp; + + /* If discovery subrev is not updated, use register version */ tmp = RREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_DEV0_EPF0_STRAP0); - tmp = REG_GET_FIELD(tmp, RCC_STRAP0_RCC_DEV0_EPF0_STRAP0, STRAP_ATI_REV_ID_DEV0_F0); + tmp = REG_GET_FIELD(tmp, RCC_STRAP0_RCC_DEV0_EPF0_STRAP0, + STRAP_ATI_REV_ID_DEV0_F0); return tmp; } @@ -161,8 +180,6 @@ static void nbio_v7_9_sdma_doorbell_range(struct amdgpu_device *adev, int instan default: break; } - - return; } static void nbio_v7_9_vcn_doorbell_range(struct amdgpu_device *adev, bool use_doorbell, @@ -425,6 +442,93 @@ static void nbio_v7_9_init_registers(struct amdgpu_device *adev) XCC_DOORBELL_FENCE__SHUB_SLV_MODE_MASK); } + + if (!amdgpu_sriov_vf(adev)) { + u32 baco_cntl; + for_each_inst(i, adev->aid_mask) { + baco_cntl = RREG32_SOC15(NBIO, i, regBIF_BX0_BACO_CNTL); + if (baco_cntl & (BIF_BX0_BACO_CNTL__BACO_DUMMY_EN_MASK | + BIF_BX0_BACO_CNTL__BACO_EN_MASK)) { + baco_cntl &= ~( + BIF_BX0_BACO_CNTL__BACO_DUMMY_EN_MASK | + BIF_BX0_BACO_CNTL__BACO_EN_MASK); + dev_dbg(adev->dev, + "Unsetting baco dummy mode %x", + baco_cntl); + WREG32_SOC15(NBIO, i, regBIF_BX0_BACO_CNTL, + baco_cntl); + } + } + } +} + +static u64 nbio_v7_9_get_pcie_replay_count(struct amdgpu_device *adev) +{ + u32 val, nak_r, nak_g; + + if (adev->flags & AMD_IS_APU) + return 0; + + /* Get the number of NAKs received and generated */ + val = RREG32_PCIE(smnPCIEP_NAK_COUNTER); + nak_r = val & 0xFFFF; + nak_g = val >> 16; + + /* Add the total number of NAKs, i.e the number of replays */ + return (nak_r + nak_g); +} + +static void nbio_v7_9_get_pcie_usage(struct amdgpu_device *adev, uint64_t *count0, + uint64_t *count1) +{ + uint32_t perfctrrx = 0; + uint32_t perfctrtx = 0; + + /* This reports 0 on APUs, so return to avoid writing/reading registers + * that may or may not be different from their GPU counterparts + */ + if (adev->flags & AMD_IS_APU) + return; + + /* Use TXCLK3 counter group for rx event */ + /* Use TXCLK7 counter group for tx event */ + /* Set the 2 events that we wish to watch, defined above */ + /* 40 is event# for received msgs */ + /* 2 is event# of posted requests sent */ + perfctrrx = REG_SET_FIELD(perfctrrx, PCIE_PERF_CNTL_TXCLK3, EVENT0_SEL, 40); + perfctrtx = REG_SET_FIELD(perfctrtx, PCIE_PERF_CNTL_TXCLK7, EVENT0_SEL, 2); + + /* Write to enable desired perf counters */ + WREG32_PCIE(smnPCIE_PERF_CNTL_TXCLK3, perfctrrx); + WREG32_PCIE(smnPCIE_PERF_CNTL_TXCLK7, perfctrtx); + + /* Zero out and enable SHADOW_WR + * Write 0x6: + * Bit 1 = Global Shadow wr(1) + * Bit 2 = Global counter reset enable(1) + */ + WREG32_PCIE(smnPCIE_PERF_COUNT_CNTL, 0x00000006); + + /* Enable Gloabl Counter + * Write 0x1: + * Bit 0 = Global Counter Enable(1) + */ + WREG32_PCIE(smnPCIE_PERF_COUNT_CNTL, 0x00000001); + + msleep(1000); + + /* Disable Global Counter, Reset and enable SHADOW_WR + * Write 0x6: + * Bit 1 = Global Shadow wr(1) + * Bit 2 = Global counter reset enable(1) + */ + WREG32_PCIE(smnPCIE_PERF_COUNT_CNTL, 0x00000006); + + /* Get the upper and lower count */ + *count0 = RREG32_PCIE(smnPCIE_PERF_COUNT0_TXCLK3) | + ((uint64_t)RREG32_PCIE(smnPCIE_PERF_COUNT0_UPVAL_TXCLK3) << 32); + *count1 = RREG32_PCIE(smnPCIE_PERF_COUNT0_TXCLK7) | + ((uint64_t)RREG32_PCIE(smnPCIE_PERF_COUNT0_UPVAL_TXCLK7) << 32); } const struct amdgpu_nbio_funcs nbio_v7_9_funcs = { @@ -450,4 +554,192 @@ const struct amdgpu_nbio_funcs nbio_v7_9_funcs = { .get_compute_partition_mode = nbio_v7_9_get_compute_partition_mode, .get_memory_partition_mode = nbio_v7_9_get_memory_partition_mode, .init_registers = nbio_v7_9_init_registers, + .get_pcie_replay_count = nbio_v7_9_get_pcie_replay_count, + .get_pcie_usage = nbio_v7_9_get_pcie_usage, +}; + +static void nbio_v7_9_query_ras_error_count(struct amdgpu_device *adev, + void *ras_error_status) +{ +} + +static void nbio_v7_9_handle_ras_controller_intr_no_bifring(struct amdgpu_device *adev) +{ + uint32_t bif_doorbell_intr_cntl; + struct ras_manager *obj = amdgpu_ras_find_obj(adev, adev->nbio.ras_if); + struct ras_err_data err_data; + struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); + + if (amdgpu_ras_error_data_init(&err_data)) + return; + + bif_doorbell_intr_cntl = RREG32_SOC15(NBIO, 0, regBIF_BX0_BIF_DOORBELL_INT_CNTL); + + if (REG_GET_FIELD(bif_doorbell_intr_cntl, + BIF_BX0_BIF_DOORBELL_INT_CNTL, RAS_CNTLR_INTERRUPT_STATUS)) { + /* driver has to clear the interrupt status when bif ring is disabled */ + bif_doorbell_intr_cntl = REG_SET_FIELD(bif_doorbell_intr_cntl, + BIF_BX0_BIF_DOORBELL_INT_CNTL, + RAS_CNTLR_INTERRUPT_CLEAR, 1); + WREG32_SOC15(NBIO, 0, regBIF_BX0_BIF_DOORBELL_INT_CNTL, bif_doorbell_intr_cntl); + + if (!ras->disable_ras_err_cnt_harvest) { + /* + * clear error status after ras_controller_intr + * according to hw team and count ue number + * for query + */ + nbio_v7_9_query_ras_error_count(adev, &err_data); + + /* logging on error cnt and printing for awareness */ + obj->err_data.ue_count += err_data.ue_count; + obj->err_data.ce_count += err_data.ce_count; + + if (err_data.ce_count) + dev_info(adev->dev, "%ld correctable hardware " + "errors detected in %s block, " + "no user action is needed.\n", + obj->err_data.ce_count, + get_ras_block_str(adev->nbio.ras_if)); + + if (err_data.ue_count) + dev_info(adev->dev, "%ld uncorrectable hardware " + "errors detected in %s block\n", + obj->err_data.ue_count, + get_ras_block_str(adev->nbio.ras_if)); + } + + dev_info(adev->dev, "RAS controller interrupt triggered " + "by NBIF error\n"); + } + + amdgpu_ras_error_data_fini(&err_data); +} + +static void nbio_v7_9_handle_ras_err_event_athub_intr_no_bifring(struct amdgpu_device *adev) +{ + uint32_t bif_doorbell_intr_cntl; + + bif_doorbell_intr_cntl = RREG32_SOC15(NBIO, 0, regBIF_BX0_BIF_DOORBELL_INT_CNTL); + + if (REG_GET_FIELD(bif_doorbell_intr_cntl, + BIF_BX0_BIF_DOORBELL_INT_CNTL, RAS_ATHUB_ERR_EVENT_INTERRUPT_STATUS)) { + /* driver has to clear the interrupt status when bif ring is disabled */ + bif_doorbell_intr_cntl = REG_SET_FIELD(bif_doorbell_intr_cntl, + BIF_BX0_BIF_DOORBELL_INT_CNTL, + RAS_ATHUB_ERR_EVENT_INTERRUPT_CLEAR, 1); + + WREG32_SOC15(NBIO, 0, regBIF_BX0_BIF_DOORBELL_INT_CNTL, bif_doorbell_intr_cntl); + + amdgpu_ras_global_ras_isr(adev); + } +} + +static int nbio_v7_9_set_ras_controller_irq_state(struct amdgpu_device *adev, + struct amdgpu_irq_src *src, + unsigned type, + enum amdgpu_interrupt_state state) +{ + /* Dummy function, there is no initialization operation in driver */ + + return 0; +} + +static int nbio_v7_9_process_ras_controller_irq(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + /* By design, the ih cookie for ras_controller_irq should be written + * to BIFring instead of general iv ring. However, due to known bif ring + * hw bug, it has to be disabled. There is no chance the process function + * will be involked. Just left it as a dummy one. + */ + return 0; +} + +static int nbio_v7_9_set_ras_err_event_athub_irq_state(struct amdgpu_device *adev, + struct amdgpu_irq_src *src, + unsigned type, + enum amdgpu_interrupt_state state) +{ + /* Dummy function, there is no initialization operation in driver */ + + return 0; +} + +static int nbio_v7_9_process_err_event_athub_irq(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + /* By design, the ih cookie for err_event_athub_irq should be written + * to BIFring instead of general iv ring. However, due to known bif ring + * hw bug, it has to be disabled. There is no chance the process function + * will be involked. Just left it as a dummy one. + */ + return 0; +} + +static const struct amdgpu_irq_src_funcs nbio_v7_9_ras_controller_irq_funcs = { + .set = nbio_v7_9_set_ras_controller_irq_state, + .process = nbio_v7_9_process_ras_controller_irq, +}; + +static const struct amdgpu_irq_src_funcs nbio_v7_9_ras_err_event_athub_irq_funcs = { + .set = nbio_v7_9_set_ras_err_event_athub_irq_state, + .process = nbio_v7_9_process_err_event_athub_irq, +}; + +static int nbio_v7_9_init_ras_controller_interrupt (struct amdgpu_device *adev) +{ + int r; + + /* init the irq funcs */ + adev->nbio.ras_controller_irq.funcs = + &nbio_v7_9_ras_controller_irq_funcs; + adev->nbio.ras_controller_irq.num_types = 1; + + /* register ras controller interrupt */ + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_BIF, + NBIF_7_4__SRCID__RAS_CONTROLLER_INTERRUPT, + &adev->nbio.ras_controller_irq); + + return r; +} + +static int nbio_v7_9_init_ras_err_event_athub_interrupt (struct amdgpu_device *adev) +{ + + int r; + + /* init the irq funcs */ + adev->nbio.ras_err_event_athub_irq.funcs = + &nbio_v7_9_ras_err_event_athub_irq_funcs; + adev->nbio.ras_err_event_athub_irq.num_types = 1; + + /* register ras err event athub interrupt */ + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_BIF, + NBIF_7_4__SRCID__ERREVENT_ATHUB_INTERRUPT, + &adev->nbio.ras_err_event_athub_irq); + + return r; +} + +const struct amdgpu_ras_block_hw_ops nbio_v7_9_ras_hw_ops = { + .query_ras_error_count = nbio_v7_9_query_ras_error_count, +}; + +struct amdgpu_nbio_ras nbio_v7_9_ras = { + .ras_block = { + .ras_comm = { + .name = "pcie_bif", + .block = AMDGPU_RAS_BLOCK__PCIE_BIF, + .type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE, + }, + .hw_ops = &nbio_v7_9_ras_hw_ops, + .ras_late_init = amdgpu_nbio_ras_late_init, + }, + .handle_ras_controller_intr_no_bifring = nbio_v7_9_handle_ras_controller_intr_no_bifring, + .handle_ras_err_event_athub_intr_no_bifring = nbio_v7_9_handle_ras_err_event_athub_intr_no_bifring, + .init_ras_controller_interrupt = nbio_v7_9_init_ras_controller_interrupt, + .init_ras_err_event_athub_interrupt = nbio_v7_9_init_ras_err_event_athub_interrupt, }; diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.h b/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.h index 8e04eb484328..73709771950d 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.h +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.h @@ -28,5 +28,6 @@ extern const struct nbio_hdp_flush_reg nbio_v7_9_hdp_flush_reg; extern const struct amdgpu_nbio_funcs nbio_v7_9_funcs; +extern struct amdgpu_nbio_ras nbio_v7_9_ras; #endif diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c index 51523b27a186..4d7976b77767 100644 --- a/drivers/gpu/drm/amd/amdgpu/nv.c +++ b/drivers/gpu/drm/amd/amdgpu/nv.c @@ -67,21 +67,18 @@ static const struct amd_ip_funcs nv_common_ip_funcs; /* Navi */ -static const struct amdgpu_video_codec_info nv_video_codecs_encode_array[] = -{ +static const struct amdgpu_video_codec_info nv_video_codecs_encode_array[] = { {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 2304, 0)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 4096, 2304, 0)}, }; -static const struct amdgpu_video_codecs nv_video_codecs_encode = -{ +static const struct amdgpu_video_codecs nv_video_codecs_encode = { .codec_count = ARRAY_SIZE(nv_video_codecs_encode_array), .codec_array = nv_video_codecs_encode_array, }; /* Navi1x */ -static const struct amdgpu_video_codec_info nv_video_codecs_decode_array[] = -{ +static const struct amdgpu_video_codec_info nv_video_codecs_decode_array[] = { {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4096, 3)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4096, 5)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)}, @@ -91,8 +88,7 @@ static const struct amdgpu_video_codec_info nv_video_codecs_decode_array[] = {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)}, }; -static const struct amdgpu_video_codecs nv_video_codecs_decode = -{ +static const struct amdgpu_video_codecs nv_video_codecs_decode = { .codec_count = ARRAY_SIZE(nv_video_codecs_decode_array), .codec_array = nv_video_codecs_decode_array, }; @@ -108,8 +104,7 @@ static const struct amdgpu_video_codecs sc_video_codecs_encode = { .codec_array = sc_video_codecs_encode_array, }; -static const struct amdgpu_video_codec_info sc_video_codecs_decode_array_vcn0[] = -{ +static const struct amdgpu_video_codec_info sc_video_codecs_decode_array_vcn0[] = { {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4096, 3)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4096, 5)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)}, @@ -120,8 +115,7 @@ static const struct amdgpu_video_codec_info sc_video_codecs_decode_array_vcn0[] {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1, 8192, 4352, 0)}, }; -static const struct amdgpu_video_codec_info sc_video_codecs_decode_array_vcn1[] = -{ +static const struct amdgpu_video_codec_info sc_video_codecs_decode_array_vcn1[] = { {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4096, 3)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4096, 5)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)}, @@ -131,27 +125,23 @@ static const struct amdgpu_video_codec_info sc_video_codecs_decode_array_vcn1[] {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)}, }; -static const struct amdgpu_video_codecs sc_video_codecs_decode_vcn0 = -{ +static const struct amdgpu_video_codecs sc_video_codecs_decode_vcn0 = { .codec_count = ARRAY_SIZE(sc_video_codecs_decode_array_vcn0), .codec_array = sc_video_codecs_decode_array_vcn0, }; -static const struct amdgpu_video_codecs sc_video_codecs_decode_vcn1 = -{ +static const struct amdgpu_video_codecs sc_video_codecs_decode_vcn1 = { .codec_count = ARRAY_SIZE(sc_video_codecs_decode_array_vcn1), .codec_array = sc_video_codecs_decode_array_vcn1, }; /* SRIOV Sienna Cichlid, not const since data is controlled by host */ -static struct amdgpu_video_codec_info sriov_sc_video_codecs_encode_array[] = -{ +static struct amdgpu_video_codec_info sriov_sc_video_codecs_encode_array[] = { {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 2160, 0)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 7680, 4352, 0)}, }; -static struct amdgpu_video_codec_info sriov_sc_video_codecs_decode_array_vcn0[] = -{ +static struct amdgpu_video_codec_info sriov_sc_video_codecs_decode_array_vcn0[] = { {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4096, 3)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4096, 5)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)}, @@ -162,8 +152,7 @@ static struct amdgpu_video_codec_info sriov_sc_video_codecs_decode_array_vcn0[] {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1, 8192, 4352, 0)}, }; -static struct amdgpu_video_codec_info sriov_sc_video_codecs_decode_array_vcn1[] = -{ +static struct amdgpu_video_codec_info sriov_sc_video_codecs_decode_array_vcn1[] = { {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4096, 3)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4096, 5)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)}, @@ -173,20 +162,17 @@ static struct amdgpu_video_codec_info sriov_sc_video_codecs_decode_array_vcn1[] {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)}, }; -static struct amdgpu_video_codecs sriov_sc_video_codecs_encode = -{ +static struct amdgpu_video_codecs sriov_sc_video_codecs_encode = { .codec_count = ARRAY_SIZE(sriov_sc_video_codecs_encode_array), .codec_array = sriov_sc_video_codecs_encode_array, }; -static struct amdgpu_video_codecs sriov_sc_video_codecs_decode_vcn0 = -{ +static struct amdgpu_video_codecs sriov_sc_video_codecs_decode_vcn0 = { .codec_count = ARRAY_SIZE(sriov_sc_video_codecs_decode_array_vcn0), .codec_array = sriov_sc_video_codecs_decode_array_vcn0, }; -static struct amdgpu_video_codecs sriov_sc_video_codecs_decode_vcn1 = -{ +static struct amdgpu_video_codecs sriov_sc_video_codecs_decode_vcn1 = { .codec_count = ARRAY_SIZE(sriov_sc_video_codecs_decode_array_vcn1), .codec_array = sriov_sc_video_codecs_decode_array_vcn1, }; @@ -228,7 +214,7 @@ static int nv_query_video_codecs(struct amdgpu_device *adev, bool encode, if (adev->vcn.num_vcn_inst == hweight8(adev->vcn.harvest_config)) return -EINVAL; - switch (adev->ip_versions[UVD_HWIP][0]) { + switch (amdgpu_ip_version(adev, UVD_HWIP, 0)) { case IP_VERSION(3, 0, 0): case IP_VERSION(3, 0, 64): case IP_VERSION(3, 0, 192): @@ -467,7 +453,7 @@ nv_asic_reset_method(struct amdgpu_device *adev) dev_warn(adev->dev, "Specified reset method:%d isn't supported, using AUTO instead.\n", amdgpu_reset_method); - switch (adev->ip_versions[MP1_HWIP][0]) { + switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) { case IP_VERSION(11, 5, 0): case IP_VERSION(13, 0, 1): case IP_VERSION(13, 0, 3): @@ -527,17 +513,15 @@ static int nv_set_vce_clocks(struct amdgpu_device *adev, u32 evclk, u32 ecclk) static void nv_program_aspm(struct amdgpu_device *adev) { - if (!amdgpu_device_should_use_aspm(adev) || !amdgpu_device_aspm_support_quirk()) + if (!amdgpu_device_should_use_aspm(adev)) return; - if (!(adev->flags & AMD_IS_APU) && - (adev->nbio.funcs->program_aspm)) + if (adev->nbio.funcs->program_aspm) adev->nbio.funcs->program_aspm(adev); } -const struct amdgpu_ip_block_version nv_common_ip_block = -{ +const struct amdgpu_ip_block_version nv_common_ip_block = { .type = AMD_IP_BLOCK_TYPE_COMMON, .major = 1, .minor = 0, @@ -572,16 +556,6 @@ static bool nv_need_reset_on_init(struct amdgpu_device *adev) return false; } -static uint64_t nv_get_pcie_replay_count(struct amdgpu_device *adev) -{ - - /* TODO - * dummy implement for pcie_replay_count sysfs interface - * */ - - return 0; -} - static void nv_init_doorbell_index(struct amdgpu_device *adev) { adev->doorbell_index.kiq = AMDGPU_NAVI10_DOORBELL_KIQ; @@ -634,16 +608,14 @@ static int nv_update_umd_stable_pstate(struct amdgpu_device *adev, if (adev->gfx.funcs->update_perfmon_mgcg) adev->gfx.funcs->update_perfmon_mgcg(adev, !enter); - if (!(adev->flags & AMD_IS_APU) && - (adev->nbio.funcs->enable_aspm) && - amdgpu_device_should_use_aspm(adev)) + if (adev->nbio.funcs->enable_aspm && + amdgpu_device_should_use_aspm(adev)) adev->nbio.funcs->enable_aspm(adev, !enter); return 0; } -static const struct amdgpu_asic_funcs nv_asic_funcs = -{ +static const struct amdgpu_asic_funcs nv_asic_funcs = { .read_disabled_bios = &nv_read_disabled_bios, .read_bios_from_rom = &amdgpu_soc15_read_bios_from_rom, .read_register = &nv_read_register, @@ -656,7 +628,7 @@ static const struct amdgpu_asic_funcs nv_asic_funcs = .init_doorbell_index = &nv_init_doorbell_index, .need_full_reset = &nv_need_full_reset, .need_reset_on_init = &nv_need_reset_on_init, - .get_pcie_replay_count = &nv_get_pcie_replay_count, + .get_pcie_replay_count = &amdgpu_nbio_get_pcie_replay_count, .supports_baco = &amdgpu_dpm_is_baco_supported, .pre_asic_init = &nv_pre_asic_init, .update_umd_stable_pstate = &nv_update_umd_stable_pstate, @@ -695,7 +667,7 @@ static int nv_common_early_init(void *handle) /* TODO: split the GC and PG flags based on the relevant IP version for which * they are relevant. */ - switch (adev->ip_versions[GC_HWIP][0]) { + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { case IP_VERSION(10, 1, 10): adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_CGCG | @@ -889,7 +861,8 @@ static int nv_common_early_init(void *handle) AMD_CG_SUPPORT_ATHUB_LS | AMD_CG_SUPPORT_IH_CG | AMD_CG_SUPPORT_VCN_MGCG | - AMD_CG_SUPPORT_JPEG_MGCG; + AMD_CG_SUPPORT_JPEG_MGCG | + AMD_CG_SUPPORT_SDMA_MGCG; adev->pg_flags = AMD_PG_SUPPORT_GFX_PG | AMD_PG_SUPPORT_VCN | AMD_PG_SUPPORT_VCN_DPG | @@ -950,7 +923,8 @@ static int nv_common_early_init(void *handle) AMD_CG_SUPPORT_ATHUB_LS | AMD_CG_SUPPORT_IH_CG | AMD_CG_SUPPORT_VCN_MGCG | - AMD_CG_SUPPORT_JPEG_MGCG; + AMD_CG_SUPPORT_JPEG_MGCG | + AMD_CG_SUPPORT_SDMA_MGCG; adev->pg_flags = AMD_PG_SUPPORT_VCN | AMD_PG_SUPPORT_VCN_DPG | AMD_PG_SUPPORT_JPEG | @@ -1097,7 +1071,7 @@ static int nv_common_set_clockgating_state(void *handle, if (amdgpu_sriov_vf(adev)) return 0; - switch (adev->ip_versions[NBIO_HWIP][0]) { + switch (amdgpu_ip_version(adev, NBIO_HWIP, 0)) { case IP_VERSION(2, 3, 0): case IP_VERSION(2, 3, 1): case IP_VERSION(2, 3, 2): @@ -1139,8 +1113,6 @@ static void nv_common_get_clockgating_state(void *handle, u64 *flags) adev->hdp.funcs->get_clock_gating_state(adev, flags); adev->smuio.funcs->get_clock_gating_state(adev, flags); - - return; } static const struct amd_ip_funcs nv_common_ip_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h index 18917df785ec..4bb5e10217bb 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h +++ b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h @@ -293,6 +293,10 @@ enum psp_gfx_fw_type { GFX_FW_TYPE_RS64_MEC_P1_STACK = 95, /* RS64 MEC stack P1 SOC21 */ GFX_FW_TYPE_RS64_MEC_P2_STACK = 96, /* RS64 MEC stack P2 SOC21 */ GFX_FW_TYPE_RS64_MEC_P3_STACK = 97, /* RS64 MEC stack P3 SOC21 */ + GFX_FW_TYPE_VPEC_FW1 = 100, /* VPEC FW1 To Save VPE */ + GFX_FW_TYPE_VPEC_FW2 = 101, /* VPEC FW2 To Save VPE */ + GFX_FW_TYPE_VPE = 102, + GFX_FW_TYPE_P2S_TABLE = 129, GFX_FW_TYPE_MAX }; diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c index 5f10883da6a2..145186a1e48f 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c @@ -58,9 +58,10 @@ static int psp_v10_0_init_microcode(struct psp_context *psp) return err; err = psp_init_ta_microcode(psp, ucode_prefix); - if ((adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 1, 0)) && - (adev->pdev->revision == 0xa1) && - (psp->securedisplay_context.context.bin_desc.fw_version >= 0x27000008)) { + if ((amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 1, 0)) && + (adev->pdev->revision == 0xa1) && + (psp->securedisplay_context.context.bin_desc.fw_version >= + 0x27000008)) { adev->psp.securedisplay_context.context.bin_desc.size_bytes = 0; } return err; diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c index 8f84fe40abbb..efa37e3b7931 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c @@ -95,7 +95,7 @@ static int psp_v11_0_init_microcode(struct psp_context *psp) amdgpu_ucode_ip_version_decode(adev, MP0_HWIP, ucode_prefix, sizeof(ucode_prefix)); - switch (adev->ip_versions[MP0_HWIP][0]) { + switch (amdgpu_ip_version(adev, MP0_HWIP, 0)) { case IP_VERSION(11, 0, 2): case IP_VERSION(11, 0, 4): err = psp_init_sos_microcode(psp, ucode_prefix); diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c index af5685f4cb34..df1844d0800f 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c @@ -50,6 +50,8 @@ MODULE_FIRMWARE("amdgpu/psp_13_0_11_toc.bin"); MODULE_FIRMWARE("amdgpu/psp_13_0_11_ta.bin"); MODULE_FIRMWARE("amdgpu/psp_13_0_6_sos.bin"); MODULE_FIRMWARE("amdgpu/psp_13_0_6_ta.bin"); +MODULE_FIRMWARE("amdgpu/psp_14_0_0_toc.bin"); +MODULE_FIRMWARE("amdgpu/psp_14_0_0_ta.bin"); /* For large FW files the time to complete can be very long */ #define USBC_PD_POLLING_LIMIT_S 240 @@ -57,6 +59,9 @@ MODULE_FIRMWARE("amdgpu/psp_13_0_6_ta.bin"); /* Read USB-PD from LFB */ #define GFX_CMD_USB_PD_USE_LFB 0x480 +/* Retry times for vmbx ready wait */ +#define PSP_VMBX_POLLING_LIMIT 3000 + /* VBIOS gfl defines */ #define MBOX_READY_MASK 0x80000000 #define MBOX_STATUS_MASK 0x0000FFFF @@ -77,7 +82,7 @@ static int psp_v13_0_init_microcode(struct psp_context *psp) amdgpu_ucode_ip_version_decode(adev, MP0_HWIP, ucode_prefix, sizeof(ucode_prefix)); - switch (adev->ip_versions[MP0_HWIP][0]) { + switch (amdgpu_ip_version(adev, MP0_HWIP, 0)) { case IP_VERSION(13, 0, 2): err = psp_init_sos_microcode(psp, ucode_prefix); if (err) @@ -94,6 +99,7 @@ static int psp_v13_0_init_microcode(struct psp_context *psp) case IP_VERSION(13, 0, 5): case IP_VERSION(13, 0, 8): case IP_VERSION(13, 0, 11): + case IP_VERSION(14, 0, 0): err = psp_init_toc_microcode(psp, ucode_prefix); if (err) return err; @@ -130,19 +136,43 @@ static bool psp_v13_0_is_sos_alive(struct psp_context *psp) return sol_reg != 0x0; } -static int psp_v13_0_wait_for_bootloader(struct psp_context *psp) +static int psp_v13_0_wait_for_vmbx_ready(struct psp_context *psp) { struct amdgpu_device *adev = psp->adev; + int retry_loop, ret; - int ret; - int retry_loop; + for (retry_loop = 0; retry_loop < PSP_VMBX_POLLING_LIMIT; retry_loop++) { + /* Wait for bootloader to signify that is + ready having bit 31 of C2PMSG_33 set to 1 */ + ret = psp_wait_for( + psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_33), + 0x80000000, 0xffffffff, false); + + if (ret == 0) + break; + } + + if (ret) + dev_warn(adev->dev, "Bootloader wait timed out"); + + return ret; +} + +static int psp_v13_0_wait_for_bootloader(struct psp_context *psp) +{ + struct amdgpu_device *adev = psp->adev; + int retry_loop, retry_cnt, ret; + retry_cnt = + (amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 6)) ? + PSP_VMBX_POLLING_LIMIT : + 10; /* Wait for bootloader to signify that it is ready having bit 31 of * C2PMSG_35 set to 1. All other bits are expected to be cleared. * If there is an error in processing command, bits[7:0] will be set. * This is applicable for PSP v13.0.6 and newer. */ - for (retry_loop = 0; retry_loop < 10; retry_loop++) { + for (retry_loop = 0; retry_loop < retry_cnt; retry_loop++) { ret = psp_wait_for( psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_35), 0x80000000, 0xffffffff, false); @@ -154,6 +184,19 @@ static int psp_v13_0_wait_for_bootloader(struct psp_context *psp) return ret; } +static int psp_v13_0_wait_for_bootloader_steady_state(struct psp_context *psp) +{ + struct amdgpu_device *adev = psp->adev; + + if (amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 6)) { + psp_v13_0_wait_for_vmbx_ready(psp); + + return psp_v13_0_wait_for_bootloader(psp); + } + + return 0; +} + static int psp_v13_0_bootloader_load_component(struct psp_context *psp, struct psp_bin_desc *bin_desc, enum psp_bootloader_cmd bl_cmd) @@ -224,6 +267,12 @@ static int psp_v13_0_bootloader_load_ras_drv(struct psp_context *psp) return psp_v13_0_bootloader_load_component(psp, &psp->ras_drv, PSP_BL__LOAD_RASDRV); } +static inline void psp_v13_0_init_sos_version(struct psp_context *psp) +{ + struct amdgpu_device *adev = psp->adev; + + psp->sos.fw_version = RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_58); +} static int psp_v13_0_bootloader_load_sos(struct psp_context *psp) { @@ -234,8 +283,10 @@ static int psp_v13_0_bootloader_load_sos(struct psp_context *psp) /* Check sOS sign of life register to confirm sys driver and sOS * are already been loaded. */ - if (psp_v13_0_is_sos_alive(psp)) + if (psp_v13_0_is_sos_alive(psp)) { + psp_v13_0_init_sos_version(psp); return 0; + } ret = psp_v13_0_wait_for_bootloader(psp); if (ret) @@ -259,6 +310,9 @@ static int psp_v13_0_bootloader_load_sos(struct psp_context *psp) RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_81), 0, true); + if (!ret) + psp_v13_0_init_sos_version(psp); + return ret; } @@ -688,8 +742,107 @@ static int psp_v13_0_vbflash_status(struct psp_context *psp) return RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_115); } +static int psp_v13_0_fatal_error_recovery_quirk(struct psp_context *psp) +{ + struct amdgpu_device *adev = psp->adev; + + if (amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 10)) { + uint32_t reg_data; + /* MP1 fatal error: trigger PSP dram read to unhalt PSP + * during MP1 triggered sync flood. + */ + reg_data = RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_67); + WREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_67, reg_data + 0x10); + + /* delay 1000ms for the mode1 reset for fatal error + * to be recovered back. + */ + msleep(1000); + } + + return 0; +} + + +static void psp_v13_0_boot_error_reporting(struct amdgpu_device *adev, + uint32_t inst, + uint32_t boot_error) +{ + uint32_t socket_id; + uint32_t aid_id; + uint32_t hbm_id; + uint32_t reg_data; + + socket_id = REG_GET_FIELD(boot_error, MP0_SMN_C2PMSG_126, SOCKET_ID); + aid_id = REG_GET_FIELD(boot_error, MP0_SMN_C2PMSG_126, AID_ID); + hbm_id = REG_GET_FIELD(boot_error, MP0_SMN_C2PMSG_126, HBM_ID); + + reg_data = RREG32_SOC15(MP0, inst, regMP0_SMN_C2PMSG_109); + dev_info(adev->dev, "socket: %d, aid: %d, firmware boot failed, fw status is 0x%x\n", + socket_id, aid_id, reg_data); + + if (REG_GET_FIELD(boot_error, MP0_SMN_C2PMSG_126, GPU_ERR_MEM_TRAINING)) + dev_info(adev->dev, "socket: %d, aid: %d, hbm: %d, memory training failed\n", + socket_id, aid_id, hbm_id); + + if (REG_GET_FIELD(boot_error, MP0_SMN_C2PMSG_126, GPU_ERR_FW_LOAD)) + dev_info(adev->dev, "socket: %d, aid: %d, firmware load failed at boot time\n", + socket_id, aid_id); + + if (REG_GET_FIELD(boot_error, MP0_SMN_C2PMSG_126, GPU_ERR_WAFL_LINK_TRAINING)) + dev_info(adev->dev, "socket: %d, aid: %d, wafl link training failed\n", + socket_id, aid_id); + + if (REG_GET_FIELD(boot_error, MP0_SMN_C2PMSG_126, GPU_ERR_XGMI_LINK_TRAINING)) + dev_info(adev->dev, "socket: %d, aid: %d, xgmi link training failed\n", + socket_id, aid_id); + + if (REG_GET_FIELD(boot_error, MP0_SMN_C2PMSG_126, GPU_ERR_USR_CP_LINK_TRAINING)) + dev_info(adev->dev, "socket: %d, aid: %d, usr cp link training failed\n", + socket_id, aid_id); + + if (REG_GET_FIELD(boot_error, MP0_SMN_C2PMSG_126, GPU_ERR_USR_DP_LINK_TRAINING)) + dev_info(adev->dev, "socket: %d, aid: %d, usr dp link training failed\n", + socket_id, aid_id); + + if (REG_GET_FIELD(boot_error, MP0_SMN_C2PMSG_126, GPU_ERR_HBM_MEM_TEST)) + dev_info(adev->dev, "socket: %d, aid: %d, hbm: %d, hbm memory test failed\n", + socket_id, aid_id, hbm_id); + + if (REG_GET_FIELD(boot_error, MP0_SMN_C2PMSG_126, GPU_ERR_HBM_BIST_TEST)) + dev_info(adev->dev, "socket: %d, aid: %d, hbm: %d, hbm bist test failed\n", + socket_id, aid_id, hbm_id); +} + +static int psp_v13_0_query_boot_status(struct psp_context *psp) +{ + struct amdgpu_device *adev = psp->adev; + int inst_mask = adev->aid_mask; + uint32_t reg_data; + uint32_t i; + int ret = 0; + + if (amdgpu_ip_version(adev, MP0_HWIP, 0) != IP_VERSION(13, 0, 6)) + return 0; + + if (RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_59) < 0x00a10109) + return 0; + + for_each_inst(i, inst_mask) { + reg_data = RREG32_SOC15(MP0, i, regMP0_SMN_C2PMSG_126); + if (!REG_GET_FIELD(reg_data, MP0_SMN_C2PMSG_126, BOOT_STATUS)) { + psp_v13_0_boot_error_reporting(adev, i, reg_data); + ret = -EINVAL; + break; + } + } + + return ret; +} + static const struct psp_funcs psp_v13_0_funcs = { .init_microcode = psp_v13_0_init_microcode, + .wait_for_bootloader = psp_v13_0_wait_for_bootloader_steady_state, .bootloader_load_kdb = psp_v13_0_bootloader_load_kdb, .bootloader_load_spl = psp_v13_0_bootloader_load_spl, .bootloader_load_sysdrv = psp_v13_0_bootloader_load_sysdrv, @@ -707,7 +860,9 @@ static const struct psp_funcs psp_v13_0_funcs = { .load_usbc_pd_fw = psp_v13_0_load_usbc_pd_fw, .read_usbc_pd_fw = psp_v13_0_read_usbc_pd_fw, .update_spirom = psp_v13_0_update_spirom, - .vbflash_stat = psp_v13_0_vbflash_status + .vbflash_stat = psp_v13_0_vbflash_status, + .fatal_error_recovery_quirk = psp_v13_0_fatal_error_recovery_quirk, + .query_boot_status = psp_v13_0_query_boot_status, }; void psp_v13_0_set_psp_funcs(struct psp_context *psp) diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v13_0_4.c b/drivers/gpu/drm/amd/amdgpu/psp_v13_0_4.c index d5ba58eba3e2..eaa5512a21da 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v13_0_4.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v13_0_4.c @@ -40,7 +40,7 @@ static int psp_v13_0_4_init_microcode(struct psp_context *psp) amdgpu_ucode_ip_version_decode(adev, MP0_HWIP, ucode_prefix, sizeof(ucode_prefix)); - switch (adev->ip_versions[MP0_HWIP][0]) { + switch (amdgpu_ip_version(adev, MP0_HWIP, 0)) { case IP_VERSION(13, 0, 4): err = psp_init_toc_microcode(psp, ucode_prefix); if (err) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c index 51afc92994a8..8d5d86675a7f 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c @@ -339,8 +339,6 @@ static void sdma_v2_4_gfx_stop(struct amdgpu_device *adev) u32 rb_cntl, ib_cntl; int i; - amdgpu_sdma_unset_buffer_funcs_helper(adev); - for (i = 0; i < adev->sdma.num_instances; i++) { rb_cntl = RREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i]); rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 0); @@ -474,9 +472,6 @@ static int sdma_v2_4_gfx_resume(struct amdgpu_device *adev) r = amdgpu_ring_test_helper(ring); if (r) return r; - - if (adev->mman.buffer_funcs_ring == ring) - amdgpu_ttm_set_buffer_funcs_status(adev, true); } return 0; @@ -816,9 +811,14 @@ static void sdma_v2_4_ring_emit_wreg(struct amdgpu_ring *ring, static int sdma_v2_4_early_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int r; adev->sdma.num_instances = SDMA_MAX_INSTANCE; + r = sdma_v2_4_init_microcode(adev); + if (r) + return r; + sdma_v2_4_set_ring_funcs(adev); sdma_v2_4_set_buffer_funcs(adev); sdma_v2_4_set_vm_pte_funcs(adev); @@ -851,12 +851,6 @@ static int sdma_v2_4_sw_init(void *handle) if (r) return r; - r = sdma_v2_4_init_microcode(adev); - if (r) { - DRM_ERROR("Failed to load sdma firmware!\n"); - return r; - } - for (i = 0; i < adev->sdma.num_instances; i++) { ring = &adev->sdma.instance[i].ring; ring->ring_obj = NULL; diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c index 344202870aeb..2ad615be4bb3 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c @@ -513,8 +513,6 @@ static void sdma_v3_0_gfx_stop(struct amdgpu_device *adev) u32 rb_cntl, ib_cntl; int i; - amdgpu_sdma_unset_buffer_funcs_helper(adev); - for (i = 0; i < adev->sdma.num_instances; i++) { rb_cntl = RREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i]); rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 0); @@ -746,9 +744,6 @@ static int sdma_v3_0_gfx_resume(struct amdgpu_device *adev) r = amdgpu_ring_test_helper(ring); if (r) return r; - - if (adev->mman.buffer_funcs_ring == ring) - amdgpu_ttm_set_buffer_funcs_status(adev, true); } return 0; @@ -1087,6 +1082,7 @@ static void sdma_v3_0_ring_emit_wreg(struct amdgpu_ring *ring, static int sdma_v3_0_early_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int r; switch (adev->asic_type) { case CHIP_STONEY: @@ -1097,6 +1093,10 @@ static int sdma_v3_0_early_init(void *handle) break; } + r = sdma_v3_0_init_microcode(adev); + if (r) + return r; + sdma_v3_0_set_ring_funcs(adev); sdma_v3_0_set_buffer_funcs(adev); sdma_v3_0_set_vm_pte_funcs(adev); @@ -1129,12 +1129,6 @@ static int sdma_v3_0_sw_init(void *handle) if (r) return r; - r = sdma_v3_0_init_microcode(adev); - if (r) { - DRM_ERROR("Failed to load sdma firmware!\n"); - return r; - } - for (i = 0; i < adev->sdma.num_instances; i++) { ring = &adev->sdma.instance[i].ring; ring->ring_obj = NULL; diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index cd37f45e01a1..3d68dd5523c6 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c @@ -469,7 +469,7 @@ static int sdma_v4_0_irq_id_to_seq(unsigned client_id) static void sdma_v4_0_init_golden_registers(struct amdgpu_device *adev) { - switch (adev->ip_versions[SDMA0_HWIP][0]) { + switch (amdgpu_ip_version(adev, SDMA0_HWIP, 0)) { case IP_VERSION(4, 0, 0): soc15_program_register_sequence(adev, golden_settings_sdma_4, @@ -539,7 +539,7 @@ static void sdma_v4_0_setup_ulv(struct amdgpu_device *adev) * The only chips with SDMAv4 and ULV are VG10 and VG20. * Server SKUs take a different hysteresis setting from other SKUs. */ - switch (adev->ip_versions[SDMA0_HWIP][0]) { + switch (amdgpu_ip_version(adev, SDMA0_HWIP, 0)) { case IP_VERSION(4, 0, 0): if (adev->pdev->device == 0x6860) break; @@ -578,8 +578,10 @@ static int sdma_v4_0_init_microcode(struct amdgpu_device *adev) int ret, i; for (i = 0; i < adev->sdma.num_instances; i++) { - if (adev->ip_versions[SDMA0_HWIP][0] == IP_VERSION(4, 2, 2) || - adev->ip_versions[SDMA0_HWIP][0] == IP_VERSION(4, 4, 0)) { + if (amdgpu_ip_version(adev, SDMA0_HWIP, 0) == + IP_VERSION(4, 2, 2) || + amdgpu_ip_version(adev, SDMA0_HWIP, 0) == + IP_VERSION(4, 4, 0)) { /* Acturus & Aldebaran will leverage the same FW memory for every SDMA instance */ ret = amdgpu_sdma_init_microcode(adev, 0, true); @@ -875,8 +877,6 @@ static void sdma_v4_0_gfx_enable(struct amdgpu_device *adev, bool enable) u32 rb_cntl, ib_cntl; int i; - amdgpu_sdma_unset_buffer_funcs_helper(adev); - for (i = 0; i < adev->sdma.num_instances; i++) { rb_cntl = RREG32_SDMA(i, mmSDMA0_GFX_RB_CNTL); rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, enable ? 1 : 0); @@ -911,8 +911,6 @@ static void sdma_v4_0_page_stop(struct amdgpu_device *adev) u32 rb_cntl, ib_cntl; int i; - amdgpu_sdma_unset_buffer_funcs_helper(adev); - for (i = 0; i < adev->sdma.num_instances; i++) { rb_cntl = RREG32_SDMA(i, mmSDMA0_PAGE_RB_CNTL); rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_PAGE_RB_CNTL, @@ -978,7 +976,8 @@ static void sdma_v4_0_ctx_switch_enable(struct amdgpu_device *adev, bool enable) * Arcturus for the moment and firmware version 14 * and above. */ - if (adev->ip_versions[SDMA0_HWIP][0] == IP_VERSION(4, 2, 2) && + if (amdgpu_ip_version(adev, SDMA0_HWIP, 0) == + IP_VERSION(4, 2, 2) && adev->sdma.instance[i].fw_version >= 14) WREG32_SDMA(i, mmSDMA0_PUB_DUMMY_REG2, enable); /* Extend page fault timeout to avoid interrupt storm */ @@ -1255,7 +1254,7 @@ static void sdma_v4_0_init_pg(struct amdgpu_device *adev) if (!(adev->pg_flags & AMD_PG_SUPPORT_SDMA)) return; - switch (adev->ip_versions[SDMA0_HWIP][0]) { + switch (amdgpu_ip_version(adev, SDMA0_HWIP, 0)) { case IP_VERSION(4, 1, 0): case IP_VERSION(4, 1, 1): case IP_VERSION(4, 1, 2): @@ -1399,13 +1398,7 @@ static int sdma_v4_0_start(struct amdgpu_device *adev) r = amdgpu_ring_test_helper(page); if (r) return r; - - if (adev->mman.buffer_funcs_ring == page) - amdgpu_ttm_set_buffer_funcs_status(adev, true); } - - if (adev->mman.buffer_funcs_ring == ring) - amdgpu_ttm_set_buffer_funcs_status(adev, true); } return r; @@ -1698,7 +1691,7 @@ static bool sdma_v4_0_fw_support_paging_queue(struct amdgpu_device *adev) { uint fw_version = adev->sdma.instance[0].fw_version; - switch (adev->ip_versions[SDMA0_HWIP][0]) { + switch (amdgpu_ip_version(adev, SDMA0_HWIP, 0)) { case IP_VERSION(4, 0, 0): return fw_version >= 430; case IP_VERSION(4, 0, 1): @@ -1717,13 +1710,11 @@ static int sdma_v4_0_early_init(void *handle) int r; r = sdma_v4_0_init_microcode(adev); - if (r) { - DRM_ERROR("Failed to load sdma firmware!\n"); + if (r) return r; - } /* TODO: Page queue breaks driver reload under SRIOV */ - if ((adev->ip_versions[SDMA0_HWIP][0] == IP_VERSION(4, 0, 0)) && + if ((amdgpu_ip_version(adev, SDMA0_HWIP, 0) == IP_VERSION(4, 0, 0)) && amdgpu_sriov_vf((adev))) adev->sdma.has_page_queue = false; else if (sdma_v4_0_fw_support_paging_queue(adev)) @@ -1748,11 +1739,8 @@ static int sdma_v4_0_late_init(void *handle) sdma_v4_0_setup_ulv(adev); - if (!amdgpu_persistent_edc_harvesting_supported(adev)) { - if (adev->sdma.ras && adev->sdma.ras->ras_block.hw_ops && - adev->sdma.ras->ras_block.hw_ops->reset_ras_error_count) - adev->sdma.ras->ras_block.hw_ops->reset_ras_error_count(adev); - } + if (!amdgpu_persistent_edc_harvesting_supported(adev)) + amdgpu_ras_reset_error_count(adev, AMDGPU_RAS_BLOCK__SDMA); return 0; } @@ -1823,7 +1811,9 @@ static int sdma_v4_0_sw_init(void *handle) * On Arcturus, SDMA instance 5~7 has a different vmhub * type(AMDGPU_MMHUB1). */ - if (adev->ip_versions[SDMA0_HWIP][0] == IP_VERSION(4, 2, 2) && i >= 5) + if (amdgpu_ip_version(adev, SDMA0_HWIP, 0) == + IP_VERSION(4, 2, 2) && + i >= 5) ring->vm_hub = AMDGPU_MMHUB1(0); else ring->vm_hub = AMDGPU_MMHUB0(0); @@ -1843,8 +1833,10 @@ static int sdma_v4_0_sw_init(void *handle) /* paging queue use same doorbell index/routing as gfx queue * with 0x400 (4096 dwords) offset on second doorbell page */ - if (adev->ip_versions[SDMA0_HWIP][0] >= IP_VERSION(4, 0, 0) && - adev->ip_versions[SDMA0_HWIP][0] < IP_VERSION(4, 2, 0)) { + if (amdgpu_ip_version(adev, SDMA0_HWIP, 0) >= + IP_VERSION(4, 0, 0) && + amdgpu_ip_version(adev, SDMA0_HWIP, 0) < + IP_VERSION(4, 2, 0)) { ring->doorbell_index = adev->doorbell_index.sdma_engine[i] << 1; ring->doorbell_index += 0x400; @@ -1856,7 +1848,9 @@ static int sdma_v4_0_sw_init(void *handle) (adev->doorbell_index.sdma_engine[i] + 1) << 1; } - if (adev->ip_versions[SDMA0_HWIP][0] == IP_VERSION(4, 2, 2) && i >= 5) + if (amdgpu_ip_version(adev, SDMA0_HWIP, 0) == + IP_VERSION(4, 2, 2) && + i >= 5) ring->vm_hub = AMDGPU_MMHUB1(0); else ring->vm_hub = AMDGPU_MMHUB0(0); @@ -1890,8 +1884,8 @@ static int sdma_v4_0_sw_fini(void *handle) amdgpu_ring_fini(&adev->sdma.instance[i].page); } - if (adev->ip_versions[SDMA0_HWIP][0] == IP_VERSION(4, 2, 2) || - adev->ip_versions[SDMA0_HWIP][0] == IP_VERSION(4, 4, 0)) + if (amdgpu_ip_version(adev, SDMA0_HWIP, 0) == IP_VERSION(4, 2, 2) || + amdgpu_ip_version(adev, SDMA0_HWIP, 0) == IP_VERSION(4, 4, 0)) amdgpu_sdma_destroy_inst_ctx(adev, true); else amdgpu_sdma_destroy_inst_ctx(adev, false); @@ -1917,11 +1911,8 @@ static int sdma_v4_0_hw_fini(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; int i; - if (amdgpu_sriov_vf(adev)) { - /* disable the scheduler for SDMA */ - amdgpu_sdma_unset_buffer_funcs_helper(adev); + if (amdgpu_sriov_vf(adev)) return 0; - } if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__SDMA)) { for (i = 0; i < adev->sdma.num_instances; i++) { @@ -1960,7 +1951,6 @@ static int sdma_v4_0_resume(void *handle) if (adev->in_s0ix) { sdma_v4_0_enable(adev, true); sdma_v4_0_gfx_enable(adev, true); - amdgpu_ttm_set_buffer_funcs_status(adev, true); return 0; } @@ -2036,14 +2026,16 @@ static int sdma_v4_0_process_trap_irq(struct amdgpu_device *adev, amdgpu_fence_process(&adev->sdma.instance[instance].ring); break; case 1: - if (adev->ip_versions[SDMA0_HWIP][0] == IP_VERSION(4, 2, 0)) + if (amdgpu_ip_version(adev, SDMA0_HWIP, 0) == + IP_VERSION(4, 2, 0)) amdgpu_fence_process(&adev->sdma.instance[instance].page); break; case 2: /* XXX compute */ break; case 3: - if (adev->ip_versions[SDMA0_HWIP][0] != IP_VERSION(4, 2, 0)) + if (amdgpu_ip_version(adev, SDMA0_HWIP, 0) != + IP_VERSION(4, 2, 0)) amdgpu_fence_process(&adev->sdma.instance[instance].page); break; } @@ -2259,7 +2251,7 @@ static int sdma_v4_0_set_powergating_state(void *handle, { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - switch (adev->ip_versions[SDMA0_HWIP][0]) { + switch (amdgpu_ip_version(adev, SDMA0_HWIP, 0)) { case IP_VERSION(4, 1, 0): case IP_VERSION(4, 1, 1): case IP_VERSION(4, 1, 2): @@ -2622,7 +2614,7 @@ static struct amdgpu_sdma_ras sdma_v4_0_ras = { static void sdma_v4_0_set_ras_funcs(struct amdgpu_device *adev) { - switch (adev->ip_versions[SDMA0_HWIP][0]) { + switch (amdgpu_ip_version(adev, SDMA0_HWIP, 0)) { case IP_VERSION(4, 2, 0): case IP_VERSION(4, 2, 2): adev->sdma.ras = &sdma_v4_0_ras; @@ -2633,7 +2625,6 @@ static void sdma_v4_0_set_ras_funcs(struct amdgpu_device *adev) default: break; } - } const struct amdgpu_ip_block_version sdma_v4_0_ip_block = { diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c index f413898dda37..0f24af6f2810 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c @@ -132,7 +132,8 @@ static int sdma_v4_4_2_init_microcode(struct amdgpu_device *adev) int ret, i; for (i = 0; i < adev->sdma.num_instances; i++) { - if (adev->ip_versions[SDMA0_HWIP][0] == IP_VERSION(4, 4, 2)) { + if (amdgpu_ip_version(adev, SDMA0_HWIP, 0) == + IP_VERSION(4, 4, 2)) { ret = amdgpu_sdma_init_microcode(adev, 0, true); break; } else { @@ -154,13 +155,13 @@ static int sdma_v4_4_2_init_microcode(struct amdgpu_device *adev) */ static uint64_t sdma_v4_4_2_ring_get_rptr(struct amdgpu_ring *ring) { - u64 *rptr; + u64 rptr; /* XXX check if swapping is necessary on BE */ - rptr = ((u64 *)&ring->adev->wb.wb[ring->rptr_offs]); + rptr = READ_ONCE(*((u64 *)&ring->adev->wb.wb[ring->rptr_offs])); - DRM_DEBUG("rptr before shift == 0x%016llx\n", *rptr); - return ((*rptr) >> 2); + DRM_DEBUG("rptr before shift == 0x%016llx\n", rptr); + return rptr >> 2; } /** @@ -426,6 +427,7 @@ static void sdma_v4_4_2_inst_gfx_stop(struct amdgpu_device *adev, uint32_t inst_mask) { struct amdgpu_ring *sdma[AMDGPU_MAX_SDMA_INSTANCES]; + u32 doorbell_offset, doorbell; u32 rb_cntl, ib_cntl; int i, unset = 0; @@ -443,6 +445,18 @@ static void sdma_v4_4_2_inst_gfx_stop(struct amdgpu_device *adev, ib_cntl = RREG32_SDMA(i, regSDMA_GFX_IB_CNTL); ib_cntl = REG_SET_FIELD(ib_cntl, SDMA_GFX_IB_CNTL, IB_ENABLE, 0); WREG32_SDMA(i, regSDMA_GFX_IB_CNTL, ib_cntl); + + if (sdma[i]->use_doorbell) { + doorbell = RREG32_SDMA(i, regSDMA_GFX_DOORBELL); + doorbell_offset = RREG32_SDMA(i, regSDMA_GFX_DOORBELL_OFFSET); + + doorbell = REG_SET_FIELD(doorbell, SDMA_GFX_DOORBELL, ENABLE, 0); + doorbell_offset = REG_SET_FIELD(doorbell_offset, + SDMA_GFX_DOORBELL_OFFSET, + OFFSET, 0); + WREG32_SDMA(i, regSDMA_GFX_DOORBELL, doorbell); + WREG32_SDMA(i, regSDMA_GFX_DOORBELL_OFFSET, doorbell_offset); + } } } @@ -630,12 +644,6 @@ static void sdma_v4_4_2_gfx_resume(struct amdgpu_device *adev, unsigned int i) rb_cntl = sdma_v4_4_2_rb_cntl(ring, rb_cntl); WREG32_SDMA(i, regSDMA_GFX_RB_CNTL, rb_cntl); - /* Initialize the ring buffer's read and write pointers */ - WREG32_SDMA(i, regSDMA_GFX_RB_RPTR, 0); - WREG32_SDMA(i, regSDMA_GFX_RB_RPTR_HI, 0); - WREG32_SDMA(i, regSDMA_GFX_RB_WPTR, 0); - WREG32_SDMA(i, regSDMA_GFX_RB_WPTR_HI, 0); - /* set the wb address whether it's enabled or not */ WREG32_SDMA(i, regSDMA_GFX_RB_RPTR_ADDR_HI, upper_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF); @@ -653,6 +661,12 @@ static void sdma_v4_4_2_gfx_resume(struct amdgpu_device *adev, unsigned int i) /* before programing wptr to a less value, need set minor_ptr_update first */ WREG32_SDMA(i, regSDMA_GFX_MINOR_PTR_UPDATE, 1); + /* Initialize the ring buffer's read and write pointers */ + WREG32_SDMA(i, regSDMA_GFX_RB_RPTR, 0); + WREG32_SDMA(i, regSDMA_GFX_RB_RPTR_HI, 0); + WREG32_SDMA(i, regSDMA_GFX_RB_WPTR, 0); + WREG32_SDMA(i, regSDMA_GFX_RB_WPTR_HI, 0); + doorbell = RREG32_SDMA(i, regSDMA_GFX_DOORBELL); doorbell_offset = RREG32_SDMA(i, regSDMA_GFX_DOORBELL_OFFSET); @@ -1231,7 +1245,7 @@ static void sdma_v4_4_2_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t re static bool sdma_v4_4_2_fw_support_paging_queue(struct amdgpu_device *adev) { - switch (adev->ip_versions[SDMA0_HWIP][0]) { + switch (amdgpu_ip_version(adev, SDMA0_HWIP, 0)) { case IP_VERSION(4, 4, 2): return false; default: @@ -1245,10 +1259,8 @@ static int sdma_v4_4_2_early_init(void *handle) int r; r = sdma_v4_4_2_init_microcode(adev); - if (r) { - DRM_ERROR("Failed to load sdma firmware!\n"); + if (r) return r; - } /* TODO: Page queue breaks driver reload under SRIOV */ if (sdma_v4_4_2_fw_support_paging_queue(adev)) @@ -1277,11 +1289,8 @@ static int sdma_v4_4_2_late_init(void *handle) .cb = sdma_v4_4_2_process_ras_data_cb, }; #endif - if (!amdgpu_persistent_edc_harvesting_supported(adev)) { - if (adev->sdma.ras && adev->sdma.ras->ras_block.hw_ops && - adev->sdma.ras->ras_block.hw_ops->reset_ras_error_count) - adev->sdma.ras->ras_block.hw_ops->reset_ras_error_count(adev); - } + if (!amdgpu_persistent_edc_harvesting_supported(adev)) + amdgpu_ras_reset_error_count(adev, AMDGPU_RAS_BLOCK__SDMA); return 0; } @@ -1401,7 +1410,7 @@ static int sdma_v4_4_2_sw_fini(void *handle) amdgpu_ring_fini(&adev->sdma.instance[i].page); } - if (adev->ip_versions[SDMA0_HWIP][0] == IP_VERSION(4, 4, 2)) + if (amdgpu_ip_version(adev, SDMA0_HWIP, 0) == IP_VERSION(4, 4, 2)) amdgpu_sdma_destroy_inst_ctx(adev, true); else amdgpu_sdma_destroy_inst_ctx(adev, false); @@ -2052,7 +2061,7 @@ const struct amdgpu_ip_block_version sdma_v4_4_2_ip_block = { .type = AMD_IP_BLOCK_TYPE_SDMA, .major = 4, .minor = 4, - .rev = 0, + .rev = 2, .funcs = &sdma_v4_4_2_ip_funcs, }; @@ -2131,6 +2140,11 @@ static void sdma_v4_4_2_inst_query_ras_error_count(struct amdgpu_device *adev, { struct ras_err_data *err_data = (struct ras_err_data *)ras_err_status; uint32_t sdma_dev_inst = GET_INST(SDMA0, sdma_inst); + unsigned long ue_count = 0; + struct amdgpu_smuio_mcm_config_info mcm_info = { + .socket_id = adev->smuio.funcs->get_socket_id(adev), + .die_id = adev->sdma.instance[sdma_inst].aid_id, + }; /* sdma v4_4_2 doesn't support query ce counts */ amdgpu_ras_inst_query_ras_error_count(adev, @@ -2140,7 +2154,9 @@ static void sdma_v4_4_2_inst_query_ras_error_count(struct amdgpu_device *adev, ARRAY_SIZE(sdma_v4_4_2_ras_memory_list), sdma_dev_inst, AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE, - &err_data->ue_count); + &ue_count); + + amdgpu_ras_error_statistic_ue_count(err_data, &mcm_info, ue_count); } static void sdma_v4_4_2_query_ras_error_count(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c index 5c4d4df9cf94..3c485e5a531a 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c @@ -184,7 +184,7 @@ static u32 sdma_v5_0_get_reg_offset(struct amdgpu_device *adev, u32 instance, u3 static void sdma_v5_0_init_golden_registers(struct amdgpu_device *adev) { - switch (adev->ip_versions[SDMA0_HWIP][0]) { + switch (amdgpu_ip_version(adev, SDMA0_HWIP, 0)) { case IP_VERSION(5, 0, 0): soc15_program_register_sequence(adev, golden_settings_sdma_5, @@ -237,17 +237,15 @@ static void sdma_v5_0_init_golden_registers(struct amdgpu_device *adev) // emulation only, won't work on real chip // navi10 real chip need to use PSP to load firmware static int sdma_v5_0_init_microcode(struct amdgpu_device *adev) -{ int ret, i; - - if (amdgpu_sriov_vf(adev) && (adev->ip_versions[SDMA0_HWIP][0] == IP_VERSION(5, 0, 5))) - return 0; +{ + int ret, i; for (i = 0; i < adev->sdma.num_instances; i++) { ret = amdgpu_sdma_init_microcode(adev, i, false); if (ret) return ret; } - + return ret; } @@ -561,8 +559,6 @@ static void sdma_v5_0_gfx_stop(struct amdgpu_device *adev) u32 rb_cntl, ib_cntl; int i; - amdgpu_sdma_unset_buffer_funcs_helper(adev); - for (i = 0; i < adev->sdma.num_instances; i++) { rb_cntl = RREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL)); rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 0); @@ -827,9 +823,6 @@ static int sdma_v5_0_gfx_resume(struct amdgpu_device *adev) r = amdgpu_ring_test_helper(ring); if (r) return r; - - if (adev->mman.buffer_funcs_ring == ring) - amdgpu_ttm_set_buffer_funcs_status(adev, true); } return 0; @@ -1338,6 +1331,11 @@ static void sdma_v5_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring, static int sdma_v5_0_early_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int r; + + r = sdma_v5_0_init_microcode(adev); + if (r) + return r; sdma_v5_0_set_ring_funcs(adev); sdma_v5_0_set_buffer_funcs(adev); @@ -1369,12 +1367,6 @@ static int sdma_v5_0_sw_init(void *handle) if (r) return r; - r = sdma_v5_0_init_microcode(adev); - if (r) { - DRM_ERROR("Failed to load sdma firmware!\n"); - return r; - } - for (i = 0; i < adev->sdma.num_instances; i++) { ring = &adev->sdma.instance[i].ring; ring->ring_obj = NULL; @@ -1429,11 +1421,8 @@ static int sdma_v5_0_hw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - if (amdgpu_sriov_vf(adev)) { - /* disable the scheduler for SDMA */ - amdgpu_sdma_unset_buffer_funcs_helper(adev); + if (amdgpu_sriov_vf(adev)) return 0; - } sdma_v5_0_ctx_switch_enable(adev, false); sdma_v5_0_enable(adev, false); @@ -1699,7 +1688,7 @@ static int sdma_v5_0_set_clockgating_state(void *handle, if (amdgpu_sriov_vf(adev)) return 0; - switch (adev->ip_versions[SDMA0_HWIP][0]) { + switch (amdgpu_ip_version(adev, SDMA0_HWIP, 0)) { case IP_VERSION(5, 0, 0): case IP_VERSION(5, 0, 2): case IP_VERSION(5, 0, 5): diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c index a7b230e5a26d..0058f3f7cf6e 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c @@ -364,8 +364,6 @@ static void sdma_v5_2_gfx_stop(struct amdgpu_device *adev) u32 rb_cntl, ib_cntl; int i; - amdgpu_sdma_unset_buffer_funcs_helper(adev); - for (i = 0; i < adev->sdma.num_instances; i++) { rb_cntl = RREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL)); rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 0); @@ -625,9 +623,6 @@ static int sdma_v5_2_gfx_resume(struct amdgpu_device *adev) r = amdgpu_ring_test_helper(ring); if (r) return r; - - if (adev->mman.buffer_funcs_ring == ring) - amdgpu_ttm_set_buffer_funcs_status(adev, true); } return 0; @@ -1172,6 +1167,11 @@ static void sdma_v5_2_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring, static int sdma_v5_2_early_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int r; + + r = amdgpu_sdma_init_microcode(adev, 0, true); + if (r) + return r; sdma_v5_2_set_ring_funcs(adev); sdma_v5_2_set_buffer_funcs(adev); @@ -1231,12 +1231,6 @@ static int sdma_v5_2_sw_init(void *handle) return r; } - r = amdgpu_sdma_init_microcode(adev, 0, true); - if (r) { - DRM_ERROR("Failed to load sdma firmware!\n"); - return r; - } - for (i = 0; i < adev->sdma.num_instances; i++) { ring = &adev->sdma.instance[i].ring; ring->ring_obj = NULL; @@ -1285,11 +1279,8 @@ static int sdma_v5_2_hw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - if (amdgpu_sriov_vf(adev)) { - /* disable the scheduler for SDMA */ - amdgpu_sdma_unset_buffer_funcs_helper(adev); + if (amdgpu_sriov_vf(adev)) return 0; - } sdma_v5_2_ctx_switch_enable(adev, false); sdma_v5_2_enable(adev, false); @@ -1507,6 +1498,30 @@ static int sdma_v5_2_process_illegal_inst_irq(struct amdgpu_device *adev, return 0; } +static bool sdma_v5_2_firmware_mgcg_support(struct amdgpu_device *adev, + int i) +{ + switch (amdgpu_ip_version(adev, SDMA0_HWIP, 0)) { + case IP_VERSION(5, 2, 1): + if (adev->sdma.instance[i].fw_version < 70) + return false; + break; + case IP_VERSION(5, 2, 3): + if (adev->sdma.instance[i].fw_version < 47) + return false; + break; + case IP_VERSION(5, 2, 7): + if (adev->sdma.instance[i].fw_version < 9) + return false; + break; + default: + return true; + } + + return true; + +} + static void sdma_v5_2_update_medium_grain_clock_gating(struct amdgpu_device *adev, bool enable) { @@ -1515,7 +1530,7 @@ static void sdma_v5_2_update_medium_grain_clock_gating(struct amdgpu_device *ade for (i = 0; i < adev->sdma.num_instances; i++) { - if (adev->sdma.instance[i].fw_version < 70 && adev->ip_versions[SDMA0_HWIP][0] == IP_VERSION(5, 2, 1)) + if (!sdma_v5_2_firmware_mgcg_support(adev, i)) adev->cg_flags &= ~AMD_CG_SUPPORT_SDMA_MGCG; if (enable && (adev->cg_flags & AMD_CG_SUPPORT_SDMA_MGCG)) { @@ -1551,8 +1566,9 @@ static void sdma_v5_2_update_medium_grain_light_sleep(struct amdgpu_device *adev int i; for (i = 0; i < adev->sdma.num_instances; i++) { - - if (adev->sdma.instance[i].fw_version < 70 && adev->ip_versions[SDMA0_HWIP][0] == IP_VERSION(5, 2, 1)) + if (adev->sdma.instance[i].fw_version < 70 && + amdgpu_ip_version(adev, SDMA0_HWIP, 0) == + IP_VERSION(5, 2, 1)) adev->cg_flags &= ~AMD_CG_SUPPORT_SDMA_LS; if (enable && (adev->cg_flags & AMD_CG_SUPPORT_SDMA_LS)) { @@ -1581,7 +1597,7 @@ static int sdma_v5_2_set_clockgating_state(void *handle, if (amdgpu_sriov_vf(adev)) return 0; - switch (adev->ip_versions[SDMA0_HWIP][0]) { + switch (amdgpu_ip_version(adev, SDMA0_HWIP, 0)) { case IP_VERSION(5, 2, 0): case IP_VERSION(5, 2, 2): case IP_VERSION(5, 2, 1): @@ -1589,6 +1605,7 @@ static int sdma_v5_2_set_clockgating_state(void *handle, case IP_VERSION(5, 2, 5): case IP_VERSION(5, 2, 6): case IP_VERSION(5, 2, 3): + case IP_VERSION(5, 2, 7): sdma_v5_2_update_medium_grain_clock_gating(adev, state == AMD_CG_STATE_GATE); sdma_v5_2_update_medium_grain_light_sleep(adev, @@ -1626,6 +1643,32 @@ static void sdma_v5_2_get_clockgating_state(void *handle, u64 *flags) *flags |= AMD_CG_SUPPORT_SDMA_LS; } +static void sdma_v5_2_ring_begin_use(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + /* SDMA 5.2.3 (RMB) FW doesn't seem to properly + * disallow GFXOFF in some cases leading to + * hangs in SDMA. Disallow GFXOFF while SDMA is active. + * We can probably just limit this to 5.2.3, + * but it shouldn't hurt for other parts since + * this GFXOFF will be disallowed anyway when SDMA is + * active, this just makes it explicit. + */ + amdgpu_gfx_off_ctrl(adev, false); +} + +static void sdma_v5_2_ring_end_use(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + /* SDMA 5.2.3 (RMB) FW doesn't seem to properly + * disallow GFXOFF in some cases leading to + * hangs in SDMA. Allow GFXOFF when SDMA is complete. + */ + amdgpu_gfx_off_ctrl(adev, true); +} + const struct amd_ip_funcs sdma_v5_2_ip_funcs = { .name = "sdma_v5_2", .early_init = sdma_v5_2_early_init, @@ -1673,6 +1716,8 @@ static const struct amdgpu_ring_funcs sdma_v5_2_ring_funcs = { .test_ib = sdma_v5_2_ring_test_ib, .insert_nop = sdma_v5_2_ring_insert_nop, .pad_ib = sdma_v5_2_ring_pad_ib, + .begin_use = sdma_v5_2_ring_begin_use, + .end_use = sdma_v5_2_ring_end_use, .emit_wreg = sdma_v5_2_ring_emit_wreg, .emit_reg_wait = sdma_v5_2_ring_emit_reg_wait, .emit_reg_write_reg_wait = sdma_v5_2_ring_emit_reg_write_reg_wait, diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c index 3b03dda854fd..3c7ddd219de8 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c @@ -48,6 +48,7 @@ MODULE_FIRMWARE("amdgpu/sdma_6_0_0.bin"); MODULE_FIRMWARE("amdgpu/sdma_6_0_1.bin"); MODULE_FIRMWARE("amdgpu/sdma_6_0_2.bin"); MODULE_FIRMWARE("amdgpu/sdma_6_0_3.bin"); +MODULE_FIRMWARE("amdgpu/sdma_6_1_0.bin"); #define SDMA1_REG_OFFSET 0x600 #define SDMA0_HYP_DEC_REG_START 0x5880 @@ -155,68 +156,35 @@ static uint64_t sdma_v6_0_ring_get_wptr(struct amdgpu_ring *ring) static void sdma_v6_0_ring_set_wptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - uint32_t *wptr_saved; - uint32_t *is_queue_unmap; - uint64_t aggregated_db_index; - uint32_t mqd_size = adev->mqds[AMDGPU_HW_IP_DMA].mqd_size; - - DRM_DEBUG("Setting write pointer\n"); - - if (ring->is_mes_queue) { - wptr_saved = (uint32_t *)(ring->mqd_ptr + mqd_size); - is_queue_unmap = (uint32_t *)(ring->mqd_ptr + mqd_size + - sizeof(uint32_t)); - aggregated_db_index = - amdgpu_mes_get_aggregated_doorbell_index(adev, - ring->hw_prio); + if (ring->use_doorbell) { + DRM_DEBUG("Using doorbell -- " + "wptr_offs == 0x%08x " + "lower_32_bits(ring->wptr) << 2 == 0x%08x " + "upper_32_bits(ring->wptr) << 2 == 0x%08x\n", + ring->wptr_offs, + lower_32_bits(ring->wptr << 2), + upper_32_bits(ring->wptr << 2)); + /* XXX check if swapping is necessary on BE */ atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr << 2); - *wptr_saved = ring->wptr << 2; - if (*is_queue_unmap) { - WDOORBELL64(aggregated_db_index, ring->wptr << 2); - DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n", - ring->doorbell_index, ring->wptr << 2); - WDOORBELL64(ring->doorbell_index, ring->wptr << 2); - } else { - DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n", - ring->doorbell_index, ring->wptr << 2); - WDOORBELL64(ring->doorbell_index, ring->wptr << 2); - - if (*is_queue_unmap) - WDOORBELL64(aggregated_db_index, - ring->wptr << 2); - } + DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n", + ring->doorbell_index, ring->wptr << 2); + WDOORBELL64(ring->doorbell_index, ring->wptr << 2); } else { - if (ring->use_doorbell) { - DRM_DEBUG("Using doorbell -- " - "wptr_offs == 0x%08x " - "lower_32_bits(ring->wptr) << 2 == 0x%08x " - "upper_32_bits(ring->wptr) << 2 == 0x%08x\n", - ring->wptr_offs, - lower_32_bits(ring->wptr << 2), - upper_32_bits(ring->wptr << 2)); - /* XXX check if swapping is necessary on BE */ - atomic64_set((atomic64_t *)ring->wptr_cpu_addr, - ring->wptr << 2); - DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n", - ring->doorbell_index, ring->wptr << 2); - WDOORBELL64(ring->doorbell_index, ring->wptr << 2); - } else { - DRM_DEBUG("Not using doorbell -- " - "regSDMA%i_GFX_RB_WPTR == 0x%08x " - "regSDMA%i_GFX_RB_WPTR_HI == 0x%08x\n", - ring->me, - lower_32_bits(ring->wptr << 2), - ring->me, - upper_32_bits(ring->wptr << 2)); - WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, - ring->me, regSDMA0_QUEUE0_RB_WPTR), - lower_32_bits(ring->wptr << 2)); - WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, - ring->me, regSDMA0_QUEUE0_RB_WPTR_HI), - upper_32_bits(ring->wptr << 2)); - } + DRM_DEBUG("Not using doorbell -- " + "regSDMA%i_GFX_RB_WPTR == 0x%08x " + "regSDMA%i_GFX_RB_WPTR_HI == 0x%08x\n", + ring->me, + lower_32_bits(ring->wptr << 2), + ring->me, + upper_32_bits(ring->wptr << 2)); + WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, + ring->me, regSDMA0_QUEUE0_RB_WPTR), + lower_32_bits(ring->wptr << 2)); + WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, + ring->me, regSDMA0_QUEUE0_RB_WPTR_HI), + upper_32_bits(ring->wptr << 2)); } } @@ -233,7 +201,7 @@ static void sdma_v6_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) amdgpu_ring_write(ring, ring->funcs->nop); } -/** +/* * sdma_v6_0_ring_emit_ib - Schedule an IB on the DMA engine * * @ring: amdgpu ring pointer @@ -380,8 +348,6 @@ static void sdma_v6_0_gfx_stop(struct amdgpu_device *adev) u32 rb_cntl, ib_cntl; int i; - amdgpu_sdma_unset_buffer_funcs_helper(adev); - for (i = 0; i < adev->sdma.num_instances; i++) { rb_cntl = RREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_CNTL)); rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, RB_ENABLE, 0); @@ -593,9 +559,6 @@ static int sdma_v6_0_gfx_resume(struct amdgpu_device *adev) r = amdgpu_ring_test_helper(ring); if (r) return r; - - if (adev->mman.buffer_funcs_ring == ring) - amdgpu_ttm_set_buffer_funcs_status(adev, true); } return 0; @@ -936,7 +899,7 @@ static int sdma_v6_0_ring_test_ring(struct amdgpu_ring *ring) return r; } -/** +/* * sdma_v6_0_ring_test_ib - test an IB on the DMA engine * * @ring: amdgpu_ring structure holding ring information @@ -1118,7 +1081,7 @@ static void sdma_v6_0_vm_set_pte_pde(struct amdgpu_ib *ib, ib->ptr[ib->length_dw++] = count - 1; /* number of entries */ } -/** +/* * sdma_v6_0_ring_pad_ib - pad the IB * @ib: indirect buffer to fill with padding * @ring: amdgpu ring pointer @@ -1167,7 +1130,7 @@ static void sdma_v6_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring) SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(4)); /* retry count, poll interval */ } -/** +/* * sdma_v6_0_ring_emit_vm_flush - vm flush using sDMA * * @ring: amdgpu_ring pointer @@ -1245,19 +1208,23 @@ static struct amdgpu_sdma_ras sdma_v6_0_3_ras = { static void sdma_v6_0_set_ras_funcs(struct amdgpu_device *adev) { - switch (adev->ip_versions[SDMA0_HWIP][0]) { + switch (amdgpu_ip_version(adev, SDMA0_HWIP, 0)) { case IP_VERSION(6, 0, 3): adev->sdma.ras = &sdma_v6_0_3_ras; break; default: break; } - } static int sdma_v6_0_early_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int r; + + r = amdgpu_sdma_init_microcode(adev, 0, true); + if (r) + return r; sdma_v6_0_set_ring_funcs(adev); sdma_v6_0_set_buffer_funcs(adev); @@ -1282,12 +1249,6 @@ static int sdma_v6_0_sw_init(void *handle) if (r) return r; - r = amdgpu_sdma_init_microcode(adev, 0, true); - if (r) { - DRM_ERROR("Failed to load sdma firmware!\n"); - return r; - } - for (i = 0; i < adev->sdma.num_instances; i++) { ring = &adev->sdma.instance[i].ring; ring->ring_obj = NULL; @@ -1342,11 +1303,8 @@ static int sdma_v6_0_hw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - if (amdgpu_sriov_vf(adev)) { - /* disable the scheduler for SDMA */ - amdgpu_sdma_unset_buffer_funcs_helper(adev); + if (amdgpu_sriov_vf(adev)) return 0; - } sdma_v6_0_ctxempty_int_enable(adev, false); sdma_v6_0_enable(adev, false); diff --git a/drivers/gpu/drm/amd/amdgpu/si.c b/drivers/gpu/drm/amd/amdgpu/si.c index f64b87b11b1b..a757526153e5 100644 --- a/drivers/gpu/drm/amd/amdgpu/si.c +++ b/drivers/gpu/drm/amd/amdgpu/si.c @@ -2276,17 +2276,8 @@ static void si_pcie_gen3_enable(struct amdgpu_device *adev) u16 bridge_cfg2, gpu_cfg2; u32 max_lw, current_lw, tmp; - pcie_capability_read_word(root, PCI_EXP_LNKCTL, - &bridge_cfg); - pcie_capability_read_word(adev->pdev, PCI_EXP_LNKCTL, - &gpu_cfg); - - tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD; - pcie_capability_write_word(root, PCI_EXP_LNKCTL, tmp16); - - tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD; - pcie_capability_write_word(adev->pdev, PCI_EXP_LNKCTL, - tmp16); + pcie_capability_set_word(root, PCI_EXP_LNKCTL, PCI_EXP_LNKCTL_HAWD); + pcie_capability_set_word(adev->pdev, PCI_EXP_LNKCTL, PCI_EXP_LNKCTL_HAWD); tmp = RREG32_PCIE(PCIE_LC_STATUS1); max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT; @@ -2331,21 +2322,14 @@ static void si_pcie_gen3_enable(struct amdgpu_device *adev) mdelay(100); - pcie_capability_read_word(root, PCI_EXP_LNKCTL, - &tmp16); - tmp16 &= ~PCI_EXP_LNKCTL_HAWD; - tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD); - pcie_capability_write_word(root, PCI_EXP_LNKCTL, - tmp16); - - pcie_capability_read_word(adev->pdev, - PCI_EXP_LNKCTL, - &tmp16); - tmp16 &= ~PCI_EXP_LNKCTL_HAWD; - tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD); - pcie_capability_write_word(adev->pdev, - PCI_EXP_LNKCTL, - tmp16); + pcie_capability_clear_and_set_word(root, PCI_EXP_LNKCTL, + PCI_EXP_LNKCTL_HAWD, + bridge_cfg & + PCI_EXP_LNKCTL_HAWD); + pcie_capability_clear_and_set_word(adev->pdev, PCI_EXP_LNKCTL, + PCI_EXP_LNKCTL_HAWD, + gpu_cfg & + PCI_EXP_LNKCTL_HAWD); pcie_capability_read_word(root, PCI_EXP_LNKCTL2, &tmp16); @@ -2456,8 +2440,6 @@ static void si_program_aspm(struct amdgpu_device *adev) if (!amdgpu_device_should_use_aspm(adev)) return; - if (adev->flags & AMD_IS_APU) - return; orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL); data &= ~LC_XMIT_N_FTS_MASK; data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN; diff --git a/drivers/gpu/drm/amd/amdgpu/si_dma.c b/drivers/gpu/drm/amd/amdgpu/si_dma.c index 42c4547f32ec..9aa0e11ee673 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_dma.c +++ b/drivers/gpu/drm/amd/amdgpu/si_dma.c @@ -115,8 +115,6 @@ static void si_dma_stop(struct amdgpu_device *adev) u32 rb_cntl; unsigned i; - amdgpu_sdma_unset_buffer_funcs_helper(adev); - for (i = 0; i < adev->sdma.num_instances; i++) { /* dma0 */ rb_cntl = RREG32(DMA_RB_CNTL + sdma_offsets[i]); @@ -177,9 +175,6 @@ static int si_dma_start(struct amdgpu_device *adev) r = amdgpu_ring_test_helper(ring); if (r) return r; - - if (adev->mman.buffer_funcs_ring == ring) - amdgpu_ttm_set_buffer_funcs_status(adev, true); } return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/sienna_cichlid.c b/drivers/gpu/drm/amd/amdgpu/sienna_cichlid.c index 8b8086d5c864..93f6772d1b24 100644 --- a/drivers/gpu/drm/amd/amdgpu/sienna_cichlid.c +++ b/drivers/gpu/drm/amd/amdgpu/sienna_cichlid.c @@ -36,7 +36,7 @@ static bool sienna_cichlid_is_mode2_default(struct amdgpu_reset_control *reset_c #if 0 struct amdgpu_device *adev = (struct amdgpu_device *)reset_ctl->handle; - if (adev->ip_versions[MP1_HWIP][0] == IP_VERSION(11, 0, 7) && + if (amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(11, 0, 7) && adev->pm.fw_version >= 0x3a5500 && !amdgpu_sriov_vf(adev)) return true; #endif @@ -48,18 +48,17 @@ sienna_cichlid_get_reset_handler(struct amdgpu_reset_control *reset_ctl, struct amdgpu_reset_context *reset_context) { struct amdgpu_reset_handler *handler; + int i; if (reset_context->method != AMD_RESET_METHOD_NONE) { - list_for_each_entry(handler, &reset_ctl->reset_handlers, - handler_list) { + for_each_handler(i, handler, reset_ctl) { if (handler->reset_method == reset_context->method) return handler; } } if (sienna_cichlid_is_mode2_default(reset_ctl)) { - list_for_each_entry (handler, &reset_ctl->reset_handlers, - handler_list) { + for_each_handler(i, handler, reset_ctl) { if (handler->reset_method == AMD_RESET_METHOD_MODE2) return handler; } @@ -120,9 +119,9 @@ static void sienna_cichlid_async_reset(struct work_struct *work) struct amdgpu_reset_control *reset_ctl = container_of(work, struct amdgpu_reset_control, reset_work); struct amdgpu_device *adev = (struct amdgpu_device *)reset_ctl->handle; + int i; - list_for_each_entry(handler, &reset_ctl->reset_handlers, - handler_list) { + for_each_handler(i, handler, reset_ctl) { if (handler->reset_method == reset_ctl->active_reset) { dev_dbg(adev->dev, "Resetting device\n"); handler->do_reset(adev); @@ -281,6 +280,11 @@ static struct amdgpu_reset_handler sienna_cichlid_mode2_handler = { .do_reset = sienna_cichlid_mode2_reset, }; +static struct amdgpu_reset_handler + *sienna_cichlid_rst_handlers[AMDGPU_RESET_MAX_HANDLERS] = { + &sienna_cichlid_mode2_handler, + }; + int sienna_cichlid_reset_init(struct amdgpu_device *adev) { struct amdgpu_reset_control *reset_ctl; @@ -294,11 +298,9 @@ int sienna_cichlid_reset_init(struct amdgpu_device *adev) reset_ctl->active_reset = AMD_RESET_METHOD_NONE; reset_ctl->get_reset_handler = sienna_cichlid_get_reset_handler; - INIT_LIST_HEAD(&reset_ctl->reset_handlers); INIT_WORK(&reset_ctl->reset_work, reset_ctl->async_reset); /* Only mode2 is handled through reset control now */ - amdgpu_reset_add_handler(reset_ctl, &sienna_cichlid_mode2_handler); - + reset_ctl->reset_handlers = &sienna_cichlid_rst_handlers; adev->reset_cntl = reset_ctl; return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/smu_v13_0_10.c b/drivers/gpu/drm/amd/amdgpu/smu_v13_0_10.c index ae29620b1ea4..04c797d54511 100644 --- a/drivers/gpu/drm/amd/amdgpu/smu_v13_0_10.c +++ b/drivers/gpu/drm/amd/amdgpu/smu_v13_0_10.c @@ -44,10 +44,10 @@ smu_v13_0_10_get_reset_handler(struct amdgpu_reset_control *reset_ctl, { struct amdgpu_reset_handler *handler; struct amdgpu_device *adev = (struct amdgpu_device *)reset_ctl->handle; + int i; if (reset_context->method != AMD_RESET_METHOD_NONE) { - list_for_each_entry(handler, &reset_ctl->reset_handlers, - handler_list) { + for_each_handler(i, handler, reset_ctl) { if (handler->reset_method == reset_context->method) return handler; } @@ -55,8 +55,7 @@ smu_v13_0_10_get_reset_handler(struct amdgpu_reset_control *reset_ctl, if (smu_v13_0_10_is_mode2_default(reset_ctl) && amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_MODE2) { - list_for_each_entry (handler, &reset_ctl->reset_handlers, - handler_list) { + for_each_handler(i, handler, reset_ctl) { if (handler->reset_method == AMD_RESET_METHOD_MODE2) return handler; } @@ -119,9 +118,9 @@ static void smu_v13_0_10_async_reset(struct work_struct *work) struct amdgpu_reset_control *reset_ctl = container_of(work, struct amdgpu_reset_control, reset_work); struct amdgpu_device *adev = (struct amdgpu_device *)reset_ctl->handle; + int i; - list_for_each_entry(handler, &reset_ctl->reset_handlers, - handler_list) { + for_each_handler(i, handler, reset_ctl) { if (handler->reset_method == reset_ctl->active_reset) { dev_dbg(adev->dev, "Resetting device\n"); handler->do_reset(adev); @@ -272,6 +271,11 @@ static struct amdgpu_reset_handler smu_v13_0_10_mode2_handler = { .do_reset = smu_v13_0_10_mode2_reset, }; +static struct amdgpu_reset_handler + *smu_v13_0_10_rst_handlers[AMDGPU_RESET_MAX_HANDLERS] = { + &smu_v13_0_10_mode2_handler, + }; + int smu_v13_0_10_reset_init(struct amdgpu_device *adev) { struct amdgpu_reset_control *reset_ctl; @@ -285,10 +289,9 @@ int smu_v13_0_10_reset_init(struct amdgpu_device *adev) reset_ctl->active_reset = AMD_RESET_METHOD_NONE; reset_ctl->get_reset_handler = smu_v13_0_10_get_reset_handler; - INIT_LIST_HEAD(&reset_ctl->reset_handlers); INIT_WORK(&reset_ctl->reset_work, reset_ctl->async_reset); /* Only mode2 is handled through reset control now */ - amdgpu_reset_add_handler(reset_ctl, &smu_v13_0_10_mode2_handler); + reset_ctl->reset_handlers = &smu_v13_0_10_rst_handlers; adev->reset_cntl = reset_ctl; diff --git a/drivers/gpu/drm/amd/amdgpu/smuio_v13_0.c b/drivers/gpu/drm/amd/amdgpu/smuio_v13_0.c index 13e905c22592..bf8b8e5ddf5d 100644 --- a/drivers/gpu/drm/amd/amdgpu/smuio_v13_0.c +++ b/drivers/gpu/drm/amd/amdgpu/smuio_v13_0.c @@ -128,6 +128,27 @@ static bool smuio_v13_0_is_host_gpu_xgmi_supported(struct amdgpu_device *adev) return data ? true : false; } +static enum amdgpu_pkg_type smuio_v13_0_get_pkg_type(struct amdgpu_device *adev) +{ + enum amdgpu_pkg_type pkg_type; + u32 data; + + data = RREG32_SOC15(SMUIO, 0, regSMUIO_MCM_CONFIG); + data = REG_GET_FIELD(data, SMUIO_MCM_CONFIG, TOPOLOGY_ID); + + switch (data) { + case 0x4: + case 0xC: + pkg_type = AMDGPU_PKG_TYPE_CEM; + break; + default: + pkg_type = AMDGPU_PKG_TYPE_OAM; + break; + } + + return pkg_type; +} + const struct amdgpu_smuio_funcs smuio_v13_0_funcs = { .get_rom_index_offset = smuio_v13_0_get_rom_index_offset, .get_rom_data_offset = smuio_v13_0_get_rom_data_offset, @@ -136,4 +157,5 @@ const struct amdgpu_smuio_funcs smuio_v13_0_funcs = { .is_host_gpu_xgmi_supported = smuio_v13_0_is_host_gpu_xgmi_supported, .update_rom_clock_gating = smuio_v13_0_update_rom_clock_gating, .get_clock_gating_state = smuio_v13_0_get_clock_gating_state, + .get_pkg_type = smuio_v13_0_get_pkg_type, }; diff --git a/drivers/gpu/drm/amd/amdgpu/smuio_v13_0_3.c b/drivers/gpu/drm/amd/amdgpu/smuio_v13_0_3.c index 4368a5891eeb..5461b5289793 100644 --- a/drivers/gpu/drm/amd/amdgpu/smuio_v13_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/smuio_v13_0_3.c @@ -84,6 +84,12 @@ static enum amdgpu_pkg_type smuio_v13_0_3_get_pkg_type(struct amdgpu_device *ade * b0100 - b1111 - Reserved */ switch (data & PKG_TYPE_MASK) { + case 0x0: + pkg_type = AMDGPU_PKG_TYPE_CEM; + break; + case 0x1: + pkg_type = AMDGPU_PKG_TYPE_OAM; + break; case 0x2: pkg_type = AMDGPU_PKG_TYPE_APU; break; diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index afcaeadda4c7..51342809af03 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -174,8 +174,8 @@ static const struct amdgpu_video_codecs vcn_4_0_3_video_codecs_encode = { static int soc15_query_video_codecs(struct amdgpu_device *adev, bool encode, const struct amdgpu_video_codecs **codecs) { - if (adev->ip_versions[VCE_HWIP][0]) { - switch (adev->ip_versions[VCE_HWIP][0]) { + if (amdgpu_ip_version(adev, VCE_HWIP, 0)) { + switch (amdgpu_ip_version(adev, VCE_HWIP, 0)) { case IP_VERSION(4, 0, 0): case IP_VERSION(4, 1, 0): if (encode) @@ -187,7 +187,7 @@ static int soc15_query_video_codecs(struct amdgpu_device *adev, bool encode, return -EINVAL; } } else { - switch (adev->ip_versions[UVD_HWIP][0]) { + switch (amdgpu_ip_version(adev, UVD_HWIP, 0)) { case IP_VERSION(1, 0, 0): case IP_VERSION(1, 0, 1): if (encode) @@ -324,11 +324,12 @@ static u32 soc15_get_xclk(struct amdgpu_device *adev) { u32 reference_clock = adev->clock.spll.reference_freq; - if (adev->ip_versions[MP1_HWIP][0] == IP_VERSION(12, 0, 0) || - adev->ip_versions[MP1_HWIP][0] == IP_VERSION(12, 0, 1)) + if (amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(12, 0, 0) || + amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(12, 0, 1) || + amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 6)) return 10000; - if (adev->ip_versions[MP1_HWIP][0] == IP_VERSION(10, 0, 0) || - adev->ip_versions[MP1_HWIP][0] == IP_VERSION(10, 0, 1)) + if (amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(10, 0, 0) || + amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(10, 0, 1)) return reference_clock / 4; return reference_clock; @@ -522,7 +523,7 @@ soc15_asic_reset_method(struct amdgpu_device *adev) dev_warn(adev->dev, "Specified reset method:%d isn't supported, using AUTO instead.\n", amdgpu_reset_method); - switch (adev->ip_versions[MP1_HWIP][0]) { + switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) { case IP_VERSION(10, 0, 0): case IP_VERSION(10, 0, 1): case IP_VERSION(12, 0, 0): @@ -559,8 +560,10 @@ soc15_asic_reset_method(struct amdgpu_device *adev) */ if (amdgpu_gpu_recovery == 4 || amdgpu_gpu_recovery == 5) return AMD_RESET_METHOD_MODE2; + else if (!(adev->flags & AMD_IS_APU)) + return AMD_RESET_METHOD_MODE1; else - return AMD_RESET_METHOD_NONE; + return AMD_RESET_METHOD_MODE2; default: break; } @@ -596,7 +599,7 @@ static int soc15_asic_reset(struct amdgpu_device *adev) static bool soc15_supports_baco(struct amdgpu_device *adev) { - switch (adev->ip_versions[MP1_HWIP][0]) { + switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) { case IP_VERSION(9, 0, 0): case IP_VERSION(11, 0, 2): if (adev->asic_type == CHIP_VEGA20) { @@ -643,8 +646,7 @@ static void soc15_program_aspm(struct amdgpu_device *adev) if (!amdgpu_device_should_use_aspm(adev)) return; - if (!(adev->flags & AMD_IS_APU) && - (adev->nbio.funcs->program_aspm)) + if (adev->nbio.funcs->program_aspm) adev->nbio.funcs->program_aspm(adev); } @@ -893,9 +895,9 @@ static const struct amdgpu_asic_funcs aqua_vanjaram_asic_funcs = .get_config_memsize = &soc15_get_config_memsize, .need_full_reset = &soc15_need_full_reset, .init_doorbell_index = &aqua_vanjaram_doorbell_index_init, - .get_pcie_usage = &vega20_get_pcie_usage, + .get_pcie_usage = &amdgpu_nbio_get_pcie_usage, .need_reset_on_init = &soc15_need_reset_on_init, - .get_pcie_replay_count = &soc15_get_pcie_replay_count, + .get_pcie_replay_count = &amdgpu_nbio_get_pcie_replay_count, .supports_baco = &soc15_supports_baco, .pre_asic_init = &soc15_pre_asic_init, .query_video_codecs = &soc15_query_video_codecs, @@ -919,6 +921,8 @@ static int soc15_common_early_init(void *handle) adev->pcie_wreg_ext = &amdgpu_device_indirect_wreg_ext; adev->pcie_rreg64 = &amdgpu_device_indirect_rreg64; adev->pcie_wreg64 = &amdgpu_device_indirect_wreg64; + adev->pcie_rreg64_ext = &amdgpu_device_indirect_rreg64_ext; + adev->pcie_wreg64_ext = &amdgpu_device_indirect_wreg64_ext; adev->uvd_ctx_rreg = &soc15_uvd_ctx_rreg; adev->uvd_ctx_wreg = &soc15_uvd_ctx_wreg; adev->didt_rreg = &soc15_didt_rreg; @@ -933,7 +937,7 @@ static int soc15_common_early_init(void *handle) /* TODO: split the GC and PG flags based on the relevant IP version for which * they are relevant. */ - switch (adev->ip_versions[GC_HWIP][0]) { + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { case IP_VERSION(9, 0, 1): adev->asic_funcs = &soc15_asic_funcs; adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG | @@ -1157,6 +1161,11 @@ static int soc15_common_early_init(void *handle) AMD_PG_SUPPORT_VCN_DPG | AMD_PG_SUPPORT_JPEG; adev->external_rev_id = adev->rev_id + 0x46; + /* GC 9.4.3 uses MMIO register region hole at a different offset */ + if (!amdgpu_sriov_vf(adev)) { + adev->rmmio_remap.reg_offset = 0x1A000; + adev->rmmio_remap.bus_addr = adev->rmmio_base + 0x1A000; + } break; default: /* FIXME: not supported yet */ @@ -1362,7 +1371,7 @@ static int soc15_common_set_clockgating_state(void *handle, if (amdgpu_sriov_vf(adev)) return 0; - switch (adev->ip_versions[NBIO_HWIP][0]) { + switch (amdgpu_ip_version(adev, NBIO_HWIP, 0)) { case IP_VERSION(6, 1, 0): case IP_VERSION(6, 2, 0): case IP_VERSION(7, 4, 0): @@ -1414,12 +1423,14 @@ static void soc15_common_get_clockgating_state(void *handle, u64 *flags) if (amdgpu_sriov_vf(adev)) *flags = 0; - adev->nbio.funcs->get_clockgating_state(adev, flags); - - adev->hdp.funcs->get_clock_gating_state(adev, flags); + if (adev->nbio.funcs && adev->nbio.funcs->get_clockgating_state) + adev->nbio.funcs->get_clockgating_state(adev, flags); - if (adev->ip_versions[MP0_HWIP][0] != IP_VERSION(13, 0, 2)) { + if (adev->hdp.funcs && adev->hdp.funcs->get_clock_gating_state) + adev->hdp.funcs->get_clock_gating_state(adev, flags); + if ((amdgpu_ip_version(adev, MP0_HWIP, 0) != IP_VERSION(13, 0, 2)) && + (amdgpu_ip_version(adev, MP0_HWIP, 0) != IP_VERSION(13, 0, 6))) { /* AMD_CG_SUPPORT_DRM_MGCG */ data = RREG32(SOC15_REG_OFFSET(MP0, 0, mmMP0_MISC_CGTT_CTRL0)); if (!(data & 0x01000000)) @@ -1432,9 +1443,11 @@ static void soc15_common_get_clockgating_state(void *handle, u64 *flags) } /* AMD_CG_SUPPORT_ROM_MGCG */ - adev->smuio.funcs->get_clock_gating_state(adev, flags); + if (adev->smuio.funcs && adev->smuio.funcs->get_clock_gating_state) + adev->smuio.funcs->get_clock_gating_state(adev, flags); - adev->df.funcs->get_clockgating_state(adev, flags); + if (adev->df.funcs && adev->df.funcs->get_clockgating_state) + adev->df.funcs->get_clockgating_state(adev, flags); } static int soc15_common_set_powergating_state(void *handle, diff --git a/drivers/gpu/drm/amd/amdgpu/soc15_common.h b/drivers/gpu/drm/amd/amdgpu/soc15_common.h index 96948a59f8dd..242b24f73c17 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15_common.h +++ b/drivers/gpu/drm/amd/amdgpu/soc15_common.h @@ -37,65 +37,65 @@ #define SOC15_REG_OFFSET1(ip, inst, reg, offset) \ (adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + (reg)+(offset)) -#define __WREG32_SOC15_RLC__(reg, value, flag, hwip) \ +#define __WREG32_SOC15_RLC__(reg, value, flag, hwip, inst) \ ((amdgpu_sriov_vf(adev) && adev->gfx.rlc.funcs && adev->gfx.rlc.rlcg_reg_access_supported) ? \ - amdgpu_sriov_wreg(adev, reg, value, flag, hwip) : \ + amdgpu_sriov_wreg(adev, reg, value, flag, hwip, inst) : \ WREG32(reg, value)) -#define __RREG32_SOC15_RLC__(reg, flag, hwip) \ +#define __RREG32_SOC15_RLC__(reg, flag, hwip, inst) \ ((amdgpu_sriov_vf(adev) && adev->gfx.rlc.funcs && adev->gfx.rlc.rlcg_reg_access_supported) ? \ - amdgpu_sriov_rreg(adev, reg, flag, hwip) : \ + amdgpu_sriov_rreg(adev, reg, flag, hwip, inst) : \ RREG32(reg)) #define WREG32_FIELD15(ip, idx, reg, field, val) \ __WREG32_SOC15_RLC__(adev->reg_offset[ip##_HWIP][idx][mm##reg##_BASE_IDX] + mm##reg, \ (__RREG32_SOC15_RLC__( \ adev->reg_offset[ip##_HWIP][idx][mm##reg##_BASE_IDX] + mm##reg, \ - 0, ip##_HWIP) & \ + 0, ip##_HWIP, idx) & \ ~REG_FIELD_MASK(reg, field)) | (val) << REG_FIELD_SHIFT(reg, field), \ - 0, ip##_HWIP) + 0, ip##_HWIP, idx) #define WREG32_FIELD15_PREREG(ip, idx, reg_name, field, val) \ __WREG32_SOC15_RLC__(adev->reg_offset[ip##_HWIP][idx][reg##reg_name##_BASE_IDX] + reg##reg_name, \ (__RREG32_SOC15_RLC__( \ adev->reg_offset[ip##_HWIP][idx][reg##reg_name##_BASE_IDX] + reg##reg_name, \ - 0, ip##_HWIP) & \ + 0, ip##_HWIP, idx) & \ ~REG_FIELD_MASK(reg_name, field)) | (val) << REG_FIELD_SHIFT(reg_name, field), \ - 0, ip##_HWIP) + 0, ip##_HWIP, idx) #define RREG32_SOC15(ip, inst, reg) \ __RREG32_SOC15_RLC__(adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg, \ - 0, ip##_HWIP) + 0, ip##_HWIP, inst) -#define RREG32_SOC15_IP(ip, reg) __RREG32_SOC15_RLC__(reg, 0, ip##_HWIP) +#define RREG32_SOC15_IP(ip, reg) __RREG32_SOC15_RLC__(reg, 0, ip##_HWIP, 0) -#define RREG32_SOC15_IP_NO_KIQ(ip, reg) __RREG32_SOC15_RLC__(reg, AMDGPU_REGS_NO_KIQ, ip##_HWIP) +#define RREG32_SOC15_IP_NO_KIQ(ip, reg, inst) __RREG32_SOC15_RLC__(reg, AMDGPU_REGS_NO_KIQ, ip##_HWIP, inst) #define RREG32_SOC15_NO_KIQ(ip, inst, reg) \ __RREG32_SOC15_RLC__(adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg, \ - AMDGPU_REGS_NO_KIQ, ip##_HWIP) + AMDGPU_REGS_NO_KIQ, ip##_HWIP, inst) #define RREG32_SOC15_OFFSET(ip, inst, reg, offset) \ __RREG32_SOC15_RLC__((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + (reg)) + \ - (offset), 0, ip##_HWIP) + (offset), 0, ip##_HWIP, inst) #define WREG32_SOC15(ip, inst, reg, value) \ __WREG32_SOC15_RLC__((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg), \ - value, 0, ip##_HWIP) + value, 0, ip##_HWIP, inst) #define WREG32_SOC15_IP(ip, reg, value) \ - __WREG32_SOC15_RLC__(reg, value, 0, ip##_HWIP) + __WREG32_SOC15_RLC__(reg, value, 0, ip##_HWIP, 0) -#define WREG32_SOC15_IP_NO_KIQ(ip, reg, value) \ - __WREG32_SOC15_RLC__(reg, value, AMDGPU_REGS_NO_KIQ, ip##_HWIP) +#define WREG32_SOC15_IP_NO_KIQ(ip, reg, value, inst) \ + __WREG32_SOC15_RLC__(reg, value, AMDGPU_REGS_NO_KIQ, ip##_HWIP, inst) #define WREG32_SOC15_NO_KIQ(ip, inst, reg, value) \ __WREG32_SOC15_RLC__(adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg, \ - value, AMDGPU_REGS_NO_KIQ, ip##_HWIP) + value, AMDGPU_REGS_NO_KIQ, ip##_HWIP, inst) #define WREG32_SOC15_OFFSET(ip, inst, reg, offset, value) \ __WREG32_SOC15_RLC__((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) + offset, \ - value, 0, ip##_HWIP) + value, 0, ip##_HWIP, inst) #define SOC15_WAIT_ON_RREG(ip, inst, reg, expected_value, mask) \ amdgpu_device_wait_on_rreg(adev, inst, \ @@ -108,16 +108,16 @@ #reg, expected_value, mask) #define WREG32_RLC(reg, value) \ - __WREG32_SOC15_RLC__(reg, value, AMDGPU_REGS_RLC, GC_HWIP) + __WREG32_SOC15_RLC__(reg, value, AMDGPU_REGS_RLC, GC_HWIP, 0) -#define WREG32_RLC_EX(prefix, reg, value) \ +#define WREG32_RLC_EX(prefix, reg, value, inst) \ do { \ if (amdgpu_sriov_fullaccess(adev)) { \ uint32_t i = 0; \ uint32_t retries = 50000; \ - uint32_t r0 = adev->reg_offset[GC_HWIP][0][prefix##SCRATCH_REG0_BASE_IDX] + prefix##SCRATCH_REG0; \ - uint32_t r1 = adev->reg_offset[GC_HWIP][0][prefix##SCRATCH_REG1_BASE_IDX] + prefix##SCRATCH_REG1; \ - uint32_t spare_int = adev->reg_offset[GC_HWIP][0][prefix##RLC_SPARE_INT_BASE_IDX] + prefix##RLC_SPARE_INT; \ + uint32_t r0 = adev->reg_offset[GC_HWIP][inst][prefix##SCRATCH_REG0_BASE_IDX] + prefix##SCRATCH_REG0; \ + uint32_t r1 = adev->reg_offset[GC_HWIP][inst][prefix##SCRATCH_REG1_BASE_IDX] + prefix##SCRATCH_REG1; \ + uint32_t spare_int = adev->reg_offset[GC_HWIP][inst][prefix##RLC_SPARE_INT_BASE_IDX] + prefix##RLC_SPARE_INT; \ WREG32(r0, value); \ WREG32(r1, (reg | 0x80000000)); \ WREG32(spare_int, 0x1); \ @@ -136,17 +136,17 @@ /* shadow the registers in the callback function */ #define WREG32_SOC15_RLC_SHADOW(ip, inst, reg, value) \ - __WREG32_SOC15_RLC__((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg), value, AMDGPU_REGS_RLC, GC_HWIP) + __WREG32_SOC15_RLC__((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg), value, AMDGPU_REGS_RLC, GC_HWIP, inst) /* for GC only */ #define RREG32_RLC(reg) \ - __RREG32_SOC15_RLC__(reg, AMDGPU_REGS_RLC, GC_HWIP) + __RREG32_SOC15_RLC__(reg, AMDGPU_REGS_RLC, GC_HWIP, 0) #define WREG32_RLC_NO_KIQ(reg, value, hwip) \ - __WREG32_SOC15_RLC__(reg, value, AMDGPU_REGS_NO_KIQ | AMDGPU_REGS_RLC, hwip) + __WREG32_SOC15_RLC__(reg, value, AMDGPU_REGS_NO_KIQ | AMDGPU_REGS_RLC, hwip, 0) #define RREG32_RLC_NO_KIQ(reg, hwip) \ - __RREG32_SOC15_RLC__(reg, AMDGPU_REGS_NO_KIQ | AMDGPU_REGS_RLC, hwip) + __RREG32_SOC15_RLC__(reg, AMDGPU_REGS_NO_KIQ | AMDGPU_REGS_RLC, hwip, 0) #define WREG32_SOC15_RLC_SHADOW_EX(prefix, ip, inst, reg, value) \ do { \ @@ -167,32 +167,32 @@ } while (0) #define RREG32_SOC15_RLC(ip, inst, reg) \ - __RREG32_SOC15_RLC__(adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg, AMDGPU_REGS_RLC, ip##_HWIP) + __RREG32_SOC15_RLC__(adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg, AMDGPU_REGS_RLC, ip##_HWIP, inst) #define WREG32_SOC15_RLC(ip, inst, reg, value) \ do { \ uint32_t target_reg = adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg;\ - __WREG32_SOC15_RLC__(target_reg, value, AMDGPU_REGS_RLC, ip##_HWIP); \ + __WREG32_SOC15_RLC__(target_reg, value, AMDGPU_REGS_RLC, ip##_HWIP, inst); \ } while (0) #define WREG32_SOC15_RLC_EX(prefix, ip, inst, reg, value) \ do { \ uint32_t target_reg = adev->reg_offset[GC_HWIP][inst][reg##_BASE_IDX] + reg;\ - WREG32_RLC_EX(prefix, target_reg, value); \ + WREG32_RLC_EX(prefix, target_reg, value, inst); \ } while (0) #define WREG32_FIELD15_RLC(ip, idx, reg, field, val) \ __WREG32_SOC15_RLC__((adev->reg_offset[ip##_HWIP][idx][mm##reg##_BASE_IDX] + mm##reg), \ (__RREG32_SOC15_RLC__(adev->reg_offset[ip##_HWIP][idx][mm##reg##_BASE_IDX] + mm##reg, \ - AMDGPU_REGS_RLC, ip##_HWIP) & \ + AMDGPU_REGS_RLC, ip##_HWIP, idx) & \ ~REG_FIELD_MASK(reg, field)) | (val) << REG_FIELD_SHIFT(reg, field), \ - AMDGPU_REGS_RLC, ip##_HWIP) + AMDGPU_REGS_RLC, ip##_HWIP, idx) #define WREG32_SOC15_OFFSET_RLC(ip, inst, reg, offset, value) \ - __WREG32_SOC15_RLC__((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) + offset, value, AMDGPU_REGS_RLC, ip##_HWIP) + __WREG32_SOC15_RLC__((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) + offset, value, AMDGPU_REGS_RLC, ip##_HWIP, inst) #define RREG32_SOC15_OFFSET_RLC(ip, inst, reg, offset) \ - __RREG32_SOC15_RLC__((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) + offset, AMDGPU_REGS_RLC, ip##_HWIP) + __RREG32_SOC15_RLC__((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) + offset, AMDGPU_REGS_RLC, ip##_HWIP, inst) /* inst equals to ext for some IPs */ #define RREG32_SOC15_EXT(ip, inst, reg, ext) \ @@ -204,4 +204,10 @@ + adev->asic_funcs->encode_ext_smn_addressing(ext), \ value) \ +#define RREG64_MCA(ext, mca_base, idx) \ + RREG64_PCIE_EXT(adev->asic_funcs->encode_ext_smn_addressing(ext) + mca_base + (idx * 8)) + +#define WREG64_MCA(ext, mca_base, idx, val) \ + WREG64_PCIE_EXT(adev->asic_funcs->encode_ext_smn_addressing(ext) + mca_base + (idx * 8), val) + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/soc21.c b/drivers/gpu/drm/amd/amdgpu/soc21.c index e5e5d68a4d70..48c6efcdeac9 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc21.c +++ b/drivers/gpu/drm/amd/amdgpu/soc21.c @@ -48,33 +48,28 @@ static const struct amd_ip_funcs soc21_common_ip_funcs; /* SOC21 */ -static const struct amdgpu_video_codec_info vcn_4_0_0_video_codecs_encode_array_vcn0[] = -{ +static const struct amdgpu_video_codec_info vcn_4_0_0_video_codecs_encode_array_vcn0[] = { {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 2304, 0)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 4096, 2304, 0)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1, 8192, 4352, 0)}, }; -static const struct amdgpu_video_codec_info vcn_4_0_0_video_codecs_encode_array_vcn1[] = -{ +static const struct amdgpu_video_codec_info vcn_4_0_0_video_codecs_encode_array_vcn1[] = { {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 2304, 0)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 4096, 2304, 0)}, }; -static const struct amdgpu_video_codecs vcn_4_0_0_video_codecs_encode_vcn0 = -{ +static const struct amdgpu_video_codecs vcn_4_0_0_video_codecs_encode_vcn0 = { .codec_count = ARRAY_SIZE(vcn_4_0_0_video_codecs_encode_array_vcn0), .codec_array = vcn_4_0_0_video_codecs_encode_array_vcn0, }; -static const struct amdgpu_video_codecs vcn_4_0_0_video_codecs_encode_vcn1 = -{ +static const struct amdgpu_video_codecs vcn_4_0_0_video_codecs_encode_vcn1 = { .codec_count = ARRAY_SIZE(vcn_4_0_0_video_codecs_encode_array_vcn1), .codec_array = vcn_4_0_0_video_codecs_encode_array_vcn1, }; -static const struct amdgpu_video_codec_info vcn_4_0_0_video_codecs_decode_array_vcn0[] = -{ +static const struct amdgpu_video_codec_info vcn_4_0_0_video_codecs_decode_array_vcn0[] = { {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)}, @@ -82,22 +77,19 @@ static const struct amdgpu_video_codec_info vcn_4_0_0_video_codecs_decode_array_ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1, 8192, 4352, 0)}, }; -static const struct amdgpu_video_codec_info vcn_4_0_0_video_codecs_decode_array_vcn1[] = -{ +static const struct amdgpu_video_codec_info vcn_4_0_0_video_codecs_decode_array_vcn1[] = { {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)}, }; -static const struct amdgpu_video_codecs vcn_4_0_0_video_codecs_decode_vcn0 = -{ +static const struct amdgpu_video_codecs vcn_4_0_0_video_codecs_decode_vcn0 = { .codec_count = ARRAY_SIZE(vcn_4_0_0_video_codecs_decode_array_vcn0), .codec_array = vcn_4_0_0_video_codecs_decode_array_vcn0, }; -static const struct amdgpu_video_codecs vcn_4_0_0_video_codecs_decode_vcn1 = -{ +static const struct amdgpu_video_codecs vcn_4_0_0_video_codecs_decode_vcn1 = { .codec_count = ARRAY_SIZE(vcn_4_0_0_video_codecs_decode_array_vcn1), .codec_array = vcn_4_0_0_video_codecs_decode_array_vcn1, }; @@ -161,10 +153,11 @@ static int soc21_query_video_codecs(struct amdgpu_device *adev, bool encode, if (adev->vcn.num_vcn_inst == hweight8(adev->vcn.harvest_config)) return -EINVAL; - switch (adev->ip_versions[UVD_HWIP][0]) { + switch (amdgpu_ip_version(adev, UVD_HWIP, 0)) { case IP_VERSION(4, 0, 0): case IP_VERSION(4, 0, 2): case IP_VERSION(4, 0, 4): + case IP_VERSION(4, 0, 5): if (amdgpu_sriov_vf(adev)) { if ((adev->vcn.harvest_config & AMDGPU_VCN_HARVEST_VCN0) || !amdgpu_sriov_is_av1_support(adev)) { @@ -381,13 +374,14 @@ soc21_asic_reset_method(struct amdgpu_device *adev) dev_warn(adev->dev, "Specified reset method:%d isn't supported, using AUTO instead.\n", amdgpu_reset_method); - switch (adev->ip_versions[MP1_HWIP][0]) { + switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) { case IP_VERSION(13, 0, 0): case IP_VERSION(13, 0, 7): case IP_VERSION(13, 0, 10): return AMD_RESET_METHOD_MODE1; case IP_VERSION(13, 0, 4): case IP_VERSION(13, 0, 11): + case IP_VERSION(14, 0, 0): return AMD_RESET_METHOD_MODE2; default: if (amdgpu_dpm_is_baco_supported(adev)) @@ -440,13 +434,11 @@ static void soc21_program_aspm(struct amdgpu_device *adev) if (!amdgpu_device_should_use_aspm(adev)) return; - if (!(adev->flags & AMD_IS_APU) && - (adev->nbio.funcs->program_aspm)) + if (adev->nbio.funcs->program_aspm) adev->nbio.funcs->program_aspm(adev); } -const struct amdgpu_ip_block_version soc21_common_ip_block = -{ +const struct amdgpu_ip_block_version soc21_common_ip_block = { .type = AMD_IP_BLOCK_TYPE_COMMON, .major = 1, .minor = 0, @@ -456,7 +448,7 @@ const struct amdgpu_ip_block_version soc21_common_ip_block = static bool soc21_need_full_reset(struct amdgpu_device *adev) { - switch (adev->ip_versions[GC_HWIP][0]) { + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { case IP_VERSION(11, 0, 0): return amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__UMC); case IP_VERSION(11, 0, 2): @@ -484,16 +476,6 @@ static bool soc21_need_reset_on_init(struct amdgpu_device *adev) return false; } -static uint64_t soc21_get_pcie_replay_count(struct amdgpu_device *adev) -{ - - /* TODO - * dummy implement for pcie_replay_count sysfs interface - * */ - - return 0; -} - static void soc21_init_doorbell_index(struct amdgpu_device *adev) { adev->doorbell_index.kiq = AMDGPU_NAVI10_DOORBELL_KIQ; @@ -522,6 +504,7 @@ static void soc21_init_doorbell_index(struct amdgpu_device *adev) adev->doorbell_index.vcn.vcn_ring2_3 = AMDGPU_NAVI10_DOORBELL64_VCN2_3; adev->doorbell_index.vcn.vcn_ring4_5 = AMDGPU_NAVI10_DOORBELL64_VCN4_5; adev->doorbell_index.vcn.vcn_ring6_7 = AMDGPU_NAVI10_DOORBELL64_VCN6_7; + adev->doorbell_index.vpe_ring = AMDGPU_NAVI10_DOORBELL64_VPE; adev->doorbell_index.first_non_cp = AMDGPU_NAVI10_DOORBELL64_FIRST_NON_CP; adev->doorbell_index.last_non_cp = AMDGPU_NAVI10_DOORBELL64_LAST_NON_CP; @@ -547,8 +530,7 @@ static int soc21_update_umd_stable_pstate(struct amdgpu_device *adev, return 0; } -static const struct amdgpu_asic_funcs soc21_asic_funcs = -{ +static const struct amdgpu_asic_funcs soc21_asic_funcs = { .read_disabled_bios = &soc21_read_disabled_bios, .read_bios_from_rom = &amdgpu_soc15_read_bios_from_rom, .read_register = &soc21_read_register, @@ -561,7 +543,7 @@ static const struct amdgpu_asic_funcs soc21_asic_funcs = .init_doorbell_index = &soc21_init_doorbell_index, .need_full_reset = &soc21_need_full_reset, .need_reset_on_init = &soc21_need_reset_on_init, - .get_pcie_replay_count = &soc21_get_pcie_replay_count, + .get_pcie_replay_count = &amdgpu_nbio_get_pcie_replay_count, .supports_baco = &amdgpu_dpm_is_baco_supported, .pre_asic_init = &soc21_pre_asic_init, .query_video_codecs = &soc21_query_video_codecs, @@ -595,7 +577,7 @@ static int soc21_common_early_init(void *handle) adev->rev_id = amdgpu_device_get_rev_id(adev); adev->external_rev_id = 0xff; - switch (adev->ip_versions[GC_HWIP][0]) { + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { case IP_VERSION(11, 0, 0): adev->cg_flags = AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS | @@ -707,7 +689,33 @@ static int soc21_common_early_init(void *handle) AMD_PG_SUPPORT_JPEG; adev->external_rev_id = adev->rev_id + 0x80; break; - + case IP_VERSION(11, 5, 0): + adev->cg_flags = AMD_CG_SUPPORT_VCN_MGCG | + AMD_CG_SUPPORT_JPEG_MGCG | + AMD_CG_SUPPORT_GFX_CGCG | + AMD_CG_SUPPORT_GFX_CGLS | + AMD_CG_SUPPORT_GFX_MGCG | + AMD_CG_SUPPORT_GFX_FGCG | + AMD_CG_SUPPORT_REPEATER_FGCG | + AMD_CG_SUPPORT_GFX_PERF_CLK | + AMD_CG_SUPPORT_GFX_3D_CGCG | + AMD_CG_SUPPORT_GFX_3D_CGLS | + AMD_CG_SUPPORT_MC_MGCG | + AMD_CG_SUPPORT_MC_LS | + AMD_CG_SUPPORT_HDP_LS | + AMD_CG_SUPPORT_HDP_DS | + AMD_CG_SUPPORT_HDP_SD | + AMD_CG_SUPPORT_ATHUB_MGCG | + AMD_CG_SUPPORT_ATHUB_LS | + AMD_CG_SUPPORT_IH_CG | + AMD_CG_SUPPORT_BIF_MGCG | + AMD_CG_SUPPORT_BIF_LS; + adev->pg_flags = AMD_PG_SUPPORT_VCN_DPG | + AMD_PG_SUPPORT_VCN | + AMD_PG_SUPPORT_JPEG | + AMD_PG_SUPPORT_GFX_PG; + adev->external_rev_id = adev->rev_id + 0x1; + break; default: /* FIXME: not supported yet */ return -EINVAL; @@ -786,7 +794,7 @@ static int soc21_common_hw_init(void *handle) * for the purpose of expose those registers * to process space */ - if (adev->nbio.funcs->remap_hdp_registers) + if (adev->nbio.funcs->remap_hdp_registers && !amdgpu_sriov_vf(adev)) adev->nbio.funcs->remap_hdp_registers(adev); /* enable the doorbell aperture */ adev->nbio.funcs->enable_doorbell_aperture(adev, true); @@ -851,10 +859,12 @@ static int soc21_common_set_clockgating_state(void *handle, { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - switch (adev->ip_versions[NBIO_HWIP][0]) { + switch (amdgpu_ip_version(adev, NBIO_HWIP, 0)) { case IP_VERSION(4, 3, 0): case IP_VERSION(4, 3, 1): case IP_VERSION(7, 7, 0): + case IP_VERSION(7, 7, 1): + case IP_VERSION(7, 11, 0): adev->nbio.funcs->update_medium_grain_clock_gating(adev, state == AMD_CG_STATE_GATE); adev->nbio.funcs->update_medium_grain_light_sleep(adev, @@ -873,7 +883,7 @@ static int soc21_common_set_powergating_state(void *handle, { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - switch (adev->ip_versions[LSDMA_HWIP][0]) { + switch (amdgpu_ip_version(adev, LSDMA_HWIP, 0)) { case IP_VERSION(6, 0, 0): case IP_VERSION(6, 0, 2): adev->lsdma.funcs->update_memory_power_gating(adev, @@ -893,8 +903,6 @@ static void soc21_common_get_clockgating_state(void *handle, u64 *flags) adev->nbio.funcs->get_clockgating_state(adev, flags); adev->hdp.funcs->get_clock_gating_state(adev, flags); - - return; } static const struct amd_ip_funcs soc21_common_ip_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/ta_xgmi_if.h b/drivers/gpu/drm/amd/amdgpu/ta_xgmi_if.h index da815a93d46e..d5748032674e 100644 --- a/drivers/gpu/drm/amd/amdgpu/ta_xgmi_if.h +++ b/drivers/gpu/drm/amd/amdgpu/ta_xgmi_if.h @@ -1,5 +1,5 @@ /* - * Copyright 2018 Advanced Micro Devices, Inc. + * Copyright 2018-2022 Advanced Micro Devices, Inc. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -20,7 +20,6 @@ * OTHER DEALINGS IN THE SOFTWARE. * */ - #ifndef _TA_XGMI_IF_H #define _TA_XGMI_IF_H @@ -28,20 +27,31 @@ #define RSP_ID_MASK (1U << 31) #define RSP_ID(cmdId) (((uint32_t)(cmdId)) | RSP_ID_MASK) +#define EXTEND_PEER_LINK_INFO_CMD_FLAG 1 + enum ta_command_xgmi { + /* Initialize the Context and Session Topology */ TA_COMMAND_XGMI__INITIALIZE = 0x00, + /* Gets the current GPU's node ID */ TA_COMMAND_XGMI__GET_NODE_ID = 0x01, + /* Gets the current GPU's hive ID */ TA_COMMAND_XGMI__GET_HIVE_ID = 0x02, - TA_COMMAND_XGMI__GET_GET_TOPOLOGY_INFO = 0x03, + /* Gets the Peer's topology Information */ + TA_COMMAND_XGMI__GET_TOPOLOGY_INFO = 0x03, + /* Sets the Peer's topology Information */ TA_COMMAND_XGMI__SET_TOPOLOGY_INFO = 0x04, - TA_COMMAND_XGMI__GET_PEER_LINKS = 0x0B + /* Gets the total links between adjacent peer dies in hive */ + TA_COMMAND_XGMI__GET_PEER_LINKS = 0x0B, + /* Gets the total links and connected port numbers between adjacent peer dies in hive */ + TA_COMMAND_XGMI__GET_EXTEND_PEER_LINKS = 0x0C }; /* XGMI related enumerations */ /**********************************************************/; -enum ta_xgmi_connected_nodes { - TA_XGMI__MAX_CONNECTED_NODES = 64 -}; +enum { TA_XGMI__MAX_CONNECTED_NODES = 64 }; +enum { TA_XGMI__MAX_INTERNAL_STATE = 32 }; +enum { TA_XGMI__MAX_INTERNAL_STATE_BUFFER = 128 }; +enum { TA_XGMI__MAX_PORT_NUM = 8 }; enum ta_xgmi_status { TA_XGMI_STATUS__SUCCESS = 0x00, @@ -81,6 +91,18 @@ struct ta_xgmi_peer_link_info { uint8_t num_links; }; +struct xgmi_connected_port_num { + uint8_t dst_xgmi_port_num; + uint8_t src_xgmi_port_num; +}; + +/* support both the port num and num_links */ +struct ta_xgmi_extend_peer_link_info { + uint64_t node_id; + uint8_t num_links; + struct xgmi_connected_port_num port_num[TA_XGMI__MAX_PORT_NUM]; +}; + struct ta_xgmi_cmd_initialize_output { uint32_t status; }; @@ -103,16 +125,21 @@ struct ta_xgmi_cmd_get_topology_info_output { struct ta_xgmi_node_info nodes[TA_XGMI__MAX_CONNECTED_NODES]; }; -struct ta_xgmi_cmd_get_peer_link_info_output { +struct ta_xgmi_cmd_set_topology_info_input { uint32_t num_nodes; - struct ta_xgmi_peer_link_info nodes[TA_XGMI__MAX_CONNECTED_NODES]; + struct ta_xgmi_node_info nodes[TA_XGMI__MAX_CONNECTED_NODES]; }; -struct ta_xgmi_cmd_set_topology_info_input { +/* support XGMI TA w/ and w/o port_num both so two similar structs defined */ +struct ta_xgmi_cmd_get_peer_link_info { uint32_t num_nodes; - struct ta_xgmi_node_info nodes[TA_XGMI__MAX_CONNECTED_NODES]; + struct ta_xgmi_peer_link_info nodes[TA_XGMI__MAX_CONNECTED_NODES]; }; +struct ta_xgmi_cmd_get_extend_peer_link_info { + uint32_t num_nodes; + struct ta_xgmi_extend_peer_link_info nodes[TA_XGMI__MAX_CONNECTED_NODES]; +}; /**********************************************************/ /* Common input structure for XGMI callbacks */ union ta_xgmi_cmd_input { @@ -126,16 +153,23 @@ union ta_xgmi_cmd_output { struct ta_xgmi_cmd_get_node_id_output get_node_id; struct ta_xgmi_cmd_get_hive_id_output get_hive_id; struct ta_xgmi_cmd_get_topology_info_output get_topology_info; - struct ta_xgmi_cmd_get_peer_link_info_output get_link_info; + struct ta_xgmi_cmd_get_peer_link_info get_link_info; + struct ta_xgmi_cmd_get_extend_peer_link_info get_extend_link_info; }; -/**********************************************************/ struct ta_xgmi_shared_memory { uint32_t cmd_id; uint32_t resp_id; enum ta_xgmi_status xgmi_status; + + /* if the number of xgmi link record is more than 128, driver will set the + * flag 0 to get the first 128 of the link records and will set to 1, to get + * the second set + */ uint8_t flag_extend_link_record; - uint8_t reserved0[3]; + /* bit0: port_num info support flag for GET_EXTEND_PEER_LINKS commmand */ + uint8_t caps_flag; + uint8_t reserved[2]; union ta_xgmi_cmd_input xgmi_in_message; union ta_xgmi_cmd_output xgmi_out_message; }; diff --git a/drivers/gpu/drm/amd/amdgpu/tonga_ih.c b/drivers/gpu/drm/amd/amdgpu/tonga_ih.c index b08905d1c00f..917707bba7f3 100644 --- a/drivers/gpu/drm/amd/amdgpu/tonga_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/tonga_ih.c @@ -493,8 +493,7 @@ static void tonga_ih_set_interrupt_funcs(struct amdgpu_device *adev) adev->irq.ih_funcs = &tonga_ih_funcs; } -const struct amdgpu_ip_block_version tonga_ih_ip_block = -{ +const struct amdgpu_ip_block_version tonga_ih_ip_block = { .type = AMD_IP_BLOCK_TYPE_IH, .major = 3, .minor = 0, diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c new file mode 100644 index 000000000000..e9c2ff74f0bc --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c @@ -0,0 +1,389 @@ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#include "umc_v12_0.h" +#include "amdgpu_ras.h" +#include "amdgpu_umc.h" +#include "amdgpu.h" +#include "umc/umc_12_0_0_offset.h" +#include "umc/umc_12_0_0_sh_mask.h" + +const uint32_t + umc_v12_0_channel_idx_tbl[] + [UMC_V12_0_UMC_INSTANCE_NUM] + [UMC_V12_0_CHANNEL_INSTANCE_NUM] = { + {{3, 7, 11, 15, 2, 6, 10, 14}, {1, 5, 9, 13, 0, 4, 8, 12}, + {19, 23, 27, 31, 18, 22, 26, 30}, {17, 21, 25, 29, 16, 20, 24, 28}}, + {{47, 43, 39, 35, 46, 42, 38, 34}, {45, 41, 37, 33, 44, 40, 36, 32}, + {63, 59, 55, 51, 62, 58, 54, 50}, {61, 57, 53, 49, 60, 56, 52, 48}}, + {{79, 75, 71, 67, 78, 74, 70, 66}, {77, 73, 69, 65, 76, 72, 68, 64}, + {95, 91, 87, 83, 94, 90, 86, 82}, {93, 89, 85, 81, 92, 88, 84, 80}}, + {{99, 103, 107, 111, 98, 102, 106, 110}, {97, 101, 105, 109, 96, 100, 104, 108}, + {115, 119, 123, 127, 114, 118, 122, 126}, {113, 117, 121, 125, 112, 116, 120, 124}} + }; + +/* mapping of MCA error address to normalized address */ +static const uint32_t umc_v12_0_ma2na_mapping[] = { + 0, 5, 6, 8, 9, 14, 12, 13, + 10, 11, 15, 16, 17, 18, 19, 20, + 21, 22, 23, 24, 25, 26, 27, 28, + 24, 7, 29, 30, +}; + +static inline uint64_t get_umc_v12_0_reg_offset(struct amdgpu_device *adev, + uint32_t node_inst, + uint32_t umc_inst, + uint32_t ch_inst) +{ + uint32_t index = umc_inst * adev->umc.channel_inst_num + ch_inst; + uint64_t cross_node_offset = (node_inst == 0) ? 0 : UMC_V12_0_CROSS_NODE_OFFSET; + + umc_inst = index / 4; + ch_inst = index % 4; + + return adev->umc.channel_offs * ch_inst + UMC_V12_0_INST_DIST * umc_inst + + UMC_V12_0_NODE_DIST * node_inst + cross_node_offset; +} + +static int umc_v12_0_reset_error_count_per_channel(struct amdgpu_device *adev, + uint32_t node_inst, uint32_t umc_inst, + uint32_t ch_inst, void *data) +{ + uint64_t odecc_err_cnt_addr; + uint64_t umc_reg_offset = + get_umc_v12_0_reg_offset(adev, node_inst, umc_inst, ch_inst); + + odecc_err_cnt_addr = + SOC15_REG_OFFSET(UMC, 0, regUMCCH0_OdEccErrCnt); + + /* clear error count */ + WREG32_PCIE_EXT((odecc_err_cnt_addr + umc_reg_offset) * 4, + UMC_V12_0_CE_CNT_INIT); + + return 0; +} + +static void umc_v12_0_reset_error_count(struct amdgpu_device *adev) +{ + amdgpu_umc_loop_channels(adev, + umc_v12_0_reset_error_count_per_channel, NULL); +} + +bool umc_v12_0_is_uncorrectable_error(uint64_t mc_umc_status) +{ + return ((REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1) && + (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, PCC) == 1 || + REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UC) == 1 || + REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, TCC) == 1)); +} + +bool umc_v12_0_is_correctable_error(uint64_t mc_umc_status) +{ + return (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 && + (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1 || + (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 && + REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UC) == 0) || + /* Identify data parity error in replay mode */ + ((REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, ErrorCodeExt) == 0x5 || + REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, ErrorCodeExt) == 0xb) && + !(umc_v12_0_is_uncorrectable_error(mc_umc_status))))); +} + +static void umc_v12_0_query_correctable_error_count(struct amdgpu_device *adev, + uint64_t umc_reg_offset, + unsigned long *error_count) +{ + uint64_t mc_umc_status; + uint64_t mc_umc_status_addr; + + mc_umc_status_addr = + SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_STATUST0); + + /* Rely on MCUMC_STATUS for correctable error counter + * MCUMC_STATUS is a 64 bit register + */ + mc_umc_status = + RREG64_PCIE_EXT((mc_umc_status_addr + umc_reg_offset) * 4); + + if (umc_v12_0_is_correctable_error(mc_umc_status)) + *error_count += 1; +} + +static void umc_v12_0_query_uncorrectable_error_count(struct amdgpu_device *adev, + uint64_t umc_reg_offset, + unsigned long *error_count) +{ + uint64_t mc_umc_status; + uint64_t mc_umc_status_addr; + + mc_umc_status_addr = + SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_STATUST0); + + /* Check the MCUMC_STATUS. */ + mc_umc_status = + RREG64_PCIE_EXT((mc_umc_status_addr + umc_reg_offset) * 4); + + if (umc_v12_0_is_uncorrectable_error(mc_umc_status)) + *error_count += 1; +} + +static int umc_v12_0_query_error_count(struct amdgpu_device *adev, + uint32_t node_inst, uint32_t umc_inst, + uint32_t ch_inst, void *data) +{ + struct ras_err_data *err_data = (struct ras_err_data *)data; + unsigned long ue_count = 0, ce_count = 0; + + /* NOTE: node_inst is converted by adev->umc.active_mask and the range is [0-3], + * which can be used as die ID directly */ + struct amdgpu_smuio_mcm_config_info mcm_info = { + .socket_id = adev->smuio.funcs->get_socket_id(adev), + .die_id = node_inst, + }; + + uint64_t umc_reg_offset = + get_umc_v12_0_reg_offset(adev, node_inst, umc_inst, ch_inst); + + umc_v12_0_query_correctable_error_count(adev, umc_reg_offset, &ce_count); + umc_v12_0_query_uncorrectable_error_count(adev, umc_reg_offset, &ue_count); + + amdgpu_ras_error_statistic_ue_count(err_data, &mcm_info, ue_count); + amdgpu_ras_error_statistic_ce_count(err_data, &mcm_info, ce_count); + + return 0; +} + +static void umc_v12_0_query_ras_error_count(struct amdgpu_device *adev, + void *ras_error_status) +{ + amdgpu_umc_loop_channels(adev, + umc_v12_0_query_error_count, ras_error_status); + + umc_v12_0_reset_error_count(adev); +} + +static bool umc_v12_0_bit_wise_xor(uint32_t val) +{ + bool result = 0; + int i; + + for (i = 0; i < 32; i++) + result = result ^ ((val >> i) & 0x1); + + return result; +} + +static void umc_v12_0_convert_error_address(struct amdgpu_device *adev, + struct ras_err_data *err_data, uint64_t err_addr, + uint32_t ch_inst, uint32_t umc_inst, + uint32_t node_inst) +{ + uint32_t channel_index, i; + uint64_t soc_pa, na, retired_page, column; + uint32_t bank_hash0, bank_hash1, bank_hash2, bank_hash3, col, row, row_xor; + uint32_t bank0, bank1, bank2, bank3, bank; + + bank_hash0 = (err_addr >> UMC_V12_0_MCA_B0_BIT) & 0x1ULL; + bank_hash1 = (err_addr >> UMC_V12_0_MCA_B1_BIT) & 0x1ULL; + bank_hash2 = (err_addr >> UMC_V12_0_MCA_B2_BIT) & 0x1ULL; + bank_hash3 = (err_addr >> UMC_V12_0_MCA_B3_BIT) & 0x1ULL; + col = (err_addr >> 1) & 0x1fULL; + row = (err_addr >> 10) & 0x3fffULL; + + /* apply bank hash algorithm */ + bank0 = + bank_hash0 ^ (UMC_V12_0_XOR_EN0 & + (umc_v12_0_bit_wise_xor(col & UMC_V12_0_COL_XOR0) ^ + (umc_v12_0_bit_wise_xor(row & UMC_V12_0_ROW_XOR0)))); + bank1 = + bank_hash1 ^ (UMC_V12_0_XOR_EN1 & + (umc_v12_0_bit_wise_xor(col & UMC_V12_0_COL_XOR1) ^ + (umc_v12_0_bit_wise_xor(row & UMC_V12_0_ROW_XOR1)))); + bank2 = + bank_hash2 ^ (UMC_V12_0_XOR_EN2 & + (umc_v12_0_bit_wise_xor(col & UMC_V12_0_COL_XOR2) ^ + (umc_v12_0_bit_wise_xor(row & UMC_V12_0_ROW_XOR2)))); + bank3 = + bank_hash3 ^ (UMC_V12_0_XOR_EN3 & + (umc_v12_0_bit_wise_xor(col & UMC_V12_0_COL_XOR3) ^ + (umc_v12_0_bit_wise_xor(row & UMC_V12_0_ROW_XOR3)))); + + bank = bank0 | (bank1 << 1) | (bank2 << 2) | (bank3 << 3); + err_addr &= ~0x3c0ULL; + err_addr |= (bank << UMC_V12_0_MCA_B0_BIT); + + na = 0x0; + /* convert mca error address to normalized address */ + for (i = 1; i < ARRAY_SIZE(umc_v12_0_ma2na_mapping); i++) + na |= ((err_addr >> i) & 0x1ULL) << umc_v12_0_ma2na_mapping[i]; + + channel_index = + adev->umc.channel_idx_tbl[node_inst * adev->umc.umc_inst_num * + adev->umc.channel_inst_num + + umc_inst * adev->umc.channel_inst_num + + ch_inst]; + /* translate umc channel address to soc pa, 3 parts are included */ + soc_pa = ADDR_OF_32KB_BLOCK(na) | + ADDR_OF_256B_BLOCK(channel_index) | + OFFSET_IN_256B_BLOCK(na); + + /* the umc channel bits are not original values, they are hashed */ + UMC_V12_0_SET_CHANNEL_HASH(channel_index, soc_pa); + + /* clear [C3 C2] in soc physical address */ + soc_pa &= ~(0x3ULL << UMC_V12_0_PA_C2_BIT); + /* clear [C4] in soc physical address */ + soc_pa &= ~(0x1ULL << UMC_V12_0_PA_C4_BIT); + + row_xor = row ^ (0x1ULL << 13); + /* loop for all possibilities of [C4 C3 C2] */ + for (column = 0; column < UMC_V12_0_NA_MAP_PA_NUM; column++) { + retired_page = soc_pa | ((column & 0x3) << UMC_V12_0_PA_C2_BIT); + retired_page |= (((column & 0x4) >> 2) << UMC_V12_0_PA_C4_BIT); + /* include column bit 0 and 1 */ + col &= 0x3; + col |= (column << 2); + dev_info(adev->dev, + "Error Address(PA):0x%-10llx Row:0x%-4x Col:0x%-2x Bank:0x%x Channel:0x%x\n", + retired_page, row, col, bank, channel_index); + amdgpu_umc_fill_error_record(err_data, err_addr, + retired_page, channel_index, umc_inst); + + /* shift R13 bit */ + retired_page ^= (0x1ULL << UMC_V12_0_PA_R13_BIT); + dev_info(adev->dev, + "Error Address(PA):0x%-10llx Row:0x%-4x Col:0x%-2x Bank:0x%x Channel:0x%x\n", + retired_page, row_xor, col, bank, channel_index); + amdgpu_umc_fill_error_record(err_data, err_addr, + retired_page, channel_index, umc_inst); + } +} + +static int umc_v12_0_query_error_address(struct amdgpu_device *adev, + uint32_t node_inst, uint32_t umc_inst, + uint32_t ch_inst, void *data) +{ + uint64_t mc_umc_status_addr; + uint64_t mc_umc_status, err_addr; + uint64_t mc_umc_addrt0; + struct ras_err_data *err_data = (struct ras_err_data *)data; + uint64_t umc_reg_offset = + get_umc_v12_0_reg_offset(adev, node_inst, umc_inst, ch_inst); + + mc_umc_status_addr = + SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_STATUST0); + + mc_umc_status = RREG64_PCIE_EXT((mc_umc_status_addr + umc_reg_offset) * 4); + + if (mc_umc_status == 0) + return 0; + + if (!err_data->err_addr) { + /* clear umc status */ + WREG64_PCIE_EXT((mc_umc_status_addr + umc_reg_offset) * 4, 0x0ULL); + + return 0; + } + + /* calculate error address if ue error is detected */ + if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 && + REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, AddrV) == 1 && + REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UC) == 1) { + + mc_umc_addrt0 = + SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_ADDRT0); + + err_addr = RREG64_PCIE_EXT((mc_umc_addrt0 + umc_reg_offset) * 4); + + err_addr = REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr); + + umc_v12_0_convert_error_address(adev, err_data, err_addr, + ch_inst, umc_inst, node_inst); + } + + /* clear umc status */ + WREG64_PCIE_EXT((mc_umc_status_addr + umc_reg_offset) * 4, 0x0ULL); + + return 0; +} + +static void umc_v12_0_query_ras_error_address(struct amdgpu_device *adev, + void *ras_error_status) +{ + amdgpu_umc_loop_channels(adev, + umc_v12_0_query_error_address, ras_error_status); +} + +static int umc_v12_0_err_cnt_init_per_channel(struct amdgpu_device *adev, + uint32_t node_inst, uint32_t umc_inst, + uint32_t ch_inst, void *data) +{ + uint32_t odecc_cnt_sel; + uint64_t odecc_cnt_sel_addr, odecc_err_cnt_addr; + uint64_t umc_reg_offset = + get_umc_v12_0_reg_offset(adev, node_inst, umc_inst, ch_inst); + + odecc_cnt_sel_addr = + SOC15_REG_OFFSET(UMC, 0, regUMCCH0_OdEccCntSel); + odecc_err_cnt_addr = + SOC15_REG_OFFSET(UMC, 0, regUMCCH0_OdEccErrCnt); + + odecc_cnt_sel = RREG32_PCIE_EXT((odecc_cnt_sel_addr + umc_reg_offset) * 4); + + /* set ce error interrupt type to APIC based interrupt */ + odecc_cnt_sel = REG_SET_FIELD(odecc_cnt_sel, UMCCH0_OdEccCntSel, + OdEccErrInt, 0x1); + WREG32_PCIE_EXT((odecc_cnt_sel_addr + umc_reg_offset) * 4, odecc_cnt_sel); + + /* set error count to initial value */ + WREG32_PCIE_EXT((odecc_err_cnt_addr + umc_reg_offset) * 4, UMC_V12_0_CE_CNT_INIT); + + return 0; +} + +static void umc_v12_0_err_cnt_init(struct amdgpu_device *adev) +{ + amdgpu_umc_loop_channels(adev, + umc_v12_0_err_cnt_init_per_channel, NULL); +} + +static bool umc_v12_0_query_ras_poison_mode(struct amdgpu_device *adev) +{ + /* + * Force return true, because regUMCCH0_EccCtrl + * is not accessible from host side + */ + return true; +} + +const struct amdgpu_ras_block_hw_ops umc_v12_0_ras_hw_ops = { + .query_ras_error_count = umc_v12_0_query_ras_error_count, + .query_ras_error_address = umc_v12_0_query_ras_error_address, +}; + +struct amdgpu_umc_ras umc_v12_0_ras = { + .ras_block = { + .hw_ops = &umc_v12_0_ras_hw_ops, + }, + .err_cnt_init = umc_v12_0_err_cnt_init, + .query_ras_poison_mode = umc_v12_0_query_ras_poison_mode, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h new file mode 100644 index 000000000000..b34b1e358f8b --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h @@ -0,0 +1,130 @@ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#ifndef __UMC_V12_0_H__ +#define __UMC_V12_0_H__ + +#include "soc15_common.h" +#include "amdgpu.h" + +#define UMC_V12_0_NODE_DIST 0x40000000 +#define UMC_V12_0_INST_DIST 0x40000 + +/* UMC register per channel offset */ +#define UMC_V12_0_PER_CHANNEL_OFFSET 0x400 + +/* UMC cross node offset */ +#define UMC_V12_0_CROSS_NODE_OFFSET 0x100000000 + +/* OdEccErrCnt max value */ +#define UMC_V12_0_CE_CNT_MAX 0xffff +/* umc ce interrupt threshold */ +#define UMC_V12_0_CE_INT_THRESHOLD 0xffff +/* umc ce count initial value */ +#define UMC_V12_0_CE_CNT_INIT (UMC_V12_0_CE_CNT_MAX - UMC_V12_0_CE_INT_THRESHOLD) + +/* number of umc channel instance with memory map register access */ +#define UMC_V12_0_CHANNEL_INSTANCE_NUM 8 +/* number of umc instance with memory map register access */ +#define UMC_V12_0_UMC_INSTANCE_NUM 4 + +/* Total channel instances for all available umc nodes */ +#define UMC_V12_0_TOTAL_CHANNEL_NUM(adev) \ + (UMC_V12_0_CHANNEL_INSTANCE_NUM * (adev)->gmc.num_umc) + +/* one piece of normalized address is mapped to 8 pieces of physical address */ +#define UMC_V12_0_NA_MAP_PA_NUM 8 +/* R13 bit shift should be considered, double the number */ +#define UMC_V12_0_BAD_PAGE_NUM_PER_CHANNEL (UMC_V12_0_NA_MAP_PA_NUM * 2) +/* bank bits in MCA error address */ +#define UMC_V12_0_MCA_B0_BIT 6 +#define UMC_V12_0_MCA_B1_BIT 7 +#define UMC_V12_0_MCA_B2_BIT 8 +#define UMC_V12_0_MCA_B3_BIT 9 +/* column bits in SOC physical address */ +#define UMC_V12_0_PA_C2_BIT 15 +#define UMC_V12_0_PA_C4_BIT 21 +/* row bits in SOC physical address */ +#define UMC_V12_0_PA_R13_BIT 35 +/* channel index bits in SOC physical address */ +#define UMC_V12_0_PA_CH4_BIT 12 +#define UMC_V12_0_PA_CH5_BIT 13 +#define UMC_V12_0_PA_CH6_BIT 14 + +/* bank hash settings */ +#define UMC_V12_0_XOR_EN0 1 +#define UMC_V12_0_XOR_EN1 1 +#define UMC_V12_0_XOR_EN2 1 +#define UMC_V12_0_XOR_EN3 1 +#define UMC_V12_0_COL_XOR0 0x0 +#define UMC_V12_0_COL_XOR1 0x0 +#define UMC_V12_0_COL_XOR2 0x800 +#define UMC_V12_0_COL_XOR3 0x1000 +#define UMC_V12_0_ROW_XOR0 0x11111 +#define UMC_V12_0_ROW_XOR1 0x22222 +#define UMC_V12_0_ROW_XOR2 0x4444 +#define UMC_V12_0_ROW_XOR3 0x8888 + +/* channel hash settings */ +#define UMC_V12_0_HASH_4K 0 +#define UMC_V12_0_HASH_64K 1 +#define UMC_V12_0_HASH_2M 1 +#define UMC_V12_0_HASH_1G 1 +#define UMC_V12_0_HASH_1T 1 + +/* XOR some bits of PA into CH4~CH6 bits (bits 12~14 of PA), + * hash bit is only effective when related setting is enabled + */ +#define UMC_V12_0_CHANNEL_HASH_CH4(channel_idx, pa) ((((channel_idx) >> 5) & 0x1) ^ \ + (((pa) >> 20) & 0x1ULL & UMC_V12_0_HASH_64K) ^ \ + (((pa) >> 27) & 0x1ULL & UMC_V12_0_HASH_2M) ^ \ + (((pa) >> 34) & 0x1ULL & UMC_V12_0_HASH_1G) ^ \ + (((pa) >> 41) & 0x1ULL & UMC_V12_0_HASH_1T)) +#define UMC_V12_0_CHANNEL_HASH_CH5(channel_idx, pa) ((((channel_idx) >> 6) & 0x1) ^ \ + (((pa) >> 21) & 0x1ULL & UMC_V12_0_HASH_64K) ^ \ + (((pa) >> 28) & 0x1ULL & UMC_V12_0_HASH_2M) ^ \ + (((pa) >> 35) & 0x1ULL & UMC_V12_0_HASH_1G) ^ \ + (((pa) >> 42) & 0x1ULL & UMC_V12_0_HASH_1T)) +#define UMC_V12_0_CHANNEL_HASH_CH6(channel_idx, pa) ((((channel_idx) >> 4) & 0x1) ^ \ + (((pa) >> 19) & 0x1ULL & UMC_V12_0_HASH_64K) ^ \ + (((pa) >> 26) & 0x1ULL & UMC_V12_0_HASH_2M) ^ \ + (((pa) >> 33) & 0x1ULL & UMC_V12_0_HASH_1G) ^ \ + (((pa) >> 40) & 0x1ULL & UMC_V12_0_HASH_1T) ^ \ + (((pa) >> 47) & 0x1ULL & UMC_V12_0_HASH_4K)) +#define UMC_V12_0_SET_CHANNEL_HASH(channel_idx, pa) do { \ + (pa) &= ~(0x7ULL << UMC_V12_0_PA_CH4_BIT); \ + (pa) |= (UMC_V12_0_CHANNEL_HASH_CH4(channel_idx, pa) << UMC_V12_0_PA_CH4_BIT); \ + (pa) |= (UMC_V12_0_CHANNEL_HASH_CH5(channel_idx, pa) << UMC_V12_0_PA_CH5_BIT); \ + (pa) |= (UMC_V12_0_CHANNEL_HASH_CH6(channel_idx, pa) << UMC_V12_0_PA_CH6_BIT); \ + } while (0) + +bool umc_v12_0_is_uncorrectable_error(uint64_t mc_umc_status); +bool umc_v12_0_is_correctable_error(uint64_t mc_umc_status); + +extern const uint32_t + umc_v12_0_channel_idx_tbl[] + [UMC_V12_0_UMC_INSTANCE_NUM] + [UMC_V12_0_CHANNEL_INSTANCE_NUM]; + +extern struct amdgpu_umc_ras umc_v12_0_ras; + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v8_10.c b/drivers/gpu/drm/amd/amdgpu/umc_v8_10.c index 46bfdee79bfd..c4c77257710c 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v8_10.c +++ b/drivers/gpu/drm/amd/amdgpu/umc_v8_10.c @@ -336,7 +336,7 @@ static void umc_v8_10_ecc_info_query_correctable_error_count(struct amdgpu_devic uint32_t node_inst, uint32_t umc_inst, uint32_t ch_inst, unsigned long *error_count) { - uint64_t mc_umc_status; + uint16_t ecc_ce_cnt; uint32_t eccinfo_table_idx; struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); @@ -345,12 +345,10 @@ static void umc_v8_10_ecc_info_query_correctable_error_count(struct amdgpu_devic umc_inst * adev->umc.channel_inst_num + ch_inst; - /* check the MCUMC_STATUS */ - mc_umc_status = ras->umc_ecc.ecc[eccinfo_table_idx].mca_umc_status; - if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 && - REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1) { - *error_count += 1; - } + /* Retrieve CE count */ + ecc_ce_cnt = ras->umc_ecc.ecc[eccinfo_table_idx].ce_count_lo_chip; + if (ecc_ce_cnt) + *error_count += ecc_ce_cnt; } static void umc_v8_10_ecc_info_query_uncorrectable_error_count(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/umsch_mm_v4_0.c b/drivers/gpu/drm/amd/amdgpu/umsch_mm_v4_0.c new file mode 100644 index 000000000000..8e7b763cfdb7 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/umsch_mm_v4_0.c @@ -0,0 +1,424 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include <linux/firmware.h> +#include <linux/module.h> +#include "amdgpu.h" +#include "soc15_common.h" +#include "soc21.h" +#include "vcn/vcn_4_0_0_offset.h" +#include "vcn/vcn_4_0_0_sh_mask.h" + +#include "amdgpu_umsch_mm.h" +#include "umsch_mm_4_0_api_def.h" +#include "umsch_mm_v4_0.h" + +#define regUVD_IPX_DLDO_CONFIG 0x0064 +#define regUVD_IPX_DLDO_CONFIG_BASE_IDX 1 +#define regUVD_IPX_DLDO_STATUS 0x0065 +#define regUVD_IPX_DLDO_STATUS_BASE_IDX 1 + +#define UVD_IPX_DLDO_CONFIG__ONO0_PWR_CONFIG__SHIFT 0x00000002 +#define UVD_IPX_DLDO_CONFIG__ONO0_PWR_CONFIG_MASK 0x0000000cUL +#define UVD_IPX_DLDO_STATUS__ONO0_PWR_STATUS__SHIFT 0x00000001 +#define UVD_IPX_DLDO_STATUS__ONO0_PWR_STATUS_MASK 0x00000002UL + +static int umsch_mm_v4_0_load_microcode(struct amdgpu_umsch_mm *umsch) +{ + struct amdgpu_device *adev = umsch->ring.adev; + uint64_t data; + int r; + + r = amdgpu_umsch_mm_allocate_ucode_buffer(umsch); + if (r) + return r; + + r = amdgpu_umsch_mm_allocate_ucode_data_buffer(umsch); + if (r) + goto err_free_ucode_bo; + + umsch->cmd_buf_curr_ptr = umsch->cmd_buf_ptr; + + if (amdgpu_ip_version(adev, VCN_HWIP, 0) == IP_VERSION(4, 0, 5)) { + WREG32_SOC15(VCN, 0, regUVD_IPX_DLDO_CONFIG, + 1 << UVD_IPX_DLDO_CONFIG__ONO0_PWR_CONFIG__SHIFT); + SOC15_WAIT_ON_RREG(VCN, 0, regUVD_IPX_DLDO_STATUS, + 0 << UVD_IPX_DLDO_STATUS__ONO0_PWR_STATUS__SHIFT, + UVD_IPX_DLDO_STATUS__ONO0_PWR_STATUS_MASK); + } + + data = RREG32_SOC15(VCN, 0, regUMSCH_MES_RESET_CTRL); + data = REG_SET_FIELD(data, UMSCH_MES_RESET_CTRL, MES_CORE_SOFT_RESET, 0); + WREG32_SOC15_UMSCH(regUMSCH_MES_RESET_CTRL, data); + + data = RREG32_SOC15(VCN, 0, regVCN_MES_CNTL); + data = REG_SET_FIELD(data, VCN_MES_CNTL, MES_INVALIDATE_ICACHE, 1); + data = REG_SET_FIELD(data, VCN_MES_CNTL, MES_PIPE0_RESET, 1); + data = REG_SET_FIELD(data, VCN_MES_CNTL, MES_PIPE0_ACTIVE, 0); + data = REG_SET_FIELD(data, VCN_MES_CNTL, MES_HALT, 1); + WREG32_SOC15_UMSCH(regVCN_MES_CNTL, data); + + data = RREG32_SOC15(VCN, 0, regVCN_MES_IC_BASE_CNTL); + data = REG_SET_FIELD(data, VCN_MES_IC_BASE_CNTL, VMID, 0); + data = REG_SET_FIELD(data, VCN_MES_IC_BASE_CNTL, EXE_DISABLE, 0); + data = REG_SET_FIELD(data, VCN_MES_IC_BASE_CNTL, CACHE_POLICY, 0); + WREG32_SOC15_UMSCH(regVCN_MES_IC_BASE_CNTL, data); + + WREG32_SOC15_UMSCH(regVCN_MES_INTR_ROUTINE_START, + lower_32_bits(adev->umsch_mm.irq_start_addr >> 2)); + WREG32_SOC15_UMSCH(regVCN_MES_INTR_ROUTINE_START_HI, + upper_32_bits(adev->umsch_mm.irq_start_addr >> 2)); + + WREG32_SOC15_UMSCH(regVCN_MES_PRGRM_CNTR_START, + lower_32_bits(adev->umsch_mm.uc_start_addr >> 2)); + WREG32_SOC15_UMSCH(regVCN_MES_PRGRM_CNTR_START_HI, + upper_32_bits(adev->umsch_mm.uc_start_addr >> 2)); + + WREG32_SOC15_UMSCH(regVCN_MES_LOCAL_INSTR_BASE_LO, 0); + WREG32_SOC15_UMSCH(regVCN_MES_LOCAL_INSTR_BASE_HI, 0); + + data = adev->umsch_mm.uc_start_addr + adev->umsch_mm.ucode_size - 1; + WREG32_SOC15_UMSCH(regVCN_MES_LOCAL_INSTR_MASK_LO, lower_32_bits(data)); + WREG32_SOC15_UMSCH(regVCN_MES_LOCAL_INSTR_MASK_HI, upper_32_bits(data)); + + data = adev->firmware.load_type == AMDGPU_FW_LOAD_PSP ? + 0 : adev->umsch_mm.ucode_fw_gpu_addr; + WREG32_SOC15_UMSCH(regVCN_MES_IC_BASE_LO, lower_32_bits(data)); + WREG32_SOC15_UMSCH(regVCN_MES_IC_BASE_HI, upper_32_bits(data)); + + WREG32_SOC15_UMSCH(regVCN_MES_MIBOUND_LO, 0x1FFFFF); + + WREG32_SOC15_UMSCH(regVCN_MES_LOCAL_BASE0_LO, + lower_32_bits(adev->umsch_mm.data_start_addr)); + WREG32_SOC15_UMSCH(regVCN_MES_LOCAL_BASE0_HI, + upper_32_bits(adev->umsch_mm.data_start_addr)); + + WREG32_SOC15_UMSCH(regVCN_MES_LOCAL_MASK0_LO, + lower_32_bits(adev->umsch_mm.data_size - 1)); + WREG32_SOC15_UMSCH(regVCN_MES_LOCAL_MASK0_HI, + upper_32_bits(adev->umsch_mm.data_size - 1)); + + data = adev->firmware.load_type == AMDGPU_FW_LOAD_PSP ? + 0 : adev->umsch_mm.data_fw_gpu_addr; + WREG32_SOC15_UMSCH(regVCN_MES_DC_BASE_LO, lower_32_bits(data)); + WREG32_SOC15_UMSCH(regVCN_MES_DC_BASE_HI, upper_32_bits(data)); + + WREG32_SOC15_UMSCH(regVCN_MES_MDBOUND_LO, 0x3FFFF); + + data = RREG32_SOC15(VCN, 0, regUVD_UMSCH_FORCE); + data = REG_SET_FIELD(data, UVD_UMSCH_FORCE, IC_FORCE_GPUVM, 1); + data = REG_SET_FIELD(data, UVD_UMSCH_FORCE, DC_FORCE_GPUVM, 1); + WREG32_SOC15_UMSCH(regUVD_UMSCH_FORCE, data); + + data = RREG32_SOC15(VCN, 0, regVCN_MES_IC_OP_CNTL); + data = REG_SET_FIELD(data, VCN_MES_IC_OP_CNTL, PRIME_ICACHE, 0); + data = REG_SET_FIELD(data, VCN_MES_IC_OP_CNTL, INVALIDATE_CACHE, 1); + WREG32_SOC15_UMSCH(regVCN_MES_IC_OP_CNTL, data); + + data = RREG32_SOC15(VCN, 0, regVCN_MES_IC_OP_CNTL); + data = REG_SET_FIELD(data, VCN_MES_IC_OP_CNTL, PRIME_ICACHE, 1); + WREG32_SOC15_UMSCH(regVCN_MES_IC_OP_CNTL, data); + + WREG32_SOC15_UMSCH(regVCN_MES_GP0_LO, 0); + WREG32_SOC15_UMSCH(regVCN_MES_GP0_HI, 0); + + WREG32_SOC15_UMSCH(regVCN_MES_GP1_LO, 0); + WREG32_SOC15_UMSCH(regVCN_MES_GP1_HI, 0); + + data = RREG32_SOC15(VCN, 0, regVCN_MES_CNTL); + data = REG_SET_FIELD(data, VCN_MES_CNTL, MES_INVALIDATE_ICACHE, 0); + data = REG_SET_FIELD(data, VCN_MES_CNTL, MES_PIPE0_RESET, 0); + data = REG_SET_FIELD(data, VCN_MES_CNTL, MES_HALT, 0); + data = REG_SET_FIELD(data, VCN_MES_CNTL, MES_PIPE0_ACTIVE, 1); + WREG32_SOC15_UMSCH(regVCN_MES_CNTL, data); + + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) + amdgpu_umsch_mm_psp_execute_cmd_buf(umsch); + + r = SOC15_WAIT_ON_RREG(VCN, 0, regVCN_MES_MSTATUS_LO, 0xAAAAAAAA, 0xFFFFFFFF); + if (r) { + dev_err(adev->dev, "UMSCH FW Load: Failed, regVCN_MES_MSTATUS_LO: 0x%08x\n", + RREG32_SOC15(VCN, 0, regVCN_MES_MSTATUS_LO)); + goto err_free_data_bo; + } + + return 0; + +err_free_data_bo: + amdgpu_bo_free_kernel(&adev->umsch_mm.data_fw_obj, + &adev->umsch_mm.data_fw_gpu_addr, + (void **)&adev->umsch_mm.data_fw_ptr); +err_free_ucode_bo: + amdgpu_bo_free_kernel(&adev->umsch_mm.ucode_fw_obj, + &adev->umsch_mm.ucode_fw_gpu_addr, + (void **)&adev->umsch_mm.ucode_fw_ptr); + return r; +} + +static void umsch_mm_v4_0_aggregated_doorbell_init(struct amdgpu_umsch_mm *umsch) +{ + struct amdgpu_device *adev = umsch->ring.adev; + uint32_t data; + + data = RREG32_SOC15(VCN, 0, regVCN_AGDB_CTRL0); + data = REG_SET_FIELD(data, VCN_AGDB_CTRL0, OFFSET, + umsch->agdb_index[CONTEXT_PRIORITY_LEVEL_REALTIME]); + data = REG_SET_FIELD(data, VCN_AGDB_CTRL0, EN, 1); + WREG32_SOC15(VCN, 0, regVCN_AGDB_CTRL0, data); + + data = RREG32_SOC15(VCN, 0, regVCN_AGDB_CTRL1); + data = REG_SET_FIELD(data, VCN_AGDB_CTRL1, OFFSET, + umsch->agdb_index[CONTEXT_PRIORITY_LEVEL_FOCUS]); + data = REG_SET_FIELD(data, VCN_AGDB_CTRL1, EN, 1); + WREG32_SOC15(VCN, 0, regVCN_AGDB_CTRL1, data); + + data = RREG32_SOC15(VCN, 0, regVCN_AGDB_CTRL2); + data = REG_SET_FIELD(data, VCN_AGDB_CTRL2, OFFSET, + umsch->agdb_index[CONTEXT_PRIORITY_LEVEL_NORMAL]); + data = REG_SET_FIELD(data, VCN_AGDB_CTRL2, EN, 1); + WREG32_SOC15(VCN, 0, regVCN_AGDB_CTRL2, data); + + data = RREG32_SOC15(VCN, 0, regVCN_AGDB_CTRL3); + data = REG_SET_FIELD(data, VCN_AGDB_CTRL3, OFFSET, + umsch->agdb_index[CONTEXT_PRIORITY_LEVEL_IDLE]); + data = REG_SET_FIELD(data, VCN_AGDB_CTRL3, EN, 1); + WREG32_SOC15(VCN, 0, regVCN_AGDB_CTRL3, data); +} + +static int umsch_mm_v4_0_ring_start(struct amdgpu_umsch_mm *umsch) +{ + struct amdgpu_ring *ring = &umsch->ring; + struct amdgpu_device *adev = ring->adev; + uint32_t data; + + data = RREG32_SOC15(VCN, 0, regVCN_UMSCH_RB_DB_CTRL); + data = REG_SET_FIELD(data, VCN_UMSCH_RB_DB_CTRL, OFFSET, ring->doorbell_index); + data = REG_SET_FIELD(data, VCN_UMSCH_RB_DB_CTRL, EN, 1); + WREG32_SOC15(VCN, 0, regVCN_UMSCH_RB_DB_CTRL, data); + + adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell, + (adev->doorbell_index.vcn.vcn_ring0_1 << 1), 0); + + WREG32_SOC15(VCN, 0, regVCN_UMSCH_RB_BASE_LO, lower_32_bits(ring->gpu_addr)); + WREG32_SOC15(VCN, 0, regVCN_UMSCH_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); + + WREG32_SOC15(VCN, 0, regVCN_UMSCH_RB_SIZE, ring->ring_size); + + data = RREG32_SOC15(VCN, 0, regVCN_RB_ENABLE); + data &= ~(VCN_RB_ENABLE__AUDIO_RB_EN_MASK); + WREG32_SOC15(VCN, 0, regVCN_RB_ENABLE, data); + + umsch_mm_v4_0_aggregated_doorbell_init(umsch); + + return 0; +} + +static int umsch_mm_v4_0_ring_stop(struct amdgpu_umsch_mm *umsch) +{ + struct amdgpu_ring *ring = &umsch->ring; + struct amdgpu_device *adev = ring->adev; + uint32_t data; + + data = RREG32_SOC15(VCN, 0, regVCN_RB_ENABLE); + data = REG_SET_FIELD(data, VCN_RB_ENABLE, UMSCH_RB_EN, 0); + WREG32_SOC15(VCN, 0, regVCN_RB_ENABLE, data); + + data = RREG32_SOC15(VCN, 0, regVCN_UMSCH_RB_DB_CTRL); + data = REG_SET_FIELD(data, VCN_UMSCH_RB_DB_CTRL, EN, 0); + WREG32_SOC15(VCN, 0, regVCN_UMSCH_RB_DB_CTRL, data); + + if (amdgpu_ip_version(adev, VCN_HWIP, 0) == IP_VERSION(4, 0, 5)) { + WREG32_SOC15(VCN, 0, regUVD_IPX_DLDO_CONFIG, + 2 << UVD_IPX_DLDO_CONFIG__ONO0_PWR_CONFIG__SHIFT); + SOC15_WAIT_ON_RREG(VCN, 0, regUVD_IPX_DLDO_STATUS, + 1 << UVD_IPX_DLDO_STATUS__ONO0_PWR_STATUS__SHIFT, + UVD_IPX_DLDO_STATUS__ONO0_PWR_STATUS_MASK); + } + + return 0; +} + +static int umsch_mm_v4_0_set_hw_resources(struct amdgpu_umsch_mm *umsch) +{ + union UMSCHAPI__SET_HW_RESOURCES set_hw_resources = {}; + struct amdgpu_device *adev = umsch->ring.adev; + int r; + + set_hw_resources.header.type = UMSCH_API_TYPE_SCHEDULER; + set_hw_resources.header.opcode = UMSCH_API_SET_HW_RSRC; + set_hw_resources.header.dwsize = API_FRAME_SIZE_IN_DWORDS; + + set_hw_resources.vmid_mask_mm_vcn = umsch->vmid_mask_mm_vcn; + set_hw_resources.vmid_mask_mm_vpe = umsch->vmid_mask_mm_vpe; + set_hw_resources.engine_mask = umsch->engine_mask; + + set_hw_resources.vcn0_hqd_mask[0] = umsch->vcn0_hqd_mask; + set_hw_resources.vcn1_hqd_mask[0] = umsch->vcn1_hqd_mask; + set_hw_resources.vcn_hqd_mask[0] = umsch->vcn_hqd_mask[0]; + set_hw_resources.vcn_hqd_mask[1] = umsch->vcn_hqd_mask[1]; + set_hw_resources.vpe_hqd_mask[0] = umsch->vpe_hqd_mask; + + set_hw_resources.g_sch_ctx_gpu_mc_ptr = umsch->sch_ctx_gpu_addr; + + set_hw_resources.enable_level_process_quantum_check = 1; + + memcpy(set_hw_resources.mmhub_base, adev->reg_offset[MMHUB_HWIP][0], + sizeof(uint32_t) * 5); + set_hw_resources.mmhub_version = + IP_VERSION_MAJ_MIN_REV(amdgpu_ip_version(adev, MMHUB_HWIP, 0)); + + memcpy(set_hw_resources.osssys_base, adev->reg_offset[OSSSYS_HWIP][0], + sizeof(uint32_t) * 5); + set_hw_resources.osssys_version = + IP_VERSION_MAJ_MIN_REV(amdgpu_ip_version(adev, OSSSYS_HWIP, 0)); + + set_hw_resources.vcn_version = + IP_VERSION_MAJ_MIN_REV(amdgpu_ip_version(adev, VCN_HWIP, 0)); + set_hw_resources.vpe_version = + IP_VERSION_MAJ_MIN_REV(amdgpu_ip_version(adev, VPE_HWIP, 0)); + + set_hw_resources.api_status.api_completion_fence_addr = umsch->ring.fence_drv.gpu_addr; + set_hw_resources.api_status.api_completion_fence_value = ++umsch->ring.fence_drv.sync_seq; + + r = amdgpu_umsch_mm_submit_pkt(umsch, &set_hw_resources.max_dwords_in_api, + API_FRAME_SIZE_IN_DWORDS); + if (r) + return r; + + r = amdgpu_umsch_mm_query_fence(umsch); + if (r) { + dev_err(adev->dev, "UMSCH SET_HW_RESOURCES: Failed\n"); + return r; + } + + return 0; +} + +static int umsch_mm_v4_0_add_queue(struct amdgpu_umsch_mm *umsch, + struct umsch_mm_add_queue_input *input_ptr) +{ + struct amdgpu_device *adev = umsch->ring.adev; + union UMSCHAPI__ADD_QUEUE add_queue = {}; + int r; + + add_queue.header.type = UMSCH_API_TYPE_SCHEDULER; + add_queue.header.opcode = UMSCH_API_ADD_QUEUE; + add_queue.header.dwsize = API_FRAME_SIZE_IN_DWORDS; + + add_queue.process_id = input_ptr->process_id; + add_queue.page_table_base_addr = input_ptr->page_table_base_addr; + add_queue.process_va_start = input_ptr->process_va_start; + add_queue.process_va_end = input_ptr->process_va_end; + add_queue.process_quantum = input_ptr->process_quantum; + add_queue.process_csa_addr = input_ptr->process_csa_addr; + add_queue.context_quantum = input_ptr->context_quantum; + add_queue.context_csa_addr = input_ptr->context_csa_addr; + add_queue.inprocess_context_priority = input_ptr->inprocess_context_priority; + add_queue.context_global_priority_level = + (enum UMSCH_AMD_PRIORITY_LEVEL)input_ptr->context_global_priority_level; + add_queue.doorbell_offset_0 = input_ptr->doorbell_offset_0; + add_queue.doorbell_offset_1 = input_ptr->doorbell_offset_1; + add_queue.affinity.u32All = input_ptr->affinity; + add_queue.mqd_addr = input_ptr->mqd_addr; + add_queue.engine_type = (enum UMSCH_ENGINE_TYPE)input_ptr->engine_type; + add_queue.h_context = input_ptr->h_context; + add_queue.h_queue = input_ptr->h_queue; + add_queue.vm_context_cntl = input_ptr->vm_context_cntl; + add_queue.is_context_suspended = input_ptr->is_context_suspended; + + add_queue.api_status.api_completion_fence_addr = umsch->ring.fence_drv.gpu_addr; + add_queue.api_status.api_completion_fence_value = ++umsch->ring.fence_drv.sync_seq; + + r = amdgpu_umsch_mm_submit_pkt(umsch, &add_queue.max_dwords_in_api, + API_FRAME_SIZE_IN_DWORDS); + if (r) + return r; + + r = amdgpu_umsch_mm_query_fence(umsch); + if (r) { + dev_err(adev->dev, "UMSCH ADD_QUEUE: Failed\n"); + return r; + } + + return 0; +} + +static int umsch_mm_v4_0_remove_queue(struct amdgpu_umsch_mm *umsch, + struct umsch_mm_remove_queue_input *input_ptr) +{ + union UMSCHAPI__REMOVE_QUEUE remove_queue = {}; + struct amdgpu_device *adev = umsch->ring.adev; + int r; + + remove_queue.header.type = UMSCH_API_TYPE_SCHEDULER; + remove_queue.header.opcode = UMSCH_API_REMOVE_QUEUE; + remove_queue.header.dwsize = API_FRAME_SIZE_IN_DWORDS; + + remove_queue.doorbell_offset_0 = input_ptr->doorbell_offset_0; + remove_queue.doorbell_offset_1 = input_ptr->doorbell_offset_1; + remove_queue.context_csa_addr = input_ptr->context_csa_addr; + + remove_queue.api_status.api_completion_fence_addr = umsch->ring.fence_drv.gpu_addr; + remove_queue.api_status.api_completion_fence_value = ++umsch->ring.fence_drv.sync_seq; + + r = amdgpu_umsch_mm_submit_pkt(umsch, &remove_queue.max_dwords_in_api, + API_FRAME_SIZE_IN_DWORDS); + if (r) + return r; + + r = amdgpu_umsch_mm_query_fence(umsch); + if (r) { + dev_err(adev->dev, "UMSCH REMOVE_QUEUE: Failed\n"); + return r; + } + + return 0; +} + +static int umsch_mm_v4_0_set_regs(struct amdgpu_umsch_mm *umsch) +{ + struct amdgpu_device *adev = container_of(umsch, struct amdgpu_device, umsch_mm); + + umsch->rb_wptr = SOC15_REG_OFFSET(VCN, 0, regVCN_UMSCH_RB_WPTR); + umsch->rb_rptr = SOC15_REG_OFFSET(VCN, 0, regVCN_UMSCH_RB_RPTR); + + return 0; +} + +static const struct umsch_mm_funcs umsch_mm_v4_0_funcs = { + .set_hw_resources = umsch_mm_v4_0_set_hw_resources, + .add_queue = umsch_mm_v4_0_add_queue, + .remove_queue = umsch_mm_v4_0_remove_queue, + .set_regs = umsch_mm_v4_0_set_regs, + .init_microcode = amdgpu_umsch_mm_init_microcode, + .load_microcode = umsch_mm_v4_0_load_microcode, + .ring_init = amdgpu_umsch_mm_ring_init, + .ring_start = umsch_mm_v4_0_ring_start, + .ring_stop = umsch_mm_v4_0_ring_stop, +}; + +void umsch_mm_v4_0_set_funcs(struct amdgpu_umsch_mm *umsch) +{ + umsch->funcs = &umsch_mm_v4_0_funcs; +} diff --git a/drivers/gpu/drm/amd/amdgpu/umsch_mm_v4_0.h b/drivers/gpu/drm/amd/amdgpu/umsch_mm_v4_0.h new file mode 100644 index 000000000000..06bc0fa74996 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/umsch_mm_v4_0.h @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __UMSCH_MM_V4_0_H__ +#define __UMSCH_MM_V4_0_H__ + +void umsch_mm_v4_0_set_funcs(struct amdgpu_umsch_mm *umsch); + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c b/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c index 0fef925b6602..a6006f231c65 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c @@ -577,8 +577,6 @@ static int uvd_v3_1_sw_init(void *handle) ptr += ucode_len; memcpy(&adev->uvd.keyselect, ptr, 4); - r = amdgpu_uvd_entity_init(adev); - return r; } @@ -706,6 +704,13 @@ static int uvd_v3_1_hw_fini(void *handle) return 0; } +static int uvd_v3_1_prepare_suspend(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + return amdgpu_uvd_prepare_suspend(adev); +} + static int uvd_v3_1_suspend(void *handle) { int r; @@ -806,6 +811,7 @@ static const struct amd_ip_funcs uvd_v3_1_ip_funcs = { .sw_fini = uvd_v3_1_sw_fini, .hw_init = uvd_v3_1_hw_init, .hw_fini = uvd_v3_1_hw_fini, + .prepare_suspend = uvd_v3_1_prepare_suspend, .suspend = uvd_v3_1_suspend, .resume = uvd_v3_1_resume, .is_idle = uvd_v3_1_is_idle, @@ -815,8 +821,7 @@ static const struct amd_ip_funcs uvd_v3_1_ip_funcs = { .set_powergating_state = uvd_v3_1_set_powergating_state, }; -const struct amdgpu_ip_block_version uvd_v3_1_ip_block = -{ +const struct amdgpu_ip_block_version uvd_v3_1_ip_block = { .type = AMD_IP_BLOCK_TYPE_UVD, .major = 3, .minor = 1, diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c index c108b8381795..1aa09ad7bbe3 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c @@ -127,8 +127,6 @@ static int uvd_v4_2_sw_init(void *handle) if (r) return r; - r = amdgpu_uvd_entity_init(adev); - return r; } @@ -220,6 +218,13 @@ static int uvd_v4_2_hw_fini(void *handle) return 0; } +static int uvd_v4_2_prepare_suspend(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + return amdgpu_uvd_prepare_suspend(adev); +} + static int uvd_v4_2_suspend(void *handle) { int r; @@ -756,6 +761,7 @@ static const struct amd_ip_funcs uvd_v4_2_ip_funcs = { .sw_fini = uvd_v4_2_sw_fini, .hw_init = uvd_v4_2_hw_init, .hw_fini = uvd_v4_2_hw_fini, + .prepare_suspend = uvd_v4_2_prepare_suspend, .suspend = uvd_v4_2_suspend, .resume = uvd_v4_2_resume, .is_idle = uvd_v4_2_is_idle, diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c index d7e31e48a2b8..f8b229b75435 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c @@ -125,8 +125,6 @@ static int uvd_v5_0_sw_init(void *handle) if (r) return r; - r = amdgpu_uvd_entity_init(adev); - return r; } @@ -218,6 +216,13 @@ static int uvd_v5_0_hw_fini(void *handle) return 0; } +static int uvd_v5_0_prepare_suspend(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + return amdgpu_uvd_prepare_suspend(adev); +} + static int uvd_v5_0_suspend(void *handle) { int r; @@ -863,6 +868,7 @@ static const struct amd_ip_funcs uvd_v5_0_ip_funcs = { .sw_fini = uvd_v5_0_sw_fini, .hw_init = uvd_v5_0_hw_init, .hw_fini = uvd_v5_0_hw_fini, + .prepare_suspend = uvd_v5_0_prepare_suspend, .suspend = uvd_v5_0_suspend, .resume = uvd_v5_0_resume, .is_idle = uvd_v5_0_is_idle, diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c index 5fe872f4bea7..a9a6880f44e3 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c @@ -432,8 +432,6 @@ static int uvd_v6_0_sw_init(void *handle) } } - r = amdgpu_uvd_entity_init(adev); - return r; } @@ -542,6 +540,13 @@ static int uvd_v6_0_hw_fini(void *handle) return 0; } +static int uvd_v6_0_prepare_suspend(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + return amdgpu_uvd_prepare_suspend(adev); +} + static int uvd_v6_0_suspend(void *handle) { int r; @@ -1528,6 +1533,7 @@ static const struct amd_ip_funcs uvd_v6_0_ip_funcs = { .sw_fini = uvd_v6_0_sw_fini, .hw_init = uvd_v6_0_hw_init, .hw_fini = uvd_v6_0_hw_fini, + .prepare_suspend = uvd_v6_0_prepare_suspend, .suspend = uvd_v6_0_suspend, .resume = uvd_v6_0_resume, .is_idle = uvd_v6_0_is_idle, diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c index abaa4463e906..6068b784dc69 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c @@ -480,10 +480,6 @@ static int uvd_v7_0_sw_init(void *handle) if (r) return r; - r = amdgpu_uvd_entity_init(adev); - if (r) - return r; - r = amdgpu_virt_alloc_mm_table(adev); if (r) return r; @@ -612,6 +608,13 @@ static int uvd_v7_0_hw_fini(void *handle) return 0; } +static int uvd_v7_0_prepare_suspend(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + return amdgpu_uvd_prepare_suspend(adev); +} + static int uvd_v7_0_suspend(void *handle) { int r; @@ -679,11 +682,11 @@ static void uvd_v7_0_mc_resume(struct amdgpu_device *adev) if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW, i == 0 ? - adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].tmr_mc_addr_lo: + adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].tmr_mc_addr_lo : adev->firmware.ucode[AMDGPU_UCODE_ID_UVD1].tmr_mc_addr_lo); WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH, i == 0 ? - adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].tmr_mc_addr_hi: + adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].tmr_mc_addr_hi : adev->firmware.ucode[AMDGPU_UCODE_ID_UVD1].tmr_mc_addr_hi); WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_OFFSET0, 0); offset = 0; @@ -1787,6 +1790,7 @@ const struct amd_ip_funcs uvd_v7_0_ip_funcs = { .sw_fini = uvd_v7_0_sw_fini, .hw_init = uvd_v7_0_hw_init, .hw_fini = uvd_v7_0_hw_fini, + .prepare_suspend = uvd_v7_0_prepare_suspend, .suspend = uvd_v7_0_suspend, .resume = uvd_v7_0_resume, .is_idle = NULL /* uvd_v7_0_is_idle */, @@ -1908,8 +1912,7 @@ static void uvd_v7_0_set_irq_funcs(struct amdgpu_device *adev) } } -const struct amdgpu_ip_block_version uvd_v7_0_ip_block = -{ +const struct amdgpu_ip_block_version uvd_v7_0_ip_block = { .type = AMD_IP_BLOCK_TYPE_UVD, .major = 7, .minor = 0, diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c index 67eb01fef789..a08e7abca423 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c @@ -441,8 +441,6 @@ static int vce_v2_0_sw_init(void *handle) return r; } - r = amdgpu_vce_entity_init(adev); - return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c index 8def62c83ffd..f4760748d349 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c @@ -450,8 +450,6 @@ static int vce_v3_0_sw_init(void *handle) return r; } - r = amdgpu_vce_entity_init(adev); - return r; } @@ -998,8 +996,7 @@ static void vce_v3_0_set_irq_funcs(struct amdgpu_device *adev) adev->vce.irq.funcs = &vce_v3_0_irq_funcs; }; -const struct amdgpu_ip_block_version vce_v3_0_ip_block = -{ +const struct amdgpu_ip_block_version vce_v3_0_ip_block = { .type = AMD_IP_BLOCK_TYPE_VCE, .major = 3, .minor = 0, @@ -1007,8 +1004,7 @@ const struct amdgpu_ip_block_version vce_v3_0_ip_block = .funcs = &vce_v3_0_ip_funcs, }; -const struct amdgpu_ip_block_version vce_v3_1_ip_block = -{ +const struct amdgpu_ip_block_version vce_v3_1_ip_block = { .type = AMD_IP_BLOCK_TYPE_VCE, .major = 3, .minor = 1, @@ -1016,8 +1012,7 @@ const struct amdgpu_ip_block_version vce_v3_1_ip_block = .funcs = &vce_v3_0_ip_funcs, }; -const struct amdgpu_ip_block_version vce_v3_4_ip_block = -{ +const struct amdgpu_ip_block_version vce_v3_4_ip_block = { .type = AMD_IP_BLOCK_TYPE_VCE, .major = 3, .minor = 4, diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c index e0b70cd3b697..06d787385ad4 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c @@ -486,11 +486,6 @@ static int vce_v4_0_sw_init(void *handle) return r; } - - r = amdgpu_vce_entity_init(adev); - if (r) - return r; - r = amdgpu_virt_alloc_mm_table(adev); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c index 16feb491adf5..25ba27151ac0 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c @@ -473,7 +473,7 @@ static void vcn_v1_0_disable_clock_gating(struct amdgpu_device *adev) if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG) data |= 1 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; else - data &= ~ UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK; + data &= ~UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK; data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT; data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT; @@ -1772,7 +1772,7 @@ static int vcn_v1_0_set_powergating_state(void *handle, int ret; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - if(state == adev->vcn.cur_state) + if (state == adev->vcn.cur_state) return 0; if (state == AMD_PG_STATE_GATE) @@ -1780,7 +1780,7 @@ static int vcn_v1_0_set_powergating_state(void *handle, else ret = vcn_v1_0_start(adev); - if(!ret) + if (!ret) adev->vcn.cur_state = state; return ret; } @@ -2065,8 +2065,7 @@ static void vcn_v1_0_set_irq_funcs(struct amdgpu_device *adev) adev->vcn.inst->irq.funcs = &vcn_v1_0_irq_funcs; } -const struct amdgpu_ip_block_version vcn_v1_0_ip_block = -{ +const struct amdgpu_ip_block_version vcn_v1_0_ip_block = { .type = AMD_IP_BLOCK_TYPE_VCN, .major = 1, .minor = 0, diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c index c975aed2f6c7..18794394c5a0 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c @@ -881,9 +881,7 @@ static int vcn_v2_0_start_dpg_mode(struct amdgpu_device *adev, bool indirect) UVD_MASTINT_EN__VCPU_EN_MASK, 0, indirect); if (indirect) - psp_update_vcn_sram(adev, 0, adev->vcn.inst->dpg_sram_gpu_addr, - (uint32_t)((uintptr_t)adev->vcn.inst->dpg_sram_curr_addr - - (uintptr_t)adev->vcn.inst->dpg_sram_cpu_addr)); + amdgpu_vcn_psp_update_sram(adev, 0, 0); /* force RBC into idle state */ rb_bufsz = order_base_2(ring->ring_size); diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c index bb1875f926f1..aba403d71806 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c @@ -187,7 +187,7 @@ static int vcn_v2_5_sw_init(void *handle) ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + (amdgpu_sriov_vf(adev) ? 2*j : 8*j); - if (adev->ip_versions[UVD_HWIP][0] == IP_VERSION(2, 5, 0)) + if (amdgpu_ip_version(adev, UVD_HWIP, 0) == IP_VERSION(2, 5, 0)) ring->vm_hub = AMDGPU_MMHUB1(0); else ring->vm_hub = AMDGPU_MMHUB0(0); @@ -207,7 +207,8 @@ static int vcn_v2_5_sw_init(void *handle) ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + (amdgpu_sriov_vf(adev) ? (1 + i + 2*j) : (2 + i + 8*j)); - if (adev->ip_versions[UVD_HWIP][0] == IP_VERSION(2, 5, 0)) + if (amdgpu_ip_version(adev, UVD_HWIP, 0) == + IP_VERSION(2, 5, 0)) ring->vm_hub = AMDGPU_MMHUB1(0); else ring->vm_hub = AMDGPU_MMHUB0(0); @@ -794,7 +795,7 @@ static void vcn_v2_6_enable_ras(struct amdgpu_device *adev, int inst_idx, { uint32_t tmp; - if (adev->ip_versions[UVD_HWIP][0] != IP_VERSION(2, 6, 0)) + if (amdgpu_ip_version(adev, UVD_HWIP, 0) != IP_VERSION(2, 6, 0)) return; tmp = VCN_RAS_CNTL__VCPU_VCODEC_REARM_MASK | @@ -912,9 +913,7 @@ static int vcn_v2_5_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, boo UVD_MASTINT_EN__VCPU_EN_MASK, 0, indirect); if (indirect) - psp_update_vcn_sram(adev, inst_idx, adev->vcn.inst[inst_idx].dpg_sram_gpu_addr, - (uint32_t)((uintptr_t)adev->vcn.inst[inst_idx].dpg_sram_curr_addr - - (uintptr_t)adev->vcn.inst[inst_idx].dpg_sram_cpu_addr)); + amdgpu_vcn_psp_update_sram(adev, inst_idx, 0); ring = &adev->vcn.inst[inst_idx].ring_dec; /* force RBC into idle state */ @@ -1987,7 +1986,7 @@ static struct amdgpu_vcn_ras vcn_v2_6_ras = { static void vcn_v2_5_set_ras_funcs(struct amdgpu_device *adev) { - switch (adev->ip_versions[VCN_HWIP][0]) { + switch (amdgpu_ip_version(adev, VCN_HWIP, 0)) { case IP_VERSION(2, 6, 0): adev->vcn.ras = &vcn_v2_6_ras; break; diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c index c8f63b3c6f69..e02af4de521c 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c @@ -100,7 +100,8 @@ static int vcn_v3_0_early_init(void *handle) /* both instances are harvested, disable the block */ return -ENOENT; - if (adev->ip_versions[UVD_HWIP][0] == IP_VERSION(3, 0, 33)) + if (amdgpu_ip_version(adev, UVD_HWIP, 0) == + IP_VERSION(3, 0, 33)) adev->vcn.num_enc_rings = 0; else adev->vcn.num_enc_rings = 2; @@ -227,9 +228,10 @@ static int vcn_v3_0_sw_init(void *handle) cpu_to_le32(AMDGPU_VCN_FW_SHARED_FLAG_0_RB); fw_shared->sw_ring.is_enabled = cpu_to_le32(DEC_SW_RING_ENABLED); fw_shared->present_flag_0 |= AMDGPU_VCN_SMU_VERSION_INFO_FLAG; - if (adev->ip_versions[UVD_HWIP][0] == IP_VERSION(3, 1, 2)) + if (amdgpu_ip_version(adev, UVD_HWIP, 0) == IP_VERSION(3, 1, 2)) fw_shared->smu_interface_info.smu_interface_type = 2; - else if (adev->ip_versions[UVD_HWIP][0] == IP_VERSION(3, 1, 1)) + else if (amdgpu_ip_version(adev, UVD_HWIP, 0) == + IP_VERSION(3, 1, 1)) fw_shared->smu_interface_info.smu_interface_type = 1; if (amdgpu_vcnfw_log) @@ -1037,9 +1039,7 @@ static int vcn_v3_0_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, boo VCN, inst_idx, mmUVD_VCPU_CNTL), tmp, 0, indirect); if (indirect) - psp_update_vcn_sram(adev, inst_idx, adev->vcn.inst[inst_idx].dpg_sram_gpu_addr, - (uint32_t)((uintptr_t)adev->vcn.inst[inst_idx].dpg_sram_curr_addr - - (uintptr_t)adev->vcn.inst[inst_idx].dpg_sram_cpu_addr)); + amdgpu_vcn_psp_update_sram(adev, inst_idx, 0); ring = &adev->vcn.inst[inst_idx].ring_dec; /* force RBC into idle state */ @@ -1107,7 +1107,7 @@ static int vcn_v3_0_start(struct amdgpu_device *adev) if (adev->vcn.harvest_config & (1 << i)) continue; - if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG){ + if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) { r = vcn_v3_0_start_dpg_mode(adev, i, adev->vcn.indirect_sram); continue; } @@ -1257,7 +1257,8 @@ static int vcn_v3_0_start(struct amdgpu_device *adev) fw_shared->rb.wptr = lower_32_bits(ring->wptr); fw_shared->multi_queue.decode_queue_mode &= cpu_to_le32(~FW_QUEUE_RING_RESET); - if (adev->ip_versions[UVD_HWIP][0] != IP_VERSION(3, 0, 33)) { + if (amdgpu_ip_version(adev, UVD_HWIP, 0) != + IP_VERSION(3, 0, 33)) { fw_shared->multi_queue.encode_generalpurpose_queue_mode |= cpu_to_le32(FW_QUEUE_RING_RESET); ring = &adev->vcn.inst[i].ring_enc[0]; WREG32_SOC15(VCN, i, mmUVD_RB_RPTR, lower_32_bits(ring->wptr)); @@ -1630,7 +1631,8 @@ static int vcn_v3_0_pause_dpg_mode(struct amdgpu_device *adev, UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK, ~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK); - if (adev->ip_versions[UVD_HWIP][0] != IP_VERSION(3, 0, 33)) { + if (amdgpu_ip_version(adev, UVD_HWIP, 0) != + IP_VERSION(3, 0, 33)) { /* Restore */ fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr; fw_shared->multi_queue.encode_generalpurpose_queue_mode |= cpu_to_le32(FW_QUEUE_RING_RESET); @@ -1791,7 +1793,7 @@ static int vcn_v3_0_dec_msg(struct amdgpu_cs_parser *p, struct amdgpu_job *job, struct amdgpu_bo *bo; uint64_t start, end; unsigned int i; - void * ptr; + void *ptr; int r; addr &= AMDGPU_GMC_HOLE_MASK; @@ -2097,7 +2099,7 @@ static int vcn_v3_0_set_clockgating_state(void *handle, enum amd_clockgating_state state) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - bool enable = (state == AMD_CG_STATE_GATE) ? true : false; + bool enable = state == AMD_CG_STATE_GATE; int i; for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { @@ -2131,7 +2133,7 @@ static int vcn_v3_0_set_powergating_state(void *handle, return 0; } - if(state == adev->vcn.cur_state) + if (state == adev->vcn.cur_state) return 0; if (state == AMD_PG_STATE_GATE) @@ -2139,7 +2141,7 @@ static int vcn_v3_0_set_powergating_state(void *handle, else ret = vcn_v3_0_start(adev); - if(!ret) + if (!ret) adev->vcn.cur_state = state; return ret; @@ -2230,8 +2232,7 @@ static const struct amd_ip_funcs vcn_v3_0_ip_funcs = { .set_powergating_state = vcn_v3_0_set_powergating_state, }; -const struct amdgpu_ip_block_version vcn_v3_0_ip_block = -{ +const struct amdgpu_ip_block_version vcn_v3_0_ip_block = { .type = AMD_IP_BLOCK_TYPE_VCN, .major = 3, .minor = 0, diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c index 259795098173..48bfcd0d558b 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c @@ -169,8 +169,12 @@ static int vcn_v4_0_sw_init(void *handle) fw_shared->smu_dpm_interface.smu_interface_type = (adev->flags & AMD_IS_APU) ? AMDGPU_VCN_SMU_DPM_INTERFACE_APU : AMDGPU_VCN_SMU_DPM_INTERFACE_DGPU; - if (amdgpu_sriov_vf(adev)) - fw_shared->present_flag_0 |= cpu_to_le32(AMDGPU_VCN_VF_RB_SETUP_FLAG); + if (amdgpu_ip_version(adev, VCN_HWIP, 0) == + IP_VERSION(4, 0, 2)) { + fw_shared->present_flag_0 |= AMDGPU_FW_SHARED_FLAG_0_DRM_KEY_INJECT; + fw_shared->drm_key_wa.method = + AMDGPU_DRM_KEY_INJECT_WORKAROUND_VCNFW_ASD_HANDSHAKING; + } if (amdgpu_vcnfw_log) amdgpu_vcn_fwlog_init(&adev->vcn.inst[i]); @@ -870,8 +874,6 @@ static void vcn_v4_0_enable_clock_gating(struct amdgpu_device *adev, int inst) | UVD_SUVD_CGC_CTRL__IME_MODE_MASK | UVD_SUVD_CGC_CTRL__SITE_MODE_MASK); WREG32_SOC15(VCN, inst, regUVD_SUVD_CGC_CTRL, data); - - return; } static void vcn_v4_0_enable_ras(struct amdgpu_device *adev, int inst_idx, @@ -993,9 +995,7 @@ static int vcn_v4_0_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, boo if (indirect) - psp_update_vcn_sram(adev, inst_idx, adev->vcn.inst[inst_idx].dpg_sram_gpu_addr, - (uint32_t)((uintptr_t)adev->vcn.inst[inst_idx].dpg_sram_curr_addr - - (uintptr_t)adev->vcn.inst[inst_idx].dpg_sram_cpu_addr)); + amdgpu_vcn_psp_update_sram(adev, inst_idx, 0); ring = &adev->vcn.inst[inst_idx].ring_enc[0]; @@ -1135,11 +1135,11 @@ static int vcn_v4_0_start(struct amdgpu_device *adev) if (status & 2) break; mdelay(10); - if (amdgpu_emu_mode==1) + if (amdgpu_emu_mode == 1) msleep(1); } - if (amdgpu_emu_mode==1) { + if (amdgpu_emu_mode == 1) { r = -1; if (status & 2) { r = 0; @@ -1206,6 +1206,24 @@ static int vcn_v4_0_start(struct amdgpu_device *adev) return 0; } +static int vcn_v4_0_init_ring_metadata(struct amdgpu_device *adev, uint32_t vcn_inst, struct amdgpu_ring *ring_enc) +{ + struct amdgpu_vcn_rb_metadata *rb_metadata = NULL; + uint8_t *rb_ptr = (uint8_t *)ring_enc->ring; + + rb_ptr += ring_enc->ring_size; + rb_metadata = (struct amdgpu_vcn_rb_metadata *)rb_ptr; + + memset(rb_metadata, 0, sizeof(struct amdgpu_vcn_rb_metadata)); + rb_metadata->size = sizeof(struct amdgpu_vcn_rb_metadata); + rb_metadata->present_flag_0 |= cpu_to_le32(AMDGPU_VCN_VF_RB_SETUP_FLAG); + rb_metadata->present_flag_0 |= cpu_to_le32(AMDGPU_VCN_VF_RB_DECOUPLE_FLAG); + rb_metadata->version = 1; + rb_metadata->ring_id = vcn_inst & 0xFF; + + return 0; +} + static int vcn_v4_0_start_sriov(struct amdgpu_device *adev) { int i; @@ -1328,11 +1346,30 @@ static int vcn_v4_0_start_sriov(struct amdgpu_device *adev) rb_enc_addr = ring_enc->gpu_addr; rb_setup->is_rb_enabled_flags |= RB_ENABLED; - rb_setup->rb_addr_lo = lower_32_bits(rb_enc_addr); - rb_setup->rb_addr_hi = upper_32_bits(rb_enc_addr); - rb_setup->rb_size = ring_enc->ring_size / 4; fw_shared->present_flag_0 |= cpu_to_le32(AMDGPU_VCN_VF_RB_SETUP_FLAG); + if (amdgpu_sriov_is_vcn_rb_decouple(adev)) { + vcn_v4_0_init_ring_metadata(adev, i, ring_enc); + + memset((void *)&rb_setup->rb_info, 0, sizeof(struct amdgpu_vcn_rb_setup_info) * MAX_NUM_VCN_RB_SETUP); + if (!(adev->vcn.harvest_config & (1 << 0))) { + rb_setup->rb_info[0].rb_addr_lo = lower_32_bits(adev->vcn.inst[0].ring_enc[0].gpu_addr); + rb_setup->rb_info[0].rb_addr_hi = upper_32_bits(adev->vcn.inst[0].ring_enc[0].gpu_addr); + rb_setup->rb_info[0].rb_size = adev->vcn.inst[0].ring_enc[0].ring_size / 4; + } + if (!(adev->vcn.harvest_config & (1 << 1))) { + rb_setup->rb_info[2].rb_addr_lo = lower_32_bits(adev->vcn.inst[1].ring_enc[0].gpu_addr); + rb_setup->rb_info[2].rb_addr_hi = upper_32_bits(adev->vcn.inst[1].ring_enc[0].gpu_addr); + rb_setup->rb_info[2].rb_size = adev->vcn.inst[1].ring_enc[0].ring_size / 4; + } + fw_shared->decouple.is_enabled = 1; + fw_shared->present_flag_0 |= cpu_to_le32(AMDGPU_VCN_VF_RB_DECOUPLE_FLAG); + } else { + rb_setup->rb_addr_lo = lower_32_bits(rb_enc_addr); + rb_setup->rb_addr_hi = upper_32_bits(rb_enc_addr); + rb_setup->rb_size = ring_enc->ring_size / 4; + } + MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i, regUVD_LMI_VCPU_NC0_64BIT_BAR_LOW), lower_32_bits(adev->vcn.inst[i].fw_shared.gpu_addr)); @@ -1800,10 +1837,11 @@ static int vcn_v4_0_ring_patch_cs_in_place(struct amdgpu_cs_parser *p, return 0; } -static const struct amdgpu_ring_funcs vcn_v4_0_unified_ring_vm_funcs = { +static struct amdgpu_ring_funcs vcn_v4_0_unified_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_ENC, .align_mask = 0x3f, .nop = VCN_ENC_CMD_NO_OP, + .extra_dw = sizeof(struct amdgpu_vcn_rb_metadata), .get_rptr = vcn_v4_0_unified_ring_get_rptr, .get_wptr = vcn_v4_0_unified_ring_get_wptr, .set_wptr = vcn_v4_0_unified_ring_set_wptr, @@ -1845,7 +1883,11 @@ static void vcn_v4_0_set_unified_ring_funcs(struct amdgpu_device *adev) if (adev->vcn.harvest_config & (1 << i)) continue; - adev->vcn.inst[i].ring_enc[0].funcs = &vcn_v4_0_unified_ring_vm_funcs; + if (amdgpu_ip_version(adev, VCN_HWIP, 0) == IP_VERSION(4, 0, 2)) + vcn_v4_0_unified_ring_vm_funcs.secure_submission_supported = true; + + adev->vcn.inst[i].ring_enc[0].funcs = + (const struct amdgpu_ring_funcs *)&vcn_v4_0_unified_ring_vm_funcs; adev->vcn.inst[i].ring_enc[0].me = i; DRM_INFO("VCN(%d) encode/decode are enabled in VM mode\n", i); @@ -1910,7 +1952,7 @@ static int vcn_v4_0_wait_for_idle(void *handle) static int vcn_v4_0_set_clockgating_state(void *handle, enum amd_clockgating_state state) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - bool enable = (state == AMD_CG_STATE_GATE) ? true : false; + bool enable = state == AMD_CG_STATE_GATE; int i; for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { @@ -1951,7 +1993,7 @@ static int vcn_v4_0_set_powergating_state(void *handle, enum amd_powergating_sta return 0; } - if(state == adev->vcn.cur_state) + if (state == adev->vcn.cur_state) return 0; if (state == AMD_PG_STATE_GATE) @@ -1959,7 +2001,7 @@ static int vcn_v4_0_set_powergating_state(void *handle, enum amd_powergating_sta else ret = vcn_v4_0_start(adev); - if(!ret) + if (!ret) adev->vcn.cur_state = state; return ret; @@ -2013,16 +2055,20 @@ static int vcn_v4_0_process_interrupt(struct amdgpu_device *adev, struct amdgpu_ { uint32_t ip_instance; - switch (entry->client_id) { - case SOC15_IH_CLIENTID_VCN: - ip_instance = 0; - break; - case SOC15_IH_CLIENTID_VCN1: - ip_instance = 1; - break; - default: - DRM_ERROR("Unhandled client id: %d\n", entry->client_id); - return 0; + if (amdgpu_sriov_is_vcn_rb_decouple(adev)) { + ip_instance = entry->ring_id; + } else { + switch (entry->client_id) { + case SOC15_IH_CLIENTID_VCN: + ip_instance = 0; + break; + case SOC15_IH_CLIENTID_VCN1: + ip_instance = 1; + break; + default: + DRM_ERROR("Unhandled client id: %d\n", entry->client_id); + return 0; + } } DRM_DEBUG("IH: VCN TRAP\n"); @@ -2093,8 +2139,7 @@ static const struct amd_ip_funcs vcn_v4_0_ip_funcs = { .set_powergating_state = vcn_v4_0_set_powergating_state, }; -const struct amdgpu_ip_block_version vcn_v4_0_ip_block = -{ +const struct amdgpu_ip_block_version vcn_v4_0_ip_block = { .type = AMD_IP_BLOCK_TYPE_VCN, .major = 4, .minor = 0, @@ -2149,7 +2194,7 @@ static struct amdgpu_vcn_ras vcn_v4_0_ras = { static void vcn_v4_0_set_ras_funcs(struct amdgpu_device *adev) { - switch (adev->ip_versions[VCN_HWIP][0]) { + switch (amdgpu_ip_version(adev, VCN_HWIP, 0)) { case IP_VERSION(4, 0, 0): adev->vcn.ras = &vcn_v4_0_ras; break; diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c index 5d67b8b8a3d6..810bbfccd6f2 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c @@ -31,6 +31,7 @@ #include "soc15d.h" #include "soc15_hw_ip.h" #include "vcn_v2_0.h" +#include "mmsch_v4_0_3.h" #include "vcn/vcn_4_0_3_offset.h" #include "vcn/vcn_4_0_3_sh_mask.h" @@ -44,6 +45,7 @@ #define VCN_VID_SOC_ADDRESS_2_0 0x1fb00 #define VCN1_VID_SOC_ADDRESS_3_0 0x48300 +static int vcn_v4_0_3_start_sriov(struct amdgpu_device *adev); static void vcn_v4_0_3_set_unified_ring_funcs(struct amdgpu_device *adev); static void vcn_v4_0_3_set_irq_funcs(struct amdgpu_device *adev); static int vcn_v4_0_3_set_powergating_state(void *handle, @@ -111,9 +113,16 @@ static int vcn_v4_0_3_sw_init(void *handle) ring = &adev->vcn.inst[i].ring_enc[0]; ring->use_doorbell = true; - ring->doorbell_index = - (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + - 9 * vcn_inst; + + if (!amdgpu_sriov_vf(adev)) + ring->doorbell_index = + (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + + 9 * vcn_inst; + else + ring->doorbell_index = + (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + + 32 * vcn_inst; + ring->vm_hub = AMDGPU_MMHUB0(adev->vcn.inst[i].aid_id); sprintf(ring->name, "vcn_unified_%d", adev->vcn.inst[i].aid_id); r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst->irq, 0, @@ -130,6 +139,12 @@ static int vcn_v4_0_3_sw_init(void *handle) amdgpu_vcn_fwlog_init(&adev->vcn.inst[i]); } + if (amdgpu_sriov_vf(adev)) { + r = amdgpu_virt_alloc_mm_table(adev); + if (r) + return r; + } + if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) adev->vcn.pause_dpg_mode = vcn_v4_0_3_pause_dpg_mode; @@ -167,6 +182,9 @@ static int vcn_v4_0_3_sw_fini(void *handle) drm_dev_exit(idx); } + if (amdgpu_sriov_vf(adev)) + amdgpu_virt_free_mm_table(adev); + r = amdgpu_vcn_suspend(adev); if (r) return r; @@ -189,33 +207,47 @@ static int vcn_v4_0_3_hw_init(void *handle) struct amdgpu_ring *ring; int i, r, vcn_inst; - for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { - vcn_inst = GET_INST(VCN, i); - ring = &adev->vcn.inst[i].ring_enc[0]; + if (amdgpu_sriov_vf(adev)) { + r = vcn_v4_0_3_start_sriov(adev); + if (r) + goto done; - if (ring->use_doorbell) { - adev->nbio.funcs->vcn_doorbell_range( - adev, ring->use_doorbell, - (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + - 9 * vcn_inst, - adev->vcn.inst[i].aid_id); - - WREG32_SOC15( - VCN, GET_INST(VCN, ring->me), - regVCN_RB1_DB_CTRL, - ring->doorbell_index - << VCN_RB1_DB_CTRL__OFFSET__SHIFT | - VCN_RB1_DB_CTRL__EN_MASK); - - /* Read DB_CTRL to flush the write DB_CTRL command. */ - RREG32_SOC15( - VCN, GET_INST(VCN, ring->me), - regVCN_RB1_DB_CTRL); + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + ring = &adev->vcn.inst[i].ring_enc[0]; + ring->wptr = 0; + ring->wptr_old = 0; + vcn_v4_0_3_unified_ring_set_wptr(ring); + ring->sched.ready = true; } + } else { + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + vcn_inst = GET_INST(VCN, i); + ring = &adev->vcn.inst[i].ring_enc[0]; + + if (ring->use_doorbell) { + adev->nbio.funcs->vcn_doorbell_range( + adev, ring->use_doorbell, + (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + + 9 * vcn_inst, + adev->vcn.inst[i].aid_id); + + WREG32_SOC15( + VCN, GET_INST(VCN, ring->me), + regVCN_RB1_DB_CTRL, + ring->doorbell_index + << VCN_RB1_DB_CTRL__OFFSET__SHIFT | + VCN_RB1_DB_CTRL__EN_MASK); + + /* Read DB_CTRL to flush the write DB_CTRL command. */ + RREG32_SOC15( + VCN, GET_INST(VCN, ring->me), + regVCN_RB1_DB_CTRL); + } - r = amdgpu_ring_test_helper(ring); - if (r) - goto done; + r = amdgpu_ring_test_helper(ring); + if (r) + goto done; + } } done: @@ -778,9 +810,7 @@ static int vcn_v4_0_3_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, b UVD_MASTINT_EN__VCPU_EN_MASK, 0, indirect); if (indirect) - psp_update_vcn_sram(adev, 0, adev->vcn.inst[inst_idx].dpg_sram_gpu_addr, - (uint32_t)((uintptr_t)adev->vcn.inst[inst_idx].dpg_sram_curr_addr - - (uintptr_t)adev->vcn.inst[inst_idx].dpg_sram_cpu_addr)); + amdgpu_vcn_psp_update_sram(adev, inst_idx, AMDGPU_UCODE_ID_VCN0_RAM); ring = &adev->vcn.inst[inst_idx].ring_enc[0]; @@ -815,6 +845,193 @@ static int vcn_v4_0_3_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, b return 0; } +static int vcn_v4_0_3_start_sriov(struct amdgpu_device *adev) +{ + int i, vcn_inst; + struct amdgpu_ring *ring_enc; + uint64_t cache_addr; + uint64_t rb_enc_addr; + uint64_t ctx_addr; + uint32_t param, resp, expected; + uint32_t offset, cache_size; + uint32_t tmp, timeout; + + struct amdgpu_mm_table *table = &adev->virt.mm_table; + uint32_t *table_loc; + uint32_t table_size; + uint32_t size, size_dw; + uint32_t init_status; + uint32_t enabled_vcn; + + struct mmsch_v4_0_cmd_direct_write + direct_wt = { {0} }; + struct mmsch_v4_0_cmd_direct_read_modify_write + direct_rd_mod_wt = { {0} }; + struct mmsch_v4_0_cmd_end end = { {0} }; + struct mmsch_v4_0_3_init_header header; + + volatile struct amdgpu_vcn4_fw_shared *fw_shared; + volatile struct amdgpu_fw_shared_rb_setup *rb_setup; + + direct_wt.cmd_header.command_type = + MMSCH_COMMAND__DIRECT_REG_WRITE; + direct_rd_mod_wt.cmd_header.command_type = + MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE; + end.cmd_header.command_type = MMSCH_COMMAND__END; + + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + vcn_inst = GET_INST(VCN, i); + + memset(&header, 0, sizeof(struct mmsch_v4_0_3_init_header)); + header.version = MMSCH_VERSION; + header.total_size = sizeof(struct mmsch_v4_0_3_init_header) >> 2; + + table_loc = (uint32_t *)table->cpu_addr; + table_loc += header.total_size; + + table_size = 0; + + MMSCH_V4_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCN, 0, regUVD_STATUS), + ~UVD_STATUS__UVD_BUSY, UVD_STATUS__UVD_BUSY); + + cache_size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4); + + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { + MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0, + regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), + adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + i].tmr_mc_addr_lo); + + MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0, + regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), + adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + i].tmr_mc_addr_hi); + + offset = 0; + MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0, + regUVD_VCPU_CACHE_OFFSET0), 0); + } else { + MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0, + regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), + lower_32_bits(adev->vcn.inst[i].gpu_addr)); + MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0, + regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), + upper_32_bits(adev->vcn.inst[i].gpu_addr)); + offset = cache_size; + MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0, + regUVD_VCPU_CACHE_OFFSET0), + AMDGPU_UVD_FIRMWARE_OFFSET >> 3); + } + + MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0, + regUVD_VCPU_CACHE_SIZE0), + cache_size); + + cache_addr = adev->vcn.inst[vcn_inst].gpu_addr + offset; + MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0, + regUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), lower_32_bits(cache_addr)); + MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0, + regUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), upper_32_bits(cache_addr)); + MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0, + regUVD_VCPU_CACHE_OFFSET1), 0); + MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0, + regUVD_VCPU_CACHE_SIZE1), AMDGPU_VCN_STACK_SIZE); + + cache_addr = adev->vcn.inst[vcn_inst].gpu_addr + offset + + AMDGPU_VCN_STACK_SIZE; + + MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0, + regUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW), lower_32_bits(cache_addr)); + + MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0, + regUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH), upper_32_bits(cache_addr)); + + MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0, + regUVD_VCPU_CACHE_OFFSET2), 0); + + MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0, + regUVD_VCPU_CACHE_SIZE2), AMDGPU_VCN_CONTEXT_SIZE); + + fw_shared = adev->vcn.inst[vcn_inst].fw_shared.cpu_addr; + rb_setup = &fw_shared->rb_setup; + + ring_enc = &adev->vcn.inst[vcn_inst].ring_enc[0]; + ring_enc->wptr = 0; + rb_enc_addr = ring_enc->gpu_addr; + + rb_setup->is_rb_enabled_flags |= RB_ENABLED; + rb_setup->rb_addr_lo = lower_32_bits(rb_enc_addr); + rb_setup->rb_addr_hi = upper_32_bits(rb_enc_addr); + rb_setup->rb_size = ring_enc->ring_size / 4; + fw_shared->present_flag_0 |= cpu_to_le32(AMDGPU_VCN_VF_RB_SETUP_FLAG); + + MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0, + regUVD_LMI_VCPU_NC0_64BIT_BAR_LOW), + lower_32_bits(adev->vcn.inst[vcn_inst].fw_shared.gpu_addr)); + MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0, + regUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH), + upper_32_bits(adev->vcn.inst[vcn_inst].fw_shared.gpu_addr)); + MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0, + regUVD_VCPU_NONCACHE_SIZE0), + AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_vcn4_fw_shared))); + MMSCH_V4_0_INSERT_END(); + + header.vcn0.init_status = 0; + header.vcn0.table_offset = header.total_size; + header.vcn0.table_size = table_size; + header.total_size += table_size; + + /* Send init table to mmsch */ + size = sizeof(struct mmsch_v4_0_3_init_header); + table_loc = (uint32_t *)table->cpu_addr; + memcpy((void *)table_loc, &header, size); + + ctx_addr = table->gpu_addr; + WREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_CTX_ADDR_LO, lower_32_bits(ctx_addr)); + WREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_CTX_ADDR_HI, upper_32_bits(ctx_addr)); + + tmp = RREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_VMID); + tmp &= ~MMSCH_VF_VMID__VF_CTX_VMID_MASK; + tmp |= (0 << MMSCH_VF_VMID__VF_CTX_VMID__SHIFT); + WREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_VMID, tmp); + + size = header.total_size; + WREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_CTX_SIZE, size); + + WREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_MAILBOX_RESP, 0); + + param = 0x00000001; + WREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_MAILBOX_HOST, param); + tmp = 0; + timeout = 1000; + resp = 0; + expected = MMSCH_VF_MAILBOX_RESP__OK; + while (resp != expected) { + resp = RREG32_SOC15(VCN, vcn_inst, regMMSCH_VF_MAILBOX_RESP); + if (resp != 0) + break; + + udelay(10); + tmp = tmp + 10; + if (tmp >= timeout) { + DRM_ERROR("failed to init MMSCH. TIME-OUT after %d usec"\ + " waiting for regMMSCH_VF_MAILBOX_RESP "\ + "(expected=0x%08x, readback=0x%08x)\n", + tmp, expected, resp); + return -EBUSY; + } + } + + enabled_vcn = amdgpu_vcn_is_disabled_vcn(adev, VCN_DECODE_RING, 0) ? 1 : 0; + init_status = ((struct mmsch_v4_0_3_init_header *)(table_loc))->vcn0.init_status; + if (resp != expected && resp != MMSCH_VF_MAILBOX_RESP__INCOMPLETE + && init_status != MMSCH_VF_ENGINE_STATUS__PASS) { + DRM_ERROR("MMSCH init status is incorrect! readback=0x%08x, header init "\ + "status for VCN%x: 0x%x\n", resp, enabled_vcn, init_status); + } + } + + return 0; +} + /** * vcn_v4_0_3_start - VCN start * @@ -1289,7 +1506,7 @@ static int vcn_v4_0_3_set_clockgating_state(void *handle, enum amd_clockgating_state state) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - bool enable = (state == AMD_CG_STATE_GATE) ? true : false; + bool enable = state == AMD_CG_STATE_GATE; int i; for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { @@ -1319,6 +1536,15 @@ static int vcn_v4_0_3_set_powergating_state(void *handle, struct amdgpu_device *adev = (struct amdgpu_device *)handle; int ret; + /* for SRIOV, guest should not control VCN Power-gating + * MMSCH FW should control Power-gating and clock-gating + * guest should avoid touching CGC and PG + */ + if (amdgpu_sriov_vf(adev)) { + adev->vcn.cur_state = AMD_PG_STATE_UNGATE; + return 0; + } + if (state == adev->vcn.cur_state) return 0; @@ -1534,6 +1760,11 @@ static void vcn_v4_0_3_enable_ras(struct amdgpu_device *adev, SOC15_DPG_MODE_OFFSET(VCN, 0, regVCN_RAS_CNTL), tmp, 0, indirect); + tmp = UVD_VCPU_INT_EN2__RASCNTL_VCPU_VCODEC_EN_MASK; + WREG32_SOC15_DPG_MODE(inst_idx, + SOC15_DPG_MODE_OFFSET(VCN, 0, regUVD_VCPU_INT_EN2), + tmp, 0, indirect); + tmp = UVD_SYS_INT_EN__RASCNTL_VCPU_VCODEC_EN_MASK; WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(VCN, 0, regUVD_SYS_INT_EN), diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c new file mode 100644 index 000000000000..2eda30e78f61 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c @@ -0,0 +1,1779 @@ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include <linux/firmware.h> +#include "amdgpu.h" +#include "amdgpu_vcn.h" +#include "amdgpu_pm.h" +#include "amdgpu_cs.h" +#include "soc15.h" +#include "soc15d.h" +#include "soc15_hw_ip.h" +#include "vcn_v2_0.h" +#include "mmsch_v4_0.h" +#include "vcn_v4_0_5.h" + +#include "vcn/vcn_4_0_5_offset.h" +#include "vcn/vcn_4_0_5_sh_mask.h" +#include "ivsrcid/vcn/irqsrcs_vcn_4_0.h" + +#include <drm/drm_drv.h> + +#define mmUVD_DPG_LMA_CTL regUVD_DPG_LMA_CTL +#define mmUVD_DPG_LMA_CTL_BASE_IDX regUVD_DPG_LMA_CTL_BASE_IDX +#define mmUVD_DPG_LMA_DATA regUVD_DPG_LMA_DATA +#define mmUVD_DPG_LMA_DATA_BASE_IDX regUVD_DPG_LMA_DATA_BASE_IDX + +#define VCN_VID_SOC_ADDRESS_2_0 0x1fb00 +#define VCN1_VID_SOC_ADDRESS_3_0 0x48300 + +#define VCN_HARVEST_MMSCH 0 + +#define RDECODE_MSG_CREATE 0x00000000 +#define RDECODE_MESSAGE_CREATE 0x00000001 + +static int amdgpu_ih_clientid_vcns[] = { + SOC15_IH_CLIENTID_VCN, + SOC15_IH_CLIENTID_VCN1 +}; + +static void vcn_v4_0_5_set_unified_ring_funcs(struct amdgpu_device *adev); +static void vcn_v4_0_5_set_irq_funcs(struct amdgpu_device *adev); +static int vcn_v4_0_5_set_powergating_state(void *handle, + enum amd_powergating_state state); +static int vcn_v4_0_5_pause_dpg_mode(struct amdgpu_device *adev, + int inst_idx, struct dpg_pause_state *new_state); +static void vcn_v4_0_5_unified_ring_set_wptr(struct amdgpu_ring *ring); + +/** + * vcn_v4_0_5_early_init - set function pointers and load microcode + * + * @handle: amdgpu_device pointer + * + * Set ring and irq function pointers + * Load microcode from filesystem + */ +static int vcn_v4_0_5_early_init(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + /* re-use enc ring as unified ring */ + adev->vcn.num_enc_rings = 1; + vcn_v4_0_5_set_unified_ring_funcs(adev); + vcn_v4_0_5_set_irq_funcs(adev); + + return amdgpu_vcn_early_init(adev); +} + +/** + * vcn_v4_0_5_sw_init - sw init for VCN block + * + * @handle: amdgpu_device pointer + * + * Load firmware and sw initialization + */ +static int vcn_v4_0_5_sw_init(void *handle) +{ + struct amdgpu_ring *ring; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i, r; + + r = amdgpu_vcn_sw_init(adev); + if (r) + return r; + + amdgpu_vcn_setup_ucode(adev); + + r = amdgpu_vcn_resume(adev); + if (r) + return r; + + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + volatile struct amdgpu_vcn4_fw_shared *fw_shared; + + if (adev->vcn.harvest_config & (1 << i)) + continue; + + atomic_set(&adev->vcn.inst[i].sched_score, 0); + + /* VCN UNIFIED TRAP */ + r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_vcns[i], + VCN_4_0__SRCID__UVD_ENC_GENERAL_PURPOSE, &adev->vcn.inst[i].irq); + if (r) + return r; + + /* VCN POISON TRAP */ + r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_vcns[i], + VCN_4_0__SRCID_UVD_POISON, &adev->vcn.inst[i].irq); + if (r) + return r; + + ring = &adev->vcn.inst[i].ring_enc[0]; + ring->use_doorbell = true; + if (amdgpu_sriov_vf(adev)) + ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + + i * (adev->vcn.num_enc_rings + 1) + 1; + else + ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + + 2 + 8 * i; + ring->vm_hub = AMDGPU_MMHUB0(0); + sprintf(ring->name, "vcn_unified_%d", i); + + r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[i].irq, 0, + AMDGPU_RING_PRIO_0, &adev->vcn.inst[i].sched_score); + if (r) + return r; + + fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; + fw_shared->present_flag_0 = cpu_to_le32(AMDGPU_FW_SHARED_FLAG_0_UNIFIED_QUEUE); + fw_shared->sq.is_enabled = 1; + + fw_shared->present_flag_0 |= cpu_to_le32(AMDGPU_VCN_SMU_DPM_INTERFACE_FLAG); + fw_shared->smu_dpm_interface.smu_interface_type = (adev->flags & AMD_IS_APU) ? + AMDGPU_VCN_SMU_DPM_INTERFACE_APU : AMDGPU_VCN_SMU_DPM_INTERFACE_DGPU; + + if (amdgpu_sriov_vf(adev)) + fw_shared->present_flag_0 |= cpu_to_le32(AMDGPU_VCN_VF_RB_SETUP_FLAG); + + if (amdgpu_vcnfw_log) + amdgpu_vcn_fwlog_init(&adev->vcn.inst[i]); + } + + if (amdgpu_sriov_vf(adev)) { + r = amdgpu_virt_alloc_mm_table(adev); + if (r) + return r; + } + + if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) + adev->vcn.pause_dpg_mode = vcn_v4_0_5_pause_dpg_mode; + + return 0; +} + +/** + * vcn_v4_0_5_sw_fini - sw fini for VCN block + * + * @handle: amdgpu_device pointer + * + * VCN suspend and free up sw allocation + */ +static int vcn_v4_0_5_sw_fini(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i, r, idx; + + if (drm_dev_enter(adev_to_drm(adev), &idx)) { + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + volatile struct amdgpu_vcn4_fw_shared *fw_shared; + + if (adev->vcn.harvest_config & (1 << i)) + continue; + + fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; + fw_shared->present_flag_0 = 0; + fw_shared->sq.is_enabled = 0; + } + + drm_dev_exit(idx); + } + + if (amdgpu_sriov_vf(adev)) + amdgpu_virt_free_mm_table(adev); + + r = amdgpu_vcn_suspend(adev); + if (r) + return r; + + r = amdgpu_vcn_sw_fini(adev); + + return r; +} + +/** + * vcn_v4_0_5_hw_init - start and test VCN block + * + * @handle: amdgpu_device pointer + * + * Initialize the hardware, boot up the VCPU and do some testing + */ +static int vcn_v4_0_5_hw_init(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_ring *ring; + int i, r; + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + + ring = &adev->vcn.inst[i].ring_enc[0]; + + adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell, + ((adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 8 * i), i); + + r = amdgpu_ring_test_helper(ring); + if (r) + goto done; + } + +done: + if (!r) + DRM_INFO("VCN decode and encode initialized successfully(under %s).\n", + (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)?"DPG Mode":"SPG Mode"); + + return r; +} + +/** + * vcn_v4_0_5_hw_fini - stop the hardware block + * + * @handle: amdgpu_device pointer + * + * Stop the VCN block, mark ring as not ready any more + */ +static int vcn_v4_0_5_hw_fini(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i; + + cancel_delayed_work_sync(&adev->vcn.idle_work); + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + if (!amdgpu_sriov_vf(adev)) { + if ((adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) || + (adev->vcn.cur_state != AMD_PG_STATE_GATE && + RREG32_SOC15(VCN, i, regUVD_STATUS))) { + vcn_v4_0_5_set_powergating_state(adev, AMD_PG_STATE_GATE); + } + } + + amdgpu_irq_put(adev, &adev->vcn.inst[i].irq, 0); + } + + return 0; +} + +/** + * vcn_v4_0_5_suspend - suspend VCN block + * + * @handle: amdgpu_device pointer + * + * HW fini and suspend VCN block + */ +static int vcn_v4_0_5_suspend(void *handle) +{ + int r; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + r = vcn_v4_0_5_hw_fini(adev); + if (r) + return r; + + r = amdgpu_vcn_suspend(adev); + + return r; +} + +/** + * vcn_v4_0_5_resume - resume VCN block + * + * @handle: amdgpu_device pointer + * + * Resume firmware and hw init VCN block + */ +static int vcn_v4_0_5_resume(void *handle) +{ + int r; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + r = amdgpu_vcn_resume(adev); + if (r) + return r; + + r = vcn_v4_0_5_hw_init(adev); + + return r; +} + +/** + * vcn_v4_0_5_mc_resume - memory controller programming + * + * @adev: amdgpu_device pointer + * @inst: instance number + * + * Let the VCN memory controller know it's offsets + */ +static void vcn_v4_0_5_mc_resume(struct amdgpu_device *adev, int inst) +{ + uint32_t offset, size; + const struct common_firmware_header *hdr; + + hdr = (const struct common_firmware_header *)adev->vcn.fw->data; + size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8); + + /* cache window 0: fw */ + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { + WREG32_SOC15(VCN, inst, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW, + (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst].tmr_mc_addr_lo)); + WREG32_SOC15(VCN, inst, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH, + (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst].tmr_mc_addr_hi)); + WREG32_SOC15(VCN, inst, regUVD_VCPU_CACHE_OFFSET0, 0); + offset = 0; + } else { + WREG32_SOC15(VCN, inst, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW, + lower_32_bits(adev->vcn.inst[inst].gpu_addr)); + WREG32_SOC15(VCN, inst, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH, + upper_32_bits(adev->vcn.inst[inst].gpu_addr)); + offset = size; + WREG32_SOC15(VCN, inst, regUVD_VCPU_CACHE_OFFSET0, AMDGPU_UVD_FIRMWARE_OFFSET >> 3); + } + WREG32_SOC15(VCN, inst, regUVD_VCPU_CACHE_SIZE0, size); + + /* cache window 1: stack */ + WREG32_SOC15(VCN, inst, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW, + lower_32_bits(adev->vcn.inst[inst].gpu_addr + offset)); + WREG32_SOC15(VCN, inst, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH, + upper_32_bits(adev->vcn.inst[inst].gpu_addr + offset)); + WREG32_SOC15(VCN, inst, regUVD_VCPU_CACHE_OFFSET1, 0); + WREG32_SOC15(VCN, inst, regUVD_VCPU_CACHE_SIZE1, AMDGPU_VCN_STACK_SIZE); + + /* cache window 2: context */ + WREG32_SOC15(VCN, inst, regUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW, + lower_32_bits(adev->vcn.inst[inst].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE)); + WREG32_SOC15(VCN, inst, regUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH, + upper_32_bits(adev->vcn.inst[inst].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE)); + WREG32_SOC15(VCN, inst, regUVD_VCPU_CACHE_OFFSET2, 0); + WREG32_SOC15(VCN, inst, regUVD_VCPU_CACHE_SIZE2, AMDGPU_VCN_CONTEXT_SIZE); + + /* non-cache window */ + WREG32_SOC15(VCN, inst, regUVD_LMI_VCPU_NC0_64BIT_BAR_LOW, + lower_32_bits(adev->vcn.inst[inst].fw_shared.gpu_addr)); + WREG32_SOC15(VCN, inst, regUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH, + upper_32_bits(adev->vcn.inst[inst].fw_shared.gpu_addr)); + WREG32_SOC15(VCN, inst, regUVD_VCPU_NONCACHE_OFFSET0, 0); + WREG32_SOC15(VCN, inst, regUVD_VCPU_NONCACHE_SIZE0, + AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_vcn4_fw_shared))); +} + +/** + * vcn_v4_0_5_mc_resume_dpg_mode - memory controller programming for dpg mode + * + * @adev: amdgpu_device pointer + * @inst_idx: instance number index + * @indirect: indirectly write sram + * + * Let the VCN memory controller know it's offsets with dpg mode + */ +static void vcn_v4_0_5_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect) +{ + uint32_t offset, size; + const struct common_firmware_header *hdr; + + hdr = (const struct common_firmware_header *)adev->vcn.fw->data; + size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8); + + /* cache window 0: fw */ + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { + if (!indirect) { + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( + VCN, inst_idx, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), + (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst_idx].tmr_mc_addr_lo), + 0, indirect); + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( + VCN, inst_idx, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), + (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst_idx].tmr_mc_addr_hi), + 0, indirect); + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( + VCN, inst_idx, regUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect); + } else { + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( + VCN, inst_idx, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), 0, 0, indirect); + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( + VCN, inst_idx, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), 0, 0, indirect); + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( + VCN, inst_idx, regUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect); + } + offset = 0; + } else { + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( + VCN, inst_idx, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), + lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr), 0, indirect); + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( + VCN, inst_idx, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), + upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr), 0, indirect); + offset = size; + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( + VCN, inst_idx, regUVD_VCPU_CACHE_OFFSET0), + AMDGPU_UVD_FIRMWARE_OFFSET >> 3, 0, indirect); + } + + if (!indirect) + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( + VCN, inst_idx, regUVD_VCPU_CACHE_SIZE0), size, 0, indirect); + else + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( + VCN, inst_idx, regUVD_VCPU_CACHE_SIZE0), 0, 0, indirect); + + /* cache window 1: stack */ + if (!indirect) { + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( + VCN, inst_idx, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), + lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset), 0, indirect); + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( + VCN, inst_idx, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), + upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset), 0, indirect); + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( + VCN, inst_idx, regUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect); + } else { + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( + VCN, inst_idx, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), 0, 0, indirect); + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( + VCN, inst_idx, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), 0, 0, indirect); + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( + VCN, inst_idx, regUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect); + } + + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( + VCN, inst_idx, regUVD_VCPU_CACHE_SIZE1), AMDGPU_VCN_STACK_SIZE, 0, indirect); + + /* cache window 2: context */ + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( + VCN, inst_idx, regUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW), + lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), + 0, indirect); + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( + VCN, inst_idx, regUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH), + upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), + 0, indirect); + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( + VCN, inst_idx, regUVD_VCPU_CACHE_OFFSET2), 0, 0, indirect); + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( + VCN, inst_idx, regUVD_VCPU_CACHE_SIZE2), AMDGPU_VCN_CONTEXT_SIZE, 0, indirect); + + /* non-cache window */ + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( + VCN, inst_idx, regUVD_LMI_VCPU_NC0_64BIT_BAR_LOW), + lower_32_bits(adev->vcn.inst[inst_idx].fw_shared.gpu_addr), 0, indirect); + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( + VCN, inst_idx, regUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH), + upper_32_bits(adev->vcn.inst[inst_idx].fw_shared.gpu_addr), 0, indirect); + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( + VCN, inst_idx, regUVD_VCPU_NONCACHE_OFFSET0), 0, 0, indirect); + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( + VCN, inst_idx, regUVD_VCPU_NONCACHE_SIZE0), + AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_vcn4_fw_shared)), 0, indirect); + + /* VCN global tiling registers */ + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( + VCN, 0, regUVD_GFX10_ADDR_CONFIG), adev->gfx.config.gb_addr_config, 0, indirect); +} + +/** + * vcn_v4_0_5_disable_static_power_gating - disable VCN static power gating + * + * @adev: amdgpu_device pointer + * @inst: instance number + * + * Disable static power gating for VCN block + */ +static void vcn_v4_0_5_disable_static_power_gating(struct amdgpu_device *adev, int inst) +{ + uint32_t data = 0; + + if (adev->pg_flags & AMD_PG_SUPPORT_VCN) { + WREG32_SOC15(VCN, inst, regUVD_IPX_DLDO_CONFIG, + 1 << UVD_IPX_DLDO_CONFIG__ONO2_PWR_CONFIG__SHIFT); + SOC15_WAIT_ON_RREG(VCN, inst, regUVD_IPX_DLDO_STATUS, 0, + UVD_IPX_DLDO_STATUS__ONO2_PWR_STATUS_MASK); + WREG32_SOC15(VCN, inst, regUVD_IPX_DLDO_CONFIG, + 2 << UVD_IPX_DLDO_CONFIG__ONO3_PWR_CONFIG__SHIFT); + SOC15_WAIT_ON_RREG(VCN, inst, regUVD_IPX_DLDO_STATUS, + 1 << UVD_IPX_DLDO_STATUS__ONO3_PWR_STATUS__SHIFT, + UVD_IPX_DLDO_STATUS__ONO3_PWR_STATUS_MASK); + WREG32_SOC15(VCN, inst, regUVD_IPX_DLDO_CONFIG, + 2 << UVD_IPX_DLDO_CONFIG__ONO4_PWR_CONFIG__SHIFT); + SOC15_WAIT_ON_RREG(VCN, inst, regUVD_IPX_DLDO_STATUS, + 1 << UVD_IPX_DLDO_STATUS__ONO4_PWR_STATUS__SHIFT, + UVD_IPX_DLDO_STATUS__ONO4_PWR_STATUS_MASK); + WREG32_SOC15(VCN, inst, regUVD_IPX_DLDO_CONFIG, + 2 << UVD_IPX_DLDO_CONFIG__ONO5_PWR_CONFIG__SHIFT); + SOC15_WAIT_ON_RREG(VCN, inst, regUVD_IPX_DLDO_STATUS, + 1 << UVD_IPX_DLDO_STATUS__ONO5_PWR_STATUS__SHIFT, + UVD_IPX_DLDO_STATUS__ONO5_PWR_STATUS_MASK); + } else { + WREG32_SOC15(VCN, inst, regUVD_IPX_DLDO_CONFIG, + 1 << UVD_IPX_DLDO_CONFIG__ONO2_PWR_CONFIG__SHIFT); + SOC15_WAIT_ON_RREG(VCN, inst, regUVD_IPX_DLDO_STATUS, + 0, UVD_IPX_DLDO_STATUS__ONO2_PWR_STATUS_MASK); + WREG32_SOC15(VCN, inst, regUVD_IPX_DLDO_CONFIG, + 1 << UVD_IPX_DLDO_CONFIG__ONO3_PWR_CONFIG__SHIFT); + SOC15_WAIT_ON_RREG(VCN, inst, regUVD_IPX_DLDO_STATUS, + 0, UVD_IPX_DLDO_STATUS__ONO3_PWR_STATUS_MASK); + WREG32_SOC15(VCN, inst, regUVD_IPX_DLDO_CONFIG, + 1 << UVD_IPX_DLDO_CONFIG__ONO4_PWR_CONFIG__SHIFT); + SOC15_WAIT_ON_RREG(VCN, inst, regUVD_IPX_DLDO_STATUS, + 0, UVD_IPX_DLDO_STATUS__ONO4_PWR_STATUS_MASK); + WREG32_SOC15(VCN, inst, regUVD_IPX_DLDO_CONFIG, + 1 << UVD_IPX_DLDO_CONFIG__ONO5_PWR_CONFIG__SHIFT); + SOC15_WAIT_ON_RREG(VCN, inst, regUVD_IPX_DLDO_STATUS, + 0, UVD_IPX_DLDO_STATUS__ONO5_PWR_STATUS_MASK); + } + + data = RREG32_SOC15(VCN, inst, regUVD_POWER_STATUS); + data &= ~0x103; + if (adev->pg_flags & AMD_PG_SUPPORT_VCN) + data |= UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON | + UVD_POWER_STATUS__UVD_PG_EN_MASK; + WREG32_SOC15(VCN, inst, regUVD_POWER_STATUS, data); +} + +/** + * vcn_v4_0_5_enable_static_power_gating - enable VCN static power gating + * + * @adev: amdgpu_device pointer + * @inst: instance number + * + * Enable static power gating for VCN block + */ +static void vcn_v4_0_5_enable_static_power_gating(struct amdgpu_device *adev, int inst) +{ + uint32_t data; + + if (adev->pg_flags & AMD_PG_SUPPORT_VCN) { + /* Before power off, this indicator has to be turned on */ + data = RREG32_SOC15(VCN, inst, regUVD_POWER_STATUS); + data &= ~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK; + data |= UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF; + WREG32_SOC15(VCN, inst, regUVD_POWER_STATUS, data); + + WREG32_SOC15(VCN, inst, regUVD_IPX_DLDO_CONFIG, + 2 << UVD_IPX_DLDO_CONFIG__ONO5_PWR_CONFIG__SHIFT); + SOC15_WAIT_ON_RREG(VCN, inst, regUVD_IPX_DLDO_STATUS, + 1 << UVD_IPX_DLDO_STATUS__ONO5_PWR_STATUS__SHIFT, + UVD_IPX_DLDO_STATUS__ONO5_PWR_STATUS_MASK); + WREG32_SOC15(VCN, inst, regUVD_IPX_DLDO_CONFIG, + 2 << UVD_IPX_DLDO_CONFIG__ONO4_PWR_CONFIG__SHIFT); + SOC15_WAIT_ON_RREG(VCN, inst, regUVD_IPX_DLDO_STATUS, + 1 << UVD_IPX_DLDO_STATUS__ONO4_PWR_STATUS__SHIFT, + UVD_IPX_DLDO_STATUS__ONO4_PWR_STATUS_MASK); + WREG32_SOC15(VCN, inst, regUVD_IPX_DLDO_CONFIG, + 2 << UVD_IPX_DLDO_CONFIG__ONO3_PWR_CONFIG__SHIFT); + SOC15_WAIT_ON_RREG(VCN, inst, regUVD_IPX_DLDO_STATUS, + 1 << UVD_IPX_DLDO_STATUS__ONO3_PWR_STATUS__SHIFT, + UVD_IPX_DLDO_STATUS__ONO3_PWR_STATUS_MASK); + WREG32_SOC15(VCN, inst, regUVD_IPX_DLDO_CONFIG, + 2 << UVD_IPX_DLDO_CONFIG__ONO2_PWR_CONFIG__SHIFT); + SOC15_WAIT_ON_RREG(VCN, inst, regUVD_IPX_DLDO_STATUS, + 1 << UVD_IPX_DLDO_STATUS__ONO2_PWR_STATUS__SHIFT, + UVD_IPX_DLDO_STATUS__ONO2_PWR_STATUS_MASK); + } +} + +/** + * vcn_v4_0_5_disable_clock_gating - disable VCN clock gating + * + * @adev: amdgpu_device pointer + * @inst: instance number + * + * Disable clock gating for VCN block + */ +static void vcn_v4_0_5_disable_clock_gating(struct amdgpu_device *adev, int inst) +{ + uint32_t data; + + if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG) + return; + + /* VCN disable CGC */ + data = RREG32_SOC15(VCN, inst, regUVD_CGC_CTRL); + data &= ~UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK; + data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT; + data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT; + WREG32_SOC15(VCN, inst, regUVD_CGC_CTRL, data); + + data = RREG32_SOC15(VCN, inst, regUVD_CGC_GATE); + data &= ~(UVD_CGC_GATE__SYS_MASK + | UVD_CGC_GATE__UDEC_MASK + | UVD_CGC_GATE__MPEG2_MASK + | UVD_CGC_GATE__REGS_MASK + | UVD_CGC_GATE__RBC_MASK + | UVD_CGC_GATE__LMI_MC_MASK + | UVD_CGC_GATE__LMI_UMC_MASK + | UVD_CGC_GATE__IDCT_MASK + | UVD_CGC_GATE__MPRD_MASK + | UVD_CGC_GATE__MPC_MASK + | UVD_CGC_GATE__LBSI_MASK + | UVD_CGC_GATE__LRBBM_MASK + | UVD_CGC_GATE__UDEC_RE_MASK + | UVD_CGC_GATE__UDEC_CM_MASK + | UVD_CGC_GATE__UDEC_IT_MASK + | UVD_CGC_GATE__UDEC_DB_MASK + | UVD_CGC_GATE__UDEC_MP_MASK + | UVD_CGC_GATE__WCB_MASK + | UVD_CGC_GATE__VCPU_MASK + | UVD_CGC_GATE__MMSCH_MASK); + + WREG32_SOC15(VCN, inst, regUVD_CGC_GATE, data); + SOC15_WAIT_ON_RREG(VCN, inst, regUVD_CGC_GATE, 0, 0xFFFFFFFF); + + data = RREG32_SOC15(VCN, inst, regUVD_CGC_CTRL); + data &= ~(UVD_CGC_CTRL__UDEC_RE_MODE_MASK + | UVD_CGC_CTRL__UDEC_CM_MODE_MASK + | UVD_CGC_CTRL__UDEC_IT_MODE_MASK + | UVD_CGC_CTRL__UDEC_DB_MODE_MASK + | UVD_CGC_CTRL__UDEC_MP_MODE_MASK + | UVD_CGC_CTRL__SYS_MODE_MASK + | UVD_CGC_CTRL__UDEC_MODE_MASK + | UVD_CGC_CTRL__MPEG2_MODE_MASK + | UVD_CGC_CTRL__REGS_MODE_MASK + | UVD_CGC_CTRL__RBC_MODE_MASK + | UVD_CGC_CTRL__LMI_MC_MODE_MASK + | UVD_CGC_CTRL__LMI_UMC_MODE_MASK + | UVD_CGC_CTRL__IDCT_MODE_MASK + | UVD_CGC_CTRL__MPRD_MODE_MASK + | UVD_CGC_CTRL__MPC_MODE_MASK + | UVD_CGC_CTRL__LBSI_MODE_MASK + | UVD_CGC_CTRL__LRBBM_MODE_MASK + | UVD_CGC_CTRL__WCB_MODE_MASK + | UVD_CGC_CTRL__VCPU_MODE_MASK + | UVD_CGC_CTRL__MMSCH_MODE_MASK); + WREG32_SOC15(VCN, inst, regUVD_CGC_CTRL, data); + + data = RREG32_SOC15(VCN, inst, regUVD_SUVD_CGC_GATE); + data |= (UVD_SUVD_CGC_GATE__SRE_MASK + | UVD_SUVD_CGC_GATE__SIT_MASK + | UVD_SUVD_CGC_GATE__SMP_MASK + | UVD_SUVD_CGC_GATE__SCM_MASK + | UVD_SUVD_CGC_GATE__SDB_MASK + | UVD_SUVD_CGC_GATE__SRE_H264_MASK + | UVD_SUVD_CGC_GATE__SRE_HEVC_MASK + | UVD_SUVD_CGC_GATE__SIT_H264_MASK + | UVD_SUVD_CGC_GATE__SIT_HEVC_MASK + | UVD_SUVD_CGC_GATE__SCM_H264_MASK + | UVD_SUVD_CGC_GATE__SCM_HEVC_MASK + | UVD_SUVD_CGC_GATE__SDB_H264_MASK + | UVD_SUVD_CGC_GATE__SDB_HEVC_MASK + | UVD_SUVD_CGC_GATE__SCLR_MASK + | UVD_SUVD_CGC_GATE__UVD_SC_MASK + | UVD_SUVD_CGC_GATE__ENT_MASK + | UVD_SUVD_CGC_GATE__SIT_HEVC_DEC_MASK + | UVD_SUVD_CGC_GATE__SIT_HEVC_ENC_MASK + | UVD_SUVD_CGC_GATE__SITE_MASK + | UVD_SUVD_CGC_GATE__SRE_VP9_MASK + | UVD_SUVD_CGC_GATE__SCM_VP9_MASK + | UVD_SUVD_CGC_GATE__SIT_VP9_DEC_MASK + | UVD_SUVD_CGC_GATE__SDB_VP9_MASK + | UVD_SUVD_CGC_GATE__IME_HEVC_MASK); + WREG32_SOC15(VCN, inst, regUVD_SUVD_CGC_GATE, data); + + data = RREG32_SOC15(VCN, inst, regUVD_SUVD_CGC_CTRL); + data &= ~(UVD_SUVD_CGC_CTRL__SRE_MODE_MASK + | UVD_SUVD_CGC_CTRL__SIT_MODE_MASK + | UVD_SUVD_CGC_CTRL__SMP_MODE_MASK + | UVD_SUVD_CGC_CTRL__SCM_MODE_MASK + | UVD_SUVD_CGC_CTRL__SDB_MODE_MASK + | UVD_SUVD_CGC_CTRL__SCLR_MODE_MASK + | UVD_SUVD_CGC_CTRL__UVD_SC_MODE_MASK + | UVD_SUVD_CGC_CTRL__ENT_MODE_MASK + | UVD_SUVD_CGC_CTRL__IME_MODE_MASK + | UVD_SUVD_CGC_CTRL__SITE_MODE_MASK); + WREG32_SOC15(VCN, inst, regUVD_SUVD_CGC_CTRL, data); +} + +/** + * vcn_v4_0_5_disable_clock_gating_dpg_mode - disable VCN clock gating dpg mode + * + * @adev: amdgpu_device pointer + * @sram_sel: sram select + * @inst_idx: instance number index + * @indirect: indirectly write sram + * + * Disable clock gating for VCN block with dpg mode + */ +static void vcn_v4_0_5_disable_clock_gating_dpg_mode(struct amdgpu_device *adev, uint8_t sram_sel, + int inst_idx, uint8_t indirect) +{ + uint32_t reg_data = 0; + + if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG) + return; + + /* enable sw clock gating control */ + reg_data = 0 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; + reg_data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT; + reg_data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT; + reg_data &= ~(UVD_CGC_CTRL__UDEC_RE_MODE_MASK | + UVD_CGC_CTRL__UDEC_CM_MODE_MASK | + UVD_CGC_CTRL__UDEC_IT_MODE_MASK | + UVD_CGC_CTRL__UDEC_DB_MODE_MASK | + UVD_CGC_CTRL__UDEC_MP_MODE_MASK | + UVD_CGC_CTRL__SYS_MODE_MASK | + UVD_CGC_CTRL__UDEC_MODE_MASK | + UVD_CGC_CTRL__MPEG2_MODE_MASK | + UVD_CGC_CTRL__REGS_MODE_MASK | + UVD_CGC_CTRL__RBC_MODE_MASK | + UVD_CGC_CTRL__LMI_MC_MODE_MASK | + UVD_CGC_CTRL__LMI_UMC_MODE_MASK | + UVD_CGC_CTRL__IDCT_MODE_MASK | + UVD_CGC_CTRL__MPRD_MODE_MASK | + UVD_CGC_CTRL__MPC_MODE_MASK | + UVD_CGC_CTRL__LBSI_MODE_MASK | + UVD_CGC_CTRL__LRBBM_MODE_MASK | + UVD_CGC_CTRL__WCB_MODE_MASK | + UVD_CGC_CTRL__VCPU_MODE_MASK); + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( + VCN, inst_idx, regUVD_CGC_CTRL), reg_data, sram_sel, indirect); + + /* turn off clock gating */ + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( + VCN, inst_idx, regUVD_CGC_GATE), 0, sram_sel, indirect); + + /* turn on SUVD clock gating */ + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( + VCN, inst_idx, regUVD_SUVD_CGC_GATE), 1, sram_sel, indirect); + + /* turn on sw mode in UVD_SUVD_CGC_CTRL */ + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( + VCN, inst_idx, regUVD_SUVD_CGC_CTRL), 0, sram_sel, indirect); +} + +/** + * vcn_v4_0_5_enable_clock_gating - enable VCN clock gating + * + * @adev: amdgpu_device pointer + * @inst: instance number + * + * Enable clock gating for VCN block + */ +static void vcn_v4_0_5_enable_clock_gating(struct amdgpu_device *adev, int inst) +{ + uint32_t data; + + if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG) + return; + + /* enable VCN CGC */ + data = RREG32_SOC15(VCN, inst, regUVD_CGC_CTRL); + data |= 0 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; + data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT; + data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT; + WREG32_SOC15(VCN, inst, regUVD_CGC_CTRL, data); + + data = RREG32_SOC15(VCN, inst, regUVD_CGC_CTRL); + data |= (UVD_CGC_CTRL__UDEC_RE_MODE_MASK + | UVD_CGC_CTRL__UDEC_CM_MODE_MASK + | UVD_CGC_CTRL__UDEC_IT_MODE_MASK + | UVD_CGC_CTRL__UDEC_DB_MODE_MASK + | UVD_CGC_CTRL__UDEC_MP_MODE_MASK + | UVD_CGC_CTRL__SYS_MODE_MASK + | UVD_CGC_CTRL__UDEC_MODE_MASK + | UVD_CGC_CTRL__MPEG2_MODE_MASK + | UVD_CGC_CTRL__REGS_MODE_MASK + | UVD_CGC_CTRL__RBC_MODE_MASK + | UVD_CGC_CTRL__LMI_MC_MODE_MASK + | UVD_CGC_CTRL__LMI_UMC_MODE_MASK + | UVD_CGC_CTRL__IDCT_MODE_MASK + | UVD_CGC_CTRL__MPRD_MODE_MASK + | UVD_CGC_CTRL__MPC_MODE_MASK + | UVD_CGC_CTRL__LBSI_MODE_MASK + | UVD_CGC_CTRL__LRBBM_MODE_MASK + | UVD_CGC_CTRL__WCB_MODE_MASK + | UVD_CGC_CTRL__VCPU_MODE_MASK + | UVD_CGC_CTRL__MMSCH_MODE_MASK); + WREG32_SOC15(VCN, inst, regUVD_CGC_CTRL, data); + + data = RREG32_SOC15(VCN, inst, regUVD_SUVD_CGC_CTRL); + data |= (UVD_SUVD_CGC_CTRL__SRE_MODE_MASK + | UVD_SUVD_CGC_CTRL__SIT_MODE_MASK + | UVD_SUVD_CGC_CTRL__SMP_MODE_MASK + | UVD_SUVD_CGC_CTRL__SCM_MODE_MASK + | UVD_SUVD_CGC_CTRL__SDB_MODE_MASK + | UVD_SUVD_CGC_CTRL__SCLR_MODE_MASK + | UVD_SUVD_CGC_CTRL__UVD_SC_MODE_MASK + | UVD_SUVD_CGC_CTRL__ENT_MODE_MASK + | UVD_SUVD_CGC_CTRL__IME_MODE_MASK + | UVD_SUVD_CGC_CTRL__SITE_MODE_MASK); + WREG32_SOC15(VCN, inst, regUVD_SUVD_CGC_CTRL, data); +} + +/** + * vcn_v4_0_5_start_dpg_mode - VCN start with dpg mode + * + * @adev: amdgpu_device pointer + * @inst_idx: instance number index + * @indirect: indirectly write sram + * + * Start VCN block with dpg mode + */ +static int vcn_v4_0_5_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect) +{ + volatile struct amdgpu_vcn4_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr; + struct amdgpu_ring *ring; + uint32_t tmp; + + /* disable register anti-hang mechanism */ + WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, regUVD_POWER_STATUS), 1, + ~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK); + /* enable dynamic power gating mode */ + tmp = RREG32_SOC15(VCN, inst_idx, regUVD_POWER_STATUS); + tmp |= UVD_POWER_STATUS__UVD_PG_MODE_MASK; + tmp |= UVD_POWER_STATUS__UVD_PG_EN_MASK; + WREG32_SOC15(VCN, inst_idx, regUVD_POWER_STATUS, tmp); + + if (indirect) + adev->vcn.inst[inst_idx].dpg_sram_curr_addr = + (uint32_t *)adev->vcn.inst[inst_idx].dpg_sram_cpu_addr; + + /* enable clock gating */ + vcn_v4_0_5_disable_clock_gating_dpg_mode(adev, 0, inst_idx, indirect); + + /* enable VCPU clock */ + tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT); + tmp |= UVD_VCPU_CNTL__CLK_EN_MASK | UVD_VCPU_CNTL__BLK_RST_MASK; + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( + VCN, inst_idx, regUVD_VCPU_CNTL), tmp, 0, indirect); + + /* disable master interrupt */ + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( + VCN, inst_idx, regUVD_MASTINT_EN), 0, 0, indirect); + + /* setup regUVD_LMI_CTRL */ + tmp = (UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK | + UVD_LMI_CTRL__REQ_MODE_MASK | + UVD_LMI_CTRL__CRC_RESET_MASK | + UVD_LMI_CTRL__MASK_MC_URGENT_MASK | + UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK | + UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK | + (8 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) | + 0x00100000L); + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( + VCN, inst_idx, regUVD_LMI_CTRL), tmp, 0, indirect); + + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( + VCN, inst_idx, regUVD_MPC_CNTL), + 0x2 << UVD_MPC_CNTL__REPLACEMENT_MODE__SHIFT, 0, indirect); + + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( + VCN, inst_idx, regUVD_MPC_SET_MUXA0), + ((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) | + (0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) | + (0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) | + (0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT)), 0, indirect); + + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( + VCN, inst_idx, regUVD_MPC_SET_MUXB0), + ((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) | + (0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) | + (0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) | + (0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT)), 0, indirect); + + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( + VCN, inst_idx, regUVD_MPC_SET_MUX), + ((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) | + (0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) | + (0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT)), 0, indirect); + + vcn_v4_0_5_mc_resume_dpg_mode(adev, inst_idx, indirect); + + tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT); + tmp |= UVD_VCPU_CNTL__CLK_EN_MASK; + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( + VCN, inst_idx, regUVD_VCPU_CNTL), tmp, 0, indirect); + + /* enable LMI MC and UMC channels */ + tmp = 0x1f << UVD_LMI_CTRL2__RE_OFLD_MIF_WR_REQ_NUM__SHIFT; + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( + VCN, inst_idx, regUVD_LMI_CTRL2), tmp, 0, indirect); + + /* enable master interrupt */ + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( + VCN, inst_idx, regUVD_MASTINT_EN), + UVD_MASTINT_EN__VCPU_EN_MASK, 0, indirect); + + + if (indirect) + amdgpu_vcn_psp_update_sram(adev, inst_idx, 0); + + ring = &adev->vcn.inst[inst_idx].ring_enc[0]; + + WREG32_SOC15(VCN, inst_idx, regUVD_RB_BASE_LO, ring->gpu_addr); + WREG32_SOC15(VCN, inst_idx, regUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); + WREG32_SOC15(VCN, inst_idx, regUVD_RB_SIZE, ring->ring_size / 4); + + tmp = RREG32_SOC15(VCN, inst_idx, regVCN_RB_ENABLE); + tmp &= ~(VCN_RB_ENABLE__RB1_EN_MASK); + WREG32_SOC15(VCN, inst_idx, regVCN_RB_ENABLE, tmp); + fw_shared->sq.queue_mode |= FW_QUEUE_RING_RESET; + WREG32_SOC15(VCN, inst_idx, regUVD_RB_RPTR, 0); + WREG32_SOC15(VCN, inst_idx, regUVD_RB_WPTR, 0); + + tmp = RREG32_SOC15(VCN, inst_idx, regUVD_RB_RPTR); + WREG32_SOC15(VCN, inst_idx, regUVD_RB_WPTR, tmp); + ring->wptr = RREG32_SOC15(VCN, inst_idx, regUVD_RB_WPTR); + + tmp = RREG32_SOC15(VCN, inst_idx, regVCN_RB_ENABLE); + tmp |= VCN_RB_ENABLE__RB1_EN_MASK; + WREG32_SOC15(VCN, inst_idx, regVCN_RB_ENABLE, tmp); + fw_shared->sq.queue_mode &= ~(FW_QUEUE_RING_RESET | FW_QUEUE_DPG_HOLD_OFF); + + WREG32_SOC15(VCN, inst_idx, regVCN_RB1_DB_CTRL, + ring->doorbell_index << VCN_RB1_DB_CTRL__OFFSET__SHIFT | + VCN_RB1_DB_CTRL__EN_MASK); + + return 0; +} + + +/** + * vcn_v4_0_5_start - VCN start + * + * @adev: amdgpu_device pointer + * + * Start VCN block + */ +static int vcn_v4_0_5_start(struct amdgpu_device *adev) +{ + volatile struct amdgpu_vcn4_fw_shared *fw_shared; + struct amdgpu_ring *ring; + uint32_t tmp; + int i, j, k, r; + + if (adev->pm.dpm_enabled) + amdgpu_dpm_enable_uvd(adev, true); + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; + + if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) { + r = vcn_v4_0_5_start_dpg_mode(adev, i, adev->vcn.indirect_sram); + continue; + } + + /* disable VCN power gating */ + vcn_v4_0_5_disable_static_power_gating(adev, i); + + /* set VCN status busy */ + tmp = RREG32_SOC15(VCN, i, regUVD_STATUS) | UVD_STATUS__UVD_BUSY; + WREG32_SOC15(VCN, i, regUVD_STATUS, tmp); + + /*SW clock gating */ + vcn_v4_0_5_disable_clock_gating(adev, i); + + /* enable VCPU clock */ + WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL), + UVD_VCPU_CNTL__CLK_EN_MASK, ~UVD_VCPU_CNTL__CLK_EN_MASK); + + /* disable master interrupt */ + WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_MASTINT_EN), 0, + ~UVD_MASTINT_EN__VCPU_EN_MASK); + + /* enable LMI MC and UMC channels */ + WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_LMI_CTRL2), 0, + ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK); + + tmp = RREG32_SOC15(VCN, i, regUVD_SOFT_RESET); + tmp &= ~UVD_SOFT_RESET__LMI_SOFT_RESET_MASK; + tmp &= ~UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK; + WREG32_SOC15(VCN, i, regUVD_SOFT_RESET, tmp); + + /* setup regUVD_LMI_CTRL */ + tmp = RREG32_SOC15(VCN, i, regUVD_LMI_CTRL); + WREG32_SOC15(VCN, i, regUVD_LMI_CTRL, tmp | + UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK | + UVD_LMI_CTRL__MASK_MC_URGENT_MASK | + UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK | + UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK); + + /* setup regUVD_MPC_CNTL */ + tmp = RREG32_SOC15(VCN, i, regUVD_MPC_CNTL); + tmp &= ~UVD_MPC_CNTL__REPLACEMENT_MODE_MASK; + tmp |= 0x2 << UVD_MPC_CNTL__REPLACEMENT_MODE__SHIFT; + WREG32_SOC15(VCN, i, regUVD_MPC_CNTL, tmp); + + /* setup UVD_MPC_SET_MUXA0 */ + WREG32_SOC15(VCN, i, regUVD_MPC_SET_MUXA0, + ((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) | + (0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) | + (0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) | + (0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT))); + + /* setup UVD_MPC_SET_MUXB0 */ + WREG32_SOC15(VCN, i, regUVD_MPC_SET_MUXB0, + ((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) | + (0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) | + (0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) | + (0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT))); + + /* setup UVD_MPC_SET_MUX */ + WREG32_SOC15(VCN, i, regUVD_MPC_SET_MUX, + ((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) | + (0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) | + (0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT))); + + vcn_v4_0_5_mc_resume(adev, i); + + /* VCN global tiling registers */ + WREG32_SOC15(VCN, i, regUVD_GFX10_ADDR_CONFIG, + adev->gfx.config.gb_addr_config); + + /* unblock VCPU register access */ + WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_RB_ARB_CTRL), 0, + ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK); + + /* release VCPU reset to boot */ + WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL), 0, + ~UVD_VCPU_CNTL__BLK_RST_MASK); + + for (j = 0; j < 10; ++j) { + uint32_t status; + + for (k = 0; k < 100; ++k) { + status = RREG32_SOC15(VCN, i, regUVD_STATUS); + if (status & 2) + break; + mdelay(10); + if (amdgpu_emu_mode == 1) + msleep(1); + } + + if (amdgpu_emu_mode == 1) { + r = -1; + if (status & 2) { + r = 0; + break; + } + } else { + r = 0; + if (status & 2) + break; + + dev_err(adev->dev, + "VCN[%d] is not responding, trying to reset VCPU!!!\n", i); + WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL), + UVD_VCPU_CNTL__BLK_RST_MASK, + ~UVD_VCPU_CNTL__BLK_RST_MASK); + mdelay(10); + WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL), 0, + ~UVD_VCPU_CNTL__BLK_RST_MASK); + + mdelay(10); + r = -1; + } + } + + if (r) { + dev_err(adev->dev, "VCN[%d] is not responding, giving up!!!\n", i); + return r; + } + + /* enable master interrupt */ + WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_MASTINT_EN), + UVD_MASTINT_EN__VCPU_EN_MASK, + ~UVD_MASTINT_EN__VCPU_EN_MASK); + + /* clear the busy bit of VCN_STATUS */ + WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_STATUS), 0, + ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT)); + + ring = &adev->vcn.inst[i].ring_enc[0]; + WREG32_SOC15(VCN, i, regVCN_RB1_DB_CTRL, + ring->doorbell_index << VCN_RB1_DB_CTRL__OFFSET__SHIFT | + VCN_RB1_DB_CTRL__EN_MASK); + + WREG32_SOC15(VCN, i, regUVD_RB_BASE_LO, ring->gpu_addr); + WREG32_SOC15(VCN, i, regUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); + WREG32_SOC15(VCN, i, regUVD_RB_SIZE, ring->ring_size / 4); + + tmp = RREG32_SOC15(VCN, i, regVCN_RB_ENABLE); + tmp &= ~(VCN_RB_ENABLE__RB1_EN_MASK); + WREG32_SOC15(VCN, i, regVCN_RB_ENABLE, tmp); + fw_shared->sq.queue_mode |= FW_QUEUE_RING_RESET; + WREG32_SOC15(VCN, i, regUVD_RB_RPTR, 0); + WREG32_SOC15(VCN, i, regUVD_RB_WPTR, 0); + + tmp = RREG32_SOC15(VCN, i, regUVD_RB_RPTR); + WREG32_SOC15(VCN, i, regUVD_RB_WPTR, tmp); + ring->wptr = RREG32_SOC15(VCN, i, regUVD_RB_WPTR); + + tmp = RREG32_SOC15(VCN, i, regVCN_RB_ENABLE); + tmp |= VCN_RB_ENABLE__RB1_EN_MASK; + WREG32_SOC15(VCN, i, regVCN_RB_ENABLE, tmp); + fw_shared->sq.queue_mode &= ~(FW_QUEUE_RING_RESET | FW_QUEUE_DPG_HOLD_OFF); + } + + return 0; +} + +/** + * vcn_v4_0_5_stop_dpg_mode - VCN stop with dpg mode + * + * @adev: amdgpu_device pointer + * @inst_idx: instance number index + * + * Stop VCN block with dpg mode + */ +static void vcn_v4_0_5_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx) +{ + uint32_t tmp; + + /* Wait for power status to be 1 */ + SOC15_WAIT_ON_RREG(VCN, inst_idx, regUVD_POWER_STATUS, 1, + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK); + + /* wait for read ptr to be equal to write ptr */ + tmp = RREG32_SOC15(VCN, inst_idx, regUVD_RB_WPTR); + SOC15_WAIT_ON_RREG(VCN, inst_idx, regUVD_RB_RPTR, tmp, 0xFFFFFFFF); + + SOC15_WAIT_ON_RREG(VCN, inst_idx, regUVD_POWER_STATUS, 1, + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK); + + /* disable dynamic power gating mode */ + WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, regUVD_POWER_STATUS), 0, + ~UVD_POWER_STATUS__UVD_PG_MODE_MASK); +} + +/** + * vcn_v4_0_5_stop - VCN stop + * + * @adev: amdgpu_device pointer + * + * Stop VCN block + */ +static int vcn_v4_0_5_stop(struct amdgpu_device *adev) +{ + volatile struct amdgpu_vcn4_fw_shared *fw_shared; + uint32_t tmp; + int i, r = 0; + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; + fw_shared->sq.queue_mode |= FW_QUEUE_DPG_HOLD_OFF; + + if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) { + vcn_v4_0_5_stop_dpg_mode(adev, i); + continue; + } + + /* wait for vcn idle */ + r = SOC15_WAIT_ON_RREG(VCN, i, regUVD_STATUS, UVD_STATUS__IDLE, 0x7); + if (r) + return r; + + tmp = UVD_LMI_STATUS__VCPU_LMI_WRITE_CLEAN_MASK | + UVD_LMI_STATUS__READ_CLEAN_MASK | + UVD_LMI_STATUS__WRITE_CLEAN_MASK | + UVD_LMI_STATUS__WRITE_CLEAN_RAW_MASK; + r = SOC15_WAIT_ON_RREG(VCN, i, regUVD_LMI_STATUS, tmp, tmp); + if (r) + return r; + + /* disable LMI UMC channel */ + tmp = RREG32_SOC15(VCN, i, regUVD_LMI_CTRL2); + tmp |= UVD_LMI_CTRL2__STALL_ARB_UMC_MASK; + WREG32_SOC15(VCN, i, regUVD_LMI_CTRL2, tmp); + tmp = UVD_LMI_STATUS__UMC_READ_CLEAN_RAW_MASK | + UVD_LMI_STATUS__UMC_WRITE_CLEAN_RAW_MASK; + r = SOC15_WAIT_ON_RREG(VCN, i, regUVD_LMI_STATUS, tmp, tmp); + if (r) + return r; + + /* block VCPU register access */ + WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_RB_ARB_CTRL), + UVD_RB_ARB_CTRL__VCPU_DIS_MASK, + ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK); + + /* reset VCPU */ + WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL), + UVD_VCPU_CNTL__BLK_RST_MASK, + ~UVD_VCPU_CNTL__BLK_RST_MASK); + + /* disable VCPU clock */ + WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL), 0, + ~(UVD_VCPU_CNTL__CLK_EN_MASK)); + + /* apply soft reset */ + tmp = RREG32_SOC15(VCN, i, regUVD_SOFT_RESET); + tmp |= UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK; + WREG32_SOC15(VCN, i, regUVD_SOFT_RESET, tmp); + tmp = RREG32_SOC15(VCN, i, regUVD_SOFT_RESET); + tmp |= UVD_SOFT_RESET__LMI_SOFT_RESET_MASK; + WREG32_SOC15(VCN, i, regUVD_SOFT_RESET, tmp); + + /* clear status */ + WREG32_SOC15(VCN, i, regUVD_STATUS, 0); + + /* apply HW clock gating */ + vcn_v4_0_5_enable_clock_gating(adev, i); + + /* enable VCN power gating */ + vcn_v4_0_5_enable_static_power_gating(adev, i); + } + + if (adev->pm.dpm_enabled) + amdgpu_dpm_enable_uvd(adev, false); + + return 0; +} + +/** + * vcn_v4_0_5_pause_dpg_mode - VCN pause with dpg mode + * + * @adev: amdgpu_device pointer + * @inst_idx: instance number index + * @new_state: pause state + * + * Pause dpg mode for VCN block + */ +static int vcn_v4_0_5_pause_dpg_mode(struct amdgpu_device *adev, int inst_idx, + struct dpg_pause_state *new_state) +{ + uint32_t reg_data = 0; + int ret_code; + + /* pause/unpause if state is changed */ + if (adev->vcn.inst[inst_idx].pause_state.fw_based != new_state->fw_based) { + DRM_DEV_DEBUG(adev->dev, "dpg pause state changed %d -> %d", + adev->vcn.inst[inst_idx].pause_state.fw_based, new_state->fw_based); + reg_data = RREG32_SOC15(VCN, inst_idx, regUVD_DPG_PAUSE) & + (~UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK); + + if (new_state->fw_based == VCN_DPG_STATE__PAUSE) { + ret_code = SOC15_WAIT_ON_RREG(VCN, inst_idx, regUVD_POWER_STATUS, 0x1, + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK); + + if (!ret_code) { + /* pause DPG */ + reg_data |= UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK; + WREG32_SOC15(VCN, inst_idx, regUVD_DPG_PAUSE, reg_data); + + /* wait for ACK */ + SOC15_WAIT_ON_RREG(VCN, inst_idx, regUVD_DPG_PAUSE, + UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK, + UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK); + + SOC15_WAIT_ON_RREG(VCN, inst_idx, regUVD_POWER_STATUS, + UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON, + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK); + } + } else { + /* unpause dpg, no need to wait */ + reg_data &= ~UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK; + WREG32_SOC15(VCN, inst_idx, regUVD_DPG_PAUSE, reg_data); + } + adev->vcn.inst[inst_idx].pause_state.fw_based = new_state->fw_based; + } + + return 0; +} + +/** + * vcn_v4_0_5_unified_ring_get_rptr - get unified read pointer + * + * @ring: amdgpu_ring pointer + * + * Returns the current hardware unified read pointer + */ +static uint64_t vcn_v4_0_5_unified_ring_get_rptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + if (ring != &adev->vcn.inst[ring->me].ring_enc[0]) + DRM_ERROR("wrong ring id is identified in %s", __func__); + + return RREG32_SOC15(VCN, ring->me, regUVD_RB_RPTR); +} + +/** + * vcn_v4_0_5_unified_ring_get_wptr - get unified write pointer + * + * @ring: amdgpu_ring pointer + * + * Returns the current hardware unified write pointer + */ +static uint64_t vcn_v4_0_5_unified_ring_get_wptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + if (ring != &adev->vcn.inst[ring->me].ring_enc[0]) + DRM_ERROR("wrong ring id is identified in %s", __func__); + + if (ring->use_doorbell) + return *ring->wptr_cpu_addr; + else + return RREG32_SOC15(VCN, ring->me, regUVD_RB_WPTR); +} + +/** + * vcn_v4_0_5_unified_ring_set_wptr - set enc write pointer + * + * @ring: amdgpu_ring pointer + * + * Commits the enc write pointer to the hardware + */ +static void vcn_v4_0_5_unified_ring_set_wptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + if (ring != &adev->vcn.inst[ring->me].ring_enc[0]) + DRM_ERROR("wrong ring id is identified in %s", __func__); + + if (ring->use_doorbell) { + *ring->wptr_cpu_addr = lower_32_bits(ring->wptr); + WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr)); + } else { + WREG32_SOC15(VCN, ring->me, regUVD_RB_WPTR, lower_32_bits(ring->wptr)); + } +} + +static int vcn_v4_0_5_limit_sched(struct amdgpu_cs_parser *p, + struct amdgpu_job *job) +{ + struct drm_gpu_scheduler **scheds; + + /* The create msg must be in the first IB submitted */ + if (atomic_read(&job->base.entity->fence_seq)) + return -EINVAL; + + /* if VCN0 is harvested, we can't support AV1 */ + if (p->adev->vcn.harvest_config & AMDGPU_VCN_HARVEST_VCN0) + return -EINVAL; + + scheds = p->adev->gpu_sched[AMDGPU_HW_IP_VCN_ENC] + [AMDGPU_RING_PRIO_0].sched; + drm_sched_entity_modify_sched(job->base.entity, scheds, 1); + return 0; +} + +static int vcn_v4_0_5_dec_msg(struct amdgpu_cs_parser *p, struct amdgpu_job *job, + uint64_t addr) +{ + struct ttm_operation_ctx ctx = { false, false }; + struct amdgpu_bo_va_mapping *map; + uint32_t *msg, num_buffers; + struct amdgpu_bo *bo; + uint64_t start, end; + unsigned int i; + void *ptr; + int r; + + addr &= AMDGPU_GMC_HOLE_MASK; + r = amdgpu_cs_find_mapping(p, addr, &bo, &map); + if (r) { + DRM_ERROR("Can't find BO for addr 0x%08llx\n", addr); + return r; + } + + start = map->start * AMDGPU_GPU_PAGE_SIZE; + end = (map->last + 1) * AMDGPU_GPU_PAGE_SIZE; + if (addr & 0x7) { + DRM_ERROR("VCN messages must be 8 byte aligned!\n"); + return -EINVAL; + } + + bo->flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; + amdgpu_bo_placement_from_domain(bo, bo->allowed_domains); + r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); + if (r) { + DRM_ERROR("Failed validating the VCN message BO (%d)!\n", r); + return r; + } + + r = amdgpu_bo_kmap(bo, &ptr); + if (r) { + DRM_ERROR("Failed mapping the VCN message (%d)!\n", r); + return r; + } + + msg = ptr + addr - start; + + /* Check length */ + if (msg[1] > end - addr) { + r = -EINVAL; + goto out; + } + + if (msg[3] != RDECODE_MSG_CREATE) + goto out; + + num_buffers = msg[2]; + for (i = 0, msg = &msg[6]; i < num_buffers; ++i, msg += 4) { + uint32_t offset, size, *create; + + if (msg[0] != RDECODE_MESSAGE_CREATE) + continue; + + offset = msg[1]; + size = msg[2]; + + if (offset + size > end) { + r = -EINVAL; + goto out; + } + + create = ptr + addr + offset - start; + + /* H264, HEVC and VP9 can run on any instance */ + if (create[0] == 0x7 || create[0] == 0x10 || create[0] == 0x11) + continue; + + r = vcn_v4_0_5_limit_sched(p, job); + if (r) + goto out; + } + +out: + amdgpu_bo_kunmap(bo); + return r; +} + +#define RADEON_VCN_ENGINE_TYPE_ENCODE (0x00000002) +#define RADEON_VCN_ENGINE_TYPE_DECODE (0x00000003) + +#define RADEON_VCN_ENGINE_INFO (0x30000001) +#define RADEON_VCN_ENGINE_INFO_MAX_OFFSET 16 + +#define RENCODE_ENCODE_STANDARD_AV1 2 +#define RENCODE_IB_PARAM_SESSION_INIT 0x00000003 +#define RENCODE_IB_PARAM_SESSION_INIT_MAX_OFFSET 64 + +/* return the offset in ib if id is found, -1 otherwise + * to speed up the searching we only search upto max_offset + */ +static int vcn_v4_0_5_enc_find_ib_param(struct amdgpu_ib *ib, uint32_t id, int max_offset) +{ + int i; + + for (i = 0; i < ib->length_dw && i < max_offset && ib->ptr[i] >= 8; i += ib->ptr[i]/4) { + if (ib->ptr[i + 1] == id) + return i; + } + return -1; +} + +static int vcn_v4_0_5_ring_patch_cs_in_place(struct amdgpu_cs_parser *p, + struct amdgpu_job *job, + struct amdgpu_ib *ib) +{ + struct amdgpu_ring *ring = amdgpu_job_ring(job); + struct amdgpu_vcn_decode_buffer *decode_buffer; + uint64_t addr; + uint32_t val; + int idx; + + /* The first instance can decode anything */ + if (!ring->me) + return 0; + + /* RADEON_VCN_ENGINE_INFO is at the top of ib block */ + idx = vcn_v4_0_5_enc_find_ib_param(ib, RADEON_VCN_ENGINE_INFO, + RADEON_VCN_ENGINE_INFO_MAX_OFFSET); + if (idx < 0) /* engine info is missing */ + return 0; + + val = amdgpu_ib_get_value(ib, idx + 2); /* RADEON_VCN_ENGINE_TYPE */ + if (val == RADEON_VCN_ENGINE_TYPE_DECODE) { + decode_buffer = (struct amdgpu_vcn_decode_buffer *)&ib->ptr[idx + 6]; + + if (!(decode_buffer->valid_buf_flag & 0x1)) + return 0; + + addr = ((u64)decode_buffer->msg_buffer_address_hi) << 32 | + decode_buffer->msg_buffer_address_lo; + return vcn_v4_0_5_dec_msg(p, job, addr); + } else if (val == RADEON_VCN_ENGINE_TYPE_ENCODE) { + idx = vcn_v4_0_5_enc_find_ib_param(ib, RENCODE_IB_PARAM_SESSION_INIT, + RENCODE_IB_PARAM_SESSION_INIT_MAX_OFFSET); + if (idx >= 0 && ib->ptr[idx + 2] == RENCODE_ENCODE_STANDARD_AV1) + return vcn_v4_0_5_limit_sched(p, job); + } + return 0; +} + +static const struct amdgpu_ring_funcs vcn_v4_0_5_unified_ring_vm_funcs = { + .type = AMDGPU_RING_TYPE_VCN_ENC, + .align_mask = 0x3f, + .nop = VCN_ENC_CMD_NO_OP, + .get_rptr = vcn_v4_0_5_unified_ring_get_rptr, + .get_wptr = vcn_v4_0_5_unified_ring_get_wptr, + .set_wptr = vcn_v4_0_5_unified_ring_set_wptr, + .patch_cs_in_place = vcn_v4_0_5_ring_patch_cs_in_place, + .emit_frame_size = + SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 + + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 + + 4 + /* vcn_v2_0_enc_ring_emit_vm_flush */ + 5 + 5 + /* vcn_v2_0_enc_ring_emit_fence x2 vm fence */ + 1, /* vcn_v2_0_enc_ring_insert_end */ + .emit_ib_size = 5, /* vcn_v2_0_enc_ring_emit_ib */ + .emit_ib = vcn_v2_0_enc_ring_emit_ib, + .emit_fence = vcn_v2_0_enc_ring_emit_fence, + .emit_vm_flush = vcn_v2_0_enc_ring_emit_vm_flush, + .test_ring = amdgpu_vcn_enc_ring_test_ring, + .test_ib = amdgpu_vcn_unified_ring_test_ib, + .insert_nop = amdgpu_ring_insert_nop, + .insert_end = vcn_v2_0_enc_ring_insert_end, + .pad_ib = amdgpu_ring_generic_pad_ib, + .begin_use = amdgpu_vcn_ring_begin_use, + .end_use = amdgpu_vcn_ring_end_use, + .emit_wreg = vcn_v2_0_enc_ring_emit_wreg, + .emit_reg_wait = vcn_v2_0_enc_ring_emit_reg_wait, + .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, +}; + +/** + * vcn_v4_0_5_set_unified_ring_funcs - set unified ring functions + * + * @adev: amdgpu_device pointer + * + * Set unified ring functions + */ +static void vcn_v4_0_5_set_unified_ring_funcs(struct amdgpu_device *adev) +{ + int i; + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + + adev->vcn.inst[i].ring_enc[0].funcs = &vcn_v4_0_5_unified_ring_vm_funcs; + adev->vcn.inst[i].ring_enc[0].me = i; + + DRM_INFO("VCN(%d) encode/decode are enabled in VM mode\n", i); + } +} + +/** + * vcn_v4_0_5_is_idle - check VCN block is idle + * + * @handle: amdgpu_device pointer + * + * Check whether VCN block is idle + */ +static bool vcn_v4_0_5_is_idle(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i, ret = 1; + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + + ret &= (RREG32_SOC15(VCN, i, regUVD_STATUS) == UVD_STATUS__IDLE); + } + + return ret; +} + +/** + * vcn_v4_0_5_wait_for_idle - wait for VCN block idle + * + * @handle: amdgpu_device pointer + * + * Wait for VCN block idle + */ +static int vcn_v4_0_5_wait_for_idle(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i, ret = 0; + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + + ret = SOC15_WAIT_ON_RREG(VCN, i, regUVD_STATUS, UVD_STATUS__IDLE, + UVD_STATUS__IDLE); + if (ret) + return ret; + } + + return ret; +} + +/** + * vcn_v4_0_5_set_clockgating_state - set VCN block clockgating state + * + * @handle: amdgpu_device pointer + * @state: clock gating state + * + * Set VCN block clockgating state + */ +static int vcn_v4_0_5_set_clockgating_state(void *handle, enum amd_clockgating_state state) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + bool enable = (state == AMD_CG_STATE_GATE) ? true : false; + int i; + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + + if (enable) { + if (RREG32_SOC15(VCN, i, regUVD_STATUS) != UVD_STATUS__IDLE) + return -EBUSY; + vcn_v4_0_5_enable_clock_gating(adev, i); + } else { + vcn_v4_0_5_disable_clock_gating(adev, i); + } + } + + return 0; +} + +/** + * vcn_v4_0_5_set_powergating_state - set VCN block powergating state + * + * @handle: amdgpu_device pointer + * @state: power gating state + * + * Set VCN block powergating state + */ +static int vcn_v4_0_5_set_powergating_state(void *handle, enum amd_powergating_state state) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int ret; + + if (state == adev->vcn.cur_state) + return 0; + + if (state == AMD_PG_STATE_GATE) + ret = vcn_v4_0_5_stop(adev); + else + ret = vcn_v4_0_5_start(adev); + + if (!ret) + adev->vcn.cur_state = state; + + return ret; +} + +/** + * vcn_v4_0_5_set_interrupt_state - set VCN block interrupt state + * + * @adev: amdgpu_device pointer + * @source: interrupt sources + * @type: interrupt types + * @state: interrupt states + * + * Set VCN block interrupt state + */ +static int vcn_v4_0_5_set_interrupt_state(struct amdgpu_device *adev, struct amdgpu_irq_src *source, + unsigned type, enum amdgpu_interrupt_state state) +{ + return 0; +} + +/** + * vcn_v4_0_5_process_interrupt - process VCN block interrupt + * + * @adev: amdgpu_device pointer + * @source: interrupt sources + * @entry: interrupt entry from clients and sources + * + * Process VCN block interrupt + */ +static int vcn_v4_0_5_process_interrupt(struct amdgpu_device *adev, struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + uint32_t ip_instance; + + switch (entry->client_id) { + case SOC15_IH_CLIENTID_VCN: + ip_instance = 0; + break; + default: + DRM_ERROR("Unhandled client id: %d\n", entry->client_id); + return 0; + } + + DRM_DEBUG("IH: VCN TRAP\n"); + + switch (entry->src_id) { + case VCN_4_0__SRCID__UVD_ENC_GENERAL_PURPOSE: + amdgpu_fence_process(&adev->vcn.inst[ip_instance].ring_enc[0]); + break; + case VCN_4_0__SRCID_UVD_POISON: + amdgpu_vcn_process_poison_irq(adev, source, entry); + break; + default: + DRM_ERROR("Unhandled interrupt: %d %d\n", + entry->src_id, entry->src_data[0]); + break; + } + + return 0; +} + +static const struct amdgpu_irq_src_funcs vcn_v4_0_5_irq_funcs = { + .set = vcn_v4_0_5_set_interrupt_state, + .process = vcn_v4_0_5_process_interrupt, +}; + +/** + * vcn_v4_0_5_set_irq_funcs - set VCN block interrupt irq functions + * + * @adev: amdgpu_device pointer + * + * Set VCN block interrupt irq functions + */ +static void vcn_v4_0_5_set_irq_funcs(struct amdgpu_device *adev) +{ + int i; + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + + adev->vcn.inst[i].irq.num_types = adev->vcn.num_enc_rings + 1; + adev->vcn.inst[i].irq.funcs = &vcn_v4_0_5_irq_funcs; + } +} + +static const struct amd_ip_funcs vcn_v4_0_5_ip_funcs = { + .name = "vcn_v4_0_5", + .early_init = vcn_v4_0_5_early_init, + .late_init = NULL, + .sw_init = vcn_v4_0_5_sw_init, + .sw_fini = vcn_v4_0_5_sw_fini, + .hw_init = vcn_v4_0_5_hw_init, + .hw_fini = vcn_v4_0_5_hw_fini, + .suspend = vcn_v4_0_5_suspend, + .resume = vcn_v4_0_5_resume, + .is_idle = vcn_v4_0_5_is_idle, + .wait_for_idle = vcn_v4_0_5_wait_for_idle, + .check_soft_reset = NULL, + .pre_soft_reset = NULL, + .soft_reset = NULL, + .post_soft_reset = NULL, + .set_clockgating_state = vcn_v4_0_5_set_clockgating_state, + .set_powergating_state = vcn_v4_0_5_set_powergating_state, +}; + +const struct amdgpu_ip_block_version vcn_v4_0_5_ip_block = { + .type = AMD_IP_BLOCK_TYPE_VCN, + .major = 4, + .minor = 0, + .rev = 5, + .funcs = &vcn_v4_0_5_ip_funcs, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.h b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.h new file mode 100644 index 000000000000..ff9b3d6f6a47 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.h @@ -0,0 +1,35 @@ +/* + * Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __VCN_V4_0_5_H__ +#define __VCN_V4_0_5_H__ + +enum amdgpu_vcn_v4_0_5_sub_block { + AMDGPU_VCN_V4_0_5_VCPU_VCODEC = 0, + + AMDGPU_VCN_V4_0_5_MAX_SUB_BLOCK, +}; + +extern const struct amdgpu_ip_block_version vcn_v4_0_5_ip_block; + +#endif /* __VCN_V4_0_5_H__ */ diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c index 1e83db0c5438..d364c6dd152c 100644 --- a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c @@ -485,7 +485,7 @@ static int vega10_ih_sw_init(void *handle) if (r) return r; - r = amdgpu_ih_ring_init(adev, &adev->irq.ih, 256 * 1024, true); + r = amdgpu_ih_ring_init(adev, &adev->irq.ih, IH_RING_SIZE, true); if (r) return r; @@ -510,7 +510,7 @@ static int vega10_ih_sw_init(void *handle) /* initialize ih control registers offset */ vega10_ih_init_register_offset(adev); - r = amdgpu_ih_ring_init(adev, &adev->irq.ih_soft, PAGE_SIZE, true); + r = amdgpu_ih_ring_init(adev, &adev->irq.ih_soft, IH_SW_RING_SIZE, true); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/vega20_ih.c b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c index 4d719df376a7..ddfc6941f9d5 100644 --- a/drivers/gpu/drm/amd/amdgpu/vega20_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c @@ -291,7 +291,7 @@ static int vega20_ih_irq_init(struct amdgpu_device *adev) adev->nbio.funcs->ih_control(adev); - if ((adev->ip_versions[OSSSYS_HWIP][0] == IP_VERSION(4, 2, 1)) && + if ((amdgpu_ip_version(adev, OSSSYS_HWIP, 0) == IP_VERSION(4, 2, 1)) && adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { ih_chicken = RREG32_SOC15(OSSSYS, 0, mmIH_CHICKEN); if (adev->irq.ih.use_bus_addr) { @@ -304,8 +304,8 @@ static int vega20_ih_irq_init(struct amdgpu_device *adev) /* psp firmware won't program IH_CHICKEN for aldebaran * driver needs to program it properly according to * MC_SPACE type in IH_RB_CNTL */ - if ((adev->ip_versions[OSSSYS_HWIP][0] == IP_VERSION(4, 4, 0)) || - (adev->ip_versions[OSSSYS_HWIP][0] == IP_VERSION(4, 4, 2))) { + if ((amdgpu_ip_version(adev, OSSSYS_HWIP, 0) == IP_VERSION(4, 4, 0)) || + (amdgpu_ip_version(adev, OSSSYS_HWIP, 0) == IP_VERSION(4, 4, 2))) { ih_chicken = RREG32_SOC15(OSSSYS, 0, mmIH_CHICKEN_ALDEBARAN); if (adev->irq.ih.use_bus_addr) { ih_chicken = REG_SET_FIELD(ih_chicken, IH_CHICKEN, @@ -334,8 +334,8 @@ static int vega20_ih_irq_init(struct amdgpu_device *adev) vega20_setup_retry_doorbell(adev->irq.retry_cam_doorbell_index)); /* Enable IH Retry CAM */ - if (adev->ip_versions[OSSSYS_HWIP][0] == IP_VERSION(4, 4, 0) || - adev->ip_versions[OSSSYS_HWIP][0] == IP_VERSION(4, 4, 2)) + if (amdgpu_ip_version(adev, OSSSYS_HWIP, 0) == IP_VERSION(4, 4, 0) || + amdgpu_ip_version(adev, OSSSYS_HWIP, 0) == IP_VERSION(4, 4, 2)) WREG32_FIELD15(OSSSYS, 0, IH_RETRY_INT_CAM_CNTL_ALDEBARAN, ENABLE, 1); else @@ -500,7 +500,8 @@ static int vega20_ih_self_irq(struct amdgpu_device *adev, case 2: schedule_work(&adev->irq.ih2_work); break; - default: break; + default: + break; } return 0; } @@ -536,10 +537,10 @@ static int vega20_ih_sw_init(void *handle) return r; if ((adev->flags & AMD_IS_APU) && - (adev->ip_versions[OSSSYS_HWIP][0] == IP_VERSION(4, 4, 2))) + (amdgpu_ip_version(adev, OSSSYS_HWIP, 0) == IP_VERSION(4, 4, 2))) use_bus_addr = false; - r = amdgpu_ih_ring_init(adev, &adev->irq.ih, 256 * 1024, use_bus_addr); + r = amdgpu_ih_ring_init(adev, &adev->irq.ih, IH_RING_SIZE, use_bus_addr); if (r) return r; @@ -553,7 +554,7 @@ static int vega20_ih_sw_init(void *handle) adev->irq.ih1.use_doorbell = true; adev->irq.ih1.doorbell_index = (adev->doorbell_index.ih + 1) << 1; - if (adev->ip_versions[OSSSYS_HWIP][0] != IP_VERSION(4, 4, 2)) { + if (amdgpu_ip_version(adev, OSSSYS_HWIP, 0) != IP_VERSION(4, 4, 2)) { r = amdgpu_ih_ring_init(adev, &adev->irq.ih2, PAGE_SIZE, true); if (r) return r; @@ -565,7 +566,7 @@ static int vega20_ih_sw_init(void *handle) /* initialize ih control registers offset */ vega20_ih_init_register_offset(adev); - r = amdgpu_ih_ring_init(adev, &adev->irq.ih_soft, PAGE_SIZE, use_bus_addr); + r = amdgpu_ih_ring_init(adev, &adev->irq.ih_soft, IH_SW_RING_SIZE, use_bus_addr); if (r) return r; @@ -710,8 +711,7 @@ static void vega20_ih_set_interrupt_funcs(struct amdgpu_device *adev) adev->irq.ih_funcs = &vega20_ih_funcs; } -const struct amdgpu_ip_block_version vega20_ih_ip_block = -{ +const struct amdgpu_ip_block_version vega20_ih_ip_block = { .type = AMD_IP_BLOCK_TYPE_IH, .major = 4, .minor = 2, diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c index 6a8494f98d3e..1a98812981f4 100644 --- a/drivers/gpu/drm/amd/amdgpu/vi.c +++ b/drivers/gpu/drm/amd/amdgpu/vi.c @@ -1124,11 +1124,10 @@ static void vi_program_aspm(struct amdgpu_device *adev) bool bL1SS = false; bool bClkReqSupport = true; - if (!amdgpu_device_should_use_aspm(adev) || !amdgpu_device_aspm_support_quirk()) + if (!amdgpu_device_should_use_aspm(adev)) return; - if (adev->flags & AMD_IS_APU || - adev->asic_type < CHIP_POLARIS10) + if (adev->asic_type < CHIP_POLARIS10) return; orig = data = RREG32_PCIE(ixPCIE_LC_CNTL); diff --git a/drivers/gpu/drm/amd/amdgpu/vpe_6_1_fw_if.h b/drivers/gpu/drm/amd/amdgpu/vpe_6_1_fw_if.h new file mode 100644 index 000000000000..9b550deb48d3 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/vpe_6_1_fw_if.h @@ -0,0 +1,217 @@ +/* Copyright 2023 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#ifndef __VPE_6_1_FW_IF_H_ +#define __VPE_6_1_FW_IF_H_ + +/**************** + * VPE OP Codes + ****************/ +enum VPE_CMD_OPCODE { + VPE_CMD_OPCODE_NOP = 0x0, + VPE_CMD_OPCODE_VPE_DESC = 0x1, + VPE_CMD_OPCODE_PLANE_CFG = 0x2, + VPE_CMD_OPCODE_VPEP_CFG = 0x3, + VPE_CMD_OPCODE_INDIRECT = 0x4, + VPE_CMD_OPCODE_FENCE = 0x5, + VPE_CMD_OPCODE_TRAP = 0x6, + VPE_CMD_OPCODE_REG_WRITE = 0x7, + VPE_CMD_OPCODE_POLL_REGMEM = 0x8, + VPE_CMD_OPCODE_COND_EXE = 0x9, + VPE_CMD_OPCODE_ATOMIC = 0xA, + VPE_CMD_OPCODE_PLANE_FILL = 0xB, + VPE_CMD_OPCODE_TIMESTAMP = 0xD +}; + +/** Generic Command Header + * Generic Commands include: + * Noop, Fence, Trap, + * RegisterWrite, PollRegisterWriteMemory, + * SetLocalTimestamp, GetLocalTimestamp + * GetGlobalGPUTimestamp */ +#define VPE_HEADER_SUB_OPCODE__SHIFT 8 +#define VPE_HEADER_SUB_OPCODE_MASK 0x0000FF00 +#define VPE_HEADER_OPCODE__SHIFT 0 +#define VPE_HEADER_OPCODE_MASK 0x000000FF + +#define VPE_CMD_HEADER(op, subop) \ + (((subop << VPE_HEADER_SUB_OPCODE__SHIFT) & VPE_HEADER_SUB_OPCODE_MASK) | \ + ((op << VPE_HEADER_OPCODE__SHIFT) & VPE_HEADER_OPCODE_MASK)) + + + /*************************** + * VPE NOP + ***************************/ +#define VPE_CMD_NOP_HEADER_COUNT__SHIFT 16 +#define VPE_CMD_NOP_HEADER_COUNT_MASK 0x00003FFF + +#define VPE_CMD_NOP_HEADER_COUNT(count) \ + (((count) & VPE_CMD_NOP_HEADER_COUNT_MASK) << VPE_CMD_NOP_HEADER_COUNT__SHIFT) + + /*************************** + * VPE Descriptor + ***************************/ +#define VPE_DESC_CD__SHIFT 16 +#define VPE_DESC_CD_MASK 0x000F0000 + +#define VPE_DESC_CMD_HEADER(cd) \ + (VPE_CMD_HEADER(VPE_CMD_OPCODE_VPE_DESC, 0) | \ + (((cd) << VPE_DESC_CD__SHIFT) & VPE_DESC_CD_MASK)) + + /*************************** + * VPE Plane Config + ***************************/ +enum VPE_PLANE_CFG_SUBOP { + VPE_PLANE_CFG_SUBOP_1_TO_1 = 0x0, + VPE_PLANE_CFG_SUBOP_2_TO_1 = 0x1, + VPE_PLANE_CFG_SUBOP_2_TO_2 = 0x2 +}; + +#define VPE_PLANE_CFG_ONE_PLANE 0 +#define VPE_PLANE_CFG_TWO_PLANES 1 + +#define VPE_PLANE_CFG_NPS0__SHIFT 16 +#define VPE_PLANE_CFG_NPS0_MASK 0x00030000 + +#define VPE_PLANE_CFG_NPD0__SHIFT 18 +#define VPE_PLANE_CFG_NPD0_MASK 0x000C0000 + +#define VPE_PLANE_CFG_NPS1__SHIFT 20 +#define VPE_PLANE_CFG_NPS1_MASK 0x00300000 + +#define VPE_PLANE_CFG_NPD1__SHIFT 22 +#define VPE_PLANE_CFG_NPD1_MASK 0x00C00000 + +#define VPE_PLANE_CFG_TMZ__SHIFT 16 +#define VPE_PLANE_CFG_TMZ_MASK 0x00010000 + +#define VPE_PLANE_CFG_SWIZZLE_MODE__SHIFT 3 +#define VPE_PLANE_CFG_SWIZZLE_MODE_MASK 0x000000F8 + +#define VPE_PLANE_CFG_ROTATION__SHIFT 0 +#define VPE_PLANE_CFG_ROTATION_MASK 0x00000003 + +#define VPE_PLANE_ADDR_LO__SHIFT 0 +#define VPE_PLANE_ADDR_LO_MASK 0xFFFFFF00 + +#define VPE_PLANE_CFG_PITCH__SHIFT 0 +#define VPE_PLANE_CFG_PITCH_MASK 0x00003FFF + +#define VPE_PLANE_CFG_VIEWPORT_Y__SHIFT 16 +#define VPE_PLANE_CFG_VIEWPORT_Y_MASK 0x3FFF0000 +#define VPE_PLANE_CFG_VIEWPORT_X__SHIFT 0 +#define VPE_PLANE_CFG_VIEWPORT_X_MASK 0x00003FFF + + +#define VPE_PLANE_CFG_VIEWPORT_HEIGHT__SHIFT 16 +#define VPE_PLANE_CFG_VIEWPORT_HEIGHT_MASK 0x1FFF0000 +#define VPE_PLANE_CFG_VIEWPORT_ELEMENT_SIZE__SHIFT 13 +#define VPE_PLANE_CFG_VIEWPORT_ELEMENT_SIZE_MASK 0x0000E000 +#define VPE_PLANE_CFG_VIEWPORT_WIDTH__SHIFT 0 +#define VPE_PLANE_CFG_VIEWPORT_WIDTH_MASK 0x00001FFF + +enum VPE_PLANE_CFG_ELEMENT_SIZE { + VPE_PLANE_CFG_ELEMENT_SIZE_8BPE = 0, + VPE_PLANE_CFG_ELEMENT_SIZE_16BPE = 1, + VPE_PLANE_CFG_ELEMENT_SIZE_32BPE = 2, + VPE_PLANE_CFG_ELEMENT_SIZE_64BPE = 3 +}; + +#define VPE_PLANE_CFG_CMD_HEADER(subop, nps0, npd0, nps1, npd1) \ + (VPE_CMD_HEADER(VPE_CMD_OPCODE_PLANE_CFG, subop) | \ + (((nps0) << VPE_PLANE_CFG_NPS0__SHIFT) & VPE_PLANE_CFG_NPS0_MASK) | \ + (((npd0) << VPE_PLANE_CFG_NPD0__SHIFT) & VPE_PLANE_CFG_NPD0_MASK) | \ + (((nps1) << VPE_PLANE_CFG_NPS1__SHIFT) & VPE_PLANE_CFG_NPS1_MASK) | \ + (((npd0) << VPE_PLANE_CFG_NPD1__SHIFT) & VPE_PLANE_CFG_NPD1_MASK)) + + +/************************ + * VPEP Config + ************************/ +enum VPE_VPEP_CFG_SUBOP { + VPE_VPEP_CFG_SUBOP_DIR_CFG = 0x0, + VPE_VPEP_CFG_SUBOP_IND_CFG = 0x1 +}; + + +// Direct Config Command Header +#define VPE_DIR_CFG_HEADER_ARRAY_SIZE__SHIFT 16 +#define VPE_DIR_CFG_HEADER_ARRAY_SIZE_MASK 0xFFFF0000 + +#define VPE_DIR_CFG_CMD_HEADER(subop, arr_sz) \ + (VPE_CMD_HEADER(VPE_CMD_OPCODE_VPEP_CFG, subop) | \ + (((arr_sz) << VPE_DIR_CFG_HEADER_ARRAY_SIZE__SHIFT) & VPE_DIR_CFG_HEADER_ARRAY_SIZE_MASK)) + + +#define VPE_DIR_CFG_PKT_REGISTER_OFFSET__SHIFT 2 +#define VPE_DIR_CFG_PKT_REGISTER_OFFSET_MASK 0x000FFFFC + +#define VPE_DIR_CFG_PKT_DATA_SIZE__SHIFT 20 +#define VPE_DIR_CFG_PKT_DATA_SIZE_MASK 0xFFF00000 + + +// InDirect Config Command Header +#define VPE_IND_CFG_HEADER_NUM_DST__SHIFT 28 +#define VPE_IND_CFG_HEADER_NUM_DST_MASK 0xF0000000 + +#define VPE_IND_CFG_CMD_HEADER(subop, num_dst) \ + (VPE_CMD_HEADER(VPE_CMD_OPCODE_VPEP_CFG, subop) | \ + (((num_dst) << VPE_IND_CFG_HEADER_NUM_DST__SHIFT) & VPE_IND_CFG_HEADER_NUM_DST_MASK)) + +// Indirect Buffer Command Header +#define VPE_CMD_INDIRECT_HEADER_VMID__SHIFT 16 +#define VPE_CMD_INDIRECT_HEADER_VMID_MASK 0x0000000F +#define VPE_CMD_INDIRECT_HEADER_VMID(vmid) \ + (((vmid) & VPE_CMD_INDIRECT_HEADER_VMID_MASK) << VPE_CMD_INDIRECT_HEADER_VMID__SHIFT) + + +/************************** + * Poll Reg/Mem Sub-OpCode + **************************/ +enum VPE_POLL_REGMEM_SUBOP { + VPE_POLL_REGMEM_SUBOP_REGMEM = 0x0, + VPE_POLL_REGMEM_SUBOP_REGMEM_WRITE = 0x1 +}; + +#define VPE_CMD_POLL_REGMEM_HEADER_FUNC__SHIFT 28 +#define VPE_CMD_POLL_REGMEM_HEADER_FUNC_MASK 0x00000007 +#define VPE_CMD_POLL_REGMEM_HEADER_FUNC(func) \ + (((func) & VPE_CMD_POLL_REGMEM_HEADER_FUNC_MASK) << VPE_CMD_POLL_REGMEM_HEADER_FUNC__SHIFT) + +#define VPE_CMD_POLL_REGMEM_HEADER_MEM__SHIFT 31 +#define VPE_CMD_POLL_REGMEM_HEADER_MEM_MASK 0x00000001 +#define VPE_CMD_POLL_REGMEM_HEADER_MEM(mem) \ + (((mem) & VPE_CMD_POLL_REGMEM_HEADER_MEM_MASK) << VPE_CMD_POLL_REGMEM_HEADER_MEM__SHIFT) + +#define VPE_CMD_POLL_REGMEM_DW5_INTERVAL__SHIFT 0 +#define VPE_CMD_POLL_REGMEM_DW5_INTERVAL_MASK 0x0000FFFF +#define VPE_CMD_POLL_REGMEM_DW5_INTERVAL(interval) \ + (((interval) & VPE_CMD_POLL_REGMEM_DW5_INTERVAL_MASK) << VPE_CMD_POLL_REGMEM_DW5_INTERVAL__SHIFT) + +#define VPE_CMD_POLL_REGMEM_DW5_RETRY_COUNT__SHIFT 16 +#define VPE_CMD_POLL_REGMEM_DW5_RETRY_COUNT_MASK 0x00000FFF +#define VPE_CMD_POLL_REGMEM_DW5_RETRY_COUNT(count) \ + (((count) & VPE_CMD_POLL_REGMEM_DW5_RETRY_COUNT_MASK) << VPE_CMD_POLL_REGMEM_DW5_RETRY_COUNT__SHIFT) + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/vpe_v6_1.c b/drivers/gpu/drm/amd/amdgpu/vpe_v6_1.c new file mode 100644 index 000000000000..174f13eff575 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/vpe_v6_1.c @@ -0,0 +1,291 @@ +/* + * Copyright 2022 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include <linux/firmware.h> +#include <drm/drm_drv.h> + +#include "amdgpu.h" +#include "amdgpu_ucode.h" +#include "amdgpu_vpe.h" +#include "vpe_v6_1.h" +#include "soc15_common.h" +#include "ivsrcid/vpe/irqsrcs_vpe_6_1.h" +#include "vpe/vpe_6_1_0_offset.h" +#include "vpe/vpe_6_1_0_sh_mask.h" + +MODULE_FIRMWARE("amdgpu/vpe_6_1_0.bin"); + +#define VPE_THREAD1_UCODE_OFFSET 0x8000 + +static uint32_t vpe_v6_1_get_reg_offset(struct amdgpu_vpe *vpe, uint32_t inst, uint32_t offset) +{ + uint32_t base; + + base = vpe->ring.adev->reg_offset[VPE_HWIP][0][0]; + + return base + offset; +} + +static void vpe_v6_1_halt(struct amdgpu_vpe *vpe, bool halt) +{ + struct amdgpu_device *adev = vpe->ring.adev; + uint32_t f32_cntl; + + f32_cntl = RREG32(vpe_get_reg_offset(vpe, 0, regVPEC_F32_CNTL)); + f32_cntl = REG_SET_FIELD(f32_cntl, VPEC_F32_CNTL, HALT, halt ? 1 : 0); + f32_cntl = REG_SET_FIELD(f32_cntl, VPEC_F32_CNTL, TH1_RESET, halt ? 1 : 0); + WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_F32_CNTL), f32_cntl); +} + +static int vpe_v6_1_irq_init(struct amdgpu_vpe *vpe) +{ + struct amdgpu_device *adev = container_of(vpe, struct amdgpu_device, vpe); + int ret; + + ret = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_VPE, + VPE_6_1_SRCID__VPE_TRAP, + &adev->vpe.trap_irq); + if (ret) + return ret; + + return 0; +} + +static int vpe_v6_1_load_microcode(struct amdgpu_vpe *vpe) +{ + struct amdgpu_device *adev = vpe->ring.adev; + const struct vpe_firmware_header_v1_0 *vpe_hdr; + const __le32 *data; + uint32_t ucode_offset[2], ucode_size[2]; + uint32_t i, size_dw; + uint32_t ret; + + // disable UMSCH_INT_ENABLE + ret = RREG32(vpe_get_reg_offset(vpe, 0, regVPEC_CNTL)); + ret = REG_SET_FIELD(ret, VPEC_CNTL, UMSCH_INT_ENABLE, 0); + WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_CNTL), ret); + + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { + uint32_t f32_offset, f32_cntl; + + f32_offset = vpe_get_reg_offset(vpe, 0, regVPEC_F32_CNTL); + f32_cntl = RREG32(f32_offset); + f32_cntl = REG_SET_FIELD(f32_cntl, VPEC_F32_CNTL, HALT, 0); + f32_cntl = REG_SET_FIELD(f32_cntl, VPEC_F32_CNTL, TH1_RESET, 0); + + adev->vpe.cmdbuf_cpu_addr[0] = f32_offset; + adev->vpe.cmdbuf_cpu_addr[1] = f32_cntl; + + amdgpu_vpe_psp_update_sram(adev); + return 0; + } + + vpe_hdr = (const struct vpe_firmware_header_v1_0 *)adev->vpe.fw->data; + + /* Thread 0(command thread) ucode offset/size */ + ucode_offset[0] = le32_to_cpu(vpe_hdr->header.ucode_array_offset_bytes); + ucode_size[0] = le32_to_cpu(vpe_hdr->ctx_ucode_size_bytes); + /* Thread 1(control thread) ucode offset/size */ + ucode_offset[1] = le32_to_cpu(vpe_hdr->ctl_ucode_offset); + ucode_size[1] = le32_to_cpu(vpe_hdr->ctl_ucode_size_bytes); + + vpe_v6_1_halt(vpe, true); + + for (i = 0; i < 2; i++) { + if (i > 0) + WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_UCODE_ADDR), VPE_THREAD1_UCODE_OFFSET); + else + WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_UCODE_ADDR), 0); + + data = (const __le32 *)(adev->vpe.fw->data + ucode_offset[i]); + size_dw = ucode_size[i] / sizeof(__le32); + + while (size_dw--) { + if (amdgpu_emu_mode && size_dw % 500 == 0) + msleep(1); + WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_UCODE_DATA), le32_to_cpup(data++)); + } + + } + + vpe_v6_1_halt(vpe, false); + + return 0; +} + +static int vpe_v6_1_ring_start(struct amdgpu_vpe *vpe) +{ + struct amdgpu_ring *ring = &vpe->ring; + struct amdgpu_device *adev = ring->adev; + uint32_t rb_bufsz, rb_cntl; + uint32_t ib_cntl; + uint32_t doorbell, doorbell_offset; + int ret; + + rb_bufsz = order_base_2(ring->ring_size / 4); + rb_cntl = RREG32(vpe_get_reg_offset(vpe, 0, regVPEC_QUEUE0_RB_CNTL)); + rb_cntl = REG_SET_FIELD(rb_cntl, VPEC_QUEUE0_RB_CNTL, RB_SIZE, rb_bufsz); + rb_cntl = REG_SET_FIELD(rb_cntl, VPEC_QUEUE0_RB_CNTL, RB_PRIV, 1); + rb_cntl = REG_SET_FIELD(rb_cntl, VPEC_QUEUE0_RB_CNTL, RB_VMID, 0); + WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_QUEUE0_RB_CNTL), rb_cntl); + + WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_QUEUE0_RB_RPTR), 0); + WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_QUEUE0_RB_RPTR_HI), 0); + WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_QUEUE0_RB_WPTR), 0); + WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_QUEUE0_RB_WPTR_HI), 0); + + WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_QUEUE0_RB_RPTR_ADDR_LO), + lower_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFC); + WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_QUEUE0_RB_RPTR_ADDR_HI), + upper_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFF); + + WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_QUEUE0_RB_BASE), ring->gpu_addr >> 8); + WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_QUEUE0_RB_BASE_HI), ring->gpu_addr >> 40); + + ring->wptr = 0; + + /* before programing wptr to a less value, need set minor_ptr_update first */ + WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_QUEUE0_MINOR_PTR_UPDATE), 1); + + WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_QUEUE0_RB_WPTR), lower_32_bits(ring->wptr) << 2); + WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_QUEUE0_RB_WPTR_HI), upper_32_bits(ring->wptr) << 2); + + /* set minor_ptr_update to 0 after wptr programed */ + WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_QUEUE0_MINOR_PTR_UPDATE), 0); + + doorbell = RREG32(vpe_get_reg_offset(vpe, 0, regVPEC_QUEUE0_DOORBELL)); + doorbell_offset = RREG32(vpe_get_reg_offset(vpe, 0, regVPEC_QUEUE0_DOORBELL_OFFSET)); + + doorbell = REG_SET_FIELD(doorbell, VPEC_QUEUE0_DOORBELL, ENABLE, ring->use_doorbell ? 1 : 0); + doorbell_offset = REG_SET_FIELD(doorbell_offset, VPEC_QUEUE0_DOORBELL_OFFSET, OFFSET, ring->doorbell_index); + + WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_QUEUE0_DOORBELL), doorbell); + WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_QUEUE0_DOORBELL_OFFSET), doorbell_offset); + + adev->nbio.funcs->vpe_doorbell_range(adev, 0, ring->use_doorbell, ring->doorbell_index, 2); + + rb_cntl = REG_SET_FIELD(rb_cntl, VPEC_QUEUE0_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1); + rb_cntl = REG_SET_FIELD(rb_cntl, VPEC_QUEUE0_RB_CNTL, RB_ENABLE, 1); + WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_QUEUE0_RB_CNTL), rb_cntl); + + ib_cntl = RREG32(vpe_get_reg_offset(vpe, 0, regVPEC_QUEUE0_IB_CNTL)); + ib_cntl = REG_SET_FIELD(ib_cntl, VPEC_QUEUE0_IB_CNTL, IB_ENABLE, 1); + WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_QUEUE0_IB_CNTL), ib_cntl); + + ring->sched.ready = true; + + ret = amdgpu_ring_test_helper(ring); + if (ret) { + ring->sched.ready = false; + return ret; + } + + return 0; +} + +static int vpe_v_6_1_ring_stop(struct amdgpu_vpe *vpe) +{ + struct amdgpu_device *adev = vpe->ring.adev; + uint32_t queue_reset; + int ret; + + queue_reset = RREG32(vpe_get_reg_offset(vpe, 0, regVPEC_QUEUE_RESET_REQ)); + queue_reset = REG_SET_FIELD(queue_reset, VPEC_QUEUE_RESET_REQ, QUEUE0_RESET, 1); + WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_QUEUE_RESET_REQ), queue_reset); + + ret = SOC15_WAIT_ON_RREG(VPE, 0, regVPEC_QUEUE_RESET_REQ, 0, + VPEC_QUEUE_RESET_REQ__QUEUE0_RESET_MASK); + if (ret) + dev_err(adev->dev, "VPE queue reset failed\n"); + + vpe->ring.sched.ready = false; + + return ret; +} + +static int vpe_v6_1_set_trap_irq_state(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + unsigned int type, + enum amdgpu_interrupt_state state) +{ + struct amdgpu_vpe *vpe = &adev->vpe; + uint32_t vpe_cntl; + + vpe_cntl = RREG32(vpe_get_reg_offset(vpe, 0, regVPEC_CNTL)); + vpe_cntl = REG_SET_FIELD(vpe_cntl, VPEC_CNTL, TRAP_ENABLE, + state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); + WREG32(vpe_get_reg_offset(vpe, 0, regVPEC_CNTL), vpe_cntl); + + return 0; +} + +static int vpe_v6_1_process_trap_irq(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + + dev_dbg(adev->dev, "IH: VPE trap\n"); + + switch (entry->client_id) { + case SOC21_IH_CLIENTID_VPE: + amdgpu_fence_process(&adev->vpe.ring); + break; + default: + break; + } + + return 0; +} + +static int vpe_v6_1_set_regs(struct amdgpu_vpe *vpe) +{ + vpe->regs.queue0_rb_rptr_lo = regVPEC_QUEUE0_RB_RPTR; + vpe->regs.queue0_rb_rptr_hi = regVPEC_QUEUE0_RB_RPTR_HI; + vpe->regs.queue0_rb_wptr_lo = regVPEC_QUEUE0_RB_WPTR; + vpe->regs.queue0_rb_wptr_hi = regVPEC_QUEUE0_RB_WPTR_HI; + vpe->regs.queue0_preempt = regVPEC_QUEUE0_PREEMPT; + + return 0; +} + +static const struct vpe_funcs vpe_v6_1_funcs = { + .get_reg_offset = vpe_v6_1_get_reg_offset, + .set_regs = vpe_v6_1_set_regs, + .irq_init = vpe_v6_1_irq_init, + .init_microcode = amdgpu_vpe_init_microcode, + .load_microcode = vpe_v6_1_load_microcode, + .ring_init = amdgpu_vpe_ring_init, + .ring_start = vpe_v6_1_ring_start, + .ring_stop = vpe_v_6_1_ring_stop, + .ring_fini = amdgpu_vpe_ring_fini, +}; + +static const struct amdgpu_irq_src_funcs vpe_v6_1_trap_irq_funcs = { + .set = vpe_v6_1_set_trap_irq_state, + .process = vpe_v6_1_process_trap_irq, +}; + +void vpe_v6_1_set_funcs(struct amdgpu_vpe *vpe) +{ + vpe->funcs = &vpe_v6_1_funcs; + vpe->trap_irq.funcs = &vpe_v6_1_trap_irq_funcs; +} diff --git a/drivers/gpu/drm/amd/amdgpu/vpe_v6_1.h b/drivers/gpu/drm/amd/amdgpu/vpe_v6_1.h new file mode 100644 index 000000000000..a9bea7905a77 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/vpe_v6_1.h @@ -0,0 +1,29 @@ +/* + * Copyright 2022 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#ifndef __VPE_V6_1_H__ +#define __VPE_V6_1_H__ + +#include "amdgpu_vpe.h" + +void vpe_v6_1_set_funcs(struct amdgpu_vpe *vpe); + +#endif |