Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu')
207 files changed, 8961 insertions, 5667 deletions
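Several hunks below key off the IP_VERSION() packing that this series extends in amdgpu.h with IP_VERSION_MAJ/MIN/REV accessors. A minimal standalone sketch of that encoding, using only the macro definitions as they appear in the amdgpu.h hunk; the test values are illustrative, not taken from the patch:

#include <assert.h>
#include <stdint.h>

/* As defined in the amdgpu.h hunk: major in bits 16 and up, minor in
 * bits 8-15, revision in bits 0-7. */
#define IP_VERSION(mj, mn, rv) (((mj) << 16) | ((mn) << 8) | (rv))
#define IP_VERSION_MAJ(ver)    ((ver) >> 16)
#define IP_VERSION_MIN(ver)    (((ver) >> 8) & 0xFF)
#define IP_VERSION_REV(ver)    ((ver) & 0xFF)

int main(void)
{
	/* MP1 13.0.2 is the Aldebaran check used in the aldebaran.c hunks. */
	uint32_t ver = IP_VERSION(13, 0, 2);

	assert(IP_VERSION_MAJ(ver) == 13);
	assert(IP_VERSION_MIN(ver) == 0);
	assert(IP_VERSION_REV(ver) == 2);
	return 0;
}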
diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index 7fedbb725e17..2b454e7d7a76 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -46,19 +46,19 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \ atom.o amdgpu_fence.o amdgpu_ttm.o amdgpu_object.o amdgpu_gart.o \ amdgpu_encoders.o amdgpu_display.o amdgpu_i2c.o \ amdgpu_gem.o amdgpu_ring.o \ - amdgpu_cs.o amdgpu_bios.o amdgpu_benchmark.o amdgpu_test.o \ + amdgpu_cs.o amdgpu_bios.o amdgpu_benchmark.o \ atombios_dp.o amdgpu_afmt.o amdgpu_trace_points.o \ atombios_encoders.o amdgpu_sa.o atombios_i2c.o \ - amdgpu_dma_buf.o amdgpu_vm.o amdgpu_ib.o amdgpu_pll.o \ + amdgpu_dma_buf.o amdgpu_vm.o amdgpu_vm_pt.o amdgpu_ib.o amdgpu_pll.o \ amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o \ amdgpu_gtt_mgr.o amdgpu_preempt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o \ amdgpu_atomfirmware.o amdgpu_vf_error.o amdgpu_sched.o \ - amdgpu_debugfs.o amdgpu_ids.o amdgpu_gmc.o amdgpu_mmhub.o \ + amdgpu_debugfs.o amdgpu_ids.o amdgpu_gmc.o \ amdgpu_xgmi.o amdgpu_csa.o amdgpu_ras.o amdgpu_vm_cpu.o \ amdgpu_vm_sdma.o amdgpu_discovery.o amdgpu_ras_eeprom.o amdgpu_nbio.o \ amdgpu_umc.o smu_v11_0_i2c.o amdgpu_fru_eeprom.o amdgpu_rap.o \ - amdgpu_fw_attestation.o amdgpu_securedisplay.o amdgpu_hdp.o \ - amdgpu_eeprom.o amdgpu_mca.o + amdgpu_fw_attestation.o amdgpu_securedisplay.o \ + amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o amdgpu-$(CONFIG_PROC_FS) += amdgpu_fdinfo.o @@ -74,7 +74,7 @@ amdgpu-$(CONFIG_DRM_AMDGPU_SI)+= si.o gmc_v6_0.o gfx_v6_0.o si_ih.o si_dma.o dce amdgpu-y += \ vi.o mxgpu_vi.o nbio_v6_1.o soc15.o emu_soc.o mxgpu_ai.o nbio_v7_0.o vega10_reg_init.o \ vega20_reg_init.o nbio_v7_4.o nbio_v2_3.o nv.o arct_reg_init.o mxgpu_nv.o \ - nbio_v7_2.o hdp_v4_0.o hdp_v5_0.o aldebaran_reg_init.o aldebaran.o + nbio_v7_2.o hdp_v4_0.o hdp_v5_0.o aldebaran_reg_init.o aldebaran.o soc21.o # add DF block amdgpu-y += \ diff --git a/drivers/gpu/drm/amd/amdgpu/ObjectID.h b/drivers/gpu/drm/amd/amdgpu/ObjectID.h index 5b393622f592..a0f0a17e224f 100644 --- a/drivers/gpu/drm/amd/amdgpu/ObjectID.h +++ b/drivers/gpu/drm/amd/amdgpu/ObjectID.h @@ -119,6 +119,7 @@ #define CONNECTOR_OBJECT_ID_eDP 0x14 #define CONNECTOR_OBJECT_ID_MXM 0x15 #define CONNECTOR_OBJECT_ID_LVDS_eDP 0x16 +#define CONNECTOR_OBJECT_ID_USBC 0x17 /* deleted */ diff --git a/drivers/gpu/drm/amd/amdgpu/aldebaran.c b/drivers/gpu/drm/amd/amdgpu/aldebaran.c index bcfdb63b1d42..c6cc493a5486 100644 --- a/drivers/gpu/drm/amd/amdgpu/aldebaran.c +++ b/drivers/gpu/drm/amd/amdgpu/aldebaran.c @@ -31,6 +31,17 @@ #include "amdgpu_psp.h" #include "amdgpu_xgmi.h" +static bool aldebaran_is_mode2_default(struct amdgpu_reset_control *reset_ctl) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)reset_ctl->handle; + + if ((adev->ip_versions[MP1_HWIP][0] == IP_VERSION(13, 0, 2) && + adev->gmc.xgmi.connected_to_cpu)) + return true; + + return false; +} + static struct amdgpu_reset_handler * aldebaran_get_reset_handler(struct amdgpu_reset_control *reset_ctl, struct amdgpu_reset_context *reset_context) @@ -48,7 +59,7 @@ aldebaran_get_reset_handler(struct amdgpu_reset_control *reset_ctl, } } - if (adev->gmc.xgmi.connected_to_cpu) { + if (aldebaran_is_mode2_default(reset_ctl)) { list_for_each_entry(handler, &reset_ctl->reset_handlers, handler_list) { if (handler->reset_method == AMD_RESET_METHOD_MODE2) { @@ -136,18 +147,31 @@ static int aldebaran_mode2_perform_reset(struct amdgpu_reset_control *reset_ctl, struct amdgpu_reset_context *reset_context) { - 
struct amdgpu_device *tmp_adev = NULL; struct amdgpu_device *adev = (struct amdgpu_device *)reset_ctl->handle; + struct amdgpu_device *tmp_adev = NULL; + struct list_head reset_device_list; int r = 0; dev_dbg(adev->dev, "aldebaran perform hw reset\n"); - if (reset_context->hive == NULL) { + if (adev->ip_versions[MP1_HWIP][0] == IP_VERSION(13, 0, 2) && + reset_context->hive == NULL) { /* Wrong context, return error */ return -EINVAL; } - list_for_each_entry(tmp_adev, &reset_context->hive->device_list, - gmc.xgmi.head) { + INIT_LIST_HEAD(&reset_device_list); + if (reset_context->hive) { + list_for_each_entry (tmp_adev, + &reset_context->hive->device_list, + gmc.xgmi.head) + list_add_tail(&tmp_adev->reset_list, + &reset_device_list); + } else { + list_add_tail(&reset_context->reset_req_dev->reset_list, + &reset_device_list); + } + + list_for_each_entry (tmp_adev, &reset_device_list, reset_list) { mutex_lock(&tmp_adev->reset_cntl->reset_lock); tmp_adev->reset_cntl->active_reset = AMD_RESET_METHOD_MODE2; } @@ -155,8 +179,7 @@ aldebaran_mode2_perform_reset(struct amdgpu_reset_control *reset_ctl, * Mode2 reset doesn't need any sync between nodes in XGMI hive, instead launch * them together so that they can be completed asynchronously on multiple nodes */ - list_for_each_entry(tmp_adev, &reset_context->hive->device_list, - gmc.xgmi.head) { + list_for_each_entry (tmp_adev, &reset_device_list, reset_list) { /* For XGMI run all resets in parallel to speed up the process */ if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) { if (!queue_work(system_unbound_wq, @@ -174,9 +197,7 @@ aldebaran_mode2_perform_reset(struct amdgpu_reset_control *reset_ctl, /* For XGMI wait for all resets to complete before proceed */ if (!r) { - list_for_each_entry(tmp_adev, - &reset_context->hive->device_list, - gmc.xgmi.head) { + list_for_each_entry (tmp_adev, &reset_device_list, reset_list) { if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) { flush_work(&tmp_adev->reset_cntl->reset_work); r = tmp_adev->asic_reset_res; @@ -186,8 +207,7 @@ aldebaran_mode2_perform_reset(struct amdgpu_reset_control *reset_ctl, } } - list_for_each_entry(tmp_adev, &reset_context->hive->device_list, - gmc.xgmi.head) { + list_for_each_entry (tmp_adev, &reset_device_list, reset_list) { mutex_unlock(&tmp_adev->reset_cntl->reset_lock); tmp_adev->reset_cntl->active_reset = AMD_RESET_METHOD_NONE; } @@ -260,7 +280,7 @@ static int aldebaran_mode2_restore_ip(struct amdgpu_device *adev) adev->gfx.rlc.funcs->resume(adev); /* Wait for FW reset event complete */ - r = smu_wait_for_event(adev, SMU_EVENT_RESET_COMPLETE, 0); + r = amdgpu_dpm_wait_for_event(adev, SMU_EVENT_RESET_COMPLETE, 0); if (r) { dev_err(adev->dev, "Failed to get response from firmware after reset\n"); @@ -319,16 +339,30 @@ static int aldebaran_mode2_restore_hwcontext(struct amdgpu_reset_control *reset_ctl, struct amdgpu_reset_context *reset_context) { - int r; struct amdgpu_device *tmp_adev = NULL; + struct list_head reset_device_list; + int r; - if (reset_context->hive == NULL) { + if (reset_context->reset_req_dev->ip_versions[MP1_HWIP][0] == + IP_VERSION(13, 0, 2) && + reset_context->hive == NULL) { /* Wrong context, return error */ return -EINVAL; } - list_for_each_entry(tmp_adev, &reset_context->hive->device_list, - gmc.xgmi.head) { + INIT_LIST_HEAD(&reset_device_list); + if (reset_context->hive) { + list_for_each_entry (tmp_adev, + &reset_context->hive->device_list, + gmc.xgmi.head) + list_add_tail(&tmp_adev->reset_list, + &reset_device_list); + } else { + 
list_add_tail(&reset_context->reset_req_dev->reset_list, + &reset_device_list); + } + + list_for_each_entry (tmp_adev, &reset_device_list, reset_list) { dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n"); r = aldebaran_mode2_restore_ip(tmp_adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 9a53a4de2bb7..d557f4db2565 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -60,7 +60,6 @@ #include <drm/amdgpu_drm.h> #include <drm/drm_gem.h> #include <drm/drm_ioctl.h> -#include <drm/gpu_scheduler.h> #include <kgd_kfd_interface.h> #include "dm_pp_interface.h" @@ -99,7 +98,6 @@ #include "amdgpu_gem.h" #include "amdgpu_doorbell.h" #include "amdgpu_amdkfd.h" -#include "amdgpu_smu.h" #include "amdgpu_discovery.h" #include "amdgpu_mes.h" #include "amdgpu_umc.h" @@ -109,6 +107,7 @@ #include "amdgpu_smuio.h" #include "amdgpu_fdinfo.h" #include "amdgpu_mca.h" +#include "amdgpu_ras.h" #define MAX_GPU_INSTANCE 16 @@ -155,8 +154,6 @@ extern int amdgpu_vis_vram_limit; extern int amdgpu_gart_size; extern int amdgpu_gtt_size; extern int amdgpu_moverate; -extern int amdgpu_benchmarking; -extern int amdgpu_testing; extern int amdgpu_audio; extern int amdgpu_disp_priority; extern int amdgpu_hw_i2c; @@ -182,7 +179,7 @@ extern int amdgpu_sched_jobs; extern int amdgpu_sched_hw_submission; extern uint amdgpu_pcie_gen_cap; extern uint amdgpu_pcie_lane_cap; -extern uint amdgpu_cg_mask; +extern u64 amdgpu_cg_mask; extern uint amdgpu_pg_mask; extern uint amdgpu_sdma_phase_quantum; extern char *amdgpu_disable_cu; @@ -197,7 +194,6 @@ extern int amdgpu_emu_mode; extern uint amdgpu_smu_memory_pool_size; extern int amdgpu_smu_pptable_id; extern uint amdgpu_dc_feature_mask; -extern uint amdgpu_freesync_vid_mode; extern uint amdgpu_dc_debug_mask; extern uint amdgpu_dm_abm_level; extern int amdgpu_backlight; @@ -214,6 +210,7 @@ extern int amdgpu_mes; extern int amdgpu_noretry; extern int amdgpu_force_asic_type; extern int amdgpu_smartshift_bias; +extern int amdgpu_use_xgmi_p2p; #ifdef CONFIG_HSA_AMD extern int sched_policy; extern bool debug_evictions; @@ -235,6 +232,9 @@ extern int amdgpu_cik_support; #endif extern int amdgpu_num_kcq; +#define AMDGPU_VCNFW_LOG_SIZE (32 * 1024) +extern int amdgpu_vcnfw_log; + #define AMDGPU_VM_MAX_NUM_CTX 4096 #define AMDGPU_SG_THRESHOLD (256*1024*1024) #define AMDGPU_DEFAULT_GTT_SIZE_MB 3072ULL /* 3GB by default */ @@ -276,9 +276,6 @@ extern int amdgpu_num_kcq; #define AMDGPU_SMARTSHIFT_MIN_BIAS (-100) struct amdgpu_device; -struct amdgpu_ib; -struct amdgpu_cs_parser; -struct amdgpu_job; struct amdgpu_irq_src; struct amdgpu_fpriv; struct amdgpu_bo_va_mapping; @@ -325,7 +322,7 @@ int amdgpu_device_ip_set_powergating_state(void *dev, enum amd_ip_block_type block_type, enum amd_powergating_state state); void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev, - u32 *flags); + u64 *flags); int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev, enum amd_ip_block_type block_type); bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev, @@ -373,7 +370,8 @@ int amdgpu_device_ip_block_add(struct amdgpu_device *adev, */ bool amdgpu_get_bios(struct amdgpu_device *adev); bool amdgpu_read_bios(struct amdgpu_device *adev); - +bool amdgpu_soc15_read_bios_from_rom(struct amdgpu_device *adev, + u8 *bios, u32 length_bytes); /* * Clocks */ @@ -466,20 +464,6 @@ struct amdgpu_flip_work { /* - * CP & rings. 
- */ - -struct amdgpu_ib { - struct amdgpu_sa_bo *sa_bo; - uint32_t length_dw; - uint64_t gpu_addr; - uint32_t *ptr; - uint32_t flags; -}; - -extern const struct drm_sched_backend_ops amdgpu_sched_ops; - -/* * file private structure */ @@ -494,79 +478,6 @@ struct amdgpu_fpriv { int amdgpu_file_to_fpriv(struct file *filp, struct amdgpu_fpriv **fpriv); -int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm, - unsigned size, - enum amdgpu_ib_pool_type pool, - struct amdgpu_ib *ib); -void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib, - struct dma_fence *f); -int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, - struct amdgpu_ib *ibs, struct amdgpu_job *job, - struct dma_fence **f); -int amdgpu_ib_pool_init(struct amdgpu_device *adev); -void amdgpu_ib_pool_fini(struct amdgpu_device *adev); -int amdgpu_ib_ring_tests(struct amdgpu_device *adev); - -/* - * CS. - */ -struct amdgpu_cs_chunk { - uint32_t chunk_id; - uint32_t length_dw; - void *kdata; -}; - -struct amdgpu_cs_post_dep { - struct drm_syncobj *syncobj; - struct dma_fence_chain *chain; - u64 point; -}; - -struct amdgpu_cs_parser { - struct amdgpu_device *adev; - struct drm_file *filp; - struct amdgpu_ctx *ctx; - - /* chunks */ - unsigned nchunks; - struct amdgpu_cs_chunk *chunks; - - /* scheduler job object */ - struct amdgpu_job *job; - struct drm_sched_entity *entity; - - /* buffer objects */ - struct ww_acquire_ctx ticket; - struct amdgpu_bo_list *bo_list; - struct amdgpu_mn *mn; - struct amdgpu_bo_list_entry vm_pd; - struct list_head validated; - struct dma_fence *fence; - uint64_t bytes_moved_threshold; - uint64_t bytes_moved_vis_threshold; - uint64_t bytes_moved; - uint64_t bytes_moved_vis; - - /* user fence */ - struct amdgpu_bo_list_entry uf_entry; - - unsigned num_post_deps; - struct amdgpu_cs_post_dep *post_deps; -}; - -static inline u32 amdgpu_get_ib_value(struct amdgpu_cs_parser *p, - uint32_t ib_idx, int idx) -{ - return p->job->ibs[ib_idx].ptr[idx]; -} - -static inline void amdgpu_set_ib_value(struct amdgpu_cs_parser *p, - uint32_t ib_idx, int idx, - uint32_t value) -{ - p->job->ibs[ib_idx].ptr[idx] = value; -} - /* * Writeback */ @@ -586,13 +497,7 @@ void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb); /* * Benchmarking */ -void amdgpu_benchmark(struct amdgpu_device *adev, int test_number); - - -/* - * Testing - */ -void amdgpu_test_moves(struct amdgpu_device *adev); +int amdgpu_benchmark(struct amdgpu_device *adev, int test_number); /* * ASIC specific register table accessible by UMD @@ -761,16 +666,21 @@ enum amd_hw_ip_block_type { MAX_HWIP }; -#define HWIP_MAX_INSTANCE 10 +#define HWIP_MAX_INSTANCE 11 #define HW_ID_MAX 300 #define IP_VERSION(mj, mn, rv) (((mj) << 16) | ((mn) << 8) | (rv)) +#define IP_VERSION_MAJ(ver) ((ver) >> 16) +#define IP_VERSION_MIN(ver) (((ver) >> 8) & 0xFF) +#define IP_VERSION_REV(ver) ((ver) & 0xFF) struct amd_powerplay { void *pp_handle; const struct amd_pm_funcs *pp_funcs; }; +struct ip_discovery_top; + /* polaris10 kickers */ #define ASICID_IS_P20(did, rid) (((did == 0x67DF) && \ ((rid == 0xE3) || \ @@ -813,6 +723,8 @@ struct amd_powerplay { #define AMDGPU_RESET_MAGIC_NUM 64 #define AMDGPU_MAX_DF_PERFMONS 4 #define AMDGPU_PRODUCT_NAME_LEN 64 +struct amdgpu_reset_domain; + struct amdgpu_device { struct device *dev; struct pci_dev *pdev; @@ -950,14 +862,8 @@ struct amdgpu_device { /* powerplay */ struct amd_powerplay powerplay; - bool pp_force_state_enabled; - - /* smu */ - struct smu_context smu; - - /* dpm */ struct amdgpu_pm pm; - u32 
cg_flags; + u64 cg_flags; u32 pg_flags; /* nbio */ @@ -1054,9 +960,7 @@ struct amdgpu_device { bool in_s4; bool in_s0ix; - atomic_t in_gpu_reset; enum pp_mp1_state mp1_state; - struct rw_semaphore reset_sem; struct amdgpu_doorbell_index doorbell_index; struct mutex notifier_lock; @@ -1100,6 +1004,18 @@ struct amdgpu_device { uint32_t ip_versions[MAX_HWIP][HWIP_MAX_INSTANCE]; bool ram_is_direct_mapped; + + struct list_head ras_list; + + struct ip_discovery_top *ip_top; + + struct amdgpu_reset_domain *reset_domain; + + struct mutex benchmark_mutex; + + /* reset dump register */ + uint32_t *reset_dump_reg_list; + int num_regs; }; static inline struct amdgpu_device *drm_to_adev(struct drm_device *ddev) @@ -1293,9 +1209,12 @@ bool amdgpu_device_has_job_running(struct amdgpu_device *adev); bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev); int amdgpu_device_gpu_recover(struct amdgpu_device *adev, struct amdgpu_job* job); +int amdgpu_device_gpu_recover_imp(struct amdgpu_device *adev, + struct amdgpu_job *job); void amdgpu_device_pci_config_reset(struct amdgpu_device *adev); int amdgpu_device_pci_reset(struct amdgpu_device *adev); bool amdgpu_device_need_post(struct amdgpu_device *adev); +bool amdgpu_device_should_use_aspm(struct amdgpu_device *adev); void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev, u64 num_bytes, u64 num_vis_bytes); @@ -1321,6 +1240,10 @@ void amdgpu_device_invalidate_hdp(struct amdgpu_device *adev, struct amdgpu_ring *ring); void amdgpu_device_halt(struct amdgpu_device *adev); +u32 amdgpu_device_pcie_port_rreg(struct amdgpu_device *adev, + u32 reg); +void amdgpu_device_pcie_port_wreg(struct amdgpu_device *adev, + u32 reg, u32 v); /* atpx handler */ #if defined(CONFIG_VGA_SWITCHEROO) @@ -1428,10 +1351,6 @@ static inline bool amdgpu_acpi_is_s0ix_active(struct amdgpu_device *adev) { retu static inline bool amdgpu_acpi_is_s3_active(struct amdgpu_device *adev) { return false; } #endif -int amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser, - uint64_t addr, struct amdgpu_bo **bo, - struct amdgpu_bo_va_mapping **mapping); - #if defined(CONFIG_DRM_AMD_DC) int amdgpu_dm_display_resume(struct amdgpu_device *adev ); #else @@ -1458,6 +1377,15 @@ int amdgpu_device_set_cg_state(struct amdgpu_device *adev, int amdgpu_device_set_pg_state(struct amdgpu_device *adev, enum amd_powergating_state state); +static inline bool amdgpu_device_has_timeouts_enabled(struct amdgpu_device *adev) +{ + return amdgpu_gpu_recovery != 0 && + adev->gfx_timeout != MAX_SCHEDULE_TIMEOUT && + adev->compute_timeout != MAX_SCHEDULE_TIMEOUT && + adev->sdma_timeout != MAX_SCHEDULE_TIMEOUT && + adev->video_timeout != MAX_SCHEDULE_TIMEOUT; +} + #include "amdgpu_object.h" static inline bool amdgpu_is_tmz(struct amdgpu_device *adev) @@ -1465,8 +1393,6 @@ static inline bool amdgpu_is_tmz(struct amdgpu_device *adev) return adev->gmc.tmz_enabled; } -static inline int amdgpu_in_reset(struct amdgpu_device *adev) -{ - return atomic_read(&adev->in_gpu_reset); -} +int amdgpu_in_reset(struct amdgpu_device *adev); + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index 6ca1db3c243f..64c6664b34e8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -724,3 +724,11 @@ void amdgpu_amdkfd_ras_poison_consumption_handler(struct amdgpu_device *adev, bo else if (reset) amdgpu_amdkfd_gpu_reset(adev); } + +bool amdgpu_amdkfd_ras_query_utcl2_poison_status(struct amdgpu_device *adev) +{ + if 
(adev->gfx.ras && adev->gfx.ras->query_utcl2_poison_status) + return adev->gfx.ras->query_utcl2_poison_status(adev); + else + return false; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index ac841ae8f5cc..f8b9f27adcf5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -131,6 +131,7 @@ struct amdkfd_process_info { atomic_t evicted_bos; struct delayed_work restore_userptr_work; struct pid *pid; + bool block_mmu_notifications; }; int amdgpu_amdkfd_init(void); @@ -268,13 +269,12 @@ uint64_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *drm_priv); int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( struct amdgpu_device *adev, uint64_t va, uint64_t size, void *drm_priv, struct kgd_mem **mem, - uint64_t *offset, uint32_t flags); + uint64_t *offset, uint32_t flags, bool criu_resume); int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( struct amdgpu_device *adev, struct kgd_mem *mem, void *drm_priv, uint64_t *size); -int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( - struct amdgpu_device *adev, struct kgd_mem *mem, void *drm_priv, - bool *table_freed); +int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(struct amdgpu_device *adev, + struct kgd_mem *mem, void *drm_priv); int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu( struct amdgpu_device *adev, struct kgd_mem *mem, void *drm_priv); int amdgpu_amdkfd_gpuvm_sync_memory( @@ -297,6 +297,11 @@ int amdgpu_amdkfd_get_tile_config(struct amdgpu_device *adev, struct tile_config *config); void amdgpu_amdkfd_ras_poison_consumption_handler(struct amdgpu_device *adev, bool reset); +bool amdgpu_amdkfd_bo_mapped_to_dev(struct amdgpu_device *adev, struct kgd_mem *mem); +void amdgpu_amdkfd_block_mmu_notifications(void *p); +int amdgpu_amdkfd_criu_resume(void *p); +bool amdgpu_amdkfd_ras_query_utcl2_poison_status(struct amdgpu_device *adev); + #if IS_ENABLED(CONFIG_HSA_AMD) void amdgpu_amdkfd_gpuvm_init_mem_limits(void); void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c index 46cd4ee6bafb..c8935d718207 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c @@ -37,10 +37,7 @@ const struct kfd2kgd_calls aldebaran_kfd2kgd = { .hqd_sdma_is_occupied = kgd_arcturus_hqd_sdma_is_occupied, .hqd_destroy = kgd_gfx_v9_hqd_destroy, .hqd_sdma_destroy = kgd_arcturus_hqd_sdma_destroy, - .address_watch_disable = kgd_gfx_v9_address_watch_disable, - .address_watch_execute = kgd_gfx_v9_address_watch_execute, .wave_control_execute = kgd_gfx_v9_wave_control_execute, - .address_watch_get_offset = kgd_gfx_v9_address_watch_get_offset, .get_atc_vmid_pasid_mapping_info = kgd_gfx_v9_get_atc_vmid_pasid_mapping_info, .set_vm_context_page_table_base = kgd_gfx_v9_set_vm_context_page_table_base, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c index abe93b3ff765..4191af5a3f13 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c @@ -289,10 +289,7 @@ const struct kfd2kgd_calls arcturus_kfd2kgd = { .hqd_sdma_is_occupied = kgd_arcturus_hqd_sdma_is_occupied, .hqd_destroy = kgd_gfx_v9_hqd_destroy, .hqd_sdma_destroy = kgd_arcturus_hqd_sdma_destroy, - .address_watch_disable = kgd_gfx_v9_address_watch_disable, - .address_watch_execute = kgd_gfx_v9_address_watch_execute, 
.wave_control_execute = kgd_gfx_v9_wave_control_execute, - .address_watch_get_offset = kgd_gfx_v9_address_watch_get_offset, .get_atc_vmid_pasid_mapping_info = kgd_gfx_v9_get_atc_vmid_pasid_mapping_info, .set_vm_context_page_table_base = diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c index 7b7f4b2764c1..9378fc79e9ea 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c @@ -671,20 +671,6 @@ static bool get_atc_vmid_pasid_mapping_info(struct amdgpu_device *adev, return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK); } -static int kgd_address_watch_disable(struct amdgpu_device *adev) -{ - return 0; -} - -static int kgd_address_watch_execute(struct amdgpu_device *adev, - unsigned int watch_point_id, - uint32_t cntl_val, - uint32_t addr_hi, - uint32_t addr_lo) -{ - return 0; -} - static int kgd_wave_control_execute(struct amdgpu_device *adev, uint32_t gfx_index_val, uint32_t sq_cmd) @@ -709,13 +695,6 @@ static int kgd_wave_control_execute(struct amdgpu_device *adev, return 0; } -static uint32_t kgd_address_watch_get_offset(struct amdgpu_device *adev, - unsigned int watch_point_id, - unsigned int reg_offset) -{ - return 0; -} - static void set_vm_context_page_table_base(struct amdgpu_device *adev, uint32_t vmid, uint64_t page_table_base) { @@ -767,10 +746,7 @@ const struct kfd2kgd_calls gfx_v10_kfd2kgd = { .hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied, .hqd_destroy = kgd_hqd_destroy, .hqd_sdma_destroy = kgd_hqd_sdma_destroy, - .address_watch_disable = kgd_address_watch_disable, - .address_watch_execute = kgd_address_watch_execute, .wave_control_execute = kgd_wave_control_execute, - .address_watch_get_offset = kgd_address_watch_get_offset, .get_atc_vmid_pasid_mapping_info = get_atc_vmid_pasid_mapping_info, .set_vm_context_page_table_base = set_vm_context_page_table_base, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c index 1f37d3574001..ba21ec6b35e0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c @@ -26,6 +26,8 @@ #include "gc/gc_10_3_0_sh_mask.h" #include "oss/osssys_5_0_0_offset.h" #include "oss/osssys_5_0_0_sh_mask.h" +#include "athub/athub_2_1_0_offset.h" +#include "athub/athub_2_1_0_sh_mask.h" #include "soc15_common.h" #include "v10_structs.h" #include "nv.h" @@ -582,21 +584,6 @@ static int hqd_sdma_destroy_v10_3(struct amdgpu_device *adev, void *mqd, return 0; } - -static int address_watch_disable_v10_3(struct amdgpu_device *adev) -{ - return 0; -} - -static int address_watch_execute_v10_3(struct amdgpu_device *adev, - unsigned int watch_point_id, - uint32_t cntl_val, - uint32_t addr_hi, - uint32_t addr_lo) -{ - return 0; -} - static int wave_control_execute_v10_3(struct amdgpu_device *adev, uint32_t gfx_index_val, uint32_t sq_cmd) @@ -621,11 +608,16 @@ static int wave_control_execute_v10_3(struct amdgpu_device *adev, return 0; } -static uint32_t address_watch_get_offset_v10_3(struct amdgpu_device *adev, - unsigned int watch_point_id, - unsigned int reg_offset) +static bool get_atc_vmid_pasid_mapping_info_v10_3(struct amdgpu_device *adev, + uint8_t vmid, uint16_t *p_pasid) { - return 0; + uint32_t value; + + value = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING) + + vmid); + *p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK; + + return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK); } 
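/* Annotation: the kfd2kgd hunks in this area drop the stubbed-out
 * address_watch_* callbacks and wire up get_atc_vmid_pasid_mapping_info
 * per ASIC generation. A standalone sketch of the mapping-register
 * decode these implementations share; the mask values below are
 * assumptions for illustration, not taken from this patch's register
 * headers. */
#include <stdbool.h>
#include <stdint.h>

#define PASID_MAPPING_PASID_MASK  0xFFFFu     /* assumed PASID field */
#define PASID_MAPPING_VALID_MASK  (1u << 31)  /* assumed VALID bit */

static bool decode_pasid_mapping(uint32_t value, uint16_t *p_pasid)
{
	/* Low bits carry the PASID; the valid bit says whether the VMID
	 * currently maps to a process at all. */
	*p_pasid = value & PASID_MAPPING_PASID_MASK;
	return !!(value & PASID_MAPPING_VALID_MASK);
}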
static void set_vm_context_page_table_base_v10_3(struct amdgpu_device *adev, @@ -809,11 +801,8 @@ const struct kfd2kgd_calls gfx_v10_3_kfd2kgd = { .hqd_sdma_is_occupied = hqd_sdma_is_occupied_v10_3, .hqd_destroy = hqd_destroy_v10_3, .hqd_sdma_destroy = hqd_sdma_destroy_v10_3, - .address_watch_disable = address_watch_disable_v10_3, - .address_watch_execute = address_watch_execute_v10_3, .wave_control_execute = wave_control_execute_v10_3, - .address_watch_get_offset = address_watch_get_offset_v10_3, - .get_atc_vmid_pasid_mapping_info = NULL, + .get_atc_vmid_pasid_mapping_info = get_atc_vmid_pasid_mapping_info_v10_3, .set_vm_context_page_table_base = set_vm_context_page_table_base_v10_3, .program_trap_handler_settings = program_trap_handler_settings_v10_3, #if 0 diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c index 36528dad7684..65552bb7d2f2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c @@ -45,43 +45,6 @@ enum { MAX_WATCH_ADDRESSES = 4 }; -enum { - ADDRESS_WATCH_REG_ADDR_HI = 0, - ADDRESS_WATCH_REG_ADDR_LO, - ADDRESS_WATCH_REG_CNTL, - ADDRESS_WATCH_REG_MAX -}; - -/* not defined in the CI/KV reg file */ -enum { - ADDRESS_WATCH_REG_CNTL_ATC_BIT = 0x10000000UL, - ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK = 0x00FFFFFF, - ADDRESS_WATCH_REG_ADDLOW_MASK_EXTENSION = 0x03000000, - /* extend the mask to 26 bits to match the low address field */ - ADDRESS_WATCH_REG_ADDLOW_SHIFT = 6, - ADDRESS_WATCH_REG_ADDHIGH_MASK = 0xFFFF -}; - -static const uint32_t watchRegs[MAX_WATCH_ADDRESSES * ADDRESS_WATCH_REG_MAX] = { - mmTCP_WATCH0_ADDR_H, mmTCP_WATCH0_ADDR_L, mmTCP_WATCH0_CNTL, - mmTCP_WATCH1_ADDR_H, mmTCP_WATCH1_ADDR_L, mmTCP_WATCH1_CNTL, - mmTCP_WATCH2_ADDR_H, mmTCP_WATCH2_ADDR_L, mmTCP_WATCH2_CNTL, - mmTCP_WATCH3_ADDR_H, mmTCP_WATCH3_ADDR_L, mmTCP_WATCH3_CNTL -}; - -union TCP_WATCH_CNTL_BITS { - struct { - uint32_t mask:24; - uint32_t vmid:4; - uint32_t atc:1; - uint32_t mode:2; - uint32_t valid:1; - } bitfields, bits; - uint32_t u32All; - signed int i32All; - float f32All; -}; - static void lock_srbm(struct amdgpu_device *adev, uint32_t mec, uint32_t pipe, uint32_t queue, uint32_t vmid) { @@ -529,55 +492,6 @@ static int kgd_hqd_sdma_destroy(struct amdgpu_device *adev, void *mqd, return 0; } -static int kgd_address_watch_disable(struct amdgpu_device *adev) -{ - union TCP_WATCH_CNTL_BITS cntl; - unsigned int i; - - cntl.u32All = 0; - - cntl.bitfields.valid = 0; - cntl.bitfields.mask = ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK; - cntl.bitfields.atc = 1; - - /* Turning off this address until we set all the registers */ - for (i = 0; i < MAX_WATCH_ADDRESSES; i++) - WREG32(watchRegs[i * ADDRESS_WATCH_REG_MAX + - ADDRESS_WATCH_REG_CNTL], cntl.u32All); - - return 0; -} - -static int kgd_address_watch_execute(struct amdgpu_device *adev, - unsigned int watch_point_id, - uint32_t cntl_val, - uint32_t addr_hi, - uint32_t addr_lo) -{ - union TCP_WATCH_CNTL_BITS cntl; - - cntl.u32All = cntl_val; - - /* Turning off this watch point until we set all the registers */ - cntl.bitfields.valid = 0; - WREG32(watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX + - ADDRESS_WATCH_REG_CNTL], cntl.u32All); - - WREG32(watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX + - ADDRESS_WATCH_REG_ADDR_HI], addr_hi); - - WREG32(watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX + - ADDRESS_WATCH_REG_ADDR_LO], addr_lo); - - /* Enable the watch point */ - cntl.bitfields.valid = 1; - - WREG32(watchRegs[watch_point_id * 
ADDRESS_WATCH_REG_MAX + - ADDRESS_WATCH_REG_CNTL], cntl.u32All); - - return 0; -} - static int kgd_wave_control_execute(struct amdgpu_device *adev, uint32_t gfx_index_val, uint32_t sq_cmd) @@ -602,13 +516,6 @@ static int kgd_wave_control_execute(struct amdgpu_device *adev, return 0; } -static uint32_t kgd_address_watch_get_offset(struct amdgpu_device *adev, - unsigned int watch_point_id, - unsigned int reg_offset) -{ - return watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX + reg_offset]; -} - static bool get_atc_vmid_pasid_mapping_info(struct amdgpu_device *adev, uint8_t vmid, uint16_t *p_pasid) { @@ -665,10 +572,7 @@ const struct kfd2kgd_calls gfx_v7_kfd2kgd = { .hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied, .hqd_destroy = kgd_hqd_destroy, .hqd_sdma_destroy = kgd_hqd_sdma_destroy, - .address_watch_disable = kgd_address_watch_disable, - .address_watch_execute = kgd_address_watch_execute, .wave_control_execute = kgd_wave_control_execute, - .address_watch_get_offset = kgd_address_watch_get_offset, .get_atc_vmid_pasid_mapping_info = get_atc_vmid_pasid_mapping_info, .set_scratch_backing_va = set_scratch_backing_va, .set_vm_context_page_table_base = set_vm_context_page_table_base, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c index 52832cd69a93..9dc5f2a0cc07 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c @@ -538,20 +538,6 @@ static bool get_atc_vmid_pasid_mapping_info(struct amdgpu_device *adev, return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK); } -static int kgd_address_watch_disable(struct amdgpu_device *adev) -{ - return 0; -} - -static int kgd_address_watch_execute(struct amdgpu_device *adev, - unsigned int watch_point_id, - uint32_t cntl_val, - uint32_t addr_hi, - uint32_t addr_lo) -{ - return 0; -} - static int kgd_wave_control_execute(struct amdgpu_device *adev, uint32_t gfx_index_val, uint32_t sq_cmd) @@ -576,13 +562,6 @@ static int kgd_wave_control_execute(struct amdgpu_device *adev, return 0; } -static uint32_t kgd_address_watch_get_offset(struct amdgpu_device *adev, - unsigned int watch_point_id, - unsigned int reg_offset) -{ - return 0; -} - static void set_scratch_backing_va(struct amdgpu_device *adev, uint64_t va, uint32_t vmid) { @@ -614,10 +593,7 @@ const struct kfd2kgd_calls gfx_v8_kfd2kgd = { .hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied, .hqd_destroy = kgd_hqd_destroy, .hqd_sdma_destroy = kgd_hqd_sdma_destroy, - .address_watch_disable = kgd_address_watch_disable, - .address_watch_execute = kgd_address_watch_execute, .wave_control_execute = kgd_wave_control_execute, - .address_watch_get_offset = kgd_address_watch_get_offset, .get_atc_vmid_pasid_mapping_info = get_atc_vmid_pasid_mapping_info, .set_scratch_backing_va = set_scratch_backing_va, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c index 1abf662a0e91..81e3b528bbc9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c @@ -622,20 +622,6 @@ bool kgd_gfx_v9_get_atc_vmid_pasid_mapping_info(struct amdgpu_device *adev, return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK); } -int kgd_gfx_v9_address_watch_disable(struct amdgpu_device *adev) -{ - return 0; -} - -int kgd_gfx_v9_address_watch_execute(struct amdgpu_device *adev, - unsigned int watch_point_id, - uint32_t cntl_val, - uint32_t addr_hi, - uint32_t addr_lo) -{ - return 0; -} - int 
kgd_gfx_v9_wave_control_execute(struct amdgpu_device *adev, uint32_t gfx_index_val, uint32_t sq_cmd) @@ -660,13 +646,6 @@ int kgd_gfx_v9_wave_control_execute(struct amdgpu_device *adev, return 0; } -uint32_t kgd_gfx_v9_address_watch_get_offset(struct amdgpu_device *adev, - unsigned int watch_point_id, - unsigned int reg_offset) -{ - return 0; -} - void kgd_gfx_v9_set_vm_context_page_table_base(struct amdgpu_device *adev, uint32_t vmid, uint64_t page_table_base) { @@ -736,7 +715,7 @@ static void get_wave_count(struct amdgpu_device *adev, int queue_idx, * process whose pasid is provided as a parameter. The process could have ZERO * or more queues running and submitting waves to compute units. * - * @kgd: Handle of device from which to get number of waves in flight + * @adev: Handle of device from which to get number of waves in flight * @pasid: Identifies the process for which this query call is invoked * @pasid_wave_cnt: Output parameter updated with number of waves in flight that * belong to process with given pasid @@ -745,7 +724,7 @@ static void get_wave_count(struct amdgpu_device *adev, int queue_idx, * * Note: It's possible that the device has too many queues (oversubscription) * in which case a VMID could be remapped to a different PASID. This could lead - * to an iaccurate wave count. Following is a high-level sequence: + * to an inaccurate wave count. Following is a high-level sequence: * Time T1: vmid = getVmid(); vmid is associated with Pasid P1 * Time T2: passId = getPasId(vmid); vmid is associated with Pasid P2 * In the sequence above wave count obtained from time T1 will be incorrectly @@ -888,10 +867,7 @@ const struct kfd2kgd_calls gfx_v9_kfd2kgd = { .hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied, .hqd_destroy = kgd_gfx_v9_hqd_destroy, .hqd_sdma_destroy = kgd_hqd_sdma_destroy, - .address_watch_disable = kgd_gfx_v9_address_watch_disable, - .address_watch_execute = kgd_gfx_v9_address_watch_execute, .wave_control_execute = kgd_gfx_v9_wave_control_execute, - .address_watch_get_offset = kgd_gfx_v9_address_watch_get_offset, .get_atc_vmid_pasid_mapping_info = kgd_gfx_v9_get_atc_vmid_pasid_mapping_info, .set_vm_context_page_table_base = kgd_gfx_v9_set_vm_context_page_table_base, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h index 24be49df26fd..c7ed3bc9053c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h @@ -46,19 +46,9 @@ int kgd_gfx_v9_hqd_destroy(struct amdgpu_device *adev, void *mqd, enum kfd_preempt_type reset_type, unsigned int utimeout, uint32_t pipe_id, uint32_t queue_id); -int kgd_gfx_v9_address_watch_disable(struct amdgpu_device *adev); -int kgd_gfx_v9_address_watch_execute(struct amdgpu_device *adev, - unsigned int watch_point_id, - uint32_t cntl_val, - uint32_t addr_hi, - uint32_t addr_lo); int kgd_gfx_v9_wave_control_execute(struct amdgpu_device *adev, uint32_t gfx_index_val, uint32_t sq_cmd); -uint32_t kgd_gfx_v9_address_watch_get_offset(struct amdgpu_device *adev, - unsigned int watch_point_id, - unsigned int reg_offset); - bool kgd_gfx_v9_get_atc_vmid_pasid_mapping_info(struct amdgpu_device *adev, uint8_t vmid, uint16_t *p_pasid); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index f9bab963a948..80b6b8e432fd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -121,7 +121,7 @@ static size_t 
amdgpu_amdkfd_acc_size(uint64_t size) } /** - * @amdgpu_amdkfd_reserve_mem_limit() - Decrease available memory by size + * amdgpu_amdkfd_reserve_mem_limit() - Decrease available memory by size * of buffer including any reserved for control structures * * @adev: Device to which allocated BO belongs to @@ -253,53 +253,18 @@ void amdgpu_amdkfd_release_notify(struct amdgpu_bo *bo) static int amdgpu_amdkfd_remove_eviction_fence(struct amdgpu_bo *bo, struct amdgpu_amdkfd_fence *ef) { - struct dma_resv *resv = bo->tbo.base.resv; - struct dma_resv_list *old, *new; - unsigned int i, j, k; + struct dma_fence *replacement; if (!ef) return -EINVAL; - old = dma_resv_shared_list(resv); - if (!old) - return 0; - - new = kmalloc(struct_size(new, shared, old->shared_max), GFP_KERNEL); - if (!new) - return -ENOMEM; - - /* Go through all the shared fences in the resevation object and sort - * the interesting ones to the end of the list. + /* TODO: Instead of block before we should use the fence of the page + * table update and TLB flush here directly. */ - for (i = 0, j = old->shared_count, k = 0; i < old->shared_count; ++i) { - struct dma_fence *f; - - f = rcu_dereference_protected(old->shared[i], - dma_resv_held(resv)); - - if (f->context == ef->base.context) - RCU_INIT_POINTER(new->shared[--j], f); - else - RCU_INIT_POINTER(new->shared[k++], f); - } - new->shared_max = old->shared_max; - new->shared_count = k; - - /* Install the new fence list, seqcount provides the barriers */ - write_seqcount_begin(&resv->seq); - RCU_INIT_POINTER(resv->fence, new); - write_seqcount_end(&resv->seq); - - /* Drop the references to the removed fences or move them to ef_list */ - for (i = j; i < old->shared_count; ++i) { - struct dma_fence *f; - - f = rcu_dereference_protected(new->shared[i], - dma_resv_held(resv)); - dma_fence_put(f); - } - kfree_rcu(old, rcu); - + replacement = dma_fence_get_stub(); + dma_resv_replace_fences(bo->tbo.base.resv, ef->base.context, + replacement, DMA_RESV_USAGE_READ); + dma_fence_put(replacement); return 0; } @@ -778,7 +743,7 @@ unwind: continue; if (attachment[i]->bo_va) { amdgpu_bo_reserve(bo[i], true); - amdgpu_vm_bo_rmv(adev, attachment[i]->bo_va); + amdgpu_vm_bo_del(adev, attachment[i]->bo_va); amdgpu_bo_unreserve(bo[i]); list_del(&attachment[i]->list); } @@ -795,7 +760,7 @@ static void kfd_mem_detach(struct kfd_mem_attachment *attachment) pr_debug("\t remove VA 0x%llx in entry %p\n", attachment->va, attachment); - amdgpu_vm_bo_rmv(attachment->adev, attachment->bo_va); + amdgpu_vm_bo_del(attachment->adev, attachment->bo_va); drm_gem_object_put(&bo->tbo.base); list_del(&attachment->list); kfree(attachment); @@ -842,7 +807,8 @@ static void remove_kgd_mem_from_kfd_bo_list(struct kgd_mem *mem, * * Returns 0 for success, negative errno for errors. */ -static int init_user_pages(struct kgd_mem *mem, uint64_t user_addr) +static int init_user_pages(struct kgd_mem *mem, uint64_t user_addr, + bool criu_resume) { struct amdkfd_process_info *process_info = mem->process_info; struct amdgpu_bo *bo = mem->bo; @@ -864,6 +830,18 @@ static int init_user_pages(struct kgd_mem *mem, uint64_t user_addr) goto out; } + if (criu_resume) { + /* + * During a CRIU restore operation, the userptr buffer objects + * will be validated in the restore_userptr_work worker at a + * later stage when it is scheduled by another ioctl called by + * CRIU master process for the target pid for restore. 
+ */ + atomic_inc(&mem->invalid); + mutex_unlock(&process_info->lock); + return 0; + } + ret = amdgpu_ttm_tt_get_user_pages(bo, bo->tbo.ttm->pages); if (ret) { pr_err("%s: Failed to get user pages: %d\n", __func__, ret); @@ -1080,8 +1058,7 @@ static void unmap_bo_from_gpuvm(struct kgd_mem *mem, static int update_gpuvm_pte(struct kgd_mem *mem, struct kfd_mem_attachment *entry, - struct amdgpu_sync *sync, - bool *table_freed) + struct amdgpu_sync *sync) { struct amdgpu_bo_va *bo_va = entry->bo_va; struct amdgpu_device *adev = entry->adev; @@ -1092,7 +1069,7 @@ static int update_gpuvm_pte(struct kgd_mem *mem, return ret; /* Update the page tables */ - ret = amdgpu_vm_bo_update(adev, bo_va, false, table_freed); + ret = amdgpu_vm_bo_update(adev, bo_va, false); if (ret) { pr_err("amdgpu_vm_bo_update failed\n"); return ret; @@ -1104,8 +1081,7 @@ static int update_gpuvm_pte(struct kgd_mem *mem, static int map_bo_to_gpuvm(struct kgd_mem *mem, struct kfd_mem_attachment *entry, struct amdgpu_sync *sync, - bool no_update_pte, - bool *table_freed) + bool no_update_pte) { int ret; @@ -1122,7 +1098,7 @@ static int map_bo_to_gpuvm(struct kgd_mem *mem, if (no_update_pte) return 0; - ret = update_gpuvm_pte(mem, entry, sync, table_freed); + ret = update_gpuvm_pte(mem, entry, sync); if (ret) { pr_err("update_gpuvm_pte() failed\n"); goto update_gpuvm_pte_failed; @@ -1255,7 +1231,7 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info, AMDGPU_FENCE_OWNER_KFD, false); if (ret) goto wait_pd_fail; - ret = dma_resv_reserve_shared(vm->root.bo->tbo.base.resv, 1); + ret = dma_resv_reserve_fences(vm->root.bo->tbo.base.resv, 1); if (ret) goto reserve_shared_fail; amdgpu_bo_fence(vm->root.bo, @@ -1452,10 +1428,39 @@ uint64_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *drm_priv) return avm->pd_phys_addr; } +void amdgpu_amdkfd_block_mmu_notifications(void *p) +{ + struct amdkfd_process_info *pinfo = (struct amdkfd_process_info *)p; + + mutex_lock(&pinfo->lock); + WRITE_ONCE(pinfo->block_mmu_notifications, true); + mutex_unlock(&pinfo->lock); +} + +int amdgpu_amdkfd_criu_resume(void *p) +{ + int ret = 0; + struct amdkfd_process_info *pinfo = (struct amdkfd_process_info *)p; + + mutex_lock(&pinfo->lock); + pr_debug("scheduling work\n"); + atomic_inc(&pinfo->evicted_bos); + if (!READ_ONCE(pinfo->block_mmu_notifications)) { + ret = -EINVAL; + goto out_unlock; + } + WRITE_ONCE(pinfo->block_mmu_notifications, false); + schedule_delayed_work(&pinfo->restore_userptr_work, 0); + +out_unlock: + mutex_unlock(&pinfo->lock); + return ret; +} + int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( struct amdgpu_device *adev, uint64_t va, uint64_t size, void *drm_priv, struct kgd_mem **mem, - uint64_t *offset, uint32_t flags) + uint64_t *offset, uint32_t flags, bool criu_resume) { struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv); enum ttm_bo_type bo_type = ttm_bo_type_device; @@ -1558,7 +1563,8 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info, user_addr); if (user_addr) { - ret = init_user_pages(*mem, user_addr); + pr_debug("creating userptr BO for user_addr = %llu\n", user_addr); + ret = init_user_pages(*mem, user_addr, criu_resume); if (ret) goto allocate_init_user_pages_failed; } else if (flags & (KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL | @@ -1702,7 +1708,7 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( struct amdgpu_device *adev, struct kgd_mem *mem, - void *drm_priv, bool *table_freed) + void *drm_priv) { struct amdgpu_vm *avm = 
drm_priv_to_vm(drm_priv); int ret; @@ -1789,7 +1795,7 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( entry->va, entry->va + bo_size, entry); ret = map_bo_to_gpuvm(mem, entry, ctx.sync, - is_invalid_userptr, table_freed); + is_invalid_userptr); if (ret) { pr_err("Failed to map bo to gpuvm\n"); goto out_unreserve; @@ -1813,12 +1819,6 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( true); ret = unreserve_bo_and_vms(&ctx, false, false); - /* Only apply no TLB flush on Aldebaran to - * workaround regressions on other Asics. - */ - if (table_freed && (adev->asic_type != CHIP_ALDEBARAN)) - *table_freed = true; - goto out; out_unreserve: @@ -2068,6 +2068,10 @@ int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem, int evicted_bos; int r = 0; + /* Do not process MMU notifications until stage-4 IOCTL is received */ + if (READ_ONCE(process_info->block_mmu_notifications)) + return 0; + atomic_inc(&mem->invalid); evicted_bos = atomic_inc_return(&process_info->evicted_bos); if (evicted_bos == 1) { @@ -2259,7 +2263,7 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info) continue; kfd_mem_dmaunmap_attachment(mem, attachment); - ret = update_gpuvm_pte(mem, attachment, &sync, NULL); + ret = update_gpuvm_pte(mem, attachment, &sync); if (ret) { pr_err("%s: update PTE failed\n", __func__); /* make sure this gets validated again */ @@ -2441,6 +2445,8 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef) struct amdgpu_bo *bo = mem->bo; uint32_t domain = mem->domain; struct kfd_mem_attachment *attachment; + struct dma_resv_iter cursor; + struct dma_fence *fence; total_size += amdgpu_bo_size(bo); @@ -2455,17 +2461,20 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef) goto validate_map_fail; } } - ret = amdgpu_sync_fence(&sync_obj, bo->tbo.moving); - if (ret) { - pr_debug("Memory eviction: Sync BO fence failed. Try again\n"); - goto validate_map_fail; + dma_resv_for_each_fence(&cursor, bo->tbo.base.resv, + DMA_RESV_USAGE_KERNEL, fence) { + ret = amdgpu_sync_fence(&sync_obj, fence); + if (ret) { + pr_debug("Memory eviction: Sync BO fence failed. Try again\n"); + goto validate_map_fail; + } } list_for_each_entry(attachment, &mem->attachments, list) { if (!attachment->is_mapped) continue; kfd_mem_dmaunmap_attachment(mem, attachment); - ret = update_gpuvm_pte(mem, attachment, &sync_obj, NULL); + ret = update_gpuvm_pte(mem, attachment, &sync_obj); if (ret) { pr_debug("Memory eviction: update PTE failed. Try again\n"); goto validate_map_fail; @@ -2565,7 +2574,7 @@ int amdgpu_amdkfd_add_gws_to_process(void *info, void *gws, struct kgd_mem **mem * Add process eviction fence to bo so they can * evict each other. 
*/ - ret = dma_resv_reserve_shared(gws_bo->tbo.base.resv, 1); + ret = dma_resv_reserve_fences(gws_bo->tbo.base.resv, 1); if (ret) goto reserve_shared_fail; amdgpu_bo_fence(gws_bo, &process_info->eviction_fence->base, true); @@ -2635,3 +2644,14 @@ int amdgpu_amdkfd_get_tile_config(struct amdgpu_device *adev, return 0; } + +bool amdgpu_amdkfd_bo_mapped_to_dev(struct amdgpu_device *adev, struct kgd_mem *mem) +{ + struct kfd_mem_attachment *entry; + + list_for_each_entry(entry, &mem->attachments, list) { + if (entry->is_mapped && entry->adev == adev) + return true; + } + return false; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c index 12a6b1c99c93..9ba4817a9148 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c @@ -1083,6 +1083,7 @@ int amdgpu_atombios_get_clock_dividers(struct amdgpu_device *adev, return 0; } +#ifdef CONFIG_DRM_AMDGPU_SI int amdgpu_atombios_get_memory_pll_dividers(struct amdgpu_device *adev, u32 clock, bool strobe_mode, @@ -1503,6 +1504,7 @@ int amdgpu_atombios_init_mc_reg_table(struct amdgpu_device *adev, } return -EINVAL; } +#endif bool amdgpu_atombios_has_gpu_virtualization_table(struct amdgpu_device *adev) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h index 27e74b1fc260..4153d520e2a3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h @@ -160,6 +160,7 @@ int amdgpu_atombios_get_clock_dividers(struct amdgpu_device *adev, bool strobe_mode, struct atom_clock_dividers *dividers); +#ifdef CONFIG_DRM_AMDGPU_SI int amdgpu_atombios_get_memory_pll_dividers(struct amdgpu_device *adev, u32 clock, bool strobe_mode, @@ -179,6 +180,17 @@ int amdgpu_atombios_get_voltage_table(struct amdgpu_device *adev, int amdgpu_atombios_init_mc_reg_table(struct amdgpu_device *adev, u8 module_index, struct atom_mc_reg_table *reg_table); +int amdgpu_atombios_get_max_vddc(struct amdgpu_device *adev, u8 voltage_type, + u16 voltage_id, u16 *voltage); +int amdgpu_atombios_get_leakage_vddc_based_on_leakage_idx(struct amdgpu_device *adev, + u16 *voltage, + u16 leakage_idx); +void amdgpu_atombios_get_default_voltages(struct amdgpu_device *adev, + u16 *vddc, u16 *vddci, u16 *mvdd); +int amdgpu_atombios_get_svi2_info(struct amdgpu_device *adev, + u8 voltage_type, + u8 *svd_gpio_id, u8 *svc_gpio_id); +#endif bool amdgpu_atombios_has_gpu_virtualization_table(struct amdgpu_device *adev); @@ -190,21 +202,11 @@ void amdgpu_atombios_scratch_regs_set_backlight_level(struct amdgpu_device *adev bool amdgpu_atombios_scratch_need_asic_init(struct amdgpu_device *adev); void amdgpu_atombios_copy_swap(u8 *dst, u8 *src, u8 num_bytes, bool to_le); -int amdgpu_atombios_get_max_vddc(struct amdgpu_device *adev, u8 voltage_type, - u16 voltage_id, u16 *voltage); -int amdgpu_atombios_get_leakage_vddc_based_on_leakage_idx(struct amdgpu_device *adev, - u16 *voltage, - u16 leakage_idx); -void amdgpu_atombios_get_default_voltages(struct amdgpu_device *adev, - u16 *vddc, u16 *vddci, u16 *mvdd); int amdgpu_atombios_get_clock_dividers(struct amdgpu_device *adev, u8 clock_type, u32 clock, bool strobe_mode, struct atom_clock_dividers *dividers); -int amdgpu_atombios_get_svi2_info(struct amdgpu_device *adev, - u8 voltage_type, - u8 *svd_gpio_id, u8 *svc_gpio_id); int amdgpu_atombios_get_data_table(struct amdgpu_device *adev, uint32_t table, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c index 4d4ddf026faf..494ca6a0f47a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c @@ -162,12 +162,14 @@ union vram_info { struct atom_vram_info_header_v2_4 v24; struct atom_vram_info_header_v2_5 v25; struct atom_vram_info_header_v2_6 v26; + struct atom_vram_info_header_v3_0 v30; }; union vram_module { struct atom_vram_module_v9 v9; struct atom_vram_module_v10 v10; struct atom_vram_module_v11 v11; + struct atom_vram_module_v3_0 v30; }; static int convert_atom_mem_type_to_vram_type(struct amdgpu_device *adev, @@ -294,88 +296,116 @@ amdgpu_atomfirmware_get_vram_info(struct amdgpu_device *adev, vram_info = (union vram_info *) (mode_info->atom_context->bios + data_offset); module_id = (RREG32(adev->bios_scratch_reg_offset + 4) & 0x00ff0000) >> 16; - switch (crev) { - case 3: - if (module_id > vram_info->v23.vram_module_num) - module_id = 0; - vram_module = (union vram_module *)vram_info->v23.vram_module; - while (i < module_id) { - vram_module = (union vram_module *) - ((u8 *)vram_module + vram_module->v9.vram_module_size); - i++; - } - mem_type = vram_module->v9.memory_type; - if (vram_type) - *vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type); - mem_channel_number = vram_module->v9.channel_num; - mem_channel_width = vram_module->v9.channel_width; - if (vram_width) - *vram_width = mem_channel_number * (1 << mem_channel_width); - mem_vendor = (vram_module->v9.vender_rev_id) & 0xF; - if (vram_vendor) - *vram_vendor = mem_vendor; - break; - case 4: - if (module_id > vram_info->v24.vram_module_num) - module_id = 0; - vram_module = (union vram_module *)vram_info->v24.vram_module; - while (i < module_id) { - vram_module = (union vram_module *) - ((u8 *)vram_module + vram_module->v10.vram_module_size); - i++; - } - mem_type = vram_module->v10.memory_type; - if (vram_type) - *vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type); - mem_channel_number = vram_module->v10.channel_num; - mem_channel_width = vram_module->v10.channel_width; - if (vram_width) - *vram_width = mem_channel_number * (1 << mem_channel_width); - mem_vendor = (vram_module->v10.vender_rev_id) & 0xF; - if (vram_vendor) - *vram_vendor = mem_vendor; - break; - case 5: - if (module_id > vram_info->v25.vram_module_num) - module_id = 0; - vram_module = (union vram_module *)vram_info->v25.vram_module; - while (i < module_id) { - vram_module = (union vram_module *) - ((u8 *)vram_module + vram_module->v11.vram_module_size); - i++; + if (frev == 3) { + switch (crev) { + /* v30 */ + case 0: + vram_module = (union vram_module *)vram_info->v30.vram_module; + mem_vendor = (vram_module->v30.dram_vendor_id) & 0xF; + if (vram_vendor) + *vram_vendor = mem_vendor; + mem_type = vram_info->v30.memory_type; + if (vram_type) + *vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type); + mem_channel_number = vram_info->v30.channel_num; + mem_channel_width = vram_info->v30.channel_width; + if (vram_width) + *vram_width = mem_channel_number * mem_channel_width; + break; + default: + return -EINVAL; } - mem_type = vram_module->v11.memory_type; - if (vram_type) - *vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type); - mem_channel_number = vram_module->v11.channel_num; - mem_channel_width = vram_module->v11.channel_width; - if (vram_width) - *vram_width = mem_channel_number * (1 << mem_channel_width); - mem_vendor = (vram_module->v11.vender_rev_id) & 0xF; - if (vram_vendor) - *vram_vendor = mem_vendor; - 
break; - case 6: - if (module_id > vram_info->v26.vram_module_num) - module_id = 0; - vram_module = (union vram_module *)vram_info->v26.vram_module; - while (i < module_id) { - vram_module = (union vram_module *) - ((u8 *)vram_module + vram_module->v9.vram_module_size); - i++; + } else if (frev == 2) { + switch (crev) { + /* v23 */ + case 3: + if (module_id > vram_info->v23.vram_module_num) + module_id = 0; + vram_module = (union vram_module *)vram_info->v23.vram_module; + while (i < module_id) { + vram_module = (union vram_module *) + ((u8 *)vram_module + vram_module->v9.vram_module_size); + i++; + } + mem_type = vram_module->v9.memory_type; + if (vram_type) + *vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type); + mem_channel_number = vram_module->v9.channel_num; + mem_channel_width = vram_module->v9.channel_width; + if (vram_width) + *vram_width = mem_channel_number * (1 << mem_channel_width); + mem_vendor = (vram_module->v9.vender_rev_id) & 0xF; + if (vram_vendor) + *vram_vendor = mem_vendor; + break; + /* v24 */ + case 4: + if (module_id > vram_info->v24.vram_module_num) + module_id = 0; + vram_module = (union vram_module *)vram_info->v24.vram_module; + while (i < module_id) { + vram_module = (union vram_module *) + ((u8 *)vram_module + vram_module->v10.vram_module_size); + i++; + } + mem_type = vram_module->v10.memory_type; + if (vram_type) + *vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type); + mem_channel_number = vram_module->v10.channel_num; + mem_channel_width = vram_module->v10.channel_width; + if (vram_width) + *vram_width = mem_channel_number * (1 << mem_channel_width); + mem_vendor = (vram_module->v10.vender_rev_id) & 0xF; + if (vram_vendor) + *vram_vendor = mem_vendor; + break; + /* v25 */ + case 5: + if (module_id > vram_info->v25.vram_module_num) + module_id = 0; + vram_module = (union vram_module *)vram_info->v25.vram_module; + while (i < module_id) { + vram_module = (union vram_module *) + ((u8 *)vram_module + vram_module->v11.vram_module_size); + i++; + } + mem_type = vram_module->v11.memory_type; + if (vram_type) + *vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type); + mem_channel_number = vram_module->v11.channel_num; + mem_channel_width = vram_module->v11.channel_width; + if (vram_width) + *vram_width = mem_channel_number * (1 << mem_channel_width); + mem_vendor = (vram_module->v11.vender_rev_id) & 0xF; + if (vram_vendor) + *vram_vendor = mem_vendor; + break; + /* v26 */ + case 6: + if (module_id > vram_info->v26.vram_module_num) + module_id = 0; + vram_module = (union vram_module *)vram_info->v26.vram_module; + while (i < module_id) { + vram_module = (union vram_module *) + ((u8 *)vram_module + vram_module->v9.vram_module_size); + i++; + } + mem_type = vram_module->v9.memory_type; + if (vram_type) + *vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type); + mem_channel_number = vram_module->v9.channel_num; + mem_channel_width = vram_module->v9.channel_width; + if (vram_width) + *vram_width = mem_channel_number * (1 << mem_channel_width); + mem_vendor = (vram_module->v9.vender_rev_id) & 0xF; + if (vram_vendor) + *vram_vendor = mem_vendor; + break; + default: + return -EINVAL; } - mem_type = vram_module->v9.memory_type; - if (vram_type) - *vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type); - mem_channel_number = vram_module->v9.channel_num; - mem_channel_width = vram_module->v9.channel_width; - if (vram_width) - *vram_width = mem_channel_number * (1 << mem_channel_width); - mem_vendor = 
(vram_module->v9.vender_rev_id) & 0xF; - if (vram_vendor) - *vram_vendor = mem_vendor; - break; - default: + } else { + /* invalid frev */ return -EINVAL; } } @@ -528,6 +558,13 @@ union smu_info { struct atom_smu_info_v3_1 v31; }; +union gfx_info { + struct atom_gfx_info_v2_2 v22; + struct atom_gfx_info_v2_4 v24; + struct atom_gfx_info_v2_7 v27; + struct atom_gfx_info_v3_0 v30; +}; + int amdgpu_atomfirmware_get_clock_info(struct amdgpu_device *adev) { struct amdgpu_mode_info *mode_info = &adev->mode_info; @@ -609,22 +646,26 @@ int amdgpu_atomfirmware_get_clock_info(struct amdgpu_device *adev) gfx_info); if (amdgpu_atom_parse_data_header(mode_info->atom_context, index, NULL, &frev, &crev, &data_offset)) { - struct atom_gfx_info_v2_2 *gfx_info = (struct atom_gfx_info_v2_2*) + union gfx_info *gfx_info = (union gfx_info *) (mode_info->atom_context->bios + data_offset); - if ((frev == 2) && (crev >= 2)) - spll->reference_freq = le32_to_cpu(gfx_info->rlc_gpu_timer_refclk); - ret = 0; + if ((frev == 3) || + (frev == 2 && crev == 6)) { + spll->reference_freq = le32_to_cpu(gfx_info->v30.golden_tsc_count_lower_refclk); + ret = 0; + } else if ((frev == 2) && + (crev >= 2) && + (crev != 6)) { + spll->reference_freq = le32_to_cpu(gfx_info->v22.rlc_gpu_timer_refclk); + ret = 0; + } else { + BUG(); + } } } return ret; } -union gfx_info { - struct atom_gfx_info_v2_4 v24; - struct atom_gfx_info_v2_7 v27; -}; - int amdgpu_atomfirmware_get_gfx_info(struct amdgpu_device *adev) { struct amdgpu_mode_info *mode_info = &adev->mode_info; @@ -638,42 +679,58 @@ int amdgpu_atomfirmware_get_gfx_info(struct amdgpu_device *adev) &frev, &crev, &data_offset)) { union gfx_info *gfx_info = (union gfx_info *) (mode_info->atom_context->bios + data_offset); - switch (crev) { - case 4: - adev->gfx.config.max_shader_engines = gfx_info->v24.max_shader_engines; - adev->gfx.config.max_cu_per_sh = gfx_info->v24.max_cu_per_sh; - adev->gfx.config.max_sh_per_se = gfx_info->v24.max_sh_per_se; - adev->gfx.config.max_backends_per_se = gfx_info->v24.max_backends_per_se; - adev->gfx.config.max_texture_channel_caches = gfx_info->v24.max_texture_channel_caches; - adev->gfx.config.max_gprs = le16_to_cpu(gfx_info->v24.gc_num_gprs); - adev->gfx.config.max_gs_threads = gfx_info->v24.gc_num_max_gs_thds; - adev->gfx.config.gs_vgt_table_depth = gfx_info->v24.gc_gs_table_depth; - adev->gfx.config.gs_prim_buffer_depth = - le16_to_cpu(gfx_info->v24.gc_gsprim_buff_depth); - adev->gfx.config.double_offchip_lds_buf = - gfx_info->v24.gc_double_offchip_lds_buffer; - adev->gfx.cu_info.wave_front_size = le16_to_cpu(gfx_info->v24.gc_wave_size); - adev->gfx.cu_info.max_waves_per_simd = le16_to_cpu(gfx_info->v24.gc_max_waves_per_simd); - adev->gfx.cu_info.max_scratch_slots_per_cu = gfx_info->v24.gc_max_scratch_slots_per_cu; - adev->gfx.cu_info.lds_size = le16_to_cpu(gfx_info->v24.gc_lds_size); - return 0; - case 7: - adev->gfx.config.max_shader_engines = gfx_info->v27.max_shader_engines; - adev->gfx.config.max_cu_per_sh = gfx_info->v27.max_cu_per_sh; - adev->gfx.config.max_sh_per_se = gfx_info->v27.max_sh_per_se; - adev->gfx.config.max_backends_per_se = gfx_info->v27.max_backends_per_se; - adev->gfx.config.max_texture_channel_caches = gfx_info->v27.max_texture_channel_caches; - adev->gfx.config.max_gprs = le16_to_cpu(gfx_info->v27.gc_num_gprs); - adev->gfx.config.max_gs_threads = gfx_info->v27.gc_num_max_gs_thds; - adev->gfx.config.gs_vgt_table_depth = gfx_info->v27.gc_gs_table_depth; - adev->gfx.config.gs_prim_buffer_depth = 
le16_to_cpu(gfx_info->v27.gc_gsprim_buff_depth); - adev->gfx.config.double_offchip_lds_buf = gfx_info->v27.gc_double_offchip_lds_buffer; - adev->gfx.cu_info.wave_front_size = le16_to_cpu(gfx_info->v27.gc_wave_size); - adev->gfx.cu_info.max_waves_per_simd = le16_to_cpu(gfx_info->v27.gc_max_waves_per_simd); - adev->gfx.cu_info.max_scratch_slots_per_cu = gfx_info->v27.gc_max_scratch_slots_per_cu; - adev->gfx.cu_info.lds_size = le16_to_cpu(gfx_info->v27.gc_lds_size); - return 0; - default: + if (frev == 2) { + switch (crev) { + case 4: + adev->gfx.config.max_shader_engines = gfx_info->v24.max_shader_engines; + adev->gfx.config.max_cu_per_sh = gfx_info->v24.max_cu_per_sh; + adev->gfx.config.max_sh_per_se = gfx_info->v24.max_sh_per_se; + adev->gfx.config.max_backends_per_se = gfx_info->v24.max_backends_per_se; + adev->gfx.config.max_texture_channel_caches = gfx_info->v24.max_texture_channel_caches; + adev->gfx.config.max_gprs = le16_to_cpu(gfx_info->v24.gc_num_gprs); + adev->gfx.config.max_gs_threads = gfx_info->v24.gc_num_max_gs_thds; + adev->gfx.config.gs_vgt_table_depth = gfx_info->v24.gc_gs_table_depth; + adev->gfx.config.gs_prim_buffer_depth = + le16_to_cpu(gfx_info->v24.gc_gsprim_buff_depth); + adev->gfx.config.double_offchip_lds_buf = + gfx_info->v24.gc_double_offchip_lds_buffer; + adev->gfx.cu_info.wave_front_size = le16_to_cpu(gfx_info->v24.gc_wave_size); + adev->gfx.cu_info.max_waves_per_simd = le16_to_cpu(gfx_info->v24.gc_max_waves_per_simd); + adev->gfx.cu_info.max_scratch_slots_per_cu = gfx_info->v24.gc_max_scratch_slots_per_cu; + adev->gfx.cu_info.lds_size = le16_to_cpu(gfx_info->v24.gc_lds_size); + return 0; + case 7: + adev->gfx.config.max_shader_engines = gfx_info->v27.max_shader_engines; + adev->gfx.config.max_cu_per_sh = gfx_info->v27.max_cu_per_sh; + adev->gfx.config.max_sh_per_se = gfx_info->v27.max_sh_per_se; + adev->gfx.config.max_backends_per_se = gfx_info->v27.max_backends_per_se; + adev->gfx.config.max_texture_channel_caches = gfx_info->v27.max_texture_channel_caches; + adev->gfx.config.max_gprs = le16_to_cpu(gfx_info->v27.gc_num_gprs); + adev->gfx.config.max_gs_threads = gfx_info->v27.gc_num_max_gs_thds; + adev->gfx.config.gs_vgt_table_depth = gfx_info->v27.gc_gs_table_depth; + adev->gfx.config.gs_prim_buffer_depth = le16_to_cpu(gfx_info->v27.gc_gsprim_buff_depth); + adev->gfx.config.double_offchip_lds_buf = gfx_info->v27.gc_double_offchip_lds_buffer; + adev->gfx.cu_info.wave_front_size = le16_to_cpu(gfx_info->v27.gc_wave_size); + adev->gfx.cu_info.max_waves_per_simd = le16_to_cpu(gfx_info->v27.gc_max_waves_per_simd); + adev->gfx.cu_info.max_scratch_slots_per_cu = gfx_info->v27.gc_max_scratch_slots_per_cu; + adev->gfx.cu_info.lds_size = le16_to_cpu(gfx_info->v27.gc_lds_size); + return 0; + default: + return -EINVAL; + } + } else if (frev == 3) { + switch (crev) { + case 0: + adev->gfx.config.max_shader_engines = gfx_info->v30.max_shader_engines; + adev->gfx.config.max_cu_per_sh = gfx_info->v30.max_cu_per_sh; + adev->gfx.config.max_sh_per_se = gfx_info->v30.max_sh_per_se; + adev->gfx.config.max_backends_per_se = gfx_info->v30.max_backends_per_se; + adev->gfx.config.max_texture_channel_caches = gfx_info->v30.max_texture_channel_caches; + return 0; + default: + return -EINVAL; + } + } else { return -EINVAL; } @@ -731,3 +788,67 @@ int amdgpu_atomfirmware_get_fw_reserved_fb_size(struct amdgpu_device *adev) return fw_reserved_fb_size; } + +/* + * Helper function to execute asic_init table + * + * @adev: amdgpu_device pointer + * @fb_reset: flag to indicate whether fb is 
reset or not + * + * Return 0 if succeed, otherwise failed + */ +int amdgpu_atomfirmware_asic_init(struct amdgpu_device *adev, bool fb_reset) +{ + struct amdgpu_mode_info *mode_info = &adev->mode_info; + struct atom_context *ctx; + uint8_t frev, crev; + uint16_t data_offset; + uint32_t bootup_sclk_in10khz, bootup_mclk_in10khz; + struct asic_init_ps_allocation_v2_1 asic_init_ps_v2_1; + int index; + + if (!mode_info) + return -EINVAL; + + ctx = mode_info->atom_context; + if (!ctx) + return -EINVAL; + + /* query bootup sclk/mclk from firmware_info table */ + index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1, + firmwareinfo); + if (amdgpu_atom_parse_data_header(ctx, index, NULL, + &frev, &crev, &data_offset)) { + union firmware_info *firmware_info = + (union firmware_info *)(ctx->bios + + data_offset); + + bootup_sclk_in10khz = + le32_to_cpu(firmware_info->v31.bootup_sclk_in10khz); + bootup_mclk_in10khz = + le32_to_cpu(firmware_info->v31.bootup_mclk_in10khz); + } else { + return -EINVAL; + } + + index = get_index_into_master_table(atom_master_list_of_command_functions_v2_1, + asic_init); + if (amdgpu_atom_parse_cmd_header(mode_info->atom_context, index, &frev, &crev)) { + if (frev == 2 && crev >= 1) { + memset(&asic_init_ps_v2_1, 0, sizeof(asic_init_ps_v2_1)); + asic_init_ps_v2_1.param.engineparam.sclkfreqin10khz = bootup_sclk_in10khz; + asic_init_ps_v2_1.param.memparam.mclkfreqin10khz = bootup_mclk_in10khz; + asic_init_ps_v2_1.param.engineparam.engineflag = b3NORMAL_ENGINE_INIT; + if (!fb_reset) + asic_init_ps_v2_1.param.memparam.memflag = b3DRAM_SELF_REFRESH_EXIT; + else + asic_init_ps_v2_1.param.memparam.memflag = 0; + } else { + return -EINVAL; + } + } else { + return -EINVAL; + } + + return amdgpu_atom_execute_table(ctx, ATOM_CMD_INIT, (uint32_t *)&asic_init_ps_v2_1); +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h index 751248b253de..c7eb2caec65a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h @@ -40,5 +40,6 @@ bool amdgpu_atomfirmware_ras_rom_addr(struct amdgpu_device *adev, uint8_t* i2c_a bool amdgpu_atomfirmware_mem_training_supported(struct amdgpu_device *adev); bool amdgpu_atomfirmware_dynamic_boot_config_supported(struct amdgpu_device *adev); int amdgpu_atomfirmware_get_fw_reserved_fb_size(struct amdgpu_device *adev); +int amdgpu_atomfirmware_asic_init(struct amdgpu_device *adev, bool fb_reset); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c index 313517f7cf10..edc6377ec5ff 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c @@ -29,14 +29,13 @@ #define AMDGPU_BENCHMARK_COMMON_MODES_N 17 static int amdgpu_benchmark_do_move(struct amdgpu_device *adev, unsigned size, - uint64_t saddr, uint64_t daddr, int n) + uint64_t saddr, uint64_t daddr, int n, s64 *time_ms) { - unsigned long start_jiffies; - unsigned long end_jiffies; + ktime_t stime, etime; struct dma_fence *fence; int i, r; - start_jiffies = jiffies; + stime = ktime_get(); for (i = 0; i < n; i++) { struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring; r = amdgpu_copy_buffer(ring, saddr, daddr, size, NULL, &fence, @@ -48,120 +47,81 @@ static int amdgpu_benchmark_do_move(struct amdgpu_device *adev, unsigned size, if (r) goto exit_do_move; } - end_jiffies = jiffies; - r = jiffies_to_msecs(end_jiffies - start_jiffies); exit_do_move: + etime = 
ktime_get(); + *time_ms = ktime_ms_delta(etime, stime); + return r; } -static void amdgpu_benchmark_log_results(int n, unsigned size, - unsigned int time, +static void amdgpu_benchmark_log_results(struct amdgpu_device *adev, + int n, unsigned size, + s64 time_ms, unsigned sdomain, unsigned ddomain, char *kind) { - unsigned int throughput = (n * (size >> 10)) / time; - DRM_INFO("amdgpu: %s %u bo moves of %u kB from" - " %d to %d in %u ms, throughput: %u Mb/s or %u MB/s\n", - kind, n, size >> 10, sdomain, ddomain, time, + s64 throughput = (n * (size >> 10)); + + throughput = div64_s64(throughput, time_ms); + + dev_info(adev->dev, "amdgpu: %s %u bo moves of %u kB from" + " %d to %d in %lld ms, throughput: %lld Mb/s or %lld MB/s\n", + kind, n, size >> 10, sdomain, ddomain, time_ms, throughput * 8, throughput); } -static void amdgpu_benchmark_move(struct amdgpu_device *adev, unsigned size, - unsigned sdomain, unsigned ddomain) +static int amdgpu_benchmark_move(struct amdgpu_device *adev, unsigned size, + unsigned sdomain, unsigned ddomain) { struct amdgpu_bo *dobj = NULL; struct amdgpu_bo *sobj = NULL; - struct amdgpu_bo_param bp; uint64_t saddr, daddr; + s64 time_ms; int r, n; - int time; - - memset(&bp, 0, sizeof(bp)); - bp.size = size; - bp.byte_align = PAGE_SIZE; - bp.domain = sdomain; - bp.flags = 0; - bp.type = ttm_bo_type_kernel; - bp.resv = NULL; - bp.bo_ptr_size = sizeof(struct amdgpu_bo); n = AMDGPU_BENCHMARK_ITERATIONS; - r = amdgpu_bo_create(adev, &bp, &sobj); - if (r) { - goto out_cleanup; - } - r = amdgpu_bo_reserve(sobj, false); - if (unlikely(r != 0)) - goto out_cleanup; - r = amdgpu_bo_pin(sobj, sdomain); - if (r) { - amdgpu_bo_unreserve(sobj); - goto out_cleanup; - } - r = amdgpu_ttm_alloc_gart(&sobj->tbo); - amdgpu_bo_unreserve(sobj); - if (r) { - goto out_cleanup; - } - saddr = amdgpu_bo_gpu_offset(sobj); - bp.domain = ddomain; - r = amdgpu_bo_create(adev, &bp, &dobj); - if (r) { - goto out_cleanup; - } - r = amdgpu_bo_reserve(dobj, false); - if (unlikely(r != 0)) - goto out_cleanup; - r = amdgpu_bo_pin(dobj, ddomain); - if (r) { - amdgpu_bo_unreserve(sobj); + + r = amdgpu_bo_create_kernel(adev, size, + PAGE_SIZE, sdomain, + &sobj, + &saddr, + NULL); + if (r) goto out_cleanup; - } - r = amdgpu_ttm_alloc_gart(&dobj->tbo); - amdgpu_bo_unreserve(dobj); - if (r) { + r = amdgpu_bo_create_kernel(adev, size, + PAGE_SIZE, ddomain, + &dobj, + &daddr, + NULL); + if (r) goto out_cleanup; - } - daddr = amdgpu_bo_gpu_offset(dobj); if (adev->mman.buffer_funcs) { - time = amdgpu_benchmark_do_move(adev, size, saddr, daddr, n); - if (time < 0) + r = amdgpu_benchmark_do_move(adev, size, saddr, daddr, n, &time_ms); + if (r) goto out_cleanup; - if (time > 0) - amdgpu_benchmark_log_results(n, size, time, + else + amdgpu_benchmark_log_results(adev, n, size, time_ms, sdomain, ddomain, "dma"); } out_cleanup: /* Check error value now. 
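(Aside: a condensed sketch of the ktime measurement and throughput math this rewrite moves to; the copy loop is elided and the names mirror the code above:

    ktime_t stime = ktime_get();
    // ... submit n copies of size bytes, wait on the final fence ...
    s64 time_ms = ktime_ms_delta(ktime_get(), stime);
    s64 throughput = div64_s64((s64)n * (size >> 10), time_ms);  // KB per ms, roughly MB/s

ktime_ms_delta() yields a signed 64-bit millisecond count, and div64_s64() avoids the 64-bit division helper that a plain "/" would require on 32-bit builds.)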
The value can be overwritten when clean up.*/ - if (r) { - DRM_ERROR("Error while benchmarking BO move.\n"); - } + if (r < 0) + dev_info(adev->dev, "Error while benchmarking BO move.\n"); - if (sobj) { - r = amdgpu_bo_reserve(sobj, true); - if (likely(r == 0)) { - amdgpu_bo_unpin(sobj); - amdgpu_bo_unreserve(sobj); - } - amdgpu_bo_unref(&sobj); - } - if (dobj) { - r = amdgpu_bo_reserve(dobj, true); - if (likely(r == 0)) { - amdgpu_bo_unpin(dobj); - amdgpu_bo_unreserve(dobj); - } - amdgpu_bo_unref(&dobj); - } + if (sobj) + amdgpu_bo_free_kernel(&sobj, &saddr, NULL); + if (dobj) + amdgpu_bo_free_kernel(&dobj, &daddr, NULL); + return r; } -void amdgpu_benchmark(struct amdgpu_device *adev, int test_number) +int amdgpu_benchmark(struct amdgpu_device *adev, int test_number) { - int i; + int i, r; static const int common_modes[AMDGPU_BENCHMARK_COMMON_MODES_N] = { 640 * 480 * 4, 720 * 480 * 4, @@ -182,63 +142,119 @@ void amdgpu_benchmark(struct amdgpu_device *adev, int test_number) 1920 * 1200 * 4 }; + mutex_lock(&adev->benchmark_mutex); switch (test_number) { case 1: + dev_info(adev->dev, + "benchmark test: %d (simple test, VRAM to GTT and GTT to VRAM)\n", + test_number); /* simple test, VRAM to GTT and GTT to VRAM */ - amdgpu_benchmark_move(adev, 1024*1024, AMDGPU_GEM_DOMAIN_GTT, - AMDGPU_GEM_DOMAIN_VRAM); - amdgpu_benchmark_move(adev, 1024*1024, AMDGPU_GEM_DOMAIN_VRAM, - AMDGPU_GEM_DOMAIN_GTT); + r = amdgpu_benchmark_move(adev, 1024*1024, AMDGPU_GEM_DOMAIN_GTT, + AMDGPU_GEM_DOMAIN_VRAM); + if (r) + goto done; + r = amdgpu_benchmark_move(adev, 1024*1024, AMDGPU_GEM_DOMAIN_VRAM, + AMDGPU_GEM_DOMAIN_GTT); + if (r) + goto done; break; case 2: + dev_info(adev->dev, + "benchmark test: %d (simple test, VRAM to VRAM)\n", + test_number); /* simple test, VRAM to VRAM */ - amdgpu_benchmark_move(adev, 1024*1024, AMDGPU_GEM_DOMAIN_VRAM, - AMDGPU_GEM_DOMAIN_VRAM); + r = amdgpu_benchmark_move(adev, 1024*1024, AMDGPU_GEM_DOMAIN_VRAM, + AMDGPU_GEM_DOMAIN_VRAM); + if (r) + goto done; break; case 3: + dev_info(adev->dev, + "benchmark test: %d (GTT to VRAM, buffer size sweep, powers of 2)\n", + test_number); /* GTT to VRAM, buffer size sweep, powers of 2 */ - for (i = 1; i <= 16384; i <<= 1) - amdgpu_benchmark_move(adev, i * AMDGPU_GPU_PAGE_SIZE, - AMDGPU_GEM_DOMAIN_GTT, - AMDGPU_GEM_DOMAIN_VRAM); + for (i = 1; i <= 16384; i <<= 1) { + r = amdgpu_benchmark_move(adev, i * AMDGPU_GPU_PAGE_SIZE, + AMDGPU_GEM_DOMAIN_GTT, + AMDGPU_GEM_DOMAIN_VRAM); + if (r) + goto done; + } break; case 4: + dev_info(adev->dev, + "benchmark test: %d (VRAM to GTT, buffer size sweep, powers of 2)\n", + test_number); /* VRAM to GTT, buffer size sweep, powers of 2 */ - for (i = 1; i <= 16384; i <<= 1) - amdgpu_benchmark_move(adev, i * AMDGPU_GPU_PAGE_SIZE, - AMDGPU_GEM_DOMAIN_VRAM, - AMDGPU_GEM_DOMAIN_GTT); + for (i = 1; i <= 16384; i <<= 1) { + r = amdgpu_benchmark_move(adev, i * AMDGPU_GPU_PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM, + AMDGPU_GEM_DOMAIN_GTT); + if (r) + goto done; + } break; case 5: + dev_info(adev->dev, + "benchmark test: %d (VRAM to VRAM, buffer size sweep, powers of 2)\n", + test_number); /* VRAM to VRAM, buffer size sweep, powers of 2 */ - for (i = 1; i <= 16384; i <<= 1) - amdgpu_benchmark_move(adev, i * AMDGPU_GPU_PAGE_SIZE, - AMDGPU_GEM_DOMAIN_VRAM, - AMDGPU_GEM_DOMAIN_VRAM); + for (i = 1; i <= 16384; i <<= 1) { + r = amdgpu_benchmark_move(adev, i * AMDGPU_GPU_PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM, + AMDGPU_GEM_DOMAIN_VRAM); + if (r) + goto done; + } break; case 6: + dev_info(adev->dev, + "benchmark test: %d (GTT to VRAM, 
buffer size sweep, common modes)\n", + test_number); /* GTT to VRAM, buffer size sweep, common modes */ - for (i = 0; i < AMDGPU_BENCHMARK_COMMON_MODES_N; i++) - amdgpu_benchmark_move(adev, common_modes[i], - AMDGPU_GEM_DOMAIN_GTT, - AMDGPU_GEM_DOMAIN_VRAM); + for (i = 0; i < AMDGPU_BENCHMARK_COMMON_MODES_N; i++) { + r = amdgpu_benchmark_move(adev, common_modes[i], + AMDGPU_GEM_DOMAIN_GTT, + AMDGPU_GEM_DOMAIN_VRAM); + if (r) + goto done; + } break; case 7: + dev_info(adev->dev, + "benchmark test: %d (VRAM to GTT, buffer size sweep, common modes)\n", + test_number); /* VRAM to GTT, buffer size sweep, common modes */ - for (i = 0; i < AMDGPU_BENCHMARK_COMMON_MODES_N; i++) - amdgpu_benchmark_move(adev, common_modes[i], - AMDGPU_GEM_DOMAIN_VRAM, - AMDGPU_GEM_DOMAIN_GTT); + for (i = 0; i < AMDGPU_BENCHMARK_COMMON_MODES_N; i++) { + r = amdgpu_benchmark_move(adev, common_modes[i], + AMDGPU_GEM_DOMAIN_VRAM, + AMDGPU_GEM_DOMAIN_GTT); + if (r) + goto done; + } break; case 8: + dev_info(adev->dev, + "benchmark test: %d (VRAM to VRAM, buffer size sweep, common modes)\n", + test_number); /* VRAM to VRAM, buffer size sweep, common modes */ - for (i = 0; i < AMDGPU_BENCHMARK_COMMON_MODES_N; i++) - amdgpu_benchmark_move(adev, common_modes[i], + for (i = 0; i < AMDGPU_BENCHMARK_COMMON_MODES_N; i++) { + r = amdgpu_benchmark_move(adev, common_modes[i], AMDGPU_GEM_DOMAIN_VRAM, AMDGPU_GEM_DOMAIN_VRAM); + if (r) + goto done; + } break; default: - DRM_ERROR("Unknown benchmark\n"); + dev_info(adev->dev, "Unknown benchmark %d\n", test_number); + r = -EINVAL; + break; } + +done: + mutex_unlock(&adev->benchmark_mutex); + + return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c index 27b19503773b..e363f56c72af 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c @@ -464,3 +464,50 @@ success: adev->is_atom_fw = (adev->asic_type >= CHIP_VEGA10) ? 
true : false; return true; } + +/* helper function for soc15 and onwards to read bios from rom */ +bool amdgpu_soc15_read_bios_from_rom(struct amdgpu_device *adev, + u8 *bios, u32 length_bytes) +{ + u32 *dw_ptr; + u32 i, length_dw; + u32 rom_offset; + u32 rom_index_offset; + u32 rom_data_offset; + + if (bios == NULL) + return false; + if (length_bytes == 0) + return false; + /* APU vbios image is part of sbios image */ + if (adev->flags & AMD_IS_APU) + return false; + if (!adev->smuio.funcs || + !adev->smuio.funcs->get_rom_index_offset || + !adev->smuio.funcs->get_rom_data_offset) + return false; + + dw_ptr = (u32 *)bios; + length_dw = ALIGN(length_bytes, 4) / 4; + + rom_index_offset = + adev->smuio.funcs->get_rom_index_offset(adev); + rom_data_offset = + adev->smuio.funcs->get_rom_data_offset(adev); + + if (adev->nbio.funcs && + adev->nbio.funcs->get_rom_offset) { + rom_offset = adev->nbio.funcs->get_rom_offset(adev); + rom_offset = rom_offset << 17; + } else { + rom_offset = 0; + } + + /* set rom index to rom_offset */ + WREG32(rom_index_offset, rom_offset); + /* read out the rom data */ + for (i = 0; i < length_dw; i++) + dw_ptr[i] = RREG32(rom_data_offset); + + return true; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h index 044b41f0bfd9..529d52a204cf 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h @@ -34,7 +34,6 @@ struct amdgpu_fpriv; struct amdgpu_bo_list_entry { struct ttm_validate_buffer tv; struct amdgpu_bo_va *bo_va; - struct dma_fence_chain *chain; uint32_t priority; struct page **user_pages; bool user_invalidated; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c index c16a2704ced6..b7933c2ce765 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c @@ -24,9 +24,9 @@ * Alex Deucher */ +#include <drm/display/drm_dp_helper.h> #include <drm/drm_edid.h> #include <drm/drm_fb_helper.h> -#include <drm/drm_dp_helper.h> #include <drm/drm_probe_helper.h> #include <drm/amdgpu_drm.h> #include "amdgpu.h" @@ -175,7 +175,7 @@ int amdgpu_connector_get_monitor_bpc(struct drm_connector *connector) /* Check if bpc is within clock limit. 
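(Aside on the soc15 VBIOS helper added above: it uses a classic index/data register pair, where one write selects the starting offset and repeated reads of the data port return successive dwords; a condensed sketch using the same local names, with the offset discovery elided:

    WREG32(rom_index_offset, rom_offset);        // select the start of the image
    for (i = 0; i < length_dw; i++)
            dw_ptr[i] = RREG32(rom_data_offset); // data port advances on each read

the auto-increment behaviour of the data port is implied by the loop rather than spelled out, so treat that as an assumption of this sketch.)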
Try to degrade gracefully otherwise */ if ((bpc == 12) && (mode_clock * 3/2 > max_tmds_clock)) { - if ((connector->display_info.edid_hdmi_dc_modes & DRM_EDID_HDMI_DC_30) && + if ((connector->display_info.edid_hdmi_rgb444_dc_modes & DRM_EDID_HDMI_DC_30) && (mode_clock * 5/4 <= max_tmds_clock)) bpc = 10; else @@ -626,7 +626,7 @@ amdgpu_connector_fixup_lcd_native_mode(struct drm_encoder *encoder, if (mode->type & DRM_MODE_TYPE_PREFERRED) { if (mode->hdisplay != native_mode->hdisplay || mode->vdisplay != native_mode->vdisplay) - memcpy(native_mode, mode, sizeof(*mode)); + drm_mode_copy(native_mode, mode); } } @@ -635,7 +635,7 @@ amdgpu_connector_fixup_lcd_native_mode(struct drm_encoder *encoder, list_for_each_entry_safe(mode, t, &connector->probed_modes, head) { if (mode->hdisplay == native_mode->hdisplay && mode->vdisplay == native_mode->vdisplay) { - *native_mode = *mode; + drm_mode_copy(native_mode, mode); drm_mode_set_crtcinfo(native_mode, CRTC_INTERLACE_HALVE_V); DRM_DEBUG_KMS("Determined LVDS native mode details from EDID\n"); break; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 06d07502a1f6..2982b543c27f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -32,6 +32,7 @@ #include <drm/amdgpu_drm.h> #include <drm/drm_syncobj.h> +#include "amdgpu_cs.h" #include "amdgpu.h" #include "amdgpu_trace.h" #include "amdgpu_gmc.h" @@ -54,8 +55,8 @@ static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p, bo = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj)); p->uf_entry.priority = 0; p->uf_entry.tv.bo = &bo->tbo; - /* One for TTM and one for the CS job */ - p->uf_entry.tv.num_shared = 2; + /* One for TTM and two for the CS job */ + p->uf_entry.tv.num_shared = 3; drm_gem_object_put(gobj); @@ -314,7 +315,7 @@ static void amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev, } total_vram = adev->gmc.real_vram_size - atomic64_read(&adev->vram_pin_size); - used_vram = amdgpu_vram_mgr_usage(&adev->mman.vram_mgr); + used_vram = ttm_resource_manager_usage(&adev->mman.vram_mgr.manager); free_vram = used_vram >= total_vram ? 0 : total_vram - used_vram; spin_lock(&adev->mm_stats.lock); @@ -341,7 +342,7 @@ static void amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev, if (free_vram >= 128 * 1024 * 1024 || free_vram >= total_vram / 8) { s64 min_us; - /* Be more aggresive on dGPUs. Try to fill a portion of free + /* Be more aggressive on dGPUs. Try to fill a portion of free * VRAM now. */ if (!(adev->flags & AMD_IS_APU)) @@ -551,7 +552,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, if (r) { kvfree(e->user_pages); e->user_pages = NULL; - return r; + goto out_free_user_pages; } for (i = 0; i < bo->tbo.ttm->num_pages; i++) { @@ -568,21 +569,23 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, if (unlikely(r != 0)) { if (r != -ERESTARTSYS) DRM_ERROR("ttm_eu_reserve_buffers failed.\n"); - goto out; + goto out_free_user_pages; } amdgpu_bo_list_for_each_entry(e, p->bo_list) { struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo); e->bo_va = amdgpu_vm_bo_find(vm, bo); + } - if (bo->tbo.base.dma_buf && !amdgpu_bo_explicit_sync(bo)) { - e->chain = dma_fence_chain_alloc(); - if (!e->chain) { - r = -ENOMEM; - goto error_validate; - } - } + /* Move fence waiting after getting reservation lock of + * PD root. Then there is no need on a ctx mutex lock. 
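(Aside: the resulting submit ordering, condensed, with unrelated steps and the real argument lists elided:

    ttm_eu_reserve_buffers(&p->ticket, &p->validated, ...);  // reservation locks first
    amdgpu_ctx_wait_prev_fence(p->ctx, p->entity);           // then throttle on the prior submission

because the wait now happens under the reservation ticket, the per-context mutex that used to order it is no longer needed at this point.)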
+ */ + r = amdgpu_ctx_wait_prev_fence(p->ctx, p->entity); + if (unlikely(r != 0)) { + if (r != -ERESTARTSYS) + DRM_ERROR("amdgpu_ctx_wait_prev_fence failed.\n"); + goto error_validate; } amdgpu_cs_get_threshold_for_moves(p->adev, &p->bytes_moved_threshold, @@ -633,14 +636,21 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, } error_validate: + if (r) + ttm_eu_backoff_reservation(&p->ticket, &p->validated); + +out_free_user_pages: if (r) { - amdgpu_bo_list_for_each_entry(e, p->bo_list) { - dma_fence_chain_free(e->chain); - e->chain = NULL; + amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) { + struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo); + + if (!e->user_pages) + continue; + amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm); + kvfree(e->user_pages); + e->user_pages = NULL; } - ttm_eu_backoff_reservation(&p->ticket, &p->validated); } -out: return r; } @@ -679,17 +689,9 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, { unsigned i; - if (error && backoff) { - struct amdgpu_bo_list_entry *e; - - amdgpu_bo_list_for_each_entry(e, parser->bo_list) { - dma_fence_chain_free(e->chain); - e->chain = NULL; - } - + if (error && backoff) ttm_eu_backoff_reservation(&parser->ticket, &parser->validated); - } for (i = 0; i < parser->num_post_deps; i++) { drm_syncobj_put(parser->post_deps[i].syncobj); @@ -775,12 +777,12 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p) memcpy(ib->ptr, kptr, chunk_ib->ib_bytes); amdgpu_bo_kunmap(aobj); - r = amdgpu_ring_parse_cs(ring, p, j); + r = amdgpu_ring_parse_cs(ring, p, p->job, ib); if (r) return r; } else { ib->ptr = (uint32_t *)kptr; - r = amdgpu_ring_patch_cs_in_place(ring, p, j); + r = amdgpu_ring_patch_cs_in_place(ring, p, p->job, ib); amdgpu_bo_kunmap(aobj); if (r) return r; @@ -798,22 +800,22 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p) if (r) return r; - r = amdgpu_vm_bo_update(adev, fpriv->prt_va, false, NULL); + r = amdgpu_vm_bo_update(adev, fpriv->prt_va, false); if (r) return r; - r = amdgpu_sync_vm_fence(&p->job->sync, fpriv->prt_va->last_pt_update); + r = amdgpu_sync_fence(&p->job->sync, fpriv->prt_va->last_pt_update); if (r) return r; if (amdgpu_mcbp || amdgpu_sriov_vf(adev)) { bo_va = fpriv->csa_va; BUG_ON(!bo_va); - r = amdgpu_vm_bo_update(adev, bo_va, false, NULL); + r = amdgpu_vm_bo_update(adev, bo_va, false); if (r) return r; - r = amdgpu_sync_vm_fence(&p->job->sync, bo_va->last_pt_update); + r = amdgpu_sync_fence(&p->job->sync, bo_va->last_pt_update); if (r) return r; } @@ -828,11 +830,11 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p) if (bo_va == NULL) continue; - r = amdgpu_vm_bo_update(adev, bo_va, false, NULL); + r = amdgpu_vm_bo_update(adev, bo_va, false); if (r) return r; - r = amdgpu_sync_vm_fence(&p->job->sync, bo_va->last_pt_update); + r = amdgpu_sync_fence(&p->job->sync, bo_va->last_pt_update); if (r) return r; } @@ -845,7 +847,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p) if (r) return r; - r = amdgpu_sync_vm_fence(&p->job->sync, vm->last_update); + r = amdgpu_sync_fence(&p->job->sync, vm->last_update); if (r) return r; @@ -944,7 +946,7 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev, if (parser->job->uf_addr && ring->funcs->no_user_fence) return -EINVAL; - return amdgpu_ctx_wait_prev_fence(parser->ctx, parser->entity); + return 0; } static int amdgpu_cs_process_fence_dep(struct amdgpu_cs_parser *p, @@ -1149,6 +1151,9 @@ static int amdgpu_cs_dependencies(struct amdgpu_device *adev, { int i, r; + /* TODO: 
Investigate why we still need the context lock */ + mutex_unlock(&p->ctx->lock); + for (i = 0; i < p->nchunks; ++i) { struct amdgpu_cs_chunk *chunk; @@ -1159,32 +1164,34 @@ static int amdgpu_cs_dependencies(struct amdgpu_device *adev, case AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES: r = amdgpu_cs_process_fence_dep(p, chunk); if (r) - return r; + goto out; break; case AMDGPU_CHUNK_ID_SYNCOBJ_IN: r = amdgpu_cs_process_syncobj_in_dep(p, chunk); if (r) - return r; + goto out; break; case AMDGPU_CHUNK_ID_SYNCOBJ_OUT: r = amdgpu_cs_process_syncobj_out_dep(p, chunk); if (r) - return r; + goto out; break; case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT: r = amdgpu_cs_process_syncobj_timeline_in_dep(p, chunk); if (r) - return r; + goto out; break; case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL: r = amdgpu_cs_process_syncobj_timeline_out_dep(p, chunk); if (r) - return r; + goto out; break; } } - return 0; +out: + mutex_lock(&p->ctx->lock); + return r; } static void amdgpu_cs_post_dependencies(struct amdgpu_cs_parser *p) @@ -1264,27 +1271,9 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, amdgpu_vm_move_to_lru_tail(p->adev, &fpriv->vm); - amdgpu_bo_list_for_each_entry(e, p->bo_list) { - struct dma_resv *resv = e->tv.bo->base.resv; - struct dma_fence_chain *chain = e->chain; - - if (!chain) - continue; - - /* - * Work around dma_resv shortcommings by wrapping up the - * submission in a dma_fence_chain and add it as exclusive - * fence, but first add the submission as shared fence to make - * sure that shared fences never signal before the exclusive - * one. - */ - dma_fence_chain_init(chain, dma_resv_excl_fence(resv), - dma_fence_get(p->fence), 1); - - dma_resv_add_shared_fence(resv, p->fence); - rcu_assign_pointer(resv->fence_excl, &chain->base); - e->chain = NULL; - } + /* Make sure all BOs are remembered as writers */ + amdgpu_bo_list_for_each_entry(e, p->bo_list) + e->tv.num_shared = 0; ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence); mutex_unlock(&p->adev->notifier_lock); @@ -1509,6 +1498,7 @@ int amdgpu_cs_fence_to_handle_ioctl(struct drm_device *dev, void *data, return 0; default: + dma_fence_put(fence); return -EINVAL; } } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h new file mode 100644 index 000000000000..30ecc4917f81 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h @@ -0,0 +1,80 @@ +/* + * Copyright 2022 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ +#ifndef __AMDGPU_CS_H__ +#define __AMDGPU_CS_H__ + +#include "amdgpu_job.h" +#include "amdgpu_bo_list.h" +#include "amdgpu_ring.h" + +struct amdgpu_bo_va_mapping; + +struct amdgpu_cs_chunk { + uint32_t chunk_id; + uint32_t length_dw; + void *kdata; +}; + +struct amdgpu_cs_post_dep { + struct drm_syncobj *syncobj; + struct dma_fence_chain *chain; + u64 point; +}; + +struct amdgpu_cs_parser { + struct amdgpu_device *adev; + struct drm_file *filp; + struct amdgpu_ctx *ctx; + + /* chunks */ + unsigned nchunks; + struct amdgpu_cs_chunk *chunks; + + /* scheduler job object */ + struct amdgpu_job *job; + struct drm_sched_entity *entity; + + /* buffer objects */ + struct ww_acquire_ctx ticket; + struct amdgpu_bo_list *bo_list; + struct amdgpu_mn *mn; + struct amdgpu_bo_list_entry vm_pd; + struct list_head validated; + struct dma_fence *fence; + uint64_t bytes_moved_threshold; + uint64_t bytes_moved_vis_threshold; + uint64_t bytes_moved; + uint64_t bytes_moved_vis; + + /* user fence */ + struct amdgpu_bo_list_entry uf_entry; + + unsigned num_post_deps; + struct amdgpu_cs_post_dep *post_deps; +}; + +int amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser, + uint64_t addr, struct amdgpu_bo **bo, + struct amdgpu_bo_va_mapping **mapping); + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c index da21e60bb827..c6d4d41c4393 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c @@ -98,7 +98,7 @@ int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm, if (r) { DRM_ERROR("failed to do bo_map on static CSA, err=%d\n", r); - amdgpu_vm_bo_rmv(adev, *bo_va); + amdgpu_vm_bo_del(adev, *bo_va); ttm_eu_backoff_reservation(&ticket, &list); return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index 468003583b2a..8f0e6d93bb9c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -23,6 +23,7 @@ */ #include <drm/drm_auth.h> +#include <drm/drm_drv.h> #include "amdgpu.h" #include "amdgpu_sched.h" #include "amdgpu_ras.h" @@ -204,9 +205,15 @@ static int amdgpu_ctx_init_entity(struct amdgpu_ctx *ctx, u32 hw_ip, if (r) goto error_free_entity; - ctx->entities[hw_ip][ring] = entity; + /* It's not an error if we fail to install the new entity */ + if (cmpxchg(&ctx->entities[hw_ip][ring], NULL, entity)) + goto cleanup_entity; + return 0; +cleanup_entity: + drm_sched_entity_fini(&entity->entity); + error_free_entity: kfree(entity); @@ -237,6 +244,7 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev, ctx->vram_lost_counter = atomic_read(&adev->vram_lost_counter); ctx->init_priority = priority; ctx->override_priority = AMDGPU_CTX_PRIORITY_UNSET; + ctx->stable_pstate = AMDGPU_CTX_STABLE_PSTATE_NONE; return 0; } @@ -255,11 +263,85 @@ static void amdgpu_ctx_fini_entity(struct amdgpu_ctx_entity *entity) kfree(entity); } +static int amdgpu_ctx_get_stable_pstate(struct amdgpu_ctx *ctx, + u32 *stable_pstate) +{ + struct amdgpu_device *adev = ctx->adev; + enum amd_dpm_forced_level current_level; + + current_level = amdgpu_dpm_get_performance_level(adev); + + switch (current_level) { + case AMD_DPM_FORCED_LEVEL_PROFILE_STANDARD: + *stable_pstate = AMDGPU_CTX_STABLE_PSTATE_STANDARD; + break; + case AMD_DPM_FORCED_LEVEL_PROFILE_MIN_SCLK: + *stable_pstate = AMDGPU_CTX_STABLE_PSTATE_MIN_SCLK; + break; + case AMD_DPM_FORCED_LEVEL_PROFILE_MIN_MCLK: + *stable_pstate = AMDGPU_CTX_STABLE_PSTATE_MIN_MCLK; + break; + 
case AMD_DPM_FORCED_LEVEL_PROFILE_PEAK: + *stable_pstate = AMDGPU_CTX_STABLE_PSTATE_PEAK; + break; + default: + *stable_pstate = AMDGPU_CTX_STABLE_PSTATE_NONE; + break; + } + return 0; +} + +static int amdgpu_ctx_set_stable_pstate(struct amdgpu_ctx *ctx, + u32 stable_pstate) +{ + struct amdgpu_device *adev = ctx->adev; + enum amd_dpm_forced_level level; + int r; + + mutex_lock(&adev->pm.stable_pstate_ctx_lock); + if (adev->pm.stable_pstate_ctx && adev->pm.stable_pstate_ctx != ctx) { + r = -EBUSY; + goto done; + } + + switch (stable_pstate) { + case AMDGPU_CTX_STABLE_PSTATE_NONE: + level = AMD_DPM_FORCED_LEVEL_AUTO; + break; + case AMDGPU_CTX_STABLE_PSTATE_STANDARD: + level = AMD_DPM_FORCED_LEVEL_PROFILE_STANDARD; + break; + case AMDGPU_CTX_STABLE_PSTATE_MIN_SCLK: + level = AMD_DPM_FORCED_LEVEL_PROFILE_MIN_SCLK; + break; + case AMDGPU_CTX_STABLE_PSTATE_MIN_MCLK: + level = AMD_DPM_FORCED_LEVEL_PROFILE_MIN_MCLK; + break; + case AMDGPU_CTX_STABLE_PSTATE_PEAK: + level = AMD_DPM_FORCED_LEVEL_PROFILE_PEAK; + break; + default: + r = -EINVAL; + goto done; + } + + r = amdgpu_dpm_force_performance_level(adev, level); + + if (level == AMD_DPM_FORCED_LEVEL_AUTO) + adev->pm.stable_pstate_ctx = NULL; + else + adev->pm.stable_pstate_ctx = ctx; +done: + mutex_unlock(&adev->pm.stable_pstate_ctx_lock); + + return r; +} + static void amdgpu_ctx_fini(struct kref *ref) { struct amdgpu_ctx *ctx = container_of(ref, struct amdgpu_ctx, refcount); struct amdgpu_device *adev = ctx->adev; - unsigned i, j; + unsigned i, j, idx; if (!adev) return; @@ -271,6 +353,11 @@ static void amdgpu_ctx_fini(struct kref *ref) } } + if (drm_dev_enter(&adev->ddev, &idx)) { + amdgpu_ctx_set_stable_pstate(ctx, AMDGPU_CTX_STABLE_PSTATE_NONE); + drm_dev_exit(idx); + } + mutex_destroy(&ctx->lock); kfree(ctx); } @@ -467,11 +554,41 @@ static int amdgpu_ctx_query2(struct amdgpu_device *adev, return 0; } + + +static int amdgpu_ctx_stable_pstate(struct amdgpu_device *adev, + struct amdgpu_fpriv *fpriv, uint32_t id, + bool set, u32 *stable_pstate) +{ + struct amdgpu_ctx *ctx; + struct amdgpu_ctx_mgr *mgr; + int r; + + if (!fpriv) + return -EINVAL; + + mgr = &fpriv->ctx_mgr; + mutex_lock(&mgr->lock); + ctx = idr_find(&mgr->ctx_handles, id); + if (!ctx) { + mutex_unlock(&mgr->lock); + return -EINVAL; + } + + if (set) + r = amdgpu_ctx_set_stable_pstate(ctx, *stable_pstate); + else + r = amdgpu_ctx_get_stable_pstate(ctx, stable_pstate); + + mutex_unlock(&mgr->lock); + return r; +} + int amdgpu_ctx_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) { int r; - uint32_t id; + uint32_t id, stable_pstate; int32_t priority; union drm_amdgpu_ctx *args = data; @@ -500,6 +617,21 @@ int amdgpu_ctx_ioctl(struct drm_device *dev, void *data, case AMDGPU_CTX_OP_QUERY_STATE2: r = amdgpu_ctx_query2(adev, fpriv, id, &args->out); break; + case AMDGPU_CTX_OP_GET_STABLE_PSTATE: + if (args->in.flags) + return -EINVAL; + r = amdgpu_ctx_stable_pstate(adev, fpriv, id, false, &stable_pstate); + if (!r) + args->out.pstate.flags = stable_pstate; + break; + case AMDGPU_CTX_OP_SET_STABLE_PSTATE: + if (args->in.flags & ~AMDGPU_CTX_STABLE_PSTATE_FLAGS_MASK) + return -EINVAL; + stable_pstate = args->in.flags & AMDGPU_CTX_STABLE_PSTATE_FLAGS_MASK; + if (stable_pstate > AMDGPU_CTX_STABLE_PSTATE_PEAK) + return -EINVAL; + r = amdgpu_ctx_stable_pstate(adev, fpriv, id, true, &stable_pstate); + break; default: return -EINVAL; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h index a44b8b8ed39c..142f2f87d44c 100644 --- 
a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h @@ -53,6 +53,7 @@ struct amdgpu_ctx { atomic_t guilty; unsigned long ras_counter_ce; unsigned long ras_counter_ue; + uint32_t stable_pstate; }; struct amdgpu_ctx_mgr { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c index 25e2e5bf90eb..eedb12f6b8a3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c @@ -37,6 +37,9 @@ #include "amdgpu_fw_attestation.h" #include "amdgpu_umr.h" +#include "amdgpu_reset.h" +#include "amdgpu_psp_ta.h" + #if defined(CONFIG_DEBUG_FS) /** @@ -728,7 +731,7 @@ static ssize_t amdgpu_debugfs_gca_config_read(struct file *f, char __user *buf, return -ENOMEM; /* version, increment each time something is added */ - config[no_regs++] = 3; + config[no_regs++] = 5; config[no_regs++] = adev->gfx.config.max_shader_engines; config[no_regs++] = adev->gfx.config.max_tile_pipes; config[no_regs++] = adev->gfx.config.max_cu_per_sh; @@ -755,8 +758,8 @@ static ssize_t amdgpu_debugfs_gca_config_read(struct file *f, char __user *buf, /* rev==1 */ config[no_regs++] = adev->rev_id; - config[no_regs++] = adev->pg_flags; - config[no_regs++] = adev->cg_flags; + config[no_regs++] = lower_32_bits(adev->pg_flags); + config[no_regs++] = lower_32_bits(adev->cg_flags); /* rev==2 */ config[no_regs++] = adev->family; @@ -768,6 +771,13 @@ static ssize_t amdgpu_debugfs_gca_config_read(struct file *f, char __user *buf, config[no_regs++] = adev->pdev->subsystem_device; config[no_regs++] = adev->pdev->subsystem_vendor; + /* rev==4 APU flag */ + config[no_regs++] = adev->flags & AMD_IS_APU ? 1 : 0; + + /* rev==5 PG/CG flag upper 32bit */ + config[no_regs++] = upper_32_bits(adev->pg_flags); + config[no_regs++] = upper_32_bits(adev->cg_flags); + while (size && (*pos < no_regs * 4)) { uint32_t value; @@ -1120,8 +1130,10 @@ static ssize_t amdgpu_debugfs_gfxoff_read(struct file *f, char __user *buf, return -EINVAL; r = pm_runtime_get_sync(adev_to_drm(adev)->dev); - if (r < 0) + if (r < 0) { + pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); return r; + } while (size) { uint32_t value; @@ -1279,7 +1291,7 @@ static int amdgpu_debugfs_test_ib_show(struct seq_file *m, void *unused) } /* Avoid accidently unparking the sched thread during GPU reset */ - r = down_write_killable(&adev->reset_sem); + r = down_write_killable(&adev->reset_domain->sem); if (r) return r; @@ -1308,7 +1320,7 @@ static int amdgpu_debugfs_test_ib_show(struct seq_file *m, void *unused) kthread_unpark(ring->sched.thread); } - up_write(&adev->reset_sem); + up_write(&adev->reset_domain->sem); pm_runtime_mark_last_busy(dev->dev); pm_runtime_put_autosuspend(dev->dev); @@ -1357,6 +1369,25 @@ static int amdgpu_debugfs_evict_gtt(void *data, u64 *val) return 0; } +static int amdgpu_debugfs_benchmark(void *data, u64 val) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)data; + struct drm_device *dev = adev_to_drm(adev); + int r; + + r = pm_runtime_get_sync(dev->dev); + if (r < 0) { + pm_runtime_put_autosuspend(dev->dev); + return r; + } + + r = amdgpu_benchmark(adev, val); + + pm_runtime_mark_last_busy(dev->dev); + pm_runtime_put_autosuspend(dev->dev); + + return r; +} static int amdgpu_debugfs_vm_info_show(struct seq_file *m, void *unused) { @@ -1393,6 +1424,8 @@ DEFINE_DEBUGFS_ATTRIBUTE(amdgpu_evict_vram_fops, amdgpu_debugfs_evict_vram, NULL, "%lld\n"); DEFINE_DEBUGFS_ATTRIBUTE(amdgpu_evict_gtt_fops, amdgpu_debugfs_evict_gtt, NULL, "%lld\n"); 
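For context, the new GET/SET_STABLE_PSTATE context ops above are driven through the existing AMDGPU_CTX ioctl. A minimal userspace sketch follows; the fd and ctx_id are hypothetical, and the field names assume the matching uapi additions this patch pairs with:

    union drm_amdgpu_ctx args = {0};

    args.in.ctx_id = ctx_id;
    args.in.op = AMDGPU_CTX_OP_SET_STABLE_PSTATE;
    args.in.flags = AMDGPU_CTX_STABLE_PSTATE_PEAK;
    if (drmIoctl(fd, DRM_IOCTL_AMDGPU_CTX, &args))
            perror("set stable pstate");  /* -EBUSY if another ctx holds one */

Only one context may hold a stable pstate at a time, which is what the stable_pstate_ctx check under stable_pstate_ctx_lock above enforces.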
+DEFINE_DEBUGFS_ATTRIBUTE(amdgpu_benchmark_fops, NULL, amdgpu_debugfs_benchmark, + "%lld\n"); static void amdgpu_ib_preempt_fences_swap(struct amdgpu_ring *ring, struct dma_fence **fences) @@ -1517,7 +1550,7 @@ static int amdgpu_debugfs_ib_preempt(void *data, u64 val) return -ENOMEM; /* Avoid accidently unparking the sched thread during GPU reset */ - r = down_read_killable(&adev->reset_sem); + r = down_read_killable(&adev->reset_domain->sem); if (r) goto pro_end; @@ -1560,7 +1593,7 @@ failure: /* restart the scheduler */ kthread_unpark(ring->sched.thread); - up_read(&adev->reset_sem); + up_read(&adev->reset_domain->sem); ttm_bo_unlock_delayed_workqueue(&adev->mman.bdev, resched); @@ -1585,22 +1618,25 @@ static int amdgpu_debugfs_sclk_set(void *data, u64 val) return ret; } - if (is_support_sw_smu(adev)) { - ret = smu_get_dpm_freq_range(&adev->smu, SMU_SCLK, &min_freq, &max_freq); - if (ret || val > max_freq || val < min_freq) - return -EINVAL; - ret = smu_set_soft_freq_range(&adev->smu, SMU_SCLK, (uint32_t)val, (uint32_t)val); - } else { - return 0; + ret = amdgpu_dpm_get_dpm_freq_range(adev, PP_SCLK, &min_freq, &max_freq); + if (ret == -EOPNOTSUPP) { + ret = 0; + goto out; } + if (ret || val > max_freq || val < min_freq) { + ret = -EINVAL; + goto out; + } + + ret = amdgpu_dpm_set_soft_freq_range(adev, PP_SCLK, (uint32_t)val, (uint32_t)val); + if (ret) + ret = -EINVAL; +out: pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); - if (ret) - return -EINVAL; - - return 0; + return ret; } DEFINE_DEBUGFS_ATTRIBUTE(fops_ib_preempt, NULL, @@ -1609,6 +1645,91 @@ DEFINE_DEBUGFS_ATTRIBUTE(fops_ib_preempt, NULL, DEFINE_DEBUGFS_ATTRIBUTE(fops_sclk_set, NULL, amdgpu_debugfs_sclk_set, "%llu\n"); +static ssize_t amdgpu_reset_dump_register_list_read(struct file *f, + char __user *buf, size_t size, loff_t *pos) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)file_inode(f)->i_private; + char reg_offset[12]; + int i, ret, len = 0; + + if (*pos) + return 0; + + memset(reg_offset, 0, 12); + ret = down_read_killable(&adev->reset_domain->sem); + if (ret) + return ret; + + for (i = 0; i < adev->num_regs; i++) { + sprintf(reg_offset, "0x%x\n", adev->reset_dump_reg_list[i]); + up_read(&adev->reset_domain->sem); + if (copy_to_user(buf + len, reg_offset, strlen(reg_offset))) + return -EFAULT; + + len += strlen(reg_offset); + ret = down_read_killable(&adev->reset_domain->sem); + if (ret) + return ret; + } + + up_read(&adev->reset_domain->sem); + *pos += len; + + return len; +} + +static ssize_t amdgpu_reset_dump_register_list_write(struct file *f, + const char __user *buf, size_t size, loff_t *pos) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)file_inode(f)->i_private; + char reg_offset[11]; + uint32_t *new, *tmp = NULL; + int ret, i = 0, len = 0; + + do { + memset(reg_offset, 0, 11); + if (copy_from_user(reg_offset, buf + len, + min(10, ((int)size-len)))) { + ret = -EFAULT; + goto error_free; + } + + new = krealloc_array(tmp, i + 1, sizeof(uint32_t), GFP_KERNEL); + if (!new) { + ret = -ENOMEM; + goto error_free; + } + tmp = new; + if (sscanf(reg_offset, "%X %n", &tmp[i], &ret) != 1) { + ret = -EINVAL; + goto error_free; + } + + len += ret; + i++; + } while (len < size); + + ret = down_write_killable(&adev->reset_domain->sem); + if (ret) + goto error_free; + + swap(adev->reset_dump_reg_list, tmp); + adev->num_regs = i; + up_write(&adev->reset_domain->sem); + ret = size; + +error_free: + kfree(tmp); + return ret; +} + +static const struct 
file_operations amdgpu_reset_dump_register_list = { + .owner = THIS_MODULE, + .read = amdgpu_reset_dump_register_list_read, + .write = amdgpu_reset_dump_register_list_write, + .llseek = default_llseek +}; + int amdgpu_debugfs_init(struct amdgpu_device *adev) { struct dentry *root = adev_to_drm(adev)->primary->debugfs_root; @@ -1647,6 +1768,7 @@ int amdgpu_debugfs_init(struct amdgpu_device *adev) DRM_ERROR("registering register debugfs failed (%d).\n", r); amdgpu_debugfs_firmware_init(adev); + amdgpu_ta_if_debugfs_init(adev); #if defined(CONFIG_DRM_AMD_DC) if (amdgpu_device_has_dc_support(adev)) @@ -1662,6 +1784,16 @@ int amdgpu_debugfs_init(struct amdgpu_device *adev) amdgpu_debugfs_ring_init(adev, ring); } + for ( i = 0; i < adev->vcn.num_vcn_inst; i++) { + if (!amdgpu_vcnfw_log) + break; + + if (adev->vcn.harvest_config & (1 << i)) + continue; + + amdgpu_debugfs_vcn_fwlog_init(adev, i, &adev->vcn.inst[i]); + } + amdgpu_ras_debugfs_create_all(adev); amdgpu_rap_debugfs_init(adev); amdgpu_securedisplay_debugfs_init(adev); @@ -1675,6 +1807,10 @@ int amdgpu_debugfs_init(struct amdgpu_device *adev) &amdgpu_debugfs_test_ib_fops); debugfs_create_file("amdgpu_vm_info", 0444, root, adev, &amdgpu_debugfs_vm_info_fops); + debugfs_create_file("amdgpu_benchmark", 0200, root, adev, + &amdgpu_benchmark_fops); + debugfs_create_file("amdgpu_reset_dump_register_list", 0644, root, adev, + &amdgpu_reset_dump_register_list); adev->debugfs_vbios_blob.data = adev->bios; adev->debugfs_vbios_blob.size = adev->bios_size; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index ed077de426d9..53d938d5a00a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -31,6 +31,7 @@ #include <linux/console.h> #include <linux/slab.h> #include <linux/iommu.h> +#include <linux/pci.h> #include <drm/drm_atomic_helper.h> #include <drm/drm_probe_helper.h> @@ -55,7 +56,6 @@ #include "soc15.h" #include "nv.h" #include "bif/bif_4_1_d.h" -#include <linux/pci.h> #include <linux/firmware.h> #include "amdgpu_vf_error.h" @@ -80,14 +80,11 @@ MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin"); MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin"); MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin"); MODULE_FIRMWARE("amdgpu/arcturus_gpu_info.bin"); -MODULE_FIRMWARE("amdgpu/renoir_gpu_info.bin"); -MODULE_FIRMWARE("amdgpu/navi10_gpu_info.bin"); -MODULE_FIRMWARE("amdgpu/navi14_gpu_info.bin"); MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin"); -MODULE_FIRMWARE("amdgpu/vangogh_gpu_info.bin"); -MODULE_FIRMWARE("amdgpu/yellow_carp_gpu_info.bin"); #define AMDGPU_RESUME_MS 2000 +#define AMDGPU_MAX_RETRY_LIMIT 2 +#define AMDGPU_RETRY_SRIOV_RESET(r) ((r) == -EBUSY || (r) == -ETIMEDOUT || (r) == -EINVAL) const char *amdgpu_asic_name[] = { "TAHITI", @@ -424,10 +421,10 @@ bool amdgpu_device_skip_hw_access(struct amdgpu_device *adev) * the lock. 
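(Aside on the reset-dump interface added earlier in this file: it is a plain-text debugfs file of hex register offsets, one per line, consumed by the sscanf("%X %n") loop above; a sketch of feeding it from C, with an assumed DRI minor path, arbitrary example offsets, and error handling elided:

    int fd = open("/sys/kernel/debug/dri/0/amdgpu_reset_dump_register_list", O_WRONLY);
    write(fd, "0x506\n0x507\n", 12);  // offsets to record on the next GPU reset
    close(fd);

the offsets are read back through the same file and emitted via a tracepoint when a reset actually runs.)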
*/ if (in_task()) { - if (down_read_trylock(&adev->reset_sem)) - up_read(&adev->reset_sem); + if (down_read_trylock(&adev->reset_domain->sem)) + up_read(&adev->reset_domain->sem); else - lockdep_assert_held(&adev->reset_sem); + lockdep_assert_held(&adev->reset_domain->sem); } #endif return false; @@ -453,9 +450,9 @@ uint32_t amdgpu_device_rreg(struct amdgpu_device *adev, if ((reg * 4) < adev->rmmio_size) { if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev) && - down_read_trylock(&adev->reset_sem)) { + down_read_trylock(&adev->reset_domain->sem)) { ret = amdgpu_kiq_rreg(adev, reg); - up_read(&adev->reset_sem); + up_read(&adev->reset_domain->sem); } else { ret = readl(((void __iomem *)adev->rmmio) + (reg * 4)); } @@ -538,9 +535,9 @@ void amdgpu_device_wreg(struct amdgpu_device *adev, if ((reg * 4) < adev->rmmio_size) { if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev) && - down_read_trylock(&adev->reset_sem)) { + down_read_trylock(&adev->reset_domain->sem)) { amdgpu_kiq_wreg(adev, reg, v); - up_read(&adev->reset_sem); + up_read(&adev->reset_domain->sem); } else { writel(v, ((void __iomem *)adev->rmmio) + (reg * 4)); } @@ -554,7 +551,11 @@ void amdgpu_device_wreg(struct amdgpu_device *adev, /** * amdgpu_mm_wreg_mmio_rlc - write register either with direct/indirect mmio or with RLC path if in range * - * this function is invoked only the debugfs register access + * @adev: amdgpu_device pointer + * @reg: mmio/rlc register + * @v: value to write + * + * this function is invoked only for the debugfs register access */ void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev, uint32_t reg, uint32_t v) @@ -566,7 +567,7 @@ void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev, adev->gfx.rlc.funcs && adev->gfx.rlc.funcs->is_rlcg_access_range) { if (adev->gfx.rlc.funcs->is_rlcg_access_range(adev, reg)) - return adev->gfx.rlc.funcs->sriov_wreg(adev, reg, v, 0, 0); + return amdgpu_sriov_wreg(adev, reg, v, 0, 0); } else if ((reg * 4) >= adev->rmmio_size) { adev->pcie_wreg(adev, reg * 4, v); } else { @@ -912,7 +913,10 @@ static int amdgpu_device_asic_init(struct amdgpu_device *adev) { amdgpu_asic_pre_asic_init(adev); - return amdgpu_atom_asic_init(adev->mode_info.atom_context); + if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(11, 0, 0)) + return amdgpu_atomfirmware_asic_init(adev, true); + else + return amdgpu_atom_asic_init(adev->mode_info.atom_context); } /** @@ -1312,6 +1316,31 @@ bool amdgpu_device_need_post(struct amdgpu_device *adev) return true; } +/** + * amdgpu_device_should_use_aspm - check if the device should program ASPM + * + * @adev: amdgpu_device pointer + * + * Confirm whether the module parameter and pcie bridge agree that ASPM should + * be set for this device. + * + * Returns true if it should be used or false if not. 
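(Aside: the expected call pattern is an early-out at the top of each IP block's ASPM programming path; nbio_vX_program_aspm is a hypothetical stand-in:

    static void nbio_vX_program_aspm(struct amdgpu_device *adev)
    {
            if (!amdgpu_device_should_use_aspm(adev))
                    return;
            // ... program the ASPM link-control registers ...
    }

with amdgpu_aspm == -1 the decision defers to whatever the PCIe bridge already negotiated, via pcie_aspm_enabled().)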
+ */ +bool amdgpu_device_should_use_aspm(struct amdgpu_device *adev) +{ + switch (amdgpu_aspm) { + case -1: + break; + case 0: + return false; + case 1: + return true; + default: + return false; + } + return pcie_aspm_enabled(adev->pdev); +} + /* if we get transitioned to only one device, take VGA back */ /** * amdgpu_device_vga_set_decode - enable/disable vga decode @@ -1446,7 +1475,8 @@ static int amdgpu_device_init_apu_flags(struct amdgpu_device *adev) case CHIP_YELLOW_CARP: break; case CHIP_CYAN_SKILLFISH: - if (adev->pdev->device == 0x13FE) + if ((adev->pdev->device == 0x13FE) || + (adev->pdev->device == 0x143F)) adev->apu_flags |= AMD_APU_IS_CYAN_SKILLFISH2; break; default: @@ -1507,6 +1537,11 @@ static int amdgpu_device_check_arguments(struct amdgpu_device *adev) amdgpu_sched_hw_submission = roundup_pow_of_two(amdgpu_sched_hw_submission); } + if (amdgpu_reset_method < -1 || amdgpu_reset_method > 4) { + dev_warn(adev->dev, "invalid option for reset method, reverting to default\n"); + amdgpu_reset_method = -1; + } + amdgpu_device_check_smu_prv_buffer_size(adev); amdgpu_device_check_vm_size(adev); @@ -1517,7 +1552,6 @@ static int amdgpu_device_check_arguments(struct amdgpu_device *adev) amdgpu_gmc_tmz_set(adev); - amdgpu_gmc_noretry_set(adev); return 0; } @@ -1672,7 +1706,7 @@ int amdgpu_device_ip_set_powergating_state(void *dev, * clockgating is enabled. */ void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev, - u32 *flags) + u64 *flags) { int i; @@ -1895,11 +1929,9 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev) adev->firmware.gpu_info_fw = NULL; if (adev->mman.discovery_bin) { - amdgpu_discovery_get_gfx_info(adev); - /* * FIXME: The bounding box is still needed by Navi12, so - * temporarily read it from gpu_info firmware. Should be droped + * temporarily read it from gpu_info firmware. Should be dropped * when DAL no longer needs it. */ if (adev->asic_type != CHIP_NAVI12) @@ -1955,27 +1987,9 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev) case CHIP_ARCTURUS: chip_name = "arcturus"; break; - case CHIP_RENOIR: - if (adev->apu_flags & AMD_APU_IS_RENOIR) - chip_name = "renoir"; - else - chip_name = "green_sardine"; - break; - case CHIP_NAVI10: - chip_name = "navi10"; - break; - case CHIP_NAVI14: - chip_name = "navi14"; - break; case CHIP_NAVI12: chip_name = "navi12"; break; - case CHIP_VANGOGH: - chip_name = "vangogh"; - break; - case CHIP_YELLOW_CARP: - chip_name = "yellow_carp"; - break; } snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name); @@ -2073,6 +2087,8 @@ out: */ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev) { + struct drm_device *dev = adev_to_drm(adev); + struct pci_dev *parent; int i, r; amdgpu_device_enable_virtual_display(adev); @@ -2137,6 +2153,18 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev) break; } + if (amdgpu_has_atpx() && + (amdgpu_is_atpx_hybrid() || + amdgpu_has_atpx_dgpu_power_cntl()) && + ((adev->flags & AMD_IS_APU) == 0) && + !pci_is_thunderbolt_attached(to_pci_dev(dev->dev))) + adev->flags |= AMD_IS_PX; + + if (!(adev->flags & AMD_IS_APU)) { + parent = pci_upstream_bridge(adev->pdev); + adev->has_pr3 = parent ? 
pci_pr3_present(parent) : false; + } + amdgpu_amdkfd_device_probe(adev); adev->pm.pp_feature = amdgpu_pp_feature_mask; @@ -2287,6 +2315,49 @@ static int amdgpu_device_fw_loading(struct amdgpu_device *adev) return r; } +static int amdgpu_device_init_schedulers(struct amdgpu_device *adev) +{ + long timeout; + int r, i; + + for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { + struct amdgpu_ring *ring = adev->rings[i]; + + /* No need to setup the GPU scheduler for rings that don't need it */ + if (!ring || ring->no_scheduler) + continue; + + switch (ring->funcs->type) { + case AMDGPU_RING_TYPE_GFX: + timeout = adev->gfx_timeout; + break; + case AMDGPU_RING_TYPE_COMPUTE: + timeout = adev->compute_timeout; + break; + case AMDGPU_RING_TYPE_SDMA: + timeout = adev->sdma_timeout; + break; + default: + timeout = adev->video_timeout; + break; + } + + r = drm_sched_init(&ring->sched, &amdgpu_sched_ops, + ring->num_hw_submission, amdgpu_job_hang_limit, + timeout, adev->reset_domain->wq, + ring->sched_score, ring->name, + adev->dev); + if (r) { + DRM_ERROR("Failed to create scheduler on ring %s.\n", + ring->name); + return r; + } + } + + return 0; +} + + /** * amdgpu_device_ip_init - run init for hardware IPs * @@ -2398,8 +2469,28 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev) if (r) goto init_failed; - if (adev->gmc.xgmi.num_physical_nodes > 1) - amdgpu_xgmi_add_device(adev); + /** + * In case of XGMI grab extra reference for reset domain for this device + */ + if (adev->gmc.xgmi.num_physical_nodes > 1) { + if (amdgpu_xgmi_add_device(adev) == 0) { + struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev); + + if (!hive->reset_domain || + !amdgpu_reset_get_reset_domain(hive->reset_domain)) { + r = -ENOENT; + goto init_failed; + } + + /* Drop the early temporary reset domain we created for device */ + amdgpu_reset_put_reset_domain(adev->reset_domain); + adev->reset_domain = hive->reset_domain; + } + } + + r = amdgpu_device_init_schedulers(adev); + if (r) + goto init_failed; /* Don't init kfd if whole hive need to be reset during init */ if (!adev->gmc.xgmi.pending_reset) @@ -2610,6 +2701,12 @@ static int amdgpu_device_ip_late_init(struct amdgpu_device *adev) adev->ip_blocks[i].status.late_initialized = true; } + r = amdgpu_ras_late_init(adev); + if (r) { + DRM_ERROR("amdgpu_ras_late_init failed %d", r); + return r; + } + amdgpu_ras_set_error_query_ready(adev, true); amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE); @@ -2624,7 +2721,7 @@ static int amdgpu_device_ip_late_init(struct amdgpu_device *adev) /* For passthrough configuration on arcturus and aldebaran, enable special handling SBR */ if (amdgpu_passthrough(adev) && ((adev->asic_type == CHIP_ARCTURUS && adev->gmc.xgmi.num_physical_nodes > 1)|| adev->asic_type == CHIP_ALDEBARAN )) - smu_handle_passthrough_sbr(&adev->smu, true); + amdgpu_dpm_handle_passthrough_sbr(adev, true); if (adev->gmc.xgmi.num_physical_nodes > 1) { mutex_lock(&mgpu_info.mutex); @@ -2708,11 +2805,11 @@ static int amdgpu_device_ip_fini_early(struct amdgpu_device *adev) } } - amdgpu_amdkfd_suspend(adev, false); - amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE); amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE); + amdgpu_amdkfd_suspend(adev, false); + /* Workaroud for ASICs need to disable SMC first */ amdgpu_device_smu_fini_early(adev); @@ -2881,7 +2978,7 @@ static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev) int i, r; if (adev->in_s0ix) - amdgpu_gfx_state_change_set(adev, sGpuChangeState_D3Entry); + amdgpu_dpm_gfx_state_change(adev, 
sGpuChangeState_D3Entry); for (i = adev->num_ip_blocks - 1; i >= 0; i--) { if (!adev->ip_blocks[i].status.valid) @@ -3307,9 +3404,9 @@ static void amdgpu_device_xgmi_reset_func(struct work_struct *__work) if (adev->asic_reset_res) goto fail; - if (adev->mmhub.ras_funcs && - adev->mmhub.ras_funcs->reset_ras_error_count) - adev->mmhub.ras_funcs->reset_ras_error_count(adev); + if (adev->mmhub.ras && adev->mmhub.ras->ras_block.hw_ops && + adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count) + adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count(adev); } else { task_barrier_full(&hive->tb); @@ -3493,12 +3590,12 @@ int amdgpu_device_init(struct amdgpu_device *adev, mutex_init(&adev->mn_lock); mutex_init(&adev->virt.vf_errors.lock); hash_init(adev->mn_hash); - atomic_set(&adev->in_gpu_reset, 0); - init_rwsem(&adev->reset_sem); mutex_init(&adev->psp.mutex); mutex_init(&adev->notifier_lock); + mutex_init(&adev->pm.stable_pstate_ctx_lock); + mutex_init(&adev->benchmark_mutex); - amdgpu_device_init_apu_flags(adev); + amdgpu_device_init_apu_flags(adev); r = amdgpu_device_check_arguments(adev); if (r) @@ -3519,6 +3616,8 @@ int amdgpu_device_init(struct amdgpu_device *adev, INIT_LIST_HEAD(&adev->reset_list); + INIT_LIST_HEAD(&adev->ras_list); + INIT_DELAYED_WORK(&adev->delayed_init_work, amdgpu_device_delayed_init_work_handler); INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work, @@ -3568,6 +3667,15 @@ int amdgpu_device_init(struct amdgpu_device *adev, if (amdgpu_mes && adev->asic_type >= CHIP_NAVI10) adev->enable_mes = true; + /* + * Reset domain needs to be present early, before XGMI hive discovered + * (if any) and intitialized to use reset sem and in_gpu reset flag + * early on during init and before calling to RREG32. + */ + adev->reset_domain = amdgpu_reset_create_reset_domain(SINGLE_DEVICE, "amdgpu-reset-dev"); + if (!adev->reset_domain) + return -ENOMEM; + /* detect hw virtualization here */ amdgpu_detect_virtualization(adev); @@ -3582,6 +3690,7 @@ int amdgpu_device_init(struct amdgpu_device *adev, if (r) return r; + amdgpu_gmc_noretry_set(adev); /* Need to get xgmi info early to decide the reset behavior*/ if (adev->gmc.xgmi.supported) { r = adev->gfxhub.funcs->get_xgmi_info(adev); @@ -3592,7 +3701,7 @@ int amdgpu_device_init(struct amdgpu_device *adev, /* enable PCIE atomic ops */ if (amdgpu_sriov_vf(adev)) adev->have_atomics_support = ((struct amd_sriov_msg_pf2vf_info *) - adev->virt.fw_reserve.p_pf2vf)->pcie_atomic_ops_enabled_flags == + adev->virt.fw_reserve.p_pf2vf)->pcie_atomic_ops_support_flags == (PCI_EXP_DEVCAP2_ATOMIC_COMP32 | PCI_EXP_DEVCAP2_ATOMIC_COMP64); else adev->have_atomics_support = @@ -3749,19 +3858,6 @@ fence_driver_init: } else adev->ucode_sysfs_en = true; - if ((amdgpu_testing & 1)) { - if (adev->accel_working) - amdgpu_test_moves(adev); - else - DRM_INFO("amdgpu: acceleration disabled, skipping move tests\n"); - } - if (amdgpu_benchmarking) { - if (adev->accel_working) - amdgpu_benchmark(adev, amdgpu_benchmarking); - else - DRM_INFO("amdgpu: acceleration disabled, skipping benchmarks\n"); - } - /* * Register gpu instance before amdgpu_device_enable_mgpu_fan_boost. 
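(Aside: stepping back, the reset-domain lifetime this patch sets up is: create a SINGLE_DEVICE domain before any register access, then, if XGMI discovery later finds a hive, take a reference on the hive's shared domain and drop the temporary one; condensed from the code in this file:

    adev->reset_domain = amdgpu_reset_create_reset_domain(SINGLE_DEVICE, "amdgpu-reset-dev");
    // ... later, during IP init, once the hive is known ...
    if (amdgpu_reset_get_reset_domain(hive->reset_domain)) {
            amdgpu_reset_put_reset_domain(adev->reset_domain);
            adev->reset_domain = hive->reset_domain;
    }

so every device in a hive ends up serializing resets on one shared domain.)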
 	 * Otherwise the mgpu fan boost feature will be skipped due to the
@@ -3953,6 +4049,9 @@ void amdgpu_device_fini_sw(struct amdgpu_device *adev)
 	if (adev->mman.discovery_bin)
 		amdgpu_discovery_fini(adev);
 
+	amdgpu_reset_put_reset_domain(adev->reset_domain);
+	adev->reset_domain = NULL;
+
 	kfree(adev->pci_state);
 }
 
@@ -4044,7 +4143,7 @@ int amdgpu_device_resume(struct drm_device *dev, bool fbcon)
 		return 0;
 
 	if (adev->in_s0ix)
-		amdgpu_gfx_state_change_set(adev, sGpuChangeState_D0Entry);
+		amdgpu_dpm_gfx_state_change(adev, sGpuChangeState_D0Entry);
 
 	/* post card */
 	if (amdgpu_device_need_post(adev)) {
@@ -4347,7 +4446,9 @@ static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
 {
 	int r;
 	struct amdgpu_hive_info *hive = NULL;
+	int retry_limit = 0;
 
+retry:
 	amdgpu_amdkfd_pre_reset(adev);
 
@@ -4396,6 +4497,14 @@ error:
 	}
 	amdgpu_virt_release_full_gpu(adev, true);
 
+	if (AMDGPU_RETRY_SRIOV_RESET(r)) {
+		if (retry_limit < AMDGPU_MAX_RETRY_LIMIT) {
+			retry_limit++;
+			goto retry;
+		} else
+			DRM_ERROR("GPU reset retry is beyond the retry limit\n");
+	}
+
 	return r;
 }
 
@@ -4587,6 +4696,22 @@ int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
 	return r;
 }
 
+static int amdgpu_reset_reg_dumps(struct amdgpu_device *adev)
+{
+	uint32_t reg_value;
+	int i;
+
+	lockdep_assert_held(&adev->reset_domain->sem);
+	dump_stack();
+
+	for (i = 0; i < adev->num_regs; i++) {
+		reg_value = RREG32(adev->reset_dump_reg_list[i]);
+		trace_amdgpu_reset_reg_dumps(adev->reset_dump_reg_list[i], reg_value);
+	}
+
+	return 0;
+}
+
 int amdgpu_do_asic_reset(struct list_head *device_list_handle,
 			 struct amdgpu_reset_context *reset_context)
 {
@@ -4597,6 +4722,7 @@ int amdgpu_do_asic_reset(struct list_head *device_list_handle,
 	/* Try reset handler method first */
 	tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
 				    reset_list);
+	amdgpu_reset_reg_dumps(tmp_adev);
 	r = amdgpu_reset_perform_reset(tmp_adev, reset_context);
 	/* If reset handler not implemented, continue; otherwise return */
 	if (r == -ENOSYS)
@@ -4645,9 +4771,9 @@ int amdgpu_do_asic_reset(struct list_head *device_list_handle,
 	if (!r && amdgpu_ras_intr_triggered()) {
 		list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
-			if (tmp_adev->mmhub.ras_funcs &&
-			    tmp_adev->mmhub.ras_funcs->reset_ras_error_count)
-				tmp_adev->mmhub.ras_funcs->reset_ras_error_count(tmp_adev);
+			if (tmp_adev->mmhub.ras && tmp_adev->mmhub.ras->ras_block.hw_ops &&
+			    tmp_adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count)
+				tmp_adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count(tmp_adev);
 		}
 
 		amdgpu_ras_intr_cleared();
@@ -4754,17 +4880,8 @@ end:
 	return r;
 }
 
-static bool amdgpu_device_lock_adev(struct amdgpu_device *adev,
-				    struct amdgpu_hive_info *hive)
+static void amdgpu_device_set_mp1_state(struct amdgpu_device *adev)
 {
-	if (atomic_cmpxchg(&adev->in_gpu_reset, 0, 1) != 0)
-		return false;
-
-	if (hive) {
-		down_write_nest_lock(&adev->reset_sem, &hive->hive_lock);
-	} else {
-		down_write(&adev->reset_sem);
-	}
 
 	switch (amdgpu_asic_reset_method(adev)) {
 	case AMD_RESET_METHOD_MODE1:
@@ -4777,56 +4894,12 @@ static bool amdgpu_device_lock_adev(struct amdgpu_device *adev,
 		adev->mp1_state = PP_MP1_STATE_NONE;
 		break;
 	}
-
-	return true;
 }
 
-static void amdgpu_device_unlock_adev(struct amdgpu_device *adev)
+static void amdgpu_device_unset_mp1_state(struct amdgpu_device *adev)
 {
 	amdgpu_vf_error_trans_all(adev);
 	adev->mp1_state = PP_MP1_STATE_NONE;
-	atomic_set(&adev->in_gpu_reset, 0);
-	up_write(&adev->reset_sem);
-}
-
-/*
- * to lockup a
list of amdgpu devices in a hive safely, if not a hive - * with multiple nodes, it will be similar as amdgpu_device_lock_adev. - * - * unlock won't require roll back. - */ -static int amdgpu_device_lock_hive_adev(struct amdgpu_device *adev, struct amdgpu_hive_info *hive) -{ - struct amdgpu_device *tmp_adev = NULL; - - if (!amdgpu_sriov_vf(adev) && (adev->gmc.xgmi.num_physical_nodes > 1)) { - if (!hive) { - dev_err(adev->dev, "Hive is NULL while device has multiple xgmi nodes"); - return -ENODEV; - } - list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) { - if (!amdgpu_device_lock_adev(tmp_adev, hive)) - goto roll_back; - } - } else if (!amdgpu_device_lock_adev(adev, hive)) - return -EAGAIN; - - return 0; -roll_back: - if (!list_is_first(&tmp_adev->gmc.xgmi.head, &hive->device_list)) { - /* - * if the lockup iteration break in the middle of a hive, - * it may means there may has a race issue, - * or a hive device locked up independently. - * we may be in trouble and may not, so will try to roll back - * the lock and give out a warnning. - */ - dev_warn(tmp_adev->dev, "Hive lock iteration broke in the middle. Rolling back to unlock"); - list_for_each_entry_continue_reverse(tmp_adev, &hive->device_list, gmc.xgmi.head) { - amdgpu_device_unlock_adev(tmp_adev); - } - } - return -EAGAIN; } static void amdgpu_device_resume_display_audio(struct amdgpu_device *adev) @@ -4960,7 +5033,7 @@ retry: } /** - * amdgpu_device_gpu_recover - reset the asic and recover scheduler + * amdgpu_device_gpu_recover_imp - reset the asic and recover scheduler * * @adev: amdgpu_device pointer * @job: which job trigger hang @@ -4970,7 +5043,7 @@ retry: * Returns 0 for success or an error on failure. */ -int amdgpu_device_gpu_recover(struct amdgpu_device *adev, +int amdgpu_device_gpu_recover_imp(struct amdgpu_device *adev, struct amdgpu_job *job) { struct list_head device_list, *device_list_handle = NULL; @@ -5004,26 +5077,10 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, dev_info(adev->dev, "GPU %s begin!\n", need_emergency_restart ? "jobs stop":"reset"); - /* - * Here we trylock to avoid chain of resets executing from - * either trigger by jobs on different adevs in XGMI hive or jobs on - * different schedulers for same device while this TO handler is running. - * We always reset all schedulers for device and all devices for XGMI - * hive so that should take care of them too. - */ if (!amdgpu_sriov_vf(adev)) hive = amdgpu_get_xgmi_hive(adev); - if (hive) { - if (atomic_cmpxchg(&hive->in_reset, 0, 1) != 0) { - DRM_INFO("Bailing on TDR for s_job:%llx, hive: %llx as another already in progress", - job ? job->base.id : -1, hive->hive_id); - amdgpu_put_xgmi_hive(hive); - if (job && job->vm) - drm_sched_increase_karma(&job->base); - return 0; - } + if (hive) mutex_lock(&hive->hive_lock); - } reset_context.method = AMD_RESET_METHOD_NONE; reset_context.reset_req_dev = adev; @@ -5032,22 +5089,6 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags); /* - * lock the device before we try to operate the linked list - * if didn't get the device lock, don't touch the linked list since - * others may iterating it. - */ - r = amdgpu_device_lock_hive_adev(adev, hive); - if (r) { - dev_info(adev->dev, "Bailing on TDR for s_job:%llx, as another already in progress", - job ? 
job->base.id : -1); - - /* even we skipped this reset, still need to set the job to guilty */ - if (job && job->vm) - drm_sched_increase_karma(&job->base); - goto skip_recovery; - } - - /* * Build list of devices to reset. * In case we are in XGMI hive mode, resort the device list * to put adev in the 1st position. @@ -5064,8 +5105,16 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, device_list_handle = &device_list; } + /* We need to lock reset domain only once both for XGMI and single device */ + tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device, + reset_list); + amdgpu_device_lock_reset_domain(tmp_adev->reset_domain); + /* block all schedulers and reset given job's ring */ list_for_each_entry(tmp_adev, device_list_handle, reset_list) { + + amdgpu_device_set_mp1_state(tmp_adev); + /* * Try to put the audio codec into suspend state * before gpu reset started. @@ -5187,6 +5236,9 @@ skip_hw_reset: drm_helper_resume_force_mode(adev_to_drm(tmp_adev)); } + if (tmp_adev->asic_reset_res) + r = tmp_adev->asic_reset_res; + tmp_adev->asic_reset_res = 0; if (r) { @@ -5214,21 +5266,55 @@ skip_sched_resume: if (audio_suspended) amdgpu_device_resume_display_audio(tmp_adev); - amdgpu_device_unlock_adev(tmp_adev); + + amdgpu_device_unset_mp1_state(tmp_adev); } -skip_recovery: + tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device, + reset_list); + amdgpu_device_unlock_reset_domain(tmp_adev->reset_domain); + if (hive) { - atomic_set(&hive->in_reset, 0); mutex_unlock(&hive->hive_lock); amdgpu_put_xgmi_hive(hive); } - if (r && r != -EAGAIN) + if (r) dev_info(adev->dev, "GPU reset end with ret = %d\n", r); return r; } +struct amdgpu_recover_work_struct { + struct work_struct base; + struct amdgpu_device *adev; + struct amdgpu_job *job; + int ret; +}; + +static void amdgpu_device_queue_gpu_recover_work(struct work_struct *work) +{ + struct amdgpu_recover_work_struct *recover_work = container_of(work, struct amdgpu_recover_work_struct, base); + + recover_work->ret = amdgpu_device_gpu_recover_imp(recover_work->adev, recover_work->job); +} +/* + * Serialize gpu recover into reset domain single threaded wq + */ +int amdgpu_device_gpu_recover(struct amdgpu_device *adev, + struct amdgpu_job *job) +{ + struct amdgpu_recover_work_struct work = {.adev = adev, .job = job}; + + INIT_WORK(&work.base, amdgpu_device_queue_gpu_recover_work); + + if (!amdgpu_reset_domain_schedule(adev->reset_domain, &work.base)) + return -EAGAIN; + + flush_work(&work.base); + + return work.ret; +} + /** * amdgpu_device_get_pcie_info - fence pcie info about the PCIE slot * @@ -5416,20 +5502,6 @@ int amdgpu_device_baco_exit(struct drm_device *dev) return 0; } -static void amdgpu_cancel_all_tdr(struct amdgpu_device *adev) -{ - int i; - - for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { - struct amdgpu_ring *ring = adev->rings[i]; - - if (!ring || !ring->sched.thread) - continue; - - cancel_delayed_work_sync(&ring->sched.work_tdr); - } -} - /** * amdgpu_pci_error_detected - Called when a PCI error is detected. 
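 * (One of the driver's PCIe AER recovery callbacks, alongside
 * amdgpu_pci_slot_reset() and amdgpu_pci_resume() below; the reset-domain
 * lock taken here for a frozen channel is dropped again once recovery
 * completes or fails.)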
* @pdev: PCI device struct @@ -5460,14 +5532,11 @@ pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_sta /* Fatal error, prepare for slot reset */ case pci_channel_io_frozen: /* - * Cancel and wait for all TDRs in progress if failing to - * set adev->in_gpu_reset in amdgpu_device_lock_adev - * - * Locking adev->reset_sem will prevent any external access + * Locking adev->reset_domain->sem will prevent any external access * to GPU during PCI error recovery */ - while (!amdgpu_device_lock_adev(adev, NULL)) - amdgpu_cancel_all_tdr(adev); + amdgpu_device_lock_reset_domain(adev->reset_domain); + amdgpu_device_set_mp1_state(adev); /* * Block any work scheduling as we do for regular GPU reset @@ -5574,7 +5643,8 @@ out: DRM_INFO("PCIe error recovery succeeded\n"); } else { DRM_ERROR("PCIe error recovery failed, err:%d", r); - amdgpu_device_unlock_adev(adev); + amdgpu_device_unset_mp1_state(adev); + amdgpu_device_unlock_reset_domain(adev->reset_domain); } return r ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED; @@ -5611,7 +5681,8 @@ void amdgpu_pci_resume(struct pci_dev *pdev) drm_sched_start(&ring->sched, true); } - amdgpu_device_unlock_adev(adev); + amdgpu_device_unset_mp1_state(adev); + amdgpu_device_unlock_reset_domain(adev->reset_domain); } bool amdgpu_device_cache_pci_state(struct pci_dev *pdev) @@ -5663,7 +5734,7 @@ void amdgpu_device_flush_hdp(struct amdgpu_device *adev, struct amdgpu_ring *ring) { #ifdef CONFIG_X86_64 - if (adev->flags & AMD_IS_APU) + if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev)) return; #endif if (adev->gmc.xgmi.connected_to_cpu) @@ -5679,7 +5750,7 @@ void amdgpu_device_invalidate_hdp(struct amdgpu_device *adev, struct amdgpu_ring *ring) { #ifdef CONFIG_X86_64 - if (adev->flags & AMD_IS_APU) + if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev)) return; #endif if (adev->gmc.xgmi.connected_to_cpu) @@ -5688,6 +5759,11 @@ void amdgpu_device_invalidate_hdp(struct amdgpu_device *adev, amdgpu_asic_invalidate_hdp(adev, ring); } +int amdgpu_in_reset(struct amdgpu_device *adev) +{ + return atomic_read(&adev->reset_domain->in_gpu_reset); + } + /** * amdgpu_device_halt() - bring hardware to some kind of halt state * @@ -5726,3 +5802,36 @@ void amdgpu_device_halt(struct amdgpu_device *adev) pci_disable_device(pdev); pci_wait_for_pending_transaction(pdev); } + +u32 amdgpu_device_pcie_port_rreg(struct amdgpu_device *adev, + u32 reg) +{ + unsigned long flags, address, data; + u32 r; + + address = adev->nbio.funcs->get_pcie_port_index_offset(adev); + data = adev->nbio.funcs->get_pcie_port_data_offset(adev); + + spin_lock_irqsave(&adev->pcie_idx_lock, flags); + WREG32(address, reg * 4); + (void)RREG32(address); + r = RREG32(data); + spin_unlock_irqrestore(&adev->pcie_idx_lock, flags); + return r; +} + +void amdgpu_device_pcie_port_wreg(struct amdgpu_device *adev, + u32 reg, u32 v) +{ + unsigned long flags, address, data; + + address = adev->nbio.funcs->get_pcie_port_index_offset(adev); + data = adev->nbio.funcs->get_pcie_port_data_offset(adev); + + spin_lock_irqsave(&adev->pcie_idx_lock, flags); + WREG32(address, reg * 4); + (void)RREG32(address); + WREG32(data, v); + (void)RREG32(data); + spin_unlock_irqrestore(&adev->pcie_idx_lock, flags); +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_df.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_df.h index 6b25837955c4..1538b2dbfff1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_df.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_df.h @@ -40,7 +40,7 @@ struct amdgpu_df_funcs { void 
(*update_medium_grain_clock_gating)(struct amdgpu_device *adev, bool enable); void (*get_clockgating_state)(struct amdgpu_device *adev, - u32 *flags); + u64 *flags); void (*enable_ecc_force_par_wr_rmw)(struct amdgpu_device *adev, bool enable); int (*pmc_start)(struct amdgpu_device *adev, uint64_t config, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c index 81bfee978b74..0c359ad9fd63 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c @@ -271,8 +271,6 @@ static int amdgpu_discovery_init(struct amdgpu_device *adev) { struct table_info *info; struct binary_header *bhdr; - struct ip_discovery_header *ihdr; - struct gpu_info_header *ghdr; uint16_t offset; uint16_t size; uint16_t checksum; @@ -290,7 +288,7 @@ static int amdgpu_discovery_init(struct amdgpu_device *adev) goto out; } - if(!amdgpu_discovery_verify_binary_signature(adev->mman.discovery_bin)) { + if (!amdgpu_discovery_verify_binary_signature(adev->mman.discovery_bin)) { dev_warn(adev->dev, "get invalid ip discovery binary signature from vram\n"); /* retry read ip discovery binary from file */ r = amdgpu_discovery_read_binary_from_file(adev, adev->mman.discovery_bin); @@ -324,31 +322,110 @@ static int amdgpu_discovery_init(struct amdgpu_device *adev) info = &bhdr->table_list[IP_DISCOVERY]; offset = le16_to_cpu(info->offset); checksum = le16_to_cpu(info->checksum); - ihdr = (struct ip_discovery_header *)(adev->mman.discovery_bin + offset); - if (le32_to_cpu(ihdr->signature) != DISCOVERY_TABLE_SIGNATURE) { - dev_err(adev->dev, "invalid ip discovery data table signature\n"); - r = -EINVAL; - goto out; - } + if (offset) { + struct ip_discovery_header *ihdr = + (struct ip_discovery_header *)(adev->mman.discovery_bin + offset); + if (le32_to_cpu(ihdr->signature) != DISCOVERY_TABLE_SIGNATURE) { + dev_err(adev->dev, "invalid ip discovery data table signature\n"); + r = -EINVAL; + goto out; + } - if (!amdgpu_discovery_verify_checksum(adev->mman.discovery_bin + offset, - le16_to_cpu(ihdr->size), checksum)) { - dev_err(adev->dev, "invalid ip discovery data table checksum\n"); - r = -EINVAL; - goto out; + if (!amdgpu_discovery_verify_checksum(adev->mman.discovery_bin + offset, + le16_to_cpu(ihdr->size), checksum)) { + dev_err(adev->dev, "invalid ip discovery data table checksum\n"); + r = -EINVAL; + goto out; + } } info = &bhdr->table_list[GC]; offset = le16_to_cpu(info->offset); checksum = le16_to_cpu(info->checksum); - ghdr = (struct gpu_info_header *)(adev->mman.discovery_bin + offset); - if (!amdgpu_discovery_verify_checksum(adev->mman.discovery_bin + offset, - le32_to_cpu(ghdr->size), checksum)) { - dev_err(adev->dev, "invalid gc data table checksum\n"); - r = -EINVAL; - goto out; + if (offset) { + struct gpu_info_header *ghdr = + (struct gpu_info_header *)(adev->mman.discovery_bin + offset); + + if (le32_to_cpu(ghdr->table_id) != GC_TABLE_ID) { + dev_err(adev->dev, "invalid ip discovery gc table id\n"); + r = -EINVAL; + goto out; + } + + if (!amdgpu_discovery_verify_checksum(adev->mman.discovery_bin + offset, + le32_to_cpu(ghdr->size), checksum)) { + dev_err(adev->dev, "invalid gc data table checksum\n"); + r = -EINVAL; + goto out; + } + } + + info = &bhdr->table_list[HARVEST_INFO]; + offset = le16_to_cpu(info->offset); + checksum = le16_to_cpu(info->checksum); + + if (offset) { + struct harvest_info_header *hhdr = + (struct harvest_info_header *)(adev->mman.discovery_bin + offset); + + if (le32_to_cpu(hhdr->signature) != 
HARVEST_TABLE_SIGNATURE) {
+			dev_err(adev->dev, "invalid ip discovery harvest table signature\n");
+			r = -EINVAL;
+			goto out;
+		}
+
+		if (!amdgpu_discovery_verify_checksum(adev->mman.discovery_bin + offset,
+				sizeof(struct harvest_table), checksum)) {
+			dev_err(adev->dev, "invalid harvest data table checksum\n");
+			r = -EINVAL;
+			goto out;
+		}
+	}
+
+	info = &bhdr->table_list[VCN_INFO];
+	offset = le16_to_cpu(info->offset);
+	checksum = le16_to_cpu(info->checksum);
+
+	if (offset) {
+		struct vcn_info_header *vhdr =
+			(struct vcn_info_header *)(adev->mman.discovery_bin + offset);
+
+		if (le32_to_cpu(vhdr->table_id) != VCN_INFO_TABLE_ID) {
+			dev_err(adev->dev, "invalid ip discovery vcn table id\n");
+			r = -EINVAL;
+			goto out;
+		}
+
+		if (!amdgpu_discovery_verify_checksum(adev->mman.discovery_bin + offset,
+				le32_to_cpu(vhdr->size_bytes), checksum)) {
+			dev_err(adev->dev, "invalid vcn data table checksum\n");
+			r = -EINVAL;
+			goto out;
+		}
+	}
+
+	info = &bhdr->table_list[MALL_INFO];
+	offset = le16_to_cpu(info->offset);
+	checksum = le16_to_cpu(info->checksum);
+
+	if (offset) {
+		struct mall_info_header *mhdr =
+			(struct mall_info_header *)(adev->mman.discovery_bin + offset);
+
+		if (le32_to_cpu(mhdr->table_id) != MALL_INFO_TABLE_ID) {
+			dev_err(adev->dev, "invalid ip discovery mall table id\n");
+			r = -EINVAL;
+			goto out;
+		}
+
+		if (!amdgpu_discovery_verify_checksum(adev->mman.discovery_bin + offset,
+				le32_to_cpu(mhdr->size_bytes), checksum)) {
+			dev_err(adev->dev, "invalid mall data table checksum\n");
+			r = -EINVAL;
+			goto out;
+		}
 	}
 
 	return 0;
@@ -360,8 +437,11 @@ out:
 	return r;
 }
 
+static void amdgpu_discovery_sysfs_fini(struct amdgpu_device *adev);
+
 void amdgpu_discovery_fini(struct amdgpu_device *adev)
 {
+	amdgpu_discovery_sysfs_fini(adev);
 	kfree(adev->mman.discovery_bin);
 	adev->mman.discovery_bin = NULL;
 }
@@ -382,7 +462,591 @@ static int amdgpu_discovery_validate_ip(const struct ip *ip)
 	return 0;
 }
 
-int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev)
+static void amdgpu_discovery_read_harvest_bit_per_ip(struct amdgpu_device *adev,
+						     uint32_t *vcn_harvest_count)
+{
+	struct binary_header *bhdr;
+	struct ip_discovery_header *ihdr;
+	struct die_header *dhdr;
+	struct ip *ip;
+	uint16_t die_offset, ip_offset, num_dies, num_ips;
+	int i, j;
+
+	bhdr = (struct binary_header *)adev->mman.discovery_bin;
+	ihdr = (struct ip_discovery_header *)(adev->mman.discovery_bin +
+			le16_to_cpu(bhdr->table_list[IP_DISCOVERY].offset));
+	num_dies = le16_to_cpu(ihdr->num_dies);
+
+	/* scan harvest bit of all IP data structures */
+	for (i = 0; i < num_dies; i++) {
+		die_offset = le16_to_cpu(ihdr->die_info[i].die_offset);
+		dhdr = (struct die_header *)(adev->mman.discovery_bin + die_offset);
+		num_ips = le16_to_cpu(dhdr->num_ips);
+		ip_offset = die_offset + sizeof(*dhdr);
+
+		for (j = 0; j < num_ips; j++) {
+			ip = (struct ip *)(adev->mman.discovery_bin + ip_offset);
+
+			if (amdgpu_discovery_validate_ip(ip))
+				goto next_ip;
+
+			if (le16_to_cpu(ip->harvest) == 1) {
+				switch (le16_to_cpu(ip->hw_id)) {
+				case VCN_HWID:
+					(*vcn_harvest_count)++;
+					if (ip->number_instance == 0)
+						adev->vcn.harvest_config |= AMDGPU_VCN_HARVEST_VCN0;
+					else
+						adev->vcn.harvest_config |= AMDGPU_VCN_HARVEST_VCN1;
+					break;
+				case DMU_HWID:
+					adev->harvest_ip_mask |= AMD_HARVEST_IP_DMU_MASK;
+					break;
+				default:
+					break;
+				}
+			}
+next_ip:
+			ip_offset += struct_size(ip, base_address, ip->num_base_address);
+		}
+	}
+}
+
+static void amdgpu_discovery_read_from_harvest_table(struct amdgpu_device *adev,
+						     uint32_t
*vcn_harvest_count, + uint32_t *umc_harvest_count) +{ + struct binary_header *bhdr; + struct harvest_table *harvest_info; + u16 offset; + int i; + + bhdr = (struct binary_header *)adev->mman.discovery_bin; + offset = le16_to_cpu(bhdr->table_list[HARVEST_INFO].offset); + + if (!offset) { + dev_err(adev->dev, "invalid harvest table offset\n"); + return; + } + + harvest_info = (struct harvest_table *)(adev->mman.discovery_bin + offset); + + for (i = 0; i < 32; i++) { + if (le16_to_cpu(harvest_info->list[i].hw_id) == 0) + break; + + switch (le16_to_cpu(harvest_info->list[i].hw_id)) { + case VCN_HWID: + (*vcn_harvest_count)++; + if (harvest_info->list[i].number_instance == 0) + adev->vcn.harvest_config |= AMDGPU_VCN_HARVEST_VCN0; + else + adev->vcn.harvest_config |= AMDGPU_VCN_HARVEST_VCN1; + break; + case DMU_HWID: + adev->harvest_ip_mask |= AMD_HARVEST_IP_DMU_MASK; + break; + case UMC_HWID: + (*umc_harvest_count)++; + break; + default: + break; + } + } +} + +/* ================================================== */ + +struct ip_hw_instance { + struct kobject kobj; /* ip_discovery/die/#die/#hw_id/#instance/<attrs...> */ + + int hw_id; + u8 num_instance; + u8 major, minor, revision; + u8 harvest; + + int num_base_addresses; + u32 base_addr[]; +}; + +struct ip_hw_id { + struct kset hw_id_kset; /* ip_discovery/die/#die/#hw_id/, contains ip_hw_instance */ + int hw_id; +}; + +struct ip_die_entry { + struct kset ip_kset; /* ip_discovery/die/#die/, contains ip_hw_id */ + u16 num_ips; +}; + +/* -------------------------------------------------- */ + +struct ip_hw_instance_attr { + struct attribute attr; + ssize_t (*show)(struct ip_hw_instance *ip_hw_instance, char *buf); +}; + +static ssize_t hw_id_show(struct ip_hw_instance *ip_hw_instance, char *buf) +{ + return sysfs_emit(buf, "%d\n", ip_hw_instance->hw_id); +} + +static ssize_t num_instance_show(struct ip_hw_instance *ip_hw_instance, char *buf) +{ + return sysfs_emit(buf, "%d\n", ip_hw_instance->num_instance); +} + +static ssize_t major_show(struct ip_hw_instance *ip_hw_instance, char *buf) +{ + return sysfs_emit(buf, "%d\n", ip_hw_instance->major); +} + +static ssize_t minor_show(struct ip_hw_instance *ip_hw_instance, char *buf) +{ + return sysfs_emit(buf, "%d\n", ip_hw_instance->minor); +} + +static ssize_t revision_show(struct ip_hw_instance *ip_hw_instance, char *buf) +{ + return sysfs_emit(buf, "%d\n", ip_hw_instance->revision); +} + +static ssize_t harvest_show(struct ip_hw_instance *ip_hw_instance, char *buf) +{ + return sysfs_emit(buf, "0x%01X\n", ip_hw_instance->harvest); +} + +static ssize_t num_base_addresses_show(struct ip_hw_instance *ip_hw_instance, char *buf) +{ + return sysfs_emit(buf, "%d\n", ip_hw_instance->num_base_addresses); +} + +static ssize_t base_addr_show(struct ip_hw_instance *ip_hw_instance, char *buf) +{ + ssize_t res, at; + int ii; + + for (res = at = ii = 0; ii < ip_hw_instance->num_base_addresses; ii++) { + /* Here we satisfy the condition that, at + size <= PAGE_SIZE. + */ + if (at + 12 > PAGE_SIZE) + break; + res = sysfs_emit_at(buf, at, "0x%08X\n", + ip_hw_instance->base_addr[ii]); + if (res <= 0) + break; + at += res; + } + + return res < 0 ? 
res : at;
+}
+
+static struct ip_hw_instance_attr ip_hw_attr[] = {
+	__ATTR_RO(hw_id),
+	__ATTR_RO(num_instance),
+	__ATTR_RO(major),
+	__ATTR_RO(minor),
+	__ATTR_RO(revision),
+	__ATTR_RO(harvest),
+	__ATTR_RO(num_base_addresses),
+	__ATTR_RO(base_addr),
+};
+
+static struct attribute *ip_hw_instance_attrs[ARRAY_SIZE(ip_hw_attr) + 1];
+ATTRIBUTE_GROUPS(ip_hw_instance);
+
+#define to_ip_hw_instance(x) container_of(x, struct ip_hw_instance, kobj)
+#define to_ip_hw_instance_attr(x) container_of(x, struct ip_hw_instance_attr, attr)
+
+static ssize_t ip_hw_instance_attr_show(struct kobject *kobj,
+					struct attribute *attr,
+					char *buf)
+{
+	struct ip_hw_instance *ip_hw_instance = to_ip_hw_instance(kobj);
+	struct ip_hw_instance_attr *ip_hw_attr = to_ip_hw_instance_attr(attr);
+
+	if (!ip_hw_attr->show)
+		return -EIO;
+
+	return ip_hw_attr->show(ip_hw_instance, buf);
+}
+
+static const struct sysfs_ops ip_hw_instance_sysfs_ops = {
+	.show = ip_hw_instance_attr_show,
+};
+
+static void ip_hw_instance_release(struct kobject *kobj)
+{
+	struct ip_hw_instance *ip_hw_instance = to_ip_hw_instance(kobj);
+
+	kfree(ip_hw_instance);
+}
+
+static struct kobj_type ip_hw_instance_ktype = {
+	.release = ip_hw_instance_release,
+	.sysfs_ops = &ip_hw_instance_sysfs_ops,
+	.default_groups = ip_hw_instance_groups,
+};
+
+/* -------------------------------------------------- */
+
+#define to_ip_hw_id(x) container_of(to_kset(x), struct ip_hw_id, hw_id_kset)
+
+static void ip_hw_id_release(struct kobject *kobj)
+{
+	struct ip_hw_id *ip_hw_id = to_ip_hw_id(kobj);
+
+	if (!list_empty(&ip_hw_id->hw_id_kset.list))
+		DRM_ERROR("ip_hw_id->hw_id_kset is not empty");
+	kfree(ip_hw_id);
+}
+
+static struct kobj_type ip_hw_id_ktype = {
+	.release = ip_hw_id_release,
+	.sysfs_ops = &kobj_sysfs_ops,
+};
+
+/* -------------------------------------------------- */
+
+static void die_kobj_release(struct kobject *kobj);
+static void ip_disc_release(struct kobject *kobj);
+
+struct ip_die_entry_attribute {
+	struct attribute attr;
+	ssize_t (*show)(struct ip_die_entry *ip_die_entry, char *buf);
+};
+
+#define to_ip_die_entry_attr(x) container_of(x, struct ip_die_entry_attribute, attr)
+
+static ssize_t num_ips_show(struct ip_die_entry *ip_die_entry, char *buf)
+{
+	return sysfs_emit(buf, "%d\n", ip_die_entry->num_ips);
+}
+
+/* If there are more ip_die_entry attrs, other than the number of IPs,
+ * we can make this into an array of attrs, and then initialize
+ * ip_die_entry_attrs in a loop.
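+ * For example (a hypothetical sketch, mirroring how ip_hw_instance_attrs[]
+ * is filled from ip_hw_attr[] in amdgpu_discovery_sysfs_init() below):
+ *
+ *	static struct ip_die_entry_attribute entry_attrs[] = { ... };
+ *	static struct attribute *entry_attr_ptrs[ARRAY_SIZE(entry_attrs) + 1];
+ *
+ *	for (i = 0; i < ARRAY_SIZE(entry_attrs); i++)
+ *		entry_attr_ptrs[i] = &entry_attrs[i].attr;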
+ */ +static struct ip_die_entry_attribute num_ips_attr = + __ATTR_RO(num_ips); + +static struct attribute *ip_die_entry_attrs[] = { + &num_ips_attr.attr, + NULL, +}; +ATTRIBUTE_GROUPS(ip_die_entry); /* ip_die_entry_groups */ + +#define to_ip_die_entry(x) container_of(to_kset(x), struct ip_die_entry, ip_kset) + +static ssize_t ip_die_entry_attr_show(struct kobject *kobj, + struct attribute *attr, + char *buf) +{ + struct ip_die_entry_attribute *ip_die_entry_attr = to_ip_die_entry_attr(attr); + struct ip_die_entry *ip_die_entry = to_ip_die_entry(kobj); + + if (!ip_die_entry_attr->show) + return -EIO; + + return ip_die_entry_attr->show(ip_die_entry, buf); +} + +static void ip_die_entry_release(struct kobject *kobj) +{ + struct ip_die_entry *ip_die_entry = to_ip_die_entry(kobj); + + if (!list_empty(&ip_die_entry->ip_kset.list)) + DRM_ERROR("ip_die_entry->ip_kset is not empty"); + kfree(ip_die_entry); +} + +static const struct sysfs_ops ip_die_entry_sysfs_ops = { + .show = ip_die_entry_attr_show, +}; + +static struct kobj_type ip_die_entry_ktype = { + .release = ip_die_entry_release, + .sysfs_ops = &ip_die_entry_sysfs_ops, + .default_groups = ip_die_entry_groups, +}; + +static struct kobj_type die_kobj_ktype = { + .release = die_kobj_release, + .sysfs_ops = &kobj_sysfs_ops, +}; + +static struct kobj_type ip_discovery_ktype = { + .release = ip_disc_release, + .sysfs_ops = &kobj_sysfs_ops, +}; + +struct ip_discovery_top { + struct kobject kobj; /* ip_discovery/ */ + struct kset die_kset; /* ip_discovery/die/, contains ip_die_entry */ + struct amdgpu_device *adev; +}; + +static void die_kobj_release(struct kobject *kobj) +{ + struct ip_discovery_top *ip_top = container_of(to_kset(kobj), + struct ip_discovery_top, + die_kset); + if (!list_empty(&ip_top->die_kset.list)) + DRM_ERROR("ip_top->die_kset is not empty"); +} + +static void ip_disc_release(struct kobject *kobj) +{ + struct ip_discovery_top *ip_top = container_of(kobj, struct ip_discovery_top, + kobj); + struct amdgpu_device *adev = ip_top->adev; + + adev->ip_top = NULL; + kfree(ip_top); +} + +static int amdgpu_discovery_sysfs_ips(struct amdgpu_device *adev, + struct ip_die_entry *ip_die_entry, + const size_t _ip_offset, const int num_ips) +{ + int ii, jj, kk, res; + + DRM_DEBUG("num_ips:%d", num_ips); + + /* Find all IPs of a given HW ID, and add their instance to + * #die/#hw_id/#instance/<attributes> + */ + for (ii = 0; ii < HW_ID_MAX; ii++) { + struct ip_hw_id *ip_hw_id = NULL; + size_t ip_offset = _ip_offset; + + for (jj = 0; jj < num_ips; jj++) { + struct ip *ip; + struct ip_hw_instance *ip_hw_instance; + + ip = (struct ip *)(adev->mman.discovery_bin + ip_offset); + if (amdgpu_discovery_validate_ip(ip) || + le16_to_cpu(ip->hw_id) != ii) + goto next_ip; + + DRM_DEBUG("match:%d @ ip_offset:%zu", ii, ip_offset); + + /* We have a hw_id match; register the hw + * block if not yet registered. 
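+			 * The kset registered here backs the intermediate sysfs
+			 * directory ip_discovery/die/#die/#hw_id/ and keeps it
+			 * alive while the ip_hw_instance kobjects are added
+			 * beneath it.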
+ */ + if (!ip_hw_id) { + ip_hw_id = kzalloc(sizeof(*ip_hw_id), GFP_KERNEL); + if (!ip_hw_id) + return -ENOMEM; + ip_hw_id->hw_id = ii; + + kobject_set_name(&ip_hw_id->hw_id_kset.kobj, "%d", ii); + ip_hw_id->hw_id_kset.kobj.kset = &ip_die_entry->ip_kset; + ip_hw_id->hw_id_kset.kobj.ktype = &ip_hw_id_ktype; + res = kset_register(&ip_hw_id->hw_id_kset); + if (res) { + DRM_ERROR("Couldn't register ip_hw_id kset"); + kfree(ip_hw_id); + return res; + } + if (hw_id_names[ii]) { + res = sysfs_create_link(&ip_die_entry->ip_kset.kobj, + &ip_hw_id->hw_id_kset.kobj, + hw_id_names[ii]); + if (res) { + DRM_ERROR("Couldn't create IP link %s in IP Die:%s\n", + hw_id_names[ii], + kobject_name(&ip_die_entry->ip_kset.kobj)); + } + } + } + + /* Now register its instance. + */ + ip_hw_instance = kzalloc(struct_size(ip_hw_instance, + base_addr, + ip->num_base_address), + GFP_KERNEL); + if (!ip_hw_instance) { + DRM_ERROR("no memory for ip_hw_instance"); + return -ENOMEM; + } + ip_hw_instance->hw_id = le16_to_cpu(ip->hw_id); /* == ii */ + ip_hw_instance->num_instance = ip->number_instance; + ip_hw_instance->major = ip->major; + ip_hw_instance->minor = ip->minor; + ip_hw_instance->revision = ip->revision; + ip_hw_instance->harvest = ip->harvest; + ip_hw_instance->num_base_addresses = ip->num_base_address; + + for (kk = 0; kk < ip_hw_instance->num_base_addresses; kk++) + ip_hw_instance->base_addr[kk] = ip->base_address[kk]; + + kobject_init(&ip_hw_instance->kobj, &ip_hw_instance_ktype); + ip_hw_instance->kobj.kset = &ip_hw_id->hw_id_kset; + res = kobject_add(&ip_hw_instance->kobj, NULL, + "%d", ip_hw_instance->num_instance); +next_ip: + ip_offset += struct_size(ip, base_address, ip->num_base_address); + } + } + + return 0; +} + +static int amdgpu_discovery_sysfs_recurse(struct amdgpu_device *adev) +{ + struct binary_header *bhdr; + struct ip_discovery_header *ihdr; + struct die_header *dhdr; + struct kset *die_kset = &adev->ip_top->die_kset; + u16 num_dies, die_offset, num_ips; + size_t ip_offset; + int ii, res; + + bhdr = (struct binary_header *)adev->mman.discovery_bin; + ihdr = (struct ip_discovery_header *)(adev->mman.discovery_bin + + le16_to_cpu(bhdr->table_list[IP_DISCOVERY].offset)); + num_dies = le16_to_cpu(ihdr->num_dies); + + DRM_DEBUG("number of dies: %d\n", num_dies); + + for (ii = 0; ii < num_dies; ii++) { + struct ip_die_entry *ip_die_entry; + + die_offset = le16_to_cpu(ihdr->die_info[ii].die_offset); + dhdr = (struct die_header *)(adev->mman.discovery_bin + die_offset); + num_ips = le16_to_cpu(dhdr->num_ips); + ip_offset = die_offset + sizeof(*dhdr); + + /* Add the die to the kset. + * + * dhdr->die_id == ii, which was checked in + * amdgpu_discovery_reg_base_init(). 
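+		 * That check is what lets kobject_set_name() below use
+		 * dhdr->die_id directly for the die directory name: it always
+		 * equals the loop index ii.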
+ */ + + ip_die_entry = kzalloc(sizeof(*ip_die_entry), GFP_KERNEL); + if (!ip_die_entry) + return -ENOMEM; + + ip_die_entry->num_ips = num_ips; + + kobject_set_name(&ip_die_entry->ip_kset.kobj, "%d", le16_to_cpu(dhdr->die_id)); + ip_die_entry->ip_kset.kobj.kset = die_kset; + ip_die_entry->ip_kset.kobj.ktype = &ip_die_entry_ktype; + res = kset_register(&ip_die_entry->ip_kset); + if (res) { + DRM_ERROR("Couldn't register ip_die_entry kset"); + kfree(ip_die_entry); + return res; + } + + amdgpu_discovery_sysfs_ips(adev, ip_die_entry, ip_offset, num_ips); + } + + return 0; +} + +static int amdgpu_discovery_sysfs_init(struct amdgpu_device *adev) +{ + struct kset *die_kset; + int res, ii; + + adev->ip_top = kzalloc(sizeof(*adev->ip_top), GFP_KERNEL); + if (!adev->ip_top) + return -ENOMEM; + + adev->ip_top->adev = adev; + + res = kobject_init_and_add(&adev->ip_top->kobj, &ip_discovery_ktype, + &adev->dev->kobj, "ip_discovery"); + if (res) { + DRM_ERROR("Couldn't init and add ip_discovery/"); + goto Err; + } + + die_kset = &adev->ip_top->die_kset; + kobject_set_name(&die_kset->kobj, "%s", "die"); + die_kset->kobj.parent = &adev->ip_top->kobj; + die_kset->kobj.ktype = &die_kobj_ktype; + res = kset_register(&adev->ip_top->die_kset); + if (res) { + DRM_ERROR("Couldn't register die_kset"); + goto Err; + } + + for (ii = 0; ii < ARRAY_SIZE(ip_hw_attr); ii++) + ip_hw_instance_attrs[ii] = &ip_hw_attr[ii].attr; + ip_hw_instance_attrs[ii] = NULL; + + res = amdgpu_discovery_sysfs_recurse(adev); + + return res; +Err: + kobject_put(&adev->ip_top->kobj); + return res; +} + +/* -------------------------------------------------- */ + +#define list_to_kobj(el) container_of(el, struct kobject, entry) + +static void amdgpu_discovery_sysfs_ip_hw_free(struct ip_hw_id *ip_hw_id) +{ + struct list_head *el, *tmp; + struct kset *hw_id_kset; + + hw_id_kset = &ip_hw_id->hw_id_kset; + spin_lock(&hw_id_kset->list_lock); + list_for_each_prev_safe(el, tmp, &hw_id_kset->list) { + list_del_init(el); + spin_unlock(&hw_id_kset->list_lock); + /* kobject is embedded in ip_hw_instance */ + kobject_put(list_to_kobj(el)); + spin_lock(&hw_id_kset->list_lock); + } + spin_unlock(&hw_id_kset->list_lock); + kobject_put(&ip_hw_id->hw_id_kset.kobj); +} + +static void amdgpu_discovery_sysfs_die_free(struct ip_die_entry *ip_die_entry) +{ + struct list_head *el, *tmp; + struct kset *ip_kset; + + ip_kset = &ip_die_entry->ip_kset; + spin_lock(&ip_kset->list_lock); + list_for_each_prev_safe(el, tmp, &ip_kset->list) { + list_del_init(el); + spin_unlock(&ip_kset->list_lock); + amdgpu_discovery_sysfs_ip_hw_free(to_ip_hw_id(list_to_kobj(el))); + spin_lock(&ip_kset->list_lock); + } + spin_unlock(&ip_kset->list_lock); + kobject_put(&ip_die_entry->ip_kset.kobj); +} + +static void amdgpu_discovery_sysfs_fini(struct amdgpu_device *adev) +{ + struct list_head *el, *tmp; + struct kset *die_kset; + + die_kset = &adev->ip_top->die_kset; + spin_lock(&die_kset->list_lock); + list_for_each_prev_safe(el, tmp, &die_kset->list) { + list_del_init(el); + spin_unlock(&die_kset->list_lock); + amdgpu_discovery_sysfs_die_free(to_ip_die_entry(list_to_kobj(el))); + spin_lock(&die_kset->list_lock); + } + spin_unlock(&die_kset->list_lock); + kobject_put(&adev->ip_top->die_kset.kobj); + kobject_put(&adev->ip_top->kobj); +} + +/* ================================================== */ + +static int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev) { struct binary_header *bhdr; struct ip_discovery_header *ihdr; @@ -458,6 +1122,9 @@ int amdgpu_discovery_reg_base_init(struct 
amdgpu_device *adev) le16_to_cpu(ip->hw_id) == SDMA3_HWID) adev->sdma.num_instances++; + if (le16_to_cpu(ip->hw_id) == UMC_HWID) + adev->gmc.num_umc++; + for (k = 0; k < num_base_address; k++) { /* * convert the endianness of base addresses in place, @@ -488,10 +1155,12 @@ int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev) } next_ip: - ip_offset += sizeof(*ip) + 4 * (ip->num_base_address - 1); + ip_offset += struct_size(ip, base_address, ip->num_base_address); } } + amdgpu_discovery_sysfs_init(adev); + return 0; } @@ -536,41 +1205,37 @@ int amdgpu_discovery_get_ip_version(struct amdgpu_device *adev, int hw_id, int n *revision = ip->revision; return 0; } - ip_offset += sizeof(*ip) + 4 * (ip->num_base_address - 1); + ip_offset += struct_size(ip, base_address, ip->num_base_address); } } return -EINVAL; } -void amdgpu_discovery_harvest_ip(struct amdgpu_device *adev) +static void amdgpu_discovery_harvest_ip(struct amdgpu_device *adev) { - struct binary_header *bhdr; - struct harvest_table *harvest_info; - int i, vcn_harvest_count = 0; + int vcn_harvest_count = 0; + int umc_harvest_count = 0; - bhdr = (struct binary_header *)adev->mman.discovery_bin; - harvest_info = (struct harvest_table *)(adev->mman.discovery_bin + - le16_to_cpu(bhdr->table_list[HARVEST_INFO].offset)); - - for (i = 0; i < 32; i++) { - if (le16_to_cpu(harvest_info->list[i].hw_id) == 0) - break; - - switch (le16_to_cpu(harvest_info->list[i].hw_id)) { - case VCN_HWID: - vcn_harvest_count++; - if (harvest_info->list[i].number_instance == 0) - adev->vcn.harvest_config |= AMDGPU_VCN_HARVEST_VCN0; - else - adev->vcn.harvest_config |= AMDGPU_VCN_HARVEST_VCN1; - break; - case DMU_HWID: - adev->harvest_ip_mask |= AMD_HARVEST_IP_DMU_MASK; - break; - default: - break; - } + /* + * Harvest table does not fit Navi1x and legacy GPUs, + * so read harvest bit per IP data structure to set + * harvest configuration. 
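+	 * (The per-IP path is amdgpu_discovery_read_harvest_bit_per_ip()
+	 * above; parts with a usable table go through
+	 * amdgpu_discovery_read_from_harvest_table(), which can additionally
+	 * report harvested UMC instances.)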
+ */ + if (adev->ip_versions[GC_HWIP][0] < IP_VERSION(10, 2, 0)) { + if ((adev->pdev->device == 0x731E && + (adev->pdev->revision == 0xC6 || + adev->pdev->revision == 0xC7)) || + (adev->pdev->device == 0x7340 && + adev->pdev->revision == 0xC9) || + (adev->pdev->device == 0x7360 && + adev->pdev->revision == 0xC7)) + amdgpu_discovery_read_harvest_bit_per_ip(adev, + &vcn_harvest_count); + } else { + amdgpu_discovery_read_from_harvest_table(adev, + &vcn_harvest_count, + &umc_harvest_count); } amdgpu_discovery_harvest_config_quirk(adev); @@ -579,24 +1244,24 @@ void amdgpu_discovery_harvest_ip(struct amdgpu_device *adev) adev->harvest_ip_mask |= AMD_HARVEST_IP_VCN_MASK; adev->harvest_ip_mask |= AMD_HARVEST_IP_JPEG_MASK; } - if ((adev->pdev->device == 0x731E && - (adev->pdev->revision == 0xC6 || adev->pdev->revision == 0xC7)) || - (adev->pdev->device == 0x7340 && adev->pdev->revision == 0xC9) || - (adev->pdev->device == 0x7360 && adev->pdev->revision == 0xC7)) { - adev->harvest_ip_mask |= AMD_HARVEST_IP_VCN_MASK; - adev->harvest_ip_mask |= AMD_HARVEST_IP_JPEG_MASK; + + if (umc_harvest_count < adev->gmc.num_umc) { + adev->gmc.num_umc -= umc_harvest_count; } } union gc_info { struct gc_info_v1_0 v1; + struct gc_info_v1_1 v1_1; + struct gc_info_v1_2 v1_2; struct gc_info_v2_0 v2; }; -int amdgpu_discovery_get_gfx_info(struct amdgpu_device *adev) +static int amdgpu_discovery_get_gfx_info(struct amdgpu_device *adev) { struct binary_header *bhdr; union gc_info *gc_info; + u16 offset; if (!adev->mman.discovery_bin) { DRM_ERROR("ip discovery uninitialized\n"); @@ -604,9 +1269,14 @@ int amdgpu_discovery_get_gfx_info(struct amdgpu_device *adev) } bhdr = (struct binary_header *)adev->mman.discovery_bin; - gc_info = (union gc_info *)(adev->mman.discovery_bin + - le16_to_cpu(bhdr->table_list[GC].offset)); - switch (gc_info->v1.header.version_major) { + offset = le16_to_cpu(bhdr->table_list[GC].offset); + + if (!offset) + return 0; + + gc_info = (union gc_info *)(adev->mman.discovery_bin + offset); + + switch (le16_to_cpu(gc_info->v1.header.version_major)) { case 1: adev->gfx.config.max_shader_engines = le32_to_cpu(gc_info->v1.gc_num_se); adev->gfx.config.max_cu_per_sh = 2 * (le32_to_cpu(gc_info->v1.gc_num_wgp0_per_sa) + @@ -626,6 +1296,21 @@ int amdgpu_discovery_get_gfx_info(struct amdgpu_device *adev) adev->gfx.config.num_sc_per_sh = le32_to_cpu(gc_info->v1.gc_num_sc_per_se) / le32_to_cpu(gc_info->v1.gc_num_sa_per_se); adev->gfx.config.num_packer_per_sc = le32_to_cpu(gc_info->v1.gc_num_packer_per_sc); + if (gc_info->v1.header.version_minor >= 1) { + adev->gfx.config.gc_num_tcp_per_sa = le32_to_cpu(gc_info->v1_1.gc_num_tcp_per_sa); + adev->gfx.config.gc_num_sdp_interface = le32_to_cpu(gc_info->v1_1.gc_num_sdp_interface); + adev->gfx.config.gc_num_tcps = le32_to_cpu(gc_info->v1_1.gc_num_tcps); + } + if (gc_info->v1.header.version_minor >= 2) { + adev->gfx.config.gc_num_tcp_per_wpg = le32_to_cpu(gc_info->v1_2.gc_num_tcp_per_wpg); + adev->gfx.config.gc_tcp_l1_size = le32_to_cpu(gc_info->v1_2.gc_tcp_l1_size); + adev->gfx.config.gc_num_sqc_per_wgp = le32_to_cpu(gc_info->v1_2.gc_num_sqc_per_wgp); + adev->gfx.config.gc_l1_instruction_cache_size_per_sqc = le32_to_cpu(gc_info->v1_2.gc_l1_instruction_cache_size_per_sqc); + adev->gfx.config.gc_l1_data_cache_size_per_sqc = le32_to_cpu(gc_info->v1_2.gc_l1_data_cache_size_per_sqc); + adev->gfx.config.gc_gl1c_per_sa = le32_to_cpu(gc_info->v1_2.gc_gl1c_per_sa); + adev->gfx.config.gc_gl1c_size_per_instance = le32_to_cpu(gc_info->v1_2.gc_gl1c_size_per_instance); + 
adev->gfx.config.gc_gl2c_per_gpu = le32_to_cpu(gc_info->v1_2.gc_gl2c_per_gpu); + } break; case 2: adev->gfx.config.max_shader_engines = le32_to_cpu(gc_info->v2.gc_num_se); @@ -649,8 +1334,105 @@ int amdgpu_discovery_get_gfx_info(struct amdgpu_device *adev) default: dev_err(adev->dev, "Unhandled GC info table %d.%d\n", - gc_info->v1.header.version_major, - gc_info->v1.header.version_minor); + le16_to_cpu(gc_info->v1.header.version_major), + le16_to_cpu(gc_info->v1.header.version_minor)); + return -EINVAL; + } + return 0; +} + +union mall_info { + struct mall_info_v1_0 v1; +}; + +int amdgpu_discovery_get_mall_info(struct amdgpu_device *adev) +{ + struct binary_header *bhdr; + union mall_info *mall_info; + u32 u, mall_size_per_umc, m_s_present, half_use; + u64 mall_size; + u16 offset; + + if (!adev->mman.discovery_bin) { + DRM_ERROR("ip discovery uninitialized\n"); + return -EINVAL; + } + + bhdr = (struct binary_header *)adev->mman.discovery_bin; + offset = le16_to_cpu(bhdr->table_list[MALL_INFO].offset); + + if (!offset) + return 0; + + mall_info = (union mall_info *)(adev->mman.discovery_bin + offset); + + switch (le16_to_cpu(mall_info->v1.header.version_major)) { + case 1: + mall_size = 0; + mall_size_per_umc = le32_to_cpu(mall_info->v1.mall_size_per_m); + m_s_present = le32_to_cpu(mall_info->v1.m_s_present); + half_use = le32_to_cpu(mall_info->v1.m_half_use); + for (u = 0; u < adev->gmc.num_umc; u++) { + if (m_s_present & (1 << u)) + mall_size += mall_size_per_umc * 2; + else if (half_use & (1 << u)) + mall_size += mall_size_per_umc / 2; + else + mall_size += mall_size_per_umc; + } + adev->gmc.mall_size = mall_size; + break; + default: + dev_err(adev->dev, + "Unhandled MALL info table %d.%d\n", + le16_to_cpu(mall_info->v1.header.version_major), + le16_to_cpu(mall_info->v1.header.version_minor)); + return -EINVAL; + } + return 0; +} + +union vcn_info { + struct vcn_info_v1_0 v1; +}; + +static int amdgpu_discovery_get_vcn_info(struct amdgpu_device *adev) +{ + struct binary_header *bhdr; + union vcn_info *vcn_info; + u16 offset; + int v; + + if (!adev->mman.discovery_bin) { + DRM_ERROR("ip discovery uninitialized\n"); + return -EINVAL; + } + + if (adev->vcn.num_vcn_inst > VCN_INFO_TABLE_MAX_NUM_INSTANCES) { + dev_err(adev->dev, "invalid vcn instances\n"); + return -EINVAL; + } + + bhdr = (struct binary_header *)adev->mman.discovery_bin; + offset = le16_to_cpu(bhdr->table_list[VCN_INFO].offset); + + if (!offset) + return 0; + + vcn_info = (union vcn_info *)(adev->mman.discovery_bin + offset); + + switch (le16_to_cpu(vcn_info->v1.header.version_major)) { + case 1: + for (v = 0; v < adev->vcn.num_vcn_inst; v++) { + adev->vcn.vcn_codec_disable_mask[v] = + le32_to_cpu(vcn_info->v1.instance_info[v].fuse_data.all_bits); + } + break; + default: + dev_err(adev->dev, + "Unhandled VCN info table %d.%d\n", + le16_to_cpu(vcn_info->v1.header.version_major), + le16_to_cpu(vcn_info->v1.header.version_minor)); return -EINVAL; } return 0; @@ -674,12 +1456,15 @@ static int amdgpu_discovery_set_common_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(10, 1, 1): case IP_VERSION(10, 1, 2): case IP_VERSION(10, 1, 3): + case IP_VERSION(10, 1, 4): case IP_VERSION(10, 3, 0): case IP_VERSION(10, 3, 1): case IP_VERSION(10, 3, 2): case IP_VERSION(10, 3, 3): case IP_VERSION(10, 3, 4): case IP_VERSION(10, 3, 5): + case IP_VERSION(10, 3, 6): + case IP_VERSION(10, 3, 7): amdgpu_device_ip_block_add(adev, &nv_common_ip_block); break; default: @@ -709,12 +1494,15 @@ static int amdgpu_discovery_set_gmc_ip_blocks(struct 
amdgpu_device *adev) case IP_VERSION(10, 1, 1): case IP_VERSION(10, 1, 2): case IP_VERSION(10, 1, 3): + case IP_VERSION(10, 1, 4): case IP_VERSION(10, 3, 0): case IP_VERSION(10, 3, 1): case IP_VERSION(10, 3, 2): case IP_VERSION(10, 3, 3): case IP_VERSION(10, 3, 4): case IP_VERSION(10, 3, 5): + case IP_VERSION(10, 3, 6): + case IP_VERSION(10, 3, 7): amdgpu_device_ip_block_add(adev, &gmc_v10_0_ip_block); break; default: @@ -790,6 +1578,8 @@ static int amdgpu_discovery_set_psp_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(13, 0, 1): case IP_VERSION(13, 0, 2): case IP_VERSION(13, 0, 3): + case IP_VERSION(13, 0, 5): + case IP_VERSION(13, 0, 8): amdgpu_device_ip_block_add(adev, &psp_v13_0_ip_block); break; default: @@ -831,6 +1621,8 @@ static int amdgpu_discovery_set_smu_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(13, 0, 1): case IP_VERSION(13, 0, 2): case IP_VERSION(13, 0, 3): + case IP_VERSION(13, 0, 5): + case IP_VERSION(13, 0, 8): amdgpu_device_ip_block_add(adev, &smu_v13_0_ip_block); break; default: @@ -846,8 +1638,14 @@ static int amdgpu_discovery_set_display_ip_blocks(struct amdgpu_device *adev) { if (adev->enable_virtual_display || amdgpu_sriov_vf(adev)) { amdgpu_device_ip_block_add(adev, &amdgpu_vkms_ip_block); + return 0; + } + + if (!amdgpu_device_has_dc_support(adev)) + return 0; + #if defined(CONFIG_DRM_AMD_DC) - } else if (adev->ip_versions[DCE_HWIP][0]) { + if (adev->ip_versions[DCE_HWIP][0]) { switch (adev->ip_versions[DCE_HWIP][0]) { case IP_VERSION(1, 0, 0): case IP_VERSION(1, 0, 1): @@ -861,6 +1659,8 @@ static int amdgpu_discovery_set_display_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(3, 0, 1): case IP_VERSION(3, 1, 2): case IP_VERSION(3, 1, 3): + case IP_VERSION(3, 1, 5): + case IP_VERSION(3, 1, 6): amdgpu_device_ip_block_add(adev, &dm_ip_block); break; default: @@ -882,8 +1682,8 @@ static int amdgpu_discovery_set_display_ip_blocks(struct amdgpu_device *adev) adev->ip_versions[DCI_HWIP][0]); return -EINVAL; } -#endif } +#endif return 0; } @@ -904,12 +1704,15 @@ static int amdgpu_discovery_set_gc_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(10, 1, 2): case IP_VERSION(10, 1, 1): case IP_VERSION(10, 1, 3): + case IP_VERSION(10, 1, 4): case IP_VERSION(10, 3, 0): case IP_VERSION(10, 3, 2): case IP_VERSION(10, 3, 1): case IP_VERSION(10, 3, 4): case IP_VERSION(10, 3, 5): + case IP_VERSION(10, 3, 6): case IP_VERSION(10, 3, 3): + case IP_VERSION(10, 3, 7): amdgpu_device_ip_block_add(adev, &gfx_v10_0_ip_block); break; default: @@ -944,8 +1747,10 @@ static int amdgpu_discovery_set_sdma_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(5, 2, 2): case IP_VERSION(5, 2, 4): case IP_VERSION(5, 2, 5): + case IP_VERSION(5, 2, 6): case IP_VERSION(5, 2, 3): case IP_VERSION(5, 2, 1): + case IP_VERSION(5, 2, 7): amdgpu_device_ip_block_add(adev, &sdma_v5_2_ip_block); break; default: @@ -1012,6 +1817,7 @@ static int amdgpu_discovery_set_mm_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(3, 0, 0): case IP_VERSION(3, 0, 16): case IP_VERSION(3, 1, 1): + case IP_VERSION(3, 1, 2): case IP_VERSION(3, 0, 2): case IP_VERSION(3, 0, 192): amdgpu_device_ip_block_add(adev, &vcn_v3_0_ip_block); @@ -1038,12 +1844,14 @@ static int amdgpu_discovery_set_mes_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(10, 1, 1): case IP_VERSION(10, 1, 2): case IP_VERSION(10, 1, 3): + case IP_VERSION(10, 1, 4): case IP_VERSION(10, 3, 0): case IP_VERSION(10, 3, 1): case IP_VERSION(10, 3, 2): case IP_VERSION(10, 3, 3): case IP_VERSION(10, 3, 4): case IP_VERSION(10, 3, 5): + case 
IP_VERSION(10, 3, 6): amdgpu_device_ip_block_add(adev, &mes_v10_1_ip_block); break; default: @@ -1060,6 +1868,7 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) case CHIP_VEGA10: vega10_reg_base_init(adev); adev->sdma.num_instances = 2; + adev->gmc.num_umc = 4; adev->ip_versions[MMHUB_HWIP][0] = IP_VERSION(9, 0, 0); adev->ip_versions[ATHUB_HWIP][0] = IP_VERSION(9, 0, 0); adev->ip_versions[OSSSYS_HWIP][0] = IP_VERSION(4, 0, 0); @@ -1081,6 +1890,7 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) case CHIP_VEGA12: vega10_reg_base_init(adev); adev->sdma.num_instances = 2; + adev->gmc.num_umc = 4; adev->ip_versions[MMHUB_HWIP][0] = IP_VERSION(9, 3, 0); adev->ip_versions[ATHUB_HWIP][0] = IP_VERSION(9, 3, 0); adev->ip_versions[OSSSYS_HWIP][0] = IP_VERSION(4, 0, 1); @@ -1103,6 +1913,7 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) vega10_reg_base_init(adev); adev->sdma.num_instances = 1; adev->vcn.num_vcn_inst = 1; + adev->gmc.num_umc = 2; if (adev->apu_flags & AMD_APU_IS_RAVEN2) { adev->ip_versions[MMHUB_HWIP][0] = IP_VERSION(9, 2, 0); adev->ip_versions[ATHUB_HWIP][0] = IP_VERSION(9, 2, 0); @@ -1140,6 +1951,7 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) case CHIP_VEGA20: vega20_reg_base_init(adev); adev->sdma.num_instances = 2; + adev->gmc.num_umc = 8; adev->ip_versions[MMHUB_HWIP][0] = IP_VERSION(9, 4, 0); adev->ip_versions[ATHUB_HWIP][0] = IP_VERSION(9, 4, 0); adev->ip_versions[OSSSYS_HWIP][0] = IP_VERSION(4, 2, 0); @@ -1163,6 +1975,7 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) arct_reg_base_init(adev); adev->sdma.num_instances = 8; adev->vcn.num_vcn_inst = 2; + adev->gmc.num_umc = 8; adev->ip_versions[MMHUB_HWIP][0] = IP_VERSION(9, 4, 1); adev->ip_versions[ATHUB_HWIP][0] = IP_VERSION(9, 4, 1); adev->ip_versions[OSSSYS_HWIP][0] = IP_VERSION(4, 2, 1); @@ -1190,6 +2003,7 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) aldebaran_reg_base_init(adev); adev->sdma.num_instances = 5; adev->vcn.num_vcn_inst = 2; + adev->gmc.num_umc = 4; adev->ip_versions[MMHUB_HWIP][0] = IP_VERSION(9, 4, 2); adev->ip_versions[ATHUB_HWIP][0] = IP_VERSION(9, 4, 2); adev->ip_versions[OSSSYS_HWIP][0] = IP_VERSION(4, 4, 0); @@ -1217,11 +2031,9 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) return -EINVAL; amdgpu_discovery_harvest_ip(adev); - - if (!adev->mman.discovery_bin) { - DRM_ERROR("ip discovery uninitialized\n"); - return -EINVAL; - } + amdgpu_discovery_get_gfx_info(adev); + amdgpu_discovery_get_mall_info(adev); + amdgpu_discovery_get_vcn_info(adev); break; } @@ -1242,6 +2054,7 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(10, 1, 1): case IP_VERSION(10, 1, 2): case IP_VERSION(10, 1, 3): + case IP_VERSION(10, 1, 4): case IP_VERSION(10, 3, 0): case IP_VERSION(10, 3, 2): case IP_VERSION(10, 3, 4): @@ -1254,10 +2067,32 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(10, 3, 3): adev->family = AMDGPU_FAMILY_YC; break; + case IP_VERSION(10, 3, 6): + adev->family = AMDGPU_FAMILY_GC_10_3_6; + break; + case IP_VERSION(10, 3, 7): + adev->family = AMDGPU_FAMILY_GC_10_3_7; + break; default: return -EINVAL; } + switch (adev->ip_versions[GC_HWIP][0]) { + case IP_VERSION(9, 1, 0): + case IP_VERSION(9, 2, 2): + case IP_VERSION(9, 3, 0): + case IP_VERSION(10, 1, 3): + case IP_VERSION(10, 1, 4): + case IP_VERSION(10, 3, 1): + case IP_VERSION(10, 3, 3): + case IP_VERSION(10, 3, 6): + case IP_VERSION(10, 3, 7): + adev->flags |= AMD_IS_APU; 
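+		/*
+		 * The GC versions listed above belong to APU parts; setting
+		 * the flag here makes sure the rest of the driver takes the
+		 * APU code paths for them.
+		 */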
+ break; + default: + break; + } + if (adev->ip_versions[XGMI_HWIP][0] == IP_VERSION(4, 8, 0)) adev->gmc.xgmi.supported = true; @@ -1285,7 +2120,9 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) break; case IP_VERSION(7, 2, 0): case IP_VERSION(7, 2, 1): + case IP_VERSION(7, 3, 0): case IP_VERSION(7, 5, 0): + case IP_VERSION(7, 5, 1): adev->nbio.funcs = &nbio_v7_2_funcs; adev->nbio.hdp_flush_reg = &nbio_v7_2_hdp_flush_reg; break; @@ -1368,6 +2205,8 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(11, 0, 11): case IP_VERSION(11, 5, 0): case IP_VERSION(13, 0, 1): + case IP_VERSION(13, 0, 9): + case IP_VERSION(13, 0, 10): adev->smuio.funcs = &smuio_v11_0_6_funcs; break; case IP_VERSION(13, 0, 2): diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h index 14537cec19db..8563dd4a7dc2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h @@ -28,12 +28,8 @@ #define DISCOVERY_TMR_OFFSET (64 << 10) void amdgpu_discovery_fini(struct amdgpu_device *adev); -int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev); -void amdgpu_discovery_harvest_ip(struct amdgpu_device *adev); int amdgpu_discovery_get_ip_version(struct amdgpu_device *adev, int hw_id, int number_instance, int *major, int *minor, int *revision); - -int amdgpu_discovery_get_gfx_info(struct amdgpu_device *adev); int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev); #endif /* __AMDGPU_DISCOVERY__ */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c index c4387b38229c..17c9bbe0cbc5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c @@ -41,6 +41,11 @@ #include <drm/drm_fourcc.h> #include <drm/drm_vblank.h> +static int amdgpu_display_framebuffer_init(struct drm_device *dev, + struct amdgpu_framebuffer *rfb, + const struct drm_mode_fb_cmd2 *mode_cmd, + struct drm_gem_object *obj); + static void amdgpu_display_flip_callback(struct dma_fence *f, struct dma_fence_cb *cb) { @@ -113,8 +118,9 @@ static void amdgpu_display_flip_work_func(struct work_struct *__work) spin_unlock_irqrestore(&crtc->dev->event_lock, flags); - DRM_DEBUG_DRIVER("crtc:%d[%p], pflip_stat:AMDGPU_FLIP_SUBMITTED, work: %p,\n", - amdgpu_crtc->crtc_id, amdgpu_crtc, work); + drm_dbg_vbl(adev_to_drm(adev), + "crtc:%d[%p], pflip_stat:AMDGPU_FLIP_SUBMITTED, work: %p,\n", + amdgpu_crtc->crtc_id, amdgpu_crtc, work); } @@ -200,8 +206,9 @@ int amdgpu_display_crtc_page_flip_target(struct drm_crtc *crtc, goto unpin; } - r = dma_resv_get_fences(new_abo->tbo.base.resv, NULL, - &work->shared_count, &work->shared); + r = dma_resv_get_fences(new_abo->tbo.base.resv, DMA_RESV_USAGE_WRITE, + &work->shared_count, + &work->shared); if (unlikely(r != 0)) { DRM_ERROR("failed to get fences for buffer\n"); goto unpin; @@ -504,28 +511,9 @@ uint32_t amdgpu_display_supported_domains(struct amdgpu_device *adev, */ if ((bo_flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC) && amdgpu_bo_support_uswc(bo_flags) && - amdgpu_device_asic_has_dc_support(adev->asic_type)) { - switch (adev->asic_type) { - case CHIP_CARRIZO: - case CHIP_STONEY: - domain |= AMDGPU_GEM_DOMAIN_GTT; - break; - case CHIP_RAVEN: - /* enable S/G on PCO and RV2 */ - if ((adev->apu_flags & AMD_APU_IS_RAVEN2) || - (adev->apu_flags & AMD_APU_IS_PICASSO)) - domain |= AMDGPU_GEM_DOMAIN_GTT; - break; - case CHIP_RENOIR: - case CHIP_VANGOGH: - case CHIP_YELLOW_CARP: - domain |= 
AMDGPU_GEM_DOMAIN_GTT; - break; - - default: - break; - } - } + amdgpu_device_asic_has_dc_support(adev->asic_type) && + adev->mode_info.gpu_vm_support) + domain |= AMDGPU_GEM_DOMAIN_GTT; #endif return domain; @@ -708,9 +696,9 @@ static int convert_tiling_flags_to_modifier(struct amdgpu_framebuffer *afb) return -EINVAL; } - if (adev->asic_type >= CHIP_SIENNA_CICHLID) + if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(10, 3, 0)) version = AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS; - else if (adev->family == AMDGPU_FAMILY_NV) + else if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(10, 0, 0)) version = AMD_FMT_MOD_TILE_VER_GFX10; else version = AMD_FMT_MOD_TILE_VER_GFX9; @@ -804,7 +792,7 @@ static int convert_tiling_flags_to_modifier(struct amdgpu_framebuffer *afb) if (adev->family >= AMDGPU_FAMILY_NV) { int extra_pipe = 0; - if (adev->asic_type >= CHIP_SIENNA_CICHLID && + if ((adev->ip_versions[GC_HWIP][0] >= IP_VERSION(10, 3, 0)) && pipes == packers && pipes > 1) extra_pipe = 1; @@ -954,7 +942,7 @@ static int amdgpu_display_verify_sizes(struct amdgpu_framebuffer *rfb) int ret; unsigned int i, block_width, block_height, block_size_log2; - if (!rfb->base.dev->mode_config.allow_fb_modifiers) + if (rfb->base.dev->mode_config.fb_modifiers_not_supported) return 0; for (i = 0; i < format_info->num_planes; ++i) { @@ -1056,35 +1044,11 @@ static int amdgpu_display_get_fb_info(const struct amdgpu_framebuffer *amdgpu_fb return r; } -int amdgpu_display_gem_fb_init(struct drm_device *dev, - struct amdgpu_framebuffer *rfb, - const struct drm_mode_fb_cmd2 *mode_cmd, - struct drm_gem_object *obj) -{ - int ret; - - rfb->base.obj[0] = obj; - drm_helper_mode_fill_fb_struct(dev, &rfb->base, mode_cmd); - - ret = amdgpu_display_framebuffer_init(dev, rfb, mode_cmd, obj); - if (ret) - goto err; - - ret = drm_framebuffer_init(dev, &rfb->base, &amdgpu_fb_funcs); - if (ret) - goto err; - - return 0; -err: - drm_dbg_kms(dev, "Failed to init gem fb: %d\n", ret); - rfb->base.obj[0] = NULL; - return ret; -} - -int amdgpu_display_gem_fb_verify_and_init( - struct drm_device *dev, struct amdgpu_framebuffer *rfb, - struct drm_file *file_priv, const struct drm_mode_fb_cmd2 *mode_cmd, - struct drm_gem_object *obj) +static int amdgpu_display_gem_fb_verify_and_init(struct drm_device *dev, + struct amdgpu_framebuffer *rfb, + struct drm_file *file_priv, + const struct drm_mode_fb_cmd2 *mode_cmd, + struct drm_gem_object *obj) { int ret; @@ -1116,10 +1080,10 @@ err: return ret; } -int amdgpu_display_framebuffer_init(struct drm_device *dev, - struct amdgpu_framebuffer *rfb, - const struct drm_mode_fb_cmd2 *mode_cmd, - struct drm_gem_object *obj) +static int amdgpu_display_framebuffer_init(struct drm_device *dev, + struct amdgpu_framebuffer *rfb, + const struct drm_mode_fb_cmd2 *mode_cmd, + struct drm_gem_object *obj) { struct amdgpu_device *adev = drm_to_adev(dev); int ret, i; @@ -1141,7 +1105,7 @@ int amdgpu_display_framebuffer_init(struct drm_device *dev, if (ret) return ret; - if (!dev->mode_config.allow_fb_modifiers && !adev->enable_virtual_display) { + if (dev->mode_config.fb_modifiers_not_supported && !adev->enable_virtual_display) { drm_WARN_ONCE(dev, adev->family >= AMDGPU_FAMILY_AI, "GFX9+ requires FB check based on format modifier\n"); ret = check_tiling_flags_gfx6(rfb); @@ -1149,7 +1113,7 @@ int amdgpu_display_framebuffer_init(struct drm_device *dev, return ret; } - if (dev->mode_config.allow_fb_modifiers && + if (!dev->mode_config.fb_modifiers_not_supported && !(rfb->base.flags & DRM_MODE_FB_MODIFIERS)) { ret = 
convert_tiling_flags_to_modifier(rfb); if (ret) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c index 579adfafe4d0..782cbca37538 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c @@ -102,21 +102,9 @@ static int amdgpu_dma_buf_pin(struct dma_buf_attachment *attach) { struct drm_gem_object *obj = attach->dmabuf->priv; struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj); - int r; /* pin buffer into GTT */ - r = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT); - if (r) - return r; - - if (bo->tbo.moving) { - r = dma_fence_wait(bo->tbo.moving, true); - if (r) { - amdgpu_bo_unpin(bo); - return r; - } - } - return 0; + return amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT); } /** diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 0ead08ba58c2..ebd37fb19cdb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -99,9 +99,11 @@ * - 3.42.0 - Add 16bpc fixed point display support * - 3.43.0 - Add device hot plug/unplug support * - 3.44.0 - DCN3 supports DCC independent block settings: !64B && 128B, 64B && 128B + * - 3.45.0 - Add context ioctl stable pstate interface + * * 3.46.0 - To enable hot plug amdgpu tests in libdrm */ #define KMS_DRIVER_MAJOR 3 -#define KMS_DRIVER_MINOR 44 +#define KMS_DRIVER_MINOR 46 #define KMS_DRIVER_PATCHLEVEL 0 int amdgpu_vram_limit; @@ -109,8 +111,6 @@ int amdgpu_vis_vram_limit; int amdgpu_gart_size = -1; /* auto */ int amdgpu_gtt_size = -1; /* auto */ int amdgpu_moverate = -1; /* auto */ -int amdgpu_benchmarking; -int amdgpu_testing; int amdgpu_audio = -1; int amdgpu_disp_priority; int amdgpu_hw_i2c; @@ -136,7 +136,7 @@ int amdgpu_sched_jobs = 32; int amdgpu_sched_hw_submission = 2; uint amdgpu_pcie_gen_cap; uint amdgpu_pcie_lane_cap; -uint amdgpu_cg_mask = 0xffffffff; +u64 amdgpu_cg_mask = 0xffffffffffffffff; uint amdgpu_pg_mask = 0xffffffff; uint amdgpu_sdma_phase_quantum = 32; char *amdgpu_disable_cu = NULL; @@ -174,10 +174,11 @@ int amdgpu_mes; int amdgpu_noretry = -1; int amdgpu_force_asic_type = -1; int amdgpu_tmz = -1; /* auto */ -uint amdgpu_freesync_vid_mode; int amdgpu_reset_method = -1; /* auto */ int amdgpu_num_kcq = -1; int amdgpu_smartshift_bias; +int amdgpu_use_xgmi_p2p = 1; +int amdgpu_vcnfw_log; static void amdgpu_drv_delayed_reset_work_handler(struct work_struct *work); @@ -232,20 +233,6 @@ MODULE_PARM_DESC(moverate, "Maximum buffer migration rate in MB/s. (32, 64, etc. module_param_named(moverate, amdgpu_moverate, int, 0600); /** - * DOC: benchmark (int) - * Run benchmarks. The default is 0 (Skip benchmarks). - */ -MODULE_PARM_DESC(benchmark, "Run benchmark"); -module_param_named(benchmark, amdgpu_benchmarking, int, 0444); - -/** - * DOC: test (int) - * Test BO GTT->VRAM and VRAM->GTT GPU copies. The default is 0 (Skip test, only set 1 to run test). - */ -MODULE_PARM_DESC(test, "Run tests"); -module_param_named(test, amdgpu_testing, int, 0444); - -/** * DOC: audio (int) * Set HDMI/DPAudio. Only affects non-DC display handling. The default is -1 (Enabled), set 0 to disabled it. */ @@ -467,12 +454,12 @@ MODULE_PARM_DESC(pcie_lane_cap, "PCIE Lane Caps (0: autodetect (default))"); module_param_named(pcie_lane_cap, amdgpu_pcie_lane_cap, uint, 0444); /** - * DOC: cg_mask (uint) + * DOC: cg_mask (ullong) * Override Clockgating features enabled on GPU (0 = disable clock gating). See the AMD_CG_SUPPORT flags in - * drivers/gpu/drm/amd/include/amd_shared.h. 
The default is 0xffffffff (all enabled). + * drivers/gpu/drm/amd/include/amd_shared.h. The default is 0xffffffffffffffff (all enabled). */ MODULE_PARM_DESC(cg_mask, "Clockgating flags mask (0 = disable clock gating)"); -module_param_named(cg_mask, amdgpu_cg_mask, uint, 0444); +module_param_named(cg_mask, amdgpu_cg_mask, ullong, 0444); /** * DOC: pg_mask (uint) @@ -667,6 +654,13 @@ MODULE_PARM_DESC(force_asic_type, "A non negative value used to specify the asic type for all supported GPUs"); module_param_named(force_asic_type, amdgpu_force_asic_type, int, 0444); +/** + * DOC: use_xgmi_p2p (int) + * Enables/disables XGMI P2P interface (0 = disable, 1 = enable). + */ +MODULE_PARM_DESC(use_xgmi_p2p, + "Enable XGMI P2P interface (0 = disable; 1 = enable (default))"); +module_param_named(use_xgmi_p2p, amdgpu_use_xgmi_p2p, int, 0444); #ifdef CONFIG_HSA_AMD @@ -686,7 +680,7 @@ MODULE_PARM_DESC(sched_policy, * Maximum number of processes that HWS can schedule concurrently. The maximum is the * number of VMIDs assigned to the HWS, which is also the default. */ -int hws_max_conc_proc = 8; +int hws_max_conc_proc = -1; module_param(hws_max_conc_proc, int, 0444); MODULE_PARM_DESC(hws_max_conc_proc, "Max # processes HWS can execute concurrently when sched_policy=0 (0 = no concurrency, #VMIDs for KFD = Maximum(default))"); @@ -740,7 +734,7 @@ MODULE_PARM_DESC(debug_largebar, * systems with a broken CRAT table. * * Default is auto (according to asic type, iommu_v2, and crat table, to decide - * whehter use CRAT) + * whether use CRAT) */ int ignore_crat; module_param(ignore_crat, int, 0444); @@ -844,36 +838,10 @@ MODULE_PARM_DESC(tmz, "Enable TMZ feature (-1 = auto (default), 0 = off, 1 = on) module_param_named(tmz, amdgpu_tmz, int, 0444); /** - * DOC: freesync_video (uint) - * Enable the optimization to adjust front porch timing to achieve seamless - * mode change experience when setting a freesync supported mode for which full - * modeset is not needed. - * - * The Display Core will add a set of modes derived from the base FreeSync - * video mode into the corresponding connector's mode list based on commonly - * used refresh rates and VRR range of the connected display, when users enable - * this feature. From the userspace perspective, they can see a seamless mode - * change experience when the change between different refresh rates under the - * same resolution. Additionally, userspace applications such as Video playback - * can read this modeset list and change the refresh rate based on the video - * frame rate. Finally, the userspace can also derive an appropriate mode for a - * particular refresh rate based on the FreeSync Mode and add it to the - * connector's mode list. - * - * Note: This is an experimental feature. - * - * The default value: 0 (off). 
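A note on the cg_mask hunk above (uint to u64, with the module_param type moving to ullong): the widening exists because 32 bits no longer hold all of the AMD_CG_SUPPORT_* clockgating flags. A minimal userspace sketch of why the width matters; the CG_FEATURE_* names are invented for illustration:

    #include <stdint.h>
    #include <stdio.h>

    /* Invented stand-ins for AMD_CG_SUPPORT_* bits; bit 33 is only
     * representable once the mask is 64 bits wide. */
    #define CG_FEATURE_GFX_MGCG (1ULL << 0)
    #define CG_FEATURE_NEW_IP   (1ULL << 33)

    int main(void)
    {
            uint64_t cg_mask = 0xffffffffffffffffULL; /* new default: all on */

            if (cg_mask & CG_FEATURE_NEW_IP)
                    printf("bit 33 is reachable with a 64-bit mask\n");

            /* the old uint parameter silently dropped the high bits */
            uint32_t old_mask = (uint32_t)cg_mask;
            printf("truncated 32-bit mask: 0x%x\n", old_mask);
            return 0;
    }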
- */ -MODULE_PARM_DESC( - freesync_video, - "Enable freesync modesetting optimization feature (0 = off (default), 1 = on)"); -module_param_named(freesync_video, amdgpu_freesync_vid_mode, uint, 0444); - -/** * DOC: reset_method (int) - * GPU reset method (-1 = auto (default), 0 = legacy, 1 = mode0, 2 = mode1, 3 = mode2, 4 = baco, 5 = pci) + * GPU reset method (-1 = auto (default), 0 = legacy, 1 = mode0, 2 = mode1, 3 = mode2, 4 = baco) */ -MODULE_PARM_DESC(reset_method, "GPU reset method (-1 = auto (default), 0 = legacy, 1 = mode0, 2 = mode1, 3 = mode2, 4 = baco/bamaco, 5 = pci)"); +MODULE_PARM_DESC(reset_method, "GPU reset method (-1 = auto (default), 0 = legacy, 1 = mode0, 2 = mode1, 3 = mode2, 4 = baco/bamaco)"); module_param_named(reset_method, amdgpu_reset_method, int, 0444); /** @@ -889,6 +857,13 @@ MODULE_PARM_DESC(num_kcq, "number of kernel compute queue user want to setup (8 module_param_named(num_kcq, amdgpu_num_kcq, int, 0444); /** + * DOC: vcnfw_log (int) + * Enable vcnfw log output for debugging, the default is disabled. + */ +MODULE_PARM_DESC(vcnfw_log, "Enable vcnfw log (0 = disable (default value), 1 = enable)"); +module_param_named(vcnfw_log, amdgpu_vcnfw_log, int, 0444); + +/** * DOC: smu_pptable_id (int) * Used to override pptable id. id = 0 use VBIOS pptable. * id > 0 use the soft pptable with specified id. @@ -1942,13 +1917,14 @@ static const struct pci_device_id pciidlist[] = { {0x1002, 0x73FF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_DIMGREY_CAVEFISH}, /* Aldebaran */ - {0x1002, 0x7408, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ALDEBARAN|AMD_EXP_HW_SUPPORT}, - {0x1002, 0x740C, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ALDEBARAN|AMD_EXP_HW_SUPPORT}, - {0x1002, 0x740F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ALDEBARAN|AMD_EXP_HW_SUPPORT}, - {0x1002, 0x7410, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ALDEBARAN|AMD_EXP_HW_SUPPORT}, + {0x1002, 0x7408, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ALDEBARAN}, + {0x1002, 0x740C, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ALDEBARAN}, + {0x1002, 0x740F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ALDEBARAN}, + {0x1002, 0x7410, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ALDEBARAN}, /* CYAN_SKILLFISH */ {0x1002, 0x13FE, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CYAN_SKILLFISH|AMD_IS_APU}, + {0x1002, 0x143F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CYAN_SKILLFISH|AMD_IS_APU}, /* BEIGE_GOBY */ {0x1002, 0x7420, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BEIGE_GOBY}, @@ -1994,6 +1970,28 @@ static bool amdgpu_is_fw_framebuffer(resource_size_t base, return found; } +static void amdgpu_get_secondary_funcs(struct amdgpu_device *adev) +{ + struct pci_dev *p = NULL; + int i; + + /* 0 - GPU + * 1 - audio + * 2 - USB + * 3 - UCSI + */ + for (i = 1; i < 4; i++) { + p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus), + adev->pdev->bus->number, i); + if (p) { + pm_runtime_get_sync(&p->dev); + pm_runtime_mark_last_busy(&p->dev); + pm_runtime_put_autosuspend(&p->dev); + pci_dev_put(p); + } + } +} + static int amdgpu_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) { @@ -2129,6 +2127,48 @@ retry_init: if (ret) DRM_ERROR("Creating debugfs files failed (%d).\n", ret); + if (adev->runpm) { + /* only need to skip on ATPX */ + if (amdgpu_device_supports_px(ddev)) + dev_pm_set_driver_flags(ddev->dev, DPM_FLAG_NO_DIRECT_COMPLETE); + /* we want direct complete for BOCO */ + if (amdgpu_device_supports_boco(ddev)) + dev_pm_set_driver_flags(ddev->dev, DPM_FLAG_SMART_PREPARE | + DPM_FLAG_SMART_SUSPEND | + DPM_FLAG_MAY_SKIP_RESUME); + pm_runtime_use_autosuspend(ddev->dev); + pm_runtime_set_autosuspend_delay(ddev->dev, 
5000); + + pm_runtime_allow(ddev->dev); + + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); + + /* + * For runpm implemented via BACO, PMFW will handle the + * timing for BACO in and out: + * - put ASIC into BACO state only when both video and + * audio functions are in D3 state. + * - pull ASIC out of BACO state when either video or + * audio function is in D0 state. + * Also, at startup, PMFW assumes both functions are in + * D0 state. + * + * So if the snd driver was loaded prior to the amdgpu driver + * and the audio function was put into D3 state, there will + * be no PMFW-aware D-state transition (D0->D3) on runpm + * suspend. Thus BACO will not be correctly kicked in. + * + * Via amdgpu_get_secondary_funcs(), the audio dev is put + * into D0 state. Then there will be a PMFW-aware D-state + * transition (D0->D3) on runpm suspend. + */ + if (amdgpu_device_supports_baco(ddev) && + !(adev->flags & AMD_IS_APU) && + (adev->asic_type >= CHIP_NAVI10)) + amdgpu_get_secondary_funcs(adev); + } + return 0; err_pci: @@ -2140,8 +2180,15 @@ static void amdgpu_pci_remove(struct pci_dev *pdev) { struct drm_device *dev = pci_get_drvdata(pdev); + struct amdgpu_device *adev = drm_to_adev(dev); drm_dev_unplug(dev); + + if (adev->runpm) { + pm_runtime_get_sync(dev->dev); + pm_runtime_forbid(dev->dev); + } + amdgpu_driver_unload_kms(dev); /* @@ -2276,18 +2323,23 @@ static int amdgpu_pmops_suspend(struct device *dev) { struct drm_device *drm_dev = dev_get_drvdata(dev); struct amdgpu_device *adev = drm_to_adev(drm_dev); - int r; if (amdgpu_acpi_is_s0ix_active(adev)) adev->in_s0ix = true; else adev->in_s3 = true; - r = amdgpu_device_suspend(drm_dev, true); - if (r) - return r; + return amdgpu_device_suspend(drm_dev, true); +} + +static int amdgpu_pmops_suspend_noirq(struct device *dev) +{ + struct drm_device *drm_dev = dev_get_drvdata(dev); + struct amdgpu_device *adev = drm_to_adev(drm_dev); + if (!adev->in_s0ix) - r = amdgpu_asic_reset(adev); - return r; + return amdgpu_asic_reset(adev); + + return 0; } static int amdgpu_pmops_resume(struct device *dev) @@ -2343,6 +2395,71 @@ static int amdgpu_pmops_restore(struct device *dev) return amdgpu_device_resume(drm_dev, true); } +static int amdgpu_runtime_idle_check_display(struct device *dev) +{ + struct pci_dev *pdev = to_pci_dev(dev); + struct drm_device *drm_dev = pci_get_drvdata(pdev); + struct amdgpu_device *adev = drm_to_adev(drm_dev); + + if (adev->mode_info.num_crtc) { + struct drm_connector *list_connector; + struct drm_connector_list_iter iter; + int ret = 0; + + /* XXX: Return busy if any displays are connected to avoid + * possible display wakeups after runtime resume due to + * hotplug events in case any displays were connected while + * the GPU was in suspend. Remove this once that is fixed. 
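The long BACO comment above reduces to one pm_runtime idiom: briefly resume a PCI function to D0 and then let it autosuspend again, so the PM firmware later observes a real D0->D3 transition. A hedged sketch of that pattern; the helper name is made up, the three calls are the same ones amdgpu_get_secondary_funcs() makes, and error handling is omitted:

    #include <linux/pm_runtime.h>

    /* wake_then_autosuspend() is a hypothetical name for illustration */
    static void wake_then_autosuspend(struct device *dev)
    {
            pm_runtime_get_sync(dev);        /* resume the function to D0 */
            pm_runtime_mark_last_busy(dev);  /* restart the autosuspend timer */
            pm_runtime_put_autosuspend(dev); /* drop the ref; D3 can follow */
    }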
+ */ + mutex_lock(&drm_dev->mode_config.mutex); + drm_connector_list_iter_begin(drm_dev, &iter); + drm_for_each_connector_iter(list_connector, &iter) { + if (list_connector->status == connector_status_connected) { + ret = -EBUSY; + break; + } + } + drm_connector_list_iter_end(&iter); + mutex_unlock(&drm_dev->mode_config.mutex); + + if (ret) + return ret; + + if (amdgpu_device_has_dc_support(adev)) { + struct drm_crtc *crtc; + + drm_for_each_crtc(crtc, drm_dev) { + drm_modeset_lock(&crtc->mutex, NULL); + if (crtc->state->active) + ret = -EBUSY; + drm_modeset_unlock(&crtc->mutex); + if (ret < 0) + break; + } + } else { + mutex_lock(&drm_dev->mode_config.mutex); + drm_modeset_lock(&drm_dev->mode_config.connection_mutex, NULL); + + drm_connector_list_iter_begin(drm_dev, &iter); + drm_for_each_connector_iter(list_connector, &iter) { + if (list_connector->dpms == DRM_MODE_DPMS_ON) { + ret = -EBUSY; + break; + } + } + + drm_connector_list_iter_end(&iter); + + drm_modeset_unlock(&drm_dev->mode_config.connection_mutex); + mutex_unlock(&drm_dev->mode_config.mutex); + } + if (ret) + return ret; + } + + return 0; +} + static int amdgpu_pmops_runtime_suspend(struct device *dev) { struct pci_dev *pdev = to_pci_dev(dev); @@ -2355,6 +2472,10 @@ static int amdgpu_pmops_runtime_suspend(struct device *dev) return -EBUSY; } + ret = amdgpu_runtime_idle_check_display(dev); + if (ret) + return ret; + /* wait for all rings to drain before suspending */ for (i = 0; i < AMDGPU_MAX_RINGS; i++) { struct amdgpu_ring *ring = adev->rings[i]; @@ -2464,41 +2585,7 @@ static int amdgpu_pmops_runtime_idle(struct device *dev) return -EBUSY; } - if (amdgpu_device_has_dc_support(adev)) { - struct drm_crtc *crtc; - - drm_for_each_crtc(crtc, drm_dev) { - drm_modeset_lock(&crtc->mutex, NULL); - if (crtc->state->active) - ret = -EBUSY; - drm_modeset_unlock(&crtc->mutex); - if (ret < 0) - break; - } - - } else { - struct drm_connector *list_connector; - struct drm_connector_list_iter iter; - - mutex_lock(&drm_dev->mode_config.mutex); - drm_modeset_lock(&drm_dev->mode_config.connection_mutex, NULL); - - drm_connector_list_iter_begin(drm_dev, &iter); - drm_for_each_connector_iter(list_connector, &iter) { - if (list_connector->dpms == DRM_MODE_DPMS_ON) { - ret = -EBUSY; - break; - } - } - - drm_connector_list_iter_end(&iter); - - drm_modeset_unlock(&drm_dev->mode_config.connection_mutex); - mutex_unlock(&drm_dev->mode_config.mutex); - } - - if (ret == -EBUSY) - DRM_DEBUG_DRIVER("failing to power off - crtc active\n"); + ret = amdgpu_runtime_idle_check_display(dev); pm_runtime_mark_last_busy(dev); pm_runtime_autosuspend(dev); @@ -2528,6 +2615,7 @@ static const struct dev_pm_ops amdgpu_pm_ops = { .prepare = amdgpu_pmops_prepare, .complete = amdgpu_pmops_complete, .suspend = amdgpu_pmops_suspend, + .suspend_noirq = amdgpu_pmops_suspend_noirq, .resume = amdgpu_pmops_resume, .freeze = amdgpu_pmops_freeze, .thaw = amdgpu_pmops_thaw, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index 45977a72b5dd..5d13ed376ab4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c @@ -446,24 +446,18 @@ int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring, * for the requested ring. * * @ring: ring to init the fence driver on - * @num_hw_submission: number of entries on the hardware queue - * @sched_score: optional score atomic shared with other schedulers * * Init the fence driver for the requested ring (all asics). 
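On the amdgpu_fence.c hunk that has just begun: the is_power_of_2(num_hw_submission) check survives the refactor because num_fences_mask = num_hw_submission * 2 - 1 is used as a cheap modulo when indexing the fence array. A standalone illustration with arbitrary values:

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            unsigned int num_hw_submission = 32;                  /* must be 2^k */
            uint64_t num_fences_mask = num_hw_submission * 2 - 1; /* 63 */
            uint64_t seq = 131;                                   /* arbitrary fence seqno */

            /* the is_power_of_2() precondition from the hunk */
            assert((num_hw_submission & (num_hw_submission - 1)) == 0);

            /* masking equals modulo only because the size is a power of two */
            printf("slot = %llu\n", (unsigned long long)(seq & num_fences_mask));
            return 0;                                             /* prints "slot = 3" */
    }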
* Helper function for amdgpu_fence_driver_init(). */ -int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring, - unsigned num_hw_submission, - atomic_t *sched_score) +int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - long timeout; - int r; if (!adev) return -EINVAL; - if (!is_power_of_2(num_hw_submission)) + if (!is_power_of_2(ring->num_hw_submission)) return -EINVAL; ring->fence_drv.cpu_addr = NULL; @@ -474,41 +468,14 @@ int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring, timer_setup(&ring->fence_drv.fallback_timer, amdgpu_fence_fallback, 0); - ring->fence_drv.num_fences_mask = num_hw_submission * 2 - 1; + ring->fence_drv.num_fences_mask = ring->num_hw_submission * 2 - 1; spin_lock_init(&ring->fence_drv.lock); - ring->fence_drv.fences = kcalloc(num_hw_submission * 2, sizeof(void *), + ring->fence_drv.fences = kcalloc(ring->num_hw_submission * 2, sizeof(void *), GFP_KERNEL); + if (!ring->fence_drv.fences) return -ENOMEM; - /* No need to setup the GPU scheduler for rings that don't need it */ - if (ring->no_scheduler) - return 0; - - switch (ring->funcs->type) { - case AMDGPU_RING_TYPE_GFX: - timeout = adev->gfx_timeout; - break; - case AMDGPU_RING_TYPE_COMPUTE: - timeout = adev->compute_timeout; - break; - case AMDGPU_RING_TYPE_SDMA: - timeout = adev->sdma_timeout; - break; - default: - timeout = adev->video_timeout; - break; - } - - r = drm_sched_init(&ring->sched, &amdgpu_sched_ops, - num_hw_submission, amdgpu_job_hang_limit, - timeout, NULL, sched_score, ring->name); - if (r) { - DRM_ERROR("Failed to create scheduler on ring %s.\n", - ring->name); - return r; - } - return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c index 2a786e788627..ecada5eadfe3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c @@ -30,7 +30,6 @@ #include "amdgpu_eeprom.h" #define FRU_EEPROM_MADDR 0x60000 -#define I2C_PRODUCT_INFO_OFFSET 0xC0 static bool is_fru_eeprom_supported(struct amdgpu_device *adev) { @@ -40,7 +39,13 @@ static bool is_fru_eeprom_supported(struct amdgpu_device *adev) */ struct atom_context *atom_ctx = adev->mode_info.atom_context; - /* VBIOS is of the format ###-DXXXYY-##. For SKU identification, + /* The i2c access is blocked on VF + * TODO: Need other way to get the info + */ + if (amdgpu_sriov_vf(adev)) + return false; + + /* VBIOS is of the format ###-DXXXYYYY-##. For SKU identification, * we can use just the "DXXX" portion. If there were more models, we * could convert the 3 characters to a hex integer and use a switch * for ease/speed/readability. 
For now, 2 string comparisons are @@ -59,17 +64,24 @@ static bool is_fru_eeprom_supported(struct amdgpu_device *adev) case CHIP_ALDEBARAN: /* All Aldebaran SKUs have the FRU */ return true; + case CHIP_SIENNA_CICHLID: + if (strnstr(atom_ctx->vbios_version, "D603", + sizeof(atom_ctx->vbios_version))) + return true; + else + return false; default: return false; } } static int amdgpu_fru_read_eeprom(struct amdgpu_device *adev, uint32_t addrptr, - unsigned char *buff) + unsigned char *buf, size_t buf_size) { - int ret, size; + int ret; + u8 size; - ret = amdgpu_eeprom_read(&adev->pm.smu_i2c, addrptr, buff, 1); + ret = amdgpu_eeprom_read(adev->pm.fru_eeprom_i2c_bus, addrptr, buf, 1); if (ret < 1) { DRM_WARN("FRU: Failed to get size field"); return ret; @@ -78,9 +90,11 @@ static int amdgpu_fru_read_eeprom(struct amdgpu_device *adev, uint32_t addrptr, /* The size returned by the i2c requires subtraction of 0xC0 since the * size apparently always reports as 0xC0+actual size. */ - size = buff[0] - I2C_PRODUCT_INFO_OFFSET; + size = buf[0] & 0x3F; + size = min_t(size_t, size, buf_size); - ret = amdgpu_eeprom_read(&adev->pm.smu_i2c, addrptr + 1, buff, size); + ret = amdgpu_eeprom_read(adev->pm.fru_eeprom_i2c_bus, addrptr + 1, + buf, size); if (ret < 1) { DRM_WARN("FRU: Failed to get data field"); return ret; @@ -91,19 +105,15 @@ static int amdgpu_fru_read_eeprom(struct amdgpu_device *adev, uint32_t addrptr, int amdgpu_fru_get_product_info(struct amdgpu_device *adev) { - unsigned char buff[AMDGPU_PRODUCT_NAME_LEN+2]; + unsigned char buf[AMDGPU_PRODUCT_NAME_LEN]; u32 addrptr; int size, len; - int offset = 2; if (!is_fru_eeprom_supported(adev)) return 0; - if (adev->asic_type == CHIP_ALDEBARAN) - offset = 0; - /* If algo exists, it means that the i2c_adapter's initialized */ - if (!adev->pm.smu_i2c.algo) { + if (!adev->pm.fru_eeprom_i2c_bus || !adev->pm.fru_eeprom_i2c_bus->algo) { DRM_WARN("Cannot access FRU, EEPROM accessor not initialized"); return -ENODEV; } @@ -121,7 +131,7 @@ int amdgpu_fru_get_product_info(struct amdgpu_device *adev) * and the language field, so just start from 0xb, manufacturer size */ addrptr = FRU_EEPROM_MADDR + 0xb; - size = amdgpu_fru_read_eeprom(adev, addrptr, buff); + size = amdgpu_fru_read_eeprom(adev, addrptr, buf, sizeof(buf)); if (size < 1) { DRM_ERROR("Failed to read FRU Manufacturer, ret:%d", size); return -EINVAL; @@ -131,7 +141,7 @@ int amdgpu_fru_get_product_info(struct amdgpu_device *adev) * size field being 1 byte. This pattern continues below. */ addrptr += size + 1; - size = amdgpu_fru_read_eeprom(adev, addrptr, buff); + size = amdgpu_fru_read_eeprom(adev, addrptr, buf, sizeof(buf)); if (size < 1) { DRM_ERROR("Failed to read FRU product name, ret:%d", size); return -EINVAL; @@ -143,12 +153,11 @@ int amdgpu_fru_get_product_info(struct amdgpu_device *adev) AMDGPU_PRODUCT_NAME_LEN); len = AMDGPU_PRODUCT_NAME_LEN - 1; } - /* Start at 2 due to buff using fields 0 and 1 for the address */ - memcpy(adev->product_name, &buff[offset], len); + memcpy(adev->product_name, buf, len); adev->product_name[len] = '\0'; addrptr += size + 1; - size = amdgpu_fru_read_eeprom(adev, addrptr, buff); + size = amdgpu_fru_read_eeprom(adev, addrptr, buf, sizeof(buf)); if (size < 1) { DRM_ERROR("Failed to read FRU product number, ret:%d", size); return -EINVAL; @@ -162,11 +171,11 @@ int amdgpu_fru_get_product_info(struct amdgpu_device *adev) DRM_WARN("FRU Product Number is larger than 16 characters. 
This is likely a mistake"); len = sizeof(adev->product_number) - 1; } - memcpy(adev->product_number, &buff[offset], len); + memcpy(adev->product_number, buf, len); adev->product_number[len] = '\0'; addrptr += size + 1; - size = amdgpu_fru_read_eeprom(adev, addrptr, buff); + size = amdgpu_fru_read_eeprom(adev, addrptr, buf, sizeof(buf)); if (size < 1) { DRM_ERROR("Failed to read FRU product version, ret:%d", size); @@ -174,7 +183,7 @@ int amdgpu_fru_get_product_info(struct amdgpu_device *adev) } addrptr += size + 1; - size = amdgpu_fru_read_eeprom(adev, addrptr, buff); + size = amdgpu_fru_read_eeprom(adev, addrptr, buf, sizeof(buf)); if (size < 1) { DRM_ERROR("Failed to read FRU serial number, ret:%d", size); @@ -189,7 +198,7 @@ int amdgpu_fru_get_product_info(struct amdgpu_device *adev) DRM_WARN("FRU Serial Number is larger than 16 characters. This is likely a mistake"); len = sizeof(adev->serial) - 1; } - memcpy(adev->serial, &buff[offset], len); + memcpy(adev->serial, buf, len); adev->serial[len] = '\0'; return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c index 645950a653a0..01cb89ffbd56 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c @@ -150,7 +150,7 @@ void amdgpu_gart_table_vram_free(struct amdgpu_device *adev) * replaces them with the dummy page (all asics). * Returns 0 for success, -EINVAL for failure. */ -int amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset, +void amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset, int pages) { unsigned t; @@ -161,13 +161,11 @@ int amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset, uint64_t flags = 0; int idx; - if (!adev->gart.ready) { - WARN(1, "trying to unbind memory from uninitialized GART !\n"); - return -EINVAL; - } + if (!adev->gart.ptr) + return; if (!drm_dev_enter(adev_to_drm(adev), &idx)) - return 0; + return; t = offset / AMDGPU_GPU_PAGE_SIZE; p = t / AMDGPU_GPU_PAGES_IN_CPU_PAGE; @@ -188,7 +186,6 @@ int amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset, amdgpu_gmc_flush_gpu_tlb(adev, 0, i, 0); drm_dev_exit(idx); - return 0; } /** @@ -204,7 +201,7 @@ int amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset, * Map the dma_addresses into GART entries (all asics). * Returns 0 for success, -EINVAL for failure. */ -int amdgpu_gart_map(struct amdgpu_device *adev, uint64_t offset, +void amdgpu_gart_map(struct amdgpu_device *adev, uint64_t offset, int pages, dma_addr_t *dma_addr, uint64_t flags, void *dst) { @@ -212,13 +209,8 @@ int amdgpu_gart_map(struct amdgpu_device *adev, uint64_t offset, unsigned i, j, t; int idx; - if (!adev->gart.ready) { - WARN(1, "trying to bind memory to uninitialized GART !\n"); - return -EINVAL; - } - if (!drm_dev_enter(adev_to_drm(adev), &idx)) - return 0; + return; t = offset / AMDGPU_GPU_PAGE_SIZE; @@ -230,7 +222,6 @@ int amdgpu_gart_map(struct amdgpu_device *adev, uint64_t offset, } } drm_dev_exit(idx); - return 0; } /** @@ -246,20 +237,14 @@ int amdgpu_gart_map(struct amdgpu_device *adev, uint64_t offset, * (all asics). * Returns 0 for success, -EINVAL for failure. 
*/ -int amdgpu_gart_bind(struct amdgpu_device *adev, uint64_t offset, +void amdgpu_gart_bind(struct amdgpu_device *adev, uint64_t offset, int pages, dma_addr_t *dma_addr, uint64_t flags) { - if (!adev->gart.ready) { - WARN(1, "trying to bind memory to uninitialized GART !\n"); - return -EINVAL; - } - if (!adev->gart.ptr) - return 0; + return; - return amdgpu_gart_map(adev, offset, pages, dma_addr, flags, - adev->gart.ptr); + amdgpu_gart_map(adev, offset, pages, dma_addr, flags, adev->gart.ptr); } /** @@ -274,6 +259,9 @@ void amdgpu_gart_invalidate_tlb(struct amdgpu_device *adev) { int i; + if (!adev->gart.ptr) + return; + mb(); amdgpu_device_flush_hdp(adev, NULL); for (i = 0; i < adev->num_vmhubs; i++) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h index 78895413cf9f..8fea3e04e411 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h @@ -46,7 +46,6 @@ struct amdgpu_gart { unsigned num_gpu_pages; unsigned num_cpu_pages; unsigned table_size; - bool ready; /* Asic default pte flags */ uint64_t gart_pte_flags; @@ -58,12 +57,12 @@ int amdgpu_gart_table_vram_pin(struct amdgpu_device *adev); void amdgpu_gart_table_vram_unpin(struct amdgpu_device *adev); int amdgpu_gart_init(struct amdgpu_device *adev); void amdgpu_gart_dummy_page_fini(struct amdgpu_device *adev); -int amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset, - int pages); -int amdgpu_gart_map(struct amdgpu_device *adev, uint64_t offset, - int pages, dma_addr_t *dma_addr, uint64_t flags, - void *dst); -int amdgpu_gart_bind(struct amdgpu_device *adev, uint64_t offset, - int pages, dma_addr_t *dma_addr, uint64_t flags); +void amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset, + int pages); +void amdgpu_gart_map(struct amdgpu_device *adev, uint64_t offset, + int pages, dma_addr_t *dma_addr, uint64_t flags, + void *dst); +void amdgpu_gart_bind(struct amdgpu_device *adev, uint64_t offset, + int pages, dma_addr_t *dma_addr, uint64_t flags); void amdgpu_gart_invalidate_tlb(struct amdgpu_device *adev); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index c0d8f40a5b45..652571267077 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -222,16 +222,10 @@ static void amdgpu_gem_object_close(struct drm_gem_object *obj, if (!bo_va || --bo_va->ref_count) goto out_unlock; - amdgpu_vm_bo_rmv(adev, bo_va); + amdgpu_vm_bo_del(adev, bo_va); if (!amdgpu_vm_ready(vm)) goto out_unlock; - fence = dma_resv_excl_fence(bo->tbo.base.resv); - if (fence) { - amdgpu_bo_fence(bo, fence, true); - fence = NULL; - } - r = amdgpu_vm_clear_freed(adev, vm, &fence); if (r || !fence) goto out_unlock; @@ -532,7 +526,8 @@ int amdgpu_gem_wait_idle_ioctl(struct drm_device *dev, void *data, return -ENOENT; } robj = gem_to_amdgpu_bo(gobj); - ret = dma_resv_wait_timeout(robj->tbo.base.resv, true, true, timeout); + ret = dma_resv_wait_timeout(robj->tbo.base.resv, DMA_RESV_USAGE_READ, + true, timeout); /* ret == 0 means not signaled, * ret > 0 means signaled @@ -618,7 +613,7 @@ static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev, if (operation == AMDGPU_VA_OP_MAP || operation == AMDGPU_VA_OP_REPLACE) { - r = amdgpu_vm_bo_update(adev, bo_va, false, NULL); + r = amdgpu_vm_bo_update(adev, bo_va, false); if (r) goto error; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index 1916ec84dd71..28a736c507bb 
100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -266,7 +266,7 @@ static int amdgpu_gfx_kiq_acquire(struct amdgpu_device *adev, * adev->gfx.mec.num_pipe_per_mec * adev->gfx.mec.num_queue_per_pipe; - while (queue_bit-- >= 0) { + while (--queue_bit >= 0) { if (test_bit(queue_bit, adev->gfx.mec.queue_bitmap)) continue; @@ -615,74 +615,35 @@ int amdgpu_get_gfx_off_status(struct amdgpu_device *adev, uint32_t *value) mutex_lock(&adev->gfx.gfx_off_mutex); - r = smu_get_status_gfxoff(adev, value); + r = amdgpu_dpm_get_status_gfxoff(adev, value); mutex_unlock(&adev->gfx.gfx_off_mutex); return r; } -int amdgpu_gfx_ras_late_init(struct amdgpu_device *adev) +int amdgpu_gfx_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block) { int r; - struct ras_fs_if fs_info = { - .sysfs_name = "gfx_err_count", - }; - struct ras_ih_if ih_info = { - .cb = amdgpu_gfx_process_ras_data_cb, - }; - - if (!adev->gfx.ras_if) { - adev->gfx.ras_if = kmalloc(sizeof(struct ras_common_if), GFP_KERNEL); - if (!adev->gfx.ras_if) - return -ENOMEM; - adev->gfx.ras_if->block = AMDGPU_RAS_BLOCK__GFX; - adev->gfx.ras_if->type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE; - adev->gfx.ras_if->sub_block_index = 0; - } - fs_info.head = ih_info.head = *adev->gfx.ras_if; - r = amdgpu_ras_late_init(adev, adev->gfx.ras_if, - &fs_info, &ih_info); + r = amdgpu_ras_block_late_init(adev, ras_block); if (r) - goto free; + return r; - if (amdgpu_ras_is_supported(adev, adev->gfx.ras_if->block)) { + if (amdgpu_ras_is_supported(adev, ras_block->block)) { if (!amdgpu_persistent_edc_harvesting_supported(adev)) amdgpu_ras_reset_error_status(adev, AMDGPU_RAS_BLOCK__GFX); r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0); if (r) goto late_fini; - } else { - /* free gfx ras_if if ras is not supported */ - r = 0; - goto free; } return 0; late_fini: - amdgpu_ras_late_fini(adev, adev->gfx.ras_if, &ih_info); -free: - kfree(adev->gfx.ras_if); - adev->gfx.ras_if = NULL; + amdgpu_ras_block_late_fini(adev, ras_block); return r; } -void amdgpu_gfx_ras_fini(struct amdgpu_device *adev) -{ - if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX) && - adev->gfx.ras_if) { - struct ras_common_if *ras_if = adev->gfx.ras_if; - struct ras_ih_if ih_info = { - .head = *ras_if, - .cb = amdgpu_gfx_process_ras_data_cb, - }; - - amdgpu_ras_late_fini(adev, ras_if, &ih_info); - kfree(ras_if); - } -} - int amdgpu_gfx_process_ras_data_cb(struct amdgpu_device *adev, void *err_data, struct amdgpu_iv_entry *entry) @@ -695,9 +656,9 @@ int amdgpu_gfx_process_ras_data_cb(struct amdgpu_device *adev, */ if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) { kgd2kfd_set_sram_ecc_flag(adev->kfd.dev); - if (adev->gfx.ras_funcs && - adev->gfx.ras_funcs->query_ras_error_count) - adev->gfx.ras_funcs->query_ras_error_count(adev, err_data); + if (adev->gfx.ras && adev->gfx.ras->ras_block.hw_ops && + adev->gfx.ras->ras_block.hw_ops->query_ras_error_count) + adev->gfx.ras->ras_block.hw_ops->query_ras_error_count(adev, err_data); amdgpu_ras_reset_gpu(adev); } return AMDGPU_RAS_SUCCESS; @@ -852,19 +813,3 @@ int amdgpu_gfx_get_num_kcq(struct amdgpu_device *adev) } return amdgpu_num_kcq; } - -/* amdgpu_gfx_state_change_set - Handle gfx power state change set - * @adev: amdgpu_device pointer - * @state: gfx power state(1 -sGpuChangeState_D0Entry and 2 -sGpuChangeState_D3Entry) - * - */ - -void amdgpu_gfx_state_change_set(struct amdgpu_device *adev, enum gfx_change_state state) -{ - mutex_lock(&adev->pm.mutex); - if 
(adev->powerplay.pp_funcs && - adev->powerplay.pp_funcs->gfx_state_change_set) - ((adev)->powerplay.pp_funcs->gfx_state_change_set( - (adev)->powerplay.pp_handle, state)); - mutex_unlock(&adev->pm.mutex); -} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index f851196c83a5..ad8e7d486a7d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -31,6 +31,7 @@ #include "amdgpu_ring.h" #include "amdgpu_rlc.h" #include "soc15.h" +#include "amdgpu_ras.h" /* GFX current status */ #define AMDGPU_GFX_NORMAL_MODE 0x00000000L @@ -47,12 +48,6 @@ enum amdgpu_gfx_pipe_priority { AMDGPU_GFX_PIPE_PRIO_HIGH = AMDGPU_RING_PRIO_2 }; -/* Argument for PPSMC_MSG_GpuChangeState */ -enum gfx_change_state { - sGpuChangeState_D0Entry = 1, - sGpuChangeState_D3Entry, -}; - #define AMDGPU_GFX_QUEUE_PRIORITY_MINIMUM 0 #define AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM 15 @@ -188,6 +183,17 @@ struct amdgpu_gfx_config { uint32_t num_packer_per_sc; uint32_t pa_sc_tile_steering_override; uint64_t tcc_disabled_mask; + uint32_t gc_num_tcp_per_sa; + uint32_t gc_num_sdp_interface; + uint32_t gc_num_tcps; + uint32_t gc_num_tcp_per_wpg; + uint32_t gc_tcp_l1_size; + uint32_t gc_num_sqc_per_wgp; + uint32_t gc_l1_instruction_cache_size_per_sqc; + uint32_t gc_l1_data_cache_size_per_sqc; + uint32_t gc_gl1c_per_sa; + uint32_t gc_gl1c_size_per_instance; + uint32_t gc_gl2c_per_gpu; }; struct amdgpu_cu_info { @@ -204,17 +210,10 @@ struct amdgpu_cu_info { uint32_t bitmap[4][4]; }; -struct amdgpu_gfx_ras_funcs { - int (*ras_late_init)(struct amdgpu_device *adev); - void (*ras_fini)(struct amdgpu_device *adev); - int (*ras_error_inject)(struct amdgpu_device *adev, - void *inject_if); - int (*query_ras_error_count)(struct amdgpu_device *adev, - void *ras_error_status); - void (*reset_ras_error_count)(struct amdgpu_device *adev); - void (*query_ras_error_status)(struct amdgpu_device *adev); - void (*reset_ras_error_status)(struct amdgpu_device *adev); +struct amdgpu_gfx_ras { + struct amdgpu_ras_block_object ras_block; void (*enable_watchdog_timer)(struct amdgpu_device *adev); + bool (*query_utcl2_poison_status)(struct amdgpu_device *adev); }; struct amdgpu_gfx_funcs { @@ -337,7 +336,7 @@ struct amdgpu_gfx { /*ras */ struct ras_common_if *ras_if; - const struct amdgpu_gfx_ras_funcs *ras_funcs; + struct amdgpu_gfx_ras *ras; }; #define amdgpu_gfx_get_gpu_clock_counter(adev) (adev)->gfx.funcs->get_gpu_clock_counter((adev)) @@ -399,8 +398,7 @@ bool amdgpu_gfx_is_me_queue_enabled(struct amdgpu_device *adev, int me, int pipe, int queue); void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable); int amdgpu_get_gfx_off_status(struct amdgpu_device *adev, uint32_t *value); -int amdgpu_gfx_ras_late_init(struct amdgpu_device *adev); -void amdgpu_gfx_ras_fini(struct amdgpu_device *adev); +int amdgpu_gfx_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block); int amdgpu_gfx_process_ras_data_cb(struct amdgpu_device *adev, void *err_data, struct amdgpu_iv_entry *entry); @@ -410,5 +408,4 @@ int amdgpu_gfx_cp_ecc_error_irq(struct amdgpu_device *adev, uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg); void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v); int amdgpu_gfx_get_num_kcq(struct amdgpu_device *adev); -void amdgpu_gfx_state_change_set(struct amdgpu_device *adev, enum gfx_change_state state); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c index 
2430d6223c2d..88b852b3a2cb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c @@ -25,6 +25,9 @@ */ #include <linux/io-64-nonatomic-lo-hi.h> +#ifdef CONFIG_X86 +#include <asm/hypervisor.h> +#endif #include "amdgpu.h" #include "amdgpu_gmc.h" @@ -436,82 +439,25 @@ void amdgpu_gmc_filter_faults_remove(struct amdgpu_device *adev, uint64_t addr, } while (fault->timestamp < tmp); } -int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev) +int amdgpu_gmc_ras_early_init(struct amdgpu_device *adev) { - int r; - - if (adev->umc.ras_funcs && - adev->umc.ras_funcs->ras_late_init) { - r = adev->umc.ras_funcs->ras_late_init(adev); - if (r) - return r; - } - - if (adev->mmhub.ras_funcs && - adev->mmhub.ras_funcs->ras_late_init) { - r = adev->mmhub.ras_funcs->ras_late_init(adev); - if (r) - return r; - } - - if (!adev->gmc.xgmi.connected_to_cpu) - adev->gmc.xgmi.ras_funcs = &xgmi_ras_funcs; - - if (adev->gmc.xgmi.ras_funcs && - adev->gmc.xgmi.ras_funcs->ras_late_init) { - r = adev->gmc.xgmi.ras_funcs->ras_late_init(adev); - if (r) - return r; + if (!adev->gmc.xgmi.connected_to_cpu) { + adev->gmc.xgmi.ras = &xgmi_ras; + amdgpu_ras_register_ras_block(adev, &adev->gmc.xgmi.ras->ras_block); + adev->gmc.xgmi.ras_if = &adev->gmc.xgmi.ras->ras_block.ras_comm; } - if (adev->hdp.ras_funcs && - adev->hdp.ras_funcs->ras_late_init) { - r = adev->hdp.ras_funcs->ras_late_init(adev); - if (r) - return r; - } - - if (adev->mca.mp0.ras_funcs && - adev->mca.mp0.ras_funcs->ras_late_init) { - r = adev->mca.mp0.ras_funcs->ras_late_init(adev); - if (r) - return r; - } - - if (adev->mca.mp1.ras_funcs && - adev->mca.mp1.ras_funcs->ras_late_init) { - r = adev->mca.mp1.ras_funcs->ras_late_init(adev); - if (r) - return r; - } - - if (adev->mca.mpio.ras_funcs && - adev->mca.mpio.ras_funcs->ras_late_init) { - r = adev->mca.mpio.ras_funcs->ras_late_init(adev); - if (r) - return r; - } + return 0; +} +int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev) +{ return 0; } void amdgpu_gmc_ras_fini(struct amdgpu_device *adev) { - if (adev->umc.ras_funcs && - adev->umc.ras_funcs->ras_fini) - adev->umc.ras_funcs->ras_fini(adev); - if (adev->mmhub.ras_funcs && - adev->mmhub.ras_funcs->ras_fini) - adev->mmhub.ras_funcs->ras_fini(adev); - - if (adev->gmc.xgmi.ras_funcs && - adev->gmc.xgmi.ras_funcs->ras_fini) - adev->gmc.xgmi.ras_funcs->ras_fini(adev); - - if (adev->hdp.ras_funcs && - adev->hdp.ras_funcs->ras_fini) - adev->hdp.ras_funcs->ras_fini(adev); } /* @@ -584,6 +530,7 @@ void amdgpu_gmc_tmz_set(struct amdgpu_device *adev) case CHIP_NAVI12: case CHIP_VANGOGH: case CHIP_YELLOW_CARP: + case CHIP_IP_DISCOVERY: /* Don't enable it by default yet. */ if (amdgpu_tmz < 1) { @@ -615,11 +562,17 @@ void amdgpu_gmc_noretry_set(struct amdgpu_device *adev) { struct amdgpu_gmc *gmc = &adev->gmc; - switch (adev->asic_type) { - case CHIP_VEGA10: - case CHIP_VEGA20: - case CHIP_ARCTURUS: - case CHIP_ALDEBARAN: + switch (adev->ip_versions[GC_HWIP][0]) { + case IP_VERSION(9, 0, 1): + case IP_VERSION(9, 3, 0): + case IP_VERSION(9, 4, 0): + case IP_VERSION(9, 4, 1): + case IP_VERSION(9, 4, 2): + case IP_VERSION(10, 3, 3): + case IP_VERSION(10, 3, 4): + case IP_VERSION(10, 3, 5): + case IP_VERSION(10, 3, 6): + case IP_VERSION(10, 3, 7): /* * noretry = 0 will cause kfd page fault tests fail * for some ASICs, so set default to 1 for these ASICs. 
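The switch above now keys off adev->ip_versions[...] rather than asic_type, a conversion repeated throughout this patch. It works because IP_VERSION packs a (major, minor, revision) triple into one integer whose numeric order matches the version order. Upstream defines the macro along these lines (a sketch; see amdgpu.h for the real definition):

    #include <stdio.h>

    #define IP_VERSION(mj, mn, rv) (((mj) << 16) | ((mn) << 8) | (rv))

    int main(void)
    {
            unsigned int gc = IP_VERSION(10, 3, 0);

            /* ">= IP_VERSION(10, 3, 0)" comparisons work because the
             * packing preserves (major, minor, revision) ordering. */
            printf("GC 10.3.0 packs to 0x%06x\n", gc);
            printf("10.3.1 newer than 10.3.0? %d\n",
                   IP_VERSION(10, 3, 1) >= gc);
            return 0;
    }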
@@ -629,7 +582,6 @@ void amdgpu_gmc_noretry_set(struct amdgpu_device *adev) else gmc->noretry = amdgpu_noretry; break; - case CHIP_RAVEN: default: /* Raven currently has issues with noretry * regardless of what we decide for other @@ -680,6 +632,13 @@ void amdgpu_gmc_get_vbios_allocations(struct amdgpu_device *adev) unsigned size; /* + * Some ASICs need to reserve a region of video memory to avoid access + * from driver + */ + adev->mman.stolen_reserved_offset = 0; + adev->mman.stolen_reserved_size = 0; + + /* * TODO: * Currently there is a bug where some memory client outside * of the driver writes to first 8M of VRAM on S3 resume, @@ -689,10 +648,27 @@ */ switch (adev->asic_type) { case CHIP_VEGA10: + adev->mman.keep_stolen_vga_memory = true; + /* + * VEGA10 SRIOV VF with MS_HYPERV host needs some firmware reserved area. + */ +#ifdef CONFIG_X86 + if (amdgpu_sriov_vf(adev) && hypervisor_is_type(X86_HYPER_MS_HYPERV)) { + adev->mman.stolen_reserved_offset = 0x500000; + adev->mman.stolen_reserved_size = 0x200000; + } +#endif + break; case CHIP_RAVEN: case CHIP_RENOIR: adev->mman.keep_stolen_vga_memory = true; break; + case CHIP_YELLOW_CARP: + if (amdgpu_discovery == 0) { + adev->mman.stolen_reserved_offset = 0x1ffb0000; + adev->mman.stolen_reserved_size = 64 * PAGE_SIZE; + } + break; default: adev->mman.keep_stolen_vga_memory = false; break; @@ -813,21 +789,48 @@ uint64_t amdgpu_gmc_vram_cpu_pa(struct amdgpu_device *adev, struct amdgpu_bo *bo) return amdgpu_bo_gpu_offset(bo) - adev->gmc.vram_start + adev->gmc.aper_base; } -void amdgpu_gmc_get_reserved_allocation(struct amdgpu_device *adev) +int amdgpu_gmc_vram_checking(struct amdgpu_device *adev) { - /* Some ASICs need to reserve a region of video memory to avoid access - * from driver */ - adev->mman.stolen_reserved_offset = 0; - adev->mman.stolen_reserved_size = 0; + struct amdgpu_bo *vram_bo = NULL; + uint64_t vram_gpu = 0; + void *vram_ptr = NULL; + + int ret, size = 0x100000; + uint8_t cptr[10]; + + ret = amdgpu_bo_create_kernel(adev, size, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM, + &vram_bo, + &vram_gpu, + &vram_ptr); + if (ret) + return ret; + + memset(vram_ptr, 0x86, size); + memset(cptr, 0x86, 10); + + /** + * Check the start, the middle, and the end of the memory to see whether + * the content of each byte is the pattern "0x86". If yes, we suppose the + * VRAM BO is workable. + * + * Note: Checking every byte of the whole 1M BO would cost too many + * seconds, so here we just pick three parts as a spot check. 
+ */ + ret = memcmp(vram_ptr, cptr, 10); + if (ret) + return ret; - switch (adev->asic_type) { - case CHIP_YELLOW_CARP: - if (amdgpu_discovery == 0) { - adev->mman.stolen_reserved_offset = 0x1ffb0000; - adev->mman.stolen_reserved_size = 64 * PAGE_SIZE; - } - break; - default: - break; - } + ret = memcmp(vram_ptr + (size / 2), cptr, 10); + if (ret) + return ret; + + ret = memcmp(vram_ptr + size - 10, cptr, 10); + if (ret) + return ret; + + amdgpu_bo_free_kernel(&vram_bo, &vram_gpu, + &vram_ptr); + + return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h index 8458cebc6d5b..e7dc069c4512 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h @@ -29,6 +29,7 @@ #include <linux/types.h> #include "amdgpu_irq.h" +#include "amdgpu_ras.h" /* VA hole for 48bit addresses on Vega10 */ #define AMDGPU_GMC_HOLE_START 0x0000800000000000ULL @@ -135,12 +136,8 @@ struct amdgpu_gmc_funcs { unsigned int (*get_vbios_fb_size)(struct amdgpu_device *adev); }; -struct amdgpu_xgmi_ras_funcs { - int (*ras_late_init)(struct amdgpu_device *adev); - void (*ras_fini)(struct amdgpu_device *adev); - int (*query_ras_error_count)(struct amdgpu_device *adev, - void *ras_error_status); - void (*reset_ras_error_count)(struct amdgpu_device *adev); +struct amdgpu_xgmi_ras { + struct amdgpu_ras_block_object ras_block; }; struct amdgpu_xgmi { @@ -159,7 +156,7 @@ struct amdgpu_xgmi { struct ras_common_if *ras_if; bool connected_to_cpu; bool pending_reset; - const struct amdgpu_xgmi_ras_funcs *ras_funcs; + struct amdgpu_xgmi_ras *ras; }; struct amdgpu_gmc { @@ -260,6 +257,11 @@ struct amdgpu_gmc { struct amdgpu_bo *pdb0_bo; /* CPU kmapped address of pdb0*/ void *ptr_pdb0; + + /* MALL size */ + u64 mall_size; + /* number of UMC instances */ + int num_umc; }; #define amdgpu_gmc_flush_gpu_tlb(adev, vmid, vmhub, type) ((adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid), (vmhub), (type))) @@ -321,6 +323,7 @@ bool amdgpu_gmc_filter_faults(struct amdgpu_device *adev, uint16_t pasid, uint64_t timestamp); void amdgpu_gmc_filter_faults_remove(struct amdgpu_device *adev, uint64_t addr, uint16_t pasid); +int amdgpu_gmc_ras_early_init(struct amdgpu_device *adev); int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev); void amdgpu_gmc_ras_fini(struct amdgpu_device *adev); int amdgpu_gmc_allocate_vm_inv_eng(struct amdgpu_device *adev); @@ -333,10 +336,10 @@ amdgpu_gmc_set_vm_fault_masks(struct amdgpu_device *adev, int hub_type, bool enable); void amdgpu_gmc_get_vbios_allocations(struct amdgpu_device *adev); -void amdgpu_gmc_get_reserved_allocation(struct amdgpu_device *adev); void amdgpu_gmc_init_pdb0(struct amdgpu_device *adev); uint64_t amdgpu_gmc_vram_mc2pa(struct amdgpu_device *adev, uint64_t mc_addr); uint64_t amdgpu_gmc_vram_pa(struct amdgpu_device *adev, struct amdgpu_bo *bo); uint64_t amdgpu_gmc_vram_cpu_pa(struct amdgpu_device *adev, struct amdgpu_bo *bo); +int amdgpu_gmc_vram_checking(struct amdgpu_device *adev); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c index 72022df264f6..8c6b2284cf56 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c @@ -26,23 +26,12 @@ #include "amdgpu.h" -struct amdgpu_gtt_node { - struct ttm_buffer_object *tbo; - struct ttm_range_mgr_node base; -}; - static inline struct amdgpu_gtt_mgr * to_gtt_mgr(struct ttm_resource_manager *man) { return container_of(man, struct amdgpu_gtt_mgr, manager); } 
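to_gtt_mgr() above, like the to_ttm_range_mgr_node() casts that replace the deleted to_amdgpu_gtt_node() just below, is the standard container_of() embedding idiom: recover the outer struct from a pointer to a member. A freestanding userspace rendition with invented struct names:

    #include <stddef.h>
    #include <stdio.h>

    /* Userspace stand-in for the kernel's container_of() */
    #define container_of(ptr, type, member) \
            ((type *)((char *)(ptr) - offsetof(type, member)))

    struct manager { int size; };
    struct gtt_mgr {
            int dummy;
            struct manager manager; /* embedded, as in amdgpu_gtt_mgr */
    };

    int main(void)
    {
            struct gtt_mgr mgr = { .manager.size = 42 };
            struct manager *man = &mgr.manager;
            struct gtt_mgr *back = container_of(man, struct gtt_mgr, manager);

            printf("round-trip ok: %d\n", back == &mgr); /* prints 1 */
            return 0;
    }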
-static inline struct amdgpu_gtt_node * -to_amdgpu_gtt_node(struct ttm_resource *res) -{ - return container_of(res, struct amdgpu_gtt_node, base.base); -} - /** * DOC: mem_info_gtt_total * @@ -60,7 +49,7 @@ static ssize_t amdgpu_mem_info_gtt_total_show(struct device *dev, struct ttm_resource_manager *man; man = ttm_manager_type(&adev->mman.bdev, TTM_PL_TT); - return sysfs_emit(buf, "%llu\n", man->size * PAGE_SIZE); + return sysfs_emit(buf, "%llu\n", man->size); } /** @@ -77,8 +66,9 @@ static ssize_t amdgpu_mem_info_gtt_used_show(struct device *dev, { struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = drm_to_adev(ddev); + struct ttm_resource_manager *man = &adev->mman.gtt_mgr.manager; - return sysfs_emit(buf, "%llu\n", amdgpu_gtt_mgr_usage(&adev->mman.gtt_mgr)); + return sysfs_emit(buf, "%llu\n", ttm_resource_manager_usage(man)); } static DEVICE_ATTR(mem_info_gtt_total, S_IRUGO, @@ -105,9 +95,9 @@ const struct attribute_group amdgpu_gtt_mgr_attr_group = { */ bool amdgpu_gtt_mgr_has_gart_addr(struct ttm_resource *res) { - struct amdgpu_gtt_node *node = to_amdgpu_gtt_node(res); + struct ttm_range_mgr_node *node = to_ttm_range_mgr_node(res); - return drm_mm_node_allocated(&node->base.mm_nodes[0]); + return drm_mm_node_allocated(&node->mm_nodes[0]); } /** @@ -127,28 +117,23 @@ static int amdgpu_gtt_mgr_new(struct ttm_resource_manager *man, { struct amdgpu_gtt_mgr *mgr = to_gtt_mgr(man); uint32_t num_pages = PFN_UP(tbo->base.size); - struct amdgpu_gtt_node *node; + struct ttm_range_mgr_node *node; int r; - if (!(place->flags & TTM_PL_FLAG_TEMPORARY) && - atomic64_add_return(num_pages, &mgr->used) > man->size) { - atomic64_sub(num_pages, &mgr->used); - return -ENOSPC; - } + node = kzalloc(struct_size(node, mm_nodes, 1), GFP_KERNEL); + if (!node) + return -ENOMEM; - node = kzalloc(struct_size(node, base.mm_nodes, 1), GFP_KERNEL); - if (!node) { - r = -ENOMEM; - goto err_out; + ttm_resource_init(tbo, place, &node->base); + if (!(place->flags & TTM_PL_FLAG_TEMPORARY) && + ttm_resource_manager_usage(man) > man->size) { + r = -ENOSPC; + goto err_free; } - node->tbo = tbo; - ttm_resource_init(tbo, place, &node->base.base); - if (place->lpfn) { spin_lock(&mgr->lock); - r = drm_mm_insert_node_in_range(&mgr->mm, - &node->base.mm_nodes[0], + r = drm_mm_insert_node_in_range(&mgr->mm, &node->mm_nodes[0], num_pages, tbo->page_alignment, 0, place->fpfn, place->lpfn, DRM_MM_INSERT_BEST); @@ -156,23 +141,19 @@ static int amdgpu_gtt_mgr_new(struct ttm_resource_manager *man, if (unlikely(r)) goto err_free; - node->base.base.start = node->base.mm_nodes[0].start; + node->base.start = node->mm_nodes[0].start; } else { - node->base.mm_nodes[0].start = 0; - node->base.mm_nodes[0].size = node->base.base.num_pages; - node->base.base.start = AMDGPU_BO_INVALID_OFFSET; + node->mm_nodes[0].start = 0; + node->mm_nodes[0].size = node->base.num_pages; + node->base.start = AMDGPU_BO_INVALID_OFFSET; } - *res = &node->base.base; + *res = &node->base; return 0; err_free: + ttm_resource_fini(man, &node->base); kfree(node); - -err_out: - if (!(place->flags & TTM_PL_FLAG_TEMPORARY)) - atomic64_sub(num_pages, &mgr->used); - return r; } @@ -187,59 +168,40 @@ err_out: static void amdgpu_gtt_mgr_del(struct ttm_resource_manager *man, struct ttm_resource *res) { - struct amdgpu_gtt_node *node = to_amdgpu_gtt_node(res); + struct ttm_range_mgr_node *node = to_ttm_range_mgr_node(res); struct amdgpu_gtt_mgr *mgr = to_gtt_mgr(man); spin_lock(&mgr->lock); - if (drm_mm_node_allocated(&node->base.mm_nodes[0])) - 
drm_mm_remove_node(&node->base.mm_nodes[0]); + if (drm_mm_node_allocated(&node->mm_nodes[0])) + drm_mm_remove_node(&node->mm_nodes[0]); spin_unlock(&mgr->lock); - if (!(res->placement & TTM_PL_FLAG_TEMPORARY)) - atomic64_sub(res->num_pages, &mgr->used); - + ttm_resource_fini(man, res); kfree(node); } /** - * amdgpu_gtt_mgr_usage - return usage of GTT domain - * - * @mgr: amdgpu_gtt_mgr pointer - * - * Return how many bytes are used in the GTT domain - */ -uint64_t amdgpu_gtt_mgr_usage(struct amdgpu_gtt_mgr *mgr) -{ - return atomic64_read(&mgr->used) * PAGE_SIZE; -} - -/** * amdgpu_gtt_mgr_recover - re-init gart * * @mgr: amdgpu_gtt_mgr pointer * * Re-init the gart for each known BO in the GTT. */ -int amdgpu_gtt_mgr_recover(struct amdgpu_gtt_mgr *mgr) +void amdgpu_gtt_mgr_recover(struct amdgpu_gtt_mgr *mgr) { - struct amdgpu_gtt_node *node; + struct ttm_range_mgr_node *node; struct drm_mm_node *mm_node; struct amdgpu_device *adev; - int r = 0; adev = container_of(mgr, typeof(*adev), mman.gtt_mgr); spin_lock(&mgr->lock); drm_mm_for_each_node(mm_node, &mgr->mm) { - node = container_of(mm_node, typeof(*node), base.mm_nodes[0]); - r = amdgpu_ttm_recover_gart(node->tbo); - if (r) - break; + node = container_of(mm_node, typeof(*node), mm_nodes[0]); + amdgpu_ttm_recover_gart(node->base.bo); } spin_unlock(&mgr->lock); amdgpu_gart_invalidate_tlb(adev); - - return r; } /** @@ -258,9 +220,6 @@ static void amdgpu_gtt_mgr_debug(struct ttm_resource_manager *man, spin_lock(&mgr->lock); drm_mm_print(&mgr->mm, printer); spin_unlock(&mgr->lock); - - drm_printf(printer, "man size:%llu pages, gtt used:%llu pages\n", - man->size, atomic64_read(&mgr->used)); } static const struct ttm_resource_manager_func amdgpu_gtt_mgr_func = { @@ -286,13 +245,12 @@ int amdgpu_gtt_mgr_init(struct amdgpu_device *adev, uint64_t gtt_size) man->use_tt = true; man->func = &amdgpu_gtt_mgr_func; - ttm_resource_manager_init(man, gtt_size >> PAGE_SHIFT); + ttm_resource_manager_init(man, &adev->mman.bdev, gtt_size); start = AMDGPU_GTT_MAX_TRANSFER_SIZE * AMDGPU_GTT_NUM_TRANSFER_WINDOWS; size = (adev->gmc.gart_size >> PAGE_SHIFT) - start; drm_mm_init(&mgr->mm, start, size); spin_lock_init(&mgr->lock); - atomic64_set(&mgr->used, 0); ttm_set_driver_manager(&adev->mman.bdev, TTM_PL_TT, &mgr->manager); ttm_resource_manager_set_used(man, true); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h index 7ec99d591584..ac5c61d3de2b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h @@ -22,13 +22,10 @@ */ #ifndef __AMDGPU_HDP_H__ #define __AMDGPU_HDP_H__ +#include "amdgpu_ras.h" -struct amdgpu_hdp_ras_funcs { - int (*ras_late_init)(struct amdgpu_device *adev); - void (*ras_fini)(struct amdgpu_device *adev); - void (*query_ras_error_count)(struct amdgpu_device *adev, - void *ras_error_status); - void (*reset_ras_error_count)(struct amdgpu_device *adev); +struct amdgpu_hdp_ras { + struct amdgpu_ras_block_object ras_block; }; struct amdgpu_hdp_funcs { @@ -36,16 +33,15 @@ struct amdgpu_hdp_funcs { void (*invalidate_hdp)(struct amdgpu_device *adev, struct amdgpu_ring *ring); void (*update_clock_gating)(struct amdgpu_device *adev, bool enable); - void (*get_clock_gating_state)(struct amdgpu_device *adev, u32 *flags); + void (*get_clock_gating_state)(struct amdgpu_device *adev, u64 *flags); void (*init_registers)(struct amdgpu_device *adev); }; struct amdgpu_hdp { struct ras_common_if *ras_if; const struct amdgpu_hdp_funcs *funcs; - const struct 
amdgpu_hdp_ras_funcs *ras_funcs; + struct amdgpu_hdp_ras *ras; }; -int amdgpu_hdp_ras_late_init(struct amdgpu_device *adev); -void amdgpu_hdp_ras_fini(struct amdgpu_device *adev); +int amdgpu_hdp_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block); #endif /* __AMDGPU_HDP_H__ */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index bc1297dcdf97..d583766ea392 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -166,8 +166,8 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, } if ((ib->flags & AMDGPU_IB_FLAGS_SECURE) && - (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE)) { - dev_err(adev->dev, "secure submissions not supported on compute rings\n"); + (!ring->funcs->secure_submission_supported)) { + dev_err(adev->dev, "secure submissions not supported on ring <%s>\n", ring->name); return -EINVAL; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c index b7fb72bff2c1..03d115d2b5ed 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c @@ -107,36 +107,19 @@ static void amdgpu_pasid_free_cb(struct dma_fence *fence, void amdgpu_pasid_free_delayed(struct dma_resv *resv, u32 pasid) { - struct dma_fence *fence, **fences; struct amdgpu_pasid_cb *cb; - unsigned count; + struct dma_fence *fence; int r; - r = dma_resv_get_fences(resv, NULL, &count, &fences); + r = dma_resv_get_singleton(resv, DMA_RESV_USAGE_BOOKKEEP, &fence); if (r) goto fallback; - if (count == 0) { + if (!fence) { amdgpu_pasid_free(pasid); return; } - if (count == 1) { - fence = fences[0]; - kfree(fences); - } else { - uint64_t context = dma_fence_context_alloc(1); - struct dma_fence_array *array; - - array = dma_fence_array_create(count, fences, context, - 1, false); - if (!array) { - kfree(fences); - goto fallback; - } - fence = &array->base; - } - cb = kmalloc(sizeof(*cb), GFP_KERNEL); if (!cb) { /* Last resort when we are OOM */ @@ -156,7 +139,8 @@ fallback: /* Not enough memory for the delayed delete, as last resort * block for all the fences to complete. 
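Several hunks in this patch (amdgpu_display.c, amdgpu_gem.c, and the PASID code here) move dma_resv callers from boolean wait-all arguments to explicit DMA_RESV_USAGE_* classes. A hedged sketch of the new call shape with a hypothetical helper; DMA_RESV_USAGE_BOOKKEEP is the widest class and matches the uninterruptible fallback wait below:

    #include <linux/dma-resv.h>
    #include <linux/sched.h> /* MAX_SCHEDULE_TIMEOUT */

    /* wait_for_bo_fences() is an illustrative name, not driver code */
    static long wait_for_bo_fences(struct dma_resv *resv)
    {
            /* usage class instead of the old wait-all boolean;
             * intr=false makes the wait uninterruptible */
            return dma_resv_wait_timeout(resv, DMA_RESV_USAGE_BOOKKEEP,
                                         false, MAX_SCHEDULE_TIMEOUT);
    }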
*/ - dma_resv_wait_timeout(resv, true, false, MAX_SCHEDULE_TIMEOUT); + dma_resv_wait_timeout(resv, DMA_RESV_USAGE_BOOKKEEP, + false, MAX_SCHEDULE_TIMEOUT); amdgpu_pasid_free(pasid); } @@ -204,7 +188,7 @@ static int amdgpu_vmid_grab_idle(struct amdgpu_vm *vm, unsigned i; int r; - if (ring->vmid_wait && !dma_fence_is_signaled(ring->vmid_wait)) + if (!dma_fence_is_signaled(ring->vmid_wait)) return amdgpu_sync_fence(sync, ring->vmid_wait); fences = kmalloc_array(id_mgr->num_ids, sizeof(void *), GFP_KERNEL); @@ -276,19 +260,15 @@ static int amdgpu_vmid_grab_reserved(struct amdgpu_vm *vm, struct amdgpu_device *adev = ring->adev; unsigned vmhub = ring->funcs->vmhub; uint64_t fence_context = adev->fence_context + ring->idx; - struct dma_fence *updates = sync->last_vm_update; bool needs_flush = vm->use_cpu_for_update; - int r = 0; + uint64_t updates = amdgpu_vm_tlb_seq(vm); + int r; *id = vm->reserved_vmid[vmhub]; - if (updates && (*id)->flushed_updates && - updates->context == (*id)->flushed_updates->context && - !dma_fence_is_later(updates, (*id)->flushed_updates)) - updates = NULL; - if ((*id)->owner != vm->immediate.fence_context || - job->vm_pd_addr != (*id)->pd_gpu_addr || - updates || !(*id)->last_flush || + (*id)->pd_gpu_addr != job->vm_pd_addr || + (*id)->flushed_updates < updates || + !(*id)->last_flush || ((*id)->last_flush->context != fence_context && !dma_fence_is_signaled((*id)->last_flush))) { struct dma_fence *tmp; @@ -302,8 +282,7 @@ static int amdgpu_vmid_grab_reserved(struct amdgpu_vm *vm, tmp = amdgpu_sync_peek_fence(&(*id)->active, ring); if (tmp) { *id = NULL; - r = amdgpu_sync_fence(sync, tmp); - return r; + return amdgpu_sync_fence(sync, tmp); } needs_flush = true; } @@ -315,10 +294,7 @@ static int amdgpu_vmid_grab_reserved(struct amdgpu_vm *vm, if (r) return r; - if (updates) { - dma_fence_put((*id)->flushed_updates); - (*id)->flushed_updates = dma_fence_get(updates); - } + (*id)->flushed_updates = updates; job->vm_needs_flush = needs_flush; return 0; } @@ -346,7 +322,7 @@ static int amdgpu_vmid_grab_used(struct amdgpu_vm *vm, unsigned vmhub = ring->funcs->vmhub; struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub]; uint64_t fence_context = adev->fence_context + ring->idx; - struct dma_fence *updates = sync->last_vm_update; + uint64_t updates = amdgpu_vm_tlb_seq(vm); int r; job->vm_needs_flush = vm->use_cpu_for_update; @@ -354,7 +330,6 @@ static int amdgpu_vmid_grab_used(struct amdgpu_vm *vm, /* Check if we can use a VMID already assigned to this VM */ list_for_each_entry_reverse((*id), &id_mgr->ids_lru, list) { bool needs_flush = vm->use_cpu_for_update; - struct dma_fence *flushed; /* Check all the prerequisites to using this VMID */ if ((*id)->owner != vm->immediate.fence_context) @@ -368,8 +343,7 @@ static int amdgpu_vmid_grab_used(struct amdgpu_vm *vm, !dma_fence_is_signaled((*id)->last_flush))) needs_flush = true; - flushed = (*id)->flushed_updates; - if (updates && (!flushed || dma_fence_is_later(updates, flushed))) + if ((*id)->flushed_updates < updates) needs_flush = true; if (needs_flush && !adev->vm_manager.concurrent_flush) @@ -382,11 +356,7 @@ static int amdgpu_vmid_grab_used(struct amdgpu_vm *vm, if (r) return r; - if (updates && (!flushed || dma_fence_is_later(updates, flushed))) { - dma_fence_put((*id)->flushed_updates); - (*id)->flushed_updates = dma_fence_get(updates); - } - + (*id)->flushed_updates = updates; job->vm_needs_flush |= needs_flush; return 0; } @@ -432,8 +402,6 @@ int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring, 
goto error; if (!id) { - struct dma_fence *updates = sync->last_vm_update; - /* Still no ID to use? Then use the idle one found earlier */ id = idle; @@ -442,8 +410,7 @@ int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring, if (r) goto error; - dma_fence_put(id->flushed_updates); - id->flushed_updates = dma_fence_get(updates); + id->flushed_updates = amdgpu_vm_tlb_seq(vm); job->vm_needs_flush = true; } @@ -610,7 +577,6 @@ void amdgpu_vmid_mgr_fini(struct amdgpu_device *adev) struct amdgpu_vmid *id = &id_mgr->ids[j]; amdgpu_sync_free(&id->active); - dma_fence_put(id->flushed_updates); dma_fence_put(id->last_flush); dma_fence_put(id->pasid_mapping); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h index 0c3b4fa1f936..06c8a0034fa5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h @@ -47,7 +47,7 @@ struct amdgpu_vmid { uint64_t pd_gpu_addr; /* last flushed PD/PT update */ - struct dma_fence *flushed_updates; + uint64_t flushed_updates; uint32_t current_gpu_reset_count; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c index f5cbc2747ac6..b4cf8717f554 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c @@ -193,20 +193,7 @@ static irqreturn_t amdgpu_irq_handler(int irq, void *arg) if (ret == IRQ_HANDLED) pm_runtime_mark_last_busy(dev->dev); - /* For the hardware that cannot enable bif ring for both ras_controller_irq - * and ras_err_evnet_athub_irq ih cookies, the driver has to poll status - * register to check whether the interrupt is triggered or not, and properly - * ack the interrupt if it is there - */ - if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__PCIE_BIF)) { - if (adev->nbio.ras_funcs && - adev->nbio.ras_funcs->handle_ras_controller_intr_no_bifring) - adev->nbio.ras_funcs->handle_ras_controller_intr_no_bifring(adev); - - if (adev->nbio.ras_funcs && - adev->nbio.ras_funcs->handle_ras_err_event_athub_intr_no_bifring) - adev->nbio.ras_funcs->handle_ras_err_event_athub_intr_no_bifring(adev); - } + amdgpu_ras_interrupt_fatal_error_handler(adev); return ret; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index bfc47bea23db..67f66f2f1809 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -37,6 +37,7 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job) struct amdgpu_task_info ti; struct amdgpu_device *adev = ring->adev; int idx; + int r; if (!drm_dev_enter(adev_to_drm(adev), &idx)) { DRM_INFO("%s - device unplugged skipping recovery on scheduler:%s", @@ -63,7 +64,9 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job) ti.process_name, ti.tgid, ti.task_name, ti.pid); if (amdgpu_device_should_recover_gpu(ring->adev)) { - amdgpu_device_gpu_recover(ring->adev, job); + r = amdgpu_device_gpu_recover_imp(ring->adev, job); + if (r) + DRM_ERROR("GPU Recovery Failed: %d\n", r); } else { drm_sched_suspend_timeout(&ring->sched); if (amdgpu_sriov_vf(adev)) @@ -78,14 +81,10 @@ exit: int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs, struct amdgpu_job **job, struct amdgpu_vm *vm) { - size_t size = sizeof(struct amdgpu_job); - if (num_ibs == 0) return -EINVAL; - size += sizeof(struct amdgpu_ib) * num_ibs; - - *job = kzalloc(size, GFP_KERNEL); + *job = kzalloc(struct_size(*job, ibs, num_ibs), GFP_KERNEL); if (!*job) return -ENOMEM; 
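The amdgpu_job_alloc() hunk directly above replaces the hand-rolled size computation with the kernel's struct_size() helper from <linux/overflow.h>, pairing with the amdgpu_job.h hunk that follows to turn the IB array into a trailing flexible array member, so the job and its IBs come from one overflow-checked allocation. A user-space sketch of the same layout, with calloc standing in for kzalloc and illustrative type names:

#include <stdint.h>
#include <stdlib.h>

struct ib {
	uint64_t gpu_addr;
	uint32_t length_dw;
};

/* Mirrors the new amdgpu_job layout: a count plus a flexible array. */
struct job {
	uint32_t num_ibs;
	struct ib ibs[];	/* storage allocated together with the job */
};

static struct job *job_alloc(uint32_t num_ibs)
{
	/* In the kernel, struct_size(job, ibs, num_ibs) also guards this
	 * size computation against integer overflow. */
	struct job *job = calloc(1, sizeof(*job) + num_ibs * sizeof(struct ib));

	if (job)
		job->num_ibs = num_ibs;
	return job;
}

The amdgpu_ids.c and amdgpu_ids.h hunks further up make a similar simplification: fence-based tracking of page-table updates gives way to a monotonic 64-bit sequence number, where amdgpu_vm_tlb_seq(vm) returns the VM's current counter and a VMID needs a flush whenever its recorded value lags behind it. A minimal stand-alone sketch of that idea (hypothetical struct and function names, not the driver's actual API):

#include <stdbool.h>
#include <stdint.h>

/* Hypothetical stand-ins for amdgpu_vm / amdgpu_vmid state. */
struct vm_state {
	uint64_t tlb_seq;		/* bumped on every page-table update */
};

struct vmid_state {
	uint64_t flushed_updates;	/* sequence number last flushed */
};

/* A TLB flush is needed iff the VMID lags the VM's update counter. */
static bool vmid_needs_flush(const struct vmid_state *id,
			     const struct vm_state *vm)
{
	return id->flushed_updates < vm->tlb_seq;
}

/* Marking the VMID up to date is a plain store, with no fence get/put. */
static void vmid_mark_flushed(struct vmid_state *id,
			      const struct vm_state *vm)
{
	id->flushed_updates = vm->tlb_seq;
}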
@@ -95,7 +94,6 @@ int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs, */ (*job)->base.sched = &adev->rings[0]->sched; (*job)->vm = vm; - (*job)->ibs = (void *)&(*job)[1]; (*job)->num_ibs = num_ibs; amdgpu_sync_create(&(*job)->sync); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h index 9e65730193b8..d599c0540b46 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h @@ -23,6 +23,10 @@ #ifndef __AMDGPU_JOB_H__ #define __AMDGPU_JOB_H__ +#include <drm/gpu_scheduler.h> +#include "amdgpu_sync.h" +#include "amdgpu_ring.h" + /* bit set means command submit involves a preamble IB */ #define AMDGPU_PREAMBLE_IB_PRESENT (1 << 0) /* bit set means preamble IB is first presented in belonging context */ @@ -45,12 +49,10 @@ struct amdgpu_job { struct amdgpu_vm *vm; struct amdgpu_sync sync; struct amdgpu_sync sched_sync; - struct amdgpu_ib *ibs; struct dma_fence hw_fence; struct dma_fence *external_hw_fence; uint32_t preamble_status; uint32_t preemption_status; - uint32_t num_ibs; bool vm_needs_flush; uint64_t vm_pd_addr; unsigned vmid; @@ -66,6 +68,9 @@ struct amdgpu_job { /* job_run_counter >= 1 means a resubmit job */ uint32_t job_run_counter; + + uint32_t num_ibs; + struct amdgpu_ib ibs[]; }; int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h index 55fbff2be761..b6c7fb00e05a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h @@ -24,6 +24,8 @@ #ifndef __AMDGPU_JPEG_H__ #define __AMDGPU_JPEG_H__ +#include "amdgpu_ras.h" + #define AMDGPU_MAX_JPEG_INSTANCES 2 #define AMDGPU_JPEG_HARVEST_JPEG0 (1 << 0) @@ -39,6 +41,10 @@ struct amdgpu_jpeg_inst { struct amdgpu_jpeg_reg external; }; +struct amdgpu_jpeg_ras { + struct amdgpu_ras_block_object ras_block; +}; + struct amdgpu_jpeg { uint8_t num_jpeg_inst; struct amdgpu_jpeg_inst inst[AMDGPU_MAX_JPEG_INSTANCES]; @@ -48,6 +54,8 @@ struct amdgpu_jpeg { enum amd_powergating_state cur_state; struct mutex jpeg_pg_lock; atomic_t total_submission_cnt; + struct ras_common_if *ras_if; + struct amdgpu_jpeg_ras *ras; }; int amdgpu_jpeg_sw_init(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index 1ebb91db2274..51bb977154eb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -43,6 +43,17 @@ #include "amdgpu_display.h" #include "amdgpu_ras.h" +static void amdgpu_runtime_pm_quirk(struct amdgpu_device *adev) +{ + /* + * Add below quirk on several sienna_cichlid cards to disable + * runtime pm to fix EMI failures. 
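+ * The (device ID, revision) pairs checked below identify the + * affected SKUs: 0x73A1 rev 0x00 and 0x73BF rev 0xCF.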
+ */ + if (((adev->pdev->device == 0x73A1) && (adev->pdev->revision == 0x00)) || + ((adev->pdev->device == 0x73BF) && (adev->pdev->revision == 0xCF))) + adev->runpm = false; +} + void amdgpu_unregister_gpu_instance(struct amdgpu_device *adev) { struct amdgpu_gpu_instance *gpu_instance; @@ -87,11 +98,6 @@ void amdgpu_driver_unload_kms(struct drm_device *dev) if (adev->rmmio == NULL) return; - if (adev->runpm) { - pm_runtime_get_sync(dev->dev); - pm_runtime_forbid(dev->dev); - } - if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DRV_UNLOAD)) DRM_WARN("smart shift update failed\n"); @@ -124,22 +130,6 @@ void amdgpu_register_gpu_instance(struct amdgpu_device *adev) mutex_unlock(&mgpu_info.mutex); } -static void amdgpu_get_audio_func(struct amdgpu_device *adev) -{ - struct pci_dev *p = NULL; - - p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus), - adev->pdev->bus->number, 1); - if (p) { - pm_runtime_get_sync(&p->dev); - - pm_runtime_mark_last_busy(&p->dev); - pm_runtime_put_autosuspend(&p->dev); - - pci_dev_put(p); - } -} - /** * amdgpu_driver_load_kms - Main load function for KMS. * @@ -152,21 +142,10 @@ static void amdgpu_get_audio_func(struct amdgpu_device *adev) int amdgpu_driver_load_kms(struct amdgpu_device *adev, unsigned long flags) { struct drm_device *dev; - struct pci_dev *parent; int r, acpi_status; dev = adev_to_drm(adev); - if (amdgpu_has_atpx() && - (amdgpu_is_atpx_hybrid() || - amdgpu_has_atpx_dgpu_power_cntl()) && - ((flags & AMD_IS_APU) == 0) && - !pci_is_thunderbolt_attached(to_pci_dev(dev->dev))) - flags |= AMD_IS_PX; - - parent = pci_upstream_bridge(adev->pdev); - adev->has_pr3 = parent ? pci_pr3_present(parent) : false; - /* amdgpu_device_init should report only fatal error * like memory allocation failure or iomapping failure, * or memory manager initialization failure, it must @@ -212,6 +191,9 @@ int amdgpu_driver_load_kms(struct amdgpu_device *adev, unsigned long flags) */ if (adev->is_fw_fb) adev->runpm = false; + + amdgpu_runtime_pm_quirk(adev); + if (adev->runpm) dev_info(adev->dev, "Using BACO for runtime pm\n"); } @@ -224,58 +206,12 @@ int amdgpu_driver_load_kms(struct amdgpu_device *adev, unsigned long flags) if (acpi_status) dev_dbg(dev->dev, "Error during ACPI methods call\n"); - if (adev->runpm) { - /* only need to skip on ATPX */ - if (amdgpu_device_supports_px(dev)) - dev_pm_set_driver_flags(dev->dev, DPM_FLAG_NO_DIRECT_COMPLETE); - /* we want direct complete for BOCO */ - if (amdgpu_device_supports_boco(dev)) - dev_pm_set_driver_flags(dev->dev, DPM_FLAG_SMART_PREPARE | - DPM_FLAG_SMART_SUSPEND | - DPM_FLAG_MAY_SKIP_RESUME); - pm_runtime_use_autosuspend(dev->dev); - pm_runtime_set_autosuspend_delay(dev->dev, 5000); - - pm_runtime_allow(dev->dev); - - pm_runtime_mark_last_busy(dev->dev); - pm_runtime_put_autosuspend(dev->dev); - - /* - * For runpm implemented via BACO, PMFW will handle the - * timing for BACO in and out: - * - put ASIC into BACO state only when both video and - * audio functions are in D3 state. - * - pull ASIC out of BACO state when either video or - * audio function is in D0 state. - * Also, at startup, PMFW assumes both functions are in - * D0 state. - * - * So if snd driver was loaded prior to amdgpu driver - * and audio function was put into D3 state, there will - * be no PMFW-aware D-state transition(D0->D3) on runpm - * suspend. Thus the BACO will be not correctly kicked in. - * - * Via amdgpu_get_audio_func(), the audio dev is put - * into D0 state. 
Then there will be a PMFW-aware D-state - * transition(D0->D3) on runpm suspend. - */ - if (amdgpu_device_supports_baco(dev) && - !(adev->flags & AMD_IS_APU) && - (adev->asic_type >= CHIP_NAVI10)) - amdgpu_get_audio_func(adev); - } - if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DRV_LOAD)) DRM_WARN("smart shift update failed\n"); out: - if (r) { - /* balance pm_runtime_get_sync in amdgpu_driver_unload_kms */ - if (adev->rmmio && adev->runpm) - pm_runtime_put_noidle(dev->dev); + if (r) amdgpu_driver_unload_kms(dev); - } return r; } @@ -406,6 +342,10 @@ static int amdgpu_firmware_info(struct drm_amdgpu_info_firmware *fw_info, fw_info->ver = adev->psp.toc.fw_version; fw_info->feature = adev->psp.toc.feature_version; break; + case AMDGPU_INFO_FW_CAP: + fw_info->ver = adev->psp.cap_fw_version; + fw_info->feature = adev->psp.cap_feature_version; + break; default: return -EINVAL; } @@ -678,13 +618,13 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) ui64 = atomic64_read(&adev->num_vram_cpu_page_faults); return copy_to_user(out, &ui64, min(size, 8u)) ? -EFAULT : 0; case AMDGPU_INFO_VRAM_USAGE: - ui64 = amdgpu_vram_mgr_usage(&adev->mman.vram_mgr); + ui64 = ttm_resource_manager_usage(&adev->mman.vram_mgr.manager); return copy_to_user(out, &ui64, min(size, 8u)) ? -EFAULT : 0; case AMDGPU_INFO_VIS_VRAM_USAGE: ui64 = amdgpu_vram_mgr_vis_usage(&adev->mman.vram_mgr); return copy_to_user(out, &ui64, min(size, 8u)) ? -EFAULT : 0; case AMDGPU_INFO_GTT_USAGE: - ui64 = amdgpu_gtt_mgr_usage(&adev->mman.gtt_mgr); + ui64 = ttm_resource_manager_usage(&adev->mman.gtt_mgr.manager); return copy_to_user(out, &ui64, min(size, 8u)) ? -EFAULT : 0; case AMDGPU_INFO_GDS_CONFIG: { struct drm_amdgpu_info_gds gds_info; @@ -716,14 +656,17 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) case AMDGPU_INFO_MEMORY: { struct drm_amdgpu_memory_info mem; struct ttm_resource_manager *gtt_man = - ttm_manager_type(&adev->mman.bdev, TTM_PL_TT); + &adev->mman.gtt_mgr.manager; + struct ttm_resource_manager *vram_man = + &adev->mman.vram_mgr.manager; + memset(&mem, 0, sizeof(mem)); mem.vram.total_heap_size = adev->gmc.real_vram_size; mem.vram.usable_heap_size = adev->gmc.real_vram_size - atomic64_read(&adev->vram_pin_size) - AMDGPU_VM_RESERVED_VRAM; mem.vram.heap_usage = - amdgpu_vram_mgr_usage(&adev->mman.vram_mgr); + ttm_resource_manager_usage(vram_man); mem.vram.max_allocation = mem.vram.usable_heap_size * 3 / 4; mem.cpu_accessible_vram.total_heap_size = @@ -741,8 +684,7 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) mem.gtt.total_heap_size *= PAGE_SIZE; mem.gtt.usable_heap_size = mem.gtt.total_heap_size - atomic64_read(&adev->gart_pin_size); - mem.gtt.heap_usage = - amdgpu_gtt_mgr_usage(&adev->mman.gtt_mgr); + mem.gtt.heap_usage = ttm_resource_manager_usage(gtt_man); mem.gtt.max_allocation = mem.gtt.usable_heap_size * 3 / 4; return copy_to_user(out, &mem, @@ -1268,18 +1210,20 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev, if (amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_VCE) != NULL) amdgpu_vce_free_handles(adev, file_priv); - amdgpu_vm_bo_rmv(adev, fpriv->prt_va); - if (amdgpu_mcbp || amdgpu_sriov_vf(adev)) { /* TODO: how to handle reserve failure */ BUG_ON(amdgpu_bo_reserve(adev->virt.csa_obj, true)); - amdgpu_vm_bo_rmv(adev, fpriv->csa_va); + amdgpu_vm_bo_del(adev, fpriv->csa_va); fpriv->csa_va = NULL; amdgpu_bo_unreserve(adev->virt.csa_obj); } pasid = fpriv->vm.pasid; pd = 
amdgpu_bo_ref(fpriv->vm.root.bo); + if (!WARN_ON(amdgpu_bo_reserve(pd, true))) { + amdgpu_vm_bo_del(adev, fpriv->prt_va); + amdgpu_bo_unreserve(pd); + } amdgpu_ctx_mgr_fini(&fpriv->ctx_mgr); amdgpu_vm_fini(adev, &fpriv->vm); @@ -1427,8 +1371,7 @@ static int amdgpu_debugfs_firmware_info_show(struct seq_file *m, void *unused) struct drm_amdgpu_info_firmware fw_info; struct drm_amdgpu_query_fw query_fw; struct atom_context *ctx = adev->mode_info.atom_context; - uint8_t smu_minor, smu_debug; - uint16_t smu_major; + uint8_t smu_program, smu_major, smu_minor, smu_debug; int ret, i; static const char *ta_fw_name[TA_FW_TYPE_MAX_INDEX] = { @@ -1574,11 +1517,12 @@ static int amdgpu_debugfs_firmware_info_show(struct seq_file *m, void *unused) ret = amdgpu_firmware_info(&fw_info, &query_fw, adev); if (ret) return ret; - smu_major = (fw_info.ver >> 16) & 0xffff; + smu_program = (fw_info.ver >> 24) & 0xff; + smu_major = (fw_info.ver >> 16) & 0xff; smu_minor = (fw_info.ver >> 8) & 0xff; smu_debug = (fw_info.ver >> 0) & 0xff; - seq_printf(m, "SMC feature version: %u, firmware version: 0x%08x (%d.%d.%d)\n", - fw_info.feature, fw_info.ver, smu_major, smu_minor, smu_debug); + seq_printf(m, "SMC feature version: %u, program: %d, firmware version: 0x%08x (%d.%d.%d)\n", + fw_info.feature, smu_program, fw_info.ver, smu_major, smu_minor, smu_debug); /* SDMA */ query_fw.fw_type = AMDGPU_INFO_FW_SDMA; @@ -1623,6 +1567,16 @@ static int amdgpu_debugfs_firmware_info_show(struct seq_file *m, void *unused) seq_printf(m, "TOC feature version: %u, firmware version: 0x%08x\n", fw_info.feature, fw_info.ver); + /* CAP */ + if (adev->psp.cap_fw) { + query_fw.fw_type = AMDGPU_INFO_FW_CAP; + ret = amdgpu_firmware_info(&fw_info, &query_fw, adev); + if (ret) + return ret; + seq_printf(m, "CAP feature version: %u, firmware version: 0x%08x\n", + fw_info.feature, fw_info.ver); + } + seq_printf(m, "VBIOS version: %s\n", ctx->vbios_version); return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c index ce538f4819f9..51c2a82e2fa4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c @@ -70,48 +70,3 @@ void amdgpu_mca_query_ras_error_count(struct amdgpu_device *adev, amdgpu_mca_reset_error_count(adev, mc_status_addr); } - -int amdgpu_mca_ras_late_init(struct amdgpu_device *adev, - struct amdgpu_mca_ras *mca_dev) -{ - int r; - struct ras_ih_if ih_info = { - .cb = NULL, - }; - struct ras_fs_if fs_info = { - .sysfs_name = mca_dev->ras_funcs->sysfs_name, - }; - - if (!mca_dev->ras_if) { - mca_dev->ras_if = kmalloc(sizeof(struct ras_common_if), GFP_KERNEL); - if (!mca_dev->ras_if) - return -ENOMEM; - mca_dev->ras_if->block = mca_dev->ras_funcs->ras_block; - mca_dev->ras_if->sub_block_index = mca_dev->ras_funcs->ras_sub_block; - mca_dev->ras_if->type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE; - } - ih_info.head = fs_info.head = *mca_dev->ras_if; - r = amdgpu_ras_late_init(adev, mca_dev->ras_if, - &fs_info, &ih_info); - if (r || !amdgpu_ras_is_supported(adev, mca_dev->ras_if->block)) { - kfree(mca_dev->ras_if); - mca_dev->ras_if = NULL; - } - - return r; -} - -void amdgpu_mca_ras_fini(struct amdgpu_device *adev, - struct amdgpu_mca_ras *mca_dev) -{ - struct ras_ih_if ih_info = { - .cb = NULL, - }; - - if (!mca_dev->ras_if) - return; - - amdgpu_ras_late_fini(adev, mca_dev->ras_if, &ih_info); - kfree(mca_dev->ras_if); - mca_dev->ras_if = NULL; -}
\ No newline at end of file diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h index c74bc7177066..7ce16d16e34b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h @@ -21,21 +21,13 @@ #ifndef __AMDGPU_MCA_H__ #define __AMDGPU_MCA_H__ -struct amdgpu_mca_ras_funcs { - int (*ras_late_init)(struct amdgpu_device *adev); - void (*ras_fini)(struct amdgpu_device *adev); - void (*query_ras_error_count)(struct amdgpu_device *adev, - void *ras_error_status); - void (*query_ras_error_address)(struct amdgpu_device *adev, - void *ras_error_status); - uint32_t ras_block; - uint32_t ras_sub_block; - const char* sysfs_name; +struct amdgpu_mca_ras_block { + struct amdgpu_ras_block_object ras_block; }; struct amdgpu_mca_ras { struct ras_common_if *ras_if; - const struct amdgpu_mca_ras_funcs *ras_funcs; + struct amdgpu_mca_ras_block *ras; }; struct amdgpu_mca_funcs { @@ -64,10 +56,4 @@ void amdgpu_mca_query_ras_error_count(struct amdgpu_device *adev, uint64_t mc_status_addr, void *ras_error_status); -int amdgpu_mca_ras_late_init(struct amdgpu_device *adev, - struct amdgpu_mca_ras *mca_dev); - -void amdgpu_mca_ras_fini(struct amdgpu_device *adev, - struct amdgpu_mca_ras *mca_dev); - #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.c deleted file mode 100644 index 24297dc51434..000000000000 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.c +++ /dev/null @@ -1,68 +0,0 @@ -/* - * Copyright 2019 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. 
- * - */ - -#include "amdgpu.h" -#include "amdgpu_ras.h" - -int amdgpu_mmhub_ras_late_init(struct amdgpu_device *adev) -{ - int r; - struct ras_ih_if ih_info = { - .cb = NULL, - }; - struct ras_fs_if fs_info = { - .sysfs_name = "mmhub_err_count", - }; - - if (!adev->mmhub.ras_if) { - adev->mmhub.ras_if = kmalloc(sizeof(struct ras_common_if), GFP_KERNEL); - if (!adev->mmhub.ras_if) - return -ENOMEM; - adev->mmhub.ras_if->block = AMDGPU_RAS_BLOCK__MMHUB; - adev->mmhub.ras_if->type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE; - adev->mmhub.ras_if->sub_block_index = 0; - } - ih_info.head = fs_info.head = *adev->mmhub.ras_if; - r = amdgpu_ras_late_init(adev, adev->mmhub.ras_if, - &fs_info, &ih_info); - if (r || !amdgpu_ras_is_supported(adev, adev->mmhub.ras_if->block)) { - kfree(adev->mmhub.ras_if); - adev->mmhub.ras_if = NULL; - } - - return r; -} - -void amdgpu_mmhub_ras_fini(struct amdgpu_device *adev) -{ - if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__MMHUB) && - adev->mmhub.ras_if) { - struct ras_common_if *ras_if = adev->mmhub.ras_if; - struct ras_ih_if ih_info = { - .cb = NULL, - }; - - amdgpu_ras_late_fini(adev, ras_if, &ih_info); - kfree(ras_if); - } -} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h index b27fcbccce2b..f939395c5914 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h @@ -21,14 +21,8 @@ #ifndef __AMDGPU_MMHUB_H__ #define __AMDGPU_MMHUB_H__ -struct amdgpu_mmhub_ras_funcs { - int (*ras_late_init)(struct amdgpu_device *adev); - void (*ras_fini)(struct amdgpu_device *adev); - void (*query_ras_error_count)(struct amdgpu_device *adev, - void *ras_error_status); - void (*query_ras_error_status)(struct amdgpu_device *adev); - void (*reset_ras_error_count)(struct amdgpu_device *adev); - void (*reset_ras_error_status)(struct amdgpu_device *adev); +struct amdgpu_mmhub_ras { + struct amdgpu_ras_block_object ras_block; }; struct amdgpu_mmhub_funcs { @@ -40,7 +34,7 @@ struct amdgpu_mmhub_funcs { void (*gart_disable)(struct amdgpu_device *adev); int (*set_clockgating)(struct amdgpu_device *adev, enum amd_clockgating_state state); - void (*get_clockgating)(struct amdgpu_device *adev, u32 *flags); + void (*get_clockgating)(struct amdgpu_device *adev, u64 *flags); void (*setup_vm_pt_regs)(struct amdgpu_device *adev, uint32_t vmid, uint64_t page_table_base); void (*update_power_gating)(struct amdgpu_device *adev, @@ -50,10 +44,8 @@ struct amdgpu_mmhub_funcs { struct amdgpu_mmhub { struct ras_common_if *ras_if; const struct amdgpu_mmhub_funcs *funcs; - const struct amdgpu_mmhub_ras_funcs *ras_funcs; + struct amdgpu_mmhub_ras *ras; }; -int amdgpu_mmhub_ras_late_init(struct amdgpu_device *adev); -void amdgpu_mmhub_ras_fini(struct amdgpu_device *adev); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c index 4b153daf283d..b86c0b8252a5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c @@ -75,8 +75,8 @@ static bool amdgpu_mn_invalidate_gfx(struct mmu_interval_notifier *mni, mmu_interval_set_seq(mni, cur_seq); - r = dma_resv_wait_timeout(bo->tbo.base.resv, true, false, - MAX_SCHEDULE_TIMEOUT); + r = dma_resv_wait_timeout(bo->tbo.base.resv, DMA_RESV_USAGE_BOOKKEEP, + false, MAX_SCHEDULE_TIMEOUT); mutex_unlock(&adev->notifier_lock); if (r <= 0) DRM_ERROR("(%ld) failed to wait for user bo\n", r); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h index 
6043bf6fd414..f80b4838cea1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h @@ -30,10 +30,10 @@ #ifndef AMDGPU_MODE_H #define AMDGPU_MODE_H +#include <drm/display/drm_dp_helper.h> #include <drm/drm_crtc.h> #include <drm/drm_edid.h> #include <drm/drm_encoder.h> -#include <drm/drm_dp_helper.h> #include <drm/drm_fixed.h> #include <drm/drm_crtc_helper.h> #include <drm/drm_fb_helper.h> @@ -44,7 +44,7 @@ #include <linux/hrtimer.h> #include "amdgpu_irq.h" -#include <drm/drm_dp_mst_helper.h> +#include <drm/display/drm_dp_mst_helper.h> #include "modules/inc/mod_freesync.h" #include "amdgpu_dm_irq_params.h" @@ -341,6 +341,7 @@ struct amdgpu_mode_info { int num_crtc; /* number of crtcs */ int num_hpd; /* number of hpd pins */ int num_dig; /* number of dig blocks */ + bool gpu_vm_support; /* supports display from GTT */ int disp_priority; const struct amdgpu_display_funcs *funcs; const enum drm_plane_type *plane_type; @@ -591,19 +592,6 @@ int amdgpu_display_get_crtc_scanoutpos(struct drm_device *dev, int *hpos, ktime_t *stime, ktime_t *etime, const struct drm_display_mode *mode); -int amdgpu_display_gem_fb_init(struct drm_device *dev, - struct amdgpu_framebuffer *rfb, - const struct drm_mode_fb_cmd2 *mode_cmd, - struct drm_gem_object *obj); -int amdgpu_display_gem_fb_verify_and_init( - struct drm_device *dev, struct amdgpu_framebuffer *rfb, - struct drm_file *file_priv, const struct drm_mode_fb_cmd2 *mode_cmd, - struct drm_gem_object *obj); -int amdgpu_display_framebuffer_init(struct drm_device *dev, - struct amdgpu_framebuffer *rfb, - const struct drm_mode_fb_cmd2 *mode_cmd, - struct drm_gem_object *obj); - int amdgpufb_remove(struct drm_device *dev, struct drm_framebuffer *fb); void amdgpu_enc_destroy(struct drm_encoder *encoder); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c index 6afb02fef8cf..37d779b8e4a6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c @@ -22,61 +22,24 @@ #include "amdgpu.h" #include "amdgpu_ras.h" -int amdgpu_nbio_ras_late_init(struct amdgpu_device *adev) +int amdgpu_nbio_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block) { int r; - struct ras_ih_if ih_info = { - .cb = NULL, - }; - struct ras_fs_if fs_info = { - .sysfs_name = "pcie_bif_err_count", - }; - - if (!adev->nbio.ras_if) { - adev->nbio.ras_if = kmalloc(sizeof(struct ras_common_if), GFP_KERNEL); - if (!adev->nbio.ras_if) - return -ENOMEM; - adev->nbio.ras_if->block = AMDGPU_RAS_BLOCK__PCIE_BIF; - adev->nbio.ras_if->type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE; - adev->nbio.ras_if->sub_block_index = 0; - } - ih_info.head = fs_info.head = *adev->nbio.ras_if; - r = amdgpu_ras_late_init(adev, adev->nbio.ras_if, - &fs_info, &ih_info); + r = amdgpu_ras_block_late_init(adev, ras_block); if (r) - goto free; + return r; - if (amdgpu_ras_is_supported(adev, adev->nbio.ras_if->block)) { + if (amdgpu_ras_is_supported(adev, ras_block->block)) { r = amdgpu_irq_get(adev, &adev->nbio.ras_controller_irq, 0); if (r) goto late_fini; r = amdgpu_irq_get(adev, &adev->nbio.ras_err_event_athub_irq, 0); if (r) goto late_fini; - } else { - r = 0; - goto free; } return 0; late_fini: - amdgpu_ras_late_fini(adev, adev->nbio.ras_if, &ih_info); -free: - kfree(adev->nbio.ras_if); - adev->nbio.ras_if = NULL; + amdgpu_ras_block_late_fini(adev, ras_block); return r; } - -void amdgpu_nbio_ras_fini(struct amdgpu_device *adev) -{ - if (amdgpu_ras_is_supported(adev, 
AMDGPU_RAS_BLOCK__PCIE_BIF) && - adev->nbio.ras_if) { - struct ras_common_if *ras_if = adev->nbio.ras_if; - struct ras_ih_if ih_info = { - .cb = NULL, - }; - - amdgpu_ras_late_fini(adev, ras_if, &ih_info); - kfree(ras_if); - } -} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h index 843052205bd5..ccd9fe96fab7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h @@ -47,15 +47,12 @@ struct nbio_hdp_flush_reg { u32 ref_and_mask_sdma7; }; -struct amdgpu_nbio_ras_funcs { +struct amdgpu_nbio_ras { + struct amdgpu_ras_block_object ras_block; void (*handle_ras_controller_intr_no_bifring)(struct amdgpu_device *adev); void (*handle_ras_err_event_athub_intr_no_bifring)(struct amdgpu_device *adev); int (*init_ras_controller_interrupt)(struct amdgpu_device *adev); int (*init_ras_err_event_athub_interrupt)(struct amdgpu_device *adev); - void (*query_ras_error_count)(struct amdgpu_device *adev, - void *ras_error_status); - int (*ras_late_init)(struct amdgpu_device *adev); - void (*ras_fini)(struct amdgpu_device *adev); }; struct amdgpu_nbio_funcs { @@ -86,7 +83,7 @@ struct amdgpu_nbio_funcs { void (*update_medium_grain_light_sleep)(struct amdgpu_device *adev, bool enable); void (*get_clockgating_state)(struct amdgpu_device *adev, - u32 *flags); + u64 *flags); void (*ih_control)(struct amdgpu_device *adev); void (*init_registers)(struct amdgpu_device *adev); void (*remap_hdp_registers)(struct amdgpu_device *adev); @@ -96,6 +93,7 @@ struct amdgpu_nbio_funcs { void (*apply_lc_spc_mode_wa)(struct amdgpu_device *adev); void (*apply_l1_link_width_reconfig_wa)(struct amdgpu_device *adev); void (*clear_doorbell_interrupt)(struct amdgpu_device *adev); + u32 (*get_rom_offset)(struct amdgpu_device *adev); }; struct amdgpu_nbio { @@ -104,9 +102,8 @@ struct amdgpu_nbio { struct amdgpu_irq_src ras_err_event_athub_irq; struct ras_common_if *ras_if; const struct amdgpu_nbio_funcs *funcs; - const struct amdgpu_nbio_ras_funcs *ras_funcs; + struct amdgpu_nbio_ras *ras; }; -int amdgpu_nbio_ras_late_init(struct amdgpu_device *adev); -void amdgpu_nbio_ras_fini(struct amdgpu_device *adev); +int amdgpu_nbio_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 5661b82d84d4..5444515c1476 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -451,7 +451,7 @@ static bool amdgpu_bo_validate_size(struct amdgpu_device *adev, if (domain & AMDGPU_GEM_DOMAIN_GTT) { man = ttm_manager_type(&adev->mman.bdev, TTM_PL_TT); - if (size < (man->size << PAGE_SHIFT)) + if (size < man->size) return true; else goto fail; @@ -460,7 +460,7 @@ static bool amdgpu_bo_validate_size(struct amdgpu_device *adev, if (domain & AMDGPU_GEM_DOMAIN_VRAM) { man = ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM); - if (size < (man->size << PAGE_SHIFT)) + if (size < man->size) return true; else goto fail; @@ -472,7 +472,7 @@ static bool amdgpu_bo_validate_size(struct amdgpu_device *adev, fail: DRM_DEBUG("BO size %lu > total memory in domain: %llu\n", size, - man->size << PAGE_SHIFT); + man->size); return false; } @@ -575,6 +575,9 @@ int amdgpu_bo_create(struct amdgpu_device *adev, if (!amdgpu_bo_support_uswc(bo->flags)) bo->flags &= ~AMDGPU_GEM_CREATE_CPU_GTT_USWC; + if (adev->ras_enabled) + bo->flags |= AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE; + bo->tbo.bdev = &adev->mman.bdev; if 
(bp->domain & (AMDGPU_GEM_DOMAIN_GWS | AMDGPU_GEM_DOMAIN_OA | AMDGPU_GEM_DOMAIN_GDS)) @@ -609,9 +612,8 @@ int amdgpu_bo_create(struct amdgpu_device *adev, if (unlikely(r)) goto fail_unreserve; - amdgpu_bo_fence(bo, fence, false); - dma_fence_put(bo->tbo.moving); - bo->tbo.moving = dma_fence_get(fence); + dma_resv_add_fence(bo->tbo.base.resv, fence, + DMA_RESV_USAGE_KERNEL); dma_fence_put(fence); } if (!bp->resv) @@ -758,6 +760,11 @@ int amdgpu_bo_kmap(struct amdgpu_bo *bo, void **ptr) if (bo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS) return -EPERM; + r = dma_resv_wait_timeout(bo->tbo.base.resv, DMA_RESV_USAGE_KERNEL, + false, MAX_SCHEDULE_TIMEOUT); + if (r < 0) + return r; + kptr = amdgpu_bo_kptr(bo); if (kptr) { if (ptr) @@ -765,11 +772,6 @@ int amdgpu_bo_kmap(struct amdgpu_bo *bo, void **ptr) return 0; } - r = dma_resv_wait_timeout(bo->tbo.base.resv, false, false, - MAX_SCHEDULE_TIMEOUT); - if (r < 0) - return r; - r = ttm_bo_kmap(&bo->tbo, 0, bo->tbo.resource->num_pages, &bo->kmap); if (r) return r; @@ -1281,6 +1283,7 @@ void amdgpu_bo_get_memory(struct amdgpu_bo *bo, uint64_t *vram_mem, */ void amdgpu_bo_release_notify(struct ttm_buffer_object *bo) { + struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev); struct dma_fence *fence = NULL; struct amdgpu_bo *abo; int r; @@ -1300,10 +1303,12 @@ void amdgpu_bo_release_notify(struct ttm_buffer_object *bo) amdgpu_amdkfd_remove_fence_on_pt_pd_bos(abo); if (bo->resource->mem_type != TTM_PL_VRAM || - !(abo->flags & AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE)) + !(abo->flags & AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE) || + adev->in_suspend || adev->shutdown) return; - dma_resv_lock(bo->base.resv, NULL); + if (WARN_ON_ONCE(!dma_resv_trylock(bo->base.resv))) + return; r = amdgpu_fill_buffer(abo, AMDGPU_POISON, bo->base.resv, &fence); if (!WARN_ON(r)) { @@ -1384,11 +1389,17 @@ void amdgpu_bo_fence(struct amdgpu_bo *bo, struct dma_fence *fence, bool shared) { struct dma_resv *resv = bo->tbo.base.resv; + int r; - if (shared) - dma_resv_add_shared_fence(resv, fence); - else - dma_resv_add_excl_fence(resv, fence); + r = dma_resv_reserve_fences(resv, 1); + if (r) { + /* As last resort on OOM we block for the fence */ + dma_fence_wait(fence, false); + return; + } + + dma_resv_add_fence(resv, fence, shared ? 
DMA_RESV_USAGE_READ : + DMA_RESV_USAGE_WRITE); } /** diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_preempt_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_preempt_mgr.c index 786afe4f58f9..e8adfd0a570a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_preempt_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_preempt_mgr.c @@ -25,12 +25,6 @@ #include "amdgpu.h" -static inline struct amdgpu_preempt_mgr * -to_preempt_mgr(struct ttm_resource_manager *man) -{ - return container_of(man, struct amdgpu_preempt_mgr, manager); -} - /** * DOC: mem_info_preempt_used * @@ -45,10 +39,9 @@ static ssize_t mem_info_preempt_used_show(struct device *dev, { struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = drm_to_adev(ddev); - struct ttm_resource_manager *man; + struct ttm_resource_manager *man = &adev->mman.preempt_mgr; - man = ttm_manager_type(&adev->mman.bdev, AMDGPU_PL_PREEMPT); - return sysfs_emit(buf, "%llu\n", amdgpu_preempt_mgr_usage(man)); + return sysfs_emit(buf, "%llu\n", ttm_resource_manager_usage(man)); } static DEVICE_ATTR_RO(mem_info_preempt_used); @@ -68,16 +61,12 @@ static int amdgpu_preempt_mgr_new(struct ttm_resource_manager *man, const struct ttm_place *place, struct ttm_resource **res) { - struct amdgpu_preempt_mgr *mgr = to_preempt_mgr(man); - *res = kzalloc(sizeof(**res), GFP_KERNEL); if (!*res) return -ENOMEM; ttm_resource_init(tbo, place, *res); (*res)->start = AMDGPU_BO_INVALID_OFFSET; - - atomic64_add((*res)->num_pages, &mgr->used); return 0; } @@ -92,48 +81,13 @@ static int amdgpu_preempt_mgr_new(struct ttm_resource_manager *man, static void amdgpu_preempt_mgr_del(struct ttm_resource_manager *man, struct ttm_resource *res) { - struct amdgpu_preempt_mgr *mgr = to_preempt_mgr(man); - - atomic64_sub(res->num_pages, &mgr->used); + ttm_resource_fini(man, res); kfree(res); } -/** - * amdgpu_preempt_mgr_usage - return usage of PREEMPT domain - * - * @man: TTM memory type manager - * - * Return how many bytes are used in the GTT domain - */ -uint64_t amdgpu_preempt_mgr_usage(struct ttm_resource_manager *man) -{ - struct amdgpu_preempt_mgr *mgr = to_preempt_mgr(man); - s64 result = atomic64_read(&mgr->used); - - return (result > 0 ? result : 0) * PAGE_SIZE; -} - -/** - * amdgpu_preempt_mgr_debug - dump VRAM table - * - * @man: TTM memory type manager - * @printer: DRM printer to use - * - * Dump the table content using printk. 
- */ -static void amdgpu_preempt_mgr_debug(struct ttm_resource_manager *man, - struct drm_printer *printer) -{ - struct amdgpu_preempt_mgr *mgr = to_preempt_mgr(man); - - drm_printf(printer, "man size:%llu pages, preempt used:%lld pages\n", - man->size, (u64)atomic64_read(&mgr->used)); -} - static const struct ttm_resource_manager_func amdgpu_preempt_mgr_func = { .alloc = amdgpu_preempt_mgr_new, .free = amdgpu_preempt_mgr_del, - .debug = amdgpu_preempt_mgr_debug }; /** @@ -145,16 +99,13 @@ static const struct ttm_resource_manager_func amdgpu_preempt_mgr_func = { */ int amdgpu_preempt_mgr_init(struct amdgpu_device *adev) { - struct amdgpu_preempt_mgr *mgr = &adev->mman.preempt_mgr; - struct ttm_resource_manager *man = &mgr->manager; + struct ttm_resource_manager *man = &adev->mman.preempt_mgr; int ret; man->use_tt = true; man->func = &amdgpu_preempt_mgr_func; - ttm_resource_manager_init(man, (1 << 30)); - - atomic64_set(&mgr->used, 0); + ttm_resource_manager_init(man, &adev->mman.bdev, (1 << 30)); ret = device_create_file(adev->dev, &dev_attr_mem_info_preempt_used); if (ret) { @@ -162,8 +113,7 @@ int amdgpu_preempt_mgr_init(struct amdgpu_device *adev) return ret; } - ttm_set_driver_manager(&adev->mman.bdev, AMDGPU_PL_PREEMPT, - &mgr->manager); + ttm_set_driver_manager(&adev->mman.bdev, AMDGPU_PL_PREEMPT, man); ttm_resource_manager_set_used(man, true); return 0; } @@ -178,8 +128,7 @@ int amdgpu_preempt_mgr_init(struct amdgpu_device *adev) */ void amdgpu_preempt_mgr_fini(struct amdgpu_device *adev) { - struct amdgpu_preempt_mgr *mgr = &adev->mman.preempt_mgr; - struct ttm_resource_manager *man = &mgr->manager; + struct ttm_resource_manager *man = &adev->mman.preempt_mgr; int ret; ttm_resource_manager_set_used(man, false); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index dee17a0e1187..0bd22ebcc3d1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -46,8 +46,6 @@ static int psp_sysfs_init(struct amdgpu_device *adev); static void psp_sysfs_fini(struct amdgpu_device *adev); static int psp_load_smu_fw(struct psp_context *psp); -static int psp_ta_unload(struct psp_context *psp, struct ta_context *context); -static int psp_ta_load(struct psp_context *psp, struct ta_context *context); static int psp_rap_terminate(struct psp_context *psp); static int psp_securedisplay_terminate(struct psp_context *psp); @@ -133,6 +131,8 @@ static int psp_early_init(void *handle) break; case IP_VERSION(13, 0, 1): case IP_VERSION(13, 0, 3): + case IP_VERSION(13, 0, 5): + case IP_VERSION(13, 0, 8): psp_v13_0_set_psp_funcs(psp); psp->autoload_supported = true; break; @@ -259,6 +259,33 @@ static bool psp_get_runtime_db_entry(struct amdgpu_device *adev, return ret; } +static int psp_init_sriov_microcode(struct psp_context *psp) +{ + struct amdgpu_device *adev = psp->adev; + int ret = 0; + + switch (adev->ip_versions[MP0_HWIP][0]) { + case IP_VERSION(9, 0, 0): + ret = psp_init_cap_microcode(psp, "vega10"); + break; + case IP_VERSION(11, 0, 9): + ret = psp_init_cap_microcode(psp, "navi12"); + break; + case IP_VERSION(11, 0, 7): + ret = psp_init_cap_microcode(psp, "sienna_cichlid"); + break; + case IP_VERSION(13, 0, 2): + ret = psp_init_cap_microcode(psp, "aldebaran"); + if (!ret) + ret = psp_init_ta_microcode(psp, "aldebaran"); + break; + default: + BUG(); + break; + } + + return ret; +} + static int psp_sw_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@ -273,21 +300,19 @@ static int 
psp_sw_init(void *handle) ret = -ENOMEM; } - if (!amdgpu_sriov_vf(adev)) { + if (amdgpu_sriov_vf(adev)) + ret = psp_init_sriov_microcode(psp); + else ret = psp_init_microcode(psp); - if (ret) { - DRM_ERROR("Failed to load psp firmware!\n"); - return ret; - } - } else if (amdgpu_sriov_vf(adev) && - adev->ip_versions[MP0_HWIP][0] == IP_VERSION(13, 0, 2)) { - ret = psp_init_ta_microcode(psp, "aldebaran"); - if (ret) { - DRM_ERROR("Failed to initialize ta microcode!\n"); - return ret; - } + if (ret) { + DRM_ERROR("Failed to load psp firmware!\n"); + return ret; } + adev->psp.xgmi_context.supports_extended_data = + !adev->gmc.xgmi.connected_to_cpu && + adev->ip_versions[MP0_HWIP][0] == IP_VERSION(13, 0, 2); + memset(&boot_cfg_entry, 0, sizeof(boot_cfg_entry)); if (psp_get_runtime_db_entry(adev, PSP_RUNTIME_ENTRY_TYPE_BOOT_CONFIG, @@ -353,6 +378,10 @@ static int psp_sw_fini(void *handle) release_firmware(psp->ta_fw); psp->ta_fw = NULL; } + if (adev->psp.cap_fw) { + release_firmware(psp->cap_fw); + psp->cap_fw = NULL; + } if (adev->ip_versions[MP0_HWIP][0] == IP_VERSION(11, 0, 0) || adev->ip_versions[MP0_HWIP][0] == IP_VERSION(11, 0, 7)) @@ -491,7 +520,10 @@ psp_cmd_submit_buf(struct psp_context *psp, DRM_WARN("psp gfx command %s(0x%X) failed and response status is (0x%X)\n", psp_gfx_cmd_name(psp->cmd_buf_mem->cmd_id), psp->cmd_buf_mem->cmd_id, psp->cmd_buf_mem->resp.status); - if (!timeout) { + /* If we load CAP FW, PSP must return 0 under SRIOV + * also return failure in case of timeout + */ + if ((ucode && (ucode->ucode_id == AMDGPU_UCODE_ID_CAP)) || !timeout) { ret = -EINVAL; goto exit; } @@ -829,7 +861,7 @@ static void psp_prep_ta_unload_cmd_buf(struct psp_gfx_cmd_resp *cmd, cmd->cmd.cmd_unload_ta.session_id = session_id; } -static int psp_ta_unload(struct psp_context *psp, struct ta_context *context) +int psp_ta_unload(struct psp_context *psp, struct ta_context *context) { int ret; struct psp_gfx_cmd_resp *cmd = acquire_psp_cmd_buf(psp); @@ -911,25 +943,21 @@ static void psp_prep_ta_load_cmd_buf(struct psp_gfx_cmd_resp *cmd, cmd->cmd.cmd_load_ta.cmd_buf_len = context->mem_context.shared_mem_size; } -static int psp_ta_init_shared_buf(struct psp_context *psp, +int psp_ta_init_shared_buf(struct psp_context *psp, struct ta_mem_context *mem_ctx) { - int ret; - /* * Allocate 16k memory aligned to 4k from Frame Buffer (local * physical) for ta to host memory */ - ret = amdgpu_bo_create_kernel(psp->adev, mem_ctx->shared_mem_size, + return amdgpu_bo_create_kernel(psp->adev, mem_ctx->shared_mem_size, PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM, &mem_ctx->shared_bo, &mem_ctx->shared_mc_addr, &mem_ctx->shared_buf); - - return ret; } -static void psp_ta_free_shared_buf(struct ta_mem_context *mem_ctx) +void psp_ta_free_shared_buf(struct ta_mem_context *mem_ctx) { amdgpu_bo_free_kernel(&mem_ctx->shared_bo, &mem_ctx->shared_mc_addr, &mem_ctx->shared_buf); @@ -940,6 +968,42 @@ static int psp_xgmi_init_shared_buf(struct psp_context *psp) return psp_ta_init_shared_buf(psp, &psp->xgmi_context.context.mem_context); } +static void psp_prep_ta_invoke_indirect_cmd_buf(struct psp_gfx_cmd_resp *cmd, + uint32_t ta_cmd_id, + struct ta_context *context) +{ + cmd->cmd_id = GFX_CMD_ID_INVOKE_CMD; + cmd->cmd.cmd_invoke_cmd.session_id = context->session_id; + cmd->cmd.cmd_invoke_cmd.ta_cmd_id = ta_cmd_id; + + cmd->cmd.cmd_invoke_cmd.buf.num_desc = 1; + cmd->cmd.cmd_invoke_cmd.buf.total_size = context->mem_context.shared_mem_size; + cmd->cmd.cmd_invoke_cmd.buf.buf_desc[0].buf_size = context->mem_context.shared_mem_size; + 
cmd->cmd.cmd_invoke_cmd.buf.buf_desc[0].buf_phy_addr_lo = + lower_32_bits(context->mem_context.shared_mc_addr); + cmd->cmd.cmd_invoke_cmd.buf.buf_desc[0].buf_phy_addr_hi = + upper_32_bits(context->mem_context.shared_mc_addr); +} + +int psp_ta_invoke_indirect(struct psp_context *psp, + uint32_t ta_cmd_id, + struct ta_context *context) +{ + int ret; + struct psp_gfx_cmd_resp *cmd = acquire_psp_cmd_buf(psp); + + psp_prep_ta_invoke_indirect_cmd_buf(cmd, ta_cmd_id, context); + + ret = psp_cmd_submit_buf(psp, NULL, cmd, + psp->fence_buf_mc_addr); + + context->resp_status = cmd->resp.status; + + release_psp_cmd_buf(psp); + + return ret; +} + static void psp_prep_ta_invoke_cmd_buf(struct psp_gfx_cmd_resp *cmd, uint32_t ta_cmd_id, uint32_t session_id) @@ -949,7 +1013,7 @@ static void psp_prep_ta_invoke_cmd_buf(struct psp_gfx_cmd_resp *cmd, cmd->cmd.cmd_invoke_cmd.ta_cmd_id = ta_cmd_id; } -static int psp_ta_invoke(struct psp_context *psp, +int psp_ta_invoke(struct psp_context *psp, uint32_t ta_cmd_id, struct ta_context *context) { @@ -961,12 +1025,14 @@ static int psp_ta_invoke(struct psp_context *psp, ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr); + context->resp_status = cmd->resp.status; + release_psp_cmd_buf(psp); return ret; } -static int psp_ta_load(struct psp_context *psp, struct ta_context *context) +int psp_ta_load(struct psp_context *psp, struct ta_context *context) { int ret; struct psp_gfx_cmd_resp *cmd; @@ -981,6 +1047,8 @@ static int psp_ta_load(struct psp_context *psp, struct ta_context *context) ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr); + context->resp_status = cmd->resp.status; + if (!ret) { context->session_id = cmd->resp.session_id; } @@ -1308,6 +1376,11 @@ static void psp_ras_ta_check_status(struct psp_context *psp) break; case TA_RAS_STATUS__SUCCESS: break; + case TA_RAS_STATUS__TEE_ERROR_ACCESS_DENIED: + if (ras_cmd->cmd_id == TA_RAS_COMMAND__TRIGGER_ERROR) + dev_warn(psp->adev->dev, + "RAS WARNING: Inject error to critical region is not allowed\n"); + break; default: dev_warn(psp->adev->dev, "RAS WARNING: ras status = 0x%X\n", ras_cmd->ras_status); @@ -1381,7 +1454,7 @@ int psp_ras_enable_features(struct psp_context *psp, return 0; } -static int psp_ras_terminate(struct psp_context *psp) +int psp_ras_terminate(struct psp_context *psp) { int ret; @@ -1520,7 +1593,9 @@ int psp_ras_trigger_error(struct psp_context *psp, if (amdgpu_ras_intr_triggered()) return 0; - if (ras_cmd->ras_status) + if (ras_cmd->ras_status == TA_RAS_STATUS__TEE_ERROR_ACCESS_DENIED) + return -EACCES; + else if (ras_cmd->ras_status) return -EINVAL; return 0; @@ -2051,6 +2126,9 @@ static int psp_get_fw_type(struct amdgpu_firmware_info *ucode, enum psp_gfx_fw_type *type) { switch (ucode->ucode_id) { + case AMDGPU_UCODE_ID_CAP: + *type = GFX_FW_TYPE_CAP; + break; case AMDGPU_UCODE_ID_SDMA0: *type = GFX_FW_TYPE_SDMA0; break; @@ -2973,7 +3051,6 @@ static int psp_init_sos_base_fw(struct amdgpu_device *adev) adev->psp.sos.size_bytes = le32_to_cpu(sos_hdr->sos.size_bytes); adev->psp.sos.start_addr = ucode_array_start_addr + le32_to_cpu(sos_hdr->sos.offset_bytes); - adev->psp.xgmi_context.supports_extended_data = false; } else { /* Load alternate PSP SOS FW */ sos_hdr_v1_3 = (const struct psp_firmware_header_v1_3 *)adev->psp.sos_fw->data; @@ -2988,7 +3065,6 @@ static int psp_init_sos_base_fw(struct amdgpu_device *adev) adev->psp.sos.size_bytes = le32_to_cpu(sos_hdr_v1_3->sos_aux.size_bytes); adev->psp.sos.start_addr = ucode_array_start_addr + 
le32_to_cpu(sos_hdr_v1_3->sos_aux.offset_bytes); - adev->psp.xgmi_context.supports_extended_data = true; } if ((adev->psp.sys.size_bytes == 0) || (adev->psp.sos.size_bytes == 0)) { @@ -3217,6 +3293,58 @@ out: return err; } +int psp_init_cap_microcode(struct psp_context *psp, + const char *chip_name) +{ + struct amdgpu_device *adev = psp->adev; + char fw_name[PSP_FW_NAME_LEN]; + const struct psp_firmware_header_v1_0 *cap_hdr_v1_0; + struct amdgpu_firmware_info *info = NULL; + int err = 0; + + if (!chip_name) { + dev_err(adev->dev, "invalid chip name for cap microcode\n"); + return -EINVAL; + } + + if (!amdgpu_sriov_vf(adev)) { + dev_err(adev->dev, "cap microcode should only be loaded under SRIOV\n"); + return -EINVAL; + } + + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_cap.bin", chip_name); + err = request_firmware(&adev->psp.cap_fw, fw_name, adev->dev); + if (err) { + dev_warn(adev->dev, "cap microcode does not exist, skip\n"); + err = 0; + goto out; + } + + err = amdgpu_ucode_validate(adev->psp.cap_fw); + if (err) { + dev_err(adev->dev, "fail to initialize cap microcode\n"); + goto out; + } + + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CAP]; + info->ucode_id = AMDGPU_UCODE_ID_CAP; + info->fw = adev->psp.cap_fw; + cap_hdr_v1_0 = (const struct psp_firmware_header_v1_0 *) + adev->psp.cap_fw->data; + adev->firmware.fw_size += ALIGN( + le32_to_cpu(cap_hdr_v1_0->header.ucode_size_bytes), PAGE_SIZE); + adev->psp.cap_fw_version = le32_to_cpu(cap_hdr_v1_0->header.ucode_version); + adev->psp.cap_feature_version = le32_to_cpu(cap_hdr_v1_0->sos.fw_version); + adev->psp.cap_ucode_size = le32_to_cpu(cap_hdr_v1_0->header.ucode_size_bytes); + + return 0; + +out: + release_firmware(adev->psp.cap_fw); + adev->psp.cap_fw = NULL; + return err; +} + static int psp_set_clockgating_state(void *handle, enum amd_clockgating_state state) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h index f29afabbff1f..cf8d3199b35b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h @@ -48,6 +48,17 @@ enum psp_shared_mem_size { PSP_SECUREDISPLAY_SHARED_MEM_SIZE = 0x4000, }; +enum ta_type_id { + TA_TYPE_XGMI = 1, + TA_TYPE_RAS, + TA_TYPE_HDCP, + TA_TYPE_DTM, + TA_TYPE_RAP, + TA_TYPE_SECUREDISPLAY, + + TA_TYPE_MAX_INDEX, +}; + struct psp_context; struct psp_xgmi_node_info; struct psp_xgmi_topology_info; @@ -151,9 +162,11 @@ struct ta_mem_context { struct ta_context { bool initialized; uint32_t session_id; + uint32_t resp_status; struct ta_mem_context mem_context; struct psp_bin_desc bin_desc; enum psp_gfx_cmd_id ta_load_type; + enum ta_type_id ta_type; }; struct ta_cp_context { @@ -306,6 +319,9 @@ struct psp_context /* toc firmware */ const struct firmware *toc_fw; + /* cap firmware */ + const struct firmware *cap_fw; + /* fence buffer */ struct amdgpu_bo *fence_buf_bo; uint64_t fence_buf_mc_addr; @@ -327,6 +343,10 @@ struct psp_context const struct firmware *ta_fw; uint32_t ta_fw_version; + uint32_t cap_fw_version; + uint32_t cap_feature_version; + uint32_t cap_ucode_size; + struct ta_context asd_context; struct psp_xgmi_context xgmi_context; struct psp_ras_context ras_context; @@ -400,6 +420,18 @@ int psp_gpu_reset(struct amdgpu_device *adev); int psp_update_vcn_sram(struct amdgpu_device *adev, int inst_idx, uint64_t cmd_gpu_addr, int cmd_size); +int psp_ta_init_shared_buf(struct psp_context *psp, + struct ta_mem_context *mem_ctx); +void psp_ta_free_shared_buf(struct ta_mem_context *mem_ctx); +int psp_ta_unload(struct psp_context 
*psp, struct ta_context *context); +int psp_ta_load(struct psp_context *psp, struct ta_context *context); +int psp_ta_invoke(struct psp_context *psp, + uint32_t ta_cmd_id, + struct ta_context *context); +int psp_ta_invoke_indirect(struct psp_context *psp, + uint32_t ta_cmd_id, + struct ta_context *context); + int psp_xgmi_initialize(struct psp_context *psp, bool set_extended_data, bool load_ta); int psp_xgmi_terminate(struct psp_context *psp); int psp_xgmi_invoke(struct psp_context *psp, uint32_t ta_cmd_id); @@ -418,6 +450,7 @@ int psp_ras_enable_features(struct psp_context *psp, union ta_ras_cmd_input *info, bool enable); int psp_ras_trigger_error(struct psp_context *psp, struct ta_ras_trigger_error_input *info); +int psp_ras_terminate(struct psp_context *psp); int psp_hdcp_invoke(struct psp_context *psp, uint32_t ta_cmd_id); int psp_dtm_invoke(struct psp_context *psp, uint32_t ta_cmd_id); @@ -440,6 +473,8 @@ int psp_init_sos_microcode(struct psp_context *psp, const char *chip_name); int psp_init_ta_microcode(struct psp_context *psp, const char *chip_name); +int psp_init_cap_microcode(struct psp_context *psp, + const char *chip_name); int psp_get_fw_attestation_records_addr(struct psp_context *psp, uint64_t *output_ptr); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c new file mode 100644 index 000000000000..0988e00612e5 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c @@ -0,0 +1,304 @@ +/* + * Copyright 2022 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ + +#include "amdgpu.h" +#include "amdgpu_psp_ta.h" + +#if defined(CONFIG_DEBUG_FS) + +static ssize_t ta_if_load_debugfs_write(struct file *fp, const char *buf, + size_t len, loff_t *off); +static ssize_t ta_if_unload_debugfs_write(struct file *fp, const char *buf, + size_t len, loff_t *off); +static ssize_t ta_if_invoke_debugfs_write(struct file *fp, const char *buf, + size_t len, loff_t *off); + +static uint32_t get_bin_version(const uint8_t *bin) +{ + const struct common_firmware_header *hdr = + (const struct common_firmware_header *)bin; + + return hdr->ucode_version; +} + +static void prep_ta_mem_context(struct psp_context *psp, + struct ta_context *context, + uint8_t *shared_buf, + uint32_t shared_buf_len) +{ + context->mem_context.shared_mem_size = PAGE_ALIGN(shared_buf_len); + psp_ta_init_shared_buf(psp, &context->mem_context); + + memcpy((void *)context->mem_context.shared_buf, shared_buf, shared_buf_len); +} + +static bool is_ta_type_valid(enum ta_type_id ta_type) +{ + bool ret = false; + + switch (ta_type) { + case TA_TYPE_RAS: + ret = true; + break; + default: + break; + } + + return ret; +} + +static const struct file_operations ta_load_debugfs_fops = { + .write = ta_if_load_debugfs_write, + .llseek = default_llseek, + .owner = THIS_MODULE +}; + +static const struct file_operations ta_unload_debugfs_fops = { + .write = ta_if_unload_debugfs_write, + .llseek = default_llseek, + .owner = THIS_MODULE +}; + +static const struct file_operations ta_invoke_debugfs_fops = { + .write = ta_if_invoke_debugfs_write, + .llseek = default_llseek, + .owner = THIS_MODULE +}; + + +/** + * DOC: AMDGPU TA debugfs interfaces + * + * Three debugfs interfaces can be opened by a program to + * load/invoke/unload TA, + * + * - /sys/kernel/debug/dri/<N>/ta_if/ta_load + * - /sys/kernel/debug/dri/<N>/ta_if/ta_invoke + * - /sys/kernel/debug/dri/<N>/ta_if/ta_unload + * + * How to use the interfaces in a program? 
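+ * + * For instance, as an illustrative sketch only (hypothetical user-space + * client, error handling omitted, buffer layouts described next): + * + *   fd = open("/sys/kernel/debug/dri/0/ta_if/ta_load", O_RDWR); + *   write(fd, buf, len);  with buf packed as TA type, bin length, TA bin + * + * On success the handler copies its receive data (here the new TA ID) + * back into the first bytes of the same user buffer passed to write().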
+ * + * A program needs to provide a transmit buffer to the interfaces + * and will receive a result buffer back from them, as follows: + * + * - For TA load debugfs interface: + * Transmit buffer: + * - TA type (4bytes) + * - TA bin length (4bytes) + * - TA bin + * Receive buffer: + * - TA ID (4bytes) + * + * - For TA invoke debugfs interface: + * Transmit buffer: + * - TA ID (4bytes) + * - TA CMD ID (4bytes) + * - TA shared buf length (4bytes) + * - TA shared buf + * Receive buffer: + * - TA shared buf + * + * - For TA unload debugfs interface: + * Transmit buffer: + * - TA ID (4bytes) + */ + +static ssize_t ta_if_load_debugfs_write(struct file *fp, const char *buf, size_t len, loff_t *off) +{ + uint32_t ta_type = 0; + uint32_t ta_bin_len = 0; + uint8_t *ta_bin = NULL; + uint32_t copy_pos = 0; + int ret = 0; + + struct amdgpu_device *adev = (struct amdgpu_device *)file_inode(fp)->i_private; + struct psp_context *psp = &adev->psp; + struct ta_context context = {0}; + + if (!buf) + return -EINVAL; + + ret = copy_from_user((void *)&ta_type, &buf[copy_pos], sizeof(uint32_t)); + if (ret || (!is_ta_type_valid(ta_type))) + return -EINVAL; + + copy_pos += sizeof(uint32_t); + + ret = copy_from_user((void *)&ta_bin_len, &buf[copy_pos], sizeof(uint32_t)); + if (ret) + return -EINVAL; + + copy_pos += sizeof(uint32_t); + + ta_bin = kzalloc(ta_bin_len, GFP_KERNEL); + if (!ta_bin) + return -ENOMEM; + if (copy_from_user((void *)ta_bin, &buf[copy_pos], ta_bin_len)) { + ret = -EFAULT; + goto err_free_bin; + } + + ret = psp_ras_terminate(psp); + if (ret) { + dev_err(adev->dev, "Failed to unload embedded RAS TA\n"); + goto err_free_bin; + } + + context.ta_type = ta_type; + context.ta_load_type = GFX_CMD_ID_LOAD_TA; + context.bin_desc.fw_version = get_bin_version(ta_bin); + context.bin_desc.size_bytes = ta_bin_len; + context.bin_desc.start_addr = ta_bin; + + ret = psp_ta_load(psp, &context); + + if (ret || context.resp_status) { + dev_err(adev->dev, "TA load via debugfs failed (%d) status %d\n", + ret, context.resp_status); + if (!ret) + ret = -EINVAL; + goto err_free_bin; + } + + context.initialized = true; + if (copy_to_user((char *)buf, (void *)&context.session_id, sizeof(uint32_t))) + ret = -EFAULT; + +err_free_bin: + kfree(ta_bin); + + return ret; +} + +static ssize_t ta_if_unload_debugfs_write(struct file *fp, const char *buf, size_t len, loff_t *off) +{ + uint32_t ta_id = 0; + int ret = 0; + + struct amdgpu_device *adev = (struct amdgpu_device *)file_inode(fp)->i_private; + struct psp_context *psp = &adev->psp; + struct ta_context context = {0}; + + if (!buf) + return -EINVAL; + + ret = copy_from_user((void *)&ta_id, buf, sizeof(uint32_t)); + if (ret) + return -EINVAL; + + context.session_id = ta_id; + + ret = psp_ta_unload(psp, &context); + if (!ret) + context.initialized = false; + + return ret; +} + +static ssize_t ta_if_invoke_debugfs_write(struct file *fp, const char *buf, size_t len, loff_t *off) +{ + uint32_t ta_id = 0; + uint32_t cmd_id = 0; + uint32_t shared_buf_len = 0; + uint8_t *shared_buf = NULL; + uint32_t copy_pos = 0; + int ret = 0; + + struct amdgpu_device *adev = (struct amdgpu_device *)file_inode(fp)->i_private; + struct psp_context *psp = &adev->psp; + struct ta_context context = {0}; + + if (!buf) + return -EINVAL; + + ret = copy_from_user((void *)&ta_id, &buf[copy_pos], sizeof(uint32_t)); + if (ret) + return -EINVAL; + copy_pos += sizeof(uint32_t); + + ret = copy_from_user((void *)&cmd_id, &buf[copy_pos], sizeof(uint32_t)); + if (ret) + return -EINVAL; + copy_pos += sizeof(uint32_t); + + ret = 
+    if (ret)
+        return -EINVAL;
+    copy_pos += sizeof(uint32_t);
+
+    shared_buf = kzalloc(shared_buf_len, GFP_KERNEL);
+    if (!shared_buf)
+        return -ENOMEM;
+    if (copy_from_user((void *)shared_buf, &buf[copy_pos], shared_buf_len)) {
+        ret = -EFAULT;
+        goto err_free_shared_buf;
+    }
+
+    context.session_id = ta_id;
+
+    prep_ta_mem_context(psp, &context, shared_buf, shared_buf_len);
+
+    ret = psp_ta_invoke_indirect(psp, cmd_id, &context);
+
+    if (ret || context.resp_status) {
+        dev_err(adev->dev, "TA invoke via debugfs failed (%d) status %d\n",
+                ret, context.resp_status);
+        if (!ret)
+            ret = -EINVAL;
+        goto err_free_ta_shared_buf;
+    }
+
+    if (copy_to_user((char *)buf, context.mem_context.shared_buf, shared_buf_len))
+        ret = -EFAULT;
+
+err_free_ta_shared_buf:
+    psp_ta_free_shared_buf(&context.mem_context);
+
+err_free_shared_buf:
+    kfree(shared_buf);
+
+    return ret;
+}
+
+void amdgpu_ta_if_debugfs_init(struct amdgpu_device *adev)
+{
+    struct drm_minor *minor = adev_to_drm(adev)->primary;
+
+    struct dentry *dir = debugfs_create_dir("ta_if", minor->debugfs_root);
+
+    debugfs_create_file("ta_load", 0200, dir, adev,
+                        &ta_load_debugfs_fops);
+
+    debugfs_create_file("ta_unload", 0200, dir,
+                        adev, &ta_unload_debugfs_fops);
+
+    debugfs_create_file("ta_invoke", 0200, dir,
+                        adev, &ta_invoke_debugfs_fops);
+}
+
+#else
+void amdgpu_ta_if_debugfs_init(struct amdgpu_device *adev)
+{
+
+}
+#endif
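For illustration, here is a minimal userspace sketch of driving the ta_load interface documented above. The debugfs path, DRI minor index, and TA type value are assumptions; the transmit-buffer layout follows the comment block at the top of this file, and note that these handlers return 0 rather than a byte count on success.

    /* Hypothetical userspace example; not part of the patch. */
    #include <fcntl.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>
    #include <unistd.h>

    int main(void)
    {
        uint32_t ta_type = 2;           /* assumed TA type id */
        uint8_t ta_bin[16] = { 0 };     /* stand-in for a real TA binary */
        uint32_t ta_bin_len = sizeof(ta_bin);
        uint8_t buf[8 + sizeof(ta_bin)];
        uint32_t session_id;
        int fd;

        /* transmit buffer: TA type, TA bin length, TA bin */
        memcpy(&buf[0], &ta_type, 4);
        memcpy(&buf[4], &ta_bin_len, 4);
        memcpy(&buf[8], ta_bin, ta_bin_len);

        fd = open("/sys/kernel/debug/dri/0/ta_if/ta_load", O_WRONLY);
        if (fd < 0)
            return 1;

        /* the handler copies the TA session ID back into the same
         * user buffer and returns 0 (not a byte count) on success */
        if (write(fd, buf, sizeof(buf)) < 0) {
            close(fd);
            return 1;
        }
        memcpy(&session_id, &buf[0], 4);
        printf("TA session id: %u\n", session_id);
        close(fd);
        return 0;
    }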
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.h new file mode 100644 index 000000000000..cfc1542f63ef --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.h @@ -0,0 +1,29 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __AMDGPU_PSP_TA_H__
+#define __AMDGPU_PSP_TA_H__
+
+void amdgpu_ta_if_debugfs_init(struct amdgpu_device *adev);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 8f47c14ecbc7..7e126dff004f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -66,6 +66,8 @@ const char *ras_block_string[] = {
     "mp1",
     "fuse",
     "mca",
+    "vcn",
+    "jpeg",
 };
 
 const char *ras_mca_block_string[] = {
@@ -75,6 +77,13 @@ const char *ras_mca_block_string[] = {
     "mca_iohc",
 };
 
+struct amdgpu_ras_block_list {
+    /* ras block link */
+    struct list_head node;
+
+    struct amdgpu_ras_block_object *ras_obj;
+};
+
 const char *get_ras_block_str(struct ras_common_if *ras_block)
 {
     if (!ras_block)
@@ -89,6 +98,9 @@ const char *get_ras_block_str(struct ras_common_if *ras_block)
     return ras_block_string[ras_block->block];
 }
 
+#define ras_block_str(_BLOCK_) \
+    (((_BLOCK_) < ARRAY_SIZE(ras_block_string)) ? ras_block_string[_BLOCK_] : "Out Of Range")
+
 #define ras_err_str(i) (ras_error_string[ffs(i)])
 
 #define RAS_DEFAULT_FLAGS (AMDGPU_RAS_FLAG_INIT_BY_VBIOS)
@@ -155,14 +167,9 @@ static int amdgpu_reserve_page_direct(struct amdgpu_device *adev, uint64_t addre
     }
 
     memset(&err_rec, 0x0, sizeof(struct eeprom_table_record));
-
-    err_rec.address = address;
-    err_rec.retired_page = address >> AMDGPU_GPU_PAGE_SHIFT;
-    err_rec.ts = (uint64_t)ktime_get_real_seconds();
-    err_rec.err_type = AMDGPU_RAS_EEPROM_ERR_NON_RECOVERABLE;
-
     err_data.err_addr = &err_rec;
-    err_data.err_addr_cnt = 1;
+    amdgpu_umc_fill_error_record(&err_data, address,
+            (address >> AMDGPU_GPU_PAGE_SHIFT), 0, 0);
 
     if (amdgpu_bad_page_threshold != 0) {
         amdgpu_ras_add_bad_pages(adev, err_data.err_addr,
@@ -452,7 +459,7 @@ static ssize_t amdgpu_ras_debugfs_ctrl_write(struct file *f,
     }
 
     if (ret)
-        return -EINVAL;
+        return ret;
 
     return size;
 }
@@ -866,30 +873,47 @@ static int amdgpu_ras_enable_all_features(struct amdgpu_device *adev,
 }
 /* feature ctl end */
 
+static int amdgpu_ras_block_match_default(struct amdgpu_ras_block_object *block_obj,
+        enum amdgpu_ras_block block)
+{
+    if (!block_obj)
+        return -EINVAL;
+
+    if (block_obj->ras_comm.block == block)
+        return 0;
+
+    return -EINVAL;
+}
 
-static void amdgpu_ras_mca_query_error_status(struct amdgpu_device *adev,
-                          struct ras_common_if *ras_block,
-                          struct ras_err_data *err_data)
+static struct amdgpu_ras_block_object *amdgpu_ras_get_ras_block(struct amdgpu_device *adev,
+                    enum amdgpu_ras_block block, uint32_t sub_block_index)
 {
-    switch (ras_block->sub_block_index) {
-    case AMDGPU_RAS_MCA_BLOCK__MP0:
-        if (adev->mca.mp0.ras_funcs &&
-            adev->mca.mp0.ras_funcs->query_ras_error_count)
-            adev->mca.mp0.ras_funcs->query_ras_error_count(adev, &err_data);
-        break;
-    case AMDGPU_RAS_MCA_BLOCK__MP1:
-        if (adev->mca.mp1.ras_funcs &&
-            adev->mca.mp1.ras_funcs->query_ras_error_count)
-            adev->mca.mp1.ras_funcs->query_ras_error_count(adev, &err_data);
-        break;
-    case AMDGPU_RAS_MCA_BLOCK__MPIO:
-        if (adev->mca.mpio.ras_funcs &&
-            adev->mca.mpio.ras_funcs->query_ras_error_count)
-            adev->mca.mpio.ras_funcs->query_ras_error_count(adev, &err_data);
-        break;
-    default:
-        break;
+    struct amdgpu_ras_block_list *node, *tmp;
+    struct amdgpu_ras_block_object *obj;
+
+    if (block >= AMDGPU_RAS_BLOCK__LAST)
+        return NULL;
+
+    if (!amdgpu_ras_is_supported(adev, block))
+        return NULL;
+
+    list_for_each_entry_safe(node, tmp, &adev->ras_list, node) {
+        if (!node->ras_obj) {
+            dev_warn(adev->dev, "Warning: abnormal ras list node.\n");
+            continue;
} + + obj = node->ras_obj; + if (obj->ras_block_match) { + if (obj->ras_block_match(obj, block, sub_block_index) == 0) + return obj; + } else { + if (amdgpu_ras_block_match_default(obj, block) == 0) + return obj; + } } + + return NULL; } static void amdgpu_ras_get_ecc_info(struct amdgpu_device *adev, struct ras_err_data *err_data) @@ -901,26 +925,26 @@ static void amdgpu_ras_get_ecc_info(struct amdgpu_device *adev, struct ras_err_d * choosing right query method according to * whether smu support query error information */ - ret = smu_get_ecc_info(&adev->smu, (void *)&(ras->umc_ecc)); + ret = amdgpu_dpm_get_ecc_info(adev, (void *)&(ras->umc_ecc)); if (ret == -EOPNOTSUPP) { - if (adev->umc.ras_funcs && - adev->umc.ras_funcs->query_ras_error_count) - adev->umc.ras_funcs->query_ras_error_count(adev, err_data); + if (adev->umc.ras && adev->umc.ras->ras_block.hw_ops && + adev->umc.ras->ras_block.hw_ops->query_ras_error_count) + adev->umc.ras->ras_block.hw_ops->query_ras_error_count(adev, err_data); /* umc query_ras_error_address is also responsible for clearing * error status */ - if (adev->umc.ras_funcs && - adev->umc.ras_funcs->query_ras_error_address) - adev->umc.ras_funcs->query_ras_error_address(adev, err_data); + if (adev->umc.ras && adev->umc.ras->ras_block.hw_ops && + adev->umc.ras->ras_block.hw_ops->query_ras_error_address) + adev->umc.ras->ras_block.hw_ops->query_ras_error_address(adev, err_data); } else if (!ret) { - if (adev->umc.ras_funcs && - adev->umc.ras_funcs->ecc_info_query_ras_error_count) - adev->umc.ras_funcs->ecc_info_query_ras_error_count(adev, err_data); + if (adev->umc.ras && + adev->umc.ras->ecc_info_query_ras_error_count) + adev->umc.ras->ecc_info_query_ras_error_count(adev, err_data); - if (adev->umc.ras_funcs && - adev->umc.ras_funcs->ecc_info_query_ras_error_address) - adev->umc.ras_funcs->ecc_info_query_ras_error_address(adev, err_data); + if (adev->umc.ras && + adev->umc.ras->ecc_info_query_ras_error_address) + adev->umc.ras->ecc_info_query_ras_error_address(adev, err_data); } } @@ -928,62 +952,32 @@ static void amdgpu_ras_get_ecc_info(struct amdgpu_device *adev, struct ras_err_d int amdgpu_ras_query_error_status(struct amdgpu_device *adev, struct ras_query_if *info) { + struct amdgpu_ras_block_object *block_obj = NULL; struct ras_manager *obj = amdgpu_ras_find_obj(adev, &info->head); struct ras_err_data err_data = {0, 0, 0, NULL}; - int i; if (!obj) return -EINVAL; - switch (info->head.block) { - case AMDGPU_RAS_BLOCK__UMC: + if (info->head.block == AMDGPU_RAS_BLOCK__UMC) { amdgpu_ras_get_ecc_info(adev, &err_data); - break; - case AMDGPU_RAS_BLOCK__SDMA: - if (adev->sdma.funcs->query_ras_error_count) { - for (i = 0; i < adev->sdma.num_instances; i++) - adev->sdma.funcs->query_ras_error_count(adev, i, - &err_data); + } else { + block_obj = amdgpu_ras_get_ras_block(adev, info->head.block, 0); + if (!block_obj || !block_obj->hw_ops) { + dev_dbg_once(adev->dev, "%s doesn't config RAS function\n", + get_ras_block_str(&info->head)); + return -EINVAL; } - break; - case AMDGPU_RAS_BLOCK__GFX: - if (adev->gfx.ras_funcs && - adev->gfx.ras_funcs->query_ras_error_count) - adev->gfx.ras_funcs->query_ras_error_count(adev, &err_data); - - if (adev->gfx.ras_funcs && - adev->gfx.ras_funcs->query_ras_error_status) - adev->gfx.ras_funcs->query_ras_error_status(adev); - break; - case AMDGPU_RAS_BLOCK__MMHUB: - if (adev->mmhub.ras_funcs && - adev->mmhub.ras_funcs->query_ras_error_count) - adev->mmhub.ras_funcs->query_ras_error_count(adev, &err_data); - - if (adev->mmhub.ras_funcs && 
- adev->mmhub.ras_funcs->query_ras_error_status) - adev->mmhub.ras_funcs->query_ras_error_status(adev); - break; - case AMDGPU_RAS_BLOCK__PCIE_BIF: - if (adev->nbio.ras_funcs && - adev->nbio.ras_funcs->query_ras_error_count) - adev->nbio.ras_funcs->query_ras_error_count(adev, &err_data); - break; - case AMDGPU_RAS_BLOCK__XGMI_WAFL: - if (adev->gmc.xgmi.ras_funcs && - adev->gmc.xgmi.ras_funcs->query_ras_error_count) - adev->gmc.xgmi.ras_funcs->query_ras_error_count(adev, &err_data); - break; - case AMDGPU_RAS_BLOCK__HDP: - if (adev->hdp.ras_funcs && - adev->hdp.ras_funcs->query_ras_error_count) - adev->hdp.ras_funcs->query_ras_error_count(adev, &err_data); - break; - case AMDGPU_RAS_BLOCK__MCA: - amdgpu_ras_mca_query_error_status(adev, &info->head, &err_data); - break; - default: - break; + + if (block_obj->hw_ops->query_ras_error_count) + block_obj->hw_ops->query_ras_error_count(adev, &err_data); + + if ((info->head.block == AMDGPU_RAS_BLOCK__SDMA) || + (info->head.block == AMDGPU_RAS_BLOCK__GFX) || + (info->head.block == AMDGPU_RAS_BLOCK__MMHUB)) { + if (block_obj->hw_ops->query_ras_error_status) + block_obj->hw_ops->query_ras_error_status(adev); + } } obj->err_data.ue_count += err_data.ue_count; @@ -1040,68 +1034,27 @@ int amdgpu_ras_query_error_status(struct amdgpu_device *adev, int amdgpu_ras_reset_error_status(struct amdgpu_device *adev, enum amdgpu_ras_block block) { + struct amdgpu_ras_block_object *block_obj = amdgpu_ras_get_ras_block(adev, block, 0); + if (!amdgpu_ras_is_supported(adev, block)) return -EINVAL; - switch (block) { - case AMDGPU_RAS_BLOCK__GFX: - if (adev->gfx.ras_funcs && - adev->gfx.ras_funcs->reset_ras_error_count) - adev->gfx.ras_funcs->reset_ras_error_count(adev); - - if (adev->gfx.ras_funcs && - adev->gfx.ras_funcs->reset_ras_error_status) - adev->gfx.ras_funcs->reset_ras_error_status(adev); - break; - case AMDGPU_RAS_BLOCK__MMHUB: - if (adev->mmhub.ras_funcs && - adev->mmhub.ras_funcs->reset_ras_error_count) - adev->mmhub.ras_funcs->reset_ras_error_count(adev); - - if (adev->mmhub.ras_funcs && - adev->mmhub.ras_funcs->reset_ras_error_status) - adev->mmhub.ras_funcs->reset_ras_error_status(adev); - break; - case AMDGPU_RAS_BLOCK__SDMA: - if (adev->sdma.funcs->reset_ras_error_count) - adev->sdma.funcs->reset_ras_error_count(adev); - break; - case AMDGPU_RAS_BLOCK__HDP: - if (adev->hdp.ras_funcs && - adev->hdp.ras_funcs->reset_ras_error_count) - adev->hdp.ras_funcs->reset_ras_error_count(adev); - break; - default: - break; + if (!block_obj || !block_obj->hw_ops) { + dev_dbg_once(adev->dev, "%s doesn't config RAS function\n", + ras_block_str(block)); + return -EINVAL; } - return 0; -} - -/* Trigger XGMI/WAFL error */ -static int amdgpu_ras_error_inject_xgmi(struct amdgpu_device *adev, - struct ta_ras_trigger_error_input *block_info) -{ - int ret; - - if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_DISALLOW)) - dev_warn(adev->dev, "Failed to disallow df cstate"); - - if (amdgpu_dpm_allow_xgmi_power_down(adev, false)) - dev_warn(adev->dev, "Failed to disallow XGMI power down"); - - ret = psp_ras_trigger_error(&adev->psp, block_info); + if (block_obj->hw_ops->reset_ras_error_count) + block_obj->hw_ops->reset_ras_error_count(adev); - if (amdgpu_ras_intr_triggered()) - return ret; - - if (amdgpu_dpm_allow_xgmi_power_down(adev, true)) - dev_warn(adev->dev, "Failed to allow XGMI power down"); - - if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_ALLOW)) - dev_warn(adev->dev, "Failed to allow df cstate"); + if ((block == AMDGPU_RAS_BLOCK__GFX) || + (block == 
AMDGPU_RAS_BLOCK__MMHUB)) {
+        if (block_obj->hw_ops->reset_ras_error_status)
+            block_obj->hw_ops->reset_ras_error_status(adev);
+    }
 
-    return ret;
+    return 0;
 }
 
 /* wrapper of psp_ras_trigger_error */
@@ -1116,11 +1069,20 @@ int amdgpu_ras_error_inject(struct amdgpu_device *adev,
         .address = info->address,
         .value = info->value,
     };
-    int ret = 0;
+    int ret = -EINVAL;
+    struct amdgpu_ras_block_object *block_obj = amdgpu_ras_get_ras_block(adev,
+                            info->head.block,
+                            info->head.sub_block_index);
 
     if (!obj)
         return -EINVAL;
 
+    if (!block_obj || !block_obj->hw_ops) {
+        dev_dbg_once(adev->dev, "%s doesn't config RAS function\n",
+                 get_ras_block_str(&info->head));
+        return -EINVAL;
+    }
+
     /* Calculate XGMI relative offset */
     if (adev->gmc.xgmi.num_physical_nodes > 1) {
         block_info.address =
@@ -1128,28 +1090,15 @@ int amdgpu_ras_error_inject(struct amdgpu_device *adev,
                     block_info.address);
     }
 
-    switch (info->head.block) {
-    case AMDGPU_RAS_BLOCK__GFX:
-        if (adev->gfx.ras_funcs &&
-            adev->gfx.ras_funcs->ras_error_inject)
-            ret = adev->gfx.ras_funcs->ras_error_inject(adev, info);
-        else
-            ret = -EINVAL;
-        break;
-    case AMDGPU_RAS_BLOCK__UMC:
-    case AMDGPU_RAS_BLOCK__SDMA:
-    case AMDGPU_RAS_BLOCK__MMHUB:
-    case AMDGPU_RAS_BLOCK__PCIE_BIF:
-    case AMDGPU_RAS_BLOCK__MCA:
-        ret = psp_ras_trigger_error(&adev->psp, &block_info);
-        break;
-    case AMDGPU_RAS_BLOCK__XGMI_WAFL:
-        ret = amdgpu_ras_error_inject_xgmi(adev, &block_info);
-        break;
-    default:
-        dev_info(adev->dev, "%s error injection is not supported yet\n",
-             get_ras_block_str(&info->head));
-        ret = -EINVAL;
+    if (info->head.block == AMDGPU_RAS_BLOCK__GFX) {
+        if (block_obj->hw_ops->ras_error_inject)
+            ret = block_obj->hw_ops->ras_error_inject(adev, info);
+    } else {
+        /* if the block defines a special ras_error_inject (e.g. XGMI), use it */
+        if (block_obj->hw_ops->ras_error_inject)
+            ret = block_obj->hw_ops->ras_error_inject(adev, &block_info);
+        else /* if no .ras_error_inject is defined, fall back to the default PSP path */
+            ret = psp_ras_trigger_error(&adev->psp, &block_info);
     }
 
     if (ret)
@@ -1329,18 +1278,17 @@ static int amdgpu_ras_sysfs_remove_feature_node(struct amdgpu_device *adev)
 }
 
 int amdgpu_ras_sysfs_create(struct amdgpu_device *adev,
-        struct ras_fs_if *head)
+        struct ras_common_if *head)
 {
-    struct ras_manager *obj = amdgpu_ras_find_obj(adev, &head->head);
+    struct ras_manager *obj = amdgpu_ras_find_obj(adev, head);
 
     if (!obj || obj->attr_inuse)
         return -EINVAL;
 
     get_obj(obj);
 
-    memcpy(obj->fs_data.sysfs_name,
-            head->sysfs_name,
-            sizeof(obj->fs_data.sysfs_name));
+    snprintf(obj->fs_data.sysfs_name, sizeof(obj->fs_data.sysfs_name),
+         "%s_err_count", head->name);
 
     obj->sysfs_attr = (struct device_attribute){
         .attr = {
@@ -1567,12 +1515,97 @@ static int amdgpu_ras_fs_fini(struct amdgpu_device *adev)
 /* ras fs end */
 
 /* ih begin */
+
+/* For hardware that cannot enable the bif ring for both ras_controller_irq
+ * and ras_err_event_athub_irq ih cookies, the driver has to poll the status
+ * register to check whether the interrupt is triggered or not, and properly
+ * ack the interrupt if it is there
+ */
+void amdgpu_ras_interrupt_fatal_error_handler(struct amdgpu_device *adev)
+{
+    if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__PCIE_BIF))
+        return;
+
+    if (adev->nbio.ras &&
+        adev->nbio.ras->handle_ras_controller_intr_no_bifring)
+        adev->nbio.ras->handle_ras_controller_intr_no_bifring(adev);
+
+    if (adev->nbio.ras &&
+        adev->nbio.ras->handle_ras_err_event_athub_intr_no_bifring)
+        adev->nbio.ras->handle_ras_err_event_athub_intr_no_bifring(adev);
+}
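To make the poison-consumption path below easier to follow, here is a minimal sketch of how an IP block could hook into it. The foo_* names are hypothetical; the amdgpu_ras_block_hw_ops fields and amdgpu_ras_register_ras_block() are the interfaces this patch introduces in amdgpu_ras.h and amdgpu_ras.c.

    /* Illustrative sketch only; assumes the usual driver context. */
    static bool foo_query_poison_status(struct amdgpu_device *adev)
    {
        /* a real block would read its poison status register here */
        return true;
    }

    static bool foo_handle_poison_consumption(struct amdgpu_device *adev)
    {
        /* return true if the block could not recover and needs a reset */
        return true;
    }

    static const struct amdgpu_ras_block_hw_ops foo_ras_hw_ops = {
        .query_poison_status = foo_query_poison_status,
        .handle_poison_consumption = foo_handle_poison_consumption,
    };

    static struct amdgpu_ras_block_object foo_ras = {
        .ras_comm = {
            .name = "foo",
            .block = AMDGPU_RAS_BLOCK__SDMA, /* example block id */
            .type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE,
        },
        .hw_ops = &foo_ras_hw_ops,
    };

    /* called once from the IP's early init, before RAS late init runs */
    static void foo_set_ras_funcs(struct amdgpu_device *adev)
    {
        amdgpu_ras_register_ras_block(adev, &foo_ras);
    }

With a block registered this way, amdgpu_ras_get_ras_block() can find it, and the handler below will consult its optional poison callbacks before falling back to a full GPU reset.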
+
+static void amdgpu_ras_interrupt_poison_consumption_handler(struct ras_manager *obj,
+                struct amdgpu_iv_entry *entry)
+{
+    bool poison_stat = true, need_reset = true;
+    struct amdgpu_device *adev = obj->adev;
+    struct ras_err_data err_data = {0, 0, 0, NULL};
+    struct amdgpu_ras_block_object *block_obj =
+        amdgpu_ras_get_ras_block(adev, obj->head.block, 0);
+
+    if (!adev->gmc.xgmi.connected_to_cpu)
+        amdgpu_umc_poison_handler(adev, &err_data, false);
+
+    /* both query_poison_status and handle_poison_consumption are optional */
+    if (block_obj && block_obj->hw_ops) {
+        if (block_obj->hw_ops->query_poison_status) {
+            poison_stat = block_obj->hw_ops->query_poison_status(adev);
+            if (!poison_stat)
+                dev_info(adev->dev, "No RAS poison status in %s poison IH.\n",
+                        block_obj->ras_comm.name);
+        }
+
+        if (poison_stat && block_obj->hw_ops->handle_poison_consumption) {
+            poison_stat = block_obj->hw_ops->handle_poison_consumption(adev);
+            need_reset = poison_stat;
+        }
+    }
+
+    /* a GPU reset is the fallback for all failed cases */
+    if (need_reset)
+        amdgpu_ras_reset_gpu(adev);
+}
+
+static void amdgpu_ras_interrupt_poison_creation_handler(struct ras_manager *obj,
+                struct amdgpu_iv_entry *entry)
+{
+    dev_info(obj->adev->dev,
+        "Poison is created, no user action is needed.\n");
+}
+
+static void amdgpu_ras_interrupt_umc_handler(struct ras_manager *obj,
+                struct amdgpu_iv_entry *entry)
+{
+    struct ras_ih_data *data = &obj->ih_data;
+    struct ras_err_data err_data = {0, 0, 0, NULL};
+    int ret;
+
+    if (!data->cb)
+        return;
+
+    /* Let the IP handle its data; we may need to get the output
+     * from the callback to update the error type/count, etc.
+     */
+    ret = data->cb(obj->adev, &err_data, entry);
+    /* A UE will trigger an interrupt, and in that case we need
+     * to do a reset to recover the whole system. But leave that
+     * recovery to the IP; here we just dispatch the error.
+     */
+    if (ret == AMDGPU_RAS_SUCCESS) {
+        /* these counts could be left as 0 if
+         * some blocks do not count error numbers
+         */
+        obj->err_data.ue_count += err_data.ue_count;
+        obj->err_data.ce_count += err_data.ce_count;
+    }
+}
+
 static void amdgpu_ras_interrupt_handler(struct ras_manager *obj)
 {
     struct ras_ih_data *data = &obj->ih_data;
     struct amdgpu_iv_entry entry;
-    int ret;
-    struct ras_err_data err_data = {0, 0, 0, NULL};
 
     while (data->rptr != data->wptr) {
         rmb();
@@ -1583,30 +1616,17 @@ static void amdgpu_ras_interrupt_handler(struct ras_manager *obj)
         data->rptr = (data->aligned_element_size +
                 data->rptr) % data->ring_size;
 
-        if (data->cb) {
-            if (amdgpu_ras_is_poison_mode_supported(obj->adev) &&
-                obj->head.block == AMDGPU_RAS_BLOCK__UMC)
-                dev_info(obj->adev->dev,
-                        "Poison is created, no user action is needed.\n");
-            else {
-                /* Let IP handle its data, maybe we need get the output
-                 * from the callback to udpate the error type/count, etc
-                 */
-                memset(&err_data, 0, sizeof(err_data));
-                ret = data->cb(obj->adev, &err_data, &entry);
-                /* ue will trigger an interrupt, and in that case
-                 * we need do a reset to recovery the whole system.
-                 * But leave IP do that recovery, here we just dispatch
-                 * the error.
- */ - if (ret == AMDGPU_RAS_SUCCESS) { - /* these counts could be left as 0 if - * some blocks do not count error number - */ - obj->err_data.ue_count += err_data.ue_count; - obj->err_data.ce_count += err_data.ce_count; - } - } + if (amdgpu_ras_is_poison_mode_supported(obj->adev)) { + if (obj->head.block == AMDGPU_RAS_BLOCK__UMC) + amdgpu_ras_interrupt_poison_creation_handler(obj, &entry); + else + amdgpu_ras_interrupt_poison_consumption_handler(obj, &entry); + } else { + if (obj->head.block == AMDGPU_RAS_BLOCK__UMC) + amdgpu_ras_interrupt_umc_handler(obj, &entry); + else + dev_warn(obj->adev->dev, + "No RAS interrupt handler for non-UMC block with poison disabled.\n"); } } } @@ -1647,9 +1667,9 @@ int amdgpu_ras_interrupt_dispatch(struct amdgpu_device *adev, } int amdgpu_ras_interrupt_remove_handler(struct amdgpu_device *adev, - struct ras_ih_if *info) + struct ras_common_if *head) { - struct ras_manager *obj = amdgpu_ras_find_obj(adev, &info->head); + struct ras_manager *obj = amdgpu_ras_find_obj(adev, head); struct ras_ih_data *data; if (!obj) @@ -1669,24 +1689,27 @@ int amdgpu_ras_interrupt_remove_handler(struct amdgpu_device *adev, } int amdgpu_ras_interrupt_add_handler(struct amdgpu_device *adev, - struct ras_ih_if *info) + struct ras_common_if *head) { - struct ras_manager *obj = amdgpu_ras_find_obj(adev, &info->head); + struct ras_manager *obj = amdgpu_ras_find_obj(adev, head); struct ras_ih_data *data; + struct amdgpu_ras_block_object *ras_obj; if (!obj) { /* in case we registe the IH before enable ras feature */ - obj = amdgpu_ras_create_obj(adev, &info->head); + obj = amdgpu_ras_create_obj(adev, head); if (!obj) return -EINVAL; } else get_obj(obj); + ras_obj = container_of(head, struct amdgpu_ras_block_object, ras_comm); + data = &obj->ih_data; /* add the callback.etc */ *data = (struct ras_ih_data) { .inuse = 0, - .cb = info->cb, + .cb = ras_obj->ras_cb, .element_size = sizeof(struct amdgpu_iv_entry), .rptr = 0, .wptr = 0, @@ -1715,10 +1738,7 @@ static int amdgpu_ras_interrupt_remove_all(struct amdgpu_device *adev) struct ras_manager *obj, *tmp; list_for_each_entry_safe(obj, tmp, &con->head, node) { - struct ras_ih_if info = { - .head = obj->head, - }; - amdgpu_ras_interrupt_remove_handler(adev, &info); + amdgpu_ras_interrupt_remove_handler(adev, &obj->head); } return 0; @@ -1766,24 +1786,28 @@ static void amdgpu_ras_log_on_err_counter(struct amdgpu_device *adev) static void amdgpu_ras_error_status_query(struct amdgpu_device *adev, struct ras_query_if *info) { + struct amdgpu_ras_block_object *block_obj; /* * Only two block need to query read/write * RspStatus at current state */ - switch (info->head.block) { - case AMDGPU_RAS_BLOCK__GFX: - if (adev->gfx.ras_funcs && - adev->gfx.ras_funcs->query_ras_error_status) - adev->gfx.ras_funcs->query_ras_error_status(adev); - break; - case AMDGPU_RAS_BLOCK__MMHUB: - if (adev->mmhub.ras_funcs && - adev->mmhub.ras_funcs->query_ras_error_status) - adev->mmhub.ras_funcs->query_ras_error_status(adev); - break; - default: - break; + if ((info->head.block != AMDGPU_RAS_BLOCK__GFX) && + (info->head.block != AMDGPU_RAS_BLOCK__MMHUB)) + return; + + block_obj = amdgpu_ras_get_ras_block(adev, + info->head.block, + info->head.sub_block_index); + + if (!block_obj || !block_obj->hw_ops) { + dev_dbg_once(adev->dev, "%s doesn't config RAS function\n", + get_ras_block_str(&info->head)); + return; } + + if (block_obj->hw_ops->query_ras_error_status) + block_obj->hw_ops->query_ras_error_status(adev); + } static void amdgpu_ras_query_err_status(struct 
amdgpu_device *adev) @@ -1897,7 +1921,6 @@ static int amdgpu_ras_realloc_eh_data_space(struct amdgpu_device *adev, void *bps = kmalloc(align_space * sizeof(*data->bps), GFP_KERNEL); if (!bps) { - kfree(bps); return -ENOMEM; } @@ -2118,6 +2141,7 @@ int amdgpu_ras_recovery_init(struct amdgpu_device *adev) mutex_init(&con->recovery_lock); INIT_WORK(&con->recovery_work, amdgpu_ras_do_recovery); atomic_set(&con->in_recovery, 0); + con->eeprom_control.bad_channel_bitmap = 0; max_eeprom_records_count = amdgpu_ras_eeprom_max_record_count(); amdgpu_ras_validate_threshold(adev, max_eeprom_records_count); @@ -2141,8 +2165,12 @@ int amdgpu_ras_recovery_init(struct amdgpu_device *adev) if (ret) goto free; - if (adev->smu.ppt_funcs && adev->smu.ppt_funcs->send_hbm_bad_pages_num) - adev->smu.ppt_funcs->send_hbm_bad_pages_num(&adev->smu, con->eeprom_control.ras_num_recs); + amdgpu_dpm_send_hbm_bad_pages_num(adev, con->eeprom_control.ras_num_recs); + + if (con->update_channel_flag == true) { + amdgpu_dpm_send_hbm_bad_channel_flag(adev, con->eeprom_control.bad_channel_bitmap); + con->update_channel_flag = false; + } } #ifdef CONFIG_X86_MCE_AMD @@ -2250,6 +2278,13 @@ static void amdgpu_ras_check_supported(struct amdgpu_device *adev) dev_info(adev->dev, "SRAM ECC is active.\n"); adev->ras_hw_enabled |= ~(1 << AMDGPU_RAS_BLOCK__UMC | 1 << AMDGPU_RAS_BLOCK__DF); + + if (adev->ip_versions[VCN_HWIP][0] == IP_VERSION(2, 6, 0)) + adev->ras_hw_enabled |= (1 << AMDGPU_RAS_BLOCK__VCN | + 1 << AMDGPU_RAS_BLOCK__JPEG); + else + adev->ras_hw_enabled &= ~(1 << AMDGPU_RAS_BLOCK__VCN | + 1 << AMDGPU_RAS_BLOCK__JPEG); } else { dev_info(adev->dev, "SRAM ECC is not presented.\n"); } @@ -2336,6 +2371,7 @@ int amdgpu_ras_init(struct amdgpu_device *adev) goto release_con; } + con->update_channel_flag = false; con->features = 0; INIT_LIST_HEAD(&con->head); /* Might need get this flag from vbios. 
*/ @@ -2348,24 +2384,27 @@ int amdgpu_ras_init(struct amdgpu_device *adev) case CHIP_VEGA20: case CHIP_ARCTURUS: case CHIP_ALDEBARAN: - if (!adev->gmc.xgmi.connected_to_cpu) - adev->nbio.ras_funcs = &nbio_v7_4_ras_funcs; + if (!adev->gmc.xgmi.connected_to_cpu) { + adev->nbio.ras = &nbio_v7_4_ras; + amdgpu_ras_register_ras_block(adev, &adev->nbio.ras->ras_block); + adev->nbio.ras_if = &adev->nbio.ras->ras_block.ras_comm; + } break; default: /* nbio ras is not available */ break; } - if (adev->nbio.ras_funcs && - adev->nbio.ras_funcs->init_ras_controller_interrupt) { - r = adev->nbio.ras_funcs->init_ras_controller_interrupt(adev); + if (adev->nbio.ras && + adev->nbio.ras->init_ras_controller_interrupt) { + r = adev->nbio.ras->init_ras_controller_interrupt(adev); if (r) goto release_con; } - if (adev->nbio.ras_funcs && - adev->nbio.ras_funcs->init_ras_err_event_athub_interrupt) { - r = adev->nbio.ras_funcs->init_ras_err_event_athub_interrupt(adev); + if (adev->nbio.ras && + adev->nbio.ras->init_ras_err_event_athub_interrupt) { + r = adev->nbio.ras->init_ras_err_event_athub_interrupt(adev); if (r) goto release_con; } @@ -2377,12 +2416,12 @@ int amdgpu_ras_init(struct amdgpu_device *adev) } else if (adev->df.funcs && adev->df.funcs->query_ras_poison_mode && - adev->umc.ras_funcs && - adev->umc.ras_funcs->query_ras_poison_mode) { + adev->umc.ras && + adev->umc.ras->query_ras_poison_mode) { df_poison = adev->df.funcs->query_ras_poison_mode(adev); umc_poison = - adev->umc.ras_funcs->query_ras_poison_mode(adev); + adev->umc.ras->query_ras_poison_mode(adev); /* Only poison is set in both DF and UMC, we can support it */ if (df_poison && umc_poison) con->poison_supported = true; @@ -2445,11 +2484,10 @@ bool amdgpu_ras_is_poison_mode_supported(struct amdgpu_device *adev) } /* helper function to handle common stuff in ip late init phase */ -int amdgpu_ras_late_init(struct amdgpu_device *adev, - struct ras_common_if *ras_block, - struct ras_fs_if *fs_info, - struct ras_ih_if *ih_info) +int amdgpu_ras_block_late_init(struct amdgpu_device *adev, + struct ras_common_if *ras_block) { + struct amdgpu_ras_block_object *ras_obj = NULL; struct amdgpu_ras *con = amdgpu_ras_get_context(adev); unsigned long ue_count, ce_count; int r; @@ -2477,15 +2515,16 @@ int amdgpu_ras_late_init(struct amdgpu_device *adev, if (adev->in_suspend || amdgpu_in_reset(adev)) return 0; - if (ih_info->cb) { - r = amdgpu_ras_interrupt_add_handler(adev, ih_info); + ras_obj = container_of(ras_block, struct amdgpu_ras_block_object, ras_comm); + if (ras_obj->ras_cb) { + r = amdgpu_ras_interrupt_add_handler(adev, ras_block); if (r) - goto interrupt; + goto cleanup; } - r = amdgpu_ras_sysfs_create(adev, fs_info); + r = amdgpu_ras_sysfs_create(adev, ras_block); if (r) - goto sysfs; + goto interrupt; /* Those are the cached values at init. 
*/ @@ -2495,27 +2534,40 @@ int amdgpu_ras_late_init(struct amdgpu_device *adev, } return 0; -cleanup: - amdgpu_ras_sysfs_remove(adev, ras_block); -sysfs: - if (ih_info->cb) - amdgpu_ras_interrupt_remove_handler(adev, ih_info); + interrupt: + if (ras_obj->ras_cb) + amdgpu_ras_interrupt_remove_handler(adev, ras_block); +cleanup: amdgpu_ras_feature_enable(adev, ras_block, 0); return r; } +static int amdgpu_ras_block_late_init_default(struct amdgpu_device *adev, + struct ras_common_if *ras_block) +{ + return amdgpu_ras_block_late_init(adev, ras_block); +} + /* helper function to remove ras fs node and interrupt handler */ -void amdgpu_ras_late_fini(struct amdgpu_device *adev, - struct ras_common_if *ras_block, - struct ras_ih_if *ih_info) +void amdgpu_ras_block_late_fini(struct amdgpu_device *adev, + struct ras_common_if *ras_block) { - if (!ras_block || !ih_info) + struct amdgpu_ras_block_object *ras_obj; + if (!ras_block) return; amdgpu_ras_sysfs_remove(adev, ras_block); - if (ih_info->cb) - amdgpu_ras_interrupt_remove_handler(adev, ih_info); + + ras_obj = container_of(ras_block, struct amdgpu_ras_block_object, ras_comm); + if (ras_obj->ras_cb) + amdgpu_ras_interrupt_remove_handler(adev, ras_block); +} + +static void amdgpu_ras_block_late_fini_default(struct amdgpu_device *adev, + struct ras_common_if *ras_block) +{ + return amdgpu_ras_block_late_fini(adev, ras_block); } /* do some init work after IP late init as dependence. @@ -2568,6 +2620,33 @@ void amdgpu_ras_suspend(struct amdgpu_device *adev) amdgpu_ras_disable_all_features(adev, 1); } +int amdgpu_ras_late_init(struct amdgpu_device *adev) +{ + struct amdgpu_ras_block_list *node, *tmp; + struct amdgpu_ras_block_object *obj; + int r; + + list_for_each_entry_safe(node, tmp, &adev->ras_list, node) { + if (!node->ras_obj) { + dev_warn(adev->dev, "Warning: abnormal ras list node.\n"); + continue; + } + + obj = node->ras_obj; + if (obj->ras_late_init) { + r = obj->ras_late_init(adev, &obj->ras_comm); + if (r) { + dev_err(adev->dev, "%s failed to execute ras_late_init! 
ret:%d\n", + obj->ras_comm.name, r); + return r; + } + } else + amdgpu_ras_block_late_init_default(adev, &obj->ras_comm); + } + + return 0; +} + /* do some fini work before IP fini as dependence */ int amdgpu_ras_pre_fini(struct amdgpu_device *adev) { @@ -2585,11 +2664,28 @@ int amdgpu_ras_pre_fini(struct amdgpu_device *adev) int amdgpu_ras_fini(struct amdgpu_device *adev) { + struct amdgpu_ras_block_list *ras_node, *tmp; + struct amdgpu_ras_block_object *obj = NULL; struct amdgpu_ras *con = amdgpu_ras_get_context(adev); if (!adev->ras_enabled || !con) return 0; + list_for_each_entry_safe(ras_node, tmp, &adev->ras_list, node) { + if (ras_node->ras_obj) { + obj = ras_node->ras_obj; + if (amdgpu_ras_is_supported(adev, obj->ras_comm.block) && + obj->ras_fini) + obj->ras_fini(adev, &obj->ras_comm); + else + amdgpu_ras_block_late_fini_default(adev, &obj->ras_comm); + } + + /* Clear ras blocks from ras_list and free ras block list node */ + list_del(&ras_node->node); + kfree(ras_node); + } + amdgpu_ras_fs_fini(adev); amdgpu_ras_interrupt_remove_all(adev); @@ -2717,8 +2813,6 @@ static int amdgpu_bad_page_notifier(struct notifier_block *nb, dev_info(adev->dev, "Uncorrectable error detected in UMC inst: %d, chan_idx: %d", umc_inst, ch_inst); - memset(&err_rec, 0x0, sizeof(struct eeprom_table_record)); - /* * Translate UMC channel address to Physical address */ @@ -2730,16 +2824,10 @@ static int amdgpu_bad_page_notifier(struct notifier_block *nb, ADDR_OF_256B_BLOCK(channel_index) | OFFSET_IN_256B_BLOCK(m->addr); - err_rec.address = m->addr; - err_rec.retired_page = retired_page >> AMDGPU_GPU_PAGE_SHIFT; - err_rec.ts = (uint64_t)ktime_get_real_seconds(); - err_rec.err_type = AMDGPU_RAS_EEPROM_ERR_NON_RECOVERABLE; - err_rec.cu = 0; - err_rec.mem_channel = channel_index; - err_rec.mcumc_id = umc_inst; - + memset(&err_rec, 0x0, sizeof(struct eeprom_table_record)); err_data.err_addr = &err_rec; - err_data.err_addr_cnt = 1; + amdgpu_umc_fill_error_record(&err_data, m->addr, + retired_page, channel_index, umc_inst); if (amdgpu_bad_page_threshold != 0) { amdgpu_ras_add_bad_pages(adev, err_data.err_addr, @@ -2777,3 +2865,63 @@ static void amdgpu_register_bad_pages_mca_notifier(struct amdgpu_device *adev) } } #endif + +struct amdgpu_ras *amdgpu_ras_get_context(struct amdgpu_device *adev) +{ + if (!adev) + return NULL; + + return adev->psp.ras_context.ras; +} + +int amdgpu_ras_set_context(struct amdgpu_device *adev, struct amdgpu_ras *ras_con) +{ + if (!adev) + return -EINVAL; + + adev->psp.ras_context.ras = ras_con; + return 0; +} + +/* check if ras is supported on block, say, sdma, gfx */ +int amdgpu_ras_is_supported(struct amdgpu_device *adev, + unsigned int block) +{ + struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); + + if (block >= AMDGPU_RAS_BLOCK_COUNT) + return 0; + return ras && (adev->ras_enabled & (1 << block)); +} + +int amdgpu_ras_reset_gpu(struct amdgpu_device *adev) +{ + struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); + + if (atomic_cmpxchg(&ras->in_recovery, 0, 1) == 0) + schedule_work(&ras->recovery_work); + return 0; +} + + +/* Register each ip ras block into amdgpu ras */ +int amdgpu_ras_register_ras_block(struct amdgpu_device *adev, + struct amdgpu_ras_block_object *ras_block_obj) +{ + struct amdgpu_ras_block_list *ras_node; + if (!adev || !ras_block_obj) + return -EINVAL; + + if (!amdgpu_ras_asic_supported(adev)) + return 0; + + ras_node = kzalloc(sizeof(*ras_node), GFP_KERNEL); + if (!ras_node) + return -ENOMEM; + + INIT_LIST_HEAD(&ras_node->node); + ras_node->ras_obj = 
ras_block_obj; + list_add_tail(&ras_node->node, &adev->ras_list); + + return 0; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h index 1c708122d492..b9a6fac2b8b2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h @@ -26,11 +26,11 @@ #include <linux/debugfs.h> #include <linux/list.h> -#include "amdgpu.h" -#include "amdgpu_psp.h" #include "ta_ras_if.h" #include "amdgpu_ras_eeprom.h" +struct amdgpu_iv_entry; + #define AMDGPU_RAS_FLAG_INIT_BY_VBIOS (0x1 << 0) enum amdgpu_ras_block { @@ -49,6 +49,8 @@ enum amdgpu_ras_block { AMDGPU_RAS_BLOCK__MP1, AMDGPU_RAS_BLOCK__FUSE, AMDGPU_RAS_BLOCK__MCA, + AMDGPU_RAS_BLOCK__VCN, + AMDGPU_RAS_BLOCK__JPEG, AMDGPU_RAS_BLOCK__LAST }; @@ -374,6 +376,9 @@ struct amdgpu_ras { /* record umc error info queried from smu */ struct umc_ecc_info umc_ecc; + + /* Indicates smu whether need update bad channel info */ + bool update_channel_flag; }; struct ras_fs_data { @@ -484,6 +489,29 @@ struct ras_debug_if { }; int op; }; + +struct amdgpu_ras_block_object { + struct ras_common_if ras_comm; + + int (*ras_block_match)(struct amdgpu_ras_block_object *block_obj, + enum amdgpu_ras_block block, uint32_t sub_block_index); + int (*ras_late_init)(struct amdgpu_device *adev, struct ras_common_if *ras_block); + void (*ras_fini)(struct amdgpu_device *adev, struct ras_common_if *ras_block); + ras_ih_cb ras_cb; + const struct amdgpu_ras_block_hw_ops *hw_ops; +}; + +struct amdgpu_ras_block_hw_ops { + int (*ras_error_inject)(struct amdgpu_device *adev, void *inject_if); + void (*query_ras_error_count)(struct amdgpu_device *adev, void *ras_error_status); + void (*query_ras_error_status)(struct amdgpu_device *adev); + void (*query_ras_error_address)(struct amdgpu_device *adev, void *ras_error_status); + void (*reset_ras_error_count)(struct amdgpu_device *adev); + void (*reset_ras_error_status)(struct amdgpu_device *adev); + bool (*query_poison_status)(struct amdgpu_device *adev); + bool (*handle_poison_consumption)(struct amdgpu_device *adev); +}; + /* work flow * vbios * 1: ras feature enable (enabled by default) @@ -498,19 +526,6 @@ struct ras_debug_if { * 8: feature disable */ -#define amdgpu_ras_get_context(adev) ((adev)->psp.ras_context.ras) -#define amdgpu_ras_set_context(adev, ras_con) ((adev)->psp.ras_context.ras = (ras_con)) - -/* check if ras is supported on block, say, sdma, gfx */ -static inline int amdgpu_ras_is_supported(struct amdgpu_device *adev, - unsigned int block) -{ - struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); - - if (block >= AMDGPU_RAS_BLOCK_COUNT) - return 0; - return ras && (adev->ras_enabled & (1 << block)); -} int amdgpu_ras_recovery_init(struct amdgpu_device *adev); @@ -527,15 +542,6 @@ int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev, int amdgpu_ras_save_bad_pages(struct amdgpu_device *adev); -static inline int amdgpu_ras_reset_gpu(struct amdgpu_device *adev) -{ - struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); - - if (atomic_cmpxchg(&ras->in_recovery, 0, 1) == 0) - schedule_work(&ras->recovery_work); - return 0; -} - static inline enum ta_ras_block amdgpu_ras_block_to_ta(enum amdgpu_ras_block block) { switch (block) { @@ -596,15 +602,15 @@ amdgpu_ras_error_to_ta(enum amdgpu_ras_error_type error) { /* called in ip_init and ip_fini */ int amdgpu_ras_init(struct amdgpu_device *adev); +int amdgpu_ras_late_init(struct amdgpu_device *adev); int amdgpu_ras_fini(struct amdgpu_device *adev); int amdgpu_ras_pre_fini(struct amdgpu_device *adev); 
-int amdgpu_ras_late_init(struct amdgpu_device *adev, - struct ras_common_if *ras_block, - struct ras_fs_if *fs_info, - struct ras_ih_if *ih_info); -void amdgpu_ras_late_fini(struct amdgpu_device *adev, - struct ras_common_if *ras_block, - struct ras_ih_if *ih_info); + +int amdgpu_ras_block_late_init(struct amdgpu_device *adev, + struct ras_common_if *ras_block); + +void amdgpu_ras_block_late_fini(struct amdgpu_device *adev, + struct ras_common_if *ras_block); int amdgpu_ras_feature_enable(struct amdgpu_device *adev, struct ras_common_if *head, bool enable); @@ -613,7 +619,7 @@ int amdgpu_ras_feature_enable_on_boot(struct amdgpu_device *adev, struct ras_common_if *head, bool enable); int amdgpu_ras_sysfs_create(struct amdgpu_device *adev, - struct ras_fs_if *head); + struct ras_common_if *head); int amdgpu_ras_sysfs_remove(struct amdgpu_device *adev, struct ras_common_if *head); @@ -630,10 +636,10 @@ int amdgpu_ras_error_inject(struct amdgpu_device *adev, struct ras_inject_if *info); int amdgpu_ras_interrupt_add_handler(struct amdgpu_device *adev, - struct ras_ih_if *info); + struct ras_common_if *head); int amdgpu_ras_interrupt_remove_handler(struct amdgpu_device *adev, - struct ras_ih_if *info); + struct ras_common_if *head); int amdgpu_ras_interrupt_dispatch(struct amdgpu_device *adev, struct ras_dispatch_if *info); @@ -667,4 +673,15 @@ const char *get_ras_block_str(struct ras_common_if *ras_block); bool amdgpu_ras_is_poison_mode_supported(struct amdgpu_device *adev); +int amdgpu_ras_is_supported(struct amdgpu_device *adev, unsigned int block); + +int amdgpu_ras_reset_gpu(struct amdgpu_device *adev); + +struct amdgpu_ras* amdgpu_ras_get_context(struct amdgpu_device *adev); + +int amdgpu_ras_set_context(struct amdgpu_device *adev, struct amdgpu_ras *ras_con); + +int amdgpu_ras_register_ras_block(struct amdgpu_device *adev, + struct amdgpu_ras_block_object *ras_block_obj); +void amdgpu_ras_interrupt_fatal_error_handler(struct amdgpu_device *adev); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c index 05117eda105b..c4283987bb1e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c @@ -31,6 +31,8 @@ #include <linux/debugfs.h> #include <linux/uaccess.h> +#include "amdgpu_reset.h" + #define EEPROM_I2C_MADDR_VEGA20 0x0 #define EEPROM_I2C_MADDR_ARCTURUS 0x40000 #define EEPROM_I2C_MADDR_ARCTURUS_D342 0x0 @@ -193,12 +195,12 @@ static int __write_table_header(struct amdgpu_ras_eeprom_control *control) __encode_table_header_to_buf(&control->tbl_hdr, buf); /* i2c may be unstable in gpu reset */ - down_read(&adev->reset_sem); - res = amdgpu_eeprom_write(&adev->pm.smu_i2c, + down_read(&adev->reset_domain->sem); + res = amdgpu_eeprom_write(adev->pm.ras_eeprom_i2c_bus, control->i2c_address + control->ras_header_offset, buf, RAS_TABLE_HEADER_SIZE); - up_read(&adev->reset_sem); + up_read(&adev->reset_domain->sem); if (res < 0) { DRM_ERROR("Failed to write EEPROM table header:%d", res); @@ -263,7 +265,9 @@ static int amdgpu_ras_eeprom_correct_header_tag( */ int amdgpu_ras_eeprom_reset_table(struct amdgpu_ras_eeprom_control *control) { + struct amdgpu_device *adev = to_amdgpu_device(control); struct amdgpu_ras_eeprom_table_header *hdr = &control->tbl_hdr; + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); u8 csum; int res; @@ -282,6 +286,12 @@ int amdgpu_ras_eeprom_reset_table(struct amdgpu_ras_eeprom_control *control) control->ras_num_recs = 0; control->ras_fri = 0; + 
amdgpu_dpm_send_hbm_bad_pages_num(adev, control->ras_num_recs); + + control->bad_channel_bitmap = 0; + amdgpu_dpm_send_hbm_bad_channel_flag(adev, control->bad_channel_bitmap); + con->update_channel_flag = false; + amdgpu_ras_debugfs_set_ret_size(control); mutex_unlock(&control->ras_tbl_mutex); @@ -387,13 +397,13 @@ static int __amdgpu_ras_eeprom_write(struct amdgpu_ras_eeprom_control *control, int res; /* i2c may be unstable in gpu reset */ - down_read(&adev->reset_sem); + down_read(&adev->reset_domain->sem); buf_size = num * RAS_TABLE_RECORD_SIZE; - res = amdgpu_eeprom_write(&adev->pm.smu_i2c, + res = amdgpu_eeprom_write(adev->pm.ras_eeprom_i2c_bus, control->i2c_address + RAS_INDEX_TO_OFFSET(control, fri), buf, buf_size); - up_read(&adev->reset_sem); + up_read(&adev->reset_domain->sem); if (res < 0) { DRM_ERROR("Writing %d EEPROM table records error:%d", num, res); @@ -415,6 +425,7 @@ amdgpu_ras_eeprom_append_table(struct amdgpu_ras_eeprom_control *control, struct eeprom_table_record *record, const u32 num) { + struct amdgpu_ras *con = amdgpu_ras_get_context(to_amdgpu_device(control)); u32 a, b, i; u8 *buf, *pp; int res; @@ -426,9 +437,16 @@ amdgpu_ras_eeprom_append_table(struct amdgpu_ras_eeprom_control *control, /* Encode all of them in one go. */ pp = buf; - for (i = 0; i < num; i++, pp += RAS_TABLE_RECORD_SIZE) + for (i = 0; i < num; i++, pp += RAS_TABLE_RECORD_SIZE) { __encode_table_record_to_buf(control, &record[i], pp); + /* update bad channel bitmap */ + if (!(control->bad_channel_bitmap & (1 << record[i].mem_channel))) { + control->bad_channel_bitmap |= 1 << record[i].mem_channel; + con->update_channel_flag = true; + } + } + /* a, first record index to write into. * b, last record index to write into. * a = first index to read (fri) + number of records in the table, @@ -547,12 +565,12 @@ amdgpu_ras_eeprom_update_header(struct amdgpu_ras_eeprom_control *control) goto Out; } - down_read(&adev->reset_sem); - res = amdgpu_eeprom_read(&adev->pm.smu_i2c, + down_read(&adev->reset_domain->sem); + res = amdgpu_eeprom_read(adev->pm.ras_eeprom_i2c_bus, control->i2c_address + control->ras_record_offset, buf, buf_size); - up_read(&adev->reset_sem); + up_read(&adev->reset_domain->sem); if (res < 0) { DRM_ERROR("EEPROM failed reading records:%d\n", res); @@ -642,13 +660,13 @@ static int __amdgpu_ras_eeprom_read(struct amdgpu_ras_eeprom_control *control, int res; /* i2c may be unstable in gpu reset */ - down_read(&adev->reset_sem); + down_read(&adev->reset_domain->sem); buf_size = num * RAS_TABLE_RECORD_SIZE; - res = amdgpu_eeprom_read(&adev->pm.smu_i2c, + res = amdgpu_eeprom_read(adev->pm.ras_eeprom_i2c_bus, control->i2c_address + RAS_INDEX_TO_OFFSET(control, fri), buf, buf_size); - up_read(&adev->reset_sem); + up_read(&adev->reset_domain->sem); if (res < 0) { DRM_ERROR("Reading %d EEPROM table records error:%d", num, res); @@ -681,6 +699,7 @@ int amdgpu_ras_eeprom_read(struct amdgpu_ras_eeprom_control *control, const u32 num) { struct amdgpu_device *adev = to_amdgpu_device(control); + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); int i, res; u8 *buf, *pp; u32 g0, g1; @@ -748,8 +767,15 @@ int amdgpu_ras_eeprom_read(struct amdgpu_ras_eeprom_control *control, /* Read up everything? Then transform. 
*/ pp = buf; - for (i = 0; i < num; i++, pp += RAS_TABLE_RECORD_SIZE) + for (i = 0; i < num; i++, pp += RAS_TABLE_RECORD_SIZE) { __decode_table_record_from_buf(control, &record[i], pp); + + /* update bad channel bitmap */ + if (!(control->bad_channel_bitmap & (1 << record[i].mem_channel))) { + control->bad_channel_bitmap |= 1 << record[i].mem_channel; + con->update_channel_flag = true; + } + } Out: kfree(buf); mutex_unlock(&control->ras_tbl_mutex); @@ -1009,7 +1035,7 @@ static int __verify_ras_table_checksum(struct amdgpu_ras_eeprom_control *control return -ENOMEM; } - res = amdgpu_eeprom_read(&adev->pm.smu_i2c, + res = amdgpu_eeprom_read(adev->pm.ras_eeprom_i2c_bus, control->i2c_address + control->ras_header_offset, buf, buf_size); @@ -1045,7 +1071,7 @@ int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control, return 0; /* Verify i2c adapter is initialized */ - if (!adev->pm.smu_i2c.algo) + if (!adev->pm.ras_eeprom_i2c_bus || !adev->pm.ras_eeprom_i2c_bus->algo) return -ENOENT; if (!__get_eeprom_i2c_addr(adev, control)) @@ -1057,7 +1083,7 @@ int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control, mutex_init(&control->ras_tbl_mutex); /* Read the table header from EEPROM address */ - res = amdgpu_eeprom_read(&adev->pm.smu_i2c, + res = amdgpu_eeprom_read(adev->pm.ras_eeprom_i2c_bus, control->i2c_address + control->ras_header_offset, buf, RAS_TABLE_HEADER_SIZE); if (res < RAS_TABLE_HEADER_SIZE) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h index 6bb00578bfbb..54d9bfe0881d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h @@ -80,6 +80,10 @@ struct amdgpu_ras_eeprom_control { /* Protect table access via this mutex. 
 */
     struct mutex ras_tbl_mutex;
+
+    /* Record the channels on which bad pages have occurred
+     */
+    u32 bad_channel_bitmap;
 };
 
 /*
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h index acfa207cf970..6546552e596c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h @@ -30,12 +30,15 @@
 #include <drm/ttm/ttm_resource.h>
 #include <drm/ttm/ttm_range_manager.h>
 
+#include "amdgpu_vram_mgr.h"
+
 /* state back for walking over vram_mgr and gtt_mgr allocations */
 struct amdgpu_res_cursor {
     uint64_t start;
     uint64_t size;
     uint64_t remaining;
-    struct drm_mm_node *node;
+    void *node;
+    uint32_t mem_type;
 };
 
 /**
@@ -52,27 +55,63 @@ static inline void amdgpu_res_first(struct ttm_resource *res,
                     uint64_t start, uint64_t size,
                     struct amdgpu_res_cursor *cur)
 {
+    struct drm_buddy_block *block;
+    struct list_head *head, *next;
     struct drm_mm_node *node;
 
-    if (!res || res->mem_type == TTM_PL_SYSTEM) {
-        cur->start = start;
-        cur->size = size;
-        cur->remaining = size;
-        cur->node = NULL;
-        WARN_ON(res && start + size > res->num_pages << PAGE_SHIFT);
-        return;
-    }
+    if (!res)
+        goto fallback;
 
     BUG_ON(start + size > res->num_pages << PAGE_SHIFT);
 
-    node = to_ttm_range_mgr_node(res)->mm_nodes;
-    while (start >= node->size << PAGE_SHIFT)
-        start -= node++->size << PAGE_SHIFT;
+    cur->mem_type = res->mem_type;
+
+    switch (cur->mem_type) {
+    case TTM_PL_VRAM:
+        head = &to_amdgpu_vram_mgr_resource(res)->blocks;
+
+        block = list_first_entry_or_null(head,
+                         struct drm_buddy_block,
+                         link);
+        if (!block)
+            goto fallback;
+
+        while (start >= amdgpu_vram_mgr_block_size(block)) {
+            start -= amdgpu_vram_mgr_block_size(block);
+
+            next = block->link.next;
+            if (next != head)
+                block = list_entry(next, struct drm_buddy_block, link);
+        }
+
+        cur->start = amdgpu_vram_mgr_block_start(block) + start;
+        cur->size = min(amdgpu_vram_mgr_block_size(block) - start, size);
+        cur->remaining = size;
+        cur->node = block;
+        break;
+    case TTM_PL_TT:
+        node = to_ttm_range_mgr_node(res)->mm_nodes;
+        while (start >= node->size << PAGE_SHIFT)
+            start -= node++->size << PAGE_SHIFT;
+
+        cur->start = (node->start << PAGE_SHIFT) + start;
+        cur->size = min((node->size << PAGE_SHIFT) - start, size);
+        cur->remaining = size;
+        cur->node = node;
+        break;
+    default:
+        goto fallback;
+    }
 
-    cur->start = (node->start << PAGE_SHIFT) + start;
-    cur->size = min((node->size << PAGE_SHIFT) - start, size);
+    return;
+
+fallback:
+    cur->start = start;
+    cur->size = size;
     cur->remaining = size;
-    cur->node = node;
+    cur->node = NULL;
+    WARN_ON(res && start + size > res->num_pages << PAGE_SHIFT);
+    return;
 }
 
 /**
@@ -85,7 +124,9 @@ static inline void amdgpu_res_first(struct ttm_resource *res,
  */
 static inline void amdgpu_res_next(struct amdgpu_res_cursor *cur, uint64_t size)
 {
-    struct drm_mm_node *node = cur->node;
+    struct drm_buddy_block *block;
+    struct drm_mm_node *node;
+    struct list_head *next;
 
     BUG_ON(size > cur->remaining);
 
@@ -99,9 +140,27 @@ static inline void amdgpu_res_next(struct amdgpu_res_cursor *cur, uint64_t size)
         return;
     }
 
-    cur->node = ++node;
-    cur->start = node->start << PAGE_SHIFT;
-    cur->size = min(node->size << PAGE_SHIFT, cur->remaining);
+    switch (cur->mem_type) {
+    case TTM_PL_VRAM:
+        block = cur->node;
+
+        next = block->link.next;
+        block = list_entry(next, struct drm_buddy_block, link);
+
+        cur->node = block;
+        cur->start = amdgpu_vram_mgr_block_start(block);
+        cur->size = min(amdgpu_vram_mgr_block_size(block), cur->remaining);
+        break;
+    case TTM_PL_TT:
+        node = cur->node;
+
+        cur->node = ++node;
+        cur->start = node->start << PAGE_SHIFT;
+        cur->size = min(node->size << PAGE_SHIFT, cur->remaining);
+        break;
+    default:
+        return;
+    }
 }
 
 #endif
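The cursor API above is meant to be consumed as a simple walk over the contiguous spans backing a TTM resource, whether those are drm_buddy blocks (VRAM) or drm_mm nodes (GTT). A minimal sketch of such a walk, with a hypothetical foo_walk_resource() caller, follows; the cursor fields and helpers are the ones defined in this header.

    /* Illustrative sketch only; assumes the usual driver context. */
    static void foo_walk_resource(struct ttm_resource *res, uint64_t size)
    {
        struct amdgpu_res_cursor cursor;

        /* position the cursor at byte 0 and cover 'size' bytes */
        amdgpu_res_first(res, 0, size, &cursor);
        while (cursor.remaining) {
            /* cursor.start/cursor.size describe one contiguous span:
             * a drm_buddy_block for VRAM, a drm_mm_node for GTT
             */
            pr_debug("span at 0x%llx, 0x%llx bytes\n",
                     cursor.start, cursor.size);

            /* consume the whole span and advance to the next one */
            amdgpu_res_next(&cursor, cursor.size);
        }
    }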
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c index 02afd4115675..c80af0889773 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c @@ -36,8 +36,8 @@ int amdgpu_reset_init(struct amdgpu_device *adev)
 {
     int ret = 0;
 
-    switch (adev->asic_type) {
-    case CHIP_ALDEBARAN:
+    switch (adev->ip_versions[MP1_HWIP][0]) {
+    case IP_VERSION(13, 0, 2):
         ret = aldebaran_reset_init(adev);
         break;
     default:
@@ -51,8 +51,8 @@ int amdgpu_reset_fini(struct amdgpu_device *adev)
 {
     int ret = 0;
 
-    switch (adev->asic_type) {
-    case CHIP_ALDEBARAN:
+    switch (adev->ip_versions[MP1_HWIP][0]) {
+    case IP_VERSION(13, 0, 2):
         ret = aldebaran_reset_fini(adev);
         break;
     default:
@@ -96,3 +96,59 @@ int amdgpu_reset_perform_reset(struct amdgpu_device *adev,
 
     return reset_handler->restore_hwcontext(adev->reset_cntl,
                         reset_context);
 }
+
+
+void amdgpu_reset_destroy_reset_domain(struct kref *ref)
+{
+    struct amdgpu_reset_domain *reset_domain = container_of(ref,
+                                struct amdgpu_reset_domain,
+                                refcount);
+    if (reset_domain->wq)
+        destroy_workqueue(reset_domain->wq);
+
+    kvfree(reset_domain);
+}
+
+struct amdgpu_reset_domain *amdgpu_reset_create_reset_domain(enum amdgpu_reset_domain_type type,
+                                 char *wq_name)
+{
+    struct amdgpu_reset_domain *reset_domain;
+
+    reset_domain = kvzalloc(sizeof(struct amdgpu_reset_domain), GFP_KERNEL);
+    if (!reset_domain) {
+        DRM_ERROR("Failed to allocate amdgpu_reset_domain!");
+        return NULL;
+    }
+
+    reset_domain->type = type;
+    kref_init(&reset_domain->refcount);
+
+    reset_domain->wq = create_singlethread_workqueue(wq_name);
+    if (!reset_domain->wq) {
+        DRM_ERROR("Failed to allocate wq for amdgpu_reset_domain!");
+        amdgpu_reset_put_reset_domain(reset_domain);
+        return NULL;
+
+    }
+
+    atomic_set(&reset_domain->in_gpu_reset, 0);
+    init_rwsem(&reset_domain->sem);
+
+    return reset_domain;
+}
+
+void amdgpu_device_lock_reset_domain(struct amdgpu_reset_domain *reset_domain)
+{
+    atomic_set(&reset_domain->in_gpu_reset, 1);
+    down_write(&reset_domain->sem);
+}
+
+
+void amdgpu_device_unlock_reset_domain(struct amdgpu_reset_domain *reset_domain)
+{
+    atomic_set(&reset_domain->in_gpu_reset, 0);
+    up_write(&reset_domain->sem);
+}
+
+
+
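The functions above define the lifecycle of a reset domain: it is allocated with its own single-threaded workqueue, refcounted, and guarded by a rw_semaphore that a reset provider takes for write while readers (such as the RAS EEPROM i2c accessors changed earlier in this series) take it for read. A minimal, hypothetical usage sketch, with invented foo_* naming and an assumed wiring into adev->reset_domain:

    /* Illustrative sketch only; assumes the usual driver context. */
    static int foo_init_reset_domain(struct amdgpu_device *adev)
    {
        struct amdgpu_reset_domain *domain;

        domain = amdgpu_reset_create_reset_domain(SINGLE_DEVICE,
                                                  "amdgpu-reset-dev");
        if (!domain)
            return -ENOMEM;

        /* the domain is refcounted; every device sharing it holds a
         * reference and drops it via amdgpu_reset_put_reset_domain()
         */
        adev->reset_domain = domain;

        /* a reset provider excludes readers of reset_domain->sem
         * for the duration of the reset
         */
        amdgpu_device_lock_reset_domain(domain);
        /* ... perform the actual reset here ... */
        amdgpu_device_unlock_reset_domain(domain);

        return 0;
    }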
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h index e00d38d9160a..1949dbe28a86 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h @@ -70,6 +70,21 @@ struct amdgpu_reset_control {
     void (*async_reset)(struct work_struct *work);
 };
 
+
+enum amdgpu_reset_domain_type {
+    SINGLE_DEVICE,
+    XGMI_HIVE
+};
+
+struct amdgpu_reset_domain {
+    struct kref refcount;
+    struct workqueue_struct *wq;
+    enum amdgpu_reset_domain_type type;
+    struct rw_semaphore sem;
+    atomic_t in_gpu_reset;
+};
+
+
 int amdgpu_reset_init(struct amdgpu_device *adev);
 
 int amdgpu_reset_fini(struct amdgpu_device *adev);
@@ -82,4 +97,29 @@ int amdgpu_reset_perform_reset(struct amdgpu_device *adev,
 int amdgpu_reset_add_handler(struct amdgpu_reset_control *reset_ctl,
                  struct amdgpu_reset_handler *handler);
 
+struct amdgpu_reset_domain *amdgpu_reset_create_reset_domain(enum amdgpu_reset_domain_type type,
+                                 char *wq_name);
+
+void amdgpu_reset_destroy_reset_domain(struct kref *ref);
+
+static inline bool amdgpu_reset_get_reset_domain(struct amdgpu_reset_domain *domain)
+{
+    return kref_get_unless_zero(&domain->refcount) != 0;
+}
+
+static inline void amdgpu_reset_put_reset_domain(struct amdgpu_reset_domain *domain)
+{
+    kref_put(&domain->refcount, amdgpu_reset_destroy_reset_domain);
+}
+
+static inline bool amdgpu_reset_domain_schedule(struct amdgpu_reset_domain *domain,
+                        struct work_struct *work)
+{
+    return queue_work(domain->wq, work);
+}
+
+void amdgpu_device_lock_reset_domain(struct amdgpu_reset_domain *reset_domain);
+
+void amdgpu_device_unlock_reset_domain(struct amdgpu_reset_domain *reset_domain);
+
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c index ab2351ba9574..7f33ae87cb41 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c @@ -191,8 +191,10 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
         ring->adev = adev;
         ring->idx = adev->num_rings++;
         adev->rings[ring->idx] = ring;
-        r = amdgpu_fence_driver_init_ring(ring, sched_hw_submission,
-                          sched_score);
+        ring->num_hw_submission = sched_hw_submission;
+        ring->sched_score = sched_score;
+        ring->vmid_wait = dma_fence_get_stub();
+        r = amdgpu_fence_driver_init_ring(ring);
         if (r)
             return r;
     }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index fae7d185ad0d..317d80209e95 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -28,6 +28,13 @@
 #include <drm/gpu_scheduler.h>
 #include <drm/drm_print.h>
 
+struct amdgpu_device;
+struct amdgpu_ring;
+struct amdgpu_ib;
+struct amdgpu_cs_parser;
+struct amdgpu_job;
+struct amdgpu_vm;
+
 /* max number of rings */
 #define AMDGPU_MAX_RINGS 28
 #define AMDGPU_MAX_HWIP_RINGS 8
@@ -82,11 +89,13 @@ enum amdgpu_ib_pool_type {
     AMDGPU_IB_POOL_MAX
 };
 
-struct amdgpu_device;
-struct amdgpu_ring;
-struct amdgpu_ib;
-struct amdgpu_cs_parser;
-struct amdgpu_job;
+struct amdgpu_ib {
+    struct amdgpu_sa_bo *sa_bo;
+    uint32_t length_dw;
+    uint64_t gpu_addr;
+    uint32_t *ptr;
+    uint32_t flags;
+};
 
 struct amdgpu_sched {
     u32 num_scheds;
@@ -111,12 +120,12 @@ struct amdgpu_fence_driver {
     struct dma_fence **fences;
 };
 
+extern const struct drm_sched_backend_ops amdgpu_sched_ops;
+
 void amdgpu_fence_driver_clear_job_fences(struct amdgpu_ring *ring);
 void amdgpu_fence_driver_force_completion(struct amdgpu_ring *ring);
 
-int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring,
-                  unsigned num_hw_submission,
-                  atomic_t *sched_score);
+int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring);
 int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring,
                    struct amdgpu_irq_src *irq_src,
                    unsigned irq_type);
@@ -146,6 +155,7 @@ struct amdgpu_ring_funcs {
     u32 nop;
     bool support_64bit_ptrs;
     bool no_user_fence;
+    bool secure_submission_supported;
     unsigned vmhub;
     unsigned extra_dw;
 
@@ -154,8 +164,12 @@ struct amdgpu_ring_funcs {
     u64 (*get_wptr)(struct amdgpu_ring *ring);
     void (*set_wptr)(struct amdgpu_ring *ring);
     /* validating and patching of IBs */
-    int (*parse_cs)(struct amdgpu_cs_parser *p, uint32_t ib_idx);
-    int (*patch_cs_in_place)(struct amdgpu_cs_parser *p, uint32_t ib_idx);
+    int (*parse_cs)(struct amdgpu_cs_parser *p,
+            struct amdgpu_job *job,
+            struct amdgpu_ib *ib);
+    int (*patch_cs_in_place)(struct amdgpu_cs_parser *p,
+                 struct amdgpu_job *job,
+                 struct amdgpu_ib *ib);
     /* constants to calculate how many DW are needed for an emit */
     unsigned emit_frame_size;
     unsigned emit_ib_size;
@@ -251,10
+265,12 @@ struct amdgpu_ring { bool has_compute_vm_bug; bool no_scheduler; int hw_prio; + unsigned num_hw_submission; + atomic_t *sched_score; }; -#define amdgpu_ring_parse_cs(r, p, ib) ((r)->funcs->parse_cs((p), (ib))) -#define amdgpu_ring_patch_cs_in_place(r, p, ib) ((r)->funcs->patch_cs_in_place((p), (ib))) +#define amdgpu_ring_parse_cs(r, p, job, ib) ((r)->funcs->parse_cs((p), (job), (ib))) +#define amdgpu_ring_patch_cs_in_place(r, p, job, ib) ((r)->funcs->patch_cs_in_place((p), (job), (ib))) #define amdgpu_ring_test_ring(r) (r)->funcs->test_ring((r)) #define amdgpu_ring_test_ib(r, t) (r)->funcs->test_ib((r), (t)) #define amdgpu_ring_get_rptr(r) (r)->funcs->get_rptr((r)) @@ -284,8 +300,8 @@ void amdgpu_ring_generic_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib); void amdgpu_ring_commit(struct amdgpu_ring *ring); void amdgpu_ring_undo(struct amdgpu_ring *ring); int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring, - unsigned int ring_size, struct amdgpu_irq_src *irq_src, - unsigned int irq_type, unsigned int prio, + unsigned int max_dw, struct amdgpu_irq_src *irq_src, + unsigned int irq_type, unsigned int hw_prio, atomic_t *sched_score); void amdgpu_ring_fini(struct amdgpu_ring *ring); void amdgpu_ring_emit_reg_write_reg_wait_helper(struct amdgpu_ring *ring, @@ -352,4 +368,29 @@ int amdgpu_ring_test_helper(struct amdgpu_ring *ring); void amdgpu_debugfs_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring); + +static inline u32 amdgpu_ib_get_value(struct amdgpu_ib *ib, int idx) +{ + return ib->ptr[idx]; +} + +static inline void amdgpu_ib_set_value(struct amdgpu_ib *ib, int idx, + uint32_t value) +{ + ib->ptr[idx] = value; +} + +int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm, + unsigned size, + enum amdgpu_ib_pool_type pool, + struct amdgpu_ib *ib); +void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib, + struct dma_fence *f); +int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, + struct amdgpu_ib *ibs, struct amdgpu_job *job, + struct dma_fence **f); +int amdgpu_ib_pool_init(struct amdgpu_device *adev); +void amdgpu_ib_pool_fini(struct amdgpu_device *adev); +int amdgpu_ib_ring_tests(struct amdgpu_device *adev); + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h index 00afd0dcae86..3f671a62b009 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h @@ -127,11 +127,19 @@ struct amdgpu_rlc_funcs { void (*reset)(struct amdgpu_device *adev); void (*start)(struct amdgpu_device *adev); void (*update_spm_vmid)(struct amdgpu_device *adev, unsigned vmid); - void (*sriov_wreg)(struct amdgpu_device *adev, u32 offset, u32 v, u32 acc_flags, u32 hwip); - u32 (*sriov_rreg)(struct amdgpu_device *adev, u32 offset, u32 acc_flags, u32 hwip); bool (*is_rlcg_access_range)(struct amdgpu_device *adev, uint32_t reg); }; +struct amdgpu_rlcg_reg_access_ctrl { + uint32_t scratch_reg0; + uint32_t scratch_reg1; + uint32_t scratch_reg2; + uint32_t scratch_reg3; + uint32_t grbm_cntl; + uint32_t grbm_idx; + uint32_t spare_int; +}; + struct amdgpu_rlc { /* for power gating */ struct amdgpu_bo *save_restore_obj; @@ -191,6 +199,10 @@ struct amdgpu_rlc { struct amdgpu_bo *rlc_toc_bo; uint64_t rlc_toc_gpu_addr; void *rlc_toc_buf; + + bool rlcg_reg_access_supported; + /* registers for rlcg indirect reg access */ + struct amdgpu_rlcg_reg_access_ctrl reg_access_ctrl; }; void amdgpu_gfx_rlc_enter_safe_mode(struct amdgpu_device *adev); diff 
--git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c index 65debb65a5df..e1835fd4b237 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c @@ -87,73 +87,30 @@ uint64_t amdgpu_sdma_get_csa_mc_addr(struct amdgpu_ring *ring, } int amdgpu_sdma_ras_late_init(struct amdgpu_device *adev, - void *ras_ih_info) + struct ras_common_if *ras_block) { int r, i; - struct ras_ih_if *ih_info = (struct ras_ih_if *)ras_ih_info; - struct ras_fs_if fs_info = { - .sysfs_name = "sdma_err_count", - }; - - if (!ih_info) - return -EINVAL; - - if (!adev->sdma.ras_if) { - adev->sdma.ras_if = kmalloc(sizeof(struct ras_common_if), GFP_KERNEL); - if (!adev->sdma.ras_if) - return -ENOMEM; - adev->sdma.ras_if->block = AMDGPU_RAS_BLOCK__SDMA; - adev->sdma.ras_if->type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE; - adev->sdma.ras_if->sub_block_index = 0; - } - fs_info.head = ih_info->head = *adev->sdma.ras_if; - r = amdgpu_ras_late_init(adev, adev->sdma.ras_if, - &fs_info, ih_info); + r = amdgpu_ras_block_late_init(adev, ras_block); if (r) - goto free; + return r; - if (amdgpu_ras_is_supported(adev, adev->sdma.ras_if->block)) { + if (amdgpu_ras_is_supported(adev, ras_block->block)) { for (i = 0; i < adev->sdma.num_instances; i++) { r = amdgpu_irq_get(adev, &adev->sdma.ecc_irq, AMDGPU_SDMA_IRQ_INSTANCE0 + i); if (r) goto late_fini; } - } else { - r = 0; - goto free; } return 0; late_fini: - amdgpu_ras_late_fini(adev, adev->sdma.ras_if, ih_info); -free: - kfree(adev->sdma.ras_if); - adev->sdma.ras_if = NULL; + amdgpu_ras_block_late_fini(adev, ras_block); return r; } -void amdgpu_sdma_ras_fini(struct amdgpu_device *adev) -{ - if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__SDMA) && - adev->sdma.ras_if) { - struct ras_common_if *ras_if = adev->sdma.ras_if; - struct ras_ih_if ih_info = { - .head = *ras_if, - /* the cb member will not be used by - * amdgpu_ras_interrupt_remove_handler, init it only - * to cheat the check in ras_late_fini - */ - .cb = amdgpu_sdma_process_ras_data_cb, - }; - - amdgpu_ras_late_fini(adev, ras_if, &ih_info); - kfree(ras_if); - } -} - int amdgpu_sdma_process_ras_data_cb(struct amdgpu_device *adev, void *err_data, struct amdgpu_iv_entry *entry) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h index f8fb755e3aa6..53ac3ebae8d6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h @@ -23,6 +23,7 @@ #ifndef __AMDGPU_SDMA_H__ #define __AMDGPU_SDMA_H__ +#include "amdgpu_ras.h" /* max number of IP instances */ #define AMDGPU_MAX_SDMA_INSTANCES 8 @@ -50,13 +51,8 @@ struct amdgpu_sdma_instance { bool burst_nop; }; -struct amdgpu_sdma_ras_funcs { - int (*ras_late_init)(struct amdgpu_device *adev, - void *ras_ih_info); - void (*ras_fini)(struct amdgpu_device *adev); - int (*query_ras_error_count)(struct amdgpu_device *adev, - uint32_t instance, void *ras_error_status); - void (*reset_ras_error_count)(struct amdgpu_device *adev); +struct amdgpu_sdma_ras { + struct amdgpu_ras_block_object ras_block; }; struct amdgpu_sdma { @@ -73,7 +69,7 @@ struct amdgpu_sdma { uint32_t srbm_soft_reset; bool has_page_queue; struct ras_common_if *ras_if; - const struct amdgpu_sdma_ras_funcs *funcs; + struct amdgpu_sdma_ras *ras; }; /* @@ -121,8 +117,7 @@ amdgpu_sdma_get_instance_from_ring(struct amdgpu_ring *ring); int amdgpu_sdma_get_index_from_ring(struct amdgpu_ring *ring, uint32_t *index); uint64_t amdgpu_sdma_get_csa_mc_addr(struct amdgpu_ring *ring, 
unsigned vmid); int amdgpu_sdma_ras_late_init(struct amdgpu_device *adev, - void *ras_ih_info); -void amdgpu_sdma_ras_fini(struct amdgpu_device *adev); + struct ras_common_if *ras_block); int amdgpu_sdma_process_ras_data_cb(struct amdgpu_device *adev, void *err_data, struct amdgpu_iv_entry *entry); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_smuio.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_smuio.h index 484bb3dcec47..c7a823f3f2c5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_smuio.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_smuio.h @@ -27,7 +27,7 @@ struct amdgpu_smuio_funcs { u32 (*get_rom_index_offset)(struct amdgpu_device *adev); u32 (*get_rom_data_offset)(struct amdgpu_device *adev); void (*update_rom_clock_gating)(struct amdgpu_device *adev, bool enable); - void (*get_clock_gating_state)(struct amdgpu_device *adev, u32 *flags); + void (*get_clock_gating_state)(struct amdgpu_device *adev, u64 *flags); u32 (*get_die_id)(struct amdgpu_device *adev); u32 (*get_socket_id)(struct amdgpu_device *adev); bool (*is_host_gpu_xgmi_supported)(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c index f7d8487799b2..504af1b93bfa 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c @@ -51,7 +51,6 @@ static struct kmem_cache *amdgpu_sync_slab; void amdgpu_sync_create(struct amdgpu_sync *sync) { hash_init(sync->fences); - sync->last_vm_update = NULL; } /** @@ -171,23 +170,6 @@ int amdgpu_sync_fence(struct amdgpu_sync *sync, struct dma_fence *f) return 0; } -/** - * amdgpu_sync_vm_fence - remember to sync to this VM fence - * - * @sync: sync object to add fence to - * @fence: the VM fence to add - * - * Add the fence to the sync object and remember it as VM update. - */ -int amdgpu_sync_vm_fence(struct amdgpu_sync *sync, struct dma_fence *fence) -{ - if (!fence) - return 0; - - amdgpu_sync_keep_later(&sync->last_vm_update, fence); - return amdgpu_sync_fence(sync, fence); -} - /* Determine based on the owner and mode if we should sync to a fence or not */ static bool amdgpu_sync_test_fence(struct amdgpu_device *adev, enum amdgpu_sync_mode mode, @@ -259,12 +241,12 @@ int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync, if (resv == NULL) return -EINVAL; - dma_resv_for_each_fence(&cursor, resv, true, f) { + /* TODO: Use DMA_RESV_USAGE_READ here */ + dma_resv_for_each_fence(&cursor, resv, DMA_RESV_USAGE_BOOKKEEP, f) { dma_fence_chain_for_each(f, f) { - struct dma_fence_chain *chain = to_dma_fence_chain(f); + struct dma_fence *tmp = dma_fence_chain_contained(f); - if (amdgpu_sync_test_fence(adev, mode, owner, chain ? 
- chain->fence : f)) { + if (amdgpu_sync_test_fence(adev, mode, owner, tmp)) { r = amdgpu_sync_fence(sync, f); dma_fence_put(f); if (r) @@ -377,9 +359,6 @@ int amdgpu_sync_clone(struct amdgpu_sync *source, struct amdgpu_sync *clone) } } - dma_fence_put(clone->last_vm_update); - clone->last_vm_update = dma_fence_get(source->last_vm_update); - return 0; } @@ -420,8 +399,6 @@ void amdgpu_sync_free(struct amdgpu_sync *sync) dma_fence_put(e->fence); kmem_cache_free(amdgpu_sync_slab, e); } - - dma_fence_put(sync->last_vm_update); } /** diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h index 7c0fe20c470d..2d5c613cda10 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h @@ -43,12 +43,10 @@ enum amdgpu_sync_mode { */ struct amdgpu_sync { DECLARE_HASHTABLE(fences, 4); - struct dma_fence *last_vm_update; }; void amdgpu_sync_create(struct amdgpu_sync *sync); int amdgpu_sync_fence(struct amdgpu_sync *sync, struct dma_fence *f); -int amdgpu_sync_vm_fence(struct amdgpu_sync *sync, struct dma_fence *fence); int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync, struct dma_resv *resv, enum amdgpu_sync_mode mode, void *owner); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c deleted file mode 100644 index 909d830b513e..000000000000 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c +++ /dev/null @@ -1,250 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 OR MIT -/* - * Copyright 2009 VMware, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. 
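/*
 * A minimal sketch of the usage-based dma_resv iteration that the
 * amdgpu_sync_resv() hunk above switches to.  The function name here is
 * hypothetical and the caller is assumed to hold the reservation lock;
 * dma_fence_chain_contained() returns the wrapped fence for a chain node
 * and the fence itself otherwise.
 */
#include <linux/dma-resv.h>
#include <linux/dma-fence-chain.h>

static void example_walk_bookkeep_fences(struct dma_resv *resv)
{
	struct dma_resv_iter cursor;
	struct dma_fence *f;

	dma_resv_for_each_fence(&cursor, resv, DMA_RESV_USAGE_BOOKKEEP, f) {
		struct dma_fence *iter;

		/* Unwrap chain nodes the same way the hunk above does. */
		dma_fence_chain_for_each(iter, f) {
			struct dma_fence *tmp = dma_fence_chain_contained(iter);

			pr_debug("fence context %llu seqno %llu\n",
				 tmp->context, tmp->seqno);
		}
	}
}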
- * - * Authors: Michel Dänzer - */ - -#include <drm/amdgpu_drm.h> -#include "amdgpu.h" -#include "amdgpu_uvd.h" -#include "amdgpu_vce.h" - -/* Test BO GTT->VRAM and VRAM->GTT GPU copies across the whole GTT aperture */ -static void amdgpu_do_test_moves(struct amdgpu_device *adev) -{ - struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring; - struct amdgpu_bo *vram_obj = NULL; - struct amdgpu_bo **gtt_obj = NULL; - struct amdgpu_bo_param bp; - uint64_t gart_addr, vram_addr; - unsigned n, size; - int i, r; - - size = 1024 * 1024; - - /* Number of tests = - * (Total GTT - gart_pin_size - (2 transfer windows for buffer moves)) / test size - */ - n = adev->gmc.gart_size - atomic64_read(&adev->gart_pin_size); - n -= AMDGPU_GTT_MAX_TRANSFER_SIZE * AMDGPU_GTT_NUM_TRANSFER_WINDOWS * - AMDGPU_GPU_PAGE_SIZE; - n /= size; - - gtt_obj = kcalloc(n, sizeof(*gtt_obj), GFP_KERNEL); - if (!gtt_obj) { - DRM_ERROR("Failed to allocate %d pointers\n", n); - r = 1; - goto out_cleanup; - } - memset(&bp, 0, sizeof(bp)); - bp.size = size; - bp.byte_align = PAGE_SIZE; - bp.domain = AMDGPU_GEM_DOMAIN_VRAM; - bp.flags = 0; - bp.type = ttm_bo_type_kernel; - bp.resv = NULL; - bp.bo_ptr_size = sizeof(struct amdgpu_bo); - - r = amdgpu_bo_create(adev, &bp, &vram_obj); - if (r) { - DRM_ERROR("Failed to create VRAM object\n"); - goto out_cleanup; - } - r = amdgpu_bo_reserve(vram_obj, false); - if (unlikely(r != 0)) - goto out_unref; - r = amdgpu_bo_pin(vram_obj, AMDGPU_GEM_DOMAIN_VRAM); - if (r) { - DRM_ERROR("Failed to pin VRAM object\n"); - goto out_unres; - } - vram_addr = amdgpu_bo_gpu_offset(vram_obj); - for (i = 0; i < n; i++) { - void *gtt_map, *vram_map; - void **gart_start, **gart_end; - void **vram_start, **vram_end; - struct dma_fence *fence = NULL; - - bp.domain = AMDGPU_GEM_DOMAIN_GTT; - r = amdgpu_bo_create(adev, &bp, gtt_obj + i); - if (r) { - DRM_ERROR("Failed to create GTT object %d\n", i); - goto out_lclean; - } - - r = amdgpu_bo_reserve(gtt_obj[i], false); - if (unlikely(r != 0)) - goto out_lclean_unref; - r = amdgpu_bo_pin(gtt_obj[i], AMDGPU_GEM_DOMAIN_GTT); - if (r) { - DRM_ERROR("Failed to pin GTT object %d\n", i); - goto out_lclean_unres; - } - r = amdgpu_ttm_alloc_gart(&gtt_obj[i]->tbo); - if (r) { - DRM_ERROR("%p bind failed\n", gtt_obj[i]); - goto out_lclean_unpin; - } - gart_addr = amdgpu_bo_gpu_offset(gtt_obj[i]); - - r = amdgpu_bo_kmap(gtt_obj[i], &gtt_map); - if (r) { - DRM_ERROR("Failed to map GTT object %d\n", i); - goto out_lclean_unpin; - } - - for (gart_start = gtt_map, gart_end = gtt_map + size; - gart_start < gart_end; - gart_start++) - *gart_start = gart_start; - - amdgpu_bo_kunmap(gtt_obj[i]); - - r = amdgpu_copy_buffer(ring, gart_addr, vram_addr, - size, NULL, &fence, false, false, false); - - if (r) { - DRM_ERROR("Failed GTT->VRAM copy %d\n", i); - goto out_lclean_unpin; - } - - r = dma_fence_wait(fence, false); - if (r) { - DRM_ERROR("Failed to wait for GTT->VRAM fence %d\n", i); - goto out_lclean_unpin; - } - - dma_fence_put(fence); - fence = NULL; - - r = amdgpu_bo_kmap(vram_obj, &vram_map); - if (r) { - DRM_ERROR("Failed to map VRAM object after copy %d\n", i); - goto out_lclean_unpin; - } - - for (gart_start = gtt_map, gart_end = gtt_map + size, - vram_start = vram_map, vram_end = vram_map + size; - vram_start < vram_end; - gart_start++, vram_start++) { - if (*vram_start != gart_start) { - DRM_ERROR("Incorrect GTT->VRAM copy %d: Got 0x%p, " - "expected 0x%p (GTT/VRAM offset " - "0x%16llx/0x%16llx)\n", - i, *vram_start, gart_start, - (unsigned long long) - (gart_addr - 
adev->gmc.gart_start + - (void *)gart_start - gtt_map), - (unsigned long long) - (vram_addr - adev->gmc.vram_start + - (void *)gart_start - gtt_map)); - amdgpu_bo_kunmap(vram_obj); - goto out_lclean_unpin; - } - *vram_start = vram_start; - } - - amdgpu_bo_kunmap(vram_obj); - - r = amdgpu_copy_buffer(ring, vram_addr, gart_addr, - size, NULL, &fence, false, false, false); - - if (r) { - DRM_ERROR("Failed VRAM->GTT copy %d\n", i); - goto out_lclean_unpin; - } - - r = dma_fence_wait(fence, false); - if (r) { - DRM_ERROR("Failed to wait for VRAM->GTT fence %d\n", i); - goto out_lclean_unpin; - } - - dma_fence_put(fence); - fence = NULL; - - r = amdgpu_bo_kmap(gtt_obj[i], &gtt_map); - if (r) { - DRM_ERROR("Failed to map GTT object after copy %d\n", i); - goto out_lclean_unpin; - } - - for (gart_start = gtt_map, gart_end = gtt_map + size, - vram_start = vram_map, vram_end = vram_map + size; - gart_start < gart_end; - gart_start++, vram_start++) { - if (*gart_start != vram_start) { - DRM_ERROR("Incorrect VRAM->GTT copy %d: Got 0x%p, " - "expected 0x%p (VRAM/GTT offset " - "0x%16llx/0x%16llx)\n", - i, *gart_start, vram_start, - (unsigned long long) - (vram_addr - adev->gmc.vram_start + - (void *)vram_start - vram_map), - (unsigned long long) - (gart_addr - adev->gmc.gart_start + - (void *)vram_start - vram_map)); - amdgpu_bo_kunmap(gtt_obj[i]); - goto out_lclean_unpin; - } - } - - amdgpu_bo_kunmap(gtt_obj[i]); - - DRM_INFO("Tested GTT->VRAM and VRAM->GTT copy for GTT offset 0x%llx\n", - gart_addr - adev->gmc.gart_start); - continue; - -out_lclean_unpin: - amdgpu_bo_unpin(gtt_obj[i]); -out_lclean_unres: - amdgpu_bo_unreserve(gtt_obj[i]); -out_lclean_unref: - amdgpu_bo_unref(&gtt_obj[i]); -out_lclean: - for (--i; i >= 0; --i) { - amdgpu_bo_unpin(gtt_obj[i]); - amdgpu_bo_unreserve(gtt_obj[i]); - amdgpu_bo_unref(&gtt_obj[i]); - } - if (fence) - dma_fence_put(fence); - break; - } - - amdgpu_bo_unpin(vram_obj); -out_unres: - amdgpu_bo_unreserve(vram_obj); -out_unref: - amdgpu_bo_unref(&vram_obj); -out_cleanup: - kfree(gtt_obj); - if (r) { - pr_warn("Error while testing BO move\n"); - } -} - -void amdgpu_test_moves(struct amdgpu_device *adev) -{ - if (adev->mman.buffer_funcs) - amdgpu_do_test_moves(adev); -} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h index d855cb53c7e0..06dfcf297a8d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h @@ -358,11 +358,10 @@ TRACE_EVENT(amdgpu_vm_update_ptes, } ), TP_printk("pid:%u vm_ctx:0x%llx start:0x%010llx end:0x%010llx," - " flags:0x%llx, incr:%llu, dst:\n%s%s", __entry->pid, + " flags:0x%llx, incr:%llu, dst:\n%s", __entry->pid, __entry->vm_ctx, __entry->start, __entry->end, __entry->flags, __entry->incr, __print_array( - __get_dynamic_array(dst), min(__entry->nptes, 32u), 8), - __entry->nptes > 32 ? "..." 
: "") + __get_dynamic_array(dst), __entry->nptes, 8)) ); TRACE_EVENT(amdgpu_vm_set_ptes, @@ -537,6 +536,22 @@ TRACE_EVENT(amdgpu_ib_pipe_sync, __entry->seqno) ); +TRACE_EVENT(amdgpu_reset_reg_dumps, + TP_PROTO(uint32_t address, uint32_t value), + TP_ARGS(address, value), + TP_STRUCT__entry( + __field(uint32_t, address) + __field(uint32_t, value) + ), + TP_fast_assign( + __entry->address = address; + __entry->value = value; + ), + TP_printk("amdgpu register dump 0x%x: 0x%x", + __entry->address, + __entry->value) +); + #undef AMDGPU_JOB_GET_TIMELINE_NAME #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace_points.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace_points.c index 57c6c39ba064..b96d885f6e33 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace_points.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace_points.c @@ -23,6 +23,7 @@ */ #include <drm/amdgpu_drm.h> +#include "amdgpu_cs.h" #include "amdgpu.h" #define CREATE_TRACE_POINTS diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 4655702a5e00..ec26edd4f4d8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -50,6 +50,7 @@ #include <drm/ttm/ttm_range_manager.h> #include <drm/amdgpu_drm.h> +#include <drm/drm_drv.h> #include "amdgpu.h" #include "amdgpu_object.h" @@ -170,10 +171,10 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo, * @bo: buffer object to map * @mem: memory object to map * @mm_cur: range to map - * @num_pages: number of pages to map * @window: which GART window to use * @ring: DMA ring to use for the copy * @tmz: if we should setup a TMZ enabled mapping + * @size: in number of bytes to map, out number of bytes mapped * @addr: resulting address inside the MC address space * * Setup one of the GART windows to access a specific piece of memory or return @@ -182,15 +183,14 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo, static int amdgpu_ttm_map_buffer(struct ttm_buffer_object *bo, struct ttm_resource *mem, struct amdgpu_res_cursor *mm_cur, - unsigned num_pages, unsigned window, - struct amdgpu_ring *ring, bool tmz, - uint64_t *addr) + unsigned window, struct amdgpu_ring *ring, + bool tmz, uint64_t *size, uint64_t *addr) { struct amdgpu_device *adev = ring->adev; - struct amdgpu_job *job; - unsigned num_dw, num_bytes; - struct dma_fence *fence; + unsigned offset, num_pages, num_dw, num_bytes; uint64_t src_addr, dst_addr; + struct dma_fence *fence; + struct amdgpu_job *job; void *cpu_addr; uint64_t flags; unsigned int i; @@ -198,7 +198,9 @@ static int amdgpu_ttm_map_buffer(struct ttm_buffer_object *bo, BUG_ON(adev->mman.buffer_funcs->copy_max_bytes < AMDGPU_GTT_MAX_TRANSFER_SIZE * 8); - BUG_ON(mem->mem_type == AMDGPU_PL_PREEMPT); + + if (WARN_ON(mem->mem_type == AMDGPU_PL_PREEMPT)) + return -EINVAL; /* Map only what can't be accessed directly */ if (!tmz && mem->start != AMDGPU_BO_INVALID_OFFSET) { @@ -207,10 +209,22 @@ static int amdgpu_ttm_map_buffer(struct ttm_buffer_object *bo, return 0; } + + /* + * If start begins at an offset inside the page, then adjust the size + * and addr accordingly + */ + offset = mm_cur->start & ~PAGE_MASK; + + num_pages = PFN_UP(*size + offset); + num_pages = min_t(uint32_t, num_pages, AMDGPU_GTT_MAX_TRANSFER_SIZE); + + *size = min(*size, (uint64_t)num_pages * PAGE_SIZE - offset); + *addr = adev->gmc.gart_start; *addr += (u64)window * AMDGPU_GTT_MAX_TRANSFER_SIZE * AMDGPU_GPU_PAGE_SIZE; - *addr += mm_cur->start & ~PAGE_MASK; + *addr += offset; num_dw = 
ALIGN(adev->mman.buffer_funcs->copy_num_dw, 8); num_bytes = num_pages * 8 * AMDGPU_GPU_PAGES_IN_CPU_PAGE; @@ -241,10 +255,7 @@ static int amdgpu_ttm_map_buffer(struct ttm_buffer_object *bo, dma_addr_t *dma_addr; dma_addr = &bo->ttm->dma_address[mm_cur->start >> PAGE_SHIFT]; - r = amdgpu_gart_map(adev, 0, num_pages, dma_addr, flags, - cpu_addr); - if (r) - goto error_free; + amdgpu_gart_map(adev, 0, num_pages, dma_addr, flags, cpu_addr); } else { dma_addr_t dma_address; @@ -252,11 +263,8 @@ static int amdgpu_ttm_map_buffer(struct ttm_buffer_object *bo, dma_address += adev->vm_manager.vram_base_offset; for (i = 0; i < num_pages; ++i) { - r = amdgpu_gart_map(adev, i << PAGE_SHIFT, 1, - &dma_address, flags, cpu_addr); - if (r) - goto error_free; - + amdgpu_gart_map(adev, i << PAGE_SHIFT, 1, &dma_address, + flags, cpu_addr); dma_address += PAGE_SIZE; } } @@ -297,9 +305,6 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev, struct dma_resv *resv, struct dma_fence **f) { - const uint32_t GTT_MAX_BYTES = (AMDGPU_GTT_MAX_TRANSFER_SIZE * - AMDGPU_GPU_PAGE_SIZE); - struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring; struct amdgpu_res_cursor src_mm, dst_mm; struct dma_fence *fence = NULL; @@ -315,29 +320,20 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev, mutex_lock(&adev->mman.gtt_window_lock); while (src_mm.remaining) { - uint32_t src_page_offset = src_mm.start & ~PAGE_MASK; - uint32_t dst_page_offset = dst_mm.start & ~PAGE_MASK; + uint64_t from, to, cur_size; struct dma_fence *next; - uint32_t cur_size; - uint64_t from, to; - /* Copy size cannot exceed GTT_MAX_BYTES. So if src or dst - * begins at an offset, then adjust the size accordingly - */ - cur_size = max(src_page_offset, dst_page_offset); - cur_size = min(min3(src_mm.size, dst_mm.size, size), - (uint64_t)(GTT_MAX_BYTES - cur_size)); + /* Never copy more than 256MiB at once to avoid a timeout */ + cur_size = min3(src_mm.size, dst_mm.size, 256ULL << 20); /* Map src to window 0 and dst to window 1. 
*/ r = amdgpu_ttm_map_buffer(src->bo, src->mem, &src_mm, - PFN_UP(cur_size + src_page_offset), - 0, ring, tmz, &from); + 0, ring, tmz, &cur_size, &from); if (r) goto error; r = amdgpu_ttm_map_buffer(dst->bo, dst->mem, &dst_mm, - PFN_UP(cur_size + dst_page_offset), - 1, ring, tmz, &to); + 1, ring, tmz, &cur_size, &to); if (r) goto error; @@ -396,8 +392,7 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo, (abo->flags & AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE)) { struct dma_fence *wipe_fence = NULL; - r = amdgpu_fill_buffer(ttm_to_amdgpu_bo(bo), AMDGPU_POISON, - NULL, &wipe_fence); + r = amdgpu_fill_buffer(abo, AMDGPU_POISON, NULL, &wipe_fence); if (r) { goto error; } else if (wipe_fence) { @@ -821,14 +816,13 @@ static void amdgpu_ttm_tt_unpin_userptr(struct ttm_device *bdev, #endif } -static int amdgpu_ttm_gart_bind(struct amdgpu_device *adev, - struct ttm_buffer_object *tbo, - uint64_t flags) +static void amdgpu_ttm_gart_bind(struct amdgpu_device *adev, + struct ttm_buffer_object *tbo, + uint64_t flags) { struct amdgpu_bo *abo = ttm_to_amdgpu_bo(tbo); struct ttm_tt *ttm = tbo->ttm; struct amdgpu_ttm_tt *gtt = (void *)ttm; - int r; if (amdgpu_bo_encrypted(abo)) flags |= AMDGPU_PTE_TMZ; @@ -836,10 +830,8 @@ static int amdgpu_ttm_gart_bind(struct amdgpu_device *adev, if (abo->flags & AMDGPU_GEM_CREATE_CP_MQD_GFX9) { uint64_t page_idx = 1; - r = amdgpu_gart_bind(adev, gtt->offset, page_idx, - gtt->ttm.dma_address, flags); - if (r) - goto gart_bind_fail; + amdgpu_gart_bind(adev, gtt->offset, page_idx, + gtt->ttm.dma_address, flags); /* The memory type of the first page defaults to UC. Now * modify the memory type to NC from the second page of @@ -848,21 +840,13 @@ static int amdgpu_ttm_gart_bind(struct amdgpu_device *adev, flags &= ~AMDGPU_PTE_MTYPE_VG10_MASK; flags |= AMDGPU_PTE_MTYPE_VG10(AMDGPU_MTYPE_NC); - r = amdgpu_gart_bind(adev, - gtt->offset + (page_idx << PAGE_SHIFT), - ttm->num_pages - page_idx, - &(gtt->ttm.dma_address[page_idx]), flags); + amdgpu_gart_bind(adev, gtt->offset + (page_idx << PAGE_SHIFT), + ttm->num_pages - page_idx, + &(gtt->ttm.dma_address[page_idx]), flags); } else { - r = amdgpu_gart_bind(adev, gtt->offset, ttm->num_pages, - gtt->ttm.dma_address, flags); + amdgpu_gart_bind(adev, gtt->offset, ttm->num_pages, + gtt->ttm.dma_address, flags); } - -gart_bind_fail: - if (r) - DRM_ERROR("failed to bind %u pages at 0x%08llX\n", - ttm->num_pages, gtt->offset); - - return r; } /* @@ -878,7 +862,7 @@ static int amdgpu_ttm_backend_bind(struct ttm_device *bdev, struct amdgpu_device *adev = amdgpu_ttm_adev(bdev); struct amdgpu_ttm_tt *gtt = (void*)ttm; uint64_t flags; - int r = 0; + int r; if (!bo_mem) return -EINVAL; @@ -925,14 +909,10 @@ static int amdgpu_ttm_backend_bind(struct ttm_device *bdev, /* bind pages into GART page tables */ gtt->offset = (u64)bo_mem->start << PAGE_SHIFT; - r = amdgpu_gart_bind(adev, gtt->offset, ttm->num_pages, - gtt->ttm.dma_address, flags); - - if (r) - DRM_ERROR("failed to bind %u pages at 0x%08llX\n", - ttm->num_pages, gtt->offset); + amdgpu_gart_bind(adev, gtt->offset, ttm->num_pages, + gtt->ttm.dma_address, flags); gtt->bound = true; - return r; + return 0; } /* @@ -982,12 +962,7 @@ int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo) /* Bind pages */ gtt->offset = (u64)tmp->start << PAGE_SHIFT; - r = amdgpu_ttm_gart_bind(adev, bo, flags); - if (unlikely(r)) { - ttm_resource_free(bo, &tmp); - return r; - } - + amdgpu_ttm_gart_bind(adev, bo, flags); amdgpu_gart_invalidate_tlb(adev); ttm_resource_free(bo, &bo->resource); 
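/*
 * Worked example for the reworked amdgpu_ttm_map_buffer() above, which now
 * takes @size as an in/out parameter instead of a page count.  Numbers are
 * illustrative, assuming 4 KiB pages and AMDGPU_GTT_MAX_TRANSFER_SIZE == 512
 * pages per GART window:
 *
 *   offset    = mm_cur->start & ~PAGE_MASK             e.g. 0x123
 *   num_pages = PFN_UP(*size + offset), clamped to one window (512 pages)
 *   *size     = min(*size, num_pages * PAGE_SIZE - offset)
 *   *addr     = gart_start + window * 512 * 4096 + offset
 *
 * So a request starting 0x123 bytes into a page, mapped through window 1
 * with gart_start == 0, lands at *addr == 0x200000 + 0x123, and *size is
 * trimmed so the transfer never crosses the window boundary.
 */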
ttm_bo_assign_mem(bo, tmp); @@ -1001,19 +976,16 @@ int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo) * Called by amdgpu_gtt_mgr_recover() from amdgpu_device_reset() to * rebind GTT pages during a GPU reset. */ -int amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo) +void amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo) { struct amdgpu_device *adev = amdgpu_ttm_adev(tbo->bdev); uint64_t flags; - int r; if (!tbo->ttm) - return 0; + return; flags = amdgpu_ttm_tt_pte_flags(adev, tbo->ttm, tbo->resource); - r = amdgpu_ttm_gart_bind(adev, tbo, flags); - - return r; + amdgpu_ttm_gart_bind(adev, tbo, flags); } /* @@ -1027,7 +999,6 @@ static void amdgpu_ttm_backend_unbind(struct ttm_device *bdev, { struct amdgpu_device *adev = amdgpu_ttm_adev(bdev); struct amdgpu_ttm_tt *gtt = (void *)ttm; - int r; /* if the pages have userptr pinning then clear that first */ if (gtt->userptr) { @@ -1047,10 +1018,7 @@ static void amdgpu_ttm_backend_unbind(struct ttm_device *bdev, return; /* unbind shouldn't be done for GDS/GWS/OA in ttm_bo_clean_mm */ - r = amdgpu_gart_unbind(adev, gtt->offset, ttm->num_pages); - if (r) - DRM_ERROR("failed to unbind %u pages at 0x%08llX\n", - gtt->ttm.num_pages, gtt->offset); + amdgpu_gart_unbind(adev, gtt->offset, ttm->num_pages); gtt->bound = false; } @@ -1169,6 +1137,26 @@ static void amdgpu_ttm_tt_unpopulate(struct ttm_device *bdev, } /** + * amdgpu_ttm_tt_get_userptr - Return the userptr GTT ttm_tt for the current + * task + * + * @tbo: The ttm_buffer_object that contains the userptr + * @user_addr: The returned value + */ +int amdgpu_ttm_tt_get_userptr(const struct ttm_buffer_object *tbo, + uint64_t *user_addr) +{ + struct amdgpu_ttm_tt *gtt; + + if (!tbo->ttm) + return -EINVAL; + + gtt = (void *)tbo->ttm; + *user_addr = gtt->userptr; + return 0; +} + +/** * amdgpu_ttm_tt_set_userptr - Initialize userptr GTT ttm_tt for the current * task * @@ -1356,7 +1344,8 @@ static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo, * If true, then return false as any KFD process needs all its BOs to * be resident to run successfully */ - dma_resv_for_each_fence(&resv_cursor, bo->base.resv, true, f) { + dma_resv_for_each_fence(&resv_cursor, bo->base.resv, + DMA_RESV_USAGE_BOOKKEEP, f) { if (amdkfd_fence_check_mm(f, current->mm)) return false; } @@ -1433,6 +1422,63 @@ static void amdgpu_ttm_vram_mm_access(struct amdgpu_device *adev, loff_t pos, } } +static int amdgpu_ttm_access_memory_sdma(struct ttm_buffer_object *bo, + unsigned long offset, void *buf, int len, int write) +{ + struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo); + struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev); + struct amdgpu_res_cursor src_mm; + struct amdgpu_job *job; + struct dma_fence *fence; + uint64_t src_addr, dst_addr; + unsigned int num_dw; + int r, idx; + + if (len != PAGE_SIZE) + return -EINVAL; + + if (!adev->mman.sdma_access_ptr) + return -EACCES; + + if (!drm_dev_enter(adev_to_drm(adev), &idx)) + return -ENODEV; + + if (write) + memcpy(adev->mman.sdma_access_ptr, buf, len); + + num_dw = ALIGN(adev->mman.buffer_funcs->copy_num_dw, 8); + r = amdgpu_job_alloc_with_ib(adev, num_dw * 4, AMDGPU_IB_POOL_DELAYED, &job); + if (r) + goto out; + + amdgpu_res_first(abo->tbo.resource, offset, len, &src_mm); + src_addr = amdgpu_ttm_domain_start(adev, bo->resource->mem_type) + src_mm.start; + dst_addr = amdgpu_bo_gpu_offset(adev->mman.sdma_access_bo); + if (write) + swap(src_addr, dst_addr); + + amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr, dst_addr, PAGE_SIZE, false); + + 
amdgpu_ring_pad_ib(adev->mman.buffer_funcs_ring, &job->ibs[0]); + WARN_ON(job->ibs[0].length_dw > num_dw); + + r = amdgpu_job_submit(job, &adev->mman.entity, AMDGPU_FENCE_OWNER_UNDEFINED, &fence); + if (r) { + amdgpu_job_free(job); + goto out; + } + + if (!dma_fence_wait_timeout(fence, false, adev->sdma_timeout)) + r = -ETIMEDOUT; + dma_fence_put(fence); + + if (!(r || write)) + memcpy(buf, adev->mman.sdma_access_ptr, len); +out: + drm_dev_exit(idx); + return r; +} + /** * amdgpu_ttm_access_memory - Read or Write memory that backs a buffer object. * @@ -1457,6 +1503,10 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo, if (bo->resource->mem_type != TTM_PL_VRAM) return -EIO; + if (amdgpu_device_has_timeouts_enabled(adev) && + !amdgpu_ttm_access_memory_sdma(bo, offset, buf, len, write)) + return len; + amdgpu_res_first(bo->resource, offset, len, &cursor); while (cursor.remaining) { size_t count, size = cursor.size; @@ -1498,7 +1548,6 @@ static struct ttm_device_funcs amdgpu_bo_driver = { .io_mem_reserve = &amdgpu_ttm_io_mem_reserve, .io_mem_pfn = amdgpu_ttm_io_mem_pfn, .access_memory = &amdgpu_ttm_access_memory, - .del_from_lru_notify = &amdgpu_vm_del_from_lru_notify }; /* @@ -1797,6 +1846,12 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) return r; } + if (amdgpu_bo_create_kernel(adev, PAGE_SIZE, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_GTT, + &adev->mman.sdma_access_bo, NULL, + &adev->mman.sdma_access_ptr)) + DRM_WARN("Debug VRAM access will use slowpath MM access\n"); + return 0; } @@ -1818,6 +1873,8 @@ void amdgpu_ttm_fini(struct amdgpu_device *adev) if (adev->mman.stolen_reserved_size) amdgpu_bo_free_kernel(&adev->mman.stolen_reserved_memory, NULL, NULL); + amdgpu_bo_free_kernel(&adev->mman.sdma_access_bo, NULL, + &adev->mman.sdma_access_ptr); amdgpu_ttm_fw_reserve_vram_fini(adev); if (drm_dev_enter(adev_to_drm(adev), &idx)) { @@ -1884,23 +1941,55 @@ void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable) size = adev->gmc.real_vram_size; else size = adev->gmc.visible_vram_size; - man->size = size >> PAGE_SHIFT; + man->size = size; adev->mman.buffer_funcs_enabled = enable; } +static int amdgpu_ttm_prepare_job(struct amdgpu_device *adev, + bool direct_submit, + unsigned int num_dw, + struct dma_resv *resv, + bool vm_needs_flush, + struct amdgpu_job **job) +{ + enum amdgpu_ib_pool_type pool = direct_submit ? + AMDGPU_IB_POOL_DIRECT : + AMDGPU_IB_POOL_DELAYED; + int r; + + r = amdgpu_job_alloc_with_ib(adev, num_dw * 4, pool, job); + if (r) + return r; + + if (vm_needs_flush) { + (*job)->vm_pd_addr = amdgpu_gmc_pd_addr(adev->gmc.pdb0_bo ? + adev->gmc.pdb0_bo : + adev->gart.bo); + (*job)->vm_needs_flush = true; + } + if (resv) { + r = amdgpu_sync_resv(adev, &(*job)->sync, resv, + AMDGPU_SYNC_ALWAYS, + AMDGPU_FENCE_OWNER_UNDEFINED); + if (r) { + DRM_ERROR("sync failed (%d).\n", r); + amdgpu_job_free(*job); + return r; + } + } + return 0; +} + int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset, uint64_t dst_offset, uint32_t byte_count, struct dma_resv *resv, struct dma_fence **fence, bool direct_submit, bool vm_needs_flush, bool tmz) { - enum amdgpu_ib_pool_type pool = direct_submit ? 
AMDGPU_IB_POOL_DIRECT : - AMDGPU_IB_POOL_DELAYED; struct amdgpu_device *adev = ring->adev; + unsigned num_loops, num_dw; struct amdgpu_job *job; - uint32_t max_bytes; - unsigned num_loops, num_dw; unsigned i; int r; @@ -1912,26 +2001,11 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset, max_bytes = adev->mman.buffer_funcs->copy_max_bytes; num_loops = DIV_ROUND_UP(byte_count, max_bytes); num_dw = ALIGN(num_loops * adev->mman.buffer_funcs->copy_num_dw, 8); - - r = amdgpu_job_alloc_with_ib(adev, num_dw * 4, pool, &job); + r = amdgpu_ttm_prepare_job(adev, direct_submit, num_dw, + resv, vm_needs_flush, &job); if (r) return r; - if (vm_needs_flush) { - job->vm_pd_addr = amdgpu_gmc_pd_addr(adev->gmc.pdb0_bo ? - adev->gmc.pdb0_bo : adev->gart.bo); - job->vm_needs_flush = true; - } - if (resv) { - r = amdgpu_sync_resv(adev, &job->sync, resv, - AMDGPU_SYNC_ALWAYS, - AMDGPU_FENCE_OWNER_UNDEFINED); - if (r) { - DRM_ERROR("sync failed (%d).\n", r); - goto error_free; - } - } - for (i = 0; i < num_loops; i++) { uint32_t cur_size_in_bytes = min(byte_count, max_bytes); @@ -1961,77 +2035,35 @@ error_free: return r; } -int amdgpu_fill_buffer(struct amdgpu_bo *bo, - uint32_t src_data, - struct dma_resv *resv, - struct dma_fence **fence) +static int amdgpu_ttm_fill_mem(struct amdgpu_ring *ring, uint32_t src_data, + uint64_t dst_addr, uint32_t byte_count, + struct dma_resv *resv, + struct dma_fence **fence, + bool vm_needs_flush) { - struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); - uint32_t max_bytes = adev->mman.buffer_funcs->fill_max_bytes; - struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring; - - struct amdgpu_res_cursor cursor; + struct amdgpu_device *adev = ring->adev; unsigned int num_loops, num_dw; - uint64_t num_bytes; - struct amdgpu_job *job; + uint32_t max_bytes; + unsigned int i; int r; - if (!adev->mman.buffer_funcs_enabled) { - DRM_ERROR("Trying to clear memory with ring turned off.\n"); - return -EINVAL; - } - - if (bo->tbo.resource->mem_type == AMDGPU_PL_PREEMPT) { - DRM_ERROR("Trying to clear preemptible memory.\n"); - return -EINVAL; - } - - if (bo->tbo.resource->mem_type == TTM_PL_TT) { - r = amdgpu_ttm_alloc_gart(&bo->tbo); - if (r) - return r; - } - - num_bytes = bo->tbo.resource->num_pages << PAGE_SHIFT; - num_loops = 0; - - amdgpu_res_first(bo->tbo.resource, 0, num_bytes, &cursor); - while (cursor.remaining) { - num_loops += DIV_ROUND_UP_ULL(cursor.size, max_bytes); - amdgpu_res_next(&cursor, cursor.size); - } - num_dw = num_loops * adev->mman.buffer_funcs->fill_num_dw; - - /* for IB padding */ - num_dw += 64; - - r = amdgpu_job_alloc_with_ib(adev, num_dw * 4, AMDGPU_IB_POOL_DELAYED, - &job); + max_bytes = adev->mman.buffer_funcs->fill_max_bytes; + num_loops = DIV_ROUND_UP_ULL(byte_count, max_bytes); + num_dw = ALIGN(num_loops * adev->mman.buffer_funcs->fill_num_dw, 8); + r = amdgpu_ttm_prepare_job(adev, false, num_dw, resv, vm_needs_flush, + &job); if (r) return r; - if (resv) { - r = amdgpu_sync_resv(adev, &job->sync, resv, - AMDGPU_SYNC_ALWAYS, - AMDGPU_FENCE_OWNER_UNDEFINED); - if (r) { - DRM_ERROR("sync failed (%d).\n", r); - goto error_free; - } - } - - amdgpu_res_first(bo->tbo.resource, 0, num_bytes, &cursor); - while (cursor.remaining) { - uint32_t cur_size = min_t(uint64_t, cursor.size, max_bytes); - uint64_t dst_addr = cursor.start; + for (i = 0; i < num_loops; i++) { + uint32_t cur_size = min(byte_count, max_bytes); - dst_addr += amdgpu_ttm_domain_start(adev, - bo->tbo.resource->mem_type); amdgpu_emit_fill_buffer(adev, &job->ibs[0], 
src_data, dst_addr, cur_size); - amdgpu_res_next(&cursor, cur_size); + dst_addr += cur_size; + byte_count -= cur_size; } amdgpu_ring_pad_ib(ring, &job->ibs[0]); @@ -2048,6 +2080,55 @@ error_free: return r; } +int amdgpu_fill_buffer(struct amdgpu_bo *bo, + uint32_t src_data, + struct dma_resv *resv, + struct dma_fence **f) +{ + struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); + struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring; + struct dma_fence *fence = NULL; + struct amdgpu_res_cursor dst; + int r; + + if (!adev->mman.buffer_funcs_enabled) { + DRM_ERROR("Trying to clear memory with ring turned off.\n"); + return -EINVAL; + } + + amdgpu_res_first(bo->tbo.resource, 0, amdgpu_bo_size(bo), &dst); + + mutex_lock(&adev->mman.gtt_window_lock); + while (dst.remaining) { + struct dma_fence *next; + uint64_t cur_size, to; + + /* Never fill more than 256MiB at once to avoid timeouts */ + cur_size = min(dst.size, 256ULL << 20); + + r = amdgpu_ttm_map_buffer(&bo->tbo, bo->tbo.resource, &dst, + 1, ring, false, &cur_size, &to); + if (r) + goto error; + + r = amdgpu_ttm_fill_mem(ring, src_data, to, cur_size, resv, + &next, true); + if (r) + goto error; + + dma_fence_put(fence); + fence = next; + + amdgpu_res_next(&dst, cur_size); + } +error: + mutex_unlock(&adev->mman.gtt_window_lock); + if (f) + *f = dma_fence_get(fence); + dma_fence_put(fence); + return r; +} + /** * amdgpu_ttm_evict_resources - evict memory buffers * @adev: amdgpu device object @@ -2080,17 +2161,6 @@ int amdgpu_ttm_evict_resources(struct amdgpu_device *adev, int mem_type) #if defined(CONFIG_DEBUG_FS) -static int amdgpu_mm_vram_table_show(struct seq_file *m, void *unused) -{ - struct amdgpu_device *adev = (struct amdgpu_device *)m->private; - struct ttm_resource_manager *man = ttm_manager_type(&adev->mman.bdev, - TTM_PL_VRAM); - struct drm_printer p = drm_seq_file_printer(m); - - man->func->debug(man, &p); - return 0; -} - static int amdgpu_ttm_page_pool_show(struct seq_file *m, void *unused) { struct amdgpu_device *adev = (struct amdgpu_device *)m->private; @@ -2098,55 +2168,6 @@ static int amdgpu_ttm_page_pool_show(struct seq_file *m, void *unused) return ttm_pool_debugfs(&adev->mman.bdev.pool, m); } -static int amdgpu_mm_tt_table_show(struct seq_file *m, void *unused) -{ - struct amdgpu_device *adev = (struct amdgpu_device *)m->private; - struct ttm_resource_manager *man = ttm_manager_type(&adev->mman.bdev, - TTM_PL_TT); - struct drm_printer p = drm_seq_file_printer(m); - - man->func->debug(man, &p); - return 0; -} - -static int amdgpu_mm_gds_table_show(struct seq_file *m, void *unused) -{ - struct amdgpu_device *adev = (struct amdgpu_device *)m->private; - struct ttm_resource_manager *man = ttm_manager_type(&adev->mman.bdev, - AMDGPU_PL_GDS); - struct drm_printer p = drm_seq_file_printer(m); - - man->func->debug(man, &p); - return 0; -} - -static int amdgpu_mm_gws_table_show(struct seq_file *m, void *unused) -{ - struct amdgpu_device *adev = (struct amdgpu_device *)m->private; - struct ttm_resource_manager *man = ttm_manager_type(&adev->mman.bdev, - AMDGPU_PL_GWS); - struct drm_printer p = drm_seq_file_printer(m); - - man->func->debug(man, &p); - return 0; -} - -static int amdgpu_mm_oa_table_show(struct seq_file *m, void *unused) -{ - struct amdgpu_device *adev = (struct amdgpu_device *)m->private; - struct ttm_resource_manager *man = ttm_manager_type(&adev->mman.bdev, - AMDGPU_PL_OA); - struct drm_printer p = drm_seq_file_printer(m); - - man->func->debug(man, &p); - return 0; -} - 
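/*
 * Back-of-the-envelope check for the consolidated fill path above, using
 * hypothetical SDMA limits not taken from this patch (fill_max_bytes =
 * 0x3fffe0, fill_num_dw = 5):
 *
 *   byte_count = 256 MiB, i.e. one chunk handed down by amdgpu_fill_buffer()
 *   num_loops  = DIV_ROUND_UP(256 << 20, 0x3fffe0) = 65
 *   num_dw     = ALIGN(65 * 5, 8) = 328
 *
 * Clearing one 256 MiB chunk therefore emits 65 fill packets in a single
 * 328-dword IB, while amdgpu_fill_buffer() chains the per-chunk fences,
 * putting the previous fence and keeping only the latest.
 */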
-DEFINE_SHOW_ATTRIBUTE(amdgpu_mm_vram_table); -DEFINE_SHOW_ATTRIBUTE(amdgpu_mm_tt_table); -DEFINE_SHOW_ATTRIBUTE(amdgpu_mm_gds_table); -DEFINE_SHOW_ATTRIBUTE(amdgpu_mm_gws_table); -DEFINE_SHOW_ATTRIBUTE(amdgpu_mm_oa_table); DEFINE_SHOW_ATTRIBUTE(amdgpu_ttm_page_pool); /* @@ -2356,17 +2377,23 @@ void amdgpu_ttm_debugfs_init(struct amdgpu_device *adev) &amdgpu_ttm_vram_fops, adev->gmc.mc_vram_size); debugfs_create_file("amdgpu_iomem", 0444, root, adev, &amdgpu_ttm_iomem_fops); - debugfs_create_file("amdgpu_vram_mm", 0444, root, adev, - &amdgpu_mm_vram_table_fops); - debugfs_create_file("amdgpu_gtt_mm", 0444, root, adev, - &amdgpu_mm_tt_table_fops); - debugfs_create_file("amdgpu_gds_mm", 0444, root, adev, - &amdgpu_mm_gds_table_fops); - debugfs_create_file("amdgpu_gws_mm", 0444, root, adev, - &amdgpu_mm_gws_table_fops); - debugfs_create_file("amdgpu_oa_mm", 0444, root, adev, - &amdgpu_mm_oa_table_fops); debugfs_create_file("ttm_page_pool", 0444, root, adev, &amdgpu_ttm_page_pool_fops); + ttm_resource_manager_create_debugfs(ttm_manager_type(&adev->mman.bdev, + TTM_PL_VRAM), + root, "amdgpu_vram_mm"); + ttm_resource_manager_create_debugfs(ttm_manager_type(&adev->mman.bdev, + TTM_PL_TT), + root, "amdgpu_gtt_mm"); + ttm_resource_manager_create_debugfs(ttm_manager_type(&adev->mman.bdev, + AMDGPU_PL_GDS), + root, "amdgpu_gds_mm"); + ttm_resource_manager_create_debugfs(ttm_manager_type(&adev->mman.bdev, + AMDGPU_PL_GWS), + root, "amdgpu_gws_mm"); + ttm_resource_manager_create_debugfs(ttm_manager_type(&adev->mman.bdev, + AMDGPU_PL_OA), + root, "amdgpu_oa_mm"); + #endif } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h index f8f48be16d80..6a70818039dd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h @@ -26,6 +26,7 @@ #include <linux/dma-direction.h> #include <drm/gpu_scheduler.h> +#include "amdgpu_vram_mgr.h" #include "amdgpu.h" #define AMDGPU_PL_GDS (TTM_PL_PRIV + 0) @@ -38,26 +39,10 @@ #define AMDGPU_POISON 0xd0bed0be -struct amdgpu_vram_mgr { - struct ttm_resource_manager manager; - struct drm_mm mm; - spinlock_t lock; - struct list_head reservations_pending; - struct list_head reserved_pages; - atomic64_t usage; - atomic64_t vis_usage; -}; - struct amdgpu_gtt_mgr { struct ttm_resource_manager manager; struct drm_mm mm; spinlock_t lock; - atomic64_t used; -}; - -struct amdgpu_preempt_mgr { - struct ttm_resource_manager manager; - atomic64_t used; }; struct amdgpu_mman { @@ -76,7 +61,7 @@ struct amdgpu_mman { struct amdgpu_vram_mgr vram_mgr; struct amdgpu_gtt_mgr gtt_mgr; - struct amdgpu_preempt_mgr preempt_mgr; + struct ttm_resource_manager preempt_mgr; uint64_t stolen_vga_size; struct amdgpu_bo *stolen_vga_memory; @@ -98,6 +83,10 @@ struct amdgpu_mman { u64 fw_vram_usage_size; struct amdgpu_bo *fw_vram_usage_reserved_bo; void *fw_vram_usage_va; + + /* PAGE_SIZE'd BO for process memory r/w over SDMA. 
*/ + struct amdgpu_bo *sdma_access_bo; + void *sdma_access_ptr; }; struct amdgpu_copy_mem { @@ -114,8 +103,7 @@ int amdgpu_vram_mgr_init(struct amdgpu_device *adev); void amdgpu_vram_mgr_fini(struct amdgpu_device *adev); bool amdgpu_gtt_mgr_has_gart_addr(struct ttm_resource *mem); -uint64_t amdgpu_gtt_mgr_usage(struct amdgpu_gtt_mgr *mgr); -int amdgpu_gtt_mgr_recover(struct amdgpu_gtt_mgr *mgr); +void amdgpu_gtt_mgr_recover(struct amdgpu_gtt_mgr *mgr); uint64_t amdgpu_preempt_mgr_usage(struct ttm_resource_manager *man); @@ -129,7 +117,6 @@ int amdgpu_vram_mgr_alloc_sgt(struct amdgpu_device *adev, void amdgpu_vram_mgr_free_sgt(struct device *dev, enum dma_data_direction dir, struct sg_table *sgt); -uint64_t amdgpu_vram_mgr_usage(struct amdgpu_vram_mgr *mgr); uint64_t amdgpu_vram_mgr_vis_usage(struct amdgpu_vram_mgr *mgr); int amdgpu_vram_mgr_reserve_range(struct amdgpu_vram_mgr *mgr, uint64_t start, uint64_t size); @@ -158,7 +145,7 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo, struct dma_fence **fence); int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo); -int amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo); +void amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo); uint64_t amdgpu_ttm_domain_start(struct amdgpu_device *adev, uint32_t type); #if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR) @@ -177,6 +164,8 @@ static inline bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm) #endif void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages); +int amdgpu_ttm_tt_get_userptr(const struct ttm_buffer_object *tbo, + uint64_t *user_addr); int amdgpu_ttm_tt_set_userptr(struct ttm_buffer_object *bo, uint64_t addr, uint32_t flags); bool amdgpu_ttm_tt_has_userptr(struct ttm_tt *ttm); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c index ca3350502618..016477fa2f90 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c @@ -760,3 +760,36 @@ int amdgpu_ucode_init_bo(struct amdgpu_device *adev) } return 0; } + +void amdgpu_ucode_ip_version_decode(struct amdgpu_device *adev, int block_type, char *ucode_prefix, int len) +{ + int maj, min, rev; + char *ip_name; + uint32_t version = adev->ip_versions[block_type][0]; + + switch (block_type) { + case GC_HWIP: + ip_name = "gc"; + break; + case SDMA0_HWIP: + ip_name = "sdma"; + break; + case MP0_HWIP: + ip_name = "psp"; + break; + case MP1_HWIP: + ip_name = "smu"; + break; + case UVD_HWIP: + ip_name = "vcn"; + break; + default: + BUG(); + } + + maj = IP_VERSION_MAJ(version); + min = IP_VERSION_MIN(version); + rev = IP_VERSION_REV(version); + + snprintf(ucode_prefix, len, "%s_%d_%d_%d", ip_name, maj, min, rev); +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h index 7c2538db3cd5..864984d0d3ef 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h @@ -343,7 +343,8 @@ union amdgpu_firmware_header { * fw loading support */ enum AMDGPU_UCODE_ID { - AMDGPU_UCODE_ID_SDMA0 = 0, + AMDGPU_UCODE_ID_CAP = 0, + AMDGPU_UCODE_ID_SDMA0, AMDGPU_UCODE_ID_SDMA1, AMDGPU_UCODE_ID_SDMA2, AMDGPU_UCODE_ID_SDMA3, @@ -462,4 +463,6 @@ amdgpu_ucode_get_load_type(struct amdgpu_device *adev, int load_type); const char *amdgpu_ucode_name(enum AMDGPU_UCODE_ID ucode_id); +void amdgpu_ucode_ip_version_decode(struct amdgpu_device *adev, int block_type, char *ucode_prefix, int len); + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c index 46264a4002f7..aad3c8b4c810 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c @@ -21,7 +21,7 @@ * */ -#include "amdgpu_ras.h" +#include "amdgpu.h" static int amdgpu_umc_do_page_retirement(struct amdgpu_device *adev, void *ras_error_status, @@ -33,14 +33,14 @@ static int amdgpu_umc_do_page_retirement(struct amdgpu_device *adev, int ret = 0; kgd2kfd_set_sram_ecc_flag(adev->kfd.dev); - ret = smu_get_ecc_info(&adev->smu, (void *)&(con->umc_ecc)); + ret = amdgpu_dpm_get_ecc_info(adev, (void *)&(con->umc_ecc)); if (ret == -EOPNOTSUPP) { - if (adev->umc.ras_funcs && - adev->umc.ras_funcs->query_ras_error_count) - adev->umc.ras_funcs->query_ras_error_count(adev, ras_error_status); + if (adev->umc.ras && adev->umc.ras->ras_block.hw_ops && + adev->umc.ras->ras_block.hw_ops->query_ras_error_count) + adev->umc.ras->ras_block.hw_ops->query_ras_error_count(adev, ras_error_status); - if (adev->umc.ras_funcs && - adev->umc.ras_funcs->query_ras_error_address && + if (adev->umc.ras && adev->umc.ras->ras_block.hw_ops && + adev->umc.ras->ras_block.hw_ops->query_ras_error_address && adev->umc.max_ras_err_cnt_per_query) { err_data->err_addr = kcalloc(adev->umc.max_ras_err_cnt_per_query, @@ -56,15 +56,15 @@ static int amdgpu_umc_do_page_retirement(struct amdgpu_device *adev, /* umc query_ras_error_address is also responsible for clearing * error status */ - adev->umc.ras_funcs->query_ras_error_address(adev, ras_error_status); + adev->umc.ras->ras_block.hw_ops->query_ras_error_address(adev, ras_error_status); } } else if (!ret) { - if (adev->umc.ras_funcs && - adev->umc.ras_funcs->ecc_info_query_ras_error_count) - adev->umc.ras_funcs->ecc_info_query_ras_error_count(adev, ras_error_status); + if (adev->umc.ras && + adev->umc.ras->ecc_info_query_ras_error_count) + adev->umc.ras->ecc_info_query_ras_error_count(adev, ras_error_status); - if (adev->umc.ras_funcs && - adev->umc.ras_funcs->ecc_info_query_ras_error_address && + if (adev->umc.ras && + adev->umc.ras->ecc_info_query_ras_error_address && adev->umc.max_ras_err_cnt_per_query) { err_data->err_addr = kcalloc(adev->umc.max_ras_err_cnt_per_query, @@ -80,7 +80,7 @@ static int amdgpu_umc_do_page_retirement(struct amdgpu_device *adev, /* umc query_ras_error_address is also responsible for clearing * error status */ - adev->umc.ras_funcs->ecc_info_query_ras_error_address(adev, ras_error_status); + adev->umc.ras->ecc_info_query_ras_error_address(adev, ras_error_status); } } @@ -96,8 +96,12 @@ static int amdgpu_umc_do_page_retirement(struct amdgpu_device *adev, err_data->err_addr_cnt); amdgpu_ras_save_bad_pages(adev); - if (adev->smu.ppt_funcs && adev->smu.ppt_funcs->send_hbm_bad_pages_num) - adev->smu.ppt_funcs->send_hbm_bad_pages_num(&adev->smu, con->eeprom_control.ras_num_recs); + amdgpu_dpm_send_hbm_bad_pages_num(adev, con->eeprom_control.ras_num_recs); + + if (con->update_channel_flag == true) { + amdgpu_dpm_send_hbm_bad_channel_flag(adev, con->eeprom_control.bad_channel_bitmap); + con->update_channel_flag = false; + } } if (reset) @@ -130,78 +134,39 @@ int amdgpu_umc_poison_handler(struct amdgpu_device *adev, return ret; } -static int amdgpu_umc_process_ras_data_cb(struct amdgpu_device *adev, +int amdgpu_umc_process_ras_data_cb(struct amdgpu_device *adev, void *ras_error_status, struct amdgpu_iv_entry *entry) { return amdgpu_umc_do_page_retirement(adev, ras_error_status, entry, true); } -int amdgpu_umc_ras_late_init(struct amdgpu_device *adev) +int 
amdgpu_umc_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block) { int r; - struct ras_fs_if fs_info = { - .sysfs_name = "umc_err_count", - }; - struct ras_ih_if ih_info = { - .cb = amdgpu_umc_process_ras_data_cb, - }; - if (!adev->umc.ras_if) { - adev->umc.ras_if = - kmalloc(sizeof(struct ras_common_if), GFP_KERNEL); - if (!adev->umc.ras_if) - return -ENOMEM; - adev->umc.ras_if->block = AMDGPU_RAS_BLOCK__UMC; - adev->umc.ras_if->type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE; - adev->umc.ras_if->sub_block_index = 0; - } - ih_info.head = fs_info.head = *adev->umc.ras_if; - - r = amdgpu_ras_late_init(adev, adev->umc.ras_if, - &fs_info, &ih_info); + r = amdgpu_ras_block_late_init(adev, ras_block); if (r) - goto free; + return r; - if (amdgpu_ras_is_supported(adev, adev->umc.ras_if->block)) { + if (amdgpu_ras_is_supported(adev, ras_block->block)) { r = amdgpu_irq_get(adev, &adev->gmc.ecc_irq, 0); if (r) goto late_fini; - } else { - r = 0; - goto free; } /* ras init of specific umc version */ - if (adev->umc.ras_funcs && - adev->umc.ras_funcs->err_cnt_init) - adev->umc.ras_funcs->err_cnt_init(adev); + if (adev->umc.ras && + adev->umc.ras->err_cnt_init) + adev->umc.ras->err_cnt_init(adev); return 0; late_fini: - amdgpu_ras_late_fini(adev, adev->umc.ras_if, &ih_info); -free: - kfree(adev->umc.ras_if); - adev->umc.ras_if = NULL; + amdgpu_ras_block_late_fini(adev, ras_block); return r; } -void amdgpu_umc_ras_fini(struct amdgpu_device *adev) -{ - if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__UMC) && - adev->umc.ras_if) { - struct ras_common_if *ras_if = adev->umc.ras_if; - struct ras_ih_if ih_info = { - .head = *ras_if, - .cb = amdgpu_umc_process_ras_data_cb, - }; - - amdgpu_ras_late_fini(adev, ras_if, &ih_info); - kfree(ras_if); - } -} - int amdgpu_umc_process_ecc_irq(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry) @@ -219,3 +184,24 @@ int amdgpu_umc_process_ecc_irq(struct amdgpu_device *adev, amdgpu_ras_interrupt_dispatch(adev, &ih_data); return 0; } + +void amdgpu_umc_fill_error_record(struct ras_err_data *err_data, + uint64_t err_addr, + uint64_t retired_page, + uint32_t channel_index, + uint32_t umc_inst) +{ + struct eeprom_table_record *err_rec = + &err_data->err_addr[err_data->err_addr_cnt]; + + err_rec->address = err_addr; + /* page frame address is saved */ + err_rec->retired_page = retired_page >> AMDGPU_GPU_PAGE_SHIFT; + err_rec->ts = (uint64_t)ktime_get_real_seconds(); + err_rec->err_type = AMDGPU_RAS_EEPROM_ERR_NON_RECOVERABLE; + err_rec->cu = 0; + err_rec->mem_channel = channel_index; + err_rec->mcumc_id = umc_inst; + + err_data->err_addr_cnt++; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h index b72194e8bfe5..2ec6698aa1fe 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h @@ -20,6 +20,7 @@ */ #ifndef __AMDGPU_UMC_H__ #define __AMDGPU_UMC_H__ +#include "amdgpu_ras.h" /* * (addr / 256) * 4096, the higher 26 bits in ErrorAddr @@ -40,14 +41,9 @@ #define LOOP_UMC_CH_INST(ch_inst) for ((ch_inst) = 0; (ch_inst) < adev->umc.channel_inst_num; (ch_inst)++) #define LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) LOOP_UMC_INST((umc_inst)) LOOP_UMC_CH_INST((ch_inst)) -struct amdgpu_umc_ras_funcs { +struct amdgpu_umc_ras { + struct amdgpu_ras_block_object ras_block; void (*err_cnt_init)(struct amdgpu_device *adev); - int (*ras_late_init)(struct amdgpu_device *adev); - void (*ras_fini)(struct amdgpu_device *adev); - void 
(*query_ras_error_count)(struct amdgpu_device *adev, - void *ras_error_status); - void (*query_ras_error_address)(struct amdgpu_device *adev, - void *ras_error_status); bool (*query_ras_poison_mode)(struct amdgpu_device *adev); void (*ecc_info_query_ras_error_count)(struct amdgpu_device *adev, void *ras_error_status); @@ -73,15 +69,23 @@ struct amdgpu_umc { struct ras_common_if *ras_if; const struct amdgpu_umc_funcs *funcs; - const struct amdgpu_umc_ras_funcs *ras_funcs; + struct amdgpu_umc_ras *ras; }; -int amdgpu_umc_ras_late_init(struct amdgpu_device *adev); -void amdgpu_umc_ras_fini(struct amdgpu_device *adev); +int amdgpu_umc_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block); int amdgpu_umc_poison_handler(struct amdgpu_device *adev, void *ras_error_status, bool reset); int amdgpu_umc_process_ecc_irq(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry); +void amdgpu_umc_fill_error_record(struct ras_err_data *err_data, + uint64_t err_addr, + uint64_t retired_page, + uint32_t channel_index, + uint32_t umc_inst); + +int amdgpu_umc_process_ras_data_cb(struct amdgpu_device *adev, + void *ras_error_status, + struct amdgpu_iv_entry *entry); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c index 6f8de11a17f1..6eac649499d3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c @@ -37,6 +37,7 @@ #include "amdgpu.h" #include "amdgpu_pm.h" #include "amdgpu_uvd.h" +#include "amdgpu_cs.h" #include "cikd.h" #include "uvd/uvd_4_2_d.h" @@ -98,7 +99,7 @@ struct amdgpu_uvd_cs_ctx { unsigned reg, count; unsigned data0, data1; unsigned idx; - unsigned ib_idx; + struct amdgpu_ib *ib; /* does the IB has a msg command */ bool has_msg_cmd; @@ -557,8 +558,8 @@ static u64 amdgpu_uvd_get_addr_from_ctx(struct amdgpu_uvd_cs_ctx *ctx) uint32_t lo, hi; uint64_t addr; - lo = amdgpu_get_ib_value(ctx->parser, ctx->ib_idx, ctx->data0); - hi = amdgpu_get_ib_value(ctx->parser, ctx->ib_idx, ctx->data1); + lo = amdgpu_ib_get_value(ctx->ib, ctx->data0); + hi = amdgpu_ib_get_value(ctx->ib, ctx->data1); addr = ((uint64_t)lo) | (((uint64_t)hi) << 32); return addr; @@ -589,7 +590,7 @@ static int amdgpu_uvd_cs_pass1(struct amdgpu_uvd_cs_ctx *ctx) if (!ctx->parser->adev->uvd.address_64_bit) { /* check if it's a message or feedback command */ - cmd = amdgpu_get_ib_value(ctx->parser, ctx->ib_idx, ctx->idx) >> 1; + cmd = amdgpu_ib_get_value(ctx->ib, ctx->idx) >> 1; if (cmd == 0x0 || cmd == 0x3) { /* yes, force it into VRAM */ uint32_t domain = AMDGPU_GEM_DOMAIN_VRAM; @@ -834,6 +835,7 @@ static int amdgpu_uvd_cs_msg(struct amdgpu_uvd_cs_ctx *ctx, handle = msg[2]; if (handle == 0) { + amdgpu_bo_kunmap(bo); DRM_ERROR("Invalid UVD handle!\n"); return -EINVAL; } @@ -892,6 +894,7 @@ static int amdgpu_uvd_cs_msg(struct amdgpu_uvd_cs_ctx *ctx, DRM_ERROR("Illegal UVD message type (%d)!\n", msg_type); } + amdgpu_bo_kunmap(bo); return -EINVAL; } @@ -925,12 +928,10 @@ static int amdgpu_uvd_cs_pass2(struct amdgpu_uvd_cs_ctx *ctx) addr -= mapping->start * AMDGPU_GPU_PAGE_SIZE; start += addr; - amdgpu_set_ib_value(ctx->parser, ctx->ib_idx, ctx->data0, - lower_32_bits(start)); - amdgpu_set_ib_value(ctx->parser, ctx->ib_idx, ctx->data1, - upper_32_bits(start)); + amdgpu_ib_set_value(ctx->ib, ctx->data0, lower_32_bits(start)); + amdgpu_ib_set_value(ctx->ib, ctx->data1, upper_32_bits(start)); - cmd = amdgpu_get_ib_value(ctx->parser, ctx->ib_idx, ctx->idx) >> 1; + cmd = amdgpu_ib_get_value(ctx->ib, 
ctx->idx) >> 1; if (cmd < 0x4) { if ((end - start) < ctx->buf_sizes[cmd]) { DRM_ERROR("buffer (%d) to small (%d / %d)!\n", cmd, @@ -990,14 +991,13 @@ static int amdgpu_uvd_cs_pass2(struct amdgpu_uvd_cs_ctx *ctx) static int amdgpu_uvd_cs_reg(struct amdgpu_uvd_cs_ctx *ctx, int (*cb)(struct amdgpu_uvd_cs_ctx *ctx)) { - struct amdgpu_ib *ib = &ctx->parser->job->ibs[ctx->ib_idx]; int i, r; ctx->idx++; for (i = 0; i <= ctx->count; ++i) { unsigned reg = ctx->reg + i; - if (ctx->idx >= ib->length_dw) { + if (ctx->idx >= ctx->ib->length_dw) { DRM_ERROR("Register command after end of CS!\n"); return -EINVAL; } @@ -1037,11 +1037,10 @@ static int amdgpu_uvd_cs_reg(struct amdgpu_uvd_cs_ctx *ctx, static int amdgpu_uvd_cs_packets(struct amdgpu_uvd_cs_ctx *ctx, int (*cb)(struct amdgpu_uvd_cs_ctx *ctx)) { - struct amdgpu_ib *ib = &ctx->parser->job->ibs[ctx->ib_idx]; int r; - for (ctx->idx = 0 ; ctx->idx < ib->length_dw; ) { - uint32_t cmd = amdgpu_get_ib_value(ctx->parser, ctx->ib_idx, ctx->idx); + for (ctx->idx = 0 ; ctx->idx < ctx->ib->length_dw; ) { + uint32_t cmd = amdgpu_ib_get_value(ctx->ib, ctx->idx); unsigned type = CP_PACKET_GET_TYPE(cmd); switch (type) { case PACKET_TYPE0: @@ -1066,11 +1065,14 @@ static int amdgpu_uvd_cs_packets(struct amdgpu_uvd_cs_ctx *ctx, * amdgpu_uvd_ring_parse_cs - UVD command submission parser * * @parser: Command submission parser context - * @ib_idx: Which indirect buffer to use + * @job: the job to parse + * @ib: the IB to patch * * Parse the command stream, patch in addresses as necessary. */ -int amdgpu_uvd_ring_parse_cs(struct amdgpu_cs_parser *parser, uint32_t ib_idx) +int amdgpu_uvd_ring_parse_cs(struct amdgpu_cs_parser *parser, + struct amdgpu_job *job, + struct amdgpu_ib *ib) { struct amdgpu_uvd_cs_ctx ctx = {}; unsigned buf_sizes[] = { @@ -1080,10 +1082,9 @@ int amdgpu_uvd_ring_parse_cs(struct amdgpu_cs_parser *parser, uint32_t ib_idx) [0x00000003] = 2048, [0x00000004] = 0xFFFFFFFF, }; - struct amdgpu_ib *ib = &parser->job->ibs[ib_idx]; int r; - parser->job->vm = NULL; + job->vm = NULL; ib->gpu_addr = amdgpu_sa_bo_gpu_addr(ib->sa_bo); if (ib->length_dw % 16) { @@ -1094,7 +1095,7 @@ int amdgpu_uvd_ring_parse_cs(struct amdgpu_cs_parser *parser, uint32_t ib_idx) ctx.parser = parser; ctx.buf_sizes = buf_sizes; - ctx.ib_idx = ib_idx; + ctx.ib = ib; /* first round only required on chips without UVD 64 bit address support */ if (!parser->adev->uvd.address_64_bit) { @@ -1162,7 +1163,8 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo, ib->length_dw = 16; if (direct) { - r = dma_resv_wait_timeout(bo->tbo.base.resv, true, false, + r = dma_resv_wait_timeout(bo->tbo.base.resv, + DMA_RESV_USAGE_KERNEL, false, msecs_to_jiffies(10)); if (r == 0) r = -ETIMEDOUT; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h index 76ac9699885d..9f89bb7cd60b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h @@ -82,7 +82,9 @@ int amdgpu_uvd_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle, bool direct, struct dma_fence **fence); void amdgpu_uvd_free_handles(struct amdgpu_device *adev, struct drm_file *filp); -int amdgpu_uvd_ring_parse_cs(struct amdgpu_cs_parser *parser, uint32_t ib_idx); +int amdgpu_uvd_ring_parse_cs(struct amdgpu_cs_parser *parser, + struct amdgpu_job *job, + struct amdgpu_ib *ib); void amdgpu_uvd_ring_begin_use(struct amdgpu_ring *ring); void amdgpu_uvd_ring_end_use(struct amdgpu_ring *ring); int amdgpu_uvd_ring_test_ib(struct amdgpu_ring 
*ring, long timeout); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c index 344f711ad144..02cb3a12dd76 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c @@ -34,6 +34,7 @@ #include "amdgpu.h" #include "amdgpu_pm.h" #include "amdgpu_vce.h" +#include "amdgpu_cs.h" #include "cikd.h" /* 1 second timeout */ @@ -587,8 +588,7 @@ err: /** * amdgpu_vce_validate_bo - make sure not to cross 4GB boundary * - * @p: parser context - * @ib_idx: indirect buffer to use + * @ib: indirect buffer to use * @lo: address of lower dword * @hi: address of higher dword * @size: minimum size @@ -596,8 +596,9 @@ err: * * Make sure that no BO cross a 4GB boundary. */ -static int amdgpu_vce_validate_bo(struct amdgpu_cs_parser *p, uint32_t ib_idx, - int lo, int hi, unsigned size, int32_t index) +static int amdgpu_vce_validate_bo(struct amdgpu_cs_parser *p, + struct amdgpu_ib *ib, int lo, int hi, + unsigned size, int32_t index) { int64_t offset = ((uint64_t)size) * ((int64_t)index); struct ttm_operation_ctx ctx = { false, false }; @@ -607,8 +608,8 @@ static int amdgpu_vce_validate_bo(struct amdgpu_cs_parser *p, uint32_t ib_idx, uint64_t addr; int r; - addr = ((uint64_t)amdgpu_get_ib_value(p, ib_idx, lo)) | - ((uint64_t)amdgpu_get_ib_value(p, ib_idx, hi)) << 32; + addr = ((uint64_t)amdgpu_ib_get_value(ib, lo)) | + ((uint64_t)amdgpu_ib_get_value(ib, hi)) << 32; if (index >= 0) { addr += offset; fpfn = PAGE_ALIGN(offset) >> PAGE_SHIFT; @@ -638,7 +639,7 @@ static int amdgpu_vce_validate_bo(struct amdgpu_cs_parser *p, uint32_t ib_idx, * amdgpu_vce_cs_reloc - command submission relocation * * @p: parser context - * @ib_idx: indirect buffer to use + * @ib: indirect buffer to use * @lo: address of lower dword * @hi: address of higher dword * @size: minimum size @@ -646,7 +647,7 @@ static int amdgpu_vce_validate_bo(struct amdgpu_cs_parser *p, uint32_t ib_idx, * * Patch relocation inside command stream with real buffer address */ -static int amdgpu_vce_cs_reloc(struct amdgpu_cs_parser *p, uint32_t ib_idx, +static int amdgpu_vce_cs_reloc(struct amdgpu_cs_parser *p, struct amdgpu_ib *ib, int lo, int hi, unsigned size, uint32_t index) { struct amdgpu_bo_va_mapping *mapping; @@ -657,8 +658,8 @@ static int amdgpu_vce_cs_reloc(struct amdgpu_cs_parser *p, uint32_t ib_idx, if (index == 0xffffffff) index = 0; - addr = ((uint64_t)amdgpu_get_ib_value(p, ib_idx, lo)) | - ((uint64_t)amdgpu_get_ib_value(p, ib_idx, hi)) << 32; + addr = ((uint64_t)amdgpu_ib_get_value(ib, lo)) | + ((uint64_t)amdgpu_ib_get_value(ib, hi)) << 32; addr += ((uint64_t)size) * ((uint64_t)index); r = amdgpu_cs_find_mapping(p, addr, &bo, &mapping); @@ -679,8 +680,8 @@ static int amdgpu_vce_cs_reloc(struct amdgpu_cs_parser *p, uint32_t ib_idx, addr += amdgpu_bo_gpu_offset(bo); addr -= ((uint64_t)size) * ((uint64_t)index); - amdgpu_set_ib_value(p, ib_idx, lo, lower_32_bits(addr)); - amdgpu_set_ib_value(p, ib_idx, hi, upper_32_bits(addr)); + amdgpu_ib_set_value(ib, lo, lower_32_bits(addr)); + amdgpu_ib_set_value(ib, hi, upper_32_bits(addr)); return 0; } @@ -729,11 +730,13 @@ static int amdgpu_vce_validate_handle(struct amdgpu_cs_parser *p, * amdgpu_vce_ring_parse_cs - parse and validate the command stream * * @p: parser context - * @ib_idx: indirect buffer to use + * @job: the job to parse + * @ib: the IB to patch */ -int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx) +int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, + struct amdgpu_job *job, + struct 
amdgpu_ib *ib) { - struct amdgpu_ib *ib = &p->job->ibs[ib_idx]; unsigned fb_idx = 0, bs_idx = 0; int session_idx = -1; uint32_t destroyed = 0; @@ -744,12 +747,12 @@ int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx) unsigned idx; int i, r = 0; - p->job->vm = NULL; + job->vm = NULL; ib->gpu_addr = amdgpu_sa_bo_gpu_addr(ib->sa_bo); for (idx = 0; idx < ib->length_dw;) { - uint32_t len = amdgpu_get_ib_value(p, ib_idx, idx); - uint32_t cmd = amdgpu_get_ib_value(p, ib_idx, idx + 1); + uint32_t len = amdgpu_ib_get_value(ib, idx); + uint32_t cmd = amdgpu_ib_get_value(ib, idx + 1); if ((len < 8) || (len & 3)) { DRM_ERROR("invalid VCE command length (%d)!\n", len); @@ -759,52 +762,52 @@ int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx) switch (cmd) { case 0x00000002: /* task info */ - fb_idx = amdgpu_get_ib_value(p, ib_idx, idx + 6); - bs_idx = amdgpu_get_ib_value(p, ib_idx, idx + 7); + fb_idx = amdgpu_ib_get_value(ib, idx + 6); + bs_idx = amdgpu_ib_get_value(ib, idx + 7); break; case 0x03000001: /* encode */ - r = amdgpu_vce_validate_bo(p, ib_idx, idx + 10, - idx + 9, 0, 0); + r = amdgpu_vce_validate_bo(p, ib, idx + 10, idx + 9, + 0, 0); if (r) goto out; - r = amdgpu_vce_validate_bo(p, ib_idx, idx + 12, - idx + 11, 0, 0); + r = amdgpu_vce_validate_bo(p, ib, idx + 12, idx + 11, + 0, 0); if (r) goto out; break; case 0x05000001: /* context buffer */ - r = amdgpu_vce_validate_bo(p, ib_idx, idx + 3, - idx + 2, 0, 0); + r = amdgpu_vce_validate_bo(p, ib, idx + 3, idx + 2, + 0, 0); if (r) goto out; break; case 0x05000004: /* video bitstream buffer */ - tmp = amdgpu_get_ib_value(p, ib_idx, idx + 4); - r = amdgpu_vce_validate_bo(p, ib_idx, idx + 3, idx + 2, + tmp = amdgpu_ib_get_value(ib, idx + 4); + r = amdgpu_vce_validate_bo(p, ib, idx + 3, idx + 2, tmp, bs_idx); if (r) goto out; break; case 0x05000005: /* feedback buffer */ - r = amdgpu_vce_validate_bo(p, ib_idx, idx + 3, idx + 2, + r = amdgpu_vce_validate_bo(p, ib, idx + 3, idx + 2, 4096, fb_idx); if (r) goto out; break; case 0x0500000d: /* MV buffer */ - r = amdgpu_vce_validate_bo(p, ib_idx, idx + 3, - idx + 2, 0, 0); + r = amdgpu_vce_validate_bo(p, ib, idx + 3, idx + 2, + 0, 0); if (r) goto out; - r = amdgpu_vce_validate_bo(p, ib_idx, idx + 8, - idx + 7, 0, 0); + r = amdgpu_vce_validate_bo(p, ib, idx + 8, idx + 7, + 0, 0); if (r) goto out; break; @@ -814,12 +817,12 @@ int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx) } for (idx = 0; idx < ib->length_dw;) { - uint32_t len = amdgpu_get_ib_value(p, ib_idx, idx); - uint32_t cmd = amdgpu_get_ib_value(p, ib_idx, idx + 1); + uint32_t len = amdgpu_ib_get_value(ib, idx); + uint32_t cmd = amdgpu_ib_get_value(ib, idx + 1); switch (cmd) { case 0x00000001: /* session */ - handle = amdgpu_get_ib_value(p, ib_idx, idx + 2); + handle = amdgpu_ib_get_value(ib, idx + 2); session_idx = amdgpu_vce_validate_handle(p, handle, &allocated); if (session_idx < 0) { @@ -830,8 +833,8 @@ int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx) break; case 0x00000002: /* task info */ - fb_idx = amdgpu_get_ib_value(p, ib_idx, idx + 6); - bs_idx = amdgpu_get_ib_value(p, ib_idx, idx + 7); + fb_idx = amdgpu_ib_get_value(ib, idx + 6); + bs_idx = amdgpu_ib_get_value(ib, idx + 7); break; case 0x01000001: /* create */ @@ -846,8 +849,8 @@ int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx) goto out; } - *size = amdgpu_get_ib_value(p, ib_idx, idx + 8) * - amdgpu_get_ib_value(p, ib_idx, idx + 10) * + *size = amdgpu_ib_get_value(ib, idx 
+ 8) * + amdgpu_ib_get_value(ib, idx + 10) * 8 * 3 / 2; break; @@ -876,12 +879,12 @@ int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx) break; case 0x03000001: /* encode */ - r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 10, idx + 9, + r = amdgpu_vce_cs_reloc(p, ib, idx + 10, idx + 9, *size, 0); if (r) goto out; - r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 12, idx + 11, + r = amdgpu_vce_cs_reloc(p, ib, idx + 12, idx + 11, *size / 3, 0); if (r) goto out; @@ -892,35 +895,35 @@ int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx) break; case 0x05000001: /* context buffer */ - r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 3, idx + 2, + r = amdgpu_vce_cs_reloc(p, ib, idx + 3, idx + 2, *size * 2, 0); if (r) goto out; break; case 0x05000004: /* video bitstream buffer */ - tmp = amdgpu_get_ib_value(p, ib_idx, idx + 4); - r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 3, idx + 2, + tmp = amdgpu_ib_get_value(ib, idx + 4); + r = amdgpu_vce_cs_reloc(p, ib, idx + 3, idx + 2, tmp, bs_idx); if (r) goto out; break; case 0x05000005: /* feedback buffer */ - r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 3, idx + 2, + r = amdgpu_vce_cs_reloc(p, ib, idx + 3, idx + 2, 4096, fb_idx); if (r) goto out; break; case 0x0500000d: /* MV buffer */ - r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 3, - idx + 2, *size, 0); + r = amdgpu_vce_cs_reloc(p, ib, idx + 3, + idx + 2, *size, 0); if (r) goto out; - r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 8, - idx + 7, *size / 12, 0); + r = amdgpu_vce_cs_reloc(p, ib, idx + 8, + idx + 7, *size / 12, 0); if (r) goto out; break; @@ -965,11 +968,13 @@ out: * amdgpu_vce_ring_parse_cs_vm - parse the command stream in VM mode * * @p: parser context - * @ib_idx: indirect buffer to use + * @job: the job to parse + * @ib: the IB to patch */ -int amdgpu_vce_ring_parse_cs_vm(struct amdgpu_cs_parser *p, uint32_t ib_idx) +int amdgpu_vce_ring_parse_cs_vm(struct amdgpu_cs_parser *p, + struct amdgpu_job *job, + struct amdgpu_ib *ib) { - struct amdgpu_ib *ib = &p->job->ibs[ib_idx]; int session_idx = -1; uint32_t destroyed = 0; uint32_t created = 0; @@ -978,8 +983,8 @@ int amdgpu_vce_ring_parse_cs_vm(struct amdgpu_cs_parser *p, uint32_t ib_idx) int i, r = 0, idx = 0; while (idx < ib->length_dw) { - uint32_t len = amdgpu_get_ib_value(p, ib_idx, idx); - uint32_t cmd = amdgpu_get_ib_value(p, ib_idx, idx + 1); + uint32_t len = amdgpu_ib_get_value(ib, idx); + uint32_t cmd = amdgpu_ib_get_value(ib, idx + 1); if ((len < 8) || (len & 3)) { DRM_ERROR("invalid VCE command length (%d)!\n", len); @@ -989,7 +994,7 @@ int amdgpu_vce_ring_parse_cs_vm(struct amdgpu_cs_parser *p, uint32_t ib_idx) switch (cmd) { case 0x00000001: /* session */ - handle = amdgpu_get_ib_value(p, ib_idx, idx + 2); + handle = amdgpu_ib_get_value(ib, idx + 2); session_idx = amdgpu_vce_validate_handle(p, handle, &allocated); if (session_idx < 0) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h index be4a6e773c5b..ea680fc9a6c3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h @@ -59,8 +59,11 @@ int amdgpu_vce_entity_init(struct amdgpu_device *adev); int amdgpu_vce_suspend(struct amdgpu_device *adev); int amdgpu_vce_resume(struct amdgpu_device *adev); void amdgpu_vce_free_handles(struct amdgpu_device *adev, struct drm_file *filp); -int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx); -int amdgpu_vce_ring_parse_cs_vm(struct amdgpu_cs_parser *p, uint32_t ib_idx); +int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser 
*p, struct amdgpu_job *job, + struct amdgpu_ib *ib); +int amdgpu_vce_ring_parse_cs_vm(struct amdgpu_cs_parser *p, + struct amdgpu_job *job, + struct amdgpu_ib *ib); void amdgpu_vce_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_job *job, struct amdgpu_ib *ib, uint32_t flags); void amdgpu_vce_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c index 9a19a6a57b23..a0ee828a4a97 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c @@ -27,6 +27,7 @@ #include <linux/firmware.h> #include <linux/module.h> #include <linux/pci.h> +#include <linux/debugfs.h> #include <drm/drm_drv.h> #include "amdgpu.h" @@ -51,6 +52,7 @@ #define FIRMWARE_ALDEBARAN "amdgpu/aldebaran_vcn.bin" #define FIRMWARE_BEIGE_GOBY "amdgpu/beige_goby_vcn.bin" #define FIRMWARE_YELLOW_CARP "amdgpu/yellow_carp_vcn.bin" +#define FIRMWARE_VCN_3_1_2 "amdgpu/vcn_3_1_2.bin" MODULE_FIRMWARE(FIRMWARE_RAVEN); MODULE_FIRMWARE(FIRMWARE_PICASSO); @@ -68,6 +70,7 @@ MODULE_FIRMWARE(FIRMWARE_VANGOGH); MODULE_FIRMWARE(FIRMWARE_DIMGREY_CAVEFISH); MODULE_FIRMWARE(FIRMWARE_BEIGE_GOBY); MODULE_FIRMWARE(FIRMWARE_YELLOW_CARP); +MODULE_FIRMWARE(FIRMWARE_VCN_3_1_2); static void amdgpu_vcn_idle_work_handler(struct work_struct *work); @@ -77,6 +80,7 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev) const char *fw_name; const struct common_firmware_header *hdr; unsigned char fw_check; + unsigned int fw_shared_size, log_offset; int i, r; INIT_DELAYED_WORK(&adev->vcn.idle_work, amdgpu_vcn_idle_work_handler); @@ -165,6 +169,12 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev) (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)) adev->vcn.indirect_sram = true; break; + case IP_VERSION(3, 1, 2): + fw_name = FIRMWARE_VCN_3_1_2; + if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) && + (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)) + adev->vcn.indirect_sram = true; + break; default: return -EINVAL; } @@ -218,7 +228,12 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev) bo_size = AMDGPU_VCN_STACK_SIZE + AMDGPU_VCN_CONTEXT_SIZE; if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) bo_size += AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8); - bo_size += AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_fw_shared)); + fw_shared_size = AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_fw_shared)); + log_offset = offsetof(struct amdgpu_fw_shared, fw_log); + bo_size += fw_shared_size; + + if (amdgpu_vcnfw_log) + bo_size += AMDGPU_VCNFW_LOG_SIZE; for (i = 0; i < adev->vcn.num_vcn_inst; i++) { if (adev->vcn.harvest_config & (1 << i)) @@ -232,10 +247,18 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev) return r; } - adev->vcn.inst[i].fw_shared_cpu_addr = adev->vcn.inst[i].cpu_addr + - bo_size - AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_fw_shared)); - adev->vcn.inst[i].fw_shared_gpu_addr = adev->vcn.inst[i].gpu_addr + - bo_size - AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_fw_shared)); + adev->vcn.inst[i].fw_shared.cpu_addr = adev->vcn.inst[i].cpu_addr + + bo_size - fw_shared_size; + adev->vcn.inst[i].fw_shared.gpu_addr = adev->vcn.inst[i].gpu_addr + + bo_size - fw_shared_size; + + adev->vcn.inst[i].fw_shared.mem_size = fw_shared_size; + + if (amdgpu_vcnfw_log) { + adev->vcn.inst[i].fw_shared.cpu_addr -= AMDGPU_VCNFW_LOG_SIZE; + adev->vcn.inst[i].fw_shared.gpu_addr -= AMDGPU_VCNFW_LOG_SIZE; + adev->vcn.inst[i].fw_shared.log_offset = log_offset; + } if (adev->vcn.indirect_sram) { r = amdgpu_bo_create_kernel(adev, 64 * 2 * 4, 
PAGE_SIZE, @@ -971,3 +994,112 @@ void amdgpu_vcn_setup_ucode(struct amdgpu_device *adev) dev_info(adev->dev, "Will use PSP to load VCN firmware\n"); } } + +/* + * debugfs for mapping vcn firmware log buffer. + */ +#if defined(CONFIG_DEBUG_FS) +static ssize_t amdgpu_debugfs_vcn_fwlog_read(struct file *f, char __user *buf, + size_t size, loff_t *pos) +{ + struct amdgpu_vcn_inst *vcn; + void *log_buf; + volatile struct amdgpu_vcn_fwlog *plog; + unsigned int read_pos, write_pos, available, i, read_bytes = 0; + unsigned int read_num[2] = {0}; + + vcn = file_inode(f)->i_private; + if (!vcn) + return -ENODEV; + + if (!vcn->fw_shared.cpu_addr || !amdgpu_vcnfw_log) + return -EFAULT; + + log_buf = vcn->fw_shared.cpu_addr + vcn->fw_shared.mem_size; + + plog = (volatile struct amdgpu_vcn_fwlog *)log_buf; + read_pos = plog->rptr; + write_pos = plog->wptr; + + if (read_pos > AMDGPU_VCNFW_LOG_SIZE || write_pos > AMDGPU_VCNFW_LOG_SIZE) + return -EFAULT; + + if (!size || (read_pos == write_pos)) + return 0; + + if (write_pos > read_pos) { + available = write_pos - read_pos; + read_num[0] = min(size, (size_t)available); + } else { + read_num[0] = AMDGPU_VCNFW_LOG_SIZE - read_pos; + available = read_num[0] + write_pos - plog->header_size; + if (size > available) + read_num[1] = write_pos - plog->header_size; + else if (size > read_num[0]) + read_num[1] = size - read_num[0]; + else + read_num[0] = size; + } + + for (i = 0; i < 2; i++) { + if (read_num[i]) { + if (read_pos == AMDGPU_VCNFW_LOG_SIZE) + read_pos = plog->header_size; + if (read_num[i] == copy_to_user((buf + read_bytes), + (log_buf + read_pos), read_num[i])) + return -EFAULT; + + read_bytes += read_num[i]; + read_pos += read_num[i]; + } + } + + plog->rptr = read_pos; + *pos += read_bytes; + return read_bytes; +} + +static const struct file_operations amdgpu_debugfs_vcnfwlog_fops = { + .owner = THIS_MODULE, + .read = amdgpu_debugfs_vcn_fwlog_read, + .llseek = default_llseek +}; +#endif + +void amdgpu_debugfs_vcn_fwlog_init(struct amdgpu_device *adev, uint8_t i, + struct amdgpu_vcn_inst *vcn) +{ +#if defined(CONFIG_DEBUG_FS) + struct drm_minor *minor = adev_to_drm(adev)->primary; + struct dentry *root = minor->debugfs_root; + char name[32]; + + sprintf(name, "amdgpu_vcn_%d_fwlog", i); + debugfs_create_file_size(name, S_IFREG | S_IRUGO, root, vcn, + &amdgpu_debugfs_vcnfwlog_fops, + AMDGPU_VCNFW_LOG_SIZE); +#endif +} + +void amdgpu_vcn_fwlog_init(struct amdgpu_vcn_inst *vcn) +{ +#if defined(CONFIG_DEBUG_FS) + volatile uint32_t *flag = vcn->fw_shared.cpu_addr; + void *fw_log_cpu_addr = vcn->fw_shared.cpu_addr + vcn->fw_shared.mem_size; + uint64_t fw_log_gpu_addr = vcn->fw_shared.gpu_addr + vcn->fw_shared.mem_size; + volatile struct amdgpu_vcn_fwlog *log_buf = fw_log_cpu_addr; + volatile struct amdgpu_fw_shared_fw_logging *fw_log = vcn->fw_shared.cpu_addr + + vcn->fw_shared.log_offset; + *flag |= cpu_to_le32(AMDGPU_VCN_FW_LOGGING_FLAG); + fw_log->is_enabled = 1; + fw_log->addr_lo = cpu_to_le32(fw_log_gpu_addr & 0xFFFFFFFF); + fw_log->addr_hi = cpu_to_le32(fw_log_gpu_addr >> 32); + fw_log->size = cpu_to_le32(AMDGPU_VCNFW_LOG_SIZE); + + log_buf->header_size = sizeof(struct amdgpu_vcn_fwlog); + log_buf->buffer_size = AMDGPU_VCNFW_LOG_SIZE; + log_buf->rptr = log_buf->header_size; + log_buf->wptr = log_buf->header_size; + log_buf->wrapped = 0; +#endif +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h index 5d3728b027d3..5f7da4c19822 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h +++ 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h @@ -24,6 +24,8 @@ #ifndef __AMDGPU_VCN_H__ #define __AMDGPU_VCN_H__ +#include "amdgpu_ras.h" + #define AMDGPU_VCN_STACK_SIZE (128*1024) #define AMDGPU_VCN_CONTEXT_SIZE (512*1024) @@ -158,10 +160,17 @@ #define AMDGPU_VCN_FW_SHARED_FLAG_0_RB (1 << 6) #define AMDGPU_VCN_MULTI_QUEUE_FLAG (1 << 8) #define AMDGPU_VCN_SW_RING_FLAG (1 << 9) +#define AMDGPU_VCN_FW_LOGGING_FLAG (1 << 10) +#define AMDGPU_VCN_SMU_VERSION_INFO_FLAG (1 << 11) #define AMDGPU_VCN_IB_FLAG_DECODE_BUFFER 0x00000001 #define AMDGPU_VCN_CMD_FLAG_MSG_BUFFER 0x00000001 +#define VCN_CODEC_DISABLE_MASK_AV1 (1 << 0) +#define VCN_CODEC_DISABLE_MASK_VP9 (1 << 1) +#define VCN_CODEC_DISABLE_MASK_HEVC (1 << 2) +#define VCN_CODEC_DISABLE_MASK_H264 (1 << 3) + enum fw_queue_mode { FW_QUEUE_RING_RESET = 1, FW_QUEUE_DPG_HOLD_OFF = 2, @@ -205,6 +214,13 @@ struct amdgpu_vcn_reg{ unsigned scratch9; }; +struct amdgpu_vcn_fw_shared { + void *cpu_addr; + uint64_t gpu_addr; + uint32_t mem_size; + uint32_t log_offset; +}; + struct amdgpu_vcn_inst { struct amdgpu_bo *vcpu_bo; void *cpu_addr; @@ -221,8 +237,11 @@ struct amdgpu_vcn_inst { uint64_t dpg_sram_gpu_addr; uint32_t *dpg_sram_curr_addr; atomic_t dpg_enc_submission_cnt; - void *fw_shared_cpu_addr; - uint64_t fw_shared_gpu_addr; + struct amdgpu_vcn_fw_shared fw_shared; +}; + +struct amdgpu_vcn_ras { + struct amdgpu_ras_block_object ras_block; }; struct amdgpu_vcn { @@ -236,6 +255,7 @@ struct amdgpu_vcn { uint8_t num_vcn_inst; struct amdgpu_vcn_inst inst[AMDGPU_MAX_VCN_INSTANCES]; uint8_t vcn_config[AMDGPU_MAX_VCN_INSTANCES]; + uint32_t vcn_codec_disable_mask[AMDGPU_MAX_VCN_INSTANCES]; struct amdgpu_vcn_reg internal; struct mutex vcn_pg_lock; struct mutex vcn1_jpeg1_workaround; @@ -244,6 +264,9 @@ struct amdgpu_vcn { unsigned harvest_config; int (*pause_dpg_mode)(struct amdgpu_device *adev, int inst_idx, struct dpg_pause_state *new_state); + + struct ras_common_if *ras_if; + struct amdgpu_vcn_ras *ras; }; struct amdgpu_fw_shared_rb_ptrs_struct { @@ -265,6 +288,18 @@ struct amdgpu_fw_shared_sw_ring { uint8_t padding[3]; }; +struct amdgpu_fw_shared_fw_logging { + uint8_t is_enabled; + uint32_t addr_lo; + uint32_t addr_hi; + uint32_t size; +}; + +struct amdgpu_fw_shared_smu_interface_info { + uint8_t smu_interface_type; + uint8_t padding[3]; +}; + struct amdgpu_fw_shared { uint32_t present_flag_0; uint8_t pad[44]; @@ -272,6 +307,16 @@ struct amdgpu_fw_shared { uint8_t pad1[1]; struct amdgpu_fw_shared_multi_queue multi_queue; struct amdgpu_fw_shared_sw_ring sw_ring; + struct amdgpu_fw_shared_fw_logging fw_log; + struct amdgpu_fw_shared_smu_interface_info smu_interface_info; +}; + +struct amdgpu_vcn_fwlog { + uint32_t rptr; + uint32_t wptr; + uint32_t buffer_size; + uint32_t header_size; + uint8_t wrapped; }; struct amdgpu_vcn_decode_buffer { @@ -313,4 +358,7 @@ enum amdgpu_ring_priority_level amdgpu_vcn_get_enc_ring_prio(int ring); void amdgpu_vcn_setup_ucode(struct amdgpu_device *adev); +void amdgpu_vcn_fwlog_init(struct amdgpu_vcn_inst *vcn); +void amdgpu_debugfs_vcn_fwlog_init(struct amdgpu_device *adev, + uint8_t i, struct amdgpu_vcn_inst *vcn); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c index 07bc0f504713..a8ecf04389b3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c @@ -23,7 +23,12 @@ #include <linux/module.h> +#ifdef CONFIG_X86 +#include <asm/hypervisor.h> +#endif + #include <drm/drm_drv.h> +#include <xen/xen.h> #include "amdgpu.h" #include 
"amdgpu_ras.h" @@ -575,8 +580,10 @@ static int amdgpu_virt_write_vf2pf_data(struct amdgpu_device *adev) vf2pf_info->driver_cert = 0; vf2pf_info->os_info.all = 0; - vf2pf_info->fb_usage = amdgpu_vram_mgr_usage(&adev->mman.vram_mgr) >> 20; - vf2pf_info->fb_vis_usage = amdgpu_vram_mgr_vis_usage(&adev->mman.vram_mgr) >> 20; + vf2pf_info->fb_usage = + ttm_resource_manager_usage(&adev->mman.vram_mgr.manager) >> 20; + vf2pf_info->fb_vis_usage = + amdgpu_vram_mgr_vis_usage(&adev->mman.vram_mgr) >> 20; vf2pf_info->fb_size = adev->gmc.real_vram_size >> 20; vf2pf_info->fb_vis_size = adev->gmc.visible_vram_size >> 20; @@ -708,7 +715,8 @@ void amdgpu_detect_virtualization(struct amdgpu_device *adev) adev->virt.caps |= AMDGPU_SRIOV_CAPS_ENABLE_IOV; if (!reg) { - if (is_virtual_machine()) /* passthrough mode exclus sriov mod */ + /* passthrough mode exclus sriov mod */ + if (is_virtual_machine() && !xen_initial_domain()) adev->virt.caps |= AMDGPU_PASSTHROUGH_MODE; } @@ -721,8 +729,12 @@ void amdgpu_detect_virtualization(struct amdgpu_device *adev) break; case CHIP_VEGA10: soc15_set_virt_ops(adev); - /* send a dummy GPU_INIT_DATA request to host on vega10 */ - amdgpu_virt_request_init_data(adev); +#ifdef CONFIG_X86 + /* not send GPU_INIT_DATA with MS_HYPERV*/ + if (!hypervisor_is_type(X86_HYPER_MS_HYPERV)) +#endif + /* send a dummy GPU_INIT_DATA request to host on vega10 */ + amdgpu_virt_request_init_data(adev); break; case CHIP_VEGA20: case CHIP_ARCTURUS: @@ -820,3 +832,148 @@ void amdgpu_virt_update_sriov_video_codec(struct amdgpu_device *adev, } } } + +static bool amdgpu_virt_get_rlcg_reg_access_flag(struct amdgpu_device *adev, + u32 acc_flags, u32 hwip, + bool write, u32 *rlcg_flag) +{ + bool ret = false; + + switch (hwip) { + case GC_HWIP: + if (amdgpu_sriov_reg_indirect_gc(adev)) { + *rlcg_flag = + write ? 
AMDGPU_RLCG_GC_WRITE : AMDGPU_RLCG_GC_READ; + ret = true; + /* only in new version, AMDGPU_REGS_NO_KIQ and + * AMDGPU_REGS_RLC are enabled simultaneously */ + } else if ((acc_flags & AMDGPU_REGS_RLC) && + !(acc_flags & AMDGPU_REGS_NO_KIQ) && write) { + *rlcg_flag = AMDGPU_RLCG_GC_WRITE_LEGACY; + ret = true; + } + break; + case MMHUB_HWIP: + if (amdgpu_sriov_reg_indirect_mmhub(adev) && + (acc_flags & AMDGPU_REGS_RLC) && write) { + *rlcg_flag = AMDGPU_RLCG_MMHUB_WRITE; + ret = true; + } + break; + default: + break; + } + return ret; +} + +static u32 amdgpu_virt_rlcg_reg_rw(struct amdgpu_device *adev, u32 offset, u32 v, u32 flag) +{ + struct amdgpu_rlcg_reg_access_ctrl *reg_access_ctrl; + uint32_t timeout = 50000; + uint32_t i, tmp; + uint32_t ret = 0; + void *scratch_reg0; + void *scratch_reg1; + void *scratch_reg2; + void *scratch_reg3; + void *spare_int; + + if (!adev->gfx.rlc.rlcg_reg_access_supported) { + dev_err(adev->dev, + "indirect registers access through rlcg is not available\n"); + return 0; + } + + reg_access_ctrl = &adev->gfx.rlc.reg_access_ctrl; + scratch_reg0 = (void __iomem *)adev->rmmio + 4 * reg_access_ctrl->scratch_reg0; + scratch_reg1 = (void __iomem *)adev->rmmio + 4 * reg_access_ctrl->scratch_reg1; + scratch_reg2 = (void __iomem *)adev->rmmio + 4 * reg_access_ctrl->scratch_reg2; + scratch_reg3 = (void __iomem *)adev->rmmio + 4 * reg_access_ctrl->scratch_reg3; + if (reg_access_ctrl->spare_int) + spare_int = (void __iomem *)adev->rmmio + 4 * reg_access_ctrl->spare_int; + + if (offset == reg_access_ctrl->grbm_cntl) { + /* if the target reg offset is grbm_cntl, write to scratch_reg2 */ + writel(v, scratch_reg2); + writel(v, ((void __iomem *)adev->rmmio) + (offset * 4)); + } else if (offset == reg_access_ctrl->grbm_idx) { + /* if the target reg offset is grbm_idx, write to scratch_reg3 */ + writel(v, scratch_reg3); + writel(v, ((void __iomem *)adev->rmmio) + (offset * 4)); + } else { + /* + * SCRATCH_REG0 = read/write value + * SCRATCH_REG1[30:28] = command + * SCRATCH_REG1[19:0] = address in dword + * SCRATCH_REG1[26:24] = Error reporting + */ + writel(v, scratch_reg0); + writel((offset | flag), scratch_reg1); + if (reg_access_ctrl->spare_int) + writel(1, spare_int); + + for (i = 0; i < timeout; i++) { + tmp = readl(scratch_reg1); + if (!(tmp & AMDGPU_RLCG_SCRATCH1_ADDRESS_MASK)) + break; + udelay(10); + } + + if (i >= timeout) { + if (amdgpu_sriov_rlcg_error_report_enabled(adev)) { + if (tmp & AMDGPU_RLCG_VFGATE_DISABLED) { + dev_err(adev->dev, + "vfgate is disabled, rlcg failed to program reg: 0x%05x\n", offset); + } else if (tmp & AMDGPU_RLCG_WRONG_OPERATION_TYPE) { + dev_err(adev->dev, + "wrong operation type, rlcg failed to program reg: 0x%05x\n", offset); + } else if (tmp & AMDGPU_RLCG_REG_NOT_IN_RANGE) { + dev_err(adev->dev, + "register is not in range, rlcg failed to program reg: 0x%05x\n", offset); + } else { + dev_err(adev->dev, + "unknown error type, rlcg failed to program reg: 0x%05x\n", offset); + } + } else { + dev_err(adev->dev, + "timeout: rlcg faled to program reg: 0x%05x\n", offset); + } + } + } + + ret = readl(scratch_reg0); + return ret; +} + +void amdgpu_sriov_wreg(struct amdgpu_device *adev, + u32 offset, u32 value, + u32 acc_flags, u32 hwip) +{ + u32 rlcg_flag; + + if (!amdgpu_sriov_runtime(adev) && + amdgpu_virt_get_rlcg_reg_access_flag(adev, acc_flags, hwip, true, &rlcg_flag)) { + amdgpu_virt_rlcg_reg_rw(adev, offset, value, rlcg_flag); + return; + } + + if (acc_flags & AMDGPU_REGS_NO_KIQ) + WREG32_NO_KIQ(offset, value); + else + WREG32(offset, 
value); +} + +u32 amdgpu_sriov_rreg(struct amdgpu_device *adev, + u32 offset, u32 acc_flags, u32 hwip) +{ + u32 rlcg_flag; + + if (!amdgpu_sriov_runtime(adev) && + amdgpu_virt_get_rlcg_reg_access_flag(adev, acc_flags, hwip, false, &rlcg_flag)) + return amdgpu_virt_rlcg_reg_rw(adev, offset, 0, rlcg_flag); + + if (acc_flags & AMDGPU_REGS_NO_KIQ) + return RREG32_NO_KIQ(offset); + else + return RREG32(offset); +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h index 9adfb8d63280..239f232f9c02 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h @@ -32,6 +32,19 @@ #define AMDGPU_PASSTHROUGH_MODE (1 << 3) /* thw whole GPU is pass through for VM */ #define AMDGPU_SRIOV_CAPS_RUNTIME (1 << 4) /* is out of full access mode */ +/* flags for indirect register access path supported by rlcg for sriov */ +#define AMDGPU_RLCG_GC_WRITE_LEGACY (0x8 << 28) +#define AMDGPU_RLCG_GC_WRITE (0x0 << 28) +#define AMDGPU_RLCG_GC_READ (0x1 << 28) +#define AMDGPU_RLCG_MMHUB_WRITE (0x2 << 28) + +/* error code for indirect register access path supported by rlcg for sriov */ +#define AMDGPU_RLCG_VFGATE_DISABLED 0x4000000 +#define AMDGPU_RLCG_WRONG_OPERATION_TYPE 0x2000000 +#define AMDGPU_RLCG_REG_NOT_IN_RANGE 0x1000000 + +#define AMDGPU_RLCG_SCRATCH1_ADDRESS_MASK 0xFFFFF + /* all asic after AI use this offset */ #define mmRCC_IOV_FUNC_IDENTIFIER 0xDE5 /* tonga/fiji use this offset */ @@ -275,13 +288,18 @@ struct amdgpu_video_codec_info; (amdgpu_sriov_vf((adev)) && \ ((adev)->virt.reg_access & (AMDGIM_FEATURE_GC_REG_RLC_EN))) +#define amdgpu_sriov_rlcg_error_report_enabled(adev) \ + (amdgpu_sriov_reg_indirect_mmhub(adev) || amdgpu_sriov_reg_indirect_gc(adev)) + #define amdgpu_passthrough(adev) \ ((adev)->virt.caps & AMDGPU_PASSTHROUGH_MODE) static inline bool is_virtual_machine(void) { -#ifdef CONFIG_X86 +#if defined(CONFIG_X86) return boot_cpu_has(X86_FEATURE_HYPERVISOR); +#elif defined(CONFIG_ARM64) + return !is_kernel_in_hyp_mode(); #else return false; #endif @@ -293,7 +311,6 @@ static inline bool is_virtual_machine(void) ((!amdgpu_in_reset(adev)) && adev->virt.tdr_debug) #define amdgpu_sriov_is_normal(adev) \ ((!amdgpu_in_reset(adev)) && (!adev->virt.tdr_debug)) - bool amdgpu_virt_mmio_blocked(struct amdgpu_device *adev); void amdgpu_virt_init_setting(struct amdgpu_device *adev); void amdgpu_virt_kiq_reg_write_reg_wait(struct amdgpu_device *adev, @@ -321,4 +338,9 @@ enum amdgpu_sriov_vf_mode amdgpu_virt_get_sriov_vf_mode(struct amdgpu_device *ad void amdgpu_virt_update_sriov_video_codec(struct amdgpu_device *adev, struct amdgpu_video_codec_info *encode, uint32_t encode_array_size, struct amdgpu_video_codec_info *decode, uint32_t decode_array_size); +void amdgpu_sriov_wreg(struct amdgpu_device *adev, + u32 offset, u32 value, + u32 acc_flags, u32 hwip); +u32 amdgpu_sriov_rreg(struct amdgpu_device *adev, + u32 offset, u32 acc_flags, u32 hwip); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c index 5224d9a39737..576849e95296 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c @@ -302,9 +302,6 @@ static int amdgpu_vkms_prepare_fb(struct drm_plane *plane, struct drm_gem_object *obj; struct amdgpu_device *adev; struct amdgpu_bo *rbo; - struct list_head list; - struct ttm_validate_buffer tv; - struct ww_acquire_ctx ticket; uint32_t domain; int r; @@ -316,18 +313,19 @@ static int amdgpu_vkms_prepare_fb(struct drm_plane *plane, 
obj = new_state->fb->obj[0]; rbo = gem_to_amdgpu_bo(obj); adev = amdgpu_ttm_adev(rbo->tbo.bdev); - INIT_LIST_HEAD(&list); - tv.bo = &rbo->tbo; - tv.num_shared = 1; - list_add(&tv.head, &list); - - r = ttm_eu_reserve_buffers(&ticket, &list, false, NULL); + r = amdgpu_bo_reserve(rbo, true); if (r) { dev_err(adev->dev, "fail to reserve bo (%d)\n", r); return r; } + r = dma_resv_reserve_fences(rbo->tbo.base.resv, 1); + if (r) { + dev_err(adev->dev, "allocating fence slot failed (%d)\n", r); + goto error_unlock; + } + if (plane->type != DRM_PLANE_TYPE_CURSOR) domain = amdgpu_display_supported_domains(adev, rbo->flags); else @@ -337,25 +335,29 @@ static int amdgpu_vkms_prepare_fb(struct drm_plane *plane, if (unlikely(r != 0)) { if (r != -ERESTARTSYS) DRM_ERROR("Failed to pin framebuffer with error %d\n", r); - ttm_eu_backoff_reservation(&ticket, &list); - return r; + goto error_unlock; } r = amdgpu_ttm_alloc_gart(&rbo->tbo); if (unlikely(r != 0)) { - amdgpu_bo_unpin(rbo); - ttm_eu_backoff_reservation(&ticket, &list); DRM_ERROR("%p bind failed\n", rbo); - return r; + goto error_unpin; } - ttm_eu_backoff_reservation(&ticket, &list); + amdgpu_bo_unreserve(rbo); afb->address = amdgpu_bo_gpu_offset(rbo); amdgpu_bo_ref(rbo); return 0; + +error_unpin: + amdgpu_bo_unpin(rbo); + +error_unlock: + amdgpu_bo_unreserve(rbo); + return r; } static void amdgpu_vkms_cleanup_fb(struct drm_plane *plane, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 418341a67517..f9479e23de18 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -89,6 +89,21 @@ struct amdgpu_prt_cb { }; /** + * struct amdgpu_vm_tlb_seq_cb - Helper to increment the TLB flush sequence + */ +struct amdgpu_vm_tlb_seq_cb { + /** + * @vm: pointer to the amdgpu_vm structure to set the fence sequence on + */ + struct amdgpu_vm *vm; + + /** + * @cb: callback + */ + struct dma_fence_cb cb; +}; + +/** * amdgpu_vm_set_pasid - manage pasid and vm ptr mapping * * @adev: amdgpu_device pointer @@ -155,108 +170,6 @@ static inline void amdgpu_vm_eviction_unlock(struct amdgpu_vm *vm) } /** - * amdgpu_vm_level_shift - return the addr shift for each level - * - * @adev: amdgpu_device pointer - * @level: VMPT level - * - * Returns: - * The number of bits the pfn needs to be right shifted for a level. - */ -static unsigned amdgpu_vm_level_shift(struct amdgpu_device *adev, - unsigned level) -{ - switch (level) { - case AMDGPU_VM_PDB2: - case AMDGPU_VM_PDB1: - case AMDGPU_VM_PDB0: - return 9 * (AMDGPU_VM_PDB0 - level) + - adev->vm_manager.block_size; - case AMDGPU_VM_PTB: - return 0; - default: - return ~0; - } -} - -/** - * amdgpu_vm_num_entries - return the number of entries in a PD/PT - * - * @adev: amdgpu_device pointer - * @level: VMPT level - * - * Returns: - * The number of entries in a page directory or page table. 
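The level-shift and entry-count helpers deleted from this file are not dropped; they reappear in the new amdgpu_vm_pt.c. A standalone sketch of the arithmetic they implement, assuming the usual four-level layout and a 9-bit block size (both values are illustrative, not read from hardware):

#include <stdint.h>
#include <stdio.h>

enum { PDB2, PDB1, PDB0, PTB };	/* four-level GPUVM, root at PDB2 */
#define BLOCK_SIZE 9		/* log2 of the entries in one PTB */

/* Bits the pfn is right-shifted to index a given level: the
 * arithmetic of the removed amdgpu_vm_level_shift().
 */
static unsigned int level_shift(unsigned int level)
{
	return level == PTB ? 0 : 9 * (PDB0 - level) + BLOCK_SIZE;
}

int main(void)
{
	uint64_t pfn = 0x123456789000ULL >> 12;	/* 4 KiB GPU pages */
	unsigned int level;

	for (level = PDB2; level <= PTB; level++) {
		unsigned int shift = level_shift(level);
		/* middle levels hold 512 entries (9 index bits); the
		 * root alone is sized from max_pfn, so don't mask it
		 */
		uint64_t idx = level == PDB2 ? pfn >> shift
					     : (pfn >> shift) & 0x1ff;

		printf("level %u: shift %2u, index 0x%llx\n",
		       level, shift, (unsigned long long)idx);
	}
	return 0;
}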
- */ -static unsigned amdgpu_vm_num_entries(struct amdgpu_device *adev, - unsigned level) -{ - unsigned shift = amdgpu_vm_level_shift(adev, - adev->vm_manager.root_level); - - if (level == adev->vm_manager.root_level) - /* For the root directory */ - return round_up(adev->vm_manager.max_pfn, 1ULL << shift) - >> shift; - else if (level != AMDGPU_VM_PTB) - /* Everything in between */ - return 512; - else - /* For the page tables on the leaves */ - return AMDGPU_VM_PTE_COUNT(adev); -} - -/** - * amdgpu_vm_num_ats_entries - return the number of ATS entries in the root PD - * - * @adev: amdgpu_device pointer - * - * Returns: - * The number of entries in the root page directory which needs the ATS setting. - */ -static unsigned amdgpu_vm_num_ats_entries(struct amdgpu_device *adev) -{ - unsigned shift; - - shift = amdgpu_vm_level_shift(adev, adev->vm_manager.root_level); - return AMDGPU_GMC_HOLE_START >> (shift + AMDGPU_GPU_PAGE_SHIFT); -} - -/** - * amdgpu_vm_entries_mask - the mask to get the entry number of a PD/PT - * - * @adev: amdgpu_device pointer - * @level: VMPT level - * - * Returns: - * The mask to extract the entry number of a PD/PT from an address. - */ -static uint32_t amdgpu_vm_entries_mask(struct amdgpu_device *adev, - unsigned int level) -{ - if (level <= adev->vm_manager.root_level) - return 0xffffffff; - else if (level != AMDGPU_VM_PTB) - return 0x1ff; - else - return AMDGPU_VM_PTE_COUNT(adev) - 1; -} - -/** - * amdgpu_vm_bo_size - returns the size of the BOs in bytes - * - * @adev: amdgpu_device pointer - * @level: VMPT level - * - * Returns: - * The size of the BO for a page directory or page table in bytes. - */ -static unsigned amdgpu_vm_bo_size(struct amdgpu_device *adev, unsigned level) -{ - return AMDGPU_GPU_PAGE_ALIGN(amdgpu_vm_num_entries(adev, level) * 8); -} - -/** * amdgpu_vm_bo_evicted - vm_bo is evicted * * @vm_bo: vm_bo which is evicted @@ -358,9 +271,8 @@ static void amdgpu_vm_bo_done(struct amdgpu_vm_bo_base *vm_bo) * Initialize a bo_va_base structure and add it to the appropriate lists * */ -static void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base, - struct amdgpu_vm *vm, - struct amdgpu_bo *bo) +void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base, + struct amdgpu_vm *vm, struct amdgpu_bo *bo) { base->vm = vm; base->bo = bo; @@ -375,7 +287,9 @@ static void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base, if (bo->tbo.base.resv != vm->root.bo->tbo.base.resv) return; - vm->bulk_moveable = false; + dma_resv_assert_held(vm->root.bo->tbo.base.resv); + + ttm_bo_set_bulk_move(&bo->tbo, &vm->lru_bulk_move); if (bo->tbo.type == ttm_bo_type_kernel && bo->parent) amdgpu_vm_bo_relocated(base); else @@ -394,228 +308,6 @@ static void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base, } /** - * amdgpu_vm_pt_parent - get the parent page directory - * - * @pt: child page table - * - * Helper to get the parent entry for the child page table. NULL if we are at - * the root page directory. 
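The rewritten amdgpu_vm_bo_base_init() above is the core of this patch's LRU change: instead of tracking a driver-side bulk_moveable flag and rebuilding the bulk move on demand, per-VM BOs are registered with TTM's own bulk-move tracking once, at init time. Condensed from the hunk, with comments added:

	/* Only BOs sharing the root PD's reservation object take part in
	 * the per-VM bulk move, and registering requires that lock.
	 */
	if (bo->tbo.base.resv == vm->root.bo->tbo.base.resv) {
		dma_resv_assert_held(vm->root.bo->tbo.base.resv);
		ttm_bo_set_bulk_move(&bo->tbo, &vm->lru_bulk_move);
	}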
- */ -static struct amdgpu_vm_bo_base *amdgpu_vm_pt_parent(struct amdgpu_vm_bo_base *pt) -{ - struct amdgpu_bo *parent = pt->bo->parent; - - if (!parent) - return NULL; - - return parent->vm_bo; -} - -/* - * amdgpu_vm_pt_cursor - state for for_each_amdgpu_vm_pt - */ -struct amdgpu_vm_pt_cursor { - uint64_t pfn; - struct amdgpu_vm_bo_base *parent; - struct amdgpu_vm_bo_base *entry; - unsigned level; -}; - -/** - * amdgpu_vm_pt_start - start PD/PT walk - * - * @adev: amdgpu_device pointer - * @vm: amdgpu_vm structure - * @start: start address of the walk - * @cursor: state to initialize - * - * Initialize a amdgpu_vm_pt_cursor to start a walk. - */ -static void amdgpu_vm_pt_start(struct amdgpu_device *adev, - struct amdgpu_vm *vm, uint64_t start, - struct amdgpu_vm_pt_cursor *cursor) -{ - cursor->pfn = start; - cursor->parent = NULL; - cursor->entry = &vm->root; - cursor->level = adev->vm_manager.root_level; -} - -/** - * amdgpu_vm_pt_descendant - go to child node - * - * @adev: amdgpu_device pointer - * @cursor: current state - * - * Walk to the child node of the current node. - * Returns: - * True if the walk was possible, false otherwise. - */ -static bool amdgpu_vm_pt_descendant(struct amdgpu_device *adev, - struct amdgpu_vm_pt_cursor *cursor) -{ - unsigned mask, shift, idx; - - if ((cursor->level == AMDGPU_VM_PTB) || !cursor->entry || - !cursor->entry->bo) - return false; - - mask = amdgpu_vm_entries_mask(adev, cursor->level); - shift = amdgpu_vm_level_shift(adev, cursor->level); - - ++cursor->level; - idx = (cursor->pfn >> shift) & mask; - cursor->parent = cursor->entry; - cursor->entry = &to_amdgpu_bo_vm(cursor->entry->bo)->entries[idx]; - return true; -} - -/** - * amdgpu_vm_pt_sibling - go to sibling node - * - * @adev: amdgpu_device pointer - * @cursor: current state - * - * Walk to the sibling node of the current node. - * Returns: - * True if the walk was possible, false otherwise. - */ -static bool amdgpu_vm_pt_sibling(struct amdgpu_device *adev, - struct amdgpu_vm_pt_cursor *cursor) -{ - unsigned shift, num_entries; - - /* Root doesn't have a sibling */ - if (!cursor->parent) - return false; - - /* Go to our parents and see if we got a sibling */ - shift = amdgpu_vm_level_shift(adev, cursor->level - 1); - num_entries = amdgpu_vm_num_entries(adev, cursor->level - 1); - - if (cursor->entry == &to_amdgpu_bo_vm(cursor->parent->bo)->entries[num_entries - 1]) - return false; - - cursor->pfn += 1ULL << shift; - cursor->pfn &= ~((1ULL << shift) - 1); - ++cursor->entry; - return true; -} - -/** - * amdgpu_vm_pt_ancestor - go to parent node - * - * @cursor: current state - * - * Walk to the parent node of the current node. - * Returns: - * True if the walk was possible, false otherwise. - */ -static bool amdgpu_vm_pt_ancestor(struct amdgpu_vm_pt_cursor *cursor) -{ - if (!cursor->parent) - return false; - - --cursor->level; - cursor->entry = cursor->parent; - cursor->parent = amdgpu_vm_pt_parent(cursor->parent); - return true; -} - -/** - * amdgpu_vm_pt_next - get next PD/PT in hieratchy - * - * @adev: amdgpu_device pointer - * @cursor: current state - * - * Walk the PD/PT tree to the next node. 
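The cursor helpers being removed here implement a parent-pointer pre-order walk over the PD/PT tree (they, too, move to amdgpu_vm_pt.c). A minimal standalone analogue of the descendant/sibling/ancestor stepping, using a hypothetical node type:

#include <stddef.h>

struct node {
	struct node *parent;
	struct node **children;	/* NULL-terminated child array */
	size_t idx;		/* our slot in parent->children */
};

/* Pre-order successor, mirroring amdgpu_vm_pt_next(): first try a
 * child, then a sibling, otherwise climb until an ancestor has one.
 */
static struct node *preorder_next(struct node *n)
{
	if (n->children && n->children[0])
		return n->children[0];			/* descendant */

	while (n->parent) {
		if (n->parent->children[n->idx + 1])	/* sibling */
			return n->parent->children[n->idx + 1];
		n = n->parent;				/* ancestor */
	}
	return NULL;					/* walk finished */
}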
- */ -static void amdgpu_vm_pt_next(struct amdgpu_device *adev, - struct amdgpu_vm_pt_cursor *cursor) -{ - /* First try a newborn child */ - if (amdgpu_vm_pt_descendant(adev, cursor)) - return; - - /* If that didn't worked try to find a sibling */ - while (!amdgpu_vm_pt_sibling(adev, cursor)) { - /* No sibling, go to our parents and grandparents */ - if (!amdgpu_vm_pt_ancestor(cursor)) { - cursor->pfn = ~0ll; - return; - } - } -} - -/** - * amdgpu_vm_pt_first_dfs - start a deep first search - * - * @adev: amdgpu_device structure - * @vm: amdgpu_vm structure - * @start: optional cursor to start with - * @cursor: state to initialize - * - * Starts a deep first traversal of the PD/PT tree. - */ -static void amdgpu_vm_pt_first_dfs(struct amdgpu_device *adev, - struct amdgpu_vm *vm, - struct amdgpu_vm_pt_cursor *start, - struct amdgpu_vm_pt_cursor *cursor) -{ - if (start) - *cursor = *start; - else - amdgpu_vm_pt_start(adev, vm, 0, cursor); - while (amdgpu_vm_pt_descendant(adev, cursor)); -} - -/** - * amdgpu_vm_pt_continue_dfs - check if the deep first search should continue - * - * @start: starting point for the search - * @entry: current entry - * - * Returns: - * True when the search should continue, false otherwise. - */ -static bool amdgpu_vm_pt_continue_dfs(struct amdgpu_vm_pt_cursor *start, - struct amdgpu_vm_bo_base *entry) -{ - return entry && (!start || entry != start->entry); -} - -/** - * amdgpu_vm_pt_next_dfs - get the next node for a deep first search - * - * @adev: amdgpu_device structure - * @cursor: current state - * - * Move the cursor to the next node in a deep first search. - */ -static void amdgpu_vm_pt_next_dfs(struct amdgpu_device *adev, - struct amdgpu_vm_pt_cursor *cursor) -{ - if (!cursor->entry) - return; - - if (!cursor->parent) - cursor->entry = NULL; - else if (amdgpu_vm_pt_sibling(adev, cursor)) - while (amdgpu_vm_pt_descendant(adev, cursor)); - else - amdgpu_vm_pt_ancestor(cursor); -} - -/* - * for_each_amdgpu_vm_pt_dfs_safe - safe deep first search of all PDs/PTs - */ -#define for_each_amdgpu_vm_pt_dfs_safe(adev, vm, start, cursor, entry) \ - for (amdgpu_vm_pt_first_dfs((adev), (vm), (start), &(cursor)), \ - (entry) = (cursor).entry, amdgpu_vm_pt_next_dfs((adev), &(cursor));\ - amdgpu_vm_pt_continue_dfs((start), (entry)); \ - (entry) = (cursor).entry, amdgpu_vm_pt_next_dfs((adev), &(cursor))) - -/** * amdgpu_vm_get_pd_bo - add the VM PD to a validation list * * @vm: vm providing the BOs @@ -638,36 +330,6 @@ void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm, } /** - * amdgpu_vm_del_from_lru_notify - update bulk_moveable flag - * - * @bo: BO which was removed from the LRU - * - * Make sure the bulk_moveable flag is updated when a BO is removed from the - * LRU. 
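The iterator macro removed in this hunk earns its "safe" suffix the same way list_for_each_entry_safe() does: the cursor is stepped past the current entry before the loop body sees it, so the body may free that entry. The macro again, annotated:

#define for_each_amdgpu_vm_pt_dfs_safe(adev, vm, start, cursor, entry)	\
	for (amdgpu_vm_pt_first_dfs((adev), (vm), (start), &(cursor)),	\
	     /* cache the deepest entry, then step the cursor past it */\
	     (entry) = (cursor).entry,					\
	     amdgpu_vm_pt_next_dfs((adev), &(cursor));			\
	     /* pointer compare only, so a freed (entry) is harmless */	\
	     amdgpu_vm_pt_continue_dfs((start), (entry));		\
	     (entry) = (cursor).entry,					\
	     amdgpu_vm_pt_next_dfs((adev), &(cursor)))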
- */ -void amdgpu_vm_del_from_lru_notify(struct ttm_buffer_object *bo) -{ - struct amdgpu_bo *abo; - struct amdgpu_vm_bo_base *bo_base; - - if (!amdgpu_bo_is_amdgpu_bo(bo)) - return; - - if (bo->pin_count) - return; - - abo = ttm_to_amdgpu_bo(bo); - if (!abo->parent) - return; - for (bo_base = abo->vm_bo; bo_base; bo_base = bo_base->next) { - struct amdgpu_vm *vm = bo_base->vm; - - if (abo->tbo.base.resv == vm->root.bo->tbo.base.resv) - vm->bulk_moveable = false; - } - -} -/** * amdgpu_vm_move_to_lru_tail - move all BOs to the end of LRU * * @adev: amdgpu device pointer @@ -679,35 +341,9 @@ void amdgpu_vm_del_from_lru_notify(struct ttm_buffer_object *bo) void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev, struct amdgpu_vm *vm) { - struct amdgpu_vm_bo_base *bo_base; - - if (vm->bulk_moveable) { - spin_lock(&adev->mman.bdev.lru_lock); - ttm_bo_bulk_move_lru_tail(&vm->lru_bulk_move); - spin_unlock(&adev->mman.bdev.lru_lock); - return; - } - - memset(&vm->lru_bulk_move, 0, sizeof(vm->lru_bulk_move)); - spin_lock(&adev->mman.bdev.lru_lock); - list_for_each_entry(bo_base, &vm->idle, vm_status) { - struct amdgpu_bo *bo = bo_base->bo; - struct amdgpu_bo *shadow = amdgpu_bo_shadowed(bo); - - if (!bo->parent) - continue; - - ttm_bo_move_to_lru_tail(&bo->tbo, bo->tbo.resource, - &vm->lru_bulk_move); - if (shadow) - ttm_bo_move_to_lru_tail(&shadow->tbo, - shadow->tbo.resource, - &vm->lru_bulk_move); - } + ttm_lru_bulk_move_tail(&vm->lru_bulk_move); spin_unlock(&adev->mman.bdev.lru_lock); - - vm->bulk_moveable = true; } /** @@ -730,8 +366,6 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm, struct amdgpu_vm_bo_base *bo_base, *tmp; int r; - vm->bulk_moveable &= list_empty(&vm->evicted); - list_for_each_entry_safe(bo_base, tmp, &vm->evicted, vm_status) { struct amdgpu_bo *bo = bo_base->bo; struct amdgpu_bo *shadow = amdgpu_bo_shadowed(bo); @@ -782,312 +416,6 @@ bool amdgpu_vm_ready(struct amdgpu_vm *vm) } /** - * amdgpu_vm_clear_bo - initially clear the PDs/PTs - * - * @adev: amdgpu_device pointer - * @vm: VM to clear BO from - * @vmbo: BO to clear - * @immediate: use an immediate update - * - * Root PD needs to be reserved when calling this. - * - * Returns: - * 0 on success, errno otherwise. 
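With TTM maintaining the bulk move itself (see the ttm_bo_set_bulk_move() call in amdgpu_vm_bo_base_init() earlier), moving a whole VM to the LRU tail no longer needs to walk the idle list or consult bulk_moveable. Reassembled from the hunk above, the helper reduces to:

void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev,
				struct amdgpu_vm *vm)
{
	/* TTM already knows which BOs belong to this VM's bulk move;
	 * just shuffle the whole section to the tail under the lock.
	 */
	spin_lock(&adev->mman.bdev.lru_lock);
	ttm_lru_bulk_move_tail(&vm->lru_bulk_move);
	spin_unlock(&adev->mman.bdev.lru_lock);
}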
- */ -static int amdgpu_vm_clear_bo(struct amdgpu_device *adev, - struct amdgpu_vm *vm, - struct amdgpu_bo_vm *vmbo, - bool immediate) -{ - struct ttm_operation_ctx ctx = { true, false }; - unsigned level = adev->vm_manager.root_level; - struct amdgpu_vm_update_params params; - struct amdgpu_bo *ancestor = &vmbo->bo; - struct amdgpu_bo *bo = &vmbo->bo; - unsigned entries, ats_entries; - uint64_t addr; - int r, idx; - - /* Figure out our place in the hierarchy */ - if (ancestor->parent) { - ++level; - while (ancestor->parent->parent) { - ++level; - ancestor = ancestor->parent; - } - } - - entries = amdgpu_bo_size(bo) / 8; - if (!vm->pte_support_ats) { - ats_entries = 0; - - } else if (!bo->parent) { - ats_entries = amdgpu_vm_num_ats_entries(adev); - ats_entries = min(ats_entries, entries); - entries -= ats_entries; - - } else { - struct amdgpu_vm_bo_base *pt; - - pt = ancestor->vm_bo; - ats_entries = amdgpu_vm_num_ats_entries(adev); - if ((pt - to_amdgpu_bo_vm(vm->root.bo)->entries) >= ats_entries) { - ats_entries = 0; - } else { - ats_entries = entries; - entries = 0; - } - } - - r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); - if (r) - return r; - - if (vmbo->shadow) { - struct amdgpu_bo *shadow = vmbo->shadow; - - r = ttm_bo_validate(&shadow->tbo, &shadow->placement, &ctx); - if (r) - return r; - } - - if (!drm_dev_enter(adev_to_drm(adev), &idx)) - return -ENODEV; - - r = vm->update_funcs->map_table(vmbo); - if (r) - goto exit; - - memset(¶ms, 0, sizeof(params)); - params.adev = adev; - params.vm = vm; - params.immediate = immediate; - - r = vm->update_funcs->prepare(¶ms, NULL, AMDGPU_SYNC_EXPLICIT); - if (r) - goto exit; - - addr = 0; - if (ats_entries) { - uint64_t value = 0, flags; - - flags = AMDGPU_PTE_DEFAULT_ATC; - if (level != AMDGPU_VM_PTB) { - /* Handle leaf PDEs as PTEs */ - flags |= AMDGPU_PDE_PTE; - amdgpu_gmc_get_vm_pde(adev, level, &value, &flags); - } - - r = vm->update_funcs->update(¶ms, vmbo, addr, 0, ats_entries, - value, flags); - if (r) - goto exit; - - addr += ats_entries * 8; - } - - if (entries) { - uint64_t value = 0, flags = 0; - - if (adev->asic_type >= CHIP_VEGA10) { - if (level != AMDGPU_VM_PTB) { - /* Handle leaf PDEs as PTEs */ - flags |= AMDGPU_PDE_PTE; - amdgpu_gmc_get_vm_pde(adev, level, - &value, &flags); - } else { - /* Workaround for fault priority problem on GMC9 */ - flags = AMDGPU_PTE_EXECUTABLE; - } - } - - r = vm->update_funcs->update(¶ms, vmbo, addr, 0, entries, - value, flags); - if (r) - goto exit; - } - - r = vm->update_funcs->commit(¶ms, NULL); -exit: - drm_dev_exit(idx); - return r; -} - -/** - * amdgpu_vm_pt_create - create bo for PD/PT - * - * @adev: amdgpu_device pointer - * @vm: requesting vm - * @level: the page table level - * @immediate: use a immediate update - * @vmbo: pointer to the buffer object pointer - */ -static int amdgpu_vm_pt_create(struct amdgpu_device *adev, - struct amdgpu_vm *vm, - int level, bool immediate, - struct amdgpu_bo_vm **vmbo) -{ - struct amdgpu_bo_param bp; - struct amdgpu_bo *bo; - struct dma_resv *resv; - unsigned int num_entries; - int r; - - memset(&bp, 0, sizeof(bp)); - - bp.size = amdgpu_vm_bo_size(adev, level); - bp.byte_align = AMDGPU_GPU_PAGE_SIZE; - bp.domain = AMDGPU_GEM_DOMAIN_VRAM; - bp.domain = amdgpu_bo_get_preferred_domain(adev, bp.domain); - bp.flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS | - AMDGPU_GEM_CREATE_CPU_GTT_USWC; - - if (level < AMDGPU_VM_PTB) - num_entries = amdgpu_vm_num_entries(adev, level); - else - num_entries = 0; - - bp.bo_ptr_size = struct_size((*vmbo), entries, 
num_entries); - - if (vm->use_cpu_for_update) - bp.flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; - - bp.type = ttm_bo_type_kernel; - bp.no_wait_gpu = immediate; - if (vm->root.bo) - bp.resv = vm->root.bo->tbo.base.resv; - - r = amdgpu_bo_create_vm(adev, &bp, vmbo); - if (r) - return r; - - bo = &(*vmbo)->bo; - if (vm->is_compute_context || (adev->flags & AMD_IS_APU)) { - (*vmbo)->shadow = NULL; - return 0; - } - - if (!bp.resv) - WARN_ON(dma_resv_lock(bo->tbo.base.resv, - NULL)); - resv = bp.resv; - memset(&bp, 0, sizeof(bp)); - bp.size = amdgpu_vm_bo_size(adev, level); - bp.domain = AMDGPU_GEM_DOMAIN_GTT; - bp.flags = AMDGPU_GEM_CREATE_CPU_GTT_USWC; - bp.type = ttm_bo_type_kernel; - bp.resv = bo->tbo.base.resv; - bp.bo_ptr_size = sizeof(struct amdgpu_bo); - - r = amdgpu_bo_create(adev, &bp, &(*vmbo)->shadow); - - if (!resv) - dma_resv_unlock(bo->tbo.base.resv); - - if (r) { - amdgpu_bo_unref(&bo); - return r; - } - - (*vmbo)->shadow->parent = amdgpu_bo_ref(bo); - amdgpu_bo_add_to_shadow_list(*vmbo); - - return 0; -} - -/** - * amdgpu_vm_alloc_pts - Allocate a specific page table - * - * @adev: amdgpu_device pointer - * @vm: VM to allocate page tables for - * @cursor: Which page table to allocate - * @immediate: use an immediate update - * - * Make sure a specific page table or directory is allocated. - * - * Returns: - * 1 if page table needed to be allocated, 0 if page table was already - * allocated, negative errno if an error occurred. - */ -static int amdgpu_vm_alloc_pts(struct amdgpu_device *adev, - struct amdgpu_vm *vm, - struct amdgpu_vm_pt_cursor *cursor, - bool immediate) -{ - struct amdgpu_vm_bo_base *entry = cursor->entry; - struct amdgpu_bo *pt_bo; - struct amdgpu_bo_vm *pt; - int r; - - if (entry->bo) - return 0; - - r = amdgpu_vm_pt_create(adev, vm, cursor->level, immediate, &pt); - if (r) - return r; - - /* Keep a reference to the root directory to avoid - * freeing them up in the wrong order. - */ - pt_bo = &pt->bo; - pt_bo->parent = amdgpu_bo_ref(cursor->parent->bo); - amdgpu_vm_bo_base_init(entry, vm, pt_bo); - r = amdgpu_vm_clear_bo(adev, vm, pt, immediate); - if (r) - goto error_free_pt; - - return 0; - -error_free_pt: - amdgpu_bo_unref(&pt->shadow); - amdgpu_bo_unref(&pt_bo); - return r; -} - -/** - * amdgpu_vm_free_table - fre one PD/PT - * - * @entry: PDE to free - */ -static void amdgpu_vm_free_table(struct amdgpu_vm_bo_base *entry) -{ - struct amdgpu_bo *shadow; - - if (!entry->bo) - return; - shadow = amdgpu_bo_shadowed(entry->bo); - entry->bo->vm_bo = NULL; - list_del(&entry->vm_status); - amdgpu_bo_unref(&shadow); - amdgpu_bo_unref(&entry->bo); -} - -/** - * amdgpu_vm_free_pts - free PD/PT levels - * - * @adev: amdgpu device structure - * @vm: amdgpu vm structure - * @start: optional cursor where to start freeing PDs/PTs - * - * Free the page directory or page table level and all sub levels. 
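The natural user of the safe iterator's lookahead is subtree teardown: each visited page table is destroyed while the cursor has already moved on. This is the loop at the heart of the removed amdgpu_vm_free_pts():

	for_each_amdgpu_vm_pt_dfs_safe(adev, vm, start, cursor, entry)
		amdgpu_vm_free_table(entry);	/* entry freed right here */

	if (start)
		amdgpu_vm_free_table(start->entry);	/* the subtree root */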
- */ -static void amdgpu_vm_free_pts(struct amdgpu_device *adev, - struct amdgpu_vm *vm, - struct amdgpu_vm_pt_cursor *start) -{ - struct amdgpu_vm_pt_cursor cursor; - struct amdgpu_vm_bo_base *entry; - - vm->bulk_moveable = false; - - for_each_amdgpu_vm_pt_dfs_safe(adev, vm, start, cursor, entry) - amdgpu_vm_free_table(entry); - - if (start) - amdgpu_vm_free_table(start->entry); -} - -/** * amdgpu_vm_check_compute_bug - check whether asic has compute vm bug * * @adev: amdgpu_device pointer @@ -1334,53 +662,6 @@ uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr) } /** - * amdgpu_vm_update_pde - update a single level in the hierarchy - * - * @params: parameters for the update - * @vm: requested vm - * @entry: entry to update - * - * Makes sure the requested entry in parent is up to date. - */ -static int amdgpu_vm_update_pde(struct amdgpu_vm_update_params *params, - struct amdgpu_vm *vm, - struct amdgpu_vm_bo_base *entry) -{ - struct amdgpu_vm_bo_base *parent = amdgpu_vm_pt_parent(entry); - struct amdgpu_bo *bo = parent->bo, *pbo; - uint64_t pde, pt, flags; - unsigned level; - - for (level = 0, pbo = bo->parent; pbo; ++level) - pbo = pbo->parent; - - level += params->adev->vm_manager.root_level; - amdgpu_gmc_get_pde_for_bo(entry->bo, level, &pt, &flags); - pde = (entry - to_amdgpu_bo_vm(parent->bo)->entries) * 8; - return vm->update_funcs->update(params, to_amdgpu_bo_vm(bo), pde, pt, - 1, 0, flags); -} - -/** - * amdgpu_vm_invalidate_pds - mark all PDs as invalid - * - * @adev: amdgpu_device pointer - * @vm: related vm - * - * Mark all PD level as invalid after an error. - */ -static void amdgpu_vm_invalidate_pds(struct amdgpu_device *adev, - struct amdgpu_vm *vm) -{ - struct amdgpu_vm_pt_cursor cursor; - struct amdgpu_vm_bo_base *entry; - - for_each_amdgpu_vm_pt_dfs_safe(adev, vm, NULL, cursor, entry) - if (entry->bo && !entry->moved) - amdgpu_vm_bo_relocated(entry); -} - -/** * amdgpu_vm_update_pdes - make sure that all directories are valid * * @adev: amdgpu_device pointer @@ -1396,6 +677,7 @@ int amdgpu_vm_update_pdes(struct amdgpu_device *adev, struct amdgpu_vm *vm, bool immediate) { struct amdgpu_vm_update_params params; + struct amdgpu_vm_bo_base *entry; int r, idx; if (list_empty(&vm->relocated)) @@ -1411,17 +693,10 @@ int amdgpu_vm_update_pdes(struct amdgpu_device *adev, r = vm->update_funcs->prepare(¶ms, NULL, AMDGPU_SYNC_EXPLICIT); if (r) - goto exit; - - while (!list_empty(&vm->relocated)) { - struct amdgpu_vm_bo_base *entry; - - entry = list_first_entry(&vm->relocated, - struct amdgpu_vm_bo_base, - vm_status); - amdgpu_vm_bo_idle(entry); + goto error; - r = amdgpu_vm_update_pde(¶ms, vm, entry); + list_for_each_entry(entry, &vm->relocated, vm_status) { + r = amdgpu_vm_pde_update(¶ms, entry); if (r) goto error; } @@ -1429,297 +704,68 @@ int amdgpu_vm_update_pdes(struct amdgpu_device *adev, r = vm->update_funcs->commit(¶ms, &vm->last_update); if (r) goto error; - drm_dev_exit(idx); - return 0; + + while (!list_empty(&vm->relocated)) { + entry = list_first_entry(&vm->relocated, + struct amdgpu_vm_bo_base, + vm_status); + amdgpu_vm_bo_idle(entry); + } error: - amdgpu_vm_invalidate_pds(adev, vm); -exit: drm_dev_exit(idx); return r; } -/* - * amdgpu_vm_update_flags - figure out flags for PTE updates - * - * Make sure to set the right flags for the PTEs at the desired level. 
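The amdgpu_vm_update_pdes() rewrite above also changes the error strategy: entries now stay on the relocated list until the commit has succeeded, so a failure no longer needs the removed amdgpu_vm_invalidate_pds() to re-mark them. The new control flow, condensed with comments:

	list_for_each_entry(entry, &vm->relocated, vm_status) {
		r = amdgpu_vm_pde_update(&params, entry);	/* write PDE */
		if (r)
			goto error;	/* list untouched, retried later */
	}

	r = vm->update_funcs->commit(&params, &vm->last_update);
	if (r)
		goto error;

	/* Only after a successful commit are the entries retired. */
	while (!list_empty(&vm->relocated)) {
		entry = list_first_entry(&vm->relocated,
					 struct amdgpu_vm_bo_base, vm_status);
		amdgpu_vm_bo_idle(entry);
	}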
- */ -static void amdgpu_vm_update_flags(struct amdgpu_vm_update_params *params, - struct amdgpu_bo_vm *pt, unsigned int level, - uint64_t pe, uint64_t addr, - unsigned int count, uint32_t incr, - uint64_t flags) - -{ - if (level != AMDGPU_VM_PTB) { - flags |= AMDGPU_PDE_PTE; - amdgpu_gmc_get_vm_pde(params->adev, level, &addr, &flags); - - } else if (params->adev->asic_type >= CHIP_VEGA10 && - !(flags & AMDGPU_PTE_VALID) && - !(flags & AMDGPU_PTE_PRT)) { - - /* Workaround for fault priority problem on GMC9 */ - flags |= AMDGPU_PTE_EXECUTABLE; - } - - params->vm->update_funcs->update(params, pt, pe, addr, count, incr, - flags); -} - /** - * amdgpu_vm_fragment - get fragment for PTEs - * - * @params: see amdgpu_vm_update_params definition - * @start: first PTE to handle - * @end: last PTE to handle - * @flags: hw mapping flags - * @frag: resulting fragment size - * @frag_end: end of this fragment + * amdgpu_vm_tlb_seq_cb - make sure to increment tlb sequence + * @fence: unused + * @cb: the callback structure * - * Returns the first possible fragment for the start and end address. + * Increments the tlb sequence to make sure that future CS execute a VM flush. */ -static void amdgpu_vm_fragment(struct amdgpu_vm_update_params *params, - uint64_t start, uint64_t end, uint64_t flags, - unsigned int *frag, uint64_t *frag_end) +static void amdgpu_vm_tlb_seq_cb(struct dma_fence *fence, + struct dma_fence_cb *cb) { - /** - * The MC L1 TLB supports variable sized pages, based on a fragment - * field in the PTE. When this field is set to a non-zero value, page - * granularity is increased from 4KB to (1 << (12 + frag)). The PTE - * flags are considered valid for all PTEs within the fragment range - * and corresponding mappings are assumed to be physically contiguous. - * - * The L1 TLB can store a single PTE for the whole fragment, - * significantly increasing the space available for translation - * caching. This leads to large improvements in throughput when the - * TLB is under pressure. - * - * The L2 TLB distributes small and large fragments into two - * asymmetric partitions. The large fragment cache is significantly - * larger. Thus, we try to use large fragments wherever possible. - * Userspace can support this by aligning virtual base address and - * allocation size to the fragment size. - * - * Starting with Vega10 the fragment size only controls the L1. The L2 - * is now directly feed with small/huge/giant pages from the walker. 
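The tlb_seq machinery added above gives the rest of the driver a cheap staleness check: every completed page-table update bumps vm->tlb_seq from its fence callback. A hypothetical consumer, with the helper name and placement assumed rather than taken from this hunk:

static inline bool vm_needs_flush(struct amdgpu_vm *vm, uint64_t snap)
{
	/* 'snap' was read at job-prepare time; any page-table update
	 * completed since then ran amdgpu_vm_tlb_seq_cb() and bumped
	 * the sequence, so the job must emit a VM flush first.
	 */
	return atomic64_read(&vm->tlb_seq) != snap;
}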
- */ - unsigned max_frag; + struct amdgpu_vm_tlb_seq_cb *tlb_cb; - if (params->adev->asic_type < CHIP_VEGA10) - max_frag = params->adev->vm_manager.fragment_size; - else - max_frag = 31; - - /* system pages are non continuously */ - if (params->pages_addr) { - *frag = 0; - *frag_end = end; - return; - } - - /* This intentionally wraps around if no bit is set */ - *frag = min((unsigned)ffs(start) - 1, (unsigned)fls64(end - start) - 1); - if (*frag >= max_frag) { - *frag = max_frag; - *frag_end = end & ~((1ULL << max_frag) - 1); - } else { - *frag_end = start + (1 << *frag); - } + tlb_cb = container_of(cb, typeof(*tlb_cb), cb); + atomic64_inc(&tlb_cb->vm->tlb_seq); + kfree(tlb_cb); } /** - * amdgpu_vm_update_ptes - make sure that page tables are valid - * - * @params: see amdgpu_vm_update_params definition - * @start: start of GPU address range - * @end: end of GPU address range - * @dst: destination address to map to, the next dst inside the function - * @flags: mapping flags + * amdgpu_vm_update_range - update a range in the vm page table * - * Update the page tables in the range @start - @end. - * - * Returns: - * 0 for success, -EINVAL for failure. - */ -static int amdgpu_vm_update_ptes(struct amdgpu_vm_update_params *params, - uint64_t start, uint64_t end, - uint64_t dst, uint64_t flags) -{ - struct amdgpu_device *adev = params->adev; - struct amdgpu_vm_pt_cursor cursor; - uint64_t frag_start = start, frag_end; - unsigned int frag; - int r; - - /* figure out the initial fragment */ - amdgpu_vm_fragment(params, frag_start, end, flags, &frag, &frag_end); - - /* walk over the address space and update the PTs */ - amdgpu_vm_pt_start(adev, params->vm, start, &cursor); - while (cursor.pfn < end) { - unsigned shift, parent_shift, mask; - uint64_t incr, entry_end, pe_start; - struct amdgpu_bo *pt; - - if (!params->unlocked) { - /* make sure that the page tables covering the - * address range are actually allocated - */ - r = amdgpu_vm_alloc_pts(params->adev, params->vm, - &cursor, params->immediate); - if (r) - return r; - } - - shift = amdgpu_vm_level_shift(adev, cursor.level); - parent_shift = amdgpu_vm_level_shift(adev, cursor.level - 1); - if (params->unlocked) { - /* Unlocked updates are only allowed on the leaves */ - if (amdgpu_vm_pt_descendant(adev, &cursor)) - continue; - } else if (adev->asic_type < CHIP_VEGA10 && - (flags & AMDGPU_PTE_VALID)) { - /* No huge page support before GMC v9 */ - if (cursor.level != AMDGPU_VM_PTB) { - if (!amdgpu_vm_pt_descendant(adev, &cursor)) - return -ENOENT; - continue; - } - } else if (frag < shift) { - /* We can't use this level when the fragment size is - * smaller than the address shift. Go to the next - * child entry and try again. - */ - if (amdgpu_vm_pt_descendant(adev, &cursor)) - continue; - } else if (frag >= parent_shift) { - /* If the fragment size is even larger than the parent - * shift we should go up one level and check it again. - */ - if (!amdgpu_vm_pt_ancestor(&cursor)) - return -EINVAL; - continue; - } - - pt = cursor.entry->bo; - if (!pt) { - /* We need all PDs and PTs for mapping something, */ - if (flags & AMDGPU_PTE_VALID) - return -ENOENT; - - /* but unmapping something can happen at a higher - * level. 
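The fragment selection removed above boils down to two bit tricks: the alignment of the start address bounds the fragment from one side, the size of the range from the other. A standalone illustration (system-page ranges, which the kernel forces to fragment 0, are left out):

#include <stdint.h>
#include <stdio.h>

/* min(lowest set bit of start, largest power of two fitting the range),
 * capped at the hardware maximum: 31 from Vega10 on, the configured
 * vm_manager.fragment_size before that.
 */
static unsigned int fragment(uint64_t start, uint64_t end,
			     unsigned int max_frag)
{
	unsigned int align = start ? __builtin_ffsll(start) - 1 : 63;
	unsigned int span = 63 - __builtin_clzll(end - start);
	unsigned int frag = align < span ? align : span;

	return frag < max_frag ? frag : max_frag;
}

int main(void)
{
	/* start aligned to 2^6 pages; the 0x3c0-page range would allow
	 * a 2^9 block, so the weaker start alignment wins: fragment 6.
	 */
	printf("frag = %u\n", fragment(0x40, 0x400, 31));
	return 0;
}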
- */ - if (!amdgpu_vm_pt_ancestor(&cursor)) - return -EINVAL; - - pt = cursor.entry->bo; - shift = parent_shift; - frag_end = max(frag_end, ALIGN(frag_start + 1, - 1ULL << shift)); - } - - /* Looks good so far, calculate parameters for the update */ - incr = (uint64_t)AMDGPU_GPU_PAGE_SIZE << shift; - mask = amdgpu_vm_entries_mask(adev, cursor.level); - pe_start = ((cursor.pfn >> shift) & mask) * 8; - entry_end = ((uint64_t)mask + 1) << shift; - entry_end += cursor.pfn & ~(entry_end - 1); - entry_end = min(entry_end, end); - - do { - struct amdgpu_vm *vm = params->vm; - uint64_t upd_end = min(entry_end, frag_end); - unsigned nptes = (upd_end - frag_start) >> shift; - uint64_t upd_flags = flags | AMDGPU_PTE_FRAG(frag); - - /* This can happen when we set higher level PDs to - * silent to stop fault floods. - */ - nptes = max(nptes, 1u); - - trace_amdgpu_vm_update_ptes(params, frag_start, upd_end, - nptes, dst, incr, upd_flags, - vm->task_info.pid, - vm->immediate.fence_context); - amdgpu_vm_update_flags(params, to_amdgpu_bo_vm(pt), - cursor.level, pe_start, dst, - nptes, incr, upd_flags); - - pe_start += nptes * 8; - dst += nptes * incr; - - frag_start = upd_end; - if (frag_start >= frag_end) { - /* figure out the next fragment */ - amdgpu_vm_fragment(params, frag_start, end, - flags, &frag, &frag_end); - if (frag < shift) - break; - } - } while (frag_start < entry_end); - - if (amdgpu_vm_pt_descendant(adev, &cursor)) { - /* Free all child entries. - * Update the tables with the flags and addresses and free up subsequent - * tables in the case of huge pages or freed up areas. - * This is the maximum you can free, because all other page tables are not - * completely covered by the range and so potentially still in use. - */ - while (cursor.pfn < frag_start) { - /* Make sure previous mapping is freed */ - if (cursor.entry->bo) { - params->table_freed = true; - amdgpu_vm_free_pts(adev, params->vm, &cursor); - } - amdgpu_vm_pt_next(adev, &cursor); - } - - } else if (frag >= shift) { - /* or just move on to the next on the same level. */ - amdgpu_vm_pt_next(adev, &cursor); - } - } - - return 0; -} - -/** - * amdgpu_vm_bo_update_mapping - update a mapping in the vm page table - * - * @adev: amdgpu_device pointer of the VM - * @bo_adev: amdgpu_device pointer of the mapped BO - * @vm: requested vm + * @adev: amdgpu_device pointer to use for commands + * @vm: the VM to update the range * @immediate: immediate submission in a page fault * @unlocked: unlocked invalidation during MM callback + * @flush_tlb: trigger tlb invalidation after update completed * @resv: fences we need to sync to * @start: start of mapped range * @last: last mapped entry * @flags: flags for the entries * @offset: offset into nodes and pages_addr + * @vram_base: base for vram mappings * @res: ttm_resource to map * @pages_addr: DMA addresses to use for mapping * @fence: optional resulting fence - * @table_freed: return true if page table is freed * * Fill in the page table entries between @start and @last. * * Returns: - * 0 for success, -EINVAL for failure. + * 0 for success, negative error code for failure.
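amdgpu_vm_tlb_seq_cb() above is the producer side of the new TLB tracking: once the fence for a page-table update signals, the VM's tlb_seq counter is bumped. A minimal sketch of the consumer side, assuming a hypothetical cached_seq value remembered at the time of the last flush (the helper name is illustrative, not driver code):

/* Sketch: has the VM seen a page-table update since our last flush? */
static bool example_vm_needs_flush(struct amdgpu_vm *vm, u64 cached_seq)
{
	/* amdgpu_vm_tlb_seq() reads vm->tlb_seq, which the fence
	 * callback increments after an update has completed */
	return cached_seq != amdgpu_vm_tlb_seq(vm);
}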
*/ -int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, - struct amdgpu_device *bo_adev, - struct amdgpu_vm *vm, bool immediate, - bool unlocked, struct dma_resv *resv, - uint64_t start, uint64_t last, - uint64_t flags, uint64_t offset, - struct ttm_resource *res, - dma_addr_t *pages_addr, - struct dma_fence **fence, - bool *table_freed) +int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm, + bool immediate, bool unlocked, bool flush_tlb, + struct dma_resv *resv, uint64_t start, uint64_t last, + uint64_t flags, uint64_t offset, uint64_t vram_base, + struct ttm_resource *res, dma_addr_t *pages_addr, + struct dma_fence **fence) { struct amdgpu_vm_update_params params; + struct amdgpu_vm_tlb_seq_cb *tlb_cb; struct amdgpu_res_cursor cursor; enum amdgpu_sync_mode sync_mode; int r, idx; @@ -1727,6 +773,18 @@ int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, if (!drm_dev_enter(adev_to_drm(adev), &idx)) return -ENODEV; + tlb_cb = kmalloc(sizeof(*tlb_cb), GFP_KERNEL); + if (!tlb_cb) { + r = -ENOMEM; + goto error_unlock; + } + + /* On Vega20 with XGMI, PTEs get inadvertently cached in the L2 texture + * cache, so do a heavy-weight TLB flush unconditionally. + */ + flush_tlb |= adev->gmc.xgmi.num_physical_nodes && + adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 0); + memset(&params, 0, sizeof(params)); params.adev = adev; params.vm = vm; @@ -1745,7 +803,7 @@ int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, amdgpu_vm_eviction_lock(vm); if (vm->evicting) { r = -EBUSY; - goto error_unlock; + goto error_free; } if (!unlocked && !dma_fence_is_signaled(vm->last_unlocked)) { @@ -1758,7 +816,7 @@ int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, r = vm->update_funcs->prepare(&params, resv, sync_mode); if (r) - goto error_unlock; + goto error_free; amdgpu_res_first(pages_addr ? NULL : res, offset, (last - start + 1) * AMDGPU_GPU_PAGE_SIZE, &cursor); @@ -1798,16 +856,15 @@ int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, } } else if (flags & (AMDGPU_PTE_VALID | AMDGPU_PTE_PRT)) { - addr = bo_adev->vm_manager.vram_base_offset + - cursor.start; + addr = vram_base + cursor.start; } else { addr = 0; } tmp = start + num_entries; - r = amdgpu_vm_update_ptes(&params, start, tmp, addr, flags); + r = amdgpu_vm_ptes_update(&params, start, tmp, addr, flags); if (r) - goto error_unlock; + goto error_free; amdgpu_res_next(&cursor, num_entries * AMDGPU_GPU_PAGE_SIZE); start = tmp; @@ -1815,8 +872,21 @@ int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, } r = vm->update_funcs->commit(&params, fence); - if (table_freed) - *table_freed = *table_freed || params.table_freed; + if (flush_tlb || params.table_freed) { + tlb_cb->vm = vm; + if (fence && *fence && + !dma_fence_add_callback(*fence, &tlb_cb->cb, + amdgpu_vm_tlb_seq_cb)) { + dma_fence_put(vm->last_tlb_flush); + vm->last_tlb_flush = dma_fence_get(*fence); + } else { + amdgpu_vm_tlb_seq_cb(NULL, &tlb_cb->cb); + } + tlb_cb = NULL; + } + +error_free: + kfree(tlb_cb); error_unlock: amdgpu_vm_eviction_unlock(vm); @@ -1874,7 +944,6 @@ void amdgpu_vm_get_memory(struct amdgpu_vm *vm, uint64_t *vram_mem, * @adev: amdgpu_device pointer * @bo_va: requested BO and VM object * @clear: if true clear the entries - * @table_freed: return true if page table is freed * * Fill in the page table entries for @bo_va. * @@ -1882,7 +951,7 @@ void amdgpu_vm_get_memory(struct amdgpu_vm *vm, uint64_t *vram_mem, * 0 for success, -EINVAL for failure.
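With bo_adev folded into an explicit vram_base argument and the new flush_tlb flag, a typical invalidation call now looks like the amdgpu_vm_clear_freed() hunk further down; shown here in isolation as a sketch:

r = amdgpu_vm_update_range(adev, vm, false, false, true, resv,
			   mapping->start, mapping->last,
			   init_pte_value, 0, 0, NULL, NULL, &f);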
*/ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, - bool clear, bool *table_freed) + bool clear) { struct amdgpu_bo *bo = bo_va->base.bo; struct amdgpu_vm *vm = bo_va->base.vm; @@ -1890,9 +959,10 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, dma_addr_t *pages_addr = NULL; struct ttm_resource *mem; struct dma_fence **last_update; + bool flush_tlb = clear; struct dma_resv *resv; + uint64_t vram_base; uint64_t flags; - struct amdgpu_device *bo_adev = adev; int r; if (clear || !bo) { @@ -1917,14 +987,18 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, } if (bo) { + struct amdgpu_device *bo_adev; + flags = amdgpu_ttm_tt_pte_flags(adev, bo->tbo.ttm, mem); if (amdgpu_bo_encrypted(bo)) flags |= AMDGPU_PTE_TMZ; bo_adev = amdgpu_ttm_adev(bo->tbo.bdev); + vram_base = bo_adev->vm_manager.vram_base_offset; } else { flags = 0x0; + vram_base = 0; } if (clear || (bo && bo->tbo.base.resv == @@ -1934,7 +1008,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, last_update = &bo_va->last_pt_update; if (!clear && bo_va->base.moved) { - bo_va->base.moved = false; + flush_tlb = true; list_splice_init(&bo_va->valids, &bo_va->invalids); } else if (bo_va->cleared != clear) { @@ -1957,11 +1031,11 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, trace_amdgpu_vm_bo_update(mapping); - r = amdgpu_vm_bo_update_mapping(adev, bo_adev, vm, false, false, - resv, mapping->start, - mapping->last, update_flags, - mapping->offset, mem, - pages_addr, last_update, table_freed); + r = amdgpu_vm_update_range(adev, vm, false, false, flush_tlb, + resv, mapping->start, mapping->last, + update_flags, mapping->offset, + vram_base, mem, pages_addr, + last_update); if (r) return r; } @@ -1984,6 +1058,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, list_splice_init(&bo_va->invalids, &bo_va->valids); bo_va->cleared = clear; + bo_va->base.moved = false; if (trace_amdgpu_vm_bo_mapping_enabled()) { list_for_each_entry(mapping, &bo_va->valids, list) @@ -2111,7 +1186,7 @@ static void amdgpu_vm_prt_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) struct dma_resv_iter cursor; struct dma_fence *fence; - dma_resv_for_each_fence(&cursor, resv, true, fence) { + dma_resv_for_each_fence(&cursor, resv, DMA_RESV_USAGE_BOOKKEEP, fence) { /* Add a callback for each fence in the reservation object */ amdgpu_vm_prt_get(adev); amdgpu_vm_add_prt_cb(adev, fence); @@ -2152,10 +1227,10 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev, mapping->start < AMDGPU_GMC_HOLE_START) init_pte_value = AMDGPU_PTE_DEFAULT_ATC; - r = amdgpu_vm_bo_update_mapping(adev, adev, vm, false, false, - resv, mapping->start, - mapping->last, init_pte_value, - 0, NULL, NULL, &f, NULL); + r = amdgpu_vm_update_range(adev, vm, false, false, true, resv, + mapping->start, mapping->last, + init_pte_value, 0, 0, NULL, NULL, + &f); amdgpu_vm_free_mapping(adev, vm, mapping, f); if (r) { dma_fence_put(f); @@ -2197,7 +1272,7 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev, list_for_each_entry_safe(bo_va, tmp, &vm->moved, base.vm_status) { /* Per VM BOs never need to bo cleared in the page tables */ - r = amdgpu_vm_bo_update(adev, bo_va, false, NULL); + r = amdgpu_vm_bo_update(adev, bo_va, false); if (r) return r; } @@ -2216,7 +1291,7 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev, else clear = true; - r = amdgpu_vm_bo_update(adev, bo_va, clear, NULL); + r = 
amdgpu_vm_bo_update(adev, bo_va, clear); if (r) return r; @@ -2263,6 +1338,7 @@ struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev, if (!bo) return bo_va; + dma_resv_assert_held(bo->tbo.base.resv); if (amdgpu_dmabuf_is_xgmi_accessible(adev, bo)) { bo_va->is_xgmi = true; /* Power up XGMI if it can be potentially used */ @@ -2640,7 +1716,7 @@ void amdgpu_vm_bo_trace_cs(struct amdgpu_vm *vm, struct ww_acquire_ctx *ticket) } /** - * amdgpu_vm_bo_rmv - remove a bo to a specific vm + * amdgpu_vm_bo_del - remove a bo from a specific vm * * @adev: amdgpu_device pointer * @bo_va: requested bo_va @@ -2649,7 +1725,7 @@ void amdgpu_vm_bo_trace_cs(struct amdgpu_vm *vm, struct ww_acquire_ctx *ticket) * * Object have to be reserved! */ -void amdgpu_vm_bo_rmv(struct amdgpu_device *adev, +void amdgpu_vm_bo_del(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va) { struct amdgpu_bo_va_mapping *mapping, *next; @@ -2657,9 +1733,12 @@ void amdgpu_vm_bo_rmv(struct amdgpu_device *adev, struct amdgpu_vm *vm = bo_va->base.vm; struct amdgpu_vm_bo_base **base; + dma_resv_assert_held(vm->root.bo->tbo.base.resv); + if (bo) { + dma_resv_assert_held(bo->tbo.base.resv); if (bo->tbo.base.resv == vm->root.bo->tbo.base.resv) - vm->bulk_moveable = false; + ttm_bo_set_bulk_move(&bo->tbo, NULL); for (base = &bo_va->base.bo->vm_bo; *base; base = &(*base)->next) { @@ -2713,7 +1792,7 @@ bool amdgpu_vm_evictable(struct amdgpu_bo *bo) return true; /* Don't evict VM page tables while they are busy */ - if (!dma_resv_test_signaled(bo->tbo.base.resv, true)) + if (!dma_resv_test_signaled(bo->tbo.base.resv, DMA_RESV_USAGE_BOOKKEEP)) return false; /* Try to block ongoing updates */ @@ -2893,7 +1972,8 @@ void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint32_t min_vm_size, */ long amdgpu_vm_wait_idle(struct amdgpu_vm *vm, long timeout) { - timeout = dma_resv_wait_timeout(vm->root.bo->tbo.base.resv, true, + timeout = dma_resv_wait_timeout(vm->root.bo->tbo.base.resv, + DMA_RESV_USAGE_BOOKKEEP, true, timeout); if (timeout <= 0) return timeout; @@ -2961,6 +2041,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm) vm->update_funcs = &amdgpu_vm_sdma_funcs; vm->last_update = NULL; vm->last_unlocked = dma_fence_get_stub(); + vm->last_tlb_flush = dma_fence_get_stub(); mutex_init(&vm->eviction_lock); vm->evicting = false; @@ -2974,13 +2055,13 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm) if (r) goto error_free_root; - r = dma_resv_reserve_shared(root_bo->tbo.base.resv, 1); + r = dma_resv_reserve_fences(root_bo->tbo.base.resv, 1); if (r) goto error_unreserve; amdgpu_vm_bo_base_init(&vm->root, vm, root_bo); - r = amdgpu_vm_clear_bo(adev, vm, root, false); + r = amdgpu_vm_pt_clear(adev, vm, root, false); if (r) goto error_unreserve; @@ -2999,6 +2080,7 @@ error_free_root: vm->root.bo = NULL; error_free_delayed: + dma_fence_put(vm->last_tlb_flush); dma_fence_put(vm->last_unlocked); drm_sched_entity_destroy(&vm->delayed); @@ -3009,34 +2091,6 @@ error_free_immediate: } /** - * amdgpu_vm_check_clean_reserved - check if a VM is clean - * - * @adev: amdgpu_device pointer - * @vm: the VM to check - * - * check all entries of the root PD, if any subsequent PDs are allocated, - * it means there are page table creating and filling, and is no a clean - * VM - * - * Returns: - * 0 if this VM is clean - */ -static int amdgpu_vm_check_clean_reserved(struct amdgpu_device *adev, - struct amdgpu_vm *vm) -{ - enum amdgpu_vm_level root = adev->vm_manager.root_level; - unsigned int entries = 
amdgpu_vm_num_entries(adev, root); - unsigned int i = 0; - - for (i = 0; i < entries; i++) { - if (to_amdgpu_bo_vm(vm->root.bo)->entries[i].bo) - return -EINVAL; - } - - return 0; -} - -/** * amdgpu_vm_make_compute - Turn a GFX VM into a compute VM * * @adev: amdgpu_device pointer @@ -3065,17 +2119,17 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm) return r; /* Sanity checks */ - r = amdgpu_vm_check_clean_reserved(adev, vm); - if (r) + if (!amdgpu_vm_pt_is_root_clean(adev, vm)) { + r = -EINVAL; goto unreserve_bo; + } /* Check if PD needs to be reinitialized and do it before * changing any other state, in case it fails. */ if (pte_support_ats != vm->pte_support_ats) { vm->pte_support_ats = pte_support_ats; - r = amdgpu_vm_clear_bo(adev, vm, - to_amdgpu_bo_vm(vm->root.bo), + r = amdgpu_vm_pt_clear(adev, vm, to_amdgpu_bo_vm(vm->root.bo), false); if (r) goto unreserve_bo; @@ -3143,6 +2197,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) struct amdgpu_bo_va_mapping *mapping, *tmp; bool prt_fini_needed = !!adev->gmc.gmc_funcs->set_prt; struct amdgpu_bo *root; + unsigned long flags; int i; amdgpu_amdkfd_gpuvm_destroy_cb(adev, vm); @@ -3152,6 +2207,11 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) amdgpu_vm_set_pasid(adev, vm, 0); dma_fence_wait(vm->last_unlocked, false); dma_fence_put(vm->last_unlocked); + dma_fence_wait(vm->last_tlb_flush, false); + /* Make sure that all fence callbacks have completed */ + spin_lock_irqsave(vm->last_tlb_flush->lock, flags); + spin_unlock_irqrestore(vm->last_tlb_flush->lock, flags); + dma_fence_put(vm->last_tlb_flush); list_for_each_entry_safe(mapping, tmp, &vm->freed, list) { if (mapping->flags & AMDGPU_PTE_PRT && prt_fini_needed) { @@ -3163,7 +2223,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) amdgpu_vm_free_mapping(adev, vm, mapping, NULL); } - amdgpu_vm_free_pts(adev, vm, NULL); + amdgpu_vm_pt_free_root(adev, vm); amdgpu_bo_unreserve(root); amdgpu_bo_unref(&root); WARN_ON(vm->root.bo); @@ -3417,15 +2477,14 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid, value = 0; } - r = dma_resv_reserve_shared(root->tbo.base.resv, 1); + r = dma_resv_reserve_fences(root->tbo.base.resv, 1); if (r) { pr_debug("failed %d to reserve fence slot\n", r); goto error_unlock; } - r = amdgpu_vm_bo_update_mapping(adev, adev, vm, true, false, NULL, addr, - addr, flags, value, NULL, NULL, NULL, - NULL); + r = amdgpu_vm_update_range(adev, vm, true, false, false, NULL, addr, + addr, flags, value, 0, NULL, NULL, NULL); if (r) goto error_unlock; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index 85fcfb8c5efd..9ecb7f663e19 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -284,6 +284,10 @@ struct amdgpu_vm { struct drm_sched_entity immediate; struct drm_sched_entity delayed; + /* Last finished delayed update */ + atomic64_t tlb_seq; + struct dma_fence *last_tlb_flush; + /* Last unlocked submission to the scheduler entities */ struct dma_fence *last_unlocked; @@ -317,8 +321,6 @@ struct amdgpu_vm { /* Store positions of group of BOs */ struct ttm_lru_bulk_move lru_bulk_move; - /* mark whether can do the bulk move */ - bool bulk_moveable; /* Flag to indicate if VM is used for compute */ bool is_compute_context; }; @@ -397,18 +399,17 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev, struct dma_fence **fence); int amdgpu_vm_handle_moved(struct amdgpu_device 
*adev, struct amdgpu_vm *vm); -int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, - struct amdgpu_device *bo_adev, - struct amdgpu_vm *vm, bool immediate, - bool unlocked, struct dma_resv *resv, - uint64_t start, uint64_t last, - uint64_t flags, uint64_t offset, - struct ttm_resource *res, - dma_addr_t *pages_addr, - struct dma_fence **fence, bool *free_table); +void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base, + struct amdgpu_vm *vm, struct amdgpu_bo *bo); +int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm, + bool immediate, bool unlocked, bool flush_tlb, + struct dma_resv *resv, uint64_t start, uint64_t last, + uint64_t flags, uint64_t offset, uint64_t vram_base, + struct ttm_resource *res, dma_addr_t *pages_addr, + struct dma_fence **fence); int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, - bool clear, bool *table_freed); + bool clear); bool amdgpu_vm_evictable(struct amdgpu_bo *bo); void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev, struct amdgpu_bo *bo, bool evicted); @@ -435,7 +436,7 @@ int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev, struct amdgpu_bo_va_mapping *amdgpu_vm_bo_lookup_mapping(struct amdgpu_vm *vm, uint64_t addr); void amdgpu_vm_bo_trace_cs(struct amdgpu_vm *vm, struct ww_acquire_ctx *ticket); -void amdgpu_vm_bo_rmv(struct amdgpu_device *adev, +void amdgpu_vm_bo_del(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va); void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint32_t min_vm_size, uint32_t fragment_size_default, unsigned max_level, @@ -454,12 +455,37 @@ void amdgpu_vm_set_task_info(struct amdgpu_vm *vm); void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev, struct amdgpu_vm *vm); -void amdgpu_vm_del_from_lru_notify(struct ttm_buffer_object *bo); void amdgpu_vm_get_memory(struct amdgpu_vm *vm, uint64_t *vram_mem, uint64_t *gtt_mem, uint64_t *cpu_mem); +int amdgpu_vm_pt_clear(struct amdgpu_device *adev, struct amdgpu_vm *vm, + struct amdgpu_bo_vm *vmbo, bool immediate); +int amdgpu_vm_pt_create(struct amdgpu_device *adev, struct amdgpu_vm *vm, + int level, bool immediate, struct amdgpu_bo_vm **vmbo); +void amdgpu_vm_pt_free_root(struct amdgpu_device *adev, struct amdgpu_vm *vm); +bool amdgpu_vm_pt_is_root_clean(struct amdgpu_device *adev, + struct amdgpu_vm *vm); + +int amdgpu_vm_pde_update(struct amdgpu_vm_update_params *params, + struct amdgpu_vm_bo_base *entry); +int amdgpu_vm_ptes_update(struct amdgpu_vm_update_params *params, + uint64_t start, uint64_t end, + uint64_t dst, uint64_t flags); + #if defined(CONFIG_DEBUG_FS) void amdgpu_debugfs_vm_bo_info(struct amdgpu_vm *vm, struct seq_file *m); #endif +/** + * amdgpu_vm_tlb_seq - return tlb flush sequence number + * @vm: the amdgpu_vm structure to query + * + * Returns the tlb flush sequence number which indicates that the VM TLBs need + * to be invalidated whenever the sequence number changes.
+ */ +static inline uint64_t amdgpu_vm_tlb_seq(struct amdgpu_vm *vm) +{ + return atomic64_read(&vm->tlb_seq); +} + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c index e3fbf0f10add..31913ae86de6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c @@ -74,13 +74,12 @@ static int amdgpu_vm_cpu_update(struct amdgpu_vm_update_params *p, { unsigned int i; uint64_t value; - int r; + long r; - if (vmbo->bo.tbo.moving) { - r = dma_fence_wait(vmbo->bo.tbo.moving, true); - if (r) - return r; - } + r = dma_resv_wait_timeout(vmbo->bo.tbo.base.resv, DMA_RESV_USAGE_KERNEL, + true, MAX_SCHEDULE_TIMEOUT); + if (r < 0) + return r; pe += (unsigned long)amdgpu_bo_kptr(&vmbo->bo); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c new file mode 100644 index 000000000000..88de9f0d4728 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c @@ -0,0 +1,981 @@ +// SPDX-License-Identifier: GPL-2.0 OR MIT +/* + * Copyright 2022 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include <drm/drm_drv.h> + +#include "amdgpu.h" +#include "amdgpu_trace.h" +#include "amdgpu_vm.h" + +/* + * amdgpu_vm_pt_cursor - state for for_each_amdgpu_vm_pt + */ +struct amdgpu_vm_pt_cursor { + uint64_t pfn; + struct amdgpu_vm_bo_base *parent; + struct amdgpu_vm_bo_base *entry; + unsigned int level; +}; + +/** + * amdgpu_vm_pt_level_shift - return the addr shift for each level + * + * @adev: amdgpu_device pointer + * @level: VMPT level + * + * Returns: + * The number of bits the pfn needs to be right shifted for a level. + */ +static unsigned int amdgpu_vm_pt_level_shift(struct amdgpu_device *adev, + unsigned int level) +{ + switch (level) { + case AMDGPU_VM_PDB2: + case AMDGPU_VM_PDB1: + case AMDGPU_VM_PDB0: + return 9 * (AMDGPU_VM_PDB0 - level) + + adev->vm_manager.block_size; + case AMDGPU_VM_PTB: + return 0; + default: + return ~0; + } +} + +/** + * amdgpu_vm_pt_num_entries - return the number of entries in a PD/PT + * + * @adev: amdgpu_device pointer + * @level: VMPT level + * + * Returns: + * The number of entries in a page directory or page table. 
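The amdgpu_vm_cpu.c hunk above is one instance of this series' wider dma_resv rework: instead of waiting on the single bo->tbo.moving fence, waits and iterations are now filtered by usage class. A sketch of the two patterns used throughout this patch (error handling elided):

/* Wait only for kernel-internal fences such as buffer moves */
long r = dma_resv_wait_timeout(bo->tbo.base.resv, DMA_RESV_USAGE_KERNEL,
			       true, MAX_SCHEDULE_TIMEOUT);

/* Or walk every fence, up to and including bookkeeping ones */
struct dma_resv_iter cursor;
struct dma_fence *fence;

dma_resv_for_each_fence(&cursor, bo->tbo.base.resv,
			DMA_RESV_USAGE_BOOKKEEP, fence) {
	/* e.g. sync a job to each fence, as amdgpu_vm_sdma_update() does */
}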
+ */ +static unsigned int amdgpu_vm_pt_num_entries(struct amdgpu_device *adev, + unsigned int level) +{ + unsigned int shift; + + shift = amdgpu_vm_pt_level_shift(adev, adev->vm_manager.root_level); + if (level == adev->vm_manager.root_level) + /* For the root directory */ + return round_up(adev->vm_manager.max_pfn, 1ULL << shift) + >> shift; + else if (level != AMDGPU_VM_PTB) + /* Everything in between */ + return 512; + + /* For the page tables on the leaves */ + return AMDGPU_VM_PTE_COUNT(adev); +} + +/** + * amdgpu_vm_pt_num_ats_entries - return the number of ATS entries in the root PD + * + * @adev: amdgpu_device pointer + * + * Returns: + * The number of entries in the root page directory which need the ATS setting. + */ +static unsigned int amdgpu_vm_pt_num_ats_entries(struct amdgpu_device *adev) +{ + unsigned int shift; + + shift = amdgpu_vm_pt_level_shift(adev, adev->vm_manager.root_level); + return AMDGPU_GMC_HOLE_START >> (shift + AMDGPU_GPU_PAGE_SHIFT); +} + +/** + * amdgpu_vm_pt_entries_mask - the mask to get the entry number of a PD/PT + * + * @adev: amdgpu_device pointer + * @level: VMPT level + * + * Returns: + * The mask to extract the entry number of a PD/PT from an address. + */ +static uint32_t amdgpu_vm_pt_entries_mask(struct amdgpu_device *adev, + unsigned int level) +{ + if (level <= adev->vm_manager.root_level) + return 0xffffffff; + else if (level != AMDGPU_VM_PTB) + return 0x1ff; + else + return AMDGPU_VM_PTE_COUNT(adev) - 1; +} + +/** + * amdgpu_vm_pt_size - returns the size of the page table in bytes + * + * @adev: amdgpu_device pointer + * @level: VMPT level + * + * Returns: + * The size of the BO for a page directory or page table in bytes. + */ +static unsigned int amdgpu_vm_pt_size(struct amdgpu_device *adev, + unsigned int level) +{ + return AMDGPU_GPU_PAGE_ALIGN(amdgpu_vm_pt_num_entries(adev, level) * 8); +} + +/** + * amdgpu_vm_pt_parent - get the parent page directory + * + * @pt: child page table + * + * Helper to get the parent entry for the child page table. NULL if we are at + * the root page directory. + */ +static struct amdgpu_vm_bo_base * +amdgpu_vm_pt_parent(struct amdgpu_vm_bo_base *pt) +{ + struct amdgpu_bo *parent = pt->bo->parent; + + if (!parent) + return NULL; + + return parent->vm_bo; +} + +/** + * amdgpu_vm_pt_start - start PD/PT walk + * + * @adev: amdgpu_device pointer + * @vm: amdgpu_vm structure + * @start: start address of the walk + * @cursor: state to initialize + * + * Initialize an amdgpu_vm_pt_cursor to start a walk. + */ +static void amdgpu_vm_pt_start(struct amdgpu_device *adev, + struct amdgpu_vm *vm, uint64_t start, + struct amdgpu_vm_pt_cursor *cursor) +{ + cursor->pfn = start; + cursor->parent = NULL; + cursor->entry = &vm->root; + cursor->level = adev->vm_manager.root_level; +} + +/** + * amdgpu_vm_pt_descendant - go to child node + * + * @adev: amdgpu_device pointer + * @cursor: current state + * + * Walk to the child node of the current node. + * Returns: + * True if the walk was possible, false otherwise.
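amdgpu_vm_pt_descendant() in the next hunk combines the two helpers above; pulled out as a sketch, selecting the child entry covering a pfn is just a shift and a mask:

/* Sketch: index of the entry covering pfn at the given level */
unsigned int shift = amdgpu_vm_pt_level_shift(adev, level);
unsigned int idx = (pfn >> shift) & amdgpu_vm_pt_entries_mask(adev, level);
/* e.g. with a 9-bit level mask (0x1ff), idx picks one of 512 entries */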
+ */ +static bool amdgpu_vm_pt_descendant(struct amdgpu_device *adev, + struct amdgpu_vm_pt_cursor *cursor) +{ + unsigned int mask, shift, idx; + + if ((cursor->level == AMDGPU_VM_PTB) || !cursor->entry || + !cursor->entry->bo) + return false; + + mask = amdgpu_vm_pt_entries_mask(adev, cursor->level); + shift = amdgpu_vm_pt_level_shift(adev, cursor->level); + + ++cursor->level; + idx = (cursor->pfn >> shift) & mask; + cursor->parent = cursor->entry; + cursor->entry = &to_amdgpu_bo_vm(cursor->entry->bo)->entries[idx]; + return true; +} + +/** + * amdgpu_vm_pt_sibling - go to sibling node + * + * @adev: amdgpu_device pointer + * @cursor: current state + * + * Walk to the sibling node of the current node. + * Returns: + * True if the walk was possible, false otherwise. + */ +static bool amdgpu_vm_pt_sibling(struct amdgpu_device *adev, + struct amdgpu_vm_pt_cursor *cursor) +{ + + unsigned int shift, num_entries; + struct amdgpu_bo_vm *parent; + + /* Root doesn't have a sibling */ + if (!cursor->parent) + return false; + + /* Go to our parents and see if we got a sibling */ + shift = amdgpu_vm_pt_level_shift(adev, cursor->level - 1); + num_entries = amdgpu_vm_pt_num_entries(adev, cursor->level - 1); + parent = to_amdgpu_bo_vm(cursor->parent->bo); + + if (cursor->entry == &parent->entries[num_entries - 1]) + return false; + + cursor->pfn += 1ULL << shift; + cursor->pfn &= ~((1ULL << shift) - 1); + ++cursor->entry; + return true; +} + +/** + * amdgpu_vm_pt_ancestor - go to parent node + * + * @cursor: current state + * + * Walk to the parent node of the current node. + * Returns: + * True if the walk was possible, false otherwise. + */ +static bool amdgpu_vm_pt_ancestor(struct amdgpu_vm_pt_cursor *cursor) +{ + if (!cursor->parent) + return false; + + --cursor->level; + cursor->entry = cursor->parent; + cursor->parent = amdgpu_vm_pt_parent(cursor->parent); + return true; +} + +/** + * amdgpu_vm_pt_next - get next PD/PT in hierarchy + * + * @adev: amdgpu_device pointer + * @cursor: current state + * + * Walk the PD/PT tree to the next node. + */ +static void amdgpu_vm_pt_next(struct amdgpu_device *adev, + struct amdgpu_vm_pt_cursor *cursor) +{ + /* First try a newborn child */ + if (amdgpu_vm_pt_descendant(adev, cursor)) + return; + + /* If that didn't work, try to find a sibling */ + while (!amdgpu_vm_pt_sibling(adev, cursor)) { + /* No sibling, go to our parents and grandparents */ + if (!amdgpu_vm_pt_ancestor(cursor)) { + cursor->pfn = ~0ll; + return; + } + } +} + +/** + * amdgpu_vm_pt_first_dfs - start a depth-first search + * + * @adev: amdgpu_device structure + * @vm: amdgpu_vm structure + * @start: optional cursor to start with + * @cursor: state to initialize + * + * Starts a depth-first traversal of the PD/PT tree. + */ +static void amdgpu_vm_pt_first_dfs(struct amdgpu_device *adev, + struct amdgpu_vm *vm, + struct amdgpu_vm_pt_cursor *start, + struct amdgpu_vm_pt_cursor *cursor) +{ + if (start) + *cursor = *start; + else + amdgpu_vm_pt_start(adev, vm, 0, cursor); + + while (amdgpu_vm_pt_descendant(adev, cursor)) + ; +} + +/** + * amdgpu_vm_pt_continue_dfs - check if the depth-first search should continue + * + * @start: starting point for the search + * @entry: current entry + * + * Returns: + * True when the search should continue, false otherwise.
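Together with amdgpu_vm_pt_first_dfs() above, the continue/next helpers below form the for_each_amdgpu_vm_pt_dfs_safe() macro, which visits every child before its parent. A minimal, hypothetical walk counting all allocated PDs/PTs:

struct amdgpu_vm_pt_cursor cursor;
struct amdgpu_vm_bo_base *entry;
unsigned int count = 0;

for_each_amdgpu_vm_pt_dfs_safe(adev, vm, NULL, cursor, entry)
	++count;	/* each PD/PT is seen only after all of its children */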
+ */ +static bool amdgpu_vm_pt_continue_dfs(struct amdgpu_vm_pt_cursor *start, + struct amdgpu_vm_bo_base *entry) +{ + return entry && (!start || entry != start->entry); +} + +/** + * amdgpu_vm_pt_next_dfs - get the next node for a depth-first search + * + * @adev: amdgpu_device structure + * @cursor: current state + * + * Move the cursor to the next node in a depth-first search. + */ +static void amdgpu_vm_pt_next_dfs(struct amdgpu_device *adev, + struct amdgpu_vm_pt_cursor *cursor) +{ + if (!cursor->entry) + return; + + if (!cursor->parent) + cursor->entry = NULL; + else if (amdgpu_vm_pt_sibling(adev, cursor)) + while (amdgpu_vm_pt_descendant(adev, cursor)) + ; + else + amdgpu_vm_pt_ancestor(cursor); +} + +/* + * for_each_amdgpu_vm_pt_dfs_safe - safe depth-first search of all PDs/PTs + */ +#define for_each_amdgpu_vm_pt_dfs_safe(adev, vm, start, cursor, entry) \ + for (amdgpu_vm_pt_first_dfs((adev), (vm), (start), &(cursor)), \ + (entry) = (cursor).entry, amdgpu_vm_pt_next_dfs((adev), &(cursor));\ + amdgpu_vm_pt_continue_dfs((start), (entry)); \ + (entry) = (cursor).entry, amdgpu_vm_pt_next_dfs((adev), &(cursor))) + +/** + * amdgpu_vm_pt_clear - initially clear the PDs/PTs + * + * @adev: amdgpu_device pointer + * @vm: VM to clear BO from + * @vmbo: BO to clear + * @immediate: use an immediate update + * + * Root PD needs to be reserved when calling this. + * + * Returns: + * 0 on success, errno otherwise. + */ +int amdgpu_vm_pt_clear(struct amdgpu_device *adev, struct amdgpu_vm *vm, + struct amdgpu_bo_vm *vmbo, bool immediate) +{ + unsigned int level = adev->vm_manager.root_level; + struct ttm_operation_ctx ctx = { true, false }; + struct amdgpu_vm_update_params params; + struct amdgpu_bo *ancestor = &vmbo->bo; + unsigned int entries, ats_entries; + struct amdgpu_bo *bo = &vmbo->bo; + uint64_t addr; + int r, idx; + + /* Figure out our place in the hierarchy */ + if (ancestor->parent) { + ++level; + while (ancestor->parent->parent) { + ++level; + ancestor = ancestor->parent; + } + } + + entries = amdgpu_bo_size(bo) / 8; + if (!vm->pte_support_ats) { + ats_entries = 0; + + } else if (!bo->parent) { + ats_entries = amdgpu_vm_pt_num_ats_entries(adev); + ats_entries = min(ats_entries, entries); + entries -= ats_entries; + + } else { + struct amdgpu_vm_bo_base *pt; + + pt = ancestor->vm_bo; + ats_entries = amdgpu_vm_pt_num_ats_entries(adev); + if ((pt - to_amdgpu_bo_vm(vm->root.bo)->entries) >= + ats_entries) { + ats_entries = 0; + } else { + ats_entries = entries; + entries = 0; + } + } + + r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); + if (r) + return r; + + if (vmbo->shadow) { + struct amdgpu_bo *shadow = vmbo->shadow; + + r = ttm_bo_validate(&shadow->tbo, &shadow->placement, &ctx); + if (r) + return r; + } + + if (!drm_dev_enter(adev_to_drm(adev), &idx)) + return -ENODEV; + + r = vm->update_funcs->map_table(vmbo); + if (r) + goto exit; + + memset(&params, 0, sizeof(params)); + params.adev = adev; + params.vm = vm; + params.immediate = immediate; + + r = vm->update_funcs->prepare(&params, NULL, AMDGPU_SYNC_EXPLICIT); + if (r) + goto exit; + + addr = 0; + if (ats_entries) { + uint64_t value = 0, flags; + + flags = AMDGPU_PTE_DEFAULT_ATC; + if (level != AMDGPU_VM_PTB) { + /* Handle leaf PDEs as PTEs */ + flags |= AMDGPU_PDE_PTE; + amdgpu_gmc_get_vm_pde(adev, level, &value, &flags); + } + + r = vm->update_funcs->update(&params, vmbo, addr, 0, + ats_entries, value, flags); + if (r) + goto exit; + + addr += ats_entries * 8; + } + + if (entries) { + uint64_t value = 0, flags = 0; + + if (adev->asic_type
>= CHIP_VEGA10) { + if (level != AMDGPU_VM_PTB) { + /* Handle leaf PDEs as PTEs */ + flags |= AMDGPU_PDE_PTE; + amdgpu_gmc_get_vm_pde(adev, level, + &value, &flags); + } else { + /* Workaround for fault priority problem on GMC9 */ + flags = AMDGPU_PTE_EXECUTABLE; + } + } + + r = vm->update_funcs->update(&params, vmbo, addr, 0, entries, + value, flags); + if (r) + goto exit; + } + + r = vm->update_funcs->commit(&params, NULL); +exit: + drm_dev_exit(idx); + return r; +} + +/** + * amdgpu_vm_pt_create - create bo for PD/PT + * + * @adev: amdgpu_device pointer + * @vm: requesting vm + * @level: the page table level + * @immediate: use an immediate update + * @vmbo: pointer to the buffer object pointer + */ +int amdgpu_vm_pt_create(struct amdgpu_device *adev, struct amdgpu_vm *vm, + int level, bool immediate, struct amdgpu_bo_vm **vmbo) +{ + struct amdgpu_bo_param bp; + struct amdgpu_bo *bo; + struct dma_resv *resv; + unsigned int num_entries; + int r; + + memset(&bp, 0, sizeof(bp)); + + bp.size = amdgpu_vm_pt_size(adev, level); + bp.byte_align = AMDGPU_GPU_PAGE_SIZE; + bp.domain = AMDGPU_GEM_DOMAIN_VRAM; + bp.domain = amdgpu_bo_get_preferred_domain(adev, bp.domain); + bp.flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS | + AMDGPU_GEM_CREATE_CPU_GTT_USWC; + + if (level < AMDGPU_VM_PTB) + num_entries = amdgpu_vm_pt_num_entries(adev, level); + else + num_entries = 0; + + bp.bo_ptr_size = struct_size((*vmbo), entries, num_entries); + + if (vm->use_cpu_for_update) + bp.flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; + + bp.type = ttm_bo_type_kernel; + bp.no_wait_gpu = immediate; + if (vm->root.bo) + bp.resv = vm->root.bo->tbo.base.resv; + + r = amdgpu_bo_create_vm(adev, &bp, vmbo); + if (r) + return r; + + bo = &(*vmbo)->bo; + if (vm->is_compute_context || (adev->flags & AMD_IS_APU)) { + (*vmbo)->shadow = NULL; + return 0; + } + + if (!bp.resv) + WARN_ON(dma_resv_lock(bo->tbo.base.resv, + NULL)); + resv = bp.resv; + memset(&bp, 0, sizeof(bp)); + bp.size = amdgpu_vm_pt_size(adev, level); + bp.domain = AMDGPU_GEM_DOMAIN_GTT; + bp.flags = AMDGPU_GEM_CREATE_CPU_GTT_USWC; + bp.type = ttm_bo_type_kernel; + bp.resv = bo->tbo.base.resv; + bp.bo_ptr_size = sizeof(struct amdgpu_bo); + + r = amdgpu_bo_create(adev, &bp, &(*vmbo)->shadow); + + if (!resv) + dma_resv_unlock(bo->tbo.base.resv); + + if (r) { + amdgpu_bo_unref(&bo); + return r; + } + + (*vmbo)->shadow->parent = amdgpu_bo_ref(bo); + amdgpu_bo_add_to_shadow_list(*vmbo); + + return 0; +} + +/** + * amdgpu_vm_pt_alloc - Allocate a specific page table + * + * @adev: amdgpu_device pointer + * @vm: VM to allocate page tables for + * @cursor: Which page table to allocate + * @immediate: use an immediate update + * + * Make sure a specific page table or directory is allocated. + * + * Returns: + * 1 if page table needed to be allocated, 0 if page table was already + * allocated, negative errno if an error occurred. + */ +static int amdgpu_vm_pt_alloc(struct amdgpu_device *adev, + struct amdgpu_vm *vm, + struct amdgpu_vm_pt_cursor *cursor, + bool immediate) +{ + struct amdgpu_vm_bo_base *entry = cursor->entry; + struct amdgpu_bo *pt_bo; + struct amdgpu_bo_vm *pt; + int r; + + if (entry->bo) + return 0; + + r = amdgpu_vm_pt_create(adev, vm, cursor->level, immediate, &pt); + if (r) + return r; + + /* Keep a reference to the root directory to avoid + * freeing them up in the wrong order.
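amdgpu_vm_pt_create() above pairs each VRAM page table with an optional GTT shadow (skipped for compute contexts and APUs), so the tables can be restored after a GPU reset. A sketch of allocating a leaf page table with it:

struct amdgpu_bo_vm *pt;
bool immediate = false;	/* delayed submission, as in most callers */
int r;

r = amdgpu_vm_pt_create(adev, vm, AMDGPU_VM_PTB, immediate, &pt);
if (r)
	return r;
/* pt->bo is the VRAM table; pt->shadow, when present, mirrors it in GTT */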
+ */ + pt_bo = &pt->bo; + pt_bo->parent = amdgpu_bo_ref(cursor->parent->bo); + amdgpu_vm_bo_base_init(entry, vm, pt_bo); + r = amdgpu_vm_pt_clear(adev, vm, pt, immediate); + if (r) + goto error_free_pt; + + return 0; + +error_free_pt: + amdgpu_bo_unref(&pt->shadow); + amdgpu_bo_unref(&pt_bo); + return r; +} + +/** + * amdgpu_vm_pt_free - free one PD/PT + * + * @entry: PDE to free + */ +static void amdgpu_vm_pt_free(struct amdgpu_vm_bo_base *entry) +{ + struct amdgpu_bo *shadow; + + if (!entry->bo) + return; + shadow = amdgpu_bo_shadowed(entry->bo); + if (shadow) { + ttm_bo_set_bulk_move(&shadow->tbo, NULL); + amdgpu_bo_unref(&shadow); + } + ttm_bo_set_bulk_move(&entry->bo->tbo, NULL); + entry->bo->vm_bo = NULL; + list_del(&entry->vm_status); + amdgpu_bo_unref(&entry->bo); +} + +/** + * amdgpu_vm_pt_free_dfs - free PD/PT levels + * + * @adev: amdgpu device structure + * @vm: amdgpu vm structure + * @start: optional cursor where to start freeing PDs/PTs + * + * Free the page directory or page table level and all sub levels. + */ +static void amdgpu_vm_pt_free_dfs(struct amdgpu_device *adev, + struct amdgpu_vm *vm, + struct amdgpu_vm_pt_cursor *start) +{ + struct amdgpu_vm_pt_cursor cursor; + struct amdgpu_vm_bo_base *entry; + + for_each_amdgpu_vm_pt_dfs_safe(adev, vm, start, cursor, entry) + amdgpu_vm_pt_free(entry); + + if (start) + amdgpu_vm_pt_free(start->entry); +} + +/** + * amdgpu_vm_pt_free_root - free root PD + * @adev: amdgpu device structure + * @vm: amdgpu vm structure + * + * Free the root page directory and everything below it. + */ +void amdgpu_vm_pt_free_root(struct amdgpu_device *adev, struct amdgpu_vm *vm) +{ + amdgpu_vm_pt_free_dfs(adev, vm, NULL); +} + +/** + * amdgpu_vm_pt_is_root_clean - check if a root PD is clean + * + * @adev: amdgpu_device pointer + * @vm: the VM to check + * + * Check all entries of the root PD. If any subsequent PDs are allocated it + * means page tables are being created and filled, so the VM is not clean. + * + * Returns: + * True if this VM is clean, false otherwise. + */ +bool amdgpu_vm_pt_is_root_clean(struct amdgpu_device *adev, + struct amdgpu_vm *vm) +{ + enum amdgpu_vm_level root = adev->vm_manager.root_level; + unsigned int entries = amdgpu_vm_pt_num_entries(adev, root); + unsigned int i = 0; + + for (i = 0; i < entries; i++) { + if (to_amdgpu_bo_vm(vm->root.bo)->entries[i].bo) + return false; + } + return true; +} + +/** + * amdgpu_vm_pde_update - update a single level in the hierarchy + * + * @params: parameters for the update + * @entry: entry to update + * + * Makes sure the requested entry in parent is up to date. + */ +int amdgpu_vm_pde_update(struct amdgpu_vm_update_params *params, + struct amdgpu_vm_bo_base *entry) +{ + struct amdgpu_vm_bo_base *parent = amdgpu_vm_pt_parent(entry); + struct amdgpu_bo *bo = parent->bo, *pbo; + struct amdgpu_vm *vm = params->vm; + uint64_t pde, pt, flags; + unsigned int level; + + for (level = 0, pbo = bo->parent; pbo; ++level) + pbo = pbo->parent; + + level += params->adev->vm_manager.root_level; + amdgpu_gmc_get_pde_for_bo(entry->bo, level, &pt, &flags); + pde = (entry - to_amdgpu_bo_vm(parent->bo)->entries) * 8; + return vm->update_funcs->update(params, to_amdgpu_bo_vm(bo), pde, pt, + 1, 0, flags); +} + +/* + * amdgpu_vm_pte_update_flags - figure out flags for PTE updates + * + * Make sure to set the right flags for the PTEs at the desired level.
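The PDE offset computed in amdgpu_vm_pde_update() above is plain pointer arithmetic over 8-byte entries; as a worked example:

/* Sketch: the PDE for the entry at index 3 of its parent starts at
 * byte offset 3 * 8 = 24 within the parent directory BO */
pde = (entry - to_amdgpu_bo_vm(parent->bo)->entries) * 8;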
+ */ +static void amdgpu_vm_pte_update_flags(struct amdgpu_vm_update_params *params, + struct amdgpu_bo_vm *pt, + unsigned int level, + uint64_t pe, uint64_t addr, + unsigned int count, uint32_t incr, + uint64_t flags) + +{ + if (level != AMDGPU_VM_PTB) { + flags |= AMDGPU_PDE_PTE; + amdgpu_gmc_get_vm_pde(params->adev, level, &addr, &flags); + + } else if (params->adev->asic_type >= CHIP_VEGA10 && + !(flags & AMDGPU_PTE_VALID) && + !(flags & AMDGPU_PTE_PRT)) { + + /* Workaround for fault priority problem on GMC9 */ + flags |= AMDGPU_PTE_EXECUTABLE; + } + + params->vm->update_funcs->update(params, pt, pe, addr, count, incr, + flags); +} + +/** + * amdgpu_vm_pte_fragment - get fragment for PTEs + * + * @params: see amdgpu_vm_update_params definition + * @start: first PTE to handle + * @end: last PTE to handle + * @flags: hw mapping flags + * @frag: resulting fragment size + * @frag_end: end of this fragment + * + * Returns the first possible fragment for the start and end address. + */ +static void amdgpu_vm_pte_fragment(struct amdgpu_vm_update_params *params, + uint64_t start, uint64_t end, uint64_t flags, + unsigned int *frag, uint64_t *frag_end) +{ + /** + * The MC L1 TLB supports variable sized pages, based on a fragment + * field in the PTE. When this field is set to a non-zero value, page + * granularity is increased from 4KB to (1 << (12 + frag)). The PTE + * flags are considered valid for all PTEs within the fragment range + * and corresponding mappings are assumed to be physically contiguous. + * + * The L1 TLB can store a single PTE for the whole fragment, + * significantly increasing the space available for translation + * caching. This leads to large improvements in throughput when the + * TLB is under pressure. + * + * The L2 TLB distributes small and large fragments into two + * asymmetric partitions. The large fragment cache is significantly + * larger. Thus, we try to use large fragments wherever possible. + * Userspace can support this by aligning virtual base address and + * allocation size to the fragment size. + * + * Starting with Vega10 the fragment size only controls the L1. The L2 + * is now directly fed with small/huge/giant pages from the walker. + */ + unsigned int max_frag; + + if (params->adev->asic_type < CHIP_VEGA10) + max_frag = params->adev->vm_manager.fragment_size; + else + max_frag = 31; + + /* system pages are not contiguous */ + if (params->pages_addr) { + *frag = 0; + *frag_end = end; + return; + } + + /* This intentionally wraps around if no bit is set */ + *frag = min_t(unsigned int, ffs(start) - 1, fls64(end - start) - 1); + if (*frag >= max_frag) { + *frag = max_frag; + *frag_end = end & ~((1ULL << max_frag) - 1); + } else { + *frag_end = start + (1 << *frag); + } +} + +/** + * amdgpu_vm_ptes_update - make sure that page tables are valid + * + * @params: see amdgpu_vm_update_params definition + * @start: start of GPU address range + * @end: end of GPU address range + * @dst: destination address to map to, the next dst inside the function + * @flags: mapping flags + * + * Update the page tables in the range @start - @end. + * + * Returns: + * 0 for success, -EINVAL for failure.
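To make the fragment math concrete, a worked example under the assumption of 4KiB GPU pages and the post-Vega10 max_frag of 31:

/* start = 0x200, end = 0x400 (page frame numbers):
 *   ffs(0x200) - 1          = 9  (start is aligned to 512 pages)
 *   fls64(0x400 - 0x200) - 1 = 9  (the range spans 512 pages)
 * -> frag = 9, i.e. one fragment of (1 << (12 + 9)) = 2MiB,
 *    and frag_end = 0x200 + (1 << 9) = 0x400 */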
+ */ +int amdgpu_vm_ptes_update(struct amdgpu_vm_update_params *params, + uint64_t start, uint64_t end, + uint64_t dst, uint64_t flags) +{ + struct amdgpu_device *adev = params->adev; + struct amdgpu_vm_pt_cursor cursor; + uint64_t frag_start = start, frag_end; + unsigned int frag; + int r; + + /* figure out the initial fragment */ + amdgpu_vm_pte_fragment(params, frag_start, end, flags, &frag, + &frag_end); + + /* walk over the address space and update the PTs */ + amdgpu_vm_pt_start(adev, params->vm, start, &cursor); + while (cursor.pfn < end) { + unsigned int shift, parent_shift, mask; + uint64_t incr, entry_end, pe_start; + struct amdgpu_bo *pt; + + if (!params->unlocked) { + /* make sure that the page tables covering the + * address range are actually allocated + */ + r = amdgpu_vm_pt_alloc(params->adev, params->vm, + &cursor, params->immediate); + if (r) + return r; + } + + shift = amdgpu_vm_pt_level_shift(adev, cursor.level); + parent_shift = amdgpu_vm_pt_level_shift(adev, cursor.level - 1); + if (params->unlocked) { + /* Unlocked updates are only allowed on the leaves */ + if (amdgpu_vm_pt_descendant(adev, &cursor)) + continue; + } else if (adev->asic_type < CHIP_VEGA10 && + (flags & AMDGPU_PTE_VALID)) { + /* No huge page support before GMC v9 */ + if (cursor.level != AMDGPU_VM_PTB) { + if (!amdgpu_vm_pt_descendant(adev, &cursor)) + return -ENOENT; + continue; + } + } else if (frag < shift) { + /* We can't use this level when the fragment size is + * smaller than the address shift. Go to the next + * child entry and try again. + */ + if (amdgpu_vm_pt_descendant(adev, &cursor)) + continue; + } else if (frag >= parent_shift) { + /* If the fragment size is even larger than the parent + * shift we should go up one level and check it again. + */ + if (!amdgpu_vm_pt_ancestor(&cursor)) + return -EINVAL; + continue; + } + + pt = cursor.entry->bo; + if (!pt) { + /* We need all PDs and PTs for mapping something, */ + if (flags & AMDGPU_PTE_VALID) + return -ENOENT; + + /* but unmapping something can happen at a higher + * level. + */ + if (!amdgpu_vm_pt_ancestor(&cursor)) + return -EINVAL; + + pt = cursor.entry->bo; + shift = parent_shift; + frag_end = max(frag_end, ALIGN(frag_start + 1, + 1ULL << shift)); + } + + /* Looks good so far, calculate parameters for the update */ + incr = (uint64_t)AMDGPU_GPU_PAGE_SIZE << shift; + mask = amdgpu_vm_pt_entries_mask(adev, cursor.level); + pe_start = ((cursor.pfn >> shift) & mask) * 8; + entry_end = ((uint64_t)mask + 1) << shift; + entry_end += cursor.pfn & ~(entry_end - 1); + entry_end = min(entry_end, end); + + do { + struct amdgpu_vm *vm = params->vm; + uint64_t upd_end = min(entry_end, frag_end); + unsigned int nptes = (upd_end - frag_start) >> shift; + uint64_t upd_flags = flags | AMDGPU_PTE_FRAG(frag); + + /* This can happen when we set higher level PDs to + * silent to stop fault floods. 
+ */ + nptes = max(nptes, 1u); + + trace_amdgpu_vm_update_ptes(params, frag_start, upd_end, + min(nptes, 32u), dst, incr, + upd_flags, + vm->task_info.pid, + vm->immediate.fence_context); + amdgpu_vm_pte_update_flags(params, to_amdgpu_bo_vm(pt), + cursor.level, pe_start, dst, + nptes, incr, upd_flags); + + pe_start += nptes * 8; + dst += nptes * incr; + + frag_start = upd_end; + if (frag_start >= frag_end) { + /* figure out the next fragment */ + amdgpu_vm_pte_fragment(params, frag_start, end, + flags, &frag, &frag_end); + if (frag < shift) + break; + } + } while (frag_start < entry_end); + + if (amdgpu_vm_pt_descendant(adev, &cursor)) { + /* Free all child entries. + * Update the tables with the flags and addresses and free up subsequent + * tables in the case of huge pages or freed up areas. + * This is the maximum you can free, because all other page tables are not + * completely covered by the range and so potentially still in use. + */ + while (cursor.pfn < frag_start) { + /* Make sure previous mapping is freed */ + if (cursor.entry->bo) { + params->table_freed = true; + amdgpu_vm_pt_free_dfs(adev, params->vm, + &cursor); + } + amdgpu_vm_pt_next(adev, &cursor); + } + + } else if (frag >= shift) { + /* or just move on to the next on the same level. */ + amdgpu_vm_pt_next(adev, &cursor); + } + } + + return 0; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c index dbb551762805..1fd3cbca20a2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c @@ -109,7 +109,7 @@ static int amdgpu_vm_sdma_commit(struct amdgpu_vm_update_params *p, if (p->unlocked) { struct dma_fence *tmp = dma_fence_get(f); - swap(p->vm->last_unlocked, f); + swap(p->vm->last_unlocked, tmp); dma_fence_put(tmp); } else { amdgpu_bo_fence(p->vm->root.bo, f, true); @@ -204,14 +204,19 @@ static int amdgpu_vm_sdma_update(struct amdgpu_vm_update_params *p, struct amdgpu_bo *bo = &vmbo->bo; enum amdgpu_ib_pool_type pool = p->immediate ? 
AMDGPU_IB_POOL_IMMEDIATE : AMDGPU_IB_POOL_DELAYED; + struct dma_resv_iter cursor; unsigned int i, ndw, nptes; + struct dma_fence *fence; uint64_t *pte; int r; /* Wait for PD/PT moves to be completed */ - r = amdgpu_sync_fence(&p->job->sync, bo->tbo.moving); - if (r) - return r; + dma_resv_for_each_fence(&cursor, bo->tbo.base.resv, + DMA_RESV_USAGE_KERNEL, fence) { + r = amdgpu_sync_fence(&p->job->sync, fence); + if (r) + return r; + } do { ndw = p->num_dw_left; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c index 7a2b487db57c..49e4092f447f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c @@ -32,8 +32,10 @@ #include "atom.h" struct amdgpu_vram_reservation { - struct list_head node; - struct drm_mm_node mm_node; + u64 start; + u64 size; + struct list_head allocated; + struct list_head blocks; }; static inline struct amdgpu_vram_mgr * @@ -96,9 +98,9 @@ static ssize_t amdgpu_mem_info_vram_used_show(struct device *dev, { struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = drm_to_adev(ddev); + struct ttm_resource_manager *man = &adev->mman.vram_mgr.manager; - return sysfs_emit(buf, "%llu\n", - amdgpu_vram_mgr_usage(&adev->mman.vram_mgr)); + return sysfs_emit(buf, "%llu\n", ttm_resource_manager_usage(man)); } /** @@ -186,18 +188,18 @@ const struct attribute_group amdgpu_vram_mgr_attr_group = { }; /** - * amdgpu_vram_mgr_vis_size - Calculate visible node size + * amdgpu_vram_mgr_vis_size - Calculate visible block size * * @adev: amdgpu_device pointer - * @node: MM node structure + * @block: DRM BUDDY block structure * - * Calculate how many bytes of the MM node are inside visible VRAM + * Calculate how many bytes of the DRM BUDDY block are inside visible VRAM */ static u64 amdgpu_vram_mgr_vis_size(struct amdgpu_device *adev, - struct drm_mm_node *node) + struct drm_buddy_block *block) { - uint64_t start = node->start << PAGE_SHIFT; - uint64_t end = (node->size + node->start) << PAGE_SHIFT; + u64 start = amdgpu_vram_mgr_block_start(block); + u64 end = start + amdgpu_vram_mgr_block_size(block); if (start >= adev->gmc.visible_vram_size) return 0; @@ -218,9 +220,9 @@ u64 amdgpu_vram_mgr_bo_visible_size(struct amdgpu_bo *bo) { struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); struct ttm_resource *res = bo->tbo.resource; - unsigned pages = res->num_pages; - struct drm_mm_node *mm; - u64 usage; + struct amdgpu_vram_mgr_resource *vres = to_amdgpu_vram_mgr_resource(res); + struct drm_buddy_block *block; + u64 usage = 0; if (amdgpu_gmc_vram_full_visible(&adev->gmc)) return amdgpu_bo_size(bo); @@ -228,9 +230,8 @@ u64 amdgpu_vram_mgr_bo_visible_size(struct amdgpu_bo *bo) if (res->start >= adev->gmc.visible_vram_size >> PAGE_SHIFT) return 0; - mm = &container_of(res, struct ttm_range_mgr_node, base)->mm_nodes[0]; - for (usage = 0; pages; pages -= mm->size, mm++) - usage += amdgpu_vram_mgr_vis_size(adev, mm); + list_for_each_entry(block, &vres->blocks, link) + usage += amdgpu_vram_mgr_vis_size(adev, block); return usage; } @@ -240,21 +241,30 @@ static void amdgpu_vram_mgr_do_reserve(struct ttm_resource_manager *man) { struct amdgpu_vram_mgr *mgr = to_vram_mgr(man); struct amdgpu_device *adev = to_amdgpu_device(mgr); - struct drm_mm *mm = &mgr->mm; + struct drm_buddy *mm = &mgr->mm; struct amdgpu_vram_reservation *rsv, *temp; + struct drm_buddy_block *block; uint64_t vis_usage; - list_for_each_entry_safe(rsv, temp, &mgr->reservations_pending, node) { - if 
(drm_mm_reserve_node(mm, &rsv->mm_node)) + list_for_each_entry_safe(rsv, temp, &mgr->reservations_pending, blocks) { + if (drm_buddy_alloc_blocks(mm, rsv->start, rsv->start + rsv->size, + rsv->size, mm->chunk_size, &rsv->allocated, + DRM_BUDDY_RANGE_ALLOCATION)) + continue; + + block = amdgpu_vram_mgr_first_block(&rsv->allocated); + if (!block) continue; dev_dbg(adev->dev, "Reservation 0x%llx - %lld, Succeeded\n", - rsv->mm_node.start, rsv->mm_node.size); + rsv->start, rsv->size); - vis_usage = amdgpu_vram_mgr_vis_size(adev, &rsv->mm_node); + vis_usage = amdgpu_vram_mgr_vis_size(adev, block); atomic64_add(vis_usage, &mgr->vis_usage); - atomic64_add(rsv->mm_node.size << PAGE_SHIFT, &mgr->usage); - list_move(&rsv->node, &mgr->reserved_pages); + spin_lock(&man->bdev->lru_lock); + man->usage += rsv->size; + spin_unlock(&man->bdev->lru_lock); + list_move(&rsv->blocks, &mgr->reserved_pages); } } @@ -276,14 +286,16 @@ int amdgpu_vram_mgr_reserve_range(struct amdgpu_vram_mgr *mgr, if (!rsv) return -ENOMEM; - INIT_LIST_HEAD(&rsv->node); - rsv->mm_node.start = start >> PAGE_SHIFT; - rsv->mm_node.size = size >> PAGE_SHIFT; + INIT_LIST_HEAD(&rsv->allocated); + INIT_LIST_HEAD(&rsv->blocks); - spin_lock(&mgr->lock); - list_add_tail(&mgr->reservations_pending, &rsv->node); + rsv->start = start; + rsv->size = size; + + mutex_lock(&mgr->lock); + list_add_tail(&rsv->blocks, &mgr->reservations_pending); amdgpu_vram_mgr_do_reserve(&mgr->manager); - spin_unlock(&mgr->lock); + mutex_unlock(&mgr->lock); return 0; } @@ -305,19 +317,19 @@ int amdgpu_vram_mgr_query_page_status(struct amdgpu_vram_mgr *mgr, struct amdgpu_vram_reservation *rsv; int ret; - spin_lock(&mgr->lock); + mutex_lock(&mgr->lock); - list_for_each_entry(rsv, &mgr->reservations_pending, node) { - if ((rsv->mm_node.start <= start) && - (start < (rsv->mm_node.start + rsv->mm_node.size))) { + list_for_each_entry(rsv, &mgr->reservations_pending, blocks) { + if (rsv->start <= start && + (start < (rsv->start + rsv->size))) { ret = -EBUSY; goto out; } } - list_for_each_entry(rsv, &mgr->reserved_pages, node) { - if ((rsv->mm_node.start <= start) && - (start < (rsv->mm_node.start + rsv->mm_node.size))) { + list_for_each_entry(rsv, &mgr->reserved_pages, blocks) { + if (rsv->start <= start && + (start < (rsv->start + rsv->size))) { ret = 0; goto out; } @@ -325,33 +337,11 @@ int amdgpu_vram_mgr_query_page_status(struct amdgpu_vram_mgr *mgr, ret = -ENOENT; out: - spin_unlock(&mgr->lock); + mutex_unlock(&mgr->lock); return ret; } /** - * amdgpu_vram_mgr_virt_start - update virtual start address - * - * @mem: ttm_resource to update - * @node: just allocated node - * - * Calculate a virtual BO start address to easily check if everything is CPU - * accessible. 
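The reservation path above shows the core of the drm_mm to drm_buddy conversion: a reserved range is now carved out with a range-constrained buddy allocation instead of drm_mm_reserve_node(). The essential call, sketched in isolation:

LIST_HEAD(allocated);

/* Carve [start, start + size) out of the buddy allocator */
int r = drm_buddy_alloc_blocks(mm, rsv->start, rsv->start + rsv->size,
			       rsv->size, mm->chunk_size, &allocated,
			       DRM_BUDDY_RANGE_ALLOCATION);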
- */ -static void amdgpu_vram_mgr_virt_start(struct ttm_resource *mem, - struct drm_mm_node *node) -{ - unsigned long start; - - start = node->start + node->size; - if (start > mem->num_pages) - start -= mem->num_pages; - else - start = 0; - mem->start = max(mem->start, start); -} - -/** * amdgpu_vram_mgr_new - allocate new ranges * * @man: TTM memory type manager @@ -366,116 +356,176 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man, const struct ttm_place *place, struct ttm_resource **res) { - unsigned long lpfn, num_nodes, pages_per_node, pages_left, pages; + u64 vis_usage = 0, max_bytes, cur_size, min_block_size; struct amdgpu_vram_mgr *mgr = to_vram_mgr(man); struct amdgpu_device *adev = to_amdgpu_device(mgr); - uint64_t vis_usage = 0, mem_bytes, max_bytes; - struct ttm_range_mgr_node *node; - struct drm_mm *mm = &mgr->mm; - enum drm_mm_insert_mode mode; - unsigned i; + struct amdgpu_vram_mgr_resource *vres; + u64 size, remaining_size, lpfn, fpfn; + struct drm_buddy *mm = &mgr->mm; + struct drm_buddy_block *block; + unsigned long pages_per_block; int r; - lpfn = place->lpfn; + lpfn = place->lpfn << PAGE_SHIFT; if (!lpfn) lpfn = man->size; + fpfn = place->fpfn << PAGE_SHIFT; + max_bytes = adev->gmc.mc_vram_size; if (tbo->type != ttm_bo_type_kernel) max_bytes -= AMDGPU_VM_RESERVED_VRAM; - /* bail out quickly if there's likely not enough VRAM for this BO */ - mem_bytes = tbo->base.size; - if (atomic64_add_return(mem_bytes, &mgr->usage) > max_bytes) { - r = -ENOSPC; - goto error_sub; - } - if (place->flags & TTM_PL_FLAG_CONTIGUOUS) { - pages_per_node = ~0ul; - num_nodes = 1; + pages_per_block = ~0ul; } else { #ifdef CONFIG_TRANSPARENT_HUGEPAGE - pages_per_node = HPAGE_PMD_NR; + pages_per_block = HPAGE_PMD_NR; #else /* default to 2MB */ - pages_per_node = 2UL << (20UL - PAGE_SHIFT); + pages_per_block = 2UL << (20UL - PAGE_SHIFT); #endif - pages_per_node = max_t(uint32_t, pages_per_node, - tbo->page_alignment); - num_nodes = DIV_ROUND_UP_ULL(PFN_UP(mem_bytes), pages_per_node); + pages_per_block = max_t(uint32_t, pages_per_block, + tbo->page_alignment); } - node = kvmalloc(struct_size(node, mm_nodes, num_nodes), - GFP_KERNEL | __GFP_ZERO); - if (!node) { - r = -ENOMEM; - goto error_sub; + vres = kzalloc(sizeof(*vres), GFP_KERNEL); + if (!vres) + return -ENOMEM; + + ttm_resource_init(tbo, place, &vres->base); + + /* bail out quickly if there's likely not enough VRAM for this BO */ + if (ttm_resource_manager_usage(man) > max_bytes) { + r = -ENOSPC; + goto error_fini; } - ttm_resource_init(tbo, place, &node->base); + INIT_LIST_HEAD(&vres->blocks); - mode = DRM_MM_INSERT_BEST; if (place->flags & TTM_PL_FLAG_TOPDOWN) - mode = DRM_MM_INSERT_HIGH; - - pages_left = node->base.num_pages; - - /* Limit maximum size to 2GB due to SG table limitations */ - pages = min(pages_left, 2UL << (30 - PAGE_SHIFT)); - - i = 0; - spin_lock(&mgr->lock); - while (pages_left) { - uint32_t alignment = tbo->page_alignment; - - if (pages >= pages_per_node) - alignment = pages_per_node; - - r = drm_mm_insert_node_in_range(mm, &node->mm_nodes[i], pages, - alignment, 0, place->fpfn, - lpfn, mode); - if (unlikely(r)) { - if (pages > pages_per_node) { - if (is_power_of_2(pages)) - pages = pages / 2; - else - pages = rounddown_pow_of_two(pages); - continue; + vres->flags |= DRM_BUDDY_TOPDOWN_ALLOCATION; + + if (fpfn || lpfn != man->size) + /* Allocate blocks in desired range */ + vres->flags |= DRM_BUDDY_RANGE_ALLOCATION; + + remaining_size = vres->base.num_pages << PAGE_SHIFT; + + mutex_lock(&mgr->lock); + 
while (remaining_size) {
+		if (tbo->page_alignment)
+			min_block_size = tbo->page_alignment << PAGE_SHIFT;
+		else
+			min_block_size = mgr->default_page_size;
+
+		BUG_ON(min_block_size < mm->chunk_size);
+
+		/* Limit maximum size to 2GiB due to SG table limitations */
+		size = min(remaining_size, 2ULL << 30);
+
+		if (size >= pages_per_block << PAGE_SHIFT)
+			min_block_size = pages_per_block << PAGE_SHIFT;
+
+		cur_size = size;
+
+		if (fpfn + size != place->lpfn << PAGE_SHIFT) {
+			/*
+			 * Except for an actual range allocation, adjust the size
+			 * and min_block_size according to the contiguous flag
+			 */
+			if (place->flags & TTM_PL_FLAG_CONTIGUOUS) {
+				size = roundup_pow_of_two(size);
+				min_block_size = size;
+			/*
+			 * Round the size up if it is not aligned to
+			 * min_block_size
+			 */
+			} else if (!IS_ALIGNED(size, min_block_size)) {
+				size = round_up(size, min_block_size);
 			}
-			goto error_free;
 		}
 
-		vis_usage += amdgpu_vram_mgr_vis_size(adev, &node->mm_nodes[i]);
-		amdgpu_vram_mgr_virt_start(&node->base, &node->mm_nodes[i]);
-		pages_left -= pages;
-		++i;
+		r = drm_buddy_alloc_blocks(mm, fpfn,
+					   lpfn,
+					   size,
+					   min_block_size,
+					   &vres->blocks,
+					   vres->flags);
+		if (unlikely(r))
+			goto error_free_blocks;
+
+		if (size > remaining_size)
+			remaining_size = 0;
+		else
+			remaining_size -= size;
+	}
+	mutex_unlock(&mgr->lock);
+
+	if (cur_size != size) {
+		struct drm_buddy_block *block;
+		struct list_head *trim_list;
+		u64 original_size;
+		LIST_HEAD(temp);
+
+		trim_list = &vres->blocks;
+		original_size = vres->base.num_pages << PAGE_SHIFT;
+
+		/*
+		 * If the size was rounded up to min_block_size, trim the last
+		 * block back to the required size
+		 */
+		if (!list_is_singular(&vres->blocks)) {
+			block = list_last_entry(&vres->blocks, typeof(*block), link);
+			list_move_tail(&block->link, &temp);
+			trim_list = &temp;
+			/*
+			 * Compute original_size by subtracting the overshoot
+			 * (aligned size - original size) from the last block's size
+			 */
+			original_size = amdgpu_vram_mgr_block_size(block) - (size - cur_size);
+		}
+
+		mutex_lock(&mgr->lock);
+		drm_buddy_block_trim(mm,
+				     original_size,
+				     trim_list);
+		mutex_unlock(&mgr->lock);
 
-		if (pages > pages_left)
-			pages = pages_left;
+		if (!list_empty(&temp))
+			list_splice_tail(trim_list, &vres->blocks);
 	}
-	spin_unlock(&mgr->lock);
 
-	if (i == 1)
-		node->base.placement |= TTM_PL_FLAG_CONTIGUOUS;
+	list_for_each_entry(block, &vres->blocks, link)
+		vis_usage += amdgpu_vram_mgr_vis_size(adev, block);
+
+	block = amdgpu_vram_mgr_first_block(&vres->blocks);
+	if (!block) {
+		r = -EINVAL;
+		goto error_fini;
+	}
+
+	vres->base.start = amdgpu_vram_mgr_block_start(block) >> PAGE_SHIFT;
+
+	if (amdgpu_is_vram_mgr_blocks_contiguous(&vres->blocks))
+		vres->base.placement |= TTM_PL_FLAG_CONTIGUOUS;
 
 	if (adev->gmc.xgmi.connected_to_cpu)
-		node->base.bus.caching = ttm_cached;
+		vres->base.bus.caching = ttm_cached;
 	else
-		node->base.bus.caching = ttm_write_combined;
+		vres->base.bus.caching = ttm_write_combined;
 
 	atomic64_add(vis_usage, &mgr->vis_usage);
-	*res = &node->base;
+	*res = &vres->base;
 	return 0;
 
-error_free:
-	while (i--)
-		drm_mm_remove_node(&node->mm_nodes[i]);
-	spin_unlock(&mgr->lock);
-	kvfree(node);
+error_free_blocks:
+	drm_buddy_free_list(mm, &vres->blocks);
+	mutex_unlock(&mgr->lock);
+error_fini:
+	ttm_resource_fini(man, &vres->base);
+	kfree(vres);
 
-error_sub:
-	atomic64_sub(mem_bytes, &mgr->usage);
 	return r;
 }
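Two details of the allocation path above are easy to misread: how each iteration picks its size and min_block_size (BO alignment or the 2 MiB default, a power-of-two round-up for contiguous requests, and a 2 GiB cap per call for SG tables), and how the trim target for the last block is derived when a request was rounded up. A self-contained sketch of both computations, loosely modeled on the code above with invented example values (4 KiB pages assumed, names hypothetical):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define SZ_4K 4096ULL            /* assumed PAGE_SIZE            */
#define SZ_2M (2ULL << 20)       /* default huge-page block size */
#define SZ_2G (2ULL << 30)       /* SG-table cap per allocation  */

static uint64_t round_up_pow2(uint64_t v)
{
	uint64_t r = 1;

	while (r < v)
		r <<= 1;
	return r;
}

/* Loosely mirrors one iteration of the loop above: pick the chunk size
 * and the minimum buddy block size for a single allocation call. */
static void pick_chunk(uint64_t remaining, uint64_t align, bool contiguous,
		       uint64_t *size, uint64_t *min_block)
{
	uint64_t sz = remaining < SZ_2G ? remaining : SZ_2G;
	uint64_t min = align ? align : SZ_4K;

	if (!contiguous && sz >= SZ_2M)
		min = SZ_2M;            /* prefer 2 MiB blocks */

	if (contiguous) {
		sz = round_up_pow2(sz); /* one power-of-two block */
		min = sz;
	} else if (sz % min) {
		sz += min - sz % min;   /* round up to min_block */
	}
	*size = sz;
	*min_block = min;
}

int main(void)
{
	/* A 5 MiB request with 2 MiB blocks is rounded up to 6 MiB. */
	uint64_t cur_size = 5 * (1ULL << 20), size, min_block;

	pick_chunk(cur_size, 0, false, &size, &min_block);

	/* The buddy hands back 4 MiB + 2 MiB; trimming the last 2 MiB
	 * block by the overshoot (size - cur_size = 1 MiB) leaves
	 * 4 MiB + 1 MiB, the same formula as original_size above. */
	uint64_t last_block = 2ULL << 20;
	uint64_t trim_to = last_block - (size - cur_size);

	printf("size=%llu min_block=%llu trim_to=%llu\n",
	       (unsigned long long)size, (unsigned long long)min_block,
	       (unsigned long long)trim_to);
	return 0;
}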
@@ -490,28 +540,26 @@ error_sub:
 static void amdgpu_vram_mgr_del(struct ttm_resource_manager *man,
 				struct ttm_resource *res)
 {
-	struct ttm_range_mgr_node *node = to_ttm_range_mgr_node(res);
+	struct amdgpu_vram_mgr_resource *vres = to_amdgpu_vram_mgr_resource(res);
 	struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
 	struct amdgpu_device *adev = to_amdgpu_device(mgr);
-	uint64_t usage = 0, vis_usage = 0;
-	unsigned i, pages;
+	struct drm_buddy *mm = &mgr->mm;
+	struct drm_buddy_block *block;
+	uint64_t vis_usage = 0;
 
-	spin_lock(&mgr->lock);
-	for (i = 0, pages = res->num_pages; pages;
-	     pages -= node->mm_nodes[i].size, ++i) {
-		struct drm_mm_node *mm = &node->mm_nodes[i];
+	mutex_lock(&mgr->lock);
+	list_for_each_entry(block, &vres->blocks, link)
+		vis_usage += amdgpu_vram_mgr_vis_size(adev, block);
 
-		drm_mm_remove_node(mm);
-		usage += mm->size << PAGE_SHIFT;
-		vis_usage += amdgpu_vram_mgr_vis_size(adev, mm);
-	}
 	amdgpu_vram_mgr_do_reserve(man);
-	spin_unlock(&mgr->lock);
 
-	atomic64_sub(usage, &mgr->usage);
+	drm_buddy_free_list(mm, &vres->blocks);
+	mutex_unlock(&mgr->lock);
+
 	atomic64_sub(vis_usage, &mgr->vis_usage);
 
-	kvfree(node);
+	ttm_resource_fini(man, res);
+	kfree(vres);
 }
 
 /**
@@ -543,7 +591,7 @@ int amdgpu_vram_mgr_alloc_sgt(struct amdgpu_device *adev,
 	if (!*sgt)
 		return -ENOMEM;
 
-	/* Determine the number of DRM_MM nodes to export */
+	/* Determine the number of DRM_BUDDY blocks to export */
 	amdgpu_res_first(res, offset, length, &cursor);
 	while (cursor.remaining) {
 		num_entries++;
@@ -559,10 +607,10 @@ int amdgpu_vram_mgr_alloc_sgt(struct amdgpu_device *adev,
 		sg->length = 0;
 
 	/*
-	 * Walk down DRM_MM nodes to populate scatterlist nodes
-	 * @note: Use iterator api to get first the DRM_MM node
+	 * Walk down DRM_BUDDY blocks to populate scatterlist nodes
+	 * @note: Use the iterator API to first get the DRM_BUDDY block
 	 * and the number of bytes from it. Access the following
-	 * DRM_MM node(s) if more buffer needs to exported
+	 * DRM_BUDDY block(s) if more of the buffer needs to be exported
 	 */
 	amdgpu_res_first(res, offset, length, &cursor);
 	for_each_sgtable_sg((*sgt), sg, i) {
@@ -626,18 +674,6 @@ void amdgpu_vram_mgr_free_sgt(struct device *dev,
 }
 
 /**
- * amdgpu_vram_mgr_usage - how many bytes are used in this domain
- *
- * @mgr: amdgpu_vram_mgr pointer
- *
- * Returns how many bytes are used in this domain.
- */ -uint64_t amdgpu_vram_mgr_usage(struct amdgpu_vram_mgr *mgr) -{ - return atomic64_read(&mgr->usage); -} - -/** * amdgpu_vram_mgr_vis_usage - how many bytes are used in the visible part * * @mgr: amdgpu_vram_mgr pointer @@ -661,14 +697,22 @@ static void amdgpu_vram_mgr_debug(struct ttm_resource_manager *man, struct drm_printer *printer) { struct amdgpu_vram_mgr *mgr = to_vram_mgr(man); + struct drm_buddy *mm = &mgr->mm; + struct drm_buddy_block *block; + + drm_printf(printer, " vis usage:%llu\n", + amdgpu_vram_mgr_vis_usage(mgr)); - spin_lock(&mgr->lock); - drm_mm_print(&mgr->mm, printer); - spin_unlock(&mgr->lock); + mutex_lock(&mgr->lock); + drm_printf(printer, "default_page_size: %lluKiB\n", + mgr->default_page_size >> 10); - drm_printf(printer, "man size:%llu pages, ram usage:%lluMB, vis usage:%lluMB\n", - man->size, amdgpu_vram_mgr_usage(mgr) >> 20, - amdgpu_vram_mgr_vis_usage(mgr) >> 20); + drm_buddy_print(mm, printer); + + drm_printf(printer, "reserved:\n"); + list_for_each_entry(block, &mgr->reserved_pages, link) + drm_buddy_block_print(mm, block, printer); + mutex_unlock(&mgr->lock); } static const struct ttm_resource_manager_func amdgpu_vram_mgr_func = { @@ -688,15 +732,21 @@ int amdgpu_vram_mgr_init(struct amdgpu_device *adev) { struct amdgpu_vram_mgr *mgr = &adev->mman.vram_mgr; struct ttm_resource_manager *man = &mgr->manager; + int err; - ttm_resource_manager_init(man, adev->gmc.real_vram_size >> PAGE_SHIFT); + ttm_resource_manager_init(man, &adev->mman.bdev, + adev->gmc.real_vram_size); man->func = &amdgpu_vram_mgr_func; - drm_mm_init(&mgr->mm, 0, man->size); - spin_lock_init(&mgr->lock); + err = drm_buddy_init(&mgr->mm, man->size, PAGE_SIZE); + if (err) + return err; + + mutex_init(&mgr->lock); INIT_LIST_HEAD(&mgr->reservations_pending); INIT_LIST_HEAD(&mgr->reserved_pages); + mgr->default_page_size = PAGE_SIZE; ttm_set_driver_manager(&adev->mman.bdev, TTM_PL_VRAM, &mgr->manager); ttm_resource_manager_set_used(man, true); @@ -724,16 +774,16 @@ void amdgpu_vram_mgr_fini(struct amdgpu_device *adev) if (ret) return; - spin_lock(&mgr->lock); - list_for_each_entry_safe(rsv, temp, &mgr->reservations_pending, node) + mutex_lock(&mgr->lock); + list_for_each_entry_safe(rsv, temp, &mgr->reservations_pending, blocks) kfree(rsv); - list_for_each_entry_safe(rsv, temp, &mgr->reserved_pages, node) { - drm_mm_remove_node(&rsv->mm_node); + list_for_each_entry_safe(rsv, temp, &mgr->reserved_pages, blocks) { + drm_buddy_free_list(&mgr->mm, &rsv->blocks); kfree(rsv); } - drm_mm_takedown(&mgr->mm); - spin_unlock(&mgr->lock); + drm_buddy_fini(&mgr->mm); + mutex_unlock(&mgr->lock); ttm_resource_manager_cleanup(man); ttm_set_driver_manager(&adev->mman.bdev, TTM_PL_VRAM, NULL); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h new file mode 100644 index 000000000000..9a2db87186c7 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h @@ -0,0 +1,89 @@ +/* SPDX-License-Identifier: MIT + * Copyright 2021 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __AMDGPU_VRAM_MGR_H__ +#define __AMDGPU_VRAM_MGR_H__ + +#include <drm/drm_buddy.h> + +struct amdgpu_vram_mgr { + struct ttm_resource_manager manager; + struct drm_buddy mm; + /* protects access to buffer objects */ + struct mutex lock; + struct list_head reservations_pending; + struct list_head reserved_pages; + atomic64_t vis_usage; + u64 default_page_size; +}; + +struct amdgpu_vram_mgr_resource { + struct ttm_resource base; + struct list_head blocks; + unsigned long flags; +}; + +static inline u64 amdgpu_vram_mgr_block_start(struct drm_buddy_block *block) +{ + return drm_buddy_block_offset(block); +} + +static inline u64 amdgpu_vram_mgr_block_size(struct drm_buddy_block *block) +{ + return PAGE_SIZE << drm_buddy_block_order(block); +} + +static inline struct drm_buddy_block * +amdgpu_vram_mgr_first_block(struct list_head *list) +{ + return list_first_entry_or_null(list, struct drm_buddy_block, link); +} + +static inline bool amdgpu_is_vram_mgr_blocks_contiguous(struct list_head *head) +{ + struct drm_buddy_block *block; + u64 start, size; + + block = amdgpu_vram_mgr_first_block(head); + if (!block) + return false; + + while (head != block->link.next) { + start = amdgpu_vram_mgr_block_start(block); + size = amdgpu_vram_mgr_block_size(block); + + block = list_entry(block->link.next, struct drm_buddy_block, link); + if (start + size != amdgpu_vram_mgr_block_start(block)) + return false; + } + + return true; +} + +static inline struct amdgpu_vram_mgr_resource * +to_amdgpu_vram_mgr_resource(struct ttm_resource *res) +{ + return container_of(res, struct amdgpu_vram_mgr_resource, base); +} + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c index e8b8f28c2f72..1b108d03e785 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c @@ -32,7 +32,8 @@ #include "wafl/wafl2_4_0_0_smn.h" #include "wafl/wafl2_4_0_0_sh_mask.h" -#define smnPCS_XGMI23_PCS_ERROR_STATUS 0x11a01210 +#include "amdgpu_reset.h" + #define smnPCS_XGMI3X16_PCS_ERROR_STATUS 0x11a0020c #define smnPCS_GOPX1_PCS_ERROR_STATUS 0x12200210 @@ -67,17 +68,6 @@ static const int wafl_pcs_err_status_reg_arct[] = { smnPCS_GOPX1_0_PCS_GOPX1_PCS_ERROR_STATUS + 0x100000, }; -static const int xgmi23_pcs_err_status_reg_aldebaran[] = { - smnPCS_XGMI23_PCS_ERROR_STATUS, - smnPCS_XGMI23_PCS_ERROR_STATUS + 0x100000, - smnPCS_XGMI23_PCS_ERROR_STATUS + 0x200000, - 
smnPCS_XGMI23_PCS_ERROR_STATUS + 0x300000,
-	smnPCS_XGMI23_PCS_ERROR_STATUS + 0x400000,
-	smnPCS_XGMI23_PCS_ERROR_STATUS + 0x500000,
-	smnPCS_XGMI23_PCS_ERROR_STATUS + 0x600000,
-	smnPCS_XGMI23_PCS_ERROR_STATUS + 0x700000
-};
-
 static const int xgmi3x16_pcs_err_status_reg_aldebaran[] = {
 	smnPCS_XGMI3X16_PCS_ERROR_STATUS,
 	smnPCS_XGMI3X16_PCS_ERROR_STATUS + 0x100000,
@@ -227,6 +217,9 @@ static void amdgpu_xgmi_hive_release(struct kobject *kobj)
 	struct amdgpu_hive_info *hive = container_of(
 		kobj, struct amdgpu_hive_info, kobj);
 
+	amdgpu_reset_put_reset_domain(hive->reset_domain);
+	hive->reset_domain = NULL;
+
 	mutex_destroy(&hive->hive_lock);
 	kfree(hive);
 }
@@ -398,15 +391,35 @@ struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev)
 		goto pro_end;
 	}
 
+	/**
+	 * Avoid recreating reset domain when hive is reconstructed for the case
+	 * of resetting the devices in the XGMI hive during probe for SRIOV
+	 * See https://www.spinics.net/lists/amd-gfx/msg58836.html
+	 */
+	if (adev->reset_domain->type != XGMI_HIVE) {
+		hive->reset_domain = amdgpu_reset_create_reset_domain(XGMI_HIVE, "amdgpu-reset-hive");
+		if (!hive->reset_domain) {
+			dev_err(adev->dev, "XGMI: failed initializing reset domain for xgmi hive\n");
+			ret = -ENOMEM;
+			kobject_put(&hive->kobj);
+			kfree(hive);
+			hive = NULL;
+			goto pro_end;
+		}
+	} else {
+		amdgpu_reset_get_reset_domain(adev->reset_domain);
+		hive->reset_domain = adev->reset_domain;
+	}
+
 	hive->hive_id = adev->gmc.xgmi.hive_id;
 	INIT_LIST_HEAD(&hive->device_list);
 	INIT_LIST_HEAD(&hive->node);
 	mutex_init(&hive->hive_lock);
-	atomic_set(&hive->in_reset, 0);
 	atomic_set(&hive->number_devices, 0);
 	task_barrier_init(&hive->tb);
 	hive->pstate = AMDGPU_XGMI_PSTATE_UNKNOWN;
 	hive->hi_req_gpu = NULL;
+
 	/*
 	 * hive pstate on boot is high in vega20 so we have to go to low
 	 * pstate on after boot.
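One point worth spelling out in the hive changes above: a device either creates the hive-wide reset domain or takes a reference on the domain it already belongs to, and amdgpu_xgmi_hive_release() drops that reference, so every GPU in an XGMI hive funnels resets through one shared, reference-counted object. A minimal sketch of that lifetime pattern (a plain counter stands in for the kernel's kref; all names here are hypothetical):

#include <stdio.h>
#include <stdlib.h>

/* Hypothetical stand-in for the shared per-hive reset domain. */
struct reset_domain {
	int refcount;
	const char *name;
};

static struct reset_domain *domain_create(const char *name)
{
	struct reset_domain *d = malloc(sizeof(*d));

	if (!d)
		return NULL;
	d->refcount = 1;	/* creator holds the first reference */
	d->name = name;
	return d;
}

/* Analogous to taking a reference on an existing domain: a device
 * joining the hive shares it instead of creating its own. */
static struct reset_domain *domain_get(struct reset_domain *d)
{
	d->refcount++;
	return d;
}

/* Analogous to the put in the hive release path: the last reference
 * dropped tears the domain down. */
static void domain_put(struct reset_domain *d)
{
	if (--d->refcount == 0)
		free(d);
}

int main(void)
{
	struct reset_domain *hive = domain_create("amdgpu-reset-hive");

	if (!hive)
		return 1;

	/* A second GPU in the same hive shares the domain. */
	struct reset_domain *gpu1 = domain_get(hive);

	domain_put(gpu1);	/* that GPU goes away          */
	domain_put(hive);	/* hive released, last ref out */
	return 0;
}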
@@ -732,53 +745,15 @@ int amdgpu_xgmi_remove_device(struct amdgpu_device *adev)
 	return psp_xgmi_terminate(&adev->psp);
 }
 
-static int amdgpu_xgmi_ras_late_init(struct amdgpu_device *adev)
+static int amdgpu_xgmi_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block)
 {
-	int r;
-	struct ras_ih_if ih_info = {
-		.cb = NULL,
-	};
-	struct ras_fs_if fs_info = {
-		.sysfs_name = "xgmi_wafl_err_count",
-	};
-
 	if (!adev->gmc.xgmi.supported ||
 	    adev->gmc.xgmi.num_physical_nodes == 0)
 		return 0;
 
-	adev->gmc.xgmi.ras_funcs->reset_ras_error_count(adev);
-
-	if (!adev->gmc.xgmi.ras_if) {
-		adev->gmc.xgmi.ras_if = kmalloc(sizeof(struct ras_common_if), GFP_KERNEL);
-		if (!adev->gmc.xgmi.ras_if)
-			return -ENOMEM;
-		adev->gmc.xgmi.ras_if->block = AMDGPU_RAS_BLOCK__XGMI_WAFL;
-		adev->gmc.xgmi.ras_if->type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
-		adev->gmc.xgmi.ras_if->sub_block_index = 0;
-	}
-	ih_info.head = fs_info.head = *adev->gmc.xgmi.ras_if;
-	r = amdgpu_ras_late_init(adev, adev->gmc.xgmi.ras_if,
-				 &fs_info, &ih_info);
-	if (r || !amdgpu_ras_is_supported(adev, adev->gmc.xgmi.ras_if->block)) {
-		kfree(adev->gmc.xgmi.ras_if);
-		adev->gmc.xgmi.ras_if = NULL;
-	}
-
-	return r;
-}
+	adev->gmc.xgmi.ras->ras_block.hw_ops->reset_ras_error_count(adev);
 
-static void amdgpu_xgmi_ras_fini(struct amdgpu_device *adev)
-{
-	if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__XGMI_WAFL) &&
-	    adev->gmc.xgmi.ras_if) {
-		struct ras_common_if *ras_if = adev->gmc.xgmi.ras_if;
-		struct ras_ih_if ih_info = {
-			.cb = NULL,
-		};
-
-		amdgpu_ras_late_fini(adev, ras_if, &ih_info);
-		kfree(ras_if);
-	}
+	return amdgpu_ras_block_late_init(adev, ras_block);
 }
 
 uint64_t amdgpu_xgmi_get_relative_phy_addr(struct amdgpu_device *adev,
@@ -810,9 +785,6 @@ static void amdgpu_xgmi_reset_ras_error_count(struct amdgpu_device *adev)
 						xgmi_pcs_err_status_reg_vg20[i]);
 		break;
 	case CHIP_ALDEBARAN:
-		for (i = 0; i < ARRAY_SIZE(xgmi23_pcs_err_status_reg_aldebaran); i++)
-			pcs_clear_status(adev,
-					 xgmi23_pcs_err_status_reg_aldebaran[i]);
 		for (i = 0; i < ARRAY_SIZE(xgmi3x16_pcs_err_status_reg_aldebaran); i++)
 			pcs_clear_status(adev,
 					 xgmi3x16_pcs_err_status_reg_aldebaran[i]);
@@ -865,7 +837,7 @@ static int amdgpu_xgmi_query_pcs_error_status(struct amdgpu_device *adev,
 	return 0;
 }
 
-static int amdgpu_xgmi_query_ras_error_count(struct amdgpu_device *adev,
+static void amdgpu_xgmi_query_ras_error_count(struct amdgpu_device *adev,
 					      void *ras_error_status)
 {
 	struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
@@ -874,7 +846,7 @@ static int amdgpu_xgmi_query_ras_error_count(struct amdgpu_device *adev,
 	uint32_t ue_cnt = 0, ce_cnt = 0;
 
 	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__XGMI_WAFL))
-		return -EINVAL;
+		return;
 
 	err_data->ue_count = 0;
 	err_data->ce_count = 0;
@@ -913,13 +885,6 @@ static int amdgpu_xgmi_query_ras_error_count(struct amdgpu_device *adev,
 		}
 		break;
 	case CHIP_ALDEBARAN:
-		/* check xgmi23 pcs error */
-		for (i = 0; i < ARRAY_SIZE(xgmi23_pcs_err_status_reg_aldebaran); i++) {
-			data = RREG32_PCIE(xgmi23_pcs_err_status_reg_aldebaran[i]);
-			if (data)
-				amdgpu_xgmi_query_pcs_error_status(adev,
-								   data, &ue_cnt, &ce_cnt, true);
-		}
 		/* check xgmi3x16 pcs error */
 		for (i = 0; i < ARRAY_SIZE(xgmi3x16_pcs_err_status_reg_aldebaran); i++) {
 			data = RREG32_PCIE(xgmi3x16_pcs_err_status_reg_aldebaran[i]);
@@ -940,17 +905,53 @@ static int amdgpu_xgmi_query_ras_error_count(struct amdgpu_device *adev,
 		break;
 	}
 
-	adev->gmc.xgmi.ras_funcs->reset_ras_error_count(adev);
+
adev->gmc.xgmi.ras->ras_block.hw_ops->reset_ras_error_count(adev); err_data->ue_count += ue_cnt; err_data->ce_count += ce_cnt; +} - return 0; +/* Trigger XGMI/WAFL error */ +static int amdgpu_ras_error_inject_xgmi(struct amdgpu_device *adev, void *inject_if) +{ + int ret = 0; + struct ta_ras_trigger_error_input *block_info = + (struct ta_ras_trigger_error_input *)inject_if; + + if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_DISALLOW)) + dev_warn(adev->dev, "Failed to disallow df cstate"); + + if (amdgpu_dpm_allow_xgmi_power_down(adev, false)) + dev_warn(adev->dev, "Failed to disallow XGMI power down"); + + ret = psp_ras_trigger_error(&adev->psp, block_info); + + if (amdgpu_ras_intr_triggered()) + return ret; + + if (amdgpu_dpm_allow_xgmi_power_down(adev, true)) + dev_warn(adev->dev, "Failed to allow XGMI power down"); + + if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_ALLOW)) + dev_warn(adev->dev, "Failed to allow df cstate"); + + return ret; } -const struct amdgpu_xgmi_ras_funcs xgmi_ras_funcs = { - .ras_late_init = amdgpu_xgmi_ras_late_init, - .ras_fini = amdgpu_xgmi_ras_fini, +struct amdgpu_ras_block_hw_ops xgmi_ras_hw_ops = { .query_ras_error_count = amdgpu_xgmi_query_ras_error_count, .reset_ras_error_count = amdgpu_xgmi_reset_ras_error_count, + .ras_error_inject = amdgpu_ras_error_inject_xgmi, +}; + +struct amdgpu_xgmi_ras xgmi_ras = { + .ras_block = { + .ras_comm = { + .name = "xgmi_wafl", + .block = AMDGPU_RAS_BLOCK__XGMI_WAFL, + .type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE, + }, + .hw_ops = &xgmi_ras_hw_ops, + .ras_late_init = amdgpu_xgmi_ras_late_init, + }, }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h index d2189bf7d428..552e6fb55aa8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h @@ -24,7 +24,7 @@ #include <drm/task_barrier.h> #include "amdgpu_psp.h" - +#include "amdgpu_ras.h" struct amdgpu_hive_info { struct kobject kobj; @@ -33,7 +33,6 @@ struct amdgpu_hive_info { struct list_head node; atomic_t number_devices; struct mutex hive_lock; - atomic_t in_reset; int hi_req_count; struct amdgpu_device *hi_req_gpu; struct task_barrier tb; @@ -42,6 +41,8 @@ struct amdgpu_hive_info { AMDGPU_XGMI_PSTATE_MAX_VEGA20, AMDGPU_XGMI_PSTATE_UNKNOWN } pstate; + + struct amdgpu_reset_domain *reset_domain; }; struct amdgpu_pcs_ras_field { @@ -50,7 +51,7 @@ struct amdgpu_pcs_ras_field { uint32_t pcs_err_shift; }; -extern const struct amdgpu_xgmi_ras_funcs xgmi_ras_funcs; +extern struct amdgpu_xgmi_ras xgmi_ras; struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev); void amdgpu_put_xgmi_hive(struct amdgpu_hive_info *hive); int amdgpu_xgmi_update_topology(struct amdgpu_hive_info *hive, struct amdgpu_device *adev); @@ -66,7 +67,8 @@ uint64_t amdgpu_xgmi_get_relative_phy_addr(struct amdgpu_device *adev, static inline bool amdgpu_xgmi_same_hive(struct amdgpu_device *adev, struct amdgpu_device *bo_adev) { - return (adev != bo_adev && + return (amdgpu_use_xgmi_p2p && + adev != bo_adev && adev->gmc.xgmi.hive_id && adev->gmc.xgmi.hive_id == bo_adev->gmc.xgmi.hive_id); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h index 7326b6c1b71c..e78e4c27b62a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h @@ -1,34 +1,33 @@ /* - * Copyright 2018-2019 Advanced Micro Devices, Inc. + * Copyright (c) 2018-2021 Advanced Micro Devices, Inc. All rights reserved. 
* - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
*/ #ifndef AMDGV_SRIOV_MSG__H_ #define AMDGV_SRIOV_MSG__H_ /* unit in kilobytes */ -#define AMD_SRIOV_MSG_VBIOS_OFFSET 0 -#define AMD_SRIOV_MSG_VBIOS_SIZE_KB 64 -#define AMD_SRIOV_MSG_DATAEXCHANGE_OFFSET_KB AMD_SRIOV_MSG_VBIOS_SIZE_KB -#define AMD_SRIOV_MSG_DATAEXCHANGE_SIZE_KB 4 +#define AMD_SRIOV_MSG_VBIOS_OFFSET 0 +#define AMD_SRIOV_MSG_VBIOS_SIZE_KB 64 +#define AMD_SRIOV_MSG_DATAEXCHANGE_OFFSET_KB AMD_SRIOV_MSG_VBIOS_SIZE_KB +#define AMD_SRIOV_MSG_DATAEXCHANGE_SIZE_KB 4 /* * layout @@ -51,10 +50,10 @@ * v2 defined in amdgim * v3 current */ -#define AMD_SRIOV_MSG_FW_VRAM_PF2VF_VER 2 -#define AMD_SRIOV_MSG_FW_VRAM_VF2PF_VER 3 +#define AMD_SRIOV_MSG_FW_VRAM_PF2VF_VER 2 +#define AMD_SRIOV_MSG_FW_VRAM_VF2PF_VER 3 -#define AMD_SRIOV_MSG_RESERVE_UCODE 24 +#define AMD_SRIOV_MSG_RESERVE_UCODE 24 #define AMD_SRIOV_MSG_RESERVE_VCN_INST 4 @@ -83,19 +82,19 @@ enum amd_sriov_ucode_engine_id { AMD_SRIOV_UCODE_ID__MAX }; -#pragma pack(push, 1) // PF2VF / VF2PF data areas are byte packed +#pragma pack(push, 1) // PF2VF / VF2PF data areas are byte packed union amd_sriov_msg_feature_flags { struct { - uint32_t error_log_collect : 1; - uint32_t host_load_ucodes : 1; - uint32_t host_flr_vramlost : 1; - uint32_t mm_bw_management : 1; - uint32_t pp_one_vf_mode : 1; - uint32_t reg_indirect_acc : 1; - uint32_t reserved : 26; + uint32_t error_log_collect : 1; + uint32_t host_load_ucodes : 1; + uint32_t host_flr_vramlost : 1; + uint32_t mm_bw_management : 1; + uint32_t pp_one_vf_mode : 1; + uint32_t reg_indirect_acc : 1; + uint32_t reserved : 26; } flags; - uint32_t all; + uint32_t all; }; union amd_sriov_reg_access_flags { @@ -110,10 +109,10 @@ union amd_sriov_reg_access_flags { union amd_sriov_msg_os_info { struct { - uint32_t windows : 1; - uint32_t reserved : 31; + uint32_t windows : 1; + uint32_t reserved : 31; } info; - uint32_t all; + uint32_t all; }; struct amd_sriov_msg_uuid_info { @@ -156,6 +155,7 @@ struct amd_sriov_msg_pf2vf_info_header { uint32_t reserved[2]; }; +#define AMD_SRIOV_MSG_PF2VF_INFO_FILLED_SIZE (48) struct amd_sriov_msg_pf2vf_info { /* header contains size and version */ struct amd_sriov_msg_pf2vf_info_header header; @@ -204,10 +204,10 @@ struct amd_sriov_msg_pf2vf_info { } mm_bw_management[AMD_SRIOV_MSG_RESERVE_VCN_INST]; /* UUID info */ struct amd_sriov_msg_uuid_info uuid_info; - /* pcie atomic Ops info */ - uint32_t pcie_atomic_ops_enabled_flags; + /* PCIE atomic ops support flag */ + uint32_t pcie_atomic_ops_support_flags; /* reserved */ - uint32_t reserved[256 - 48]; + uint32_t reserved[256 - AMD_SRIOV_MSG_PF2VF_INFO_FILLED_SIZE]; }; struct amd_sriov_msg_vf2pf_info_header { @@ -219,12 +219,13 @@ struct amd_sriov_msg_vf2pf_info_header { uint32_t reserved[2]; }; +#define AMD_SRIOV_MSG_VF2PF_INFO_FILLED_SIZE (70) struct amd_sriov_msg_vf2pf_info { /* header contains size and version */ struct amd_sriov_msg_vf2pf_info_header header; uint32_t checksum; /* driver version */ - uint8_t driver_version[64]; + uint8_t driver_version[64]; /* driver certification, 1=WHQL, 0=None */ uint32_t driver_cert; /* guest OS type and version */ @@ -258,13 +259,13 @@ struct amd_sriov_msg_vf2pf_info { uint32_t fb_size; /* guest ucode data, each one is 1.25 Dword */ struct { - uint8_t id; + uint8_t id; uint32_t version; } ucode_info[AMD_SRIOV_MSG_RESERVE_UCODE]; uint64_t dummy_page_addr; /* reserved */ - uint32_t reserved[256-70]; + uint32_t reserved[256 - AMD_SRIOV_MSG_VF2PF_INFO_FILLED_SIZE]; }; /* mailbox message send from guest to host */ @@ -276,7 +277,7 @@ enum amd_sriov_mailbox_request_message { 
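The two *_INFO_FILLED_SIZE macros introduced above replace bare magic numbers in the reserved[] arithmetic: each exchange struct must stay exactly 256 dwords (1 KiB), so the padding shrinks by one dword for every dword of payload added, and the existing _Static_asserts catch any drift. A compile-time sketch of that invariant (layout illustrative only, not the real PF2VF/VF2PF format):

#include <stdint.h>

#define MSG_SIZE_DW	256	/* the exchange struct is fixed at 1 KiB */
#define FILLED_DW	70	/* dwords of payload currently in use    */

struct vf2pf_like_info {
	uint32_t payload[FILLED_DW];
	uint32_t reserved[MSG_SIZE_DW - FILLED_DW];
};

/* Same invariant the header enforces: moving a dword from reserved[]
 * into payload must leave the overall size unchanged. */
_Static_assert(sizeof(struct vf2pf_like_info) == MSG_SIZE_DW * 4,
	       "exchange struct must stay exactly 1 KiB");

int main(void) { return 0; }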
MB_REQ_MSG_REQ_GPU_RESET_ACCESS, MB_REQ_MSG_REQ_GPU_INIT_DATA, - MB_REQ_MSG_LOG_VF_ERROR = 200, + MB_REQ_MSG_LOG_VF_ERROR = 200, }; /* mailbox message send from host to guest */ @@ -298,17 +299,15 @@ enum amd_sriov_gpu_init_data_version { GPU_INIT_DATA_READY_V1 = 1, }; -#pragma pack(pop) // Restore previous packing option +#pragma pack(pop) // Restore previous packing option /* checksum function between host and guest */ -unsigned int amd_sriov_msg_checksum(void *obj, - unsigned long obj_size, - unsigned int key, - unsigned int checksum); +unsigned int amd_sriov_msg_checksum(void *obj, unsigned long obj_size, unsigned int key, + unsigned int checksum); /* assertion at compile time */ #ifdef __linux__ -#define stringification(s) _stringification(s) +#define stringification(s) _stringification(s) #define _stringification(s) #s _Static_assert( @@ -319,13 +318,11 @@ _Static_assert( sizeof(struct amd_sriov_msg_pf2vf_info) == AMD_SRIOV_MSG_SIZE_KB << 10, "amd_sriov_msg_pf2vf_info must be " stringification(AMD_SRIOV_MSG_SIZE_KB) " KB"); -_Static_assert( - AMD_SRIOV_MSG_RESERVE_UCODE % 4 == 0, - "AMD_SRIOV_MSG_RESERVE_UCODE must be multiple of 4"); +_Static_assert(AMD_SRIOV_MSG_RESERVE_UCODE % 4 == 0, + "AMD_SRIOV_MSG_RESERVE_UCODE must be multiple of 4"); -_Static_assert( - AMD_SRIOV_MSG_RESERVE_UCODE > AMD_SRIOV_UCODE_ID__MAX, - "AMD_SRIOV_MSG_RESERVE_UCODE must be bigger than AMD_SRIOV_UCODE_ID__MAX"); +_Static_assert(AMD_SRIOV_MSG_RESERVE_UCODE > AMD_SRIOV_UCODE_ID__MAX, + "AMD_SRIOV_MSG_RESERVE_UCODE must be bigger than AMD_SRIOV_UCODE_ID__MAX"); #undef _stringification #undef stringification diff --git a/drivers/gpu/drm/amd/amdgpu/athub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/athub_v1_0.c index 3ea557864320..a13c443ea10f 100644 --- a/drivers/gpu/drm/amd/amdgpu/athub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/athub_v1_0.c @@ -68,12 +68,13 @@ int athub_v1_0_set_clockgating(struct amdgpu_device *adev, if (amdgpu_sriov_vf(adev)) return 0; - switch (adev->asic_type) { - case CHIP_VEGA10: - case CHIP_VEGA12: - case CHIP_VEGA20: - case CHIP_RAVEN: - case CHIP_RENOIR: + switch (adev->ip_versions[ATHUB_HWIP][0]) { + case IP_VERSION(9, 0, 0): + case IP_VERSION(9, 1, 0): + case IP_VERSION(9, 2, 0): + case IP_VERSION(9, 3, 0): + case IP_VERSION(9, 4, 0): + case IP_VERSION(1, 5, 0): athub_update_medium_grain_clock_gating(adev, state == AMD_CG_STATE_GATE); athub_update_medium_grain_light_sleep(adev, @@ -86,7 +87,7 @@ int athub_v1_0_set_clockgating(struct amdgpu_device *adev, return 0; } -void athub_v1_0_get_clockgating(struct amdgpu_device *adev, u32 *flags) +void athub_v1_0_get_clockgating(struct amdgpu_device *adev, u64 *flags) { int data; diff --git a/drivers/gpu/drm/amd/amdgpu/athub_v1_0.h b/drivers/gpu/drm/amd/amdgpu/athub_v1_0.h index b279af59e34f..6be0a6704ea7 100644 --- a/drivers/gpu/drm/amd/amdgpu/athub_v1_0.h +++ b/drivers/gpu/drm/amd/amdgpu/athub_v1_0.h @@ -25,6 +25,6 @@ int athub_v1_0_set_clockgating(struct amdgpu_device *adev, enum amd_clockgating_state state); -void athub_v1_0_get_clockgating(struct amdgpu_device *adev, u32 *flags); +void athub_v1_0_get_clockgating(struct amdgpu_device *adev, u64 *flags); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/athub_v2_0.c b/drivers/gpu/drm/amd/amdgpu/athub_v2_0.c index ab6a07e5e8c4..a9521c98e7f7 100644 --- a/drivers/gpu/drm/amd/amdgpu/athub_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/athub_v2_0.c @@ -78,6 +78,7 @@ int athub_v2_0_set_clockgating(struct amdgpu_device *adev, return 0; switch (adev->ip_versions[ATHUB_HWIP][0]) { + case IP_VERSION(1, 3, 1): case 
IP_VERSION(2, 0, 0): case IP_VERSION(2, 0, 2): athub_v2_0_update_medium_grain_clock_gating(adev, @@ -92,7 +93,7 @@ int athub_v2_0_set_clockgating(struct amdgpu_device *adev, return 0; } -void athub_v2_0_get_clockgating(struct amdgpu_device *adev, u32 *flags) +void athub_v2_0_get_clockgating(struct amdgpu_device *adev, u64 *flags) { int data; diff --git a/drivers/gpu/drm/amd/amdgpu/athub_v2_0.h b/drivers/gpu/drm/amd/amdgpu/athub_v2_0.h index 02932c1c8bab..8b763f6dfd81 100644 --- a/drivers/gpu/drm/amd/amdgpu/athub_v2_0.h +++ b/drivers/gpu/drm/amd/amdgpu/athub_v2_0.h @@ -25,6 +25,6 @@ int athub_v2_0_set_clockgating(struct amdgpu_device *adev, enum amd_clockgating_state state); -void athub_v2_0_get_clockgating(struct amdgpu_device *adev, u32 *flags); +void athub_v2_0_get_clockgating(struct amdgpu_device *adev, u64 *flags); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/athub_v2_1.c b/drivers/gpu/drm/amd/amdgpu/athub_v2_1.c index 2edefd10e56c..78508ae6a670 100644 --- a/drivers/gpu/drm/amd/amdgpu/athub_v2_1.c +++ b/drivers/gpu/drm/amd/amdgpu/athub_v2_1.c @@ -74,6 +74,7 @@ int athub_v2_1_set_clockgating(struct amdgpu_device *adev, case IP_VERSION(2, 1, 0): case IP_VERSION(2, 1, 1): case IP_VERSION(2, 1, 2): + case IP_VERSION(2, 4, 0): athub_v2_1_update_medium_grain_clock_gating(adev, state == AMD_CG_STATE_GATE); athub_v2_1_update_medium_grain_light_sleep(adev, state == AMD_CG_STATE_GATE); break; @@ -84,7 +85,7 @@ int athub_v2_1_set_clockgating(struct amdgpu_device *adev, return 0; } -void athub_v2_1_get_clockgating(struct amdgpu_device *adev, u32 *flags) +void athub_v2_1_get_clockgating(struct amdgpu_device *adev, u64 *flags) { int data; diff --git a/drivers/gpu/drm/amd/amdgpu/athub_v2_1.h b/drivers/gpu/drm/amd/amdgpu/athub_v2_1.h index 5e6824c0f591..b799f14bce03 100644 --- a/drivers/gpu/drm/amd/amdgpu/athub_v2_1.h +++ b/drivers/gpu/drm/amd/amdgpu/athub_v2_1.h @@ -25,6 +25,6 @@ int athub_v2_1_set_clockgating(struct amdgpu_device *adev, enum amd_clockgating_state state); -void athub_v2_1_get_clockgating(struct amdgpu_device *adev, u32 *flags); +void athub_v2_1_get_clockgating(struct amdgpu_device *adev, u64 *flags); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/atom.c b/drivers/gpu/drm/amd/amdgpu/atom.c index 6fa2229b7229..1c5d9388ad0b 100644 --- a/drivers/gpu/drm/amd/amdgpu/atom.c +++ b/drivers/gpu/drm/amd/amdgpu/atom.c @@ -25,6 +25,8 @@ #include <linux/module.h> #include <linux/sched.h> #include <linux/slab.h> +#include <linux/string_helpers.h> + #include <asm/unaligned.h> #include <drm/drm_util.h> @@ -740,7 +742,7 @@ static void atom_op_jump(atom_exec_context *ctx, int *ptr, int arg) break; } if (arg != ATOM_COND_ALWAYS) - SDEBUG(" taken: %s\n", execute ? 
"yes" : "no"); + SDEBUG(" taken: %s\n", str_yes_no(execute)); SDEBUG(" target: 0x%04X\n", target); if (execute) { if (ctx->last_jump == (ctx->start + target)) { diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_dp.c b/drivers/gpu/drm/amd/amdgpu/atombios_dp.c index f327becb022f..87c41e0e9b7c 100644 --- a/drivers/gpu/drm/amd/amdgpu/atombios_dp.c +++ b/drivers/gpu/drm/amd/amdgpu/atombios_dp.c @@ -26,6 +26,8 @@ */ #include <drm/amdgpu_drm.h> +#include <drm/display/drm_dp_helper.h> + #include "amdgpu.h" #include "atom.h" @@ -34,7 +36,6 @@ #include "atombios_dp.h" #include "amdgpu_connectors.h" #include "amdgpu_atombios.h" -#include <drm/drm_dp_helper.h> /* move these to drm_dp_helper.c/h */ #define DP_LINK_CONFIGURATION_SIZE 9 diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c b/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c index a92d86e12718..d4f5a584075d 100644 --- a/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c +++ b/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c @@ -765,7 +765,6 @@ amdgpu_atombios_encoder_setup_dig_transmitter(struct drm_encoder *encoder, int a int dp_clock = 0; int dp_lane_count = 0; int connector_object_id = 0; - int igp_lane_info = 0; int dig_encoder = dig->dig_encoder; int hpd_id = AMDGPU_HPD_NONE; @@ -848,26 +847,6 @@ amdgpu_atombios_encoder_setup_dig_transmitter(struct drm_encoder *encoder, int a else args.v1.ucConfig |= ATOM_TRANSMITTER_CONFIG_DIG1_ENCODER; - if ((adev->flags & AMD_IS_APU) && - (amdgpu_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_UNIPHY)) { - if (is_dp || - !amdgpu_dig_monitor_is_duallink(encoder, amdgpu_encoder->pixel_clock)) { - if (igp_lane_info & 0x1) - args.v1.ucConfig |= ATOM_TRANSMITTER_CONFIG_LANE_0_3; - else if (igp_lane_info & 0x2) - args.v1.ucConfig |= ATOM_TRANSMITTER_CONFIG_LANE_4_7; - else if (igp_lane_info & 0x4) - args.v1.ucConfig |= ATOM_TRANSMITTER_CONFIG_LANE_8_11; - else if (igp_lane_info & 0x8) - args.v1.ucConfig |= ATOM_TRANSMITTER_CONFIG_LANE_12_15; - } else { - if (igp_lane_info & 0x3) - args.v1.ucConfig |= ATOM_TRANSMITTER_CONFIG_LANE_0_7; - else if (igp_lane_info & 0xc) - args.v1.ucConfig |= ATOM_TRANSMITTER_CONFIG_LANE_8_15; - } - } - if (dig->linkb) args.v1.ucConfig |= ATOM_TRANSMITTER_CONFIG_LINKB; else diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c index f10ce740a29c..de6d10390ab2 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik.c +++ b/drivers/gpu/drm/amd/amdgpu/cik.c @@ -1719,7 +1719,7 @@ static void cik_program_aspm(struct amdgpu_device *adev) bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false; bool disable_clkreq = false; - if (amdgpu_aspm == 0) + if (!amdgpu_device_should_use_aspm(adev)) return; if (pci_is_root_bus(adev->pdev->bus)) diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c index c8ebd108548d..6c01199e9112 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c @@ -195,7 +195,7 @@ static void cik_sdma_ring_set_wptr(struct amdgpu_ring *ring) struct amdgpu_device *adev = ring->adev; WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[ring->me], - (lower_32_bits(ring->wptr) << 2) & 0x3fffc); + (ring->wptr << 2) & 0x3fffc); } static void cik_sdma_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) @@ -487,7 +487,7 @@ static int cik_sdma_gfx_resume(struct amdgpu_device *adev) WREG32(mmSDMA0_GFX_RB_BASE_HI + sdma_offsets[i], ring->gpu_addr >> 40); ring->wptr = 0; - WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], lower_32_bits(ring->wptr) << 2); + WREG32(mmSDMA0_GFX_RB_WPTR + 
sdma_offsets[i], ring->wptr << 2); /* enable DMA RB */ WREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i], diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c index d1570a462a51..288fce7dc0ed 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c @@ -2532,7 +2532,7 @@ static void dce_v10_0_crtc_dpms(struct drm_crtc *crtc, int mode) break; } /* adjust pm to dpms */ - amdgpu_pm_compute_clocks(adev); + amdgpu_dpm_compute_clocks(adev); } static void dce_v10_0_crtc_prepare(struct drm_crtc *crtc) @@ -2798,6 +2798,8 @@ static int dce_v10_0_sw_init(void *handle) adev_to_drm(adev)->mode_config.preferred_depth = 24; adev_to_drm(adev)->mode_config.prefer_shadow = 1; + adev_to_drm(adev)->mode_config.fb_modifiers_not_supported = true; + adev_to_drm(adev)->mode_config.fb_base = adev->gmc.aper_base; r = amdgpu_display_modeset_create_props(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c index 18a7b3bd633b..cbe5250b31cb 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c @@ -2608,7 +2608,7 @@ static void dce_v11_0_crtc_dpms(struct drm_crtc *crtc, int mode) break; } /* adjust pm to dpms */ - amdgpu_pm_compute_clocks(adev); + amdgpu_dpm_compute_clocks(adev); } static void dce_v11_0_crtc_prepare(struct drm_crtc *crtc) @@ -2916,6 +2916,8 @@ static int dce_v11_0_sw_init(void *handle) adev_to_drm(adev)->mode_config.preferred_depth = 24; adev_to_drm(adev)->mode_config.prefer_shadow = 1; + adev_to_drm(adev)->mode_config.fb_modifiers_not_supported = true; + adev_to_drm(adev)->mode_config.fb_base = adev->gmc.aper_base; r = amdgpu_display_modeset_create_props(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c index c7803dc2b2d5..982855e6cf52 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c @@ -2424,7 +2424,7 @@ static void dce_v6_0_crtc_dpms(struct drm_crtc *crtc, int mode) break; } /* adjust pm to dpms */ - amdgpu_pm_compute_clocks(adev); + amdgpu_dpm_compute_clocks(adev); } static void dce_v6_0_crtc_prepare(struct drm_crtc *crtc) @@ -2674,6 +2674,7 @@ static int dce_v6_0_sw_init(void *handle) adev_to_drm(adev)->mode_config.max_height = 16384; adev_to_drm(adev)->mode_config.preferred_depth = 24; adev_to_drm(adev)->mode_config.prefer_shadow = 1; + adev_to_drm(adev)->mode_config.fb_modifiers_not_supported = true; adev_to_drm(adev)->mode_config.fb_base = adev->gmc.aper_base; r = amdgpu_display_modeset_create_props(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c index 8318ee8339f1..84440741c60b 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c @@ -2433,7 +2433,7 @@ static void dce_v8_0_crtc_dpms(struct drm_crtc *crtc, int mode) break; } /* adjust pm to dpms */ - amdgpu_pm_compute_clocks(adev); + amdgpu_dpm_compute_clocks(adev); } static void dce_v8_0_crtc_prepare(struct drm_crtc *crtc) @@ -2695,6 +2695,8 @@ static int dce_v8_0_sw_init(void *handle) adev_to_drm(adev)->mode_config.preferred_depth = 24; adev_to_drm(adev)->mode_config.prefer_shadow = 1; + adev_to_drm(adev)->mode_config.fb_modifiers_not_supported = true; + adev_to_drm(adev)->mode_config.fb_base = adev->gmc.aper_base; r = amdgpu_display_modeset_create_props(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/df_v1_7.c b/drivers/gpu/drm/amd/amdgpu/df_v1_7.c index 2d01ac0d4c11..b991609f46c1 100644 --- 
a/drivers/gpu/drm/amd/amdgpu/df_v1_7.c +++ b/drivers/gpu/drm/amd/amdgpu/df_v1_7.c @@ -99,7 +99,7 @@ static void df_v1_7_update_medium_grain_clock_gating(struct amdgpu_device *adev, } static void df_v1_7_get_clockgating_state(struct amdgpu_device *adev, - u32 *flags) + u64 *flags) { u32 tmp; diff --git a/drivers/gpu/drm/amd/amdgpu/df_v3_6.c b/drivers/gpu/drm/amd/amdgpu/df_v3_6.c index 43c5e3ec9a39..483a441b46aa 100644 --- a/drivers/gpu/drm/amd/amdgpu/df_v3_6.c +++ b/drivers/gpu/drm/amd/amdgpu/df_v3_6.c @@ -332,7 +332,7 @@ static void df_v3_6_update_medium_grain_clock_gating(struct amdgpu_device *adev, } static void df_v3_6_get_clockgating_state(struct amdgpu_device *adev, - u32 *flags) + u64 *flags) { u32 tmp; @@ -458,7 +458,7 @@ static int df_v3_6_pmc_add_cntr(struct amdgpu_device *adev, #define DEFERRED_ARM_MASK (1 << 31) static int df_v3_6_pmc_set_deferred(struct amdgpu_device *adev, - int counter_idx, uint64_t config, + uint64_t config, int counter_idx, bool is_deferred) { @@ -476,8 +476,8 @@ static int df_v3_6_pmc_set_deferred(struct amdgpu_device *adev, } static bool df_v3_6_pmc_is_deferred(struct amdgpu_device *adev, - int counter_idx, - uint64_t config) + uint64_t config, + int counter_idx) { return (df_v3_6_pmc_has_counter(adev, config, counter_idx) && (adev->df_perfmon_config_assign_mask[counter_idx] diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index dbe7442fb25c..407074f958f4 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -56,10 +56,6 @@ #define GFX10_NUM_GFX_RINGS_Sienna_Cichlid 1 #define GFX10_MEC_HPD_SIZE 2048 -#define RLCG_VFGATE_DISABLED 0x4000000 -#define RLCG_WRONG_OPERATION_TYPE 0x2000000 -#define RLCG_NOT_IN_RANGE 0x1000000 - #define F32_CE_PROGRAM_RAM_SIZE 65536 #define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L @@ -110,6 +106,12 @@ #define mmGOLDEN_TSC_COUNT_UPPER_Vangogh_BASE_IDX 1 #define mmGOLDEN_TSC_COUNT_LOWER_Vangogh 0x0026 #define mmGOLDEN_TSC_COUNT_LOWER_Vangogh_BASE_IDX 1 + +#define mmGOLDEN_TSC_COUNT_UPPER_GC_10_3_6 0x002d +#define mmGOLDEN_TSC_COUNT_UPPER_GC_10_3_6_BASE_IDX 1 +#define mmGOLDEN_TSC_COUNT_LOWER_GC_10_3_6 0x002e +#define mmGOLDEN_TSC_COUNT_LOWER_GC_10_3_6_BASE_IDX 1 + #define mmSPI_CONFIG_CNTL_1_Vangogh 0x2441 #define mmSPI_CONFIG_CNTL_1_Vangogh_BASE_IDX 1 #define mmVGT_TF_MEMORY_BASE_HI_Vangogh 0x2261 @@ -180,14 +182,6 @@ #define mmRLC_SPARE_INT_0_Sienna_Cichlid 0x4ca5 #define mmRLC_SPARE_INT_0_Sienna_Cichlid_BASE_IDX 1 -#define GFX_RLCG_GC_WRITE_OLD (0x8 << 28) -#define GFX_RLCG_GC_WRITE (0x0 << 28) -#define GFX_RLCG_GC_READ (0x1 << 28) -#define GFX_RLCG_MMHUB_WRITE (0x2 << 28) - -#define RLCG_ERROR_REPORT_ENABLED(adev) \ - (amdgpu_sriov_reg_indirect_mmhub(adev) || amdgpu_sriov_reg_indirect_gc(adev)) - MODULE_FIRMWARE("amdgpu/navi10_ce.bin"); MODULE_FIRMWARE("amdgpu/navi10_pfp.bin"); MODULE_FIRMWARE("amdgpu/navi10_me.bin"); @@ -256,13 +250,6 @@ MODULE_FIRMWARE("amdgpu/yellow_carp_mec.bin"); MODULE_FIRMWARE("amdgpu/yellow_carp_mec2.bin"); MODULE_FIRMWARE("amdgpu/yellow_carp_rlc.bin"); -MODULE_FIRMWARE("amdgpu/cyan_skillfish_ce.bin"); -MODULE_FIRMWARE("amdgpu/cyan_skillfish_pfp.bin"); -MODULE_FIRMWARE("amdgpu/cyan_skillfish_me.bin"); -MODULE_FIRMWARE("amdgpu/cyan_skillfish_mec.bin"); -MODULE_FIRMWARE("amdgpu/cyan_skillfish_mec2.bin"); -MODULE_FIRMWARE("amdgpu/cyan_skillfish_rlc.bin"); - MODULE_FIRMWARE("amdgpu/cyan_skillfish2_ce.bin"); MODULE_FIRMWARE("amdgpu/cyan_skillfish2_pfp.bin"); MODULE_FIRMWARE("amdgpu/cyan_skillfish2_me.bin"); @@ 
-270,6 +257,20 @@ MODULE_FIRMWARE("amdgpu/cyan_skillfish2_mec.bin"); MODULE_FIRMWARE("amdgpu/cyan_skillfish2_mec2.bin"); MODULE_FIRMWARE("amdgpu/cyan_skillfish2_rlc.bin"); +MODULE_FIRMWARE("amdgpu/gc_10_3_6_ce.bin"); +MODULE_FIRMWARE("amdgpu/gc_10_3_6_pfp.bin"); +MODULE_FIRMWARE("amdgpu/gc_10_3_6_me.bin"); +MODULE_FIRMWARE("amdgpu/gc_10_3_6_mec.bin"); +MODULE_FIRMWARE("amdgpu/gc_10_3_6_mec2.bin"); +MODULE_FIRMWARE("amdgpu/gc_10_3_6_rlc.bin"); + +MODULE_FIRMWARE("amdgpu/gc_10_3_7_ce.bin"); +MODULE_FIRMWARE("amdgpu/gc_10_3_7_pfp.bin"); +MODULE_FIRMWARE("amdgpu/gc_10_3_7_me.bin"); +MODULE_FIRMWARE("amdgpu/gc_10_3_7_mec.bin"); +MODULE_FIRMWARE("amdgpu/gc_10_3_7_mec2.bin"); +MODULE_FIRMWARE("amdgpu/gc_10_3_7_rlc.bin"); + static const struct soc15_reg_golden golden_settings_gc_10_1[] = { SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_4, 0xffffffff, 0x00400014), @@ -1463,143 +1464,6 @@ static const struct soc15_reg_golden golden_settings_gc_10_1_2[] = SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0xffffffff, 0x00c00000) }; -static bool gfx_v10_get_rlcg_flag(struct amdgpu_device *adev, u32 acc_flags, u32 hwip, - int write, u32 *rlcg_flag) -{ - switch (hwip) { - case GC_HWIP: - if (amdgpu_sriov_reg_indirect_gc(adev)) { - *rlcg_flag = write ? GFX_RLCG_GC_WRITE : GFX_RLCG_GC_READ; - - return true; - /* only in new version, AMDGPU_REGS_NO_KIQ and AMDGPU_REGS_RLC enabled simultaneously */ - } else if ((acc_flags & AMDGPU_REGS_RLC) && !(acc_flags & AMDGPU_REGS_NO_KIQ)) { - *rlcg_flag = GFX_RLCG_GC_WRITE_OLD; - - return true; - } - - break; - case MMHUB_HWIP: - if (amdgpu_sriov_reg_indirect_mmhub(adev) && - (acc_flags & AMDGPU_REGS_RLC) && write) { - *rlcg_flag = GFX_RLCG_MMHUB_WRITE; - return true; - } - - break; - default: - DRM_DEBUG("Not program register by RLCG\n"); - } - - return false; -} - -static u32 gfx_v10_rlcg_rw(struct amdgpu_device *adev, u32 offset, u32 v, uint32_t flag) -{ - static void *scratch_reg0; - static void *scratch_reg1; - static void *scratch_reg2; - static void *scratch_reg3; - static void *spare_int; - static uint32_t grbm_cntl; - static uint32_t grbm_idx; - uint32_t i = 0; - uint32_t retries = 50000; - u32 ret = 0; - u32 tmp; - - scratch_reg0 = adev->rmmio + - (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG0_BASE_IDX] + mmSCRATCH_REG0) * 4; - scratch_reg1 = adev->rmmio + - (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG1) * 4; - scratch_reg2 = adev->rmmio + - (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG0_BASE_IDX] + mmSCRATCH_REG2) * 4; - scratch_reg3 = adev->rmmio + - (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG3) * 4; - - if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(10, 3, 0)) { - spare_int = adev->rmmio + - (adev->reg_offset[GC_HWIP][0][mmRLC_SPARE_INT_0_Sienna_Cichlid_BASE_IDX] - + mmRLC_SPARE_INT_0_Sienna_Cichlid) * 4; - } else { - spare_int = adev->rmmio + - (adev->reg_offset[GC_HWIP][0][mmRLC_SPARE_INT_BASE_IDX] + mmRLC_SPARE_INT) * 4; - } - - grbm_cntl = adev->reg_offset[GC_HWIP][0][mmGRBM_GFX_CNTL_BASE_IDX] + mmGRBM_GFX_CNTL; - grbm_idx = adev->reg_offset[GC_HWIP][0][mmGRBM_GFX_INDEX_BASE_IDX] + mmGRBM_GFX_INDEX; - - if (offset == grbm_cntl || offset == grbm_idx) { - if (offset == grbm_cntl) - writel(v, scratch_reg2); - else if (offset == grbm_idx) - writel(v, scratch_reg3); - - writel(v, ((void __iomem *)adev->rmmio) + (offset * 4)); - } else { - writel(v, scratch_reg0); - writel(offset | flag, scratch_reg1); - writel(1, spare_int); - - for (i = 0; i < retries; i++) { - tmp = readl(scratch_reg1); - if (!(tmp & flag)) - 
break; - - udelay(10); - } - - if (i >= retries) { - if (RLCG_ERROR_REPORT_ENABLED(adev)) { - if (tmp & RLCG_VFGATE_DISABLED) - pr_err("The vfgate is disabled, program reg:0x%05x failed!\n", offset); - else if (tmp & RLCG_WRONG_OPERATION_TYPE) - pr_err("Wrong operation type, program reg:0x%05x failed!\n", offset); - else if (tmp & RLCG_NOT_IN_RANGE) - pr_err("The register is not in range, program reg:0x%05x failed!\n", offset); - else - pr_err("Unknown error type, program reg:0x%05x failed!\n", offset); - } else - pr_err("timeout: rlcg program reg:0x%05x failed!\n", offset); - } - } - - ret = readl(scratch_reg0); - - return ret; -} - -static void gfx_v10_sriov_wreg(struct amdgpu_device *adev, u32 offset, u32 value, u32 acc_flags, u32 hwip) -{ - u32 rlcg_flag; - - if (!amdgpu_sriov_runtime(adev) && - gfx_v10_get_rlcg_flag(adev, acc_flags, hwip, 1, &rlcg_flag)) { - gfx_v10_rlcg_rw(adev, offset, value, rlcg_flag); - return; - } - - if (acc_flags & AMDGPU_REGS_NO_KIQ) - WREG32_NO_KIQ(offset, value); - else - WREG32(offset, value); -} - -static u32 gfx_v10_sriov_rreg(struct amdgpu_device *adev, u32 offset, u32 acc_flags, u32 hwip) -{ - u32 rlcg_flag; - - if (!amdgpu_sriov_runtime(adev) && - gfx_v10_get_rlcg_flag(adev, acc_flags, hwip, 0, &rlcg_flag)) - return gfx_v10_rlcg_rw(adev, offset, 0, rlcg_flag); - - if (acc_flags & AMDGPU_REGS_NO_KIQ) - return RREG32_NO_KIQ(offset); - else - return RREG32(offset); -} - static const struct soc15_reg_golden golden_settings_gc_10_1_nv14[] = { /* Pending on emulation bring up */ @@ -3429,7 +3293,7 @@ static const struct soc15_reg_golden golden_settings_gc_10_3_3[] = SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0xffffffff, 0x00000280), SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG4, 0xffffffff, 0x00800000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0x0c1807ff, 0x00000242), - SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCR_GENERAL_CNTL, 0x1ff1ffff, 0x00000500), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCR_GENERAL_CNTL_Vangogh, 0x1ff1ffff, 0x00000500), SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL1_PIPE_STEER, 0x000000ff, 0x000000e4), SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2_PIPE_STEER_0, 0x77777777, 0x32103210), SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2_PIPE_STEER_1, 0x77777777, 0x32103210), @@ -3557,6 +3421,57 @@ static const struct soc15_reg_golden golden_settings_gc_10_0_cyan_skillfish[] = SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0x00800000, 0x00800000) }; +static const struct soc15_reg_golden golden_settings_gc_10_3_6[] = +{ + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_CS_CLK_CTRL, 0xff7f0fff, 0x78000100), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCH_PIPE_STEER, 0x000000ff, 0x00000044), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_GCR_CNTL, 0x0007ffff, 0x0000c200), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0xffffffff, 0x00000280), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG4, 0xffffffff, 0x00800000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0x0c1807ff, 0x00000042), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCR_GENERAL_CNTL_Vangogh, 0x1ff1ffff, 0x00000500), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL1_PIPE_STEER, 0x000000ff, 0x00000044), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2_PIPE_STEER_0, 0x77777777, 0x32103210), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2_PIPE_STEER_1, 0x77777777, 0x32103210), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2A_ADDR_MATCH_MASK, 0xffffffff, 0xfffffff3), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_ADDR_MATCH_MASK, 0xffffffff, 0xfffffff3), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CM_CTRL1, 0xff8fff0f, 0x580f1008), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CTRL3, 0xf7ffffff, 0x00f80988), + 
SOC15_REG_GOLDEN_VALUE(GC, 0, mmLDS_CONFIG, 0x000001ff, 0x00000020), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_CL_ENHANCE, 0xf17fffff, 0x01200007), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_TIMEOUT_COUNTER, 0xffffffff, 0x00000800), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_2, 0xffffffbf, 0x00000820), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQG_CONFIG, 0x000017ff, 0x00001000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSX_DEBUG_1, 0xffffff7f, 0x00010020), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfff7ffff, 0x01030000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0xffffffff, 0x00100000) +}; + +static const struct soc15_reg_golden golden_settings_gc_10_3_7[] = { + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_CS_CLK_CTRL, 0xff7f0fff, 0x78000100), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCH_PIPE_STEER, 0x000000ff, 0x000000e4), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_GCR_CNTL, 0x0007ffff, 0x0000c200), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0xffffffff, 0x00000280), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG4, 0xffffffff, 0x00800000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0x0c1807ff, 0x00000041), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCR_GENERAL_CNTL_Vangogh, 0x1ff1ffff, 0x00000500), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL1_PIPE_STEER, 0x000000ff, 0x000000e4), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2_PIPE_STEER_0, 0x77777777, 0x32103210), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2_PIPE_STEER_1, 0x77777777, 0x32103210), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2A_ADDR_MATCH_MASK, 0xffffffff, 0xffffffff), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_ADDR_MATCH_MASK, 0xffffffff, 0xffffffff), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CM_CTRL1, 0xff8fff0f, 0x580f1008), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CTRL3, 0xf7ffffff, 0x00f80988), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmLDS_CONFIG, 0x000001ff, 0x00000020), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_CL_ENHANCE, 0xf000003f, 0x01200007), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_TIMEOUT_COUNTER, 0xffffffff, 0x00000800), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_2, 0xffffffbf, 0x00000820), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQG_CONFIG, 0x000017ff, 0x00001000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSX_DEBUG_1, 0xffffff7f, 0x00010020), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfff7ffff, 0x01030000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0xffffffff, 0x00100000) +}; + #define DEFAULT_SH_MEM_CONFIG \ ((SH_MEM_ADDRESS_MODE_64 << SH_MEM_CONFIG__ADDRESS_MODE__SHIFT) | \ (SH_MEM_ALIGNMENT_MODE_UNALIGNED << SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) | \ @@ -3790,10 +3705,21 @@ static void gfx_v10_0_init_golden_registers(struct amdgpu_device *adev) (const u32)ARRAY_SIZE(golden_settings_gc_10_3_5)); break; case IP_VERSION(10, 1, 3): + case IP_VERSION(10, 1, 4): soc15_program_register_sequence(adev, golden_settings_gc_10_0_cyan_skillfish, (const u32)ARRAY_SIZE(golden_settings_gc_10_0_cyan_skillfish)); break; + case IP_VERSION(10, 3, 6): + soc15_program_register_sequence(adev, + golden_settings_gc_10_3_6, + (const u32)ARRAY_SIZE(golden_settings_gc_10_3_6)); + break; + case IP_VERSION(10, 3, 7): + soc15_program_register_sequence(adev, + golden_settings_gc_10_3_7, + (const u32)ARRAY_SIZE(golden_settings_gc_10_3_7)); + break; default: break; } @@ -3904,8 +3830,7 @@ static int gfx_v10_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) gpu_addr = adev->wb.gpu_addr + (index * 4); adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD); memset(&ib, 0, sizeof(ib)); - r = amdgpu_ib_get(adev, NULL, 16, - AMDGPU_IB_POOL_DIRECT, &ib); + r = amdgpu_ib_get(adev, NULL, 20, AMDGPU_IB_POOL_DIRECT, &ib); 
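The golden-settings tables above are consumed by soc15_program_register_sequence(), which treats each SOC15_REG_GOLDEN_VALUE entry as a (register, and_mask, or_mask) triple applied as a masked read-modify-write. A self-contained sketch of that application step (simulated MMIO and hypothetical values; the in-tree helper also special-cases a full 0xffffffff mask as a plain write, modeled here the same way):

#include <stdint.h>
#include <stdio.h>

struct golden_entry {
	uint32_t reg;		/* register offset            */
	uint32_t and_mask;	/* bits the entry owns        */
	uint32_t or_mask;	/* value programmed into them */
};

static uint32_t fake_mmio[16];	/* stand-in for real register I/O */

static uint32_t rreg32(uint32_t reg) { return fake_mmio[reg]; }
static void wreg32(uint32_t reg, uint32_t v) { fake_mmio[reg] = v; }

/* Clear the owned bits, then set the golden value within them,
 * leaving every other bit of the register untouched. */
static void program_golden(const struct golden_entry *e)
{
	uint32_t tmp;

	if (e->and_mask == 0xffffffff) {
		tmp = e->or_mask;	/* full-register override */
	} else {
		tmp = rreg32(e->reg);
		tmp &= ~e->and_mask;
		tmp |= e->or_mask & e->and_mask;
	}
	wreg32(e->reg, tmp);
}

int main(void)
{
	struct golden_entry e = { 2, 0x000000ff, 0x00000044 };

	fake_mmio[2] = 0xdeadbe00;
	program_golden(&e);
	printf("0x%08x\n", fake_mmio[2]);	/* prints 0xdeadbe44 */
	return 0;
}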
if (r) goto err1; @@ -3968,6 +3893,7 @@ static void gfx_v10_0_check_fw_write_wait(struct amdgpu_device *adev) case IP_VERSION(10, 1, 2): case IP_VERSION(10, 1, 1): case IP_VERSION(10, 1, 3): + case IP_VERSION(10, 1, 4): if ((adev->gfx.me_fw_version >= 0x00000046) && (adev->gfx.me_feature_version >= 27) && (adev->gfx.pfp_fw_version >= 0x00000068) && @@ -3981,7 +3907,9 @@ static void gfx_v10_0_check_fw_write_wait(struct amdgpu_device *adev) case IP_VERSION(10, 3, 1): case IP_VERSION(10, 3, 4): case IP_VERSION(10, 3, 5): + case IP_VERSION(10, 3, 6): case IP_VERSION(10, 3, 3): + case IP_VERSION(10, 3, 7): adev->gfx.cp_fw_write_wait = true; break; default: @@ -4102,11 +4030,15 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev) case IP_VERSION(10, 3, 3): chip_name = "yellow_carp"; break; + case IP_VERSION(10, 3, 6): + chip_name = "gc_10_3_6"; + break; case IP_VERSION(10, 1, 3): - if (adev->apu_flags & AMD_APU_IS_CYAN_SKILLFISH2) - chip_name = "cyan_skillfish2"; - else - chip_name = "cyan_skillfish"; + case IP_VERSION(10, 1, 4): + chip_name = "cyan_skillfish2"; + break; + case IP_VERSION(10, 3, 7): + chip_name = "gc_10_3_7"; break; default: BUG(); @@ -4448,6 +4380,30 @@ static void gfx_v10_0_rlc_fini(struct amdgpu_device *adev) (void **)&adev->gfx.rlc.cp_table_ptr); } +static void gfx_v10_0_init_rlcg_reg_access_ctrl(struct amdgpu_device *adev) +{ + struct amdgpu_rlcg_reg_access_ctrl *reg_access_ctrl; + + reg_access_ctrl = &adev->gfx.rlc.reg_access_ctrl; + reg_access_ctrl->scratch_reg0 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0); + reg_access_ctrl->scratch_reg1 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG1); + reg_access_ctrl->scratch_reg2 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG2); + reg_access_ctrl->scratch_reg3 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG3); + reg_access_ctrl->grbm_cntl = SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_CNTL); + reg_access_ctrl->grbm_idx = SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_INDEX); + switch (adev->ip_versions[GC_HWIP][0]) { + case IP_VERSION(10, 3, 0): + reg_access_ctrl->spare_int = + SOC15_REG_OFFSET(GC, 0, mmRLC_SPARE_INT_0_Sienna_Cichlid); + break; + default: + reg_access_ctrl->spare_int = + SOC15_REG_OFFSET(GC, 0, mmRLC_SPARE_INT); + break; + } + adev->gfx.rlc.rlcg_reg_access_supported = true; +} + static int gfx_v10_0_rlc_init(struct amdgpu_device *adev) { const struct cs_section_def *cs_data; @@ -4468,6 +4424,7 @@ static int gfx_v10_0_rlc_init(struct amdgpu_device *adev) if (adev->gfx.rlc.funcs->update_spm_vmid) adev->gfx.rlc.funcs->update_spm_vmid(adev, 0xf); + return 0; } @@ -4678,7 +4635,9 @@ static void gfx_v10_0_gpu_early_init(struct amdgpu_device *adev) case IP_VERSION(10, 3, 1): case IP_VERSION(10, 3, 4): case IP_VERSION(10, 3, 5): + case IP_VERSION(10, 3, 6): case IP_VERSION(10, 3, 3): + case IP_VERSION(10, 3, 7): adev->gfx.config.max_hw_contexts = 8; adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; adev->gfx.config.sc_prim_fifo_size_backend = 0x100; @@ -4689,6 +4648,7 @@ static void gfx_v10_0_gpu_early_init(struct amdgpu_device *adev) 1 << REG_GET_FIELD(gb_addr_config, GB_ADDR_CONFIG, NUM_PKRS); break; case IP_VERSION(10, 1, 3): + case IP_VERSION(10, 1, 4): adev->gfx.config.max_hw_contexts = 8; adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; adev->gfx.config.sc_prim_fifo_size_backend = 0x100; @@ -4780,7 +4740,7 @@ static int gfx_v10_0_compute_ring_init(struct amdgpu_device *adev, int ring_id, + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec) + ring->pipe; hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring) ? 
- AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_GFX_PIPE_PRIO_NORMAL; + AMDGPU_RING_PRIO_2 : AMDGPU_RING_PRIO_DEFAULT; /* type-2 packets are deprecated on MEC, use type-3 instead */ r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type, hw_prio, NULL); @@ -4801,6 +4761,7 @@ static int gfx_v10_0_sw_init(void *handle) case IP_VERSION(10, 1, 1): case IP_VERSION(10, 1, 2): case IP_VERSION(10, 1, 3): + case IP_VERSION(10, 1, 4): adev->gfx.me.num_me = 1; adev->gfx.me.num_pipe_per_me = 1; adev->gfx.me.num_queue_per_pipe = 1; @@ -4813,7 +4774,9 @@ static int gfx_v10_0_sw_init(void *handle) case IP_VERSION(10, 3, 1): case IP_VERSION(10, 3, 4): case IP_VERSION(10, 3, 5): + case IP_VERSION(10, 3, 6): case IP_VERSION(10, 3, 3): + case IP_VERSION(10, 3, 7): adev->gfx.me.num_me = 1; adev->gfx.me.num_pipe_per_me = 1; adev->gfx.me.num_queue_per_pipe = 1; @@ -4865,10 +4828,14 @@ static int gfx_v10_0_sw_init(void *handle) if (r) return r; - r = gfx_v10_0_rlc_init(adev); - if (r) { - DRM_ERROR("Failed to init rlc BOs!\n"); - return r; + if (adev->gfx.rlc.funcs) { + if (adev->gfx.rlc.funcs->init) { + r = adev->gfx.rlc.funcs->init(adev); + if (r) { + dev_err(adev->dev, "Failed to init rlc BOs!\n"); + return r; + } + } } r = gfx_v10_0_mec_init(adev); @@ -5047,7 +5014,8 @@ static void gfx_v10_0_setup_rb(struct amdgpu_device *adev) for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { bitmap = i * adev->gfx.config.max_sh_per_se + j; if (((adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 3, 0)) || - (adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 3, 3))) && + (adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 3, 3)) || + (adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 3, 6))) && ((gfx_v10_3_get_disabled_sa(adev) >> bitmap) & 1)) continue; gfx_v10_0_select_se_sh(adev, i, j, 0xffffffff); @@ -6321,7 +6289,9 @@ static void gfx_v10_0_cp_gfx_set_doorbell(struct amdgpu_device *adev, case IP_VERSION(10, 3, 1): case IP_VERSION(10, 3, 4): case IP_VERSION(10, 3, 5): + case IP_VERSION(10, 3, 6): case IP_VERSION(10, 3, 3): + case IP_VERSION(10, 3, 7): tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER, DOORBELL_RANGE_LOWER_Sienna_Cichlid, ring->doorbell_index); WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp); @@ -6458,7 +6428,9 @@ static void gfx_v10_0_cp_compute_enable(struct amdgpu_device *adev, bool enable) case IP_VERSION(10, 3, 1): case IP_VERSION(10, 3, 4): case IP_VERSION(10, 3, 5): + case IP_VERSION(10, 3, 6): case IP_VERSION(10, 3, 3): + case IP_VERSION(10, 3, 7): WREG32_SOC15(GC, 0, mmCP_MEC_CNTL_Sienna_Cichlid, 0); break; default: @@ -6472,7 +6444,9 @@ static void gfx_v10_0_cp_compute_enable(struct amdgpu_device *adev, bool enable) case IP_VERSION(10, 3, 1): case IP_VERSION(10, 3, 4): case IP_VERSION(10, 3, 5): + case IP_VERSION(10, 3, 6): case IP_VERSION(10, 3, 3): + case IP_VERSION(10, 3, 7): WREG32_SOC15(GC, 0, mmCP_MEC_CNTL_Sienna_Cichlid, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK)); @@ -6570,7 +6544,9 @@ static void gfx_v10_0_kiq_setting(struct amdgpu_ring *ring) case IP_VERSION(10, 3, 1): case IP_VERSION(10, 3, 4): case IP_VERSION(10, 3, 5): + case IP_VERSION(10, 3, 6): case IP_VERSION(10, 3, 3): + case IP_VERSION(10, 3, 7): tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS_Sienna_Cichlid); tmp &= 0xffffff00; tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue); @@ -7300,6 +7276,8 @@ static bool gfx_v10_0_check_grbm_cam_remapping(struct amdgpu_device *adev) break; case IP_VERSION(10, 3, 1): case IP_VERSION(10, 3, 3): + case IP_VERSION(10, 3, 6): + case IP_VERSION(10, 3, 7): 
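/*
 * Editor's note: the APU-class parts listed above skip the probe and
 * report the GRBM CAM as already remapped (the "return true" just
 * below); discrete parts fall through to the default branch, which
 * sniffs mmVGT_ESGS_RING_SIZE to decide. This is the editor's reading
 * of the surrounding hunk, not patch text.
 */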
return true; default: data = RREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE); @@ -7334,7 +7312,9 @@ static void gfx_v10_0_setup_grbm_cam_remapping(struct amdgpu_device *adev) case IP_VERSION(10, 3, 1): case IP_VERSION(10, 3, 4): case IP_VERSION(10, 3, 5): + case IP_VERSION(10, 3, 6): case IP_VERSION(10, 3, 3): + case IP_VERSION(10, 3, 7): /* mmVGT_TF_RING_SIZE_UMD -> mmVGT_TF_RING_SIZE */ data = (SOC15_REG_OFFSET(GC, 0, mmVGT_TF_RING_SIZE_UMD) << GRBM_CAM_DATA__CAM_ADDR__SHIFT) | @@ -7654,6 +7634,7 @@ static int gfx_v10_0_soft_reset(void *handle) case IP_VERSION(10, 3, 1): case IP_VERSION(10, 3, 4): case IP_VERSION(10, 3, 5): + case IP_VERSION(10, 3, 6): case IP_VERSION(10, 3, 3): if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY_Sienna_Cichlid)) grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, @@ -7707,6 +7688,7 @@ static uint64_t gfx_v10_0_get_gpu_clock_counter(struct amdgpu_device *adev) switch (adev->ip_versions[GC_HWIP][0]) { case IP_VERSION(10, 3, 1): case IP_VERSION(10, 3, 3): + case IP_VERSION(10, 3, 7): preempt_disable(); clock_hi = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Vangogh); clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_Vangogh); @@ -7721,6 +7703,21 @@ static uint64_t gfx_v10_0_get_gpu_clock_counter(struct amdgpu_device *adev) preempt_enable(); clock = clock_lo | (clock_hi << 32ULL); break; + case IP_VERSION(10, 3, 6): + preempt_disable(); + clock_hi = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_GC_10_3_6); + clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_GC_10_3_6); + hi_check = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_GC_10_3_6); + /* The SMUIO TSC clock frequency is 100MHz, which sets 32-bit carry over + * roughly every 42 seconds. + */ + if (hi_check != clock_hi) { + clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_GC_10_3_6); + clock_hi = hi_check; + } + preempt_enable(); + clock = clock_lo | (clock_hi << 32ULL); + break; default: preempt_disable(); clock_hi = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER); @@ -7778,6 +7775,7 @@ static int gfx_v10_0_early_init(void *handle) case IP_VERSION(10, 1, 1): case IP_VERSION(10, 1, 2): case IP_VERSION(10, 1, 3): + case IP_VERSION(10, 1, 4): adev->gfx.num_gfx_rings = GFX10_NUM_GFX_RINGS_NV1X; break; case IP_VERSION(10, 3, 0): @@ -7785,7 +7783,9 @@ static int gfx_v10_0_early_init(void *handle) case IP_VERSION(10, 3, 1): case IP_VERSION(10, 3, 4): case IP_VERSION(10, 3, 5): + case IP_VERSION(10, 3, 6): case IP_VERSION(10, 3, 3): + case IP_VERSION(10, 3, 7): adev->gfx.num_gfx_rings = GFX10_NUM_GFX_RINGS_Sienna_Cichlid; break; default: @@ -7801,6 +7801,9 @@ static int gfx_v10_0_early_init(void *handle) gfx_v10_0_set_gds_init(adev); gfx_v10_0_set_rlc_funcs(adev); + /* init rlcg reg access ctrl */ + gfx_v10_0_init_rlcg_reg_access_ctrl(adev); + return 0; } @@ -7843,7 +7846,9 @@ static void gfx_v10_0_set_safe_mode(struct amdgpu_device *adev) case IP_VERSION(10, 3, 1): case IP_VERSION(10, 3, 4): case IP_VERSION(10, 3, 5): + case IP_VERSION(10, 3, 6): case IP_VERSION(10, 3, 3): + case IP_VERSION(10, 3, 7): WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE_Sienna_Cichlid, data); /* wait for RLC_SAFE_MODE */ @@ -7879,7 +7884,9 @@ static void gfx_v10_0_unset_safe_mode(struct amdgpu_device *adev) case IP_VERSION(10, 3, 1): case IP_VERSION(10, 3, 4): case IP_VERSION(10, 3, 5): + case IP_VERSION(10, 3, 6): case IP_VERSION(10, 3, 3): + case IP_VERSION(10, 3, 7): WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE_Sienna_Cichlid, data); break; default: @@ -8333,6 +8340,8 @@ static 
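/*
 * Editor's note on the function below: 0x4E20 is 20,000 decimal, masked
 * into RLC_PG_DELAY_3.CGCG_ACTIVE_BEFORE_CGPG for the APU variants,
 * i.e. how long coarse-grained clock gating must have been active before
 * the RLC may enter coarse-grained power gating. The patch does not
 * state the timebase, so the wall-clock delay is left unspecified here.
 */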
void gfx_v10_cntl_power_gating(struct amdgpu_device *adev, bool enable) switch (adev->ip_versions[GC_HWIP][0]) { case IP_VERSION(10, 3, 1): case IP_VERSION(10, 3, 3): + case IP_VERSION(10, 3, 6): + case IP_VERSION(10, 3, 7): data = 0x4E20 & RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK_Vangogh; WREG32_SOC15(GC, 0, mmRLC_PG_DELAY_3, data); break; @@ -8377,8 +8386,6 @@ static const struct amdgpu_rlc_funcs gfx_v10_0_rlc_funcs_sriov = { .reset = gfx_v10_0_rlc_reset, .start = gfx_v10_0_rlc_start, .update_spm_vmid = gfx_v10_0_update_spm_vmid, - .sriov_wreg = gfx_v10_sriov_wreg, - .sriov_rreg = gfx_v10_sriov_rreg, .is_rlcg_access_range = gfx_v10_0_is_rlcg_access_range, }; @@ -8403,6 +8410,8 @@ static int gfx_v10_0_set_powergating_state(void *handle, break; case IP_VERSION(10, 3, 1): case IP_VERSION(10, 3, 3): + case IP_VERSION(10, 3, 6): + case IP_VERSION(10, 3, 7): gfx_v10_cntl_pg(adev, enable); amdgpu_gfx_off_ctrl(adev, enable); break; @@ -8429,7 +8438,9 @@ static int gfx_v10_0_set_clockgating_state(void *handle, case IP_VERSION(10, 3, 1): case IP_VERSION(10, 3, 4): case IP_VERSION(10, 3, 5): + case IP_VERSION(10, 3, 6): case IP_VERSION(10, 3, 3): + case IP_VERSION(10, 3, 7): gfx_v10_0_update_gfx_clock_gating(adev, state == AMD_CG_STATE_GATE); break; @@ -8439,7 +8450,7 @@ static int gfx_v10_0_set_clockgating_state(void *handle, return 0; } -static void gfx_v10_0_get_clockgating_state(void *handle, u32 *flags) +static void gfx_v10_0_get_clockgating_state(void *handle, u64 *flags) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; int data; @@ -9366,6 +9377,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = { .align_mask = 0xff, .nop = PACKET3(PACKET3_NOP, 0x3FFF), .support_64bit_ptrs = true, + .secure_submission_supported = true, .vmhub = AMDGPU_GFXHUB_0, .get_rptr = gfx_v10_0_ring_get_rptr_gfx, .get_wptr = gfx_v10_0_ring_get_wptr_gfx, @@ -9537,11 +9549,14 @@ static void gfx_v10_0_set_rlc_funcs(struct amdgpu_device *adev) case IP_VERSION(10, 1, 10): case IP_VERSION(10, 1, 1): case IP_VERSION(10, 1, 3): + case IP_VERSION(10, 1, 4): case IP_VERSION(10, 3, 2): case IP_VERSION(10, 3, 1): case IP_VERSION(10, 3, 4): case IP_VERSION(10, 3, 5): + case IP_VERSION(10, 3, 6): case IP_VERSION(10, 3, 3): + case IP_VERSION(10, 3, 7): adev->gfx.rlc.funcs = &gfx_v10_0_rlc_funcs; break; case IP_VERSION(10, 1, 2): @@ -9634,7 +9649,9 @@ static int gfx_v10_0_get_cu_info(struct amdgpu_device *adev, for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { bitmap = i * adev->gfx.config.max_sh_per_se + j; if (((adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 3, 0)) || - (adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 3, 3))) && + (adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 3, 3)) || + (adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 3, 6)) || + (adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 3, 7))) && ((gfx_v10_3_get_disabled_sa(adev) >> bitmap) & 1)) continue; mask = 1; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 5f112efda634..25dc729d0ec2 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -1925,7 +1925,7 @@ static int gfx_v8_0_compute_ring_init(struct amdgpu_device *adev, int ring_id, + ring->pipe; hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring) ? 
- AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_RING_PRIO_DEFAULT; + AMDGPU_RING_PRIO_2 : AMDGPU_RING_PRIO_DEFAULT; /* type-2 packets are deprecated on MEC, use type-3 instead */ r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type, hw_prio, NULL); @@ -5475,7 +5475,7 @@ static int gfx_v8_0_set_powergating_state(void *handle, return 0; } -static void gfx_v8_0_get_clockgating_state(void *handle, u32 *flags) +static void gfx_v8_0_get_clockgating_state(void *handle, u64 *flags) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; int data; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 9189fb85a4dd..d58fd83524ac 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -63,13 +63,6 @@ #define mmGCEA_PROBE_MAP 0x070c #define mmGCEA_PROBE_MAP_BASE_IDX 0 -#define GFX9_RLCG_GC_WRITE_OLD (0x8 << 28) -#define GFX9_RLCG_GC_WRITE (0x0 << 28) -#define GFX9_RLCG_GC_READ (0x1 << 28) -#define GFX9_RLCG_VFGATE_DISABLED 0x4000000 -#define GFX9_RLCG_WRONG_OPERATION_TYPE 0x2000000 -#define GFX9_RLCG_NOT_IN_RANGE 0x1000000 - MODULE_FIRMWARE("amdgpu/vega10_ce.bin"); MODULE_FIRMWARE("amdgpu/vega10_pfp.bin"); MODULE_FIRMWARE("amdgpu/vega10_me.bin"); @@ -746,128 +739,6 @@ static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] = mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0, }; -static u32 gfx_v9_0_rlcg_rw(struct amdgpu_device *adev, u32 offset, u32 v, uint32_t flag) -{ - static void *scratch_reg0; - static void *scratch_reg1; - static void *scratch_reg2; - static void *scratch_reg3; - static void *spare_int; - static uint32_t grbm_cntl; - static uint32_t grbm_idx; - uint32_t i = 0; - uint32_t retries = 50000; - u32 ret = 0; - u32 tmp; - - scratch_reg0 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG0_BASE_IDX] + mmSCRATCH_REG0)*4; - scratch_reg1 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG1)*4; - scratch_reg2 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG2_BASE_IDX] + mmSCRATCH_REG2)*4; - scratch_reg3 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG3_BASE_IDX] + mmSCRATCH_REG3)*4; - spare_int = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmRLC_SPARE_INT_BASE_IDX] + mmRLC_SPARE_INT)*4; - - grbm_cntl = adev->reg_offset[GC_HWIP][0][mmGRBM_GFX_CNTL_BASE_IDX] + mmGRBM_GFX_CNTL; - grbm_idx = adev->reg_offset[GC_HWIP][0][mmGRBM_GFX_INDEX_BASE_IDX] + mmGRBM_GFX_INDEX; - - if (offset == grbm_cntl || offset == grbm_idx) { - if (offset == grbm_cntl) - writel(v, scratch_reg2); - else if (offset == grbm_idx) - writel(v, scratch_reg3); - - writel(v, ((void __iomem *)adev->rmmio) + (offset * 4)); - } else { - /* - * SCRATCH_REG0 = read/write value - * SCRATCH_REG1[30:28] = command - * SCRATCH_REG1[19:0] = address in dword - * SCRATCH_REG1[26:24] = Error reporting - */ - writel(v, scratch_reg0); - writel(offset | flag, scratch_reg1); - writel(1, spare_int); - - for (i = 0; i < retries; i++) { - tmp = readl(scratch_reg1); - if (!(tmp & flag)) - break; - - udelay(10); - } - - if (i >= retries) { - if (amdgpu_sriov_reg_indirect_gc(adev)) { - if (tmp & GFX9_RLCG_VFGATE_DISABLED) - pr_err("The vfgate is disabled, program reg:0x%05x failed!\n", offset); - else if (tmp & GFX9_RLCG_WRONG_OPERATION_TYPE) - pr_err("Wrong operation type, program reg:0x%05x failed!\n", offset); - else if (tmp & GFX9_RLCG_NOT_IN_RANGE) - pr_err("The register is not in range, program reg:0x%05x failed!\n", offset); - else - pr_err("Unknown error type, program reg:0x%05x 
failed!\n", offset); - } else - pr_err("timeout: rlcg program reg:0x%05x failed!\n", offset); - } - } - - ret = readl(scratch_reg0); - - return ret; -} - -static bool gfx_v9_0_get_rlcg_flag(struct amdgpu_device *adev, u32 acc_flags, u32 hwip, - int write, u32 *rlcg_flag) -{ - - switch (hwip) { - case GC_HWIP: - if (amdgpu_sriov_reg_indirect_gc(adev)) { - *rlcg_flag = write ? GFX9_RLCG_GC_WRITE : GFX9_RLCG_GC_READ; - - return true; - /* only in new version, AMDGPU_REGS_NO_KIQ and AMDGPU_REGS_RLC enabled simultaneously */ - } else if ((acc_flags & AMDGPU_REGS_RLC) && !(acc_flags & AMDGPU_REGS_NO_KIQ) && write) { - *rlcg_flag = GFX9_RLCG_GC_WRITE_OLD; - return true; - } - - break; - default: - return false; - } - - return false; -} - -static u32 gfx_v9_0_sriov_rreg(struct amdgpu_device *adev, u32 offset, u32 acc_flags, u32 hwip) -{ - u32 rlcg_flag; - - if (!amdgpu_sriov_runtime(adev) && gfx_v9_0_get_rlcg_flag(adev, acc_flags, hwip, 0, &rlcg_flag)) - return gfx_v9_0_rlcg_rw(adev, offset, 0, rlcg_flag); - - if (acc_flags & AMDGPU_REGS_NO_KIQ) - return RREG32_NO_KIQ(offset); - else - return RREG32(offset); -} - -static void gfx_v9_0_sriov_wreg(struct amdgpu_device *adev, u32 offset, - u32 value, u32 acc_flags, u32 hwip) -{ - u32 rlcg_flag; - - if (!amdgpu_sriov_runtime(adev) && gfx_v9_0_get_rlcg_flag(adev, acc_flags, hwip, 1, &rlcg_flag)) { - gfx_v9_0_rlcg_rw(adev, offset, value, rlcg_flag); - return; - } - - if (acc_flags & AMDGPU_REGS_NO_KIQ) - WREG32_NO_KIQ(offset, value); - else - WREG32(offset, value); -} - #define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042 #define VEGA12_GB_ADDR_CONFIG_GOLDEN 0x24104041 #define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042 @@ -882,7 +753,7 @@ static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev, static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev); static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring); static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring); -static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev, +static void gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev, void *ras_error_status); static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev, void *inject_if); @@ -1334,6 +1205,8 @@ static const struct amdgpu_gfxoff_quirk amdgpu_gfxoff_quirk_list[] = { { 0x1002, 0x15dd, 0x103c, 0x83e7, 0xd3 }, /* GFXOFF is unstable on C6 parts with a VBIOS 113-RAVEN-114 */ { 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc6 }, + /* Apple MacBook Pro (15-inch, 2019) Radeon Pro Vega 20 4 GB */ + { 0x1002, 0x69af, 0x106b, 0x019a, 0xc0 }, { 0, 0, 0, 0, 0 }, }; @@ -2008,6 +1881,21 @@ static int gfx_v9_0_cp_jump_table_num(struct amdgpu_device *adev) return 4; } +static void gfx_v9_0_init_rlcg_reg_access_ctrl(struct amdgpu_device *adev) +{ + struct amdgpu_rlcg_reg_access_ctrl *reg_access_ctrl; + + reg_access_ctrl = &adev->gfx.rlc.reg_access_ctrl; + reg_access_ctrl->scratch_reg0 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0); + reg_access_ctrl->scratch_reg1 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG1); + reg_access_ctrl->scratch_reg2 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG2); + reg_access_ctrl->scratch_reg3 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG3); + reg_access_ctrl->grbm_cntl = SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_CNTL); + reg_access_ctrl->grbm_idx = SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_INDEX); + reg_access_ctrl->spare_int = SOC15_REG_OFFSET(GC, 0, mmRLC_SPARE_INT); + adev->gfx.rlc.rlcg_reg_access_supported = true; +} + static int gfx_v9_0_rlc_init(struct amdgpu_device *adev) { const struct cs_section_def *cs_data; @@ 
-2197,12 +2085,16 @@ static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = { .select_me_pipe_q = &gfx_v9_0_select_me_pipe_q, }; -static const struct amdgpu_gfx_ras_funcs gfx_v9_0_ras_funcs = { - .ras_late_init = amdgpu_gfx_ras_late_init, - .ras_fini = amdgpu_gfx_ras_fini, - .ras_error_inject = &gfx_v9_0_ras_error_inject, - .query_ras_error_count = &gfx_v9_0_query_ras_error_count, - .reset_ras_error_count = &gfx_v9_0_reset_ras_error_count, +const struct amdgpu_ras_block_hw_ops gfx_v9_0_ras_ops = { + .ras_error_inject = &gfx_v9_0_ras_error_inject, + .query_ras_error_count = &gfx_v9_0_query_ras_error_count, + .reset_ras_error_count = &gfx_v9_0_reset_ras_error_count, +}; + +static struct amdgpu_gfx_ras gfx_v9_0_ras = { + .ras_block = { + .hw_ops = &gfx_v9_0_ras_ops, + }, }; static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev) @@ -2231,7 +2123,7 @@ static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev) DRM_INFO("fix gfx.config for vega12\n"); break; case IP_VERSION(9, 4, 0): - adev->gfx.ras_funcs = &gfx_v9_0_ras_funcs; + adev->gfx.ras = &gfx_v9_0_ras; adev->gfx.config.max_hw_contexts = 8; adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; adev->gfx.config.sc_prim_fifo_size_backend = 0x100; @@ -2258,7 +2150,7 @@ static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev) gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN; break; case IP_VERSION(9, 4, 1): - adev->gfx.ras_funcs = &gfx_v9_4_ras_funcs; + adev->gfx.ras = &gfx_v9_4_ras; adev->gfx.config.max_hw_contexts = 8; adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; adev->gfx.config.sc_prim_fifo_size_backend = 0x100; @@ -2279,7 +2171,7 @@ static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev) gb_addr_config |= 0x22010042; break; case IP_VERSION(9, 4, 2): - adev->gfx.ras_funcs = &gfx_v9_4_2_ras_funcs; + adev->gfx.ras = &gfx_v9_4_2_ras; adev->gfx.config.max_hw_contexts = 8; adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; adev->gfx.config.sc_prim_fifo_size_backend = 0x100; @@ -2298,6 +2190,27 @@ static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev) break; } + if (adev->gfx.ras) { + err = amdgpu_ras_register_ras_block(adev, &adev->gfx.ras->ras_block); + if (err) { + DRM_ERROR("Failed to register gfx ras block!\n"); + return err; + } + + strcpy(adev->gfx.ras->ras_block.ras_comm.name, "gfx"); + adev->gfx.ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__GFX; + adev->gfx.ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE; + adev->gfx.ras_if = &adev->gfx.ras->ras_block.ras_comm; + + /* If not define special ras_late_init function, use gfx default ras_late_init */ + if (!adev->gfx.ras->ras_block.ras_late_init) + adev->gfx.ras->ras_block.ras_late_init = amdgpu_gfx_ras_late_init; + + /* If not defined special ras_cb function, use default ras_cb */ + if (!adev->gfx.ras->ras_block.ras_cb) + adev->gfx.ras->ras_block.ras_cb = amdgpu_gfx_process_ras_data_cb; + } + adev->gfx.config.gb_addr_config = gb_addr_config; adev->gfx.config.gb_addr_config_fields.num_pipes = 1 << @@ -2363,7 +2276,7 @@ static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id, + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec) + ring->pipe; hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring) ? 
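/*
 * Editor's note: the large deletions earlier in this file (the
 * GFX9_RLCG_* flags, gfx_v9_0_rlcg_rw() and the sriov_{r,w}reg helpers)
 * pair with the new gfx_v9_0_init_rlcg_reg_access_ctrl() above. The IP
 * block now only publishes its scratch/GRBM/spare-int register offsets,
 * and the scratch-register handshake itself moves to common
 * virtualization code shared with gfx10. Editor's summary of the
 * series, not patch text.
 */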
- AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_GFX_PIPE_PRIO_NORMAL; + AMDGPU_RING_PRIO_2 : AMDGPU_RING_PRIO_DEFAULT; /* type-2 packets are deprecated on MEC, use type-3 instead */ return amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type, hw_prio, NULL); @@ -2434,10 +2347,14 @@ static int gfx_v9_0_sw_init(void *handle) return r; } - r = adev->gfx.rlc.funcs->init(adev); - if (r) { - DRM_ERROR("Failed to init rlc BOs!\n"); - return r; + if (adev->gfx.rlc.funcs) { + if (adev->gfx.rlc.funcs->init) { + r = adev->gfx.rlc.funcs->init(adev); + if (r) { + dev_err(adev->dev, "Failed to init rlc BOs!\n"); + return r; + } + } } r = gfx_v9_0_mec_init(adev); @@ -2513,10 +2430,6 @@ static int gfx_v9_0_sw_fini(void *handle) int i; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - if (adev->gfx.ras_funcs && - adev->gfx.ras_funcs->ras_fini) - adev->gfx.ras_funcs->ras_fini(adev); - for (i = 0; i < adev->gfx.num_gfx_rings; i++) amdgpu_ring_fini(&adev->gfx.gfx_ring[i]); for (i = 0; i < adev->gfx.num_compute_rings; i++) @@ -4840,6 +4753,9 @@ static int gfx_v9_0_early_init(void *handle) gfx_v9_0_set_gds_init(adev); gfx_v9_0_set_rlc_funcs(adev); + /* init rlcg reg access ctrl */ + gfx_v9_0_init_rlcg_reg_access_ctrl(adev); + return 0; } @@ -4870,16 +4786,9 @@ static int gfx_v9_0_ecc_late_init(void *handle) if (r) return r; - if (adev->gfx.ras_funcs && - adev->gfx.ras_funcs->ras_late_init) { - r = adev->gfx.ras_funcs->ras_late_init(adev); - if (r) - return r; - } - - if (adev->gfx.ras_funcs && - adev->gfx.ras_funcs->enable_watchdog_timer) - adev->gfx.ras_funcs->enable_watchdog_timer(adev); + if (adev->gfx.ras && + adev->gfx.ras->enable_watchdog_timer) + adev->gfx.ras->enable_watchdog_timer(adev); return 0; } @@ -5250,8 +5159,6 @@ static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = { .reset = gfx_v9_0_rlc_reset, .start = gfx_v9_0_rlc_start, .update_spm_vmid = gfx_v9_0_update_spm_vmid, - .sriov_wreg = gfx_v9_0_sriov_wreg, - .sriov_rreg = gfx_v9_0_sriov_rreg, .is_rlcg_access_range = gfx_v9_0_is_rlcg_access_range, }; @@ -5326,7 +5233,7 @@ static int gfx_v9_0_set_clockgating_state(void *handle, return 0; } -static void gfx_v9_0_get_clockgating_state(void *handle, u32 *flags) +static void gfx_v9_0_get_clockgating_state(void *handle, u64 *flags) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; int data; @@ -6819,7 +6726,7 @@ static void gfx_v9_0_reset_ras_error_count(struct amdgpu_device *adev) WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255); } -static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev, +static void gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev, void *ras_error_status) { struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; @@ -6828,7 +6735,7 @@ static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev, uint32_t reg_value; if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) - return -EINVAL; + return; err_data->ue_count = 0; err_data->ce_count = 0; @@ -6857,8 +6764,6 @@ static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev, mutex_unlock(&adev->grbm_idx_mutex); gfx_v9_0_query_utc_edc_status(adev, err_data); - - return 0; } static void gfx_v9_0_emit_mem_sync(struct amdgpu_ring *ring) @@ -6962,6 +6867,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = { .align_mask = 0xff, .nop = PACKET3(PACKET3_NOP, 0x3FFF), .support_64bit_ptrs = true, + .secure_submission_supported = true, .vmhub = AMDGPU_GFXHUB_0, .get_rptr = gfx_v9_0_ring_get_rptr_gfx, .get_wptr = 
gfx_v9_0_ring_get_wptr_gfx, diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4.c index b4789dfc2bb9..c67e387a97f5 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4.c @@ -863,7 +863,7 @@ static int gfx_v9_4_ras_error_count(struct amdgpu_device *adev, return 0; } -static int gfx_v9_4_query_ras_error_count(struct amdgpu_device *adev, +static void gfx_v9_4_query_ras_error_count(struct amdgpu_device *adev, void *ras_error_status) { struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; @@ -872,7 +872,7 @@ static int gfx_v9_4_query_ras_error_count(struct amdgpu_device *adev, uint32_t reg_value; if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) - return -EINVAL; + return; err_data->ue_count = 0; err_data->ce_count = 0; @@ -903,7 +903,6 @@ static int gfx_v9_4_query_ras_error_count(struct amdgpu_device *adev, gfx_v9_4_query_utc_edc_status(adev, err_data); - return 0; } static void gfx_v9_4_reset_ras_error_count(struct amdgpu_device *adev) @@ -1029,11 +1028,16 @@ static void gfx_v9_4_query_ras_error_status(struct amdgpu_device *adev) mutex_unlock(&adev->grbm_idx_mutex); } -const struct amdgpu_gfx_ras_funcs gfx_v9_4_ras_funcs = { - .ras_late_init = amdgpu_gfx_ras_late_init, - .ras_fini = amdgpu_gfx_ras_fini, - .ras_error_inject = &gfx_v9_4_ras_error_inject, - .query_ras_error_count = &gfx_v9_4_query_ras_error_count, - .reset_ras_error_count = &gfx_v9_4_reset_ras_error_count, - .query_ras_error_status = &gfx_v9_4_query_ras_error_status, + +const struct amdgpu_ras_block_hw_ops gfx_v9_4_ras_ops = { + .ras_error_inject = &gfx_v9_4_ras_error_inject, + .query_ras_error_count = &gfx_v9_4_query_ras_error_count, + .reset_ras_error_count = &gfx_v9_4_reset_ras_error_count, + .query_ras_error_status = &gfx_v9_4_query_ras_error_status, +}; + +struct amdgpu_gfx_ras gfx_v9_4_ras = { + .ras_block = { + .hw_ops = &gfx_v9_4_ras_ops, + }, }; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4.h b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4.h index bdd16b568021..ca520a767267 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4.h +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4.h @@ -24,6 +24,6 @@ #ifndef __GFX_V9_4_H__ #define __GFX_V9_4_H__ -extern const struct amdgpu_gfx_ras_funcs gfx_v9_4_ras_funcs; +extern struct amdgpu_gfx_ras gfx_v9_4_ras; #endif /* __GFX_V9_4_H__ */ diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c index c4f37a161875..3a797424579c 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c @@ -1641,14 +1641,14 @@ static int gfx_v9_4_2_query_utc_edc_count(struct amdgpu_device *adev, return 0; } -static int gfx_v9_4_2_query_ras_error_count(struct amdgpu_device *adev, +static void gfx_v9_4_2_query_ras_error_count(struct amdgpu_device *adev, void *ras_error_status) { struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; uint32_t sec_count = 0, ded_count = 0; if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) - return -EINVAL; + return; err_data->ue_count = 0; err_data->ce_count = 0; @@ -1661,7 +1661,6 @@ static int gfx_v9_4_2_query_ras_error_count(struct amdgpu_device *adev, err_data->ce_count += sec_count; err_data->ue_count += ded_count; - return 0; } static void gfx_v9_4_2_reset_utc_err_status(struct amdgpu_device *adev) @@ -1931,13 +1930,31 @@ static void gfx_v9_4_2_reset_sq_timeout_status(struct amdgpu_device *adev) mutex_unlock(&adev->grbm_idx_mutex); } -const struct amdgpu_gfx_ras_funcs 
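/*
 * Editor's note: query_ras_error_count() changes from int to void across
 * gfx_v9_0, gfx_v9_4 and gfx_v9_4_2 because the generic
 * amdgpu_ras_block_hw_ops callback is typed void -- the "RAS not
 * supported" path now simply returns early instead of handing back an
 * -EINVAL that the new framework would not propagate anyway.
 */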
gfx_v9_4_2_ras_funcs = { - .ras_late_init = amdgpu_gfx_ras_late_init, - .ras_fini = amdgpu_gfx_ras_fini, - .ras_error_inject = &gfx_v9_4_2_ras_error_inject, - .query_ras_error_count = &gfx_v9_4_2_query_ras_error_count, - .reset_ras_error_count = &gfx_v9_4_2_reset_ras_error_count, - .query_ras_error_status = &gfx_v9_4_2_query_ras_error_status, - .reset_ras_error_status = &gfx_v9_4_2_reset_ras_error_status, +static bool gfx_v9_4_2_query_uctl2_poison_status(struct amdgpu_device *adev) +{ + u32 status = 0; + struct amdgpu_vmhub *hub; + + hub = &adev->vmhub[AMDGPU_GFXHUB_0]; + status = RREG32(hub->vm_l2_pro_fault_status); + /* reset page fault status */ + WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1); + + return REG_GET_FIELD(status, VM_L2_PROTECTION_FAULT_STATUS, FED); +} + +struct amdgpu_ras_block_hw_ops gfx_v9_4_2_ras_ops = { + .ras_error_inject = &gfx_v9_4_2_ras_error_inject, + .query_ras_error_count = &gfx_v9_4_2_query_ras_error_count, + .reset_ras_error_count = &gfx_v9_4_2_reset_ras_error_count, + .query_ras_error_status = &gfx_v9_4_2_query_ras_error_status, + .reset_ras_error_status = &gfx_v9_4_2_reset_ras_error_status, +}; + +struct amdgpu_gfx_ras gfx_v9_4_2_ras = { + .ras_block = { + .hw_ops = &gfx_v9_4_2_ras_ops, + }, .enable_watchdog_timer = &gfx_v9_4_2_enable_watchdog_timer, + .query_utcl2_poison_status = gfx_v9_4_2_query_uctl2_poison_status, }; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.h b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.h index 6db1f88509af..7584624b641c 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.h +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.h @@ -31,6 +31,6 @@ void gfx_v9_4_2_init_golden_registers(struct amdgpu_device *adev, void gfx_v9_4_2_set_power_brake_sequence(struct amdgpu_device *adev); int gfx_v9_4_2_do_edc_gpr_workarounds(struct amdgpu_device *adev); -extern const struct amdgpu_gfx_ras_funcs gfx_v9_4_2_ras_funcs; +extern struct amdgpu_gfx_ras gfx_v9_4_2_ras; #endif /* __GFX_V9_4_2_H__ */ diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c index a2f8ed0e6a64..487c33937a87 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c @@ -48,6 +48,8 @@ #include "athub_v2_0.h" #include "athub_v2_1.h" +#include "amdgpu_reset.h" + #if 0 static const struct soc15_reg_golden golden_settings_navi10_hdp[] = { @@ -328,7 +330,7 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, */ if (adev->gfx.kiq.ring.sched.ready && (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev)) && - down_read_trylock(&adev->reset_sem)) { + down_read_trylock(&adev->reset_domain->sem)) { struct amdgpu_vmhub *hub = &adev->vmhub[vmhub]; const unsigned eng = 17; u32 inv_req = hub->vmhub_funcs->get_invalidate_req(vmid, flush_type); @@ -338,7 +340,7 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, inv_req, 1 << vmid); - up_read(&adev->reset_sem); + up_read(&adev->reset_domain->sem); return; } @@ -664,11 +666,27 @@ static void gmc_v10_0_set_umc_funcs(struct amdgpu_device *adev) adev->umc.umc_inst_num = UMC_V8_7_UMC_INSTANCE_NUM; adev->umc.channel_offs = UMC_V8_7_PER_CHANNEL_OFFSET_SIENNA; adev->umc.channel_idx_tbl = &umc_v8_7_channel_idx_tbl[0][0]; - adev->umc.ras_funcs = &umc_v8_7_ras_funcs; + adev->umc.ras = &umc_v8_7_ras; break; default: break; } + if (adev->umc.ras) { + amdgpu_ras_register_ras_block(adev, &adev->umc.ras->ras_block); + + strcpy(adev->umc.ras->ras_block.ras_comm.name, "umc"); + 
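/*
 * Editor's sketch (hypothetical helper, not in the patch): the
 * name/block/type/ras_if boilerplate seen here recurs for UMC, MMHUB,
 * HDP, GFX, JPEG and MCA throughout this series and could be folded
 * into a single helper along these lines:
 */
static int amdgpu_ras_block_register_common(struct amdgpu_device *adev,
					    struct amdgpu_ras_block_object *obj,
					    const char *name,
					    enum amdgpu_ras_block block,
					    struct ras_common_if **ras_if)
{
	int r;

	r = amdgpu_ras_register_ras_block(adev, obj);
	if (r)
		return r;

	strcpy(obj->ras_comm.name, name);
	obj->ras_comm.block = block;
	obj->ras_comm.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
	*ras_if = &obj->ras_comm;
	return 0;
}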
adev->umc.ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__UMC; + adev->umc.ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE; + adev->umc.ras_if = &adev->umc.ras->ras_block.ras_comm; + + /* If don't define special ras_late_init function, use default ras_late_init */ + if (!adev->umc.ras->ras_block.ras_late_init) + adev->umc.ras->ras_block.ras_late_init = amdgpu_umc_ras_late_init; + + /* If not defined special ras_cb function, use default ras_cb */ + if (!adev->umc.ras->ras_block.ras_cb) + adev->umc.ras->ras_block.ras_cb = amdgpu_umc_process_ras_data_cb; + } } @@ -677,6 +695,7 @@ static void gmc_v10_0_set_mmhub_funcs(struct amdgpu_device *adev) switch (adev->ip_versions[MMHUB_HWIP][0]) { case IP_VERSION(2, 3, 0): case IP_VERSION(2, 4, 0): + case IP_VERSION(2, 4, 1): adev->mmhub.funcs = &mmhub_v2_3_funcs; break; default: @@ -693,7 +712,9 @@ static void gmc_v10_0_set_gfxhub_funcs(struct amdgpu_device *adev) case IP_VERSION(10, 3, 1): case IP_VERSION(10, 3, 4): case IP_VERSION(10, 3, 5): + case IP_VERSION(10, 3, 6): case IP_VERSION(10, 3, 3): + case IP_VERSION(10, 3, 7): adev->gfxhub.funcs = &gfxhub_v2_1_funcs; break; default: @@ -705,6 +726,7 @@ static void gmc_v10_0_set_gfxhub_funcs(struct amdgpu_device *adev) static int gmc_v10_0_early_init(void *handle) { + int r; struct amdgpu_device *adev = (struct amdgpu_device *)handle; gmc_v10_0_set_mmhub_funcs(adev); @@ -720,6 +742,10 @@ static int gmc_v10_0_early_init(void *handle) adev->gmc.private_aperture_end = adev->gmc.private_aperture_start + (4ULL << 30) - 1; + r = amdgpu_gmc_ras_early_init(adev); + if (r) + return r; + return 0; } @@ -788,7 +814,7 @@ static int gmc_v10_0_mc_init(struct amdgpu_device *adev) adev->gmc.aper_size = pci_resource_len(adev->pdev, 0); #ifdef CONFIG_X86_64 - if (adev->flags & AMD_IS_APU) { + if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev)) { adev->gmc.aper_base = adev->gfxhub.funcs->get_mc_fb_offset(adev); adev->gmc.aper_size = adev->gmc.real_vram_size; } @@ -858,16 +884,37 @@ static int gmc_v10_0_sw_init(void *handle) } switch (adev->ip_versions[GC_HWIP][0]) { + case IP_VERSION(10, 3, 0): + adev->gmc.mall_size = 128 * 1024 * 1024; + break; + case IP_VERSION(10, 3, 2): + adev->gmc.mall_size = 96 * 1024 * 1024; + break; + case IP_VERSION(10, 3, 4): + adev->gmc.mall_size = 32 * 1024 * 1024; + break; + case IP_VERSION(10, 3, 5): + adev->gmc.mall_size = 16 * 1024 * 1024; + break; + default: + adev->gmc.mall_size = 0; + break; + } + + switch (adev->ip_versions[GC_HWIP][0]) { case IP_VERSION(10, 1, 10): case IP_VERSION(10, 1, 1): case IP_VERSION(10, 1, 2): case IP_VERSION(10, 1, 3): + case IP_VERSION(10, 1, 4): case IP_VERSION(10, 3, 0): case IP_VERSION(10, 3, 2): case IP_VERSION(10, 3, 1): case IP_VERSION(10, 3, 4): case IP_VERSION(10, 3, 5): + case IP_VERSION(10, 3, 6): case IP_VERSION(10, 3, 3): + case IP_VERSION(10, 3, 7): adev->num_vmhubs = 2; /* * To fulfill 4-level page support, @@ -919,7 +966,6 @@ static int gmc_v10_0_sw_init(void *handle) return r; amdgpu_gmc_get_vbios_allocations(adev); - amdgpu_gmc_get_reserved_allocation(adev); /* Memory manager */ r = amdgpu_bo_init(adev); @@ -986,14 +1032,7 @@ static int gmc_v10_0_gart_enable(struct amdgpu_device *adev) return -EINVAL; } - if (amdgpu_sriov_vf(adev) && amdgpu_in_reset(adev)) - goto skip_pin_bo; - - r = amdgpu_gtt_mgr_recover(&adev->mman.gtt_mgr); - if (r) - return r; - -skip_pin_bo: + amdgpu_gtt_mgr_recover(&adev->mman.gtt_mgr); r = adev->gfxhub.funcs->gart_enable(adev); if (r) return r; @@ -1019,8 +1058,6 @@ skip_pin_bo: 
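/*
 * Editor's note covering this hunk and the matching gmc_v6/v7/v8 ones
 * below: amdgpu_gtt_mgr_recover() no longer returns an error, so the
 * skip_pin_bo detour and the return-value plumbing go away, and the
 * per-ASIC "adev->gart.ready = true" assignments are dropped --
 * readiness is presumably tracked by the common GART code after this
 * series (editor's inference, not patch text).
 */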
(unsigned)(adev->gmc.gart_size >> 20), (unsigned long long)amdgpu_bo_gpu_offset(adev->gart.bo)); - adev->gart.ready = true; - return 0; } @@ -1043,6 +1080,12 @@ static int gmc_v10_0_hw_init(void *handle) if (r) return r; + if (amdgpu_emu_mode == 1) { + r = amdgpu_gmc_vram_checking(adev); + if (r) + return r; + } + if (adev->umc.funcs && adev->umc.funcs->init_registers) adev->umc.funcs->init_registers(adev); @@ -1126,6 +1169,16 @@ static int gmc_v10_0_set_clockgating_state(void *handle, int r; struct amdgpu_device *adev = (struct amdgpu_device *)handle; + /* + * The issue mmhub can't disconnect from DF with MMHUB clock gating being disabled + * is a new problem observed at DF 3.0.3, however with the same suspend sequence not + * seen any issue on the DF 3.0.2 series platform. + */ + if (adev->in_s0ix && adev->ip_versions[DF_HWIP][0] > IP_VERSION(3, 0, 2)) { + dev_dbg(adev->dev, "keep mmhub clock gating being enabled for s0ix\n"); + return 0; + } + r = adev->mmhub.funcs->set_clockgating(adev, state); if (r) return r; @@ -1136,11 +1189,12 @@ static int gmc_v10_0_set_clockgating_state(void *handle, return athub_v2_0_set_clockgating(adev, state); } -static void gmc_v10_0_get_clockgating_state(void *handle, u32 *flags) +static void gmc_v10_0_get_clockgating_state(void *handle, u64 *flags) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 1, 3)) + if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 1, 3) || + adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 1, 4)) return; adev->mmhub.funcs->get_clockgating(adev, flags); diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c index cd6c38e083d0..ec291d28edff 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c @@ -469,16 +469,14 @@ static void gmc_v6_0_set_prt(struct amdgpu_device *adev, bool enable) static int gmc_v6_0_gart_enable(struct amdgpu_device *adev) { uint64_t table_addr; - int r, i; u32 field; + int i; if (adev->gart.bo == NULL) { dev_err(adev->dev, "No VRAM object for PCIE GART.\n"); return -EINVAL; } - r = amdgpu_gtt_mgr_recover(&adev->mman.gtt_mgr); - if (r) - return r; + amdgpu_gtt_mgr_recover(&adev->mman.gtt_mgr); table_addr = amdgpu_bo_gpu_offset(adev->gart.bo); @@ -558,7 +556,6 @@ static int gmc_v6_0_gart_enable(struct amdgpu_device *adev) dev_info(adev->dev, "PCIE GART of %uM enabled (table at 0x%016llX).\n", (unsigned)(adev->gmc.gart_size >> 20), (unsigned long long)table_addr); - adev->gart.ready = true; return 0; } @@ -922,7 +919,10 @@ static int gmc_v6_0_hw_init(void *handle) if (r) return r; - return r; + if (amdgpu_emu_mode == 1) + return amdgpu_gmc_vram_checking(adev); + else + return r; } static int gmc_v6_0_hw_fini(void *handle) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c index ab8adbff9e2d..979da6f510e8 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c @@ -381,8 +381,9 @@ static int gmc_v7_0_mc_init(struct amdgpu_device *adev) adev->gmc.aper_size = pci_resource_len(adev->pdev, 0); #ifdef CONFIG_X86_64 - if (adev->flags & AMD_IS_APU && - adev->gmc.real_vram_size > adev->gmc.aper_size) { + if ((adev->flags & AMD_IS_APU) && + adev->gmc.real_vram_size > adev->gmc.aper_size && + !amdgpu_passthrough(adev)) { adev->gmc.aper_base = ((u64)RREG32(mmMC_VM_FB_OFFSET)) << 22; adev->gmc.aper_size = adev->gmc.real_vram_size; } @@ -613,17 +614,14 @@ static void gmc_v7_0_set_prt(struct amdgpu_device 
*adev, bool enable) static int gmc_v7_0_gart_enable(struct amdgpu_device *adev) { uint64_t table_addr; - int r, i; u32 tmp, field; + int i; if (adev->gart.bo == NULL) { dev_err(adev->dev, "No VRAM object for PCIE GART.\n"); return -EINVAL; } - r = amdgpu_gtt_mgr_recover(&adev->mman.gtt_mgr); - if (r) - return r; - + amdgpu_gtt_mgr_recover(&adev->mman.gtt_mgr); table_addr = amdgpu_bo_gpu_offset(adev->gart.bo); /* Setup TLB control */ @@ -712,7 +710,6 @@ static int gmc_v7_0_gart_enable(struct amdgpu_device *adev) DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n", (unsigned)(adev->gmc.gart_size >> 20), (unsigned long long)table_addr); - adev->gart.ready = true; return 0; } @@ -1111,7 +1108,10 @@ static int gmc_v7_0_hw_init(void *handle) if (r) return r; - return r; + if (amdgpu_emu_mode == 1) + return amdgpu_gmc_vram_checking(adev); + else + return r; } static int gmc_v7_0_hw_fini(void *handle) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c index 054733838292..382dde1ce74c 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c @@ -581,7 +581,7 @@ static int gmc_v8_0_mc_init(struct amdgpu_device *adev) adev->gmc.aper_size = pci_resource_len(adev->pdev, 0); #ifdef CONFIG_X86_64 - if (adev->flags & AMD_IS_APU) { + if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev)) { adev->gmc.aper_base = ((u64)RREG32(mmMC_VM_FB_OFFSET)) << 22; adev->gmc.aper_size = adev->gmc.real_vram_size; } @@ -837,17 +837,14 @@ static void gmc_v8_0_set_prt(struct amdgpu_device *adev, bool enable) static int gmc_v8_0_gart_enable(struct amdgpu_device *adev) { uint64_t table_addr; - int r, i; u32 tmp, field; + int i; if (adev->gart.bo == NULL) { dev_err(adev->dev, "No VRAM object for PCIE GART.\n"); return -EINVAL; } - r = amdgpu_gtt_mgr_recover(&adev->mman.gtt_mgr); - if (r) - return r; - + amdgpu_gtt_mgr_recover(&adev->mman.gtt_mgr); table_addr = amdgpu_bo_gpu_offset(adev->gart.bo); /* Setup TLB control */ @@ -953,7 +950,6 @@ static int gmc_v8_0_gart_enable(struct amdgpu_device *adev) DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n", (unsigned)(adev->gmc.gart_size >> 20), (unsigned long long)table_addr); - adev->gart.ready = true; return 0; } @@ -1242,7 +1238,10 @@ static int gmc_v8_0_hw_init(void *handle) if (r) return r; - return r; + if (amdgpu_emu_mode == 1) + return amdgpu_gmc_vram_checking(adev); + else + return r; } static int gmc_v8_0_hw_fini(void *handle) @@ -1691,7 +1690,7 @@ static int gmc_v8_0_set_powergating_state(void *handle, return 0; } -static void gmc_v8_0_get_clockgating_state(void *handle, u32 *flags) +static void gmc_v8_0_get_clockgating_state(void *handle, u64 *flags) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; int data; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 88c1eb9ad068..22761a3bb818 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -62,6 +62,8 @@ #include "amdgpu_ras.h" #include "amdgpu_xgmi.h" +#include "amdgpu_reset.h" + /* add these here since we already include dce12 headers and these are for DCN */ #define mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION 0x055d #define mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION_BASE_IDX 2 @@ -787,13 +789,13 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, */ if (adev->gfx.kiq.ring.sched.ready && (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev)) && - down_read_trylock(&adev->reset_sem)) { + 
down_read_trylock(&adev->reset_domain->sem)) { uint32_t req = hub->vm_inv_eng0_req + hub->eng_distance * eng; uint32_t ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng; amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, inv_req, 1 << vmid); - up_read(&adev->reset_sem); + up_read(&adev->reset_domain->sem); return; } @@ -900,7 +902,7 @@ static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, if (amdgpu_in_reset(adev)) return -EIO; - if (ring->sched.ready && down_read_trylock(&adev->reset_sem)) { + if (ring->sched.ready && down_read_trylock(&adev->reset_domain->sem)) { /* Vega20+XGMI caches PTEs in TC and TLB. Add a * heavy-weight TLB flush (type 2), which flushes * both. Due to a race condition with concurrent @@ -927,7 +929,7 @@ static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, if (r) { amdgpu_ring_undo(ring); spin_unlock(&adev->gfx.kiq.ring_lock); - up_read(&adev->reset_sem); + up_read(&adev->reset_domain->sem); return -ETIME; } @@ -936,10 +938,10 @@ static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout); if (r < 1) { dev_err(adev->dev, "wait for kiq fence error: %ld.\n", r); - up_read(&adev->reset_sem); + up_read(&adev->reset_domain->sem); return -ETIME; } - up_read(&adev->reset_sem); + up_read(&adev->reset_domain->sem); return 0; } @@ -1202,7 +1204,7 @@ static void gmc_v9_0_set_umc_funcs(struct amdgpu_device *adev) adev->umc.umc_inst_num = UMC_V6_1_UMC_INSTANCE_NUM; adev->umc.channel_offs = UMC_V6_1_PER_CHANNEL_OFFSET_VG20; adev->umc.channel_idx_tbl = &umc_v6_1_channel_idx_tbl[0][0]; - adev->umc.ras_funcs = &umc_v6_1_ras_funcs; + adev->umc.ras = &umc_v6_1_ras; break; case IP_VERSION(6, 1, 2): adev->umc.max_ras_err_cnt_per_query = UMC_V6_1_TOTAL_CHANNEL_NUM; @@ -1210,15 +1212,16 @@ static void gmc_v9_0_set_umc_funcs(struct amdgpu_device *adev) adev->umc.umc_inst_num = UMC_V6_1_UMC_INSTANCE_NUM; adev->umc.channel_offs = UMC_V6_1_PER_CHANNEL_OFFSET_ARCT; adev->umc.channel_idx_tbl = &umc_v6_1_channel_idx_tbl[0][0]; - adev->umc.ras_funcs = &umc_v6_1_ras_funcs; + adev->umc.ras = &umc_v6_1_ras; break; case IP_VERSION(6, 7, 0): - adev->umc.max_ras_err_cnt_per_query = UMC_V6_7_TOTAL_CHANNEL_NUM; + adev->umc.max_ras_err_cnt_per_query = + UMC_V6_7_TOTAL_CHANNEL_NUM * UMC_V6_7_BAD_PAGE_NUM_PER_CHANNEL; adev->umc.channel_inst_num = UMC_V6_7_CHANNEL_INSTANCE_NUM; adev->umc.umc_inst_num = UMC_V6_7_UMC_INSTANCE_NUM; adev->umc.channel_offs = UMC_V6_7_PER_CHANNEL_OFFSET; if (!adev->gmc.xgmi.connected_to_cpu) - adev->umc.ras_funcs = &umc_v6_7_ras_funcs; + adev->umc.ras = &umc_v6_7_ras; if (1 & adev->smuio.funcs->get_die_id(adev)) adev->umc.channel_idx_tbl = &umc_v6_7_channel_idx_tbl_first[0][0]; else @@ -1227,6 +1230,23 @@ static void gmc_v9_0_set_umc_funcs(struct amdgpu_device *adev) default: break; } + + if (adev->umc.ras) { + amdgpu_ras_register_ras_block(adev, &adev->umc.ras->ras_block); + + strcpy(adev->umc.ras->ras_block.ras_comm.name, "umc"); + adev->umc.ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__UMC; + adev->umc.ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE; + adev->umc.ras_if = &adev->umc.ras->ras_block.ras_comm; + + /* If don't define special ras_late_init function, use default ras_late_init */ + if (!adev->umc.ras->ras_block.ras_late_init) + adev->umc.ras->ras_block.ras_late_init = amdgpu_umc_ras_late_init; + + /* If not defined special ras_cb function, use default ras_cb */ + if (!adev->umc.ras->ras_block.ras_cb) + adev->umc.ras->ras_block.ras_cb = 
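/*
 * Editor's note: the reset_sem -> reset_domain->sem substitutions in the
 * TLB-flush paths above move reset serialization from a per-device rwsem
 * to one owned by the reset domain, so devices in the same XGMI hive
 * share a single reset lock; down_read_trylock() still lets a flush bail
 * out rather than block behind an in-flight GPU reset.
 */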
amdgpu_umc_process_ras_data_cb; + } } static void gmc_v9_0_set_mmhub_funcs(struct amdgpu_device *adev) @@ -1248,18 +1268,27 @@ static void gmc_v9_0_set_mmhub_ras_funcs(struct amdgpu_device *adev) { switch (adev->ip_versions[MMHUB_HWIP][0]) { case IP_VERSION(9, 4, 0): - adev->mmhub.ras_funcs = &mmhub_v1_0_ras_funcs; + adev->mmhub.ras = &mmhub_v1_0_ras; break; case IP_VERSION(9, 4, 1): - adev->mmhub.ras_funcs = &mmhub_v9_4_ras_funcs; + adev->mmhub.ras = &mmhub_v9_4_ras; break; case IP_VERSION(9, 4, 2): - adev->mmhub.ras_funcs = &mmhub_v1_7_ras_funcs; + adev->mmhub.ras = &mmhub_v1_7_ras; break; default: /* mmhub ras is not available */ break; } + + if (adev->mmhub.ras) { + amdgpu_ras_register_ras_block(adev, &adev->mmhub.ras->ras_block); + + strcpy(adev->mmhub.ras->ras_block.ras_comm.name, "mmhub"); + adev->mmhub.ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__MMHUB; + adev->mmhub.ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE; + adev->mmhub.ras_if = &adev->mmhub.ras->ras_block.ras_comm; + } } static void gmc_v9_0_set_gfxhub_funcs(struct amdgpu_device *adev) @@ -1269,7 +1298,9 @@ static void gmc_v9_0_set_gfxhub_funcs(struct amdgpu_device *adev) static void gmc_v9_0_set_hdp_ras_funcs(struct amdgpu_device *adev) { - adev->hdp.ras_funcs = &hdp_v4_0_ras_funcs; + adev->hdp.ras = &hdp_v4_0_ras; + amdgpu_ras_register_ras_block(adev, &adev->hdp.ras->ras_block); + adev->hdp.ras_if = &adev->hdp.ras->ras_block.ras_comm; } static void gmc_v9_0_set_mca_funcs(struct amdgpu_device *adev) @@ -1287,6 +1318,7 @@ static void gmc_v9_0_set_mca_funcs(struct amdgpu_device *adev) static int gmc_v9_0_early_init(void *handle) { + int r; struct amdgpu_device *adev = (struct amdgpu_device *)handle; /* ARCT and VEGA20 don't have XGMI defined in their IP discovery tables */ @@ -1316,6 +1348,10 @@ static int gmc_v9_0_early_init(void *handle) adev->gmc.private_aperture_end = adev->gmc.private_aperture_start + (4ULL << 30) - 1; + r = amdgpu_gmc_ras_early_init(adev); + if (r) + return r; + return 0; } @@ -1342,13 +1378,13 @@ static int gmc_v9_0_late_init(void *handle) } if (!amdgpu_persistent_edc_harvesting_supported(adev)) { - if (adev->mmhub.ras_funcs && - adev->mmhub.ras_funcs->reset_ras_error_count) - adev->mmhub.ras_funcs->reset_ras_error_count(adev); + if (adev->mmhub.ras && adev->mmhub.ras->ras_block.hw_ops && + adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count) + adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count(adev); - if (adev->hdp.ras_funcs && - adev->hdp.ras_funcs->reset_ras_error_count) - adev->hdp.ras_funcs->reset_ras_error_count(adev); + if (adev->hdp.ras && adev->hdp.ras->ras_block.hw_ops && + adev->hdp.ras->ras_block.hw_ops->reset_ras_error_count) + adev->hdp.ras->ras_block.hw_ops->reset_ras_error_count(adev); } r = amdgpu_gmc_ras_late_init(adev); @@ -1420,7 +1456,7 @@ static int gmc_v9_0_mc_init(struct amdgpu_device *adev) */ /* check whether both host-gpu and gpu-gpu xgmi links exist */ - if ((adev->flags & AMD_IS_APU) || + if (((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev)) || (adev->gmc.xgmi.supported && adev->gmc.xgmi.connected_to_cpu)) { adev->gmc.aper_base = @@ -1517,7 +1553,7 @@ static void gmc_v9_0_save_registers(struct amdgpu_device *adev) static int gmc_v9_0_sw_init(void *handle) { - int r, vram_width = 0, vram_type = 0, vram_vendor = 0; + int r, vram_width = 0, vram_type = 0, vram_vendor = 0, dma_addr_bits; struct amdgpu_device *adev = (struct amdgpu_device *)handle; adev->gfxhub.funcs->init(adev); @@ -1633,12 +1669,13 @@ static int 
gmc_v9_0_sw_init(void *handle) */ adev->gmc.mc_mask = 0xffffffffffffULL; /* 48 bit MC */ - r = dma_set_mask_and_coherent(adev->dev, DMA_BIT_MASK(44)); + dma_addr_bits = adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2) ? 48:44; + r = dma_set_mask_and_coherent(adev->dev, DMA_BIT_MASK(dma_addr_bits)); if (r) { printk(KERN_WARNING "amdgpu: No suitable DMA available.\n"); return r; } - adev->need_swiotlb = drm_need_swiotlb(44); + adev->need_swiotlb = drm_need_swiotlb(dma_addr_bits); r = gmc_v9_0_mc_init(adev); if (r) @@ -1684,7 +1721,7 @@ static int gmc_v9_0_sw_fini(void *handle) amdgpu_gem_force_release(adev); amdgpu_vm_manager_fini(adev); amdgpu_gart_table_vram_free(adev); - amdgpu_bo_unref(&adev->gmc.pdb0_bo); + amdgpu_bo_free_kernel(&adev->gmc.pdb0_bo, NULL, &adev->gmc.ptr_pdb0); amdgpu_bo_fini(adev); return 0; @@ -1752,14 +1789,7 @@ static int gmc_v9_0_gart_enable(struct amdgpu_device *adev) return -EINVAL; } - if (amdgpu_sriov_vf(adev) && amdgpu_in_reset(adev)) - goto skip_pin_bo; - - r = amdgpu_gtt_mgr_recover(&adev->mman.gtt_mgr); - if (r) - return r; - -skip_pin_bo: + amdgpu_gtt_mgr_recover(&adev->mman.gtt_mgr); r = adev->gfxhub.funcs->gart_enable(adev); if (r) return r; @@ -1776,7 +1806,6 @@ skip_pin_bo: DRM_INFO("PTB located at 0x%016llX\n", (unsigned long long)amdgpu_bo_gpu_offset(adev->gart.bo)); - adev->gart.ready = true; return 0; } @@ -1784,7 +1813,7 @@ static int gmc_v9_0_hw_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; bool value; - int i; + int i, r; /* The sequence of these two function calls matters.*/ gmc_v9_0_init_golden_registers(adev); @@ -1819,7 +1848,14 @@ static int gmc_v9_0_hw_init(void *handle) if (adev->umc.funcs && adev->umc.funcs->init_registers) adev->umc.funcs->init_registers(adev); - return gmc_v9_0_gart_enable(adev); + r = gmc_v9_0_gart_enable(adev); + if (r) + return r; + + if (amdgpu_emu_mode == 1) + return amdgpu_gmc_vram_checking(adev); + else + return r; } /** @@ -1912,7 +1948,7 @@ static int gmc_v9_0_set_clockgating_state(void *handle, return 0; } -static void gmc_v9_0_get_clockgating_state(void *handle, u32 *flags) +static void gmc_v9_0_get_clockgating_state(void *handle, u64 *flags) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c b/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c index eecfb1545c1e..adf89680f53e 100644 --- a/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c @@ -124,7 +124,7 @@ static void hdp_v4_0_update_clock_gating(struct amdgpu_device *adev, } static void hdp_v4_0_get_clockgating_state(struct amdgpu_device *adev, - u32 *flags) + u64 *flags) { int data; @@ -146,17 +146,29 @@ static void hdp_v4_0_init_registers(struct amdgpu_device *adev) WREG32_FIELD15(HDP, 0, HDP_MISC_CNTL, FLUSH_INVALIDATE_CACHE, 1); + if (adev->ip_versions[HDP_HWIP][0] == IP_VERSION(4, 4, 0)) + WREG32_FIELD15(HDP, 0, HDP_MISC_CNTL, READ_BUFFER_WATERMARK, 2); + WREG32_SOC15(HDP, 0, mmHDP_NONSURFACE_BASE, (adev->gmc.vram_start >> 8)); WREG32_SOC15(HDP, 0, mmHDP_NONSURFACE_BASE_HI, (adev->gmc.vram_start >> 40)); } -const struct amdgpu_hdp_ras_funcs hdp_v4_0_ras_funcs = { - .ras_late_init = amdgpu_hdp_ras_late_init, - .ras_fini = amdgpu_hdp_ras_fini, +struct amdgpu_ras_block_hw_ops hdp_v4_0_ras_hw_ops = { .query_ras_error_count = hdp_v4_0_query_ras_error_count, .reset_ras_error_count = hdp_v4_0_reset_ras_error_count, }; +struct amdgpu_hdp_ras hdp_v4_0_ras = { + .ras_block = { + .ras_comm = { + .name = "hdp", + .block = AMDGPU_RAS_BLOCK__HDP, 
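/*
 * Editor's note: in the gmc_v9_0_sw_init() hunk above, Aldebaran
 * (GC 9.4.2) now advertises a 48-bit DMA mask while the other ASICs stay
 * at 44 bits, and the same width feeds drm_need_swiotlb() so the
 * bounce-buffer decision tracks the device's actual addressing limit.
 */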
+ .type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE, + }, + .hw_ops = &hdp_v4_0_ras_hw_ops, + }, +}; + const struct amdgpu_hdp_funcs hdp_v4_0_funcs = { .flush_hdp = hdp_v4_0_flush_hdp, .invalidate_hdp = hdp_v4_0_invalidate_hdp, diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.h b/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.h index dc3a1b81dd62..c44eee9282ab 100644 --- a/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.h +++ b/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.h @@ -27,6 +27,6 @@ #include "soc15_common.h" extern const struct amdgpu_hdp_funcs hdp_v4_0_funcs; -extern const struct amdgpu_hdp_ras_funcs hdp_v4_0_ras_funcs; +extern struct amdgpu_hdp_ras hdp_v4_0_ras; #endif diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c b/drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c index 5793977953cc..a9ea23fa0def 100644 --- a/drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c @@ -181,7 +181,7 @@ static void hdp_v5_0_update_clock_gating(struct amdgpu_device *adev, } static void hdp_v5_0_get_clockgating_state(struct amdgpu_device *adev, - u32 *flags) + u64 *flags) { uint32_t tmp; diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c index a29c86617fb5..8c3227d0b8b4 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c @@ -26,6 +26,7 @@ #include "soc15.h" #include "soc15d.h" #include "jpeg_v2_0.h" +#include "jpeg_v2_5.h" #include "vcn/vcn_2_5_offset.h" #include "vcn/vcn_2_5_sh_mask.h" @@ -39,6 +40,7 @@ static void jpeg_v2_5_set_dec_ring_funcs(struct amdgpu_device *adev); static void jpeg_v2_5_set_irq_funcs(struct amdgpu_device *adev); static int jpeg_v2_5_set_powergating_state(void *handle, enum amd_powergating_state state); +static void jpeg_v2_5_set_ras_funcs(struct amdgpu_device *adev); static int amdgpu_ih_clientid_jpeg[] = { SOC15_IH_CLIENTID_VCN, @@ -70,6 +72,7 @@ static int jpeg_v2_5_early_init(void *handle) jpeg_v2_5_set_dec_ring_funcs(adev); jpeg_v2_5_set_irq_funcs(adev); + jpeg_v2_5_set_ras_funcs(adev); return 0; } @@ -730,3 +733,74 @@ const struct amdgpu_ip_block_version jpeg_v2_6_ip_block = .rev = 0, .funcs = &jpeg_v2_6_ip_funcs, }; + +static uint32_t jpeg_v2_6_query_poison_by_instance(struct amdgpu_device *adev, + uint32_t instance, uint32_t sub_block) +{ + uint32_t poison_stat = 0, reg_value = 0; + + switch (sub_block) { + case AMDGPU_JPEG_V2_6_JPEG0: + reg_value = RREG32_SOC15(JPEG, instance, mmUVD_RAS_JPEG0_STATUS); + poison_stat = REG_GET_FIELD(reg_value, UVD_RAS_JPEG0_STATUS, POISONED_PF); + break; + case AMDGPU_JPEG_V2_6_JPEG1: + reg_value = RREG32_SOC15(JPEG, instance, mmUVD_RAS_JPEG1_STATUS); + poison_stat = REG_GET_FIELD(reg_value, UVD_RAS_JPEG1_STATUS, POISONED_PF); + break; + default: + break; + } + + if (poison_stat) + dev_info(adev->dev, "Poison detected in JPEG%d sub_block%d\n", + instance, sub_block); + + return poison_stat; +} + +static bool jpeg_v2_6_query_ras_poison_status(struct amdgpu_device *adev) +{ + uint32_t inst = 0, sub = 0, poison_stat = 0; + + for (inst = 0; inst < adev->jpeg.num_jpeg_inst; inst++) + for (sub = 0; sub < AMDGPU_JPEG_V2_6_MAX_SUB_BLOCK; sub++) + poison_stat += + jpeg_v2_6_query_poison_by_instance(adev, inst, sub); + + return !!poison_stat; +} + +const struct amdgpu_ras_block_hw_ops jpeg_v2_6_ras_hw_ops = { + .query_poison_status = jpeg_v2_6_query_ras_poison_status, +}; + +static struct amdgpu_jpeg_ras jpeg_v2_6_ras = { + .ras_block = { + .hw_ops = &jpeg_v2_6_ras_hw_ops, + }, +}; + +static void jpeg_v2_5_set_ras_funcs(struct amdgpu_device *adev) +{ + switch 
(adev->ip_versions[JPEG_HWIP][0]) { + case IP_VERSION(2, 6, 0): + adev->jpeg.ras = &jpeg_v2_6_ras; + break; + default: + break; + } + + if (adev->jpeg.ras) { + amdgpu_ras_register_ras_block(adev, &adev->jpeg.ras->ras_block); + + strcpy(adev->jpeg.ras->ras_block.ras_comm.name, "jpeg"); + adev->jpeg.ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__JPEG; + adev->jpeg.ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__POISON; + adev->jpeg.ras_if = &adev->jpeg.ras->ras_block.ras_comm; + + /* If don't define special ras_late_init function, use default ras_late_init */ + if (!adev->jpeg.ras->ras_block.ras_late_init) + adev->jpeg.ras->ras_block.ras_late_init = amdgpu_ras_block_late_init; + } +} diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.h b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.h index 3b0aa29b9879..1e858c6cdf13 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.h +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.h @@ -24,6 +24,13 @@ #ifndef __JPEG_V2_5_H__ #define __JPEG_V2_5_H__ +enum amdgpu_jpeg_v2_6_sub_block { + AMDGPU_JPEG_V2_6_JPEG0 = 0, + AMDGPU_JPEG_V2_6_JPEG1, + + AMDGPU_JPEG_V2_6_MAX_SUB_BLOCK, +}; + extern const struct amdgpu_ip_block_version jpeg_v2_5_ip_block; extern const struct amdgpu_ip_block_version jpeg_v2_6_ip_block; diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c index 01c242c5abc3..41a00851b6c5 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c @@ -50,11 +50,16 @@ static int jpeg_v3_0_early_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - if (adev->asic_type != CHIP_YELLOW_CARP) { - u32 harvest = RREG32_SOC15(JPEG, 0, mmCC_UVD_HARVESTING); + u32 harvest; + switch (adev->ip_versions[UVD_HWIP][0]) { + case IP_VERSION(3, 1, 1): + break; + default: + harvest = RREG32_SOC15(JPEG, 0, mmCC_UVD_HARVESTING); if (harvest & CC_UVD_HARVESTING__UVD_DISABLE_MASK) return -ENOENT; + break; } adev->jpeg.num_jpeg_inst = 1; diff --git a/drivers/gpu/drm/amd/amdgpu/mca_v3_0.c b/drivers/gpu/drm/amd/amdgpu/mca_v3_0.c index 8f7107d392af..d4bd7d1d2649 100644 --- a/drivers/gpu/drm/amd/amdgpu/mca_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mca_v3_0.c @@ -37,24 +37,36 @@ static void mca_v3_0_mp0_query_ras_error_count(struct amdgpu_device *adev, ras_error_status); } -static int mca_v3_0_mp0_ras_late_init(struct amdgpu_device *adev) +static int mca_v3_0_ras_block_match(struct amdgpu_ras_block_object *block_obj, + enum amdgpu_ras_block block, uint32_t sub_block_index) { - return amdgpu_mca_ras_late_init(adev, &adev->mca.mp0); -} + if (!block_obj) + return -EINVAL; -static void mca_v3_0_mp0_ras_fini(struct amdgpu_device *adev) -{ - amdgpu_mca_ras_fini(adev, &adev->mca.mp0); + if ((block_obj->ras_comm.block == block) && + (block_obj->ras_comm.sub_block_index == sub_block_index)) { + return 0; + } + + return -EINVAL; } -const struct amdgpu_mca_ras_funcs mca_v3_0_mp0_ras_funcs = { - .ras_late_init = mca_v3_0_mp0_ras_late_init, - .ras_fini = mca_v3_0_mp0_ras_fini, +const struct amdgpu_ras_block_hw_ops mca_v3_0_mp0_hw_ops = { .query_ras_error_count = mca_v3_0_mp0_query_ras_error_count, .query_ras_error_address = NULL, - .ras_block = AMDGPU_RAS_BLOCK__MCA, - .ras_sub_block = AMDGPU_RAS_MCA_BLOCK__MP0, - .sysfs_name = "mp0_err_count", +}; + +struct amdgpu_mca_ras_block mca_v3_0_mp0_ras = { + .ras_block = { + .ras_comm = { + .block = AMDGPU_RAS_BLOCK__MCA, + .sub_block_index = AMDGPU_RAS_MCA_BLOCK__MP0, + .type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE, + .name = "mp0", + }, + .hw_ops = 
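/*
 * Editor's note: MCA is the only block in this series that supplies its
 * own ras_block_match() -- matching on (block, sub_block_index) -- since
 * MP0, MP1 and MPIO all register under AMDGPU_RAS_BLOCK__MCA and the
 * default match-by-block alone could not tell them apart.
 */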
&mca_v3_0_mp0_hw_ops, + .ras_block_match = mca_v3_0_ras_block_match, + }, }; static void mca_v3_0_mp1_query_ras_error_count(struct amdgpu_device *adev, @@ -65,24 +77,22 @@ static void mca_v3_0_mp1_query_ras_error_count(struct amdgpu_device *adev, ras_error_status); } -static int mca_v3_0_mp1_ras_late_init(struct amdgpu_device *adev) -{ - return amdgpu_mca_ras_late_init(adev, &adev->mca.mp1); -} - -static void mca_v3_0_mp1_ras_fini(struct amdgpu_device *adev) -{ - amdgpu_mca_ras_fini(adev, &adev->mca.mp1); -} - -const struct amdgpu_mca_ras_funcs mca_v3_0_mp1_ras_funcs = { - .ras_late_init = mca_v3_0_mp1_ras_late_init, - .ras_fini = mca_v3_0_mp1_ras_fini, +const struct amdgpu_ras_block_hw_ops mca_v3_0_mp1_hw_ops = { .query_ras_error_count = mca_v3_0_mp1_query_ras_error_count, .query_ras_error_address = NULL, - .ras_block = AMDGPU_RAS_BLOCK__MCA, - .ras_sub_block = AMDGPU_RAS_MCA_BLOCK__MP1, - .sysfs_name = "mp1_err_count", +}; + +struct amdgpu_mca_ras_block mca_v3_0_mp1_ras = { + .ras_block = { + .ras_comm = { + .block = AMDGPU_RAS_BLOCK__MCA, + .sub_block_index = AMDGPU_RAS_MCA_BLOCK__MP1, + .type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE, + .name = "mp1", + }, + .hw_ops = &mca_v3_0_mp1_hw_ops, + .ras_block_match = mca_v3_0_ras_block_match, + }, }; static void mca_v3_0_mpio_query_ras_error_count(struct amdgpu_device *adev, @@ -93,24 +103,22 @@ static void mca_v3_0_mpio_query_ras_error_count(struct amdgpu_device *adev, ras_error_status); } -static int mca_v3_0_mpio_ras_late_init(struct amdgpu_device *adev) -{ - return amdgpu_mca_ras_late_init(adev, &adev->mca.mpio); -} - -static void mca_v3_0_mpio_ras_fini(struct amdgpu_device *adev) -{ - amdgpu_mca_ras_fini(adev, &adev->mca.mpio); -} - -const struct amdgpu_mca_ras_funcs mca_v3_0_mpio_ras_funcs = { - .ras_late_init = mca_v3_0_mpio_ras_late_init, - .ras_fini = mca_v3_0_mpio_ras_fini, +const struct amdgpu_ras_block_hw_ops mca_v3_0_mpio_hw_ops = { .query_ras_error_count = mca_v3_0_mpio_query_ras_error_count, .query_ras_error_address = NULL, - .ras_block = AMDGPU_RAS_BLOCK__MCA, - .ras_sub_block = AMDGPU_RAS_MCA_BLOCK__MPIO, - .sysfs_name = "mpio_err_count", +}; + +struct amdgpu_mca_ras_block mca_v3_0_mpio_ras = { + .ras_block = { + .ras_comm = { + .block = AMDGPU_RAS_BLOCK__MCA, + .sub_block_index = AMDGPU_RAS_MCA_BLOCK__MPIO, + .type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE, + .name = "mpio", + }, + .hw_ops = &mca_v3_0_mpio_hw_ops, + .ras_block_match = mca_v3_0_ras_block_match, + }, }; @@ -118,9 +126,15 @@ static void mca_v3_0_init(struct amdgpu_device *adev) { struct amdgpu_mca *mca = &adev->mca; - mca->mp0.ras_funcs = &mca_v3_0_mp0_ras_funcs; - mca->mp1.ras_funcs = &mca_v3_0_mp1_ras_funcs; - mca->mpio.ras_funcs = &mca_v3_0_mpio_ras_funcs; + mca->mp0.ras = &mca_v3_0_mp0_ras; + mca->mp1.ras = &mca_v3_0_mp1_ras; + mca->mpio.ras = &mca_v3_0_mpio_ras; + amdgpu_ras_register_ras_block(adev, &mca->mp0.ras->ras_block); + amdgpu_ras_register_ras_block(adev, &mca->mp1.ras->ras_block); + amdgpu_ras_register_ras_block(adev, &mca->mpio.ras->ras_block); + mca->mp0.ras_if = &mca->mp0.ras->ras_block.ras_comm; + mca->mp1.ras_if = &mca->mp1.ras->ras_block.ras_comm; + mca->mpio.ras_if = &mca->mpio.ras->ras_block.ras_comm; } const struct amdgpu_mca_funcs mca_v3_0_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c index 1da2ec692057..3f44a099c52a 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c @@ -546,7 +546,7 @@ static int mmhub_v1_0_set_clockgating(struct 
amdgpu_device *adev, return 0; } -static void mmhub_v1_0_get_clockgating(struct amdgpu_device *adev, u32 *flags) +static void mmhub_v1_0_get_clockgating(struct amdgpu_device *adev, u64 *flags) { int data, data1; @@ -774,13 +774,17 @@ static void mmhub_v1_0_reset_ras_error_count(struct amdgpu_device *adev) } } -const struct amdgpu_mmhub_ras_funcs mmhub_v1_0_ras_funcs = { - .ras_late_init = amdgpu_mmhub_ras_late_init, - .ras_fini = amdgpu_mmhub_ras_fini, +struct amdgpu_ras_block_hw_ops mmhub_v1_0_ras_hw_ops = { .query_ras_error_count = mmhub_v1_0_query_ras_error_count, .reset_ras_error_count = mmhub_v1_0_reset_ras_error_count, }; +struct amdgpu_mmhub_ras mmhub_v1_0_ras = { + .ras_block = { + .hw_ops = &mmhub_v1_0_ras_hw_ops, + }, +}; + const struct amdgpu_mmhub_funcs mmhub_v1_0_funcs = { .get_fb_location = mmhub_v1_0_get_fb_location, .init = mmhub_v1_0_init, diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.h b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.h index 4661b094e007..dae7ca48bd8b 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.h +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.h @@ -24,6 +24,6 @@ #define __MMHUB_V1_0_H__ extern const struct amdgpu_mmhub_funcs mmhub_v1_0_funcs; -extern const struct amdgpu_mmhub_ras_funcs mmhub_v1_0_ras_funcs; +extern struct amdgpu_mmhub_ras mmhub_v1_0_ras; #endif diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c index f5f7181f9af5..6fa7090bc6cb 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c @@ -542,7 +542,7 @@ static int mmhub_v1_7_set_clockgating(struct amdgpu_device *adev, return 0; } -static void mmhub_v1_7_get_clockgating(struct amdgpu_device *adev, u32 *flags) +static void mmhub_v1_7_get_clockgating(struct amdgpu_device *adev, u64 *flags) { int data, data1; @@ -1321,15 +1321,19 @@ static void mmhub_v1_7_reset_ras_error_status(struct amdgpu_device *adev) } } -const struct amdgpu_mmhub_ras_funcs mmhub_v1_7_ras_funcs = { - .ras_late_init = amdgpu_mmhub_ras_late_init, - .ras_fini = amdgpu_mmhub_ras_fini, +struct amdgpu_ras_block_hw_ops mmhub_v1_7_ras_hw_ops = { .query_ras_error_count = mmhub_v1_7_query_ras_error_count, .reset_ras_error_count = mmhub_v1_7_reset_ras_error_count, .query_ras_error_status = mmhub_v1_7_query_ras_error_status, .reset_ras_error_status = mmhub_v1_7_reset_ras_error_status, }; +struct amdgpu_mmhub_ras mmhub_v1_7_ras = { + .ras_block = { + .hw_ops = &mmhub_v1_7_ras_hw_ops, + }, +}; + const struct amdgpu_mmhub_funcs mmhub_v1_7_funcs = { .get_fb_location = mmhub_v1_7_get_fb_location, .init = mmhub_v1_7_init, diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.h b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.h index a7f9dfc24697..629f49052137 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.h +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.h @@ -24,6 +24,6 @@ #define __MMHUB_V1_7_H__ extern const struct amdgpu_mmhub_funcs mmhub_v1_7_funcs; -extern const struct amdgpu_mmhub_ras_funcs mmhub_v1_7_ras_funcs; +extern struct amdgpu_mmhub_ras mmhub_v1_7_ras; #endif diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c index 3718ff610ab2..636abd855686 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c @@ -682,7 +682,7 @@ static int mmhub_v2_0_set_clockgating(struct amdgpu_device *adev, return 0; } -static void mmhub_v2_0_get_clockgating(struct amdgpu_device *adev, u32 *flags) +static void mmhub_v2_0_get_clockgating(struct amdgpu_device *adev, u64 *flags) { int data, data1; 
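The u32 to u64 widening of the *flags parameter, repeated through the get_clockgating(_state) callbacks above and below, is there because the AMD_CG_SUPPORT_* clock-gating feature mask had outgrown 32 bits. A minimal sketch of the callback idiom under the widened signature; the register, field, and flag names here (mmEXAMPLE_CLK_CTRL, EXAMPLE_CLK_CTRL__MGCG_OVERRIDE_MASK, AMD_CG_SUPPORT_EXAMPLE_MGCG) are illustrative placeholders, not symbols from this patch:

/* Illustrative sketch only, not part of the patch: the common pattern is
 * to read a clock-control register and report a gating feature as enabled
 * in the (now 64-bit) mask when its override bit is clear.
 */
static void example_get_clockgating_state(struct amdgpu_device *adev,
					  u64 *flags)
{
	int data;

	/* AMD_CG_SUPPORT_EXAMPLE_MGCG */
	data = RREG32_SOC15(NBIO, 0, mmEXAMPLE_CLK_CTRL);
	if (!(data & EXAMPLE_CLK_CTRL__MGCG_OVERRIDE_MASK))
		*flags |= AMD_CG_SUPPORT_EXAMPLE_MGCG;
}

Because each implementation only fills bits into *flags, the conversion is mechanical; the catch is that every hook and caller has to move to u64 in the same series to keep the prototypes consistent, which is what this sweep does.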
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c index 9e16da28505a..ff44c5364a8c 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c @@ -93,6 +93,7 @@ mmhub_v2_3_print_l2_protection_fault_status(struct amdgpu_device *adev, switch (adev->ip_versions[MMHUB_HWIP][0]) { case IP_VERSION(2, 3, 0): case IP_VERSION(2, 4, 0): + case IP_VERSION(2, 4, 1): mmhub_cid = mmhub_client_ids_vangogh[cid][rw]; break; default: @@ -576,7 +577,7 @@ static int mmhub_v2_3_set_clockgating(struct amdgpu_device *adev, return 0; } -static void mmhub_v2_3_get_clockgating(struct amdgpu_device *adev, u32 *flags) +static void mmhub_v2_3_get_clockgating(struct amdgpu_device *adev, u64 *flags) { int data, data1, data2, data3; diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c index ff49eeaf7882..6e0145b2b408 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c @@ -647,7 +647,7 @@ static int mmhub_v9_4_set_clockgating(struct amdgpu_device *adev, return 0; } -static void mmhub_v9_4_get_clockgating(struct amdgpu_device *adev, u32 *flags) +static void mmhub_v9_4_get_clockgating(struct amdgpu_device *adev, u64 *flags) { int data, data1; @@ -1655,14 +1655,18 @@ static void mmhub_v9_4_query_ras_error_status(struct amdgpu_device *adev) } } -const struct amdgpu_mmhub_ras_funcs mmhub_v9_4_ras_funcs = { - .ras_late_init = amdgpu_mmhub_ras_late_init, - .ras_fini = amdgpu_mmhub_ras_fini, +const struct amdgpu_ras_block_hw_ops mmhub_v9_4_ras_hw_ops = { .query_ras_error_count = mmhub_v9_4_query_ras_error_count, .reset_ras_error_count = mmhub_v9_4_reset_ras_error_count, .query_ras_error_status = mmhub_v9_4_query_ras_error_status, }; +struct amdgpu_mmhub_ras mmhub_v9_4_ras = { + .ras_block = { + .hw_ops = &mmhub_v9_4_ras_hw_ops, + }, +}; + const struct amdgpu_mmhub_funcs mmhub_v9_4_funcs = { .get_fb_location = mmhub_v9_4_get_fb_location, .init = mmhub_v9_4_init, diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.h b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.h index 90436efa92ef..a48329d95f71 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.h +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.h @@ -24,6 +24,6 @@ #define __MMHUB_V9_4_H__ extern const struct amdgpu_mmhub_funcs mmhub_v9_4_funcs; -extern const struct amdgpu_mmhub_ras_funcs mmhub_v9_4_ras_funcs; +extern struct amdgpu_mmhub_ras mmhub_v9_4_ras; #endif diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c index 56da5ab82987..b81acf59870c 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c @@ -32,6 +32,8 @@ #include "soc15_common.h" #include "mxgpu_ai.h" +#include "amdgpu_reset.h" + static void xgpu_ai_mailbox_send_ack(struct amdgpu_device *adev) { WREG8(AI_MAIBOX_CONTROL_RCV_OFFSET_BYTE, 2); @@ -257,10 +259,10 @@ static void xgpu_ai_mailbox_flr_work(struct work_struct *work) * otherwise the mailbox msg will be ruined/reseted by * the VF FLR. 
*/ - if (atomic_cmpxchg(&adev->in_gpu_reset, 0, 1) != 0) + if (atomic_cmpxchg(&adev->reset_domain->in_gpu_reset, 0, 1) != 0) return; - down_write(&adev->reset_sem); + down_write(&adev->reset_domain->sem); amdgpu_virt_fini_data_exchange(adev); @@ -275,14 +277,14 @@ static void xgpu_ai_mailbox_flr_work(struct work_struct *work) } while (timeout > 1); flr_done: - atomic_set(&adev->in_gpu_reset, 0); - up_write(&adev->reset_sem); + atomic_set(&adev->reset_domain->in_gpu_reset, 0); + up_write(&adev->reset_domain->sem); /* Trigger recovery for world switch failure if no TDR */ if (amdgpu_device_should_recover_gpu(adev) && (!amdgpu_device_has_job_running(adev) || adev->sdma_timeout == MAX_SCHEDULE_TIMEOUT)) - amdgpu_device_gpu_recover(adev, NULL); + amdgpu_device_gpu_recover_imp(adev, NULL); } static int xgpu_ai_set_mailbox_rcv_irq(struct amdgpu_device *adev, @@ -307,8 +309,11 @@ static int xgpu_ai_mailbox_rcv_irq(struct amdgpu_device *adev, switch (event) { case IDH_FLR_NOTIFICATION: - if (amdgpu_sriov_runtime(adev)) - schedule_work(&adev->virt.flr_work); + if (amdgpu_sriov_runtime(adev) && !amdgpu_in_reset(adev)) + WARN_ONCE(!amdgpu_reset_domain_schedule(adev->reset_domain, + &adev->virt.flr_work), + "Failed to queue work! at %s", + __func__); break; case IDH_QUERY_ALIVE: xgpu_ai_mailbox_send_ack(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c index 477d0dde19c5..22c10b97ea81 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c @@ -31,6 +31,8 @@ #include "soc15_common.h" #include "mxgpu_nv.h" +#include "amdgpu_reset.h" + static void xgpu_nv_mailbox_send_ack(struct amdgpu_device *adev) { WREG8(NV_MAIBOX_CONTROL_RCV_OFFSET_BYTE, 2); @@ -281,10 +283,10 @@ static void xgpu_nv_mailbox_flr_work(struct work_struct *work) * otherwise the mailbox msg will be ruined/reseted by * the VF FLR. */ - if (atomic_cmpxchg(&adev->in_gpu_reset, 0, 1) != 0) + if (atomic_cmpxchg(&adev->reset_domain->in_gpu_reset, 0, 1) != 0) return; - down_write(&adev->reset_sem); + down_write(&adev->reset_domain->sem); amdgpu_virt_fini_data_exchange(adev); @@ -299,8 +301,8 @@ static void xgpu_nv_mailbox_flr_work(struct work_struct *work) } while (timeout > 1); flr_done: - atomic_set(&adev->in_gpu_reset, 0); - up_write(&adev->reset_sem); + atomic_set(&adev->reset_domain->in_gpu_reset, 0); + up_write(&adev->reset_domain->sem); /* Trigger recovery for world switch failure if no TDR */ if (amdgpu_device_should_recover_gpu(adev) @@ -309,7 +311,7 @@ flr_done: adev->gfx_timeout == MAX_SCHEDULE_TIMEOUT || adev->compute_timeout == MAX_SCHEDULE_TIMEOUT || adev->video_timeout == MAX_SCHEDULE_TIMEOUT)) - amdgpu_device_gpu_recover(adev, NULL); + amdgpu_device_gpu_recover_imp(adev, NULL); } static int xgpu_nv_set_mailbox_rcv_irq(struct amdgpu_device *adev, @@ -337,8 +339,11 @@ static int xgpu_nv_mailbox_rcv_irq(struct amdgpu_device *adev, switch (event) { case IDH_FLR_NOTIFICATION: - if (amdgpu_sriov_runtime(adev)) - schedule_work(&adev->virt.flr_work); + if (amdgpu_sriov_runtime(adev) && !amdgpu_in_reset(adev)) + WARN_ONCE(!amdgpu_reset_domain_schedule(adev->reset_domain, + &adev->virt.flr_work), + "Failed to queue work! 
at %s", + __func__); break; /* READY_TO_ACCESS_GPU is fetched by kernel polling, IRQ can ignore * it byfar since that polling thread will handle it, diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c index aef9d059ae52..7b63d30b9b79 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c @@ -42,6 +42,8 @@ #include "smu/smu_7_1_3_d.h" #include "mxgpu_vi.h" +#include "amdgpu_reset.h" + /* VI golden setting */ static const u32 xgpu_fiji_mgcg_cgcg_init[] = { mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff, @@ -521,7 +523,7 @@ static void xgpu_vi_mailbox_flr_work(struct work_struct *work) /* Trigger recovery due to world switch failure */ if (amdgpu_device_should_recover_gpu(adev)) - amdgpu_device_gpu_recover(adev, NULL); + amdgpu_device_gpu_recover_imp(adev, NULL); } static int xgpu_vi_set_mailbox_rcv_irq(struct amdgpu_device *adev, @@ -544,14 +546,17 @@ static int xgpu_vi_mailbox_rcv_irq(struct amdgpu_device *adev, { int r; - /* trigger gpu-reset by hypervisor only if TDR disbaled */ + /* trigger gpu-reset by hypervisor only if TDR disabled */ if (!amdgpu_gpu_recovery) { /* see what event we get */ r = xgpu_vi_mailbox_rcv_msg(adev, IDH_FLR_NOTIFICATION); /* only handle FLR_NOTIFY now */ - if (!r) - schedule_work(&adev->virt.flr_work); + if (!r && !amdgpu_in_reset(adev)) + WARN_ONCE(!amdgpu_reset_domain_schedule(adev->reset_domain, + &adev->virt.flr_work), + "Failed to queue work! at %s", + __func__); } return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c index 8ce5b8ca1fd7..97201ab0965e 100644 --- a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c @@ -685,7 +685,7 @@ static int navi10_ih_set_powergating_state(void *handle, return 0; } -static void navi10_ih_get_clockgating_state(void *handle, u32 *flags) +static void navi10_ih_get_clockgating_state(void *handle, u64 *flags) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c index ee7cab37dfd5..6cd1fb2eb913 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c @@ -278,7 +278,7 @@ static void nbio_v2_3_update_medium_grain_light_sleep(struct amdgpu_device *adev } static void nbio_v2_3_get_clockgating_state(struct amdgpu_device *adev, - u32 *flags) + u64 *flags) { int data; diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c index 4bbacf1be25a..f7f6ddebd3e4 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c @@ -210,7 +210,7 @@ static void nbio_v6_1_update_medium_grain_light_sleep(struct amdgpu_device *adev } static void nbio_v6_1_get_clockgating_state(struct amdgpu_device *adev, - u32 *flags) + u64 *flags) { int data; diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c index 37a4039fdfc5..aa0326d00c72 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c @@ -205,7 +205,7 @@ static void nbio_v7_0_update_medium_grain_light_sleep(struct amdgpu_device *adev } static void nbio_v7_0_get_clockgating_state(struct amdgpu_device *adev, - u32 *flags) + u64 *flags) { int data; diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_2.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_2.c index 3444332ea110..31776b12e4c4 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_2.c +++ 
b/drivers/gpu/drm/amd/amdgpu/nbio_v7_2.c @@ -59,10 +59,16 @@ static u32 nbio_v7_2_get_rev_id(struct amdgpu_device *adev) { u32 tmp; - if (adev->asic_type == CHIP_YELLOW_CARP) + switch (adev->ip_versions[NBIO_HWIP][0]) { + case IP_VERSION(7, 2, 1): + case IP_VERSION(7, 3, 0): + case IP_VERSION(7, 5, 0): tmp = RREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_DEV0_EPF0_STRAP0_YC); - else + break; + default: tmp = RREG32_SOC15(NBIO, 0, regRCC_STRAP0_RCC_DEV0_EPF0_STRAP0); + break; + } tmp &= RCC_STRAP0_RCC_DEV0_EPF0_STRAP0__STRAP_ATI_REV_ID_DEV0_F0_MASK; tmp >>= RCC_STRAP0_RCC_DEV0_EPF0_STRAP0__STRAP_ATI_REV_ID_DEV0_F0__SHIFT; @@ -72,20 +78,26 @@ static u32 nbio_v7_2_get_rev_id(struct amdgpu_device *adev) static void nbio_v7_2_mc_access_enable(struct amdgpu_device *adev, bool enable) { - if (enable) - if (adev->asic_type == CHIP_YELLOW_CARP) + switch (adev->ip_versions[NBIO_HWIP][0]) { + case IP_VERSION(7, 2, 1): + case IP_VERSION(7, 3, 0): + case IP_VERSION(7, 5, 0): + if (enable) WREG32_SOC15(NBIO, 0, regBIF_BX0_BIF_FB_EN_YC, BIF_BX0_BIF_FB_EN__FB_READ_EN_MASK | BIF_BX0_BIF_FB_EN__FB_WRITE_EN_MASK); else + WREG32_SOC15(NBIO, 0, regBIF_BX0_BIF_FB_EN_YC, 0); + break; + default: + if (enable) WREG32_SOC15(NBIO, 0, regBIF_BX0_BIF_FB_EN, BIF_BX0_BIF_FB_EN__FB_READ_EN_MASK | BIF_BX0_BIF_FB_EN__FB_WRITE_EN_MASK); - else - if (adev->asic_type == CHIP_YELLOW_CARP) - WREG32_SOC15(NBIO, 0, regBIF_BX0_BIF_FB_EN_YC, 0); else WREG32_SOC15(NBIO, 0, regBIF_BX0_BIF_FB_EN, 0); + break; + } } static u32 nbio_v7_2_get_memsize(struct amdgpu_device *adev) @@ -250,7 +262,10 @@ static void nbio_v7_2_update_medium_grain_light_sleep(struct amdgpu_device *adev { uint32_t def, data; - if (adev->asic_type == CHIP_YELLOW_CARP) { + switch (adev->ip_versions[NBIO_HWIP][0]) { + case IP_VERSION(7, 2, 1): + case IP_VERSION(7, 3, 0): + case IP_VERSION(7, 5, 0): def = data = RREG32_PCIE_PORT(SOC15_REG_OFFSET(NBIO, 0, regPCIE_CNTL2)); if (enable && (adev->cg_flags & AMD_CG_SUPPORT_BIF_LS)) data |= PCIE_CNTL2__SLV_MEM_LS_EN_MASK; @@ -260,8 +275,8 @@ static void nbio_v7_2_update_medium_grain_light_sleep(struct amdgpu_device *adev if (def != data) WREG32_PCIE_PORT(SOC15_REG_OFFSET(NBIO, 0, regPCIE_CNTL2), data); - data = RREG32_PCIE_PORT(SOC15_REG_OFFSET(NBIO, 0, regBIF1_PCIE_TX_POWER_CTRL_1)); - def = data; + def = data = RREG32_PCIE_PORT(SOC15_REG_OFFSET(NBIO, 0, + regBIF1_PCIE_TX_POWER_CTRL_1)); if (enable && (adev->cg_flags & AMD_CG_SUPPORT_BIF_LS)) data |= (BIF1_PCIE_TX_POWER_CTRL_1__MST_MEM_LS_EN_MASK | BIF1_PCIE_TX_POWER_CTRL_1__REPLAY_MEM_LS_EN_MASK); @@ -272,7 +287,8 @@ static void nbio_v7_2_update_medium_grain_light_sleep(struct amdgpu_device *adev if (def != data) WREG32_PCIE_PORT(SOC15_REG_OFFSET(NBIO, 0, regBIF1_PCIE_TX_POWER_CTRL_1), data); - } else { + break; + default: def = data = RREG32_PCIE_PORT(SOC15_REG_OFFSET(NBIO, 0, regPCIE_CNTL2)); if (enable && (adev->cg_flags & AMD_CG_SUPPORT_BIF_LS)) data |= (PCIE_CNTL2__SLV_MEM_LS_EN_MASK | @@ -285,11 +301,12 @@ static void nbio_v7_2_update_medium_grain_light_sleep(struct amdgpu_device *adev if (def != data) WREG32_PCIE_PORT(SOC15_REG_OFFSET(NBIO, 0, regPCIE_CNTL2), data); + break; } } static void nbio_v7_2_get_clockgating_state(struct amdgpu_device *adev, - u32 *flags) + u64 *flags) { int data; @@ -352,7 +369,10 @@ const struct nbio_hdp_flush_reg nbio_v7_2_hdp_flush_reg = { static void nbio_v7_2_init_registers(struct amdgpu_device *adev) { uint32_t def, data; - if (adev->asic_type == CHIP_YELLOW_CARP) { + switch (adev->ip_versions[NBIO_HWIP][0]) { + case IP_VERSION(7, 2, 1): + 
case IP_VERSION(7, 3, 0): + case IP_VERSION(7, 5, 0): def = data = RREG32_PCIE_PORT(SOC15_REG_OFFSET(NBIO, 0, regBIF1_PCIE_MST_CTRL_3)); data = REG_SET_FIELD(data, BIF1_PCIE_MST_CTRL_3, CI_SWUS_MAX_READ_REQUEST_SIZE_MODE, 1); @@ -361,7 +381,8 @@ static void nbio_v7_2_init_registers(struct amdgpu_device *adev) if (def != data) WREG32_PCIE_PORT(SOC15_REG_OFFSET(NBIO, 0, regBIF1_PCIE_MST_CTRL_3), data); - } else { + break; + default: def = data = RREG32_PCIE_PORT(SOC15_REG_OFFSET(NBIO, 0, regPCIE_CONFIG_CNTL)); data = REG_SET_FIELD(data, PCIE_CONFIG_CNTL, CI_SWUS_MAX_READ_REQUEST_SIZE_MODE, 1); @@ -370,6 +391,7 @@ static void nbio_v7_2_init_registers(struct amdgpu_device *adev) if (def != data) WREG32_PCIE_PORT(SOC15_REG_OFFSET(NBIO, 0, regPCIE_CONFIG_CNTL), data); + break; } if (amdgpu_sriov_vf(adev)) diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c index dc5e93756fea..4531761dcf77 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c @@ -273,7 +273,7 @@ static void nbio_v7_4_update_medium_grain_light_sleep(struct amdgpu_device *adev } static void nbio_v7_4_get_clockgating_state(struct amdgpu_device *adev, - u32 *flags) + u64 *flags) { int data; @@ -362,9 +362,24 @@ const struct nbio_hdp_flush_reg nbio_v7_4_hdp_flush_reg_ald = { static void nbio_v7_4_init_registers(struct amdgpu_device *adev) { + uint32_t baco_cntl; + if (amdgpu_sriov_vf(adev)) adev->rmmio_remap.reg_offset = SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_DEV0_EPF0_VF0_HDP_MEM_COHERENCY_FLUSH_CNTL) << 2; + + if (adev->ip_versions[NBIO_HWIP][0] == IP_VERSION(7, 4, 4) && + !amdgpu_sriov_vf(adev)) { + baco_cntl = RREG32_SOC15(NBIO, 0, mmBACO_CNTL); + if (baco_cntl & + (BACO_CNTL__BACO_DUMMY_EN_MASK | BACO_CNTL__BACO_EN_MASK)) { + baco_cntl &= ~(BACO_CNTL__BACO_DUMMY_EN_MASK | + BACO_CNTL__BACO_EN_MASK); + dev_dbg(adev->dev, "Unsetting baco dummy mode %x", + baco_cntl); + WREG32_SOC15(NBIO, 0, mmBACO_CNTL, baco_cntl); + } + } } static void nbio_v7_4_handle_ras_controller_intr_no_bifring(struct amdgpu_device *adev) @@ -658,16 +673,27 @@ static void nbio_v7_4_enable_doorbell_interrupt(struct amdgpu_device *adev, DOORBELL_INTERRUPT_DISABLE, enable ? 
0 : 1); } -const struct amdgpu_nbio_ras_funcs nbio_v7_4_ras_funcs = { +const struct amdgpu_ras_block_hw_ops nbio_v7_4_ras_hw_ops = { + .query_ras_error_count = nbio_v7_4_query_ras_error_count, +}; + +struct amdgpu_nbio_ras nbio_v7_4_ras = { + .ras_block = { + .ras_comm = { + .name = "pcie_bif", + .block = AMDGPU_RAS_BLOCK__PCIE_BIF, + .type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE, + }, + .hw_ops = &nbio_v7_4_ras_hw_ops, + .ras_late_init = amdgpu_nbio_ras_late_init, + }, .handle_ras_controller_intr_no_bifring = nbio_v7_4_handle_ras_controller_intr_no_bifring, .handle_ras_err_event_athub_intr_no_bifring = nbio_v7_4_handle_ras_err_event_athub_intr_no_bifring, .init_ras_controller_interrupt = nbio_v7_4_init_ras_controller_interrupt, .init_ras_err_event_athub_interrupt = nbio_v7_4_init_ras_err_event_athub_interrupt, - .query_ras_error_count = nbio_v7_4_query_ras_error_count, - .ras_late_init = amdgpu_nbio_ras_late_init, - .ras_fini = amdgpu_nbio_ras_fini, }; + static void nbio_v7_4_program_ltr(struct amdgpu_device *adev) { uint32_t def, data; diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.h b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.h index cc5692db6f98..7490022d79d4 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.h +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.h @@ -29,6 +29,6 @@ extern const struct nbio_hdp_flush_reg nbio_v7_4_hdp_flush_reg; extern const struct nbio_hdp_flush_reg nbio_v7_4_hdp_flush_reg_ald; extern const struct amdgpu_nbio_funcs nbio_v7_4_funcs; -extern const struct amdgpu_nbio_ras_funcs nbio_v7_4_ras_funcs; +extern struct amdgpu_nbio_ras nbio_v7_4_ras; #endif diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c index 2ec1ffb36b1f..0a7946c59a42 100644 --- a/drivers/gpu/drm/amd/amdgpu/nv.c +++ b/drivers/gpu/drm/amd/amdgpu/nv.c @@ -204,6 +204,7 @@ static int nv_query_video_codecs(struct amdgpu_device *adev, bool encode, *codecs = &sc_video_codecs_decode; return 0; case IP_VERSION(3, 1, 1): + case IP_VERSION(3, 1, 2): if (encode) *codecs = &nv_video_codecs_encode; else @@ -258,21 +259,6 @@ static u64 nv_pcie_rreg64(struct amdgpu_device *adev, u32 reg) return amdgpu_device_indirect_rreg64(adev, address, data, reg); } -static u32 nv_pcie_port_rreg(struct amdgpu_device *adev, u32 reg) -{ - unsigned long flags, address, data; - u32 r; - address = adev->nbio.funcs->get_pcie_port_index_offset(adev); - data = adev->nbio.funcs->get_pcie_port_data_offset(adev); - - spin_lock_irqsave(&adev->pcie_idx_lock, flags); - WREG32(address, reg * 4); - (void)RREG32(address); - r = RREG32(data); - spin_unlock_irqrestore(&adev->pcie_idx_lock, flags); - return r; -} - static void nv_pcie_wreg64(struct amdgpu_device *adev, u32 reg, u64 v) { unsigned long address, data; @@ -283,21 +269,6 @@ static void nv_pcie_wreg64(struct amdgpu_device *adev, u32 reg, u64 v) amdgpu_device_indirect_wreg64(adev, address, data, reg, v); } -static void nv_pcie_port_wreg(struct amdgpu_device *adev, u32 reg, u32 v) -{ - unsigned long flags, address, data; - - address = adev->nbio.funcs->get_pcie_port_index_offset(adev); - data = adev->nbio.funcs->get_pcie_port_data_offset(adev); - - spin_lock_irqsave(&adev->pcie_idx_lock, flags); - WREG32(address, reg * 4); - (void)RREG32(address); - WREG32(data, v); - (void)RREG32(data); - spin_unlock_irqrestore(&adev->pcie_idx_lock, flags); -} - static u32 nv_didt_rreg(struct amdgpu_device *adev, u32 reg) { unsigned long flags, address, data; @@ -360,38 +331,6 @@ static bool nv_read_disabled_bios(struct amdgpu_device *adev) return false; } -static bool 
nv_read_bios_from_rom(struct amdgpu_device *adev, - u8 *bios, u32 length_bytes) -{ - u32 *dw_ptr; - u32 i, length_dw; - u32 rom_index_offset, rom_data_offset; - - if (bios == NULL) - return false; - if (length_bytes == 0) - return false; - /* APU vbios image is part of sbios image */ - if (adev->flags & AMD_IS_APU) - return false; - - dw_ptr = (u32 *)bios; - length_dw = ALIGN(length_bytes, 4) / 4; - - rom_index_offset = - adev->smuio.funcs->get_rom_index_offset(adev); - rom_data_offset = - adev->smuio.funcs->get_rom_data_offset(adev); - - /* set rom index to 0 */ - WREG32(rom_index_offset, 0); - /* read out the rom data */ - for (i = 0; i < length_dw; i++) - dw_ptr[i] = RREG32(rom_data_offset); - - return true; -} - static struct soc15_allowed_register_entry nv_allowed_read_registers[] = { { SOC15_REG_ENTRY(GC, 0, mmGRBM_STATUS)}, { SOC15_REG_ENTRY(GC, 0, mmGRBM_STATUS2)}, @@ -515,6 +454,8 @@ nv_asic_reset_method(struct amdgpu_device *adev) case IP_VERSION(11, 5, 0): case IP_VERSION(13, 0, 1): case IP_VERSION(13, 0, 3): + case IP_VERSION(13, 0, 5): + case IP_VERSION(13, 0, 8): return AMD_RESET_METHOD_MODE2; case IP_VERSION(11, 0, 7): case IP_VERSION(11, 0, 11): @@ -584,7 +525,7 @@ static void nv_pcie_gen3_enable(struct amdgpu_device *adev) static void nv_program_aspm(struct amdgpu_device *adev) { - if (!amdgpu_aspm) + if (!amdgpu_device_should_use_aspm(adev)) return; if (!(adev->flags & AMD_IS_APU) && @@ -699,7 +640,8 @@ static int nv_update_umd_stable_pstate(struct amdgpu_device *adev, adev->gfx.funcs->update_perfmon_mgcg(adev, !enter); if (!(adev->flags & AMD_IS_APU) && - (adev->nbio.funcs->enable_aspm)) + (adev->nbio.funcs->enable_aspm) && + amdgpu_device_should_use_aspm(adev)) adev->nbio.funcs->enable_aspm(adev, !enter); return 0; @@ -708,7 +650,7 @@ static int nv_update_umd_stable_pstate(struct amdgpu_device *adev, static const struct amdgpu_asic_funcs nv_asic_funcs = { .read_disabled_bios = &nv_read_disabled_bios, - .read_bios_from_rom = &nv_read_bios_from_rom, + .read_bios_from_rom = &amdgpu_soc15_read_bios_from_rom, .read_register = &nv_read_register, .reset = &nv_asic_reset, .reset_method = &nv_asic_reset_method, @@ -742,8 +684,8 @@ static int nv_common_early_init(void *handle) adev->pcie_wreg = &nv_pcie_wreg; adev->pcie_rreg64 = &nv_pcie_rreg64; adev->pcie_wreg64 = &nv_pcie_wreg64; - adev->pciep_rreg = &nv_pcie_port_rreg; - adev->pciep_wreg = &nv_pcie_port_wreg; + adev->pciep_rreg = amdgpu_device_pcie_port_rreg; + adev->pciep_wreg = amdgpu_device_pcie_port_wreg; /* TODO: will add them during VCN v2 implementation */ adev->uvd_ctx_rreg = NULL; @@ -964,10 +906,63 @@ static int nv_common_early_init(void *handle) adev->external_rev_id = adev->rev_id + 0x01; break; case IP_VERSION(10, 1, 3): + case IP_VERSION(10, 1, 4): adev->cg_flags = 0; adev->pg_flags = 0; adev->external_rev_id = adev->rev_id + 0x82; break; + case IP_VERSION(10, 3, 6): + adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG | + AMD_CG_SUPPORT_GFX_MGLS | + AMD_CG_SUPPORT_GFX_CGCG | + AMD_CG_SUPPORT_GFX_CGLS | + AMD_CG_SUPPORT_GFX_3D_CGCG | + AMD_CG_SUPPORT_GFX_3D_CGLS | + AMD_CG_SUPPORT_GFX_RLC_LS | + AMD_CG_SUPPORT_GFX_CP_LS | + AMD_CG_SUPPORT_GFX_FGCG | + AMD_CG_SUPPORT_MC_MGCG | + AMD_CG_SUPPORT_MC_LS | + AMD_CG_SUPPORT_SDMA_LS | + AMD_CG_SUPPORT_HDP_MGCG | + AMD_CG_SUPPORT_HDP_LS | + AMD_CG_SUPPORT_ATHUB_MGCG | + AMD_CG_SUPPORT_ATHUB_LS | + AMD_CG_SUPPORT_IH_CG | + AMD_CG_SUPPORT_VCN_MGCG | + AMD_CG_SUPPORT_JPEG_MGCG; + adev->pg_flags = AMD_PG_SUPPORT_GFX_PG | + AMD_PG_SUPPORT_VCN | + AMD_PG_SUPPORT_VCN_DPG | + 
AMD_PG_SUPPORT_JPEG; + adev->external_rev_id = adev->rev_id + 0x01; + break; + case IP_VERSION(10, 3, 7): + adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG | + AMD_CG_SUPPORT_GFX_MGLS | + AMD_CG_SUPPORT_GFX_CGCG | + AMD_CG_SUPPORT_GFX_CGLS | + AMD_CG_SUPPORT_GFX_3D_CGCG | + AMD_CG_SUPPORT_GFX_3D_CGLS | + AMD_CG_SUPPORT_GFX_RLC_LS | + AMD_CG_SUPPORT_GFX_CP_LS | + AMD_CG_SUPPORT_GFX_FGCG | + AMD_CG_SUPPORT_MC_MGCG | + AMD_CG_SUPPORT_MC_LS | + AMD_CG_SUPPORT_SDMA_LS | + AMD_CG_SUPPORT_HDP_MGCG | + AMD_CG_SUPPORT_HDP_LS | + AMD_CG_SUPPORT_ATHUB_MGCG | + AMD_CG_SUPPORT_ATHUB_LS | + AMD_CG_SUPPORT_IH_CG | + AMD_CG_SUPPORT_VCN_MGCG | + AMD_CG_SUPPORT_JPEG_MGCG; + adev->pg_flags = AMD_PG_SUPPORT_VCN | + AMD_PG_SUPPORT_VCN_DPG | + AMD_PG_SUPPORT_JPEG | + AMD_PG_SUPPORT_GFX_PG; + adev->external_rev_id = adev->rev_id + 0x01; + break; default: /* FIXME: not supported yet */ return -EINVAL; @@ -1120,7 +1115,7 @@ static int nv_common_set_powergating_state(void *handle, return 0; } -static void nv_common_get_clockgating_state(void *handle, u32 *flags) +static void nv_common_get_clockgating_state(void *handle, u64 *flags) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; diff --git a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h index dd0dce254901..1f276ddd26e9 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h +++ b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h @@ -258,6 +258,7 @@ enum psp_gfx_fw_type { GFX_FW_TYPE_SDMA6 = 56, /* SDMA6 MI */ GFX_FW_TYPE_SDMA7 = 57, /* SDMA7 MI */ GFX_FW_TYPE_VCN1 = 58, /* VCN1 MI */ + GFX_FW_TYPE_CAP = 62, /* CAP_FW */ GFX_FW_TYPE_REG_LIST = 67, /* REG_LIST MI */ GFX_FW_TYPE_MAX }; diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c index d0e76b36d4ab..9518b4394a6e 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c @@ -53,11 +53,13 @@ MODULE_FIRMWARE("amdgpu/navi14_ta.bin"); MODULE_FIRMWARE("amdgpu/navi12_sos.bin"); MODULE_FIRMWARE("amdgpu/navi12_asd.bin"); MODULE_FIRMWARE("amdgpu/navi12_ta.bin"); +MODULE_FIRMWARE("amdgpu/navi12_cap.bin"); MODULE_FIRMWARE("amdgpu/arcturus_sos.bin"); MODULE_FIRMWARE("amdgpu/arcturus_asd.bin"); MODULE_FIRMWARE("amdgpu/arcturus_ta.bin"); MODULE_FIRMWARE("amdgpu/sienna_cichlid_sos.bin"); MODULE_FIRMWARE("amdgpu/sienna_cichlid_ta.bin"); +MODULE_FIRMWARE("amdgpu/sienna_cichlid_cap.bin"); MODULE_FIRMWARE("amdgpu/navy_flounder_sos.bin"); MODULE_FIRMWARE("amdgpu/navy_flounder_ta.bin"); MODULE_FIRMWARE("amdgpu/vangogh_asd.bin"); @@ -177,8 +179,6 @@ static int psp_v11_0_init_microcode(struct psp_context *psp) err = psp_init_asd_microcode(psp, chip_name); if (err) return err; - if (amdgpu_sriov_vf(adev)) - break; snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ta.bin", chip_name); err = request_firmware(&adev->psp.ta_fw, fw_name, adev->dev); if (err) { diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c index 17655bc6d2f1..024f60631faf 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c @@ -31,9 +31,16 @@ MODULE_FIRMWARE("amdgpu/aldebaran_sos.bin"); MODULE_FIRMWARE("amdgpu/aldebaran_ta.bin"); +MODULE_FIRMWARE("amdgpu/aldebaran_cap.bin"); MODULE_FIRMWARE("amdgpu/yellow_carp_asd.bin"); MODULE_FIRMWARE("amdgpu/yellow_carp_toc.bin"); MODULE_FIRMWARE("amdgpu/yellow_carp_ta.bin"); +MODULE_FIRMWARE("amdgpu/psp_13_0_5_asd.bin"); +MODULE_FIRMWARE("amdgpu/psp_13_0_5_toc.bin"); +MODULE_FIRMWARE("amdgpu/psp_13_0_5_ta.bin"); 
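/* Note on the MODULE_FIRMWARE entries just above: PSP 13.0.5 and 13.0.8
 * firmware is named by IP version (psp_13_0_5_*.bin, psp_13_0_8_*.bin)
 * rather than by ASIC codename (aldebaran_*, yellow_carp_*). The
 * IP_VERSION()-keyed chip_name selection added to
 * psp_v13_0_init_microcode() below follows the same convention, so new
 * IPs no longer need a codename in the firmware path.
 */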
+MODULE_FIRMWARE("amdgpu/psp_13_0_8_asd.bin"); +MODULE_FIRMWARE("amdgpu/psp_13_0_8_toc.bin"); +MODULE_FIRMWARE("amdgpu/psp_13_0_8_ta.bin"); /* For large FW files the time to complete can be very long */ #define USBC_PD_POLLING_LIMIT_S 240 @@ -55,6 +62,12 @@ static int psp_v13_0_init_microcode(struct psp_context *psp) case IP_VERSION(13, 0, 3): chip_name = "yellow_carp"; break; + case IP_VERSION(13, 0, 5): + chip_name = "psp_13_0_5"; + break; + case IP_VERSION(13, 0, 8): + chip_name = "psp_13_0_8"; + break; default: BUG(); } @@ -69,6 +82,8 @@ static int psp_v13_0_init_microcode(struct psp_context *psp) break; case IP_VERSION(13, 0, 1): case IP_VERSION(13, 0, 3): + case IP_VERSION(13, 0, 5): + case IP_VERSION(13, 0, 8): err = psp_init_asd_microcode(psp, chip_name); if (err) return err; diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c index 1ed357cb0f49..01f3bcc62a6c 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c @@ -44,6 +44,7 @@ MODULE_FIRMWARE("amdgpu/vega10_sos.bin"); MODULE_FIRMWARE("amdgpu/vega10_asd.bin"); +MODULE_FIRMWARE("amdgpu/vega10_cap.bin"); MODULE_FIRMWARE("amdgpu/vega12_sos.bin"); MODULE_FIRMWARE("amdgpu/vega12_asd.bin"); diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c index 4509bd4cce2d..84b57b06b20c 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c @@ -223,7 +223,7 @@ static void sdma_v2_4_ring_set_wptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[ring->me], lower_32_bits(ring->wptr) << 2); + WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[ring->me], ring->wptr << 2); } static void sdma_v2_4_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) @@ -465,7 +465,7 @@ static int sdma_v2_4_gfx_resume(struct amdgpu_device *adev) WREG32(mmSDMA0_GFX_RB_BASE_HI + sdma_offsets[i], ring->gpu_addr >> 40); ring->wptr = 0; - WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], lower_32_bits(ring->wptr) << 2); + WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], ring->wptr << 2); /* enable DMA RB */ rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 1); @@ -1142,6 +1142,7 @@ static const struct amdgpu_ring_funcs sdma_v2_4_ring_funcs = { .align_mask = 0xf, .nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP), .support_64bit_ptrs = false, + .secure_submission_supported = true, .get_rptr = sdma_v2_4_ring_get_rptr, .get_wptr = sdma_v2_4_ring_get_wptr, .set_wptr = sdma_v2_4_ring_set_wptr, diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c index 135727b59c41..8af5c94d526a 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c @@ -389,14 +389,14 @@ static void sdma_v3_0_ring_set_wptr(struct amdgpu_ring *ring) if (ring->use_doorbell) { u32 *wb = (u32 *)&adev->wb.wb[ring->wptr_offs]; /* XXX check if swapping is necessary on BE */ - WRITE_ONCE(*wb, (lower_32_bits(ring->wptr) << 2)); - WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr) << 2); + WRITE_ONCE(*wb, ring->wptr << 2); + WDOORBELL32(ring->doorbell_index, ring->wptr << 2); } else if (ring->use_pollmem) { u32 *wb = (u32 *)&adev->wb.wb[ring->wptr_offs]; - WRITE_ONCE(*wb, (lower_32_bits(ring->wptr) << 2)); + WRITE_ONCE(*wb, ring->wptr << 2); } else { - WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[ring->me], lower_32_bits(ring->wptr) << 2); + WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[ring->me], ring->wptr << 2); } } @@ 
-1535,7 +1535,7 @@ static int sdma_v3_0_set_powergating_state(void *handle, return 0; } -static void sdma_v3_0_get_clockgating_state(void *handle, u32 *flags) +static void sdma_v3_0_get_clockgating_state(void *handle, u64 *flags) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; int data; @@ -1580,6 +1580,7 @@ static const struct amdgpu_ring_funcs sdma_v3_0_ring_funcs = { .align_mask = 0xf, .nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP), .support_64bit_ptrs = false, + .secure_submission_supported = true, .get_rptr = sdma_v3_0_ring_get_rptr, .get_wptr = sdma_v3_0_ring_get_wptr, .set_wptr = sdma_v3_0_ring_set_wptr, diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index f0638db57111..80de85847712 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c @@ -772,8 +772,8 @@ static void sdma_v4_0_ring_set_wptr(struct amdgpu_ring *ring) DRM_DEBUG("Using doorbell -- " "wptr_offs == 0x%08x " - "lower_32_bits(ring->wptr) << 2 == 0x%08x " - "upper_32_bits(ring->wptr) << 2 == 0x%08x\n", + "lower_32_bits(ring->wptr << 2) == 0x%08x " + "upper_32_bits(ring->wptr << 2) == 0x%08x\n", ring->wptr_offs, lower_32_bits(ring->wptr << 2), upper_32_bits(ring->wptr << 2)); @@ -1885,22 +1885,16 @@ static int sdma_v4_0_process_ras_data_cb(struct amdgpu_device *adev, static int sdma_v4_0_late_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - struct ras_ih_if ih_info = { - .cb = sdma_v4_0_process_ras_data_cb, - }; sdma_v4_0_setup_ulv(adev); if (!amdgpu_persistent_edc_harvesting_supported(adev)) { - if (adev->sdma.funcs && - adev->sdma.funcs->reset_ras_error_count) - adev->sdma.funcs->reset_ras_error_count(adev); + if (adev->sdma.ras && adev->sdma.ras->ras_block.hw_ops && + adev->sdma.ras->ras_block.hw_ops->reset_ras_error_count) + adev->sdma.ras->ras_block.hw_ops->reset_ras_error_count(adev); } - if (adev->sdma.funcs && adev->sdma.funcs->ras_late_init) - return adev->sdma.funcs->ras_late_init(adev, &ih_info); - else - return 0; + return 0; } static int sdma_v4_0_sw_init(void *handle) @@ -2001,9 +1995,6 @@ static int sdma_v4_0_sw_fini(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; int i; - if (adev->sdma.funcs && adev->sdma.funcs->ras_fini) - adev->sdma.funcs->ras_fini(adev); - for (i = 0; i < adev->sdma.num_instances; i++) { amdgpu_ring_fini(&adev->sdma.instance[i].ring); if (adev->sdma.has_page_queue) @@ -2381,7 +2372,7 @@ static int sdma_v4_0_set_powergating_state(void *handle, return 0; } -static void sdma_v4_0_get_clockgating_state(void *handle, u32 *flags) +static void sdma_v4_0_get_clockgating_state(void *handle, u64 *flags) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; int data; @@ -2423,6 +2414,7 @@ static const struct amdgpu_ring_funcs sdma_v4_0_ring_funcs = { .align_mask = 0xf, .nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP), .support_64bit_ptrs = true, + .secure_submission_supported = true, .vmhub = AMDGPU_MMHUB_0, .get_rptr = sdma_v4_0_ring_get_rptr, .get_wptr = sdma_v4_0_ring_get_wptr, @@ -2459,6 +2451,7 @@ static const struct amdgpu_ring_funcs sdma_v4_0_ring_funcs_2nd_mmhub = { .align_mask = 0xf, .nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP), .support_64bit_ptrs = true, + .secure_submission_supported = true, .vmhub = AMDGPU_MMHUB_1, .get_rptr = sdma_v4_0_ring_get_rptr, .get_wptr = sdma_v4_0_ring_get_wptr, @@ -2491,6 +2484,7 @@ static const struct amdgpu_ring_funcs sdma_v4_0_page_ring_funcs = { .align_mask = 0xf, .nop = 
SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP), .support_64bit_ptrs = true, + .secure_submission_supported = true, .vmhub = AMDGPU_MMHUB_0, .get_rptr = sdma_v4_0_ring_get_rptr, .get_wptr = sdma_v4_0_page_ring_get_wptr, @@ -2523,6 +2517,7 @@ static const struct amdgpu_ring_funcs sdma_v4_0_page_ring_funcs_2nd_mmhub = { .align_mask = 0xf, .nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP), .support_64bit_ptrs = true, + .secure_submission_supported = true, .vmhub = AMDGPU_MMHUB_1, .get_rptr = sdma_v4_0_ring_get_rptr, .get_wptr = sdma_v4_0_page_ring_get_wptr, @@ -2748,7 +2743,7 @@ static void sdma_v4_0_get_ras_error_count(uint32_t value, } } -static int sdma_v4_0_query_ras_error_count(struct amdgpu_device *adev, +static int sdma_v4_0_query_ras_error_count_by_instance(struct amdgpu_device *adev, uint32_t instance, void *ras_error_status) { struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; @@ -2770,6 +2765,18 @@ static int sdma_v4_0_query_ras_error_count(struct amdgpu_device *adev, return 0; }; +static void sdma_v4_0_query_ras_error_count(struct amdgpu_device *adev, void *ras_error_status) +{ + int i = 0; + + for (i = 0; i < adev->sdma.num_instances; i++) { + if (sdma_v4_0_query_ras_error_count_by_instance(adev, i, ras_error_status)) { + dev_err(adev->dev, "Query ras error count failed in SDMA%d\n", i); + return; + } + } +} + static void sdma_v4_0_reset_ras_error_count(struct amdgpu_device *adev) { int i; @@ -2781,26 +2788,48 @@ static void sdma_v4_0_reset_ras_error_count(struct amdgpu_device *adev) } } -static const struct amdgpu_sdma_ras_funcs sdma_v4_0_ras_funcs = { - .ras_late_init = amdgpu_sdma_ras_late_init, - .ras_fini = amdgpu_sdma_ras_fini, +const struct amdgpu_ras_block_hw_ops sdma_v4_0_ras_hw_ops = { .query_ras_error_count = sdma_v4_0_query_ras_error_count, .reset_ras_error_count = sdma_v4_0_reset_ras_error_count, }; +static struct amdgpu_sdma_ras sdma_v4_0_ras = { + .ras_block = { + .hw_ops = &sdma_v4_0_ras_hw_ops, + .ras_cb = sdma_v4_0_process_ras_data_cb, + }, +}; + static void sdma_v4_0_set_ras_funcs(struct amdgpu_device *adev) { switch (adev->ip_versions[SDMA0_HWIP][0]) { case IP_VERSION(4, 2, 0): case IP_VERSION(4, 2, 2): - adev->sdma.funcs = &sdma_v4_0_ras_funcs; + adev->sdma.ras = &sdma_v4_0_ras; break; case IP_VERSION(4, 4, 0): - adev->sdma.funcs = &sdma_v4_4_ras_funcs; + adev->sdma.ras = &sdma_v4_4_ras; break; default: break; } + + if (adev->sdma.ras) { + amdgpu_ras_register_ras_block(adev, &adev->sdma.ras->ras_block); + + strcpy(adev->sdma.ras->ras_block.ras_comm.name, "sdma"); + adev->sdma.ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__SDMA; + adev->sdma.ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE; + adev->sdma.ras_if = &adev->sdma.ras->ras_block.ras_comm; + + /* If no special ras_late_init function is defined, use the default one */ + if (!adev->sdma.ras->ras_block.ras_late_init) + adev->sdma.ras->ras_block.ras_late_init = amdgpu_sdma_ras_late_init; + + /* If no special ras_cb function is defined, use the default one */ + if (!adev->sdma.ras->ras_block.ras_cb) + adev->sdma.ras->ras_block.ras_cb = amdgpu_sdma_process_ras_data_cb; + } } const struct amdgpu_ip_block_version sdma_v4_0_ip_block = { diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4.c index bf95007f0843..6f9895cdddb1 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4.c @@ -188,7 +188,7 @@ static void sdma_v4_4_get_ras_error_count(struct amdgpu_device *adev, } } -static int 
sdma_v4_4_query_ras_error_count(struct amdgpu_device *adev, +static int sdma_v4_4_query_ras_error_count_by_instance(struct amdgpu_device *adev, uint32_t instance, void *ras_error_status) { @@ -245,9 +245,26 @@ static void sdma_v4_4_reset_ras_error_count(struct amdgpu_device *adev) } } -const struct amdgpu_sdma_ras_funcs sdma_v4_4_ras_funcs = { - .ras_late_init = amdgpu_sdma_ras_late_init, - .ras_fini = amdgpu_sdma_ras_fini, +static void sdma_v4_4_query_ras_error_count(struct amdgpu_device *adev, void *ras_error_status) +{ + int i = 0; + + for (i = 0; i < adev->sdma.num_instances; i++) { + if (sdma_v4_4_query_ras_error_count_by_instance(adev, i, ras_error_status)) { + dev_err(adev->dev, "Query ras error count failed in SDMA%d\n", i); + return; + } + } + +} + +const struct amdgpu_ras_block_hw_ops sdma_v4_4_ras_hw_ops = { .query_ras_error_count = sdma_v4_4_query_ras_error_count, .reset_ras_error_count = sdma_v4_4_reset_ras_error_count, }; + +struct amdgpu_sdma_ras sdma_v4_4_ras = { + .ras_block = { + .hw_ops = &sdma_v4_4_ras_hw_ops, + }, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4.h b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4.h index 74a6e5b5e949..a9f0c68359e0 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4.h +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4.h @@ -23,6 +23,6 @@ #ifndef __SDMA_V4_4_H__ #define __SDMA_V4_4_H__ -extern const struct amdgpu_sdma_ras_funcs sdma_v4_4_ras_funcs; +extern struct amdgpu_sdma_ras sdma_v4_4_ras; #endif diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c index 81e033549dda..d3939c5f531d 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c @@ -51,9 +51,6 @@ MODULE_FIRMWARE("amdgpu/navi14_sdma1.bin"); MODULE_FIRMWARE("amdgpu/navi12_sdma.bin"); MODULE_FIRMWARE("amdgpu/navi12_sdma1.bin"); -MODULE_FIRMWARE("amdgpu/cyan_skillfish_sdma.bin"); -MODULE_FIRMWARE("amdgpu/cyan_skillfish_sdma1.bin"); - MODULE_FIRMWARE("amdgpu/cyan_skillfish2_sdma.bin"); MODULE_FIRMWARE("amdgpu/cyan_skillfish2_sdma1.bin"); @@ -264,10 +261,7 @@ static int sdma_v5_0_init_microcode(struct amdgpu_device *adev) chip_name = "navi12"; break; case IP_VERSION(5, 0, 1): - if (adev->apu_flags & AMD_APU_IS_CYAN_SKILLFISH2) - chip_name = "cyan_skillfish2"; - else - chip_name = "cyan_skillfish"; + chip_name = "cyan_skillfish2"; break; default: BUG(); @@ -400,8 +394,8 @@ static void sdma_v5_0_ring_set_wptr(struct amdgpu_ring *ring) if (ring->use_doorbell) { DRM_DEBUG("Using doorbell -- " "wptr_offs == 0x%08x " - "lower_32_bits(ring->wptr) << 2 == 0x%08x " - "upper_32_bits(ring->wptr) << 2 == 0x%08x\n", + "lower_32_bits(ring->wptr << 2) == 0x%08x " + "upper_32_bits(ring->wptr << 2) == 0x%08x\n", ring->wptr_offs, lower_32_bits(ring->wptr << 2), upper_32_bits(ring->wptr << 2)); @@ -780,9 +774,9 @@ static int sdma_v5_0_gfx_resume(struct amdgpu_device *adev) if (!amdgpu_sriov_vf(adev)) { /* only bare-metal use register write for wptr */ WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR), - lower_32_bits(ring->wptr) << 2); + lower_32_bits(ring->wptr << 2)); WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), - upper_32_bits(ring->wptr) << 2); + upper_32_bits(ring->wptr << 2)); } doorbell = RREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL)); @@ -1654,7 +1648,7 @@ static int sdma_v5_0_set_powergating_state(void *handle, return 0; } -static void sdma_v5_0_get_clockgating_state(void *handle, u32 *flags) +static void sdma_v5_0_get_clockgating_state(void *handle, u64 *flags) { 
struct amdgpu_device *adev = (struct amdgpu_device *)handle; int data; @@ -1696,6 +1690,7 @@ static const struct amdgpu_ring_funcs sdma_v5_0_ring_funcs = { .align_mask = 0xf, .nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP), .support_64bit_ptrs = true, + .secure_submission_supported = true, .vmhub = AMDGPU_GFXHUB_0, .get_rptr = sdma_v5_0_ring_get_rptr, .get_wptr = sdma_v5_0_ring_get_wptr, diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c index d3d6d5b045b8..8298926f8502 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c @@ -51,6 +51,8 @@ MODULE_FIRMWARE("amdgpu/beige_goby_sdma.bin"); MODULE_FIRMWARE("amdgpu/vangogh_sdma.bin"); MODULE_FIRMWARE("amdgpu/yellow_carp_sdma.bin"); +MODULE_FIRMWARE("amdgpu/sdma_5_2_6.bin"); +MODULE_FIRMWARE("amdgpu/sdma_5_2_7.bin"); #define SDMA1_REG_OFFSET 0x600 #define SDMA3_REG_OFFSET 0x400 @@ -138,28 +140,34 @@ static int sdma_v5_2_init_microcode(struct amdgpu_device *adev) switch (adev->ip_versions[SDMA0_HWIP][0]) { case IP_VERSION(5, 2, 0): - chip_name = "sienna_cichlid"; + chip_name = "sienna_cichlid_sdma"; break; case IP_VERSION(5, 2, 2): - chip_name = "navy_flounder"; + chip_name = "navy_flounder_sdma"; break; case IP_VERSION(5, 2, 1): - chip_name = "vangogh"; + chip_name = "vangogh_sdma"; break; case IP_VERSION(5, 2, 4): - chip_name = "dimgrey_cavefish"; + chip_name = "dimgrey_cavefish_sdma"; break; case IP_VERSION(5, 2, 5): - chip_name = "beige_goby"; + chip_name = "beige_goby_sdma"; break; case IP_VERSION(5, 2, 3): - chip_name = "yellow_carp"; + chip_name = "yellow_carp_sdma"; + break; + case IP_VERSION(5, 2, 6): + chip_name = "sdma_5_2_6"; + break; + case IP_VERSION(5, 2, 7): + chip_name = "sdma_5_2_7"; break; default: BUG(); } - snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma.bin", chip_name); + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s.bin", chip_name); err = request_firmware(&adev->sdma.instance[0].fw, fw_name, adev->dev); if (err) @@ -287,8 +295,8 @@ static void sdma_v5_2_ring_set_wptr(struct amdgpu_ring *ring) if (ring->use_doorbell) { DRM_DEBUG("Using doorbell -- " "wptr_offs == 0x%08x " - "lower_32_bits(ring->wptr) << 2 == 0x%08x " - "upper_32_bits(ring->wptr) << 2 == 0x%08x\n", + "lower_32_bits(ring->wptr << 2) == 0x%08x " + "upper_32_bits(ring->wptr << 2) == 0x%08x\n", ring->wptr_offs, lower_32_bits(ring->wptr << 2), upper_32_bits(ring->wptr << 2)); @@ -664,8 +672,8 @@ static int sdma_v5_2_gfx_resume(struct amdgpu_device *adev) WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 1); if (!amdgpu_sriov_vf(adev)) { /* only bare-metal use register write for wptr */ - WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR), lower_32_bits(ring->wptr) << 2); - WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), upper_32_bits(ring->wptr) << 2); + WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR), lower_32_bits(ring->wptr << 2)); + WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), upper_32_bits(ring->wptr << 2)); } doorbell = RREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL)); @@ -1617,6 +1625,7 @@ static int sdma_v5_2_set_clockgating_state(void *handle, case IP_VERSION(5, 2, 1): case IP_VERSION(5, 2, 4): case IP_VERSION(5, 2, 5): + case IP_VERSION(5, 2, 6): case IP_VERSION(5, 2, 3): sdma_v5_2_update_medium_grain_clock_gating(adev, state == AMD_CG_STATE_GATE); @@ -1636,7 +1645,7 @@ static int sdma_v5_2_set_powergating_state(void *handle, return 0; } -static 
void sdma_v5_2_get_clockgating_state(void *handle, u32 *flags) +static void sdma_v5_2_get_clockgating_state(void *handle, u64 *flags) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; int data; @@ -1644,6 +1653,11 @@ static void sdma_v5_2_get_clockgating_state(void *handle, u32 *flags) if (amdgpu_sriov_vf(adev)) *flags = 0; + /* AMD_CG_SUPPORT_SDMA_MGCG */ + data = RREG32(sdma_v5_2_get_reg_offset(adev, 0, mmSDMA0_CLK_CTRL)); + if (!(data & SDMA0_CLK_CTRL__CGCG_EN_OVERRIDE_MASK)) + *flags |= AMD_CG_SUPPORT_SDMA_MGCG; + /* AMD_CG_SUPPORT_SDMA_LS */ data = RREG32_KIQ(sdma_v5_2_get_reg_offset(adev, 0, mmSDMA0_POWER_CNTL)); if (data & SDMA0_POWER_CNTL__MEM_POWER_OVERRIDE_MASK) @@ -1673,6 +1687,7 @@ static const struct amdgpu_ring_funcs sdma_v5_2_ring_funcs = { .align_mask = 0xf, .nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP), .support_64bit_ptrs = true, + .secure_submission_supported = true, .vmhub = AMDGPU_GFXHUB_0, .get_rptr = sdma_v5_2_ring_get_rptr, .get_wptr = sdma_v5_2_ring_get_wptr, diff --git a/drivers/gpu/drm/amd/amdgpu/si.c b/drivers/gpu/drm/amd/amdgpu/si.c index e6d2f74a7976..7f99e130acd0 100644 --- a/drivers/gpu/drm/amd/amdgpu/si.c +++ b/drivers/gpu/drm/amd/amdgpu/si.c @@ -2453,7 +2453,7 @@ static void si_program_aspm(struct amdgpu_device *adev) bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false; bool disable_clkreq = false; - if (amdgpu_aspm == 0) + if (!amdgpu_device_should_use_aspm(adev)) return; if (adev->flags & AMD_IS_APU) diff --git a/drivers/gpu/drm/amd/amdgpu/si_dma.c b/drivers/gpu/drm/amd/amdgpu/si_dma.c index 195b45bcb8ad..2f95235bbfb3 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_dma.c +++ b/drivers/gpu/drm/amd/amdgpu/si_dma.c @@ -56,8 +56,7 @@ static void si_dma_ring_set_wptr(struct amdgpu_ring *ring) struct amdgpu_device *adev = ring->adev; u32 me = (ring == &adev->sdma.instance[0].ring) ? 0 : 1; - WREG32(DMA_RB_WPTR + sdma_offsets[me], - (lower_32_bits(ring->wptr) << 2) & 0x3fffc); + WREG32(DMA_RB_WPTR + sdma_offsets[me], (ring->wptr << 2) & 0x3fffc); } static void si_dma_ring_emit_ib(struct amdgpu_ring *ring, @@ -175,7 +174,7 @@ static int si_dma_start(struct amdgpu_device *adev) WREG32(DMA_CNTL + sdma_offsets[i], dma_cntl); ring->wptr = 0; - WREG32(DMA_RB_WPTR + sdma_offsets[i], lower_32_bits(ring->wptr) << 2); + WREG32(DMA_RB_WPTR + sdma_offsets[i], ring->wptr << 2); WREG32(DMA_RB_CNTL + sdma_offsets[i], rb_cntl | DMA_RB_ENABLE); ring->sched.ready = true; diff --git a/drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.c b/drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.c index 73ffa8fde3df..dd2d66090d23 100644 --- a/drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.c +++ b/drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.c @@ -26,6 +26,7 @@ #include "smu_v11_0_i2c.h" #include "amdgpu.h" +#include "amdgpu_dpm.h" #include "soc15_common.h" #include <drm/drm_fixed.h> #include <drm/drm_drv.h> @@ -43,11 +44,10 @@ #define I2C_X_RESTART BIT(31) -#define to_amdgpu_device(x) (container_of(x, struct amdgpu_device, pm.smu_i2c)) - static void smu_v11_0_i2c_set_clock_gating(struct i2c_adapter *control, bool en) { - struct amdgpu_device *adev = to_amdgpu_device(control); + struct amdgpu_smu_i2c_bus *smu_i2c = i2c_get_adapdata(control); + struct amdgpu_device *adev = smu_i2c->adev; uint32_t reg = RREG32_SOC15(SMUIO, 0, mmSMUIO_PWRMGT); reg = REG_SET_FIELD(reg, SMUIO_PWRMGT, i2c_clk_gate_en, en ? 
1 : 0); @@ -75,7 +75,8 @@ static void smu_v11_0_i2c_set_clock_gating(struct i2c_adapter *control, bool en) static int smu_v11_0_i2c_enable(struct i2c_adapter *control, bool enable) { - struct amdgpu_device *adev = to_amdgpu_device(control); + struct amdgpu_smu_i2c_bus *smu_i2c = i2c_get_adapdata(control); + struct amdgpu_device *adev = smu_i2c->adev; WREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_ENABLE, enable ? 1 : 0); @@ -100,7 +101,8 @@ static int smu_v11_0_i2c_enable(struct i2c_adapter *control, bool enable) static void smu_v11_0_i2c_clear_status(struct i2c_adapter *control) { - struct amdgpu_device *adev = to_amdgpu_device(control); + struct amdgpu_smu_i2c_bus *smu_i2c = i2c_get_adapdata(control); + struct amdgpu_device *adev = smu_i2c->adev; /* do */ { RREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_CLR_INTR); @@ -110,7 +112,8 @@ static void smu_v11_0_i2c_clear_status(struct i2c_adapter *control) static void smu_v11_0_i2c_configure(struct i2c_adapter *control) { - struct amdgpu_device *adev = to_amdgpu_device(control); + struct amdgpu_smu_i2c_bus *smu_i2c = i2c_get_adapdata(control); + struct amdgpu_device *adev = smu_i2c->adev; uint32_t reg = 0; reg = REG_SET_FIELD(reg, CKSVII2C_IC_CON, IC_SLAVE_DISABLE, 1); @@ -131,7 +134,8 @@ static void smu_v11_0_i2c_configure(struct i2c_adapter *control) static void smu_v11_0_i2c_set_clock(struct i2c_adapter *control) { - struct amdgpu_device *adev = to_amdgpu_device(control); + struct amdgpu_smu_i2c_bus *smu_i2c = i2c_get_adapdata(control); + struct amdgpu_device *adev = smu_i2c->adev; /* * Standard mode speed, These values are taken from SMUIO MAS, @@ -154,7 +158,8 @@ static void smu_v11_0_i2c_set_clock(struct i2c_adapter *control) static void smu_v11_0_i2c_set_address(struct i2c_adapter *control, u16 address) { - struct amdgpu_device *adev = to_amdgpu_device(control); + struct amdgpu_smu_i2c_bus *smu_i2c = i2c_get_adapdata(control); + struct amdgpu_device *adev = smu_i2c->adev; /* The IC_TAR::IC_TAR field is 10-bits wide. 
* It takes a 7-bit or 10-bit addresses as an address, @@ -165,7 +170,8 @@ static void smu_v11_0_i2c_set_address(struct i2c_adapter *control, u16 address) static uint32_t smu_v11_0_i2c_poll_tx_status(struct i2c_adapter *control) { - struct amdgpu_device *adev = to_amdgpu_device(control); + struct amdgpu_smu_i2c_bus *smu_i2c = i2c_get_adapdata(control); + struct amdgpu_device *adev = smu_i2c->adev; uint32_t ret = I2C_OK; uint32_t reg, reg_c_tx_abrt_source; @@ -216,7 +222,8 @@ static uint32_t smu_v11_0_i2c_poll_tx_status(struct i2c_adapter *control) static uint32_t smu_v11_0_i2c_poll_rx_status(struct i2c_adapter *control) { - struct amdgpu_device *adev = to_amdgpu_device(control); + struct amdgpu_smu_i2c_bus *smu_i2c = i2c_get_adapdata(control); + struct amdgpu_device *adev = smu_i2c->adev; uint32_t ret = I2C_OK; uint32_t reg_ic_status, reg_c_tx_abrt_source; @@ -262,7 +269,8 @@ static uint32_t smu_v11_0_i2c_transmit(struct i2c_adapter *control, u16 address, u8 *data, u32 numbytes, u32 i2c_flag) { - struct amdgpu_device *adev = to_amdgpu_device(control); + struct amdgpu_smu_i2c_bus *smu_i2c = i2c_get_adapdata(control); + struct amdgpu_device *adev = smu_i2c->adev; u32 bytes_sent, reg, ret = I2C_OK; unsigned long timeout_counter; @@ -360,7 +368,8 @@ static uint32_t smu_v11_0_i2c_receive(struct i2c_adapter *control, u16 address, u8 *data, u32 numbytes, u32 i2c_flag) { - struct amdgpu_device *adev = to_amdgpu_device(control); + struct amdgpu_smu_i2c_bus *smu_i2c = i2c_get_adapdata(control); + struct amdgpu_device *adev = smu_i2c->adev; uint32_t bytes_received, ret = I2C_OK; bytes_received = 0; @@ -431,7 +440,8 @@ static uint32_t smu_v11_0_i2c_receive(struct i2c_adapter *control, static void smu_v11_0_i2c_abort(struct i2c_adapter *control) { - struct amdgpu_device *adev = to_amdgpu_device(control); + struct amdgpu_smu_i2c_bus *smu_i2c = i2c_get_adapdata(control); + struct amdgpu_device *adev = smu_i2c->adev; uint32_t reg = 0; /* Enable I2C engine; */ @@ -447,7 +457,8 @@ static void smu_v11_0_i2c_abort(struct i2c_adapter *control) static bool smu_v11_0_i2c_activity_done(struct i2c_adapter *control) { - struct amdgpu_device *adev = to_amdgpu_device(control); + struct amdgpu_smu_i2c_bus *smu_i2c = i2c_get_adapdata(control); + struct amdgpu_device *adev = smu_i2c->adev; const uint32_t IDLE_TIMEOUT = 1024; uint32_t timeout_count = 0; @@ -508,7 +519,8 @@ static void smu_v11_0_i2c_init(struct i2c_adapter *control) static void smu_v11_0_i2c_fini(struct i2c_adapter *control) { - struct amdgpu_device *adev = to_amdgpu_device(control); + struct amdgpu_smu_i2c_bus *smu_i2c = i2c_get_adapdata(control); + struct amdgpu_device *adev = smu_i2c->adev; u32 status, enable, en_stat; int res; @@ -543,7 +555,8 @@ static void smu_v11_0_i2c_fini(struct i2c_adapter *control) static bool smu_v11_0_i2c_bus_lock(struct i2c_adapter *control) { - struct amdgpu_device *adev = to_amdgpu_device(control); + struct amdgpu_smu_i2c_bus *smu_i2c = i2c_get_adapdata(control); + struct amdgpu_device *adev = smu_i2c->adev; /* Send PPSMC_MSG_RequestI2CBus */ if (!amdgpu_dpm_smu_i2c_bus_access(adev, true)) @@ -554,7 +567,8 @@ static bool smu_v11_0_i2c_bus_lock(struct i2c_adapter *control) static bool smu_v11_0_i2c_bus_unlock(struct i2c_adapter *control) { - struct amdgpu_device *adev = to_amdgpu_device(control); + struct amdgpu_smu_i2c_bus *smu_i2c = i2c_get_adapdata(control); + struct amdgpu_device *adev = smu_i2c->adev; /* Send PPSMC_MSG_ReleaseI2CBus */ if (!amdgpu_dpm_smu_i2c_bus_access(adev, false)) @@ -587,16 +601,17 @@ static 
uint32_t smu_v11_0_i2c_write_data(struct i2c_adapter *control, if (ret != I2C_OK) DRM_ERROR("WriteI2CData() - I2C error occurred :%x", ret); - + return ret; } static void lock_bus(struct i2c_adapter *i2c, unsigned int flags) { - struct amdgpu_device *adev = to_amdgpu_device(i2c); + struct amdgpu_smu_i2c_bus *smu_i2c = i2c_get_adapdata(i2c); + struct amdgpu_device *adev = smu_i2c->adev; - mutex_lock(&adev->pm.smu_i2c_mutex); + mutex_lock(&smu_i2c->mutex); if (!smu_v11_0_i2c_bus_lock(i2c)) DRM_ERROR("Failed to lock the bus from SMU"); else @@ -611,13 +626,14 @@ static int trylock_bus(struct i2c_adapter *i2c, unsigned int flags) static void unlock_bus(struct i2c_adapter *i2c, unsigned int flags) { - struct amdgpu_device *adev = to_amdgpu_device(i2c); + struct amdgpu_smu_i2c_bus *smu_i2c = i2c_get_adapdata(i2c); + struct amdgpu_device *adev = smu_i2c->adev; if (!smu_v11_0_i2c_bus_unlock(i2c)) DRM_ERROR("Failed to unlock the bus from SMU"); else adev->pm.bus_locked = false; - mutex_unlock(&adev->pm.smu_i2c_mutex); + mutex_unlock(&smu_i2c->mutex); } static const struct i2c_lock_operations smu_v11_0_i2c_i2c_lock_ops = { @@ -706,19 +722,26 @@ static const struct i2c_adapter_quirks smu_v11_0_i2c_control_quirks = { .flags = I2C_AQ_NO_ZERO_LEN, }; -int smu_v11_0_i2c_control_init(struct i2c_adapter *control) +int smu_v11_0_i2c_control_init(struct amdgpu_device *adev) { - struct amdgpu_device *adev = to_amdgpu_device(control); + struct amdgpu_smu_i2c_bus *smu_i2c = &adev->pm.smu_i2c[0]; + struct i2c_adapter *control = &smu_i2c->adapter; int res; - mutex_init(&adev->pm.smu_i2c_mutex); + smu_i2c->adev = adev; + smu_i2c->port = 0; + mutex_init(&smu_i2c->mutex); control->owner = THIS_MODULE; control->class = I2C_CLASS_HWMON; control->dev.parent = &adev->pdev->dev; control->algo = &smu_v11_0_i2c_algo; - snprintf(control->name, sizeof(control->name), "AMDGPU SMU"); + snprintf(control->name, sizeof(control->name), "AMDGPU SMU 0"); control->lock_ops = &smu_v11_0_i2c_i2c_lock_ops; control->quirks = &smu_v11_0_i2c_control_quirks; + i2c_set_adapdata(control, smu_i2c); + + adev->pm.ras_eeprom_i2c_bus = &adev->pm.smu_i2c[0].adapter; + adev->pm.fru_eeprom_i2c_bus = &adev->pm.smu_i2c[0].adapter; res = i2c_add_adapter(control); if (res) @@ -727,9 +750,13 @@ int smu_v11_0_i2c_control_init(struct i2c_adapter *control) return res; } -void smu_v11_0_i2c_control_fini(struct i2c_adapter *control) +void smu_v11_0_i2c_control_fini(struct amdgpu_device *adev) { + struct i2c_adapter *control = adev->pm.ras_eeprom_i2c_bus; + i2c_del_adapter(control); + adev->pm.ras_eeprom_i2c_bus = NULL; + adev->pm.fru_eeprom_i2c_bus = NULL; } /* diff --git a/drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.h b/drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.h index 44467c05f642..96ad14288a0c 100644 --- a/drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.h +++ b/drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.h @@ -26,9 +26,9 @@ #include <linux/types.h> -struct i2c_adapter; +struct amdgpu_device; -int smu_v11_0_i2c_control_init(struct i2c_adapter *control); -void smu_v11_0_i2c_control_fini(struct i2c_adapter *control); +int smu_v11_0_i2c_control_init(struct amdgpu_device *adev); +void smu_v11_0_i2c_control_fini(struct amdgpu_device *adev); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/smuio_v11_0.c b/drivers/gpu/drm/amd/amdgpu/smuio_v11_0.c index b6f1322f908c..acdc40f99ab3 100644 --- a/drivers/gpu/drm/amd/amdgpu/smuio_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/smuio_v11_0.c @@ -59,7 +59,7 @@ static void smuio_v11_0_update_rom_clock_gating(struct amdgpu_device *adev, bool 
WREG32_SOC15(SMUIO, 0, mmCGTT_ROM_CLK_CTRL0, data); } -static void smuio_v11_0_get_clock_gating_state(struct amdgpu_device *adev, u32 *flags) +static void smuio_v11_0_get_clock_gating_state(struct amdgpu_device *adev, u64 *flags) { u32 data; diff --git a/drivers/gpu/drm/amd/amdgpu/smuio_v11_0_6.c b/drivers/gpu/drm/amd/amdgpu/smuio_v11_0_6.c index 3a18dbb55c32..2afeb8b37f62 100644 --- a/drivers/gpu/drm/amd/amdgpu/smuio_v11_0_6.c +++ b/drivers/gpu/drm/amd/amdgpu/smuio_v11_0_6.c @@ -56,7 +56,7 @@ static void smuio_v11_0_6_update_rom_clock_gating(struct amdgpu_device *adev, bo WREG32_SOC15(SMUIO, 0, mmCGTT_ROM_CLK_CTRL0, data); } -static void smuio_v11_0_6_get_clock_gating_state(struct amdgpu_device *adev, u32 *flags) +static void smuio_v11_0_6_get_clock_gating_state(struct amdgpu_device *adev, u64 *flags) { u32 data; diff --git a/drivers/gpu/drm/amd/amdgpu/smuio_v13_0.c b/drivers/gpu/drm/amd/amdgpu/smuio_v13_0.c index 39b7c206770f..13e905c22592 100644 --- a/drivers/gpu/drm/amd/amdgpu/smuio_v13_0.c +++ b/drivers/gpu/drm/amd/amdgpu/smuio_v13_0.c @@ -58,7 +58,7 @@ static void smuio_v13_0_update_rom_clock_gating(struct amdgpu_device *adev, bool WREG32_SOC15(SMUIO, 0, regCGTT_ROM_CLK_CTRL0, data); } -static void smuio_v13_0_get_clock_gating_state(struct amdgpu_device *adev, u32 *flags) +static void smuio_v13_0_get_clock_gating_state(struct amdgpu_device *adev, u64 *flags) { u32 data; diff --git a/drivers/gpu/drm/amd/amdgpu/smuio_v9_0.c b/drivers/gpu/drm/amd/amdgpu/smuio_v9_0.c index 8417890af227..e4e30b9d481b 100644 --- a/drivers/gpu/drm/amd/amdgpu/smuio_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/smuio_v9_0.c @@ -56,7 +56,7 @@ static void smuio_v9_0_update_rom_clock_gating(struct amdgpu_device *adev, bool WREG32_SOC15(SMUIO, 0, mmCGTT_ROM_CLK_CTRL0, data); } -static void smuio_v9_0_get_clock_gating_state(struct amdgpu_device *adev, u32 *flags) +static void smuio_v9_0_get_clock_gating_state(struct amdgpu_device *adev, u64 *flags) { u32 data; diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index 12f80fdc1fbc..fde6154f2009 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -375,39 +375,6 @@ static bool soc15_read_disabled_bios(struct amdgpu_device *adev) return false; } -static bool soc15_read_bios_from_rom(struct amdgpu_device *adev, - u8 *bios, u32 length_bytes) -{ - u32 *dw_ptr; - u32 i, length_dw; - uint32_t rom_index_offset; - uint32_t rom_data_offset; - - if (bios == NULL) - return false; - if (length_bytes == 0) - return false; - /* APU vbios image is part of sbios image */ - if (adev->flags & AMD_IS_APU) - return false; - - dw_ptr = (u32 *)bios; - length_dw = ALIGN(length_bytes, 4) / 4; - - rom_index_offset = - adev->smuio.funcs->get_rom_index_offset(adev); - rom_data_offset = - adev->smuio.funcs->get_rom_data_offset(adev); - - /* set rom index to 0 */ - WREG32(rom_index_offset, 0); - /* read out the rom data */ - for (i = 0; i < length_dw; i++) - dw_ptr[i] = RREG32(rom_data_offset); - - return true; -} - static struct soc15_allowed_register_entry soc15_allowed_read_registers[] = { { SOC15_REG_ENTRY(GC, 0, mmGRBM_STATUS)}, { SOC15_REG_ENTRY(GC, 0, mmGRBM_STATUS2)}, @@ -703,7 +670,7 @@ static void soc15_pcie_gen3_enable(struct amdgpu_device *adev) static void soc15_program_aspm(struct amdgpu_device *adev) { - if (!amdgpu_aspm) + if (!amdgpu_device_should_use_aspm(adev)) return; if (!(adev->flags & AMD_IS_APU) && @@ -734,25 +701,12 @@ static uint32_t soc15_get_rev_id(struct amdgpu_device *adev) static void 
soc15_reg_base_init(struct amdgpu_device *adev) { - int r; - /* Set IP register base before any HW register access */ switch (adev->asic_type) { case CHIP_VEGA10: case CHIP_VEGA12: case CHIP_RAVEN: - vega10_reg_base_init(adev); - break; case CHIP_RENOIR: - /* It's safe to do ip discovery here for Renoir, - * it doesn't support SRIOV. */ - if (amdgpu_discovery) { - r = amdgpu_discovery_reg_base_init(adev); - if (r == 0) - break; - DRM_WARN("failed to init reg base from ip discovery table, " - "fallback to legacy init method\n"); - } vega10_reg_base_init(adev); break; case CHIP_VEGA20: @@ -886,6 +840,10 @@ static bool soc15_need_reset_on_init(struct amdgpu_device *adev) { u32 sol_reg; + /* CP hangs in IGT reloading test on RN, reset to WA */ + if (adev->asic_type == CHIP_RENOIR) + return true; + /* Just return false for soc15 GPUs. Reset does not seem to * be necessary. */ @@ -925,7 +883,7 @@ static void soc15_pre_asic_init(struct amdgpu_device *adev) static const struct amdgpu_asic_funcs soc15_asic_funcs = { .read_disabled_bios = &soc15_read_disabled_bios, - .read_bios_from_rom = &soc15_read_bios_from_rom, + .read_bios_from_rom = &amdgpu_soc15_read_bios_from_rom, .read_register = &soc15_read_register, .reset = &soc15_asic_reset, .reset_method = &soc15_asic_reset_method, @@ -947,7 +905,7 @@ static const struct amdgpu_asic_funcs soc15_asic_funcs = static const struct amdgpu_asic_funcs vega20_asic_funcs = { .read_disabled_bios = &soc15_read_disabled_bios, - .read_bios_from_rom = &soc15_read_bios_from_rom, + .read_bios_from_rom = &amdgpu_soc15_read_bios_from_rom, .read_register = &soc15_read_register, .reset = &soc15_asic_reset, .reset_method = &soc15_asic_reset_method, @@ -1222,16 +1180,11 @@ static int soc15_common_early_init(void *handle) static int soc15_common_late_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - int r = 0; if (amdgpu_sriov_vf(adev)) xgpu_ai_mailbox_get_irq(adev); - if (adev->nbio.ras_funcs && - adev->nbio.ras_funcs->ras_late_init) - r = adev->nbio.ras_funcs->ras_late_init(adev); - - return r; + return 0; } static int soc15_common_sw_init(void *handle) @@ -1252,10 +1205,6 @@ static int soc15_common_sw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - if (adev->nbio.ras_funcs && - adev->nbio.ras_funcs->ras_fini) - adev->nbio.ras_funcs->ras_fini(adev); - if (adev->df.funcs && adev->df.funcs->sw_fini) adev->df.funcs->sw_fini(adev); @@ -1321,11 +1270,11 @@ static int soc15_common_hw_fini(void *handle) if (adev->nbio.ras_if && amdgpu_ras_is_supported(adev, adev->nbio.ras_if->block)) { - if (adev->nbio.ras_funcs && - adev->nbio.ras_funcs->init_ras_controller_interrupt) + if (adev->nbio.ras && + adev->nbio.ras->init_ras_controller_interrupt) amdgpu_irq_put(adev, &adev->nbio.ras_controller_irq, 0); - if (adev->nbio.ras_funcs && - adev->nbio.ras_funcs->init_ras_err_event_athub_interrupt) + if (adev->nbio.ras && + adev->nbio.ras->init_ras_err_event_athub_interrupt) amdgpu_irq_put(adev, &adev->nbio.ras_err_event_athub_irq, 0); } @@ -1457,7 +1406,7 @@ static int soc15_common_set_clockgating_state(void *handle, return 0; } -static void soc15_common_get_clockgating_state(void *handle, u32 *flags) +static void soc15_common_get_clockgating_state(void *handle, u64 *flags) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; int data; diff --git a/drivers/gpu/drm/amd/amdgpu/soc15_common.h b/drivers/gpu/drm/amd/amdgpu/soc15_common.h index 473767e03676..9fefd403e14f 100644 --- 
a/drivers/gpu/drm/amd/amdgpu/soc15_common.h +++ b/drivers/gpu/drm/amd/amdgpu/soc15_common.h @@ -28,13 +28,13 @@ #define SOC15_REG_OFFSET(ip, inst, reg) (adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) #define __WREG32_SOC15_RLC__(reg, value, flag, hwip) \ - ((amdgpu_sriov_vf(adev) && adev->gfx.rlc.funcs && adev->gfx.rlc.funcs->sriov_wreg) ? \ - adev->gfx.rlc.funcs->sriov_wreg(adev, reg, value, flag, hwip) : \ + ((amdgpu_sriov_vf(adev) && adev->gfx.rlc.funcs && adev->gfx.rlc.rlcg_reg_access_supported) ? \ + amdgpu_sriov_wreg(adev, reg, value, flag, hwip) : \ WREG32(reg, value)) #define __RREG32_SOC15_RLC__(reg, flag, hwip) \ - ((amdgpu_sriov_vf(adev) && adev->gfx.rlc.funcs && adev->gfx.rlc.funcs->sriov_rreg) ? \ - adev->gfx.rlc.funcs->sriov_rreg(adev, reg, flag, hwip) : \ + ((amdgpu_sriov_vf(adev) && adev->gfx.rlc.funcs && adev->gfx.rlc.rlcg_reg_access_supported) ? \ + amdgpu_sriov_rreg(adev, reg, flag, hwip) : \ RREG32(reg)) #define WREG32_FIELD15(ip, idx, reg, field, val) \ @@ -45,6 +45,14 @@ ~REG_FIELD_MASK(reg, field)) | (val) << REG_FIELD_SHIFT(reg, field), \ 0, ip##_HWIP) +#define WREG32_FIELD15_PREREG(ip, idx, reg_name, field, val) \ + __WREG32_SOC15_RLC__(adev->reg_offset[ip##_HWIP][idx][reg##reg_name##_BASE_IDX] + reg##reg_name, \ + (__RREG32_SOC15_RLC__( \ + adev->reg_offset[ip##_HWIP][idx][reg##reg_name##_BASE_IDX] + reg##reg_name, \ + 0, ip##_HWIP) & \ + ~REG_FIELD_MASK(reg_name, field)) | (val) << REG_FIELD_SHIFT(reg_name, field), \ + 0, ip##_HWIP) + #define RREG32_SOC15(ip, inst, reg) \ __RREG32_SOC15_RLC__(adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg, \ 0, ip##_HWIP) diff --git a/drivers/gpu/drm/amd/amdgpu/soc21.c b/drivers/gpu/drm/amd/amdgpu/soc21.c new file mode 100644 index 000000000000..a139fd1d3127 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/soc21.c @@ -0,0 +1,620 @@ +/* + * Copyright 2021 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ +#include <linux/firmware.h> +#include <linux/slab.h> +#include <linux/module.h> +#include <linux/pci.h> + +#include "amdgpu.h" +#include "amdgpu_atombios.h" +#include "amdgpu_ih.h" +#include "amdgpu_uvd.h" +#include "amdgpu_vce.h" +#include "amdgpu_ucode.h" +#include "amdgpu_psp.h" +#include "amdgpu_smu.h" +#include "atom.h" +#include "amd_pcie.h" + +#include "gc/gc_11_0_0_offset.h" +#include "gc/gc_11_0_0_sh_mask.h" +#include "mp/mp_13_0_0_offset.h" + +#include "soc15.h" +#include "soc15_common.h" + +static const struct amd_ip_funcs soc21_common_ip_funcs; + +/* + * Indirect registers accessor + */ +static u32 soc21_pcie_rreg(struct amdgpu_device *adev, u32 reg) +{ + unsigned long address, data; + address = adev->nbio.funcs->get_pcie_index_offset(adev); + data = adev->nbio.funcs->get_pcie_data_offset(adev); + + return amdgpu_device_indirect_rreg(adev, address, data, reg); +} + +static void soc21_pcie_wreg(struct amdgpu_device *adev, u32 reg, u32 v) +{ + unsigned long address, data; + + address = adev->nbio.funcs->get_pcie_index_offset(adev); + data = adev->nbio.funcs->get_pcie_data_offset(adev); + + amdgpu_device_indirect_wreg(adev, address, data, reg, v); +} + +static u64 soc21_pcie_rreg64(struct amdgpu_device *adev, u32 reg) +{ + unsigned long address, data; + address = adev->nbio.funcs->get_pcie_index_offset(adev); + data = adev->nbio.funcs->get_pcie_data_offset(adev); + + return amdgpu_device_indirect_rreg64(adev, address, data, reg); +} + +static void soc21_pcie_wreg64(struct amdgpu_device *adev, u32 reg, u64 v) +{ + unsigned long address, data; + + address = adev->nbio.funcs->get_pcie_index_offset(adev); + data = adev->nbio.funcs->get_pcie_data_offset(adev); + + amdgpu_device_indirect_wreg64(adev, address, data, reg, v); +} + +static u32 soc21_didt_rreg(struct amdgpu_device *adev, u32 reg) +{ + unsigned long flags, address, data; + u32 r; + + address = SOC15_REG_OFFSET(GC, 0, regDIDT_IND_INDEX); + data = SOC15_REG_OFFSET(GC, 0, regDIDT_IND_DATA); + + spin_lock_irqsave(&adev->didt_idx_lock, flags); + WREG32(address, (reg)); + r = RREG32(data); + spin_unlock_irqrestore(&adev->didt_idx_lock, flags); + return r; +} + +static void soc21_didt_wreg(struct amdgpu_device *adev, u32 reg, u32 v) +{ + unsigned long flags, address, data; + + address = SOC15_REG_OFFSET(GC, 0, regDIDT_IND_INDEX); + data = SOC15_REG_OFFSET(GC, 0, regDIDT_IND_DATA); + + spin_lock_irqsave(&adev->didt_idx_lock, flags); + WREG32(address, (reg)); + WREG32(data, (v)); + spin_unlock_irqrestore(&adev->didt_idx_lock, flags); +} + +static u32 soc21_get_config_memsize(struct amdgpu_device *adev) +{ + return adev->nbio.funcs->get_memsize(adev); +} + +static u32 soc21_get_xclk(struct amdgpu_device *adev) +{ + return adev->clock.spll.reference_freq; +} + + +void soc21_grbm_select(struct amdgpu_device *adev, + u32 me, u32 pipe, u32 queue, u32 vmid) +{ + u32 grbm_gfx_cntl = 0; + grbm_gfx_cntl = REG_SET_FIELD(grbm_gfx_cntl, GRBM_GFX_CNTL, PIPEID, pipe); + grbm_gfx_cntl = REG_SET_FIELD(grbm_gfx_cntl, GRBM_GFX_CNTL, MEID, me); + grbm_gfx_cntl = REG_SET_FIELD(grbm_gfx_cntl, GRBM_GFX_CNTL, VMID, vmid); + grbm_gfx_cntl = REG_SET_FIELD(grbm_gfx_cntl, GRBM_GFX_CNTL, QUEUEID, queue); + + WREG32(SOC15_REG_OFFSET(GC, 0, regGRBM_GFX_CNTL), grbm_gfx_cntl); +} + +static void soc21_vga_set_state(struct amdgpu_device *adev, bool state) +{ + /* todo */ +} + +static bool soc21_read_disabled_bios(struct amdgpu_device *adev) +{ + /* todo */ + return false; +} + +static struct soc15_allowed_register_entry soc21_allowed_read_registers[] = { + 
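/*
 * Allow-list consulted by soc21_read_register() below: only the status
 * registers enumerated here (plus GB_ADDR_CONFIG, which is served from
 * the cached copy when one is available) can be read back through the
 * generic register-read interface; any other offset is rejected with
 * -EINVAL.
 */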
{ SOC15_REG_ENTRY(GC, 0, regGRBM_STATUS)}, + { SOC15_REG_ENTRY(GC, 0, regGRBM_STATUS2)}, + { SOC15_REG_ENTRY(GC, 0, regGRBM_STATUS_SE0)}, + { SOC15_REG_ENTRY(GC, 0, regGRBM_STATUS_SE1)}, + { SOC15_REG_ENTRY(GC, 0, regGRBM_STATUS_SE2)}, + { SOC15_REG_ENTRY(GC, 0, regGRBM_STATUS_SE3)}, + { SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_STATUS_REG)}, + { SOC15_REG_ENTRY(SDMA1, 0, regSDMA1_STATUS_REG)}, + { SOC15_REG_ENTRY(GC, 0, regCP_STAT)}, + { SOC15_REG_ENTRY(GC, 0, regCP_STALLED_STAT1)}, + { SOC15_REG_ENTRY(GC, 0, regCP_STALLED_STAT2)}, + { SOC15_REG_ENTRY(GC, 0, regCP_STALLED_STAT3)}, + { SOC15_REG_ENTRY(GC, 0, regCP_CPF_BUSY_STAT)}, + { SOC15_REG_ENTRY(GC, 0, regCP_CPF_STALLED_STAT1)}, + { SOC15_REG_ENTRY(GC, 0, regCP_CPF_STATUS)}, + { SOC15_REG_ENTRY(GC, 0, regCP_CPC_BUSY_STAT)}, + { SOC15_REG_ENTRY(GC, 0, regCP_CPC_STALLED_STAT1)}, + { SOC15_REG_ENTRY(GC, 0, regCP_CPC_STATUS)}, + { SOC15_REG_ENTRY(GC, 0, regGB_ADDR_CONFIG)}, +}; + +static uint32_t soc21_read_indexed_register(struct amdgpu_device *adev, u32 se_num, + u32 sh_num, u32 reg_offset) +{ + uint32_t val; + + mutex_lock(&adev->grbm_idx_mutex); + if (se_num != 0xffffffff || sh_num != 0xffffffff) + amdgpu_gfx_select_se_sh(adev, se_num, sh_num, 0xffffffff); + + val = RREG32(reg_offset); + + if (se_num != 0xffffffff || sh_num != 0xffffffff) + amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + mutex_unlock(&adev->grbm_idx_mutex); + return val; +} + +static uint32_t soc21_get_register_value(struct amdgpu_device *adev, + bool indexed, u32 se_num, + u32 sh_num, u32 reg_offset) +{ + if (indexed) { + return soc21_read_indexed_register(adev, se_num, sh_num, reg_offset); + } else { + if (reg_offset == SOC15_REG_OFFSET(GC, 0, regGB_ADDR_CONFIG) && adev->gfx.config.gb_addr_config) + return adev->gfx.config.gb_addr_config; + return RREG32(reg_offset); + } +} + +static int soc21_read_register(struct amdgpu_device *adev, u32 se_num, + u32 sh_num, u32 reg_offset, u32 *value) +{ + uint32_t i; + struct soc15_allowed_register_entry *en; + + *value = 0; + for (i = 0; i < ARRAY_SIZE(soc21_allowed_read_registers); i++) { + en = &soc21_allowed_read_registers[i]; + if (reg_offset != + (adev->reg_offset[en->hwip][en->inst][en->seg] + en->reg_offset)) + continue; + + *value = soc21_get_register_value(adev, + soc21_allowed_read_registers[i].grbm_indexed, + se_num, sh_num, reg_offset); + return 0; + } + return -EINVAL; +} + +#if 0 +static int soc21_asic_mode1_reset(struct amdgpu_device *adev) +{ + u32 i; + int ret = 0; + + amdgpu_atombios_scratch_regs_engine_hung(adev, true); + + /* disable BM */ + pci_clear_master(adev->pdev); + + amdgpu_device_cache_pci_state(adev->pdev); + + if (amdgpu_dpm_is_mode1_reset_supported(adev)) { + dev_info(adev->dev, "GPU smu mode1 reset\n"); + ret = amdgpu_dpm_mode1_reset(adev); + } else { + dev_info(adev->dev, "GPU psp mode1 reset\n"); + ret = psp_gpu_reset(adev); + } + + if (ret) + dev_err(adev->dev, "GPU mode1 reset failed\n"); + amdgpu_device_load_pci_state(adev->pdev); + + /* wait for asic to come out of reset */ + for (i = 0; i < adev->usec_timeout; i++) { + u32 memsize = adev->nbio.funcs->get_memsize(adev); + + if (memsize != 0xffffffff) + break; + udelay(1); + } + + amdgpu_atombios_scratch_regs_engine_hung(adev, false); + + return ret; +} +#endif + +static enum amd_reset_method +soc21_asic_reset_method(struct amdgpu_device *adev) +{ + if (amdgpu_reset_method == AMD_RESET_METHOD_MODE1 || + amdgpu_reset_method == AMD_RESET_METHOD_BACO) + return amdgpu_reset_method; + + if (amdgpu_reset_method != -1) + 
dev_warn(adev->dev, "Specified reset method:%d isn't supported, using AUTO instead.\n", + amdgpu_reset_method); + + switch (adev->ip_versions[MP1_HWIP][0]) { + case IP_VERSION(13, 0, 0): + return AMD_RESET_METHOD_MODE1; + default: + if (amdgpu_dpm_is_baco_supported(adev)) + return AMD_RESET_METHOD_BACO; + else + return AMD_RESET_METHOD_MODE1; + } +} + +static int soc21_asic_reset(struct amdgpu_device *adev) +{ + int ret = 0; + + switch (soc21_asic_reset_method(adev)) { + case AMD_RESET_METHOD_PCI: + dev_info(adev->dev, "PCI reset\n"); + ret = amdgpu_device_pci_reset(adev); + break; + case AMD_RESET_METHOD_BACO: + dev_info(adev->dev, "BACO reset\n"); + ret = amdgpu_dpm_baco_reset(adev); + break; + default: + dev_info(adev->dev, "MODE1 reset\n"); + ret = amdgpu_device_mode1_reset(adev); + break; + } + + return ret; +} + +static int soc21_set_uvd_clocks(struct amdgpu_device *adev, u32 vclk, u32 dclk) +{ + /* todo */ + return 0; +} + +static int soc21_set_vce_clocks(struct amdgpu_device *adev, u32 evclk, u32 ecclk) +{ + /* todo */ + return 0; +} + +static void soc21_pcie_gen3_enable(struct amdgpu_device *adev) +{ + if (pci_is_root_bus(adev->pdev->bus)) + return; + + if (amdgpu_pcie_gen2 == 0) + return; + + if (!(adev->pm.pcie_gen_mask & (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 | + CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3))) + return; + + /* todo */ +} + +static void soc21_program_aspm(struct amdgpu_device *adev) +{ + + if (amdgpu_aspm == 0) + return; + + /* todo */ +} + +static void soc21_enable_doorbell_aperture(struct amdgpu_device *adev, + bool enable) +{ + adev->nbio.funcs->enable_doorbell_aperture(adev, enable); + adev->nbio.funcs->enable_doorbell_selfring_aperture(adev, enable); +} + +const struct amdgpu_ip_block_version soc21_common_ip_block = +{ + .type = AMD_IP_BLOCK_TYPE_COMMON, + .major = 1, + .minor = 0, + .rev = 0, + .funcs = &soc21_common_ip_funcs, +}; + +static uint32_t soc21_get_rev_id(struct amdgpu_device *adev) +{ + return adev->nbio.funcs->get_rev_id(adev); +} + +static bool soc21_need_full_reset(struct amdgpu_device *adev) +{ + return true; +} + +static bool soc21_need_reset_on_init(struct amdgpu_device *adev) +{ + u32 sol_reg; + + if (adev->flags & AMD_IS_APU) + return false; + + /* Check sOS sign of life register to confirm sys driver and sOS + * have already been loaded. + */
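/*
 * The sign-of-life scratch register is written by the PSP once its
 * system firmware is up; reading it back non-zero here (for instance
 * after a driver reload without a power cycle) means the ASIC is not
 * in a clean power-on state, so a reset is required before the driver
 * reinitializes it.
 */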
+ sol_reg = RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_81); + if (sol_reg) + return true; + + return false; +} + +static uint64_t soc21_get_pcie_replay_count(struct amdgpu_device *adev) +{ + + /* TODO + * dummy implementation for the pcie_replay_count sysfs interface + */ + + return 0; +} + +static void soc21_init_doorbell_index(struct amdgpu_device *adev) +{ + adev->doorbell_index.kiq = AMDGPU_NAVI10_DOORBELL_KIQ; + adev->doorbell_index.mec_ring0 = AMDGPU_NAVI10_DOORBELL_MEC_RING0; + adev->doorbell_index.mec_ring1 = AMDGPU_NAVI10_DOORBELL_MEC_RING1; + adev->doorbell_index.mec_ring2 = AMDGPU_NAVI10_DOORBELL_MEC_RING2; + adev->doorbell_index.mec_ring3 = AMDGPU_NAVI10_DOORBELL_MEC_RING3; + adev->doorbell_index.mec_ring4 = AMDGPU_NAVI10_DOORBELL_MEC_RING4; + adev->doorbell_index.mec_ring5 = AMDGPU_NAVI10_DOORBELL_MEC_RING5; + adev->doorbell_index.mec_ring6 = AMDGPU_NAVI10_DOORBELL_MEC_RING6; + adev->doorbell_index.mec_ring7 = AMDGPU_NAVI10_DOORBELL_MEC_RING7; + adev->doorbell_index.userqueue_start = AMDGPU_NAVI10_DOORBELL_USERQUEUE_START; + adev->doorbell_index.userqueue_end = AMDGPU_NAVI10_DOORBELL_USERQUEUE_END; + adev->doorbell_index.gfx_ring0 = AMDGPU_NAVI10_DOORBELL_GFX_RING0; + adev->doorbell_index.gfx_ring1 = AMDGPU_NAVI10_DOORBELL_GFX_RING1; + adev->doorbell_index.mes_ring = AMDGPU_NAVI10_DOORBELL_MES_RING; + adev->doorbell_index.sdma_engine[0] = AMDGPU_NAVI10_DOORBELL_sDMA_ENGINE0; + adev->doorbell_index.sdma_engine[1] = AMDGPU_NAVI10_DOORBELL_sDMA_ENGINE1; + adev->doorbell_index.ih = AMDGPU_NAVI10_DOORBELL_IH; + adev->doorbell_index.vcn.vcn_ring0_1 = AMDGPU_NAVI10_DOORBELL64_VCN0_1; + adev->doorbell_index.vcn.vcn_ring2_3 = AMDGPU_NAVI10_DOORBELL64_VCN2_3; + adev->doorbell_index.vcn.vcn_ring4_5 = AMDGPU_NAVI10_DOORBELL64_VCN4_5; + adev->doorbell_index.vcn.vcn_ring6_7 = AMDGPU_NAVI10_DOORBELL64_VCN6_7; + adev->doorbell_index.first_non_cp = AMDGPU_NAVI10_DOORBELL64_FIRST_NON_CP; + adev->doorbell_index.last_non_cp = AMDGPU_NAVI10_DOORBELL64_LAST_NON_CP; + + adev->doorbell_index.max_assignment = AMDGPU_NAVI10_DOORBELL_MAX_ASSIGNMENT << 1; + adev->doorbell_index.sdma_doorbell_range = 20; +} + +static void soc21_pre_asic_init(struct amdgpu_device *adev) +{ +} + +static const struct amdgpu_asic_funcs soc21_asic_funcs = +{ + .read_disabled_bios = &soc21_read_disabled_bios, + .read_bios_from_rom = &amdgpu_soc15_read_bios_from_rom, + .read_register = &soc21_read_register, + .reset = &soc21_asic_reset, + .reset_method = &soc21_asic_reset_method, + .set_vga_state = &soc21_vga_set_state, + .get_xclk = &soc21_get_xclk, + .set_uvd_clocks = &soc21_set_uvd_clocks, + .set_vce_clocks = &soc21_set_vce_clocks, + .get_config_memsize = &soc21_get_config_memsize, + .init_doorbell_index = &soc21_init_doorbell_index, + .need_full_reset = &soc21_need_full_reset, + .need_reset_on_init = &soc21_need_reset_on_init, + .get_pcie_replay_count = &soc21_get_pcie_replay_count, + .supports_baco = &amdgpu_dpm_is_baco_supported, + .pre_asic_init = &soc21_pre_asic_init, +}; + +static int soc21_common_early_init(void *handle) +{ +#define MMIO_REG_HOLE_OFFSET (0x80000 - PAGE_SIZE) + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + adev->rmmio_remap.reg_offset = MMIO_REG_HOLE_OFFSET; + adev->rmmio_remap.bus_addr = adev->rmmio_base + MMIO_REG_HOLE_OFFSET; + adev->smc_rreg = NULL; + adev->smc_wreg = NULL; + adev->pcie_rreg = &soc21_pcie_rreg; + adev->pcie_wreg = &soc21_pcie_wreg; + adev->pcie_rreg64 = &soc21_pcie_rreg64; + adev->pcie_wreg64 = &soc21_pcie_wreg64; + + /* TODO: will add them during VCN v2 implementation */ + adev->uvd_ctx_rreg = NULL; + adev->uvd_ctx_wreg = NULL; + + adev->didt_rreg = &soc21_didt_rreg; + adev->didt_wreg = &soc21_didt_wreg; + + adev->asic_funcs = &soc21_asic_funcs; + + adev->rev_id = soc21_get_rev_id(adev); + adev->external_rev_id = 0xff; + switch (adev->ip_versions[GC_HWIP][0]) { + case IP_VERSION(11, 0, 0): + adev->cg_flags = 0; + adev->pg_flags = 0; + adev->external_rev_id = adev->rev_id + 0x1; // TODO: need update + break; + default: + /* FIXME: not supported yet */ + return -EINVAL; + } + + return 0; +} + +static int soc21_common_late_init(void *handle) +{ + return 0; +} + +static int soc21_common_sw_init(void *handle) +{ + return 0; +} + +static int soc21_common_sw_fini(void *handle) +{ + return 0; +} + +static int soc21_common_hw_init(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + /* enable pcie gen2/3 link */ + soc21_pcie_gen3_enable(adev); + /* enable aspm */ + soc21_program_aspm(adev); + /* setup nbio registers */ + adev->nbio.funcs->init_registers(adev); + /* remap HDP registers to a hole in mmio space, + * to expose those registers + * to process space + */ + if (adev->nbio.funcs->remap_hdp_registers) + adev->nbio.funcs->remap_hdp_registers(adev); + /* enable the doorbell aperture */ + soc21_enable_doorbell_aperture(adev, true); + + return 0; +}
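/*
 * The remap target programmed in early_init is easy to sanity-check:
 * with the 512 KiB register aperture implied by MMIO_REG_HOLE_OFFSET's
 * 0x80000 base and an assumed 4 KiB PAGE_SIZE, the hole is the last
 * whole page of the aperture, which is what makes it safe to expose to
 * process space page by page. A stand-alone sketch of the arithmetic
 * (illustrative only, not driver code):
 */
#if 0
#include <assert.h>

#define BAR_SIZE 0x80000UL	/* 512 KiB register aperture */
#define PAGE_SZ  0x1000UL	/* assumed 4 KiB page size */

int main(void)
{
	unsigned long hole = BAR_SIZE - PAGE_SZ;	/* MMIO_REG_HOLE_OFFSET */

	assert(hole == 0x7f000UL);		/* last page of the aperture */
	assert((hole & (PAGE_SZ - 1)) == 0);	/* page aligned, mappable */
	return 0;
}
#endif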
+ +static int soc21_common_hw_fini(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + /* disable the doorbell aperture */ + soc21_enable_doorbell_aperture(adev, false); + + return 0; +} + +static int soc21_common_suspend(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + return soc21_common_hw_fini(adev); +} + +static int soc21_common_resume(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + return soc21_common_hw_init(adev); +} + +static bool soc21_common_is_idle(void *handle) +{ + return true; +} + +static int soc21_common_wait_for_idle(void *handle) +{ + return 0; +} + +static int soc21_common_soft_reset(void *handle) +{ + return 0; +} + +static int soc21_common_set_clockgating_state(void *handle, + enum amd_clockgating_state state) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + switch (adev->ip_versions[NBIO_HWIP][0]) { + case IP_VERSION(4, 3, 0): + adev->nbio.funcs->update_medium_grain_clock_gating(adev, + state == AMD_CG_STATE_GATE); + adev->nbio.funcs->update_medium_grain_light_sleep(adev, + state == AMD_CG_STATE_GATE); + adev->hdp.funcs->update_clock_gating(adev, + state == AMD_CG_STATE_GATE); + break; + default: + break; + } + return 0; +} + +static int soc21_common_set_powergating_state(void *handle, + enum amd_powergating_state state) +{ + /* TODO */ + return 0; +} + +static void soc21_common_get_clockgating_state(void *handle, u64 *flags) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + adev->nbio.funcs->get_clockgating_state(adev, flags); + + adev->hdp.funcs->get_clock_gating_state(adev, flags); + + return; +} + +static const struct amd_ip_funcs soc21_common_ip_funcs = { + .name = "soc21_common", + .early_init = soc21_common_early_init, + .late_init = soc21_common_late_init, + .sw_init = soc21_common_sw_init, + .sw_fini = soc21_common_sw_fini, + .hw_init = soc21_common_hw_init, + .hw_fini = soc21_common_hw_fini, + .suspend = soc21_common_suspend, + .resume = soc21_common_resume, + .is_idle = soc21_common_is_idle, + .wait_for_idle = soc21_common_wait_for_idle, + .soft_reset = soc21_common_soft_reset, +
.set_clockgating_state = soc21_common_set_clockgating_state, + .set_powergating_state = soc21_common_set_powergating_state, + .get_clockgating_state = soc21_common_get_clockgating_state, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c b/drivers/gpu/drm/amd/amdgpu/soc21.h index a766e1aad2b9..4c8067af1b65 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c +++ b/drivers/gpu/drm/amd/amdgpu/soc21.h @@ -20,49 +20,11 @@ * OTHER DEALINGS IN THE SOFTWARE. * */ +#ifndef __SOC21_H__ +#define __SOC21_H__ -#include "amdgpu.h" -#include "amdgpu_ras.h" +extern const struct amdgpu_ip_block_version soc21_common_ip_block; -int amdgpu_hdp_ras_late_init(struct amdgpu_device *adev) -{ - int r; - struct ras_ih_if ih_info = { - .cb = NULL, - }; - struct ras_fs_if fs_info = { - .sysfs_name = "hdp_err_count", - }; - - if (!adev->hdp.ras_if) { - adev->hdp.ras_if = kmalloc(sizeof(struct ras_common_if), GFP_KERNEL); - if (!adev->hdp.ras_if) - return -ENOMEM; - adev->hdp.ras_if->block = AMDGPU_RAS_BLOCK__HDP; - adev->hdp.ras_if->type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE; - adev->hdp.ras_if->sub_block_index = 0; - } - ih_info.head = fs_info.head = *adev->hdp.ras_if; - r = amdgpu_ras_late_init(adev, adev->hdp.ras_if, - &fs_info, &ih_info); - if (r || !amdgpu_ras_is_supported(adev, adev->hdp.ras_if->block)) { - kfree(adev->hdp.ras_if); - adev->hdp.ras_if = NULL; - } - - return r; -} - -void amdgpu_hdp_ras_fini(struct amdgpu_device *adev) -{ - if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__HDP) && - adev->hdp.ras_if) { - struct ras_common_if *ras_if = adev->hdp.ras_if; - struct ras_ih_if ih_info = { - .cb = NULL, - }; - - amdgpu_ras_late_fini(adev, ras_if, &ih_info); - kfree(ras_if); - } -} +void soc21_grbm_select(struct amdgpu_device *adev, + u32 me, u32 pipe, u32 queue, u32 vmid); +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h b/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h index 5093826a43d1..509d8a1945eb 100644 --- a/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h +++ b/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h @@ -64,7 +64,8 @@ enum ta_ras_status { TA_RAS_STATUS__ERROR_PCS_STATE_ERROR = 0xA016, TA_RAS_STATUS__ERROR_PCS_STATE_HANG = 0xA017, TA_RAS_STATUS__ERROR_PCS_STATE_UNKNOWN = 0xA018, - TA_RAS_STATUS__ERROR_UNSUPPORTED_ERROR_INJ = 0xA019 + TA_RAS_STATUS__ERROR_UNSUPPORTED_ERROR_INJ = 0xA019, + TA_RAS_STATUS__TEE_ERROR_ACCESS_DENIED = 0xA01A }; enum ta_ras_block { diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c index 20b44983ac94..939cb203f7ad 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c +++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c @@ -300,7 +300,6 @@ static void umc_v6_1_query_error_address(struct amdgpu_device *adev, { uint32_t lsb, mc_umc_status_addr; uint64_t mc_umc_status, err_addr, retired_page, mc_umc_addrt0; - struct eeprom_table_record *err_rec; uint32_t channel_index = adev->umc.channel_idx_tbl[umc_inst * adev->umc.channel_inst_num + ch_inst]; if (adev->asic_type == CHIP_ARCTURUS) { @@ -328,8 +327,6 @@ static void umc_v6_1_query_error_address(struct amdgpu_device *adev, return; } - err_rec = &err_data->err_addr[err_data->err_addr_cnt]; - /* calculate error address if ue/ce error is detected */ if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 && (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 || @@ -348,18 +345,9 @@ static void umc_v6_1_query_error_address(struct amdgpu_device *adev, /* we only save ue error information currently, ce is skipped */ if (REG_GET_FIELD(mc_umc_status, 
MCA_UMC_UMC0_MCUMC_STATUST0, UECC) - == 1) { - err_rec->address = err_addr; - /* page frame address is saved */ - err_rec->retired_page = retired_page >> AMDGPU_GPU_PAGE_SHIFT; - err_rec->ts = (uint64_t)ktime_get_real_seconds(); - err_rec->err_type = AMDGPU_RAS_EEPROM_ERR_NON_RECOVERABLE; - err_rec->cu = 0; - err_rec->mem_channel = channel_index; - err_rec->mcumc_id = umc_inst; - - err_data->err_addr_cnt++; - } + == 1) + amdgpu_umc_fill_error_record(err_data, err_addr, + retired_page, channel_index, umc_inst); } /* clear umc status */ @@ -465,10 +453,14 @@ static void umc_v6_1_err_cnt_init(struct amdgpu_device *adev) umc_v6_1_enable_umc_index_mode(adev); } -const struct amdgpu_umc_ras_funcs umc_v6_1_ras_funcs = { - .err_cnt_init = umc_v6_1_err_cnt_init, - .ras_late_init = amdgpu_umc_ras_late_init, - .ras_fini = amdgpu_umc_ras_fini, +const struct amdgpu_ras_block_hw_ops umc_v6_1_ras_hw_ops = { .query_ras_error_count = umc_v6_1_query_ras_error_count, .query_ras_error_address = umc_v6_1_query_ras_error_address, }; + +struct amdgpu_umc_ras umc_v6_1_ras = { + .ras_block = { + .hw_ops = &umc_v6_1_ras_hw_ops, + }, + .err_cnt_init = umc_v6_1_err_cnt_init, +};
\ No newline at end of file diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_1.h b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.h index 5dc36c730bb2..50c632eb4cc6 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v6_1.h +++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.h @@ -45,7 +45,7 @@ /* umc ce count initial value */ #define UMC_V6_1_CE_CNT_INIT (UMC_V6_1_CE_CNT_MAX - UMC_V6_1_CE_INT_THRESHOLD) -extern const struct amdgpu_umc_ras_funcs umc_v6_1_ras_funcs; +extern struct amdgpu_umc_ras umc_v6_1_ras; extern const uint32_t umc_v6_1_channel_idx_tbl[UMC_V6_1_UMC_INSTANCE_NUM][UMC_V6_1_CHANNEL_INSTANCE_NUM]; diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c b/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c index 6dd1e19e8d43..606892dbea1c 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c +++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c @@ -47,6 +47,13 @@ static inline uint32_t get_umc_v6_7_reg_offset(struct amdgpu_device *adev, uint32_t umc_inst, uint32_t ch_inst) { + uint32_t index = umc_inst * adev->umc.channel_inst_num + ch_inst; + + /* adjust umc and channel index offset, + * the register address is not linear on each umc instance */ + umc_inst = index / 4; + ch_inst = index % 4; + return adev->umc.channel_offs * ch_inst + UMC_V6_7_INST_DIST * umc_inst; } @@ -57,50 +64,89 @@ static inline uint32_t get_umc_v6_7_channel_index(struct amdgpu_device *adev, return adev->umc.channel_idx_tbl[umc_inst * adev->umc.channel_inst_num + ch_inst]; } +static void umc_v6_7_query_error_status_helper(struct amdgpu_device *adev, + uint64_t mc_umc_status, uint32_t umc_reg_offset) +{ + uint32_t mc_umc_addr; + uint64_t reg_value; + + if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1) + dev_info(adev->dev, "Deferred error, no user action is needed.\n"); + + if (mc_umc_status) + dev_info(adev->dev, "MCA STATUS 0x%llx, umc_reg_offset 0x%x\n", mc_umc_status, umc_reg_offset); + + /* print IPID registers value */ + mc_umc_addr = + SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_IPIDT0); + reg_value = RREG64_PCIE((mc_umc_addr + umc_reg_offset) * 4); + if (reg_value) + dev_info(adev->dev, "MCA IPID 0x%llx, umc_reg_offset 0x%x\n", reg_value, umc_reg_offset); + + /* print SYND registers value */ + mc_umc_addr = + SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_SYNDT0); + reg_value = RREG64_PCIE((mc_umc_addr + umc_reg_offset) * 4); + if (reg_value) + dev_info(adev->dev, "MCA SYND 0x%llx, umc_reg_offset 0x%x\n", reg_value, umc_reg_offset); + + /* print MISC0 registers value */ + mc_umc_addr = + SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_MISC0T0); + reg_value = RREG64_PCIE((mc_umc_addr + umc_reg_offset) * 4); + if (reg_value) + dev_info(adev->dev, "MCA MISC0 0x%llx, umc_reg_offset 0x%x\n", reg_value, umc_reg_offset); +} + static void umc_v6_7_ecc_info_query_correctable_error_count(struct amdgpu_device *adev, - uint32_t channel_index, + uint32_t umc_inst, uint32_t ch_inst, unsigned long *error_count) { - uint32_t ecc_err_cnt; uint64_t mc_umc_status; + uint32_t eccinfo_table_idx; + uint32_t umc_reg_offset; struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); - /* - * select the lower chip and check the error count - * skip add error count, calc error counter only from mca_umc_status - */ - ecc_err_cnt = ras->umc_ecc.ecc[channel_index].ce_count_lo_chip; - - /* - * select the higher chip and check the err counter - * skip add error count, calc error counter only from mca_umc_status - */ - ecc_err_cnt = ras->umc_ecc.ecc[channel_index].ce_count_hi_chip; + umc_reg_offset = get_umc_v6_7_reg_offset(adev, + umc_inst, ch_inst); + 
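/*
 * What the two index computations around this point do, by example
 * (a sketch, assuming adev->umc.channel_inst_num == 8 per
 * UMC_V6_7_CHANNEL_INSTANCE_NUM): get_umc_v6_7_reg_offset() above
 * first flattens (umc_inst, ch_inst) into index = umc_inst * 8 +
 * ch_inst, then re-splits it as index / 4 and index % 4 because the
 * register space advances in UMC_V6_7_INST_DIST strides every four
 * channels -- e.g. (umc 1, ch 2) -> index 10 -> bank 2, slot 2. The
 * eccinfo_table_idx computed next uses the same flattened index, but
 * without the re-split, since the ECC info table is linear.
 */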
eccinfo_table_idx = umc_inst * adev->umc.channel_inst_num + ch_inst; /* check for SRAM correctable error MCUMC_STATUS is a 64 bit register */ - mc_umc_status = ras->umc_ecc.ecc[channel_index].mca_umc_status; + mc_umc_status = ras->umc_ecc.ecc[eccinfo_table_idx].mca_umc_status; if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 && - REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1) + REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1) { *error_count += 1; + + umc_v6_7_query_error_status_helper(adev, mc_umc_status, umc_reg_offset); + } } static void umc_v6_7_ecc_info_querry_uncorrectable_error_count(struct amdgpu_device *adev, - uint32_t channel_index, + uint32_t umc_inst, uint32_t ch_inst, unsigned long *error_count) { uint64_t mc_umc_status; + uint32_t eccinfo_table_idx; + uint32_t umc_reg_offset; struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); + umc_reg_offset = get_umc_v6_7_reg_offset(adev, + umc_inst, ch_inst); + + eccinfo_table_idx = umc_inst * adev->umc.channel_inst_num + ch_inst; /* check the MCUMC_STATUS */ - mc_umc_status = ras->umc_ecc.ecc[channel_index].mca_umc_status; + mc_umc_status = ras->umc_ecc.ecc[eccinfo_table_idx].mca_umc_status; if ((REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1) && (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1 || REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 || REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, PCC) == 1 || REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UC) == 1 || - REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, TCC) == 1)) + REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, TCC) == 1)) { *error_count += 1; + + umc_v6_7_query_error_status_helper(adev, mc_umc_status, umc_reg_offset); + } } static void umc_v6_7_ecc_info_query_ras_error_count(struct amdgpu_device *adev, @@ -110,42 +156,34 @@ static void umc_v6_7_ecc_info_query_ras_error_count(struct amdgpu_device *adev, uint32_t umc_inst = 0; uint32_t ch_inst = 0; - uint32_t umc_reg_offset = 0; - uint32_t channel_index = 0; /*TODO: driver needs to toggle DF Cstate to ensure * safe access of UMC registers. 
Will add the protection */ LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) { - umc_reg_offset = get_umc_v6_7_reg_offset(adev, - umc_inst, - ch_inst); - channel_index = get_umc_v6_7_channel_index(adev, - umc_inst, - ch_inst); umc_v6_7_ecc_info_query_correctable_error_count(adev, - channel_index, + umc_inst, ch_inst, &(err_data->ce_count)); umc_v6_7_ecc_info_querry_uncorrectable_error_count(adev, - channel_index, + umc_inst, ch_inst, &(err_data->ue_count)); } } static void umc_v6_7_ecc_info_query_error_address(struct amdgpu_device *adev, struct ras_err_data *err_data, - uint32_t umc_reg_offset, uint32_t ch_inst, uint32_t umc_inst) { - uint64_t mc_umc_status, err_addr, retired_page; - struct eeprom_table_record *err_rec; + uint64_t mc_umc_status, err_addr, soc_pa, retired_page, column; uint32_t channel_index; + uint32_t eccinfo_table_idx; struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); + eccinfo_table_idx = umc_inst * adev->umc.channel_inst_num + ch_inst; channel_index = adev->umc.channel_idx_tbl[umc_inst * adev->umc.channel_inst_num + ch_inst]; - mc_umc_status = ras->umc_ecc.ecc[channel_index].mca_umc_status; + mc_umc_status = ras->umc_ecc.ecc[eccinfo_table_idx].mca_umc_status; if (mc_umc_status == 0) return; @@ -153,34 +191,41 @@ static void umc_v6_7_ecc_info_query_error_address(struct amdgpu_device *adev, if (!err_data->err_addr) return; - err_rec = &err_data->err_addr[err_data->err_addr_cnt]; - /* calculate error address if ue/ce error is detected */ if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 && (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 || REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1)) { - err_addr = ras->umc_ecc.ecc[channel_index].mca_umc_addr; + err_addr = ras->umc_ecc.ecc[eccinfo_table_idx].mca_umc_addr; err_addr = REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr); /* translate umc channel address to soc pa, 3 parts are included */ - retired_page = ADDR_OF_8KB_BLOCK(err_addr) | + soc_pa = ADDR_OF_8KB_BLOCK(err_addr) | ADDR_OF_256B_BLOCK(channel_index) | OFFSET_IN_256B_BLOCK(err_addr); + /* The umc channel bits are not original values, they are hashed */ + SET_CHANNEL_HASH(channel_index, soc_pa); + + /* clear [C4 C3 C2] in soc physical address */ + soc_pa &= ~(0x7ULL << UMC_V6_7_PA_C2_BIT); + /* we only save ue error information currently, ce is skipped */ if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1) { - err_rec->address = err_addr; - /* page frame address is saved */ - err_rec->retired_page = retired_page >> AMDGPU_GPU_PAGE_SHIFT; - err_rec->ts = (uint64_t)ktime_get_real_seconds(); - err_rec->err_type = AMDGPU_RAS_EEPROM_ERR_NON_RECOVERABLE; - err_rec->cu = 0; - err_rec->mem_channel = channel_index; - err_rec->mcumc_id = umc_inst; - - err_data->err_addr_cnt++; + /* loop for all possibilities of [C4 C3 C2] */ + for (column = 0; column < UMC_V6_7_NA_MAP_PA_NUM; column++) { + retired_page = soc_pa | (column << UMC_V6_7_PA_C2_BIT); + dev_info(adev->dev, "Error Address(PA): 0x%llx\n", retired_page); + amdgpu_umc_fill_error_record(err_data, err_addr, + retired_page, channel_index, umc_inst); + + /* shift R14 bit */ + retired_page ^= (0x1ULL << UMC_V6_7_PA_R14_BIT); + dev_info(adev->dev, "Error Address(PA): 0x%llx\n", retired_page); + amdgpu_umc_fill_error_record(err_data, err_addr, + retired_page, channel_index, umc_inst); + } } } } @@ -192,18 +237,13 @@ static void umc_v6_7_ecc_info_query_ras_error_address(struct amdgpu_device *adev uint32_t umc_inst = 0; uint32_t ch_inst 
= 0; - uint32_t umc_reg_offset = 0; /*TODO: driver needs to toggle DF Cstate to ensure * safe access of UMC resgisters. Will add the protection * when firmware interface is ready */ LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) { - umc_reg_offset = get_umc_v6_7_reg_offset(adev, - umc_inst, - ch_inst); umc_v6_7_ecc_info_query_error_address(adev, err_data, - umc_reg_offset, ch_inst, umc_inst); } @@ -251,8 +291,11 @@ static void umc_v6_7_query_correctable_error_count(struct amdgpu_device *adev, MCUMC_STATUS is a 64 bit register */ mc_umc_status = RREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4); if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 && - REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1) + REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1) { *error_count += 1; + + umc_v6_7_query_error_status_helper(adev, mc_umc_status, umc_reg_offset); + } } static void umc_v6_7_querry_uncorrectable_error_count(struct amdgpu_device *adev, @@ -272,8 +315,11 @@ static void umc_v6_7_querry_uncorrectable_error_count(struct amdgpu_device *adev REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 || REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, PCC) == 1 || REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UC) == 1 || - REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, TCC) == 1)) + REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, TCC) == 1)) { *error_count += 1; + + umc_v6_7_query_error_status_helper(adev, mc_umc_status, umc_reg_offset); + } } static void umc_v6_7_reset_error_count_per_channel(struct amdgpu_device *adev, @@ -365,9 +411,9 @@ static void umc_v6_7_query_error_address(struct amdgpu_device *adev, uint32_t umc_inst) { uint32_t mc_umc_status_addr; - uint64_t mc_umc_status, err_addr, retired_page, mc_umc_addrt0; - struct eeprom_table_record *err_rec; uint32_t channel_index; + uint64_t mc_umc_status, mc_umc_addrt0; + uint64_t err_addr, soc_pa, retired_page, column; mc_umc_status_addr = SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_STATUST0); @@ -385,8 +431,6 @@ static void umc_v6_7_query_error_address(struct amdgpu_device *adev, return; } - err_rec = &err_data->err_addr[err_data->err_addr_cnt]; - channel_index = adev->umc.channel_idx_tbl[umc_inst * adev->umc.channel_inst_num + ch_inst]; @@ -399,23 +443,32 @@ static void umc_v6_7_query_error_address(struct amdgpu_device *adev, err_addr = REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr); /* translate umc channel address to soc pa, 3 parts are included */ - retired_page = ADDR_OF_8KB_BLOCK(err_addr) | + soc_pa = ADDR_OF_8KB_BLOCK(err_addr) | ADDR_OF_256B_BLOCK(channel_index) | OFFSET_IN_256B_BLOCK(err_addr); + /* The umc channel bits are not original values, they are hashed */ + SET_CHANNEL_HASH(channel_index, soc_pa); + + /* clear [C4 C3 C2] in soc physical address */ + soc_pa &= ~(0x7ULL << UMC_V6_7_PA_C2_BIT); + /* we only save ue error information currently, ce is skipped */ if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1) { - err_rec->address = err_addr; - /* page frame address is saved */ - err_rec->retired_page = retired_page >> AMDGPU_GPU_PAGE_SHIFT; - err_rec->ts = (uint64_t)ktime_get_real_seconds(); - err_rec->err_type = AMDGPU_RAS_EEPROM_ERR_NON_RECOVERABLE; - err_rec->cu = 0; - err_rec->mem_channel = channel_index; - err_rec->mcumc_id = umc_inst; - - err_data->err_addr_cnt++; + /* loop for all possibilities of [C4 C3 C2] */ + for (column = 0; column < UMC_V6_7_NA_MAP_PA_NUM; column++) { 
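/*
 * Each iteration fills the [C4 C3 C2] bits cleared above with one of
 * the UMC_V6_7_NA_MAP_PA_NUM (8) possible column values, and the R14
 * flip below doubles that, so one uncorrectable error retires up to
 * 8 * 2 = 16 candidate pages per channel -- which is exactly where
 * UMC_V6_7_BAD_PAGE_NUM_PER_CHANNEL comes from. Note the channel bits
 * folded in earlier by SET_CHANNEL_HASH() are XOR hashes of PA bits
 * 20/25/34, not the raw channel index.
 */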
+ retired_page = soc_pa | (column << UMC_V6_7_PA_C2_BIT); + dev_info(adev->dev, "Error Address(PA): 0x%llx\n", retired_page); + amdgpu_umc_fill_error_record(err_data, err_addr, + retired_page, channel_index, umc_inst); + + /* shift R14 bit */ + retired_page ^= (0x1ULL << UMC_V6_7_PA_R14_BIT); + dev_info(adev->dev, "Error Address(PA): 0x%llx\n", retired_page); + amdgpu_umc_fill_error_record(err_data, err_addr, + retired_page, channel_index, umc_inst); + } } } @@ -463,28 +516,24 @@ static uint32_t umc_v6_7_query_ras_poison_mode_per_channel( static bool umc_v6_7_query_ras_poison_mode(struct amdgpu_device *adev) { - uint32_t umc_inst = 0; - uint32_t ch_inst = 0; uint32_t umc_reg_offset = 0; - LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) { - umc_reg_offset = get_umc_v6_7_reg_offset(adev, - umc_inst, - ch_inst); - /* Enabling fatal error in one channel will be considered - as fatal error mode */ - if (umc_v6_7_query_ras_poison_mode_per_channel(adev, umc_reg_offset)) - return false; - } - - return true; + /* Enabling fatal error in umc instance0 channel0 will be + * considered as fatal error mode + */ + umc_reg_offset = get_umc_v6_7_reg_offset(adev, 0, 0); + return !umc_v6_7_query_ras_poison_mode_per_channel(adev, umc_reg_offset); } -const struct amdgpu_umc_ras_funcs umc_v6_7_ras_funcs = { - .ras_late_init = amdgpu_umc_ras_late_init, - .ras_fini = amdgpu_umc_ras_fini, +const struct amdgpu_ras_block_hw_ops umc_v6_7_ras_hw_ops = { .query_ras_error_count = umc_v6_7_query_ras_error_count, .query_ras_error_address = umc_v6_7_query_ras_error_address, +}; + +struct amdgpu_umc_ras umc_v6_7_ras = { + .ras_block = { + .hw_ops = &umc_v6_7_ras_hw_ops, + }, .query_ras_poison_mode = umc_v6_7_query_ras_poison_mode, .ecc_info_query_ras_error_count = umc_v6_7_ecc_info_query_ras_error_count, .ecc_info_query_ras_error_address = umc_v6_7_ecc_info_query_ras_error_address, diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_7.h b/drivers/gpu/drm/amd/amdgpu/umc_v6_7.h index 57f2557e7aca..fe41ed2f5945 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v6_7.h +++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_7.h @@ -41,9 +41,32 @@ #define UMC_V6_7_CHANNEL_INSTANCE_NUM 8 /* total channel instances in one umc block */ #define UMC_V6_7_TOTAL_CHANNEL_NUM (UMC_V6_7_CHANNEL_INSTANCE_NUM * UMC_V6_7_UMC_INSTANCE_NUM) +/* one piece of normalizing address is mapped to 8 pieces of physical address */ +#define UMC_V6_7_NA_MAP_PA_NUM 8 +/* R14 bit shift should be considered, double the number */ +#define UMC_V6_7_BAD_PAGE_NUM_PER_CHANNEL (UMC_V6_7_NA_MAP_PA_NUM * 2) +/* The CH4 bit in SOC physical address */ +#define UMC_V6_7_PA_CH4_BIT 12 +/* The C2 bit in SOC physical address */ +#define UMC_V6_7_PA_C2_BIT 17 +/* The R14 bit in SOC physical address */ +#define UMC_V6_7_PA_R14_BIT 34 /* UMC regiser per channel offset */ #define UMC_V6_7_PER_CHANNEL_OFFSET 0x400 -extern const struct amdgpu_umc_ras_funcs umc_v6_7_ras_funcs; + +/* XOR bit 20, 25, 34 of PA into CH4 bit (bit 12 of PA), + * hash bit is only effective when related setting is enabled + */ +#define CHANNEL_HASH(channel_idx, pa) (((channel_idx) >> 4) ^ \ + (((pa) >> 20) & 0x1ULL & adev->df.hash_status.hash_64k) ^ \ + (((pa) >> 25) & 0x1ULL & adev->df.hash_status.hash_2m) ^ \ + (((pa) >> 34) & 0x1ULL & adev->df.hash_status.hash_1g)) +#define SET_CHANNEL_HASH(channel_idx, pa) do { \ + (pa) &= ~(0x1ULL << UMC_V6_7_PA_CH4_BIT); \ + (pa) |= (CHANNEL_HASH(channel_idx, pa) << UMC_V6_7_PA_CH4_BIT); \ + } while (0) + +extern struct amdgpu_umc_ras umc_v6_7_ras; extern const uint32_t 
umc_v6_7_channel_idx_tbl_second[UMC_V6_7_UMC_INSTANCE_NUM][UMC_V6_7_CHANNEL_INSTANCE_NUM]; extern const uint32_t diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v8_7.c b/drivers/gpu/drm/amd/amdgpu/umc_v8_7.c index af59a35788e3..f35253e0eaa6 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v8_7.c +++ b/drivers/gpu/drm/amd/amdgpu/umc_v8_7.c @@ -40,13 +40,137 @@ const uint32_t {9, 0}, {15, 6} }; -static inline uint32_t get_umc_8_reg_offset(struct amdgpu_device *adev, +static inline uint32_t get_umc_v8_7_reg_offset(struct amdgpu_device *adev, uint32_t umc_inst, uint32_t ch_inst) { return adev->umc.channel_offs*ch_inst + UMC_8_INST_DIST*umc_inst; } +static void umc_v8_7_ecc_info_query_correctable_error_count(struct amdgpu_device *adev, + uint32_t umc_inst, uint32_t ch_inst, + unsigned long *error_count) +{ + uint64_t mc_umc_status; + uint32_t eccinfo_table_idx; + struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); + + eccinfo_table_idx = umc_inst * adev->umc.channel_inst_num + ch_inst; + + /* check for SRAM correctable error + * MCUMC_STATUS is a 64 bit register + */ + mc_umc_status = ras->umc_ecc.ecc[eccinfo_table_idx].mca_umc_status; + if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 && + REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1) + *error_count += 1; +} + +static void umc_v8_7_ecc_info_querry_uncorrectable_error_count(struct amdgpu_device *adev, + uint32_t umc_inst, uint32_t ch_inst, + unsigned long *error_count) +{ + uint64_t mc_umc_status; + uint32_t eccinfo_table_idx; + struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); + + eccinfo_table_idx = umc_inst * adev->umc.channel_inst_num + ch_inst; + + /* check the MCUMC_STATUS */ + mc_umc_status = ras->umc_ecc.ecc[eccinfo_table_idx].mca_umc_status; + if ((REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1) && + (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1 || + REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 || + REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, PCC) == 1 || + REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UC) == 1 || + REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, TCC) == 1)) + *error_count += 1; +} + +static void umc_v8_7_ecc_info_query_ras_error_count(struct amdgpu_device *adev, + void *ras_error_status) +{ + struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; + + uint32_t umc_inst = 0; + uint32_t ch_inst = 0; + + /* TODO: driver needs to toggle DF Cstate to ensure + * safe access of UMC registers. 
Will add the protection + */ + LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) { + umc_v8_7_ecc_info_query_correctable_error_count(adev, + umc_inst, ch_inst, + &(err_data->ce_count)); + umc_v8_7_ecc_info_querry_uncorrectable_error_count(adev, + umc_inst, ch_inst, + &(err_data->ue_count)); + } +} + +static void umc_v8_7_ecc_info_query_error_address(struct amdgpu_device *adev, + struct ras_err_data *err_data, + uint32_t ch_inst, + uint32_t umc_inst) +{ + uint64_t mc_umc_status, err_addr, retired_page; + uint32_t channel_index; + uint32_t eccinfo_table_idx; + struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); + + eccinfo_table_idx = umc_inst * adev->umc.channel_inst_num + ch_inst; + channel_index = + adev->umc.channel_idx_tbl[umc_inst * adev->umc.channel_inst_num + ch_inst]; + + mc_umc_status = ras->umc_ecc.ecc[eccinfo_table_idx].mca_umc_status; + + if (mc_umc_status == 0) + return; + + if (!err_data->err_addr) + return; + + /* calculate error address if ue/ce error is detected */ + if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 && + (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 || + REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1)) { + + err_addr = ras->umc_ecc.ecc[eccinfo_table_idx].mca_umc_addr; + err_addr = REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr); + + /* translate umc channel address to soc pa, 3 parts are included */ + retired_page = ADDR_OF_4KB_BLOCK(err_addr) | + ADDR_OF_256B_BLOCK(channel_index) | + OFFSET_IN_256B_BLOCK(err_addr); + + /* we only save ue error information currently, ce is skipped */ + if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) + == 1) + amdgpu_umc_fill_error_record(err_data, err_addr, + retired_page, channel_index, umc_inst); + } +} + +static void umc_v8_7_ecc_info_query_ras_error_address(struct amdgpu_device *adev, + void *ras_error_status) +{ + struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; + + uint32_t umc_inst = 0; + uint32_t ch_inst = 0; + + /* TODO: driver needs to toggle DF Cstate to ensure + * safe access of UMC resgisters. 
Will add the protection + * when firmware interface is ready + */ + LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) { + umc_v8_7_ecc_info_query_error_address(adev, + err_data, + ch_inst, + umc_inst); + } +} + static void umc_v8_7_clear_error_count_per_channel(struct amdgpu_device *adev, uint32_t umc_reg_offset) { @@ -92,7 +216,7 @@ static void umc_v8_7_clear_error_count(struct amdgpu_device *adev) uint32_t umc_reg_offset = 0; LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) { - umc_reg_offset = get_umc_8_reg_offset(adev, + umc_reg_offset = get_umc_v8_7_reg_offset(adev, umc_inst, ch_inst); @@ -178,7 +302,7 @@ static void umc_v8_7_query_ras_error_count(struct amdgpu_device *adev, uint32_t umc_reg_offset = 0; LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) { - umc_reg_offset = get_umc_8_reg_offset(adev, + umc_reg_offset = get_umc_v8_7_reg_offset(adev, umc_inst, ch_inst); @@ -201,7 +325,6 @@ static void umc_v8_7_query_error_address(struct amdgpu_device *adev, { uint32_t lsb, mc_umc_status_addr; uint64_t mc_umc_status, err_addr, retired_page, mc_umc_addrt0; - struct eeprom_table_record *err_rec; uint32_t channel_index = adev->umc.channel_idx_tbl[umc_inst * adev->umc.channel_inst_num + ch_inst]; mc_umc_status_addr = @@ -220,8 +343,6 @@ static void umc_v8_7_query_error_address(struct amdgpu_device *adev, return; } - err_rec = &err_data->err_addr[err_data->err_addr_cnt]; - /* calculate error address if ue/ce error is detected */ if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 && (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 || @@ -240,18 +361,9 @@ static void umc_v8_7_query_error_address(struct amdgpu_device *adev, /* we only save ue error information currently, ce is skipped */ if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) - == 1) { - err_rec->address = err_addr; - /* page frame address is saved */ - err_rec->retired_page = retired_page >> AMDGPU_GPU_PAGE_SHIFT; - err_rec->ts = (uint64_t)ktime_get_real_seconds(); - err_rec->err_type = AMDGPU_RAS_EEPROM_ERR_NON_RECOVERABLE; - err_rec->cu = 0; - err_rec->mem_channel = channel_index; - err_rec->mcumc_id = umc_inst; - - err_data->err_addr_cnt++; - } + == 1) + amdgpu_umc_fill_error_record(err_data, err_addr, + retired_page, channel_index, umc_inst); } /* clear umc status */ @@ -268,7 +380,7 @@ static void umc_v8_7_query_ras_error_address(struct amdgpu_device *adev, uint32_t umc_reg_offset = 0; LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) { - umc_reg_offset = get_umc_8_reg_offset(adev, + umc_reg_offset = get_umc_v8_7_reg_offset(adev, umc_inst, ch_inst); @@ -316,7 +428,7 @@ static void umc_v8_7_err_cnt_init(struct amdgpu_device *adev) uint32_t umc_reg_offset = 0; LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) { - umc_reg_offset = get_umc_8_reg_offset(adev, + umc_reg_offset = get_umc_v8_7_reg_offset(adev, umc_inst, ch_inst); @@ -324,10 +436,16 @@ static void umc_v8_7_err_cnt_init(struct amdgpu_device *adev) } } -const struct amdgpu_umc_ras_funcs umc_v8_7_ras_funcs = { - .err_cnt_init = umc_v8_7_err_cnt_init, - .ras_late_init = amdgpu_umc_ras_late_init, - .ras_fini = amdgpu_umc_ras_fini, +const struct amdgpu_ras_block_hw_ops umc_v8_7_ras_hw_ops = { .query_ras_error_count = umc_v8_7_query_ras_error_count, .query_ras_error_address = umc_v8_7_query_ras_error_address, }; + +struct amdgpu_umc_ras umc_v8_7_ras = { + .ras_block = { + .hw_ops = &umc_v8_7_ras_hw_ops, + }, + .err_cnt_init = umc_v8_7_err_cnt_init, + .ecc_info_query_ras_error_count = umc_v8_7_ecc_info_query_ras_error_count, + .ecc_info_query_ras_error_address = 
umc_v8_7_ecc_info_query_ras_error_address, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v8_7.h b/drivers/gpu/drm/amd/amdgpu/umc_v8_7.h index 37e6dc7c28e0..dd4993f5f78f 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v8_7.h +++ b/drivers/gpu/drm/amd/amdgpu/umc_v8_7.h @@ -44,7 +44,7 @@ /* umc ce count initial value */ #define UMC_V8_7_CE_CNT_INIT (UMC_V8_7_CE_CNT_MAX - UMC_V8_7_CE_INT_THRESHOLD) -extern const struct amdgpu_umc_ras_funcs umc_v8_7_ras_funcs; +extern struct amdgpu_umc_ras umc_v8_7_ras; extern const uint32_t umc_v8_7_channel_idx_tbl[UMC_V8_7_UMC_INSTANCE_NUM][UMC_V8_7_CHANNEL_INSTANCE_NUM]; diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c index 563493d1f830..d7e31e48a2b8 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c @@ -833,7 +833,7 @@ out: return ret; } -static void uvd_v5_0_get_clockgating_state(void *handle, u32 *flags) +static void uvd_v5_0_get_clockgating_state(void *handle, u64 *flags) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; int data; diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c index 2d558c2f417d..375c440957dc 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c @@ -1494,7 +1494,7 @@ out: return ret; } -static void uvd_v6_0_get_clockgating_state(void *handle, u32 *flags) +static void uvd_v6_0_get_clockgating_state(void *handle, u64 *flags) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; int data; diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c index b483f03b4591..2f15b8e0f7d7 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c @@ -25,6 +25,7 @@ #include "amdgpu.h" #include "amdgpu_uvd.h" +#include "amdgpu_cs.h" #include "soc15.h" #include "soc15d.h" #include "soc15_common.h" @@ -1275,14 +1276,15 @@ static int uvd_v7_0_ring_test_ring(struct amdgpu_ring *ring) * uvd_v7_0_ring_patch_cs_in_place - Patch the IB for command submission. 
* * @p: the CS parser with the IBs - * @ib_idx: which IB to patch + * @job: the job this IB belongs to + * @ib: which IB to patch * */ static int uvd_v7_0_ring_patch_cs_in_place(struct amdgpu_cs_parser *p, - uint32_t ib_idx) + struct amdgpu_job *job, + struct amdgpu_ib *ib) { - struct amdgpu_ring *ring = to_amdgpu_ring(p->entity->rq->sched); - struct amdgpu_ib *ib = &p->job->ibs[ib_idx]; + struct amdgpu_ring *ring = to_amdgpu_ring(job->base.sched); unsigned i; /* No patching necessary for the first instance */ @@ -1290,12 +1292,12 @@ static int uvd_v7_0_ring_patch_cs_in_place(struct amdgpu_cs_parser *p, return 0; for (i = 0; i < ib->length_dw; i += 2) { - uint32_t reg = amdgpu_get_ib_value(p, ib_idx, i); + uint32_t reg = amdgpu_ib_get_value(ib, i); reg -= p->adev->reg_offset[UVD_HWIP][0][1]; reg += p->adev->reg_offset[UVD_HWIP][1][1]; - amdgpu_set_ib_value(p, ib_idx, i, reg); + amdgpu_ib_set_value(ib, i, reg); } return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c index 142e291983b4..8def62c83ffd 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c @@ -831,7 +831,7 @@ out: return ret; } -static void vce_v3_0_get_clockgating_state(void *handle, u32 *flags) +static void vce_v3_0_get_clockgating_state(void *handle, u64 *flags) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; int data; diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c index 3799226defc0..f0fbcda76f5e 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c @@ -24,6 +24,7 @@ #include <linux/firmware.h> #include "amdgpu.h" +#include "amdgpu_cs.h" #include "amdgpu_vcn.h" #include "amdgpu_pm.h" #include "soc15.h" @@ -148,6 +149,13 @@ static int vcn_v1_0_sw_init(void *handle) adev->vcn.pause_dpg_mode = vcn_v1_0_pause_dpg_mode; + if (amdgpu_vcnfw_log) { + volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst->fw_shared.cpu_addr; + + fw_shared->present_flag_0 = 0; + amdgpu_vcn_fwlog_init(adev->vcn.inst); + } + r = jpeg_v1_0_sw_init(handle); return r; @@ -1095,13 +1103,8 @@ static int vcn_v1_0_start_dpg_mode(struct amdgpu_device *adev) static int vcn_v1_0_start(struct amdgpu_device *adev) { - int r; - - if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) - r = vcn_v1_0_start_dpg_mode(adev); - else - r = vcn_v1_0_start_spg_mode(adev); - return r; + return (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) ? + vcn_v1_0_start_dpg_mode(adev) : vcn_v1_0_start_spg_mode(adev); } /** @@ -1898,15 +1901,86 @@ static const struct amd_ip_funcs vcn_v1_0_ip_funcs = { .set_powergating_state = vcn_v1_0_set_powergating_state, }; +/* + * Due to a hardware issue, VCN on CHIP_RAVEN series ASICs can't handle + * a GTT TMZ buffer. As a workaround, move such a buffer to the VRAM + * domain before command submission. 
+ */ +static int vcn_v1_0_validate_bo(struct amdgpu_cs_parser *parser, + struct amdgpu_job *job, + uint64_t addr) +{ + struct ttm_operation_ctx ctx = { false, false }; + struct amdgpu_fpriv *fpriv = parser->filp->driver_priv; + struct amdgpu_vm *vm = &fpriv->vm; + struct amdgpu_bo_va_mapping *mapping; + struct amdgpu_bo *bo; + int r; + + addr &= AMDGPU_GMC_HOLE_MASK; + if (addr & 0x7) { + DRM_ERROR("VCN messages must be 8 byte aligned!\n"); + return -EINVAL; + } + + mapping = amdgpu_vm_bo_lookup_mapping(vm, addr/AMDGPU_GPU_PAGE_SIZE); + if (!mapping || !mapping->bo_va || !mapping->bo_va->base.bo) + return -EINVAL; + + bo = mapping->bo_va->base.bo; + if (!(bo->flags & AMDGPU_GEM_CREATE_ENCRYPTED)) + return 0; + + amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_VRAM); + r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); + if (r) { + DRM_ERROR("Failed to validate the VCN message BO (%d)!\n", r); + return r; + } + + return r; +} + +static int vcn_v1_0_ring_patch_cs_in_place(struct amdgpu_cs_parser *p, + struct amdgpu_job *job, + struct amdgpu_ib *ib) +{ + uint32_t msg_lo = 0, msg_hi = 0; + int i, r; + + if (!(ib->flags & AMDGPU_IB_FLAGS_SECURE)) + return 0; + + for (i = 0; i < ib->length_dw; i += 2) { + uint32_t reg = amdgpu_ib_get_value(ib, i); + uint32_t val = amdgpu_ib_get_value(ib, i + 1); + + if (reg == PACKET0(p->adev->vcn.internal.data0, 0)) { + msg_lo = val; + } else if (reg == PACKET0(p->adev->vcn.internal.data1, 0)) { + msg_hi = val; + } else if (reg == PACKET0(p->adev->vcn.internal.cmd, 0)) { + r = vcn_v1_0_validate_bo(p, job, + ((u64)msg_hi) << 32 | msg_lo); + if (r) + return r; + } + } + + return 0; +} + static const struct amdgpu_ring_funcs vcn_v1_0_dec_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_DEC, .align_mask = 0xf, .support_64bit_ptrs = false, .no_user_fence = true, + .secure_submission_supported = true, .vmhub = AMDGPU_MMHUB_0, .get_rptr = vcn_v1_0_dec_ring_get_rptr, .get_wptr = vcn_v1_0_dec_ring_get_wptr, .set_wptr = vcn_v1_0_dec_ring_set_wptr, + .patch_cs_in_place = vcn_v1_0_ring_patch_cs_in_place, .emit_frame_size = 6 + 6 + /* hdp invalidate / flush */ SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 + diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c index 313fc1b53999..7a7f35e83dd5 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c @@ -172,8 +172,12 @@ static int vcn_v2_0_sw_init(void *handle) if (r) return r; - fw_shared = adev->vcn.inst->fw_shared_cpu_addr; + fw_shared = adev->vcn.inst->fw_shared.cpu_addr; fw_shared->present_flag_0 = cpu_to_le32(AMDGPU_VCN_MULTI_QUEUE_FLAG); + + if (amdgpu_vcnfw_log) + amdgpu_vcn_fwlog_init(adev->vcn.inst); + return 0; } @@ -188,7 +192,7 @@ static int vcn_v2_0_sw_fini(void *handle) { int r, idx; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst->fw_shared_cpu_addr; + volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst->fw_shared.cpu_addr; if (drm_dev_enter(adev_to_drm(adev), &idx)) { fw_shared->present_flag_0 = 0; @@ -364,9 +368,9 @@ static void vcn_v2_0_mc_resume(struct amdgpu_device *adev) /* non-cache window */ WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_NC0_64BIT_BAR_LOW, - lower_32_bits(adev->vcn.inst->fw_shared_gpu_addr)); + lower_32_bits(adev->vcn.inst->fw_shared.gpu_addr)); WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH, - upper_32_bits(adev->vcn.inst->fw_shared_gpu_addr)); + upper_32_bits(adev->vcn.inst->fw_shared.gpu_addr)); WREG32_SOC15(UVD, 0, 
mmUVD_VCPU_NONCACHE_OFFSET0, 0); WREG32_SOC15(UVD, 0, mmUVD_VCPU_NONCACHE_SIZE0, AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_fw_shared))); @@ -455,10 +459,10 @@ static void vcn_v2_0_mc_resume_dpg_mode(struct amdgpu_device *adev, bool indirec /* non-cache window */ WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET( UVD, 0, mmUVD_LMI_VCPU_NC0_64BIT_BAR_LOW), - lower_32_bits(adev->vcn.inst->fw_shared_gpu_addr), 0, indirect); + lower_32_bits(adev->vcn.inst->fw_shared.gpu_addr), 0, indirect); WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET( UVD, 0, mmUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH), - upper_32_bits(adev->vcn.inst->fw_shared_gpu_addr), 0, indirect); + upper_32_bits(adev->vcn.inst->fw_shared.gpu_addr), 0, indirect); WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET( UVD, 0, mmUVD_VCPU_NONCACHE_OFFSET0), 0, 0, indirect); WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET( @@ -784,7 +788,7 @@ static void vcn_v2_0_enable_static_power_gating(struct amdgpu_device *adev) static int vcn_v2_0_start_dpg_mode(struct amdgpu_device *adev, bool indirect) { - volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst->fw_shared_cpu_addr; + volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst->fw_shared.cpu_addr; struct amdgpu_ring *ring = &adev->vcn.inst->ring_dec; uint32_t rb_bufsz, tmp; @@ -921,7 +925,7 @@ static int vcn_v2_0_start_dpg_mode(struct amdgpu_device *adev, bool indirect) static int vcn_v2_0_start(struct amdgpu_device *adev) { - volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst->fw_shared_cpu_addr; + volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst->fw_shared.cpu_addr; struct amdgpu_ring *ring = &adev->vcn.inst->ring_dec; uint32_t rb_bufsz, tmp; uint32_t lmi_swap_cntl; @@ -1094,8 +1098,10 @@ static int vcn_v2_0_start(struct amdgpu_device *adev) static int vcn_v2_0_stop_dpg_mode(struct amdgpu_device *adev) { + struct dpg_pause_state state = {.fw_based = VCN_DPG_STATE__UNPAUSE}; uint32_t tmp; + vcn_v2_0_pause_dpg_mode(adev, 0, &state); /* Wait for power status to be 1 */ SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS, 1, UVD_POWER_STATUS__UVD_POWER_STATUS_MASK); @@ -1207,7 +1213,7 @@ static int vcn_v2_0_pause_dpg_mode(struct amdgpu_device *adev, UVD_POWER_STATUS__UVD_POWER_STATUS_MASK); if (!ret_code) { - volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst->fw_shared_cpu_addr; + volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst->fw_shared.cpu_addr; /* pause DPG */ reg_data |= UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK; WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data); @@ -2003,6 +2009,7 @@ static const struct amd_ip_funcs vcn_v2_0_ip_funcs = { static const struct amdgpu_ring_funcs vcn_v2_0_dec_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_DEC, .align_mask = 0xf, + .secure_submission_supported = true, .vmhub = AMDGPU_MMHUB_0, .get_rptr = vcn_v2_0_dec_ring_get_rptr, .get_wptr = vcn_v2_0_dec_ring_get_wptr, diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c index 44fc4c218433..17d44be58877 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c @@ -31,6 +31,7 @@ #include "soc15d.h" #include "vcn_v2_0.h" #include "mmsch_v1_0.h" +#include "vcn_v2_5.h" #include "vcn/vcn_2_5_offset.h" #include "vcn/vcn_2_5_sh_mask.h" @@ -59,6 +60,7 @@ static int vcn_v2_5_set_powergating_state(void *handle, static int vcn_v2_5_pause_dpg_mode(struct amdgpu_device *adev, int inst_idx, struct dpg_pause_state *new_state); static int vcn_v2_5_sriov_start(struct amdgpu_device *adev); +static void 
vcn_v2_5_set_ras_funcs(struct amdgpu_device *adev); static int amdgpu_ih_clientid_vcns[] = { SOC15_IH_CLIENTID_VCN, @@ -100,6 +102,7 @@ static int vcn_v2_5_early_init(void *handle) vcn_v2_5_set_dec_ring_funcs(adev); vcn_v2_5_set_enc_ring_funcs(adev); vcn_v2_5_set_irq_funcs(adev); + vcn_v2_5_set_ras_funcs(adev); return 0; } @@ -196,8 +199,11 @@ static int vcn_v2_5_sw_init(void *handle) return r; } - fw_shared = adev->vcn.inst[j].fw_shared_cpu_addr; + fw_shared = adev->vcn.inst[j].fw_shared.cpu_addr; fw_shared->present_flag_0 = cpu_to_le32(AMDGPU_VCN_MULTI_QUEUE_FLAG); + + if (amdgpu_vcnfw_log) + amdgpu_vcn_fwlog_init(&adev->vcn.inst[j]); } if (amdgpu_sriov_vf(adev)) { @@ -229,7 +235,7 @@ static int vcn_v2_5_sw_fini(void *handle) for (i = 0; i < adev->vcn.num_vcn_inst; i++) { if (adev->vcn.harvest_config & (1 << i)) continue; - fw_shared = adev->vcn.inst[i].fw_shared_cpu_addr; + fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; fw_shared->present_flag_0 = 0; } drm_dev_exit(idx); @@ -423,9 +429,9 @@ static void vcn_v2_5_mc_resume(struct amdgpu_device *adev) /* non-cache window */ WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_NC0_64BIT_BAR_LOW, - lower_32_bits(adev->vcn.inst[i].fw_shared_gpu_addr)); + lower_32_bits(adev->vcn.inst[i].fw_shared.gpu_addr)); WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH, - upper_32_bits(adev->vcn.inst[i].fw_shared_gpu_addr)); + upper_32_bits(adev->vcn.inst[i].fw_shared.gpu_addr)); WREG32_SOC15(VCN, i, mmUVD_VCPU_NONCACHE_OFFSET0, 0); WREG32_SOC15(VCN, i, mmUVD_VCPU_NONCACHE_SIZE0, AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_fw_shared))); @@ -513,10 +519,10 @@ static void vcn_v2_5_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_idx /* non-cache window */ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( VCN, 0, mmUVD_LMI_VCPU_NC0_64BIT_BAR_LOW), - lower_32_bits(adev->vcn.inst[inst_idx].fw_shared_gpu_addr), 0, indirect); + lower_32_bits(adev->vcn.inst[inst_idx].fw_shared.gpu_addr), 0, indirect); WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( VCN, 0, mmUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH), - upper_32_bits(adev->vcn.inst[inst_idx].fw_shared_gpu_addr), 0, indirect); + upper_32_bits(adev->vcn.inst[inst_idx].fw_shared.gpu_addr), 0, indirect); WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( VCN, 0, mmUVD_VCPU_NONCACHE_OFFSET0), 0, 0, indirect); WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( @@ -757,7 +763,7 @@ static void vcn_v2_5_enable_clock_gating(struct amdgpu_device *adev) static int vcn_v2_5_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect) { - volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared_cpu_addr; + volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr; struct amdgpu_ring *ring; uint32_t rb_bufsz, tmp; @@ -981,7 +987,7 @@ static int vcn_v2_5_start(struct amdgpu_device *adev) vcn_v2_5_mc_resume(adev); for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { - volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst[i].fw_shared_cpu_addr; + volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; if (adev->vcn.harvest_config & (1 << i)) continue; /* VCN global tiling registers */ 
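[Editor's note — sketch, not part of the patch] vcn_v2_0_stop_dpg_mode() above, and vcn_v3_0_stop_dpg_mode() further below, both gain the same unpause-first sequence, so teardown always starts from a known DPG state instead of racing a paused state machine. The generic shape of that sequence, using the pause_dpg_mode hook each generation installs in its sw_init (inst_idx is whatever instance is being stopped):

	struct dpg_pause_state state = { .fw_based = VCN_DPG_STATE__UNPAUSE };

	/* force firmware out of the paused state before the power-gate sequence */
	adev->vcn.pause_dpg_mode(adev, inst_idx, &state);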
@@ -1403,7 +1409,7 @@ static int vcn_v2_5_pause_dpg_mode(struct amdgpu_device *adev, UVD_POWER_STATUS__UVD_POWER_STATUS_MASK); if (!ret_code) { - volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared_cpu_addr; + volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr; /* pause DPG */ reg_data |= UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK; @@ -1512,6 +1518,7 @@ static void vcn_v2_5_dec_ring_set_wptr(struct amdgpu_ring *ring) static const struct amdgpu_ring_funcs vcn_v2_5_dec_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_DEC, .align_mask = 0xf, + .secure_submission_supported = true, .vmhub = AMDGPU_MMHUB_1, .get_rptr = vcn_v2_5_dec_ring_get_rptr, .get_wptr = vcn_v2_5_dec_ring_get_wptr, @@ -1542,6 +1549,7 @@ static const struct amdgpu_ring_funcs vcn_v2_5_dec_ring_vm_funcs = { static const struct amdgpu_ring_funcs vcn_v2_6_dec_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_DEC, .align_mask = 0xf, + .secure_submission_supported = true, .vmhub = AMDGPU_MMHUB_0, .get_rptr = vcn_v2_5_dec_ring_get_rptr, .get_wptr = vcn_v2_5_dec_ring_get_wptr, @@ -1927,3 +1935,71 @@ const struct amdgpu_ip_block_version vcn_v2_6_ip_block = .rev = 0, .funcs = &vcn_v2_6_ip_funcs, }; + +static uint32_t vcn_v2_6_query_poison_by_instance(struct amdgpu_device *adev, + uint32_t instance, uint32_t sub_block) +{ + uint32_t poison_stat = 0, reg_value = 0; + + switch (sub_block) { + case AMDGPU_VCN_V2_6_VCPU_VCODEC: + reg_value = RREG32_SOC15(VCN, instance, mmUVD_RAS_VCPU_VCODEC_STATUS); + poison_stat = REG_GET_FIELD(reg_value, UVD_RAS_VCPU_VCODEC_STATUS, POISONED_PF); + break; + default: + break; + } + + if (poison_stat) + dev_info(adev->dev, "Poison detected in VCN%d, sub_block%d\n", + instance, sub_block); + + return poison_stat; +} + +static bool vcn_v2_6_query_poison_status(struct amdgpu_device *adev) +{ + uint32_t inst, sub; + uint32_t poison_stat = 0; + + for (inst = 0; inst < adev->vcn.num_vcn_inst; inst++) + for (sub = 0; sub < AMDGPU_VCN_V2_6_MAX_SUB_BLOCK; sub++) + poison_stat += + vcn_v2_6_query_poison_by_instance(adev, inst, sub); + + return !!poison_stat; +} + +const struct amdgpu_ras_block_hw_ops vcn_v2_6_ras_hw_ops = { + .query_poison_status = vcn_v2_6_query_poison_status, +}; + +static struct amdgpu_vcn_ras vcn_v2_6_ras = { + .ras_block = { + .hw_ops = &vcn_v2_6_ras_hw_ops, + }, +}; + +static void vcn_v2_5_set_ras_funcs(struct amdgpu_device *adev) +{ + switch (adev->ip_versions[VCN_HWIP][0]) { + case IP_VERSION(2, 6, 0): + adev->vcn.ras = &vcn_v2_6_ras; + break; + default: + break; + } + + if (adev->vcn.ras) { + amdgpu_ras_register_ras_block(adev, &adev->vcn.ras->ras_block); + + strcpy(adev->vcn.ras->ras_block.ras_comm.name, "vcn"); + adev->vcn.ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__VCN; + adev->vcn.ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__POISON; + adev->vcn.ras_if = &adev->vcn.ras->ras_block.ras_comm; + + /* If no special ras_late_init function is defined, use the default ras_late_init */ + if (!adev->vcn.ras->ras_block.ras_late_init) + adev->vcn.ras->ras_block.ras_late_init = amdgpu_ras_block_late_init; + } +} diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.h b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.h index e72f799ed0fd..1c19af74e4fd 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.h +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.h @@ -24,6 +24,12 @@ #ifndef __VCN_V2_5_H__ #define __VCN_V2_5_H__ +enum amdgpu_vcn_v2_6_sub_block { + AMDGPU_VCN_V2_6_VCPU_VCODEC = 0, + + AMDGPU_VCN_V2_6_MAX_SUB_BLOCK, +}; + extern const struct amdgpu_ip_block_version vcn_v2_5_ip_block; extern const struct amdgpu_ip_block_version vcn_v2_6_ip_block; diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c index da11ceba0698..cb5f0a12333f 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c @@ -25,6 +25,7 @@ #include "amdgpu.h" 
#include "amdgpu_vcn.h" #include "amdgpu_pm.h" +#include "amdgpu_cs.h" #include "soc15.h" #include "soc15d.h" #include "vcn_v2_0.h" @@ -213,11 +214,19 @@ static int vcn_v3_0_sw_init(void *handle) return r; } - fw_shared = adev->vcn.inst[i].fw_shared_cpu_addr; + fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; fw_shared->present_flag_0 |= cpu_to_le32(AMDGPU_VCN_SW_RING_FLAG) | cpu_to_le32(AMDGPU_VCN_MULTI_QUEUE_FLAG) | cpu_to_le32(AMDGPU_VCN_FW_SHARED_FLAG_0_RB); fw_shared->sw_ring.is_enabled = cpu_to_le32(DEC_SW_RING_ENABLED); + fw_shared->present_flag_0 |= AMDGPU_VCN_SMU_VERSION_INFO_FLAG; + if (adev->ip_versions[UVD_HWIP][0] == IP_VERSION(3, 1, 2)) + fw_shared->smu_interface_info.smu_interface_type = 2; + else if (adev->ip_versions[UVD_HWIP][0] == IP_VERSION(3, 1, 1)) + fw_shared->smu_interface_info.smu_interface_type = 1; + + if (amdgpu_vcnfw_log) + amdgpu_vcn_fwlog_init(&adev->vcn.inst[i]); } if (amdgpu_sriov_vf(adev)) { @@ -249,7 +258,7 @@ static int vcn_v3_0_sw_fini(void *handle) if (adev->vcn.harvest_config & (1 << i)) continue; - fw_shared = adev->vcn.inst[i].fw_shared_cpu_addr; + fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; fw_shared->present_flag_0 = 0; fw_shared->sw_ring.is_enabled = false; } @@ -295,6 +304,7 @@ static int vcn_v3_0_hw_init(void *handle) ring = &adev->vcn.inst[i].ring_dec; if (amdgpu_vcn_is_disabled_vcn(adev, VCN_DECODE_RING, i)) { ring->sched.ready = false; + ring->no_scheduler = true; dev_info(adev->dev, "ring %s is disabled by hypervisor\n", ring->name); } else { ring->wptr = 0; @@ -307,6 +317,7 @@ static int vcn_v3_0_hw_init(void *handle) ring = &adev->vcn.inst[i].ring_enc[j]; if (amdgpu_vcn_is_disabled_vcn(adev, VCN_ENCODE_RING, i)) { ring->sched.ready = false; + ring->no_scheduler = true; dev_info(adev->dev, "ring %s is disabled by hypervisor\n", ring->name); } else { ring->wptr = 0; @@ -469,9 +480,9 @@ static void vcn_v3_0_mc_resume(struct amdgpu_device *adev, int inst) /* non-cache window */ WREG32_SOC15(VCN, inst, mmUVD_LMI_VCPU_NC0_64BIT_BAR_LOW, - lower_32_bits(adev->vcn.inst[inst].fw_shared_gpu_addr)); + lower_32_bits(adev->vcn.inst[inst].fw_shared.gpu_addr)); WREG32_SOC15(VCN, inst, mmUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH, - upper_32_bits(adev->vcn.inst[inst].fw_shared_gpu_addr)); + upper_32_bits(adev->vcn.inst[inst].fw_shared.gpu_addr)); WREG32_SOC15(VCN, inst, mmUVD_VCPU_NONCACHE_OFFSET0, 0); WREG32_SOC15(VCN, inst, mmUVD_VCPU_NONCACHE_SIZE0, AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_fw_shared))); @@ -558,10 +569,10 @@ static void vcn_v3_0_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_idx /* non-cache window */ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( VCN, inst_idx, mmUVD_LMI_VCPU_NC0_64BIT_BAR_LOW), - lower_32_bits(adev->vcn.inst[inst_idx].fw_shared_gpu_addr), 0, indirect); + lower_32_bits(adev->vcn.inst[inst_idx].fw_shared.gpu_addr), 0, indirect); WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( VCN, inst_idx, mmUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH), - upper_32_bits(adev->vcn.inst[inst_idx].fw_shared_gpu_addr), 0, indirect); + upper_32_bits(adev->vcn.inst[inst_idx].fw_shared.gpu_addr), 0, indirect); WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( VCN, inst_idx, mmUVD_VCPU_NONCACHE_OFFSET0), 0, 0, indirect); WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( @@ -569,8 +580,8 @@ static void vcn_v3_0_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_idx AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_fw_shared)), 0, indirect); /* VCN global tiling registers */ - WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET( - 
UVD, 0, mmUVD_GFX10_ADDR_CONFIG), adev->gfx.config.gb_addr_config, 0, indirect); + WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( + UVD, inst_idx, mmUVD_GFX10_ADDR_CONFIG), adev->gfx.config.gb_addr_config, 0, indirect); } static void vcn_v3_0_disable_static_power_gating(struct amdgpu_device *adev, int inst) @@ -923,7 +934,7 @@ static void vcn_v3_0_enable_clock_gating(struct amdgpu_device *adev, int inst) static int vcn_v3_0_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect) { - volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared_cpu_addr; + volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr; struct amdgpu_ring *ring; uint32_t rb_bufsz, tmp; @@ -1220,7 +1231,7 @@ static int vcn_v3_0_start(struct amdgpu_device *adev) tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1); WREG32_SOC15(VCN, i, mmUVD_RBC_RB_CNTL, tmp); - fw_shared = adev->vcn.inst[i].fw_shared_cpu_addr; + fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; fw_shared->multi_queue.decode_queue_mode |= cpu_to_le32(FW_QUEUE_RING_RESET); /* programm the RB_BASE for ring buffer */ @@ -1474,8 +1485,11 @@ static int vcn_v3_0_start_sriov(struct amdgpu_device *adev) static int vcn_v3_0_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx) { + struct dpg_pause_state state = {.fw_based = VCN_DPG_STATE__UNPAUSE}; uint32_t tmp; + vcn_v3_0_pause_dpg_mode(adev, inst_idx, &state); + /* Wait for power status to be 1 */ SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_POWER_STATUS, 1, UVD_POWER_STATUS__UVD_POWER_STATUS_MASK); @@ -1611,7 +1625,7 @@ static int vcn_v3_0_pause_dpg_mode(struct amdgpu_device *adev, if (adev->ip_versions[UVD_HWIP][0] != IP_VERSION(3, 0, 33)) { /* Restore */ - fw_shared = adev->vcn.inst[inst_idx].fw_shared_cpu_addr; + fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr; fw_shared->multi_queue.encode_generalpurpose_queue_mode |= cpu_to_le32(FW_QUEUE_RING_RESET); ring = &adev->vcn.inst[inst_idx].ring_enc[0]; ring->wptr = 0; @@ -1700,7 +1714,7 @@ static void vcn_v3_0_dec_ring_set_wptr(struct amdgpu_ring *ring) if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) { /*whenever update RBC_RB_WPTR, we save the wptr in shared rb.wptr and scratch2 */ - fw_shared = adev->vcn.inst[ring->me].fw_shared_cpu_addr; + fw_shared = adev->vcn.inst[ring->me].fw_shared.cpu_addr; fw_shared->rb.wptr = lower_32_bits(ring->wptr); WREG32_SOC15(VCN, ring->me, mmUVD_SCRATCH2, lower_32_bits(ring->wptr)); @@ -1780,6 +1794,7 @@ static const struct amdgpu_ring_funcs vcn_v3_0_dec_sw_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_DEC, .align_mask = 0x3f, .nop = VCN_DEC_SW_CMD_NO_OP, + .secure_submission_supported = true, .vmhub = AMDGPU_MMHUB_0, .get_rptr = vcn_v3_0_dec_ring_get_rptr, .get_wptr = vcn_v3_0_dec_ring_get_wptr, @@ -1806,21 +1821,23 @@ static const struct amdgpu_ring_funcs vcn_v3_0_dec_sw_ring_vm_funcs = { .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, }; -static int vcn_v3_0_limit_sched(struct amdgpu_cs_parser *p) +static int vcn_v3_0_limit_sched(struct amdgpu_cs_parser *p, + struct amdgpu_job *job) { struct drm_gpu_scheduler **scheds; /* The create msg must be in the first IB submitted */ - if (atomic_read(&p->entity->fence_seq)) + if (atomic_read(&job->base.entity->fence_seq)) return -EINVAL; scheds = p->adev->gpu_sched[AMDGPU_HW_IP_VCN_DEC] [AMDGPU_RING_PRIO_DEFAULT].sched; - drm_sched_entity_modify_sched(p->entity, scheds, 1); + drm_sched_entity_modify_sched(job->base.entity, scheds, 1); return 0; } 
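[Editor's note — sketch, not part of the patch] The .secure_submission_supported = true additions scattered through this series pair with a gate in the common CS code: rings that do not set the flag presumably reject secure (TMZ) IBs outright, while rings that do set it get their patch_cs_in_place callback invoked. An assumed shape of that check (local names here are illustrative):

	/* reject TMZ work on rings that can't handle it */
	if ((ib->flags & AMDGPU_IB_FLAGS_SECURE) &&
	    !ring->funcs->secure_submission_supported)
		return -EINVAL;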
-static int vcn_v3_0_dec_msg(struct amdgpu_cs_parser *p, uint64_t addr) +static int vcn_v3_0_dec_msg(struct amdgpu_cs_parser *p, struct amdgpu_job *job, + uint64_t addr) { struct ttm_operation_ctx ctx = { false, false }; struct amdgpu_bo_va_mapping *map; @@ -1891,7 +1908,7 @@ static int vcn_v3_0_dec_msg(struct amdgpu_cs_parser *p, uint64_t addr) if (create[0] == 0x7 || create[0] == 0x10 || create[0] == 0x11) continue; - r = vcn_v3_0_limit_sched(p); + r = vcn_v3_0_limit_sched(p, job); if (r) goto out; } @@ -1902,10 +1919,10 @@ out: } static int vcn_v3_0_ring_patch_cs_in_place(struct amdgpu_cs_parser *p, - uint32_t ib_idx) + struct amdgpu_job *job, + struct amdgpu_ib *ib) { - struct amdgpu_ring *ring = to_amdgpu_ring(p->entity->rq->sched); - struct amdgpu_ib *ib = &p->job->ibs[ib_idx]; + struct amdgpu_ring *ring = to_amdgpu_ring(job->base.sched); uint32_t msg_lo = 0, msg_hi = 0; unsigned i; int r; @@ -1915,8 +1932,8 @@ static int vcn_v3_0_ring_patch_cs_in_place(struct amdgpu_cs_parser *p, return 0; for (i = 0; i < ib->length_dw; i += 2) { - uint32_t reg = amdgpu_get_ib_value(p, ib_idx, i); - uint32_t val = amdgpu_get_ib_value(p, ib_idx, i + 1); + uint32_t reg = amdgpu_ib_get_value(ib, i); + uint32_t val = amdgpu_ib_get_value(ib, i + 1); if (reg == PACKET0(p->adev->vcn.internal.data0, 0)) { msg_lo = val; @@ -1924,7 +1941,8 @@ static int vcn_v3_0_ring_patch_cs_in_place(struct amdgpu_cs_parser *p, msg_hi = val; } else if (reg == PACKET0(p->adev->vcn.internal.cmd, 0) && val == 0) { - r = vcn_v3_0_dec_msg(p, ((u64)msg_hi) << 32 | msg_lo); + r = vcn_v3_0_dec_msg(p, job, + ((u64)msg_hi) << 32 | msg_lo); if (r) return r; } @@ -1935,6 +1953,7 @@ static int vcn_v3_0_ring_patch_cs_in_place(struct amdgpu_cs_parser *p, return 0; } static const struct amdgpu_ring_funcs vcn_v3_0_dec_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_DEC, .align_mask = 0xf, + .secure_submission_supported = true, .vmhub = AMDGPU_MMHUB_0, .get_rptr = vcn_v3_0_dec_ring_get_rptr, .get_wptr = vcn_v3_0_dec_ring_get_wptr, diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c index 6645ebbd2696..c5b88d15a6df 100644 --- a/drivers/gpu/drm/amd/amdgpu/vi.c +++ b/drivers/gpu/drm/amd/amdgpu/vi.c @@ -1140,7 +1140,7 @@ static void vi_program_aspm(struct amdgpu_device *adev) bool bL1SS = false; bool bClkReqSupport = true; - if (!amdgpu_aspm) + if (!amdgpu_device_should_use_aspm(adev)) return; if (adev->flags & AMD_IS_APU || @@ -2033,7 +2033,7 @@ static int vi_common_set_powergating_state(void *handle, return 0; } -static void vi_common_get_clockgating_state(void *handle, u32 *flags) +static void vi_common_get_clockgating_state(void *handle, u64 *flags) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; int data;
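[Editor's note — sketch, not part of the patch] The u32 -> u64 widening of the clockgating *flags out-parameter seen above in uvd_v5_0, uvd_v6_0, vce_v3_0 and vi means the AMD_CG_SUPPORT_* mask no longer fits in 32 bits. Any caller aggregating per-IP state must widen its accumulator to match; a minimal sketch of such a caller, mirroring how amdgpu iterates its IP blocks (loop shape assumed):

	u64 flags = 0;	/* must be u64 now; a u32 here would truncate the mask */
	int i;

	for (i = 0; i < adev->num_ip_blocks; i++) {
		if (!adev->ip_blocks[i].status.valid)
			continue;
		if (adev->ip_blocks[i].version->funcs->get_clockgating_state)
			adev->ip_blocks[i].version->funcs->get_clockgating_state(
				(void *)adev, &flags);
	}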