diff options
Diffstat (limited to 'drivers/gpu/drm/amd')
64 files changed, 1349 insertions, 179 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 0bb08e0e1611..bcc5d40a8d5f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -977,6 +977,9 @@ struct amdgpu_device { uint64_t unique_id; uint64_t df_perfmon_config_assign_mask[AMDGPU_MAX_DF_PERFMONS]; + + /* device pstate */ + int pstate; }; static inline struct amdgpu_device *amdgpu_ttm_adev(struct ttm_bo_device *bdev) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c index 649e68c4479b..d1495e1c9289 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c @@ -33,7 +33,7 @@ static int amdgpu_benchmark_do_move(struct amdgpu_device *adev, unsigned size, { unsigned long start_jiffies; unsigned long end_jiffies; - struct dma_fence *fence = NULL; + struct dma_fence *fence; int i, r; start_jiffies = jiffies; @@ -44,16 +44,14 @@ static int amdgpu_benchmark_do_move(struct amdgpu_device *adev, unsigned size, if (r) goto exit_do_move; r = dma_fence_wait(fence, false); + dma_fence_put(fence); if (r) goto exit_do_move; - dma_fence_put(fence); } end_jiffies = jiffies; r = jiffies_to_msecs(end_jiffies - start_jiffies); exit_do_move: - if (fence) - dma_fence_put(fence); return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index 6614d8a6f4c8..2cdaf3b2a721 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -604,8 +604,11 @@ void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr) continue; } - for (i = 0; i < num_entities; i++) + for (i = 0; i < num_entities; i++) { + mutex_lock(&ctx->adev->lock_reset); drm_sched_entity_fini(&ctx->entities[0][i].entity); + mutex_unlock(&ctx->adev->lock_reset); + } } } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c index 693f17e78791..8e6726e0d035 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c @@ -859,6 +859,9 @@ static int amdgpu_debugfs_test_ib(struct seq_file *m, void *data) struct amdgpu_device *adev = dev->dev_private; int r = 0, i; + /* Avoid accidently unparking the sched thread during GPU reset */ + mutex_lock(&adev->lock_reset); + /* hold on the scheduler */ for (i = 0; i < AMDGPU_MAX_RINGS; i++) { struct amdgpu_ring *ring = adev->rings[i]; @@ -884,6 +887,8 @@ static int amdgpu_debugfs_test_ib(struct seq_file *m, void *data) kthread_unpark(ring->sched.thread); } + mutex_unlock(&adev->lock_reset); + return 0; } @@ -1036,6 +1041,9 @@ static int amdgpu_debugfs_ib_preempt(void *data, u64 val) if (!fences) return -ENOMEM; + /* Avoid accidently unparking the sched thread during GPU reset */ + mutex_lock(&adev->lock_reset); + /* stop the scheduler */ kthread_park(ring->sched.thread); @@ -1075,6 +1083,8 @@ failure: /* restart the scheduler */ kthread_unpark(ring->sched.thread); + mutex_unlock(&adev->lock_reset); + ttm_bo_unlock_delayed_workqueue(&adev->mman.bdev, resched); kfree(fences); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index d36d2b093539..4f76beafb2fa 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2057,6 +2057,7 @@ out: */ static int amdgpu_device_ip_late_init(struct amdgpu_device *adev) { + struct amdgpu_gpu_instance *gpu_instance; int i = 0, r; for (i = 0; i < adev->num_ip_blocks; i++) { @@ -2082,8 +2083,39 @@ static int amdgpu_device_ip_late_init(struct amdgpu_device *adev) if (r) DRM_ERROR("enable mgpu fan boost failed (%d).\n", r); - /* set to low pstate by default */ - amdgpu_xgmi_set_pstate(adev, 0); + + if (adev->gmc.xgmi.num_physical_nodes > 1) { + mutex_lock(&mgpu_info.mutex); + + /* + * Reset device p-state to low as this was booted with high. + * + * This should be performed only after all devices from the same + * hive get initialized. + * + * However, it's unknown how many device in the hive in advance. + * As this is counted one by one during devices initializations. + * + * So, we wait for all XGMI interlinked devices initialized. + * This may bring some delays as those devices may come from + * different hives. But that should be OK. + */ + if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes) { + for (i = 0; i < mgpu_info.num_gpu; i++) { + gpu_instance = &(mgpu_info.gpu_ins[i]); + if (gpu_instance->adev->flags & AMD_IS_APU) + continue; + + r = amdgpu_xgmi_set_pstate(gpu_instance->adev, 0); + if (r) { + DRM_ERROR("pstate setting failed (%d).\n", r); + break; + } + } + } + + mutex_unlock(&mgpu_info.mutex); + } return 0; } @@ -3020,6 +3052,13 @@ fence_driver_init: DRM_INFO("amdgpu: acceleration disabled, skipping benchmarks\n"); } + /* + * Register gpu instance before amdgpu_device_enable_mgpu_fan_boost. + * Otherwise the mgpu fan boost feature will be skipped due to the + * gpu instance is counted less. + */ + amdgpu_register_gpu_instance(adev); + /* enable clockgating, etc. after ib tests, etc. since some blocks require * explicit gating rather than handling it automatically. */ @@ -3070,7 +3109,9 @@ void amdgpu_device_fini(struct amdgpu_device *adev) int r; DRM_INFO("amdgpu: finishing device.\n"); + flush_delayed_work(&adev->delayed_init_work); adev->shutdown = true; + /* disable all interrupts */ amdgpu_irq_disable_all(adev); if (adev->mode_info.mode_config_initialized){ @@ -3088,7 +3129,6 @@ void amdgpu_device_fini(struct amdgpu_device *adev) adev->firmware.gpu_info_fw = NULL; } adev->accel_working = false; - cancel_delayed_work_sync(&adev->delayed_init_work); /* free i2c buses */ if (!amdgpu_device_has_dc_support(adev)) amdgpu_i2c_fini(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c index d2dd59a95e8a..3cadb0b76f22 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c @@ -514,7 +514,7 @@ uint32_t amdgpu_display_supported_domains(struct amdgpu_device *adev, * Also, don't allow GTT domain if the BO doens't have USWC falg set. */ if (adev->asic_type >= CHIP_CARRIZO && - adev->asic_type <= CHIP_RAVEN && + adev->asic_type < CHIP_RAVEN && (adev->flags & AMD_IS_APU) && (bo_flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC) && amdgpu_bo_support_uswc(bo_flags) && diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 1d6ce018c8f2..0ffc9447b573 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -998,9 +998,10 @@ static const struct pci_device_id pciidlist[] = { {0x1002, 0x731B, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI10}, {0x1002, 0x731F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI10}, /* Navi14 */ - {0x1002, 0x7340, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI14|AMD_EXP_HW_SUPPORT}, - {0x1002, 0x7341, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI14|AMD_EXP_HW_SUPPORT}, - {0x1002, 0x7347, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI14|AMD_EXP_HW_SUPPORT}, + {0x1002, 0x7340, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI14}, + {0x1002, 0x7341, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI14}, + {0x1002, 0x7347, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI14}, + {0x1002, 0x734F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI14}, /* Renoir */ {0x1002, 0x1636, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RENOIR|AMD_IS_APU|AMD_EXP_HW_SUPPORT}, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index c9d1fada6188..e00b46180d2e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -454,8 +454,6 @@ void amdgpu_gfx_mqd_sw_fini(struct amdgpu_device *adev) } ring = &adev->gfx.kiq.ring; - if (adev->asic_type >= CHIP_NAVI10 && amdgpu_async_gfx_ring) - kfree(adev->gfx.me.mqd_backup[AMDGPU_MAX_GFX_RINGS]); kfree(adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS]); amdgpu_bo_free_kernel(&ring->mqd_obj, &ring->mqd_gpu_addr, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index 459aa9059542..0ae0a2715b0d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -225,7 +225,7 @@ struct amdgpu_me { uint32_t num_me; uint32_t num_pipe_per_me; uint32_t num_queue_per_pipe; - void *mqd_backup[AMDGPU_MAX_GFX_RINGS + 1]; + void *mqd_backup[AMDGPU_MAX_GFX_RINGS]; /* These are the resources for which amdgpu takes ownership */ DECLARE_BITMAP(queue_bitmap, AMDGPU_MAX_GFX_QUEUES); @@ -267,6 +267,7 @@ struct amdgpu_gfx { uint32_t mec2_feature_version; bool mec_fw_write_wait; bool me_fw_write_wait; + bool cp_fw_write_wait; struct amdgpu_ring gfx_ring[AMDGPU_MAX_GFX_RINGS]; unsigned num_gfx_rings; struct amdgpu_ring compute_ring[AMDGPU_MAX_COMPUTE_RINGS]; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h index 555d8e57fae9..b499a3de8bb6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h @@ -77,6 +77,7 @@ struct amdgpu_gmc_fault { struct amdgpu_vmhub { uint32_t ctx0_ptb_addr_lo32; uint32_t ctx0_ptb_addr_hi32; + uint32_t vm_inv_eng0_sem; uint32_t vm_inv_eng0_req; uint32_t vm_inv_eng0_ack; uint32_t vm_context0_cntl; @@ -127,18 +128,48 @@ struct amdgpu_xgmi { }; struct amdgpu_gmc { + /* FB's physical address in MMIO space (for CPU to + * map FB). This is different compared to the agp/ + * gart/vram_start/end field as the later is from + * GPU's view and aper_base is from CPU's view. + */ resource_size_t aper_size; resource_size_t aper_base; /* for some chips with <= 32MB we need to lie * about vram size near mc fb location */ u64 mc_vram_size; u64 visible_vram_size; + /* AGP aperture start and end in MC address space + * Driver find a hole in the MC address space + * to place AGP by setting MC_VM_AGP_BOT/TOP registers + * Under VMID0, logical address == MC address. AGP + * aperture maps to physical bus or IOVA addressed. + * AGP aperture is used to simulate FB in ZFB case. + * AGP aperture is also used for page table in system + * memory (mainly for APU). + * + */ u64 agp_size; u64 agp_start; u64 agp_end; + /* GART aperture start and end in MC address space + * Driver find a hole in the MC address space + * to place GART by setting VM_CONTEXT0_PAGE_TABLE_START/END_ADDR + * registers + * Under VMID0, logical address inside GART aperture will + * be translated through gpuvm gart page table to access + * paged system memory + */ u64 gart_size; u64 gart_start; u64 gart_end; + /* Frame buffer aperture of this GPU device. Different from + * fb_start (see below), this only covers the local GPU device. + * Driver get fb_start from MC_VM_FB_LOCATION_BASE (set by vbios) + * and calculate vram_start of this local device by adding an + * offset inside the XGMI hive. + * Under VMID0, logical address == MC address + */ u64 vram_start; u64 vram_end; /* FB region , it's same as local vram region in single GPU, in XGMI diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c index 6f3b03f6224f..30d540d23b77 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c @@ -311,7 +311,7 @@ void amdgpu_irq_fini(struct amdgpu_device *adev) drm_irq_uninstall(adev->ddev); adev->irq.installed = false; if (adev->irq.msi_enabled) - pci_disable_msi(adev->pdev); + pci_free_irq_vectors(adev->pdev); if (!amdgpu_device_has_dc_support(adev)) flush_work(&adev->hotplug_work); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index 137a8573d556..b6db28a570c2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -190,7 +190,6 @@ int amdgpu_driver_load_kms(struct drm_device *dev, unsigned long flags) pm_runtime_put_autosuspend(dev->dev); } - amdgpu_register_gpu_instance(adev); out: if (r) { /* balance pm_runtime_get_sync in amdgpu_driver_unload_kms */ @@ -656,15 +655,19 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file return -ENOMEM; alloc_size = info->read_mmr_reg.count * sizeof(*regs); - for (i = 0; i < info->read_mmr_reg.count; i++) + amdgpu_gfx_off_ctrl(adev, false); + for (i = 0; i < info->read_mmr_reg.count; i++) { if (amdgpu_asic_read_register(adev, se_num, sh_num, info->read_mmr_reg.dword_offset + i, ®s[i])) { DRM_DEBUG_KMS("unallowed offset %#x\n", info->read_mmr_reg.dword_offset + i); kfree(regs); + amdgpu_gfx_off_ctrl(adev, true); return -EFAULT; } + } + amdgpu_gfx_off_ctrl(adev, true); n = copy_to_user(out, regs, min(size, alloc_size)); kfree(regs); return n ? -EFAULT : 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index bbe9ac7e843f..2770cba56a6b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -567,7 +567,9 @@ static int psp_xgmi_initialize(struct psp_context *psp) struct ta_xgmi_shared_memory *xgmi_cmd; int ret; - if (!psp->adev->psp.ta_fw) + if (!psp->adev->psp.ta_fw || + !psp->adev->psp.ta_xgmi_ucode_size || + !psp->adev->psp.ta_xgmi_start_addr) return -ENOENT; if (!psp->xgmi_context.initialized) { @@ -756,6 +758,12 @@ static int psp_ras_terminate(struct psp_context *psp) { int ret; + /* + * TODO: bypass the terminate in sriov for now + */ + if (amdgpu_sriov_vf(psp->adev)) + return 0; + if (!psp->ras.ras_initialized) return 0; @@ -777,6 +785,18 @@ static int psp_ras_initialize(struct psp_context *psp) { int ret; + /* + * TODO: bypass the initialize in sriov for now + */ + if (amdgpu_sriov_vf(psp->adev)) + return 0; + + if (!psp->adev->psp.ta_ras_ucode_size || + !psp->adev->psp.ta_ras_start_addr) { + dev_warn(psp->adev->dev, "RAS: ras ta ucode is not available\n"); + return 0; + } + if (!psp->ras.ras_initialized) { ret = psp_ras_init_shared_buf(psp); if (ret) @@ -866,6 +886,18 @@ static int psp_hdcp_initialize(struct psp_context *psp) { int ret; + /* + * TODO: bypass the initialize in sriov for now + */ + if (amdgpu_sriov_vf(psp->adev)) + return 0; + + if (!psp->adev->psp.ta_hdcp_ucode_size || + !psp->adev->psp.ta_hdcp_start_addr) { + dev_warn(psp->adev->dev, "HDCP: hdcp ta ucode is not available\n"); + return 0; + } + if (!psp->hdcp_context.hdcp_initialized) { ret = psp_hdcp_init_shared_buf(psp); if (ret) @@ -948,6 +980,12 @@ static int psp_hdcp_terminate(struct psp_context *psp) { int ret; + /* + * TODO: bypass the terminate in sriov for now + */ + if (amdgpu_sriov_vf(psp->adev)) + return 0; + if (!psp->hdcp_context.hdcp_initialized) return 0; @@ -1039,6 +1077,18 @@ static int psp_dtm_initialize(struct psp_context *psp) { int ret; + /* + * TODO: bypass the initialize in sriov for now + */ + if (amdgpu_sriov_vf(psp->adev)) + return 0; + + if (!psp->adev->psp.ta_dtm_ucode_size || + !psp->adev->psp.ta_dtm_start_addr) { + dev_warn(psp->adev->dev, "DTM: dtm ta ucode is not available\n"); + return 0; + } + if (!psp->dtm_context.dtm_initialized) { ret = psp_dtm_init_shared_buf(psp); if (ret) @@ -1091,6 +1141,12 @@ static int psp_dtm_terminate(struct psp_context *psp) { int ret; + /* + * TODO: bypass the terminate in sriov for now + */ + if (amdgpu_sriov_vf(psp->adev)) + return 0; + if (!psp->dtm_context.dtm_initialized) return 0; @@ -1411,7 +1467,10 @@ out: || ucode->ucode_id == AMDGPU_UCODE_ID_SDMA5 || ucode->ucode_id == AMDGPU_UCODE_ID_SDMA6 || ucode->ucode_id == AMDGPU_UCODE_ID_SDMA7 - || ucode->ucode_id == AMDGPU_UCODE_ID_RLC_G)) + || ucode->ucode_id == AMDGPU_UCODE_ID_RLC_G + || ucode->ucode_id == AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL + || ucode->ucode_id == AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM + || ucode->ucode_id == AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM)) /*skip ucode loading in SRIOV VF */ continue; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index dab90c280476..404483437bd3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -220,7 +220,7 @@ static struct ras_manager *amdgpu_ras_find_obj(struct amdgpu_device *adev, * As their names indicate, inject operation will write the * value to the address. * - * Second member: struct ras_debug_if::op. + * The second member: struct ras_debug_if::op. * It has three kinds of operations. * * - 0: disable RAS on the block. Take ::head as its data. @@ -228,14 +228,20 @@ static struct ras_manager *amdgpu_ras_find_obj(struct amdgpu_device *adev, * - 2: inject errors on the block. Take ::inject as its data. * * How to use the interface? - * programs: - * copy the struct ras_debug_if in your codes and initialize it. - * write the struct to the control node. + * + * Programs + * + * Copy the struct ras_debug_if in your codes and initialize it. + * Write the struct to the control node. + * + * Shells * * .. code-block:: bash * * echo op block [error [sub_block address value]] > .../ras/ras_ctrl * + * Parameters: + * * op: disable, enable, inject * disable: only block is needed * enable: block and error are needed @@ -265,8 +271,10 @@ static struct ras_manager *amdgpu_ras_find_obj(struct amdgpu_device *adev, * /sys/class/drm/card[0/1/2...]/device/ras/[gfx/sdma/...]_err_count * * .. note:: - * Operation is only allowed on blocks which are supported. + * Operations are only allowed on blocks which are supported. * Please check ras mask at /sys/module/amdgpu/parameters/ras_mask + * to see which blocks support RAS on a particular asic. + * */ static ssize_t amdgpu_ras_debugfs_ctrl_write(struct file *f, const char __user *buf, size_t size, loff_t *pos) @@ -322,7 +330,7 @@ static ssize_t amdgpu_ras_debugfs_ctrl_write(struct file *f, const char __user * * DOC: AMDGPU RAS debugfs EEPROM table reset interface * * Some boards contain an EEPROM which is used to persistently store a list of - * bad pages containing ECC errors detected in vram. This interface provides + * bad pages which experiences ECC errors in vram. This interface provides * a way to reset the EEPROM, e.g., after testing error injection. * * Usage: @@ -362,7 +370,7 @@ static const struct file_operations amdgpu_ras_debugfs_eeprom_ops = { /** * DOC: AMDGPU RAS sysfs Error Count Interface * - * It allows user to read the error count for each IP block on the gpu through + * It allows the user to read the error count for each IP block on the gpu through * /sys/class/drm/card[0/1/2...]/device/ras/[gfx/sdma/...]_err_count * * It outputs the multiple lines which report the uncorrected (ue) and corrected @@ -1027,6 +1035,24 @@ static int amdgpu_ras_sysfs_remove_all(struct amdgpu_device *adev) } /* sysfs end */ +/** + * DOC: AMDGPU RAS Reboot Behavior for Unrecoverable Errors + * + * Normally when there is an uncorrectable error, the driver will reset + * the GPU to recover. However, in the event of an unrecoverable error, + * the driver provides an interface to reboot the system automatically + * in that event. + * + * The following file in debugfs provides that interface: + * /sys/kernel/debug/dri/[0/1/2...]/ras/auto_reboot + * + * Usage: + * + * .. code-block:: bash + * + * echo true > .../ras/auto_reboot + * + */ /* debugfs begin */ static void amdgpu_ras_debugfs_create_ctrl_node(struct amdgpu_device *adev) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c index b66d29d5ffa2..b158230af8db 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c @@ -138,6 +138,7 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev) } dma_fence_put(fence); + fence = NULL; r = amdgpu_bo_kmap(vram_obj, &vram_map); if (r) { @@ -183,6 +184,7 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev) } dma_fence_put(fence); + fence = NULL; r = amdgpu_bo_kmap(gtt_obj[i], >t_map); if (r) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h index f940526c5889..63e734a125fb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h @@ -473,7 +473,7 @@ TRACE_EVENT(amdgpu_ib_pipe_sync, TP_PROTO(struct amdgpu_job *sched_job, struct dma_fence *fence), TP_ARGS(sched_job, fence), TP_STRUCT__entry( - __string(ring, sched_job->base.sched->name); + __string(ring, sched_job->base.sched->name) __field(uint64_t, id) __field(struct dma_fence *, fence) __field(uint64_t, ctx) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index d5d916169226..61d9b7774d42 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -1906,9 +1906,6 @@ void amdgpu_ttm_late_init(struct amdgpu_device *adev) void *stolen_vga_buf; /* return the VGA stolen memory (if any) back to VRAM */ amdgpu_bo_free_kernel(&adev->stolen_vga_memory, NULL, &stolen_vga_buf); - - /* return the IP Discovery TMR memory back to VRAM */ - amdgpu_bo_free_kernel(&adev->discovery_memory, NULL, NULL); } /** @@ -1921,7 +1918,10 @@ void amdgpu_ttm_fini(struct amdgpu_device *adev) amdgpu_ttm_debugfs_fini(adev); amdgpu_ttm_training_reserve_vram_fini(adev); + /* return the IP Discovery TMR memory back to VRAM */ + amdgpu_bo_free_kernel(&adev->discovery_memory, NULL, NULL); amdgpu_ttm_fw_reserve_vram_fini(adev); + if (adev->mman.aper_base_kaddr) iounmap(adev->mman.aper_base_kaddr); adev->mman.aper_base_kaddr = NULL; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c index b4dd89af6f09..e324bfe6c58f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c @@ -299,6 +299,7 @@ int amdgpu_uvd_sw_fini(struct amdgpu_device *adev) { int i, j; + cancel_delayed_work_sync(&adev->uvd.idle_work); drm_sched_entity_destroy(&adev->uvd.entity); for (j = 0; j < adev->uvd.num_uvd_inst; ++j) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c index 703677f2ff6f..46b590af2fd2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c @@ -216,6 +216,7 @@ int amdgpu_vce_sw_fini(struct amdgpu_device *adev) if (adev->vce.vcpu_bo == NULL) return 0; + cancel_delayed_work_sync(&adev->vce.idle_work); drm_sched_entity_destroy(&adev->vce.entity); amdgpu_bo_free_kernel(&adev->vce.vcpu_bo, &adev->vce.gpu_addr, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c index 3199e4a5ff12..9d870444d7d6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c @@ -193,6 +193,8 @@ int amdgpu_vcn_sw_fini(struct amdgpu_device *adev) { int i, j; + cancel_delayed_work_sync(&adev->vcn.idle_work); + if (adev->vcn.indirect_sram) { amdgpu_bo_free_kernel(&adev->vcn.dpg_sram_bo, &adev->vcn.dpg_sram_gpu_addr, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index e775f271f1ed..598c24505c73 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -1418,6 +1418,9 @@ static int amdgpu_vm_update_ptes(struct amdgpu_vm_update_params *params, uint64_t incr, entry_end, pe_start; struct amdgpu_bo *pt; + /* make sure that the page tables covering the address range are + * actually allocated + */ r = amdgpu_vm_alloc_pts(params->adev, params->vm, &cursor, params->direct); if (r) @@ -1491,7 +1494,12 @@ static int amdgpu_vm_update_ptes(struct amdgpu_vm_update_params *params, } while (frag_start < entry_end); if (amdgpu_vm_pt_descendant(adev, &cursor)) { - /* Free all child entries */ + /* Free all child entries. + * Update the tables with the flags and addresses and free up subsequent + * tables in the case of huge pages or freed up areas. + * This is the maximum you can free, because all other page tables are not + * completely covered by the range and so potentially still in use. + */ while (cursor.pfn < frag_start) { amdgpu_vm_free_pts(adev, params->vm, &cursor); amdgpu_vm_pt_next(adev, &cursor); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c index 00371713c671..61d13d8b7b20 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c @@ -274,22 +274,55 @@ int amdgpu_xgmi_set_pstate(struct amdgpu_device *adev, int pstate) { int ret = 0; struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev, 0); + struct amdgpu_device *tmp_adev; + bool update_hive_pstate = true; + bool is_high_pstate = pstate && adev->asic_type == CHIP_VEGA20; if (!hive) return 0; - if (hive->pstate == pstate) - return 0; + mutex_lock(&hive->hive_lock); + + if (hive->pstate == pstate) { + adev->pstate = is_high_pstate ? pstate : adev->pstate; + goto out; + } dev_dbg(adev->dev, "Set xgmi pstate %d.\n", pstate); if (is_support_sw_smu_xgmi(adev)) ret = smu_set_xgmi_pstate(&adev->smu, pstate); - if (ret) + else if (adev->powerplay.pp_funcs && + adev->powerplay.pp_funcs->set_xgmi_pstate) + ret = adev->powerplay.pp_funcs->set_xgmi_pstate(adev->powerplay.pp_handle, + pstate); + + if (ret) { dev_err(adev->dev, "XGMI: Set pstate failure on device %llx, hive %llx, ret %d", adev->gmc.xgmi.node_id, adev->gmc.xgmi.hive_id, ret); + goto out; + } + + /* Update device pstate */ + adev->pstate = pstate; + + /* + * Update the hive pstate only all devices of the hive + * are in the same pstate + */ + list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) { + if (tmp_adev->pstate != adev->pstate) { + update_hive_pstate = false; + break; + } + } + if (update_hive_pstate || is_high_pstate) + hive->pstate = pstate; + +out: + mutex_unlock(&hive->hive_lock); return ret; } @@ -364,6 +397,9 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev) goto exit; } + /* Set default device pstate */ + adev->pstate = -1; + top_info = &adev->psp.xgmi_context.top_info; list_add_tail(&adev->gmc.xgmi.head, &hive->device_list); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index e7bab4f094b3..ca5f0e7ea1ac 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -564,6 +564,32 @@ static void gfx_v10_0_free_microcode(struct amdgpu_device *adev) kfree(adev->gfx.rlc.register_list_format); } +static void gfx_v10_0_check_fw_write_wait(struct amdgpu_device *adev) +{ + adev->gfx.cp_fw_write_wait = false; + + switch (adev->asic_type) { + case CHIP_NAVI10: + case CHIP_NAVI12: + case CHIP_NAVI14: + if ((adev->gfx.me_fw_version >= 0x00000046) && + (adev->gfx.me_feature_version >= 27) && + (adev->gfx.pfp_fw_version >= 0x00000068) && + (adev->gfx.pfp_feature_version >= 27) && + (adev->gfx.mec_fw_version >= 0x0000005b) && + (adev->gfx.mec_feature_version >= 27)) + adev->gfx.cp_fw_write_wait = true; + break; + default: + break; + } + + if (adev->gfx.cp_fw_write_wait == false) + DRM_WARN_ONCE("Warning: check cp_fw_version and update it to realize \ + GRBM requires 1-cycle delay in cp firmware\n"); +} + + static void gfx_v10_0_init_rlc_ext_microcode(struct amdgpu_device *adev) { const struct rlc_firmware_header_v2_1 *rlc_hdr; @@ -832,6 +858,7 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev) } } + gfx_v10_0_check_fw_write_wait(adev); out: if (err) { dev_err(adev->dev, @@ -1758,27 +1785,52 @@ static void gfx_v10_0_enable_gui_idle_interrupt(struct amdgpu_device *adev, WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp); } -static void gfx_v10_0_init_csb(struct amdgpu_device *adev) +static int gfx_v10_0_init_csb(struct amdgpu_device *adev) { + int r; + + if (adev->in_gpu_reset) { + r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false); + if (r) + return r; + + r = amdgpu_bo_kmap(adev->gfx.rlc.clear_state_obj, + (void **)&adev->gfx.rlc.cs_ptr); + if (!r) { + adev->gfx.rlc.funcs->get_csb_buffer(adev, + adev->gfx.rlc.cs_ptr); + amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj); + } + + amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj); + if (r) + return r; + } + /* csib */ WREG32_SOC15(GC, 0, mmRLC_CSIB_ADDR_HI, adev->gfx.rlc.clear_state_gpu_addr >> 32); WREG32_SOC15(GC, 0, mmRLC_CSIB_ADDR_LO, adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc); WREG32_SOC15(GC, 0, mmRLC_CSIB_LENGTH, adev->gfx.rlc.clear_state_size); + + return 0; } -static void gfx_v10_0_init_pg(struct amdgpu_device *adev) +static int gfx_v10_0_init_pg(struct amdgpu_device *adev) { int i; + int r; - gfx_v10_0_init_csb(adev); + r = gfx_v10_0_init_csb(adev); + if (r) + return r; for (i = 0; i < adev->num_vmhubs; i++) amdgpu_gmc_flush_gpu_tlb(adev, 0, i, 0); /* TODO: init power gating */ - return; + return 0; } void gfx_v10_0_rlc_stop(struct amdgpu_device *adev) @@ -1880,7 +1932,10 @@ static int gfx_v10_0_rlc_resume(struct amdgpu_device *adev) r = gfx_v10_0_wait_for_rlc_autoload_complete(adev); if (r) return r; - gfx_v10_0_init_pg(adev); + + r = gfx_v10_0_init_pg(adev); + if (r) + return r; /* enable RLC SRM */ gfx_v10_0_rlc_enable_srm(adev); @@ -1906,7 +1961,10 @@ static int gfx_v10_0_rlc_resume(struct amdgpu_device *adev) return r; } - gfx_v10_0_init_pg(adev); + r = gfx_v10_0_init_pg(adev); + if (r) + return r; + adev->gfx.rlc.funcs->start(adev); if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) { @@ -2373,7 +2431,7 @@ static int gfx_v10_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev) return 0; } -static void gfx_v10_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable) +static int gfx_v10_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable) { int i; u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL); @@ -2386,7 +2444,17 @@ static void gfx_v10_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable) adev->gfx.gfx_ring[i].sched.ready = false; } WREG32_SOC15(GC, 0, mmCP_ME_CNTL, tmp); - udelay(50); + + for (i = 0; i < adev->usec_timeout; i++) { + if (RREG32_SOC15(GC, 0, mmCP_STAT) == 0) + break; + udelay(1); + } + + if (i >= adev->usec_timeout) + DRM_ERROR("failed to %s cp gfx\n", enable ? "unhalt" : "halt"); + + return 0; } static int gfx_v10_0_cp_gfx_load_pfp_microcode(struct amdgpu_device *adev) @@ -3087,6 +3155,7 @@ static int gfx_v10_0_gfx_init_queue(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; struct v10_gfx_mqd *mqd = ring->mqd_ptr; + int mqd_idx = ring - &adev->gfx.gfx_ring[0]; if (!adev->in_gpu_reset && !adev->in_suspend) { memset((void *)mqd, 0, sizeof(*mqd)); @@ -3098,12 +3167,12 @@ static int gfx_v10_0_gfx_init_queue(struct amdgpu_ring *ring) #endif nv_grbm_select(adev, 0, 0, 0, 0); mutex_unlock(&adev->srbm_mutex); - if (adev->gfx.me.mqd_backup[AMDGPU_MAX_GFX_RINGS]) - memcpy(adev->gfx.me.mqd_backup[AMDGPU_MAX_GFX_RINGS], mqd, sizeof(*mqd)); + if (adev->gfx.me.mqd_backup[mqd_idx]) + memcpy(adev->gfx.me.mqd_backup[mqd_idx], mqd, sizeof(*mqd)); } else if (adev->in_gpu_reset) { /* reset mqd with the backup copy */ - if (adev->gfx.me.mqd_backup[AMDGPU_MAX_GFX_RINGS]) - memcpy(mqd, adev->gfx.me.mqd_backup[AMDGPU_MAX_GFX_RINGS], sizeof(*mqd)); + if (adev->gfx.me.mqd_backup[mqd_idx]) + memcpy(mqd, adev->gfx.me.mqd_backup[mqd_idx], sizeof(*mqd)); /* reset the ring */ ring->wptr = 0; adev->wb.wb[ring->wptr_offs] = 0; @@ -4766,6 +4835,24 @@ static void gfx_v10_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, gfx_v10_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20); } +static void gfx_v10_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring, + uint32_t reg0, uint32_t reg1, + uint32_t ref, uint32_t mask) +{ + int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); + struct amdgpu_device *adev = ring->adev; + bool fw_version_ok = false; + + fw_version_ok = adev->gfx.cp_fw_write_wait; + + if (fw_version_ok) + gfx_v10_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1, + ref, mask, 0x20); + else + amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1, + ref, mask); +} + static void gfx_v10_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev, uint32_t me, uint32_t pipe, @@ -5156,6 +5243,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = { .emit_tmz = gfx_v10_0_ring_emit_tmz, .emit_wreg = gfx_v10_0_ring_emit_wreg, .emit_reg_wait = gfx_v10_0_ring_emit_reg_wait, + .emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait, }; static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = { @@ -5189,6 +5277,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = { .pad_ib = amdgpu_ring_generic_pad_ib, .emit_wreg = gfx_v10_0_ring_emit_wreg, .emit_reg_wait = gfx_v10_0_ring_emit_reg_wait, + .emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait, }; static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_kiq = { @@ -5219,6 +5308,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_kiq = { .emit_rreg = gfx_v10_0_ring_emit_rreg, .emit_wreg = gfx_v10_0_ring_emit_wreg, .emit_reg_wait = gfx_v10_0_ring_emit_reg_wait, + .emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait, }; static void gfx_v10_0_set_ring_funcs(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 39297baedfb4..faf2ffce5837 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -704,6 +704,7 @@ static const struct soc15_reg_golden golden_settings_gc_9_4_1_arct[] = SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_4_ARCT, 0x3fffffff, 0xb90f5b1), SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_5_ARCT, 0x3ff, 0x135), SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_CONFIG, 0xffffffff, 0x011A0000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_FIFO_SIZES, 0xffffffff, 0x00000f00), }; static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] = @@ -986,6 +987,13 @@ static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev) adev->gfx.me_fw_write_wait = false; adev->gfx.mec_fw_write_wait = false; + if ((adev->gfx.mec_fw_version < 0x000001a5) || + (adev->gfx.mec_feature_version < 46) || + (adev->gfx.pfp_fw_version < 0x000000b7) || + (adev->gfx.pfp_feature_version < 46)) + DRM_WARN_ONCE("Warning: check cp_fw_version and update it to realize \ + GRBM requires 1-cycle delay in cp firmware\n"); + switch (adev->asic_type) { case CHIP_VEGA10: if ((adev->gfx.me_fw_version >= 0x0000009c) && @@ -1044,8 +1052,13 @@ static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev) case CHIP_VEGA20: break; case CHIP_RAVEN: - if (!(adev->rev_id >= 0x8 || adev->pdev->device == 0x15d8) - &&((adev->gfx.rlc_fw_version != 106 && + /* Disable GFXOFF on original raven. There are combinations + * of sbios and platforms that are not stable. + */ + if (!(adev->rev_id >= 0x8 || adev->pdev->device == 0x15d8)) + adev->pm.pp_feature &= ~PP_GFXOFF_MASK; + else if (!(adev->rev_id >= 0x8 || adev->pdev->device == 0x15d8) + &&((adev->gfx.rlc_fw_version != 106 && adev->gfx.rlc_fw_version < 531) || (adev->gfx.rlc_fw_version == 53815) || (adev->gfx.rlc_feature_version < 1) || @@ -1057,6 +1070,12 @@ static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev) AMD_PG_SUPPORT_CP | AMD_PG_SUPPORT_RLC_SMU_HS; break; + case CHIP_RENOIR: + if (adev->pm.pp_feature & PP_GFXOFF_MASK) + adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG | + AMD_PG_SUPPORT_CP | + AMD_PG_SUPPORT_RLC_SMU_HS; + break; default: break; } @@ -2725,7 +2744,10 @@ static void gfx_v9_0_init_pg(struct amdgpu_device *adev) * And it's needed by gfxoff feature. */ if (adev->gfx.rlc.is_rlc_v2_1) { - gfx_v9_1_init_rlc_save_restore_list(adev); + if (adev->asic_type == CHIP_VEGA12 || + (adev->asic_type == CHIP_RAVEN && + adev->rev_id >= 8)) + gfx_v9_1_init_rlc_save_restore_list(adev); gfx_v9_0_enable_save_restore_machine(adev); } @@ -3876,9 +3898,22 @@ static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev) uint64_t clock; mutex_lock(&adev->gfx.gpu_clock_mutex); - WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1); - clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) | - ((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL); + if (adev->asic_type == CHIP_VEGA10 && amdgpu_sriov_runtime(adev)) { + uint32_t tmp, lsb, msb, i = 0; + do { + if (i != 0) + udelay(1); + tmp = RREG32_SOC15(GC, 0, mmRLC_REFCLOCK_TIMESTAMP_MSB); + lsb = RREG32_SOC15(GC, 0, mmRLC_REFCLOCK_TIMESTAMP_LSB); + msb = RREG32_SOC15(GC, 0, mmRLC_REFCLOCK_TIMESTAMP_MSB); + i++; + } while (unlikely(tmp != msb) && (i < adev->usec_timeout)); + clock = (uint64_t)lsb | ((uint64_t)msb << 32ULL); + } else { + WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1); + clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) | + ((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL); + } mutex_unlock(&adev->gfx.gpu_clock_mutex); return clock; } diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c index 9ec4297e61e5..e91bd7945777 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c @@ -367,6 +367,8 @@ void gfxhub_v1_0_init(struct amdgpu_device *adev) hub->ctx0_ptb_addr_hi32 = SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32); + hub->vm_inv_eng0_sem = + SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG0_SEM); hub->vm_inv_eng0_req = SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG0_REQ); hub->vm_inv_eng0_ack = diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c index b4f32d853ca1..b70c7b483c24 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c @@ -356,6 +356,8 @@ void gfxhub_v2_0_init(struct amdgpu_device *adev) hub->ctx0_ptb_addr_hi32 = SOC15_REG_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32); + hub->vm_inv_eng0_sem = + SOC15_REG_OFFSET(GC, 0, mmGCVM_INVALIDATE_ENG0_SEM); hub->vm_inv_eng0_req = SOC15_REG_OFFSET(GC, 0, mmGCVM_INVALIDATE_ENG0_REQ); hub->vm_inv_eng0_ack = diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c index 6e1b25bd1fe7..321f8a997be8 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c @@ -235,6 +235,29 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid, const unsigned eng = 17; unsigned int i; + spin_lock(&adev->gmc.invalidate_lock); + /* + * It may lose gpuvm invalidate acknowldege state across power-gating + * off cycle, add semaphore acquire before invalidation and semaphore + * release after invalidation to avoid entering power gated state + * to WA the Issue + */ + + /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */ + if (vmhub == AMDGPU_MMHUB_0 || + vmhub == AMDGPU_MMHUB_1) { + for (i = 0; i < adev->usec_timeout; i++) { + /* a read return value of 1 means semaphore acuqire */ + tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_sem + eng); + if (tmp & 0x1) + break; + udelay(1); + } + + if (i >= adev->usec_timeout) + DRM_ERROR("Timeout waiting for sem acquire in VM flush!\n"); + } + WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, tmp); /* @@ -254,6 +277,17 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid, udelay(1); } + /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */ + if (vmhub == AMDGPU_MMHUB_0 || + vmhub == AMDGPU_MMHUB_1) + /* + * add semaphore release after invalidation, + * write with 0 means semaphore release + */ + WREG32_NO_KIQ(hub->vm_inv_eng0_sem + eng, 0); + + spin_unlock(&adev->gmc.invalidate_lock); + if (i < adev->usec_timeout) return; @@ -338,17 +372,38 @@ static uint64_t gmc_v10_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring, uint32_t req = gmc_v10_0_get_invalidate_req(vmid, 0); unsigned eng = ring->vm_inv_eng; + /* + * It may lose gpuvm invalidate acknowldege state across power-gating + * off cycle, add semaphore acquire before invalidation and semaphore + * release after invalidation to avoid entering power gated state + * to WA the Issue + */ + + /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */ + if (ring->funcs->vmhub == AMDGPU_MMHUB_0 || + ring->funcs->vmhub == AMDGPU_MMHUB_1) + /* a read return value of 1 means semaphore acuqire */ + amdgpu_ring_emit_reg_wait(ring, + hub->vm_inv_eng0_sem + eng, 0x1, 0x1); + amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_lo32 + (2 * vmid), lower_32_bits(pd_addr)); amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 + (2 * vmid), upper_32_bits(pd_addr)); - amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_req + eng, req); + amdgpu_ring_emit_reg_write_reg_wait(ring, hub->vm_inv_eng0_req + eng, + hub->vm_inv_eng0_ack + eng, + req, 1 << vmid); - /* wait for the invalidate to complete */ - amdgpu_ring_emit_reg_wait(ring, hub->vm_inv_eng0_ack + eng, - 1 << vmid, 1 << vmid); + /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */ + if (ring->funcs->vmhub == AMDGPU_MMHUB_0 || + ring->funcs->vmhub == AMDGPU_MMHUB_1) + /* + * add semaphore release after invalidation, + * write with 0 means semaphore release + */ + amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_sem + eng, 0); return pd_addr; } diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 9f2a893871ec..3c355fb5d2b4 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -459,6 +459,29 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, } spin_lock(&adev->gmc.invalidate_lock); + + /* + * It may lose gpuvm invalidate acknowldege state across power-gating + * off cycle, add semaphore acquire before invalidation and semaphore + * release after invalidation to avoid entering power gated state + * to WA the Issue + */ + + /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */ + if (vmhub == AMDGPU_MMHUB_0 || + vmhub == AMDGPU_MMHUB_1) { + for (j = 0; j < adev->usec_timeout; j++) { + /* a read return value of 1 means semaphore acuqire */ + tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_sem + eng); + if (tmp & 0x1) + break; + udelay(1); + } + + if (j >= adev->usec_timeout) + DRM_ERROR("Timeout waiting for sem acquire in VM flush!\n"); + } + WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, tmp); /* @@ -474,7 +497,18 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, break; udelay(1); } + + /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */ + if (vmhub == AMDGPU_MMHUB_0 || + vmhub == AMDGPU_MMHUB_1) + /* + * add semaphore release after invalidation, + * write with 0 means semaphore release + */ + WREG32_NO_KIQ(hub->vm_inv_eng0_sem + eng, 0); + spin_unlock(&adev->gmc.invalidate_lock); + if (j < adev->usec_timeout) return; @@ -489,6 +523,20 @@ static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring, uint32_t req = gmc_v9_0_get_invalidate_req(vmid, 0); unsigned eng = ring->vm_inv_eng; + /* + * It may lose gpuvm invalidate acknowldege state across power-gating + * off cycle, add semaphore acquire before invalidation and semaphore + * release after invalidation to avoid entering power gated state + * to WA the Issue + */ + + /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */ + if (ring->funcs->vmhub == AMDGPU_MMHUB_0 || + ring->funcs->vmhub == AMDGPU_MMHUB_1) + /* a read return value of 1 means semaphore acuqire */ + amdgpu_ring_emit_reg_wait(ring, + hub->vm_inv_eng0_sem + eng, 0x1, 0x1); + amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_lo32 + (2 * vmid), lower_32_bits(pd_addr)); @@ -499,6 +547,15 @@ static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring, hub->vm_inv_eng0_ack + eng, req, 1 << vmid); + /* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */ + if (ring->funcs->vmhub == AMDGPU_MMHUB_0 || + ring->funcs->vmhub == AMDGPU_MMHUB_1) + /* + * add semaphore release after invalidation, + * write with 0 means semaphore release + */ + amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_sem + eng, 0); + return pd_addr; } diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c index 6965e1e6fa9e..28105e4af507 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c @@ -420,6 +420,8 @@ void mmhub_v1_0_init(struct amdgpu_device *adev) hub->ctx0_ptb_addr_hi32 = SOC15_REG_OFFSET(MMHUB, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32); + hub->vm_inv_eng0_sem = + SOC15_REG_OFFSET(MMHUB, 0, mmVM_INVALIDATE_ENG0_SEM); hub->vm_inv_eng0_req = SOC15_REG_OFFSET(MMHUB, 0, mmVM_INVALIDATE_ENG0_REQ); hub->vm_inv_eng0_ack = diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c index 945533634711..a7cb185d639a 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c @@ -348,6 +348,8 @@ void mmhub_v2_0_init(struct amdgpu_device *adev) hub->ctx0_ptb_addr_hi32 = SOC15_REG_OFFSET(MMHUB, 0, mmMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32); + hub->vm_inv_eng0_sem = + SOC15_REG_OFFSET(MMHUB, 0, mmMMVM_INVALIDATE_ENG0_SEM); hub->vm_inv_eng0_req = SOC15_REG_OFFSET(MMHUB, 0, mmMMVM_INVALIDATE_ENG0_REQ); hub->vm_inv_eng0_ack = diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c index 657970f9ebfb..66efe2f7bd76 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c @@ -219,6 +219,15 @@ static void mmhub_v9_4_init_cache_regs(struct amdgpu_device *adev, int hubid) hubid * MMHUB_INSTANCE_REGISTER_OFFSET, tmp); tmp = mmVML2PF0_VM_L2_CNTL3_DEFAULT; + if (adev->gmc.translate_further) { + tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_CNTL3, BANK_SELECT, 12); + tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_CNTL3, + L2_CACHE_BIGK_FRAGMENT_SIZE, 9); + } else { + tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_CNTL3, BANK_SELECT, 9); + tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_CNTL3, + L2_CACHE_BIGK_FRAGMENT_SIZE, 6); + } WREG32_SOC15_OFFSET(MMHUB, 0, mmVML2PF0_VM_L2_CNTL3, hubid * MMHUB_INSTANCE_REGISTER_OFFSET, tmp); @@ -495,6 +504,10 @@ void mmhub_v9_4_init(struct amdgpu_device *adev) SOC15_REG_OFFSET(MMHUB, 0, mmVML2VC0_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32) + i * MMHUB_INSTANCE_REGISTER_OFFSET; + hub[i]->vm_inv_eng0_sem = + SOC15_REG_OFFSET(MMHUB, 0, + mmVML2VC0_VM_INVALIDATE_ENG0_SEM) + + i * MMHUB_INSTANCE_REGISTER_OFFSET; hub[i]->vm_inv_eng0_req = SOC15_REG_OFFSET(MMHUB, 0, mmVML2VC0_VM_INVALIDATE_ENG0_REQ) + diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c index a55a2e83fb19..0ba66bef5746 100644 --- a/drivers/gpu/drm/amd/amdgpu/nv.c +++ b/drivers/gpu/drm/amd/amdgpu/nv.c @@ -40,6 +40,7 @@ #include "gc/gc_10_1_0_sh_mask.h" #include "hdp/hdp_5_0_0_offset.h" #include "hdp/hdp_5_0_0_sh_mask.h" +#include "smuio/smuio_11_0_0_offset.h" #include "soc15.h" #include "soc15_common.h" @@ -156,8 +157,27 @@ static bool nv_read_disabled_bios(struct amdgpu_device *adev) static bool nv_read_bios_from_rom(struct amdgpu_device *adev, u8 *bios, u32 length_bytes) { - /* TODO: will implement it when SMU header is available */ - return false; + u32 *dw_ptr; + u32 i, length_dw; + + if (bios == NULL) + return false; + if (length_bytes == 0) + return false; + /* APU vbios image is part of sbios image */ + if (adev->flags & AMD_IS_APU) + return false; + + dw_ptr = (u32 *)bios; + length_dw = ALIGN(length_bytes, 4) / 4; + + /* set rom index to 0 */ + WREG32(SOC15_REG_OFFSET(SMUIO, 0, mmROM_INDEX), 0); + /* read out the rom data */ + for (i = 0; i < length_dw; i++) + dw_ptr[i] = RREG32(SOC15_REG_OFFSET(SMUIO, 0, mmROM_DATA)); + + return true; } static struct soc15_allowed_register_entry nv_allowed_read_registers[] = { @@ -539,6 +559,16 @@ static bool nv_need_reset_on_init(struct amdgpu_device *adev) return false; } +static uint64_t nv_get_pcie_replay_count(struct amdgpu_device *adev) +{ + + /* TODO + * dummy implement for pcie_replay_count sysfs interface + * */ + + return 0; +} + static void nv_init_doorbell_index(struct amdgpu_device *adev) { adev->doorbell_index.kiq = AMDGPU_NAVI10_DOORBELL_KIQ; @@ -586,6 +616,7 @@ static const struct amdgpu_asic_funcs nv_asic_funcs = .need_full_reset = &nv_need_full_reset, .get_pcie_usage = &nv_get_pcie_usage, .need_reset_on_init = &nv_need_reset_on_init, + .get_pcie_replay_count = &nv_get_pcie_replay_count, }; static int nv_common_early_init(void *handle) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c index b8fdb192f6d6..f4ad2990f973 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c @@ -1173,6 +1173,16 @@ static void sdma_v5_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); } +static void sdma_v5_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring, + uint32_t reg0, uint32_t reg1, + uint32_t ref, uint32_t mask) +{ + amdgpu_ring_emit_wreg(ring, reg0, ref); + /* wait for a cycle to reset vm_inv_eng*_ack */ + amdgpu_ring_emit_reg_wait(ring, reg0, 0, 0); + amdgpu_ring_emit_reg_wait(ring, reg1, mask, mask); +} + static int sdma_v5_0_early_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@ -1588,7 +1598,7 @@ static const struct amdgpu_ring_funcs sdma_v5_0_ring_funcs = { 6 + /* sdma_v5_0_ring_emit_pipeline_sync */ /* sdma_v5_0_ring_emit_vm_flush */ SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 + - SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 + + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 * 2 + 10 + 10 + 10, /* sdma_v5_0_ring_emit_fence x3 for user fence, vm fence */ .emit_ib_size = 7 + 6, /* sdma_v5_0_ring_emit_ib */ .emit_ib = sdma_v5_0_ring_emit_ib, @@ -1602,6 +1612,7 @@ static const struct amdgpu_ring_funcs sdma_v5_0_ring_funcs = { .pad_ib = sdma_v5_0_ring_pad_ib, .emit_wreg = sdma_v5_0_ring_emit_wreg, .emit_reg_wait = sdma_v5_0_ring_emit_reg_wait, + .emit_reg_write_reg_wait = sdma_v5_0_ring_emit_reg_write_reg_wait, .init_cond_exec = sdma_v5_0_ring_init_cond_exec, .patch_cond_exec = sdma_v5_0_ring_patch_cond_exec, .preempt_ib = sdma_v5_0_ring_preempt_ib, diff --git a/drivers/gpu/drm/amd/amdgpu/si_ih.c b/drivers/gpu/drm/amd/amdgpu/si_ih.c index 57bb5f9e08b2..88ae27a5a03d 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/si_ih.c @@ -64,7 +64,8 @@ static int si_ih_irq_init(struct amdgpu_device *adev) u32 interrupt_cntl, ih_cntl, ih_rb_cntl; si_ih_disable_interrupts(adev); - WREG32(INTERRUPT_CNTL2, adev->irq.ih.gpu_addr >> 8); + /* set dummy read address to dummy page address */ + WREG32(INTERRUPT_CNTL2, adev->dummy_page_addr >> 8); interrupt_cntl = RREG32(INTERRUPT_CNTL); interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE; interrupt_cntl &= ~IH_REQ_NONSNOOP_EN; diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index 741b564b4aa5..8e1640bc07af 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -1145,7 +1145,9 @@ static int soc15_common_early_init(void *handle) AMD_CG_SUPPORT_SDMA_LS | AMD_CG_SUPPORT_VCN_MGCG; - adev->pg_flags = AMD_PG_SUPPORT_SDMA | AMD_PG_SUPPORT_VCN; + adev->pg_flags = AMD_PG_SUPPORT_SDMA | + AMD_PG_SUPPORT_VCN | + AMD_PG_SUPPORT_VCN_DPG; } else if (adev->pdev->device == 0x15d8) { adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS | @@ -1188,7 +1190,9 @@ static int soc15_common_early_init(void *handle) AMD_CG_SUPPORT_SDMA_LS | AMD_CG_SUPPORT_VCN_MGCG; - adev->pg_flags = AMD_PG_SUPPORT_SDMA | AMD_PG_SUPPORT_VCN; + adev->pg_flags = AMD_PG_SUPPORT_SDMA | + AMD_PG_SUPPORT_VCN | + AMD_PG_SUPPORT_VCN_DPG; } break; case CHIP_ARCTURUS: @@ -1233,11 +1237,6 @@ static int soc15_common_early_init(void *handle) AMD_PG_SUPPORT_VCN | AMD_PG_SUPPORT_VCN_DPG; adev->external_rev_id = adev->rev_id + 0x91; - - if (adev->pm.pp_feature & PP_GFXOFF_MASK) - adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG | - AMD_PG_SUPPORT_CP | - AMD_PG_SUPPORT_RLC_SMU_HS; break; default: /* FIXME: not supported yet */ diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.h b/drivers/gpu/drm/amd/amdgpu/soc15.h index 9af6c6ffbfa2..57af489a5de3 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.h +++ b/drivers/gpu/drm/amd/amdgpu/soc15.h @@ -28,8 +28,8 @@ #include "nbio_v7_0.h" #include "nbio_v7_4.h" -#define SOC15_FLUSH_GPU_TLB_NUM_WREG 4 -#define SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT 1 +#define SOC15_FLUSH_GPU_TLB_NUM_WREG 6 +#define SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT 3 extern const struct amd_ip_funcs soc15_common_ip_funcs; diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c index 03083e5f731a..93edf9193a7b 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c @@ -293,7 +293,7 @@ static int vcn_v2_5_hw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; struct amdgpu_ring *ring; - int i; + int i, j; for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { if (adev->vcn.harvest_config & (1 << i)) @@ -305,8 +305,8 @@ static int vcn_v2_5_hw_fini(void *handle) ring->sched.ready = false; - for (i = 0; i < adev->vcn.num_enc_rings; ++i) { - ring = &adev->vcn.inst[i].ring_enc[i]; + for (j = 0; j < adev->vcn.num_enc_rings; ++j) { + ring = &adev->vcn.inst[i].ring_enc[j]; ring->sched.ready = false; } diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 0a6173d727e3..7aac9568d3be 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -719,7 +719,7 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) */ if (adev->flags & AMD_IS_APU && adev->asic_type >= CHIP_CARRIZO && - adev->asic_type <= CHIP_RAVEN) + adev->asic_type < CHIP_RAVEN) init_data.flags.gpu_vm_support = true; if (amdgpu_dc_feature_mask & DC_FBC_MASK) @@ -4239,8 +4239,8 @@ enum drm_mode_status amdgpu_dm_connector_mode_valid(struct drm_connector *connec result = MODE_OK; else DRM_DEBUG_KMS("Mode %dx%d (clk %d) failed DC validation with error %d\n", - mode->vdisplay, mode->hdisplay, + mode->vdisplay, mode->clock, dc_result); diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c index 49cf39711dfc..2bf8534c18fb 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c @@ -36,7 +36,9 @@ #include "dc_link_ddc.h" #include "i2caux_interface.h" - +#if defined(CONFIG_DEBUG_FS) +#include "amdgpu_dm_debugfs.h" +#endif /* #define TRACE_DPCD */ #ifdef TRACE_DPCD @@ -147,6 +149,12 @@ amdgpu_dm_mst_connector_late_register(struct drm_connector *connector) to_amdgpu_dm_connector(connector); struct drm_dp_mst_port *port = amdgpu_dm_connector->port; +#if defined(CONFIG_DEBUG_FS) + connector_debugfs_init(amdgpu_dm_connector); + amdgpu_dm_connector->debugfs_dpcd_address = 0; + amdgpu_dm_connector->debugfs_dpcd_size = 0; +#endif + return drm_dp_mst_connector_late_register(connector, port); } diff --git a/drivers/gpu/drm/amd/display/dc/calcs/Makefile b/drivers/gpu/drm/amd/display/dc/calcs/Makefile index 985633c08a26..26c6d735cdc7 100644 --- a/drivers/gpu/drm/amd/display/dc/calcs/Makefile +++ b/drivers/gpu/drm/amd/display/dc/calcs/Makefile @@ -24,15 +24,20 @@ # It calculates Bandwidth and Watermarks values for HW programming # -ifneq ($(call cc-option, -mpreferred-stack-boundary=4),) - cc_stack_align := -mpreferred-stack-boundary=4 -else ifneq ($(call cc-option, -mstack-alignment=16),) - cc_stack_align := -mstack-alignment=16 -endif +calcs_ccflags := -mhard-float -msse -calcs_ccflags := -mhard-float -msse $(cc_stack_align) +ifdef CONFIG_CC_IS_GCC +ifeq ($(call cc-ifversion, -lt, 0701, y), y) +IS_OLD_GCC = 1 +endif +endif -ifdef CONFIG_CC_IS_CLANG +ifdef IS_OLD_GCC +# Stack alignment mismatch, proceed with caution. +# GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3 +# (8B stack alignment). +calcs_ccflags += -mpreferred-stack-boundary=4 +else calcs_ccflags += -msse2 endif diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c index cc94c1a73daa..12ba6fdf89b7 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c @@ -3027,15 +3027,6 @@ void core_link_enable_stream( CONTROLLER_DP_TEST_PATTERN_VIDEOMODE, COLOR_DEPTH_UNDEFINED); - /* This second call is needed to reconfigure the DIG - * as a workaround for the incorrect value being applied - * from transmitter control. - */ - if (!dc_is_virtual_signal(pipe_ctx->stream->signal)) - stream->link->link_enc->funcs->setup( - stream->link->link_enc, - pipe_ctx->stream->signal); - #ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT if (pipe_ctx->stream->timing.flags.DSC) { if (dc_is_dp_signal(pipe_ctx->stream->signal) || diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/Makefile b/drivers/gpu/drm/amd/display/dc/dcn20/Makefile index ddb8d5649e79..63f3bddba7da 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn20/Makefile @@ -10,15 +10,20 @@ ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT DCN20 += dcn20_dsc.o endif -ifneq ($(call cc-option, -mpreferred-stack-boundary=4),) - cc_stack_align := -mpreferred-stack-boundary=4 -else ifneq ($(call cc-option, -mstack-alignment=16),) - cc_stack_align := -mstack-alignment=16 -endif +CFLAGS_$(AMDDALPATH)/dc/dcn20/dcn20_resource.o := -mhard-float -msse -CFLAGS_$(AMDDALPATH)/dc/dcn20/dcn20_resource.o := -mhard-float -msse $(cc_stack_align) +ifdef CONFIG_CC_IS_GCC +ifeq ($(call cc-ifversion, -lt, 0701, y), y) +IS_OLD_GCC = 1 +endif +endif -ifdef CONFIG_CC_IS_CLANG +ifdef IS_OLD_GCC +# Stack alignment mismatch, proceed with caution. +# GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3 +# (8B stack alignment). +CFLAGS_$(AMDDALPATH)/dc/dcn20/dcn20_resource.o += -mpreferred-stack-boundary=4 +else CFLAGS_$(AMDDALPATH)/dc/dcn20/dcn20_resource.o += -msse2 endif diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c index 924c2e303588..bbd1c98564be 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c @@ -1152,6 +1152,11 @@ struct stream_encoder *dcn20_stream_encoder_create( if (!enc1) return NULL; + if (ASICREV_IS_NAVI14_M(ctx->asic_id.hw_internal_rev)) { + if (eng_id >= ENGINE_ID_DIGD) + eng_id++; + } + dcn20_stream_encoder_construct(enc1, ctx, ctx->dc_bios, eng_id, &stream_enc_regs[eng_id], &se_shift, &se_mask); diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/Makefile b/drivers/gpu/drm/amd/display/dc/dcn21/Makefile index 19fabac13c65..14113ccf498d 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn21/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn21/Makefile @@ -3,15 +3,20 @@ DCN21 = dcn21_hubp.o dcn21_hubbub.o dcn21_resource.o dcn21_hwseq.o dcn21_link_encoder.o -ifneq ($(call cc-option, -mpreferred-stack-boundary=4),) - cc_stack_align := -mpreferred-stack-boundary=4 -else ifneq ($(call cc-option, -mstack-alignment=16),) - cc_stack_align := -mstack-alignment=16 -endif +CFLAGS_$(AMDDALPATH)/dc/dcn21/dcn21_resource.o := -mhard-float -msse -CFLAGS_$(AMDDALPATH)/dc/dcn21/dcn21_resource.o := -mhard-float -msse $(cc_stack_align) +ifdef CONFIG_CC_IS_GCC +ifeq ($(call cc-ifversion, -lt, 0701, y), y) +IS_OLD_GCC = 1 +endif +endif -ifdef CONFIG_CC_IS_CLANG +ifdef IS_OLD_GCC +# Stack alignment mismatch, proceed with caution. +# GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3 +# (8B stack alignment). +CFLAGS_$(AMDDALPATH)/dc/dcn21/dcn21_resource.o += -mpreferred-stack-boundary=4 +else CFLAGS_$(AMDDALPATH)/dc/dcn21/dcn21_resource.o += -msse2 endif diff --git a/drivers/gpu/drm/amd/display/dc/dml/Makefile b/drivers/gpu/drm/amd/display/dc/dml/Makefile index 5b2a65b42403..8df251626e22 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dml/Makefile @@ -24,15 +24,20 @@ # It provides the general basic services required by other DAL # subcomponents. -ifneq ($(call cc-option, -mpreferred-stack-boundary=4),) - cc_stack_align := -mpreferred-stack-boundary=4 -else ifneq ($(call cc-option, -mstack-alignment=16),) - cc_stack_align := -mstack-alignment=16 -endif +dml_ccflags := -mhard-float -msse -dml_ccflags := -mhard-float -msse $(cc_stack_align) +ifdef CONFIG_CC_IS_GCC +ifeq ($(call cc-ifversion, -lt, 0701, y), y) +IS_OLD_GCC = 1 +endif +endif -ifdef CONFIG_CC_IS_CLANG +ifdef IS_OLD_GCC +# Stack alignment mismatch, proceed with caution. +# GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3 +# (8B stack alignment). +dml_ccflags += -mpreferred-stack-boundary=4 +else dml_ccflags += -msse2 endif diff --git a/drivers/gpu/drm/amd/display/dc/dsc/Makefile b/drivers/gpu/drm/amd/display/dc/dsc/Makefile index b456cd23c6fa..970737217e53 100644 --- a/drivers/gpu/drm/amd/display/dc/dsc/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dsc/Makefile @@ -1,15 +1,20 @@ # # Makefile for the 'dsc' sub-component of DAL. -ifneq ($(call cc-option, -mpreferred-stack-boundary=4),) - cc_stack_align := -mpreferred-stack-boundary=4 -else ifneq ($(call cc-option, -mstack-alignment=16),) - cc_stack_align := -mstack-alignment=16 -endif +dsc_ccflags := -mhard-float -msse -dsc_ccflags := -mhard-float -msse $(cc_stack_align) +ifdef CONFIG_CC_IS_GCC +ifeq ($(call cc-ifversion, -lt, 0701, y), y) +IS_OLD_GCC = 1 +endif +endif -ifdef CONFIG_CC_IS_CLANG +ifdef IS_OLD_GCC +# Stack alignment mismatch, proceed with caution. +# GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3 +# (8B stack alignment). +dsc_ccflags += -mpreferred-stack-boundary=4 +else dsc_ccflags += -msse2 endif diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h b/drivers/gpu/drm/amd/include/kgd_pp_interface.h index 5902f80d1fce..a7f92d0b3a90 100644 --- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h @@ -220,6 +220,9 @@ enum pp_df_cstate { ((group) << PP_GROUP_SHIFT | (block) << PP_BLOCK_SHIFT | \ (support) << PP_STATE_SUPPORT_SHIFT | (state) << PP_STATE_SHIFT) +#define XGMI_MODE_PSTATE_D3 0 +#define XGMI_MODE_PSTATE_D0 1 + struct seq_file; enum amd_pp_clock_type; struct amd_pp_simple_clock_info; @@ -318,6 +321,7 @@ struct amd_pm_funcs { int (*set_ppfeature_status)(void *handle, uint64_t ppfeature_masks); int (*asic_reset_mode_2)(void *handle); int (*set_df_cstate)(void *handle, enum pp_df_cstate state); + int (*set_xgmi_pstate)(void *handle, uint32_t pstate); }; #endif diff --git a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c index f4ff15378e61..7932eb163a00 100644 --- a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c +++ b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c @@ -969,6 +969,14 @@ static int pp_dpm_switch_power_profile(void *handle, workload = hwmgr->workload_setting[index]; } + if (type == PP_SMC_POWER_PROFILE_COMPUTE && + hwmgr->hwmgr_func->disable_power_features_for_compute_performance) { + if (hwmgr->hwmgr_func->disable_power_features_for_compute_performance(hwmgr, en)) { + mutex_unlock(&hwmgr->smu_lock); + return -EINVAL; + } + } + if (hwmgr->dpm_level != AMD_DPM_FORCED_LEVEL_MANUAL) hwmgr->hwmgr_func->set_power_profile_mode(hwmgr, &workload, 0); mutex_unlock(&hwmgr->smu_lock); @@ -1566,6 +1574,23 @@ static int pp_set_df_cstate(void *handle, enum pp_df_cstate state) return 0; } +static int pp_set_xgmi_pstate(void *handle, uint32_t pstate) +{ + struct pp_hwmgr *hwmgr = handle; + + if (!hwmgr) + return -EINVAL; + + if (!hwmgr->pm_en || !hwmgr->hwmgr_func->set_xgmi_pstate) + return 0; + + mutex_lock(&hwmgr->smu_lock); + hwmgr->hwmgr_func->set_xgmi_pstate(hwmgr, pstate); + mutex_unlock(&hwmgr->smu_lock); + + return 0; +} + static const struct amd_pm_funcs pp_dpm_funcs = { .load_firmware = pp_dpm_load_fw, .wait_for_fw_loading_complete = pp_dpm_fw_loading_complete, @@ -1625,4 +1650,5 @@ static const struct amd_pm_funcs pp_dpm_funcs = { .asic_reset_mode_2 = pp_asic_reset_mode_2, .smu_i2c_bus_access = pp_smu_i2c_bus_access, .set_df_cstate = pp_set_df_cstate, + .set_xgmi_pstate = pp_set_xgmi_pstate, }; diff --git a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c index 4e468b0272c3..40b546c75fc2 100644 --- a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c @@ -383,14 +383,25 @@ bool smu_clk_dpm_is_enabled(struct smu_context *smu, enum smu_clk_type clk_type) return true; } - +/** + * smu_dpm_set_power_gate - power gate/ungate the specific IP block + * + * @smu: smu_context pointer + * @block_type: the IP block to power gate/ungate + * @gate: to power gate if true, ungate otherwise + * + * This API uses no smu->mutex lock protection due to: + * 1. It is either called by other IP block(gfx/sdma/vcn/uvd/vce). + * This is guarded to be race condition free by the caller. + * 2. Or get called on user setting request of power_dpm_force_performance_level. + * Under this case, the smu->mutex lock protection is already enforced on + * the parent API smu_force_performance_level of the call path. + */ int smu_dpm_set_power_gate(struct smu_context *smu, uint32_t block_type, bool gate) { int ret = 0; - mutex_lock(&smu->mutex); - switch (block_type) { case AMD_IP_BLOCK_TYPE_UVD: ret = smu_dpm_set_uvd_enable(smu, gate); @@ -408,8 +419,6 @@ int smu_dpm_set_power_gate(struct smu_context *smu, uint32_t block_type, break; } - mutex_unlock(&smu->mutex); - return ret; } @@ -526,7 +535,7 @@ bool is_support_sw_smu(struct amdgpu_device *adev) bool is_support_sw_smu_xgmi(struct amdgpu_device *adev) { - if (amdgpu_dpm != 1) + if (!is_support_sw_smu(adev)) return false; if (adev->asic_type == CHIP_VEGA20) @@ -582,10 +591,18 @@ int smu_sys_set_pp_table(struct smu_context *smu, void *buf, size_t size) smu_table->power_play_table = smu_table->hardcode_pptable; smu_table->power_play_table_size = size; + /* + * Special hw_fini action(for Navi1x, the DPMs disablement will be + * skipped) may be needed for custom pptable uploading. + */ + smu->uploading_custom_pp_table = true; + ret = smu_reset(smu); if (ret) pr_info("smu reset failed, ret = %d\n", ret); + smu->uploading_custom_pp_table = false; + failed: mutex_unlock(&smu->mutex); return ret; @@ -705,8 +722,12 @@ static int smu_set_funcs(struct amdgpu_device *adev) { struct smu_context *smu = &adev->smu; + if (adev->pm.pp_feature & PP_OVERDRIVE_MASK) + smu->od_enabled = true; + switch (adev->asic_type) { case CHIP_VEGA20: + adev->pm.pp_feature &= ~PP_GFXOFF_MASK; vega20_set_ppt_funcs(smu); break; case CHIP_NAVI10: @@ -715,7 +736,10 @@ static int smu_set_funcs(struct amdgpu_device *adev) navi10_set_ppt_funcs(smu); break; case CHIP_ARCTURUS: + adev->pm.pp_feature &= ~PP_GFXOFF_MASK; arcturus_set_ppt_funcs(smu); + /* OD is not supported on Arcturus */ + smu->od_enabled =false; break; case CHIP_RENOIR: renoir_set_ppt_funcs(smu); @@ -724,9 +748,6 @@ static int smu_set_funcs(struct amdgpu_device *adev) return -EINVAL; } - if (adev->pm.pp_feature & PP_OVERDRIVE_MASK) - smu->od_enabled = true; - return 0; } @@ -1057,10 +1078,6 @@ static int smu_smc_table_hw_init(struct smu_context *smu, return ret; if (adev->asic_type != CHIP_ARCTURUS) { - ret = smu_override_pcie_parameters(smu); - if (ret) - return ret; - ret = smu_notify_display_change(smu); if (ret) return ret; @@ -1089,6 +1106,12 @@ static int smu_smc_table_hw_init(struct smu_context *smu, return ret; } + if (adev->asic_type != CHIP_ARCTURUS) { + ret = smu_override_pcie_parameters(smu); + if (ret) + return ret; + } + ret = smu_set_default_od_settings(smu, initialize); if (ret) return ret; @@ -1098,7 +1121,7 @@ static int smu_smc_table_hw_init(struct smu_context *smu, if (ret) return ret; - ret = smu_get_power_limit(smu, &smu->default_power_limit, true, false); + ret = smu_get_power_limit(smu, &smu->default_power_limit, false, false); if (ret) return ret; } @@ -1282,10 +1305,25 @@ static int smu_hw_fini(void *handle) return ret; } - ret = smu_stop_dpms(smu); - if (ret) { - pr_warn("Fail to stop Dpms!\n"); - return ret; + /* + * For custom pptable uploading, skip the DPM features + * disable process on Navi1x ASICs. + * - As the gfx related features are under control of + * RLC on those ASICs. RLC reinitialization will be + * needed to reenable them. That will cost much more + * efforts. + * + * - SMU firmware can handle the DPM reenablement + * properly. + */ + if (!smu->uploading_custom_pp_table || + !((adev->asic_type >= CHIP_NAVI10) && + (adev->asic_type <= CHIP_NAVI12))) { + ret = smu_stop_dpms(smu); + if (ret) { + pr_warn("Fail to stop Dpms!\n"); + return ret; + } } kfree(table_context->driver_pptable); @@ -2500,3 +2538,13 @@ int smu_get_dpm_clock_table(struct smu_context *smu, return ret; } + +uint32_t smu_get_pptable_power_limit(struct smu_context *smu) +{ + uint32_t ret = 0; + + if (smu->ppt_funcs->get_pptable_power_limit) + ret = smu->ppt_funcs->get_pptable_power_limit(smu); + + return ret; +} diff --git a/drivers/gpu/drm/amd/powerplay/arcturus_ppt.c b/drivers/gpu/drm/amd/powerplay/arcturus_ppt.c index 3099ac256bd3..58c7c4a3053e 100644 --- a/drivers/gpu/drm/amd/powerplay/arcturus_ppt.c +++ b/drivers/gpu/drm/amd/powerplay/arcturus_ppt.c @@ -1261,15 +1261,14 @@ arcturus_get_profiling_clk_mask(struct smu_context *smu, static int arcturus_get_power_limit(struct smu_context *smu, uint32_t *limit, - bool asic_default) + bool cap) { PPTable_t *pptable = smu->smu_table.driver_pptable; uint32_t asic_default_power_limit = 0; int ret = 0; int power_src; - if (!smu->default_power_limit || - !smu->power_limit) { + if (!smu->power_limit) { if (smu_feature_is_enabled(smu, SMU_FEATURE_PPT_BIT)) { power_src = smu_power_get_index(smu, SMU_POWER_SOURCE_AC); if (power_src < 0) @@ -1292,17 +1291,11 @@ static int arcturus_get_power_limit(struct smu_context *smu, pptable->SocketPowerLimitAc[PPT_THROTTLER_PPT0]; } - if (smu->od_enabled) { - asic_default_power_limit *= (100 + smu->smu_table.TDPODLimit); - asic_default_power_limit /= 100; - } - - smu->default_power_limit = asic_default_power_limit; smu->power_limit = asic_default_power_limit; } - if (asic_default) - *limit = smu->default_power_limit; + if (cap) + *limit = smu_v11_0_get_max_power_limit(smu); else *limit = smu->power_limit; @@ -2070,6 +2063,13 @@ static void arcturus_i2c_eeprom_control_fini(struct i2c_adapter *control) i2c_del_adapter(control); } +static uint32_t arcturus_get_pptable_power_limit(struct smu_context *smu) +{ + PPTable_t *pptable = smu->smu_table.driver_pptable; + + return pptable->SocketPowerLimitAc[PPT_THROTTLER_PPT0]; +} + static const struct pptable_funcs arcturus_ppt_funcs = { /* translate smu index into arcturus specific index */ .get_smu_msg_index = arcturus_get_smu_msg_index, @@ -2160,6 +2160,7 @@ static const struct pptable_funcs arcturus_ppt_funcs = { .get_dpm_ultimate_freq = smu_v11_0_get_dpm_ultimate_freq, .set_soft_freq_limited_range = smu_v11_0_set_soft_freq_limited_range, .override_pcie_parameters = smu_v11_0_override_pcie_parameters, + .get_pptable_power_limit = arcturus_get_pptable_power_limit, }; void arcturus_set_ppt_funcs(struct smu_context *smu) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c index a24beaa4fb01..d2909c91d65b 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c @@ -81,6 +81,8 @@ static void hwmgr_init_workload_prority(struct pp_hwmgr *hwmgr) int hwmgr_early_init(struct pp_hwmgr *hwmgr) { + struct amdgpu_device *adev; + if (!hwmgr) return -EINVAL; @@ -94,8 +96,11 @@ int hwmgr_early_init(struct pp_hwmgr *hwmgr) hwmgr_init_workload_prority(hwmgr); hwmgr->gfxoff_state_changed_by_workload = false; + adev = hwmgr->adev; + switch (hwmgr->chip_family) { case AMDGPU_FAMILY_CI: + adev->pm.pp_feature &= ~PP_GFXOFF_MASK; hwmgr->smumgr_funcs = &ci_smu_funcs; ci_set_asic_special_caps(hwmgr); hwmgr->feature_mask &= ~(PP_VBI_TIME_SUPPORT_MASK | @@ -106,12 +111,14 @@ int hwmgr_early_init(struct pp_hwmgr *hwmgr) smu7_init_function_pointers(hwmgr); break; case AMDGPU_FAMILY_CZ: + adev->pm.pp_feature &= ~PP_GFXOFF_MASK; hwmgr->od_enabled = false; hwmgr->smumgr_funcs = &smu8_smu_funcs; hwmgr->feature_mask &= ~PP_GFXOFF_MASK; smu8_init_function_pointers(hwmgr); break; case AMDGPU_FAMILY_VI: + adev->pm.pp_feature &= ~PP_GFXOFF_MASK; hwmgr->feature_mask &= ~PP_GFXOFF_MASK; switch (hwmgr->chip_id) { case CHIP_TOPAZ: @@ -153,6 +160,7 @@ int hwmgr_early_init(struct pp_hwmgr *hwmgr) case AMDGPU_FAMILY_AI: switch (hwmgr->chip_id) { case CHIP_VEGA10: + adev->pm.pp_feature &= ~PP_GFXOFF_MASK; hwmgr->feature_mask &= ~PP_GFXOFF_MASK; hwmgr->smumgr_funcs = &vega10_smu_funcs; vega10_hwmgr_init(hwmgr); @@ -162,6 +170,7 @@ int hwmgr_early_init(struct pp_hwmgr *hwmgr) vega12_hwmgr_init(hwmgr); break; case CHIP_VEGA20: + adev->pm.pp_feature &= ~PP_GFXOFF_MASK; hwmgr->feature_mask &= ~PP_GFXOFF_MASK; hwmgr->smumgr_funcs = &vega20_smu_funcs; vega20_hwmgr_init(hwmgr); diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c index c805c6fcdba2..f73dff68e799 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c @@ -3477,18 +3477,31 @@ static int smu7_get_pp_table_entry(struct pp_hwmgr *hwmgr, static int smu7_get_gpu_power(struct pp_hwmgr *hwmgr, u32 *query) { + struct amdgpu_device *adev = hwmgr->adev; int i; u32 tmp = 0; if (!query) return -EINVAL; - smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_GetCurrPkgPwr, 0); - tmp = cgs_read_register(hwmgr->device, mmSMC_MSG_ARG_0); - *query = tmp; + /* + * PPSMC_MSG_GetCurrPkgPwr is not supported on: + * - Hawaii + * - Bonaire + * - Fiji + * - Tonga + */ + if ((adev->asic_type != CHIP_HAWAII) && + (adev->asic_type != CHIP_BONAIRE) && + (adev->asic_type != CHIP_FIJI) && + (adev->asic_type != CHIP_TONGA)) { + smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_GetCurrPkgPwr, 0); + tmp = cgs_read_register(hwmgr->device, mmSMC_MSG_ARG_0); + *query = tmp; - if (tmp != 0) - return 0; + if (tmp != 0) + return 0; + } smum_send_msg_to_smc(hwmgr, PPSMC_MSG_PmStatusLogStart); cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC, @@ -3969,6 +3982,13 @@ static int smu7_set_power_state_tasks(struct pp_hwmgr *hwmgr, const void *input) "Failed to populate and upload SCLK MCLK DPM levels!", result = tmp_result); + /* + * If a custom pp table is loaded, set DPMTABLE_OD_UPDATE_VDDC flag. + * That effectively disables AVFS feature. + */ + if (hwmgr->hardcode_pp_table != NULL) + data->need_update_smu7_dpm_table |= DPMTABLE_OD_UPDATE_VDDC; + tmp_result = smu7_update_avfs(hwmgr); PP_ASSERT_WITH_CODE((0 == tmp_result), "Failed to update avfs voltages!", diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c index 4ea63a2e17da..d71a492c87a3 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c @@ -3689,6 +3689,13 @@ static int vega10_set_power_state_tasks(struct pp_hwmgr *hwmgr, PP_ASSERT_WITH_CODE(!result, "Failed to upload PPtable!", return result); + /* + * If a custom pp table is loaded, set DPMTABLE_OD_UPDATE_VDDC flag. + * That effectively disables AVFS feature. + */ + if(hwmgr->hardcode_pp_table != NULL) + data->need_update_dpm_table |= DPMTABLE_OD_UPDATE_VDDC; + vega10_update_avfs(hwmgr); /* @@ -5263,6 +5270,59 @@ static int vega10_get_performance_level(struct pp_hwmgr *hwmgr, const struct pp_ return 0; } +static int vega10_disable_power_features_for_compute_performance(struct pp_hwmgr *hwmgr, bool disable) +{ + struct vega10_hwmgr *data = hwmgr->backend; + uint32_t feature_mask = 0; + + if (disable) { + feature_mask |= data->smu_features[GNLD_ULV].enabled ? + data->smu_features[GNLD_ULV].smu_feature_bitmap : 0; + feature_mask |= data->smu_features[GNLD_DS_GFXCLK].enabled ? + data->smu_features[GNLD_DS_GFXCLK].smu_feature_bitmap : 0; + feature_mask |= data->smu_features[GNLD_DS_SOCCLK].enabled ? + data->smu_features[GNLD_DS_SOCCLK].smu_feature_bitmap : 0; + feature_mask |= data->smu_features[GNLD_DS_LCLK].enabled ? + data->smu_features[GNLD_DS_LCLK].smu_feature_bitmap : 0; + feature_mask |= data->smu_features[GNLD_DS_DCEFCLK].enabled ? + data->smu_features[GNLD_DS_DCEFCLK].smu_feature_bitmap : 0; + } else { + feature_mask |= (!data->smu_features[GNLD_ULV].enabled) ? + data->smu_features[GNLD_ULV].smu_feature_bitmap : 0; + feature_mask |= (!data->smu_features[GNLD_DS_GFXCLK].enabled) ? + data->smu_features[GNLD_DS_GFXCLK].smu_feature_bitmap : 0; + feature_mask |= (!data->smu_features[GNLD_DS_SOCCLK].enabled) ? + data->smu_features[GNLD_DS_SOCCLK].smu_feature_bitmap : 0; + feature_mask |= (!data->smu_features[GNLD_DS_LCLK].enabled) ? + data->smu_features[GNLD_DS_LCLK].smu_feature_bitmap : 0; + feature_mask |= (!data->smu_features[GNLD_DS_DCEFCLK].enabled) ? + data->smu_features[GNLD_DS_DCEFCLK].smu_feature_bitmap : 0; + } + + if (feature_mask) + PP_ASSERT_WITH_CODE(!vega10_enable_smc_features(hwmgr, + !disable, feature_mask), + "enable/disable power features for compute performance Failed!", + return -EINVAL); + + if (disable) { + data->smu_features[GNLD_ULV].enabled = false; + data->smu_features[GNLD_DS_GFXCLK].enabled = false; + data->smu_features[GNLD_DS_SOCCLK].enabled = false; + data->smu_features[GNLD_DS_LCLK].enabled = false; + data->smu_features[GNLD_DS_DCEFCLK].enabled = false; + } else { + data->smu_features[GNLD_ULV].enabled = true; + data->smu_features[GNLD_DS_GFXCLK].enabled = true; + data->smu_features[GNLD_DS_SOCCLK].enabled = true; + data->smu_features[GNLD_DS_LCLK].enabled = true; + data->smu_features[GNLD_DS_DCEFCLK].enabled = true; + } + + return 0; + +} + static const struct pp_hwmgr_func vega10_hwmgr_funcs = { .backend_init = vega10_hwmgr_backend_init, .backend_fini = vega10_hwmgr_backend_fini, @@ -5330,6 +5390,8 @@ static const struct pp_hwmgr_func vega10_hwmgr_funcs = { .get_ppfeature_status = vega10_get_ppfeature_status, .set_ppfeature_status = vega10_set_ppfeature_status, .set_mp1_state = vega10_set_mp1_state, + .disable_power_features_for_compute_performance = + vega10_disable_power_features_for_compute_performance, }; int vega10_hwmgr_init(struct pp_hwmgr *hwmgr) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c index 9295bd90b792..5bcf0d684151 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c @@ -4176,6 +4176,20 @@ static int vega20_set_df_cstate(struct pp_hwmgr *hwmgr, return ret; } +static int vega20_set_xgmi_pstate(struct pp_hwmgr *hwmgr, + uint32_t pstate) +{ + int ret; + + ret = smum_send_msg_to_smc_with_parameter(hwmgr, + PPSMC_MSG_SetXgmiMode, + pstate ? XGMI_MODE_PSTATE_D0 : XGMI_MODE_PSTATE_D3); + if (ret) + pr_err("SetXgmiPstate failed!\n"); + + return ret; +} + static const struct pp_hwmgr_func vega20_hwmgr_funcs = { /* init/fini related */ .backend_init = vega20_hwmgr_backend_init, @@ -4245,6 +4259,7 @@ static const struct pp_hwmgr_func vega20_hwmgr_funcs = { .set_mp1_state = vega20_set_mp1_state, .smu_i2c_bus_access = vega20_smu_i2c_bus_access, .set_df_cstate = vega20_set_df_cstate, + .set_xgmi_pstate = vega20_set_xgmi_pstate, }; int vega20_hwmgr_init(struct pp_hwmgr *hwmgr) diff --git a/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h index 8120e7587585..031e0c22fcc7 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h +++ b/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h @@ -261,7 +261,6 @@ struct smu_table_context struct smu_table *tables; struct smu_table memory_pool; uint8_t thermal_controller_type; - uint16_t TDPODLimit; void *overdrive_table; }; @@ -391,6 +390,7 @@ struct smu_context uint32_t smc_if_version; + bool uploading_custom_pp_table; }; struct i2c_adapter; @@ -548,6 +548,7 @@ struct pptable_funcs { int (*get_dpm_ultimate_freq)(struct smu_context *smu, enum smu_clk_type clk_type, uint32_t *min, uint32_t *max); int (*set_soft_freq_limited_range)(struct smu_context *smu, enum smu_clk_type clk_type, uint32_t min, uint32_t max); int (*override_pcie_parameters)(struct smu_context *smu); + uint32_t (*get_pptable_power_limit)(struct smu_context *smu); }; int smu_load_microcode(struct smu_context *smu); @@ -717,4 +718,6 @@ int smu_get_uclk_dpm_states(struct smu_context *smu, int smu_get_dpm_clock_table(struct smu_context *smu, struct dpm_clocks *clock_table); +uint32_t smu_get_pptable_power_limit(struct smu_context *smu); + #endif diff --git a/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h b/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h index bd8c922dfd3e..af977675fd33 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h +++ b/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h @@ -356,6 +356,9 @@ struct pp_hwmgr_func { int (*asic_reset)(struct pp_hwmgr *hwmgr, enum SMU_ASIC_RESET_MODE mode); int (*smu_i2c_bus_access)(struct pp_hwmgr *hwmgr, bool aquire); int (*set_df_cstate)(struct pp_hwmgr *hwmgr, enum pp_df_cstate state); + int (*set_xgmi_pstate)(struct pp_hwmgr *hwmgr, uint32_t pstate); + int (*disable_power_features_for_compute_performance)(struct pp_hwmgr *hwmgr, + bool disable); }; struct pp_table_func { diff --git a/drivers/gpu/drm/amd/powerplay/inc/smu11_driver_if_arcturus.h b/drivers/gpu/drm/amd/powerplay/inc/smu11_driver_if_arcturus.h index 886b9a21ebd8..a886f0644d24 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/smu11_driver_if_arcturus.h +++ b/drivers/gpu/drm/amd/powerplay/inc/smu11_driver_if_arcturus.h @@ -159,7 +159,7 @@ //FIXME need updating // Debug Overrides Bitmask #define DPM_OVERRIDE_DISABLE_UCLK_PID 0x00000001 -#define DPM_OVERRIDE_ENABLE_VOLT_LINK_VCN_FCLK 0x00000002 +#define DPM_OVERRIDE_DISABLE_VOLT_LINK_VCN_FCLK 0x00000002 // I2C Config Bit Defines #define I2C_CONTROLLER_ENABLED 1 diff --git a/drivers/gpu/drm/amd/powerplay/inc/smu_v11_0.h b/drivers/gpu/drm/amd/powerplay/inc/smu_v11_0.h index abd4debb3def..606149085683 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/smu_v11_0.h +++ b/drivers/gpu/drm/amd/powerplay/inc/smu_v11_0.h @@ -27,7 +27,7 @@ #define SMU11_DRIVER_IF_VERSION_INV 0xFFFFFFFF #define SMU11_DRIVER_IF_VERSION_VG20 0x13 -#define SMU11_DRIVER_IF_VERSION_ARCT 0x0F +#define SMU11_DRIVER_IF_VERSION_ARCT 0x10 #define SMU11_DRIVER_IF_VERSION_NV10 0x33 #define SMU11_DRIVER_IF_VERSION_NV14 0x34 @@ -48,6 +48,8 @@ #define SMU11_TOOL_SIZE 0x19000 +#define MAX_PCIE_CONF 2 + #define CLK_MAP(clk, index) \ [SMU_##clk] = {1, (index)} @@ -88,6 +90,11 @@ struct smu_11_0_dpm_table { uint32_t max; /* MHz */ }; +struct smu_11_0_pcie_table { + uint8_t pcie_gen[MAX_PCIE_CONF]; + uint8_t pcie_lane[MAX_PCIE_CONF]; +}; + struct smu_11_0_dpm_tables { struct smu_11_0_dpm_table soc_table; struct smu_11_0_dpm_table gfx_table; @@ -100,6 +107,7 @@ struct smu_11_0_dpm_tables { struct smu_11_0_dpm_table display_table; struct smu_11_0_dpm_table phy_table; struct smu_11_0_dpm_table fclk_table; + struct smu_11_0_pcie_table pcie_table; }; struct smu_11_0_dpm_context { @@ -250,4 +258,8 @@ int smu_v11_0_set_soft_freq_limited_range(struct smu_context *smu, enum smu_clk_ int smu_v11_0_override_pcie_parameters(struct smu_context *smu); +int smu_v11_0_set_default_od_settings(struct smu_context *smu, bool initialize, size_t overdrive_table_size); + +uint32_t smu_v11_0_get_max_power_limit(struct smu_context *smu); + #endif diff --git a/drivers/gpu/drm/amd/powerplay/inc/smu_v11_0_pptable.h b/drivers/gpu/drm/amd/powerplay/inc/smu_v11_0_pptable.h index 86cdc3393eac..b2f96a101124 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/smu_v11_0_pptable.h +++ b/drivers/gpu/drm/amd/powerplay/inc/smu_v11_0_pptable.h @@ -141,7 +141,9 @@ struct smu_11_0_powerplay_table struct smu_11_0_power_saving_clock_table power_saving_clock; struct smu_11_0_overdrive_table overdrive_table; +#ifndef SMU_11_0_PARTIAL_PPTABLE PPTable_t smc_pptable; //PPTable_t in smu11_driver_if.h +#endif } __attribute__((packed)); #endif diff --git a/drivers/gpu/drm/amd/powerplay/navi10_ppt.c b/drivers/gpu/drm/amd/powerplay/navi10_ppt.c index 769f9451d904..aaec884d63ed 100644 --- a/drivers/gpu/drm/amd/powerplay/navi10_ppt.c +++ b/drivers/gpu/drm/amd/powerplay/navi10_ppt.c @@ -36,6 +36,7 @@ #include "navi10_ppt.h" #include "smu_v11_0_pptable.h" #include "smu_v11_0_ppsmc.h" +#include "nbio/nbio_7_4_sh_mask.h" #include "asic_reg/mp/mp_11_0_sh_mask.h" @@ -207,7 +208,7 @@ static struct smu_11_0_cmn2aisc_mapping navi10_workload_map[PP_SMC_POWER_PROFILE WORKLOAD_MAP(PP_SMC_POWER_PROFILE_POWERSAVING, WORKLOAD_PPLIB_POWER_SAVING_BIT), WORKLOAD_MAP(PP_SMC_POWER_PROFILE_VIDEO, WORKLOAD_PPLIB_VIDEO_BIT), WORKLOAD_MAP(PP_SMC_POWER_PROFILE_VR, WORKLOAD_PPLIB_VR_BIT), - WORKLOAD_MAP(PP_SMC_POWER_PROFILE_COMPUTE, WORKLOAD_PPLIB_CUSTOM_BIT), + WORKLOAD_MAP(PP_SMC_POWER_PROFILE_COMPUTE, WORKLOAD_PPLIB_COMPUTE_BIT), WORKLOAD_MAP(PP_SMC_POWER_PROFILE_CUSTOM, WORKLOAD_PPLIB_CUSTOM_BIT), }; @@ -599,6 +600,7 @@ static int navi10_set_default_dpm_table(struct smu_context *smu) struct smu_table_context *table_context = &smu->smu_table; struct smu_11_0_dpm_context *dpm_context = smu_dpm->dpm_context; PPTable_t *driver_ppt = NULL; + int i; driver_ppt = table_context->driver_pptable; @@ -629,6 +631,11 @@ static int navi10_set_default_dpm_table(struct smu_context *smu) dpm_context->dpm_tables.phy_table.min = driver_ppt->FreqTablePhyclk[0]; dpm_context->dpm_tables.phy_table.max = driver_ppt->FreqTablePhyclk[NUM_PHYCLK_DPM_LEVELS - 1]; + for (i = 0; i < MAX_PCIE_CONF; i++) { + dpm_context->dpm_tables.pcie_table.pcie_gen[i] = driver_ppt->PcieGenSpeed[i]; + dpm_context->dpm_tables.pcie_table.pcie_lane[i] = driver_ppt->PcieLaneCount[i]; + } + return 0; } @@ -691,13 +698,29 @@ static bool navi10_is_support_fine_grained_dpm(struct smu_context *smu, enum smu return dpm_desc->SnapToDiscrete == 0 ? true : false; } +static inline bool navi10_od_feature_is_supported(struct smu_11_0_overdrive_table *od_table, enum SMU_11_0_ODFEATURE_ID feature) +{ + return od_table->cap[feature]; +} + + static int navi10_print_clk_levels(struct smu_context *smu, enum smu_clk_type clk_type, char *buf) { + uint16_t *curve_settings; int i, size = 0, ret = 0; uint32_t cur_value = 0, value = 0, count = 0; uint32_t freq_values[3] = {0}; uint32_t mark_index = 0; + struct smu_table_context *table_context = &smu->smu_table; + uint32_t gen_speed, lane_width; + struct smu_dpm_context *smu_dpm = &smu->smu_dpm; + struct smu_11_0_dpm_context *dpm_context = smu_dpm->dpm_context; + struct amdgpu_device *adev = smu->adev; + PPTable_t *pptable = (PPTable_t *)table_context->driver_pptable; + OverDriveTable_t *od_table = + (OverDriveTable_t *)table_context->overdrive_table; + struct smu_11_0_overdrive_table *od_settings = smu->od_settings; switch (clk_type) { case SMU_GFXCLK: @@ -748,6 +771,69 @@ static int navi10_print_clk_levels(struct smu_context *smu, } break; + case SMU_PCIE: + gen_speed = (RREG32_PCIE(smnPCIE_LC_SPEED_CNTL) & + PSWUSP0_PCIE_LC_SPEED_CNTL__LC_CURRENT_DATA_RATE_MASK) + >> PSWUSP0_PCIE_LC_SPEED_CNTL__LC_CURRENT_DATA_RATE__SHIFT; + lane_width = (RREG32_PCIE(smnPCIE_LC_LINK_WIDTH_CNTL) & + PCIE_LC_LINK_WIDTH_CNTL__LC_LINK_WIDTH_RD_MASK) + >> PCIE_LC_LINK_WIDTH_CNTL__LC_LINK_WIDTH_RD__SHIFT; + for (i = 0; i < NUM_LINK_LEVELS; i++) + size += sprintf(buf + size, "%d: %s %s %dMhz %s\n", i, + (dpm_context->dpm_tables.pcie_table.pcie_gen[i] == 0) ? "2.5GT/s," : + (dpm_context->dpm_tables.pcie_table.pcie_gen[i] == 1) ? "5.0GT/s," : + (dpm_context->dpm_tables.pcie_table.pcie_gen[i] == 2) ? "8.0GT/s," : + (dpm_context->dpm_tables.pcie_table.pcie_gen[i] == 3) ? "16.0GT/s," : "", + (dpm_context->dpm_tables.pcie_table.pcie_lane[i] == 1) ? "x1" : + (dpm_context->dpm_tables.pcie_table.pcie_lane[i] == 2) ? "x2" : + (dpm_context->dpm_tables.pcie_table.pcie_lane[i] == 3) ? "x4" : + (dpm_context->dpm_tables.pcie_table.pcie_lane[i] == 4) ? "x8" : + (dpm_context->dpm_tables.pcie_table.pcie_lane[i] == 5) ? "x12" : + (dpm_context->dpm_tables.pcie_table.pcie_lane[i] == 6) ? "x16" : "", + pptable->LclkFreq[i], + (gen_speed == dpm_context->dpm_tables.pcie_table.pcie_gen[i]) && + (lane_width == dpm_context->dpm_tables.pcie_table.pcie_lane[i]) ? + "*" : ""); + break; + case SMU_OD_SCLK: + if (!smu->od_enabled || !od_table || !od_settings) + break; + if (!navi10_od_feature_is_supported(od_settings, SMU_11_0_ODFEATURE_GFXCLK_LIMITS)) + break; + size += sprintf(buf + size, "OD_SCLK:\n"); + size += sprintf(buf + size, "0: %uMhz\n1: %uMhz\n", od_table->GfxclkFmin, od_table->GfxclkFmax); + break; + case SMU_OD_MCLK: + if (!smu->od_enabled || !od_table || !od_settings) + break; + if (!navi10_od_feature_is_supported(od_settings, SMU_11_0_ODFEATURE_UCLK_MAX)) + break; + size += sprintf(buf + size, "OD_MCLK:\n"); + size += sprintf(buf + size, "0: %uMHz\n", od_table->UclkFmax); + break; + case SMU_OD_VDDC_CURVE: + if (!smu->od_enabled || !od_table || !od_settings) + break; + if (!navi10_od_feature_is_supported(od_settings, SMU_11_0_ODFEATURE_GFXCLK_CURVE)) + break; + size += sprintf(buf + size, "OD_VDDC_CURVE:\n"); + for (i = 0; i < 3; i++) { + switch (i) { + case 0: + curve_settings = &od_table->GfxclkFreq1; + break; + case 1: + curve_settings = &od_table->GfxclkFreq2; + break; + case 2: + curve_settings = &od_table->GfxclkFreq3; + break; + default: + break; + } + size += sprintf(buf + size, "%d: %uMHz @ %umV\n", i, curve_settings[0], curve_settings[1] / NAVI10_VOLTAGE_SCALE); + } + break; default: break; } @@ -773,6 +859,12 @@ static int navi10_force_clk_levels(struct smu_context *smu, case SMU_UCLK: case SMU_DCEFCLK: case SMU_FCLK: + /* There is only 2 levels for fine grained DPM */ + if (navi10_is_support_fine_grained_dpm(smu, clk_type)) { + soft_max_level = (soft_max_level >= 1 ? 1 : 0); + soft_min_level = (soft_min_level >= 1 ? 1 : 0); + } + ret = smu_get_dpm_freq_by_index(smu, clk_type, soft_min_level, &min_freq); if (ret) return size; @@ -1582,17 +1674,22 @@ static int navi10_display_disable_memory_clock_switch(struct smu_context *smu, return ret; } +static uint32_t navi10_get_pptable_power_limit(struct smu_context *smu) +{ + PPTable_t *pptable = smu->smu_table.driver_pptable; + return pptable->SocketPowerLimitAc[PPT_THROTTLER_PPT0]; +} + static int navi10_get_power_limit(struct smu_context *smu, uint32_t *limit, - bool asic_default) + bool cap) { PPTable_t *pptable = smu->smu_table.driver_pptable; uint32_t asic_default_power_limit = 0; int ret = 0; int power_src; - if (!smu->default_power_limit || - !smu->power_limit) { + if (!smu->power_limit) { if (smu_feature_is_enabled(smu, SMU_FEATURE_PPT_BIT)) { power_src = smu_power_get_index(smu, SMU_POWER_SOURCE_AC); if (power_src < 0) @@ -1615,17 +1712,11 @@ static int navi10_get_power_limit(struct smu_context *smu, pptable->SocketPowerLimitAc[PPT_THROTTLER_PPT0]; } - if (smu->od_enabled) { - asic_default_power_limit *= (100 + smu->smu_table.TDPODLimit); - asic_default_power_limit /= 100; - } - - smu->default_power_limit = asic_default_power_limit; smu->power_limit = asic_default_power_limit; } - if (asic_default) - *limit = smu->default_power_limit; + if (cap) + *limit = smu_v11_0_get_max_power_limit(smu); else *limit = smu->power_limit; @@ -1640,6 +1731,9 @@ static int navi10_update_pcie_parameters(struct smu_context *smu, int ret, i; uint32_t smu_pcie_arg; + struct smu_dpm_context *smu_dpm = &smu->smu_dpm; + struct smu_11_0_dpm_context *dpm_context = smu_dpm->dpm_context; + for (i = 0; i < NUM_LINK_LEVELS; i++) { smu_pcie_arg = (i << 16) | ((pptable->PcieGenSpeed[i] <= pcie_gen_cap) ? (pptable->PcieGenSpeed[i] << 8) : @@ -1648,11 +1742,260 @@ static int navi10_update_pcie_parameters(struct smu_context *smu, ret = smu_send_smc_msg_with_param(smu, SMU_MSG_OverridePcieParameters, smu_pcie_arg); + + if (ret) + return ret; + + if (pptable->PcieGenSpeed[i] > pcie_gen_cap) + dpm_context->dpm_tables.pcie_table.pcie_gen[i] = pcie_gen_cap; + if (pptable->PcieLaneCount[i] > pcie_width_cap) + dpm_context->dpm_tables.pcie_table.pcie_lane[i] = pcie_width_cap; + } + + return 0; +} + +static inline void navi10_dump_od_table(OverDriveTable_t *od_table) { + pr_debug("OD: Gfxclk: (%d, %d)\n", od_table->GfxclkFmin, od_table->GfxclkFmax); + pr_debug("OD: Gfx1: (%d, %d)\n", od_table->GfxclkFreq1, od_table->GfxclkVolt1); + pr_debug("OD: Gfx2: (%d, %d)\n", od_table->GfxclkFreq2, od_table->GfxclkVolt2); + pr_debug("OD: Gfx3: (%d, %d)\n", od_table->GfxclkFreq3, od_table->GfxclkVolt3); + pr_debug("OD: UclkFmax: %d\n", od_table->UclkFmax); + pr_debug("OD: OverDrivePct: %d\n", od_table->OverDrivePct); +} + +static int navi10_od_setting_check_range(struct smu_11_0_overdrive_table *od_table, enum SMU_11_0_ODSETTING_ID setting, uint32_t value) +{ + if (value < od_table->min[setting]) { + pr_warn("OD setting (%d, %d) is less than the minimum allowed (%d)\n", setting, value, od_table->min[setting]); + return -EINVAL; + } + if (value > od_table->max[setting]) { + pr_warn("OD setting (%d, %d) is greater than the maximum allowed (%d)\n", setting, value, od_table->max[setting]); + return -EINVAL; + } + return 0; +} + +static int navi10_setup_od_limits(struct smu_context *smu) { + struct smu_11_0_overdrive_table *overdrive_table = NULL; + struct smu_11_0_powerplay_table *powerplay_table = NULL; + + if (!smu->smu_table.power_play_table) { + pr_err("powerplay table uninitialized!\n"); + return -ENOENT; + } + powerplay_table = (struct smu_11_0_powerplay_table *)smu->smu_table.power_play_table; + overdrive_table = &powerplay_table->overdrive_table; + if (!smu->od_settings) { + smu->od_settings = kmemdup(overdrive_table, sizeof(struct smu_11_0_overdrive_table), GFP_KERNEL); + } else { + memcpy(smu->od_settings, overdrive_table, sizeof(struct smu_11_0_overdrive_table)); + } + return 0; +} + +static int navi10_set_default_od_settings(struct smu_context *smu, bool initialize) { + OverDriveTable_t *od_table; + int ret = 0; + + ret = smu_v11_0_set_default_od_settings(smu, initialize, sizeof(OverDriveTable_t)); + if (ret) + return ret; + + if (initialize) { + ret = navi10_setup_od_limits(smu); + if (ret) { + pr_err("Failed to retrieve board OD limits\n"); + return ret; + } + } + od_table = (OverDriveTable_t *)smu->smu_table.overdrive_table; + if (od_table) { + navi10_dump_od_table(od_table); + } + + return ret; +} + +static int navi10_od_edit_dpm_table(struct smu_context *smu, enum PP_OD_DPM_TABLE_COMMAND type, long input[], uint32_t size) { + int i; + int ret = 0; + struct smu_table_context *table_context = &smu->smu_table; + OverDriveTable_t *od_table; + struct smu_11_0_overdrive_table *od_settings; + enum SMU_11_0_ODSETTING_ID freq_setting, voltage_setting; + uint16_t *freq_ptr, *voltage_ptr; + od_table = (OverDriveTable_t *)table_context->overdrive_table; + + if (!smu->od_enabled) { + pr_warn("OverDrive is not enabled!\n"); + return -EINVAL; + } + + if (!smu->od_settings) { + pr_err("OD board limits are not set!\n"); + return -ENOENT; + } + + od_settings = smu->od_settings; + + switch (type) { + case PP_OD_EDIT_SCLK_VDDC_TABLE: + if (!navi10_od_feature_is_supported(od_settings, SMU_11_0_ODFEATURE_GFXCLK_LIMITS)) { + pr_warn("GFXCLK_LIMITS not supported!\n"); + return -ENOTSUPP; + } + if (!table_context->overdrive_table) { + pr_err("Overdrive is not initialized\n"); + return -EINVAL; + } + for (i = 0; i < size; i += 2) { + if (i + 2 > size) { + pr_info("invalid number of input parameters %d\n", size); + return -EINVAL; + } + switch (input[i]) { + case 0: + freq_setting = SMU_11_0_ODSETTING_GFXCLKFMIN; + freq_ptr = &od_table->GfxclkFmin; + if (input[i + 1] > od_table->GfxclkFmax) { + pr_info("GfxclkFmin (%ld) must be <= GfxclkFmax (%u)!\n", + input[i + 1], + od_table->GfxclkFmin); + return -EINVAL; + } + break; + case 1: + freq_setting = SMU_11_0_ODSETTING_GFXCLKFMAX; + freq_ptr = &od_table->GfxclkFmax; + if (input[i + 1] < od_table->GfxclkFmin) { + pr_info("GfxclkFmax (%ld) must be >= GfxclkFmin (%u)!\n", + input[i + 1], + od_table->GfxclkFmax); + return -EINVAL; + } + break; + default: + pr_info("Invalid SCLK_VDDC_TABLE index: %ld\n", input[i]); + pr_info("Supported indices: [0:min,1:max]\n"); + return -EINVAL; + } + ret = navi10_od_setting_check_range(od_settings, freq_setting, input[i + 1]); + if (ret) + return ret; + *freq_ptr = input[i + 1]; + } + break; + case PP_OD_EDIT_MCLK_VDDC_TABLE: + if (!navi10_od_feature_is_supported(od_settings, SMU_11_0_ODFEATURE_UCLK_MAX)) { + pr_warn("UCLK_MAX not supported!\n"); + return -ENOTSUPP; + } + if (size < 2) { + pr_info("invalid number of parameters: %d\n", size); + return -EINVAL; + } + if (input[0] != 1) { + pr_info("Invalid MCLK_VDDC_TABLE index: %ld\n", input[0]); + pr_info("Supported indices: [1:max]\n"); + return -EINVAL; + } + ret = navi10_od_setting_check_range(od_settings, SMU_11_0_ODSETTING_UCLKFMAX, input[1]); + if (ret) + return ret; + od_table->UclkFmax = input[1]; + break; + case PP_OD_COMMIT_DPM_TABLE: + navi10_dump_od_table(od_table); + ret = smu_update_table(smu, SMU_TABLE_OVERDRIVE, 0, (void *)od_table, true); + if (ret) { + pr_err("Failed to import overdrive table!\n"); + return ret; + } + // no lock needed because smu_od_edit_dpm_table has it + ret = smu_handle_task(smu, smu->smu_dpm.dpm_level, + AMD_PP_TASK_READJUST_POWER_STATE, + false); + if (ret) { + return ret; + } + break; + case PP_OD_EDIT_VDDC_CURVE: + if (!navi10_od_feature_is_supported(od_settings, SMU_11_0_ODFEATURE_GFXCLK_CURVE)) { + pr_warn("GFXCLK_CURVE not supported!\n"); + return -ENOTSUPP; + } + if (size < 3) { + pr_info("invalid number of parameters: %d\n", size); + return -EINVAL; + } + if (!od_table) { + pr_info("Overdrive is not initialized\n"); + return -EINVAL; + } + + switch (input[0]) { + case 0: + freq_setting = SMU_11_0_ODSETTING_VDDGFXCURVEFREQ_P1; + voltage_setting = SMU_11_0_ODSETTING_VDDGFXCURVEVOLTAGE_P1; + freq_ptr = &od_table->GfxclkFreq1; + voltage_ptr = &od_table->GfxclkVolt1; + break; + case 1: + freq_setting = SMU_11_0_ODSETTING_VDDGFXCURVEFREQ_P2; + voltage_setting = SMU_11_0_ODSETTING_VDDGFXCURVEVOLTAGE_P2; + freq_ptr = &od_table->GfxclkFreq2; + voltage_ptr = &od_table->GfxclkVolt2; + break; + case 2: + freq_setting = SMU_11_0_ODSETTING_VDDGFXCURVEFREQ_P3; + voltage_setting = SMU_11_0_ODSETTING_VDDGFXCURVEVOLTAGE_P3; + freq_ptr = &od_table->GfxclkFreq3; + voltage_ptr = &od_table->GfxclkVolt3; + break; + default: + pr_info("Invalid VDDC_CURVE index: %ld\n", input[0]); + pr_info("Supported indices: [0, 1, 2]\n"); + return -EINVAL; + } + ret = navi10_od_setting_check_range(od_settings, freq_setting, input[1]); + if (ret) + return ret; + // Allow setting zero to disable the OverDrive VDDC curve + if (input[2] != 0) { + ret = navi10_od_setting_check_range(od_settings, voltage_setting, input[2]); + if (ret) + return ret; + *freq_ptr = input[1]; + *voltage_ptr = ((uint16_t)input[2]) * NAVI10_VOLTAGE_SCALE; + pr_debug("OD: set curve %ld: (%d, %d)\n", input[0], *freq_ptr, *voltage_ptr); + } else { + // If setting 0, disable all voltage curve settings + od_table->GfxclkVolt1 = 0; + od_table->GfxclkVolt2 = 0; + od_table->GfxclkVolt3 = 0; + } + navi10_dump_od_table(od_table); + break; + default: + return -ENOSYS; + } return ret; } +static int navi10_run_btc(struct smu_context *smu) +{ + int ret = 0; + + ret = smu_send_smc_msg(smu, SMU_MSG_RunBtc); + if (ret) + pr_err("RunBtc failed!\n"); + + return ret; +} static const struct pptable_funcs navi10_ppt_funcs = { .tables_init = navi10_tables_init, @@ -1742,6 +2085,10 @@ static const struct pptable_funcs navi10_ppt_funcs = { .get_dpm_ultimate_freq = smu_v11_0_get_dpm_ultimate_freq, .set_soft_freq_limited_range = smu_v11_0_set_soft_freq_limited_range, .override_pcie_parameters = smu_v11_0_override_pcie_parameters, + .set_default_od_settings = navi10_set_default_od_settings, + .od_edit_dpm_table = navi10_od_edit_dpm_table, + .get_pptable_power_limit = navi10_get_pptable_power_limit, + .run_btc = navi10_run_btc, }; void navi10_set_ppt_funcs(struct smu_context *smu) diff --git a/drivers/gpu/drm/amd/powerplay/navi10_ppt.h b/drivers/gpu/drm/amd/powerplay/navi10_ppt.h index a37e37c5f105..ec03c7992f6d 100644 --- a/drivers/gpu/drm/amd/powerplay/navi10_ppt.h +++ b/drivers/gpu/drm/amd/powerplay/navi10_ppt.h @@ -33,6 +33,11 @@ #define NAVI14_UMD_PSTATE_PEAK_XTX_GFXCLK (1717) #define NAVI14_UMD_PSTATE_PEAK_XL_GFXCLK (1448) +#define NAVI10_VOLTAGE_SCALE (4) + +#define smnPCIE_LC_SPEED_CNTL 0x11140290 +#define smnPCIE_LC_LINK_WIDTH_CNTL 0x11140288 + extern void navi10_set_ppt_funcs(struct smu_context *smu); #endif diff --git a/drivers/gpu/drm/amd/powerplay/renoir_ppt.c b/drivers/gpu/drm/amd/powerplay/renoir_ppt.c index 4a9751971a9d..04daf7e9fe05 100644 --- a/drivers/gpu/drm/amd/powerplay/renoir_ppt.c +++ b/drivers/gpu/drm/amd/powerplay/renoir_ppt.c @@ -180,11 +180,13 @@ static int renoir_print_clk_levels(struct smu_context *smu, int i, size = 0, ret = 0; uint32_t cur_value = 0, value = 0, count = 0, min = 0, max = 0; DpmClocks_t *clk_table = smu->smu_table.clocks_table; - SmuMetrics_t metrics = {0}; + SmuMetrics_t metrics; if (!clk_table || clk_type >= SMU_CLK_COUNT) return -EINVAL; + memset(&metrics, 0, sizeof(metrics)); + ret = smu_update_table(smu, SMU_TABLE_SMU_METRICS, 0, (void *)&metrics, false); if (ret) diff --git a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c index bbb74b1d5d80..fc9679ea2368 100644 --- a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c +++ b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c @@ -24,6 +24,8 @@ #include <linux/module.h> #include <linux/pci.h> +#define SMU_11_0_PARTIAL_PPTABLE + #include "pp_debug.h" #include "amdgpu.h" #include "amdgpu_smu.h" @@ -31,6 +33,7 @@ #include "atomfirmware.h" #include "amdgpu_atomfirmware.h" #include "smu_v11_0.h" +#include "smu_v11_0_pptable.h" #include "soc15_common.h" #include "atom.h" #include "amd_pcie.h" @@ -368,6 +371,7 @@ int smu_v11_0_setup_pptable(struct smu_context *smu) version_major = le16_to_cpu(hdr->header.header_version_major); version_minor = le16_to_cpu(hdr->header.header_version_minor); if (version_major == 2 && smu->smu_table.boot_values.pp_table_id > 0) { + pr_info("use driver provided pptable %d\n", smu->smu_table.boot_values.pp_table_id); switch (version_minor) { case 0: ret = smu_v11_0_set_pptable_v2_0(smu, &table, &size); @@ -384,6 +388,7 @@ int smu_v11_0_setup_pptable(struct smu_context *smu) return ret; } else { + pr_info("use vbios provided pptable\n"); index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1, powerplayinfo); @@ -1043,13 +1048,44 @@ int smu_v11_0_init_max_sustainable_clocks(struct smu_context *smu) return 0; } +uint32_t smu_v11_0_get_max_power_limit(struct smu_context *smu) { + uint32_t od_limit, max_power_limit; + struct smu_11_0_powerplay_table *powerplay_table = NULL; + struct smu_table_context *table_context = &smu->smu_table; + powerplay_table = table_context->power_play_table; + + max_power_limit = smu_get_pptable_power_limit(smu); + + if (!max_power_limit) { + // If we couldn't get the table limit, fall back on first-read value + if (!smu->default_power_limit) + smu->default_power_limit = smu->power_limit; + max_power_limit = smu->default_power_limit; + } + + if (smu->od_enabled) { + od_limit = le32_to_cpu(powerplay_table->overdrive_table.max[SMU_11_0_ODSETTING_POWERPERCENTAGE]); + + pr_debug("ODSETTING_POWERPERCENTAGE: %d (default: %d)\n", od_limit, smu->default_power_limit); + + max_power_limit *= (100 + od_limit); + max_power_limit /= 100; + } + + return max_power_limit; +} + int smu_v11_0_set_power_limit(struct smu_context *smu, uint32_t n) { int ret = 0; + uint32_t max_power_limit; - if (n > smu->default_power_limit) { - pr_err("New power limit is over the max allowed %d\n", - smu->default_power_limit); + max_power_limit = smu_v11_0_get_max_power_limit(smu); + + if (n > max_power_limit) { + pr_err("New power limit (%d) is over the max allowed %d\n", + n, + max_power_limit); return -EINVAL; } @@ -1463,16 +1499,13 @@ int smu_v11_0_set_fan_speed_rpm(struct smu_context *smu, return ret; } -#define XGMI_STATE_D0 1 -#define XGMI_STATE_D3 0 - int smu_v11_0_set_xgmi_pstate(struct smu_context *smu, uint32_t pstate) { int ret = 0; ret = smu_send_smc_msg_with_param(smu, SMU_MSG_SetXgmiMode, - pstate ? XGMI_STATE_D0 : XGMI_STATE_D3); + pstate ? XGMI_MODE_PSTATE_D0 : XGMI_MODE_PSTATE_D3); return ret; } @@ -1780,3 +1813,30 @@ int smu_v11_0_override_pcie_parameters(struct smu_context *smu) return ret; } + +int smu_v11_0_set_default_od_settings(struct smu_context *smu, bool initialize, size_t overdrive_table_size) +{ + struct smu_table_context *table_context = &smu->smu_table; + int ret = 0; + + if (initialize) { + if (table_context->overdrive_table) { + return -EINVAL; + } + table_context->overdrive_table = kzalloc(overdrive_table_size, GFP_KERNEL); + if (!table_context->overdrive_table) { + return -ENOMEM; + } + ret = smu_update_table(smu, SMU_TABLE_OVERDRIVE, 0, table_context->overdrive_table, false); + if (ret) { + pr_err("Failed to export overdrive table!\n"); + return ret; + } + } + ret = smu_update_table(smu, SMU_TABLE_OVERDRIVE, 0, table_context->overdrive_table, true); + if (ret) { + pr_err("Failed to import overdrive table!\n"); + return ret; + } + return ret; +} diff --git a/drivers/gpu/drm/amd/powerplay/vega20_ppt.c b/drivers/gpu/drm/amd/powerplay/vega20_ppt.c index 7c8933f987d1..0b4892833808 100644 --- a/drivers/gpu/drm/amd/powerplay/vega20_ppt.c +++ b/drivers/gpu/drm/amd/powerplay/vega20_ppt.c @@ -221,7 +221,7 @@ static struct smu_11_0_cmn2aisc_mapping vega20_workload_map[PP_SMC_POWER_PROFILE WORKLOAD_MAP(PP_SMC_POWER_PROFILE_POWERSAVING, WORKLOAD_PPLIB_POWER_SAVING_BIT), WORKLOAD_MAP(PP_SMC_POWER_PROFILE_VIDEO, WORKLOAD_PPLIB_VIDEO_BIT), WORKLOAD_MAP(PP_SMC_POWER_PROFILE_VR, WORKLOAD_PPLIB_VR_BIT), - WORKLOAD_MAP(PP_SMC_POWER_PROFILE_COMPUTE, WORKLOAD_PPLIB_CUSTOM_BIT), + WORKLOAD_MAP(PP_SMC_POWER_PROFILE_COMPUTE, WORKLOAD_PPLIB_COMPUTE_BIT), WORKLOAD_MAP(PP_SMC_POWER_PROFILE_CUSTOM, WORKLOAD_PPLIB_CUSTOM_BIT), }; @@ -466,7 +466,6 @@ static int vega20_store_powerplay_table(struct smu_context *smu) sizeof(PPTable_t)); table_context->thermal_controller_type = powerplay_table->ucThermalControllerType; - table_context->TDPODLimit = le32_to_cpu(powerplay_table->OverDrive8Table.ODSettingsMax[ATOM_VEGA20_ODSETTING_POWERPERCENTAGE]); return 0; } |