diff options
author | Dave Airlie <airlied@redhat.com> | 2020-04-30 11:08:54 +1000 |
---|---|---|
committer | Dave Airlie <airlied@redhat.com> | 2020-04-30 11:08:54 +1000 |
commit | 937eea297e26effac6809a0bf8c20e6ca9d90b9a (patch) | |
tree | 5e2d4ddc284776b56355d36c8d2c5a757956b3d4 /drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c | |
parent | 126a34061eec2df05a5a28052edefd4e6125f31c (diff) | |
parent | e748f07d00c1c4a9106acafac52df7ea4ecf6264 (diff) |
Merge tag 'amd-drm-next-5.8-2020-04-24' of git://people.freedesktop.org/~agd5f/linux into drm-next
amd-drm-next-5.8-2020-04-24:
amdgpu:
- Documentation improvements
- Enable FRU chip access on boards that support it
- RAS updates
- SR-IOV updates
- Powerplay locking fixes for older SMU versions
- VCN DPG (dynamic powergating) cleanup
- VCN 2.5 DPG enablement
- Rework GPU scheduler handling
- Improve scheduler priority handling
- Add SPM (streaming performance monitor) golden settings for navi
- GFX10 clockgating fixes
- DC ABM (automatic backlight modulation) fixes
- DC cursor and plane fixes
- DC watermark fixes
- DC clock handling fixes
- DC color management fixes
- GPU reset fixes
- Clean up MMIO access macros
- EEPROM access fixes
- Misc code cleanups
amdkfd:
- Misc code cleanups
radeon:
- Clean up safe reg list generation
- Misc code cleanups
From: Alex Deucher <alexdeucher@gmail.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20200424190827.4542-1-alexander.deucher@amd.com
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c')
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c | 98 |
1 files changed, 66 insertions, 32 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c index 95b3327168ac..54d8a3e7e75c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c @@ -373,7 +373,13 @@ struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev, int lo if (lock) mutex_lock(&tmp->hive_lock); - tmp->pstate = -1; + tmp->pstate = AMDGPU_XGMI_PSTATE_UNKNOWN; + tmp->hi_req_gpu = NULL; + /* + * hive pstate on boot is high in vega20 so we have to go to low + * pstate on after boot. + */ + tmp->hi_req_count = AMDGPU_MAX_XGMI_DEVICE_PER_HIVE; mutex_unlock(&xgmi_mutex); return tmp; @@ -383,50 +389,51 @@ int amdgpu_xgmi_set_pstate(struct amdgpu_device *adev, int pstate) { int ret = 0; struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev, 0); - struct amdgpu_device *tmp_adev; - bool update_hive_pstate = true; - bool is_high_pstate = pstate && adev->asic_type == CHIP_VEGA20; + struct amdgpu_device *request_adev = hive->hi_req_gpu ? + hive->hi_req_gpu : adev; + bool is_hi_req = pstate == AMDGPU_XGMI_PSTATE_MAX_VEGA20; + bool init_low = hive->pstate == AMDGPU_XGMI_PSTATE_UNKNOWN; - if (!hive) + /* fw bug so temporarily disable pstate switching */ + if (!hive || adev->asic_type == CHIP_VEGA20) return 0; mutex_lock(&hive->hive_lock); - if (hive->pstate == pstate) { - adev->pstate = is_high_pstate ? pstate : adev->pstate; + if (is_hi_req) + hive->hi_req_count++; + else + hive->hi_req_count--; + + /* + * Vega20 only needs single peer to request pstate high for the hive to + * go high but all peers must request pstate low for the hive to go low + */ + if (hive->pstate == pstate || + (!is_hi_req && hive->hi_req_count && !init_low)) goto out; - } - dev_dbg(adev->dev, "Set xgmi pstate %d.\n", pstate); + dev_dbg(request_adev->dev, "Set xgmi pstate %d.\n", pstate); - ret = amdgpu_dpm_set_xgmi_pstate(adev, pstate); + ret = amdgpu_dpm_set_xgmi_pstate(request_adev, pstate); if (ret) { - dev_err(adev->dev, + dev_err(request_adev->dev, "XGMI: Set pstate failure on device %llx, hive %llx, ret %d", - adev->gmc.xgmi.node_id, - adev->gmc.xgmi.hive_id, ret); + request_adev->gmc.xgmi.node_id, + request_adev->gmc.xgmi.hive_id, ret); goto out; } - /* Update device pstate */ - adev->pstate = pstate; - - /* - * Update the hive pstate only all devices of the hive - * are in the same pstate - */ - list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) { - if (tmp_adev->pstate != adev->pstate) { - update_hive_pstate = false; - break; - } - } - if (update_hive_pstate || is_high_pstate) + if (init_low) + hive->pstate = hive->hi_req_count ? + hive->pstate : AMDGPU_XGMI_PSTATE_MIN; + else { hive->pstate = pstate; - + hive->hi_req_gpu = pstate != AMDGPU_XGMI_PSTATE_MIN ? + adev : NULL; + } out: mutex_unlock(&hive->hive_lock); - return ret; } @@ -507,9 +514,6 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev) goto exit; } - /* Set default device pstate */ - adev->pstate = -1; - top_info = &adev->psp.xgmi_context.top_info; list_add_tail(&adev->gmc.xgmi.head, &hive->device_list); @@ -604,6 +608,8 @@ int amdgpu_xgmi_ras_late_init(struct amdgpu_device *adev) adev->gmc.xgmi.num_physical_nodes == 0) return 0; + amdgpu_xgmi_reset_ras_error_count(adev); + if (!adev->gmc.xgmi.ras_if) { adev->gmc.xgmi.ras_if = kmalloc(sizeof(struct ras_common_if), GFP_KERNEL); if (!adev->gmc.xgmi.ras_if) @@ -668,6 +674,32 @@ uint64_t amdgpu_xgmi_get_relative_phy_addr(struct amdgpu_device *adev, return addr + dram_base_addr; } +static void pcs_clear_status(struct amdgpu_device *adev, uint32_t pcs_status_reg) +{ + WREG32_PCIE(pcs_status_reg, 0xFFFFFFFF); + WREG32_PCIE(pcs_status_reg, 0); +} + +void amdgpu_xgmi_reset_ras_error_count(struct amdgpu_device *adev) +{ + uint32_t i; + + switch (adev->asic_type) { + case CHIP_ARCTURUS: + for (i = 0; i < ARRAY_SIZE(xgmi_pcs_err_status_reg_arct); i++) + pcs_clear_status(adev, + xgmi_pcs_err_status_reg_arct[i]); + break; + case CHIP_VEGA20: + for (i = 0; i < ARRAY_SIZE(xgmi_pcs_err_status_reg_vg20); i++) + pcs_clear_status(adev, + xgmi_pcs_err_status_reg_vg20[i]); + break; + default: + break; + } +} + static int amdgpu_xgmi_query_pcs_error_status(struct amdgpu_device *adev, uint32_t value, uint32_t *ue_count, @@ -758,6 +790,8 @@ int amdgpu_xgmi_query_ras_error_count(struct amdgpu_device *adev, break; } + amdgpu_xgmi_reset_ras_error_count(adev); + err_data->ue_count += ue_cnt; err_data->ce_count += ce_cnt; |