diff options
author | Dave Airlie <airlied@redhat.com> | 2022-04-28 14:33:20 +1000 |
---|---|---|
committer | Dave Airlie <airlied@redhat.com> | 2022-04-28 14:33:20 +1000 |
commit | dbe946287e0825f0e9cd4cbeacfcde9d9b2dd168 (patch) | |
tree | 3ac3e33cfbb6670827e27cd22faf2ba03ddc4209 | |
parent | 19df0cfa258cd42f7f106f6085f1e625f26283db (diff) | |
parent | d68cf992ded575928cf4ddf7c64faff0d8dcce14 (diff) |
Merge tag 'amd-drm-next-5.19-2022-04-15' of https://gitlab.freedesktop.org/agd5f/linux into drm-next
amd-drm-next-5.19-2022-04-15:
amdgpu:
- USB-C updates
- GPUVM updates
- TMZ fixes for RV
- DCN 3.1 pstate fixes
- Display z state fixes
- RAS fixes
- Misc code cleanups and spelling fixes
- More DC FP rework
- GPUVM TLB handling rework
- Power management sysfs code cleanup
- Add RAS support for VCN
- Backlight fix
- Add unique ID support for more ASICs
- Misc display updates
- SR-IOV fixes
- Extend CG and PG flags to 64 bits
- Enable VCN clk sysfs nodes for navi12
amdkfd:
- Fix IO link cleanup during device removal
- RAS fixes
- Retry fault fixes
- Asynchronously free events
- SVM fixes
radeon:
- Drop some dead code
- Misc code cleanups
From: Alex Deucher <alexander.deucher@amd.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20220415135144.5700-1-alexander.deucher@amd.com
Signed-off-by: Dave Airlie <airlied@redhat.com>
184 files changed, 4362 insertions, 4072 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index 40e2c6e2df79..7d7af43a258f 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -49,7 +49,7 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \ amdgpu_cs.o amdgpu_bios.o amdgpu_benchmark.o \ atombios_dp.o amdgpu_afmt.o amdgpu_trace_points.o \ atombios_encoders.o amdgpu_sa.o atombios_i2c.o \ - amdgpu_dma_buf.o amdgpu_vm.o amdgpu_ib.o amdgpu_pll.o \ + amdgpu_dma_buf.o amdgpu_vm.o amdgpu_vm_pt.o amdgpu_ib.o amdgpu_pll.o \ amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o \ amdgpu_gtt_mgr.o amdgpu_preempt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o \ amdgpu_atomfirmware.o amdgpu_vf_error.o amdgpu_sched.o \ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index cdf0818088b3..bffd24845765 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -179,7 +179,7 @@ extern int amdgpu_sched_jobs; extern int amdgpu_sched_hw_submission; extern uint amdgpu_pcie_gen_cap; extern uint amdgpu_pcie_lane_cap; -extern uint amdgpu_cg_mask; +extern u64 amdgpu_cg_mask; extern uint amdgpu_pg_mask; extern uint amdgpu_sdma_phase_quantum; extern char *amdgpu_disable_cu; @@ -322,7 +322,7 @@ int amdgpu_device_ip_set_powergating_state(void *dev, enum amd_ip_block_type block_type, enum amd_powergating_state state); void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev, - u32 *flags); + u64 *flags); int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev, enum amd_ip_block_type block_type); bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev, @@ -860,7 +860,7 @@ struct amdgpu_device { /* powerplay */ struct amd_powerplay powerplay; struct amdgpu_pm pm; - u32 cg_flags; + u64 cg_flags; u32 pg_flags; /* nbio */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index 6ca1db3c243f..64c6664b34e8 100644 --- 
a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -724,3 +724,11 @@ void amdgpu_amdkfd_ras_poison_consumption_handler(struct amdgpu_device *adev, bo else if (reset) amdgpu_amdkfd_gpu_reset(adev); } + +bool amdgpu_amdkfd_ras_query_utcl2_poison_status(struct amdgpu_device *adev) +{ + if (adev->gfx.ras && adev->gfx.ras->query_utcl2_poison_status) + return adev->gfx.ras->query_utcl2_poison_status(adev); + else + return false; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index 4cb14c2fe53f..f8b9f27adcf5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -273,9 +273,8 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( struct amdgpu_device *adev, struct kgd_mem *mem, void *drm_priv, uint64_t *size); -int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( - struct amdgpu_device *adev, struct kgd_mem *mem, void *drm_priv, - bool *table_freed); +int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(struct amdgpu_device *adev, + struct kgd_mem *mem, void *drm_priv); int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu( struct amdgpu_device *adev, struct kgd_mem *mem, void *drm_priv); int amdgpu_amdkfd_gpuvm_sync_memory( @@ -301,6 +300,7 @@ void amdgpu_amdkfd_ras_poison_consumption_handler(struct amdgpu_device *adev, bool amdgpu_amdkfd_bo_mapped_to_dev(struct amdgpu_device *adev, struct kgd_mem *mem); void amdgpu_amdkfd_block_mmu_notifications(void *p); int amdgpu_amdkfd_criu_resume(void *p); +bool amdgpu_amdkfd_ras_query_utcl2_poison_status(struct amdgpu_device *adev); #if IS_ENABLED(CONFIG_HSA_AMD) void amdgpu_amdkfd_gpuvm_init_mem_limits(void); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 3dc5ab2764ff..80b6b8e432fd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
@@ -1058,8 +1058,7 @@ static void unmap_bo_from_gpuvm(struct kgd_mem *mem, static int update_gpuvm_pte(struct kgd_mem *mem, struct kfd_mem_attachment *entry, - struct amdgpu_sync *sync, - bool *table_freed) + struct amdgpu_sync *sync) { struct amdgpu_bo_va *bo_va = entry->bo_va; struct amdgpu_device *adev = entry->adev; @@ -1070,7 +1069,7 @@ static int update_gpuvm_pte(struct kgd_mem *mem, return ret; /* Update the page tables */ - ret = amdgpu_vm_bo_update(adev, bo_va, false, table_freed); + ret = amdgpu_vm_bo_update(adev, bo_va, false); if (ret) { pr_err("amdgpu_vm_bo_update failed\n"); return ret; @@ -1082,8 +1081,7 @@ static int update_gpuvm_pte(struct kgd_mem *mem, static int map_bo_to_gpuvm(struct kgd_mem *mem, struct kfd_mem_attachment *entry, struct amdgpu_sync *sync, - bool no_update_pte, - bool *table_freed) + bool no_update_pte) { int ret; @@ -1100,7 +1098,7 @@ static int map_bo_to_gpuvm(struct kgd_mem *mem, if (no_update_pte) return 0; - ret = update_gpuvm_pte(mem, entry, sync, table_freed); + ret = update_gpuvm_pte(mem, entry, sync); if (ret) { pr_err("update_gpuvm_pte() failed\n"); goto update_gpuvm_pte_failed; @@ -1710,7 +1708,7 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( struct amdgpu_device *adev, struct kgd_mem *mem, - void *drm_priv, bool *table_freed) + void *drm_priv) { struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv); int ret; @@ -1797,7 +1795,7 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( entry->va, entry->va + bo_size, entry); ret = map_bo_to_gpuvm(mem, entry, ctx.sync, - is_invalid_userptr, table_freed); + is_invalid_userptr); if (ret) { pr_err("Failed to map bo to gpuvm\n"); goto out_unreserve; @@ -2265,7 +2263,7 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info) continue; kfd_mem_dmaunmap_attachment(mem, attachment); - ret = update_gpuvm_pte(mem, attachment, &sync, NULL); + ret = update_gpuvm_pte(mem, attachment, &sync); if (ret) { pr_err("%s: update PTE 
failed\n", __func__); /* make sure this gets validated again */ @@ -2476,7 +2474,7 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef) continue; kfd_mem_dmaunmap_attachment(mem, attachment); - ret = update_gpuvm_pte(mem, attachment, &sync_obj, NULL); + ret = update_gpuvm_pte(mem, attachment, &sync_obj); if (ret) { pr_debug("Memory eviction: update PTE failed. Try again\n"); goto validate_map_fail; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 8de283997769..71633a1fedfd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -785,22 +785,22 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p) if (r) return r; - r = amdgpu_vm_bo_update(adev, fpriv->prt_va, false, NULL); + r = amdgpu_vm_bo_update(adev, fpriv->prt_va, false); if (r) return r; - r = amdgpu_sync_vm_fence(&p->job->sync, fpriv->prt_va->last_pt_update); + r = amdgpu_sync_fence(&p->job->sync, fpriv->prt_va->last_pt_update); if (r) return r; if (amdgpu_mcbp || amdgpu_sriov_vf(adev)) { bo_va = fpriv->csa_va; BUG_ON(!bo_va); - r = amdgpu_vm_bo_update(adev, bo_va, false, NULL); + r = amdgpu_vm_bo_update(adev, bo_va, false); if (r) return r; - r = amdgpu_sync_vm_fence(&p->job->sync, bo_va->last_pt_update); + r = amdgpu_sync_fence(&p->job->sync, bo_va->last_pt_update); if (r) return r; } @@ -815,11 +815,11 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p) if (bo_va == NULL) continue; - r = amdgpu_vm_bo_update(adev, bo_va, false, NULL); + r = amdgpu_vm_bo_update(adev, bo_va, false); if (r) return r; - r = amdgpu_sync_vm_fence(&p->job->sync, bo_va->last_pt_update); + r = amdgpu_sync_fence(&p->job->sync, bo_va->last_pt_update); if (r) return r; } @@ -832,7 +832,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p) if (r) return r; - r = amdgpu_sync_vm_fence(&p->job->sync, vm->last_update); + r = amdgpu_sync_fence(&p->job->sync, vm->last_update); if (r) 
return r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c index 5d04d24a0d5f..13e4d8f9b874 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c @@ -730,7 +730,7 @@ static ssize_t amdgpu_debugfs_gca_config_read(struct file *f, char __user *buf, return -ENOMEM; /* version, increment each time something is added */ - config[no_regs++] = 4; + config[no_regs++] = 5; config[no_regs++] = adev->gfx.config.max_shader_engines; config[no_regs++] = adev->gfx.config.max_tile_pipes; config[no_regs++] = adev->gfx.config.max_cu_per_sh; @@ -757,8 +757,8 @@ static ssize_t amdgpu_debugfs_gca_config_read(struct file *f, char __user *buf, /* rev==1 */ config[no_regs++] = adev->rev_id; - config[no_regs++] = adev->pg_flags; - config[no_regs++] = adev->cg_flags; + config[no_regs++] = lower_32_bits(adev->pg_flags); + config[no_regs++] = lower_32_bits(adev->cg_flags); /* rev==2 */ config[no_regs++] = adev->family; @@ -773,6 +773,10 @@ static ssize_t amdgpu_debugfs_gca_config_read(struct file *f, char __user *buf, /* rev==4 APU flag */ config[no_regs++] = adev->flags & AMD_IS_APU ? 1 : 0; + /* rev==5 PG/CG flag upper 32bit */ + config[no_regs++] = upper_32_bits(adev->pg_flags); + config[no_regs++] = upper_32_bits(adev->cg_flags); + while (size && (*pos < no_regs * 4)) { uint32_t value; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 49f734137f15..fb69b4188c8a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -1703,7 +1703,7 @@ int amdgpu_device_ip_set_powergating_state(void *dev, * clockgating is enabled. 
*/ void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev, - u32 *flags) + u64 *flags) { int i; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_df.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_df.h index 6b25837955c4..1538b2dbfff1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_df.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_df.h @@ -40,7 +40,7 @@ struct amdgpu_df_funcs { void (*update_medium_grain_clock_gating)(struct amdgpu_device *adev, bool enable); void (*get_clockgating_state)(struct amdgpu_device *adev, - u32 *flags); + u64 *flags); void (*enable_ecc_force_par_wr_rmw)(struct amdgpu_device *adev, bool enable); int (*pmc_start)(struct amdgpu_device *adev, uint64_t config, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c index e4fcbb385a62..aaf2fc6b1a82 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c @@ -430,7 +430,7 @@ static void amdgpu_discovery_read_harvest_bit_per_ip(struct amdgpu_device *adev, } } next_ip: - ip_offset += sizeof(*ip) + 4 * (ip->num_base_address - 1); + ip_offset += struct_size(ip, base_address, ip->num_base_address); } } } @@ -798,7 +798,7 @@ static int amdgpu_discovery_sysfs_ips(struct amdgpu_device *adev, res = kobject_add(&ip_hw_instance->kobj, NULL, "%d", ip_hw_instance->num_instance); next_ip: - ip_offset += sizeof(*ip) + 4 * (ip->num_base_address - 1); + ip_offset += struct_size(ip, base_address, ip->num_base_address); } } @@ -1063,7 +1063,7 @@ int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev) } next_ip: - ip_offset += sizeof(*ip) + 4 * (ip->num_base_address - 1); + ip_offset += struct_size(ip, base_address, ip->num_base_address); } } @@ -1113,7 +1113,7 @@ int amdgpu_discovery_get_ip_version(struct amdgpu_device *adev, int hw_id, int n *revision = ip->revision; return 0; } - ip_offset += sizeof(*ip) + 4 * (ip->num_base_address - 1); + ip_offset += struct_size(ip, base_address, ip->num_base_address); 
} } @@ -1150,13 +1150,6 @@ void amdgpu_discovery_harvest_ip(struct amdgpu_device *adev) adev->harvest_ip_mask |= AMD_HARVEST_IP_VCN_MASK; adev->harvest_ip_mask |= AMD_HARVEST_IP_JPEG_MASK; } - if ((adev->pdev->device == 0x731E && - (adev->pdev->revision == 0xC6 || adev->pdev->revision == 0xC7)) || - (adev->pdev->device == 0x7340 && adev->pdev->revision == 0xC9) || - (adev->pdev->device == 0x7360 && adev->pdev->revision == 0xC7)) { - adev->harvest_ip_mask |= AMD_HARVEST_IP_VCN_MASK; - adev->harvest_ip_mask |= AMD_HARVEST_IP_JPEG_MASK; - } } union gc_info { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c index 7a6908d71820..17c9bbe0cbc5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c @@ -41,6 +41,11 @@ #include <drm/drm_fourcc.h> #include <drm/drm_vblank.h> +static int amdgpu_display_framebuffer_init(struct drm_device *dev, + struct amdgpu_framebuffer *rfb, + const struct drm_mode_fb_cmd2 *mode_cmd, + struct drm_gem_object *obj); + static void amdgpu_display_flip_callback(struct dma_fence *f, struct dma_fence_cb *cb) { @@ -113,8 +118,9 @@ static void amdgpu_display_flip_work_func(struct work_struct *__work) spin_unlock_irqrestore(&crtc->dev->event_lock, flags); - DRM_DEBUG_DRIVER("crtc:%d[%p], pflip_stat:AMDGPU_FLIP_SUBMITTED, work: %p,\n", - amdgpu_crtc->crtc_id, amdgpu_crtc, work); + drm_dbg_vbl(adev_to_drm(adev), + "crtc:%d[%p], pflip_stat:AMDGPU_FLIP_SUBMITTED, work: %p,\n", + amdgpu_crtc->crtc_id, amdgpu_crtc, work); } @@ -1038,35 +1044,11 @@ static int amdgpu_display_get_fb_info(const struct amdgpu_framebuffer *amdgpu_fb return r; } -int amdgpu_display_gem_fb_init(struct drm_device *dev, - struct amdgpu_framebuffer *rfb, - const struct drm_mode_fb_cmd2 *mode_cmd, - struct drm_gem_object *obj) -{ - int ret; - - rfb->base.obj[0] = obj; - drm_helper_mode_fill_fb_struct(dev, &rfb->base, mode_cmd); - - ret = amdgpu_display_framebuffer_init(dev, rfb, 
mode_cmd, obj); - if (ret) - goto err; - - ret = drm_framebuffer_init(dev, &rfb->base, &amdgpu_fb_funcs); - if (ret) - goto err; - - return 0; -err: - drm_dbg_kms(dev, "Failed to init gem fb: %d\n", ret); - rfb->base.obj[0] = NULL; - return ret; -} - -int amdgpu_display_gem_fb_verify_and_init( - struct drm_device *dev, struct amdgpu_framebuffer *rfb, - struct drm_file *file_priv, const struct drm_mode_fb_cmd2 *mode_cmd, - struct drm_gem_object *obj) +static int amdgpu_display_gem_fb_verify_and_init(struct drm_device *dev, + struct amdgpu_framebuffer *rfb, + struct drm_file *file_priv, + const struct drm_mode_fb_cmd2 *mode_cmd, + struct drm_gem_object *obj) { int ret; @@ -1098,10 +1080,10 @@ err: return ret; } -int amdgpu_display_framebuffer_init(struct drm_device *dev, - struct amdgpu_framebuffer *rfb, - const struct drm_mode_fb_cmd2 *mode_cmd, - struct drm_gem_object *obj) +static int amdgpu_display_framebuffer_init(struct drm_device *dev, + struct amdgpu_framebuffer *rfb, + const struct drm_mode_fb_cmd2 *mode_cmd, + struct drm_gem_object *obj) { struct amdgpu_device *adev = drm_to_adev(dev); int ret, i; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index b03663f42cc9..4efaa183abcd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -136,7 +136,7 @@ int amdgpu_sched_jobs = 32; int amdgpu_sched_hw_submission = 2; uint amdgpu_pcie_gen_cap; uint amdgpu_pcie_lane_cap; -uint amdgpu_cg_mask = 0xffffffff; +u64 amdgpu_cg_mask = 0xffffffffffffffff; uint amdgpu_pg_mask = 0xffffffff; uint amdgpu_sdma_phase_quantum = 32; char *amdgpu_disable_cu = NULL; @@ -454,12 +454,12 @@ MODULE_PARM_DESC(pcie_lane_cap, "PCIE Lane Caps (0: autodetect (default))"); module_param_named(pcie_lane_cap, amdgpu_pcie_lane_cap, uint, 0444); /** - * DOC: cg_mask (uint) + * DOC: cg_mask (ullong) * Override Clockgating features enabled on GPU (0 = disable clock gating). 
See the AMD_CG_SUPPORT flags in - * drivers/gpu/drm/amd/include/amd_shared.h. The default is 0xffffffff (all enabled). + * drivers/gpu/drm/amd/include/amd_shared.h. The default is 0xffffffffffffffff (all enabled). */ MODULE_PARM_DESC(cg_mask, "Clockgating flags mask (0 = disable clock gating)"); -module_param_named(cg_mask, amdgpu_cg_mask, uint, 0444); +module_param_named(cg_mask, amdgpu_cg_mask, ullong, 0444); /** * DOC: pg_mask (uint) @@ -2323,18 +2323,23 @@ static int amdgpu_pmops_suspend(struct device *dev) { struct drm_device *drm_dev = dev_get_drvdata(dev); struct amdgpu_device *adev = drm_to_adev(drm_dev); - int r; if (amdgpu_acpi_is_s0ix_active(adev)) adev->in_s0ix = true; else adev->in_s3 = true; - r = amdgpu_device_suspend(drm_dev, true); - if (r) - return r; + return amdgpu_device_suspend(drm_dev, true); +} + +static int amdgpu_pmops_suspend_noirq(struct device *dev) +{ + struct drm_device *drm_dev = dev_get_drvdata(dev); + struct amdgpu_device *adev = drm_to_adev(drm_dev); + if (!adev->in_s0ix) - r = amdgpu_asic_reset(adev); - return r; + return amdgpu_asic_reset(adev); + + return 0; } static int amdgpu_pmops_resume(struct device *dev) @@ -2575,6 +2580,7 @@ static const struct dev_pm_ops amdgpu_pm_ops = { .prepare = amdgpu_pmops_prepare, .complete = amdgpu_pmops_complete, .suspend = amdgpu_pmops_suspend, + .suspend_noirq = amdgpu_pmops_suspend_noirq, .resume = amdgpu_pmops_resume, .freeze = amdgpu_pmops_freeze, .thaw = amdgpu_pmops_thaw, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index 84a53758e18e..652571267077 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -613,7 +613,7 @@ static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev, if (operation == AMDGPU_VA_OP_MAP || operation == AMDGPU_VA_OP_REPLACE) { - r = amdgpu_vm_bo_update(adev, bo_va, false, NULL); + r = amdgpu_vm_bo_update(adev, bo_va, false); if (r) goto error; } diff --git 
a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index dcb3c7871c73..5ed9b8a4c571 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -202,6 +202,7 @@ struct amdgpu_cu_info { struct amdgpu_gfx_ras { struct amdgpu_ras_block_object ras_block; void (*enable_watchdog_timer)(struct amdgpu_device *adev); + bool (*query_utcl2_poison_status)(struct amdgpu_device *adev); }; struct amdgpu_gfx_funcs { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c index a66a0881a934..88b852b3a2cb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c @@ -25,6 +25,9 @@ */ #include <linux/io-64-nonatomic-lo-hi.h> +#ifdef CONFIG_X86 +#include <asm/hypervisor.h> +#endif #include "amdgpu.h" #include "amdgpu_gmc.h" @@ -647,12 +650,14 @@ void amdgpu_gmc_get_vbios_allocations(struct amdgpu_device *adev) case CHIP_VEGA10: adev->mman.keep_stolen_vga_memory = true; /* - * VEGA10 SRIOV VF needs some firmware reserved area. + * VEGA10 SRIOV VF with MS_HYPERV host needs some firmware reserved area. 
*/ - if (amdgpu_sriov_vf(adev)) { - adev->mman.stolen_reserved_offset = 0x100000; - adev->mman.stolen_reserved_size = 0x600000; +#ifdef CONFIG_X86 + if (amdgpu_sriov_vf(adev) && hypervisor_is_type(X86_HYPER_MS_HYPERV)) { + adev->mman.stolen_reserved_offset = 0x500000; + adev->mman.stolen_reserved_size = 0x200000; } +#endif break; case CHIP_RAVEN: case CHIP_RENOIR: diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h index 9181c7bef7c6..ac5c61d3de2b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h @@ -33,7 +33,7 @@ struct amdgpu_hdp_funcs { void (*invalidate_hdp)(struct amdgpu_device *adev, struct amdgpu_ring *ring); void (*update_clock_gating)(struct amdgpu_device *adev, bool enable); - void (*get_clock_gating_state)(struct amdgpu_device *adev, u32 *flags); + void (*get_clock_gating_state)(struct amdgpu_device *adev, u64 *flags); void (*init_registers)(struct amdgpu_device *adev); }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c index 4ba4b54092f1..03d115d2b5ed 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c @@ -260,19 +260,15 @@ static int amdgpu_vmid_grab_reserved(struct amdgpu_vm *vm, struct amdgpu_device *adev = ring->adev; unsigned vmhub = ring->funcs->vmhub; uint64_t fence_context = adev->fence_context + ring->idx; - struct dma_fence *updates = sync->last_vm_update; bool needs_flush = vm->use_cpu_for_update; - int r = 0; + uint64_t updates = amdgpu_vm_tlb_seq(vm); + int r; *id = vm->reserved_vmid[vmhub]; - if (updates && (*id)->flushed_updates && - updates->context == (*id)->flushed_updates->context && - !dma_fence_is_later(updates, (*id)->flushed_updates)) - updates = NULL; - if ((*id)->owner != vm->immediate.fence_context || - job->vm_pd_addr != (*id)->pd_gpu_addr || - updates || !(*id)->last_flush || + (*id)->pd_gpu_addr != job->vm_pd_addr || + (*id)->flushed_updates 
< updates || + !(*id)->last_flush || ((*id)->last_flush->context != fence_context && !dma_fence_is_signaled((*id)->last_flush))) { struct dma_fence *tmp; @@ -286,8 +282,7 @@ static int amdgpu_vmid_grab_reserved(struct amdgpu_vm *vm, tmp = amdgpu_sync_peek_fence(&(*id)->active, ring); if (tmp) { *id = NULL; - r = amdgpu_sync_fence(sync, tmp); - return r; + return amdgpu_sync_fence(sync, tmp); } needs_flush = true; } @@ -299,10 +294,7 @@ static int amdgpu_vmid_grab_reserved(struct amdgpu_vm *vm, if (r) return r; - if (updates) { - dma_fence_put((*id)->flushed_updates); - (*id)->flushed_updates = dma_fence_get(updates); - } + (*id)->flushed_updates = updates; job->vm_needs_flush = needs_flush; return 0; } @@ -330,7 +322,7 @@ static int amdgpu_vmid_grab_used(struct amdgpu_vm *vm, unsigned vmhub = ring->funcs->vmhub; struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub]; uint64_t fence_context = adev->fence_context + ring->idx; - struct dma_fence *updates = sync->last_vm_update; + uint64_t updates = amdgpu_vm_tlb_seq(vm); int r; job->vm_needs_flush = vm->use_cpu_for_update; @@ -338,7 +330,6 @@ static int amdgpu_vmid_grab_used(struct amdgpu_vm *vm, /* Check if we can use a VMID already assigned to this VM */ list_for_each_entry_reverse((*id), &id_mgr->ids_lru, list) { bool needs_flush = vm->use_cpu_for_update; - struct dma_fence *flushed; /* Check all the prerequisites to using this VMID */ if ((*id)->owner != vm->immediate.fence_context) @@ -352,8 +343,7 @@ static int amdgpu_vmid_grab_used(struct amdgpu_vm *vm, !dma_fence_is_signaled((*id)->last_flush))) needs_flush = true; - flushed = (*id)->flushed_updates; - if (updates && (!flushed || dma_fence_is_later(updates, flushed))) + if ((*id)->flushed_updates < updates) needs_flush = true; if (needs_flush && !adev->vm_manager.concurrent_flush) @@ -366,11 +356,7 @@ static int amdgpu_vmid_grab_used(struct amdgpu_vm *vm, if (r) return r; - if (updates && (!flushed || dma_fence_is_later(updates, flushed))) { - 
dma_fence_put((*id)->flushed_updates); - (*id)->flushed_updates = dma_fence_get(updates); - } - + (*id)->flushed_updates = updates; job->vm_needs_flush |= needs_flush; return 0; } @@ -416,8 +402,6 @@ int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring, goto error; if (!id) { - struct dma_fence *updates = sync->last_vm_update; - /* Still no ID to use? Then use the idle one found earlier */ id = idle; @@ -426,8 +410,7 @@ int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring, if (r) goto error; - dma_fence_put(id->flushed_updates); - id->flushed_updates = dma_fence_get(updates); + id->flushed_updates = amdgpu_vm_tlb_seq(vm); job->vm_needs_flush = true; } @@ -594,7 +577,6 @@ void amdgpu_vmid_mgr_fini(struct amdgpu_device *adev) struct amdgpu_vmid *id = &id_mgr->ids[j]; amdgpu_sync_free(&id->active); - dma_fence_put(id->flushed_updates); dma_fence_put(id->last_flush); dma_fence_put(id->pasid_mapping); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h index 0c3b4fa1f936..06c8a0034fa5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h @@ -47,7 +47,7 @@ struct amdgpu_vmid { uint64_t pd_gpu_addr; /* last flushed PD/PT update */ - struct dma_fence *flushed_updates; + uint64_t flushed_updates; uint32_t current_gpu_reset_count; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h index 55fbff2be761..b6c7fb00e05a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h @@ -24,6 +24,8 @@ #ifndef __AMDGPU_JPEG_H__ #define __AMDGPU_JPEG_H__ +#include "amdgpu_ras.h" + #define AMDGPU_MAX_JPEG_INSTANCES 2 #define AMDGPU_JPEG_HARVEST_JPEG0 (1 << 0) @@ -39,6 +41,10 @@ struct amdgpu_jpeg_inst { struct amdgpu_jpeg_reg external; }; +struct amdgpu_jpeg_ras { + struct amdgpu_ras_block_object ras_block; +}; + struct amdgpu_jpeg { uint8_t num_jpeg_inst; struct amdgpu_jpeg_inst 
inst[AMDGPU_MAX_JPEG_INSTANCES]; @@ -48,6 +54,8 @@ struct amdgpu_jpeg { enum amd_powergating_state cur_state; struct mutex jpeg_pg_lock; atomic_t total_submission_cnt; + struct ras_common_if *ras_if; + struct amdgpu_jpeg_ras *ras; }; int amdgpu_jpeg_sw_init(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h index 9f1540f0ebf9..f939395c5914 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h @@ -34,7 +34,7 @@ struct amdgpu_mmhub_funcs { void (*gart_disable)(struct amdgpu_device *adev); int (*set_clockgating)(struct amdgpu_device *adev, enum amd_clockgating_state state); - void (*get_clockgating)(struct amdgpu_device *adev, u32 *flags); + void (*get_clockgating)(struct amdgpu_device *adev, u64 *flags); void (*setup_vm_pt_regs)(struct amdgpu_device *adev, uint32_t vmid, uint64_t page_table_base); void (*update_power_gating)(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h index a546cb3cfa18..e8da738b309e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h @@ -592,19 +592,6 @@ int amdgpu_display_get_crtc_scanoutpos(struct drm_device *dev, int *hpos, ktime_t *stime, ktime_t *etime, const struct drm_display_mode *mode); -int amdgpu_display_gem_fb_init(struct drm_device *dev, - struct amdgpu_framebuffer *rfb, - const struct drm_mode_fb_cmd2 *mode_cmd, - struct drm_gem_object *obj); -int amdgpu_display_gem_fb_verify_and_init( - struct drm_device *dev, struct amdgpu_framebuffer *rfb, - struct drm_file *file_priv, const struct drm_mode_fb_cmd2 *mode_cmd, - struct drm_gem_object *obj); -int amdgpu_display_framebuffer_init(struct drm_device *dev, - struct amdgpu_framebuffer *rfb, - const struct drm_mode_fb_cmd2 *mode_cmd, - struct drm_gem_object *obj); - int amdgpufb_remove(struct drm_device *dev, struct drm_framebuffer *fb); 
void amdgpu_enc_destroy(struct drm_encoder *encoder); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h index 3d13e601fc35..03439083182a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h @@ -83,7 +83,7 @@ struct amdgpu_nbio_funcs { void (*update_medium_grain_light_sleep)(struct amdgpu_device *adev, bool enable); void (*get_clockgating_state)(struct amdgpu_device *adev, - u32 *flags); + u64 *flags); void (*ih_control)(struct amdgpu_device *adev); void (*init_registers)(struct amdgpu_device *adev); void (*remap_hdp_registers)(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index e92ecabfa7bd..5444515c1476 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -472,7 +472,7 @@ static bool amdgpu_bo_validate_size(struct amdgpu_device *adev, fail: DRM_DEBUG("BO size %lu > total memory in domain: %llu\n", size, - man->size << PAGE_SHIFT); + man->size); return false; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 424c22a841f4..ec709997c9c7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -66,6 +66,8 @@ const char *ras_block_string[] = { "mp1", "fuse", "mca", + "vcn", + "jpeg", }; const char *ras_mca_block_string[] = { @@ -2205,6 +2207,13 @@ static void amdgpu_ras_check_supported(struct amdgpu_device *adev) dev_info(adev->dev, "SRAM ECC is active.\n"); adev->ras_hw_enabled |= ~(1 << AMDGPU_RAS_BLOCK__UMC | 1 << AMDGPU_RAS_BLOCK__DF); + + if (adev->ip_versions[VCN_HWIP][0] == IP_VERSION(2, 6, 0)) + adev->ras_hw_enabled |= (1 << AMDGPU_RAS_BLOCK__VCN | + 1 << AMDGPU_RAS_BLOCK__JPEG); + else + adev->ras_hw_enabled &= ~(1 << AMDGPU_RAS_BLOCK__VCN | + 1 << AMDGPU_RAS_BLOCK__JPEG); } else { dev_info(adev->dev, "SRAM ECC is not presented.\n"); 
} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h index 9314fde81e68..606df8869b89 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h @@ -49,6 +49,8 @@ enum amdgpu_ras_block { AMDGPU_RAS_BLOCK__MP1, AMDGPU_RAS_BLOCK__FUSE, AMDGPU_RAS_BLOCK__MCA, + AMDGPU_RAS_BLOCK__VCN, + AMDGPU_RAS_BLOCK__JPEG, AMDGPU_RAS_BLOCK__LAST }; @@ -506,6 +508,7 @@ struct amdgpu_ras_block_hw_ops { void (*query_ras_error_address)(struct amdgpu_device *adev, void *ras_error_status); void (*reset_ras_error_count)(struct amdgpu_device *adev); void (*reset_ras_error_status)(struct amdgpu_device *adev); + bool (*query_poison_status)(struct amdgpu_device *adev); }; /* work flow diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_smuio.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_smuio.h index 484bb3dcec47..c7a823f3f2c5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_smuio.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_smuio.h @@ -27,7 +27,7 @@ struct amdgpu_smuio_funcs { u32 (*get_rom_index_offset)(struct amdgpu_device *adev); u32 (*get_rom_data_offset)(struct amdgpu_device *adev); void (*update_rom_clock_gating)(struct amdgpu_device *adev, bool enable); - void (*get_clock_gating_state)(struct amdgpu_device *adev, u32 *flags); + void (*get_clock_gating_state)(struct amdgpu_device *adev, u64 *flags); u32 (*get_die_id)(struct amdgpu_device *adev); u32 (*get_socket_id)(struct amdgpu_device *adev); bool (*is_host_gpu_xgmi_supported)(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c index 11c46b3e4c60..504af1b93bfa 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c @@ -51,7 +51,6 @@ static struct kmem_cache *amdgpu_sync_slab; void amdgpu_sync_create(struct amdgpu_sync *sync) { hash_init(sync->fences); - sync->last_vm_update = NULL; } /** @@ -171,23 +170,6 @@ int amdgpu_sync_fence(struct amdgpu_sync 
*sync, struct dma_fence *f) return 0; } -/** - * amdgpu_sync_vm_fence - remember to sync to this VM fence - * - * @sync: sync object to add fence to - * @fence: the VM fence to add - * - * Add the fence to the sync object and remember it as VM update. - */ -int amdgpu_sync_vm_fence(struct amdgpu_sync *sync, struct dma_fence *fence) -{ - if (!fence) - return 0; - - amdgpu_sync_keep_later(&sync->last_vm_update, fence); - return amdgpu_sync_fence(sync, fence); -} - /* Determine based on the owner and mode if we should sync to a fence or not */ static bool amdgpu_sync_test_fence(struct amdgpu_device *adev, enum amdgpu_sync_mode mode, @@ -377,9 +359,6 @@ int amdgpu_sync_clone(struct amdgpu_sync *source, struct amdgpu_sync *clone) } } - dma_fence_put(clone->last_vm_update); - clone->last_vm_update = dma_fence_get(source->last_vm_update); - return 0; } @@ -420,8 +399,6 @@ void amdgpu_sync_free(struct amdgpu_sync *sync) dma_fence_put(e->fence); kmem_cache_free(amdgpu_sync_slab, e); } - - dma_fence_put(sync->last_vm_update); } /** diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h index 7c0fe20c470d..2d5c613cda10 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h @@ -43,12 +43,10 @@ enum amdgpu_sync_mode { */ struct amdgpu_sync { DECLARE_HASHTABLE(fences, 4); - struct dma_fence *last_vm_update; }; void amdgpu_sync_create(struct amdgpu_sync *sync); int amdgpu_sync_fence(struct amdgpu_sync *sync, struct dma_fence *f); -int amdgpu_sync_vm_fence(struct amdgpu_sync *sync, struct dma_fence *fence); int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync, struct dma_resv *resv, enum amdgpu_sync_mode mode, void *owner); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c index f99093f2ebc7..a0ee828a4a97 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c @@ -52,7 +52,7 @@ #define 
FIRMWARE_ALDEBARAN "amdgpu/aldebaran_vcn.bin" #define FIRMWARE_BEIGE_GOBY "amdgpu/beige_goby_vcn.bin" #define FIRMWARE_YELLOW_CARP "amdgpu/yellow_carp_vcn.bin" -#define FIRMWARE_VCN_3_1_2 "amdgpu/vcn_3_1_2_vcn.bin" +#define FIRMWARE_VCN_3_1_2 "amdgpu/vcn_3_1_2.bin" MODULE_FIRMWARE(FIRMWARE_RAVEN); MODULE_FIRMWARE(FIRMWARE_PICASSO); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h index f06fb7f882e2..fb39065a96bd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h @@ -24,6 +24,8 @@ #ifndef __AMDGPU_VCN_H__ #define __AMDGPU_VCN_H__ +#include "amdgpu_ras.h" + #define AMDGPU_VCN_STACK_SIZE (128*1024) #define AMDGPU_VCN_CONTEXT_SIZE (512*1024) @@ -233,6 +235,10 @@ struct amdgpu_vcn_inst { struct amdgpu_vcn_fw_shared fw_shared; }; +struct amdgpu_vcn_ras { + struct amdgpu_ras_block_object ras_block; +}; + struct amdgpu_vcn { unsigned fw_version; struct delayed_work idle_work; @@ -252,6 +258,9 @@ struct amdgpu_vcn { unsigned harvest_config; int (*pause_dpg_mode)(struct amdgpu_device *adev, int inst_idx, struct dpg_pause_state *new_state); + + struct ras_common_if *ras_if; + struct amdgpu_vcn_ras *ras; }; struct amdgpu_fw_shared_rb_ptrs_struct { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c index a025f080aa6a..ea92edcc0432 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c @@ -23,6 +23,10 @@ #include <linux/module.h> +#ifdef CONFIG_X86 +#include <asm/hypervisor.h> +#endif + #include <drm/drm_drv.h> #include "amdgpu.h" @@ -723,8 +727,12 @@ void amdgpu_detect_virtualization(struct amdgpu_device *adev) break; case CHIP_VEGA10: soc15_set_virt_ops(adev); - /* send a dummy GPU_INIT_DATA request to host on vega10 */ - amdgpu_virt_request_init_data(adev); +#ifdef CONFIG_X86 + /* not send GPU_INIT_DATA with MS_HYPERV*/ + if (!hypervisor_is_type(X86_HYPER_MS_HYPERV)) +#endif + /* send a dummy 
GPU_INIT_DATA request to host on vega10 */ + amdgpu_virt_request_init_data(adev); break; case CHIP_VEGA20: case CHIP_ARCTURUS: @@ -862,11 +870,11 @@ static u32 amdgpu_virt_rlcg_reg_rw(struct amdgpu_device *adev, u32 offset, u32 v uint32_t timeout = 50000; uint32_t i, tmp; uint32_t ret = 0; - static void *scratch_reg0; - static void *scratch_reg1; - static void *scratch_reg2; - static void *scratch_reg3; - static void *spare_int; + void *scratch_reg0; + void *scratch_reg1; + void *scratch_reg2; + void *scratch_reg3; + void *spare_int; if (!adev->gfx.rlc.rlcg_reg_access_supported) { dev_err(adev->dev, @@ -919,7 +927,7 @@ static u32 amdgpu_virt_rlcg_reg_rw(struct amdgpu_device *adev, u32 offset, u32 v "wrong operation type, rlcg failed to program reg: 0x%05x\n", offset); } else if (tmp & AMDGPU_RLCG_REG_NOT_IN_RANGE) { dev_err(adev->dev, - "regiser is not in range, rlcg failed to program reg: 0x%05x\n", offset); + "register is not in range, rlcg failed to program reg: 0x%05x\n", offset); } else { dev_err(adev->dev, "unknown error type, rlcg failed to program reg: 0x%05x\n", offset); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 5277c10d901d..f9479e23de18 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -89,6 +89,21 @@ struct amdgpu_prt_cb { }; /** + * struct amdgpu_vm_tlb_seq_cb - Helper to increment the TLB flush sequence + */ +struct amdgpu_vm_tlb_seq_cb { + /** + * @vm: pointer to the amdgpu_vm structure to set the fence sequence on + */ + struct amdgpu_vm *vm; + + /** + * @cb: callback + */ + struct dma_fence_cb cb; +}; + +/** * amdgpu_vm_set_pasid - manage pasid and vm ptr mapping * * @adev: amdgpu_device pointer @@ -155,108 +170,6 @@ static inline void amdgpu_vm_eviction_unlock(struct amdgpu_vm *vm) } /** - * amdgpu_vm_level_shift - return the addr shift for each level - * - * @adev: amdgpu_device pointer - * @level: VMPT level - * - * Returns: - * The number 
of bits the pfn needs to be right shifted for a level. - */ -static unsigned amdgpu_vm_level_shift(struct amdgpu_device *adev, - unsigned level) -{ - switch (level) { - case AMDGPU_VM_PDB2: - case AMDGPU_VM_PDB1: - case AMDGPU_VM_PDB0: - return 9 * (AMDGPU_VM_PDB0 - level) + - adev->vm_manager.block_size; - case AMDGPU_VM_PTB: - return 0; - default: - return ~0; - } -} - -/** - * amdgpu_vm_num_entries - return the number of entries in a PD/PT - * - * @adev: amdgpu_device pointer - * @level: VMPT level - * - * Returns: - * The number of entries in a page directory or page table. - */ -static unsigned amdgpu_vm_num_entries(struct amdgpu_device *adev, - unsigned level) -{ - unsigned shift = amdgpu_vm_level_shift(adev, - adev->vm_manager.root_level); - - if (level == adev->vm_manager.root_level) - /* For the root directory */ - return round_up(adev->vm_manager.max_pfn, 1ULL << shift) - >> shift; - else if (level != AMDGPU_VM_PTB) - /* Everything in between */ - return 512; - else - /* For the page tables on the leaves */ - return AMDGPU_VM_PTE_COUNT(adev); -} - -/** - * amdgpu_vm_num_ats_entries - return the number of ATS entries in the root PD - * - * @adev: amdgpu_device pointer - * - * Returns: - * The number of entries in the root page directory which needs the ATS setting. - */ -static unsigned amdgpu_vm_num_ats_entries(struct amdgpu_device *adev) -{ - unsigned shift; - - shift = amdgpu_vm_level_shift(adev, adev->vm_manager.root_level); - return AMDGPU_GMC_HOLE_START >> (shift + AMDGPU_GPU_PAGE_SHIFT); -} - -/** - * amdgpu_vm_entries_mask - the mask to get the entry number of a PD/PT - * - * @adev: amdgpu_device pointer - * @level: VMPT level - * - * Returns: - * The mask to extract the entry number of a PD/PT from an address. 
- */ -static uint32_t amdgpu_vm_entries_mask(struct amdgpu_device *adev, - unsigned int level) -{ - if (level <= adev->vm_manager.root_level) - return 0xffffffff; - else if (level != AMDGPU_VM_PTB) - return 0x1ff; - else - return AMDGPU_VM_PTE_COUNT(adev) - 1; -} - -/** - * amdgpu_vm_bo_size - returns the size of the BOs in bytes - * - * @adev: amdgpu_device pointer - * @level: VMPT level - * - * Returns: - * The size of the BO for a page directory or page table in bytes. - */ -static unsigned amdgpu_vm_bo_size(struct amdgpu_device *adev, unsigned level) -{ - return AMDGPU_GPU_PAGE_ALIGN(amdgpu_vm_num_entries(adev, level) * 8); -} - -/** * amdgpu_vm_bo_evicted - vm_bo is evicted * * @vm_bo: vm_bo which is evicted @@ -358,9 +271,8 @@ static void amdgpu_vm_bo_done(struct amdgpu_vm_bo_base *vm_bo) * Initialize a bo_va_base structure and add it to the appropriate lists * */ -static void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base, - struct amdgpu_vm *vm, - struct amdgpu_bo *bo) +void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base, + struct amdgpu_vm *vm, struct amdgpu_bo *bo) { base->vm = vm; base->bo = bo; @@ -396,228 +308,6 @@ static void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base, } /** - * amdgpu_vm_pt_parent - get the parent page directory - * - * @pt: child page table - * - * Helper to get the parent entry for the child page table. NULL if we are at - * the root page directory. 
- */ -static struct amdgpu_vm_bo_base *amdgpu_vm_pt_parent(struct amdgpu_vm_bo_base *pt) -{ - struct amdgpu_bo *parent = pt->bo->parent; - - if (!parent) - return NULL; - - return parent->vm_bo; -} - -/* - * amdgpu_vm_pt_cursor - state for for_each_amdgpu_vm_pt - */ -struct amdgpu_vm_pt_cursor { - uint64_t pfn; - struct amdgpu_vm_bo_base *parent; - struct amdgpu_vm_bo_base *entry; - unsigned level; -}; - -/** - * amdgpu_vm_pt_start - start PD/PT walk - * - * @adev: amdgpu_device pointer - * @vm: amdgpu_vm structure - * @start: start address of the walk - * @cursor: state to initialize - * - * Initialize a amdgpu_vm_pt_cursor to start a walk. - */ -static void amdgpu_vm_pt_start(struct amdgpu_device *adev, - struct amdgpu_vm *vm, uint64_t start, - struct amdgpu_vm_pt_cursor *cursor) -{ - cursor->pfn = start; - cursor->parent = NULL; - cursor->entry = &vm->root; - cursor->level = adev->vm_manager.root_level; -} - -/** - * amdgpu_vm_pt_descendant - go to child node - * - * @adev: amdgpu_device pointer - * @cursor: current state - * - * Walk to the child node of the current node. - * Returns: - * True if the walk was possible, false otherwise. - */ -static bool amdgpu_vm_pt_descendant(struct amdgpu_device *adev, - struct amdgpu_vm_pt_cursor *cursor) -{ - unsigned mask, shift, idx; - - if ((cursor->level == AMDGPU_VM_PTB) || !cursor->entry || - !cursor->entry->bo) - return false; - - mask = amdgpu_vm_entries_mask(adev, cursor->level); - shift = amdgpu_vm_level_shift(adev, cursor->level); - - ++cursor->level; - idx = (cursor->pfn >> shift) & mask; - cursor->parent = cursor->entry; - cursor->entry = &to_amdgpu_bo_vm(cursor->entry->bo)->entries[idx]; - return true; -} - -/** - * amdgpu_vm_pt_sibling - go to sibling node - * - * @adev: amdgpu_device pointer - * @cursor: current state - * - * Walk to the sibling node of the current node. - * Returns: - * True if the walk was possible, false otherwise. 
- */ -static bool amdgpu_vm_pt_sibling(struct amdgpu_device *adev, - struct amdgpu_vm_pt_cursor *cursor) -{ - unsigned shift, num_entries; - - /* Root doesn't have a sibling */ - if (!cursor->parent) - return false; - - /* Go to our parents and see if we got a sibling */ - shift = amdgpu_vm_level_shift(adev, cursor->level - 1); - num_entries = amdgpu_vm_num_entries(adev, cursor->level - 1); - - if (cursor->entry == &to_amdgpu_bo_vm(cursor->parent->bo)->entries[num_entries - 1]) - return false; - - cursor->pfn += 1ULL << shift; - cursor->pfn &= ~((1ULL << shift) - 1); - ++cursor->entry; - return true; -} - -/** - * amdgpu_vm_pt_ancestor - go to parent node - * - * @cursor: current state - * - * Walk to the parent node of the current node. - * Returns: - * True if the walk was possible, false otherwise. - */ -static bool amdgpu_vm_pt_ancestor(struct amdgpu_vm_pt_cursor *cursor) -{ - if (!cursor->parent) - return false; - - --cursor->level; - cursor->entry = cursor->parent; - cursor->parent = amdgpu_vm_pt_parent(cursor->parent); - return true; -} - -/** - * amdgpu_vm_pt_next - get next PD/PT in hieratchy - * - * @adev: amdgpu_device pointer - * @cursor: current state - * - * Walk the PD/PT tree to the next node. - */ -static void amdgpu_vm_pt_next(struct amdgpu_device *adev, - struct amdgpu_vm_pt_cursor *cursor) -{ - /* First try a newborn child */ - if (amdgpu_vm_pt_descendant(adev, cursor)) - return; - - /* If that didn't worked try to find a sibling */ - while (!amdgpu_vm_pt_sibling(adev, cursor)) { - /* No sibling, go to our parents and grandparents */ - if (!amdgpu_vm_pt_ancestor(cursor)) { - cursor->pfn = ~0ll; - return; - } - } -} - -/** - * amdgpu_vm_pt_first_dfs - start a deep first search - * - * @adev: amdgpu_device structure - * @vm: amdgpu_vm structure - * @start: optional cursor to start with - * @cursor: state to initialize - * - * Starts a deep first traversal of the PD/PT tree. 
- */ -static void amdgpu_vm_pt_first_dfs(struct amdgpu_device *adev, - struct amdgpu_vm *vm, - struct amdgpu_vm_pt_cursor *start, - struct amdgpu_vm_pt_cursor *cursor) -{ - if (start) - *cursor = *start; - else - amdgpu_vm_pt_start(adev, vm, 0, cursor); - while (amdgpu_vm_pt_descendant(adev, cursor)); -} - -/** - * amdgpu_vm_pt_continue_dfs - check if the deep first search should continue - * - * @start: starting point for the search - * @entry: current entry - * - * Returns: - * True when the search should continue, false otherwise. - */ -static bool amdgpu_vm_pt_continue_dfs(struct amdgpu_vm_pt_cursor *start, - struct amdgpu_vm_bo_base *entry) -{ - return entry && (!start || entry != start->entry); -} - -/** - * amdgpu_vm_pt_next_dfs - get the next node for a deep first search - * - * @adev: amdgpu_device structure - * @cursor: current state - * - * Move the cursor to the next node in a deep first search. - */ -static void amdgpu_vm_pt_next_dfs(struct amdgpu_device *adev, - struct amdgpu_vm_pt_cursor *cursor) -{ - if (!cursor->entry) - return; - - if (!cursor->parent) - cursor->entry = NULL; - else if (amdgpu_vm_pt_sibling(adev, cursor)) - while (amdgpu_vm_pt_descendant(adev, cursor)); - else - amdgpu_vm_pt_ancestor(cursor); -} - -/* - * for_each_amdgpu_vm_pt_dfs_safe - safe deep first search of all PDs/PTs - */ -#define for_each_amdgpu_vm_pt_dfs_safe(adev, vm, start, cursor, entry) \ - for (amdgpu_vm_pt_first_dfs((adev), (vm), (start), &(cursor)), \ - (entry) = (cursor).entry, amdgpu_vm_pt_next_dfs((adev), &(cursor));\ - amdgpu_vm_pt_continue_dfs((start), (entry)); \ - (entry) = (cursor).entry, amdgpu_vm_pt_next_dfs((adev), &(cursor))) - -/** * amdgpu_vm_get_pd_bo - add the VM PD to a validation list * * @vm: vm providing the BOs @@ -726,316 +416,6 @@ bool amdgpu_vm_ready(struct amdgpu_vm *vm) } /** - * amdgpu_vm_clear_bo - initially clear the PDs/PTs - * - * @adev: amdgpu_device pointer - * @vm: VM to clear BO from - * @vmbo: BO to clear - * @immediate: use an 
immediate update - * - * Root PD needs to be reserved when calling this. - * - * Returns: - * 0 on success, errno otherwise. - */ -static int amdgpu_vm_clear_bo(struct amdgpu_device *adev, - struct amdgpu_vm *vm, - struct amdgpu_bo_vm *vmbo, - bool immediate) -{ - struct ttm_operation_ctx ctx = { true, false }; - unsigned level = adev->vm_manager.root_level; - struct amdgpu_vm_update_params params; - struct amdgpu_bo *ancestor = &vmbo->bo; - struct amdgpu_bo *bo = &vmbo->bo; - unsigned entries, ats_entries; - uint64_t addr; - int r, idx; - - /* Figure out our place in the hierarchy */ - if (ancestor->parent) { - ++level; - while (ancestor->parent->parent) { - ++level; - ancestor = ancestor->parent; - } - } - - entries = amdgpu_bo_size(bo) / 8; - if (!vm->pte_support_ats) { - ats_entries = 0; - - } else if (!bo->parent) { - ats_entries = amdgpu_vm_num_ats_entries(adev); - ats_entries = min(ats_entries, entries); - entries -= ats_entries; - - } else { - struct amdgpu_vm_bo_base *pt; - - pt = ancestor->vm_bo; - ats_entries = amdgpu_vm_num_ats_entries(adev); - if ((pt - to_amdgpu_bo_vm(vm->root.bo)->entries) >= ats_entries) { - ats_entries = 0; - } else { - ats_entries = entries; - entries = 0; - } - } - - r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); - if (r) - return r; - - if (vmbo->shadow) { - struct amdgpu_bo *shadow = vmbo->shadow; - - r = ttm_bo_validate(&shadow->tbo, &shadow->placement, &ctx); - if (r) - return r; - } - - if (!drm_dev_enter(adev_to_drm(adev), &idx)) - return -ENODEV; - - r = vm->update_funcs->map_table(vmbo); - if (r) - goto exit; - - memset(¶ms, 0, sizeof(params)); - params.adev = adev; - params.vm = vm; - params.immediate = immediate; - - r = vm->update_funcs->prepare(¶ms, NULL, AMDGPU_SYNC_EXPLICIT); - if (r) - goto exit; - - addr = 0; - if (ats_entries) { - uint64_t value = 0, flags; - - flags = AMDGPU_PTE_DEFAULT_ATC; - if (level != AMDGPU_VM_PTB) { - /* Handle leaf PDEs as PTEs */ - flags |= AMDGPU_PDE_PTE; - 
amdgpu_gmc_get_vm_pde(adev, level, &value, &flags); - } - - r = vm->update_funcs->update(¶ms, vmbo, addr, 0, ats_entries, - value, flags); - if (r) - goto exit; - - addr += ats_entries * 8; - } - - if (entries) { - uint64_t value = 0, flags = 0; - - if (adev->asic_type >= CHIP_VEGA10) { - if (level != AMDGPU_VM_PTB) { - /* Handle leaf PDEs as PTEs */ - flags |= AMDGPU_PDE_PTE; - amdgpu_gmc_get_vm_pde(adev, level, - &value, &flags); - } else { - /* Workaround for fault priority problem on GMC9 */ - flags = AMDGPU_PTE_EXECUTABLE; - } - } - - r = vm->update_funcs->update(¶ms, vmbo, addr, 0, entries, - value, flags); - if (r) - goto exit; - } - - r = vm->update_funcs->commit(¶ms, NULL); -exit: - drm_dev_exit(idx); - return r; -} - -/** - * amdgpu_vm_pt_create - create bo for PD/PT - * - * @adev: amdgpu_device pointer - * @vm: requesting vm - * @level: the page table level - * @immediate: use a immediate update - * @vmbo: pointer to the buffer object pointer - */ -static int amdgpu_vm_pt_create(struct amdgpu_device *adev, - struct amdgpu_vm *vm, - int level, bool immediate, - struct amdgpu_bo_vm **vmbo) -{ - struct amdgpu_bo_param bp; - struct amdgpu_bo *bo; - struct dma_resv *resv; - unsigned int num_entries; - int r; - - memset(&bp, 0, sizeof(bp)); - - bp.size = amdgpu_vm_bo_size(adev, level); - bp.byte_align = AMDGPU_GPU_PAGE_SIZE; - bp.domain = AMDGPU_GEM_DOMAIN_VRAM; - bp.domain = amdgpu_bo_get_preferred_domain(adev, bp.domain); - bp.flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS | - AMDGPU_GEM_CREATE_CPU_GTT_USWC; - - if (level < AMDGPU_VM_PTB) - num_entries = amdgpu_vm_num_entries(adev, level); - else - num_entries = 0; - - bp.bo_ptr_size = struct_size((*vmbo), entries, num_entries); - - if (vm->use_cpu_for_update) - bp.flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; - - bp.type = ttm_bo_type_kernel; - bp.no_wait_gpu = immediate; - if (vm->root.bo) - bp.resv = vm->root.bo->tbo.base.resv; - - r = amdgpu_bo_create_vm(adev, &bp, vmbo); - if (r) - return r; - - bo = 
&(*vmbo)->bo; - if (vm->is_compute_context || (adev->flags & AMD_IS_APU)) { - (*vmbo)->shadow = NULL; - return 0; - } - - if (!bp.resv) - WARN_ON(dma_resv_lock(bo->tbo.base.resv, - NULL)); - resv = bp.resv; - memset(&bp, 0, sizeof(bp)); - bp.size = amdgpu_vm_bo_size(adev, level); - bp.domain = AMDGPU_GEM_DOMAIN_GTT; - bp.flags = AMDGPU_GEM_CREATE_CPU_GTT_USWC; - bp.type = ttm_bo_type_kernel; - bp.resv = bo->tbo.base.resv; - bp.bo_ptr_size = sizeof(struct amdgpu_bo); - - r = amdgpu_bo_create(adev, &bp, &(*vmbo)->shadow); - - if (!resv) - dma_resv_unlock(bo->tbo.base.resv); - - if (r) { - amdgpu_bo_unref(&bo); - return r; - } - - (*vmbo)->shadow->parent = amdgpu_bo_ref(bo); - amdgpu_bo_add_to_shadow_list(*vmbo); - - return 0; -} - -/** - * amdgpu_vm_alloc_pts - Allocate a specific page table - * - * @adev: amdgpu_device pointer - * @vm: VM to allocate page tables for - * @cursor: Which page table to allocate - * @immediate: use an immediate update - * - * Make sure a specific page table or directory is allocated. - * - * Returns: - * 1 if page table needed to be allocated, 0 if page table was already - * allocated, negative errno if an error occurred. - */ -static int amdgpu_vm_alloc_pts(struct amdgpu_device *adev, - struct amdgpu_vm *vm, - struct amdgpu_vm_pt_cursor *cursor, - bool immediate) -{ - struct amdgpu_vm_bo_base *entry = cursor->entry; - struct amdgpu_bo *pt_bo; - struct amdgpu_bo_vm *pt; - int r; - - if (entry->bo) - return 0; - - r = amdgpu_vm_pt_create(adev, vm, cursor->level, immediate, &pt); - if (r) - return r; - - /* Keep a reference to the root directory to avoid - * freeing them up in the wrong order. 
- */ - pt_bo = &pt->bo; - pt_bo->parent = amdgpu_bo_ref(cursor->parent->bo); - amdgpu_vm_bo_base_init(entry, vm, pt_bo); - r = amdgpu_vm_clear_bo(adev, vm, pt, immediate); - if (r) - goto error_free_pt; - - return 0; - -error_free_pt: - amdgpu_bo_unref(&pt->shadow); - amdgpu_bo_unref(&pt_bo); - return r; -} - -/** - * amdgpu_vm_free_table - fre one PD/PT - * - * @entry: PDE to free - */ -static void amdgpu_vm_free_table(struct amdgpu_vm_bo_base *entry) -{ - struct amdgpu_bo *shadow; - - if (!entry->bo) - return; - - shadow = amdgpu_bo_shadowed(entry->bo); - if (shadow) { - ttm_bo_set_bulk_move(&shadow->tbo, NULL); - amdgpu_bo_unref(&shadow); - } - - ttm_bo_set_bulk_move(&entry->bo->tbo, NULL); - entry->bo->vm_bo = NULL; - list_del(&entry->vm_status); - amdgpu_bo_unref(&entry->bo); -} - -/** - * amdgpu_vm_free_pts - free PD/PT levels - * - * @adev: amdgpu device structure - * @vm: amdgpu vm structure - * @start: optional cursor where to start freeing PDs/PTs - * - * Free the page directory or page table level and all sub levels. - */ -static void amdgpu_vm_free_pts(struct amdgpu_device *adev, - struct amdgpu_vm *vm, - struct amdgpu_vm_pt_cursor *start) -{ - struct amdgpu_vm_pt_cursor cursor; - struct amdgpu_vm_bo_base *entry; - - for_each_amdgpu_vm_pt_dfs_safe(adev, vm, start, cursor, entry) - amdgpu_vm_free_table(entry); - - if (start) - amdgpu_vm_free_table(start->entry); -} - -/** * amdgpu_vm_check_compute_bug - check whether asic has compute vm bug * * @adev: amdgpu_device pointer @@ -1282,53 +662,6 @@ uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr) } /** - * amdgpu_vm_update_pde - update a single level in the hierarchy - * - * @params: parameters for the update - * @vm: requested vm - * @entry: entry to update - * - * Makes sure the requested entry in parent is up to date. 
- */ -static int amdgpu_vm_update_pde(struct amdgpu_vm_update_params *params, - struct amdgpu_vm *vm, - struct amdgpu_vm_bo_base *entry) -{ - struct amdgpu_vm_bo_base *parent = amdgpu_vm_pt_parent(entry); - struct amdgpu_bo *bo = parent->bo, *pbo; - uint64_t pde, pt, flags; - unsigned level; - - for (level = 0, pbo = bo->parent; pbo; ++level) - pbo = pbo->parent; - - level += params->adev->vm_manager.root_level; - amdgpu_gmc_get_pde_for_bo(entry->bo, level, &pt, &flags); - pde = (entry - to_amdgpu_bo_vm(parent->bo)->entries) * 8; - return vm->update_funcs->update(params, to_amdgpu_bo_vm(bo), pde, pt, - 1, 0, flags); -} - -/** - * amdgpu_vm_invalidate_pds - mark all PDs as invalid - * - * @adev: amdgpu_device pointer - * @vm: related vm - * - * Mark all PD level as invalid after an error. - */ -static void amdgpu_vm_invalidate_pds(struct amdgpu_device *adev, - struct amdgpu_vm *vm) -{ - struct amdgpu_vm_pt_cursor cursor; - struct amdgpu_vm_bo_base *entry; - - for_each_amdgpu_vm_pt_dfs_safe(adev, vm, NULL, cursor, entry) - if (entry->bo && !entry->moved) - amdgpu_vm_bo_relocated(entry); -} - -/** * amdgpu_vm_update_pdes - make sure that all directories are valid * * @adev: amdgpu_device pointer @@ -1344,6 +677,7 @@ int amdgpu_vm_update_pdes(struct amdgpu_device *adev, struct amdgpu_vm *vm, bool immediate) { struct amdgpu_vm_update_params params; + struct amdgpu_vm_bo_base *entry; int r, idx; if (list_empty(&vm->relocated)) @@ -1359,17 +693,10 @@ int amdgpu_vm_update_pdes(struct amdgpu_device *adev, r = vm->update_funcs->prepare(¶ms, NULL, AMDGPU_SYNC_EXPLICIT); if (r) - goto exit; - - while (!list_empty(&vm->relocated)) { - struct amdgpu_vm_bo_base *entry; - - entry = list_first_entry(&vm->relocated, - struct amdgpu_vm_bo_base, - vm_status); - amdgpu_vm_bo_idle(entry); + goto error; - r = amdgpu_vm_update_pde(¶ms, vm, entry); + list_for_each_entry(entry, &vm->relocated, vm_status) { + r = amdgpu_vm_pde_update(¶ms, entry); if (r) goto error; } @@ -1377,297 +704,68 @@ 
int amdgpu_vm_update_pdes(struct amdgpu_device *adev, r = vm->update_funcs->commit(&params, &vm->last_update); if (r) goto error; - drm_dev_exit(idx); - return 0; + + while (!list_empty(&vm->relocated)) { + entry = list_first_entry(&vm->relocated, + struct amdgpu_vm_bo_base, + vm_status); + amdgpu_vm_bo_idle(entry); + } error: - amdgpu_vm_invalidate_pds(adev, vm); -exit: drm_dev_exit(idx); return r; } -/* - * amdgpu_vm_update_flags - figure out flags for PTE updates - * - * Make sure to set the right flags for the PTEs at the desired level. - */ -static void amdgpu_vm_update_flags(struct amdgpu_vm_update_params *params, - struct amdgpu_bo_vm *pt, unsigned int level, - uint64_t pe, uint64_t addr, - unsigned int count, uint32_t incr, - uint64_t flags) - -{ - if (level != AMDGPU_VM_PTB) { - flags |= AMDGPU_PDE_PTE; - amdgpu_gmc_get_vm_pde(params->adev, level, &addr, &flags); - - } else if (params->adev->asic_type >= CHIP_VEGA10 && - !(flags & AMDGPU_PTE_VALID) && - !(flags & AMDGPU_PTE_PRT)) { - - /* Workaround for fault priority problem on GMC9 */ - flags |= AMDGPU_PTE_EXECUTABLE; - } - - params->vm->update_funcs->update(params, pt, pe, addr, count, incr, - flags); -} - /** - * amdgpu_vm_fragment - get fragment for PTEs + * amdgpu_vm_tlb_seq_cb - make sure to increment tlb sequence + * @fence: unused + * @cb: the callback structure * - * @params: see amdgpu_vm_update_params definition - * @start: first PTE to handle - * @end: last PTE to handle - * @flags: hw mapping flags - * @frag: resulting fragment size - * @frag_end: end of this fragment - * - * Returns the first possible fragment for the start and end address. - */ -static void amdgpu_vm_fragment(struct amdgpu_vm_update_params *params, - uint64_t start, uint64_t end, uint64_t flags, - unsigned int *frag, uint64_t *frag_end) -{ - /** - * The MC L1 TLB supports variable sized pages, based on a fragment - * field in the PTE.
When this field is set to a non-zero value, page - * granularity is increased from 4KB to (1 << (12 + frag)). The PTE - * flags are considered valid for all PTEs within the fragment range - * and corresponding mappings are assumed to be physically contiguous. - * - * The L1 TLB can store a single PTE for the whole fragment, - * significantly increasing the space available for translation - * caching. This leads to large improvements in throughput when the - * TLB is under pressure. - * - * The L2 TLB distributes small and large fragments into two - * asymmetric partitions. The large fragment cache is significantly - * larger. Thus, we try to use large fragments wherever possible. - * Userspace can support this by aligning virtual base address and - * allocation size to the fragment size. - * - * Starting with Vega10 the fragment size only controls the L1. The L2 - * is now directly feed with small/huge/giant pages from the walker. - */ - unsigned max_frag; - - if (params->adev->asic_type < CHIP_VEGA10) - max_frag = params->adev->vm_manager.fragment_size; - else - max_frag = 31; - - /* system pages are non continuously */ - if (params->pages_addr) { - *frag = 0; - *frag_end = end; - return; - } - - /* This intentionally wraps around if no bit is set */ - *frag = min((unsigned)ffs(start) - 1, (unsigned)fls64(end - start) - 1); - if (*frag >= max_frag) { - *frag = max_frag; - *frag_end = end & ~((1ULL << max_frag) - 1); - } else { - *frag_end = start + (1 << *frag); - } -} - -/** - * amdgpu_vm_update_ptes - make sure that page tables are valid - * - * @params: see amdgpu_vm_update_params definition - * @start: start of GPU address range - * @end: end of GPU address range - * @dst: destination address to map to, the next dst inside the function - * @flags: mapping flags - * - * Update the page tables in the range @start - @end. - * - * Returns: - * 0 for success, -EINVAL for failure. + * Increments the tlb sequence to make sure that future CS execute a VM flush. 
*/ -static int amdgpu_vm_update_ptes(struct amdgpu_vm_update_params *params, - uint64_t start, uint64_t end, - uint64_t dst, uint64_t flags) +static void amdgpu_vm_tlb_seq_cb(struct dma_fence *fence, + struct dma_fence_cb *cb) { - struct amdgpu_device *adev = params->adev; - struct amdgpu_vm_pt_cursor cursor; - uint64_t frag_start = start, frag_end; - unsigned int frag; - int r; - - /* figure out the initial fragment */ - amdgpu_vm_fragment(params, frag_start, end, flags, &frag, &frag_end); - - /* walk over the address space and update the PTs */ - amdgpu_vm_pt_start(adev, params->vm, start, &cursor); - while (cursor.pfn < end) { - unsigned shift, parent_shift, mask; - uint64_t incr, entry_end, pe_start; - struct amdgpu_bo *pt; - - if (!params->unlocked) { - /* make sure that the page tables covering the - * address range are actually allocated - */ - r = amdgpu_vm_alloc_pts(params->adev, params->vm, - &cursor, params->immediate); - if (r) - return r; - } - - shift = amdgpu_vm_level_shift(adev, cursor.level); - parent_shift = amdgpu_vm_level_shift(adev, cursor.level - 1); - if (params->unlocked) { - /* Unlocked updates are only allowed on the leaves */ - if (amdgpu_vm_pt_descendant(adev, &cursor)) - continue; - } else if (adev->asic_type < CHIP_VEGA10 && - (flags & AMDGPU_PTE_VALID)) { - /* No huge page support before GMC v9 */ - if (cursor.level != AMDGPU_VM_PTB) { - if (!amdgpu_vm_pt_descendant(adev, &cursor)) - return -ENOENT; - continue; - } - } else if (frag < shift) { - /* We can't use this level when the fragment size is - * smaller than the address shift. Go to the next - * child entry and try again. - */ - if (amdgpu_vm_pt_descendant(adev, &cursor)) - continue; - } else if (frag >= parent_shift) { - /* If the fragment size is even larger than the parent - * shift we should go up one level and check it again. 
- */ - if (!amdgpu_vm_pt_ancestor(&cursor)) - return -EINVAL; - continue; - } + struct amdgpu_vm_tlb_seq_cb *tlb_cb; - pt = cursor.entry->bo; - if (!pt) { - /* We need all PDs and PTs for mapping something, */ - if (flags & AMDGPU_PTE_VALID) - return -ENOENT; - - /* but unmapping something can happen at a higher - * level. - */ - if (!amdgpu_vm_pt_ancestor(&cursor)) - return -EINVAL; - - pt = cursor.entry->bo; - shift = parent_shift; - frag_end = max(frag_end, ALIGN(frag_start + 1, - 1ULL << shift)); - } - - /* Looks good so far, calculate parameters for the update */ - incr = (uint64_t)AMDGPU_GPU_PAGE_SIZE << shift; - mask = amdgpu_vm_entries_mask(adev, cursor.level); - pe_start = ((cursor.pfn >> shift) & mask) * 8; - entry_end = ((uint64_t)mask + 1) << shift; - entry_end += cursor.pfn & ~(entry_end - 1); - entry_end = min(entry_end, end); - - do { - struct amdgpu_vm *vm = params->vm; - uint64_t upd_end = min(entry_end, frag_end); - unsigned nptes = (upd_end - frag_start) >> shift; - uint64_t upd_flags = flags | AMDGPU_PTE_FRAG(frag); - - /* This can happen when we set higher level PDs to - * silent to stop fault floods. - */ - nptes = max(nptes, 1u); - - trace_amdgpu_vm_update_ptes(params, frag_start, upd_end, - min(nptes, 32u), dst, incr, upd_flags, - vm->task_info.pid, - vm->immediate.fence_context); - amdgpu_vm_update_flags(params, to_amdgpu_bo_vm(pt), - cursor.level, pe_start, dst, - nptes, incr, upd_flags); - - pe_start += nptes * 8; - dst += nptes * incr; - - frag_start = upd_end; - if (frag_start >= frag_end) { - /* figure out the next fragment */ - amdgpu_vm_fragment(params, frag_start, end, - flags, &frag, &frag_end); - if (frag < shift) - break; - } - } while (frag_start < entry_end); - - if (amdgpu_vm_pt_descendant(adev, &cursor)) { - /* Free all child entries. - * Update the tables with the flags and addresses and free up subsequent - * tables in the case of huge pages or freed up areas. 
- * This is the maximum you can free, because all other page tables are not - * completely covered by the range and so potentially still in use. - */ - while (cursor.pfn < frag_start) { - /* Make sure previous mapping is freed */ - if (cursor.entry->bo) { - params->table_freed = true; - amdgpu_vm_free_pts(adev, params->vm, &cursor); - } - amdgpu_vm_pt_next(adev, &cursor); - } - - } else if (frag >= shift) { - /* or just move on to the next on the same level. */ - amdgpu_vm_pt_next(adev, &cursor); - } - } - - return 0; + tlb_cb = container_of(cb, typeof(*tlb_cb), cb); + atomic64_inc(&tlb_cb->vm->tlb_seq); + kfree(tlb_cb); } /** - * amdgpu_vm_bo_update_mapping - update a mapping in the vm page table + * amdgpu_vm_update_range - update a range in the vm page table * - * @adev: amdgpu_device pointer of the VM - * @bo_adev: amdgpu_device pointer of the mapped BO - * @vm: requested vm + * @adev: amdgpu_device pointer to use for commands + * @vm: the VM to update the range * @immediate: immediate submission in a page fault * @unlocked: unlocked invalidation during MM callback + * @flush_tlb: trigger tlb invalidation after update completed * @resv: fences we need to sync to * @start: start of mapped range * @last: last mapped entry * @flags: flags for the entries * @offset: offset into nodes and pages_addr + * @vram_base: base for vram mappings * @res: ttm_resource to map * @pages_addr: DMA addresses to use for mapping * @fence: optional resulting fence - * @table_freed: return true if page table is freed * * Fill in the page table entries between @start and @last. * * Returns: - * 0 for success, -EINVAL for failure. + * 0 for success, negative error code for failure.
*/ -int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, - struct amdgpu_device *bo_adev, - struct amdgpu_vm *vm, bool immediate, - bool unlocked, struct dma_resv *resv, - uint64_t start, uint64_t last, - uint64_t flags, uint64_t offset, - struct ttm_resource *res, - dma_addr_t *pages_addr, - struct dma_fence **fence, - bool *table_freed) +int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm, + bool immediate, bool unlocked, bool flush_tlb, + struct dma_resv *resv, uint64_t start, uint64_t last, + uint64_t flags, uint64_t offset, uint64_t vram_base, + struct ttm_resource *res, dma_addr_t *pages_addr, + struct dma_fence **fence) { struct amdgpu_vm_update_params params; + struct amdgpu_vm_tlb_seq_cb *tlb_cb; struct amdgpu_res_cursor cursor; enum amdgpu_sync_mode sync_mode; int r, idx; @@ -1675,6 +773,18 @@ int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, if (!drm_dev_enter(adev_to_drm(adev), &idx)) return -ENODEV; + tlb_cb = kmalloc(sizeof(*tlb_cb), GFP_KERNEL); + if (!tlb_cb) { + r = -ENOMEM; + goto error_unlock; + } + + /* Vega20+XGMI where PTEs get inadvertently cached in L2 texture cache, + * heavy-weight flush TLB unconditionally. + */ + flush_tlb |= adev->gmc.xgmi.num_physical_nodes && + adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 0); + memset(&params, 0, sizeof(params)); params.adev = adev; params.vm = vm; @@ -1693,7 +803,7 @@ int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, amdgpu_vm_eviction_lock(vm); if (vm->evicting) { r = -EBUSY; - goto error_unlock; + goto error_free; } if (!unlocked && !dma_fence_is_signaled(vm->last_unlocked)) { @@ -1706,7 +816,7 @@ int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, r = vm->update_funcs->prepare(&params, resv, sync_mode); if (r) - goto error_unlock; + goto error_free; amdgpu_res_first(pages_addr ?
NULL : res, offset, (last - start + 1) * AMDGPU_GPU_PAGE_SIZE, &cursor); @@ -1746,16 +856,15 @@ int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, } } else if (flags & (AMDGPU_PTE_VALID | AMDGPU_PTE_PRT)) { - addr = bo_adev->vm_manager.vram_base_offset + - cursor.start; + addr = vram_base + cursor.start; } else { addr = 0; } tmp = start + num_entries; - r = amdgpu_vm_update_ptes(&params, start, tmp, addr, flags); + r = amdgpu_vm_ptes_update(&params, start, tmp, addr, flags); if (r) - goto error_unlock; + goto error_free; amdgpu_res_next(&cursor, num_entries * AMDGPU_GPU_PAGE_SIZE); start = tmp; @@ -1763,8 +872,21 @@ int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, r = vm->update_funcs->commit(&params, fence); - if (table_freed) - *table_freed = *table_freed || params.table_freed; + if (flush_tlb || params.table_freed) { + tlb_cb->vm = vm; + if (fence && *fence && + !dma_fence_add_callback(*fence, &tlb_cb->cb, + amdgpu_vm_tlb_seq_cb)) { + dma_fence_put(vm->last_tlb_flush); + vm->last_tlb_flush = dma_fence_get(*fence); + } else { + amdgpu_vm_tlb_seq_cb(NULL, &tlb_cb->cb); + } + tlb_cb = NULL; + } + +error_free: + kfree(tlb_cb); error_unlock: amdgpu_vm_eviction_unlock(vm); @@ -1822,7 +944,6 @@ void amdgpu_vm_get_memory(struct amdgpu_vm *vm, uint64_t *vram_mem, * @adev: amdgpu_device pointer * @bo_va: requested BO and VM object * @clear: if true clear the entries - * @table_freed: return true if page table is freed * * Fill in the page table entries for @bo_va. * @@ -1830,7 +951,7 @@ void amdgpu_vm_get_memory(struct amdgpu_vm *vm, uint64_t *vram_mem, * 0 for success, -EINVAL for failure.
*/ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, - bool clear, bool *table_freed) + bool clear) { struct amdgpu_bo *bo = bo_va->base.bo; struct amdgpu_vm *vm = bo_va->base.vm; @@ -1838,9 +959,10 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, dma_addr_t *pages_addr = NULL; struct ttm_resource *mem; struct dma_fence **last_update; + bool flush_tlb = clear; struct dma_resv *resv; + uint64_t vram_base; uint64_t flags; - struct amdgpu_device *bo_adev = adev; int r; if (clear || !bo) { @@ -1865,14 +987,18 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, } if (bo) { + struct amdgpu_device *bo_adev; + flags = amdgpu_ttm_tt_pte_flags(adev, bo->tbo.ttm, mem); if (amdgpu_bo_encrypted(bo)) flags |= AMDGPU_PTE_TMZ; bo_adev = amdgpu_ttm_adev(bo->tbo.bdev); + vram_base = bo_adev->vm_manager.vram_base_offset; } else { flags = 0x0; + vram_base = 0; } if (clear || (bo && bo->tbo.base.resv == @@ -1882,7 +1008,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, last_update = &bo_va->last_pt_update; if (!clear && bo_va->base.moved) { - bo_va->base.moved = false; + flush_tlb = true; list_splice_init(&bo_va->valids, &bo_va->invalids); } else if (bo_va->cleared != clear) { @@ -1905,11 +1031,11 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, trace_amdgpu_vm_bo_update(mapping); - r = amdgpu_vm_bo_update_mapping(adev, bo_adev, vm, false, false, - resv, mapping->start, - mapping->last, update_flags, - mapping->offset, mem, - pages_addr, last_update, table_freed); + r = amdgpu_vm_update_range(adev, vm, false, false, flush_tlb, + resv, mapping->start, mapping->last, + update_flags, mapping->offset, + vram_base, mem, pages_addr, + last_update); if (r) return r; } @@ -1932,6 +1058,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, list_splice_init(&bo_va->invalids, &bo_va->valids); 
bo_va->cleared = clear; + bo_va->base.moved = false; if (trace_amdgpu_vm_bo_mapping_enabled()) { list_for_each_entry(mapping, &bo_va->valids, list) @@ -2100,10 +1227,10 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev, mapping->start < AMDGPU_GMC_HOLE_START) init_pte_value = AMDGPU_PTE_DEFAULT_ATC; - r = amdgpu_vm_bo_update_mapping(adev, adev, vm, false, false, - resv, mapping->start, - mapping->last, init_pte_value, - 0, NULL, NULL, &f, NULL); + r = amdgpu_vm_update_range(adev, vm, false, false, true, resv, + mapping->start, mapping->last, + init_pte_value, 0, 0, NULL, NULL, + &f); amdgpu_vm_free_mapping(adev, vm, mapping, f); if (r) { dma_fence_put(f); @@ -2145,7 +1272,7 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev, list_for_each_entry_safe(bo_va, tmp, &vm->moved, base.vm_status) { /* Per VM BOs never need to bo cleared in the page tables */ - r = amdgpu_vm_bo_update(adev, bo_va, false, NULL); + r = amdgpu_vm_bo_update(adev, bo_va, false); if (r) return r; } @@ -2164,7 +1291,7 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev, else clear = true; - r = amdgpu_vm_bo_update(adev, bo_va, clear, NULL); + r = amdgpu_vm_bo_update(adev, bo_va, clear); if (r) return r; @@ -2914,6 +2041,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm) vm->update_funcs = &amdgpu_vm_sdma_funcs; vm->last_update = NULL; vm->last_unlocked = dma_fence_get_stub(); + vm->last_tlb_flush = dma_fence_get_stub(); mutex_init(&vm->eviction_lock); vm->evicting = false; @@ -2933,7 +2061,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm) amdgpu_vm_bo_base_init(&vm->root, vm, root_bo); - r = amdgpu_vm_clear_bo(adev, vm, root, false); + r = amdgpu_vm_pt_clear(adev, vm, root, false); if (r) goto error_unreserve; @@ -2952,6 +2080,7 @@ error_free_root: vm->root.bo = NULL; error_free_delayed: + dma_fence_put(vm->last_tlb_flush); dma_fence_put(vm->last_unlocked); drm_sched_entity_destroy(&vm->delayed); @@ -2962,34 +2091,6 @@ 
error_free_immediate: } /** - * amdgpu_vm_check_clean_reserved - check if a VM is clean - * - * @adev: amdgpu_device pointer - * @vm: the VM to check - * - * check all entries of the root PD, if any subsequent PDs are allocated, - * it means there are page table creating and filling, and is no a clean - * VM - * - * Returns: - * 0 if this VM is clean - */ -static int amdgpu_vm_check_clean_reserved(struct amdgpu_device *adev, - struct amdgpu_vm *vm) -{ - enum amdgpu_vm_level root = adev->vm_manager.root_level; - unsigned int entries = amdgpu_vm_num_entries(adev, root); - unsigned int i = 0; - - for (i = 0; i < entries; i++) { - if (to_amdgpu_bo_vm(vm->root.bo)->entries[i].bo) - return -EINVAL; - } - - return 0; -} - -/** * amdgpu_vm_make_compute - Turn a GFX VM into a compute VM * * @adev: amdgpu_device pointer @@ -3018,17 +2119,17 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm) return r; /* Sanity checks */ - r = amdgpu_vm_check_clean_reserved(adev, vm); - if (r) + if (!amdgpu_vm_pt_is_root_clean(adev, vm)) { + r = -EINVAL; goto unreserve_bo; + } /* Check if PD needs to be reinitialized and do it before * changing any other state, in case it fails. 
*/ if (pte_support_ats != vm->pte_support_ats) { vm->pte_support_ats = pte_support_ats; - r = amdgpu_vm_clear_bo(adev, vm, - to_amdgpu_bo_vm(vm->root.bo), + r = amdgpu_vm_pt_clear(adev, vm, to_amdgpu_bo_vm(vm->root.bo), false); if (r) goto unreserve_bo; @@ -3096,6 +2197,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) struct amdgpu_bo_va_mapping *mapping, *tmp; bool prt_fini_needed = !!adev->gmc.gmc_funcs->set_prt; struct amdgpu_bo *root; + unsigned long flags; int i; amdgpu_amdkfd_gpuvm_destroy_cb(adev, vm); @@ -3105,6 +2207,11 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) amdgpu_vm_set_pasid(adev, vm, 0); dma_fence_wait(vm->last_unlocked, false); dma_fence_put(vm->last_unlocked); + dma_fence_wait(vm->last_tlb_flush, false); + /* Make sure that all fence callbacks have completed */ + spin_lock_irqsave(vm->last_tlb_flush->lock, flags); + spin_unlock_irqrestore(vm->last_tlb_flush->lock, flags); + dma_fence_put(vm->last_tlb_flush); list_for_each_entry_safe(mapping, tmp, &vm->freed, list) { if (mapping->flags & AMDGPU_PTE_PRT && prt_fini_needed) { @@ -3116,7 +2223,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) amdgpu_vm_free_mapping(adev, vm, mapping, NULL); } - amdgpu_vm_free_pts(adev, vm, NULL); + amdgpu_vm_pt_free_root(adev, vm); amdgpu_bo_unreserve(root); amdgpu_bo_unref(&root); WARN_ON(vm->root.bo); @@ -3376,9 +2483,8 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid, goto error_unlock; } - r = amdgpu_vm_bo_update_mapping(adev, adev, vm, true, false, NULL, addr, - addr, flags, value, NULL, NULL, NULL, - NULL); + r = amdgpu_vm_update_range(adev, vm, true, false, false, NULL, addr, + addr, flags, value, 0, NULL, NULL, NULL); if (r) goto error_unlock; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index bd7892482bbf..9ecb7f663e19 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -284,6 +284,10 @@ struct amdgpu_vm { struct drm_sched_entity immediate; struct drm_sched_entity delayed; + /* Last finished delayed update */ + atomic64_t tlb_seq; + struct dma_fence *last_tlb_flush; + /* Last unlocked submission to the scheduler entities */ struct dma_fence *last_unlocked; @@ -395,18 +399,17 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev, struct dma_fence **fence); int amdgpu_vm_handle_moved(struct amdgpu_device *adev, struct amdgpu_vm *vm); -int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, - struct amdgpu_device *bo_adev, - struct amdgpu_vm *vm, bool immediate, - bool unlocked, struct dma_resv *resv, - uint64_t start, uint64_t last, - uint64_t flags, uint64_t offset, - struct ttm_resource *res, - dma_addr_t *pages_addr, - struct dma_fence **fence, bool *free_table); +void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base, + struct amdgpu_vm *vm, struct amdgpu_bo *bo); +int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm, + bool immediate, bool unlocked, bool flush_tlb, + struct dma_resv *resv, uint64_t start, uint64_t last, + uint64_t flags, uint64_t offset, uint64_t vram_base, + struct ttm_resource *res, dma_addr_t *pages_addr, + struct dma_fence **fence); int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, - bool clear, bool *table_freed); + bool clear); bool amdgpu_vm_evictable(struct amdgpu_bo *bo); void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev, struct amdgpu_bo *bo, bool evicted); @@ -455,8 +458,34 @@ void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev, void amdgpu_vm_get_memory(struct amdgpu_vm *vm, uint64_t *vram_mem, uint64_t *gtt_mem, uint64_t *cpu_mem); +int amdgpu_vm_pt_clear(struct amdgpu_device *adev, struct amdgpu_vm *vm, + struct amdgpu_bo_vm *vmbo, bool immediate); +int amdgpu_vm_pt_create(struct amdgpu_device *adev, struct amdgpu_vm *vm, + int level, bool immediate, struct amdgpu_bo_vm **vmbo); 
+void amdgpu_vm_pt_free_root(struct amdgpu_device *adev, struct amdgpu_vm *vm); +bool amdgpu_vm_pt_is_root_clean(struct amdgpu_device *adev, + struct amdgpu_vm *vm); + +int amdgpu_vm_pde_update(struct amdgpu_vm_update_params *params, + struct amdgpu_vm_bo_base *entry); +int amdgpu_vm_ptes_update(struct amdgpu_vm_update_params *params, + uint64_t start, uint64_t end, + uint64_t dst, uint64_t flags); + #if defined(CONFIG_DEBUG_FS) void amdgpu_debugfs_vm_bo_info(struct amdgpu_vm *vm, struct seq_file *m); #endif +/** + * amdgpu_vm_tlb_seq - return tlb flush sequence number + * @vm: the amdgpu_vm structure to query + * + * Returns the tlb flush sequence number which indicates that the VM TLBs needs + * to be invalidated whenever the sequence number change. + */ +static inline uint64_t amdgpu_vm_tlb_seq(struct amdgpu_vm *vm) +{ + return atomic64_read(&vm->tlb_seq); +} + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c new file mode 100644 index 000000000000..7761a3ea172e --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c @@ -0,0 +1,977 @@ +// SPDX-License-Identifier: GPL-2.0 OR MIT +/* + * Copyright 2022 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include <drm/drm_drv.h> + +#include "amdgpu.h" +#include "amdgpu_trace.h" +#include "amdgpu_vm.h" + +/* + * amdgpu_vm_pt_cursor - state for for_each_amdgpu_vm_pt + */ +struct amdgpu_vm_pt_cursor { + uint64_t pfn; + struct amdgpu_vm_bo_base *parent; + struct amdgpu_vm_bo_base *entry; + unsigned int level; +}; + +/** + * amdgpu_vm_pt_level_shift - return the addr shift for each level + * + * @adev: amdgpu_device pointer + * @level: VMPT level + * + * Returns: + * The number of bits the pfn needs to be right shifted for a level. + */ +static unsigned int amdgpu_vm_pt_level_shift(struct amdgpu_device *adev, + unsigned int level) +{ + switch (level) { + case AMDGPU_VM_PDB2: + case AMDGPU_VM_PDB1: + case AMDGPU_VM_PDB0: + return 9 * (AMDGPU_VM_PDB0 - level) + + adev->vm_manager.block_size; + case AMDGPU_VM_PTB: + return 0; + default: + return ~0; + } +} + +/** + * amdgpu_vm_pt_num_entries - return the number of entries in a PD/PT + * + * @adev: amdgpu_device pointer + * @level: VMPT level + * + * Returns: + * The number of entries in a page directory or page table. 
+ */ +static unsigned int amdgpu_vm_pt_num_entries(struct amdgpu_device *adev, + unsigned int level) +{ + unsigned int shift; + + shift = amdgpu_vm_pt_level_shift(adev, adev->vm_manager.root_level); + if (level == adev->vm_manager.root_level) + /* For the root directory */ + return round_up(adev->vm_manager.max_pfn, 1ULL << shift) + >> shift; + else if (level != AMDGPU_VM_PTB) + /* Everything in between */ + return 512; + + /* For the page tables on the leaves */ + return AMDGPU_VM_PTE_COUNT(adev); +} + +/** + * amdgpu_vm_pt_num_ats_entries - return the number of ATS entries in the root PD + * + * @adev: amdgpu_device pointer + * + * Returns: + * The number of entries in the root page directory which needs the ATS setting. + */ +static unsigned int amdgpu_vm_pt_num_ats_entries(struct amdgpu_device *adev) +{ + unsigned int shift; + + shift = amdgpu_vm_pt_level_shift(adev, adev->vm_manager.root_level); + return AMDGPU_GMC_HOLE_START >> (shift + AMDGPU_GPU_PAGE_SHIFT); +} + +/** + * amdgpu_vm_pt_entries_mask - the mask to get the entry number of a PD/PT + * + * @adev: amdgpu_device pointer + * @level: VMPT level + * + * Returns: + * The mask to extract the entry number of a PD/PT from an address. + */ +static uint32_t amdgpu_vm_pt_entries_mask(struct amdgpu_device *adev, + unsigned int level) +{ + if (level <= adev->vm_manager.root_level) + return 0xffffffff; + else if (level != AMDGPU_VM_PTB) + return 0x1ff; + else + return AMDGPU_VM_PTE_COUNT(adev) - 1; +} + +/** + * amdgpu_vm_pt_size - returns the size of the page table in bytes + * + * @adev: amdgpu_device pointer + * @level: VMPT level + * + * Returns: + * The size of the BO for a page directory or page table in bytes. 
+ */ +static unsigned int amdgpu_vm_pt_size(struct amdgpu_device *adev, + unsigned int level) +{ + return AMDGPU_GPU_PAGE_ALIGN(amdgpu_vm_pt_num_entries(adev, level) * 8); +} + +/** + * amdgpu_vm_pt_parent - get the parent page directory + * + * @pt: child page table + * + * Helper to get the parent entry for the child page table. NULL if we are at + * the root page directory. + */ +static struct amdgpu_vm_bo_base * +amdgpu_vm_pt_parent(struct amdgpu_vm_bo_base *pt) +{ + struct amdgpu_bo *parent = pt->bo->parent; + + if (!parent) + return NULL; + + return parent->vm_bo; +} + +/** + * amdgpu_vm_pt_start - start PD/PT walk + * + * @adev: amdgpu_device pointer + * @vm: amdgpu_vm structure + * @start: start address of the walk + * @cursor: state to initialize + * + * Initialize a amdgpu_vm_pt_cursor to start a walk. + */ +static void amdgpu_vm_pt_start(struct amdgpu_device *adev, + struct amdgpu_vm *vm, uint64_t start, + struct amdgpu_vm_pt_cursor *cursor) +{ + cursor->pfn = start; + cursor->parent = NULL; + cursor->entry = &vm->root; + cursor->level = adev->vm_manager.root_level; +} + +/** + * amdgpu_vm_pt_descendant - go to child node + * + * @adev: amdgpu_device pointer + * @cursor: current state + * + * Walk to the child node of the current node. + * Returns: + * True if the walk was possible, false otherwise. 
+ */ +static bool amdgpu_vm_pt_descendant(struct amdgpu_device *adev, + struct amdgpu_vm_pt_cursor *cursor) +{ + unsigned int mask, shift, idx; + + if ((cursor->level == AMDGPU_VM_PTB) || !cursor->entry || + !cursor->entry->bo) + return false; + + mask = amdgpu_vm_pt_entries_mask(adev, cursor->level); + shift = amdgpu_vm_pt_level_shift(adev, cursor->level); + + ++cursor->level; + idx = (cursor->pfn >> shift) & mask; + cursor->parent = cursor->entry; + cursor->entry = &to_amdgpu_bo_vm(cursor->entry->bo)->entries[idx]; + return true; +} + +/** + * amdgpu_vm_pt_sibling - go to sibling node + * + * @adev: amdgpu_device pointer + * @cursor: current state + * + * Walk to the sibling node of the current node. + * Returns: + * True if the walk was possible, false otherwise. + */ +static bool amdgpu_vm_pt_sibling(struct amdgpu_device *adev, + struct amdgpu_vm_pt_cursor *cursor) +{ + + unsigned int shift, num_entries; + struct amdgpu_bo_vm *parent; + + /* Root doesn't have a sibling */ + if (!cursor->parent) + return false; + + /* Go to our parents and see if we got a sibling */ + shift = amdgpu_vm_pt_level_shift(adev, cursor->level - 1); + num_entries = amdgpu_vm_pt_num_entries(adev, cursor->level - 1); + parent = to_amdgpu_bo_vm(cursor->parent->bo); + + if (cursor->entry == &parent->entries[num_entries - 1]) + return false; + + cursor->pfn += 1ULL << shift; + cursor->pfn &= ~((1ULL << shift) - 1); + ++cursor->entry; + return true; +} + +/** + * amdgpu_vm_pt_ancestor - go to parent node + * + * @cursor: current state + * + * Walk to the parent node of the current node. + * Returns: + * True if the walk was possible, false otherwise. 
+ */ +static bool amdgpu_vm_pt_ancestor(struct amdgpu_vm_pt_cursor *cursor) +{ + if (!cursor->parent) + return false; + + --cursor->level; + cursor->entry = cursor->parent; + cursor->parent = amdgpu_vm_pt_parent(cursor->parent); + return true; +} + +/** + * amdgpu_vm_pt_next - get next PD/PT in hierarchy + * + * @adev: amdgpu_device pointer + * @cursor: current state + * + * Walk the PD/PT tree to the next node. + */ +static void amdgpu_vm_pt_next(struct amdgpu_device *adev, + struct amdgpu_vm_pt_cursor *cursor) +{ + /* First try a newborn child */ + if (amdgpu_vm_pt_descendant(adev, cursor)) + return; + + /* If that didn't work try to find a sibling */ + while (!amdgpu_vm_pt_sibling(adev, cursor)) { + /* No sibling, go to our parents and grandparents */ + if (!amdgpu_vm_pt_ancestor(cursor)) { + cursor->pfn = ~0ll; + return; + } + } +} + +/** + * amdgpu_vm_pt_first_dfs - start a deep first search + * + * @adev: amdgpu_device structure + * @vm: amdgpu_vm structure + * @start: optional cursor to start with + * @cursor: state to initialize + * + * Starts a deep first traversal of the PD/PT tree. + */ +static void amdgpu_vm_pt_first_dfs(struct amdgpu_device *adev, + struct amdgpu_vm *vm, + struct amdgpu_vm_pt_cursor *start, + struct amdgpu_vm_pt_cursor *cursor) +{ + if (start) + *cursor = *start; + else + amdgpu_vm_pt_start(adev, vm, 0, cursor); + + while (amdgpu_vm_pt_descendant(adev, cursor)) + ; +} + +/** + * amdgpu_vm_pt_continue_dfs - check if the deep first search should continue + * + * @start: starting point for the search + * @entry: current entry + * + * Returns: + * True when the search should continue, false otherwise.
+ */ +static bool amdgpu_vm_pt_continue_dfs(struct amdgpu_vm_pt_cursor *start, + struct amdgpu_vm_bo_base *entry) +{ + return entry && (!start || entry != start->entry); +} + +/** + * amdgpu_vm_pt_next_dfs - get the next node for a deep first search + * + * @adev: amdgpu_device structure + * @cursor: current state + * + * Move the cursor to the next node in a deep first search. + */ +static void amdgpu_vm_pt_next_dfs(struct amdgpu_device *adev, + struct amdgpu_vm_pt_cursor *cursor) +{ + if (!cursor->entry) + return; + + if (!cursor->parent) + cursor->entry = NULL; + else if (amdgpu_vm_pt_sibling(adev, cursor)) + while (amdgpu_vm_pt_descendant(adev, cursor)) + ; + else + amdgpu_vm_pt_ancestor(cursor); +} + +/* + * for_each_amdgpu_vm_pt_dfs_safe - safe deep first search of all PDs/PTs + */ +#define for_each_amdgpu_vm_pt_dfs_safe(adev, vm, start, cursor, entry) \ + for (amdgpu_vm_pt_first_dfs((adev), (vm), (start), &(cursor)), \ + (entry) = (cursor).entry, amdgpu_vm_pt_next_dfs((adev), &(cursor));\ + amdgpu_vm_pt_continue_dfs((start), (entry)); \ + (entry) = (cursor).entry, amdgpu_vm_pt_next_dfs((adev), &(cursor))) + +/** + * amdgpu_vm_pt_clear - initially clear the PDs/PTs + * + * @adev: amdgpu_device pointer + * @vm: VM to clear BO from + * @vmbo: BO to clear + * @immediate: use an immediate update + * + * Root PD needs to be reserved when calling this. + * + * Returns: + * 0 on success, errno otherwise. 
+ */ +int amdgpu_vm_pt_clear(struct amdgpu_device *adev, struct amdgpu_vm *vm, + struct amdgpu_bo_vm *vmbo, bool immediate) +{ + unsigned int level = adev->vm_manager.root_level; + struct ttm_operation_ctx ctx = { true, false }; + struct amdgpu_vm_update_params params; + struct amdgpu_bo *ancestor = &vmbo->bo; + unsigned int entries, ats_entries; + struct amdgpu_bo *bo = &vmbo->bo; + uint64_t addr; + int r, idx; + + /* Figure out our place in the hierarchy */ + if (ancestor->parent) { + ++level; + while (ancestor->parent->parent) { + ++level; + ancestor = ancestor->parent; + } + } + + entries = amdgpu_bo_size(bo) / 8; + if (!vm->pte_support_ats) { + ats_entries = 0; + + } else if (!bo->parent) { + ats_entries = amdgpu_vm_pt_num_ats_entries(adev); + ats_entries = min(ats_entries, entries); + entries -= ats_entries; + + } else { + struct amdgpu_vm_bo_base *pt; + + pt = ancestor->vm_bo; + ats_entries = amdgpu_vm_pt_num_ats_entries(adev); + if ((pt - to_amdgpu_bo_vm(vm->root.bo)->entries) >= + ats_entries) { + ats_entries = 0; + } else { + ats_entries = entries; + entries = 0; + } + } + + r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); + if (r) + return r; + + if (vmbo->shadow) { + struct amdgpu_bo *shadow = vmbo->shadow; + + r = ttm_bo_validate(&shadow->tbo, &shadow->placement, &ctx); + if (r) + return r; + } + + if (!drm_dev_enter(adev_to_drm(adev), &idx)) + return -ENODEV; + + r = vm->update_funcs->map_table(vmbo); + if (r) + goto exit; + + memset(&params, 0, sizeof(params)); + params.adev = adev; + params.vm = vm; + params.immediate = immediate; + + r = vm->update_funcs->prepare(&params, NULL, AMDGPU_SYNC_EXPLICIT); + if (r) + goto exit; + + addr = 0; + if (ats_entries) { + uint64_t value = 0, flags; + + flags = AMDGPU_PTE_DEFAULT_ATC; + if (level != AMDGPU_VM_PTB) { + /* Handle leaf PDEs as PTEs */ + flags |= AMDGPU_PDE_PTE; + amdgpu_gmc_get_vm_pde(adev, level, &value, &flags); + } + + r = vm->update_funcs->update(&params, vmbo, addr, 0, + ats_entries, value, flags); + 
if (r) + goto exit; + + addr += ats_entries * 8; + } + + if (entries) { + uint64_t value = 0, flags = 0; + + if (adev->asic_type >= CHIP_VEGA10) { + if (level != AMDGPU_VM_PTB) { + /* Handle leaf PDEs as PTEs */ + flags |= AMDGPU_PDE_PTE; + amdgpu_gmc_get_vm_pde(adev, level, + &value, &flags); + } else { + /* Workaround for fault priority problem on GMC9 */ + flags = AMDGPU_PTE_EXECUTABLE; + } + } + + r = vm->update_funcs->update(&params, vmbo, addr, 0, entries, + value, flags); + if (r) + goto exit; + } + + r = vm->update_funcs->commit(&params, NULL); +exit: + drm_dev_exit(idx); + return r; +} + +/** + * amdgpu_vm_pt_create - create bo for PD/PT + * + * @adev: amdgpu_device pointer + * @vm: requesting vm + * @level: the page table level + * @immediate: use a immediate update + * @vmbo: pointer to the buffer object pointer + */ +int amdgpu_vm_pt_create(struct amdgpu_device *adev, struct amdgpu_vm *vm, + int level, bool immediate, struct amdgpu_bo_vm **vmbo) +{ + struct amdgpu_bo_param bp; + struct amdgpu_bo *bo; + struct dma_resv *resv; + unsigned int num_entries; + int r; + + memset(&bp, 0, sizeof(bp)); + + bp.size = amdgpu_vm_pt_size(adev, level); + bp.byte_align = AMDGPU_GPU_PAGE_SIZE; + bp.domain = AMDGPU_GEM_DOMAIN_VRAM; + bp.domain = amdgpu_bo_get_preferred_domain(adev, bp.domain); + bp.flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS | + AMDGPU_GEM_CREATE_CPU_GTT_USWC; + + if (level < AMDGPU_VM_PTB) + num_entries = amdgpu_vm_pt_num_entries(adev, level); + else + num_entries = 0; + + bp.bo_ptr_size = struct_size((*vmbo), entries, num_entries); + + if (vm->use_cpu_for_update) + bp.flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; + + bp.type = ttm_bo_type_kernel; + bp.no_wait_gpu = immediate; + if (vm->root.bo) + bp.resv = vm->root.bo->tbo.base.resv; + + r = amdgpu_bo_create_vm(adev, &bp, vmbo); + if (r) + return r; + + bo = &(*vmbo)->bo; + if (vm->is_compute_context || (adev->flags & AMD_IS_APU)) { + (*vmbo)->shadow = NULL; + return 0; + } + + if (!bp.resv) + 
WARN_ON(dma_resv_lock(bo->tbo.base.resv, + NULL)); + resv = bp.resv; + memset(&bp, 0, sizeof(bp)); + bp.size = amdgpu_vm_pt_size(adev, level); + bp.domain = AMDGPU_GEM_DOMAIN_GTT; + bp.flags = AMDGPU_GEM_CREATE_CPU_GTT_USWC; + bp.type = ttm_bo_type_kernel; + bp.resv = bo->tbo.base.resv; + bp.bo_ptr_size = sizeof(struct amdgpu_bo); + + r = amdgpu_bo_create(adev, &bp, &(*vmbo)->shadow); + + if (!resv) + dma_resv_unlock(bo->tbo.base.resv); + + if (r) { + amdgpu_bo_unref(&bo); + return r; + } + + (*vmbo)->shadow->parent = amdgpu_bo_ref(bo); + amdgpu_bo_add_to_shadow_list(*vmbo); + + return 0; +} + +/** + * amdgpu_vm_pt_alloc - Allocate a specific page table + * + * @adev: amdgpu_device pointer + * @vm: VM to allocate page tables for + * @cursor: Which page table to allocate + * @immediate: use an immediate update + * + * Make sure a specific page table or directory is allocated. + * + * Returns: + * 1 if page table needed to be allocated, 0 if page table was already + * allocated, negative errno if an error occurred. + */ +static int amdgpu_vm_pt_alloc(struct amdgpu_device *adev, + struct amdgpu_vm *vm, + struct amdgpu_vm_pt_cursor *cursor, + bool immediate) +{ + struct amdgpu_vm_bo_base *entry = cursor->entry; + struct amdgpu_bo *pt_bo; + struct amdgpu_bo_vm *pt; + int r; + + if (entry->bo) + return 0; + + r = amdgpu_vm_pt_create(adev, vm, cursor->level, immediate, &pt); + if (r) + return r; + + /* Keep a reference to the root directory to avoid + * freeing them up in the wrong order. 
+ */ + pt_bo = &pt->bo; + pt_bo->parent = amdgpu_bo_ref(cursor->parent->bo); + amdgpu_vm_bo_base_init(entry, vm, pt_bo); + r = amdgpu_vm_pt_clear(adev, vm, pt, immediate); + if (r) + goto error_free_pt; + + return 0; + +error_free_pt: + amdgpu_bo_unref(&pt->shadow); + amdgpu_bo_unref(&pt_bo); + return r; +} + +/** + * amdgpu_vm_pt_free - free one PD/PT + * + * @entry: PDE to free + */ +static void amdgpu_vm_pt_free(struct amdgpu_vm_bo_base *entry) +{ + struct amdgpu_bo *shadow; + + if (!entry->bo) + return; + shadow = amdgpu_bo_shadowed(entry->bo); + entry->bo->vm_bo = NULL; + list_del(&entry->vm_status); + amdgpu_bo_unref(&shadow); + amdgpu_bo_unref(&entry->bo); +} + +/** + * amdgpu_vm_pt_free_dfs - free PD/PT levels + * + * @adev: amdgpu device structure + * @vm: amdgpu vm structure + * @start: optional cursor where to start freeing PDs/PTs + * + * Free the page directory or page table level and all sub levels. + */ +static void amdgpu_vm_pt_free_dfs(struct amdgpu_device *adev, + struct amdgpu_vm *vm, + struct amdgpu_vm_pt_cursor *start) +{ + struct amdgpu_vm_pt_cursor cursor; + struct amdgpu_vm_bo_base *entry; + + for_each_amdgpu_vm_pt_dfs_safe(adev, vm, start, cursor, entry) + amdgpu_vm_pt_free(entry); + + if (start) + amdgpu_vm_pt_free(start->entry); +} + +/** + * amdgpu_vm_pt_free_root - free root PD + * @adev: amdgpu device structure + * @vm: amdgpu vm structure + * + * Free the root page directory and everything below it. 
+ */ +void amdgpu_vm_pt_free_root(struct amdgpu_device *adev, struct amdgpu_vm *vm) +{ + amdgpu_vm_pt_free_dfs(adev, vm, NULL); +} + +/** + * amdgpu_vm_pt_is_root_clean - check if a root PD is clean + * + * @adev: amdgpu_device pointer + * @vm: the VM to check + * + * Check all entries of the root PD, if any subsequent PDs are allocated, + * it means there are page table creating and filling, and is no a clean + * VM + * + * Returns: + * 0 if this VM is clean + */ +bool amdgpu_vm_pt_is_root_clean(struct amdgpu_device *adev, + struct amdgpu_vm *vm) +{ + enum amdgpu_vm_level root = adev->vm_manager.root_level; + unsigned int entries = amdgpu_vm_pt_num_entries(adev, root); + unsigned int i = 0; + + for (i = 0; i < entries; i++) { + if (to_amdgpu_bo_vm(vm->root.bo)->entries[i].bo) + return false; + } + return true; +} + +/** + * amdgpu_vm_pde_update - update a single level in the hierarchy + * + * @params: parameters for the update + * @entry: entry to update + * + * Makes sure the requested entry in parent is up to date. + */ +int amdgpu_vm_pde_update(struct amdgpu_vm_update_params *params, + struct amdgpu_vm_bo_base *entry) +{ + struct amdgpu_vm_bo_base *parent = amdgpu_vm_pt_parent(entry); + struct amdgpu_bo *bo = parent->bo, *pbo; + struct amdgpu_vm *vm = params->vm; + uint64_t pde, pt, flags; + unsigned int level; + + for (level = 0, pbo = bo->parent; pbo; ++level) + pbo = pbo->parent; + + level += params->adev->vm_manager.root_level; + amdgpu_gmc_get_pde_for_bo(entry->bo, level, &pt, &flags); + pde = (entry - to_amdgpu_bo_vm(parent->bo)->entries) * 8; + return vm->update_funcs->update(params, to_amdgpu_bo_vm(bo), pde, pt, + 1, 0, flags); +} + +/* + * amdgpu_vm_pte_update_flags - figure out flags for PTE updates + * + * Make sure to set the right flags for the PTEs at the desired level. 
+ */ +static void amdgpu_vm_pte_update_flags(struct amdgpu_vm_update_params *params, + struct amdgpu_bo_vm *pt, + unsigned int level, + uint64_t pe, uint64_t addr, + unsigned int count, uint32_t incr, + uint64_t flags) + +{ + if (level != AMDGPU_VM_PTB) { + flags |= AMDGPU_PDE_PTE; + amdgpu_gmc_get_vm_pde(params->adev, level, &addr, &flags); + + } else if (params->adev->asic_type >= CHIP_VEGA10 && + !(flags & AMDGPU_PTE_VALID) && + !(flags & AMDGPU_PTE_PRT)) { + + /* Workaround for fault priority problem on GMC9 */ + flags |= AMDGPU_PTE_EXECUTABLE; + } + + params->vm->update_funcs->update(params, pt, pe, addr, count, incr, + flags); +} + +/** + * amdgpu_vm_pte_fragment - get fragment for PTEs + * + * @params: see amdgpu_vm_update_params definition + * @start: first PTE to handle + * @end: last PTE to handle + * @flags: hw mapping flags + * @frag: resulting fragment size + * @frag_end: end of this fragment + * + * Returns the first possible fragment for the start and end address. + */ +static void amdgpu_vm_pte_fragment(struct amdgpu_vm_update_params *params, + uint64_t start, uint64_t end, uint64_t flags, + unsigned int *frag, uint64_t *frag_end) +{ + /** + * The MC L1 TLB supports variable sized pages, based on a fragment + * field in the PTE. When this field is set to a non-zero value, page + * granularity is increased from 4KB to (1 << (12 + frag)). The PTE + * flags are considered valid for all PTEs within the fragment range + * and corresponding mappings are assumed to be physically contiguous. + * + * The L1 TLB can store a single PTE for the whole fragment, + * significantly increasing the space available for translation + * caching. This leads to large improvements in throughput when the + * TLB is under pressure. + * + * The L2 TLB distributes small and large fragments into two + * asymmetric partitions. The large fragment cache is significantly + * larger. Thus, we try to use large fragments wherever possible. 
+ * Userspace can support this by aligning virtual base address and + * allocation size to the fragment size. + * + * Starting with Vega10 the fragment size only controls the L1. The L2 + * is now directly feed with small/huge/giant pages from the walker. + */ + unsigned int max_frag; + + if (params->adev->asic_type < CHIP_VEGA10) + max_frag = params->adev->vm_manager.fragment_size; + else + max_frag = 31; + + /* system pages are non continuously */ + if (params->pages_addr) { + *frag = 0; + *frag_end = end; + return; + } + + /* This intentionally wraps around if no bit is set */ + *frag = min_t(unsigned int, ffs(start) - 1, fls64(end - start) - 1); + if (*frag >= max_frag) { + *frag = max_frag; + *frag_end = end & ~((1ULL << max_frag) - 1); + } else { + *frag_end = start + (1 << *frag); + } +} + +/** + * amdgpu_vm_ptes_update - make sure that page tables are valid + * + * @params: see amdgpu_vm_update_params definition + * @start: start of GPU address range + * @end: end of GPU address range + * @dst: destination address to map to, the next dst inside the function + * @flags: mapping flags + * + * Update the page tables in the range @start - @end. + * + * Returns: + * 0 for success, -EINVAL for failure. 
+ */ +int amdgpu_vm_ptes_update(struct amdgpu_vm_update_params *params, + uint64_t start, uint64_t end, + uint64_t dst, uint64_t flags) +{ + struct amdgpu_device *adev = params->adev; + struct amdgpu_vm_pt_cursor cursor; + uint64_t frag_start = start, frag_end; + unsigned int frag; + int r; + + /* figure out the initial fragment */ + amdgpu_vm_pte_fragment(params, frag_start, end, flags, &frag, + &frag_end); + + /* walk over the address space and update the PTs */ + amdgpu_vm_pt_start(adev, params->vm, start, &cursor); + while (cursor.pfn < end) { + unsigned int shift, parent_shift, mask; + uint64_t incr, entry_end, pe_start; + struct amdgpu_bo *pt; + + if (!params->unlocked) { + /* make sure that the page tables covering the + * address range are actually allocated + */ + r = amdgpu_vm_pt_alloc(params->adev, params->vm, + &cursor, params->immediate); + if (r) + return r; + } + + shift = amdgpu_vm_pt_level_shift(adev, cursor.level); + parent_shift = amdgpu_vm_pt_level_shift(adev, cursor.level - 1); + if (params->unlocked) { + /* Unlocked updates are only allowed on the leaves */ + if (amdgpu_vm_pt_descendant(adev, &cursor)) + continue; + } else if (adev->asic_type < CHIP_VEGA10 && + (flags & AMDGPU_PTE_VALID)) { + /* No huge page support before GMC v9 */ + if (cursor.level != AMDGPU_VM_PTB) { + if (!amdgpu_vm_pt_descendant(adev, &cursor)) + return -ENOENT; + continue; + } + } else if (frag < shift) { + /* We can't use this level when the fragment size is + * smaller than the address shift. Go to the next + * child entry and try again. + */ + if (amdgpu_vm_pt_descendant(adev, &cursor)) + continue; + } else if (frag >= parent_shift) { + /* If the fragment size is even larger than the parent + * shift we should go up one level and check it again. 
+ */ + if (!amdgpu_vm_pt_ancestor(&cursor)) + return -EINVAL; + continue; + } + + pt = cursor.entry->bo; + if (!pt) { + /* We need all PDs and PTs for mapping something, */ + if (flags & AMDGPU_PTE_VALID) + return -ENOENT; + + /* but unmapping something can happen at a higher + * level. + */ + if (!amdgpu_vm_pt_ancestor(&cursor)) + return -EINVAL; + + pt = cursor.entry->bo; + shift = parent_shift; + frag_end = max(frag_end, ALIGN(frag_start + 1, + 1ULL << shift)); + } + + /* Looks good so far, calculate parameters for the update */ + incr = (uint64_t)AMDGPU_GPU_PAGE_SIZE << shift; + mask = amdgpu_vm_pt_entries_mask(adev, cursor.level); + pe_start = ((cursor.pfn >> shift) & mask) * 8; + entry_end = ((uint64_t)mask + 1) << shift; + entry_end += cursor.pfn & ~(entry_end - 1); + entry_end = min(entry_end, end); + + do { + struct amdgpu_vm *vm = params->vm; + uint64_t upd_end = min(entry_end, frag_end); + unsigned int nptes = (upd_end - frag_start) >> shift; + uint64_t upd_flags = flags | AMDGPU_PTE_FRAG(frag); + + /* This can happen when we set higher level PDs to + * silent to stop fault floods. + */ + nptes = max(nptes, 1u); + + trace_amdgpu_vm_update_ptes(params, frag_start, upd_end, + min(nptes, 32u), dst, incr, + upd_flags, + vm->task_info.pid, + vm->immediate.fence_context); + amdgpu_vm_pte_update_flags(params, to_amdgpu_bo_vm(pt), + cursor.level, pe_start, dst, + nptes, incr, upd_flags); + + pe_start += nptes * 8; + dst += nptes * incr; + + frag_start = upd_end; + if (frag_start >= frag_end) { + /* figure out the next fragment */ + amdgpu_vm_pte_fragment(params, frag_start, end, + flags, &frag, &frag_end); + if (frag < shift) + break; + } + } while (frag_start < entry_end); + + if (amdgpu_vm_pt_descendant(adev, &cursor)) { + /* Free all child entries. + * Update the tables with the flags and addresses and free up subsequent + * tables in the case of huge pages or freed up areas. 
+ * This is the maximum you can free, because all other page tables are not + * completely covered by the range and so potentially still in use. + */ + while (cursor.pfn < frag_start) { + /* Make sure previous mapping is freed */ + if (cursor.entry->bo) { + params->table_freed = true; + amdgpu_vm_pt_free_dfs(adev, params->vm, + &cursor); + } + amdgpu_vm_pt_next(adev, &cursor); + } + + } else if (frag >= shift) { + /* or just move on to the next on the same level. */ + amdgpu_vm_pt_next(adev, &cursor); + } + } + + return 0; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c index bdb44cee19d3..1fd3cbca20a2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c @@ -109,7 +109,7 @@ static int amdgpu_vm_sdma_commit(struct amdgpu_vm_update_params *p, if (p->unlocked) { struct dma_fence *tmp = dma_fence_get(f); - swap(p->vm->last_unlocked, f); + swap(p->vm->last_unlocked, tmp); dma_fence_put(tmp); } else { amdgpu_bo_fence(p->vm->root.bo, f, true); diff --git a/drivers/gpu/drm/amd/amdgpu/athub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/athub_v1_0.c index 88642e7ecdf4..a13c443ea10f 100644 --- a/drivers/gpu/drm/amd/amdgpu/athub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/athub_v1_0.c @@ -87,7 +87,7 @@ int athub_v1_0_set_clockgating(struct amdgpu_device *adev, return 0; } -void athub_v1_0_get_clockgating(struct amdgpu_device *adev, u32 *flags) +void athub_v1_0_get_clockgating(struct amdgpu_device *adev, u64 *flags) { int data; diff --git a/drivers/gpu/drm/amd/amdgpu/athub_v1_0.h b/drivers/gpu/drm/amd/amdgpu/athub_v1_0.h index b279af59e34f..6be0a6704ea7 100644 --- a/drivers/gpu/drm/amd/amdgpu/athub_v1_0.h +++ b/drivers/gpu/drm/amd/amdgpu/athub_v1_0.h @@ -25,6 +25,6 @@ int athub_v1_0_set_clockgating(struct amdgpu_device *adev, enum amd_clockgating_state state); -void athub_v1_0_get_clockgating(struct amdgpu_device *adev, u32 *flags); +void athub_v1_0_get_clockgating(struct 
amdgpu_device *adev, u64 *flags); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/athub_v2_0.c b/drivers/gpu/drm/amd/amdgpu/athub_v2_0.c index a720436857b4..a9521c98e7f7 100644 --- a/drivers/gpu/drm/amd/amdgpu/athub_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/athub_v2_0.c @@ -93,7 +93,7 @@ int athub_v2_0_set_clockgating(struct amdgpu_device *adev, return 0; } -void athub_v2_0_get_clockgating(struct amdgpu_device *adev, u32 *flags) +void athub_v2_0_get_clockgating(struct amdgpu_device *adev, u64 *flags) { int data; diff --git a/drivers/gpu/drm/amd/amdgpu/athub_v2_0.h b/drivers/gpu/drm/amd/amdgpu/athub_v2_0.h index 02932c1c8bab..8b763f6dfd81 100644 --- a/drivers/gpu/drm/amd/amdgpu/athub_v2_0.h +++ b/drivers/gpu/drm/amd/amdgpu/athub_v2_0.h @@ -25,6 +25,6 @@ int athub_v2_0_set_clockgating(struct amdgpu_device *adev, enum amd_clockgating_state state); -void athub_v2_0_get_clockgating(struct amdgpu_device *adev, u32 *flags); +void athub_v2_0_get_clockgating(struct amdgpu_device *adev, u64 *flags); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/athub_v2_1.c b/drivers/gpu/drm/amd/amdgpu/athub_v2_1.c index ad8e87d3d2cb..78508ae6a670 100644 --- a/drivers/gpu/drm/amd/amdgpu/athub_v2_1.c +++ b/drivers/gpu/drm/amd/amdgpu/athub_v2_1.c @@ -85,7 +85,7 @@ int athub_v2_1_set_clockgating(struct amdgpu_device *adev, return 0; } -void athub_v2_1_get_clockgating(struct amdgpu_device *adev, u32 *flags) +void athub_v2_1_get_clockgating(struct amdgpu_device *adev, u64 *flags) { int data; diff --git a/drivers/gpu/drm/amd/amdgpu/athub_v2_1.h b/drivers/gpu/drm/amd/amdgpu/athub_v2_1.h index 5e6824c0f591..b799f14bce03 100644 --- a/drivers/gpu/drm/amd/amdgpu/athub_v2_1.h +++ b/drivers/gpu/drm/amd/amdgpu/athub_v2_1.h @@ -25,6 +25,6 @@ int athub_v2_1_set_clockgating(struct amdgpu_device *adev, enum amd_clockgating_state state); -void athub_v2_1_get_clockgating(struct amdgpu_device *adev, u32 *flags); +void athub_v2_1_get_clockgating(struct amdgpu_device *adev, u64 *flags); #endif diff --git 
a/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c b/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c index a92d86e12718..d4f5a584075d 100644 --- a/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c +++ b/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c @@ -765,7 +765,6 @@ amdgpu_atombios_encoder_setup_dig_transmitter(struct drm_encoder *encoder, int a int dp_clock = 0; int dp_lane_count = 0; int connector_object_id = 0; - int igp_lane_info = 0; int dig_encoder = dig->dig_encoder; int hpd_id = AMDGPU_HPD_NONE; @@ -848,26 +847,6 @@ amdgpu_atombios_encoder_setup_dig_transmitter(struct drm_encoder *encoder, int a else args.v1.ucConfig |= ATOM_TRANSMITTER_CONFIG_DIG1_ENCODER; - if ((adev->flags & AMD_IS_APU) && - (amdgpu_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_UNIPHY)) { - if (is_dp || - !amdgpu_dig_monitor_is_duallink(encoder, amdgpu_encoder->pixel_clock)) { - if (igp_lane_info & 0x1) - args.v1.ucConfig |= ATOM_TRANSMITTER_CONFIG_LANE_0_3; - else if (igp_lane_info & 0x2) - args.v1.ucConfig |= ATOM_TRANSMITTER_CONFIG_LANE_4_7; - else if (igp_lane_info & 0x4) - args.v1.ucConfig |= ATOM_TRANSMITTER_CONFIG_LANE_8_11; - else if (igp_lane_info & 0x8) - args.v1.ucConfig |= ATOM_TRANSMITTER_CONFIG_LANE_12_15; - } else { - if (igp_lane_info & 0x3) - args.v1.ucConfig |= ATOM_TRANSMITTER_CONFIG_LANE_0_7; - else if (igp_lane_info & 0xc) - args.v1.ucConfig |= ATOM_TRANSMITTER_CONFIG_LANE_8_15; - } - } - if (dig->linkb) args.v1.ucConfig |= ATOM_TRANSMITTER_CONFIG_LINKB; else diff --git a/drivers/gpu/drm/amd/amdgpu/df_v1_7.c b/drivers/gpu/drm/amd/amdgpu/df_v1_7.c index 2d01ac0d4c11..b991609f46c1 100644 --- a/drivers/gpu/drm/amd/amdgpu/df_v1_7.c +++ b/drivers/gpu/drm/amd/amdgpu/df_v1_7.c @@ -99,7 +99,7 @@ static void df_v1_7_update_medium_grain_clock_gating(struct amdgpu_device *adev, } static void df_v1_7_get_clockgating_state(struct amdgpu_device *adev, - u32 *flags) + u64 *flags) { u32 tmp; diff --git a/drivers/gpu/drm/amd/amdgpu/df_v3_6.c b/drivers/gpu/drm/amd/amdgpu/df_v3_6.c 
index f4dfca013ec5..483a441b46aa 100644 --- a/drivers/gpu/drm/amd/amdgpu/df_v3_6.c +++ b/drivers/gpu/drm/amd/amdgpu/df_v3_6.c @@ -332,7 +332,7 @@ static void df_v3_6_update_medium_grain_clock_gating(struct amdgpu_device *adev, } static void df_v3_6_get_clockgating_state(struct amdgpu_device *adev, - u32 *flags) + u64 *flags) { u32 tmp; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 9426e252d8aa..54446162db8b 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -4741,7 +4741,7 @@ static int gfx_v10_0_compute_ring_init(struct amdgpu_device *adev, int ring_id, + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec) + ring->pipe; hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring) ? - AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_GFX_PIPE_PRIO_NORMAL; + AMDGPU_RING_PRIO_2 : AMDGPU_RING_PRIO_DEFAULT; /* type-2 packets are deprecated on MEC, use type-3 instead */ r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type, hw_prio, NULL); @@ -8451,7 +8451,7 @@ static int gfx_v10_0_set_clockgating_state(void *handle, return 0; } -static void gfx_v10_0_get_clockgating_state(void *handle, u32 *flags) +static void gfx_v10_0_get_clockgating_state(void *handle, u64 *flags) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; int data; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 5f112efda634..25dc729d0ec2 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -1925,7 +1925,7 @@ static int gfx_v8_0_compute_ring_init(struct amdgpu_device *adev, int ring_id, + ring->pipe; hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring) ? 
- AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_RING_PRIO_DEFAULT; + AMDGPU_RING_PRIO_2 : AMDGPU_RING_PRIO_DEFAULT; /* type-2 packets are deprecated on MEC, use type-3 instead */ r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type, hw_prio, NULL); @@ -5475,7 +5475,7 @@ static int gfx_v8_0_set_powergating_state(void *handle, return 0; } -static void gfx_v8_0_get_clockgating_state(void *handle, u32 *flags) +static void gfx_v8_0_get_clockgating_state(void *handle, u64 *flags) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; int data; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 46d4bf27ebbb..d58fd83524ac 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -1205,6 +1205,8 @@ static const struct amdgpu_gfxoff_quirk amdgpu_gfxoff_quirk_list[] = { { 0x1002, 0x15dd, 0x103c, 0x83e7, 0xd3 }, /* GFXOFF is unstable on C6 parts with a VBIOS 113-RAVEN-114 */ { 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc6 }, + /* Apple MacBook Pro (15-inch, 2019) Radeon Pro Vega 20 4 GB */ + { 0x1002, 0x69af, 0x106b, 0x019a, 0xc0 }, { 0, 0, 0, 0, 0 }, }; @@ -2274,7 +2276,7 @@ static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id, + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec) + ring->pipe; hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring) ? 
- AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_GFX_PIPE_PRIO_NORMAL; + AMDGPU_RING_PRIO_2 : AMDGPU_RING_PRIO_DEFAULT; /* type-2 packets are deprecated on MEC, use type-3 instead */ return amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type, hw_prio, NULL); @@ -5231,7 +5233,7 @@ static int gfx_v9_0_set_clockgating_state(void *handle, return 0; } -static void gfx_v9_0_get_clockgating_state(void *handle, u32 *flags) +static void gfx_v9_0_get_clockgating_state(void *handle, u64 *flags) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; int data; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c index 7653ebd0e67b..3a797424579c 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c @@ -1930,6 +1930,19 @@ static void gfx_v9_4_2_reset_sq_timeout_status(struct amdgpu_device *adev) mutex_unlock(&adev->grbm_idx_mutex); } +static bool gfx_v9_4_2_query_uctl2_poison_status(struct amdgpu_device *adev) +{ + u32 status = 0; + struct amdgpu_vmhub *hub; + + hub = &adev->vmhub[AMDGPU_GFXHUB_0]; + status = RREG32(hub->vm_l2_pro_fault_status); + /* reset page fault status */ + WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1); + + return REG_GET_FIELD(status, VM_L2_PROTECTION_FAULT_STATUS, FED); +} + struct amdgpu_ras_block_hw_ops gfx_v9_4_2_ras_ops = { .ras_error_inject = &gfx_v9_4_2_ras_error_inject, .query_ras_error_count = &gfx_v9_4_2_query_ras_error_count, @@ -1943,4 +1956,5 @@ struct amdgpu_gfx_ras gfx_v9_4_2_ras = { .hw_ops = &gfx_v9_4_2_ras_ops, }, .enable_watchdog_timer = &gfx_v9_4_2_enable_watchdog_timer, + .query_utcl2_poison_status = gfx_v9_4_2_query_uctl2_poison_status, }; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c index 5228421b0f72..a455e59f41f4 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c @@ -1161,7 +1161,7 @@ static int gmc_v10_0_set_clockgating_state(void *handle, return 
athub_v2_0_set_clockgating(adev, state); } -static void gmc_v10_0_get_clockgating_state(void *handle, u32 *flags) +static void gmc_v10_0_get_clockgating_state(void *handle, u64 *flags) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c index 1932a3e4af7e..382dde1ce74c 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c @@ -1690,7 +1690,7 @@ static int gmc_v8_0_set_powergating_state(void *handle, return 0; } -static void gmc_v8_0_get_clockgating_state(void *handle, u32 *flags) +static void gmc_v8_0_get_clockgating_state(void *handle, u64 *flags) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; int data; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 6009fbfdcc19..22761a3bb818 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -1948,7 +1948,7 @@ static int gmc_v9_0_set_clockgating_state(void *handle, return 0; } -static void gmc_v9_0_get_clockgating_state(void *handle, u32 *flags) +static void gmc_v9_0_get_clockgating_state(void *handle, u64 *flags) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c b/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c index 046216635262..adf89680f53e 100644 --- a/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c @@ -124,7 +124,7 @@ static void hdp_v4_0_update_clock_gating(struct amdgpu_device *adev, } static void hdp_v4_0_get_clockgating_state(struct amdgpu_device *adev, - u32 *flags) + u64 *flags) { int data; diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c b/drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c index 5793977953cc..a9ea23fa0def 100644 --- a/drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c @@ -181,7 +181,7 @@ static void hdp_v5_0_update_clock_gating(struct 
amdgpu_device *adev, } static void hdp_v5_0_get_clockgating_state(struct amdgpu_device *adev, - u32 *flags) + u64 *flags) { uint32_t tmp; diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c index a29c86617fb5..8c3227d0b8b4 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c @@ -26,6 +26,7 @@ #include "soc15.h" #include "soc15d.h" #include "jpeg_v2_0.h" +#include "jpeg_v2_5.h" #include "vcn/vcn_2_5_offset.h" #include "vcn/vcn_2_5_sh_mask.h" @@ -39,6 +40,7 @@ static void jpeg_v2_5_set_dec_ring_funcs(struct amdgpu_device *adev); static void jpeg_v2_5_set_irq_funcs(struct amdgpu_device *adev); static int jpeg_v2_5_set_powergating_state(void *handle, enum amd_powergating_state state); +static void jpeg_v2_5_set_ras_funcs(struct amdgpu_device *adev); static int amdgpu_ih_clientid_jpeg[] = { SOC15_IH_CLIENTID_VCN, @@ -70,6 +72,7 @@ static int jpeg_v2_5_early_init(void *handle) jpeg_v2_5_set_dec_ring_funcs(adev); jpeg_v2_5_set_irq_funcs(adev); + jpeg_v2_5_set_ras_funcs(adev); return 0; } @@ -730,3 +733,74 @@ const struct amdgpu_ip_block_version jpeg_v2_6_ip_block = .rev = 0, .funcs = &jpeg_v2_6_ip_funcs, }; + +static uint32_t jpeg_v2_6_query_poison_by_instance(struct amdgpu_device *adev, + uint32_t instance, uint32_t sub_block) +{ + uint32_t poison_stat = 0, reg_value = 0; + + switch (sub_block) { + case AMDGPU_JPEG_V2_6_JPEG0: + reg_value = RREG32_SOC15(JPEG, instance, mmUVD_RAS_JPEG0_STATUS); + poison_stat = REG_GET_FIELD(reg_value, UVD_RAS_JPEG0_STATUS, POISONED_PF); + break; + case AMDGPU_JPEG_V2_6_JPEG1: + reg_value = RREG32_SOC15(JPEG, instance, mmUVD_RAS_JPEG1_STATUS); + poison_stat = REG_GET_FIELD(reg_value, UVD_RAS_JPEG1_STATUS, POISONED_PF); + break; + default: + break; + } + + if (poison_stat) + dev_info(adev->dev, "Poison detected in JPEG%d sub_block%d\n", + instance, sub_block); + + return poison_stat; +} + +static bool jpeg_v2_6_query_ras_poison_status(struct amdgpu_device 
*adev) +{ + uint32_t inst = 0, sub = 0, poison_stat = 0; + + for (inst = 0; inst < adev->jpeg.num_jpeg_inst; inst++) + for (sub = 0; sub < AMDGPU_JPEG_V2_6_MAX_SUB_BLOCK; sub++) + poison_stat += + jpeg_v2_6_query_poison_by_instance(adev, inst, sub); + + return !!poison_stat; +} + +const struct amdgpu_ras_block_hw_ops jpeg_v2_6_ras_hw_ops = { + .query_poison_status = jpeg_v2_6_query_ras_poison_status, +}; + +static struct amdgpu_jpeg_ras jpeg_v2_6_ras = { + .ras_block = { + .hw_ops = &jpeg_v2_6_ras_hw_ops, + }, +}; + +static void jpeg_v2_5_set_ras_funcs(struct amdgpu_device *adev) +{ + switch (adev->ip_versions[JPEG_HWIP][0]) { + case IP_VERSION(2, 6, 0): + adev->jpeg.ras = &jpeg_v2_6_ras; + break; + default: + break; + } + + if (adev->jpeg.ras) { + amdgpu_ras_register_ras_block(adev, &adev->jpeg.ras->ras_block); + + strcpy(adev->jpeg.ras->ras_block.ras_comm.name, "jpeg"); + adev->jpeg.ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__JPEG; + adev->jpeg.ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__POISON; + adev->jpeg.ras_if = &adev->jpeg.ras->ras_block.ras_comm; + + /* If don't define special ras_late_init function, use default ras_late_init */ + if (!adev->jpeg.ras->ras_block.ras_late_init) + adev->jpeg.ras->ras_block.ras_late_init = amdgpu_ras_block_late_init; + } +} diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.h b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.h index 3b0aa29b9879..1e858c6cdf13 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.h +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.h @@ -24,6 +24,13 @@ #ifndef __JPEG_V2_5_H__ #define __JPEG_V2_5_H__ +enum amdgpu_jpeg_v2_6_sub_block { + AMDGPU_JPEG_V2_6_JPEG0 = 0, + AMDGPU_JPEG_V2_6_JPEG1, + + AMDGPU_JPEG_V2_6_MAX_SUB_BLOCK, +}; + extern const struct amdgpu_ip_block_version jpeg_v2_5_ip_block; extern const struct amdgpu_ip_block_version jpeg_v2_6_ip_block; diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c index 4c9f0c0f3116..3f44a099c52a 100644 --- 
a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c @@ -546,7 +546,7 @@ static int mmhub_v1_0_set_clockgating(struct amdgpu_device *adev, return 0; } -static void mmhub_v1_0_get_clockgating(struct amdgpu_device *adev, u32 *flags) +static void mmhub_v1_0_get_clockgating(struct amdgpu_device *adev, u64 *flags) { int data, data1; diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c index 3b901f941627..6fa7090bc6cb 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c @@ -542,7 +542,7 @@ static int mmhub_v1_7_set_clockgating(struct amdgpu_device *adev, return 0; } -static void mmhub_v1_7_get_clockgating(struct amdgpu_device *adev, u32 *flags) +static void mmhub_v1_7_get_clockgating(struct amdgpu_device *adev, u64 *flags) { int data, data1; diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c index 3718ff610ab2..636abd855686 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c @@ -682,7 +682,7 @@ static int mmhub_v2_0_set_clockgating(struct amdgpu_device *adev, return 0; } -static void mmhub_v2_0_get_clockgating(struct amdgpu_device *adev, u32 *flags) +static void mmhub_v2_0_get_clockgating(struct amdgpu_device *adev, u64 *flags) { int data, data1; diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c index 1957fb098c4d..ff44c5364a8c 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c @@ -577,7 +577,7 @@ static int mmhub_v2_3_set_clockgating(struct amdgpu_device *adev, return 0; } -static void mmhub_v2_3_get_clockgating(struct amdgpu_device *adev, u32 *flags) +static void mmhub_v2_3_get_clockgating(struct amdgpu_device *adev, u64 *flags) { int data, data1, data2, data3; diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c index 
619106f7d23d..6e0145b2b408 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c @@ -647,7 +647,7 @@ static int mmhub_v9_4_set_clockgating(struct amdgpu_device *adev, return 0; } -static void mmhub_v9_4_get_clockgating(struct amdgpu_device *adev, u32 *flags) +static void mmhub_v9_4_get_clockgating(struct amdgpu_device *adev, u64 *flags) { int data, data1; diff --git a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c index 8ce5b8ca1fd7..97201ab0965e 100644 --- a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c @@ -685,7 +685,7 @@ static int navi10_ih_set_powergating_state(void *handle, return 0; } -static void navi10_ih_get_clockgating_state(void *handle, u32 *flags) +static void navi10_ih_get_clockgating_state(void *handle, u64 *flags) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c index ee7cab37dfd5..6cd1fb2eb913 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c @@ -278,7 +278,7 @@ static void nbio_v2_3_update_medium_grain_light_sleep(struct amdgpu_device *adev } static void nbio_v2_3_get_clockgating_state(struct amdgpu_device *adev, - u32 *flags) + u64 *flags) { int data; diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c index 4bbacf1be25a..f7f6ddebd3e4 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c @@ -210,7 +210,7 @@ static void nbio_v6_1_update_medium_grain_light_sleep(struct amdgpu_device *adev } static void nbio_v6_1_get_clockgating_state(struct amdgpu_device *adev, - u32 *flags) + u64 *flags) { int data; diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c index 37a4039fdfc5..aa0326d00c72 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c +++ 
b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c @@ -205,7 +205,7 @@ static void nbio_v7_0_update_medium_grain_light_sleep(struct amdgpu_device *adev } static void nbio_v7_0_get_clockgating_state(struct amdgpu_device *adev, - u32 *flags) + u64 *flags) { int data; diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_2.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_2.c index 6f81de6f3cc4..31776b12e4c4 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_2.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_2.c @@ -306,7 +306,7 @@ static void nbio_v7_2_update_medium_grain_light_sleep(struct amdgpu_device *adev } static void nbio_v7_2_get_clockgating_state(struct amdgpu_device *adev, - u32 *flags) + u64 *flags) { int data; diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c index c2357e83a8c4..4531761dcf77 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c @@ -273,7 +273,7 @@ static void nbio_v7_4_update_medium_grain_light_sleep(struct amdgpu_device *adev } static void nbio_v7_4_get_clockgating_state(struct amdgpu_device *adev, - u32 *flags) + u64 *flags) { int data; diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c index e19f14c3ef59..0a7946c59a42 100644 --- a/drivers/gpu/drm/amd/amdgpu/nv.c +++ b/drivers/gpu/drm/amd/amdgpu/nv.c @@ -1115,7 +1115,7 @@ static int nv_common_set_powergating_state(void *handle, return 0; } -static void nv_common_get_clockgating_state(void *handle, u32 *flags) +static void nv_common_get_clockgating_state(void *handle, u64 *flags) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c index 4ef4feff5649..3695374896ed 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c @@ -1535,7 +1535,7 @@ static int sdma_v3_0_set_powergating_state(void *handle, return 0; } -static void sdma_v3_0_get_clockgating_state(void *handle, u32 
*flags) +static void sdma_v3_0_get_clockgating_state(void *handle, u64 *flags) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; int data; diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index d7e8f7232364..8589ab1c9800 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c @@ -2372,7 +2372,7 @@ static int sdma_v4_0_set_powergating_state(void *handle, return 0; } -static void sdma_v4_0_get_clockgating_state(void *handle, u32 *flags) +static void sdma_v4_0_get_clockgating_state(void *handle, u64 *flags) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; int data; diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c index a8d49c005f73..775aabde1ae2 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c @@ -1648,7 +1648,7 @@ static int sdma_v5_0_set_powergating_state(void *handle, return 0; } -static void sdma_v5_0_get_clockgating_state(void *handle, u32 *flags) +static void sdma_v5_0_get_clockgating_state(void *handle, u64 *flags) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; int data; diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c index 824eace69884..ca50857b982d 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c @@ -1645,7 +1645,7 @@ static int sdma_v5_2_set_powergating_state(void *handle, return 0; } -static void sdma_v5_2_get_clockgating_state(void *handle, u32 *flags) +static void sdma_v5_2_get_clockgating_state(void *handle, u64 *flags) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; int data; diff --git a/drivers/gpu/drm/amd/amdgpu/smuio_v11_0.c b/drivers/gpu/drm/amd/amdgpu/smuio_v11_0.c index b6f1322f908c..acdc40f99ab3 100644 --- a/drivers/gpu/drm/amd/amdgpu/smuio_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/smuio_v11_0.c @@ -59,7 +59,7 @@ static void 
smuio_v11_0_update_rom_clock_gating(struct amdgpu_device *adev, bool WREG32_SOC15(SMUIO, 0, mmCGTT_ROM_CLK_CTRL0, data); } -static void smuio_v11_0_get_clock_gating_state(struct amdgpu_device *adev, u32 *flags) +static void smuio_v11_0_get_clock_gating_state(struct amdgpu_device *adev, u64 *flags) { u32 data; diff --git a/drivers/gpu/drm/amd/amdgpu/smuio_v11_0_6.c b/drivers/gpu/drm/amd/amdgpu/smuio_v11_0_6.c index 3a18dbb55c32..2afeb8b37f62 100644 --- a/drivers/gpu/drm/amd/amdgpu/smuio_v11_0_6.c +++ b/drivers/gpu/drm/amd/amdgpu/smuio_v11_0_6.c @@ -56,7 +56,7 @@ static void smuio_v11_0_6_update_rom_clock_gating(struct amdgpu_device *adev, bo WREG32_SOC15(SMUIO, 0, mmCGTT_ROM_CLK_CTRL0, data); } -static void smuio_v11_0_6_get_clock_gating_state(struct amdgpu_device *adev, u32 *flags) +static void smuio_v11_0_6_get_clock_gating_state(struct amdgpu_device *adev, u64 *flags) { u32 data; diff --git a/drivers/gpu/drm/amd/amdgpu/smuio_v13_0.c b/drivers/gpu/drm/amd/amdgpu/smuio_v13_0.c index 39b7c206770f..13e905c22592 100644 --- a/drivers/gpu/drm/amd/amdgpu/smuio_v13_0.c +++ b/drivers/gpu/drm/amd/amdgpu/smuio_v13_0.c @@ -58,7 +58,7 @@ static void smuio_v13_0_update_rom_clock_gating(struct amdgpu_device *adev, bool WREG32_SOC15(SMUIO, 0, regCGTT_ROM_CLK_CTRL0, data); } -static void smuio_v13_0_get_clock_gating_state(struct amdgpu_device *adev, u32 *flags) +static void smuio_v13_0_get_clock_gating_state(struct amdgpu_device *adev, u64 *flags) { u32 data; diff --git a/drivers/gpu/drm/amd/amdgpu/smuio_v9_0.c b/drivers/gpu/drm/amd/amdgpu/smuio_v9_0.c index 8417890af227..e4e30b9d481b 100644 --- a/drivers/gpu/drm/amd/amdgpu/smuio_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/smuio_v9_0.c @@ -56,7 +56,7 @@ static void smuio_v9_0_update_rom_clock_gating(struct amdgpu_device *adev, bool WREG32_SOC15(SMUIO, 0, mmCGTT_ROM_CLK_CTRL0, data); } -static void smuio_v9_0_get_clock_gating_state(struct amdgpu_device *adev, u32 *flags) +static void smuio_v9_0_get_clock_gating_state(struct 
amdgpu_device *adev, u64 *flags) { u32 data; diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index 3d0251ef8d79..3ee7322081d2 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -1419,7 +1419,7 @@ static int soc15_common_set_clockgating_state(void *handle, return 0; } -static void soc15_common_get_clockgating_state(void *handle, u32 *flags) +static void soc15_common_get_clockgating_state(void *handle, u64 *flags) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; int data; diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c b/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c index c45d9c14ecbc..606892dbea1c 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c +++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c @@ -64,21 +64,62 @@ static inline uint32_t get_umc_v6_7_channel_index(struct amdgpu_device *adev, return adev->umc.channel_idx_tbl[umc_inst * adev->umc.channel_inst_num + ch_inst]; } +static void umc_v6_7_query_error_status_helper(struct amdgpu_device *adev, + uint64_t mc_umc_status, uint32_t umc_reg_offset) +{ + uint32_t mc_umc_addr; + uint64_t reg_value; + + if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1) + dev_info(adev->dev, "Deferred error, no user action is needed.\n"); + + if (mc_umc_status) + dev_info(adev->dev, "MCA STATUS 0x%llx, umc_reg_offset 0x%x\n", mc_umc_status, umc_reg_offset); + + /* print IPID registers value */ + mc_umc_addr = + SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_IPIDT0); + reg_value = RREG64_PCIE((mc_umc_addr + umc_reg_offset) * 4); + if (reg_value) + dev_info(adev->dev, "MCA IPID 0x%llx, umc_reg_offset 0x%x\n", reg_value, umc_reg_offset); + + /* print SYND registers value */ + mc_umc_addr = + SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_SYNDT0); + reg_value = RREG64_PCIE((mc_umc_addr + umc_reg_offset) * 4); + if (reg_value) + dev_info(adev->dev, "MCA SYND 0x%llx, umc_reg_offset 0x%x\n", reg_value, umc_reg_offset); + + /* print 
MISC0 registers value */ + mc_umc_addr = + SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_MISC0T0); + reg_value = RREG64_PCIE((mc_umc_addr + umc_reg_offset) * 4); + if (reg_value) + dev_info(adev->dev, "MCA MISC0 0x%llx, umc_reg_offset 0x%x\n", reg_value, umc_reg_offset); +} + static void umc_v6_7_ecc_info_query_correctable_error_count(struct amdgpu_device *adev, uint32_t umc_inst, uint32_t ch_inst, unsigned long *error_count) { uint64_t mc_umc_status; uint32_t eccinfo_table_idx; + uint32_t umc_reg_offset; struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); + umc_reg_offset = get_umc_v6_7_reg_offset(adev, + umc_inst, ch_inst); + eccinfo_table_idx = umc_inst * adev->umc.channel_inst_num + ch_inst; /* check for SRAM correctable error MCUMC_STATUS is a 64 bit register */ mc_umc_status = ras->umc_ecc.ecc[eccinfo_table_idx].mca_umc_status; if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 && - REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1) + REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1) { *error_count += 1; + + umc_v6_7_query_error_status_helper(adev, mc_umc_status, umc_reg_offset); + } } static void umc_v6_7_ecc_info_querry_uncorrectable_error_count(struct amdgpu_device *adev, @@ -88,8 +129,6 @@ static void umc_v6_7_ecc_info_querry_uncorrectable_error_count(struct amdgpu_dev uint64_t mc_umc_status; uint32_t eccinfo_table_idx; uint32_t umc_reg_offset; - uint32_t mc_umc_addr; - uint64_t reg_value; struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); umc_reg_offset = get_umc_v6_7_reg_offset(adev, @@ -106,32 +145,7 @@ static void umc_v6_7_ecc_info_querry_uncorrectable_error_count(struct amdgpu_dev REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, TCC) == 1)) { *error_count += 1; - if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1) - dev_info(adev->dev, "Deferred error, no user action is needed.\n"); - - if (mc_umc_status) - dev_info(adev->dev, "MCA STATUS 0x%llx, 
umc_reg_offset 0x%x\n", mc_umc_status, umc_reg_offset); - - /* print IPID registers value */ - mc_umc_addr = - SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_IPIDT0); - reg_value = RREG64_PCIE((mc_umc_addr + umc_reg_offset) * 4); - if (reg_value) - dev_info(adev->dev, "MCA IPID 0x%llx, umc_reg_offset 0x%x\n", reg_value, umc_reg_offset); - - /* print SYND registers value */ - mc_umc_addr = - SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_SYNDT0); - reg_value = RREG64_PCIE((mc_umc_addr + umc_reg_offset) * 4); - if (reg_value) - dev_info(adev->dev, "MCA SYND 0x%llx, umc_reg_offset 0x%x\n", reg_value, umc_reg_offset); - - /* print MISC0 registers value */ - mc_umc_addr = - SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_MISC0T0); - reg_value = RREG64_PCIE((mc_umc_addr + umc_reg_offset) * 4); - if (reg_value) - dev_info(adev->dev, "MCA MISC0 0x%llx, umc_reg_offset 0x%x\n", reg_value, umc_reg_offset); + umc_v6_7_query_error_status_helper(adev, mc_umc_status, umc_reg_offset); } } @@ -277,8 +291,11 @@ static void umc_v6_7_query_correctable_error_count(struct amdgpu_device *adev, MCUMC_STATUS is a 64 bit register */ mc_umc_status = RREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4); if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 && - REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1) + REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1) { *error_count += 1; + + umc_v6_7_query_error_status_helper(adev, mc_umc_status, umc_reg_offset); + } } static void umc_v6_7_querry_uncorrectable_error_count(struct amdgpu_device *adev, @@ -287,8 +304,6 @@ static void umc_v6_7_querry_uncorrectable_error_count(struct amdgpu_device *adev { uint64_t mc_umc_status; uint32_t mc_umc_status_addr; - uint32_t mc_umc_addr; - uint64_t reg_value; mc_umc_status_addr = SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_STATUST0); @@ -303,32 +318,7 @@ static void umc_v6_7_querry_uncorrectable_error_count(struct amdgpu_device *adev 
REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, TCC) == 1)) { *error_count += 1; - if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1) - dev_info(adev->dev, "Deferred error, no user action is needed.\n"); - - if (mc_umc_status) - dev_info(adev->dev, "MCA STATUS 0x%llx, umc_reg_offset 0x%x\n", mc_umc_status, umc_reg_offset); - - /* print IPID registers value */ - mc_umc_addr = - SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_IPIDT0); - reg_value = RREG64_PCIE((mc_umc_addr + umc_reg_offset) * 4); - if (reg_value) - dev_info(adev->dev, "MCA IPID 0x%llx, umc_reg_offset 0x%x\n", reg_value, umc_reg_offset); - - /* print SYND registers value */ - mc_umc_addr = - SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_SYNDT0); - reg_value = RREG64_PCIE((mc_umc_addr + umc_reg_offset) * 4); - if (reg_value) - dev_info(adev->dev, "MCA SYND 0x%llx, umc_reg_offset 0x%x\n", reg_value, umc_reg_offset); - - /* print MISC0 registers value */ - mc_umc_addr = - SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_MISC0T0); - reg_value = RREG64_PCIE((mc_umc_addr + umc_reg_offset) * 4); - if (reg_value) - dev_info(adev->dev, "MCA MISC0 0x%llx, umc_reg_offset 0x%x\n", reg_value, umc_reg_offset); + umc_v6_7_query_error_status_helper(adev, mc_umc_status, umc_reg_offset); } } diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c index 563493d1f830..d7e31e48a2b8 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c @@ -833,7 +833,7 @@ out: return ret; } -static void uvd_v5_0_get_clockgating_state(void *handle, u32 *flags) +static void uvd_v5_0_get_clockgating_state(void *handle, u64 *flags) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; int data; diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c index 2d558c2f417d..375c440957dc 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c @@ -1494,7 
+1494,7 @@ out: return ret; } -static void uvd_v6_0_get_clockgating_state(void *handle, u32 *flags) +static void uvd_v6_0_get_clockgating_state(void *handle, u64 *flags) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; int data; diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c index 142e291983b4..8def62c83ffd 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c @@ -831,7 +831,7 @@ out: return ret; } -static void vce_v3_0_get_clockgating_state(void *handle, u32 *flags) +static void vce_v3_0_get_clockgating_state(void *handle, u64 *flags) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; int data; diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c index 1bf672966a62..17d44be58877 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c @@ -31,6 +31,7 @@ #include "soc15d.h" #include "vcn_v2_0.h" #include "mmsch_v1_0.h" +#include "vcn_v2_5.h" #include "vcn/vcn_2_5_offset.h" #include "vcn/vcn_2_5_sh_mask.h" @@ -59,6 +60,7 @@ static int vcn_v2_5_set_powergating_state(void *handle, static int vcn_v2_5_pause_dpg_mode(struct amdgpu_device *adev, int inst_idx, struct dpg_pause_state *new_state); static int vcn_v2_5_sriov_start(struct amdgpu_device *adev); +static void vcn_v2_5_set_ras_funcs(struct amdgpu_device *adev); static int amdgpu_ih_clientid_vcns[] = { SOC15_IH_CLIENTID_VCN, @@ -100,6 +102,7 @@ static int vcn_v2_5_early_init(void *handle) vcn_v2_5_set_dec_ring_funcs(adev); vcn_v2_5_set_enc_ring_funcs(adev); vcn_v2_5_set_irq_funcs(adev); + vcn_v2_5_set_ras_funcs(adev); return 0; } @@ -1932,3 +1935,71 @@ const struct amdgpu_ip_block_version vcn_v2_6_ip_block = .rev = 0, .funcs = &vcn_v2_6_ip_funcs, }; + +static uint32_t vcn_v2_6_query_poison_by_instance(struct amdgpu_device *adev, + uint32_t instance, uint32_t sub_block) +{ + uint32_t poison_stat = 0, reg_value = 0; + + switch 
(sub_block) { + case AMDGPU_VCN_V2_6_VCPU_VCODEC: + reg_value = RREG32_SOC15(VCN, instance, mmUVD_RAS_VCPU_VCODEC_STATUS); + poison_stat = REG_GET_FIELD(reg_value, UVD_RAS_VCPU_VCODEC_STATUS, POISONED_PF); + break; + default: + break; + } + + if (poison_stat) + dev_info(adev->dev, "Poison detected in VCN%d, sub_block%d\n", + instance, sub_block); + + return poison_stat; +} + +static bool vcn_v2_6_query_poison_status(struct amdgpu_device *adev) +{ + uint32_t inst, sub; + uint32_t poison_stat = 0; + + for (inst = 0; inst < adev->vcn.num_vcn_inst; inst++) + for (sub = 0; sub < AMDGPU_VCN_V2_6_MAX_SUB_BLOCK; sub++) + poison_stat += + vcn_v2_6_query_poison_by_instance(adev, inst, sub); + + return !!poison_stat; +} + +const struct amdgpu_ras_block_hw_ops vcn_v2_6_ras_hw_ops = { + .query_poison_status = vcn_v2_6_query_poison_status, +}; + +static struct amdgpu_vcn_ras vcn_v2_6_ras = { + .ras_block = { + .hw_ops = &vcn_v2_6_ras_hw_ops, + }, +}; + +static void vcn_v2_5_set_ras_funcs(struct amdgpu_device *adev) +{ + switch (adev->ip_versions[VCN_HWIP][0]) { + case IP_VERSION(2, 6, 0): + adev->vcn.ras = &vcn_v2_6_ras; + break; + default: + break; + } + + if (adev->vcn.ras) { + amdgpu_ras_register_ras_block(adev, &adev->vcn.ras->ras_block); + + strcpy(adev->vcn.ras->ras_block.ras_comm.name, "vcn"); + adev->vcn.ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__VCN; + adev->vcn.ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__POISON; + adev->vcn.ras_if = &adev->vcn.ras->ras_block.ras_comm; + + /* If don't define special ras_late_init function, use default ras_late_init */ + if (!adev->vcn.ras->ras_block.ras_late_init) + adev->vcn.ras->ras_block.ras_late_init = amdgpu_ras_block_late_init; + } +} diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.h b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.h index e72f799ed0fd..1c19af74e4fd 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.h +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.h @@ -24,6 +24,12 @@ #ifndef __VCN_V2_5_H__ #define __VCN_V2_5_H__ +enum 
amdgpu_vcn_v2_6_sub_block { + AMDGPU_VCN_V2_6_VCPU_VCODEC = 0, + + AMDGPU_VCN_V2_6_MAX_SUB_BLOCK, +}; + extern const struct amdgpu_ip_block_version vcn_v2_5_ip_block; extern const struct amdgpu_ip_block_version vcn_v2_6_ip_block; diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c index 039b90cdc3bc..c5b88d15a6df 100644 --- a/drivers/gpu/drm/amd/amdgpu/vi.c +++ b/drivers/gpu/drm/amd/amdgpu/vi.c @@ -2033,7 +2033,7 @@ static int vi_common_set_powergating_state(void *handle, return 0; } -static void vi_common_get_clockgating_state(void *handle, u32 *flags) +static void vi_common_get_clockgating_state(void *handle, u64 *flags) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; int data; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 607f65ab39ac..60438193c0c1 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -1146,7 +1146,6 @@ static int kfd_ioctl_map_memory_to_gpu(struct file *filep, long err = 0; int i; uint32_t *devices_arr = NULL; - bool table_freed = false; if (!args->n_devices) { pr_debug("Device IDs array empty\n"); @@ -1208,7 +1207,7 @@ static int kfd_ioctl_map_memory_to_gpu(struct file *filep, err = amdgpu_amdkfd_gpuvm_map_memory_to_gpu( peer_pdd->dev->adev, (struct kgd_mem *)mem, - peer_pdd->drm_priv, &table_freed); + peer_pdd->drm_priv); if (err) { struct pci_dev *pdev = peer_pdd->dev->adev->pdev; @@ -1233,13 +1232,11 @@ static int kfd_ioctl_map_memory_to_gpu(struct file *filep, } /* Flush TLBs after waiting for the page table updates to complete */ - if (table_freed || !kfd_flush_tlb_after_unmap(dev)) { - for (i = 0; i < args->n_devices; i++) { - peer_pdd = kfd_process_device_data_by_id(p, devices_arr[i]); - if (WARN_ON_ONCE(!peer_pdd)) - continue; - kfd_flush_tlb(peer_pdd, TLB_FLUSH_LEGACY); - } + for (i = 0; i < args->n_devices; i++) { + peer_pdd = kfd_process_device_data_by_id(p, devices_arr[i]); + if 
(WARN_ON_ONCE(!peer_pdd)) + continue; + kfd_flush_tlb(peer_pdd, TLB_FLUSH_LEGACY); } kfree(devices_arr); @@ -2206,8 +2203,8 @@ static int criu_restore_bo(struct kfd_process *p, if (IS_ERR(peer_pdd)) return PTR_ERR(peer_pdd); - ret = amdgpu_amdkfd_gpuvm_map_memory_to_gpu(peer->adev, kgd_mem, peer_pdd->drm_priv, - NULL); + ret = amdgpu_amdkfd_gpuvm_map_memory_to_gpu(peer->adev, kgd_mem, + peer_pdd->drm_priv); if (ret) { pr_err("Failed to map to gpu %d/%d\n", j, p->n_pdds); return ret; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c index 1eaabd2cb41b..afc8a7fcdad8 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c @@ -1056,7 +1056,7 @@ static int kfd_parse_subtype_iolink(struct crat_subtype_iolink *iolink, * table, add corresponded reversed direction link now. */ if (props && (iolink->flags & CRAT_IOLINK_FLAGS_BI_DIRECTIONAL)) { - to_dev = kfd_topology_device_by_proximity_domain(id_to); + to_dev = kfd_topology_device_by_proximity_domain_no_lock(id_to); if (!to_dev) return -ENODEV; /* same everything but the other direction */ @@ -2225,7 +2225,7 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image, */ if (kdev->hive_id) { for (nid = 0; nid < proximity_domain; ++nid) { - peer_dev = kfd_topology_device_by_proximity_domain(nid); + peer_dev = kfd_topology_device_by_proximity_domain_no_lock(nid); if (!peer_dev->gpu) continue; if (peer_dev->gpu->hive_id != kdev->hive_id) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c index 64f4a51cc880..6e5e8d637f48 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c @@ -128,8 +128,8 @@ static int allocate_event_notification_slot(struct kfd_process *p, } /* - * Assumes that p->event_mutex is held and of course that p is not going - * away (current or locked). 
+ * Assumes that p->event_mutex or rcu_readlock is held and of course that p is + * not going away. */ static struct kfd_event *lookup_event_by_id(struct kfd_process *p, uint32_t id) { @@ -251,16 +251,18 @@ static void destroy_event(struct kfd_process *p, struct kfd_event *ev) struct kfd_event_waiter *waiter; /* Wake up pending waiters. They will return failure */ + spin_lock(&ev->lock); list_for_each_entry(waiter, &ev->wq.head, wait.entry) - waiter->event = NULL; + WRITE_ONCE(waiter->event, NULL); wake_up_all(&ev->wq); + spin_unlock(&ev->lock); if (ev->type == KFD_EVENT_TYPE_SIGNAL || ev->type == KFD_EVENT_TYPE_DEBUG) p->signal_event_count--; idr_remove(&p->event_idr, ev->event_id); - kfree(ev); + kfree_rcu(ev, rcu); } static void destroy_events(struct kfd_process *p) @@ -392,6 +394,7 @@ int kfd_event_create(struct file *devkfd, struct kfd_process *p, ev->auto_reset = auto_reset; ev->signaled = false; + spin_lock_init(&ev->lock); init_waitqueue_head(&ev->wq); *event_page_offset = 0; @@ -466,6 +469,7 @@ int kfd_criu_restore_event(struct file *devkfd, ev->auto_reset = ev_priv->auto_reset; ev->signaled = ev_priv->signaled; + spin_lock_init(&ev->lock); init_waitqueue_head(&ev->wq); mutex_lock(&p->event_mutex); @@ -609,13 +613,13 @@ static void set_event(struct kfd_event *ev) /* Auto reset if the list is non-empty and we're waking * someone. waitqueue_active is safe here because we're - * protected by the p->event_mutex, which is also held when + * protected by the ev->lock, which is also held when * updating the wait queues in kfd_wait_on_events. 
*/ ev->signaled = !ev->auto_reset || !waitqueue_active(&ev->wq); list_for_each_entry(waiter, &ev->wq.head, wait.entry) - waiter->activated = true; + WRITE_ONCE(waiter->activated, true); wake_up_all(&ev->wq); } @@ -626,16 +630,23 @@ int kfd_set_event(struct kfd_process *p, uint32_t event_id) int ret = 0; struct kfd_event *ev; - mutex_lock(&p->event_mutex); + rcu_read_lock(); ev = lookup_event_by_id(p, event_id); + if (!ev) { + ret = -EINVAL; + goto unlock_rcu; + } + spin_lock(&ev->lock); - if (ev && event_can_be_cpu_signaled(ev)) + if (event_can_be_cpu_signaled(ev)) set_event(ev); else ret = -EINVAL; - mutex_unlock(&p->event_mutex); + spin_unlock(&ev->lock); +unlock_rcu: + rcu_read_unlock(); return ret; } @@ -650,23 +661,30 @@ int kfd_reset_event(struct kfd_process *p, uint32_t event_id) int ret = 0; struct kfd_event *ev; - mutex_lock(&p->event_mutex); + rcu_read_lock(); ev = lookup_event_by_id(p, event_id); + if (!ev) { + ret = -EINVAL; + goto unlock_rcu; + } + spin_lock(&ev->lock); - if (ev && event_can_be_cpu_signaled(ev)) + if (event_can_be_cpu_signaled(ev)) reset_event(ev); else ret = -EINVAL; - mutex_unlock(&p->event_mutex); + spin_unlock(&ev->lock); +unlock_rcu: + rcu_read_unlock(); return ret; } static void acknowledge_signal(struct kfd_process *p, struct kfd_event *ev) { - page_slots(p->signal_page)[ev->event_id] = UNSIGNALED_EVENT_SLOT; + WRITE_ONCE(page_slots(p->signal_page)[ev->event_id], UNSIGNALED_EVENT_SLOT); } static void set_event_from_interrupt(struct kfd_process *p, @@ -674,7 +692,9 @@ static void set_event_from_interrupt(struct kfd_process *p, { if (ev && event_can_be_gpu_signaled(ev)) { acknowledge_signal(p, ev); + spin_lock(&ev->lock); set_event(ev); + spin_unlock(&ev->lock); } } @@ -693,7 +713,7 @@ void kfd_signal_event_interrupt(u32 pasid, uint32_t partial_id, if (!p) return; /* Presumably process exited. 
*/ - mutex_lock(&p->event_mutex); + rcu_read_lock(); if (valid_id_bits) ev = lookup_signaled_event_by_partial_id(p, partial_id, @@ -721,7 +741,7 @@ void kfd_signal_event_interrupt(u32 pasid, uint32_t partial_id, if (id >= KFD_SIGNAL_EVENT_LIMIT) break; - if (slots[id] != UNSIGNALED_EVENT_SLOT) + if (READ_ONCE(slots[id]) != UNSIGNALED_EVENT_SLOT) set_event_from_interrupt(p, ev); } } else { @@ -730,14 +750,14 @@ void kfd_signal_event_interrupt(u32 pasid, uint32_t partial_id, * only signaled events from the IDR. */ for (id = 0; id < KFD_SIGNAL_EVENT_LIMIT; id++) - if (slots[id] != UNSIGNALED_EVENT_SLOT) { + if (READ_ONCE(slots[id]) != UNSIGNALED_EVENT_SLOT) { ev = lookup_event_by_id(p, id); set_event_from_interrupt(p, ev); } } } - mutex_unlock(&p->event_mutex); + rcu_read_unlock(); kfd_unref_process(p); } @@ -760,7 +780,7 @@ static struct kfd_event_waiter *alloc_event_waiters(uint32_t num_events) return event_waiters; } -static int init_event_waiter_get_status(struct kfd_process *p, +static int init_event_waiter(struct kfd_process *p, struct kfd_event_waiter *waiter, uint32_t event_id) { @@ -769,22 +789,15 @@ static int init_event_waiter_get_status(struct kfd_process *p, if (!ev) return -EINVAL; + spin_lock(&ev->lock); waiter->event = ev; waiter->activated = ev->signaled; ev->signaled = ev->signaled && !ev->auto_reset; - - return 0; -} - -static void init_event_waiter_add_to_waitlist(struct kfd_event_waiter *waiter) -{ - struct kfd_event *ev = waiter->event; - - /* Only add to the wait list if we actually need to - * wait on this event. 
- */ if (!waiter->activated) add_wait_queue(&ev->wq, &waiter->wait); + spin_unlock(&ev->lock); + + return 0; } /* test_event_condition - Test condition of events being waited for @@ -804,10 +817,10 @@ static uint32_t test_event_condition(bool all, uint32_t num_events, uint32_t activated_count = 0; for (i = 0; i < num_events; i++) { - if (!event_waiters[i].event) + if (!READ_ONCE(event_waiters[i].event)) return KFD_IOC_WAIT_RESULT_FAIL; - if (event_waiters[i].activated) { + if (READ_ONCE(event_waiters[i].activated)) { if (!all) return KFD_IOC_WAIT_RESULT_COMPLETE; @@ -836,6 +849,8 @@ static int copy_signaled_event_data(uint32_t num_events, for (i = 0; i < num_events; i++) { waiter = &event_waiters[i]; event = waiter->event; + if (!event) + return -EINVAL; /* event was destroyed */ if (waiter->activated && event->type == KFD_EVENT_TYPE_MEMORY) { dst = &data[i].memory_exception_data; src = &event->memory_exception_data; @@ -846,11 +861,8 @@ static int copy_signaled_event_data(uint32_t num_events, } return 0; - } - - static long user_timeout_to_jiffies(uint32_t user_timeout_ms) { if (user_timeout_ms == KFD_EVENT_TIMEOUT_IMMEDIATE) @@ -874,9 +886,12 @@ static void free_waiters(uint32_t num_events, struct kfd_event_waiter *waiters) uint32_t i; for (i = 0; i < num_events; i++) - if (waiters[i].event) + if (waiters[i].event) { + spin_lock(&waiters[i].event->lock); remove_wait_queue(&waiters[i].event->wq, &waiters[i].wait); + spin_unlock(&waiters[i].event->lock); + } kfree(waiters); } @@ -900,6 +915,9 @@ int kfd_wait_on_events(struct kfd_process *p, goto out; } + /* Use p->event_mutex here to protect against concurrent creation and + * destruction of events while we initialize event_waiters. 
+ */ mutex_lock(&p->event_mutex); for (i = 0; i < num_events; i++) { @@ -911,8 +929,8 @@ int kfd_wait_on_events(struct kfd_process *p, goto out_unlock; } - ret = init_event_waiter_get_status(p, &event_waiters[i], - event_data.event_id); + ret = init_event_waiter(p, &event_waiters[i], + event_data.event_id); if (ret) goto out_unlock; } @@ -930,10 +948,6 @@ int kfd_wait_on_events(struct kfd_process *p, goto out_unlock; } - /* Add to wait lists if we need to wait. */ - for (i = 0; i < num_events; i++) - init_event_waiter_add_to_waitlist(&event_waiters[i]); - mutex_unlock(&p->event_mutex); while (true) { @@ -978,14 +992,19 @@ int kfd_wait_on_events(struct kfd_process *p, } __set_current_state(TASK_RUNNING); + mutex_lock(&p->event_mutex); /* copy_signaled_event_data may sleep. So this has to happen * after the task state is set back to RUNNING. + * + * The event may also have been destroyed after signaling. So + * copy_signaled_event_data also must confirm that the event + * still exists. Therefore this must be under the p->event_mutex + * which is also held when events are destroyed. */ if (!ret && *wait_result == KFD_IOC_WAIT_RESULT_COMPLETE) ret = copy_signaled_event_data(num_events, event_waiters, events); - mutex_lock(&p->event_mutex); out_unlock: free_waiters(num_events, event_waiters); mutex_unlock(&p->event_mutex); @@ -1044,8 +1063,7 @@ int kfd_event_mmap(struct kfd_process *p, struct vm_area_struct *vma) } /* - * Assumes that p->event_mutex is held and of course - * that p is not going away (current or locked). + * Assumes that p is not going away. 
*/ static void lookup_events_by_type_and_signal(struct kfd_process *p, int type, void *event_data) @@ -1057,6 +1075,8 @@ static void lookup_events_by_type_and_signal(struct kfd_process *p, ev_data = (struct kfd_hsa_memory_exception_data *) event_data; + rcu_read_lock(); + id = KFD_FIRST_NONSIGNAL_EVENT_ID; idr_for_each_entry_continue(&p->event_idr, ev, id) if (ev->type == type) { @@ -1064,9 +1084,11 @@ static void lookup_events_by_type_and_signal(struct kfd_process *p, dev_dbg(kfd_device, "Event found: id %X type %d", ev->event_id, ev->type); + spin_lock(&ev->lock); set_event(ev); if (ev->type == KFD_EVENT_TYPE_MEMORY && ev_data) ev->memory_exception_data = *ev_data; + spin_unlock(&ev->lock); } if (type == KFD_EVENT_TYPE_MEMORY) { @@ -1089,6 +1111,8 @@ static void lookup_events_by_type_and_signal(struct kfd_process *p, p->lead_thread->pid, p->pasid); } } + + rcu_read_unlock(); } #ifdef KFD_SUPPORT_IOMMU_V2 @@ -1164,16 +1188,10 @@ void kfd_signal_iommu_event(struct kfd_dev *dev, u32 pasid, if (KFD_GC_VERSION(dev) != IP_VERSION(9, 1, 0) && KFD_GC_VERSION(dev) != IP_VERSION(9, 2, 2) && - KFD_GC_VERSION(dev) != IP_VERSION(9, 3, 0)) { - mutex_lock(&p->event_mutex); - - /* Lookup events by type and signal them */ + KFD_GC_VERSION(dev) != IP_VERSION(9, 3, 0)) lookup_events_by_type_and_signal(p, KFD_EVENT_TYPE_MEMORY, &memory_exception_data); - mutex_unlock(&p->event_mutex); - } - kfd_unref_process(p); } #endif /* KFD_SUPPORT_IOMMU_V2 */ @@ -1190,12 +1208,7 @@ void kfd_signal_hw_exception_event(u32 pasid) if (!p) return; /* Presumably process exited. */ - mutex_lock(&p->event_mutex); - - /* Lookup events by type and signal them */ lookup_events_by_type_and_signal(p, KFD_EVENT_TYPE_HW_EXCEPTION, NULL); - - mutex_unlock(&p->event_mutex); kfd_unref_process(p); } @@ -1231,16 +1244,19 @@ void kfd_signal_vm_fault_event(struct kfd_dev *dev, u32 pasid, info->prot_write ? 
1 : 0; memory_exception_data.failure.imprecise = 0; } - mutex_lock(&p->event_mutex); + + rcu_read_lock(); id = KFD_FIRST_NONSIGNAL_EVENT_ID; idr_for_each_entry_continue(&p->event_idr, ev, id) if (ev->type == KFD_EVENT_TYPE_MEMORY) { + spin_lock(&ev->lock); ev->memory_exception_data = memory_exception_data; set_event(ev); + spin_unlock(&ev->lock); } - mutex_unlock(&p->event_mutex); + rcu_read_unlock(); kfd_unref_process(p); } @@ -1274,22 +1290,28 @@ void kfd_signal_reset_event(struct kfd_dev *dev) continue; } - mutex_lock(&p->event_mutex); + rcu_read_lock(); + id = KFD_FIRST_NONSIGNAL_EVENT_ID; idr_for_each_entry_continue(&p->event_idr, ev, id) { if (ev->type == KFD_EVENT_TYPE_HW_EXCEPTION) { + spin_lock(&ev->lock); ev->hw_exception_data = hw_exception_data; ev->hw_exception_data.gpu_id = user_gpu_id; set_event(ev); + spin_unlock(&ev->lock); } if (ev->type == KFD_EVENT_TYPE_MEMORY && reset_cause == KFD_HW_EXCEPTION_ECC) { + spin_lock(&ev->lock); ev->memory_exception_data = memory_exception_data; ev->memory_exception_data.gpu_id = user_gpu_id; set_event(ev); + spin_unlock(&ev->lock); } } - mutex_unlock(&p->event_mutex); + + rcu_read_unlock(); } srcu_read_unlock(&kfd_processes_srcu, idx); } @@ -1322,19 +1344,25 @@ void kfd_signal_poison_consumed_event(struct kfd_dev *dev, u32 pasid) memory_exception_data.gpu_id = user_gpu_id; memory_exception_data.failure.imprecise = true; - mutex_lock(&p->event_mutex); + rcu_read_lock(); + idr_for_each_entry_continue(&p->event_idr, ev, id) { if (ev->type == KFD_EVENT_TYPE_HW_EXCEPTION) { + spin_lock(&ev->lock); ev->hw_exception_data = hw_exception_data; set_event(ev); + spin_unlock(&ev->lock); } if (ev->type == KFD_EVENT_TYPE_MEMORY) { + spin_lock(&ev->lock); ev->memory_exception_data = memory_exception_data; set_event(ev); + spin_unlock(&ev->lock); } } - mutex_unlock(&p->event_mutex); + + rcu_read_unlock(); /* user application will handle SIGBUS signal */ send_sig(SIGBUS, p->lead_thread, 0); diff --git 
a/drivers/gpu/drm/amd/amdkfd/kfd_events.h b/drivers/gpu/drm/amd/amdkfd/kfd_events.h index 1238af11916e..1c62c8dd6460 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_events.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.h @@ -59,6 +59,7 @@ struct kfd_event { int type; + spinlock_t lock; wait_queue_head_t wq; /* List of event waiters. */ /* Only for signal events. */ @@ -69,6 +70,8 @@ struct kfd_event { struct kfd_hsa_memory_exception_data memory_exception_data; struct kfd_hsa_hw_exception_data hw_exception_data; }; + + struct rcu_head rcu; /* for asynchronous kfree_rcu */ }; #define KFD_EVENT_TIMEOUT_IMMEDIATE 0 diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c index 7eedbcd14828..03c29bdd89a1 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c @@ -91,28 +91,34 @@ enum SQ_INTERRUPT_ERROR_TYPE { #define KFD_SQ_INT_DATA__ERR_TYPE__SHIFT 20 static void event_interrupt_poison_consumption(struct kfd_dev *dev, - uint16_t pasid, uint16_t source_id) + uint16_t pasid, uint16_t client_id) { - int ret = -EINVAL; + int old_poison, ret = -EINVAL; struct kfd_process *p = kfd_lookup_process_by_pasid(pasid); if (!p) return; /* all queues of a process will be unmapped in one time */ - if (atomic_read(&p->poison)) { - kfd_unref_process(p); - return; - } - - atomic_set(&p->poison, 1); + old_poison = atomic_cmpxchg(&p->poison, 0, 1); kfd_unref_process(p); + if (old_poison) + return; - switch (source_id) { - case SOC15_INTSRC_SQ_INTERRUPT_MSG: + switch (client_id) { + case SOC15_IH_CLIENTID_SE0SH: + case SOC15_IH_CLIENTID_SE1SH: + case SOC15_IH_CLIENTID_SE2SH: + case SOC15_IH_CLIENTID_SE3SH: + case SOC15_IH_CLIENTID_UTCL2: ret = kfd_dqm_evict_pasid(dev->dqm, pasid); break; - case SOC15_INTSRC_SDMA_ECC: + case SOC15_IH_CLIENTID_SDMA0: + case SOC15_IH_CLIENTID_SDMA1: + case SOC15_IH_CLIENTID_SDMA2: + case SOC15_IH_CLIENTID_SDMA3: + case SOC15_IH_CLIENTID_SDMA4: + break; 
default: break; } @@ -122,10 +128,17 @@ static void event_interrupt_poison_consumption(struct kfd_dev *dev, /* resetting queue passes, do page retirement without gpu reset * resetting queue fails, fallback to gpu reset solution */ - if (!ret) + if (!ret) { + dev_warn(dev->adev->dev, + "RAS poison consumption, unmap queue flow succeeded: client id %d\n", + client_id); amdgpu_amdkfd_ras_poison_consumption_handler(dev->adev, false); - else + } else { + dev_warn(dev->adev->dev, + "RAS poison consumption, fall back to gpu reset flow: client id %d\n", + client_id); amdgpu_amdkfd_ras_poison_consumption_handler(dev->adev, true); + } } static bool event_interrupt_isr_v9(struct kfd_dev *dev, @@ -270,7 +283,7 @@ static void event_interrupt_wq_v9(struct kfd_dev *dev, sq_intr_err); if (sq_intr_err != SQ_INTERRUPT_ERROR_TYPE_ILLEGAL_INST && sq_intr_err != SQ_INTERRUPT_ERROR_TYPE_MEMVIOL) { - event_interrupt_poison_consumption(dev, pasid, source_id); + event_interrupt_poison_consumption(dev, pasid, client_id); return; } break; @@ -291,7 +304,7 @@ static void event_interrupt_wq_v9(struct kfd_dev *dev, if (source_id == SOC15_INTSRC_SDMA_TRAP) { kfd_signal_event_interrupt(pasid, context_id0 & 0xfffffff, 28); } else if (source_id == SOC15_INTSRC_SDMA_ECC) { - event_interrupt_poison_consumption(dev, pasid, source_id); + event_interrupt_poison_consumption(dev, pasid, client_id); return; } } else if (client_id == SOC15_IH_CLIENTID_VMC || @@ -300,6 +313,12 @@ static void event_interrupt_wq_v9(struct kfd_dev *dev, struct kfd_vm_fault_info info = {0}; uint16_t ring_id = SOC15_RING_ID_FROM_IH_ENTRY(ih_ring_entry); + if (client_id == SOC15_IH_CLIENTID_UTCL2 && + amdgpu_amdkfd_ras_query_utcl2_poison_status(dev->adev)) { + event_interrupt_poison_consumption(dev, pasid, client_id); + return; + } + info.vmid = vmid; info.mc_id = client_id; info.page_addr = ih_ring_entry[4] | diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c b/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c index 
9178cfe34f20..a9466d154395 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c @@ -146,15 +146,24 @@ static void interrupt_wq(struct work_struct *work) struct kfd_dev *dev = container_of(work, struct kfd_dev, interrupt_work); uint32_t ih_ring_entry[KFD_MAX_RING_ENTRY_SIZE]; + long start_jiffies = jiffies; if (dev->device_info.ih_ring_entry_size > sizeof(ih_ring_entry)) { dev_err_once(dev->adev->dev, "Ring entry too small\n"); return; } - while (dequeue_ih_ring_entry(dev, ih_ring_entry)) + while (dequeue_ih_ring_entry(dev, ih_ring_entry)) { dev->device_info.event_interrupt_class->interrupt_wq(dev, ih_ring_entry); + if (jiffies - start_jiffies > HZ) { + /* If we spent more than a second processing signals, + * reschedule the worker to avoid soft-lockup warnings + */ + queue_work(dev->ih_wq, &dev->interrupt_work); + break; + } + } } bool interrupt_is_wanted(struct kfd_dev *dev, diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 9967a73d5b0f..644f616b776f 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -706,6 +706,7 @@ struct kfd_process_device { /* VM context for GPUVM allocations */ struct file *drm_file; void *drm_priv; + atomic64_t tlb_seq; /* GPUVM allocations storage */ struct idr alloc_idr; @@ -1016,6 +1017,8 @@ int kfd_topology_add_device(struct kfd_dev *gpu); int kfd_topology_remove_device(struct kfd_dev *gpu); struct kfd_topology_device *kfd_topology_device_by_proximity_domain( uint32_t proximity_domain); +struct kfd_topology_device *kfd_topology_device_by_proximity_domain_no_lock( + uint32_t proximity_domain); struct kfd_topology_device *kfd_topology_device_by_id(uint32_t gpu_id); struct kfd_dev *kfd_device_by_id(uint32_t gpu_id); struct kfd_dev *kfd_device_by_pci_dev(const struct pci_dev *pdev); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index 
59c04b2d383b..9e82d7aa67fa 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -722,7 +722,7 @@ static int kfd_process_alloc_gpuvm(struct kfd_process_device *pdd, goto err_alloc_mem; err = amdgpu_amdkfd_gpuvm_map_memory_to_gpu(kdev->adev, *mem, - pdd->drm_priv, NULL); + pdd->drm_priv); if (err) goto err_map_mem; @@ -1560,6 +1560,7 @@ int kfd_process_device_init_vm(struct kfd_process_device *pdd, return ret; } pdd->drm_priv = drm_file->private_data; + atomic64_set(&pdd->tlb_seq, 0); ret = kfd_process_device_reserve_ib_mem(pdd); if (ret) @@ -1949,8 +1950,18 @@ int kfd_reserved_mem_mmap(struct kfd_dev *dev, struct kfd_process *process, void kfd_flush_tlb(struct kfd_process_device *pdd, enum TLB_FLUSH_TYPE type) { + struct amdgpu_vm *vm = drm_priv_to_vm(pdd->drm_priv); + uint64_t tlb_seq = amdgpu_vm_tlb_seq(vm); struct kfd_dev *dev = pdd->dev; + /* + * It can be that we race and lose here, but that is extremely unlikely + * and the worst thing which could happen is that we flush the changes + * into the TLB once more which is harmless. + */ + if (atomic64_xchg(&pdd->tlb_seq, tlb_seq) == tlb_seq) + return; + if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) { /* Nothing to flush until a VMID is assigned, which * only happens when the first queue is created. 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index b3fc3e958227..11b395b90a3d 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -1188,9 +1188,9 @@ svm_range_unmap_from_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm, pr_debug("[0x%llx 0x%llx]\n", start, last); - return amdgpu_vm_bo_update_mapping(adev, adev, vm, false, true, NULL, - start, last, init_pte_value, 0, - NULL, NULL, fence, NULL); + return amdgpu_vm_update_range(adev, vm, false, true, true, NULL, start, + last, init_pte_value, 0, 0, NULL, NULL, + fence); } static int @@ -1243,7 +1243,6 @@ svm_range_map_to_gpu(struct kfd_process_device *pdd, struct svm_range *prange, { struct amdgpu_device *adev = pdd->dev->adev; struct amdgpu_vm *vm = drm_priv_to_vm(pdd->drm_priv); - bool table_freed = false; uint64_t pte_flags; unsigned long last_start; int last_domain; @@ -1278,13 +1277,12 @@ svm_range_map_to_gpu(struct kfd_process_device *pdd, struct svm_range *prange, (last_domain == SVM_RANGE_VRAM_DOMAIN) ? 1 : 0, pte_flags); - r = amdgpu_vm_bo_update_mapping(adev, bo_adev, vm, false, false, - NULL, last_start, - prange->start + i, pte_flags, - last_start - prange->start, - NULL, dma_addr, - &vm->last_update, - &table_freed); + r = amdgpu_vm_update_range(adev, vm, false, false, false, NULL, + last_start, prange->start + i, + pte_flags, + last_start - prange->start, + bo_adev ? 
bo_adev->vm_manager.vram_base_offset : 0, + NULL, dma_addr, &vm->last_update); for (j = last_start - prange->start; j <= i; j++) dma_addr[j] |= last_domain; @@ -1306,8 +1304,6 @@ svm_range_map_to_gpu(struct kfd_process_device *pdd, struct svm_range *prange, if (fence) *fence = dma_fence_get(vm->last_update); - if (table_freed) - kfd_flush_tlb(pdd, TLB_FLUSH_LEGACY); out: return r; } @@ -1363,6 +1359,8 @@ svm_range_map_to_gpus(struct svm_range *prange, unsigned long offset, break; } } + + kfd_flush_tlb(pdd, TLB_FLUSH_LEGACY); } return r; @@ -1372,7 +1370,7 @@ struct svm_validate_context { struct kfd_process *process; struct svm_range *prange; bool intr; - unsigned long bitmap[MAX_GPU_INSTANCE]; + DECLARE_BITMAP(bitmap, MAX_GPU_INSTANCE); struct ttm_validate_buffer tv[MAX_GPU_INSTANCE]; struct list_head validate_list; struct ww_acquire_ctx ticket; @@ -2687,11 +2685,6 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid, pr_debug("kfd process not founded pasid 0x%x\n", pasid); return 0; } - if (!p->xnack_enabled) { - pr_debug("XNACK not enabled for pasid 0x%x\n", pasid); - r = -EFAULT; - goto out; - } svms = &p->svms; pr_debug("restoring svms 0x%p fault address 0x%llx\n", svms, addr); @@ -2702,6 +2695,12 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid, goto out; } + if (!p->xnack_enabled) { + pr_debug("XNACK not enabled for pasid 0x%x\n", pasid); + r = -EFAULT; + goto out; + } + /* p->lead_thread is available as kfd_process_wq_release flush the work * before releasing task ref. 
*/ diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index 3bdcae239bc0..8b7710b4d3ed 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -46,22 +46,32 @@ static struct list_head topology_device_list; static struct kfd_system_properties sys_props; static DECLARE_RWSEM(topology_lock); -static atomic_t topology_crat_proximity_domain; +static uint32_t topology_crat_proximity_domain; -struct kfd_topology_device *kfd_topology_device_by_proximity_domain( +struct kfd_topology_device *kfd_topology_device_by_proximity_domain_no_lock( uint32_t proximity_domain) { struct kfd_topology_device *top_dev; struct kfd_topology_device *device = NULL; - down_read(&topology_lock); - list_for_each_entry(top_dev, &topology_device_list, list) if (top_dev->proximity_domain == proximity_domain) { device = top_dev; break; } + return device; +} + +struct kfd_topology_device *kfd_topology_device_by_proximity_domain( + uint32_t proximity_domain) +{ + struct kfd_topology_device *device = NULL; + + down_read(&topology_lock); + + device = kfd_topology_device_by_proximity_domain_no_lock( + proximity_domain); up_read(&topology_lock); return device; @@ -1060,7 +1070,7 @@ int kfd_topology_init(void) down_write(&topology_lock); kfd_topology_update_device_list(&temp_topology_device_list, &topology_device_list); - atomic_set(&topology_crat_proximity_domain, sys_props.num_devices-1); + topology_crat_proximity_domain = sys_props.num_devices-1; ret = kfd_topology_update_sysfs(); up_write(&topology_lock); @@ -1295,8 +1305,6 @@ int kfd_topology_add_device(struct kfd_dev *gpu) pr_debug("Adding new GPU (ID: 0x%x) to topology\n", gpu_id); - proximity_domain = atomic_inc_return(&topology_crat_proximity_domain); - /* Include the CPU in xGMI hive if xGMI connected by assigning it the hive ID. 
*/ if (gpu->hive_id && gpu->adev->gmc.xgmi.connected_to_cpu) { struct kfd_topology_device *top_dev; @@ -1321,12 +1329,16 @@ int kfd_topology_add_device(struct kfd_dev *gpu) */ dev = kfd_assign_gpu(gpu); if (!dev) { + down_write(&topology_lock); + proximity_domain = ++topology_crat_proximity_domain; + res = kfd_create_crat_image_virtual(&crat_image, &image_size, COMPUTE_UNIT_GPU, gpu, proximity_domain); if (res) { pr_err("Error creating VCRAT for GPU (ID: 0x%x)\n", gpu_id); + topology_crat_proximity_domain--; return res; } res = kfd_parse_crat_table(crat_image, @@ -1335,10 +1347,10 @@ int kfd_topology_add_device(struct kfd_dev *gpu) if (res) { pr_err("Error parsing VCRAT for GPU (ID: 0x%x)\n", gpu_id); + topology_crat_proximity_domain--; goto err; } - down_write(&topology_lock); kfd_topology_update_device_list(&temp_topology_device_list, &topology_device_list); @@ -1485,25 +1497,78 @@ err: return res; } +/** + * kfd_topology_update_io_links() - Update IO links after device removal. + * @proximity_domain: Proximity domain value of the dev being removed. + * + * The topology list currently is arranged in increasing order of + * proximity domain. + * + * Two things need to be done when a device is removed: + * 1. All the IO links to this device need to be removed. + * 2. All nodes after the current device node need to move + * up once this device node is removed from the topology + * list. As a result, the proximity domain values for + * all nodes after the node being deleted reduce by 1. + * This would also cause the proximity domain values for + * io links to be updated based on new proximity domain + * values. + * + * Context: The caller must hold write topology_lock. 
+ */ +static void kfd_topology_update_io_links(int proximity_domain) +{ + struct kfd_topology_device *dev; + struct kfd_iolink_properties *iolink, *tmp; + + list_for_each_entry(dev, &topology_device_list, list) { + if (dev->proximity_domain > proximity_domain) + dev->proximity_domain--; + + list_for_each_entry_safe(iolink, tmp, &dev->io_link_props, list) { + /* + * If there is an io link to the dev being deleted + * then remove that IO link also. + */ + if (iolink->node_to == proximity_domain) { + list_del(&iolink->list); + dev->io_link_count--; + dev->node_props.io_links_count--; + } else if (iolink->node_from > proximity_domain) { + iolink->node_from--; + } else if (iolink->node_to > proximity_domain) { + iolink->node_to--; + } + } + + } +} + int kfd_topology_remove_device(struct kfd_dev *gpu) { struct kfd_topology_device *dev, *tmp; uint32_t gpu_id; int res = -ENODEV; + int i = 0; down_write(&topology_lock); - list_for_each_entry_safe(dev, tmp, &topology_device_list, list) + list_for_each_entry_safe(dev, tmp, &topology_device_list, list) { if (dev->gpu == gpu) { gpu_id = dev->gpu_id; kfd_remove_sysfs_node_entry(dev); kfd_release_topology_device(dev); sys_props.num_devices--; + kfd_topology_update_io_links(i); + topology_crat_proximity_domain = sys_props.num_devices-1; + sys_props.generation_count++; res = 0; if (kfd_topology_update_sysfs() < 0) kfd_topology_release_sysfs(); break; } + i++; + } up_write(&topology_lock); diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 73423b805b54..f732af52bd64 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -9251,7 +9251,7 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state, &bundle->flip_addrs[planes_count].address, afb->tmz_surface, false); - DRM_DEBUG_ATOMIC("plane: id=%d dcc_en=%d\n", + drm_dbg_state(state->dev, "plane: id=%d dcc_en=%d\n", 
new_plane_state->plane->index, bundle->plane_infos[planes_count].dcc.enable); @@ -9285,7 +9285,7 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state, dc_plane, bundle->flip_addrs[planes_count].flip_timestamp_in_us); - DRM_DEBUG_ATOMIC("%s Flipping to hi: 0x%x, low: 0x%x\n", + drm_dbg_state(state->dev, "%s Flipping to hi: 0x%x, low: 0x%x\n", __func__, bundle->flip_addrs[planes_count].address.grph.addr.high_part, bundle->flip_addrs[planes_count].address.grph.addr.low_part); @@ -9627,7 +9627,7 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state) dm_new_crtc_state = to_dm_crtc_state(new_crtc_state); dm_old_crtc_state = to_dm_crtc_state(old_crtc_state); - DRM_DEBUG_ATOMIC( + drm_dbg_state(state->dev, "amdgpu_crtc id:%d crtc_state_flags: enable:%d, active:%d, " "planes_changed:%d, mode_changed:%d,active_changed:%d," "connectors_changed:%d\n", @@ -10331,7 +10331,7 @@ static int dm_update_crtc_state(struct amdgpu_display_manager *dm, if (!drm_atomic_crtc_needs_modeset(new_crtc_state)) goto skip_modeset; - DRM_DEBUG_ATOMIC( + drm_dbg_state(state->dev, "amdgpu_crtc id:%d crtc_state_flags: enable:%d, active:%d, " "planes_changed:%d, mode_changed:%d,active_changed:%d," "connectors_changed:%d\n", diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c index da17ece1a2c5..188039f14544 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c @@ -3491,6 +3491,40 @@ DEFINE_SHOW_ATTRIBUTE(mst_topo); DEFINE_DEBUGFS_ATTRIBUTE(visual_confirm_fops, visual_confirm_get, visual_confirm_set, "%llu\n"); + +/* + * Sets the DC skip_detection_link_training debug option from the given string. 
+ * Example usage: echo 1 > /sys/kernel/debug/dri/0/amdgpu_skip_detection_link_training + */ +static int skip_detection_link_training_set(void *data, u64 val) +{ + struct amdgpu_device *adev = data; + + if (val == 0) + adev->dm.dc->debug.skip_detection_link_training = false; + else + adev->dm.dc->debug.skip_detection_link_training = true; + + return 0; +} + +/* + * Reads the DC skip_detection_link_training debug option value into the given buffer. + * Example usage: cat /sys/kernel/debug/dri/0/amdgpu_dm_skip_detection_link_training + */ +static int skip_detection_link_training_get(void *data, u64 *val) +{ + struct amdgpu_device *adev = data; + + *val = adev->dm.dc->debug.skip_detection_link_training; + + return 0; +} + +DEFINE_DEBUGFS_ATTRIBUTE(skip_detection_link_training_fops, + skip_detection_link_training_get, + skip_detection_link_training_set, "%llu\n"); + /* * Dumps the DCC_EN bit for each pipe. * Example usage: cat /sys/kernel/debug/dri/0/amdgpu_dm_dcc_en @@ -3584,6 +3618,9 @@ void dtn_debugfs_init(struct amdgpu_device *adev) debugfs_create_file_unsafe("amdgpu_dm_visual_confirm", 0644, root, adev, &visual_confirm_fops); + debugfs_create_file_unsafe("amdgpu_dm_skip_detection_link_training", 0644, root, adev, + &skip_detection_link_training_fops); + debugfs_create_file_unsafe("amdgpu_dm_dmub_tracebuffer", 0644, root, adev, &dmub_tracebuffer_fops); diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c index f5f39984702f..28cf24f6ab32 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c @@ -90,7 +90,7 @@ enum dc_edid_status dm_helpers_parse_edid_caps( { struct amdgpu_dm_connector *aconnector = link->priv; struct drm_connector *connector = &aconnector->base; - struct edid *edid_buf = (struct edid *) edid->raw_edid; + struct edid *edid_buf = edid ? 
(struct edid *) edid->raw_edid : NULL; struct cea_sad *sads; int sad_count = -1; int sadb_count = -1; diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c index 0c923a90615c..13b1751e69bf 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_psr.c @@ -27,6 +27,7 @@ #include "dc.h" #include "dm_helpers.h" #include "amdgpu_dm.h" +#include "modules/power/power_helpers.h" #ifdef CONFIG_DRM_AMD_DC_DCN static bool link_supports_psrsu(struct dc_link *link) @@ -39,6 +40,9 @@ static bool link_supports_psrsu(struct dc_link *link) if (dc->ctx->dce_version < DCN_VERSION_3_1) return false; + if (!is_psr_su_specific_panel(link)) + return false; + if (!link->dpcd_caps.alpm_caps.bits.AUX_WAKE_ALPM_CAP || !link->dpcd_caps.psr_info.psr_dpcd_caps.bits.Y_COORDINATE_REQUIRED) return false; @@ -79,7 +83,10 @@ void amdgpu_dm_set_psr_caps(struct dc_link *link) link->psr_settings.psr_feature_enabled = true; } - DRM_INFO("PSR support:%d\n", link->psr_settings.psr_feature_enabled); + DRM_INFO("PSR support %d, DC PSR ver %d, sink PSR ver %d\n", + link->psr_settings.psr_feature_enabled, + link->psr_settings.psr_version, + link->dpcd_caps.psr_info.psr_version); } diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser_common.c b/drivers/gpu/drm/amd/display/dc/bios/bios_parser_common.c index a8cb039d2572..34e3a64f556e 100644 --- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser_common.c +++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser_common.c @@ -213,6 +213,9 @@ static enum connector_id connector_id_from_bios_object_id( case CONNECTOR_OBJECT_ID_MXM: id = CONNECTOR_ID_MXM; break; + case CONNECTOR_OBJECT_ID_USBC: + id = CONNECTOR_ID_USBC; + break; default: id = CONNECTOR_ID_UNKNOWN; break; diff --git a/drivers/gpu/drm/amd/display/dc/bios/command_table.c b/drivers/gpu/drm/amd/display/dc/bios/command_table.c index 0e36cd800fc9..32efa92422e8 100644 --- 
a/drivers/gpu/drm/amd/display/dc/bios/command_table.c +++ b/drivers/gpu/drm/amd/display/dc/bios/command_table.c @@ -522,7 +522,8 @@ static enum bp_result transmitter_control_v2( */ params.acConfig.ucEncoderSel = 1; - if (CONNECTOR_ID_DISPLAY_PORT == connector_id) + if (CONNECTOR_ID_DISPLAY_PORT == connector_id + || CONNECTOR_ID_USBC == connector_id) /* Bit4: DP connector flag * =0 connector is none-DP connector * =1 connector is DP connector diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dce100/dce_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dce100/dce_clk_mgr.c index dfba6138f538..26feefbb8990 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dce100/dce_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dce100/dce_clk_mgr.c @@ -374,7 +374,7 @@ void dce_clock_read_ss_info(struct clk_mgr_internal *clk_mgr_dce) clk_mgr_dce->dprefclk_ss_percentage = info.spread_spectrum_percentage; } - if (clk_mgr_dce->base.ctx->dc->debug.ignore_dpref_ss) + if (clk_mgr_dce->base.ctx->dc->config.ignore_dpref_ss) clk_mgr_dce->dprefclk_ss_percentage = 0; } } diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn201/dcn201_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn201/dcn201_clk_mgr.c index fbdd0a92d146..451e8d6cd8bd 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn201/dcn201_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn201/dcn201_clk_mgr.c @@ -157,8 +157,7 @@ static void dcn201_update_clocks(struct clk_mgr *clk_mgr_base, } else { if (update_dppclk || update_dispclk) dcn20_update_clocks_update_dentist(clk_mgr, context); - if (new_clocks->dppclk_khz >= dc->current_state->bw_ctx.bw.dcn.clk.dppclk_khz) - dcn20_update_clocks_update_dpp_dto(clk_mgr, context, safe_to_lower); + dcn20_update_clocks_update_dpp_dto(clk_mgr, context, safe_to_lower); } } } diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c index f4dee0e48a67..02943ca65807 100644 --- 
a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c @@ -88,11 +88,22 @@ static int rn_get_active_display_cnt_wa(struct dc *dc, struct dc_state *context) static void rn_set_low_power_state(struct clk_mgr *clk_mgr_base) { + int display_count; struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base); + struct dc *dc = clk_mgr_base->ctx->dc; + struct dc_state *context = dc->current_state; + + if (clk_mgr_base->clks.pwr_state != DCN_PWR_STATE_LOW_POWER) { + + display_count = rn_get_active_display_cnt_wa(dc, context); - rn_vbios_smu_set_dcn_low_power_state(clk_mgr, DCN_PWR_STATE_LOW_POWER); - /* update power state */ - clk_mgr_base->clks.pwr_state = DCN_PWR_STATE_LOW_POWER; + /* if we can go lower, go lower */ + if (display_count == 0) { + rn_vbios_smu_set_dcn_low_power_state(clk_mgr, DCN_PWR_STATE_LOW_POWER); + /* update power state */ + clk_mgr_base->clks.pwr_state = DCN_PWR_STATE_LOW_POWER; + } + } } static void rn_update_clocks_update_dpp_dto(struct clk_mgr_internal *clk_mgr, diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr_vbios_smu.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr_vbios_smu.c index 8161a6ae410d..30c6f9cd717f 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr_vbios_smu.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr_vbios_smu.c @@ -94,6 +94,9 @@ static int rn_vbios_smu_send_msg_with_param(struct clk_mgr_internal *clk_mgr, { uint32_t result; + result = rn_smu_wait_for_response(clk_mgr, 10, 200000); + ASSERT(result == VBIOSSMC_Result_OK); + /* First clear response register */ REG_WRITE(MP1_SMN_C2PMSG_91, VBIOSSMC_Status_BUSY); diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn301/vg_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn301/vg_clk_mgr.c index bc4ddc36fe58..f310b0d25a07 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn301/vg_clk_mgr.c +++ 
b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn301/vg_clk_mgr.c @@ -176,8 +176,7 @@ static void vg_update_clocks(struct clk_mgr *clk_mgr_base, if (update_dppclk || update_dispclk) dcn301_smu_set_dppclk(clk_mgr, clk_mgr_base->clks.dppclk_khz); // always update dtos unless clock is lowered and not safe to lower - if (new_clocks->dppclk_khz >= dc->current_state->bw_ctx.bw.dcn.clk.dppclk_khz) - dcn20_update_clocks_update_dpp_dto(clk_mgr, context, safe_to_lower); + dcn20_update_clocks_update_dpp_dto(clk_mgr, context, safe_to_lower); } } diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c index 59fdd7f0d609..969b40250434 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c @@ -615,13 +615,37 @@ static void dcn31_clk_mgr_helper_populate_bw_params(struct clk_mgr_internal *clk } } +void dcn31_set_low_power_state(struct clk_mgr *clk_mgr_base) +{ + int display_count; + struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base); + struct dc *dc = clk_mgr_base->ctx->dc; + struct dc_state *context = dc->current_state; + + if (clk_mgr_base->clks.pwr_state != DCN_PWR_STATE_LOW_POWER) { + display_count = dcn31_get_active_display_cnt_wa(dc, context); + /* if we can go lower, go lower */ + if (display_count == 0) { + union display_idle_optimization_u idle_info = { 0 }; + + idle_info.idle_info.df_request_disabled = 1; + idle_info.idle_info.phy_ref_clk_off = 1; + idle_info.idle_info.s0i2_rdy = 1; + dcn31_smu_set_display_idle_optimization(clk_mgr, idle_info.data); + /* update power state */ + clk_mgr_base->clks.pwr_state = DCN_PWR_STATE_LOW_POWER; + } + } +} + static struct clk_mgr_funcs dcn31_funcs = { .get_dp_ref_clk_frequency = dce12_get_dp_ref_freq_khz, .update_clocks = dcn31_update_clocks, .init_clocks = dcn31_init_clocks, .enable_pme_wa = dcn31_enable_pme_wa, .are_clock_states_equal = 
dcn31_are_clock_states_equal, - .notify_wm_ranges = dcn31_notify_wm_ranges + .notify_wm_ranges = dcn31_notify_wm_ranges, + .set_low_power_state = dcn31_set_low_power_state }; extern struct clk_mgr_funcs dcn3_fpga_funcs; diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_clk_mgr.c index 702d00ce7da4..3121dd2d2a91 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_clk_mgr.c @@ -686,8 +686,8 @@ void dcn316_clk_mgr_construct( clk_mgr->base.base.dprefclk_khz = dcn316_smu_get_dpref_clk(&clk_mgr->base); clk_mgr->base.dccg->ref_dtbclk_khz = clk_mgr->base.base.dprefclk_khz; dce_clock_read_ss_info(&clk_mgr->base); - clk_mgr->base.dccg->ref_dtbclk_khz = - dce_adjust_dp_ref_freq_for_ss(&clk_mgr->base, clk_mgr->base.base.dprefclk_khz); + /*clk_mgr->base.dccg->ref_dtbclk_khz = + dce_adjust_dp_ref_freq_for_ss(&clk_mgr->base, clk_mgr->base.base.dprefclk_khz);*/ clk_mgr->base.base.bw_params = &dcn316_bw_params; diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index c436db416708..c2fcd67bcc4d 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -1569,11 +1569,24 @@ bool dc_validate_boot_timing(const struct dc *dc, if (dc_is_dp_signal(link->connector_signal)) { unsigned int pix_clk_100hz; + uint32_t numOdmPipes = 1; + uint32_t id_src[4] = {0}; dc->res_pool->dp_clock_source->funcs->get_pixel_clk_frequency_100hz( dc->res_pool->dp_clock_source, tg_inst, &pix_clk_100hz); + if (tg->funcs->get_optc_source) + tg->funcs->get_optc_source(tg, + &numOdmPipes, &id_src[0], &id_src[1]); + + if (numOdmPipes == 2) + pix_clk_100hz *= 2; + if (numOdmPipes == 4) + pix_clk_100hz *= 4; + + // Note: In rare cases, HW pixclk may differ from crtc's pixclk + // slightly due to rounding issues in 10 kHz units. 
if (crtc_timing->pix_clk_100hz != pix_clk_100hz) return false; diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c index bbaa5abdf888..faab1460d0b5 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c @@ -345,6 +345,7 @@ static enum signal_type get_basic_signal_type(struct graphics_object_id encoder, case CONNECTOR_ID_LVDS: return SIGNAL_TYPE_LVDS; case CONNECTOR_ID_DISPLAY_PORT: + case CONNECTOR_ID_USBC: return SIGNAL_TYPE_DISPLAY_PORT; case CONNECTOR_ID_EDP: return SIGNAL_TYPE_EDP; @@ -380,7 +381,8 @@ bool dc_link_is_dp_sink_present(struct dc_link *link) bool present = ((connector_id == CONNECTOR_ID_DISPLAY_PORT) || - (connector_id == CONNECTOR_ID_EDP)); + (connector_id == CONNECTOR_ID_EDP) || + (connector_id == CONNECTOR_ID_USBC)); ddc = dal_ddc_service_get_ddc_pin(link->ddc); @@ -476,7 +478,8 @@ static enum signal_type link_detect_sink(struct dc_link *link, result = SIGNAL_TYPE_DVI_SINGLE_LINK; } break; - case CONNECTOR_ID_DISPLAY_PORT: { + case CONNECTOR_ID_DISPLAY_PORT: + case CONNECTOR_ID_USBC: { /* DP HPD short pulse. 
Passive DP dongle will not * have short pulse */ @@ -1591,6 +1594,7 @@ static bool dc_link_construct_legacy(struct dc_link *link, link->connector_signal = SIGNAL_TYPE_DVI_DUAL_LINK; break; case CONNECTOR_ID_DISPLAY_PORT: + case CONNECTOR_ID_USBC: link->connector_signal = SIGNAL_TYPE_DISPLAY_PORT; if (link->hpd_gpio) @@ -3075,6 +3079,11 @@ bool dc_link_set_psr_allow_active(struct dc_link *link, const bool *allow_active if (!dc_get_edp_link_panel_inst(dc, link, &panel_inst)) return false; + if (allow_active && link->type == dc_connection_none) { + // Don't enter PSR if panel is not connected + return false; + } + /* Set power optimization flag */ if (power_opts && link->psr_settings.psr_power_opt != *power_opts) { link->psr_settings.psr_power_opt = *power_opts; @@ -3083,6 +3092,10 @@ bool dc_link_set_psr_allow_active(struct dc_link *link, const bool *allow_active psr->funcs->psr_set_power_opt(psr, link->psr_settings.psr_power_opt, panel_inst); } + if (psr != NULL && link->psr_settings.psr_feature_enabled && + force_static && psr->funcs->psr_force_static) + psr->funcs->psr_force_static(psr, panel_inst); + /* Enable or Disable PSR */ if (allow_active && link->psr_settings.psr_allow_active != *allow_active) { link->psr_settings.psr_allow_active = *allow_active; @@ -3093,8 +3106,6 @@ bool dc_link_set_psr_allow_active(struct dc_link *link, const bool *allow_active #endif if (psr != NULL && link->psr_settings.psr_feature_enabled) { - if (force_static && psr->funcs->psr_force_static) - psr->funcs->psr_force_static(psr, panel_inst); psr->funcs->psr_enable(psr, link->psr_settings.psr_allow_active, wait, panel_inst); } else if ((dmcu != NULL && dmcu->funcs->is_dmcu_initialized(dmcu)) && link->psr_settings.psr_feature_enabled) @@ -3476,8 +3487,6 @@ static enum dc_status dc_link_update_sst_payload(struct pipe_ctx *pipe_ctx, { struct dc_stream_state *stream = pipe_ctx->stream; struct dc_link *link = stream->link; - struct hpo_dp_link_encoder *hpo_dp_link_encoder = 
pipe_ctx->link_res.hpo_dp_link_enc; - struct hpo_dp_stream_encoder *hpo_dp_stream_encoder = pipe_ctx->stream_res.hpo_dp_stream_enc; struct link_mst_stream_allocation_table proposed_table = {0}; struct fixed31_32 avg_time_slots_per_mtp; const struct dc_link_settings empty_link_settings = {0}; @@ -3511,7 +3520,7 @@ static enum dc_status dc_link_update_sst_payload(struct pipe_ctx *pipe_ctx, pipe_ctx->pipe_idx); } - proposed_table.stream_allocations[0].hpo_dp_stream_enc = hpo_dp_stream_encoder; + proposed_table.stream_allocations[0].hpo_dp_stream_enc = pipe_ctx->stream_res.hpo_dp_stream_enc; ASSERT(proposed_table.stream_count == 1); @@ -3524,8 +3533,7 @@ static enum dc_status dc_link_update_sst_payload(struct pipe_ctx *pipe_ctx, proposed_table.stream_allocations[0].slot_count); /* program DP source TX for payload */ - hpo_dp_link_encoder->funcs->update_stream_allocation_table( - hpo_dp_link_encoder, + link_hwss->ext.update_stream_allocation_table(link, &pipe_ctx->link_res, &proposed_table); /* poll for ACT handled */ @@ -3562,8 +3570,6 @@ enum dc_status dc_link_allocate_mst_payload(struct pipe_ctx *pipe_ctx) { struct dc_stream_state *stream = pipe_ctx->stream; struct dc_link *link = stream->link; - struct link_encoder *link_encoder = NULL; - struct hpo_dp_link_encoder *hpo_dp_link_encoder = pipe_ctx->link_res.hpo_dp_link_enc; struct dp_mst_stream_allocation_table proposed_table = {0}; struct fixed31_32 avg_time_slots_per_mtp; struct fixed31_32 pbn; @@ -3573,9 +3579,6 @@ enum dc_status dc_link_allocate_mst_payload(struct pipe_ctx *pipe_ctx) const struct link_hwss *link_hwss = get_link_hwss(link, &pipe_ctx->link_res); DC_LOGGER_INIT(link->ctx->logger); - link_encoder = link_enc_cfg_get_link_enc(link); - ASSERT(link_encoder); - /* enable_link_dp_mst already check link->enabled_stream_count * and stream is in link->stream[]. This is called during set mode, * stream_enc is available. 
@@ -3620,37 +3623,17 @@ enum dc_status dc_link_allocate_mst_payload(struct pipe_ctx *pipe_ctx) ASSERT(proposed_table.stream_count > 0); - if (link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA) { - static enum dc_status status; - uint8_t mst_alloc_slots = 0, prev_mst_slots_in_use = 0xFF; - - for (i = 0; i < link->mst_stream_alloc_table.stream_count; i++) - mst_alloc_slots += link->mst_stream_alloc_table.stream_allocations[i].slot_count; - - status = dc_process_dmub_set_mst_slots(link->dc, link->link_index, - mst_alloc_slots, &prev_mst_slots_in_use); - ASSERT(status == DC_OK); - DC_LOG_MST("dpia : status[%d]: alloc_slots[%d]: used_slots[%d]\n", - status, mst_alloc_slots, prev_mst_slots_in_use); - } - /* program DP source TX for payload */ - switch (dp_get_link_encoding_format(&link->cur_link_settings)) { - case DP_8b_10b_ENCODING: - link_encoder->funcs->update_mst_stream_allocation_table( - link_encoder, - &link->mst_stream_alloc_table); - break; - case DP_128b_132b_ENCODING: - hpo_dp_link_encoder->funcs->update_stream_allocation_table( - hpo_dp_link_encoder, - &link->mst_stream_alloc_table); - break; - case DP_UNKNOWN_ENCODING: + if (link_hwss->ext.update_stream_allocation_table == NULL || + dp_get_link_encoding_format(&link->cur_link_settings) == DP_UNKNOWN_ENCODING) { DC_LOG_ERROR("Failure: unknown encoding format\n"); return DC_ERROR_UNEXPECTED; } + link_hwss->ext.update_stream_allocation_table(link, + &pipe_ctx->link_res, + &link->mst_stream_alloc_table); + /* send down message */ ret = dm_helpers_dp_mst_poll_for_allocation_change_trigger( stream->ctx, @@ -3692,7 +3675,6 @@ enum dc_status dc_link_reduce_mst_payload(struct pipe_ctx *pipe_ctx, uint32_t bw struct fixed31_32 avg_time_slots_per_mtp; struct fixed31_32 pbn; struct fixed31_32 pbn_per_slot; - struct link_encoder *link_encoder = link->link_enc; struct dp_mst_stream_allocation_table proposed_table = {0}; uint8_t i; enum act_return_status ret; @@ -3756,8 +3738,13 @@ enum dc_status 
dc_link_reduce_mst_payload(struct pipe_ctx *pipe_ctx, uint32_t bw ASSERT(proposed_table.stream_count > 0); /* update mst stream allocation table hardware state */ - link_encoder->funcs->update_mst_stream_allocation_table( - link_encoder, + if (link_hwss->ext.update_stream_allocation_table == NULL || + dp_get_link_encoding_format(&link->cur_link_settings) == DP_UNKNOWN_ENCODING) { + DC_LOG_ERROR("Failure: unknown encoding format\n"); + return DC_ERROR_UNEXPECTED; + } + + link_hwss->ext.update_stream_allocation_table(link, &pipe_ctx->link_res, &link->mst_stream_alloc_table); /* poll for immediate branch device ACT handled */ @@ -3852,8 +3839,6 @@ static enum dc_status deallocate_mst_payload(struct pipe_ctx *pipe_ctx) { struct dc_stream_state *stream = pipe_ctx->stream; struct dc_link *link = stream->link; - struct link_encoder *link_encoder = NULL; - struct hpo_dp_link_encoder *hpo_dp_link_encoder = pipe_ctx->link_res.hpo_dp_link_enc; struct dp_mst_stream_allocation_table proposed_table = {0}; struct fixed31_32 avg_time_slots_per_mtp = dc_fixpt_from_int(0); int i; @@ -3862,9 +3847,6 @@ static enum dc_status deallocate_mst_payload(struct pipe_ctx *pipe_ctx) const struct dc_link_settings empty_link_settings = {0}; DC_LOGGER_INIT(link->ctx->logger); - link_encoder = link_enc_cfg_get_link_enc(link); - ASSERT(link_encoder); - /* deallocate_mst_payload is called before disable link. When mode or * disable/enable monitor, new stream is created which is not in link * stream[] yet. 
For this, payload is not allocated yet, so de-alloc @@ -3922,36 +3904,16 @@ static enum dc_status deallocate_mst_payload(struct pipe_ctx *pipe_ctx) link->mst_stream_alloc_table.stream_allocations[i].slot_count); } - if (link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA) { - enum dc_status status; - uint8_t mst_alloc_slots = 0, prev_mst_slots_in_use = 0xFF; - - for (i = 0; i < link->mst_stream_alloc_table.stream_count; i++) - mst_alloc_slots += link->mst_stream_alloc_table.stream_allocations[i].slot_count; - - status = dc_process_dmub_set_mst_slots(link->dc, link->link_index, - mst_alloc_slots, &prev_mst_slots_in_use); - ASSERT(status != DC_NOT_SUPPORTED); - DC_LOG_MST("dpia : status[%d]: alloc_slots[%d]: used_slots[%d]\n", - status, mst_alloc_slots, prev_mst_slots_in_use); - } - - switch (dp_get_link_encoding_format(&link->cur_link_settings)) { - case DP_8b_10b_ENCODING: - link_encoder->funcs->update_mst_stream_allocation_table( - link_encoder, - &link->mst_stream_alloc_table); - break; - case DP_128b_132b_ENCODING: - hpo_dp_link_encoder->funcs->update_stream_allocation_table( - hpo_dp_link_encoder, - &link->mst_stream_alloc_table); - break; - case DP_UNKNOWN_ENCODING: + /* update mst stream allocation table hardware state */ + if (link_hwss->ext.update_stream_allocation_table == NULL || + dp_get_link_encoding_format(&link->cur_link_settings) == DP_UNKNOWN_ENCODING) { DC_LOG_DEBUG("Unknown encoding format\n"); return DC_ERROR_UNEXPECTED; } + link_hwss->ext.update_stream_allocation_table(link, &pipe_ctx->link_res, + &link->mst_stream_alloc_table); + if (mst_mode) { dm_helpers_dp_mst_poll_for_allocation_change_trigger( stream->ctx, @@ -4098,8 +4060,8 @@ static void fpga_dp_hpo_enable_link_and_stream(struct dc_state *state, struct pi proposed_table.stream_allocations[0].hpo_dp_stream_enc = pipe_ctx->stream_res.hpo_dp_stream_enc; } - pipe_ctx->link_res.hpo_dp_link_enc->funcs->update_stream_allocation_table( - pipe_ctx->link_res.hpo_dp_link_enc, + 
link_hwss->ext.update_stream_allocation_table(stream->link, + &pipe_ctx->link_res, &proposed_table); if (link_hwss->ext.set_throttled_vcp_size) @@ -4119,6 +4081,7 @@ void core_link_enable_stream( struct link_encoder *link_enc; enum otg_out_mux_dest otg_out_dest = OUT_MUX_DIO; struct vpg *vpg = pipe_ctx->stream_res.stream_enc->vpg; + const struct link_hwss *link_hwss = get_link_hwss(link, &pipe_ctx->link_res); if (is_dp_128b_132b_signal(pipe_ctx)) vpg = pipe_ctx->stream_res.hpo_dp_stream_enc->vpg; @@ -4147,56 +4110,19 @@ void core_link_enable_stream( link_enc->funcs->setup( link_enc, pipe_ctx->stream->signal); - pipe_ctx->stream_res.stream_enc->funcs->setup_stereo_sync( - pipe_ctx->stream_res.stream_enc, - pipe_ctx->stream_res.tg->inst, - stream->timing.timing_3d_format != TIMING_3D_FORMAT_NONE); - } - - if (is_dp_128b_132b_signal(pipe_ctx)) { - pipe_ctx->stream_res.hpo_dp_stream_enc->funcs->set_stream_attribute( - pipe_ctx->stream_res.hpo_dp_stream_enc, - &stream->timing, - stream->output_color_space, - stream->use_vsc_sdp_for_colorimetry, - stream->timing.flags.DSC, - false); - otg_out_dest = OUT_MUX_HPO_DP; - } else if (dc_is_dp_signal(pipe_ctx->stream->signal)) { - pipe_ctx->stream_res.stream_enc->funcs->dp_set_stream_attribute( - pipe_ctx->stream_res.stream_enc, - &stream->timing, - stream->output_color_space, - stream->use_vsc_sdp_for_colorimetry, - stream->link->dpcd_caps.dprx_feature.bits.SST_SPLIT_SDP_CAP); } - if (dc_is_dp_signal(pipe_ctx->stream->signal)) - dp_source_sequence_trace(link, DPCD_SOURCE_SEQ_AFTER_DP_STREAM_ATTR); - - if (dc_is_hdmi_tmds_signal(pipe_ctx->stream->signal)) - pipe_ctx->stream_res.stream_enc->funcs->hdmi_set_stream_attribute( - pipe_ctx->stream_res.stream_enc, - &stream->timing, - stream->phy_pix_clk, - pipe_ctx->stream_res.audio != NULL); - pipe_ctx->stream->link->link_state_valid = true; - if (pipe_ctx->stream_res.tg->funcs->set_out_mux) + if (pipe_ctx->stream_res.tg->funcs->set_out_mux) { + if (is_dp_128b_132b_signal(pipe_ctx)) 
+ otg_out_dest = OUT_MUX_HPO_DP; + else + otg_out_dest = OUT_MUX_DIO; pipe_ctx->stream_res.tg->funcs->set_out_mux(pipe_ctx->stream_res.tg, otg_out_dest); + } - if (dc_is_dvi_signal(pipe_ctx->stream->signal)) - pipe_ctx->stream_res.stream_enc->funcs->dvi_set_stream_attribute( - pipe_ctx->stream_res.stream_enc, - &stream->timing, - (pipe_ctx->stream->signal == SIGNAL_TYPE_DVI_DUAL_LINK) ? - true : false); - - if (dc_is_lvds_signal(pipe_ctx->stream->signal)) - pipe_ctx->stream_res.stream_enc->funcs->lvds_set_stream_attribute( - pipe_ctx->stream_res.stream_enc, - &stream->timing); + link_hwss->setup_stream_attribute(pipe_ctx); if (!IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment)) { bool apply_edp_fast_boot_optimization = @@ -4331,13 +4257,11 @@ void core_link_enable_stream( dc->hwss.enable_audio_stream(pipe_ctx); } else { // if (IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment)) - if (is_dp_128b_132b_signal(pipe_ctx)) { + if (is_dp_128b_132b_signal(pipe_ctx)) fpga_dp_hpo_enable_link_and_stream(state, pipe_ctx); - } if (dc_is_dp_signal(pipe_ctx->stream->signal) || dc_is_virtual_signal(pipe_ctx->stream->signal)) dp_set_dsc_enable(pipe_ctx, true); - } if (pipe_ctx->stream->signal == SIGNAL_TYPE_HDMI_TYPE_A) { @@ -4683,22 +4607,22 @@ bool dc_link_is_fec_supported(const struct dc_link *link) bool dc_link_should_enable_fec(const struct dc_link *link) { - bool is_fec_disable = false; - bool ret = false; + bool force_disable = false; - if ((link->connector_signal != SIGNAL_TYPE_DISPLAY_PORT_MST && + if (link->fec_state == dc_link_fec_enabled) + force_disable = false; + else if (link->connector_signal != SIGNAL_TYPE_DISPLAY_PORT_MST && link->local_sink && - link->local_sink->edid_caps.panel_patch.disable_fec) || - (link->connector_signal == SIGNAL_TYPE_EDP - // enable FEC for EDP if DSC is supported - && link->dpcd_caps.dsc_caps.dsc_basic_caps.fields.dsc_support.DSC_SUPPORT == false - )) - is_fec_disable = true; - - if (dc_link_is_fec_supported(link) && !link->dc->debug.disable_fec && 
!is_fec_disable) - ret = true; - - return ret; + link->local_sink->edid_caps.panel_patch.disable_fec) + force_disable = true; + else if (link->connector_signal == SIGNAL_TYPE_EDP + && (link->dpcd_caps.dsc_caps.dsc_basic_caps.fields. + dsc_support.DSC_SUPPORT == false + || link->dc->debug.disable_dsc_edp + || !link->dc->caps.edp_dsc_support)) + force_disable = true; + + return !force_disable && dc_link_is_fec_supported(link); } uint32_t dc_bandwidth_in_kbps_from_timing( diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c index 22dabe596dfc..c5f5d25035d2 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c @@ -4085,9 +4085,32 @@ static bool handle_hpd_irq_psr_sink(struct dc_link *link) return false; } +static enum dc_link_rate get_link_rate_from_test_link_rate(uint8_t test_rate) +{ + switch (test_rate) { + case DP_TEST_LINK_RATE_RBR: + return LINK_RATE_LOW; + case DP_TEST_LINK_RATE_HBR: + return LINK_RATE_HIGH; + case DP_TEST_LINK_RATE_HBR2: + return LINK_RATE_HIGH2; + case DP_TEST_LINK_RATE_HBR3: + return LINK_RATE_HIGH3; + case DP_TEST_LINK_RATE_UHBR10: + return LINK_RATE_UHBR10; + case DP_TEST_LINK_RATE_UHBR20: + return LINK_RATE_UHBR20; + case DP_TEST_LINK_RATE_UHBR13_5: + return LINK_RATE_UHBR13_5; + default: + return LINK_RATE_UNKNOWN; + } +} + static void dp_test_send_link_training(struct dc_link *link) { struct dc_link_settings link_settings = {0}; + uint8_t test_rate = 0; core_link_read_dpcd( link, @@ -4097,8 +4120,9 @@ static void dp_test_send_link_training(struct dc_link *link) core_link_read_dpcd( link, DP_TEST_LINK_RATE, - (unsigned char *)(&link_settings.link_rate), + &test_rate, 1); + link_settings.link_rate = get_link_rate_from_test_link_rate(test_rate); /* Set preferred link settings */ link->verified_link_cap.lane_count = link_settings.lane_count; diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c 
b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index d251c3f3a714..f292303b75a5 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -1076,6 +1076,15 @@ bool resource_build_scaling_params(struct pipe_ctx *pipe_ctx) bool res = false; DC_LOGGER_INIT(pipe_ctx->stream->ctx->logger); + /* Invalid input */ + if (!plane_state->dst_rect.width || + !plane_state->dst_rect.height || + !plane_state->src_rect.width || + !plane_state->src_rect.height) { + ASSERT(0); + return false; + } + pipe_ctx->plane_res.scl_data.format = convert_pixel_format_to_dalsurface( pipe_ctx->plane_state->format); @@ -2111,6 +2120,8 @@ static int acquire_resource_from_hw_enabled_state( { struct dc_link *link = stream->link; unsigned int i, inst, tg_inst = 0; + uint32_t numPipes = 1; + uint32_t id_src[4] = {0}; /* Check for enabled DIG to identify enabled display */ if (!link->link_enc->funcs->is_dig_enabled(link->link_enc)) @@ -2139,38 +2150,62 @@ static int acquire_resource_from_hw_enabled_state( if (!res_ctx->pipe_ctx[tg_inst].stream) { struct pipe_ctx *pipe_ctx = &res_ctx->pipe_ctx[tg_inst]; - pipe_ctx->stream_res.tg = pool->timing_generators[tg_inst]; - pipe_ctx->plane_res.mi = pool->mis[tg_inst]; - pipe_ctx->plane_res.hubp = pool->hubps[tg_inst]; - pipe_ctx->plane_res.ipp = pool->ipps[tg_inst]; - pipe_ctx->plane_res.xfm = pool->transforms[tg_inst]; - pipe_ctx->plane_res.dpp = pool->dpps[tg_inst]; - pipe_ctx->stream_res.opp = pool->opps[tg_inst]; - - if (pool->dpps[tg_inst]) { - pipe_ctx->plane_res.mpcc_inst = pool->dpps[tg_inst]->inst; - - // Read DPP->MPCC->OPP Pipe from HW State - if (pool->mpc->funcs->read_mpcc_state) { - struct mpcc_state s = {0}; - - pool->mpc->funcs->read_mpcc_state(pool->mpc, pipe_ctx->plane_res.mpcc_inst, &s); - - if (s.dpp_id < MAX_MPCC) - pool->mpc->mpcc_array[pipe_ctx->plane_res.mpcc_inst].dpp_id = s.dpp_id; - - if (s.bot_mpcc_id < MAX_MPCC) - 
pool->mpc->mpcc_array[pipe_ctx->plane_res.mpcc_inst].mpcc_bot = - &pool->mpc->mpcc_array[s.bot_mpcc_id]; + id_src[0] = tg_inst; + + if (pipe_ctx->stream_res.tg->funcs->get_optc_source) + pipe_ctx->stream_res.tg->funcs->get_optc_source(pipe_ctx->stream_res.tg, + &numPipes, &id_src[0], &id_src[1]); + + for (i = 0; i < numPipes; i++) { + //Check if src id invalid + if (id_src[i] == 0xf) + return -1; + + pipe_ctx->stream_res.tg = pool->timing_generators[tg_inst]; + pipe_ctx->plane_res.mi = pool->mis[id_src[i]]; + pipe_ctx->plane_res.hubp = pool->hubps[id_src[i]]; + pipe_ctx->plane_res.ipp = pool->ipps[id_src[i]]; + pipe_ctx->plane_res.xfm = pool->transforms[id_src[i]]; + pipe_ctx->plane_res.dpp = pool->dpps[id_src[i]]; + pipe_ctx->stream_res.opp = pool->opps[id_src[i]]; + + if (pool->dpps[id_src[i]]) { + pipe_ctx->plane_res.mpcc_inst = pool->dpps[id_src[i]]->inst; + + if (pool->mpc->funcs->read_mpcc_state) { + struct mpcc_state s = {0}; + pool->mpc->funcs->read_mpcc_state(pool->mpc, pipe_ctx->plane_res.mpcc_inst, &s); + if (s.dpp_id < MAX_MPCC) + pool->mpc->mpcc_array[pipe_ctx->plane_res.mpcc_inst].dpp_id = + s.dpp_id; + if (s.bot_mpcc_id < MAX_MPCC) + pool->mpc->mpcc_array[pipe_ctx->plane_res.mpcc_inst].mpcc_bot = + &pool->mpc->mpcc_array[s.bot_mpcc_id]; + if (s.opp_id < MAX_OPP) + pipe_ctx->stream_res.opp->mpc_tree_params.opp_id = s.opp_id; + } + } + pipe_ctx->pipe_idx = id_src[i]; - if (s.opp_id < MAX_OPP) - pipe_ctx->stream_res.opp->mpc_tree_params.opp_id = s.opp_id; + if (id_src[i] >= pool->timing_generator_count) { + id_src[i] = pool->timing_generator_count - 1; + pipe_ctx->stream_res.tg = pool->timing_generators[id_src[i]]; + pipe_ctx->stream_res.opp = pool->opps[id_src[i]]; } + + pipe_ctx->stream = stream; } - pipe_ctx->pipe_idx = tg_inst; - pipe_ctx->stream = stream; - return tg_inst; + if (numPipes == 2) { + stream->apply_boot_odm_mode = dm_odm_combine_policy_2to1; + res_ctx->pipe_ctx[id_src[0]].next_odm_pipe = &res_ctx->pipe_ctx[id_src[1]]; + 
res_ctx->pipe_ctx[id_src[0]].prev_odm_pipe = NULL; + res_ctx->pipe_ctx[id_src[1]].next_odm_pipe = NULL; + res_ctx->pipe_ctx[id_src[1]].prev_odm_pipe = &res_ctx->pipe_ctx[id_src[0]]; + } else + stream->apply_boot_odm_mode = dm_odm_combine_mode_disabled; + + return id_src[0]; } return -1; diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 77ef9d1f9ea8..2f0c436dae4c 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -47,7 +47,7 @@ struct aux_payload; struct set_config_cmd_payload; struct dmub_notification; -#define DC_VER "3.2.177" +#define DC_VER "3.2.181" #define MAX_SURFACES 3 #define MAX_PLANES 6 @@ -340,6 +340,7 @@ struct dc_config { bool is_asymmetric_memory; bool is_single_rank_dimm; bool use_pipe_ctx_sync_logic; + bool ignore_dpref_ss; }; enum visual_confirm { @@ -416,6 +417,7 @@ struct dc_clocks { #if defined(CONFIG_DRM_AMD_DC_DCN) enum dcn_zstate_support_state zstate_support; bool dtbclk_en; + int dtbclk_khz; #endif enum dcn_pwr_state pwr_state; /* @@ -665,6 +667,7 @@ struct dc_debug_options { uint32_t edid_read_retry_times; bool remove_disconnect_edp; unsigned int force_odm_combine; //bit vector based on otg inst + unsigned int seamless_boot_odm_combine; #if defined(CONFIG_DRM_AMD_DC_DCN) unsigned int force_odm_combine_4to1; //bit vector based on otg inst bool disable_z9_mpc; @@ -729,7 +732,6 @@ struct dc_debug_options { bool apply_vendor_specific_lttpr_wa; bool extended_blank_optimization; union aux_wake_wa_options aux_wake_wa; - bool ignore_dpref_ss; uint8_t psr_power_use_phy_fsm; }; diff --git a/drivers/gpu/drm/amd/display/dc/dc_dp_types.h b/drivers/gpu/drm/amd/display/dc/dc_dp_types.h index 36ac2a8746bd..2c54b6e0498b 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_dp_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_dp_types.h @@ -133,6 +133,16 @@ enum dp_link_encoding { DP_128b_132b_ENCODING = 2, }; +enum dp_test_link_rate { + DP_TEST_LINK_RATE_RBR = 0x06, + 
DP_TEST_LINK_RATE_HBR = 0x0A, + DP_TEST_LINK_RATE_HBR2 = 0x14, + DP_TEST_LINK_RATE_HBR3 = 0x1E, + DP_TEST_LINK_RATE_UHBR10 = 0x01, + DP_TEST_LINK_RATE_UHBR20 = 0x02, + DP_TEST_LINK_RATE_UHBR13_5 = 0x03, +}; + struct dc_link_settings { enum dc_lane_count lane_count; enum dc_link_rate link_rate; @@ -620,7 +630,7 @@ union test_request { uint8_t LINK_TEST_PATTRN :1; uint8_t EDID_READ :1; uint8_t PHY_TEST_PATTERN :1; - uint8_t RESERVED :1; + uint8_t PHY_TEST_CHANNEL_CODING_TYPE :2; uint8_t AUDIO_TEST_PATTERN :1; uint8_t TEST_AUDIO_DISABLED_VIDEO :1; } bits; @@ -993,8 +1003,8 @@ union dp_128b_132b_supported_link_rates { union dp_128b_132b_supported_lttpr_link_rates { struct { uint8_t UHBR10 :1; - uint8_t UHBR13_5:1; uint8_t UHBR20 :1; + uint8_t UHBR13_5:1; uint8_t RESERVED:5; } bits; uint8_t raw; diff --git a/drivers/gpu/drm/amd/display/dc/dc_link.h b/drivers/gpu/drm/amd/display/dc/dc_link.h index aa818bf840eb..a3c37ee3f849 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_link.h +++ b/drivers/gpu/drm/amd/display/dc/dc_link.h @@ -67,13 +67,9 @@ struct link_mst_stream_allocation_table { struct link_mst_stream_allocation stream_allocations[MAX_CONTROLLER_NUM]; }; -struct time_stamp { - uint64_t edp_poweroff; - uint64_t edp_poweron; -}; - -struct link_trace { - struct time_stamp time_stamp; +struct edp_trace_power_timestamps { + uint64_t poweroff; + uint64_t poweron; }; struct dp_trace_lt_counts { @@ -96,6 +92,7 @@ struct dp_trace { struct dp_trace_lt commit_lt_trace; unsigned int link_loss_count; bool is_initialized; + struct edp_trace_power_timestamps edp_trace_power_timestamps; }; /* PSR feature flags */ @@ -231,7 +228,6 @@ struct dc_link { struct dc_link_status link_status; struct dprx_states dprx_states; - struct link_trace link_trace; struct gpio *hpd_gpio; enum dc_link_fec_state fec_state; }; diff --git a/drivers/gpu/drm/amd/display/dc/dc_stream.h b/drivers/gpu/drm/amd/display/dc/dc_stream.h index c4168c11257c..580420c3eedc 100644 --- 
a/drivers/gpu/drm/amd/display/dc/dc_stream.h +++ b/drivers/gpu/drm/amd/display/dc/dc_stream.h @@ -246,6 +246,7 @@ struct dc_stream_state { bool apply_edp_fast_boot_optimization; bool apply_seamless_boot_optimization; + uint32_t apply_boot_odm_mode; uint32_t stream_id; diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c b/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c index cc5128e67daf..760653e2b607 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c @@ -985,7 +985,7 @@ static bool dcn31_program_pix_clk( struct bp_pixel_clock_parameters bp_pc_params = {0}; enum transmitter_color_depth bp_pc_colour_depth = TRANSMITTER_COLOR_DEPTH_24; // For these signal types Driver to program DP_DTO without calling VBIOS Command table - if (dc_is_dp_signal(pix_clk_params->signal_type)) { + if (dc_is_dp_signal(pix_clk_params->signal_type) || dc_is_virtual_signal(pix_clk_params->signal_type)) { if (e) { /* Set DTO values: phase = target clock, modulo = reference clock*/ REG_WRITE(PHASE[inst], e->target_pixel_rate_khz * e->mult_factor); @@ -1254,7 +1254,7 @@ static uint32_t dcn3_get_pix_clk_dividers( struct pixel_clk_params *pix_clk_params, struct pll_settings *pll_settings) { - unsigned long long actual_pix_clk_100Hz = pix_clk_params->requested_pix_clk_100hz; + unsigned long long actual_pix_clk_100Hz = pix_clk_params ? 
pix_clk_params->requested_pix_clk_100hz : 0; struct dce110_clk_src *clk_src; clk_src = TO_DCE110_CLK_SRC(cs); diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c index 248602c15f3a..9fc1ba12ec19 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c @@ -67,6 +67,7 @@ #include "dcn10/dcn10_hw_sequencer.h" +#include "link/link_dp_trace.h" #include "dce110_hw_sequencer.h" #define GAMMA_HW_POINTS_NUM 256 @@ -819,19 +820,19 @@ void dce110_edp_power_control( div64_u64(dm_get_elapse_time_in_ns( ctx, current_ts, - link->link_trace.time_stamp.edp_poweroff), 1000000); + dp_trace_get_edp_poweroff_timestamp(link)), 1000000); unsigned long long time_since_edp_poweron_ms = div64_u64(dm_get_elapse_time_in_ns( ctx, current_ts, - link->link_trace.time_stamp.edp_poweron), 1000000); + dp_trace_get_edp_poweron_timestamp(link)), 1000000); DC_LOG_HW_RESUME_S3( "%s: transition: power_up=%d current_ts=%llu edp_poweroff=%llu edp_poweron=%llu time_since_edp_poweroff_ms=%llu time_since_edp_poweron_ms=%llu", __func__, power_up, current_ts, - link->link_trace.time_stamp.edp_poweroff, - link->link_trace.time_stamp.edp_poweron, + dp_trace_get_edp_poweroff_timestamp(link), + dp_trace_get_edp_poweron_timestamp(link), time_since_edp_poweroff_ms, time_since_edp_poweron_ms); @@ -846,7 +847,7 @@ void dce110_edp_power_control( link->local_sink->edid_caps.panel_patch.extra_t12_ms; /* Adjust remaining_min_edp_poweroff_time_ms if this is not the first time. */ - if (link->link_trace.time_stamp.edp_poweroff != 0) { + if (dp_trace_get_edp_poweroff_timestamp(link) != 0) { if (time_since_edp_poweroff_ms < remaining_min_edp_poweroff_time_ms) remaining_min_edp_poweroff_time_ms = remaining_min_edp_poweroff_time_ms - time_since_edp_poweroff_ms; @@ -904,17 +905,13 @@ void dce110_edp_power_control( __func__, (power_up ? 
"On":"Off"), bp_result); - if (!power_up) - /*save driver power off time stamp*/ - link->link_trace.time_stamp.edp_poweroff = dm_get_timestamp(ctx); - else - link->link_trace.time_stamp.edp_poweron = dm_get_timestamp(ctx); + dp_trace_set_edp_power_timestamp(link, power_up); DC_LOG_HW_RESUME_S3( "%s: updated values: edp_poweroff=%llu edp_poweron=%llu\n", __func__, - link->link_trace.time_stamp.edp_poweroff, - link->link_trace.time_stamp.edp_poweron); + dp_trace_get_edp_poweroff_timestamp(link), + dp_trace_get_edp_poweron_timestamp(link)); if (bp_result != BP_RESULT_OK) DC_LOG_ERROR( @@ -942,14 +939,14 @@ void dce110_edp_wait_for_T12( return; if (!link->panel_cntl->funcs->is_panel_powered_on(link->panel_cntl) && - link->link_trace.time_stamp.edp_poweroff != 0) { + dp_trace_get_edp_poweroff_timestamp(link) != 0) { unsigned int t12_duration = 500; // Default T12 as per spec unsigned long long current_ts = dm_get_timestamp(ctx); unsigned long long time_since_edp_poweroff_ms = div64_u64(dm_get_elapse_time_in_ns( ctx, current_ts, - link->link_trace.time_stamp.edp_poweroff), 1000000); + dp_trace_get_edp_poweroff_timestamp(link)), 1000000); t12_duration += link->local_sink->edid_caps.panel_patch.extra_t12_ms; // Add extra T12 diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.h index 0b17c2993ca5..4ccb03a7564a 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.h +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.h @@ -790,5 +790,6 @@ bool hubp1_in_blank(struct hubp *hubp); void hubp1_soft_reset(struct hubp *hubp, bool reset); void hubp1_set_flip_int(struct hubp *hubp); +void hubp1_wait_pipe_read_start(struct hubp *hubp); #endif diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c index 781334b395ba..e02ac75afbf7 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c +++ 
b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c @@ -1259,6 +1259,7 @@ void dcn10_init_pipes(struct dc *dc, struct dc_state *context) { int i; struct dce_hwseq *hws = dc->hwseq; + struct hubbub *hubbub = dc->res_pool->hubbub; bool can_apply_seamless_boot = false; for (i = 0; i < context->stream_count; i++) { @@ -1294,6 +1295,21 @@ void dcn10_init_pipes(struct dc *dc, struct dc_state *context) } } + /* Reset det size */ + for (i = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i]; + struct hubp *hubp = dc->res_pool->hubps[i]; + + /* Do not need to reset for seamless boot */ + if (pipe_ctx->stream != NULL && can_apply_seamless_boot) + continue; + + if (hubbub && hubp) { + if (hubbub->funcs->program_det_size) + hubbub->funcs->program_det_size(hubbub, hubp->inst, 0); + } + } + /* num_opp will be equal to number of mpcc */ for (i = 0; i < dc->res_pool->res_cap->num_opp; i++) { struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i]; @@ -1359,6 +1375,11 @@ void dcn10_init_pipes(struct dc *dc, struct dc_state *context) pipe_ctx->stream_res.tg = NULL; pipe_ctx->plane_res.hubp = NULL; + if (tg->funcs->is_tg_enabled(tg)) { + if (tg->funcs->init_odm) + tg->funcs->init_odm(tg); + } + tg->funcs->tg_init(tg); } @@ -1493,8 +1514,12 @@ void dcn10_init_hw(struct dc *dc) /* Check for enabled DIG to identify enabled display */ if (link->link_enc->funcs->is_dig_enabled && - link->link_enc->funcs->is_dig_enabled(link->link_enc)) + link->link_enc->funcs->is_dig_enabled(link->link_enc)) { link->link_status.link_active = true; + if (link->link_enc->funcs->fec_is_active && + link->link_enc->funcs->fec_is_active(link->link_enc)) + link->fec_state = dc_link_fec_enabled; + } } /* we want to turn off all dp displays before doing detection */ @@ -2522,14 +2547,18 @@ void dcn10_update_mpcc(struct dc *dc, struct pipe_ctx *pipe_ctx) struct mpc *mpc = dc->res_pool->mpc; struct mpc_tree *mpc_tree_params = 
&(pipe_ctx->stream_res.opp->mpc_tree_params); - if (per_pixel_alpha) - blnd_cfg.alpha_mode = MPCC_ALPHA_BLEND_MODE_PER_PIXEL_ALPHA; - else - blnd_cfg.alpha_mode = MPCC_ALPHA_BLEND_MODE_GLOBAL_ALPHA; - blnd_cfg.overlap_only = false; blnd_cfg.global_gain = 0xff; + if (per_pixel_alpha && pipe_ctx->plane_state->global_alpha) { + blnd_cfg.alpha_mode = MPCC_ALPHA_BLEND_MODE_PER_PIXEL_ALPHA_COMBINED_GLOBAL_GAIN; + blnd_cfg.global_gain = pipe_ctx->plane_state->global_alpha_value; + } else if (per_pixel_alpha) { + blnd_cfg.alpha_mode = MPCC_ALPHA_BLEND_MODE_PER_PIXEL_ALPHA; + } else { + blnd_cfg.alpha_mode = MPCC_ALPHA_BLEND_MODE_GLOBAL_ALPHA; + } + if (pipe_ctx->plane_state->global_alpha) blnd_cfg.global_alpha = pipe_ctx->plane_state->global_alpha_value; else @@ -2979,8 +3008,11 @@ void dcn10_prepare_bandwidth( true); dcn10_stereo_hw_frame_pack_wa(dc, context); - if (dc->debug.pplib_wm_report_mode == WM_REPORT_OVERRIDE) + if (dc->debug.pplib_wm_report_mode == WM_REPORT_OVERRIDE) { + DC_FP_START(); dcn_bw_notify_pplib_of_wm_ranges(dc); + DC_FP_END(); + } if (dc->debug.sanity_checks) hws->funcs.verify_allow_pstate_change_high(dc); @@ -3013,8 +3045,11 @@ void dcn10_optimize_bandwidth( dcn10_stereo_hw_frame_pack_wa(dc, context); - if (dc->debug.pplib_wm_report_mode == WM_REPORT_OVERRIDE) + if (dc->debug.pplib_wm_report_mode == WM_REPORT_OVERRIDE) { + DC_FP_START(); dcn_bw_notify_pplib_of_wm_ranges(dc); + DC_FP_END(); + } if (dc->debug.sanity_checks) hws->funcs.verify_allow_pstate_change_high(dc); @@ -3039,12 +3074,16 @@ void dcn10_set_drr(struct pipe_ctx **pipe_ctx, * as well. 
*/ for (i = 0; i < num_pipes; i++) { - pipe_ctx[i]->stream_res.tg->funcs->set_drr( - pipe_ctx[i]->stream_res.tg, &params); - if (adjust.v_total_max != 0 && adjust.v_total_min != 0) - pipe_ctx[i]->stream_res.tg->funcs->set_static_screen_control( - pipe_ctx[i]->stream_res.tg, - event_triggers, num_frames); + if ((pipe_ctx[i]->stream_res.tg != NULL) && pipe_ctx[i]->stream_res.tg->funcs) { + if (pipe_ctx[i]->stream_res.tg->funcs->set_drr) + pipe_ctx[i]->stream_res.tg->funcs->set_drr( + pipe_ctx[i]->stream_res.tg, &params); + if (adjust.v_total_max != 0 && adjust.v_total_min != 0) + if (pipe_ctx[i]->stream_res.tg->funcs->set_static_screen_control) + pipe_ctx[i]->stream_res.tg->funcs->set_static_screen_control( + pipe_ctx[i]->stream_res.tg, + event_triggers, num_frames); + } } } @@ -3175,7 +3214,8 @@ void dcn10_wait_for_mpcc_disconnect( if (pipe_ctx->stream_res.opp->mpcc_disconnect_pending[mpcc_inst]) { struct hubp *hubp = get_hubp_by_inst(res_pool, mpcc_inst); - res_pool->mpc->funcs->wait_for_idle(res_pool->mpc, mpcc_inst); + if (pipe_ctx->stream_res.tg->funcs->is_tg_enabled(pipe_ctx->stream_res.tg)) + res_pool->mpc->funcs->wait_for_idle(res_pool->mpc, mpcc_inst); pipe_ctx->stream_res.opp->mpcc_disconnect_pending[mpcc_inst] = false; hubp->funcs->set_blank(hubp, true); } diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c index 4048908dd265..bca049b2f867 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c @@ -1141,6 +1141,20 @@ static void dcn10_destroy_resource_pool(struct resource_pool **pool) *pool = NULL; } +static bool dcn10_validate_bandwidth( + struct dc *dc, + struct dc_state *context, + bool fast_validate) +{ + bool voltage_supported; + + DC_FP_START(); + voltage_supported = dcn_validate_bandwidth(dc, context, fast_validate); + DC_FP_END(); + + return voltage_supported; +} + static enum dc_status dcn10_validate_plane(const 
struct dc_plane_state *plane_state, struct dc_caps *caps) { if (plane_state->format >= SURFACE_PIXEL_FORMAT_VIDEO_BEGIN @@ -1492,6 +1506,7 @@ static bool dcn10_resource_construct( && pool->base.pp_smu->rv_funcs.set_pme_wa_enable != NULL) dc->debug.az_endpoint_mute_only = false; + DC_FP_START(); if (!dc->debug.disable_pplib_clock_request) dcn_bw_update_from_pplib(dc); dcn_bw_sync_calcs_and_dml(dc); @@ -1499,6 +1514,7 @@ static bool dcn10_resource_construct( dc->res_pool = &pool->base; dcn_bw_notify_pplib_of_wm_ranges(dc); } + DC_FP_END(); { struct irq_service_init_data init_data; diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c index 4290eaf11a04..b627c41713cc 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c @@ -2344,14 +2344,18 @@ void dcn20_update_mpcc(struct dc *dc, struct pipe_ctx *pipe_ctx) struct mpc *mpc = dc->res_pool->mpc; struct mpc_tree *mpc_tree_params = &(pipe_ctx->stream_res.opp->mpc_tree_params); - if (per_pixel_alpha) - blnd_cfg.alpha_mode = MPCC_ALPHA_BLEND_MODE_PER_PIXEL_ALPHA; - else - blnd_cfg.alpha_mode = MPCC_ALPHA_BLEND_MODE_GLOBAL_ALPHA; - blnd_cfg.overlap_only = false; blnd_cfg.global_gain = 0xff; + if (per_pixel_alpha && pipe_ctx->plane_state->global_alpha) { + blnd_cfg.alpha_mode = MPCC_ALPHA_BLEND_MODE_PER_PIXEL_ALPHA_COMBINED_GLOBAL_GAIN; + blnd_cfg.global_gain = pipe_ctx->plane_state->global_alpha_value; + } else if (per_pixel_alpha) { + blnd_cfg.alpha_mode = MPCC_ALPHA_BLEND_MODE_PER_PIXEL_ALPHA; + } else { + blnd_cfg.alpha_mode = MPCC_ALPHA_BLEND_MODE_GLOBAL_ALPHA; + } + if (pipe_ctx->plane_state->global_alpha) blnd_cfg.global_alpha = pipe_ctx->plane_state->global_alpha_value; else diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c index f61ec8763844..782b8db451b4 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c +++ 
b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c @@ -535,8 +535,12 @@ void dcn30_init_hw(struct dc *dc) /* Check for enabled DIG to identify enabled display */ if (link->link_enc->funcs->is_dig_enabled && - link->link_enc->funcs->is_dig_enabled(link->link_enc)) + link->link_enc->funcs->is_dig_enabled(link->link_enc)) { link->link_status.link_active = true; + if (link->link_enc->funcs->fec_is_active && + link->link_enc->funcs->fec_is_active(link->link_enc)) + link->fec_state = dc_link_fec_enabled; + } } /* Power gate DSCs */ diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c index e6a62cc75139..336b2ce6a636 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c @@ -2602,9 +2602,9 @@ static bool dcn30_resource_construct( dc->caps.mall_size_total = dc->caps.mall_size_per_mem_channel * dc->ctx->dc_bios->vram_info.num_chans * 1048576; dc->caps.cursor_cache_size = dc->caps.max_cursor_size * dc->caps.max_cursor_size * 8; - dc->caps.max_slave_planes = 1; - dc->caps.max_slave_yuv_planes = 1; - dc->caps.max_slave_rgb_planes = 1; + dc->caps.max_slave_planes = 2; + dc->caps.max_slave_yuv_planes = 2; + dc->caps.max_slave_rgb_planes = 2; dc->caps.post_blend_color_processing = true; dc->caps.force_dp_tps4_for_cp2520 = true; dc->caps.extended_aux_timeout_support = true; diff --git a/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c b/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c index 88318e8ffca8..f0938653bb88 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c @@ -1219,9 +1219,9 @@ static bool dcn302_resource_construct( /* total size = mall per channel * num channels * 1024 * 1024 */ dc->caps.mall_size_total = dc->caps.mall_size_per_mem_channel * dc->ctx->dc_bios->vram_info.num_chans * 1048576; dc->caps.cursor_cache_size = 
dc->caps.max_cursor_size * dc->caps.max_cursor_size * 8; - dc->caps.max_slave_planes = 1; - dc->caps.max_slave_yuv_planes = 1; - dc->caps.max_slave_rgb_planes = 1; + dc->caps.max_slave_planes = 2; + dc->caps.max_slave_yuv_planes = 2; + dc->caps.max_slave_rgb_planes = 2; dc->caps.post_blend_color_processing = true; dc->caps.force_dp_tps4_for_cp2520 = true; dc->caps.extended_aux_timeout_support = true; diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/Makefile b/drivers/gpu/drm/amd/display/dc/dcn31/Makefile index d20e3b8ccc30..ec041e3cda30 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn31/Makefile @@ -15,32 +15,6 @@ DCN31 = dcn31_resource.o dcn31_hubbub.o dcn31_hwseq.o dcn31_init.o dcn31_hubp.o dcn31_apg.o dcn31_hpo_dp_stream_encoder.o dcn31_hpo_dp_link_encoder.o \ dcn31_afmt.o dcn31_vpg.o -ifdef CONFIG_X86 -CFLAGS_$(AMDDALPATH)/dc/dcn31/dcn31_resource.o := -msse -endif - -ifdef CONFIG_PPC64 -CFLAGS_$(AMDDALPATH)/dc/dcn31/dcn31_resource.o := -mhard-float -maltivec -endif - -ifdef CONFIG_CC_IS_GCC -ifeq ($(call cc-ifversion, -lt, 0701, y), y) -IS_OLD_GCC = 1 -endif -CFLAGS_$(AMDDALPATH)/dc/dcn31/dcn31_resource.o += -mhard-float -endif - -ifdef CONFIG_X86 -ifdef IS_OLD_GCC -# Stack alignment mismatch, proceed with caution. -# GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3 -# (8B stack alignment). 
-CFLAGS_$(AMDDALPATH)/dc/dcn31/dcn31_resource.o += -mpreferred-stack-boundary=4 -else -CFLAGS_$(AMDDALPATH)/dc/dcn31/dcn31_resource.o += -msse2 -endif -endif - AMD_DAL_DCN31 = $(addprefix $(AMDDALPATH)/dc/dcn31/,$(DCN31)) AMD_DISPLAY_FILES += $(AMD_DAL_DCN31) diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.c index 631d8ac63aa4..531dd2c65007 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.c @@ -188,8 +188,12 @@ void dcn31_init_hw(struct dc *dc) /* Check for enabled DIG to identify enabled display */ if (link->link_enc->funcs->is_dig_enabled && - link->link_enc->funcs->is_dig_enabled(link->link_enc)) + link->link_enc->funcs->is_dig_enabled(link->link_enc)) { link->link_status.link_active = true; + if (link->link_enc->funcs->fec_is_active && + link->link_enc->funcs->fec_is_active(link->link_enc)) + link->fec_state = dc_link_fec_enabled; + } } /* Enables outbox notifications for usb4 dpia */ diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c index d7559e5a99ce..e708f07fe75a 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c @@ -153,9 +153,4 @@ void dcn31_hw_sequencer_construct(struct dc *dc) dc->hwss.init_hw = dcn20_fpga_init_hw; dc->hwseq->funcs.init_pipes = NULL; } - if (dc->debug.disable_z10) { - /*hw not support z10 or sw disable it*/ - dc->hwss.z10_restore = NULL; - dc->hwss.z10_save_init = NULL; - } } diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_optc.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_optc.c index e05527a3a8ba..c51f7dca94f8 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_optc.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_optc.c @@ -91,8 +91,7 @@ static void optc31_set_odm_combine(struct timing_generator *optc, int *opp_id, i optc1->opp_count = opp_cnt; } -/** - * 
Enable CRTC +/* * Enable CRTC - call ASIC Control Object to enable Timing generator. */ static bool optc31_enable_crtc(struct timing_generator *optc) @@ -214,6 +213,26 @@ void optc31_set_drr( } } +void optc3_init_odm(struct timing_generator *optc) +{ + struct optc *optc1 = DCN10TG_FROM_TG(optc); + + REG_SET_5(OPTC_DATA_SOURCE_SELECT, 0, + OPTC_NUM_OF_INPUT_SEGMENT, 0, + OPTC_SEG0_SRC_SEL, optc->inst, + OPTC_SEG1_SRC_SEL, 0xf, + OPTC_SEG2_SRC_SEL, 0xf, + OPTC_SEG3_SRC_SEL, 0xf + ); + + REG_SET(OTG_H_TIMING_CNTL, 0, + OTG_H_TIMING_DIV_MODE, 0); + + REG_SET(OPTC_MEMORY_CONFIG, 0, + OPTC_MEM_SEL, 0); + optc1->opp_count = 1; +} + static struct timing_generator_funcs dcn31_tg_funcs = { .validate_timing = optc1_validate_timing, .program_timing = optc1_program_timing, @@ -273,6 +292,7 @@ static struct timing_generator_funcs dcn31_tg_funcs = { .program_manual_trigger = optc2_program_manual_trigger, .setup_manual_trigger = optc2_setup_manual_trigger, .get_hw_timing = optc1_get_hw_timing, + .init_odm = optc3_init_odm, }; void dcn31_timing_generator_init(struct optc *optc1) diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_optc.h b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_optc.h index a37b16040c1d..9e881f2ce74b 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_optc.h +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_optc.h @@ -258,4 +258,6 @@ void dcn31_timing_generator_init(struct optc *optc1); void optc31_set_drr(struct timing_generator *optc, const struct drr_params *params); +void optc3_init_odm(struct timing_generator *optc); + #endif /* __DC_OPTC_DCN31_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c index 63934ecf6be8..5b3f0c2dfb55 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c @@ -65,6 +65,7 @@ #include "virtual/virtual_stream_encoder.h" #include "dce110/dce110_resource.h" #include 
"dml/display_mode_vba.h" +#include "dml/dcn31/dcn31_fpu.h" #include "dcn31/dcn31_dccg.h" #include "dcn10/dcn10_resource.h" #include "dcn31_panel_cntl.h" @@ -102,152 +103,6 @@ #define DC_LOGGER_INIT(logger) -#define DCN3_1_DEFAULT_DET_SIZE 384 - -struct _vcs_dpi_ip_params_st dcn3_1_ip = { - .gpuvm_enable = 1, - .gpuvm_max_page_table_levels = 1, - .hostvm_enable = 1, - .hostvm_max_page_table_levels = 2, - .rob_buffer_size_kbytes = 64, - .det_buffer_size_kbytes = DCN3_1_DEFAULT_DET_SIZE, - .config_return_buffer_size_in_kbytes = 1792, - .compressed_buffer_segment_size_in_kbytes = 64, - .meta_fifo_size_in_kentries = 32, - .zero_size_buffer_entries = 512, - .compbuf_reserved_space_64b = 256, - .compbuf_reserved_space_zs = 64, - .dpp_output_buffer_pixels = 2560, - .opp_output_buffer_lines = 1, - .pixel_chunk_size_kbytes = 8, - .meta_chunk_size_kbytes = 2, - .min_meta_chunk_size_bytes = 256, - .writeback_chunk_size_kbytes = 8, - .ptoi_supported = false, - .num_dsc = 3, - .maximum_dsc_bits_per_component = 10, - .dsc422_native_support = false, - .is_line_buffer_bpp_fixed = true, - .line_buffer_fixed_bpp = 48, - .line_buffer_size_bits = 789504, - .max_line_buffer_lines = 12, - .writeback_interface_buffer_size_kbytes = 90, - .max_num_dpp = 4, - .max_num_otg = 4, - .max_num_hdmi_frl_outputs = 1, - .max_num_wb = 1, - .max_dchub_pscl_bw_pix_per_clk = 4, - .max_pscl_lb_bw_pix_per_clk = 2, - .max_lb_vscl_bw_pix_per_clk = 4, - .max_vscl_hscl_bw_pix_per_clk = 4, - .max_hscl_ratio = 6, - .max_vscl_ratio = 6, - .max_hscl_taps = 8, - .max_vscl_taps = 8, - .dpte_buffer_size_in_pte_reqs_luma = 64, - .dpte_buffer_size_in_pte_reqs_chroma = 34, - .dispclk_ramp_margin_percent = 1, - .max_inter_dcn_tile_repeaters = 8, - .cursor_buffer_size = 16, - .cursor_chunk_size = 2, - .writeback_line_buffer_buffer_size = 0, - .writeback_min_hscl_ratio = 1, - .writeback_min_vscl_ratio = 1, - .writeback_max_hscl_ratio = 1, - .writeback_max_vscl_ratio = 1, - .writeback_max_hscl_taps = 1, - 
.writeback_max_vscl_taps = 1, - .dppclk_delay_subtotal = 46, - .dppclk_delay_scl = 50, - .dppclk_delay_scl_lb_only = 16, - .dppclk_delay_cnvc_formatter = 27, - .dppclk_delay_cnvc_cursor = 6, - .dispclk_delay_subtotal = 119, - .dynamic_metadata_vm_enabled = false, - .odm_combine_4to1_supported = false, - .dcc_supported = true, -}; - -struct _vcs_dpi_soc_bounding_box_st dcn3_1_soc = { - /*TODO: correct dispclk/dppclk voltage level determination*/ - .clock_limits = { - { - .state = 0, - .dispclk_mhz = 1200.0, - .dppclk_mhz = 1200.0, - .phyclk_mhz = 600.0, - .phyclk_d18_mhz = 667.0, - .dscclk_mhz = 186.0, - .dtbclk_mhz = 625.0, - }, - { - .state = 1, - .dispclk_mhz = 1200.0, - .dppclk_mhz = 1200.0, - .phyclk_mhz = 810.0, - .phyclk_d18_mhz = 667.0, - .dscclk_mhz = 209.0, - .dtbclk_mhz = 625.0, - }, - { - .state = 2, - .dispclk_mhz = 1200.0, - .dppclk_mhz = 1200.0, - .phyclk_mhz = 810.0, - .phyclk_d18_mhz = 667.0, - .dscclk_mhz = 209.0, - .dtbclk_mhz = 625.0, - }, - { - .state = 3, - .dispclk_mhz = 1200.0, - .dppclk_mhz = 1200.0, - .phyclk_mhz = 810.0, - .phyclk_d18_mhz = 667.0, - .dscclk_mhz = 371.0, - .dtbclk_mhz = 625.0, - }, - { - .state = 4, - .dispclk_mhz = 1200.0, - .dppclk_mhz = 1200.0, - .phyclk_mhz = 810.0, - .phyclk_d18_mhz = 667.0, - .dscclk_mhz = 417.0, - .dtbclk_mhz = 625.0, - }, - }, - .num_states = 5, - .sr_exit_time_us = 9.0, - .sr_enter_plus_exit_time_us = 11.0, - .sr_exit_z8_time_us = 442.0, - .sr_enter_plus_exit_z8_time_us = 560.0, - .writeback_latency_us = 12.0, - .dram_channel_width_bytes = 4, - .round_trip_ping_latency_dcfclk_cycles = 106, - .urgent_latency_pixel_data_only_us = 4.0, - .urgent_latency_pixel_mixed_with_vm_data_us = 4.0, - .urgent_latency_vm_data_only_us = 4.0, - .urgent_out_of_order_return_per_channel_pixel_only_bytes = 4096, - .urgent_out_of_order_return_per_channel_pixel_and_vm_bytes = 4096, - .urgent_out_of_order_return_per_channel_vm_only_bytes = 4096, - .pct_ideal_sdp_bw_after_urgent = 80.0, - 
.pct_ideal_dram_sdp_bw_after_urgent_pixel_only = 65.0, - .pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm = 60.0, - .pct_ideal_dram_sdp_bw_after_urgent_vm_only = 30.0, - .max_avg_sdp_bw_use_normal_percent = 60.0, - .max_avg_dram_bw_use_normal_percent = 60.0, - .fabric_datapath_to_dcn_data_return_bytes = 32, - .return_bus_width_bytes = 64, - .downspread_percent = 0.38, - .dcn_downspread_percent = 0.5, - .gpuvm_min_page_size_bytes = 4096, - .hostvm_min_page_size_bytes = 4096, - .do_urgent_latency_adjustment = false, - .urgent_latency_adjustment_fabric_clock_component_us = 0, - .urgent_latency_adjustment_fabric_clock_reference_mhz = 0, -}; - enum dcn31_clk_src_array_id { DCN31_CLK_SRC_PLL0, DCN31_CLK_SRC_PLL1, @@ -1030,6 +885,7 @@ static const struct dc_debug_options debug_defaults_drv = { .afmt = true, } }, + .disable_z10 = true, .optimize_edp_link_rate = true, .enable_sw_cntl_psr = true, .apply_vendor_specific_lttpr_wa = true, @@ -1812,7 +1668,6 @@ int dcn31_populate_dml_pipes_from_context( pipes[pipe_cnt].pipe.src.immediate_flip = true; pipes[pipe_cnt].pipe.src.unbounded_req_mode = false; - pipes[pipe_cnt].pipe.src.hostvm = dc->res_pool->hubbub->riommu_active; pipes[pipe_cnt].pipe.src.gpuvm = true; pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_luma = 0; pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_chroma = 0; @@ -1869,143 +1724,6 @@ void dcn31_update_soc_for_wm_a(struct dc *dc, struct dc_state *context) } } -static void dcn31_calculate_wm_and_dlg_fp( - struct dc *dc, struct dc_state *context, - display_e2e_pipe_params_st *pipes, - int pipe_cnt, - int vlevel) -{ - int i, pipe_idx; - double dcfclk = context->bw_ctx.dml.vba.DCFCLKState[vlevel][context->bw_ctx.dml.vba.maxMpcComb]; - - if (context->bw_ctx.dml.soc.min_dcfclk > dcfclk) - dcfclk = context->bw_ctx.dml.soc.min_dcfclk; - - /* We don't recalculate clocks for 0 pipe configs, which can block - * S0i3 as high clocks will block low power states - * Override any clocks that can block S0i3 to min here - */ - if 
(pipe_cnt == 0) { - context->bw_ctx.bw.dcn.clk.dcfclk_khz = dcfclk; // always should be vlevel 0 - return; - } - - pipes[0].clks_cfg.voltage = vlevel; - pipes[0].clks_cfg.dcfclk_mhz = dcfclk; - pipes[0].clks_cfg.socclk_mhz = context->bw_ctx.dml.soc.clock_limits[vlevel].socclk_mhz; - -#if 0 // TODO - /* Set B: - * TODO - */ - if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].valid) { - if (vlevel == 0) { - pipes[0].clks_cfg.voltage = 1; - pipes[0].clks_cfg.dcfclk_mhz = context->bw_ctx.dml.soc.clock_limits[0].dcfclk_mhz; - } - context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].dml_input.pstate_latency_us; - context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].dml_input.sr_enter_plus_exit_time_us; - context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].dml_input.sr_exit_time_us; - } - context->bw_ctx.bw.dcn.watermarks.b.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.cstate_enter_plus_exit_z8_ns = get_wm_z8_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.cstate_exit_z8_ns = get_wm_z8_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.b.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.b.frac_urg_bw_nom = 
get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.b.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.b.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - - pipes[0].clks_cfg.voltage = vlevel; - pipes[0].clks_cfg.dcfclk_mhz = dcfclk; - - /* Set C: - * TODO - */ - if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].valid) { - context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].dml_input.pstate_latency_us; - context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].dml_input.sr_enter_plus_exit_time_us; - context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].dml_input.sr_exit_time_us; - } - context->bw_ctx.bw.dcn.watermarks.c.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.cstate_enter_plus_exit_z8_ns = get_wm_z8_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.cstate_exit_z8_ns = get_wm_z8_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.c.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.c.frac_urg_bw_nom = 
get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.c.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.c.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - - /* Set D: - * TODO - */ - if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].valid) { - context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].dml_input.pstate_latency_us; - context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].dml_input.sr_enter_plus_exit_time_us; - context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].dml_input.sr_exit_time_us; - } - context->bw_ctx.bw.dcn.watermarks.d.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.cstate_enter_plus_exit_z8_ns = get_wm_z8_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.cstate_exit_z8_ns = get_wm_z8_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.d.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.d.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - 
context->bw_ctx.bw.dcn.watermarks.d.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.d.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; -#endif - - /* Set A: - * All clocks min required - * - * Set A calculated last so that following calculations are based on Set A - */ - dc->res_pool->funcs->update_soc_for_wm_a(dc, context); - context->bw_ctx.bw.dcn.watermarks.a.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.cstate_enter_plus_exit_z8_ns = get_wm_z8_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.cstate_exit_z8_ns = get_wm_z8_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.a.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.a.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.a.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - context->bw_ctx.bw.dcn.watermarks.a.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; - /* TODO: remove: */ - context->bw_ctx.bw.dcn.watermarks.b = context->bw_ctx.bw.dcn.watermarks.a; - context->bw_ctx.bw.dcn.watermarks.c = 
context->bw_ctx.bw.dcn.watermarks.a; - context->bw_ctx.bw.dcn.watermarks.d = context->bw_ctx.bw.dcn.watermarks.a; - /* end remove*/ - - for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) { - if (!context->res_ctx.pipe_ctx[i].stream) - continue; - - pipes[pipe_idx].clks_cfg.dispclk_mhz = get_dispclk_calculated(&context->bw_ctx.dml, pipes, pipe_cnt); - pipes[pipe_idx].clks_cfg.dppclk_mhz = get_dppclk_calculated(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx); - - if (dc->config.forced_clocks || dc->debug.max_disp_clk) { - pipes[pipe_idx].clks_cfg.dispclk_mhz = context->bw_ctx.dml.soc.clock_limits[0].dispclk_mhz; - pipes[pipe_idx].clks_cfg.dppclk_mhz = context->bw_ctx.dml.soc.clock_limits[0].dppclk_mhz; - } - if (dc->debug.min_disp_clk_khz > pipes[pipe_idx].clks_cfg.dispclk_mhz * 1000) - pipes[pipe_idx].clks_cfg.dispclk_mhz = dc->debug.min_disp_clk_khz / 1000.0; - if (dc->debug.min_dpp_clk_khz > pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000) - pipes[pipe_idx].clks_cfg.dppclk_mhz = dc->debug.min_dpp_clk_khz / 1000.0; - - pipe_idx++; - } - - DC_FP_START(); - dcn20_calculate_dlg_params(dc, context, pipes, pipe_cnt, vlevel); - DC_FP_END(); -} - void dcn31_calculate_wm_and_dlg( struct dc *dc, struct dc_state *context, display_e2e_pipe_params_st *pipes, @@ -2075,77 +1793,6 @@ static struct dc_cap_funcs cap_funcs = { .get_dcc_compression_cap = dcn20_get_dcc_compression_cap }; -void dcn31_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params) -{ - struct clk_limit_table *clk_table = &bw_params->clk_table; - struct _vcs_dpi_voltage_scaling_st clock_limits[DC__VOLTAGE_STATES]; - unsigned int i, closest_clk_lvl; - int j; - - // Default clock levels are used for diags, which may lead to overclocking. 
- if (!IS_DIAG_DC(dc->ctx->dce_environment)) { - int max_dispclk_mhz = 0, max_dppclk_mhz = 0; - - dcn3_1_ip.max_num_otg = dc->res_pool->res_cap->num_timing_generator; - dcn3_1_ip.max_num_dpp = dc->res_pool->pipe_count; - dcn3_1_soc.num_chans = bw_params->num_channels; - - ASSERT(clk_table->num_entries); - - /* Prepass to find max clocks independent of voltage level. */ - for (i = 0; i < clk_table->num_entries; ++i) { - if (clk_table->entries[i].dispclk_mhz > max_dispclk_mhz) - max_dispclk_mhz = clk_table->entries[i].dispclk_mhz; - if (clk_table->entries[i].dppclk_mhz > max_dppclk_mhz) - max_dppclk_mhz = clk_table->entries[i].dppclk_mhz; - } - - for (i = 0; i < clk_table->num_entries; i++) { - /* loop backwards*/ - for (closest_clk_lvl = 0, j = dcn3_1_soc.num_states - 1; j >= 0; j--) { - if ((unsigned int) dcn3_1_soc.clock_limits[j].dcfclk_mhz <= clk_table->entries[i].dcfclk_mhz) { - closest_clk_lvl = j; - break; - } - } - - clock_limits[i].state = i; - - /* Clocks dependent on voltage level. */ - clock_limits[i].dcfclk_mhz = clk_table->entries[i].dcfclk_mhz; - clock_limits[i].fabricclk_mhz = clk_table->entries[i].fclk_mhz; - clock_limits[i].socclk_mhz = clk_table->entries[i].socclk_mhz; - clock_limits[i].dram_speed_mts = clk_table->entries[i].memclk_mhz * 2 * clk_table->entries[i].wck_ratio; - - /* Clocks independent of voltage level. */ - clock_limits[i].dispclk_mhz = max_dispclk_mhz ? max_dispclk_mhz : - dcn3_1_soc.clock_limits[closest_clk_lvl].dispclk_mhz; - - clock_limits[i].dppclk_mhz = max_dppclk_mhz ? 
max_dppclk_mhz : - dcn3_1_soc.clock_limits[closest_clk_lvl].dppclk_mhz; - - clock_limits[i].dram_bw_per_chan_gbps = dcn3_1_soc.clock_limits[closest_clk_lvl].dram_bw_per_chan_gbps; - clock_limits[i].dscclk_mhz = dcn3_1_soc.clock_limits[closest_clk_lvl].dscclk_mhz; - clock_limits[i].dtbclk_mhz = dcn3_1_soc.clock_limits[closest_clk_lvl].dtbclk_mhz; - clock_limits[i].phyclk_d18_mhz = dcn3_1_soc.clock_limits[closest_clk_lvl].phyclk_d18_mhz; - clock_limits[i].phyclk_mhz = dcn3_1_soc.clock_limits[closest_clk_lvl].phyclk_mhz; - } - for (i = 0; i < clk_table->num_entries; i++) - dcn3_1_soc.clock_limits[i] = clock_limits[i]; - if (clk_table->num_entries) { - dcn3_1_soc.num_states = clk_table->num_entries; - } - } - - dcn3_1_soc.dispclk_dppclk_vco_speed_mhz = dc->clk_mgr->dentist_vco_freq_khz / 1000.0; - dc->dml.soc.dispclk_dppclk_vco_speed_mhz = dc->clk_mgr->dentist_vco_freq_khz / 1000.0; - - if (!IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment)) - dml_init_instance(&dc->dml, &dcn3_1_soc, &dcn3_1_ip, DML_PROJECT_DCN31); - else - dml_init_instance(&dc->dml, &dcn3_1_soc, &dcn3_1_ip, DML_PROJECT_DCN31_FPGA); -} - static struct resource_funcs dcn31_res_pool_funcs = { .destroy = dcn31_destroy_resource_pool, .link_enc_create = dcn31_link_encoder_create, @@ -2223,9 +1870,9 @@ static bool dcn31_resource_construct( dc->caps.min_horizontal_blanking_period = 80; dc->caps.dmdata_alloc_size = 2048; - dc->caps.max_slave_planes = 1; - dc->caps.max_slave_yuv_planes = 1; - dc->caps.max_slave_rgb_planes = 1; + dc->caps.max_slave_planes = 2; + dc->caps.max_slave_yuv_planes = 2; + dc->caps.max_slave_rgb_planes = 2; dc->caps.post_blend_color_processing = true; dc->caps.force_dp_tps4_for_cp2520 = true; dc->caps.dp_hpo = true; diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.h b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.h index 4b7ab21ea15b..1ce6509c1ed1 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.h +++ 
b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.h @@ -31,6 +31,9 @@ #define TO_DCN31_RES_POOL(pool)\ container_of(pool, struct dcn31_resource_pool, base) +extern struct _vcs_dpi_ip_params_st dcn3_1_ip; +extern struct _vcs_dpi_soc_bounding_box_st dcn3_1_soc; + struct dcn31_resource_pool { struct resource_pool base; }; @@ -47,7 +50,6 @@ int dcn31_populate_dml_pipes_from_context( struct dc *dc, struct dc_state *context, display_e2e_pipe_params_st *pipes, bool fast_validate); -void dcn31_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params); void dcn31_update_soc_for_wm_a(struct dc *dc, struct dc_state *context); struct resource_pool *dcn31_create_resource_pool( diff --git a/drivers/gpu/drm/amd/display/dc/dcn315/Makefile b/drivers/gpu/drm/amd/display/dc/dcn315/Makefile index c831ad46e81c..59381d24800b 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn315/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn315/Makefile @@ -25,32 +25,6 @@ DCN315 = dcn315_resource.o -ifdef CONFIG_X86 -CFLAGS_$(AMDDALPATH)/dc/dcn315/dcn315_resource.o := -msse -endif - -ifdef CONFIG_PPC64 -CFLAGS_$(AMDDALPATH)/dc/dcn315/dcn315_resource.o := -mhard-float -maltivec -endif - -ifdef CONFIG_CC_IS_GCC -ifeq ($(call cc-ifversion, -lt, 0701, y), y) -IS_OLD_GCC = 1 -endif -CFLAGS_$(AMDDALPATH)/dc/dcn315/dcn315_resource.o += -mhard-float -endif - -ifdef CONFIG_X86 -ifdef IS_OLD_GCC -# Stack alignment mismatch, proceed with caution. -# GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3 -# (8B stack alignment). 
-CFLAGS_$(AMDDALPATH)/dc/dcn315/dcn315_resource.o += -mpreferred-stack-boundary=4 -else -CFLAGS_$(AMDDALPATH)/dc/dcn315/dcn315_resource.o += -msse2 -endif -endif - AMD_DAL_DCN315 = $(addprefix $(AMDDALPATH)/dc/dcn315/,$(DCN315)) AMD_DISPLAY_FILES += $(AMD_DAL_DCN315) diff --git a/drivers/gpu/drm/amd/display/dc/dcn315/dcn315_resource.c b/drivers/gpu/drm/amd/display/dc/dcn315/dcn315_resource.c index 06adb77c206b..e6f9312e3a48 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn315/dcn315_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn315/dcn315_resource.c @@ -66,6 +66,7 @@ #include "virtual/virtual_stream_encoder.h" #include "dce110/dce110_resource.h" #include "dml/display_mode_vba.h" +#include "dml/dcn31/dcn31_fpu.h" #include "dcn31/dcn31_dccg.h" #include "dcn10/dcn10_resource.h" #include "dcn31/dcn31_panel_cntl.h" @@ -133,158 +134,9 @@ #include "link_enc_cfg.h" -#define DC_LOGGER_INIT(logger) - -#define DCN3_15_DEFAULT_DET_SIZE 192 #define DCN3_15_MAX_DET_SIZE 384 -#define DCN3_15_MIN_COMPBUF_SIZE_KB 128 #define DCN3_15_CRB_SEGMENT_SIZE_KB 64 -struct _vcs_dpi_ip_params_st dcn3_15_ip = { - .gpuvm_enable = 1, - .gpuvm_max_page_table_levels = 1, - .hostvm_enable = 1, - .hostvm_max_page_table_levels = 2, - .rob_buffer_size_kbytes = 64, - .det_buffer_size_kbytes = DCN3_15_DEFAULT_DET_SIZE, - .min_comp_buffer_size_kbytes = DCN3_15_MIN_COMPBUF_SIZE_KB, - .config_return_buffer_size_in_kbytes = 1024, - .compressed_buffer_segment_size_in_kbytes = 64, - .meta_fifo_size_in_kentries = 32, - .zero_size_buffer_entries = 512, - .compbuf_reserved_space_64b = 256, - .compbuf_reserved_space_zs = 64, - .dpp_output_buffer_pixels = 2560, - .opp_output_buffer_lines = 1, - .pixel_chunk_size_kbytes = 8, - .meta_chunk_size_kbytes = 2, - .min_meta_chunk_size_bytes = 256, - .writeback_chunk_size_kbytes = 8, - .ptoi_supported = false, - .num_dsc = 3, - .maximum_dsc_bits_per_component = 10, - .dsc422_native_support = false, - .is_line_buffer_bpp_fixed = true, - .line_buffer_fixed_bpp = 49, - 
.line_buffer_size_bits = 789504, - .max_line_buffer_lines = 12, - .writeback_interface_buffer_size_kbytes = 90, - .max_num_dpp = 4, - .max_num_otg = 4, - .max_num_hdmi_frl_outputs = 1, - .max_num_wb = 1, - .max_dchub_pscl_bw_pix_per_clk = 4, - .max_pscl_lb_bw_pix_per_clk = 2, - .max_lb_vscl_bw_pix_per_clk = 4, - .max_vscl_hscl_bw_pix_per_clk = 4, - .max_hscl_ratio = 6, - .max_vscl_ratio = 6, - .max_hscl_taps = 8, - .max_vscl_taps = 8, - .dpte_buffer_size_in_pte_reqs_luma = 64, - .dpte_buffer_size_in_pte_reqs_chroma = 34, - .dispclk_ramp_margin_percent = 1, - .max_inter_dcn_tile_repeaters = 9, - .cursor_buffer_size = 16, - .cursor_chunk_size = 2, - .writeback_line_buffer_buffer_size = 0, - .writeback_min_hscl_ratio = 1, - .writeback_min_vscl_ratio = 1, - .writeback_max_hscl_ratio = 1, - .writeback_max_vscl_ratio = 1, - .writeback_max_hscl_taps = 1, - .writeback_max_vscl_taps = 1, - .dppclk_delay_subtotal = 46, - .dppclk_delay_scl = 50, - .dppclk_delay_scl_lb_only = 16, - .dppclk_delay_cnvc_formatter = 27, - .dppclk_delay_cnvc_cursor = 6, - .dispclk_delay_subtotal = 119, - .dynamic_metadata_vm_enabled = false, - .odm_combine_4to1_supported = false, - .dcc_supported = true, -}; - -struct _vcs_dpi_soc_bounding_box_st dcn3_15_soc = { - /*TODO: correct dispclk/dppclk voltage level determination*/ - .clock_limits = { - { - .state = 0, - .dispclk_mhz = 1372.0, - .dppclk_mhz = 1372.0, - .phyclk_mhz = 810.0, - .phyclk_d18_mhz = 667.0, - .dscclk_mhz = 417.0, - .dtbclk_mhz = 600.0, - }, - { - .state = 1, - .dispclk_mhz = 1372.0, - .dppclk_mhz = 1372.0, - .phyclk_mhz = 810.0, - .phyclk_d18_mhz = 667.0, - .dscclk_mhz = 417.0, - .dtbclk_mhz = 600.0, - }, - { - .state = 2, - .dispclk_mhz = 1372.0, - .dppclk_mhz = 1372.0, - .phyclk_mhz = 810.0, - .phyclk_d18_mhz = 667.0, - .dscclk_mhz = 417.0, - .dtbclk_mhz = 600.0, - }, - { - .state = 3, - .dispclk_mhz = 1372.0, - .dppclk_mhz = 1372.0, - .phyclk_mhz = 810.0, - .phyclk_d18_mhz = 667.0, - .dscclk_mhz = 417.0, - .dtbclk_mhz = 600.0, 
- }, - { - .state = 4, - .dispclk_mhz = 1372.0, - .dppclk_mhz = 1372.0, - .phyclk_mhz = 810.0, - .phyclk_d18_mhz = 667.0, - .dscclk_mhz = 417.0, - .dtbclk_mhz = 600.0, - }, - }, - .num_states = 5, - .sr_exit_time_us = 9.0, - .sr_enter_plus_exit_time_us = 11.0, - .sr_exit_z8_time_us = 50.0, - .sr_enter_plus_exit_z8_time_us = 50.0, - .writeback_latency_us = 12.0, - .dram_channel_width_bytes = 4, - .round_trip_ping_latency_dcfclk_cycles = 106, - .urgent_latency_pixel_data_only_us = 4.0, - .urgent_latency_pixel_mixed_with_vm_data_us = 4.0, - .urgent_latency_vm_data_only_us = 4.0, - .urgent_out_of_order_return_per_channel_pixel_only_bytes = 4096, - .urgent_out_of_order_return_per_channel_pixel_and_vm_bytes = 4096, - .urgent_out_of_order_return_per_channel_vm_only_bytes = 4096, - .pct_ideal_sdp_bw_after_urgent = 80.0, - .pct_ideal_dram_sdp_bw_after_urgent_pixel_only = 65.0, - .pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm = 60.0, - .pct_ideal_dram_sdp_bw_after_urgent_vm_only = 30.0, - .max_avg_sdp_bw_use_normal_percent = 60.0, - .max_avg_dram_bw_use_normal_percent = 60.0, - .fabric_datapath_to_dcn_data_return_bytes = 32, - .return_bus_width_bytes = 64, - .downspread_percent = 0.38, - .dcn_downspread_percent = 0.38, - .gpuvm_min_page_size_bytes = 4096, - .hostvm_min_page_size_bytes = 4096, - .do_urgent_latency_adjustment = false, - .urgent_latency_adjustment_fabric_clock_component_us = 0, - .urgent_latency_adjustment_fabric_clock_reference_mhz = 0, -}; - enum dcn31_clk_src_array_id { DCN31_CLK_SRC_PLL0, DCN31_CLK_SRC_PLL1, @@ -1859,88 +1711,6 @@ static struct dc_cap_funcs cap_funcs = { .get_dcc_compression_cap = dcn20_get_dcc_compression_cap }; -static void dcn315_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params) -{ - struct clk_limit_table *clk_table = &bw_params->clk_table; - struct _vcs_dpi_voltage_scaling_st clock_limits[DC__VOLTAGE_STATES]; - unsigned int i, closest_clk_lvl; - int max_dispclk_mhz = 0, max_dppclk_mhz = 0; - int j; - - // Default 
clock levels are used for diags, which may lead to overclocking. - if (!IS_DIAG_DC(dc->ctx->dce_environment)) { - - dcn3_15_ip.max_num_otg = dc->res_pool->res_cap->num_timing_generator; - dcn3_15_ip.max_num_dpp = dc->res_pool->pipe_count; - dcn3_15_soc.num_chans = bw_params->num_channels; - - ASSERT(clk_table->num_entries); - - /* Prepass to find max clocks independent of voltage level. */ - for (i = 0; i < clk_table->num_entries; ++i) { - if (clk_table->entries[i].dispclk_mhz > max_dispclk_mhz) - max_dispclk_mhz = clk_table->entries[i].dispclk_mhz; - if (clk_table->entries[i].dppclk_mhz > max_dppclk_mhz) - max_dppclk_mhz = clk_table->entries[i].dppclk_mhz; - } - - for (i = 0; i < clk_table->num_entries; i++) { - /* loop backwards*/ - for (closest_clk_lvl = 0, j = dcn3_15_soc.num_states - 1; j >= 0; j--) { - if ((unsigned int) dcn3_15_soc.clock_limits[j].dcfclk_mhz <= clk_table->entries[i].dcfclk_mhz) { - closest_clk_lvl = j; - break; - } - } - if (clk_table->num_entries == 1) { - /*smu gives one DPM level, let's take the highest one*/ - closest_clk_lvl = dcn3_15_soc.num_states - 1; - } - - clock_limits[i].state = i; - - /* Clocks dependent on voltage level. */ - clock_limits[i].dcfclk_mhz = clk_table->entries[i].dcfclk_mhz; - if (clk_table->num_entries == 1 && - clock_limits[i].dcfclk_mhz < dcn3_15_soc.clock_limits[closest_clk_lvl].dcfclk_mhz) { - /*SMU fix not released yet*/ - clock_limits[i].dcfclk_mhz = dcn3_15_soc.clock_limits[closest_clk_lvl].dcfclk_mhz; - } - clock_limits[i].fabricclk_mhz = clk_table->entries[i].fclk_mhz; - clock_limits[i].socclk_mhz = clk_table->entries[i].socclk_mhz; - clock_limits[i].dram_speed_mts = clk_table->entries[i].memclk_mhz * 2 * clk_table->entries[i].wck_ratio; - - /* Clocks independent of voltage level. */ - clock_limits[i].dispclk_mhz = max_dispclk_mhz ? max_dispclk_mhz : - dcn3_15_soc.clock_limits[closest_clk_lvl].dispclk_mhz; - - clock_limits[i].dppclk_mhz = max_dppclk_mhz ? 
max_dppclk_mhz : - dcn3_15_soc.clock_limits[closest_clk_lvl].dppclk_mhz; - - clock_limits[i].dram_bw_per_chan_gbps = dcn3_15_soc.clock_limits[closest_clk_lvl].dram_bw_per_chan_gbps; - clock_limits[i].dscclk_mhz = dcn3_15_soc.clock_limits[closest_clk_lvl].dscclk_mhz; - clock_limits[i].dtbclk_mhz = dcn3_15_soc.clock_limits[closest_clk_lvl].dtbclk_mhz; - clock_limits[i].phyclk_d18_mhz = dcn3_15_soc.clock_limits[closest_clk_lvl].phyclk_d18_mhz; - clock_limits[i].phyclk_mhz = dcn3_15_soc.clock_limits[closest_clk_lvl].phyclk_mhz; - } - for (i = 0; i < clk_table->num_entries; i++) - dcn3_15_soc.clock_limits[i] = clock_limits[i]; - if (clk_table->num_entries) { - dcn3_15_soc.num_states = clk_table->num_entries; - } - } - - if (max_dispclk_mhz) { - dcn3_15_soc.dispclk_dppclk_vco_speed_mhz = max_dispclk_mhz * 2; - dc->dml.soc.dispclk_dppclk_vco_speed_mhz = max_dispclk_mhz * 2; - } - - if (!IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment)) - dml_init_instance(&dc->dml, &dcn3_15_soc, &dcn3_15_ip, DML_PROJECT_DCN31); - else - dml_init_instance(&dc->dml, &dcn3_15_soc, &dcn3_15_ip, DML_PROJECT_DCN31_FPGA); -} - static struct resource_funcs dcn315_res_pool_funcs = { .destroy = dcn315_destroy_resource_pool, .link_enc_create = dcn31_link_encoder_create, @@ -1988,11 +1758,10 @@ static bool dcn315_resource_construct( pool->base.mpcc_count = pool->base.res_cap->num_timing_generator; dc->caps.max_downscale_ratio = 600; dc->caps.i2c_speed_in_khz = 100; - dc->caps.i2c_speed_in_khz_hdcp = 5; /*1.4 w/a applied by default*/ + dc->caps.i2c_speed_in_khz_hdcp = 100; dc->caps.max_cursor_size = 256; dc->caps.min_horizontal_blanking_period = 80; dc->caps.dmdata_alloc_size = 2048; - dc->caps.max_slave_planes = 1; dc->caps.max_slave_yuv_planes = 1; dc->caps.max_slave_rgb_planes = 1; diff --git a/drivers/gpu/drm/amd/display/dc/dcn315/dcn315_resource.h b/drivers/gpu/drm/amd/display/dc/dcn315/dcn315_resource.h index f3a36820a31f..39929fa67a51 100644 --- 
a/drivers/gpu/drm/amd/display/dc/dcn315/dcn315_resource.h +++ b/drivers/gpu/drm/amd/display/dc/dcn315/dcn315_resource.h @@ -31,6 +31,9 @@ #define TO_DCN315_RES_POOL(pool)\ container_of(pool, struct dcn315_resource_pool, base) +extern struct _vcs_dpi_ip_params_st dcn3_15_ip; +extern struct _vcs_dpi_soc_bounding_box_st dcn3_15_soc; + struct dcn315_resource_pool { struct resource_pool base; }; diff --git a/drivers/gpu/drm/amd/display/dc/dcn316/Makefile b/drivers/gpu/drm/amd/display/dc/dcn316/Makefile index cd87b687c5e2..819d44a9439b 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn316/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn316/Makefile @@ -25,32 +25,6 @@ DCN316 = dcn316_resource.o -ifdef CONFIG_X86 -CFLAGS_$(AMDDALPATH)/dc/dcn316/dcn316_resource.o := -msse -endif - -ifdef CONFIG_PPC64 -CFLAGS_$(AMDDALPATH)/dc/dcn316/dcn316_resource.o := -mhard-float -maltivec -endif - -ifdef CONFIG_CC_IS_GCC -ifeq ($(call cc-ifversion, -lt, 0701, y), y) -IS_OLD_GCC = 1 -endif -CFLAGS_$(AMDDALPATH)/dc/dcn316/dcn316_resource.o += -mhard-float -endif - -ifdef CONFIG_X86 -ifdef IS_OLD_GCC -# Stack alignment mismatch, proceed with caution. -# GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3 -# (8B stack alignment). 
-CFLAGS_$(AMDDALPATH)/dc/dcn316/dcn316_resource.o += -mpreferred-stack-boundary=4 -else -CFLAGS_$(AMDDALPATH)/dc/dcn316/dcn316_resource.o += -msse2 -endif -endif - AMD_DAL_DCN316 = $(addprefix $(AMDDALPATH)/dc/dcn316/,$(DCN316)) AMD_DISPLAY_FILES += $(AMD_DAL_DCN316) diff --git a/drivers/gpu/drm/amd/display/dc/dcn316/dcn316_resource.c b/drivers/gpu/drm/amd/display/dc/dcn316/dcn316_resource.c index 8decc3ccf8ca..d5c195749a81 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn316/dcn316_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn316/dcn316_resource.c @@ -66,6 +66,7 @@ #include "virtual/virtual_stream_encoder.h" #include "dce110/dce110_resource.h" #include "dml/display_mode_vba.h" +#include "dml/dcn31/dcn31_fpu.h" #include "dcn31/dcn31_dccg.h" #include "dcn10/dcn10_resource.h" #include "dcn31/dcn31_panel_cntl.h" @@ -123,157 +124,10 @@ #include "link_enc_cfg.h" -#define DC_LOGGER_INIT(logger) - -#define DCN3_16_DEFAULT_DET_SIZE 192 #define DCN3_16_MAX_DET_SIZE 384 #define DCN3_16_MIN_COMPBUF_SIZE_KB 128 #define DCN3_16_CRB_SEGMENT_SIZE_KB 64 -struct _vcs_dpi_ip_params_st dcn3_16_ip = { - .gpuvm_enable = 1, - .gpuvm_max_page_table_levels = 1, - .hostvm_enable = 1, - .hostvm_max_page_table_levels = 2, - .rob_buffer_size_kbytes = 64, - .det_buffer_size_kbytes = DCN3_16_DEFAULT_DET_SIZE, - .config_return_buffer_size_in_kbytes = 1024, - .compressed_buffer_segment_size_in_kbytes = 64, - .meta_fifo_size_in_kentries = 32, - .zero_size_buffer_entries = 512, - .compbuf_reserved_space_64b = 256, - .compbuf_reserved_space_zs = 64, - .dpp_output_buffer_pixels = 2560, - .opp_output_buffer_lines = 1, - .pixel_chunk_size_kbytes = 8, - .meta_chunk_size_kbytes = 2, - .min_meta_chunk_size_bytes = 256, - .writeback_chunk_size_kbytes = 8, - .ptoi_supported = false, - .num_dsc = 3, - .maximum_dsc_bits_per_component = 10, - .dsc422_native_support = false, - .is_line_buffer_bpp_fixed = true, - .line_buffer_fixed_bpp = 48, - .line_buffer_size_bits = 789504, - .max_line_buffer_lines = 12, - 
.writeback_interface_buffer_size_kbytes = 90, - .max_num_dpp = 4, - .max_num_otg = 4, - .max_num_hdmi_frl_outputs = 1, - .max_num_wb = 1, - .max_dchub_pscl_bw_pix_per_clk = 4, - .max_pscl_lb_bw_pix_per_clk = 2, - .max_lb_vscl_bw_pix_per_clk = 4, - .max_vscl_hscl_bw_pix_per_clk = 4, - .max_hscl_ratio = 6, - .max_vscl_ratio = 6, - .max_hscl_taps = 8, - .max_vscl_taps = 8, - .dpte_buffer_size_in_pte_reqs_luma = 64, - .dpte_buffer_size_in_pte_reqs_chroma = 34, - .dispclk_ramp_margin_percent = 1, - .max_inter_dcn_tile_repeaters = 8, - .cursor_buffer_size = 16, - .cursor_chunk_size = 2, - .writeback_line_buffer_buffer_size = 0, - .writeback_min_hscl_ratio = 1, - .writeback_min_vscl_ratio = 1, - .writeback_max_hscl_ratio = 1, - .writeback_max_vscl_ratio = 1, - .writeback_max_hscl_taps = 1, - .writeback_max_vscl_taps = 1, - .dppclk_delay_subtotal = 46, - .dppclk_delay_scl = 50, - .dppclk_delay_scl_lb_only = 16, - .dppclk_delay_cnvc_formatter = 27, - .dppclk_delay_cnvc_cursor = 6, - .dispclk_delay_subtotal = 119, - .dynamic_metadata_vm_enabled = false, - .odm_combine_4to1_supported = false, - .dcc_supported = true, -}; - -struct _vcs_dpi_soc_bounding_box_st dcn3_16_soc = { - /*TODO: correct dispclk/dppclk voltage level determination*/ - .clock_limits = { - { - .state = 0, - .dispclk_mhz = 556.0, - .dppclk_mhz = 556.0, - .phyclk_mhz = 600.0, - .phyclk_d18_mhz = 445.0, - .dscclk_mhz = 186.0, - .dtbclk_mhz = 625.0, - }, - { - .state = 1, - .dispclk_mhz = 625.0, - .dppclk_mhz = 625.0, - .phyclk_mhz = 810.0, - .phyclk_d18_mhz = 667.0, - .dscclk_mhz = 209.0, - .dtbclk_mhz = 625.0, - }, - { - .state = 2, - .dispclk_mhz = 625.0, - .dppclk_mhz = 625.0, - .phyclk_mhz = 810.0, - .phyclk_d18_mhz = 667.0, - .dscclk_mhz = 209.0, - .dtbclk_mhz = 625.0, - }, - { - .state = 3, - .dispclk_mhz = 1112.0, - .dppclk_mhz = 1112.0, - .phyclk_mhz = 810.0, - .phyclk_d18_mhz = 667.0, - .dscclk_mhz = 371.0, - .dtbclk_mhz = 625.0, - }, - { - .state = 4, - .dispclk_mhz = 1250.0, - .dppclk_mhz = 1250.0, 
- .phyclk_mhz = 810.0, - .phyclk_d18_mhz = 667.0, - .dscclk_mhz = 417.0, - .dtbclk_mhz = 625.0, - }, - }, - .num_states = 5, - .sr_exit_time_us = 9.0, - .sr_enter_plus_exit_time_us = 11.0, - .sr_exit_z8_time_us = 442.0, - .sr_enter_plus_exit_z8_time_us = 560.0, - .writeback_latency_us = 12.0, - .dram_channel_width_bytes = 4, - .round_trip_ping_latency_dcfclk_cycles = 106, - .urgent_latency_pixel_data_only_us = 4.0, - .urgent_latency_pixel_mixed_with_vm_data_us = 4.0, - .urgent_latency_vm_data_only_us = 4.0, - .urgent_out_of_order_return_per_channel_pixel_only_bytes = 4096, - .urgent_out_of_order_return_per_channel_pixel_and_vm_bytes = 4096, - .urgent_out_of_order_return_per_channel_vm_only_bytes = 4096, - .pct_ideal_sdp_bw_after_urgent = 80.0, - .pct_ideal_dram_sdp_bw_after_urgent_pixel_only = 65.0, - .pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm = 60.0, - .pct_ideal_dram_sdp_bw_after_urgent_vm_only = 30.0, - .max_avg_sdp_bw_use_normal_percent = 60.0, - .max_avg_dram_bw_use_normal_percent = 60.0, - .fabric_datapath_to_dcn_data_return_bytes = 32, - .return_bus_width_bytes = 64, - .downspread_percent = 0.38, - .dcn_downspread_percent = 0.5, - .gpuvm_min_page_size_bytes = 4096, - .hostvm_min_page_size_bytes = 4096, - .do_urgent_latency_adjustment = false, - .urgent_latency_adjustment_fabric_clock_component_us = 0, - .urgent_latency_adjustment_fabric_clock_reference_mhz = 0, -}; - enum dcn31_clk_src_array_id { DCN31_CLK_SRC_PLL0, DCN31_CLK_SRC_PLL1, @@ -1859,89 +1713,6 @@ static struct dc_cap_funcs cap_funcs = { .get_dcc_compression_cap = dcn20_get_dcc_compression_cap }; -static void dcn316_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params) -{ - struct clk_limit_table *clk_table = &bw_params->clk_table; - struct _vcs_dpi_voltage_scaling_st clock_limits[DC__VOLTAGE_STATES]; - unsigned int i, closest_clk_lvl; - int max_dispclk_mhz = 0, max_dppclk_mhz = 0; - int j; - - // Default clock levels are used for diags, which may lead to overclocking. 
- if (!IS_DIAG_DC(dc->ctx->dce_environment)) { - - dcn3_16_ip.max_num_otg = dc->res_pool->res_cap->num_timing_generator; - dcn3_16_ip.max_num_dpp = dc->res_pool->pipe_count; - dcn3_16_soc.num_chans = bw_params->num_channels; - - ASSERT(clk_table->num_entries); - - /* Prepass to find max clocks independent of voltage level. */ - for (i = 0; i < clk_table->num_entries; ++i) { - if (clk_table->entries[i].dispclk_mhz > max_dispclk_mhz) - max_dispclk_mhz = clk_table->entries[i].dispclk_mhz; - if (clk_table->entries[i].dppclk_mhz > max_dppclk_mhz) - max_dppclk_mhz = clk_table->entries[i].dppclk_mhz; - } - - for (i = 0; i < clk_table->num_entries; i++) { - /* loop backwards*/ - for (closest_clk_lvl = 0, j = dcn3_16_soc.num_states - 1; j >= 0; j--) { - if ((unsigned int) dcn3_16_soc.clock_limits[j].dcfclk_mhz <= clk_table->entries[i].dcfclk_mhz) { - closest_clk_lvl = j; - break; - } - } - // Ported from DCN315 - if (clk_table->num_entries == 1) { - /*smu gives one DPM level, let's take the highest one*/ - closest_clk_lvl = dcn3_16_soc.num_states - 1; - } - - clock_limits[i].state = i; - - /* Clocks dependent on voltage level. */ - clock_limits[i].dcfclk_mhz = clk_table->entries[i].dcfclk_mhz; - if (clk_table->num_entries == 1 && - clock_limits[i].dcfclk_mhz < dcn3_16_soc.clock_limits[closest_clk_lvl].dcfclk_mhz) { - /*SMU fix not released yet*/ - clock_limits[i].dcfclk_mhz = dcn3_16_soc.clock_limits[closest_clk_lvl].dcfclk_mhz; - } - clock_limits[i].fabricclk_mhz = clk_table->entries[i].fclk_mhz; - clock_limits[i].socclk_mhz = clk_table->entries[i].socclk_mhz; - clock_limits[i].dram_speed_mts = clk_table->entries[i].memclk_mhz * 2 * clk_table->entries[i].wck_ratio; - - /* Clocks independent of voltage level. */ - clock_limits[i].dispclk_mhz = max_dispclk_mhz ? max_dispclk_mhz : - dcn3_16_soc.clock_limits[closest_clk_lvl].dispclk_mhz; - - clock_limits[i].dppclk_mhz = max_dppclk_mhz ? 
max_dppclk_mhz : - dcn3_16_soc.clock_limits[closest_clk_lvl].dppclk_mhz; - - clock_limits[i].dram_bw_per_chan_gbps = dcn3_16_soc.clock_limits[closest_clk_lvl].dram_bw_per_chan_gbps; - clock_limits[i].dscclk_mhz = dcn3_16_soc.clock_limits[closest_clk_lvl].dscclk_mhz; - clock_limits[i].dtbclk_mhz = dcn3_16_soc.clock_limits[closest_clk_lvl].dtbclk_mhz; - clock_limits[i].phyclk_d18_mhz = dcn3_16_soc.clock_limits[closest_clk_lvl].phyclk_d18_mhz; - clock_limits[i].phyclk_mhz = dcn3_16_soc.clock_limits[closest_clk_lvl].phyclk_mhz; - } - for (i = 0; i < clk_table->num_entries; i++) - dcn3_16_soc.clock_limits[i] = clock_limits[i]; - if (clk_table->num_entries) { - dcn3_16_soc.num_states = clk_table->num_entries; - } - } - - if (max_dispclk_mhz) { - dcn3_16_soc.dispclk_dppclk_vco_speed_mhz = max_dispclk_mhz * 2; - dc->dml.soc.dispclk_dppclk_vco_speed_mhz = max_dispclk_mhz * 2; - } - - if (!IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment)) - dml_init_instance(&dc->dml, &dcn3_16_soc, &dcn3_16_ip, DML_PROJECT_DCN31); - else - dml_init_instance(&dc->dml, &dcn3_16_soc, &dcn3_16_ip, DML_PROJECT_DCN31_FPGA); -} - static struct resource_funcs dcn316_res_pool_funcs = { .destroy = dcn316_destroy_resource_pool, .link_enc_create = dcn31_link_encoder_create, @@ -1989,11 +1760,10 @@ static bool dcn316_resource_construct( pool->base.mpcc_count = pool->base.res_cap->num_timing_generator; dc->caps.max_downscale_ratio = 600; dc->caps.i2c_speed_in_khz = 100; - dc->caps.i2c_speed_in_khz_hdcp = 5; /*1.4 w/a applied by default*/ + dc->caps.i2c_speed_in_khz_hdcp = 100; dc->caps.max_cursor_size = 256; dc->caps.min_horizontal_blanking_period = 80; dc->caps.dmdata_alloc_size = 2048; - dc->caps.max_slave_planes = 1; dc->caps.max_slave_yuv_planes = 1; dc->caps.max_slave_rgb_planes = 1; diff --git a/drivers/gpu/drm/amd/display/dc/dcn316/dcn316_resource.h b/drivers/gpu/drm/amd/display/dc/dcn316/dcn316_resource.h index 9d0d60cb9482..0dc5a6c13ae7 100644 --- 
a/drivers/gpu/drm/amd/display/dc/dcn316/dcn316_resource.h +++ b/drivers/gpu/drm/amd/display/dc/dcn316/dcn316_resource.h @@ -31,6 +31,9 @@ #define TO_DCN316_RES_POOL(pool)\ container_of(pool, struct dcn316_resource_pool, base) +extern struct _vcs_dpi_ip_params_st dcn3_16_ip; +extern struct _vcs_dpi_soc_bounding_box_st dcn3_16_soc; + struct dcn316_resource_pool { struct resource_pool base; }; diff --git a/drivers/gpu/drm/amd/display/dc/dml/Makefile b/drivers/gpu/drm/amd/display/dc/dml/Makefile index 28978ce62f87..ee911452c048 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dml/Makefile @@ -71,6 +71,7 @@ CFLAGS_$(AMDDALPATH)/dc/dml/dcn30/display_mode_vba_30.o := $(dml_ccflags) $(fram CFLAGS_$(AMDDALPATH)/dc/dml/dcn30/display_rq_dlg_calc_30.o := $(dml_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml/dcn31/display_mode_vba_31.o := $(dml_ccflags) $(frame_warn_flag) CFLAGS_$(AMDDALPATH)/dc/dml/dcn31/display_rq_dlg_calc_31.o := $(dml_ccflags) +CFLAGS_$(AMDDALPATH)/dc/dml/dcn31/dcn31_fpu.o := $(dml_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml/dcn301/dcn301_fpu.o := $(dml_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml/dcn302/dcn302_fpu.o := $(dml_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml/dcn303/dcn303_fpu.o := $(dml_ccflags) @@ -114,6 +115,7 @@ DML += dcn20/display_rq_dlg_calc_20v2.o dcn20/display_mode_vba_20v2.o DML += dcn21/display_rq_dlg_calc_21.o dcn21/display_mode_vba_21.o DML += dcn30/display_mode_vba_30.o dcn30/display_rq_dlg_calc_30.o DML += dcn31/display_mode_vba_31.o dcn31/display_rq_dlg_calc_31.o +DML += dcn31/dcn31_fpu.o DML += dcn301/dcn301_fpu.o DML += dcn302/dcn302_fpu.o DML += dcn303/dcn303_fpu.o diff --git a/drivers/gpu/drm/amd/display/dc/dml/calcs/dcn_calcs.c b/drivers/gpu/drm/amd/display/dc/dml/calcs/dcn_calcs.c index e447c74be713..db3b16b77034 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/calcs/dcn_calcs.c +++ b/drivers/gpu/drm/amd/display/dc/dml/calcs/dcn_calcs.c @@ -639,7 +639,6 @@ static bool dcn_bw_apply_registry_override(struct dc *dc) { bool 
updated = false; - DC_FP_START(); if ((int)(dc->dcn_soc->sr_exit_time * 1000) != dc->debug.sr_exit_time_ns && dc->debug.sr_exit_time_ns) { updated = true; @@ -675,7 +674,6 @@ static bool dcn_bw_apply_registry_override(struct dc *dc) dc->dcn_soc->dram_clock_change_latency = dc->debug.dram_clock_change_latency_ns / 1000.0; } - DC_FP_END(); return updated; } @@ -764,7 +762,7 @@ static unsigned int get_highest_allowed_voltage_level(uint32_t chip_family, return 4; } -bool dcn10_validate_bandwidth( +bool dcn_validate_bandwidth( struct dc *dc, struct dc_state *context, bool fast_validate) @@ -790,7 +788,6 @@ bool dcn10_validate_bandwidth( dcn_bw_sync_calcs_and_dml(dc); memset(v, 0, sizeof(*v)); - DC_FP_START(); v->sr_exit_time = dc->dcn_soc->sr_exit_time; v->sr_enter_plus_exit_time = dc->dcn_soc->sr_enter_plus_exit_time; @@ -1323,8 +1320,6 @@ bool dcn10_validate_bandwidth( bw_limit = dc->dcn_soc->percent_disp_bw_limit * v->fabric_and_dram_bandwidth_vmax0p9; bw_limit_pass = (v->total_data_read_bandwidth / 1000.0) < bw_limit; - DC_FP_END(); - PERFORMANCE_TRACE_END(); BW_VAL_TRACE_FINISH(); @@ -1495,8 +1490,6 @@ void dcn_bw_update_from_pplib(struct dc *dc) res = dm_pp_get_clock_levels_by_type_with_voltage( ctx, DM_PP_CLOCK_TYPE_FCLK, &fclks); - DC_FP_START(); - if (res) res = verify_clock_values(&fclks); @@ -1526,13 +1519,9 @@ void dcn_bw_update_from_pplib(struct dc *dc) } else BREAK_TO_DEBUGGER(); - DC_FP_END(); - res = dm_pp_get_clock_levels_by_type_with_voltage( ctx, DM_PP_CLOCK_TYPE_DCFCLK, &dcfclks); - DC_FP_START(); - if (res) res = verify_clock_values(&dcfclks); @@ -1543,8 +1532,6 @@ void dcn_bw_update_from_pplib(struct dc *dc) dc->dcn_soc->dcfclkv_max0p9 = dcfclks.data[dcfclks.num_levels - 1].clocks_in_khz / 1000.0; } else BREAK_TO_DEBUGGER(); - - DC_FP_END(); } void dcn_bw_notify_pplib_of_wm_ranges(struct dc *dc) @@ -1559,11 +1546,9 @@ void dcn_bw_notify_pplib_of_wm_ranges(struct dc *dc) if (!pp || !pp->set_wm_ranges) return; - DC_FP_START(); min_fclk_khz = 
dc->dcn_soc->fabric_and_dram_bandwidth_vmin0p65 * 1000000 / 32; min_dcfclk_khz = dc->dcn_soc->dcfclkv_min0p65 * 1000; socclk_khz = dc->dcn_soc->socclk * 1000; - DC_FP_END(); /* Now notify PPLib/SMU about which Watermarks sets they should select * depending on DPM state they are in. And update BW MGR GFX Engine and @@ -1614,7 +1599,6 @@ void dcn_bw_notify_pplib_of_wm_ranges(struct dc *dc) void dcn_bw_sync_calcs_and_dml(struct dc *dc) { - DC_FP_START(); DC_LOG_BANDWIDTH_CALCS("sr_exit_time: %f ns\n" "sr_enter_plus_exit_time: %f ns\n" "urgent_latency: %f ns\n" @@ -1803,5 +1787,4 @@ void dcn_bw_sync_calcs_and_dml(struct dc *dc) dc->dml.ip.bug_forcing_LC_req_same_size_fixed = dc->dcn_ip->bug_forcing_luma_and_chroma_request_to_same_size_fixed == dcn_bw_yes; dc->dml.ip.dcfclk_cstate_latency = dc->dcn_ip->dcfclk_cstate_latency; - DC_FP_END(); } diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c index f93af45aeab4..f79dd40f8d81 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c @@ -1302,9 +1302,7 @@ int dcn20_populate_dml_pipes_from_context( } /* populate writeback information */ - DC_FP_START(); dc->res_pool->funcs->populate_dml_writeback_from_context(dc, res_ctx, pipes); - DC_FP_END(); return pipe_cnt; } diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c new file mode 100644 index 000000000000..a0a2e125c9c8 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c @@ -0,0 +1,863 @@ +/* + * Copyright 2019-2021 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#include "resource.h" +#include "clk_mgr.h" + +#include "dml/dcn20/dcn20_fpu.h" +#include "dcn31_fpu.h" + +/** + * DOC: DCN31x FPU manipulation Overview + * + * The DCN architecture relies on FPU operations, which require special + * compilation flags and the use of kernel_fpu_begin/end functions; ideally, we + * want to avoid spreading FPU access across multiple files. With this idea in + * mind, this file aims to centralize all DCN3.1.x functions that require FPU + * access in a single place. Code in this file follows the following code + * pattern: + * + * 1. Functions that use FPU operations should be isolated in static functions. + * 2. The FPU functions should have the noinline attribute to ensure anything + * that deals with FP register is contained within this call. + * 3. 
All functions that need to be accessed outside this file require a + * public interface that does not use any FPU reference. + * 4. Developers **must not** use DC_FP_START/END in this file, but they need + * to ensure that the caller invokes it before accessing any function available + * in this file. For this reason, public functions in this file must invoke + * dc_assert_fp_enabled(); + */ + +struct _vcs_dpi_ip_params_st dcn3_1_ip = { + .gpuvm_enable = 1, + .gpuvm_max_page_table_levels = 1, + .hostvm_enable = 1, + .hostvm_max_page_table_levels = 2, + .rob_buffer_size_kbytes = 64, + .det_buffer_size_kbytes = DCN3_1_DEFAULT_DET_SIZE, + .config_return_buffer_size_in_kbytes = 1792, + .compressed_buffer_segment_size_in_kbytes = 64, + .meta_fifo_size_in_kentries = 32, + .zero_size_buffer_entries = 512, + .compbuf_reserved_space_64b = 256, + .compbuf_reserved_space_zs = 64, + .dpp_output_buffer_pixels = 2560, + .opp_output_buffer_lines = 1, + .pixel_chunk_size_kbytes = 8, + .meta_chunk_size_kbytes = 2, + .min_meta_chunk_size_bytes = 256, + .writeback_chunk_size_kbytes = 8, + .ptoi_supported = false, + .num_dsc = 3, + .maximum_dsc_bits_per_component = 10, + .dsc422_native_support = false, + .is_line_buffer_bpp_fixed = true, + .line_buffer_fixed_bpp = 48, + .line_buffer_size_bits = 789504, + .max_line_buffer_lines = 12, + .writeback_interface_buffer_size_kbytes = 90, + .max_num_dpp = 4, + .max_num_otg = 4, + .max_num_hdmi_frl_outputs = 1, + .max_num_wb = 1, + .max_dchub_pscl_bw_pix_per_clk = 4, + .max_pscl_lb_bw_pix_per_clk = 2, + .max_lb_vscl_bw_pix_per_clk = 4, + .max_vscl_hscl_bw_pix_per_clk = 4, + .max_hscl_ratio = 6, + .max_vscl_ratio = 6, + .max_hscl_taps = 8, + .max_vscl_taps = 8, + .dpte_buffer_size_in_pte_reqs_luma = 64, + .dpte_buffer_size_in_pte_reqs_chroma = 34, + .dispclk_ramp_margin_percent = 1, + .max_inter_dcn_tile_repeaters = 8, + .cursor_buffer_size = 16, + .cursor_chunk_size = 2, + .writeback_line_buffer_buffer_size = 0, + .writeback_min_hscl_ratio = 1, + 
.writeback_min_vscl_ratio = 1, + .writeback_max_hscl_ratio = 1, + .writeback_max_vscl_ratio = 1, + .writeback_max_hscl_taps = 1, + .writeback_max_vscl_taps = 1, + .dppclk_delay_subtotal = 46, + .dppclk_delay_scl = 50, + .dppclk_delay_scl_lb_only = 16, + .dppclk_delay_cnvc_formatter = 27, + .dppclk_delay_cnvc_cursor = 6, + .dispclk_delay_subtotal = 119, + .dynamic_metadata_vm_enabled = false, + .odm_combine_4to1_supported = false, + .dcc_supported = true, +}; + +struct _vcs_dpi_soc_bounding_box_st dcn3_1_soc = { + /*TODO: correct dispclk/dppclk voltage level determination*/ + .clock_limits = { + { + .state = 0, + .dispclk_mhz = 1200.0, + .dppclk_mhz = 1200.0, + .phyclk_mhz = 600.0, + .phyclk_d18_mhz = 667.0, + .dscclk_mhz = 186.0, + .dtbclk_mhz = 625.0, + }, + { + .state = 1, + .dispclk_mhz = 1200.0, + .dppclk_mhz = 1200.0, + .phyclk_mhz = 810.0, + .phyclk_d18_mhz = 667.0, + .dscclk_mhz = 209.0, + .dtbclk_mhz = 625.0, + }, + { + .state = 2, + .dispclk_mhz = 1200.0, + .dppclk_mhz = 1200.0, + .phyclk_mhz = 810.0, + .phyclk_d18_mhz = 667.0, + .dscclk_mhz = 209.0, + .dtbclk_mhz = 625.0, + }, + { + .state = 3, + .dispclk_mhz = 1200.0, + .dppclk_mhz = 1200.0, + .phyclk_mhz = 810.0, + .phyclk_d18_mhz = 667.0, + .dscclk_mhz = 371.0, + .dtbclk_mhz = 625.0, + }, + { + .state = 4, + .dispclk_mhz = 1200.0, + .dppclk_mhz = 1200.0, + .phyclk_mhz = 810.0, + .phyclk_d18_mhz = 667.0, + .dscclk_mhz = 417.0, + .dtbclk_mhz = 625.0, + }, + }, + .num_states = 5, + .sr_exit_time_us = 9.0, + .sr_enter_plus_exit_time_us = 11.0, + .sr_exit_z8_time_us = 442.0, + .sr_enter_plus_exit_z8_time_us = 560.0, + .writeback_latency_us = 12.0, + .dram_channel_width_bytes = 4, + .round_trip_ping_latency_dcfclk_cycles = 106, + .urgent_latency_pixel_data_only_us = 4.0, + .urgent_latency_pixel_mixed_with_vm_data_us = 4.0, + .urgent_latency_vm_data_only_us = 4.0, + .urgent_out_of_order_return_per_channel_pixel_only_bytes = 4096, + .urgent_out_of_order_return_per_channel_pixel_and_vm_bytes = 4096, + 
.urgent_out_of_order_return_per_channel_vm_only_bytes = 4096, + .pct_ideal_sdp_bw_after_urgent = 80.0, + .pct_ideal_dram_sdp_bw_after_urgent_pixel_only = 65.0, + .pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm = 60.0, + .pct_ideal_dram_sdp_bw_after_urgent_vm_only = 30.0, + .max_avg_sdp_bw_use_normal_percent = 60.0, + .max_avg_dram_bw_use_normal_percent = 60.0, + .fabric_datapath_to_dcn_data_return_bytes = 32, + .return_bus_width_bytes = 64, + .downspread_percent = 0.38, + .dcn_downspread_percent = 0.5, + .gpuvm_min_page_size_bytes = 4096, + .hostvm_min_page_size_bytes = 4096, + .do_urgent_latency_adjustment = false, + .urgent_latency_adjustment_fabric_clock_component_us = 0, + .urgent_latency_adjustment_fabric_clock_reference_mhz = 0, +}; + +struct _vcs_dpi_ip_params_st dcn3_15_ip = { + .gpuvm_enable = 1, + .gpuvm_max_page_table_levels = 1, + .hostvm_enable = 1, + .hostvm_max_page_table_levels = 2, + .rob_buffer_size_kbytes = 64, + .det_buffer_size_kbytes = DCN3_15_DEFAULT_DET_SIZE, + .min_comp_buffer_size_kbytes = DCN3_15_MIN_COMPBUF_SIZE_KB, + .config_return_buffer_size_in_kbytes = 1024, + .compressed_buffer_segment_size_in_kbytes = 64, + .meta_fifo_size_in_kentries = 32, + .zero_size_buffer_entries = 512, + .compbuf_reserved_space_64b = 256, + .compbuf_reserved_space_zs = 64, + .dpp_output_buffer_pixels = 2560, + .opp_output_buffer_lines = 1, + .pixel_chunk_size_kbytes = 8, + .meta_chunk_size_kbytes = 2, + .min_meta_chunk_size_bytes = 256, + .writeback_chunk_size_kbytes = 8, + .ptoi_supported = false, + .num_dsc = 3, + .maximum_dsc_bits_per_component = 10, + .dsc422_native_support = false, + .is_line_buffer_bpp_fixed = true, + .line_buffer_fixed_bpp = 49, + .line_buffer_size_bits = 789504, + .max_line_buffer_lines = 12, + .writeback_interface_buffer_size_kbytes = 90, + .max_num_dpp = 4, + .max_num_otg = 4, + .max_num_hdmi_frl_outputs = 1, + .max_num_wb = 1, + .max_dchub_pscl_bw_pix_per_clk = 4, + .max_pscl_lb_bw_pix_per_clk = 2, + .max_lb_vscl_bw_pix_per_clk = 
4, + .max_vscl_hscl_bw_pix_per_clk = 4, + .max_hscl_ratio = 6, + .max_vscl_ratio = 6, + .max_hscl_taps = 8, + .max_vscl_taps = 8, + .dpte_buffer_size_in_pte_reqs_luma = 64, + .dpte_buffer_size_in_pte_reqs_chroma = 34, + .dispclk_ramp_margin_percent = 1, + .max_inter_dcn_tile_repeaters = 9, + .cursor_buffer_size = 16, + .cursor_chunk_size = 2, + .writeback_line_buffer_buffer_size = 0, + .writeback_min_hscl_ratio = 1, + .writeback_min_vscl_ratio = 1, + .writeback_max_hscl_ratio = 1, + .writeback_max_vscl_ratio = 1, + .writeback_max_hscl_taps = 1, + .writeback_max_vscl_taps = 1, + .dppclk_delay_subtotal = 46, + .dppclk_delay_scl = 50, + .dppclk_delay_scl_lb_only = 16, + .dppclk_delay_cnvc_formatter = 27, + .dppclk_delay_cnvc_cursor = 6, + .dispclk_delay_subtotal = 119, + .dynamic_metadata_vm_enabled = false, + .odm_combine_4to1_supported = false, + .dcc_supported = true, +}; + +struct _vcs_dpi_soc_bounding_box_st dcn3_15_soc = { + /*TODO: correct dispclk/dppclk voltage level determination*/ + .clock_limits = { + { + .state = 0, + .dispclk_mhz = 1372.0, + .dppclk_mhz = 1372.0, + .phyclk_mhz = 810.0, + .phyclk_d18_mhz = 667.0, + .dscclk_mhz = 417.0, + .dtbclk_mhz = 600.0, + }, + { + .state = 1, + .dispclk_mhz = 1372.0, + .dppclk_mhz = 1372.0, + .phyclk_mhz = 810.0, + .phyclk_d18_mhz = 667.0, + .dscclk_mhz = 417.0, + .dtbclk_mhz = 600.0, + }, + { + .state = 2, + .dispclk_mhz = 1372.0, + .dppclk_mhz = 1372.0, + .phyclk_mhz = 810.0, + .phyclk_d18_mhz = 667.0, + .dscclk_mhz = 417.0, + .dtbclk_mhz = 600.0, + }, + { + .state = 3, + .dispclk_mhz = 1372.0, + .dppclk_mhz = 1372.0, + .phyclk_mhz = 810.0, + .phyclk_d18_mhz = 667.0, + .dscclk_mhz = 417.0, + .dtbclk_mhz = 600.0, + }, + { + .state = 4, + .dispclk_mhz = 1372.0, + .dppclk_mhz = 1372.0, + .phyclk_mhz = 810.0, + .phyclk_d18_mhz = 667.0, + .dscclk_mhz = 417.0, + .dtbclk_mhz = 600.0, + }, + }, + .num_states = 5, + .sr_exit_time_us = 9.0, + .sr_enter_plus_exit_time_us = 11.0, + .sr_exit_z8_time_us = 50.0, + 
.sr_enter_plus_exit_z8_time_us = 50.0, + .writeback_latency_us = 12.0, + .dram_channel_width_bytes = 4, + .round_trip_ping_latency_dcfclk_cycles = 106, + .urgent_latency_pixel_data_only_us = 4.0, + .urgent_latency_pixel_mixed_with_vm_data_us = 4.0, + .urgent_latency_vm_data_only_us = 4.0, + .urgent_out_of_order_return_per_channel_pixel_only_bytes = 4096, + .urgent_out_of_order_return_per_channel_pixel_and_vm_bytes = 4096, + .urgent_out_of_order_return_per_channel_vm_only_bytes = 4096, + .pct_ideal_sdp_bw_after_urgent = 80.0, + .pct_ideal_dram_sdp_bw_after_urgent_pixel_only = 65.0, + .pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm = 60.0, + .pct_ideal_dram_sdp_bw_after_urgent_vm_only = 30.0, + .max_avg_sdp_bw_use_normal_percent = 60.0, + .max_avg_dram_bw_use_normal_percent = 60.0, + .fabric_datapath_to_dcn_data_return_bytes = 32, + .return_bus_width_bytes = 64, + .downspread_percent = 0.38, + .dcn_downspread_percent = 0.38, + .gpuvm_min_page_size_bytes = 4096, + .hostvm_min_page_size_bytes = 4096, + .do_urgent_latency_adjustment = false, + .urgent_latency_adjustment_fabric_clock_component_us = 0, + .urgent_latency_adjustment_fabric_clock_reference_mhz = 0, +}; + +struct _vcs_dpi_ip_params_st dcn3_16_ip = { + .gpuvm_enable = 1, + .gpuvm_max_page_table_levels = 1, + .hostvm_enable = 1, + .hostvm_max_page_table_levels = 2, + .rob_buffer_size_kbytes = 64, + .det_buffer_size_kbytes = DCN3_16_DEFAULT_DET_SIZE, + .config_return_buffer_size_in_kbytes = 1024, + .compressed_buffer_segment_size_in_kbytes = 64, + .meta_fifo_size_in_kentries = 32, + .zero_size_buffer_entries = 512, + .compbuf_reserved_space_64b = 256, + .compbuf_reserved_space_zs = 64, + .dpp_output_buffer_pixels = 2560, + .opp_output_buffer_lines = 1, + .pixel_chunk_size_kbytes = 8, + .meta_chunk_size_kbytes = 2, + .min_meta_chunk_size_bytes = 256, + .writeback_chunk_size_kbytes = 8, + .ptoi_supported = false, + .num_dsc = 3, + .maximum_dsc_bits_per_component = 10, + .dsc422_native_support = false, + 
.is_line_buffer_bpp_fixed = true, + .line_buffer_fixed_bpp = 48, + .line_buffer_size_bits = 789504, + .max_line_buffer_lines = 12, + .writeback_interface_buffer_size_kbytes = 90, + .max_num_dpp = 4, + .max_num_otg = 4, + .max_num_hdmi_frl_outputs = 1, + .max_num_wb = 1, + .max_dchub_pscl_bw_pix_per_clk = 4, + .max_pscl_lb_bw_pix_per_clk = 2, + .max_lb_vscl_bw_pix_per_clk = 4, + .max_vscl_hscl_bw_pix_per_clk = 4, + .max_hscl_ratio = 6, + .max_vscl_ratio = 6, + .max_hscl_taps = 8, + .max_vscl_taps = 8, + .dpte_buffer_size_in_pte_reqs_luma = 64, + .dpte_buffer_size_in_pte_reqs_chroma = 34, + .dispclk_ramp_margin_percent = 1, + .max_inter_dcn_tile_repeaters = 8, + .cursor_buffer_size = 16, + .cursor_chunk_size = 2, + .writeback_line_buffer_buffer_size = 0, + .writeback_min_hscl_ratio = 1, + .writeback_min_vscl_ratio = 1, + .writeback_max_hscl_ratio = 1, + .writeback_max_vscl_ratio = 1, + .writeback_max_hscl_taps = 1, + .writeback_max_vscl_taps = 1, + .dppclk_delay_subtotal = 46, + .dppclk_delay_scl = 50, + .dppclk_delay_scl_lb_only = 16, + .dppclk_delay_cnvc_formatter = 27, + .dppclk_delay_cnvc_cursor = 6, + .dispclk_delay_subtotal = 119, + .dynamic_metadata_vm_enabled = false, + .odm_combine_4to1_supported = false, + .dcc_supported = true, +}; + +struct _vcs_dpi_soc_bounding_box_st dcn3_16_soc = { + /*TODO: correct dispclk/dppclk voltage level determination*/ + .clock_limits = { + { + .state = 0, + .dispclk_mhz = 556.0, + .dppclk_mhz = 556.0, + .phyclk_mhz = 600.0, + .phyclk_d18_mhz = 445.0, + .dscclk_mhz = 186.0, + .dtbclk_mhz = 625.0, + }, + { + .state = 1, + .dispclk_mhz = 625.0, + .dppclk_mhz = 625.0, + .phyclk_mhz = 810.0, + .phyclk_d18_mhz = 667.0, + .dscclk_mhz = 209.0, + .dtbclk_mhz = 625.0, + }, + { + .state = 2, + .dispclk_mhz = 625.0, + .dppclk_mhz = 625.0, + .phyclk_mhz = 810.0, + .phyclk_d18_mhz = 667.0, + .dscclk_mhz = 209.0, + .dtbclk_mhz = 625.0, + }, + { + .state = 3, + .dispclk_mhz = 1112.0, + .dppclk_mhz = 1112.0, + .phyclk_mhz = 810.0, + 
.phyclk_d18_mhz = 667.0, + .dscclk_mhz = 371.0, + .dtbclk_mhz = 625.0, + }, + { + .state = 4, + .dispclk_mhz = 1250.0, + .dppclk_mhz = 1250.0, + .phyclk_mhz = 810.0, + .phyclk_d18_mhz = 667.0, + .dscclk_mhz = 417.0, + .dtbclk_mhz = 625.0, + }, + }, + .num_states = 5, + .sr_exit_time_us = 9.0, + .sr_enter_plus_exit_time_us = 11.0, + .sr_exit_z8_time_us = 442.0, + .sr_enter_plus_exit_z8_time_us = 560.0, + .writeback_latency_us = 12.0, + .dram_channel_width_bytes = 4, + .round_trip_ping_latency_dcfclk_cycles = 106, + .urgent_latency_pixel_data_only_us = 4.0, + .urgent_latency_pixel_mixed_with_vm_data_us = 4.0, + .urgent_latency_vm_data_only_us = 4.0, + .urgent_out_of_order_return_per_channel_pixel_only_bytes = 4096, + .urgent_out_of_order_return_per_channel_pixel_and_vm_bytes = 4096, + .urgent_out_of_order_return_per_channel_vm_only_bytes = 4096, + .pct_ideal_sdp_bw_after_urgent = 80.0, + .pct_ideal_dram_sdp_bw_after_urgent_pixel_only = 65.0, + .pct_ideal_dram_sdp_bw_after_urgent_pixel_and_vm = 60.0, + .pct_ideal_dram_sdp_bw_after_urgent_vm_only = 30.0, + .max_avg_sdp_bw_use_normal_percent = 60.0, + .max_avg_dram_bw_use_normal_percent = 60.0, + .fabric_datapath_to_dcn_data_return_bytes = 32, + .return_bus_width_bytes = 64, + .downspread_percent = 0.38, + .dcn_downspread_percent = 0.5, + .gpuvm_min_page_size_bytes = 4096, + .hostvm_min_page_size_bytes = 4096, + .do_urgent_latency_adjustment = false, + .urgent_latency_adjustment_fabric_clock_component_us = 0, + .urgent_latency_adjustment_fabric_clock_reference_mhz = 0, +}; + +void dcn31_calculate_wm_and_dlg_fp( + struct dc *dc, struct dc_state *context, + display_e2e_pipe_params_st *pipes, + int pipe_cnt, + int vlevel) +{ + int i, pipe_idx; + double dcfclk = context->bw_ctx.dml.vba.DCFCLKState[vlevel][context->bw_ctx.dml.vba.maxMpcComb]; + + dc_assert_fp_enabled(); + + if (context->bw_ctx.dml.soc.min_dcfclk > dcfclk) + dcfclk = context->bw_ctx.dml.soc.min_dcfclk; + + /* We don't recalculate clocks for 0 pipe configs, 
which can block + * S0i3 as high clocks will block low power states + * Override any clocks that can block S0i3 to min here + */ + if (pipe_cnt == 0) { + context->bw_ctx.bw.dcn.clk.dcfclk_khz = dcfclk; // always should be vlevel 0 + return; + } + + pipes[0].clks_cfg.voltage = vlevel; + pipes[0].clks_cfg.dcfclk_mhz = dcfclk; + pipes[0].clks_cfg.socclk_mhz = context->bw_ctx.dml.soc.clock_limits[vlevel].socclk_mhz; + +#if 0 // TODO + /* Set B: + * TODO + */ + if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].valid) { + if (vlevel == 0) { + pipes[0].clks_cfg.voltage = 1; + pipes[0].clks_cfg.dcfclk_mhz = context->bw_ctx.dml.soc.clock_limits[0].dcfclk_mhz; + } + context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].dml_input.pstate_latency_us; + context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].dml_input.sr_enter_plus_exit_time_us; + context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_B].dml_input.sr_exit_time_us; + } + context->bw_ctx.bw.dcn.watermarks.b.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.cstate_enter_plus_exit_z8_ns = get_wm_z8_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.cstate_exit_z8_ns = get_wm_z8_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.b.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.b.pte_meta_urgent_ns = 
get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.b.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.b.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.b.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + + pipes[0].clks_cfg.voltage = vlevel; + pipes[0].clks_cfg.dcfclk_mhz = dcfclk; + + /* Set C: + * TODO + */ + if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].valid) { + context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].dml_input.pstate_latency_us; + context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].dml_input.sr_enter_plus_exit_time_us; + context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_C].dml_input.sr_exit_time_us; + } + context->bw_ctx.bw.dcn.watermarks.c.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.cstate_enter_plus_exit_z8_ns = get_wm_z8_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.cstate_exit_z8_ns = get_wm_z8_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.c.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.c.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, 
pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.c.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.c.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.c.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + + /* Set D: + * TODO + */ + if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].valid) { + context->bw_ctx.dml.soc.dram_clock_change_latency_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].dml_input.pstate_latency_us; + context->bw_ctx.dml.soc.sr_enter_plus_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].dml_input.sr_enter_plus_exit_time_us; + context->bw_ctx.dml.soc.sr_exit_time_us = dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].dml_input.sr_exit_time_us; + } + context->bw_ctx.bw.dcn.watermarks.d.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.cstate_enter_plus_exit_z8_ns = get_wm_z8_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.d.cstate_pstate.cstate_exit_z8_ns = get_wm_z8_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.d.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.d.frac_urg_bw_nom = 
get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.d.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.d.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; +#endif + + /* Set A: + * All clocks min required + * + * Set A calculated last so that following calculations are based on Set A + */ + dc->res_pool->funcs->update_soc_for_wm_a(dc, context); + context->bw_ctx.bw.dcn.watermarks.a.urgent_ns = get_wm_urgent(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.cstate_enter_plus_exit_z8_ns = get_wm_z8_stutter_enter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.a.cstate_pstate.cstate_exit_z8_ns = get_wm_z8_stutter_exit(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.a.pte_meta_urgent_ns = get_wm_memory_trip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.a.frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.a.frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + context->bw_ctx.bw.dcn.watermarks.a.urgent_latency_ns = get_urgent_latency(&context->bw_ctx.dml, pipes, pipe_cnt) * 1000; + /* TODO: remove: */ + context->bw_ctx.bw.dcn.watermarks.b = 
context->bw_ctx.bw.dcn.watermarks.a; + context->bw_ctx.bw.dcn.watermarks.c = context->bw_ctx.bw.dcn.watermarks.a; + context->bw_ctx.bw.dcn.watermarks.d = context->bw_ctx.bw.dcn.watermarks.a; + /* end remove*/ + + for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) { + if (!context->res_ctx.pipe_ctx[i].stream) + continue; + + pipes[pipe_idx].clks_cfg.dispclk_mhz = get_dispclk_calculated(&context->bw_ctx.dml, pipes, pipe_cnt); + pipes[pipe_idx].clks_cfg.dppclk_mhz = get_dppclk_calculated(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx); + + if (dc->config.forced_clocks || dc->debug.max_disp_clk) { + pipes[pipe_idx].clks_cfg.dispclk_mhz = context->bw_ctx.dml.soc.clock_limits[0].dispclk_mhz; + pipes[pipe_idx].clks_cfg.dppclk_mhz = context->bw_ctx.dml.soc.clock_limits[0].dppclk_mhz; + } + if (dc->debug.min_disp_clk_khz > pipes[pipe_idx].clks_cfg.dispclk_mhz * 1000) + pipes[pipe_idx].clks_cfg.dispclk_mhz = dc->debug.min_disp_clk_khz / 1000.0; + if (dc->debug.min_dpp_clk_khz > pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000) + pipes[pipe_idx].clks_cfg.dppclk_mhz = dc->debug.min_dpp_clk_khz / 1000.0; + + pipe_idx++; + } + + dcn20_calculate_dlg_params(dc, context, pipes, pipe_cnt, vlevel); +} + +void dcn31_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params) +{ + struct clk_limit_table *clk_table = &bw_params->clk_table; + struct _vcs_dpi_voltage_scaling_st clock_limits[DC__VOLTAGE_STATES]; + unsigned int i, closest_clk_lvl; + int j; + + dc_assert_fp_enabled(); + + // Default clock levels are used for diags, which may lead to overclocking. + if (!IS_DIAG_DC(dc->ctx->dce_environment)) { + int max_dispclk_mhz = 0, max_dppclk_mhz = 0; + + dcn3_1_ip.max_num_otg = dc->res_pool->res_cap->num_timing_generator; + dcn3_1_ip.max_num_dpp = dc->res_pool->pipe_count; + dcn3_1_soc.num_chans = bw_params->num_channels; + + ASSERT(clk_table->num_entries); + + /* Prepass to find max clocks independent of voltage level. 
*/ + for (i = 0; i < clk_table->num_entries; ++i) { + if (clk_table->entries[i].dispclk_mhz > max_dispclk_mhz) + max_dispclk_mhz = clk_table->entries[i].dispclk_mhz; + if (clk_table->entries[i].dppclk_mhz > max_dppclk_mhz) + max_dppclk_mhz = clk_table->entries[i].dppclk_mhz; + } + + for (i = 0; i < clk_table->num_entries; i++) { + /* loop backwards*/ + for (closest_clk_lvl = 0, j = dcn3_1_soc.num_states - 1; j >= 0; j--) { + if ((unsigned int) dcn3_1_soc.clock_limits[j].dcfclk_mhz <= clk_table->entries[i].dcfclk_mhz) { + closest_clk_lvl = j; + break; + } + } + + clock_limits[i].state = i; + + /* Clocks dependent on voltage level. */ + clock_limits[i].dcfclk_mhz = clk_table->entries[i].dcfclk_mhz; + clock_limits[i].fabricclk_mhz = clk_table->entries[i].fclk_mhz; + clock_limits[i].socclk_mhz = clk_table->entries[i].socclk_mhz; + clock_limits[i].dram_speed_mts = clk_table->entries[i].memclk_mhz * 2 * clk_table->entries[i].wck_ratio; + + /* Clocks independent of voltage level. */ + clock_limits[i].dispclk_mhz = max_dispclk_mhz ? max_dispclk_mhz : + dcn3_1_soc.clock_limits[closest_clk_lvl].dispclk_mhz; + + clock_limits[i].dppclk_mhz = max_dppclk_mhz ? 
max_dppclk_mhz : + dcn3_1_soc.clock_limits[closest_clk_lvl].dppclk_mhz; + + clock_limits[i].dram_bw_per_chan_gbps = dcn3_1_soc.clock_limits[closest_clk_lvl].dram_bw_per_chan_gbps; + clock_limits[i].dscclk_mhz = dcn3_1_soc.clock_limits[closest_clk_lvl].dscclk_mhz; + clock_limits[i].dtbclk_mhz = dcn3_1_soc.clock_limits[closest_clk_lvl].dtbclk_mhz; + clock_limits[i].phyclk_d18_mhz = dcn3_1_soc.clock_limits[closest_clk_lvl].phyclk_d18_mhz; + clock_limits[i].phyclk_mhz = dcn3_1_soc.clock_limits[closest_clk_lvl].phyclk_mhz; + } + for (i = 0; i < clk_table->num_entries; i++) + dcn3_1_soc.clock_limits[i] = clock_limits[i]; + if (clk_table->num_entries) { + dcn3_1_soc.num_states = clk_table->num_entries; + } + } + + dcn3_1_soc.dispclk_dppclk_vco_speed_mhz = dc->clk_mgr->dentist_vco_freq_khz / 1000.0; + dc->dml.soc.dispclk_dppclk_vco_speed_mhz = dc->clk_mgr->dentist_vco_freq_khz / 1000.0; + + if (!IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment)) + dml_init_instance(&dc->dml, &dcn3_1_soc, &dcn3_1_ip, DML_PROJECT_DCN31); + else + dml_init_instance(&dc->dml, &dcn3_1_soc, &dcn3_1_ip, DML_PROJECT_DCN31_FPGA); +} + +void dcn315_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params) +{ + struct clk_limit_table *clk_table = &bw_params->clk_table; + struct _vcs_dpi_voltage_scaling_st clock_limits[DC__VOLTAGE_STATES]; + unsigned int i, closest_clk_lvl; + int max_dispclk_mhz = 0, max_dppclk_mhz = 0; + int j; + + dc_assert_fp_enabled(); + + // Default clock levels are used for diags, which may lead to overclocking. + if (!IS_DIAG_DC(dc->ctx->dce_environment)) { + + dcn3_15_ip.max_num_otg = dc->res_pool->res_cap->num_timing_generator; + dcn3_15_ip.max_num_dpp = dc->res_pool->pipe_count; + dcn3_15_soc.num_chans = bw_params->num_channels; + + ASSERT(clk_table->num_entries); + + /* Prepass to find max clocks independent of voltage level. 
*/ + for (i = 0; i < clk_table->num_entries; ++i) { + if (clk_table->entries[i].dispclk_mhz > max_dispclk_mhz) + max_dispclk_mhz = clk_table->entries[i].dispclk_mhz; + if (clk_table->entries[i].dppclk_mhz > max_dppclk_mhz) + max_dppclk_mhz = clk_table->entries[i].dppclk_mhz; + } + + for (i = 0; i < clk_table->num_entries; i++) { + /* loop backwards*/ + for (closest_clk_lvl = 0, j = dcn3_15_soc.num_states - 1; j >= 0; j--) { + if ((unsigned int) dcn3_15_soc.clock_limits[j].dcfclk_mhz <= clk_table->entries[i].dcfclk_mhz) { + closest_clk_lvl = j; + break; + } + } + if (clk_table->num_entries == 1) { + /*smu gives one DPM level, let's take the highest one*/ + closest_clk_lvl = dcn3_15_soc.num_states - 1; + } + + clock_limits[i].state = i; + + /* Clocks dependent on voltage level. */ + clock_limits[i].dcfclk_mhz = clk_table->entries[i].dcfclk_mhz; + if (clk_table->num_entries == 1 && + clock_limits[i].dcfclk_mhz < dcn3_15_soc.clock_limits[closest_clk_lvl].dcfclk_mhz) { + /*SMU fix not released yet*/ + clock_limits[i].dcfclk_mhz = dcn3_15_soc.clock_limits[closest_clk_lvl].dcfclk_mhz; + } + clock_limits[i].fabricclk_mhz = clk_table->entries[i].fclk_mhz; + clock_limits[i].socclk_mhz = clk_table->entries[i].socclk_mhz; + clock_limits[i].dram_speed_mts = clk_table->entries[i].memclk_mhz * 2 * clk_table->entries[i].wck_ratio; + + /* Clocks independent of voltage level. */ + clock_limits[i].dispclk_mhz = max_dispclk_mhz ? max_dispclk_mhz : + dcn3_15_soc.clock_limits[closest_clk_lvl].dispclk_mhz; + + clock_limits[i].dppclk_mhz = max_dppclk_mhz ? 
max_dppclk_mhz : + dcn3_15_soc.clock_limits[closest_clk_lvl].dppclk_mhz; + + clock_limits[i].dram_bw_per_chan_gbps = dcn3_15_soc.clock_limits[closest_clk_lvl].dram_bw_per_chan_gbps; + clock_limits[i].dscclk_mhz = dcn3_15_soc.clock_limits[closest_clk_lvl].dscclk_mhz; + clock_limits[i].dtbclk_mhz = dcn3_15_soc.clock_limits[closest_clk_lvl].dtbclk_mhz; + clock_limits[i].phyclk_d18_mhz = dcn3_15_soc.clock_limits[closest_clk_lvl].phyclk_d18_mhz; + clock_limits[i].phyclk_mhz = dcn3_15_soc.clock_limits[closest_clk_lvl].phyclk_mhz; + } + for (i = 0; i < clk_table->num_entries; i++) + dcn3_15_soc.clock_limits[i] = clock_limits[i]; + if (clk_table->num_entries) { + dcn3_15_soc.num_states = clk_table->num_entries; + } + } + + if (max_dispclk_mhz) { + dcn3_15_soc.dispclk_dppclk_vco_speed_mhz = max_dispclk_mhz * 2; + dc->dml.soc.dispclk_dppclk_vco_speed_mhz = max_dispclk_mhz * 2; + } + + if (!IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment)) + dml_init_instance(&dc->dml, &dcn3_15_soc, &dcn3_15_ip, DML_PROJECT_DCN31); + else + dml_init_instance(&dc->dml, &dcn3_15_soc, &dcn3_15_ip, DML_PROJECT_DCN31_FPGA); +} + +void dcn316_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params) +{ + struct clk_limit_table *clk_table = &bw_params->clk_table; + struct _vcs_dpi_voltage_scaling_st clock_limits[DC__VOLTAGE_STATES]; + unsigned int i, closest_clk_lvl; + int max_dispclk_mhz = 0, max_dppclk_mhz = 0; + int j; + + dc_assert_fp_enabled(); + + // Default clock levels are used for diags, which may lead to overclocking. + if (!IS_DIAG_DC(dc->ctx->dce_environment)) { + + dcn3_16_ip.max_num_otg = dc->res_pool->res_cap->num_timing_generator; + dcn3_16_ip.max_num_dpp = dc->res_pool->pipe_count; + dcn3_16_soc.num_chans = bw_params->num_channels; + + ASSERT(clk_table->num_entries); + + /* Prepass to find max clocks independent of voltage level. 
*/ + for (i = 0; i < clk_table->num_entries; ++i) { + if (clk_table->entries[i].dispclk_mhz > max_dispclk_mhz) + max_dispclk_mhz = clk_table->entries[i].dispclk_mhz; + if (clk_table->entries[i].dppclk_mhz > max_dppclk_mhz) + max_dppclk_mhz = clk_table->entries[i].dppclk_mhz; + } + + for (i = 0; i < clk_table->num_entries; i++) { + /* loop backwards*/ + for (closest_clk_lvl = 0, j = dcn3_16_soc.num_states - 1; j >= 0; j--) { + if ((unsigned int) dcn3_16_soc.clock_limits[j].dcfclk_mhz <= clk_table->entries[i].dcfclk_mhz) { + closest_clk_lvl = j; + break; + } + } + // Ported from DCN315 + if (clk_table->num_entries == 1) { + /*smu gives one DPM level, let's take the highest one*/ + closest_clk_lvl = dcn3_16_soc.num_states - 1; + } + + clock_limits[i].state = i; + + /* Clocks dependent on voltage level. */ + clock_limits[i].dcfclk_mhz = clk_table->entries[i].dcfclk_mhz; + if (clk_table->num_entries == 1 && + clock_limits[i].dcfclk_mhz < dcn3_16_soc.clock_limits[closest_clk_lvl].dcfclk_mhz) { + /*SMU fix not released yet*/ + clock_limits[i].dcfclk_mhz = dcn3_16_soc.clock_limits[closest_clk_lvl].dcfclk_mhz; + } + clock_limits[i].fabricclk_mhz = clk_table->entries[i].fclk_mhz; + clock_limits[i].socclk_mhz = clk_table->entries[i].socclk_mhz; + clock_limits[i].dram_speed_mts = clk_table->entries[i].memclk_mhz * 2 * clk_table->entries[i].wck_ratio; + + /* Clocks independent of voltage level. */ + clock_limits[i].dispclk_mhz = max_dispclk_mhz ? max_dispclk_mhz : + dcn3_16_soc.clock_limits[closest_clk_lvl].dispclk_mhz; + + clock_limits[i].dppclk_mhz = max_dppclk_mhz ? 
max_dppclk_mhz : + dcn3_16_soc.clock_limits[closest_clk_lvl].dppclk_mhz; + + clock_limits[i].dram_bw_per_chan_gbps = dcn3_16_soc.clock_limits[closest_clk_lvl].dram_bw_per_chan_gbps; + clock_limits[i].dscclk_mhz = dcn3_16_soc.clock_limits[closest_clk_lvl].dscclk_mhz; + clock_limits[i].dtbclk_mhz = dcn3_16_soc.clock_limits[closest_clk_lvl].dtbclk_mhz; + clock_limits[i].phyclk_d18_mhz = dcn3_16_soc.clock_limits[closest_clk_lvl].phyclk_d18_mhz; + clock_limits[i].phyclk_mhz = dcn3_16_soc.clock_limits[closest_clk_lvl].phyclk_mhz; + } + for (i = 0; i < clk_table->num_entries; i++) + dcn3_16_soc.clock_limits[i] = clock_limits[i]; + if (clk_table->num_entries) { + dcn3_16_soc.num_states = clk_table->num_entries; + } + } + + if (max_dispclk_mhz) { + dcn3_16_soc.dispclk_dppclk_vco_speed_mhz = max_dispclk_mhz * 2; + dc->dml.soc.dispclk_dppclk_vco_speed_mhz = max_dispclk_mhz * 2; + } + + if (!IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment)) + dml_init_instance(&dc->dml, &dcn3_16_soc, &dcn3_16_ip, DML_PROJECT_DCN31); + else + dml_init_instance(&dc->dml, &dcn3_16_soc, &dcn3_16_ip, DML_PROJECT_DCN31_FPGA); +} diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.h new file mode 100644 index 000000000000..24ac19c83687 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.h @@ -0,0 +1,44 @@ +/* + * Copyright 2019-2021 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + * Authors: AMD + * + */ + +#ifndef __DCN31_FPU_H__ +#define __DCN31_FPU_H__ + +#define DCN3_1_DEFAULT_DET_SIZE 384 +#define DCN3_15_DEFAULT_DET_SIZE 192 +#define DCN3_15_MIN_COMPBUF_SIZE_KB 128 +#define DCN3_16_DEFAULT_DET_SIZE 192 + +void dcn31_calculate_wm_and_dlg_fp( + struct dc *dc, struct dc_state *context, + display_e2e_pipe_params_st *pipes, + int pipe_cnt, + int vlevel); + +void dcn31_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params); +void dcn315_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params); +void dcn316_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params); + +#endif /* __DCN31_FPU_H__*/ diff --git a/drivers/gpu/drm/amd/display/dc/inc/dcn_calcs.h b/drivers/gpu/drm/amd/display/dc/inc/dcn_calcs.h index 337c0161e72d..806f3041db14 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/dcn_calcs.h +++ b/drivers/gpu/drm/amd/display/dc/inc/dcn_calcs.h @@ -619,7 +619,7 @@ struct dcn_ip_params { }; extern const struct dcn_ip_params dcn10_ip_defaults; -bool dcn10_validate_bandwidth( +bool dcn_validate_bandwidth( struct dc *dc, struct dc_state *context, bool fast_validate); diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h b/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h index 59a704781e34..554d2e33bd7f 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h @@ -310,6 +310,8 @@ struct timing_generator_funcs { uint32_t slave_pixel_clock_100Hz, uint8_t master_clock_divider, uint8_t slave_clock_divider); + + void (*init_odm)(struct timing_generator *tg); }; #endif diff --git a/drivers/gpu/drm/amd/display/dc/inc/link_hwss.h b/drivers/gpu/drm/amd/display/dc/inc/link_hwss.h index 3b3090e3d327..e6c49ef8b584 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/link_hwss.h +++ b/drivers/gpu/drm/amd/display/dc/inc/link_hwss.h @@ -37,9 +37,12 @@ struct dc_link; struct link_resource; struct pipe_ctx; struct 
encoder_set_dp_phy_pattern_param; +struct link_mst_stream_allocation_table; struct link_hwss_ext { - /* function pointers below require check for NULL at all time + /* function pointers below may require to check for NULL if caller + * considers missing implementation as expected in some cases or none + * critical to be investigated immediately * ********************************************************************* */ void (*set_hblank_min_symbol_width)(struct pipe_ctx *pipe_ctx, @@ -62,6 +65,9 @@ struct link_hwss_ext { const struct link_resource *link_res, const struct dc_link_settings *link_settings, const struct dc_lane_settings lane_settings[LANE_COUNT_DP_MAX]); + void (*update_stream_allocation_table)(struct dc_link *link, + const struct link_resource *link_res, + const struct link_mst_stream_allocation_table *table); }; struct link_hwss { @@ -72,6 +78,7 @@ struct link_hwss { */ void (*setup_stream_encoder)(struct pipe_ctx *pipe_ctx); void (*reset_stream_encoder)(struct pipe_ctx *pipe_ctx); + void (*setup_stream_attribute)(struct pipe_ctx *pipe_ctx); }; #endif /* __DC_LINK_HWSS_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/link/link_dp_trace.c b/drivers/gpu/drm/amd/display/dc/link/link_dp_trace.c index e7047391934b..2c1a3bfcdb50 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_dp_trace.c +++ b/drivers/gpu/drm/amd/display/dc/link/link_dp_trace.c @@ -144,3 +144,23 @@ unsigned int dc_dp_trace_get_link_loss_count(struct dc_link *link) { return link->dp_trace.link_loss_count; } + +void dp_trace_set_edp_power_timestamp(struct dc_link *link, + bool power_up) +{ + if (!power_up) + /*save driver power off time stamp*/ + link->dp_trace.edp_trace_power_timestamps.poweroff = dm_get_timestamp(link->dc->ctx); + else + link->dp_trace.edp_trace_power_timestamps.poweron = dm_get_timestamp(link->dc->ctx); +} + +uint64_t dp_trace_get_edp_poweron_timestamp(struct dc_link *link) +{ + return link->dp_trace.edp_trace_power_timestamps.poweron; +} + +uint64_t 
dp_trace_get_edp_poweroff_timestamp(struct dc_link *link) +{ + return link->dp_trace.edp_trace_power_timestamps.poweroff; +}
\ No newline at end of file diff --git a/drivers/gpu/drm/amd/display/dc/link/link_dp_trace.h b/drivers/gpu/drm/amd/display/dc/link/link_dp_trace.h index 702f97c6ead0..26700e3cd65e 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_dp_trace.h +++ b/drivers/gpu/drm/amd/display/dc/link/link_dp_trace.h @@ -54,4 +54,9 @@ struct dp_trace_lt_counts *dc_dp_trace_get_lt_counts(struct dc_link *link, bool in_detection); unsigned int dc_dp_trace_get_link_loss_count(struct dc_link *link); +void dp_trace_set_edp_power_timestamp(struct dc_link *link, + bool power_up); +uint64_t dp_trace_get_edp_poweron_timestamp(struct dc_link *link); +uint64_t dp_trace_get_edp_poweroff_timestamp(struct dc_link *link); + #endif /* __LINK_DP_TRACE_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/link/link_hwss_dio.c b/drivers/gpu/drm/amd/display/dc/link/link_hwss_dio.c index 0f845113a6aa..776e822abcbb 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_hwss_dio.c +++ b/drivers/gpu/drm/amd/display/dc/link/link_hwss_dio.c @@ -62,6 +62,46 @@ void reset_dio_stream_encoder(struct pipe_ctx *pipe_ctx) } +void setup_dio_stream_attribute(struct pipe_ctx *pipe_ctx) +{ + struct stream_encoder *stream_encoder = pipe_ctx->stream_res.stream_enc; + struct dc_stream_state *stream = pipe_ctx->stream; + struct dc_link *link = stream->link; + + if (!dc_is_virtual_signal(stream->signal)) + stream_encoder->funcs->setup_stereo_sync( + stream_encoder, + pipe_ctx->stream_res.tg->inst, + stream->timing.timing_3d_format != TIMING_3D_FORMAT_NONE); + + if (dc_is_dp_signal(stream->signal)) + stream_encoder->funcs->dp_set_stream_attribute( + stream_encoder, + &stream->timing, + stream->output_color_space, + stream->use_vsc_sdp_for_colorimetry, + link->dpcd_caps.dprx_feature.bits.SST_SPLIT_SDP_CAP); + else if (dc_is_hdmi_tmds_signal(stream->signal)) + stream_encoder->funcs->hdmi_set_stream_attribute( + stream_encoder, + &stream->timing, + stream->phy_pix_clk, + pipe_ctx->stream_res.audio != NULL); + else if 
(dc_is_dvi_signal(stream->signal)) + stream_encoder->funcs->dvi_set_stream_attribute( + stream_encoder, + &stream->timing, + (stream->signal == SIGNAL_TYPE_DVI_DUAL_LINK) ? + true : false); + else if (dc_is_lvds_signal(stream->signal)) + stream_encoder->funcs->lvds_set_stream_attribute( + stream_encoder, + &stream->timing); + + if (dc_is_dp_signal(stream->signal)) + dp_source_sequence_trace(link, DPCD_SOURCE_SEQ_AFTER_DP_STREAM_ATTR); +} + void enable_dio_dp_link_output(struct dc_link *link, const struct link_resource *link_res, enum signal_type signal, @@ -113,15 +153,27 @@ void set_dio_dp_lane_settings(struct dc_link *link, link_enc->funcs->dp_set_lane_settings(link_enc, link_settings, lane_settings); } +static void update_dio_stream_allocation_table(struct dc_link *link, + const struct link_resource *link_res, + const struct link_mst_stream_allocation_table *table) +{ + struct link_encoder *link_enc = link_enc_cfg_get_link_enc(link); + + ASSERT(link_enc); + link_enc->funcs->update_mst_stream_allocation_table(link_enc, table); +} + static const struct link_hwss dio_link_hwss = { .setup_stream_encoder = setup_dio_stream_encoder, .reset_stream_encoder = reset_dio_stream_encoder, + .setup_stream_attribute = setup_dio_stream_attribute, .ext = { .set_throttled_vcp_size = set_dio_throttled_vcp_size, .enable_dp_link_output = enable_dio_dp_link_output, .disable_dp_link_output = disable_dio_dp_link_output, .set_dp_link_test_pattern = set_dio_dp_link_test_pattern, .set_dp_lane_settings = set_dio_dp_lane_settings, + .update_stream_allocation_table = update_dio_stream_allocation_table, }, }; diff --git a/drivers/gpu/drm/amd/display/dc/link/link_hwss_dio.h b/drivers/gpu/drm/amd/display/dc/link/link_hwss_dio.h index 680df20b1fa3..08f22b32df48 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_hwss_dio.h +++ b/drivers/gpu/drm/amd/display/dc/link/link_hwss_dio.h @@ -34,6 +34,7 @@ void set_dio_throttled_vcp_size(struct pipe_ctx *pipe_ctx, struct fixed31_32 throttled_vcp_size); 
void setup_dio_stream_encoder(struct pipe_ctx *pipe_ctx); void reset_dio_stream_encoder(struct pipe_ctx *pipe_ctx); +void setup_dio_stream_attribute(struct pipe_ctx *pipe_ctx); void enable_dio_dp_link_output(struct dc_link *link, const struct link_resource *link_res, enum signal_type signal, diff --git a/drivers/gpu/drm/amd/display/dc/link/link_hwss_dpia.c b/drivers/gpu/drm/amd/display/dc/link/link_hwss_dpia.c index 35b206225201..89d4e8159138 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_hwss_dpia.c +++ b/drivers/gpu/drm/amd/display/dc/link/link_hwss_dpia.c @@ -25,16 +25,44 @@ #include "link_hwss_dpia.h" #include "core_types.h" #include "link_hwss_dio.h" +#include "link_enc_cfg.h" + +#define DC_LOGGER_INIT(logger) + +static void update_dpia_stream_allocation_table(struct dc_link *link, + const struct link_resource *link_res, + const struct link_mst_stream_allocation_table *table) +{ + struct link_encoder *link_enc = link_enc_cfg_get_link_enc(link); + static enum dc_status status; + uint8_t mst_alloc_slots = 0, prev_mst_slots_in_use = 0xFF; + int i; + DC_LOGGER_INIT(link->ctx->logger); + + for (i = 0; i < table->stream_count; i++) + mst_alloc_slots += table->stream_allocations[i].slot_count; + + status = dc_process_dmub_set_mst_slots(link->dc, link->link_index, + mst_alloc_slots, &prev_mst_slots_in_use); + ASSERT(status == DC_OK); + DC_LOG_MST("dpia : status[%d]: alloc_slots[%d]: used_slots[%d]\n", + status, mst_alloc_slots, prev_mst_slots_in_use); + + ASSERT(link_enc); + link_enc->funcs->update_mst_stream_allocation_table(link_enc, table); +} static const struct link_hwss dpia_link_hwss = { .setup_stream_encoder = setup_dio_stream_encoder, .reset_stream_encoder = reset_dio_stream_encoder, + .setup_stream_attribute = setup_dio_stream_attribute, .ext = { .set_throttled_vcp_size = set_dio_throttled_vcp_size, .enable_dp_link_output = enable_dio_dp_link_output, .disable_dp_link_output = disable_dio_dp_link_output, .set_dp_link_test_pattern = 
set_dio_dp_link_test_pattern, .set_dp_lane_settings = set_dio_dp_lane_settings, + .update_stream_allocation_table = update_dpia_stream_allocation_table, }, }; diff --git a/drivers/gpu/drm/amd/display/dc/link/link_hwss_hpo_dp.c b/drivers/gpu/drm/amd/display/dc/link/link_hwss_hpo_dp.c index 74919491675f..87972dc8443d 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_hwss_hpo_dp.c +++ b/drivers/gpu/drm/amd/display/dc/link/link_hwss_hpo_dp.c @@ -131,6 +131,22 @@ static void reset_hpo_dp_stream_encoder(struct pipe_ctx *pipe_ctx) dccg->funcs->set_dpstreamclk(dccg, REFCLK, tg->inst); } +static void setup_hpo_dp_stream_attribute(struct pipe_ctx *pipe_ctx) +{ + struct hpo_dp_stream_encoder *stream_enc = pipe_ctx->stream_res.hpo_dp_stream_enc; + struct dc_stream_state *stream = pipe_ctx->stream; + struct dc_link *link = stream->link; + + stream_enc->funcs->set_stream_attribute( + stream_enc, + &stream->timing, + stream->output_color_space, + stream->use_vsc_sdp_for_colorimetry, + stream->timing.flags.DSC, + false); + dp_source_sequence_trace(link, DPCD_SOURCE_SEQ_AFTER_DP_STREAM_ATTR); +} + static void enable_hpo_dp_fpga_link_output(struct dc_link *link, const struct link_resource *link_res, enum signal_type signal, @@ -228,9 +244,19 @@ static void set_hpo_dp_lane_settings(struct dc_link *link, lane_settings[0].FFE_PRESET.raw); } +static void update_hpo_dp_stream_allocation_table(struct dc_link *link, + const struct link_resource *link_res, + const struct link_mst_stream_allocation_table *table) +{ + link_res->hpo_dp_link_enc->funcs->update_stream_allocation_table( + link_res->hpo_dp_link_enc, + table); +} + static const struct link_hwss hpo_dp_link_hwss = { .setup_stream_encoder = setup_hpo_dp_stream_encoder, .reset_stream_encoder = reset_hpo_dp_stream_encoder, + .setup_stream_attribute = setup_hpo_dp_stream_attribute, .ext = { .set_throttled_vcp_size = set_hpo_dp_throttled_vcp_size, .set_hblank_min_symbol_width = set_hpo_dp_hblank_min_symbol_width, @@ -238,6 +264,7 @@ 
static const struct link_hwss hpo_dp_link_hwss = { .disable_dp_link_output = disable_hpo_dp_link_output, .set_dp_link_test_pattern = set_hpo_dp_link_test_pattern, .set_dp_lane_settings = set_hpo_dp_lane_settings, + .update_stream_allocation_table = update_hpo_dp_stream_allocation_table, }, }; diff --git a/drivers/gpu/drm/amd/display/dc/link/link_hwss_hpo_frl.c b/drivers/gpu/drm/amd/display/dc/link/link_hwss_hpo_frl.c index 9df273ca699b..4b5eccd994c4 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_hwss_hpo_frl.c +++ b/drivers/gpu/drm/amd/display/dc/link/link_hwss_hpo_frl.c @@ -26,9 +26,28 @@ #include "core_types.h" #include "virtual/virtual_link_hwss.h" +static void setup_hpo_frl_stream_attribute(struct pipe_ctx *pipe_ctx) +{ + struct hpo_frl_stream_encoder *stream_enc = pipe_ctx->stream_res.hpo_frl_stream_enc; + struct dc_stream_state *stream = pipe_ctx->stream; + struct pipe_ctx *odm_pipe; + int odm_combine_num_segments = 1; + + /* get number of ODM combine input segments */ + for (odm_pipe = pipe_ctx->next_odm_pipe; odm_pipe; odm_pipe = odm_pipe->next_odm_pipe) + odm_combine_num_segments++; + + stream_enc->funcs->hdmi_frl_set_stream_attribute( + stream_enc, + &stream->timing, + &stream->link->frl_link_settings.borrow_params, + odm_combine_num_segments); +} + static const struct link_hwss hpo_frl_link_hwss = { .setup_stream_encoder = virtual_setup_stream_encoder, .reset_stream_encoder = virtual_reset_stream_encoder, + .setup_stream_attribute = setup_hpo_frl_stream_attribute, }; bool can_use_hpo_frl_link_hwss(const struct dc_link *link, diff --git a/drivers/gpu/drm/amd/display/dc/virtual/virtual_link_hwss.c b/drivers/gpu/drm/amd/display/dc/virtual/virtual_link_hwss.c index 525eba2a3354..501173ce270e 100644 --- a/drivers/gpu/drm/amd/display/dc/virtual/virtual_link_hwss.c +++ b/drivers/gpu/drm/amd/display/dc/virtual/virtual_link_hwss.c @@ -29,12 +29,17 @@ void virtual_setup_stream_encoder(struct pipe_ctx *pipe_ctx) { } +void 
virtual_setup_stream_attribute(struct pipe_ctx *pipe_ctx) +{ +} + void virtual_reset_stream_encoder(struct pipe_ctx *pipe_ctx) { } static const struct link_hwss virtual_link_hwss = { .setup_stream_encoder = virtual_setup_stream_encoder, .reset_stream_encoder = virtual_reset_stream_encoder, + .setup_stream_attribute = virtual_setup_stream_attribute, }; const struct link_hwss *get_virtual_link_hwss(void) diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h index 71214c7a60fc..05c8d91ad4ab 100644 --- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h +++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h @@ -44,24 +44,6 @@ #endif // defined(_TEST_HARNESS) || defined(FPGA_USB4) -/* Firmware versioning. */ -#ifdef DMUB_EXPOSE_VERSION -#define DMUB_FW_VERSION_GIT_HASH 0x929554ba -#define DMUB_FW_VERSION_MAJOR 0 -#define DMUB_FW_VERSION_MINOR 0 -#define DMUB_FW_VERSION_REVISION 108 -#define DMUB_FW_VERSION_TEST 0 -#define DMUB_FW_VERSION_VBIOS 0 -#define DMUB_FW_VERSION_HOTFIX 0 -#define DMUB_FW_VERSION_UCODE (((DMUB_FW_VERSION_MAJOR & 0xFF) << 24) | \ - ((DMUB_FW_VERSION_MINOR & 0xFF) << 16) | \ - ((DMUB_FW_VERSION_REVISION & 0xFF) << 8) | \ - ((DMUB_FW_VERSION_TEST & 0x1) << 7) | \ - ((DMUB_FW_VERSION_VBIOS & 0x1) << 6) | \ - (DMUB_FW_VERSION_HOTFIX & 0x3F)) - -#endif - //<DMUB_TYPES>================================================================== /* Basic type definitions. */ @@ -1523,8 +1505,6 @@ enum dmub_phy_fsm_state { DMUB_PHY_FSM_FAST_LP, }; - - /** * Data passed from driver to FW in a DMUB_CMD__PSR_COPY_SETTINGS command. */ @@ -1704,9 +1684,16 @@ struct dmub_rb_cmd_psr_enable_data { */ uint8_t panel_inst; /** - * Explicit padding to 4 byte boundary. + * Phy state to enter. + * Values to use are defined in dmub_phy_fsm_state */ - uint8_t pad[2]; + uint8_t phy_fsm_state; + /** + * Phy rate for DP - RBR/HBR/HBR2/HBR3. + * Set this using enum phy_link_rate. + * This does not support HDMI/DP2 for now. 
+ */ + uint8_t phy_rate; }; /** @@ -1772,16 +1759,9 @@ struct dmub_cmd_psr_force_static_data { */ uint8_t panel_inst; /** - * Phy state to enter. - * Values to use are defined in dmub_phy_fsm_state - */ - uint8_t phy_fsm_state; - /** - * Phy rate for DP - RBR/HBR/HBR2/HBR3. - * Set this using enum phy_link_rate. - * This does not support HDMI/DP2 for now. + * Explicit padding to 4 byte boundary. */ - uint8_t phy_rate; + uint8_t pad[2]; }; /** @@ -3044,9 +3024,7 @@ static inline void dmub_rb_flush_pending(const struct dmub_rb *rb) uint32_t wptr = rb->wrpt; while (rptr != wptr) { - uint64_t volatile *data = (uint64_t volatile *)((uint8_t *)(rb->base_address) + rptr); - //uint64_t volatile *p = (uint64_t volatile *)data; - uint64_t temp; + uint64_t *data = (uint64_t *)((uint8_t *)(rb->base_address) + rptr); uint8_t i; /* Don't remove this. @@ -3054,7 +3032,7 @@ static inline void dmub_rb_flush_pending(const struct dmub_rb *rb) * for this function to be effective. */ for (i = 0; i < DMUB_RB_CMD_SIZE / sizeof(uint64_t); i++) - temp = *data++; + (void)READ_ONCE(*data++); rptr += DMUB_RB_CMD_SIZE; if (rptr >= rb->capacity) diff --git a/drivers/gpu/drm/amd/display/include/grph_object_id.h b/drivers/gpu/drm/amd/display/include/grph_object_id.h index fed1edc038d8..c6bbd262f1ac 100644 --- a/drivers/gpu/drm/amd/display/include/grph_object_id.h +++ b/drivers/gpu/drm/amd/display/include/grph_object_id.h @@ -162,6 +162,7 @@ enum connector_id { CONNECTOR_ID_MXM = 21, CONNECTOR_ID_WIRELESS = 22, CONNECTOR_ID_MIRACAST = 23, + CONNECTOR_ID_USBC = 24, CONNECTOR_ID_VIRTUAL = 100 }; diff --git a/drivers/gpu/drm/amd/display/modules/freesync/freesync.c b/drivers/gpu/drm/amd/display/modules/freesync/freesync.c index bd1d1dc93629..03fa63d56fa6 100644 --- a/drivers/gpu/drm/amd/display/modules/freesync/freesync.c +++ b/drivers/gpu/drm/amd/display/modules/freesync/freesync.c @@ -46,6 +46,10 @@ /* Number of consecutive frames to check before entering/exiting fixed refresh */ #define 
FIXED_REFRESH_ENTER_FRAME_COUNT 5 #define FIXED_REFRESH_EXIT_FRAME_COUNT 10 +/* Flip interval workaround constants */ +#define VSYNCS_BETWEEN_FLIP_THRESHOLD 2 +#define FREESYNC_CONSEC_FLIP_AFTER_VSYNC 5 +#define FREESYNC_VSYNC_TO_FLIP_DELTA_IN_US 500 struct core_freesync { struct mod_freesync public; @@ -466,6 +470,41 @@ static void apply_fixed_refresh(struct core_freesync *core_freesync, } } +static void determine_flip_interval_workaround_req(struct mod_vrr_params *in_vrr, + unsigned int curr_time_stamp_in_us) +{ + in_vrr->flip_interval.vsync_to_flip_in_us = curr_time_stamp_in_us - + in_vrr->flip_interval.v_update_timestamp_in_us; + + /* Determine conditions for stopping workaround */ + if (in_vrr->flip_interval.flip_interval_workaround_active && + in_vrr->flip_interval.vsyncs_between_flip < VSYNCS_BETWEEN_FLIP_THRESHOLD && + in_vrr->flip_interval.vsync_to_flip_in_us > FREESYNC_VSYNC_TO_FLIP_DELTA_IN_US) { + in_vrr->flip_interval.flip_interval_detect_counter = 0; + in_vrr->flip_interval.program_flip_interval_workaround = true; + in_vrr->flip_interval.flip_interval_workaround_active = false; + } else { + /* Determine conditions for starting workaround */ + if (in_vrr->flip_interval.vsyncs_between_flip >= VSYNCS_BETWEEN_FLIP_THRESHOLD && + in_vrr->flip_interval.vsync_to_flip_in_us < FREESYNC_VSYNC_TO_FLIP_DELTA_IN_US) { + /* Increase flip interval counter we have 2 vsyncs between flips and + * vsync to flip interval is less than 500us + */ + in_vrr->flip_interval.flip_interval_detect_counter++; + if (in_vrr->flip_interval.flip_interval_detect_counter > FREESYNC_CONSEC_FLIP_AFTER_VSYNC) { + /* Start workaround if we detect 5 consecutive instances of the above case */ + in_vrr->flip_interval.program_flip_interval_workaround = true; + in_vrr->flip_interval.flip_interval_workaround_active = true; + } + } else { + /* Reset the flip interval counter if we condition is no longer met */ + in_vrr->flip_interval.flip_interval_detect_counter = 0; + } + } + + 
in_vrr->flip_interval.vsyncs_between_flip = 0; +} + static bool vrr_settings_require_update(struct core_freesync *core_freesync, struct mod_freesync_config *in_config, unsigned int min_refresh_in_uhz, @@ -1179,6 +1218,9 @@ void mod_freesync_handle_preflip(struct mod_freesync *mod_freesync, in_out_vrr); } + determine_flip_interval_workaround_req(in_out_vrr, + curr_time_stamp_in_us); + } } @@ -1187,6 +1229,8 @@ void mod_freesync_handle_v_update(struct mod_freesync *mod_freesync, struct mod_vrr_params *in_out_vrr) { struct core_freesync *core_freesync = NULL; + unsigned int cur_timestamp_in_us; + unsigned long long cur_tick; if ((mod_freesync == NULL) || (stream == NULL) || (in_out_vrr == NULL)) return; @@ -1196,6 +1240,36 @@ void mod_freesync_handle_v_update(struct mod_freesync *mod_freesync, if (in_out_vrr->supported == false) return; + cur_tick = dm_get_timestamp(core_freesync->dc->ctx); + cur_timestamp_in_us = (unsigned int) + div_u64(dm_get_elapse_time_in_ns(core_freesync->dc->ctx, cur_tick, 0), 1000); + + in_out_vrr->flip_interval.vsyncs_between_flip++; + in_out_vrr->flip_interval.v_update_timestamp_in_us = cur_timestamp_in_us; + + if (in_out_vrr->state == VRR_STATE_ACTIVE_VARIABLE && + (in_out_vrr->flip_interval.flip_interval_workaround_active || + (!in_out_vrr->flip_interval.flip_interval_workaround_active && + in_out_vrr->flip_interval.program_flip_interval_workaround))) { + // set freesync vmin vmax to nominal for workaround + in_out_vrr->adjust.v_total_min = + mod_freesync_calc_v_total_from_refresh( + stream, in_out_vrr->max_refresh_in_uhz); + in_out_vrr->adjust.v_total_max = + in_out_vrr->adjust.v_total_min; + in_out_vrr->flip_interval.program_flip_interval_workaround = false; + in_out_vrr->flip_interval.do_flip_interval_workaround_cleanup = true; + return; + } + + if (in_out_vrr->state != VRR_STATE_ACTIVE_VARIABLE && + in_out_vrr->flip_interval.do_flip_interval_workaround_cleanup) { + in_out_vrr->flip_interval.do_flip_interval_workaround_cleanup = false; 
+ in_out_vrr->flip_interval.flip_interval_detect_counter = 0; + in_out_vrr->flip_interval.vsyncs_between_flip = 0; + in_out_vrr->flip_interval.vsync_to_flip_in_us = 0; + } + /* Below the Range Logic */ /* Only execute if in fullscreen mode */ @@ -1302,7 +1376,7 @@ unsigned long long mod_freesync_calc_field_rate_from_timing( bool mod_freesync_is_valid_range(uint32_t min_refresh_cap_in_uhz, uint32_t max_refresh_cap_in_uhz, - uint32_t nominal_field_rate_in_uhz) + uint32_t nominal_field_rate_in_uhz) { /* Typically nominal refresh calculated can have some fractional part. diff --git a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp.c b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp.c index 3e81850a7ffe..5e01c6e24cbc 100644 --- a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp.c +++ b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp.c @@ -251,6 +251,33 @@ out: return status; } +static enum mod_hdcp_status update_display_adjustments(struct mod_hdcp *hdcp, + struct mod_hdcp_display *display, + struct mod_hdcp_display_adjustment *adj) +{ + enum mod_hdcp_status status = MOD_HDCP_STATUS_NOT_IMPLEMENTED; + + if (is_in_authenticated_states(hdcp) && + is_dp_mst_hdcp(hdcp) && + display->adjust.disable == true && + adj->disable == false) { + display->adjust.disable = false; + if (is_hdcp1(hdcp)) + status = mod_hdcp_hdcp1_enable_dp_stream_encryption(hdcp); + else if (is_hdcp2(hdcp)) + status = mod_hdcp_hdcp2_enable_dp_stream_encryption(hdcp); + + if (status != MOD_HDCP_STATUS_SUCCESS) + display->adjust.disable = true; + } + + if (status == MOD_HDCP_STATUS_SUCCESS && + memcmp(adj, &display->adjust, + sizeof(struct mod_hdcp_display_adjustment)) != 0) + status = MOD_HDCP_STATUS_NOT_IMPLEMENTED; + + return status; +} /* * Implementation of functions in mod_hdcp.h */ @@ -391,7 +418,7 @@ out: return status; } -enum mod_hdcp_status mod_hdcp_update_authentication(struct mod_hdcp *hdcp, +enum mod_hdcp_status mod_hdcp_update_display(struct mod_hdcp *hdcp, uint8_t index, struct 
mod_hdcp_link_adjustment *link_adjust, struct mod_hdcp_display_adjustment *display_adjust, @@ -419,6 +446,15 @@ enum mod_hdcp_status mod_hdcp_update_authentication(struct mod_hdcp *hdcp, goto out; } + if (memcmp(link_adjust, &hdcp->connection.link.adjust, + sizeof(struct mod_hdcp_link_adjustment)) == 0 && + memcmp(display_adjust, &display->adjust, + sizeof(struct mod_hdcp_display_adjustment)) != 0) { + status = update_display_adjustments(hdcp, display, display_adjust); + if (status != MOD_HDCP_STATUS_NOT_IMPLEMENTED) + goto out; + } + /* stop current authentication */ status = reset_authentication(hdcp, output); if (status != MOD_HDCP_STATUS_SUCCESS) diff --git a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp.h b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp.h index 8502263d2968..4e7021c3c845 100644 --- a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp.h +++ b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp.h @@ -445,6 +445,14 @@ static inline uint8_t is_in_hdcp2_dp_states(struct mod_hdcp *hdcp) current_state(hdcp) <= HDCP2_DP_STATE_END); } +static inline uint8_t is_in_authenticated_states(struct mod_hdcp *hdcp) +{ + return (current_state(hdcp) == D1_A4_AUTHENTICATED || + current_state(hdcp) == H1_A45_AUTHENTICATED || + current_state(hdcp) == D2_A5_AUTHENTICATED || + current_state(hdcp) == H2_A5_AUTHENTICATED); +} + static inline uint8_t is_hdcp1(struct mod_hdcp *hdcp) { return (is_in_hdcp1_states(hdcp) || is_in_hdcp1_dp_states(hdcp)); diff --git a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp1_execution.c b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp1_execution.c index 6ec918af3bff..1ddb4f5eac8e 100644 --- a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp1_execution.c +++ b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp1_execution.c @@ -497,9 +497,9 @@ enum mod_hdcp_status mod_hdcp_hdcp1_execution(struct mod_hdcp *hdcp, return status; } -extern enum mod_hdcp_status mod_hdcp_hdcp1_dp_execution(struct mod_hdcp *hdcp, - struct mod_hdcp_event_context *event_ctx, - struct 
mod_hdcp_transition_input_hdcp1 *input) +enum mod_hdcp_status mod_hdcp_hdcp1_dp_execution(struct mod_hdcp *hdcp, + struct mod_hdcp_event_context *event_ctx, + struct mod_hdcp_transition_input_hdcp1 *input) { enum mod_hdcp_status status = MOD_HDCP_STATUS_SUCCESS; diff --git a/drivers/gpu/drm/amd/display/modules/inc/mod_freesync.h b/drivers/gpu/drm/amd/display/modules/inc/mod_freesync.h index 75a158a2514c..cf6bc9446244 100644 --- a/drivers/gpu/drm/amd/display/modules/inc/mod_freesync.h +++ b/drivers/gpu/drm/amd/display/modules/inc/mod_freesync.h @@ -105,6 +105,16 @@ struct mod_vrr_params_fixed_refresh { uint32_t frame_counter; }; +struct mod_vrr_params_flip_interval { + bool flip_interval_workaround_active; + bool program_flip_interval_workaround; + bool do_flip_interval_workaround_cleanup; + uint32_t flip_interval_detect_counter; + uint32_t vsyncs_between_flip; + uint32_t vsync_to_flip_in_us; + uint32_t v_update_timestamp_in_us; +}; + struct mod_vrr_params { bool supported; bool send_info_frame; @@ -121,6 +131,8 @@ struct mod_vrr_params { struct mod_vrr_params_fixed_refresh fixed; struct mod_vrr_params_btr btr; + + struct mod_vrr_params_flip_interval flip_interval; }; struct mod_freesync *mod_freesync_create(struct dc *dc); diff --git a/drivers/gpu/drm/amd/display/modules/inc/mod_hdcp.h b/drivers/gpu/drm/amd/display/modules/inc/mod_hdcp.h index f7420c3f5672..3348bb97ef81 100644 --- a/drivers/gpu/drm/amd/display/modules/inc/mod_hdcp.h +++ b/drivers/gpu/drm/amd/display/modules/inc/mod_hdcp.h @@ -294,7 +294,7 @@ enum mod_hdcp_status mod_hdcp_remove_display(struct mod_hdcp *hdcp, uint8_t index, struct mod_hdcp_output *output); /* called per display to apply new authentication adjustment */ -enum mod_hdcp_status mod_hdcp_update_authentication(struct mod_hdcp *hdcp, +enum mod_hdcp_status mod_hdcp_update_display(struct mod_hdcp *hdcp, uint8_t index, struct mod_hdcp_link_adjustment *link_adjust, struct mod_hdcp_display_adjustment *display_adjust, diff --git 
a/drivers/gpu/drm/amd/display/modules/power/power_helpers.c b/drivers/gpu/drm/amd/display/modules/power/power_helpers.c index 2b00f334e93d..97928d4c3b9a 100644 --- a/drivers/gpu/drm/amd/display/modules/power/power_helpers.c +++ b/drivers/gpu/drm/amd/display/modules/power/power_helpers.c @@ -784,3 +784,41 @@ bool dmcu_load_iram(struct dmcu *dmcu, return result; } +/* + * is_psr_su_specific_panel() - check if sink is AMD vendor-specific PSR-SU + * supported eDP device. + * + * @link: dc link pointer + * + * Return: true if AMDGPU vendor specific PSR-SU eDP panel + */ +bool is_psr_su_specific_panel(struct dc_link *link) +{ + if (link->dpcd_caps.edp_rev >= DP_EDP_14) { + if (link->dpcd_caps.psr_info.psr_version >= DP_PSR2_WITH_Y_COORD_ET_SUPPORTED) + return true; + /* + * Some panels will report PSR capabilities over additional DPCD bits. + * Such panels are approved despite reporting only PSR v3, as long as + * the additional bits are reported. + */ + if (link->dpcd_caps.psr_info.psr_version < DP_PSR2_WITH_Y_COORD_IS_SUPPORTED) + return false; + + if (link->dpcd_caps.sink_dev_id == DP_BRANCH_DEVICE_ID_001CF8) { + /* + * FIXME: + * This is the temporary workaround to disable PSRSU when system turned on + * DSC function on the sepcific sink. Once the PSRSU + DSC is fixed, this + * condition should be removed. 
+ */ + if (link->dpcd_caps.dsc_caps.dsc_basic_caps.fields.dsc_support.DSC_SUPPORT) + return false; + + if (link->dpcd_caps.psr_info.force_psrsu_cap == 0x1) + return true; + } + } + + return false; +} diff --git a/drivers/gpu/drm/amd/display/modules/power/power_helpers.h b/drivers/gpu/drm/amd/display/modules/power/power_helpers.h index 2a9f8e2d8080..1a634d8c78c5 100644 --- a/drivers/gpu/drm/amd/display/modules/power/power_helpers.h +++ b/drivers/gpu/drm/amd/display/modules/power/power_helpers.h @@ -52,4 +52,5 @@ bool dmub_init_abm_config(struct resource_pool *res_pool, struct dmcu_iram_parameters params, unsigned int inst); +bool is_psr_su_specific_panel(struct dc_link *link); #endif /* MODULES_POWER_POWER_HELPERS_H_ */ diff --git a/drivers/gpu/drm/amd/include/amd_shared.h b/drivers/gpu/drm/amd/include/amd_shared.h index fe4e585781bb..741dae17562a 100644 --- a/drivers/gpu/drm/amd/include/amd_shared.h +++ b/drivers/gpu/drm/amd/include/amd_shared.h @@ -116,38 +116,38 @@ enum amd_powergating_state { /* CG flags */ -#define AMD_CG_SUPPORT_GFX_MGCG (1 << 0) -#define AMD_CG_SUPPORT_GFX_MGLS (1 << 1) -#define AMD_CG_SUPPORT_GFX_CGCG (1 << 2) -#define AMD_CG_SUPPORT_GFX_CGLS (1 << 3) -#define AMD_CG_SUPPORT_GFX_CGTS (1 << 4) -#define AMD_CG_SUPPORT_GFX_CGTS_LS (1 << 5) -#define AMD_CG_SUPPORT_GFX_CP_LS (1 << 6) -#define AMD_CG_SUPPORT_GFX_RLC_LS (1 << 7) -#define AMD_CG_SUPPORT_MC_LS (1 << 8) -#define AMD_CG_SUPPORT_MC_MGCG (1 << 9) -#define AMD_CG_SUPPORT_SDMA_LS (1 << 10) -#define AMD_CG_SUPPORT_SDMA_MGCG (1 << 11) -#define AMD_CG_SUPPORT_BIF_LS (1 << 12) -#define AMD_CG_SUPPORT_UVD_MGCG (1 << 13) -#define AMD_CG_SUPPORT_VCE_MGCG (1 << 14) -#define AMD_CG_SUPPORT_HDP_LS (1 << 15) -#define AMD_CG_SUPPORT_HDP_MGCG (1 << 16) -#define AMD_CG_SUPPORT_ROM_MGCG (1 << 17) -#define AMD_CG_SUPPORT_DRM_LS (1 << 18) -#define AMD_CG_SUPPORT_BIF_MGCG (1 << 19) -#define AMD_CG_SUPPORT_GFX_3D_CGCG (1 << 20) -#define AMD_CG_SUPPORT_GFX_3D_CGLS (1 << 21) -#define AMD_CG_SUPPORT_DRM_MGCG (1 
<< 22) -#define AMD_CG_SUPPORT_DF_MGCG (1 << 23) -#define AMD_CG_SUPPORT_VCN_MGCG (1 << 24) -#define AMD_CG_SUPPORT_HDP_DS (1 << 25) -#define AMD_CG_SUPPORT_HDP_SD (1 << 26) -#define AMD_CG_SUPPORT_IH_CG (1 << 27) -#define AMD_CG_SUPPORT_ATHUB_LS (1 << 28) -#define AMD_CG_SUPPORT_ATHUB_MGCG (1 << 29) -#define AMD_CG_SUPPORT_JPEG_MGCG (1 << 30) -#define AMD_CG_SUPPORT_GFX_FGCG (1 << 31) +#define AMD_CG_SUPPORT_GFX_MGCG (1ULL << 0) +#define AMD_CG_SUPPORT_GFX_MGLS (1ULL << 1) +#define AMD_CG_SUPPORT_GFX_CGCG (1ULL << 2) +#define AMD_CG_SUPPORT_GFX_CGLS (1ULL << 3) +#define AMD_CG_SUPPORT_GFX_CGTS (1ULL << 4) +#define AMD_CG_SUPPORT_GFX_CGTS_LS (1ULL << 5) +#define AMD_CG_SUPPORT_GFX_CP_LS (1ULL << 6) +#define AMD_CG_SUPPORT_GFX_RLC_LS (1ULL << 7) +#define AMD_CG_SUPPORT_MC_LS (1ULL << 8) +#define AMD_CG_SUPPORT_MC_MGCG (1ULL << 9) +#define AMD_CG_SUPPORT_SDMA_LS (1ULL << 10) +#define AMD_CG_SUPPORT_SDMA_MGCG (1ULL << 11) +#define AMD_CG_SUPPORT_BIF_LS (1ULL << 12) +#define AMD_CG_SUPPORT_UVD_MGCG (1ULL << 13) +#define AMD_CG_SUPPORT_VCE_MGCG (1ULL << 14) +#define AMD_CG_SUPPORT_HDP_LS (1ULL << 15) +#define AMD_CG_SUPPORT_HDP_MGCG (1ULL << 16) +#define AMD_CG_SUPPORT_ROM_MGCG (1ULL << 17) +#define AMD_CG_SUPPORT_DRM_LS (1ULL << 18) +#define AMD_CG_SUPPORT_BIF_MGCG (1ULL << 19) +#define AMD_CG_SUPPORT_GFX_3D_CGCG (1ULL << 20) +#define AMD_CG_SUPPORT_GFX_3D_CGLS (1ULL << 21) +#define AMD_CG_SUPPORT_DRM_MGCG (1ULL << 22) +#define AMD_CG_SUPPORT_DF_MGCG (1ULL << 23) +#define AMD_CG_SUPPORT_VCN_MGCG (1ULL << 24) +#define AMD_CG_SUPPORT_HDP_DS (1ULL << 25) +#define AMD_CG_SUPPORT_HDP_SD (1ULL << 26) +#define AMD_CG_SUPPORT_IH_CG (1ULL << 27) +#define AMD_CG_SUPPORT_ATHUB_LS (1ULL << 28) +#define AMD_CG_SUPPORT_ATHUB_MGCG (1ULL << 29) +#define AMD_CG_SUPPORT_JPEG_MGCG (1ULL << 30) +#define AMD_CG_SUPPORT_GFX_FGCG (1ULL << 31) /* PG flags */ #define AMD_PG_SUPPORT_GFX_PG (1 << 0) #define AMD_PG_SUPPORT_GFX_SMG (1 << 1) @@ -298,7 +298,7 @@ struct amd_ip_funcs { enum 
amd_clockgating_state state); int (*set_powergating_state)(void *handle, enum amd_powergating_state state); - void (*get_clockgating_state)(void *handle, u32 *flags); + void (*get_clockgating_state)(void *handle, u64 *flags); }; diff --git a/drivers/gpu/drm/amd/include/asic_reg/vcn/vcn_2_5_offset.h b/drivers/gpu/drm/amd/include/asic_reg/vcn/vcn_2_5_offset.h index 90350f46a0c4..363d2139cea2 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/vcn/vcn_2_5_offset.h +++ b/drivers/gpu/drm/amd/include/asic_reg/vcn/vcn_2_5_offset.h @@ -988,4 +988,17 @@ #define mmMDM_WIG_PIPE_BUSY_BASE_IDX 1 +/* VCN 2_6_0 regs */ +#define mmUVD_RAS_VCPU_VCODEC_STATUS 0x0057 +#define mmUVD_RAS_VCPU_VCODEC_STATUS_BASE_IDX 1 +#define mmUVD_RAS_MMSCH_FATAL_ERROR 0x0058 +#define mmUVD_RAS_MMSCH_FATAL_ERROR_BASE_IDX 1 + + +/* JPEG 2_6_0 regs */ +#define mmUVD_RAS_JPEG0_STATUS 0x0059 +#define mmUVD_RAS_JPEG0_STATUS_BASE_IDX 1 +#define mmUVD_RAS_JPEG1_STATUS 0x005a +#define mmUVD_RAS_JPEG1_STATUS_BASE_IDX 1 + #endif diff --git a/drivers/gpu/drm/amd/include/asic_reg/vcn/vcn_2_5_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/vcn/vcn_2_5_sh_mask.h index c41c59c30006..8de883b76d90 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/vcn/vcn_2_5_sh_mask.h +++ b/drivers/gpu/drm/amd/include/asic_reg/vcn/vcn_2_5_sh_mask.h @@ -3606,4 +3606,28 @@ #define UVD_LMI_CRC3__CRC32_MASK 0xFFFFFFFFL +/* VCN 2_6_0 UVD_RAS_VCPU_VCODEC_STATUS */ +#define UVD_RAS_VCPU_VCODEC_STATUS__POISONED_VF__SHIFT 0x0 +#define UVD_RAS_VCPU_VCODEC_STATUS__POISONED_PF__SHIFT 0x1f +#define UVD_RAS_VCPU_VCODEC_STATUS__POISONED_VF_MASK 0x7FFFFFFFL +#define UVD_RAS_VCPU_VCODEC_STATUS__POISONED_PF_MASK 0x80000000L + +/* VCN 2_6_0 UVD_RAS_MMSCH_FATAL_ERROR */ +#define UVD_RAS_MMSCH_FATAL_ERROR__POISONED_VF__SHIFT 0x0 +#define UVD_RAS_MMSCH_FATAL_ERROR__POISONED_PF__SHIFT 0x1f +#define UVD_RAS_MMSCH_FATAL_ERROR__POISONED_VF_MASK 0x7FFFFFFFL +#define UVD_RAS_MMSCH_FATAL_ERROR__POISONED_PF_MASK 0x80000000L + +/* JPEG 2_6_0 UVD_RAS_JPEG0_STATUS */ 
+#define UVD_RAS_JPEG0_STATUS__POISONED_VF__SHIFT 0x0 +#define UVD_RAS_JPEG0_STATUS__POISONED_PF__SHIFT 0x1f +#define UVD_RAS_JPEG0_STATUS__POISONED_VF_MASK 0x7FFFFFFFL +#define UVD_RAS_JPEG0_STATUS__POISONED_PF_MASK 0x80000000L + +/* JPEG 2_6_0 UVD_RAS_JPEG1_STATUS */ +#define UVD_RAS_JPEG1_STATUS__POISONED_VF__SHIFT 0x0 +#define UVD_RAS_JPEG1_STATUS__POISONED_PF__SHIFT 0x1f +#define UVD_RAS_JPEG1_STATUS__POISONED_VF_MASK 0x7FFFFFFFL +#define UVD_RAS_JPEG1_STATUS__POISONED_PF_MASK 0x80000000L + #endif diff --git a/drivers/gpu/drm/amd/include/discovery.h b/drivers/gpu/drm/amd/include/discovery.h index a486769b66c6..b25026c3ec96 100644 --- a/drivers/gpu/drm/amd/include/discovery.h +++ b/drivers/gpu/drm/amd/include/discovery.h @@ -93,7 +93,7 @@ typedef struct ip uint8_t harvest : 4; /* Harvest */ uint8_t reserved : 4; /* Placeholder field */ #endif - uint32_t base_address[1]; /* variable number of Addresses */ + uint32_t base_address[]; /* variable number of Addresses */ } ip; typedef struct die_header diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c b/drivers/gpu/drm/amd/pm/amdgpu_pm.c index 5cd67ddf8495..78ec9b71197d 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c @@ -1954,8 +1954,9 @@ static int default_attr_update(struct amdgpu_device *adev, struct amdgpu_device_ uint32_t mask, enum amdgpu_device_attr_states *states) { struct device_attribute *dev_attr = &attr->dev_attr; + uint32_t mp1_ver = adev->ip_versions[MP1_HWIP][0]; + uint32_t gc_ver = adev->ip_versions[GC_HWIP][0]; const char *attr_name = dev_attr->attr.name; - enum amd_asic_type asic_type = adev->asic_type; if (!(attr->flags & mask)) { *states = ATTR_STATE_UNSUPPORTED; @@ -1965,53 +1966,63 @@ static int default_attr_update(struct amdgpu_device *adev, struct amdgpu_device_ #define DEVICE_ATTR_IS(_name) (!strcmp(attr_name, #_name)) if (DEVICE_ATTR_IS(pp_dpm_socclk)) { - if (asic_type < CHIP_VEGA10) + if (gc_ver < IP_VERSION(9, 0, 0)) *states = ATTR_STATE_UNSUPPORTED; 
} else if (DEVICE_ATTR_IS(pp_dpm_dcefclk)) { - if (asic_type < CHIP_VEGA10 || - asic_type == CHIP_ARCTURUS || - asic_type == CHIP_ALDEBARAN) + if (gc_ver < IP_VERSION(9, 0, 0) || + gc_ver == IP_VERSION(9, 4, 1) || + gc_ver == IP_VERSION(9, 4, 2)) *states = ATTR_STATE_UNSUPPORTED; } else if (DEVICE_ATTR_IS(pp_dpm_fclk)) { - if (asic_type < CHIP_VEGA20) + if (mp1_ver < IP_VERSION(10, 0, 0)) *states = ATTR_STATE_UNSUPPORTED; } else if (DEVICE_ATTR_IS(pp_od_clk_voltage)) { *states = ATTR_STATE_UNSUPPORTED; if (amdgpu_dpm_is_overdrive_supported(adev)) *states = ATTR_STATE_SUPPORTED; } else if (DEVICE_ATTR_IS(mem_busy_percent)) { - if (adev->flags & AMD_IS_APU || asic_type == CHIP_VEGA10) + if (adev->flags & AMD_IS_APU || gc_ver == IP_VERSION(9, 0, 1)) *states = ATTR_STATE_UNSUPPORTED; } else if (DEVICE_ATTR_IS(pcie_bw)) { /* PCIe Perf counters won't work on APU nodes */ if (adev->flags & AMD_IS_APU) *states = ATTR_STATE_UNSUPPORTED; } else if (DEVICE_ATTR_IS(unique_id)) { - if (asic_type != CHIP_VEGA10 && - asic_type != CHIP_VEGA20 && - asic_type != CHIP_ARCTURUS && - asic_type != CHIP_ALDEBARAN) + switch (gc_ver) { + case IP_VERSION(9, 0, 1): + case IP_VERSION(9, 4, 0): + case IP_VERSION(9, 4, 1): + case IP_VERSION(9, 4, 2): + case IP_VERSION(10, 3, 0): + *states = ATTR_STATE_SUPPORTED; + break; + default: *states = ATTR_STATE_UNSUPPORTED; + } } else if (DEVICE_ATTR_IS(pp_features)) { - if (adev->flags & AMD_IS_APU || asic_type < CHIP_VEGA10) + if (adev->flags & AMD_IS_APU || gc_ver < IP_VERSION(9, 0, 0)) *states = ATTR_STATE_UNSUPPORTED; } else if (DEVICE_ATTR_IS(gpu_metrics)) { - if (asic_type < CHIP_VEGA12) + if (gc_ver < IP_VERSION(9, 1, 0)) *states = ATTR_STATE_UNSUPPORTED; } else if (DEVICE_ATTR_IS(pp_dpm_vclk)) { - if (!(asic_type == CHIP_VANGOGH || asic_type == CHIP_SIENNA_CICHLID)) + if (!(gc_ver == IP_VERSION(10, 3, 1) || + gc_ver == IP_VERSION(10, 3, 0) || + gc_ver == IP_VERSION(10, 1, 2))) *states = ATTR_STATE_UNSUPPORTED; } else if 
(DEVICE_ATTR_IS(pp_dpm_dclk)) { - if (!(asic_type == CHIP_VANGOGH || asic_type == CHIP_SIENNA_CICHLID)) + if (!(gc_ver == IP_VERSION(10, 3, 1) || + gc_ver == IP_VERSION(10, 3, 0) || + gc_ver == IP_VERSION(10, 1, 2))) *states = ATTR_STATE_UNSUPPORTED; } else if (DEVICE_ATTR_IS(pp_power_profile_mode)) { if (amdgpu_dpm_get_power_profile_mode(adev, NULL) == -EOPNOTSUPP) *states = ATTR_STATE_UNSUPPORTED; } - switch (asic_type) { - case CHIP_ARCTURUS: - case CHIP_ALDEBARAN: + switch (gc_ver) { + case IP_VERSION(9, 4, 1): + case IP_VERSION(9, 4, 2): /* the Mi series card does not support standalone mclk/socclk/fclk level setting */ if (DEVICE_ATTR_IS(pp_dpm_mclk) || DEVICE_ATTR_IS(pp_dpm_socclk) || @@ -2026,7 +2037,7 @@ static int default_attr_update(struct amdgpu_device *adev, struct amdgpu_device_ if (DEVICE_ATTR_IS(pp_dpm_dcefclk)) { /* SMU MP1 does not support dcefclk level setting */ - if (asic_type >= CHIP_NAVI10) { + if (gc_ver >= IP_VERSION(10, 0, 0)) { dev_attr->attr.mode &= ~S_IWUGO; dev_attr->store = NULL; } @@ -2864,8 +2875,9 @@ static ssize_t amdgpu_hwmon_show_power_label(struct device *dev, char *buf) { struct amdgpu_device *adev = dev_get_drvdata(dev); + uint32_t gc_ver = adev->ip_versions[GC_HWIP][0]; - if (adev->asic_type == CHIP_VANGOGH) + if (gc_ver == IP_VERSION(10, 3, 1)) return sysfs_emit(buf, "%s\n", to_sensor_dev_attr(attr)->index == PP_PWR_TYPE_FAST ? 
"fastPPT" : "slowPPT"); @@ -3177,6 +3189,7 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj, struct device *dev = kobj_to_dev(kobj); struct amdgpu_device *adev = dev_get_drvdata(dev); umode_t effective_mode = attr->mode; + uint32_t gc_ver = adev->ip_versions[GC_HWIP][0]; /* under multi-vf mode, the hwmon attributes are all not supported */ if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev)) @@ -3245,18 +3258,18 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj, attr == &sensor_dev_attr_pwm1_enable.dev_attr.attr)) /* can't manage state */ effective_mode &= ~S_IWUSR; + /* not implemented yet for GC 10.3.1 APUs */ if (((adev->family == AMDGPU_FAMILY_SI) || - ((adev->flags & AMD_IS_APU) && - (adev->asic_type != CHIP_VANGOGH))) && /* not implemented yet */ + ((adev->flags & AMD_IS_APU) && (gc_ver != IP_VERSION(10, 3, 1)))) && (attr == &sensor_dev_attr_power1_cap_max.dev_attr.attr || - attr == &sensor_dev_attr_power1_cap_min.dev_attr.attr|| + attr == &sensor_dev_attr_power1_cap_min.dev_attr.attr || attr == &sensor_dev_attr_power1_cap.dev_attr.attr || attr == &sensor_dev_attr_power1_cap_default.dev_attr.attr)) return 0; + /* not implemented yet for APUs having <= GC 9.3.0 */ if (((adev->family == AMDGPU_FAMILY_SI) || - ((adev->flags & AMD_IS_APU) && - (adev->asic_type < CHIP_RENOIR))) && /* not implemented yet */ + ((adev->flags & AMD_IS_APU) && (gc_ver < IP_VERSION(9, 3, 0)))) && (attr == &sensor_dev_attr_power1_average.dev_attr.attr)) return 0; @@ -3294,8 +3307,7 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj, return 0; /* only SOC15 dGPUs support hotspot and mem temperatures */ - if (((adev->flags & AMD_IS_APU) || - adev->asic_type < CHIP_VEGA10) && + if (((adev->flags & AMD_IS_APU) || gc_ver < IP_VERSION(9, 0, 0)) && (attr == &sensor_dev_attr_temp2_crit.dev_attr.attr || attr == &sensor_dev_attr_temp2_crit_hyst.dev_attr.attr || attr == &sensor_dev_attr_temp3_crit.dev_attr.attr || @@ -3310,13 +3322,13 @@ static 
umode_t hwmon_attributes_visible(struct kobject *kobj, return 0; /* only Vangogh has fast PPT limit and power labels */ - if (!(adev->asic_type == CHIP_VANGOGH) && + if (!(gc_ver == IP_VERSION(10, 3, 1)) && (attr == &sensor_dev_attr_power2_average.dev_attr.attr || - attr == &sensor_dev_attr_power2_cap_max.dev_attr.attr || + attr == &sensor_dev_attr_power2_cap_max.dev_attr.attr || attr == &sensor_dev_attr_power2_cap_min.dev_attr.attr || - attr == &sensor_dev_attr_power2_cap.dev_attr.attr || - attr == &sensor_dev_attr_power2_cap_default.dev_attr.attr || - attr == &sensor_dev_attr_power2_label.dev_attr.attr)) + attr == &sensor_dev_attr_power2_cap.dev_attr.attr || + attr == &sensor_dev_attr_power2_cap_default.dev_attr.attr || + attr == &sensor_dev_attr_power2_label.dev_attr.attr)) return 0; return effective_mode; @@ -3421,6 +3433,8 @@ static void amdgpu_debugfs_prints_cpu_info(struct seq_file *m, static int amdgpu_debugfs_pm_info_pp(struct seq_file *m, struct amdgpu_device *adev) { + uint32_t mp1_ver = adev->ip_versions[MP1_HWIP][0]; + uint32_t gc_ver = adev->ip_versions[GC_HWIP][0]; uint32_t value; uint64_t value64 = 0; uint32_t query = 0; @@ -3467,7 +3481,8 @@ static int amdgpu_debugfs_pm_info_pp(struct seq_file *m, struct amdgpu_device *a if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_ENABLED_SMC_FEATURES_MASK, (void *)&value64, &size)) seq_printf(m, "SMC Feature Mask: 0x%016llx\n", value64); - if (adev->asic_type > CHIP_VEGA20) { + /* ASICs greater than CHIP_VEGA20 supports these sensors */ + if (gc_ver != IP_VERSION(9, 4, 0) && mp1_ver > IP_VERSION(9, 0, 0)) { /* VCN clocks */ if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_VCN_POWER_STATE, (void *)&value, &size)) { if (!value) { @@ -3511,7 +3526,7 @@ static int amdgpu_debugfs_pm_info_pp(struct seq_file *m, struct amdgpu_device *a return 0; } -static void amdgpu_parse_cg_state(struct seq_file *m, u32 flags) +static void amdgpu_parse_cg_state(struct seq_file *m, u64 flags) { int i; @@ -3524,7 +3539,7 @@ 
static int amdgpu_debugfs_pm_info_show(struct seq_file *m, void *unused) { struct amdgpu_device *adev = (struct amdgpu_device *)m->private; struct drm_device *dev = adev_to_drm(adev); - u32 flags = 0; + u64 flags = 0; int r; if (amdgpu_in_reset(adev)) @@ -3546,7 +3561,7 @@ static int amdgpu_debugfs_pm_info_show(struct seq_file *m, void *unused) amdgpu_device_ip_get_clockgating_state(adev, &flags); - seq_printf(m, "Clock Gating Flags Mask: 0x%x\n", flags); + seq_printf(m, "Clock Gating Flags Mask: 0x%llx\n", flags); amdgpu_parse_cg_state(m, flags); seq_printf(m, "\n"); diff --git a/drivers/gpu/drm/amd/pm/inc/amdgpu_pm.h b/drivers/gpu/drm/amd/pm/inc/amdgpu_pm.h index a920515e2274..52045ad59bed 100644 --- a/drivers/gpu/drm/amd/pm/inc/amdgpu_pm.h +++ b/drivers/gpu/drm/amd/pm/inc/amdgpu_pm.h @@ -26,7 +26,7 @@ struct cg_flag_name { - u32 flag; + u64 flag; const char *name; }; diff --git a/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c b/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c index a2da46bf3985..dbed72c1e0c6 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c +++ b/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c @@ -671,6 +671,22 @@ static int pp_dpm_force_clock_level(void *handle, return hwmgr->hwmgr_func->force_clock_level(hwmgr, type, mask); } +static int pp_dpm_emit_clock_levels(void *handle, + enum pp_clock_type type, + char *buf, + int *offset) +{ + struct pp_hwmgr *hwmgr = handle; + + if (!hwmgr || !hwmgr->pm_en) + return -EOPNOTSUPP; + + if (!hwmgr->hwmgr_func->emit_clock_levels) + return -ENOENT; + + return hwmgr->hwmgr_func->emit_clock_levels(hwmgr, type, buf, offset); +} + static int pp_dpm_print_clock_levels(void *handle, enum pp_clock_type type, char *buf) { @@ -1535,6 +1551,7 @@ static const struct amd_pm_funcs pp_dpm_funcs = { .get_pp_table = pp_dpm_get_pp_table, .set_pp_table = pp_dpm_set_pp_table, .force_clock_level = pp_dpm_force_clock_level, + .emit_clock_levels = pp_dpm_emit_clock_levels, .print_clock_levels = 
pp_dpm_print_clock_levels, .get_sclk_od = pp_dpm_get_sclk_od, .set_sclk_od = pp_dpm_set_sclk_od, diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_hwmgr.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_hwmgr.c index 37324f2009ca..99bfe5efe171 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_hwmgr.c +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_hwmgr.c @@ -4625,6 +4625,152 @@ static int vega10_get_current_pcie_link_speed_level(struct pp_hwmgr *hwmgr) >> PSWUSP0_PCIE_LC_SPEED_CNTL__LC_CURRENT_DATA_RATE__SHIFT; } +static int vega10_emit_clock_levels(struct pp_hwmgr *hwmgr, + enum pp_clock_type type, char *buf, int *offset) +{ + struct vega10_hwmgr *data = hwmgr->backend; + struct vega10_single_dpm_table *sclk_table = &(data->dpm_table.gfx_table); + struct vega10_single_dpm_table *mclk_table = &(data->dpm_table.mem_table); + struct vega10_single_dpm_table *soc_table = &(data->dpm_table.soc_table); + struct vega10_single_dpm_table *dcef_table = &(data->dpm_table.dcef_table); + struct vega10_odn_clock_voltage_dependency_table *podn_vdd_dep = NULL; + uint32_t gen_speed, lane_width, current_gen_speed, current_lane_width; + PPTable_t *pptable = &(data->smc_state_table.pp_table); + + uint32_t i, now, count = 0; + int ret = 0; + + switch (type) { + case PP_SCLK: + if (data->registry_data.sclk_dpm_key_disabled) + return -EOPNOTSUPP; + + ret = smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetCurrentGfxclkIndex, &now); + if (unlikely(ret != 0)) + return ret; + + if (hwmgr->pp_one_vf && + (hwmgr->dpm_level == AMD_DPM_FORCED_LEVEL_PROFILE_PEAK)) + count = 5; + else + count = sclk_table->count; + for (i = 0; i < count; i++) + *offset += sysfs_emit_at(buf, *offset, "%d: %uMhz %s\n", + i, sclk_table->dpm_levels[i].value / 100, + (i == now) ? 
"*" : ""); + break; + case PP_MCLK: + if (data->registry_data.mclk_dpm_key_disabled) + return -EOPNOTSUPP; + + ret = smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetCurrentUclkIndex, &now); + if (unlikely(ret != 0)) + return ret; + + for (i = 0; i < mclk_table->count; i++) + *offset += sysfs_emit_at(buf, *offset, "%d: %uMhz %s\n", + i, mclk_table->dpm_levels[i].value / 100, + (i == now) ? "*" : ""); + break; + case PP_SOCCLK: + if (data->registry_data.socclk_dpm_key_disabled) + return -EOPNOTSUPP; + + ret = smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetCurrentSocclkIndex, &now); + if (unlikely(ret != 0)) + return ret; + + for (i = 0; i < soc_table->count; i++) + *offset += sysfs_emit_at(buf, *offset, "%d: %uMhz %s\n", + i, soc_table->dpm_levels[i].value / 100, + (i == now) ? "*" : ""); + break; + case PP_DCEFCLK: + if (data->registry_data.dcefclk_dpm_key_disabled) + return -EOPNOTSUPP; + + ret = smum_send_msg_to_smc_with_parameter(hwmgr, + PPSMC_MSG_GetClockFreqMHz, + CLK_DCEFCLK, &now); + if (unlikely(ret != 0)) + return ret; + + for (i = 0; i < dcef_table->count; i++) + *offset += sysfs_emit_at(buf, *offset, "%d: %uMhz %s\n", + i, dcef_table->dpm_levels[i].value / 100, + (dcef_table->dpm_levels[i].value / 100 == now) ? + "*" : ""); + break; + case PP_PCIE: + current_gen_speed = + vega10_get_current_pcie_link_speed_level(hwmgr); + current_lane_width = + vega10_get_current_pcie_link_width_level(hwmgr); + for (i = 0; i < NUM_LINK_LEVELS; i++) { + gen_speed = pptable->PcieGenSpeed[i]; + lane_width = pptable->PcieLaneCount[i]; + + *offset += sysfs_emit_at(buf, *offset, "%d: %s %s %s\n", i, + (gen_speed == 0) ? "2.5GT/s," : + (gen_speed == 1) ? "5.0GT/s," : + (gen_speed == 2) ? "8.0GT/s," : + (gen_speed == 3) ? "16.0GT/s," : "", + (lane_width == 1) ? "x1" : + (lane_width == 2) ? "x2" : + (lane_width == 3) ? "x4" : + (lane_width == 4) ? "x8" : + (lane_width == 5) ? "x12" : + (lane_width == 6) ? "x16" : "", + (current_gen_speed == gen_speed) && + (current_lane_width == lane_width) ? 
+ "*" : ""); + } + break; + + case OD_SCLK: + if (!hwmgr->od_enabled) + return -EOPNOTSUPP; + + *offset += sysfs_emit_at(buf, *offset, "%s:\n", "OD_SCLK"); + podn_vdd_dep = &data->odn_dpm_table.vdd_dep_on_sclk; + for (i = 0; i < podn_vdd_dep->count; i++) + *offset += sysfs_emit_at(buf, *offset, "%d: %10uMhz %10umV\n", + i, podn_vdd_dep->entries[i].clk / 100, + podn_vdd_dep->entries[i].vddc); + break; + case OD_MCLK: + if (!hwmgr->od_enabled) + return -EOPNOTSUPP; + + *offset += sysfs_emit_at(buf, *offset, "%s:\n", "OD_MCLK"); + podn_vdd_dep = &data->odn_dpm_table.vdd_dep_on_mclk; + for (i = 0; i < podn_vdd_dep->count; i++) + *offset += sysfs_emit_at(buf, *offset, "%d: %10uMhz %10umV\n", + i, podn_vdd_dep->entries[i].clk/100, + podn_vdd_dep->entries[i].vddc); + break; + case OD_RANGE: + if (!hwmgr->od_enabled) + return -EOPNOTSUPP; + + *offset += sysfs_emit_at(buf, *offset, "%s:\n", "OD_RANGE"); + *offset += sysfs_emit_at(buf, *offset, "SCLK: %7uMHz %10uMHz\n", + data->golden_dpm_table.gfx_table.dpm_levels[0].value/100, + hwmgr->platform_descriptor.overdriveLimit.engineClock/100); + *offset += sysfs_emit_at(buf, *offset, "MCLK: %7uMHz %10uMHz\n", + data->golden_dpm_table.mem_table.dpm_levels[0].value/100, + hwmgr->platform_descriptor.overdriveLimit.memoryClock/100); + *offset += sysfs_emit_at(buf, *offset, "VDDC: %7umV %11umV\n", + data->odn_dpm_table.min_vddc, + data->odn_dpm_table.max_vddc); + break; + default: + ret = -ENOENT; + break; + } + return ret; +} + static int vega10_print_clock_levels(struct pp_hwmgr *hwmgr, enum pp_clock_type type, char *buf) { @@ -5559,6 +5705,7 @@ static const struct pp_hwmgr_func vega10_hwmgr_funcs = { .set_watermarks_for_clocks_ranges = vega10_set_watermarks_for_clocks_ranges, .display_clock_voltage_request = vega10_display_clock_voltage_request, .force_clock_level = vega10_force_clock_level, + .emit_clock_levels = vega10_emit_clock_levels, .print_clock_levels = vega10_print_clock_levels, .display_config_changed = 
vega10_display_configuration_changed_task, .powergate_uvd = vega10_power_gate_uvd, diff --git a/drivers/gpu/drm/amd/pm/powerplay/inc/hwmgr.h b/drivers/gpu/drm/amd/pm/powerplay/inc/hwmgr.h index 4f7f2f455301..27f8d0e0e6a8 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/inc/hwmgr.h +++ b/drivers/gpu/drm/amd/pm/powerplay/inc/hwmgr.h @@ -313,6 +313,8 @@ struct pp_hwmgr_func { int (*get_max_high_clocks)(struct pp_hwmgr *hwmgr, struct amd_pp_simple_clock_info *clocks); int (*power_off_asic)(struct pp_hwmgr *hwmgr); int (*force_clock_level)(struct pp_hwmgr *hwmgr, enum pp_clock_type type, uint32_t mask); + int (*emit_clock_levels)(struct pp_hwmgr *hwmgr, + enum pp_clock_type type, char *buf, int *offset); int (*print_clock_levels)(struct pp_hwmgr *hwmgr, enum pp_clock_type type, char *buf); int (*powergate_gfx)(struct pp_hwmgr *hwmgr, bool enable); int (*get_sclk_od)(struct pp_hwmgr *hwmgr); diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h index ef57b6089c69..46e34ed8a3c8 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h @@ -1333,6 +1333,8 @@ typedef enum { METRICS_VOLTAGE_VDDGFX, METRICS_SS_APU_SHARE, METRICS_SS_DGPU_SHARE, + METRICS_UNIQUE_ID_UPPER32, + METRICS_UNIQUE_ID_LOWER32, } MetricsMember_t; enum smu_cmn2asic_mapping_type { diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu11_driver_if_sienna_cichlid.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu11_driver_if_sienna_cichlid.h index 3e4a314ef925..08f0bb2af5d2 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu11_driver_if_sienna_cichlid.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu11_driver_if_sienna_cichlid.h @@ -1419,8 +1419,8 @@ typedef struct { uint8_t PcieRate ; uint8_t PcieWidth ; uint16_t AverageGfxclkFrequencyTarget; - uint16_t Padding16_2; + uint16_t Padding16_2; } SmuMetrics_t; typedef struct { @@ -1476,8 +1476,8 @@ typedef struct { uint8_t PcieRate ; uint8_t PcieWidth 
; uint16_t AverageGfxclkFrequencyTarget; - uint16_t Padding16_2; + uint16_t Padding16_2; } SmuMetrics_V2_t; typedef struct { @@ -1535,6 +1535,9 @@ typedef struct { uint8_t PcieWidth; uint16_t AverageGfxclkFrequencyTarget; + uint32_t PublicSerialNumLower32; + uint32_t PublicSerialNumUpper32; + } SmuMetrics_V3_t; typedef struct { diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c index 38f04836c82f..ab3e9d8b831e 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c @@ -715,6 +715,14 @@ static int sienna_cichlid_get_smu_metrics_data(struct smu_context *smu, *value = use_metrics_v3 ? metrics_v3->CurrFanSpeed : use_metrics_v2 ? metrics_v2->CurrFanSpeed : metrics->CurrFanSpeed; break; + case METRICS_UNIQUE_ID_UPPER32: + /* Only supported in 0x3A5300+, metrics_v3 requires 0x3A4900+ */ + *value = use_metrics_v3 ? metrics_v3->PublicSerialNumUpper32 : 0; + break; + case METRICS_UNIQUE_ID_LOWER32: + /* Only supported in 0x3A5300+, metrics_v3 requires 0x3A4900+ */ + *value = use_metrics_v3 ? 
metrics_v3->PublicSerialNumLower32 : 0; + break; default: *value = UINT_MAX; break; @@ -1773,6 +1781,28 @@ static int sienna_cichlid_read_sensor(struct smu_context *smu, return ret; } +static void sienna_cichlid_get_unique_id(struct smu_context *smu) +{ + struct amdgpu_device *adev = smu->adev; + uint32_t upper32 = 0, lower32 = 0; + + /* Only supported as of version 0.58.83.0 and only on Sienna Cichlid */ + if (smu->smc_fw_version < 0x3A5300 || + smu->adev->ip_versions[MP1_HWIP][0] != IP_VERSION(11, 0, 7)) + return; + + if (sienna_cichlid_get_smu_metrics_data(smu, METRICS_UNIQUE_ID_UPPER32, &upper32)) + goto out; + if (sienna_cichlid_get_smu_metrics_data(smu, METRICS_UNIQUE_ID_LOWER32, &lower32)) + goto out; + +out: + + adev->unique_id = ((uint64_t)upper32 << 32) | lower32; + if (adev->serial[0] == '\0') + sprintf(adev->serial, "%016llx", adev->unique_id); +} + static int sienna_cichlid_get_uclk_dpm_states(struct smu_context *smu, uint32_t *clocks_in_khz, uint32_t *num_states) { uint32_t num_discrete_levels = 0; @@ -4182,6 +4212,7 @@ static const struct pptable_funcs sienna_cichlid_ppt_funcs = { .get_ecc_info = sienna_cichlid_get_ecc_info, .get_default_config_table_settings = sienna_cichlid_get_default_config_table_settings, .set_config_table = sienna_cichlid_set_config_table, + .get_unique_id = sienna_cichlid_get_unique_id, }; void sienna_cichlid_set_ppt_funcs(struct smu_context *smu) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c index cd81f848d45a..38af648cb857 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c @@ -650,6 +650,12 @@ static int aldebaran_get_smu_metrics_data(struct smu_context *smu, case METRICS_THROTTLER_STATUS: *value = metrics->ThrottlerStatus; break; + case METRICS_UNIQUE_ID_UPPER32: + *value = metrics->PublicSerialNumUpper32; + break; + case METRICS_UNIQUE_ID_LOWER32: + *value = 
metrics->PublicSerialNumLower32; + break; default: *value = UINT_MAX; break; @@ -1614,16 +1620,12 @@ static void aldebaran_i2c_control_fini(struct smu_context *smu) static void aldebaran_get_unique_id(struct smu_context *smu) { struct amdgpu_device *adev = smu->adev; - SmuMetrics_t *metrics = smu->smu_table.metrics_table; uint32_t upper32 = 0, lower32 = 0; - int ret; - ret = smu_cmn_get_metrics_table(smu, NULL, false); - if (ret) + if (aldebaran_get_smu_metrics_data(smu, METRICS_UNIQUE_ID_UPPER32, &upper32)) + goto out; + if (aldebaran_get_smu_metrics_data(smu, METRICS_UNIQUE_ID_LOWER32, &lower32)) goto out; - - upper32 = metrics->PublicSerialNumUpper32; - lower32 = metrics->PublicSerialNumLower32; out: adev->unique_id = ((uint64_t)upper32 << 32) | lower32; diff --git a/drivers/gpu/drm/radeon/Makefile b/drivers/gpu/drm/radeon/Makefile index 11c97edde54d..4deedaacd655 100644 --- a/drivers/gpu/drm/radeon/Makefile +++ b/drivers/gpu/drm/radeon/Makefile @@ -39,12 +39,12 @@ radeon-y += radeon_device.o radeon_asic.o radeon_kms.o \ radeon_clocks.o radeon_fb.o radeon_gem.o radeon_ring.o radeon_irq_kms.o \ radeon_cs.o radeon_bios.o radeon_benchmark.o r100.o r300.o r420.o \ rs400.o rs600.o rs690.o rv515.o r520.o r600.o rv770.o radeon_test.o \ - r200.o radeon_legacy_tv.o r600_cs.o r600_blit_shaders.o \ + r200.o radeon_legacy_tv.o r600_cs.o \ radeon_pm.o atombios_dp.o r600_hdmi.o dce3_1_afmt.o \ evergreen.o evergreen_cs.o evergreen_blit_shaders.o \ - evergreen_hdmi.o radeon_trace_points.o ni.o cayman_blit_shaders.o \ + evergreen_hdmi.o radeon_trace_points.o ni.o \ atombios_encoders.o radeon_semaphore.o radeon_sa.o atombios_i2c.o si.o \ - si_blit_shaders.o radeon_prime.o cik.o cik_blit_shaders.o \ + radeon_prime.o cik.o cik_blit_shaders.o \ r600_dpm.o rs780_dpm.o rv6xx_dpm.o rv770_dpm.o rv730_dpm.o rv740_dpm.o \ rv770_smc.o cypress_dpm.o btc_dpm.o sumo_dpm.o sumo_smc.o trinity_dpm.o \ trinity_smc.o ni_dpm.o si_smc.o si_dpm.o kv_smc.o kv_dpm.o ci_smc.o \ diff --git 
a/drivers/gpu/drm/radeon/cayman_blit_shaders.c b/drivers/gpu/drm/radeon/cayman_blit_shaders.c deleted file mode 100644 index 9fec4d09f383..000000000000 --- a/drivers/gpu/drm/radeon/cayman_blit_shaders.c +++ /dev/null @@ -1,320 +0,0 @@ -/* - * Copyright 2010 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - * - * Authors: - * Alex Deucher <alexander.deucher@amd.com> - */ - -#include <linux/bug.h> -#include <linux/types.h> -#include <linux/kernel.h> - -/* - * evergreen cards need to use the 3D engine to blit data which requires - * quite a bit of hw state setup. Rather than pull the whole 3D driver - * (which normally generates the 3D state) into the DRM, we opt to use - * statically generated state tables. The register state and shaders - * were hand generated to support blitting functionality. 
See the 3D - * driver or documentation for descriptions of the registers and - * shader instructions. - */ - -const u32 cayman_default_state[] = -{ - 0xc0066900, - 0x00000000, - 0x00000060, /* DB_RENDER_CONTROL */ - 0x00000000, /* DB_COUNT_CONTROL */ - 0x00000000, /* DB_DEPTH_VIEW */ - 0x0000002a, /* DB_RENDER_OVERRIDE */ - 0x00000000, /* DB_RENDER_OVERRIDE2 */ - 0x00000000, /* DB_HTILE_DATA_BASE */ - - 0xc0026900, - 0x0000000a, - 0x00000000, /* DB_STENCIL_CLEAR */ - 0x00000000, /* DB_DEPTH_CLEAR */ - - 0xc0036900, - 0x0000000f, - 0x00000000, /* DB_DEPTH_INFO */ - 0x00000000, /* DB_Z_INFO */ - 0x00000000, /* DB_STENCIL_INFO */ - - 0xc0016900, - 0x00000080, - 0x00000000, /* PA_SC_WINDOW_OFFSET */ - - 0xc00d6900, - 0x00000083, - 0x0000ffff, /* PA_SC_CLIPRECT_RULE */ - 0x00000000, /* PA_SC_CLIPRECT_0_TL */ - 0x20002000, /* PA_SC_CLIPRECT_0_BR */ - 0x00000000, - 0x20002000, - 0x00000000, - 0x20002000, - 0x00000000, - 0x20002000, - 0xaaaaaaaa, /* PA_SC_EDGERULE */ - 0x00000000, /* PA_SU_HARDWARE_SCREEN_OFFSET */ - 0x0000000f, /* CB_TARGET_MASK */ - 0x0000000f, /* CB_SHADER_MASK */ - - 0xc0226900, - 0x00000094, - 0x80000000, /* PA_SC_VPORT_SCISSOR_0_TL */ - 0x20002000, /* PA_SC_VPORT_SCISSOR_0_BR */ - 0x80000000, - 0x20002000, - 0x80000000, - 0x20002000, - 0x80000000, - 0x20002000, - 0x80000000, - 0x20002000, - 0x80000000, - 0x20002000, - 0x80000000, - 0x20002000, - 0x80000000, - 0x20002000, - 0x80000000, - 0x20002000, - 0x80000000, - 0x20002000, - 0x80000000, - 0x20002000, - 0x80000000, - 0x20002000, - 0x80000000, - 0x20002000, - 0x80000000, - 0x20002000, - 0x80000000, - 0x20002000, - 0x80000000, - 0x20002000, - 0x00000000, /* PA_SC_VPORT_ZMIN_0 */ - 0x3f800000, /* PA_SC_VPORT_ZMAX_0 */ - - 0xc0016900, - 0x000000d4, - 0x00000000, /* SX_MISC */ - - 0xc0026900, - 0x000000d9, - 0x00000000, /* CP_RINGID */ - 0x00000000, /* CP_VMID */ - - 0xc0096900, - 0x00000100, - 0x00ffffff, /* VGT_MAX_VTX_INDX */ - 0x00000000, /* VGT_MIN_VTX_INDX */ - 0x00000000, /* VGT_INDX_OFFSET */ - 
0x00000000, /* VGT_MULTI_PRIM_IB_RESET_INDX */ - 0x00000000, /* SX_ALPHA_TEST_CONTROL */ - 0x00000000, /* CB_BLEND_RED */ - 0x00000000, /* CB_BLEND_GREEN */ - 0x00000000, /* CB_BLEND_BLUE */ - 0x00000000, /* CB_BLEND_ALPHA */ - - 0xc0016900, - 0x00000187, - 0x00000100, /* SPI_VS_OUT_ID_0 */ - - 0xc0026900, - 0x00000191, - 0x00000100, /* SPI_PS_INPUT_CNTL_0 */ - 0x00000101, /* SPI_PS_INPUT_CNTL_1 */ - - 0xc0016900, - 0x000001b1, - 0x00000000, /* SPI_VS_OUT_CONFIG */ - - 0xc0106900, - 0x000001b3, - 0x20000001, /* SPI_PS_IN_CONTROL_0 */ - 0x00000000, /* SPI_PS_IN_CONTROL_1 */ - 0x00000000, /* SPI_INTERP_CONTROL_0 */ - 0x00000000, /* SPI_INPUT_Z */ - 0x00000000, /* SPI_FOG_CNTL */ - 0x00100000, /* SPI_BARYC_CNTL */ - 0x00000000, /* SPI_PS_IN_CONTROL_2 */ - 0x00000000, /* SPI_COMPUTE_INPUT_CNTL */ - 0x00000000, /* SPI_COMPUTE_NUM_THREAD_X */ - 0x00000000, /* SPI_COMPUTE_NUM_THREAD_Y */ - 0x00000000, /* SPI_COMPUTE_NUM_THREAD_Z */ - 0x00000000, /* SPI_GPR_MGMT */ - 0x00000000, /* SPI_LDS_MGMT */ - 0x00000000, /* SPI_STACK_MGMT */ - 0x00000000, /* SPI_WAVE_MGMT_1 */ - 0x00000000, /* SPI_WAVE_MGMT_2 */ - - 0xc0016900, - 0x000001e0, - 0x00000000, /* CB_BLEND0_CONTROL */ - - 0xc00e6900, - 0x00000200, - 0x00000000, /* DB_DEPTH_CONTROL */ - 0x00000000, /* DB_EQAA */ - 0x00cc0010, /* CB_COLOR_CONTROL */ - 0x00000210, /* DB_SHADER_CONTROL */ - 0x00010000, /* PA_CL_CLIP_CNTL */ - 0x00000004, /* PA_SU_SC_MODE_CNTL */ - 0x00000100, /* PA_CL_VTE_CNTL */ - 0x00000000, /* PA_CL_VS_OUT_CNTL */ - 0x00000000, /* PA_CL_NANINF_CNTL */ - 0x00000000, /* PA_SU_LINE_STIPPLE_CNTL */ - 0x00000000, /* PA_SU_LINE_STIPPLE_SCALE */ - 0x00000000, /* PA_SU_PRIM_FILTER_CNTL */ - 0x00000000, /* */ - 0x00000000, /* */ - - 0xc0026900, - 0x00000229, - 0x00000000, /* SQ_PGM_START_FS */ - 0x00000000, - - 0xc0016900, - 0x0000023b, - 0x00000000, /* SQ_LDS_ALLOC_PS */ - - 0xc0066900, - 0x00000240, - 0x00000000, /* SQ_ESGS_RING_ITEMSIZE */ - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - - 
0xc0046900, - 0x00000247, - 0x00000000, /* SQ_GS_VERT_ITEMSIZE */ - 0x00000000, - 0x00000000, - 0x00000000, - - 0xc0116900, - 0x00000280, - 0x00000000, /* PA_SU_POINT_SIZE */ - 0x00000000, /* PA_SU_POINT_MINMAX */ - 0x00000008, /* PA_SU_LINE_CNTL */ - 0x00000000, /* PA_SC_LINE_STIPPLE */ - 0x00000000, /* VGT_OUTPUT_PATH_CNTL */ - 0x00000000, /* VGT_HOS_CNTL */ - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, /* VGT_GS_MODE */ - - 0xc0026900, - 0x00000292, - 0x00000000, /* PA_SC_MODE_CNTL_0 */ - 0x00000000, /* PA_SC_MODE_CNTL_1 */ - - 0xc0016900, - 0x000002a1, - 0x00000000, /* VGT_PRIMITIVEID_EN */ - - 0xc0016900, - 0x000002a5, - 0x00000000, /* VGT_MULTI_PRIM_IB_RESET_EN */ - - 0xc0026900, - 0x000002a8, - 0x00000000, /* VGT_INSTANCE_STEP_RATE_0 */ - 0x00000000, - - 0xc0026900, - 0x000002ad, - 0x00000000, /* VGT_REUSE_OFF */ - 0x00000000, - - 0xc0016900, - 0x000002d5, - 0x00000000, /* VGT_SHADER_STAGES_EN */ - - 0xc0016900, - 0x000002dc, - 0x0000aa00, /* DB_ALPHA_TO_MASK */ - - 0xc0066900, - 0x000002de, - 0x00000000, /* PA_SU_POLY_OFFSET_DB_FMT_CNTL */ - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - - 0xc0026900, - 0x000002e5, - 0x00000000, /* VGT_STRMOUT_CONFIG */ - 0x00000000, - - 0xc01b6900, - 0x000002f5, - 0x76543210, /* PA_SC_CENTROID_PRIORITY_0 */ - 0xfedcba98, /* PA_SC_CENTROID_PRIORITY_1 */ - 0x00000000, /* PA_SC_LINE_CNTL */ - 0x00000000, /* PA_SC_AA_CONFIG */ - 0x00000005, /* PA_SU_VTX_CNTL */ - 0x3f800000, /* PA_CL_GB_VERT_CLIP_ADJ */ - 0x3f800000, /* PA_CL_GB_VERT_DISC_ADJ */ - 0x3f800000, /* PA_CL_GB_HORZ_CLIP_ADJ */ - 0x3f800000, /* PA_CL_GB_HORZ_DISC_ADJ */ - 0x00000000, /* PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0 */ - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 
0xffffffff, /* PA_SC_AA_MASK_X0Y0_X1Y0 */ - 0xffffffff, - - 0xc0026900, - 0x00000316, - 0x0000000e, /* VGT_VERTEX_REUSE_BLOCK_CNTL */ - 0x00000010, /* */ -}; - -const u32 cayman_default_size = ARRAY_SIZE(cayman_default_state); diff --git a/drivers/gpu/drm/radeon/cayman_blit_shaders.h b/drivers/gpu/drm/radeon/cayman_blit_shaders.h index f5d0e9a60267..1dca73d9e005 100644 --- a/drivers/gpu/drm/radeon/cayman_blit_shaders.h +++ b/drivers/gpu/drm/radeon/cayman_blit_shaders.h @@ -20,16 +20,300 @@ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. * + * Authors: + * Alex Deucher <alexander.deucher@amd.com> */ #ifndef CAYMAN_BLIT_SHADERS_H #define CAYMAN_BLIT_SHADERS_H -extern const u32 cayman_ps[]; -extern const u32 cayman_vs[]; -extern const u32 cayman_default_state[]; +/* + * evergreen cards need to use the 3D engine to blit data which requires + * quite a bit of hw state setup. Rather than pull the whole 3D driver + * (which normally generates the 3D state) into the DRM, we opt to use + * statically generated state tables. The register state and shaders + * were hand generated to support blitting functionality. See the 3D + * driver or documentation for descriptions of the registers and + * shader instructions. 
+ */ +static const u32 cayman_default_state[] = { + 0xc0066900, + 0x00000000, + 0x00000060, /* DB_RENDER_CONTROL */ + 0x00000000, /* DB_COUNT_CONTROL */ + 0x00000000, /* DB_DEPTH_VIEW */ + 0x0000002a, /* DB_RENDER_OVERRIDE */ + 0x00000000, /* DB_RENDER_OVERRIDE2 */ + 0x00000000, /* DB_HTILE_DATA_BASE */ + + 0xc0026900, + 0x0000000a, + 0x00000000, /* DB_STENCIL_CLEAR */ + 0x00000000, /* DB_DEPTH_CLEAR */ + + 0xc0036900, + 0x0000000f, + 0x00000000, /* DB_DEPTH_INFO */ + 0x00000000, /* DB_Z_INFO */ + 0x00000000, /* DB_STENCIL_INFO */ + + 0xc0016900, + 0x00000080, + 0x00000000, /* PA_SC_WINDOW_OFFSET */ + + 0xc00d6900, + 0x00000083, + 0x0000ffff, /* PA_SC_CLIPRECT_RULE */ + 0x00000000, /* PA_SC_CLIPRECT_0_TL */ + 0x20002000, /* PA_SC_CLIPRECT_0_BR */ + 0x00000000, + 0x20002000, + 0x00000000, + 0x20002000, + 0x00000000, + 0x20002000, + 0xaaaaaaaa, /* PA_SC_EDGERULE */ + 0x00000000, /* PA_SU_HARDWARE_SCREEN_OFFSET */ + 0x0000000f, /* CB_TARGET_MASK */ + 0x0000000f, /* CB_SHADER_MASK */ + + 0xc0226900, + 0x00000094, + 0x80000000, /* PA_SC_VPORT_SCISSOR_0_TL */ + 0x20002000, /* PA_SC_VPORT_SCISSOR_0_BR */ + 0x80000000, + 0x20002000, + 0x80000000, + 0x20002000, + 0x80000000, + 0x20002000, + 0x80000000, + 0x20002000, + 0x80000000, + 0x20002000, + 0x80000000, + 0x20002000, + 0x80000000, + 0x20002000, + 0x80000000, + 0x20002000, + 0x80000000, + 0x20002000, + 0x80000000, + 0x20002000, + 0x80000000, + 0x20002000, + 0x80000000, + 0x20002000, + 0x80000000, + 0x20002000, + 0x80000000, + 0x20002000, + 0x80000000, + 0x20002000, + 0x00000000, /* PA_SC_VPORT_ZMIN_0 */ + 0x3f800000, /* PA_SC_VPORT_ZMAX_0 */ + + 0xc0016900, + 0x000000d4, + 0x00000000, /* SX_MISC */ + + 0xc0026900, + 0x000000d9, + 0x00000000, /* CP_RINGID */ + 0x00000000, /* CP_VMID */ + + 0xc0096900, + 0x00000100, + 0x00ffffff, /* VGT_MAX_VTX_INDX */ + 0x00000000, /* VGT_MIN_VTX_INDX */ + 0x00000000, /* VGT_INDX_OFFSET */ + 0x00000000, /* VGT_MULTI_PRIM_IB_RESET_INDX */ + 0x00000000, /* SX_ALPHA_TEST_CONTROL */ + 
0x00000000, /* CB_BLEND_RED */ + 0x00000000, /* CB_BLEND_GREEN */ + 0x00000000, /* CB_BLEND_BLUE */ + 0x00000000, /* CB_BLEND_ALPHA */ + + 0xc0016900, + 0x00000187, + 0x00000100, /* SPI_VS_OUT_ID_0 */ + + 0xc0026900, + 0x00000191, + 0x00000100, /* SPI_PS_INPUT_CNTL_0 */ + 0x00000101, /* SPI_PS_INPUT_CNTL_1 */ + + 0xc0016900, + 0x000001b1, + 0x00000000, /* SPI_VS_OUT_CONFIG */ + + 0xc0106900, + 0x000001b3, + 0x20000001, /* SPI_PS_IN_CONTROL_0 */ + 0x00000000, /* SPI_PS_IN_CONTROL_1 */ + 0x00000000, /* SPI_INTERP_CONTROL_0 */ + 0x00000000, /* SPI_INPUT_Z */ + 0x00000000, /* SPI_FOG_CNTL */ + 0x00100000, /* SPI_BARYC_CNTL */ + 0x00000000, /* SPI_PS_IN_CONTROL_2 */ + 0x00000000, /* SPI_COMPUTE_INPUT_CNTL */ + 0x00000000, /* SPI_COMPUTE_NUM_THREAD_X */ + 0x00000000, /* SPI_COMPUTE_NUM_THREAD_Y */ + 0x00000000, /* SPI_COMPUTE_NUM_THREAD_Z */ + 0x00000000, /* SPI_GPR_MGMT */ + 0x00000000, /* SPI_LDS_MGMT */ + 0x00000000, /* SPI_STACK_MGMT */ + 0x00000000, /* SPI_WAVE_MGMT_1 */ + 0x00000000, /* SPI_WAVE_MGMT_2 */ + + 0xc0016900, + 0x000001e0, + 0x00000000, /* CB_BLEND0_CONTROL */ + + 0xc00e6900, + 0x00000200, + 0x00000000, /* DB_DEPTH_CONTROL */ + 0x00000000, /* DB_EQAA */ + 0x00cc0010, /* CB_COLOR_CONTROL */ + 0x00000210, /* DB_SHADER_CONTROL */ + 0x00010000, /* PA_CL_CLIP_CNTL */ + 0x00000004, /* PA_SU_SC_MODE_CNTL */ + 0x00000100, /* PA_CL_VTE_CNTL */ + 0x00000000, /* PA_CL_VS_OUT_CNTL */ + 0x00000000, /* PA_CL_NANINF_CNTL */ + 0x00000000, /* PA_SU_LINE_STIPPLE_CNTL */ + 0x00000000, /* PA_SU_LINE_STIPPLE_SCALE */ + 0x00000000, /* PA_SU_PRIM_FILTER_CNTL */ + 0x00000000, /* */ + 0x00000000, /* */ + + 0xc0026900, + 0x00000229, + 0x00000000, /* SQ_PGM_START_FS */ + 0x00000000, + + 0xc0016900, + 0x0000023b, + 0x00000000, /* SQ_LDS_ALLOC_PS */ + + 0xc0066900, + 0x00000240, + 0x00000000, /* SQ_ESGS_RING_ITEMSIZE */ + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + + 0xc0046900, + 0x00000247, + 0x00000000, /* SQ_GS_VERT_ITEMSIZE */ + 0x00000000, + 
0x00000000, + 0x00000000, + + 0xc0116900, + 0x00000280, + 0x00000000, /* PA_SU_POINT_SIZE */ + 0x00000000, /* PA_SU_POINT_MINMAX */ + 0x00000008, /* PA_SU_LINE_CNTL */ + 0x00000000, /* PA_SC_LINE_STIPPLE */ + 0x00000000, /* VGT_OUTPUT_PATH_CNTL */ + 0x00000000, /* VGT_HOS_CNTL */ + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, /* VGT_GS_MODE */ + + 0xc0026900, + 0x00000292, + 0x00000000, /* PA_SC_MODE_CNTL_0 */ + 0x00000000, /* PA_SC_MODE_CNTL_1 */ + + 0xc0016900, + 0x000002a1, + 0x00000000, /* VGT_PRIMITIVEID_EN */ + + 0xc0016900, + 0x000002a5, + 0x00000000, /* VGT_MULTI_PRIM_IB_RESET_EN */ + + 0xc0026900, + 0x000002a8, + 0x00000000, /* VGT_INSTANCE_STEP_RATE_0 */ + 0x00000000, + + 0xc0026900, + 0x000002ad, + 0x00000000, /* VGT_REUSE_OFF */ + 0x00000000, + + 0xc0016900, + 0x000002d5, + 0x00000000, /* VGT_SHADER_STAGES_EN */ + + 0xc0016900, + 0x000002dc, + 0x0000aa00, /* DB_ALPHA_TO_MASK */ + + 0xc0066900, + 0x000002de, + 0x00000000, /* PA_SU_POLY_OFFSET_DB_FMT_CNTL */ + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + + 0xc0026900, + 0x000002e5, + 0x00000000, /* VGT_STRMOUT_CONFIG */ + 0x00000000, + + 0xc01b6900, + 0x000002f5, + 0x76543210, /* PA_SC_CENTROID_PRIORITY_0 */ + 0xfedcba98, /* PA_SC_CENTROID_PRIORITY_1 */ + 0x00000000, /* PA_SC_LINE_CNTL */ + 0x00000000, /* PA_SC_AA_CONFIG */ + 0x00000005, /* PA_SU_VTX_CNTL */ + 0x3f800000, /* PA_CL_GB_VERT_CLIP_ADJ */ + 0x3f800000, /* PA_CL_GB_VERT_DISC_ADJ */ + 0x3f800000, /* PA_CL_GB_HORZ_CLIP_ADJ */ + 0x3f800000, /* PA_CL_GB_HORZ_DISC_ADJ */ + 0x00000000, /* PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0 */ + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0xffffffff, /* PA_SC_AA_MASK_X0Y0_X1Y0 */ + 0xffffffff, + + 0xc0026900, + 0x00000316, + 
0x0000000e, /* VGT_VERTEX_REUSE_BLOCK_CNTL */ + 0x00000010, /* */ +}; -extern const u32 cayman_ps_size, cayman_vs_size; -extern const u32 cayman_default_size; +static const u32 cayman_default_size = ARRAY_SIZE(cayman_default_state); #endif diff --git a/drivers/gpu/drm/radeon/r600_blit_shaders.c b/drivers/gpu/drm/radeon/r600_blit_shaders.c deleted file mode 100644 index 443cbe59b274..000000000000 --- a/drivers/gpu/drm/radeon/r600_blit_shaders.c +++ /dev/null @@ -1,719 +0,0 @@ -/* - * Copyright 2009 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - * - * Authors: - * Alex Deucher <alexander.deucher@amd.com> - */ - -#include <linux/bug.h> -#include <linux/types.h> -#include <linux/kernel.h> - -/* - * R6xx+ cards need to use the 3D engine to blit data which requires - * quite a bit of hw state setup. 
Rather than pull the whole 3D driver - * (which normally generates the 3D state) into the DRM, we opt to use - * statically generated state tables. The register state and shaders - * were hand generated to support blitting functionality. See the 3D - * driver or documentation for descriptions of the registers and - * shader instructions. - */ - -const u32 r6xx_default_state[] = -{ - 0xc0002400, /* START_3D_CMDBUF */ - 0x00000000, - - 0xc0012800, /* CONTEXT_CONTROL */ - 0x80000000, - 0x80000000, - - 0xc0016800, - 0x00000010, - 0x00008000, /* WAIT_UNTIL */ - - 0xc0016800, - 0x00000542, - 0x07000003, /* TA_CNTL_AUX */ - - 0xc0016800, - 0x000005c5, - 0x00000000, /* VC_ENHANCE */ - - 0xc0016800, - 0x00000363, - 0x00000000, /* SQ_DYN_GPR_CNTL_PS_FLUSH_REQ */ - - 0xc0016800, - 0x0000060c, - 0x82000000, /* DB_DEBUG */ - - 0xc0016800, - 0x0000060e, - 0x01020204, /* DB_WATERMARKS */ - - 0xc0026f00, - 0x00000000, - 0x00000000, /* SQ_VTX_BASE_VTX_LOC */ - 0x00000000, /* SQ_VTX_START_INST_LOC */ - - 0xc0096900, - 0x0000022a, - 0x00000000, /* SQ_ESGS_RING_ITEMSIZE */ - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - - 0xc0016900, - 0x00000004, - 0x00000000, /* DB_DEPTH_INFO */ - - 0xc0026900, - 0x0000000a, - 0x00000000, /* DB_STENCIL_CLEAR */ - 0x00000000, /* DB_DEPTH_CLEAR */ - - 0xc0016900, - 0x00000200, - 0x00000000, /* DB_DEPTH_CONTROL */ - - 0xc0026900, - 0x00000343, - 0x00000060, /* DB_RENDER_CONTROL */ - 0x00000040, /* DB_RENDER_OVERRIDE */ - - 0xc0016900, - 0x00000351, - 0x0000aa00, /* DB_ALPHA_TO_MASK */ - - 0xc00f6900, - 0x00000100, - 0x00000800, /* VGT_MAX_VTX_INDX */ - 0x00000000, /* VGT_MIN_VTX_INDX */ - 0x00000000, /* VGT_INDX_OFFSET */ - 0x00000000, /* VGT_MULTI_PRIM_IB_RESET_INDX */ - 0x00000000, /* SX_ALPHA_TEST_CONTROL */ - 0x00000000, /* CB_BLEND_RED */ - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, /* CB_FOG_RED */ - 0x00000000, - 0x00000000, - 0x00000000, /* DB_STENCILREFMASK */ - 
0x00000000, /* DB_STENCILREFMASK_BF */ - 0x00000000, /* SX_ALPHA_REF */ - - 0xc0046900, - 0x0000030c, - 0x01000000, /* CB_CLRCMP_CNTL */ - 0x00000000, - 0x00000000, - 0x00000000, - - 0xc0046900, - 0x00000048, - 0x3f800000, /* CB_CLEAR_RED */ - 0x00000000, - 0x3f800000, - 0x3f800000, - - 0xc0016900, - 0x00000080, - 0x00000000, /* PA_SC_WINDOW_OFFSET */ - - 0xc00a6900, - 0x00000083, - 0x0000ffff, /* PA_SC_CLIP_RECT_RULE */ - 0x00000000, /* PA_SC_CLIPRECT_0_TL */ - 0x20002000, - 0x00000000, - 0x20002000, - 0x00000000, - 0x20002000, - 0x00000000, - 0x20002000, - 0x00000000, /* PA_SC_EDGERULE */ - - 0xc0406900, - 0x00000094, - 0x80000000, /* PA_SC_VPORT_SCISSOR_0_TL */ - 0x20002000, /* PA_SC_VPORT_SCISSOR_0_BR */ - 0x80000000, /* PA_SC_VPORT_SCISSOR_1_TL */ - 0x20002000, - 0x80000000, - 0x20002000, - 0x80000000, - 0x20002000, - 0x80000000, - 0x20002000, - 0x80000000, - 0x20002000, - 0x80000000, - 0x20002000, - 0x80000000, - 0x20002000, - 0x80000000, - 0x20002000, - 0x80000000, - 0x20002000, - 0x80000000, - 0x20002000, - 0x80000000, - 0x20002000, - 0x80000000, - 0x20002000, - 0x80000000, - 0x20002000, - 0x80000000, - 0x20002000, - 0x80000000, - 0x20002000, - 0x00000000, /* PA_SC_VPORT_ZMIN_0 */ - 0x3f800000, - 0x00000000, - 0x3f800000, - 0x00000000, - 0x3f800000, - 0x00000000, - 0x3f800000, - 0x00000000, - 0x3f800000, - 0x00000000, - 0x3f800000, - 0x00000000, - 0x3f800000, - 0x00000000, - 0x3f800000, - 0x00000000, - 0x3f800000, - 0x00000000, - 0x3f800000, - 0x00000000, - 0x3f800000, - 0x00000000, - 0x3f800000, - 0x00000000, - 0x3f800000, - 0x00000000, - 0x3f800000, - 0x00000000, - 0x3f800000, - 0x00000000, - 0x3f800000, - - 0xc0026900, - 0x00000292, - 0x00000000, /* PA_SC_MPASS_PS_CNTL */ - 0x00004010, /* PA_SC_MODE_CNTL */ - - 0xc0096900, - 0x00000300, - 0x00000000, /* PA_SC_LINE_CNTL */ - 0x00000000, /* PA_SC_AA_CONFIG */ - 0x0000002d, /* PA_SU_VTX_CNTL */ - 0x3f800000, /* PA_CL_GB_VERT_CLIP_ADJ */ - 0x3f800000, - 0x3f800000, - 0x3f800000, - 0x00000000, /* 
PA_SC_SAMPLE_LOCS_MCTX */ - 0x00000000, - - 0xc0016900, - 0x00000312, - 0xffffffff, /* PA_SC_AA_MASK */ - - 0xc0066900, - 0x0000037e, - 0x00000000, /* PA_SU_POLY_OFFSET_DB_FMT_CNTL */ - 0x00000000, /* PA_SU_POLY_OFFSET_CLAMP */ - 0x00000000, /* PA_SU_POLY_OFFSET_FRONT_SCALE */ - 0x00000000, /* PA_SU_POLY_OFFSET_FRONT_OFFSET */ - 0x00000000, /* PA_SU_POLY_OFFSET_BACK_SCALE */ - 0x00000000, /* PA_SU_POLY_OFFSET_BACK_OFFSET */ - - 0xc0046900, - 0x000001b6, - 0x00000000, /* SPI_INPUT_Z */ - 0x00000000, /* SPI_FOG_CNTL */ - 0x00000000, /* SPI_FOG_FUNC_SCALE */ - 0x00000000, /* SPI_FOG_FUNC_BIAS */ - - 0xc0016900, - 0x00000225, - 0x00000000, /* SQ_PGM_START_FS */ - - 0xc0016900, - 0x00000229, - 0x00000000, /* SQ_PGM_RESOURCES_FS */ - - 0xc0016900, - 0x00000237, - 0x00000000, /* SQ_PGM_CF_OFFSET_FS */ - - 0xc0026900, - 0x000002a8, - 0x00000000, /* VGT_INSTANCE_STEP_RATE_0 */ - 0x00000000, /* VGT_INSTANCE_STEP_RATE_1 */ - - 0xc0116900, - 0x00000280, - 0x00000000, /* PA_SU_POINT_SIZE */ - 0x00000000, /* PA_SU_POINT_MINMAX */ - 0x00000008, /* PA_SU_LINE_CNTL */ - 0x00000000, /* PA_SC_LINE_STIPPLE */ - 0x00000000, /* VGT_OUTPUT_PATH_CNTL */ - 0x00000000, /* VGT_HOS_CNTL */ - 0x00000000, /* VGT_HOS_MAX_TESS_LEVEL */ - 0x00000000, /* VGT_HOS_MIN_TESS_LEVEL */ - 0x00000000, /* VGT_HOS_REUSE_DEPTH */ - 0x00000000, /* VGT_GROUP_PRIM_TYPE */ - 0x00000000, /* VGT_GROUP_FIRST_DECR */ - 0x00000000, /* VGT_GROUP_DECR */ - 0x00000000, /* VGT_GROUP_VECT_0_CNTL */ - 0x00000000, /* VGT_GROUP_VECT_1_CNTL */ - 0x00000000, /* VGT_GROUP_VECT_0_FMT_CNTL */ - 0x00000000, /* VGT_GROUP_VECT_1_FMT_CNTL */ - 0x00000000, /* VGT_GS_MODE */ - - 0xc0016900, - 0x000002a1, - 0x00000000, /* VGT_PRIMITIVEID_EN */ - - 0xc0016900, - 0x000002a5, - 0x00000000, /* VGT_MULTI_PRIM_ID_RESET_EN */ - - 0xc0036900, - 0x000002ac, - 0x00000000, /* VGT_STRMOUT_EN */ - 0x00000000, /* VGT_REUSE_OFF */ - 0x00000000, /* VGT_VTX_CNT_EN */ - - 0xc0016900, - 0x000000d4, - 0x00000000, /* SX_MISC */ - - 0xc0016900, - 0x000002c8, 
- 0x00000000, /* VGT_STRMOUT_BUFFER_EN */ - - 0xc0076900, - 0x00000202, - 0x00cc0000, /* CB_COLOR_CONTROL */ - 0x00000210, /* DB_SHADER_CNTL */ - 0x00010000, /* PA_CL_CLIP_CNTL */ - 0x00000244, /* PA_SU_SC_MODE_CNTL */ - 0x00000100, /* PA_CL_VTE_CNTL */ - 0x00000000, /* PA_CL_VS_OUT_CNTL */ - 0x00000000, /* PA_CL_NANINF_CNTL */ - - 0xc0026900, - 0x0000008e, - 0x0000000f, /* CB_TARGET_MASK */ - 0x0000000f, /* CB_SHADER_MASK */ - - 0xc0016900, - 0x000001e8, - 0x00000001, /* CB_SHADER_CONTROL */ - - 0xc0016900, - 0x00000185, - 0x00000000, /* SPI_VS_OUT_ID_0 */ - - 0xc0016900, - 0x00000191, - 0x00000b00, /* SPI_PS_INPUT_CNTL_0 */ - - 0xc0056900, - 0x000001b1, - 0x00000000, /* SPI_VS_OUT_CONFIG */ - 0x00000000, /* SPI_THREAD_GROUPING */ - 0x00000001, /* SPI_PS_IN_CONTROL_0 */ - 0x00000000, /* SPI_PS_IN_CONTROL_1 */ - 0x00000000, /* SPI_INTERP_CONTROL_0 */ - - 0xc0036e00, /* SET_SAMPLER */ - 0x00000000, - 0x00000012, - 0x00000000, - 0x00000000, -}; - -const u32 r7xx_default_state[] = -{ - 0xc0012800, /* CONTEXT_CONTROL */ - 0x80000000, - 0x80000000, - - 0xc0016800, - 0x00000010, - 0x00008000, /* WAIT_UNTIL */ - - 0xc0016800, - 0x00000542, - 0x07000002, /* TA_CNTL_AUX */ - - 0xc0016800, - 0x000005c5, - 0x00000000, /* VC_ENHANCE */ - - 0xc0016800, - 0x00000363, - 0x00004000, /* SQ_DYN_GPR_CNTL_PS_FLUSH_REQ */ - - 0xc0016800, - 0x0000060c, - 0x00000000, /* DB_DEBUG */ - - 0xc0016800, - 0x0000060e, - 0x00420204, /* DB_WATERMARKS */ - - 0xc0026f00, - 0x00000000, - 0x00000000, /* SQ_VTX_BASE_VTX_LOC */ - 0x00000000, /* SQ_VTX_START_INST_LOC */ - - 0xc0096900, - 0x0000022a, - 0x00000000, /* SQ_ESGS_RING_ITEMSIZE */ - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - - 0xc0016900, - 0x00000004, - 0x00000000, /* DB_DEPTH_INFO */ - - 0xc0026900, - 0x0000000a, - 0x00000000, /* DB_STENCIL_CLEAR */ - 0x00000000, /* DB_DEPTH_CLEAR */ - - 0xc0016900, - 0x00000200, - 0x00000000, /* DB_DEPTH_CONTROL */ - - 0xc0026900, - 
0x00000343, - 0x00000060, /* DB_RENDER_CONTROL */ - 0x00000000, /* DB_RENDER_OVERRIDE */ - - 0xc0016900, - 0x00000351, - 0x0000aa00, /* DB_ALPHA_TO_MASK */ - - 0xc0096900, - 0x00000100, - 0x00000800, /* VGT_MAX_VTX_INDX */ - 0x00000000, /* VGT_MIN_VTX_INDX */ - 0x00000000, /* VGT_INDX_OFFSET */ - 0x00000000, /* VGT_MULTI_PRIM_IB_RESET_INDX */ - 0x00000000, /* SX_ALPHA_TEST_CONTROL */ - 0x00000000, /* CB_BLEND_RED */ - 0x00000000, - 0x00000000, - 0x00000000, - - 0xc0036900, - 0x0000010c, - 0x00000000, /* DB_STENCILREFMASK */ - 0x00000000, /* DB_STENCILREFMASK_BF */ - 0x00000000, /* SX_ALPHA_REF */ - - 0xc0046900, - 0x0000030c, /* CB_CLRCMP_CNTL */ - 0x01000000, - 0x00000000, - 0x00000000, - 0x00000000, - - 0xc0016900, - 0x00000080, - 0x00000000, /* PA_SC_WINDOW_OFFSET */ - - 0xc00a6900, - 0x00000083, - 0x0000ffff, /* PA_SC_CLIP_RECT_RULE */ - 0x00000000, /* PA_SC_CLIPRECT_0_TL */ - 0x20002000, - 0x00000000, - 0x20002000, - 0x00000000, - 0x20002000, - 0x00000000, - 0x20002000, - 0xaaaaaaaa, /* PA_SC_EDGERULE */ - - 0xc0406900, - 0x00000094, - 0x80000000, /* PA_SC_VPORT_SCISSOR_0_TL */ - 0x20002000, /* PA_SC_VPORT_SCISSOR_0_BR */ - 0x80000000, /* PA_SC_VPORT_SCISSOR_1_TL */ - 0x20002000, - 0x80000000, - 0x20002000, - 0x80000000, - 0x20002000, - 0x80000000, - 0x20002000, - 0x80000000, - 0x20002000, - 0x80000000, - 0x20002000, - 0x80000000, - 0x20002000, - 0x80000000, - 0x20002000, - 0x80000000, - 0x20002000, - 0x80000000, - 0x20002000, - 0x80000000, - 0x20002000, - 0x80000000, - 0x20002000, - 0x80000000, - 0x20002000, - 0x80000000, - 0x20002000, - 0x80000000, - 0x20002000, - 0x00000000, /* PA_SC_VPORT_ZMIN_0 */ - 0x3f800000, - 0x00000000, - 0x3f800000, - 0x00000000, - 0x3f800000, - 0x00000000, - 0x3f800000, - 0x00000000, - 0x3f800000, - 0x00000000, - 0x3f800000, - 0x00000000, - 0x3f800000, - 0x00000000, - 0x3f800000, - 0x00000000, - 0x3f800000, - 0x00000000, - 0x3f800000, - 0x00000000, - 0x3f800000, - 0x00000000, - 0x3f800000, - 0x00000000, - 0x3f800000, - 0x00000000, 
- 0x3f800000, - 0x00000000, - 0x3f800000, - 0x00000000, - 0x3f800000, - - 0xc0026900, - 0x00000292, - 0x00000000, /* PA_SC_MPASS_PS_CNTL */ - 0x00514000, /* PA_SC_MODE_CNTL */ - - 0xc0096900, - 0x00000300, - 0x00000000, /* PA_SC_LINE_CNTL */ - 0x00000000, /* PA_SC_AA_CONFIG */ - 0x0000002d, /* PA_SU_VTX_CNTL */ - 0x3f800000, /* PA_CL_GB_VERT_CLIP_ADJ */ - 0x3f800000, - 0x3f800000, - 0x3f800000, - 0x00000000, /* PA_SC_SAMPLE_LOCS_MCTX */ - 0x00000000, - - 0xc0016900, - 0x00000312, - 0xffffffff, /* PA_SC_AA_MASK */ - - 0xc0066900, - 0x0000037e, - 0x00000000, /* PA_SU_POLY_OFFSET_DB_FMT_CNTL */ - 0x00000000, /* PA_SU_POLY_OFFSET_CLAMP */ - 0x00000000, /* PA_SU_POLY_OFFSET_FRONT_SCALE */ - 0x00000000, /* PA_SU_POLY_OFFSET_FRONT_OFFSET */ - 0x00000000, /* PA_SU_POLY_OFFSET_BACK_SCALE */ - 0x00000000, /* PA_SU_POLY_OFFSET_BACK_OFFSET */ - - 0xc0046900, - 0x000001b6, - 0x00000000, /* SPI_INPUT_Z */ - 0x00000000, /* SPI_FOG_CNTL */ - 0x00000000, /* SPI_FOG_FUNC_SCALE */ - 0x00000000, /* SPI_FOG_FUNC_BIAS */ - - 0xc0016900, - 0x00000225, - 0x00000000, /* SQ_PGM_START_FS */ - - 0xc0016900, - 0x00000229, - 0x00000000, /* SQ_PGM_RESOURCES_FS */ - - 0xc0016900, - 0x00000237, - 0x00000000, /* SQ_PGM_CF_OFFSET_FS */ - - 0xc0026900, - 0x000002a8, - 0x00000000, /* VGT_INSTANCE_STEP_RATE_0 */ - 0x00000000, /* VGT_INSTANCE_STEP_RATE_1 */ - - 0xc0116900, - 0x00000280, - 0x00000000, /* PA_SU_POINT_SIZE */ - 0x00000000, /* PA_SU_POINT_MINMAX */ - 0x00000008, /* PA_SU_LINE_CNTL */ - 0x00000000, /* PA_SC_LINE_STIPPLE */ - 0x00000000, /* VGT_OUTPUT_PATH_CNTL */ - 0x00000000, /* VGT_HOS_CNTL */ - 0x00000000, /* VGT_HOS_MAX_TESS_LEVEL */ - 0x00000000, /* VGT_HOS_MIN_TESS_LEVEL */ - 0x00000000, /* VGT_HOS_REUSE_DEPTH */ - 0x00000000, /* VGT_GROUP_PRIM_TYPE */ - 0x00000000, /* VGT_GROUP_FIRST_DECR */ - 0x00000000, /* VGT_GROUP_DECR */ - 0x00000000, /* VGT_GROUP_VECT_0_CNTL */ - 0x00000000, /* VGT_GROUP_VECT_1_CNTL */ - 0x00000000, /* VGT_GROUP_VECT_0_FMT_CNTL */ - 0x00000000, /* 
VGT_GROUP_VECT_1_FMT_CNTL */ - 0x00000000, /* VGT_GS_MODE */ - - 0xc0016900, - 0x000002a1, - 0x00000000, /* VGT_PRIMITIVEID_EN */ - - 0xc0016900, - 0x000002a5, - 0x00000000, /* VGT_MULTI_PRIM_ID_RESET_EN */ - - 0xc0036900, - 0x000002ac, - 0x00000000, /* VGT_STRMOUT_EN */ - 0x00000000, /* VGT_REUSE_OFF */ - 0x00000000, /* VGT_VTX_CNT_EN */ - - 0xc0016900, - 0x000000d4, - 0x00000000, /* SX_MISC */ - - 0xc0016900, - 0x000002c8, - 0x00000000, /* VGT_STRMOUT_BUFFER_EN */ - - 0xc0076900, - 0x00000202, - 0x00cc0000, /* CB_COLOR_CONTROL */ - 0x00000210, /* DB_SHADER_CNTL */ - 0x00010000, /* PA_CL_CLIP_CNTL */ - 0x00000244, /* PA_SU_SC_MODE_CNTL */ - 0x00000100, /* PA_CL_VTE_CNTL */ - 0x00000000, /* PA_CL_VS_OUT_CNTL */ - 0x00000000, /* PA_CL_NANINF_CNTL */ - - 0xc0026900, - 0x0000008e, - 0x0000000f, /* CB_TARGET_MASK */ - 0x0000000f, /* CB_SHADER_MASK */ - - 0xc0016900, - 0x000001e8, - 0x00000001, /* CB_SHADER_CONTROL */ - - 0xc0016900, - 0x00000185, - 0x00000000, /* SPI_VS_OUT_ID_0 */ - - 0xc0016900, - 0x00000191, - 0x00000b00, /* SPI_PS_INPUT_CNTL_0 */ - - 0xc0056900, - 0x000001b1, - 0x00000000, /* SPI_VS_OUT_CONFIG */ - 0x00000001, /* SPI_THREAD_GROUPING */ - 0x00000001, /* SPI_PS_IN_CONTROL_0 */ - 0x00000000, /* SPI_PS_IN_CONTROL_1 */ - 0x00000000, /* SPI_INTERP_CONTROL_0 */ - - 0xc0036e00, /* SET_SAMPLER */ - 0x00000000, - 0x00000012, - 0x00000000, - 0x00000000, -}; - -/* same for r6xx/r7xx */ -const u32 r6xx_vs[] = -{ - 0x00000004, - 0x81000000, - 0x0000203c, - 0x94000b08, - 0x00004000, - 0x14200b1a, - 0x00000000, - 0x00000000, - 0x3c000000, - 0x68cd1000, -#ifdef __BIG_ENDIAN - 0x000a0000, -#else - 0x00080000, -#endif - 0x00000000, -}; - -const u32 r6xx_ps[] = -{ - 0x00000002, - 0x80800000, - 0x00000000, - 0x94200688, - 0x00000010, - 0x000d1000, - 0xb0800000, - 0x00000000, -}; - -const u32 r6xx_ps_size = ARRAY_SIZE(r6xx_ps); -const u32 r6xx_vs_size = ARRAY_SIZE(r6xx_vs); -const u32 r6xx_default_size = ARRAY_SIZE(r6xx_default_state); -const u32 r7xx_default_size = 
ARRAY_SIZE(r7xx_default_state); diff --git a/drivers/gpu/drm/radeon/r600_blit_shaders.h b/drivers/gpu/drm/radeon/r600_blit_shaders.h deleted file mode 100644 index f437d36dd98c..000000000000 --- a/drivers/gpu/drm/radeon/r600_blit_shaders.h +++ /dev/null @@ -1,38 +0,0 @@ -/* - * Copyright 2009 Advanced Micro Devices, Inc. - * Copyright 2009 Red Hat Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. 
- * - */ - -#ifndef R600_BLIT_SHADERS_H -#define R600_BLIT_SHADERS_H - -extern const u32 r6xx_ps[]; -extern const u32 r6xx_vs[]; -extern const u32 r7xx_default_state[]; -extern const u32 r6xx_default_state[]; - - -extern const u32 r6xx_ps_size, r6xx_vs_size; -extern const u32 r6xx_default_size, r7xx_default_size; - -#endif diff --git a/drivers/gpu/drm/radeon/radeon_pm.c b/drivers/gpu/drm/radeon/radeon_pm.c index c67b6ddb29a4..e765abcb3b01 100644 --- a/drivers/gpu/drm/radeon/radeon_pm.c +++ b/drivers/gpu/drm/radeon/radeon_pm.c @@ -1629,7 +1629,7 @@ int radeon_pm_late_init(struct radeon_device *rdev) ret = device_create_file(rdev->dev, &dev_attr_power_method); if (ret) DRM_ERROR("failed to create device file for power method\n"); - if (!ret) + else rdev->pm.sysfs_initialized = true; } } diff --git a/drivers/gpu/drm/radeon/si_blit_shaders.c b/drivers/gpu/drm/radeon/si_blit_shaders.c deleted file mode 100644 index ec415e7dfa4b..000000000000 --- a/drivers/gpu/drm/radeon/si_blit_shaders.c +++ /dev/null @@ -1,253 +0,0 @@ -/* - * Copyright 2011 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - * - * Authors: - * Alex Deucher <alexander.deucher@amd.com> - */ - -#include <linux/types.h> -#include <linux/bug.h> -#include <linux/kernel.h> - -const u32 si_default_state[] = -{ - 0xc0066900, - 0x00000000, - 0x00000060, /* DB_RENDER_CONTROL */ - 0x00000000, /* DB_COUNT_CONTROL */ - 0x00000000, /* DB_DEPTH_VIEW */ - 0x0000002a, /* DB_RENDER_OVERRIDE */ - 0x00000000, /* DB_RENDER_OVERRIDE2 */ - 0x00000000, /* DB_HTILE_DATA_BASE */ - - 0xc0046900, - 0x00000008, - 0x00000000, /* DB_DEPTH_BOUNDS_MIN */ - 0x00000000, /* DB_DEPTH_BOUNDS_MAX */ - 0x00000000, /* DB_STENCIL_CLEAR */ - 0x00000000, /* DB_DEPTH_CLEAR */ - - 0xc0036900, - 0x0000000f, - 0x00000000, /* DB_DEPTH_INFO */ - 0x00000000, /* DB_Z_INFO */ - 0x00000000, /* DB_STENCIL_INFO */ - - 0xc0016900, - 0x00000080, - 0x00000000, /* PA_SC_WINDOW_OFFSET */ - - 0xc00d6900, - 0x00000083, - 0x0000ffff, /* PA_SC_CLIPRECT_RULE */ - 0x00000000, /* PA_SC_CLIPRECT_0_TL */ - 0x20002000, /* PA_SC_CLIPRECT_0_BR */ - 0x00000000, - 0x20002000, - 0x00000000, - 0x20002000, - 0x00000000, - 0x20002000, - 0xaaaaaaaa, /* PA_SC_EDGERULE */ - 0x00000000, /* PA_SU_HARDWARE_SCREEN_OFFSET */ - 0x0000000f, /* CB_TARGET_MASK */ - 0x0000000f, /* CB_SHADER_MASK */ - - 0xc0226900, - 0x00000094, - 0x80000000, /* PA_SC_VPORT_SCISSOR_0_TL */ - 0x20002000, /* PA_SC_VPORT_SCISSOR_0_BR */ - 0x80000000, - 0x20002000, - 0x80000000, - 0x20002000, - 0x80000000, - 0x20002000, - 0x80000000, - 0x20002000, - 0x80000000, - 0x20002000, - 0x80000000, - 0x20002000, - 0x80000000, - 0x20002000, - 0x80000000, - 0x20002000, - 0x80000000, - 0x20002000, - 0x80000000, - 0x20002000, - 0x80000000, - 0x20002000, - 0x80000000, - 0x20002000, - 0x80000000, - 0x20002000, - 0x80000000, - 
0x20002000, - 0x80000000, - 0x20002000, - 0x00000000, /* PA_SC_VPORT_ZMIN_0 */ - 0x3f800000, /* PA_SC_VPORT_ZMAX_0 */ - - 0xc0026900, - 0x000000d9, - 0x00000000, /* CP_RINGID */ - 0x00000000, /* CP_VMID */ - - 0xc0046900, - 0x00000100, - 0xffffffff, /* VGT_MAX_VTX_INDX */ - 0x00000000, /* VGT_MIN_VTX_INDX */ - 0x00000000, /* VGT_INDX_OFFSET */ - 0x00000000, /* VGT_MULTI_PRIM_IB_RESET_INDX */ - - 0xc0046900, - 0x00000105, - 0x00000000, /* CB_BLEND_RED */ - 0x00000000, /* CB_BLEND_GREEN */ - 0x00000000, /* CB_BLEND_BLUE */ - 0x00000000, /* CB_BLEND_ALPHA */ - - 0xc0016900, - 0x000001e0, - 0x00000000, /* CB_BLEND0_CONTROL */ - - 0xc00e6900, - 0x00000200, - 0x00000000, /* DB_DEPTH_CONTROL */ - 0x00000000, /* DB_EQAA */ - 0x00cc0010, /* CB_COLOR_CONTROL */ - 0x00000210, /* DB_SHADER_CONTROL */ - 0x00010000, /* PA_CL_CLIP_CNTL */ - 0x00000004, /* PA_SU_SC_MODE_CNTL */ - 0x00000100, /* PA_CL_VTE_CNTL */ - 0x00000000, /* PA_CL_VS_OUT_CNTL */ - 0x00000000, /* PA_CL_NANINF_CNTL */ - 0x00000000, /* PA_SU_LINE_STIPPLE_CNTL */ - 0x00000000, /* PA_SU_LINE_STIPPLE_SCALE */ - 0x00000000, /* PA_SU_PRIM_FILTER_CNTL */ - 0x00000000, /* */ - 0x00000000, /* */ - - 0xc0116900, - 0x00000280, - 0x00000000, /* PA_SU_POINT_SIZE */ - 0x00000000, /* PA_SU_POINT_MINMAX */ - 0x00000008, /* PA_SU_LINE_CNTL */ - 0x00000000, /* PA_SC_LINE_STIPPLE */ - 0x00000000, /* VGT_OUTPUT_PATH_CNTL */ - 0x00000000, /* VGT_HOS_CNTL */ - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, /* VGT_GS_MODE */ - - 0xc0026900, - 0x00000292, - 0x00000000, /* PA_SC_MODE_CNTL_0 */ - 0x00000000, /* PA_SC_MODE_CNTL_1 */ - - 0xc0016900, - 0x000002a1, - 0x00000000, /* VGT_PRIMITIVEID_EN */ - - 0xc0016900, - 0x000002a5, - 0x00000000, /* VGT_MULTI_PRIM_IB_RESET_EN */ - - 0xc0026900, - 0x000002a8, - 0x00000000, /* VGT_INSTANCE_STEP_RATE_0 */ - 0x00000000, - - 0xc0026900, - 0x000002ad, - 0x00000000, /* VGT_REUSE_OFF */ - 
0x00000000, - - 0xc0016900, - 0x000002d5, - 0x00000000, /* VGT_SHADER_STAGES_EN */ - - 0xc0016900, - 0x000002dc, - 0x0000aa00, /* DB_ALPHA_TO_MASK */ - - 0xc0066900, - 0x000002de, - 0x00000000, /* PA_SU_POLY_OFFSET_DB_FMT_CNTL */ - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - - 0xc0026900, - 0x000002e5, - 0x00000000, /* VGT_STRMOUT_CONFIG */ - 0x00000000, - - 0xc01b6900, - 0x000002f5, - 0x76543210, /* PA_SC_CENTROID_PRIORITY_0 */ - 0xfedcba98, /* PA_SC_CENTROID_PRIORITY_1 */ - 0x00000000, /* PA_SC_LINE_CNTL */ - 0x00000000, /* PA_SC_AA_CONFIG */ - 0x00000005, /* PA_SU_VTX_CNTL */ - 0x3f800000, /* PA_CL_GB_VERT_CLIP_ADJ */ - 0x3f800000, /* PA_CL_GB_VERT_DISC_ADJ */ - 0x3f800000, /* PA_CL_GB_HORZ_CLIP_ADJ */ - 0x3f800000, /* PA_CL_GB_HORZ_DISC_ADJ */ - 0x00000000, /* PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0 */ - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0xffffffff, /* PA_SC_AA_MASK_X0Y0_X1Y0 */ - 0xffffffff, - - 0xc0026900, - 0x00000316, - 0x0000000e, /* VGT_VERTEX_REUSE_BLOCK_CNTL */ - 0x00000010, /* */ -}; - -const u32 si_default_size = ARRAY_SIZE(si_default_state); diff --git a/drivers/gpu/drm/radeon/si_blit_shaders.h b/drivers/gpu/drm/radeon/si_blit_shaders.h index c739e51e3961..829a2b6228b7 100644 --- a/drivers/gpu/drm/radeon/si_blit_shaders.h +++ b/drivers/gpu/drm/radeon/si_blit_shaders.h @@ -25,8 +25,227 @@ #ifndef SI_BLIT_SHADERS_H #define SI_BLIT_SHADERS_H -extern const u32 si_default_state[]; +static const u32 si_default_state[] = { + 0xc0066900, + 0x00000000, + 0x00000060, /* DB_RENDER_CONTROL */ + 0x00000000, /* DB_COUNT_CONTROL */ + 0x00000000, /* DB_DEPTH_VIEW */ + 0x0000002a, /* DB_RENDER_OVERRIDE */ + 0x00000000, /* DB_RENDER_OVERRIDE2 */ + 0x00000000, /* DB_HTILE_DATA_BASE */ -extern const u32 si_default_size; + 0xc0046900, + 0x00000008, + 0x00000000, /* 
DB_DEPTH_BOUNDS_MIN */ + 0x00000000, /* DB_DEPTH_BOUNDS_MAX */ + 0x00000000, /* DB_STENCIL_CLEAR */ + 0x00000000, /* DB_DEPTH_CLEAR */ + + 0xc0036900, + 0x0000000f, + 0x00000000, /* DB_DEPTH_INFO */ + 0x00000000, /* DB_Z_INFO */ + 0x00000000, /* DB_STENCIL_INFO */ + + 0xc0016900, + 0x00000080, + 0x00000000, /* PA_SC_WINDOW_OFFSET */ + + 0xc00d6900, + 0x00000083, + 0x0000ffff, /* PA_SC_CLIPRECT_RULE */ + 0x00000000, /* PA_SC_CLIPRECT_0_TL */ + 0x20002000, /* PA_SC_CLIPRECT_0_BR */ + 0x00000000, + 0x20002000, + 0x00000000, + 0x20002000, + 0x00000000, + 0x20002000, + 0xaaaaaaaa, /* PA_SC_EDGERULE */ + 0x00000000, /* PA_SU_HARDWARE_SCREEN_OFFSET */ + 0x0000000f, /* CB_TARGET_MASK */ + 0x0000000f, /* CB_SHADER_MASK */ + + 0xc0226900, + 0x00000094, + 0x80000000, /* PA_SC_VPORT_SCISSOR_0_TL */ + 0x20002000, /* PA_SC_VPORT_SCISSOR_0_BR */ + 0x80000000, + 0x20002000, + 0x80000000, + 0x20002000, + 0x80000000, + 0x20002000, + 0x80000000, + 0x20002000, + 0x80000000, + 0x20002000, + 0x80000000, + 0x20002000, + 0x80000000, + 0x20002000, + 0x80000000, + 0x20002000, + 0x80000000, + 0x20002000, + 0x80000000, + 0x20002000, + 0x80000000, + 0x20002000, + 0x80000000, + 0x20002000, + 0x80000000, + 0x20002000, + 0x80000000, + 0x20002000, + 0x80000000, + 0x20002000, + 0x00000000, /* PA_SC_VPORT_ZMIN_0 */ + 0x3f800000, /* PA_SC_VPORT_ZMAX_0 */ + + 0xc0026900, + 0x000000d9, + 0x00000000, /* CP_RINGID */ + 0x00000000, /* CP_VMID */ + + 0xc0046900, + 0x00000100, + 0xffffffff, /* VGT_MAX_VTX_INDX */ + 0x00000000, /* VGT_MIN_VTX_INDX */ + 0x00000000, /* VGT_INDX_OFFSET */ + 0x00000000, /* VGT_MULTI_PRIM_IB_RESET_INDX */ + + 0xc0046900, + 0x00000105, + 0x00000000, /* CB_BLEND_RED */ + 0x00000000, /* CB_BLEND_GREEN */ + 0x00000000, /* CB_BLEND_BLUE */ + 0x00000000, /* CB_BLEND_ALPHA */ + + 0xc0016900, + 0x000001e0, + 0x00000000, /* CB_BLEND0_CONTROL */ + + 0xc00e6900, + 0x00000200, + 0x00000000, /* DB_DEPTH_CONTROL */ + 0x00000000, /* DB_EQAA */ + 0x00cc0010, /* CB_COLOR_CONTROL */ + 0x00000210, 
/* DB_SHADER_CONTROL */ + 0x00010000, /* PA_CL_CLIP_CNTL */ + 0x00000004, /* PA_SU_SC_MODE_CNTL */ + 0x00000100, /* PA_CL_VTE_CNTL */ + 0x00000000, /* PA_CL_VS_OUT_CNTL */ + 0x00000000, /* PA_CL_NANINF_CNTL */ + 0x00000000, /* PA_SU_LINE_STIPPLE_CNTL */ + 0x00000000, /* PA_SU_LINE_STIPPLE_SCALE */ + 0x00000000, /* PA_SU_PRIM_FILTER_CNTL */ + 0x00000000, /* */ + 0x00000000, /* */ + + 0xc0116900, + 0x00000280, + 0x00000000, /* PA_SU_POINT_SIZE */ + 0x00000000, /* PA_SU_POINT_MINMAX */ + 0x00000008, /* PA_SU_LINE_CNTL */ + 0x00000000, /* PA_SC_LINE_STIPPLE */ + 0x00000000, /* VGT_OUTPUT_PATH_CNTL */ + 0x00000000, /* VGT_HOS_CNTL */ + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, /* VGT_GS_MODE */ + + 0xc0026900, + 0x00000292, + 0x00000000, /* PA_SC_MODE_CNTL_0 */ + 0x00000000, /* PA_SC_MODE_CNTL_1 */ + + 0xc0016900, + 0x000002a1, + 0x00000000, /* VGT_PRIMITIVEID_EN */ + + 0xc0016900, + 0x000002a5, + 0x00000000, /* VGT_MULTI_PRIM_IB_RESET_EN */ + + 0xc0026900, + 0x000002a8, + 0x00000000, /* VGT_INSTANCE_STEP_RATE_0 */ + 0x00000000, + + 0xc0026900, + 0x000002ad, + 0x00000000, /* VGT_REUSE_OFF */ + 0x00000000, + + 0xc0016900, + 0x000002d5, + 0x00000000, /* VGT_SHADER_STAGES_EN */ + + 0xc0016900, + 0x000002dc, + 0x0000aa00, /* DB_ALPHA_TO_MASK */ + + 0xc0066900, + 0x000002de, + 0x00000000, /* PA_SU_POLY_OFFSET_DB_FMT_CNTL */ + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + + 0xc0026900, + 0x000002e5, + 0x00000000, /* VGT_STRMOUT_CONFIG */ + 0x00000000, + + 0xc01b6900, + 0x000002f5, + 0x76543210, /* PA_SC_CENTROID_PRIORITY_0 */ + 0xfedcba98, /* PA_SC_CENTROID_PRIORITY_1 */ + 0x00000000, /* PA_SC_LINE_CNTL */ + 0x00000000, /* PA_SC_AA_CONFIG */ + 0x00000005, /* PA_SU_VTX_CNTL */ + 0x3f800000, /* PA_CL_GB_VERT_CLIP_ADJ */ + 0x3f800000, /* PA_CL_GB_VERT_DISC_ADJ */ + 0x3f800000, /* PA_CL_GB_HORZ_CLIP_ADJ */ + 0x3f800000, /* PA_CL_GB_HORZ_DISC_ADJ */ 
+ 0x00000000, /* PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0 */ + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0x00000000, + 0xffffffff, /* PA_SC_AA_MASK_X0Y0_X1Y0 */ + 0xffffffff, + + 0xc0026900, + 0x00000316, + 0x0000000e, /* VGT_VERTEX_REUSE_BLOCK_CNTL */ + 0x00000010, /* */ +}; + +static const u32 si_default_size = ARRAY_SIZE(si_default_state); #endif diff --git a/include/drm/dp/drm_dp_helper.h b/include/drm/dp/drm_dp_helper.h index 91af98e6617c..c2eb119e1f4a 100644 --- a/include/drm/dp/drm_dp_helper.h +++ b/include/drm/dp/drm_dp_helper.h @@ -361,6 +361,7 @@ struct drm_panel; # define DP_PSR_IS_SUPPORTED 1 # define DP_PSR2_IS_SUPPORTED 2 /* eDP 1.4 */ # define DP_PSR2_WITH_Y_COORD_IS_SUPPORTED 3 /* eDP 1.4a */ +# define DP_PSR2_WITH_Y_COORD_ET_SUPPORTED 4 /* eDP 1.5, adopted eDP 1.4b SCR */ #define DP_PSR_CAPS 0x071 /* XXX 1.2? */ # define DP_PSR_NO_TRAIN_ON_EXIT 1 @@ -375,6 +376,7 @@ struct drm_panel; # define DP_PSR_SETUP_TIME_SHIFT 1 # define DP_PSR2_SU_Y_COORDINATE_REQUIRED (1 << 4) /* eDP 1.4a */ # define DP_PSR2_SU_GRANULARITY_REQUIRED (1 << 5) /* eDP 1.4b */ +# define DP_PSR2_SU_AUX_FRAME_SYNC_NOT_NEEDED (1 << 6)/* eDP 1.5, adopted eDP 1.4b SCR */ #define DP_PSR2_SU_X_GRANULARITY 0x072 /* eDP 1.4b */ #define DP_PSR2_SU_Y_GRANULARITY 0x074 /* eDP 1.4b */ |