diff options
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu')
98 files changed, 2537 insertions, 1762 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index 40e2c6e2df79..b525f9be9326 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -49,7 +49,7 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \ amdgpu_cs.o amdgpu_bios.o amdgpu_benchmark.o \ atombios_dp.o amdgpu_afmt.o amdgpu_trace_points.o \ atombios_encoders.o amdgpu_sa.o atombios_i2c.o \ - amdgpu_dma_buf.o amdgpu_vm.o amdgpu_ib.o amdgpu_pll.o \ + amdgpu_dma_buf.o amdgpu_vm.o amdgpu_vm_pt.o amdgpu_ib.o amdgpu_pll.o \ amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o \ amdgpu_gtt_mgr.o amdgpu_preempt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o \ amdgpu_atomfirmware.o amdgpu_vf_error.o amdgpu_sched.o \ @@ -58,7 +58,7 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \ amdgpu_vm_sdma.o amdgpu_discovery.o amdgpu_ras_eeprom.o amdgpu_nbio.o \ amdgpu_umc.o smu_v11_0_i2c.o amdgpu_fru_eeprom.o amdgpu_rap.o \ amdgpu_fw_attestation.o amdgpu_securedisplay.o \ - amdgpu_eeprom.o amdgpu_mca.o + amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o amdgpu-$(CONFIG_PROC_FS) += amdgpu_fdinfo.o diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index cdf0818088b3..bffd24845765 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -179,7 +179,7 @@ extern int amdgpu_sched_jobs; extern int amdgpu_sched_hw_submission; extern uint amdgpu_pcie_gen_cap; extern uint amdgpu_pcie_lane_cap; -extern uint amdgpu_cg_mask; +extern u64 amdgpu_cg_mask; extern uint amdgpu_pg_mask; extern uint amdgpu_sdma_phase_quantum; extern char *amdgpu_disable_cu; @@ -322,7 +322,7 @@ int amdgpu_device_ip_set_powergating_state(void *dev, enum amd_ip_block_type block_type, enum amd_powergating_state state); void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev, - u32 *flags); + u64 *flags); int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev, enum amd_ip_block_type block_type); bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev, @@ -860,7 +860,7 @@ struct amdgpu_device { /* powerplay */ struct amd_powerplay powerplay; struct amdgpu_pm pm; - u32 cg_flags; + u64 cg_flags; u32 pg_flags; /* nbio */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index 6ca1db3c243f..64c6664b34e8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -724,3 +724,11 @@ void amdgpu_amdkfd_ras_poison_consumption_handler(struct amdgpu_device *adev, bo else if (reset) amdgpu_amdkfd_gpu_reset(adev); } + +bool amdgpu_amdkfd_ras_query_utcl2_poison_status(struct amdgpu_device *adev) +{ + if (adev->gfx.ras && adev->gfx.ras->query_utcl2_poison_status) + return adev->gfx.ras->query_utcl2_poison_status(adev); + else + return false; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index 4cb14c2fe53f..f8b9f27adcf5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -273,9 +273,8 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( struct amdgpu_device *adev, struct kgd_mem *mem, void *drm_priv, uint64_t *size); -int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( - struct amdgpu_device *adev, struct kgd_mem *mem, void *drm_priv, - bool *table_freed); +int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(struct amdgpu_device *adev, + struct kgd_mem *mem, void *drm_priv); int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu( struct amdgpu_device *adev, struct kgd_mem *mem, void *drm_priv); int amdgpu_amdkfd_gpuvm_sync_memory( @@ -301,6 +300,7 @@ void amdgpu_amdkfd_ras_poison_consumption_handler(struct amdgpu_device *adev, bool amdgpu_amdkfd_bo_mapped_to_dev(struct amdgpu_device *adev, struct kgd_mem *mem); void amdgpu_amdkfd_block_mmu_notifications(void *p); int amdgpu_amdkfd_criu_resume(void *p); +bool amdgpu_amdkfd_ras_query_utcl2_poison_status(struct amdgpu_device *adev); #if IS_ENABLED(CONFIG_HSA_AMD) void amdgpu_amdkfd_gpuvm_init_mem_limits(void); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 98b1736bb221..80b6b8e432fd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -263,7 +263,7 @@ static int amdgpu_amdkfd_remove_eviction_fence(struct amdgpu_bo *bo, */ replacement = dma_fence_get_stub(); dma_resv_replace_fences(bo->tbo.base.resv, ef->base.context, - replacement); + replacement, DMA_RESV_USAGE_READ); dma_fence_put(replacement); return 0; } @@ -1058,8 +1058,7 @@ static void unmap_bo_from_gpuvm(struct kgd_mem *mem, static int update_gpuvm_pte(struct kgd_mem *mem, struct kfd_mem_attachment *entry, - struct amdgpu_sync *sync, - bool *table_freed) + struct amdgpu_sync *sync) { struct amdgpu_bo_va *bo_va = entry->bo_va; struct amdgpu_device *adev = entry->adev; @@ -1070,7 +1069,7 @@ static int update_gpuvm_pte(struct kgd_mem *mem, return ret; /* Update the page tables */ - ret = amdgpu_vm_bo_update(adev, bo_va, false, table_freed); + ret = amdgpu_vm_bo_update(adev, bo_va, false); if (ret) { pr_err("amdgpu_vm_bo_update failed\n"); return ret; @@ -1082,8 +1081,7 @@ static int update_gpuvm_pte(struct kgd_mem *mem, static int map_bo_to_gpuvm(struct kgd_mem *mem, struct kfd_mem_attachment *entry, struct amdgpu_sync *sync, - bool no_update_pte, - bool *table_freed) + bool no_update_pte) { int ret; @@ -1100,7 +1098,7 @@ static int map_bo_to_gpuvm(struct kgd_mem *mem, if (no_update_pte) return 0; - ret = update_gpuvm_pte(mem, entry, sync, table_freed); + ret = update_gpuvm_pte(mem, entry, sync); if (ret) { pr_err("update_gpuvm_pte() failed\n"); goto update_gpuvm_pte_failed; @@ -1710,7 +1708,7 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( struct amdgpu_device *adev, struct kgd_mem *mem, - void *drm_priv, bool *table_freed) + void *drm_priv) { struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv); int ret; @@ -1797,7 +1795,7 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( entry->va, entry->va + bo_size, entry); ret = map_bo_to_gpuvm(mem, entry, ctx.sync, - is_invalid_userptr, table_freed); + is_invalid_userptr); if (ret) { pr_err("Failed to map bo to gpuvm\n"); goto out_unreserve; @@ -2265,7 +2263,7 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info) continue; kfd_mem_dmaunmap_attachment(mem, attachment); - ret = update_gpuvm_pte(mem, attachment, &sync, NULL); + ret = update_gpuvm_pte(mem, attachment, &sync); if (ret) { pr_err("%s: update PTE failed\n", __func__); /* make sure this gets validated again */ @@ -2447,6 +2445,8 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef) struct amdgpu_bo *bo = mem->bo; uint32_t domain = mem->domain; struct kfd_mem_attachment *attachment; + struct dma_resv_iter cursor; + struct dma_fence *fence; total_size += amdgpu_bo_size(bo); @@ -2461,17 +2461,20 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef) goto validate_map_fail; } } - ret = amdgpu_sync_fence(&sync_obj, bo->tbo.moving); - if (ret) { - pr_debug("Memory eviction: Sync BO fence failed. Try again\n"); - goto validate_map_fail; + dma_resv_for_each_fence(&cursor, bo->tbo.base.resv, + DMA_RESV_USAGE_KERNEL, fence) { + ret = amdgpu_sync_fence(&sync_obj, fence); + if (ret) { + pr_debug("Memory eviction: Sync BO fence failed. Try again\n"); + goto validate_map_fail; + } } list_for_each_entry(attachment, &mem->attachments, list) { if (!attachment->is_mapped) continue; kfd_mem_dmaunmap_attachment(mem, attachment); - ret = update_gpuvm_pte(mem, attachment, &sync_obj, NULL); + ret = update_gpuvm_pte(mem, attachment, &sync_obj); if (ret) { pr_debug("Memory eviction: update PTE failed. Try again\n"); goto validate_map_fail; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h index 044b41f0bfd9..529d52a204cf 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h @@ -34,7 +34,6 @@ struct amdgpu_fpriv; struct amdgpu_bo_list_entry { struct ttm_validate_buffer tv; struct amdgpu_bo_va *bo_va; - struct dma_fence_chain *chain; uint32_t priority; struct page **user_pages; bool user_invalidated; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index e85e347eb670..71633a1fedfd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -55,8 +55,8 @@ static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p, bo = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj)); p->uf_entry.priority = 0; p->uf_entry.tv.bo = &bo->tbo; - /* One for TTM and one for the CS job */ - p->uf_entry.tv.num_shared = 2; + /* One for TTM and two for the CS job */ + p->uf_entry.tv.num_shared = 3; drm_gem_object_put(gobj); @@ -574,14 +574,6 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo); e->bo_va = amdgpu_vm_bo_find(vm, bo); - - if (bo->tbo.base.dma_buf && !amdgpu_bo_explicit_sync(bo)) { - e->chain = dma_fence_chain_alloc(); - if (!e->chain) { - r = -ENOMEM; - goto error_validate; - } - } } /* Move fence waiting after getting reservation lock of @@ -642,13 +634,8 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, } error_validate: - if (r) { - amdgpu_bo_list_for_each_entry(e, p->bo_list) { - dma_fence_chain_free(e->chain); - e->chain = NULL; - } + if (r) ttm_eu_backoff_reservation(&p->ticket, &p->validated); - } out: return r; } @@ -688,17 +675,9 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, { unsigned i; - if (error && backoff) { - struct amdgpu_bo_list_entry *e; - - amdgpu_bo_list_for_each_entry(e, parser->bo_list) { - dma_fence_chain_free(e->chain); - e->chain = NULL; - } - + if (error && backoff) ttm_eu_backoff_reservation(&parser->ticket, &parser->validated); - } for (i = 0; i < parser->num_post_deps; i++) { drm_syncobj_put(parser->post_deps[i].syncobj); @@ -806,22 +785,22 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p) if (r) return r; - r = amdgpu_vm_bo_update(adev, fpriv->prt_va, false, NULL); + r = amdgpu_vm_bo_update(adev, fpriv->prt_va, false); if (r) return r; - r = amdgpu_sync_vm_fence(&p->job->sync, fpriv->prt_va->last_pt_update); + r = amdgpu_sync_fence(&p->job->sync, fpriv->prt_va->last_pt_update); if (r) return r; if (amdgpu_mcbp || amdgpu_sriov_vf(adev)) { bo_va = fpriv->csa_va; BUG_ON(!bo_va); - r = amdgpu_vm_bo_update(adev, bo_va, false, NULL); + r = amdgpu_vm_bo_update(adev, bo_va, false); if (r) return r; - r = amdgpu_sync_vm_fence(&p->job->sync, bo_va->last_pt_update); + r = amdgpu_sync_fence(&p->job->sync, bo_va->last_pt_update); if (r) return r; } @@ -836,11 +815,11 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p) if (bo_va == NULL) continue; - r = amdgpu_vm_bo_update(adev, bo_va, false, NULL); + r = amdgpu_vm_bo_update(adev, bo_va, false); if (r) return r; - r = amdgpu_sync_vm_fence(&p->job->sync, bo_va->last_pt_update); + r = amdgpu_sync_fence(&p->job->sync, bo_va->last_pt_update); if (r) return r; } @@ -853,7 +832,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p) if (r) return r; - r = amdgpu_sync_vm_fence(&p->job->sync, vm->last_update); + r = amdgpu_sync_fence(&p->job->sync, vm->last_update); if (r) return r; @@ -1272,29 +1251,9 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, amdgpu_vm_move_to_lru_tail(p->adev, &fpriv->vm); - amdgpu_bo_list_for_each_entry(e, p->bo_list) { - struct dma_resv *resv = e->tv.bo->base.resv; - struct dma_fence_chain *chain = e->chain; - struct dma_resv_iter cursor; - struct dma_fence *fence; - - if (!chain) - continue; - - /* - * Temporary workaround dma_resv shortcommings by wrapping up - * the submission in a dma_fence_chain and add it as exclusive - * fence. - * - * TODO: Remove together with dma_resv rework. - */ - dma_resv_for_each_fence(&cursor, resv, false, fence) { - break; - } - dma_fence_chain_init(chain, fence, dma_fence_get(p->fence), 1); - rcu_assign_pointer(resv->fence_excl, &chain->base); - e->chain = NULL; - } + /* Make sure all BOs are remembered as writers */ + amdgpu_bo_list_for_each_entry(e, p->bo_list) + e->tv.num_shared = 0; ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence); mutex_unlock(&p->adev->notifier_lock); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c index 5d04d24a0d5f..eedb12f6b8a3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c @@ -38,6 +38,7 @@ #include "amdgpu_umr.h" #include "amdgpu_reset.h" +#include "amdgpu_psp_ta.h" #if defined(CONFIG_DEBUG_FS) @@ -730,7 +731,7 @@ static ssize_t amdgpu_debugfs_gca_config_read(struct file *f, char __user *buf, return -ENOMEM; /* version, increment each time something is added */ - config[no_regs++] = 4; + config[no_regs++] = 5; config[no_regs++] = adev->gfx.config.max_shader_engines; config[no_regs++] = adev->gfx.config.max_tile_pipes; config[no_regs++] = adev->gfx.config.max_cu_per_sh; @@ -757,8 +758,8 @@ static ssize_t amdgpu_debugfs_gca_config_read(struct file *f, char __user *buf, /* rev==1 */ config[no_regs++] = adev->rev_id; - config[no_regs++] = adev->pg_flags; - config[no_regs++] = adev->cg_flags; + config[no_regs++] = lower_32_bits(adev->pg_flags); + config[no_regs++] = lower_32_bits(adev->cg_flags); /* rev==2 */ config[no_regs++] = adev->family; @@ -773,6 +774,10 @@ static ssize_t amdgpu_debugfs_gca_config_read(struct file *f, char __user *buf, /* rev==4 APU flag */ config[no_regs++] = adev->flags & AMD_IS_APU ? 1 : 0; + /* rev==5 PG/CG flag upper 32bit */ + config[no_regs++] = upper_32_bits(adev->pg_flags); + config[no_regs++] = upper_32_bits(adev->cg_flags); + while (size && (*pos < no_regs * 4)) { uint32_t value; @@ -1763,6 +1768,7 @@ int amdgpu_debugfs_init(struct amdgpu_device *adev) DRM_ERROR("registering register debugfs failed (%d).\n", r); amdgpu_debugfs_firmware_init(adev); + amdgpu_ta_if_debugfs_init(adev); #if defined(CONFIG_DRM_AMD_DC) if (amdgpu_device_has_dc_support(adev)) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 49f734137f15..94666c2417c6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -1703,7 +1703,7 @@ int amdgpu_device_ip_set_powergating_state(void *dev, * clockgating is enabled. */ void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev, - u32 *flags) + u64 *flags) { int i; @@ -3700,7 +3700,7 @@ int amdgpu_device_init(struct amdgpu_device *adev, /* enable PCIE atomic ops */ if (amdgpu_sriov_vf(adev)) adev->have_atomics_support = ((struct amd_sriov_msg_pf2vf_info *) - adev->virt.fw_reserve.p_pf2vf)->pcie_atomic_ops_enabled_flags == + adev->virt.fw_reserve.p_pf2vf)->pcie_atomic_ops_support_flags == (PCI_EXP_DEVCAP2_ATOMIC_COMP32 | PCI_EXP_DEVCAP2_ATOMIC_COMP64); else adev->have_atomics_support = diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_df.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_df.h index 6b25837955c4..1538b2dbfff1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_df.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_df.h @@ -40,7 +40,7 @@ struct amdgpu_df_funcs { void (*update_medium_grain_clock_gating)(struct amdgpu_device *adev, bool enable); void (*get_clockgating_state)(struct amdgpu_device *adev, - u32 *flags); + u64 *flags); void (*enable_ecc_force_par_wr_rmw)(struct amdgpu_device *adev, bool enable); int (*pmc_start)(struct amdgpu_device *adev, uint64_t config, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c index e4fcbb385a62..aaf2fc6b1a82 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c @@ -430,7 +430,7 @@ static void amdgpu_discovery_read_harvest_bit_per_ip(struct amdgpu_device *adev, } } next_ip: - ip_offset += sizeof(*ip) + 4 * (ip->num_base_address - 1); + ip_offset += struct_size(ip, base_address, ip->num_base_address); } } } @@ -798,7 +798,7 @@ static int amdgpu_discovery_sysfs_ips(struct amdgpu_device *adev, res = kobject_add(&ip_hw_instance->kobj, NULL, "%d", ip_hw_instance->num_instance); next_ip: - ip_offset += sizeof(*ip) + 4 * (ip->num_base_address - 1); + ip_offset += struct_size(ip, base_address, ip->num_base_address); } } @@ -1063,7 +1063,7 @@ int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev) } next_ip: - ip_offset += sizeof(*ip) + 4 * (ip->num_base_address - 1); + ip_offset += struct_size(ip, base_address, ip->num_base_address); } } @@ -1113,7 +1113,7 @@ int amdgpu_discovery_get_ip_version(struct amdgpu_device *adev, int hw_id, int n *revision = ip->revision; return 0; } - ip_offset += sizeof(*ip) + 4 * (ip->num_base_address - 1); + ip_offset += struct_size(ip, base_address, ip->num_base_address); } } @@ -1150,13 +1150,6 @@ void amdgpu_discovery_harvest_ip(struct amdgpu_device *adev) adev->harvest_ip_mask |= AMD_HARVEST_IP_VCN_MASK; adev->harvest_ip_mask |= AMD_HARVEST_IP_JPEG_MASK; } - if ((adev->pdev->device == 0x731E && - (adev->pdev->revision == 0xC6 || adev->pdev->revision == 0xC7)) || - (adev->pdev->device == 0x7340 && adev->pdev->revision == 0xC9) || - (adev->pdev->device == 0x7360 && adev->pdev->revision == 0xC7)) { - adev->harvest_ip_mask |= AMD_HARVEST_IP_VCN_MASK; - adev->harvest_ip_mask |= AMD_HARVEST_IP_JPEG_MASK; - } } union gc_info { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c index fae5c1debfad..17c9bbe0cbc5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c @@ -41,6 +41,11 @@ #include <drm/drm_fourcc.h> #include <drm/drm_vblank.h> +static int amdgpu_display_framebuffer_init(struct drm_device *dev, + struct amdgpu_framebuffer *rfb, + const struct drm_mode_fb_cmd2 *mode_cmd, + struct drm_gem_object *obj); + static void amdgpu_display_flip_callback(struct dma_fence *f, struct dma_fence_cb *cb) { @@ -113,8 +118,9 @@ static void amdgpu_display_flip_work_func(struct work_struct *__work) spin_unlock_irqrestore(&crtc->dev->event_lock, flags); - DRM_DEBUG_DRIVER("crtc:%d[%p], pflip_stat:AMDGPU_FLIP_SUBMITTED, work: %p,\n", - amdgpu_crtc->crtc_id, amdgpu_crtc, work); + drm_dbg_vbl(adev_to_drm(adev), + "crtc:%d[%p], pflip_stat:AMDGPU_FLIP_SUBMITTED, work: %p,\n", + amdgpu_crtc->crtc_id, amdgpu_crtc, work); } @@ -200,8 +206,7 @@ int amdgpu_display_crtc_page_flip_target(struct drm_crtc *crtc, goto unpin; } - /* TODO: Unify this with other drivers */ - r = dma_resv_get_fences(new_abo->tbo.base.resv, true, + r = dma_resv_get_fences(new_abo->tbo.base.resv, DMA_RESV_USAGE_WRITE, &work->shared_count, &work->shared); if (unlikely(r != 0)) { @@ -1039,35 +1044,11 @@ static int amdgpu_display_get_fb_info(const struct amdgpu_framebuffer *amdgpu_fb return r; } -int amdgpu_display_gem_fb_init(struct drm_device *dev, - struct amdgpu_framebuffer *rfb, - const struct drm_mode_fb_cmd2 *mode_cmd, - struct drm_gem_object *obj) -{ - int ret; - - rfb->base.obj[0] = obj; - drm_helper_mode_fill_fb_struct(dev, &rfb->base, mode_cmd); - - ret = amdgpu_display_framebuffer_init(dev, rfb, mode_cmd, obj); - if (ret) - goto err; - - ret = drm_framebuffer_init(dev, &rfb->base, &amdgpu_fb_funcs); - if (ret) - goto err; - - return 0; -err: - drm_dbg_kms(dev, "Failed to init gem fb: %d\n", ret); - rfb->base.obj[0] = NULL; - return ret; -} - -int amdgpu_display_gem_fb_verify_and_init( - struct drm_device *dev, struct amdgpu_framebuffer *rfb, - struct drm_file *file_priv, const struct drm_mode_fb_cmd2 *mode_cmd, - struct drm_gem_object *obj) +static int amdgpu_display_gem_fb_verify_and_init(struct drm_device *dev, + struct amdgpu_framebuffer *rfb, + struct drm_file *file_priv, + const struct drm_mode_fb_cmd2 *mode_cmd, + struct drm_gem_object *obj) { int ret; @@ -1099,10 +1080,10 @@ err: return ret; } -int amdgpu_display_framebuffer_init(struct drm_device *dev, - struct amdgpu_framebuffer *rfb, - const struct drm_mode_fb_cmd2 *mode_cmd, - struct drm_gem_object *obj) +static int amdgpu_display_framebuffer_init(struct drm_device *dev, + struct amdgpu_framebuffer *rfb, + const struct drm_mode_fb_cmd2 *mode_cmd, + struct drm_gem_object *obj) { struct amdgpu_device *adev = drm_to_adev(dev); int ret, i; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c index 579adfafe4d0..782cbca37538 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c @@ -102,21 +102,9 @@ static int amdgpu_dma_buf_pin(struct dma_buf_attachment *attach) { struct drm_gem_object *obj = attach->dmabuf->priv; struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj); - int r; /* pin buffer into GTT */ - r = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT); - if (r) - return r; - - if (bo->tbo.moving) { - r = dma_fence_wait(bo->tbo.moving, true); - if (r) { - amdgpu_bo_unpin(bo); - return r; - } - } - return 0; + return amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT); } /** diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index b03663f42cc9..ebd37fb19cdb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -136,7 +136,7 @@ int amdgpu_sched_jobs = 32; int amdgpu_sched_hw_submission = 2; uint amdgpu_pcie_gen_cap; uint amdgpu_pcie_lane_cap; -uint amdgpu_cg_mask = 0xffffffff; +u64 amdgpu_cg_mask = 0xffffffffffffffff; uint amdgpu_pg_mask = 0xffffffff; uint amdgpu_sdma_phase_quantum = 32; char *amdgpu_disable_cu = NULL; @@ -454,12 +454,12 @@ MODULE_PARM_DESC(pcie_lane_cap, "PCIE Lane Caps (0: autodetect (default))"); module_param_named(pcie_lane_cap, amdgpu_pcie_lane_cap, uint, 0444); /** - * DOC: cg_mask (uint) + * DOC: cg_mask (ullong) * Override Clockgating features enabled on GPU (0 = disable clock gating). See the AMD_CG_SUPPORT flags in - * drivers/gpu/drm/amd/include/amd_shared.h. The default is 0xffffffff (all enabled). + * drivers/gpu/drm/amd/include/amd_shared.h. The default is 0xffffffffffffffff (all enabled). */ MODULE_PARM_DESC(cg_mask, "Clockgating flags mask (0 = disable clock gating)"); -module_param_named(cg_mask, amdgpu_cg_mask, uint, 0444); +module_param_named(cg_mask, amdgpu_cg_mask, ullong, 0444); /** * DOC: pg_mask (uint) @@ -2323,18 +2323,23 @@ static int amdgpu_pmops_suspend(struct device *dev) { struct drm_device *drm_dev = dev_get_drvdata(dev); struct amdgpu_device *adev = drm_to_adev(drm_dev); - int r; if (amdgpu_acpi_is_s0ix_active(adev)) adev->in_s0ix = true; else adev->in_s3 = true; - r = amdgpu_device_suspend(drm_dev, true); - if (r) - return r; + return amdgpu_device_suspend(drm_dev, true); +} + +static int amdgpu_pmops_suspend_noirq(struct device *dev) +{ + struct drm_device *drm_dev = dev_get_drvdata(dev); + struct amdgpu_device *adev = drm_to_adev(drm_dev); + if (!adev->in_s0ix) - r = amdgpu_asic_reset(adev); - return r; + return amdgpu_asic_reset(adev); + + return 0; } static int amdgpu_pmops_resume(struct device *dev) @@ -2390,6 +2395,71 @@ static int amdgpu_pmops_restore(struct device *dev) return amdgpu_device_resume(drm_dev, true); } +static int amdgpu_runtime_idle_check_display(struct device *dev) +{ + struct pci_dev *pdev = to_pci_dev(dev); + struct drm_device *drm_dev = pci_get_drvdata(pdev); + struct amdgpu_device *adev = drm_to_adev(drm_dev); + + if (adev->mode_info.num_crtc) { + struct drm_connector *list_connector; + struct drm_connector_list_iter iter; + int ret = 0; + + /* XXX: Return busy if any displays are connected to avoid + * possible display wakeups after runtime resume due to + * hotplug events in case any displays were connected while + * the GPU was in suspend. Remove this once that is fixed. + */ + mutex_lock(&drm_dev->mode_config.mutex); + drm_connector_list_iter_begin(drm_dev, &iter); + drm_for_each_connector_iter(list_connector, &iter) { + if (list_connector->status == connector_status_connected) { + ret = -EBUSY; + break; + } + } + drm_connector_list_iter_end(&iter); + mutex_unlock(&drm_dev->mode_config.mutex); + + if (ret) + return ret; + + if (amdgpu_device_has_dc_support(adev)) { + struct drm_crtc *crtc; + + drm_for_each_crtc(crtc, drm_dev) { + drm_modeset_lock(&crtc->mutex, NULL); + if (crtc->state->active) + ret = -EBUSY; + drm_modeset_unlock(&crtc->mutex); + if (ret < 0) + break; + } + } else { + mutex_lock(&drm_dev->mode_config.mutex); + drm_modeset_lock(&drm_dev->mode_config.connection_mutex, NULL); + + drm_connector_list_iter_begin(drm_dev, &iter); + drm_for_each_connector_iter(list_connector, &iter) { + if (list_connector->dpms == DRM_MODE_DPMS_ON) { + ret = -EBUSY; + break; + } + } + + drm_connector_list_iter_end(&iter); + + drm_modeset_unlock(&drm_dev->mode_config.connection_mutex); + mutex_unlock(&drm_dev->mode_config.mutex); + } + if (ret) + return ret; + } + + return 0; +} + static int amdgpu_pmops_runtime_suspend(struct device *dev) { struct pci_dev *pdev = to_pci_dev(dev); @@ -2402,6 +2472,10 @@ static int amdgpu_pmops_runtime_suspend(struct device *dev) return -EBUSY; } + ret = amdgpu_runtime_idle_check_display(dev); + if (ret) + return ret; + /* wait for all rings to drain before suspending */ for (i = 0; i < AMDGPU_MAX_RINGS; i++) { struct amdgpu_ring *ring = adev->rings[i]; @@ -2511,41 +2585,7 @@ static int amdgpu_pmops_runtime_idle(struct device *dev) return -EBUSY; } - if (amdgpu_device_has_dc_support(adev)) { - struct drm_crtc *crtc; - - drm_for_each_crtc(crtc, drm_dev) { - drm_modeset_lock(&crtc->mutex, NULL); - if (crtc->state->active) - ret = -EBUSY; - drm_modeset_unlock(&crtc->mutex); - if (ret < 0) - break; - } - - } else { - struct drm_connector *list_connector; - struct drm_connector_list_iter iter; - - mutex_lock(&drm_dev->mode_config.mutex); - drm_modeset_lock(&drm_dev->mode_config.connection_mutex, NULL); - - drm_connector_list_iter_begin(drm_dev, &iter); - drm_for_each_connector_iter(list_connector, &iter) { - if (list_connector->dpms == DRM_MODE_DPMS_ON) { - ret = -EBUSY; - break; - } - } - - drm_connector_list_iter_end(&iter); - - drm_modeset_unlock(&drm_dev->mode_config.connection_mutex); - mutex_unlock(&drm_dev->mode_config.mutex); - } - - if (ret == -EBUSY) - DRM_DEBUG_DRIVER("failing to power off - crtc active\n"); + ret = amdgpu_runtime_idle_check_display(dev); pm_runtime_mark_last_busy(dev); pm_runtime_autosuspend(dev); @@ -2575,6 +2615,7 @@ static const struct dev_pm_ops amdgpu_pm_ops = { .prepare = amdgpu_pmops_prepare, .complete = amdgpu_pmops_complete, .suspend = amdgpu_pmops_suspend, + .suspend_noirq = amdgpu_pmops_suspend_noirq, .resume = amdgpu_pmops_resume, .freeze = amdgpu_pmops_freeze, .thaw = amdgpu_pmops_thaw, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index 57b74d35052f..652571267077 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -526,7 +526,8 @@ int amdgpu_gem_wait_idle_ioctl(struct drm_device *dev, void *data, return -ENOENT; } robj = gem_to_amdgpu_bo(gobj); - ret = dma_resv_wait_timeout(robj->tbo.base.resv, true, true, timeout); + ret = dma_resv_wait_timeout(robj->tbo.base.resv, DMA_RESV_USAGE_READ, + true, timeout); /* ret == 0 means not signaled, * ret > 0 means signaled @@ -612,7 +613,7 @@ static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev, if (operation == AMDGPU_VA_OP_MAP || operation == AMDGPU_VA_OP_REPLACE) { - r = amdgpu_vm_bo_update(adev, bo_va, false, NULL); + r = amdgpu_vm_bo_update(adev, bo_va, false); if (r) goto error; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index dcb3c7871c73..5ed9b8a4c571 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -202,6 +202,7 @@ struct amdgpu_cu_info { struct amdgpu_gfx_ras { struct amdgpu_ras_block_object ras_block; void (*enable_watchdog_timer)(struct amdgpu_device *adev); + bool (*query_utcl2_poison_status)(struct amdgpu_device *adev); }; struct amdgpu_gfx_funcs { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c index a66a0881a934..88b852b3a2cb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c @@ -25,6 +25,9 @@ */ #include <linux/io-64-nonatomic-lo-hi.h> +#ifdef CONFIG_X86 +#include <asm/hypervisor.h> +#endif #include "amdgpu.h" #include "amdgpu_gmc.h" @@ -647,12 +650,14 @@ void amdgpu_gmc_get_vbios_allocations(struct amdgpu_device *adev) case CHIP_VEGA10: adev->mman.keep_stolen_vga_memory = true; /* - * VEGA10 SRIOV VF needs some firmware reserved area. + * VEGA10 SRIOV VF with MS_HYPERV host needs some firmware reserved area. */ - if (amdgpu_sriov_vf(adev)) { - adev->mman.stolen_reserved_offset = 0x100000; - adev->mman.stolen_reserved_size = 0x600000; +#ifdef CONFIG_X86 + if (amdgpu_sriov_vf(adev) && hypervisor_is_type(X86_HYPER_MS_HYPERV)) { + adev->mman.stolen_reserved_offset = 0x500000; + adev->mman.stolen_reserved_size = 0x200000; } +#endif break; case CHIP_RAVEN: case CHIP_RENOIR: diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h index 9181c7bef7c6..ac5c61d3de2b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h @@ -33,7 +33,7 @@ struct amdgpu_hdp_funcs { void (*invalidate_hdp)(struct amdgpu_device *adev, struct amdgpu_ring *ring); void (*update_clock_gating)(struct amdgpu_device *adev, bool enable); - void (*get_clock_gating_state)(struct amdgpu_device *adev, u32 *flags); + void (*get_clock_gating_state)(struct amdgpu_device *adev, u64 *flags); void (*init_registers)(struct amdgpu_device *adev); }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c index 81207737c716..03d115d2b5ed 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c @@ -111,7 +111,7 @@ void amdgpu_pasid_free_delayed(struct dma_resv *resv, struct dma_fence *fence; int r; - r = dma_resv_get_singleton(resv, true, &fence); + r = dma_resv_get_singleton(resv, DMA_RESV_USAGE_BOOKKEEP, &fence); if (r) goto fallback; @@ -139,7 +139,8 @@ fallback: /* Not enough memory for the delayed delete, as last resort * block for all the fences to complete. */ - dma_resv_wait_timeout(resv, true, false, MAX_SCHEDULE_TIMEOUT); + dma_resv_wait_timeout(resv, DMA_RESV_USAGE_BOOKKEEP, + false, MAX_SCHEDULE_TIMEOUT); amdgpu_pasid_free(pasid); } @@ -259,19 +260,15 @@ static int amdgpu_vmid_grab_reserved(struct amdgpu_vm *vm, struct amdgpu_device *adev = ring->adev; unsigned vmhub = ring->funcs->vmhub; uint64_t fence_context = adev->fence_context + ring->idx; - struct dma_fence *updates = sync->last_vm_update; bool needs_flush = vm->use_cpu_for_update; - int r = 0; + uint64_t updates = amdgpu_vm_tlb_seq(vm); + int r; *id = vm->reserved_vmid[vmhub]; - if (updates && (*id)->flushed_updates && - updates->context == (*id)->flushed_updates->context && - !dma_fence_is_later(updates, (*id)->flushed_updates)) - updates = NULL; - if ((*id)->owner != vm->immediate.fence_context || - job->vm_pd_addr != (*id)->pd_gpu_addr || - updates || !(*id)->last_flush || + (*id)->pd_gpu_addr != job->vm_pd_addr || + (*id)->flushed_updates < updates || + !(*id)->last_flush || ((*id)->last_flush->context != fence_context && !dma_fence_is_signaled((*id)->last_flush))) { struct dma_fence *tmp; @@ -285,8 +282,7 @@ static int amdgpu_vmid_grab_reserved(struct amdgpu_vm *vm, tmp = amdgpu_sync_peek_fence(&(*id)->active, ring); if (tmp) { *id = NULL; - r = amdgpu_sync_fence(sync, tmp); - return r; + return amdgpu_sync_fence(sync, tmp); } needs_flush = true; } @@ -298,10 +294,7 @@ static int amdgpu_vmid_grab_reserved(struct amdgpu_vm *vm, if (r) return r; - if (updates) { - dma_fence_put((*id)->flushed_updates); - (*id)->flushed_updates = dma_fence_get(updates); - } + (*id)->flushed_updates = updates; job->vm_needs_flush = needs_flush; return 0; } @@ -329,7 +322,7 @@ static int amdgpu_vmid_grab_used(struct amdgpu_vm *vm, unsigned vmhub = ring->funcs->vmhub; struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub]; uint64_t fence_context = adev->fence_context + ring->idx; - struct dma_fence *updates = sync->last_vm_update; + uint64_t updates = amdgpu_vm_tlb_seq(vm); int r; job->vm_needs_flush = vm->use_cpu_for_update; @@ -337,7 +330,6 @@ static int amdgpu_vmid_grab_used(struct amdgpu_vm *vm, /* Check if we can use a VMID already assigned to this VM */ list_for_each_entry_reverse((*id), &id_mgr->ids_lru, list) { bool needs_flush = vm->use_cpu_for_update; - struct dma_fence *flushed; /* Check all the prerequisites to using this VMID */ if ((*id)->owner != vm->immediate.fence_context) @@ -351,8 +343,7 @@ static int amdgpu_vmid_grab_used(struct amdgpu_vm *vm, !dma_fence_is_signaled((*id)->last_flush))) needs_flush = true; - flushed = (*id)->flushed_updates; - if (updates && (!flushed || dma_fence_is_later(updates, flushed))) + if ((*id)->flushed_updates < updates) needs_flush = true; if (needs_flush && !adev->vm_manager.concurrent_flush) @@ -365,11 +356,7 @@ static int amdgpu_vmid_grab_used(struct amdgpu_vm *vm, if (r) return r; - if (updates && (!flushed || dma_fence_is_later(updates, flushed))) { - dma_fence_put((*id)->flushed_updates); - (*id)->flushed_updates = dma_fence_get(updates); - } - + (*id)->flushed_updates = updates; job->vm_needs_flush |= needs_flush; return 0; } @@ -415,8 +402,6 @@ int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring, goto error; if (!id) { - struct dma_fence *updates = sync->last_vm_update; - /* Still no ID to use? Then use the idle one found earlier */ id = idle; @@ -425,8 +410,7 @@ int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring, if (r) goto error; - dma_fence_put(id->flushed_updates); - id->flushed_updates = dma_fence_get(updates); + id->flushed_updates = amdgpu_vm_tlb_seq(vm); job->vm_needs_flush = true; } @@ -593,7 +577,6 @@ void amdgpu_vmid_mgr_fini(struct amdgpu_device *adev) struct amdgpu_vmid *id = &id_mgr->ids[j]; amdgpu_sync_free(&id->active); - dma_fence_put(id->flushed_updates); dma_fence_put(id->last_flush); dma_fence_put(id->pasid_mapping); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h index 0c3b4fa1f936..06c8a0034fa5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h @@ -47,7 +47,7 @@ struct amdgpu_vmid { uint64_t pd_gpu_addr; /* last flushed PD/PT update */ - struct dma_fence *flushed_updates; + uint64_t flushed_updates; uint32_t current_gpu_reset_count; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h index 55fbff2be761..b6c7fb00e05a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h @@ -24,6 +24,8 @@ #ifndef __AMDGPU_JPEG_H__ #define __AMDGPU_JPEG_H__ +#include "amdgpu_ras.h" + #define AMDGPU_MAX_JPEG_INSTANCES 2 #define AMDGPU_JPEG_HARVEST_JPEG0 (1 << 0) @@ -39,6 +41,10 @@ struct amdgpu_jpeg_inst { struct amdgpu_jpeg_reg external; }; +struct amdgpu_jpeg_ras { + struct amdgpu_ras_block_object ras_block; +}; + struct amdgpu_jpeg { uint8_t num_jpeg_inst; struct amdgpu_jpeg_inst inst[AMDGPU_MAX_JPEG_INSTANCES]; @@ -48,6 +54,8 @@ struct amdgpu_jpeg { enum amd_powergating_state cur_state; struct mutex jpeg_pg_lock; atomic_t total_submission_cnt; + struct ras_common_if *ras_if; + struct amdgpu_jpeg_ras *ras; }; int amdgpu_jpeg_sw_init(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h index 9f1540f0ebf9..f939395c5914 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h @@ -34,7 +34,7 @@ struct amdgpu_mmhub_funcs { void (*gart_disable)(struct amdgpu_device *adev); int (*set_clockgating)(struct amdgpu_device *adev, enum amd_clockgating_state state); - void (*get_clockgating)(struct amdgpu_device *adev, u32 *flags); + void (*get_clockgating)(struct amdgpu_device *adev, u64 *flags); void (*setup_vm_pt_regs)(struct amdgpu_device *adev, uint32_t vmid, uint64_t page_table_base); void (*update_power_gating)(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c index 4b153daf283d..b86c0b8252a5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c @@ -75,8 +75,8 @@ static bool amdgpu_mn_invalidate_gfx(struct mmu_interval_notifier *mni, mmu_interval_set_seq(mni, cur_seq); - r = dma_resv_wait_timeout(bo->tbo.base.resv, true, false, - MAX_SCHEDULE_TIMEOUT); + r = dma_resv_wait_timeout(bo->tbo.base.resv, DMA_RESV_USAGE_BOOKKEEP, + false, MAX_SCHEDULE_TIMEOUT); mutex_unlock(&adev->notifier_lock); if (r <= 0) DRM_ERROR("(%ld) failed to wait for user bo\n", r); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h index a546cb3cfa18..e8da738b309e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h @@ -592,19 +592,6 @@ int amdgpu_display_get_crtc_scanoutpos(struct drm_device *dev, int *hpos, ktime_t *stime, ktime_t *etime, const struct drm_display_mode *mode); -int amdgpu_display_gem_fb_init(struct drm_device *dev, - struct amdgpu_framebuffer *rfb, - const struct drm_mode_fb_cmd2 *mode_cmd, - struct drm_gem_object *obj); -int amdgpu_display_gem_fb_verify_and_init( - struct drm_device *dev, struct amdgpu_framebuffer *rfb, - struct drm_file *file_priv, const struct drm_mode_fb_cmd2 *mode_cmd, - struct drm_gem_object *obj); -int amdgpu_display_framebuffer_init(struct drm_device *dev, - struct amdgpu_framebuffer *rfb, - const struct drm_mode_fb_cmd2 *mode_cmd, - struct drm_gem_object *obj); - int amdgpufb_remove(struct drm_device *dev, struct drm_framebuffer *fb); void amdgpu_enc_destroy(struct drm_encoder *encoder); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h index 3d13e601fc35..03439083182a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h @@ -83,7 +83,7 @@ struct amdgpu_nbio_funcs { void (*update_medium_grain_light_sleep)(struct amdgpu_device *adev, bool enable); void (*get_clockgating_state)(struct amdgpu_device *adev, - u32 *flags); + u64 *flags); void (*ih_control)(struct amdgpu_device *adev); void (*init_registers)(struct amdgpu_device *adev); void (*remap_hdp_registers)(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index dd6693fff280..5444515c1476 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -472,7 +472,7 @@ static bool amdgpu_bo_validate_size(struct amdgpu_device *adev, fail: DRM_DEBUG("BO size %lu > total memory in domain: %llu\n", size, - man->size << PAGE_SHIFT); + man->size); return false; } @@ -612,9 +612,8 @@ int amdgpu_bo_create(struct amdgpu_device *adev, if (unlikely(r)) goto fail_unreserve; - amdgpu_bo_fence(bo, fence, false); - dma_fence_put(bo->tbo.moving); - bo->tbo.moving = dma_fence_get(fence); + dma_resv_add_fence(bo->tbo.base.resv, fence, + DMA_RESV_USAGE_KERNEL); dma_fence_put(fence); } if (!bp->resv) @@ -761,6 +760,11 @@ int amdgpu_bo_kmap(struct amdgpu_bo *bo, void **ptr) if (bo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS) return -EPERM; + r = dma_resv_wait_timeout(bo->tbo.base.resv, DMA_RESV_USAGE_KERNEL, + false, MAX_SCHEDULE_TIMEOUT); + if (r < 0) + return r; + kptr = amdgpu_bo_kptr(bo); if (kptr) { if (ptr) @@ -768,11 +772,6 @@ int amdgpu_bo_kmap(struct amdgpu_bo *bo, void **ptr) return 0; } - r = dma_resv_wait_timeout(bo->tbo.base.resv, false, false, - MAX_SCHEDULE_TIMEOUT); - if (r < 0) - return r; - r = ttm_bo_kmap(&bo->tbo, 0, bo->tbo.resource->num_pages, &bo->kmap); if (r) return r; @@ -1399,10 +1398,8 @@ void amdgpu_bo_fence(struct amdgpu_bo *bo, struct dma_fence *fence, return; } - if (shared) - dma_resv_add_shared_fence(resv, fence); - else - dma_resv_add_excl_fence(resv, fence); + dma_resv_add_fence(resv, fence, shared ? DMA_RESV_USAGE_READ : + DMA_RESV_USAGE_WRITE); } /** diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index a6acec1a6155..f6527aa19238 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -46,8 +46,6 @@ static int psp_sysfs_init(struct amdgpu_device *adev); static void psp_sysfs_fini(struct amdgpu_device *adev); static int psp_load_smu_fw(struct psp_context *psp); -static int psp_ta_unload(struct psp_context *psp, struct ta_context *context); -static int psp_ta_load(struct psp_context *psp, struct ta_context *context); static int psp_rap_terminate(struct psp_context *psp); static int psp_securedisplay_terminate(struct psp_context *psp); @@ -862,7 +860,7 @@ static void psp_prep_ta_unload_cmd_buf(struct psp_gfx_cmd_resp *cmd, cmd->cmd.cmd_unload_ta.session_id = session_id; } -static int psp_ta_unload(struct psp_context *psp, struct ta_context *context) +int psp_ta_unload(struct psp_context *psp, struct ta_context *context) { int ret; struct psp_gfx_cmd_resp *cmd = acquire_psp_cmd_buf(psp); @@ -944,7 +942,7 @@ static void psp_prep_ta_load_cmd_buf(struct psp_gfx_cmd_resp *cmd, cmd->cmd.cmd_load_ta.cmd_buf_len = context->mem_context.shared_mem_size; } -static int psp_ta_init_shared_buf(struct psp_context *psp, +int psp_ta_init_shared_buf(struct psp_context *psp, struct ta_mem_context *mem_ctx) { /* @@ -958,7 +956,7 @@ static int psp_ta_init_shared_buf(struct psp_context *psp, &mem_ctx->shared_buf); } -static void psp_ta_free_shared_buf(struct ta_mem_context *mem_ctx) +void psp_ta_free_shared_buf(struct ta_mem_context *mem_ctx) { amdgpu_bo_free_kernel(&mem_ctx->shared_bo, &mem_ctx->shared_mc_addr, &mem_ctx->shared_buf); @@ -969,6 +967,42 @@ static int psp_xgmi_init_shared_buf(struct psp_context *psp) return psp_ta_init_shared_buf(psp, &psp->xgmi_context.context.mem_context); } +static void psp_prep_ta_invoke_indirect_cmd_buf(struct psp_gfx_cmd_resp *cmd, + uint32_t ta_cmd_id, + struct ta_context *context) +{ + cmd->cmd_id = GFX_CMD_ID_INVOKE_CMD; + cmd->cmd.cmd_invoke_cmd.session_id = context->session_id; + cmd->cmd.cmd_invoke_cmd.ta_cmd_id = ta_cmd_id; + + cmd->cmd.cmd_invoke_cmd.buf.num_desc = 1; + cmd->cmd.cmd_invoke_cmd.buf.total_size = context->mem_context.shared_mem_size; + cmd->cmd.cmd_invoke_cmd.buf.buf_desc[0].buf_size = context->mem_context.shared_mem_size; + cmd->cmd.cmd_invoke_cmd.buf.buf_desc[0].buf_phy_addr_lo = + lower_32_bits(context->mem_context.shared_mc_addr); + cmd->cmd.cmd_invoke_cmd.buf.buf_desc[0].buf_phy_addr_hi = + upper_32_bits(context->mem_context.shared_mc_addr); +} + +int psp_ta_invoke_indirect(struct psp_context *psp, + uint32_t ta_cmd_id, + struct ta_context *context) +{ + int ret; + struct psp_gfx_cmd_resp *cmd = acquire_psp_cmd_buf(psp); + + psp_prep_ta_invoke_indirect_cmd_buf(cmd, ta_cmd_id, context); + + ret = psp_cmd_submit_buf(psp, NULL, cmd, + psp->fence_buf_mc_addr); + + context->resp_status = cmd->resp.status; + + release_psp_cmd_buf(psp); + + return ret; +} + static void psp_prep_ta_invoke_cmd_buf(struct psp_gfx_cmd_resp *cmd, uint32_t ta_cmd_id, uint32_t session_id) @@ -978,7 +1012,7 @@ static void psp_prep_ta_invoke_cmd_buf(struct psp_gfx_cmd_resp *cmd, cmd->cmd.cmd_invoke_cmd.ta_cmd_id = ta_cmd_id; } -static int psp_ta_invoke(struct psp_context *psp, +int psp_ta_invoke(struct psp_context *psp, uint32_t ta_cmd_id, struct ta_context *context) { @@ -990,12 +1024,14 @@ static int psp_ta_invoke(struct psp_context *psp, ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr); + context->resp_status = cmd->resp.status; + release_psp_cmd_buf(psp); return ret; } -static int psp_ta_load(struct psp_context *psp, struct ta_context *context) +int psp_ta_load(struct psp_context *psp, struct ta_context *context) { int ret; struct psp_gfx_cmd_resp *cmd; @@ -1010,6 +1046,8 @@ static int psp_ta_load(struct psp_context *psp, struct ta_context *context) ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr); + context->resp_status = cmd->resp.status; + if (!ret) { context->session_id = cmd->resp.session_id; } @@ -1415,7 +1453,7 @@ int psp_ras_enable_features(struct psp_context *psp, return 0; } -static int psp_ras_terminate(struct psp_context *psp) +int psp_ras_terminate(struct psp_context *psp) { int ret; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h index ff7d533eb746..cf8d3199b35b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h @@ -48,6 +48,17 @@ enum psp_shared_mem_size { PSP_SECUREDISPLAY_SHARED_MEM_SIZE = 0x4000, }; +enum ta_type_id { + TA_TYPE_XGMI = 1, + TA_TYPE_RAS, + TA_TYPE_HDCP, + TA_TYPE_DTM, + TA_TYPE_RAP, + TA_TYPE_SECUREDISPLAY, + + TA_TYPE_MAX_INDEX, +}; + struct psp_context; struct psp_xgmi_node_info; struct psp_xgmi_topology_info; @@ -151,9 +162,11 @@ struct ta_mem_context { struct ta_context { bool initialized; uint32_t session_id; + uint32_t resp_status; struct ta_mem_context mem_context; struct psp_bin_desc bin_desc; enum psp_gfx_cmd_id ta_load_type; + enum ta_type_id ta_type; }; struct ta_cp_context { @@ -407,6 +420,18 @@ int psp_gpu_reset(struct amdgpu_device *adev); int psp_update_vcn_sram(struct amdgpu_device *adev, int inst_idx, uint64_t cmd_gpu_addr, int cmd_size); +int psp_ta_init_shared_buf(struct psp_context *psp, + struct ta_mem_context *mem_ctx); +void psp_ta_free_shared_buf(struct ta_mem_context *mem_ctx); +int psp_ta_unload(struct psp_context *psp, struct ta_context *context); +int psp_ta_load(struct psp_context *psp, struct ta_context *context); +int psp_ta_invoke(struct psp_context *psp, + uint32_t ta_cmd_id, + struct ta_context *context); +int psp_ta_invoke_indirect(struct psp_context *psp, + uint32_t ta_cmd_id, + struct ta_context *context); + int psp_xgmi_initialize(struct psp_context *psp, bool set_extended_data, bool load_ta); int psp_xgmi_terminate(struct psp_context *psp); int psp_xgmi_invoke(struct psp_context *psp, uint32_t ta_cmd_id); @@ -425,6 +450,7 @@ int psp_ras_enable_features(struct psp_context *psp, union ta_ras_cmd_input *info, bool enable); int psp_ras_trigger_error(struct psp_context *psp, struct ta_ras_trigger_error_input *info); +int psp_ras_terminate(struct psp_context *psp); int psp_hdcp_invoke(struct psp_context *psp, uint32_t ta_cmd_id); int psp_dtm_invoke(struct psp_context *psp, uint32_t ta_cmd_id); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c new file mode 100644 index 000000000000..247a476e6354 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c @@ -0,0 +1,308 @@ +/* + * Copyright 2022 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "amdgpu.h" +#include "amdgpu_psp_ta.h" + +static const char *TA_IF_FS_NAME = "ta_if"; + +struct dentry *dir; +static struct dentry *ta_load_debugfs_dentry; +static struct dentry *ta_unload_debugfs_dentry; +static struct dentry *ta_invoke_debugfs_dentry; + +static ssize_t ta_if_load_debugfs_write(struct file *fp, const char *buf, + size_t len, loff_t *off); +static ssize_t ta_if_unload_debugfs_write(struct file *fp, const char *buf, + size_t len, loff_t *off); +static ssize_t ta_if_invoke_debugfs_write(struct file *fp, const char *buf, + size_t len, loff_t *off); + + +static uint32_t get_bin_version(const uint8_t *bin) +{ + const struct common_firmware_header *hdr = + (const struct common_firmware_header *)bin; + + return hdr->ucode_version; +} + +static void prep_ta_mem_context(struct psp_context *psp, + struct ta_context *context, + uint8_t *shared_buf, + uint32_t shared_buf_len) +{ + context->mem_context.shared_mem_size = PAGE_ALIGN(shared_buf_len); + psp_ta_init_shared_buf(psp, &context->mem_context); + + memcpy((void *)context->mem_context.shared_buf, shared_buf, shared_buf_len); +} + +static bool is_ta_type_valid(enum ta_type_id ta_type) +{ + bool ret = false; + + switch (ta_type) { + case TA_TYPE_RAS: + ret = true; + break; + default: + break; + } + + return ret; +} + +static const struct file_operations ta_load_debugfs_fops = { + .write = ta_if_load_debugfs_write, + .llseek = default_llseek, + .owner = THIS_MODULE +}; + +static const struct file_operations ta_unload_debugfs_fops = { + .write = ta_if_unload_debugfs_write, + .llseek = default_llseek, + .owner = THIS_MODULE +}; + +static const struct file_operations ta_invoke_debugfs_fops = { + .write = ta_if_invoke_debugfs_write, + .llseek = default_llseek, + .owner = THIS_MODULE +}; + + +/** + * DOC: AMDGPU TA debugfs interfaces + * + * Three debugfs interfaces can be opened by a program to + * load/invoke/unload TA, + * + * - /sys/kernel/debug/dri/<N>/ta_if/ta_load + * - /sys/kernel/debug/dri/<N>/ta_if/ta_invoke + * - /sys/kernel/debug/dri/<N>/ta_if/ta_unload + * + * How to use the interfaces in a program? + * + * A program needs to provide transmit buffer to the interfaces + * and will receive buffer from the interfaces below, + * + * - For TA load debugfs interface: + * Transmit buffer: + * - TA type (4bytes) + * - TA bin length (4bytes) + * - TA bin + * Receive buffer: + * - TA ID (4bytes) + * + * - For TA invoke debugfs interface: + * Transmit buffer: + * - TA ID (4bytes) + * - TA CMD ID (4bytes) + * - TA shard buf length (4bytes) + * - TA shared buf + * Receive buffer: + * - TA shared buf + * + * - For TA unload debugfs interface: + * Transmit buffer: + * - TA ID (4bytes) + */ + +static ssize_t ta_if_load_debugfs_write(struct file *fp, const char *buf, size_t len, loff_t *off) +{ + uint32_t ta_type = 0; + uint32_t ta_bin_len = 0; + uint8_t *ta_bin = NULL; + uint32_t copy_pos = 0; + int ret = 0; + + struct amdgpu_device *adev = (struct amdgpu_device *)file_inode(fp)->i_private; + struct psp_context *psp = &adev->psp; + struct ta_context context = {0}; + + if (!buf) + return -EINVAL; + + ret = copy_from_user((void *)&ta_type, &buf[copy_pos], sizeof(uint32_t)); + if (ret || (!is_ta_type_valid(ta_type))) + return -EINVAL; + + copy_pos += sizeof(uint32_t); + + ret = copy_from_user((void *)&ta_bin_len, &buf[copy_pos], sizeof(uint32_t)); + if (ret) + return -EINVAL; + + copy_pos += sizeof(uint32_t); + + ta_bin = kzalloc(ta_bin_len, GFP_KERNEL); + if (!ta_bin) + ret = -ENOMEM; + ret = copy_from_user((void *)ta_bin, &buf[copy_pos], ta_bin_len); + if (ret) + goto err_free_bin; + + ret = psp_ras_terminate(psp); + if (ret) { + dev_err(adev->dev, "Failed to unload embedded RAS TA\n"); + goto err_free_bin; + } + + context.ta_type = ta_type; + context.ta_load_type = GFX_CMD_ID_LOAD_TA; + context.bin_desc.fw_version = get_bin_version(ta_bin); + context.bin_desc.size_bytes = ta_bin_len; + context.bin_desc.start_addr = ta_bin; + + ret = psp_ta_load(psp, &context); + + if (ret || context.resp_status) { + dev_err(adev->dev, "TA load via debugfs failed (%d) status %d\n", + ret, context.resp_status); + goto err_free_bin; + } + + context.initialized = true; + ret = copy_to_user((char *)buf, (void *)&context.session_id, sizeof(uint32_t)); + +err_free_bin: + kfree(ta_bin); + + return ret; +} + +static ssize_t ta_if_unload_debugfs_write(struct file *fp, const char *buf, size_t len, loff_t *off) +{ + uint32_t ta_id = 0; + int ret = 0; + + struct amdgpu_device *adev = (struct amdgpu_device *)file_inode(fp)->i_private; + struct psp_context *psp = &adev->psp; + struct ta_context context = {0}; + + if (!buf) + return -EINVAL; + + ret = copy_from_user((void *)&ta_id, buf, sizeof(uint32_t)); + if (ret) + return -EINVAL; + + context.session_id = ta_id; + + ret = psp_ta_unload(psp, &context); + if (!ret) + context.initialized = false; + + return ret; +} + +static ssize_t ta_if_invoke_debugfs_write(struct file *fp, const char *buf, size_t len, loff_t *off) +{ + uint32_t ta_id = 0; + uint32_t cmd_id = 0; + uint32_t shared_buf_len = 0; + uint8_t *shared_buf = NULL; + uint32_t copy_pos = 0; + int ret = 0; + + struct amdgpu_device *adev = (struct amdgpu_device *)file_inode(fp)->i_private; + struct psp_context *psp = &adev->psp; + struct ta_context context = {0}; + + if (!buf) + return -EINVAL; + + ret = copy_from_user((void *)&ta_id, &buf[copy_pos], sizeof(uint32_t)); + if (ret) + return -EINVAL; + copy_pos += sizeof(uint32_t); + + ret = copy_from_user((void *)&cmd_id, &buf[copy_pos], sizeof(uint32_t)); + if (ret) + return -EINVAL; + copy_pos += sizeof(uint32_t); + + ret = copy_from_user((void *)&shared_buf_len, &buf[copy_pos], sizeof(uint32_t)); + if (ret) + return -EINVAL; + copy_pos += sizeof(uint32_t); + + shared_buf = kzalloc(shared_buf_len, GFP_KERNEL); + if (!shared_buf) + ret = -ENOMEM; + ret = copy_from_user((void *)shared_buf, &buf[copy_pos], shared_buf_len); + if (ret) + goto err_free_shared_buf; + + context.session_id = ta_id; + + prep_ta_mem_context(psp, &context, shared_buf, shared_buf_len); + + ret = psp_ta_invoke_indirect(psp, cmd_id, &context); + + if (ret || context.resp_status) { + dev_err(adev->dev, "TA invoke via debugfs failed (%d) status %d\n", + ret, context.resp_status); + goto err_free_ta_shared_buf; + } + + ret = copy_to_user((char *)buf, context.mem_context.shared_buf, shared_buf_len); + +err_free_ta_shared_buf: + psp_ta_free_shared_buf(&context.mem_context); + +err_free_shared_buf: + kfree(shared_buf); + + return ret; +} + +static struct dentry *amdgpu_ta_if_debugfs_create(struct amdgpu_device *adev) +{ + struct drm_minor *minor = adev_to_drm(adev)->primary; + + dir = debugfs_create_dir(TA_IF_FS_NAME, minor->debugfs_root); + + ta_load_debugfs_dentry = debugfs_create_file("ta_load", 0200, dir, adev, + &ta_load_debugfs_fops); + + ta_unload_debugfs_dentry = debugfs_create_file("ta_unload", 0200, dir, + adev, &ta_unload_debugfs_fops); + + ta_invoke_debugfs_dentry = debugfs_create_file("ta_invoke", 0200, dir, + adev, &ta_invoke_debugfs_fops); + return dir; +} + +void amdgpu_ta_if_debugfs_init(struct amdgpu_device *adev) +{ +#if defined(CONFIG_DEBUG_FS) + dir = amdgpu_ta_if_debugfs_create(adev); +#endif +} + +void amdgpu_ta_if_debugfs_remove(void) +{ + debugfs_remove_recursive(dir); +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.h new file mode 100644 index 000000000000..883f89d57616 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.h @@ -0,0 +1,30 @@ +/* + * Copyright 2022 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __AMDGPU_PSP_TA_H__ +#define __AMDGPU_PSP_TA_H__ + +void amdgpu_ta_if_debugfs_init(struct amdgpu_device *adev); +void amdgpu_ta_if_debugfs_remove(void); + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 424c22a841f4..ec709997c9c7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -66,6 +66,8 @@ const char *ras_block_string[] = { "mp1", "fuse", "mca", + "vcn", + "jpeg", }; const char *ras_mca_block_string[] = { @@ -2205,6 +2207,13 @@ static void amdgpu_ras_check_supported(struct amdgpu_device *adev) dev_info(adev->dev, "SRAM ECC is active.\n"); adev->ras_hw_enabled |= ~(1 << AMDGPU_RAS_BLOCK__UMC | 1 << AMDGPU_RAS_BLOCK__DF); + + if (adev->ip_versions[VCN_HWIP][0] == IP_VERSION(2, 6, 0)) + adev->ras_hw_enabled |= (1 << AMDGPU_RAS_BLOCK__VCN | + 1 << AMDGPU_RAS_BLOCK__JPEG); + else + adev->ras_hw_enabled &= ~(1 << AMDGPU_RAS_BLOCK__VCN | + 1 << AMDGPU_RAS_BLOCK__JPEG); } else { dev_info(adev->dev, "SRAM ECC is not presented.\n"); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h index 9314fde81e68..606df8869b89 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h @@ -49,6 +49,8 @@ enum amdgpu_ras_block { AMDGPU_RAS_BLOCK__MP1, AMDGPU_RAS_BLOCK__FUSE, AMDGPU_RAS_BLOCK__MCA, + AMDGPU_RAS_BLOCK__VCN, + AMDGPU_RAS_BLOCK__JPEG, AMDGPU_RAS_BLOCK__LAST }; @@ -506,6 +508,7 @@ struct amdgpu_ras_block_hw_ops { void (*query_ras_error_address)(struct amdgpu_device *adev, void *ras_error_status); void (*reset_ras_error_count)(struct amdgpu_device *adev); void (*reset_ras_error_status)(struct amdgpu_device *adev); + bool (*query_poison_status)(struct amdgpu_device *adev); }; /* work flow diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h index acfa207cf970..6546552e596c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h @@ -30,12 +30,15 @@ #include <drm/ttm/ttm_resource.h> #include <drm/ttm/ttm_range_manager.h> +#include "amdgpu_vram_mgr.h" + /* state back for walking over vram_mgr and gtt_mgr allocations */ struct amdgpu_res_cursor { uint64_t start; uint64_t size; uint64_t remaining; - struct drm_mm_node *node; + void *node; + uint32_t mem_type; }; /** @@ -52,27 +55,63 @@ static inline void amdgpu_res_first(struct ttm_resource *res, uint64_t start, uint64_t size, struct amdgpu_res_cursor *cur) { + struct drm_buddy_block *block; + struct list_head *head, *next; struct drm_mm_node *node; - if (!res || res->mem_type == TTM_PL_SYSTEM) { - cur->start = start; - cur->size = size; - cur->remaining = size; - cur->node = NULL; - WARN_ON(res && start + size > res->num_pages << PAGE_SHIFT); - return; - } + if (!res) + goto fallback; BUG_ON(start + size > res->num_pages << PAGE_SHIFT); - node = to_ttm_range_mgr_node(res)->mm_nodes; - while (start >= node->size << PAGE_SHIFT) - start -= node++->size << PAGE_SHIFT; + cur->mem_type = res->mem_type; + + switch (cur->mem_type) { + case TTM_PL_VRAM: + head = &to_amdgpu_vram_mgr_resource(res)->blocks; + + block = list_first_entry_or_null(head, + struct drm_buddy_block, + link); + if (!block) + goto fallback; + + while (start >= amdgpu_vram_mgr_block_size(block)) { + start -= amdgpu_vram_mgr_block_size(block); + + next = block->link.next; + if (next != head) + block = list_entry(next, struct drm_buddy_block, link); + } + + cur->start = amdgpu_vram_mgr_block_start(block) + start; + cur->size = min(amdgpu_vram_mgr_block_size(block) - start, size); + cur->remaining = size; + cur->node = block; + break; + case TTM_PL_TT: + node = to_ttm_range_mgr_node(res)->mm_nodes; + while (start >= node->size << PAGE_SHIFT) + start -= node++->size << PAGE_SHIFT; + + cur->start = (node->start << PAGE_SHIFT) + start; + cur->size = min((node->size << PAGE_SHIFT) - start, size); + cur->remaining = size; + cur->node = node; + break; + default: + goto fallback; + } - cur->start = (node->start << PAGE_SHIFT) + start; - cur->size = min((node->size << PAGE_SHIFT) - start, size); + return; + +fallback: + cur->start = start; + cur->size = size; cur->remaining = size; - cur->node = node; + cur->node = NULL; + WARN_ON(res && start + size > res->num_pages << PAGE_SHIFT); + return; } /** @@ -85,7 +124,9 @@ static inline void amdgpu_res_first(struct ttm_resource *res, */ static inline void amdgpu_res_next(struct amdgpu_res_cursor *cur, uint64_t size) { - struct drm_mm_node *node = cur->node; + struct drm_buddy_block *block; + struct drm_mm_node *node; + struct list_head *next; BUG_ON(size > cur->remaining); @@ -99,9 +140,27 @@ static inline void amdgpu_res_next(struct amdgpu_res_cursor *cur, uint64_t size) return; } - cur->node = ++node; - cur->start = node->start << PAGE_SHIFT; - cur->size = min(node->size << PAGE_SHIFT, cur->remaining); + switch (cur->mem_type) { + case TTM_PL_VRAM: + block = cur->node; + + next = block->link.next; + block = list_entry(next, struct drm_buddy_block, link); + + cur->node = block; + cur->start = amdgpu_vram_mgr_block_start(block); + cur->size = min(amdgpu_vram_mgr_block_size(block), cur->remaining); + break; + case TTM_PL_TT: + node = cur->node; + + cur->node = ++node; + cur->start = node->start << PAGE_SHIFT; + cur->size = min(node->size << PAGE_SHIFT, cur->remaining); + break; + default: + return; + } } #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_smuio.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_smuio.h index 484bb3dcec47..c7a823f3f2c5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_smuio.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_smuio.h @@ -27,7 +27,7 @@ struct amdgpu_smuio_funcs { u32 (*get_rom_index_offset)(struct amdgpu_device *adev); u32 (*get_rom_data_offset)(struct amdgpu_device *adev); void (*update_rom_clock_gating)(struct amdgpu_device *adev, bool enable); - void (*get_clock_gating_state)(struct amdgpu_device *adev, u32 *flags); + void (*get_clock_gating_state)(struct amdgpu_device *adev, u64 *flags); u32 (*get_die_id)(struct amdgpu_device *adev); u32 (*get_socket_id)(struct amdgpu_device *adev); bool (*is_host_gpu_xgmi_supported)(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c index 40e06745fae9..504af1b93bfa 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c @@ -51,7 +51,6 @@ static struct kmem_cache *amdgpu_sync_slab; void amdgpu_sync_create(struct amdgpu_sync *sync) { hash_init(sync->fences); - sync->last_vm_update = NULL; } /** @@ -171,23 +170,6 @@ int amdgpu_sync_fence(struct amdgpu_sync *sync, struct dma_fence *f) return 0; } -/** - * amdgpu_sync_vm_fence - remember to sync to this VM fence - * - * @sync: sync object to add fence to - * @fence: the VM fence to add - * - * Add the fence to the sync object and remember it as VM update. - */ -int amdgpu_sync_vm_fence(struct amdgpu_sync *sync, struct dma_fence *fence) -{ - if (!fence) - return 0; - - amdgpu_sync_keep_later(&sync->last_vm_update, fence); - return amdgpu_sync_fence(sync, fence); -} - /* Determine based on the owner and mode if we should sync to a fence or not */ static bool amdgpu_sync_test_fence(struct amdgpu_device *adev, enum amdgpu_sync_mode mode, @@ -259,7 +241,8 @@ int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync, if (resv == NULL) return -EINVAL; - dma_resv_for_each_fence(&cursor, resv, true, f) { + /* TODO: Use DMA_RESV_USAGE_READ here */ + dma_resv_for_each_fence(&cursor, resv, DMA_RESV_USAGE_BOOKKEEP, f) { dma_fence_chain_for_each(f, f) { struct dma_fence *tmp = dma_fence_chain_contained(f); @@ -376,9 +359,6 @@ int amdgpu_sync_clone(struct amdgpu_sync *source, struct amdgpu_sync *clone) } } - dma_fence_put(clone->last_vm_update); - clone->last_vm_update = dma_fence_get(source->last_vm_update); - return 0; } @@ -419,8 +399,6 @@ void amdgpu_sync_free(struct amdgpu_sync *sync) dma_fence_put(e->fence); kmem_cache_free(amdgpu_sync_slab, e); } - - dma_fence_put(sync->last_vm_update); } /** diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h index 7c0fe20c470d..2d5c613cda10 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h @@ -43,12 +43,10 @@ enum amdgpu_sync_mode { */ struct amdgpu_sync { DECLARE_HASHTABLE(fences, 4); - struct dma_fence *last_vm_update; }; void amdgpu_sync_create(struct amdgpu_sync *sync); int amdgpu_sync_fence(struct amdgpu_sync *sync, struct dma_fence *f); -int amdgpu_sync_vm_fence(struct amdgpu_sync *sync, struct dma_fence *fence); int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync, struct dma_resv *resv, enum amdgpu_sync_mode mode, void *owner); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index f7f149588432..ec26edd4f4d8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -1344,7 +1344,8 @@ static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo, * If true, then return false as any KFD process needs all its BOs to * be resident to run successfully */ - dma_resv_for_each_fence(&resv_cursor, bo->base.resv, true, f) { + dma_resv_for_each_fence(&resv_cursor, bo->base.resv, + DMA_RESV_USAGE_BOOKKEEP, f) { if (amdkfd_fence_check_mm(f, current->mm)) return false; } @@ -2160,17 +2161,6 @@ int amdgpu_ttm_evict_resources(struct amdgpu_device *adev, int mem_type) #if defined(CONFIG_DEBUG_FS) -static int amdgpu_mm_vram_table_show(struct seq_file *m, void *unused) -{ - struct amdgpu_device *adev = (struct amdgpu_device *)m->private; - struct ttm_resource_manager *man = ttm_manager_type(&adev->mman.bdev, - TTM_PL_VRAM); - struct drm_printer p = drm_seq_file_printer(m); - - ttm_resource_manager_debug(man, &p); - return 0; -} - static int amdgpu_ttm_page_pool_show(struct seq_file *m, void *unused) { struct amdgpu_device *adev = (struct amdgpu_device *)m->private; @@ -2178,55 +2168,6 @@ static int amdgpu_ttm_page_pool_show(struct seq_file *m, void *unused) return ttm_pool_debugfs(&adev->mman.bdev.pool, m); } -static int amdgpu_mm_tt_table_show(struct seq_file *m, void *unused) -{ - struct amdgpu_device *adev = (struct amdgpu_device *)m->private; - struct ttm_resource_manager *man = ttm_manager_type(&adev->mman.bdev, - TTM_PL_TT); - struct drm_printer p = drm_seq_file_printer(m); - - ttm_resource_manager_debug(man, &p); - return 0; -} - -static int amdgpu_mm_gds_table_show(struct seq_file *m, void *unused) -{ - struct amdgpu_device *adev = (struct amdgpu_device *)m->private; - struct ttm_resource_manager *man = ttm_manager_type(&adev->mman.bdev, - AMDGPU_PL_GDS); - struct drm_printer p = drm_seq_file_printer(m); - - ttm_resource_manager_debug(man, &p); - return 0; -} - -static int amdgpu_mm_gws_table_show(struct seq_file *m, void *unused) -{ - struct amdgpu_device *adev = (struct amdgpu_device *)m->private; - struct ttm_resource_manager *man = ttm_manager_type(&adev->mman.bdev, - AMDGPU_PL_GWS); - struct drm_printer p = drm_seq_file_printer(m); - - ttm_resource_manager_debug(man, &p); - return 0; -} - -static int amdgpu_mm_oa_table_show(struct seq_file *m, void *unused) -{ - struct amdgpu_device *adev = (struct amdgpu_device *)m->private; - struct ttm_resource_manager *man = ttm_manager_type(&adev->mman.bdev, - AMDGPU_PL_OA); - struct drm_printer p = drm_seq_file_printer(m); - - ttm_resource_manager_debug(man, &p); - return 0; -} - -DEFINE_SHOW_ATTRIBUTE(amdgpu_mm_vram_table); -DEFINE_SHOW_ATTRIBUTE(amdgpu_mm_tt_table); -DEFINE_SHOW_ATTRIBUTE(amdgpu_mm_gds_table); -DEFINE_SHOW_ATTRIBUTE(amdgpu_mm_gws_table); -DEFINE_SHOW_ATTRIBUTE(amdgpu_mm_oa_table); DEFINE_SHOW_ATTRIBUTE(amdgpu_ttm_page_pool); /* @@ -2436,17 +2377,23 @@ void amdgpu_ttm_debugfs_init(struct amdgpu_device *adev) &amdgpu_ttm_vram_fops, adev->gmc.mc_vram_size); debugfs_create_file("amdgpu_iomem", 0444, root, adev, &amdgpu_ttm_iomem_fops); - debugfs_create_file("amdgpu_vram_mm", 0444, root, adev, - &amdgpu_mm_vram_table_fops); - debugfs_create_file("amdgpu_gtt_mm", 0444, root, adev, - &amdgpu_mm_tt_table_fops); - debugfs_create_file("amdgpu_gds_mm", 0444, root, adev, - &amdgpu_mm_gds_table_fops); - debugfs_create_file("amdgpu_gws_mm", 0444, root, adev, - &amdgpu_mm_gws_table_fops); - debugfs_create_file("amdgpu_oa_mm", 0444, root, adev, - &amdgpu_mm_oa_table_fops); debugfs_create_file("ttm_page_pool", 0444, root, adev, &amdgpu_ttm_page_pool_fops); + ttm_resource_manager_create_debugfs(ttm_manager_type(&adev->mman.bdev, + TTM_PL_VRAM), + root, "amdgpu_vram_mm"); + ttm_resource_manager_create_debugfs(ttm_manager_type(&adev->mman.bdev, + TTM_PL_TT), + root, "amdgpu_gtt_mm"); + ttm_resource_manager_create_debugfs(ttm_manager_type(&adev->mman.bdev, + AMDGPU_PL_GDS), + root, "amdgpu_gds_mm"); + ttm_resource_manager_create_debugfs(ttm_manager_type(&adev->mman.bdev, + AMDGPU_PL_GWS), + root, "amdgpu_gws_mm"); + ttm_resource_manager_create_debugfs(ttm_manager_type(&adev->mman.bdev, + AMDGPU_PL_OA), + root, "amdgpu_oa_mm"); + #endif } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h index 9120ae80ef52..6a70818039dd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h @@ -26,6 +26,7 @@ #include <linux/dma-direction.h> #include <drm/gpu_scheduler.h> +#include "amdgpu_vram_mgr.h" #include "amdgpu.h" #define AMDGPU_PL_GDS (TTM_PL_PRIV + 0) @@ -38,15 +39,6 @@ #define AMDGPU_POISON 0xd0bed0be -struct amdgpu_vram_mgr { - struct ttm_resource_manager manager; - struct drm_mm mm; - spinlock_t lock; - struct list_head reservations_pending; - struct list_head reserved_pages; - atomic64_t vis_usage; -}; - struct amdgpu_gtt_mgr { struct ttm_resource_manager manager; struct drm_mm mm; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c index 39c74d9fa7cc..6eac649499d3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c @@ -1163,7 +1163,8 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo, ib->length_dw = 16; if (direct) { - r = dma_resv_wait_timeout(bo->tbo.base.resv, true, false, + r = dma_resv_wait_timeout(bo->tbo.base.resv, + DMA_RESV_USAGE_KERNEL, false, msecs_to_jiffies(10)); if (r == 0) r = -ETIMEDOUT; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c index f99093f2ebc7..a0ee828a4a97 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c @@ -52,7 +52,7 @@ #define FIRMWARE_ALDEBARAN "amdgpu/aldebaran_vcn.bin" #define FIRMWARE_BEIGE_GOBY "amdgpu/beige_goby_vcn.bin" #define FIRMWARE_YELLOW_CARP "amdgpu/yellow_carp_vcn.bin" -#define FIRMWARE_VCN_3_1_2 "amdgpu/vcn_3_1_2_vcn.bin" +#define FIRMWARE_VCN_3_1_2 "amdgpu/vcn_3_1_2.bin" MODULE_FIRMWARE(FIRMWARE_RAVEN); MODULE_FIRMWARE(FIRMWARE_PICASSO); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h index f06fb7f882e2..fb39065a96bd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h @@ -24,6 +24,8 @@ #ifndef __AMDGPU_VCN_H__ #define __AMDGPU_VCN_H__ +#include "amdgpu_ras.h" + #define AMDGPU_VCN_STACK_SIZE (128*1024) #define AMDGPU_VCN_CONTEXT_SIZE (512*1024) @@ -233,6 +235,10 @@ struct amdgpu_vcn_inst { struct amdgpu_vcn_fw_shared fw_shared; }; +struct amdgpu_vcn_ras { + struct amdgpu_ras_block_object ras_block; +}; + struct amdgpu_vcn { unsigned fw_version; struct delayed_work idle_work; @@ -252,6 +258,9 @@ struct amdgpu_vcn { unsigned harvest_config; int (*pause_dpg_mode)(struct amdgpu_device *adev, int inst_idx, struct dpg_pause_state *new_state); + + struct ras_common_if *ras_if; + struct amdgpu_vcn_ras *ras; }; struct amdgpu_fw_shared_rb_ptrs_struct { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c index a025f080aa6a..ea92edcc0432 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c @@ -23,6 +23,10 @@ #include <linux/module.h> +#ifdef CONFIG_X86 +#include <asm/hypervisor.h> +#endif + #include <drm/drm_drv.h> #include "amdgpu.h" @@ -723,8 +727,12 @@ void amdgpu_detect_virtualization(struct amdgpu_device *adev) break; case CHIP_VEGA10: soc15_set_virt_ops(adev); - /* send a dummy GPU_INIT_DATA request to host on vega10 */ - amdgpu_virt_request_init_data(adev); +#ifdef CONFIG_X86 + /* not send GPU_INIT_DATA with MS_HYPERV*/ + if (!hypervisor_is_type(X86_HYPER_MS_HYPERV)) +#endif + /* send a dummy GPU_INIT_DATA request to host on vega10 */ + amdgpu_virt_request_init_data(adev); break; case CHIP_VEGA20: case CHIP_ARCTURUS: @@ -862,11 +870,11 @@ static u32 amdgpu_virt_rlcg_reg_rw(struct amdgpu_device *adev, u32 offset, u32 v uint32_t timeout = 50000; uint32_t i, tmp; uint32_t ret = 0; - static void *scratch_reg0; - static void *scratch_reg1; - static void *scratch_reg2; - static void *scratch_reg3; - static void *spare_int; + void *scratch_reg0; + void *scratch_reg1; + void *scratch_reg2; + void *scratch_reg3; + void *spare_int; if (!adev->gfx.rlc.rlcg_reg_access_supported) { dev_err(adev->dev, @@ -919,7 +927,7 @@ static u32 amdgpu_virt_rlcg_reg_rw(struct amdgpu_device *adev, u32 offset, u32 v "wrong operation type, rlcg failed to program reg: 0x%05x\n", offset); } else if (tmp & AMDGPU_RLCG_REG_NOT_IN_RANGE) { dev_err(adev->dev, - "regiser is not in range, rlcg failed to program reg: 0x%05x\n", offset); + "register is not in range, rlcg failed to program reg: 0x%05x\n", offset); } else { dev_err(adev->dev, "unknown error type, rlcg failed to program reg: 0x%05x\n", offset); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index b13451255e8b..f9479e23de18 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -89,6 +89,21 @@ struct amdgpu_prt_cb { }; /** + * struct amdgpu_vm_tlb_seq_cb - Helper to increment the TLB flush sequence + */ +struct amdgpu_vm_tlb_seq_cb { + /** + * @vm: pointer to the amdgpu_vm structure to set the fence sequence on + */ + struct amdgpu_vm *vm; + + /** + * @cb: callback + */ + struct dma_fence_cb cb; +}; + +/** * amdgpu_vm_set_pasid - manage pasid and vm ptr mapping * * @adev: amdgpu_device pointer @@ -155,108 +170,6 @@ static inline void amdgpu_vm_eviction_unlock(struct amdgpu_vm *vm) } /** - * amdgpu_vm_level_shift - return the addr shift for each level - * - * @adev: amdgpu_device pointer - * @level: VMPT level - * - * Returns: - * The number of bits the pfn needs to be right shifted for a level. - */ -static unsigned amdgpu_vm_level_shift(struct amdgpu_device *adev, - unsigned level) -{ - switch (level) { - case AMDGPU_VM_PDB2: - case AMDGPU_VM_PDB1: - case AMDGPU_VM_PDB0: - return 9 * (AMDGPU_VM_PDB0 - level) + - adev->vm_manager.block_size; - case AMDGPU_VM_PTB: - return 0; - default: - return ~0; - } -} - -/** - * amdgpu_vm_num_entries - return the number of entries in a PD/PT - * - * @adev: amdgpu_device pointer - * @level: VMPT level - * - * Returns: - * The number of entries in a page directory or page table. - */ -static unsigned amdgpu_vm_num_entries(struct amdgpu_device *adev, - unsigned level) -{ - unsigned shift = amdgpu_vm_level_shift(adev, - adev->vm_manager.root_level); - - if (level == adev->vm_manager.root_level) - /* For the root directory */ - return round_up(adev->vm_manager.max_pfn, 1ULL << shift) - >> shift; - else if (level != AMDGPU_VM_PTB) - /* Everything in between */ - return 512; - else - /* For the page tables on the leaves */ - return AMDGPU_VM_PTE_COUNT(adev); -} - -/** - * amdgpu_vm_num_ats_entries - return the number of ATS entries in the root PD - * - * @adev: amdgpu_device pointer - * - * Returns: - * The number of entries in the root page directory which needs the ATS setting. - */ -static unsigned amdgpu_vm_num_ats_entries(struct amdgpu_device *adev) -{ - unsigned shift; - - shift = amdgpu_vm_level_shift(adev, adev->vm_manager.root_level); - return AMDGPU_GMC_HOLE_START >> (shift + AMDGPU_GPU_PAGE_SHIFT); -} - -/** - * amdgpu_vm_entries_mask - the mask to get the entry number of a PD/PT - * - * @adev: amdgpu_device pointer - * @level: VMPT level - * - * Returns: - * The mask to extract the entry number of a PD/PT from an address. - */ -static uint32_t amdgpu_vm_entries_mask(struct amdgpu_device *adev, - unsigned int level) -{ - if (level <= adev->vm_manager.root_level) - return 0xffffffff; - else if (level != AMDGPU_VM_PTB) - return 0x1ff; - else - return AMDGPU_VM_PTE_COUNT(adev) - 1; -} - -/** - * amdgpu_vm_bo_size - returns the size of the BOs in bytes - * - * @adev: amdgpu_device pointer - * @level: VMPT level - * - * Returns: - * The size of the BO for a page directory or page table in bytes. - */ -static unsigned amdgpu_vm_bo_size(struct amdgpu_device *adev, unsigned level) -{ - return AMDGPU_GPU_PAGE_ALIGN(amdgpu_vm_num_entries(adev, level) * 8); -} - -/** * amdgpu_vm_bo_evicted - vm_bo is evicted * * @vm_bo: vm_bo which is evicted @@ -358,9 +271,8 @@ static void amdgpu_vm_bo_done(struct amdgpu_vm_bo_base *vm_bo) * Initialize a bo_va_base structure and add it to the appropriate lists * */ -static void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base, - struct amdgpu_vm *vm, - struct amdgpu_bo *bo) +void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base, + struct amdgpu_vm *vm, struct amdgpu_bo *bo) { base->vm = vm; base->bo = bo; @@ -396,228 +308,6 @@ static void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base, } /** - * amdgpu_vm_pt_parent - get the parent page directory - * - * @pt: child page table - * - * Helper to get the parent entry for the child page table. NULL if we are at - * the root page directory. - */ -static struct amdgpu_vm_bo_base *amdgpu_vm_pt_parent(struct amdgpu_vm_bo_base *pt) -{ - struct amdgpu_bo *parent = pt->bo->parent; - - if (!parent) - return NULL; - - return parent->vm_bo; -} - -/* - * amdgpu_vm_pt_cursor - state for for_each_amdgpu_vm_pt - */ -struct amdgpu_vm_pt_cursor { - uint64_t pfn; - struct amdgpu_vm_bo_base *parent; - struct amdgpu_vm_bo_base *entry; - unsigned level; -}; - -/** - * amdgpu_vm_pt_start - start PD/PT walk - * - * @adev: amdgpu_device pointer - * @vm: amdgpu_vm structure - * @start: start address of the walk - * @cursor: state to initialize - * - * Initialize a amdgpu_vm_pt_cursor to start a walk. - */ -static void amdgpu_vm_pt_start(struct amdgpu_device *adev, - struct amdgpu_vm *vm, uint64_t start, - struct amdgpu_vm_pt_cursor *cursor) -{ - cursor->pfn = start; - cursor->parent = NULL; - cursor->entry = &vm->root; - cursor->level = adev->vm_manager.root_level; -} - -/** - * amdgpu_vm_pt_descendant - go to child node - * - * @adev: amdgpu_device pointer - * @cursor: current state - * - * Walk to the child node of the current node. - * Returns: - * True if the walk was possible, false otherwise. - */ -static bool amdgpu_vm_pt_descendant(struct amdgpu_device *adev, - struct amdgpu_vm_pt_cursor *cursor) -{ - unsigned mask, shift, idx; - - if ((cursor->level == AMDGPU_VM_PTB) || !cursor->entry || - !cursor->entry->bo) - return false; - - mask = amdgpu_vm_entries_mask(adev, cursor->level); - shift = amdgpu_vm_level_shift(adev, cursor->level); - - ++cursor->level; - idx = (cursor->pfn >> shift) & mask; - cursor->parent = cursor->entry; - cursor->entry = &to_amdgpu_bo_vm(cursor->entry->bo)->entries[idx]; - return true; -} - -/** - * amdgpu_vm_pt_sibling - go to sibling node - * - * @adev: amdgpu_device pointer - * @cursor: current state - * - * Walk to the sibling node of the current node. - * Returns: - * True if the walk was possible, false otherwise. - */ -static bool amdgpu_vm_pt_sibling(struct amdgpu_device *adev, - struct amdgpu_vm_pt_cursor *cursor) -{ - unsigned shift, num_entries; - - /* Root doesn't have a sibling */ - if (!cursor->parent) - return false; - - /* Go to our parents and see if we got a sibling */ - shift = amdgpu_vm_level_shift(adev, cursor->level - 1); - num_entries = amdgpu_vm_num_entries(adev, cursor->level - 1); - - if (cursor->entry == &to_amdgpu_bo_vm(cursor->parent->bo)->entries[num_entries - 1]) - return false; - - cursor->pfn += 1ULL << shift; - cursor->pfn &= ~((1ULL << shift) - 1); - ++cursor->entry; - return true; -} - -/** - * amdgpu_vm_pt_ancestor - go to parent node - * - * @cursor: current state - * - * Walk to the parent node of the current node. - * Returns: - * True if the walk was possible, false otherwise. - */ -static bool amdgpu_vm_pt_ancestor(struct amdgpu_vm_pt_cursor *cursor) -{ - if (!cursor->parent) - return false; - - --cursor->level; - cursor->entry = cursor->parent; - cursor->parent = amdgpu_vm_pt_parent(cursor->parent); - return true; -} - -/** - * amdgpu_vm_pt_next - get next PD/PT in hieratchy - * - * @adev: amdgpu_device pointer - * @cursor: current state - * - * Walk the PD/PT tree to the next node. - */ -static void amdgpu_vm_pt_next(struct amdgpu_device *adev, - struct amdgpu_vm_pt_cursor *cursor) -{ - /* First try a newborn child */ - if (amdgpu_vm_pt_descendant(adev, cursor)) - return; - - /* If that didn't worked try to find a sibling */ - while (!amdgpu_vm_pt_sibling(adev, cursor)) { - /* No sibling, go to our parents and grandparents */ - if (!amdgpu_vm_pt_ancestor(cursor)) { - cursor->pfn = ~0ll; - return; - } - } -} - -/** - * amdgpu_vm_pt_first_dfs - start a deep first search - * - * @adev: amdgpu_device structure - * @vm: amdgpu_vm structure - * @start: optional cursor to start with - * @cursor: state to initialize - * - * Starts a deep first traversal of the PD/PT tree. - */ -static void amdgpu_vm_pt_first_dfs(struct amdgpu_device *adev, - struct amdgpu_vm *vm, - struct amdgpu_vm_pt_cursor *start, - struct amdgpu_vm_pt_cursor *cursor) -{ - if (start) - *cursor = *start; - else - amdgpu_vm_pt_start(adev, vm, 0, cursor); - while (amdgpu_vm_pt_descendant(adev, cursor)); -} - -/** - * amdgpu_vm_pt_continue_dfs - check if the deep first search should continue - * - * @start: starting point for the search - * @entry: current entry - * - * Returns: - * True when the search should continue, false otherwise. - */ -static bool amdgpu_vm_pt_continue_dfs(struct amdgpu_vm_pt_cursor *start, - struct amdgpu_vm_bo_base *entry) -{ - return entry && (!start || entry != start->entry); -} - -/** - * amdgpu_vm_pt_next_dfs - get the next node for a deep first search - * - * @adev: amdgpu_device structure - * @cursor: current state - * - * Move the cursor to the next node in a deep first search. - */ -static void amdgpu_vm_pt_next_dfs(struct amdgpu_device *adev, - struct amdgpu_vm_pt_cursor *cursor) -{ - if (!cursor->entry) - return; - - if (!cursor->parent) - cursor->entry = NULL; - else if (amdgpu_vm_pt_sibling(adev, cursor)) - while (amdgpu_vm_pt_descendant(adev, cursor)); - else - amdgpu_vm_pt_ancestor(cursor); -} - -/* - * for_each_amdgpu_vm_pt_dfs_safe - safe deep first search of all PDs/PTs - */ -#define for_each_amdgpu_vm_pt_dfs_safe(adev, vm, start, cursor, entry) \ - for (amdgpu_vm_pt_first_dfs((adev), (vm), (start), &(cursor)), \ - (entry) = (cursor).entry, amdgpu_vm_pt_next_dfs((adev), &(cursor));\ - amdgpu_vm_pt_continue_dfs((start), (entry)); \ - (entry) = (cursor).entry, amdgpu_vm_pt_next_dfs((adev), &(cursor))) - -/** * amdgpu_vm_get_pd_bo - add the VM PD to a validation list * * @vm: vm providing the BOs @@ -726,316 +416,6 @@ bool amdgpu_vm_ready(struct amdgpu_vm *vm) } /** - * amdgpu_vm_clear_bo - initially clear the PDs/PTs - * - * @adev: amdgpu_device pointer - * @vm: VM to clear BO from - * @vmbo: BO to clear - * @immediate: use an immediate update - * - * Root PD needs to be reserved when calling this. - * - * Returns: - * 0 on success, errno otherwise. - */ -static int amdgpu_vm_clear_bo(struct amdgpu_device *adev, - struct amdgpu_vm *vm, - struct amdgpu_bo_vm *vmbo, - bool immediate) -{ - struct ttm_operation_ctx ctx = { true, false }; - unsigned level = adev->vm_manager.root_level; - struct amdgpu_vm_update_params params; - struct amdgpu_bo *ancestor = &vmbo->bo; - struct amdgpu_bo *bo = &vmbo->bo; - unsigned entries, ats_entries; - uint64_t addr; - int r, idx; - - /* Figure out our place in the hierarchy */ - if (ancestor->parent) { - ++level; - while (ancestor->parent->parent) { - ++level; - ancestor = ancestor->parent; - } - } - - entries = amdgpu_bo_size(bo) / 8; - if (!vm->pte_support_ats) { - ats_entries = 0; - - } else if (!bo->parent) { - ats_entries = amdgpu_vm_num_ats_entries(adev); - ats_entries = min(ats_entries, entries); - entries -= ats_entries; - - } else { - struct amdgpu_vm_bo_base *pt; - - pt = ancestor->vm_bo; - ats_entries = amdgpu_vm_num_ats_entries(adev); - if ((pt - to_amdgpu_bo_vm(vm->root.bo)->entries) >= ats_entries) { - ats_entries = 0; - } else { - ats_entries = entries; - entries = 0; - } - } - - r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); - if (r) - return r; - - if (vmbo->shadow) { - struct amdgpu_bo *shadow = vmbo->shadow; - - r = ttm_bo_validate(&shadow->tbo, &shadow->placement, &ctx); - if (r) - return r; - } - - if (!drm_dev_enter(adev_to_drm(adev), &idx)) - return -ENODEV; - - r = vm->update_funcs->map_table(vmbo); - if (r) - goto exit; - - memset(¶ms, 0, sizeof(params)); - params.adev = adev; - params.vm = vm; - params.immediate = immediate; - - r = vm->update_funcs->prepare(¶ms, NULL, AMDGPU_SYNC_EXPLICIT); - if (r) - goto exit; - - addr = 0; - if (ats_entries) { - uint64_t value = 0, flags; - - flags = AMDGPU_PTE_DEFAULT_ATC; - if (level != AMDGPU_VM_PTB) { - /* Handle leaf PDEs as PTEs */ - flags |= AMDGPU_PDE_PTE; - amdgpu_gmc_get_vm_pde(adev, level, &value, &flags); - } - - r = vm->update_funcs->update(¶ms, vmbo, addr, 0, ats_entries, - value, flags); - if (r) - goto exit; - - addr += ats_entries * 8; - } - - if (entries) { - uint64_t value = 0, flags = 0; - - if (adev->asic_type >= CHIP_VEGA10) { - if (level != AMDGPU_VM_PTB) { - /* Handle leaf PDEs as PTEs */ - flags |= AMDGPU_PDE_PTE; - amdgpu_gmc_get_vm_pde(adev, level, - &value, &flags); - } else { - /* Workaround for fault priority problem on GMC9 */ - flags = AMDGPU_PTE_EXECUTABLE; - } - } - - r = vm->update_funcs->update(¶ms, vmbo, addr, 0, entries, - value, flags); - if (r) - goto exit; - } - - r = vm->update_funcs->commit(¶ms, NULL); -exit: - drm_dev_exit(idx); - return r; -} - -/** - * amdgpu_vm_pt_create - create bo for PD/PT - * - * @adev: amdgpu_device pointer - * @vm: requesting vm - * @level: the page table level - * @immediate: use a immediate update - * @vmbo: pointer to the buffer object pointer - */ -static int amdgpu_vm_pt_create(struct amdgpu_device *adev, - struct amdgpu_vm *vm, - int level, bool immediate, - struct amdgpu_bo_vm **vmbo) -{ - struct amdgpu_bo_param bp; - struct amdgpu_bo *bo; - struct dma_resv *resv; - unsigned int num_entries; - int r; - - memset(&bp, 0, sizeof(bp)); - - bp.size = amdgpu_vm_bo_size(adev, level); - bp.byte_align = AMDGPU_GPU_PAGE_SIZE; - bp.domain = AMDGPU_GEM_DOMAIN_VRAM; - bp.domain = amdgpu_bo_get_preferred_domain(adev, bp.domain); - bp.flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS | - AMDGPU_GEM_CREATE_CPU_GTT_USWC; - - if (level < AMDGPU_VM_PTB) - num_entries = amdgpu_vm_num_entries(adev, level); - else - num_entries = 0; - - bp.bo_ptr_size = struct_size((*vmbo), entries, num_entries); - - if (vm->use_cpu_for_update) - bp.flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; - - bp.type = ttm_bo_type_kernel; - bp.no_wait_gpu = immediate; - if (vm->root.bo) - bp.resv = vm->root.bo->tbo.base.resv; - - r = amdgpu_bo_create_vm(adev, &bp, vmbo); - if (r) - return r; - - bo = &(*vmbo)->bo; - if (vm->is_compute_context || (adev->flags & AMD_IS_APU)) { - (*vmbo)->shadow = NULL; - return 0; - } - - if (!bp.resv) - WARN_ON(dma_resv_lock(bo->tbo.base.resv, - NULL)); - resv = bp.resv; - memset(&bp, 0, sizeof(bp)); - bp.size = amdgpu_vm_bo_size(adev, level); - bp.domain = AMDGPU_GEM_DOMAIN_GTT; - bp.flags = AMDGPU_GEM_CREATE_CPU_GTT_USWC; - bp.type = ttm_bo_type_kernel; - bp.resv = bo->tbo.base.resv; - bp.bo_ptr_size = sizeof(struct amdgpu_bo); - - r = amdgpu_bo_create(adev, &bp, &(*vmbo)->shadow); - - if (!resv) - dma_resv_unlock(bo->tbo.base.resv); - - if (r) { - amdgpu_bo_unref(&bo); - return r; - } - - (*vmbo)->shadow->parent = amdgpu_bo_ref(bo); - amdgpu_bo_add_to_shadow_list(*vmbo); - - return 0; -} - -/** - * amdgpu_vm_alloc_pts - Allocate a specific page table - * - * @adev: amdgpu_device pointer - * @vm: VM to allocate page tables for - * @cursor: Which page table to allocate - * @immediate: use an immediate update - * - * Make sure a specific page table or directory is allocated. - * - * Returns: - * 1 if page table needed to be allocated, 0 if page table was already - * allocated, negative errno if an error occurred. - */ -static int amdgpu_vm_alloc_pts(struct amdgpu_device *adev, - struct amdgpu_vm *vm, - struct amdgpu_vm_pt_cursor *cursor, - bool immediate) -{ - struct amdgpu_vm_bo_base *entry = cursor->entry; - struct amdgpu_bo *pt_bo; - struct amdgpu_bo_vm *pt; - int r; - - if (entry->bo) - return 0; - - r = amdgpu_vm_pt_create(adev, vm, cursor->level, immediate, &pt); - if (r) - return r; - - /* Keep a reference to the root directory to avoid - * freeing them up in the wrong order. - */ - pt_bo = &pt->bo; - pt_bo->parent = amdgpu_bo_ref(cursor->parent->bo); - amdgpu_vm_bo_base_init(entry, vm, pt_bo); - r = amdgpu_vm_clear_bo(adev, vm, pt, immediate); - if (r) - goto error_free_pt; - - return 0; - -error_free_pt: - amdgpu_bo_unref(&pt->shadow); - amdgpu_bo_unref(&pt_bo); - return r; -} - -/** - * amdgpu_vm_free_table - fre one PD/PT - * - * @entry: PDE to free - */ -static void amdgpu_vm_free_table(struct amdgpu_vm_bo_base *entry) -{ - struct amdgpu_bo *shadow; - - if (!entry->bo) - return; - - shadow = amdgpu_bo_shadowed(entry->bo); - if (shadow) { - ttm_bo_set_bulk_move(&shadow->tbo, NULL); - amdgpu_bo_unref(&shadow); - } - - ttm_bo_set_bulk_move(&entry->bo->tbo, NULL); - entry->bo->vm_bo = NULL; - list_del(&entry->vm_status); - amdgpu_bo_unref(&entry->bo); -} - -/** - * amdgpu_vm_free_pts - free PD/PT levels - * - * @adev: amdgpu device structure - * @vm: amdgpu vm structure - * @start: optional cursor where to start freeing PDs/PTs - * - * Free the page directory or page table level and all sub levels. - */ -static void amdgpu_vm_free_pts(struct amdgpu_device *adev, - struct amdgpu_vm *vm, - struct amdgpu_vm_pt_cursor *start) -{ - struct amdgpu_vm_pt_cursor cursor; - struct amdgpu_vm_bo_base *entry; - - for_each_amdgpu_vm_pt_dfs_safe(adev, vm, start, cursor, entry) - amdgpu_vm_free_table(entry); - - if (start) - amdgpu_vm_free_table(start->entry); -} - -/** * amdgpu_vm_check_compute_bug - check whether asic has compute vm bug * * @adev: amdgpu_device pointer @@ -1282,53 +662,6 @@ uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr) } /** - * amdgpu_vm_update_pde - update a single level in the hierarchy - * - * @params: parameters for the update - * @vm: requested vm - * @entry: entry to update - * - * Makes sure the requested entry in parent is up to date. - */ -static int amdgpu_vm_update_pde(struct amdgpu_vm_update_params *params, - struct amdgpu_vm *vm, - struct amdgpu_vm_bo_base *entry) -{ - struct amdgpu_vm_bo_base *parent = amdgpu_vm_pt_parent(entry); - struct amdgpu_bo *bo = parent->bo, *pbo; - uint64_t pde, pt, flags; - unsigned level; - - for (level = 0, pbo = bo->parent; pbo; ++level) - pbo = pbo->parent; - - level += params->adev->vm_manager.root_level; - amdgpu_gmc_get_pde_for_bo(entry->bo, level, &pt, &flags); - pde = (entry - to_amdgpu_bo_vm(parent->bo)->entries) * 8; - return vm->update_funcs->update(params, to_amdgpu_bo_vm(bo), pde, pt, - 1, 0, flags); -} - -/** - * amdgpu_vm_invalidate_pds - mark all PDs as invalid - * - * @adev: amdgpu_device pointer - * @vm: related vm - * - * Mark all PD level as invalid after an error. - */ -static void amdgpu_vm_invalidate_pds(struct amdgpu_device *adev, - struct amdgpu_vm *vm) -{ - struct amdgpu_vm_pt_cursor cursor; - struct amdgpu_vm_bo_base *entry; - - for_each_amdgpu_vm_pt_dfs_safe(adev, vm, NULL, cursor, entry) - if (entry->bo && !entry->moved) - amdgpu_vm_bo_relocated(entry); -} - -/** * amdgpu_vm_update_pdes - make sure that all directories are valid * * @adev: amdgpu_device pointer @@ -1344,6 +677,7 @@ int amdgpu_vm_update_pdes(struct amdgpu_device *adev, struct amdgpu_vm *vm, bool immediate) { struct amdgpu_vm_update_params params; + struct amdgpu_vm_bo_base *entry; int r, idx; if (list_empty(&vm->relocated)) @@ -1359,17 +693,10 @@ int amdgpu_vm_update_pdes(struct amdgpu_device *adev, r = vm->update_funcs->prepare(¶ms, NULL, AMDGPU_SYNC_EXPLICIT); if (r) - goto exit; - - while (!list_empty(&vm->relocated)) { - struct amdgpu_vm_bo_base *entry; - - entry = list_first_entry(&vm->relocated, - struct amdgpu_vm_bo_base, - vm_status); - amdgpu_vm_bo_idle(entry); + goto error; - r = amdgpu_vm_update_pde(¶ms, vm, entry); + list_for_each_entry(entry, &vm->relocated, vm_status) { + r = amdgpu_vm_pde_update(¶ms, entry); if (r) goto error; } @@ -1377,297 +704,68 @@ int amdgpu_vm_update_pdes(struct amdgpu_device *adev, r = vm->update_funcs->commit(¶ms, &vm->last_update); if (r) goto error; - drm_dev_exit(idx); - return 0; + + while (!list_empty(&vm->relocated)) { + entry = list_first_entry(&vm->relocated, + struct amdgpu_vm_bo_base, + vm_status); + amdgpu_vm_bo_idle(entry); + } error: - amdgpu_vm_invalidate_pds(adev, vm); -exit: drm_dev_exit(idx); return r; } -/* - * amdgpu_vm_update_flags - figure out flags for PTE updates - * - * Make sure to set the right flags for the PTEs at the desired level. - */ -static void amdgpu_vm_update_flags(struct amdgpu_vm_update_params *params, - struct amdgpu_bo_vm *pt, unsigned int level, - uint64_t pe, uint64_t addr, - unsigned int count, uint32_t incr, - uint64_t flags) - -{ - if (level != AMDGPU_VM_PTB) { - flags |= AMDGPU_PDE_PTE; - amdgpu_gmc_get_vm_pde(params->adev, level, &addr, &flags); - - } else if (params->adev->asic_type >= CHIP_VEGA10 && - !(flags & AMDGPU_PTE_VALID) && - !(flags & AMDGPU_PTE_PRT)) { - - /* Workaround for fault priority problem on GMC9 */ - flags |= AMDGPU_PTE_EXECUTABLE; - } - - params->vm->update_funcs->update(params, pt, pe, addr, count, incr, - flags); -} - /** - * amdgpu_vm_fragment - get fragment for PTEs + * amdgpu_vm_tlb_seq_cb - make sure to increment tlb sequence + * @fence: unused + * @cb: the callback structure * - * @params: see amdgpu_vm_update_params definition - * @start: first PTE to handle - * @end: last PTE to handle - * @flags: hw mapping flags - * @frag: resulting fragment size - * @frag_end: end of this fragment - * - * Returns the first possible fragment for the start and end address. - */ -static void amdgpu_vm_fragment(struct amdgpu_vm_update_params *params, - uint64_t start, uint64_t end, uint64_t flags, - unsigned int *frag, uint64_t *frag_end) -{ - /** - * The MC L1 TLB supports variable sized pages, based on a fragment - * field in the PTE. When this field is set to a non-zero value, page - * granularity is increased from 4KB to (1 << (12 + frag)). The PTE - * flags are considered valid for all PTEs within the fragment range - * and corresponding mappings are assumed to be physically contiguous. - * - * The L1 TLB can store a single PTE for the whole fragment, - * significantly increasing the space available for translation - * caching. This leads to large improvements in throughput when the - * TLB is under pressure. - * - * The L2 TLB distributes small and large fragments into two - * asymmetric partitions. The large fragment cache is significantly - * larger. Thus, we try to use large fragments wherever possible. - * Userspace can support this by aligning virtual base address and - * allocation size to the fragment size. - * - * Starting with Vega10 the fragment size only controls the L1. The L2 - * is now directly feed with small/huge/giant pages from the walker. - */ - unsigned max_frag; - - if (params->adev->asic_type < CHIP_VEGA10) - max_frag = params->adev->vm_manager.fragment_size; - else - max_frag = 31; - - /* system pages are non continuously */ - if (params->pages_addr) { - *frag = 0; - *frag_end = end; - return; - } - - /* This intentionally wraps around if no bit is set */ - *frag = min((unsigned)ffs(start) - 1, (unsigned)fls64(end - start) - 1); - if (*frag >= max_frag) { - *frag = max_frag; - *frag_end = end & ~((1ULL << max_frag) - 1); - } else { - *frag_end = start + (1 << *frag); - } -} - -/** - * amdgpu_vm_update_ptes - make sure that page tables are valid - * - * @params: see amdgpu_vm_update_params definition - * @start: start of GPU address range - * @end: end of GPU address range - * @dst: destination address to map to, the next dst inside the function - * @flags: mapping flags - * - * Update the page tables in the range @start - @end. - * - * Returns: - * 0 for success, -EINVAL for failure. + * Increments the tlb sequence to make sure that future CS execute a VM flush. */ -static int amdgpu_vm_update_ptes(struct amdgpu_vm_update_params *params, - uint64_t start, uint64_t end, - uint64_t dst, uint64_t flags) +static void amdgpu_vm_tlb_seq_cb(struct dma_fence *fence, + struct dma_fence_cb *cb) { - struct amdgpu_device *adev = params->adev; - struct amdgpu_vm_pt_cursor cursor; - uint64_t frag_start = start, frag_end; - unsigned int frag; - int r; - - /* figure out the initial fragment */ - amdgpu_vm_fragment(params, frag_start, end, flags, &frag, &frag_end); - - /* walk over the address space and update the PTs */ - amdgpu_vm_pt_start(adev, params->vm, start, &cursor); - while (cursor.pfn < end) { - unsigned shift, parent_shift, mask; - uint64_t incr, entry_end, pe_start; - struct amdgpu_bo *pt; - - if (!params->unlocked) { - /* make sure that the page tables covering the - * address range are actually allocated - */ - r = amdgpu_vm_alloc_pts(params->adev, params->vm, - &cursor, params->immediate); - if (r) - return r; - } - - shift = amdgpu_vm_level_shift(adev, cursor.level); - parent_shift = amdgpu_vm_level_shift(adev, cursor.level - 1); - if (params->unlocked) { - /* Unlocked updates are only allowed on the leaves */ - if (amdgpu_vm_pt_descendant(adev, &cursor)) - continue; - } else if (adev->asic_type < CHIP_VEGA10 && - (flags & AMDGPU_PTE_VALID)) { - /* No huge page support before GMC v9 */ - if (cursor.level != AMDGPU_VM_PTB) { - if (!amdgpu_vm_pt_descendant(adev, &cursor)) - return -ENOENT; - continue; - } - } else if (frag < shift) { - /* We can't use this level when the fragment size is - * smaller than the address shift. Go to the next - * child entry and try again. - */ - if (amdgpu_vm_pt_descendant(adev, &cursor)) - continue; - } else if (frag >= parent_shift) { - /* If the fragment size is even larger than the parent - * shift we should go up one level and check it again. - */ - if (!amdgpu_vm_pt_ancestor(&cursor)) - return -EINVAL; - continue; - } + struct amdgpu_vm_tlb_seq_cb *tlb_cb; - pt = cursor.entry->bo; - if (!pt) { - /* We need all PDs and PTs for mapping something, */ - if (flags & AMDGPU_PTE_VALID) - return -ENOENT; - - /* but unmapping something can happen at a higher - * level. - */ - if (!amdgpu_vm_pt_ancestor(&cursor)) - return -EINVAL; - - pt = cursor.entry->bo; - shift = parent_shift; - frag_end = max(frag_end, ALIGN(frag_start + 1, - 1ULL << shift)); - } - - /* Looks good so far, calculate parameters for the update */ - incr = (uint64_t)AMDGPU_GPU_PAGE_SIZE << shift; - mask = amdgpu_vm_entries_mask(adev, cursor.level); - pe_start = ((cursor.pfn >> shift) & mask) * 8; - entry_end = ((uint64_t)mask + 1) << shift; - entry_end += cursor.pfn & ~(entry_end - 1); - entry_end = min(entry_end, end); - - do { - struct amdgpu_vm *vm = params->vm; - uint64_t upd_end = min(entry_end, frag_end); - unsigned nptes = (upd_end - frag_start) >> shift; - uint64_t upd_flags = flags | AMDGPU_PTE_FRAG(frag); - - /* This can happen when we set higher level PDs to - * silent to stop fault floods. - */ - nptes = max(nptes, 1u); - - trace_amdgpu_vm_update_ptes(params, frag_start, upd_end, - min(nptes, 32u), dst, incr, upd_flags, - vm->task_info.pid, - vm->immediate.fence_context); - amdgpu_vm_update_flags(params, to_amdgpu_bo_vm(pt), - cursor.level, pe_start, dst, - nptes, incr, upd_flags); - - pe_start += nptes * 8; - dst += nptes * incr; - - frag_start = upd_end; - if (frag_start >= frag_end) { - /* figure out the next fragment */ - amdgpu_vm_fragment(params, frag_start, end, - flags, &frag, &frag_end); - if (frag < shift) - break; - } - } while (frag_start < entry_end); - - if (amdgpu_vm_pt_descendant(adev, &cursor)) { - /* Free all child entries. - * Update the tables with the flags and addresses and free up subsequent - * tables in the case of huge pages or freed up areas. - * This is the maximum you can free, because all other page tables are not - * completely covered by the range and so potentially still in use. - */ - while (cursor.pfn < frag_start) { - /* Make sure previous mapping is freed */ - if (cursor.entry->bo) { - params->table_freed = true; - amdgpu_vm_free_pts(adev, params->vm, &cursor); - } - amdgpu_vm_pt_next(adev, &cursor); - } - - } else if (frag >= shift) { - /* or just move on to the next on the same level. */ - amdgpu_vm_pt_next(adev, &cursor); - } - } - - return 0; + tlb_cb = container_of(cb, typeof(*tlb_cb), cb); + atomic64_inc(&tlb_cb->vm->tlb_seq); + kfree(tlb_cb); } /** - * amdgpu_vm_bo_update_mapping - update a mapping in the vm page table + * amdgpu_vm_update_range - update a range in the vm page table * - * @adev: amdgpu_device pointer of the VM - * @bo_adev: amdgpu_device pointer of the mapped BO - * @vm: requested vm + * @adev: amdgpu_device pointer to use for commands + * @vm: the VM to update the range * @immediate: immediate submission in a page fault * @unlocked: unlocked invalidation during MM callback + * @flush_tlb: trigger tlb invalidation after update completed * @resv: fences we need to sync to * @start: start of mapped range * @last: last mapped entry * @flags: flags for the entries * @offset: offset into nodes and pages_addr + * @vram_base: base for vram mappings * @res: ttm_resource to map * @pages_addr: DMA addresses to use for mapping * @fence: optional resulting fence - * @table_freed: return true if page table is freed * * Fill in the page table entries between @start and @last. * * Returns: - * 0 for success, -EINVAL for failure. + * 0 for success, negative erro code for failure. */ -int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, - struct amdgpu_device *bo_adev, - struct amdgpu_vm *vm, bool immediate, - bool unlocked, struct dma_resv *resv, - uint64_t start, uint64_t last, - uint64_t flags, uint64_t offset, - struct ttm_resource *res, - dma_addr_t *pages_addr, - struct dma_fence **fence, - bool *table_freed) +int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm, + bool immediate, bool unlocked, bool flush_tlb, + struct dma_resv *resv, uint64_t start, uint64_t last, + uint64_t flags, uint64_t offset, uint64_t vram_base, + struct ttm_resource *res, dma_addr_t *pages_addr, + struct dma_fence **fence) { struct amdgpu_vm_update_params params; + struct amdgpu_vm_tlb_seq_cb *tlb_cb; struct amdgpu_res_cursor cursor; enum amdgpu_sync_mode sync_mode; int r, idx; @@ -1675,6 +773,18 @@ int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, if (!drm_dev_enter(adev_to_drm(adev), &idx)) return -ENODEV; + tlb_cb = kmalloc(sizeof(*tlb_cb), GFP_KERNEL); + if (!tlb_cb) { + r = -ENOMEM; + goto error_unlock; + } + + /* Vega20+XGMI where PTEs get inadvertently cached in L2 texture cache, + * heavy-weight flush TLB unconditionally. + */ + flush_tlb |= adev->gmc.xgmi.num_physical_nodes && + adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 0); + memset(¶ms, 0, sizeof(params)); params.adev = adev; params.vm = vm; @@ -1693,7 +803,7 @@ int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, amdgpu_vm_eviction_lock(vm); if (vm->evicting) { r = -EBUSY; - goto error_unlock; + goto error_free; } if (!unlocked && !dma_fence_is_signaled(vm->last_unlocked)) { @@ -1706,7 +816,7 @@ int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, r = vm->update_funcs->prepare(¶ms, resv, sync_mode); if (r) - goto error_unlock; + goto error_free; amdgpu_res_first(pages_addr ? NULL : res, offset, (last - start + 1) * AMDGPU_GPU_PAGE_SIZE, &cursor); @@ -1746,16 +856,15 @@ int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, } } else if (flags & (AMDGPU_PTE_VALID | AMDGPU_PTE_PRT)) { - addr = bo_adev->vm_manager.vram_base_offset + - cursor.start; + addr = vram_base + cursor.start; } else { addr = 0; } tmp = start + num_entries; - r = amdgpu_vm_update_ptes(¶ms, start, tmp, addr, flags); + r = amdgpu_vm_ptes_update(¶ms, start, tmp, addr, flags); if (r) - goto error_unlock; + goto error_free; amdgpu_res_next(&cursor, num_entries * AMDGPU_GPU_PAGE_SIZE); start = tmp; @@ -1763,8 +872,21 @@ int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, r = vm->update_funcs->commit(¶ms, fence); - if (table_freed) - *table_freed = *table_freed || params.table_freed; + if (flush_tlb || params.table_freed) { + tlb_cb->vm = vm; + if (fence && *fence && + !dma_fence_add_callback(*fence, &tlb_cb->cb, + amdgpu_vm_tlb_seq_cb)) { + dma_fence_put(vm->last_tlb_flush); + vm->last_tlb_flush = dma_fence_get(*fence); + } else { + amdgpu_vm_tlb_seq_cb(NULL, &tlb_cb->cb); + } + tlb_cb = NULL; + } + +error_free: + kfree(tlb_cb); error_unlock: amdgpu_vm_eviction_unlock(vm); @@ -1822,7 +944,6 @@ void amdgpu_vm_get_memory(struct amdgpu_vm *vm, uint64_t *vram_mem, * @adev: amdgpu_device pointer * @bo_va: requested BO and VM object * @clear: if true clear the entries - * @table_freed: return true if page table is freed * * Fill in the page table entries for @bo_va. * @@ -1830,7 +951,7 @@ void amdgpu_vm_get_memory(struct amdgpu_vm *vm, uint64_t *vram_mem, * 0 for success, -EINVAL for failure. */ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, - bool clear, bool *table_freed) + bool clear) { struct amdgpu_bo *bo = bo_va->base.bo; struct amdgpu_vm *vm = bo_va->base.vm; @@ -1838,9 +959,10 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, dma_addr_t *pages_addr = NULL; struct ttm_resource *mem; struct dma_fence **last_update; + bool flush_tlb = clear; struct dma_resv *resv; + uint64_t vram_base; uint64_t flags; - struct amdgpu_device *bo_adev = adev; int r; if (clear || !bo) { @@ -1865,14 +987,18 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, } if (bo) { + struct amdgpu_device *bo_adev; + flags = amdgpu_ttm_tt_pte_flags(adev, bo->tbo.ttm, mem); if (amdgpu_bo_encrypted(bo)) flags |= AMDGPU_PTE_TMZ; bo_adev = amdgpu_ttm_adev(bo->tbo.bdev); + vram_base = bo_adev->vm_manager.vram_base_offset; } else { flags = 0x0; + vram_base = 0; } if (clear || (bo && bo->tbo.base.resv == @@ -1882,7 +1008,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, last_update = &bo_va->last_pt_update; if (!clear && bo_va->base.moved) { - bo_va->base.moved = false; + flush_tlb = true; list_splice_init(&bo_va->valids, &bo_va->invalids); } else if (bo_va->cleared != clear) { @@ -1905,11 +1031,11 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, trace_amdgpu_vm_bo_update(mapping); - r = amdgpu_vm_bo_update_mapping(adev, bo_adev, vm, false, false, - resv, mapping->start, - mapping->last, update_flags, - mapping->offset, mem, - pages_addr, last_update, table_freed); + r = amdgpu_vm_update_range(adev, vm, false, false, flush_tlb, + resv, mapping->start, mapping->last, + update_flags, mapping->offset, + vram_base, mem, pages_addr, + last_update); if (r) return r; } @@ -1932,6 +1058,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, list_splice_init(&bo_va->invalids, &bo_va->valids); bo_va->cleared = clear; + bo_va->base.moved = false; if (trace_amdgpu_vm_bo_mapping_enabled()) { list_for_each_entry(mapping, &bo_va->valids, list) @@ -2059,7 +1186,7 @@ static void amdgpu_vm_prt_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) struct dma_resv_iter cursor; struct dma_fence *fence; - dma_resv_for_each_fence(&cursor, resv, true, fence) { + dma_resv_for_each_fence(&cursor, resv, DMA_RESV_USAGE_BOOKKEEP, fence) { /* Add a callback for each fence in the reservation object */ amdgpu_vm_prt_get(adev); amdgpu_vm_add_prt_cb(adev, fence); @@ -2100,10 +1227,10 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev, mapping->start < AMDGPU_GMC_HOLE_START) init_pte_value = AMDGPU_PTE_DEFAULT_ATC; - r = amdgpu_vm_bo_update_mapping(adev, adev, vm, false, false, - resv, mapping->start, - mapping->last, init_pte_value, - 0, NULL, NULL, &f, NULL); + r = amdgpu_vm_update_range(adev, vm, false, false, true, resv, + mapping->start, mapping->last, + init_pte_value, 0, 0, NULL, NULL, + &f); amdgpu_vm_free_mapping(adev, vm, mapping, f); if (r) { dma_fence_put(f); @@ -2145,7 +1272,7 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev, list_for_each_entry_safe(bo_va, tmp, &vm->moved, base.vm_status) { /* Per VM BOs never need to bo cleared in the page tables */ - r = amdgpu_vm_bo_update(adev, bo_va, false, NULL); + r = amdgpu_vm_bo_update(adev, bo_va, false); if (r) return r; } @@ -2164,7 +1291,7 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev, else clear = true; - r = amdgpu_vm_bo_update(adev, bo_va, clear, NULL); + r = amdgpu_vm_bo_update(adev, bo_va, clear); if (r) return r; @@ -2665,7 +1792,7 @@ bool amdgpu_vm_evictable(struct amdgpu_bo *bo) return true; /* Don't evict VM page tables while they are busy */ - if (!dma_resv_test_signaled(bo->tbo.base.resv, true)) + if (!dma_resv_test_signaled(bo->tbo.base.resv, DMA_RESV_USAGE_BOOKKEEP)) return false; /* Try to block ongoing updates */ @@ -2845,7 +1972,8 @@ void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint32_t min_vm_size, */ long amdgpu_vm_wait_idle(struct amdgpu_vm *vm, long timeout) { - timeout = dma_resv_wait_timeout(vm->root.bo->tbo.base.resv, true, + timeout = dma_resv_wait_timeout(vm->root.bo->tbo.base.resv, + DMA_RESV_USAGE_BOOKKEEP, true, timeout); if (timeout <= 0) return timeout; @@ -2913,6 +2041,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm) vm->update_funcs = &amdgpu_vm_sdma_funcs; vm->last_update = NULL; vm->last_unlocked = dma_fence_get_stub(); + vm->last_tlb_flush = dma_fence_get_stub(); mutex_init(&vm->eviction_lock); vm->evicting = false; @@ -2932,7 +2061,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm) amdgpu_vm_bo_base_init(&vm->root, vm, root_bo); - r = amdgpu_vm_clear_bo(adev, vm, root, false); + r = amdgpu_vm_pt_clear(adev, vm, root, false); if (r) goto error_unreserve; @@ -2951,6 +2080,7 @@ error_free_root: vm->root.bo = NULL; error_free_delayed: + dma_fence_put(vm->last_tlb_flush); dma_fence_put(vm->last_unlocked); drm_sched_entity_destroy(&vm->delayed); @@ -2961,34 +2091,6 @@ error_free_immediate: } /** - * amdgpu_vm_check_clean_reserved - check if a VM is clean - * - * @adev: amdgpu_device pointer - * @vm: the VM to check - * - * check all entries of the root PD, if any subsequent PDs are allocated, - * it means there are page table creating and filling, and is no a clean - * VM - * - * Returns: - * 0 if this VM is clean - */ -static int amdgpu_vm_check_clean_reserved(struct amdgpu_device *adev, - struct amdgpu_vm *vm) -{ - enum amdgpu_vm_level root = adev->vm_manager.root_level; - unsigned int entries = amdgpu_vm_num_entries(adev, root); - unsigned int i = 0; - - for (i = 0; i < entries; i++) { - if (to_amdgpu_bo_vm(vm->root.bo)->entries[i].bo) - return -EINVAL; - } - - return 0; -} - -/** * amdgpu_vm_make_compute - Turn a GFX VM into a compute VM * * @adev: amdgpu_device pointer @@ -3017,17 +2119,17 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm) return r; /* Sanity checks */ - r = amdgpu_vm_check_clean_reserved(adev, vm); - if (r) + if (!amdgpu_vm_pt_is_root_clean(adev, vm)) { + r = -EINVAL; goto unreserve_bo; + } /* Check if PD needs to be reinitialized and do it before * changing any other state, in case it fails. */ if (pte_support_ats != vm->pte_support_ats) { vm->pte_support_ats = pte_support_ats; - r = amdgpu_vm_clear_bo(adev, vm, - to_amdgpu_bo_vm(vm->root.bo), + r = amdgpu_vm_pt_clear(adev, vm, to_amdgpu_bo_vm(vm->root.bo), false); if (r) goto unreserve_bo; @@ -3095,6 +2197,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) struct amdgpu_bo_va_mapping *mapping, *tmp; bool prt_fini_needed = !!adev->gmc.gmc_funcs->set_prt; struct amdgpu_bo *root; + unsigned long flags; int i; amdgpu_amdkfd_gpuvm_destroy_cb(adev, vm); @@ -3104,6 +2207,11 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) amdgpu_vm_set_pasid(adev, vm, 0); dma_fence_wait(vm->last_unlocked, false); dma_fence_put(vm->last_unlocked); + dma_fence_wait(vm->last_tlb_flush, false); + /* Make sure that all fence callbacks have completed */ + spin_lock_irqsave(vm->last_tlb_flush->lock, flags); + spin_unlock_irqrestore(vm->last_tlb_flush->lock, flags); + dma_fence_put(vm->last_tlb_flush); list_for_each_entry_safe(mapping, tmp, &vm->freed, list) { if (mapping->flags & AMDGPU_PTE_PRT && prt_fini_needed) { @@ -3115,7 +2223,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) amdgpu_vm_free_mapping(adev, vm, mapping, NULL); } - amdgpu_vm_free_pts(adev, vm, NULL); + amdgpu_vm_pt_free_root(adev, vm); amdgpu_bo_unreserve(root); amdgpu_bo_unref(&root); WARN_ON(vm->root.bo); @@ -3375,9 +2483,8 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid, goto error_unlock; } - r = amdgpu_vm_bo_update_mapping(adev, adev, vm, true, false, NULL, addr, - addr, flags, value, NULL, NULL, NULL, - NULL); + r = amdgpu_vm_update_range(adev, vm, true, false, false, NULL, addr, + addr, flags, value, 0, NULL, NULL, NULL); if (r) goto error_unlock; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index bd7892482bbf..9ecb7f663e19 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -284,6 +284,10 @@ struct amdgpu_vm { struct drm_sched_entity immediate; struct drm_sched_entity delayed; + /* Last finished delayed update */ + atomic64_t tlb_seq; + struct dma_fence *last_tlb_flush; + /* Last unlocked submission to the scheduler entities */ struct dma_fence *last_unlocked; @@ -395,18 +399,17 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev, struct dma_fence **fence); int amdgpu_vm_handle_moved(struct amdgpu_device *adev, struct amdgpu_vm *vm); -int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, - struct amdgpu_device *bo_adev, - struct amdgpu_vm *vm, bool immediate, - bool unlocked, struct dma_resv *resv, - uint64_t start, uint64_t last, - uint64_t flags, uint64_t offset, - struct ttm_resource *res, - dma_addr_t *pages_addr, - struct dma_fence **fence, bool *free_table); +void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base, + struct amdgpu_vm *vm, struct amdgpu_bo *bo); +int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm, + bool immediate, bool unlocked, bool flush_tlb, + struct dma_resv *resv, uint64_t start, uint64_t last, + uint64_t flags, uint64_t offset, uint64_t vram_base, + struct ttm_resource *res, dma_addr_t *pages_addr, + struct dma_fence **fence); int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, - bool clear, bool *table_freed); + bool clear); bool amdgpu_vm_evictable(struct amdgpu_bo *bo); void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev, struct amdgpu_bo *bo, bool evicted); @@ -455,8 +458,34 @@ void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev, void amdgpu_vm_get_memory(struct amdgpu_vm *vm, uint64_t *vram_mem, uint64_t *gtt_mem, uint64_t *cpu_mem); +int amdgpu_vm_pt_clear(struct amdgpu_device *adev, struct amdgpu_vm *vm, + struct amdgpu_bo_vm *vmbo, bool immediate); +int amdgpu_vm_pt_create(struct amdgpu_device *adev, struct amdgpu_vm *vm, + int level, bool immediate, struct amdgpu_bo_vm **vmbo); +void amdgpu_vm_pt_free_root(struct amdgpu_device *adev, struct amdgpu_vm *vm); +bool amdgpu_vm_pt_is_root_clean(struct amdgpu_device *adev, + struct amdgpu_vm *vm); + +int amdgpu_vm_pde_update(struct amdgpu_vm_update_params *params, + struct amdgpu_vm_bo_base *entry); +int amdgpu_vm_ptes_update(struct amdgpu_vm_update_params *params, + uint64_t start, uint64_t end, + uint64_t dst, uint64_t flags); + #if defined(CONFIG_DEBUG_FS) void amdgpu_debugfs_vm_bo_info(struct amdgpu_vm *vm, struct seq_file *m); #endif +/** + * amdgpu_vm_tlb_seq - return tlb flush sequence number + * @vm: the amdgpu_vm structure to query + * + * Returns the tlb flush sequence number which indicates that the VM TLBs needs + * to be invalidated whenever the sequence number change. + */ +static inline uint64_t amdgpu_vm_tlb_seq(struct amdgpu_vm *vm) +{ + return atomic64_read(&vm->tlb_seq); +} + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c index e3fbf0f10add..31913ae86de6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c @@ -74,13 +74,12 @@ static int amdgpu_vm_cpu_update(struct amdgpu_vm_update_params *p, { unsigned int i; uint64_t value; - int r; + long r; - if (vmbo->bo.tbo.moving) { - r = dma_fence_wait(vmbo->bo.tbo.moving, true); - if (r) - return r; - } + r = dma_resv_wait_timeout(vmbo->bo.tbo.base.resv, DMA_RESV_USAGE_KERNEL, + true, MAX_SCHEDULE_TIMEOUT); + if (r < 0) + return r; pe += (unsigned long)amdgpu_bo_kptr(&vmbo->bo); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c new file mode 100644 index 000000000000..7761a3ea172e --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c @@ -0,0 +1,977 @@ +// SPDX-License-Identifier: GPL-2.0 OR MIT +/* + * Copyright 2022 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include <drm/drm_drv.h> + +#include "amdgpu.h" +#include "amdgpu_trace.h" +#include "amdgpu_vm.h" + +/* + * amdgpu_vm_pt_cursor - state for for_each_amdgpu_vm_pt + */ +struct amdgpu_vm_pt_cursor { + uint64_t pfn; + struct amdgpu_vm_bo_base *parent; + struct amdgpu_vm_bo_base *entry; + unsigned int level; +}; + +/** + * amdgpu_vm_pt_level_shift - return the addr shift for each level + * + * @adev: amdgpu_device pointer + * @level: VMPT level + * + * Returns: + * The number of bits the pfn needs to be right shifted for a level. + */ +static unsigned int amdgpu_vm_pt_level_shift(struct amdgpu_device *adev, + unsigned int level) +{ + switch (level) { + case AMDGPU_VM_PDB2: + case AMDGPU_VM_PDB1: + case AMDGPU_VM_PDB0: + return 9 * (AMDGPU_VM_PDB0 - level) + + adev->vm_manager.block_size; + case AMDGPU_VM_PTB: + return 0; + default: + return ~0; + } +} + +/** + * amdgpu_vm_pt_num_entries - return the number of entries in a PD/PT + * + * @adev: amdgpu_device pointer + * @level: VMPT level + * + * Returns: + * The number of entries in a page directory or page table. + */ +static unsigned int amdgpu_vm_pt_num_entries(struct amdgpu_device *adev, + unsigned int level) +{ + unsigned int shift; + + shift = amdgpu_vm_pt_level_shift(adev, adev->vm_manager.root_level); + if (level == adev->vm_manager.root_level) + /* For the root directory */ + return round_up(adev->vm_manager.max_pfn, 1ULL << shift) + >> shift; + else if (level != AMDGPU_VM_PTB) + /* Everything in between */ + return 512; + + /* For the page tables on the leaves */ + return AMDGPU_VM_PTE_COUNT(adev); +} + +/** + * amdgpu_vm_pt_num_ats_entries - return the number of ATS entries in the root PD + * + * @adev: amdgpu_device pointer + * + * Returns: + * The number of entries in the root page directory which needs the ATS setting. + */ +static unsigned int amdgpu_vm_pt_num_ats_entries(struct amdgpu_device *adev) +{ + unsigned int shift; + + shift = amdgpu_vm_pt_level_shift(adev, adev->vm_manager.root_level); + return AMDGPU_GMC_HOLE_START >> (shift + AMDGPU_GPU_PAGE_SHIFT); +} + +/** + * amdgpu_vm_pt_entries_mask - the mask to get the entry number of a PD/PT + * + * @adev: amdgpu_device pointer + * @level: VMPT level + * + * Returns: + * The mask to extract the entry number of a PD/PT from an address. + */ +static uint32_t amdgpu_vm_pt_entries_mask(struct amdgpu_device *adev, + unsigned int level) +{ + if (level <= adev->vm_manager.root_level) + return 0xffffffff; + else if (level != AMDGPU_VM_PTB) + return 0x1ff; + else + return AMDGPU_VM_PTE_COUNT(adev) - 1; +} + +/** + * amdgpu_vm_pt_size - returns the size of the page table in bytes + * + * @adev: amdgpu_device pointer + * @level: VMPT level + * + * Returns: + * The size of the BO for a page directory or page table in bytes. + */ +static unsigned int amdgpu_vm_pt_size(struct amdgpu_device *adev, + unsigned int level) +{ + return AMDGPU_GPU_PAGE_ALIGN(amdgpu_vm_pt_num_entries(adev, level) * 8); +} + +/** + * amdgpu_vm_pt_parent - get the parent page directory + * + * @pt: child page table + * + * Helper to get the parent entry for the child page table. NULL if we are at + * the root page directory. + */ +static struct amdgpu_vm_bo_base * +amdgpu_vm_pt_parent(struct amdgpu_vm_bo_base *pt) +{ + struct amdgpu_bo *parent = pt->bo->parent; + + if (!parent) + return NULL; + + return parent->vm_bo; +} + +/** + * amdgpu_vm_pt_start - start PD/PT walk + * + * @adev: amdgpu_device pointer + * @vm: amdgpu_vm structure + * @start: start address of the walk + * @cursor: state to initialize + * + * Initialize a amdgpu_vm_pt_cursor to start a walk. + */ +static void amdgpu_vm_pt_start(struct amdgpu_device *adev, + struct amdgpu_vm *vm, uint64_t start, + struct amdgpu_vm_pt_cursor *cursor) +{ + cursor->pfn = start; + cursor->parent = NULL; + cursor->entry = &vm->root; + cursor->level = adev->vm_manager.root_level; +} + +/** + * amdgpu_vm_pt_descendant - go to child node + * + * @adev: amdgpu_device pointer + * @cursor: current state + * + * Walk to the child node of the current node. + * Returns: + * True if the walk was possible, false otherwise. + */ +static bool amdgpu_vm_pt_descendant(struct amdgpu_device *adev, + struct amdgpu_vm_pt_cursor *cursor) +{ + unsigned int mask, shift, idx; + + if ((cursor->level == AMDGPU_VM_PTB) || !cursor->entry || + !cursor->entry->bo) + return false; + + mask = amdgpu_vm_pt_entries_mask(adev, cursor->level); + shift = amdgpu_vm_pt_level_shift(adev, cursor->level); + + ++cursor->level; + idx = (cursor->pfn >> shift) & mask; + cursor->parent = cursor->entry; + cursor->entry = &to_amdgpu_bo_vm(cursor->entry->bo)->entries[idx]; + return true; +} + +/** + * amdgpu_vm_pt_sibling - go to sibling node + * + * @adev: amdgpu_device pointer + * @cursor: current state + * + * Walk to the sibling node of the current node. + * Returns: + * True if the walk was possible, false otherwise. + */ +static bool amdgpu_vm_pt_sibling(struct amdgpu_device *adev, + struct amdgpu_vm_pt_cursor *cursor) +{ + + unsigned int shift, num_entries; + struct amdgpu_bo_vm *parent; + + /* Root doesn't have a sibling */ + if (!cursor->parent) + return false; + + /* Go to our parents and see if we got a sibling */ + shift = amdgpu_vm_pt_level_shift(adev, cursor->level - 1); + num_entries = amdgpu_vm_pt_num_entries(adev, cursor->level - 1); + parent = to_amdgpu_bo_vm(cursor->parent->bo); + + if (cursor->entry == &parent->entries[num_entries - 1]) + return false; + + cursor->pfn += 1ULL << shift; + cursor->pfn &= ~((1ULL << shift) - 1); + ++cursor->entry; + return true; +} + +/** + * amdgpu_vm_pt_ancestor - go to parent node + * + * @cursor: current state + * + * Walk to the parent node of the current node. + * Returns: + * True if the walk was possible, false otherwise. + */ +static bool amdgpu_vm_pt_ancestor(struct amdgpu_vm_pt_cursor *cursor) +{ + if (!cursor->parent) + return false; + + --cursor->level; + cursor->entry = cursor->parent; + cursor->parent = amdgpu_vm_pt_parent(cursor->parent); + return true; +} + +/** + * amdgpu_vm_pt_next - get next PD/PT in hieratchy + * + * @adev: amdgpu_device pointer + * @cursor: current state + * + * Walk the PD/PT tree to the next node. + */ +static void amdgpu_vm_pt_next(struct amdgpu_device *adev, + struct amdgpu_vm_pt_cursor *cursor) +{ + /* First try a newborn child */ + if (amdgpu_vm_pt_descendant(adev, cursor)) + return; + + /* If that didn't worked try to find a sibling */ + while (!amdgpu_vm_pt_sibling(adev, cursor)) { + /* No sibling, go to our parents and grandparents */ + if (!amdgpu_vm_pt_ancestor(cursor)) { + cursor->pfn = ~0ll; + return; + } + } +} + +/** + * amdgpu_vm_pt_first_dfs - start a deep first search + * + * @adev: amdgpu_device structure + * @vm: amdgpu_vm structure + * @start: optional cursor to start with + * @cursor: state to initialize + * + * Starts a deep first traversal of the PD/PT tree. + */ +static void amdgpu_vm_pt_first_dfs(struct amdgpu_device *adev, + struct amdgpu_vm *vm, + struct amdgpu_vm_pt_cursor *start, + struct amdgpu_vm_pt_cursor *cursor) +{ + if (start) + *cursor = *start; + else + amdgpu_vm_pt_start(adev, vm, 0, cursor); + + while (amdgpu_vm_pt_descendant(adev, cursor)) + ; +} + +/** + * amdgpu_vm_pt_continue_dfs - check if the deep first search should continue + * + * @start: starting point for the search + * @entry: current entry + * + * Returns: + * True when the search should continue, false otherwise. + */ +static bool amdgpu_vm_pt_continue_dfs(struct amdgpu_vm_pt_cursor *start, + struct amdgpu_vm_bo_base *entry) +{ + return entry && (!start || entry != start->entry); +} + +/** + * amdgpu_vm_pt_next_dfs - get the next node for a deep first search + * + * @adev: amdgpu_device structure + * @cursor: current state + * + * Move the cursor to the next node in a deep first search. + */ +static void amdgpu_vm_pt_next_dfs(struct amdgpu_device *adev, + struct amdgpu_vm_pt_cursor *cursor) +{ + if (!cursor->entry) + return; + + if (!cursor->parent) + cursor->entry = NULL; + else if (amdgpu_vm_pt_sibling(adev, cursor)) + while (amdgpu_vm_pt_descendant(adev, cursor)) + ; + else + amdgpu_vm_pt_ancestor(cursor); +} + +/* + * for_each_amdgpu_vm_pt_dfs_safe - safe deep first search of all PDs/PTs + */ +#define for_each_amdgpu_vm_pt_dfs_safe(adev, vm, start, cursor, entry) \ + for (amdgpu_vm_pt_first_dfs((adev), (vm), (start), &(cursor)), \ + (entry) = (cursor).entry, amdgpu_vm_pt_next_dfs((adev), &(cursor));\ + amdgpu_vm_pt_continue_dfs((start), (entry)); \ + (entry) = (cursor).entry, amdgpu_vm_pt_next_dfs((adev), &(cursor))) + +/** + * amdgpu_vm_pt_clear - initially clear the PDs/PTs + * + * @adev: amdgpu_device pointer + * @vm: VM to clear BO from + * @vmbo: BO to clear + * @immediate: use an immediate update + * + * Root PD needs to be reserved when calling this. + * + * Returns: + * 0 on success, errno otherwise. + */ +int amdgpu_vm_pt_clear(struct amdgpu_device *adev, struct amdgpu_vm *vm, + struct amdgpu_bo_vm *vmbo, bool immediate) +{ + unsigned int level = adev->vm_manager.root_level; + struct ttm_operation_ctx ctx = { true, false }; + struct amdgpu_vm_update_params params; + struct amdgpu_bo *ancestor = &vmbo->bo; + unsigned int entries, ats_entries; + struct amdgpu_bo *bo = &vmbo->bo; + uint64_t addr; + int r, idx; + + /* Figure out our place in the hierarchy */ + if (ancestor->parent) { + ++level; + while (ancestor->parent->parent) { + ++level; + ancestor = ancestor->parent; + } + } + + entries = amdgpu_bo_size(bo) / 8; + if (!vm->pte_support_ats) { + ats_entries = 0; + + } else if (!bo->parent) { + ats_entries = amdgpu_vm_pt_num_ats_entries(adev); + ats_entries = min(ats_entries, entries); + entries -= ats_entries; + + } else { + struct amdgpu_vm_bo_base *pt; + + pt = ancestor->vm_bo; + ats_entries = amdgpu_vm_pt_num_ats_entries(adev); + if ((pt - to_amdgpu_bo_vm(vm->root.bo)->entries) >= + ats_entries) { + ats_entries = 0; + } else { + ats_entries = entries; + entries = 0; + } + } + + r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); + if (r) + return r; + + if (vmbo->shadow) { + struct amdgpu_bo *shadow = vmbo->shadow; + + r = ttm_bo_validate(&shadow->tbo, &shadow->placement, &ctx); + if (r) + return r; + } + + if (!drm_dev_enter(adev_to_drm(adev), &idx)) + return -ENODEV; + + r = vm->update_funcs->map_table(vmbo); + if (r) + goto exit; + + memset(¶ms, 0, sizeof(params)); + params.adev = adev; + params.vm = vm; + params.immediate = immediate; + + r = vm->update_funcs->prepare(¶ms, NULL, AMDGPU_SYNC_EXPLICIT); + if (r) + goto exit; + + addr = 0; + if (ats_entries) { + uint64_t value = 0, flags; + + flags = AMDGPU_PTE_DEFAULT_ATC; + if (level != AMDGPU_VM_PTB) { + /* Handle leaf PDEs as PTEs */ + flags |= AMDGPU_PDE_PTE; + amdgpu_gmc_get_vm_pde(adev, level, &value, &flags); + } + + r = vm->update_funcs->update(¶ms, vmbo, addr, 0, + ats_entries, value, flags); + if (r) + goto exit; + + addr += ats_entries * 8; + } + + if (entries) { + uint64_t value = 0, flags = 0; + + if (adev->asic_type >= CHIP_VEGA10) { + if (level != AMDGPU_VM_PTB) { + /* Handle leaf PDEs as PTEs */ + flags |= AMDGPU_PDE_PTE; + amdgpu_gmc_get_vm_pde(adev, level, + &value, &flags); + } else { + /* Workaround for fault priority problem on GMC9 */ + flags = AMDGPU_PTE_EXECUTABLE; + } + } + + r = vm->update_funcs->update(¶ms, vmbo, addr, 0, entries, + value, flags); + if (r) + goto exit; + } + + r = vm->update_funcs->commit(¶ms, NULL); +exit: + drm_dev_exit(idx); + return r; +} + +/** + * amdgpu_vm_pt_create - create bo for PD/PT + * + * @adev: amdgpu_device pointer + * @vm: requesting vm + * @level: the page table level + * @immediate: use a immediate update + * @vmbo: pointer to the buffer object pointer + */ +int amdgpu_vm_pt_create(struct amdgpu_device *adev, struct amdgpu_vm *vm, + int level, bool immediate, struct amdgpu_bo_vm **vmbo) +{ + struct amdgpu_bo_param bp; + struct amdgpu_bo *bo; + struct dma_resv *resv; + unsigned int num_entries; + int r; + + memset(&bp, 0, sizeof(bp)); + + bp.size = amdgpu_vm_pt_size(adev, level); + bp.byte_align = AMDGPU_GPU_PAGE_SIZE; + bp.domain = AMDGPU_GEM_DOMAIN_VRAM; + bp.domain = amdgpu_bo_get_preferred_domain(adev, bp.domain); + bp.flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS | + AMDGPU_GEM_CREATE_CPU_GTT_USWC; + + if (level < AMDGPU_VM_PTB) + num_entries = amdgpu_vm_pt_num_entries(adev, level); + else + num_entries = 0; + + bp.bo_ptr_size = struct_size((*vmbo), entries, num_entries); + + if (vm->use_cpu_for_update) + bp.flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; + + bp.type = ttm_bo_type_kernel; + bp.no_wait_gpu = immediate; + if (vm->root.bo) + bp.resv = vm->root.bo->tbo.base.resv; + + r = amdgpu_bo_create_vm(adev, &bp, vmbo); + if (r) + return r; + + bo = &(*vmbo)->bo; + if (vm->is_compute_context || (adev->flags & AMD_IS_APU)) { + (*vmbo)->shadow = NULL; + return 0; + } + + if (!bp.resv) + WARN_ON(dma_resv_lock(bo->tbo.base.resv, + NULL)); + resv = bp.resv; + memset(&bp, 0, sizeof(bp)); + bp.size = amdgpu_vm_pt_size(adev, level); + bp.domain = AMDGPU_GEM_DOMAIN_GTT; + bp.flags = AMDGPU_GEM_CREATE_CPU_GTT_USWC; + bp.type = ttm_bo_type_kernel; + bp.resv = bo->tbo.base.resv; + bp.bo_ptr_size = sizeof(struct amdgpu_bo); + + r = amdgpu_bo_create(adev, &bp, &(*vmbo)->shadow); + + if (!resv) + dma_resv_unlock(bo->tbo.base.resv); + + if (r) { + amdgpu_bo_unref(&bo); + return r; + } + + (*vmbo)->shadow->parent = amdgpu_bo_ref(bo); + amdgpu_bo_add_to_shadow_list(*vmbo); + + return 0; +} + +/** + * amdgpu_vm_pt_alloc - Allocate a specific page table + * + * @adev: amdgpu_device pointer + * @vm: VM to allocate page tables for + * @cursor: Which page table to allocate + * @immediate: use an immediate update + * + * Make sure a specific page table or directory is allocated. + * + * Returns: + * 1 if page table needed to be allocated, 0 if page table was already + * allocated, negative errno if an error occurred. + */ +static int amdgpu_vm_pt_alloc(struct amdgpu_device *adev, + struct amdgpu_vm *vm, + struct amdgpu_vm_pt_cursor *cursor, + bool immediate) +{ + struct amdgpu_vm_bo_base *entry = cursor->entry; + struct amdgpu_bo *pt_bo; + struct amdgpu_bo_vm *pt; + int r; + + if (entry->bo) + return 0; + + r = amdgpu_vm_pt_create(adev, vm, cursor->level, immediate, &pt); + if (r) + return r; + + /* Keep a reference to the root directory to avoid + * freeing them up in the wrong order. + */ + pt_bo = &pt->bo; + pt_bo->parent = amdgpu_bo_ref(cursor->parent->bo); + amdgpu_vm_bo_base_init(entry, vm, pt_bo); + r = amdgpu_vm_pt_clear(adev, vm, pt, immediate); + if (r) + goto error_free_pt; + + return 0; + +error_free_pt: + amdgpu_bo_unref(&pt->shadow); + amdgpu_bo_unref(&pt_bo); + return r; +} + +/** + * amdgpu_vm_pt_free - free one PD/PT + * + * @entry: PDE to free + */ +static void amdgpu_vm_pt_free(struct amdgpu_vm_bo_base *entry) +{ + struct amdgpu_bo *shadow; + + if (!entry->bo) + return; + shadow = amdgpu_bo_shadowed(entry->bo); + entry->bo->vm_bo = NULL; + list_del(&entry->vm_status); + amdgpu_bo_unref(&shadow); + amdgpu_bo_unref(&entry->bo); +} + +/** + * amdgpu_vm_pt_free_dfs - free PD/PT levels + * + * @adev: amdgpu device structure + * @vm: amdgpu vm structure + * @start: optional cursor where to start freeing PDs/PTs + * + * Free the page directory or page table level and all sub levels. + */ +static void amdgpu_vm_pt_free_dfs(struct amdgpu_device *adev, + struct amdgpu_vm *vm, + struct amdgpu_vm_pt_cursor *start) +{ + struct amdgpu_vm_pt_cursor cursor; + struct amdgpu_vm_bo_base *entry; + + for_each_amdgpu_vm_pt_dfs_safe(adev, vm, start, cursor, entry) + amdgpu_vm_pt_free(entry); + + if (start) + amdgpu_vm_pt_free(start->entry); +} + +/** + * amdgpu_vm_pt_free_root - free root PD + * @adev: amdgpu device structure + * @vm: amdgpu vm structure + * + * Free the root page directory and everything below it. + */ +void amdgpu_vm_pt_free_root(struct amdgpu_device *adev, struct amdgpu_vm *vm) +{ + amdgpu_vm_pt_free_dfs(adev, vm, NULL); +} + +/** + * amdgpu_vm_pt_is_root_clean - check if a root PD is clean + * + * @adev: amdgpu_device pointer + * @vm: the VM to check + * + * Check all entries of the root PD, if any subsequent PDs are allocated, + * it means there are page table creating and filling, and is no a clean + * VM + * + * Returns: + * 0 if this VM is clean + */ +bool amdgpu_vm_pt_is_root_clean(struct amdgpu_device *adev, + struct amdgpu_vm *vm) +{ + enum amdgpu_vm_level root = adev->vm_manager.root_level; + unsigned int entries = amdgpu_vm_pt_num_entries(adev, root); + unsigned int i = 0; + + for (i = 0; i < entries; i++) { + if (to_amdgpu_bo_vm(vm->root.bo)->entries[i].bo) + return false; + } + return true; +} + +/** + * amdgpu_vm_pde_update - update a single level in the hierarchy + * + * @params: parameters for the update + * @entry: entry to update + * + * Makes sure the requested entry in parent is up to date. + */ +int amdgpu_vm_pde_update(struct amdgpu_vm_update_params *params, + struct amdgpu_vm_bo_base *entry) +{ + struct amdgpu_vm_bo_base *parent = amdgpu_vm_pt_parent(entry); + struct amdgpu_bo *bo = parent->bo, *pbo; + struct amdgpu_vm *vm = params->vm; + uint64_t pde, pt, flags; + unsigned int level; + + for (level = 0, pbo = bo->parent; pbo; ++level) + pbo = pbo->parent; + + level += params->adev->vm_manager.root_level; + amdgpu_gmc_get_pde_for_bo(entry->bo, level, &pt, &flags); + pde = (entry - to_amdgpu_bo_vm(parent->bo)->entries) * 8; + return vm->update_funcs->update(params, to_amdgpu_bo_vm(bo), pde, pt, + 1, 0, flags); +} + +/* + * amdgpu_vm_pte_update_flags - figure out flags for PTE updates + * + * Make sure to set the right flags for the PTEs at the desired level. + */ +static void amdgpu_vm_pte_update_flags(struct amdgpu_vm_update_params *params, + struct amdgpu_bo_vm *pt, + unsigned int level, + uint64_t pe, uint64_t addr, + unsigned int count, uint32_t incr, + uint64_t flags) + +{ + if (level != AMDGPU_VM_PTB) { + flags |= AMDGPU_PDE_PTE; + amdgpu_gmc_get_vm_pde(params->adev, level, &addr, &flags); + + } else if (params->adev->asic_type >= CHIP_VEGA10 && + !(flags & AMDGPU_PTE_VALID) && + !(flags & AMDGPU_PTE_PRT)) { + + /* Workaround for fault priority problem on GMC9 */ + flags |= AMDGPU_PTE_EXECUTABLE; + } + + params->vm->update_funcs->update(params, pt, pe, addr, count, incr, + flags); +} + +/** + * amdgpu_vm_pte_fragment - get fragment for PTEs + * + * @params: see amdgpu_vm_update_params definition + * @start: first PTE to handle + * @end: last PTE to handle + * @flags: hw mapping flags + * @frag: resulting fragment size + * @frag_end: end of this fragment + * + * Returns the first possible fragment for the start and end address. + */ +static void amdgpu_vm_pte_fragment(struct amdgpu_vm_update_params *params, + uint64_t start, uint64_t end, uint64_t flags, + unsigned int *frag, uint64_t *frag_end) +{ + /** + * The MC L1 TLB supports variable sized pages, based on a fragment + * field in the PTE. When this field is set to a non-zero value, page + * granularity is increased from 4KB to (1 << (12 + frag)). The PTE + * flags are considered valid for all PTEs within the fragment range + * and corresponding mappings are assumed to be physically contiguous. + * + * The L1 TLB can store a single PTE for the whole fragment, + * significantly increasing the space available for translation + * caching. This leads to large improvements in throughput when the + * TLB is under pressure. + * + * The L2 TLB distributes small and large fragments into two + * asymmetric partitions. The large fragment cache is significantly + * larger. Thus, we try to use large fragments wherever possible. + * Userspace can support this by aligning virtual base address and + * allocation size to the fragment size. + * + * Starting with Vega10 the fragment size only controls the L1. The L2 + * is now directly feed with small/huge/giant pages from the walker. + */ + unsigned int max_frag; + + if (params->adev->asic_type < CHIP_VEGA10) + max_frag = params->adev->vm_manager.fragment_size; + else + max_frag = 31; + + /* system pages are non continuously */ + if (params->pages_addr) { + *frag = 0; + *frag_end = end; + return; + } + + /* This intentionally wraps around if no bit is set */ + *frag = min_t(unsigned int, ffs(start) - 1, fls64(end - start) - 1); + if (*frag >= max_frag) { + *frag = max_frag; + *frag_end = end & ~((1ULL << max_frag) - 1); + } else { + *frag_end = start + (1 << *frag); + } +} + +/** + * amdgpu_vm_ptes_update - make sure that page tables are valid + * + * @params: see amdgpu_vm_update_params definition + * @start: start of GPU address range + * @end: end of GPU address range + * @dst: destination address to map to, the next dst inside the function + * @flags: mapping flags + * + * Update the page tables in the range @start - @end. + * + * Returns: + * 0 for success, -EINVAL for failure. + */ +int amdgpu_vm_ptes_update(struct amdgpu_vm_update_params *params, + uint64_t start, uint64_t end, + uint64_t dst, uint64_t flags) +{ + struct amdgpu_device *adev = params->adev; + struct amdgpu_vm_pt_cursor cursor; + uint64_t frag_start = start, frag_end; + unsigned int frag; + int r; + + /* figure out the initial fragment */ + amdgpu_vm_pte_fragment(params, frag_start, end, flags, &frag, + &frag_end); + + /* walk over the address space and update the PTs */ + amdgpu_vm_pt_start(adev, params->vm, start, &cursor); + while (cursor.pfn < end) { + unsigned int shift, parent_shift, mask; + uint64_t incr, entry_end, pe_start; + struct amdgpu_bo *pt; + + if (!params->unlocked) { + /* make sure that the page tables covering the + * address range are actually allocated + */ + r = amdgpu_vm_pt_alloc(params->adev, params->vm, + &cursor, params->immediate); + if (r) + return r; + } + + shift = amdgpu_vm_pt_level_shift(adev, cursor.level); + parent_shift = amdgpu_vm_pt_level_shift(adev, cursor.level - 1); + if (params->unlocked) { + /* Unlocked updates are only allowed on the leaves */ + if (amdgpu_vm_pt_descendant(adev, &cursor)) + continue; + } else if (adev->asic_type < CHIP_VEGA10 && + (flags & AMDGPU_PTE_VALID)) { + /* No huge page support before GMC v9 */ + if (cursor.level != AMDGPU_VM_PTB) { + if (!amdgpu_vm_pt_descendant(adev, &cursor)) + return -ENOENT; + continue; + } + } else if (frag < shift) { + /* We can't use this level when the fragment size is + * smaller than the address shift. Go to the next + * child entry and try again. + */ + if (amdgpu_vm_pt_descendant(adev, &cursor)) + continue; + } else if (frag >= parent_shift) { + /* If the fragment size is even larger than the parent + * shift we should go up one level and check it again. + */ + if (!amdgpu_vm_pt_ancestor(&cursor)) + return -EINVAL; + continue; + } + + pt = cursor.entry->bo; + if (!pt) { + /* We need all PDs and PTs for mapping something, */ + if (flags & AMDGPU_PTE_VALID) + return -ENOENT; + + /* but unmapping something can happen at a higher + * level. + */ + if (!amdgpu_vm_pt_ancestor(&cursor)) + return -EINVAL; + + pt = cursor.entry->bo; + shift = parent_shift; + frag_end = max(frag_end, ALIGN(frag_start + 1, + 1ULL << shift)); + } + + /* Looks good so far, calculate parameters for the update */ + incr = (uint64_t)AMDGPU_GPU_PAGE_SIZE << shift; + mask = amdgpu_vm_pt_entries_mask(adev, cursor.level); + pe_start = ((cursor.pfn >> shift) & mask) * 8; + entry_end = ((uint64_t)mask + 1) << shift; + entry_end += cursor.pfn & ~(entry_end - 1); + entry_end = min(entry_end, end); + + do { + struct amdgpu_vm *vm = params->vm; + uint64_t upd_end = min(entry_end, frag_end); + unsigned int nptes = (upd_end - frag_start) >> shift; + uint64_t upd_flags = flags | AMDGPU_PTE_FRAG(frag); + + /* This can happen when we set higher level PDs to + * silent to stop fault floods. + */ + nptes = max(nptes, 1u); + + trace_amdgpu_vm_update_ptes(params, frag_start, upd_end, + min(nptes, 32u), dst, incr, + upd_flags, + vm->task_info.pid, + vm->immediate.fence_context); + amdgpu_vm_pte_update_flags(params, to_amdgpu_bo_vm(pt), + cursor.level, pe_start, dst, + nptes, incr, upd_flags); + + pe_start += nptes * 8; + dst += nptes * incr; + + frag_start = upd_end; + if (frag_start >= frag_end) { + /* figure out the next fragment */ + amdgpu_vm_pte_fragment(params, frag_start, end, + flags, &frag, &frag_end); + if (frag < shift) + break; + } + } while (frag_start < entry_end); + + if (amdgpu_vm_pt_descendant(adev, &cursor)) { + /* Free all child entries. + * Update the tables with the flags and addresses and free up subsequent + * tables in the case of huge pages or freed up areas. + * This is the maximum you can free, because all other page tables are not + * completely covered by the range and so potentially still in use. + */ + while (cursor.pfn < frag_start) { + /* Make sure previous mapping is freed */ + if (cursor.entry->bo) { + params->table_freed = true; + amdgpu_vm_pt_free_dfs(adev, params->vm, + &cursor); + } + amdgpu_vm_pt_next(adev, &cursor); + } + + } else if (frag >= shift) { + /* or just move on to the next on the same level. */ + amdgpu_vm_pt_next(adev, &cursor); + } + } + + return 0; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c index dbb551762805..1fd3cbca20a2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c @@ -109,7 +109,7 @@ static int amdgpu_vm_sdma_commit(struct amdgpu_vm_update_params *p, if (p->unlocked) { struct dma_fence *tmp = dma_fence_get(f); - swap(p->vm->last_unlocked, f); + swap(p->vm->last_unlocked, tmp); dma_fence_put(tmp); } else { amdgpu_bo_fence(p->vm->root.bo, f, true); @@ -204,14 +204,19 @@ static int amdgpu_vm_sdma_update(struct amdgpu_vm_update_params *p, struct amdgpu_bo *bo = &vmbo->bo; enum amdgpu_ib_pool_type pool = p->immediate ? AMDGPU_IB_POOL_IMMEDIATE : AMDGPU_IB_POOL_DELAYED; + struct dma_resv_iter cursor; unsigned int i, ndw, nptes; + struct dma_fence *fence; uint64_t *pte; int r; /* Wait for PD/PT moves to be completed */ - r = amdgpu_sync_fence(&p->job->sync, bo->tbo.moving); - if (r) - return r; + dma_resv_for_each_fence(&cursor, bo->tbo.base.resv, + DMA_RESV_USAGE_KERNEL, fence) { + r = amdgpu_sync_fence(&p->job->sync, fence); + if (r) + return r; + } do { ndw = p->num_dw_left; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c index 0a7611648573..49e4092f447f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c @@ -32,8 +32,10 @@ #include "atom.h" struct amdgpu_vram_reservation { - struct list_head node; - struct drm_mm_node mm_node; + u64 start; + u64 size; + struct list_head allocated; + struct list_head blocks; }; static inline struct amdgpu_vram_mgr * @@ -186,18 +188,18 @@ const struct attribute_group amdgpu_vram_mgr_attr_group = { }; /** - * amdgpu_vram_mgr_vis_size - Calculate visible node size + * amdgpu_vram_mgr_vis_size - Calculate visible block size * * @adev: amdgpu_device pointer - * @node: MM node structure + * @block: DRM BUDDY block structure * - * Calculate how many bytes of the MM node are inside visible VRAM + * Calculate how many bytes of the DRM BUDDY block are inside visible VRAM */ static u64 amdgpu_vram_mgr_vis_size(struct amdgpu_device *adev, - struct drm_mm_node *node) + struct drm_buddy_block *block) { - uint64_t start = node->start << PAGE_SHIFT; - uint64_t end = (node->size + node->start) << PAGE_SHIFT; + u64 start = amdgpu_vram_mgr_block_start(block); + u64 end = start + amdgpu_vram_mgr_block_size(block); if (start >= adev->gmc.visible_vram_size) return 0; @@ -218,9 +220,9 @@ u64 amdgpu_vram_mgr_bo_visible_size(struct amdgpu_bo *bo) { struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); struct ttm_resource *res = bo->tbo.resource; - unsigned pages = res->num_pages; - struct drm_mm_node *mm; - u64 usage; + struct amdgpu_vram_mgr_resource *vres = to_amdgpu_vram_mgr_resource(res); + struct drm_buddy_block *block; + u64 usage = 0; if (amdgpu_gmc_vram_full_visible(&adev->gmc)) return amdgpu_bo_size(bo); @@ -228,9 +230,8 @@ u64 amdgpu_vram_mgr_bo_visible_size(struct amdgpu_bo *bo) if (res->start >= adev->gmc.visible_vram_size >> PAGE_SHIFT) return 0; - mm = &container_of(res, struct ttm_range_mgr_node, base)->mm_nodes[0]; - for (usage = 0; pages; pages -= mm->size, mm++) - usage += amdgpu_vram_mgr_vis_size(adev, mm); + list_for_each_entry(block, &vres->blocks, link) + usage += amdgpu_vram_mgr_vis_size(adev, block); return usage; } @@ -240,23 +241,30 @@ static void amdgpu_vram_mgr_do_reserve(struct ttm_resource_manager *man) { struct amdgpu_vram_mgr *mgr = to_vram_mgr(man); struct amdgpu_device *adev = to_amdgpu_device(mgr); - struct drm_mm *mm = &mgr->mm; + struct drm_buddy *mm = &mgr->mm; struct amdgpu_vram_reservation *rsv, *temp; + struct drm_buddy_block *block; uint64_t vis_usage; - list_for_each_entry_safe(rsv, temp, &mgr->reservations_pending, node) { - if (drm_mm_reserve_node(mm, &rsv->mm_node)) + list_for_each_entry_safe(rsv, temp, &mgr->reservations_pending, blocks) { + if (drm_buddy_alloc_blocks(mm, rsv->start, rsv->start + rsv->size, + rsv->size, mm->chunk_size, &rsv->allocated, + DRM_BUDDY_RANGE_ALLOCATION)) + continue; + + block = amdgpu_vram_mgr_first_block(&rsv->allocated); + if (!block) continue; dev_dbg(adev->dev, "Reservation 0x%llx - %lld, Succeeded\n", - rsv->mm_node.start, rsv->mm_node.size); + rsv->start, rsv->size); - vis_usage = amdgpu_vram_mgr_vis_size(adev, &rsv->mm_node); + vis_usage = amdgpu_vram_mgr_vis_size(adev, block); atomic64_add(vis_usage, &mgr->vis_usage); spin_lock(&man->bdev->lru_lock); - man->usage += rsv->mm_node.size << PAGE_SHIFT; + man->usage += rsv->size; spin_unlock(&man->bdev->lru_lock); - list_move(&rsv->node, &mgr->reserved_pages); + list_move(&rsv->blocks, &mgr->reserved_pages); } } @@ -278,14 +286,16 @@ int amdgpu_vram_mgr_reserve_range(struct amdgpu_vram_mgr *mgr, if (!rsv) return -ENOMEM; - INIT_LIST_HEAD(&rsv->node); - rsv->mm_node.start = start >> PAGE_SHIFT; - rsv->mm_node.size = size >> PAGE_SHIFT; + INIT_LIST_HEAD(&rsv->allocated); + INIT_LIST_HEAD(&rsv->blocks); - spin_lock(&mgr->lock); - list_add_tail(&rsv->node, &mgr->reservations_pending); + rsv->start = start; + rsv->size = size; + + mutex_lock(&mgr->lock); + list_add_tail(&rsv->blocks, &mgr->reservations_pending); amdgpu_vram_mgr_do_reserve(&mgr->manager); - spin_unlock(&mgr->lock); + mutex_unlock(&mgr->lock); return 0; } @@ -307,19 +317,19 @@ int amdgpu_vram_mgr_query_page_status(struct amdgpu_vram_mgr *mgr, struct amdgpu_vram_reservation *rsv; int ret; - spin_lock(&mgr->lock); + mutex_lock(&mgr->lock); - list_for_each_entry(rsv, &mgr->reservations_pending, node) { - if ((rsv->mm_node.start <= start) && - (start < (rsv->mm_node.start + rsv->mm_node.size))) { + list_for_each_entry(rsv, &mgr->reservations_pending, blocks) { + if (rsv->start <= start && + (start < (rsv->start + rsv->size))) { ret = -EBUSY; goto out; } } - list_for_each_entry(rsv, &mgr->reserved_pages, node) { - if ((rsv->mm_node.start <= start) && - (start < (rsv->mm_node.start + rsv->mm_node.size))) { + list_for_each_entry(rsv, &mgr->reserved_pages, blocks) { + if (rsv->start <= start && + (start < (rsv->start + rsv->size))) { ret = 0; goto out; } @@ -327,33 +337,11 @@ int amdgpu_vram_mgr_query_page_status(struct amdgpu_vram_mgr *mgr, ret = -ENOENT; out: - spin_unlock(&mgr->lock); + mutex_unlock(&mgr->lock); return ret; } /** - * amdgpu_vram_mgr_virt_start - update virtual start address - * - * @mem: ttm_resource to update - * @node: just allocated node - * - * Calculate a virtual BO start address to easily check if everything is CPU - * accessible. - */ -static void amdgpu_vram_mgr_virt_start(struct ttm_resource *mem, - struct drm_mm_node *node) -{ - unsigned long start; - - start = node->start + node->size; - if (start > mem->num_pages) - start -= mem->num_pages; - else - start = 0; - mem->start = max(mem->start, start); -} - -/** * amdgpu_vram_mgr_new - allocate new ranges * * @man: TTM memory type manager @@ -368,46 +356,44 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man, const struct ttm_place *place, struct ttm_resource **res) { - unsigned long lpfn, num_nodes, pages_per_node, pages_left, pages; + u64 vis_usage = 0, max_bytes, cur_size, min_block_size; struct amdgpu_vram_mgr *mgr = to_vram_mgr(man); struct amdgpu_device *adev = to_amdgpu_device(mgr); - uint64_t vis_usage = 0, mem_bytes, max_bytes; - struct ttm_range_mgr_node *node; - struct drm_mm *mm = &mgr->mm; - enum drm_mm_insert_mode mode; - unsigned i; + struct amdgpu_vram_mgr_resource *vres; + u64 size, remaining_size, lpfn, fpfn; + struct drm_buddy *mm = &mgr->mm; + struct drm_buddy_block *block; + unsigned long pages_per_block; int r; - lpfn = place->lpfn; + lpfn = place->lpfn << PAGE_SHIFT; if (!lpfn) - lpfn = man->size >> PAGE_SHIFT; + lpfn = man->size; + + fpfn = place->fpfn << PAGE_SHIFT; max_bytes = adev->gmc.mc_vram_size; if (tbo->type != ttm_bo_type_kernel) max_bytes -= AMDGPU_VM_RESERVED_VRAM; - mem_bytes = tbo->base.size; if (place->flags & TTM_PL_FLAG_CONTIGUOUS) { - pages_per_node = ~0ul; - num_nodes = 1; + pages_per_block = ~0ul; } else { #ifdef CONFIG_TRANSPARENT_HUGEPAGE - pages_per_node = HPAGE_PMD_NR; + pages_per_block = HPAGE_PMD_NR; #else /* default to 2MB */ - pages_per_node = 2UL << (20UL - PAGE_SHIFT); + pages_per_block = 2UL << (20UL - PAGE_SHIFT); #endif - pages_per_node = max_t(uint32_t, pages_per_node, - tbo->page_alignment); - num_nodes = DIV_ROUND_UP_ULL(PFN_UP(mem_bytes), pages_per_node); + pages_per_block = max_t(uint32_t, pages_per_block, + tbo->page_alignment); } - node = kvmalloc(struct_size(node, mm_nodes, num_nodes), - GFP_KERNEL | __GFP_ZERO); - if (!node) + vres = kzalloc(sizeof(*vres), GFP_KERNEL); + if (!vres) return -ENOMEM; - ttm_resource_init(tbo, place, &node->base); + ttm_resource_init(tbo, place, &vres->base); /* bail out quickly if there's likely not enough VRAM for this BO */ if (ttm_resource_manager_usage(man) > max_bytes) { @@ -415,66 +401,130 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man, goto error_fini; } - mode = DRM_MM_INSERT_BEST; + INIT_LIST_HEAD(&vres->blocks); + if (place->flags & TTM_PL_FLAG_TOPDOWN) - mode = DRM_MM_INSERT_HIGH; - - pages_left = node->base.num_pages; - - /* Limit maximum size to 2GB due to SG table limitations */ - pages = min(pages_left, 2UL << (30 - PAGE_SHIFT)); - - i = 0; - spin_lock(&mgr->lock); - while (pages_left) { - uint32_t alignment = tbo->page_alignment; - - if (pages >= pages_per_node) - alignment = pages_per_node; - - r = drm_mm_insert_node_in_range(mm, &node->mm_nodes[i], pages, - alignment, 0, place->fpfn, - lpfn, mode); - if (unlikely(r)) { - if (pages > pages_per_node) { - if (is_power_of_2(pages)) - pages = pages / 2; - else - pages = rounddown_pow_of_two(pages); - continue; + vres->flags |= DRM_BUDDY_TOPDOWN_ALLOCATION; + + if (fpfn || lpfn != man->size) + /* Allocate blocks in desired range */ + vres->flags |= DRM_BUDDY_RANGE_ALLOCATION; + + remaining_size = vres->base.num_pages << PAGE_SHIFT; + + mutex_lock(&mgr->lock); + while (remaining_size) { + if (tbo->page_alignment) + min_block_size = tbo->page_alignment << PAGE_SHIFT; + else + min_block_size = mgr->default_page_size; + + BUG_ON(min_block_size < mm->chunk_size); + + /* Limit maximum size to 2GiB due to SG table limitations */ + size = min(remaining_size, 2ULL << 30); + + if (size >= pages_per_block << PAGE_SHIFT) + min_block_size = pages_per_block << PAGE_SHIFT; + + cur_size = size; + + if (fpfn + size != place->lpfn << PAGE_SHIFT) { + /* + * Except for actual range allocation, modify the size and + * min_block_size conforming to continuous flag enablement + */ + if (place->flags & TTM_PL_FLAG_CONTIGUOUS) { + size = roundup_pow_of_two(size); + min_block_size = size; + /* + * Modify the size value if size is not + * aligned with min_block_size + */ + } else if (!IS_ALIGNED(size, min_block_size)) { + size = round_up(size, min_block_size); } - goto error_free; } - vis_usage += amdgpu_vram_mgr_vis_size(adev, &node->mm_nodes[i]); - amdgpu_vram_mgr_virt_start(&node->base, &node->mm_nodes[i]); - pages_left -= pages; - ++i; + r = drm_buddy_alloc_blocks(mm, fpfn, + lpfn, + size, + min_block_size, + &vres->blocks, + vres->flags); + if (unlikely(r)) + goto error_free_blocks; + + if (size > remaining_size) + remaining_size = 0; + else + remaining_size -= size; + } + mutex_unlock(&mgr->lock); + + if (cur_size != size) { + struct drm_buddy_block *block; + struct list_head *trim_list; + u64 original_size; + LIST_HEAD(temp); + + trim_list = &vres->blocks; + original_size = vres->base.num_pages << PAGE_SHIFT; + + /* + * If size value is rounded up to min_block_size, trim the last + * block to the required size + */ + if (!list_is_singular(&vres->blocks)) { + block = list_last_entry(&vres->blocks, typeof(*block), link); + list_move_tail(&block->link, &temp); + trim_list = &temp; + /* + * Compute the original_size value by subtracting the + * last block size with (aligned size - original size) + */ + original_size = amdgpu_vram_mgr_block_size(block) - (size - cur_size); + } - if (pages > pages_left) - pages = pages_left; + mutex_lock(&mgr->lock); + drm_buddy_block_trim(mm, + original_size, + trim_list); + mutex_unlock(&mgr->lock); + + if (!list_empty(&temp)) + list_splice_tail(trim_list, &vres->blocks); + } + + list_for_each_entry(block, &vres->blocks, link) + vis_usage += amdgpu_vram_mgr_vis_size(adev, block); + + block = amdgpu_vram_mgr_first_block(&vres->blocks); + if (!block) { + r = -EINVAL; + goto error_fini; } - spin_unlock(&mgr->lock); - if (i == 1) - node->base.placement |= TTM_PL_FLAG_CONTIGUOUS; + vres->base.start = amdgpu_vram_mgr_block_start(block) >> PAGE_SHIFT; + + if (amdgpu_is_vram_mgr_blocks_contiguous(&vres->blocks)) + vres->base.placement |= TTM_PL_FLAG_CONTIGUOUS; if (adev->gmc.xgmi.connected_to_cpu) - node->base.bus.caching = ttm_cached; + vres->base.bus.caching = ttm_cached; else - node->base.bus.caching = ttm_write_combined; + vres->base.bus.caching = ttm_write_combined; atomic64_add(vis_usage, &mgr->vis_usage); - *res = &node->base; + *res = &vres->base; return 0; -error_free: - while (i--) - drm_mm_remove_node(&node->mm_nodes[i]); - spin_unlock(&mgr->lock); +error_free_blocks: + drm_buddy_free_list(mm, &vres->blocks); + mutex_unlock(&mgr->lock); error_fini: - ttm_resource_fini(man, &node->base); - kvfree(node); + ttm_resource_fini(man, &vres->base); + kfree(vres); return r; } @@ -490,27 +540,26 @@ error_fini: static void amdgpu_vram_mgr_del(struct ttm_resource_manager *man, struct ttm_resource *res) { - struct ttm_range_mgr_node *node = to_ttm_range_mgr_node(res); + struct amdgpu_vram_mgr_resource *vres = to_amdgpu_vram_mgr_resource(res); struct amdgpu_vram_mgr *mgr = to_vram_mgr(man); struct amdgpu_device *adev = to_amdgpu_device(mgr); + struct drm_buddy *mm = &mgr->mm; + struct drm_buddy_block *block; uint64_t vis_usage = 0; - unsigned i, pages; - spin_lock(&mgr->lock); - for (i = 0, pages = res->num_pages; pages; - pages -= node->mm_nodes[i].size, ++i) { - struct drm_mm_node *mm = &node->mm_nodes[i]; + mutex_lock(&mgr->lock); + list_for_each_entry(block, &vres->blocks, link) + vis_usage += amdgpu_vram_mgr_vis_size(adev, block); - drm_mm_remove_node(mm); - vis_usage += amdgpu_vram_mgr_vis_size(adev, mm); - } amdgpu_vram_mgr_do_reserve(man); - spin_unlock(&mgr->lock); + + drm_buddy_free_list(mm, &vres->blocks); + mutex_unlock(&mgr->lock); atomic64_sub(vis_usage, &mgr->vis_usage); ttm_resource_fini(man, res); - kvfree(node); + kfree(vres); } /** @@ -542,7 +591,7 @@ int amdgpu_vram_mgr_alloc_sgt(struct amdgpu_device *adev, if (!*sgt) return -ENOMEM; - /* Determine the number of DRM_MM nodes to export */ + /* Determine the number of DRM_BUDDY blocks to export */ amdgpu_res_first(res, offset, length, &cursor); while (cursor.remaining) { num_entries++; @@ -558,10 +607,10 @@ int amdgpu_vram_mgr_alloc_sgt(struct amdgpu_device *adev, sg->length = 0; /* - * Walk down DRM_MM nodes to populate scatterlist nodes - * @note: Use iterator api to get first the DRM_MM node + * Walk down DRM_BUDDY blocks to populate scatterlist nodes + * @note: Use iterator api to get first the DRM_BUDDY block * and the number of bytes from it. Access the following - * DRM_MM node(s) if more buffer needs to exported + * DRM_BUDDY block(s) if more buffer needs to exported */ amdgpu_res_first(res, offset, length, &cursor); for_each_sgtable_sg((*sgt), sg, i) { @@ -648,13 +697,22 @@ static void amdgpu_vram_mgr_debug(struct ttm_resource_manager *man, struct drm_printer *printer) { struct amdgpu_vram_mgr *mgr = to_vram_mgr(man); + struct drm_buddy *mm = &mgr->mm; + struct drm_buddy_block *block; drm_printf(printer, " vis usage:%llu\n", amdgpu_vram_mgr_vis_usage(mgr)); - spin_lock(&mgr->lock); - drm_mm_print(&mgr->mm, printer); - spin_unlock(&mgr->lock); + mutex_lock(&mgr->lock); + drm_printf(printer, "default_page_size: %lluKiB\n", + mgr->default_page_size >> 10); + + drm_buddy_print(mm, printer); + + drm_printf(printer, "reserved:\n"); + list_for_each_entry(block, &mgr->reserved_pages, link) + drm_buddy_block_print(mm, block, printer); + mutex_unlock(&mgr->lock); } static const struct ttm_resource_manager_func amdgpu_vram_mgr_func = { @@ -674,16 +732,21 @@ int amdgpu_vram_mgr_init(struct amdgpu_device *adev) { struct amdgpu_vram_mgr *mgr = &adev->mman.vram_mgr; struct ttm_resource_manager *man = &mgr->manager; + int err; ttm_resource_manager_init(man, &adev->mman.bdev, adev->gmc.real_vram_size); man->func = &amdgpu_vram_mgr_func; - drm_mm_init(&mgr->mm, 0, man->size >> PAGE_SHIFT); - spin_lock_init(&mgr->lock); + err = drm_buddy_init(&mgr->mm, man->size, PAGE_SIZE); + if (err) + return err; + + mutex_init(&mgr->lock); INIT_LIST_HEAD(&mgr->reservations_pending); INIT_LIST_HEAD(&mgr->reserved_pages); + mgr->default_page_size = PAGE_SIZE; ttm_set_driver_manager(&adev->mman.bdev, TTM_PL_VRAM, &mgr->manager); ttm_resource_manager_set_used(man, true); @@ -711,16 +774,16 @@ void amdgpu_vram_mgr_fini(struct amdgpu_device *adev) if (ret) return; - spin_lock(&mgr->lock); - list_for_each_entry_safe(rsv, temp, &mgr->reservations_pending, node) + mutex_lock(&mgr->lock); + list_for_each_entry_safe(rsv, temp, &mgr->reservations_pending, blocks) kfree(rsv); - list_for_each_entry_safe(rsv, temp, &mgr->reserved_pages, node) { - drm_mm_remove_node(&rsv->mm_node); + list_for_each_entry_safe(rsv, temp, &mgr->reserved_pages, blocks) { + drm_buddy_free_list(&mgr->mm, &rsv->blocks); kfree(rsv); } - drm_mm_takedown(&mgr->mm); - spin_unlock(&mgr->lock); + drm_buddy_fini(&mgr->mm); + mutex_unlock(&mgr->lock); ttm_resource_manager_cleanup(man); ttm_set_driver_manager(&adev->mman.bdev, TTM_PL_VRAM, NULL); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h new file mode 100644 index 000000000000..9a2db87186c7 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h @@ -0,0 +1,89 @@ +/* SPDX-License-Identifier: MIT + * Copyright 2021 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __AMDGPU_VRAM_MGR_H__ +#define __AMDGPU_VRAM_MGR_H__ + +#include <drm/drm_buddy.h> + +struct amdgpu_vram_mgr { + struct ttm_resource_manager manager; + struct drm_buddy mm; + /* protects access to buffer objects */ + struct mutex lock; + struct list_head reservations_pending; + struct list_head reserved_pages; + atomic64_t vis_usage; + u64 default_page_size; +}; + +struct amdgpu_vram_mgr_resource { + struct ttm_resource base; + struct list_head blocks; + unsigned long flags; +}; + +static inline u64 amdgpu_vram_mgr_block_start(struct drm_buddy_block *block) +{ + return drm_buddy_block_offset(block); +} + +static inline u64 amdgpu_vram_mgr_block_size(struct drm_buddy_block *block) +{ + return PAGE_SIZE << drm_buddy_block_order(block); +} + +static inline struct drm_buddy_block * +amdgpu_vram_mgr_first_block(struct list_head *list) +{ + return list_first_entry_or_null(list, struct drm_buddy_block, link); +} + +static inline bool amdgpu_is_vram_mgr_blocks_contiguous(struct list_head *head) +{ + struct drm_buddy_block *block; + u64 start, size; + + block = amdgpu_vram_mgr_first_block(head); + if (!block) + return false; + + while (head != block->link.next) { + start = amdgpu_vram_mgr_block_start(block); + size = amdgpu_vram_mgr_block_size(block); + + block = list_entry(block->link.next, struct drm_buddy_block, link); + if (start + size != amdgpu_vram_mgr_block_start(block)) + return false; + } + + return true; +} + +static inline struct amdgpu_vram_mgr_resource * +to_amdgpu_vram_mgr_resource(struct ttm_resource *res) +{ + return container_of(res, struct amdgpu_vram_mgr_resource, base); +} + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h index 7326b6c1b71c..e78e4c27b62a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h @@ -1,34 +1,33 @@ /* - * Copyright 2018-2019 Advanced Micro Devices, Inc. + * Copyright (c) 2018-2021 Advanced Micro Devices, Inc. All rights reserved. * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. */ #ifndef AMDGV_SRIOV_MSG__H_ #define AMDGV_SRIOV_MSG__H_ /* unit in kilobytes */ -#define AMD_SRIOV_MSG_VBIOS_OFFSET 0 -#define AMD_SRIOV_MSG_VBIOS_SIZE_KB 64 -#define AMD_SRIOV_MSG_DATAEXCHANGE_OFFSET_KB AMD_SRIOV_MSG_VBIOS_SIZE_KB -#define AMD_SRIOV_MSG_DATAEXCHANGE_SIZE_KB 4 +#define AMD_SRIOV_MSG_VBIOS_OFFSET 0 +#define AMD_SRIOV_MSG_VBIOS_SIZE_KB 64 +#define AMD_SRIOV_MSG_DATAEXCHANGE_OFFSET_KB AMD_SRIOV_MSG_VBIOS_SIZE_KB +#define AMD_SRIOV_MSG_DATAEXCHANGE_SIZE_KB 4 /* * layout @@ -51,10 +50,10 @@ * v2 defined in amdgim * v3 current */ -#define AMD_SRIOV_MSG_FW_VRAM_PF2VF_VER 2 -#define AMD_SRIOV_MSG_FW_VRAM_VF2PF_VER 3 +#define AMD_SRIOV_MSG_FW_VRAM_PF2VF_VER 2 +#define AMD_SRIOV_MSG_FW_VRAM_VF2PF_VER 3 -#define AMD_SRIOV_MSG_RESERVE_UCODE 24 +#define AMD_SRIOV_MSG_RESERVE_UCODE 24 #define AMD_SRIOV_MSG_RESERVE_VCN_INST 4 @@ -83,19 +82,19 @@ enum amd_sriov_ucode_engine_id { AMD_SRIOV_UCODE_ID__MAX }; -#pragma pack(push, 1) // PF2VF / VF2PF data areas are byte packed +#pragma pack(push, 1) // PF2VF / VF2PF data areas are byte packed union amd_sriov_msg_feature_flags { struct { - uint32_t error_log_collect : 1; - uint32_t host_load_ucodes : 1; - uint32_t host_flr_vramlost : 1; - uint32_t mm_bw_management : 1; - uint32_t pp_one_vf_mode : 1; - uint32_t reg_indirect_acc : 1; - uint32_t reserved : 26; + uint32_t error_log_collect : 1; + uint32_t host_load_ucodes : 1; + uint32_t host_flr_vramlost : 1; + uint32_t mm_bw_management : 1; + uint32_t pp_one_vf_mode : 1; + uint32_t reg_indirect_acc : 1; + uint32_t reserved : 26; } flags; - uint32_t all; + uint32_t all; }; union amd_sriov_reg_access_flags { @@ -110,10 +109,10 @@ union amd_sriov_reg_access_flags { union amd_sriov_msg_os_info { struct { - uint32_t windows : 1; - uint32_t reserved : 31; + uint32_t windows : 1; + uint32_t reserved : 31; } info; - uint32_t all; + uint32_t all; }; struct amd_sriov_msg_uuid_info { @@ -156,6 +155,7 @@ struct amd_sriov_msg_pf2vf_info_header { uint32_t reserved[2]; }; +#define AMD_SRIOV_MSG_PF2VF_INFO_FILLED_SIZE (48) struct amd_sriov_msg_pf2vf_info { /* header contains size and version */ struct amd_sriov_msg_pf2vf_info_header header; @@ -204,10 +204,10 @@ struct amd_sriov_msg_pf2vf_info { } mm_bw_management[AMD_SRIOV_MSG_RESERVE_VCN_INST]; /* UUID info */ struct amd_sriov_msg_uuid_info uuid_info; - /* pcie atomic Ops info */ - uint32_t pcie_atomic_ops_enabled_flags; + /* PCIE atomic ops support flag */ + uint32_t pcie_atomic_ops_support_flags; /* reserved */ - uint32_t reserved[256 - 48]; + uint32_t reserved[256 - AMD_SRIOV_MSG_PF2VF_INFO_FILLED_SIZE]; }; struct amd_sriov_msg_vf2pf_info_header { @@ -219,12 +219,13 @@ struct amd_sriov_msg_vf2pf_info_header { uint32_t reserved[2]; }; +#define AMD_SRIOV_MSG_VF2PF_INFO_FILLED_SIZE (70) struct amd_sriov_msg_vf2pf_info { /* header contains size and version */ struct amd_sriov_msg_vf2pf_info_header header; uint32_t checksum; /* driver version */ - uint8_t driver_version[64]; + uint8_t driver_version[64]; /* driver certification, 1=WHQL, 0=None */ uint32_t driver_cert; /* guest OS type and version */ @@ -258,13 +259,13 @@ struct amd_sriov_msg_vf2pf_info { uint32_t fb_size; /* guest ucode data, each one is 1.25 Dword */ struct { - uint8_t id; + uint8_t id; uint32_t version; } ucode_info[AMD_SRIOV_MSG_RESERVE_UCODE]; uint64_t dummy_page_addr; /* reserved */ - uint32_t reserved[256-70]; + uint32_t reserved[256 - AMD_SRIOV_MSG_VF2PF_INFO_FILLED_SIZE]; }; /* mailbox message send from guest to host */ @@ -276,7 +277,7 @@ enum amd_sriov_mailbox_request_message { MB_REQ_MSG_REQ_GPU_RESET_ACCESS, MB_REQ_MSG_REQ_GPU_INIT_DATA, - MB_REQ_MSG_LOG_VF_ERROR = 200, + MB_REQ_MSG_LOG_VF_ERROR = 200, }; /* mailbox message send from host to guest */ @@ -298,17 +299,15 @@ enum amd_sriov_gpu_init_data_version { GPU_INIT_DATA_READY_V1 = 1, }; -#pragma pack(pop) // Restore previous packing option +#pragma pack(pop) // Restore previous packing option /* checksum function between host and guest */ -unsigned int amd_sriov_msg_checksum(void *obj, - unsigned long obj_size, - unsigned int key, - unsigned int checksum); +unsigned int amd_sriov_msg_checksum(void *obj, unsigned long obj_size, unsigned int key, + unsigned int checksum); /* assertion at compile time */ #ifdef __linux__ -#define stringification(s) _stringification(s) +#define stringification(s) _stringification(s) #define _stringification(s) #s _Static_assert( @@ -319,13 +318,11 @@ _Static_assert( sizeof(struct amd_sriov_msg_pf2vf_info) == AMD_SRIOV_MSG_SIZE_KB << 10, "amd_sriov_msg_pf2vf_info must be " stringification(AMD_SRIOV_MSG_SIZE_KB) " KB"); -_Static_assert( - AMD_SRIOV_MSG_RESERVE_UCODE % 4 == 0, - "AMD_SRIOV_MSG_RESERVE_UCODE must be multiple of 4"); +_Static_assert(AMD_SRIOV_MSG_RESERVE_UCODE % 4 == 0, + "AMD_SRIOV_MSG_RESERVE_UCODE must be multiple of 4"); -_Static_assert( - AMD_SRIOV_MSG_RESERVE_UCODE > AMD_SRIOV_UCODE_ID__MAX, - "AMD_SRIOV_MSG_RESERVE_UCODE must be bigger than AMD_SRIOV_UCODE_ID__MAX"); +_Static_assert(AMD_SRIOV_MSG_RESERVE_UCODE > AMD_SRIOV_UCODE_ID__MAX, + "AMD_SRIOV_MSG_RESERVE_UCODE must be bigger than AMD_SRIOV_UCODE_ID__MAX"); #undef _stringification #undef stringification diff --git a/drivers/gpu/drm/amd/amdgpu/athub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/athub_v1_0.c index 88642e7ecdf4..a13c443ea10f 100644 --- a/drivers/gpu/drm/amd/amdgpu/athub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/athub_v1_0.c @@ -87,7 +87,7 @@ int athub_v1_0_set_clockgating(struct amdgpu_device *adev, return 0; } -void athub_v1_0_get_clockgating(struct amdgpu_device *adev, u32 *flags) +void athub_v1_0_get_clockgating(struct amdgpu_device *adev, u64 *flags) { int data; diff --git a/drivers/gpu/drm/amd/amdgpu/athub_v1_0.h b/drivers/gpu/drm/amd/amdgpu/athub_v1_0.h index b279af59e34f..6be0a6704ea7 100644 --- a/drivers/gpu/drm/amd/amdgpu/athub_v1_0.h +++ b/drivers/gpu/drm/amd/amdgpu/athub_v1_0.h @@ -25,6 +25,6 @@ int athub_v1_0_set_clockgating(struct amdgpu_device *adev, enum amd_clockgating_state state); -void athub_v1_0_get_clockgating(struct amdgpu_device *adev, u32 *flags); +void athub_v1_0_get_clockgating(struct amdgpu_device *adev, u64 *flags); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/athub_v2_0.c b/drivers/gpu/drm/amd/amdgpu/athub_v2_0.c index a720436857b4..a9521c98e7f7 100644 --- a/drivers/gpu/drm/amd/amdgpu/athub_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/athub_v2_0.c @@ -93,7 +93,7 @@ int athub_v2_0_set_clockgating(struct amdgpu_device *adev, return 0; } -void athub_v2_0_get_clockgating(struct amdgpu_device *adev, u32 *flags) +void athub_v2_0_get_clockgating(struct amdgpu_device *adev, u64 *flags) { int data; diff --git a/drivers/gpu/drm/amd/amdgpu/athub_v2_0.h b/drivers/gpu/drm/amd/amdgpu/athub_v2_0.h index 02932c1c8bab..8b763f6dfd81 100644 --- a/drivers/gpu/drm/amd/amdgpu/athub_v2_0.h +++ b/drivers/gpu/drm/amd/amdgpu/athub_v2_0.h @@ -25,6 +25,6 @@ int athub_v2_0_set_clockgating(struct amdgpu_device *adev, enum amd_clockgating_state state); -void athub_v2_0_get_clockgating(struct amdgpu_device *adev, u32 *flags); +void athub_v2_0_get_clockgating(struct amdgpu_device *adev, u64 *flags); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/athub_v2_1.c b/drivers/gpu/drm/amd/amdgpu/athub_v2_1.c index ad8e87d3d2cb..78508ae6a670 100644 --- a/drivers/gpu/drm/amd/amdgpu/athub_v2_1.c +++ b/drivers/gpu/drm/amd/amdgpu/athub_v2_1.c @@ -85,7 +85,7 @@ int athub_v2_1_set_clockgating(struct amdgpu_device *adev, return 0; } -void athub_v2_1_get_clockgating(struct amdgpu_device *adev, u32 *flags) +void athub_v2_1_get_clockgating(struct amdgpu_device *adev, u64 *flags) { int data; diff --git a/drivers/gpu/drm/amd/amdgpu/athub_v2_1.h b/drivers/gpu/drm/amd/amdgpu/athub_v2_1.h index 5e6824c0f591..b799f14bce03 100644 --- a/drivers/gpu/drm/amd/amdgpu/athub_v2_1.h +++ b/drivers/gpu/drm/amd/amdgpu/athub_v2_1.h @@ -25,6 +25,6 @@ int athub_v2_1_set_clockgating(struct amdgpu_device *adev, enum amd_clockgating_state state); -void athub_v2_1_get_clockgating(struct amdgpu_device *adev, u32 *flags); +void athub_v2_1_get_clockgating(struct amdgpu_device *adev, u64 *flags); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c b/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c index a92d86e12718..d4f5a584075d 100644 --- a/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c +++ b/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c @@ -765,7 +765,6 @@ amdgpu_atombios_encoder_setup_dig_transmitter(struct drm_encoder *encoder, int a int dp_clock = 0; int dp_lane_count = 0; int connector_object_id = 0; - int igp_lane_info = 0; int dig_encoder = dig->dig_encoder; int hpd_id = AMDGPU_HPD_NONE; @@ -848,26 +847,6 @@ amdgpu_atombios_encoder_setup_dig_transmitter(struct drm_encoder *encoder, int a else args.v1.ucConfig |= ATOM_TRANSMITTER_CONFIG_DIG1_ENCODER; - if ((adev->flags & AMD_IS_APU) && - (amdgpu_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_UNIPHY)) { - if (is_dp || - !amdgpu_dig_monitor_is_duallink(encoder, amdgpu_encoder->pixel_clock)) { - if (igp_lane_info & 0x1) - args.v1.ucConfig |= ATOM_TRANSMITTER_CONFIG_LANE_0_3; - else if (igp_lane_info & 0x2) - args.v1.ucConfig |= ATOM_TRANSMITTER_CONFIG_LANE_4_7; - else if (igp_lane_info & 0x4) - args.v1.ucConfig |= ATOM_TRANSMITTER_CONFIG_LANE_8_11; - else if (igp_lane_info & 0x8) - args.v1.ucConfig |= ATOM_TRANSMITTER_CONFIG_LANE_12_15; - } else { - if (igp_lane_info & 0x3) - args.v1.ucConfig |= ATOM_TRANSMITTER_CONFIG_LANE_0_7; - else if (igp_lane_info & 0xc) - args.v1.ucConfig |= ATOM_TRANSMITTER_CONFIG_LANE_8_15; - } - } - if (dig->linkb) args.v1.ucConfig |= ATOM_TRANSMITTER_CONFIG_LINKB; else diff --git a/drivers/gpu/drm/amd/amdgpu/df_v1_7.c b/drivers/gpu/drm/amd/amdgpu/df_v1_7.c index 2d01ac0d4c11..b991609f46c1 100644 --- a/drivers/gpu/drm/amd/amdgpu/df_v1_7.c +++ b/drivers/gpu/drm/amd/amdgpu/df_v1_7.c @@ -99,7 +99,7 @@ static void df_v1_7_update_medium_grain_clock_gating(struct amdgpu_device *adev, } static void df_v1_7_get_clockgating_state(struct amdgpu_device *adev, - u32 *flags) + u64 *flags) { u32 tmp; diff --git a/drivers/gpu/drm/amd/amdgpu/df_v3_6.c b/drivers/gpu/drm/amd/amdgpu/df_v3_6.c index f4dfca013ec5..483a441b46aa 100644 --- a/drivers/gpu/drm/amd/amdgpu/df_v3_6.c +++ b/drivers/gpu/drm/amd/amdgpu/df_v3_6.c @@ -332,7 +332,7 @@ static void df_v3_6_update_medium_grain_clock_gating(struct amdgpu_device *adev, } static void df_v3_6_get_clockgating_state(struct amdgpu_device *adev, - u32 *flags) + u64 *flags) { u32 tmp; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 9426e252d8aa..54446162db8b 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -4741,7 +4741,7 @@ static int gfx_v10_0_compute_ring_init(struct amdgpu_device *adev, int ring_id, + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec) + ring->pipe; hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring) ? - AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_GFX_PIPE_PRIO_NORMAL; + AMDGPU_RING_PRIO_2 : AMDGPU_RING_PRIO_DEFAULT; /* type-2 packets are deprecated on MEC, use type-3 instead */ r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type, hw_prio, NULL); @@ -8451,7 +8451,7 @@ static int gfx_v10_0_set_clockgating_state(void *handle, return 0; } -static void gfx_v10_0_get_clockgating_state(void *handle, u32 *flags) +static void gfx_v10_0_get_clockgating_state(void *handle, u64 *flags) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; int data; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 5f112efda634..25dc729d0ec2 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -1925,7 +1925,7 @@ static int gfx_v8_0_compute_ring_init(struct amdgpu_device *adev, int ring_id, + ring->pipe; hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring) ? - AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_RING_PRIO_DEFAULT; + AMDGPU_RING_PRIO_2 : AMDGPU_RING_PRIO_DEFAULT; /* type-2 packets are deprecated on MEC, use type-3 instead */ r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type, hw_prio, NULL); @@ -5475,7 +5475,7 @@ static int gfx_v8_0_set_powergating_state(void *handle, return 0; } -static void gfx_v8_0_get_clockgating_state(void *handle, u32 *flags) +static void gfx_v8_0_get_clockgating_state(void *handle, u64 *flags) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; int data; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 46d4bf27ebbb..d58fd83524ac 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -1205,6 +1205,8 @@ static const struct amdgpu_gfxoff_quirk amdgpu_gfxoff_quirk_list[] = { { 0x1002, 0x15dd, 0x103c, 0x83e7, 0xd3 }, /* GFXOFF is unstable on C6 parts with a VBIOS 113-RAVEN-114 */ { 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc6 }, + /* Apple MacBook Pro (15-inch, 2019) Radeon Pro Vega 20 4 GB */ + { 0x1002, 0x69af, 0x106b, 0x019a, 0xc0 }, { 0, 0, 0, 0, 0 }, }; @@ -2274,7 +2276,7 @@ static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id, + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec) + ring->pipe; hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring) ? - AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_GFX_PIPE_PRIO_NORMAL; + AMDGPU_RING_PRIO_2 : AMDGPU_RING_PRIO_DEFAULT; /* type-2 packets are deprecated on MEC, use type-3 instead */ return amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type, hw_prio, NULL); @@ -5231,7 +5233,7 @@ static int gfx_v9_0_set_clockgating_state(void *handle, return 0; } -static void gfx_v9_0_get_clockgating_state(void *handle, u32 *flags) +static void gfx_v9_0_get_clockgating_state(void *handle, u64 *flags) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; int data; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c index 7653ebd0e67b..3a797424579c 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c @@ -1930,6 +1930,19 @@ static void gfx_v9_4_2_reset_sq_timeout_status(struct amdgpu_device *adev) mutex_unlock(&adev->grbm_idx_mutex); } +static bool gfx_v9_4_2_query_uctl2_poison_status(struct amdgpu_device *adev) +{ + u32 status = 0; + struct amdgpu_vmhub *hub; + + hub = &adev->vmhub[AMDGPU_GFXHUB_0]; + status = RREG32(hub->vm_l2_pro_fault_status); + /* reset page fault status */ + WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1); + + return REG_GET_FIELD(status, VM_L2_PROTECTION_FAULT_STATUS, FED); +} + struct amdgpu_ras_block_hw_ops gfx_v9_4_2_ras_ops = { .ras_error_inject = &gfx_v9_4_2_ras_error_inject, .query_ras_error_count = &gfx_v9_4_2_query_ras_error_count, @@ -1943,4 +1956,5 @@ struct amdgpu_gfx_ras gfx_v9_4_2_ras = { .hw_ops = &gfx_v9_4_2_ras_ops, }, .enable_watchdog_timer = &gfx_v9_4_2_enable_watchdog_timer, + .query_utcl2_poison_status = gfx_v9_4_2_query_uctl2_poison_status, }; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c index 5228421b0f72..a455e59f41f4 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c @@ -1161,7 +1161,7 @@ static int gmc_v10_0_set_clockgating_state(void *handle, return athub_v2_0_set_clockgating(adev, state); } -static void gmc_v10_0_get_clockgating_state(void *handle, u32 *flags) +static void gmc_v10_0_get_clockgating_state(void *handle, u64 *flags) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c index 1932a3e4af7e..382dde1ce74c 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c @@ -1690,7 +1690,7 @@ static int gmc_v8_0_set_powergating_state(void *handle, return 0; } -static void gmc_v8_0_get_clockgating_state(void *handle, u32 *flags) +static void gmc_v8_0_get_clockgating_state(void *handle, u64 *flags) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; int data; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 6009fbfdcc19..22761a3bb818 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -1948,7 +1948,7 @@ static int gmc_v9_0_set_clockgating_state(void *handle, return 0; } -static void gmc_v9_0_get_clockgating_state(void *handle, u32 *flags) +static void gmc_v9_0_get_clockgating_state(void *handle, u64 *flags) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c b/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c index 046216635262..adf89680f53e 100644 --- a/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c @@ -124,7 +124,7 @@ static void hdp_v4_0_update_clock_gating(struct amdgpu_device *adev, } static void hdp_v4_0_get_clockgating_state(struct amdgpu_device *adev, - u32 *flags) + u64 *flags) { int data; diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c b/drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c index 5793977953cc..a9ea23fa0def 100644 --- a/drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c @@ -181,7 +181,7 @@ static void hdp_v5_0_update_clock_gating(struct amdgpu_device *adev, } static void hdp_v5_0_get_clockgating_state(struct amdgpu_device *adev, - u32 *flags) + u64 *flags) { uint32_t tmp; diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c index a29c86617fb5..8c3227d0b8b4 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c @@ -26,6 +26,7 @@ #include "soc15.h" #include "soc15d.h" #include "jpeg_v2_0.h" +#include "jpeg_v2_5.h" #include "vcn/vcn_2_5_offset.h" #include "vcn/vcn_2_5_sh_mask.h" @@ -39,6 +40,7 @@ static void jpeg_v2_5_set_dec_ring_funcs(struct amdgpu_device *adev); static void jpeg_v2_5_set_irq_funcs(struct amdgpu_device *adev); static int jpeg_v2_5_set_powergating_state(void *handle, enum amd_powergating_state state); +static void jpeg_v2_5_set_ras_funcs(struct amdgpu_device *adev); static int amdgpu_ih_clientid_jpeg[] = { SOC15_IH_CLIENTID_VCN, @@ -70,6 +72,7 @@ static int jpeg_v2_5_early_init(void *handle) jpeg_v2_5_set_dec_ring_funcs(adev); jpeg_v2_5_set_irq_funcs(adev); + jpeg_v2_5_set_ras_funcs(adev); return 0; } @@ -730,3 +733,74 @@ const struct amdgpu_ip_block_version jpeg_v2_6_ip_block = .rev = 0, .funcs = &jpeg_v2_6_ip_funcs, }; + +static uint32_t jpeg_v2_6_query_poison_by_instance(struct amdgpu_device *adev, + uint32_t instance, uint32_t sub_block) +{ + uint32_t poison_stat = 0, reg_value = 0; + + switch (sub_block) { + case AMDGPU_JPEG_V2_6_JPEG0: + reg_value = RREG32_SOC15(JPEG, instance, mmUVD_RAS_JPEG0_STATUS); + poison_stat = REG_GET_FIELD(reg_value, UVD_RAS_JPEG0_STATUS, POISONED_PF); + break; + case AMDGPU_JPEG_V2_6_JPEG1: + reg_value = RREG32_SOC15(JPEG, instance, mmUVD_RAS_JPEG1_STATUS); + poison_stat = REG_GET_FIELD(reg_value, UVD_RAS_JPEG1_STATUS, POISONED_PF); + break; + default: + break; + } + + if (poison_stat) + dev_info(adev->dev, "Poison detected in JPEG%d sub_block%d\n", + instance, sub_block); + + return poison_stat; +} + +static bool jpeg_v2_6_query_ras_poison_status(struct amdgpu_device *adev) +{ + uint32_t inst = 0, sub = 0, poison_stat = 0; + + for (inst = 0; inst < adev->jpeg.num_jpeg_inst; inst++) + for (sub = 0; sub < AMDGPU_JPEG_V2_6_MAX_SUB_BLOCK; sub++) + poison_stat += + jpeg_v2_6_query_poison_by_instance(adev, inst, sub); + + return !!poison_stat; +} + +const struct amdgpu_ras_block_hw_ops jpeg_v2_6_ras_hw_ops = { + .query_poison_status = jpeg_v2_6_query_ras_poison_status, +}; + +static struct amdgpu_jpeg_ras jpeg_v2_6_ras = { + .ras_block = { + .hw_ops = &jpeg_v2_6_ras_hw_ops, + }, +}; + +static void jpeg_v2_5_set_ras_funcs(struct amdgpu_device *adev) +{ + switch (adev->ip_versions[JPEG_HWIP][0]) { + case IP_VERSION(2, 6, 0): + adev->jpeg.ras = &jpeg_v2_6_ras; + break; + default: + break; + } + + if (adev->jpeg.ras) { + amdgpu_ras_register_ras_block(adev, &adev->jpeg.ras->ras_block); + + strcpy(adev->jpeg.ras->ras_block.ras_comm.name, "jpeg"); + adev->jpeg.ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__JPEG; + adev->jpeg.ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__POISON; + adev->jpeg.ras_if = &adev->jpeg.ras->ras_block.ras_comm; + + /* If don't define special ras_late_init function, use default ras_late_init */ + if (!adev->jpeg.ras->ras_block.ras_late_init) + adev->jpeg.ras->ras_block.ras_late_init = amdgpu_ras_block_late_init; + } +} diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.h b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.h index 3b0aa29b9879..1e858c6cdf13 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.h +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.h @@ -24,6 +24,13 @@ #ifndef __JPEG_V2_5_H__ #define __JPEG_V2_5_H__ +enum amdgpu_jpeg_v2_6_sub_block { + AMDGPU_JPEG_V2_6_JPEG0 = 0, + AMDGPU_JPEG_V2_6_JPEG1, + + AMDGPU_JPEG_V2_6_MAX_SUB_BLOCK, +}; + extern const struct amdgpu_ip_block_version jpeg_v2_5_ip_block; extern const struct amdgpu_ip_block_version jpeg_v2_6_ip_block; diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c index 4c9f0c0f3116..3f44a099c52a 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c @@ -546,7 +546,7 @@ static int mmhub_v1_0_set_clockgating(struct amdgpu_device *adev, return 0; } -static void mmhub_v1_0_get_clockgating(struct amdgpu_device *adev, u32 *flags) +static void mmhub_v1_0_get_clockgating(struct amdgpu_device *adev, u64 *flags) { int data, data1; diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c index 3b901f941627..6fa7090bc6cb 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c @@ -542,7 +542,7 @@ static int mmhub_v1_7_set_clockgating(struct amdgpu_device *adev, return 0; } -static void mmhub_v1_7_get_clockgating(struct amdgpu_device *adev, u32 *flags) +static void mmhub_v1_7_get_clockgating(struct amdgpu_device *adev, u64 *flags) { int data, data1; diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c index 3718ff610ab2..636abd855686 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c @@ -682,7 +682,7 @@ static int mmhub_v2_0_set_clockgating(struct amdgpu_device *adev, return 0; } -static void mmhub_v2_0_get_clockgating(struct amdgpu_device *adev, u32 *flags) +static void mmhub_v2_0_get_clockgating(struct amdgpu_device *adev, u64 *flags) { int data, data1; diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c index 1957fb098c4d..ff44c5364a8c 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c @@ -577,7 +577,7 @@ static int mmhub_v2_3_set_clockgating(struct amdgpu_device *adev, return 0; } -static void mmhub_v2_3_get_clockgating(struct amdgpu_device *adev, u32 *flags) +static void mmhub_v2_3_get_clockgating(struct amdgpu_device *adev, u64 *flags) { int data, data1, data2, data3; diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c index 619106f7d23d..6e0145b2b408 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c @@ -647,7 +647,7 @@ static int mmhub_v9_4_set_clockgating(struct amdgpu_device *adev, return 0; } -static void mmhub_v9_4_get_clockgating(struct amdgpu_device *adev, u32 *flags) +static void mmhub_v9_4_get_clockgating(struct amdgpu_device *adev, u64 *flags) { int data, data1; diff --git a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c index 8ce5b8ca1fd7..97201ab0965e 100644 --- a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c @@ -685,7 +685,7 @@ static int navi10_ih_set_powergating_state(void *handle, return 0; } -static void navi10_ih_get_clockgating_state(void *handle, u32 *flags) +static void navi10_ih_get_clockgating_state(void *handle, u64 *flags) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c index ee7cab37dfd5..6cd1fb2eb913 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c @@ -278,7 +278,7 @@ static void nbio_v2_3_update_medium_grain_light_sleep(struct amdgpu_device *adev } static void nbio_v2_3_get_clockgating_state(struct amdgpu_device *adev, - u32 *flags) + u64 *flags) { int data; diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c index 4bbacf1be25a..f7f6ddebd3e4 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c @@ -210,7 +210,7 @@ static void nbio_v6_1_update_medium_grain_light_sleep(struct amdgpu_device *adev } static void nbio_v6_1_get_clockgating_state(struct amdgpu_device *adev, - u32 *flags) + u64 *flags) { int data; diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c index 37a4039fdfc5..aa0326d00c72 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c @@ -205,7 +205,7 @@ static void nbio_v7_0_update_medium_grain_light_sleep(struct amdgpu_device *adev } static void nbio_v7_0_get_clockgating_state(struct amdgpu_device *adev, - u32 *flags) + u64 *flags) { int data; diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_2.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_2.c index 6f81de6f3cc4..31776b12e4c4 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_2.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_2.c @@ -306,7 +306,7 @@ static void nbio_v7_2_update_medium_grain_light_sleep(struct amdgpu_device *adev } static void nbio_v7_2_get_clockgating_state(struct amdgpu_device *adev, - u32 *flags) + u64 *flags) { int data; diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c index c2357e83a8c4..4531761dcf77 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c @@ -273,7 +273,7 @@ static void nbio_v7_4_update_medium_grain_light_sleep(struct amdgpu_device *adev } static void nbio_v7_4_get_clockgating_state(struct amdgpu_device *adev, - u32 *flags) + u64 *flags) { int data; diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c index e19f14c3ef59..0a7946c59a42 100644 --- a/drivers/gpu/drm/amd/amdgpu/nv.c +++ b/drivers/gpu/drm/amd/amdgpu/nv.c @@ -1115,7 +1115,7 @@ static int nv_common_set_powergating_state(void *handle, return 0; } -static void nv_common_get_clockgating_state(void *handle, u32 *flags) +static void nv_common_get_clockgating_state(void *handle, u64 *flags) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c index 4ef4feff5649..3695374896ed 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c @@ -1535,7 +1535,7 @@ static int sdma_v3_0_set_powergating_state(void *handle, return 0; } -static void sdma_v3_0_get_clockgating_state(void *handle, u32 *flags) +static void sdma_v3_0_get_clockgating_state(void *handle, u64 *flags) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; int data; diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index d7e8f7232364..8589ab1c9800 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c @@ -2372,7 +2372,7 @@ static int sdma_v4_0_set_powergating_state(void *handle, return 0; } -static void sdma_v4_0_get_clockgating_state(void *handle, u32 *flags) +static void sdma_v4_0_get_clockgating_state(void *handle, u64 *flags) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; int data; diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c index a8d49c005f73..775aabde1ae2 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c @@ -1648,7 +1648,7 @@ static int sdma_v5_0_set_powergating_state(void *handle, return 0; } -static void sdma_v5_0_get_clockgating_state(void *handle, u32 *flags) +static void sdma_v5_0_get_clockgating_state(void *handle, u64 *flags) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; int data; diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c index 824eace69884..ca50857b982d 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c @@ -1645,7 +1645,7 @@ static int sdma_v5_2_set_powergating_state(void *handle, return 0; } -static void sdma_v5_2_get_clockgating_state(void *handle, u32 *flags) +static void sdma_v5_2_get_clockgating_state(void *handle, u64 *flags) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; int data; diff --git a/drivers/gpu/drm/amd/amdgpu/smuio_v11_0.c b/drivers/gpu/drm/amd/amdgpu/smuio_v11_0.c index b6f1322f908c..acdc40f99ab3 100644 --- a/drivers/gpu/drm/amd/amdgpu/smuio_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/smuio_v11_0.c @@ -59,7 +59,7 @@ static void smuio_v11_0_update_rom_clock_gating(struct amdgpu_device *adev, bool WREG32_SOC15(SMUIO, 0, mmCGTT_ROM_CLK_CTRL0, data); } -static void smuio_v11_0_get_clock_gating_state(struct amdgpu_device *adev, u32 *flags) +static void smuio_v11_0_get_clock_gating_state(struct amdgpu_device *adev, u64 *flags) { u32 data; diff --git a/drivers/gpu/drm/amd/amdgpu/smuio_v11_0_6.c b/drivers/gpu/drm/amd/amdgpu/smuio_v11_0_6.c index 3a18dbb55c32..2afeb8b37f62 100644 --- a/drivers/gpu/drm/amd/amdgpu/smuio_v11_0_6.c +++ b/drivers/gpu/drm/amd/amdgpu/smuio_v11_0_6.c @@ -56,7 +56,7 @@ static void smuio_v11_0_6_update_rom_clock_gating(struct amdgpu_device *adev, bo WREG32_SOC15(SMUIO, 0, mmCGTT_ROM_CLK_CTRL0, data); } -static void smuio_v11_0_6_get_clock_gating_state(struct amdgpu_device *adev, u32 *flags) +static void smuio_v11_0_6_get_clock_gating_state(struct amdgpu_device *adev, u64 *flags) { u32 data; diff --git a/drivers/gpu/drm/amd/amdgpu/smuio_v13_0.c b/drivers/gpu/drm/amd/amdgpu/smuio_v13_0.c index 39b7c206770f..13e905c22592 100644 --- a/drivers/gpu/drm/amd/amdgpu/smuio_v13_0.c +++ b/drivers/gpu/drm/amd/amdgpu/smuio_v13_0.c @@ -58,7 +58,7 @@ static void smuio_v13_0_update_rom_clock_gating(struct amdgpu_device *adev, bool WREG32_SOC15(SMUIO, 0, regCGTT_ROM_CLK_CTRL0, data); } -static void smuio_v13_0_get_clock_gating_state(struct amdgpu_device *adev, u32 *flags) +static void smuio_v13_0_get_clock_gating_state(struct amdgpu_device *adev, u64 *flags) { u32 data; diff --git a/drivers/gpu/drm/amd/amdgpu/smuio_v9_0.c b/drivers/gpu/drm/amd/amdgpu/smuio_v9_0.c index 8417890af227..e4e30b9d481b 100644 --- a/drivers/gpu/drm/amd/amdgpu/smuio_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/smuio_v9_0.c @@ -56,7 +56,7 @@ static void smuio_v9_0_update_rom_clock_gating(struct amdgpu_device *adev, bool WREG32_SOC15(SMUIO, 0, mmCGTT_ROM_CLK_CTRL0, data); } -static void smuio_v9_0_get_clock_gating_state(struct amdgpu_device *adev, u32 *flags) +static void smuio_v9_0_get_clock_gating_state(struct amdgpu_device *adev, u64 *flags) { u32 data; diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index 3d0251ef8d79..3ee7322081d2 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -1419,7 +1419,7 @@ static int soc15_common_set_clockgating_state(void *handle, return 0; } -static void soc15_common_get_clockgating_state(void *handle, u32 *flags) +static void soc15_common_get_clockgating_state(void *handle, u64 *flags) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; int data; diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c b/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c index c45d9c14ecbc..606892dbea1c 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c +++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c @@ -64,21 +64,62 @@ static inline uint32_t get_umc_v6_7_channel_index(struct amdgpu_device *adev, return adev->umc.channel_idx_tbl[umc_inst * adev->umc.channel_inst_num + ch_inst]; } +static void umc_v6_7_query_error_status_helper(struct amdgpu_device *adev, + uint64_t mc_umc_status, uint32_t umc_reg_offset) +{ + uint32_t mc_umc_addr; + uint64_t reg_value; + + if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1) + dev_info(adev->dev, "Deferred error, no user action is needed.\n"); + + if (mc_umc_status) + dev_info(adev->dev, "MCA STATUS 0x%llx, umc_reg_offset 0x%x\n", mc_umc_status, umc_reg_offset); + + /* print IPID registers value */ + mc_umc_addr = + SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_IPIDT0); + reg_value = RREG64_PCIE((mc_umc_addr + umc_reg_offset) * 4); + if (reg_value) + dev_info(adev->dev, "MCA IPID 0x%llx, umc_reg_offset 0x%x\n", reg_value, umc_reg_offset); + + /* print SYND registers value */ + mc_umc_addr = + SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_SYNDT0); + reg_value = RREG64_PCIE((mc_umc_addr + umc_reg_offset) * 4); + if (reg_value) + dev_info(adev->dev, "MCA SYND 0x%llx, umc_reg_offset 0x%x\n", reg_value, umc_reg_offset); + + /* print MISC0 registers value */ + mc_umc_addr = + SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_MISC0T0); + reg_value = RREG64_PCIE((mc_umc_addr + umc_reg_offset) * 4); + if (reg_value) + dev_info(adev->dev, "MCA MISC0 0x%llx, umc_reg_offset 0x%x\n", reg_value, umc_reg_offset); +} + static void umc_v6_7_ecc_info_query_correctable_error_count(struct amdgpu_device *adev, uint32_t umc_inst, uint32_t ch_inst, unsigned long *error_count) { uint64_t mc_umc_status; uint32_t eccinfo_table_idx; + uint32_t umc_reg_offset; struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); + umc_reg_offset = get_umc_v6_7_reg_offset(adev, + umc_inst, ch_inst); + eccinfo_table_idx = umc_inst * adev->umc.channel_inst_num + ch_inst; /* check for SRAM correctable error MCUMC_STATUS is a 64 bit register */ mc_umc_status = ras->umc_ecc.ecc[eccinfo_table_idx].mca_umc_status; if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 && - REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1) + REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1) { *error_count += 1; + + umc_v6_7_query_error_status_helper(adev, mc_umc_status, umc_reg_offset); + } } static void umc_v6_7_ecc_info_querry_uncorrectable_error_count(struct amdgpu_device *adev, @@ -88,8 +129,6 @@ static void umc_v6_7_ecc_info_querry_uncorrectable_error_count(struct amdgpu_dev uint64_t mc_umc_status; uint32_t eccinfo_table_idx; uint32_t umc_reg_offset; - uint32_t mc_umc_addr; - uint64_t reg_value; struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); umc_reg_offset = get_umc_v6_7_reg_offset(adev, @@ -106,32 +145,7 @@ static void umc_v6_7_ecc_info_querry_uncorrectable_error_count(struct amdgpu_dev REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, TCC) == 1)) { *error_count += 1; - if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1) - dev_info(adev->dev, "Deferred error, no user action is needed.\n"); - - if (mc_umc_status) - dev_info(adev->dev, "MCA STATUS 0x%llx, umc_reg_offset 0x%x\n", mc_umc_status, umc_reg_offset); - - /* print IPID registers value */ - mc_umc_addr = - SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_IPIDT0); - reg_value = RREG64_PCIE((mc_umc_addr + umc_reg_offset) * 4); - if (reg_value) - dev_info(adev->dev, "MCA IPID 0x%llx, umc_reg_offset 0x%x\n", reg_value, umc_reg_offset); - - /* print SYND registers value */ - mc_umc_addr = - SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_SYNDT0); - reg_value = RREG64_PCIE((mc_umc_addr + umc_reg_offset) * 4); - if (reg_value) - dev_info(adev->dev, "MCA SYND 0x%llx, umc_reg_offset 0x%x\n", reg_value, umc_reg_offset); - - /* print MISC0 registers value */ - mc_umc_addr = - SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_MISC0T0); - reg_value = RREG64_PCIE((mc_umc_addr + umc_reg_offset) * 4); - if (reg_value) - dev_info(adev->dev, "MCA MISC0 0x%llx, umc_reg_offset 0x%x\n", reg_value, umc_reg_offset); + umc_v6_7_query_error_status_helper(adev, mc_umc_status, umc_reg_offset); } } @@ -277,8 +291,11 @@ static void umc_v6_7_query_correctable_error_count(struct amdgpu_device *adev, MCUMC_STATUS is a 64 bit register */ mc_umc_status = RREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4); if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 && - REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1) + REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1) { *error_count += 1; + + umc_v6_7_query_error_status_helper(adev, mc_umc_status, umc_reg_offset); + } } static void umc_v6_7_querry_uncorrectable_error_count(struct amdgpu_device *adev, @@ -287,8 +304,6 @@ static void umc_v6_7_querry_uncorrectable_error_count(struct amdgpu_device *adev { uint64_t mc_umc_status; uint32_t mc_umc_status_addr; - uint32_t mc_umc_addr; - uint64_t reg_value; mc_umc_status_addr = SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_STATUST0); @@ -303,32 +318,7 @@ static void umc_v6_7_querry_uncorrectable_error_count(struct amdgpu_device *adev REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, TCC) == 1)) { *error_count += 1; - if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1) - dev_info(adev->dev, "Deferred error, no user action is needed.\n"); - - if (mc_umc_status) - dev_info(adev->dev, "MCA STATUS 0x%llx, umc_reg_offset 0x%x\n", mc_umc_status, umc_reg_offset); - - /* print IPID registers value */ - mc_umc_addr = - SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_IPIDT0); - reg_value = RREG64_PCIE((mc_umc_addr + umc_reg_offset) * 4); - if (reg_value) - dev_info(adev->dev, "MCA IPID 0x%llx, umc_reg_offset 0x%x\n", reg_value, umc_reg_offset); - - /* print SYND registers value */ - mc_umc_addr = - SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_SYNDT0); - reg_value = RREG64_PCIE((mc_umc_addr + umc_reg_offset) * 4); - if (reg_value) - dev_info(adev->dev, "MCA SYND 0x%llx, umc_reg_offset 0x%x\n", reg_value, umc_reg_offset); - - /* print MISC0 registers value */ - mc_umc_addr = - SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_MISC0T0); - reg_value = RREG64_PCIE((mc_umc_addr + umc_reg_offset) * 4); - if (reg_value) - dev_info(adev->dev, "MCA MISC0 0x%llx, umc_reg_offset 0x%x\n", reg_value, umc_reg_offset); + umc_v6_7_query_error_status_helper(adev, mc_umc_status, umc_reg_offset); } } diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c index 563493d1f830..d7e31e48a2b8 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c @@ -833,7 +833,7 @@ out: return ret; } -static void uvd_v5_0_get_clockgating_state(void *handle, u32 *flags) +static void uvd_v5_0_get_clockgating_state(void *handle, u64 *flags) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; int data; diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c index 2d558c2f417d..375c440957dc 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c @@ -1494,7 +1494,7 @@ out: return ret; } -static void uvd_v6_0_get_clockgating_state(void *handle, u32 *flags) +static void uvd_v6_0_get_clockgating_state(void *handle, u64 *flags) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; int data; diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c index 142e291983b4..8def62c83ffd 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c @@ -831,7 +831,7 @@ out: return ret; } -static void vce_v3_0_get_clockgating_state(void *handle, u32 *flags) +static void vce_v3_0_get_clockgating_state(void *handle, u64 *flags) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; int data; diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c index 1bf672966a62..17d44be58877 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c @@ -31,6 +31,7 @@ #include "soc15d.h" #include "vcn_v2_0.h" #include "mmsch_v1_0.h" +#include "vcn_v2_5.h" #include "vcn/vcn_2_5_offset.h" #include "vcn/vcn_2_5_sh_mask.h" @@ -59,6 +60,7 @@ static int vcn_v2_5_set_powergating_state(void *handle, static int vcn_v2_5_pause_dpg_mode(struct amdgpu_device *adev, int inst_idx, struct dpg_pause_state *new_state); static int vcn_v2_5_sriov_start(struct amdgpu_device *adev); +static void vcn_v2_5_set_ras_funcs(struct amdgpu_device *adev); static int amdgpu_ih_clientid_vcns[] = { SOC15_IH_CLIENTID_VCN, @@ -100,6 +102,7 @@ static int vcn_v2_5_early_init(void *handle) vcn_v2_5_set_dec_ring_funcs(adev); vcn_v2_5_set_enc_ring_funcs(adev); vcn_v2_5_set_irq_funcs(adev); + vcn_v2_5_set_ras_funcs(adev); return 0; } @@ -1932,3 +1935,71 @@ const struct amdgpu_ip_block_version vcn_v2_6_ip_block = .rev = 0, .funcs = &vcn_v2_6_ip_funcs, }; + +static uint32_t vcn_v2_6_query_poison_by_instance(struct amdgpu_device *adev, + uint32_t instance, uint32_t sub_block) +{ + uint32_t poison_stat = 0, reg_value = 0; + + switch (sub_block) { + case AMDGPU_VCN_V2_6_VCPU_VCODEC: + reg_value = RREG32_SOC15(VCN, instance, mmUVD_RAS_VCPU_VCODEC_STATUS); + poison_stat = REG_GET_FIELD(reg_value, UVD_RAS_VCPU_VCODEC_STATUS, POISONED_PF); + break; + default: + break; + } + + if (poison_stat) + dev_info(adev->dev, "Poison detected in VCN%d, sub_block%d\n", + instance, sub_block); + + return poison_stat; +} + +static bool vcn_v2_6_query_poison_status(struct amdgpu_device *adev) +{ + uint32_t inst, sub; + uint32_t poison_stat = 0; + + for (inst = 0; inst < adev->vcn.num_vcn_inst; inst++) + for (sub = 0; sub < AMDGPU_VCN_V2_6_MAX_SUB_BLOCK; sub++) + poison_stat += + vcn_v2_6_query_poison_by_instance(adev, inst, sub); + + return !!poison_stat; +} + +const struct amdgpu_ras_block_hw_ops vcn_v2_6_ras_hw_ops = { + .query_poison_status = vcn_v2_6_query_poison_status, +}; + +static struct amdgpu_vcn_ras vcn_v2_6_ras = { + .ras_block = { + .hw_ops = &vcn_v2_6_ras_hw_ops, + }, +}; + +static void vcn_v2_5_set_ras_funcs(struct amdgpu_device *adev) +{ + switch (adev->ip_versions[VCN_HWIP][0]) { + case IP_VERSION(2, 6, 0): + adev->vcn.ras = &vcn_v2_6_ras; + break; + default: + break; + } + + if (adev->vcn.ras) { + amdgpu_ras_register_ras_block(adev, &adev->vcn.ras->ras_block); + + strcpy(adev->vcn.ras->ras_block.ras_comm.name, "vcn"); + adev->vcn.ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__VCN; + adev->vcn.ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__POISON; + adev->vcn.ras_if = &adev->vcn.ras->ras_block.ras_comm; + + /* If don't define special ras_late_init function, use default ras_late_init */ + if (!adev->vcn.ras->ras_block.ras_late_init) + adev->vcn.ras->ras_block.ras_late_init = amdgpu_ras_block_late_init; + } +} diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.h b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.h index e72f799ed0fd..1c19af74e4fd 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.h +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.h @@ -24,6 +24,12 @@ #ifndef __VCN_V2_5_H__ #define __VCN_V2_5_H__ +enum amdgpu_vcn_v2_6_sub_block { + AMDGPU_VCN_V2_6_VCPU_VCODEC = 0, + + AMDGPU_VCN_V2_6_MAX_SUB_BLOCK, +}; + extern const struct amdgpu_ip_block_version vcn_v2_5_ip_block; extern const struct amdgpu_ip_block_version vcn_v2_6_ip_block; diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c index 039b90cdc3bc..c5b88d15a6df 100644 --- a/drivers/gpu/drm/amd/amdgpu/vi.c +++ b/drivers/gpu/drm/amd/amdgpu/vi.c @@ -2033,7 +2033,7 @@ static int vi_common_set_powergating_state(void *handle, return 0; } -static void vi_common_get_clockgating_state(void *handle, u32 *flags) +static void vi_common_get_clockgating_state(void *handle, u64 *flags) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; int data; |