aboutsummaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/amd/amdgpu
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/Makefile6
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu.h11
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c8
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h6
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c84
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c371
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c13
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c99
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c12
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_device.c13
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_df.h2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c304
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_display.c55
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c14
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c131
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c5
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h12
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c13
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h5
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c49
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c66
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c15
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h8
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c14
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h17
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_object.c31
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c55
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h26
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c304
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.h29
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c134
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h5
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h97
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_smuio.h2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c26
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c90
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h10
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c33
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h15
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c28
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c32
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c1203
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h52
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c11
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c981
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c13
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c359
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h89
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h99
-rw-r--r--drivers/gpu/drm/amd/amdgpu/athub_v1_0.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/athub_v1_0.h2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/athub_v2_0.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/athub_v2_0.h2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/athub_v2_1.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/athub_v2_1.h2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/atombios_dp.c3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/atombios_encoders.c21
-rw-r--r--drivers/gpu/drm/amd/amdgpu/cik_sdma.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/df_v1_7.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/df_v3_6.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c7
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c6
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c14
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c30
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c74
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.h7
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/navi10_ih.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nbio_v7_2.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/nv.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c10
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c6
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c10
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c10
-rw-r--r--drivers/gpu/drm/amd/amdgpu/si_dma.c5
-rw-r--r--drivers/gpu/drm/amd/amdgpu/smuio_v11_0.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/smuio_v11_0_6.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/smuio_v13_0.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/smuio_v9_0.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/soc15.c15
-rw-r--r--drivers/gpu/drm/amd/amdgpu/soc15_common.h8
-rw-r--r--drivers/gpu/drm/amd/amdgpu/soc21.c620
-rw-r--r--drivers/gpu/drm/amd/amdgpu/soc21.h30
-rw-r--r--drivers/gpu/drm/amd/amdgpu/umc_v6_7.c106
-rw-r--r--drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vce_v3_0.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c71
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vcn_v2_5.h6
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vi.c2
119 files changed, 4040 insertions, 2185 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
index 40e2c6e2df79..2b454e7d7a76 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -49,7 +49,7 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \
amdgpu_cs.o amdgpu_bios.o amdgpu_benchmark.o \
atombios_dp.o amdgpu_afmt.o amdgpu_trace_points.o \
atombios_encoders.o amdgpu_sa.o atombios_i2c.o \
- amdgpu_dma_buf.o amdgpu_vm.o amdgpu_ib.o amdgpu_pll.o \
+ amdgpu_dma_buf.o amdgpu_vm.o amdgpu_vm_pt.o amdgpu_ib.o amdgpu_pll.o \
amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o \
amdgpu_gtt_mgr.o amdgpu_preempt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o \
amdgpu_atomfirmware.o amdgpu_vf_error.o amdgpu_sched.o \
@@ -58,7 +58,7 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \
amdgpu_vm_sdma.o amdgpu_discovery.o amdgpu_ras_eeprom.o amdgpu_nbio.o \
amdgpu_umc.o smu_v11_0_i2c.o amdgpu_fru_eeprom.o amdgpu_rap.o \
amdgpu_fw_attestation.o amdgpu_securedisplay.o \
- amdgpu_eeprom.o amdgpu_mca.o
+ amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o
amdgpu-$(CONFIG_PROC_FS) += amdgpu_fdinfo.o
@@ -74,7 +74,7 @@ amdgpu-$(CONFIG_DRM_AMDGPU_SI)+= si.o gmc_v6_0.o gfx_v6_0.o si_ih.o si_dma.o dce
amdgpu-y += \
vi.o mxgpu_vi.o nbio_v6_1.o soc15.o emu_soc.o mxgpu_ai.o nbio_v7_0.o vega10_reg_init.o \
vega20_reg_init.o nbio_v7_4.o nbio_v2_3.o nv.o arct_reg_init.o mxgpu_nv.o \
- nbio_v7_2.o hdp_v4_0.o hdp_v5_0.o aldebaran_reg_init.o aldebaran.o
+ nbio_v7_2.o hdp_v4_0.o hdp_v5_0.o aldebaran_reg_init.o aldebaran.o soc21.o
# add DF block
amdgpu-y += \
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index cdf0818088b3..d557f4db2565 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -179,7 +179,7 @@ extern int amdgpu_sched_jobs;
extern int amdgpu_sched_hw_submission;
extern uint amdgpu_pcie_gen_cap;
extern uint amdgpu_pcie_lane_cap;
-extern uint amdgpu_cg_mask;
+extern u64 amdgpu_cg_mask;
extern uint amdgpu_pg_mask;
extern uint amdgpu_sdma_phase_quantum;
extern char *amdgpu_disable_cu;
@@ -322,7 +322,7 @@ int amdgpu_device_ip_set_powergating_state(void *dev,
enum amd_ip_block_type block_type,
enum amd_powergating_state state);
void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
- u32 *flags);
+ u64 *flags);
int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
enum amd_ip_block_type block_type);
bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev,
@@ -666,10 +666,13 @@ enum amd_hw_ip_block_type {
MAX_HWIP
};
-#define HWIP_MAX_INSTANCE 10
+#define HWIP_MAX_INSTANCE 11
#define HW_ID_MAX 300
#define IP_VERSION(mj, mn, rv) (((mj) << 16) | ((mn) << 8) | (rv))
+#define IP_VERSION_MAJ(ver) ((ver) >> 16)
+#define IP_VERSION_MIN(ver) (((ver) >> 8) & 0xFF)
+#define IP_VERSION_REV(ver) ((ver) & 0xFF)
struct amd_powerplay {
void *pp_handle;
@@ -860,7 +863,7 @@ struct amdgpu_device {
/* powerplay */
struct amd_powerplay powerplay;
struct amdgpu_pm pm;
- u32 cg_flags;
+ u64 cg_flags;
u32 pg_flags;
/* nbio */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index 6ca1db3c243f..64c6664b34e8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -724,3 +724,11 @@ void amdgpu_amdkfd_ras_poison_consumption_handler(struct amdgpu_device *adev, bo
else if (reset)
amdgpu_amdkfd_gpu_reset(adev);
}
+
+bool amdgpu_amdkfd_ras_query_utcl2_poison_status(struct amdgpu_device *adev)
+{
+ if (adev->gfx.ras && adev->gfx.ras->query_utcl2_poison_status)
+ return adev->gfx.ras->query_utcl2_poison_status(adev);
+ else
+ return false;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index 4cb14c2fe53f..f8b9f27adcf5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -273,9 +273,8 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
struct amdgpu_device *adev, struct kgd_mem *mem, void *drm_priv,
uint64_t *size);
-int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
- struct amdgpu_device *adev, struct kgd_mem *mem, void *drm_priv,
- bool *table_freed);
+int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(struct amdgpu_device *adev,
+ struct kgd_mem *mem, void *drm_priv);
int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
struct amdgpu_device *adev, struct kgd_mem *mem, void *drm_priv);
int amdgpu_amdkfd_gpuvm_sync_memory(
@@ -301,6 +300,7 @@ void amdgpu_amdkfd_ras_poison_consumption_handler(struct amdgpu_device *adev,
bool amdgpu_amdkfd_bo_mapped_to_dev(struct amdgpu_device *adev, struct kgd_mem *mem);
void amdgpu_amdkfd_block_mmu_notifications(void *p);
int amdgpu_amdkfd_criu_resume(void *p);
+bool amdgpu_amdkfd_ras_query_utcl2_poison_status(struct amdgpu_device *adev);
#if IS_ENABLED(CONFIG_HSA_AMD)
void amdgpu_amdkfd_gpuvm_init_mem_limits(void);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index cd89d2e46852..80b6b8e432fd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -253,53 +253,18 @@ void amdgpu_amdkfd_release_notify(struct amdgpu_bo *bo)
static int amdgpu_amdkfd_remove_eviction_fence(struct amdgpu_bo *bo,
struct amdgpu_amdkfd_fence *ef)
{
- struct dma_resv *resv = bo->tbo.base.resv;
- struct dma_resv_list *old, *new;
- unsigned int i, j, k;
+ struct dma_fence *replacement;
if (!ef)
return -EINVAL;
- old = dma_resv_shared_list(resv);
- if (!old)
- return 0;
-
- new = kmalloc(struct_size(new, shared, old->shared_max), GFP_KERNEL);
- if (!new)
- return -ENOMEM;
-
- /* Go through all the shared fences in the resevation object and sort
- * the interesting ones to the end of the list.
+ /* TODO: Instead of block before we should use the fence of the page
+ * table update and TLB flush here directly.
*/
- for (i = 0, j = old->shared_count, k = 0; i < old->shared_count; ++i) {
- struct dma_fence *f;
-
- f = rcu_dereference_protected(old->shared[i],
- dma_resv_held(resv));
-
- if (f->context == ef->base.context)
- RCU_INIT_POINTER(new->shared[--j], f);
- else
- RCU_INIT_POINTER(new->shared[k++], f);
- }
- new->shared_max = old->shared_max;
- new->shared_count = k;
-
- /* Install the new fence list, seqcount provides the barriers */
- write_seqcount_begin(&resv->seq);
- RCU_INIT_POINTER(resv->fence, new);
- write_seqcount_end(&resv->seq);
-
- /* Drop the references to the removed fences or move them to ef_list */
- for (i = j; i < old->shared_count; ++i) {
- struct dma_fence *f;
-
- f = rcu_dereference_protected(new->shared[i],
- dma_resv_held(resv));
- dma_fence_put(f);
- }
- kfree_rcu(old, rcu);
-
+ replacement = dma_fence_get_stub();
+ dma_resv_replace_fences(bo->tbo.base.resv, ef->base.context,
+ replacement, DMA_RESV_USAGE_READ);
+ dma_fence_put(replacement);
return 0;
}
@@ -1093,8 +1058,7 @@ static void unmap_bo_from_gpuvm(struct kgd_mem *mem,
static int update_gpuvm_pte(struct kgd_mem *mem,
struct kfd_mem_attachment *entry,
- struct amdgpu_sync *sync,
- bool *table_freed)
+ struct amdgpu_sync *sync)
{
struct amdgpu_bo_va *bo_va = entry->bo_va;
struct amdgpu_device *adev = entry->adev;
@@ -1105,7 +1069,7 @@ static int update_gpuvm_pte(struct kgd_mem *mem,
return ret;
/* Update the page tables */
- ret = amdgpu_vm_bo_update(adev, bo_va, false, table_freed);
+ ret = amdgpu_vm_bo_update(adev, bo_va, false);
if (ret) {
pr_err("amdgpu_vm_bo_update failed\n");
return ret;
@@ -1117,8 +1081,7 @@ static int update_gpuvm_pte(struct kgd_mem *mem,
static int map_bo_to_gpuvm(struct kgd_mem *mem,
struct kfd_mem_attachment *entry,
struct amdgpu_sync *sync,
- bool no_update_pte,
- bool *table_freed)
+ bool no_update_pte)
{
int ret;
@@ -1135,7 +1098,7 @@ static int map_bo_to_gpuvm(struct kgd_mem *mem,
if (no_update_pte)
return 0;
- ret = update_gpuvm_pte(mem, entry, sync, table_freed);
+ ret = update_gpuvm_pte(mem, entry, sync);
if (ret) {
pr_err("update_gpuvm_pte() failed\n");
goto update_gpuvm_pte_failed;
@@ -1268,7 +1231,7 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info,
AMDGPU_FENCE_OWNER_KFD, false);
if (ret)
goto wait_pd_fail;
- ret = dma_resv_reserve_shared(vm->root.bo->tbo.base.resv, 1);
+ ret = dma_resv_reserve_fences(vm->root.bo->tbo.base.resv, 1);
if (ret)
goto reserve_shared_fail;
amdgpu_bo_fence(vm->root.bo,
@@ -1745,7 +1708,7 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
struct amdgpu_device *adev, struct kgd_mem *mem,
- void *drm_priv, bool *table_freed)
+ void *drm_priv)
{
struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv);
int ret;
@@ -1832,7 +1795,7 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
entry->va, entry->va + bo_size, entry);
ret = map_bo_to_gpuvm(mem, entry, ctx.sync,
- is_invalid_userptr, table_freed);
+ is_invalid_userptr);
if (ret) {
pr_err("Failed to map bo to gpuvm\n");
goto out_unreserve;
@@ -2300,7 +2263,7 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
continue;
kfd_mem_dmaunmap_attachment(mem, attachment);
- ret = update_gpuvm_pte(mem, attachment, &sync, NULL);
+ ret = update_gpuvm_pte(mem, attachment, &sync);
if (ret) {
pr_err("%s: update PTE failed\n", __func__);
/* make sure this gets validated again */
@@ -2482,6 +2445,8 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
struct amdgpu_bo *bo = mem->bo;
uint32_t domain = mem->domain;
struct kfd_mem_attachment *attachment;
+ struct dma_resv_iter cursor;
+ struct dma_fence *fence;
total_size += amdgpu_bo_size(bo);
@@ -2496,17 +2461,20 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
goto validate_map_fail;
}
}
- ret = amdgpu_sync_fence(&sync_obj, bo->tbo.moving);
- if (ret) {
- pr_debug("Memory eviction: Sync BO fence failed. Try again\n");
- goto validate_map_fail;
+ dma_resv_for_each_fence(&cursor, bo->tbo.base.resv,
+ DMA_RESV_USAGE_KERNEL, fence) {
+ ret = amdgpu_sync_fence(&sync_obj, fence);
+ if (ret) {
+ pr_debug("Memory eviction: Sync BO fence failed. Try again\n");
+ goto validate_map_fail;
+ }
}
list_for_each_entry(attachment, &mem->attachments, list) {
if (!attachment->is_mapped)
continue;
kfd_mem_dmaunmap_attachment(mem, attachment);
- ret = update_gpuvm_pte(mem, attachment, &sync_obj, NULL);
+ ret = update_gpuvm_pte(mem, attachment, &sync_obj);
if (ret) {
pr_debug("Memory eviction: update PTE failed. Try again\n");
goto validate_map_fail;
@@ -2606,7 +2574,7 @@ int amdgpu_amdkfd_add_gws_to_process(void *info, void *gws, struct kgd_mem **mem
* Add process eviction fence to bo so they can
* evict each other.
*/
- ret = dma_resv_reserve_shared(gws_bo->tbo.base.resv, 1);
+ ret = dma_resv_reserve_fences(gws_bo->tbo.base.resv, 1);
if (ret)
goto reserve_shared_fail;
amdgpu_bo_fence(gws_bo, &process_info->eviction_fence->base, true);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
index 4d4ddf026faf..494ca6a0f47a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
@@ -162,12 +162,14 @@ union vram_info {
struct atom_vram_info_header_v2_4 v24;
struct atom_vram_info_header_v2_5 v25;
struct atom_vram_info_header_v2_6 v26;
+ struct atom_vram_info_header_v3_0 v30;
};
union vram_module {
struct atom_vram_module_v9 v9;
struct atom_vram_module_v10 v10;
struct atom_vram_module_v11 v11;
+ struct atom_vram_module_v3_0 v30;
};
static int convert_atom_mem_type_to_vram_type(struct amdgpu_device *adev,
@@ -294,88 +296,116 @@ amdgpu_atomfirmware_get_vram_info(struct amdgpu_device *adev,
vram_info = (union vram_info *)
(mode_info->atom_context->bios + data_offset);
module_id = (RREG32(adev->bios_scratch_reg_offset + 4) & 0x00ff0000) >> 16;
- switch (crev) {
- case 3:
- if (module_id > vram_info->v23.vram_module_num)
- module_id = 0;
- vram_module = (union vram_module *)vram_info->v23.vram_module;
- while (i < module_id) {
- vram_module = (union vram_module *)
- ((u8 *)vram_module + vram_module->v9.vram_module_size);
- i++;
- }
- mem_type = vram_module->v9.memory_type;
- if (vram_type)
- *vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type);
- mem_channel_number = vram_module->v9.channel_num;
- mem_channel_width = vram_module->v9.channel_width;
- if (vram_width)
- *vram_width = mem_channel_number * (1 << mem_channel_width);
- mem_vendor = (vram_module->v9.vender_rev_id) & 0xF;
- if (vram_vendor)
- *vram_vendor = mem_vendor;
- break;
- case 4:
- if (module_id > vram_info->v24.vram_module_num)
- module_id = 0;
- vram_module = (union vram_module *)vram_info->v24.vram_module;
- while (i < module_id) {
- vram_module = (union vram_module *)
- ((u8 *)vram_module + vram_module->v10.vram_module_size);
- i++;
- }
- mem_type = vram_module->v10.memory_type;
- if (vram_type)
- *vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type);
- mem_channel_number = vram_module->v10.channel_num;
- mem_channel_width = vram_module->v10.channel_width;
- if (vram_width)
- *vram_width = mem_channel_number * (1 << mem_channel_width);
- mem_vendor = (vram_module->v10.vender_rev_id) & 0xF;
- if (vram_vendor)
- *vram_vendor = mem_vendor;
- break;
- case 5:
- if (module_id > vram_info->v25.vram_module_num)
- module_id = 0;
- vram_module = (union vram_module *)vram_info->v25.vram_module;
- while (i < module_id) {
- vram_module = (union vram_module *)
- ((u8 *)vram_module + vram_module->v11.vram_module_size);
- i++;
+ if (frev == 3) {
+ switch (crev) {
+ /* v30 */
+ case 0:
+ vram_module = (union vram_module *)vram_info->v30.vram_module;
+ mem_vendor = (vram_module->v30.dram_vendor_id) & 0xF;
+ if (vram_vendor)
+ *vram_vendor = mem_vendor;
+ mem_type = vram_info->v30.memory_type;
+ if (vram_type)
+ *vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type);
+ mem_channel_number = vram_info->v30.channel_num;
+ mem_channel_width = vram_info->v30.channel_width;
+ if (vram_width)
+ *vram_width = mem_channel_number * mem_channel_width;
+ break;
+ default:
+ return -EINVAL;
}
- mem_type = vram_module->v11.memory_type;
- if (vram_type)
- *vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type);
- mem_channel_number = vram_module->v11.channel_num;
- mem_channel_width = vram_module->v11.channel_width;
- if (vram_width)
- *vram_width = mem_channel_number * (1 << mem_channel_width);
- mem_vendor = (vram_module->v11.vender_rev_id) & 0xF;
- if (vram_vendor)
- *vram_vendor = mem_vendor;
- break;
- case 6:
- if (module_id > vram_info->v26.vram_module_num)
- module_id = 0;
- vram_module = (union vram_module *)vram_info->v26.vram_module;
- while (i < module_id) {
- vram_module = (union vram_module *)
- ((u8 *)vram_module + vram_module->v9.vram_module_size);
- i++;
+ } else if (frev == 2) {
+ switch (crev) {
+ /* v23 */
+ case 3:
+ if (module_id > vram_info->v23.vram_module_num)
+ module_id = 0;
+ vram_module = (union vram_module *)vram_info->v23.vram_module;
+ while (i < module_id) {
+ vram_module = (union vram_module *)
+ ((u8 *)vram_module + vram_module->v9.vram_module_size);
+ i++;
+ }
+ mem_type = vram_module->v9.memory_type;
+ if (vram_type)
+ *vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type);
+ mem_channel_number = vram_module->v9.channel_num;
+ mem_channel_width = vram_module->v9.channel_width;
+ if (vram_width)
+ *vram_width = mem_channel_number * (1 << mem_channel_width);
+ mem_vendor = (vram_module->v9.vender_rev_id) & 0xF;
+ if (vram_vendor)
+ *vram_vendor = mem_vendor;
+ break;
+ /* v24 */
+ case 4:
+ if (module_id > vram_info->v24.vram_module_num)
+ module_id = 0;
+ vram_module = (union vram_module *)vram_info->v24.vram_module;
+ while (i < module_id) {
+ vram_module = (union vram_module *)
+ ((u8 *)vram_module + vram_module->v10.vram_module_size);
+ i++;
+ }
+ mem_type = vram_module->v10.memory_type;
+ if (vram_type)
+ *vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type);
+ mem_channel_number = vram_module->v10.channel_num;
+ mem_channel_width = vram_module->v10.channel_width;
+ if (vram_width)
+ *vram_width = mem_channel_number * (1 << mem_channel_width);
+ mem_vendor = (vram_module->v10.vender_rev_id) & 0xF;
+ if (vram_vendor)
+ *vram_vendor = mem_vendor;
+ break;
+ /* v25 */
+ case 5:
+ if (module_id > vram_info->v25.vram_module_num)
+ module_id = 0;
+ vram_module = (union vram_module *)vram_info->v25.vram_module;
+ while (i < module_id) {
+ vram_module = (union vram_module *)
+ ((u8 *)vram_module + vram_module->v11.vram_module_size);
+ i++;
+ }
+ mem_type = vram_module->v11.memory_type;
+ if (vram_type)
+ *vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type);
+ mem_channel_number = vram_module->v11.channel_num;
+ mem_channel_width = vram_module->v11.channel_width;
+ if (vram_width)
+ *vram_width = mem_channel_number * (1 << mem_channel_width);
+ mem_vendor = (vram_module->v11.vender_rev_id) & 0xF;
+ if (vram_vendor)
+ *vram_vendor = mem_vendor;
+ break;
+ /* v26 */
+ case 6:
+ if (module_id > vram_info->v26.vram_module_num)
+ module_id = 0;
+ vram_module = (union vram_module *)vram_info->v26.vram_module;
+ while (i < module_id) {
+ vram_module = (union vram_module *)
+ ((u8 *)vram_module + vram_module->v9.vram_module_size);
+ i++;
+ }
+ mem_type = vram_module->v9.memory_type;
+ if (vram_type)
+ *vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type);
+ mem_channel_number = vram_module->v9.channel_num;
+ mem_channel_width = vram_module->v9.channel_width;
+ if (vram_width)
+ *vram_width = mem_channel_number * (1 << mem_channel_width);
+ mem_vendor = (vram_module->v9.vender_rev_id) & 0xF;
+ if (vram_vendor)
+ *vram_vendor = mem_vendor;
+ break;
+ default:
+ return -EINVAL;
}
- mem_type = vram_module->v9.memory_type;
- if (vram_type)
- *vram_type = convert_atom_mem_type_to_vram_type(adev, mem_type);
- mem_channel_number = vram_module->v9.channel_num;
- mem_channel_width = vram_module->v9.channel_width;
- if (vram_width)
- *vram_width = mem_channel_number * (1 << mem_channel_width);
- mem_vendor = (vram_module->v9.vender_rev_id) & 0xF;
- if (vram_vendor)
- *vram_vendor = mem_vendor;
- break;
- default:
+ } else {
+ /* invalid frev */
return -EINVAL;
}
}
@@ -528,6 +558,13 @@ union smu_info {
struct atom_smu_info_v3_1 v31;
};
+union gfx_info {
+ struct atom_gfx_info_v2_2 v22;
+ struct atom_gfx_info_v2_4 v24;
+ struct atom_gfx_info_v2_7 v27;
+ struct atom_gfx_info_v3_0 v30;
+};
+
int amdgpu_atomfirmware_get_clock_info(struct amdgpu_device *adev)
{
struct amdgpu_mode_info *mode_info = &adev->mode_info;
@@ -609,22 +646,26 @@ int amdgpu_atomfirmware_get_clock_info(struct amdgpu_device *adev)
gfx_info);
if (amdgpu_atom_parse_data_header(mode_info->atom_context, index, NULL,
&frev, &crev, &data_offset)) {
- struct atom_gfx_info_v2_2 *gfx_info = (struct atom_gfx_info_v2_2*)
+ union gfx_info *gfx_info = (union gfx_info *)
(mode_info->atom_context->bios + data_offset);
- if ((frev == 2) && (crev >= 2))
- spll->reference_freq = le32_to_cpu(gfx_info->rlc_gpu_timer_refclk);
- ret = 0;
+ if ((frev == 3) ||
+ (frev == 2 && crev == 6)) {
+ spll->reference_freq = le32_to_cpu(gfx_info->v30.golden_tsc_count_lower_refclk);
+ ret = 0;
+ } else if ((frev == 2) &&
+ (crev >= 2) &&
+ (crev != 6)) {
+ spll->reference_freq = le32_to_cpu(gfx_info->v22.rlc_gpu_timer_refclk);
+ ret = 0;
+ } else {
+ BUG();
+ }
}
}
return ret;
}
-union gfx_info {
- struct atom_gfx_info_v2_4 v24;
- struct atom_gfx_info_v2_7 v27;
-};
-
int amdgpu_atomfirmware_get_gfx_info(struct amdgpu_device *adev)
{
struct amdgpu_mode_info *mode_info = &adev->mode_info;
@@ -638,42 +679,58 @@ int amdgpu_atomfirmware_get_gfx_info(struct amdgpu_device *adev)
&frev, &crev, &data_offset)) {
union gfx_info *gfx_info = (union gfx_info *)
(mode_info->atom_context->bios + data_offset);
- switch (crev) {
- case 4:
- adev->gfx.config.max_shader_engines = gfx_info->v24.max_shader_engines;
- adev->gfx.config.max_cu_per_sh = gfx_info->v24.max_cu_per_sh;
- adev->gfx.config.max_sh_per_se = gfx_info->v24.max_sh_per_se;
- adev->gfx.config.max_backends_per_se = gfx_info->v24.max_backends_per_se;
- adev->gfx.config.max_texture_channel_caches = gfx_info->v24.max_texture_channel_caches;
- adev->gfx.config.max_gprs = le16_to_cpu(gfx_info->v24.gc_num_gprs);
- adev->gfx.config.max_gs_threads = gfx_info->v24.gc_num_max_gs_thds;
- adev->gfx.config.gs_vgt_table_depth = gfx_info->v24.gc_gs_table_depth;
- adev->gfx.config.gs_prim_buffer_depth =
- le16_to_cpu(gfx_info->v24.gc_gsprim_buff_depth);
- adev->gfx.config.double_offchip_lds_buf =
- gfx_info->v24.gc_double_offchip_lds_buffer;
- adev->gfx.cu_info.wave_front_size = le16_to_cpu(gfx_info->v24.gc_wave_size);
- adev->gfx.cu_info.max_waves_per_simd = le16_to_cpu(gfx_info->v24.gc_max_waves_per_simd);
- adev->gfx.cu_info.max_scratch_slots_per_cu = gfx_info->v24.gc_max_scratch_slots_per_cu;
- adev->gfx.cu_info.lds_size = le16_to_cpu(gfx_info->v24.gc_lds_size);
- return 0;
- case 7:
- adev->gfx.config.max_shader_engines = gfx_info->v27.max_shader_engines;
- adev->gfx.config.max_cu_per_sh = gfx_info->v27.max_cu_per_sh;
- adev->gfx.config.max_sh_per_se = gfx_info->v27.max_sh_per_se;
- adev->gfx.config.max_backends_per_se = gfx_info->v27.max_backends_per_se;
- adev->gfx.config.max_texture_channel_caches = gfx_info->v27.max_texture_channel_caches;
- adev->gfx.config.max_gprs = le16_to_cpu(gfx_info->v27.gc_num_gprs);
- adev->gfx.config.max_gs_threads = gfx_info->v27.gc_num_max_gs_thds;
- adev->gfx.config.gs_vgt_table_depth = gfx_info->v27.gc_gs_table_depth;
- adev->gfx.config.gs_prim_buffer_depth = le16_to_cpu(gfx_info->v27.gc_gsprim_buff_depth);
- adev->gfx.config.double_offchip_lds_buf = gfx_info->v27.gc_double_offchip_lds_buffer;
- adev->gfx.cu_info.wave_front_size = le16_to_cpu(gfx_info->v27.gc_wave_size);
- adev->gfx.cu_info.max_waves_per_simd = le16_to_cpu(gfx_info->v27.gc_max_waves_per_simd);
- adev->gfx.cu_info.max_scratch_slots_per_cu = gfx_info->v27.gc_max_scratch_slots_per_cu;
- adev->gfx.cu_info.lds_size = le16_to_cpu(gfx_info->v27.gc_lds_size);
- return 0;
- default:
+ if (frev == 2) {
+ switch (crev) {
+ case 4:
+ adev->gfx.config.max_shader_engines = gfx_info->v24.max_shader_engines;
+ adev->gfx.config.max_cu_per_sh = gfx_info->v24.max_cu_per_sh;
+ adev->gfx.config.max_sh_per_se = gfx_info->v24.max_sh_per_se;
+ adev->gfx.config.max_backends_per_se = gfx_info->v24.max_backends_per_se;
+ adev->gfx.config.max_texture_channel_caches = gfx_info->v24.max_texture_channel_caches;
+ adev->gfx.config.max_gprs = le16_to_cpu(gfx_info->v24.gc_num_gprs);
+ adev->gfx.config.max_gs_threads = gfx_info->v24.gc_num_max_gs_thds;
+ adev->gfx.config.gs_vgt_table_depth = gfx_info->v24.gc_gs_table_depth;
+ adev->gfx.config.gs_prim_buffer_depth =
+ le16_to_cpu(gfx_info->v24.gc_gsprim_buff_depth);
+ adev->gfx.config.double_offchip_lds_buf =
+ gfx_info->v24.gc_double_offchip_lds_buffer;
+ adev->gfx.cu_info.wave_front_size = le16_to_cpu(gfx_info->v24.gc_wave_size);
+ adev->gfx.cu_info.max_waves_per_simd = le16_to_cpu(gfx_info->v24.gc_max_waves_per_simd);
+ adev->gfx.cu_info.max_scratch_slots_per_cu = gfx_info->v24.gc_max_scratch_slots_per_cu;
+ adev->gfx.cu_info.lds_size = le16_to_cpu(gfx_info->v24.gc_lds_size);
+ return 0;
+ case 7:
+ adev->gfx.config.max_shader_engines = gfx_info->v27.max_shader_engines;
+ adev->gfx.config.max_cu_per_sh = gfx_info->v27.max_cu_per_sh;
+ adev->gfx.config.max_sh_per_se = gfx_info->v27.max_sh_per_se;
+ adev->gfx.config.max_backends_per_se = gfx_info->v27.max_backends_per_se;
+ adev->gfx.config.max_texture_channel_caches = gfx_info->v27.max_texture_channel_caches;
+ adev->gfx.config.max_gprs = le16_to_cpu(gfx_info->v27.gc_num_gprs);
+ adev->gfx.config.max_gs_threads = gfx_info->v27.gc_num_max_gs_thds;
+ adev->gfx.config.gs_vgt_table_depth = gfx_info->v27.gc_gs_table_depth;
+ adev->gfx.config.gs_prim_buffer_depth = le16_to_cpu(gfx_info->v27.gc_gsprim_buff_depth);
+ adev->gfx.config.double_offchip_lds_buf = gfx_info->v27.gc_double_offchip_lds_buffer;
+ adev->gfx.cu_info.wave_front_size = le16_to_cpu(gfx_info->v27.gc_wave_size);
+ adev->gfx.cu_info.max_waves_per_simd = le16_to_cpu(gfx_info->v27.gc_max_waves_per_simd);
+ adev->gfx.cu_info.max_scratch_slots_per_cu = gfx_info->v27.gc_max_scratch_slots_per_cu;
+ adev->gfx.cu_info.lds_size = le16_to_cpu(gfx_info->v27.gc_lds_size);
+ return 0;
+ default:
+ return -EINVAL;
+ }
+ } else if (frev == 3) {
+ switch (crev) {
+ case 0:
+ adev->gfx.config.max_shader_engines = gfx_info->v30.max_shader_engines;
+ adev->gfx.config.max_cu_per_sh = gfx_info->v30.max_cu_per_sh;
+ adev->gfx.config.max_sh_per_se = gfx_info->v30.max_sh_per_se;
+ adev->gfx.config.max_backends_per_se = gfx_info->v30.max_backends_per_se;
+ adev->gfx.config.max_texture_channel_caches = gfx_info->v30.max_texture_channel_caches;
+ return 0;
+ default:
+ return -EINVAL;
+ }
+ } else {
return -EINVAL;
}
@@ -731,3 +788,67 @@ int amdgpu_atomfirmware_get_fw_reserved_fb_size(struct amdgpu_device *adev)
return fw_reserved_fb_size;
}
+
+/*
+ * Helper function to execute asic_init table
+ *
+ * @adev: amdgpu_device pointer
+ * @fb_reset: flag to indicate whether fb is reset or not
+ *
+ * Return 0 if succeed, otherwise failed
+ */
+int amdgpu_atomfirmware_asic_init(struct amdgpu_device *adev, bool fb_reset)
+{
+ struct amdgpu_mode_info *mode_info = &adev->mode_info;
+ struct atom_context *ctx;
+ uint8_t frev, crev;
+ uint16_t data_offset;
+ uint32_t bootup_sclk_in10khz, bootup_mclk_in10khz;
+ struct asic_init_ps_allocation_v2_1 asic_init_ps_v2_1;
+ int index;
+
+ if (!mode_info)
+ return -EINVAL;
+
+ ctx = mode_info->atom_context;
+ if (!ctx)
+ return -EINVAL;
+
+ /* query bootup sclk/mclk from firmware_info table */
+ index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1,
+ firmwareinfo);
+ if (amdgpu_atom_parse_data_header(ctx, index, NULL,
+ &frev, &crev, &data_offset)) {
+ union firmware_info *firmware_info =
+ (union firmware_info *)(ctx->bios +
+ data_offset);
+
+ bootup_sclk_in10khz =
+ le32_to_cpu(firmware_info->v31.bootup_sclk_in10khz);
+ bootup_mclk_in10khz =
+ le32_to_cpu(firmware_info->v31.bootup_mclk_in10khz);
+ } else {
+ return -EINVAL;
+ }
+
+ index = get_index_into_master_table(atom_master_list_of_command_functions_v2_1,
+ asic_init);
+ if (amdgpu_atom_parse_cmd_header(mode_info->atom_context, index, &frev, &crev)) {
+ if (frev == 2 && crev >= 1) {
+ memset(&asic_init_ps_v2_1, 0, sizeof(asic_init_ps_v2_1));
+ asic_init_ps_v2_1.param.engineparam.sclkfreqin10khz = bootup_sclk_in10khz;
+ asic_init_ps_v2_1.param.memparam.mclkfreqin10khz = bootup_mclk_in10khz;
+ asic_init_ps_v2_1.param.engineparam.engineflag = b3NORMAL_ENGINE_INIT;
+ if (!fb_reset)
+ asic_init_ps_v2_1.param.memparam.memflag = b3DRAM_SELF_REFRESH_EXIT;
+ else
+ asic_init_ps_v2_1.param.memparam.memflag = 0;
+ } else {
+ return -EINVAL;
+ }
+ } else {
+ return -EINVAL;
+ }
+
+ return amdgpu_atom_execute_table(ctx, ATOM_CMD_INIT, (uint32_t *)&asic_init_ps_v2_1);
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h
index 751248b253de..c7eb2caec65a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h
@@ -40,5 +40,6 @@ bool amdgpu_atomfirmware_ras_rom_addr(struct amdgpu_device *adev, uint8_t* i2c_a
bool amdgpu_atomfirmware_mem_training_supported(struct amdgpu_device *adev);
bool amdgpu_atomfirmware_dynamic_boot_config_supported(struct amdgpu_device *adev);
int amdgpu_atomfirmware_get_fw_reserved_fb_size(struct amdgpu_device *adev);
+int amdgpu_atomfirmware_asic_init(struct amdgpu_device *adev, bool fb_reset);
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c
index 0eddca795e96..e363f56c72af 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c
@@ -471,6 +471,7 @@ bool amdgpu_soc15_read_bios_from_rom(struct amdgpu_device *adev,
{
u32 *dw_ptr;
u32 i, length_dw;
+ u32 rom_offset;
u32 rom_index_offset;
u32 rom_data_offset;
@@ -494,8 +495,16 @@ bool amdgpu_soc15_read_bios_from_rom(struct amdgpu_device *adev,
rom_data_offset =
adev->smuio.funcs->get_rom_data_offset(adev);
- /* set rom index to 0 */
- WREG32(rom_index_offset, 0);
+ if (adev->nbio.funcs &&
+ adev->nbio.funcs->get_rom_offset) {
+ rom_offset = adev->nbio.funcs->get_rom_offset(adev);
+ rom_offset = rom_offset << 17;
+ } else {
+ rom_offset = 0;
+ }
+
+ /* set rom index to rom_offset */
+ WREG32(rom_index_offset, rom_offset);
/* read out the rom data */
for (i = 0; i < length_dw; i++)
dw_ptr[i] = RREG32(rom_data_offset);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
index 044b41f0bfd9..529d52a204cf 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
@@ -34,7 +34,6 @@ struct amdgpu_fpriv;
struct amdgpu_bo_list_entry {
struct ttm_validate_buffer tv;
struct amdgpu_bo_va *bo_va;
- struct dma_fence_chain *chain;
uint32_t priority;
struct page **user_pages;
bool user_invalidated;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
index 673078faa27a..b7933c2ce765 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
@@ -24,9 +24,9 @@
* Alex Deucher
*/
+#include <drm/display/drm_dp_helper.h>
#include <drm/drm_edid.h>
#include <drm/drm_fb_helper.h>
-#include <drm/dp/drm_dp_helper.h>
#include <drm/drm_probe_helper.h>
#include <drm/amdgpu_drm.h>
#include "amdgpu.h"
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 970b065e9a6b..2982b543c27f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -55,8 +55,8 @@ static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p,
bo = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj));
p->uf_entry.priority = 0;
p->uf_entry.tv.bo = &bo->tbo;
- /* One for TTM and one for the CS job */
- p->uf_entry.tv.num_shared = 2;
+ /* One for TTM and two for the CS job */
+ p->uf_entry.tv.num_shared = 3;
drm_gem_object_put(gobj);
@@ -128,6 +128,8 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, union drm_amdgpu_cs
goto free_chunk;
}
+ mutex_lock(&p->ctx->lock);
+
/* skip guilty context job */
if (atomic_read(&p->ctx->guilty) == 1) {
ret = -ECANCELED;
@@ -550,7 +552,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
if (r) {
kvfree(e->user_pages);
e->user_pages = NULL;
- return r;
+ goto out_free_user_pages;
}
for (i = 0; i < bo->tbo.ttm->num_pages; i++) {
@@ -567,21 +569,13 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
if (unlikely(r != 0)) {
if (r != -ERESTARTSYS)
DRM_ERROR("ttm_eu_reserve_buffers failed.\n");
- goto out;
+ goto out_free_user_pages;
}
amdgpu_bo_list_for_each_entry(e, p->bo_list) {
struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
e->bo_va = amdgpu_vm_bo_find(vm, bo);
-
- if (bo->tbo.base.dma_buf && !amdgpu_bo_explicit_sync(bo)) {
- e->chain = dma_fence_chain_alloc();
- if (!e->chain) {
- r = -ENOMEM;
- goto error_validate;
- }
- }
}
/* Move fence waiting after getting reservation lock of
@@ -642,14 +636,21 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
}
error_validate:
+ if (r)
+ ttm_eu_backoff_reservation(&p->ticket, &p->validated);
+
+out_free_user_pages:
if (r) {
- amdgpu_bo_list_for_each_entry(e, p->bo_list) {
- dma_fence_chain_free(e->chain);
- e->chain = NULL;
+ amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
+ struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
+
+ if (!e->user_pages)
+ continue;
+ amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
+ kvfree(e->user_pages);
+ e->user_pages = NULL;
}
- ttm_eu_backoff_reservation(&p->ticket, &p->validated);
}
-out:
return r;
}
@@ -688,17 +689,9 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error,
{
unsigned i;
- if (error && backoff) {
- struct amdgpu_bo_list_entry *e;
-
- amdgpu_bo_list_for_each_entry(e, parser->bo_list) {
- dma_fence_chain_free(e->chain);
- e->chain = NULL;
- }
-
+ if (error && backoff)
ttm_eu_backoff_reservation(&parser->ticket,
&parser->validated);
- }
for (i = 0; i < parser->num_post_deps; i++) {
drm_syncobj_put(parser->post_deps[i].syncobj);
@@ -709,6 +702,7 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error,
dma_fence_put(parser->fence);
if (parser->ctx) {
+ mutex_unlock(&parser->ctx->lock);
amdgpu_ctx_put(parser->ctx);
}
if (parser->bo_list)
@@ -806,22 +800,22 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
if (r)
return r;
- r = amdgpu_vm_bo_update(adev, fpriv->prt_va, false, NULL);
+ r = amdgpu_vm_bo_update(adev, fpriv->prt_va, false);
if (r)
return r;
- r = amdgpu_sync_vm_fence(&p->job->sync, fpriv->prt_va->last_pt_update);
+ r = amdgpu_sync_fence(&p->job->sync, fpriv->prt_va->last_pt_update);
if (r)
return r;
if (amdgpu_mcbp || amdgpu_sriov_vf(adev)) {
bo_va = fpriv->csa_va;
BUG_ON(!bo_va);
- r = amdgpu_vm_bo_update(adev, bo_va, false, NULL);
+ r = amdgpu_vm_bo_update(adev, bo_va, false);
if (r)
return r;
- r = amdgpu_sync_vm_fence(&p->job->sync, bo_va->last_pt_update);
+ r = amdgpu_sync_fence(&p->job->sync, bo_va->last_pt_update);
if (r)
return r;
}
@@ -836,11 +830,11 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
if (bo_va == NULL)
continue;
- r = amdgpu_vm_bo_update(adev, bo_va, false, NULL);
+ r = amdgpu_vm_bo_update(adev, bo_va, false);
if (r)
return r;
- r = amdgpu_sync_vm_fence(&p->job->sync, bo_va->last_pt_update);
+ r = amdgpu_sync_fence(&p->job->sync, bo_va->last_pt_update);
if (r)
return r;
}
@@ -853,7 +847,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
if (r)
return r;
- r = amdgpu_sync_vm_fence(&p->job->sync, vm->last_update);
+ r = amdgpu_sync_fence(&p->job->sync, vm->last_update);
if (r)
return r;
@@ -1157,6 +1151,9 @@ static int amdgpu_cs_dependencies(struct amdgpu_device *adev,
{
int i, r;
+ /* TODO: Investigate why we still need the context lock */
+ mutex_unlock(&p->ctx->lock);
+
for (i = 0; i < p->nchunks; ++i) {
struct amdgpu_cs_chunk *chunk;
@@ -1167,32 +1164,34 @@ static int amdgpu_cs_dependencies(struct amdgpu_device *adev,
case AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES:
r = amdgpu_cs_process_fence_dep(p, chunk);
if (r)
- return r;
+ goto out;
break;
case AMDGPU_CHUNK_ID_SYNCOBJ_IN:
r = amdgpu_cs_process_syncobj_in_dep(p, chunk);
if (r)
- return r;
+ goto out;
break;
case AMDGPU_CHUNK_ID_SYNCOBJ_OUT:
r = amdgpu_cs_process_syncobj_out_dep(p, chunk);
if (r)
- return r;
+ goto out;
break;
case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT:
r = amdgpu_cs_process_syncobj_timeline_in_dep(p, chunk);
if (r)
- return r;
+ goto out;
break;
case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL:
r = amdgpu_cs_process_syncobj_timeline_out_dep(p, chunk);
if (r)
- return r;
+ goto out;
break;
}
}
- return 0;
+out:
+ mutex_lock(&p->ctx->lock);
+ return r;
}
static void amdgpu_cs_post_dependencies(struct amdgpu_cs_parser *p)
@@ -1272,24 +1271,9 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
amdgpu_vm_move_to_lru_tail(p->adev, &fpriv->vm);
- amdgpu_bo_list_for_each_entry(e, p->bo_list) {
- struct dma_resv *resv = e->tv.bo->base.resv;
- struct dma_fence_chain *chain = e->chain;
-
- if (!chain)
- continue;
-
- /*
- * Work around dma_resv shortcomings by wrapping up the
- * submission in a dma_fence_chain and add it as exclusive
- * fence.
- */
- dma_fence_chain_init(chain, dma_resv_excl_fence(resv),
- dma_fence_get(p->fence), 1);
-
- rcu_assign_pointer(resv->fence_excl, &chain->base);
- e->chain = NULL;
- }
+ /* Make sure all BOs are remembered as writers */
+ amdgpu_bo_list_for_each_entry(e, p->bo_list)
+ e->tv.num_shared = 0;
ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
mutex_unlock(&p->adev->notifier_lock);
@@ -1368,6 +1352,7 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
goto out;
r = amdgpu_cs_submit(&parser, cs);
+
out:
amdgpu_cs_parser_fini(&parser, r, reserved_buffers);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index 5981c7d9bd48..8f0e6d93bb9c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -237,6 +237,7 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
kref_init(&ctx->refcount);
spin_lock_init(&ctx->ring_lock);
+ mutex_init(&ctx->lock);
ctx->reset_counter = atomic_read(&adev->gpu_reset_counter);
ctx->reset_counter_query = ctx->reset_counter;
@@ -357,6 +358,7 @@ static void amdgpu_ctx_fini(struct kref *ref)
drm_dev_exit(idx);
}
+ mutex_destroy(&ctx->lock);
kfree(ctx);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
index d0cbfcea90f7..142f2f87d44c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
@@ -49,6 +49,7 @@ struct amdgpu_ctx {
bool preamble_presented;
int32_t init_priority;
int32_t override_priority;
+ struct mutex lock;
atomic_t guilty;
unsigned long ras_counter_ce;
unsigned long ras_counter_ue;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
index 5d04d24a0d5f..eedb12f6b8a3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
@@ -38,6 +38,7 @@
#include "amdgpu_umr.h"
#include "amdgpu_reset.h"
+#include "amdgpu_psp_ta.h"
#if defined(CONFIG_DEBUG_FS)
@@ -730,7 +731,7 @@ static ssize_t amdgpu_debugfs_gca_config_read(struct file *f, char __user *buf,
return -ENOMEM;
/* version, increment each time something is added */
- config[no_regs++] = 4;
+ config[no_regs++] = 5;
config[no_regs++] = adev->gfx.config.max_shader_engines;
config[no_regs++] = adev->gfx.config.max_tile_pipes;
config[no_regs++] = adev->gfx.config.max_cu_per_sh;
@@ -757,8 +758,8 @@ static ssize_t amdgpu_debugfs_gca_config_read(struct file *f, char __user *buf,
/* rev==1 */
config[no_regs++] = adev->rev_id;
- config[no_regs++] = adev->pg_flags;
- config[no_regs++] = adev->cg_flags;
+ config[no_regs++] = lower_32_bits(adev->pg_flags);
+ config[no_regs++] = lower_32_bits(adev->cg_flags);
/* rev==2 */
config[no_regs++] = adev->family;
@@ -773,6 +774,10 @@ static ssize_t amdgpu_debugfs_gca_config_read(struct file *f, char __user *buf,
/* rev==4 APU flag */
config[no_regs++] = adev->flags & AMD_IS_APU ? 1 : 0;
+ /* rev==5 PG/CG flag upper 32bit */
+ config[no_regs++] = upper_32_bits(adev->pg_flags);
+ config[no_regs++] = upper_32_bits(adev->cg_flags);
+
while (size && (*pos < no_regs * 4)) {
uint32_t value;
@@ -1763,6 +1768,7 @@ int amdgpu_debugfs_init(struct amdgpu_device *adev)
DRM_ERROR("registering register debugfs failed (%d).\n", r);
amdgpu_debugfs_firmware_init(adev);
+ amdgpu_ta_if_debugfs_init(adev);
#if defined(CONFIG_DRM_AMD_DC)
if (amdgpu_device_has_dc_support(adev))
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 49f734137f15..53d938d5a00a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -913,7 +913,10 @@ static int amdgpu_device_asic_init(struct amdgpu_device *adev)
{
amdgpu_asic_pre_asic_init(adev);
- return amdgpu_atom_asic_init(adev->mode_info.atom_context);
+ if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(11, 0, 0))
+ return amdgpu_atomfirmware_asic_init(adev, true);
+ else
+ return amdgpu_atom_asic_init(adev->mode_info.atom_context);
}
/**
@@ -1703,7 +1706,7 @@ int amdgpu_device_ip_set_powergating_state(void *dev,
* clockgating is enabled.
*/
void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
- u32 *flags)
+ u64 *flags)
{
int i;
@@ -1926,11 +1929,9 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
adev->firmware.gpu_info_fw = NULL;
if (adev->mman.discovery_bin) {
- amdgpu_discovery_get_gfx_info(adev);
-
/*
* FIXME: The bounding box is still needed by Navi12, so
- * temporarily read it from gpu_info firmware. Should be droped
+ * temporarily read it from gpu_info firmware. Should be dropped
* when DAL no longer needs it.
*/
if (adev->asic_type != CHIP_NAVI12)
@@ -3700,7 +3701,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
/* enable PCIE atomic ops */
if (amdgpu_sriov_vf(adev))
adev->have_atomics_support = ((struct amd_sriov_msg_pf2vf_info *)
- adev->virt.fw_reserve.p_pf2vf)->pcie_atomic_ops_enabled_flags ==
+ adev->virt.fw_reserve.p_pf2vf)->pcie_atomic_ops_support_flags ==
(PCI_EXP_DEVCAP2_ATOMIC_COMP32 | PCI_EXP_DEVCAP2_ATOMIC_COMP64);
else
adev->have_atomics_support =
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_df.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_df.h
index 6b25837955c4..1538b2dbfff1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_df.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_df.h
@@ -40,7 +40,7 @@ struct amdgpu_df_funcs {
void (*update_medium_grain_clock_gating)(struct amdgpu_device *adev,
bool enable);
void (*get_clockgating_state)(struct amdgpu_device *adev,
- u32 *flags);
+ u64 *flags);
void (*enable_ecc_force_par_wr_rmw)(struct amdgpu_device *adev,
bool enable);
int (*pmc_start)(struct amdgpu_device *adev, uint64_t config,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
index e4fcbb385a62..0c359ad9fd63 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
@@ -271,8 +271,6 @@ static int amdgpu_discovery_init(struct amdgpu_device *adev)
{
struct table_info *info;
struct binary_header *bhdr;
- struct ip_discovery_header *ihdr;
- struct gpu_info_header *ghdr;
uint16_t offset;
uint16_t size;
uint16_t checksum;
@@ -290,7 +288,7 @@ static int amdgpu_discovery_init(struct amdgpu_device *adev)
goto out;
}
- if(!amdgpu_discovery_verify_binary_signature(adev->mman.discovery_bin)) {
+ if (!amdgpu_discovery_verify_binary_signature(adev->mman.discovery_bin)) {
dev_warn(adev->dev, "get invalid ip discovery binary signature from vram\n");
/* retry read ip discovery binary from file */
r = amdgpu_discovery_read_binary_from_file(adev, adev->mman.discovery_bin);
@@ -324,31 +322,110 @@ static int amdgpu_discovery_init(struct amdgpu_device *adev)
info = &bhdr->table_list[IP_DISCOVERY];
offset = le16_to_cpu(info->offset);
checksum = le16_to_cpu(info->checksum);
- ihdr = (struct ip_discovery_header *)(adev->mman.discovery_bin + offset);
- if (le32_to_cpu(ihdr->signature) != DISCOVERY_TABLE_SIGNATURE) {
- dev_err(adev->dev, "invalid ip discovery data table signature\n");
- r = -EINVAL;
- goto out;
- }
+ if (offset) {
+ struct ip_discovery_header *ihdr =
+ (struct ip_discovery_header *)(adev->mman.discovery_bin + offset);
+ if (le32_to_cpu(ihdr->signature) != DISCOVERY_TABLE_SIGNATURE) {
+ dev_err(adev->dev, "invalid ip discovery data table signature\n");
+ r = -EINVAL;
+ goto out;
+ }
- if (!amdgpu_discovery_verify_checksum(adev->mman.discovery_bin + offset,
- le16_to_cpu(ihdr->size), checksum)) {
- dev_err(adev->dev, "invalid ip discovery data table checksum\n");
- r = -EINVAL;
- goto out;
+ if (!amdgpu_discovery_verify_checksum(adev->mman.discovery_bin + offset,
+ le16_to_cpu(ihdr->size), checksum)) {
+ dev_err(adev->dev, "invalid ip discovery data table checksum\n");
+ r = -EINVAL;
+ goto out;
+ }
}
info = &bhdr->table_list[GC];
offset = le16_to_cpu(info->offset);
checksum = le16_to_cpu(info->checksum);
- ghdr = (struct gpu_info_header *)(adev->mman.discovery_bin + offset);
- if (!amdgpu_discovery_verify_checksum(adev->mman.discovery_bin + offset,
- le32_to_cpu(ghdr->size), checksum)) {
- dev_err(adev->dev, "invalid gc data table checksum\n");
- r = -EINVAL;
- goto out;
+ if (offset) {
+ struct gpu_info_header *ghdr =
+ (struct gpu_info_header *)(adev->mman.discovery_bin + offset);
+
+ if (le32_to_cpu(ghdr->table_id) != GC_TABLE_ID) {
+ dev_err(adev->dev, "invalid ip discovery gc table id\n");
+ r = -EINVAL;
+ goto out;
+ }
+
+ if (!amdgpu_discovery_verify_checksum(adev->mman.discovery_bin + offset,
+ le32_to_cpu(ghdr->size), checksum)) {
+ dev_err(adev->dev, "invalid gc data table checksum\n");
+ r = -EINVAL;
+ goto out;
+ }
+ }
+
+ info = &bhdr->table_list[HARVEST_INFO];
+ offset = le16_to_cpu(info->offset);
+ checksum = le16_to_cpu(info->checksum);
+
+ if (offset) {
+ struct harvest_info_header *hhdr =
+ (struct harvest_info_header *)(adev->mman.discovery_bin + offset);
+
+ if (le32_to_cpu(hhdr->signature) != HARVEST_TABLE_SIGNATURE) {
+ dev_err(adev->dev, "invalid ip discovery harvest table signature\n");
+ r = -EINVAL;
+ goto out;
+ }
+
+ if (!amdgpu_discovery_verify_checksum(adev->mman.discovery_bin + offset,
+ sizeof(struct harvest_table), checksum)) {
+ dev_err(adev->dev, "invalid harvest data table checksum\n");
+ r = -EINVAL;
+ goto out;
+ }
+ }
+
+ info = &bhdr->table_list[VCN_INFO];
+ offset = le16_to_cpu(info->offset);
+ checksum = le16_to_cpu(info->checksum);
+
+ if (offset) {
+ struct vcn_info_header *vhdr =
+ (struct vcn_info_header *)(adev->mman.discovery_bin + offset);
+
+ if (le32_to_cpu(vhdr->table_id) != VCN_INFO_TABLE_ID) {
+ dev_err(adev->dev, "invalid ip discovery vcn table id\n");
+ r = -EINVAL;
+ goto out;
+ }
+
+ if (!amdgpu_discovery_verify_checksum(adev->mman.discovery_bin + offset,
+ le32_to_cpu(vhdr->size_bytes), checksum)) {
+ dev_err(adev->dev, "invalid vcn data table checksum\n");
+ r = -EINVAL;
+ goto out;
+ }
+ }
+
+ info = &bhdr->table_list[MALL_INFO];
+ offset = le16_to_cpu(info->offset);
+ checksum = le16_to_cpu(info->checksum);
+
+ if (0 && offset) {
+ struct mall_info_header *mhdr =
+ (struct mall_info_header *)(adev->mman.discovery_bin + offset);
+
+ if (le32_to_cpu(mhdr->table_id) != MALL_INFO_TABLE_ID) {
+ dev_err(adev->dev, "invalid ip discovery mall table id\n");
+ r = -EINVAL;
+ goto out;
+ }
+
+ if (!amdgpu_discovery_verify_checksum(adev->mman.discovery_bin + offset,
+ le32_to_cpu(mhdr->size_bytes), checksum)) {
+ dev_err(adev->dev, "invalid mall data table checksum\n");
+ r = -EINVAL;
+ goto out;
+ }
}
return 0;
@@ -430,21 +507,30 @@ static void amdgpu_discovery_read_harvest_bit_per_ip(struct amdgpu_device *adev,
}
}
next_ip:
- ip_offset += sizeof(*ip) + 4 * (ip->num_base_address - 1);
+ ip_offset += struct_size(ip, base_address, ip->num_base_address);
}
}
}
static void amdgpu_discovery_read_from_harvest_table(struct amdgpu_device *adev,
- uint32_t *vcn_harvest_count)
+ uint32_t *vcn_harvest_count,
+ uint32_t *umc_harvest_count)
{
struct binary_header *bhdr;
struct harvest_table *harvest_info;
+ u16 offset;
int i;
bhdr = (struct binary_header *)adev->mman.discovery_bin;
- harvest_info = (struct harvest_table *)(adev->mman.discovery_bin +
- le16_to_cpu(bhdr->table_list[HARVEST_INFO].offset));
+ offset = le16_to_cpu(bhdr->table_list[HARVEST_INFO].offset);
+
+ if (!offset) {
+ dev_err(adev->dev, "invalid harvest table offset\n");
+ return;
+ }
+
+ harvest_info = (struct harvest_table *)(adev->mman.discovery_bin + offset);
+
for (i = 0; i < 32; i++) {
if (le16_to_cpu(harvest_info->list[i].hw_id) == 0)
break;
@@ -460,6 +546,9 @@ static void amdgpu_discovery_read_from_harvest_table(struct amdgpu_device *adev,
case DMU_HWID:
adev->harvest_ip_mask |= AMD_HARVEST_IP_DMU_MASK;
break;
+ case UMC_HWID:
+ (*umc_harvest_count)++;
+ break;
default:
break;
}
@@ -798,7 +887,7 @@ static int amdgpu_discovery_sysfs_ips(struct amdgpu_device *adev,
res = kobject_add(&ip_hw_instance->kobj, NULL,
"%d", ip_hw_instance->num_instance);
next_ip:
- ip_offset += sizeof(*ip) + 4 * (ip->num_base_address - 1);
+ ip_offset += struct_size(ip, base_address, ip->num_base_address);
}
}
@@ -957,7 +1046,7 @@ static void amdgpu_discovery_sysfs_fini(struct amdgpu_device *adev)
/* ================================================== */
-int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev)
+static int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev)
{
struct binary_header *bhdr;
struct ip_discovery_header *ihdr;
@@ -1033,6 +1122,9 @@ int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev)
le16_to_cpu(ip->hw_id) == SDMA3_HWID)
adev->sdma.num_instances++;
+ if (le16_to_cpu(ip->hw_id) == UMC_HWID)
+ adev->gmc.num_umc++;
+
for (k = 0; k < num_base_address; k++) {
/*
* convert the endianness of base addresses in place,
@@ -1063,7 +1155,7 @@ int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev)
}
next_ip:
- ip_offset += sizeof(*ip) + 4 * (ip->num_base_address - 1);
+ ip_offset += struct_size(ip, base_address, ip->num_base_address);
}
}
@@ -1113,16 +1205,17 @@ int amdgpu_discovery_get_ip_version(struct amdgpu_device *adev, int hw_id, int n
*revision = ip->revision;
return 0;
}
- ip_offset += sizeof(*ip) + 4 * (ip->num_base_address - 1);
+ ip_offset += struct_size(ip, base_address, ip->num_base_address);
}
}
return -EINVAL;
}
-void amdgpu_discovery_harvest_ip(struct amdgpu_device *adev)
+static void amdgpu_discovery_harvest_ip(struct amdgpu_device *adev)
{
int vcn_harvest_count = 0;
+ int umc_harvest_count = 0;
/*
* Harvest table does not fit Navi1x and legacy GPUs,
@@ -1141,7 +1234,8 @@ void amdgpu_discovery_harvest_ip(struct amdgpu_device *adev)
&vcn_harvest_count);
} else {
amdgpu_discovery_read_from_harvest_table(adev,
- &vcn_harvest_count);
+ &vcn_harvest_count,
+ &umc_harvest_count);
}
amdgpu_discovery_harvest_config_quirk(adev);
@@ -1150,24 +1244,24 @@ void amdgpu_discovery_harvest_ip(struct amdgpu_device *adev)
adev->harvest_ip_mask |= AMD_HARVEST_IP_VCN_MASK;
adev->harvest_ip_mask |= AMD_HARVEST_IP_JPEG_MASK;
}
- if ((adev->pdev->device == 0x731E &&
- (adev->pdev->revision == 0xC6 || adev->pdev->revision == 0xC7)) ||
- (adev->pdev->device == 0x7340 && adev->pdev->revision == 0xC9) ||
- (adev->pdev->device == 0x7360 && adev->pdev->revision == 0xC7)) {
- adev->harvest_ip_mask |= AMD_HARVEST_IP_VCN_MASK;
- adev->harvest_ip_mask |= AMD_HARVEST_IP_JPEG_MASK;
+
+ if (umc_harvest_count < adev->gmc.num_umc) {
+ adev->gmc.num_umc -= umc_harvest_count;
}
}
union gc_info {
struct gc_info_v1_0 v1;
+ struct gc_info_v1_1 v1_1;
+ struct gc_info_v1_2 v1_2;
struct gc_info_v2_0 v2;
};
-int amdgpu_discovery_get_gfx_info(struct amdgpu_device *adev)
+static int amdgpu_discovery_get_gfx_info(struct amdgpu_device *adev)
{
struct binary_header *bhdr;
union gc_info *gc_info;
+ u16 offset;
if (!adev->mman.discovery_bin) {
DRM_ERROR("ip discovery uninitialized\n");
@@ -1175,9 +1269,14 @@ int amdgpu_discovery_get_gfx_info(struct amdgpu_device *adev)
}
bhdr = (struct binary_header *)adev->mman.discovery_bin;
- gc_info = (union gc_info *)(adev->mman.discovery_bin +
- le16_to_cpu(bhdr->table_list[GC].offset));
- switch (gc_info->v1.header.version_major) {
+ offset = le16_to_cpu(bhdr->table_list[GC].offset);
+
+ if (!offset)
+ return 0;
+
+ gc_info = (union gc_info *)(adev->mman.discovery_bin + offset);
+
+ switch (le16_to_cpu(gc_info->v1.header.version_major)) {
case 1:
adev->gfx.config.max_shader_engines = le32_to_cpu(gc_info->v1.gc_num_se);
adev->gfx.config.max_cu_per_sh = 2 * (le32_to_cpu(gc_info->v1.gc_num_wgp0_per_sa) +
@@ -1197,6 +1296,21 @@ int amdgpu_discovery_get_gfx_info(struct amdgpu_device *adev)
adev->gfx.config.num_sc_per_sh = le32_to_cpu(gc_info->v1.gc_num_sc_per_se) /
le32_to_cpu(gc_info->v1.gc_num_sa_per_se);
adev->gfx.config.num_packer_per_sc = le32_to_cpu(gc_info->v1.gc_num_packer_per_sc);
+ if (gc_info->v1.header.version_minor >= 1) {
+ adev->gfx.config.gc_num_tcp_per_sa = le32_to_cpu(gc_info->v1_1.gc_num_tcp_per_sa);
+ adev->gfx.config.gc_num_sdp_interface = le32_to_cpu(gc_info->v1_1.gc_num_sdp_interface);
+ adev->gfx.config.gc_num_tcps = le32_to_cpu(gc_info->v1_1.gc_num_tcps);
+ }
+ if (gc_info->v1.header.version_minor >= 2) {
+ adev->gfx.config.gc_num_tcp_per_wpg = le32_to_cpu(gc_info->v1_2.gc_num_tcp_per_wpg);
+ adev->gfx.config.gc_tcp_l1_size = le32_to_cpu(gc_info->v1_2.gc_tcp_l1_size);
+ adev->gfx.config.gc_num_sqc_per_wgp = le32_to_cpu(gc_info->v1_2.gc_num_sqc_per_wgp);
+ adev->gfx.config.gc_l1_instruction_cache_size_per_sqc = le32_to_cpu(gc_info->v1_2.gc_l1_instruction_cache_size_per_sqc);
+ adev->gfx.config.gc_l1_data_cache_size_per_sqc = le32_to_cpu(gc_info->v1_2.gc_l1_data_cache_size_per_sqc);
+ adev->gfx.config.gc_gl1c_per_sa = le32_to_cpu(gc_info->v1_2.gc_gl1c_per_sa);
+ adev->gfx.config.gc_gl1c_size_per_instance = le32_to_cpu(gc_info->v1_2.gc_gl1c_size_per_instance);
+ adev->gfx.config.gc_gl2c_per_gpu = le32_to_cpu(gc_info->v1_2.gc_gl2c_per_gpu);
+ }
break;
case 2:
adev->gfx.config.max_shader_engines = le32_to_cpu(gc_info->v2.gc_num_se);
@@ -1220,8 +1334,105 @@ int amdgpu_discovery_get_gfx_info(struct amdgpu_device *adev)
default:
dev_err(adev->dev,
"Unhandled GC info table %d.%d\n",
- gc_info->v1.header.version_major,
- gc_info->v1.header.version_minor);
+ le16_to_cpu(gc_info->v1.header.version_major),
+ le16_to_cpu(gc_info->v1.header.version_minor));
+ return -EINVAL;
+ }
+ return 0;
+}
+
+union mall_info {
+ struct mall_info_v1_0 v1;
+};
+
+int amdgpu_discovery_get_mall_info(struct amdgpu_device *adev)
+{
+ struct binary_header *bhdr;
+ union mall_info *mall_info;
+ u32 u, mall_size_per_umc, m_s_present, half_use;
+ u64 mall_size;
+ u16 offset;
+
+ if (!adev->mman.discovery_bin) {
+ DRM_ERROR("ip discovery uninitialized\n");
+ return -EINVAL;
+ }
+
+ bhdr = (struct binary_header *)adev->mman.discovery_bin;
+ offset = le16_to_cpu(bhdr->table_list[MALL_INFO].offset);
+
+ if (!offset)
+ return 0;
+
+ mall_info = (union mall_info *)(adev->mman.discovery_bin + offset);
+
+ switch (le16_to_cpu(mall_info->v1.header.version_major)) {
+ case 1:
+ mall_size = 0;
+ mall_size_per_umc = le32_to_cpu(mall_info->v1.mall_size_per_m);
+ m_s_present = le32_to_cpu(mall_info->v1.m_s_present);
+ half_use = le32_to_cpu(mall_info->v1.m_half_use);
+ for (u = 0; u < adev->gmc.num_umc; u++) {
+ if (m_s_present & (1 << u))
+ mall_size += mall_size_per_umc * 2;
+ else if (half_use & (1 << u))
+ mall_size += mall_size_per_umc / 2;
+ else
+ mall_size += mall_size_per_umc;
+ }
+ adev->gmc.mall_size = mall_size;
+ break;
+ default:
+ dev_err(adev->dev,
+ "Unhandled MALL info table %d.%d\n",
+ le16_to_cpu(mall_info->v1.header.version_major),
+ le16_to_cpu(mall_info->v1.header.version_minor));
+ return -EINVAL;
+ }
+ return 0;
+}
+
+union vcn_info {
+ struct vcn_info_v1_0 v1;
+};
+
+static int amdgpu_discovery_get_vcn_info(struct amdgpu_device *adev)
+{
+ struct binary_header *bhdr;
+ union vcn_info *vcn_info;
+ u16 offset;
+ int v;
+
+ if (!adev->mman.discovery_bin) {
+ DRM_ERROR("ip discovery uninitialized\n");
+ return -EINVAL;
+ }
+
+ if (adev->vcn.num_vcn_inst > VCN_INFO_TABLE_MAX_NUM_INSTANCES) {
+ dev_err(adev->dev, "invalid vcn instances\n");
+ return -EINVAL;
+ }
+
+ bhdr = (struct binary_header *)adev->mman.discovery_bin;
+ offset = le16_to_cpu(bhdr->table_list[VCN_INFO].offset);
+
+ if (!offset)
+ return 0;
+
+ vcn_info = (union vcn_info *)(adev->mman.discovery_bin + offset);
+
+ switch (le16_to_cpu(vcn_info->v1.header.version_major)) {
+ case 1:
+ for (v = 0; v < adev->vcn.num_vcn_inst; v++) {
+ adev->vcn.vcn_codec_disable_mask[v] =
+ le32_to_cpu(vcn_info->v1.instance_info[v].fuse_data.all_bits);
+ }
+ break;
+ default:
+ dev_err(adev->dev,
+ "Unhandled VCN info table %d.%d\n",
+ le16_to_cpu(vcn_info->v1.header.version_major),
+ le16_to_cpu(vcn_info->v1.header.version_minor));
return -EINVAL;
}
return 0;
@@ -1657,6 +1868,7 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
case CHIP_VEGA10:
vega10_reg_base_init(adev);
adev->sdma.num_instances = 2;
+ adev->gmc.num_umc = 4;
adev->ip_versions[MMHUB_HWIP][0] = IP_VERSION(9, 0, 0);
adev->ip_versions[ATHUB_HWIP][0] = IP_VERSION(9, 0, 0);
adev->ip_versions[OSSSYS_HWIP][0] = IP_VERSION(4, 0, 0);
@@ -1678,6 +1890,7 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
case CHIP_VEGA12:
vega10_reg_base_init(adev);
adev->sdma.num_instances = 2;
+ adev->gmc.num_umc = 4;
adev->ip_versions[MMHUB_HWIP][0] = IP_VERSION(9, 3, 0);
adev->ip_versions[ATHUB_HWIP][0] = IP_VERSION(9, 3, 0);
adev->ip_versions[OSSSYS_HWIP][0] = IP_VERSION(4, 0, 1);
@@ -1700,6 +1913,7 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
vega10_reg_base_init(adev);
adev->sdma.num_instances = 1;
adev->vcn.num_vcn_inst = 1;
+ adev->gmc.num_umc = 2;
if (adev->apu_flags & AMD_APU_IS_RAVEN2) {
adev->ip_versions[MMHUB_HWIP][0] = IP_VERSION(9, 2, 0);
adev->ip_versions[ATHUB_HWIP][0] = IP_VERSION(9, 2, 0);
@@ -1737,6 +1951,7 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
case CHIP_VEGA20:
vega20_reg_base_init(adev);
adev->sdma.num_instances = 2;
+ adev->gmc.num_umc = 8;
adev->ip_versions[MMHUB_HWIP][0] = IP_VERSION(9, 4, 0);
adev->ip_versions[ATHUB_HWIP][0] = IP_VERSION(9, 4, 0);
adev->ip_versions[OSSSYS_HWIP][0] = IP_VERSION(4, 2, 0);
@@ -1760,6 +1975,7 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
arct_reg_base_init(adev);
adev->sdma.num_instances = 8;
adev->vcn.num_vcn_inst = 2;
+ adev->gmc.num_umc = 8;
adev->ip_versions[MMHUB_HWIP][0] = IP_VERSION(9, 4, 1);
adev->ip_versions[ATHUB_HWIP][0] = IP_VERSION(9, 4, 1);
adev->ip_versions[OSSSYS_HWIP][0] = IP_VERSION(4, 2, 1);
@@ -1787,6 +2003,7 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
aldebaran_reg_base_init(adev);
adev->sdma.num_instances = 5;
adev->vcn.num_vcn_inst = 2;
+ adev->gmc.num_umc = 4;
adev->ip_versions[MMHUB_HWIP][0] = IP_VERSION(9, 4, 2);
adev->ip_versions[ATHUB_HWIP][0] = IP_VERSION(9, 4, 2);
adev->ip_versions[OSSSYS_HWIP][0] = IP_VERSION(4, 4, 0);
@@ -1814,6 +2031,9 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
return -EINVAL;
amdgpu_discovery_harvest_ip(adev);
+ amdgpu_discovery_get_gfx_info(adev);
+ amdgpu_discovery_get_mall_info(adev);
+ amdgpu_discovery_get_vcn_info(adev);
break;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h
index 14537cec19db..8563dd4a7dc2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h
@@ -28,12 +28,8 @@
#define DISCOVERY_TMR_OFFSET (64 << 10)
void amdgpu_discovery_fini(struct amdgpu_device *adev);
-int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev);
-void amdgpu_discovery_harvest_ip(struct amdgpu_device *adev);
int amdgpu_discovery_get_ip_version(struct amdgpu_device *adev, int hw_id, int number_instance,
int *major, int *minor, int *revision);
-
-int amdgpu_discovery_get_gfx_info(struct amdgpu_device *adev);
int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev);
#endif /* __AMDGPU_DISCOVERY__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
index fae5c1debfad..17c9bbe0cbc5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
@@ -41,6 +41,11 @@
#include <drm/drm_fourcc.h>
#include <drm/drm_vblank.h>
+static int amdgpu_display_framebuffer_init(struct drm_device *dev,
+ struct amdgpu_framebuffer *rfb,
+ const struct drm_mode_fb_cmd2 *mode_cmd,
+ struct drm_gem_object *obj);
+
static void amdgpu_display_flip_callback(struct dma_fence *f,
struct dma_fence_cb *cb)
{
@@ -113,8 +118,9 @@ static void amdgpu_display_flip_work_func(struct work_struct *__work)
spin_unlock_irqrestore(&crtc->dev->event_lock, flags);
- DRM_DEBUG_DRIVER("crtc:%d[%p], pflip_stat:AMDGPU_FLIP_SUBMITTED, work: %p,\n",
- amdgpu_crtc->crtc_id, amdgpu_crtc, work);
+ drm_dbg_vbl(adev_to_drm(adev),
+ "crtc:%d[%p], pflip_stat:AMDGPU_FLIP_SUBMITTED, work: %p,\n",
+ amdgpu_crtc->crtc_id, amdgpu_crtc, work);
}
@@ -200,8 +206,7 @@ int amdgpu_display_crtc_page_flip_target(struct drm_crtc *crtc,
goto unpin;
}
- /* TODO: Unify this with other drivers */
- r = dma_resv_get_fences(new_abo->tbo.base.resv, true,
+ r = dma_resv_get_fences(new_abo->tbo.base.resv, DMA_RESV_USAGE_WRITE,
&work->shared_count,
&work->shared);
if (unlikely(r != 0)) {
@@ -1039,35 +1044,11 @@ static int amdgpu_display_get_fb_info(const struct amdgpu_framebuffer *amdgpu_fb
return r;
}
-int amdgpu_display_gem_fb_init(struct drm_device *dev,
- struct amdgpu_framebuffer *rfb,
- const struct drm_mode_fb_cmd2 *mode_cmd,
- struct drm_gem_object *obj)
-{
- int ret;
-
- rfb->base.obj[0] = obj;
- drm_helper_mode_fill_fb_struct(dev, &rfb->base, mode_cmd);
-
- ret = amdgpu_display_framebuffer_init(dev, rfb, mode_cmd, obj);
- if (ret)
- goto err;
-
- ret = drm_framebuffer_init(dev, &rfb->base, &amdgpu_fb_funcs);
- if (ret)
- goto err;
-
- return 0;
-err:
- drm_dbg_kms(dev, "Failed to init gem fb: %d\n", ret);
- rfb->base.obj[0] = NULL;
- return ret;
-}
-
-int amdgpu_display_gem_fb_verify_and_init(
- struct drm_device *dev, struct amdgpu_framebuffer *rfb,
- struct drm_file *file_priv, const struct drm_mode_fb_cmd2 *mode_cmd,
- struct drm_gem_object *obj)
+static int amdgpu_display_gem_fb_verify_and_init(struct drm_device *dev,
+ struct amdgpu_framebuffer *rfb,
+ struct drm_file *file_priv,
+ const struct drm_mode_fb_cmd2 *mode_cmd,
+ struct drm_gem_object *obj)
{
int ret;
@@ -1099,10 +1080,10 @@ err:
return ret;
}
-int amdgpu_display_framebuffer_init(struct drm_device *dev,
- struct amdgpu_framebuffer *rfb,
- const struct drm_mode_fb_cmd2 *mode_cmd,
- struct drm_gem_object *obj)
+static int amdgpu_display_framebuffer_init(struct drm_device *dev,
+ struct amdgpu_framebuffer *rfb,
+ const struct drm_mode_fb_cmd2 *mode_cmd,
+ struct drm_gem_object *obj)
{
struct amdgpu_device *adev = drm_to_adev(dev);
int ret, i;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
index 579adfafe4d0..782cbca37538 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
@@ -102,21 +102,9 @@ static int amdgpu_dma_buf_pin(struct dma_buf_attachment *attach)
{
struct drm_gem_object *obj = attach->dmabuf->priv;
struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
- int r;
/* pin buffer into GTT */
- r = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT);
- if (r)
- return r;
-
- if (bo->tbo.moving) {
- r = dma_fence_wait(bo->tbo.moving, true);
- if (r) {
- amdgpu_bo_unpin(bo);
- return r;
- }
- }
- return 0;
+ return amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT);
}
/**
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index b03663f42cc9..ebd37fb19cdb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -136,7 +136,7 @@ int amdgpu_sched_jobs = 32;
int amdgpu_sched_hw_submission = 2;
uint amdgpu_pcie_gen_cap;
uint amdgpu_pcie_lane_cap;
-uint amdgpu_cg_mask = 0xffffffff;
+u64 amdgpu_cg_mask = 0xffffffffffffffff;
uint amdgpu_pg_mask = 0xffffffff;
uint amdgpu_sdma_phase_quantum = 32;
char *amdgpu_disable_cu = NULL;
@@ -454,12 +454,12 @@ MODULE_PARM_DESC(pcie_lane_cap, "PCIE Lane Caps (0: autodetect (default))");
module_param_named(pcie_lane_cap, amdgpu_pcie_lane_cap, uint, 0444);
/**
- * DOC: cg_mask (uint)
+ * DOC: cg_mask (ullong)
* Override Clockgating features enabled on GPU (0 = disable clock gating). See the AMD_CG_SUPPORT flags in
- * drivers/gpu/drm/amd/include/amd_shared.h. The default is 0xffffffff (all enabled).
+ * drivers/gpu/drm/amd/include/amd_shared.h. The default is 0xffffffffffffffff (all enabled).
*/
MODULE_PARM_DESC(cg_mask, "Clockgating flags mask (0 = disable clock gating)");
-module_param_named(cg_mask, amdgpu_cg_mask, uint, 0444);
+module_param_named(cg_mask, amdgpu_cg_mask, ullong, 0444);
/**
* DOC: pg_mask (uint)
@@ -2323,18 +2323,23 @@ static int amdgpu_pmops_suspend(struct device *dev)
{
struct drm_device *drm_dev = dev_get_drvdata(dev);
struct amdgpu_device *adev = drm_to_adev(drm_dev);
- int r;
if (amdgpu_acpi_is_s0ix_active(adev))
adev->in_s0ix = true;
else
adev->in_s3 = true;
- r = amdgpu_device_suspend(drm_dev, true);
- if (r)
- return r;
+ return amdgpu_device_suspend(drm_dev, true);
+}
+
+static int amdgpu_pmops_suspend_noirq(struct device *dev)
+{
+ struct drm_device *drm_dev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(drm_dev);
+
if (!adev->in_s0ix)
- r = amdgpu_asic_reset(adev);
- return r;
+ return amdgpu_asic_reset(adev);
+
+ return 0;
}
static int amdgpu_pmops_resume(struct device *dev)
@@ -2390,6 +2395,71 @@ static int amdgpu_pmops_restore(struct device *dev)
return amdgpu_device_resume(drm_dev, true);
}
+static int amdgpu_runtime_idle_check_display(struct device *dev)
+{
+ struct pci_dev *pdev = to_pci_dev(dev);
+ struct drm_device *drm_dev = pci_get_drvdata(pdev);
+ struct amdgpu_device *adev = drm_to_adev(drm_dev);
+
+ if (adev->mode_info.num_crtc) {
+ struct drm_connector *list_connector;
+ struct drm_connector_list_iter iter;
+ int ret = 0;
+
+ /* XXX: Return busy if any displays are connected to avoid
+ * possible display wakeups after runtime resume due to
+ * hotplug events in case any displays were connected while
+ * the GPU was in suspend. Remove this once that is fixed.
+ */
+ mutex_lock(&drm_dev->mode_config.mutex);
+ drm_connector_list_iter_begin(drm_dev, &iter);
+ drm_for_each_connector_iter(list_connector, &iter) {
+ if (list_connector->status == connector_status_connected) {
+ ret = -EBUSY;
+ break;
+ }
+ }
+ drm_connector_list_iter_end(&iter);
+ mutex_unlock(&drm_dev->mode_config.mutex);
+
+ if (ret)
+ return ret;
+
+ if (amdgpu_device_has_dc_support(adev)) {
+ struct drm_crtc *crtc;
+
+ drm_for_each_crtc(crtc, drm_dev) {
+ drm_modeset_lock(&crtc->mutex, NULL);
+ if (crtc->state->active)
+ ret = -EBUSY;
+ drm_modeset_unlock(&crtc->mutex);
+ if (ret < 0)
+ break;
+ }
+ } else {
+ mutex_lock(&drm_dev->mode_config.mutex);
+ drm_modeset_lock(&drm_dev->mode_config.connection_mutex, NULL);
+
+ drm_connector_list_iter_begin(drm_dev, &iter);
+ drm_for_each_connector_iter(list_connector, &iter) {
+ if (list_connector->dpms == DRM_MODE_DPMS_ON) {
+ ret = -EBUSY;
+ break;
+ }
+ }
+
+ drm_connector_list_iter_end(&iter);
+
+ drm_modeset_unlock(&drm_dev->mode_config.connection_mutex);
+ mutex_unlock(&drm_dev->mode_config.mutex);
+ }
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
static int amdgpu_pmops_runtime_suspend(struct device *dev)
{
struct pci_dev *pdev = to_pci_dev(dev);
@@ -2402,6 +2472,10 @@ static int amdgpu_pmops_runtime_suspend(struct device *dev)
return -EBUSY;
}
+ ret = amdgpu_runtime_idle_check_display(dev);
+ if (ret)
+ return ret;
+
/* wait for all rings to drain before suspending */
for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
struct amdgpu_ring *ring = adev->rings[i];
@@ -2511,41 +2585,7 @@ static int amdgpu_pmops_runtime_idle(struct device *dev)
return -EBUSY;
}
- if (amdgpu_device_has_dc_support(adev)) {
- struct drm_crtc *crtc;
-
- drm_for_each_crtc(crtc, drm_dev) {
- drm_modeset_lock(&crtc->mutex, NULL);
- if (crtc->state->active)
- ret = -EBUSY;
- drm_modeset_unlock(&crtc->mutex);
- if (ret < 0)
- break;
- }
-
- } else {
- struct drm_connector *list_connector;
- struct drm_connector_list_iter iter;
-
- mutex_lock(&drm_dev->mode_config.mutex);
- drm_modeset_lock(&drm_dev->mode_config.connection_mutex, NULL);
-
- drm_connector_list_iter_begin(drm_dev, &iter);
- drm_for_each_connector_iter(list_connector, &iter) {
- if (list_connector->dpms == DRM_MODE_DPMS_ON) {
- ret = -EBUSY;
- break;
- }
- }
-
- drm_connector_list_iter_end(&iter);
-
- drm_modeset_unlock(&drm_dev->mode_config.connection_mutex);
- mutex_unlock(&drm_dev->mode_config.mutex);
- }
-
- if (ret == -EBUSY)
- DRM_DEBUG_DRIVER("failing to power off - crtc active\n");
+ ret = amdgpu_runtime_idle_check_display(dev);
pm_runtime_mark_last_busy(dev);
pm_runtime_autosuspend(dev);
@@ -2575,6 +2615,7 @@ static const struct dev_pm_ops amdgpu_pm_ops = {
.prepare = amdgpu_pmops_prepare,
.complete = amdgpu_pmops_complete,
.suspend = amdgpu_pmops_suspend,
+ .suspend_noirq = amdgpu_pmops_suspend_noirq,
.resume = amdgpu_pmops_resume,
.freeze = amdgpu_pmops_freeze,
.thaw = amdgpu_pmops_thaw,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index 57b74d35052f..652571267077 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -526,7 +526,8 @@ int amdgpu_gem_wait_idle_ioctl(struct drm_device *dev, void *data,
return -ENOENT;
}
robj = gem_to_amdgpu_bo(gobj);
- ret = dma_resv_wait_timeout(robj->tbo.base.resv, true, true, timeout);
+ ret = dma_resv_wait_timeout(robj->tbo.base.resv, DMA_RESV_USAGE_READ,
+ true, timeout);
/* ret == 0 means not signaled,
* ret > 0 means signaled
@@ -612,7 +613,7 @@ static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev,
if (operation == AMDGPU_VA_OP_MAP ||
operation == AMDGPU_VA_OP_REPLACE) {
- r = amdgpu_vm_bo_update(adev, bo_va, false, NULL);
+ r = amdgpu_vm_bo_update(adev, bo_va, false);
if (r)
goto error;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
index dcb3c7871c73..ad8e7d486a7d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
@@ -183,6 +183,17 @@ struct amdgpu_gfx_config {
uint32_t num_packer_per_sc;
uint32_t pa_sc_tile_steering_override;
uint64_t tcc_disabled_mask;
+ uint32_t gc_num_tcp_per_sa;
+ uint32_t gc_num_sdp_interface;
+ uint32_t gc_num_tcps;
+ uint32_t gc_num_tcp_per_wpg;
+ uint32_t gc_tcp_l1_size;
+ uint32_t gc_num_sqc_per_wgp;
+ uint32_t gc_l1_instruction_cache_size_per_sqc;
+ uint32_t gc_l1_data_cache_size_per_sqc;
+ uint32_t gc_gl1c_per_sa;
+ uint32_t gc_gl1c_size_per_instance;
+ uint32_t gc_gl2c_per_gpu;
};
struct amdgpu_cu_info {
@@ -202,6 +213,7 @@ struct amdgpu_cu_info {
struct amdgpu_gfx_ras {
struct amdgpu_ras_block_object ras_block;
void (*enable_watchdog_timer)(struct amdgpu_device *adev);
+ bool (*query_utcl2_poison_status)(struct amdgpu_device *adev);
};
struct amdgpu_gfx_funcs {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
index a66a0881a934..88b852b3a2cb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
@@ -25,6 +25,9 @@
*/
#include <linux/io-64-nonatomic-lo-hi.h>
+#ifdef CONFIG_X86
+#include <asm/hypervisor.h>
+#endif
#include "amdgpu.h"
#include "amdgpu_gmc.h"
@@ -647,12 +650,14 @@ void amdgpu_gmc_get_vbios_allocations(struct amdgpu_device *adev)
case CHIP_VEGA10:
adev->mman.keep_stolen_vga_memory = true;
/*
- * VEGA10 SRIOV VF needs some firmware reserved area.
+ * VEGA10 SRIOV VF with MS_HYPERV host needs some firmware reserved area.
*/
- if (amdgpu_sriov_vf(adev)) {
- adev->mman.stolen_reserved_offset = 0x100000;
- adev->mman.stolen_reserved_size = 0x600000;
+#ifdef CONFIG_X86
+ if (amdgpu_sriov_vf(adev) && hypervisor_is_type(X86_HYPER_MS_HYPERV)) {
+ adev->mman.stolen_reserved_offset = 0x500000;
+ adev->mman.stolen_reserved_size = 0x200000;
}
+#endif
break;
case CHIP_RAVEN:
case CHIP_RENOIR:
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
index 032b0313f277..e7dc069c4512 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
@@ -257,6 +257,11 @@ struct amdgpu_gmc {
struct amdgpu_bo *pdb0_bo;
/* CPU kmapped address of pdb0*/
void *ptr_pdb0;
+
+ /* MALL size */
+ u64 mall_size;
+ /* number of UMC instances */
+ int num_umc;
};
#define amdgpu_gmc_flush_gpu_tlb(adev, vmid, vmhub, type) ((adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid), (vmhub), (type)))
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
index dd78402e3cb0..8c6b2284cf56 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
@@ -26,23 +26,12 @@
#include "amdgpu.h"
-struct amdgpu_gtt_node {
- struct ttm_buffer_object *tbo;
- struct ttm_range_mgr_node base;
-};
-
static inline struct amdgpu_gtt_mgr *
to_gtt_mgr(struct ttm_resource_manager *man)
{
return container_of(man, struct amdgpu_gtt_mgr, manager);
}
-static inline struct amdgpu_gtt_node *
-to_amdgpu_gtt_node(struct ttm_resource *res)
-{
- return container_of(res, struct amdgpu_gtt_node, base.base);
-}
-
/**
* DOC: mem_info_gtt_total
*
@@ -106,9 +95,9 @@ const struct attribute_group amdgpu_gtt_mgr_attr_group = {
*/
bool amdgpu_gtt_mgr_has_gart_addr(struct ttm_resource *res)
{
- struct amdgpu_gtt_node *node = to_amdgpu_gtt_node(res);
+ struct ttm_range_mgr_node *node = to_ttm_range_mgr_node(res);
- return drm_mm_node_allocated(&node->base.mm_nodes[0]);
+ return drm_mm_node_allocated(&node->mm_nodes[0]);
}
/**
@@ -128,15 +117,14 @@ static int amdgpu_gtt_mgr_new(struct ttm_resource_manager *man,
{
struct amdgpu_gtt_mgr *mgr = to_gtt_mgr(man);
uint32_t num_pages = PFN_UP(tbo->base.size);
- struct amdgpu_gtt_node *node;
+ struct ttm_range_mgr_node *node;
int r;
- node = kzalloc(struct_size(node, base.mm_nodes, 1), GFP_KERNEL);
+ node = kzalloc(struct_size(node, mm_nodes, 1), GFP_KERNEL);
if (!node)
return -ENOMEM;
- node->tbo = tbo;
- ttm_resource_init(tbo, place, &node->base.base);
+ ttm_resource_init(tbo, place, &node->base);
if (!(place->flags & TTM_PL_FLAG_TEMPORARY) &&
ttm_resource_manager_usage(man) > man->size) {
r = -ENOSPC;
@@ -145,8 +133,7 @@ static int amdgpu_gtt_mgr_new(struct ttm_resource_manager *man,
if (place->lpfn) {
spin_lock(&mgr->lock);
- r = drm_mm_insert_node_in_range(&mgr->mm,
- &node->base.mm_nodes[0],
+ r = drm_mm_insert_node_in_range(&mgr->mm, &node->mm_nodes[0],
num_pages, tbo->page_alignment,
0, place->fpfn, place->lpfn,
DRM_MM_INSERT_BEST);
@@ -154,18 +141,18 @@ static int amdgpu_gtt_mgr_new(struct ttm_resource_manager *man,
if (unlikely(r))
goto err_free;
- node->base.base.start = node->base.mm_nodes[0].start;
+ node->base.start = node->mm_nodes[0].start;
} else {
- node->base.mm_nodes[0].start = 0;
- node->base.mm_nodes[0].size = node->base.base.num_pages;
- node->base.base.start = AMDGPU_BO_INVALID_OFFSET;
+ node->mm_nodes[0].start = 0;
+ node->mm_nodes[0].size = node->base.num_pages;
+ node->base.start = AMDGPU_BO_INVALID_OFFSET;
}
- *res = &node->base.base;
+ *res = &node->base;
return 0;
err_free:
- ttm_resource_fini(man, &node->base.base);
+ ttm_resource_fini(man, &node->base);
kfree(node);
return r;
}
@@ -181,12 +168,12 @@ err_free:
static void amdgpu_gtt_mgr_del(struct ttm_resource_manager *man,
struct ttm_resource *res)
{
- struct amdgpu_gtt_node *node = to_amdgpu_gtt_node(res);
+ struct ttm_range_mgr_node *node = to_ttm_range_mgr_node(res);
struct amdgpu_gtt_mgr *mgr = to_gtt_mgr(man);
spin_lock(&mgr->lock);
- if (drm_mm_node_allocated(&node->base.mm_nodes[0]))
- drm_mm_remove_node(&node->base.mm_nodes[0]);
+ if (drm_mm_node_allocated(&node->mm_nodes[0]))
+ drm_mm_remove_node(&node->mm_nodes[0]);
spin_unlock(&mgr->lock);
ttm_resource_fini(man, res);
@@ -202,15 +189,15 @@ static void amdgpu_gtt_mgr_del(struct ttm_resource_manager *man,
*/
void amdgpu_gtt_mgr_recover(struct amdgpu_gtt_mgr *mgr)
{
- struct amdgpu_gtt_node *node;
+ struct ttm_range_mgr_node *node;
struct drm_mm_node *mm_node;
struct amdgpu_device *adev;
adev = container_of(mgr, typeof(*adev), mman.gtt_mgr);
spin_lock(&mgr->lock);
drm_mm_for_each_node(mm_node, &mgr->mm) {
- node = container_of(mm_node, typeof(*node), base.mm_nodes[0]);
- amdgpu_ttm_recover_gart(node->tbo);
+ node = container_of(mm_node, typeof(*node), mm_nodes[0]);
+ amdgpu_ttm_recover_gart(node->base.bo);
}
spin_unlock(&mgr->lock);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h
index 9181c7bef7c6..ac5c61d3de2b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h
@@ -33,7 +33,7 @@ struct amdgpu_hdp_funcs {
void (*invalidate_hdp)(struct amdgpu_device *adev,
struct amdgpu_ring *ring);
void (*update_clock_gating)(struct amdgpu_device *adev, bool enable);
- void (*get_clock_gating_state)(struct amdgpu_device *adev, u32 *flags);
+ void (*get_clock_gating_state)(struct amdgpu_device *adev, u64 *flags);
void (*init_registers)(struct amdgpu_device *adev);
};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
index 92a70fb57fa3..03d115d2b5ed 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
@@ -107,36 +107,19 @@ static void amdgpu_pasid_free_cb(struct dma_fence *fence,
void amdgpu_pasid_free_delayed(struct dma_resv *resv,
u32 pasid)
{
- struct dma_fence *fence, **fences;
struct amdgpu_pasid_cb *cb;
- unsigned count;
+ struct dma_fence *fence;
int r;
- r = dma_resv_get_fences(resv, true, &count, &fences);
+ r = dma_resv_get_singleton(resv, DMA_RESV_USAGE_BOOKKEEP, &fence);
if (r)
goto fallback;
- if (count == 0) {
+ if (!fence) {
amdgpu_pasid_free(pasid);
return;
}
- if (count == 1) {
- fence = fences[0];
- kfree(fences);
- } else {
- uint64_t context = dma_fence_context_alloc(1);
- struct dma_fence_array *array;
-
- array = dma_fence_array_create(count, fences, context,
- 1, false);
- if (!array) {
- kfree(fences);
- goto fallback;
- }
- fence = &array->base;
- }
-
cb = kmalloc(sizeof(*cb), GFP_KERNEL);
if (!cb) {
/* Last resort when we are OOM */
@@ -156,7 +139,8 @@ fallback:
/* Not enough memory for the delayed delete, as last resort
* block for all the fences to complete.
*/
- dma_resv_wait_timeout(resv, true, false, MAX_SCHEDULE_TIMEOUT);
+ dma_resv_wait_timeout(resv, DMA_RESV_USAGE_BOOKKEEP,
+ false, MAX_SCHEDULE_TIMEOUT);
amdgpu_pasid_free(pasid);
}
@@ -276,19 +260,15 @@ static int amdgpu_vmid_grab_reserved(struct amdgpu_vm *vm,
struct amdgpu_device *adev = ring->adev;
unsigned vmhub = ring->funcs->vmhub;
uint64_t fence_context = adev->fence_context + ring->idx;
- struct dma_fence *updates = sync->last_vm_update;
bool needs_flush = vm->use_cpu_for_update;
- int r = 0;
+ uint64_t updates = amdgpu_vm_tlb_seq(vm);
+ int r;
*id = vm->reserved_vmid[vmhub];
- if (updates && (*id)->flushed_updates &&
- updates->context == (*id)->flushed_updates->context &&
- !dma_fence_is_later(updates, (*id)->flushed_updates))
- updates = NULL;
-
if ((*id)->owner != vm->immediate.fence_context ||
- job->vm_pd_addr != (*id)->pd_gpu_addr ||
- updates || !(*id)->last_flush ||
+ (*id)->pd_gpu_addr != job->vm_pd_addr ||
+ (*id)->flushed_updates < updates ||
+ !(*id)->last_flush ||
((*id)->last_flush->context != fence_context &&
!dma_fence_is_signaled((*id)->last_flush))) {
struct dma_fence *tmp;
@@ -302,8 +282,7 @@ static int amdgpu_vmid_grab_reserved(struct amdgpu_vm *vm,
tmp = amdgpu_sync_peek_fence(&(*id)->active, ring);
if (tmp) {
*id = NULL;
- r = amdgpu_sync_fence(sync, tmp);
- return r;
+ return amdgpu_sync_fence(sync, tmp);
}
needs_flush = true;
}
@@ -315,10 +294,7 @@ static int amdgpu_vmid_grab_reserved(struct amdgpu_vm *vm,
if (r)
return r;
- if (updates) {
- dma_fence_put((*id)->flushed_updates);
- (*id)->flushed_updates = dma_fence_get(updates);
- }
+ (*id)->flushed_updates = updates;
job->vm_needs_flush = needs_flush;
return 0;
}
@@ -346,7 +322,7 @@ static int amdgpu_vmid_grab_used(struct amdgpu_vm *vm,
unsigned vmhub = ring->funcs->vmhub;
struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
uint64_t fence_context = adev->fence_context + ring->idx;
- struct dma_fence *updates = sync->last_vm_update;
+ uint64_t updates = amdgpu_vm_tlb_seq(vm);
int r;
job->vm_needs_flush = vm->use_cpu_for_update;
@@ -354,7 +330,6 @@ static int amdgpu_vmid_grab_used(struct amdgpu_vm *vm,
/* Check if we can use a VMID already assigned to this VM */
list_for_each_entry_reverse((*id), &id_mgr->ids_lru, list) {
bool needs_flush = vm->use_cpu_for_update;
- struct dma_fence *flushed;
/* Check all the prerequisites to using this VMID */
if ((*id)->owner != vm->immediate.fence_context)
@@ -368,8 +343,7 @@ static int amdgpu_vmid_grab_used(struct amdgpu_vm *vm,
!dma_fence_is_signaled((*id)->last_flush)))
needs_flush = true;
- flushed = (*id)->flushed_updates;
- if (updates && (!flushed || dma_fence_is_later(updates, flushed)))
+ if ((*id)->flushed_updates < updates)
needs_flush = true;
if (needs_flush && !adev->vm_manager.concurrent_flush)
@@ -382,11 +356,7 @@ static int amdgpu_vmid_grab_used(struct amdgpu_vm *vm,
if (r)
return r;
- if (updates && (!flushed || dma_fence_is_later(updates, flushed))) {
- dma_fence_put((*id)->flushed_updates);
- (*id)->flushed_updates = dma_fence_get(updates);
- }
-
+ (*id)->flushed_updates = updates;
job->vm_needs_flush |= needs_flush;
return 0;
}
@@ -432,8 +402,6 @@ int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
goto error;
if (!id) {
- struct dma_fence *updates = sync->last_vm_update;
-
/* Still no ID to use? Then use the idle one found earlier */
id = idle;
@@ -442,8 +410,7 @@ int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
if (r)
goto error;
- dma_fence_put(id->flushed_updates);
- id->flushed_updates = dma_fence_get(updates);
+ id->flushed_updates = amdgpu_vm_tlb_seq(vm);
job->vm_needs_flush = true;
}
@@ -610,7 +577,6 @@ void amdgpu_vmid_mgr_fini(struct amdgpu_device *adev)
struct amdgpu_vmid *id = &id_mgr->ids[j];
amdgpu_sync_free(&id->active);
- dma_fence_put(id->flushed_updates);
dma_fence_put(id->last_flush);
dma_fence_put(id->pasid_mapping);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h
index 0c3b4fa1f936..06c8a0034fa5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h
@@ -47,7 +47,7 @@ struct amdgpu_vmid {
uint64_t pd_gpu_addr;
/* last flushed PD/PT update */
- struct dma_fence *flushed_updates;
+ uint64_t flushed_updates;
uint32_t current_gpu_reset_count;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
index ea3e8c66211f..b4cf8717f554 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
@@ -193,20 +193,7 @@ static irqreturn_t amdgpu_irq_handler(int irq, void *arg)
if (ret == IRQ_HANDLED)
pm_runtime_mark_last_busy(dev->dev);
- /* For the hardware that cannot enable bif ring for both ras_controller_irq
- * and ras_err_evnet_athub_irq ih cookies, the driver has to poll status
- * register to check whether the interrupt is triggered or not, and properly
- * ack the interrupt if it is there
- */
- if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__PCIE_BIF)) {
- if (adev->nbio.ras &&
- adev->nbio.ras->handle_ras_controller_intr_no_bifring)
- adev->nbio.ras->handle_ras_controller_intr_no_bifring(adev);
-
- if (adev->nbio.ras &&
- adev->nbio.ras->handle_ras_err_event_athub_intr_no_bifring)
- adev->nbio.ras->handle_ras_err_event_athub_intr_no_bifring(adev);
- }
+ amdgpu_ras_interrupt_fatal_error_handler(adev);
return ret;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h
index 55fbff2be761..b6c7fb00e05a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h
@@ -24,6 +24,8 @@
#ifndef __AMDGPU_JPEG_H__
#define __AMDGPU_JPEG_H__
+#include "amdgpu_ras.h"
+
#define AMDGPU_MAX_JPEG_INSTANCES 2
#define AMDGPU_JPEG_HARVEST_JPEG0 (1 << 0)
@@ -39,6 +41,10 @@ struct amdgpu_jpeg_inst {
struct amdgpu_jpeg_reg external;
};
+struct amdgpu_jpeg_ras {
+ struct amdgpu_ras_block_object ras_block;
+};
+
struct amdgpu_jpeg {
uint8_t num_jpeg_inst;
struct amdgpu_jpeg_inst inst[AMDGPU_MAX_JPEG_INSTANCES];
@@ -48,6 +54,8 @@ struct amdgpu_jpeg {
enum amd_powergating_state cur_state;
struct mutex jpeg_pg_lock;
atomic_t total_submission_cnt;
+ struct ras_common_if *ras_if;
+ struct amdgpu_jpeg_ras *ras;
};
int amdgpu_jpeg_sw_init(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index 6b626c293e72..51bb977154eb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -43,6 +43,17 @@
#include "amdgpu_display.h"
#include "amdgpu_ras.h"
+static void amdgpu_runtime_pm_quirk(struct amdgpu_device *adev)
+{
+ /*
+ * Add below quirk on several sienna_cichlid cards to disable
+ * runtime pm to fix EMI failures.
+ */
+ if (((adev->pdev->device == 0x73A1) && (adev->pdev->revision == 0x00)) ||
+ ((adev->pdev->device == 0x73BF) && (adev->pdev->revision == 0xCF)))
+ adev->runpm = false;
+}
+
void amdgpu_unregister_gpu_instance(struct amdgpu_device *adev)
{
struct amdgpu_gpu_instance *gpu_instance;
@@ -180,6 +191,9 @@ int amdgpu_driver_load_kms(struct amdgpu_device *adev, unsigned long flags)
*/
if (adev->is_fw_fb)
adev->runpm = false;
+
+ amdgpu_runtime_pm_quirk(adev);
+
if (adev->runpm)
dev_info(adev->dev, "Using BACO for runtime pm\n");
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h
index 9f1540f0ebf9..f939395c5914 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h
@@ -34,7 +34,7 @@ struct amdgpu_mmhub_funcs {
void (*gart_disable)(struct amdgpu_device *adev);
int (*set_clockgating)(struct amdgpu_device *adev,
enum amd_clockgating_state state);
- void (*get_clockgating)(struct amdgpu_device *adev, u32 *flags);
+ void (*get_clockgating)(struct amdgpu_device *adev, u64 *flags);
void (*setup_vm_pt_regs)(struct amdgpu_device *adev, uint32_t vmid,
uint64_t page_table_base);
void (*update_power_gating)(struct amdgpu_device *adev,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
index 4b153daf283d..b86c0b8252a5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
@@ -75,8 +75,8 @@ static bool amdgpu_mn_invalidate_gfx(struct mmu_interval_notifier *mni,
mmu_interval_set_seq(mni, cur_seq);
- r = dma_resv_wait_timeout(bo->tbo.base.resv, true, false,
- MAX_SCHEDULE_TIMEOUT);
+ r = dma_resv_wait_timeout(bo->tbo.base.resv, DMA_RESV_USAGE_BOOKKEEP,
+ false, MAX_SCHEDULE_TIMEOUT);
mutex_unlock(&adev->notifier_lock);
if (r <= 0)
DRM_ERROR("(%ld) failed to wait for user bo\n", r);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
index a546cb3cfa18..f80b4838cea1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
@@ -30,10 +30,10 @@
#ifndef AMDGPU_MODE_H
#define AMDGPU_MODE_H
+#include <drm/display/drm_dp_helper.h>
#include <drm/drm_crtc.h>
#include <drm/drm_edid.h>
#include <drm/drm_encoder.h>
-#include <drm/dp/drm_dp_helper.h>
#include <drm/drm_fixed.h>
#include <drm/drm_crtc_helper.h>
#include <drm/drm_fb_helper.h>
@@ -44,7 +44,7 @@
#include <linux/hrtimer.h>
#include "amdgpu_irq.h"
-#include <drm/dp/drm_dp_mst_helper.h>
+#include <drm/display/drm_dp_mst_helper.h>
#include "modules/inc/mod_freesync.h"
#include "amdgpu_dm_irq_params.h"
@@ -592,19 +592,6 @@ int amdgpu_display_get_crtc_scanoutpos(struct drm_device *dev,
int *hpos, ktime_t *stime, ktime_t *etime,
const struct drm_display_mode *mode);
-int amdgpu_display_gem_fb_init(struct drm_device *dev,
- struct amdgpu_framebuffer *rfb,
- const struct drm_mode_fb_cmd2 *mode_cmd,
- struct drm_gem_object *obj);
-int amdgpu_display_gem_fb_verify_and_init(
- struct drm_device *dev, struct amdgpu_framebuffer *rfb,
- struct drm_file *file_priv, const struct drm_mode_fb_cmd2 *mode_cmd,
- struct drm_gem_object *obj);
-int amdgpu_display_framebuffer_init(struct drm_device *dev,
- struct amdgpu_framebuffer *rfb,
- const struct drm_mode_fb_cmd2 *mode_cmd,
- struct drm_gem_object *obj);
-
int amdgpufb_remove(struct drm_device *dev, struct drm_framebuffer *fb);
void amdgpu_enc_destroy(struct drm_encoder *encoder);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h
index 3d13e601fc35..ccd9fe96fab7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h
@@ -83,7 +83,7 @@ struct amdgpu_nbio_funcs {
void (*update_medium_grain_light_sleep)(struct amdgpu_device *adev,
bool enable);
void (*get_clockgating_state)(struct amdgpu_device *adev,
- u32 *flags);
+ u64 *flags);
void (*ih_control)(struct amdgpu_device *adev);
void (*init_registers)(struct amdgpu_device *adev);
void (*remap_hdp_registers)(struct amdgpu_device *adev);
@@ -93,6 +93,7 @@ struct amdgpu_nbio_funcs {
void (*apply_lc_spc_mode_wa)(struct amdgpu_device *adev);
void (*apply_l1_link_width_reconfig_wa)(struct amdgpu_device *adev);
void (*clear_doorbell_interrupt)(struct amdgpu_device *adev);
+ u32 (*get_rom_offset)(struct amdgpu_device *adev);
};
struct amdgpu_nbio {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 940752488330..5444515c1476 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -472,7 +472,7 @@ static bool amdgpu_bo_validate_size(struct amdgpu_device *adev,
fail:
DRM_DEBUG("BO size %lu > total memory in domain: %llu\n", size,
- man->size << PAGE_SHIFT);
+ man->size);
return false;
}
@@ -612,9 +612,8 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
if (unlikely(r))
goto fail_unreserve;
- amdgpu_bo_fence(bo, fence, false);
- dma_fence_put(bo->tbo.moving);
- bo->tbo.moving = dma_fence_get(fence);
+ dma_resv_add_fence(bo->tbo.base.resv, fence,
+ DMA_RESV_USAGE_KERNEL);
dma_fence_put(fence);
}
if (!bp->resv)
@@ -761,6 +760,11 @@ int amdgpu_bo_kmap(struct amdgpu_bo *bo, void **ptr)
if (bo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS)
return -EPERM;
+ r = dma_resv_wait_timeout(bo->tbo.base.resv, DMA_RESV_USAGE_KERNEL,
+ false, MAX_SCHEDULE_TIMEOUT);
+ if (r < 0)
+ return r;
+
kptr = amdgpu_bo_kptr(bo);
if (kptr) {
if (ptr)
@@ -768,11 +772,6 @@ int amdgpu_bo_kmap(struct amdgpu_bo *bo, void **ptr)
return 0;
}
- r = dma_resv_wait_timeout(bo->tbo.base.resv, false, false,
- MAX_SCHEDULE_TIMEOUT);
- if (r < 0)
- return r;
-
r = ttm_bo_kmap(&bo->tbo, 0, bo->tbo.resource->num_pages, &bo->kmap);
if (r)
return r;
@@ -1390,11 +1389,17 @@ void amdgpu_bo_fence(struct amdgpu_bo *bo, struct dma_fence *fence,
bool shared)
{
struct dma_resv *resv = bo->tbo.base.resv;
+ int r;
- if (shared)
- dma_resv_add_shared_fence(resv, fence);
- else
- dma_resv_add_excl_fence(resv, fence);
+ r = dma_resv_reserve_fences(resv, 1);
+ if (r) {
+ /* As last resort on OOM we block for the fence */
+ dma_fence_wait(fence, false);
+ return;
+ }
+
+ dma_resv_add_fence(resv, fence, shared ? DMA_RESV_USAGE_READ :
+ DMA_RESV_USAGE_WRITE);
}
/**
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index a6acec1a6155..0bd22ebcc3d1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -46,8 +46,6 @@ static int psp_sysfs_init(struct amdgpu_device *adev);
static void psp_sysfs_fini(struct amdgpu_device *adev);
static int psp_load_smu_fw(struct psp_context *psp);
-static int psp_ta_unload(struct psp_context *psp, struct ta_context *context);
-static int psp_ta_load(struct psp_context *psp, struct ta_context *context);
static int psp_rap_terminate(struct psp_context *psp);
static int psp_securedisplay_terminate(struct psp_context *psp);
@@ -278,6 +276,7 @@ static int psp_init_sriov_microcode(struct psp_context *psp)
break;
case IP_VERSION(13, 0, 2):
ret = psp_init_cap_microcode(psp, "aldebaran");
+ ret &= psp_init_ta_microcode(psp, "aldebaran");
break;
default:
BUG();
@@ -862,7 +861,7 @@ static void psp_prep_ta_unload_cmd_buf(struct psp_gfx_cmd_resp *cmd,
cmd->cmd.cmd_unload_ta.session_id = session_id;
}
-static int psp_ta_unload(struct psp_context *psp, struct ta_context *context)
+int psp_ta_unload(struct psp_context *psp, struct ta_context *context)
{
int ret;
struct psp_gfx_cmd_resp *cmd = acquire_psp_cmd_buf(psp);
@@ -944,7 +943,7 @@ static void psp_prep_ta_load_cmd_buf(struct psp_gfx_cmd_resp *cmd,
cmd->cmd.cmd_load_ta.cmd_buf_len = context->mem_context.shared_mem_size;
}
-static int psp_ta_init_shared_buf(struct psp_context *psp,
+int psp_ta_init_shared_buf(struct psp_context *psp,
struct ta_mem_context *mem_ctx)
{
/*
@@ -958,7 +957,7 @@ static int psp_ta_init_shared_buf(struct psp_context *psp,
&mem_ctx->shared_buf);
}
-static void psp_ta_free_shared_buf(struct ta_mem_context *mem_ctx)
+void psp_ta_free_shared_buf(struct ta_mem_context *mem_ctx)
{
amdgpu_bo_free_kernel(&mem_ctx->shared_bo, &mem_ctx->shared_mc_addr,
&mem_ctx->shared_buf);
@@ -969,6 +968,42 @@ static int psp_xgmi_init_shared_buf(struct psp_context *psp)
return psp_ta_init_shared_buf(psp, &psp->xgmi_context.context.mem_context);
}
+static void psp_prep_ta_invoke_indirect_cmd_buf(struct psp_gfx_cmd_resp *cmd,
+ uint32_t ta_cmd_id,
+ struct ta_context *context)
+{
+ cmd->cmd_id = GFX_CMD_ID_INVOKE_CMD;
+ cmd->cmd.cmd_invoke_cmd.session_id = context->session_id;
+ cmd->cmd.cmd_invoke_cmd.ta_cmd_id = ta_cmd_id;
+
+ cmd->cmd.cmd_invoke_cmd.buf.num_desc = 1;
+ cmd->cmd.cmd_invoke_cmd.buf.total_size = context->mem_context.shared_mem_size;
+ cmd->cmd.cmd_invoke_cmd.buf.buf_desc[0].buf_size = context->mem_context.shared_mem_size;
+ cmd->cmd.cmd_invoke_cmd.buf.buf_desc[0].buf_phy_addr_lo =
+ lower_32_bits(context->mem_context.shared_mc_addr);
+ cmd->cmd.cmd_invoke_cmd.buf.buf_desc[0].buf_phy_addr_hi =
+ upper_32_bits(context->mem_context.shared_mc_addr);
+}
+
+int psp_ta_invoke_indirect(struct psp_context *psp,
+ uint32_t ta_cmd_id,
+ struct ta_context *context)
+{
+ int ret;
+ struct psp_gfx_cmd_resp *cmd = acquire_psp_cmd_buf(psp);
+
+ psp_prep_ta_invoke_indirect_cmd_buf(cmd, ta_cmd_id, context);
+
+ ret = psp_cmd_submit_buf(psp, NULL, cmd,
+ psp->fence_buf_mc_addr);
+
+ context->resp_status = cmd->resp.status;
+
+ release_psp_cmd_buf(psp);
+
+ return ret;
+}
+
static void psp_prep_ta_invoke_cmd_buf(struct psp_gfx_cmd_resp *cmd,
uint32_t ta_cmd_id,
uint32_t session_id)
@@ -978,7 +1013,7 @@ static void psp_prep_ta_invoke_cmd_buf(struct psp_gfx_cmd_resp *cmd,
cmd->cmd.cmd_invoke_cmd.ta_cmd_id = ta_cmd_id;
}
-static int psp_ta_invoke(struct psp_context *psp,
+int psp_ta_invoke(struct psp_context *psp,
uint32_t ta_cmd_id,
struct ta_context *context)
{
@@ -990,12 +1025,14 @@ static int psp_ta_invoke(struct psp_context *psp,
ret = psp_cmd_submit_buf(psp, NULL, cmd,
psp->fence_buf_mc_addr);
+ context->resp_status = cmd->resp.status;
+
release_psp_cmd_buf(psp);
return ret;
}
-static int psp_ta_load(struct psp_context *psp, struct ta_context *context)
+int psp_ta_load(struct psp_context *psp, struct ta_context *context)
{
int ret;
struct psp_gfx_cmd_resp *cmd;
@@ -1010,6 +1047,8 @@ static int psp_ta_load(struct psp_context *psp, struct ta_context *context)
ret = psp_cmd_submit_buf(psp, NULL, cmd,
psp->fence_buf_mc_addr);
+ context->resp_status = cmd->resp.status;
+
if (!ret) {
context->session_id = cmd->resp.session_id;
}
@@ -1415,7 +1454,7 @@ int psp_ras_enable_features(struct psp_context *psp,
return 0;
}
-static int psp_ras_terminate(struct psp_context *psp)
+int psp_ras_terminate(struct psp_context *psp)
{
int ret;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
index ff7d533eb746..cf8d3199b35b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
@@ -48,6 +48,17 @@ enum psp_shared_mem_size {
PSP_SECUREDISPLAY_SHARED_MEM_SIZE = 0x4000,
};
+enum ta_type_id {
+ TA_TYPE_XGMI = 1,
+ TA_TYPE_RAS,
+ TA_TYPE_HDCP,
+ TA_TYPE_DTM,
+ TA_TYPE_RAP,
+ TA_TYPE_SECUREDISPLAY,
+
+ TA_TYPE_MAX_INDEX,
+};
+
struct psp_context;
struct psp_xgmi_node_info;
struct psp_xgmi_topology_info;
@@ -151,9 +162,11 @@ struct ta_mem_context {
struct ta_context {
bool initialized;
uint32_t session_id;
+ uint32_t resp_status;
struct ta_mem_context mem_context;
struct psp_bin_desc bin_desc;
enum psp_gfx_cmd_id ta_load_type;
+ enum ta_type_id ta_type;
};
struct ta_cp_context {
@@ -407,6 +420,18 @@ int psp_gpu_reset(struct amdgpu_device *adev);
int psp_update_vcn_sram(struct amdgpu_device *adev, int inst_idx,
uint64_t cmd_gpu_addr, int cmd_size);
+int psp_ta_init_shared_buf(struct psp_context *psp,
+ struct ta_mem_context *mem_ctx);
+void psp_ta_free_shared_buf(struct ta_mem_context *mem_ctx);
+int psp_ta_unload(struct psp_context *psp, struct ta_context *context);
+int psp_ta_load(struct psp_context *psp, struct ta_context *context);
+int psp_ta_invoke(struct psp_context *psp,
+ uint32_t ta_cmd_id,
+ struct ta_context *context);
+int psp_ta_invoke_indirect(struct psp_context *psp,
+ uint32_t ta_cmd_id,
+ struct ta_context *context);
+
int psp_xgmi_initialize(struct psp_context *psp, bool set_extended_data, bool load_ta);
int psp_xgmi_terminate(struct psp_context *psp);
int psp_xgmi_invoke(struct psp_context *psp, uint32_t ta_cmd_id);
@@ -425,6 +450,7 @@ int psp_ras_enable_features(struct psp_context *psp,
union ta_ras_cmd_input *info, bool enable);
int psp_ras_trigger_error(struct psp_context *psp,
struct ta_ras_trigger_error_input *info);
+int psp_ras_terminate(struct psp_context *psp);
int psp_hdcp_invoke(struct psp_context *psp, uint32_t ta_cmd_id);
int psp_dtm_invoke(struct psp_context *psp, uint32_t ta_cmd_id);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c
new file mode 100644
index 000000000000..0988e00612e5
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c
@@ -0,0 +1,304 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu.h"
+#include "amdgpu_psp_ta.h"
+
+#if defined(CONFIG_DEBUG_FS)
+
+static ssize_t ta_if_load_debugfs_write(struct file *fp, const char *buf,
+ size_t len, loff_t *off);
+static ssize_t ta_if_unload_debugfs_write(struct file *fp, const char *buf,
+ size_t len, loff_t *off);
+static ssize_t ta_if_invoke_debugfs_write(struct file *fp, const char *buf,
+ size_t len, loff_t *off);
+
+static uint32_t get_bin_version(const uint8_t *bin)
+{
+ const struct common_firmware_header *hdr =
+ (const struct common_firmware_header *)bin;
+
+ return hdr->ucode_version;
+}
+
+static void prep_ta_mem_context(struct psp_context *psp,
+ struct ta_context *context,
+ uint8_t *shared_buf,
+ uint32_t shared_buf_len)
+{
+ context->mem_context.shared_mem_size = PAGE_ALIGN(shared_buf_len);
+ psp_ta_init_shared_buf(psp, &context->mem_context);
+
+ memcpy((void *)context->mem_context.shared_buf, shared_buf, shared_buf_len);
+}
+
+static bool is_ta_type_valid(enum ta_type_id ta_type)
+{
+ bool ret = false;
+
+ switch (ta_type) {
+ case TA_TYPE_RAS:
+ ret = true;
+ break;
+ default:
+ break;
+ }
+
+ return ret;
+}
+
+static const struct file_operations ta_load_debugfs_fops = {
+ .write = ta_if_load_debugfs_write,
+ .llseek = default_llseek,
+ .owner = THIS_MODULE
+};
+
+static const struct file_operations ta_unload_debugfs_fops = {
+ .write = ta_if_unload_debugfs_write,
+ .llseek = default_llseek,
+ .owner = THIS_MODULE
+};
+
+static const struct file_operations ta_invoke_debugfs_fops = {
+ .write = ta_if_invoke_debugfs_write,
+ .llseek = default_llseek,
+ .owner = THIS_MODULE
+};
+
+
+/**
+ * DOC: AMDGPU TA debugfs interfaces
+ *
+ * Three debugfs interfaces can be opened by a program to
+ * load/invoke/unload TA,
+ *
+ * - /sys/kernel/debug/dri/<N>/ta_if/ta_load
+ * - /sys/kernel/debug/dri/<N>/ta_if/ta_invoke
+ * - /sys/kernel/debug/dri/<N>/ta_if/ta_unload
+ *
+ * How to use the interfaces in a program?
+ *
+ * A program needs to provide transmit buffer to the interfaces
+ * and will receive buffer from the interfaces below,
+ *
+ * - For TA load debugfs interface:
+ * Transmit buffer:
+ * - TA type (4bytes)
+ * - TA bin length (4bytes)
+ * - TA bin
+ * Receive buffer:
+ * - TA ID (4bytes)
+ *
+ * - For TA invoke debugfs interface:
+ * Transmit buffer:
+ * - TA ID (4bytes)
+ * - TA CMD ID (4bytes)
+ * - TA shard buf length (4bytes)
+ * - TA shared buf
+ * Receive buffer:
+ * - TA shared buf
+ *
+ * - For TA unload debugfs interface:
+ * Transmit buffer:
+ * - TA ID (4bytes)
+ */
+
+static ssize_t ta_if_load_debugfs_write(struct file *fp, const char *buf, size_t len, loff_t *off)
+{
+ uint32_t ta_type = 0;
+ uint32_t ta_bin_len = 0;
+ uint8_t *ta_bin = NULL;
+ uint32_t copy_pos = 0;
+ int ret = 0;
+
+ struct amdgpu_device *adev = (struct amdgpu_device *)file_inode(fp)->i_private;
+ struct psp_context *psp = &adev->psp;
+ struct ta_context context = {0};
+
+ if (!buf)
+ return -EINVAL;
+
+ ret = copy_from_user((void *)&ta_type, &buf[copy_pos], sizeof(uint32_t));
+ if (ret || (!is_ta_type_valid(ta_type)))
+ return -EINVAL;
+
+ copy_pos += sizeof(uint32_t);
+
+ ret = copy_from_user((void *)&ta_bin_len, &buf[copy_pos], sizeof(uint32_t));
+ if (ret)
+ return -EINVAL;
+
+ copy_pos += sizeof(uint32_t);
+
+ ta_bin = kzalloc(ta_bin_len, GFP_KERNEL);
+ if (!ta_bin)
+ ret = -ENOMEM;
+ if (copy_from_user((void *)ta_bin, &buf[copy_pos], ta_bin_len)) {
+ ret = -EFAULT;
+ goto err_free_bin;
+ }
+
+ ret = psp_ras_terminate(psp);
+ if (ret) {
+ dev_err(adev->dev, "Failed to unload embedded RAS TA\n");
+ goto err_free_bin;
+ }
+
+ context.ta_type = ta_type;
+ context.ta_load_type = GFX_CMD_ID_LOAD_TA;
+ context.bin_desc.fw_version = get_bin_version(ta_bin);
+ context.bin_desc.size_bytes = ta_bin_len;
+ context.bin_desc.start_addr = ta_bin;
+
+ ret = psp_ta_load(psp, &context);
+
+ if (ret || context.resp_status) {
+ dev_err(adev->dev, "TA load via debugfs failed (%d) status %d\n",
+ ret, context.resp_status);
+ if (!ret)
+ ret = -EINVAL;
+ goto err_free_bin;
+ }
+
+ context.initialized = true;
+ if (copy_to_user((char *)buf, (void *)&context.session_id, sizeof(uint32_t)))
+ ret = -EFAULT;
+
+err_free_bin:
+ kfree(ta_bin);
+
+ return ret;
+}
+
+static ssize_t ta_if_unload_debugfs_write(struct file *fp, const char *buf, size_t len, loff_t *off)
+{
+ uint32_t ta_id = 0;
+ int ret = 0;
+
+ struct amdgpu_device *adev = (struct amdgpu_device *)file_inode(fp)->i_private;
+ struct psp_context *psp = &adev->psp;
+ struct ta_context context = {0};
+
+ if (!buf)
+ return -EINVAL;
+
+ ret = copy_from_user((void *)&ta_id, buf, sizeof(uint32_t));
+ if (ret)
+ return -EINVAL;
+
+ context.session_id = ta_id;
+
+ ret = psp_ta_unload(psp, &context);
+ if (!ret)
+ context.initialized = false;
+
+ return ret;
+}
+
+static ssize_t ta_if_invoke_debugfs_write(struct file *fp, const char *buf, size_t len, loff_t *off)
+{
+ uint32_t ta_id = 0;
+ uint32_t cmd_id = 0;
+ uint32_t shared_buf_len = 0;
+ uint8_t *shared_buf = NULL;
+ uint32_t copy_pos = 0;
+ int ret = 0;
+
+ struct amdgpu_device *adev = (struct amdgpu_device *)file_inode(fp)->i_private;
+ struct psp_context *psp = &adev->psp;
+ struct ta_context context = {0};
+
+ if (!buf)
+ return -EINVAL;
+
+ ret = copy_from_user((void *)&ta_id, &buf[copy_pos], sizeof(uint32_t));
+ if (ret)
+ return -EINVAL;
+ copy_pos += sizeof(uint32_t);
+
+ ret = copy_from_user((void *)&cmd_id, &buf[copy_pos], sizeof(uint32_t));
+ if (ret)
+ return -EINVAL;
+ copy_pos += sizeof(uint32_t);
+
+ ret = copy_from_user((void *)&shared_buf_len, &buf[copy_pos], sizeof(uint32_t));
+ if (ret)
+ return -EINVAL;
+ copy_pos += sizeof(uint32_t);
+
+ shared_buf = kzalloc(shared_buf_len, GFP_KERNEL);
+ if (!shared_buf)
+ return -ENOMEM;
+ if (copy_from_user((void *)shared_buf, &buf[copy_pos], shared_buf_len)) {
+ ret = -EFAULT;
+ goto err_free_shared_buf;
+ }
+
+ context.session_id = ta_id;
+
+ prep_ta_mem_context(psp, &context, shared_buf, shared_buf_len);
+
+ ret = psp_ta_invoke_indirect(psp, cmd_id, &context);
+
+ if (ret || context.resp_status) {
+ dev_err(adev->dev, "TA invoke via debugfs failed (%d) status %d\n",
+ ret, context.resp_status);
+ if (!ret)
+ ret = -EINVAL;
+ goto err_free_ta_shared_buf;
+ }
+
+ if (copy_to_user((char *)buf, context.mem_context.shared_buf, shared_buf_len))
+ ret = -EFAULT;
+
+err_free_ta_shared_buf:
+ psp_ta_free_shared_buf(&context.mem_context);
+
+err_free_shared_buf:
+ kfree(shared_buf);
+
+ return ret;
+}
+
+void amdgpu_ta_if_debugfs_init(struct amdgpu_device *adev)
+{
+ struct drm_minor *minor = adev_to_drm(adev)->primary;
+
+ struct dentry *dir = debugfs_create_dir("ta_if", minor->debugfs_root);
+
+ debugfs_create_file("ta_load", 0200, dir, adev,
+ &ta_load_debugfs_fops);
+
+ debugfs_create_file("ta_unload", 0200, dir,
+ adev, &ta_unload_debugfs_fops);
+
+ debugfs_create_file("ta_invoke", 0200, dir,
+ adev, &ta_invoke_debugfs_fops);
+}
+
+#else
+void amdgpu_ta_if_debugfs_init(struct amdgpu_device *adev)
+{
+
+}
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.h
new file mode 100644
index 000000000000..cfc1542f63ef
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __AMDGPU_PSP_TA_H__
+#define __AMDGPU_PSP_TA_H__
+
+void amdgpu_ta_if_debugfs_init(struct amdgpu_device *adev);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 424c22a841f4..7e126dff004f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -66,6 +66,8 @@ const char *ras_block_string[] = {
"mp1",
"fuse",
"mca",
+ "vcn",
+ "jpeg",
};
const char *ras_mca_block_string[] = {
@@ -1513,12 +1515,97 @@ static int amdgpu_ras_fs_fini(struct amdgpu_device *adev)
/* ras fs end */
/* ih begin */
+
+/* For the hardware that cannot enable bif ring for both ras_controller_irq
+ * and ras_err_evnet_athub_irq ih cookies, the driver has to poll status
+ * register to check whether the interrupt is triggered or not, and properly
+ * ack the interrupt if it is there
+ */
+void amdgpu_ras_interrupt_fatal_error_handler(struct amdgpu_device *adev)
+{
+ if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__PCIE_BIF))
+ return;
+
+ if (adev->nbio.ras &&
+ adev->nbio.ras->handle_ras_controller_intr_no_bifring)
+ adev->nbio.ras->handle_ras_controller_intr_no_bifring(adev);
+
+ if (adev->nbio.ras &&
+ adev->nbio.ras->handle_ras_err_event_athub_intr_no_bifring)
+ adev->nbio.ras->handle_ras_err_event_athub_intr_no_bifring(adev);
+}
+
+static void amdgpu_ras_interrupt_poison_consumption_handler(struct ras_manager *obj,
+ struct amdgpu_iv_entry *entry)
+{
+ bool poison_stat = true, need_reset = true;
+ struct amdgpu_device *adev = obj->adev;
+ struct ras_err_data err_data = {0, 0, 0, NULL};
+ struct amdgpu_ras_block_object *block_obj =
+ amdgpu_ras_get_ras_block(adev, obj->head.block, 0);
+
+ if (!adev->gmc.xgmi.connected_to_cpu)
+ amdgpu_umc_poison_handler(adev, &err_data, false);
+
+ /* both query_poison_status and handle_poison_consumption are optional */
+ if (block_obj && block_obj->hw_ops) {
+ if (block_obj->hw_ops->query_poison_status) {
+ poison_stat = block_obj->hw_ops->query_poison_status(adev);
+ if (!poison_stat)
+ dev_info(adev->dev, "No RAS poison status in %s poison IH.\n",
+ block_obj->ras_comm.name);
+ }
+
+ if (poison_stat && block_obj->hw_ops->handle_poison_consumption) {
+ poison_stat = block_obj->hw_ops->handle_poison_consumption(adev);
+ need_reset = poison_stat;
+ }
+ }
+
+ /* gpu reset is fallback for all failed cases */
+ if (need_reset)
+ amdgpu_ras_reset_gpu(adev);
+}
+
+static void amdgpu_ras_interrupt_poison_creation_handler(struct ras_manager *obj,
+ struct amdgpu_iv_entry *entry)
+{
+ dev_info(obj->adev->dev,
+ "Poison is created, no user action is needed.\n");
+}
+
+static void amdgpu_ras_interrupt_umc_handler(struct ras_manager *obj,
+ struct amdgpu_iv_entry *entry)
+{
+ struct ras_ih_data *data = &obj->ih_data;
+ struct ras_err_data err_data = {0, 0, 0, NULL};
+ int ret;
+
+ if (!data->cb)
+ return;
+
+ /* Let IP handle its data, maybe we need get the output
+ * from the callback to update the error type/count, etc
+ */
+ ret = data->cb(obj->adev, &err_data, entry);
+ /* ue will trigger an interrupt, and in that case
+ * we need do a reset to recovery the whole system.
+ * But leave IP do that recovery, here we just dispatch
+ * the error.
+ */
+ if (ret == AMDGPU_RAS_SUCCESS) {
+ /* these counts could be left as 0 if
+ * some blocks do not count error number
+ */
+ obj->err_data.ue_count += err_data.ue_count;
+ obj->err_data.ce_count += err_data.ce_count;
+ }
+}
+
static void amdgpu_ras_interrupt_handler(struct ras_manager *obj)
{
struct ras_ih_data *data = &obj->ih_data;
struct amdgpu_iv_entry entry;
- int ret;
- struct ras_err_data err_data = {0, 0, 0, NULL};
while (data->rptr != data->wptr) {
rmb();
@@ -1529,30 +1616,17 @@ static void amdgpu_ras_interrupt_handler(struct ras_manager *obj)
data->rptr = (data->aligned_element_size +
data->rptr) % data->ring_size;
- if (data->cb) {
- if (amdgpu_ras_is_poison_mode_supported(obj->adev) &&
- obj->head.block == AMDGPU_RAS_BLOCK__UMC)
- dev_info(obj->adev->dev,
- "Poison is created, no user action is needed.\n");
- else {
- /* Let IP handle its data, maybe we need get the output
- * from the callback to udpate the error type/count, etc
- */
- memset(&err_data, 0, sizeof(err_data));
- ret = data->cb(obj->adev, &err_data, &entry);
- /* ue will trigger an interrupt, and in that case
- * we need do a reset to recovery the whole system.
- * But leave IP do that recovery, here we just dispatch
- * the error.
- */
- if (ret == AMDGPU_RAS_SUCCESS) {
- /* these counts could be left as 0 if
- * some blocks do not count error number
- */
- obj->err_data.ue_count += err_data.ue_count;
- obj->err_data.ce_count += err_data.ce_count;
- }
- }
+ if (amdgpu_ras_is_poison_mode_supported(obj->adev)) {
+ if (obj->head.block == AMDGPU_RAS_BLOCK__UMC)
+ amdgpu_ras_interrupt_poison_creation_handler(obj, &entry);
+ else
+ amdgpu_ras_interrupt_poison_consumption_handler(obj, &entry);
+ } else {
+ if (obj->head.block == AMDGPU_RAS_BLOCK__UMC)
+ amdgpu_ras_interrupt_umc_handler(obj, &entry);
+ else
+ dev_warn(obj->adev->dev,
+ "No RAS interrupt handler for non-UMC block with poison disabled.\n");
}
}
}
@@ -1847,7 +1921,6 @@ static int amdgpu_ras_realloc_eh_data_space(struct amdgpu_device *adev,
void *bps = kmalloc(align_space * sizeof(*data->bps), GFP_KERNEL);
if (!bps) {
- kfree(bps);
return -ENOMEM;
}
@@ -2205,6 +2278,13 @@ static void amdgpu_ras_check_supported(struct amdgpu_device *adev)
dev_info(adev->dev, "SRAM ECC is active.\n");
adev->ras_hw_enabled |= ~(1 << AMDGPU_RAS_BLOCK__UMC |
1 << AMDGPU_RAS_BLOCK__DF);
+
+ if (adev->ip_versions[VCN_HWIP][0] == IP_VERSION(2, 6, 0))
+ adev->ras_hw_enabled |= (1 << AMDGPU_RAS_BLOCK__VCN |
+ 1 << AMDGPU_RAS_BLOCK__JPEG);
+ else
+ adev->ras_hw_enabled &= ~(1 << AMDGPU_RAS_BLOCK__VCN |
+ 1 << AMDGPU_RAS_BLOCK__JPEG);
} else {
dev_info(adev->dev, "SRAM ECC is not presented.\n");
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
index 9314fde81e68..b9a6fac2b8b2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
@@ -49,6 +49,8 @@ enum amdgpu_ras_block {
AMDGPU_RAS_BLOCK__MP1,
AMDGPU_RAS_BLOCK__FUSE,
AMDGPU_RAS_BLOCK__MCA,
+ AMDGPU_RAS_BLOCK__VCN,
+ AMDGPU_RAS_BLOCK__JPEG,
AMDGPU_RAS_BLOCK__LAST
};
@@ -506,6 +508,8 @@ struct amdgpu_ras_block_hw_ops {
void (*query_ras_error_address)(struct amdgpu_device *adev, void *ras_error_status);
void (*reset_ras_error_count)(struct amdgpu_device *adev);
void (*reset_ras_error_status)(struct amdgpu_device *adev);
+ bool (*query_poison_status)(struct amdgpu_device *adev);
+ bool (*handle_poison_consumption)(struct amdgpu_device *adev);
};
/* work flow
@@ -679,4 +683,5 @@ int amdgpu_ras_set_context(struct amdgpu_device *adev, struct amdgpu_ras *ras_co
int amdgpu_ras_register_ras_block(struct amdgpu_device *adev,
struct amdgpu_ras_block_object *ras_block_obj);
+void amdgpu_ras_interrupt_fatal_error_handler(struct amdgpu_device *adev);
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
index acfa207cf970..6546552e596c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h
@@ -30,12 +30,15 @@
#include <drm/ttm/ttm_resource.h>
#include <drm/ttm/ttm_range_manager.h>
+#include "amdgpu_vram_mgr.h"
+
/* state back for walking over vram_mgr and gtt_mgr allocations */
struct amdgpu_res_cursor {
uint64_t start;
uint64_t size;
uint64_t remaining;
- struct drm_mm_node *node;
+ void *node;
+ uint32_t mem_type;
};
/**
@@ -52,27 +55,63 @@ static inline void amdgpu_res_first(struct ttm_resource *res,
uint64_t start, uint64_t size,
struct amdgpu_res_cursor *cur)
{
+ struct drm_buddy_block *block;
+ struct list_head *head, *next;
struct drm_mm_node *node;
- if (!res || res->mem_type == TTM_PL_SYSTEM) {
- cur->start = start;
- cur->size = size;
- cur->remaining = size;
- cur->node = NULL;
- WARN_ON(res && start + size > res->num_pages << PAGE_SHIFT);
- return;
- }
+ if (!res)
+ goto fallback;
BUG_ON(start + size > res->num_pages << PAGE_SHIFT);
- node = to_ttm_range_mgr_node(res)->mm_nodes;
- while (start >= node->size << PAGE_SHIFT)
- start -= node++->size << PAGE_SHIFT;
+ cur->mem_type = res->mem_type;
+
+ switch (cur->mem_type) {
+ case TTM_PL_VRAM:
+ head = &to_amdgpu_vram_mgr_resource(res)->blocks;
+
+ block = list_first_entry_or_null(head,
+ struct drm_buddy_block,
+ link);
+ if (!block)
+ goto fallback;
+
+ while (start >= amdgpu_vram_mgr_block_size(block)) {
+ start -= amdgpu_vram_mgr_block_size(block);
+
+ next = block->link.next;
+ if (next != head)
+ block = list_entry(next, struct drm_buddy_block, link);
+ }
+
+ cur->start = amdgpu_vram_mgr_block_start(block) + start;
+ cur->size = min(amdgpu_vram_mgr_block_size(block) - start, size);
+ cur->remaining = size;
+ cur->node = block;
+ break;
+ case TTM_PL_TT:
+ node = to_ttm_range_mgr_node(res)->mm_nodes;
+ while (start >= node->size << PAGE_SHIFT)
+ start -= node++->size << PAGE_SHIFT;
+
+ cur->start = (node->start << PAGE_SHIFT) + start;
+ cur->size = min((node->size << PAGE_SHIFT) - start, size);
+ cur->remaining = size;
+ cur->node = node;
+ break;
+ default:
+ goto fallback;
+ }
- cur->start = (node->start << PAGE_SHIFT) + start;
- cur->size = min((node->size << PAGE_SHIFT) - start, size);
+ return;
+
+fallback:
+ cur->start = start;
+ cur->size = size;
cur->remaining = size;
- cur->node = node;
+ cur->node = NULL;
+ WARN_ON(res && start + size > res->num_pages << PAGE_SHIFT);
+ return;
}
/**
@@ -85,7 +124,9 @@ static inline void amdgpu_res_first(struct ttm_resource *res,
*/
static inline void amdgpu_res_next(struct amdgpu_res_cursor *cur, uint64_t size)
{
- struct drm_mm_node *node = cur->node;
+ struct drm_buddy_block *block;
+ struct drm_mm_node *node;
+ struct list_head *next;
BUG_ON(size > cur->remaining);
@@ -99,9 +140,27 @@ static inline void amdgpu_res_next(struct amdgpu_res_cursor *cur, uint64_t size)
return;
}
- cur->node = ++node;
- cur->start = node->start << PAGE_SHIFT;
- cur->size = min(node->size << PAGE_SHIFT, cur->remaining);
+ switch (cur->mem_type) {
+ case TTM_PL_VRAM:
+ block = cur->node;
+
+ next = block->link.next;
+ block = list_entry(next, struct drm_buddy_block, link);
+
+ cur->node = block;
+ cur->start = amdgpu_vram_mgr_block_start(block);
+ cur->size = min(amdgpu_vram_mgr_block_size(block), cur->remaining);
+ break;
+ case TTM_PL_TT:
+ node = cur->node;
+
+ cur->node = ++node;
+ cur->start = node->start << PAGE_SHIFT;
+ cur->size = min(node->size << PAGE_SHIFT, cur->remaining);
+ break;
+ default:
+ return;
+ }
}
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_smuio.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_smuio.h
index 484bb3dcec47..c7a823f3f2c5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_smuio.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_smuio.h
@@ -27,7 +27,7 @@ struct amdgpu_smuio_funcs {
u32 (*get_rom_index_offset)(struct amdgpu_device *adev);
u32 (*get_rom_data_offset)(struct amdgpu_device *adev);
void (*update_rom_clock_gating)(struct amdgpu_device *adev, bool enable);
- void (*get_clock_gating_state)(struct amdgpu_device *adev, u32 *flags);
+ void (*get_clock_gating_state)(struct amdgpu_device *adev, u64 *flags);
u32 (*get_die_id)(struct amdgpu_device *adev);
u32 (*get_socket_id)(struct amdgpu_device *adev);
bool (*is_host_gpu_xgmi_supported)(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
index 40e06745fae9..504af1b93bfa 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
@@ -51,7 +51,6 @@ static struct kmem_cache *amdgpu_sync_slab;
void amdgpu_sync_create(struct amdgpu_sync *sync)
{
hash_init(sync->fences);
- sync->last_vm_update = NULL;
}
/**
@@ -171,23 +170,6 @@ int amdgpu_sync_fence(struct amdgpu_sync *sync, struct dma_fence *f)
return 0;
}
-/**
- * amdgpu_sync_vm_fence - remember to sync to this VM fence
- *
- * @sync: sync object to add fence to
- * @fence: the VM fence to add
- *
- * Add the fence to the sync object and remember it as VM update.
- */
-int amdgpu_sync_vm_fence(struct amdgpu_sync *sync, struct dma_fence *fence)
-{
- if (!fence)
- return 0;
-
- amdgpu_sync_keep_later(&sync->last_vm_update, fence);
- return amdgpu_sync_fence(sync, fence);
-}
-
/* Determine based on the owner and mode if we should sync to a fence or not */
static bool amdgpu_sync_test_fence(struct amdgpu_device *adev,
enum amdgpu_sync_mode mode,
@@ -259,7 +241,8 @@ int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync,
if (resv == NULL)
return -EINVAL;
- dma_resv_for_each_fence(&cursor, resv, true, f) {
+ /* TODO: Use DMA_RESV_USAGE_READ here */
+ dma_resv_for_each_fence(&cursor, resv, DMA_RESV_USAGE_BOOKKEEP, f) {
dma_fence_chain_for_each(f, f) {
struct dma_fence *tmp = dma_fence_chain_contained(f);
@@ -376,9 +359,6 @@ int amdgpu_sync_clone(struct amdgpu_sync *source, struct amdgpu_sync *clone)
}
}
- dma_fence_put(clone->last_vm_update);
- clone->last_vm_update = dma_fence_get(source->last_vm_update);
-
return 0;
}
@@ -419,8 +399,6 @@ void amdgpu_sync_free(struct amdgpu_sync *sync)
dma_fence_put(e->fence);
kmem_cache_free(amdgpu_sync_slab, e);
}
-
- dma_fence_put(sync->last_vm_update);
}
/**
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h
index 7c0fe20c470d..2d5c613cda10 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h
@@ -43,12 +43,10 @@ enum amdgpu_sync_mode {
*/
struct amdgpu_sync {
DECLARE_HASHTABLE(fences, 4);
- struct dma_fence *last_vm_update;
};
void amdgpu_sync_create(struct amdgpu_sync *sync);
int amdgpu_sync_fence(struct amdgpu_sync *sync, struct dma_fence *f);
-int amdgpu_sync_vm_fence(struct amdgpu_sync *sync, struct dma_fence *fence);
int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync,
struct dma_resv *resv, enum amdgpu_sync_mode mode,
void *owner);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 4b9ee6e27f74..ec26edd4f4d8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -1344,7 +1344,8 @@ static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo,
* If true, then return false as any KFD process needs all its BOs to
* be resident to run successfully
*/
- dma_resv_for_each_fence(&resv_cursor, bo->base.resv, true, f) {
+ dma_resv_for_each_fence(&resv_cursor, bo->base.resv,
+ DMA_RESV_USAGE_BOOKKEEP, f) {
if (amdkfd_fence_check_mm(f, current->mm))
return false;
}
@@ -1547,7 +1548,6 @@ static struct ttm_device_funcs amdgpu_bo_driver = {
.io_mem_reserve = &amdgpu_ttm_io_mem_reserve,
.io_mem_pfn = amdgpu_ttm_io_mem_pfn,
.access_memory = &amdgpu_ttm_access_memory,
- .del_from_lru_notify = &amdgpu_vm_del_from_lru_notify
};
/*
@@ -2161,17 +2161,6 @@ int amdgpu_ttm_evict_resources(struct amdgpu_device *adev, int mem_type)
#if defined(CONFIG_DEBUG_FS)
-static int amdgpu_mm_vram_table_show(struct seq_file *m, void *unused)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *)m->private;
- struct ttm_resource_manager *man = ttm_manager_type(&adev->mman.bdev,
- TTM_PL_VRAM);
- struct drm_printer p = drm_seq_file_printer(m);
-
- ttm_resource_manager_debug(man, &p);
- return 0;
-}
-
static int amdgpu_ttm_page_pool_show(struct seq_file *m, void *unused)
{
struct amdgpu_device *adev = (struct amdgpu_device *)m->private;
@@ -2179,55 +2168,6 @@ static int amdgpu_ttm_page_pool_show(struct seq_file *m, void *unused)
return ttm_pool_debugfs(&adev->mman.bdev.pool, m);
}
-static int amdgpu_mm_tt_table_show(struct seq_file *m, void *unused)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *)m->private;
- struct ttm_resource_manager *man = ttm_manager_type(&adev->mman.bdev,
- TTM_PL_TT);
- struct drm_printer p = drm_seq_file_printer(m);
-
- ttm_resource_manager_debug(man, &p);
- return 0;
-}
-
-static int amdgpu_mm_gds_table_show(struct seq_file *m, void *unused)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *)m->private;
- struct ttm_resource_manager *man = ttm_manager_type(&adev->mman.bdev,
- AMDGPU_PL_GDS);
- struct drm_printer p = drm_seq_file_printer(m);
-
- ttm_resource_manager_debug(man, &p);
- return 0;
-}
-
-static int amdgpu_mm_gws_table_show(struct seq_file *m, void *unused)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *)m->private;
- struct ttm_resource_manager *man = ttm_manager_type(&adev->mman.bdev,
- AMDGPU_PL_GWS);
- struct drm_printer p = drm_seq_file_printer(m);
-
- ttm_resource_manager_debug(man, &p);
- return 0;
-}
-
-static int amdgpu_mm_oa_table_show(struct seq_file *m, void *unused)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *)m->private;
- struct ttm_resource_manager *man = ttm_manager_type(&adev->mman.bdev,
- AMDGPU_PL_OA);
- struct drm_printer p = drm_seq_file_printer(m);
-
- ttm_resource_manager_debug(man, &p);
- return 0;
-}
-
-DEFINE_SHOW_ATTRIBUTE(amdgpu_mm_vram_table);
-DEFINE_SHOW_ATTRIBUTE(amdgpu_mm_tt_table);
-DEFINE_SHOW_ATTRIBUTE(amdgpu_mm_gds_table);
-DEFINE_SHOW_ATTRIBUTE(amdgpu_mm_gws_table);
-DEFINE_SHOW_ATTRIBUTE(amdgpu_mm_oa_table);
DEFINE_SHOW_ATTRIBUTE(amdgpu_ttm_page_pool);
/*
@@ -2437,17 +2377,23 @@ void amdgpu_ttm_debugfs_init(struct amdgpu_device *adev)
&amdgpu_ttm_vram_fops, adev->gmc.mc_vram_size);
debugfs_create_file("amdgpu_iomem", 0444, root, adev,
&amdgpu_ttm_iomem_fops);
- debugfs_create_file("amdgpu_vram_mm", 0444, root, adev,
- &amdgpu_mm_vram_table_fops);
- debugfs_create_file("amdgpu_gtt_mm", 0444, root, adev,
- &amdgpu_mm_tt_table_fops);
- debugfs_create_file("amdgpu_gds_mm", 0444, root, adev,
- &amdgpu_mm_gds_table_fops);
- debugfs_create_file("amdgpu_gws_mm", 0444, root, adev,
- &amdgpu_mm_gws_table_fops);
- debugfs_create_file("amdgpu_oa_mm", 0444, root, adev,
- &amdgpu_mm_oa_table_fops);
debugfs_create_file("ttm_page_pool", 0444, root, adev,
&amdgpu_ttm_page_pool_fops);
+ ttm_resource_manager_create_debugfs(ttm_manager_type(&adev->mman.bdev,
+ TTM_PL_VRAM),
+ root, "amdgpu_vram_mm");
+ ttm_resource_manager_create_debugfs(ttm_manager_type(&adev->mman.bdev,
+ TTM_PL_TT),
+ root, "amdgpu_gtt_mm");
+ ttm_resource_manager_create_debugfs(ttm_manager_type(&adev->mman.bdev,
+ AMDGPU_PL_GDS),
+ root, "amdgpu_gds_mm");
+ ttm_resource_manager_create_debugfs(ttm_manager_type(&adev->mman.bdev,
+ AMDGPU_PL_GWS),
+ root, "amdgpu_gws_mm");
+ ttm_resource_manager_create_debugfs(ttm_manager_type(&adev->mman.bdev,
+ AMDGPU_PL_OA),
+ root, "amdgpu_oa_mm");
+
#endif
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
index 9120ae80ef52..6a70818039dd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
@@ -26,6 +26,7 @@
#include <linux/dma-direction.h>
#include <drm/gpu_scheduler.h>
+#include "amdgpu_vram_mgr.h"
#include "amdgpu.h"
#define AMDGPU_PL_GDS (TTM_PL_PRIV + 0)
@@ -38,15 +39,6 @@
#define AMDGPU_POISON 0xd0bed0be
-struct amdgpu_vram_mgr {
- struct ttm_resource_manager manager;
- struct drm_mm mm;
- spinlock_t lock;
- struct list_head reservations_pending;
- struct list_head reserved_pages;
- atomic64_t vis_usage;
-};
-
struct amdgpu_gtt_mgr {
struct ttm_resource_manager manager;
struct drm_mm mm;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
index ca3350502618..016477fa2f90 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
@@ -760,3 +760,36 @@ int amdgpu_ucode_init_bo(struct amdgpu_device *adev)
}
return 0;
}
+
+void amdgpu_ucode_ip_version_decode(struct amdgpu_device *adev, int block_type, char *ucode_prefix, int len)
+{
+ int maj, min, rev;
+ char *ip_name;
+ uint32_t version = adev->ip_versions[block_type][0];
+
+ switch (block_type) {
+ case GC_HWIP:
+ ip_name = "gc";
+ break;
+ case SDMA0_HWIP:
+ ip_name = "sdma";
+ break;
+ case MP0_HWIP:
+ ip_name = "psp";
+ break;
+ case MP1_HWIP:
+ ip_name = "smu";
+ break;
+ case UVD_HWIP:
+ ip_name = "vcn";
+ break;
+ default:
+ BUG();
+ }
+
+ maj = IP_VERSION_MAJ(version);
+ min = IP_VERSION_MIN(version);
+ rev = IP_VERSION_REV(version);
+
+ snprintf(ucode_prefix, len, "%s_%d_%d_%d", ip_name, maj, min, rev);
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h
index 40dffbac85a0..864984d0d3ef 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h
@@ -463,4 +463,6 @@ amdgpu_ucode_get_load_type(struct amdgpu_device *adev, int load_type);
const char *amdgpu_ucode_name(enum AMDGPU_UCODE_ID ucode_id);
+void amdgpu_ucode_ip_version_decode(struct amdgpu_device *adev, int block_type, char *ucode_prefix, int len);
+
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
index 39c74d9fa7cc..6eac649499d3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
@@ -1163,7 +1163,8 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
ib->length_dw = 16;
if (direct) {
- r = dma_resv_wait_timeout(bo->tbo.base.resv, true, false,
+ r = dma_resv_wait_timeout(bo->tbo.base.resv,
+ DMA_RESV_USAGE_KERNEL, false,
msecs_to_jiffies(10));
if (r == 0)
r = -ETIMEDOUT;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
index f99093f2ebc7..a0ee828a4a97 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
@@ -52,7 +52,7 @@
#define FIRMWARE_ALDEBARAN "amdgpu/aldebaran_vcn.bin"
#define FIRMWARE_BEIGE_GOBY "amdgpu/beige_goby_vcn.bin"
#define FIRMWARE_YELLOW_CARP "amdgpu/yellow_carp_vcn.bin"
-#define FIRMWARE_VCN_3_1_2 "amdgpu/vcn_3_1_2_vcn.bin"
+#define FIRMWARE_VCN_3_1_2 "amdgpu/vcn_3_1_2.bin"
MODULE_FIRMWARE(FIRMWARE_RAVEN);
MODULE_FIRMWARE(FIRMWARE_PICASSO);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
index f06fb7f882e2..5f7da4c19822 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
@@ -24,6 +24,8 @@
#ifndef __AMDGPU_VCN_H__
#define __AMDGPU_VCN_H__
+#include "amdgpu_ras.h"
+
#define AMDGPU_VCN_STACK_SIZE (128*1024)
#define AMDGPU_VCN_CONTEXT_SIZE (512*1024)
@@ -164,6 +166,11 @@
#define AMDGPU_VCN_IB_FLAG_DECODE_BUFFER 0x00000001
#define AMDGPU_VCN_CMD_FLAG_MSG_BUFFER 0x00000001
+#define VCN_CODEC_DISABLE_MASK_AV1 (1 << 0)
+#define VCN_CODEC_DISABLE_MASK_VP9 (1 << 1)
+#define VCN_CODEC_DISABLE_MASK_HEVC (1 << 2)
+#define VCN_CODEC_DISABLE_MASK_H264 (1 << 3)
+
enum fw_queue_mode {
FW_QUEUE_RING_RESET = 1,
FW_QUEUE_DPG_HOLD_OFF = 2,
@@ -233,6 +240,10 @@ struct amdgpu_vcn_inst {
struct amdgpu_vcn_fw_shared fw_shared;
};
+struct amdgpu_vcn_ras {
+ struct amdgpu_ras_block_object ras_block;
+};
+
struct amdgpu_vcn {
unsigned fw_version;
struct delayed_work idle_work;
@@ -244,6 +255,7 @@ struct amdgpu_vcn {
uint8_t num_vcn_inst;
struct amdgpu_vcn_inst inst[AMDGPU_MAX_VCN_INSTANCES];
uint8_t vcn_config[AMDGPU_MAX_VCN_INSTANCES];
+ uint32_t vcn_codec_disable_mask[AMDGPU_MAX_VCN_INSTANCES];
struct amdgpu_vcn_reg internal;
struct mutex vcn_pg_lock;
struct mutex vcn1_jpeg1_workaround;
@@ -252,6 +264,9 @@ struct amdgpu_vcn {
unsigned harvest_config;
int (*pause_dpg_mode)(struct amdgpu_device *adev,
int inst_idx, struct dpg_pause_state *new_state);
+
+ struct ras_common_if *ras_if;
+ struct amdgpu_vcn_ras *ras;
};
struct amdgpu_fw_shared_rb_ptrs_struct {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
index a025f080aa6a..a8ecf04389b3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -23,7 +23,12 @@
#include <linux/module.h>
+#ifdef CONFIG_X86
+#include <asm/hypervisor.h>
+#endif
+
#include <drm/drm_drv.h>
+#include <xen/xen.h>
#include "amdgpu.h"
#include "amdgpu_ras.h"
@@ -710,7 +715,8 @@ void amdgpu_detect_virtualization(struct amdgpu_device *adev)
adev->virt.caps |= AMDGPU_SRIOV_CAPS_ENABLE_IOV;
if (!reg) {
- if (is_virtual_machine()) /* passthrough mode exclus sriov mod */
+ /* passthrough mode exclus sriov mod */
+ if (is_virtual_machine() && !xen_initial_domain())
adev->virt.caps |= AMDGPU_PASSTHROUGH_MODE;
}
@@ -723,8 +729,12 @@ void amdgpu_detect_virtualization(struct amdgpu_device *adev)
break;
case CHIP_VEGA10:
soc15_set_virt_ops(adev);
- /* send a dummy GPU_INIT_DATA request to host on vega10 */
- amdgpu_virt_request_init_data(adev);
+#ifdef CONFIG_X86
+ /* not send GPU_INIT_DATA with MS_HYPERV*/
+ if (!hypervisor_is_type(X86_HYPER_MS_HYPERV))
+#endif
+ /* send a dummy GPU_INIT_DATA request to host on vega10 */
+ amdgpu_virt_request_init_data(adev);
break;
case CHIP_VEGA20:
case CHIP_ARCTURUS:
@@ -862,11 +872,11 @@ static u32 amdgpu_virt_rlcg_reg_rw(struct amdgpu_device *adev, u32 offset, u32 v
uint32_t timeout = 50000;
uint32_t i, tmp;
uint32_t ret = 0;
- static void *scratch_reg0;
- static void *scratch_reg1;
- static void *scratch_reg2;
- static void *scratch_reg3;
- static void *spare_int;
+ void *scratch_reg0;
+ void *scratch_reg1;
+ void *scratch_reg2;
+ void *scratch_reg3;
+ void *spare_int;
if (!adev->gfx.rlc.rlcg_reg_access_supported) {
dev_err(adev->dev,
@@ -919,7 +929,7 @@ static u32 amdgpu_virt_rlcg_reg_rw(struct amdgpu_device *adev, u32 offset, u32 v
"wrong operation type, rlcg failed to program reg: 0x%05x\n", offset);
} else if (tmp & AMDGPU_RLCG_REG_NOT_IN_RANGE) {
dev_err(adev->dev,
- "regiser is not in range, rlcg failed to program reg: 0x%05x\n", offset);
+ "register is not in range, rlcg failed to program reg: 0x%05x\n", offset);
} else {
dev_err(adev->dev,
"unknown error type, rlcg failed to program reg: 0x%05x\n", offset);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c
index 5224d9a39737..576849e95296 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c
@@ -302,9 +302,6 @@ static int amdgpu_vkms_prepare_fb(struct drm_plane *plane,
struct drm_gem_object *obj;
struct amdgpu_device *adev;
struct amdgpu_bo *rbo;
- struct list_head list;
- struct ttm_validate_buffer tv;
- struct ww_acquire_ctx ticket;
uint32_t domain;
int r;
@@ -316,18 +313,19 @@ static int amdgpu_vkms_prepare_fb(struct drm_plane *plane,
obj = new_state->fb->obj[0];
rbo = gem_to_amdgpu_bo(obj);
adev = amdgpu_ttm_adev(rbo->tbo.bdev);
- INIT_LIST_HEAD(&list);
- tv.bo = &rbo->tbo;
- tv.num_shared = 1;
- list_add(&tv.head, &list);
-
- r = ttm_eu_reserve_buffers(&ticket, &list, false, NULL);
+ r = amdgpu_bo_reserve(rbo, true);
if (r) {
dev_err(adev->dev, "fail to reserve bo (%d)\n", r);
return r;
}
+ r = dma_resv_reserve_fences(rbo->tbo.base.resv, 1);
+ if (r) {
+ dev_err(adev->dev, "allocating fence slot failed (%d)\n", r);
+ goto error_unlock;
+ }
+
if (plane->type != DRM_PLANE_TYPE_CURSOR)
domain = amdgpu_display_supported_domains(adev, rbo->flags);
else
@@ -337,25 +335,29 @@ static int amdgpu_vkms_prepare_fb(struct drm_plane *plane,
if (unlikely(r != 0)) {
if (r != -ERESTARTSYS)
DRM_ERROR("Failed to pin framebuffer with error %d\n", r);
- ttm_eu_backoff_reservation(&ticket, &list);
- return r;
+ goto error_unlock;
}
r = amdgpu_ttm_alloc_gart(&rbo->tbo);
if (unlikely(r != 0)) {
- amdgpu_bo_unpin(rbo);
- ttm_eu_backoff_reservation(&ticket, &list);
DRM_ERROR("%p bind failed\n", rbo);
- return r;
+ goto error_unpin;
}
- ttm_eu_backoff_reservation(&ticket, &list);
+ amdgpu_bo_unreserve(rbo);
afb->address = amdgpu_bo_gpu_offset(rbo);
amdgpu_bo_ref(rbo);
return 0;
+
+error_unpin:
+ amdgpu_bo_unpin(rbo);
+
+error_unlock:
+ amdgpu_bo_unreserve(rbo);
+ return r;
}
static void amdgpu_vkms_cleanup_fb(struct drm_plane *plane,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index fc4563cf2828..f9479e23de18 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -89,6 +89,21 @@ struct amdgpu_prt_cb {
};
/**
+ * struct amdgpu_vm_tlb_seq_cb - Helper to increment the TLB flush sequence
+ */
+struct amdgpu_vm_tlb_seq_cb {
+ /**
+ * @vm: pointer to the amdgpu_vm structure to set the fence sequence on
+ */
+ struct amdgpu_vm *vm;
+
+ /**
+ * @cb: callback
+ */
+ struct dma_fence_cb cb;
+};
+
+/**
* amdgpu_vm_set_pasid - manage pasid and vm ptr mapping
*
* @adev: amdgpu_device pointer
@@ -155,108 +170,6 @@ static inline void amdgpu_vm_eviction_unlock(struct amdgpu_vm *vm)
}
/**
- * amdgpu_vm_level_shift - return the addr shift for each level
- *
- * @adev: amdgpu_device pointer
- * @level: VMPT level
- *
- * Returns:
- * The number of bits the pfn needs to be right shifted for a level.
- */
-static unsigned amdgpu_vm_level_shift(struct amdgpu_device *adev,
- unsigned level)
-{
- switch (level) {
- case AMDGPU_VM_PDB2:
- case AMDGPU_VM_PDB1:
- case AMDGPU_VM_PDB0:
- return 9 * (AMDGPU_VM_PDB0 - level) +
- adev->vm_manager.block_size;
- case AMDGPU_VM_PTB:
- return 0;
- default:
- return ~0;
- }
-}
-
-/**
- * amdgpu_vm_num_entries - return the number of entries in a PD/PT
- *
- * @adev: amdgpu_device pointer
- * @level: VMPT level
- *
- * Returns:
- * The number of entries in a page directory or page table.
- */
-static unsigned amdgpu_vm_num_entries(struct amdgpu_device *adev,
- unsigned level)
-{
- unsigned shift = amdgpu_vm_level_shift(adev,
- adev->vm_manager.root_level);
-
- if (level == adev->vm_manager.root_level)
- /* For the root directory */
- return round_up(adev->vm_manager.max_pfn, 1ULL << shift)
- >> shift;
- else if (level != AMDGPU_VM_PTB)
- /* Everything in between */
- return 512;
- else
- /* For the page tables on the leaves */
- return AMDGPU_VM_PTE_COUNT(adev);
-}
-
-/**
- * amdgpu_vm_num_ats_entries - return the number of ATS entries in the root PD
- *
- * @adev: amdgpu_device pointer
- *
- * Returns:
- * The number of entries in the root page directory which needs the ATS setting.
- */
-static unsigned amdgpu_vm_num_ats_entries(struct amdgpu_device *adev)
-{
- unsigned shift;
-
- shift = amdgpu_vm_level_shift(adev, adev->vm_manager.root_level);
- return AMDGPU_GMC_HOLE_START >> (shift + AMDGPU_GPU_PAGE_SHIFT);
-}
-
-/**
- * amdgpu_vm_entries_mask - the mask to get the entry number of a PD/PT
- *
- * @adev: amdgpu_device pointer
- * @level: VMPT level
- *
- * Returns:
- * The mask to extract the entry number of a PD/PT from an address.
- */
-static uint32_t amdgpu_vm_entries_mask(struct amdgpu_device *adev,
- unsigned int level)
-{
- if (level <= adev->vm_manager.root_level)
- return 0xffffffff;
- else if (level != AMDGPU_VM_PTB)
- return 0x1ff;
- else
- return AMDGPU_VM_PTE_COUNT(adev) - 1;
-}
-
-/**
- * amdgpu_vm_bo_size - returns the size of the BOs in bytes
- *
- * @adev: amdgpu_device pointer
- * @level: VMPT level
- *
- * Returns:
- * The size of the BO for a page directory or page table in bytes.
- */
-static unsigned amdgpu_vm_bo_size(struct amdgpu_device *adev, unsigned level)
-{
- return AMDGPU_GPU_PAGE_ALIGN(amdgpu_vm_num_entries(adev, level) * 8);
-}
-
-/**
* amdgpu_vm_bo_evicted - vm_bo is evicted
*
* @vm_bo: vm_bo which is evicted
@@ -358,9 +271,8 @@ static void amdgpu_vm_bo_done(struct amdgpu_vm_bo_base *vm_bo)
* Initialize a bo_va_base structure and add it to the appropriate lists
*
*/
-static void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base,
- struct amdgpu_vm *vm,
- struct amdgpu_bo *bo)
+void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base,
+ struct amdgpu_vm *vm, struct amdgpu_bo *bo)
{
base->vm = vm;
base->bo = bo;
@@ -377,7 +289,7 @@ static void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base,
dma_resv_assert_held(vm->root.bo->tbo.base.resv);
- vm->bulk_moveable = false;
+ ttm_bo_set_bulk_move(&bo->tbo, &vm->lru_bulk_move);
if (bo->tbo.type == ttm_bo_type_kernel && bo->parent)
amdgpu_vm_bo_relocated(base);
else
@@ -396,228 +308,6 @@ static void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base,
}
/**
- * amdgpu_vm_pt_parent - get the parent page directory
- *
- * @pt: child page table
- *
- * Helper to get the parent entry for the child page table. NULL if we are at
- * the root page directory.
- */
-static struct amdgpu_vm_bo_base *amdgpu_vm_pt_parent(struct amdgpu_vm_bo_base *pt)
-{
- struct amdgpu_bo *parent = pt->bo->parent;
-
- if (!parent)
- return NULL;
-
- return parent->vm_bo;
-}
-
-/*
- * amdgpu_vm_pt_cursor - state for for_each_amdgpu_vm_pt
- */
-struct amdgpu_vm_pt_cursor {
- uint64_t pfn;
- struct amdgpu_vm_bo_base *parent;
- struct amdgpu_vm_bo_base *entry;
- unsigned level;
-};
-
-/**
- * amdgpu_vm_pt_start - start PD/PT walk
- *
- * @adev: amdgpu_device pointer
- * @vm: amdgpu_vm structure
- * @start: start address of the walk
- * @cursor: state to initialize
- *
- * Initialize a amdgpu_vm_pt_cursor to start a walk.
- */
-static void amdgpu_vm_pt_start(struct amdgpu_device *adev,
- struct amdgpu_vm *vm, uint64_t start,
- struct amdgpu_vm_pt_cursor *cursor)
-{
- cursor->pfn = start;
- cursor->parent = NULL;
- cursor->entry = &vm->root;
- cursor->level = adev->vm_manager.root_level;
-}
-
-/**
- * amdgpu_vm_pt_descendant - go to child node
- *
- * @adev: amdgpu_device pointer
- * @cursor: current state
- *
- * Walk to the child node of the current node.
- * Returns:
- * True if the walk was possible, false otherwise.
- */
-static bool amdgpu_vm_pt_descendant(struct amdgpu_device *adev,
- struct amdgpu_vm_pt_cursor *cursor)
-{
- unsigned mask, shift, idx;
-
- if ((cursor->level == AMDGPU_VM_PTB) || !cursor->entry ||
- !cursor->entry->bo)
- return false;
-
- mask = amdgpu_vm_entries_mask(adev, cursor->level);
- shift = amdgpu_vm_level_shift(adev, cursor->level);
-
- ++cursor->level;
- idx = (cursor->pfn >> shift) & mask;
- cursor->parent = cursor->entry;
- cursor->entry = &to_amdgpu_bo_vm(cursor->entry->bo)->entries[idx];
- return true;
-}
-
-/**
- * amdgpu_vm_pt_sibling - go to sibling node
- *
- * @adev: amdgpu_device pointer
- * @cursor: current state
- *
- * Walk to the sibling node of the current node.
- * Returns:
- * True if the walk was possible, false otherwise.
- */
-static bool amdgpu_vm_pt_sibling(struct amdgpu_device *adev,
- struct amdgpu_vm_pt_cursor *cursor)
-{
- unsigned shift, num_entries;
-
- /* Root doesn't have a sibling */
- if (!cursor->parent)
- return false;
-
- /* Go to our parents and see if we got a sibling */
- shift = amdgpu_vm_level_shift(adev, cursor->level - 1);
- num_entries = amdgpu_vm_num_entries(adev, cursor->level - 1);
-
- if (cursor->entry == &to_amdgpu_bo_vm(cursor->parent->bo)->entries[num_entries - 1])
- return false;
-
- cursor->pfn += 1ULL << shift;
- cursor->pfn &= ~((1ULL << shift) - 1);
- ++cursor->entry;
- return true;
-}
-
-/**
- * amdgpu_vm_pt_ancestor - go to parent node
- *
- * @cursor: current state
- *
- * Walk to the parent node of the current node.
- * Returns:
- * True if the walk was possible, false otherwise.
- */
-static bool amdgpu_vm_pt_ancestor(struct amdgpu_vm_pt_cursor *cursor)
-{
- if (!cursor->parent)
- return false;
-
- --cursor->level;
- cursor->entry = cursor->parent;
- cursor->parent = amdgpu_vm_pt_parent(cursor->parent);
- return true;
-}
-
-/**
- * amdgpu_vm_pt_next - get next PD/PT in hieratchy
- *
- * @adev: amdgpu_device pointer
- * @cursor: current state
- *
- * Walk the PD/PT tree to the next node.
- */
-static void amdgpu_vm_pt_next(struct amdgpu_device *adev,
- struct amdgpu_vm_pt_cursor *cursor)
-{
- /* First try a newborn child */
- if (amdgpu_vm_pt_descendant(adev, cursor))
- return;
-
- /* If that didn't worked try to find a sibling */
- while (!amdgpu_vm_pt_sibling(adev, cursor)) {
- /* No sibling, go to our parents and grandparents */
- if (!amdgpu_vm_pt_ancestor(cursor)) {
- cursor->pfn = ~0ll;
- return;
- }
- }
-}
-
-/**
- * amdgpu_vm_pt_first_dfs - start a deep first search
- *
- * @adev: amdgpu_device structure
- * @vm: amdgpu_vm structure
- * @start: optional cursor to start with
- * @cursor: state to initialize
- *
- * Starts a deep first traversal of the PD/PT tree.
- */
-static void amdgpu_vm_pt_first_dfs(struct amdgpu_device *adev,
- struct amdgpu_vm *vm,
- struct amdgpu_vm_pt_cursor *start,
- struct amdgpu_vm_pt_cursor *cursor)
-{
- if (start)
- *cursor = *start;
- else
- amdgpu_vm_pt_start(adev, vm, 0, cursor);
- while (amdgpu_vm_pt_descendant(adev, cursor));
-}
-
-/**
- * amdgpu_vm_pt_continue_dfs - check if the deep first search should continue
- *
- * @start: starting point for the search
- * @entry: current entry
- *
- * Returns:
- * True when the search should continue, false otherwise.
- */
-static bool amdgpu_vm_pt_continue_dfs(struct amdgpu_vm_pt_cursor *start,
- struct amdgpu_vm_bo_base *entry)
-{
- return entry && (!start || entry != start->entry);
-}
-
-/**
- * amdgpu_vm_pt_next_dfs - get the next node for a deep first search
- *
- * @adev: amdgpu_device structure
- * @cursor: current state
- *
- * Move the cursor to the next node in a deep first search.
- */
-static void amdgpu_vm_pt_next_dfs(struct amdgpu_device *adev,
- struct amdgpu_vm_pt_cursor *cursor)
-{
- if (!cursor->entry)
- return;
-
- if (!cursor->parent)
- cursor->entry = NULL;
- else if (amdgpu_vm_pt_sibling(adev, cursor))
- while (amdgpu_vm_pt_descendant(adev, cursor));
- else
- amdgpu_vm_pt_ancestor(cursor);
-}
-
-/*
- * for_each_amdgpu_vm_pt_dfs_safe - safe deep first search of all PDs/PTs
- */
-#define for_each_amdgpu_vm_pt_dfs_safe(adev, vm, start, cursor, entry) \
- for (amdgpu_vm_pt_first_dfs((adev), (vm), (start), &(cursor)), \
- (entry) = (cursor).entry, amdgpu_vm_pt_next_dfs((adev), &(cursor));\
- amdgpu_vm_pt_continue_dfs((start), (entry)); \
- (entry) = (cursor).entry, amdgpu_vm_pt_next_dfs((adev), &(cursor)))
-
-/**
* amdgpu_vm_get_pd_bo - add the VM PD to a validation list
*
* @vm: vm providing the BOs
@@ -640,36 +330,6 @@ void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm,
}
/**
- * amdgpu_vm_del_from_lru_notify - update bulk_moveable flag
- *
- * @bo: BO which was removed from the LRU
- *
- * Make sure the bulk_moveable flag is updated when a BO is removed from the
- * LRU.
- */
-void amdgpu_vm_del_from_lru_notify(struct ttm_buffer_object *bo)
-{
- struct amdgpu_bo *abo;
- struct amdgpu_vm_bo_base *bo_base;
-
- if (!amdgpu_bo_is_amdgpu_bo(bo))
- return;
-
- if (bo->pin_count)
- return;
-
- abo = ttm_to_amdgpu_bo(bo);
- if (!abo->parent)
- return;
- for (bo_base = abo->vm_bo; bo_base; bo_base = bo_base->next) {
- struct amdgpu_vm *vm = bo_base->vm;
-
- if (abo->tbo.base.resv == vm->root.bo->tbo.base.resv)
- vm->bulk_moveable = false;
- }
-
-}
-/**
* amdgpu_vm_move_to_lru_tail - move all BOs to the end of LRU
*
* @adev: amdgpu device pointer
@@ -681,35 +341,9 @@ void amdgpu_vm_del_from_lru_notify(struct ttm_buffer_object *bo)
void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev,
struct amdgpu_vm *vm)
{
- struct amdgpu_vm_bo_base *bo_base;
-
- if (vm->bulk_moveable) {
- spin_lock(&adev->mman.bdev.lru_lock);
- ttm_bo_bulk_move_lru_tail(&vm->lru_bulk_move);
- spin_unlock(&adev->mman.bdev.lru_lock);
- return;
- }
-
- memset(&vm->lru_bulk_move, 0, sizeof(vm->lru_bulk_move));
-
spin_lock(&adev->mman.bdev.lru_lock);
- list_for_each_entry(bo_base, &vm->idle, vm_status) {
- struct amdgpu_bo *bo = bo_base->bo;
- struct amdgpu_bo *shadow = amdgpu_bo_shadowed(bo);
-
- if (!bo->parent)
- continue;
-
- ttm_bo_move_to_lru_tail(&bo->tbo, bo->tbo.resource,
- &vm->lru_bulk_move);
- if (shadow)
- ttm_bo_move_to_lru_tail(&shadow->tbo,
- shadow->tbo.resource,
- &vm->lru_bulk_move);
- }
+ ttm_lru_bulk_move_tail(&vm->lru_bulk_move);
spin_unlock(&adev->mman.bdev.lru_lock);
-
- vm->bulk_moveable = true;
}
/**
@@ -732,8 +366,6 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
struct amdgpu_vm_bo_base *bo_base, *tmp;
int r;
- vm->bulk_moveable &= list_empty(&vm->evicted);
-
list_for_each_entry_safe(bo_base, tmp, &vm->evicted, vm_status) {
struct amdgpu_bo *bo = bo_base->bo;
struct amdgpu_bo *shadow = amdgpu_bo_shadowed(bo);
@@ -784,312 +416,6 @@ bool amdgpu_vm_ready(struct amdgpu_vm *vm)
}
/**
- * amdgpu_vm_clear_bo - initially clear the PDs/PTs
- *
- * @adev: amdgpu_device pointer
- * @vm: VM to clear BO from
- * @vmbo: BO to clear
- * @immediate: use an immediate update
- *
- * Root PD needs to be reserved when calling this.
- *
- * Returns:
- * 0 on success, errno otherwise.
- */
-static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
- struct amdgpu_vm *vm,
- struct amdgpu_bo_vm *vmbo,
- bool immediate)
-{
- struct ttm_operation_ctx ctx = { true, false };
- unsigned level = adev->vm_manager.root_level;
- struct amdgpu_vm_update_params params;
- struct amdgpu_bo *ancestor = &vmbo->bo;
- struct amdgpu_bo *bo = &vmbo->bo;
- unsigned entries, ats_entries;
- uint64_t addr;
- int r, idx;
-
- /* Figure out our place in the hierarchy */
- if (ancestor->parent) {
- ++level;
- while (ancestor->parent->parent) {
- ++level;
- ancestor = ancestor->parent;
- }
- }
-
- entries = amdgpu_bo_size(bo) / 8;
- if (!vm->pte_support_ats) {
- ats_entries = 0;
-
- } else if (!bo->parent) {
- ats_entries = amdgpu_vm_num_ats_entries(adev);
- ats_entries = min(ats_entries, entries);
- entries -= ats_entries;
-
- } else {
- struct amdgpu_vm_bo_base *pt;
-
- pt = ancestor->vm_bo;
- ats_entries = amdgpu_vm_num_ats_entries(adev);
- if ((pt - to_amdgpu_bo_vm(vm->root.bo)->entries) >= ats_entries) {
- ats_entries = 0;
- } else {
- ats_entries = entries;
- entries = 0;
- }
- }
-
- r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
- if (r)
- return r;
-
- if (vmbo->shadow) {
- struct amdgpu_bo *shadow = vmbo->shadow;
-
- r = ttm_bo_validate(&shadow->tbo, &shadow->placement, &ctx);
- if (r)
- return r;
- }
-
- if (!drm_dev_enter(adev_to_drm(adev), &idx))
- return -ENODEV;
-
- r = vm->update_funcs->map_table(vmbo);
- if (r)
- goto exit;
-
- memset(&params, 0, sizeof(params));
- params.adev = adev;
- params.vm = vm;
- params.immediate = immediate;
-
- r = vm->update_funcs->prepare(&params, NULL, AMDGPU_SYNC_EXPLICIT);
- if (r)
- goto exit;
-
- addr = 0;
- if (ats_entries) {
- uint64_t value = 0, flags;
-
- flags = AMDGPU_PTE_DEFAULT_ATC;
- if (level != AMDGPU_VM_PTB) {
- /* Handle leaf PDEs as PTEs */
- flags |= AMDGPU_PDE_PTE;
- amdgpu_gmc_get_vm_pde(adev, level, &value, &flags);
- }
-
- r = vm->update_funcs->update(&params, vmbo, addr, 0, ats_entries,
- value, flags);
- if (r)
- goto exit;
-
- addr += ats_entries * 8;
- }
-
- if (entries) {
- uint64_t value = 0, flags = 0;
-
- if (adev->asic_type >= CHIP_VEGA10) {
- if (level != AMDGPU_VM_PTB) {
- /* Handle leaf PDEs as PTEs */
- flags |= AMDGPU_PDE_PTE;
- amdgpu_gmc_get_vm_pde(adev, level,
- &value, &flags);
- } else {
- /* Workaround for fault priority problem on GMC9 */
- flags = AMDGPU_PTE_EXECUTABLE;
- }
- }
-
- r = vm->update_funcs->update(&params, vmbo, addr, 0, entries,
- value, flags);
- if (r)
- goto exit;
- }
-
- r = vm->update_funcs->commit(&params, NULL);
-exit:
- drm_dev_exit(idx);
- return r;
-}
-
-/**
- * amdgpu_vm_pt_create - create bo for PD/PT
- *
- * @adev: amdgpu_device pointer
- * @vm: requesting vm
- * @level: the page table level
- * @immediate: use a immediate update
- * @vmbo: pointer to the buffer object pointer
- */
-static int amdgpu_vm_pt_create(struct amdgpu_device *adev,
- struct amdgpu_vm *vm,
- int level, bool immediate,
- struct amdgpu_bo_vm **vmbo)
-{
- struct amdgpu_bo_param bp;
- struct amdgpu_bo *bo;
- struct dma_resv *resv;
- unsigned int num_entries;
- int r;
-
- memset(&bp, 0, sizeof(bp));
-
- bp.size = amdgpu_vm_bo_size(adev, level);
- bp.byte_align = AMDGPU_GPU_PAGE_SIZE;
- bp.domain = AMDGPU_GEM_DOMAIN_VRAM;
- bp.domain = amdgpu_bo_get_preferred_domain(adev, bp.domain);
- bp.flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
- AMDGPU_GEM_CREATE_CPU_GTT_USWC;
-
- if (level < AMDGPU_VM_PTB)
- num_entries = amdgpu_vm_num_entries(adev, level);
- else
- num_entries = 0;
-
- bp.bo_ptr_size = struct_size((*vmbo), entries, num_entries);
-
- if (vm->use_cpu_for_update)
- bp.flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
-
- bp.type = ttm_bo_type_kernel;
- bp.no_wait_gpu = immediate;
- if (vm->root.bo)
- bp.resv = vm->root.bo->tbo.base.resv;
-
- r = amdgpu_bo_create_vm(adev, &bp, vmbo);
- if (r)
- return r;
-
- bo = &(*vmbo)->bo;
- if (vm->is_compute_context || (adev->flags & AMD_IS_APU)) {
- (*vmbo)->shadow = NULL;
- return 0;
- }
-
- if (!bp.resv)
- WARN_ON(dma_resv_lock(bo->tbo.base.resv,
- NULL));
- resv = bp.resv;
- memset(&bp, 0, sizeof(bp));
- bp.size = amdgpu_vm_bo_size(adev, level);
- bp.domain = AMDGPU_GEM_DOMAIN_GTT;
- bp.flags = AMDGPU_GEM_CREATE_CPU_GTT_USWC;
- bp.type = ttm_bo_type_kernel;
- bp.resv = bo->tbo.base.resv;
- bp.bo_ptr_size = sizeof(struct amdgpu_bo);
-
- r = amdgpu_bo_create(adev, &bp, &(*vmbo)->shadow);
-
- if (!resv)
- dma_resv_unlock(bo->tbo.base.resv);
-
- if (r) {
- amdgpu_bo_unref(&bo);
- return r;
- }
-
- (*vmbo)->shadow->parent = amdgpu_bo_ref(bo);
- amdgpu_bo_add_to_shadow_list(*vmbo);
-
- return 0;
-}
-
-/**
- * amdgpu_vm_alloc_pts - Allocate a specific page table
- *
- * @adev: amdgpu_device pointer
- * @vm: VM to allocate page tables for
- * @cursor: Which page table to allocate
- * @immediate: use an immediate update
- *
- * Make sure a specific page table or directory is allocated.
- *
- * Returns:
- * 1 if page table needed to be allocated, 0 if page table was already
- * allocated, negative errno if an error occurred.
- */
-static int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
- struct amdgpu_vm *vm,
- struct amdgpu_vm_pt_cursor *cursor,
- bool immediate)
-{
- struct amdgpu_vm_bo_base *entry = cursor->entry;
- struct amdgpu_bo *pt_bo;
- struct amdgpu_bo_vm *pt;
- int r;
-
- if (entry->bo)
- return 0;
-
- r = amdgpu_vm_pt_create(adev, vm, cursor->level, immediate, &pt);
- if (r)
- return r;
-
- /* Keep a reference to the root directory to avoid
- * freeing them up in the wrong order.
- */
- pt_bo = &pt->bo;
- pt_bo->parent = amdgpu_bo_ref(cursor->parent->bo);
- amdgpu_vm_bo_base_init(entry, vm, pt_bo);
- r = amdgpu_vm_clear_bo(adev, vm, pt, immediate);
- if (r)
- goto error_free_pt;
-
- return 0;
-
-error_free_pt:
- amdgpu_bo_unref(&pt->shadow);
- amdgpu_bo_unref(&pt_bo);
- return r;
-}
-
-/**
- * amdgpu_vm_free_table - fre one PD/PT
- *
- * @entry: PDE to free
- */
-static void amdgpu_vm_free_table(struct amdgpu_vm_bo_base *entry)
-{
- struct amdgpu_bo *shadow;
-
- if (!entry->bo)
- return;
- shadow = amdgpu_bo_shadowed(entry->bo);
- entry->bo->vm_bo = NULL;
- list_del(&entry->vm_status);
- amdgpu_bo_unref(&shadow);
- amdgpu_bo_unref(&entry->bo);
-}
-
-/**
- * amdgpu_vm_free_pts - free PD/PT levels
- *
- * @adev: amdgpu device structure
- * @vm: amdgpu vm structure
- * @start: optional cursor where to start freeing PDs/PTs
- *
- * Free the page directory or page table level and all sub levels.
- */
-static void amdgpu_vm_free_pts(struct amdgpu_device *adev,
- struct amdgpu_vm *vm,
- struct amdgpu_vm_pt_cursor *start)
-{
- struct amdgpu_vm_pt_cursor cursor;
- struct amdgpu_vm_bo_base *entry;
-
- vm->bulk_moveable = false;
-
- for_each_amdgpu_vm_pt_dfs_safe(adev, vm, start, cursor, entry)
- amdgpu_vm_free_table(entry);
-
- if (start)
- amdgpu_vm_free_table(start->entry);
-}
-
-/**
* amdgpu_vm_check_compute_bug - check whether asic has compute vm bug
*
* @adev: amdgpu_device pointer
@@ -1336,53 +662,6 @@ uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr)
}
/**
- * amdgpu_vm_update_pde - update a single level in the hierarchy
- *
- * @params: parameters for the update
- * @vm: requested vm
- * @entry: entry to update
- *
- * Makes sure the requested entry in parent is up to date.
- */
-static int amdgpu_vm_update_pde(struct amdgpu_vm_update_params *params,
- struct amdgpu_vm *vm,
- struct amdgpu_vm_bo_base *entry)
-{
- struct amdgpu_vm_bo_base *parent = amdgpu_vm_pt_parent(entry);
- struct amdgpu_bo *bo = parent->bo, *pbo;
- uint64_t pde, pt, flags;
- unsigned level;
-
- for (level = 0, pbo = bo->parent; pbo; ++level)
- pbo = pbo->parent;
-
- level += params->adev->vm_manager.root_level;
- amdgpu_gmc_get_pde_for_bo(entry->bo, level, &pt, &flags);
- pde = (entry - to_amdgpu_bo_vm(parent->bo)->entries) * 8;
- return vm->update_funcs->update(params, to_amdgpu_bo_vm(bo), pde, pt,
- 1, 0, flags);
-}
-
-/**
- * amdgpu_vm_invalidate_pds - mark all PDs as invalid
- *
- * @adev: amdgpu_device pointer
- * @vm: related vm
- *
- * Mark all PD level as invalid after an error.
- */
-static void amdgpu_vm_invalidate_pds(struct amdgpu_device *adev,
- struct amdgpu_vm *vm)
-{
- struct amdgpu_vm_pt_cursor cursor;
- struct amdgpu_vm_bo_base *entry;
-
- for_each_amdgpu_vm_pt_dfs_safe(adev, vm, NULL, cursor, entry)
- if (entry->bo && !entry->moved)
- amdgpu_vm_bo_relocated(entry);
-}
-
-/**
* amdgpu_vm_update_pdes - make sure that all directories are valid
*
* @adev: amdgpu_device pointer
@@ -1398,6 +677,7 @@ int amdgpu_vm_update_pdes(struct amdgpu_device *adev,
struct amdgpu_vm *vm, bool immediate)
{
struct amdgpu_vm_update_params params;
+ struct amdgpu_vm_bo_base *entry;
int r, idx;
if (list_empty(&vm->relocated))
@@ -1413,17 +693,10 @@ int amdgpu_vm_update_pdes(struct amdgpu_device *adev,
r = vm->update_funcs->prepare(&params, NULL, AMDGPU_SYNC_EXPLICIT);
if (r)
- goto exit;
-
- while (!list_empty(&vm->relocated)) {
- struct amdgpu_vm_bo_base *entry;
-
- entry = list_first_entry(&vm->relocated,
- struct amdgpu_vm_bo_base,
- vm_status);
- amdgpu_vm_bo_idle(entry);
+ goto error;
- r = amdgpu_vm_update_pde(&params, vm, entry);
+ list_for_each_entry(entry, &vm->relocated, vm_status) {
+ r = amdgpu_vm_pde_update(&params, entry);
if (r)
goto error;
}
@@ -1431,297 +704,68 @@ int amdgpu_vm_update_pdes(struct amdgpu_device *adev,
r = vm->update_funcs->commit(&params, &vm->last_update);
if (r)
goto error;
- drm_dev_exit(idx);
- return 0;
+
+ while (!list_empty(&vm->relocated)) {
+ entry = list_first_entry(&vm->relocated,
+ struct amdgpu_vm_bo_base,
+ vm_status);
+ amdgpu_vm_bo_idle(entry);
+ }
error:
- amdgpu_vm_invalidate_pds(adev, vm);
-exit:
drm_dev_exit(idx);
return r;
}
-/*
- * amdgpu_vm_update_flags - figure out flags for PTE updates
- *
- * Make sure to set the right flags for the PTEs at the desired level.
- */
-static void amdgpu_vm_update_flags(struct amdgpu_vm_update_params *params,
- struct amdgpu_bo_vm *pt, unsigned int level,
- uint64_t pe, uint64_t addr,
- unsigned int count, uint32_t incr,
- uint64_t flags)
-
-{
- if (level != AMDGPU_VM_PTB) {
- flags |= AMDGPU_PDE_PTE;
- amdgpu_gmc_get_vm_pde(params->adev, level, &addr, &flags);
-
- } else if (params->adev->asic_type >= CHIP_VEGA10 &&
- !(flags & AMDGPU_PTE_VALID) &&
- !(flags & AMDGPU_PTE_PRT)) {
-
- /* Workaround for fault priority problem on GMC9 */
- flags |= AMDGPU_PTE_EXECUTABLE;
- }
-
- params->vm->update_funcs->update(params, pt, pe, addr, count, incr,
- flags);
-}
-
/**
- * amdgpu_vm_fragment - get fragment for PTEs
+ * amdgpu_vm_tlb_seq_cb - make sure to increment tlb sequence
+ * @fence: unused
+ * @cb: the callback structure
*
- * @params: see amdgpu_vm_update_params definition
- * @start: first PTE to handle
- * @end: last PTE to handle
- * @flags: hw mapping flags
- * @frag: resulting fragment size
- * @frag_end: end of this fragment
- *
- * Returns the first possible fragment for the start and end address.
+ * Increments the tlb sequence to make sure that future CS execute a VM flush.
*/
-static void amdgpu_vm_fragment(struct amdgpu_vm_update_params *params,
- uint64_t start, uint64_t end, uint64_t flags,
- unsigned int *frag, uint64_t *frag_end)
+static void amdgpu_vm_tlb_seq_cb(struct dma_fence *fence,
+ struct dma_fence_cb *cb)
{
- /**
- * The MC L1 TLB supports variable sized pages, based on a fragment
- * field in the PTE. When this field is set to a non-zero value, page
- * granularity is increased from 4KB to (1 << (12 + frag)). The PTE
- * flags are considered valid for all PTEs within the fragment range
- * and corresponding mappings are assumed to be physically contiguous.
- *
- * The L1 TLB can store a single PTE for the whole fragment,
- * significantly increasing the space available for translation
- * caching. This leads to large improvements in throughput when the
- * TLB is under pressure.
- *
- * The L2 TLB distributes small and large fragments into two
- * asymmetric partitions. The large fragment cache is significantly
- * larger. Thus, we try to use large fragments wherever possible.
- * Userspace can support this by aligning virtual base address and
- * allocation size to the fragment size.
- *
- * Starting with Vega10 the fragment size only controls the L1. The L2
- * is now directly feed with small/huge/giant pages from the walker.
- */
- unsigned max_frag;
+ struct amdgpu_vm_tlb_seq_cb *tlb_cb;
- if (params->adev->asic_type < CHIP_VEGA10)
- max_frag = params->adev->vm_manager.fragment_size;
- else
- max_frag = 31;
-
- /* system pages are non continuously */
- if (params->pages_addr) {
- *frag = 0;
- *frag_end = end;
- return;
- }
-
- /* This intentionally wraps around if no bit is set */
- *frag = min((unsigned)ffs(start) - 1, (unsigned)fls64(end - start) - 1);
- if (*frag >= max_frag) {
- *frag = max_frag;
- *frag_end = end & ~((1ULL << max_frag) - 1);
- } else {
- *frag_end = start + (1 << *frag);
- }
+ tlb_cb = container_of(cb, typeof(*tlb_cb), cb);
+ atomic64_inc(&tlb_cb->vm->tlb_seq);
+ kfree(tlb_cb);
}
/**
- * amdgpu_vm_update_ptes - make sure that page tables are valid
+ * amdgpu_vm_update_range - update a range in the vm page table
*
- * @params: see amdgpu_vm_update_params definition
- * @start: start of GPU address range
- * @end: end of GPU address range
- * @dst: destination address to map to, the next dst inside the function
- * @flags: mapping flags
- *
- * Update the page tables in the range @start - @end.
- *
- * Returns:
- * 0 for success, -EINVAL for failure.
- */
-static int amdgpu_vm_update_ptes(struct amdgpu_vm_update_params *params,
- uint64_t start, uint64_t end,
- uint64_t dst, uint64_t flags)
-{
- struct amdgpu_device *adev = params->adev;
- struct amdgpu_vm_pt_cursor cursor;
- uint64_t frag_start = start, frag_end;
- unsigned int frag;
- int r;
-
- /* figure out the initial fragment */
- amdgpu_vm_fragment(params, frag_start, end, flags, &frag, &frag_end);
-
- /* walk over the address space and update the PTs */
- amdgpu_vm_pt_start(adev, params->vm, start, &cursor);
- while (cursor.pfn < end) {
- unsigned shift, parent_shift, mask;
- uint64_t incr, entry_end, pe_start;
- struct amdgpu_bo *pt;
-
- if (!params->unlocked) {
- /* make sure that the page tables covering the
- * address range are actually allocated
- */
- r = amdgpu_vm_alloc_pts(params->adev, params->vm,
- &cursor, params->immediate);
- if (r)
- return r;
- }
-
- shift = amdgpu_vm_level_shift(adev, cursor.level);
- parent_shift = amdgpu_vm_level_shift(adev, cursor.level - 1);
- if (params->unlocked) {
- /* Unlocked updates are only allowed on the leaves */
- if (amdgpu_vm_pt_descendant(adev, &cursor))
- continue;
- } else if (adev->asic_type < CHIP_VEGA10 &&
- (flags & AMDGPU_PTE_VALID)) {
- /* No huge page support before GMC v9 */
- if (cursor.level != AMDGPU_VM_PTB) {
- if (!amdgpu_vm_pt_descendant(adev, &cursor))
- return -ENOENT;
- continue;
- }
- } else if (frag < shift) {
- /* We can't use this level when the fragment size is
- * smaller than the address shift. Go to the next
- * child entry and try again.
- */
- if (amdgpu_vm_pt_descendant(adev, &cursor))
- continue;
- } else if (frag >= parent_shift) {
- /* If the fragment size is even larger than the parent
- * shift we should go up one level and check it again.
- */
- if (!amdgpu_vm_pt_ancestor(&cursor))
- return -EINVAL;
- continue;
- }
-
- pt = cursor.entry->bo;
- if (!pt) {
- /* We need all PDs and PTs for mapping something, */
- if (flags & AMDGPU_PTE_VALID)
- return -ENOENT;
-
- /* but unmapping something can happen at a higher
- * level.
- */
- if (!amdgpu_vm_pt_ancestor(&cursor))
- return -EINVAL;
-
- pt = cursor.entry->bo;
- shift = parent_shift;
- frag_end = max(frag_end, ALIGN(frag_start + 1,
- 1ULL << shift));
- }
-
- /* Looks good so far, calculate parameters for the update */
- incr = (uint64_t)AMDGPU_GPU_PAGE_SIZE << shift;
- mask = amdgpu_vm_entries_mask(adev, cursor.level);
- pe_start = ((cursor.pfn >> shift) & mask) * 8;
- entry_end = ((uint64_t)mask + 1) << shift;
- entry_end += cursor.pfn & ~(entry_end - 1);
- entry_end = min(entry_end, end);
-
- do {
- struct amdgpu_vm *vm = params->vm;
- uint64_t upd_end = min(entry_end, frag_end);
- unsigned nptes = (upd_end - frag_start) >> shift;
- uint64_t upd_flags = flags | AMDGPU_PTE_FRAG(frag);
-
- /* This can happen when we set higher level PDs to
- * silent to stop fault floods.
- */
- nptes = max(nptes, 1u);
-
- trace_amdgpu_vm_update_ptes(params, frag_start, upd_end,
- min(nptes, 32u), dst, incr, upd_flags,
- vm->task_info.pid,
- vm->immediate.fence_context);
- amdgpu_vm_update_flags(params, to_amdgpu_bo_vm(pt),
- cursor.level, pe_start, dst,
- nptes, incr, upd_flags);
-
- pe_start += nptes * 8;
- dst += nptes * incr;
-
- frag_start = upd_end;
- if (frag_start >= frag_end) {
- /* figure out the next fragment */
- amdgpu_vm_fragment(params, frag_start, end,
- flags, &frag, &frag_end);
- if (frag < shift)
- break;
- }
- } while (frag_start < entry_end);
-
- if (amdgpu_vm_pt_descendant(adev, &cursor)) {
- /* Free all child entries.
- * Update the tables with the flags and addresses and free up subsequent
- * tables in the case of huge pages or freed up areas.
- * This is the maximum you can free, because all other page tables are not
- * completely covered by the range and so potentially still in use.
- */
- while (cursor.pfn < frag_start) {
- /* Make sure previous mapping is freed */
- if (cursor.entry->bo) {
- params->table_freed = true;
- amdgpu_vm_free_pts(adev, params->vm, &cursor);
- }
- amdgpu_vm_pt_next(adev, &cursor);
- }
-
- } else if (frag >= shift) {
- /* or just move on to the next on the same level. */
- amdgpu_vm_pt_next(adev, &cursor);
- }
- }
-
- return 0;
-}
-
-/**
- * amdgpu_vm_bo_update_mapping - update a mapping in the vm page table
- *
- * @adev: amdgpu_device pointer of the VM
- * @bo_adev: amdgpu_device pointer of the mapped BO
- * @vm: requested vm
+ * @adev: amdgpu_device pointer to use for commands
+ * @vm: the VM to update the range
* @immediate: immediate submission in a page fault
* @unlocked: unlocked invalidation during MM callback
+ * @flush_tlb: trigger tlb invalidation after update completed
* @resv: fences we need to sync to
* @start: start of mapped range
* @last: last mapped entry
* @flags: flags for the entries
* @offset: offset into nodes and pages_addr
+ * @vram_base: base for vram mappings
* @res: ttm_resource to map
* @pages_addr: DMA addresses to use for mapping
* @fence: optional resulting fence
- * @table_freed: return true if page table is freed
*
* Fill in the page table entries between @start and @last.
*
* Returns:
- * 0 for success, -EINVAL for failure.
+ * 0 for success, negative erro code for failure.
*/
-int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
- struct amdgpu_device *bo_adev,
- struct amdgpu_vm *vm, bool immediate,
- bool unlocked, struct dma_resv *resv,
- uint64_t start, uint64_t last,
- uint64_t flags, uint64_t offset,
- struct ttm_resource *res,
- dma_addr_t *pages_addr,
- struct dma_fence **fence,
- bool *table_freed)
+int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+ bool immediate, bool unlocked, bool flush_tlb,
+ struct dma_resv *resv, uint64_t start, uint64_t last,
+ uint64_t flags, uint64_t offset, uint64_t vram_base,
+ struct ttm_resource *res, dma_addr_t *pages_addr,
+ struct dma_fence **fence)
{
struct amdgpu_vm_update_params params;
+ struct amdgpu_vm_tlb_seq_cb *tlb_cb;
struct amdgpu_res_cursor cursor;
enum amdgpu_sync_mode sync_mode;
int r, idx;
@@ -1729,6 +773,18 @@ int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
if (!drm_dev_enter(adev_to_drm(adev), &idx))
return -ENODEV;
+ tlb_cb = kmalloc(sizeof(*tlb_cb), GFP_KERNEL);
+ if (!tlb_cb) {
+ r = -ENOMEM;
+ goto error_unlock;
+ }
+
+ /* Vega20+XGMI where PTEs get inadvertently cached in L2 texture cache,
+ * heavy-weight flush TLB unconditionally.
+ */
+ flush_tlb |= adev->gmc.xgmi.num_physical_nodes &&
+ adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 0);
+
memset(&params, 0, sizeof(params));
params.adev = adev;
params.vm = vm;
@@ -1747,7 +803,7 @@ int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
amdgpu_vm_eviction_lock(vm);
if (vm->evicting) {
r = -EBUSY;
- goto error_unlock;
+ goto error_free;
}
if (!unlocked && !dma_fence_is_signaled(vm->last_unlocked)) {
@@ -1760,7 +816,7 @@ int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
r = vm->update_funcs->prepare(&params, resv, sync_mode);
if (r)
- goto error_unlock;
+ goto error_free;
amdgpu_res_first(pages_addr ? NULL : res, offset,
(last - start + 1) * AMDGPU_GPU_PAGE_SIZE, &cursor);
@@ -1800,16 +856,15 @@ int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
}
} else if (flags & (AMDGPU_PTE_VALID | AMDGPU_PTE_PRT)) {
- addr = bo_adev->vm_manager.vram_base_offset +
- cursor.start;
+ addr = vram_base + cursor.start;
} else {
addr = 0;
}
tmp = start + num_entries;
- r = amdgpu_vm_update_ptes(&params, start, tmp, addr, flags);
+ r = amdgpu_vm_ptes_update(&params, start, tmp, addr, flags);
if (r)
- goto error_unlock;
+ goto error_free;
amdgpu_res_next(&cursor, num_entries * AMDGPU_GPU_PAGE_SIZE);
start = tmp;
@@ -1817,8 +872,21 @@ int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
r = vm->update_funcs->commit(&params, fence);
- if (table_freed)
- *table_freed = *table_freed || params.table_freed;
+ if (flush_tlb || params.table_freed) {
+ tlb_cb->vm = vm;
+ if (fence && *fence &&
+ !dma_fence_add_callback(*fence, &tlb_cb->cb,
+ amdgpu_vm_tlb_seq_cb)) {
+ dma_fence_put(vm->last_tlb_flush);
+ vm->last_tlb_flush = dma_fence_get(*fence);
+ } else {
+ amdgpu_vm_tlb_seq_cb(NULL, &tlb_cb->cb);
+ }
+ tlb_cb = NULL;
+ }
+
+error_free:
+ kfree(tlb_cb);
error_unlock:
amdgpu_vm_eviction_unlock(vm);
@@ -1876,7 +944,6 @@ void amdgpu_vm_get_memory(struct amdgpu_vm *vm, uint64_t *vram_mem,
* @adev: amdgpu_device pointer
* @bo_va: requested BO and VM object
* @clear: if true clear the entries
- * @table_freed: return true if page table is freed
*
* Fill in the page table entries for @bo_va.
*
@@ -1884,7 +951,7 @@ void amdgpu_vm_get_memory(struct amdgpu_vm *vm, uint64_t *vram_mem,
* 0 for success, -EINVAL for failure.
*/
int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
- bool clear, bool *table_freed)
+ bool clear)
{
struct amdgpu_bo *bo = bo_va->base.bo;
struct amdgpu_vm *vm = bo_va->base.vm;
@@ -1892,9 +959,10 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
dma_addr_t *pages_addr = NULL;
struct ttm_resource *mem;
struct dma_fence **last_update;
+ bool flush_tlb = clear;
struct dma_resv *resv;
+ uint64_t vram_base;
uint64_t flags;
- struct amdgpu_device *bo_adev = adev;
int r;
if (clear || !bo) {
@@ -1919,14 +987,18 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
}
if (bo) {
+ struct amdgpu_device *bo_adev;
+
flags = amdgpu_ttm_tt_pte_flags(adev, bo->tbo.ttm, mem);
if (amdgpu_bo_encrypted(bo))
flags |= AMDGPU_PTE_TMZ;
bo_adev = amdgpu_ttm_adev(bo->tbo.bdev);
+ vram_base = bo_adev->vm_manager.vram_base_offset;
} else {
flags = 0x0;
+ vram_base = 0;
}
if (clear || (bo && bo->tbo.base.resv ==
@@ -1936,7 +1008,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
last_update = &bo_va->last_pt_update;
if (!clear && bo_va->base.moved) {
- bo_va->base.moved = false;
+ flush_tlb = true;
list_splice_init(&bo_va->valids, &bo_va->invalids);
} else if (bo_va->cleared != clear) {
@@ -1959,11 +1031,11 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
trace_amdgpu_vm_bo_update(mapping);
- r = amdgpu_vm_bo_update_mapping(adev, bo_adev, vm, false, false,
- resv, mapping->start,
- mapping->last, update_flags,
- mapping->offset, mem,
- pages_addr, last_update, table_freed);
+ r = amdgpu_vm_update_range(adev, vm, false, false, flush_tlb,
+ resv, mapping->start, mapping->last,
+ update_flags, mapping->offset,
+ vram_base, mem, pages_addr,
+ last_update);
if (r)
return r;
}
@@ -1986,6 +1058,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
list_splice_init(&bo_va->invalids, &bo_va->valids);
bo_va->cleared = clear;
+ bo_va->base.moved = false;
if (trace_amdgpu_vm_bo_mapping_enabled()) {
list_for_each_entry(mapping, &bo_va->valids, list)
@@ -2113,7 +1186,7 @@ static void amdgpu_vm_prt_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
struct dma_resv_iter cursor;
struct dma_fence *fence;
- dma_resv_for_each_fence(&cursor, resv, true, fence) {
+ dma_resv_for_each_fence(&cursor, resv, DMA_RESV_USAGE_BOOKKEEP, fence) {
/* Add a callback for each fence in the reservation object */
amdgpu_vm_prt_get(adev);
amdgpu_vm_add_prt_cb(adev, fence);
@@ -2154,10 +1227,10 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
mapping->start < AMDGPU_GMC_HOLE_START)
init_pte_value = AMDGPU_PTE_DEFAULT_ATC;
- r = amdgpu_vm_bo_update_mapping(adev, adev, vm, false, false,
- resv, mapping->start,
- mapping->last, init_pte_value,
- 0, NULL, NULL, &f, NULL);
+ r = amdgpu_vm_update_range(adev, vm, false, false, true, resv,
+ mapping->start, mapping->last,
+ init_pte_value, 0, 0, NULL, NULL,
+ &f);
amdgpu_vm_free_mapping(adev, vm, mapping, f);
if (r) {
dma_fence_put(f);
@@ -2199,7 +1272,7 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
list_for_each_entry_safe(bo_va, tmp, &vm->moved, base.vm_status) {
/* Per VM BOs never need to bo cleared in the page tables */
- r = amdgpu_vm_bo_update(adev, bo_va, false, NULL);
+ r = amdgpu_vm_bo_update(adev, bo_va, false);
if (r)
return r;
}
@@ -2218,7 +1291,7 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
else
clear = true;
- r = amdgpu_vm_bo_update(adev, bo_va, clear, NULL);
+ r = amdgpu_vm_bo_update(adev, bo_va, clear);
if (r)
return r;
@@ -2665,7 +1738,7 @@ void amdgpu_vm_bo_del(struct amdgpu_device *adev,
if (bo) {
dma_resv_assert_held(bo->tbo.base.resv);
if (bo->tbo.base.resv == vm->root.bo->tbo.base.resv)
- vm->bulk_moveable = false;
+ ttm_bo_set_bulk_move(&bo->tbo, NULL);
for (base = &bo_va->base.bo->vm_bo; *base;
base = &(*base)->next) {
@@ -2719,7 +1792,7 @@ bool amdgpu_vm_evictable(struct amdgpu_bo *bo)
return true;
/* Don't evict VM page tables while they are busy */
- if (!dma_resv_test_signaled(bo->tbo.base.resv, true))
+ if (!dma_resv_test_signaled(bo->tbo.base.resv, DMA_RESV_USAGE_BOOKKEEP))
return false;
/* Try to block ongoing updates */
@@ -2899,7 +1972,8 @@ void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint32_t min_vm_size,
*/
long amdgpu_vm_wait_idle(struct amdgpu_vm *vm, long timeout)
{
- timeout = dma_resv_wait_timeout(vm->root.bo->tbo.base.resv, true,
+ timeout = dma_resv_wait_timeout(vm->root.bo->tbo.base.resv,
+ DMA_RESV_USAGE_BOOKKEEP,
true, timeout);
if (timeout <= 0)
return timeout;
@@ -2967,6 +2041,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
vm->update_funcs = &amdgpu_vm_sdma_funcs;
vm->last_update = NULL;
vm->last_unlocked = dma_fence_get_stub();
+ vm->last_tlb_flush = dma_fence_get_stub();
mutex_init(&vm->eviction_lock);
vm->evicting = false;
@@ -2980,13 +2055,13 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
if (r)
goto error_free_root;
- r = dma_resv_reserve_shared(root_bo->tbo.base.resv, 1);
+ r = dma_resv_reserve_fences(root_bo->tbo.base.resv, 1);
if (r)
goto error_unreserve;
amdgpu_vm_bo_base_init(&vm->root, vm, root_bo);
- r = amdgpu_vm_clear_bo(adev, vm, root, false);
+ r = amdgpu_vm_pt_clear(adev, vm, root, false);
if (r)
goto error_unreserve;
@@ -3005,6 +2080,7 @@ error_free_root:
vm->root.bo = NULL;
error_free_delayed:
+ dma_fence_put(vm->last_tlb_flush);
dma_fence_put(vm->last_unlocked);
drm_sched_entity_destroy(&vm->delayed);
@@ -3015,34 +2091,6 @@ error_free_immediate:
}
/**
- * amdgpu_vm_check_clean_reserved - check if a VM is clean
- *
- * @adev: amdgpu_device pointer
- * @vm: the VM to check
- *
- * check all entries of the root PD, if any subsequent PDs are allocated,
- * it means there are page table creating and filling, and is no a clean
- * VM
- *
- * Returns:
- * 0 if this VM is clean
- */
-static int amdgpu_vm_check_clean_reserved(struct amdgpu_device *adev,
- struct amdgpu_vm *vm)
-{
- enum amdgpu_vm_level root = adev->vm_manager.root_level;
- unsigned int entries = amdgpu_vm_num_entries(adev, root);
- unsigned int i = 0;
-
- for (i = 0; i < entries; i++) {
- if (to_amdgpu_bo_vm(vm->root.bo)->entries[i].bo)
- return -EINVAL;
- }
-
- return 0;
-}
-
-/**
* amdgpu_vm_make_compute - Turn a GFX VM into a compute VM
*
* @adev: amdgpu_device pointer
@@ -3071,17 +2119,17 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm)
return r;
/* Sanity checks */
- r = amdgpu_vm_check_clean_reserved(adev, vm);
- if (r)
+ if (!amdgpu_vm_pt_is_root_clean(adev, vm)) {
+ r = -EINVAL;
goto unreserve_bo;
+ }
/* Check if PD needs to be reinitialized and do it before
* changing any other state, in case it fails.
*/
if (pte_support_ats != vm->pte_support_ats) {
vm->pte_support_ats = pte_support_ats;
- r = amdgpu_vm_clear_bo(adev, vm,
- to_amdgpu_bo_vm(vm->root.bo),
+ r = amdgpu_vm_pt_clear(adev, vm, to_amdgpu_bo_vm(vm->root.bo),
false);
if (r)
goto unreserve_bo;
@@ -3149,6 +2197,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
struct amdgpu_bo_va_mapping *mapping, *tmp;
bool prt_fini_needed = !!adev->gmc.gmc_funcs->set_prt;
struct amdgpu_bo *root;
+ unsigned long flags;
int i;
amdgpu_amdkfd_gpuvm_destroy_cb(adev, vm);
@@ -3158,6 +2207,11 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
amdgpu_vm_set_pasid(adev, vm, 0);
dma_fence_wait(vm->last_unlocked, false);
dma_fence_put(vm->last_unlocked);
+ dma_fence_wait(vm->last_tlb_flush, false);
+ /* Make sure that all fence callbacks have completed */
+ spin_lock_irqsave(vm->last_tlb_flush->lock, flags);
+ spin_unlock_irqrestore(vm->last_tlb_flush->lock, flags);
+ dma_fence_put(vm->last_tlb_flush);
list_for_each_entry_safe(mapping, tmp, &vm->freed, list) {
if (mapping->flags & AMDGPU_PTE_PRT && prt_fini_needed) {
@@ -3169,7 +2223,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
amdgpu_vm_free_mapping(adev, vm, mapping, NULL);
}
- amdgpu_vm_free_pts(adev, vm, NULL);
+ amdgpu_vm_pt_free_root(adev, vm);
amdgpu_bo_unreserve(root);
amdgpu_bo_unref(&root);
WARN_ON(vm->root.bo);
@@ -3423,15 +2477,14 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid,
value = 0;
}
- r = dma_resv_reserve_shared(root->tbo.base.resv, 1);
+ r = dma_resv_reserve_fences(root->tbo.base.resv, 1);
if (r) {
pr_debug("failed %d to reserve fence slot\n", r);
goto error_unlock;
}
- r = amdgpu_vm_bo_update_mapping(adev, adev, vm, true, false, NULL, addr,
- addr, flags, value, NULL, NULL, NULL,
- NULL);
+ r = amdgpu_vm_update_range(adev, vm, true, false, false, NULL, addr,
+ addr, flags, value, 0, NULL, NULL, NULL);
if (r)
goto error_unlock;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index a40a6a993bb0..9ecb7f663e19 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -284,6 +284,10 @@ struct amdgpu_vm {
struct drm_sched_entity immediate;
struct drm_sched_entity delayed;
+ /* Last finished delayed update */
+ atomic64_t tlb_seq;
+ struct dma_fence *last_tlb_flush;
+
/* Last unlocked submission to the scheduler entities */
struct dma_fence *last_unlocked;
@@ -317,8 +321,6 @@ struct amdgpu_vm {
/* Store positions of group of BOs */
struct ttm_lru_bulk_move lru_bulk_move;
- /* mark whether can do the bulk move */
- bool bulk_moveable;
/* Flag to indicate if VM is used for compute */
bool is_compute_context;
};
@@ -397,18 +399,17 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
struct dma_fence **fence);
int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
struct amdgpu_vm *vm);
-int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
- struct amdgpu_device *bo_adev,
- struct amdgpu_vm *vm, bool immediate,
- bool unlocked, struct dma_resv *resv,
- uint64_t start, uint64_t last,
- uint64_t flags, uint64_t offset,
- struct ttm_resource *res,
- dma_addr_t *pages_addr,
- struct dma_fence **fence, bool *free_table);
+void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base,
+ struct amdgpu_vm *vm, struct amdgpu_bo *bo);
+int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+ bool immediate, bool unlocked, bool flush_tlb,
+ struct dma_resv *resv, uint64_t start, uint64_t last,
+ uint64_t flags, uint64_t offset, uint64_t vram_base,
+ struct ttm_resource *res, dma_addr_t *pages_addr,
+ struct dma_fence **fence);
int amdgpu_vm_bo_update(struct amdgpu_device *adev,
struct amdgpu_bo_va *bo_va,
- bool clear, bool *table_freed);
+ bool clear);
bool amdgpu_vm_evictable(struct amdgpu_bo *bo);
void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
struct amdgpu_bo *bo, bool evicted);
@@ -454,12 +455,37 @@ void amdgpu_vm_set_task_info(struct amdgpu_vm *vm);
void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev,
struct amdgpu_vm *vm);
-void amdgpu_vm_del_from_lru_notify(struct ttm_buffer_object *bo);
void amdgpu_vm_get_memory(struct amdgpu_vm *vm, uint64_t *vram_mem,
uint64_t *gtt_mem, uint64_t *cpu_mem);
+int amdgpu_vm_pt_clear(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+ struct amdgpu_bo_vm *vmbo, bool immediate);
+int amdgpu_vm_pt_create(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+ int level, bool immediate, struct amdgpu_bo_vm **vmbo);
+void amdgpu_vm_pt_free_root(struct amdgpu_device *adev, struct amdgpu_vm *vm);
+bool amdgpu_vm_pt_is_root_clean(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm);
+
+int amdgpu_vm_pde_update(struct amdgpu_vm_update_params *params,
+ struct amdgpu_vm_bo_base *entry);
+int amdgpu_vm_ptes_update(struct amdgpu_vm_update_params *params,
+ uint64_t start, uint64_t end,
+ uint64_t dst, uint64_t flags);
+
#if defined(CONFIG_DEBUG_FS)
void amdgpu_debugfs_vm_bo_info(struct amdgpu_vm *vm, struct seq_file *m);
#endif
+/**
+ * amdgpu_vm_tlb_seq - return tlb flush sequence number
+ * @vm: the amdgpu_vm structure to query
+ *
+ * Returns the tlb flush sequence number which indicates that the VM TLBs needs
+ * to be invalidated whenever the sequence number change.
+ */
+static inline uint64_t amdgpu_vm_tlb_seq(struct amdgpu_vm *vm)
+{
+ return atomic64_read(&vm->tlb_seq);
+}
+
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c
index e3fbf0f10add..31913ae86de6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c
@@ -74,13 +74,12 @@ static int amdgpu_vm_cpu_update(struct amdgpu_vm_update_params *p,
{
unsigned int i;
uint64_t value;
- int r;
+ long r;
- if (vmbo->bo.tbo.moving) {
- r = dma_fence_wait(vmbo->bo.tbo.moving, true);
- if (r)
- return r;
- }
+ r = dma_resv_wait_timeout(vmbo->bo.tbo.base.resv, DMA_RESV_USAGE_KERNEL,
+ true, MAX_SCHEDULE_TIMEOUT);
+ if (r < 0)
+ return r;
pe += (unsigned long)amdgpu_bo_kptr(&vmbo->bo);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
new file mode 100644
index 000000000000..88de9f0d4728
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
@@ -0,0 +1,981 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <drm/drm_drv.h>
+
+#include "amdgpu.h"
+#include "amdgpu_trace.h"
+#include "amdgpu_vm.h"
+
+/*
+ * amdgpu_vm_pt_cursor - state for for_each_amdgpu_vm_pt
+ */
+struct amdgpu_vm_pt_cursor {
+ uint64_t pfn;
+ struct amdgpu_vm_bo_base *parent;
+ struct amdgpu_vm_bo_base *entry;
+ unsigned int level;
+};
+
+/**
+ * amdgpu_vm_pt_level_shift - return the addr shift for each level
+ *
+ * @adev: amdgpu_device pointer
+ * @level: VMPT level
+ *
+ * Returns:
+ * The number of bits the pfn needs to be right shifted for a level.
+ */
+static unsigned int amdgpu_vm_pt_level_shift(struct amdgpu_device *adev,
+ unsigned int level)
+{
+ switch (level) {
+ case AMDGPU_VM_PDB2:
+ case AMDGPU_VM_PDB1:
+ case AMDGPU_VM_PDB0:
+ return 9 * (AMDGPU_VM_PDB0 - level) +
+ adev->vm_manager.block_size;
+ case AMDGPU_VM_PTB:
+ return 0;
+ default:
+ return ~0;
+ }
+}
+
+/**
+ * amdgpu_vm_pt_num_entries - return the number of entries in a PD/PT
+ *
+ * @adev: amdgpu_device pointer
+ * @level: VMPT level
+ *
+ * Returns:
+ * The number of entries in a page directory or page table.
+ */
+static unsigned int amdgpu_vm_pt_num_entries(struct amdgpu_device *adev,
+ unsigned int level)
+{
+ unsigned int shift;
+
+ shift = amdgpu_vm_pt_level_shift(adev, adev->vm_manager.root_level);
+ if (level == adev->vm_manager.root_level)
+ /* For the root directory */
+ return round_up(adev->vm_manager.max_pfn, 1ULL << shift)
+ >> shift;
+ else if (level != AMDGPU_VM_PTB)
+ /* Everything in between */
+ return 512;
+
+ /* For the page tables on the leaves */
+ return AMDGPU_VM_PTE_COUNT(adev);
+}
+
+/**
+ * amdgpu_vm_pt_num_ats_entries - return the number of ATS entries in the root PD
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Returns:
+ * The number of entries in the root page directory which needs the ATS setting.
+ */
+static unsigned int amdgpu_vm_pt_num_ats_entries(struct amdgpu_device *adev)
+{
+ unsigned int shift;
+
+ shift = amdgpu_vm_pt_level_shift(adev, adev->vm_manager.root_level);
+ return AMDGPU_GMC_HOLE_START >> (shift + AMDGPU_GPU_PAGE_SHIFT);
+}
+
+/**
+ * amdgpu_vm_pt_entries_mask - the mask to get the entry number of a PD/PT
+ *
+ * @adev: amdgpu_device pointer
+ * @level: VMPT level
+ *
+ * Returns:
+ * The mask to extract the entry number of a PD/PT from an address.
+ */
+static uint32_t amdgpu_vm_pt_entries_mask(struct amdgpu_device *adev,
+ unsigned int level)
+{
+ if (level <= adev->vm_manager.root_level)
+ return 0xffffffff;
+ else if (level != AMDGPU_VM_PTB)
+ return 0x1ff;
+ else
+ return AMDGPU_VM_PTE_COUNT(adev) - 1;
+}
+
+/**
+ * amdgpu_vm_pt_size - returns the size of the page table in bytes
+ *
+ * @adev: amdgpu_device pointer
+ * @level: VMPT level
+ *
+ * Returns:
+ * The size of the BO for a page directory or page table in bytes.
+ */
+static unsigned int amdgpu_vm_pt_size(struct amdgpu_device *adev,
+ unsigned int level)
+{
+ return AMDGPU_GPU_PAGE_ALIGN(amdgpu_vm_pt_num_entries(adev, level) * 8);
+}
+
+/**
+ * amdgpu_vm_pt_parent - get the parent page directory
+ *
+ * @pt: child page table
+ *
+ * Helper to get the parent entry for the child page table. NULL if we are at
+ * the root page directory.
+ */
+static struct amdgpu_vm_bo_base *
+amdgpu_vm_pt_parent(struct amdgpu_vm_bo_base *pt)
+{
+ struct amdgpu_bo *parent = pt->bo->parent;
+
+ if (!parent)
+ return NULL;
+
+ return parent->vm_bo;
+}
+
+/**
+ * amdgpu_vm_pt_start - start PD/PT walk
+ *
+ * @adev: amdgpu_device pointer
+ * @vm: amdgpu_vm structure
+ * @start: start address of the walk
+ * @cursor: state to initialize
+ *
+ * Initialize a amdgpu_vm_pt_cursor to start a walk.
+ */
+static void amdgpu_vm_pt_start(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm, uint64_t start,
+ struct amdgpu_vm_pt_cursor *cursor)
+{
+ cursor->pfn = start;
+ cursor->parent = NULL;
+ cursor->entry = &vm->root;
+ cursor->level = adev->vm_manager.root_level;
+}
+
+/**
+ * amdgpu_vm_pt_descendant - go to child node
+ *
+ * @adev: amdgpu_device pointer
+ * @cursor: current state
+ *
+ * Walk to the child node of the current node.
+ * Returns:
+ * True if the walk was possible, false otherwise.
+ */
+static bool amdgpu_vm_pt_descendant(struct amdgpu_device *adev,
+ struct amdgpu_vm_pt_cursor *cursor)
+{
+ unsigned int mask, shift, idx;
+
+ if ((cursor->level == AMDGPU_VM_PTB) || !cursor->entry ||
+ !cursor->entry->bo)
+ return false;
+
+ mask = amdgpu_vm_pt_entries_mask(adev, cursor->level);
+ shift = amdgpu_vm_pt_level_shift(adev, cursor->level);
+
+ ++cursor->level;
+ idx = (cursor->pfn >> shift) & mask;
+ cursor->parent = cursor->entry;
+ cursor->entry = &to_amdgpu_bo_vm(cursor->entry->bo)->entries[idx];
+ return true;
+}
+
+/**
+ * amdgpu_vm_pt_sibling - go to sibling node
+ *
+ * @adev: amdgpu_device pointer
+ * @cursor: current state
+ *
+ * Walk to the sibling node of the current node.
+ * Returns:
+ * True if the walk was possible, false otherwise.
+ */
+static bool amdgpu_vm_pt_sibling(struct amdgpu_device *adev,
+ struct amdgpu_vm_pt_cursor *cursor)
+{
+
+ unsigned int shift, num_entries;
+ struct amdgpu_bo_vm *parent;
+
+ /* Root doesn't have a sibling */
+ if (!cursor->parent)
+ return false;
+
+ /* Go to our parents and see if we got a sibling */
+ shift = amdgpu_vm_pt_level_shift(adev, cursor->level - 1);
+ num_entries = amdgpu_vm_pt_num_entries(adev, cursor->level - 1);
+ parent = to_amdgpu_bo_vm(cursor->parent->bo);
+
+ if (cursor->entry == &parent->entries[num_entries - 1])
+ return false;
+
+ cursor->pfn += 1ULL << shift;
+ cursor->pfn &= ~((1ULL << shift) - 1);
+ ++cursor->entry;
+ return true;
+}
+
+/**
+ * amdgpu_vm_pt_ancestor - go to parent node
+ *
+ * @cursor: current state
+ *
+ * Walk to the parent node of the current node.
+ * Returns:
+ * True if the walk was possible, false otherwise.
+ */
+static bool amdgpu_vm_pt_ancestor(struct amdgpu_vm_pt_cursor *cursor)
+{
+ if (!cursor->parent)
+ return false;
+
+ --cursor->level;
+ cursor->entry = cursor->parent;
+ cursor->parent = amdgpu_vm_pt_parent(cursor->parent);
+ return true;
+}
+
+/**
+ * amdgpu_vm_pt_next - get next PD/PT in hieratchy
+ *
+ * @adev: amdgpu_device pointer
+ * @cursor: current state
+ *
+ * Walk the PD/PT tree to the next node.
+ */
+static void amdgpu_vm_pt_next(struct amdgpu_device *adev,
+ struct amdgpu_vm_pt_cursor *cursor)
+{
+ /* First try a newborn child */
+ if (amdgpu_vm_pt_descendant(adev, cursor))
+ return;
+
+ /* If that didn't worked try to find a sibling */
+ while (!amdgpu_vm_pt_sibling(adev, cursor)) {
+ /* No sibling, go to our parents and grandparents */
+ if (!amdgpu_vm_pt_ancestor(cursor)) {
+ cursor->pfn = ~0ll;
+ return;
+ }
+ }
+}
+
+/**
+ * amdgpu_vm_pt_first_dfs - start a deep first search
+ *
+ * @adev: amdgpu_device structure
+ * @vm: amdgpu_vm structure
+ * @start: optional cursor to start with
+ * @cursor: state to initialize
+ *
+ * Starts a deep first traversal of the PD/PT tree.
+ */
+static void amdgpu_vm_pt_first_dfs(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm,
+ struct amdgpu_vm_pt_cursor *start,
+ struct amdgpu_vm_pt_cursor *cursor)
+{
+ if (start)
+ *cursor = *start;
+ else
+ amdgpu_vm_pt_start(adev, vm, 0, cursor);
+
+ while (amdgpu_vm_pt_descendant(adev, cursor))
+ ;
+}
+
+/**
+ * amdgpu_vm_pt_continue_dfs - check if the deep first search should continue
+ *
+ * @start: starting point for the search
+ * @entry: current entry
+ *
+ * Returns:
+ * True when the search should continue, false otherwise.
+ */
+static bool amdgpu_vm_pt_continue_dfs(struct amdgpu_vm_pt_cursor *start,
+ struct amdgpu_vm_bo_base *entry)
+{
+ return entry && (!start || entry != start->entry);
+}
+
+/**
+ * amdgpu_vm_pt_next_dfs - get the next node for a deep first search
+ *
+ * @adev: amdgpu_device structure
+ * @cursor: current state
+ *
+ * Move the cursor to the next node in a deep first search.
+ */
+static void amdgpu_vm_pt_next_dfs(struct amdgpu_device *adev,
+ struct amdgpu_vm_pt_cursor *cursor)
+{
+ if (!cursor->entry)
+ return;
+
+ if (!cursor->parent)
+ cursor->entry = NULL;
+ else if (amdgpu_vm_pt_sibling(adev, cursor))
+ while (amdgpu_vm_pt_descendant(adev, cursor))
+ ;
+ else
+ amdgpu_vm_pt_ancestor(cursor);
+}
+
+/*
+ * for_each_amdgpu_vm_pt_dfs_safe - safe deep first search of all PDs/PTs
+ */
+#define for_each_amdgpu_vm_pt_dfs_safe(adev, vm, start, cursor, entry) \
+ for (amdgpu_vm_pt_first_dfs((adev), (vm), (start), &(cursor)), \
+ (entry) = (cursor).entry, amdgpu_vm_pt_next_dfs((adev), &(cursor));\
+ amdgpu_vm_pt_continue_dfs((start), (entry)); \
+ (entry) = (cursor).entry, amdgpu_vm_pt_next_dfs((adev), &(cursor)))
+
+/**
+ * amdgpu_vm_pt_clear - initially clear the PDs/PTs
+ *
+ * @adev: amdgpu_device pointer
+ * @vm: VM to clear BO from
+ * @vmbo: BO to clear
+ * @immediate: use an immediate update
+ *
+ * Root PD needs to be reserved when calling this.
+ *
+ * Returns:
+ * 0 on success, errno otherwise.
+ */
+int amdgpu_vm_pt_clear(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+ struct amdgpu_bo_vm *vmbo, bool immediate)
+{
+ unsigned int level = adev->vm_manager.root_level;
+ struct ttm_operation_ctx ctx = { true, false };
+ struct amdgpu_vm_update_params params;
+ struct amdgpu_bo *ancestor = &vmbo->bo;
+ unsigned int entries, ats_entries;
+ struct amdgpu_bo *bo = &vmbo->bo;
+ uint64_t addr;
+ int r, idx;
+
+ /* Figure out our place in the hierarchy */
+ if (ancestor->parent) {
+ ++level;
+ while (ancestor->parent->parent) {
+ ++level;
+ ancestor = ancestor->parent;
+ }
+ }
+
+ entries = amdgpu_bo_size(bo) / 8;
+ if (!vm->pte_support_ats) {
+ ats_entries = 0;
+
+ } else if (!bo->parent) {
+ ats_entries = amdgpu_vm_pt_num_ats_entries(adev);
+ ats_entries = min(ats_entries, entries);
+ entries -= ats_entries;
+
+ } else {
+ struct amdgpu_vm_bo_base *pt;
+
+ pt = ancestor->vm_bo;
+ ats_entries = amdgpu_vm_pt_num_ats_entries(adev);
+ if ((pt - to_amdgpu_bo_vm(vm->root.bo)->entries) >=
+ ats_entries) {
+ ats_entries = 0;
+ } else {
+ ats_entries = entries;
+ entries = 0;
+ }
+ }
+
+ r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+ if (r)
+ return r;
+
+ if (vmbo->shadow) {
+ struct amdgpu_bo *shadow = vmbo->shadow;
+
+ r = ttm_bo_validate(&shadow->tbo, &shadow->placement, &ctx);
+ if (r)
+ return r;
+ }
+
+ if (!drm_dev_enter(adev_to_drm(adev), &idx))
+ return -ENODEV;
+
+ r = vm->update_funcs->map_table(vmbo);
+ if (r)
+ goto exit;
+
+ memset(&params, 0, sizeof(params));
+ params.adev = adev;
+ params.vm = vm;
+ params.immediate = immediate;
+
+ r = vm->update_funcs->prepare(&params, NULL, AMDGPU_SYNC_EXPLICIT);
+ if (r)
+ goto exit;
+
+ addr = 0;
+ if (ats_entries) {
+ uint64_t value = 0, flags;
+
+ flags = AMDGPU_PTE_DEFAULT_ATC;
+ if (level != AMDGPU_VM_PTB) {
+ /* Handle leaf PDEs as PTEs */
+ flags |= AMDGPU_PDE_PTE;
+ amdgpu_gmc_get_vm_pde(adev, level, &value, &flags);
+ }
+
+ r = vm->update_funcs->update(&params, vmbo, addr, 0,
+ ats_entries, value, flags);
+ if (r)
+ goto exit;
+
+ addr += ats_entries * 8;
+ }
+
+ if (entries) {
+ uint64_t value = 0, flags = 0;
+
+ if (adev->asic_type >= CHIP_VEGA10) {
+ if (level != AMDGPU_VM_PTB) {
+ /* Handle leaf PDEs as PTEs */
+ flags |= AMDGPU_PDE_PTE;
+ amdgpu_gmc_get_vm_pde(adev, level,
+ &value, &flags);
+ } else {
+ /* Workaround for fault priority problem on GMC9 */
+ flags = AMDGPU_PTE_EXECUTABLE;
+ }
+ }
+
+ r = vm->update_funcs->update(&params, vmbo, addr, 0, entries,
+ value, flags);
+ if (r)
+ goto exit;
+ }
+
+ r = vm->update_funcs->commit(&params, NULL);
+exit:
+ drm_dev_exit(idx);
+ return r;
+}
+
+/**
+ * amdgpu_vm_pt_create - create bo for PD/PT
+ *
+ * @adev: amdgpu_device pointer
+ * @vm: requesting vm
+ * @level: the page table level
+ * @immediate: use a immediate update
+ * @vmbo: pointer to the buffer object pointer
+ */
+int amdgpu_vm_pt_create(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+ int level, bool immediate, struct amdgpu_bo_vm **vmbo)
+{
+ struct amdgpu_bo_param bp;
+ struct amdgpu_bo *bo;
+ struct dma_resv *resv;
+ unsigned int num_entries;
+ int r;
+
+ memset(&bp, 0, sizeof(bp));
+
+ bp.size = amdgpu_vm_pt_size(adev, level);
+ bp.byte_align = AMDGPU_GPU_PAGE_SIZE;
+ bp.domain = AMDGPU_GEM_DOMAIN_VRAM;
+ bp.domain = amdgpu_bo_get_preferred_domain(adev, bp.domain);
+ bp.flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
+ AMDGPU_GEM_CREATE_CPU_GTT_USWC;
+
+ if (level < AMDGPU_VM_PTB)
+ num_entries = amdgpu_vm_pt_num_entries(adev, level);
+ else
+ num_entries = 0;
+
+ bp.bo_ptr_size = struct_size((*vmbo), entries, num_entries);
+
+ if (vm->use_cpu_for_update)
+ bp.flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
+
+ bp.type = ttm_bo_type_kernel;
+ bp.no_wait_gpu = immediate;
+ if (vm->root.bo)
+ bp.resv = vm->root.bo->tbo.base.resv;
+
+ r = amdgpu_bo_create_vm(adev, &bp, vmbo);
+ if (r)
+ return r;
+
+ bo = &(*vmbo)->bo;
+ if (vm->is_compute_context || (adev->flags & AMD_IS_APU)) {
+ (*vmbo)->shadow = NULL;
+ return 0;
+ }
+
+ if (!bp.resv)
+ WARN_ON(dma_resv_lock(bo->tbo.base.resv,
+ NULL));
+ resv = bp.resv;
+ memset(&bp, 0, sizeof(bp));
+ bp.size = amdgpu_vm_pt_size(adev, level);
+ bp.domain = AMDGPU_GEM_DOMAIN_GTT;
+ bp.flags = AMDGPU_GEM_CREATE_CPU_GTT_USWC;
+ bp.type = ttm_bo_type_kernel;
+ bp.resv = bo->tbo.base.resv;
+ bp.bo_ptr_size = sizeof(struct amdgpu_bo);
+
+ r = amdgpu_bo_create(adev, &bp, &(*vmbo)->shadow);
+
+ if (!resv)
+ dma_resv_unlock(bo->tbo.base.resv);
+
+ if (r) {
+ amdgpu_bo_unref(&bo);
+ return r;
+ }
+
+ (*vmbo)->shadow->parent = amdgpu_bo_ref(bo);
+ amdgpu_bo_add_to_shadow_list(*vmbo);
+
+ return 0;
+}
+
+/**
+ * amdgpu_vm_pt_alloc - Allocate a specific page table
+ *
+ * @adev: amdgpu_device pointer
+ * @vm: VM to allocate page tables for
+ * @cursor: Which page table to allocate
+ * @immediate: use an immediate update
+ *
+ * Make sure a specific page table or directory is allocated.
+ *
+ * Returns:
+ * 1 if page table needed to be allocated, 0 if page table was already
+ * allocated, negative errno if an error occurred.
+ */
+static int amdgpu_vm_pt_alloc(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm,
+ struct amdgpu_vm_pt_cursor *cursor,
+ bool immediate)
+{
+ struct amdgpu_vm_bo_base *entry = cursor->entry;
+ struct amdgpu_bo *pt_bo;
+ struct amdgpu_bo_vm *pt;
+ int r;
+
+ if (entry->bo)
+ return 0;
+
+ r = amdgpu_vm_pt_create(adev, vm, cursor->level, immediate, &pt);
+ if (r)
+ return r;
+
+ /* Keep a reference to the root directory to avoid
+ * freeing them up in the wrong order.
+ */
+ pt_bo = &pt->bo;
+ pt_bo->parent = amdgpu_bo_ref(cursor->parent->bo);
+ amdgpu_vm_bo_base_init(entry, vm, pt_bo);
+ r = amdgpu_vm_pt_clear(adev, vm, pt, immediate);
+ if (r)
+ goto error_free_pt;
+
+ return 0;
+
+error_free_pt:
+ amdgpu_bo_unref(&pt->shadow);
+ amdgpu_bo_unref(&pt_bo);
+ return r;
+}
+
+/**
+ * amdgpu_vm_pt_free - free one PD/PT
+ *
+ * @entry: PDE to free
+ */
+static void amdgpu_vm_pt_free(struct amdgpu_vm_bo_base *entry)
+{
+ struct amdgpu_bo *shadow;
+
+ if (!entry->bo)
+ return;
+ shadow = amdgpu_bo_shadowed(entry->bo);
+ if (shadow) {
+ ttm_bo_set_bulk_move(&shadow->tbo, NULL);
+ amdgpu_bo_unref(&shadow);
+ }
+ ttm_bo_set_bulk_move(&entry->bo->tbo, NULL);
+ entry->bo->vm_bo = NULL;
+ list_del(&entry->vm_status);
+ amdgpu_bo_unref(&entry->bo);
+}
+
+/**
+ * amdgpu_vm_pt_free_dfs - free PD/PT levels
+ *
+ * @adev: amdgpu device structure
+ * @vm: amdgpu vm structure
+ * @start: optional cursor where to start freeing PDs/PTs
+ *
+ * Free the page directory or page table level and all sub levels.
+ */
+static void amdgpu_vm_pt_free_dfs(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm,
+ struct amdgpu_vm_pt_cursor *start)
+{
+ struct amdgpu_vm_pt_cursor cursor;
+ struct amdgpu_vm_bo_base *entry;
+
+ for_each_amdgpu_vm_pt_dfs_safe(adev, vm, start, cursor, entry)
+ amdgpu_vm_pt_free(entry);
+
+ if (start)
+ amdgpu_vm_pt_free(start->entry);
+}
+
+/**
+ * amdgpu_vm_pt_free_root - free root PD
+ * @adev: amdgpu device structure
+ * @vm: amdgpu vm structure
+ *
+ * Free the root page directory and everything below it.
+ */
+void amdgpu_vm_pt_free_root(struct amdgpu_device *adev, struct amdgpu_vm *vm)
+{
+ amdgpu_vm_pt_free_dfs(adev, vm, NULL);
+}
+
+/**
+ * amdgpu_vm_pt_is_root_clean - check if a root PD is clean
+ *
+ * @adev: amdgpu_device pointer
+ * @vm: the VM to check
+ *
+ * Check all entries of the root PD, if any subsequent PDs are allocated,
+ * it means there are page table creating and filling, and is no a clean
+ * VM
+ *
+ * Returns:
+ * 0 if this VM is clean
+ */
+bool amdgpu_vm_pt_is_root_clean(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm)
+{
+ enum amdgpu_vm_level root = adev->vm_manager.root_level;
+ unsigned int entries = amdgpu_vm_pt_num_entries(adev, root);
+ unsigned int i = 0;
+
+ for (i = 0; i < entries; i++) {
+ if (to_amdgpu_bo_vm(vm->root.bo)->entries[i].bo)
+ return false;
+ }
+ return true;
+}
+
+/**
+ * amdgpu_vm_pde_update - update a single level in the hierarchy
+ *
+ * @params: parameters for the update
+ * @entry: entry to update
+ *
+ * Makes sure the requested entry in parent is up to date.
+ */
+int amdgpu_vm_pde_update(struct amdgpu_vm_update_params *params,
+ struct amdgpu_vm_bo_base *entry)
+{
+ struct amdgpu_vm_bo_base *parent = amdgpu_vm_pt_parent(entry);
+ struct amdgpu_bo *bo = parent->bo, *pbo;
+ struct amdgpu_vm *vm = params->vm;
+ uint64_t pde, pt, flags;
+ unsigned int level;
+
+ for (level = 0, pbo = bo->parent; pbo; ++level)
+ pbo = pbo->parent;
+
+ level += params->adev->vm_manager.root_level;
+ amdgpu_gmc_get_pde_for_bo(entry->bo, level, &pt, &flags);
+ pde = (entry - to_amdgpu_bo_vm(parent->bo)->entries) * 8;
+ return vm->update_funcs->update(params, to_amdgpu_bo_vm(bo), pde, pt,
+ 1, 0, flags);
+}
+
+/*
+ * amdgpu_vm_pte_update_flags - figure out flags for PTE updates
+ *
+ * Make sure to set the right flags for the PTEs at the desired level.
+ */
+static void amdgpu_vm_pte_update_flags(struct amdgpu_vm_update_params *params,
+ struct amdgpu_bo_vm *pt,
+ unsigned int level,
+ uint64_t pe, uint64_t addr,
+ unsigned int count, uint32_t incr,
+ uint64_t flags)
+
+{
+ if (level != AMDGPU_VM_PTB) {
+ flags |= AMDGPU_PDE_PTE;
+ amdgpu_gmc_get_vm_pde(params->adev, level, &addr, &flags);
+
+ } else if (params->adev->asic_type >= CHIP_VEGA10 &&
+ !(flags & AMDGPU_PTE_VALID) &&
+ !(flags & AMDGPU_PTE_PRT)) {
+
+ /* Workaround for fault priority problem on GMC9 */
+ flags |= AMDGPU_PTE_EXECUTABLE;
+ }
+
+ params->vm->update_funcs->update(params, pt, pe, addr, count, incr,
+ flags);
+}
+
+/**
+ * amdgpu_vm_pte_fragment - get fragment for PTEs
+ *
+ * @params: see amdgpu_vm_update_params definition
+ * @start: first PTE to handle
+ * @end: last PTE to handle
+ * @flags: hw mapping flags
+ * @frag: resulting fragment size
+ * @frag_end: end of this fragment
+ *
+ * Returns the first possible fragment for the start and end address.
+ */
+static void amdgpu_vm_pte_fragment(struct amdgpu_vm_update_params *params,
+ uint64_t start, uint64_t end, uint64_t flags,
+ unsigned int *frag, uint64_t *frag_end)
+{
+ /**
+ * The MC L1 TLB supports variable sized pages, based on a fragment
+ * field in the PTE. When this field is set to a non-zero value, page
+ * granularity is increased from 4KB to (1 << (12 + frag)). The PTE
+ * flags are considered valid for all PTEs within the fragment range
+ * and corresponding mappings are assumed to be physically contiguous.
+ *
+ * The L1 TLB can store a single PTE for the whole fragment,
+ * significantly increasing the space available for translation
+ * caching. This leads to large improvements in throughput when the
+ * TLB is under pressure.
+ *
+ * The L2 TLB distributes small and large fragments into two
+ * asymmetric partitions. The large fragment cache is significantly
+ * larger. Thus, we try to use large fragments wherever possible.
+ * Userspace can support this by aligning virtual base address and
+ * allocation size to the fragment size.
+ *
+ * Starting with Vega10 the fragment size only controls the L1. The L2
+ * is now directly feed with small/huge/giant pages from the walker.
+ */
+ unsigned int max_frag;
+
+ if (params->adev->asic_type < CHIP_VEGA10)
+ max_frag = params->adev->vm_manager.fragment_size;
+ else
+ max_frag = 31;
+
+ /* system pages are non continuously */
+ if (params->pages_addr) {
+ *frag = 0;
+ *frag_end = end;
+ return;
+ }
+
+ /* This intentionally wraps around if no bit is set */
+ *frag = min_t(unsigned int, ffs(start) - 1, fls64(end - start) - 1);
+ if (*frag >= max_frag) {
+ *frag = max_frag;
+ *frag_end = end & ~((1ULL << max_frag) - 1);
+ } else {
+ *frag_end = start + (1 << *frag);
+ }
+}
+
+/**
+ * amdgpu_vm_ptes_update - make sure that page tables are valid
+ *
+ * @params: see amdgpu_vm_update_params definition
+ * @start: start of GPU address range
+ * @end: end of GPU address range
+ * @dst: destination address to map to, the next dst inside the function
+ * @flags: mapping flags
+ *
+ * Update the page tables in the range @start - @end.
+ *
+ * Returns:
+ * 0 for success, -EINVAL for failure.
+ */
+int amdgpu_vm_ptes_update(struct amdgpu_vm_update_params *params,
+ uint64_t start, uint64_t end,
+ uint64_t dst, uint64_t flags)
+{
+ struct amdgpu_device *adev = params->adev;
+ struct amdgpu_vm_pt_cursor cursor;
+ uint64_t frag_start = start, frag_end;
+ unsigned int frag;
+ int r;
+
+ /* figure out the initial fragment */
+ amdgpu_vm_pte_fragment(params, frag_start, end, flags, &frag,
+ &frag_end);
+
+ /* walk over the address space and update the PTs */
+ amdgpu_vm_pt_start(adev, params->vm, start, &cursor);
+ while (cursor.pfn < end) {
+ unsigned int shift, parent_shift, mask;
+ uint64_t incr, entry_end, pe_start;
+ struct amdgpu_bo *pt;
+
+ if (!params->unlocked) {
+ /* make sure that the page tables covering the
+ * address range are actually allocated
+ */
+ r = amdgpu_vm_pt_alloc(params->adev, params->vm,
+ &cursor, params->immediate);
+ if (r)
+ return r;
+ }
+
+ shift = amdgpu_vm_pt_level_shift(adev, cursor.level);
+ parent_shift = amdgpu_vm_pt_level_shift(adev, cursor.level - 1);
+ if (params->unlocked) {
+ /* Unlocked updates are only allowed on the leaves */
+ if (amdgpu_vm_pt_descendant(adev, &cursor))
+ continue;
+ } else if (adev->asic_type < CHIP_VEGA10 &&
+ (flags & AMDGPU_PTE_VALID)) {
+ /* No huge page support before GMC v9 */
+ if (cursor.level != AMDGPU_VM_PTB) {
+ if (!amdgpu_vm_pt_descendant(adev, &cursor))
+ return -ENOENT;
+ continue;
+ }
+ } else if (frag < shift) {
+ /* We can't use this level when the fragment size is
+ * smaller than the address shift. Go to the next
+ * child entry and try again.
+ */
+ if (amdgpu_vm_pt_descendant(adev, &cursor))
+ continue;
+ } else if (frag >= parent_shift) {
+ /* If the fragment size is even larger than the parent
+ * shift we should go up one level and check it again.
+ */
+ if (!amdgpu_vm_pt_ancestor(&cursor))
+ return -EINVAL;
+ continue;
+ }
+
+ pt = cursor.entry->bo;
+ if (!pt) {
+ /* We need all PDs and PTs for mapping something, */
+ if (flags & AMDGPU_PTE_VALID)
+ return -ENOENT;
+
+ /* but unmapping something can happen at a higher
+ * level.
+ */
+ if (!amdgpu_vm_pt_ancestor(&cursor))
+ return -EINVAL;
+
+ pt = cursor.entry->bo;
+ shift = parent_shift;
+ frag_end = max(frag_end, ALIGN(frag_start + 1,
+ 1ULL << shift));
+ }
+
+ /* Looks good so far, calculate parameters for the update */
+ incr = (uint64_t)AMDGPU_GPU_PAGE_SIZE << shift;
+ mask = amdgpu_vm_pt_entries_mask(adev, cursor.level);
+ pe_start = ((cursor.pfn >> shift) & mask) * 8;
+ entry_end = ((uint64_t)mask + 1) << shift;
+ entry_end += cursor.pfn & ~(entry_end - 1);
+ entry_end = min(entry_end, end);
+
+ do {
+ struct amdgpu_vm *vm = params->vm;
+ uint64_t upd_end = min(entry_end, frag_end);
+ unsigned int nptes = (upd_end - frag_start) >> shift;
+ uint64_t upd_flags = flags | AMDGPU_PTE_FRAG(frag);
+
+ /* This can happen when we set higher level PDs to
+ * silent to stop fault floods.
+ */
+ nptes = max(nptes, 1u);
+
+ trace_amdgpu_vm_update_ptes(params, frag_start, upd_end,
+ min(nptes, 32u), dst, incr,
+ upd_flags,
+ vm->task_info.pid,
+ vm->immediate.fence_context);
+ amdgpu_vm_pte_update_flags(params, to_amdgpu_bo_vm(pt),
+ cursor.level, pe_start, dst,
+ nptes, incr, upd_flags);
+
+ pe_start += nptes * 8;
+ dst += nptes * incr;
+
+ frag_start = upd_end;
+ if (frag_start >= frag_end) {
+ /* figure out the next fragment */
+ amdgpu_vm_pte_fragment(params, frag_start, end,
+ flags, &frag, &frag_end);
+ if (frag < shift)
+ break;
+ }
+ } while (frag_start < entry_end);
+
+ if (amdgpu_vm_pt_descendant(adev, &cursor)) {
+ /* Free all child entries.
+ * Update the tables with the flags and addresses and free up subsequent
+ * tables in the case of huge pages or freed up areas.
+ * This is the maximum you can free, because all other page tables are not
+ * completely covered by the range and so potentially still in use.
+ */
+ while (cursor.pfn < frag_start) {
+ /* Make sure previous mapping is freed */
+ if (cursor.entry->bo) {
+ params->table_freed = true;
+ amdgpu_vm_pt_free_dfs(adev, params->vm,
+ &cursor);
+ }
+ amdgpu_vm_pt_next(adev, &cursor);
+ }
+
+ } else if (frag >= shift) {
+ /* or just move on to the next on the same level. */
+ amdgpu_vm_pt_next(adev, &cursor);
+ }
+ }
+
+ return 0;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
index dbb551762805..1fd3cbca20a2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
@@ -109,7 +109,7 @@ static int amdgpu_vm_sdma_commit(struct amdgpu_vm_update_params *p,
if (p->unlocked) {
struct dma_fence *tmp = dma_fence_get(f);
- swap(p->vm->last_unlocked, f);
+ swap(p->vm->last_unlocked, tmp);
dma_fence_put(tmp);
} else {
amdgpu_bo_fence(p->vm->root.bo, f, true);
@@ -204,14 +204,19 @@ static int amdgpu_vm_sdma_update(struct amdgpu_vm_update_params *p,
struct amdgpu_bo *bo = &vmbo->bo;
enum amdgpu_ib_pool_type pool = p->immediate ? AMDGPU_IB_POOL_IMMEDIATE
: AMDGPU_IB_POOL_DELAYED;
+ struct dma_resv_iter cursor;
unsigned int i, ndw, nptes;
+ struct dma_fence *fence;
uint64_t *pte;
int r;
/* Wait for PD/PT moves to be completed */
- r = amdgpu_sync_fence(&p->job->sync, bo->tbo.moving);
- if (r)
- return r;
+ dma_resv_for_each_fence(&cursor, bo->tbo.base.resv,
+ DMA_RESV_USAGE_KERNEL, fence) {
+ r = amdgpu_sync_fence(&p->job->sync, fence);
+ if (r)
+ return r;
+ }
do {
ndw = p->num_dw_left;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
index 0a7611648573..49e4092f447f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
@@ -32,8 +32,10 @@
#include "atom.h"
struct amdgpu_vram_reservation {
- struct list_head node;
- struct drm_mm_node mm_node;
+ u64 start;
+ u64 size;
+ struct list_head allocated;
+ struct list_head blocks;
};
static inline struct amdgpu_vram_mgr *
@@ -186,18 +188,18 @@ const struct attribute_group amdgpu_vram_mgr_attr_group = {
};
/**
- * amdgpu_vram_mgr_vis_size - Calculate visible node size
+ * amdgpu_vram_mgr_vis_size - Calculate visible block size
*
* @adev: amdgpu_device pointer
- * @node: MM node structure
+ * @block: DRM BUDDY block structure
*
- * Calculate how many bytes of the MM node are inside visible VRAM
+ * Calculate how many bytes of the DRM BUDDY block are inside visible VRAM
*/
static u64 amdgpu_vram_mgr_vis_size(struct amdgpu_device *adev,
- struct drm_mm_node *node)
+ struct drm_buddy_block *block)
{
- uint64_t start = node->start << PAGE_SHIFT;
- uint64_t end = (node->size + node->start) << PAGE_SHIFT;
+ u64 start = amdgpu_vram_mgr_block_start(block);
+ u64 end = start + amdgpu_vram_mgr_block_size(block);
if (start >= adev->gmc.visible_vram_size)
return 0;
@@ -218,9 +220,9 @@ u64 amdgpu_vram_mgr_bo_visible_size(struct amdgpu_bo *bo)
{
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
struct ttm_resource *res = bo->tbo.resource;
- unsigned pages = res->num_pages;
- struct drm_mm_node *mm;
- u64 usage;
+ struct amdgpu_vram_mgr_resource *vres = to_amdgpu_vram_mgr_resource(res);
+ struct drm_buddy_block *block;
+ u64 usage = 0;
if (amdgpu_gmc_vram_full_visible(&adev->gmc))
return amdgpu_bo_size(bo);
@@ -228,9 +230,8 @@ u64 amdgpu_vram_mgr_bo_visible_size(struct amdgpu_bo *bo)
if (res->start >= adev->gmc.visible_vram_size >> PAGE_SHIFT)
return 0;
- mm = &container_of(res, struct ttm_range_mgr_node, base)->mm_nodes[0];
- for (usage = 0; pages; pages -= mm->size, mm++)
- usage += amdgpu_vram_mgr_vis_size(adev, mm);
+ list_for_each_entry(block, &vres->blocks, link)
+ usage += amdgpu_vram_mgr_vis_size(adev, block);
return usage;
}
@@ -240,23 +241,30 @@ static void amdgpu_vram_mgr_do_reserve(struct ttm_resource_manager *man)
{
struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
struct amdgpu_device *adev = to_amdgpu_device(mgr);
- struct drm_mm *mm = &mgr->mm;
+ struct drm_buddy *mm = &mgr->mm;
struct amdgpu_vram_reservation *rsv, *temp;
+ struct drm_buddy_block *block;
uint64_t vis_usage;
- list_for_each_entry_safe(rsv, temp, &mgr->reservations_pending, node) {
- if (drm_mm_reserve_node(mm, &rsv->mm_node))
+ list_for_each_entry_safe(rsv, temp, &mgr->reservations_pending, blocks) {
+ if (drm_buddy_alloc_blocks(mm, rsv->start, rsv->start + rsv->size,
+ rsv->size, mm->chunk_size, &rsv->allocated,
+ DRM_BUDDY_RANGE_ALLOCATION))
+ continue;
+
+ block = amdgpu_vram_mgr_first_block(&rsv->allocated);
+ if (!block)
continue;
dev_dbg(adev->dev, "Reservation 0x%llx - %lld, Succeeded\n",
- rsv->mm_node.start, rsv->mm_node.size);
+ rsv->start, rsv->size);
- vis_usage = amdgpu_vram_mgr_vis_size(adev, &rsv->mm_node);
+ vis_usage = amdgpu_vram_mgr_vis_size(adev, block);
atomic64_add(vis_usage, &mgr->vis_usage);
spin_lock(&man->bdev->lru_lock);
- man->usage += rsv->mm_node.size << PAGE_SHIFT;
+ man->usage += rsv->size;
spin_unlock(&man->bdev->lru_lock);
- list_move(&rsv->node, &mgr->reserved_pages);
+ list_move(&rsv->blocks, &mgr->reserved_pages);
}
}
@@ -278,14 +286,16 @@ int amdgpu_vram_mgr_reserve_range(struct amdgpu_vram_mgr *mgr,
if (!rsv)
return -ENOMEM;
- INIT_LIST_HEAD(&rsv->node);
- rsv->mm_node.start = start >> PAGE_SHIFT;
- rsv->mm_node.size = size >> PAGE_SHIFT;
+ INIT_LIST_HEAD(&rsv->allocated);
+ INIT_LIST_HEAD(&rsv->blocks);
- spin_lock(&mgr->lock);
- list_add_tail(&rsv->node, &mgr->reservations_pending);
+ rsv->start = start;
+ rsv->size = size;
+
+ mutex_lock(&mgr->lock);
+ list_add_tail(&rsv->blocks, &mgr->reservations_pending);
amdgpu_vram_mgr_do_reserve(&mgr->manager);
- spin_unlock(&mgr->lock);
+ mutex_unlock(&mgr->lock);
return 0;
}
@@ -307,19 +317,19 @@ int amdgpu_vram_mgr_query_page_status(struct amdgpu_vram_mgr *mgr,
struct amdgpu_vram_reservation *rsv;
int ret;
- spin_lock(&mgr->lock);
+ mutex_lock(&mgr->lock);
- list_for_each_entry(rsv, &mgr->reservations_pending, node) {
- if ((rsv->mm_node.start <= start) &&
- (start < (rsv->mm_node.start + rsv->mm_node.size))) {
+ list_for_each_entry(rsv, &mgr->reservations_pending, blocks) {
+ if (rsv->start <= start &&
+ (start < (rsv->start + rsv->size))) {
ret = -EBUSY;
goto out;
}
}
- list_for_each_entry(rsv, &mgr->reserved_pages, node) {
- if ((rsv->mm_node.start <= start) &&
- (start < (rsv->mm_node.start + rsv->mm_node.size))) {
+ list_for_each_entry(rsv, &mgr->reserved_pages, blocks) {
+ if (rsv->start <= start &&
+ (start < (rsv->start + rsv->size))) {
ret = 0;
goto out;
}
@@ -327,33 +337,11 @@ int amdgpu_vram_mgr_query_page_status(struct amdgpu_vram_mgr *mgr,
ret = -ENOENT;
out:
- spin_unlock(&mgr->lock);
+ mutex_unlock(&mgr->lock);
return ret;
}
/**
- * amdgpu_vram_mgr_virt_start - update virtual start address
- *
- * @mem: ttm_resource to update
- * @node: just allocated node
- *
- * Calculate a virtual BO start address to easily check if everything is CPU
- * accessible.
- */
-static void amdgpu_vram_mgr_virt_start(struct ttm_resource *mem,
- struct drm_mm_node *node)
-{
- unsigned long start;
-
- start = node->start + node->size;
- if (start > mem->num_pages)
- start -= mem->num_pages;
- else
- start = 0;
- mem->start = max(mem->start, start);
-}
-
-/**
* amdgpu_vram_mgr_new - allocate new ranges
*
* @man: TTM memory type manager
@@ -368,46 +356,44 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
const struct ttm_place *place,
struct ttm_resource **res)
{
- unsigned long lpfn, num_nodes, pages_per_node, pages_left, pages;
+ u64 vis_usage = 0, max_bytes, cur_size, min_block_size;
struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
struct amdgpu_device *adev = to_amdgpu_device(mgr);
- uint64_t vis_usage = 0, mem_bytes, max_bytes;
- struct ttm_range_mgr_node *node;
- struct drm_mm *mm = &mgr->mm;
- enum drm_mm_insert_mode mode;
- unsigned i;
+ struct amdgpu_vram_mgr_resource *vres;
+ u64 size, remaining_size, lpfn, fpfn;
+ struct drm_buddy *mm = &mgr->mm;
+ struct drm_buddy_block *block;
+ unsigned long pages_per_block;
int r;
- lpfn = place->lpfn;
+ lpfn = place->lpfn << PAGE_SHIFT;
if (!lpfn)
- lpfn = man->size >> PAGE_SHIFT;
+ lpfn = man->size;
+
+ fpfn = place->fpfn << PAGE_SHIFT;
max_bytes = adev->gmc.mc_vram_size;
if (tbo->type != ttm_bo_type_kernel)
max_bytes -= AMDGPU_VM_RESERVED_VRAM;
- mem_bytes = tbo->base.size;
if (place->flags & TTM_PL_FLAG_CONTIGUOUS) {
- pages_per_node = ~0ul;
- num_nodes = 1;
+ pages_per_block = ~0ul;
} else {
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
- pages_per_node = HPAGE_PMD_NR;
+ pages_per_block = HPAGE_PMD_NR;
#else
/* default to 2MB */
- pages_per_node = 2UL << (20UL - PAGE_SHIFT);
+ pages_per_block = 2UL << (20UL - PAGE_SHIFT);
#endif
- pages_per_node = max_t(uint32_t, pages_per_node,
- tbo->page_alignment);
- num_nodes = DIV_ROUND_UP_ULL(PFN_UP(mem_bytes), pages_per_node);
+ pages_per_block = max_t(uint32_t, pages_per_block,
+ tbo->page_alignment);
}
- node = kvmalloc(struct_size(node, mm_nodes, num_nodes),
- GFP_KERNEL | __GFP_ZERO);
- if (!node)
+ vres = kzalloc(sizeof(*vres), GFP_KERNEL);
+ if (!vres)
return -ENOMEM;
- ttm_resource_init(tbo, place, &node->base);
+ ttm_resource_init(tbo, place, &vres->base);
/* bail out quickly if there's likely not enough VRAM for this BO */
if (ttm_resource_manager_usage(man) > max_bytes) {
@@ -415,66 +401,130 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
goto error_fini;
}
- mode = DRM_MM_INSERT_BEST;
+ INIT_LIST_HEAD(&vres->blocks);
+
if (place->flags & TTM_PL_FLAG_TOPDOWN)
- mode = DRM_MM_INSERT_HIGH;
-
- pages_left = node->base.num_pages;
-
- /* Limit maximum size to 2GB due to SG table limitations */
- pages = min(pages_left, 2UL << (30 - PAGE_SHIFT));
-
- i = 0;
- spin_lock(&mgr->lock);
- while (pages_left) {
- uint32_t alignment = tbo->page_alignment;
-
- if (pages >= pages_per_node)
- alignment = pages_per_node;
-
- r = drm_mm_insert_node_in_range(mm, &node->mm_nodes[i], pages,
- alignment, 0, place->fpfn,
- lpfn, mode);
- if (unlikely(r)) {
- if (pages > pages_per_node) {
- if (is_power_of_2(pages))
- pages = pages / 2;
- else
- pages = rounddown_pow_of_two(pages);
- continue;
+ vres->flags |= DRM_BUDDY_TOPDOWN_ALLOCATION;
+
+ if (fpfn || lpfn != man->size)
+ /* Allocate blocks in desired range */
+ vres->flags |= DRM_BUDDY_RANGE_ALLOCATION;
+
+ remaining_size = vres->base.num_pages << PAGE_SHIFT;
+
+ mutex_lock(&mgr->lock);
+ while (remaining_size) {
+ if (tbo->page_alignment)
+ min_block_size = tbo->page_alignment << PAGE_SHIFT;
+ else
+ min_block_size = mgr->default_page_size;
+
+ BUG_ON(min_block_size < mm->chunk_size);
+
+ /* Limit maximum size to 2GiB due to SG table limitations */
+ size = min(remaining_size, 2ULL << 30);
+
+ if (size >= pages_per_block << PAGE_SHIFT)
+ min_block_size = pages_per_block << PAGE_SHIFT;
+
+ cur_size = size;
+
+ if (fpfn + size != place->lpfn << PAGE_SHIFT) {
+ /*
+ * Except for actual range allocation, modify the size and
+ * min_block_size conforming to continuous flag enablement
+ */
+ if (place->flags & TTM_PL_FLAG_CONTIGUOUS) {
+ size = roundup_pow_of_two(size);
+ min_block_size = size;
+ /*
+ * Modify the size value if size is not
+ * aligned with min_block_size
+ */
+ } else if (!IS_ALIGNED(size, min_block_size)) {
+ size = round_up(size, min_block_size);
}
- goto error_free;
}
- vis_usage += amdgpu_vram_mgr_vis_size(adev, &node->mm_nodes[i]);
- amdgpu_vram_mgr_virt_start(&node->base, &node->mm_nodes[i]);
- pages_left -= pages;
- ++i;
+ r = drm_buddy_alloc_blocks(mm, fpfn,
+ lpfn,
+ size,
+ min_block_size,
+ &vres->blocks,
+ vres->flags);
+ if (unlikely(r))
+ goto error_free_blocks;
+
+ if (size > remaining_size)
+ remaining_size = 0;
+ else
+ remaining_size -= size;
+ }
+ mutex_unlock(&mgr->lock);
+
+ if (cur_size != size) {
+ struct drm_buddy_block *block;
+ struct list_head *trim_list;
+ u64 original_size;
+ LIST_HEAD(temp);
+
+ trim_list = &vres->blocks;
+ original_size = vres->base.num_pages << PAGE_SHIFT;
+
+ /*
+ * If size value is rounded up to min_block_size, trim the last
+ * block to the required size
+ */
+ if (!list_is_singular(&vres->blocks)) {
+ block = list_last_entry(&vres->blocks, typeof(*block), link);
+ list_move_tail(&block->link, &temp);
+ trim_list = &temp;
+ /*
+ * Compute the original_size value by subtracting the
+ * last block size with (aligned size - original size)
+ */
+ original_size = amdgpu_vram_mgr_block_size(block) - (size - cur_size);
+ }
- if (pages > pages_left)
- pages = pages_left;
+ mutex_lock(&mgr->lock);
+ drm_buddy_block_trim(mm,
+ original_size,
+ trim_list);
+ mutex_unlock(&mgr->lock);
+
+ if (!list_empty(&temp))
+ list_splice_tail(trim_list, &vres->blocks);
+ }
+
+ list_for_each_entry(block, &vres->blocks, link)
+ vis_usage += amdgpu_vram_mgr_vis_size(adev, block);
+
+ block = amdgpu_vram_mgr_first_block(&vres->blocks);
+ if (!block) {
+ r = -EINVAL;
+ goto error_fini;
}
- spin_unlock(&mgr->lock);
- if (i == 1)
- node->base.placement |= TTM_PL_FLAG_CONTIGUOUS;
+ vres->base.start = amdgpu_vram_mgr_block_start(block) >> PAGE_SHIFT;
+
+ if (amdgpu_is_vram_mgr_blocks_contiguous(&vres->blocks))
+ vres->base.placement |= TTM_PL_FLAG_CONTIGUOUS;
if (adev->gmc.xgmi.connected_to_cpu)
- node->base.bus.caching = ttm_cached;
+ vres->base.bus.caching = ttm_cached;
else
- node->base.bus.caching = ttm_write_combined;
+ vres->base.bus.caching = ttm_write_combined;
atomic64_add(vis_usage, &mgr->vis_usage);
- *res = &node->base;
+ *res = &vres->base;
return 0;
-error_free:
- while (i--)
- drm_mm_remove_node(&node->mm_nodes[i]);
- spin_unlock(&mgr->lock);
+error_free_blocks:
+ drm_buddy_free_list(mm, &vres->blocks);
+ mutex_unlock(&mgr->lock);
error_fini:
- ttm_resource_fini(man, &node->base);
- kvfree(node);
+ ttm_resource_fini(man, &vres->base);
+ kfree(vres);
return r;
}
@@ -490,27 +540,26 @@ error_fini:
static void amdgpu_vram_mgr_del(struct ttm_resource_manager *man,
struct ttm_resource *res)
{
- struct ttm_range_mgr_node *node = to_ttm_range_mgr_node(res);
+ struct amdgpu_vram_mgr_resource *vres = to_amdgpu_vram_mgr_resource(res);
struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
struct amdgpu_device *adev = to_amdgpu_device(mgr);
+ struct drm_buddy *mm = &mgr->mm;
+ struct drm_buddy_block *block;
uint64_t vis_usage = 0;
- unsigned i, pages;
- spin_lock(&mgr->lock);
- for (i = 0, pages = res->num_pages; pages;
- pages -= node->mm_nodes[i].size, ++i) {
- struct drm_mm_node *mm = &node->mm_nodes[i];
+ mutex_lock(&mgr->lock);
+ list_for_each_entry(block, &vres->blocks, link)
+ vis_usage += amdgpu_vram_mgr_vis_size(adev, block);
- drm_mm_remove_node(mm);
- vis_usage += amdgpu_vram_mgr_vis_size(adev, mm);
- }
amdgpu_vram_mgr_do_reserve(man);
- spin_unlock(&mgr->lock);
+
+ drm_buddy_free_list(mm, &vres->blocks);
+ mutex_unlock(&mgr->lock);
atomic64_sub(vis_usage, &mgr->vis_usage);
ttm_resource_fini(man, res);
- kvfree(node);
+ kfree(vres);
}
/**
@@ -542,7 +591,7 @@ int amdgpu_vram_mgr_alloc_sgt(struct amdgpu_device *adev,
if (!*sgt)
return -ENOMEM;
- /* Determine the number of DRM_MM nodes to export */
+ /* Determine the number of DRM_BUDDY blocks to export */
amdgpu_res_first(res, offset, length, &cursor);
while (cursor.remaining) {
num_entries++;
@@ -558,10 +607,10 @@ int amdgpu_vram_mgr_alloc_sgt(struct amdgpu_device *adev,
sg->length = 0;
/*
- * Walk down DRM_MM nodes to populate scatterlist nodes
- * @note: Use iterator api to get first the DRM_MM node
+ * Walk down DRM_BUDDY blocks to populate scatterlist nodes
+ * @note: Use iterator api to get first the DRM_BUDDY block
* and the number of bytes from it. Access the following
- * DRM_MM node(s) if more buffer needs to exported
+ * DRM_BUDDY block(s) if more buffer needs to exported
*/
amdgpu_res_first(res, offset, length, &cursor);
for_each_sgtable_sg((*sgt), sg, i) {
@@ -648,13 +697,22 @@ static void amdgpu_vram_mgr_debug(struct ttm_resource_manager *man,
struct drm_printer *printer)
{
struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
+ struct drm_buddy *mm = &mgr->mm;
+ struct drm_buddy_block *block;
drm_printf(printer, " vis usage:%llu\n",
amdgpu_vram_mgr_vis_usage(mgr));
- spin_lock(&mgr->lock);
- drm_mm_print(&mgr->mm, printer);
- spin_unlock(&mgr->lock);
+ mutex_lock(&mgr->lock);
+ drm_printf(printer, "default_page_size: %lluKiB\n",
+ mgr->default_page_size >> 10);
+
+ drm_buddy_print(mm, printer);
+
+ drm_printf(printer, "reserved:\n");
+ list_for_each_entry(block, &mgr->reserved_pages, link)
+ drm_buddy_block_print(mm, block, printer);
+ mutex_unlock(&mgr->lock);
}
static const struct ttm_resource_manager_func amdgpu_vram_mgr_func = {
@@ -674,16 +732,21 @@ int amdgpu_vram_mgr_init(struct amdgpu_device *adev)
{
struct amdgpu_vram_mgr *mgr = &adev->mman.vram_mgr;
struct ttm_resource_manager *man = &mgr->manager;
+ int err;
ttm_resource_manager_init(man, &adev->mman.bdev,
adev->gmc.real_vram_size);
man->func = &amdgpu_vram_mgr_func;
- drm_mm_init(&mgr->mm, 0, man->size >> PAGE_SHIFT);
- spin_lock_init(&mgr->lock);
+ err = drm_buddy_init(&mgr->mm, man->size, PAGE_SIZE);
+ if (err)
+ return err;
+
+ mutex_init(&mgr->lock);
INIT_LIST_HEAD(&mgr->reservations_pending);
INIT_LIST_HEAD(&mgr->reserved_pages);
+ mgr->default_page_size = PAGE_SIZE;
ttm_set_driver_manager(&adev->mman.bdev, TTM_PL_VRAM, &mgr->manager);
ttm_resource_manager_set_used(man, true);
@@ -711,16 +774,16 @@ void amdgpu_vram_mgr_fini(struct amdgpu_device *adev)
if (ret)
return;
- spin_lock(&mgr->lock);
- list_for_each_entry_safe(rsv, temp, &mgr->reservations_pending, node)
+ mutex_lock(&mgr->lock);
+ list_for_each_entry_safe(rsv, temp, &mgr->reservations_pending, blocks)
kfree(rsv);
- list_for_each_entry_safe(rsv, temp, &mgr->reserved_pages, node) {
- drm_mm_remove_node(&rsv->mm_node);
+ list_for_each_entry_safe(rsv, temp, &mgr->reserved_pages, blocks) {
+ drm_buddy_free_list(&mgr->mm, &rsv->blocks);
kfree(rsv);
}
- drm_mm_takedown(&mgr->mm);
- spin_unlock(&mgr->lock);
+ drm_buddy_fini(&mgr->mm);
+ mutex_unlock(&mgr->lock);
ttm_resource_manager_cleanup(man);
ttm_set_driver_manager(&adev->mman.bdev, TTM_PL_VRAM, NULL);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h
new file mode 100644
index 000000000000..9a2db87186c7
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.h
@@ -0,0 +1,89 @@
+/* SPDX-License-Identifier: MIT
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __AMDGPU_VRAM_MGR_H__
+#define __AMDGPU_VRAM_MGR_H__
+
+#include <drm/drm_buddy.h>
+
+struct amdgpu_vram_mgr {
+ struct ttm_resource_manager manager;
+ struct drm_buddy mm;
+ /* protects access to buffer objects */
+ struct mutex lock;
+ struct list_head reservations_pending;
+ struct list_head reserved_pages;
+ atomic64_t vis_usage;
+ u64 default_page_size;
+};
+
+struct amdgpu_vram_mgr_resource {
+ struct ttm_resource base;
+ struct list_head blocks;
+ unsigned long flags;
+};
+
+static inline u64 amdgpu_vram_mgr_block_start(struct drm_buddy_block *block)
+{
+ return drm_buddy_block_offset(block);
+}
+
+static inline u64 amdgpu_vram_mgr_block_size(struct drm_buddy_block *block)
+{
+ return PAGE_SIZE << drm_buddy_block_order(block);
+}
+
+static inline struct drm_buddy_block *
+amdgpu_vram_mgr_first_block(struct list_head *list)
+{
+ return list_first_entry_or_null(list, struct drm_buddy_block, link);
+}
+
+static inline bool amdgpu_is_vram_mgr_blocks_contiguous(struct list_head *head)
+{
+ struct drm_buddy_block *block;
+ u64 start, size;
+
+ block = amdgpu_vram_mgr_first_block(head);
+ if (!block)
+ return false;
+
+ while (head != block->link.next) {
+ start = amdgpu_vram_mgr_block_start(block);
+ size = amdgpu_vram_mgr_block_size(block);
+
+ block = list_entry(block->link.next, struct drm_buddy_block, link);
+ if (start + size != amdgpu_vram_mgr_block_start(block))
+ return false;
+ }
+
+ return true;
+}
+
+static inline struct amdgpu_vram_mgr_resource *
+to_amdgpu_vram_mgr_resource(struct ttm_resource *res)
+{
+ return container_of(res, struct amdgpu_vram_mgr_resource, base);
+}
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h
index 7326b6c1b71c..e78e4c27b62a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h
@@ -1,34 +1,33 @@
/*
- * Copyright 2018-2019 Advanced Micro Devices, Inc.
+ * Copyright (c) 2018-2021 Advanced Micro Devices, Inc. All rights reserved.
*
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
*/
#ifndef AMDGV_SRIOV_MSG__H_
#define AMDGV_SRIOV_MSG__H_
/* unit in kilobytes */
-#define AMD_SRIOV_MSG_VBIOS_OFFSET 0
-#define AMD_SRIOV_MSG_VBIOS_SIZE_KB 64
-#define AMD_SRIOV_MSG_DATAEXCHANGE_OFFSET_KB AMD_SRIOV_MSG_VBIOS_SIZE_KB
-#define AMD_SRIOV_MSG_DATAEXCHANGE_SIZE_KB 4
+#define AMD_SRIOV_MSG_VBIOS_OFFSET 0
+#define AMD_SRIOV_MSG_VBIOS_SIZE_KB 64
+#define AMD_SRIOV_MSG_DATAEXCHANGE_OFFSET_KB AMD_SRIOV_MSG_VBIOS_SIZE_KB
+#define AMD_SRIOV_MSG_DATAEXCHANGE_SIZE_KB 4
/*
* layout
@@ -51,10 +50,10 @@
* v2 defined in amdgim
* v3 current
*/
-#define AMD_SRIOV_MSG_FW_VRAM_PF2VF_VER 2
-#define AMD_SRIOV_MSG_FW_VRAM_VF2PF_VER 3
+#define AMD_SRIOV_MSG_FW_VRAM_PF2VF_VER 2
+#define AMD_SRIOV_MSG_FW_VRAM_VF2PF_VER 3
-#define AMD_SRIOV_MSG_RESERVE_UCODE 24
+#define AMD_SRIOV_MSG_RESERVE_UCODE 24
#define AMD_SRIOV_MSG_RESERVE_VCN_INST 4
@@ -83,19 +82,19 @@ enum amd_sriov_ucode_engine_id {
AMD_SRIOV_UCODE_ID__MAX
};
-#pragma pack(push, 1) // PF2VF / VF2PF data areas are byte packed
+#pragma pack(push, 1) // PF2VF / VF2PF data areas are byte packed
union amd_sriov_msg_feature_flags {
struct {
- uint32_t error_log_collect : 1;
- uint32_t host_load_ucodes : 1;
- uint32_t host_flr_vramlost : 1;
- uint32_t mm_bw_management : 1;
- uint32_t pp_one_vf_mode : 1;
- uint32_t reg_indirect_acc : 1;
- uint32_t reserved : 26;
+ uint32_t error_log_collect : 1;
+ uint32_t host_load_ucodes : 1;
+ uint32_t host_flr_vramlost : 1;
+ uint32_t mm_bw_management : 1;
+ uint32_t pp_one_vf_mode : 1;
+ uint32_t reg_indirect_acc : 1;
+ uint32_t reserved : 26;
} flags;
- uint32_t all;
+ uint32_t all;
};
union amd_sriov_reg_access_flags {
@@ -110,10 +109,10 @@ union amd_sriov_reg_access_flags {
union amd_sriov_msg_os_info {
struct {
- uint32_t windows : 1;
- uint32_t reserved : 31;
+ uint32_t windows : 1;
+ uint32_t reserved : 31;
} info;
- uint32_t all;
+ uint32_t all;
};
struct amd_sriov_msg_uuid_info {
@@ -156,6 +155,7 @@ struct amd_sriov_msg_pf2vf_info_header {
uint32_t reserved[2];
};
+#define AMD_SRIOV_MSG_PF2VF_INFO_FILLED_SIZE (48)
struct amd_sriov_msg_pf2vf_info {
/* header contains size and version */
struct amd_sriov_msg_pf2vf_info_header header;
@@ -204,10 +204,10 @@ struct amd_sriov_msg_pf2vf_info {
} mm_bw_management[AMD_SRIOV_MSG_RESERVE_VCN_INST];
/* UUID info */
struct amd_sriov_msg_uuid_info uuid_info;
- /* pcie atomic Ops info */
- uint32_t pcie_atomic_ops_enabled_flags;
+ /* PCIE atomic ops support flag */
+ uint32_t pcie_atomic_ops_support_flags;
/* reserved */
- uint32_t reserved[256 - 48];
+ uint32_t reserved[256 - AMD_SRIOV_MSG_PF2VF_INFO_FILLED_SIZE];
};
struct amd_sriov_msg_vf2pf_info_header {
@@ -219,12 +219,13 @@ struct amd_sriov_msg_vf2pf_info_header {
uint32_t reserved[2];
};
+#define AMD_SRIOV_MSG_VF2PF_INFO_FILLED_SIZE (70)
struct amd_sriov_msg_vf2pf_info {
/* header contains size and version */
struct amd_sriov_msg_vf2pf_info_header header;
uint32_t checksum;
/* driver version */
- uint8_t driver_version[64];
+ uint8_t driver_version[64];
/* driver certification, 1=WHQL, 0=None */
uint32_t driver_cert;
/* guest OS type and version */
@@ -258,13 +259,13 @@ struct amd_sriov_msg_vf2pf_info {
uint32_t fb_size;
/* guest ucode data, each one is 1.25 Dword */
struct {
- uint8_t id;
+ uint8_t id;
uint32_t version;
} ucode_info[AMD_SRIOV_MSG_RESERVE_UCODE];
uint64_t dummy_page_addr;
/* reserved */
- uint32_t reserved[256-70];
+ uint32_t reserved[256 - AMD_SRIOV_MSG_VF2PF_INFO_FILLED_SIZE];
};
/* mailbox message send from guest to host */
@@ -276,7 +277,7 @@ enum amd_sriov_mailbox_request_message {
MB_REQ_MSG_REQ_GPU_RESET_ACCESS,
MB_REQ_MSG_REQ_GPU_INIT_DATA,
- MB_REQ_MSG_LOG_VF_ERROR = 200,
+ MB_REQ_MSG_LOG_VF_ERROR = 200,
};
/* mailbox message send from host to guest */
@@ -298,17 +299,15 @@ enum amd_sriov_gpu_init_data_version {
GPU_INIT_DATA_READY_V1 = 1,
};
-#pragma pack(pop) // Restore previous packing option
+#pragma pack(pop) // Restore previous packing option
/* checksum function between host and guest */
-unsigned int amd_sriov_msg_checksum(void *obj,
- unsigned long obj_size,
- unsigned int key,
- unsigned int checksum);
+unsigned int amd_sriov_msg_checksum(void *obj, unsigned long obj_size, unsigned int key,
+ unsigned int checksum);
/* assertion at compile time */
#ifdef __linux__
-#define stringification(s) _stringification(s)
+#define stringification(s) _stringification(s)
#define _stringification(s) #s
_Static_assert(
@@ -319,13 +318,11 @@ _Static_assert(
sizeof(struct amd_sriov_msg_pf2vf_info) == AMD_SRIOV_MSG_SIZE_KB << 10,
"amd_sriov_msg_pf2vf_info must be " stringification(AMD_SRIOV_MSG_SIZE_KB) " KB");
-_Static_assert(
- AMD_SRIOV_MSG_RESERVE_UCODE % 4 == 0,
- "AMD_SRIOV_MSG_RESERVE_UCODE must be multiple of 4");
+_Static_assert(AMD_SRIOV_MSG_RESERVE_UCODE % 4 == 0,
+ "AMD_SRIOV_MSG_RESERVE_UCODE must be multiple of 4");
-_Static_assert(
- AMD_SRIOV_MSG_RESERVE_UCODE > AMD_SRIOV_UCODE_ID__MAX,
- "AMD_SRIOV_MSG_RESERVE_UCODE must be bigger than AMD_SRIOV_UCODE_ID__MAX");
+_Static_assert(AMD_SRIOV_MSG_RESERVE_UCODE > AMD_SRIOV_UCODE_ID__MAX,
+ "AMD_SRIOV_MSG_RESERVE_UCODE must be bigger than AMD_SRIOV_UCODE_ID__MAX");
#undef _stringification
#undef stringification
diff --git a/drivers/gpu/drm/amd/amdgpu/athub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/athub_v1_0.c
index 88642e7ecdf4..a13c443ea10f 100644
--- a/drivers/gpu/drm/amd/amdgpu/athub_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/athub_v1_0.c
@@ -87,7 +87,7 @@ int athub_v1_0_set_clockgating(struct amdgpu_device *adev,
return 0;
}
-void athub_v1_0_get_clockgating(struct amdgpu_device *adev, u32 *flags)
+void athub_v1_0_get_clockgating(struct amdgpu_device *adev, u64 *flags)
{
int data;
diff --git a/drivers/gpu/drm/amd/amdgpu/athub_v1_0.h b/drivers/gpu/drm/amd/amdgpu/athub_v1_0.h
index b279af59e34f..6be0a6704ea7 100644
--- a/drivers/gpu/drm/amd/amdgpu/athub_v1_0.h
+++ b/drivers/gpu/drm/amd/amdgpu/athub_v1_0.h
@@ -25,6 +25,6 @@
int athub_v1_0_set_clockgating(struct amdgpu_device *adev,
enum amd_clockgating_state state);
-void athub_v1_0_get_clockgating(struct amdgpu_device *adev, u32 *flags);
+void athub_v1_0_get_clockgating(struct amdgpu_device *adev, u64 *flags);
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/athub_v2_0.c b/drivers/gpu/drm/amd/amdgpu/athub_v2_0.c
index a720436857b4..a9521c98e7f7 100644
--- a/drivers/gpu/drm/amd/amdgpu/athub_v2_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/athub_v2_0.c
@@ -93,7 +93,7 @@ int athub_v2_0_set_clockgating(struct amdgpu_device *adev,
return 0;
}
-void athub_v2_0_get_clockgating(struct amdgpu_device *adev, u32 *flags)
+void athub_v2_0_get_clockgating(struct amdgpu_device *adev, u64 *flags)
{
int data;
diff --git a/drivers/gpu/drm/amd/amdgpu/athub_v2_0.h b/drivers/gpu/drm/amd/amdgpu/athub_v2_0.h
index 02932c1c8bab..8b763f6dfd81 100644
--- a/drivers/gpu/drm/amd/amdgpu/athub_v2_0.h
+++ b/drivers/gpu/drm/amd/amdgpu/athub_v2_0.h
@@ -25,6 +25,6 @@
int athub_v2_0_set_clockgating(struct amdgpu_device *adev,
enum amd_clockgating_state state);
-void athub_v2_0_get_clockgating(struct amdgpu_device *adev, u32 *flags);
+void athub_v2_0_get_clockgating(struct amdgpu_device *adev, u64 *flags);
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/athub_v2_1.c b/drivers/gpu/drm/amd/amdgpu/athub_v2_1.c
index ad8e87d3d2cb..78508ae6a670 100644
--- a/drivers/gpu/drm/amd/amdgpu/athub_v2_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/athub_v2_1.c
@@ -85,7 +85,7 @@ int athub_v2_1_set_clockgating(struct amdgpu_device *adev,
return 0;
}
-void athub_v2_1_get_clockgating(struct amdgpu_device *adev, u32 *flags)
+void athub_v2_1_get_clockgating(struct amdgpu_device *adev, u64 *flags)
{
int data;
diff --git a/drivers/gpu/drm/amd/amdgpu/athub_v2_1.h b/drivers/gpu/drm/amd/amdgpu/athub_v2_1.h
index 5e6824c0f591..b799f14bce03 100644
--- a/drivers/gpu/drm/amd/amdgpu/athub_v2_1.h
+++ b/drivers/gpu/drm/amd/amdgpu/athub_v2_1.h
@@ -25,6 +25,6 @@
int athub_v2_1_set_clockgating(struct amdgpu_device *adev,
enum amd_clockgating_state state);
-void athub_v2_1_get_clockgating(struct amdgpu_device *adev, u32 *flags);
+void athub_v2_1_get_clockgating(struct amdgpu_device *adev, u64 *flags);
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_dp.c b/drivers/gpu/drm/amd/amdgpu/atombios_dp.c
index 49a2f594fb2c..87c41e0e9b7c 100644
--- a/drivers/gpu/drm/amd/amdgpu/atombios_dp.c
+++ b/drivers/gpu/drm/amd/amdgpu/atombios_dp.c
@@ -26,6 +26,8 @@
*/
#include <drm/amdgpu_drm.h>
+#include <drm/display/drm_dp_helper.h>
+
#include "amdgpu.h"
#include "atom.h"
@@ -34,7 +36,6 @@
#include "atombios_dp.h"
#include "amdgpu_connectors.h"
#include "amdgpu_atombios.h"
-#include <drm/dp/drm_dp_helper.h>
/* move these to drm_dp_helper.c/h */
#define DP_LINK_CONFIGURATION_SIZE 9
diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c b/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c
index a92d86e12718..d4f5a584075d 100644
--- a/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c
+++ b/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c
@@ -765,7 +765,6 @@ amdgpu_atombios_encoder_setup_dig_transmitter(struct drm_encoder *encoder, int a
int dp_clock = 0;
int dp_lane_count = 0;
int connector_object_id = 0;
- int igp_lane_info = 0;
int dig_encoder = dig->dig_encoder;
int hpd_id = AMDGPU_HPD_NONE;
@@ -848,26 +847,6 @@ amdgpu_atombios_encoder_setup_dig_transmitter(struct drm_encoder *encoder, int a
else
args.v1.ucConfig |= ATOM_TRANSMITTER_CONFIG_DIG1_ENCODER;
- if ((adev->flags & AMD_IS_APU) &&
- (amdgpu_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_UNIPHY)) {
- if (is_dp ||
- !amdgpu_dig_monitor_is_duallink(encoder, amdgpu_encoder->pixel_clock)) {
- if (igp_lane_info & 0x1)
- args.v1.ucConfig |= ATOM_TRANSMITTER_CONFIG_LANE_0_3;
- else if (igp_lane_info & 0x2)
- args.v1.ucConfig |= ATOM_TRANSMITTER_CONFIG_LANE_4_7;
- else if (igp_lane_info & 0x4)
- args.v1.ucConfig |= ATOM_TRANSMITTER_CONFIG_LANE_8_11;
- else if (igp_lane_info & 0x8)
- args.v1.ucConfig |= ATOM_TRANSMITTER_CONFIG_LANE_12_15;
- } else {
- if (igp_lane_info & 0x3)
- args.v1.ucConfig |= ATOM_TRANSMITTER_CONFIG_LANE_0_7;
- else if (igp_lane_info & 0xc)
- args.v1.ucConfig |= ATOM_TRANSMITTER_CONFIG_LANE_8_15;
- }
- }
-
if (dig->linkb)
args.v1.ucConfig |= ATOM_TRANSMITTER_CONFIG_LINKB;
else
diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
index c8ebd108548d..6c01199e9112 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
@@ -195,7 +195,7 @@ static void cik_sdma_ring_set_wptr(struct amdgpu_ring *ring)
struct amdgpu_device *adev = ring->adev;
WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[ring->me],
- (lower_32_bits(ring->wptr) << 2) & 0x3fffc);
+ (ring->wptr << 2) & 0x3fffc);
}
static void cik_sdma_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
@@ -487,7 +487,7 @@ static int cik_sdma_gfx_resume(struct amdgpu_device *adev)
WREG32(mmSDMA0_GFX_RB_BASE_HI + sdma_offsets[i], ring->gpu_addr >> 40);
ring->wptr = 0;
- WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], lower_32_bits(ring->wptr) << 2);
+ WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], ring->wptr << 2);
/* enable DMA RB */
WREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i],
diff --git a/drivers/gpu/drm/amd/amdgpu/df_v1_7.c b/drivers/gpu/drm/amd/amdgpu/df_v1_7.c
index 2d01ac0d4c11..b991609f46c1 100644
--- a/drivers/gpu/drm/amd/amdgpu/df_v1_7.c
+++ b/drivers/gpu/drm/amd/amdgpu/df_v1_7.c
@@ -99,7 +99,7 @@ static void df_v1_7_update_medium_grain_clock_gating(struct amdgpu_device *adev,
}
static void df_v1_7_get_clockgating_state(struct amdgpu_device *adev,
- u32 *flags)
+ u64 *flags)
{
u32 tmp;
diff --git a/drivers/gpu/drm/amd/amdgpu/df_v3_6.c b/drivers/gpu/drm/amd/amdgpu/df_v3_6.c
index f4dfca013ec5..483a441b46aa 100644
--- a/drivers/gpu/drm/amd/amdgpu/df_v3_6.c
+++ b/drivers/gpu/drm/amd/amdgpu/df_v3_6.c
@@ -332,7 +332,7 @@ static void df_v3_6_update_medium_grain_clock_gating(struct amdgpu_device *adev,
}
static void df_v3_6_get_clockgating_state(struct amdgpu_device *adev,
- u32 *flags)
+ u64 *flags)
{
u32 tmp;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index 9426e252d8aa..407074f958f4 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -3830,8 +3830,7 @@ static int gfx_v10_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
gpu_addr = adev->wb.gpu_addr + (index * 4);
adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
memset(&ib, 0, sizeof(ib));
- r = amdgpu_ib_get(adev, NULL, 16,
- AMDGPU_IB_POOL_DIRECT, &ib);
+ r = amdgpu_ib_get(adev, NULL, 20, AMDGPU_IB_POOL_DIRECT, &ib);
if (r)
goto err1;
@@ -4741,7 +4740,7 @@ static int gfx_v10_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
+ ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
+ ring->pipe;
hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring) ?
- AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_GFX_PIPE_PRIO_NORMAL;
+ AMDGPU_RING_PRIO_2 : AMDGPU_RING_PRIO_DEFAULT;
/* type-2 packets are deprecated on MEC, use type-3 instead */
r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
hw_prio, NULL);
@@ -8451,7 +8450,7 @@ static int gfx_v10_0_set_clockgating_state(void *handle,
return 0;
}
-static void gfx_v10_0_get_clockgating_state(void *handle, u32 *flags)
+static void gfx_v10_0_get_clockgating_state(void *handle, u64 *flags)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
int data;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 5f112efda634..25dc729d0ec2 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -1925,7 +1925,7 @@ static int gfx_v8_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
+ ring->pipe;
hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring) ?
- AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_RING_PRIO_DEFAULT;
+ AMDGPU_RING_PRIO_2 : AMDGPU_RING_PRIO_DEFAULT;
/* type-2 packets are deprecated on MEC, use type-3 instead */
r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
hw_prio, NULL);
@@ -5475,7 +5475,7 @@ static int gfx_v8_0_set_powergating_state(void *handle,
return 0;
}
-static void gfx_v8_0_get_clockgating_state(void *handle, u32 *flags)
+static void gfx_v8_0_get_clockgating_state(void *handle, u64 *flags)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
int data;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 46d4bf27ebbb..d58fd83524ac 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -1205,6 +1205,8 @@ static const struct amdgpu_gfxoff_quirk amdgpu_gfxoff_quirk_list[] = {
{ 0x1002, 0x15dd, 0x103c, 0x83e7, 0xd3 },
/* GFXOFF is unstable on C6 parts with a VBIOS 113-RAVEN-114 */
{ 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc6 },
+ /* Apple MacBook Pro (15-inch, 2019) Radeon Pro Vega 20 4 GB */
+ { 0x1002, 0x69af, 0x106b, 0x019a, 0xc0 },
{ 0, 0, 0, 0, 0 },
};
@@ -2274,7 +2276,7 @@ static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
+ ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
+ ring->pipe;
hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring) ?
- AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_GFX_PIPE_PRIO_NORMAL;
+ AMDGPU_RING_PRIO_2 : AMDGPU_RING_PRIO_DEFAULT;
/* type-2 packets are deprecated on MEC, use type-3 instead */
return amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
hw_prio, NULL);
@@ -5231,7 +5233,7 @@ static int gfx_v9_0_set_clockgating_state(void *handle,
return 0;
}
-static void gfx_v9_0_get_clockgating_state(void *handle, u32 *flags)
+static void gfx_v9_0_get_clockgating_state(void *handle, u64 *flags)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
int data;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c
index 7653ebd0e67b..3a797424579c 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c
@@ -1930,6 +1930,19 @@ static void gfx_v9_4_2_reset_sq_timeout_status(struct amdgpu_device *adev)
mutex_unlock(&adev->grbm_idx_mutex);
}
+static bool gfx_v9_4_2_query_uctl2_poison_status(struct amdgpu_device *adev)
+{
+ u32 status = 0;
+ struct amdgpu_vmhub *hub;
+
+ hub = &adev->vmhub[AMDGPU_GFXHUB_0];
+ status = RREG32(hub->vm_l2_pro_fault_status);
+ /* reset page fault status */
+ WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1);
+
+ return REG_GET_FIELD(status, VM_L2_PROTECTION_FAULT_STATUS, FED);
+}
+
struct amdgpu_ras_block_hw_ops gfx_v9_4_2_ras_ops = {
.ras_error_inject = &gfx_v9_4_2_ras_error_inject,
.query_ras_error_count = &gfx_v9_4_2_query_ras_error_count,
@@ -1943,4 +1956,5 @@ struct amdgpu_gfx_ras gfx_v9_4_2_ras = {
.hw_ops = &gfx_v9_4_2_ras_ops,
},
.enable_watchdog_timer = &gfx_v9_4_2_enable_watchdog_timer,
+ .query_utcl2_poison_status = gfx_v9_4_2_query_uctl2_poison_status,
};
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
index 5228421b0f72..487c33937a87 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
@@ -884,6 +884,24 @@ static int gmc_v10_0_sw_init(void *handle)
}
switch (adev->ip_versions[GC_HWIP][0]) {
+ case IP_VERSION(10, 3, 0):
+ adev->gmc.mall_size = 128 * 1024 * 1024;
+ break;
+ case IP_VERSION(10, 3, 2):
+ adev->gmc.mall_size = 96 * 1024 * 1024;
+ break;
+ case IP_VERSION(10, 3, 4):
+ adev->gmc.mall_size = 32 * 1024 * 1024;
+ break;
+ case IP_VERSION(10, 3, 5):
+ adev->gmc.mall_size = 16 * 1024 * 1024;
+ break;
+ default:
+ adev->gmc.mall_size = 0;
+ break;
+ }
+
+ switch (adev->ip_versions[GC_HWIP][0]) {
case IP_VERSION(10, 1, 10):
case IP_VERSION(10, 1, 1):
case IP_VERSION(10, 1, 2):
@@ -1151,6 +1169,16 @@ static int gmc_v10_0_set_clockgating_state(void *handle,
int r;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ /*
+ * The issue mmhub can't disconnect from DF with MMHUB clock gating being disabled
+ * is a new problem observed at DF 3.0.3, however with the same suspend sequence not
+ * seen any issue on the DF 3.0.2 series platform.
+ */
+ if (adev->in_s0ix && adev->ip_versions[DF_HWIP][0] > IP_VERSION(3, 0, 2)) {
+ dev_dbg(adev->dev, "keep mmhub clock gating being enabled for s0ix\n");
+ return 0;
+ }
+
r = adev->mmhub.funcs->set_clockgating(adev, state);
if (r)
return r;
@@ -1161,7 +1189,7 @@ static int gmc_v10_0_set_clockgating_state(void *handle,
return athub_v2_0_set_clockgating(adev, state);
}
-static void gmc_v10_0_get_clockgating_state(void *handle, u32 *flags)
+static void gmc_v10_0_get_clockgating_state(void *handle, u64 *flags)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
index 1932a3e4af7e..382dde1ce74c 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
@@ -1690,7 +1690,7 @@ static int gmc_v8_0_set_powergating_state(void *handle,
return 0;
}
-static void gmc_v8_0_get_clockgating_state(void *handle, u32 *flags)
+static void gmc_v8_0_get_clockgating_state(void *handle, u64 *flags)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
int data;
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 6009fbfdcc19..22761a3bb818 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -1948,7 +1948,7 @@ static int gmc_v9_0_set_clockgating_state(void *handle,
return 0;
}
-static void gmc_v9_0_get_clockgating_state(void *handle, u32 *flags)
+static void gmc_v9_0_get_clockgating_state(void *handle, u64 *flags)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c b/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c
index 046216635262..adf89680f53e 100644
--- a/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c
@@ -124,7 +124,7 @@ static void hdp_v4_0_update_clock_gating(struct amdgpu_device *adev,
}
static void hdp_v4_0_get_clockgating_state(struct amdgpu_device *adev,
- u32 *flags)
+ u64 *flags)
{
int data;
diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c b/drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c
index 5793977953cc..a9ea23fa0def 100644
--- a/drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c
@@ -181,7 +181,7 @@ static void hdp_v5_0_update_clock_gating(struct amdgpu_device *adev,
}
static void hdp_v5_0_get_clockgating_state(struct amdgpu_device *adev,
- u32 *flags)
+ u64 *flags)
{
uint32_t tmp;
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c
index a29c86617fb5..8c3227d0b8b4 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c
@@ -26,6 +26,7 @@
#include "soc15.h"
#include "soc15d.h"
#include "jpeg_v2_0.h"
+#include "jpeg_v2_5.h"
#include "vcn/vcn_2_5_offset.h"
#include "vcn/vcn_2_5_sh_mask.h"
@@ -39,6 +40,7 @@ static void jpeg_v2_5_set_dec_ring_funcs(struct amdgpu_device *adev);
static void jpeg_v2_5_set_irq_funcs(struct amdgpu_device *adev);
static int jpeg_v2_5_set_powergating_state(void *handle,
enum amd_powergating_state state);
+static void jpeg_v2_5_set_ras_funcs(struct amdgpu_device *adev);
static int amdgpu_ih_clientid_jpeg[] = {
SOC15_IH_CLIENTID_VCN,
@@ -70,6 +72,7 @@ static int jpeg_v2_5_early_init(void *handle)
jpeg_v2_5_set_dec_ring_funcs(adev);
jpeg_v2_5_set_irq_funcs(adev);
+ jpeg_v2_5_set_ras_funcs(adev);
return 0;
}
@@ -730,3 +733,74 @@ const struct amdgpu_ip_block_version jpeg_v2_6_ip_block =
.rev = 0,
.funcs = &jpeg_v2_6_ip_funcs,
};
+
+static uint32_t jpeg_v2_6_query_poison_by_instance(struct amdgpu_device *adev,
+ uint32_t instance, uint32_t sub_block)
+{
+ uint32_t poison_stat = 0, reg_value = 0;
+
+ switch (sub_block) {
+ case AMDGPU_JPEG_V2_6_JPEG0:
+ reg_value = RREG32_SOC15(JPEG, instance, mmUVD_RAS_JPEG0_STATUS);
+ poison_stat = REG_GET_FIELD(reg_value, UVD_RAS_JPEG0_STATUS, POISONED_PF);
+ break;
+ case AMDGPU_JPEG_V2_6_JPEG1:
+ reg_value = RREG32_SOC15(JPEG, instance, mmUVD_RAS_JPEG1_STATUS);
+ poison_stat = REG_GET_FIELD(reg_value, UVD_RAS_JPEG1_STATUS, POISONED_PF);
+ break;
+ default:
+ break;
+ }
+
+ if (poison_stat)
+ dev_info(adev->dev, "Poison detected in JPEG%d sub_block%d\n",
+ instance, sub_block);
+
+ return poison_stat;
+}
+
+static bool jpeg_v2_6_query_ras_poison_status(struct amdgpu_device *adev)
+{
+ uint32_t inst = 0, sub = 0, poison_stat = 0;
+
+ for (inst = 0; inst < adev->jpeg.num_jpeg_inst; inst++)
+ for (sub = 0; sub < AMDGPU_JPEG_V2_6_MAX_SUB_BLOCK; sub++)
+ poison_stat +=
+ jpeg_v2_6_query_poison_by_instance(adev, inst, sub);
+
+ return !!poison_stat;
+}
+
+const struct amdgpu_ras_block_hw_ops jpeg_v2_6_ras_hw_ops = {
+ .query_poison_status = jpeg_v2_6_query_ras_poison_status,
+};
+
+static struct amdgpu_jpeg_ras jpeg_v2_6_ras = {
+ .ras_block = {
+ .hw_ops = &jpeg_v2_6_ras_hw_ops,
+ },
+};
+
+static void jpeg_v2_5_set_ras_funcs(struct amdgpu_device *adev)
+{
+ switch (adev->ip_versions[JPEG_HWIP][0]) {
+ case IP_VERSION(2, 6, 0):
+ adev->jpeg.ras = &jpeg_v2_6_ras;
+ break;
+ default:
+ break;
+ }
+
+ if (adev->jpeg.ras) {
+ amdgpu_ras_register_ras_block(adev, &adev->jpeg.ras->ras_block);
+
+ strcpy(adev->jpeg.ras->ras_block.ras_comm.name, "jpeg");
+ adev->jpeg.ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__JPEG;
+ adev->jpeg.ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__POISON;
+ adev->jpeg.ras_if = &adev->jpeg.ras->ras_block.ras_comm;
+
+ /* If don't define special ras_late_init function, use default ras_late_init */
+ if (!adev->jpeg.ras->ras_block.ras_late_init)
+ adev->jpeg.ras->ras_block.ras_late_init = amdgpu_ras_block_late_init;
+ }
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.h b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.h
index 3b0aa29b9879..1e858c6cdf13 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.h
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.h
@@ -24,6 +24,13 @@
#ifndef __JPEG_V2_5_H__
#define __JPEG_V2_5_H__
+enum amdgpu_jpeg_v2_6_sub_block {
+ AMDGPU_JPEG_V2_6_JPEG0 = 0,
+ AMDGPU_JPEG_V2_6_JPEG1,
+
+ AMDGPU_JPEG_V2_6_MAX_SUB_BLOCK,
+};
+
extern const struct amdgpu_ip_block_version jpeg_v2_5_ip_block;
extern const struct amdgpu_ip_block_version jpeg_v2_6_ip_block;
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
index 4c9f0c0f3116..3f44a099c52a 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
@@ -546,7 +546,7 @@ static int mmhub_v1_0_set_clockgating(struct amdgpu_device *adev,
return 0;
}
-static void mmhub_v1_0_get_clockgating(struct amdgpu_device *adev, u32 *flags)
+static void mmhub_v1_0_get_clockgating(struct amdgpu_device *adev, u64 *flags)
{
int data, data1;
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c
index 3b901f941627..6fa7090bc6cb 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c
@@ -542,7 +542,7 @@ static int mmhub_v1_7_set_clockgating(struct amdgpu_device *adev,
return 0;
}
-static void mmhub_v1_7_get_clockgating(struct amdgpu_device *adev, u32 *flags)
+static void mmhub_v1_7_get_clockgating(struct amdgpu_device *adev, u64 *flags)
{
int data, data1;
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c
index 3718ff610ab2..636abd855686 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c
@@ -682,7 +682,7 @@ static int mmhub_v2_0_set_clockgating(struct amdgpu_device *adev,
return 0;
}
-static void mmhub_v2_0_get_clockgating(struct amdgpu_device *adev, u32 *flags)
+static void mmhub_v2_0_get_clockgating(struct amdgpu_device *adev, u64 *flags)
{
int data, data1;
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c
index 1957fb098c4d..ff44c5364a8c 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c
@@ -577,7 +577,7 @@ static int mmhub_v2_3_set_clockgating(struct amdgpu_device *adev,
return 0;
}
-static void mmhub_v2_3_get_clockgating(struct amdgpu_device *adev, u32 *flags)
+static void mmhub_v2_3_get_clockgating(struct amdgpu_device *adev, u64 *flags)
{
int data, data1, data2, data3;
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c
index 619106f7d23d..6e0145b2b408 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c
@@ -647,7 +647,7 @@ static int mmhub_v9_4_set_clockgating(struct amdgpu_device *adev,
return 0;
}
-static void mmhub_v9_4_get_clockgating(struct amdgpu_device *adev, u32 *flags)
+static void mmhub_v9_4_get_clockgating(struct amdgpu_device *adev, u64 *flags)
{
int data, data1;
diff --git a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c
index 8ce5b8ca1fd7..97201ab0965e 100644
--- a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c
@@ -685,7 +685,7 @@ static int navi10_ih_set_powergating_state(void *handle,
return 0;
}
-static void navi10_ih_get_clockgating_state(void *handle, u32 *flags)
+static void navi10_ih_get_clockgating_state(void *handle, u64 *flags)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c
index ee7cab37dfd5..6cd1fb2eb913 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c
@@ -278,7 +278,7 @@ static void nbio_v2_3_update_medium_grain_light_sleep(struct amdgpu_device *adev
}
static void nbio_v2_3_get_clockgating_state(struct amdgpu_device *adev,
- u32 *flags)
+ u64 *flags)
{
int data;
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c
index 4bbacf1be25a..f7f6ddebd3e4 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c
@@ -210,7 +210,7 @@ static void nbio_v6_1_update_medium_grain_light_sleep(struct amdgpu_device *adev
}
static void nbio_v6_1_get_clockgating_state(struct amdgpu_device *adev,
- u32 *flags)
+ u64 *flags)
{
int data;
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c
index 37a4039fdfc5..aa0326d00c72 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c
@@ -205,7 +205,7 @@ static void nbio_v7_0_update_medium_grain_light_sleep(struct amdgpu_device *adev
}
static void nbio_v7_0_get_clockgating_state(struct amdgpu_device *adev,
- u32 *flags)
+ u64 *flags)
{
int data;
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_2.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_2.c
index 6f81de6f3cc4..31776b12e4c4 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_2.c
@@ -306,7 +306,7 @@ static void nbio_v7_2_update_medium_grain_light_sleep(struct amdgpu_device *adev
}
static void nbio_v7_2_get_clockgating_state(struct amdgpu_device *adev,
- u32 *flags)
+ u64 *flags)
{
int data;
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
index c2357e83a8c4..4531761dcf77 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
@@ -273,7 +273,7 @@ static void nbio_v7_4_update_medium_grain_light_sleep(struct amdgpu_device *adev
}
static void nbio_v7_4_get_clockgating_state(struct amdgpu_device *adev,
- u32 *flags)
+ u64 *flags)
{
int data;
diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c
index e19f14c3ef59..0a7946c59a42 100644
--- a/drivers/gpu/drm/amd/amdgpu/nv.c
+++ b/drivers/gpu/drm/amd/amdgpu/nv.c
@@ -1115,7 +1115,7 @@ static int nv_common_set_powergating_state(void *handle,
return 0;
}
-static void nv_common_get_clockgating_state(void *handle, u32 *flags)
+static void nv_common_get_clockgating_state(void *handle, u64 *flags)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
index 1d8bbcbd7a37..84b57b06b20c 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
@@ -223,7 +223,7 @@ static void sdma_v2_4_ring_set_wptr(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
- WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[ring->me], lower_32_bits(ring->wptr) << 2);
+ WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[ring->me], ring->wptr << 2);
}
static void sdma_v2_4_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
@@ -465,7 +465,7 @@ static int sdma_v2_4_gfx_resume(struct amdgpu_device *adev)
WREG32(mmSDMA0_GFX_RB_BASE_HI + sdma_offsets[i], ring->gpu_addr >> 40);
ring->wptr = 0;
- WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], lower_32_bits(ring->wptr) << 2);
+ WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], ring->wptr << 2);
/* enable DMA RB */
rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 1);
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
index 4ef4feff5649..8af5c94d526a 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
@@ -389,14 +389,14 @@ static void sdma_v3_0_ring_set_wptr(struct amdgpu_ring *ring)
if (ring->use_doorbell) {
u32 *wb = (u32 *)&adev->wb.wb[ring->wptr_offs];
/* XXX check if swapping is necessary on BE */
- WRITE_ONCE(*wb, (lower_32_bits(ring->wptr) << 2));
- WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr) << 2);
+ WRITE_ONCE(*wb, ring->wptr << 2);
+ WDOORBELL32(ring->doorbell_index, ring->wptr << 2);
} else if (ring->use_pollmem) {
u32 *wb = (u32 *)&adev->wb.wb[ring->wptr_offs];
- WRITE_ONCE(*wb, (lower_32_bits(ring->wptr) << 2));
+ WRITE_ONCE(*wb, ring->wptr << 2);
} else {
- WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[ring->me], lower_32_bits(ring->wptr) << 2);
+ WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[ring->me], ring->wptr << 2);
}
}
@@ -1535,7 +1535,7 @@ static int sdma_v3_0_set_powergating_state(void *handle,
return 0;
}
-static void sdma_v3_0_get_clockgating_state(void *handle, u32 *flags)
+static void sdma_v3_0_get_clockgating_state(void *handle, u64 *flags)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
int data;
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
index d7e8f7232364..80de85847712 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
@@ -772,8 +772,8 @@ static void sdma_v4_0_ring_set_wptr(struct amdgpu_ring *ring)
DRM_DEBUG("Using doorbell -- "
"wptr_offs == 0x%08x "
- "lower_32_bits(ring->wptr) << 2 == 0x%08x "
- "upper_32_bits(ring->wptr) << 2 == 0x%08x\n",
+ "lower_32_bits(ring->wptr << 2) == 0x%08x "
+ "upper_32_bits(ring->wptr << 2) == 0x%08x\n",
ring->wptr_offs,
lower_32_bits(ring->wptr << 2),
upper_32_bits(ring->wptr << 2));
@@ -2372,7 +2372,7 @@ static int sdma_v4_0_set_powergating_state(void *handle,
return 0;
}
-static void sdma_v4_0_get_clockgating_state(void *handle, u32 *flags)
+static void sdma_v4_0_get_clockgating_state(void *handle, u64 *flags)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
int data;
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
index a8d49c005f73..d3939c5f531d 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
@@ -394,8 +394,8 @@ static void sdma_v5_0_ring_set_wptr(struct amdgpu_ring *ring)
if (ring->use_doorbell) {
DRM_DEBUG("Using doorbell -- "
"wptr_offs == 0x%08x "
- "lower_32_bits(ring->wptr) << 2 == 0x%08x "
- "upper_32_bits(ring->wptr) << 2 == 0x%08x\n",
+ "lower_32_bits(ring->wptr << 2) == 0x%08x "
+ "upper_32_bits(ring->wptr << 2) == 0x%08x\n",
ring->wptr_offs,
lower_32_bits(ring->wptr << 2),
upper_32_bits(ring->wptr << 2));
@@ -774,9 +774,9 @@ static int sdma_v5_0_gfx_resume(struct amdgpu_device *adev)
if (!amdgpu_sriov_vf(adev)) { /* only bare-metal use register write for wptr */
WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR),
- lower_32_bits(ring->wptr) << 2);
+ lower_32_bits(ring->wptr << 2));
WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI),
- upper_32_bits(ring->wptr) << 2);
+ upper_32_bits(ring->wptr << 2));
}
doorbell = RREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL));
@@ -1648,7 +1648,7 @@ static int sdma_v5_0_set_powergating_state(void *handle,
return 0;
}
-static void sdma_v5_0_get_clockgating_state(void *handle, u32 *flags)
+static void sdma_v5_0_get_clockgating_state(void *handle, u64 *flags)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
int data;
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
index 824eace69884..8298926f8502 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
@@ -295,8 +295,8 @@ static void sdma_v5_2_ring_set_wptr(struct amdgpu_ring *ring)
if (ring->use_doorbell) {
DRM_DEBUG("Using doorbell -- "
"wptr_offs == 0x%08x "
- "lower_32_bits(ring->wptr) << 2 == 0x%08x "
- "upper_32_bits(ring->wptr) << 2 == 0x%08x\n",
+ "lower_32_bits(ring->wptr << 2) == 0x%08x "
+ "upper_32_bits(ring->wptr << 2) == 0x%08x\n",
ring->wptr_offs,
lower_32_bits(ring->wptr << 2),
upper_32_bits(ring->wptr << 2));
@@ -672,8 +672,8 @@ static int sdma_v5_2_gfx_resume(struct amdgpu_device *adev)
WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 1);
if (!amdgpu_sriov_vf(adev)) { /* only bare-metal use register write for wptr */
- WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR), lower_32_bits(ring->wptr) << 2);
- WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), upper_32_bits(ring->wptr) << 2);
+ WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR), lower_32_bits(ring->wptr << 2));
+ WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), upper_32_bits(ring->wptr << 2));
}
doorbell = RREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL));
@@ -1645,7 +1645,7 @@ static int sdma_v5_2_set_powergating_state(void *handle,
return 0;
}
-static void sdma_v5_2_get_clockgating_state(void *handle, u32 *flags)
+static void sdma_v5_2_get_clockgating_state(void *handle, u64 *flags)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
int data;
diff --git a/drivers/gpu/drm/amd/amdgpu/si_dma.c b/drivers/gpu/drm/amd/amdgpu/si_dma.c
index 195b45bcb8ad..2f95235bbfb3 100644
--- a/drivers/gpu/drm/amd/amdgpu/si_dma.c
+++ b/drivers/gpu/drm/amd/amdgpu/si_dma.c
@@ -56,8 +56,7 @@ static void si_dma_ring_set_wptr(struct amdgpu_ring *ring)
struct amdgpu_device *adev = ring->adev;
u32 me = (ring == &adev->sdma.instance[0].ring) ? 0 : 1;
- WREG32(DMA_RB_WPTR + sdma_offsets[me],
- (lower_32_bits(ring->wptr) << 2) & 0x3fffc);
+ WREG32(DMA_RB_WPTR + sdma_offsets[me], (ring->wptr << 2) & 0x3fffc);
}
static void si_dma_ring_emit_ib(struct amdgpu_ring *ring,
@@ -175,7 +174,7 @@ static int si_dma_start(struct amdgpu_device *adev)
WREG32(DMA_CNTL + sdma_offsets[i], dma_cntl);
ring->wptr = 0;
- WREG32(DMA_RB_WPTR + sdma_offsets[i], lower_32_bits(ring->wptr) << 2);
+ WREG32(DMA_RB_WPTR + sdma_offsets[i], ring->wptr << 2);
WREG32(DMA_RB_CNTL + sdma_offsets[i], rb_cntl | DMA_RB_ENABLE);
ring->sched.ready = true;
diff --git a/drivers/gpu/drm/amd/amdgpu/smuio_v11_0.c b/drivers/gpu/drm/amd/amdgpu/smuio_v11_0.c
index b6f1322f908c..acdc40f99ab3 100644
--- a/drivers/gpu/drm/amd/amdgpu/smuio_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/smuio_v11_0.c
@@ -59,7 +59,7 @@ static void smuio_v11_0_update_rom_clock_gating(struct amdgpu_device *adev, bool
WREG32_SOC15(SMUIO, 0, mmCGTT_ROM_CLK_CTRL0, data);
}
-static void smuio_v11_0_get_clock_gating_state(struct amdgpu_device *adev, u32 *flags)
+static void smuio_v11_0_get_clock_gating_state(struct amdgpu_device *adev, u64 *flags)
{
u32 data;
diff --git a/drivers/gpu/drm/amd/amdgpu/smuio_v11_0_6.c b/drivers/gpu/drm/amd/amdgpu/smuio_v11_0_6.c
index 3a18dbb55c32..2afeb8b37f62 100644
--- a/drivers/gpu/drm/amd/amdgpu/smuio_v11_0_6.c
+++ b/drivers/gpu/drm/amd/amdgpu/smuio_v11_0_6.c
@@ -56,7 +56,7 @@ static void smuio_v11_0_6_update_rom_clock_gating(struct amdgpu_device *adev, bo
WREG32_SOC15(SMUIO, 0, mmCGTT_ROM_CLK_CTRL0, data);
}
-static void smuio_v11_0_6_get_clock_gating_state(struct amdgpu_device *adev, u32 *flags)
+static void smuio_v11_0_6_get_clock_gating_state(struct amdgpu_device *adev, u64 *flags)
{
u32 data;
diff --git a/drivers/gpu/drm/amd/amdgpu/smuio_v13_0.c b/drivers/gpu/drm/amd/amdgpu/smuio_v13_0.c
index 39b7c206770f..13e905c22592 100644
--- a/drivers/gpu/drm/amd/amdgpu/smuio_v13_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/smuio_v13_0.c
@@ -58,7 +58,7 @@ static void smuio_v13_0_update_rom_clock_gating(struct amdgpu_device *adev, bool
WREG32_SOC15(SMUIO, 0, regCGTT_ROM_CLK_CTRL0, data);
}
-static void smuio_v13_0_get_clock_gating_state(struct amdgpu_device *adev, u32 *flags)
+static void smuio_v13_0_get_clock_gating_state(struct amdgpu_device *adev, u64 *flags)
{
u32 data;
diff --git a/drivers/gpu/drm/amd/amdgpu/smuio_v9_0.c b/drivers/gpu/drm/amd/amdgpu/smuio_v9_0.c
index 8417890af227..e4e30b9d481b 100644
--- a/drivers/gpu/drm/amd/amdgpu/smuio_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/smuio_v9_0.c
@@ -56,7 +56,7 @@ static void smuio_v9_0_update_rom_clock_gating(struct amdgpu_device *adev, bool
WREG32_SOC15(SMUIO, 0, mmCGTT_ROM_CLK_CTRL0, data);
}
-static void smuio_v9_0_get_clock_gating_state(struct amdgpu_device *adev, u32 *flags)
+static void smuio_v9_0_get_clock_gating_state(struct amdgpu_device *adev, u64 *flags)
{
u32 data;
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c
index 3d0251ef8d79..fde6154f2009 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
@@ -701,25 +701,12 @@ static uint32_t soc15_get_rev_id(struct amdgpu_device *adev)
static void soc15_reg_base_init(struct amdgpu_device *adev)
{
- int r;
-
/* Set IP register base before any HW register access */
switch (adev->asic_type) {
case CHIP_VEGA10:
case CHIP_VEGA12:
case CHIP_RAVEN:
- vega10_reg_base_init(adev);
- break;
case CHIP_RENOIR:
- /* It's safe to do ip discovery here for Renoir,
- * it doesn't support SRIOV. */
- if (amdgpu_discovery) {
- r = amdgpu_discovery_reg_base_init(adev);
- if (r == 0)
- break;
- DRM_WARN("failed to init reg base from ip discovery table, "
- "fallback to legacy init method\n");
- }
vega10_reg_base_init(adev);
break;
case CHIP_VEGA20:
@@ -1419,7 +1406,7 @@ static int soc15_common_set_clockgating_state(void *handle,
return 0;
}
-static void soc15_common_get_clockgating_state(void *handle, u32 *flags)
+static void soc15_common_get_clockgating_state(void *handle, u64 *flags)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
int data;
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15_common.h b/drivers/gpu/drm/amd/amdgpu/soc15_common.h
index acce8c2e0328..9fefd403e14f 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15_common.h
+++ b/drivers/gpu/drm/amd/amdgpu/soc15_common.h
@@ -45,6 +45,14 @@
~REG_FIELD_MASK(reg, field)) | (val) << REG_FIELD_SHIFT(reg, field), \
0, ip##_HWIP)
+#define WREG32_FIELD15_PREREG(ip, idx, reg_name, field, val) \
+ __WREG32_SOC15_RLC__(adev->reg_offset[ip##_HWIP][idx][reg##reg_name##_BASE_IDX] + reg##reg_name, \
+ (__RREG32_SOC15_RLC__( \
+ adev->reg_offset[ip##_HWIP][idx][reg##reg_name##_BASE_IDX] + reg##reg_name, \
+ 0, ip##_HWIP) & \
+ ~REG_FIELD_MASK(reg_name, field)) | (val) << REG_FIELD_SHIFT(reg_name, field), \
+ 0, ip##_HWIP)
+
#define RREG32_SOC15(ip, inst, reg) \
__RREG32_SOC15_RLC__(adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg, \
0, ip##_HWIP)
diff --git a/drivers/gpu/drm/amd/amdgpu/soc21.c b/drivers/gpu/drm/amd/amdgpu/soc21.c
new file mode 100644
index 000000000000..a139fd1d3127
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/soc21.c
@@ -0,0 +1,620 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include <linux/firmware.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+
+#include "amdgpu.h"
+#include "amdgpu_atombios.h"
+#include "amdgpu_ih.h"
+#include "amdgpu_uvd.h"
+#include "amdgpu_vce.h"
+#include "amdgpu_ucode.h"
+#include "amdgpu_psp.h"
+#include "amdgpu_smu.h"
+#include "atom.h"
+#include "amd_pcie.h"
+
+#include "gc/gc_11_0_0_offset.h"
+#include "gc/gc_11_0_0_sh_mask.h"
+#include "mp/mp_13_0_0_offset.h"
+
+#include "soc15.h"
+#include "soc15_common.h"
+
+static const struct amd_ip_funcs soc21_common_ip_funcs;
+
+/*
+ * Indirect registers accessor
+ */
+static u32 soc21_pcie_rreg(struct amdgpu_device *adev, u32 reg)
+{
+ unsigned long address, data;
+ address = adev->nbio.funcs->get_pcie_index_offset(adev);
+ data = adev->nbio.funcs->get_pcie_data_offset(adev);
+
+ return amdgpu_device_indirect_rreg(adev, address, data, reg);
+}
+
+static void soc21_pcie_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
+{
+ unsigned long address, data;
+
+ address = adev->nbio.funcs->get_pcie_index_offset(adev);
+ data = adev->nbio.funcs->get_pcie_data_offset(adev);
+
+ amdgpu_device_indirect_wreg(adev, address, data, reg, v);
+}
+
+static u64 soc21_pcie_rreg64(struct amdgpu_device *adev, u32 reg)
+{
+ unsigned long address, data;
+ address = adev->nbio.funcs->get_pcie_index_offset(adev);
+ data = adev->nbio.funcs->get_pcie_data_offset(adev);
+
+ return amdgpu_device_indirect_rreg64(adev, address, data, reg);
+}
+
+static void soc21_pcie_wreg64(struct amdgpu_device *adev, u32 reg, u64 v)
+{
+ unsigned long address, data;
+
+ address = adev->nbio.funcs->get_pcie_index_offset(adev);
+ data = adev->nbio.funcs->get_pcie_data_offset(adev);
+
+ amdgpu_device_indirect_wreg64(adev, address, data, reg, v);
+}
+
+static u32 soc21_didt_rreg(struct amdgpu_device *adev, u32 reg)
+{
+ unsigned long flags, address, data;
+ u32 r;
+
+ address = SOC15_REG_OFFSET(GC, 0, regDIDT_IND_INDEX);
+ data = SOC15_REG_OFFSET(GC, 0, regDIDT_IND_DATA);
+
+ spin_lock_irqsave(&adev->didt_idx_lock, flags);
+ WREG32(address, (reg));
+ r = RREG32(data);
+ spin_unlock_irqrestore(&adev->didt_idx_lock, flags);
+ return r;
+}
+
+static void soc21_didt_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
+{
+ unsigned long flags, address, data;
+
+ address = SOC15_REG_OFFSET(GC, 0, regDIDT_IND_INDEX);
+ data = SOC15_REG_OFFSET(GC, 0, regDIDT_IND_DATA);
+
+ spin_lock_irqsave(&adev->didt_idx_lock, flags);
+ WREG32(address, (reg));
+ WREG32(data, (v));
+ spin_unlock_irqrestore(&adev->didt_idx_lock, flags);
+}
+
+static u32 soc21_get_config_memsize(struct amdgpu_device *adev)
+{
+ return adev->nbio.funcs->get_memsize(adev);
+}
+
+static u32 soc21_get_xclk(struct amdgpu_device *adev)
+{
+ return adev->clock.spll.reference_freq;
+}
+
+
+void soc21_grbm_select(struct amdgpu_device *adev,
+ u32 me, u32 pipe, u32 queue, u32 vmid)
+{
+ u32 grbm_gfx_cntl = 0;
+ grbm_gfx_cntl = REG_SET_FIELD(grbm_gfx_cntl, GRBM_GFX_CNTL, PIPEID, pipe);
+ grbm_gfx_cntl = REG_SET_FIELD(grbm_gfx_cntl, GRBM_GFX_CNTL, MEID, me);
+ grbm_gfx_cntl = REG_SET_FIELD(grbm_gfx_cntl, GRBM_GFX_CNTL, VMID, vmid);
+ grbm_gfx_cntl = REG_SET_FIELD(grbm_gfx_cntl, GRBM_GFX_CNTL, QUEUEID, queue);
+
+ WREG32(SOC15_REG_OFFSET(GC, 0, regGRBM_GFX_CNTL), grbm_gfx_cntl);
+}
+
+static void soc21_vga_set_state(struct amdgpu_device *adev, bool state)
+{
+ /* todo */
+}
+
+static bool soc21_read_disabled_bios(struct amdgpu_device *adev)
+{
+ /* todo */
+ return false;
+}
+
+static struct soc15_allowed_register_entry soc21_allowed_read_registers[] = {
+ { SOC15_REG_ENTRY(GC, 0, regGRBM_STATUS)},
+ { SOC15_REG_ENTRY(GC, 0, regGRBM_STATUS2)},
+ { SOC15_REG_ENTRY(GC, 0, regGRBM_STATUS_SE0)},
+ { SOC15_REG_ENTRY(GC, 0, regGRBM_STATUS_SE1)},
+ { SOC15_REG_ENTRY(GC, 0, regGRBM_STATUS_SE2)},
+ { SOC15_REG_ENTRY(GC, 0, regGRBM_STATUS_SE3)},
+ { SOC15_REG_ENTRY(SDMA0, 0, regSDMA0_STATUS_REG)},
+ { SOC15_REG_ENTRY(SDMA1, 0, regSDMA1_STATUS_REG)},
+ { SOC15_REG_ENTRY(GC, 0, regCP_STAT)},
+ { SOC15_REG_ENTRY(GC, 0, regCP_STALLED_STAT1)},
+ { SOC15_REG_ENTRY(GC, 0, regCP_STALLED_STAT2)},
+ { SOC15_REG_ENTRY(GC, 0, regCP_STALLED_STAT3)},
+ { SOC15_REG_ENTRY(GC, 0, regCP_CPF_BUSY_STAT)},
+ { SOC15_REG_ENTRY(GC, 0, regCP_CPF_STALLED_STAT1)},
+ { SOC15_REG_ENTRY(GC, 0, regCP_CPF_STATUS)},
+ { SOC15_REG_ENTRY(GC, 0, regCP_CPC_BUSY_STAT)},
+ { SOC15_REG_ENTRY(GC, 0, regCP_CPC_STALLED_STAT1)},
+ { SOC15_REG_ENTRY(GC, 0, regCP_CPC_STATUS)},
+ { SOC15_REG_ENTRY(GC, 0, regGB_ADDR_CONFIG)},
+};
+
+static uint32_t soc21_read_indexed_register(struct amdgpu_device *adev, u32 se_num,
+ u32 sh_num, u32 reg_offset)
+{
+ uint32_t val;
+
+ mutex_lock(&adev->grbm_idx_mutex);
+ if (se_num != 0xffffffff || sh_num != 0xffffffff)
+ amdgpu_gfx_select_se_sh(adev, se_num, sh_num, 0xffffffff);
+
+ val = RREG32(reg_offset);
+
+ if (se_num != 0xffffffff || sh_num != 0xffffffff)
+ amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+ mutex_unlock(&adev->grbm_idx_mutex);
+ return val;
+}
+
+static uint32_t soc21_get_register_value(struct amdgpu_device *adev,
+ bool indexed, u32 se_num,
+ u32 sh_num, u32 reg_offset)
+{
+ if (indexed) {
+ return soc21_read_indexed_register(adev, se_num, sh_num, reg_offset);
+ } else {
+ if (reg_offset == SOC15_REG_OFFSET(GC, 0, regGB_ADDR_CONFIG) && adev->gfx.config.gb_addr_config)
+ return adev->gfx.config.gb_addr_config;
+ return RREG32(reg_offset);
+ }
+}
+
+static int soc21_read_register(struct amdgpu_device *adev, u32 se_num,
+ u32 sh_num, u32 reg_offset, u32 *value)
+{
+ uint32_t i;
+ struct soc15_allowed_register_entry *en;
+
+ *value = 0;
+ for (i = 0; i < ARRAY_SIZE(soc21_allowed_read_registers); i++) {
+ en = &soc21_allowed_read_registers[i];
+ if (reg_offset !=
+ (adev->reg_offset[en->hwip][en->inst][en->seg] + en->reg_offset))
+ continue;
+
+ *value = soc21_get_register_value(adev,
+ soc21_allowed_read_registers[i].grbm_indexed,
+ se_num, sh_num, reg_offset);
+ return 0;
+ }
+ return -EINVAL;
+}
+
+#if 0
+static int soc21_asic_mode1_reset(struct amdgpu_device *adev)
+{
+ u32 i;
+ int ret = 0;
+
+ amdgpu_atombios_scratch_regs_engine_hung(adev, true);
+
+ /* disable BM */
+ pci_clear_master(adev->pdev);
+
+ amdgpu_device_cache_pci_state(adev->pdev);
+
+ if (amdgpu_dpm_is_mode1_reset_supported(adev)) {
+ dev_info(adev->dev, "GPU smu mode1 reset\n");
+ ret = amdgpu_dpm_mode1_reset(adev);
+ } else {
+ dev_info(adev->dev, "GPU psp mode1 reset\n");
+ ret = psp_gpu_reset(adev);
+ }
+
+ if (ret)
+ dev_err(adev->dev, "GPU mode1 reset failed\n");
+ amdgpu_device_load_pci_state(adev->pdev);
+
+ /* wait for asic to come out of reset */
+ for (i = 0; i < adev->usec_timeout; i++) {
+ u32 memsize = adev->nbio.funcs->get_memsize(adev);
+
+ if (memsize != 0xffffffff)
+ break;
+ udelay(1);
+ }
+
+ amdgpu_atombios_scratch_regs_engine_hung(adev, false);
+
+ return ret;
+}
+#endif
+
+static enum amd_reset_method
+soc21_asic_reset_method(struct amdgpu_device *adev)
+{
+ if (amdgpu_reset_method == AMD_RESET_METHOD_MODE1 ||
+ amdgpu_reset_method == AMD_RESET_METHOD_BACO)
+ return amdgpu_reset_method;
+
+ if (amdgpu_reset_method != -1)
+ dev_warn(adev->dev, "Specified reset method:%d isn't supported, using AUTO instead.\n",
+ amdgpu_reset_method);
+
+ switch (adev->ip_versions[MP1_HWIP][0]) {
+ case IP_VERSION(13, 0, 0):
+ return AMD_RESET_METHOD_MODE1;
+ default:
+ if (amdgpu_dpm_is_baco_supported(adev))
+ return AMD_RESET_METHOD_BACO;
+ else
+ return AMD_RESET_METHOD_MODE1;
+ }
+}
+
+static int soc21_asic_reset(struct amdgpu_device *adev)
+{
+ int ret = 0;
+
+ switch (soc21_asic_reset_method(adev)) {
+ case AMD_RESET_METHOD_PCI:
+ dev_info(adev->dev, "PCI reset\n");
+ ret = amdgpu_device_pci_reset(adev);
+ break;
+ case AMD_RESET_METHOD_BACO:
+ dev_info(adev->dev, "BACO reset\n");
+ ret = amdgpu_dpm_baco_reset(adev);
+ break;
+ default:
+ dev_info(adev->dev, "MODE1 reset\n");
+ ret = amdgpu_device_mode1_reset(adev);
+ break;
+ }
+
+ return ret;
+}
+
+static int soc21_set_uvd_clocks(struct amdgpu_device *adev, u32 vclk, u32 dclk)
+{
+ /* todo */
+ return 0;
+}
+
+static int soc21_set_vce_clocks(struct amdgpu_device *adev, u32 evclk, u32 ecclk)
+{
+ /* todo */
+ return 0;
+}
+
+static void soc21_pcie_gen3_enable(struct amdgpu_device *adev)
+{
+ if (pci_is_root_bus(adev->pdev->bus))
+ return;
+
+ if (amdgpu_pcie_gen2 == 0)
+ return;
+
+ if (!(adev->pm.pcie_gen_mask & (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
+ CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3)))
+ return;
+
+ /* todo */
+}
+
+static void soc21_program_aspm(struct amdgpu_device *adev)
+{
+
+ if (amdgpu_aspm == 0)
+ return;
+
+ /* todo */
+}
+
+static void soc21_enable_doorbell_aperture(struct amdgpu_device *adev,
+ bool enable)
+{
+ adev->nbio.funcs->enable_doorbell_aperture(adev, enable);
+ adev->nbio.funcs->enable_doorbell_selfring_aperture(adev, enable);
+}
+
+const struct amdgpu_ip_block_version soc21_common_ip_block =
+{
+ .type = AMD_IP_BLOCK_TYPE_COMMON,
+ .major = 1,
+ .minor = 0,
+ .rev = 0,
+ .funcs = &soc21_common_ip_funcs,
+};
+
+static uint32_t soc21_get_rev_id(struct amdgpu_device *adev)
+{
+ return adev->nbio.funcs->get_rev_id(adev);
+}
+
+static bool soc21_need_full_reset(struct amdgpu_device *adev)
+{
+ return true;
+}
+
+static bool soc21_need_reset_on_init(struct amdgpu_device *adev)
+{
+ u32 sol_reg;
+
+ if (adev->flags & AMD_IS_APU)
+ return false;
+
+ /* Check sOS sign of life register to confirm sys driver and sOS
+ * are already been loaded.
+ */
+ sol_reg = RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_81);
+ if (sol_reg)
+ return true;
+
+ return false;
+}
+
+static uint64_t soc21_get_pcie_replay_count(struct amdgpu_device *adev)
+{
+
+ /* TODO
+ * dummy implement for pcie_replay_count sysfs interface
+ * */
+
+ return 0;
+}
+
+static void soc21_init_doorbell_index(struct amdgpu_device *adev)
+{
+ adev->doorbell_index.kiq = AMDGPU_NAVI10_DOORBELL_KIQ;
+ adev->doorbell_index.mec_ring0 = AMDGPU_NAVI10_DOORBELL_MEC_RING0;
+ adev->doorbell_index.mec_ring1 = AMDGPU_NAVI10_DOORBELL_MEC_RING1;
+ adev->doorbell_index.mec_ring2 = AMDGPU_NAVI10_DOORBELL_MEC_RING2;
+ adev->doorbell_index.mec_ring3 = AMDGPU_NAVI10_DOORBELL_MEC_RING3;
+ adev->doorbell_index.mec_ring4 = AMDGPU_NAVI10_DOORBELL_MEC_RING4;
+ adev->doorbell_index.mec_ring5 = AMDGPU_NAVI10_DOORBELL_MEC_RING5;
+ adev->doorbell_index.mec_ring6 = AMDGPU_NAVI10_DOORBELL_MEC_RING6;
+ adev->doorbell_index.mec_ring7 = AMDGPU_NAVI10_DOORBELL_MEC_RING7;
+ adev->doorbell_index.userqueue_start = AMDGPU_NAVI10_DOORBELL_USERQUEUE_START;
+ adev->doorbell_index.userqueue_end = AMDGPU_NAVI10_DOORBELL_USERQUEUE_END;
+ adev->doorbell_index.gfx_ring0 = AMDGPU_NAVI10_DOORBELL_GFX_RING0;
+ adev->doorbell_index.gfx_ring1 = AMDGPU_NAVI10_DOORBELL_GFX_RING1;
+ adev->doorbell_index.mes_ring = AMDGPU_NAVI10_DOORBELL_MES_RING;
+ adev->doorbell_index.sdma_engine[0] = AMDGPU_NAVI10_DOORBELL_sDMA_ENGINE0;
+ adev->doorbell_index.sdma_engine[1] = AMDGPU_NAVI10_DOORBELL_sDMA_ENGINE1;
+ adev->doorbell_index.ih = AMDGPU_NAVI10_DOORBELL_IH;
+ adev->doorbell_index.vcn.vcn_ring0_1 = AMDGPU_NAVI10_DOORBELL64_VCN0_1;
+ adev->doorbell_index.vcn.vcn_ring2_3 = AMDGPU_NAVI10_DOORBELL64_VCN2_3;
+ adev->doorbell_index.vcn.vcn_ring4_5 = AMDGPU_NAVI10_DOORBELL64_VCN4_5;
+ adev->doorbell_index.vcn.vcn_ring6_7 = AMDGPU_NAVI10_DOORBELL64_VCN6_7;
+ adev->doorbell_index.first_non_cp = AMDGPU_NAVI10_DOORBELL64_FIRST_NON_CP;
+ adev->doorbell_index.last_non_cp = AMDGPU_NAVI10_DOORBELL64_LAST_NON_CP;
+
+ adev->doorbell_index.max_assignment = AMDGPU_NAVI10_DOORBELL_MAX_ASSIGNMENT << 1;
+ adev->doorbell_index.sdma_doorbell_range = 20;
+}
+
+static void soc21_pre_asic_init(struct amdgpu_device *adev)
+{
+}
+
+static const struct amdgpu_asic_funcs soc21_asic_funcs =
+{
+ .read_disabled_bios = &soc21_read_disabled_bios,
+ .read_bios_from_rom = &amdgpu_soc15_read_bios_from_rom,
+ .read_register = &soc21_read_register,
+ .reset = &soc21_asic_reset,
+ .reset_method = &soc21_asic_reset_method,
+ .set_vga_state = &soc21_vga_set_state,
+ .get_xclk = &soc21_get_xclk,
+ .set_uvd_clocks = &soc21_set_uvd_clocks,
+ .set_vce_clocks = &soc21_set_vce_clocks,
+ .get_config_memsize = &soc21_get_config_memsize,
+ .init_doorbell_index = &soc21_init_doorbell_index,
+ .need_full_reset = &soc21_need_full_reset,
+ .need_reset_on_init = &soc21_need_reset_on_init,
+ .get_pcie_replay_count = &soc21_get_pcie_replay_count,
+ .supports_baco = &amdgpu_dpm_is_baco_supported,
+ .pre_asic_init = &soc21_pre_asic_init,
+};
+
+static int soc21_common_early_init(void *handle)
+{
+#define MMIO_REG_HOLE_OFFSET (0x80000 - PAGE_SIZE)
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ adev->rmmio_remap.reg_offset = MMIO_REG_HOLE_OFFSET;
+ adev->rmmio_remap.bus_addr = adev->rmmio_base + MMIO_REG_HOLE_OFFSET;
+ adev->smc_rreg = NULL;
+ adev->smc_wreg = NULL;
+ adev->pcie_rreg = &soc21_pcie_rreg;
+ adev->pcie_wreg = &soc21_pcie_wreg;
+ adev->pcie_rreg64 = &soc21_pcie_rreg64;
+ adev->pcie_wreg64 = &soc21_pcie_wreg64;
+
+ /* TODO: will add them during VCN v2 implementation */
+ adev->uvd_ctx_rreg = NULL;
+ adev->uvd_ctx_wreg = NULL;
+
+ adev->didt_rreg = &soc21_didt_rreg;
+ adev->didt_wreg = &soc21_didt_wreg;
+
+ adev->asic_funcs = &soc21_asic_funcs;
+
+ adev->rev_id = soc21_get_rev_id(adev);
+ adev->external_rev_id = 0xff;
+ switch (adev->ip_versions[GC_HWIP][0]) {
+ case IP_VERSION(11, 0, 0):
+ adev->cg_flags = 0;
+ adev->pg_flags = 0;
+ adev->external_rev_id = adev->rev_id + 0x1; // TODO: need update
+ break;
+ default:
+ /* FIXME: not supported yet */
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int soc21_common_late_init(void *handle)
+{
+ return 0;
+}
+
+static int soc21_common_sw_init(void *handle)
+{
+ return 0;
+}
+
+static int soc21_common_sw_fini(void *handle)
+{
+ return 0;
+}
+
+static int soc21_common_hw_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ /* enable pcie gen2/3 link */
+ soc21_pcie_gen3_enable(adev);
+ /* enable aspm */
+ soc21_program_aspm(adev);
+ /* setup nbio registers */
+ adev->nbio.funcs->init_registers(adev);
+ /* remap HDP registers to a hole in mmio space,
+ * for the purpose of expose those registers
+ * to process space
+ */
+ if (adev->nbio.funcs->remap_hdp_registers)
+ adev->nbio.funcs->remap_hdp_registers(adev);
+ /* enable the doorbell aperture */
+ soc21_enable_doorbell_aperture(adev, true);
+
+ return 0;
+}
+
+static int soc21_common_hw_fini(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ /* disable the doorbell aperture */
+ soc21_enable_doorbell_aperture(adev, false);
+
+ return 0;
+}
+
+static int soc21_common_suspend(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ return soc21_common_hw_fini(adev);
+}
+
+static int soc21_common_resume(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ return soc21_common_hw_init(adev);
+}
+
+static bool soc21_common_is_idle(void *handle)
+{
+ return true;
+}
+
+static int soc21_common_wait_for_idle(void *handle)
+{
+ return 0;
+}
+
+static int soc21_common_soft_reset(void *handle)
+{
+ return 0;
+}
+
+static int soc21_common_set_clockgating_state(void *handle,
+ enum amd_clockgating_state state)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ switch (adev->ip_versions[NBIO_HWIP][0]) {
+ case IP_VERSION(4, 3, 0):
+ adev->nbio.funcs->update_medium_grain_clock_gating(adev,
+ state == AMD_CG_STATE_GATE);
+ adev->nbio.funcs->update_medium_grain_light_sleep(adev,
+ state == AMD_CG_STATE_GATE);
+ adev->hdp.funcs->update_clock_gating(adev,
+ state == AMD_CG_STATE_GATE);
+ break;
+ default:
+ break;
+ }
+ return 0;
+}
+
+static int soc21_common_set_powergating_state(void *handle,
+ enum amd_powergating_state state)
+{
+ /* TODO */
+ return 0;
+}
+
+static void soc21_common_get_clockgating_state(void *handle, u64 *flags)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ adev->nbio.funcs->get_clockgating_state(adev, flags);
+
+ adev->hdp.funcs->get_clock_gating_state(adev, flags);
+
+ return;
+}
+
+static const struct amd_ip_funcs soc21_common_ip_funcs = {
+ .name = "soc21_common",
+ .early_init = soc21_common_early_init,
+ .late_init = soc21_common_late_init,
+ .sw_init = soc21_common_sw_init,
+ .sw_fini = soc21_common_sw_fini,
+ .hw_init = soc21_common_hw_init,
+ .hw_fini = soc21_common_hw_fini,
+ .suspend = soc21_common_suspend,
+ .resume = soc21_common_resume,
+ .is_idle = soc21_common_is_idle,
+ .wait_for_idle = soc21_common_wait_for_idle,
+ .soft_reset = soc21_common_soft_reset,
+ .set_clockgating_state = soc21_common_set_clockgating_state,
+ .set_powergating_state = soc21_common_set_powergating_state,
+ .get_clockgating_state = soc21_common_get_clockgating_state,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/soc21.h b/drivers/gpu/drm/amd/amdgpu/soc21.h
new file mode 100644
index 000000000000..4c8067af1b65
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/soc21.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __SOC21_H__
+#define __SOC21_H__
+
+extern const struct amdgpu_ip_block_version soc21_common_ip_block;
+
+void soc21_grbm_select(struct amdgpu_device *adev,
+ u32 me, u32 pipe, u32 queue, u32 vmid);
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c b/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c
index c45d9c14ecbc..606892dbea1c 100644
--- a/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c
@@ -64,21 +64,62 @@ static inline uint32_t get_umc_v6_7_channel_index(struct amdgpu_device *adev,
return adev->umc.channel_idx_tbl[umc_inst * adev->umc.channel_inst_num + ch_inst];
}
+static void umc_v6_7_query_error_status_helper(struct amdgpu_device *adev,
+ uint64_t mc_umc_status, uint32_t umc_reg_offset)
+{
+ uint32_t mc_umc_addr;
+ uint64_t reg_value;
+
+ if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1)
+ dev_info(adev->dev, "Deferred error, no user action is needed.\n");
+
+ if (mc_umc_status)
+ dev_info(adev->dev, "MCA STATUS 0x%llx, umc_reg_offset 0x%x\n", mc_umc_status, umc_reg_offset);
+
+ /* print IPID registers value */
+ mc_umc_addr =
+ SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_IPIDT0);
+ reg_value = RREG64_PCIE((mc_umc_addr + umc_reg_offset) * 4);
+ if (reg_value)
+ dev_info(adev->dev, "MCA IPID 0x%llx, umc_reg_offset 0x%x\n", reg_value, umc_reg_offset);
+
+ /* print SYND registers value */
+ mc_umc_addr =
+ SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_SYNDT0);
+ reg_value = RREG64_PCIE((mc_umc_addr + umc_reg_offset) * 4);
+ if (reg_value)
+ dev_info(adev->dev, "MCA SYND 0x%llx, umc_reg_offset 0x%x\n", reg_value, umc_reg_offset);
+
+ /* print MISC0 registers value */
+ mc_umc_addr =
+ SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_MISC0T0);
+ reg_value = RREG64_PCIE((mc_umc_addr + umc_reg_offset) * 4);
+ if (reg_value)
+ dev_info(adev->dev, "MCA MISC0 0x%llx, umc_reg_offset 0x%x\n", reg_value, umc_reg_offset);
+}
+
static void umc_v6_7_ecc_info_query_correctable_error_count(struct amdgpu_device *adev,
uint32_t umc_inst, uint32_t ch_inst,
unsigned long *error_count)
{
uint64_t mc_umc_status;
uint32_t eccinfo_table_idx;
+ uint32_t umc_reg_offset;
struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+ umc_reg_offset = get_umc_v6_7_reg_offset(adev,
+ umc_inst, ch_inst);
+
eccinfo_table_idx = umc_inst * adev->umc.channel_inst_num + ch_inst;
/* check for SRAM correctable error
MCUMC_STATUS is a 64 bit register */
mc_umc_status = ras->umc_ecc.ecc[eccinfo_table_idx].mca_umc_status;
if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
- REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1)
+ REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1) {
*error_count += 1;
+
+ umc_v6_7_query_error_status_helper(adev, mc_umc_status, umc_reg_offset);
+ }
}
static void umc_v6_7_ecc_info_querry_uncorrectable_error_count(struct amdgpu_device *adev,
@@ -88,8 +129,6 @@ static void umc_v6_7_ecc_info_querry_uncorrectable_error_count(struct amdgpu_dev
uint64_t mc_umc_status;
uint32_t eccinfo_table_idx;
uint32_t umc_reg_offset;
- uint32_t mc_umc_addr;
- uint64_t reg_value;
struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
umc_reg_offset = get_umc_v6_7_reg_offset(adev,
@@ -106,32 +145,7 @@ static void umc_v6_7_ecc_info_querry_uncorrectable_error_count(struct amdgpu_dev
REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, TCC) == 1)) {
*error_count += 1;
- if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1)
- dev_info(adev->dev, "Deferred error, no user action is needed.\n");
-
- if (mc_umc_status)
- dev_info(adev->dev, "MCA STATUS 0x%llx, umc_reg_offset 0x%x\n", mc_umc_status, umc_reg_offset);
-
- /* print IPID registers value */
- mc_umc_addr =
- SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_IPIDT0);
- reg_value = RREG64_PCIE((mc_umc_addr + umc_reg_offset) * 4);
- if (reg_value)
- dev_info(adev->dev, "MCA IPID 0x%llx, umc_reg_offset 0x%x\n", reg_value, umc_reg_offset);
-
- /* print SYND registers value */
- mc_umc_addr =
- SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_SYNDT0);
- reg_value = RREG64_PCIE((mc_umc_addr + umc_reg_offset) * 4);
- if (reg_value)
- dev_info(adev->dev, "MCA SYND 0x%llx, umc_reg_offset 0x%x\n", reg_value, umc_reg_offset);
-
- /* print MISC0 registers value */
- mc_umc_addr =
- SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_MISC0T0);
- reg_value = RREG64_PCIE((mc_umc_addr + umc_reg_offset) * 4);
- if (reg_value)
- dev_info(adev->dev, "MCA MISC0 0x%llx, umc_reg_offset 0x%x\n", reg_value, umc_reg_offset);
+ umc_v6_7_query_error_status_helper(adev, mc_umc_status, umc_reg_offset);
}
}
@@ -277,8 +291,11 @@ static void umc_v6_7_query_correctable_error_count(struct amdgpu_device *adev,
MCUMC_STATUS is a 64 bit register */
mc_umc_status = RREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4);
if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
- REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1)
+ REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1) {
*error_count += 1;
+
+ umc_v6_7_query_error_status_helper(adev, mc_umc_status, umc_reg_offset);
+ }
}
static void umc_v6_7_querry_uncorrectable_error_count(struct amdgpu_device *adev,
@@ -287,8 +304,6 @@ static void umc_v6_7_querry_uncorrectable_error_count(struct amdgpu_device *adev
{
uint64_t mc_umc_status;
uint32_t mc_umc_status_addr;
- uint32_t mc_umc_addr;
- uint64_t reg_value;
mc_umc_status_addr =
SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_STATUST0);
@@ -303,32 +318,7 @@ static void umc_v6_7_querry_uncorrectable_error_count(struct amdgpu_device *adev
REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, TCC) == 1)) {
*error_count += 1;
- if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1)
- dev_info(adev->dev, "Deferred error, no user action is needed.\n");
-
- if (mc_umc_status)
- dev_info(adev->dev, "MCA STATUS 0x%llx, umc_reg_offset 0x%x\n", mc_umc_status, umc_reg_offset);
-
- /* print IPID registers value */
- mc_umc_addr =
- SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_IPIDT0);
- reg_value = RREG64_PCIE((mc_umc_addr + umc_reg_offset) * 4);
- if (reg_value)
- dev_info(adev->dev, "MCA IPID 0x%llx, umc_reg_offset 0x%x\n", reg_value, umc_reg_offset);
-
- /* print SYND registers value */
- mc_umc_addr =
- SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_SYNDT0);
- reg_value = RREG64_PCIE((mc_umc_addr + umc_reg_offset) * 4);
- if (reg_value)
- dev_info(adev->dev, "MCA SYND 0x%llx, umc_reg_offset 0x%x\n", reg_value, umc_reg_offset);
-
- /* print MISC0 registers value */
- mc_umc_addr =
- SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_MISC0T0);
- reg_value = RREG64_PCIE((mc_umc_addr + umc_reg_offset) * 4);
- if (reg_value)
- dev_info(adev->dev, "MCA MISC0 0x%llx, umc_reg_offset 0x%x\n", reg_value, umc_reg_offset);
+ umc_v6_7_query_error_status_helper(adev, mc_umc_status, umc_reg_offset);
}
}
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c
index 563493d1f830..d7e31e48a2b8 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c
@@ -833,7 +833,7 @@ out:
return ret;
}
-static void uvd_v5_0_get_clockgating_state(void *handle, u32 *flags)
+static void uvd_v5_0_get_clockgating_state(void *handle, u64 *flags)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
int data;
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
index 2d558c2f417d..375c440957dc 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
@@ -1494,7 +1494,7 @@ out:
return ret;
}
-static void uvd_v6_0_get_clockgating_state(void *handle, u32 *flags)
+static void uvd_v6_0_get_clockgating_state(void *handle, u64 *flags)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
int data;
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
index 142e291983b4..8def62c83ffd 100644
--- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
@@ -831,7 +831,7 @@ out:
return ret;
}
-static void vce_v3_0_get_clockgating_state(void *handle, u32 *flags)
+static void vce_v3_0_get_clockgating_state(void *handle, u64 *flags)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
int data;
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c
index 1bf672966a62..17d44be58877 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c
@@ -31,6 +31,7 @@
#include "soc15d.h"
#include "vcn_v2_0.h"
#include "mmsch_v1_0.h"
+#include "vcn_v2_5.h"
#include "vcn/vcn_2_5_offset.h"
#include "vcn/vcn_2_5_sh_mask.h"
@@ -59,6 +60,7 @@ static int vcn_v2_5_set_powergating_state(void *handle,
static int vcn_v2_5_pause_dpg_mode(struct amdgpu_device *adev,
int inst_idx, struct dpg_pause_state *new_state);
static int vcn_v2_5_sriov_start(struct amdgpu_device *adev);
+static void vcn_v2_5_set_ras_funcs(struct amdgpu_device *adev);
static int amdgpu_ih_clientid_vcns[] = {
SOC15_IH_CLIENTID_VCN,
@@ -100,6 +102,7 @@ static int vcn_v2_5_early_init(void *handle)
vcn_v2_5_set_dec_ring_funcs(adev);
vcn_v2_5_set_enc_ring_funcs(adev);
vcn_v2_5_set_irq_funcs(adev);
+ vcn_v2_5_set_ras_funcs(adev);
return 0;
}
@@ -1932,3 +1935,71 @@ const struct amdgpu_ip_block_version vcn_v2_6_ip_block =
.rev = 0,
.funcs = &vcn_v2_6_ip_funcs,
};
+
+static uint32_t vcn_v2_6_query_poison_by_instance(struct amdgpu_device *adev,
+ uint32_t instance, uint32_t sub_block)
+{
+ uint32_t poison_stat = 0, reg_value = 0;
+
+ switch (sub_block) {
+ case AMDGPU_VCN_V2_6_VCPU_VCODEC:
+ reg_value = RREG32_SOC15(VCN, instance, mmUVD_RAS_VCPU_VCODEC_STATUS);
+ poison_stat = REG_GET_FIELD(reg_value, UVD_RAS_VCPU_VCODEC_STATUS, POISONED_PF);
+ break;
+ default:
+ break;
+ }
+
+ if (poison_stat)
+ dev_info(adev->dev, "Poison detected in VCN%d, sub_block%d\n",
+ instance, sub_block);
+
+ return poison_stat;
+}
+
+static bool vcn_v2_6_query_poison_status(struct amdgpu_device *adev)
+{
+ uint32_t inst, sub;
+ uint32_t poison_stat = 0;
+
+ for (inst = 0; inst < adev->vcn.num_vcn_inst; inst++)
+ for (sub = 0; sub < AMDGPU_VCN_V2_6_MAX_SUB_BLOCK; sub++)
+ poison_stat +=
+ vcn_v2_6_query_poison_by_instance(adev, inst, sub);
+
+ return !!poison_stat;
+}
+
+const struct amdgpu_ras_block_hw_ops vcn_v2_6_ras_hw_ops = {
+ .query_poison_status = vcn_v2_6_query_poison_status,
+};
+
+static struct amdgpu_vcn_ras vcn_v2_6_ras = {
+ .ras_block = {
+ .hw_ops = &vcn_v2_6_ras_hw_ops,
+ },
+};
+
+static void vcn_v2_5_set_ras_funcs(struct amdgpu_device *adev)
+{
+ switch (adev->ip_versions[VCN_HWIP][0]) {
+ case IP_VERSION(2, 6, 0):
+ adev->vcn.ras = &vcn_v2_6_ras;
+ break;
+ default:
+ break;
+ }
+
+ if (adev->vcn.ras) {
+ amdgpu_ras_register_ras_block(adev, &adev->vcn.ras->ras_block);
+
+ strcpy(adev->vcn.ras->ras_block.ras_comm.name, "vcn");
+ adev->vcn.ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__VCN;
+ adev->vcn.ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__POISON;
+ adev->vcn.ras_if = &adev->vcn.ras->ras_block.ras_comm;
+
+ /* If don't define special ras_late_init function, use default ras_late_init */
+ if (!adev->vcn.ras->ras_block.ras_late_init)
+ adev->vcn.ras->ras_block.ras_late_init = amdgpu_ras_block_late_init;
+ }
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.h b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.h
index e72f799ed0fd..1c19af74e4fd 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.h
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.h
@@ -24,6 +24,12 @@
#ifndef __VCN_V2_5_H__
#define __VCN_V2_5_H__
+enum amdgpu_vcn_v2_6_sub_block {
+ AMDGPU_VCN_V2_6_VCPU_VCODEC = 0,
+
+ AMDGPU_VCN_V2_6_MAX_SUB_BLOCK,
+};
+
extern const struct amdgpu_ip_block_version vcn_v2_5_ip_block;
extern const struct amdgpu_ip_block_version vcn_v2_6_ip_block;
diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c
index 039b90cdc3bc..c5b88d15a6df 100644
--- a/drivers/gpu/drm/amd/amdgpu/vi.c
+++ b/drivers/gpu/drm/amd/amdgpu/vi.c
@@ -2033,7 +2033,7 @@ static int vi_common_set_powergating_state(void *handle,
return 0;
}
-static void vi_common_get_clockgating_state(void *handle, u32 *flags)
+static void vi_common_get_clockgating_state(void *handle, u64 *flags)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
int data;