From 277bd3371f11400d5b02df54f057569be4b10cea Mon Sep 17 00:00:00 2001 From: Le Ma Date: Tue, 24 May 2022 10:51:43 +0800 Subject: drm/amdgpu: convert gfx.kiq to array type (v3) v1: more kiq instances are available in SOC (Le) v2: squash commits to avoid breaking the build (Le) v3: make the conversion for gfx/mec v11_0 (Hawking) Signed-off-by: Le Ma Reviewed-by: Hawking Zhang Signed-off-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 64ab1a306dfe..290804a06e05 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -824,7 +824,7 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, /* This is necessary for a HW workaround under SRIOV as well * as GFXOFF under bare metal */ - if (adev->gfx.kiq.ring.sched.ready && + if (adev->gfx.kiq[0].ring.sched.ready && (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev)) && down_read_trylock(&adev->reset_domain->sem)) { uint32_t req = hub->vm_inv_eng0_req + hub->eng_distance * eng; @@ -934,8 +934,8 @@ static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, uint16_t queried_pasid; bool ret; u32 usec_timeout = amdgpu_sriov_vf(adev) ? 
SRIOV_USEC_TIMEOUT : adev->usec_timeout; - struct amdgpu_ring *ring = &adev->gfx.kiq.ring; - struct amdgpu_kiq *kiq = &adev->gfx.kiq; + struct amdgpu_ring *ring = &adev->gfx.kiq[0].ring; + struct amdgpu_kiq *kiq = &adev->gfx.kiq[0]; if (amdgpu_in_reset(adev)) return -EIO; @@ -955,7 +955,7 @@ static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, if (vega20_xgmi_wa) ndw += kiq->pmf->invalidate_tlbs_size; - spin_lock(&adev->gfx.kiq.ring_lock); + spin_lock(&adev->gfx.kiq[0].ring_lock); /* 2 dwords flush + 8 dwords fence */ amdgpu_ring_alloc(ring, ndw); if (vega20_xgmi_wa) @@ -966,13 +966,13 @@ static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT); if (r) { amdgpu_ring_undo(ring); - spin_unlock(&adev->gfx.kiq.ring_lock); + spin_unlock(&adev->gfx.kiq[0].ring_lock); up_read(&adev->reset_domain->sem); return -ETIME; } amdgpu_ring_commit(ring); - spin_unlock(&adev->gfx.kiq.ring_lock); + spin_unlock(&adev->gfx.kiq[0].ring_lock); r = amdgpu_fence_wait_polling(ring, seq, usec_timeout); if (r < 1) { dev_err(adev->dev, "wait for kiq fence error: %ld.\n", r); -- cgit From c0c27428903700d86920394aa2302506b5d95b17 Mon Sep 17 00:00:00 2001 From: Hamza Mahfooz Date: Tue, 2 May 2023 11:59:08 -0400 Subject: drm/amdgpu: fix an amdgpu_irq_put() issue in gmc_v9_0_hw_fini() As mentioned in commit c56edea58c31 ("drm/amdgpu: fix amdgpu_irq_put call trace in gmc_v10_0_hw_fini") and commit aa6ac247ed7d ("drm/amdgpu: fix amdgpu_irq_put call trace in gmc_v11_0_hw_fini"), it is meaningless to call amdgpu_irq_put() for gmc.ecc_irq. So, remove it from gmc_v9_0_hw_fini(). 
Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2522 Fixes: c8b5a95b5709 ("drm/amdgpu: Fix desktop freezed after gpu-reset") Reviewed-by: Mario Limonciello Signed-off-by: Hamza Mahfooz Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 1 - 1 file changed, 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 290804a06e05..6ae5cee9b64b 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -1999,7 +1999,6 @@ static int gmc_v9_0_hw_fini(void *handle) if (adev->mmhub.funcs->update_power_gating) adev->mmhub.funcs->update_power_gating(adev, false); - amdgpu_irq_put(adev, &adev->gmc.ecc_irq, 0); amdgpu_irq_put(adev, &adev->gmc.vm_fault, 0); return 0; -- cgit From f4caf5842652f08e024741ef6d423cb0c101d863 Mon Sep 17 00:00:00 2001 From: Hawking Zhang Date: Wed, 14 Sep 2022 16:35:50 +0800 Subject: drm/amdgpu: introduce vmhub definition for multi-partition cases (v3) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit v1: Each partition has its own gfxhub or mmhub. 
adjust the num of MAX_VMHUBS and the GFXHUB/MMHUB layout (Le) v2: re-design the AMDGPU_GFXHUB/AMDGPU_MMHUB layout (Le) v3: apply the gfxhub/mmhub layout to new IPs (Hawking) v4: fix up gmc11 (Alex) v5: rebase (Alex) Signed-off-by: Le Ma Acked-by: Christian König Reviewed-by: Hawking Zhang Signed-off-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c | 4 +- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 4 +- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 13 +++-- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 6 +-- drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 8 +-- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 8 +-- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c | 2 +- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 4 +- drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c | 10 ++-- drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c | 10 ++-- drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c | 10 ++-- drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c | 12 ++--- drivers/gpu/drm/amd/amdgpu/gfxhub_v3_0.c | 10 ++-- drivers/gpu/drm/amd/amdgpu/gfxhub_v3_0_3.c | 10 ++-- drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 33 ++++++------- drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c | 26 +++++----- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 78 +++++++++++++++--------------- drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c | 4 +- drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/mes_v10_1.c | 2 +- drivers/gpu/drm/amd/amdgpu/mes_v11_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c | 10 ++-- drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c | 10 ++-- drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c | 10 ++-- drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c | 10 ++-- drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c | 10 ++-- drivers/gpu/drm/amd/amdgpu/mmhub_v3_0.c | 10 ++-- drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.c | 10 ++-- 
drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_2.c | 10 ++-- drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c | 12 ++--- drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 10 ++-- drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c | 4 +- drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c | 2 +- drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c | 4 +- drivers/gpu/drm/amd/amdgpu/vce_v4_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c | 4 +- drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c | 4 +- drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c | 8 +-- drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c | 4 +- drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c | 2 +- 47 files changed, 204 insertions(+), 204 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index a46285841d17..f0a136d35279 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -736,7 +736,7 @@ int amdgpu_amdkfd_flush_gpu_tlb_vmid(struct amdgpu_device *adev, for (i = 0; i < adev->num_vmhubs; i++) amdgpu_gmc_flush_gpu_tlb(adev, vmid, i, 0); } else { - amdgpu_gmc_flush_gpu_tlb(adev, vmid, AMDGPU_GFXHUB_0, 0); + amdgpu_gmc_flush_gpu_tlb(adev, vmid, AMDGPU_GFXHUB(0), 0); } return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index 87e1a1a9f298..488b3bb6dcb1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -315,7 +315,7 @@ int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev, ring->use_doorbell = true; ring->doorbell_index = adev->doorbell_index.kiq; ring->xcc_id = xcc_id; - ring->vm_hub = AMDGPU_GFXHUB_0; + ring->vm_hub = AMDGPU_GFXHUB(0); if (xcc_id >= 1) ring->doorbell_index = adev->doorbell_index.xcc1_kiq_start + xcc_id - 1; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c index 4e2531758866..0a4e5fcfec6b 
100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c @@ -670,7 +670,7 @@ void amdgpu_gmc_set_vm_fault_masks(struct amdgpu_device *adev, int hub_type, for (i = 0; i < 16; i++) { reg = hub->vm_context0_cntl + hub->ctx_distance * i; - tmp = (hub_type == AMDGPU_GFXHUB_0) ? + tmp = (hub_type == AMDGPU_GFXHUB(0)) ? RREG32_SOC15_IP(GC, reg) : RREG32_SOC15_IP(MMHUB, reg); @@ -679,7 +679,7 @@ void amdgpu_gmc_set_vm_fault_masks(struct amdgpu_device *adev, int hub_type, else tmp &= ~hub->vm_cntx_cntl_vm_fault; - (hub_type == AMDGPU_GFXHUB_0) ? + (hub_type == AMDGPU_GFXHUB(0)) ? WREG32_SOC15_IP(GC, reg, tmp) : WREG32_SOC15_IP(MMHUB, reg, tmp); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index b6bd667df676..c3964c14f215 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -2374,12 +2374,12 @@ int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) case AMDGPU_VM_OP_RESERVE_VMID: /* We only have requirement to reserve vmid from gfxhub */ r = amdgpu_vmid_alloc_reserved(adev, &fpriv->vm, - AMDGPU_GFXHUB_0); + AMDGPU_GFXHUB(0)); if (r) return r; break; case AMDGPU_VM_OP_UNRESERVE_VMID: - amdgpu_vmid_free_reserved(adev, &fpriv->vm, AMDGPU_GFXHUB_0); + amdgpu_vmid_free_reserved(adev, &fpriv->vm, AMDGPU_GFXHUB(0)); break; default: return -EINVAL; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index 6f085f0b4ef3..9f5d32b0fda1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -111,11 +111,14 @@ struct amdgpu_mem_stats; /* Reserve 4MB VRAM for page tables */ #define AMDGPU_VM_RESERVED_VRAM (8ULL << 20) -/* max number of VMHUB */ -#define AMDGPU_MAX_VMHUBS 3 -#define AMDGPU_GFXHUB_0 0 -#define AMDGPU_MMHUB_0 1 -#define AMDGPU_MMHUB_1 2 +/* + * max number of VMHUB + * layout: max 8 GFXHUB + 4 MMHUB0 + 1 MMHUB1 + */ +#define 
AMDGPU_MAX_VMHUBS 13 +#define AMDGPU_GFXHUB(x) (x) +#define AMDGPU_MMHUB0(x) (8 + x) +#define AMDGPU_MMHUB1(x) (8 + 4 + x) /* Reserve 2MB at top/bottom of address space for kernel use */ #define AMDGPU_VA_RESERVED_SIZE (2ULL << 20) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 8e86b2c23c0a..7b585141e10e 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -4461,7 +4461,7 @@ static int gfx_v10_0_gfx_ring_init(struct amdgpu_device *adev, int ring_id, ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1; else ring->doorbell_index = adev->doorbell_index.gfx_ring1 << 1; - ring->vm_hub = AMDGPU_GFXHUB_0; + ring->vm_hub = AMDGPU_GFXHUB(0); sprintf(ring->name, "gfx_%d.%d.%d", ring->me, ring->pipe, ring->queue); irq_type = AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP + ring->pipe; @@ -4490,7 +4490,7 @@ static int gfx_v10_0_compute_ring_init(struct amdgpu_device *adev, int ring_id, ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1; ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (ring_id * GFX10_MEC_HPD_SIZE); - ring->vm_hub = AMDGPU_GFXHUB_0; + ring->vm_hub = AMDGPU_GFXHUB(0); sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue); irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP @@ -4978,7 +4978,7 @@ static void gfx_v10_0_constants_init(struct amdgpu_device *adev) /* XXX SH_MEM regs */ /* where to put LDS, scratch, GPUVM in FSA64 space */ mutex_lock(&adev->srbm_mutex); - for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids; i++) { + for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB(0)].num_ids; i++) { nv_grbm_select(adev, 0, 0, 0, i); /* CP and shaders */ WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, DEFAULT_SH_MEM_CONFIG); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index f77779c31043..790df2cc3480 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ 
b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -906,7 +906,7 @@ static int gfx_v11_0_gfx_ring_init(struct amdgpu_device *adev, int ring_id, ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1; else ring->doorbell_index = adev->doorbell_index.gfx_ring1 << 1; - ring->vm_hub = AMDGPU_GFXHUB_0; + ring->vm_hub = AMDGPU_GFXHUB(0); sprintf(ring->name, "gfx_%d.%d.%d", ring->me, ring->pipe, ring->queue); irq_type = AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP + ring->pipe; @@ -937,7 +937,7 @@ static int gfx_v11_0_compute_ring_init(struct amdgpu_device *adev, int ring_id, ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1; ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (ring_id * GFX11_MEC_HPD_SIZE); - ring->vm_hub = AMDGPU_GFXHUB_0; + ring->vm_hub = AMDGPU_GFXHUB(0); sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue); irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP @@ -1707,7 +1707,7 @@ static void gfx_v11_0_constants_init(struct amdgpu_device *adev) /* XXX SH_MEM regs */ /* where to put LDS, scratch, GPUVM in FSA64 space */ mutex_lock(&adev->srbm_mutex); - for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids; i++) { + for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB(0)].num_ids; i++) { soc21_grbm_select(adev, 0, 0, 0, i); /* CP and shaders */ WREG32_SOC15(GC, 0, regSH_MEM_CONFIG, DEFAULT_SH_MEM_CONFIG); @@ -4190,7 +4190,7 @@ static int gfx_v11_0_gfxhub_enable(struct amdgpu_device *adev) false : true; adev->gfxhub.funcs->set_fault_enable_default(adev, value); - amdgpu_gmc_flush_gpu_tlb(adev, 0, AMDGPU_GFXHUB_0, 0); + amdgpu_gmc_flush_gpu_tlb(adev, 0, AMDGPU_GFXHUB(0), 0); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 46577b59cb04..91814dc083c9 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -2005,7 +2005,7 @@ static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id, ring->doorbell_index 
= (adev->doorbell_index.mec_ring0 + ring_id) << 1; ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (ring_id * GFX9_MEC_HPD_SIZE); - ring->vm_hub = AMDGPU_GFXHUB_0; + ring->vm_hub = AMDGPU_GFXHUB(0); sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue); irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP @@ -2105,7 +2105,7 @@ static int gfx_v9_0_sw_init(void *handle) /* disable scheduler on the real ring */ ring->no_scheduler = true; - ring->vm_hub = AMDGPU_GFXHUB_0; + ring->vm_hub = AMDGPU_GFXHUB(0); r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP, AMDGPU_RING_PRIO_DEFAULT, NULL); @@ -2123,7 +2123,7 @@ static int gfx_v9_0_sw_init(void *handle) ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1; ring->is_sw_ring = true; hw_prio = amdgpu_sw_ring_priority(i); - ring->vm_hub = AMDGPU_GFXHUB_0; + ring->vm_hub = AMDGPU_GFXHUB(0); r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP, hw_prio, NULL); @@ -2393,7 +2393,7 @@ static void gfx_v9_0_constants_init(struct amdgpu_device *adev) /* XXX SH_MEM regs */ /* where to put LDS, scratch, GPUVM in FSA64 space */ mutex_lock(&adev->srbm_mutex); - for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids; i++) { + for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB(0)].num_ids; i++) { soc15_grbm_select(adev, 0, 0, 0, i, 0); /* CP and shaders */ if (i == 0) { diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c index d648a29c33e0..ec7c049c5952 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c @@ -1935,7 +1935,7 @@ static bool gfx_v9_4_2_query_uctl2_poison_status(struct amdgpu_device *adev) u32 status = 0; struct amdgpu_vmhub *hub; - hub = &adev->vmhub[AMDGPU_GFXHUB_0]; + hub = &adev->vmhub[AMDGPU_GFXHUB(0)]; status = RREG32(hub->vm_l2_pro_fault_status); /* reset page fault status */ WREG32_P(hub->vm_l2_pro_fault_cntl, 1, 
~1); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index 9d17dcfae130..f5104b982633 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -757,7 +757,7 @@ static int gfx_v9_4_3_compute_ring_init(struct amdgpu_device *adev, int ring_id, (adev->doorbell_index.mec_ring0 + ring_id) << 1; ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (ring_id * GFX9_MEC_HPD_SIZE); - ring->vm_hub = AMDGPU_GFXHUB_0; + ring->vm_hub = AMDGPU_GFXHUB(0); sprintf(ring->name, "comp_%d.%d.%d.%d", ring->xcc_id, ring->me, ring->pipe, ring->queue); @@ -996,7 +996,7 @@ static void gfx_v9_4_3_constants_init(struct amdgpu_device *adev) /* XXX SH_MEM regs */ /* where to put LDS, scratch, GPUVM in FSA64 space */ mutex_lock(&adev->srbm_mutex); - for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids; i++) { + for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB(0)].num_ids; i++) { for (j = 0; j < adev->gfx.num_xcd; j++) { soc15_grbm_select(adev, 0, 0, 0, i, j); /* CP and shaders */ diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c index ab2325f6c7ac..d94cc1ec7242 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c @@ -40,7 +40,7 @@ static void gfxhub_v1_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid, uint64_t page_table_base) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)]; WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32, hub->ctx_addr_distance * vmid, @@ -247,7 +247,7 @@ static void gfxhub_v1_0_disable_identity_aperture(struct amdgpu_device *adev) static void gfxhub_v1_0_setup_vmid_config(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)]; unsigned num_level, block_size; uint32_t tmp; int i; 
@@ -307,7 +307,7 @@ static void gfxhub_v1_0_setup_vmid_config(struct amdgpu_device *adev) static void gfxhub_v1_0_program_invalidation(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)]; unsigned i; for (i = 0 ; i < 18; ++i) { @@ -338,7 +338,7 @@ static int gfxhub_v1_0_gart_enable(struct amdgpu_device *adev) static void gfxhub_v1_0_gart_disable(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)]; u32 tmp; u32 i; @@ -411,7 +411,7 @@ static void gfxhub_v1_0_set_fault_enable_default(struct amdgpu_device *adev, static void gfxhub_v1_0_init(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)]; hub->ctx0_ptb_addr_lo32 = SOC15_REG_OFFSET(GC, 0, diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c index 79af32bb078c..9c385ce3a8c4 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c @@ -42,7 +42,7 @@ static void gfxhub_v1_2_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid, uint64_t page_table_base) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)]; int i; for (i = 0; i < adev->gfx.num_xcd; i++) { @@ -291,7 +291,7 @@ static void gfxhub_v1_2_disable_identity_aperture(struct amdgpu_device *adev) static void gfxhub_v1_2_setup_vmid_config(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)]; unsigned num_level, block_size; uint32_t tmp; int i, j; @@ -357,7 +357,7 @@ static void gfxhub_v1_2_setup_vmid_config(struct amdgpu_device *adev) static void gfxhub_v1_2_program_invalidation(struct amdgpu_device *adev) { - struct 
amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)]; unsigned i, j; for (j = 0; j < adev->gfx.num_xcd; j++) { @@ -406,7 +406,7 @@ static int gfxhub_v1_2_gart_enable(struct amdgpu_device *adev) static void gfxhub_v1_2_gart_disable(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)]; u32 tmp; u32 i, j; @@ -483,7 +483,7 @@ static void gfxhub_v1_2_set_fault_enable_default(struct amdgpu_device *adev, static void gfxhub_v1_2_init(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)]; hub->ctx0_ptb_addr_lo32 = SOC15_REG_OFFSET(GC, 0, diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c index 9b3a02527318..f173a61c6c15 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c @@ -120,7 +120,7 @@ static u64 gfxhub_v2_0_get_mc_fb_offset(struct amdgpu_device *adev) static void gfxhub_v2_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid, uint64_t page_table_base) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)]; WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32, hub->ctx_addr_distance * vmid, @@ -282,7 +282,7 @@ static void gfxhub_v2_0_disable_identity_aperture(struct amdgpu_device *adev) static void gfxhub_v2_0_setup_vmid_config(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)]; int i; uint32_t tmp; @@ -331,7 +331,7 @@ static void gfxhub_v2_0_setup_vmid_config(struct amdgpu_device *adev) static void gfxhub_v2_0_program_invalidation(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; + struct amdgpu_vmhub 
*hub = &adev->vmhub[AMDGPU_GFXHUB(0)]; unsigned i; for (i = 0 ; i < 18; ++i) { @@ -360,7 +360,7 @@ static int gfxhub_v2_0_gart_enable(struct amdgpu_device *adev) static void gfxhub_v2_0_gart_disable(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)]; u32 tmp; u32 i; @@ -433,7 +433,7 @@ static const struct amdgpu_vmhub_funcs gfxhub_v2_0_vmhub_funcs = { static void gfxhub_v2_0_init(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)]; hub->ctx0_ptb_addr_lo32 = SOC15_REG_OFFSET(GC, 0, diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c index 4aacbbec31e2..d8fc3e8088cd 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c @@ -123,7 +123,7 @@ static u64 gfxhub_v2_1_get_mc_fb_offset(struct amdgpu_device *adev) static void gfxhub_v2_1_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid, uint64_t page_table_base) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)]; WREG32_SOC15_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32, hub->ctx_addr_distance * vmid, @@ -291,7 +291,7 @@ static void gfxhub_v2_1_disable_identity_aperture(struct amdgpu_device *adev) static void gfxhub_v2_1_setup_vmid_config(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)]; int i; uint32_t tmp; @@ -340,7 +340,7 @@ static void gfxhub_v2_1_setup_vmid_config(struct amdgpu_device *adev) static void gfxhub_v2_1_program_invalidation(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)]; unsigned i; for (i = 0 ; i < 18; ++i) { @@ -381,7 +381,7 @@ 
static int gfxhub_v2_1_gart_enable(struct amdgpu_device *adev) static void gfxhub_v2_1_gart_disable(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)]; u32 tmp; u32 i; @@ -462,7 +462,7 @@ static const struct amdgpu_vmhub_funcs gfxhub_v2_1_vmhub_funcs = { static void gfxhub_v2_1_init(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)]; hub->ctx0_ptb_addr_lo32 = SOC15_REG_OFFSET(GC, 0, @@ -651,7 +651,7 @@ static void gfxhub_v2_1_restore_regs(struct amdgpu_device *adev) static void gfxhub_v2_1_halt(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)]; int i; uint32_t tmp; int time = 1000; diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v3_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v3_0.c index 13712640fa46..c53147f9c9fc 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v3_0.c @@ -119,7 +119,7 @@ static u64 gfxhub_v3_0_get_mc_fb_offset(struct amdgpu_device *adev) static void gfxhub_v3_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid, uint64_t page_table_base) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)]; WREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32, hub->ctx_addr_distance * vmid, @@ -290,7 +290,7 @@ static void gfxhub_v3_0_disable_identity_aperture(struct amdgpu_device *adev) static void gfxhub_v3_0_setup_vmid_config(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)]; int i; uint32_t tmp; @@ -339,7 +339,7 @@ static void gfxhub_v3_0_setup_vmid_config(struct amdgpu_device *adev) static void gfxhub_v3_0_program_invalidation(struct 
amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)]; unsigned i; for (i = 0 ; i < 18; ++i) { @@ -380,7 +380,7 @@ static int gfxhub_v3_0_gart_enable(struct amdgpu_device *adev) static void gfxhub_v3_0_gart_disable(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)]; u32 tmp; u32 i; @@ -463,7 +463,7 @@ static const struct amdgpu_vmhub_funcs gfxhub_v3_0_vmhub_funcs = { static void gfxhub_v3_0_init(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)]; hub->ctx0_ptb_addr_lo32 = SOC15_REG_OFFSET(GC, 0, diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v3_0_3.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v3_0_3.c index 6e0bd628c889..ae777487d72e 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v3_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v3_0_3.c @@ -122,7 +122,7 @@ static u64 gfxhub_v3_0_3_get_mc_fb_offset(struct amdgpu_device *adev) static void gfxhub_v3_0_3_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid, uint64_t page_table_base) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)]; WREG32_SOC15_OFFSET(GC, 0, regGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32, hub->ctx_addr_distance * vmid, @@ -295,7 +295,7 @@ static void gfxhub_v3_0_3_disable_identity_aperture(struct amdgpu_device *adev) static void gfxhub_v3_0_3_setup_vmid_config(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)]; int i; uint32_t tmp; @@ -344,7 +344,7 @@ static void gfxhub_v3_0_3_setup_vmid_config(struct amdgpu_device *adev) static void gfxhub_v3_0_3_program_invalidation(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; 
+ struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)]; unsigned i; for (i = 0 ; i < 18; ++i) { @@ -373,7 +373,7 @@ static int gfxhub_v3_0_3_gart_enable(struct amdgpu_device *adev) static void gfxhub_v3_0_3_gart_disable(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)]; u32 tmp; u32 i; @@ -451,7 +451,7 @@ static const struct amdgpu_vmhub_funcs gfxhub_v3_0_3_vmhub_funcs = { static void gfxhub_v3_0_3_init(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)]; hub->ctx0_ptb_addr_lo32 = SOC15_REG_OFFSET(GC, 0, diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c index 5697b66bf0de..ea2a448147e3 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c @@ -76,7 +76,7 @@ gmc_v10_0_vm_fault_interrupt_state(struct amdgpu_device *adev, switch (state) { case AMDGPU_IRQ_STATE_DISABLE: /* MM HUB */ - amdgpu_gmc_set_vm_fault_masks(adev, AMDGPU_MMHUB_0, false); + amdgpu_gmc_set_vm_fault_masks(adev, AMDGPU_MMHUB0(0), false); /* GFX HUB */ /* This works because this interrupt is only * enabled at init/resume and disabled in @@ -84,11 +84,11 @@ gmc_v10_0_vm_fault_interrupt_state(struct amdgpu_device *adev, * change over the course of suspend/resume. */ if (!adev->in_s0ix) - amdgpu_gmc_set_vm_fault_masks(adev, AMDGPU_GFXHUB_0, false); + amdgpu_gmc_set_vm_fault_masks(adev, AMDGPU_GFXHUB(0), false); break; case AMDGPU_IRQ_STATE_ENABLE: /* MM HUB */ - amdgpu_gmc_set_vm_fault_masks(adev, AMDGPU_MMHUB_0, true); + amdgpu_gmc_set_vm_fault_masks(adev, AMDGPU_MMHUB0(0), true); /* GFX HUB */ /* This works because this interrupt is only * enabled at init/resume and disabled in @@ -96,7 +96,7 @@ gmc_v10_0_vm_fault_interrupt_state(struct amdgpu_device *adev, * change over the course of suspend/resume. 
*/ if (!adev->in_s0ix) - amdgpu_gmc_set_vm_fault_masks(adev, AMDGPU_GFXHUB_0, true); + amdgpu_gmc_set_vm_fault_masks(adev, AMDGPU_GFXHUB(0), true); break; default: break; @@ -149,7 +149,7 @@ static int gmc_v10_0_process_interrupt(struct amdgpu_device *adev, * be updated to avoid reading an incorrect value due to * the new fast GRBM interface. */ - if ((entry->vmid_src == AMDGPU_GFXHUB_0) && + if ((entry->vmid_src == AMDGPU_GFXHUB(0)) && (adev->ip_versions[GC_HWIP][0] < IP_VERSION(10, 3, 0))) RREG32(hub->vm_l2_pro_fault_status); @@ -212,8 +212,7 @@ static void gmc_v10_0_set_irq_funcs(struct amdgpu_device *adev) static bool gmc_v10_0_use_invalidate_semaphore(struct amdgpu_device *adev, uint32_t vmhub) { - return ((vmhub == AMDGPU_MMHUB_0 || - vmhub == AMDGPU_MMHUB_1) && + return ((vmhub == AMDGPU_MMHUB0(0)) && (!amdgpu_sriov_vf(adev))); } @@ -249,7 +248,7 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid, unsigned int i; unsigned char hub_ip = 0; - hub_ip = (vmhub == AMDGPU_GFXHUB_0) ? + hub_ip = (vmhub == AMDGPU_GFXHUB(0)) ? GC_HWIP : MMHUB_HWIP; spin_lock(&adev->gmc.invalidate_lock); @@ -284,7 +283,7 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid, * Issue a dummy read to wait for the ACK register to be cleared * to avoid a false ACK due to the new fast GRBM interface. 
*/ - if ((vmhub == AMDGPU_GFXHUB_0) && + if ((vmhub == AMDGPU_GFXHUB(0)) && (adev->ip_versions[GC_HWIP][0] < IP_VERSION(10, 3, 0))) RREG32_RLC_NO_KIQ(hub->vm_inv_eng0_req + hub->eng_distance * eng, hub_ip); @@ -361,19 +360,19 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, mutex_lock(&adev->mman.gtt_window_lock); - if (vmhub == AMDGPU_MMHUB_0) { - gmc_v10_0_flush_vm_hub(adev, vmid, AMDGPU_MMHUB_0, 0); + if (vmhub == AMDGPU_MMHUB0(0)) { + gmc_v10_0_flush_vm_hub(adev, vmid, AMDGPU_MMHUB0(0), 0); mutex_unlock(&adev->mman.gtt_window_lock); return; } - BUG_ON(vmhub != AMDGPU_GFXHUB_0); + BUG_ON(vmhub != AMDGPU_GFXHUB(0)); if (!adev->mman.buffer_funcs_enabled || !adev->ib_pool_ready || amdgpu_in_reset(adev) || ring->sched.ready == false) { - gmc_v10_0_flush_vm_hub(adev, vmid, AMDGPU_GFXHUB_0, 0); + gmc_v10_0_flush_vm_hub(adev, vmid, AMDGPU_GFXHUB(0), 0); mutex_unlock(&adev->mman.gtt_window_lock); return; } @@ -466,7 +465,7 @@ static int gmc_v10_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, i, flush_type); } else { gmc_v10_0_flush_gpu_tlb(adev, vmid, - AMDGPU_GFXHUB_0, flush_type); + AMDGPU_GFXHUB(0), flush_type); } if (!adev->enable_mes) break; @@ -534,7 +533,7 @@ static void gmc_v10_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned vmid if (ring->is_mes_queue) return; - if (ring->vm_hub == AMDGPU_GFXHUB_0) + if (ring->vm_hub == AMDGPU_GFXHUB(0)) reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT) + vmid; else reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT_MM) + vmid; @@ -1075,9 +1074,9 @@ static int gmc_v10_0_gart_enable(struct amdgpu_device *adev) if (!adev->in_s0ix) adev->gfxhub.funcs->set_fault_enable_default(adev, value); adev->mmhub.funcs->set_fault_enable_default(adev, value); - gmc_v10_0_flush_gpu_tlb(adev, 0, AMDGPU_MMHUB_0, 0); + gmc_v10_0_flush_gpu_tlb(adev, 0, AMDGPU_MMHUB0(0), 0); if (!adev->in_s0ix) - gmc_v10_0_flush_gpu_tlb(adev, 0, AMDGPU_GFXHUB_0, 0); + gmc_v10_0_flush_gpu_tlb(adev, 0, AMDGPU_GFXHUB(0), 0); 
DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n", (unsigned)(adev->gmc.gart_size >> 20), diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c index 2f570fb5febe..fb2ac31cbba7 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c @@ -64,7 +64,7 @@ gmc_v11_0_vm_fault_interrupt_state(struct amdgpu_device *adev, switch (state) { case AMDGPU_IRQ_STATE_DISABLE: /* MM HUB */ - amdgpu_gmc_set_vm_fault_masks(adev, AMDGPU_MMHUB_0, false); + amdgpu_gmc_set_vm_fault_masks(adev, AMDGPU_MMHUB0(0), false); /* GFX HUB */ /* This works because this interrupt is only * enabled at init/resume and disabled in @@ -72,11 +72,11 @@ gmc_v11_0_vm_fault_interrupt_state(struct amdgpu_device *adev, * change over the course of suspend/resume. */ if (!adev->in_s0ix) - amdgpu_gmc_set_vm_fault_masks(adev, AMDGPU_GFXHUB_0, false); + amdgpu_gmc_set_vm_fault_masks(adev, AMDGPU_GFXHUB(0), false); break; case AMDGPU_IRQ_STATE_ENABLE: /* MM HUB */ - amdgpu_gmc_set_vm_fault_masks(adev, AMDGPU_MMHUB_0, true); + amdgpu_gmc_set_vm_fault_masks(adev, AMDGPU_MMHUB0(0), true); /* GFX HUB */ /* This works because this interrupt is only * enabled at init/resume and disabled in @@ -84,7 +84,7 @@ gmc_v11_0_vm_fault_interrupt_state(struct amdgpu_device *adev, * change over the course of suspend/resume. */ if (!adev->in_s0ix) - amdgpu_gmc_set_vm_fault_masks(adev, AMDGPU_GFXHUB_0, true); + amdgpu_gmc_set_vm_fault_masks(adev, AMDGPU_GFXHUB(0), true); break; default: break; @@ -110,7 +110,7 @@ static int gmc_v11_0_process_interrupt(struct amdgpu_device *adev, * be updated to avoid reading an incorrect value due to * the new fast GRBM interface. 
*/ - if (entry->vmid_src == AMDGPU_GFXHUB_0) + if (entry->vmid_src == AMDGPU_GFXHUB(0)) RREG32(hub->vm_l2_pro_fault_status); status = RREG32(hub->vm_l2_pro_fault_status); @@ -170,7 +170,7 @@ static void gmc_v11_0_set_irq_funcs(struct amdgpu_device *adev) static bool gmc_v11_0_use_invalidate_semaphore(struct amdgpu_device *adev, uint32_t vmhub) { - return ((vmhub == AMDGPU_MMHUB_0) && + return ((vmhub == AMDGPU_MMHUB0(0)) && (!amdgpu_sriov_vf(adev))); } @@ -202,7 +202,7 @@ static void gmc_v11_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid, unsigned int i; unsigned char hub_ip = 0; - hub_ip = (vmhub == AMDGPU_GFXHUB_0) ? + hub_ip = (vmhub == AMDGPU_GFXHUB(0)) ? GC_HWIP : MMHUB_HWIP; spin_lock(&adev->gmc.invalidate_lock); @@ -251,7 +251,7 @@ static void gmc_v11_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid, hub->eng_distance * eng, 0, hub_ip); /* Issue additional private vm invalidation to MMHUB */ - if ((vmhub != AMDGPU_GFXHUB_0) && + if ((vmhub != AMDGPU_GFXHUB(0)) && (hub->vm_l2_bank_select_reserved_cid2) && !amdgpu_sriov_vf(adev)) { inv_req = RREG32_NO_KIQ(hub->vm_l2_bank_select_reserved_cid2); @@ -284,7 +284,7 @@ static void gmc_v11_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid, static void gmc_v11_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, uint32_t vmhub, uint32_t flush_type) { - if ((vmhub == AMDGPU_GFXHUB_0) && !adev->gfx.is_poweron) + if ((vmhub == AMDGPU_GFXHUB(0)) && !adev->gfx.is_poweron) return; /* flush hdp cache */ @@ -369,7 +369,7 @@ static int gmc_v11_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, i, flush_type); } else { gmc_v11_0_flush_gpu_tlb(adev, vmid, - AMDGPU_GFXHUB_0, flush_type); + AMDGPU_GFXHUB(0), flush_type); } } } @@ -435,7 +435,7 @@ static void gmc_v11_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned vmid if (ring->is_mes_queue) return; - if (ring->vm_hub == AMDGPU_GFXHUB_0) + if (ring->vm_hub == AMDGPU_GFXHUB(0)) reg = SOC15_REG_OFFSET(OSSSYS, 0, regIH_VMID_0_LUT) + vmid; else 
reg = SOC15_REG_OFFSET(OSSSYS, 0, regIH_VMID_0_LUT_MM) + vmid; @@ -886,7 +886,7 @@ static int gmc_v11_0_sw_fini(void *handle) static void gmc_v11_0_init_golden_registers(struct amdgpu_device *adev) { if (amdgpu_sriov_vf(adev)) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)]; WREG32(hub->vm_contexts_disable, 0); return; @@ -921,7 +921,7 @@ static int gmc_v11_0_gart_enable(struct amdgpu_device *adev) false : true; adev->mmhub.funcs->set_fault_enable_default(adev, value); - gmc_v11_0_flush_gpu_tlb(adev, 0, AMDGPU_MMHUB_0, 0); + gmc_v11_0_flush_gpu_tlb(adev, 0, AMDGPU_MMHUB0(0), 0); DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n", (unsigned)(adev->gmc.gart_size >> 20), diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 6ae5cee9b64b..193ba4d912a6 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -491,20 +491,20 @@ static int gmc_v9_0_vm_fault_interrupt_state(struct amdgpu_device *adev, * fini/suspend, so the overall state doesn't * change over the course of suspend/resume. */ - if (adev->in_s0ix && (j == AMDGPU_GFXHUB_0)) + if (adev->in_s0ix && (j == AMDGPU_GFXHUB(0))) continue; - if (j == AMDGPU_GFXHUB_0) - tmp = RREG32_SOC15_IP(GC, reg); - else + if (j >= AMDGPU_MMHUB0(0)) tmp = RREG32_SOC15_IP(MMHUB, reg); + else + tmp = RREG32_SOC15_IP(GC, reg); tmp &= ~bits; - if (j == AMDGPU_GFXHUB_0) - WREG32_SOC15_IP(GC, reg, tmp); - else + if (j >= AMDGPU_MMHUB0(0)) WREG32_SOC15_IP(MMHUB, reg, tmp); + else + WREG32_SOC15_IP(GC, reg, tmp); } } break; @@ -519,20 +519,20 @@ static int gmc_v9_0_vm_fault_interrupt_state(struct amdgpu_device *adev, * fini/suspend, so the overall state doesn't * change over the course of suspend/resume. 
*/ - if (adev->in_s0ix && (j == AMDGPU_GFXHUB_0)) + if (adev->in_s0ix && (j == AMDGPU_GFXHUB(0))) continue; - if (j == AMDGPU_GFXHUB_0) - tmp = RREG32_SOC15_IP(GC, reg); - else + if (j >= AMDGPU_MMHUB0(0)) tmp = RREG32_SOC15_IP(MMHUB, reg); + else + tmp = RREG32_SOC15_IP(GC, reg); tmp |= bits; - if (j == AMDGPU_GFXHUB_0) - WREG32_SOC15_IP(GC, reg, tmp); - else + if (j >= AMDGPU_MMHUB0(0)) WREG32_SOC15_IP(MMHUB, reg, tmp); + else + WREG32_SOC15_IP(GC, reg, tmp); } } break; @@ -605,13 +605,13 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev, if (entry->client_id == SOC15_IH_CLIENTID_VMC) { hub_name = "mmhub0"; - hub = &adev->vmhub[AMDGPU_MMHUB_0]; + hub = &adev->vmhub[AMDGPU_MMHUB0(0)]; } else if (entry->client_id == SOC15_IH_CLIENTID_VMC1) { hub_name = "mmhub1"; - hub = &adev->vmhub[AMDGPU_MMHUB_1]; + hub = &adev->vmhub[AMDGPU_MMHUB1(0)]; } else { hub_name = "gfxhub0"; - hub = &adev->vmhub[AMDGPU_GFXHUB_0]; + hub = &adev->vmhub[AMDGPU_GFXHUB(0)]; } memset(&task_info, 0, sizeof(struct amdgpu_task_info)); @@ -636,7 +636,7 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev, * be updated to avoid reading an incorrect value due to * the new fast GRBM interface. */ - if ((entry->vmid_src == AMDGPU_GFXHUB_0) && + if ((entry->vmid_src == AMDGPU_GFXHUB(0)) && (adev->ip_versions[GC_HWIP][0] < IP_VERSION(9, 4, 2))) RREG32(hub->vm_l2_pro_fault_status); @@ -649,7 +649,7 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev, dev_err(adev->dev, "VM_L2_PROTECTION_FAULT_STATUS:0x%08X\n", status); - if (hub == &adev->vmhub[AMDGPU_GFXHUB_0]) { + if (hub == &adev->vmhub[AMDGPU_GFXHUB(0)]) { dev_err(adev->dev, "\t Faulty UTCL2 client ID: %s (0x%x)\n", cid >= ARRAY_SIZE(gfxhub_client_ids) ? 
"unknown" : gfxhub_client_ids[cid], @@ -759,8 +759,8 @@ static bool gmc_v9_0_use_invalidate_semaphore(struct amdgpu_device *adev, adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3)) return false; - return ((vmhub == AMDGPU_MMHUB_0 || - vmhub == AMDGPU_MMHUB_1) && + return ((vmhub == AMDGPU_MMHUB0(0) || + vmhub == AMDGPU_MMHUB1(0)) && (!amdgpu_sriov_vf(adev)) && (!(!(adev->apu_flags & AMD_APU_IS_RAVEN2) && (adev->apu_flags & AMD_APU_IS_PICASSO)))); @@ -849,11 +849,10 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, if (use_semaphore) { for (j = 0; j < adev->usec_timeout; j++) { /* a read return value of 1 means semaphore acquire */ - if (vmhub == AMDGPU_GFXHUB_0) - tmp = RREG32_SOC15_IP_NO_KIQ(GC, hub->vm_inv_eng0_sem + hub->eng_distance * eng); - else + if (vmhub >= AMDGPU_MMHUB0(0)) tmp = RREG32_SOC15_IP_NO_KIQ(MMHUB, hub->vm_inv_eng0_sem + hub->eng_distance * eng); - + else + tmp = RREG32_SOC15_IP_NO_KIQ(GC, hub->vm_inv_eng0_sem + hub->eng_distance * eng); if (tmp & 0x1) break; udelay(1); @@ -864,27 +863,26 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, } do { - if (vmhub == AMDGPU_GFXHUB_0) - WREG32_SOC15_IP_NO_KIQ(GC, hub->vm_inv_eng0_req + hub->eng_distance * eng, inv_req); - else + if (vmhub >= AMDGPU_MMHUB0(0)) WREG32_SOC15_IP_NO_KIQ(MMHUB, hub->vm_inv_eng0_req + hub->eng_distance * eng, inv_req); + else + WREG32_SOC15_IP_NO_KIQ(GC, hub->vm_inv_eng0_req + hub->eng_distance * eng, inv_req); /* * Issue a dummy read to wait for the ACK register to * be cleared to avoid a false ACK due to the new fast * GRBM interface. 
*/ - if ((vmhub == AMDGPU_GFXHUB_0) && + if ((vmhub == AMDGPU_GFXHUB(0)) && (adev->ip_versions[GC_HWIP][0] < IP_VERSION(9, 4, 2))) RREG32_NO_KIQ(hub->vm_inv_eng0_req + hub->eng_distance * eng); for (j = 0; j < adev->usec_timeout; j++) { - if (vmhub == AMDGPU_GFXHUB_0) - tmp = RREG32_SOC15_IP_NO_KIQ(GC, hub->vm_inv_eng0_ack + hub->eng_distance * eng); - else + if (vmhub >= AMDGPU_MMHUB0(0)) tmp = RREG32_SOC15_IP_NO_KIQ(MMHUB, hub->vm_inv_eng0_ack + hub->eng_distance * eng); - + else + tmp = RREG32_SOC15_IP_NO_KIQ(GC, hub->vm_inv_eng0_ack + hub->eng_distance * eng); if (tmp & (1 << vmid)) break; udelay(1); @@ -900,10 +898,10 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, * add semaphore release after invalidation, * write with 0 means semaphore release */ - if (vmhub == AMDGPU_GFXHUB_0) - WREG32_SOC15_IP_NO_KIQ(GC, hub->vm_inv_eng0_sem + hub->eng_distance * eng, 0); + if (vmhub >= AMDGPU_MMHUB0(0)) + WREG32_SOC15_IP_NO_KIQ(MMHUB, hub->vm_inv_eng0_sem + hub->eng_distance * eng, 0); else - WREG32_SOC15_IP_NO_KIQ(MMHUB, hub->vm_inv_eng0_sem + hub->eng_distance * eng, 0); + WREG32_SOC15_IP_NO_KIQ(GC, hub->vm_inv_eng0_sem + hub->eng_distance * eng, 0); } spin_unlock(&adev->gmc.invalidate_lock); @@ -994,7 +992,7 @@ static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, i, flush_type); } else { gmc_v9_0_flush_gpu_tlb(adev, vmid, - AMDGPU_GFXHUB_0, flush_type); + AMDGPU_GFXHUB(0), flush_type); } break; } @@ -1060,10 +1058,10 @@ static void gmc_v9_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned vmid, uint32_t reg; /* Do nothing because there's no lut register for mmhub1. 
*/ - if (ring->vm_hub == AMDGPU_MMHUB_1) + if (ring->vm_hub == AMDGPU_MMHUB1(0)) return; - if (ring->vm_hub == AMDGPU_GFXHUB_0) + if (ring->vm_hub == AMDGPU_GFXHUB(0)) reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT) + vmid; else reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT_MM) + vmid; @@ -1947,7 +1945,7 @@ static int gmc_v9_0_hw_init(void *handle) adev->mmhub.funcs->set_fault_enable_default(adev, value); } for (i = 0; i < adev->num_vmhubs; ++i) { - if (adev->in_s0ix && (i == AMDGPU_GFXHUB_0)) + if (adev->in_s0ix && (i == AMDGPU_GFXHUB(0))) continue; gmc_v9_0_flush_gpu_tlb(adev, 0, i, 0); } diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c index a3076eb8af6a..71fe7f6f9889 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c @@ -485,7 +485,7 @@ int jpeg_v1_0_sw_init(void *handle) return r; ring = &adev->jpeg.inst->ring_dec; - ring->vm_hub = AMDGPU_MMHUB_0; + ring->vm_hub = AMDGPU_MMHUB0(0); sprintf(ring->name, "jpeg_dec"); r = amdgpu_ring_init(adev, ring, 512, &adev->jpeg.inst->irq, 0, AMDGPU_RING_PRIO_DEFAULT, NULL); diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c index 0eddf7c824a7..3a43e42f4834 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c @@ -86,7 +86,7 @@ static int jpeg_v2_0_sw_init(void *handle) ring = &adev->jpeg.inst->ring_dec; ring->use_doorbell = true; ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 1; - ring->vm_hub = AMDGPU_MMHUB_0; + ring->vm_hub = AMDGPU_MMHUB0(0); sprintf(ring->name, "jpeg_dec"); r = amdgpu_ring_init(adev, ring, 512, &adev->jpeg.inst->irq, 0, AMDGPU_RING_PRIO_DEFAULT, NULL); diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c index b040f51d9aa9..259b7ba6a842 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c @@ -128,9 +128,9 @@ static int 
jpeg_v2_5_sw_init(void *handle) ring = &adev->jpeg.inst[i].ring_dec; ring->use_doorbell = true; if (adev->ip_versions[UVD_HWIP][0] == IP_VERSION(2, 5, 0)) - ring->vm_hub = AMDGPU_MMHUB_1; + ring->vm_hub = AMDGPU_MMHUB1(0); else - ring->vm_hub = AMDGPU_MMHUB_0; + ring->vm_hub = AMDGPU_MMHUB0(0); ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 1 + 8 * i; sprintf(ring->name, "jpeg_dec_%d", i); r = amdgpu_ring_init(adev, ring, 512, &adev->jpeg.inst[i].irq, diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c index 1c2292cc5f2c..c55386c22311 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c @@ -101,7 +101,7 @@ static int jpeg_v3_0_sw_init(void *handle) ring = &adev->jpeg.inst->ring_dec; ring->use_doorbell = true; ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 1; - ring->vm_hub = AMDGPU_MMHUB_0; + ring->vm_hub = AMDGPU_MMHUB0(0); sprintf(ring->name, "jpeg_dec"); r = amdgpu_ring_init(adev, ring, 512, &adev->jpeg.inst->irq, 0, AMDGPU_RING_PRIO_DEFAULT, NULL); diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c index 77e1e64aa1d1..d7d5ffc29393 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c @@ -108,7 +108,7 @@ static int jpeg_v4_0_sw_init(void *handle) ring = &adev->jpeg.inst->ring_dec; ring->use_doorbell = true; ring->doorbell_index = amdgpu_sriov_vf(adev) ? 
(((adev->doorbell_index.vcn.vcn_ring0_1) << 1) + 4) : ((adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 1); - ring->vm_hub = AMDGPU_MMHUB_0; + ring->vm_hub = AMDGPU_MMHUB0(0); sprintf(ring->name, "jpeg_dec"); r = amdgpu_ring_init(adev, ring, 512, &adev->jpeg.inst->irq, 0, diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c b/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c index 4560476c7c31..f1a6abdad21b 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c @@ -149,7 +149,7 @@ static int mes_v10_1_add_hw_queue(struct amdgpu_mes *mes, { struct amdgpu_device *adev = mes->adev; union MESAPI__ADD_QUEUE mes_add_queue_pkt; - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)]; uint32_t vm_cntx_cntl = hub->vm_cntx_cntl; memset(&mes_add_queue_pkt, 0, sizeof(mes_add_queue_pkt)); diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c index 3adb450eec07..9791f3581786 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c @@ -164,7 +164,7 @@ static int mes_v11_0_add_hw_queue(struct amdgpu_mes *mes, { struct amdgpu_device *adev = mes->adev; union MESAPI__ADD_QUEUE mes_add_queue_pkt; - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)]; uint32_t vm_cntx_cntl = hub->vm_cntx_cntl; memset(&mes_add_queue_pkt, 0, sizeof(mes_add_queue_pkt)); diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c index 15e7cbeae75b..fb91b31056ca 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c @@ -54,7 +54,7 @@ static u64 mmhub_v1_0_get_fb_location(struct amdgpu_device *adev) static void mmhub_v1_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid, uint64_t page_table_base) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; + struct amdgpu_vmhub *hub = 
&adev->vmhub[AMDGPU_MMHUB0(0)]; WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32, hub->ctx_addr_distance * vmid, @@ -229,7 +229,7 @@ static void mmhub_v1_0_disable_identity_aperture(struct amdgpu_device *adev) static void mmhub_v1_0_setup_vmid_config(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)]; unsigned num_level, block_size; uint32_t tmp; int i; @@ -285,7 +285,7 @@ static void mmhub_v1_0_setup_vmid_config(struct amdgpu_device *adev) static void mmhub_v1_0_program_invalidation(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)]; unsigned i; for (i = 0; i < 18; ++i) { @@ -338,7 +338,7 @@ static int mmhub_v1_0_gart_enable(struct amdgpu_device *adev) static void mmhub_v1_0_gart_disable(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)]; u32 tmp; u32 i; @@ -415,7 +415,7 @@ static void mmhub_v1_0_set_fault_enable_default(struct amdgpu_device *adev, bool static void mmhub_v1_0_init(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)]; hub->ctx0_ptb_addr_lo32 = SOC15_REG_OFFSET(MMHUB, 0, diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c index 73afbf2facc9..9086f2fdfaf4 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c @@ -54,7 +54,7 @@ static u64 mmhub_v1_7_get_fb_location(struct amdgpu_device *adev) static void mmhub_v1_7_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid, uint64_t page_table_base) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)]; WREG32_SOC15_OFFSET(MMHUB, 0, 
regVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32, hub->ctx_addr_distance * vmid, lower_32_bits(page_table_base)); @@ -261,7 +261,7 @@ static void mmhub_v1_7_disable_identity_aperture(struct amdgpu_device *adev) static void mmhub_v1_7_setup_vmid_config(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)]; unsigned num_level, block_size; uint32_t tmp; int i; @@ -319,7 +319,7 @@ static void mmhub_v1_7_setup_vmid_config(struct amdgpu_device *adev) static void mmhub_v1_7_program_invalidation(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)]; unsigned i; for (i = 0; i < 18; ++i) { @@ -348,7 +348,7 @@ static int mmhub_v1_7_gart_enable(struct amdgpu_device *adev) static void mmhub_v1_7_gart_disable(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)]; u32 tmp; u32 i; @@ -425,7 +425,7 @@ static void mmhub_v1_7_set_fault_enable_default(struct amdgpu_device *adev, bool static void mmhub_v1_7_init(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)]; hub->ctx0_ptb_addr_lo32 = SOC15_REG_OFFSET(MMHUB, 0, diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c index 342d1702104c..9ec06f9db761 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c @@ -53,7 +53,7 @@ static u64 mmhub_v1_8_get_fb_location(struct amdgpu_device *adev) static void mmhub_v1_8_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid, uint64_t page_table_base) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)]; WREG32_SOC15_OFFSET(MMHUB, 0, regVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32, 
hub->ctx_addr_distance * vmid, lower_32_bits(page_table_base)); @@ -253,7 +253,7 @@ static void mmhub_v1_8_disable_identity_aperture(struct amdgpu_device *adev) static void mmhub_v1_8_setup_vmid_config(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)]; unsigned num_level, block_size; uint32_t tmp; int i; @@ -311,7 +311,7 @@ static void mmhub_v1_8_setup_vmid_config(struct amdgpu_device *adev) static void mmhub_v1_8_program_invalidation(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)]; unsigned i; for (i = 0; i < 18; ++i) { @@ -352,7 +352,7 @@ static int mmhub_v1_8_gart_enable(struct amdgpu_device *adev) static void mmhub_v1_8_gart_disable(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)]; u32 tmp; u32 i; @@ -426,7 +426,7 @@ static void mmhub_v1_8_set_fault_enable_default(struct amdgpu_device *adev, bool static void mmhub_v1_8_init(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)]; hub->ctx0_ptb_addr_lo32 = SOC15_REG_OFFSET(MMHUB, 0, diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c index 278e32db878d..8f76c6ecf50a 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c @@ -187,7 +187,7 @@ mmhub_v2_0_print_l2_protection_fault_status(struct amdgpu_device *adev, static void mmhub_v2_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid, uint64_t page_table_base) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)]; WREG32_SOC15_OFFSET_RLC(MMHUB, 0, mmMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32, hub->ctx_addr_distance * 
vmid, @@ -362,7 +362,7 @@ static void mmhub_v2_0_disable_identity_aperture(struct amdgpu_device *adev) static void mmhub_v2_0_setup_vmid_config(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)]; int i; uint32_t tmp; @@ -412,7 +412,7 @@ static void mmhub_v2_0_setup_vmid_config(struct amdgpu_device *adev) static void mmhub_v2_0_program_invalidation(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)]; unsigned i; for (i = 0; i < 18; ++i) { @@ -441,7 +441,7 @@ static int mmhub_v2_0_gart_enable(struct amdgpu_device *adev) static void mmhub_v2_0_gart_disable(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)]; u32 tmp; u32 i; @@ -520,7 +520,7 @@ static const struct amdgpu_vmhub_funcs mmhub_v2_0_vmhub_funcs = { static void mmhub_v2_0_init(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)]; hub->ctx0_ptb_addr_lo32 = SOC15_REG_OFFSET(MMHUB, 0, diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c index fcf2813e70db..8bd0fc8d9d25 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c @@ -121,7 +121,7 @@ static void mmhub_v2_3_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid, uint64_t page_table_base) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)]; WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32, hub->ctx_addr_distance * vmid, lower_32_bits(page_table_base)); @@ -280,7 +280,7 @@ static void mmhub_v2_3_disable_identity_aperture(struct amdgpu_device *adev) static void mmhub_v2_3_setup_vmid_config(struct 
amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)]; int i; uint32_t tmp; @@ -330,7 +330,7 @@ static void mmhub_v2_3_setup_vmid_config(struct amdgpu_device *adev) static void mmhub_v2_3_program_invalidation(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)]; unsigned i; for (i = 0; i < 18; ++i) { @@ -373,7 +373,7 @@ static int mmhub_v2_3_gart_enable(struct amdgpu_device *adev) static void mmhub_v2_3_gart_disable(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)]; u32 tmp; u32 i; @@ -446,7 +446,7 @@ static const struct amdgpu_vmhub_funcs mmhub_v2_3_vmhub_funcs = { static void mmhub_v2_3_init(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)]; hub->ctx0_ptb_addr_lo32 = SOC15_REG_OFFSET(MMHUB, 0, diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0.c index 17a792616979..441379e91cfa 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0.c @@ -136,7 +136,7 @@ mmhub_v3_0_print_l2_protection_fault_status(struct amdgpu_device *adev, static void mmhub_v3_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid, uint64_t page_table_base) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)]; WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32, hub->ctx_addr_distance * vmid, @@ -319,7 +319,7 @@ static void mmhub_v3_0_disable_identity_aperture(struct amdgpu_device *adev) static void mmhub_v3_0_setup_vmid_config(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; + struct amdgpu_vmhub *hub = 
&adev->vmhub[AMDGPU_MMHUB0(0)]; int i; uint32_t tmp; @@ -369,7 +369,7 @@ static void mmhub_v3_0_setup_vmid_config(struct amdgpu_device *adev) static void mmhub_v3_0_program_invalidation(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)]; unsigned i; for (i = 0; i < 18; ++i) { @@ -398,7 +398,7 @@ static int mmhub_v3_0_gart_enable(struct amdgpu_device *adev) static void mmhub_v3_0_gart_disable(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)]; u32 tmp; u32 i; @@ -477,7 +477,7 @@ static const struct amdgpu_vmhub_funcs mmhub_v3_0_vmhub_funcs = { static void mmhub_v3_0_init(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)]; hub->ctx0_ptb_addr_lo32 = SOC15_REG_OFFSET(MMHUB, 0, diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.c index 26509b6b8c24..12c7f4b46ea9 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.c @@ -138,7 +138,7 @@ static void mmhub_v3_0_1_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid, uint64_t page_table_base) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)]; WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32, hub->ctx_addr_distance * vmid, @@ -306,7 +306,7 @@ static void mmhub_v3_0_1_disable_identity_aperture(struct amdgpu_device *adev) static void mmhub_v3_0_1_setup_vmid_config(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)]; int i; uint32_t tmp; @@ -356,7 +356,7 @@ static void mmhub_v3_0_1_setup_vmid_config(struct amdgpu_device *adev) static void 
mmhub_v3_0_1_program_invalidation(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)]; unsigned i; for (i = 0; i < 18; ++i) { @@ -385,7 +385,7 @@ static int mmhub_v3_0_1_gart_enable(struct amdgpu_device *adev) static void mmhub_v3_0_1_gart_disable(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)]; u32 tmp; u32 i; @@ -459,7 +459,7 @@ static const struct amdgpu_vmhub_funcs mmhub_v3_0_1_vmhub_funcs = { static void mmhub_v3_0_1_init(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)]; hub->ctx0_ptb_addr_lo32 = SOC15_REG_OFFSET(MMHUB, 0, diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_2.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_2.c index 26abbc6a47ab..5dadc85abf7e 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_2.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_2.c @@ -129,7 +129,7 @@ mmhub_v3_0_2_print_l2_protection_fault_status(struct amdgpu_device *adev, static void mmhub_v3_0_2_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid, uint64_t page_table_base) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)]; WREG32_SOC15_OFFSET(MMHUB, 0, regMMVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32, hub->ctx_addr_distance * vmid, @@ -311,7 +311,7 @@ static void mmhub_v3_0_2_disable_identity_aperture(struct amdgpu_device *adev) static void mmhub_v3_0_2_setup_vmid_config(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)]; int i; uint32_t tmp; @@ -361,7 +361,7 @@ static void mmhub_v3_0_2_setup_vmid_config(struct amdgpu_device *adev) static void mmhub_v3_0_2_program_invalidation(struct amdgpu_device *adev) { - struct 
amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)]; unsigned i; for (i = 0; i < 18; ++i) { @@ -390,7 +390,7 @@ static int mmhub_v3_0_2_gart_enable(struct amdgpu_device *adev) static void mmhub_v3_0_2_gart_disable(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)]; u32 tmp; u32 i; @@ -469,7 +469,7 @@ static const struct amdgpu_vmhub_funcs mmhub_v3_0_2_vmhub_funcs = { static void mmhub_v3_0_2_init(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)]; hub->ctx0_ptb_addr_lo32 = SOC15_REG_OFFSET(MMHUB, 0, diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c index 72083e96222f..e790f890aec6 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c @@ -57,7 +57,7 @@ static u64 mmhub_v9_4_get_fb_location(struct amdgpu_device *adev) static void mmhub_v9_4_setup_hubid_vm_pt_regs(struct amdgpu_device *adev, int hubid, uint32_t vmid, uint64_t value) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)]; WREG32_SOC15_OFFSET(MMHUB, 0, mmVML2VC0_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32, @@ -294,7 +294,7 @@ static void mmhub_v9_4_disable_identity_aperture(struct amdgpu_device *adev, static void mmhub_v9_4_setup_vmid_config(struct amdgpu_device *adev, int hubid) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)]; unsigned int num_level, block_size; uint32_t tmp; int i; @@ -363,7 +363,7 @@ static void mmhub_v9_4_setup_vmid_config(struct amdgpu_device *adev, int hubid) static void mmhub_v9_4_program_invalidation(struct amdgpu_device *adev, int hubid) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; + struct 
amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)]; unsigned i; for (i = 0; i < 18; ++i) { @@ -404,7 +404,7 @@ static int mmhub_v9_4_gart_enable(struct amdgpu_device *adev) static void mmhub_v9_4_gart_disable(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)]; u32 tmp; u32 i, j; @@ -507,8 +507,8 @@ static void mmhub_v9_4_set_fault_enable_default(struct amdgpu_device *adev, bool static void mmhub_v9_4_init(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub[MMHUB_NUM_INSTANCES] = - {&adev->vmhub[AMDGPU_MMHUB_0], &adev->vmhub[AMDGPU_MMHUB_1]}; + struct amdgpu_vmhub *hub[MMHUB_NUM_INSTANCES] = { + &adev->vmhub[AMDGPU_MMHUB0(0)], &adev->vmhub[AMDGPU_MMHUB1(0)]}; int i; for (i = 0; i < MMHUB_NUM_INSTANCES; i++) { diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index 9295ac7edd56..50b6eb9bcfda 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c @@ -1825,12 +1825,12 @@ static int sdma_v4_0_sw_init(void *handle) /* * On Arcturus, SDMA instance 5~7 has a different vmhub - * type(AMDGPU_MMHUB_1). + * type(AMDGPU_MMHUB1). 
*/ if (adev->ip_versions[SDMA0_HWIP][0] == IP_VERSION(4, 2, 2) && i >= 5) - ring->vm_hub = AMDGPU_MMHUB_1; + ring->vm_hub = AMDGPU_MMHUB1(0); else - ring->vm_hub = AMDGPU_MMHUB_0; + ring->vm_hub = AMDGPU_MMHUB0(0); sprintf(ring->name, "sdma%d", i); r = amdgpu_ring_init(adev, ring, 1024, &adev->sdma.trap_irq, @@ -1851,9 +1851,9 @@ static int sdma_v4_0_sw_init(void *handle) ring->doorbell_index += 0x400; if (adev->ip_versions[SDMA0_HWIP][0] == IP_VERSION(4, 2, 2) && i >= 5) - ring->vm_hub = AMDGPU_MMHUB_1; + ring->vm_hub = AMDGPU_MMHUB1(0); else - ring->vm_hub = AMDGPU_MMHUB_0; + ring->vm_hub = AMDGPU_MMHUB0(0); sprintf(ring->name, "page%d", i); r = amdgpu_ring_init(adev, ring, 1024, diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c index 64dcaa2670dd..7efe7c43fffb 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c @@ -1309,7 +1309,7 @@ static int sdma_v4_4_2_sw_init(void *handle) /* doorbell size is 2 dwords, get DWORD offset */ ring->doorbell_index = adev->doorbell_index.sdma_engine[i] << 1; - ring->vm_hub = AMDGPU_MMHUB_0; + ring->vm_hub = AMDGPU_MMHUB0(0); sprintf(ring->name, "sdma%d", i); r = amdgpu_ring_init(adev, ring, 1024, &adev->sdma.trap_irq, @@ -1328,7 +1328,7 @@ static int sdma_v4_4_2_sw_init(void *handle) */ ring->doorbell_index = adev->doorbell_index.sdma_engine[i] << 1; ring->doorbell_index += 0x400; - ring->vm_hub = AMDGPU_MMHUB_0; + ring->vm_hub = AMDGPU_MMHUB0(0); sprintf(ring->name, "page%d", i); r = amdgpu_ring_init(adev, ring, 1024, diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c index 92e1299be021..a0077cf41295 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c @@ -1389,7 +1389,7 @@ static int sdma_v5_0_sw_init(void *handle) (adev->doorbell_index.sdma_engine[0] << 1) //get DWORD offset : (adev->doorbell_index.sdma_engine[1] << 1); // get DWORD offset - 
ring->vm_hub = AMDGPU_GFXHUB_0; + ring->vm_hub = AMDGPU_GFXHUB(0); sprintf(ring->name, "sdma%d", i); r = amdgpu_ring_init(adev, ring, 1024, &adev->sdma.trap_irq, (i == 0) ? AMDGPU_SDMA_IRQ_INSTANCE0 : diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c index ca7e8757d78e..efa2c84ee78e 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c @@ -1253,7 +1253,7 @@ static int sdma_v5_2_sw_init(void *handle) ring->doorbell_index = (adev->doorbell_index.sdma_engine[i] << 1); //get DWORD offset - ring->vm_hub = AMDGPU_GFXHUB_0; + ring->vm_hub = AMDGPU_GFXHUB(0); sprintf(ring->name, "sdma%d", i); r = amdgpu_ring_init(adev, ring, 1024, &adev->sdma.trap_irq, AMDGPU_SDMA_IRQ_INSTANCE0 + i, diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c index 3d9a80511a45..79d09792d2ce 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c @@ -1298,7 +1298,7 @@ static int sdma_v6_0_sw_init(void *handle) ring->doorbell_index = (adev->doorbell_index.sdma_engine[i] << 1); // get DWORD offset - ring->vm_hub = AMDGPU_GFXHUB_0; + ring->vm_hub = AMDGPU_GFXHUB(0); sprintf(ring->name, "sdma%d", i); r = amdgpu_ring_init(adev, ring, 1024, &adev->sdma.trap_irq, diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c index e32b656b3dab..abaa4463e906 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c @@ -444,7 +444,7 @@ static int uvd_v7_0_sw_init(void *handle) continue; if (!amdgpu_sriov_vf(adev)) { ring = &adev->uvd.inst[j].ring; - ring->vm_hub = AMDGPU_MMHUB_0; + ring->vm_hub = AMDGPU_MMHUB0(0); sprintf(ring->name, "uvd_%d", ring->me); r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.inst[j].irq, 0, @@ -455,7 +455,7 @@ static int uvd_v7_0_sw_init(void *handle) for (i = 0; i < adev->uvd.num_enc_rings; ++i) { ring = &adev->uvd.inst[j].ring_enc[i]; - ring->vm_hub 
= AMDGPU_MMHUB_0; + ring->vm_hub = AMDGPU_MMHUB0(0); sprintf(ring->name, "uvd_enc_%d.%d", ring->me, i); if (amdgpu_sriov_vf(adev)) { ring->use_doorbell = true; diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c index 57b85bb6a1e4..e0b70cd3b697 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c @@ -466,7 +466,7 @@ static int vce_v4_0_sw_init(void *handle) enum amdgpu_ring_priority_level hw_prio = amdgpu_vce_get_ring_prio(i); ring = &adev->vce.ring[i]; - ring->vm_hub = AMDGPU_MMHUB_0; + ring->vm_hub = AMDGPU_MMHUB0(0); sprintf(ring->name, "vce%d", i); if (amdgpu_sriov_vf(adev)) { /* DOORBELL only works under SRIOV */ diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c index 761c28fa6ec1..f877c39c7cdd 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c @@ -120,7 +120,7 @@ static int vcn_v1_0_sw_init(void *handle) return r; ring = &adev->vcn.inst->ring_dec; - ring->vm_hub = AMDGPU_MMHUB_0; + ring->vm_hub = AMDGPU_MMHUB0(0); sprintf(ring->name, "vcn_dec"); r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst->irq, 0, AMDGPU_RING_PRIO_DEFAULT, NULL); @@ -142,7 +142,7 @@ static int vcn_v1_0_sw_init(void *handle) enum amdgpu_ring_priority_level hw_prio = amdgpu_vcn_get_enc_ring_prio(i); ring = &adev->vcn.inst->ring_enc[i]; - ring->vm_hub = AMDGPU_MMHUB_0; + ring->vm_hub = AMDGPU_MMHUB0(0); sprintf(ring->name, "vcn_enc%d", i); r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst->irq, 0, hw_prio, NULL); diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c index 7c2b3aa48083..c975aed2f6c7 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c @@ -129,7 +129,7 @@ static int vcn_v2_0_sw_init(void *handle) ring->use_doorbell = true; ring->doorbell_index = adev->doorbell_index.vcn.vcn_ring0_1 << 1; - ring->vm_hub = AMDGPU_MMHUB_0; + 
ring->vm_hub = AMDGPU_MMHUB0(0); sprintf(ring->name, "vcn_dec"); r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst->irq, 0, @@ -160,7 +160,7 @@ static int vcn_v2_0_sw_init(void *handle) ring = &adev->vcn.inst->ring_enc[i]; ring->use_doorbell = true; - ring->vm_hub = AMDGPU_MMHUB_0; + ring->vm_hub = AMDGPU_MMHUB0(0); if (!amdgpu_sriov_vf(adev)) ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 2 + i; else diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c index ab0b45d0ead1..7044bd7c9f62 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c @@ -188,9 +188,9 @@ static int vcn_v2_5_sw_init(void *handle) (amdgpu_sriov_vf(adev) ? 2*j : 8*j); if (adev->ip_versions[UVD_HWIP][0] == IP_VERSION(2, 5, 0)) - ring->vm_hub = AMDGPU_MMHUB_1; + ring->vm_hub = AMDGPU_MMHUB1(0); else - ring->vm_hub = AMDGPU_MMHUB_0; + ring->vm_hub = AMDGPU_MMHUB0(0); sprintf(ring->name, "vcn_dec_%d", j); r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[j].irq, @@ -208,9 +208,9 @@ static int vcn_v2_5_sw_init(void *handle) (amdgpu_sriov_vf(adev) ? 
(1 + i + 2*j) : (2 + i + 8*j)); if (adev->ip_versions[UVD_HWIP][0] == IP_VERSION(2, 5, 0)) - ring->vm_hub = AMDGPU_MMHUB_1; + ring->vm_hub = AMDGPU_MMHUB1(0); else - ring->vm_hub = AMDGPU_MMHUB_0; + ring->vm_hub = AMDGPU_MMHUB0(0); sprintf(ring->name, "vcn_enc_%d.%d", j, i); r = amdgpu_ring_init(adev, ring, 512, diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c index 3eab186261aa..70fefbf26c48 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c @@ -189,7 +189,7 @@ static int vcn_v3_0_sw_init(void *handle) } else { ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 8 * i; } - ring->vm_hub = AMDGPU_MMHUB_0; + ring->vm_hub = AMDGPU_MMHUB0(0); sprintf(ring->name, "vcn_dec_%d", i); r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[i].irq, 0, AMDGPU_RING_PRIO_DEFAULT, @@ -213,7 +213,7 @@ static int vcn_v3_0_sw_init(void *handle) } else { ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 2 + j + 8 * i; } - ring->vm_hub = AMDGPU_MMHUB_0; + ring->vm_hub = AMDGPU_MMHUB0(0); sprintf(ring->name, "vcn_enc_%d.%d", i, j); r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[i].irq, 0, hw_prio, &adev->vcn.inst[i].sched_score); diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c index bf0674039598..81446e6996df 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c @@ -149,7 +149,7 @@ static int vcn_v4_0_sw_init(void *handle) ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + i * (adev->vcn.num_enc_rings + 1) + 1; else ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 2 + 8 * i; - ring->vm_hub = AMDGPU_MMHUB_0; + ring->vm_hub = AMDGPU_MMHUB0(0); sprintf(ring->name, "vcn_unified_%d", i); r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[i].irq, 0, -- cgit From d9426c3d9b4e91dda4f1f1684f9296762fafe0de Mon Sep 17 00:00:00 2001 From: Le Ma 
Date: Mon, 20 Dec 2021 16:06:25 +0800 Subject: drm/amdgpu: add bitmask to iterate vmhubs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As the layout of VMHUB definition has been changed to cover multiple XCD/AID case, the original num_vmhubs is not appropriate to do vmhub iteration any more. Drop num_vmhubs and introduce vmhubs_mask instead. v2: switch to the new VMHUB layout v3: use DECLARE_BITMAP to define vmhubs_mask Signed-off-by: Le Ma Reviewed-by: Christian König Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 5 +++-- drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c | 5 +++-- drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 21 ++++++++++++--------- 9 files changed, 25 insertions(+), 20 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 957b18bda4a7..0f163d266812 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -829,7 +829,7 @@ struct amdgpu_device { dma_addr_t dummy_page_addr; struct amdgpu_vm_manager vm_manager; struct amdgpu_vmhub vmhub[AMDGPU_MAX_VMHUBS]; - unsigned num_vmhubs; + DECLARE_BITMAP(vmhubs_mask, AMDGPU_MAX_VMHUBS); /* memory management */ struct amdgpu_mman mman; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index f0a136d35279..5afbcc390d89 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -733,7 +733,7 @@ int amdgpu_amdkfd_flush_gpu_tlb_vmid(struct amdgpu_device *adev, if (adev->family == AMDGPU_FAMILY_AI) { int i; - for (i = 0; i < 
adev->num_vmhubs; i++) + for_each_set_bit(i, adev->vmhubs_mask, AMDGPU_MAX_VMHUBS) amdgpu_gmc_flush_gpu_tlb(adev, vmid, i, 0); } else { amdgpu_gmc_flush_gpu_tlb(adev, vmid, AMDGPU_GFXHUB(0), 0); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c index 01cb89ffbd56..6b12f4a75fc3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c @@ -182,7 +182,7 @@ void amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset, } mb(); amdgpu_device_flush_hdp(adev, NULL); - for (i = 0; i < adev->num_vmhubs; i++) + for_each_set_bit(i, adev->vmhubs_mask, AMDGPU_MAX_VMHUBS) amdgpu_gmc_flush_gpu_tlb(adev, 0, i, 0); drm_dev_exit(idx); @@ -264,7 +264,7 @@ void amdgpu_gart_invalidate_tlb(struct amdgpu_device *adev) mb(); amdgpu_device_flush_hdp(adev, NULL); - for (i = 0; i < adev->num_vmhubs; i++) + for_each_set_bit(i, adev->vmhubs_mask, AMDGPU_MAX_VMHUBS) amdgpu_gmc_flush_gpu_tlb(adev, 0, i, 0); } diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c index ea2a448147e3..ff96f11c2adf 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c @@ -460,7 +460,7 @@ static int gmc_v10_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, &queried_pasid); if (ret && queried_pasid == pasid) { if (all_hub) { - for (i = 0; i < adev->num_vmhubs; i++) + for_each_set_bit(i, adev->vmhubs_mask, AMDGPU_MAX_VMHUBS) gmc_v10_0_flush_gpu_tlb(adev, vmid, i, flush_type); } else { @@ -928,7 +928,8 @@ static int gmc_v10_0_sw_init(void *handle) case IP_VERSION(10, 3, 6): case IP_VERSION(10, 3, 3): case IP_VERSION(10, 3, 7): - adev->num_vmhubs = 2; + set_bit(AMDGPU_GFXHUB(0), adev->vmhubs_mask); + set_bit(AMDGPU_MMHUB0(0), adev->vmhubs_mask); /* * To fulfill 4-level page support, * vm size is 256TB (48bit), maximum size of Navi10/Navi14/Navi12, diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c index 
fb2ac31cbba7..3453f1c0e066 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c @@ -364,7 +364,7 @@ static int gmc_v11_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, &queried_pasid); if (ret && queried_pasid == pasid) { if (all_hub) { - for (i = 0; i < adev->num_vmhubs; i++) + for_each_set_bit(i, adev->vmhubs_mask, AMDGPU_MAX_VMHUBS) gmc_v11_0_flush_gpu_tlb(adev, vmid, i, flush_type); } else { @@ -779,7 +779,8 @@ static int gmc_v11_0_sw_init(void *handle) case IP_VERSION(11, 0, 2): case IP_VERSION(11, 0, 3): case IP_VERSION(11, 0, 4): - adev->num_vmhubs = 2; + set_bit(AMDGPU_GFXHUB(0), adev->vmhubs_mask); + set_bit(AMDGPU_MMHUB0(0), adev->vmhubs_mask); /* * To fulfill 4-level page support, * vm size is 256TB (48bit), maximum size, diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c index b7dad4e67813..aa754c95a0b3 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c @@ -808,7 +808,7 @@ static int gmc_v6_0_sw_init(void *handle) int r; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - adev->num_vmhubs = 1; + set_bit(AMDGPU_GFXHUB(0), adev->vmhubs_mask); if (adev->flags & AMD_IS_APU) { adev->gmc.vram_type = AMDGPU_VRAM_TYPE_UNKNOWN; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c index 402960b0174e..81609a2b226f 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c @@ -977,7 +977,7 @@ static int gmc_v7_0_sw_init(void *handle) int r; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - adev->num_vmhubs = 1; + set_bit(AMDGPU_GFXHUB(0), adev->vmhubs_mask); if (adev->flags & AMD_IS_APU) { adev->gmc.vram_type = AMDGPU_VRAM_TYPE_UNKNOWN; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c index 504c1b34dab7..d48e33738a88 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c +++ 
b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c @@ -1093,7 +1093,7 @@ static int gmc_v8_0_sw_init(void *handle) int r; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - adev->num_vmhubs = 1; + set_bit(AMDGPU_GFXHUB(0), adev->vmhubs_mask); if (adev->flags & AMD_IS_APU) { adev->gmc.vram_type = AMDGPU_VRAM_TYPE_UNKNOWN; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 193ba4d912a6..d4bfb5f8308a 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -481,7 +481,7 @@ static int gmc_v9_0_vm_fault_interrupt_state(struct amdgpu_device *adev, switch (state) { case AMDGPU_IRQ_STATE_DISABLE: - for (j = 0; j < adev->num_vmhubs; j++) { + for_each_set_bit(j, adev->vmhubs_mask, AMDGPU_MAX_VMHUBS) { hub = &adev->vmhub[j]; for (i = 0; i < 16; i++) { reg = hub->vm_context0_cntl + i; @@ -509,7 +509,7 @@ static int gmc_v9_0_vm_fault_interrupt_state(struct amdgpu_device *adev, } break; case AMDGPU_IRQ_STATE_ENABLE: - for (j = 0; j < adev->num_vmhubs; j++) { + for_each_set_bit(j, adev->vmhubs_mask, AMDGPU_MAX_VMHUBS) { hub = &adev->vmhub[j]; for (i = 0; i < 16; i++) { reg = hub->vm_context0_cntl + i; @@ -803,7 +803,7 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, u32 j, inv_req, inv_req2, tmp; struct amdgpu_vmhub *hub; - BUG_ON(vmhub >= adev->num_vmhubs); + BUG_ON(vmhub >= AMDGPU_MAX_VMHUBS); hub = &adev->vmhub[vmhub]; if (adev->gmc.xgmi.num_physical_nodes && @@ -987,7 +987,7 @@ static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, &queried_pasid); if (ret && queried_pasid == pasid) { if (all_hub) { - for (i = 0; i < adev->num_vmhubs; i++) + for_each_set_bit(i, adev->vmhubs_mask, AMDGPU_MAX_VMHUBS) gmc_v9_0_flush_gpu_tlb(adev, vmid, i, flush_type); } else { @@ -1684,7 +1684,8 @@ static int gmc_v9_0_sw_init(void *handle) switch (adev->ip_versions[GC_HWIP][0]) { case IP_VERSION(9, 1, 0): case IP_VERSION(9, 2, 2): - adev->num_vmhubs = 2; + 
set_bit(AMDGPU_GFXHUB(0), adev->vmhubs_mask); + set_bit(AMDGPU_MMHUB0(0), adev->vmhubs_mask); if (adev->rev_id == 0x0 || adev->rev_id == 0x1) { amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48); @@ -1701,8 +1702,8 @@ static int gmc_v9_0_sw_init(void *handle) case IP_VERSION(9, 3, 0): case IP_VERSION(9, 4, 2): case IP_VERSION(9, 4, 3): - adev->num_vmhubs = 2; - + set_bit(AMDGPU_GFXHUB(0), adev->vmhubs_mask); + set_bit(AMDGPU_MMHUB0(0), adev->vmhubs_mask); /* * To fulfill 4-level page support, @@ -1718,7 +1719,9 @@ static int gmc_v9_0_sw_init(void *handle) adev->gmc.translate_further = adev->vm_manager.num_level > 1; break; case IP_VERSION(9, 4, 1): - adev->num_vmhubs = 3; + set_bit(AMDGPU_GFXHUB(0), adev->vmhubs_mask); + set_bit(AMDGPU_MMHUB0(0), adev->vmhubs_mask); + set_bit(AMDGPU_MMHUB1(0), adev->vmhubs_mask); /* Keep the vm size same with Vega20 */ amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48); @@ -1944,7 +1947,7 @@ static int gmc_v9_0_hw_init(void *handle) adev->gfxhub.funcs->set_fault_enable_default(adev, value); adev->mmhub.funcs->set_fault_enable_default(adev, value); } - for (i = 0; i < adev->num_vmhubs; ++i) { + for_each_set_bit(i, adev->vmhubs_mask, AMDGPU_MAX_VMHUBS) { if (adev->in_s0ix && (i == AMDGPU_GFXHUB(0))) continue; gmc_v9_0_flush_gpu_tlb(adev, 0, i, 0); -- cgit From ce8a12a532ed62d7037be91c5714243fdfa9f672 Mon Sep 17 00:00:00 2001 From: Le Ma Date: Mon, 20 Dec 2021 16:42:20 +0800 Subject: drm/amdgpu: init vmhubs bitmask for GC 9.4.3 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Each XCD owns one GFXHUB. 
v2: switch to the new VMHUB layout Signed-off-by: Le Ma Acked-by: Christian König Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index d4bfb5f8308a..6da85365e5aa 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -1701,7 +1701,6 @@ static int gmc_v9_0_sw_init(void *handle) case IP_VERSION(9, 4, 0): case IP_VERSION(9, 3, 0): case IP_VERSION(9, 4, 2): - case IP_VERSION(9, 4, 3): set_bit(AMDGPU_GFXHUB(0), adev->vmhubs_mask); set_bit(AMDGPU_MMHUB0(0), adev->vmhubs_mask); @@ -1727,6 +1726,12 @@ static int gmc_v9_0_sw_init(void *handle) amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48); adev->gmc.translate_further = adev->vm_manager.num_level > 1; break; + case IP_VERSION(9, 4, 3): + bitmap_set(adev->vmhubs_mask, AMDGPU_GFXHUB(0), adev->gfx.num_xcd); + bitmap_set(adev->vmhubs_mask, AMDGPU_MMHUB0(0), 1); + + amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48); + break; default: break; } -- cgit From f87f686482c6d2d4465245356854710b01f312c1 Mon Sep 17 00:00:00 2001 From: Mukul Joshi Date: Mon, 9 May 2022 22:22:20 -0400 Subject: drm/amdgpu: Add XCC inst to PASID TLB flushing Add XCC instance to select the correct KIQ ring when flushing TLBs on a multi-XCC setup. 
Signed-off-by: Mukul Joshi Tested-by: Amber Lin Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 6 ++++-- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 3 ++- drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h | 7 ++++--- drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 12 ++++++------ drivers/gpu/drm/amd/amdkfd/kfd_process.c | 7 +++++-- 9 files changed, 25 insertions(+), 18 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index 5afbcc390d89..9d19c7ceda3f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -743,7 +743,9 @@ int amdgpu_amdkfd_flush_gpu_tlb_vmid(struct amdgpu_device *adev, } int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct amdgpu_device *adev, - uint16_t pasid, enum TLB_FLUSH_TYPE flush_type) + uint16_t pasid, + enum TLB_FLUSH_TYPE flush_type, + uint32_t inst) { bool all_hub = false; @@ -751,7 +753,7 @@ int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct amdgpu_device *adev, adev->family == AMDGPU_FAMILY_RV) all_hub = true; - return amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, flush_type, all_hub); + return amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, flush_type, all_hub, inst); } bool amdgpu_amdkfd_have_atomics_support(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index 01ba3589b60a..df07e212c21e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -160,7 +160,8 @@ bool amdgpu_amdkfd_have_atomics_support(struct amdgpu_device *adev); int amdgpu_amdkfd_flush_gpu_tlb_vmid(struct amdgpu_device *adev, uint16_t vmid); int 
amdgpu_amdkfd_flush_gpu_tlb_pasid(struct amdgpu_device *adev, - uint16_t pasid, enum TLB_FLUSH_TYPE flush_type); + uint16_t pasid, enum TLB_FLUSH_TYPE flush_type, + uint32_t inst); bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h index 6d105d7fb98b..572ef5be539f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h @@ -119,7 +119,8 @@ struct amdgpu_gmc_funcs { uint32_t vmhub, uint32_t flush_type); /* flush the vm tlb via pasid */ int (*flush_gpu_tlb_pasid)(struct amdgpu_device *adev, uint16_t pasid, - uint32_t flush_type, bool all_hub); + uint32_t flush_type, bool all_hub, + uint32_t inst); /* flush the vm tlb via ring */ uint64_t (*emit_flush_gpu_tlb)(struct amdgpu_ring *ring, unsigned vmid, uint64_t pd_addr); @@ -296,9 +297,9 @@ struct amdgpu_gmc { }; #define amdgpu_gmc_flush_gpu_tlb(adev, vmid, vmhub, type) ((adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid), (vmhub), (type))) -#define amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, type, allhub) \ +#define amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, type, allhub, inst) \ ((adev)->gmc.gmc_funcs->flush_gpu_tlb_pasid \ - ((adev), (pasid), (type), (allhub))) + ((adev), (pasid), (type), (allhub), (inst))) #define amdgpu_gmc_emit_flush_gpu_tlb(r, vmid, addr) (r)->adev->gmc.gmc_funcs->emit_flush_gpu_tlb((r), (vmid), (addr)) #define amdgpu_gmc_emit_pasid_mapping(r, vmid, pasid) (r)->adev->gmc.gmc_funcs->emit_pasid_mapping((r), (vmid), (pasid)) #define amdgpu_gmc_map_mtype(adev, flags) (adev)->gmc.gmc_funcs->map_mtype((adev),(flags)) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c index ff96f11c2adf..d76f5c8d4977 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c @@ -419,7 +419,7 @@ error_alloc: */ static int gmc_v10_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, uint16_t 
pasid, uint32_t flush_type, - bool all_hub) + bool all_hub, uint32_t inst) { int vmid, i; signed long r; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c index 3453f1c0e066..4bf807d825c0 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c @@ -324,7 +324,7 @@ static void gmc_v11_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, */ static int gmc_v11_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, uint16_t pasid, uint32_t flush_type, - bool all_hub) + bool all_hub, uint32_t inst) { int vmid, i; signed long r; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c index 81609a2b226f..6f53049619cd 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c @@ -424,7 +424,7 @@ static int gmc_v7_0_mc_init(struct amdgpu_device *adev) */ static int gmc_v7_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, uint16_t pasid, uint32_t flush_type, - bool all_hub) + bool all_hub, uint32_t inst) { int vmid; unsigned int tmp; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c index d48e33738a88..48475077ca92 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c @@ -622,7 +622,7 @@ static int gmc_v8_0_mc_init(struct amdgpu_device *adev) */ static int gmc_v8_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, uint16_t pasid, uint32_t flush_type, - bool all_hub) + bool all_hub, uint32_t inst) { int vmid; unsigned int tmp; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 6da85365e5aa..0163a761ccf0 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -924,7 +924,7 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, */ static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, uint16_t pasid, uint32_t flush_type, - 
bool all_hub) + bool all_hub, uint32_t inst) { int vmid, i; signed long r; @@ -932,8 +932,8 @@ static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, uint16_t queried_pasid; bool ret; u32 usec_timeout = amdgpu_sriov_vf(adev) ? SRIOV_USEC_TIMEOUT : adev->usec_timeout; - struct amdgpu_ring *ring = &adev->gfx.kiq[0].ring; - struct amdgpu_kiq *kiq = &adev->gfx.kiq[0]; + struct amdgpu_ring *ring = &adev->gfx.kiq[inst].ring; + struct amdgpu_kiq *kiq = &adev->gfx.kiq[inst]; if (amdgpu_in_reset(adev)) return -EIO; @@ -953,7 +953,7 @@ static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, if (vega20_xgmi_wa) ndw += kiq->pmf->invalidate_tlbs_size; - spin_lock(&adev->gfx.kiq[0].ring_lock); + spin_lock(&adev->gfx.kiq[inst].ring_lock); /* 2 dwords flush + 8 dwords fence */ amdgpu_ring_alloc(ring, ndw); if (vega20_xgmi_wa) @@ -964,13 +964,13 @@ static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT); if (r) { amdgpu_ring_undo(ring); - spin_unlock(&adev->gfx.kiq[0].ring_lock); + spin_unlock(&adev->gfx.kiq[inst].ring_lock); up_read(&adev->reset_domain->sem); return -ETIME; } amdgpu_ring_commit(ring); - spin_unlock(&adev->gfx.kiq[0].ring_lock); + spin_unlock(&adev->gfx.kiq[inst].ring_lock); r = amdgpu_fence_wait_polling(ring, seq, usec_timeout); if (r < 1) { dev_err(adev->dev, "wait for kiq fence error: %ld.\n", r); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index 888590dfa646..9b1e84d33cdc 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -2052,6 +2052,7 @@ void kfd_flush_tlb(struct kfd_process_device *pdd, enum TLB_FLUSH_TYPE type) struct amdgpu_vm *vm = drm_priv_to_vm(pdd->drm_priv); uint64_t tlb_seq = amdgpu_vm_tlb_seq(vm); struct kfd_node *dev = pdd->dev; + int xcc = 0; /* * It can be that we race and lose here, but that is extremely unlikely @@ -2069,8 +2070,10 @@ void 
kfd_flush_tlb(struct kfd_process_device *pdd, enum TLB_FLUSH_TYPE type) amdgpu_amdkfd_flush_gpu_tlb_vmid(dev->adev, pdd->qpd.vmid); } else { - amdgpu_amdkfd_flush_gpu_tlb_pasid(dev->adev, - pdd->process->pasid, type); + for (xcc = 0; xcc < dev->num_xcc_per_node; xcc++) + amdgpu_amdkfd_flush_gpu_tlb_pasid(dev->adev, + pdd->process->pasid, type, + dev->start_xcc_id + xcc); } } -- cgit From 21e1217b4c0e0234704d50ea303c7603266604ac Mon Sep 17 00:00:00 2001 From: Mukul Joshi Date: Mon, 9 May 2022 22:30:57 -0400 Subject: drm/amdgpu: Fix VM fault reporting on XCC1 Fix VM fault reporting and clear VM fault register for XCC1. Signed-off-by: Mukul Joshi Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 0163a761ccf0..681bc9d354fe 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -557,6 +557,7 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev, u64 addr; uint32_t cam_index = 0; int ret; + uint32_t node_id; addr = (u64)entry->src_data[0] << 12; addr |= ((u64)entry->src_data[1] & 0xf) << 44; @@ -611,7 +612,9 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev, hub = &adev->vmhub[AMDGPU_MMHUB1(0)]; } else { hub_name = "gfxhub0"; - hub = &adev->vmhub[AMDGPU_GFXHUB(0)]; + node_id = (adev->ip_versions[GC_HWIP][0] == + IP_VERSION(9, 4, 3)) ? 
entry->node_id : 0; + hub = &adev->vmhub[node_id/2]; } memset(&task_info, 0, sizeof(struct amdgpu_task_info)); @@ -645,11 +648,10 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev, rw = REG_GET_FIELD(status, VM_L2_PROTECTION_FAULT_STATUS, RW); WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1); - dev_err(adev->dev, "VM_L2_PROTECTION_FAULT_STATUS:0x%08X\n", status); - if (hub == &adev->vmhub[AMDGPU_GFXHUB(0)]) { + if (entry->vmid_src == AMDGPU_GFXHUB(0)) { dev_err(adev->dev, "\t Faulty UTCL2 client ID: %s (0x%x)\n", cid >= ARRAY_SIZE(gfxhub_client_ids) ? "unknown" : gfxhub_client_ids[cid], -- cgit From 5de6bd6a13f1c717279c870eb8290e466c8f6a80 Mon Sep 17 00:00:00 2001 From: Le Ma Date: Fri, 25 Feb 2022 15:47:20 +0800 Subject: drm/amdgpu: set mmhub bitmask for multiple AIDs Like GFXHUB, set MMHUB0 bitmask for each AID. Signed-off-by: Le Ma Acked-by: Felix Kuehling Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 681bc9d354fe..59be0c0293c4 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -1730,7 +1730,7 @@ static int gmc_v9_0_sw_init(void *handle) break; case IP_VERSION(9, 4, 3): bitmap_set(adev->vmhubs_mask, AMDGPU_GFXHUB(0), adev->gfx.num_xcd); - bitmap_set(adev->vmhubs_mask, AMDGPU_MMHUB0(0), 1); + bitmap_set(adev->vmhubs_mask, AMDGPU_MMHUB0(0), adev->num_aid); amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48); break; -- cgit From 9eb7681f760c77adece36bc62953245c9f44a3be Mon Sep 17 00:00:00 2001 From: Shiwu Zhang Date: Mon, 21 Feb 2022 15:38:39 +0800 Subject: drm/amdgpu: add the support of XGMI link for GC 9.4.3 Add the xgmi LFB_CNTL/LFB_SIZE reg addresses to fetch the xgmi info from. 
v2: move get_xgmi_info() to GC_V9_4_3 specific source files to utilize the register definitions specific for GC_V9_4_3 v3: remove the duplicated register definitions v4: enable xgmi based on asic_type as XGMI_IP ver is not available yet for IP discovery Signed-off-by: Shiwu Zhang Reviewed-by: Le Ma Acked-by: Lijo Lazar Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c | 41 +++++++++++++++++++++++++++++++- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 10 +++++--- 2 files changed, 47 insertions(+), 4 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c index d3424ce97aa8..1bb17d95f720 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c @@ -528,6 +528,45 @@ static void gfxhub_v1_2_init(struct amdgpu_device *adev) } } +static int gfxhub_v1_2_get_xgmi_info(struct amdgpu_device *adev) +{ + u32 max_num_physical_nodes; + u32 max_physical_node_id; + u32 xgmi_lfb_cntl; + u32 max_region; + u64 seg_size; + + xgmi_lfb_cntl = RREG32_SOC15(GC, 0, regMC_VM_XGMI_LFB_CNTL); + seg_size = REG_GET_FIELD( + RREG32_SOC15(GC, 0, regMC_VM_XGMI_LFB_SIZE), + MC_VM_XGMI_LFB_SIZE, PF_LFB_SIZE) << 24; + max_region = + REG_GET_FIELD(xgmi_lfb_cntl, MC_VM_XGMI_LFB_CNTL, PF_MAX_REGION); + + + + max_num_physical_nodes = 8; + max_physical_node_id = 7; + + /* PF_MAX_REGION=0 means xgmi is disabled */ + if (max_region || adev->gmc.xgmi.connected_to_cpu) { + adev->gmc.xgmi.num_physical_nodes = max_region + 1; + + if (adev->gmc.xgmi.num_physical_nodes > max_num_physical_nodes) + return -EINVAL; + + adev->gmc.xgmi.physical_node_id = + REG_GET_FIELD(xgmi_lfb_cntl, MC_VM_XGMI_LFB_CNTL, + PF_LFB_REGION); + + if (adev->gmc.xgmi.physical_node_id > max_physical_node_id) + return -EINVAL; + + adev->gmc.xgmi.node_segment_size = seg_size; + } + + return 0; +} const struct amdgpu_gfxhub_funcs gfxhub_v1_2_funcs = { 
.get_mc_fb_offset = gfxhub_v1_2_get_mc_fb_offset, @@ -536,5 +575,5 @@ const struct amdgpu_gfxhub_funcs gfxhub_v1_2_funcs = { .gart_disable = gfxhub_v1_2_gart_disable, .set_fault_enable_default = gfxhub_v1_2_set_fault_enable_default, .init = gfxhub_v1_2_init, - .get_xgmi_info = gfxhub_v1_1_get_xgmi_info, + .get_xgmi_info = gfxhub_v1_2_get_xgmi_info, }; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 59be0c0293c4..4b2c4ecd7253 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -1419,9 +1419,13 @@ static int gmc_v9_0_early_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - /* ARCT and VEGA20 don't have XGMI defined in their IP discovery tables */ - if (adev->asic_type == CHIP_VEGA20 || - adev->asic_type == CHIP_ARCTURUS) + /* + * 9.4.0, 9.4.1 and 9.4.3 don't have XGMI defined + * in their IP discovery tables + */ + if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 0) || + adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 1) || + adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3)) adev->gmc.xgmi.supported = true; if (adev->ip_versions[XGMI_HWIP][0] == IP_VERSION(6, 1, 0)) { -- cgit From 8078f1c610fdcdd8003e2c538fb04af41fa5c269 Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Wed, 29 Jun 2022 11:41:53 +0530 Subject: drm/amdgpu: Change num_xcd to xcc_mask Instead of number of XCCs, keep a mask of XCCs for the exact XCCs available on the ASIC. XCC configuration could differ based on different ASIC configs. 
v2: Rename num_xcd to num_xcc (Hawking) Use smaller xcc_mask size, changed to u16 (Le) Signed-off-by: Lijo Lazar Reviewed-by: Hawking Zhang Reviewed-by: Le Ma Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 21 ++--- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 4 +- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 133 ++++++++++++++++++------------- drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c | 67 ++++++++++------ drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 3 +- drivers/gpu/drm/amd/amdkfd/kfd_device.c | 10 +-- 7 files changed, 141 insertions(+), 99 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index 76438f197de1..069b259f384c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -209,12 +209,12 @@ void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev) int max_queues_per_mec = min(adev->gfx.mec.num_pipe_per_mec * adev->gfx.mec.num_queue_per_pipe, adev->gfx.num_compute_rings); - int num_xcd = (adev->gfx.num_xcd > 1) ? adev->gfx.num_xcd : 1; + int num_xcc = adev->gfx.xcc_mask ? 
NUM_XCC(adev->gfx.xcc_mask) : 1; if (multipipe_policy) { /* policy: make queues evenly cross all pipes on MEC1 only * for multiple xcc, just use the original policy for simplicity */ - for (j = 0; j < num_xcd; j++) { + for (j = 0; j < num_xcc; j++) { for (i = 0; i < max_queues_per_mec; i++) { pipe = i % adev->gfx.mec.num_pipe_per_mec; queue = (i / adev->gfx.mec.num_pipe_per_mec) % @@ -226,13 +226,13 @@ void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev) } } else { /* policy: amdgpu owns all queues in the given pipe */ - for (j = 0; j < num_xcd; j++) { + for (j = 0; j < num_xcc; j++) { for (i = 0; i < max_queues_per_mec; ++i) set_bit(i, adev->gfx.mec_bitmap[j].queue_bitmap); } } - for (j = 0; j < num_xcd; j++) { + for (j = 0; j < num_xcc; j++) { dev_dbg(adev->dev, "mec queue bitmap weight=%d\n", bitmap_weight(adev->gfx.mec_bitmap[j].queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES)); } @@ -1207,23 +1207,24 @@ static ssize_t amdgpu_gfx_set_compute_partition(struct device *dev, struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = drm_to_adev(ddev); enum amdgpu_gfx_partition mode; - int ret; + int ret = 0, num_xcc; - if (adev->gfx.num_xcd % 2 != 0) + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + if (num_xcc % 2 != 0) return -EINVAL; if (!strncasecmp("SPX", buf, strlen("SPX"))) { mode = AMDGPU_SPX_PARTITION_MODE; } else if (!strncasecmp("DPX", buf, strlen("DPX"))) { - if (adev->gfx.num_xcd != 4 || adev->gfx.num_xcd != 8) + if (num_xcc != 4 || num_xcc != 8) return -EINVAL; mode = AMDGPU_DPX_PARTITION_MODE; } else if (!strncasecmp("TPX", buf, strlen("TPX"))) { - if (adev->gfx.num_xcd != 6) + if (num_xcc != 6) return -EINVAL; mode = AMDGPU_TPX_PARTITION_MODE; } else if (!strncasecmp("QPX", buf, strlen("QPX"))) { - if (adev->gfx.num_xcd != 8) + if (num_xcc != 8) return -EINVAL; mode = AMDGPU_QPX_PARTITION_MODE; } else if (!strncasecmp("CPX", buf, strlen("CPX"))) { @@ -1253,7 +1254,7 @@ static ssize_t amdgpu_gfx_get_available_compute_partition(struct 
device *dev, char *supported_partition; /* TBD */ - switch (adev->gfx.num_xcd) { + switch (NUM_XCC(adev->gfx.xcc_mask)) { case 8: supported_partition = "SPX, DPX, QPX, CPX"; break; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index 8df36527aee9..93f9875154db 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -64,6 +64,8 @@ enum amdgpu_gfx_partition { AMDGPU_UNKNOWN_COMPUTE_PARTITION_MODE, }; +#define NUM_XCC(x) hweight16(x) + struct amdgpu_mec { struct amdgpu_bo *hpd_eop_obj; u64 hpd_eop_gpu_addr; @@ -396,7 +398,7 @@ struct amdgpu_gfx { bool cp_gfx_shadow; /* for gfx11 */ enum amdgpu_gfx_partition partition_mode; - uint32_t num_xcd; + uint16_t xcc_mask; uint32_t num_xcc_per_xcp; struct mutex partition_mutex; }; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 91814dc083c9..da69177dc76f 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -4536,7 +4536,7 @@ static int gfx_v9_0_early_init(void *handle) adev->gfx.num_gfx_rings = 0; else adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS; - adev->gfx.num_xcd = 1; + adev->gfx.xcc_mask = 1; adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev), AMDGPU_MAX_COMPUTE_RINGS); gfx_v9_0_set_kiq_pm4_funcs(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index 52185b1d5d31..c776fc5884de 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -177,16 +177,19 @@ static const struct kiq_pm4_funcs gfx_v9_4_3_kiq_pm4_funcs = { static void gfx_v9_4_3_set_kiq_pm4_funcs(struct amdgpu_device *adev) { - int i; - for (i = 0; i < adev->gfx.num_xcd; i++) + int i, num_xcc; + + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + for (i = 0; i < num_xcc; i++) adev->gfx.kiq[i].pmf = &gfx_v9_4_3_kiq_pm4_funcs; } static void gfx_v9_4_3_init_golden_registers(struct 
amdgpu_device *adev) { - int i; + int i, num_xcc; - for (i = 2; i < adev->gfx.num_xcd; i++) + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + for (i = 2; i < num_xcc; i++) WREG32_SOC15(GC, i, regGRBM_MCM_ADDR, 0x4); } @@ -499,7 +502,7 @@ static void gfx_v9_4_3_mec_fini(struct amdgpu_device *adev) static int gfx_v9_4_3_mec_init(struct amdgpu_device *adev) { - int r, i; + int r, i, num_xcc; u32 *hpd; const __le32 *fw_data; unsigned fw_size; @@ -508,7 +511,8 @@ static int gfx_v9_4_3_mec_init(struct amdgpu_device *adev) const struct gfx_firmware_header_v1_0 *mec_hdr; - for (i = 0; i < adev->gfx.num_xcd; i++) + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + for (i = 0; i < num_xcc; i++) bitmap_zero(adev->gfx.mec_bitmap[i].queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES); @@ -683,23 +687,24 @@ static int gfx_v9_4_3_switch_compute_partition(struct amdgpu_device *adev, enum amdgpu_gfx_partition mode) { u32 tmp = 0; - int num_xcc_per_partition, i; + int num_xcc_per_partition, i, num_xcc; if (mode == adev->gfx.partition_mode) return mode; + num_xcc = NUM_XCC(adev->gfx.xcc_mask); switch (mode) { case AMDGPU_SPX_PARTITION_MODE: - num_xcc_per_partition = adev->gfx.num_xcd; + num_xcc_per_partition = num_xcc; break; case AMDGPU_DPX_PARTITION_MODE: - num_xcc_per_partition = adev->gfx.num_xcd / 2; + num_xcc_per_partition = num_xcc / 2; break; case AMDGPU_TPX_PARTITION_MODE: - num_xcc_per_partition = adev->gfx.num_xcd / 3; + num_xcc_per_partition = num_xcc / 3; break; case AMDGPU_QPX_PARTITION_MODE: - num_xcc_per_partition = adev->gfx.num_xcd / 4; + num_xcc_per_partition = num_xcc / 4; break; case AMDGPU_CPX_PARTITION_MODE: num_xcc_per_partition = 1; @@ -712,7 +717,7 @@ static int gfx_v9_4_3_switch_compute_partition(struct amdgpu_device *adev, * Stop user queues and threads, and make sure GPU is empty of work. 
*/ - for (i = 0; i < adev->gfx.num_xcd; i++) { + for (i = 0; i < num_xcc; i++) { tmp = REG_SET_FIELD(tmp, CP_HYP_XCP_CTL, NUM_XCC_IN_XCP, num_xcc_per_partition); tmp = REG_SET_FIELD(tmp, CP_HYP_XCP_CTL, VIRTUAL_XCC_ID, @@ -836,7 +841,7 @@ static int gfx_v9_4_3_compute_ring_init(struct amdgpu_device *adev, int ring_id, static int gfx_v9_4_3_sw_init(void *handle) { - int i, j, k, r, ring_id, xcc_id; + int i, j, k, r, ring_id, xcc_id, num_xcc; struct amdgpu_kiq *kiq; struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@ -844,6 +849,8 @@ static int gfx_v9_4_3_sw_init(void *handle) adev->gfx.mec.num_pipe_per_mec = 4; adev->gfx.mec.num_queue_per_pipe = 8; + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + /* EOP Event */ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_EOP_INTERRUPT, &adev->gfx.eop_irq); if (r) @@ -877,8 +884,7 @@ static int gfx_v9_4_3_sw_init(void *handle) /* set up the compute queues - allocate horizontally across pipes */ ring_id = 0; - for (xcc_id = 0; xcc_id < adev->gfx.num_xcd; xcc_id++) { - + for (xcc_id = 0; xcc_id < num_xcc; xcc_id++) { for (i = 0; i < adev->gfx.mec.num_mec; ++i) { for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) { for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; @@ -930,14 +936,14 @@ static int gfx_v9_4_3_sw_init(void *handle) static int gfx_v9_4_3_sw_fini(void *handle) { - int i; + int i, num_xcc; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - for (i = 0; i < adev->gfx.num_compute_rings * - adev->gfx.num_xcd; i++) + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + for (i = 0; i < adev->gfx.num_compute_rings * num_xcc; i++) amdgpu_ring_fini(&adev->gfx.compute_ring[i]); - for (i = 0; i < adev->gfx.num_xcd; i++) { + for (i = 0; i < num_xcc; i++) { amdgpu_gfx_mqd_sw_fini(adev, i); amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq[i].ring); amdgpu_gfx_kiq_fini(adev, i); @@ -1050,9 +1056,10 @@ static void gfx_v9_4_3_init_gds_vmid(struct amdgpu_device *adev, int xcc_id) static void 
gfx_v9_4_3_constants_init(struct amdgpu_device *adev) { u32 tmp; - int i, j; + int i, j, num_xcc; - for (i = 0; i < adev->gfx.num_xcd; i++) { + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + for (i = 0; i < num_xcc; i++) { WREG32_FIELD15_PREREG(GC, i, GRBM_CNTL, READ_TIMEOUT, 0xff); gfx_v9_4_3_setup_rb(adev, i); } @@ -1064,7 +1071,7 @@ static void gfx_v9_4_3_constants_init(struct amdgpu_device *adev) /* where to put LDS, scratch, GPUVM in FSA64 space */ mutex_lock(&adev->srbm_mutex); for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB(0)].num_ids; i++) { - for (j = 0; j < adev->gfx.num_xcd; j++) { + for (j = 0; j < num_xcc; j++) { soc15_grbm_select(adev, 0, 0, 0, i, j); /* CP and shaders */ if (i == 0) { @@ -1092,7 +1099,7 @@ static void gfx_v9_4_3_constants_init(struct amdgpu_device *adev) mutex_unlock(&adev->srbm_mutex); - for (i = 0; i < adev->gfx.num_xcd; i++) { + for (i = 0; i < num_xcc; i++) { gfx_v9_4_3_init_compute_vmid(adev, i); gfx_v9_4_3_init_gds_vmid(adev, i); } @@ -1150,8 +1157,10 @@ static void gfx_v9_4_3_disable_gpa_mode(struct amdgpu_device *adev, int xcc_id) static void gfx_v9_4_3_program_xcc_id(struct amdgpu_device *adev, int xcc_id) { uint32_t tmp = 0; + int num_xcc; - switch (adev->gfx.num_xcd) { + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + switch (num_xcc) { /* directly config VIRTUAL_XCC_ID to 0 for 1-XCC */ case 1: WREG32_SOC15(GC, xcc_id, regCP_HYP_XCP_CTL, 0x8); @@ -1288,9 +1297,10 @@ static void gfx_v9_4_3_enable_gui_idle_interrupt(struct amdgpu_device *adev, static void gfx_v9_4_3_rlc_stop(struct amdgpu_device *adev) { - int i; + int i, num_xcc; - for (i = 0; i < adev->gfx.num_xcd; i++) { + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + for (i = 0; i < num_xcc; i++) { WREG32_FIELD15_PREREG(GC, i, RLC_CNTL, RLC_ENABLE_F32, 0); gfx_v9_4_3_enable_gui_idle_interrupt(adev, false, i); gfx_v9_4_3_wait_for_rlc_serdes(adev, i); @@ -1299,9 +1309,10 @@ static void gfx_v9_4_3_rlc_stop(struct amdgpu_device *adev) static void gfx_v9_4_3_rlc_reset(struct 
amdgpu_device *adev) { - int i; + int i, num_xcc; - for (i = 0; i < adev->gfx.num_xcd; i++) { + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + for (i = 0; i < num_xcc; i++) { WREG32_FIELD15_PREREG(GC, i, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1); udelay(50); WREG32_FIELD15_PREREG(GC, i, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0); @@ -1314,9 +1325,10 @@ static void gfx_v9_4_3_rlc_start(struct amdgpu_device *adev) #ifdef AMDGPU_RLC_DEBUG_RETRY u32 rlc_ucode_ver; #endif - int i; + int i, num_xcc; - for (i = 0; i < adev->gfx.num_xcd; i++) { + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + for (i = 0; i < num_xcc; i++) { WREG32_FIELD15_PREREG(GC, i, RLC_CNTL, RLC_ENABLE_F32, 1); udelay(50); @@ -1377,11 +1389,12 @@ static int gfx_v9_4_3_rlc_load_microcode(struct amdgpu_device *adev, int xcc_id) static int gfx_v9_4_3_rlc_resume(struct amdgpu_device *adev) { - int r, i; + int r, i, num_xcc; adev->gfx.rlc.funcs->stop(adev); - for (i = 0; i < adev->gfx.num_xcd; i++) { + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + for (i = 0; i < num_xcc; i++) { /* disable CG */ WREG32_SOC15(GC, i, regRLC_CGCG_CGLS_CTRL, 0); @@ -1954,10 +1967,11 @@ done: static int gfx_v9_4_3_cp_resume(struct amdgpu_device *adev) { - int r, i, j; + int r, i, j, num_xcc; struct amdgpu_ring *ring; - for (i = 0; i < adev->gfx.num_xcd; i++) { + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + for (i = 0; i < num_xcc; i++) { gfx_v9_4_3_enable_gui_idle_interrupt(adev, false, i); if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { @@ -2021,12 +2035,13 @@ static int gfx_v9_4_3_hw_init(void *handle) static int gfx_v9_4_3_hw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - int i; + int i, num_xcc; amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); - for (i = 0; i < adev->gfx.num_xcd; i++) { + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + for (i = 0; i < num_xcc; i++) { if (amdgpu_gfx_disable_kcq(adev, i)) DRM_ERROR("XCD %d KCQ disable failed\n", i); @@ -2069,9 +2084,10 @@ 
static int gfx_v9_4_3_resume(void *handle) static bool gfx_v9_4_3_is_idle(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - int i; + int i, num_xcc; - for (i = 0; i < adev->gfx.num_xcd; i++) { + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + for (i = 0; i < num_xcc; i++) { if (REG_GET_FIELD(RREG32_SOC15(GC, i, regGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE)) return false; @@ -2183,30 +2199,30 @@ static void gfx_v9_4_3_ring_emit_gds_switch(struct amdgpu_ring *ring, static int gfx_v9_4_3_early_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int num_xcc; - /* hardcode in emulation phase */ - adev->gfx.num_xcd = 1; + num_xcc = NUM_XCC(adev->gfx.xcc_mask); adev->gfx.partition_mode = amdgpu_user_partt_mode; /* calculate the num_xcc_in_xcp for the partition mode*/ switch (amdgpu_user_partt_mode) { case AMDGPU_SPX_PARTITION_MODE: - adev->gfx.num_xcc_per_xcp = adev->gfx.num_xcd; + adev->gfx.num_xcc_per_xcp = num_xcc; break; case AMDGPU_DPX_PARTITION_MODE: - adev->gfx.num_xcc_per_xcp = adev->gfx.num_xcd / 2; + adev->gfx.num_xcc_per_xcp = num_xcc / 2; break; case AMDGPU_TPX_PARTITION_MODE: - adev->gfx.num_xcc_per_xcp = adev->gfx.num_xcd / 3; + adev->gfx.num_xcc_per_xcp = num_xcc / 3; break; case AMDGPU_QPX_PARTITION_MODE: - adev->gfx.num_xcc_per_xcp = adev->gfx.num_xcd / 4; + adev->gfx.num_xcc_per_xcp = num_xcc / 4; break; case AMDGPU_CPX_PARTITION_MODE: adev->gfx.num_xcc_per_xcp = 1; break; default: - adev->gfx.num_xcc_per_xcp = adev->gfx.num_xcd; + adev->gfx.num_xcc_per_xcp = num_xcc; break; } @@ -2404,14 +2420,15 @@ static int gfx_v9_4_3_set_clockgating_state(void *handle, enum amd_clockgating_state state) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - int i; + int i, num_xcc; if (amdgpu_sriov_vf(adev)) return 0; + num_xcc = NUM_XCC(adev->gfx.xcc_mask); switch (adev->ip_versions[GC_HWIP][0]) { case IP_VERSION(9, 4, 3): - for (i = 0; i < adev->gfx.num_xcd; i++) + for (i = 0; i < num_xcc; i++) 
gfx_v9_4_3_update_gfx_clock_gating(adev, state == AMD_CG_STATE_GATE, i); break; @@ -2739,12 +2756,13 @@ static int gfx_v9_4_3_set_priv_reg_fault_state(struct amdgpu_device *adev, unsigned type, enum amdgpu_interrupt_state state) { - int i; + int i, num_xcc; + num_xcc = NUM_XCC(adev->gfx.xcc_mask); switch (state) { case AMDGPU_IRQ_STATE_DISABLE: case AMDGPU_IRQ_STATE_ENABLE: - for (i = 0; i < adev->gfx.num_xcd; i++) + for (i = 0; i < num_xcc; i++) WREG32_FIELD15_PREREG(GC, i, CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE, state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); @@ -2761,12 +2779,13 @@ static int gfx_v9_4_3_set_priv_inst_fault_state(struct amdgpu_device *adev, unsigned type, enum amdgpu_interrupt_state state) { - int i; + int i, num_xcc; + num_xcc = NUM_XCC(adev->gfx.xcc_mask); switch (state) { case AMDGPU_IRQ_STATE_DISABLE: case AMDGPU_IRQ_STATE_ENABLE: - for (i = 0; i < adev->gfx.num_xcd; i++) + for (i = 0; i < num_xcc; i++) WREG32_FIELD15_PREREG(GC, i, CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE, state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); @@ -2783,8 +2802,10 @@ static int gfx_v9_4_3_set_eop_interrupt_state(struct amdgpu_device *adev, unsigned type, enum amdgpu_interrupt_state state) { - int i; - for (i = 0; i < adev->gfx.num_xcd; i++) { + int i, num_xcc; + + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + for (i = 0; i < num_xcc; i++) { switch (type) { case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP: gfx_v9_4_3_set_compute_eop_interrupt_state(adev, 1, 0, state, i); @@ -2842,6 +2863,7 @@ static int gfx_v9_4_3_eop_irq(struct amdgpu_device *adev, /* Per-queue interrupt is supported for MEC starting from VI. * The interrupt can only be enabled/disabled per pipe instead of per queue. 
*/ + if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id)) amdgpu_fence_process(ring); } @@ -3056,9 +3078,10 @@ static const struct amdgpu_ring_funcs gfx_v9_4_3_ring_funcs_kiq = { static void gfx_v9_4_3_set_ring_funcs(struct amdgpu_device *adev) { - int i, j; + int i, j, num_xcc; - for (i = 0; i < adev->gfx.num_xcd; i++) { + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + for (i = 0; i < num_xcc; i++) { adev->gfx.kiq[i].ring.funcs = &gfx_v9_4_3_ring_funcs_kiq; for (j = 0; j < adev->gfx.num_compute_rings; j++) diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c index 1bb17d95f720..e35365ab3f1f 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c @@ -43,9 +43,10 @@ static void gfxhub_v1_2_setup_vm_pt_regs(struct amdgpu_device *adev, uint64_t page_table_base) { struct amdgpu_vmhub *hub; - int i; + int i, num_xcc; - for (i = 0; i < adev->gfx.num_xcd; i++) { + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + for (i = 0; i < num_xcc; i++) { hub = &adev->vmhub[AMDGPU_GFXHUB(i)]; WREG32_SOC15_OFFSET(GC, i, regVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32, @@ -56,13 +57,14 @@ static void gfxhub_v1_2_setup_vm_pt_regs(struct amdgpu_device *adev, regVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32, hub->ctx_addr_distance * vmid, upper_32_bits(page_table_base)); + } } static void gfxhub_v1_2_init_gart_aperture_regs(struct amdgpu_device *adev) { uint64_t pt_base; - int i; + int i, num_xcc; if (adev->gmc.pdb0_bo) pt_base = amdgpu_gmc_pd_addr(adev->gmc.pdb0_bo); @@ -74,7 +76,8 @@ static void gfxhub_v1_2_init_gart_aperture_regs(struct amdgpu_device *adev) /* If use GART for FB translation, vmid0 page table covers both * vram and system memory (gart) */ - for (i = 0; i < adev->gfx.num_xcd; i++) { + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + for (i = 0; i < num_xcc; i++) { if (adev->gmc.pdb0_bo) { WREG32_SOC15(GC, i, regVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32, @@ -111,9 +114,10 @@ static void 
gfxhub_v1_2_init_system_aperture_regs(struct amdgpu_device *adev) { uint64_t value; uint32_t tmp; - int i; + int i, num_xcc; - for (i = 0; i < adev->gfx.num_xcd; i++) { + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + for (i = 0; i < num_xcc; i++) { /* Program the AGP BAR */ WREG32_SOC15_RLC(GC, i, regMC_VM_AGP_BASE, 0); WREG32_SOC15_RLC(GC, i, regMC_VM_AGP_BOT, adev->gmc.agp_start >> 24); @@ -177,9 +181,10 @@ static void gfxhub_v1_2_init_system_aperture_regs(struct amdgpu_device *adev) static void gfxhub_v1_2_init_tlb_regs(struct amdgpu_device *adev) { uint32_t tmp; - int i; + int i, num_xcc; - for (i = 0; i < adev->gfx.num_xcd; i++) { + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + for (i = 0; i < num_xcc; i++) { /* Setup TLB control */ tmp = RREG32_SOC15(GC, i, regMC_VM_MX_L1_TLB_CNTL); @@ -202,9 +207,10 @@ static void gfxhub_v1_2_init_tlb_regs(struct amdgpu_device *adev) static void gfxhub_v1_2_init_cache_regs(struct amdgpu_device *adev) { uint32_t tmp; - int i; + int i, num_xcc; - for (i = 0; i < adev->gfx.num_xcd; i++) { + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + for (i = 0; i < num_xcc; i++) { /* Setup L2 cache */ tmp = RREG32_SOC15(GC, i, regVM_L2_CNTL); tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 1); @@ -249,9 +255,10 @@ static void gfxhub_v1_2_init_cache_regs(struct amdgpu_device *adev) static void gfxhub_v1_2_enable_system_domain(struct amdgpu_device *adev) { uint32_t tmp; - int i; + int i, num_xcc; - for (i = 0; i < adev->gfx.num_xcd; i++) { + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + for (i = 0; i < num_xcc; i++) { tmp = RREG32_SOC15(GC, i, regVM_CONTEXT0_CNTL); tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, ENABLE_CONTEXT, 1); tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, PAGE_TABLE_DEPTH, @@ -266,9 +273,10 @@ static void gfxhub_v1_2_enable_system_domain(struct amdgpu_device *adev) static void gfxhub_v1_2_disable_identity_aperture(struct amdgpu_device *adev) { - int i; + int i, num_xcc; - for (i = 0; i < adev->gfx.num_xcd; i++) { + num_xcc = 
NUM_XCC(adev->gfx.xcc_mask); + for (i = 0; i < num_xcc; i++) { WREG32_SOC15(GC, i, regVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_LO32, 0XFFFFFFFF); @@ -295,7 +303,7 @@ static void gfxhub_v1_2_setup_vmid_config(struct amdgpu_device *adev) struct amdgpu_vmhub *hub; unsigned num_level, block_size; uint32_t tmp; - int i, j; + int i, j, num_xcc; num_level = adev->vm_manager.num_level; block_size = adev->vm_manager.block_size; @@ -304,7 +312,8 @@ static void gfxhub_v1_2_setup_vmid_config(struct amdgpu_device *adev) else block_size -= 9; - for (j = 0; j < adev->gfx.num_xcd; j++) { + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + for (j = 0; j < num_xcc; j++) { hub = &adev->vmhub[AMDGPU_GFXHUB(j)]; for (i = 0; i <= 14; i++) { tmp = RREG32_SOC15_OFFSET(GC, j, regVM_CONTEXT1_CNTL, i); @@ -362,10 +371,12 @@ static void gfxhub_v1_2_setup_vmid_config(struct amdgpu_device *adev) static void gfxhub_v1_2_program_invalidation(struct amdgpu_device *adev) { struct amdgpu_vmhub *hub; - unsigned i, j; + unsigned i, j, num_xcc; - for (j = 0; j < adev->gfx.num_xcd; j++) { + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + for (j = 0; j < num_xcc; j++) { hub = &adev->vmhub[AMDGPU_GFXHUB(j)]; + for (i = 0 ; i < 18; ++i) { WREG32_SOC15_OFFSET(GC, j, regVM_INVALIDATE_ENG0_ADDR_RANGE_LO32, i * hub->eng_addr_distance, 0xffffffff); @@ -377,9 +388,10 @@ static void gfxhub_v1_2_program_invalidation(struct amdgpu_device *adev) static int gfxhub_v1_2_gart_enable(struct amdgpu_device *adev) { - int i; + int i, num_xcc; - for (i = 0; i < adev->gfx.num_xcd; i++) { + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + for (i = 0; i < num_xcc; i++) { if (amdgpu_sriov_vf(adev)) { /* * MC_VM_FB_LOCATION_BASE/TOP is NULL for VF, becuase they are @@ -413,9 +425,10 @@ static void gfxhub_v1_2_gart_disable(struct amdgpu_device *adev) { struct amdgpu_vmhub *hub; u32 tmp; - u32 i, j; + u32 i, j, num_xcc; - for (j = 0; j < adev->gfx.num_xcd; j++) { + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + for (j = 0; j < num_xcc; j++) { hub = 
&adev->vmhub[AMDGPU_GFXHUB(j)]; /* Disable all tables */ for (i = 0; i < 16; i++) @@ -449,9 +462,10 @@ static void gfxhub_v1_2_set_fault_enable_default(struct amdgpu_device *adev, bool value) { u32 tmp; - int i; + int i, num_xcc; - for (i = 0; i < adev->gfx.num_xcd; i++) { + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + for (i = 0; i < num_xcc; i++) { tmp = RREG32_SOC15(GC, i, regVM_L2_PROTECTION_FAULT_CNTL); tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL, RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, value); @@ -490,9 +504,10 @@ static void gfxhub_v1_2_set_fault_enable_default(struct amdgpu_device *adev, static void gfxhub_v1_2_init(struct amdgpu_device *adev) { struct amdgpu_vmhub *hub; - int i; + int i, num_xcc; - for (i = 0; i < adev->gfx.num_xcd; i++) { + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + for (i = 0; i < num_xcc; i++) { hub = &adev->vmhub[AMDGPU_GFXHUB(i)]; hub->ctx0_ptb_addr_lo32 = diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 4b2c4ecd7253..2c322a25bf1c 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -1733,7 +1733,8 @@ static int gmc_v9_0_sw_init(void *handle) adev->gmc.translate_further = adev->vm_manager.num_level > 1; break; case IP_VERSION(9, 4, 3): - bitmap_set(adev->vmhubs_mask, AMDGPU_GFXHUB(0), adev->gfx.num_xcd); + bitmap_set(adev->vmhubs_mask, AMDGPU_GFXHUB(0), + NUM_XCC(adev->gfx.xcc_mask)); bitmap_set(adev->vmhubs_mask, AMDGPU_MMHUB0(0), adev->num_aid); amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 829e32433faf..df96c4c508a0 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -592,6 +592,7 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, struct kfd_node *node; uint32_t first_vmid_kfd, last_vmid_kfd, vmid_num_kfd; unsigned int max_proc_per_quantum; + int num_xcd; kfd->mec_fw_version = 
amdgpu_amdkfd_get_fw_version(kfd->adev, KGD_ENGINE_MEC1); @@ -601,16 +602,15 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, KGD_ENGINE_SDMA1); kfd->shared_resources = *gpu_resources; - if (kfd->adev->gfx.num_xcd == 0 || kfd->adev->gfx.num_xcd == 1 || - kfd->adev->gfx.num_xcc_per_xcp == 0) + num_xcd = NUM_XCC(kfd->adev->gfx.xcc_mask); + if (num_xcd == 0 || num_xcd == 1 || kfd->adev->gfx.num_xcc_per_xcp == 0) kfd->num_nodes = 1; else - kfd->num_nodes = - kfd->adev->gfx.num_xcd/kfd->adev->gfx.num_xcc_per_xcp; + kfd->num_nodes = num_xcd / kfd->adev->gfx.num_xcc_per_xcp; if (kfd->num_nodes == 0) { dev_err(kfd_device, "KFD num nodes cannot be 0, GC inst: %d, num_xcc_in_node: %d\n", - kfd->adev->gfx.num_xcd, kfd->adev->gfx.num_xcc_per_xcp); + num_xcd, kfd->adev->gfx.num_xcc_per_xcp); goto out; } -- cgit From 5fb34bd9cf9e248d7e84e431a4a6b731334ab564 Mon Sep 17 00:00:00 2001 From: Alex Sierra Date: Tue, 24 May 2022 10:22:12 -0500 Subject: drm/amdkfd: pass kfd_node ref to svm migration api This work is required for GC 9.4.3, prior to supporting memory partitions per node in SVM. When multiple partitions are configured, every BO should be allocated inside one specific partition which corresponds to the current amdgpu_device and kfd_node. 
v2: squash in compilation fix (Alex) v3: squash in fix for pre-gfx 9.4.3 (Alex) v4: squash in best_loc fix (Alex) Signed-off-by: Alex Sierra Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 7 +- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 3 +- drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 33 ++++--- drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 45 ++++----- drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 30 +++++- drivers/gpu/drm/amd/amdkfd/kfd_process.c | 6 +- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 152 +++++++++++++++---------------- drivers/gpu/drm/amd/amdkfd/kfd_svm.h | 21 +++-- 9 files changed, 166 insertions(+), 133 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index c3964c14f215..c390b2856cc9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -2441,7 +2441,8 @@ void amdgpu_vm_set_task_info(struct amdgpu_vm *vm) * shouldn't be reported any more. 
*/ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid, - uint64_t addr, bool write_fault) + u32 client_id, u32 node_id, uint64_t addr, + bool write_fault) { bool is_compute_context = false; struct amdgpu_bo *root; @@ -2465,8 +2466,8 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid, addr /= AMDGPU_GPU_PAGE_SIZE; - if (is_compute_context && - !svm_range_restore_pages(adev, pasid, addr, write_fault)) { + if (is_compute_context && !svm_range_restore_pages(adev, pasid, client_id, + node_id, addr, write_fault)) { amdgpu_bo_unref(&root); return true; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index 9f5d32b0fda1..dbab31647186 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -455,7 +455,8 @@ void amdgpu_vm_check_compute_bug(struct amdgpu_device *adev); void amdgpu_vm_get_task_info(struct amdgpu_device *adev, u32 pasid, struct amdgpu_task_info *task_info); bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid, - uint64_t addr, bool write_fault); + u32 client_id, u32 node_id, uint64_t addr, + bool write_fault); void amdgpu_vm_set_task_info(struct amdgpu_vm *vm); diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c index d76f5c8d4977..01bd45651382 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c @@ -139,7 +139,7 @@ static int gmc_v10_0_process_interrupt(struct amdgpu_device *adev, /* Try to handle the recoverable page faults by filling page * tables */ - if (amdgpu_vm_handle_fault(adev, entry->pasid, addr, write_fault)) + if (amdgpu_vm_handle_fault(adev, entry->pasid, 0, 0, addr, write_fault)) return 1; } diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 2c322a25bf1c..c5752a349f3d 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -557,11 +557,24 @@ 
static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev, u64 addr; uint32_t cam_index = 0; int ret; - uint32_t node_id; + uint32_t node_id = 0; addr = (u64)entry->src_data[0] << 12; addr |= ((u64)entry->src_data[1] & 0xf) << 44; + if (entry->client_id == SOC15_IH_CLIENTID_VMC) { + hub_name = "mmhub0"; + hub = &adev->vmhub[AMDGPU_MMHUB0(0)]; + } else if (entry->client_id == SOC15_IH_CLIENTID_VMC1) { + hub_name = "mmhub1"; + hub = &adev->vmhub[AMDGPU_MMHUB1(0)]; + } else { + hub_name = "gfxhub0"; + node_id = (adev->ip_versions[GC_HWIP][0] == + IP_VERSION(9, 4, 3)) ? entry->node_id : 0; + hub = &adev->vmhub[node_id/2]; + } + if (retry_fault) { if (adev->irq.retry_cam_enabled) { /* Delegate it to a different ring if the hardware hasn't @@ -574,7 +587,8 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev, cam_index = entry->src_data[2] & 0x3ff; - ret = amdgpu_vm_handle_fault(adev, entry->pasid, addr, write_fault); + ret = amdgpu_vm_handle_fault(adev, entry->pasid, entry->client_id, node_id, + addr, write_fault); WDOORBELL32(adev->irq.retry_cam_doorbell_index, cam_index); if (ret) return 1; @@ -596,7 +610,8 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev, /* Try to handle the recoverable page faults by filling page * tables */ - if (amdgpu_vm_handle_fault(adev, entry->pasid, addr, write_fault)) + if (amdgpu_vm_handle_fault(adev, entry->pasid, entry->client_id, node_id, + addr, write_fault)) return 1; } } @@ -604,18 +619,6 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev, if (!printk_ratelimit()) return 0; - if (entry->client_id == SOC15_IH_CLIENTID_VMC) { - hub_name = "mmhub0"; - hub = &adev->vmhub[AMDGPU_MMHUB0(0)]; - } else if (entry->client_id == SOC15_IH_CLIENTID_VMC1) { - hub_name = "mmhub1"; - hub = &adev->vmhub[AMDGPU_MMHUB1(0)]; - } else { - hub_name = "gfxhub0"; - node_id = (adev->ip_versions[GC_HWIP][0] == - IP_VERSION(9, 4, 3)) ? 
entry->node_id : 0; - hub = &adev->vmhub[node_id/2]; - } memset(&task_info, 0, sizeof(struct amdgpu_task_info)); amdgpu_vm_get_task_info(adev, entry->pasid, &task_info); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c index 5f4dc2a45bd0..e7e5abc32c84 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c @@ -287,11 +287,12 @@ static unsigned long svm_migrate_unsuccessful_pages(struct migrate_vma *migrate) } static int -svm_migrate_copy_to_vram(struct amdgpu_device *adev, struct svm_range *prange, +svm_migrate_copy_to_vram(struct kfd_node *node, struct svm_range *prange, struct migrate_vma *migrate, struct dma_fence **mfence, dma_addr_t *scratch, uint64_t ttm_res_offset) { - uint64_t npages = migrate->npages; + uint64_t npages = migrate->cpages; + struct amdgpu_device *adev = node->adev; struct device *dev = adev->dev; struct amdgpu_res_cursor cursor; dma_addr_t *src; @@ -321,7 +322,7 @@ svm_migrate_copy_to_vram(struct amdgpu_device *adev, struct svm_range *prange, DMA_TO_DEVICE); r = dma_mapping_error(dev, src[i]); if (r) { - dev_err(adev->dev, "%s: fail %d dma_map_page\n", + dev_err(dev, "%s: fail %d dma_map_page\n", __func__, r); goto out_free_vram_pages; } @@ -390,12 +391,13 @@ out_free_vram_pages: } static long -svm_migrate_vma_to_vram(struct amdgpu_device *adev, struct svm_range *prange, +svm_migrate_vma_to_vram(struct kfd_node *node, struct svm_range *prange, struct vm_area_struct *vma, uint64_t start, uint64_t end, uint32_t trigger, uint64_t ttm_res_offset) { struct kfd_process *p = container_of(prange->svms, struct kfd_process, svms); uint64_t npages = (end - start) >> PAGE_SHIFT; + struct amdgpu_device *adev = node->adev; struct kfd_process_device *pdd; struct dma_fence *mfence = NULL; struct migrate_vma migrate = { 0 }; @@ -445,7 +447,7 @@ svm_migrate_vma_to_vram(struct amdgpu_device *adev, struct svm_range *prange, else pr_debug("0x%lx pages migrated\n", 
cpages); - r = svm_migrate_copy_to_vram(adev, prange, &migrate, &mfence, scratch, ttm_res_offset); + r = svm_migrate_copy_to_vram(node, prange, &migrate, &mfence, scratch, ttm_res_offset); migrate_vma_pages(&migrate); pr_debug("successful/cpages/npages 0x%lx/0x%lx/0x%lx\n", @@ -465,7 +467,7 @@ out_free: kvfree(buf); out: if (!r && cpages) { - pdd = svm_range_get_pdd_by_adev(prange, adev); + pdd = svm_range_get_pdd_by_node(prange, node); if (pdd) WRITE_ONCE(pdd->page_in, pdd->page_in + cpages); @@ -492,8 +494,8 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc, { unsigned long addr, start, end; struct vm_area_struct *vma; - struct amdgpu_device *adev; uint64_t ttm_res_offset; + struct kfd_node *node; unsigned long cpages = 0; long r = 0; @@ -503,9 +505,9 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc, return 0; } - adev = svm_range_get_adev_by_id(prange, best_loc); - if (!adev) { - pr_debug("failed to get device by id 0x%x\n", best_loc); + node = svm_range_get_node_by_id(prange, best_loc); + if (!node) { + pr_debug("failed to get kfd node by id 0x%x\n", best_loc); return -ENODEV; } @@ -515,9 +517,9 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc, start = prange->start << PAGE_SHIFT; end = (prange->last + 1) << PAGE_SHIFT; - r = svm_range_vram_node_new(adev, prange, true); + r = svm_range_vram_node_new(node, prange, true); if (r) { - dev_dbg(adev->dev, "fail %ld to alloc vram\n", r); + dev_dbg(node->adev->dev, "fail %ld to alloc vram\n", r); return r; } ttm_res_offset = prange->offset << PAGE_SHIFT; @@ -530,7 +532,7 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc, break; next = min(vma->vm_end, end); - r = svm_migrate_vma_to_vram(adev, prange, vma, addr, next, trigger, ttm_res_offset); + r = svm_migrate_vma_to_vram(node, prange, vma, addr, next, trigger, ttm_res_offset); if (r < 0) { pr_debug("failed %ld to migrate\n", r); break; @@ -663,7 +665,7 @@ out_oom: * positive values - 
partial migration, number of pages not migrated */ static long -svm_migrate_vma_to_ram(struct amdgpu_device *adev, struct svm_range *prange, +svm_migrate_vma_to_ram(struct kfd_node *node, struct svm_range *prange, struct vm_area_struct *vma, uint64_t start, uint64_t end, uint32_t trigger, struct page *fault_page) { @@ -671,6 +673,7 @@ svm_migrate_vma_to_ram(struct amdgpu_device *adev, struct svm_range *prange, uint64_t npages = (end - start) >> PAGE_SHIFT; unsigned long upages = npages; unsigned long cpages = 0; + struct amdgpu_device *adev = node->adev; struct kfd_process_device *pdd; struct dma_fence *mfence = NULL; struct migrate_vma migrate = { 0 }; @@ -745,7 +748,7 @@ out_free: kvfree(buf); out: if (!r && cpages) { - pdd = svm_range_get_pdd_by_adev(prange, adev); + pdd = svm_range_get_pdd_by_node(prange, node); if (pdd) WRITE_ONCE(pdd->page_out, pdd->page_out + cpages); } @@ -766,7 +769,7 @@ out: int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm, uint32_t trigger, struct page *fault_page) { - struct amdgpu_device *adev; + struct kfd_node *node; struct vm_area_struct *vma; unsigned long addr; unsigned long start; @@ -780,13 +783,11 @@ int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm, return 0; } - adev = svm_range_get_adev_by_id(prange, prange->actual_loc); - if (!adev) { - pr_debug("failed to get device by id 0x%x\n", - prange->actual_loc); + node = svm_range_get_node_by_id(prange, prange->actual_loc); + if (!node) { + pr_debug("failed to get kfd node by id 0x%x\n", prange->actual_loc); return -ENODEV; } - pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx] from gpu 0x%x to ram\n", prange->svms, prange, prange->start, prange->last, prange->actual_loc); @@ -805,7 +806,7 @@ int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm, } next = min(vma->vm_end, end); - r = svm_migrate_vma_to_ram(adev, prange, vma, addr, next, trigger, + r = svm_migrate_vma_to_ram(node, prange, vma, addr, next, trigger, 
fault_page); if (r < 0) { pr_debug("failed %ld to migrate prange %p\n", r, prange); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 400b4dcbdf05..df372de6b056 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -289,6 +289,7 @@ struct kfd_node { * from the HW ring into a SW ring. */ bool interrupts_active; + uint32_t interrupt_bitmap; /* Only used for GFX 9.4.3 */ /* QCM Device instance */ struct device_queue_manager *dqm; @@ -971,9 +972,8 @@ struct kfd_process *kfd_lookup_process_by_pasid(u32 pasid); struct kfd_process *kfd_lookup_process_by_mm(const struct mm_struct *mm); int kfd_process_gpuidx_from_gpuid(struct kfd_process *p, uint32_t gpu_id); -int kfd_process_gpuid_from_adev(struct kfd_process *p, - struct amdgpu_device *adev, uint32_t *gpuid, - uint32_t *gpuidx); +int kfd_process_gpuid_from_node(struct kfd_process *p, struct kfd_node *node, + uint32_t *gpuid, uint32_t *gpuidx); static inline int kfd_process_gpuid_from_gpuidx(struct kfd_process *p, uint32_t gpuidx, uint32_t *gpuid) { return gpuidx < p->n_pdds ? 
p->pdds[gpuidx]->dev->id : -EINVAL; @@ -1073,6 +1073,30 @@ struct kfd_topology_device *kfd_topology_device_by_id(uint32_t gpu_id); struct kfd_node *kfd_device_by_id(uint32_t gpu_id); struct kfd_node *kfd_device_by_pci_dev(const struct pci_dev *pdev); struct kfd_node *kfd_device_by_adev(const struct amdgpu_device *adev); +static inline bool kfd_irq_is_from_node(struct kfd_node *node, uint32_t client_id, + uint32_t node_id) +{ + if ((node->interrupt_bitmap & (0x1U << node_id)) || + ((node_id % 4) == 0 && + (node->interrupt_bitmap >> 16) & (0x1U << client_id))) + return true; + + return false; +} +static inline struct kfd_node *kfd_node_by_irq_ids(struct amdgpu_device *adev, + uint32_t client_id, uint32_t node_id) { + struct kfd_dev *dev = adev->kfd.dev; + uint32_t i; + + if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 3)) + return dev->nodes[0]; + + for (i = 0; i < dev->num_nodes; i++) + if (kfd_irq_is_from_node(dev->nodes[i], client_id, node_id)) + return dev->nodes[i]; + + return NULL; +} int kfd_topology_enum_kfd_devices(uint8_t idx, struct kfd_node **kdev); int kfd_numa_node_to_apic_id(int numa_node_id); void kfd_double_confirm_iommu_support(struct kfd_dev *gpu); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index c3d43e6e5236..666815b227a8 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -1891,13 +1891,13 @@ int kfd_process_gpuidx_from_gpuid(struct kfd_process *p, uint32_t gpu_id) } int -kfd_process_gpuid_from_adev(struct kfd_process *p, struct amdgpu_device *adev, - uint32_t *gpuid, uint32_t *gpuidx) +kfd_process_gpuid_from_node(struct kfd_process *p, struct kfd_node *node, + uint32_t *gpuid, uint32_t *gpuidx) { int i; for (i = 0; i < p->n_pdds; i++) - if (p->pdds[i] && p->pdds[i]->dev->adev == adev) { + if (p->pdds[i] && p->pdds[i]->dev == node) { *gpuid = p->pdds[i]->user_gpu_id; *gpuidx = i; return 0; diff --git 
a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 4b4f3bf8b823..639831fbb6ca 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -170,8 +170,7 @@ svm_range_dma_map_dev(struct amdgpu_device *adev, struct svm_range *prange, page = hmm_pfn_to_page(hmm_pfns[i]); if (is_zone_device_page(page)) { - struct amdgpu_device *bo_adev = - amdgpu_ttm_adev(prange->svm_bo->bo->tbo.bdev); + struct amdgpu_device *bo_adev = prange->svm_bo->node->adev; addr[i] = (hmm_pfns[i] << PAGE_SHIFT) + bo_adev->vm_manager.vram_base_offset - @@ -424,10 +423,8 @@ static void svm_range_bo_unref(struct svm_range_bo *svm_bo) } static bool -svm_range_validate_svm_bo(struct amdgpu_device *adev, struct svm_range *prange) +svm_range_validate_svm_bo(struct kfd_node *node, struct svm_range *prange) { - struct amdgpu_device *bo_adev; - mutex_lock(&prange->lock); if (!prange->svm_bo) { mutex_unlock(&prange->lock); @@ -440,12 +437,11 @@ svm_range_validate_svm_bo(struct amdgpu_device *adev, struct svm_range *prange) } if (svm_bo_ref_unless_zero(prange->svm_bo)) { /* - * Migrate from GPU to GPU, remove range from source bo_adev - * svm_bo range list, and return false to allocate svm_bo from - * destination adev. + * Migrate from GPU to GPU, remove range from source svm_bo->node + * range list, and return false to allocate svm_bo from destination + * node. 
*/ - bo_adev = amdgpu_ttm_adev(prange->svm_bo->bo->tbo.bdev); - if (bo_adev != adev) { + if (prange->svm_bo->node != node) { mutex_unlock(&prange->lock); spin_lock(&prange->svm_bo->list_lock); @@ -513,7 +509,7 @@ static struct svm_range_bo *svm_range_bo_new(void) } int -svm_range_vram_node_new(struct amdgpu_device *adev, struct svm_range *prange, +svm_range_vram_node_new(struct kfd_node *node, struct svm_range *prange, bool clear) { struct amdgpu_bo_param bp; @@ -528,7 +524,7 @@ svm_range_vram_node_new(struct amdgpu_device *adev, struct svm_range *prange, pr_debug("pasid: %x svms 0x%p [0x%lx 0x%lx]\n", p->pasid, prange->svms, prange->start, prange->last); - if (svm_range_validate_svm_bo(adev, prange)) + if (svm_range_validate_svm_bo(node, prange)) return 0; svm_bo = svm_range_bo_new(); @@ -542,6 +538,7 @@ svm_range_vram_node_new(struct amdgpu_device *adev, struct svm_range *prange, kfree(svm_bo); return -ESRCH; } + svm_bo->node = node; svm_bo->eviction_fence = amdgpu_amdkfd_fence_create(dma_fence_context_alloc(1), mm, @@ -559,7 +556,10 @@ svm_range_vram_node_new(struct amdgpu_device *adev, struct svm_range *prange, bp.type = ttm_bo_type_device; bp.resv = NULL; - r = amdgpu_bo_create_user(adev, &bp, &ubo); + /* TODO: Allocate memory from the right memory partition. 
We can sort + * out the details later, once basic memory partitioning is working + */ + r = amdgpu_bo_create_user(node->adev, &bp, &ubo); if (r) { pr_debug("failed %d to create bo\n", r); goto create_bo_failed; @@ -617,45 +617,30 @@ void svm_range_vram_node_free(struct svm_range *prange) prange->ttm_res = NULL; } -struct amdgpu_device * -svm_range_get_adev_by_id(struct svm_range *prange, uint32_t gpu_id) +struct kfd_node * +svm_range_get_node_by_id(struct svm_range *prange, uint32_t gpu_id) { - struct kfd_process_device *pdd; struct kfd_process *p; - int32_t gpu_idx; + struct kfd_process_device *pdd; p = container_of(prange->svms, struct kfd_process, svms); - - gpu_idx = kfd_process_gpuidx_from_gpuid(p, gpu_id); - if (gpu_idx < 0) { - pr_debug("failed to get device by id 0x%x\n", gpu_id); - return NULL; - } - pdd = kfd_process_device_from_gpuidx(p, gpu_idx); + pdd = kfd_process_device_data_by_id(p, gpu_id); if (!pdd) { - pr_debug("failed to get device by idx 0x%x\n", gpu_idx); + pr_debug("failed to get kfd process device by id 0x%x\n", gpu_id); return NULL; } - return pdd->dev->adev; + return pdd->dev; } struct kfd_process_device * -svm_range_get_pdd_by_adev(struct svm_range *prange, struct amdgpu_device *adev) +svm_range_get_pdd_by_node(struct svm_range *prange, struct kfd_node *node) { struct kfd_process *p; - int32_t gpu_idx, gpuid; - int r; p = container_of(prange->svms, struct kfd_process, svms); - r = kfd_process_gpuid_from_adev(p, adev, &gpuid, &gpu_idx); - if (r) { - pr_debug("failed to get device id by adev %p\n", adev); - return NULL; - } - - return kfd_process_device_from_gpuidx(p, gpu_idx); + return kfd_get_process_device_data(node, p); } static int svm_range_bo_validate(void *param, struct amdgpu_bo *bo) @@ -1148,12 +1133,18 @@ svm_range_split_by_granularity(struct kfd_process *p, struct mm_struct *mm, } return 0; } +static bool +svm_nodes_in_same_hive(struct kfd_node *node_a, struct kfd_node *node_b) +{ + return (node_a->adev == node_b->adev || + 
amdgpu_xgmi_same_hive(node_a->adev, node_b->adev)); +} static uint64_t -svm_range_get_pte_flags(struct amdgpu_device *adev, struct svm_range *prange, - int domain) +svm_range_get_pte_flags(struct kfd_node *node, + struct svm_range *prange, int domain) { - struct amdgpu_device *bo_adev; + struct kfd_node *bo_node; uint32_t flags = prange->flags; uint32_t mapping_flags = 0; uint64_t pte_flags; @@ -1162,18 +1153,18 @@ svm_range_get_pte_flags(struct amdgpu_device *adev, struct svm_range *prange, bool uncached = flags & KFD_IOCTL_SVM_FLAG_UNCACHED; if (domain == SVM_RANGE_VRAM_DOMAIN) - bo_adev = amdgpu_ttm_adev(prange->svm_bo->bo->tbo.bdev); + bo_node = prange->svm_bo->node; - switch (KFD_GC_VERSION(adev->kfd.dev)) { + switch (node->adev->ip_versions[GC_HWIP][0]) { case IP_VERSION(9, 4, 1): if (domain == SVM_RANGE_VRAM_DOMAIN) { - if (bo_adev == adev) { + if (bo_node == node) { mapping_flags |= coherent ? AMDGPU_VM_MTYPE_CC : AMDGPU_VM_MTYPE_RW; } else { mapping_flags |= coherent ? AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC; - if (amdgpu_xgmi_same_hive(adev, bo_adev)) + if (svm_nodes_in_same_hive(node, bo_node)) snoop = true; } } else { @@ -1183,15 +1174,15 @@ svm_range_get_pte_flags(struct amdgpu_device *adev, struct svm_range *prange, break; case IP_VERSION(9, 4, 2): if (domain == SVM_RANGE_VRAM_DOMAIN) { - if (bo_adev == adev) { + if (bo_node == node) { mapping_flags |= coherent ? AMDGPU_VM_MTYPE_CC : AMDGPU_VM_MTYPE_RW; - if (adev->gmc.xgmi.connected_to_cpu) + if (node->adev->gmc.xgmi.connected_to_cpu) snoop = true; } else { mapping_flags |= coherent ? 
AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC; - if (amdgpu_xgmi_same_hive(adev, bo_adev)) + if (svm_nodes_in_same_hive(node, bo_node)) snoop = true; } } else { @@ -1207,7 +1198,7 @@ svm_range_get_pte_flags(struct amdgpu_device *adev, struct svm_range *prange, if (uncached) mapping_flags |= AMDGPU_VM_MTYPE_UC; /* local HBM region close to partition*/ - else if (bo_adev == adev) + else if (bo_node == node) mapping_flags |= AMDGPU_VM_MTYPE_RW; /* local HBM region far from partition or remote XGMI GPU or * system memory @@ -1231,7 +1222,7 @@ svm_range_get_pte_flags(struct amdgpu_device *adev, struct svm_range *prange, pte_flags |= (domain == SVM_RANGE_VRAM_DOMAIN) ? 0 : AMDGPU_PTE_SYSTEM; pte_flags |= snoop ? AMDGPU_PTE_SNOOPED : 0; - pte_flags |= amdgpu_gem_va_map_flags(adev, mapping_flags); + pte_flags |= amdgpu_gem_va_map_flags(node->adev, mapping_flags); return pte_flags; } @@ -1338,7 +1329,7 @@ svm_range_map_to_gpu(struct kfd_process_device *pdd, struct svm_range *prange, pr_debug("Mapping range [0x%lx 0x%llx] on domain: %s\n", last_start, prange->start + i, last_domain ? "GPU" : "CPU"); - pte_flags = svm_range_get_pte_flags(adev, prange, last_domain); + pte_flags = svm_range_get_pte_flags(pdd->dev, prange, last_domain); if (readonly) pte_flags &= ~AMDGPU_PTE_WRITEABLE; @@ -1347,6 +1338,9 @@ svm_range_map_to_gpu(struct kfd_process_device *pdd, struct svm_range *prange, (last_domain == SVM_RANGE_VRAM_DOMAIN) ? 1 : 0, pte_flags); + /* TODO: we still need to determine the vm_manager.vram_base_offset based on + * the memory partition. 
+ */ r = amdgpu_vm_update_range(adev, vm, false, false, flush_tlb, NULL, last_start, prange->start + i, pte_flags, @@ -1384,16 +1378,14 @@ svm_range_map_to_gpus(struct svm_range *prange, unsigned long offset, unsigned long *bitmap, bool wait, bool flush_tlb) { struct kfd_process_device *pdd; - struct amdgpu_device *bo_adev; + struct amdgpu_device *bo_adev = NULL; struct kfd_process *p; struct dma_fence *fence = NULL; uint32_t gpuidx; int r = 0; if (prange->svm_bo && prange->ttm_res) - bo_adev = amdgpu_ttm_adev(prange->svm_bo->bo->tbo.bdev); - else - bo_adev = NULL; + bo_adev = prange->svm_bo->node->adev; p = container_of(prange->svms, struct kfd_process, svms); for_each_set_bit(gpuidx, bitmap, MAX_GPU_INSTANCE) { @@ -2526,17 +2518,17 @@ svm_range_from_addr(struct svm_range_list *svms, unsigned long addr, */ static int32_t svm_range_best_restore_location(struct svm_range *prange, - struct amdgpu_device *adev, + struct kfd_node *node, int32_t *gpuidx) { - struct amdgpu_device *bo_adev, *preferred_adev; + struct kfd_node *bo_node, *preferred_node; struct kfd_process *p; uint32_t gpuid; int r; p = container_of(prange->svms, struct kfd_process, svms); - r = kfd_process_gpuid_from_adev(p, adev, &gpuid, gpuidx); + r = kfd_process_gpuid_from_node(p, node, &gpuid, gpuidx); if (r < 0) { pr_debug("failed to get gpuid from kgd\n"); return -1; @@ -2546,9 +2538,8 @@ svm_range_best_restore_location(struct svm_range *prange, prange->preferred_loc == KFD_IOCTL_SVM_LOCATION_SYSMEM) { return prange->preferred_loc; } else if (prange->preferred_loc != KFD_IOCTL_SVM_LOCATION_UNDEFINED) { - preferred_adev = svm_range_get_adev_by_id(prange, - prange->preferred_loc); - if (amdgpu_xgmi_same_hive(adev, preferred_adev)) + preferred_node = svm_range_get_node_by_id(prange, prange->preferred_loc); + if (preferred_node && svm_nodes_in_same_hive(node, preferred_node)) return prange->preferred_loc; /* fall through */ } @@ -2560,8 +2551,8 @@ svm_range_best_restore_location(struct svm_range *prange, 
if (!prange->actual_loc) return 0; - bo_adev = svm_range_get_adev_by_id(prange, prange->actual_loc); - if (amdgpu_xgmi_same_hive(adev, bo_adev)) + bo_node = svm_range_get_node_by_id(prange, prange->actual_loc); + if (bo_node && svm_nodes_in_same_hive(node, bo_node)) return prange->actual_loc; else return 0; @@ -2678,7 +2669,7 @@ svm_range_check_vm_userptr(struct kfd_process *p, uint64_t start, uint64_t last, } static struct -svm_range *svm_range_create_unregistered_range(struct amdgpu_device *adev, +svm_range *svm_range_create_unregistered_range(struct kfd_node *node, struct kfd_process *p, struct mm_struct *mm, int64_t addr) @@ -2713,7 +2704,7 @@ svm_range *svm_range_create_unregistered_range(struct amdgpu_device *adev, pr_debug("Failed to create prange in address [0x%llx]\n", addr); return NULL; } - if (kfd_process_gpuid_from_adev(p, adev, &gpuid, &gpuidx)) { + if (kfd_process_gpuid_from_node(p, node, &gpuid, &gpuidx)) { pr_debug("failed to get gpuid from kgd\n"); svm_range_free(prange, true); return NULL; @@ -2767,7 +2758,7 @@ static bool svm_range_skip_recover(struct svm_range *prange) } static void -svm_range_count_fault(struct amdgpu_device *adev, struct kfd_process *p, +svm_range_count_fault(struct kfd_node *node, struct kfd_process *p, int32_t gpuidx) { struct kfd_process_device *pdd; @@ -2780,7 +2771,7 @@ svm_range_count_fault(struct amdgpu_device *adev, struct kfd_process *p, uint32_t gpuid; int r; - r = kfd_process_gpuid_from_adev(p, adev, &gpuid, &gpuidx); + r = kfd_process_gpuid_from_node(p, node, &gpuid, &gpuidx); if (r < 0) return; } @@ -2808,6 +2799,7 @@ svm_fault_allowed(struct vm_area_struct *vma, bool write_fault) int svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid, + uint32_t client_id, uint32_t node_id, uint64_t addr, bool write_fault) { struct mm_struct *mm = NULL; @@ -2815,6 +2807,7 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid, struct svm_range *prange; struct kfd_process *p; ktime_t 
timestamp = ktime_get_boottime(); + struct kfd_node *node; int32_t best_loc; int32_t gpuidx = MAX_GPU_INSTANCE; bool write_locked = false; @@ -2858,6 +2851,13 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid, goto out; } + node = kfd_node_by_irq_ids(adev, node_id, client_id); + if (!node) { + pr_debug("kfd node does not exist node_id: %d, client_id: %d\n", node_id, + client_id); + r = -EFAULT; + goto out; + } mmap_read_lock(mm); retry_write_locked: mutex_lock(&svms->lock); @@ -2876,7 +2876,7 @@ retry_write_locked: write_locked = true; goto retry_write_locked; } - prange = svm_range_create_unregistered_range(adev, p, mm, addr); + prange = svm_range_create_unregistered_range(node, p, mm, addr); if (!prange) { pr_debug("failed to create unregistered range svms 0x%p address [0x%llx]\n", svms, addr); @@ -2891,7 +2891,7 @@ retry_write_locked: mutex_lock(&prange->migrate_mutex); if (svm_range_skip_recover(prange)) { - amdgpu_gmc_filter_faults_remove(adev, addr, pasid); + amdgpu_gmc_filter_faults_remove(node->adev, addr, pasid); r = 0; goto out_unlock_range; } @@ -2922,7 +2922,7 @@ retry_write_locked: goto out_unlock_range; } - best_loc = svm_range_best_restore_location(prange, adev, &gpuidx); + best_loc = svm_range_best_restore_location(prange, node, &gpuidx); if (best_loc == -1) { pr_debug("svms %p failed get best restore loc [0x%lx 0x%lx]\n", svms, prange->start, prange->last); @@ -2981,7 +2981,7 @@ out_unlock_svms: mutex_unlock(&svms->lock); mmap_read_unlock(mm); - svm_range_count_fault(adev, p, gpuidx); + svm_range_count_fault(node, p, gpuidx); mmput(mm); out: @@ -2989,7 +2989,7 @@ out: if (r == -EAGAIN) { pr_debug("recover vm fault later\n"); - amdgpu_gmc_filter_faults_remove(adev, addr, pasid); + amdgpu_gmc_filter_faults_remove(node->adev, addr, pasid); r = 0; } return r; @@ -3231,7 +3231,7 @@ svm_range_best_prefetch_location(struct svm_range *prange) DECLARE_BITMAP(bitmap, MAX_GPU_INSTANCE); uint32_t best_loc = prange->prefetch_loc; struct 
kfd_process_device *pdd; - struct amdgpu_device *bo_adev; + struct kfd_node *bo_node; struct kfd_process *p; uint32_t gpuidx; @@ -3240,9 +3240,9 @@ svm_range_best_prefetch_location(struct svm_range *prange) if (!best_loc || best_loc == KFD_IOCTL_SVM_LOCATION_UNDEFINED) goto out; - bo_adev = svm_range_get_adev_by_id(prange, best_loc); - if (!bo_adev) { - WARN_ONCE(1, "failed to get device by id 0x%x\n", best_loc); + bo_node = svm_range_get_node_by_id(prange, best_loc); + if (!bo_node) { + WARN_ONCE(1, "failed to get valid kfd node at id%x\n", best_loc); best_loc = 0; goto out; } @@ -3260,10 +3260,10 @@ svm_range_best_prefetch_location(struct svm_range *prange) continue; } - if (pdd->dev->adev == bo_adev) + if (pdd->dev->adev == bo_node->adev) continue; - if (!amdgpu_xgmi_same_hive(pdd->dev->adev, bo_adev)) { + if (!svm_nodes_in_same_hive(pdd->dev, bo_node)) { best_loc = 0; break; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h index 7a33b93f9df6..a165c73b40b2 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h @@ -48,6 +48,7 @@ struct svm_range_bo { struct work_struct eviction_work; uint32_t evicting; struct work_struct release_work; + struct kfd_node *node; }; enum svm_work_list_ops { @@ -163,16 +164,17 @@ int svm_ioctl(struct kfd_process *p, enum kfd_ioctl_svm_op op, uint64_t start, struct svm_range *svm_range_from_addr(struct svm_range_list *svms, unsigned long addr, struct svm_range **parent); -struct amdgpu_device *svm_range_get_adev_by_id(struct svm_range *prange, - uint32_t id); -int svm_range_vram_node_new(struct amdgpu_device *adev, - struct svm_range *prange, bool clear); +struct kfd_node *svm_range_get_node_by_id(struct svm_range *prange, + uint32_t gpu_id); +int svm_range_vram_node_new(struct kfd_node *node, struct svm_range *prange, + bool clear); void svm_range_vram_node_free(struct svm_range *prange); int svm_range_split_by_granularity(struct kfd_process *p, struct 
mm_struct *mm, unsigned long addr, struct svm_range *parent, struct svm_range *prange); -int svm_range_restore_pages(struct amdgpu_device *adev, - unsigned int pasid, uint64_t addr, bool write_fault); +int svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid, + uint32_t client_id, uint32_t node_id, uint64_t addr, + bool write_fault); int svm_range_schedule_evict_svm_bo(struct amdgpu_amdkfd_fence *fence); void svm_range_add_list_work(struct svm_range_list *svms, struct svm_range *prange, struct mm_struct *mm, @@ -192,7 +194,7 @@ int kfd_criu_restore_svm(struct kfd_process *p, uint64_t max_priv_data_size); int kfd_criu_resume_svm(struct kfd_process *p); struct kfd_process_device * -svm_range_get_pdd_by_adev(struct svm_range *prange, struct amdgpu_device *adev); +svm_range_get_pdd_by_node(struct svm_range *prange, struct kfd_node *node); void svm_range_list_lock_and_flush_work(struct svm_range_list *svms, struct mm_struct *mm); /* SVM API and HMM page migration work together, device memory type @@ -219,8 +221,9 @@ static inline void svm_range_list_fini(struct kfd_process *p) } static inline int svm_range_restore_pages(struct amdgpu_device *adev, - unsigned int pasid, uint64_t addr, - bool write_fault) + unsigned int pasid, + uint32_t client_id, uint32_t node_id, + uint64_t addr, bool write_fault) { return -EFAULT; } -- cgit From f5fe7edfd6ce62cd23fbd707e7f9fe0f56a45e94 Mon Sep 17 00:00:00 2001 From: Mukul Joshi Date: Fri, 30 Sep 2022 09:16:21 -0400 Subject: drm/amdkfd: Update interrupt handling for GFX9.4.3 Update interrupt handling in CPX mode for GFX9.4.3 by using the VMID space instead of SDMA client id to determine if an interrupt should be processed by a KFD node. This is especially needed for handling retry faults from MMHUB. 
Signed-off-by: Mukul Joshi Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 7 +++++-- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 2 +- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 4 ++-- drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 16 ++++++---------- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 8 ++++---- drivers/gpu/drm/amd/amdkfd/kfd_svm.h | 2 +- 6 files changed, 19 insertions(+), 20 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index c390b2856cc9..22a900f298f2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -2434,6 +2434,9 @@ void amdgpu_vm_set_task_info(struct amdgpu_vm *vm) * amdgpu_vm_handle_fault - graceful handling of VM faults. * @adev: amdgpu device pointer * @pasid: PASID of the VM + * @vmid: VMID, only used for GFX 9.4.3. + * @node_id: Node_id received in IH cookie. Only applicable for + * GFX 9.4.3. * @addr: Address of the fault * @write_fault: true is write fault, false is read fault * @@ -2441,7 +2444,7 @@ void amdgpu_vm_set_task_info(struct amdgpu_vm *vm) * shouldn't be reported any more. 
*/ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid, - u32 client_id, u32 node_id, uint64_t addr, + u32 vmid, u32 node_id, uint64_t addr, bool write_fault) { bool is_compute_context = false; @@ -2466,7 +2469,7 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid, addr /= AMDGPU_GPU_PAGE_SIZE; - if (is_compute_context && !svm_range_restore_pages(adev, pasid, client_id, + if (is_compute_context && !svm_range_restore_pages(adev, pasid, vmid, node_id, addr, write_fault)) { amdgpu_bo_unref(&root); return true; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index dbab31647186..8add5f5eb92a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -455,7 +455,7 @@ void amdgpu_vm_check_compute_bug(struct amdgpu_device *adev); void amdgpu_vm_get_task_info(struct amdgpu_device *adev, u32 pasid, struct amdgpu_task_info *task_info); bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid, - u32 client_id, u32 node_id, uint64_t addr, + u32 vmid, u32 node_id, uint64_t addr, bool write_fault); void amdgpu_vm_set_task_info(struct amdgpu_vm *vm); diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index c5752a349f3d..f2814270da40 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -587,7 +587,7 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev, cam_index = entry->src_data[2] & 0x3ff; - ret = amdgpu_vm_handle_fault(adev, entry->pasid, entry->client_id, node_id, + ret = amdgpu_vm_handle_fault(adev, entry->pasid, entry->vmid, node_id, addr, write_fault); WDOORBELL32(adev->irq.retry_cam_doorbell_index, cam_index); if (ret) @@ -610,7 +610,7 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev, /* Try to handle the recoverable page faults by filling page * tables */ - if (amdgpu_vm_handle_fault(adev, entry->pasid, entry->client_id, node_id, + 
if (amdgpu_vm_handle_fault(adev, entry->pasid, entry->vmid, node_id, addr, write_fault)) return 1; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index df372de6b056..fb3cf2c51da8 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -1073,18 +1073,14 @@ struct kfd_topology_device *kfd_topology_device_by_id(uint32_t gpu_id); struct kfd_node *kfd_device_by_id(uint32_t gpu_id); struct kfd_node *kfd_device_by_pci_dev(const struct pci_dev *pdev); struct kfd_node *kfd_device_by_adev(const struct amdgpu_device *adev); -static inline bool kfd_irq_is_from_node(struct kfd_node *node, uint32_t client_id, - uint32_t node_id) +static inline bool kfd_irq_is_from_node(struct kfd_node *node, uint32_t node_id, + uint32_t vmid) { - if ((node->interrupt_bitmap & (0x1U << node_id)) || - ((node_id % 4) == 0 && - (node->interrupt_bitmap >> 16) & (0x1U << client_id))) - return true; - - return false; + return (node->interrupt_bitmap & (1 << node_id)) != 0 && + (node->compute_vmid_bitmap & (1 << vmid)) != 0; } static inline struct kfd_node *kfd_node_by_irq_ids(struct amdgpu_device *adev, - uint32_t client_id, uint32_t node_id) { + uint32_t node_id, uint32_t vmid) { struct kfd_dev *dev = adev->kfd.dev; uint32_t i; @@ -1092,7 +1088,7 @@ static inline struct kfd_node *kfd_node_by_irq_ids(struct amdgpu_device *adev, return dev->nodes[0]; for (i = 0; i < dev->num_nodes; i++) - if (kfd_irq_is_from_node(dev->nodes[i], client_id, node_id)) + if (kfd_irq_is_from_node(dev->nodes[i], node_id, vmid)) return dev->nodes[i]; return NULL; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 0dafbbe954ca..5d6e02559d8e 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -2799,7 +2799,7 @@ svm_fault_allowed(struct vm_area_struct *vma, bool write_fault) int svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid, - 
uint32_t client_id, uint32_t node_id, + uint32_t vmid, uint32_t node_id, uint64_t addr, bool write_fault) { struct mm_struct *mm = NULL; @@ -2851,10 +2851,10 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid, goto out; } - node = kfd_node_by_irq_ids(adev, node_id, client_id); + node = kfd_node_by_irq_ids(adev, node_id, vmid); if (!node) { - pr_debug("kfd node does not exist node_id: %d, client_id: %d\n", node_id, - client_id); + pr_debug("kfd node does not exist node_id: %d, vmid: %d\n", node_id, + vmid); r = -EFAULT; goto out; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h index a165c73b40b2..5116786718b6 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h @@ -173,7 +173,7 @@ int svm_range_split_by_granularity(struct kfd_process *p, struct mm_struct *mm, unsigned long addr, struct svm_range *parent, struct svm_range *prange); int svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid, - uint32_t client_id, uint32_t node_id, uint64_t addr, + uint32_t vmid, uint32_t node_id, uint64_t addr, bool write_fault); int svm_range_schedule_evict_svm_bo(struct amdgpu_amdkfd_fence *fence); void svm_range_add_list_work(struct svm_range_list *svms, -- cgit From eaae4beee8a94b30f37341c9d14837c82e7e2647 Mon Sep 17 00:00:00 2001 From: Philip Yang Date: Mon, 14 Nov 2022 17:35:43 -0500 Subject: drm/amdgpu: more GPU page fault info for GC v9.4.3 Output IH cookie node_id and translate it to the corresponding AID id and XCC id, to help debug the GPU page fault. 
Signed-off-by: Philip Yang Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index f2814270da40..2966aca9545d 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -557,7 +557,9 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev, u64 addr; uint32_t cam_index = 0; int ret; - uint32_t node_id = 0; + uint32_t node_id; + + node_id = (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3)) ? entry->node_id : 0; addr = (u64)entry->src_data[0] << 12; addr |= ((u64)entry->src_data[1] & 0xf) << 44; @@ -570,8 +572,6 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev, hub = &adev->vmhub[AMDGPU_MMHUB1(0)]; } else { hub_name = "gfxhub0"; - node_id = (adev->ip_versions[GC_HWIP][0] == - IP_VERSION(9, 4, 3)) ? entry->node_id : 0; hub = &adev->vmhub[node_id/2]; } @@ -634,6 +634,11 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev, addr, entry->client_id, soc15_ih_clientid_name[entry->client_id]); + if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3)) + dev_err(adev->dev, " cookie node_id %d fault from die %s%d%s\n", + node_id, node_id % 4 == 3 ? "RSV" : "AID", node_id / 4, + node_id % 4 == 1 ? ".XCD0" : node_id % 4 == 2 ? 
".XCD1" : ""); + if (amdgpu_sriov_vf(adev)) return 0; -- cgit From 497db7ea33f7cec2a0019894e844789f003dbd22 Mon Sep 17 00:00:00 2001 From: Rajneesh Bhardwaj Date: Tue, 8 Nov 2022 23:04:30 -0500 Subject: drm/amdgpu: Check APU supports true APP mode On GFXIP 9.4.3 APU, in no carveout mode there is no real vram heap and it could be emulated by the driver over the interleaved NUMA system memory. The APU could also be in the carveout mode during early development stage or otherwise for debugging purposes, so introduce a new member in amdgpu_gmc to figure out whether the APU is in the native mode as per the production configuration. AMD_IS_APU cannot be used for Accelerated Processing Platform APUs as it might be used in a different context on previous generations or on small APUs. Reviewed-by: Hawking Zhang Reviewed-by: Felix Kuehling Tested-by: Graham Sider Signed-off-by: Rajneesh Bhardwaj Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h | 1 + drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 14 ++++++++++++++ 2 files changed, 15 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h index 572ef5be539f..e408abfc2daf 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h @@ -251,6 +251,7 @@ struct amdgpu_gmc { uint64_t last_fault:AMDGPU_GMC_FAULT_RING_ORDER; bool tmz_enabled; + bool is_app_apu; const struct amdgpu_gmc_funcs *gmc_funcs; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 2966aca9545d..0792c48fe347 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -1442,6 +1442,20 @@ static int gmc_v9_0_early_init(void *handle) adev->smuio.funcs->is_host_gpu_xgmi_supported(adev); } + if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3)) { + enum amdgpu_pkg_type pkg_type = + adev->smuio.funcs->get_pkg_type(adev); + /* 
On GFXIP 9.4.3. APU, there is no physical VRAM domain present + * and the APU, can be in used two possible modes: + * - carveout mode + * - native APU mode + * "is_app_apu" can be used to identify the APU in the native + * mode. + */ + adev->gmc.is_app_apu = (pkg_type == AMDGPU_PKG_TYPE_APU && + !pci_resource_len(adev->pdev, 0)); + } + gmc_v9_0_set_gmc_funcs(adev); gmc_v9_0_set_irq_funcs(adev); gmc_v9_0_set_umc_funcs(adev); -- cgit From 7a1efad04c210594069c4ab9f9c25039cd6915e4 Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Tue, 29 Nov 2022 14:00:37 +0530 Subject: drm/amdgpu: Use mask for active clusters Use a mask of available active clusters instead of using only the number of active clusters. Signed-off-by: Lijo Lazar Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 3 +- .../gpu/drm/amd/amdgpu/aqua_vanjaram_reg_init.c | 13 ++--- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 5 +- drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c | 56 ++++++++++++++-------- 4 files changed, 49 insertions(+), 28 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 3858d29baef1..279057ec7a0b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1070,7 +1070,8 @@ struct amdgpu_device { bool job_hang; bool dc_enabled; - uint32_t num_aid; + /* Mask of active clusters */ + uint32_t aid_mask; }; static inline struct amdgpu_device *drm_to_adev(struct drm_device *ddev) diff --git a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram_reg_init.c b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram_reg_init.c index 6f7226b5d446..0d7bc212def1 100644 --- a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram_reg_init.c +++ b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram_reg_init.c @@ -358,25 +358,26 @@ static int aqua_vanjaram_xcp_mgr_init(struct amdgpu_device *adev) int aqua_vanjaram_init_soc_config(struct amdgpu_device *adev) { u32 inst_mask = 
adev->sdma.sdma_mask; - int ret; + int ret, i, num_inst; /* generally 1 AID supports 4 instances */ adev->sdma.num_inst_per_aid = 4; adev->sdma.num_instances = NUM_SDMA(adev->sdma.sdma_mask); - adev->num_aid = 1; + adev->aid_mask = i = 1; inst_mask >>= adev->sdma.num_inst_per_aid; for (const u32 mask = (1 << adev->sdma.num_inst_per_aid) - 1; inst_mask; - inst_mask >>= adev->sdma.num_inst_per_aid) { + inst_mask >>= adev->sdma.num_inst_per_aid, ++i) { if ((inst_mask & mask) == mask) - adev->num_aid++; + adev->aid_mask |= (1 << i); } + num_inst = hweight32(adev->aid_mask); adev->vcn.num_inst_per_aid = 1; - adev->vcn.num_vcn_inst = adev->vcn.num_inst_per_aid * adev->num_aid; + adev->vcn.num_vcn_inst = adev->vcn.num_inst_per_aid * num_inst; adev->jpeg.num_inst_per_aid = 1; - adev->jpeg.num_jpeg_inst = adev->jpeg.num_inst_per_aid * adev->num_aid; + adev->jpeg.num_jpeg_inst = adev->jpeg.num_inst_per_aid * num_inst; ret = aqua_vanjaram_xcp_mgr_init(adev); if (ret) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 0792c48fe347..b3f64f2f306d 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -1674,6 +1674,7 @@ static int gmc_v9_0_sw_init(void *handle) { int r, vram_width = 0, vram_type = 0, vram_vendor = 0, dma_addr_bits; struct amdgpu_device *adev = (struct amdgpu_device *)handle; + unsigned long inst_mask = adev->aid_mask; adev->gfxhub.funcs->init(adev); @@ -1757,7 +1758,9 @@ static int gmc_v9_0_sw_init(void *handle) case IP_VERSION(9, 4, 3): bitmap_set(adev->vmhubs_mask, AMDGPU_GFXHUB(0), NUM_XCC(adev->gfx.xcc_mask)); - bitmap_set(adev->vmhubs_mask, AMDGPU_MMHUB0(0), adev->num_aid); + + inst_mask <<= AMDGPU_MMHUB0(0); + bitmap_or(adev->vmhubs_mask, adev->vmhubs_mask, &inst_mask, 32); amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48); break; diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c index 6f469b9aa9a0..a530e2a3cc28 100644 --- 
a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c @@ -54,9 +54,11 @@ static void mmhub_v1_8_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmi uint64_t page_table_base) { struct amdgpu_vmhub *hub; + u32 inst_mask; int i; - for (i = 0; i < adev->num_aid; i++) { + inst_mask = adev->aid_mask; + for_each_inst(i, inst_mask) { hub = &adev->vmhub[AMDGPU_MMHUB0(i)]; WREG32_SOC15_OFFSET(MMHUB, i, regVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32, @@ -73,6 +75,7 @@ static void mmhub_v1_8_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmi static void mmhub_v1_8_init_gart_aperture_regs(struct amdgpu_device *adev) { uint64_t pt_base; + u32 inst_mask; int i; if (adev->gmc.pdb0_bo) @@ -85,7 +88,8 @@ static void mmhub_v1_8_init_gart_aperture_regs(struct amdgpu_device *adev) /* If use GART for FB translation, vmid0 page table covers both * vram and system memory (gart) */ - for (i = 0; i < adev->num_aid; i++) { + inst_mask = adev->aid_mask; + for_each_inst(i, inst_mask) { if (adev->gmc.pdb0_bo) { WREG32_SOC15(MMHUB, i, regVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32, @@ -121,11 +125,12 @@ static void mmhub_v1_8_init_gart_aperture_regs(struct amdgpu_device *adev) static void mmhub_v1_8_init_system_aperture_regs(struct amdgpu_device *adev) { + uint32_t tmp, inst_mask; uint64_t value; - uint32_t tmp; int i; - for (i = 0; i < adev->num_aid; i++) { + inst_mask = adev->aid_mask; + for_each_inst(i, inst_mask) { /* Program the AGP BAR */ WREG32_SOC15(MMHUB, i, regMC_VM_AGP_BASE, 0); WREG32_SOC15(MMHUB, i, regMC_VM_AGP_BOT, @@ -183,11 +188,12 @@ static void mmhub_v1_8_init_system_aperture_regs(struct amdgpu_device *adev) static void mmhub_v1_8_init_tlb_regs(struct amdgpu_device *adev) { - uint32_t tmp; + uint32_t tmp, inst_mask; int i; /* Setup TLB control */ - for (i = 0; i < adev->num_aid; i++) { + inst_mask = adev->aid_mask; + for_each_inst(i, inst_mask) { tmp = RREG32_SOC15(MMHUB, i, regMC_VM_MX_L1_TLB_CNTL); tmp = REG_SET_FIELD(tmp, 
MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, @@ -208,14 +214,15 @@ static void mmhub_v1_8_init_tlb_regs(struct amdgpu_device *adev) static void mmhub_v1_8_init_cache_regs(struct amdgpu_device *adev) { - uint32_t tmp; + uint32_t tmp, inst_mask; int i; if (amdgpu_sriov_vf(adev)) return; /* Setup L2 cache */ - for (i = 0; i < adev->num_aid; i++) { + inst_mask = adev->aid_mask; + for_each_inst(i, inst_mask) { tmp = RREG32_SOC15(MMHUB, i, regVM_L2_CNTL); tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 1); tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, @@ -267,10 +274,11 @@ static void mmhub_v1_8_init_cache_regs(struct amdgpu_device *adev) static void mmhub_v1_8_enable_system_domain(struct amdgpu_device *adev) { - uint32_t tmp; + uint32_t tmp, inst_mask; int i; - for (i = 0; i < adev->num_aid; i++) { + inst_mask = adev->aid_mask; + for_each_inst(i, inst_mask) { tmp = RREG32_SOC15(MMHUB, i, regVM_CONTEXT0_CNTL); tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, ENABLE_CONTEXT, 1); tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, PAGE_TABLE_DEPTH, @@ -286,12 +294,14 @@ static void mmhub_v1_8_enable_system_domain(struct amdgpu_device *adev) static void mmhub_v1_8_disable_identity_aperture(struct amdgpu_device *adev) { + u32 inst_mask; int i; if (amdgpu_sriov_vf(adev)) return; - for (i = 0; i < adev->num_aid; i++) { + inst_mask = adev->aid_mask; + for_each_inst(i, inst_mask) { WREG32_SOC15(MMHUB, i, regVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_LO32, 0XFFFFFFFF); @@ -317,7 +327,7 @@ static void mmhub_v1_8_setup_vmid_config(struct amdgpu_device *adev) { struct amdgpu_vmhub *hub; unsigned num_level, block_size; - uint32_t tmp; + uint32_t tmp, inst_mask; int i, j; num_level = adev->vm_manager.num_level; @@ -327,7 +337,8 @@ static void mmhub_v1_8_setup_vmid_config(struct amdgpu_device *adev) else block_size -= 9; - for (j = 0; j < adev->num_aid; j++) { + inst_mask = adev->aid_mask; + for_each_inst(j, inst_mask) { hub = &adev->vmhub[AMDGPU_MMHUB0(j)]; for (i = 0; i <= 14; i++) { tmp = 
RREG32_SOC15_OFFSET(MMHUB, j, regVM_CONTEXT1_CNTL, @@ -382,9 +393,10 @@ static void mmhub_v1_8_setup_vmid_config(struct amdgpu_device *adev) static void mmhub_v1_8_program_invalidation(struct amdgpu_device *adev) { struct amdgpu_vmhub *hub; - unsigned i, j; + u32 i, j, inst_mask; - for (j = 0; j < adev->num_aid; j++) { + inst_mask = adev->aid_mask; + for_each_inst(j, inst_mask) { hub = &adev->vmhub[AMDGPU_MMHUB0(j)]; for (i = 0; i < 18; ++i) { WREG32_SOC15_OFFSET(MMHUB, j, @@ -429,10 +441,11 @@ static void mmhub_v1_8_gart_disable(struct amdgpu_device *adev) { struct amdgpu_vmhub *hub; u32 tmp; - u32 i, j; + u32 i, j, inst_mask; /* Disable all tables */ - for (j = 0; j < adev->num_aid; j++) { + inst_mask = adev->aid_mask; + for_each_inst(j, inst_mask) { hub = &adev->vmhub[AMDGPU_MMHUB0(j)]; for (i = 0; i < 16; i++) WREG32_SOC15_OFFSET(MMHUB, j, regVM_CONTEXT0_CNTL, @@ -465,13 +478,14 @@ static void mmhub_v1_8_gart_disable(struct amdgpu_device *adev) */ static void mmhub_v1_8_set_fault_enable_default(struct amdgpu_device *adev, bool value) { - u32 tmp; + u32 tmp, inst_mask; int i; if (amdgpu_sriov_vf(adev)) return; - for (i = 0; i < adev->num_aid; i++) { + inst_mask = adev->aid_mask; + for_each_inst(i, inst_mask) { tmp = RREG32_SOC15(MMHUB, i, regVM_L2_PROTECTION_FAULT_CNTL); tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL, RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, value); @@ -510,9 +524,11 @@ static void mmhub_v1_8_set_fault_enable_default(struct amdgpu_device *adev, bool static void mmhub_v1_8_init(struct amdgpu_device *adev) { struct amdgpu_vmhub *hub; + u32 inst_mask; int i; - for (i = 0; i < adev->num_aid; i++) { + inst_mask = adev->aid_mask; + for_each_inst(i, inst_mask) { hub = &adev->vmhub[AMDGPU_MMHUB0(i)]; hub->ctx0_ptb_addr_lo32 = SOC15_REG_OFFSET(MMHUB, i, -- cgit From 753b999afe47900531282f86bf430aec250b4232 Mon Sep 17 00:00:00 2001 From: Rajneesh Bhardwaj Date: Wed, 7 Dec 2022 00:29:40 -0500 Subject: drm/amdgpu: set MTYPE in PTE for GFXIP 9.4.3 Apply 
the GFXIP 9.4.3 specific snoop and mtype settings for various scenarios such as APU, APU in Carveout mode and dGPU mode. Note: This is expected to change due to: 1 - NPS > 1 support in future 2 - Hardware bugs found during initial asic bringup. Cc: Graham Sider Cc: Hawking Zhang Signed-off-by: Rajneesh Bhardwaj Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 40 ++++++++++++++++++++++++++++++++++- 1 file changed, 39 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index b3f64f2f306d..3765178e6fc5 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -1173,7 +1173,6 @@ static void gmc_v9_0_get_coherence_flags(struct amdgpu_device *adev, switch (adev->ip_versions[GC_HWIP][0]) { case IP_VERSION(9, 4, 1): case IP_VERSION(9, 4, 2): - case IP_VERSION(9, 4, 3): if (is_vram) { if (bo_adev == adev) { if (uncached) @@ -1207,6 +1206,45 @@ static void gmc_v9_0_get_coherence_flags(struct amdgpu_device *adev, */ snoop = true; } + break; + case IP_VERSION(9, 4, 3): + /* FIXME: Needs more work for handling multiple memory + * partitions (> NPS1 mode) e.g. NPS4 for both APU and dGPU + * modes. + */ + snoop = true; + if (uncached) { + mtype = MTYPE_UC; + } else if (adev->gmc.is_app_apu) { + /* FIXME: APU in native mode, NPS1 single socket only + * + * For suporting NUMA partitioned APU e.g. in NPS4 mode, + * this need to look at the NUMA node on which the + * system memory allocation was done. + * + * Memory access by a different partition within same + * socket should be treated as remote access so MTYPE_RW + * cannot be used always. 
+ */ + mtype = MTYPE_RW; + } else if (adev->flags & AMD_IS_APU) { + /* APU on carve out mode */ + mtype = MTYPE_RW; + } else { + /* dGPU */ + /* + if ((mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) && + bo_adev == adev) + mapping_flags |= AMDGPU_VM_MTYPE_RW; + else + */ + /* Temporarily comment out above lines and use MTYPE_NC + * on both VRAM and system memory access until + * MTYPE_RW can properly work on VRAM access + */ + mtype = MTYPE_NC; + } + break; default: if (uncached || coherent) -- cgit From 98b2e9cad2279132e3aa4b9caf9164b2e35c1a52 Mon Sep 17 00:00:00 2001 From: Le Ma Date: Fri, 9 Dec 2022 19:44:05 +0800 Subject: drm/amdgpu: correct the vmhub index when page fault occurs Each AMDGPU_GFXHUB is bound to an xcc in logical order, so convert the node_id to the logical xcc_id to index the correct AMDGPU_GFXHUB. "node_id / 4" gives the correct AMDGPU_MMHUB0 index. Signed-off-by: Le Ma Tested-by: Asad kamal Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 1 + drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 27 ++++++++++++++------------- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 14 ++++++++++---- 3 files changed, 25 insertions(+), 17 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index 2287768ed141..81b4c7e684af 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -280,6 +280,7 @@ struct amdgpu_gfx_funcs { (*query_mem_partition_mode)(struct amdgpu_device *adev); int (*switch_partition_mode)(struct amdgpu_device *adev, int num_xccs_per_xcp); + int (*ih_node_to_logical_xcc)(struct amdgpu_device *adev, int ih_node); }; struct sq_work { diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index ef552c9b19b5..6aaa810ea044 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -637,6 +637,19 @@ static int gfx_v9_4_3_switch_compute_partition(struct amdgpu_device *adev, return 0; } +static int gfx_v9_4_3_ih_to_xcc_inst(struct amdgpu_device *adev, int ih_node) +{ + int xcc; + + xcc = hweight8(adev->gfx.xcc_mask & GENMASK(ih_node / 2, 0)); + if (!xcc) { + dev_err(adev->dev, "Couldn't find xcc mapping from IH node"); + return -EINVAL; + } + + return xcc - 1; +} + static const struct amdgpu_gfx_funcs gfx_v9_4_3_gfx_funcs = { .get_gpu_clock_counter = &gfx_v9_4_3_get_gpu_clock_counter, .select_se_sh = &gfx_v9_4_3_xcc_select_se_sh, @@ -646,6 +659,7 @@ static const struct amdgpu_gfx_funcs gfx_v9_4_3_gfx_funcs = { .select_me_pipe_q = &gfx_v9_4_3_select_me_pipe_q, .switch_partition_mode = &gfx_v9_4_3_switch_compute_partition, .query_mem_partition_mode = &gfx_v9_4_3_query_memory_partition, + .ih_node_to_logical_xcc = &gfx_v9_4_3_ih_to_xcc_inst, }; static int gfx_v9_4_3_gpu_early_init(struct amdgpu_device *adev) @@ -2754,19 +2768,6 @@ static int gfx_v9_4_3_set_eop_interrupt_state(struct amdgpu_device *adev, return 0; } -static int gfx_v9_4_3_ih_to_xcc_inst(struct amdgpu_device *adev, int ih_node) -{ - int xcc; - - xcc = hweight8(adev->gfx.xcc_mask & GENMASK(ih_node / 2, 0)); - if (!xcc) { - dev_err(adev->dev, "Couldn't find xcc mapping from IH node"); - return -EINVAL; - } - - return xcc - 1; -} - static int gfx_v9_4_3_eop_irq(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 3765178e6fc5..841333148610 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -557,22 +557,28 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev, u64 addr; uint32_t cam_index = 0; int ret; - uint32_t node_id; + uint32_t node_id, xcc_id = 0; - node_id = (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3)) ? 
entry->node_id : 0; + node_id = entry->node_id; addr = (u64)entry->src_data[0] << 12; addr |= ((u64)entry->src_data[1] & 0xf) << 44; if (entry->client_id == SOC15_IH_CLIENTID_VMC) { hub_name = "mmhub0"; - hub = &adev->vmhub[AMDGPU_MMHUB0(0)]; + hub = &adev->vmhub[AMDGPU_MMHUB0(node_id / 4)]; } else if (entry->client_id == SOC15_IH_CLIENTID_VMC1) { hub_name = "mmhub1"; hub = &adev->vmhub[AMDGPU_MMHUB1(0)]; } else { hub_name = "gfxhub0"; - hub = &adev->vmhub[node_id/2]; + if (adev->gfx.funcs->ih_node_to_logical_xcc) { + xcc_id = adev->gfx.funcs->ih_node_to_logical_xcc(adev, + node_id); + if (xcc_id < 0) + xcc_id = 0; + } + hub = &adev->vmhub[xcc_id]; } if (retry_fault) { -- cgit From a0a0c69c05bff025abf49ec66b2bfb94aeabcc6e Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Wed, 14 Dec 2022 10:28:50 +0530 Subject: drm/amdgpu: Fix semaphore release Use the right register for semaphore release during invalidation. Signed-off-by: Lijo Lazar Reviewed-by: Le Ma Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 841333148610..1e4364120845 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -915,9 +915,9 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, * write with 0 means semaphore release */ if (vmhub >= AMDGPU_MMHUB0(0)) - WREG32_SOC15_IP_NO_KIQ(MMHUB, hub->vm_inv_eng0_req + hub->eng_distance * eng, inv_req); + WREG32_SOC15_IP_NO_KIQ(MMHUB, hub->vm_inv_eng0_sem + hub->eng_distance * eng, 0); else - WREG32_SOC15_IP_NO_KIQ(GC, hub->vm_inv_eng0_req + hub->eng_distance * eng, inv_req); + WREG32_SOC15_IP_NO_KIQ(GC, hub->vm_inv_eng0_sem + hub->eng_distance * eng, 0); } spin_unlock(&adev->gmc.invalidate_lock); -- cgit From 12c4d7edfb7238ded6c7a2584995d888b4d877ec Mon Sep 17 00:00:00 
2001 From: Lijo Lazar Date: Thu, 15 Dec 2022 13:13:29 +0530 Subject: drm/amdgpu: Fix GFX 9.4.3 dma address capability MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ASICs with GFX 9.4.3 support 48-bit addressing. Signed-off-by: Lijo Lazar Acked-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 1e4364120845..444441c6b7e3 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -1846,7 +1846,7 @@ static int gmc_v9_0_sw_init(void *handle) */ adev->gmc.mc_mask = 0xffffffffffffULL; /* 48 bit MC */ - dma_addr_bits = adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2) ? 48:44; + dma_addr_bits = adev->ip_versions[GC_HWIP][0] >= IP_VERSION(9, 4, 2) ? 48:44; r = dma_set_mask_and_coherent(adev->dev, DMA_BIT_MASK(dma_addr_bits)); if (r) { printk(KERN_WARNING "amdgpu: No suitable DMA available.\n"); -- cgit From c9a502e981a961053f3f873b14677d95e804251e Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Tue, 29 Nov 2022 12:45:26 -0500 Subject: drm/amdgpu: Allocate GART table in RAM for AMD APU Some AMD APUs may not have a dedicated VRAM. On such platforms the GART table should be allocated on the system memory. When real vram size is zero, place the GART table in system memory and create an SG BO to make it GPU accessible. 
v2: fix includes Reviewed-by: Felix Kuehling (rajneesh: removed set_memory_wc workaround) Signed-off-by: Rajneesh Bhardwaj Signed-off-by: Harish Kasiviswanathan Signed-off-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c | 137 +++++++++++++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h | 2 + drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 23 ++++-- 3 files changed, 156 insertions(+), 6 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c index 6b12f4a75fc3..a070adf30c88 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c @@ -35,6 +35,7 @@ #endif #include "amdgpu.h" #include +#include /* * GART @@ -102,6 +103,142 @@ void amdgpu_gart_dummy_page_fini(struct amdgpu_device *adev) adev->dummy_page_addr = 0; } +/** + * amdgpu_gart_table_ram_alloc - allocate system ram for gart page table + * + * @adev: amdgpu_device pointer + * + * Allocate system memory for GART page table for ASICs that don't have + * dedicated VRAM. + * Returns 0 for success, error for failure. + */ +int amdgpu_gart_table_ram_alloc(struct amdgpu_device *adev) +{ + unsigned int order = get_order(adev->gart.table_size); + gfp_t gfp_flags = GFP_KERNEL | __GFP_ZERO; + struct amdgpu_bo *bo = NULL; + struct sg_table *sg = NULL; + struct amdgpu_bo_param bp; + dma_addr_t dma_addr; + struct page *p; + int ret; + + if (adev->gart.bo != NULL) + return 0; + + p = alloc_pages(gfp_flags, order); + if (!p) + return -ENOMEM; + + /* If the hardware does not support UTCL2 snooping of the CPU caches + * then set_memory_wc() could be used as a workaround to mark the pages + * as write combine memory. 
+ */ + dma_addr = dma_map_page(&adev->pdev->dev, p, 0, adev->gart.table_size, + DMA_BIDIRECTIONAL); + if (dma_mapping_error(&adev->pdev->dev, dma_addr)) { + dev_err(&adev->pdev->dev, "Failed to DMA MAP the GART BO page\n"); + __free_pages(p, order); + p = NULL; + return -EFAULT; + } + + dev_info(adev->dev, "%s dma_addr:%llx\n", __func__, dma_addr); + /* Create SG table */ + sg = kmalloc(sizeof(*sg), GFP_KERNEL); + if (!sg) { + ret = -ENOMEM; + goto error; + } + ret = sg_alloc_table(sg, 1, GFP_KERNEL); + if (ret) + goto error; + + sg_dma_address(sg->sgl) = dma_addr; + sg->sgl->length = adev->gart.table_size; +#ifdef CONFIG_NEED_SG_DMA_LENGTH + sg->sgl->dma_length = adev->gart.table_size; +#endif + /* Create SG BO */ + memset(&bp, 0, sizeof(bp)); + bp.size = adev->gart.table_size; + bp.byte_align = PAGE_SIZE; + bp.domain = AMDGPU_GEM_DOMAIN_CPU; + bp.type = ttm_bo_type_sg; + bp.resv = NULL; + bp.bo_ptr_size = sizeof(struct amdgpu_bo); + bp.flags = 0; + ret = amdgpu_bo_create(adev, &bp, &bo); + if (ret) + goto error; + + bo->tbo.sg = sg; + bo->tbo.ttm->sg = sg; + bo->allowed_domains = AMDGPU_GEM_DOMAIN_GTT; + bo->preferred_domains = AMDGPU_GEM_DOMAIN_GTT; + + ret = amdgpu_bo_reserve(bo, true); + if (ret) { + dev_err(adev->dev, "(%d) failed to reserve bo for GART system bo\n", ret); + goto error; + } + + ret = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT); + WARN(ret, "Pinning the GART table failed"); + if (ret) + goto error_resv; + + adev->gart.bo = bo; + adev->gart.ptr = page_to_virt(p); + /* Make GART table accessible in VMID0 */ + ret = amdgpu_ttm_alloc_gart(&adev->gart.bo->tbo); + if (ret) + amdgpu_gart_table_ram_free(adev); + amdgpu_bo_unreserve(bo); + + return 0; + +error_resv: + amdgpu_bo_unreserve(bo); +error: + amdgpu_bo_unref(&bo); + if (sg) { + sg_free_table(sg); + kfree(sg); + } + __free_pages(p, order); + return ret; +} + +/** + * amdgpu_gart_table_ram_free - free gart page table system ram + * + * @adev: amdgpu_device pointer + * + * Free the system memory 
used for the GART page tableon ASICs that don't + * have dedicated VRAM. + */ +void amdgpu_gart_table_ram_free(struct amdgpu_device *adev) +{ + unsigned int order = get_order(adev->gart.table_size); + struct sg_table *sg = adev->gart.bo->tbo.sg; + struct page *p; + int ret; + + ret = amdgpu_bo_reserve(adev->gart.bo, false); + if (!ret) { + amdgpu_bo_unpin(adev->gart.bo); + amdgpu_bo_unreserve(adev->gart.bo); + } + amdgpu_bo_unref(&adev->gart.bo); + sg_free_table(sg); + kfree(sg); + p = virt_to_page(adev->gart.ptr); + __free_pages(p, order); + + adev->gart.ptr = NULL; +} + /** * amdgpu_gart_table_vram_alloc - allocate vram for gart page table * diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h index 8fea3e04e411..8283d682f543 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h @@ -51,6 +51,8 @@ struct amdgpu_gart { uint64_t gart_pte_flags; }; +int amdgpu_gart_table_ram_alloc(struct amdgpu_device *adev); +void amdgpu_gart_table_ram_free(struct amdgpu_device *adev); int amdgpu_gart_table_vram_alloc(struct amdgpu_device *adev); void amdgpu_gart_table_vram_free(struct amdgpu_device *adev); int amdgpu_gart_table_vram_pin(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 444441c6b7e3..aca8489635b8 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -1688,12 +1688,18 @@ static int gmc_v9_0_gart_init(struct amdgpu_device *adev) adev->gart.gart_pte_flags = AMDGPU_PTE_MTYPE_VG10(MTYPE_UC) | AMDGPU_PTE_EXECUTABLE; - r = amdgpu_gart_table_vram_alloc(adev); - if (r) - return r; + if (!adev->gmc.real_vram_size) { + dev_info(adev->dev, "Put GART in system memory for APU\n"); + r = amdgpu_gart_table_ram_alloc(adev); + if (r) + dev_err(adev->dev, "Failed to allocate GART in system memory\n"); + } else { + r = amdgpu_gart_table_vram_alloc(adev); + if (r) + return r; - 
if (adev->gmc.xgmi.connected_to_cpu) { - r = amdgpu_gmc_pdb0_alloc(adev); + if (adev->gmc.xgmi.connected_to_cpu) + r = amdgpu_gmc_pdb0_alloc(adev); } return r; @@ -1902,7 +1908,12 @@ static int gmc_v9_0_sw_fini(void *handle) amdgpu_gmc_ras_fini(adev); amdgpu_gem_force_release(adev); amdgpu_vm_manager_fini(adev); - amdgpu_gart_table_vram_free(adev); + if (!adev->gmc.real_vram_size) { + dev_info(adev->dev, "Put GART in system memory for APU free\n"); + amdgpu_gart_table_ram_free(adev); + } else { + amdgpu_gart_table_vram_free(adev); + } amdgpu_bo_free_kernel(&adev->gmc.pdb0_bo, NULL, &adev->gmc.ptr_pdb0); amdgpu_bo_fini(adev); -- cgit From d839a158b2480814bc438f9f46f440a7b9f63cb6 Mon Sep 17 00:00:00 2001 From: Graham Sider Date: Thu, 5 Jan 2023 10:58:07 -0500 Subject: drm/amdgpu: Correct dGPU MTYPE settings for gfx943 Revert temporary dGPU VRAM MTYPE setting and align with expected coherency protocol. Signed-off-by: Graham Sider Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 15 +++++---------- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 8 ++------ 2 files changed, 7 insertions(+), 16 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index aca8489635b8..b6c500be6f70 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -1238,17 +1238,12 @@ static void gmc_v9_0_get_coherence_flags(struct amdgpu_device *adev, mtype = MTYPE_RW; } else { /* dGPU */ - /* - if ((mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) && - bo_adev == adev) - mapping_flags |= AMDGPU_VM_MTYPE_RW; + if (is_vram && bo_adev == adev) + mtype = MTYPE_RW; + else if (is_vram) + mtype = MTYPE_NC; else - */ - /* Temporarily comment out above lines and use MTYPE_NC - * on both VRAM and system memory access until - * MTYPE_RW can properly work on VRAM access - */ - mtype = MTYPE_NC; + mtype = MTYPE_UC; } break; diff 
--git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 6daba0582bf3..2b79849ddd30 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -1198,13 +1198,9 @@ svm_range_get_pte_flags(struct kfd_node *node, if (uncached) { mapping_flags |= AMDGPU_VM_MTYPE_UC; } else if (domain == SVM_RANGE_VRAM_DOMAIN) { - /* local HBM region close to partition with a workaround - * for Endpoint systems. - */ + /* local HBM region close to partition */ if (bo_node == node) - mapping_flags |= - (node->adev->flags & AMD_IS_APU) ? - AMDGPU_VM_MTYPE_RW : AMDGPU_VM_MTYPE_NC; + mapping_flags |= AMDGPU_VM_MTYPE_RW; /* local HBM region far from partition or remote XGMI GPU */ else if (svm_nodes_in_same_hive(bo_node, node)) mapping_flags |= AMDGPU_VM_MTYPE_NC; -- cgit From 73c2b3fd2c515bcb819d801c5c4bf053fdb1e5cb Mon Sep 17 00:00:00 2001 From: Hawking Zhang Date: Sun, 22 Jan 2023 23:26:40 +0800 Subject: drm/amdgpu: Initialize mmhub v1_8 ras function Initialize mmhub v1_8 ras function. 
Signed-off-by: Hawking Zhang Reviewed-by: Tao Zhou Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 3 +++ drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c | 13 +++++++++++++ drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.h | 1 + 3 files changed, 17 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index b6c500be6f70..16634a791e10 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -1419,6 +1419,9 @@ static void gmc_v9_0_set_mmhub_ras_funcs(struct amdgpu_device *adev) case IP_VERSION(9, 4, 2): adev->mmhub.ras = &mmhub_v1_7_ras; break; + case IP_VERSION(1, 8, 0): + adev->mmhub.ras = &mmhub_v1_8_ras; + break; default: /* mmhub ras is not available */ break; diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c index 4f274c7db591..3648994724c2 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c @@ -847,3 +847,16 @@ static void mmhub_v1_8_reset_ras_error_status(struct amdgpu_device *adev) for_each_inst(i, inst_mask) mmhub_v1_8_inst_reset_ras_err_status(adev, i); } + +static const struct amdgpu_ras_block_hw_ops mmhub_v1_8_ras_hw_ops = { + .query_ras_error_count = mmhub_v1_8_query_ras_error_count, + .reset_ras_error_count = mmhub_v1_8_reset_ras_error_count, + .query_ras_error_status = mmhub_v1_8_query_ras_error_status, + .reset_ras_error_status = mmhub_v1_8_reset_ras_error_status, +}; + +struct amdgpu_mmhub_ras mmhub_v1_8_ras = { + .ras_block = { + .hw_ops = &mmhub_v1_8_ras_hw_ops, + }, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.h b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.h index 0bb36200e4e5..126f0075ac50 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.h +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.h @@ -24,5 +24,6 @@ #define __MMHUB_V1_8_H__ extern const struct amdgpu_mmhub_funcs mmhub_v1_8_funcs; +extern struct 
amdgpu_mmhub_ras mmhub_v1_8_ras; #endif -- cgit From 228ce176434b0f61451019065393040d58e1668d Mon Sep 17 00:00:00 2001 From: Rajneesh Bhardwaj Date: Fri, 27 Jan 2023 21:57:00 -0500 Subject: drm/amdgpu: Handle VRAM dependencies on GFXIP9.4.3 [For 1P NPS1 mode driver bringup] Changes required to initialize the amdgpu driver with frontdoor firmware loading and discovery=2 with the native mode SBIOS that enables CPU GPU unified interleaved memory. sudo modprobe amdgpu discovery=2 Once PSP TMR region is reported via the ACPI interface, the dependency on the ip_discovery.bin will be removed. Choice of where to allocate driver table is given to each IP version. In general, both GTT and VRAM domains will be considered. If one of the tables has a strict restriction for VRAM domain, then only VRAM domain is considered. Reviewed-by: Felix Kuehling (lijo: Modified the handling for SMU Tables) Signed-off-by: Lijo Lazar Signed-off-by: Rajneesh Bhardwaj Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 3 +- drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 6 +- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 89 +++++++++++++--------- drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c | 7 +- drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 3 +- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 9 ++- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 6 ++ drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 5 ++ drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 10 ++- .../gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c | 6 +- 11 files changed, 99 insertions(+), 47 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index af37f2ef4438..4e179e50de25 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -2292,8 +2292,9 @@ int amdgpu_amdkfd_gpuvm_import_dmabuf(struct amdgpu_device 
*adev, (*mem)->dmabuf = dma_buf; (*mem)->bo = bo; (*mem)->va = va; - (*mem)->domain = (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) ? + (*mem)->domain = (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) && !adev->gmc.is_app_apu ? AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT; + (*mem)->mapped_to_gpu_memory = 0; (*mem)->process_info = avm->process_info; add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info, false); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 9f0d5f02119e..f431205e1077 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -1044,7 +1044,7 @@ static const char * const amdgpu_vram_names[] = { int amdgpu_bo_init(struct amdgpu_device *adev) { /* On A+A platform, VRAM can be mapped as WB */ - if (!adev->gmc.xgmi.connected_to_cpu) { + if (!adev->gmc.xgmi.connected_to_cpu && !adev->gmc.is_app_apu) { /* reserve PAT memory space to WC for VRAM */ int r = arch_io_reserve_memtype_wc(adev->gmc.aper_base, adev->gmc.aper_size); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index 863fa331e6ff..4395c53d09d8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -476,7 +476,8 @@ static int psp_sw_init(void *handle) return ret; ret = amdgpu_bo_create_kernel(adev, PSP_FENCE_BUFFER_SIZE, PAGE_SIZE, - AMDGPU_GEM_DOMAIN_VRAM, + AMDGPU_GEM_DOMAIN_VRAM | + AMDGPU_GEM_DOMAIN_GTT, &psp->fence_buf_bo, &psp->fence_buf_mc_addr, &psp->fence_buf); @@ -484,7 +485,8 @@ static int psp_sw_init(void *handle) goto failed1; ret = amdgpu_bo_create_kernel(adev, PSP_CMD_BUFFER_SIZE, PAGE_SIZE, - AMDGPU_GEM_DOMAIN_VRAM, + AMDGPU_GEM_DOMAIN_VRAM | + AMDGPU_GEM_DOMAIN_GTT, &psp->cmd_buf_bo, &psp->cmd_buf_mc_addr, (void **)&psp->cmd_buf_mem); if (ret) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 6bbe3b89aef5..bc11ae56bba5 100644 
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -1708,15 +1708,20 @@ static int amdgpu_ttm_reserve_tmr(struct amdgpu_device *adev) ctx->init = PSP_MEM_TRAIN_RESERVE_SUCCESS; } - ret = amdgpu_bo_create_kernel_at(adev, - adev->gmc.real_vram_size - adev->mman.discovery_tmr_size, - adev->mman.discovery_tmr_size, - &adev->mman.discovery_memory, - NULL); - if (ret) { - DRM_ERROR("alloc tmr failed(%d)!\n", ret); - amdgpu_bo_free_kernel(&adev->mman.discovery_memory, NULL, NULL); - return ret; + if (!adev->gmc.is_app_apu) { + ret = amdgpu_bo_create_kernel_at(adev, + adev->gmc.real_vram_size - + adev->mman.discovery_tmr_size, + adev->mman.discovery_tmr_size, + &adev->mman.discovery_memory, + NULL); + if (ret) { + DRM_ERROR("alloc tmr failed(%d)!\n", ret); + amdgpu_bo_free_kernel(&adev->mman.discovery_memory, NULL, NULL); + return ret; + } + } else { + DRM_DEBUG_DRIVER("backdoor fw loading path for PSP TMR, no reservation needed\n"); } return 0; @@ -1765,10 +1770,12 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) adev->mman.aper_base_kaddr = ioremap_cache(adev->gmc.aper_base, adev->gmc.visible_vram_size); - else + else if (!adev->gmc.is_app_apu) #endif adev->mman.aper_base_kaddr = ioremap_wc(adev->gmc.aper_base, adev->gmc.visible_vram_size); + else + DRM_DEBUG_DRIVER("No need to ioremap when real vram size is 0\n"); #endif /* @@ -1803,23 +1810,32 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) * This is used for VGA emulation and pre-OS scanout buffers to * avoid display artifacts while transitioning between pre-OS * and driver. 
*/ - r = amdgpu_bo_create_kernel_at(adev, 0, adev->mman.stolen_vga_size, - &adev->mman.stolen_vga_memory, - NULL); - if (r) - return r; - r = amdgpu_bo_create_kernel_at(adev, adev->mman.stolen_vga_size, - adev->mman.stolen_extended_size, - &adev->mman.stolen_extended_memory, - NULL); - if (r) - return r; - r = amdgpu_bo_create_kernel_at(adev, adev->mman.stolen_reserved_offset, - adev->mman.stolen_reserved_size, - &adev->mman.stolen_reserved_memory, - NULL); - if (r) - return r; + if (!adev->gmc.is_app_apu) { + r = amdgpu_bo_create_kernel_at(adev, 0, + adev->mman.stolen_vga_size, + &adev->mman.stolen_vga_memory, + NULL); + if (r) + return r; + + r = amdgpu_bo_create_kernel_at(adev, adev->mman.stolen_vga_size, + adev->mman.stolen_extended_size, + &adev->mman.stolen_extended_memory, + NULL); + + if (r) + return r; + + r = amdgpu_bo_create_kernel_at(adev, + adev->mman.stolen_reserved_offset, + adev->mman.stolen_reserved_size, + &adev->mman.stolen_reserved_memory, + NULL); + if (r) + return r; + } else { + DRM_DEBUG_DRIVER("Skipped stolen memory reservation\n"); + } DRM_INFO("amdgpu: %uM of VRAM memory ready\n", (unsigned) (adev->gmc.real_vram_size / (1024 * 1024))); @@ -1866,7 +1882,6 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) DRM_ERROR("Failed initializing oa heap.\n"); return r; } - if (amdgpu_bo_create_kernel(adev, PAGE_SIZE, PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, &adev->mman.sdma_access_bo, NULL, @@ -1887,13 +1902,15 @@ void amdgpu_ttm_fini(struct amdgpu_device *adev) amdgpu_ttm_training_reserve_vram_fini(adev); /* return the stolen vga memory back to VRAM */ - amdgpu_bo_free_kernel(&adev->mman.stolen_vga_memory, NULL, NULL); - amdgpu_bo_free_kernel(&adev->mman.stolen_extended_memory, NULL, NULL); - /* return the IP Discovery TMR memory back to VRAM */ - amdgpu_bo_free_kernel(&adev->mman.discovery_memory, NULL, NULL); - if (adev->mman.stolen_reserved_size) - amdgpu_bo_free_kernel(&adev->mman.stolen_reserved_memory, - NULL, NULL); + if (!adev->gmc.is_app_apu) { + 
amdgpu_bo_free_kernel(&adev->mman.stolen_vga_memory, NULL, NULL); + amdgpu_bo_free_kernel(&adev->mman.stolen_extended_memory, NULL, NULL); + /* return the IP Discovery TMR memory back to VRAM */ + amdgpu_bo_free_kernel(&adev->mman.discovery_memory, NULL, NULL); + if (adev->mman.stolen_reserved_size) + amdgpu_bo_free_kernel(&adev->mman.stolen_reserved_memory, + NULL, NULL); + } amdgpu_bo_free_kernel(&adev->mman.sdma_access_bo, NULL, &adev->mman.sdma_access_ptr); amdgpu_ttm_fw_reserve_vram_fini(adev); @@ -1935,7 +1952,7 @@ void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable) int r; if (!adev->mman.initialized || amdgpu_in_reset(adev) || - adev->mman.buffer_funcs_enabled == enable) + adev->mman.buffer_funcs_enabled == enable || adev->gmc.is_app_apu) return; if (enable) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c index df63dc3bca18..bc5d126b600b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c @@ -512,7 +512,12 @@ int amdgpu_vm_pt_create(struct amdgpu_device *adev, struct amdgpu_vm *vm, bp.size = amdgpu_vm_pt_size(adev, level); bp.byte_align = AMDGPU_GPU_PAGE_SIZE; - bp.domain = AMDGPU_GEM_DOMAIN_VRAM; + + if (!adev->gmc.is_app_apu) + bp.domain = AMDGPU_GEM_DOMAIN_VRAM; + else + bp.domain = AMDGPU_GEM_DOMAIN_GTT; + bp.domain = amdgpu_bo_get_preferred_domain(adev, bp.domain); bp.flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS | AMDGPU_GEM_CREATE_CPU_GTT_USWC; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index 1f1268cd5e09..42877c4505f1 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -459,7 +459,8 @@ static int gfx_v9_4_3_mec_init(struct amdgpu_device *adev) adev->gfx.num_compute_rings * num_xcc * GFX9_MEC_HPD_SIZE; if (mec_hpd_size) { r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE, - AMDGPU_GEM_DOMAIN_VRAM, + 
AMDGPU_GEM_DOMAIN_VRAM | + AMDGPU_GEM_DOMAIN_GTT, &adev->gfx.mec.hpd_eop_obj, &adev->gfx.mec.hpd_eop_gpu_addr, (void **)&hpd); diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 16634a791e10..245de27c7540 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -1593,8 +1593,13 @@ static int gmc_v9_0_mc_init(struct amdgpu_device *adev) int r; /* size in MB on si */ - adev->gmc.mc_vram_size = - adev->nbio.funcs->get_memsize(adev) * 1024ULL * 1024ULL; + if (!adev->gmc.is_app_apu) { + adev->gmc.mc_vram_size = + adev->nbio.funcs->get_memsize(adev) * 1024ULL * 1024ULL; + } else { + DRM_DEBUG("Set mc_vram_size = 0 for APP APU\n"); + adev->gmc.mc_vram_size = 0; + } adev->gmc.real_vram_size = adev->gmc.mc_vram_size; if (!(adev->flags & AMD_IS_APU) && diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 8b9accecf49b..f85ac4dbc673 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -1026,6 +1026,12 @@ bool kfd_dev_is_large_bar(struct kfd_node *dev) if (dev->kfd->local_mem_info.local_mem_size_private == 0 && dev->kfd->local_mem_info.local_mem_size_public > 0) return true; + + if (dev->kfd->local_mem_info.local_mem_size_public == 0 && dev->kfd->adev->gmc.is_app_apu) { + pr_debug("APP APU, Consider like a large bar system\n"); + return true; + } + return false; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c index 16475921587b..1aaf933f9f48 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c @@ -30,6 +30,9 @@ #include "amdgpu.h" #include "amdgpu_amdkfd.h" +/* Fixme: Fake 32GB for 1PNPS1 mode bringup */ +#define DUMMY_VRAM_SIZE 31138512896 + /* GPU Processor ID base for dGPUs for which VCRAT needs to be created. * GPU processor ID are expressed with Bit[31]=1. 
* The base is set to 0x8000_0000 + 0x1000 to avoid collision with GPU IDs @@ -1053,6 +1056,8 @@ static int kfd_parse_subtype_mem(struct crat_subtype_memory *mem, props->heap_type = heap_type; props->flags = flags; + if (size_in_bytes == 0) + size_in_bytes = DUMMY_VRAM_SIZE; /* Fixme: TBD */ props->size_in_bytes = size_in_bytes; props->width = width; diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c index 2ddf5198e5c4..4dea79a0c5b5 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c @@ -822,11 +822,20 @@ static int smu_init_fb_allocations(struct smu_context *smu) } } + driver_table->domain = AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT; /* VRAM allocation for driver table */ for (i = 0; i < SMU_TABLE_COUNT; i++) { if (tables[i].size == 0) continue; + /* If one of the tables has VRAM domain restriction, keep it in + * VRAM + */ + if ((tables[i].domain & + (AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT)) == + AMDGPU_GEM_DOMAIN_VRAM) + driver_table->domain = AMDGPU_GEM_DOMAIN_VRAM; + if (i == SMU_TABLE_PMSTATUSLOG) continue; @@ -836,7 +845,6 @@ static int smu_init_fb_allocations(struct smu_context *smu) driver_table->size = max_table_size; driver_table->align = PAGE_SIZE; - driver_table->domain = AMDGPU_GEM_DOMAIN_VRAM; ret = amdgpu_bo_create_kernel(adev, driver_table->size, diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c index ea8f3d6fb98b..8969b3ff5c8f 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c @@ -220,10 +220,12 @@ static int smu_v13_0_6_tables_init(struct smu_context *smu) PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM); SMU_TABLE_INIT(tables, SMU_TABLE_SMU_METRICS, sizeof(MetricsTable_t), - PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM); + PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT); SMU_TABLE_INIT(tables, 
SMU_TABLE_I2C_COMMANDS, sizeof(SwI2cRequest_t), - PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM); + PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT); smu_table->metrics_table = kzalloc(sizeof(MetricsTable_t), GFP_KERNEL); if (!smu_table->metrics_table) -- cgit From b6f90baafe267a0705c5d9b1429c875d3c39fbc7 Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Tue, 31 Jan 2023 12:39:49 +0530 Subject: drm/amdgpu: Move memory partition query to gmc GMC block handles memory related information, it makes more sense to keep memory partition functions in gmc block. Signed-off-by: Lijo Lazar Reviewed-by: Le Ma Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 30 +--------------------- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 11 --------- drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c | 44 +++++++++++++++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h | 16 ++++++++++++ drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 10 -------- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 18 ++++++++++++++ 6 files changed, 79 insertions(+), 50 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index 1487ecac2705..2f7a101593e7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -1204,24 +1204,6 @@ static ssize_t amdgpu_gfx_get_current_compute_partition(struct device *dev, return sysfs_emit(buf, "%s\n", partition_mode); } -static ssize_t amdgpu_gfx_get_current_memory_partition(struct device *dev, - struct device_attribute *addr, - char *buf) -{ - struct drm_device *ddev = dev_get_drvdata(dev); - struct amdgpu_device *adev = drm_to_adev(ddev); - enum amdgpu_memory_partition mode; - static const char *partition_modes[] = { - "UNKNOWN", "NPS1", "NPS2", "NPS4", "NPS8" - }; - BUILD_BUG_ON(ARRAY_SIZE(partition_modes) <= AMDGPU_NPS8_PARTITION_MODE); - - mode = min((int)adev->gfx.funcs->query_mem_partition_mode(adev), - 
AMDGPU_NPS8_PARTITION_MODE); - - return sysfs_emit(buf, "%s\n", partition_modes[mode]); -} - static ssize_t amdgpu_gfx_set_compute_partition(struct device *dev, struct device_attribute *addr, const char *buf, size_t count) @@ -1305,9 +1287,6 @@ static DEVICE_ATTR(current_compute_partition, S_IRUGO | S_IWUSR, static DEVICE_ATTR(available_compute_partition, S_IRUGO, amdgpu_gfx_get_available_compute_partition, NULL); -static DEVICE_ATTR(current_memory_partition, S_IRUGO, - amdgpu_gfx_get_current_memory_partition, NULL); - int amdgpu_gfx_sysfs_init(struct amdgpu_device *adev) { int r; @@ -1317,19 +1296,12 @@ int amdgpu_gfx_sysfs_init(struct amdgpu_device *adev) return r; r = device_create_file(adev->dev, &dev_attr_available_compute_partition); - if (r) - return r; - r = device_create_file(adev->dev, &dev_attr_current_memory_partition); - if (r) - return r; - - return 0; + return r; } void amdgpu_gfx_sysfs_fini(struct amdgpu_device *adev) { device_remove_file(adev->dev, &dev_attr_current_compute_partition); device_remove_file(adev->dev, &dev_attr_available_compute_partition); - device_remove_file(adev->dev, &dev_attr_current_memory_partition); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index 81b4c7e684af..728977f8afe7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -71,14 +71,6 @@ enum amdgpu_pkg_type { AMDGPU_PKG_TYPE_UNKNOWN, }; -enum amdgpu_memory_partition { - UNKNOWN_MEMORY_PARTITION_MODE = 0, - AMDGPU_NPS1_PARTITION_MODE = 1, - AMDGPU_NPS2_PARTITION_MODE = 2, - AMDGPU_NPS4_PARTITION_MODE = 3, - AMDGPU_NPS8_PARTITION_MODE = 4, -}; - struct amdgpu_mec { struct amdgpu_bo *hpd_eop_obj; u64 hpd_eop_gpu_addr; @@ -276,8 +268,6 @@ struct amdgpu_gfx_funcs { struct amdgpu_gfx_shadow_info *shadow_info); enum amdgpu_gfx_partition (*query_partition_mode)(struct amdgpu_device *adev); - enum amdgpu_memory_partition - (*query_mem_partition_mode)(struct amdgpu_device *adev); 
int (*switch_partition_mode)(struct amdgpu_device *adev, int num_xccs_per_xcp); int (*ih_node_to_logical_xcc)(struct amdgpu_device *adev, int ih_node); @@ -414,7 +404,6 @@ struct amdgpu_gfx { bool cp_gfx_shadow; /* for gfx11 */ uint16_t xcc_mask; - enum amdgpu_memory_partition mem_partition_mode; uint32_t num_xcc_per_xcp; struct mutex partition_mutex; }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c index b8825a0670a4..d12625f1de5a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c @@ -891,3 +891,47 @@ int amdgpu_gmc_vram_checking(struct amdgpu_device *adev) return 0; } + +static ssize_t current_memory_partition_show( + struct device *dev, struct device_attribute *addr, char *buf) +{ + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = drm_to_adev(ddev); + enum amdgpu_memory_partition mode; + + mode = adev->gmc.gmc_funcs->query_mem_partition_mode(adev); + switch (mode) { + case AMDGPU_NPS1_PARTITION_MODE: + return sysfs_emit(buf, "NPS1\n"); + case AMDGPU_NPS2_PARTITION_MODE: + return sysfs_emit(buf, "NPS2\n"); + case AMDGPU_NPS3_PARTITION_MODE: + return sysfs_emit(buf, "NPS3\n"); + case AMDGPU_NPS4_PARTITION_MODE: + return sysfs_emit(buf, "NPS4\n"); + case AMDGPU_NPS6_PARTITION_MODE: + return sysfs_emit(buf, "NPS6\n"); + case AMDGPU_NPS8_PARTITION_MODE: + return sysfs_emit(buf, "NPS8\n"); + default: + return sysfs_emit(buf, "UNKNOWN\n"); + } + + return sysfs_emit(buf, "UNKNOWN\n"); +} + +static DEVICE_ATTR_RO(current_memory_partition); + +int amdgpu_gmc_sysfs_init(struct amdgpu_device *adev) +{ + if (!adev->gmc.gmc_funcs->query_mem_partition_mode) + return 0; + + return device_create_file(adev->dev, + &dev_attr_current_memory_partition); +} + +void amdgpu_gmc_sysfs_fini(struct amdgpu_device *adev) +{ + device_remove_file(adev->dev, &dev_attr_current_memory_partition); +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h index e408abfc2daf..2bd3b9665ebf 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h @@ -63,6 +63,16 @@ struct firmware; +enum amdgpu_memory_partition { + UNKNOWN_MEMORY_PARTITION_MODE = 0, + AMDGPU_NPS1_PARTITION_MODE = 1, + AMDGPU_NPS2_PARTITION_MODE = 2, + AMDGPU_NPS3_PARTITION_MODE = 3, + AMDGPU_NPS4_PARTITION_MODE = 4, + AMDGPU_NPS6_PARTITION_MODE = 6, + AMDGPU_NPS8_PARTITION_MODE = 8, +}; + /* * GMC page fault information */ @@ -140,6 +150,9 @@ struct amdgpu_gmc_funcs { uint64_t *flags); /* get the amount of memory used by the vbios for pre-OS console */ unsigned int (*get_vbios_fb_size)(struct amdgpu_device *adev); + + enum amdgpu_memory_partition (*query_mem_partition_mode)( + struct amdgpu_device *adev); }; struct amdgpu_xgmi_ras { @@ -375,4 +388,7 @@ uint64_t amdgpu_gmc_vram_mc2pa(struct amdgpu_device *adev, uint64_t mc_addr); uint64_t amdgpu_gmc_vram_pa(struct amdgpu_device *adev, struct amdgpu_bo *bo); uint64_t amdgpu_gmc_vram_cpu_pa(struct amdgpu_device *adev, struct amdgpu_bo *bo); int amdgpu_gmc_vram_checking(struct amdgpu_device *adev); +int amdgpu_gmc_sysfs_init(struct amdgpu_device *adev); +void amdgpu_gmc_sysfs_fini(struct amdgpu_device *adev); + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index 69867294117e..81ab3cd2f229 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -607,16 +607,7 @@ static void gfx_v9_4_3_select_me_pipe_q(struct amdgpu_device *adev, { soc15_grbm_select(adev, me, pipe, q, vm, GET_INST(GC, xcc_id)); } -static enum amdgpu_memory_partition -gfx_v9_4_3_query_memory_partition(struct amdgpu_device *adev) -{ - enum amdgpu_memory_partition mode = UNKNOWN_MEMORY_PARTITION_MODE; - - if (adev->nbio.funcs->get_memory_partition_mode) - mode = adev->nbio.funcs->get_memory_partition_mode(adev); - return mode; -} static int 
gfx_v9_4_3_switch_compute_partition(struct amdgpu_device *adev, int num_xccs_per_xcp) @@ -660,7 +651,6 @@ static const struct amdgpu_gfx_funcs gfx_v9_4_3_gfx_funcs = { .read_wave_vgprs = &gfx_v9_4_3_read_wave_vgprs, .select_me_pipe_q = &gfx_v9_4_3_select_me_pipe_q, .switch_partition_mode = &gfx_v9_4_3_switch_compute_partition, - .query_mem_partition_mode = &gfx_v9_4_3_query_memory_partition, .ih_node_to_logical_xcc = &gfx_v9_4_3_ih_to_xcc_inst, }; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 245de27c7540..db157a31a780 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -1330,6 +1330,17 @@ static unsigned gmc_v9_0_get_vbios_fb_size(struct amdgpu_device *adev) return size; } +static enum amdgpu_memory_partition +gmc_v9_0_query_memory_partition(struct amdgpu_device *adev) +{ + enum amdgpu_memory_partition mode = UNKNOWN_MEMORY_PARTITION_MODE; + + if (adev->nbio.funcs->get_memory_partition_mode) + mode = adev->nbio.funcs->get_memory_partition_mode(adev); + + return mode; +} + static const struct amdgpu_gmc_funcs gmc_v9_0_gmc_funcs = { .flush_gpu_tlb = gmc_v9_0_flush_gpu_tlb, .flush_gpu_tlb_pasid = gmc_v9_0_flush_gpu_tlb_pasid, @@ -1339,6 +1350,7 @@ static const struct amdgpu_gmc_funcs gmc_v9_0_gmc_funcs = { .get_vm_pde = gmc_v9_0_get_vm_pde, .get_vm_pte = gmc_v9_0_get_vm_pte, .get_vbios_fb_size = gmc_v9_0_get_vbios_fb_size, + .query_mem_partition_mode = &gmc_v9_0_query_memory_partition, }; static void gmc_v9_0_set_gmc_funcs(struct amdgpu_device *adev) @@ -1901,6 +1913,9 @@ static int gmc_v9_0_sw_init(void *handle) if (r) return r; + if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3)) + amdgpu_gmc_sysfs_init(adev); + return 0; } @@ -1908,6 +1923,9 @@ static int gmc_v9_0_sw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3)) + amdgpu_gmc_sysfs_fini(adev); + 
amdgpu_gmc_ras_fini(adev); amdgpu_gem_force_release(adev); amdgpu_vm_manager_fini(adev); -- cgit From 0f2e1d620eca56c4ceebc041aabb1eda26b2cfd0 Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Fri, 17 Feb 2023 09:32:44 +0530 Subject: drm/amdgpu: Get supported memory partition modes Expand the interface to get supported memory partition modes also along with the current memory partition mode. Signed-off-by: Lijo Lazar Reviewed-by: Le Ma Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h | 3 ++- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 11 +++++++++-- drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c | 9 ++++++++- 3 files changed, 19 insertions(+), 4 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h index eb25ac98903f..095aecfb201e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h @@ -97,7 +97,8 @@ struct amdgpu_nbio_funcs { void (*clear_doorbell_interrupt)(struct amdgpu_device *adev); u32 (*get_rom_offset)(struct amdgpu_device *adev); int (*get_compute_partition_mode)(struct amdgpu_device *adev); - u32 (*get_memory_partition_mode)(struct amdgpu_device *adev); + u32 (*get_memory_partition_mode)(struct amdgpu_device *adev, + u32 *supp_modes); void (*set_compute_partition_mode)(struct amdgpu_device *adev, enum amdgpu_gfx_partition mode); }; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index db157a31a780..d6a1dac01952 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -1331,16 +1331,23 @@ static unsigned gmc_v9_0_get_vbios_fb_size(struct amdgpu_device *adev) } static enum amdgpu_memory_partition -gmc_v9_0_query_memory_partition(struct amdgpu_device *adev) +gmc_v9_0_get_memory_partition(struct amdgpu_device *adev, u32 *supp_modes) { enum amdgpu_memory_partition mode = UNKNOWN_MEMORY_PARTITION_MODE; if 
(adev->nbio.funcs->get_memory_partition_mode) - mode = adev->nbio.funcs->get_memory_partition_mode(adev); + mode = adev->nbio.funcs->get_memory_partition_mode(adev, + supp_modes); return mode; } +static enum amdgpu_memory_partition +gmc_v9_0_query_memory_partition(struct amdgpu_device *adev) +{ + return gmc_v9_0_get_memory_partition(adev, NULL); +} + static const struct amdgpu_gmc_funcs gmc_v9_0_gmc_funcs = { .flush_gpu_tlb = gmc_v9_0_flush_gpu_tlb, .flush_gpu_tlb_pasid = gmc_v9_0_flush_gpu_tlb_pasid, diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c index fa4b423c399b..e1552d645308 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c @@ -408,12 +408,19 @@ static void nbio_v7_9_set_compute_partition_mode(struct amdgpu_device *adev, WREG32_SOC15(NBIO, 0, regBIF_BX_PF0_PARTITION_COMPUTE_STATUS, tmp); } -static enum amdgpu_memory_partition nbio_v7_9_get_memory_partition_mode(struct amdgpu_device *adev) +static enum amdgpu_memory_partition +nbio_v7_9_get_memory_partition_mode(struct amdgpu_device *adev, u32 *supp_modes) { u32 tmp; + tmp = RREG32_SOC15(NBIO, 0, regBIF_BX_PF0_PARTITION_MEM_STATUS); tmp = REG_GET_FIELD(tmp, BIF_BX_PF0_PARTITION_MEM_STATUS, NPS_MODE); + if (supp_modes) { + *supp_modes = + RREG32_SOC15(NBIO, 0, regBIF_BX_PF0_PARTITION_MEM_CAP); + } + return ffs(tmp); } -- cgit From a433f1f59484fba7a7743a3c5a5f320d9e828b3a Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Tue, 14 Feb 2023 14:45:45 +0530 Subject: drm/amdgpu: Initialize memory ranges for GC 9.4.3 GC 9.4.3 ASICs may have memory split into multiple partitions. Initialize the memory partition information for each range. The information may be in the form of a numa node id or a range of pages.
Signed-off-by: Lijo Lazar Reviewed-by: Le Ma Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 172 ++++++++++++++++++++++++++++++++++ 1 file changed, 172 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index d6a1dac01952..1653d77df3ba 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -79,6 +79,7 @@ #define mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION_DCN2 0x05ea #define mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION_DCN2_BASE_IDX 2 +#define MAX_MEM_RANGES 8 static const char *gfxhub_client_ids[] = { "CB", @@ -1742,6 +1743,169 @@ static void gmc_v9_0_save_registers(struct amdgpu_device *adev) adev->gmc.sdpif_register = RREG32_SOC15(DCE, 0, mmDCHUBBUB_SDPIF_MMIO_CNTRL_0); } +static bool gmc_v9_0_validate_partition_info(struct amdgpu_device *adev) +{ + enum amdgpu_memory_partition mode; + u32 supp_modes; + bool valid; + + mode = gmc_v9_0_get_memory_partition(adev, &supp_modes); + + /* Mode detected by hardware not present in supported modes */ + if ((mode != UNKNOWN_MEMORY_PARTITION_MODE) && + !(BIT(mode - 1) & supp_modes)) + return false; + + switch (mode) { + case UNKNOWN_MEMORY_PARTITION_MODE: + case AMDGPU_NPS1_PARTITION_MODE: + valid = (adev->gmc.num_mem_partitions == 1); + break; + case AMDGPU_NPS2_PARTITION_MODE: + valid = (adev->gmc.num_mem_partitions == 2); + break; + case AMDGPU_NPS4_PARTITION_MODE: + valid = (adev->gmc.num_mem_partitions == 3 || + adev->gmc.num_mem_partitions == 4); + break; + default: + valid = false; + } + + return valid; +} + +static bool gmc_v9_0_is_node_present(int *node_ids, int num_ids, int nid) +{ + int i; + + /* Check if node with id 'nid' is present in 'node_ids' array */ + for (i = 0; i < num_ids; ++i) + if (node_ids[i] == nid) + return true; + + return false; +} + +static void +gmc_v9_0_init_acpi_mem_ranges(struct amdgpu_device *adev, + struct 
amdgpu_mem_partition_info *mem_ranges) +{ + int num_ranges = 0, ret, mem_groups; + struct amdgpu_numa_info numa_info; + int node_ids[MAX_MEM_RANGES]; + int num_xcc, xcc_id; + uint32_t xcc_mask; + + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + xcc_mask = (1U << num_xcc) - 1; + mem_groups = hweight32(adev->aid_mask); + + for_each_inst(xcc_id, xcc_mask) { + ret = amdgpu_acpi_get_mem_info(adev, xcc_id, &numa_info); + if (ret) + continue; + + if (numa_info.nid == NUMA_NO_NODE) { + mem_ranges[0].size = numa_info.size; + mem_ranges[0].numa.node = numa_info.nid; + num_ranges = 1; + break; + } + + if (gmc_v9_0_is_node_present(node_ids, num_ranges, + numa_info.nid)) + continue; + + node_ids[num_ranges] = numa_info.nid; + mem_ranges[num_ranges].numa.node = numa_info.nid; + mem_ranges[num_ranges].size = numa_info.size; + ++num_ranges; + } + + adev->gmc.num_mem_partitions = num_ranges; + + /* If there is only partition, don't use entire size */ + if (adev->gmc.num_mem_partitions == 1) + mem_ranges[0].size = + (mem_ranges[0].size * (mem_groups - 1) / mem_groups); +} + +static void +gmc_v9_0_init_sw_mem_ranges(struct amdgpu_device *adev, + struct amdgpu_mem_partition_info *mem_ranges) +{ + enum amdgpu_memory_partition mode; + u32 start_addr = 0, size; + int i; + + mode = gmc_v9_0_query_memory_partition(adev); + + switch (mode) { + case UNKNOWN_MEMORY_PARTITION_MODE: + case AMDGPU_NPS1_PARTITION_MODE: + adev->gmc.num_mem_partitions = 1; + break; + case AMDGPU_NPS2_PARTITION_MODE: + adev->gmc.num_mem_partitions = 2; + break; + case AMDGPU_NPS4_PARTITION_MODE: + if (adev->flags & AMD_IS_APU) + adev->gmc.num_mem_partitions = 3; + else + adev->gmc.num_mem_partitions = 4; + break; + default: + adev->gmc.num_mem_partitions = 1; + break; + } + + size = (adev->gmc.real_vram_size >> AMDGPU_GPU_PAGE_SHIFT) / + adev->gmc.num_mem_partitions; + + for (i = 0; i < adev->gmc.num_mem_partitions; ++i) { + mem_ranges[i].range.fpfn = start_addr; + mem_ranges[i].size = ((u64)size << 
AMDGPU_GPU_PAGE_SHIFT); + mem_ranges[i].range.lpfn = start_addr + size - 1; + start_addr += size; + } + + /* Adjust the last one */ + mem_ranges[adev->gmc.num_mem_partitions - 1].range.lpfn = + (adev->gmc.real_vram_size >> AMDGPU_GPU_PAGE_SHIFT) - 1; + mem_ranges[adev->gmc.num_mem_partitions - 1].size = + adev->gmc.real_vram_size - + ((u64)mem_ranges[adev->gmc.num_mem_partitions - 1].range.fpfn + << AMDGPU_GPU_PAGE_SHIFT); +} + +static int gmc_v9_0_init_mem_ranges(struct amdgpu_device *adev) +{ + bool valid; + + adev->gmc.mem_partitions = kzalloc( + MAX_MEM_RANGES * sizeof(struct amdgpu_mem_partition_info), + GFP_KERNEL); + + if (!adev->gmc.mem_partitions) + return -ENOMEM; + + /* TODO : Get the range from PSP/Discovery for dGPU */ + if (adev->gmc.is_app_apu) + gmc_v9_0_init_acpi_mem_ranges(adev, adev->gmc.mem_partitions); + else + gmc_v9_0_init_sw_mem_ranges(adev, adev->gmc.mem_partitions); + + valid = gmc_v9_0_validate_partition_info(adev); + if (!valid) { + /* TODO: handle invalid case */ + dev_WARN(adev->dev, + "Mem ranges not matching with hardware config"); + } + + return 0; +} + static int gmc_v9_0_sw_init(void *handle) { int r, vram_width = 0, vram_type = 0, vram_vendor = 0, dma_addr_bits; @@ -1888,6 +2052,12 @@ static int gmc_v9_0_sw_init(void *handle) amdgpu_gmc_get_vbios_allocations(adev); + if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3)) { + r = gmc_v9_0_init_mem_ranges(adev); + if (r) + return r; + } + /* Memory manager */ r = amdgpu_bo_init(adev); if (r) @@ -1932,6 +2102,8 @@ static int gmc_v9_0_sw_fini(void *handle) if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3)) amdgpu_gmc_sysfs_fini(adev); + adev->gmc.num_mem_partitions = 0; + kfree(adev->gmc.mem_partitions); amdgpu_gmc_ras_fini(adev); amdgpu_gem_force_release(adev); -- cgit From b0a3bbf947f6ed690336cec1f6cde2a30d082dbb Mon Sep 17 00:00:00 2001 From: Gavin Wan Date: Mon, 3 Apr 2023 17:49:41 -0400 Subject: drm/amdgpu: Skip using MC FB Offset when APU flag is set for SRIOV. 
The MC_VM_FB_OFFSET is a PF-only register. It cannot be read on VF. So, the driver should not use MC_VM_FB_OFFSET address to set the address of adev->gmc.aper_base. Signed-off-by: Gavin Wan Reviewed-by: Zhigang Luo Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 1653d77df3ba..58bcd1e1f1b6 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -1644,7 +1644,8 @@ static int gmc_v9_0_mc_init(struct amdgpu_device *adev) */ /* check whether both host-gpu and gpu-gpu xgmi links exist */ - if (((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev)) || + if ((!amdgpu_sriov_vf(adev) && + (adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev)) || (adev->gmc.xgmi.supported && adev->gmc.xgmi.connected_to_cpu)) { adev->gmc.aper_base = -- cgit From 46f7b4deb30558593c1d2e62d561a3cee21f558a Mon Sep 17 00:00:00 2001 From: Gavin Wan Date: Mon, 10 Apr 2023 15:04:26 -0400 Subject: drm/amdgpu: Set memory partitions to 1 for SRIOV. For SRIOV, the memory partitions are set on host driver. Each VF only has one memory partition. We need to set the memory partitions to 1 on guest driver for SRIOV.
v2: squash in fix ("drm/amdgpu: Fix memory range info of GC 9.4.3 VFs") Signed-off-by: Gavin Wan Acked-by: Zhigang Luo Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 58bcd1e1f1b6..95c3253e240a 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -1346,6 +1346,9 @@ gmc_v9_0_get_memory_partition(struct amdgpu_device *adev, u32 *supp_modes) static enum amdgpu_memory_partition gmc_v9_0_query_memory_partition(struct amdgpu_device *adev) { + if (amdgpu_sriov_vf(adev)) + return AMDGPU_NPS1_PARTITION_MODE; + return gmc_v9_0_get_memory_partition(adev, NULL); } @@ -1897,7 +1900,10 @@ static int gmc_v9_0_init_mem_ranges(struct amdgpu_device *adev) else gmc_v9_0_init_sw_mem_ranges(adev, adev->gmc.mem_partitions); - valid = gmc_v9_0_validate_partition_info(adev); + if (amdgpu_sriov_vf(adev)) + valid = true; + else + valid = gmc_v9_0_validate_partition_info(adev); if (!valid) { /* TODO: handle invalid case */ dev_WARN(adev->dev, -- cgit From 2e8cc5d317d12f7fb4f66361a3ce5427f0abe2cd Mon Sep 17 00:00:00 2001 From: Graham Sider Date: Wed, 8 Feb 2023 11:10:57 -0500 Subject: drm/amdgpu: Use legacy TLB flush for gfx943 Invalidate TLBs via a legacy flush request (flush_type=0) prior to the heavyweight flush requests (flush_type=2) in gmc_v9_0.c. This is temporarily required to mitigate a bug causing CPC UTCL1 to return stale translations after invalidation requests in address range mode.
v2: squash in long term fix "drm/amdgpu: disable extra gfx943 legacy flush on rev1+" Signed-off-by: Graham Sider Reviewed-by: Philip Yang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 95c3253e240a..2eb67b53e497 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -833,6 +833,11 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, */ inv_req = gmc_v9_0_get_invalidate_req(vmid, 2); inv_req2 = gmc_v9_0_get_invalidate_req(vmid, flush_type); + } else if (flush_type == 2 && + adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3) && + adev->rev_id == 0) { + inv_req = gmc_v9_0_get_invalidate_req(vmid, 0); + inv_req2 = gmc_v9_0_get_invalidate_req(vmid, flush_type); } else { inv_req = gmc_v9_0_get_invalidate_req(vmid, flush_type); inv_req2 = 0; @@ -976,6 +981,13 @@ static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, if (vega20_xgmi_wa) kiq->pmf->kiq_invalidate_tlbs(ring, pasid, 2, all_hub); + + if (flush_type == 2 && + adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3) && + adev->rev_id == 0) + kiq->pmf->kiq_invalidate_tlbs(ring, + pasid, 0, all_hub); + kiq->pmf->kiq_invalidate_tlbs(ring, pasid, flush_type, all_hub); r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT); -- cgit From 895797d9193b38e759bc01268a8e3887e521f682 Mon Sep 17 00:00:00 2001 From: Graham Sider Date: Mon, 6 Feb 2023 14:04:42 -0500 Subject: drm/amdgpu/bu: Add use_mtype_cc_wa module param By default, set use_mtype_cc_wa to 1 to set PTE coherence flag MTYPE_CC instead of MTYPE_RW by default. This is required for the time being to mitigate a bug causing XCCs to hit stale data due to TCC marking fully dirty lines as exclusive. 
Signed-off-by: Graham Sider Reviewed-by: Joseph Greathouse Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 7 +++++++ drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 10 +++++++--- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 7 +++++-- 4 files changed, 20 insertions(+), 5 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index cb9373f8c25a..cd2a29a7e26d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -212,6 +212,7 @@ extern int amdgpu_noretry; extern int amdgpu_force_asic_type; extern int amdgpu_smartshift_bias; extern int amdgpu_use_xgmi_p2p; +extern bool amdgpu_use_mtype_cc_wa; #ifdef CONFIG_HSA_AMD extern int sched_policy; extern bool debug_evictions; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index da4e50aef95a..8bc37826a99f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -822,6 +822,13 @@ MODULE_PARM_DESC(no_queue_eviction_on_vm_fault, "No queue eviction on VM fault ( module_param_named(no_queue_eviction_on_vm_fault, amdgpu_no_queue_eviction_on_vm_fault, int, 0444); #endif +/** + * DOC: use_mtype_cc_wa (bool) + */ +bool amdgpu_use_mtype_cc_wa = true; +MODULE_PARM_DESC(use_mtype_cc_wa, "Use MTYPE_CC workaround (0 = use MTYPE_RW where applicable, 1 = use MTYPE_CC where applicable (default))"); +module_param_named(use_mtype_cc_wa, amdgpu_use_mtype_cc_wa, bool, 0444); + /** * DOC: pcie_p2p (bool) * Enable PCIe P2P (requires large-BAR). 
Default value: true (on) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 2eb67b53e497..8623b93c05ee 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -1187,6 +1187,7 @@ static void gmc_v9_0_get_coherence_flags(struct amdgpu_device *adev, bool coherent = bo->flags & AMDGPU_GEM_CREATE_COHERENT; bool uncached = bo->flags & AMDGPU_GEM_CREATE_UNCACHED; unsigned int mtype; + unsigned int mtype_default; bool snoop = false; switch (adev->ip_versions[GC_HWIP][0]) { @@ -1230,7 +1231,10 @@ static void gmc_v9_0_get_coherence_flags(struct amdgpu_device *adev, /* FIXME: Needs more work for handling multiple memory * partitions (> NPS1 mode) e.g. NPS4 for both APU and dGPU * modes. + * FIXME: Temporarily using MTYPE_CC instead of MTYPE_RW where applicable. + * To force use of MTYPE_RW, set use_mtype_cc_wa=0 */ + mtype_default = amdgpu_use_mtype_cc_wa ? MTYPE_CC : MTYPE_RW; snoop = true; if (uncached) { mtype = MTYPE_UC; @@ -1245,14 +1249,14 @@ static void gmc_v9_0_get_coherence_flags(struct amdgpu_device *adev, * socket should be treated as remote access so MTYPE_RW * cannot be used always. 
*/ - mtype = MTYPE_RW; + mtype = mtype_default; } else if (adev->flags & AMD_IS_APU) { /* APU on carve out mode */ - mtype = MTYPE_RW; + mtype = mtype_default; } else { /* dGPU */ if (is_vram && bo_adev == adev) - mtype = MTYPE_RW; + mtype = mtype_default; else if (is_vram) mtype = MTYPE_NC; else diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 2b2129dd1e4a..477ef9294203 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -1198,9 +1198,12 @@ svm_range_get_pte_flags(struct kfd_node *node, if (uncached) { mapping_flags |= AMDGPU_VM_MTYPE_UC; } else if (domain == SVM_RANGE_VRAM_DOMAIN) { - /* local HBM region close to partition */ + /* local HBM region close to partition + * FIXME: Temporarily using MTYPE_CC instead of MTYPE_RW where applicable. + * To force use of MTYPE_RW, set use_mtype_cc_wa=0 + */ if (bo_node == node) - mapping_flags |= AMDGPU_VM_MTYPE_RW; + mapping_flags |= amdgpu_use_mtype_cc_wa ? AMDGPU_VM_MTYPE_CC : AMDGPU_VM_MTYPE_RW; /* local HBM region far from partition or remote XGMI GPU */ else if (svm_nodes_in_same_hive(bo_node, node)) mapping_flags |= AMDGPU_VM_MTYPE_NC; -- cgit From 1e4a00334add40f609162914af7a24bc92951008 Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Tue, 21 Feb 2023 17:31:32 -0500 Subject: drm/amdgpu: Fix per-BO MTYPE selection for GFXv9.4.3 Treat system memory on NUMA systems as remote by default. Overriding with a more efficient MTYPE per page will be implemented in the next patch. No need for a special case for APP APUs. System memory is handled the same for carve-out and native mode. And VRAM doesn't exist in native mode. 
Signed-off-by: Felix Kuehling Reviewed-by: Philip Yang Reviewed-and-tested-by: Rajneesh Bhardwaj Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 40 ++++++++++++++--------------------- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 24 ++++++++++++--------- 2 files changed, 30 insertions(+), 34 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 8623b93c05ee..cf976b5b7b63 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -1186,9 +1186,10 @@ static void gmc_v9_0_get_coherence_flags(struct amdgpu_device *adev, bool is_vram = bo->tbo.resource->mem_type == TTM_PL_VRAM; bool coherent = bo->flags & AMDGPU_GEM_CREATE_COHERENT; bool uncached = bo->flags & AMDGPU_GEM_CREATE_UNCACHED; - unsigned int mtype; - unsigned int mtype_default; + /* TODO: memory partitions struct amdgpu_vm *vm = mapping->bo_va->base.vm;*/ + unsigned int mtype_local, mtype; bool snoop = false; + bool is_local; switch (adev->ip_versions[GC_HWIP][0]) { case IP_VERSION(9, 4, 1): @@ -1228,35 +1229,26 @@ static void gmc_v9_0_get_coherence_flags(struct amdgpu_device *adev, } break; case IP_VERSION(9, 4, 3): - /* FIXME: Needs more work for handling multiple memory - * partitions (> NPS1 mode) e.g. NPS4 for both APU and dGPU - * modes. - * FIXME: Temporarily using MTYPE_CC instead of MTYPE_RW where applicable. - * To force use of MTYPE_RW, set use_mtype_cc_wa=0 + /* Only local VRAM BOs or system memory on non-NUMA APUs + * can be assumed to be local in their entirety. Choose + * MTYPE_NC as safe fallback for all system memory BOs on + * NUMA systems. Their MTYPE can be overridden per-page in + * gmc_v9_0_override_vm_pte_flags. */ - mtype_default = amdgpu_use_mtype_cc_wa ? MTYPE_CC : MTYPE_RW; + mtype_local = amdgpu_use_mtype_cc_wa ? 
MTYPE_CC : MTYPE_RW; + is_local = (!is_vram && (adev->flags & AMD_IS_APU) && + num_possible_nodes() <= 1) || + (is_vram && adev == bo_adev /* TODO: memory partitions && + bo->mem_id == vm->mem_id*/); snoop = true; if (uncached) { mtype = MTYPE_UC; - } else if (adev->gmc.is_app_apu) { - /* FIXME: APU in native mode, NPS1 single socket only - * - * For suporting NUMA partitioned APU e.g. in NPS4 mode, - * this need to look at the NUMA node on which the - * system memory allocation was done. - * - * Memory access by a different partition within same - * socket should be treated as remote access so MTYPE_RW - * cannot be used always. - */ - mtype = mtype_default; } else if (adev->flags & AMD_IS_APU) { - /* APU on carve out mode */ - mtype = mtype_default; + mtype = is_local ? mtype_local : MTYPE_NC; } else { /* dGPU */ - if (is_vram && bo_adev == adev) - mtype = mtype_default; + if (is_local) + mtype = mtype_local; else if (is_vram) mtype = MTYPE_NC; else diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 477ef9294203..4eec75b28917 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -1151,6 +1151,7 @@ svm_range_get_pte_flags(struct kfd_node *node, bool snoop = (domain != SVM_RANGE_VRAM_DOMAIN); bool coherent = flags & KFD_IOCTL_SVM_FLAG_COHERENT; bool uncached = flags & KFD_IOCTL_SVM_FLAG_UNCACHED; + unsigned int mtype_local; if (domain == SVM_RANGE_VRAM_DOMAIN) bo_node = prange->svm_bo->node; @@ -1191,19 +1192,16 @@ svm_range_get_pte_flags(struct kfd_node *node, } break; case IP_VERSION(9, 4, 3): - //TODO: Need more work for handling multiple memory partitions - //e.g. NPS4. Current approch is only applicable without memory - //partitions. + mtype_local = amdgpu_use_mtype_cc_wa ? 
AMDGPU_VM_MTYPE_CC : + AMDGPU_VM_MTYPE_RW; snoop = true; if (uncached) { mapping_flags |= AMDGPU_VM_MTYPE_UC; } else if (domain == SVM_RANGE_VRAM_DOMAIN) { - /* local HBM region close to partition - * FIXME: Temporarily using MTYPE_CC instead of MTYPE_RW where applicable. - * To force use of MTYPE_RW, set use_mtype_cc_wa=0 - */ - if (bo_node == node) - mapping_flags |= amdgpu_use_mtype_cc_wa ? AMDGPU_VM_MTYPE_CC : AMDGPU_VM_MTYPE_RW; + /* local HBM region close to partition */ + if (bo_node->adev == node->adev /* TODO: memory partitions && + bo_node->mem_id == node->mem_id*/) + mapping_flags |= mtype_local; /* local HBM region far from partition or remote XGMI GPU */ else if (svm_nodes_in_same_hive(bo_node, node)) mapping_flags |= AMDGPU_VM_MTYPE_NC; @@ -1212,7 +1210,13 @@ svm_range_get_pte_flags(struct kfd_node *node, mapping_flags |= AMDGPU_VM_MTYPE_UC; /* system memory accessed by the APU */ } else if (node->adev->flags & AMD_IS_APU) { - mapping_flags |= AMDGPU_VM_MTYPE_NC; + /* On NUMA systems, locality is determined per-page + * in amdgpu_gmc_override_vm_pte_flags + */ + if (num_possible_nodes() <= 1) + mapping_flags |= mtype_local; + else + mapping_flags |= AMDGPU_VM_MTYPE_NC; /* system memory accessed by the dGPU */ } else { mapping_flags |= AMDGPU_VM_MTYPE_UC; -- cgit From 352b919c1e5ff50c71d665395b27acbd1bf23a05 Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Tue, 21 Feb 2023 17:44:18 -0500 Subject: drm/amdgpu: Override MTYPE per page on GFXv9.4.3 APUs On GFXv9.4.3 NUMA APUs, system memory locality must be determined per page to choose the correct MTYPE. This patch adds a GMC callback that can provide this per-page override and implements it for native mode. Carve-out mode is not yet supported and will use the safe default (remote) MTYPE for system memory. 
Signed-off-by: Felix Kuehling Reviewed-by: Philip Yang Reviewed-and-tested-by: Rajneesh Bhardwaj Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h | 7 ++++ drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c | 22 +++++++++-- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 64 +++++++++++++++++++++++++++++++ 3 files changed, 90 insertions(+), 3 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h index 43357d699e6e..6794edd1d2d2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h @@ -148,6 +148,10 @@ struct amdgpu_gmc_funcs { void (*get_vm_pte)(struct amdgpu_device *adev, struct amdgpu_bo_va_mapping *mapping, uint64_t *flags); + /* override per-page pte flags */ + void (*override_vm_pte_flags)(struct amdgpu_device *dev, + struct amdgpu_vm *vm, + uint64_t addr, uint64_t *flags); /* get the amount of memory used by the vbios for pre-OS console */ unsigned int (*get_vbios_fb_size)(struct amdgpu_device *adev); @@ -336,6 +340,9 @@ struct amdgpu_gmc { #define amdgpu_gmc_map_mtype(adev, flags) (adev)->gmc.gmc_funcs->map_mtype((adev),(flags)) #define amdgpu_gmc_get_vm_pde(adev, level, dst, flags) (adev)->gmc.gmc_funcs->get_vm_pde((adev), (level), (dst), (flags)) #define amdgpu_gmc_get_vm_pte(adev, mapping, flags) (adev)->gmc.gmc_funcs->get_vm_pte((adev), (mapping), (flags)) +#define amdgpu_gmc_override_vm_pte_flags(adev, vm, addr, pte_flags) \ + (adev)->gmc.gmc_funcs->override_vm_pte_flags \ + ((adev), (vm), (addr), (pte_flags)) #define amdgpu_gmc_get_vbios_fb_size(adev) (adev)->gmc.gmc_funcs->get_vbios_fb_size((adev)) /** diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c index bc5d126b600b..60b1da93b06d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c @@ -786,13 +786,14 @@ static void 
amdgpu_vm_pte_update_flags(struct amdgpu_vm_update_params *params, uint64_t pe, uint64_t addr, unsigned int count, uint32_t incr, uint64_t flags) - { + struct amdgpu_device *adev = params->adev; + if (level != AMDGPU_VM_PTB) { flags |= AMDGPU_PDE_PTE; - amdgpu_gmc_get_vm_pde(params->adev, level, &addr, &flags); + amdgpu_gmc_get_vm_pde(adev, level, &addr, &flags); - } else if (params->adev->asic_type >= CHIP_VEGA10 && + } else if (adev->asic_type >= CHIP_VEGA10 && !(flags & AMDGPU_PTE_VALID) && !(flags & AMDGPU_PTE_PRT)) { @@ -800,6 +801,21 @@ static void amdgpu_vm_pte_update_flags(struct amdgpu_vm_update_params *params, flags |= AMDGPU_PTE_EXECUTABLE; } + /* APUs mapping system memory may need different MTYPEs on different + * NUMA nodes. Only do this for contiguous ranges that can be assumed + * to be on the same NUMA node. + */ + if ((flags & AMDGPU_PTE_SYSTEM) && (adev->flags & AMD_IS_APU) && + adev->gmc.gmc_funcs->override_vm_pte_flags && + num_possible_nodes() > 1) { + if (!params->pages_addr) + amdgpu_gmc_override_vm_pte_flags(adev, params->vm, + addr, &flags); + else + dev_dbg(adev->dev, + "override_vm_pte_flags skipped: non-contiguous\n"); + } + params->vm->update_funcs->update(params, pt, pe, addr, count, incr, flags); } diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index cf976b5b7b63..c64a69f75da2 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -1297,6 +1297,69 @@ static void gmc_v9_0_get_vm_pte(struct amdgpu_device *adev, mapping, flags); } +static void gmc_v9_0_override_vm_pte_flags(struct amdgpu_device *adev, + struct amdgpu_vm *vm, + uint64_t addr, uint64_t *flags) +{ + int local_node, nid; + + /* Only GFX 9.4.3 APUs associate GPUs with NUMA nodes. Local system + * memory can use more efficient MTYPEs. 
+ */ + if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 3)) + return; + + /* Only direct-mapped memory allows us to determine the NUMA node from + * the DMA address. + */ + if (!adev->ram_is_direct_mapped) { + dev_dbg(adev->dev, "RAM is not direct mapped\n"); + return; + } + + /* Only override mappings with MTYPE_NC, which is the safe default for + * cacheable memory. + */ + if ((*flags & AMDGPU_PTE_MTYPE_VG10_MASK) != + AMDGPU_PTE_MTYPE_VG10(MTYPE_NC)) { + dev_dbg(adev->dev, "MTYPE is not NC\n"); + return; + } + + /* TODO: memory partitions. mem_id is hard-coded to 0 for now. + * FIXME: Only supported on native mode for now. For carve-out, the + * NUMA affinity of the GPU/VM needs to come from the PCI info because + * memory partitions are not associated with different NUMA nodes. + */ + if (adev->gmc.is_app_apu) { + local_node = adev->gmc.mem_partitions[/*vm->mem_id*/0].numa.node; + } else { + dev_dbg(adev->dev, "Only native mode APU is supported.\n"); + return; + } + + /* Only handle real RAM. Mappings of PCIe resources don't have struct + * page or NUMA nodes. + */ + if (!page_is_ram(addr >> PAGE_SHIFT)) { + dev_dbg(adev->dev, "Page is not RAM.\n"); + return; + } + nid = pfn_to_nid(addr >> PAGE_SHIFT); + dev_dbg(adev->dev, "vm->mem_id=%d, local_node=%d, nid=%d\n", + /*vm->mem_id*/0, local_node, nid); + if (nid == local_node) { + unsigned int mtype_local = + amdgpu_use_mtype_cc_wa ? 
MTYPE_CC : MTYPE_RW; + uint64_t old_flags = *flags; + + *flags = (*flags & ~AMDGPU_PTE_MTYPE_VG10_MASK) | + AMDGPU_PTE_MTYPE_VG10(mtype_local); + dev_dbg(adev->dev, "flags updated from %llx to %llx\n", + old_flags, *flags); + } +} + static unsigned gmc_v9_0_get_vbios_fb_size(struct amdgpu_device *adev) { u32 d1vga_control = RREG32_SOC15(DCE, 0, mmD1VGA_CONTROL); @@ -1368,6 +1431,7 @@ static const struct amdgpu_gmc_funcs gmc_v9_0_gmc_funcs = { .map_mtype = gmc_v9_0_map_mtype, .get_vm_pde = gmc_v9_0_get_vm_pde, .get_vm_pte = gmc_v9_0_get_vm_pte, + .override_vm_pte_flags = gmc_v9_0_override_vm_pte_flags, .get_vbios_fb_size = gmc_v9_0_get_vbios_fb_size, .query_mem_partition_mode = &gmc_v9_0_query_memory_partition, }; -- cgit From 76eb9c95a409ea820b2e7c968c220e7a38f27d76 Mon Sep 17 00:00:00 2001 From: David Francis Date: Mon, 27 Feb 2023 10:33:11 -0500 Subject: drm/amdgpu/bu: add mtype_local as a module parameter Selects the MTYPE to be used for local memory, (0 = MTYPE_CC (default), 1 = MTYPE_NC, 2 = MTYPE_RW) v2: squash in build fix (Alex) Reviewed-by: Graham Sider Signed-off-by: David Francis Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 8 ++++---- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 19 ++++++++++++++++--- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 3 +-- 4 files changed, 22 insertions(+), 10 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index cd2a29a7e26d..c2feaf2fd070 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -212,7 +212,7 @@ extern int amdgpu_noretry; extern int amdgpu_force_asic_type; extern int amdgpu_smartshift_bias; extern int amdgpu_use_xgmi_p2p; -extern bool amdgpu_use_mtype_cc_wa; +extern int amdgpu_mtype_local; #ifdef CONFIG_HSA_AMD extern int sched_policy; extern bool debug_evictions; diff --git 
a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 8bc37826a99f..706ba4af062f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -823,11 +823,11 @@ module_param_named(no_queue_eviction_on_vm_fault, amdgpu_no_queue_eviction_on_vm #endif /** - * DOC: use_mtype_cc_wa (bool) + * DOC: mtype_local (int) */ -bool amdgpu_use_mtype_cc_wa = true; -MODULE_PARM_DESC(use_mtype_cc_wa, "Use MTYPE_CC workaround (0 = use MTYPE_RW where applicable, 1 = use MTYPE_CC where applicable (default))"); -module_param_named(use_mtype_cc_wa, amdgpu_use_mtype_cc_wa, bool, 0444); +int amdgpu_mtype_local; +MODULE_PARM_DESC(mtype_local, "MTYPE for local memory (0 = MTYPE_CC (default), 1 = MTYPE_NC, 2 = MTYPE_RW)"); +module_param_named(mtype_local, amdgpu_mtype_local, int, 0444); /** * DOC: pcie_p2p (bool) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index c64a69f75da2..5a1414300271 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -1235,7 +1235,16 @@ static void gmc_v9_0_get_coherence_flags(struct amdgpu_device *adev, * NUMA systems. Their MTYPE can be overridden per-page in * gmc_v9_0_override_vm_pte_flags. */ - mtype_local = amdgpu_use_mtype_cc_wa ? 
MTYPE_CC : MTYPE_RW; + mtype_local = MTYPE_CC; + if (amdgpu_mtype_local == 1) { + DRM_INFO_ONCE("Using MTYPE_NC for local memory\n"); + mtype_local = MTYPE_NC; + } else if (amdgpu_mtype_local == 2) { + DRM_INFO_ONCE("Using MTYPE_RW for local memory\n"); + mtype_local = MTYPE_RW; + } else { + DRM_INFO_ONCE("Using MTYPE_CC for local memory\n"); + } is_local = (!is_vram && (adev->flags & AMD_IS_APU) && num_possible_nodes() <= 1) || (is_vram && adev == bo_adev /* TODO: memory partitions && @@ -1349,9 +1358,13 @@ static void gmc_v9_0_override_vm_pte_flags(struct amdgpu_device *adev, dev_dbg(adev->dev, "vm->mem_id=%d, local_node=%d, nid=%d\n", /*vm->mem_id*/0, local_node, nid); if (nid == local_node) { - unsigned int mtype_local = - amdgpu_use_mtype_cc_wa ? MTYPE_CC : MTYPE_RW; uint64_t old_flags = *flags; + unsigned int mtype_local = MTYPE_CC; + + if (amdgpu_mtype_local == 1) + mtype_local = MTYPE_NC; + else if (amdgpu_mtype_local == 2) + mtype_local = MTYPE_RW; *flags = (*flags & ~AMDGPU_PTE_MTYPE_VG10_MASK) | AMDGPU_PTE_MTYPE_VG10(mtype_local); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 4eec75b28917..df0ed5677609 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -1192,8 +1192,7 @@ svm_range_get_pte_flags(struct kfd_node *node, } break; case IP_VERSION(9, 4, 3): - mtype_local = amdgpu_use_mtype_cc_wa ? AMDGPU_VM_MTYPE_CC : - AMDGPU_VM_MTYPE_RW; + mtype_local = amdgpu_mtype_local == 1 ? AMDGPU_VM_MTYPE_NC : (amdgpu_mtype_local == 2 ? AMDGPU_VM_MTYPE_RW : AMDGPU_VM_MTYPE_CC); snoop = true; if (uncached) { mapping_flags |= AMDGPU_VM_MTYPE_UC; -- cgit From b9cbd51000ad3541351ca832b00600870ac08e5c Mon Sep 17 00:00:00 2001 From: Graham Sider Date: Mon, 6 Mar 2023 17:56:44 -0500 Subject: drm/amdgpu/bu: update mtype_local parameter settings Update mtype_local module parameter to use MTYPE_RW by default. 
0: MTYPE_RW (default) 1: MTYPE_NC 2: MTYPE_CC Signed-off-by: Graham Sider Reviewed-by: Harish Kasiviswanathan Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 2 +- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 12 ++++++------ drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 3 ++- 3 files changed, 9 insertions(+), 8 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 706ba4af062f..aa466a9eb956 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -826,7 +826,7 @@ module_param_named(no_queue_eviction_on_vm_fault, amdgpu_no_queue_eviction_on_vm * DOC: mtype_local (int) */ int amdgpu_mtype_local; -MODULE_PARM_DESC(mtype_local, "MTYPE for local memory (0 = MTYPE_CC (default), 1 = MTYPE_NC, 2 = MTYPE_RW)"); +MODULE_PARM_DESC(mtype_local, "MTYPE for local memory (0 = MTYPE_RW (default), 1 = MTYPE_NC, 2 = MTYPE_CC)"); module_param_named(mtype_local, amdgpu_mtype_local, int, 0444); /** diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 5a1414300271..32eb4f4f5492 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -1235,15 +1235,15 @@ static void gmc_v9_0_get_coherence_flags(struct amdgpu_device *adev, * NUMA systems. Their MTYPE can be overridden per-page in * gmc_v9_0_override_vm_pte_flags. 
*/ - mtype_local = MTYPE_CC; + mtype_local = MTYPE_RW; if (amdgpu_mtype_local == 1) { DRM_INFO_ONCE("Using MTYPE_NC for local memory\n"); mtype_local = MTYPE_NC; } else if (amdgpu_mtype_local == 2) { - DRM_INFO_ONCE("Using MTYPE_RW for local memory\n"); - mtype_local = MTYPE_RW; - } else { DRM_INFO_ONCE("Using MTYPE_CC for local memory\n"); + mtype_local = MTYPE_CC; + } else { + DRM_INFO_ONCE("Using MTYPE_RW for local memory\n"); } is_local = (!is_vram && (adev->flags & AMD_IS_APU) && num_possible_nodes() <= 1) || @@ -1359,12 +1359,12 @@ static void gmc_v9_0_override_vm_pte_flags(struct amdgpu_device *adev, /*vm->mem_id*/0, local_node, nid); if (nid == local_node) { uint64_t old_flags = *flags; - unsigned int mtype_local = MTYPE_CC; + unsigned int mtype_local = MTYPE_RW; if (amdgpu_mtype_local == 1) mtype_local = MTYPE_NC; else if (amdgpu_mtype_local == 2) - mtype_local = MTYPE_RW; + mtype_local = MTYPE_CC; *flags = (*flags & ~AMDGPU_PTE_MTYPE_VG10_MASK) | AMDGPU_PTE_MTYPE_VG10(mtype_local); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index df0ed5677609..e6348d4133fd 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -1192,7 +1192,8 @@ svm_range_get_pte_flags(struct kfd_node *node, } break; case IP_VERSION(9, 4, 3): - mtype_local = amdgpu_mtype_local == 1 ? AMDGPU_VM_MTYPE_NC : (amdgpu_mtype_local == 2 ? AMDGPU_VM_MTYPE_RW : AMDGPU_VM_MTYPE_CC); + mtype_local = amdgpu_mtype_local == 1 ? AMDGPU_VM_MTYPE_NC : + (amdgpu_mtype_local == 2 ? 
AMDGPU_VM_MTYPE_CC : AMDGPU_VM_MTYPE_RW); snoop = true; if (uncached) { mapping_flags |= AMDGPU_VM_MTYPE_UC; -- cgit From dc12f9eddedb8b41f4dc948e5e636e5221fb4d43 Mon Sep 17 00:00:00 2001 From: Philip Yang Date: Thu, 2 Feb 2023 11:07:53 -0500 Subject: drm/amdkfd: Update MTYPE for far memory partition Use MTYPE_RW/MTYPE_CC for mapping system memory or VRAM to KFD node within the same memory partition, use MTYPE_NC for mapping on KFD node from the far memory partition of the same socket or from another socket on same XGMI hive. On NPS4 or 4P system, MTYPE will be overridden per page depending on the memory NUMA node id and vm->mem_id. Signed-off-by: Philip Yang Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 15 +++++++-------- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 9 +++++---- 2 files changed, 12 insertions(+), 12 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 32eb4f4f5492..263d17a8b433 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -1186,7 +1186,7 @@ static void gmc_v9_0_get_coherence_flags(struct amdgpu_device *adev, bool is_vram = bo->tbo.resource->mem_type == TTM_PL_VRAM; bool coherent = bo->flags & AMDGPU_GEM_CREATE_COHERENT; bool uncached = bo->flags & AMDGPU_GEM_CREATE_UNCACHED; - /* TODO: memory partitions struct amdgpu_vm *vm = mapping->bo_va->base.vm;*/ + struct amdgpu_vm *vm = mapping->bo_va->base.vm; unsigned int mtype_local, mtype; bool snoop = false; bool is_local; @@ -1247,8 +1247,8 @@ static void gmc_v9_0_get_coherence_flags(struct amdgpu_device *adev, } is_local = (!is_vram && (adev->flags & AMD_IS_APU) && num_possible_nodes() <= 1) || - (is_vram && adev == bo_adev /* TODO: memory partitions && - bo->mem_id == vm->mem_id*/); + (is_vram && adev == bo_adev && + bo->mem_id == vm->mem_id); snoop = true; if (uncached) { mtype = MTYPE_UC; @@ 
-1335,13 +1335,12 @@ static void gmc_v9_0_override_vm_pte_flags(struct amdgpu_device *adev, return; } - /* TODO: memory partitions. mem_id is hard-coded to 0 for now. - * FIXME: Only supported on native mode for now. For carve-out, the + /* FIXME: Only supported on native mode for now. For carve-out, the * NUMA affinity of the GPU/VM needs to come from the PCI info because * memory partitions are not associated with different NUMA nodes. */ - if (adev->gmc.is_app_apu) { - local_node = adev->gmc.mem_partitions[/*vm->mem_id*/0].numa.node; + if (adev->gmc.is_app_apu && vm->mem_id >= 0) { + local_node = adev->gmc.mem_partitions[vm->mem_id].numa.node; } else { dev_dbg(adev->dev, "Only native mode APU is supported.\n"); return; @@ -1356,7 +1355,7 @@ static void gmc_v9_0_override_vm_pte_flags(struct amdgpu_device *adev, } nid = pfn_to_nid(addr >> PAGE_SHIFT); dev_dbg(adev->dev, "vm->mem_id=%d, local_node=%d, nid=%d\n", - /*vm->mem_id*/0, local_node, nid); + vm->mem_id, local_node, nid); if (nid == local_node) { uint64_t old_flags = *flags; unsigned int mtype_local = MTYPE_RW; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 62aa7fb2eaa5..a700d9ccd054 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -1203,8 +1203,8 @@ svm_range_get_pte_flags(struct kfd_node *node, mapping_flags |= AMDGPU_VM_MTYPE_UC; } else if (domain == SVM_RANGE_VRAM_DOMAIN) { /* local HBM region close to partition */ - if (bo_node->adev == node->adev /* TODO: memory partitions && - bo_node->mem_id == node->mem_id*/) + if (bo_node->adev == node->adev && + (!bo_node->xcp || !node->xcp || bo_node->xcp->mem_id == node->xcp->mem_id)) mapping_flags |= mtype_local; /* local HBM region far from partition or remote XGMI GPU */ else if (svm_nodes_in_same_hive(bo_node, node)) @@ -1358,8 +1358,9 @@ svm_range_map_to_gpu(struct kfd_process_device *pdd, struct svm_range *prange, (last_domain == SVM_RANGE_VRAM_DOMAIN) ? 
1 : 0, pte_flags); - /* TODO: we still need to determine the vm_manager.vram_base_offset based on - * the memory partition. + /* For dGPU mode, we use same vm_manager to allocate VRAM for + * different memory partition based on fpfn/lpfn, we should use + * same vm_manager.vram_base_offset regardless memory partition. */ r = amdgpu_vm_update_range(adev, vm, false, false, flush_tlb, NULL, last_start, prange->start + i, -- cgit From 3ebfd221c1a83e5f0edadb87d173d8fd93d1d125 Mon Sep 17 00:00:00 2001 From: Philip Yang Date: Wed, 8 Mar 2023 11:57:00 -0500 Subject: drm/amdkfd: Store xcp partition id to amdgpu bo For memory accounting per compute partition and export drm amdgpu bo and then import to KFD, we need the xcp id to account the memory usage or find the KFD node of the original amdgpu bo to create the KFD bo on the correct adev KFD node. Set xcp_id_plus1 of amdgpu_bo_param to create bo and store xcp_id to amdgpu bo. Add helper macro to get the mem_id from adev and xcp_id. v2: squash in fix ("drm/amdgpu: Fix BO creation failure on GFX 9.4.3 dGPU") Signed-off-by: Philip Yang Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 4 ++++ drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 11 ++++++----- drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 15 ++++++++++----- drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 12 ++++++++---- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 6 +++++- drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c | 5 +++-- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 2 +- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 4 ++-- 10 files changed, 42 insertions(+), 23 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index 324cb566ca2f..05c54776951b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -330,6 +330,10 @@ int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev, void amdgpu_amdkfd_unreserve_mem_limit(struct amdgpu_device *adev, uint64_t size, u32 alloc_flag); +#define KFD_XCP_MEM_ID(adev, xcp_id) \ + ((adev)->xcp_mgr && (xcp_id) >= 0 ?\ + (adev)->xcp_mgr->xcp[(xcp_id)].mem_id : -1) + #define KFD_XCP_MEMORY_SIZE(n) ((n)->adev->gmc.num_mem_partitions ?\ (n)->adev->gmc.mem_partitions[(n)->xcp->mem_id].size /\ (n)->adev->xcp_mgr->num_xcp_per_mem_partition :\ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index c234dc0db799..8724a0be31b8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -1634,6 +1634,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( uint64_t *offset, uint32_t flags, bool criu_resume) { struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv); + struct amdgpu_fpriv *fpriv = container_of(avm, struct amdgpu_fpriv, vm); enum ttm_bo_type bo_type = ttm_bo_type_device; struct sg_table *sg = NULL; uint64_t user_addr = 0; @@ -1641,7 +1642,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( struct drm_gem_object *gobj = NULL; u32 domain, alloc_domain; uint64_t aligned_size; - int8_t mem_id = -1; + int8_t xcp_id = -1; u64 alloc_flags; int ret; @@ -1660,7 +1661,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( alloc_flags |= (flags & KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC) ? AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED : 0; } - mem_id = avm->mem_id; + xcp_id = fpriv->xcp_id == ~0 ? 0 : fpriv->xcp_id; } else if (flags & KFD_IOC_ALLOC_MEM_FLAGS_GTT) { domain = alloc_domain = AMDGPU_GEM_DOMAIN_GTT; alloc_flags = 0; @@ -1718,12 +1719,12 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( goto err_reserve_limit; } - pr_debug("\tcreate BO VA 0x%llx size 0x%llx domain %s mem_id %d\n", + pr_debug("\tcreate BO VA 0x%llx size 0x%llx domain %s xcp_id %d\n", va, (*mem)->aql_queue ? 
size << 1 : size, - domain_string(alloc_domain), mem_id); + domain_string(alloc_domain), xcp_id); ret = amdgpu_gem_object_create(adev, aligned_size, 1, alloc_domain, alloc_flags, - bo_type, NULL, &gobj, mem_id + 1); + bo_type, NULL, &gobj, xcp_id + 1); if (ret) { pr_debug("Failed to create BO on domain %s. ret %d\n", domain_string(alloc_domain), ret); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index 33ebee18b80d..7e8839cc6f58 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -98,7 +98,7 @@ int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size, int alignment, u32 initial_domain, u64 flags, enum ttm_bo_type type, struct dma_resv *resv, - struct drm_gem_object **obj, int8_t mem_id_plus1) + struct drm_gem_object **obj, int8_t xcp_id_plus1) { struct amdgpu_bo *bo; struct amdgpu_bo_user *ubo; @@ -116,7 +116,7 @@ int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size, bp.flags = flags; bp.domain = initial_domain; bp.bo_ptr_size = sizeof(struct amdgpu_bo); - bp.mem_id_plus1 = mem_id_plus1; + bp.xcp_id_plus1 = xcp_id_plus1; r = amdgpu_bo_create_user(adev, &bp, &ubo); if (r) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h index 646c4fcc8e40..f30264782ba2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h @@ -43,7 +43,7 @@ int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size, int alignment, u32 initial_domain, u64 flags, enum ttm_bo_type type, struct dma_resv *resv, - struct drm_gem_object **obj, int8_t mem_id_plus1); + struct drm_gem_object **obj, int8_t xcp_id_plus1); int amdgpu_mode_dumb_create(struct drm_file *file_priv, struct drm_device *dev, struct drm_mode_create_dumb *args); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index b2d11c4f39b0..42c02f48c3a8 
100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -131,14 +131,15 @@ void amdgpu_bo_placement_from_domain(struct amdgpu_bo *abo, u32 domain) if (domain & AMDGPU_GEM_DOMAIN_VRAM) { unsigned int visible_pfn = adev->gmc.visible_vram_size >> PAGE_SHIFT; + int8_t mem_id = KFD_XCP_MEM_ID(adev, abo->xcp_id); - if (adev->gmc.mem_partitions && abo->mem_id >= 0) { - places[c].fpfn = adev->gmc.mem_partitions[abo->mem_id].range.fpfn; + if (adev->gmc.mem_partitions && mem_id >= 0) { + places[c].fpfn = adev->gmc.mem_partitions[mem_id].range.fpfn; /* * memory partition range lpfn is inclusive start + size - 1 * TTM place lpfn is exclusive start + size */ - places[c].lpfn = adev->gmc.mem_partitions[abo->mem_id].range.lpfn + 1; + places[c].lpfn = adev->gmc.mem_partitions[mem_id].range.lpfn + 1; } else { places[c].fpfn = 0; places[c].lpfn = 0; @@ -583,8 +584,12 @@ int amdgpu_bo_create(struct amdgpu_device *adev, bo->flags = bp->flags; - /* bo->mem_id -1 means any partition */ - bo->mem_id = bp->mem_id_plus1 - 1; + if (adev->gmc.mem_partitions) + /* For GPUs with spatial partitioning, bo->xcp_id=-1 means any partition */ + bo->xcp_id = bp->xcp_id_plus1 - 1; + else + /* For GPUs without spatial partitioning */ + bo->xcp_id = 0; if (!amdgpu_bo_support_uswc(bo->flags)) bo->flags &= ~AMDGPU_GEM_CREATE_CPU_GTT_USWC; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h index eb24a66ccee5..05496b97ef93 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h @@ -56,8 +56,8 @@ struct amdgpu_bo_param { bool no_wait_gpu; struct dma_resv *resv; void (*destroy)(struct ttm_buffer_object *bo); - /* memory partition number plus 1, 0 means any partition */ - int8_t mem_id_plus1; + /* xcp partition number plus 1, 0 means any partition */ + int8_t xcp_id_plus1; }; /* bo virtual addresses in a vm */ @@ -111,8 +111,12 @@ struct amdgpu_bo { #endif 
struct kgd_mem *kfd_bo; - /* memory partition number, -1 means any partition */ - int8_t mem_id; + /* + * For GPUs with spatial partitioning, xcp partition number, -1 means + * any partition. For other ASICs without spatial partition, always 0 + * for memory accounting. + */ + int8_t xcp_id; }; struct amdgpu_bo_user { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 129c593cb2bd..23101c82519a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -1051,6 +1051,7 @@ static void amdgpu_ttm_backend_destroy(struct ttm_device *bdev, static struct ttm_tt *amdgpu_ttm_tt_create(struct ttm_buffer_object *bo, uint32_t page_flags) { + struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev); struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo); struct amdgpu_ttm_tt *gtt; enum ttm_caching caching; @@ -1060,7 +1061,10 @@ static struct ttm_tt *amdgpu_ttm_tt_create(struct ttm_buffer_object *bo, return NULL; } gtt->gobj = &bo->base; - gtt->pool_id = abo->mem_id; + if (adev->gmc.mem_partitions && abo->xcp_id >= 0) + gtt->pool_id = KFD_XCP_MEM_ID(adev, abo->xcp_id); + else + gtt->pool_id = abo->xcp_id; if (abo->flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC) caching = ttm_write_combined; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c index 62fc7e8d326e..cc3b1b596e56 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c @@ -502,6 +502,7 @@ exit: int amdgpu_vm_pt_create(struct amdgpu_device *adev, struct amdgpu_vm *vm, int level, bool immediate, struct amdgpu_bo_vm **vmbo) { + struct amdgpu_fpriv *fpriv = container_of(vm, struct amdgpu_fpriv, vm); struct amdgpu_bo_param bp; struct amdgpu_bo *bo; struct dma_resv *resv; @@ -534,7 +535,7 @@ int amdgpu_vm_pt_create(struct amdgpu_device *adev, struct amdgpu_vm *vm, bp.type = ttm_bo_type_kernel; bp.no_wait_gpu = immediate; - bp.mem_id_plus1 = vm->mem_id + 1; + 
bp.xcp_id_plus1 = fpriv->xcp_id == ~0 ? 0 : fpriv->xcp_id + 1; if (vm->root.bo) bp.resv = vm->root.bo->tbo.base.resv; @@ -560,7 +561,7 @@ int amdgpu_vm_pt_create(struct amdgpu_device *adev, struct amdgpu_vm *vm, bp.type = ttm_bo_type_kernel; bp.resv = bo->tbo.base.resv; bp.bo_ptr_size = sizeof(struct amdgpu_bo); - bp.mem_id_plus1 = vm->mem_id + 1; + bp.xcp_id_plus1 = fpriv->xcp_id == ~0 ? 0 : fpriv->xcp_id + 1; r = amdgpu_bo_create(adev, &bp, &(*vmbo)->shadow); diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 263d17a8b433..7ea80bdf8e1e 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -1248,7 +1248,7 @@ static void gmc_v9_0_get_coherence_flags(struct amdgpu_device *adev, is_local = (!is_vram && (adev->flags & AMD_IS_APU) && num_possible_nodes() <= 1) || (is_vram && adev == bo_adev && - bo->mem_id == vm->mem_id); + KFD_XCP_MEM_ID(adev, bo->xcp_id) == vm->mem_id); snoop = true; if (uncached) { mtype = MTYPE_UC; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index a700d9ccd054..45959892bc0f 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -556,7 +556,7 @@ svm_range_vram_node_new(struct kfd_node *node, struct svm_range *prange, bp.type = ttm_bo_type_device; bp.resv = NULL; if (node->xcp) - bp.mem_id_plus1 = node->xcp->mem_id + 1; + bp.xcp_id_plus1 = node->xcp->id + 1; r = amdgpu_bo_create_user(node->adev, &bp, &ubo); if (r) { @@ -567,7 +567,7 @@ svm_range_vram_node_new(struct kfd_node *node, struct svm_range *prange, pr_debug("alloc bo at offset 0x%lx size 0x%lx on partition %d\n", bo->tbo.resource->start << PAGE_SHIFT, bp.size, - bp.mem_id_plus1 - 1); + bp.xcp_id_plus1 - 1); r = amdgpu_bo_reserve(bo, true); if (r) { -- cgit From 45b3a914d40e63d2c9e3a3e02fb2014be975b9b0 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 16 May 2023 17:16:30 -0400 Subject: drm/amdgpu/gmc9: fix 64 bit 
division in partition code Rework logic or use do_div() to avoid problems on 32 bit. v2: add a missing case for XCP macro v3: fix out of bounds array access v4: fix xcp handling harder Acked-by: Guchun Chen (v1) Reviewed-by: Mukul Joshi (v3) Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 15 +++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 9 ++++----- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 5 ++++- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 11 ++++++----- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 8 ++++++-- 5 files changed, 35 insertions(+), 13 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index 739eb7c0d133..5de92c9ab18f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -794,3 +794,18 @@ void amdgpu_amdkfd_unlock_kfd(struct amdgpu_device *adev) { kgd2kfd_unlock_kfd(); } + + +u64 amdgpu_amdkfd_xcp_memory_size(struct amdgpu_device *adev, int xcp_id) +{ + u64 tmp; + s8 mem_id = KFD_XCP_MEM_ID(adev, xcp_id); + + if (adev->gmc.num_mem_partitions && xcp_id >= 0 && mem_id >= 0) { + tmp = adev->gmc.mem_partitions[mem_id].size; + do_div(tmp, adev->xcp_mgr->num_xcp_per_mem_partition); + return tmp; + } else { + return adev->gmc.real_vram_size; + } +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index be43d71ba7ef..94cc456761e5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -333,15 +333,14 @@ int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev, void amdgpu_amdkfd_unreserve_mem_limit(struct amdgpu_device *adev, uint64_t size, u32 alloc_flag, int8_t xcp_id); +u64 amdgpu_amdkfd_xcp_memory_size(struct amdgpu_device *adev, int xcp_id); + #define KFD_XCP_MEM_ID(adev, xcp_id) \ ((adev)->xcp_mgr && (xcp_id) >= 0 ?\ 
(adev)->xcp_mgr->xcp[(xcp_id)].mem_id : -1) -#define KFD_XCP_MEMORY_SIZE(adev, xcp_id)\ - ((adev)->gmc.num_mem_partitions && (xcp_id) >= 0 ?\ - (adev)->gmc.mem_partitions[KFD_XCP_MEM_ID((adev), (xcp_id))].size /\ - (adev)->xcp_mgr->num_xcp_per_mem_partition :\ - (adev)->gmc.real_vram_size) +#define KFD_XCP_MEMORY_SIZE(adev, xcp_id) amdgpu_amdkfd_xcp_memory_size((adev), (xcp_id)) + #if IS_ENABLED(CONFIG_HSA_AMD) void amdgpu_amdkfd_gpuvm_init_mem_limits(void); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 23101c82519a..902773ce41b5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -814,11 +814,14 @@ static void amdgpu_ttm_gart_bind_gfx9_mqd(struct amdgpu_device *adev, struct amdgpu_ttm_tt *gtt = (void *)ttm; uint64_t total_pages = ttm->num_pages; int num_xcc = max(1U, adev->gfx.num_xcc_per_xcp); - uint64_t page_idx, pages_per_xcc = total_pages / num_xcc; + uint64_t page_idx, pages_per_xcc; int i; uint64_t ctrl_flags = (flags & ~AMDGPU_PTE_MTYPE_VG10_MASK) | AMDGPU_PTE_MTYPE_VG10(AMDGPU_MTYPE_NC); + pages_per_xcc = total_pages; + do_div(pages_per_xcc, num_xcc); + for (i = 0, page_idx = 0; i < num_xcc; i++, page_idx += pages_per_xcc) { /* MQD page: use default flags */ amdgpu_gart_bind(adev, diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 7ea80bdf8e1e..f70e666cecf2 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -1914,9 +1914,10 @@ gmc_v9_0_init_acpi_mem_ranges(struct amdgpu_device *adev, adev->gmc.num_mem_partitions = num_ranges; /* If there is only partition, don't use entire size */ - if (adev->gmc.num_mem_partitions == 1) - mem_ranges[0].size = - (mem_ranges[0].size * (mem_groups - 1) / mem_groups); + if (adev->gmc.num_mem_partitions == 1) { + mem_ranges[0].size = mem_ranges[0].size * (mem_groups - 1); + do_div(mem_ranges[0].size, mem_groups); + } } static void 
@@ -1948,8 +1949,8 @@ gmc_v9_0_init_sw_mem_ranges(struct amdgpu_device *adev, break; } - size = (adev->gmc.real_vram_size >> AMDGPU_GPU_PAGE_SHIFT) / - adev->gmc.num_mem_partitions; + size = adev->gmc.real_vram_size >> AMDGPU_GPU_PAGE_SHIFT; + size /= adev->gmc.num_mem_partitions; for (i = 0; i < adev->gmc.num_mem_partitions; ++i) { mem_ranges[i].range.fpfn = start_addr; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 206851c9e642..b0f0d31bf3e6 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -1939,10 +1939,14 @@ void svm_range_set_max_pages(struct amdgpu_device *adev) uint64_t max_pages; uint64_t pages, _pages; uint64_t min_pages = 0; - int i; + int i, id; for (i = 0; i < adev->kfd.dev->num_nodes; i++) { - pages = KFD_XCP_MEMORY_SIZE(adev, adev->kfd.dev->nodes[i]->xcp->id) >> 17; + if (adev->kfd.dev->nodes[i]->xcp) + id = adev->kfd.dev->nodes[i]->xcp->id; + else + id = -1; + pages = KFD_XCP_MEMORY_SIZE(adev, id) >> 17; pages = clamp(pages, 1ULL << 9, 1ULL << 18); pages = rounddown_pow_of_two(pages); min_pages = min_not_zero(min_pages, pages); -- cgit From 6dabce860d40703d7c27b71a120317f09293cf9c Mon Sep 17 00:00:00 2001 From: Harshit Mogalapalli Date: Mon, 22 May 2023 00:30:15 -0700 Subject: drm/amdgpu: Fix unsigned comparison with zero in gmc_v9_0_process_interrupt() Smatch warns: drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c:579: unsigned 'xcc_id' is never less than zero. gfx_v9_4_3_ih_to_xcc_inst() returns negative numbers as well. Fix this by changing type of xcc_id to int. 
Fixes: 98b2e9cad227 ("drm/amdgpu: correct the vmhub index when page fault occurs") Signed-off-by: Harshit Mogalapalli Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index f70e666cecf2..1e8b2aaa48c1 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -557,8 +557,8 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev, const char *hub_name; u64 addr; uint32_t cam_index = 0; - int ret; - uint32_t node_id, xcc_id = 0; + int ret, xcc_id = 0; + uint32_t node_id; node_id = entry->node_id; -- cgit From 9535a86a4072babc37dc6bdadae52bdbb88166f5 Mon Sep 17 00:00:00 2001 From: Shiwu Zhang Date: Wed, 17 May 2023 14:15:05 +0800 Subject: drm/amdgpu: bypass bios dependent operations Since bios reading does not currently work, just bypass all operations related to bios. v2: hardcode the vram info for APP_APU case (hawking) v3: correct the vram_width with channel number * channel size (lijo) Signed-off-by: Shiwu Zhang Reviewed-by: Yang Wang Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 66 +++++++++++++++++++----------- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 7 +++- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 63 +++++++++++++++++----------- 3 files changed, 85 insertions(+), 51 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index ff9ca0dbeb5a..8f50ca2bee97 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -1389,6 +1389,15 @@ int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev) return 0; } +static bool amdgpu_device_read_bios(struct amdgpu_device *adev) +{ + if 
(hweight32(adev->aid_mask) && (adev->flags & AMD_IS_APU)) { + return false; + } + + return true; +} + /* * GPU helpers function. */ @@ -1408,6 +1417,9 @@ bool amdgpu_device_need_post(struct amdgpu_device *adev) if (amdgpu_sriov_vf(adev)) return false; + if (!amdgpu_device_read_bios(adev)) + return false; + if (amdgpu_passthrough(adev)) { /* for FIJI: In whole GPU pass-through virtualization case, after VM reboot * some old smc fw still need driver do vPost otherwise gpu hang, while @@ -2318,14 +2330,16 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev) return r; /* Read BIOS */ - if (!amdgpu_get_bios(adev)) - return -EINVAL; + if (amdgpu_device_read_bios(adev)) { + if (!amdgpu_get_bios(adev)) + return -EINVAL; - r = amdgpu_atombios_init(adev); - if (r) { - dev_err(adev->dev, "amdgpu_atombios_init failed\n"); - amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0); - return r; + r = amdgpu_atombios_init(adev); + if (r) { + dev_err(adev->dev, "amdgpu_atombios_init failed\n"); + amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0); + return r; + } } /*get pf2vf msg info at it's earliest time*/ @@ -3945,25 +3959,27 @@ int amdgpu_device_init(struct amdgpu_device *adev, } } - if (adev->is_atom_fw) { - /* Initialize clocks */ - r = amdgpu_atomfirmware_get_clock_info(adev); - if (r) { - dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n"); - amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0); - goto failed; - } - } else { - /* Initialize clocks */ - r = amdgpu_atombios_get_clock_info(adev); - if (r) { - dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n"); - amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0); - goto failed; + if (adev->bios) { + if (adev->is_atom_fw) { + /* Initialize clocks */ + r = amdgpu_atomfirmware_get_clock_info(adev); + if (r) { + dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n"); + amdgpu_vf_error_put(adev, 
AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0); + goto failed; + } + } else { + /* Initialize clocks */ + r = amdgpu_atombios_get_clock_info(adev); + if (r) { + dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n"); + amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0); + goto failed; + } + /* init i2c buses */ + if (!amdgpu_device_has_dc_support(adev)) + amdgpu_atombios_i2c_init(adev); } - /* init i2c buses */ - if (!amdgpu_device_has_dc_support(adev)) - amdgpu_atombios_i2c_init(adev); } fence_driver_init: diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 473eeac1f03b..d2d0d27f9053 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -1696,7 +1696,7 @@ static int amdgpu_ttm_reserve_tmr(struct amdgpu_device *adev) uint32_t reserve_size = 0; int ret; - if (!amdgpu_sriov_vf(adev)) { + if (adev->bios && !amdgpu_sriov_vf(adev)) { if (amdgpu_atomfirmware_mem_training_supported(adev)) mem_train_support = true; else @@ -1713,7 +1713,10 @@ static int amdgpu_ttm_reserve_tmr(struct amdgpu_device *adev) if (adev->bios) reserve_size = amdgpu_atomfirmware_get_fw_reserved_fb_size(adev); - if (!reserve_size) + + if (!adev->bios && adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3)) + reserve_size = max(reserve_size, (uint32_t)280 << 20); + else if (!reserve_size) reserve_size = DISCOVERY_TMR_OFFSET; if (mem_train_support) { diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 1e8b2aaa48c1..be7823d82150 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -2010,34 +2010,49 @@ static int gmc_v9_0_sw_init(void *handle) spin_lock_init(&adev->gmc.invalidate_lock); - r = amdgpu_atomfirmware_get_vram_info(adev, - &vram_width, &vram_type, &vram_vendor); - if (amdgpu_sriov_vf(adev)) - /* For Vega10 SR-IOV, vram_width can't be read from ATOM as RAVEN, - * and DF related 
registers is not readable, seems hardcord is the - * only way to set the correct vram_width - */ - adev->gmc.vram_width = 2048; - else if (amdgpu_emu_mode != 1) - adev->gmc.vram_width = vram_width; + if (!(adev->bios) || adev->gmc.is_app_apu) { + if (adev->flags & AMD_IS_APU) { + if (adev->gmc.is_app_apu) { + adev->gmc.vram_type = AMDGPU_VRAM_TYPE_HBM; + adev->gmc.vram_width = 128 * 64; + } else { + adev->gmc.vram_type = AMDGPU_VRAM_TYPE_DDR4; + adev->gmc.vram_width = 64 * 64; + } + } else { + adev->gmc.vram_type = AMDGPU_VRAM_TYPE_HBM; + adev->gmc.vram_width = 128 * 64; + } + } else { + r = amdgpu_atomfirmware_get_vram_info(adev, + &vram_width, &vram_type, &vram_vendor); + if (amdgpu_sriov_vf(adev)) + /* For Vega10 SR-IOV, vram_width can't be read from ATOM as RAVEN, + * and DF related registers is not readable, seems hardcord is the + * only way to set the correct vram_width + */ + adev->gmc.vram_width = 2048; + else if (amdgpu_emu_mode != 1) + adev->gmc.vram_width = vram_width; - if (!adev->gmc.vram_width) { - int chansize, numchan; + if (!adev->gmc.vram_width) { + int chansize, numchan; - /* hbm memory channel size */ - if (adev->flags & AMD_IS_APU) - chansize = 64; - else - chansize = 128; - if (adev->df.funcs && - adev->df.funcs->get_hbm_channel_number) { - numchan = adev->df.funcs->get_hbm_channel_number(adev); - adev->gmc.vram_width = numchan * chansize; + /* hbm memory channel size */ + if (adev->flags & AMD_IS_APU) + chansize = 64; + else + chansize = 128; + if (adev->df.funcs && + adev->df.funcs->get_hbm_channel_number) { + numchan = adev->df.funcs->get_hbm_channel_number(adev); + adev->gmc.vram_width = numchan * chansize; + } } - } - adev->gmc.vram_type = vram_type; - adev->gmc.vram_vendor = vram_vendor; + adev->gmc.vram_type = vram_type; + adev->gmc.vram_vendor = vram_vendor; + } switch (adev->ip_versions[GC_HWIP][0]) { case IP_VERSION(9, 1, 0): case IP_VERSION(9, 2, 2): -- cgit From 1bae03aab2b41770b9198b3ef1ddc7dc7efb0678 Mon Sep 17 00:00:00 2001 
From: Srinivasan Shanmugam Date: Tue, 30 May 2023 14:43:14 +0530 Subject: drm/amdgpu: Fix up missing parameter in kdoc for 'inst' in gmc_ v7, v8, v9, v10, v11.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix these warnings by adding 'inst' arguments to kdocs. gcc with W=1 drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c:428: warning: Function parameter or member 'inst' not described in 'gmc_v7_0_flush_gpu_tlb_pasid' drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c:626: warning: Function parameter or member 'inst' not described in 'gmc_v8_0_flush_gpu_tlb_pasid' drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c:423: warning: Function parameter or member 'inst' not described in 'gmc_v10_0_flush_gpu_tlb_pasid' drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c:328: warning: Function parameter or member 'inst' not described in 'gmc_v11_0_flush_gpu_tlb_pasid' drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c:950: warning: Function parameter or member 'inst' not described in 'gmc_v9_0_flush_gpu_tlb_pasid' Cc: Christian König Cc: Alex Deucher Cc: Hawking Zhang Signed-off-by: Srinivasan Shanmugam Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 1 + drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c | 1 + drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c | 1 + drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c | 1 + drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 1 + 5 files changed, 5 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c index 01bd45651382..b2e42f1b0f12 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c @@ -414,6 +414,7 @@ error_alloc: * @pasid: pasid to be flush * @flush_type: the flush type * @all_hub: Used with PACKET3_INVALIDATE_TLBS_ALL_HUB() + * @inst: is used to select which instance of KIQ to use for the invalidation * * Flush the TLB for the requested pasid. 
*/ diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c index 4bf807d825c0..c571f0d95994 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c @@ -319,6 +319,7 @@ static void gmc_v11_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, * @pasid: pasid to be flush * @flush_type: the flush type * @all_hub: flush all hubs + * @inst: is used to select which instance of KIQ to use for the invalidation * * Flush the TLB for the requested pasid. */ diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c index 6f53049619cd..acd2b407860f 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c @@ -419,6 +419,7 @@ static int gmc_v7_0_mc_init(struct amdgpu_device *adev) * @pasid: pasid to be flush * @flush_type: type of flush * @all_hub: flush all hubs + * @inst: is used to select which instance of KIQ to use for the invalidation * * Flush the TLB for the requested pasid. */ diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c index 48475077ca92..85dead2a5702 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c @@ -617,6 +617,7 @@ static int gmc_v8_0_mc_init(struct amdgpu_device *adev) * @pasid: pasid to be flush * @flush_type: type of flush * @all_hub: flush all hubs + * @inst: is used to select which instance of KIQ to use for the invalidation * * Flush the TLB for the requested pasid. 
*/ diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index be7823d82150..3ed286b72cae 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -941,6 +941,7 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, * @pasid: pasid to be flush * @flush_type: the flush type * @all_hub: flush all hubs + * @inst: is used to select which instance of KIQ to use for the invalidation * * Flush the TLB for the requested pasid. */ -- cgit