author		Dave Airlie <airlied@redhat.com>	2022-05-19 14:09:46 +1000
committer	Dave Airlie <airlied@redhat.com>	2022-05-19 14:09:54 +1000
commit		00df0514ab13813655a6fbaba85425f8f4780be2 (patch)
tree		85e9e8908b702575ff4a7e4a58cf36dcca93c204 /drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
parent		f8122500a039abeabfff41b0ad8b6a2c94c1107d (diff)
parent		0223e516470aa0589da6c03e6d177c10594cabbd (diff)
Merge tag 'amd-drm-next-5.19-2022-05-18' of https://gitlab.freedesktop.org/agd5f/linux into drm-next
amd-drm-next-5.19-2022-05-18:
amdgpu:
- Misc code cleanups
- Additional SMU 13.x enablement
- Smartshift fixes
- GFX11 fixes
- Support for SMU 13.0.4
- SMU mutex fix
- Suspend/resume fix
amdkfd:
- static checker fix
- Doorbell/MMIO resource handling fix
Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Alex Deucher <alexander.deucher@amd.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20220518205621.5741-1-alexander.deucher@amd.com
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c')
-rw-r--r--	drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c	169
1 file changed, 134 insertions, 35 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
index d3939c5f531d..1f9021f896a1 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
@@ -347,7 +347,7 @@ static uint64_t sdma_v5_0_ring_get_rptr(struct amdgpu_ring *ring)
 	u64 *rptr;
 
 	/* XXX check if swapping is necessary on BE */
-	rptr = ((u64 *)&ring->adev->wb.wb[ring->rptr_offs]);
+	rptr = (u64 *)ring->rptr_cpu_addr;
 
 	DRM_DEBUG("rptr before shift == 0x%016llx\n", *rptr);
 	return ((*rptr) >> 2);
@@ -367,7 +367,7 @@ static uint64_t sdma_v5_0_ring_get_wptr(struct amdgpu_ring *ring)
 
 	if (ring->use_doorbell) {
 		/* XXX check if swapping is necessary on BE */
-		wptr = READ_ONCE(*((u64 *)&adev->wb.wb[ring->wptr_offs]));
+		wptr = READ_ONCE(*((u64 *)ring->wptr_cpu_addr));
 		DRM_DEBUG("wptr/doorbell before shift == 0x%016llx\n", wptr);
 	} else {
 		wptr = RREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR_HI));
@@ -400,8 +400,8 @@ static void sdma_v5_0_ring_set_wptr(struct amdgpu_ring *ring)
 				lower_32_bits(ring->wptr << 2),
 				upper_32_bits(ring->wptr << 2));
 		/* XXX check if swapping is necessary on BE */
-		adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr << 2);
-		adev->wb.wb[ring->wptr_offs + 1] = upper_32_bits(ring->wptr << 2);
+		atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
+			     ring->wptr << 2);
 		DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n",
 				ring->doorbell_index, ring->wptr << 2);
 		WDOORBELL64(ring->doorbell_index, ring->wptr << 2);
@@ -562,9 +562,11 @@ static void sdma_v5_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 se
 	}
 
 	if (flags & AMDGPU_FENCE_FLAG_INT) {
+		uint32_t ctx = ring->is_mes_queue ?
+			(ring->hw_queue_id | AMDGPU_FENCE_MES_QUEUE_FLAG) : 0;
 		/* generate an interrupt */
 		amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_TRAP));
-		amdgpu_ring_write(ring, SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(0));
+		amdgpu_ring_write(ring, SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(ctx));
 	}
 }
 
@@ -708,7 +710,6 @@ static int sdma_v5_0_gfx_resume(struct amdgpu_device *adev)
 	struct amdgpu_ring *ring;
 	u32 rb_cntl, ib_cntl;
 	u32 rb_bufsz;
-	u32 wb_offset;
 	u32 doorbell;
 	u32 doorbell_offset;
 	u32 temp;
@@ -718,7 +719,6 @@ static int sdma_v5_0_gfx_resume(struct amdgpu_device *adev)
 
 	for (i = 0; i < adev->sdma.num_instances; i++) {
 		ring = &adev->sdma.instance[i].ring;
-		wb_offset = (ring->rptr_offs * 4);
 
 		if (!amdgpu_sriov_vf(adev))
 			WREG32(sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_SEM_WAIT_FAIL_TIMER_CNTL), 0);
@@ -741,7 +741,7 @@ static int sdma_v5_0_gfx_resume(struct amdgpu_device *adev)
 		WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), 0);
 
 		/* setup the wptr shadow polling */
-		wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
+		wptr_gpu_addr = ring->wptr_gpu_addr;
 		WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_LO),
 		       lower_32_bits(wptr_gpu_addr));
 		WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_HI),
@@ -756,9 +756,9 @@ static int sdma_v5_0_gfx_resume(struct amdgpu_device *adev)
 
 		/* set the wb address whether it's enabled or not */
 		WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_ADDR_HI),
-		       upper_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF);
+		       upper_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFF);
 		WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_ADDR_LO),
-		       lower_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC);
+		       lower_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFC);
 
 		rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1);
@@ -961,6 +961,49 @@ static int sdma_v5_0_start(struct amdgpu_device *adev)
 	return r;
 }
 
+static int sdma_v5_0_mqd_init(struct amdgpu_device *adev, void *mqd,
+			      struct amdgpu_mqd_prop *prop)
+{
+	struct v10_sdma_mqd *m = mqd;
+	uint64_t wb_gpu_addr;
+
+	m->sdmax_rlcx_rb_cntl =
+		order_base_2(prop->queue_size / 4) << SDMA0_RLC0_RB_CNTL__RB_SIZE__SHIFT |
+		1 << SDMA0_RLC0_RB_CNTL__RPTR_WRITEBACK_ENABLE__SHIFT |
+		6 << SDMA0_RLC0_RB_CNTL__RPTR_WRITEBACK_TIMER__SHIFT |
+		1 << SDMA0_RLC0_RB_CNTL__RB_PRIV__SHIFT;
+
+	m->sdmax_rlcx_rb_base = lower_32_bits(prop->hqd_base_gpu_addr >> 8);
+	m->sdmax_rlcx_rb_base_hi = upper_32_bits(prop->hqd_base_gpu_addr >> 8);
+
+	m->sdmax_rlcx_rb_wptr_poll_cntl = RREG32(sdma_v5_0_get_reg_offset(adev, 0,
+						  mmSDMA0_GFX_RB_WPTR_POLL_CNTL));
+
+	wb_gpu_addr = prop->wptr_gpu_addr;
+	m->sdmax_rlcx_rb_wptr_poll_addr_lo = lower_32_bits(wb_gpu_addr);
+	m->sdmax_rlcx_rb_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr);
+
+	wb_gpu_addr = prop->rptr_gpu_addr;
+	m->sdmax_rlcx_rb_rptr_addr_lo = lower_32_bits(wb_gpu_addr);
+	m->sdmax_rlcx_rb_rptr_addr_hi = upper_32_bits(wb_gpu_addr);
+
+	m->sdmax_rlcx_ib_cntl = RREG32(sdma_v5_0_get_reg_offset(adev, 0,
+					mmSDMA0_GFX_IB_CNTL));
+
+	m->sdmax_rlcx_doorbell_offset =
+		prop->doorbell_index << SDMA0_RLC0_DOORBELL_OFFSET__OFFSET__SHIFT;
+
+	m->sdmax_rlcx_doorbell = REG_SET_FIELD(0, SDMA0_RLC0_DOORBELL, ENABLE, 1);
+
+	return 0;
+}
+
+static void sdma_v5_0_set_mqd_funcs(struct amdgpu_device *adev)
+{
+	adev->mqds[AMDGPU_HW_IP_DMA].mqd_size = sizeof(struct v10_sdma_mqd);
+	adev->mqds[AMDGPU_HW_IP_DMA].init_mqd = sdma_v5_0_mqd_init;
+}
+
 /**
  * sdma_v5_0_ring_test_ring - simple async dma engine test
  *
@@ -978,18 +1021,29 @@ static int sdma_v5_0_ring_test_ring(struct amdgpu_ring *ring)
 	int r;
 	u32 tmp;
 	u64 gpu_addr;
+	volatile uint32_t *cpu_ptr = NULL;
 
-	r = amdgpu_device_wb_get(adev, &index);
-	if (r) {
-		dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r);
-		return r;
-	}
-
-	gpu_addr = adev->wb.gpu_addr + (index * 4);
 	tmp = 0xCAFEDEAD;
-	adev->wb.wb[index] = cpu_to_le32(tmp);
-
-	r = amdgpu_ring_alloc(ring, 5);
+
+	if (ring->is_mes_queue) {
+		uint32_t offset = 0;
+		offset = amdgpu_mes_ctx_get_offs(ring,
+					 AMDGPU_MES_CTX_PADDING_OFFS);
+		gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
+		cpu_ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
+		*cpu_ptr = tmp;
+	} else {
+		r = amdgpu_device_wb_get(adev, &index);
+		if (r) {
+			dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r);
+			return r;
+		}
+
+		gpu_addr = adev->wb.gpu_addr + (index * 4);
+		adev->wb.wb[index] = cpu_to_le32(tmp);
+	}
+
+	r = amdgpu_ring_alloc(ring, 20);
 	if (r) {
 		DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r);
 		amdgpu_device_wb_free(adev, index);
@@ -1005,7 +1059,10 @@ static int sdma_v5_0_ring_test_ring(struct amdgpu_ring *ring)
 	amdgpu_ring_commit(ring);
 
 	for (i = 0; i < adev->usec_timeout; i++) {
-		tmp = le32_to_cpu(adev->wb.wb[index]);
+		if (ring->is_mes_queue)
+			tmp = le32_to_cpu(*cpu_ptr);
+		else
+			tmp = le32_to_cpu(adev->wb.wb[index]);
 		if (tmp == 0xDEADBEEF)
 			break;
 		if (amdgpu_emu_mode == 1)
@@ -1017,7 +1074,8 @@ static int sdma_v5_0_ring_test_ring(struct amdgpu_ring *ring)
 	if (i >= adev->usec_timeout)
 		r = -ETIMEDOUT;
 
-	amdgpu_device_wb_free(adev, index);
+	if (!ring->is_mes_queue)
+		amdgpu_device_wb_free(adev, index);
 
 	return r;
 }
@@ -1040,22 +1098,38 @@ static int sdma_v5_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 	long r;
 	u32 tmp = 0;
 	u64 gpu_addr;
+	volatile uint32_t *cpu_ptr = NULL;
 
-	r = amdgpu_device_wb_get(adev, &index);
-	if (r) {
-		dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r);
-		return r;
-	}
-
-	gpu_addr = adev->wb.gpu_addr + (index * 4);
 	tmp = 0xCAFEDEAD;
-	adev->wb.wb[index] = cpu_to_le32(tmp);
 	memset(&ib, 0, sizeof(ib));
-	r = amdgpu_ib_get(adev, NULL, 256,
+
+	if (ring->is_mes_queue) {
+		uint32_t offset = 0;
+		offset = amdgpu_mes_ctx_get_offs(ring, AMDGPU_MES_CTX_IB_OFFS);
+		ib.gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
+		ib.ptr = (void *)amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
+
+		offset = amdgpu_mes_ctx_get_offs(ring,
+					 AMDGPU_MES_CTX_PADDING_OFFS);
+		gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
+		cpu_ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
+		*cpu_ptr = tmp;
+	} else {
+		r = amdgpu_device_wb_get(adev, &index);
+		if (r) {
+			dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r);
+			return r;
+		}
+
+		gpu_addr = adev->wb.gpu_addr + (index * 4);
+		adev->wb.wb[index] = cpu_to_le32(tmp);
+
+		r = amdgpu_ib_get(adev, NULL, 256,
 					AMDGPU_IB_POOL_DIRECT, &ib);
-	if (r) {
-		DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
-		goto err0;
+		if (r) {
+			DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
+			goto err0;
+		}
 	}
 
 	ib.ptr[0] = SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) |
@@ -1082,7 +1156,12 @@ static int sdma_v5_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 		DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
 		goto err1;
 	}
-	tmp = le32_to_cpu(adev->wb.wb[index]);
+
+	if (ring->is_mes_queue)
+		tmp = le32_to_cpu(*cpu_ptr);
+	else
+		tmp = le32_to_cpu(adev->wb.wb[index]);
+
 	if (tmp == 0xDEADBEEF)
 		r = 0;
 	else
@@ -1092,7 +1171,8 @@ err1:
 	amdgpu_ib_free(adev, &ib, NULL);
 	dma_fence_put(f);
 err0:
-	amdgpu_device_wb_free(adev, index);
+	if (!ring->is_mes_queue)
+		amdgpu_device_wb_free(adev, index);
 	return r;
 }
 
@@ -1291,6 +1371,7 @@ static int sdma_v5_0_early_init(void *handle)
 	sdma_v5_0_set_buffer_funcs(adev);
 	sdma_v5_0_set_vm_pte_funcs(adev);
 	sdma_v5_0_set_irq_funcs(adev);
+	sdma_v5_0_set_mqd_funcs(adev);
 
 	return 0;
 }
@@ -1511,7 +1592,25 @@ static int sdma_v5_0_process_trap_irq(struct amdgpu_device *adev,
 				      struct amdgpu_irq_src *source,
 				      struct amdgpu_iv_entry *entry)
 {
+	uint32_t mes_queue_id = entry->src_data[0];
+
 	DRM_DEBUG("IH: SDMA trap\n");
+
+	if (adev->enable_mes && (mes_queue_id & AMDGPU_FENCE_MES_QUEUE_FLAG)) {
+		struct amdgpu_mes_queue *queue;
+
+		mes_queue_id &= AMDGPU_FENCE_MES_QUEUE_ID_MASK;
+
+		spin_lock(&adev->mes.queue_id_lock);
+		queue = idr_find(&adev->mes.queue_id_idr, mes_queue_id);
+		if (queue) {
+			DRM_DEBUG("process smda queue id = %d\n", mes_queue_id);
+			amdgpu_fence_process(queue->ring);
+		}
+		spin_unlock(&adev->mes.queue_id_lock);
+		return 0;
+	}
+
 	switch (entry->client_id) {
 	case SOC15_IH_CLIENTID_SDMA0:
 		switch (entry->ring_id) {
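Reader's note on the wptr shadow change in the hunks above: the following is a minimal, self-contained C11 sketch, not kernel code and not part of this patch, of why sdma_v5_0_ring_set_wptr() now publishes the doorbell shadow with a single 64-bit atomic store (atomic64_set) instead of two 32-bit array writes. The engine polls the shadow concurrently, and one 64-bit store can never be observed half-updated. The names wptr_shadow, sdma_set_wptr and sdma_get_wptr are illustrative assumptions.

/* Illustrative userspace model of the write-back slot at ring->wptr_cpu_addr. */
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

static _Atomic uint64_t wptr_shadow;   /* stand-in for *ring->wptr_cpu_addr */

/* Writer side, analogous to the atomic64_set() in sdma_v5_0_ring_set_wptr():
 * the ring write pointer is published shifted left by 2 (dword units) in one
 * indivisible 64-bit store, so a concurrent poller never sees a torn value. */
static void sdma_set_wptr(uint64_t wptr)
{
	atomic_store_explicit(&wptr_shadow, wptr << 2, memory_order_relaxed);
}

/* Reader side, analogous to the READ_ONCE() in sdma_v5_0_ring_get_wptr(). */
static uint64_t sdma_get_wptr(void)
{
	return atomic_load_explicit(&wptr_shadow, memory_order_relaxed) >> 2;
}

int main(void)
{
	sdma_set_wptr(0x12345678ULL);
	printf("wptr readback: 0x%llx\n", (unsigned long long)sdma_get_wptr());
	return 0;
}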
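A second sketch, for the MES trap routing added above: sdma_v5_0_ring_emit_fence() tags the SDMA trap packet's INT_CONTEXT with the hardware queue id plus a flag bit, and sdma_v5_0_process_trap_irq() masks that flag back out of entry->src_data[0] to find the right queue. The flag and mask values and the helper names below are illustrative assumptions, not the real AMDGPU_FENCE_MES_QUEUE_FLAG / AMDGPU_FENCE_MES_QUEUE_ID_MASK definitions.

/* Self-contained model of the queue-id tagging scheme; plain C, not kernel code. */
#include <stdint.h>
#include <stdio.h>

#define MES_QUEUE_FLAG     0x80000000u   /* assumed: high bit marks a MES queue */
#define MES_QUEUE_ID_MASK  0x7fffffffu   /* assumed: low bits carry the queue id */

/* What the emit_fence path would place in the trap packet's INT_CONTEXT field. */
static uint32_t encode_trap_ctx(int is_mes_queue, uint32_t hw_queue_id)
{
	return is_mes_queue ? (hw_queue_id | MES_QUEUE_FLAG) : 0;
}

/* What the trap IRQ handler would do with the context it gets back in src_data[0]. */
static void handle_trap(uint32_t src_data)
{
	if (src_data & MES_QUEUE_FLAG) {
		uint32_t queue_id = src_data & MES_QUEUE_ID_MASK;
		/* kernel: look the queue up (idr_find under queue_id_lock) and
		 * run fence processing on its ring */
		printf("MES queue %u fence signalled\n", queue_id);
	} else {
		printf("legacy per-instance SDMA trap\n");
	}
}

int main(void)
{
	handle_trap(encode_trap_ctx(1, 5));   /* MES queue 5 */
	handle_trap(encode_trap_ctx(0, 0));   /* non-MES ring */
	return 0;
}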