Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c')
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c | 210
1 file changed, 162 insertions, 48 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
index d3d6d5b045b8..83c6ccaaa9e4 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
@@ -51,6 +51,8 @@
 MODULE_FIRMWARE("amdgpu/beige_goby_sdma.bin");
 MODULE_FIRMWARE("amdgpu/vangogh_sdma.bin");
 MODULE_FIRMWARE("amdgpu/yellow_carp_sdma.bin");
+MODULE_FIRMWARE("amdgpu/sdma_5_2_6.bin");
+MODULE_FIRMWARE("amdgpu/sdma_5_2_7.bin");
 
 #define SDMA1_REG_OFFSET 0x600
 #define SDMA3_REG_OFFSET 0x400
@@ -138,28 +140,34 @@ static int sdma_v5_2_init_microcode(struct amdgpu_device *adev)
 
 	switch (adev->ip_versions[SDMA0_HWIP][0]) {
 	case IP_VERSION(5, 2, 0):
-		chip_name = "sienna_cichlid";
+		chip_name = "sienna_cichlid_sdma";
 		break;
 	case IP_VERSION(5, 2, 2):
-		chip_name = "navy_flounder";
+		chip_name = "navy_flounder_sdma";
 		break;
 	case IP_VERSION(5, 2, 1):
-		chip_name = "vangogh";
+		chip_name = "vangogh_sdma";
 		break;
 	case IP_VERSION(5, 2, 4):
-		chip_name = "dimgrey_cavefish";
+		chip_name = "dimgrey_cavefish_sdma";
 		break;
 	case IP_VERSION(5, 2, 5):
-		chip_name = "beige_goby";
+		chip_name = "beige_goby_sdma";
 		break;
 	case IP_VERSION(5, 2, 3):
-		chip_name = "yellow_carp";
+		chip_name = "yellow_carp_sdma";
+		break;
+	case IP_VERSION(5, 2, 6):
+		chip_name = "sdma_5_2_6";
+		break;
+	case IP_VERSION(5, 2, 7):
+		chip_name = "sdma_5_2_7";
 		break;
 	default:
 		BUG();
 	}
 
-	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma.bin", chip_name);
+	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s.bin", chip_name);
 
 	err = request_firmware(&adev->sdma.instance[0].fw, fw_name, adev->dev);
 	if (err)
@@ -240,7 +248,7 @@ static uint64_t sdma_v5_2_ring_get_rptr(struct amdgpu_ring *ring)
 	u64 *rptr;
 
 	/* XXX check if swapping is necessary on BE */
-	rptr = ((u64 *)&ring->adev->wb.wb[ring->rptr_offs]);
+	rptr = (u64 *)ring->rptr_cpu_addr;
 
 	DRM_DEBUG("rptr before shift == 0x%016llx\n", *rptr);
 	return ((*rptr) >> 2);
@@ -260,7 +268,7 @@ static uint64_t sdma_v5_2_ring_get_wptr(struct amdgpu_ring *ring)
 
 	if (ring->use_doorbell) {
 		/* XXX check if swapping is necessary on BE */
-		wptr = READ_ONCE(*((u64 *)&adev->wb.wb[ring->wptr_offs]));
+		wptr = READ_ONCE(*((u64 *)ring->wptr_cpu_addr));
 		DRM_DEBUG("wptr/doorbell before shift == 0x%016llx\n", wptr);
 	} else {
 		wptr = RREG32(sdma_v5_2_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR_HI));
@@ -287,14 +295,14 @@ static void sdma_v5_2_ring_set_wptr(struct amdgpu_ring *ring)
 	if (ring->use_doorbell) {
 		DRM_DEBUG("Using doorbell -- "
 				"wptr_offs == 0x%08x "
-				"lower_32_bits(ring->wptr) << 2 == 0x%08x "
-				"upper_32_bits(ring->wptr) << 2 == 0x%08x\n",
+				"lower_32_bits(ring->wptr << 2) == 0x%08x "
+				"upper_32_bits(ring->wptr << 2) == 0x%08x\n",
 				ring->wptr_offs,
 				lower_32_bits(ring->wptr << 2),
 				upper_32_bits(ring->wptr << 2));
 		/* XXX check if swapping is necessary on BE */
-		adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr << 2);
-		adev->wb.wb[ring->wptr_offs + 1] = upper_32_bits(ring->wptr << 2);
+		atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
+			     ring->wptr << 2);
 		DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n",
 				ring->doorbell_index, ring->wptr << 2);
 		WDOORBELL64(ring->doorbell_index, ring->wptr << 2);
@@ -452,10 +460,12 @@ static void sdma_v5_2_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 se
 		amdgpu_ring_write(ring, upper_32_bits(seq));
 	}
 
-	if (flags & AMDGPU_FENCE_FLAG_INT) {
+	if ((flags & AMDGPU_FENCE_FLAG_INT)) {
+		uint32_t ctx = ring->is_mes_queue ?
+			(ring->hw_queue_id | AMDGPU_FENCE_MES_QUEUE_FLAG) : 0;
 		/* generate an interrupt */
 		amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_TRAP));
-		amdgpu_ring_write(ring, SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(0));
+		amdgpu_ring_write(ring, SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(ctx));
 	}
 }
 
@@ -601,7 +611,6 @@ static int sdma_v5_2_gfx_resume(struct amdgpu_device *adev)
 	struct amdgpu_ring *ring;
 	u32 rb_cntl, ib_cntl;
 	u32 rb_bufsz;
-	u32 wb_offset;
 	u32 doorbell;
 	u32 doorbell_offset;
 	u32 temp;
@@ -611,7 +620,6 @@ static int sdma_v5_2_gfx_resume(struct amdgpu_device *adev)
 
 	for (i = 0; i < adev->sdma.num_instances; i++) {
 		ring = &adev->sdma.instance[i].ring;
-		wb_offset = (ring->rptr_offs * 4);
 
 		if (!amdgpu_sriov_vf(adev))
 			WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_SEM_WAIT_FAIL_TIMER_CNTL), 0);
@@ -634,7 +642,7 @@ static int sdma_v5_2_gfx_resume(struct amdgpu_device *adev)
 		WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), 0);
 
 		/* setup the wptr shadow polling */
-		wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
+		wptr_gpu_addr = ring->wptr_gpu_addr;
 		WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_LO),
 		       lower_32_bits(wptr_gpu_addr));
 		WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_HI),
@@ -649,9 +657,9 @@
 
 		/* set the wb address whether it's enabled or not */
 		WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_ADDR_HI),
-		       upper_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF);
+		       upper_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFF);
 		WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_ADDR_LO),
-		       lower_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC);
+		       lower_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFC);
 
 		rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1);
 
@@ -664,8 +672,8 @@
 		WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 1);
 
 		if (!amdgpu_sriov_vf(adev)) { /* only bare-metal use register write for wptr */
-			WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR), lower_32_bits(ring->wptr) << 2);
-			WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), upper_32_bits(ring->wptr) << 2);
+			WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR), lower_32_bits(ring->wptr << 2));
+			WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), upper_32_bits(ring->wptr << 2));
 		}
 
 		doorbell = RREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL));
@@ -897,6 +905,49 @@ static int sdma_v5_2_start(struct amdgpu_device *adev)
 	return r;
 }
 
+static int sdma_v5_2_mqd_init(struct amdgpu_device *adev, void *mqd,
+			      struct amdgpu_mqd_prop *prop)
+{
+	struct v10_sdma_mqd *m = mqd;
+	uint64_t wb_gpu_addr;
+
+	m->sdmax_rlcx_rb_cntl =
+		order_base_2(prop->queue_size / 4) << SDMA0_RLC0_RB_CNTL__RB_SIZE__SHIFT |
+		1 << SDMA0_RLC0_RB_CNTL__RPTR_WRITEBACK_ENABLE__SHIFT |
+		6 << SDMA0_RLC0_RB_CNTL__RPTR_WRITEBACK_TIMER__SHIFT |
+		1 << SDMA0_RLC0_RB_CNTL__RB_PRIV__SHIFT;
+
+	m->sdmax_rlcx_rb_base = lower_32_bits(prop->hqd_base_gpu_addr >> 8);
+	m->sdmax_rlcx_rb_base_hi = upper_32_bits(prop->hqd_base_gpu_addr >> 8);
+
+	m->sdmax_rlcx_rb_wptr_poll_cntl = RREG32(sdma_v5_2_get_reg_offset(adev, 0,
+						  mmSDMA0_GFX_RB_WPTR_POLL_CNTL));
+
+	wb_gpu_addr = prop->wptr_gpu_addr;
+	m->sdmax_rlcx_rb_wptr_poll_addr_lo = lower_32_bits(wb_gpu_addr);
+	m->sdmax_rlcx_rb_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr);
+
+	wb_gpu_addr = prop->rptr_gpu_addr;
+	m->sdmax_rlcx_rb_rptr_addr_lo = lower_32_bits(wb_gpu_addr);
+	m->sdmax_rlcx_rb_rptr_addr_hi = upper_32_bits(wb_gpu_addr);
+
+	m->sdmax_rlcx_ib_cntl = RREG32(sdma_v5_2_get_reg_offset(adev, 0,
+					mmSDMA0_GFX_IB_CNTL));
+
+	m->sdmax_rlcx_doorbell_offset =
+		prop->doorbell_index << SDMA0_RLC0_DOORBELL_OFFSET__OFFSET__SHIFT;
+
+	m->sdmax_rlcx_doorbell = REG_SET_FIELD(0, SDMA0_RLC0_DOORBELL, ENABLE, 1);
+
+	return 0;
+}
+
+static void sdma_v5_2_set_mqd_funcs(struct amdgpu_device *adev)
+{
+	adev->mqds[AMDGPU_HW_IP_DMA].mqd_size = sizeof(struct v10_sdma_mqd);
+	adev->mqds[AMDGPU_HW_IP_DMA].init_mqd = sdma_v5_2_mqd_init;
+}
+
 /**
  * sdma_v5_2_ring_test_ring - simple async dma engine test
  *
@@ -914,18 +965,29 @@ static int sdma_v5_2_ring_test_ring(struct amdgpu_ring *ring)
 	int r;
 	u32 tmp;
 	u64 gpu_addr;
+	volatile uint32_t *cpu_ptr = NULL;
 
-	r = amdgpu_device_wb_get(adev, &index);
-	if (r) {
-		dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r);
-		return r;
-	}
-
-	gpu_addr = adev->wb.gpu_addr + (index * 4);
 	tmp = 0xCAFEDEAD;
-	adev->wb.wb[index] = cpu_to_le32(tmp);
-
-	r = amdgpu_ring_alloc(ring, 5);
+
+	if (ring->is_mes_queue) {
+		uint32_t offset = 0;
+		offset = amdgpu_mes_ctx_get_offs(ring,
+					 AMDGPU_MES_CTX_PADDING_OFFS);
+		gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
+		cpu_ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
+		*cpu_ptr = tmp;
+	} else {
+		r = amdgpu_device_wb_get(adev, &index);
+		if (r) {
+			dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r);
+			return r;
+		}
+
+		gpu_addr = adev->wb.gpu_addr + (index * 4);
+		adev->wb.wb[index] = cpu_to_le32(tmp);
+	}
+
+	r = amdgpu_ring_alloc(ring, 20);
 	if (r) {
 		DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r);
 		amdgpu_device_wb_free(adev, index);
@@ -941,7 +1003,10 @@
 	amdgpu_ring_commit(ring);
 
 	for (i = 0; i < adev->usec_timeout; i++) {
-		tmp = le32_to_cpu(adev->wb.wb[index]);
+		if (ring->is_mes_queue)
+			tmp = le32_to_cpu(*cpu_ptr);
+		else
+			tmp = le32_to_cpu(adev->wb.wb[index]);
 		if (tmp == 0xDEADBEEF)
 			break;
 		if (amdgpu_emu_mode == 1)
@@ -953,7 +1018,8 @@
 	if (i >= adev->usec_timeout)
 		r = -ETIMEDOUT;
 
-	amdgpu_device_wb_free(adev, index);
+	if (!ring->is_mes_queue)
+		amdgpu_device_wb_free(adev, index);
 
 	return r;
 }
@@ -976,21 +1042,37 @@ static int sdma_v5_2_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 	long r;
 	u32 tmp = 0;
 	u64 gpu_addr;
+	volatile uint32_t *cpu_ptr = NULL;
 
-	r = amdgpu_device_wb_get(adev, &index);
-	if (r) {
-		dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r);
-		return r;
-	}
-
-	gpu_addr = adev->wb.gpu_addr + (index * 4);
 	tmp = 0xCAFEDEAD;
-	adev->wb.wb[index] = cpu_to_le32(tmp);
 	memset(&ib, 0, sizeof(ib));
-	r = amdgpu_ib_get(adev, NULL, 256, AMDGPU_IB_POOL_DIRECT, &ib);
-	if (r) {
-		DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
-		goto err0;
+
+	if (ring->is_mes_queue) {
+		uint32_t offset = 0;
+		offset = amdgpu_mes_ctx_get_offs(ring, AMDGPU_MES_CTX_IB_OFFS);
+		ib.gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
+		ib.ptr = (void *)amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
+
+		offset = amdgpu_mes_ctx_get_offs(ring,
+					 AMDGPU_MES_CTX_PADDING_OFFS);
+		gpu_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
+		cpu_ptr = amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
+		*cpu_ptr = tmp;
+	} else {
+		r = amdgpu_device_wb_get(adev, &index);
+		if (r) {
+			dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r);
+			return r;
+		}
+
+		gpu_addr = adev->wb.gpu_addr + (index * 4);
+		adev->wb.wb[index] = cpu_to_le32(tmp);
+
+		r = amdgpu_ib_get(adev, NULL, 256, AMDGPU_IB_POOL_DIRECT, &ib);
+		if (r) {
+			DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
+			goto err0;
+		}
 	}
 
 	ib.ptr[0] = SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) |
@@ -1017,7 +1099,12 @@
 		DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
 		goto err1;
 	}
-	tmp = le32_to_cpu(adev->wb.wb[index]);
+
+	if (ring->is_mes_queue)
+		tmp = le32_to_cpu(*cpu_ptr);
+	else
+		tmp = le32_to_cpu(adev->wb.wb[index]);
+
 	if (tmp == 0xDEADBEEF)
 		r = 0;
 	else
@@ -1027,7 +1114,8 @@ err1:
 	amdgpu_ib_free(adev, &ib, NULL);
 	dma_fence_put(f);
 err0:
-	amdgpu_device_wb_free(adev, index);
+	if (!ring->is_mes_queue)
+		amdgpu_device_wb_free(adev, index);
 	return r;
 }
 
@@ -1227,6 +1315,7 @@ static int sdma_v5_2_early_init(void *handle)
 	sdma_v5_2_set_buffer_funcs(adev);
 	sdma_v5_2_set_vm_pte_funcs(adev);
 	sdma_v5_2_set_irq_funcs(adev);
+	sdma_v5_2_set_mqd_funcs(adev);
 
 	return 0;
 }
@@ -1460,7 +1549,25 @@ static int sdma_v5_2_process_trap_irq(struct amdgpu_device *adev,
 				      struct amdgpu_irq_src *source,
 				      struct amdgpu_iv_entry *entry)
 {
+	uint32_t mes_queue_id = entry->src_data[0];
+
 	DRM_DEBUG("IH: SDMA trap\n");
+
+	if (adev->enable_mes && (mes_queue_id & AMDGPU_FENCE_MES_QUEUE_FLAG)) {
+		struct amdgpu_mes_queue *queue;
+
+		mes_queue_id &= AMDGPU_FENCE_MES_QUEUE_ID_MASK;
+
+		spin_lock(&adev->mes.queue_id_lock);
+		queue = idr_find(&adev->mes.queue_id_idr, mes_queue_id);
+		if (queue) {
+			DRM_DEBUG("process smda queue id = %d\n", mes_queue_id);
+			amdgpu_fence_process(queue->ring);
+		}
+		spin_unlock(&adev->mes.queue_id_lock);
+		return 0;
+	}
+
 	switch (entry->client_id) {
 	case SOC15_IH_CLIENTID_SDMA0:
 		switch (entry->ring_id) {
@@ -1617,6 +1724,7 @@ static int sdma_v5_2_set_clockgating_state(void *handle,
 	case IP_VERSION(5, 2, 1):
 	case IP_VERSION(5, 2, 4):
 	case IP_VERSION(5, 2, 5):
+	case IP_VERSION(5, 2, 6):
 	case IP_VERSION(5, 2, 3):
 		sdma_v5_2_update_medium_grain_clock_gating(adev,
 				state == AMD_CG_STATE_GATE);
@@ -1636,7 +1744,7 @@ static int sdma_v5_2_set_powergating_state(void *handle,
 	return 0;
 }
 
-static void sdma_v5_2_get_clockgating_state(void *handle, u32 *flags)
+static void sdma_v5_2_get_clockgating_state(void *handle, u64 *flags)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 	int data;
@@ -1644,6 +1752,11 @@ static void sdma_v5_2_get_clockgating_state(void *handle, u32 *flags)
 	if (amdgpu_sriov_vf(adev))
 		*flags = 0;
 
+	/* AMD_CG_SUPPORT_SDMA_MGCG */
+	data = RREG32(sdma_v5_2_get_reg_offset(adev, 0, mmSDMA0_CLK_CTRL));
+	if (!(data & SDMA0_CLK_CTRL__CGCG_EN_OVERRIDE_MASK))
+		*flags |= AMD_CG_SUPPORT_SDMA_MGCG;
+
 	/* AMD_CG_SUPPORT_SDMA_LS */
 	data = RREG32_KIQ(sdma_v5_2_get_reg_offset(adev, 0, mmSDMA0_POWER_CNTL));
 	if (data & SDMA0_POWER_CNTL__MEM_POWER_OVERRIDE_MASK)
@@ -1673,6 +1786,7 @@ static const struct amdgpu_ring_funcs sdma_v5_2_ring_funcs = {
 	.align_mask = 0xf,
 	.nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP),
 	.support_64bit_ptrs = true,
+	.secure_submission_supported = true,
 	.vmhub = AMDGPU_GFXHUB_0,
 	.get_rptr = sdma_v5_2_ring_get_rptr,
 	.get_wptr = sdma_v5_2_ring_get_wptr,