about summary refs log tree commit diff
path: root/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/cik_sdma.c')
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/cik_sdma.c 206
1 files changed, 96 insertions, 110 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
index 518dca43b133..cb952acc7133 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
@@ -52,6 +52,7 @@ static void cik_sdma_set_ring_funcs(struct amdgpu_device *adev);
static void cik_sdma_set_irq_funcs(struct amdgpu_device *adev);
static void cik_sdma_set_buffer_funcs(struct amdgpu_device *adev);
static void cik_sdma_set_vm_pte_funcs(struct amdgpu_device *adev);
+static int cik_sdma_soft_reset(void *handle);
MODULE_FIRMWARE("radeon/bonaire_sdma.bin");
MODULE_FIRMWARE("radeon/bonaire_sdma1.bin");
@@ -66,6 +67,16 @@ MODULE_FIRMWARE("radeon/mullins_sdma1.bin");
u32 amdgpu_cik_gpu_check_soft_reset(struct amdgpu_device *adev);
+
+static void cik_sdma_free_microcode(struct amdgpu_device *adev)
+{
+ int i;
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ release_firmware(adev->sdma.instance[i].fw);
+ adev->sdma.instance[i].fw = NULL;
+ }
+}
+
/*
* sDMA - System DMA
* Starting with CIK, the GPU has new asynchronous
@@ -214,17 +225,6 @@ static void cik_sdma_ring_emit_ib(struct amdgpu_ring *ring,
unsigned vm_id, bool ctx_switch)
{
u32 extra_bits = vm_id & 0xf;
- u32 next_rptr = ring->wptr + 5;
-
- while ((next_rptr & 7) != 4)
- next_rptr++;
-
- next_rptr += 4;
- amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0));
- amdgpu_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
- amdgpu_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
- amdgpu_ring_write(ring, 1); /* number of DWs to follow */
- amdgpu_ring_write(ring, next_rptr);
/* IB packet must end on a 8 DW boundary */
cik_sdma_ring_insert_nop(ring, (12 - (ring->wptr & 7)) % 8);
@@ -355,7 +355,7 @@ static void cik_sdma_enable(struct amdgpu_device *adev, bool enable)
u32 me_cntl;
int i;
- if (enable == false) {
+ if (!enable) {
cik_sdma_gfx_stop(adev);
cik_sdma_rlc_stop(adev);
}
@@ -419,6 +419,8 @@ static int cik_sdma_gfx_resume(struct amdgpu_device *adev)
/* Initialize the ring buffer's read and write pointers */
WREG32(mmSDMA0_GFX_RB_RPTR + sdma_offsets[i], 0);
WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], 0);
+ WREG32(mmSDMA0_GFX_IB_RPTR + sdma_offsets[i], 0);
+ WREG32(mmSDMA0_GFX_IB_OFFSET + sdma_offsets[i], 0);
/* set the wb address whether it's enabled or not */
WREG32(mmSDMA0_GFX_RB_RPTR_ADDR_HI + sdma_offsets[i],
@@ -446,7 +448,12 @@ static int cik_sdma_gfx_resume(struct amdgpu_device *adev)
WREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i], ib_cntl);
ring->ready = true;
+ }
+ cik_sdma_enable(adev, true);
+
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ ring = &adev->sdma.instance[i].ring;
r = amdgpu_ring_test_ring(ring);
if (r) {
ring->ready = false;
@@ -529,8 +536,8 @@ static int cik_sdma_start(struct amdgpu_device *adev)
if (r)
return r;
- /* unhalt the MEs */
- cik_sdma_enable(adev, true);
+ /* halt the engine before programming */
+ cik_sdma_enable(adev, false);
/* start the gfx rings and rlc compute queues */
r = cik_sdma_gfx_resume(adev);
@@ -611,20 +618,19 @@ static int cik_sdma_ring_test_ring(struct amdgpu_ring *ring)
* Test a simple IB in the DMA ring (CIK).
* Returns 0 on success, error on failure.
*/
-static int cik_sdma_ring_test_ib(struct amdgpu_ring *ring)
+static int cik_sdma_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
struct amdgpu_device *adev = ring->adev;
struct amdgpu_ib ib;
struct fence *f = NULL;
- unsigned i;
unsigned index;
- int r;
u32 tmp = 0;
u64 gpu_addr;
+ long r;
r = amdgpu_wb_get(adev, &index);
if (r) {
- dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r);
+ dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r);
return r;
}
@@ -634,11 +640,12 @@ static int cik_sdma_ring_test_ib(struct amdgpu_ring *ring)
memset(&ib, 0, sizeof(ib));
r = amdgpu_ib_get(adev, NULL, 256, &ib);
if (r) {
- DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
+ DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
goto err0;
}
- ib.ptr[0] = SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
+ ib.ptr[0] = SDMA_PACKET(SDMA_OPCODE_WRITE,
+ SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
ib.ptr[1] = lower_32_bits(gpu_addr);
ib.ptr[2] = upper_32_bits(gpu_addr);
ib.ptr[3] = 1;
@@ -648,28 +655,25 @@ static int cik_sdma_ring_test_ib(struct amdgpu_ring *ring)
if (r)
goto err1;
- r = fence_wait(f, false);
- if (r) {
- DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
+ r = fence_wait_timeout(f, false, timeout);
+ if (r == 0) {
+ DRM_ERROR("amdgpu: IB test timed out\n");
+ r = -ETIMEDOUT;
goto err1;
- }
- for (i = 0; i < adev->usec_timeout; i++) {
- tmp = le32_to_cpu(adev->wb.wb[index]);
- if (tmp == 0xDEADBEEF)
- break;
- DRM_UDELAY(1);
- }
- if (i < adev->usec_timeout) {
- DRM_INFO("ib test on ring %d succeeded in %u usecs\n",
- ring->idx, i);
+ } else if (r < 0) {
+ DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
goto err1;
+ }
+ tmp = le32_to_cpu(adev->wb.wb[index]);
+ if (tmp == 0xDEADBEEF) {
+ DRM_INFO("ib test on ring %d succeeded\n", ring->idx);
+ r = 0;
} else {
DRM_ERROR("amdgpu: ib test failed (0x%08X)\n", tmp);
r = -EINVAL;
}
err1:
- fence_put(f);
amdgpu_ib_free(adev, &ib, NULL);
fence_put(f);
err0:
@@ -691,24 +695,16 @@ static void cik_sdma_vm_copy_pte(struct amdgpu_ib *ib,
uint64_t pe, uint64_t src,
unsigned count)
{
- while (count) {
- unsigned bytes = count * 8;
- if (bytes > 0x1FFFF8)
- bytes = 0x1FFFF8;
-
- ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_COPY,
- SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
- ib->ptr[ib->length_dw++] = bytes;
- ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
- ib->ptr[ib->length_dw++] = lower_32_bits(src);
- ib->ptr[ib->length_dw++] = upper_32_bits(src);
- ib->ptr[ib->length_dw++] = lower_32_bits(pe);
- ib->ptr[ib->length_dw++] = upper_32_bits(pe);
-
- pe += bytes;
- src += bytes;
- count -= bytes / 8;
- }
+ unsigned bytes = count * 8;
+
+ ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_COPY,
+ SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
+ ib->ptr[ib->length_dw++] = bytes;
+ ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
+ ib->ptr[ib->length_dw++] = lower_32_bits(src);
+ ib->ptr[ib->length_dw++] = upper_32_bits(src);
+ ib->ptr[ib->length_dw++] = lower_32_bits(pe);
+ ib->ptr[ib->length_dw++] = upper_32_bits(pe);
}
/**
@@ -716,39 +712,27 @@ static void cik_sdma_vm_copy_pte(struct amdgpu_ib *ib,
*
* @ib: indirect buffer to fill with commands
* @pe: addr of the page entry
- * @addr: dst addr to write into pe
+ * @value: dst addr to write into pe
* @count: number of page entries to update
* @incr: increase next addr by incr bytes
- * @flags: access flags
*
* Update PTEs by writing them manually using sDMA (CIK).
*/
-static void cik_sdma_vm_write_pte(struct amdgpu_ib *ib,
- const dma_addr_t *pages_addr, uint64_t pe,
- uint64_t addr, unsigned count,
- uint32_t incr, uint32_t flags)
+static void cik_sdma_vm_write_pte(struct amdgpu_ib *ib, uint64_t pe,
+ uint64_t value, unsigned count,
+ uint32_t incr)
{
- uint64_t value;
- unsigned ndw;
-
- while (count) {
- ndw = count * 2;
- if (ndw > 0xFFFFE)
- ndw = 0xFFFFE;
-
- /* for non-physically contiguous pages (system) */
- ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_WRITE,
- SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
- ib->ptr[ib->length_dw++] = pe;
- ib->ptr[ib->length_dw++] = upper_32_bits(pe);
- ib->ptr[ib->length_dw++] = ndw;
- for (; ndw > 0; ndw -= 2, --count, pe += 8) {
- value = amdgpu_vm_map_gart(pages_addr, addr);
- addr += incr;
- value |= flags;
- ib->ptr[ib->length_dw++] = value;
- ib->ptr[ib->length_dw++] = upper_32_bits(value);
- }
+ unsigned ndw = count * 2;
+
+ ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_WRITE,
+ SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
+ ib->ptr[ib->length_dw++] = lower_32_bits(pe);
+ ib->ptr[ib->length_dw++] = upper_32_bits(pe);
+ ib->ptr[ib->length_dw++] = ndw;
+ for (; ndw > 0; ndw -= 2) {
+ ib->ptr[ib->length_dw++] = lower_32_bits(value);
+ ib->ptr[ib->length_dw++] = upper_32_bits(value);
+ value += incr;
}
}
@@ -764,40 +748,21 @@ static void cik_sdma_vm_write_pte(struct amdgpu_ib *ib,
*
* Update the page tables using sDMA (CIK).
*/
-static void cik_sdma_vm_set_pte_pde(struct amdgpu_ib *ib,
- uint64_t pe,
+static void cik_sdma_vm_set_pte_pde(struct amdgpu_ib *ib, uint64_t pe,
uint64_t addr, unsigned count,
uint32_t incr, uint32_t flags)
{
- uint64_t value;
- unsigned ndw;
-
- while (count) {
- ndw = count;
- if (ndw > 0x7FFFF)
- ndw = 0x7FFFF;
-
- if (flags & AMDGPU_PTE_VALID)
- value = addr;
- else
- value = 0;
-
- /* for physically contiguous pages (vram) */
- ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_GENERATE_PTE_PDE, 0, 0);
- ib->ptr[ib->length_dw++] = pe; /* dst addr */
- ib->ptr[ib->length_dw++] = upper_32_bits(pe);
- ib->ptr[ib->length_dw++] = flags; /* mask */
- ib->ptr[ib->length_dw++] = 0;
- ib->ptr[ib->length_dw++] = value; /* value */
- ib->ptr[ib->length_dw++] = upper_32_bits(value);
- ib->ptr[ib->length_dw++] = incr; /* increment size */
- ib->ptr[ib->length_dw++] = 0;
- ib->ptr[ib->length_dw++] = ndw; /* number of entries */
-
- pe += ndw * 8;
- addr += ndw * incr;
- count -= ndw;
- }
+ /* for physically contiguous pages (vram) */
+ ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_GENERATE_PTE_PDE, 0, 0);
+ ib->ptr[ib->length_dw++] = lower_32_bits(pe); /* dst addr */
+ ib->ptr[ib->length_dw++] = upper_32_bits(pe);
+ ib->ptr[ib->length_dw++] = flags; /* mask */
+ ib->ptr[ib->length_dw++] = 0;
+ ib->ptr[ib->length_dw++] = lower_32_bits(addr); /* value */
+ ib->ptr[ib->length_dw++] = upper_32_bits(addr);
+ ib->ptr[ib->length_dw++] = incr; /* increment size */
+ ib->ptr[ib->length_dw++] = 0;
+ ib->ptr[ib->length_dw++] = count; /* number of entries */
}
/**
@@ -883,6 +848,22 @@ static void cik_sdma_ring_emit_vm_flush(struct amdgpu_ring *ring,
amdgpu_ring_write(ring, (0xfff << 16) | 10); /* retry count, poll interval */
}
+static unsigned cik_sdma_ring_get_emit_ib_size(struct amdgpu_ring *ring)
+{
+ return
+ 7 + 4; /* cik_sdma_ring_emit_ib */
+}
+
+static unsigned cik_sdma_ring_get_dma_frame_size(struct amdgpu_ring *ring)
+{
+ return
+ 6 + /* cik_sdma_ring_emit_hdp_flush */
+ 3 + /* cik_sdma_ring_emit_hdp_invalidate */
+ 6 + /* cik_sdma_ring_emit_pipeline_sync */
+ 12 + /* cik_sdma_ring_emit_vm_flush */
+ 9 + 9 + 9; /* cik_sdma_ring_emit_fence x3 for user fence, vm fence */
+}
+
static void cik_enable_sdma_mgcg(struct amdgpu_device *adev,
bool enable)
{
@@ -998,6 +979,7 @@ static int cik_sdma_sw_fini(void *handle)
for (i = 0; i < adev->sdma.num_instances; i++)
amdgpu_ring_fini(&adev->sdma.instance[i].ring);
+ cik_sdma_free_microcode(adev);
return 0;
}
@@ -1033,6 +1015,8 @@ static int cik_sdma_resume(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ cik_sdma_soft_reset(handle);
+
return cik_sdma_hw_init(adev);
}
@@ -1255,6 +1239,8 @@ static const struct amdgpu_ring_funcs cik_sdma_ring_funcs = {
.test_ib = cik_sdma_ring_test_ib,
.insert_nop = cik_sdma_ring_insert_nop,
.pad_ib = cik_sdma_ring_pad_ib,
+ .get_emit_ib_size = cik_sdma_ring_get_emit_ib_size,
+ .get_dma_frame_size = cik_sdma_ring_get_dma_frame_size,
};
static void cik_sdma_set_ring_funcs(struct amdgpu_device *adev)