From 6e7a3840745c950c37d37cbb0af2e753a765d4ec Mon Sep 17 00:00:00 2001 From: Chunming Zhou Date: Thu, 27 Aug 2015 13:46:09 +0800 Subject: drm/amdgpu: use IB for fill_buffer instead of direct command Signed-off-by: Chunming Zhou Reviewed-by: Christian K?nig Reviewed-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/cik_sdma.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/cik_sdma.c') diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c index 3920c1e346f8..c1e782952bd7 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c @@ -1363,16 +1363,16 @@ static void cik_sdma_emit_copy_buffer(struct amdgpu_ib *ib, * * Fill GPU buffers using the DMA engine (CIK). */ -static void cik_sdma_emit_fill_buffer(struct amdgpu_ring *ring, +static void cik_sdma_emit_fill_buffer(struct amdgpu_ib *ib, uint32_t src_data, uint64_t dst_offset, uint32_t byte_count) { - amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_CONSTANT_FILL, 0, 0)); - amdgpu_ring_write(ring, lower_32_bits(dst_offset)); - amdgpu_ring_write(ring, upper_32_bits(dst_offset)); - amdgpu_ring_write(ring, src_data); - amdgpu_ring_write(ring, byte_count); + ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_CONSTANT_FILL, 0, 0); + ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset); + ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset); + ib->ptr[ib->length_dw++] = src_data; + ib->ptr[ib->length_dw++] = byte_count; } static const struct amdgpu_buffer_funcs cik_sdma_buffer_funcs = { -- cgit From 18111de0dfc38c582c4348af3bda5d3331d35012 Mon Sep 17 00:00:00 2001 From: Jammy Zhou Date: Mon, 31 Aug 2015 14:06:39 +0800 Subject: drm/amdgpu: add burst_nop flag for sdma The burst NOP is supported for SDMA when feature_version is >= 20. Signed-off-by: Jammy Zhou Reviewed-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 + drivers/gpu/drm/amd/amdgpu/cik_sdma.c | 2 ++ drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c | 2 ++ drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c | 2 ++ 4 files changed, 7 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu/cik_sdma.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index b66938dbb5cf..34812eccd7d0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1708,6 +1708,7 @@ struct amdgpu_sdma { uint32_t feature_version; struct amdgpu_ring ring; + bool burst_nop; }; /* diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c index c1e782952bd7..cc909c9cee63 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c @@ -501,6 +501,8 @@ static int cik_sdma_load_microcode(struct amdgpu_device *adev) fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4; adev->sdma[i].fw_version = le32_to_cpu(hdr->header.ucode_version); adev->sdma[i].feature_version = le32_to_cpu(hdr->ucode_feature_version); + if (adev->sdma[i].feature_version >= 20) + adev->sdma[i].burst_nop = true; fw_data = (const __le32 *) (adev->sdma[i].fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes)); WREG32(mmSDMA0_UCODE_ADDR + sdma_offsets[i], 0); diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c index 2f3948c09081..2457bf3e3a1e 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c @@ -146,6 +146,8 @@ static int sdma_v2_4_init_microcode(struct amdgpu_device *adev) hdr = (const struct sdma_firmware_header_v1_0 *)adev->sdma[i].fw->data; adev->sdma[i].fw_version = le32_to_cpu(hdr->header.ucode_version); adev->sdma[i].feature_version = le32_to_cpu(hdr->ucode_feature_version); + if (adev->sdma[i].feature_version >= 20) + adev->sdma[i].burst_nop = true; if (adev->firmware.smu_load) { info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SDMA0 + i]; diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c index a9d9607e8d91..70b2f03c5712 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c @@ -218,6 +218,8 @@ static int sdma_v3_0_init_microcode(struct amdgpu_device *adev) hdr = (const struct sdma_firmware_header_v1_0 *)adev->sdma[i].fw->data; adev->sdma[i].fw_version = le32_to_cpu(hdr->header.ucode_version); adev->sdma[i].feature_version = le32_to_cpu(hdr->ucode_feature_version); + if (adev->sdma[i].feature_version >= 20) + adev->sdma[i].burst_nop = true; if (adev->firmware.smu_load) { info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SDMA0 + i]; -- cgit From edff0e2826412be38f0c7977cbf89262141aad87 Mon Sep 17 00:00:00 2001 From: Jammy Zhou Date: Tue, 1 Sep 2015 13:04:08 +0800 Subject: drm/amdgpu: add insert_nop ring func and default implementation The insert_nop function is added to amdgpu_ring_funcs structure as well as the default implementation Signed-off-by: Jammy Zhou Reviewed-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 3 +++ drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | 24 +++++++++++++++++++++--- drivers/gpu/drm/amd/amdgpu/cik_sdma.c | 1 + drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 2 ++ drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 2 ++ drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c | 1 + drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c | 1 + drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c | 1 + drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c | 1 + drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c | 1 + drivers/gpu/drm/amd/amdgpu/vce_v2_0.c | 1 + drivers/gpu/drm/amd/amdgpu/vce_v3_0.c | 1 + 12 files changed, 36 insertions(+), 3 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/cik_sdma.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 057e7ef5c0d0..668939a14206 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -343,6 +343,8 @@ struct amdgpu_ring_funcs { int (*test_ring)(struct amdgpu_ring *ring); int (*test_ib)(struct amdgpu_ring *ring); bool (*is_lockup)(struct amdgpu_ring *ring); + /* insert NOP packets */ + void (*insert_nop)(struct amdgpu_ring *ring, uint32_t count); }; /* @@ -1217,6 +1219,7 @@ int amdgpu_ib_ring_tests(struct amdgpu_device *adev); void amdgpu_ring_free_size(struct amdgpu_ring *ring); int amdgpu_ring_alloc(struct amdgpu_ring *ring, unsigned ndw); int amdgpu_ring_lock(struct amdgpu_ring *ring, unsigned ndw); +void amdgpu_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count); void amdgpu_ring_commit(struct amdgpu_ring *ring); void amdgpu_ring_unlock_commit(struct amdgpu_ring *ring); void amdgpu_ring_undo(struct amdgpu_ring *ring); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c index 7d442c51063e..9bec91484c24 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c @@ -131,6 +131,21 @@ int amdgpu_ring_lock(struct amdgpu_ring *ring, unsigned ndw) return 0; } +/** amdgpu_ring_insert_nop - insert NOP packets + * + * @ring: amdgpu_ring structure holding ring information + * @count: the number of NOP packets to insert + * + * This is the generic insert_nop function for rings except SDMA + */ +void amdgpu_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) +{ + int i; + + for (i = 0; i < count; i++) + amdgpu_ring_write(ring, ring->nop); +} + /** * amdgpu_ring_commit - tell the GPU to execute the new * commands on the ring buffer @@ -143,10 +158,13 @@ int amdgpu_ring_lock(struct amdgpu_ring *ring, unsigned ndw) */ void amdgpu_ring_commit(struct amdgpu_ring *ring) { + uint32_t count; + /* We pad to match fetch size */ - while (ring->wptr & ring->align_mask) { - amdgpu_ring_write(ring, ring->nop); - } + count = ring->align_mask + 1 - (ring->wptr & ring->align_mask); + count %= ring->align_mask + 1; + ring->funcs->insert_nop(ring, count); + mb(); amdgpu_ring_set_wptr(ring); } diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c index cc909c9cee63..f0661b269a63 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c @@ -1305,6 +1305,7 @@ static const struct amdgpu_ring_funcs cik_sdma_ring_funcs = { .test_ring = cik_sdma_ring_test_ring, .test_ib = cik_sdma_ring_test_ib, .is_lockup = cik_sdma_ring_is_lockup, + .insert_nop = amdgpu_ring_insert_nop, }; static void cik_sdma_set_ring_funcs(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c index fab7b236f37f..517a68f82ec3 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c @@ -5598,6 +5598,7 @@ static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_gfx = { .test_ring = gfx_v7_0_ring_test_ring, .test_ib = gfx_v7_0_ring_test_ib, .is_lockup = gfx_v7_0_ring_is_lockup, + .insert_nop = amdgpu_ring_insert_nop, }; static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_compute = { @@ -5614,6 +5615,7 @@ static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_compute = { .test_ring = gfx_v7_0_ring_test_ring, .test_ib = gfx_v7_0_ring_test_ib, .is_lockup = gfx_v7_0_ring_is_lockup, + .insert_nop = amdgpu_ring_insert_nop, }; static void gfx_v7_0_set_ring_funcs(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 818edb37fa9c..956a35822f86 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -4378,6 +4378,7 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = { .test_ring = gfx_v8_0_ring_test_ring, .test_ib = gfx_v8_0_ring_test_ib, .is_lockup = gfx_v8_0_ring_is_lockup, + .insert_nop = amdgpu_ring_insert_nop, }; static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = { @@ -4394,6 +4395,7 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = { .test_ring = gfx_v8_0_ring_test_ring, .test_ib = gfx_v8_0_ring_test_ib, .is_lockup = gfx_v8_0_ring_is_lockup, + .insert_nop = amdgpu_ring_insert_nop, }; static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c index 2457bf3e3a1e..1b913bce2599 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c @@ -1316,6 +1316,7 @@ static const struct amdgpu_ring_funcs sdma_v2_4_ring_funcs = { .test_ring = sdma_v2_4_ring_test_ring, .test_ib = sdma_v2_4_ring_test_ib, .is_lockup = sdma_v2_4_ring_is_lockup, + .insert_nop = amdgpu_ring_insert_nop, }; static void sdma_v2_4_set_ring_funcs(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c index 70b2f03c5712..a7550a8f5d84 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c @@ -1440,6 +1440,7 @@ static const struct amdgpu_ring_funcs sdma_v3_0_ring_funcs = { .test_ring = sdma_v3_0_ring_test_ring, .test_ib = sdma_v3_0_ring_test_ib, .is_lockup = sdma_v3_0_ring_is_lockup, + .insert_nop = amdgpu_ring_insert_nop, }; static void sdma_v3_0_set_ring_funcs(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c index 9ac383bc6c1f..5fac5da694f0 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c @@ -886,6 +886,7 @@ static const struct amdgpu_ring_funcs uvd_v4_2_ring_funcs = { .test_ring = uvd_v4_2_ring_test_ring, .test_ib = uvd_v4_2_ring_test_ib, .is_lockup = amdgpu_ring_test_lockup, + .insert_nop = amdgpu_ring_insert_nop, }; static void uvd_v4_2_set_ring_funcs(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c index de4b3f57902d..2d5c59c318af 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c @@ -825,6 +825,7 @@ static const struct amdgpu_ring_funcs uvd_v5_0_ring_funcs = { .test_ring = uvd_v5_0_ring_test_ring, .test_ib = uvd_v5_0_ring_test_ib, .is_lockup = amdgpu_ring_test_lockup, + .insert_nop = amdgpu_ring_insert_nop, }; static void uvd_v5_0_set_ring_funcs(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c index 66c975870e97..d9f553fce531 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c @@ -805,6 +805,7 @@ static const struct amdgpu_ring_funcs uvd_v6_0_ring_funcs = { .test_ring = uvd_v6_0_ring_test_ring, .test_ib = uvd_v6_0_ring_test_ib, .is_lockup = amdgpu_ring_test_lockup, + .insert_nop = amdgpu_ring_insert_nop, }; static void uvd_v6_0_set_ring_funcs(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c index 303d961d57bd..cd16df543f64 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c @@ -643,6 +643,7 @@ static const struct amdgpu_ring_funcs vce_v2_0_ring_funcs = { .test_ring = amdgpu_vce_ring_test_ring, .test_ib = amdgpu_vce_ring_test_ib, .is_lockup = amdgpu_ring_test_lockup, + .insert_nop = amdgpu_ring_insert_nop, }; static void vce_v2_0_set_ring_funcs(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c index 4349658081ff..5642b8eb92ad 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c @@ -608,6 +608,7 @@ static const struct amdgpu_ring_funcs vce_v3_0_ring_funcs = { .test_ring = amdgpu_vce_ring_test_ring, .test_ib = amdgpu_vce_ring_test_ib, .is_lockup = amdgpu_ring_test_lockup, + .insert_nop = amdgpu_ring_insert_nop, }; static void vce_v3_0_set_ring_funcs(struct amdgpu_device *adev) -- cgit From ac01db3dd5a0c7916e804a52752b780c5c18b98d Mon Sep 17 00:00:00 2001 From: Jammy Zhou Date: Tue, 1 Sep 2015 13:13:54 +0800 Subject: drm/amdgpu: implement burst NOP for SDMA Customize the insert_nop func for SDMA rings, and use burst NOP for ring/IB submissions in other places as well Signed-off-by: Jammy Zhou Reviewed-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/cik_sdma.c | 34 +++++++++++++++++++++++++++++----- drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c | 34 +++++++++++++++++++++++++++++----- drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c | 33 ++++++++++++++++++++++++++++----- 3 files changed, 86 insertions(+), 15 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/cik_sdma.c') diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c index f0661b269a63..9ea9de457da3 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c @@ -188,6 +188,19 @@ static void cik_sdma_ring_set_wptr(struct amdgpu_ring *ring) WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[me], (ring->wptr << 2) & 0x3fffc); } +static void cik_sdma_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) +{ + struct amdgpu_sdma *sdma = amdgpu_get_sdma_instance(ring); + int i; + + for (i = 0; i < count; i++) + if (sdma && sdma->burst_nop && (i == 0)) + amdgpu_ring_write(ring, ring->nop | + SDMA_NOP_COUNT(count - 1)); + else + amdgpu_ring_write(ring, ring->nop); +} + /** * cik_sdma_ring_emit_ib - Schedule an IB on the DMA engine * @@ -213,8 +226,8 @@ static void cik_sdma_ring_emit_ib(struct amdgpu_ring *ring, amdgpu_ring_write(ring, next_rptr); /* IB packet must end on a 8 DW boundary */ - while ((ring->wptr & 7) != 4) - amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0)); + cik_sdma_ring_insert_nop(ring, (12 - (ring->wptr & 7)) % 8); + amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_INDIRECT_BUFFER, 0, extra_bits)); amdgpu_ring_write(ring, ib->gpu_addr & 0xffffffe0); /* base must be 32 byte aligned */ amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xffffffff); @@ -817,8 +830,19 @@ static void cik_sdma_vm_set_pte_pde(struct amdgpu_ib *ib, */ static void cik_sdma_vm_pad_ib(struct amdgpu_ib *ib) { - while (ib->length_dw & 0x7) - ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0); + struct amdgpu_sdma *sdma = amdgpu_get_sdma_instance(ib->ring); + u32 pad_count; + int i; + + pad_count = (8 - (ib->length_dw & 0x7)) % 8; + for (i = 0; i < pad_count; i++) + if (sdma && sdma->burst_nop && (i == 0)) + ib->ptr[ib->length_dw++] = + SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0) | + SDMA_NOP_COUNT(pad_count - 1); + else + ib->ptr[ib->length_dw++] = + SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0); } /** @@ -1305,7 +1329,7 @@ static const struct amdgpu_ring_funcs cik_sdma_ring_funcs = { .test_ring = cik_sdma_ring_test_ring, .test_ib = cik_sdma_ring_test_ib, .is_lockup = cik_sdma_ring_is_lockup, - .insert_nop = amdgpu_ring_insert_nop, + .insert_nop = cik_sdma_ring_insert_nop, }; static void cik_sdma_set_ring_funcs(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c index 1b913bce2599..14e87234171a 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c @@ -220,6 +220,19 @@ static void sdma_v2_4_ring_set_wptr(struct amdgpu_ring *ring) WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[me], ring->wptr << 2); } +static void sdma_v2_4_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) +{ + struct amdgpu_sdma *sdma = amdgpu_get_sdma_instance(ring); + int i; + + for (i = 0; i < count; i++) + if (sdma && sdma->burst_nop && (i == 0)) + amdgpu_ring_write(ring, ring->nop | + SDMA_PKT_NOP_HEADER_COUNT(count - 1)); + else + amdgpu_ring_write(ring, ring->nop); +} + /** * sdma_v2_4_ring_emit_ib - Schedule an IB on the DMA engine * @@ -247,8 +260,8 @@ static void sdma_v2_4_ring_emit_ib(struct amdgpu_ring *ring, amdgpu_ring_write(ring, next_rptr); /* IB packet must end on a 8 DW boundary */ - while ((ring->wptr & 7) != 2) - amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_NOP)); + sdma_v2_4_ring_insert_nop(ring, (10 - (ring->wptr & 7)) % 8); + amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) | SDMA_PKT_INDIRECT_HEADER_VMID(vmid)); /* base must be 32 byte aligned */ @@ -881,8 +894,19 @@ static void sdma_v2_4_vm_set_pte_pde(struct amdgpu_ib *ib, */ static void sdma_v2_4_vm_pad_ib(struct amdgpu_ib *ib) { - while (ib->length_dw & 0x7) - ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_NOP); + struct amdgpu_sdma *sdma = amdgpu_get_sdma_instance(ib->ring); + u32 pad_count; + int i; + + pad_count = (8 - (ib->length_dw & 0x7)) % 8; + for (i = 0; i < pad_count; i++) + if (sdma && sdma->burst_nop && (i == 0)) + ib->ptr[ib->length_dw++] = + SDMA_PKT_HEADER_OP(SDMA_OP_NOP) | + SDMA_PKT_NOP_HEADER_COUNT(pad_count - 1); + else + ib->ptr[ib->length_dw++] = + SDMA_PKT_HEADER_OP(SDMA_OP_NOP); } /** @@ -1316,7 +1340,7 @@ static const struct amdgpu_ring_funcs sdma_v2_4_ring_funcs = { .test_ring = sdma_v2_4_ring_test_ring, .test_ib = sdma_v2_4_ring_test_ib, .is_lockup = sdma_v2_4_ring_is_lockup, - .insert_nop = amdgpu_ring_insert_nop, + .insert_nop = sdma_v2_4_ring_insert_nop, }; static void sdma_v2_4_set_ring_funcs(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c index a7550a8f5d84..9bfe92df15f7 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c @@ -306,6 +306,19 @@ static void sdma_v3_0_ring_set_wptr(struct amdgpu_ring *ring) } } +static void sdma_v3_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) +{ + struct amdgpu_sdma *sdma = amdgpu_get_sdma_instance(ring); + int i; + + for (i = 0; i < count; i++) + if (sdma && sdma->burst_nop && (i == 0)) + amdgpu_ring_write(ring, ring->nop | + SDMA_PKT_NOP_HEADER_COUNT(count - 1)); + else + amdgpu_ring_write(ring, ring->nop); +} + /** * sdma_v3_0_ring_emit_ib - Schedule an IB on the DMA engine * @@ -332,8 +345,7 @@ static void sdma_v3_0_ring_emit_ib(struct amdgpu_ring *ring, amdgpu_ring_write(ring, next_rptr); /* IB packet must end on a 8 DW boundary */ - while ((ring->wptr & 7) != 2) - amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_NOP)); + sdma_v3_0_ring_insert_nop(ring, (10 - (ring->wptr & 7)) % 8); amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) | SDMA_PKT_INDIRECT_HEADER_VMID(vmid)); @@ -1001,8 +1013,19 @@ static void sdma_v3_0_vm_set_pte_pde(struct amdgpu_ib *ib, */ static void sdma_v3_0_vm_pad_ib(struct amdgpu_ib *ib) { - while (ib->length_dw & 0x7) - ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_NOP); + struct amdgpu_sdma *sdma = amdgpu_get_sdma_instance(ib->ring); + u32 pad_count; + int i; + + pad_count = (8 - (ib->length_dw & 0x7)) % 8; + for (i = 0; i < pad_count; i++) + if (sdma && sdma->burst_nop && (i == 0)) + ib->ptr[ib->length_dw++] = + SDMA_PKT_HEADER_OP(SDMA_OP_NOP) | + SDMA_PKT_NOP_HEADER_COUNT(pad_count - 1); + else + ib->ptr[ib->length_dw++] = + SDMA_PKT_HEADER_OP(SDMA_OP_NOP); } /** @@ -1440,7 +1463,7 @@ static const struct amdgpu_ring_funcs sdma_v3_0_ring_funcs = { .test_ring = sdma_v3_0_ring_test_ring, .test_ib = sdma_v3_0_ring_test_ib, .is_lockup = sdma_v3_0_ring_is_lockup, - .insert_nop = amdgpu_ring_insert_nop, + .insert_nop = sdma_v3_0_ring_insert_nop, }; static void sdma_v3_0_set_ring_funcs(struct amdgpu_device *adev) -- cgit