aboutsummaryrefslogtreecommitdiff
path: root/drivers/gpu
diff options
context:
space:
mode:
authorSunil Khatri <sunil.khatri@amd.com>2024-07-30 00:24:44 +0530
committerAlex Deucher <alexander.deucher@amd.com>2024-08-06 10:43:01 -0400
commite89d2fec4cde967445e16e02e406481bac380cc4 (patch)
tree0492e67f1d3b10750f758d022fa8a6676129d1c3 /drivers/gpu
parent4a4c815b08dc774dde67fb90a0286925f98204af (diff)
drm/amdgpu: optimize the padding for gfx10
Adding NOP packets one by one in the ring does not use the CP efficiently. Solution: Use CP optimization while adding NOP packet's so PFP can discard NOP packets based on information of count from the Header instead of fetching all NOP packets one by one. Cc: Christian König <christian.koenig@amd.com> Cc: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com> Cc: Tvrtko Ursulin <tursulin@igalia.com> Cc: Marek Olšák <marek.olsak@amd.com> Reviewed-by: Christian König <christian.koenig@amd.com> Signed-off-by: Sunil Khatri <sunil.khatri@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers/gpu')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c22
1 files changed, 20 insertions, 2 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index 853084a2ce7f..1b88528b512b 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -9397,6 +9397,24 @@ static void gfx_v10_0_emit_mem_sync(struct amdgpu_ring *ring)
amdgpu_ring_write(ring, gcr_cntl); /* GCR_CNTL */
}
+static void gfx_v10_ring_insert_nop(struct amdgpu_ring *ring, uint32_t num_nop)
+{
+ int i;
+
+ /* Header itself is a NOP packet */
+ if (num_nop == 1) {
+ amdgpu_ring_write(ring, ring->funcs->nop);
+ return;
+ }
+
+ /* Max HW optimization till 0x3ffe, followed by remaining one NOP at a time*/
+ amdgpu_ring_write(ring, PACKET3(PACKET3_NOP, min(num_nop - 2, 0x3ffe)));
+
+ /* Header is at index 0, followed by num_nops - 1 NOP packet's */
+ for (i = 1; i < num_nop; i++)
+ amdgpu_ring_write(ring, ring->funcs->nop);
+}
+
static void gfx_v10_ip_print(void *handle, struct drm_printer *p)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
@@ -9588,7 +9606,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = {
.emit_hdp_flush = gfx_v10_0_ring_emit_hdp_flush,
.test_ring = gfx_v10_0_ring_test_ring,
.test_ib = gfx_v10_0_ring_test_ib,
- .insert_nop = amdgpu_ring_insert_nop,
+ .insert_nop = gfx_v10_ring_insert_nop,
.pad_ib = amdgpu_ring_generic_pad_ib,
.emit_switch_buffer = gfx_v10_0_ring_emit_sb,
.emit_cntxcntl = gfx_v10_0_ring_emit_cntxcntl,
@@ -9629,7 +9647,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = {
.emit_hdp_flush = gfx_v10_0_ring_emit_hdp_flush,
.test_ring = gfx_v10_0_ring_test_ring,
.test_ib = gfx_v10_0_ring_test_ib,
- .insert_nop = amdgpu_ring_insert_nop,
+ .insert_nop = gfx_v10_ring_insert_nop,
.pad_ib = amdgpu_ring_generic_pad_ib,
.emit_wreg = gfx_v10_0_ring_emit_wreg,
.emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,