aboutsummaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
diff options
context:
space:
mode:
author Dave Airlie <airlied@redhat.com> 2015-09-04 13:06:29 +1000
committer Dave Airlie <airlied@redhat.com> 2015-09-04 13:06:29 +1000
commit 99495589aa4de7166af254bc497cdbe133fc24bb (patch)
tree d525e957854064f2492976e9beb8a04dddc28143 /drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
parent 879a37d00f1882b1e56a66e626af4194d592d257 (diff)
parent bddf8026386927985ef6d0d11c3ba78f70b76bad (diff)
Merge branch 'drm-next-4.3' of git://people.freedesktop.org/~agd5f/linux into drm-next
More fixes for radeon and amdgpu for 4.3:
- Send full DP aux address fixes for radeon and amdgpu
- Fix an HDMI display regression for pre-DCE5 parts
- UVD suspend fixes for amdgpu
- Add an rs480 suspend quirk
- Fix bo reserve handling in amdgpu GEM_OP ioctl
- GPU scheduler fixes
- SDMA optimizations
- MEC fix for Fiji

* 'drm-next-4.3' of git://people.freedesktop.org/~agd5f/linux: (21 commits)
  drm/amdgpu: set MEC doorbell range for Fiji
  drm/amdgpu: implement burst NOP for SDMA
  drm/amdgpu: add insert_nop ring func and default implementation
  drm/amdgpu: add amdgpu_get_sdma_instance helper function
  drm/amdgpu: add AMDGPU_MAX_SDMA_INSTANCES
  drm/amdgpu: add burst_nop flag for sdma
  drm/amdgpu: add count field for the SDMA NOP packet v2
  drm/amdgpu: use PT for VM sync on unmap
  drm/amdgpu: make wait_event uninterruptible in push_job
  drm/amdgpu: fix amdgpu_bo_unreserve order in GEM_OP IOCTL v2
  drm/amdgpu: partially revert "modify amdgpu_fence_wait_any() to amdgpu_fence_wait_multiple()" v2
  Add radeon suspend/resume quirk for HP Compaq dc5750.
  drm/amdgpu: re-work sync_resv
  drm/amdgpu/atom: Send out the full AUX address
  drm/radeon/native: Send out the full AUX address
  drm/radeon/atom: Send out the full AUX address
  drm/amdgpu: use IB for fill_buffer instead of direct command
  drm/amdgpu: stop trying to suspend UVD sessions v2
  drm/amdgpu: add scheduler dependency callback v2
  drm/amdgpu: let the scheduler work more with jobs v2
  ...
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c')
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c 46
1 files changed, 36 insertions, 10 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
index 67128c8e78b8..9bfe92df15f7 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
@@ -218,6 +218,8 @@ static int sdma_v3_0_init_microcode(struct amdgpu_device *adev)
hdr = (const struct sdma_firmware_header_v1_0 *)adev->sdma[i].fw->data;
adev->sdma[i].fw_version = le32_to_cpu(hdr->header.ucode_version);
adev->sdma[i].feature_version = le32_to_cpu(hdr->ucode_feature_version);
+ if (adev->sdma[i].feature_version >= 20)
+ adev->sdma[i].burst_nop = true;
if (adev->firmware.smu_load) {
info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SDMA0 + i];
@@ -304,6 +306,19 @@ static void sdma_v3_0_ring_set_wptr(struct amdgpu_ring *ring)
}
}
/*
 * sdma_v3_0_ring_insert_nop - write `count` NOP dwords to the ring.
 *
 * @ring:  ring to write to
 * @count: number of dwords to emit; nothing is written when it is 0
 *
 * If an SDMA instance is found for the ring and its burst_nop flag is set,
 * the first dword also carries SDMA_PKT_NOP_HEADER_COUNT(count - 1); all
 * remaining dwords are the plain ring->nop value. Otherwise every dword is
 * the plain ring->nop value.
 * NOTE(review): presumably the count field lets the engine consume the
 * following count - 1 dwords as one packet -- confirm against the SDMA
 * packet definition.
 */
+static void sdma_v3_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
+{
+ struct amdgpu_sdma *sdma = amdgpu_get_sdma_instance(ring);
+ int i;
+
+ for (i = 0; i < count; i++)
+ if (sdma && sdma->burst_nop && (i == 0)) /* only the first dword gets the count field */
+ amdgpu_ring_write(ring, ring->nop |
+ SDMA_PKT_NOP_HEADER_COUNT(count - 1));
+ else
+ amdgpu_ring_write(ring, ring->nop);
+}
+
/**
* sdma_v3_0_ring_emit_ib - Schedule an IB on the DMA engine
*
@@ -330,8 +345,7 @@ static void sdma_v3_0_ring_emit_ib(struct amdgpu_ring *ring,
amdgpu_ring_write(ring, next_rptr);
/* IB packet must end on a 8 DW boundary */
- while ((ring->wptr & 7) != 2)
- amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_NOP));
+ sdma_v3_0_ring_insert_nop(ring, (10 - (ring->wptr & 7)) % 8);
amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) |
SDMA_PKT_INDIRECT_HEADER_VMID(vmid));
@@ -999,8 +1013,19 @@ static void sdma_v3_0_vm_set_pte_pde(struct amdgpu_ib *ib,
*/
static void sdma_v3_0_vm_pad_ib(struct amdgpu_ib *ib)
{
/*
 * Pads the IB with NOP dwords until ib->length_dw is a multiple of 8.
 * Removed lines: appended one plain NOP header per iteration.
 * Added lines: compute the pad length up front and, when the SDMA instance
 * has burst_nop set, put SDMA_PKT_NOP_HEADER_COUNT(pad_count - 1) in the
 * first padding dword; the remaining dwords stay plain NOP headers.
 */
- while (ib->length_dw & 0x7)
- ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_NOP);
+ struct amdgpu_sdma *sdma = amdgpu_get_sdma_instance(ib->ring);
+ u32 pad_count;
+ int i;
+
+ pad_count = (8 - (ib->length_dw & 0x7)) % 8; /* 0..7; 0 when length_dw is already a multiple of 8 */
+ for (i = 0; i < pad_count; i++)
+ if (sdma && sdma->burst_nop && (i == 0)) /* only the first padding dword gets the count field */
+ ib->ptr[ib->length_dw++] =
+ SDMA_PKT_HEADER_OP(SDMA_OP_NOP) |
+ SDMA_PKT_NOP_HEADER_COUNT(pad_count - 1);
+ else
+ ib->ptr[ib->length_dw++] =
+ SDMA_PKT_HEADER_OP(SDMA_OP_NOP);
}
/**
@@ -1438,6 +1463,7 @@ static const struct amdgpu_ring_funcs sdma_v3_0_ring_funcs = {
.test_ring = sdma_v3_0_ring_test_ring,
.test_ib = sdma_v3_0_ring_test_ib,
.is_lockup = sdma_v3_0_ring_is_lockup,
+ .insert_nop = sdma_v3_0_ring_insert_nop,
};
static void sdma_v3_0_set_ring_funcs(struct amdgpu_device *adev)
@@ -1499,16 +1525,16 @@ static void sdma_v3_0_emit_copy_buffer(struct amdgpu_ib *ib,
*
* Fill GPU buffers using the DMA engine (VI).
*/
/*
 * This hunk changes the first parameter from a ring to an IB. The same five
 * dwords are produced in the same order; the removed lines wrote them with
 * amdgpu_ring_write(), the added lines append them at ib->ptr[ib->length_dw]
 * and advance length_dw by one per dword.
 */
-static void sdma_v3_0_emit_fill_buffer(struct amdgpu_ring *ring,
+static void sdma_v3_0_emit_fill_buffer(struct amdgpu_ib *ib,
uint32_t src_data,
uint64_t dst_offset,
uint32_t byte_count)
{
- amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_CONST_FILL));
- amdgpu_ring_write(ring, lower_32_bits(dst_offset));
- amdgpu_ring_write(ring, upper_32_bits(dst_offset));
- amdgpu_ring_write(ring, src_data);
- amdgpu_ring_write(ring, byte_count);
+ ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_CONST_FILL); /* packet header: CONST_FILL opcode */
+ ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset); /* destination offset, low 32 bits */
+ ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset); /* destination offset, high 32 bits */
+ ib->ptr[ib->length_dw++] = src_data; /* 32-bit value passed by the caller as src_data */
+ ib->ptr[ib->length_dw++] = byte_count; /* caller-supplied byte_count, written unmodified */
}
static const struct amdgpu_buffer_funcs sdma_v3_0_buffer_funcs = {