| author | Dave Airlie <airlied@redhat.com> | 2015-09-04 13:06:29 +1000 |
|---|---|---|
| committer | Dave Airlie <airlied@redhat.com> | 2015-09-04 13:06:29 +1000 |
| commit | 99495589aa4de7166af254bc497cdbe133fc24bb (patch) | |
| tree | d525e957854064f2492976e9beb8a04dddc28143 /drivers/gpu/drm/amd/amdgpu/cik_sdma.c | |
| parent | 879a37d00f1882b1e56a66e626af4194d592d257 (diff) | |
| parent | bddf8026386927985ef6d0d11c3ba78f70b76bad (diff) | |
Merge branch 'drm-next-4.3' of git://people.freedesktop.org/~agd5f/linux into drm-next
More fixes for radeon and amdgpu for 4.3:
- Send full DP aux address fixes for radeon and amdgpu
- Fix an HDMI display regression for pre-DCE5 parts
- UVD suspend fixes for amdgpu
- Add an rs480 suspend quirk
- Fix bo reserve handling in amdgpu GEM_OP ioctl
- GPU scheduler fixes
- SDMA optimizations (burst NOP padding; see the sketch after the commit list)
- MEC fix for Fiji
* 'drm-next-4.3' of git://people.freedesktop.org/~agd5f/linux: (21 commits)
drm/amdgpu: set MEC doorbell range for Fiji
drm/amdgpu: implement burst NOP for SDMA
drm/amdgpu: add insert_nop ring func and default implementation
drm/amdgpu: add amdgpu_get_sdma_instance helper function
drm/amdgpu: add AMDGPU_MAX_SDMA_INSTANCES
drm/amdgpu: add burst_nop flag for sdma
drm/amdgpu: add count field for the SDMA NOP packet v2
drm/amdgpu: use PT for VM sync on unmap
drm/amdgpu: make wait_event uninterruptible in push_job
drm/amdgpu: fix amdgpu_bo_unreserve order in GEM_OP IOCTL v2
drm/amdgpu: partially revert "modify amdgpu_fence_wait_any() to amdgpu_fence_wait_multiple()" v2
Add radeon suspend/resume quirk for HP Compaq dc5750.
drm/amdgpu: re-work sync_resv
drm/amdgpu/atom: Send out the full AUX address
drm/radeon/native: Send out the full AUX address
drm/radeon/atom: Send out the full AUX address
drm/amdgpu: use IB for fill_buffer instead of direct command
drm/amdgpu: stop trying to suspend UVD sessions v2
drm/amdgpu: add scheduler dependency callback v2
drm/amdgpu: let the scheduler work more with jobs v2
...
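
The burst NOP commits above are what drive the changes to cik_sdma.c shown below: ring and IB padding used to emit one NOP packet per unused dword, while SDMA firmware reporting feature_version >= 20 accepts a single NOP packet whose count field covers the remaining pad dwords. The following is a minimal, self-contained sketch of that decision, not kernel code: the SDMA_PACKET/SDMA_NOP_COUNT bit layout, the emit_pad() helper and the buffer handling are assumptions made purely for illustration.

/*
 * Illustrative only: the real packet definitions live in the kernel's
 * CIK headers. The bit layout below is an assumption so the sketch builds.
 */
#include <stdint.h>
#include <stdio.h>

#define SDMA_OPCODE_NOP            0
#define SDMA_PACKET(op, sub, ext)  ((uint32_t)(op) | ((uint32_t)(sub) << 8) | \
                                    ((uint32_t)(ext) << 16))
#define SDMA_NOP_COUNT(n)          (((uint32_t)(n) & 0x3fff) << 16)

/* Write pad_count dwords of NOP into buf, using one burst NOP when supported. */
static int emit_pad(uint32_t *buf, uint32_t pad_count, int burst_nop)
{
	uint32_t i;

	for (i = 0; i < pad_count; i++) {
		if (burst_nop && i == 0)
			/* the first packet carries a count so the engine can
			 * treat the following pad_count - 1 dwords as padding */
			buf[i] = SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0) |
				 SDMA_NOP_COUNT(pad_count - 1);
		else
			buf[i] = SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0);
	}
	return (int)pad_count;
}

int main(void)
{
	uint32_t buf[8];
	int n = emit_pad(buf, 5, 1);

	printf("wrote %d pad dwords, first packet 0x%08x\n", n, (unsigned)buf[0]);
	return 0;
}

With pad_count = 5 and burst NOP available, the first dword carries SDMA_NOP_COUNT(4), so one packet accounts for the whole pad instead of five separate NOPs.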
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/cik_sdma.c')
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/cik_sdma.c | 47
1 file changed, 37 insertions(+), 10 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
index 3920c1e346f8..9ea9de457da3 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
@@ -188,6 +188,19 @@ static void cik_sdma_ring_set_wptr(struct amdgpu_ring *ring)
 	WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[me], (ring->wptr << 2) & 0x3fffc);
 }
 
+static void cik_sdma_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
+{
+	struct amdgpu_sdma *sdma = amdgpu_get_sdma_instance(ring);
+	int i;
+
+	for (i = 0; i < count; i++)
+		if (sdma && sdma->burst_nop && (i == 0))
+			amdgpu_ring_write(ring, ring->nop |
+				SDMA_NOP_COUNT(count - 1));
+		else
+			amdgpu_ring_write(ring, ring->nop);
+}
+
 /**
  * cik_sdma_ring_emit_ib - Schedule an IB on the DMA engine
  *
@@ -213,8 +226,8 @@ static void cik_sdma_ring_emit_ib(struct amdgpu_ring *ring,
 	amdgpu_ring_write(ring, next_rptr);
 
 	/* IB packet must end on a 8 DW boundary */
-	while ((ring->wptr & 7) != 4)
-		amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
+	cik_sdma_ring_insert_nop(ring, (12 - (ring->wptr & 7)) % 8);
+
 	amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_INDIRECT_BUFFER, 0, extra_bits));
 	amdgpu_ring_write(ring, ib->gpu_addr & 0xffffffe0); /* base must be 32 byte aligned */
 	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xffffffff);
@@ -501,6 +514,8 @@ static int cik_sdma_load_microcode(struct amdgpu_device *adev)
 		fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
 		adev->sdma[i].fw_version = le32_to_cpu(hdr->header.ucode_version);
 		adev->sdma[i].feature_version = le32_to_cpu(hdr->ucode_feature_version);
+		if (adev->sdma[i].feature_version >= 20)
+			adev->sdma[i].burst_nop = true;
 		fw_data = (const __le32 *)
 			(adev->sdma[i].fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
 		WREG32(mmSDMA0_UCODE_ADDR + sdma_offsets[i], 0);
@@ -815,8 +830,19 @@ static void cik_sdma_vm_set_pte_pde(struct amdgpu_ib *ib,
  */
 static void cik_sdma_vm_pad_ib(struct amdgpu_ib *ib)
 {
-	while (ib->length_dw & 0x7)
-		ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0);
+	struct amdgpu_sdma *sdma = amdgpu_get_sdma_instance(ib->ring);
+	u32 pad_count;
+	int i;
+
+	pad_count = (8 - (ib->length_dw & 0x7)) % 8;
+	for (i = 0; i < pad_count; i++)
+		if (sdma && sdma->burst_nop && (i == 0))
+			ib->ptr[ib->length_dw++] =
+				SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0) |
+				SDMA_NOP_COUNT(pad_count - 1);
+		else
+			ib->ptr[ib->length_dw++] =
+				SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0);
 }
 
 /**
@@ -1303,6 +1329,7 @@ static const struct amdgpu_ring_funcs cik_sdma_ring_funcs = {
 	.test_ring = cik_sdma_ring_test_ring,
 	.test_ib = cik_sdma_ring_test_ib,
 	.is_lockup = cik_sdma_ring_is_lockup,
+	.insert_nop = cik_sdma_ring_insert_nop,
 };
 
 static void cik_sdma_set_ring_funcs(struct amdgpu_device *adev)
@@ -1363,16 +1390,16 @@ static void cik_sdma_emit_copy_buffer(struct amdgpu_ib *ib,
  *
  * Fill GPU buffers using the DMA engine (CIK).
  */
-static void cik_sdma_emit_fill_buffer(struct amdgpu_ring *ring,
+static void cik_sdma_emit_fill_buffer(struct amdgpu_ib *ib,
 				      uint32_t src_data,
 				      uint64_t dst_offset,
 				      uint32_t byte_count)
 {
-	amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_CONSTANT_FILL, 0, 0));
-	amdgpu_ring_write(ring, lower_32_bits(dst_offset));
-	amdgpu_ring_write(ring, upper_32_bits(dst_offset));
-	amdgpu_ring_write(ring, src_data);
-	amdgpu_ring_write(ring, byte_count);
+	ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_CONSTANT_FILL, 0, 0);
+	ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset);
+	ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset);
+	ib->ptr[ib->length_dw++] = src_data;
+	ib->ptr[ib->length_dw++] = byte_count;
 }
 
 static const struct amdgpu_buffer_funcs cik_sdma_buffer_funcs = {
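
As a quick sanity check of the arithmetic introduced above: cik_sdma_ring_emit_ib() now pads so that the 4-dword INDIRECT_BUFFER packet that follows ends on an 8-dword boundary (i.e. it starts at wptr % 8 == 4, matching the old while loop), and cik_sdma_vm_pad_ib() pads length_dw up to the next multiple of 8. A throwaway check of both formulas, assuming nothing beyond what is visible in the diff:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t wptr, len;

	/* ring case: pad so the next packet starts at wptr % 8 == 4 */
	for (wptr = 0; wptr < 64; wptr++) {
		uint32_t pad = (12 - (wptr & 7)) % 8;
		assert(((wptr + pad) & 7) == 4);
	}

	/* IB case: pad length_dw up to the next 8-dword boundary */
	for (len = 0; len < 64; len++) {
		uint32_t pad = (8 - (len & 7)) % 8;
		assert(((len + pad) & 7) == 0);
	}

	printf("padding formulas check out\n");
	return 0;
}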