aboutsummaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/amd/amdgpu/si_dma.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/si_dma.c')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/si_dma.c72
1 files changed, 26 insertions, 46 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/si_dma.c b/drivers/gpu/drm/amd/amdgpu/si_dma.c
index 3fa2fbf8c9a1..b75d901ba3c4 100644
--- a/drivers/gpu/drm/amd/amdgpu/si_dma.c
+++ b/drivers/gpu/drm/amd/amdgpu/si_dma.c
@@ -24,6 +24,7 @@
#include <drm/drmP.h>
#include "amdgpu.h"
#include "amdgpu_trace.h"
+#include "si.h"
#include "sid.h"
const u32 sdma_offsets[SDMA_MAX_INSTANCE] =
@@ -61,33 +62,19 @@ static void si_dma_ring_set_wptr(struct amdgpu_ring *ring)
static void si_dma_ring_emit_ib(struct amdgpu_ring *ring,
struct amdgpu_ib *ib,
- unsigned vm_id, bool ctx_switch)
+ unsigned vmid, bool ctx_switch)
{
/* The indirect buffer packet must end on an 8 DW boundary in the DMA ring.
* Pad as necessary with NOPs.
*/
while ((lower_32_bits(ring->wptr) & 7) != 5)
amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
- amdgpu_ring_write(ring, DMA_IB_PACKET(DMA_PACKET_INDIRECT_BUFFER, vm_id, 0));
+ amdgpu_ring_write(ring, DMA_IB_PACKET(DMA_PACKET_INDIRECT_BUFFER, vmid, 0));
amdgpu_ring_write(ring, (ib->gpu_addr & 0xFFFFFFE0));
amdgpu_ring_write(ring, (ib->length_dw << 12) | (upper_32_bits(ib->gpu_addr) & 0xFF));
}
-static void si_dma_ring_emit_hdp_flush(struct amdgpu_ring *ring)
-{
- amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
- amdgpu_ring_write(ring, (0xf << 16) | (HDP_MEM_COHERENCY_FLUSH_CNTL));
- amdgpu_ring_write(ring, 1);
-}
-
-static void si_dma_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
-{
- amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
- amdgpu_ring_write(ring, (0xf << 16) | (HDP_DEBUG0));
- amdgpu_ring_write(ring, 1);
-}
-
/**
* si_dma_ring_emit_fence - emit a fence on the DMA ring
*
@@ -134,7 +121,7 @@ static void si_dma_stop(struct amdgpu_device *adev)
WREG32(DMA_RB_CNTL + sdma_offsets[i], rb_cntl);
if (adev->mman.buffer_funcs_ring == ring)
- amdgpu_ttm_set_active_vram_size(adev, adev->mc.visible_vram_size);
+ amdgpu_ttm_set_buffer_funcs_status(adev, false);
ring->ready = false;
}
}
@@ -197,7 +184,7 @@ static int si_dma_start(struct amdgpu_device *adev)
}
if (adev->mman.buffer_funcs_ring == ring)
- amdgpu_ttm_set_active_vram_size(adev, adev->mc.real_vram_size);
+ amdgpu_ttm_set_buffer_funcs_status(adev, true);
}
return 0;
@@ -221,7 +208,7 @@ static int si_dma_ring_test_ring(struct amdgpu_ring *ring)
u32 tmp;
u64 gpu_addr;
- r = amdgpu_wb_get(adev, &index);
+ r = amdgpu_device_wb_get(adev, &index);
if (r) {
dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r);
return r;
@@ -234,7 +221,7 @@ static int si_dma_ring_test_ring(struct amdgpu_ring *ring)
r = amdgpu_ring_alloc(ring, 4);
if (r) {
DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r);
- amdgpu_wb_free(adev, index);
+ amdgpu_device_wb_free(adev, index);
return r;
}
@@ -252,13 +239,13 @@ static int si_dma_ring_test_ring(struct amdgpu_ring *ring)
}
if (i < adev->usec_timeout) {
- DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
+ DRM_DEBUG("ring test on %d succeeded in %d usecs\n", ring->idx, i);
} else {
DRM_ERROR("amdgpu: ring %d test failed (0x%08X)\n",
ring->idx, tmp);
r = -EINVAL;
}
- amdgpu_wb_free(adev, index);
+ amdgpu_device_wb_free(adev, index);
return r;
}
@@ -281,7 +268,7 @@ static int si_dma_ring_test_ib(struct amdgpu_ring *ring, long timeout)
u64 gpu_addr;
long r;
- r = amdgpu_wb_get(adev, &index);
+ r = amdgpu_device_wb_get(adev, &index);
if (r) {
dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r);
return r;
@@ -317,7 +304,7 @@ static int si_dma_ring_test_ib(struct amdgpu_ring *ring, long timeout)
}
tmp = le32_to_cpu(adev->wb.wb[index]);
if (tmp == 0xDEADBEEF) {
- DRM_INFO("ib test on ring %d succeeded\n", ring->idx);
+ DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx);
r = 0;
} else {
DRM_ERROR("amdgpu: ib test failed (0x%08X)\n", tmp);
@@ -328,7 +315,7 @@ err1:
amdgpu_ib_free(adev, &ib, NULL);
dma_fence_put(f);
err0:
- amdgpu_wb_free(adev, index);
+ amdgpu_device_wb_free(adev, index);
return r;
}
@@ -473,29 +460,27 @@ static void si_dma_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
* using sDMA (VI).
*/
static void si_dma_ring_emit_vm_flush(struct amdgpu_ring *ring,
- unsigned vm_id, uint64_t pd_addr)
+ unsigned vmid, uint64_t pd_addr)
{
- amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
- if (vm_id < 8)
- amdgpu_ring_write(ring, (0xf << 16) | (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id));
- else
- amdgpu_ring_write(ring, (0xf << 16) | (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + (vm_id - 8)));
- amdgpu_ring_write(ring, pd_addr >> 12);
-
- /* bits 0-7 are the VM contexts0-7 */
- amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
- amdgpu_ring_write(ring, (0xf << 16) | (VM_INVALIDATE_REQUEST));
- amdgpu_ring_write(ring, 1 << vm_id);
+ amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
/* wait for invalidate to complete */
amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_POLL_REG_MEM, 0, 0, 0, 0));
amdgpu_ring_write(ring, VM_INVALIDATE_REQUEST);
amdgpu_ring_write(ring, 0xff << 16); /* retry */
- amdgpu_ring_write(ring, 1 << vm_id); /* mask */
+ amdgpu_ring_write(ring, 1 << vmid); /* mask */
amdgpu_ring_write(ring, 0); /* value */
amdgpu_ring_write(ring, (0 << 28) | 0x20); /* func(always) | poll interval */
}
+static void si_dma_ring_emit_wreg(struct amdgpu_ring *ring,
+ uint32_t reg, uint32_t val)
+{
+ amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
+ amdgpu_ring_write(ring, (0xf << 16) | reg);
+ amdgpu_ring_write(ring, val);
+}
+
static int si_dma_early_init(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
@@ -772,22 +757,20 @@ static const struct amdgpu_ring_funcs si_dma_ring_funcs = {
.get_wptr = si_dma_ring_get_wptr,
.set_wptr = si_dma_ring_set_wptr,
.emit_frame_size =
- 3 + /* si_dma_ring_emit_hdp_flush */
- 3 + /* si_dma_ring_emit_hdp_invalidate */
+ 3 + 3 + /* hdp flush / invalidate */
6 + /* si_dma_ring_emit_pipeline_sync */
- 12 + /* si_dma_ring_emit_vm_flush */
+ SI_FLUSH_GPU_TLB_NUM_WREG * 3 + 6 + /* si_dma_ring_emit_vm_flush */
9 + 9 + 9, /* si_dma_ring_emit_fence x3 for user fence, vm fence */
.emit_ib_size = 7 + 3, /* si_dma_ring_emit_ib */
.emit_ib = si_dma_ring_emit_ib,
.emit_fence = si_dma_ring_emit_fence,
.emit_pipeline_sync = si_dma_ring_emit_pipeline_sync,
.emit_vm_flush = si_dma_ring_emit_vm_flush,
- .emit_hdp_flush = si_dma_ring_emit_hdp_flush,
- .emit_hdp_invalidate = si_dma_ring_emit_hdp_invalidate,
.test_ring = si_dma_ring_test_ring,
.test_ib = si_dma_ring_test_ib,
.insert_nop = amdgpu_ring_insert_nop,
.pad_ib = si_dma_ring_pad_ib,
+ .emit_wreg = si_dma_ring_emit_wreg,
};
static void si_dma_set_ring_funcs(struct amdgpu_device *adev)
@@ -891,9 +874,6 @@ static const struct amdgpu_vm_pte_funcs si_dma_vm_pte_funcs = {
.copy_pte = si_dma_vm_copy_pte,
.write_pte = si_dma_vm_write_pte,
-
- .set_max_nums_pte_pde = 0xffff8 >> 3,
- .set_pte_pde_num_dw = 9,
.set_pte_pde = si_dma_vm_set_pte_pde,
};