diff options
author | Sean Paul <seanpaul@chromium.org> | 2018-03-21 09:40:55 -0400 |
---|---|---|
committer | Sean Paul <seanpaul@chromium.org> | 2018-03-21 09:40:55 -0400 |
commit | 1c7095d2836baafd84e596dd34ba1a1293a4faa9 (patch) | |
tree | 498f529809b9c0a3c75c8b8bb1098ed4f71233db /drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | |
parent | 2793c1d77aa8876e5674e901d051c79570e99db2 (diff) | |
parent | 78230c46ec0a91dd4256c9e54934b3c7095a7ee3 (diff) |
Merge airlied/drm-next into drm-misc-next
Refresh -misc-next
Signed-off-by: Sean Paul <seanpaul@chromium.org>
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c')
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 100 |
1 files changed, 37 insertions, 63 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index e92fb372bc99..9448c45d1b60 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c @@ -31,8 +31,6 @@ #include "sdma0/sdma0_4_0_sh_mask.h" #include "sdma1/sdma1_4_0_offset.h" #include "sdma1/sdma1_4_0_sh_mask.h" -#include "mmhub/mmhub_1_0_offset.h" -#include "mmhub/mmhub_1_0_sh_mask.h" #include "hdp/hdp_4_0_offset.h" #include "sdma0/sdma0_4_1_default.h" @@ -238,31 +236,27 @@ static uint64_t sdma_v4_0_ring_get_rptr(struct amdgpu_ring *ring) static uint64_t sdma_v4_0_ring_get_wptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - u64 *wptr = NULL; - uint64_t local_wptr = 0; + u64 wptr; if (ring->use_doorbell) { /* XXX check if swapping is necessary on BE */ - wptr = ((u64 *)&adev->wb.wb[ring->wptr_offs]); - DRM_DEBUG("wptr/doorbell before shift == 0x%016llx\n", *wptr); - *wptr = (*wptr) >> 2; - DRM_DEBUG("wptr/doorbell after shift == 0x%016llx\n", *wptr); + wptr = READ_ONCE(*((u64 *)&adev->wb.wb[ring->wptr_offs])); + DRM_DEBUG("wptr/doorbell before shift == 0x%016llx\n", wptr); } else { u32 lowbit, highbit; int me = (ring == &adev->sdma.instance[0].ring) ? 0 : 1; - wptr = &local_wptr; lowbit = RREG32(sdma_v4_0_get_reg_offset(adev, me, mmSDMA0_GFX_RB_WPTR)) >> 2; highbit = RREG32(sdma_v4_0_get_reg_offset(adev, me, mmSDMA0_GFX_RB_WPTR_HI)) >> 2; DRM_DEBUG("wptr [%i]high== 0x%08x low==0x%08x\n", me, highbit, lowbit); - *wptr = highbit; - *wptr = (*wptr) << 32; - *wptr |= lowbit; + wptr = highbit; + wptr = wptr << 32; + wptr |= lowbit; } - return *wptr; + return wptr >> 2; } /** @@ -375,16 +369,6 @@ static void sdma_v4_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); /* retry count, poll interval */ } -static void sdma_v4_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring) -{ - struct amdgpu_device *adev = ring->adev; - - amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) | - SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf)); - amdgpu_ring_write(ring, SOC15_REG_OFFSET(HDP, 0, mmHDP_READ_CACHE_INVALIDATE)); - amdgpu_ring_write(ring, 1); -} - /** * sdma_v4_0_ring_emit_fence - emit a fence on the DMA ring * @@ -440,7 +424,7 @@ static void sdma_v4_0_gfx_stop(struct amdgpu_device *adev) if ((adev->mman.buffer_funcs_ring == sdma0) || (adev->mman.buffer_funcs_ring == sdma1)) - amdgpu_ttm_set_active_vram_size(adev, adev->mc.visible_vram_size); + amdgpu_ttm_set_buffer_funcs_status(adev, false); for (i = 0; i < adev->sdma.num_instances; i++) { rb_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL)); @@ -682,7 +666,7 @@ static int sdma_v4_0_gfx_resume(struct amdgpu_device *adev) } if (adev->mman.buffer_funcs_ring == ring) - amdgpu_ttm_set_active_vram_size(adev, adev->mc.real_vram_size); + amdgpu_ttm_set_buffer_funcs_status(adev, true); } @@ -1135,38 +1119,28 @@ static void sdma_v4_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring) static void sdma_v4_0_ring_emit_vm_flush(struct amdgpu_ring *ring, unsigned vmid, uint64_t pd_addr) { - struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub]; - uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vmid); - uint64_t flags = AMDGPU_PTE_VALID; - unsigned eng = ring->vm_inv_eng; - - amdgpu_gart_get_vm_pde(ring->adev, -1, &pd_addr, &flags); - pd_addr |= flags; - - amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) | - SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf)); - amdgpu_ring_write(ring, hub->ctx0_ptb_addr_lo32 + vmid * 2); - amdgpu_ring_write(ring, lower_32_bits(pd_addr)); - - amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) | - SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf)); - amdgpu_ring_write(ring, hub->ctx0_ptb_addr_hi32 + vmid * 2); - amdgpu_ring_write(ring, upper_32_bits(pd_addr)); + amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); +} - /* flush TLB */ +static void sdma_v4_0_ring_emit_wreg(struct amdgpu_ring *ring, + uint32_t reg, uint32_t val) +{ amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) | SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf)); - amdgpu_ring_write(ring, hub->vm_inv_eng0_req + eng); - amdgpu_ring_write(ring, req); + amdgpu_ring_write(ring, reg); + amdgpu_ring_write(ring, val); +} - /* wait for flush */ +static void sdma_v4_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, + uint32_t val, uint32_t mask) +{ amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) | SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(0) | SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* equal */ - amdgpu_ring_write(ring, (hub->vm_inv_eng0_ack + eng) << 2); + amdgpu_ring_write(ring, reg << 2); amdgpu_ring_write(ring, 0); - amdgpu_ring_write(ring, 1 << vmid); /* reference */ - amdgpu_ring_write(ring, 1 << vmid); /* mask */ + amdgpu_ring_write(ring, val); /* reference */ + amdgpu_ring_write(ring, mask); /* mask */ amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) | SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); } @@ -1196,13 +1170,13 @@ static int sdma_v4_0_sw_init(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; /* SDMA trap event */ - r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_SDMA0, 224, + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_SDMA0, 224, &adev->sdma.trap_irq); if (r) return r; /* SDMA trap event */ - r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_SDMA1, 224, + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_SDMA1, 224, &adev->sdma.trap_irq); if (r) return r; @@ -1357,7 +1331,7 @@ static int sdma_v4_0_process_trap_irq(struct amdgpu_device *adev, { DRM_DEBUG("IH: SDMA trap\n"); switch (entry->client_id) { - case AMDGPU_IH_CLIENTID_SDMA0: + case SOC15_IH_CLIENTID_SDMA0: switch (entry->ring_id) { case 0: amdgpu_fence_process(&adev->sdma.instance[0].ring); @@ -1373,7 +1347,7 @@ static int sdma_v4_0_process_trap_irq(struct amdgpu_device *adev, break; } break; - case AMDGPU_IH_CLIENTID_SDMA1: + case SOC15_IH_CLIENTID_SDMA1: switch (entry->ring_id) { case 0: amdgpu_fence_process(&adev->sdma.instance[1].ring); @@ -1423,7 +1397,7 @@ static void sdma_v4_0_update_medium_grain_clock_gating( if (def != data) WREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CLK_CTRL), data); - if (adev->asic_type == CHIP_VEGA10) { + if (adev->sdma.num_instances > 1) { def = data = RREG32(SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_CLK_CTRL)); data &= ~(SDMA1_CLK_CTRL__SOFT_OVERRIDE7_MASK | SDMA1_CLK_CTRL__SOFT_OVERRIDE6_MASK | @@ -1451,7 +1425,7 @@ static void sdma_v4_0_update_medium_grain_clock_gating( if (def != data) WREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CLK_CTRL), data); - if (adev->asic_type == CHIP_VEGA10) { + if (adev->sdma.num_instances > 1) { def = data = RREG32(SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_CLK_CTRL)); data |= (SDMA1_CLK_CTRL__SOFT_OVERRIDE7_MASK | SDMA1_CLK_CTRL__SOFT_OVERRIDE6_MASK | @@ -1482,7 +1456,7 @@ static void sdma_v4_0_update_medium_grain_light_sleep( WREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_POWER_CNTL), data); /* 1-not override: enable sdma1 mem light sleep */ - if (adev->asic_type == CHIP_VEGA10) { + if (adev->sdma.num_instances > 1) { def = data = RREG32(SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_POWER_CNTL)); data |= SDMA1_POWER_CNTL__MEM_POWER_OVERRIDE_MASK; if (def != data) @@ -1496,7 +1470,7 @@ static void sdma_v4_0_update_medium_grain_light_sleep( WREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_POWER_CNTL), data); /* 0-override:disable sdma1 mem light sleep */ - if (adev->asic_type == CHIP_VEGA10) { + if (adev->sdma.num_instances > 1) { def = data = RREG32(SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_POWER_CNTL)); data &= ~SDMA1_POWER_CNTL__MEM_POWER_OVERRIDE_MASK; if (def != data) @@ -1592,9 +1566,11 @@ static const struct amdgpu_ring_funcs sdma_v4_0_ring_funcs = { .set_wptr = sdma_v4_0_ring_set_wptr, .emit_frame_size = 6 + /* sdma_v4_0_ring_emit_hdp_flush */ - 3 + /* sdma_v4_0_ring_emit_hdp_invalidate */ + 3 + /* hdp invalidate */ 6 + /* sdma_v4_0_ring_emit_pipeline_sync */ - 18 + /* sdma_v4_0_ring_emit_vm_flush */ + /* sdma_v4_0_ring_emit_vm_flush */ + SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 + + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 + 10 + 10 + 10, /* sdma_v4_0_ring_emit_fence x3 for user fence, vm fence */ .emit_ib_size = 7 + 6, /* sdma_v4_0_ring_emit_ib */ .emit_ib = sdma_v4_0_ring_emit_ib, @@ -1602,11 +1578,12 @@ static const struct amdgpu_ring_funcs sdma_v4_0_ring_funcs = { .emit_pipeline_sync = sdma_v4_0_ring_emit_pipeline_sync, .emit_vm_flush = sdma_v4_0_ring_emit_vm_flush, .emit_hdp_flush = sdma_v4_0_ring_emit_hdp_flush, - .emit_hdp_invalidate = sdma_v4_0_ring_emit_hdp_invalidate, .test_ring = sdma_v4_0_ring_test_ring, .test_ib = sdma_v4_0_ring_test_ib, .insert_nop = sdma_v4_0_ring_insert_nop, .pad_ib = sdma_v4_0_ring_pad_ib, + .emit_wreg = sdma_v4_0_ring_emit_wreg, + .emit_reg_wait = sdma_v4_0_ring_emit_reg_wait, }; static void sdma_v4_0_set_ring_funcs(struct amdgpu_device *adev) @@ -1705,9 +1682,6 @@ static const struct amdgpu_vm_pte_funcs sdma_v4_0_vm_pte_funcs = { .copy_pte = sdma_v4_0_vm_copy_pte, .write_pte = sdma_v4_0_vm_write_pte, - - .set_max_nums_pte_pde = 0x400000 >> 3, - .set_pte_pde_num_dw = 10, .set_pte_pde = sdma_v4_0_vm_set_pte_pde, }; |