diff options
author | Sean Paul <seanpaul@chromium.org> | 2017-05-18 09:24:30 -0400 |
---|---|---|
committer | Sean Paul <seanpaul@chromium.org> | 2017-05-18 09:24:30 -0400 |
commit | 6b7781b42dc9bc9bcd1523b6c24b876cdda0bef3 (patch) | |
tree | ee55c67e4ea30b9eb44f301ba0bde2e631a26162 /drivers/gpu/drm/amd/amdgpu/vce_v4_0.c | |
parent | 52d9d38c183bf0e09601d875ea31bb53c05dd8cf (diff) | |
parent | e98c58e55f68f8785aebfab1f8c9a03d8de0afe1 (diff) |
Merge remote-tracking branch 'airlied/drm-next' into drm-misc-next
Picking up drm-next @ 4.12-rc1 in order to apply Michal Hocko's vmalloc patch set
Signed-off-by: Sean Paul <seanpaul@chromium.org>
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/vce_v4_0.c')
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/vce_v4_0.c | 224 |
1 files changed, 80 insertions, 144 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c index edde5fe938d6..139f964196b4 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c @@ -49,63 +49,6 @@ static void vce_v4_0_mc_resume(struct amdgpu_device *adev); static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev); static void vce_v4_0_set_irq_funcs(struct amdgpu_device *adev); -static inline void mmsch_insert_direct_wt(struct mmsch_v1_0_cmd_direct_write *direct_wt, - uint32_t *init_table, - uint32_t reg_offset, - uint32_t value) -{ - direct_wt->cmd_header.reg_offset = reg_offset; - direct_wt->reg_value = value; - memcpy((void *)init_table, direct_wt, sizeof(struct mmsch_v1_0_cmd_direct_write)); -} - -static inline void mmsch_insert_direct_rd_mod_wt(struct mmsch_v1_0_cmd_direct_read_modify_write *direct_rd_mod_wt, - uint32_t *init_table, - uint32_t reg_offset, - uint32_t mask, uint32_t data) -{ - direct_rd_mod_wt->cmd_header.reg_offset = reg_offset; - direct_rd_mod_wt->mask_value = mask; - direct_rd_mod_wt->write_data = data; - memcpy((void *)init_table, direct_rd_mod_wt, - sizeof(struct mmsch_v1_0_cmd_direct_read_modify_write)); -} - -static inline void mmsch_insert_direct_poll(struct mmsch_v1_0_cmd_direct_polling *direct_poll, - uint32_t *init_table, - uint32_t reg_offset, - uint32_t mask, uint32_t wait) -{ - direct_poll->cmd_header.reg_offset = reg_offset; - direct_poll->mask_value = mask; - direct_poll->wait_value = wait; - memcpy((void *)init_table, direct_poll, sizeof(struct mmsch_v1_0_cmd_direct_polling)); -} - -#define INSERT_DIRECT_RD_MOD_WT(reg, mask, data) { \ - mmsch_insert_direct_rd_mod_wt(&direct_rd_mod_wt, \ - init_table, (reg), \ - (mask), (data)); \ - init_table += sizeof(struct mmsch_v1_0_cmd_direct_read_modify_write)/4; \ - table_size += sizeof(struct mmsch_v1_0_cmd_direct_read_modify_write)/4; \ -} - -#define INSERT_DIRECT_WT(reg, value) { \ - mmsch_insert_direct_wt(&direct_wt, \ - init_table, (reg), \ - (value)); \ - init_table += sizeof(struct mmsch_v1_0_cmd_direct_write)/4; \ - table_size += sizeof(struct mmsch_v1_0_cmd_direct_write)/4; \ -} - -#define INSERT_DIRECT_POLL(reg, mask, wait) { \ - mmsch_insert_direct_poll(&direct_poll, \ - init_table, (reg), \ - (mask), (wait)); \ - init_table += sizeof(struct mmsch_v1_0_cmd_direct_polling)/4; \ - table_size += sizeof(struct mmsch_v1_0_cmd_direct_polling)/4; \ -} - /** * vce_v4_0_ring_get_rptr - get read pointer * @@ -280,60 +223,73 @@ static int vce_v4_0_sriov_start(struct amdgpu_device *adev) init_table += header->vce_table_offset; ring = &adev->vce.ring[0]; - INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_RPTR), ring->wptr); - INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_WPTR), ring->wptr); - INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO), lower_32_bits(ring->gpu_addr)); - INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI), upper_32_bits(ring->gpu_addr)); - INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE), ring->ring_size / 4); + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_LO), + lower_32_bits(ring->gpu_addr)); + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_BASE_HI), + upper_32_bits(ring->gpu_addr)); + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_RB_SIZE), + ring->ring_size / 4); /* BEGING OF MC_RESUME */ - INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A), ~(1 << 16), 0); - INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), ~0xFF9FF000, 0x1FF000); - INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_REG_CLOCK_GATING), ~0x3F, 0x3F); - INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_B), 0x1FF); - - INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL), 0x398000); - INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CACHE_CTRL), ~0x1, 0); - INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL), 0); - INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0); - INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0); - - INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0), adev->vce.gpu_addr >> 8); - INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR1), adev->vce.gpu_addr >> 8); - INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR2), adev->vce.gpu_addr >> 8); + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL), 0x398000); + MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CACHE_CTRL), ~0x1, 0); + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL), 0); + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0); + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0); + + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0), + adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 8); + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR1), + adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 8); + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR2), + adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 8); + } else { + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0), + adev->vce.gpu_addr >> 8); + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR1), + adev->vce.gpu_addr >> 8); + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR2), + adev->vce.gpu_addr >> 8); + } offset = AMDGPU_VCE_FIRMWARE_OFFSET; size = VCE_V4_0_FW_SIZE; - INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), offset & 0x7FFFFFFF); - INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size); + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), + offset & 0x7FFFFFFF); + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size); offset += size; size = VCE_V4_0_STACK_SIZE; - INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET1), offset & 0x7FFFFFFF); - INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE1), size); + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET1), + offset & 0x7FFFFFFF); + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE1), size); offset += size; size = VCE_V4_0_DATA_SIZE; - INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET2), offset & 0x7FFFFFFF); - INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE2), size); + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET2), + offset & 0x7FFFFFFF); + MMSCH_V1_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE2), size); - INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL2), ~0x100, 0); - INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN), - 0xffffffff, VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK); + MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_CTRL2), ~0x100, 0); + MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SYS_INT_EN), + 0xffffffff, VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK); /* end of MC_RESUME */ - INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL), - ~0x200001, VCE_VCPU_CNTL__CLK_EN_MASK); - INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), - ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK, 0); + MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), + VCE_STATUS__JOB_BUSY_MASK, ~VCE_STATUS__JOB_BUSY_MASK); + MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CNTL), + ~0x200001, VCE_VCPU_CNTL__CLK_EN_MASK); + MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_SOFT_RESET), + ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK, 0); - INSERT_DIRECT_POLL(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), - VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK, - VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK); + MMSCH_V1_0_INSERT_DIRECT_POLL(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), + VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK, + VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK); /* clear BUSY flag */ - INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), - ~VCE_STATUS__JOB_BUSY_MASK, 0); + MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCE, 0, mmVCE_STATUS), + ~VCE_STATUS__JOB_BUSY_MASK, 0); /* add end packet */ memcpy((void *)init_table, &end, sizeof(struct mmsch_v1_0_cmd_end)); @@ -494,20 +450,9 @@ static int vce_v4_0_sw_init(void *handle) return r; } - if (amdgpu_sriov_vf(adev)) { - r = amdgpu_bo_create_kernel(adev, PAGE_SIZE, PAGE_SIZE, - AMDGPU_GEM_DOMAIN_VRAM, - &adev->virt.mm_table.bo, - &adev->virt.mm_table.gpu_addr, - (void *)&adev->virt.mm_table.cpu_addr); - if (!r) { - memset((void *)adev->virt.mm_table.cpu_addr, 0, PAGE_SIZE); - printk("mm table gpu addr = 0x%llx, cpu addr = %p. \n", - adev->virt.mm_table.gpu_addr, - adev->virt.mm_table.cpu_addr); - } + r = amdgpu_virt_alloc_mm_table(adev); + if (r) return r; - } return r; } @@ -518,10 +463,7 @@ static int vce_v4_0_sw_fini(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; /* free MM table */ - if (amdgpu_sriov_vf(adev)) - amdgpu_bo_free_kernel(&adev->virt.mm_table.bo, - &adev->virt.mm_table.gpu_addr, - (void *)&adev->virt.mm_table.cpu_addr); + amdgpu_virt_free_mm_table(adev); r = amdgpu_vce_suspend(adev); if (r) @@ -973,44 +915,37 @@ static void vce_v4_0_ring_insert_end(struct amdgpu_ring *ring) static void vce_v4_0_emit_vm_flush(struct amdgpu_ring *ring, unsigned int vm_id, uint64_t pd_addr) { + struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub]; uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id); - unsigned eng = ring->idx; - unsigned i; + unsigned eng = ring->vm_inv_eng; pd_addr = pd_addr | 0x1; /* valid bit */ /* now only use physical base address of PDE and valid */ BUG_ON(pd_addr & 0xFFFF00000000003EULL); - for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) { - struct amdgpu_vmhub *hub = &ring->adev->vmhub[i]; - - amdgpu_ring_write(ring, VCE_CMD_REG_WRITE); - amdgpu_ring_write(ring, - (hub->ctx0_ptb_addr_hi32 + vm_id * 2) << 2); - amdgpu_ring_write(ring, upper_32_bits(pd_addr)); - - amdgpu_ring_write(ring, VCE_CMD_REG_WRITE); - amdgpu_ring_write(ring, - (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2); - amdgpu_ring_write(ring, lower_32_bits(pd_addr)); - - amdgpu_ring_write(ring, VCE_CMD_REG_WAIT); - amdgpu_ring_write(ring, - (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2); - amdgpu_ring_write(ring, 0xffffffff); - amdgpu_ring_write(ring, lower_32_bits(pd_addr)); - - /* flush TLB */ - amdgpu_ring_write(ring, VCE_CMD_REG_WRITE); - amdgpu_ring_write(ring, (hub->vm_inv_eng0_req + eng) << 2); - amdgpu_ring_write(ring, req); - - /* wait for flush */ - amdgpu_ring_write(ring, VCE_CMD_REG_WAIT); - amdgpu_ring_write(ring, (hub->vm_inv_eng0_ack + eng) << 2); - amdgpu_ring_write(ring, 1 << vm_id); - amdgpu_ring_write(ring, 1 << vm_id); - } + amdgpu_ring_write(ring, VCE_CMD_REG_WRITE); + amdgpu_ring_write(ring, (hub->ctx0_ptb_addr_hi32 + vm_id * 2) << 2); + amdgpu_ring_write(ring, upper_32_bits(pd_addr)); + + amdgpu_ring_write(ring, VCE_CMD_REG_WRITE); + amdgpu_ring_write(ring, (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2); + amdgpu_ring_write(ring, lower_32_bits(pd_addr)); + + amdgpu_ring_write(ring, VCE_CMD_REG_WAIT); + amdgpu_ring_write(ring, (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2); + amdgpu_ring_write(ring, 0xffffffff); + amdgpu_ring_write(ring, lower_32_bits(pd_addr)); + + /* flush TLB */ + amdgpu_ring_write(ring, VCE_CMD_REG_WRITE); + amdgpu_ring_write(ring, (hub->vm_inv_eng0_req + eng) << 2); + amdgpu_ring_write(ring, req); + + /* wait for flush */ + amdgpu_ring_write(ring, VCE_CMD_REG_WAIT); + amdgpu_ring_write(ring, (hub->vm_inv_eng0_ack + eng) << 2); + amdgpu_ring_write(ring, 1 << vm_id); + amdgpu_ring_write(ring, 1 << vm_id); } static int vce_v4_0_set_interrupt_state(struct amdgpu_device *adev, @@ -1078,12 +1013,13 @@ static const struct amdgpu_ring_funcs vce_v4_0_ring_vm_funcs = { .align_mask = 0x3f, .nop = VCE_CMD_NO_OP, .support_64bit_ptrs = false, + .vmhub = AMDGPU_MMHUB, .get_rptr = vce_v4_0_ring_get_rptr, .get_wptr = vce_v4_0_ring_get_wptr, .set_wptr = vce_v4_0_ring_set_wptr, .parse_cs = amdgpu_vce_ring_parse_cs_vm, .emit_frame_size = - 17 * AMDGPU_MAX_VMHUBS + /* vce_v4_0_emit_vm_flush */ + 17 + /* vce_v4_0_emit_vm_flush */ 5 + 5 + /* amdgpu_vce_ring_emit_fence x2 vm fence */ 1, /* vce_v4_0_ring_insert_end */ .emit_ib_size = 5, /* vce_v4_0_ring_emit_ib */ |