From e684e654eba9481a9f462a7bbf5c385b7d1c076e Mon Sep 17 00:00:00 2001 From: James Zhu Date: Thu, 6 Jan 2022 17:04:42 -0500 Subject: drm/amdgpu/jpeg: add jpeg support for VCN4_0_3 Add jpeg support for VCN4_0_3. v2: squash in delayed work typo fix (Alex) Signed-off-by: James Zhu Reviewed-by: Leo Liu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c | 762 +++++++++++++++++++++++++++++++ 1 file changed, 762 insertions(+) create mode 100644 drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c (limited to 'drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c') diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c new file mode 100644 index 000000000000..1fc72f9b52ed --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c @@ -0,0 +1,762 @@ +/* + * Copyright 2022 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ + +#include "amdgpu.h" +#include "amdgpu_jpeg.h" +#include "soc15.h" +#include "soc15d.h" +#include "jpeg_v4_0_3.h" + +#include "vcn/vcn_4_0_3_offset.h" +#include "vcn/vcn_4_0_3_sh_mask.h" +#include "ivsrcid/vcn/irqsrcs_vcn_2_0.h" + +enum jpeg_engin_status { + UVD_PGFSM_STATUS__UVDJ_PWR_ON = 0, + UVD_PGFSM_STATUS__UVDJ_PWR_OFF = 2, +}; + +static void jpeg_v4_0_3_set_dec_ring_funcs(struct amdgpu_device *adev); +static void jpeg_v4_0_3_set_irq_funcs(struct amdgpu_device *adev); +static int jpeg_v4_0_3_set_powergating_state(void *handle, + enum amd_powergating_state state); + +/** + * jpeg_v4_0_3_early_init - set function pointers + * + * @handle: amdgpu_device pointer + * + * Set ring and irq function pointers + */ +static int jpeg_v4_0_3_early_init(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + jpeg_v4_0_3_set_dec_ring_funcs(adev); + jpeg_v4_0_3_set_irq_funcs(adev); + + return 0; +} + +/** + * jpeg_v4_0_3_sw_init - sw init for JPEG block + * + * @handle: amdgpu_device pointer + * + * Load firmware and sw initialization + */ +static int jpeg_v4_0_3_sw_init(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_ring *ring; + int r; + + /* JPEG TRAP */ + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, + VCN_2_0__SRCID__JPEG_DECODE, &adev->jpeg.inst->irq); + if (r) + return r; + + r = amdgpu_jpeg_sw_init(adev); + if (r) + return r; + + r = amdgpu_jpeg_resume(adev); + if (r) + return r; + + ring = &adev->jpeg.inst->ring_dec; + ring->use_doorbell = false; + ring->vm_hub = AMDGPU_MMHUB0(0); + sprintf(ring->name, "jpeg_dec"); + r = amdgpu_ring_init(adev, ring, 512, &adev->jpeg.inst->irq, 0, + AMDGPU_RING_PRIO_DEFAULT, NULL); + if (r) + return r; + + adev->jpeg.internal.jpeg_pitch = regUVD_JPEG_PITCH_INTERNAL_OFFSET; + adev->jpeg.inst->external.jpeg_pitch = SOC15_REG_OFFSET(JPEG, 0, regUVD_JPEG_PITCH); + + return 0; +} + +/** + * jpeg_v4_0_3_sw_fini - sw fini for JPEG block + * + * 
@handle: amdgpu_device pointer + * + * JPEG suspend and free up sw allocation + */ +static int jpeg_v4_0_3_sw_fini(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int r; + + r = amdgpu_jpeg_suspend(adev); + if (r) + return r; + + r = amdgpu_jpeg_sw_fini(adev); + + return r; +} + +/** + * jpeg_v4_0_3_hw_init - start and test JPEG block + * + * @handle: amdgpu_device pointer + * + */ +static int jpeg_v4_0_3_hw_init(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_ring *ring = &adev->jpeg.inst->ring_dec; + int r; + + r = amdgpu_ring_test_helper(ring); + if (!r) + DRM_DEV_INFO(adev->dev, "JPEG decode initialized successfully.\n"); + + return r; +} + +/** + * jpeg_v4_0_3_hw_fini - stop the hardware block + * + * @handle: amdgpu_device pointer + * + * Stop the JPEG block, mark ring as not ready any more + */ +static int jpeg_v4_0_3_hw_fini(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + cancel_delayed_work_sync(&adev->jpeg.idle_work); + + if (adev->jpeg.cur_state != AMD_PG_STATE_GATE) + jpeg_v4_0_3_set_powergating_state(adev, AMD_PG_STATE_GATE); + + return 0; +} + +/** + * jpeg_v4_0_3_suspend - suspend JPEG block + * + * @handle: amdgpu_device pointer + * + * HW fini and suspend JPEG block + */ +static int jpeg_v4_0_3_suspend(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int r; + + r = jpeg_v4_0_3_hw_fini(adev); + if (r) + return r; + + r = amdgpu_jpeg_suspend(adev); + + return r; +} + +/** + * jpeg_v4_0_3_resume - resume JPEG block + * + * @handle: amdgpu_device pointer + * + * Resume firmware and hw init JPEG block + */ +static int jpeg_v4_0_3_resume(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int r; + + r = amdgpu_jpeg_resume(adev); + if (r) + return r; + + r = jpeg_v4_0_3_hw_init(adev); + + return r; +} + +static void jpeg_v4_0_3_disable_clock_gating(struct amdgpu_device *adev) 
+{ + uint32_t data; + + data = RREG32_SOC15(JPEG, 0, regJPEG_CGC_CTRL); + if (adev->cg_flags & AMD_CG_SUPPORT_JPEG_MGCG) + data |= 1 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; + else + data &= ~JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; + + data |= 1 << JPEG_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT; + data |= 4 << JPEG_CGC_CTRL__CLK_OFF_DELAY__SHIFT; + WREG32_SOC15(JPEG, 0, regJPEG_CGC_CTRL, data); + + data = RREG32_SOC15(JPEG, 0, regJPEG_CGC_GATE); + data &= ~(JPEG_CGC_GATE__JPEG0_DEC_MASK + | JPEG_CGC_GATE__JPEG2_DEC_MASK + | JPEG_CGC_GATE__JMCIF_MASK + | JPEG_CGC_GATE__JRBBM_MASK); + WREG32_SOC15(JPEG, 0, regJPEG_CGC_GATE, data); +} + +static void jpeg_v4_0_3_enable_clock_gating(struct amdgpu_device *adev) +{ + uint32_t data; + + data = RREG32_SOC15(JPEG, 0, regJPEG_CGC_CTRL); + if (adev->cg_flags & AMD_CG_SUPPORT_JPEG_MGCG) + data |= 1 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; + else + data &= ~JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; + + data |= 1 << JPEG_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT; + data |= 4 << JPEG_CGC_CTRL__CLK_OFF_DELAY__SHIFT; + WREG32_SOC15(JPEG, 0, regJPEG_CGC_CTRL, data); + + data = RREG32_SOC15(JPEG, 0, regJPEG_CGC_GATE); + data |= (JPEG_CGC_GATE__JPEG0_DEC_MASK + |JPEG_CGC_GATE__JPEG2_DEC_MASK + |JPEG_CGC_GATE__JMCIF_MASK + |JPEG_CGC_GATE__JRBBM_MASK); + WREG32_SOC15(JPEG, 0, regJPEG_CGC_GATE, data); +} + +/** + * jpeg_v4_0_3_start - start JPEG block + * + * @adev: amdgpu_device pointer + * + * Setup and start the JPEG block + */ +static int jpeg_v4_0_3_start(struct amdgpu_device *adev) +{ + struct amdgpu_ring *ring = &adev->jpeg.inst->ring_dec; + + WREG32_SOC15(JPEG, 0, regUVD_PGFSM_CONFIG, + 1 << UVD_PGFSM_CONFIG__UVDJ_PWR_CONFIG__SHIFT); + SOC15_WAIT_ON_RREG(JPEG, 0, regUVD_PGFSM_STATUS, + UVD_PGFSM_STATUS__UVDJ_PWR_ON << + UVD_PGFSM_STATUS__UVDJ_PWR_STATUS__SHIFT, + UVD_PGFSM_STATUS__UVDJ_PWR_STATUS_MASK); + + /* disable anti hang mechanism */ + WREG32_P(SOC15_REG_OFFSET(JPEG, 0, regUVD_JPEG_POWER_STATUS), 0, + 
~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK); + + /* JPEG disable CGC */ + jpeg_v4_0_3_disable_clock_gating(adev); + + /* MJPEG global tiling registers */ + WREG32_SOC15(JPEG, 0, regJPEG_DEC_GFX8_ADDR_CONFIG, + adev->gfx.config.gb_addr_config); + WREG32_SOC15(JPEG, 0, regJPEG_DEC_GFX10_ADDR_CONFIG, + adev->gfx.config.gb_addr_config); + + /* enable JMI channel */ + WREG32_P(SOC15_REG_OFFSET(JPEG, 0, regUVD_JMI_CNTL), 0, + ~UVD_JMI_CNTL__SOFT_RESET_MASK); + + /* enable System Interrupt for JRBC */ + WREG32_P(SOC15_REG_OFFSET(JPEG, 0, regJPEG_SYS_INT_EN), + JPEG_SYS_INT_EN__DJRBC0_MASK, + ~JPEG_SYS_INT_EN__DJRBC0_MASK); + + WREG32_SOC15(JPEG, 0, regUVD_JMI0_UVD_LMI_JRBC_RB_VMID, 0); + WREG32_SOC15(JPEG, 0, regUVD_JRBC0_UVD_JRBC_RB_CNTL, (0x00000001L | 0x00000002L)); + WREG32_SOC15(JPEG, 0, regUVD_JMI0_UVD_LMI_JRBC_RB_64BIT_BAR_LOW, + lower_32_bits(ring->gpu_addr)); + WREG32_SOC15(JPEG, 0, regUVD_JMI0_UVD_LMI_JRBC_RB_64BIT_BAR_HIGH, + upper_32_bits(ring->gpu_addr)); + WREG32_SOC15(JPEG, 0, regUVD_JRBC0_UVD_JRBC_RB_RPTR, 0); + WREG32_SOC15(JPEG, 0, regUVD_JRBC0_UVD_JRBC_RB_WPTR, 0); + WREG32_SOC15(JPEG, 0, regUVD_JRBC0_UVD_JRBC_RB_CNTL, 0x00000002L); + WREG32_SOC15(JPEG, 0, regUVD_JRBC0_UVD_JRBC_RB_SIZE, ring->ring_size / 4); + ring->wptr = RREG32_SOC15(JPEG, 0, regUVD_JRBC0_UVD_JRBC_RB_WPTR); + + return 0; +} + +/** + * jpeg_v4_0_3_stop - stop JPEG block + * + * @adev: amdgpu_device pointer + * + * stop the JPEG block + */ +static int jpeg_v4_0_3_stop(struct amdgpu_device *adev) +{ + /* reset JMI */ + WREG32_P(SOC15_REG_OFFSET(JPEG, 0, regUVD_JMI_CNTL), + UVD_JMI_CNTL__SOFT_RESET_MASK, + ~UVD_JMI_CNTL__SOFT_RESET_MASK); + + jpeg_v4_0_3_enable_clock_gating(adev); + + /* enable anti hang mechanism */ + WREG32_P(SOC15_REG_OFFSET(JPEG, 0, regUVD_JPEG_POWER_STATUS), + UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK, + ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK); + + WREG32_SOC15(JPEG, 0, regUVD_PGFSM_CONFIG, + 2 << UVD_PGFSM_CONFIG__UVDJ_PWR_CONFIG__SHIFT); + 
SOC15_WAIT_ON_RREG(JPEG, 0, regUVD_PGFSM_STATUS, + UVD_PGFSM_STATUS__UVDJ_PWR_OFF << + UVD_PGFSM_STATUS__UVDJ_PWR_STATUS__SHIFT, + UVD_PGFSM_STATUS__UVDJ_PWR_STATUS_MASK); + + return 0; +} + +/** + * jpeg_v4_0_3_dec_ring_get_rptr - get read pointer + * + * @ring: amdgpu_ring pointer + * + * Returns the current hardware read pointer + */ +static uint64_t jpeg_v4_0_3_dec_ring_get_rptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + return RREG32_SOC15(JPEG, ring->me, regUVD_JRBC0_UVD_JRBC_RB_RPTR); +} + +/** + * jpeg_v4_0_3_dec_ring_get_wptr - get write pointer + * + * @ring: amdgpu_ring pointer + * + * Returns the current hardware write pointer + */ +static uint64_t jpeg_v4_0_3_dec_ring_get_wptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + if (ring->use_doorbell) + return adev->wb.wb[ring->wptr_offs]; + else + return RREG32_SOC15(JPEG, ring->me, regUVD_JRBC0_UVD_JRBC_RB_WPTR); +} + +/** + * jpeg_v4_0_3_dec_ring_set_wptr - set write pointer + * + * @ring: amdgpu_ring pointer + * + * Commits the write pointer to the hardware + */ +static void jpeg_v4_0_3_dec_ring_set_wptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + if (ring->use_doorbell) { + adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr); + WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr)); + } else { + WREG32_SOC15(JPEG, ring->me, + regUVD_JRBC0_UVD_JRBC_RB_WPTR, lower_32_bits(ring->wptr)); + } +} + +/** + * jpeg_v4_0_3_dec_ring_insert_start - insert a start command + * + * @ring: amdgpu_ring pointer + * + * Write a start command to the ring. 
+ */ +static void jpeg_v4_0_3_dec_ring_insert_start(struct amdgpu_ring *ring) +{ + amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET, + 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, 0x62a04);/* TODO: PCTL0_MMHUB_DEEPSLEEP_IB */ + + amdgpu_ring_write(ring, PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR, + 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, 0x80004000); +} + +/** + * jpeg_v4_0_3_dec_ring_insert_end - insert a end command + * + * @ring: amdgpu_ring pointer + * + * Write a end command to the ring. + */ +static void jpeg_v4_0_3_dec_ring_insert_end(struct amdgpu_ring *ring) +{ + amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET, + 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, 0x62a04); + + amdgpu_ring_write(ring, PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR, + 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, 0x00004000); +} + +/** + * jpeg_v4_0_3_dec_ring_emit_fence - emit an fence & trap command + * + * @ring: amdgpu_ring pointer + * @addr: address + * @seq: sequence number + * @flags: fence related flags + * + * Write a fence and a trap command to the ring. 
+ */ +static void jpeg_v4_0_3_dec_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq, + unsigned int flags) +{ + WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT); + + amdgpu_ring_write(ring, PACKETJ(regUVD_JPEG_GPCOM_DATA0_INTERNAL_OFFSET, + 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, seq); + + amdgpu_ring_write(ring, PACKETJ(regUVD_JPEG_GPCOM_DATA1_INTERNAL_OFFSET, + 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, seq); + + amdgpu_ring_write(ring, PACKETJ(regUVD_LMI_JRBC_RB_MEM_WR_64BIT_BAR_LOW_INTERNAL_OFFSET, + 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, lower_32_bits(addr)); + + amdgpu_ring_write(ring, PACKETJ(regUVD_LMI_JRBC_RB_MEM_WR_64BIT_BAR_HIGH_INTERNAL_OFFSET, + 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, upper_32_bits(addr)); + + amdgpu_ring_write(ring, PACKETJ(regUVD_JPEG_GPCOM_CMD_INTERNAL_OFFSET, + 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, 0x8); + + amdgpu_ring_write(ring, PACKETJ(regUVD_JPEG_GPCOM_CMD_INTERNAL_OFFSET, + 0, PACKETJ_CONDITION_CHECK0, PACKETJ_TYPE4)); + amdgpu_ring_write(ring, 0); + + amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET, + 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, 0x3fbc); + + amdgpu_ring_write(ring, PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR, + 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, 0x1); + + amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE7)); + amdgpu_ring_write(ring, 0); +} + +/** + * jpeg_v4_0_3_dec_ring_emit_ib - execute indirect buffer + * + * @ring: amdgpu_ring pointer + * @job: job to retrieve vmid from + * @ib: indirect buffer to execute + * @flags: unused + * + * Write ring commands to execute the indirect buffer. 
+ */ +static void jpeg_v4_0_3_dec_ring_emit_ib(struct amdgpu_ring *ring, + struct amdgpu_job *job, + struct amdgpu_ib *ib, + uint32_t flags) +{ + unsigned int vmid = AMDGPU_JOB_GET_VMID(job); + + amdgpu_ring_write(ring, PACKETJ(regUVD_LMI_JRBC_IB_VMID_INTERNAL_OFFSET, + 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, (vmid | (vmid << 4))); + + amdgpu_ring_write(ring, PACKETJ(regUVD_LMI_JPEG_VMID_INTERNAL_OFFSET, + 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, (vmid | (vmid << 4))); + + amdgpu_ring_write(ring, PACKETJ(regUVD_LMI_JRBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET, + 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr)); + + amdgpu_ring_write(ring, PACKETJ(regUVD_LMI_JRBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET, + 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); + + amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_IB_SIZE_INTERNAL_OFFSET, + 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, ib->length_dw); + + amdgpu_ring_write(ring, PACKETJ(regUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_LOW_INTERNAL_OFFSET, + 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, lower_32_bits(ring->gpu_addr)); + + amdgpu_ring_write(ring, PACKETJ(regUVD_LMI_JRBC_RB_MEM_RD_64BIT_BAR_HIGH_INTERNAL_OFFSET, + 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, upper_32_bits(ring->gpu_addr)); + + amdgpu_ring_write(ring, PACKETJ(0, 0, PACKETJ_CONDITION_CHECK0, PACKETJ_TYPE2)); + amdgpu_ring_write(ring, 0); + + amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_RB_COND_RD_TIMER_INTERNAL_OFFSET, + 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, 0x01400200); + + amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_RB_REF_DATA_INTERNAL_OFFSET, + 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, 0x2); + + amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_STATUS_INTERNAL_OFFSET, + 0, PACKETJ_CONDITION_CHECK3, PACKETJ_TYPE3)); + amdgpu_ring_write(ring, 0x2); +} + +static void jpeg_v4_0_3_dec_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, + uint32_t val, uint32_t mask) +{ + 
uint32_t reg_offset = (reg << 2); + + amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_RB_COND_RD_TIMER_INTERNAL_OFFSET, + 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, 0x01400200); + + amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_RB_REF_DATA_INTERNAL_OFFSET, + 0, 0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, val); + + amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET, + 0, 0, PACKETJ_TYPE0)); + if (reg_offset >= 0x10000 && reg_offset <= 0x105ff) { + amdgpu_ring_write(ring, 0); + amdgpu_ring_write(ring, + PACKETJ((reg_offset >> 2), 0, 0, PACKETJ_TYPE3)); + } else { + amdgpu_ring_write(ring, reg_offset); + amdgpu_ring_write(ring, PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR, + 0, 0, PACKETJ_TYPE3)); + } + amdgpu_ring_write(ring, mask); +} + +static void jpeg_v4_0_3_dec_ring_emit_vm_flush(struct amdgpu_ring *ring, + unsigned int vmid, uint64_t pd_addr) +{ + struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->vm_hub]; + uint32_t data0, data1, mask; + + pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); + + /* wait for register write */ + data0 = hub->ctx0_ptb_addr_lo32 + vmid * hub->ctx_addr_distance; + data1 = lower_32_bits(pd_addr); + mask = 0xffffffff; + jpeg_v4_0_3_dec_ring_emit_reg_wait(ring, data0, data1, mask); +} + +static void jpeg_v4_0_3_dec_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t val) +{ + uint32_t reg_offset = (reg << 2); + + amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET, + 0, 0, PACKETJ_TYPE0)); + if (reg_offset >= 0x10000 && reg_offset <= 0x105ff) { + amdgpu_ring_write(ring, 0); + amdgpu_ring_write(ring, + PACKETJ((reg_offset >> 2), 0, 0, PACKETJ_TYPE0)); + } else { + amdgpu_ring_write(ring, reg_offset); + amdgpu_ring_write(ring, PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR, + 0, 0, PACKETJ_TYPE0)); + } + amdgpu_ring_write(ring, val); +} + +static void jpeg_v4_0_3_dec_ring_nop(struct amdgpu_ring *ring, uint32_t count) +{ + int i; + + WARN_ON(ring->wptr % 2 || count % 2); + + for (i 
= 0; i < count / 2; i++) { + amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE6)); + amdgpu_ring_write(ring, 0); + } +} + +static bool jpeg_v4_0_3_is_idle(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + return ((RREG32_SOC15(JPEG, 0, regUVD_JRBC0_UVD_JRBC_STATUS) & + UVD_JRBC0_UVD_JRBC_STATUS__RB_JOB_DONE_MASK) == + UVD_JRBC0_UVD_JRBC_STATUS__RB_JOB_DONE_MASK); +} + +static int jpeg_v4_0_3_wait_for_idle(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int ret; + + ret = SOC15_WAIT_ON_RREG(JPEG, 0, regUVD_JRBC0_UVD_JRBC_STATUS, + UVD_JRBC0_UVD_JRBC_STATUS__RB_JOB_DONE_MASK, + UVD_JRBC0_UVD_JRBC_STATUS__RB_JOB_DONE_MASK); + return ret; +} + +static int jpeg_v4_0_3_set_clockgating_state(void *handle, + enum amd_clockgating_state state) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + bool enable = (state == AMD_CG_STATE_GATE); + + if (enable) { + if (!jpeg_v4_0_3_is_idle(handle)) + return -EBUSY; + jpeg_v4_0_3_enable_clock_gating(adev); + } else { + jpeg_v4_0_3_disable_clock_gating(adev); + } + + return 0; +} + +static int jpeg_v4_0_3_set_powergating_state(void *handle, + enum amd_powergating_state state) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int ret; + + if (state == adev->jpeg.cur_state) + return 0; + + if (state == AMD_PG_STATE_GATE) + ret = jpeg_v4_0_3_stop(adev); + else + ret = jpeg_v4_0_3_start(adev); + + if (!ret) + adev->jpeg.cur_state = state; + + return ret; +} + +static int jpeg_v4_0_3_set_interrupt_state(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + unsigned int type, + enum amdgpu_interrupt_state state) +{ + return 0; +} + +static int jpeg_v4_0_3_process_interrupt(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + DRM_DEV_DEBUG(adev->dev, "IH: JPEG TRAP\n"); + + switch (entry->src_id) { + case VCN_2_0__SRCID__JPEG_DECODE: + 
amdgpu_fence_process(&adev->jpeg.inst->ring_dec); + break; + default: + DRM_DEV_ERROR(adev->dev, "Unhandled interrupt: %d %d\n", + entry->src_id, entry->src_data[0]); + break; + } + + return 0; +} + +static const struct amd_ip_funcs jpeg_v4_0_3_ip_funcs = { + .name = "jpeg_v4_0_3", + .early_init = jpeg_v4_0_3_early_init, + .late_init = NULL, + .sw_init = jpeg_v4_0_3_sw_init, + .sw_fini = jpeg_v4_0_3_sw_fini, + .hw_init = jpeg_v4_0_3_hw_init, + .hw_fini = jpeg_v4_0_3_hw_fini, + .suspend = jpeg_v4_0_3_suspend, + .resume = jpeg_v4_0_3_resume, + .is_idle = jpeg_v4_0_3_is_idle, + .wait_for_idle = jpeg_v4_0_3_wait_for_idle, + .check_soft_reset = NULL, + .pre_soft_reset = NULL, + .soft_reset = NULL, + .post_soft_reset = NULL, + .set_clockgating_state = jpeg_v4_0_3_set_clockgating_state, + .set_powergating_state = jpeg_v4_0_3_set_powergating_state, +}; + +static const struct amdgpu_ring_funcs jpeg_v4_0_3_dec_ring_vm_funcs = { + .type = AMDGPU_RING_TYPE_VCN_JPEG, + .align_mask = 0xf, + .get_rptr = jpeg_v4_0_3_dec_ring_get_rptr, + .get_wptr = jpeg_v4_0_3_dec_ring_get_wptr, + .set_wptr = jpeg_v4_0_3_dec_ring_set_wptr, + .emit_frame_size = + SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 + + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 + + 8 + /* jpeg_v4_0_3_dec_ring_emit_vm_flush */ + 18 + 18 + /* jpeg_v4_0_3_dec_ring_emit_fence x2 vm fence */ + 8 + 16, + .emit_ib_size = 22, /* jpeg_v4_0_3_dec_ring_emit_ib */ + .emit_ib = jpeg_v4_0_3_dec_ring_emit_ib, + .emit_fence = jpeg_v4_0_3_dec_ring_emit_fence, + .emit_vm_flush = jpeg_v4_0_3_dec_ring_emit_vm_flush, + .test_ring = amdgpu_jpeg_dec_ring_test_ring, + .test_ib = amdgpu_jpeg_dec_ring_test_ib, + .insert_nop = jpeg_v4_0_3_dec_ring_nop, + .insert_start = jpeg_v4_0_3_dec_ring_insert_start, + .insert_end = jpeg_v4_0_3_dec_ring_insert_end, + .pad_ib = amdgpu_ring_generic_pad_ib, + .begin_use = amdgpu_jpeg_ring_begin_use, + .end_use = amdgpu_jpeg_ring_end_use, + .emit_wreg = jpeg_v4_0_3_dec_ring_emit_wreg, + .emit_reg_wait = 
jpeg_v4_0_3_dec_ring_emit_reg_wait, + .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, +}; + +static void jpeg_v4_0_3_set_dec_ring_funcs(struct amdgpu_device *adev) +{ + adev->jpeg.inst->ring_dec.funcs = &jpeg_v4_0_3_dec_ring_vm_funcs; + adev->jpeg.inst->ring_dec.me = 0; + DRM_DEV_INFO(adev->dev, "JPEG decode is enabled in VM mode\n"); +} + +static const struct amdgpu_irq_src_funcs jpeg_v4_0_3_irq_funcs = { + .set = jpeg_v4_0_3_set_interrupt_state, + .process = jpeg_v4_0_3_process_interrupt, +}; + +static void jpeg_v4_0_3_set_irq_funcs(struct amdgpu_device *adev) +{ + adev->jpeg.inst->irq.num_types = 1; + adev->jpeg.inst->irq.funcs = &jpeg_v4_0_3_irq_funcs; +} + +const struct amdgpu_ip_block_version jpeg_v4_0_3_ip_block = { + .type = AMD_IP_BLOCK_TYPE_JPEG, + .major = 4, + .minor = 0, + .rev = 3, + .funcs = &jpeg_v4_0_3_ip_funcs, +}; -- cgit From bc224553843e526bad4bb91188363aea1664a70d Mon Sep 17 00:00:00 2001 From: James Zhu Date: Tue, 24 May 2022 12:03:03 +0800 Subject: drm/amdgpu/jpeg: add multiple jpeg rings support Add multiple jpeg rings support. 
Signed-off-by: James Zhu Reviewed-by: Leo Liu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c | 21 +++++++++++---------- drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h | 6 ++++-- drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 5 +++-- drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c | 12 ++++++------ drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c | 14 +++++++------- drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c | 18 +++++++++--------- drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c | 14 +++++++------- drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c | 16 ++++++++-------- drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c | 16 ++++++++-------- drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c | 12 ++++++------ 10 files changed, 69 insertions(+), 65 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c index b07c000fc8ba..388466a5f730 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c @@ -45,13 +45,14 @@ int amdgpu_jpeg_sw_init(struct amdgpu_device *adev) int amdgpu_jpeg_sw_fini(struct amdgpu_device *adev) { - int i; + int i, j; for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { if (adev->jpeg.harvest_config & (1 << i)) continue; - amdgpu_ring_fini(&adev->jpeg.inst[i].ring_dec); + for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) + amdgpu_ring_fini(&adev->jpeg.inst[i].ring_dec[j]); } mutex_destroy(&adev->jpeg.jpeg_pg_lock); @@ -76,13 +77,14 @@ static void amdgpu_jpeg_idle_work_handler(struct work_struct *work) struct amdgpu_device *adev = container_of(work, struct amdgpu_device, jpeg.idle_work.work); unsigned int fences = 0; - unsigned int i; + unsigned int i, j; for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { if (adev->jpeg.harvest_config & (1 << i)) continue; - fences += amdgpu_fence_count_emitted(&adev->jpeg.inst[i].ring_dec); + for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) + fences += amdgpu_fence_count_emitted(&adev->jpeg.inst[i].ring_dec[j]); } if 
(!fences && !atomic_read(&adev->jpeg.total_submission_cnt)) @@ -122,17 +124,17 @@ int amdgpu_jpeg_dec_ring_test_ring(struct amdgpu_ring *ring) if (amdgpu_sriov_vf(adev)) return 0; - WREG32(adev->jpeg.inst[ring->me].external.jpeg_pitch, 0xCAFEDEAD); + WREG32(adev->jpeg.inst[ring->me].external.jpeg_pitch[ring->pipe], 0xCAFEDEAD); r = amdgpu_ring_alloc(ring, 3); if (r) return r; - amdgpu_ring_write(ring, PACKET0(adev->jpeg.internal.jpeg_pitch, 0)); + amdgpu_ring_write(ring, PACKET0(adev->jpeg.internal.jpeg_pitch[ring->pipe], 0)); amdgpu_ring_write(ring, 0xDEADBEEF); amdgpu_ring_commit(ring); for (i = 0; i < adev->usec_timeout; i++) { - tmp = RREG32(adev->jpeg.inst[ring->me].external.jpeg_pitch); + tmp = RREG32(adev->jpeg.inst[ring->me].external.jpeg_pitch[ring->pipe]); if (tmp == 0xDEADBEEF) break; udelay(1); @@ -161,8 +163,7 @@ static int amdgpu_jpeg_dec_set_reg(struct amdgpu_ring *ring, uint32_t handle, ib = &job->ibs[0]; - ib->ptr[0] = PACKETJ(adev->jpeg.internal.jpeg_pitch, 0, 0, - PACKETJ_TYPE0); + ib->ptr[0] = PACKETJ(adev->jpeg.internal.jpeg_pitch[ring->pipe], 0, 0, PACKETJ_TYPE0); ib->ptr[1] = 0xDEADBEEF; for (i = 2; i < 16; i += 2) { ib->ptr[i] = PACKETJ(0, 0, 0, PACKETJ_TYPE6); @@ -208,7 +209,7 @@ int amdgpu_jpeg_dec_ring_test_ib(struct amdgpu_ring *ring, long timeout) } if (!amdgpu_sriov_vf(adev)) { for (i = 0; i < adev->usec_timeout; i++) { - tmp = RREG32(adev->jpeg.inst[ring->me].external.jpeg_pitch); + tmp = RREG32(adev->jpeg.inst[ring->me].external.jpeg_pitch[ring->pipe]); if (tmp == 0xDEADBEEF) break; udelay(1); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h index 0ca76f0f23e9..cb6c127ab81d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h @@ -27,16 +27,17 @@ #include "amdgpu_ras.h" #define AMDGPU_MAX_JPEG_INSTANCES 2 +#define AMDGPU_MAX_JPEG_RINGS 8 #define AMDGPU_JPEG_HARVEST_JPEG0 (1 << 0) #define AMDGPU_JPEG_HARVEST_JPEG1 (1 << 1) struct 
amdgpu_jpeg_reg{ - unsigned jpeg_pitch; + unsigned jpeg_pitch[AMDGPU_MAX_JPEG_RINGS]; }; struct amdgpu_jpeg_inst { - struct amdgpu_ring ring_dec; + struct amdgpu_ring ring_dec[AMDGPU_MAX_JPEG_RINGS]; struct amdgpu_irq_src irq; struct amdgpu_jpeg_reg external; }; @@ -48,6 +49,7 @@ struct amdgpu_jpeg_ras { struct amdgpu_jpeg { uint8_t num_jpeg_inst; struct amdgpu_jpeg_inst inst[AMDGPU_MAX_JPEG_INSTANCES]; + unsigned num_jpeg_rings; struct amdgpu_jpeg_reg internal; unsigned harvest_config; struct delayed_work idle_work; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index 1d3b224b8b28..44997c7ee89d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -462,8 +462,9 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev, if (adev->jpeg.harvest_config & (1 << i)) continue; - if (adev->jpeg.inst[i].ring_dec.sched.ready) - ++num_rings; + for (j = 0; j < adev->jpeg.num_jpeg_rings; j++) + if (adev->jpeg.inst[i].ring_dec[j].sched.ready) + ++num_rings; } ib_start_alignment = 16; ib_size_alignment = 16; diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c index 71fe7f6f9889..1c5b60604a19 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c @@ -437,7 +437,7 @@ static int jpeg_v1_0_process_interrupt(struct amdgpu_device *adev, switch (entry->src_id) { case 126: - amdgpu_fence_process(&adev->jpeg.inst->ring_dec); + amdgpu_fence_process(adev->jpeg.inst->ring_dec); break; default: DRM_ERROR("Unhandled interrupt: %d %d\n", @@ -484,7 +484,7 @@ int jpeg_v1_0_sw_init(void *handle) if (r) return r; - ring = &adev->jpeg.inst->ring_dec; + ring = adev->jpeg.inst->ring_dec; ring->vm_hub = AMDGPU_MMHUB0(0); sprintf(ring->name, "jpeg_dec"); r = amdgpu_ring_init(adev, ring, 512, &adev->jpeg.inst->irq, @@ -492,7 +492,7 @@ int jpeg_v1_0_sw_init(void *handle) if (r) return r; - adev->jpeg.internal.jpeg_pitch = 
adev->jpeg.inst->external.jpeg_pitch = + adev->jpeg.internal.jpeg_pitch[0] = adev->jpeg.inst->external.jpeg_pitch[0] = SOC15_REG_OFFSET(JPEG, 0, mmUVD_JPEG_PITCH); return 0; @@ -509,7 +509,7 @@ void jpeg_v1_0_sw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - amdgpu_ring_fini(&adev->jpeg.inst[0].ring_dec); + amdgpu_ring_fini(adev->jpeg.inst->ring_dec); } /** @@ -522,7 +522,7 @@ void jpeg_v1_0_sw_fini(void *handle) */ void jpeg_v1_0_start(struct amdgpu_device *adev, int mode) { - struct amdgpu_ring *ring = &adev->jpeg.inst->ring_dec; + struct amdgpu_ring *ring = adev->jpeg.inst->ring_dec; if (mode == 0) { WREG32_SOC15(JPEG, 0, mmUVD_LMI_JRBC_RB_VMID, 0); @@ -579,7 +579,7 @@ static const struct amdgpu_ring_funcs jpeg_v1_0_decode_ring_vm_funcs = { static void jpeg_v1_0_set_dec_ring_funcs(struct amdgpu_device *adev) { - adev->jpeg.inst->ring_dec.funcs = &jpeg_v1_0_decode_ring_vm_funcs; + adev->jpeg.inst->ring_dec->funcs = &jpeg_v1_0_decode_ring_vm_funcs; DRM_INFO("JPEG decode is enabled in VM mode\n"); } diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c index 3a43e42f4834..3aeeceae34a5 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c @@ -83,7 +83,7 @@ static int jpeg_v2_0_sw_init(void *handle) if (r) return r; - ring = &adev->jpeg.inst->ring_dec; + ring = adev->jpeg.inst->ring_dec; ring->use_doorbell = true; ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 1; ring->vm_hub = AMDGPU_MMHUB0(0); @@ -93,8 +93,8 @@ static int jpeg_v2_0_sw_init(void *handle) if (r) return r; - adev->jpeg.internal.jpeg_pitch = mmUVD_JPEG_PITCH_INTERNAL_OFFSET; - adev->jpeg.inst->external.jpeg_pitch = SOC15_REG_OFFSET(JPEG, 0, mmUVD_JPEG_PITCH); + adev->jpeg.internal.jpeg_pitch[0] = mmUVD_JPEG_PITCH_INTERNAL_OFFSET; + adev->jpeg.inst->external.jpeg_pitch[0] = SOC15_REG_OFFSET(JPEG, 0, mmUVD_JPEG_PITCH); return 0; } @@ -129,7 +129,7 @@ static int 
jpeg_v2_0_sw_fini(void *handle) static int jpeg_v2_0_hw_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - struct amdgpu_ring *ring = &adev->jpeg.inst->ring_dec; + struct amdgpu_ring *ring = adev->jpeg.inst->ring_dec; int r; adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell, @@ -312,7 +312,7 @@ static void jpeg_v2_0_enable_clock_gating(struct amdgpu_device *adev) */ static int jpeg_v2_0_start(struct amdgpu_device *adev) { - struct amdgpu_ring *ring = &adev->jpeg.inst->ring_dec; + struct amdgpu_ring *ring = adev->jpeg.inst->ring_dec; int r; if (adev->pm.dpm_enabled) @@ -729,7 +729,7 @@ static int jpeg_v2_0_process_interrupt(struct amdgpu_device *adev, switch (entry->src_id) { case VCN_2_0__SRCID__JPEG_DECODE: - amdgpu_fence_process(&adev->jpeg.inst->ring_dec); + amdgpu_fence_process(adev->jpeg.inst->ring_dec); break; default: DRM_ERROR("Unhandled interrupt: %d %d\n", @@ -791,7 +791,7 @@ static const struct amdgpu_ring_funcs jpeg_v2_0_dec_ring_vm_funcs = { static void jpeg_v2_0_set_dec_ring_funcs(struct amdgpu_device *adev) { - adev->jpeg.inst->ring_dec.funcs = &jpeg_v2_0_dec_ring_vm_funcs; + adev->jpeg.inst->ring_dec->funcs = &jpeg_v2_0_dec_ring_vm_funcs; DRM_INFO("JPEG decode is enabled in VM mode\n"); } diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c index 259b7ba6a842..b79edb12b90e 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c @@ -125,7 +125,7 @@ static int jpeg_v2_5_sw_init(void *handle) if (adev->jpeg.harvest_config & (1 << i)) continue; - ring = &adev->jpeg.inst[i].ring_dec; + ring = adev->jpeg.inst[i].ring_dec; ring->use_doorbell = true; if (adev->ip_versions[UVD_HWIP][0] == IP_VERSION(2, 5, 0)) ring->vm_hub = AMDGPU_MMHUB1(0); @@ -138,8 +138,8 @@ static int jpeg_v2_5_sw_init(void *handle) if (r) return r; - adev->jpeg.internal.jpeg_pitch = mmUVD_JPEG_PITCH_INTERNAL_OFFSET; - adev->jpeg.inst[i].external.jpeg_pitch = 
SOC15_REG_OFFSET(JPEG, i, mmUVD_JPEG_PITCH); + adev->jpeg.internal.jpeg_pitch[0] = mmUVD_JPEG_PITCH_INTERNAL_OFFSET; + adev->jpeg.inst[i].external.jpeg_pitch[0] = SOC15_REG_OFFSET(JPEG, i, mmUVD_JPEG_PITCH); } r = amdgpu_jpeg_ras_sw_init(adev); @@ -186,7 +186,7 @@ static int jpeg_v2_5_hw_init(void *handle) if (adev->jpeg.harvest_config & (1 << i)) continue; - ring = &adev->jpeg.inst[i].ring_dec; + ring = adev->jpeg.inst[i].ring_dec; adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell, (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 8 * i, i); @@ -326,7 +326,7 @@ static int jpeg_v2_5_start(struct amdgpu_device *adev) if (adev->jpeg.harvest_config & (1 << i)) continue; - ring = &adev->jpeg.inst[i].ring_dec; + ring = adev->jpeg.inst[i].ring_dec; /* disable anti hang mechanism */ WREG32_P(SOC15_REG_OFFSET(JPEG, i, mmUVD_JPEG_POWER_STATUS), 0, ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK); @@ -591,7 +591,7 @@ static int jpeg_v2_5_process_interrupt(struct amdgpu_device *adev, switch (entry->src_id) { case VCN_2_0__SRCID__JPEG_DECODE: - amdgpu_fence_process(&adev->jpeg.inst[ip_instance].ring_dec); + amdgpu_fence_process(adev->jpeg.inst[ip_instance].ring_dec); break; case VCN_2_6__SRCID_DJPEG0_POISON: case VCN_2_6__SRCID_EJPEG0_POISON: @@ -712,10 +712,10 @@ static void jpeg_v2_5_set_dec_ring_funcs(struct amdgpu_device *adev) if (adev->jpeg.harvest_config & (1 << i)) continue; if (adev->asic_type == CHIP_ARCTURUS) - adev->jpeg.inst[i].ring_dec.funcs = &jpeg_v2_5_dec_ring_vm_funcs; + adev->jpeg.inst[i].ring_dec->funcs = &jpeg_v2_5_dec_ring_vm_funcs; else /* CHIP_ALDEBARAN */ - adev->jpeg.inst[i].ring_dec.funcs = &jpeg_v2_6_dec_ring_vm_funcs; - adev->jpeg.inst[i].ring_dec.me = i; + adev->jpeg.inst[i].ring_dec->funcs = &jpeg_v2_6_dec_ring_vm_funcs; + adev->jpeg.inst[i].ring_dec->me = i; DRM_INFO("JPEG(%d) JPEG decode is enabled in VM mode\n", i); } } diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c index 
c55386c22311..cb5494effc0f 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c @@ -98,7 +98,7 @@ static int jpeg_v3_0_sw_init(void *handle) if (r) return r; - ring = &adev->jpeg.inst->ring_dec; + ring = adev->jpeg.inst->ring_dec; ring->use_doorbell = true; ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 1; ring->vm_hub = AMDGPU_MMHUB0(0); @@ -108,8 +108,8 @@ static int jpeg_v3_0_sw_init(void *handle) if (r) return r; - adev->jpeg.internal.jpeg_pitch = mmUVD_JPEG_PITCH_INTERNAL_OFFSET; - adev->jpeg.inst->external.jpeg_pitch = SOC15_REG_OFFSET(JPEG, 0, mmUVD_JPEG_PITCH); + adev->jpeg.internal.jpeg_pitch[0] = mmUVD_JPEG_PITCH_INTERNAL_OFFSET; + adev->jpeg.inst->external.jpeg_pitch[0] = SOC15_REG_OFFSET(JPEG, 0, mmUVD_JPEG_PITCH); return 0; } @@ -144,7 +144,7 @@ static int jpeg_v3_0_sw_fini(void *handle) static int jpeg_v3_0_hw_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - struct amdgpu_ring *ring = &adev->jpeg.inst->ring_dec; + struct amdgpu_ring *ring = adev->jpeg.inst->ring_dec; int r; adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell, @@ -330,7 +330,7 @@ static int jpeg_v3_0_enable_static_power_gating(struct amdgpu_device *adev) */ static int jpeg_v3_0_start(struct amdgpu_device *adev) { - struct amdgpu_ring *ring = &adev->jpeg.inst->ring_dec; + struct amdgpu_ring *ring = adev->jpeg.inst->ring_dec; int r; if (adev->pm.dpm_enabled) @@ -527,7 +527,7 @@ static int jpeg_v3_0_process_interrupt(struct amdgpu_device *adev, switch (entry->src_id) { case VCN_2_0__SRCID__JPEG_DECODE: - amdgpu_fence_process(&adev->jpeg.inst->ring_dec); + amdgpu_fence_process(adev->jpeg.inst->ring_dec); break; default: DRM_ERROR("Unhandled interrupt: %d %d\n", @@ -589,7 +589,7 @@ static const struct amdgpu_ring_funcs jpeg_v3_0_dec_ring_vm_funcs = { static void jpeg_v3_0_set_dec_ring_funcs(struct amdgpu_device *adev) { - adev->jpeg.inst->ring_dec.funcs = 
&jpeg_v3_0_dec_ring_vm_funcs; + adev->jpeg.inst->ring_dec->funcs = &jpeg_v3_0_dec_ring_vm_funcs; DRM_INFO("JPEG decode is enabled in VM mode\n"); } diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c index d7d5ffc29393..495facb885f4 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c @@ -105,7 +105,7 @@ static int jpeg_v4_0_sw_init(void *handle) if (r) return r; - ring = &adev->jpeg.inst->ring_dec; + ring = adev->jpeg.inst->ring_dec; ring->use_doorbell = true; ring->doorbell_index = amdgpu_sriov_vf(adev) ? (((adev->doorbell_index.vcn.vcn_ring0_1) << 1) + 4) : ((adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 1); ring->vm_hub = AMDGPU_MMHUB0(0); @@ -116,8 +116,8 @@ static int jpeg_v4_0_sw_init(void *handle) if (r) return r; - adev->jpeg.internal.jpeg_pitch = regUVD_JPEG_PITCH_INTERNAL_OFFSET; - adev->jpeg.inst->external.jpeg_pitch = SOC15_REG_OFFSET(JPEG, 0, regUVD_JPEG_PITCH); + adev->jpeg.internal.jpeg_pitch[0] = regUVD_JPEG_PITCH_INTERNAL_OFFSET; + adev->jpeg.inst->external.jpeg_pitch[0] = SOC15_REG_OFFSET(JPEG, 0, regUVD_JPEG_PITCH); r = amdgpu_jpeg_ras_sw_init(adev); if (r) @@ -156,7 +156,7 @@ static int jpeg_v4_0_sw_fini(void *handle) static int jpeg_v4_0_hw_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - struct amdgpu_ring *ring = &adev->jpeg.inst->ring_dec; + struct amdgpu_ring *ring = adev->jpeg.inst->ring_dec; int r; if (amdgpu_sriov_vf(adev)) { @@ -363,7 +363,7 @@ static int jpeg_v4_0_enable_static_power_gating(struct amdgpu_device *adev) */ static int jpeg_v4_0_start(struct amdgpu_device *adev) { - struct amdgpu_ring *ring = &adev->jpeg.inst->ring_dec; + struct amdgpu_ring *ring = adev->jpeg.inst->ring_dec; int r; if (adev->pm.dpm_enabled) @@ -441,7 +441,7 @@ static int jpeg_v4_0_start_sriov(struct amdgpu_device *adev) table_size = 0; - ring = &adev->jpeg.inst->ring_dec; + ring = adev->jpeg.inst->ring_dec; 
MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(JPEG, 0, regUVD_LMI_JRBC_RB_64BIT_BAR_LOW), @@ -678,7 +678,7 @@ static int jpeg_v4_0_process_interrupt(struct amdgpu_device *adev, switch (entry->src_id) { case VCN_4_0__SRCID__JPEG_DECODE: - amdgpu_fence_process(&adev->jpeg.inst->ring_dec); + amdgpu_fence_process(adev->jpeg.inst->ring_dec); break; case VCN_4_0__SRCID_DJPEG0_POISON: case VCN_4_0__SRCID_EJPEG0_POISON: @@ -744,7 +744,7 @@ static const struct amdgpu_ring_funcs jpeg_v4_0_dec_ring_vm_funcs = { static void jpeg_v4_0_set_dec_ring_funcs(struct amdgpu_device *adev) { - adev->jpeg.inst->ring_dec.funcs = &jpeg_v4_0_dec_ring_vm_funcs; + adev->jpeg.inst->ring_dec->funcs = &jpeg_v4_0_dec_ring_vm_funcs; DRM_DEV_INFO(adev->dev, "JPEG decode is enabled in VM mode\n"); } diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c index 1fc72f9b52ed..784c83994ca1 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c @@ -85,7 +85,7 @@ static int jpeg_v4_0_3_sw_init(void *handle) if (r) return r; - ring = &adev->jpeg.inst->ring_dec; + ring = adev->jpeg.inst->ring_dec; ring->use_doorbell = false; ring->vm_hub = AMDGPU_MMHUB0(0); sprintf(ring->name, "jpeg_dec"); @@ -94,8 +94,8 @@ static int jpeg_v4_0_3_sw_init(void *handle) if (r) return r; - adev->jpeg.internal.jpeg_pitch = regUVD_JPEG_PITCH_INTERNAL_OFFSET; - adev->jpeg.inst->external.jpeg_pitch = SOC15_REG_OFFSET(JPEG, 0, regUVD_JPEG_PITCH); + adev->jpeg.internal.jpeg_pitch[0] = regUVD_JPEG_PITCH_INTERNAL_OFFSET; + adev->jpeg.inst->external.jpeg_pitch[0] = SOC15_REG_OFFSET(JPEG, 0, regUVD_JPEG_PITCH); return 0; } @@ -130,7 +130,7 @@ static int jpeg_v4_0_3_sw_fini(void *handle) static int jpeg_v4_0_3_hw_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - struct amdgpu_ring *ring = &adev->jpeg.inst->ring_dec; + struct amdgpu_ring *ring = adev->jpeg.inst->ring_dec; int r; r = amdgpu_ring_test_helper(ring); @@ 
-254,7 +254,7 @@ static void jpeg_v4_0_3_enable_clock_gating(struct amdgpu_device *adev) */ static int jpeg_v4_0_3_start(struct amdgpu_device *adev) { - struct amdgpu_ring *ring = &adev->jpeg.inst->ring_dec; + struct amdgpu_ring *ring = adev->jpeg.inst->ring_dec; WREG32_SOC15(JPEG, 0, regUVD_PGFSM_CONFIG, 1 << UVD_PGFSM_CONFIG__UVDJ_PWR_CONFIG__SHIFT); @@ -675,7 +675,7 @@ static int jpeg_v4_0_3_process_interrupt(struct amdgpu_device *adev, switch (entry->src_id) { case VCN_2_0__SRCID__JPEG_DECODE: - amdgpu_fence_process(&adev->jpeg.inst->ring_dec); + amdgpu_fence_process(adev->jpeg.inst->ring_dec); break; default: DRM_DEV_ERROR(adev->dev, "Unhandled interrupt: %d %d\n", @@ -737,8 +737,8 @@ static const struct amdgpu_ring_funcs jpeg_v4_0_3_dec_ring_vm_funcs = { static void jpeg_v4_0_3_set_dec_ring_funcs(struct amdgpu_device *adev) { - adev->jpeg.inst->ring_dec.funcs = &jpeg_v4_0_3_dec_ring_vm_funcs; - adev->jpeg.inst->ring_dec.me = 0; + adev->jpeg.inst->ring_dec->funcs = &jpeg_v4_0_3_dec_ring_vm_funcs; + adev->jpeg.inst->ring_dec->me = 0; DRM_DEV_INFO(adev->dev, "JPEG decode is enabled in VM mode\n"); } diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c index f877c39c7cdd..16feb491adf5 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c @@ -211,7 +211,7 @@ static int vcn_v1_0_hw_init(void *handle) goto done; } - ring = &adev->jpeg.inst->ring_dec; + ring = adev->jpeg.inst->ring_dec; r = amdgpu_ring_test_helper(ring); if (r) goto done; @@ -1304,7 +1304,7 @@ static int vcn_v1_0_pause_dpg_mode(struct amdgpu_device *adev, UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK); /* Restore */ - ring = &adev->jpeg.inst->ring_dec; + ring = adev->jpeg.inst->ring_dec; WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_VMID, 0); WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_CNTL, UVD_JRBC_RB_CNTL__RB_NO_FETCH_MASK | @@ -1802,7 +1802,7 @@ static void vcn_v1_0_idle_work_handler(struct work_struct *work) else new_state.fw_based = 
VCN_DPG_STATE__UNPAUSE; - if (amdgpu_fence_count_emitted(&adev->jpeg.inst->ring_dec)) + if (amdgpu_fence_count_emitted(adev->jpeg.inst->ring_dec)) new_state.jpeg = VCN_DPG_STATE__PAUSE; else new_state.jpeg = VCN_DPG_STATE__UNPAUSE; @@ -1810,7 +1810,7 @@ static void vcn_v1_0_idle_work_handler(struct work_struct *work) adev->vcn.pause_dpg_mode(adev, 0, &new_state); } - fences += amdgpu_fence_count_emitted(&adev->jpeg.inst->ring_dec); + fences += amdgpu_fence_count_emitted(adev->jpeg.inst->ring_dec); fences += amdgpu_fence_count_emitted(&adev->vcn.inst->ring_dec); if (fences == 0) { @@ -1832,7 +1832,7 @@ static void vcn_v1_0_ring_begin_use(struct amdgpu_ring *ring) mutex_lock(&adev->vcn.vcn1_jpeg1_workaround); - if (amdgpu_fence_wait_empty(&ring->adev->jpeg.inst->ring_dec)) + if (amdgpu_fence_wait_empty(ring->adev->jpeg.inst->ring_dec)) DRM_ERROR("VCN dec: jpeg dec ring may not be empty\n"); vcn_v1_0_set_pg_for_begin_use(ring, set_clocks); @@ -1864,7 +1864,7 @@ void vcn_v1_0_set_pg_for_begin_use(struct amdgpu_ring *ring, bool set_clocks) else new_state.fw_based = VCN_DPG_STATE__UNPAUSE; - if (amdgpu_fence_count_emitted(&adev->jpeg.inst->ring_dec)) + if (amdgpu_fence_count_emitted(adev->jpeg.inst->ring_dec)) new_state.jpeg = VCN_DPG_STATE__PAUSE; else new_state.jpeg = VCN_DPG_STATE__UNPAUSE; -- cgit From db77081fe3c88a31eaade8a9c565c48c4d51b093 Mon Sep 17 00:00:00 2001 From: James Zhu Date: Wed, 19 Jan 2022 23:32:43 -0500 Subject: drm/amdgpu/jpeg: add multiple jpeg rings support for vcn4_0_3 Add multiple jpeg rings support for vcn4_0_3 Signed-off-by: James Zhu Reviewed-by: Leo Liu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c | 214 +++++++++++++++++++++---------- drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.h | 1 + 2 files changed, 147 insertions(+), 68 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c') diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c index 784c83994ca1..0d3509409d3a 
100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c @@ -29,7 +29,7 @@ #include "vcn/vcn_4_0_3_offset.h" #include "vcn/vcn_4_0_3_sh_mask.h" -#include "ivsrcid/vcn/irqsrcs_vcn_2_0.h" +#include "ivsrcid/vcn/irqsrcs_vcn_4_0.h" enum jpeg_engin_status { UVD_PGFSM_STATUS__UVDJ_PWR_ON = 0, @@ -41,6 +41,17 @@ static void jpeg_v4_0_3_set_irq_funcs(struct amdgpu_device *adev); static int jpeg_v4_0_3_set_powergating_state(void *handle, enum amd_powergating_state state); +static int amdgpu_ih_srcid_jpeg[] = { + VCN_4_0__SRCID__JPEG_DECODE, + VCN_4_0__SRCID__JPEG1_DECODE, + VCN_4_0__SRCID__JPEG2_DECODE, + VCN_4_0__SRCID__JPEG3_DECODE, + VCN_4_0__SRCID__JPEG4_DECODE, + VCN_4_0__SRCID__JPEG5_DECODE, + VCN_4_0__SRCID__JPEG6_DECODE, + VCN_4_0__SRCID__JPEG7_DECODE +}; + /** * jpeg_v4_0_3_early_init - set function pointers * @@ -69,13 +80,15 @@ static int jpeg_v4_0_3_sw_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; struct amdgpu_ring *ring; - int r; - - /* JPEG TRAP */ - r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, - VCN_2_0__SRCID__JPEG_DECODE, &adev->jpeg.inst->irq); - if (r) - return r; + int i, r; + + for (i = 0; i < adev->jpeg.num_jpeg_rings; ++i) { + /* JPEG TRAP */ + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, + amdgpu_ih_srcid_jpeg[i], &adev->jpeg.inst->irq); + if (r) + return r; + } r = amdgpu_jpeg_sw_init(adev); if (r) @@ -85,17 +98,22 @@ static int jpeg_v4_0_3_sw_init(void *handle) if (r) return r; - ring = adev->jpeg.inst->ring_dec; - ring->use_doorbell = false; - ring->vm_hub = AMDGPU_MMHUB0(0); - sprintf(ring->name, "jpeg_dec"); - r = amdgpu_ring_init(adev, ring, 512, &adev->jpeg.inst->irq, 0, - AMDGPU_RING_PRIO_DEFAULT, NULL); - if (r) - return r; - - adev->jpeg.internal.jpeg_pitch[0] = regUVD_JPEG_PITCH_INTERNAL_OFFSET; - adev->jpeg.inst->external.jpeg_pitch[0] = SOC15_REG_OFFSET(JPEG, 0, regUVD_JPEG_PITCH); + for (i = 0; i < adev->jpeg.num_jpeg_rings; ++i) { + ring = 
&adev->jpeg.inst->ring_dec[i]; + ring->use_doorbell = false; + ring->vm_hub = AMDGPU_MMHUB0(0); + sprintf(ring->name, "jpeg_dec_%d", i); + r = amdgpu_ring_init(adev, ring, 512, &adev->jpeg.inst->irq, 0, + AMDGPU_RING_PRIO_DEFAULT, NULL); + if (r) + return r; + + adev->jpeg.internal.jpeg_pitch[i] = + regUVD_JRBC0_UVD_JRBC_SCRATCH0_INTERNAL_OFFSET; + adev->jpeg.inst->external.jpeg_pitch[i] = + SOC15_REG_OFFSET1(JPEG, 0, regUVD_JRBC0_UVD_JRBC_SCRATCH0, + (i?(0x40 * i - 0xc80):0)); + } return 0; } @@ -130,14 +148,18 @@ static int jpeg_v4_0_3_sw_fini(void *handle) static int jpeg_v4_0_3_hw_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - struct amdgpu_ring *ring = adev->jpeg.inst->ring_dec; - int r; + struct amdgpu_ring *ring; + int i, r; - r = amdgpu_ring_test_helper(ring); - if (!r) - DRM_DEV_INFO(adev->dev, "JPEG decode initialized successfully.\n"); + for (i = 0; i < adev->jpeg.num_jpeg_rings; ++i) { + ring = &adev->jpeg.inst->ring_dec[i]; + r = amdgpu_ring_test_helper(ring); + if (r) + return r; + } + DRM_DEV_INFO(adev->dev, "JPEG decode initialized successfully.\n"); - return r; + return 0; } /** @@ -150,13 +172,14 @@ static int jpeg_v4_0_3_hw_init(void *handle) static int jpeg_v4_0_3_hw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int ret = 0; cancel_delayed_work_sync(&adev->jpeg.idle_work); if (adev->jpeg.cur_state != AMD_PG_STATE_GATE) - jpeg_v4_0_3_set_powergating_state(adev, AMD_PG_STATE_GATE); + ret = jpeg_v4_0_3_set_powergating_state(adev, AMD_PG_STATE_GATE); - return 0; + return ret; } /** @@ -204,6 +227,7 @@ static int jpeg_v4_0_3_resume(void *handle) static void jpeg_v4_0_3_disable_clock_gating(struct amdgpu_device *adev) { uint32_t data; + int i; data = RREG32_SOC15(JPEG, 0, regJPEG_CGC_CTRL); if (adev->cg_flags & AMD_CG_SUPPORT_JPEG_MGCG) @@ -216,16 +240,16 @@ static void jpeg_v4_0_3_disable_clock_gating(struct amdgpu_device *adev) WREG32_SOC15(JPEG, 0, regJPEG_CGC_CTRL, data); 
data = RREG32_SOC15(JPEG, 0, regJPEG_CGC_GATE); - data &= ~(JPEG_CGC_GATE__JPEG0_DEC_MASK - | JPEG_CGC_GATE__JPEG2_DEC_MASK - | JPEG_CGC_GATE__JMCIF_MASK - | JPEG_CGC_GATE__JRBBM_MASK); + data &= ~(JPEG_CGC_GATE__JMCIF_MASK | JPEG_CGC_GATE__JRBBM_MASK); + for (i = 0; i < adev->jpeg.num_jpeg_rings; ++i) + data &= ~(JPEG_CGC_GATE__JPEG0_DEC_MASK << i); WREG32_SOC15(JPEG, 0, regJPEG_CGC_GATE, data); } static void jpeg_v4_0_3_enable_clock_gating(struct amdgpu_device *adev) { uint32_t data; + int i; data = RREG32_SOC15(JPEG, 0, regJPEG_CGC_CTRL); if (adev->cg_flags & AMD_CG_SUPPORT_JPEG_MGCG) @@ -238,10 +262,9 @@ static void jpeg_v4_0_3_enable_clock_gating(struct amdgpu_device *adev) WREG32_SOC15(JPEG, 0, regJPEG_CGC_CTRL, data); data = RREG32_SOC15(JPEG, 0, regJPEG_CGC_GATE); - data |= (JPEG_CGC_GATE__JPEG0_DEC_MASK - |JPEG_CGC_GATE__JPEG2_DEC_MASK - |JPEG_CGC_GATE__JMCIF_MASK - |JPEG_CGC_GATE__JRBBM_MASK); + data |= (JPEG_CGC_GATE__JMCIF_MASK | JPEG_CGC_GATE__JRBBM_MASK); + for (i = 0; i < adev->jpeg.num_jpeg_rings; ++i) + data |= (JPEG_CGC_GATE__JPEG0_DEC_MASK << i); WREG32_SOC15(JPEG, 0, regJPEG_CGC_GATE, data); } @@ -255,6 +278,7 @@ static void jpeg_v4_0_3_enable_clock_gating(struct amdgpu_device *adev) static int jpeg_v4_0_3_start(struct amdgpu_device *adev) { struct amdgpu_ring *ring = adev->jpeg.inst->ring_dec; + int i; WREG32_SOC15(JPEG, 0, regUVD_PGFSM_CONFIG, 1 << UVD_PGFSM_CONFIG__UVDJ_PWR_CONFIG__SHIFT); @@ -280,22 +304,32 @@ static int jpeg_v4_0_3_start(struct amdgpu_device *adev) WREG32_P(SOC15_REG_OFFSET(JPEG, 0, regUVD_JMI_CNTL), 0, ~UVD_JMI_CNTL__SOFT_RESET_MASK); - /* enable System Interrupt for JRBC */ - WREG32_P(SOC15_REG_OFFSET(JPEG, 0, regJPEG_SYS_INT_EN), - JPEG_SYS_INT_EN__DJRBC0_MASK, - ~JPEG_SYS_INT_EN__DJRBC0_MASK); - - WREG32_SOC15(JPEG, 0, regUVD_JMI0_UVD_LMI_JRBC_RB_VMID, 0); - WREG32_SOC15(JPEG, 0, regUVD_JRBC0_UVD_JRBC_RB_CNTL, (0x00000001L | 0x00000002L)); - WREG32_SOC15(JPEG, 0, regUVD_JMI0_UVD_LMI_JRBC_RB_64BIT_BAR_LOW, - 
lower_32_bits(ring->gpu_addr)); - WREG32_SOC15(JPEG, 0, regUVD_JMI0_UVD_LMI_JRBC_RB_64BIT_BAR_HIGH, - upper_32_bits(ring->gpu_addr)); - WREG32_SOC15(JPEG, 0, regUVD_JRBC0_UVD_JRBC_RB_RPTR, 0); - WREG32_SOC15(JPEG, 0, regUVD_JRBC0_UVD_JRBC_RB_WPTR, 0); - WREG32_SOC15(JPEG, 0, regUVD_JRBC0_UVD_JRBC_RB_CNTL, 0x00000002L); - WREG32_SOC15(JPEG, 0, regUVD_JRBC0_UVD_JRBC_RB_SIZE, ring->ring_size / 4); - ring->wptr = RREG32_SOC15(JPEG, 0, regUVD_JRBC0_UVD_JRBC_RB_WPTR); + for (i = 0; i < adev->jpeg.num_jpeg_rings; ++i) { + unsigned int reg_offset = (i?(0x40 * i - 0xc80):0); + + ring = &adev->jpeg.inst->ring_dec[i]; + + /* enable System Interrupt for JRBC */ + WREG32_P(SOC15_REG_OFFSET(JPEG, 0, regJPEG_SYS_INT_EN), + JPEG_SYS_INT_EN__DJRBC0_MASK << i, + ~(JPEG_SYS_INT_EN__DJRBC0_MASK << i)); + + WREG32_SOC15_OFFSET(JPEG, 0, regUVD_JMI0_UVD_LMI_JRBC_RB_VMID, reg_offset, 0); + WREG32_SOC15_OFFSET(JPEG, 0, regUVD_JRBC0_UVD_JRBC_RB_CNTL, reg_offset, + (0x00000001L | 0x00000002L)); + WREG32_SOC15_OFFSET(JPEG, 0, regUVD_JMI0_UVD_LMI_JRBC_RB_64BIT_BAR_LOW, + reg_offset, lower_32_bits(ring->gpu_addr)); + WREG32_SOC15_OFFSET(JPEG, 0, regUVD_JMI0_UVD_LMI_JRBC_RB_64BIT_BAR_HIGH, + reg_offset, upper_32_bits(ring->gpu_addr)); + WREG32_SOC15_OFFSET(JPEG, 0, regUVD_JRBC0_UVD_JRBC_RB_RPTR, reg_offset, 0); + WREG32_SOC15_OFFSET(JPEG, 0, regUVD_JRBC0_UVD_JRBC_RB_WPTR, reg_offset, 0); + WREG32_SOC15_OFFSET(JPEG, 0, regUVD_JRBC0_UVD_JRBC_RB_CNTL, reg_offset, + 0x00000002L); + WREG32_SOC15_OFFSET(JPEG, 0, regUVD_JRBC0_UVD_JRBC_RB_SIZE, reg_offset, + ring->ring_size / 4); + ring->wptr = RREG32_SOC15_OFFSET(JPEG, 0, regUVD_JRBC0_UVD_JRBC_RB_WPTR, + reg_offset); + } return 0; } @@ -342,7 +376,8 @@ static uint64_t jpeg_v4_0_3_dec_ring_get_rptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - return RREG32_SOC15(JPEG, ring->me, regUVD_JRBC0_UVD_JRBC_RB_RPTR); + return RREG32_SOC15_OFFSET(JPEG, ring->me, regUVD_JRBC0_UVD_JRBC_RB_RPTR, + ring->pipe?(0x40 * ring->pipe - 0xc80):0); 
} /** @@ -359,7 +394,8 @@ static uint64_t jpeg_v4_0_3_dec_ring_get_wptr(struct amdgpu_ring *ring) if (ring->use_doorbell) return adev->wb.wb[ring->wptr_offs]; else - return RREG32_SOC15(JPEG, ring->me, regUVD_JRBC0_UVD_JRBC_RB_WPTR); + return RREG32_SOC15_OFFSET(JPEG, ring->me, regUVD_JRBC0_UVD_JRBC_RB_WPTR, + ring->pipe?(0x40 * ring->pipe - 0xc80):0); } /** @@ -377,8 +413,8 @@ static void jpeg_v4_0_3_dec_ring_set_wptr(struct amdgpu_ring *ring) adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr); WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr)); } else { - WREG32_SOC15(JPEG, ring->me, - regUVD_JRBC0_UVD_JRBC_RB_WPTR, lower_32_bits(ring->wptr)); + WREG32_SOC15_OFFSET(JPEG, ring->me, regUVD_JRBC0_UVD_JRBC_RB_WPTR, + (ring->pipe?(0x40 * ring->pipe - 0xc80):0), lower_32_bits(ring->wptr)); } } @@ -393,7 +429,7 @@ static void jpeg_v4_0_3_dec_ring_insert_start(struct amdgpu_ring *ring) { amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET, 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, 0x62a04);/* TODO: PCTL0_MMHUB_DEEPSLEEP_IB */ + amdgpu_ring_write(ring, 0x62a04); /* PCTL0_MMHUB_DEEPSLEEP_IB */ amdgpu_ring_write(ring, PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR, 0, 0, PACKETJ_TYPE0)); @@ -605,20 +641,36 @@ static void jpeg_v4_0_3_dec_ring_nop(struct amdgpu_ring *ring, uint32_t count) static bool jpeg_v4_0_3_is_idle(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + bool ret; + int i; + + for (i = 0; i < adev->jpeg.num_jpeg_rings; ++i) { + unsigned int reg_offset = (i?(0x40 * i - 0xc80):0); + + ret &= ((RREG32_SOC15_OFFSET(JPEG, 0, + regUVD_JRBC0_UVD_JRBC_STATUS, reg_offset) & + UVD_JRBC0_UVD_JRBC_STATUS__RB_JOB_DONE_MASK) == + UVD_JRBC0_UVD_JRBC_STATUS__RB_JOB_DONE_MASK); + } - return ((RREG32_SOC15(JPEG, 0, regUVD_JRBC0_UVD_JRBC_STATUS) & - UVD_JRBC0_UVD_JRBC_STATUS__RB_JOB_DONE_MASK) == - UVD_JRBC0_UVD_JRBC_STATUS__RB_JOB_DONE_MASK); + return ret; } static int jpeg_v4_0_3_wait_for_idle(void *handle) { 
struct amdgpu_device *adev = (struct amdgpu_device *)handle; int ret; + int i; + + for (i = 0; i < adev->jpeg.num_jpeg_rings; ++i) { + unsigned int reg_offset = (i?(0x40 * i - 0xc80):0); + + ret &= SOC15_WAIT_ON_RREG_OFFSET(JPEG, 0, + regUVD_JRBC0_UVD_JRBC_STATUS, reg_offset, + UVD_JRBC0_UVD_JRBC_STATUS__RB_JOB_DONE_MASK, + UVD_JRBC0_UVD_JRBC_STATUS__RB_JOB_DONE_MASK); + } - ret = SOC15_WAIT_ON_RREG(JPEG, 0, regUVD_JRBC0_UVD_JRBC_STATUS, - UVD_JRBC0_UVD_JRBC_STATUS__RB_JOB_DONE_MASK, - UVD_JRBC0_UVD_JRBC_STATUS__RB_JOB_DONE_MASK); return ret; } @@ -626,7 +678,7 @@ static int jpeg_v4_0_3_set_clockgating_state(void *handle, enum amd_clockgating_state state) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - bool enable = (state == AMD_CG_STATE_GATE); + bool enable = (state == AMD_CG_STATE_GATE) ? true : false; if (enable) { if (!jpeg_v4_0_3_is_idle(handle)) @@ -674,8 +726,29 @@ static int jpeg_v4_0_3_process_interrupt(struct amdgpu_device *adev, DRM_DEV_DEBUG(adev->dev, "IH: JPEG TRAP\n"); switch (entry->src_id) { - case VCN_2_0__SRCID__JPEG_DECODE: - amdgpu_fence_process(adev->jpeg.inst->ring_dec); + case VCN_4_0__SRCID__JPEG_DECODE: + amdgpu_fence_process(&adev->jpeg.inst->ring_dec[0]); + break; + case VCN_4_0__SRCID__JPEG1_DECODE: + amdgpu_fence_process(&adev->jpeg.inst->ring_dec[1]); + break; + case VCN_4_0__SRCID__JPEG2_DECODE: + amdgpu_fence_process(&adev->jpeg.inst->ring_dec[2]); + break; + case VCN_4_0__SRCID__JPEG3_DECODE: + amdgpu_fence_process(&adev->jpeg.inst->ring_dec[3]); + break; + case VCN_4_0__SRCID__JPEG4_DECODE: + amdgpu_fence_process(&adev->jpeg.inst->ring_dec[4]); + break; + case VCN_4_0__SRCID__JPEG5_DECODE: + amdgpu_fence_process(&adev->jpeg.inst->ring_dec[5]); + break; + case VCN_4_0__SRCID__JPEG6_DECODE: + amdgpu_fence_process(&adev->jpeg.inst->ring_dec[6]); + break; + case VCN_4_0__SRCID__JPEG7_DECODE: + amdgpu_fence_process(&adev->jpeg.inst->ring_dec[7]); break; default: DRM_DEV_ERROR(adev->dev, "Unhandled interrupt: %d %d\n", 
@@ -737,8 +810,13 @@ static const struct amdgpu_ring_funcs jpeg_v4_0_3_dec_ring_vm_funcs = { static void jpeg_v4_0_3_set_dec_ring_funcs(struct amdgpu_device *adev) { - adev->jpeg.inst->ring_dec->funcs = &jpeg_v4_0_3_dec_ring_vm_funcs; - adev->jpeg.inst->ring_dec->me = 0; + int i; + + for (i = 0; i < adev->jpeg.num_jpeg_rings; ++i) { + adev->jpeg.inst->ring_dec[i].funcs = &jpeg_v4_0_3_dec_ring_vm_funcs; + adev->jpeg.inst->ring_dec[i].me = 0; + adev->jpeg.inst->ring_dec[i].pipe = i; + } DRM_DEV_INFO(adev->dev, "JPEG decode is enabled in VM mode\n"); } @@ -749,7 +827,7 @@ static const struct amdgpu_irq_src_funcs jpeg_v4_0_3_irq_funcs = { static void jpeg_v4_0_3_set_irq_funcs(struct amdgpu_device *adev) { - adev->jpeg.inst->irq.num_types = 1; + adev->jpeg.inst->irq.num_types = adev->jpeg.num_jpeg_rings; adev->jpeg.inst->irq.funcs = &jpeg_v4_0_3_irq_funcs; } diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.h b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.h index ca03d17e13fa..70a5f030d5f7 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.h +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.h @@ -41,6 +41,7 @@ #define regUVD_JRBC_RB_REF_DATA_INTERNAL_OFFSET 0x4084 #define regUVD_JRBC_STATUS_INTERNAL_OFFSET 0x4089 #define regUVD_JPEG_PITCH_INTERNAL_OFFSET 0x4043 +#define regUVD_JRBC0_UVD_JRBC_SCRATCH0_INTERNAL_OFFSET 0x4094 #define JRBC_DEC_EXTERNAL_REG_WRITE_ADDR 0x18000 -- cgit From 6ddae0f3ab18a64e83bcf7b090e085394046f130 Mon Sep 17 00:00:00 2001 From: James Zhu Date: Sat, 29 Jan 2022 10:34:05 -0500 Subject: drm/amdgpu/jpeg: enable jpeg doorbell for jpeg4.0.3 Enable jpeg doorbell for jpeg4.0.3. 
Signed-off-by: James Zhu Reviewed-by: Leo Liu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c') diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c index 0d3509409d3a..8914f3c6c80f 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c @@ -100,8 +100,9 @@ static int jpeg_v4_0_3_sw_init(void *handle) for (i = 0; i < adev->jpeg.num_jpeg_rings; ++i) { ring = &adev->jpeg.inst->ring_dec[i]; - ring->use_doorbell = false; + ring->use_doorbell = true; ring->vm_hub = AMDGPU_MMHUB0(0); + ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + (i?8:1) + i; sprintf(ring->name, "jpeg_dec_%d", i); r = amdgpu_ring_init(adev, ring, 512, &adev->jpeg.inst->irq, 0, AMDGPU_RING_PRIO_DEFAULT, NULL); @@ -148,11 +149,19 @@ static int jpeg_v4_0_3_sw_fini(void *handle) static int jpeg_v4_0_3_hw_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - struct amdgpu_ring *ring; + struct amdgpu_ring *ring = adev->jpeg.inst->ring_dec; int i, r; + adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell, + (adev->doorbell_index.vcn.vcn_ring0_1 << 1), 0); + for (i = 0; i < adev->jpeg.num_jpeg_rings; ++i) { ring = &adev->jpeg.inst->ring_dec[i]; + if (ring->use_doorbell) + WREG32_SOC15_OFFSET(VCN, 0, regVCN_JPEG_DB_CTRL, + (ring->pipe?(ring->pipe - 0x15):0), + ring->doorbell_index << VCN_JPEG_DB_CTRL__OFFSET__SHIFT | + VCN_JPEG_DB_CTRL__EN_MASK); r = amdgpu_ring_test_helper(ring); if (r) return r; -- cgit From 53054e9a7775c228ada4d052f3e7849e71072811 Mon Sep 17 00:00:00 2001 From: James Zhu Date: Sat, 2 Jul 2022 19:34:00 -0400 Subject: drm/amdgpu/vcn: update new doorbell map New doorbell map is used for VCN 4.0.3. 
Signed-off-by: James Zhu Acked-by: Leo Liu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c | 2 +- drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c') diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c index 8914f3c6c80f..e12e3646c49a 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c @@ -102,7 +102,7 @@ static int jpeg_v4_0_3_sw_init(void *handle) ring = &adev->jpeg.inst->ring_dec[i]; ring->use_doorbell = true; ring->vm_hub = AMDGPU_MMHUB0(0); - ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + (i?8:1) + i; + ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 1 + i; sprintf(ring->name, "jpeg_dec_%d", i); r = amdgpu_ring_init(adev, ring, 512, &adev->jpeg.inst->irq, 0, AMDGPU_RING_PRIO_DEFAULT, NULL); diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c index 266b504fd83e..962627005961 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c @@ -161,7 +161,7 @@ static void nbio_v7_9_vcn_doorbell_range(struct amdgpu_device *adev, bool use_do doorbell_range = REG_SET_FIELD(doorbell_range, DOORBELL0_CTRL_ENTRY_0, BIF_DOORBELL0_RANGE_SIZE_ENTRY, - 0x10); + 0x9); doorbell_ctrl = REG_SET_FIELD(doorbell_ctrl, S2A_DOORBELL_ENTRY_1_CTRL, @@ -174,7 +174,7 @@ static void nbio_v7_9_vcn_doorbell_range(struct amdgpu_device *adev, bool use_do S2A_DOORBELL_PORT1_RANGE_OFFSET, 0x4); doorbell_ctrl = REG_SET_FIELD(doorbell_ctrl, S2A_DOORBELL_ENTRY_1_CTRL, - S2A_DOORBELL_PORT1_RANGE_SIZE, 0x10); + S2A_DOORBELL_PORT1_RANGE_SIZE, 0x9); doorbell_ctrl = REG_SET_FIELD(doorbell_ctrl, S2A_DOORBELL_ENTRY_1_CTRL, S2A_DOORBELL_PORT1_AWADDR_31_28_VALUE, 0x4); diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c 
b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c index fafce2beb6cf..ddd844cca02e 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c @@ -110,7 +110,7 @@ static int vcn_v4_0_3_sw_init(void *handle) ring = &adev->vcn.inst->ring_dec; ring->use_doorbell = true; - ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 5; + ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1); ring->vm_hub = AMDGPU_MMHUB0(0); sprintf(ring->name, "vcn_dec"); r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst->irq, 0, @@ -176,7 +176,7 @@ static int vcn_v4_0_3_hw_init(void *handle) int r; adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell, - (adev->doorbell_index.vcn.vcn_ring0_1 << 1), ring->me); + (adev->doorbell_index.vcn.vcn_ring0_1 << 1), 0); if (ring->use_doorbell) WREG32_SOC15(VCN, ring->me, regVCN_RB4_DB_CTRL, ring->doorbell_index << VCN_RB4_DB_CTRL__OFFSET__SHIFT | -- cgit From d4ad24a0b796ad429403bf17ba97ee7e2470ad68 Mon Sep 17 00:00:00 2001 From: James Zhu Date: Sat, 2 Jul 2022 19:53:36 -0400 Subject: drm/amdgpu/jpeg: add JPEG multiple AIDs support Add JPEG multiple AIDs support. 
Signed-off-by: James Zhu Acked-by: Leo Liu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c | 380 ++++++++++++++++++------------- 1 file changed, 227 insertions(+), 153 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c') diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c index e12e3646c49a..aa14a6619e9a 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c @@ -63,6 +63,8 @@ static int jpeg_v4_0_3_early_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + adev->jpeg.num_jpeg_rings = AMDGPU_MAX_JPEG_RINGS; + jpeg_v4_0_3_set_dec_ring_funcs(adev); jpeg_v4_0_3_set_irq_funcs(adev); @@ -80,12 +82,12 @@ static int jpeg_v4_0_3_sw_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; struct amdgpu_ring *ring; - int i, r; + int i, j, r; - for (i = 0; i < adev->jpeg.num_jpeg_rings; ++i) { + for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) { /* JPEG TRAP */ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, - amdgpu_ih_srcid_jpeg[i], &adev->jpeg.inst->irq); + amdgpu_ih_srcid_jpeg[j], &adev->jpeg.inst->irq); if (r) return r; } @@ -98,22 +100,27 @@ static int jpeg_v4_0_3_sw_init(void *handle) if (r) return r; - for (i = 0; i < adev->jpeg.num_jpeg_rings; ++i) { - ring = &adev->jpeg.inst->ring_dec[i]; - ring->use_doorbell = true; - ring->vm_hub = AMDGPU_MMHUB0(0); - ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 1 + i; - sprintf(ring->name, "jpeg_dec_%d", i); - r = amdgpu_ring_init(adev, ring, 512, &adev->jpeg.inst->irq, 0, - AMDGPU_RING_PRIO_DEFAULT, NULL); - if (r) - return r; - - adev->jpeg.internal.jpeg_pitch[i] = - regUVD_JRBC0_UVD_JRBC_SCRATCH0_INTERNAL_OFFSET; - adev->jpeg.inst->external.jpeg_pitch[i] = - SOC15_REG_OFFSET1(JPEG, 0, regUVD_JRBC0_UVD_JRBC_SCRATCH0, - (i?(0x40 * i - 0xc80):0)); + for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { + if 
(adev->jpeg.harvest_config & (1 << i)) + continue; + for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) { + ring = &adev->jpeg.inst[i].ring_dec[j]; + ring->use_doorbell = true; + ring->vm_hub = AMDGPU_MMHUB0(adev->jpeg.inst[i].aid_id); + ring->doorbell_index = + (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 1 + j + 9 * i; + sprintf(ring->name, "jpeg_dec_%d.%d", i, j); + r = amdgpu_ring_init(adev, ring, 512, &adev->jpeg.inst->irq, 0, + AMDGPU_RING_PRIO_DEFAULT, NULL); + if (r) + return r; + + adev->jpeg.internal.jpeg_pitch[j] = + regUVD_JRBC0_UVD_JRBC_SCRATCH0_INTERNAL_OFFSET; + adev->jpeg.inst[i].external.jpeg_pitch[j] = + SOC15_REG_OFFSET1(JPEG, i, regUVD_JRBC0_UVD_JRBC_SCRATCH0, + (j?(0x40 * j - 0xc80):0)); + } } return 0; @@ -149,22 +156,30 @@ static int jpeg_v4_0_3_sw_fini(void *handle) static int jpeg_v4_0_3_hw_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - struct amdgpu_ring *ring = adev->jpeg.inst->ring_dec; - int i, r; + struct amdgpu_ring *ring; + int i, j, r; - adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell, - (adev->doorbell_index.vcn.vcn_ring0_1 << 1), 0); + for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { + if (adev->jpeg.harvest_config & (1 << i)) + continue; + ring = adev->jpeg.inst[i].ring_dec; - for (i = 0; i < adev->jpeg.num_jpeg_rings; ++i) { - ring = &adev->jpeg.inst->ring_dec[i]; if (ring->use_doorbell) - WREG32_SOC15_OFFSET(VCN, 0, regVCN_JPEG_DB_CTRL, - (ring->pipe?(ring->pipe - 0x15):0), - ring->doorbell_index << VCN_JPEG_DB_CTRL__OFFSET__SHIFT | - VCN_JPEG_DB_CTRL__EN_MASK); - r = amdgpu_ring_test_helper(ring); - if (r) - return r; + adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell, + (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 9 * i, + adev->jpeg.inst[i].aid_id); + + for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) { + ring = &adev->jpeg.inst[i].ring_dec[j]; + if (ring->use_doorbell) + WREG32_SOC15_OFFSET(VCN, i, regVCN_JPEG_DB_CTRL, + (ring->pipe?(ring->pipe - 0x15):0), + 
ring->doorbell_index << VCN_JPEG_DB_CTRL__OFFSET__SHIFT | + VCN_JPEG_DB_CTRL__EN_MASK); + r = amdgpu_ring_test_helper(ring); + if (r) + return r; + } } DRM_DEV_INFO(adev->dev, "JPEG decode initialized successfully.\n"); @@ -233,48 +248,52 @@ static int jpeg_v4_0_3_resume(void *handle) return r; } -static void jpeg_v4_0_3_disable_clock_gating(struct amdgpu_device *adev) +static void jpeg_v4_0_3_disable_clock_gating(struct amdgpu_device *adev, int inst_idx) { uint32_t data; int i; - data = RREG32_SOC15(JPEG, 0, regJPEG_CGC_CTRL); - if (adev->cg_flags & AMD_CG_SUPPORT_JPEG_MGCG) + data = RREG32_SOC15(JPEG, inst_idx, regJPEG_CGC_CTRL); + if (adev->cg_flags & AMD_CG_SUPPORT_JPEG_MGCG) { data |= 1 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; - else + data &= (~(JPEG_CGC_CTRL__JPEG0_DEC_MODE_MASK << 1)); + } else { data &= ~JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; + } data |= 1 << JPEG_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT; data |= 4 << JPEG_CGC_CTRL__CLK_OFF_DELAY__SHIFT; - WREG32_SOC15(JPEG, 0, regJPEG_CGC_CTRL, data); + WREG32_SOC15(JPEG, inst_idx, regJPEG_CGC_CTRL, data); - data = RREG32_SOC15(JPEG, 0, regJPEG_CGC_GATE); + data = RREG32_SOC15(JPEG, inst_idx, regJPEG_CGC_GATE); data &= ~(JPEG_CGC_GATE__JMCIF_MASK | JPEG_CGC_GATE__JRBBM_MASK); for (i = 0; i < adev->jpeg.num_jpeg_rings; ++i) data &= ~(JPEG_CGC_GATE__JPEG0_DEC_MASK << i); - WREG32_SOC15(JPEG, 0, regJPEG_CGC_GATE, data); + WREG32_SOC15(JPEG, inst_idx, regJPEG_CGC_GATE, data); } -static void jpeg_v4_0_3_enable_clock_gating(struct amdgpu_device *adev) +static void jpeg_v4_0_3_enable_clock_gating(struct amdgpu_device *adev, int inst_idx) { uint32_t data; int i; - data = RREG32_SOC15(JPEG, 0, regJPEG_CGC_CTRL); - if (adev->cg_flags & AMD_CG_SUPPORT_JPEG_MGCG) + data = RREG32_SOC15(JPEG, inst_idx, regJPEG_CGC_CTRL); + if (adev->cg_flags & AMD_CG_SUPPORT_JPEG_MGCG) { data |= 1 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; - else + data |= (JPEG_CGC_CTRL__JPEG0_DEC_MODE_MASK << 1); + } else { data &= 
~JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; + } data |= 1 << JPEG_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT; data |= 4 << JPEG_CGC_CTRL__CLK_OFF_DELAY__SHIFT; - WREG32_SOC15(JPEG, 0, regJPEG_CGC_CTRL, data); + WREG32_SOC15(JPEG, inst_idx, regJPEG_CGC_CTRL, data); - data = RREG32_SOC15(JPEG, 0, regJPEG_CGC_GATE); + data = RREG32_SOC15(JPEG, inst_idx, regJPEG_CGC_GATE); data |= (JPEG_CGC_GATE__JMCIF_MASK | JPEG_CGC_GATE__JRBBM_MASK); for (i = 0; i < adev->jpeg.num_jpeg_rings; ++i) data |= (JPEG_CGC_GATE__JPEG0_DEC_MASK << i); - WREG32_SOC15(JPEG, 0, regJPEG_CGC_GATE, data); + WREG32_SOC15(JPEG, inst_idx, regJPEG_CGC_GATE, data); } /** @@ -286,58 +305,63 @@ static void jpeg_v4_0_3_enable_clock_gating(struct amdgpu_device *adev) */ static int jpeg_v4_0_3_start(struct amdgpu_device *adev) { - struct amdgpu_ring *ring = adev->jpeg.inst->ring_dec; - int i; - - WREG32_SOC15(JPEG, 0, regUVD_PGFSM_CONFIG, - 1 << UVD_PGFSM_CONFIG__UVDJ_PWR_CONFIG__SHIFT); - SOC15_WAIT_ON_RREG(JPEG, 0, regUVD_PGFSM_STATUS, - UVD_PGFSM_STATUS__UVDJ_PWR_ON << - UVD_PGFSM_STATUS__UVDJ_PWR_STATUS__SHIFT, - UVD_PGFSM_STATUS__UVDJ_PWR_STATUS_MASK); - - /* disable anti hang mechanism */ - WREG32_P(SOC15_REG_OFFSET(JPEG, 0, regUVD_JPEG_POWER_STATUS), 0, - ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK); - - /* JPEG disable CGC */ - jpeg_v4_0_3_disable_clock_gating(adev); - - /* MJPEG global tiling registers */ - WREG32_SOC15(JPEG, 0, regJPEG_DEC_GFX8_ADDR_CONFIG, - adev->gfx.config.gb_addr_config); - WREG32_SOC15(JPEG, 0, regJPEG_DEC_GFX10_ADDR_CONFIG, - adev->gfx.config.gb_addr_config); - - /* enable JMI channel */ - WREG32_P(SOC15_REG_OFFSET(JPEG, 0, regUVD_JMI_CNTL), 0, - ~UVD_JMI_CNTL__SOFT_RESET_MASK); - - for (i = 0; i < adev->jpeg.num_jpeg_rings; ++i) { - unsigned int reg_offset = (i?(0x40 * i - 0xc80):0); - - ring = &adev->jpeg.inst->ring_dec[i]; - - /* enable System Interrupt for JRBC */ - WREG32_P(SOC15_REG_OFFSET(JPEG, 0, regJPEG_SYS_INT_EN), - JPEG_SYS_INT_EN__DJRBC0_MASK << i, - 
~(JPEG_SYS_INT_EN__DJRBC0_MASK << i)); - - WREG32_SOC15_OFFSET(JPEG, 0, regUVD_JMI0_UVD_LMI_JRBC_RB_VMID, reg_offset, 0); - WREG32_SOC15_OFFSET(JPEG, 0, regUVD_JRBC0_UVD_JRBC_RB_CNTL, reg_offset, - (0x00000001L | 0x00000002L)); - WREG32_SOC15_OFFSET(JPEG, 0, regUVD_JMI0_UVD_LMI_JRBC_RB_64BIT_BAR_LOW, - reg_offset, lower_32_bits(ring->gpu_addr)); - WREG32_SOC15_OFFSET(JPEG, 0, regUVD_JMI0_UVD_LMI_JRBC_RB_64BIT_BAR_HIGH, - reg_offset, upper_32_bits(ring->gpu_addr)); - WREG32_SOC15_OFFSET(JPEG, 0, regUVD_JRBC0_UVD_JRBC_RB_RPTR, reg_offset, 0); - WREG32_SOC15_OFFSET(JPEG, 0, regUVD_JRBC0_UVD_JRBC_RB_WPTR, reg_offset, 0); - WREG32_SOC15_OFFSET(JPEG, 0, regUVD_JRBC0_UVD_JRBC_RB_CNTL, reg_offset, - 0x00000002L); - WREG32_SOC15_OFFSET(JPEG, 0, regUVD_JRBC0_UVD_JRBC_RB_SIZE, reg_offset, - ring->ring_size / 4); - ring->wptr = RREG32_SOC15_OFFSET(JPEG, 0, regUVD_JRBC0_UVD_JRBC_RB_WPTR, - reg_offset); + struct amdgpu_ring *ring; + int i, j; + + for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { + if (adev->jpeg.harvest_config & (1 << i)) + continue; + WREG32_SOC15(JPEG, i, regUVD_PGFSM_CONFIG, + 1 << UVD_PGFSM_CONFIG__UVDJ_PWR_CONFIG__SHIFT); + SOC15_WAIT_ON_RREG(JPEG, i, regUVD_PGFSM_STATUS, + UVD_PGFSM_STATUS__UVDJ_PWR_ON << + UVD_PGFSM_STATUS__UVDJ_PWR_STATUS__SHIFT, + UVD_PGFSM_STATUS__UVDJ_PWR_STATUS_MASK); + + /* disable anti hang mechanism */ + WREG32_P(SOC15_REG_OFFSET(JPEG, i, regUVD_JPEG_POWER_STATUS), 0, + ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK); + + /* JPEG disable CGC */ + jpeg_v4_0_3_disable_clock_gating(adev, i); + + /* MJPEG global tiling registers */ + WREG32_SOC15(JPEG, i, regJPEG_DEC_GFX8_ADDR_CONFIG, + adev->gfx.config.gb_addr_config); + WREG32_SOC15(JPEG, i, regJPEG_DEC_GFX10_ADDR_CONFIG, + adev->gfx.config.gb_addr_config); + + /* enable JMI channel */ + WREG32_P(SOC15_REG_OFFSET(JPEG, i, regUVD_JMI_CNTL), 0, + ~UVD_JMI_CNTL__SOFT_RESET_MASK); + + for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) { + unsigned int reg_offset = (j?(0x40 * j - 0xc80):0); 
+ + ring = &adev->jpeg.inst[i].ring_dec[j]; + + /* enable System Interrupt for JRBC */ + WREG32_P(SOC15_REG_OFFSET(JPEG, i, regJPEG_SYS_INT_EN), + JPEG_SYS_INT_EN__DJRBC0_MASK << j, + ~(JPEG_SYS_INT_EN__DJRBC0_MASK << j)); + + WREG32_SOC15_OFFSET(JPEG, i, + regUVD_JMI0_UVD_LMI_JRBC_RB_VMID, reg_offset, 0); + WREG32_SOC15_OFFSET(JPEG, i, regUVD_JRBC0_UVD_JRBC_RB_CNTL, reg_offset, + (0x00000001L | 0x00000002L)); + WREG32_SOC15_OFFSET(JPEG, i, regUVD_JMI0_UVD_LMI_JRBC_RB_64BIT_BAR_LOW, + reg_offset, lower_32_bits(ring->gpu_addr)); + WREG32_SOC15_OFFSET(JPEG, i, regUVD_JMI0_UVD_LMI_JRBC_RB_64BIT_BAR_HIGH, + reg_offset, upper_32_bits(ring->gpu_addr)); + WREG32_SOC15_OFFSET(JPEG, i, regUVD_JRBC0_UVD_JRBC_RB_RPTR, reg_offset, 0); + WREG32_SOC15_OFFSET(JPEG, i, regUVD_JRBC0_UVD_JRBC_RB_WPTR, reg_offset, 0); + WREG32_SOC15_OFFSET(JPEG, i, regUVD_JRBC0_UVD_JRBC_RB_CNTL, reg_offset, + 0x00000002L); + WREG32_SOC15_OFFSET(JPEG, i, regUVD_JRBC0_UVD_JRBC_RB_SIZE, reg_offset, + ring->ring_size / 4); + ring->wptr = RREG32_SOC15_OFFSET(JPEG, i, regUVD_JRBC0_UVD_JRBC_RB_WPTR, + reg_offset); + } } return 0; @@ -352,24 +376,31 @@ static int jpeg_v4_0_3_start(struct amdgpu_device *adev) */ static int jpeg_v4_0_3_stop(struct amdgpu_device *adev) { - /* reset JMI */ - WREG32_P(SOC15_REG_OFFSET(JPEG, 0, regUVD_JMI_CNTL), - UVD_JMI_CNTL__SOFT_RESET_MASK, - ~UVD_JMI_CNTL__SOFT_RESET_MASK); + int i; + + for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { + if (adev->jpeg.harvest_config & (1 << i)) + continue; - jpeg_v4_0_3_enable_clock_gating(adev); + /* reset JMI */ + WREG32_P(SOC15_REG_OFFSET(JPEG, i, regUVD_JMI_CNTL), + UVD_JMI_CNTL__SOFT_RESET_MASK, + ~UVD_JMI_CNTL__SOFT_RESET_MASK); - /* enable anti hang mechanism */ - WREG32_P(SOC15_REG_OFFSET(JPEG, 0, regUVD_JPEG_POWER_STATUS), - UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK, - ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK); + jpeg_v4_0_3_enable_clock_gating(adev, i); - WREG32_SOC15(JPEG, 0, regUVD_PGFSM_CONFIG, - 2 << 
UVD_PGFSM_CONFIG__UVDJ_PWR_CONFIG__SHIFT); - SOC15_WAIT_ON_RREG(JPEG, 0, regUVD_PGFSM_STATUS, - UVD_PGFSM_STATUS__UVDJ_PWR_OFF << - UVD_PGFSM_STATUS__UVDJ_PWR_STATUS__SHIFT, - UVD_PGFSM_STATUS__UVDJ_PWR_STATUS_MASK); + /* enable anti hang mechanism */ + WREG32_P(SOC15_REG_OFFSET(JPEG, i, regUVD_JPEG_POWER_STATUS), + UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK, + ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK); + + WREG32_SOC15(JPEG, i, regUVD_PGFSM_CONFIG, + 2 << UVD_PGFSM_CONFIG__UVDJ_PWR_CONFIG__SHIFT); + SOC15_WAIT_ON_RREG(JPEG, i, regUVD_PGFSM_STATUS, + UVD_PGFSM_STATUS__UVDJ_PWR_OFF << + UVD_PGFSM_STATUS__UVDJ_PWR_STATUS__SHIFT, + UVD_PGFSM_STATUS__UVDJ_PWR_STATUS_MASK); + } return 0; } @@ -502,10 +533,28 @@ static void jpeg_v4_0_3_dec_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, 0, PACKETJ_CONDITION_CHECK0, PACKETJ_TYPE4)); amdgpu_ring_write(ring, 0); + if (ring->adev->jpeg.inst[ring->me].aid_id) { + amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_EXTERNAL_MCM_ADDR_INTERNAL_OFFSET, + 0, PACKETJ_CONDITION_CHECK0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, 0x4); + } else { + amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE6)); + amdgpu_ring_write(ring, 0); + } + amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET, 0, 0, PACKETJ_TYPE0)); amdgpu_ring_write(ring, 0x3fbc); + if (ring->adev->jpeg.inst[ring->me].aid_id) { + amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_EXTERNAL_MCM_ADDR_INTERNAL_OFFSET, + 0, PACKETJ_CONDITION_CHECK0, PACKETJ_TYPE0)); + amdgpu_ring_write(ring, 0x0); + } else { + amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE6)); + amdgpu_ring_write(ring, 0); + } + amdgpu_ring_write(ring, PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR, 0, 0, PACKETJ_TYPE0)); amdgpu_ring_write(ring, 0x1); @@ -651,15 +700,19 @@ static bool jpeg_v4_0_3_is_idle(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; bool ret; - int i; - - for (i = 0; i < adev->jpeg.num_jpeg_rings; ++i) { - unsigned int reg_offset = 
(i?(0x40 * i - 0xc80):0); - - ret &= ((RREG32_SOC15_OFFSET(JPEG, 0, - regUVD_JRBC0_UVD_JRBC_STATUS, reg_offset) & - UVD_JRBC0_UVD_JRBC_STATUS__RB_JOB_DONE_MASK) == - UVD_JRBC0_UVD_JRBC_STATUS__RB_JOB_DONE_MASK); + int i, j; + + for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { + if (adev->jpeg.harvest_config & (1 << i)) + continue; + for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) { + unsigned int reg_offset = (j?(0x40 * j - 0xc80):0); + + ret &= ((RREG32_SOC15_OFFSET(JPEG, i, + regUVD_JRBC0_UVD_JRBC_STATUS, reg_offset) & + UVD_JRBC0_UVD_JRBC_STATUS__RB_JOB_DONE_MASK) == + UVD_JRBC0_UVD_JRBC_STATUS__RB_JOB_DONE_MASK); + } } return ret; @@ -669,17 +722,20 @@ static int jpeg_v4_0_3_wait_for_idle(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; int ret; - int i; + int i, j; - for (i = 0; i < adev->jpeg.num_jpeg_rings; ++i) { - unsigned int reg_offset = (i?(0x40 * i - 0xc80):0); + for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { + if (adev->jpeg.harvest_config & (1 << i)) + continue; + for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) { + unsigned int reg_offset = (j?(0x40 * j - 0xc80):0); - ret &= SOC15_WAIT_ON_RREG_OFFSET(JPEG, 0, - regUVD_JRBC0_UVD_JRBC_STATUS, reg_offset, - UVD_JRBC0_UVD_JRBC_STATUS__RB_JOB_DONE_MASK, - UVD_JRBC0_UVD_JRBC_STATUS__RB_JOB_DONE_MASK); + ret &= SOC15_WAIT_ON_RREG_OFFSET(JPEG, i, + regUVD_JRBC0_UVD_JRBC_STATUS, reg_offset, + UVD_JRBC0_UVD_JRBC_STATUS__RB_JOB_DONE_MASK, + UVD_JRBC0_UVD_JRBC_STATUS__RB_JOB_DONE_MASK); + } } - return ret; } @@ -688,15 +744,19 @@ static int jpeg_v4_0_3_set_clockgating_state(void *handle, { struct amdgpu_device *adev = (struct amdgpu_device *)handle; bool enable = (state == AMD_CG_STATE_GATE) ? 
true : false; + int i; - if (enable) { - if (!jpeg_v4_0_3_is_idle(handle)) - return -EBUSY; - jpeg_v4_0_3_enable_clock_gating(adev); - } else { - jpeg_v4_0_3_disable_clock_gating(adev); + for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { + if (adev->jpeg.harvest_config & (1 << i)) + continue; + if (enable) { + if (!jpeg_v4_0_3_is_idle(handle)) + return -EBUSY; + jpeg_v4_0_3_enable_clock_gating(adev, i); + } else { + jpeg_v4_0_3_disable_clock_gating(adev, i); + } } - return 0; } @@ -732,32 +792,35 @@ static int jpeg_v4_0_3_process_interrupt(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry) { + uint32_t i; + + i = node_id_to_phys_map[entry->node_id]; DRM_DEV_DEBUG(adev->dev, "IH: JPEG TRAP\n"); switch (entry->src_id) { case VCN_4_0__SRCID__JPEG_DECODE: - amdgpu_fence_process(&adev->jpeg.inst->ring_dec[0]); + amdgpu_fence_process(&adev->jpeg.inst[i].ring_dec[0]); break; case VCN_4_0__SRCID__JPEG1_DECODE: - amdgpu_fence_process(&adev->jpeg.inst->ring_dec[1]); + amdgpu_fence_process(&adev->jpeg.inst[i].ring_dec[1]); break; case VCN_4_0__SRCID__JPEG2_DECODE: - amdgpu_fence_process(&adev->jpeg.inst->ring_dec[2]); + amdgpu_fence_process(&adev->jpeg.inst[i].ring_dec[2]); break; case VCN_4_0__SRCID__JPEG3_DECODE: - amdgpu_fence_process(&adev->jpeg.inst->ring_dec[3]); + amdgpu_fence_process(&adev->jpeg.inst[i].ring_dec[3]); break; case VCN_4_0__SRCID__JPEG4_DECODE: - amdgpu_fence_process(&adev->jpeg.inst->ring_dec[4]); + amdgpu_fence_process(&adev->jpeg.inst[i].ring_dec[4]); break; case VCN_4_0__SRCID__JPEG5_DECODE: - amdgpu_fence_process(&adev->jpeg.inst->ring_dec[5]); + amdgpu_fence_process(&adev->jpeg.inst[i].ring_dec[5]); break; case VCN_4_0__SRCID__JPEG6_DECODE: - amdgpu_fence_process(&adev->jpeg.inst->ring_dec[6]); + amdgpu_fence_process(&adev->jpeg.inst[i].ring_dec[6]); break; case VCN_4_0__SRCID__JPEG7_DECODE: - amdgpu_fence_process(&adev->jpeg.inst->ring_dec[7]); + amdgpu_fence_process(&adev->jpeg.inst[i].ring_dec[7]); break; 
default: DRM_DEV_ERROR(adev->dev, "Unhandled interrupt: %d %d\n", @@ -798,7 +861,7 @@ static const struct amdgpu_ring_funcs jpeg_v4_0_3_dec_ring_vm_funcs = { SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 + 8 + /* jpeg_v4_0_3_dec_ring_emit_vm_flush */ - 18 + 18 + /* jpeg_v4_0_3_dec_ring_emit_fence x2 vm fence */ + 22 + 22 + /* jpeg_v4_0_3_dec_ring_emit_fence x2 vm fence */ 8 + 16, .emit_ib_size = 22, /* jpeg_v4_0_3_dec_ring_emit_ib */ .emit_ib = jpeg_v4_0_3_dec_ring_emit_ib, @@ -819,12 +882,17 @@ static const struct amdgpu_ring_funcs jpeg_v4_0_3_dec_ring_vm_funcs = { static void jpeg_v4_0_3_set_dec_ring_funcs(struct amdgpu_device *adev) { - int i; - - for (i = 0; i < adev->jpeg.num_jpeg_rings; ++i) { - adev->jpeg.inst->ring_dec[i].funcs = &jpeg_v4_0_3_dec_ring_vm_funcs; - adev->jpeg.inst->ring_dec[i].me = 0; - adev->jpeg.inst->ring_dec[i].pipe = i; + int i, j; + + for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { + if (adev->jpeg.harvest_config & (1 << i)) + continue; + for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) { + adev->jpeg.inst[i].ring_dec[j].funcs = &jpeg_v4_0_3_dec_ring_vm_funcs; + adev->jpeg.inst[i].ring_dec[j].me = i; + adev->jpeg.inst[i].ring_dec[j].pipe = j; + } + adev->jpeg.inst[i].aid_id = i / adev->jpeg.num_inst_per_aid; } DRM_DEV_INFO(adev->dev, "JPEG decode is enabled in VM mode\n"); } @@ -836,7 +904,13 @@ static const struct amdgpu_irq_src_funcs jpeg_v4_0_3_irq_funcs = { static void jpeg_v4_0_3_set_irq_funcs(struct amdgpu_device *adev) { - adev->jpeg.inst->irq.num_types = adev->jpeg.num_jpeg_rings; + int i; + + for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { + if (adev->jpeg.harvest_config & (1 << i)) + continue; + adev->jpeg.inst->irq.num_types += adev->jpeg.num_jpeg_rings; + } adev->jpeg.inst->irq.funcs = &jpeg_v4_0_3_irq_funcs; } -- cgit From fd91d38b5275959a5b0804d4b4dbc5a4c0a8aac9 Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Fri, 17 Feb 2023 19:41:06 +0530 Subject: drm/amdgpu: Use logical ids for VCN/JPEG v4.0.3 
Address VCN/JPEG instances using logical ids. Whenever register access is required, get the physical instance using GET_INST. Signed-off-by: Lijo Lazar Acked-by: Leo Liu Tested-by: James Zhu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h | 29 +- .../gpu/drm/amd/amdgpu/aqua_vanjaram_reg_init.c | 14 +- drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c | 251 +++++++----- drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c | 431 +++++++++++---------- 4 files changed, 408 insertions(+), 317 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h index 47463ef10fce..1eb9ccd1d83d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h @@ -141,18 +141,23 @@ RREG32_SOC15(VCN, inst_idx, mmUVD_DPG_LMA_DATA); \ }) -#define WREG32_SOC15_DPG_MODE(inst_idx, offset, value, mask_en, indirect) \ - do { \ - if (!indirect) { \ - WREG32_SOC15(VCN, inst_idx, mmUVD_DPG_LMA_DATA, value); \ - WREG32_SOC15(VCN, inst_idx, mmUVD_DPG_LMA_CTL, \ - (0x1 << UVD_DPG_LMA_CTL__READ_WRITE__SHIFT | \ - mask_en << UVD_DPG_LMA_CTL__MASK_EN__SHIFT | \ - offset << UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT)); \ - } else { \ - *adev->vcn.inst[inst_idx].dpg_sram_curr_addr++ = offset; \ - *adev->vcn.inst[inst_idx].dpg_sram_curr_addr++ = value; \ - } \ +#define WREG32_SOC15_DPG_MODE(inst_idx, offset, value, mask_en, indirect) \ + do { \ + if (!indirect) { \ + WREG32_SOC15(VCN, GET_INST(VCN, inst_idx), \ + mmUVD_DPG_LMA_DATA, value); \ + WREG32_SOC15( \ + VCN, GET_INST(VCN, inst_idx), \ + mmUVD_DPG_LMA_CTL, \ + (0x1 << UVD_DPG_LMA_CTL__READ_WRITE__SHIFT | \ + mask_en << UVD_DPG_LMA_CTL__MASK_EN__SHIFT | \ + offset << UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT)); \ + } else { \ + *adev->vcn.inst[inst_idx].dpg_sram_curr_addr++ = \ + offset; \ + *adev->vcn.inst[inst_idx].dpg_sram_curr_addr++ = \ + value; \ + } \ } while (0) #define 
AMDGPU_FW_SHARED_FLAG_0_UNIFIED_QUEUE (1 << 2) diff --git a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram_reg_init.c b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram_reg_init.c index 90fe77db9bee..51d3cb81e37a 100644 --- a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram_reg_init.c +++ b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram_reg_init.c @@ -70,6 +70,8 @@ static int8_t aqua_vanjaram_logical_to_dev_inst(struct amdgpu_device *adev, switch (block) { case GC_HWIP: case SDMA0_HWIP: + /* Both JPEG and VCN as JPEG is only alias of VCN */ + case VCN_HWIP: dev_inst = adev->ip_map.dev_inst[block][inst]; break; default: @@ -379,7 +381,7 @@ static int aqua_vanjaram_xcp_mgr_init(struct amdgpu_device *adev) int aqua_vanjaram_init_soc_config(struct amdgpu_device *adev) { u32 mask, inst_mask = adev->sdma.sdma_mask; - int ret, i, num_inst; + int ret, i; /* generally 1 AID supports 4 instances */ adev->sdma.num_inst_per_aid = 4; @@ -394,11 +396,15 @@ int aqua_vanjaram_init_soc_config(struct amdgpu_device *adev) adev->aid_mask |= (1 << i); } - num_inst = hweight32(adev->aid_mask); + /* Harvest config is not used for aqua vanjaram. VCN and JPEGs will be + * addressed based on logical instance ids. 
+ */ + adev->vcn.harvest_config = 0; adev->vcn.num_inst_per_aid = 1; - adev->vcn.num_vcn_inst = adev->vcn.num_inst_per_aid * num_inst; + adev->vcn.num_vcn_inst = hweight32(adev->vcn.inst_mask); + adev->jpeg.harvest_config = 0; adev->jpeg.num_inst_per_aid = 1; - adev->jpeg.num_jpeg_inst = adev->jpeg.num_inst_per_aid * num_inst; + adev->jpeg.num_jpeg_inst = hweight32(adev->jpeg.inst_mask); ret = aqua_vanjaram_xcp_mgr_init(adev); if (ret) diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c index aa14a6619e9a..c0e90e27f24b 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c @@ -82,7 +82,7 @@ static int jpeg_v4_0_3_sw_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; struct amdgpu_ring *ring; - int i, j, r; + int i, j, r, jpeg_inst; for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) { /* JPEG TRAP */ @@ -101,14 +101,15 @@ static int jpeg_v4_0_3_sw_init(void *handle) return r; for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { - if (adev->jpeg.harvest_config & (1 << i)) - continue; + jpeg_inst = GET_INST(JPEG, i); + for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) { ring = &adev->jpeg.inst[i].ring_dec[j]; ring->use_doorbell = true; ring->vm_hub = AMDGPU_MMHUB0(adev->jpeg.inst[i].aid_id); ring->doorbell_index = - (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 1 + j + 9 * i; + (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + + 1 + j + 9 * jpeg_inst; sprintf(ring->name, "jpeg_dec_%d.%d", i, j); r = amdgpu_ring_init(adev, ring, 512, &adev->jpeg.inst->irq, 0, AMDGPU_RING_PRIO_DEFAULT, NULL); @@ -118,8 +119,10 @@ static int jpeg_v4_0_3_sw_init(void *handle) adev->jpeg.internal.jpeg_pitch[j] = regUVD_JRBC0_UVD_JRBC_SCRATCH0_INTERNAL_OFFSET; adev->jpeg.inst[i].external.jpeg_pitch[j] = - SOC15_REG_OFFSET1(JPEG, i, regUVD_JRBC0_UVD_JRBC_SCRATCH0, - (j?(0x40 * j - 0xc80):0)); + SOC15_REG_OFFSET1( + JPEG, jpeg_inst, + regUVD_JRBC0_UVD_JRBC_SCRATCH0, + (j ? 
(0x40 * j - 0xc80) : 0)); } } @@ -157,25 +160,30 @@ static int jpeg_v4_0_3_hw_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; struct amdgpu_ring *ring; - int i, j, r; + int i, j, r, jpeg_inst; for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { - if (adev->jpeg.harvest_config & (1 << i)) - continue; + jpeg_inst = GET_INST(JPEG, i); + ring = adev->jpeg.inst[i].ring_dec; if (ring->use_doorbell) - adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell, - (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 9 * i, + adev->nbio.funcs->vcn_doorbell_range( + adev, ring->use_doorbell, + (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + + 9 * jpeg_inst, adev->jpeg.inst[i].aid_id); for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) { ring = &adev->jpeg.inst[i].ring_dec[j]; if (ring->use_doorbell) - WREG32_SOC15_OFFSET(VCN, i, regVCN_JPEG_DB_CTRL, - (ring->pipe?(ring->pipe - 0x15):0), - ring->doorbell_index << VCN_JPEG_DB_CTRL__OFFSET__SHIFT | - VCN_JPEG_DB_CTRL__EN_MASK); + WREG32_SOC15_OFFSET( + VCN, GET_INST(VCN, i), + regVCN_JPEG_DB_CTRL, + (ring->pipe ? 
(ring->pipe - 0x15) : 0), + ring->doorbell_index + << VCN_JPEG_DB_CTRL__OFFSET__SHIFT | + VCN_JPEG_DB_CTRL__EN_MASK); r = amdgpu_ring_test_helper(ring); if (r) return r; @@ -250,10 +258,11 @@ static int jpeg_v4_0_3_resume(void *handle) static void jpeg_v4_0_3_disable_clock_gating(struct amdgpu_device *adev, int inst_idx) { + int i, jpeg_inst; uint32_t data; - int i; - data = RREG32_SOC15(JPEG, inst_idx, regJPEG_CGC_CTRL); + jpeg_inst = GET_INST(JPEG, inst_idx); + data = RREG32_SOC15(JPEG, jpeg_inst, regJPEG_CGC_CTRL); if (adev->cg_flags & AMD_CG_SUPPORT_JPEG_MGCG) { data |= 1 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; data &= (~(JPEG_CGC_CTRL__JPEG0_DEC_MODE_MASK << 1)); @@ -263,21 +272,22 @@ static void jpeg_v4_0_3_disable_clock_gating(struct amdgpu_device *adev, int ins data |= 1 << JPEG_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT; data |= 4 << JPEG_CGC_CTRL__CLK_OFF_DELAY__SHIFT; - WREG32_SOC15(JPEG, inst_idx, regJPEG_CGC_CTRL, data); + WREG32_SOC15(JPEG, jpeg_inst, regJPEG_CGC_CTRL, data); - data = RREG32_SOC15(JPEG, inst_idx, regJPEG_CGC_GATE); + data = RREG32_SOC15(JPEG, jpeg_inst, regJPEG_CGC_GATE); data &= ~(JPEG_CGC_GATE__JMCIF_MASK | JPEG_CGC_GATE__JRBBM_MASK); for (i = 0; i < adev->jpeg.num_jpeg_rings; ++i) data &= ~(JPEG_CGC_GATE__JPEG0_DEC_MASK << i); - WREG32_SOC15(JPEG, inst_idx, regJPEG_CGC_GATE, data); + WREG32_SOC15(JPEG, jpeg_inst, regJPEG_CGC_GATE, data); } static void jpeg_v4_0_3_enable_clock_gating(struct amdgpu_device *adev, int inst_idx) { + int i, jpeg_inst; uint32_t data; - int i; - data = RREG32_SOC15(JPEG, inst_idx, regJPEG_CGC_CTRL); + jpeg_inst = GET_INST(JPEG, inst_idx); + data = RREG32_SOC15(JPEG, jpeg_inst, regJPEG_CGC_CTRL); if (adev->cg_flags & AMD_CG_SUPPORT_JPEG_MGCG) { data |= 1 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; data |= (JPEG_CGC_CTRL__JPEG0_DEC_MODE_MASK << 1); @@ -287,13 +297,13 @@ static void jpeg_v4_0_3_enable_clock_gating(struct amdgpu_device *adev, int inst data |= 1 << JPEG_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT; data |= 4 << 
JPEG_CGC_CTRL__CLK_OFF_DELAY__SHIFT; - WREG32_SOC15(JPEG, inst_idx, regJPEG_CGC_CTRL, data); + WREG32_SOC15(JPEG, jpeg_inst, regJPEG_CGC_CTRL, data); - data = RREG32_SOC15(JPEG, inst_idx, regJPEG_CGC_GATE); + data = RREG32_SOC15(JPEG, jpeg_inst, regJPEG_CGC_GATE); data |= (JPEG_CGC_GATE__JMCIF_MASK | JPEG_CGC_GATE__JRBBM_MASK); for (i = 0; i < adev->jpeg.num_jpeg_rings; ++i) data |= (JPEG_CGC_GATE__JPEG0_DEC_MASK << i); - WREG32_SOC15(JPEG, inst_idx, regJPEG_CGC_GATE, data); + WREG32_SOC15(JPEG, jpeg_inst, regJPEG_CGC_GATE, data); } /** @@ -306,34 +316,36 @@ static void jpeg_v4_0_3_enable_clock_gating(struct amdgpu_device *adev, int inst static int jpeg_v4_0_3_start(struct amdgpu_device *adev) { struct amdgpu_ring *ring; - int i, j; + int i, j, jpeg_inst; for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { - if (adev->jpeg.harvest_config & (1 << i)) - continue; - WREG32_SOC15(JPEG, i, regUVD_PGFSM_CONFIG, - 1 << UVD_PGFSM_CONFIG__UVDJ_PWR_CONFIG__SHIFT); - SOC15_WAIT_ON_RREG(JPEG, i, regUVD_PGFSM_STATUS, - UVD_PGFSM_STATUS__UVDJ_PWR_ON << - UVD_PGFSM_STATUS__UVDJ_PWR_STATUS__SHIFT, + jpeg_inst = GET_INST(JPEG, i); + + WREG32_SOC15(JPEG, jpeg_inst, regUVD_PGFSM_CONFIG, + 1 << UVD_PGFSM_CONFIG__UVDJ_PWR_CONFIG__SHIFT); + SOC15_WAIT_ON_RREG( + JPEG, jpeg_inst, regUVD_PGFSM_STATUS, + UVD_PGFSM_STATUS__UVDJ_PWR_ON + << UVD_PGFSM_STATUS__UVDJ_PWR_STATUS__SHIFT, UVD_PGFSM_STATUS__UVDJ_PWR_STATUS_MASK); /* disable anti hang mechanism */ - WREG32_P(SOC15_REG_OFFSET(JPEG, i, regUVD_JPEG_POWER_STATUS), 0, - ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK); + WREG32_P(SOC15_REG_OFFSET(JPEG, jpeg_inst, + regUVD_JPEG_POWER_STATUS), + 0, ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK); /* JPEG disable CGC */ jpeg_v4_0_3_disable_clock_gating(adev, i); /* MJPEG global tiling registers */ - WREG32_SOC15(JPEG, i, regJPEG_DEC_GFX8_ADDR_CONFIG, - adev->gfx.config.gb_addr_config); - WREG32_SOC15(JPEG, i, regJPEG_DEC_GFX10_ADDR_CONFIG, - adev->gfx.config.gb_addr_config); + WREG32_SOC15(JPEG, 
jpeg_inst, regJPEG_DEC_GFX8_ADDR_CONFIG, + adev->gfx.config.gb_addr_config); + WREG32_SOC15(JPEG, jpeg_inst, regJPEG_DEC_GFX10_ADDR_CONFIG, + adev->gfx.config.gb_addr_config); /* enable JMI channel */ - WREG32_P(SOC15_REG_OFFSET(JPEG, i, regUVD_JMI_CNTL), 0, - ~UVD_JMI_CNTL__SOFT_RESET_MASK); + WREG32_P(SOC15_REG_OFFSET(JPEG, jpeg_inst, regUVD_JMI_CNTL), 0, + ~UVD_JMI_CNTL__SOFT_RESET_MASK); for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) { unsigned int reg_offset = (j?(0x40 * j - 0xc80):0); @@ -341,25 +353,40 @@ static int jpeg_v4_0_3_start(struct amdgpu_device *adev) ring = &adev->jpeg.inst[i].ring_dec[j]; /* enable System Interrupt for JRBC */ - WREG32_P(SOC15_REG_OFFSET(JPEG, i, regJPEG_SYS_INT_EN), - JPEG_SYS_INT_EN__DJRBC0_MASK << j, - ~(JPEG_SYS_INT_EN__DJRBC0_MASK << j)); - - WREG32_SOC15_OFFSET(JPEG, i, - regUVD_JMI0_UVD_LMI_JRBC_RB_VMID, reg_offset, 0); - WREG32_SOC15_OFFSET(JPEG, i, regUVD_JRBC0_UVD_JRBC_RB_CNTL, reg_offset, - (0x00000001L | 0x00000002L)); - WREG32_SOC15_OFFSET(JPEG, i, regUVD_JMI0_UVD_LMI_JRBC_RB_64BIT_BAR_LOW, + WREG32_P(SOC15_REG_OFFSET(JPEG, jpeg_inst, + regJPEG_SYS_INT_EN), + JPEG_SYS_INT_EN__DJRBC0_MASK << j, + ~(JPEG_SYS_INT_EN__DJRBC0_MASK << j)); + + WREG32_SOC15_OFFSET(JPEG, jpeg_inst, + regUVD_JMI0_UVD_LMI_JRBC_RB_VMID, + reg_offset, 0); + WREG32_SOC15_OFFSET(JPEG, jpeg_inst, + regUVD_JRBC0_UVD_JRBC_RB_CNTL, + reg_offset, + (0x00000001L | 0x00000002L)); + WREG32_SOC15_OFFSET( + JPEG, jpeg_inst, + regUVD_JMI0_UVD_LMI_JRBC_RB_64BIT_BAR_LOW, reg_offset, lower_32_bits(ring->gpu_addr)); - WREG32_SOC15_OFFSET(JPEG, i, regUVD_JMI0_UVD_LMI_JRBC_RB_64BIT_BAR_HIGH, + WREG32_SOC15_OFFSET( + JPEG, jpeg_inst, + regUVD_JMI0_UVD_LMI_JRBC_RB_64BIT_BAR_HIGH, reg_offset, upper_32_bits(ring->gpu_addr)); - WREG32_SOC15_OFFSET(JPEG, i, regUVD_JRBC0_UVD_JRBC_RB_RPTR, reg_offset, 0); - WREG32_SOC15_OFFSET(JPEG, i, regUVD_JRBC0_UVD_JRBC_RB_WPTR, reg_offset, 0); - WREG32_SOC15_OFFSET(JPEG, i, regUVD_JRBC0_UVD_JRBC_RB_CNTL, reg_offset, - 0x00000002L); 
- WREG32_SOC15_OFFSET(JPEG, i, regUVD_JRBC0_UVD_JRBC_RB_SIZE, reg_offset, - ring->ring_size / 4); - ring->wptr = RREG32_SOC15_OFFSET(JPEG, i, regUVD_JRBC0_UVD_JRBC_RB_WPTR, + WREG32_SOC15_OFFSET(JPEG, jpeg_inst, + regUVD_JRBC0_UVD_JRBC_RB_RPTR, + reg_offset, 0); + WREG32_SOC15_OFFSET(JPEG, jpeg_inst, + regUVD_JRBC0_UVD_JRBC_RB_WPTR, + reg_offset, 0); + WREG32_SOC15_OFFSET(JPEG, jpeg_inst, + regUVD_JRBC0_UVD_JRBC_RB_CNTL, + reg_offset, 0x00000002L); + WREG32_SOC15_OFFSET(JPEG, jpeg_inst, + regUVD_JRBC0_UVD_JRBC_RB_SIZE, + reg_offset, ring->ring_size / 4); + ring->wptr = RREG32_SOC15_OFFSET( + JPEG, jpeg_inst, regUVD_JRBC0_UVD_JRBC_RB_WPTR, reg_offset); } } @@ -376,29 +403,29 @@ static int jpeg_v4_0_3_start(struct amdgpu_device *adev) */ static int jpeg_v4_0_3_stop(struct amdgpu_device *adev) { - int i; + int i, jpeg_inst; for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { - if (adev->jpeg.harvest_config & (1 << i)) - continue; - + jpeg_inst = GET_INST(JPEG, i); /* reset JMI */ - WREG32_P(SOC15_REG_OFFSET(JPEG, i, regUVD_JMI_CNTL), - UVD_JMI_CNTL__SOFT_RESET_MASK, - ~UVD_JMI_CNTL__SOFT_RESET_MASK); + WREG32_P(SOC15_REG_OFFSET(JPEG, jpeg_inst, regUVD_JMI_CNTL), + UVD_JMI_CNTL__SOFT_RESET_MASK, + ~UVD_JMI_CNTL__SOFT_RESET_MASK); jpeg_v4_0_3_enable_clock_gating(adev, i); /* enable anti hang mechanism */ - WREG32_P(SOC15_REG_OFFSET(JPEG, i, regUVD_JPEG_POWER_STATUS), - UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK, - ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK); - - WREG32_SOC15(JPEG, i, regUVD_PGFSM_CONFIG, - 2 << UVD_PGFSM_CONFIG__UVDJ_PWR_CONFIG__SHIFT); - SOC15_WAIT_ON_RREG(JPEG, i, regUVD_PGFSM_STATUS, - UVD_PGFSM_STATUS__UVDJ_PWR_OFF << - UVD_PGFSM_STATUS__UVDJ_PWR_STATUS__SHIFT, + WREG32_P(SOC15_REG_OFFSET(JPEG, jpeg_inst, + regUVD_JPEG_POWER_STATUS), + UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK, + ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK); + + WREG32_SOC15(JPEG, jpeg_inst, regUVD_PGFSM_CONFIG, + 2 << UVD_PGFSM_CONFIG__UVDJ_PWR_CONFIG__SHIFT); + 
SOC15_WAIT_ON_RREG( + JPEG, jpeg_inst, regUVD_PGFSM_STATUS, + UVD_PGFSM_STATUS__UVDJ_PWR_OFF + << UVD_PGFSM_STATUS__UVDJ_PWR_STATUS__SHIFT, UVD_PGFSM_STATUS__UVDJ_PWR_STATUS_MASK); } @@ -416,8 +443,9 @@ static uint64_t jpeg_v4_0_3_dec_ring_get_rptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - return RREG32_SOC15_OFFSET(JPEG, ring->me, regUVD_JRBC0_UVD_JRBC_RB_RPTR, - ring->pipe?(0x40 * ring->pipe - 0xc80):0); + return RREG32_SOC15_OFFSET( + JPEG, GET_INST(JPEG, ring->me), regUVD_JRBC0_UVD_JRBC_RB_RPTR, + ring->pipe ? (0x40 * ring->pipe - 0xc80) : 0); } /** @@ -434,8 +462,10 @@ static uint64_t jpeg_v4_0_3_dec_ring_get_wptr(struct amdgpu_ring *ring) if (ring->use_doorbell) return adev->wb.wb[ring->wptr_offs]; else - return RREG32_SOC15_OFFSET(JPEG, ring->me, regUVD_JRBC0_UVD_JRBC_RB_WPTR, - ring->pipe?(0x40 * ring->pipe - 0xc80):0); + return RREG32_SOC15_OFFSET( + JPEG, GET_INST(JPEG, ring->me), + regUVD_JRBC0_UVD_JRBC_RB_WPTR, + ring->pipe ? (0x40 * ring->pipe - 0xc80) : 0); } /** @@ -453,8 +483,11 @@ static void jpeg_v4_0_3_dec_ring_set_wptr(struct amdgpu_ring *ring) adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr); WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr)); } else { - WREG32_SOC15_OFFSET(JPEG, ring->me, regUVD_JRBC0_UVD_JRBC_RB_WPTR, - (ring->pipe?(0x40 * ring->pipe - 0xc80):0), lower_32_bits(ring->wptr)); + WREG32_SOC15_OFFSET(JPEG, GET_INST(JPEG, ring->me), + regUVD_JRBC0_UVD_JRBC_RB_WPTR, + (ring->pipe ? 
(0x40 * ring->pipe - 0xc80) : + 0), + lower_32_bits(ring->wptr)); } } @@ -703,15 +736,15 @@ static bool jpeg_v4_0_3_is_idle(void *handle) int i, j; for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { - if (adev->jpeg.harvest_config & (1 << i)) - continue; for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) { unsigned int reg_offset = (j?(0x40 * j - 0xc80):0); - ret &= ((RREG32_SOC15_OFFSET(JPEG, i, - regUVD_JRBC0_UVD_JRBC_STATUS, reg_offset) & - UVD_JRBC0_UVD_JRBC_STATUS__RB_JOB_DONE_MASK) == - UVD_JRBC0_UVD_JRBC_STATUS__RB_JOB_DONE_MASK); + ret &= ((RREG32_SOC15_OFFSET( + JPEG, GET_INST(JPEG, i), + regUVD_JRBC0_UVD_JRBC_STATUS, + reg_offset) & + UVD_JRBC0_UVD_JRBC_STATUS__RB_JOB_DONE_MASK) == + UVD_JRBC0_UVD_JRBC_STATUS__RB_JOB_DONE_MASK); } } @@ -725,12 +758,11 @@ static int jpeg_v4_0_3_wait_for_idle(void *handle) int i, j; for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { - if (adev->jpeg.harvest_config & (1 << i)) - continue; for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) { unsigned int reg_offset = (j?(0x40 * j - 0xc80):0); - ret &= SOC15_WAIT_ON_RREG_OFFSET(JPEG, i, + ret &= SOC15_WAIT_ON_RREG_OFFSET( + JPEG, GET_INST(JPEG, i), regUVD_JRBC0_UVD_JRBC_STATUS, reg_offset, UVD_JRBC0_UVD_JRBC_STATUS__RB_JOB_DONE_MASK, UVD_JRBC0_UVD_JRBC_STATUS__RB_JOB_DONE_MASK); @@ -747,8 +779,6 @@ static int jpeg_v4_0_3_set_clockgating_state(void *handle, int i; for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { - if (adev->jpeg.harvest_config & (1 << i)) - continue; if (enable) { if (!jpeg_v4_0_3_is_idle(handle)) return -EBUSY; @@ -792,35 +822,46 @@ static int jpeg_v4_0_3_process_interrupt(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry) { - uint32_t i; + uint32_t i, inst; i = node_id_to_phys_map[entry->node_id]; DRM_DEV_DEBUG(adev->dev, "IH: JPEG TRAP\n"); + for (inst = 0; inst < adev->jpeg.num_jpeg_inst; ++inst) + if (adev->jpeg.inst[inst].aid_id == i) + break; + + if (inst >= adev->jpeg.num_jpeg_inst) { + dev_WARN_ONCE(adev->dev, 1, + 
"Interrupt received for unknown JPEG instance %d", + entry->node_id); + return 0; + } + switch (entry->src_id) { case VCN_4_0__SRCID__JPEG_DECODE: - amdgpu_fence_process(&adev->jpeg.inst[i].ring_dec[0]); + amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[0]); break; case VCN_4_0__SRCID__JPEG1_DECODE: - amdgpu_fence_process(&adev->jpeg.inst[i].ring_dec[1]); + amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[1]); break; case VCN_4_0__SRCID__JPEG2_DECODE: - amdgpu_fence_process(&adev->jpeg.inst[i].ring_dec[2]); + amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[2]); break; case VCN_4_0__SRCID__JPEG3_DECODE: - amdgpu_fence_process(&adev->jpeg.inst[i].ring_dec[3]); + amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[3]); break; case VCN_4_0__SRCID__JPEG4_DECODE: - amdgpu_fence_process(&adev->jpeg.inst[i].ring_dec[4]); + amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[4]); break; case VCN_4_0__SRCID__JPEG5_DECODE: - amdgpu_fence_process(&adev->jpeg.inst[i].ring_dec[5]); + amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[5]); break; case VCN_4_0__SRCID__JPEG6_DECODE: - amdgpu_fence_process(&adev->jpeg.inst[i].ring_dec[6]); + amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[6]); break; case VCN_4_0__SRCID__JPEG7_DECODE: - amdgpu_fence_process(&adev->jpeg.inst[i].ring_dec[7]); + amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[7]); break; default: DRM_DEV_ERROR(adev->dev, "Unhandled interrupt: %d %d\n", @@ -882,17 +923,17 @@ static const struct amdgpu_ring_funcs jpeg_v4_0_3_dec_ring_vm_funcs = { static void jpeg_v4_0_3_set_dec_ring_funcs(struct amdgpu_device *adev) { - int i, j; + int i, j, jpeg_inst; for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { - if (adev->jpeg.harvest_config & (1 << i)) - continue; for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) { adev->jpeg.inst[i].ring_dec[j].funcs = &jpeg_v4_0_3_dec_ring_vm_funcs; adev->jpeg.inst[i].ring_dec[j].me = i; adev->jpeg.inst[i].ring_dec[j].pipe = j; } - adev->jpeg.inst[i].aid_id = i / 
adev->jpeg.num_inst_per_aid; + jpeg_inst = GET_INST(JPEG, i); + adev->jpeg.inst[i].aid_id = + jpeg_inst / adev->jpeg.num_inst_per_aid; } DRM_DEV_INFO(adev->dev, "JPEG decode is enabled in VM mode\n"); } @@ -907,8 +948,6 @@ static void jpeg_v4_0_3_set_irq_funcs(struct amdgpu_device *adev) int i; for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { - if (adev->jpeg.harvest_config & (1 << i)) - continue; adev->jpeg.inst->irq.num_types += adev->jpeg.num_jpeg_rings; } adev->jpeg.inst->irq.funcs = &jpeg_v4_0_3_irq_funcs; diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c index 308dfe80a87c..49b07843efd1 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c @@ -83,7 +83,7 @@ static int vcn_v4_0_3_sw_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; struct amdgpu_ring *ring; - int i, r; + int i, r, vcn_inst; r = amdgpu_vcn_sw_init(adev); if (r) @@ -104,12 +104,13 @@ static int vcn_v4_0_3_sw_init(void *handle) for (i = 0; i < adev->vcn.num_vcn_inst; i++) { volatile struct amdgpu_vcn4_fw_shared *fw_shared; - if (adev->vcn.harvest_config & (1 << i)) - continue; + vcn_inst = GET_INST(VCN, i); ring = &adev->vcn.inst[i].ring_enc[0]; ring->use_doorbell = true; - ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 9 * i; + ring->doorbell_index = + (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + + 9 * vcn_inst; ring->vm_hub = AMDGPU_MMHUB0(adev->vcn.inst[i].aid_id); sprintf(ring->name, "vcn_unified_%d", i); r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst->irq, 0, @@ -148,8 +149,6 @@ static int vcn_v4_0_3_sw_fini(void *handle) for (i = 0; i < adev->vcn.num_vcn_inst; i++) { volatile struct amdgpu_vcn4_fw_shared *fw_shared; - if (adev->vcn.harvest_config & (1 << i)) - continue; fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; fw_shared->present_flag_0 = 0; fw_shared->sq.is_enabled = cpu_to_le32(false); @@ -177,21 +176,25 @@ static int 
vcn_v4_0_3_hw_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; struct amdgpu_ring *ring; - int i, r; + int i, r, vcn_inst; for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { - if (adev->vcn.harvest_config & (1 << i)) - continue; + vcn_inst = GET_INST(VCN, i); ring = &adev->vcn.inst[i].ring_enc[0]; if (ring->use_doorbell) { - adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell, - (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 9 * i, - adev->vcn.inst[i].aid_id); - - WREG32_SOC15(VCN, ring->me, regVCN_RB1_DB_CTRL, - ring->doorbell_index << VCN_RB1_DB_CTRL__OFFSET__SHIFT | - VCN_RB1_DB_CTRL__EN_MASK); + adev->nbio.funcs->vcn_doorbell_range( + adev, ring->use_doorbell, + (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + + 9 * vcn_inst, + adev->vcn.inst[i].aid_id); + + WREG32_SOC15( + VCN, GET_INST(VCN, ring->me), + regVCN_RB1_DB_CTRL, + ring->doorbell_index + << VCN_RB1_DB_CTRL__OFFSET__SHIFT | + VCN_RB1_DB_CTRL__EN_MASK); } r = amdgpu_ring_test_helper(ring); @@ -278,54 +281,67 @@ static int vcn_v4_0_3_resume(void *handle) */ static void vcn_v4_0_3_mc_resume(struct amdgpu_device *adev, int inst_idx) { - uint32_t offset, size; + uint32_t offset, size, vcn_inst; const struct common_firmware_header *hdr; hdr = (const struct common_firmware_header *)adev->vcn.fw->data; size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8); + vcn_inst = GET_INST(VCN, inst_idx); /* cache window 0: fw */ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { - WREG32_SOC15(VCN, inst_idx, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW, - (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst_idx].tmr_mc_addr_lo)); - WREG32_SOC15(VCN, inst_idx, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH, - (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst_idx].tmr_mc_addr_hi)); - WREG32_SOC15(VCN, inst_idx, regUVD_VCPU_CACHE_OFFSET0, 0); + WREG32_SOC15( + VCN, vcn_inst, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW, + (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst_idx] + .tmr_mc_addr_lo)); + 
WREG32_SOC15( + VCN, vcn_inst, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH, + (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst_idx] + .tmr_mc_addr_hi)); + WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_CACHE_OFFSET0, 0); offset = 0; } else { - WREG32_SOC15(VCN, inst_idx, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW, - lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr)); - WREG32_SOC15(VCN, inst_idx, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH, - upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr)); + WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW, + lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr)); + WREG32_SOC15(VCN, vcn_inst, + regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH, + upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr)); offset = size; - WREG32_SOC15(VCN, inst_idx, regUVD_VCPU_CACHE_OFFSET0, - AMDGPU_UVD_FIRMWARE_OFFSET >> 3); + WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_CACHE_OFFSET0, + AMDGPU_UVD_FIRMWARE_OFFSET >> 3); } - WREG32_SOC15(VCN, inst_idx, regUVD_VCPU_CACHE_SIZE0, size); + WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_CACHE_SIZE0, size); /* cache window 1: stack */ - WREG32_SOC15(VCN, inst_idx, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW, - lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset)); - WREG32_SOC15(VCN, inst_idx, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH, - upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset)); - WREG32_SOC15(VCN, inst_idx, regUVD_VCPU_CACHE_OFFSET1, 0); - WREG32_SOC15(VCN, inst_idx, regUVD_VCPU_CACHE_SIZE1, AMDGPU_VCN_STACK_SIZE); + WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW, + lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset)); + WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH, + upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset)); + WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_CACHE_OFFSET1, 0); + WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_CACHE_SIZE1, + AMDGPU_VCN_STACK_SIZE); /* cache window 2: context */ - WREG32_SOC15(VCN, inst_idx, regUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW, - 
lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE)); - WREG32_SOC15(VCN, inst_idx, regUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH, - upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE)); - WREG32_SOC15(VCN, inst_idx, regUVD_VCPU_CACHE_OFFSET2, 0); - WREG32_SOC15(VCN, inst_idx, regUVD_VCPU_CACHE_SIZE2, AMDGPU_VCN_CONTEXT_SIZE); + WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW, + lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset + + AMDGPU_VCN_STACK_SIZE)); + WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH, + upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset + + AMDGPU_VCN_STACK_SIZE)); + WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_CACHE_OFFSET2, 0); + WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_CACHE_SIZE2, + AMDGPU_VCN_CONTEXT_SIZE); /* non-cache window */ - WREG32_SOC15(VCN, inst_idx, regUVD_LMI_VCPU_NC0_64BIT_BAR_LOW, + WREG32_SOC15( + VCN, vcn_inst, regUVD_LMI_VCPU_NC0_64BIT_BAR_LOW, lower_32_bits(adev->vcn.inst[inst_idx].fw_shared.gpu_addr)); - WREG32_SOC15(VCN, inst_idx, regUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH, + WREG32_SOC15( + VCN, vcn_inst, regUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH, upper_32_bits(adev->vcn.inst[inst_idx].fw_shared.gpu_addr)); - WREG32_SOC15(VCN, inst_idx, regUVD_VCPU_NONCACHE_OFFSET0, 0); - WREG32_SOC15(VCN, inst_idx, regUVD_VCPU_NONCACHE_SIZE0, + WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_NONCACHE_OFFSET0, 0); + WREG32_SOC15( + VCN, vcn_inst, regUVD_VCPU_NONCACHE_SIZE0, AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_vcn4_fw_shared))); } @@ -454,18 +470,21 @@ static void vcn_v4_0_3_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_i static void vcn_v4_0_3_disable_clock_gating(struct amdgpu_device *adev, int inst_idx) { uint32_t data; + int vcn_inst; if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG) return; + vcn_inst = GET_INST(VCN, inst_idx); + /* VCN disable CGC */ - data = RREG32_SOC15(VCN, inst_idx, regUVD_CGC_CTRL); + data = RREG32_SOC15(VCN, vcn_inst, 
regUVD_CGC_CTRL); data &= ~UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK; data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT; data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT; - WREG32_SOC15(VCN, inst_idx, regUVD_CGC_CTRL, data); + WREG32_SOC15(VCN, vcn_inst, regUVD_CGC_CTRL, data); - data = RREG32_SOC15(VCN, inst_idx, regUVD_CGC_GATE); + data = RREG32_SOC15(VCN, vcn_inst, regUVD_CGC_GATE); data &= ~(UVD_CGC_GATE__SYS_MASK | UVD_CGC_GATE__MPEG2_MASK | UVD_CGC_GATE__REGS_MASK @@ -479,10 +498,10 @@ static void vcn_v4_0_3_disable_clock_gating(struct amdgpu_device *adev, int inst | UVD_CGC_GATE__VCPU_MASK | UVD_CGC_GATE__MMSCH_MASK); - WREG32_SOC15(VCN, inst_idx, regUVD_CGC_GATE, data); - SOC15_WAIT_ON_RREG(VCN, inst_idx, regUVD_CGC_GATE, 0, 0xFFFFFFFF); + WREG32_SOC15(VCN, vcn_inst, regUVD_CGC_GATE, data); + SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_CGC_GATE, 0, 0xFFFFFFFF); - data = RREG32_SOC15(VCN, inst_idx, regUVD_CGC_CTRL); + data = RREG32_SOC15(VCN, vcn_inst, regUVD_CGC_CTRL); data &= ~(UVD_CGC_CTRL__SYS_MODE_MASK | UVD_CGC_CTRL__MPEG2_MODE_MASK | UVD_CGC_CTRL__REGS_MODE_MASK @@ -495,9 +514,9 @@ static void vcn_v4_0_3_disable_clock_gating(struct amdgpu_device *adev, int inst | UVD_CGC_CTRL__WCB_MODE_MASK | UVD_CGC_CTRL__VCPU_MODE_MASK | UVD_CGC_CTRL__MMSCH_MODE_MASK); - WREG32_SOC15(VCN, inst_idx, regUVD_CGC_CTRL, data); + WREG32_SOC15(VCN, vcn_inst, regUVD_CGC_CTRL, data); - data = RREG32_SOC15(VCN, inst_idx, regUVD_SUVD_CGC_GATE); + data = RREG32_SOC15(VCN, vcn_inst, regUVD_SUVD_CGC_GATE); data |= (UVD_SUVD_CGC_GATE__SRE_MASK | UVD_SUVD_CGC_GATE__SIT_MASK | UVD_SUVD_CGC_GATE__SMP_MASK @@ -519,9 +538,9 @@ static void vcn_v4_0_3_disable_clock_gating(struct amdgpu_device *adev, int inst | UVD_SUVD_CGC_GATE__SIT_VP9_DEC_MASK | UVD_SUVD_CGC_GATE__SDB_VP9_MASK | UVD_SUVD_CGC_GATE__IME_HEVC_MASK); - WREG32_SOC15(VCN, inst_idx, regUVD_SUVD_CGC_GATE, data); + WREG32_SOC15(VCN, vcn_inst, regUVD_SUVD_CGC_GATE, data); - data = RREG32_SOC15(VCN, inst_idx, regUVD_SUVD_CGC_CTRL); + data 
= RREG32_SOC15(VCN, vcn_inst, regUVD_SUVD_CGC_CTRL); data &= ~(UVD_SUVD_CGC_CTRL__SRE_MODE_MASK | UVD_SUVD_CGC_CTRL__SIT_MODE_MASK | UVD_SUVD_CGC_CTRL__SMP_MODE_MASK @@ -530,7 +549,7 @@ static void vcn_v4_0_3_disable_clock_gating(struct amdgpu_device *adev, int inst | UVD_SUVD_CGC_CTRL__ENT_MODE_MASK | UVD_SUVD_CGC_CTRL__IME_MODE_MASK | UVD_SUVD_CGC_CTRL__SITE_MODE_MASK); - WREG32_SOC15(VCN, inst_idx, regUVD_SUVD_CGC_CTRL, data); + WREG32_SOC15(VCN, vcn_inst, regUVD_SUVD_CGC_CTRL, data); } /** @@ -595,18 +614,21 @@ static void vcn_v4_0_3_disable_clock_gating_dpg_mode(struct amdgpu_device *adev, static void vcn_v4_0_3_enable_clock_gating(struct amdgpu_device *adev, int inst_idx) { uint32_t data; + int vcn_inst; if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG) return; + vcn_inst = GET_INST(VCN, inst_idx); + /* enable VCN CGC */ - data = RREG32_SOC15(VCN, inst_idx, regUVD_CGC_CTRL); + data = RREG32_SOC15(VCN, vcn_inst, regUVD_CGC_CTRL); data |= 0 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT; data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT; - WREG32_SOC15(VCN, inst_idx, regUVD_CGC_CTRL, data); + WREG32_SOC15(VCN, vcn_inst, regUVD_CGC_CTRL, data); - data = RREG32_SOC15(VCN, inst_idx, regUVD_CGC_CTRL); + data = RREG32_SOC15(VCN, vcn_inst, regUVD_CGC_CTRL); data |= (UVD_CGC_CTRL__SYS_MODE_MASK | UVD_CGC_CTRL__MPEG2_MODE_MASK | UVD_CGC_CTRL__REGS_MODE_MASK @@ -618,9 +640,9 @@ static void vcn_v4_0_3_enable_clock_gating(struct amdgpu_device *adev, int inst_ | UVD_CGC_CTRL__LRBBM_MODE_MASK | UVD_CGC_CTRL__WCB_MODE_MASK | UVD_CGC_CTRL__VCPU_MODE_MASK); - WREG32_SOC15(VCN, inst_idx, regUVD_CGC_CTRL, data); + WREG32_SOC15(VCN, vcn_inst, regUVD_CGC_CTRL, data); - data = RREG32_SOC15(VCN, inst_idx, regUVD_SUVD_CGC_CTRL); + data = RREG32_SOC15(VCN, vcn_inst, regUVD_SUVD_CGC_CTRL); data |= (UVD_SUVD_CGC_CTRL__SRE_MODE_MASK | UVD_SUVD_CGC_CTRL__SIT_MODE_MASK | UVD_SUVD_CGC_CTRL__SMP_MODE_MASK @@ -629,7 +651,7 @@ static void 
vcn_v4_0_3_enable_clock_gating(struct amdgpu_device *adev, int inst_ | UVD_SUVD_CGC_CTRL__ENT_MODE_MASK | UVD_SUVD_CGC_CTRL__IME_MODE_MASK | UVD_SUVD_CGC_CTRL__SITE_MODE_MASK); - WREG32_SOC15(VCN, inst_idx, regUVD_SUVD_CGC_CTRL, data); + WREG32_SOC15(VCN, vcn_inst, regUVD_SUVD_CGC_CTRL, data); } /** @@ -646,16 +668,18 @@ static int vcn_v4_0_3_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, b volatile struct amdgpu_vcn4_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr; struct amdgpu_ring *ring; + int vcn_inst; uint32_t tmp; + vcn_inst = GET_INST(VCN, inst_idx); /* disable register anti-hang mechanism */ - WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, regUVD_POWER_STATUS), 1, - ~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK); + WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_POWER_STATUS), 1, + ~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK); /* enable dynamic power gating mode */ - tmp = RREG32_SOC15(VCN, inst_idx, regUVD_POWER_STATUS); + tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_POWER_STATUS); tmp |= UVD_POWER_STATUS__UVD_PG_MODE_MASK; tmp |= UVD_POWER_STATUS__UVD_PG_EN_MASK; - WREG32_SOC15(VCN, inst_idx, regUVD_POWER_STATUS, tmp); + WREG32_SOC15(VCN, vcn_inst, regUVD_POWER_STATUS, tmp); if (indirect) adev->vcn.inst[inst_idx].dpg_sram_curr_addr = @@ -737,27 +761,28 @@ static int vcn_v4_0_3_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, b ring = &adev->vcn.inst[inst_idx].ring_enc[0]; /* program the RB_BASE for ring buffer */ - WREG32_SOC15(VCN, inst_idx, regUVD_RB_BASE_LO, - lower_32_bits(ring->gpu_addr)); - WREG32_SOC15(VCN, inst_idx, regUVD_RB_BASE_HI, - upper_32_bits(ring->gpu_addr)); + WREG32_SOC15(VCN, vcn_inst, regUVD_RB_BASE_LO, + lower_32_bits(ring->gpu_addr)); + WREG32_SOC15(VCN, vcn_inst, regUVD_RB_BASE_HI, + upper_32_bits(ring->gpu_addr)); - WREG32_SOC15(VCN, inst_idx, regUVD_RB_SIZE, ring->ring_size / sizeof(uint32_t)); + WREG32_SOC15(VCN, vcn_inst, regUVD_RB_SIZE, + ring->ring_size / sizeof(uint32_t)); /* resetting ring, fw should not 
check RB ring */ - tmp = RREG32_SOC15(VCN, inst_idx, regVCN_RB_ENABLE); + tmp = RREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE); tmp &= ~(VCN_RB_ENABLE__RB_EN_MASK); - WREG32_SOC15(VCN, inst_idx, regVCN_RB_ENABLE, tmp); + WREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE, tmp); fw_shared->sq.queue_mode |= FW_QUEUE_RING_RESET; /* Initialize the ring buffer's read and write pointers */ - WREG32_SOC15(VCN, inst_idx, regUVD_RB_RPTR, 0); - WREG32_SOC15(VCN, inst_idx, regUVD_RB_WPTR, 0); - ring->wptr = RREG32_SOC15(VCN, inst_idx, regUVD_RB_WPTR); + WREG32_SOC15(VCN, vcn_inst, regUVD_RB_RPTR, 0); + WREG32_SOC15(VCN, vcn_inst, regUVD_RB_WPTR, 0); + ring->wptr = RREG32_SOC15(VCN, vcn_inst, regUVD_RB_WPTR); - tmp = RREG32_SOC15(VCN, inst_idx, regVCN_RB_ENABLE); + tmp = RREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE); tmp |= VCN_RB_ENABLE__RB_EN_MASK; - WREG32_SOC15(VCN, inst_idx, regVCN_RB_ENABLE, tmp); + WREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE, tmp); fw_shared->sq.queue_mode &= ~(FW_QUEUE_RING_RESET | FW_QUEUE_DPG_HOLD_OFF); /*resetting done, fw can check RB ring */ @@ -777,99 +802,101 @@ static int vcn_v4_0_3_start(struct amdgpu_device *adev) { volatile struct amdgpu_vcn4_fw_shared *fw_shared; struct amdgpu_ring *ring; + int i, j, k, r, vcn_inst; uint32_t tmp; - int i, j, k, r; if (adev->pm.dpm_enabled) amdgpu_dpm_enable_uvd(adev, true); for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { - if (adev->vcn.harvest_config & (1 << i)) - continue; if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) { r = vcn_v4_0_3_start_dpg_mode(adev, i, adev->vcn.indirect_sram); continue; } + vcn_inst = GET_INST(VCN, i); /* set VCN status busy */ - tmp = RREG32_SOC15(VCN, i, regUVD_STATUS) | UVD_STATUS__UVD_BUSY; - WREG32_SOC15(VCN, i, regUVD_STATUS, tmp); + tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_STATUS) | + UVD_STATUS__UVD_BUSY; + WREG32_SOC15(VCN, vcn_inst, regUVD_STATUS, tmp); /*SW clock gating */ vcn_v4_0_3_disable_clock_gating(adev, i); /* enable VCPU clock */ - WREG32_P(SOC15_REG_OFFSET(VCN, i, 
regUVD_VCPU_CNTL), - UVD_VCPU_CNTL__CLK_EN_MASK, ~UVD_VCPU_CNTL__CLK_EN_MASK); + WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_VCPU_CNTL), + UVD_VCPU_CNTL__CLK_EN_MASK, + ~UVD_VCPU_CNTL__CLK_EN_MASK); /* disable master interrupt */ - WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_MASTINT_EN), 0, - ~UVD_MASTINT_EN__VCPU_EN_MASK); + WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_MASTINT_EN), 0, + ~UVD_MASTINT_EN__VCPU_EN_MASK); /* enable LMI MC and UMC channels */ - WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_LMI_CTRL2), 0, - ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK); + WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_LMI_CTRL2), 0, + ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK); - tmp = RREG32_SOC15(VCN, i, regUVD_SOFT_RESET); + tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET); tmp &= ~UVD_SOFT_RESET__LMI_SOFT_RESET_MASK; tmp &= ~UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK; - WREG32_SOC15(VCN, i, regUVD_SOFT_RESET, tmp); + WREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET, tmp); /* setup regUVD_LMI_CTRL */ - tmp = RREG32_SOC15(VCN, i, regUVD_LMI_CTRL); - WREG32_SOC15(VCN, i, regUVD_LMI_CTRL, tmp | - UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK | - UVD_LMI_CTRL__MASK_MC_URGENT_MASK | - UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK | - UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK); + tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_LMI_CTRL); + WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_CTRL, + tmp | UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK | + UVD_LMI_CTRL__MASK_MC_URGENT_MASK | + UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK | + UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK); /* setup regUVD_MPC_CNTL */ - tmp = RREG32_SOC15(VCN, i, regUVD_MPC_CNTL); + tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_MPC_CNTL); tmp &= ~UVD_MPC_CNTL__REPLACEMENT_MODE_MASK; tmp |= 0x2 << UVD_MPC_CNTL__REPLACEMENT_MODE__SHIFT; - WREG32_SOC15(VCN, i, regUVD_MPC_CNTL, tmp); + WREG32_SOC15(VCN, vcn_inst, regUVD_MPC_CNTL, tmp); /* setup UVD_MPC_SET_MUXA0 */ - WREG32_SOC15(VCN, i, regUVD_MPC_SET_MUXA0, - ((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) | - (0x2 << 
UVD_MPC_SET_MUXA0__VARA_2__SHIFT) | - (0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) | - (0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT))); + WREG32_SOC15(VCN, vcn_inst, regUVD_MPC_SET_MUXA0, + ((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) | + (0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) | + (0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) | + (0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT))); /* setup UVD_MPC_SET_MUXB0 */ - WREG32_SOC15(VCN, i, regUVD_MPC_SET_MUXB0, - ((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) | - (0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) | - (0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) | - (0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT))); + WREG32_SOC15(VCN, vcn_inst, regUVD_MPC_SET_MUXB0, + ((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) | + (0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) | + (0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) | + (0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT))); /* setup UVD_MPC_SET_MUX */ - WREG32_SOC15(VCN, i, regUVD_MPC_SET_MUX, - ((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) | - (0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) | - (0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT))); + WREG32_SOC15(VCN, vcn_inst, regUVD_MPC_SET_MUX, + ((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) | + (0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) | + (0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT))); vcn_v4_0_3_mc_resume(adev, i); /* VCN global tiling registers */ - WREG32_SOC15(VCN, i, regUVD_GFX8_ADDR_CONFIG, - adev->gfx.config.gb_addr_config); - WREG32_SOC15(VCN, i, regUVD_GFX10_ADDR_CONFIG, - adev->gfx.config.gb_addr_config); + WREG32_SOC15(VCN, vcn_inst, regUVD_GFX8_ADDR_CONFIG, + adev->gfx.config.gb_addr_config); + WREG32_SOC15(VCN, vcn_inst, regUVD_GFX10_ADDR_CONFIG, + adev->gfx.config.gb_addr_config); /* unblock VCPU register access */ - WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_RB_ARB_CTRL), 0, - ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK); + WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_RB_ARB_CTRL), 0, + ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK); /* release VCPU reset to boot */ - WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL), 0, - 
~UVD_VCPU_CNTL__BLK_RST_MASK); + WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_VCPU_CNTL), 0, + ~UVD_VCPU_CNTL__BLK_RST_MASK); for (j = 0; j < 10; ++j) { uint32_t status; for (k = 0; k < 100; ++k) { - status = RREG32_SOC15(VCN, i, regUVD_STATUS); + status = RREG32_SOC15(VCN, vcn_inst, + regUVD_STATUS); if (status & 2) break; mdelay(10); @@ -880,12 +907,14 @@ static int vcn_v4_0_3_start(struct amdgpu_device *adev) DRM_DEV_ERROR(adev->dev, "VCN decode not responding, trying to reset the VCPU!!!\n"); - WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL), - UVD_VCPU_CNTL__BLK_RST_MASK, - ~UVD_VCPU_CNTL__BLK_RST_MASK); + WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, + regUVD_VCPU_CNTL), + UVD_VCPU_CNTL__BLK_RST_MASK, + ~UVD_VCPU_CNTL__BLK_RST_MASK); mdelay(10); - WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL), 0, - ~UVD_VCPU_CNTL__BLK_RST_MASK); + WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, + regUVD_VCPU_CNTL), + 0, ~UVD_VCPU_CNTL__BLK_RST_MASK); mdelay(10); r = -1; @@ -897,39 +926,40 @@ static int vcn_v4_0_3_start(struct amdgpu_device *adev) } /* enable master interrupt */ - WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_MASTINT_EN), - UVD_MASTINT_EN__VCPU_EN_MASK, - ~UVD_MASTINT_EN__VCPU_EN_MASK); + WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_MASTINT_EN), + UVD_MASTINT_EN__VCPU_EN_MASK, + ~UVD_MASTINT_EN__VCPU_EN_MASK); /* clear the busy bit of VCN_STATUS */ - WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_STATUS), 0, - ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT)); + WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_STATUS), 0, + ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT)); ring = &adev->vcn.inst[i].ring_enc[0]; fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; /* program the RB_BASE for ring buffer */ - WREG32_SOC15(VCN, i, regUVD_RB_BASE_LO, - lower_32_bits(ring->gpu_addr)); - WREG32_SOC15(VCN, i, regUVD_RB_BASE_HI, - upper_32_bits(ring->gpu_addr)); + WREG32_SOC15(VCN, vcn_inst, regUVD_RB_BASE_LO, + lower_32_bits(ring->gpu_addr)); + WREG32_SOC15(VCN, vcn_inst, regUVD_RB_BASE_HI, + 
upper_32_bits(ring->gpu_addr)); - WREG32_SOC15(VCN, i, regUVD_RB_SIZE, ring->ring_size / sizeof(uint32_t)); + WREG32_SOC15(VCN, vcn_inst, regUVD_RB_SIZE, + ring->ring_size / sizeof(uint32_t)); /* resetting ring, fw should not check RB ring */ - tmp = RREG32_SOC15(VCN, i, regVCN_RB_ENABLE); + tmp = RREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE); tmp &= ~(VCN_RB_ENABLE__RB_EN_MASK); - WREG32_SOC15(VCN, i, regVCN_RB_ENABLE, tmp); + WREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE, tmp); /* Initialize the ring buffer's read and write pointers */ - WREG32_SOC15(VCN, i, regUVD_RB_RPTR, 0); - WREG32_SOC15(VCN, i, regUVD_RB_WPTR, 0); + WREG32_SOC15(VCN, vcn_inst, regUVD_RB_RPTR, 0); + WREG32_SOC15(VCN, vcn_inst, regUVD_RB_WPTR, 0); - tmp = RREG32_SOC15(VCN, i, regVCN_RB_ENABLE); + tmp = RREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE); tmp |= VCN_RB_ENABLE__RB_EN_MASK; - WREG32_SOC15(VCN, i, regVCN_RB_ENABLE, tmp); + WREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE, tmp); - ring->wptr = RREG32_SOC15(VCN, i, regUVD_RB_WPTR); + ring->wptr = RREG32_SOC15(VCN, vcn_inst, regUVD_RB_WPTR); fw_shared->sq.queue_mode &= cpu_to_le32(~(FW_QUEUE_RING_RESET | FW_QUEUE_DPG_HOLD_OFF)); @@ -948,21 +978,24 @@ static int vcn_v4_0_3_start(struct amdgpu_device *adev) static int vcn_v4_0_3_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx) { uint32_t tmp; + int vcn_inst; + + vcn_inst = GET_INST(VCN, inst_idx); /* Wait for power status to be 1 */ - SOC15_WAIT_ON_RREG(VCN, inst_idx, regUVD_POWER_STATUS, 1, - UVD_POWER_STATUS__UVD_POWER_STATUS_MASK); + SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_POWER_STATUS, 1, + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK); /* wait for read ptr to be equal to write ptr */ - tmp = RREG32_SOC15(VCN, inst_idx, regUVD_RB_WPTR); - SOC15_WAIT_ON_RREG(VCN, inst_idx, regUVD_RB_RPTR, tmp, 0xFFFFFFFF); + tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_RB_WPTR); + SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_RB_RPTR, tmp, 0xFFFFFFFF); - SOC15_WAIT_ON_RREG(VCN, inst_idx, regUVD_POWER_STATUS, 1, - 
UVD_POWER_STATUS__UVD_POWER_STATUS_MASK); + SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_POWER_STATUS, 1, + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK); /* disable dynamic power gating mode */ - WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, regUVD_POWER_STATUS), 0, - ~UVD_POWER_STATUS__UVD_PG_MODE_MASK); + WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_POWER_STATUS), 0, + ~UVD_POWER_STATUS__UVD_PG_MODE_MASK); return 0; } @@ -976,12 +1009,11 @@ static int vcn_v4_0_3_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx) static int vcn_v4_0_3_stop(struct amdgpu_device *adev) { volatile struct amdgpu_vcn4_fw_shared *fw_shared; + int i, r = 0, vcn_inst; uint32_t tmp; - int i, r = 0; for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { - if (adev->vcn.harvest_config & (1 << i)) - continue; + vcn_inst = GET_INST(VCN, i); fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; fw_shared->sq.queue_mode |= FW_QUEUE_DPG_HOLD_OFF; @@ -992,7 +1024,8 @@ static int vcn_v4_0_3_stop(struct amdgpu_device *adev) } /* wait for vcn idle */ - r = SOC15_WAIT_ON_RREG(VCN, i, regUVD_STATUS, UVD_STATUS__IDLE, 0x7); + r = SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_STATUS, + UVD_STATUS__IDLE, 0x7); if (r) goto Done; @@ -1000,45 +1033,47 @@ static int vcn_v4_0_3_stop(struct amdgpu_device *adev) UVD_LMI_STATUS__READ_CLEAN_MASK | UVD_LMI_STATUS__WRITE_CLEAN_MASK | UVD_LMI_STATUS__WRITE_CLEAN_RAW_MASK; - r = SOC15_WAIT_ON_RREG(VCN, i, regUVD_LMI_STATUS, tmp, tmp); + r = SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_LMI_STATUS, tmp, + tmp); if (r) goto Done; /* stall UMC channel */ - tmp = RREG32_SOC15(VCN, i, regUVD_LMI_CTRL2); + tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_LMI_CTRL2); tmp |= UVD_LMI_CTRL2__STALL_ARB_UMC_MASK; - WREG32_SOC15(VCN, i, regUVD_LMI_CTRL2, tmp); + WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_CTRL2, tmp); tmp = UVD_LMI_STATUS__UMC_READ_CLEAN_RAW_MASK | UVD_LMI_STATUS__UMC_WRITE_CLEAN_RAW_MASK; - r = SOC15_WAIT_ON_RREG(VCN, i, regUVD_LMI_STATUS, tmp, tmp); + r = SOC15_WAIT_ON_RREG(VCN, vcn_inst, 
regUVD_LMI_STATUS, tmp, + tmp); if (r) goto Done; /* Unblock VCPU Register access */ - WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_RB_ARB_CTRL), - UVD_RB_ARB_CTRL__VCPU_DIS_MASK, - ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK); + WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_RB_ARB_CTRL), + UVD_RB_ARB_CTRL__VCPU_DIS_MASK, + ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK); /* release VCPU reset to boot */ - WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL), - UVD_VCPU_CNTL__BLK_RST_MASK, - ~UVD_VCPU_CNTL__BLK_RST_MASK); + WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_VCPU_CNTL), + UVD_VCPU_CNTL__BLK_RST_MASK, + ~UVD_VCPU_CNTL__BLK_RST_MASK); /* disable VCPU clock */ - WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL), 0, - ~(UVD_VCPU_CNTL__CLK_EN_MASK)); + WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_VCPU_CNTL), 0, + ~(UVD_VCPU_CNTL__CLK_EN_MASK)); /* reset LMI UMC/LMI/VCPU */ - tmp = RREG32_SOC15(VCN, i, regUVD_SOFT_RESET); + tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET); tmp |= UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK; - WREG32_SOC15(VCN, i, regUVD_SOFT_RESET, tmp); + WREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET, tmp); - tmp = RREG32_SOC15(VCN, i, regUVD_SOFT_RESET); + tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET); tmp |= UVD_SOFT_RESET__LMI_SOFT_RESET_MASK; - WREG32_SOC15(VCN, i, regUVD_SOFT_RESET, tmp); + WREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET, tmp); /* clear VCN status */ - WREG32_SOC15(VCN, i, regUVD_STATUS, 0); + WREG32_SOC15(VCN, vcn_inst, regUVD_STATUS, 0); /* apply HW clock gating */ vcn_v4_0_3_enable_clock_gating(adev, i); @@ -1080,7 +1115,7 @@ static uint64_t vcn_v4_0_3_unified_ring_get_rptr(struct amdgpu_ring *ring) if (ring != &adev->vcn.inst[ring->me].ring_enc[0]) DRM_ERROR("wrong ring id is identified in %s", __func__); - return RREG32_SOC15(VCN, ring->me, regUVD_RB_RPTR); + return RREG32_SOC15(VCN, GET_INST(VCN, ring->me), regUVD_RB_RPTR); } /** @@ -1100,7 +1135,8 @@ static uint64_t vcn_v4_0_3_unified_ring_get_wptr(struct amdgpu_ring *ring) if 
(ring->use_doorbell) return *ring->wptr_cpu_addr; else - return RREG32_SOC15(VCN, ring->me, regUVD_RB_WPTR); + return RREG32_SOC15(VCN, GET_INST(VCN, ring->me), + regUVD_RB_WPTR); } /** @@ -1121,7 +1157,8 @@ static void vcn_v4_0_3_unified_ring_set_wptr(struct amdgpu_ring *ring) *ring->wptr_cpu_addr = lower_32_bits(ring->wptr); WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr)); } else { - WREG32_SOC15(VCN, ring->me, regUVD_RB_WPTR, lower_32_bits(ring->wptr)); + WREG32_SOC15(VCN, GET_INST(VCN, ring->me), regUVD_RB_WPTR, + lower_32_bits(ring->wptr)); } } @@ -1163,14 +1200,14 @@ static const struct amdgpu_ring_funcs vcn_v4_0_3_unified_ring_vm_funcs = { */ static void vcn_v4_0_3_set_unified_ring_funcs(struct amdgpu_device *adev) { - int i; + int i, vcn_inst; for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { - if (adev->vcn.harvest_config & (1 << i)) - continue; adev->vcn.inst[i].ring_enc[0].funcs = &vcn_v4_0_3_unified_ring_vm_funcs; adev->vcn.inst[i].ring_enc[0].me = i; - adev->vcn.inst[i].aid_id = i / adev->vcn.num_inst_per_aid; + vcn_inst = GET_INST(VCN, i); + adev->vcn.inst[i].aid_id = + vcn_inst / adev->vcn.num_inst_per_aid; } DRM_DEV_INFO(adev->dev, "VCN decode is enabled in VM mode\n"); } @@ -1188,9 +1225,8 @@ static bool vcn_v4_0_3_is_idle(void *handle) int i, ret = 1; for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { - if (adev->vcn.harvest_config & (1 << i)) - continue; - ret &= (RREG32_SOC15(VCN, i, regUVD_STATUS) == UVD_STATUS__IDLE); + ret &= (RREG32_SOC15(VCN, GET_INST(VCN, i), regUVD_STATUS) == + UVD_STATUS__IDLE); } return ret; @@ -1209,10 +1245,8 @@ static int vcn_v4_0_3_wait_for_idle(void *handle) int i, ret = 0; for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { - if (adev->vcn.harvest_config & (1 << i)) - continue; - ret = SOC15_WAIT_ON_RREG(VCN, i, regUVD_STATUS, UVD_STATUS__IDLE, - UVD_STATUS__IDLE); + ret = SOC15_WAIT_ON_RREG(VCN, GET_INST(VCN, i), regUVD_STATUS, + UVD_STATUS__IDLE, UVD_STATUS__IDLE); if (ret) return ret; } @@ -1235,10 +1269,9 @@ 
static int vcn_v4_0_3_set_clockgating_state(void *handle, int i; for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { - if (adev->vcn.harvest_config & (1 << i)) - continue; if (enable) { - if (RREG32_SOC15(VCN, i, regUVD_STATUS) != UVD_STATUS__IDLE) + if (RREG32_SOC15(VCN, GET_INST(VCN, i), + regUVD_STATUS) != UVD_STATUS__IDLE) return -EBUSY; vcn_v4_0_3_enable_clock_gating(adev, i); } else { @@ -1307,15 +1340,26 @@ static int vcn_v4_0_3_process_interrupt(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry) { - uint32_t i; + uint32_t i, inst; i = node_id_to_phys_map[entry->node_id]; DRM_DEV_DEBUG(adev->dev, "IH: VCN TRAP\n"); + for (inst = 0; inst < adev->vcn.num_vcn_inst; ++inst) + if (adev->vcn.inst[inst].aid_id == i) + break; + + if (inst >= adev->vcn.num_vcn_inst) { + dev_WARN_ONCE(adev->dev, 1, + "Interrupt received for unknown VCN instance %d", + entry->node_id); + return 0; + } + switch (entry->src_id) { case VCN_4_0__SRCID__UVD_ENC_GENERAL_PURPOSE: - amdgpu_fence_process(&adev->vcn.inst[i].ring_enc[0]); + amdgpu_fence_process(&adev->vcn.inst[inst].ring_enc[0]); break; default: DRM_DEV_ERROR(adev->dev, "Unhandled interrupt: %d %d\n", @@ -1343,9 +1387,6 @@ static void vcn_v4_0_3_set_irq_funcs(struct amdgpu_device *adev) int i; for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { - if (adev->vcn.harvest_config & (1 << i)) - continue; - adev->vcn.inst->irq.num_types++; } adev->vcn.inst->irq.funcs = &vcn_v4_0_3_irq_funcs; -- cgit From 358e6c38300b7d2b7d7122d4fe485d8a4580dc1e Mon Sep 17 00:00:00 2001 From: James Zhu Date: Wed, 15 Mar 2023 04:09:25 -0400 Subject: drm/amdgpu: use physical AID index for ring name Use physical AID index for VCN/JPEG ring name instead of logical AID index. 
Signed-off-by: James Zhu Reviewed-by: Sonny Jiang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c | 2 +- drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c') diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c index c0e90e27f24b..ea9cb098a144 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c @@ -110,7 +110,7 @@ static int jpeg_v4_0_3_sw_init(void *handle) ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 1 + j + 9 * jpeg_inst; - sprintf(ring->name, "jpeg_dec_%d.%d", i, j); + sprintf(ring->name, "jpeg_dec_%d.%d", adev->jpeg.inst[i].aid_id, j); r = amdgpu_ring_init(adev, ring, 512, &adev->jpeg.inst->irq, 0, AMDGPU_RING_PRIO_DEFAULT, NULL); if (r) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c index 759f64a4acf4..b0e28d611f2d 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c @@ -112,7 +112,7 @@ static int vcn_v4_0_3_sw_init(void *handle) (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 9 * vcn_inst; ring->vm_hub = AMDGPU_MMHUB0(adev->vcn.inst[i].aid_id); - sprintf(ring->name, "vcn_unified_%d", i); + sprintf(ring->name, "vcn_unified_%d", adev->vcn.inst[i].aid_id); r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst->irq, 0, AMDGPU_RING_PRIO_DEFAULT, &adev->vcn.inst[i].sched_score); -- cgit From 41e491d8b606ea55b7234967f802cec8e6d77952 Mon Sep 17 00:00:00 2001 From: Hawking Zhang Date: Thu, 2 Mar 2023 17:56:59 +0800 Subject: drm/amdgpu: Add query_ras_error_count for jpeg v4_0_3 Add query_ras_error_count callback for jpeg v4_0_3. It will be used to query and log jpeg error count. 
Signed-off-by: Hawking Zhang Reviewed-by: Tao Zhou Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c | 64 ++++++++++++++++++++++++++++++++ 1 file changed, 64 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c') diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c index ea9cb098a144..5dedba91fa32 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c @@ -960,3 +960,67 @@ const struct amdgpu_ip_block_version jpeg_v4_0_3_ip_block = { .rev = 3, .funcs = &jpeg_v4_0_3_ip_funcs, }; + +static const struct amdgpu_ras_err_status_reg_entry jpeg_v4_0_3_ue_reg_list[] = { + {AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG0S, regVCN_UE_ERR_STATUS_HI_JPEG0S), + 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG0S"}, + {AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG0D, regVCN_UE_ERR_STATUS_HI_JPEG0D), + 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG0D"}, + {AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG1S, regVCN_UE_ERR_STATUS_HI_JPEG1S), + 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG1S"}, + {AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG1D, regVCN_UE_ERR_STATUS_HI_JPEG1D), + 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG1D"}, + {AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG2S, regVCN_UE_ERR_STATUS_HI_JPEG2S), + 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG2S"}, + {AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG2D, regVCN_UE_ERR_STATUS_HI_JPEG2D), + 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG2D"}, + {AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG3S, regVCN_UE_ERR_STATUS_HI_JPEG3S), + 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG3S"}, + {AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG3D, 
regVCN_UE_ERR_STATUS_HI_JPEG3D), + 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG3D"}, + {AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG4S, regVCN_UE_ERR_STATUS_HI_JPEG4S), + 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG4S"}, + {AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG4D, regVCN_UE_ERR_STATUS_HI_JPEG4D), + 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG4D"}, + {AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG5S, regVCN_UE_ERR_STATUS_HI_JPEG5S), + 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG5S"}, + {AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG5D, regVCN_UE_ERR_STATUS_HI_JPEG5D), + 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG5D"}, + {AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG6S, regVCN_UE_ERR_STATUS_HI_JPEG6S), + 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG6S"}, + {AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG6D, regVCN_UE_ERR_STATUS_HI_JPEG6D), + 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG6D"}, + {AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG7S, regVCN_UE_ERR_STATUS_HI_JPEG7S), + 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG7S"}, + {AMDGPU_RAS_REG_ENTRY(JPEG, 0, regVCN_UE_ERR_STATUS_LO_JPEG7D, regVCN_UE_ERR_STATUS_HI_JPEG7D), + 1, (AMDGPU_RAS_ERR_INFO_VALID | AMDGPU_RAS_ERR_STATUS_VALID), "JPEG7D"}, +}; + +static void jpeg_v4_0_3_inst_query_ras_error_count(struct amdgpu_device *adev, + uint32_t jpeg_inst, + void *ras_err_status) +{ + struct ras_err_data *err_data = (struct ras_err_data *)ras_err_status; + + /* jpeg v4_0_3 only supports uncorrectable errors */ + amdgpu_ras_inst_query_ras_error_count(adev, + jpeg_v4_0_3_ue_reg_list, + ARRAY_SIZE(jpeg_v4_0_3_ue_reg_list), + NULL, 0, GET_INST(VCN, jpeg_inst), + AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE, + &err_data->ue_count); +} + +static void
jpeg_v4_0_3_query_ras_error_count(struct amdgpu_device *adev, + void *ras_err_status) +{ + uint32_t i; + + if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__JPEG)) { + dev_warn(adev->dev, "JPEG RAS is not supported\n"); + return; + } + + for (i = 0; i < adev->jpeg.num_jpeg_inst; i++) + jpeg_v4_0_3_inst_query_ras_error_count(adev, i, ras_err_status); +} -- cgit From 570df4bca6187f493a1315a7373d7eb1285b3e86 Mon Sep 17 00:00:00 2001 From: Hawking Zhang Date: Thu, 2 Mar 2023 18:04:24 +0800 Subject: drm/amdgpu: Add reset_ras_error_count for jpeg v4_0_3 Add reset_ras_error_count callback for jpeg v4_0_3. It will be used to reset jpeg ras error count. Signed-off-by: Hawking Zhang Reviewed-by: Tao Zhou Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c') diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c index 5dedba91fa32..21226d6d26f8 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c @@ -1024,3 +1024,25 @@ static void jpeg_v4_0_3_query_ras_error_count(struct amdgpu_device *adev, for (i = 0; i < adev->jpeg.num_jpeg_inst; i++) jpeg_v4_0_3_inst_query_ras_error_count(adev, i, ras_err_status); } + +static void jpeg_v4_0_3_inst_reset_ras_error_count(struct amdgpu_device *adev, + uint32_t jpeg_inst) +{ + amdgpu_ras_inst_reset_ras_error_count(adev, + jpeg_v4_0_3_ue_reg_list, + ARRAY_SIZE(jpeg_v4_0_3_ue_reg_list), + GET_INST(VCN, jpeg_inst)); +} + +static void jpeg_v4_0_3_reset_ras_error_count(struct amdgpu_device *adev) +{ + uint32_t i; + + if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__JPEG)) { + dev_warn(adev->dev, "JPEG RAS is not supported\n"); + return; + } + + for (i = 0; i < adev->jpeg.num_jpeg_inst; i++) + jpeg_v4_0_3_inst_reset_ras_error_count(adev, i); +} -- cgit From 35d54e21e002198c13647b6cd8c77586f683cf39 Mon Sep 17 00:00:00 
2001 From: Hawking Zhang Date: Mon, 6 Mar 2023 11:03:27 +0800 Subject: drm/amdgpu: Initialize jpeg v4_0_3 ras function Initialize jpeg v4_0_3 ras function. Signed-off-by: Hawking Zhang Reviewed-by: Tao Zhou Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c') diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c index 21226d6d26f8..ede15a3a4701 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c @@ -40,6 +40,7 @@ static void jpeg_v4_0_3_set_dec_ring_funcs(struct amdgpu_device *adev); static void jpeg_v4_0_3_set_irq_funcs(struct amdgpu_device *adev); static int jpeg_v4_0_3_set_powergating_state(void *handle, enum amd_powergating_state state); +static void jpeg_v4_0_3_set_ras_funcs(struct amdgpu_device *adev); static int amdgpu_ih_srcid_jpeg[] = { VCN_4_0__SRCID__JPEG_DECODE, @@ -67,6 +68,7 @@ static int jpeg_v4_0_3_early_init(void *handle) jpeg_v4_0_3_set_dec_ring_funcs(adev); jpeg_v4_0_3_set_irq_funcs(adev); + jpeg_v4_0_3_set_ras_funcs(adev); return 0; } @@ -126,6 +128,14 @@ static int jpeg_v4_0_3_sw_init(void *handle) } } + if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__JPEG)) { + r = amdgpu_jpeg_ras_sw_init(adev); + if (r) { + dev_err(adev->dev, "Failed to initialize jpeg ras block!\n"); + return r; + } + } + return 0; } @@ -1046,3 +1056,19 @@ static void jpeg_v4_0_3_reset_ras_error_count(struct amdgpu_device *adev) for (i = 0; i < adev->jpeg.num_jpeg_inst; i++) jpeg_v4_0_3_inst_reset_ras_error_count(adev, i); } + +static const struct amdgpu_ras_block_hw_ops jpeg_v4_0_3_ras_hw_ops = { + .query_ras_error_count = jpeg_v4_0_3_query_ras_error_count, + .reset_ras_error_count = jpeg_v4_0_3_reset_ras_error_count, +}; + +static struct amdgpu_jpeg_ras jpeg_v4_0_3_ras = { + .ras_block = { + .hw_ops = &jpeg_v4_0_3_ras_hw_ops, + }, +}; + 
+static void jpeg_v4_0_3_set_ras_funcs(struct amdgpu_device *adev) +{ + adev->jpeg.ras = &jpeg_v4_0_3_ras; +} -- cgit From b3b0e016ec44d94db48a7d01b69570b5de37a31c Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Fri, 19 May 2023 10:34:50 +0530 Subject: drm/amdgpu: Fix uninitialized variable in jpeg_v4_0_3_is_idle & jpeg_v4_0_3_wait_for_idle MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c:752:4: error: variable 'ret' is uninitialized when used here [-Werror,-Wuninitialized] ret &= ((RREG32_SOC15_OFFSET( ^~~ drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c:745:10: note: initialize the variable 'ret' to silence this warning bool ret; ^ = 0 drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c:774:4: error: variable 'ret' is uninitialized when used here [-Werror,-Wuninitialized] ret &= SOC15_WAIT_ON_RREG_OFFSET( ^~~ drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c:767:9: note: initialize the variable 'ret' to silence this warning int ret; ^ = 0 2 errors generated.
Cc: Luben Tuikov Cc: Alex Deucher Cc: Christian König Cc: James Zhu Cc: Leo Liu Signed-off-by: Srinivasan Shanmugam Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c') diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c index ede15a3a4701..ce2b22f7e4e4 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c @@ -742,7 +742,7 @@ static void jpeg_v4_0_3_dec_ring_nop(struct amdgpu_ring *ring, uint32_t count) static bool jpeg_v4_0_3_is_idle(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - bool ret; + bool ret = false; int i, j; for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { @@ -764,7 +764,7 @@ static bool jpeg_v4_0_3_is_idle(void *handle) static int jpeg_v4_0_3_wait_for_idle(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - int ret; + int ret = 0; int i, j; for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { -- cgit