diff options
author | Jiri Kosina <jkosina@suse.cz> | 2017-05-02 11:02:41 +0200 |
---|---|---|
committer | Jiri Kosina <jkosina@suse.cz> | 2017-05-02 11:02:41 +0200 |
commit | 4d6ca227c768b50b05cf183974b40abe444e9d0c (patch) | |
tree | bf953d8e895281053548b9967a2c4b58d641df00 /drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | |
parent | 800f3eef8ebc1264e9c135bfa892c8ae41fa4792 (diff) | |
parent | af22a610bc38508d5ea760507d31be6b6983dfa8 (diff) |
Merge branch 'for-4.12/asus' into for-linus
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c')
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 898 |
1 files changed, 782 insertions, 116 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 373374164bd5..67afc901905c 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -657,6 +657,8 @@ static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev); static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev); static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev); static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev); +static void gfx_v8_0_ring_emit_ce_meta_init(struct amdgpu_ring *ring, uint64_t addr); +static void gfx_v8_0_ring_emit_de_meta_init(struct amdgpu_ring *ring, uint64_t addr); static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev) { @@ -749,14 +751,9 @@ static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev) static void gfx_v8_0_scratch_init(struct amdgpu_device *adev) { - int i; - adev->gfx.scratch.num_reg = 7; adev->gfx.scratch.reg_base = mmSCRATCH_REG0; - for (i = 0; i < adev->gfx.scratch.num_reg; i++) { - adev->gfx.scratch.free[i] = true; - adev->gfx.scratch.reg[i] = adev->gfx.scratch.reg_base + i; - } + adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1; } static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring) @@ -829,7 +826,7 @@ static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) ib.ptr[2] = 0xDEADBEEF; ib.length_dw = 3; - r = amdgpu_ib_schedule(ring, 1, &ib, NULL, NULL, &f); + r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f); if (r) goto err2; @@ -941,6 +938,13 @@ static int gfx_v8_0_init_microcode(struct amdgpu_device *adev) goto out; cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data; adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); + + /* chain ib ucode isn't formal released, just disable it by far + * TODO: when ucod ready we should use ucode version to judge if + * chain-ib support or not. + */ + adev->virt.chained_ib_support = false; + adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name); @@ -1367,6 +1371,51 @@ static void gfx_v8_0_mec_fini(struct amdgpu_device *adev) } } +static int gfx_v8_0_kiq_init_ring(struct amdgpu_device *adev, + struct amdgpu_ring *ring, + struct amdgpu_irq_src *irq) +{ + int r = 0; + + if (amdgpu_sriov_vf(adev)) { + r = amdgpu_wb_get(adev, &adev->virt.reg_val_offs); + if (r) + return r; + } + + ring->adev = NULL; + ring->ring_obj = NULL; + ring->use_doorbell = true; + ring->doorbell_index = AMDGPU_DOORBELL_KIQ; + if (adev->gfx.mec2_fw) { + ring->me = 2; + ring->pipe = 0; + } else { + ring->me = 1; + ring->pipe = 1; + } + + irq->data = ring; + ring->queue = 0; + sprintf(ring->name, "kiq %d.%d.%d", ring->me, ring->pipe, ring->queue); + r = amdgpu_ring_init(adev, ring, 1024, + irq, AMDGPU_CP_KIQ_IRQ_DRIVER0); + if (r) + dev_warn(adev->dev, "(%d) failed to init kiq ring\n", r); + + return r; +} + +static void gfx_v8_0_kiq_free_ring(struct amdgpu_ring *ring, + struct amdgpu_irq_src *irq) +{ + if (amdgpu_sriov_vf(ring->adev)) + amdgpu_wb_free(ring->adev, ring->adev->virt.reg_val_offs); + + amdgpu_ring_fini(ring); + irq->data = NULL; +} + #define MEC_HPD_SIZE 2048 static int gfx_v8_0_mec_init(struct amdgpu_device *adev) @@ -1421,6 +1470,35 @@ static int gfx_v8_0_mec_init(struct amdgpu_device *adev) return 0; } +static void gfx_v8_0_kiq_fini(struct amdgpu_device *adev) +{ + struct amdgpu_kiq *kiq = &adev->gfx.kiq; + + amdgpu_bo_free_kernel(&kiq->eop_obj, &kiq->eop_gpu_addr, NULL); + kiq->eop_obj = NULL; +} + +static int gfx_v8_0_kiq_init(struct amdgpu_device *adev) +{ + int r; + u32 *hpd; + struct amdgpu_kiq *kiq = &adev->gfx.kiq; + + r = amdgpu_bo_create_kernel(adev, MEC_HPD_SIZE, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_GTT, &kiq->eop_obj, + &kiq->eop_gpu_addr, (void **)&hpd); + if (r) { + dev_warn(adev->dev, "failed to create KIQ bo (%d).\n", r); + return r; + } + + memset(hpd, 0, MEC_HPD_SIZE); + + amdgpu_bo_kunmap(kiq->eop_obj); + + return 0; +} + static const u32 vgpr_init_compute_shader[] = { 0x7e000209, 0x7e020208, @@ -1702,7 +1780,7 @@ static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev) ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4); /* shedule the ib on the ring */ - r = amdgpu_ib_schedule(ring, 1, &ib, NULL, NULL, &f); + r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f); if (r) { DRM_ERROR("amdgpu: ib submit failed (%d).\n", r); goto fail; @@ -1997,8 +2075,14 @@ static int gfx_v8_0_sw_init(void *handle) { int i, r; struct amdgpu_ring *ring; + struct amdgpu_kiq *kiq; struct amdgpu_device *adev = (struct amdgpu_device *)handle; + /* KIQ event */ + r = amdgpu_irq_add_id(adev, 178, &adev->gfx.kiq.irq); + if (r) + return r; + /* EOP Event */ r = amdgpu_irq_add_id(adev, 181, &adev->gfx.eop_irq); if (r) @@ -2036,6 +2120,17 @@ static int gfx_v8_0_sw_init(void *handle) return r; } + r = gfx_v8_0_kiq_init(adev); + if (r) { + DRM_ERROR("Failed to init KIQ BOs!\n"); + return r; + } + + kiq = &adev->gfx.kiq; + r = gfx_v8_0_kiq_init_ring(adev, &kiq->ring, &kiq->irq); + if (r) + return r; + /* set up the gfx ring */ for (i = 0; i < adev->gfx.num_gfx_rings; i++) { ring = &adev->gfx.gfx_ring[i]; @@ -2119,7 +2214,9 @@ static int gfx_v8_0_sw_fini(void *handle) amdgpu_ring_fini(&adev->gfx.gfx_ring[i]); for (i = 0; i < adev->gfx.num_compute_rings; i++) amdgpu_ring_fini(&adev->gfx.compute_ring[i]); + gfx_v8_0_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq); + gfx_v8_0_kiq_fini(adev); gfx_v8_0_mec_fini(adev); gfx_v8_0_rlc_fini(adev); gfx_v8_0_free_microcode(adev); @@ -3801,6 +3898,14 @@ static void gfx_v8_0_gpu_init(struct amdgpu_device *adev) PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) | (adev->gfx.config.sc_earlyz_tile_fifo_size << PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT)); + + tmp = RREG32(mmSPI_ARB_PRIORITY); + tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS0, 2); + tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS1, 2); + tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS2, 2); + tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS3, 2); + WREG32(mmSPI_ARB_PRIORITY, tmp); + mutex_unlock(&adev->grbm_idx_mutex); } @@ -4024,17 +4129,6 @@ static void gfx_v8_0_init_pg(struct amdgpu_device *adev) WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8); gfx_v8_0_init_power_gating(adev); WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask); - if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) { - cz_enable_sck_slow_down_on_power_up(adev, true); - cz_enable_sck_slow_down_on_power_down(adev, true); - } else { - cz_enable_sck_slow_down_on_power_up(adev, false); - cz_enable_sck_slow_down_on_power_down(adev, false); - } - if (adev->pg_flags & AMD_PG_SUPPORT_CP) - cz_enable_cp_power_gating(adev, true); - else - cz_enable_cp_power_gating(adev, false); } else if ((adev->asic_type == CHIP_POLARIS11) || (adev->asic_type == CHIP_POLARIS12)) { gfx_v8_0_init_csb(adev); @@ -4506,6 +4600,393 @@ static void gfx_v8_0_cp_compute_fini(struct amdgpu_device *adev) } } +/* KIQ functions */ +static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring) +{ + uint32_t tmp; + struct amdgpu_device *adev = ring->adev; + + /* tell RLC which is KIQ queue */ + tmp = RREG32(mmRLC_CP_SCHEDULERS); + tmp &= 0xffffff00; + tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue); + WREG32(mmRLC_CP_SCHEDULERS, tmp); + tmp |= 0x80; + WREG32(mmRLC_CP_SCHEDULERS, tmp); +} + +static void gfx_v8_0_kiq_enable(struct amdgpu_ring *ring) +{ + amdgpu_ring_alloc(ring, 8); + /* set resources */ + amdgpu_ring_write(ring, PACKET3(PACKET3_SET_RESOURCES, 6)); + amdgpu_ring_write(ring, 0); /* vmid_mask:0 queue_type:0 (KIQ) */ + amdgpu_ring_write(ring, 0x000000FF); /* queue mask lo */ + amdgpu_ring_write(ring, 0); /* queue mask hi */ + amdgpu_ring_write(ring, 0); /* gws mask lo */ + amdgpu_ring_write(ring, 0); /* gws mask hi */ + amdgpu_ring_write(ring, 0); /* oac mask */ + amdgpu_ring_write(ring, 0); /* gds heap base:0, gds heap size:0 */ + amdgpu_ring_commit(ring); + udelay(50); +} + +static void gfx_v8_0_map_queue_enable(struct amdgpu_ring *kiq_ring, + struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = kiq_ring->adev; + uint64_t mqd_addr, wptr_addr; + + mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj); + wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); + amdgpu_ring_alloc(kiq_ring, 8); + + amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5)); + /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/ + amdgpu_ring_write(kiq_ring, 0x21010000); + amdgpu_ring_write(kiq_ring, (ring->doorbell_index << 2) | + (ring->queue << 26) | + (ring->pipe << 29) | + ((ring->me == 1 ? 0 : 1) << 31)); /* doorbell */ + amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr)); + amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr)); + amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr)); + amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr)); + amdgpu_ring_commit(kiq_ring); + udelay(50); +} + +static int gfx_v8_0_mqd_init(struct amdgpu_device *adev, + struct vi_mqd *mqd, + uint64_t mqd_gpu_addr, + uint64_t eop_gpu_addr, + struct amdgpu_ring *ring) +{ + uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr; + uint32_t tmp; + + mqd->header = 0xC0310800; + mqd->compute_pipelinestat_enable = 0x00000001; + mqd->compute_static_thread_mgmt_se0 = 0xffffffff; + mqd->compute_static_thread_mgmt_se1 = 0xffffffff; + mqd->compute_static_thread_mgmt_se2 = 0xffffffff; + mqd->compute_static_thread_mgmt_se3 = 0xffffffff; + mqd->compute_misc_reserved = 0x00000003; + + eop_base_addr = eop_gpu_addr >> 8; + mqd->cp_hqd_eop_base_addr_lo = eop_base_addr; + mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr); + + /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ + tmp = RREG32(mmCP_HQD_EOP_CONTROL); + tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE, + (order_base_2(MEC_HPD_SIZE / 4) - 1)); + + mqd->cp_hqd_eop_control = tmp; + + /* enable doorbell? */ + tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL); + + if (ring->use_doorbell) + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, + DOORBELL_EN, 1); + else + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, + DOORBELL_EN, 0); + + mqd->cp_hqd_pq_doorbell_control = tmp; + + /* disable the queue if it's active */ + mqd->cp_hqd_dequeue_request = 0; + mqd->cp_hqd_pq_rptr = 0; + mqd->cp_hqd_pq_wptr = 0; + + /* set the pointer to the MQD */ + mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffffffc; + mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr); + + /* set MQD vmid to 0 */ + tmp = RREG32(mmCP_MQD_CONTROL); + tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0); + mqd->cp_mqd_control = tmp; + + /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ + hqd_gpu_addr = ring->gpu_addr >> 8; + mqd->cp_hqd_pq_base_lo = hqd_gpu_addr; + mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr); + + /* set up the HQD, this is similar to CP_RB0_CNTL */ + tmp = RREG32(mmCP_HQD_PQ_CONTROL); + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE, + (order_base_2(ring->ring_size / 4) - 1)); + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE, + ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8)); +#ifdef __BIG_ENDIAN + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1); +#endif + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0); + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0); + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1); + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1); + mqd->cp_hqd_pq_control = tmp; + + /* set the wb address whether it's enabled or not */ + wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4); + mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc; + mqd->cp_hqd_pq_rptr_report_addr_hi = + upper_32_bits(wb_gpu_addr) & 0xffff; + + /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ + wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); + mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc; + mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff; + + tmp = 0; + /* enable the doorbell if requested */ + if (ring->use_doorbell) { + tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL); + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, + DOORBELL_OFFSET, ring->doorbell_index); + + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, + DOORBELL_EN, 1); + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, + DOORBELL_SOURCE, 0); + tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, + DOORBELL_HIT, 0); + } + + mqd->cp_hqd_pq_doorbell_control = tmp; + + /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ + ring->wptr = 0; + mqd->cp_hqd_pq_wptr = ring->wptr; + mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR); + + /* set the vmid for the queue */ + mqd->cp_hqd_vmid = 0; + + tmp = RREG32(mmCP_HQD_PERSISTENT_STATE); + tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53); + mqd->cp_hqd_persistent_state = tmp; + + /* activate the queue */ + mqd->cp_hqd_active = 1; + + return 0; +} + +static int gfx_v8_0_kiq_init_register(struct amdgpu_device *adev, + struct vi_mqd *mqd, + struct amdgpu_ring *ring) +{ + uint32_t tmp; + int j; + + /* disable wptr polling */ + tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL); + tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0); + WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp); + + WREG32(mmCP_HQD_EOP_BASE_ADDR, mqd->cp_hqd_eop_base_addr_lo); + WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, mqd->cp_hqd_eop_base_addr_hi); + + /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ + WREG32(mmCP_HQD_EOP_CONTROL, mqd->cp_hqd_eop_control); + + /* enable doorbell? */ + WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, mqd->cp_hqd_pq_doorbell_control); + + /* disable the queue if it's active */ + if (RREG32(mmCP_HQD_ACTIVE) & 1) { + WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1); + for (j = 0; j < adev->usec_timeout; j++) { + if (!(RREG32(mmCP_HQD_ACTIVE) & 1)) + break; + udelay(1); + } + WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request); + WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr); + WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr); + } + + /* set the pointer to the MQD */ + WREG32(mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo); + WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi); + + /* set MQD vmid to 0 */ + WREG32(mmCP_MQD_CONTROL, mqd->cp_mqd_control); + + /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ + WREG32(mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo); + WREG32(mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi); + + /* set up the HQD, this is similar to CP_RB0_CNTL */ + WREG32(mmCP_HQD_PQ_CONTROL, mqd->cp_hqd_pq_control); + + /* set the wb address whether it's enabled or not */ + WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR, + mqd->cp_hqd_pq_rptr_report_addr_lo); + WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI, + mqd->cp_hqd_pq_rptr_report_addr_hi); + + /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ + WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr_lo); + WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, mqd->cp_hqd_pq_wptr_poll_addr_hi); + + /* enable the doorbell if requested */ + if (ring->use_doorbell) { + if ((adev->asic_type == CHIP_CARRIZO) || + (adev->asic_type == CHIP_FIJI) || + (adev->asic_type == CHIP_STONEY)) { + WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, + AMDGPU_DOORBELL_KIQ << 2); + WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, + AMDGPU_DOORBELL_MEC_RING7 << 2); + } + } + WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, mqd->cp_hqd_pq_doorbell_control); + + /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ + WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr); + + /* set the vmid for the queue */ + WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid); + + WREG32(mmCP_HQD_PERSISTENT_STATE, mqd->cp_hqd_persistent_state); + + /* activate the queue */ + WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active); + + if (ring->use_doorbell) { + tmp = RREG32(mmCP_PQ_STATUS); + tmp = REG_SET_FIELD(tmp, CP_PQ_STATUS, DOORBELL_ENABLE, 1); + WREG32(mmCP_PQ_STATUS, tmp); + } + + return 0; +} + +static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring, + struct vi_mqd *mqd, + u64 mqd_gpu_addr) +{ + struct amdgpu_device *adev = ring->adev; + struct amdgpu_kiq *kiq = &adev->gfx.kiq; + uint64_t eop_gpu_addr; + bool is_kiq = false; + + if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ) + is_kiq = true; + + if (is_kiq) { + eop_gpu_addr = kiq->eop_gpu_addr; + gfx_v8_0_kiq_setting(&kiq->ring); + } else + eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + + ring->queue * MEC_HPD_SIZE; + + mutex_lock(&adev->srbm_mutex); + vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); + + gfx_v8_0_mqd_init(adev, mqd, mqd_gpu_addr, eop_gpu_addr, ring); + + if (is_kiq) + gfx_v8_0_kiq_init_register(adev, mqd, ring); + + vi_srbm_select(adev, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); + + if (is_kiq) + gfx_v8_0_kiq_enable(ring); + else + gfx_v8_0_map_queue_enable(&kiq->ring, ring); + + return 0; +} + +static void gfx_v8_0_kiq_free_queue(struct amdgpu_device *adev) +{ + struct amdgpu_ring *ring = NULL; + int i; + + for (i = 0; i < adev->gfx.num_compute_rings; i++) { + ring = &adev->gfx.compute_ring[i]; + amdgpu_bo_free_kernel(&ring->mqd_obj, NULL, NULL); + ring->mqd_obj = NULL; + } + + ring = &adev->gfx.kiq.ring; + amdgpu_bo_free_kernel(&ring->mqd_obj, NULL, NULL); + ring->mqd_obj = NULL; +} + +static int gfx_v8_0_kiq_setup_queue(struct amdgpu_device *adev, + struct amdgpu_ring *ring) +{ + struct vi_mqd *mqd; + u64 mqd_gpu_addr; + u32 *buf; + int r = 0; + + r = amdgpu_bo_create_kernel(adev, sizeof(struct vi_mqd), PAGE_SIZE, + AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj, + &mqd_gpu_addr, (void **)&buf); + if (r) { + dev_warn(adev->dev, "failed to create ring mqd ob (%d)", r); + return r; + } + + /* init the mqd struct */ + memset(buf, 0, sizeof(struct vi_mqd)); + mqd = (struct vi_mqd *)buf; + + r = gfx_v8_0_kiq_init_queue(ring, mqd, mqd_gpu_addr); + if (r) + return r; + + amdgpu_bo_kunmap(ring->mqd_obj); + + return 0; +} + +static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev) +{ + struct amdgpu_ring *ring = NULL; + int r, i; + + ring = &adev->gfx.kiq.ring; + r = gfx_v8_0_kiq_setup_queue(adev, ring); + if (r) + return r; + + for (i = 0; i < adev->gfx.num_compute_rings; i++) { + ring = &adev->gfx.compute_ring[i]; + r = gfx_v8_0_kiq_setup_queue(adev, ring); + if (r) + return r; + } + + gfx_v8_0_cp_compute_enable(adev, true); + + for (i = 0; i < adev->gfx.num_compute_rings; i++) { + ring = &adev->gfx.compute_ring[i]; + + ring->ready = true; + r = amdgpu_ring_test_ring(ring); + if (r) + ring->ready = false; + } + + ring = &adev->gfx.kiq.ring; + ring->ready = true; + r = amdgpu_ring_test_ring(ring); + if (r) + ring->ready = false; + + return 0; +} + static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev) { int r, i, j; @@ -4806,7 +5287,10 @@ static int gfx_v8_0_cp_resume(struct amdgpu_device *adev) if (r) return r; - r = gfx_v8_0_cp_compute_resume(adev); + if (amdgpu_sriov_vf(adev)) + r = gfx_v8_0_kiq_resume(adev); + else + r = gfx_v8_0_cp_compute_resume(adev); if (r) return r; @@ -4845,6 +5329,7 @@ static int gfx_v8_0_hw_fini(void *handle) amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); if (amdgpu_sriov_vf(adev)) { + gfx_v8_0_kiq_free_queue(adev); pr_debug("For SRIOV client, shouldn't do anything.\n"); return 0; } @@ -5360,6 +5845,18 @@ static int gfx_v8_0_set_powergating_state(void *handle, case CHIP_CARRIZO: case CHIP_STONEY: + if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) { + cz_enable_sck_slow_down_on_power_up(adev, true); + cz_enable_sck_slow_down_on_power_down(adev, true); + } else { + cz_enable_sck_slow_down_on_power_up(adev, false); + cz_enable_sck_slow_down_on_power_down(adev, false); + } + if (adev->pg_flags & AMD_PG_SUPPORT_CP) + cz_enable_cp_power_gating(adev, true); + else + cz_enable_cp_power_gating(adev, false); + cz_update_gfx_cg_power_gating(adev, enable); if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable) @@ -5396,6 +5893,45 @@ static int gfx_v8_0_set_powergating_state(void *handle, return 0; } +static void gfx_v8_0_get_clockgating_state(void *handle, u32 *flags) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int data; + + /* AMD_CG_SUPPORT_GFX_MGCG */ + data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE); + if (!(data & RLC_CGTT_MGCG_OVERRIDE__CPF_MASK)) + *flags |= AMD_CG_SUPPORT_GFX_MGCG; + + /* AMD_CG_SUPPORT_GFX_CGLG */ + data = RREG32(mmRLC_CGCG_CGLS_CTRL); + if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK) + *flags |= AMD_CG_SUPPORT_GFX_CGCG; + + /* AMD_CG_SUPPORT_GFX_CGLS */ + if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK) + *flags |= AMD_CG_SUPPORT_GFX_CGLS; + + /* AMD_CG_SUPPORT_GFX_CGTS */ + data = RREG32(mmCGTS_SM_CTRL_REG); + if (!(data & CGTS_SM_CTRL_REG__OVERRIDE_MASK)) + *flags |= AMD_CG_SUPPORT_GFX_CGTS; + + /* AMD_CG_SUPPORT_GFX_CGTS_LS */ + if (!(data & CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK)) + *flags |= AMD_CG_SUPPORT_GFX_CGTS_LS; + + /* AMD_CG_SUPPORT_GFX_RLC_LS */ + data = RREG32(mmRLC_MEM_SLP_CNTL); + if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) + *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS; + + /* AMD_CG_SUPPORT_GFX_CP_LS */ + data = RREG32(mmCP_MEM_SLP_CNTL); + if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) + *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS; +} + static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev, uint32_t reg_addr, uint32_t cmd) { @@ -5444,68 +5980,6 @@ static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev, #define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001 #define RLC_GPR_REG2__MESSAGE_MASK 0x0000001e -static void cz_enter_rlc_safe_mode(struct amdgpu_device *adev) -{ - u32 data = 0; - unsigned i; - - data = RREG32(mmRLC_CNTL); - if ((data & RLC_CNTL__RLC_ENABLE_F32_MASK) == 0) - return; - - if ((adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) || - (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | AMD_PG_SUPPORT_GFX_SMG | - AMD_PG_SUPPORT_GFX_DMG))) { - data |= RLC_GPR_REG2__REQ_MASK; - data &= ~RLC_GPR_REG2__MESSAGE_MASK; - data |= (MSG_ENTER_RLC_SAFE_MODE << RLC_GPR_REG2__MESSAGE__SHIFT); - WREG32(mmRLC_GPR_REG2, data); - - for (i = 0; i < adev->usec_timeout; i++) { - if ((RREG32(mmRLC_GPM_STAT) & - (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK | - RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) == - (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK | - RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) - break; - udelay(1); - } - - for (i = 0; i < adev->usec_timeout; i++) { - if (!REG_GET_FIELD(RREG32(mmRLC_GPR_REG2), RLC_GPR_REG2, REQ)) - break; - udelay(1); - } - adev->gfx.rlc.in_safe_mode = true; - } -} - -static void cz_exit_rlc_safe_mode(struct amdgpu_device *adev) -{ - u32 data; - unsigned i; - - data = RREG32(mmRLC_CNTL); - if ((data & RLC_CNTL__RLC_ENABLE_F32_MASK) == 0) - return; - - if ((adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) || - (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | AMD_PG_SUPPORT_GFX_SMG | - AMD_PG_SUPPORT_GFX_DMG))) { - data |= RLC_GPR_REG2__REQ_MASK; - data &= ~RLC_GPR_REG2__MESSAGE_MASK; - data |= (MSG_EXIT_RLC_SAFE_MODE << RLC_GPR_REG2__MESSAGE__SHIFT); - WREG32(mmRLC_GPR_REG2, data); - adev->gfx.rlc.in_safe_mode = false; - } - - for (i = 0; i < adev->usec_timeout; i++) { - if (!REG_GET_FIELD(RREG32(mmRLC_GPR_REG2), RLC_GPR_REG2, REQ)) - break; - udelay(1); - } -} - static void iceland_enter_rlc_safe_mode(struct amdgpu_device *adev) { u32 data; @@ -5565,31 +6039,11 @@ static void iceland_exit_rlc_safe_mode(struct amdgpu_device *adev) } } -static void gfx_v8_0_nop_enter_rlc_safe_mode(struct amdgpu_device *adev) -{ - adev->gfx.rlc.in_safe_mode = true; -} - -static void gfx_v8_0_nop_exit_rlc_safe_mode(struct amdgpu_device *adev) -{ - adev->gfx.rlc.in_safe_mode = false; -} - -static const struct amdgpu_rlc_funcs cz_rlc_funcs = { - .enter_safe_mode = cz_enter_rlc_safe_mode, - .exit_safe_mode = cz_exit_rlc_safe_mode -}; - static const struct amdgpu_rlc_funcs iceland_rlc_funcs = { .enter_safe_mode = iceland_enter_rlc_safe_mode, .exit_safe_mode = iceland_exit_rlc_safe_mode }; -static const struct amdgpu_rlc_funcs gfx_v8_0_nop_rlc_funcs = { - .enter_safe_mode = gfx_v8_0_nop_enter_rlc_safe_mode, - .exit_safe_mode = gfx_v8_0_nop_exit_rlc_safe_mode -}; - static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev, bool enable) { @@ -6011,7 +6465,8 @@ static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) { u32 ref_and_mask, reg_mem_engine; - if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) { + if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) || + (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)) { switch (ring->me) { case 1: ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe; @@ -6224,6 +6679,31 @@ static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring, amdgpu_ring_write(ring, upper_32_bits(seq)); } +static void gfx_v8_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr, + u64 seq, unsigned int flags) +{ + /* we only allocate 32bit for each seq wb address */ + BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT); + + /* write fence seq to the "addr" */ + amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); + amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | + WRITE_DATA_DST_SEL(5) | WR_CONFIRM)); + amdgpu_ring_write(ring, lower_32_bits(addr)); + amdgpu_ring_write(ring, upper_32_bits(addr)); + amdgpu_ring_write(ring, lower_32_bits(seq)); + + if (flags & AMDGPU_FENCE_FLAG_INT) { + /* set register to trigger INT */ + amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); + amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | + WRITE_DATA_DST_SEL(0) | WR_CONFIRM)); + amdgpu_ring_write(ring, mmCPC_INT_STATUS); + amdgpu_ring_write(ring, 0); + amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */ + } +} + static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring) { amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0)); @@ -6234,6 +6714,10 @@ static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags) { uint32_t dw2 = 0; + if (amdgpu_sriov_vf(ring->adev)) + gfx_v8_0_ring_emit_ce_meta_init(ring, + (flags & AMDGPU_VM_DOMAIN) ? AMDGPU_CSA_VADDR : ring->adev->virt.csa_vmid0_addr); + dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */ if (flags & AMDGPU_HAVE_CTX_SWITCH) { gfx_v8_0_ring_emit_vgt_flush(ring); @@ -6258,6 +6742,36 @@ static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags) amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1)); amdgpu_ring_write(ring, dw2); amdgpu_ring_write(ring, 0); + + if (amdgpu_sriov_vf(ring->adev)) + gfx_v8_0_ring_emit_de_meta_init(ring, + (flags & AMDGPU_VM_DOMAIN) ? AMDGPU_CSA_VADDR : ring->adev->virt.csa_vmid0_addr); +} + +static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg) +{ + struct amdgpu_device *adev = ring->adev; + + amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4)); + amdgpu_ring_write(ring, 0 | /* src: register*/ + (5 << 8) | /* dst: memory */ + (1 << 20)); /* write confirm */ + amdgpu_ring_write(ring, reg); + amdgpu_ring_write(ring, 0); + amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr + + adev->virt.reg_val_offs * 4)); + amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr + + adev->virt.reg_val_offs * 4)); +} + +static void gfx_v8_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, + uint32_t val) +{ + amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); + amdgpu_ring_write(ring, (1 << 16)); /* no inc addr */ + amdgpu_ring_write(ring, reg); + amdgpu_ring_write(ring, 0); + amdgpu_ring_write(ring, val); } static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev, @@ -6405,6 +6919,72 @@ static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev, return 0; } +static int gfx_v8_0_kiq_set_interrupt_state(struct amdgpu_device *adev, + struct amdgpu_irq_src *src, + unsigned int type, + enum amdgpu_interrupt_state state) +{ + uint32_t tmp, target; + struct amdgpu_ring *ring = (struct amdgpu_ring *)src->data; + + BUG_ON(!ring || (ring->funcs->type != AMDGPU_RING_TYPE_KIQ)); + + if (ring->me == 1) + target = mmCP_ME1_PIPE0_INT_CNTL; + else + target = mmCP_ME2_PIPE0_INT_CNTL; + target += ring->pipe; + + switch (type) { + case AMDGPU_CP_KIQ_IRQ_DRIVER0: + if (state == AMDGPU_IRQ_STATE_DISABLE) { + tmp = RREG32(mmCPC_INT_CNTL); + tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL, + GENERIC2_INT_ENABLE, 0); + WREG32(mmCPC_INT_CNTL, tmp); + + tmp = RREG32(target); + tmp = REG_SET_FIELD(tmp, CP_ME2_PIPE0_INT_CNTL, + GENERIC2_INT_ENABLE, 0); + WREG32(target, tmp); + } else { + tmp = RREG32(mmCPC_INT_CNTL); + tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL, + GENERIC2_INT_ENABLE, 1); + WREG32(mmCPC_INT_CNTL, tmp); + + tmp = RREG32(target); + tmp = REG_SET_FIELD(tmp, CP_ME2_PIPE0_INT_CNTL, + GENERIC2_INT_ENABLE, 1); + WREG32(target, tmp); + } + break; + default: + BUG(); /* kiq only support GENERIC2_INT now */ + break; + } + return 0; +} + +static int gfx_v8_0_kiq_irq(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + u8 me_id, pipe_id, queue_id; + struct amdgpu_ring *ring = (struct amdgpu_ring *)source->data; + + BUG_ON(!ring || (ring->funcs->type != AMDGPU_RING_TYPE_KIQ)); + + me_id = (entry->ring_id & 0x0c) >> 2; + pipe_id = (entry->ring_id & 0x03) >> 0; + queue_id = (entry->ring_id & 0x70) >> 4; + DRM_DEBUG("IH: CPC GENERIC2_INT, me:%d, pipe:%d, queue:%d\n", + me_id, pipe_id, queue_id); + + amdgpu_fence_process(ring); + return 0; +} + static const struct amd_ip_funcs gfx_v8_0_ip_funcs = { .name = "gfx_v8_0", .early_init = gfx_v8_0_early_init, @@ -6423,6 +7003,7 @@ static const struct amd_ip_funcs gfx_v8_0_ip_funcs = { .post_soft_reset = gfx_v8_0_post_soft_reset, .set_clockgating_state = gfx_v8_0_set_clockgating_state, .set_powergating_state = gfx_v8_0_set_powergating_state, + .get_clockgating_state = gfx_v8_0_get_clockgating_state, }; static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = { @@ -6440,7 +7021,7 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = { 7 + /* gfx_v8_0_ring_emit_pipeline_sync */ 128 + 19 + /* gfx_v8_0_ring_emit_vm_flush */ 2 + /* gfx_v8_ring_emit_sb */ - 3 + 4, /* gfx_v8_ring_emit_cntxcntl including vgt flush */ + 3 + 4 + 29, /* gfx_v8_ring_emit_cntxcntl including vgt flush/meta-data */ .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_gfx */ .emit_ib = gfx_v8_0_ring_emit_ib_gfx, .emit_fence = gfx_v8_0_ring_emit_fence_gfx, @@ -6485,10 +7066,39 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = { .pad_ib = amdgpu_ring_generic_pad_ib, }; +static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = { + .type = AMDGPU_RING_TYPE_KIQ, + .align_mask = 0xff, + .nop = PACKET3(PACKET3_NOP, 0x3FFF), + .get_rptr = gfx_v8_0_ring_get_rptr, + .get_wptr = gfx_v8_0_ring_get_wptr_compute, + .set_wptr = gfx_v8_0_ring_set_wptr_compute, + .emit_frame_size = + 20 + /* gfx_v8_0_ring_emit_gds_switch */ + 7 + /* gfx_v8_0_ring_emit_hdp_flush */ + 5 + /* gfx_v8_0_ring_emit_hdp_invalidate */ + 7 + /* gfx_v8_0_ring_emit_pipeline_sync */ + 17 + /* gfx_v8_0_ring_emit_vm_flush */ + 7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */ + .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */ + .emit_ib = gfx_v8_0_ring_emit_ib_compute, + .emit_fence = gfx_v8_0_ring_emit_fence_kiq, + .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush, + .emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate, + .test_ring = gfx_v8_0_ring_test_ring, + .test_ib = gfx_v8_0_ring_test_ib, + .insert_nop = amdgpu_ring_insert_nop, + .pad_ib = amdgpu_ring_generic_pad_ib, + .emit_rreg = gfx_v8_0_ring_emit_rreg, + .emit_wreg = gfx_v8_0_ring_emit_wreg, +}; + static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev) { int i; + adev->gfx.kiq.ring.funcs = &gfx_v8_0_ring_funcs_kiq; + for (i = 0; i < adev->gfx.num_gfx_rings; i++) adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx; @@ -6511,6 +7121,11 @@ static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = { .process = gfx_v8_0_priv_inst_irq, }; +static const struct amdgpu_irq_src_funcs gfx_v8_0_kiq_irq_funcs = { + .set = gfx_v8_0_kiq_set_interrupt_state, + .process = gfx_v8_0_kiq_irq, +}; + static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev) { adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST; @@ -6521,22 +7136,14 @@ static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev) adev->gfx.priv_inst_irq.num_types = 1; adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs; + + adev->gfx.kiq.irq.num_types = AMDGPU_CP_KIQ_IRQ_LAST; + adev->gfx.kiq.irq.funcs = &gfx_v8_0_kiq_irq_funcs; } static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev) { - switch (adev->asic_type) { - case CHIP_TOPAZ: - adev->gfx.rlc.funcs = &iceland_rlc_funcs; - break; - case CHIP_STONEY: - case CHIP_CARRIZO: - adev->gfx.rlc.funcs = &cz_rlc_funcs; - break; - default: - adev->gfx.rlc.funcs = &gfx_v8_0_nop_rlc_funcs; - break; - } + adev->gfx.rlc.funcs = &iceland_rlc_funcs; } static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev) @@ -6653,3 +7260,62 @@ const struct amdgpu_ip_block_version gfx_v8_1_ip_block = .rev = 0, .funcs = &gfx_v8_0_ip_funcs, }; + +static void gfx_v8_0_ring_emit_ce_meta_init(struct amdgpu_ring *ring, uint64_t csa_addr) +{ + uint64_t ce_payload_addr; + int cnt_ce; + static union { + struct amdgpu_ce_ib_state regular; + struct amdgpu_ce_ib_state_chained_ib chained; + } ce_payload = {}; + + if (ring->adev->virt.chained_ib_support) { + ce_payload_addr = csa_addr + offsetof(struct amdgpu_gfx_meta_data_chained_ib, ce_payload); + cnt_ce = (sizeof(ce_payload.chained) >> 2) + 4 - 2; + } else { + ce_payload_addr = csa_addr + offsetof(struct amdgpu_gfx_meta_data, ce_payload); + cnt_ce = (sizeof(ce_payload.regular) >> 2) + 4 - 2; + } + + amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_ce)); + amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) | + WRITE_DATA_DST_SEL(8) | + WR_CONFIRM) | + WRITE_DATA_CACHE_POLICY(0)); + amdgpu_ring_write(ring, lower_32_bits(ce_payload_addr)); + amdgpu_ring_write(ring, upper_32_bits(ce_payload_addr)); + amdgpu_ring_write_multiple(ring, (void *)&ce_payload, cnt_ce - 2); +} + +static void gfx_v8_0_ring_emit_de_meta_init(struct amdgpu_ring *ring, uint64_t csa_addr) +{ + uint64_t de_payload_addr, gds_addr; + int cnt_de; + static union { + struct amdgpu_de_ib_state regular; + struct amdgpu_de_ib_state_chained_ib chained; + } de_payload = {}; + + gds_addr = csa_addr + 4096; + if (ring->adev->virt.chained_ib_support) { + de_payload.chained.gds_backup_addrlo = lower_32_bits(gds_addr); + de_payload.chained.gds_backup_addrhi = upper_32_bits(gds_addr); + de_payload_addr = csa_addr + offsetof(struct amdgpu_gfx_meta_data_chained_ib, de_payload); + cnt_de = (sizeof(de_payload.chained) >> 2) + 4 - 2; + } else { + de_payload.regular.gds_backup_addrlo = lower_32_bits(gds_addr); + de_payload.regular.gds_backup_addrhi = upper_32_bits(gds_addr); + de_payload_addr = csa_addr + offsetof(struct amdgpu_gfx_meta_data, de_payload); + cnt_de = (sizeof(de_payload.regular) >> 2) + 4 - 2; + } + + amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt_de)); + amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) | + WRITE_DATA_DST_SEL(8) | + WR_CONFIRM) | + WRITE_DATA_CACHE_POLICY(0)); + amdgpu_ring_write(ring, lower_32_bits(de_payload_addr)); + amdgpu_ring_write(ring, upper_32_bits(de_payload_addr)); + amdgpu_ring_write_multiple(ring, (void *)&de_payload, cnt_de - 2); +} |