diff options
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c')
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 425 |
1 files changed, 334 insertions, 91 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c index 7f18a53ab53a..71116da9e782 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c @@ -991,6 +991,22 @@ out: return err; } +static void gfx_v7_0_free_microcode(struct amdgpu_device *adev) +{ + release_firmware(adev->gfx.pfp_fw); + adev->gfx.pfp_fw = NULL; + release_firmware(adev->gfx.me_fw); + adev->gfx.me_fw = NULL; + release_firmware(adev->gfx.ce_fw); + adev->gfx.ce_fw = NULL; + release_firmware(adev->gfx.mec_fw); + adev->gfx.mec_fw = NULL; + release_firmware(adev->gfx.mec2_fw); + adev->gfx.mec2_fw = NULL; + release_firmware(adev->gfx.rlc_fw); + adev->gfx.rlc_fw = NULL; +} + /** * gfx_v7_0_tiling_mode_table_init - init the hw tiling table * @@ -1567,9 +1583,15 @@ static void gfx_v7_0_tiling_mode_table_init(struct amdgpu_device *adev) * registers are instanced per SE or SH. 0xffffffff means * broadcast to all SEs or SHs (CIK). */ -void gfx_v7_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num) +static void gfx_v7_0_select_se_sh(struct amdgpu_device *adev, + u32 se_num, u32 sh_num, u32 instance) { - u32 data = GRBM_GFX_INDEX__INSTANCE_BROADCAST_WRITES_MASK; + u32 data; + + if (instance == 0xffffffff) + data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1); + else + data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance); if ((se_num == 0xffffffff) && (sh_num == 0xffffffff)) data |= GRBM_GFX_INDEX__SH_BROADCAST_WRITES_MASK | @@ -1623,6 +1645,147 @@ static u32 gfx_v7_0_get_rb_active_bitmap(struct amdgpu_device *adev) return (~data) & mask; } +static void +gfx_v7_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1) +{ + switch (adev->asic_type) { + case CHIP_BONAIRE: + *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) | + SE_XSEL(1) | SE_YSEL(1); + *rconf1 |= 0x0; + break; + case CHIP_HAWAII: + *rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) | + RB_XSEL2(1) | PKR_MAP(2) | PKR_XSEL(1) | + PKR_YSEL(1) | SE_MAP(2) | SE_XSEL(2) | + SE_YSEL(3); + *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) | + SE_PAIR_YSEL(2); + break; + case CHIP_KAVERI: + *rconf |= RB_MAP_PKR0(2); + *rconf1 |= 0x0; + break; + case CHIP_KABINI: + case CHIP_MULLINS: + *rconf |= 0x0; + *rconf1 |= 0x0; + break; + default: + DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type); + break; + } +} + +static void +gfx_v7_0_write_harvested_raster_configs(struct amdgpu_device *adev, + u32 raster_config, u32 raster_config_1, + unsigned rb_mask, unsigned num_rb) +{ + unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1); + unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1); + unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2); + unsigned rb_per_se = num_rb / num_se; + unsigned se_mask[4]; + unsigned se; + + se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask; + se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask; + se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask; + se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask; + + WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4)); + WARN_ON(!(sh_per_se == 1 || sh_per_se == 2)); + WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2)); + + if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) || + (!se_mask[2] && !se_mask[3]))) { + raster_config_1 &= ~SE_PAIR_MAP_MASK; + + if (!se_mask[0] && !se_mask[1]) { + raster_config_1 |= + SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3); + } else { + raster_config_1 |= + SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0); + } + } + + for (se = 0; se < num_se; se++) { + unsigned raster_config_se = raster_config; + unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se); + unsigned pkr1_mask = pkr0_mask << rb_per_pkr; + int idx = (se / 2) * 2; + + if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) { + raster_config_se &= ~SE_MAP_MASK; + + if (!se_mask[idx]) { + raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3); + } else { + raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0); + } + } + + pkr0_mask &= rb_mask; + pkr1_mask &= rb_mask; + if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) { + raster_config_se &= ~PKR_MAP_MASK; + + if (!pkr0_mask) { + raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3); + } else { + raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0); + } + } + + if (rb_per_se >= 2) { + unsigned rb0_mask = 1 << (se * rb_per_se); + unsigned rb1_mask = rb0_mask << 1; + + rb0_mask &= rb_mask; + rb1_mask &= rb_mask; + if (!rb0_mask || !rb1_mask) { + raster_config_se &= ~RB_MAP_PKR0_MASK; + + if (!rb0_mask) { + raster_config_se |= + RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3); + } else { + raster_config_se |= + RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0); + } + } + + if (rb_per_se > 2) { + rb0_mask = 1 << (se * rb_per_se + rb_per_pkr); + rb1_mask = rb0_mask << 1; + rb0_mask &= rb_mask; + rb1_mask &= rb_mask; + if (!rb0_mask || !rb1_mask) { + raster_config_se &= ~RB_MAP_PKR1_MASK; + + if (!rb0_mask) { + raster_config_se |= + RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3); + } else { + raster_config_se |= + RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0); + } + } + } + } + + /* GRBM_GFX_INDEX has a different offset on CI+ */ + gfx_v7_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff); + WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se); + WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1); + } + + /* GRBM_GFX_INDEX has a different offset on CI+ */ + gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); +} + /** * gfx_v7_0_setup_rb - setup the RBs on the asic * @@ -1636,24 +1799,41 @@ static void gfx_v7_0_setup_rb(struct amdgpu_device *adev) { int i, j; u32 data; + u32 raster_config = 0, raster_config_1 = 0; u32 active_rbs = 0; u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se / adev->gfx.config.max_sh_per_se; + unsigned num_rb_pipes; mutex_lock(&adev->grbm_idx_mutex); for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { - gfx_v7_0_select_se_sh(adev, i, j); + gfx_v7_0_select_se_sh(adev, i, j, 0xffffffff); data = gfx_v7_0_get_rb_active_bitmap(adev); active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) * rb_bitmap_width_per_sh); } } - gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff); - mutex_unlock(&adev->grbm_idx_mutex); + gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); adev->gfx.config.backend_enable_mask = active_rbs; adev->gfx.config.num_rbs = hweight32(active_rbs); + + num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se * + adev->gfx.config.max_shader_engines, 16); + + gfx_v7_0_raster_config(adev, &raster_config, &raster_config_1); + + if (!adev->gfx.config.backend_enable_mask || + adev->gfx.config.num_rbs >= num_rb_pipes) { + WREG32(mmPA_SC_RASTER_CONFIG, raster_config); + WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1); + } else { + gfx_v7_0_write_harvested_raster_configs(adev, raster_config, raster_config_1, + adev->gfx.config.backend_enable_mask, + num_rb_pipes); + } + mutex_unlock(&adev->grbm_idx_mutex); } /** @@ -1730,7 +1910,7 @@ static void gfx_v7_0_gpu_init(struct amdgpu_device *adev) * making sure that the following register writes will be broadcasted * to all the shaders */ - gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff); + gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); /* XXX SH_MEM regs */ /* where to put LDS, scratch, GPUVM in FSA64 space */ @@ -2034,17 +2214,6 @@ static void gfx_v7_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, unsigned vm_id, bool ctx_switch) { u32 header, control = 0; - u32 next_rptr = ring->wptr + 5; - - if (ctx_switch) - next_rptr += 2; - - next_rptr += 4; - amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); - amdgpu_ring_write(ring, WRITE_DATA_DST_SEL(5) | WR_CONFIRM); - amdgpu_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc); - amdgpu_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff); - amdgpu_ring_write(ring, next_rptr); /* insert SWITCH_BUFFER packet before first IB in the ring frame */ if (ctx_switch) { @@ -2073,22 +2242,9 @@ static void gfx_v7_0_ring_emit_ib_compute(struct amdgpu_ring *ring, struct amdgpu_ib *ib, unsigned vm_id, bool ctx_switch) { - u32 header, control = 0; - u32 next_rptr = ring->wptr + 5; + u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vm_id << 24); - control |= INDIRECT_BUFFER_VALID; - next_rptr += 4; - amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); - amdgpu_ring_write(ring, WRITE_DATA_DST_SEL(5) | WR_CONFIRM); - amdgpu_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc); - amdgpu_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff); - amdgpu_ring_write(ring, next_rptr); - - header = PACKET3(PACKET3_INDIRECT_BUFFER, 2); - - control |= ib->length_dw | (vm_id << 24); - - amdgpu_ring_write(ring, header); + amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2)); amdgpu_ring_write(ring, #ifdef __BIG_ENDIAN (2 << 0) | @@ -2098,6 +2254,25 @@ static void gfx_v7_0_ring_emit_ib_compute(struct amdgpu_ring *ring, amdgpu_ring_write(ring, control); } +static void gfx_v7_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags) +{ + uint32_t dw2 = 0; + + dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */ + if (flags & AMDGPU_HAVE_CTX_SWITCH) { + /* set load_global_config & load_global_uconfig */ + dw2 |= 0x8001; + /* set load_cs_sh_regs */ + dw2 |= 0x01000000; + /* set load_per_context_state & load_gfx_sh_regs */ + dw2 |= 0x10002; + } + + amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1)); + amdgpu_ring_write(ring, dw2); + amdgpu_ring_write(ring, 0); +} + /** * gfx_v7_0_ring_test_ib - basic ring IB test * @@ -2107,26 +2282,25 @@ static void gfx_v7_0_ring_emit_ib_compute(struct amdgpu_ring *ring, * Provides a basic gfx ring test to verify that IBs are working. * Returns 0 on success, error on failure. */ -static int gfx_v7_0_ring_test_ib(struct amdgpu_ring *ring) +static int gfx_v7_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) { struct amdgpu_device *adev = ring->adev; struct amdgpu_ib ib; struct fence *f = NULL; uint32_t scratch; uint32_t tmp = 0; - unsigned i; - int r; + long r; r = amdgpu_gfx_scratch_get(adev, &scratch); if (r) { - DRM_ERROR("amdgpu: failed to get scratch reg (%d).\n", r); + DRM_ERROR("amdgpu: failed to get scratch reg (%ld).\n", r); return r; } WREG32(scratch, 0xCAFEDEAD); memset(&ib, 0, sizeof(ib)); r = amdgpu_ib_get(adev, NULL, 256, &ib); if (r) { - DRM_ERROR("amdgpu: failed to get ib (%d).\n", r); + DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r); goto err1; } ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1); @@ -2138,21 +2312,19 @@ static int gfx_v7_0_ring_test_ib(struct amdgpu_ring *ring) if (r) goto err2; - r = fence_wait(f, false); - if (r) { - DRM_ERROR("amdgpu: fence wait failed (%d).\n", r); + r = fence_wait_timeout(f, false, timeout); + if (r == 0) { + DRM_ERROR("amdgpu: IB test timed out\n"); + r = -ETIMEDOUT; goto err2; - } - for (i = 0; i < adev->usec_timeout; i++) { - tmp = RREG32(scratch); - if (tmp == 0xDEADBEEF) - break; - DRM_UDELAY(1); - } - if (i < adev->usec_timeout) { - DRM_INFO("ib test on ring %d succeeded in %u usecs\n", - ring->idx, i); + } else if (r < 0) { + DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r); goto err2; + } + tmp = RREG32(scratch); + if (tmp == 0xDEADBEEF) { + DRM_INFO("ib test on ring %d succeeded\n", ring->idx); + r = 0; } else { DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n", scratch, tmp); @@ -2160,7 +2332,6 @@ static int gfx_v7_0_ring_test_ib(struct amdgpu_ring *ring) } err2: - fence_put(f); amdgpu_ib_free(adev, &ib, NULL); fence_put(f); err1: @@ -2449,7 +2620,7 @@ static int gfx_v7_0_cp_gfx_resume(struct amdgpu_device *adev) return 0; } -static u32 gfx_v7_0_ring_get_rptr_gfx(struct amdgpu_ring *ring) +static u32 gfx_v7_0_ring_get_rptr(struct amdgpu_ring *ring) { return ring->adev->wb.wb[ring->rptr_offs]; } @@ -2469,11 +2640,6 @@ static void gfx_v7_0_ring_set_wptr_gfx(struct amdgpu_ring *ring) (void)RREG32(mmCP_RB0_WPTR); } -static u32 gfx_v7_0_ring_get_rptr_compute(struct amdgpu_ring *ring) -{ - return ring->adev->wb.wb[ring->rptr_offs]; -} - static u32 gfx_v7_0_ring_get_wptr_compute(struct amdgpu_ring *ring) { /* XXX check if swapping is necessary on BE */ @@ -2761,8 +2927,7 @@ static int gfx_v7_0_cp_compute_resume(struct amdgpu_device *adev) u64 wb_gpu_addr; u32 *buf; struct bonaire_mqd *mqd; - - gfx_v7_0_cp_compute_enable(adev, true); + struct amdgpu_ring *ring; /* fix up chicken bits */ tmp = RREG32(mmCP_CPF_DEBUG); @@ -2797,7 +2962,7 @@ static int gfx_v7_0_cp_compute_resume(struct amdgpu_device *adev) /* init the queues. Just two for now. */ for (i = 0; i < adev->gfx.num_compute_rings; i++) { - struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; + ring = &adev->gfx.compute_ring[i]; if (ring->mqd_obj == NULL) { r = amdgpu_bo_create(adev, @@ -2976,6 +3141,13 @@ static int gfx_v7_0_cp_compute_resume(struct amdgpu_device *adev) amdgpu_bo_unreserve(ring->mqd_obj); ring->ready = true; + } + + gfx_v7_0_cp_compute_enable(adev, true); + + for (i = 0; i < adev->gfx.num_compute_rings; i++) { + ring = &adev->gfx.compute_ring[i]; + r = amdgpu_ring_test_ring(ring); if (r) ring->ready = false; @@ -3205,7 +3377,8 @@ static int gfx_v7_0_rlc_init(struct amdgpu_device *adev) } } adev->gfx.rlc.cs_data = ci_cs_data; - adev->gfx.rlc.cp_table_size = CP_ME_TABLE_SIZE * 5 * 4; + adev->gfx.rlc.cp_table_size = ALIGN(CP_ME_TABLE_SIZE * 5 * 4, 2048); /* CP JT */ + adev->gfx.rlc.cp_table_size += 64 * 1024; /* GDS */ src_ptr = adev->gfx.rlc.reg_list; dws = adev->gfx.rlc.reg_list_size; @@ -3363,7 +3536,7 @@ static void gfx_v7_0_wait_for_rlc_serdes(struct amdgpu_device *adev) mutex_lock(&adev->grbm_idx_mutex); for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { - gfx_v7_0_select_se_sh(adev, i, j); + gfx_v7_0_select_se_sh(adev, i, j, 0xffffffff); for (k = 0; k < adev->usec_timeout; k++) { if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0) break; @@ -3371,7 +3544,7 @@ static void gfx_v7_0_wait_for_rlc_serdes(struct amdgpu_device *adev) } } } - gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff); + gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); mutex_unlock(&adev->grbm_idx_mutex); mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK | @@ -3418,7 +3591,7 @@ static u32 gfx_v7_0_halt_rlc(struct amdgpu_device *adev) return orig; } -void gfx_v7_0_enter_rlc_safe_mode(struct amdgpu_device *adev) +static void gfx_v7_0_enter_rlc_safe_mode(struct amdgpu_device *adev) { u32 tmp, i, mask; @@ -3440,7 +3613,7 @@ void gfx_v7_0_enter_rlc_safe_mode(struct amdgpu_device *adev) } } -void gfx_v7_0_exit_rlc_safe_mode(struct amdgpu_device *adev) +static void gfx_v7_0_exit_rlc_safe_mode(struct amdgpu_device *adev) { u32 tmp; @@ -3455,7 +3628,7 @@ void gfx_v7_0_exit_rlc_safe_mode(struct amdgpu_device *adev) * * Halt the RLC ME (MicroEngine) (CIK). */ -void gfx_v7_0_rlc_stop(struct amdgpu_device *adev) +static void gfx_v7_0_rlc_stop(struct amdgpu_device *adev) { WREG32(mmRLC_CNTL, 0); @@ -3531,7 +3704,7 @@ static int gfx_v7_0_rlc_resume(struct amdgpu_device *adev) WREG32(mmRLC_LB_CNTR_MAX, 0x00008000); mutex_lock(&adev->grbm_idx_mutex); - gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff); + gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); WREG32(mmRLC_LB_INIT_CU_MASK, 0xffffffff); WREG32(mmRLC_LB_PARAMS, 0x00600408); WREG32(mmRLC_LB_CNTL, 0x80000004); @@ -3571,7 +3744,7 @@ static void gfx_v7_0_enable_cgcg(struct amdgpu_device *adev, bool enable) tmp = gfx_v7_0_halt_rlc(adev); mutex_lock(&adev->grbm_idx_mutex); - gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff); + gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff); WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff); tmp2 = RLC_SERDES_WR_CTRL__BPM_ADDR_MASK | @@ -3622,7 +3795,7 @@ static void gfx_v7_0_enable_mgcg(struct amdgpu_device *adev, bool enable) tmp = gfx_v7_0_halt_rlc(adev); mutex_lock(&adev->grbm_idx_mutex); - gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff); + gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff); WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff); data = RLC_SERDES_WR_CTRL__BPM_ADDR_MASK | @@ -3673,7 +3846,7 @@ static void gfx_v7_0_enable_mgcg(struct amdgpu_device *adev, bool enable) tmp = gfx_v7_0_halt_rlc(adev); mutex_lock(&adev->grbm_idx_mutex); - gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff); + gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff); WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff); data = RLC_SERDES_WR_CTRL__BPM_ADDR_MASK | RLC_SERDES_WR_CTRL__MGCG_OVERRIDE_1_MASK; @@ -3851,6 +4024,20 @@ static void gfx_v7_0_enable_gfx_cgpg(struct amdgpu_device *adev, } } +static void gfx_v7_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev, + u32 bitmap) +{ + u32 data; + + if (!bitmap) + return; + + data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT; + data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK; + + WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data); +} + static u32 gfx_v7_0_get_cu_active_bitmap(struct amdgpu_device *adev) { u32 data, mask; @@ -4107,7 +4294,7 @@ static void gfx_v7_0_fini_pg(struct amdgpu_device *adev) * Fetches a GPU clock counter snapshot (SI). * Returns the 64 bit clock counter snapshot. */ -uint64_t gfx_v7_0_get_gpu_clock_counter(struct amdgpu_device *adev) +static uint64_t gfx_v7_0_get_gpu_clock_counter(struct amdgpu_device *adev) { uint64_t clock; @@ -4167,12 +4354,59 @@ static void gfx_v7_0_ring_emit_gds_switch(struct amdgpu_ring *ring, amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base)); } +static unsigned gfx_v7_0_ring_get_emit_ib_size_gfx(struct amdgpu_ring *ring) +{ + return + 4; /* gfx_v7_0_ring_emit_ib_gfx */ +} + +static unsigned gfx_v7_0_ring_get_dma_frame_size_gfx(struct amdgpu_ring *ring) +{ + return + 20 + /* gfx_v7_0_ring_emit_gds_switch */ + 7 + /* gfx_v7_0_ring_emit_hdp_flush */ + 5 + /* gfx_v7_0_ring_emit_hdp_invalidate */ + 12 + 12 + 12 + /* gfx_v7_0_ring_emit_fence_gfx x3 for user fence, vm fence */ + 7 + 4 + /* gfx_v7_0_ring_emit_pipeline_sync */ + 17 + 6 + /* gfx_v7_0_ring_emit_vm_flush */ + 3; /* gfx_v7_ring_emit_cntxcntl */ +} + +static unsigned gfx_v7_0_ring_get_emit_ib_size_compute(struct amdgpu_ring *ring) +{ + return + 4; /* gfx_v7_0_ring_emit_ib_compute */ +} + +static unsigned gfx_v7_0_ring_get_dma_frame_size_compute(struct amdgpu_ring *ring) +{ + return + 20 + /* gfx_v7_0_ring_emit_gds_switch */ + 7 + /* gfx_v7_0_ring_emit_hdp_flush */ + 5 + /* gfx_v7_0_ring_emit_hdp_invalidate */ + 7 + /* gfx_v7_0_ring_emit_pipeline_sync */ + 17 + /* gfx_v7_0_ring_emit_vm_flush */ + 7 + 7 + 7; /* gfx_v7_0_ring_emit_fence_compute x3 for user fence, vm fence */ +} + +static const struct amdgpu_gfx_funcs gfx_v7_0_gfx_funcs = { + .get_gpu_clock_counter = &gfx_v7_0_get_gpu_clock_counter, + .select_se_sh = &gfx_v7_0_select_se_sh, +}; + +static const struct amdgpu_rlc_funcs gfx_v7_0_rlc_funcs = { + .enter_safe_mode = gfx_v7_0_enter_rlc_safe_mode, + .exit_safe_mode = gfx_v7_0_exit_rlc_safe_mode +}; + static int gfx_v7_0_early_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; adev->gfx.num_gfx_rings = GFX7_NUM_GFX_RINGS; adev->gfx.num_compute_rings = GFX7_NUM_COMPUTE_RINGS; + adev->gfx.funcs = &gfx_v7_0_gfx_funcs; + adev->gfx.rlc.funcs = &gfx_v7_0_rlc_funcs; gfx_v7_0_set_ring_funcs(adev); gfx_v7_0_set_irq_funcs(adev); gfx_v7_0_set_gds_init(adev); @@ -4444,24 +4678,21 @@ static int gfx_v7_0_sw_init(void *handle) } /* reserve GDS, GWS and OA resource for gfx */ - r = amdgpu_bo_create(adev, adev->gds.mem.gfx_partition_size, - PAGE_SIZE, true, - AMDGPU_GEM_DOMAIN_GDS, 0, - NULL, NULL, &adev->gds.gds_gfx_bo); + r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size, + PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS, + &adev->gds.gds_gfx_bo, NULL, NULL); if (r) return r; - r = amdgpu_bo_create(adev, adev->gds.gws.gfx_partition_size, - PAGE_SIZE, true, - AMDGPU_GEM_DOMAIN_GWS, 0, - NULL, NULL, &adev->gds.gws_gfx_bo); + r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size, + PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS, + &adev->gds.gws_gfx_bo, NULL, NULL); if (r) return r; - r = amdgpu_bo_create(adev, adev->gds.oa.gfx_partition_size, - PAGE_SIZE, true, - AMDGPU_GEM_DOMAIN_OA, 0, - NULL, NULL, &adev->gds.oa_gfx_bo); + r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size, + PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA, + &adev->gds.oa_gfx_bo, NULL, NULL); if (r) return r; @@ -4477,9 +4708,9 @@ static int gfx_v7_0_sw_fini(void *handle) int i; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - amdgpu_bo_unref(&adev->gds.oa_gfx_bo); - amdgpu_bo_unref(&adev->gds.gws_gfx_bo); - amdgpu_bo_unref(&adev->gds.gds_gfx_bo); + amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL); + amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL); + amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL); for (i = 0; i < adev->gfx.num_gfx_rings; i++) amdgpu_ring_fini(&adev->gfx.gfx_ring[i]); @@ -4489,6 +4720,7 @@ static int gfx_v7_0_sw_fini(void *handle) gfx_v7_0_cp_compute_fini(adev); gfx_v7_0_rlc_fini(adev); gfx_v7_0_mec_fini(adev); + gfx_v7_0_free_microcode(adev); return 0; } @@ -4816,7 +5048,7 @@ static int gfx_v7_0_eop_irq(struct amdgpu_device *adev, case 2: for (i = 0; i < adev->gfx.num_compute_rings; i++) { ring = &adev->gfx.compute_ring[i]; - if ((ring->me == me_id) & (ring->pipe == pipe_id)) + if ((ring->me == me_id) && (ring->pipe == pipe_id)) amdgpu_fence_process(ring); } break; @@ -4909,7 +5141,7 @@ const struct amd_ip_funcs gfx_v7_0_ip_funcs = { }; static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_gfx = { - .get_rptr = gfx_v7_0_ring_get_rptr_gfx, + .get_rptr = gfx_v7_0_ring_get_rptr, .get_wptr = gfx_v7_0_ring_get_wptr_gfx, .set_wptr = gfx_v7_0_ring_set_wptr_gfx, .parse_cs = NULL, @@ -4924,10 +5156,13 @@ static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_gfx = { .test_ib = gfx_v7_0_ring_test_ib, .insert_nop = amdgpu_ring_insert_nop, .pad_ib = amdgpu_ring_generic_pad_ib, + .emit_cntxcntl = gfx_v7_ring_emit_cntxcntl, + .get_emit_ib_size = gfx_v7_0_ring_get_emit_ib_size_gfx, + .get_dma_frame_size = gfx_v7_0_ring_get_dma_frame_size_gfx, }; static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_compute = { - .get_rptr = gfx_v7_0_ring_get_rptr_compute, + .get_rptr = gfx_v7_0_ring_get_rptr, .get_wptr = gfx_v7_0_ring_get_wptr_compute, .set_wptr = gfx_v7_0_ring_set_wptr_compute, .parse_cs = NULL, @@ -4942,6 +5177,8 @@ static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_compute = { .test_ib = gfx_v7_0_ring_test_ib, .insert_nop = amdgpu_ring_insert_nop, .pad_ib = amdgpu_ring_generic_pad_ib, + .get_emit_ib_size = gfx_v7_0_ring_get_emit_ib_size_compute, + .get_dma_frame_size = gfx_v7_0_ring_get_dma_frame_size_compute, }; static void gfx_v7_0_set_ring_funcs(struct amdgpu_device *adev) @@ -5015,16 +5252,22 @@ static void gfx_v7_0_get_cu_info(struct amdgpu_device *adev) int i, j, k, counter, active_cu_number = 0; u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0; struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info; + unsigned disable_masks[4 * 2]; memset(cu_info, 0, sizeof(*cu_info)); + amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2); + mutex_lock(&adev->grbm_idx_mutex); for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { mask = 1; ao_bitmap = 0; counter = 0; - gfx_v7_0_select_se_sh(adev, i, j); + gfx_v7_0_select_se_sh(adev, i, j, 0xffffffff); + if (i < 4 && j < 2) + gfx_v7_0_set_user_cu_inactive_bitmap( + adev, disable_masks[i * 2 + j]); bitmap = gfx_v7_0_get_cu_active_bitmap(adev); cu_info->bitmap[i][j] = bitmap; @@ -5040,7 +5283,7 @@ static void gfx_v7_0_get_cu_info(struct amdgpu_device *adev) ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8)); } } - gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff); + gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); mutex_unlock(&adev->grbm_idx_mutex); cu_info->number = active_cu_number; |