Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c')
 -rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c | 107
 1 file changed, 82 insertions(+), 25 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
index a41272fbcba2..2badbc0355f2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
@@ -56,19 +56,23 @@ static void amdgpu_vcn_idle_work_handler(struct work_struct *work);
 
 int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
 {
-	unsigned long bo_size;
+	unsigned long bo_size, fw_shared_bo_size;
 	const char *fw_name;
 	const struct common_firmware_header *hdr;
 	unsigned char fw_check;
 	int i, r;
 
 	INIT_DELAYED_WORK(&adev->vcn.idle_work, amdgpu_vcn_idle_work_handler);
+	mutex_init(&adev->vcn.vcn_pg_lock);
+	atomic_set(&adev->vcn.total_submission_cnt, 0);
+	for (i = 0; i < adev->vcn.num_vcn_inst; i++)
+		atomic_set(&adev->vcn.inst[i].dpg_enc_submission_cnt, 0);
 
 	switch (adev->asic_type) {
 	case CHIP_RAVEN:
-		if (adev->rev_id >= 8)
+		if (adev->apu_flags & AMD_APU_IS_RAVEN2)
 			fw_name = FIRMWARE_RAVEN2;
-		else if (adev->pdev->device == 0x15d8)
+		else if (adev->apu_flags & AMD_APU_IS_PICASSO)
 			fw_name = FIRMWARE_PICASSO;
 		else
 			fw_name = FIRMWARE_RAVEN;
@@ -178,6 +182,17 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
 				return r;
 			}
 		}
+
+		r = amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_fw_shared)),
+				PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM, &adev->vcn.inst[i].fw_shared_bo,
+				&adev->vcn.inst[i].fw_shared_gpu_addr, &adev->vcn.inst[i].fw_shared_cpu_addr);
+		if (r) {
+			dev_err(adev->dev, "VCN %d (%d) failed to allocate firmware shared bo\n", i, r);
+			return r;
+		}
+
+		fw_shared_bo_size = amdgpu_bo_size(adev->vcn.inst[i].fw_shared_bo);
+		adev->vcn.inst[i].saved_shm_bo = kvmalloc(fw_shared_bo_size, GFP_KERNEL);
 	}
 
 	return 0;
@@ -192,6 +207,12 @@ int amdgpu_vcn_sw_fini(struct amdgpu_device *adev)
 	for (j = 0; j < adev->vcn.num_vcn_inst; ++j) {
 		if (adev->vcn.harvest_config & (1 << j))
 			continue;
+
+		kvfree(adev->vcn.inst[j].saved_shm_bo);
+		amdgpu_bo_free_kernel(&adev->vcn.inst[j].fw_shared_bo,
+					  &adev->vcn.inst[j].fw_shared_gpu_addr,
+					  (void **)&adev->vcn.inst[j].fw_shared_cpu_addr);
+
 		if (adev->vcn.indirect_sram) {
 			amdgpu_bo_free_kernel(&adev->vcn.inst[j].dpg_sram_bo,
 						  &adev->vcn.inst[j].dpg_sram_gpu_addr,
@@ -210,6 +231,7 @@ int amdgpu_vcn_sw_fini(struct amdgpu_device *adev)
 	}
 
 	release_firmware(adev->vcn.fw);
+	mutex_destroy(&adev->vcn.vcn_pg_lock);
 
 	return 0;
 }
@@ -236,6 +258,17 @@ int amdgpu_vcn_suspend(struct amdgpu_device *adev)
 			return -ENOMEM;
 
 		memcpy_fromio(adev->vcn.inst[i].saved_bo, ptr, size);
+
+		if (adev->vcn.inst[i].fw_shared_bo == NULL)
+			return 0;
+
+		if (!adev->vcn.inst[i].saved_shm_bo)
+			return -ENOMEM;
+
+		size = amdgpu_bo_size(adev->vcn.inst[i].fw_shared_bo);
+		ptr = adev->vcn.inst[i].fw_shared_cpu_addr;
+
+		memcpy_fromio(adev->vcn.inst[i].saved_shm_bo, ptr, size);
 	}
 	return 0;
 }
@@ -273,6 +306,17 @@ int amdgpu_vcn_resume(struct amdgpu_device *adev)
 			}
 			memset_io(ptr, 0, size);
 		}
+
+		if (adev->vcn.inst[i].fw_shared_bo == NULL)
+			return -EINVAL;
+
+		size = amdgpu_bo_size(adev->vcn.inst[i].fw_shared_bo);
+		ptr = adev->vcn.inst[i].fw_shared_cpu_addr;
+
+		if (adev->vcn.inst[i].saved_shm_bo != NULL)
+			memcpy_toio(ptr, adev->vcn.inst[i].saved_shm_bo, size);
+		else
+			memset_io(ptr, 0, size);
 	}
 	return 0;
 }
@@ -295,7 +339,8 @@ static void amdgpu_vcn_idle_work_handler(struct work_struct *work)
 		if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)	{
 			struct dpg_pause_state new_state;
 
-			if (fence[j])
+			if (fence[j] ||
+				unlikely(atomic_read(&adev->vcn.inst[j].dpg_enc_submission_cnt)))
 				new_state.fw_based = VCN_DPG_STATE__PAUSE;
 			else
 				new_state.fw_based = VCN_DPG_STATE__UNPAUSE;
@@ -307,8 +352,7 @@ static void amdgpu_vcn_idle_work_handler(struct work_struct *work)
 		fences += fence[j];
 	}
 
-	if (fences == 0) {
-		amdgpu_gfx_off_ctrl(adev, true);
+	if (!fences && !atomic_read(&adev->vcn.total_submission_cnt)) {
 		amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN,
 		       AMD_PG_STATE_GATE);
 	} else {
@@ -319,36 +363,46 @@
 
 void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring)
 {
 	struct amdgpu_device *adev = ring->adev;
-	bool set_clocks = !cancel_delayed_work_sync(&adev->vcn.idle_work);
 
-	if (set_clocks) {
-		amdgpu_gfx_off_ctrl(adev, false);
-		amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN,
-		       AMD_PG_STATE_UNGATE);
-	}
+	atomic_inc(&adev->vcn.total_submission_cnt);
+	cancel_delayed_work_sync(&adev->vcn.idle_work);
+
+	mutex_lock(&adev->vcn.vcn_pg_lock);
+	amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN,
+	       AMD_PG_STATE_UNGATE);
 
 	if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)	{
 		struct dpg_pause_state new_state;
-		unsigned int fences = 0;
-		unsigned int i;
 
-		for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
-			fences += amdgpu_fence_count_emitted(&adev->vcn.inst[ring->me].ring_enc[i]);
-		}
-		if (fences)
+		if (ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC) {
+			atomic_inc(&adev->vcn.inst[ring->me].dpg_enc_submission_cnt);
 			new_state.fw_based = VCN_DPG_STATE__PAUSE;
-		else
-			new_state.fw_based = VCN_DPG_STATE__UNPAUSE;
+		} else {
+			unsigned int fences = 0;
+			unsigned int i;
 
-		if (ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC)
-			new_state.fw_based = VCN_DPG_STATE__PAUSE;
+			for (i = 0; i < adev->vcn.num_enc_rings; ++i)
+				fences += amdgpu_fence_count_emitted(&adev->vcn.inst[ring->me].ring_enc[i]);
+
+			if (fences || atomic_read(&adev->vcn.inst[ring->me].dpg_enc_submission_cnt))
+				new_state.fw_based = VCN_DPG_STATE__PAUSE;
+			else
+				new_state.fw_based = VCN_DPG_STATE__UNPAUSE;
+		}
 
 		adev->vcn.pause_dpg_mode(adev, ring->me, &new_state);
 	}
+	mutex_unlock(&adev->vcn.vcn_pg_lock);
 }
 
 void amdgpu_vcn_ring_end_use(struct amdgpu_ring *ring)
 {
+	if (ring->adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG &&
+		ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC)
+		atomic_dec(&ring->adev->vcn.inst[ring->me].dpg_enc_submission_cnt);
+
+	atomic_dec(&ring->adev->vcn.total_submission_cnt);
+
 	schedule_delayed_work(&ring->adev->vcn.idle_work, VCN_IDLE_TIMEOUT);
 }
@@ -390,7 +444,8 @@ static int amdgpu_vcn_dec_send_msg(struct amdgpu_ring *ring,
 	uint64_t addr;
 	int i, r;
 
-	r = amdgpu_job_alloc_with_ib(adev, 64, &job);
+	r = amdgpu_job_alloc_with_ib(adev, 64,
+					AMDGPU_IB_POOL_DIRECT, &job);
 	if (r)
 		goto err;
 
@@ -557,7 +612,8 @@ static int amdgpu_vcn_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t hand
 	uint64_t addr;
 	int i, r;
 
-	r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job);
+	r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4,
+					AMDGPU_IB_POOL_DIRECT, &job);
 	if (r)
 		return r;
 
@@ -610,7 +666,8 @@ static int amdgpu_vcn_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t han
 	uint64_t addr;
 	int i, r;
 
-	r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job);
+	r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4,
+					AMDGPU_IB_POOL_DIRECT, &job);
 	if (r)
 		return r;
 
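Note on the power-gating rework above: amdgpu_vcn_ring_begin_use() now bumps a global submission counter before cancelling the idle worker, encode submissions additionally bump a per-instance counter, and both the worker and the DPG pause logic consult those counters. A minimal sketch of the pattern, with simplified stand-in names (vcn_pg, vcn_begin_use, vcn_end_use are illustrative, not the driver's actual identifiers):

#include <linux/atomic.h>
#include <linux/jiffies.h>
#include <linux/mutex.h>
#include <linux/workqueue.h>

/* Sketch only: simplified stand-ins for the fields this patch adds. */
struct vcn_pg {
	atomic_t total_submission_cnt;		/* all in-flight submissions */
	atomic_t dpg_enc_submission_cnt;	/* encode jobs, per instance */
	struct mutex pg_lock;			/* serializes power-state changes */
	struct delayed_work idle_work;		/* gates power after an idle timeout */
};

static void vcn_begin_use(struct vcn_pg *pg, bool is_enc)
{
	/* Count first, then cancel: a racing idle worker either sees the
	 * non-zero count and re-arms itself, or finishes before we go on. */
	atomic_inc(&pg->total_submission_cnt);
	cancel_delayed_work_sync(&pg->idle_work);

	mutex_lock(&pg->pg_lock);
	/* The driver ungates VCN and picks the DPG pause state here. */
	if (is_enc)
		atomic_inc(&pg->dpg_enc_submission_cnt);
	mutex_unlock(&pg->pg_lock);
}

static void vcn_end_use(struct vcn_pg *pg, bool is_enc)
{
	if (is_enc)
		atomic_dec(&pg->dpg_enc_submission_cnt);
	atomic_dec(&pg->total_submission_cnt);

	/* Re-arm the worker; it gates power only once the fence counts
	 * and both submission counters have dropped to zero. */
	schedule_delayed_work(&pg->idle_work, msecs_to_jiffies(1000));
}

Incrementing before cancel_delayed_work_sync() closes the window where the worker samples the fence counts just as a new submission arrives, which the old set_clocks return-value heuristic could not guarantee.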
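The suspend/resume hunks mirror the existing saved_bo handling for the new fw_shared buffer. Because that BO is allocated in VRAM, its CPU mapping is treated as I/O memory, hence memcpy_fromio()/memcpy_toio()/memset_io() rather than plain memcpy(). A condensed sketch under that assumption (helper names hypothetical, error handling trimmed):

#include <linux/errno.h>
#include <linux/io.h>

/* Sketch: on suspend, copy the VRAM-backed region into a kvmalloc'd
 * shadow buffer; the _fromio variant is required for an ioremapped
 * source. */
static int vcn_save_fw_shared(void *shadow, const void __iomem *vram, size_t size)
{
	if (!shadow)
		return -ENOMEM;
	memcpy_fromio(shadow, vram, size);
	return 0;
}

/* Sketch: on resume, restore from the shadow if one was saved,
 * otherwise zero the region so the firmware starts from a clean state. */
static void vcn_restore_fw_shared(void __iomem *vram, const void *shadow, size_t size)
{
	if (shadow)
		memcpy_toio(vram, shadow, size);
	else
		memset_io(vram, 0, size);
}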