Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_device.c')
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 272
1 file changed, 183 insertions(+), 89 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index c17505fba988..39cd545976b7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -66,6 +66,7 @@
 #include "amdgpu_pmu.h"
 
 #include <linux/suspend.h>
+#include <drm/task_barrier.h>
 
 MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
 MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
@@ -137,14 +138,14 @@ static DEVICE_ATTR(pcie_replay_count, S_IRUGO,
 static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);
 
 /**
- * amdgpu_device_is_px - Is the device is a dGPU with HG/PX power control
+ * amdgpu_device_supports_boco - Is the device a dGPU with HG/PX power control
  *
  * @dev: drm_device pointer
  *
  * Returns true if the device is a dGPU with HG/PX power control,
  * otherwise return false.
  */
-bool amdgpu_device_is_px(struct drm_device *dev)
+bool amdgpu_device_supports_boco(struct drm_device *dev)
 {
 	struct amdgpu_device *adev = dev->dev_private;
@@ -154,6 +155,21 @@ bool amdgpu_device_is_px(struct drm_device *dev)
 }
 
 /**
+ * amdgpu_device_supports_baco - Does the device support BACO
+ *
+ * @dev: drm_device pointer
+ *
+ * Returns true if the device supporte BACO,
+ * otherwise return false.
+ */
+bool amdgpu_device_supports_baco(struct drm_device *dev)
+{
+	struct amdgpu_device *adev = dev->dev_private;
+
+	return amdgpu_asic_supports_baco(adev);
+}
+
+/**
  * VRAM access helper functions.
  *
  * amdgpu_device_vram_access - read/write a buffer in vram
@@ -200,8 +216,8 @@ uint32_t amdgpu_mm_rreg(struct amdgpu_device *adev, uint32_t reg,
 {
 	uint32_t ret;
 
-	if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev))
-		return amdgpu_virt_kiq_rreg(adev, reg);
+	if ((acc_flags & AMDGPU_REGS_KIQ) || (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev)))
+		return amdgpu_kiq_rreg(adev, reg);
 
 	if ((reg * 4) < adev->rmmio_size && !(acc_flags & AMDGPU_REGS_IDX))
 		ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
@@ -278,8 +294,8 @@ void amdgpu_mm_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v,
 		adev->last_mm_index = v;
 	}
 
-	if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev))
-		return amdgpu_virt_kiq_wreg(adev, reg, v);
+	if ((acc_flags & AMDGPU_REGS_KIQ) || (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev)))
+		return amdgpu_kiq_wreg(adev, reg, v);
 
 	if ((reg * 4) < adev->rmmio_size && !(acc_flags & AMDGPU_REGS_IDX))
 		writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
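
The reworked predicate above routes a register access through the KIQ either when the caller asks for it explicitly (the new AMDGPU_REGS_KIQ flag) or, as before, by default when running as an SR-IOV VF at runtime, while AMDGPU_REGS_NO_KIQ still forces direct MMIO. A minimal, self-contained sketch of just that flag logic (the flag values here are illustrative, not the driver's definitions):

    #include <assert.h>
    #include <stdbool.h>
    #include <stdint.h>

    /* Illustrative flag values; the real AMDGPU_REGS_* flags live in amdgpu.h. */
    #define REGS_NO_KIQ (1 << 1)
    #define REGS_KIQ    (1 << 2)

    /* Mirrors the new routing test in amdgpu_mm_rreg()/amdgpu_mm_wreg(). */
    static bool use_kiq(uint32_t acc_flags, bool sriov_runtime)
    {
        return (acc_flags & REGS_KIQ) ||
               (!(acc_flags & REGS_NO_KIQ) && sriov_runtime);
    }

    int main(void)
    {
        assert(use_kiq(REGS_KIQ, false));    /* explicit KIQ request        */
        assert(use_kiq(0, true));            /* SR-IOV runtime default      */
        assert(!use_kiq(REGS_NO_KIQ, true)); /* caller opted out of KIQ     */
        assert(!use_kiq(0, false));          /* bare-metal direct MMIO path */
        return 0;
    }
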
@@ -969,7 +985,7 @@ static void amdgpu_device_check_vm_size(struct amdgpu_device *adev)
 static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev)
 {
 	struct sysinfo si;
-	bool is_os_64 = (sizeof(void *) == 8) ? true : false;
+	bool is_os_64 = (sizeof(void *) == 8);
 	uint64_t total_memory;
 	uint64_t dram_size_seven_GB = 0x1B8000000;
 	uint64_t dram_size_three_GB = 0xB8000000;
@@ -1016,8 +1032,6 @@ def_value:
  */
 static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
 {
-	int ret = 0;
-
 	if (amdgpu_sched_jobs < 4) {
 		dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
 			 amdgpu_sched_jobs);
@@ -1057,7 +1071,7 @@ static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
 
 	adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
 
-	return ret;
+	return 0;
 }
 
 /**
@@ -1072,8 +1086,9 @@ static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
 static void amdgpu_switcheroo_set_state(struct pci_dev *pdev, enum vga_switcheroo_state state)
 {
 	struct drm_device *dev = pci_get_drvdata(pdev);
+	int r;
 
-	if (amdgpu_device_is_px(dev) && state == VGA_SWITCHEROO_OFF)
+	if (amdgpu_device_supports_boco(dev) && state == VGA_SWITCHEROO_OFF)
 		return;
 
 	if (state == VGA_SWITCHEROO_ON) {
@@ -1081,7 +1096,12 @@ static void amdgpu_switcheroo_set_state(struct pci_dev *pdev, enum vga_switchero
 		/* don't suspend or resume card normally */
 		dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
 
-		amdgpu_device_resume(dev, true, true);
+		pci_set_power_state(dev->pdev, PCI_D0);
+		pci_restore_state(dev->pdev);
+		r = pci_enable_device(dev->pdev);
+		if (r)
+			DRM_WARN("pci_enable_device failed (%d)\n", r);
+		amdgpu_device_resume(dev, true);
 
 		dev->switch_power_state = DRM_SWITCH_POWER_ON;
 		drm_kms_helper_poll_enable(dev);
@@ -1089,7 +1109,11 @@ static void amdgpu_switcheroo_set_state(struct pci_dev *pdev, enum vga_switchero
 		pr_info("amdgpu: switched off\n");
 		drm_kms_helper_poll_disable(dev);
 		dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
-		amdgpu_device_suspend(dev, true, true);
+		amdgpu_device_suspend(dev, true);
+		pci_save_state(dev->pdev);
+		/* Shut down the device */
+		pci_disable_device(dev->pdev);
+		pci_set_power_state(dev->pdev, PCI_D3cold);
 		dev->switch_power_state = DRM_SWITCH_POWER_OFF;
 	}
 }
@@ -1527,7 +1551,6 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
 		}
 
 parse_soc_bounding_box:
-#ifdef CONFIG_DRM_AMD_DC_DCN2_0
 		/*
 		 * soc bounding box info is not integrated in disocovery table,
 		 * we always need to parse it from gpu info firmware.
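
The switcheroo hunks above pull the PCI power handling out of amdgpu_device_suspend()/amdgpu_device_resume() and into the switcheroo callback itself, and deepen the off state from D3hot to D3cold. A hedged sketch of the resulting power-off ordering, assuming 5.5-era DRM/PCI APIs; example_switcheroo_off() is a hypothetical helper, not code from this patch:

    /* Hypothetical helper illustrating the ordering the patch establishes:
     * driver-side teardown first, PCI housekeeping second, D3cold last.
     */
    static int example_switcheroo_off(struct drm_device *dev)
    {
        int r;

        r = amdgpu_device_suspend(dev, true);       /* driver teardown  */
        if (r)
            return r;

        pci_save_state(dev->pdev);                  /* PCI housekeeping */
        pci_disable_device(dev->pdev);
        pci_set_power_state(dev->pdev, PCI_D3cold); /* full power-off,
                                                       deeper than the
                                                       old D3hot        */
        return 0;
    }
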
@@ -1538,7 +1561,6 @@ parse_soc_bounding_box:
 									le32_to_cpu(hdr->header.ucode_array_offset_bytes));
 			adev->dm.soc_bounding_box = &gpu_info_fw->soc_bounding_box;
 		}
-#endif
 		break;
 	}
 	default:
@@ -1787,7 +1809,8 @@ static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
 		}
 	}
 
-	r = amdgpu_pm_load_smu_firmware(adev, &smu_version);
+	if (!amdgpu_sriov_vf(adev) || adev->asic_type == CHIP_TONGA)
+		r = amdgpu_pm_load_smu_firmware(adev, &smu_version);
 
 	return r;
 }
@@ -1854,6 +1877,9 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
 		}
 	}
 
+	if (amdgpu_sriov_vf(adev))
+		amdgpu_virt_init_data_exchange(adev);
+
 	r = amdgpu_ib_pool_init(adev);
 	if (r) {
 		dev_err(adev->dev, "IB initialization failed (%d).\n", r);
@@ -1895,11 +1921,8 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
 	amdgpu_amdkfd_device_init(adev);
 
 init_failed:
-	if (amdgpu_sriov_vf(adev)) {
-		if (!r)
-			amdgpu_virt_init_data_exchange(adev);
+	if (amdgpu_sriov_vf(adev))
 		amdgpu_virt_release_full_gpu(adev, true);
-	}
 
 	return r;
 }
@@ -1938,6 +1961,7 @@ static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
  * amdgpu_device_set_cg_state - set clockgating for amdgpu device
  *
  * @adev: amdgpu_device pointer
+ * @state: clockgating state (gate or ungate)
  *
  * The list of all the hardware IPs that make up the asic is walked and the
  * set_clockgating_state callbacks are run.
@@ -1962,6 +1986,7 @@ static int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
 		if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
+		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
 		    adev->ip_blocks[i].version->funcs->set_clockgating_state) {
 			/* enable clockgating to save power */
 			r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
@@ -1992,6 +2017,7 @@ static int amdgpu_device_set_pg_state(struct amdgpu_device *adev, enum amd_power
 		if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
 		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
+		    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
 		    adev->ip_blocks[i].version->funcs->set_powergating_state) {
 			/* enable powergating to save power */
 			r = adev->ip_blocks[i].version->funcs->set_powergating_state((void *)adev,
@@ -2319,14 +2345,7 @@ static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
 		adev->ip_blocks[i].status.hw = false;
 		/* handle putting the SMC in the appropriate state */
 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
-			if (is_support_sw_smu(adev)) {
-				r = smu_set_mp1_state(&adev->smu, adev->mp1_state);
-			} else if (adev->powerplay.pp_funcs &&
-					   adev->powerplay.pp_funcs->set_mp1_state) {
-				r = adev->powerplay.pp_funcs->set_mp1_state(
-					adev->powerplay.pp_handle,
-					adev->mp1_state);
-			}
+			r = amdgpu_dpm_set_mp1_state(adev, adev->mp1_state);
 			if (r) {
 				DRM_ERROR("SMC failed to set mp1 state %d, %d\n",
 					  adev->mp1_state, r);
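
The @@ -2319 hunk collapses an open-coded backend check (software SMU vs. powerplay) into a single amdgpu_dpm_set_mp1_state() call. A self-contained userspace model of that consolidation pattern, using illustrative names only, not the driver's types:

    #include <stdio.h>

    enum mp1_state { MP1_NONE, MP1_SHUTDOWN, MP1_UNLOAD, MP1_RESET };

    /* One vtable slot replaces the per-call-site "which backend?" test. */
    struct dpm_backend {
        int (*set_mp1_state)(void *handle, enum mp1_state state);
        void *handle;
    };

    static int sw_smu_set_mp1_state(void *handle, enum mp1_state state)
    {
        printf("sw-SMU: mp1 state -> %d\n", state);
        return 0;
    }

    /* Single wrapper in the style of amdgpu_dpm_set_mp1_state(). */
    static int dpm_set_mp1_state(struct dpm_backend *b, enum mp1_state state)
    {
        return b->set_mp1_state ? b->set_mp1_state(b->handle, state) : 0;
    }

    int main(void)
    {
        struct dpm_backend b = { .set_mp1_state = sw_smu_set_mp1_state };
        return dpm_set_mp1_state(&b, MP1_SHUTDOWN);
    }
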
@@ -2413,7 +2432,8 @@ static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
 		AMD_IP_BLOCK_TYPE_GFX,
 		AMD_IP_BLOCK_TYPE_SDMA,
 		AMD_IP_BLOCK_TYPE_UVD,
-		AMD_IP_BLOCK_TYPE_VCE
+		AMD_IP_BLOCK_TYPE_VCE,
+		AMD_IP_BLOCK_TYPE_VCN
 	};
 
 	for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
@@ -2428,7 +2448,11 @@ static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
 				block->status.hw)
 				continue;
 
-			r = block->version->funcs->hw_init(adev);
+			if (block->version->type == AMD_IP_BLOCK_TYPE_SMC)
+				r = block->version->funcs->resume(adev);
+			else
+				r = block->version->funcs->hw_init(adev);
+
 			DRM_INFO("RE-INIT-late: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
 			if (r)
 				return r;
@@ -2600,20 +2624,19 @@ bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
 	case CHIP_VEGA10:
 	case CHIP_VEGA12:
 	case CHIP_VEGA20:
-#if defined(CONFIG_DRM_AMD_DC_DCN1_0)
+#if defined(CONFIG_DRM_AMD_DC_DCN)
 	case CHIP_RAVEN:
-#endif
-#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
 	case CHIP_NAVI10:
 	case CHIP_NAVI14:
 	case CHIP_NAVI12:
-#endif
-#if defined(CONFIG_DRM_AMD_DC_DCN2_1)
 	case CHIP_RENOIR:
 #endif
 		return amdgpu_dc != 0;
 #endif
 	default:
+		if (amdgpu_dc > 0)
+			DRM_INFO("Display Core has been requested via kernel parameter "
+					 "but isn't supported by ASIC, ignoring\n");
 		return false;
 	}
 }
@@ -2638,8 +2661,38 @@ static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
 {
 	struct amdgpu_device *adev =
 		container_of(__work, struct amdgpu_device, xgmi_reset_work);
+	struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev, 0);
+
+	/* It's a bug to not have a hive within this function */
+	if (WARN_ON(!hive))
+		return;
+
+	/*
+	 * Use task barrier to synchronize all xgmi reset works across the
+	 * hive. task_barrier_enter and task_barrier_exit will block
+	 * until all the threads running the xgmi reset works reach
+	 * those points. task_barrier_full will do both blocks.
+	 */
+	if (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) {
+
+		task_barrier_enter(&hive->tb);
+		adev->asic_reset_res = amdgpu_device_baco_enter(adev->ddev);
+
+		if (adev->asic_reset_res)
+			goto fail;
+
+		task_barrier_exit(&hive->tb);
+		adev->asic_reset_res = amdgpu_device_baco_exit(adev->ddev);
+
+		if (adev->asic_reset_res)
+			goto fail;
+	} else {
+
+		task_barrier_full(&hive->tb);
+		adev->asic_reset_res =  amdgpu_asic_reset(adev);
+	}
 
-	adev->asic_reset_res =  amdgpu_asic_reset(adev);
+fail:
 	if (adev->asic_reset_res)
 		DRM_WARN("ASIC reset failed with error, %d for drm dev, %s",
 			 adev->asic_reset_res, adev->ddev->unique);
@@ -2731,7 +2784,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 		       uint32_t flags)
 {
 	int r, i;
-	bool runtime = false;
+	bool boco = false;
 	u32 max_MBps;
 
 	adev->shutdown = false;
@@ -2754,7 +2807,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 	adev->mman.buffer_funcs = NULL;
 	adev->mman.buffer_funcs_ring = NULL;
 	adev->vm_manager.vm_pte_funcs = NULL;
-	adev->vm_manager.vm_pte_num_rqs = 0;
+	adev->vm_manager.vm_pte_num_scheds = 0;
 	adev->gmc.gmc_funcs = NULL;
 	adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS);
 	bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
@@ -2794,9 +2847,8 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 	mutex_init(&adev->virt.vf_errors.lock);
 	hash_init(adev->mn_hash);
 	mutex_init(&adev->lock_reset);
-	mutex_init(&adev->notifier_lock);
-	mutex_init(&adev->virt.dpm_mutex);
 	mutex_init(&adev->psp.mutex);
+	mutex_init(&adev->notifier_lock);
 
 	r = amdgpu_device_check_arguments(adev);
 	if (r)
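
The xgmi_reset_func hunk above uses the new <drm/task_barrier.h> primitive so that, for BACO resets, every device in the hive enters BACO before any device leaves it. The same two-phase rendezvous can be modeled in userspace with a POSIX barrier (build with -pthread); this is an illustration of the choreography, not the kernel primitive itself:

    #include <pthread.h>
    #include <stdio.h>

    #define NODES 4  /* stand-in for the devices in one XGMI hive */

    static pthread_barrier_t tb;

    static void *xgmi_reset_work(void *arg)
    {
        long node = (long)arg;

        pthread_barrier_wait(&tb);              /* ~ task_barrier_enter() */
        printf("node %ld: BACO enter\n", node); /* all enter together     */

        pthread_barrier_wait(&tb);              /* ~ task_barrier_exit(); no
                                                   node exits BACO until all
                                                   have finished entering  */
        printf("node %ld: BACO exit\n", node);
        return NULL;
    }

    int main(void)
    {
        pthread_t t[NODES];
        long i;

        pthread_barrier_init(&tb, NULL, NODES);
        for (i = 0; i < NODES; i++)
            pthread_create(&t[i], NULL, xgmi_reset_work, (void *)i);
        for (i = 0; i < NODES; i++)
            pthread_join(t[i], NULL);
        pthread_barrier_destroy(&tb);
        return 0;
    }
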
@@ -2902,12 +2954,15 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 	 * ignore it */
 	vga_client_register(adev->pdev, adev, NULL, amdgpu_device_vga_set_decode);
 
-	if (amdgpu_device_is_px(ddev))
-		runtime = true;
-	if (!pci_is_thunderbolt_attached(adev->pdev))
+	if (amdgpu_device_supports_boco(ddev))
+		boco = true;
+	if (amdgpu_has_atpx() &&
+	    (amdgpu_is_atpx_hybrid() ||
+	     amdgpu_has_atpx_dgpu_power_cntl()) &&
+	    !pci_is_thunderbolt_attached(adev->pdev))
 		vga_switcheroo_register_client(adev->pdev,
-					       &amdgpu_switcheroo_ops, runtime);
-	if (runtime)
+					       &amdgpu_switcheroo_ops, boco);
+	if (boco)
 		vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);
 
 	if (amdgpu_emu_mode == 1) {
@@ -2994,11 +3049,17 @@ fence_driver_init:
 		}
 		dev_err(adev->dev, "amdgpu_device_ip_init failed\n");
 		amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0);
-		if (amdgpu_virt_request_full_gpu(adev, false))
-			amdgpu_virt_release_full_gpu(adev, false);
 		goto failed;
 	}
 
+	DRM_DEBUG("SE %d, SH per SE %d, CU per SH %d, active_cu_number %d\n",
+			adev->gfx.config.max_shader_engines,
+			adev->gfx.config.max_sh_per_se,
+			adev->gfx.config.max_cu_per_sh,
+			adev->gfx.cu_info.number);
+
+	amdgpu_ctx_init_sched(adev);
+
 	adev->accel_working = true;
 
 	amdgpu_vm_check_compute_bug(adev);
@@ -3013,16 +3074,19 @@ fence_driver_init:
 
 	amdgpu_fbdev_init(adev);
 
-	if (amdgpu_sriov_vf(adev) && amdgim_is_hwperf(adev))
-		amdgpu_pm_virt_sysfs_init(adev);
-
 	r = amdgpu_pm_sysfs_init(adev);
-	if (r)
+	if (r) {
+		adev->pm_sysfs_en = false;
 		DRM_ERROR("registering pm debugfs failed (%d).\n", r);
+	} else
+		adev->pm_sysfs_en = true;
 
 	r = amdgpu_ucode_sysfs_init(adev);
-	if (r)
+	if (r) {
+		adev->ucode_sysfs_en = false;
 		DRM_ERROR("Creating firmware sysfs failed (%d).\n", r);
+	} else
+		adev->ucode_sysfs_en = true;
 
 	r = amdgpu_debugfs_gem_init(adev);
 	if (r)
@@ -3091,7 +3155,7 @@ fence_driver_init:
 
 failed:
 	amdgpu_vf_error_trans_all(adev);
-	if (runtime)
+	if (boco)
 		vga_switcheroo_fini_domain_pm_ops(adev->dev);
 
 	return r;
@@ -3122,7 +3186,8 @@ void amdgpu_device_fini(struct amdgpu_device *adev)
 			drm_atomic_helper_shutdown(adev->ddev);
 	}
 	amdgpu_fence_driver_fini(adev);
-	amdgpu_pm_sysfs_fini(adev);
+	if (adev->pm_sysfs_en)
+		amdgpu_pm_sysfs_fini(adev);
 	amdgpu_fbdev_fini(adev);
 	r = amdgpu_device_ip_fini(adev);
 	if (adev->firmware.gpu_info_fw) {
@@ -3139,9 +3204,12 @@ void amdgpu_device_fini(struct amdgpu_device *adev)
 
 	kfree(adev->bios);
 	adev->bios = NULL;
-	if (!pci_is_thunderbolt_attached(adev->pdev))
+	if (amdgpu_has_atpx() &&
+	    (amdgpu_is_atpx_hybrid() ||
+	     amdgpu_has_atpx_dgpu_power_cntl()) &&
+	    !pci_is_thunderbolt_attached(adev->pdev))
 		vga_switcheroo_unregister_client(adev->pdev);
-	if (adev->flags & AMD_IS_PX)
+	if (amdgpu_device_supports_boco(adev->ddev))
 		vga_switcheroo_fini_domain_pm_ops(adev->dev);
 	vga_client_register(adev->pdev, NULL, NULL, NULL);
 	if (adev->rio_mem)
@@ -3150,12 +3218,11 @@ void amdgpu_device_fini(struct amdgpu_device *adev)
 	iounmap(adev->rmmio);
 	adev->rmmio = NULL;
 	amdgpu_device_doorbell_fini(adev);
-	if (amdgpu_sriov_vf(adev) && amdgim_is_hwperf(adev))
-		amdgpu_pm_virt_sysfs_fini(adev);
 	amdgpu_debugfs_regs_cleanup(adev);
 	device_remove_file(adev->dev, &dev_attr_pcie_replay_count);
-	amdgpu_ucode_sysfs_fini(adev);
+	if (adev->ucode_sysfs_en)
+		amdgpu_ucode_sysfs_fini(adev);
 	if (IS_ENABLED(CONFIG_PERF_EVENTS))
 		amdgpu_pmu_fini(adev);
 	amdgpu_debugfs_preempt_cleanup(adev);
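
The pm_sysfs_en/ucode_sysfs_en hunks above record whether each optional sysfs init step succeeded so that amdgpu_device_fini() only tears down what actually came up. A small self-contained model of that bookkeeping pattern (all names illustrative):

    #include <stdbool.h>
    #include <stdio.h>

    struct dev_state {
        bool pm_sysfs_en;
        bool ucode_sysfs_en;
    };

    static int pm_sysfs_init(void)     { return -1; /* pretend this failed */ }
    static int ucode_sysfs_init(void)  { return 0; }
    static void pm_sysfs_fini(void)    { puts("pm sysfs removed"); }
    static void ucode_sysfs_fini(void) { puts("ucode sysfs removed"); }

    static void device_init(struct dev_state *s)
    {
        /* Record success instead of aborting: both files are optional. */
        s->pm_sysfs_en = (pm_sysfs_init() == 0);
        if (!s->pm_sysfs_en)
            fprintf(stderr, "registering pm sysfs failed\n");
        s->ucode_sysfs_en = (ucode_sysfs_init() == 0);
    }

    static void device_fini(struct dev_state *s)
    {
        if (s->pm_sysfs_en)         /* skipped here: init failed above */
            pm_sysfs_fini();
        if (s->ucode_sysfs_en)
            ucode_sysfs_fini();
    }

    int main(void)
    {
        struct dev_state s = { 0 };
        device_init(&s);
        device_fini(&s);
        return 0;
    }
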
@@ -3178,7 +3245,7 @@ void amdgpu_device_fini(struct amdgpu_device *adev)
  * Returns 0 for success or an error on failure.
  * Called at driver suspend.
  */
-int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon)
+int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
 {
 	struct amdgpu_device *adev;
 	struct drm_crtc *crtc;
@@ -3261,13 +3328,6 @@ int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon)
 	 */
 	amdgpu_bo_evict_vram(adev);
 
-	if (suspend) {
-		pci_save_state(dev->pdev);
-		/* Shut down the device */
-		pci_disable_device(dev->pdev);
-		pci_set_power_state(dev->pdev, PCI_D3hot);
-	}
-
 	return 0;
 }
 
@@ -3282,7 +3342,7 @@ int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon)
  * Returns 0 for success or an error on failure.
  * Called at driver resume.
  */
-int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon)
+int amdgpu_device_resume(struct drm_device *dev, bool fbcon)
 {
 	struct drm_connector *connector;
 	struct drm_connector_list_iter iter;
@@ -3293,14 +3353,6 @@ int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon)
 	if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
 		return 0;
 
-	if (resume) {
-		pci_set_power_state(dev->pdev, PCI_D0);
-		pci_restore_state(dev->pdev);
-		r = pci_enable_device(dev->pdev);
-		if (r)
-			return r;
-	}
-
 	/* post card */
 	if (amdgpu_device_need_post(adev)) {
 		r = amdgpu_atom_asic_init(adev->mode_info.atom_context);
@@ -3639,13 +3691,12 @@ static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
 	if (r)
 		return r;
 
-	amdgpu_amdkfd_pre_reset(adev);
-
 	/* Resume IP prior to SMC */
 	r = amdgpu_device_ip_reinit_early_sriov(adev);
 	if (r)
 		goto error;
 
+	amdgpu_virt_init_data_exchange(adev);
 	/* we need recover gart prior to run SMC/CP/SDMA resume */
 	amdgpu_gtt_mgr_recover(&adev->mman.bdev.man[TTM_PL_TT]);
 
@@ -3663,7 +3714,6 @@ static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
 	amdgpu_amdkfd_post_reset(adev);
 
 error:
-	amdgpu_virt_init_data_exchange(adev);
 	amdgpu_virt_release_full_gpu(adev, true);
 	if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
 		amdgpu_inc_vram_lost(adev);
@@ -3709,6 +3759,11 @@ bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
 		case CHIP_VEGA10:
 		case CHIP_VEGA12:
 		case CHIP_RAVEN:
+		case CHIP_ARCTURUS:
+		case CHIP_RENOIR:
+		case CHIP_NAVI10:
+		case CHIP_NAVI14:
+		case CHIP_NAVI12:
 			break;
 		default:
 			goto disabled;
@@ -3785,7 +3840,7 @@ static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive,
 		list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
 			/* For XGMI run all resets in parallel to speed up the process */
 			if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
-				if (!queue_work(system_highpri_wq, &tmp_adev->xgmi_reset_work))
+				if (!queue_work(system_unbound_wq, &tmp_adev->xgmi_reset_work))
 					r = -EALREADY;
 			} else
 				r = amdgpu_asic_reset(tmp_adev);
@@ -3797,7 +3852,7 @@ static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive,
 			}
 		}
 
-		/* For XGMI wait for all PSP resets to complete before proceed */
+		/* For XGMI wait for all resets to complete before proceed */
 		if (!r) {
 			list_for_each_entry(tmp_adev, device_list_handle,
 					    gmc.xgmi.head) {
@@ -3811,6 +3866,8 @@ static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive,
 		}
 	}
 
+	if (!r && amdgpu_ras_intr_triggered())
+		amdgpu_ras_intr_cleared();
 
 	list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
 		if (need_full_reset) {
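
With the suspend/resume bool parameters dropped, amdgpu_device_suspend()/amdgpu_device_resume() no longer touch PCI power state, so a caller that actually powers the card off must bracket them with the mirror-image PCI sequence itself. A hedged sketch of the power-on side of that contract; example_power_on() is a hypothetical helper, not code from the tree:

    /* Hypothetical caller illustrating the new division of labor:
     * bring the PCI device back to D0 first, then let the driver resume.
     */
    static int example_power_on(struct drm_device *dev)
    {
        int r;

        pci_set_power_state(dev->pdev, PCI_D0);
        pci_restore_state(dev->pdev);
        r = pci_enable_device(dev->pdev);
        if (r)
            return r;

        return amdgpu_device_resume(dev, true);
    }
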
@@ -3899,7 +3956,7 @@ static bool amdgpu_device_lock_adev(struct amdgpu_device *adev, bool trylock)
 		mutex_lock(&adev->lock_reset);
 
 	atomic_inc(&adev->gpu_reset_counter);
-	adev->in_gpu_reset = 1;
+	adev->in_gpu_reset = true;
 	switch (amdgpu_asic_reset_method(adev)) {
 	case AMD_RESET_METHOD_MODE1:
 		adev->mp1_state = PP_MP1_STATE_SHUTDOWN;
@@ -3919,7 +3976,7 @@ static void amdgpu_device_unlock_adev(struct amdgpu_device *adev)
 {
 	amdgpu_vf_error_trans_all(adev);
 	adev->mp1_state = PP_MP1_STATE_NONE;
-	adev->in_gpu_reset = 0;
+	adev->in_gpu_reset = false;
 	mutex_unlock(&adev->lock_reset);
 }
 
@@ -3943,12 +4000,15 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
 	struct amdgpu_device *tmp_adev = NULL;
 	int i, r = 0;
 	bool in_ras_intr = amdgpu_ras_intr_triggered();
+	bool use_baco =
+		(amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) ?
+		true : false;
 
 	/*
 	 * Flush RAM to disk so that after reboot
 	 * the user can read log and see why the system rebooted.
 	 */
-	if (in_ras_intr && amdgpu_ras_get_context(adev)->reboot) {
+	if (in_ras_intr && !use_baco && amdgpu_ras_get_context(adev)->reboot) {
 		DRM_WARN("Emergency reboot.");
 
@@ -3959,7 +4019,8 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
 	need_full_reset = job_signaled = false;
 	INIT_LIST_HEAD(&device_list);
 
-	dev_info(adev->dev, "GPU %s begin!\n", in_ras_intr ? "jobs stop":"reset");
+	dev_info(adev->dev, "GPU %s begin!\n",
+		(in_ras_intr && !use_baco) ? "jobs stop":"reset");
 
 	cancel_delayed_work_sync(&adev->delayed_init_work);
 
@@ -4026,7 +4087,8 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
 		amdgpu_unregister_gpu_instance(tmp_adev);
 
 		/* disable ras on ALL IPs */
-		if (!in_ras_intr && amdgpu_device_ip_need_full_reset(tmp_adev))
+		if (!(in_ras_intr && !use_baco) &&
+		      amdgpu_device_ip_need_full_reset(tmp_adev))
 			amdgpu_ras_suspend(tmp_adev);
 
 		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
@@ -4037,13 +4099,13 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
 
 			drm_sched_stop(&ring->sched, job ? &job->base : NULL);
 
-			if (in_ras_intr)
+			if (in_ras_intr && !use_baco)
 				amdgpu_job_stop_all_jobs_on_sched(&ring->sched);
 		}
 	}
 
-	if (in_ras_intr)
+	if (in_ras_intr && !use_baco)
 		goto skip_sched_resume;
 
 	/*
@@ -4136,7 +4198,7 @@ skip_hw_reset:
 skip_sched_resume:
 	list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
 		/*unlock kfd: SRIOV would do it separately */
-		if (!in_ras_intr && !amdgpu_sriov_vf(tmp_adev))
+		if (!(in_ras_intr && !use_baco) && !amdgpu_sriov_vf(tmp_adev))
 	                amdgpu_amdkfd_post_reset(tmp_adev);
 		amdgpu_device_unlock_adev(tmp_adev);
 	}
@@ -4285,3 +4347,35 @@ static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
 	}
 }
 
+int amdgpu_device_baco_enter(struct drm_device *dev)
+{
+	struct amdgpu_device *adev = dev->dev_private;
+	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+
+	if (!amdgpu_device_supports_baco(adev->ddev))
+		return -ENOTSUPP;
+
+	if (ras && ras->supported)
+		adev->nbio.funcs->enable_doorbell_interrupt(adev, false);
+
+	return amdgpu_dpm_baco_enter(adev);
+}
+
+int amdgpu_device_baco_exit(struct drm_device *dev)
+{
+	struct amdgpu_device *adev = dev->dev_private;
+	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
+	int ret = 0;
+
+	if (!amdgpu_device_supports_baco(adev->ddev))
+		return -ENOTSUPP;
+
+	ret = amdgpu_dpm_baco_exit(adev);
+	if (ret)
+		return ret;
+
+	if (ras && ras->supported)
+		adev->nbio.funcs->enable_doorbell_interrupt(adev, true);
+
+	return 0;
+}
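
The two new helpers pair up: amdgpu_device_baco_enter() masks the RAS doorbell interrupt before entering BACO, amdgpu_device_baco_exit() re-enables it after a successful exit, and both return -ENOTSUPP on ASICs without BACO. A hedged usage sketch built only from the functions added above; example_baco_reset() is a hypothetical caller, not code from this patch:

    /* Hypothetical BACO power cycle using the new helpers. */
    static int example_baco_reset(struct drm_device *dev)
    {
        int r;

        r = amdgpu_device_baco_enter(dev);   /* quiets RAS doorbell irqs,
                                                -ENOTSUPP without BACO    */
        if (r)
            return r;

        return amdgpu_device_baco_exit(dev); /* re-arms doorbell irqs on
                                                successful exit           */
    }
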