Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_device.c')
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 745
 1 file changed, 380 insertions(+), 365 deletions(-)
| diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 981a9cfb63b5..3f001a50b34a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -159,76 +159,11 @@ static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev,  	return sysfs_emit(buf, "%llu\n", cnt);  } -static DEVICE_ATTR(pcie_replay_count, S_IRUGO, +static DEVICE_ATTR(pcie_replay_count, 0444,  		amdgpu_device_get_pcie_replay_count, NULL);  static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev); -/** - * DOC: product_name - * - * The amdgpu driver provides a sysfs API for reporting the product name - * for the device - * The file product_name is used for this and returns the product name - * as returned from the FRU. - * NOTE: This is only available for certain server cards - */ - -static ssize_t amdgpu_device_get_product_name(struct device *dev, -		struct device_attribute *attr, char *buf) -{ -	struct drm_device *ddev = dev_get_drvdata(dev); -	struct amdgpu_device *adev = drm_to_adev(ddev); - -	return sysfs_emit(buf, "%s\n", adev->product_name); -} - -static DEVICE_ATTR(product_name, S_IRUGO, -		amdgpu_device_get_product_name, NULL); - -/** - * DOC: product_number - * - * The amdgpu driver provides a sysfs API for reporting the part number - * for the device - * The file product_number is used for this and returns the part number - * as returned from the FRU. - * NOTE: This is only available for certain server cards - */ - -static ssize_t amdgpu_device_get_product_number(struct device *dev, -		struct device_attribute *attr, char *buf) -{ -	struct drm_device *ddev = dev_get_drvdata(dev); -	struct amdgpu_device *adev = drm_to_adev(ddev); - -	return sysfs_emit(buf, "%s\n", adev->product_number); -} - -static DEVICE_ATTR(product_number, S_IRUGO, -		amdgpu_device_get_product_number, NULL); - -/** - * DOC: serial_number - * - * The amdgpu driver provides a sysfs API for reporting the serial number - * for the device - * The file serial_number is used for this and returns the serial number - * as returned from the FRU. 
- * NOTE: This is only available for certain server cards - */ - -static ssize_t amdgpu_device_get_serial_number(struct device *dev, -		struct device_attribute *attr, char *buf) -{ -	struct drm_device *ddev = dev_get_drvdata(dev); -	struct amdgpu_device *adev = drm_to_adev(ddev); - -	return sysfs_emit(buf, "%s\n", adev->serial); -} - -static DEVICE_ATTR(serial_number, S_IRUGO, -		amdgpu_device_get_serial_number, NULL);  /**   * amdgpu_device_supports_px - Is the device a dGPU with ATPX power control @@ -370,10 +305,16 @@ size_t amdgpu_device_aper_access(struct amdgpu_device *adev, loff_t pos,  		if (write) {  			memcpy_toio(addr, buf, count); +			/* Make sure HDP write cache flush happens without any reordering +			 * after the system memory contents are sent over PCIe device +			 */  			mb();  			amdgpu_device_flush_hdp(adev, NULL);  		} else {  			amdgpu_device_invalidate_hdp(adev, NULL); +			/* Make sure HDP read cache is invalidated before issuing a read +			 * to the PCIe device +			 */  			mb();  			memcpy_fromio(buf, addr, count);  		} @@ -481,8 +422,7 @@ uint32_t amdgpu_device_rreg(struct amdgpu_device *adev,  /*   * MMIO register read with bytes helper functions   * @offset:bytes offset from MMIO start - * -*/ + */  /**   * amdgpu_mm_rreg8 - read a memory mapped IO register @@ -506,8 +446,8 @@ uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset)   * MMIO register write with bytes helper functions   * @offset:bytes offset from MMIO start   * @value: the value want to be written to the register - * -*/ + */ +  /**   * amdgpu_mm_wreg8 - read a memory mapped IO register   * @@ -571,7 +511,8 @@ void amdgpu_device_wreg(struct amdgpu_device *adev,   * this function is invoked only for the debugfs register access   */  void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev, -			     uint32_t reg, uint32_t v) +			     uint32_t reg, uint32_t v, +			     uint32_t xcc_id)  {  	if (amdgpu_device_skip_hw_access(adev))  		return; @@ -580,7 +521,7 @@ void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev,  	    adev->gfx.rlc.funcs &&  	    adev->gfx.rlc.funcs->is_rlcg_access_range) {  		if (adev->gfx.rlc.funcs->is_rlcg_access_range(adev, reg)) -			return amdgpu_sriov_wreg(adev, reg, v, 0, 0); +			return amdgpu_sriov_wreg(adev, reg, v, 0, 0, xcc_id);  	} else if ((reg * 4) >= adev->rmmio_size) {  		adev->pcie_wreg(adev, reg * 4, v);  	} else { @@ -589,94 +530,6 @@ void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev,  }  /** - * amdgpu_mm_rdoorbell - read a doorbell dword - * - * @adev: amdgpu_device pointer - * @index: doorbell index - * - * Returns the value in the doorbell aperture at the - * requested doorbell index (CIK). - */ -u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index) -{ -	if (amdgpu_device_skip_hw_access(adev)) -		return 0; - -	if (index < adev->doorbell.num_kernel_doorbells) { -		return readl(adev->doorbell.ptr + index); -	} else { -		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index); -		return 0; -	} -} - -/** - * amdgpu_mm_wdoorbell - write a doorbell dword - * - * @adev: amdgpu_device pointer - * @index: doorbell index - * @v: value to write - * - * Writes @v to the doorbell aperture at the - * requested doorbell index (CIK). 
- */ -void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v) -{ -	if (amdgpu_device_skip_hw_access(adev)) -		return; - -	if (index < adev->doorbell.num_kernel_doorbells) { -		writel(v, adev->doorbell.ptr + index); -	} else { -		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index); -	} -} - -/** - * amdgpu_mm_rdoorbell64 - read a doorbell Qword - * - * @adev: amdgpu_device pointer - * @index: doorbell index - * - * Returns the value in the doorbell aperture at the - * requested doorbell index (VEGA10+). - */ -u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index) -{ -	if (amdgpu_device_skip_hw_access(adev)) -		return 0; - -	if (index < adev->doorbell.num_kernel_doorbells) { -		return atomic64_read((atomic64_t *)(adev->doorbell.ptr + index)); -	} else { -		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index); -		return 0; -	} -} - -/** - * amdgpu_mm_wdoorbell64 - write a doorbell Qword - * - * @adev: amdgpu_device pointer - * @index: doorbell index - * @v: value to write - * - * Writes @v to the doorbell aperture at the - * requested doorbell index (VEGA10+). - */ -void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v) -{ -	if (amdgpu_device_skip_hw_access(adev)) -		return; - -	if (index < adev->doorbell.num_kernel_doorbells) { -		atomic64_set((atomic64_t *)(adev->doorbell.ptr + index), v); -	} else { -		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index); -	} -} - -/**   * amdgpu_device_indirect_rreg - read an indirect register   *   * @adev: amdgpu_device pointer @@ -707,6 +560,48 @@ u32 amdgpu_device_indirect_rreg(struct amdgpu_device *adev,  	return r;  } +u32 amdgpu_device_indirect_rreg_ext(struct amdgpu_device *adev, +				    u64 reg_addr) +{ +	unsigned long flags, pcie_index, pcie_index_hi, pcie_data; +	u32 r; +	void __iomem *pcie_index_offset; +	void __iomem *pcie_index_hi_offset; +	void __iomem *pcie_data_offset; + +	pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev); +	pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev); +	if (adev->nbio.funcs->get_pcie_index_hi_offset) +		pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev); +	else +		pcie_index_hi = 0; + +	spin_lock_irqsave(&adev->pcie_idx_lock, flags); +	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4; +	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4; +	if (pcie_index_hi != 0) +		pcie_index_hi_offset = (void __iomem *)adev->rmmio + +				pcie_index_hi * 4; + +	writel(reg_addr, pcie_index_offset); +	readl(pcie_index_offset); +	if (pcie_index_hi != 0) { +		writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset); +		readl(pcie_index_hi_offset); +	} +	r = readl(pcie_data_offset); + +	/* clear the high bits */ +	if (pcie_index_hi != 0) { +		writel(0, pcie_index_hi_offset); +		readl(pcie_index_hi_offset); +	} + +	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags); + +	return r; +} +  /**   * amdgpu_device_indirect_rreg64 - read a 64bits indirect register   * @@ -747,8 +642,6 @@ u64 amdgpu_device_indirect_rreg64(struct amdgpu_device *adev,   * amdgpu_device_indirect_wreg - write an indirect register address   *   * @adev: amdgpu_device pointer - * @pcie_index: mmio register offset - * @pcie_data: mmio register offset   * @reg_addr: indirect register offset   * @reg_data: indirect register data   * @@ -774,12 +667,50 @@ void amdgpu_device_indirect_wreg(struct amdgpu_device *adev,  	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);  } +void amdgpu_device_indirect_wreg_ext(struct amdgpu_device *adev, +			
	     u64 reg_addr, u32 reg_data) +{ +	unsigned long flags, pcie_index, pcie_index_hi, pcie_data; +	void __iomem *pcie_index_offset; +	void __iomem *pcie_index_hi_offset; +	void __iomem *pcie_data_offset; + +	pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev); +	pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev); +	if (adev->nbio.funcs->get_pcie_index_hi_offset) +		pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev); +	else +		pcie_index_hi = 0; + +	spin_lock_irqsave(&adev->pcie_idx_lock, flags); +	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4; +	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4; +	if (pcie_index_hi != 0) +		pcie_index_hi_offset = (void __iomem *)adev->rmmio + +				pcie_index_hi * 4; + +	writel(reg_addr, pcie_index_offset); +	readl(pcie_index_offset); +	if (pcie_index_hi != 0) { +		writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset); +		readl(pcie_index_hi_offset); +	} +	writel(reg_data, pcie_data_offset); +	readl(pcie_data_offset); + +	/* clear the high bits */ +	if (pcie_index_hi != 0) { +		writel(0, pcie_index_hi_offset); +		readl(pcie_index_hi_offset); +	} + +	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags); +} +  /**   * amdgpu_device_indirect_wreg64 - write a 64bits indirect register address   *   * @adev: amdgpu_device pointer - * @pcie_index: mmio register offset - * @pcie_data: mmio register offset   * @reg_addr: indirect register offset   * @reg_data: indirect register data   * @@ -840,6 +771,13 @@ static uint32_t amdgpu_invalid_rreg(struct amdgpu_device *adev, uint32_t reg)  	return 0;  } +static uint32_t amdgpu_invalid_rreg_ext(struct amdgpu_device *adev, uint64_t reg) +{ +	DRM_ERROR("Invalid callback to read register 0x%llX\n", reg); +	BUG(); +	return 0; +} +  /**   * amdgpu_invalid_wreg - dummy reg write function   * @@ -857,6 +795,13 @@ static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32  	BUG();  } +static void amdgpu_invalid_wreg_ext(struct amdgpu_device *adev, uint64_t reg, uint32_t v) +{ +	DRM_ERROR("Invalid callback to write register 0x%llX with 0x%08X\n", +		  reg, v); +	BUG(); +} +  /**   * amdgpu_invalid_rreg64 - dummy 64 bit reg read function   * @@ -940,12 +885,20 @@ static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev,   */  static int amdgpu_device_asic_init(struct amdgpu_device *adev)  { +	int ret; +  	amdgpu_asic_pre_asic_init(adev); -	if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(11, 0, 0)) -		return amdgpu_atomfirmware_asic_init(adev, true); -	else +	if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3) || +	    adev->ip_versions[GC_HWIP][0] >= IP_VERSION(11, 0, 0)) { +		amdgpu_psp_wait_for_bootloader(adev); +		ret = amdgpu_atomfirmware_asic_init(adev, true); +		return ret; +	} else {  		return amdgpu_atom_asic_init(adev->mode_info.atom_context); +	} + +	return 0;  }  /** @@ -985,7 +938,7 @@ static void amdgpu_device_mem_scratch_fini(struct amdgpu_device *adev)   * @registers: pointer to the register array   * @array_size: size of the register array   * - * Programs an array or registers with and and or masks. + * Programs an array or registers with and or masks.   * This is a helper for setting golden registers.   
*/  void amdgpu_device_program_register_sequence(struct amdgpu_device *adev, @@ -998,7 +951,7 @@ void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,  	if (array_size % 3)  		return; -	for (i = 0; i < array_size; i +=3) { +	for (i = 0; i < array_size; i += 3) {  		reg = registers[i + 0];  		and_mask = registers[i + 1];  		or_mask = registers[i + 2]; @@ -1043,82 +996,6 @@ int amdgpu_device_pci_reset(struct amdgpu_device *adev)  }  /* - * GPU doorbell aperture helpers function. - */ -/** - * amdgpu_device_doorbell_init - Init doorbell driver information. - * - * @adev: amdgpu_device pointer - * - * Init doorbell driver information (CIK) - * Returns 0 on success, error on failure. - */ -static int amdgpu_device_doorbell_init(struct amdgpu_device *adev) -{ - -	/* No doorbell on SI hardware generation */ -	if (adev->asic_type < CHIP_BONAIRE) { -		adev->doorbell.base = 0; -		adev->doorbell.size = 0; -		adev->doorbell.num_kernel_doorbells = 0; -		adev->doorbell.ptr = NULL; -		return 0; -	} - -	if (pci_resource_flags(adev->pdev, 2) & IORESOURCE_UNSET) -		return -EINVAL; - -	amdgpu_asic_init_doorbell_index(adev); - -	/* doorbell bar mapping */ -	adev->doorbell.base = pci_resource_start(adev->pdev, 2); -	adev->doorbell.size = pci_resource_len(adev->pdev, 2); - -	if (adev->enable_mes) { -		adev->doorbell.num_kernel_doorbells = -			adev->doorbell.size / sizeof(u32); -	} else { -		adev->doorbell.num_kernel_doorbells = -			min_t(u32, adev->doorbell.size / sizeof(u32), -			      adev->doorbell_index.max_assignment+1); -		if (adev->doorbell.num_kernel_doorbells == 0) -			return -EINVAL; - -		/* For Vega, reserve and map two pages on doorbell BAR since SDMA -		 * paging queue doorbell use the second page. The -		 * AMDGPU_DOORBELL64_MAX_ASSIGNMENT definition assumes all the -		 * doorbells are in the first page. So with paging queue enabled, -		 * the max num_kernel_doorbells should + 1 page (0x400 in dword) -		 */ -		if (adev->asic_type >= CHIP_VEGA10) -			adev->doorbell.num_kernel_doorbells += 0x400; -	} - -	adev->doorbell.ptr = ioremap(adev->doorbell.base, -				     adev->doorbell.num_kernel_doorbells * -				     sizeof(u32)); -	if (adev->doorbell.ptr == NULL) -		return -ENOMEM; - -	return 0; -} - -/** - * amdgpu_device_doorbell_fini - Tear down doorbell driver information. - * - * @adev: amdgpu_device pointer - * - * Tear down doorbell driver information (CIK) - */ -static void amdgpu_device_doorbell_fini(struct amdgpu_device *adev) -{ -	iounmap(adev->doorbell.ptr); -	adev->doorbell.ptr = NULL; -} - - - -/*   * amdgpu_device_wb_*()   * Writeback is the method by which the GPU updates special pages in memory   * with the status of certain GPU events (fences, ring pointers,etc.). @@ -1227,10 +1104,13 @@ int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)  	int rbar_size = pci_rebar_bytes_to_size(adev->gmc.real_vram_size);  	struct pci_bus *root;  	struct resource *res; -	unsigned i; +	unsigned int i;  	u16 cmd;  	int r; +	if (!IS_ENABLED(CONFIG_PHYS_ADDR_T_64BIT)) +		return 0; +  	/* Bypass for VF */  	if (amdgpu_sriov_vf(adev))  		return 0; @@ -1265,7 +1145,7 @@ int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)  			      cmd & ~PCI_COMMAND_MEMORY);  	/* Free the VRAM and doorbell BAR, we most likely need to move both. 
*/ -	amdgpu_device_doorbell_fini(adev); +	amdgpu_doorbell_fini(adev);  	if (adev->asic_type >= CHIP_BONAIRE)  		pci_release_resource(adev->pdev, 2); @@ -1282,7 +1162,7 @@ int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)  	/* When the doorbell or fb BAR isn't available we have no chance of  	 * using the device.  	 */ -	r = amdgpu_device_doorbell_init(adev); +	r = amdgpu_doorbell_init(adev);  	if (r || (pci_resource_flags(adev->pdev, 0) & IORESOURCE_UNSET))  		return -ENODEV; @@ -1291,6 +1171,14 @@ int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)  	return 0;  } +static bool amdgpu_device_read_bios(struct amdgpu_device *adev) +{ +	if (hweight32(adev->aid_mask) && (adev->flags & AMD_IS_APU)) +		return false; + +	return true; +} +  /*   * GPU helpers function.   */ @@ -1310,6 +1198,9 @@ bool amdgpu_device_need_post(struct amdgpu_device *adev)  	if (amdgpu_sriov_vf(adev))  		return false; +	if (!amdgpu_device_read_bios(adev)) +		return false; +  	if (amdgpu_passthrough(adev)) {  		/* for FIJI: In whole GPU pass-through virtualization case, after VM reboot  		 * some old smc fw still need driver do vPost otherwise gpu hang, while @@ -1319,6 +1210,7 @@ bool amdgpu_device_need_post(struct amdgpu_device *adev)  		if (adev->asic_type == CHIP_FIJI) {  			int err;  			uint32_t fw_ver; +  			err = request_firmware(&adev->pm.fw, "amdgpu/fiji_smc.bin", adev->dev);  			/* force vPost if error occured */  			if (err) @@ -1352,6 +1244,51 @@ bool amdgpu_device_need_post(struct amdgpu_device *adev)  	return true;  } +/* + * On APUs with >= 64GB white flickering has been observed w/ SG enabled. + * Disable S/G on such systems until we have a proper fix. + * https://gitlab.freedesktop.org/drm/amd/-/issues/2354 + * https://gitlab.freedesktop.org/drm/amd/-/issues/2735 + */ +bool amdgpu_sg_display_supported(struct amdgpu_device *adev) +{ +	switch (amdgpu_sg_display) { +	case -1: +		break; +	case 0: +		return false; +	case 1: +		return true; +	default: +		return false; +	} +	if ((totalram_pages() << (PAGE_SHIFT - 10)) + +	    (adev->gmc.real_vram_size / 1024) >= 64000000) { +		DRM_WARN("Disabling S/G due to >=64GB RAM\n"); +		return false; +	} +	return true; +} + +/* + * Intel hosts such as Raptor Lake and Sapphire Rapids don't support dynamic + * speed switching. Until we have confirmation from Intel that a specific host + * supports it, it's safer that we keep it disabled for all. 
+ * + * https://edc.intel.com/content/www/us/en/design/products/platforms/details/raptor-lake-s/13th-generation-core-processors-datasheet-volume-1-of-2/005/pci-express-support/ + * https://gitlab.freedesktop.org/drm/amd/-/issues/2663 + */ +bool amdgpu_device_pcie_dynamic_switching_supported(void) +{ +#if IS_ENABLED(CONFIG_X86) +	struct cpuinfo_x86 *c = &cpu_data(0); + +	if (c->x86_vendor == X86_VENDOR_INTEL) +		return false; +#endif +	return true; +} +  /**   * amdgpu_device_should_use_aspm - check if the device should program ASPM   * @@ -1402,6 +1339,7 @@ static unsigned int amdgpu_device_vga_set_decode(struct pci_dev *pdev,  		bool state)  {  	struct amdgpu_device *adev = drm_to_adev(pci_get_drvdata(pdev)); +  	amdgpu_asic_set_vga_state(adev, state);  	if (state)  		return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM | @@ -1424,7 +1362,8 @@ static void amdgpu_device_check_block_size(struct amdgpu_device *adev)  {  	/* defines number of bits in page table versus page directory,  	 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the -	 * page table and the remaining bits are in the page directory */ +	 * page table and the remaining bits are in the page directory +	 */  	if (amdgpu_vm_block_size == -1)  		return; @@ -1547,7 +1486,7 @@ static int amdgpu_device_check_arguments(struct amdgpu_device *adev)  		dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",  			 amdgpu_sched_jobs);  		amdgpu_sched_jobs = 4; -	} else if (!is_power_of_2(amdgpu_sched_jobs)){ +	} else if (!is_power_of_2(amdgpu_sched_jobs)) {  		dev_warn(adev->dev, "sched jobs (%d) must be a power of 2\n",  			 amdgpu_sched_jobs);  		amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs); @@ -1656,7 +1595,7 @@ static bool amdgpu_switcheroo_can_switch(struct pci_dev *pdev)  {  	struct drm_device *dev = pci_get_drvdata(pdev); -	/* +       /*  	* FIXME: open_count is protected by drm_global_mutex but that would lead to  	* locking inversion with the driver load path. And the access here is  	* completely racy anyway. So don't bother with locking for now. 
@@ -2194,7 +2133,7 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)  	total = true;  	for (i = 0; i < adev->num_ip_blocks; i++) {  		if ((amdgpu_ip_block_mask & (1 << i)) == 0) { -			DRM_ERROR("disabled ip block: %d <%s>\n", +			DRM_WARN("disabled ip block: %d <%s>\n",  				  i, adev->ip_blocks[i].version->funcs->name);  			adev->ip_blocks[i].status.valid = false;  		} else { @@ -2220,14 +2159,16 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)  				return r;  			/* Read BIOS */ -			if (!amdgpu_get_bios(adev)) -				return -EINVAL; +			if (amdgpu_device_read_bios(adev)) { +				if (!amdgpu_get_bios(adev)) +					return -EINVAL; -			r = amdgpu_atombios_init(adev); -			if (r) { -				dev_err(adev->dev, "amdgpu_atombios_init failed\n"); -				amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0); -				return r; +				r = amdgpu_atombios_init(adev); +				if (r) { +					dev_err(adev->dev, "amdgpu_atombios_init failed\n"); +					amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0); +					return r; +				}  			}  			/*get pf2vf msg info at it's earliest time*/ @@ -2376,6 +2317,8 @@ static int amdgpu_device_init_schedulers(struct amdgpu_device *adev)  		}  	} +	amdgpu_xcp_update_partition_sched_list(adev); +  	return 0;  } @@ -2442,7 +2385,7 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)  			adev->ip_blocks[i].status.hw = true;  			/* right after GMC hw init, we create CSA */ -			if (amdgpu_mcbp) { +			if (adev->gfx.mcbp) {  				r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj,  							       AMDGPU_GEM_DOMAIN_VRAM |  							       AMDGPU_GEM_DOMAIN_GTT, @@ -2533,8 +2476,10 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)  		goto init_failed;  	/* Don't init kfd if whole hive need to be reset during init */ -	if (!adev->gmc.xgmi.pending_reset) +	if (!adev->gmc.xgmi.pending_reset) { +		kgd2kfd_init_zone_device(adev);  		amdgpu_amdkfd_device_init(adev); +	}  	amdgpu_fru_get_product_info(adev); @@ -2759,8 +2704,9 @@ static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)  		DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);  	/* For passthrough configuration on arcturus and aldebaran, enable special handling SBR */ -	if (amdgpu_passthrough(adev) && ((adev->asic_type == CHIP_ARCTURUS && adev->gmc.xgmi.num_physical_nodes > 1)|| -			       adev->asic_type == CHIP_ALDEBARAN )) +	if (amdgpu_passthrough(adev) && +	    ((adev->asic_type == CHIP_ARCTURUS && adev->gmc.xgmi.num_physical_nodes > 1) || +	     adev->asic_type == CHIP_ALDEBARAN))  		amdgpu_dpm_handle_passthrough_sbr(adev, true);  	if (adev->gmc.xgmi.num_physical_nodes > 1) { @@ -3089,7 +3035,7 @@ static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)  		}  		adev->ip_blocks[i].status.hw = false;  		/* handle putting the SMC in the appropriate state */ -		if(!amdgpu_sriov_vf(adev)){ +		if (!amdgpu_sriov_vf(adev)) {  			if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {  				r = amdgpu_dpm_set_mp1_state(adev, adev->mp1_state);  				if (r) { @@ -3294,7 +3240,7 @@ static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)   *   * Main resume function for hardware IPs.  The hardware IPs   * are split into two resume functions because they are - * are also used in in recovering from a GPU reset and some additional + * also used in recovering from a GPU reset and some additional   * steps need to be take between them.  In this case (S3/S4) they are   * run sequentially.   
* Returns 0 on success, negative error code on failure. @@ -3303,12 +3249,6 @@ static int amdgpu_device_ip_resume(struct amdgpu_device *adev)  {  	int r; -	if (!adev->in_s0ix) { -		r = amdgpu_amdkfd_resume_iommu(adev); -		if (r) -			return r; -	} -  	r = amdgpu_device_ip_resume_phase1(adev);  	if (r)  		return r; @@ -3396,8 +3336,7 @@ bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)  #else  	default:  		if (amdgpu_dc > 0) -			DRM_INFO_ONCE("Display Core has been requested via kernel parameter " -					 "but isn't supported by ASIC, ignoring\n"); +			DRM_INFO_ONCE("Display Core has been requested via kernel parameter but isn't supported by ASIC, ignoring\n");  		return false;  #endif  	} @@ -3553,13 +3492,28 @@ static void amdgpu_device_check_iommu_direct_map(struct amdgpu_device *adev)  }  static const struct attribute *amdgpu_dev_attributes[] = { -	&dev_attr_product_name.attr, -	&dev_attr_product_number.attr, -	&dev_attr_serial_number.attr,  	&dev_attr_pcie_replay_count.attr,  	NULL  }; +static void amdgpu_device_set_mcbp(struct amdgpu_device *adev) +{ +	if (amdgpu_mcbp == 1) +		adev->gfx.mcbp = true; +	else if (amdgpu_mcbp == 0) +		adev->gfx.mcbp = false; +	else if ((adev->ip_versions[GC_HWIP][0] >= IP_VERSION(9, 0, 0)) && +		 (adev->ip_versions[GC_HWIP][0] < IP_VERSION(10, 0, 0)) && +		 adev->gfx.num_gfx_rings) +		adev->gfx.mcbp = true; + +	if (amdgpu_sriov_vf(adev)) +		adev->gfx.mcbp = true; + +	if (adev->gfx.mcbp) +		DRM_INFO("MCBP is enabled\n"); +} +  /**   * amdgpu_device_init - initialize the driver   * @@ -3608,6 +3562,8 @@ int amdgpu_device_init(struct amdgpu_device *adev,  	adev->smc_wreg = &amdgpu_invalid_wreg;  	adev->pcie_rreg = &amdgpu_invalid_rreg;  	adev->pcie_wreg = &amdgpu_invalid_wreg; +	adev->pcie_rreg_ext = &amdgpu_invalid_rreg_ext; +	adev->pcie_wreg_ext = &amdgpu_invalid_wreg_ext;  	adev->pciep_rreg = &amdgpu_invalid_rreg;  	adev->pciep_wreg = &amdgpu_invalid_wreg;  	adev->pcie_rreg64 = &amdgpu_invalid_rreg64; @@ -3626,13 +3582,15 @@ int amdgpu_device_init(struct amdgpu_device *adev,  		 pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision);  	/* mutex initialization are all done here so we -	 * can recall function without having locking issues */ +	 * can recall function without having locking issues +	 */  	mutex_init(&adev->firmware.mutex);  	mutex_init(&adev->pm.mutex);  	mutex_init(&adev->gfx.gpu_clock_mutex);  	mutex_init(&adev->srbm_mutex);  	mutex_init(&adev->gfx.pipe_reserve_mutex);  	mutex_init(&adev->gfx.gfx_off_mutex); +	mutex_init(&adev->gfx.partition_mutex);  	mutex_init(&adev->grbm_idx_mutex);  	mutex_init(&adev->mn_lock);  	mutex_init(&adev->virt.vf_errors.lock); @@ -3702,16 +3660,11 @@ int amdgpu_device_init(struct amdgpu_device *adev,  		atomic_set(&adev->pm.pwr_state[i], POWER_STATE_UNKNOWN);  	adev->rmmio = ioremap(adev->rmmio_base, adev->rmmio_size); -	if (adev->rmmio == NULL) { +	if (!adev->rmmio)  		return -ENOMEM; -	} -	DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base); -	DRM_INFO("register mmio size: %u\n", (unsigned)adev->rmmio_size); - -	amdgpu_device_get_pcie_info(adev); -	if (amdgpu_mcbp) -		DRM_INFO("MCBP is enabled\n"); +	DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base); +	DRM_INFO("register mmio size: %u\n", (unsigned int)adev->rmmio_size);  	/*  	 * Reset domain needs to be present early, before XGMI hive discovered @@ -3725,6 +3678,8 @@ int amdgpu_device_init(struct amdgpu_device *adev,  	/* detect hw virtualization here */  	amdgpu_detect_virtualization(adev); +	
amdgpu_device_get_pcie_info(adev); +  	r = amdgpu_device_get_job_timeout_settings(adev);  	if (r) {  		dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n"); @@ -3736,6 +3691,8 @@ int amdgpu_device_init(struct amdgpu_device *adev,  	if (r)  		return r; +	amdgpu_device_set_mcbp(adev); +  	/* Get rid of things like offb */  	r = drm_aperture_remove_conflicting_pci_framebuffers(adev->pdev, &amdgpu_kms_driver);  	if (r) @@ -3753,20 +3710,29 @@ int amdgpu_device_init(struct amdgpu_device *adev,  	}  	/* enable PCIE atomic ops */ -	if (amdgpu_sriov_vf(adev)) -		adev->have_atomics_support = ((struct amd_sriov_msg_pf2vf_info *) -			adev->virt.fw_reserve.p_pf2vf)->pcie_atomic_ops_support_flags == -			(PCI_EXP_DEVCAP2_ATOMIC_COMP32 | PCI_EXP_DEVCAP2_ATOMIC_COMP64); -	else +	if (amdgpu_sriov_vf(adev)) { +		if (adev->virt.fw_reserve.p_pf2vf) +			adev->have_atomics_support = ((struct amd_sriov_msg_pf2vf_info *) +						      adev->virt.fw_reserve.p_pf2vf)->pcie_atomic_ops_support_flags == +				(PCI_EXP_DEVCAP2_ATOMIC_COMP32 | PCI_EXP_DEVCAP2_ATOMIC_COMP64); +	/* APUs w/ gfx9 onwards doesn't reply on PCIe atomics, rather it is a +	 * internal path natively support atomics, set have_atomics_support to true. +	 */ +	} else if ((adev->flags & AMD_IS_APU) && +		   (adev->ip_versions[GC_HWIP][0] > IP_VERSION(9, 0, 0))) { +		adev->have_atomics_support = true; +	} else {  		adev->have_atomics_support =  			!pci_enable_atomic_ops_to_root(adev->pdev,  					  PCI_EXP_DEVCAP2_ATOMIC_COMP32 |  					  PCI_EXP_DEVCAP2_ATOMIC_COMP64); +	} +  	if (!adev->have_atomics_support)  		dev_info(adev->dev, "PCIE atomic ops is not supported\n");  	/* doorbell bar mapping and doorbell index init*/ -	amdgpu_device_doorbell_init(adev); +	amdgpu_doorbell_init(adev);  	if (amdgpu_emu_mode == 1) {  		/* post the asic on emulation mode */ @@ -3777,7 +3743,8 @@ int amdgpu_device_init(struct amdgpu_device *adev,  	amdgpu_reset_init(adev);  	/* detect if we are with an SRIOV vbios */ -	amdgpu_device_detect_sriov_bios(adev); +	if (adev->bios) +		amdgpu_device_detect_sriov_bios(adev);  	/* check if we need to reset the asic  	 *  E.g., driver was not cleanly unloaded previously, etc. 
@@ -3829,25 +3796,27 @@ int amdgpu_device_init(struct amdgpu_device *adev,  		}  	} -	if (adev->is_atom_fw) { -		/* Initialize clocks */ -		r = amdgpu_atomfirmware_get_clock_info(adev); -		if (r) { -			dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n"); -			amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0); -			goto failed; -		} -	} else { -		/* Initialize clocks */ -		r = amdgpu_atombios_get_clock_info(adev); -		if (r) { -			dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n"); -			amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0); -			goto failed; +	if (adev->bios) { +		if (adev->is_atom_fw) { +			/* Initialize clocks */ +			r = amdgpu_atomfirmware_get_clock_info(adev); +			if (r) { +				dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n"); +				amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0); +				goto failed; +			} +		} else { +			/* Initialize clocks */ +			r = amdgpu_atombios_get_clock_info(adev); +			if (r) { +				dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n"); +				amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0); +				goto failed; +			} +			/* init i2c buses */ +			if (!amdgpu_device_has_dc_support(adev)) +				amdgpu_atombios_i2c_init(adev);  		} -		/* init i2c buses */ -		if (!amdgpu_device_has_dc_support(adev)) -			amdgpu_atombios_i2c_init(adev);  	}  fence_driver_init: @@ -3890,6 +3859,11 @@ fence_driver_init:  	/* Get a log2 for easy divisions. */  	adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps)); +	r = amdgpu_atombios_sysfs_init(adev); +	if (r) +		drm_err(&adev->ddev, +			"registering atombios sysfs failed (%d).\n", r); +  	r = amdgpu_pm_sysfs_init(adev);  	if (r)  		DRM_ERROR("registering pm sysfs failed (%d).\n", r); @@ -3901,14 +3875,6 @@ fence_driver_init:  	} else  		adev->ucode_sysfs_en = true; -	r = amdgpu_psp_sysfs_init(adev); -	if (r) { -		adev->psp_sysfs_en = false; -		if (!amdgpu_sriov_vf(adev)) -			DRM_ERROR("Creating psp sysfs failed\n"); -	} else -		adev->psp_sysfs_en = true; -  	/*  	 * Register gpu instance before amdgpu_device_enable_mgpu_fan_boost.  	 
* Otherwise the mgpu fan boost feature will be skipped due to the @@ -3941,6 +3907,8 @@ fence_driver_init:  	if (r)  		dev_err(adev->dev, "Could not create amdgpu device attr\n"); +	amdgpu_fru_sysfs_init(adev); +  	if (IS_ENABLED(CONFIG_PERF_EVENTS))  		r = amdgpu_pmu_init(adev);  	if (r) @@ -3952,7 +3920,8 @@ fence_driver_init:  	/* if we have > 1 VGA cards, then disable the amdgpu VGA resources */  	/* this will fail for cards that aren't VGA class devices, just -	 * ignore it */ +	 * ignore it +	 */  	if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)  		vga_client_register(adev->pdev, amdgpu_device_vga_set_decode); @@ -4004,7 +3973,7 @@ static void amdgpu_device_unmap_mmio(struct amdgpu_device *adev)  	unmap_mapping_range(adev->ddev.anon_inode->i_mapping, 0, 0, 1);  	/* Unmap all mapped bars - Doorbell, registers and VRAM */ -	amdgpu_device_doorbell_fini(adev); +	amdgpu_doorbell_fini(adev);  	iounmap(adev->rmmio);  	adev->rmmio = NULL; @@ -4013,7 +3982,7 @@ static void amdgpu_device_unmap_mmio(struct amdgpu_device *adev)  	adev->mman.aper_base_kaddr = NULL;  	/* Memory manager related */ -	if (!adev->gmc.xgmi.connected_to_cpu) { +	if (!adev->gmc.xgmi.connected_to_cpu && !adev->gmc.is_app_apu) {  		arch_phys_wc_del(adev->gmc.vram_mtrr);  		arch_io_free_memtype_wc(adev->gmc.aper_base, adev->gmc.aper_size);  	} @@ -4035,7 +4004,7 @@ void amdgpu_device_fini_hw(struct amdgpu_device *adev)  	/* make sure IB test finished before entering exclusive mode  	 * to avoid preemption on IB test -	 * */ +	 */  	if (amdgpu_sriov_vf(adev)) {  		amdgpu_virt_request_full_gpu(adev, false);  		amdgpu_virt_fini_data_exchange(adev); @@ -4043,7 +4012,7 @@ void amdgpu_device_fini_hw(struct amdgpu_device *adev)  	/* disable all interrupts */  	amdgpu_irq_disable_all(adev); -	if (adev->mode_info.mode_config_initialized){ +	if (adev->mode_info.mode_config_initialized) {  		if (!drm_drv_uses_atomic_modeset(adev_to_drm(adev)))  			drm_helper_force_disable_all(adev_to_drm(adev));  		else @@ -4058,9 +4027,8 @@ void amdgpu_device_fini_hw(struct amdgpu_device *adev)  		amdgpu_pm_sysfs_fini(adev);  	if (adev->ucode_sysfs_en)  		amdgpu_ucode_sysfs_fini(adev); -	if (adev->psp_sysfs_en) -		amdgpu_psp_sysfs_fini(adev);  	sysfs_remove_files(&adev->dev->kobj, amdgpu_dev_attributes); +	amdgpu_fru_sysfs_fini(adev);  	/* disable ras feature must before hw fini */  	amdgpu_ras_pre_fini(adev); @@ -4118,7 +4086,7 @@ void amdgpu_device_fini_sw(struct amdgpu_device *adev)  		iounmap(adev->rmmio);  		adev->rmmio = NULL; -		amdgpu_device_doorbell_fini(adev); +		amdgpu_doorbell_fini(adev);  		drm_dev_exit(idx);  	} @@ -4199,6 +4167,7 @@ int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)  		drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, true);  	cancel_delayed_work_sync(&adev->delayed_init_work); +	flush_delayed_work(&adev->gfx.gfx_off_delay_work);  	amdgpu_ras_suspend(adev); @@ -4506,7 +4475,11 @@ static int amdgpu_device_recover_vram(struct amdgpu_device *adev)  	dev_info(adev->dev, "recover vram bo from shadow start\n");  	mutex_lock(&adev->shadow_list_lock);  	list_for_each_entry(vmbo, &adev->shadow_list, shadow_list) { -		shadow = &vmbo->bo; +		/* If vm is compute context or adev is APU, shadow will be NULL */ +		if (!vmbo->shadow) +			continue; +		shadow = vmbo->shadow; +  		/* No need to recover an evicted BO */  		if (shadow->tbo.resource->mem_type != TTM_PL_TT ||  		    shadow->tbo.resource->start == AMDGPU_BO_INVALID_OFFSET || @@ -4573,6 +4546,10 @@ retry:  		r = amdgpu_virt_reset_gpu(adev);  	
if (r)  		return r; +	amdgpu_irq_gpu_reset_resume_helper(adev); + +	/* some sw clean up VF needs to do before recover */ +	amdgpu_virt_post_reset(adev);  	/* Resume IP prior to SMC */  	r = amdgpu_device_ip_reinit_early_sriov(adev); @@ -4599,7 +4576,6 @@ retry:  		amdgpu_put_xgmi_hive(hive);  	if (!r) { -		amdgpu_irq_gpu_reset_resume_helper(adev);  		r = amdgpu_ib_ring_tests(adev);  		amdgpu_amdkfd_post_reset(adev); @@ -4704,42 +4680,55 @@ disabled:  int amdgpu_device_mode1_reset(struct amdgpu_device *adev)  { -        u32 i; -        int ret = 0; +	u32 i; +	int ret = 0; -        amdgpu_atombios_scratch_regs_engine_hung(adev, true); +	amdgpu_atombios_scratch_regs_engine_hung(adev, true); -        dev_info(adev->dev, "GPU mode1 reset\n"); +	dev_info(adev->dev, "GPU mode1 reset\n"); -        /* disable BM */ -        pci_clear_master(adev->pdev); +	/* disable BM */ +	pci_clear_master(adev->pdev); -        amdgpu_device_cache_pci_state(adev->pdev); +	amdgpu_device_cache_pci_state(adev->pdev); -        if (amdgpu_dpm_is_mode1_reset_supported(adev)) { -                dev_info(adev->dev, "GPU smu mode1 reset\n"); -                ret = amdgpu_dpm_mode1_reset(adev); -        } else { -                dev_info(adev->dev, "GPU psp mode1 reset\n"); -                ret = psp_gpu_reset(adev); -        } +	if (amdgpu_dpm_is_mode1_reset_supported(adev)) { +		dev_info(adev->dev, "GPU smu mode1 reset\n"); +		ret = amdgpu_dpm_mode1_reset(adev); +	} else { +		dev_info(adev->dev, "GPU psp mode1 reset\n"); +		ret = psp_gpu_reset(adev); +	} -        if (ret) -                dev_err(adev->dev, "GPU mode1 reset failed\n"); +	if (ret) +		goto mode1_reset_failed; -        amdgpu_device_load_pci_state(adev->pdev); +	amdgpu_device_load_pci_state(adev->pdev); +	ret = amdgpu_psp_wait_for_bootloader(adev); +	if (ret) +		goto mode1_reset_failed; -        /* wait for asic to come out of reset */ -        for (i = 0; i < adev->usec_timeout; i++) { -                u32 memsize = adev->nbio.funcs->get_memsize(adev); +	/* wait for asic to come out of reset */ +	for (i = 0; i < adev->usec_timeout; i++) { +		u32 memsize = adev->nbio.funcs->get_memsize(adev); -                if (memsize != 0xffffffff) -                        break; -                udelay(1); -        } +		if (memsize != 0xffffffff) +			break; +		udelay(1); +	} + +	if (i >= adev->usec_timeout) { +		ret = -ETIMEDOUT; +		goto mode1_reset_failed; +	} + +	amdgpu_atombios_scratch_regs_engine_hung(adev, false); + +	return 0; -        amdgpu_atombios_scratch_regs_engine_hung(adev, false); -        return ret; +mode1_reset_failed: +	dev_err(adev->dev, "GPU mode1 reset failed\n"); +	return ret;  }  int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev, @@ -4767,8 +4756,9 @@ int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,  		if (!ring || !ring->sched.thread)  			continue; -		/*clear job fence from fence drv to avoid force_completion -		 *leave NULL and vm flush fence in fence drv */ +		/* Clear job fence from fence drv to avoid force_completion +		 * leave NULL and vm flush fence in fence drv +		 */  		amdgpu_fence_driver_clear_job_fences(ring);  		/* after all hw jobs are reset, hw fence is meaningless, so force_completion */ @@ -4782,7 +4772,7 @@ int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,  	r = amdgpu_reset_prepare_hwcontext(adev, reset_context);  	/* If reset handler not implemented, continue; otherwise return */ -	if (r == -ENOSYS) +	if (r == -EOPNOTSUPP)  		r = 0;  	else  		return r; @@ -4879,7 +4869,7 @@ static void 
amdgpu_reset_capture_coredumpm(struct amdgpu_device *adev)  	struct drm_device *dev = adev_to_drm(adev);  	ktime_get_ts64(&adev->reset_time); -	dev_coredumpm(dev->dev, THIS_MODULE, adev, 0, GFP_KERNEL, +	dev_coredumpm(dev->dev, THIS_MODULE, adev, 0, GFP_NOWAIT,  		      amdgpu_devcoredump_read, amdgpu_devcoredump_free);  }  #endif @@ -4900,7 +4890,7 @@ int amdgpu_do_asic_reset(struct list_head *device_list_handle,  	reset_context->reset_device_list = device_list_handle;  	r = amdgpu_reset_perform_reset(tmp_adev, reset_context);  	/* If reset handler not implemented, continue; otherwise return */ -	if (r == -ENOSYS) +	if (r == -EOPNOTSUPP)  		r = 0;  	else  		return r; @@ -4978,9 +4968,6 @@ int amdgpu_do_asic_reset(struct list_head *device_list_handle,  				dev_warn(tmp_adev->dev, "asic atom init failed!");  			} else {  				dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n"); -				r = amdgpu_amdkfd_resume_iommu(tmp_adev); -				if (r) -					goto out;  				r = amdgpu_device_ip_resume_phase1(tmp_adev);  				if (r) @@ -5389,9 +5376,8 @@ skip_hw_reset:  		if (adev->enable_mes && adev->ip_versions[GC_HWIP][0] != IP_VERSION(11, 0, 3))  			amdgpu_mes_self_test(tmp_adev); -		if (!drm_drv_uses_atomic_modeset(adev_to_drm(tmp_adev)) && !job_signaled) { +		if (!drm_drv_uses_atomic_modeset(adev_to_drm(tmp_adev)) && !job_signaled)  			drm_helper_resume_force_mode(adev_to_drm(tmp_adev)); -		}  		if (tmp_adev->asic_reset_res)  			r = tmp_adev->asic_reset_res; @@ -5468,7 +5454,7 @@ static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)  		adev->pm.pcie_mlw_mask = amdgpu_pcie_lane_cap;  	/* covers APUs as well */ -	if (pci_is_root_bus(adev->pdev->bus)) { +	if (pci_is_root_bus(adev->pdev->bus) && !amdgpu_passthrough(adev)) {  		if (adev->pm.pcie_gen_mask == 0)  			adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK;  		if (adev->pm.pcie_mlw_mask == 0) @@ -5949,6 +5935,7 @@ void amdgpu_device_halt(struct amdgpu_device *adev)  	struct pci_dev *pdev = adev->pdev;  	struct drm_device *ddev = adev_to_drm(adev); +	amdgpu_xcp_dev_unplug(adev);  	drm_dev_unplug(ddev);  	amdgpu_irq_disable_all(adev); @@ -6069,3 +6056,31 @@ bool amdgpu_device_has_display_hardware(struct amdgpu_device *adev)  		return true;  	}  } + +uint32_t amdgpu_device_wait_on_rreg(struct amdgpu_device *adev, +		uint32_t inst, uint32_t reg_addr, char reg_name[], +		uint32_t expected_value, uint32_t mask) +{ +	uint32_t ret = 0; +	uint32_t old_ = 0; +	uint32_t tmp_ = RREG32(reg_addr); +	uint32_t loop = adev->usec_timeout; + +	while ((tmp_ & (mask)) != (expected_value)) { +		if (old_ != tmp_) { +			loop = adev->usec_timeout; +			old_ = tmp_; +		} else +			udelay(1); +		tmp_ = RREG32(reg_addr); +		loop--; +		if (!loop) { +			DRM_WARN("Register(%d) [%s] failed to reach value 0x%08x != 0x%08xn", +				  inst, reg_name, (uint32_t)expected_value, +				  (uint32_t)(tmp_ & (mask))); +			ret = -ETIMEDOUT; +			break; +		} +	} +	return ret; +} |
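
Editor's note: the new amdgpu_device_wait_on_rreg() helper at the end of this patch polls a register under a mask and, notably, refreshes its timeout budget whenever the read value is still changing, timing out only once the value has settled on something other than the expected one. The following is a minimal, self-contained userspace sketch of that polling pattern, not the driver's actual code: mock_rreg(), wait_on_reg() and the 32-poll budget are hypothetical stand-ins for RREG32() and adev->usec_timeout.

/*
 * Illustrative sketch of the masked poll-with-timeout pattern used by
 * amdgpu_device_wait_on_rreg(). Everything below is a mock; the real
 * helper reads hardware registers via RREG32().
 */
#include <stdint.h>
#include <stdio.h>

static uint32_t mock_state;

/* Hypothetical stand-in for RREG32(): the "register" settles over time. */
static uint32_t mock_rreg(void)
{
	if (mock_state < 0x1000)
		mock_state += 0x100;	/* value still moving */
	return mock_state;
}

/* Poll until (reg & mask) == expected. The budget is refreshed while the
 * value keeps changing, so we only give up once it is stable but wrong. */
static int wait_on_reg(uint32_t expected, uint32_t mask, uint32_t budget)
{
	uint32_t loop = budget;
	uint32_t old = 0;
	uint32_t cur = mock_rreg();

	while ((cur & mask) != expected) {
		if (cur != old) {
			loop = budget;	/* progress seen: restart the timeout */
			old = cur;
		}
		cur = mock_rreg();
		if (--loop == 0) {
			fprintf(stderr, "timed out: 0x%08x != 0x%08x\n",
				cur & mask, expected);
			return -1;
		}
	}
	return 0;
}

int main(void)
{
	/* Expect the low 16 bits to reach 0x1000 within 32 polls. */
	return wait_on_reg(0x1000, 0xffff, 32) ? 1 : 0;
}

The same "reset the timeout while the value is moving" idea is what distinguishes this helper from a plain bounded poll loop: a register that is slowly converging is given more time, while a stuck register fails after one full budget.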