Diffstat (limited to 'drivers/gpu/drm/amd/amdkfd')
31 files changed, 951 insertions, 1399 deletions
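Two patterns recur throughout the patch below: (1) the amdgpu_amdkfd helpers and kfd2kgd callbacks now take dev->adev (a struct amdgpu_device *) instead of the opaque dev->kgd handle, and (2) asic_family / CHIP_* comparisons are replaced with GC IP-version comparisons. The following stand-alone C sketch is a hypothetical illustration of the second pattern only; IP_VERSION() follows the amdgpu encoding (major/minor/revision packed into one 32-bit value), while struct kfd_dev_sketch, KFD_GC_VERSION() and KFD_IS_SOC15() are simplified stand-ins for the kfd_priv.h definitions, not the kernel's exact code.

/*
 * Hypothetical, self-contained illustration of version-based checks.
 * Build: cc -o ipver ipver.c
 */
#include <stdint.h>
#include <stdio.h>

/* Major in bits 23..16, minor in 15..8, revision in 7..0 */
#define IP_VERSION(ver, rev, step)  (((ver) << 16) | ((rev) << 8) | (step))

struct kfd_dev_sketch {
	uint32_t gc_version;   /* stands in for adev->ip_versions[GC_HWIP][0] */
};

#define KFD_GC_VERSION(d)  ((d)->gc_version)
#define KFD_IS_SOC15(d)    (KFD_GC_VERSION(d) >= IP_VERSION(9, 0, 1))

static const char *describe(const struct kfd_dev_sketch *d)
{
	/*
	 * Old style: dev->device_info->asic_family == CHIP_ALDEBARAN
	 * New style: KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 2)
	 */
	if (KFD_GC_VERSION(d) == IP_VERSION(9, 4, 2))
		return "Aldebaran-specific (GC 9.4.2) path";
	return KFD_IS_SOC15(d) ? "generic SOC15 path" : "pre-SOC15 path";
}

int main(void)
{
	struct kfd_dev_sketch aldebaran = { .gc_version = IP_VERSION(9, 4, 2) };
	struct kfd_dev_sketch pre_soc15 = { .gc_version = 0 }; /* no GC IP discovery */

	printf("%s\n", describe(&aldebaran));
	printf("%s\n", describe(&pre_soc15));
	return 0;
}

With checks expressed this way, the per-ASIC kfd_device_info tables removed later in this patch become unnecessary: the properties they encoded are derived at probe time from the IP versions reported by the device itself.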
| diff --git a/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c b/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c index f6233019f042..d60576ce10cd 100644 --- a/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c +++ b/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c @@ -43,15 +43,15 @@ static bool cik_event_interrupt_isr(struct kfd_dev *dev,  	 */  	if ((ihre->source_id == CIK_INTSRC_GFX_PAGE_INV_FAULT ||  		ihre->source_id == CIK_INTSRC_GFX_MEM_PROT_FAULT) && -		dev->device_info->asic_family == CHIP_HAWAII) { +		dev->adev->asic_type == CHIP_HAWAII) {  		struct cik_ih_ring_entry *tmp_ihre =  			(struct cik_ih_ring_entry *)patched_ihre;  		*patched_flag = true;  		*tmp_ihre = *ihre; -		vmid = f2g->read_vmid_from_vmfault_reg(dev->kgd); -		ret = f2g->get_atc_vmid_pasid_mapping_info(dev->kgd, vmid, &pasid); +		vmid = f2g->read_vmid_from_vmfault_reg(dev->adev); +		ret = f2g->get_atc_vmid_pasid_mapping_info(dev->adev, vmid, &pasid);  		tmp_ihre->ring_id &= 0x000000ff;  		tmp_ihre->ring_id |= vmid << 8; @@ -113,7 +113,7 @@ static void cik_event_interrupt_wq(struct kfd_dev *dev,  		kfd_process_vm_fault(dev->dqm, pasid);  		memset(&info, 0, sizeof(info)); -		amdgpu_amdkfd_gpuvm_get_vm_fault_info(dev->kgd, &info); +		amdgpu_amdkfd_gpuvm_get_vm_fault_info(dev->adev, &info);  		if (!info.page_addr && !info.status)  			return; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 24ebd61395d8..4bfc0c8ab764 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -321,7 +321,7 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p,  	/* Return gpu_id as doorbell offset for mmap usage */  	args->doorbell_offset = KFD_MMAP_TYPE_DOORBELL;  	args->doorbell_offset |= KFD_MMAP_GPU_ID(args->gpu_id); -	if (KFD_IS_SOC15(dev->device_info->asic_family)) +	if (KFD_IS_SOC15(dev))  		/* On SOC15 ASICs, include the doorbell offset within the  		 * process doorbell frame, which is 2 pages.  		 
*/ @@ -580,7 +580,7 @@ static int kfd_ioctl_dbg_register(struct file *filep,  	if (!dev)  		return -EINVAL; -	if (dev->device_info->asic_family == CHIP_CARRIZO) { +	if (dev->adev->asic_type == CHIP_CARRIZO) {  		pr_debug("kfd_ioctl_dbg_register not supported on CZ\n");  		return -EINVAL;  	} @@ -631,7 +631,7 @@ static int kfd_ioctl_dbg_unregister(struct file *filep,  	if (!dev || !dev->dbgmgr)  		return -EINVAL; -	if (dev->device_info->asic_family == CHIP_CARRIZO) { +	if (dev->adev->asic_type == CHIP_CARRIZO) {  		pr_debug("kfd_ioctl_dbg_unregister not supported on CZ\n");  		return -EINVAL;  	} @@ -676,7 +676,7 @@ static int kfd_ioctl_dbg_address_watch(struct file *filep,  	if (!dev)  		return -EINVAL; -	if (dev->device_info->asic_family == CHIP_CARRIZO) { +	if (dev->adev->asic_type == CHIP_CARRIZO) {  		pr_debug("kfd_ioctl_dbg_wave_control not supported on CZ\n");  		return -EINVAL;  	} @@ -784,7 +784,7 @@ static int kfd_ioctl_dbg_wave_control(struct file *filep,  	if (!dev)  		return -EINVAL; -	if (dev->device_info->asic_family == CHIP_CARRIZO) { +	if (dev->adev->asic_type == CHIP_CARRIZO) {  		pr_debug("kfd_ioctl_dbg_wave_control not supported on CZ\n");  		return -EINVAL;  	} @@ -851,7 +851,7 @@ static int kfd_ioctl_get_clock_counters(struct file *filep,  	dev = kfd_device_by_id(args->gpu_id);  	if (dev)  		/* Reading GPU clock counter from KGD */ -		args->gpu_clock_counter = amdgpu_amdkfd_get_gpu_clock_counter(dev->kgd); +		args->gpu_clock_counter = amdgpu_amdkfd_get_gpu_clock_counter(dev->adev);  	else  		/* Node without GPU resource */  		args->gpu_clock_counter = 0; @@ -1041,7 +1041,7 @@ static int kfd_ioctl_create_event(struct file *filp, struct kfd_process *p,  			goto out_unlock;  		} -		err = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(kfd->kgd, +		err = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(kfd->adev,  						mem, &kern_addr, &size);  		if (err) {  			pr_err("Failed to map event page to kernel\n"); @@ -1051,7 +1051,7 @@ static int kfd_ioctl_create_event(struct file *filp, struct kfd_process *p,  		err = kfd_event_page_set(p, kern_addr, size);  		if (err) {  			pr_err("Failed to set event page\n"); -			amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(kfd->kgd, mem); +			amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(kfd->adev, mem);  			goto out_unlock;  		} @@ -1137,7 +1137,7 @@ static int kfd_ioctl_set_scratch_backing_va(struct file *filep,  	if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS &&  	    pdd->qpd.vmid != 0 && dev->kfd2kgd->set_scratch_backing_va)  		dev->kfd2kgd->set_scratch_backing_va( -			dev->kgd, args->va_addr, pdd->qpd.vmid); +			dev->adev, args->va_addr, pdd->qpd.vmid);  	return 0; @@ -1158,7 +1158,7 @@ static int kfd_ioctl_get_tile_config(struct file *filep,  	if (!dev)  		return -EINVAL; -	amdgpu_amdkfd_get_tile_config(dev->kgd, &config); +	amdgpu_amdkfd_get_tile_config(dev->adev, &config);  	args->gb_addr_config = config.gb_addr_config;  	args->num_banks = config.num_banks; @@ -1244,7 +1244,7 @@ bool kfd_dev_is_large_bar(struct kfd_dev *dev)  	if (dev->use_iommu_v2)  		return false; -	amdgpu_amdkfd_get_local_mem_info(dev->kgd, &mem_info); +	amdgpu_amdkfd_get_local_mem_info(dev->adev, &mem_info);  	if (mem_info.local_mem_size_private == 0 &&  			mem_info.local_mem_size_public > 0)  		return true; @@ -1313,7 +1313,7 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,  			err = -EINVAL;  			goto err_unlock;  		} -		offset = amdgpu_amdkfd_get_mmio_remap_phys_addr(dev->kgd); +		offset = dev->adev->rmmio_remap.bus_addr;  		if (!offset) {  			err = 
-ENOMEM;  			goto err_unlock; @@ -1321,7 +1321,7 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,  	}  	err = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( -		dev->kgd, args->va_addr, args->size, +		dev->adev, args->va_addr, args->size,  		pdd->drm_priv, (struct kgd_mem **) &mem, &offset,  		flags); @@ -1353,7 +1353,7 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,  	return 0;  err_free: -	amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem, +	amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->adev, (struct kgd_mem *)mem,  					       pdd->drm_priv, NULL);  err_unlock:  	mutex_unlock(&p->mutex); @@ -1399,7 +1399,7 @@ static int kfd_ioctl_free_memory_of_gpu(struct file *filep,  		goto err_unlock;  	} -	ret = amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, +	ret = amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->adev,  				(struct kgd_mem *)mem, pdd->drm_priv, &size);  	/* If freeing the buffer failed, leave the handle in place for @@ -1484,7 +1484,7 @@ static int kfd_ioctl_map_memory_to_gpu(struct file *filep,  			goto get_mem_obj_from_handle_failed;  		}  		err = amdgpu_amdkfd_gpuvm_map_memory_to_gpu( -			peer->kgd, (struct kgd_mem *)mem, +			peer->adev, (struct kgd_mem *)mem,  			peer_pdd->drm_priv, &table_freed);  		if (err) {  			pr_err("Failed to map to gpu %d/%d\n", @@ -1496,7 +1496,7 @@ static int kfd_ioctl_map_memory_to_gpu(struct file *filep,  	mutex_unlock(&p->mutex); -	err = amdgpu_amdkfd_gpuvm_sync_memory(dev->kgd, (struct kgd_mem *) mem, true); +	err = amdgpu_amdkfd_gpuvm_sync_memory(dev->adev, (struct kgd_mem *) mem, true);  	if (err) {  		pr_debug("Sync memory failed, wait interrupted by user signal\n");  		goto sync_memory_failed; @@ -1593,7 +1593,7 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep,  			goto get_mem_obj_from_handle_failed;  		}  		err = amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu( -			peer->kgd, (struct kgd_mem *)mem, peer_pdd->drm_priv); +			peer->adev, (struct kgd_mem *)mem, peer_pdd->drm_priv);  		if (err) {  			pr_err("Failed to unmap from gpu %d/%d\n",  			       i, args->n_devices); @@ -1603,8 +1603,8 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep,  	}  	mutex_unlock(&p->mutex); -	if (dev->device_info->asic_family == CHIP_ALDEBARAN) { -		err = amdgpu_amdkfd_gpuvm_sync_memory(dev->kgd, +	if (KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 2)) { +		err = amdgpu_amdkfd_gpuvm_sync_memory(dev->adev,  				(struct kgd_mem *) mem, true);  		if (err) {  			pr_debug("Sync memory failed, wait interrupted by user signal\n"); @@ -1680,7 +1680,7 @@ static int kfd_ioctl_get_dmabuf_info(struct file *filep,  {  	struct kfd_ioctl_get_dmabuf_info_args *args = data;  	struct kfd_dev *dev = NULL; -	struct kgd_dev *dma_buf_kgd; +	struct amdgpu_device *dmabuf_adev;  	void *metadata_buffer = NULL;  	uint32_t flags;  	unsigned int i; @@ -1700,15 +1700,15 @@ static int kfd_ioctl_get_dmabuf_info(struct file *filep,  	}  	/* Get dmabuf info from KGD */ -	r = amdgpu_amdkfd_get_dmabuf_info(dev->kgd, args->dmabuf_fd, -					  &dma_buf_kgd, &args->size, +	r = amdgpu_amdkfd_get_dmabuf_info(dev->adev, args->dmabuf_fd, +					  &dmabuf_adev, &args->size,  					  metadata_buffer, args->metadata_size,  					  &args->metadata_size, &flags);  	if (r)  		goto exit;  	/* Reverse-lookup gpu_id from kgd pointer */ -	dev = kfd_device_by_kgd(dma_buf_kgd); +	dev = kfd_device_by_adev(dmabuf_adev);  	if (!dev) {  		r = -EINVAL;  		goto exit; @@ -1758,7 +1758,7 @@ static int kfd_ioctl_import_dmabuf(struct file *filep,  		goto err_unlock;  	} -	r 
= amdgpu_amdkfd_gpuvm_import_dmabuf(dev->kgd, dmabuf, +	r = amdgpu_amdkfd_gpuvm_import_dmabuf(dev->adev, dmabuf,  					      args->va_addr, pdd->drm_priv,  					      (struct kgd_mem **)&mem, &size,  					      NULL); @@ -1779,7 +1779,7 @@ static int kfd_ioctl_import_dmabuf(struct file *filep,  	return 0;  err_free: -	amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem, +	amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->adev, (struct kgd_mem *)mem,  					       pdd->drm_priv, NULL);  err_unlock:  	mutex_unlock(&p->mutex); @@ -2066,7 +2066,7 @@ static int kfd_mmio_mmap(struct kfd_dev *dev, struct kfd_process *process,  	if (vma->vm_end - vma->vm_start != PAGE_SIZE)  		return -EINVAL; -	address = amdgpu_amdkfd_get_mmio_remap_phys_addr(dev->kgd); +	address = dev->adev->rmmio_remap.bus_addr;  	vma->vm_flags |= VM_IO | VM_DONTCOPY | VM_DONTEXPAND | VM_NORESERVE |  				VM_DONTDUMP | VM_PFNMAP; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c index cfedfb1e8596..9624bbe8b501 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c @@ -1060,6 +1060,9 @@ static int kfd_parse_subtype_iolink(struct crat_subtype_iolink *iolink,  			return -ENODEV;  		/* same everything but the other direction */  		props2 = kmemdup(props, sizeof(*props2), GFP_KERNEL); +		if (!props2) +			return -ENOMEM; +  		props2->node_from = id_to;  		props2->node_to = id_from;  		props2->kobj = NULL; @@ -1340,7 +1343,7 @@ static int kfd_fill_gpu_cache_info(struct kfd_dev *kdev,  	int ret;  	unsigned int num_cu_shared; -	switch (kdev->device_info->asic_family) { +	switch (kdev->adev->asic_type) {  	case CHIP_KAVERI:  		pcache_info = kaveri_cache_info;  		num_of_cache_types = ARRAY_SIZE(kaveri_cache_info); @@ -1377,67 +1380,71 @@ static int kfd_fill_gpu_cache_info(struct kfd_dev *kdev,  		pcache_info = vegam_cache_info;  		num_of_cache_types = ARRAY_SIZE(vegam_cache_info);  		break; -	case CHIP_VEGA10: -		pcache_info = vega10_cache_info; -		num_of_cache_types = ARRAY_SIZE(vega10_cache_info); -		break; -	case CHIP_VEGA12: -		pcache_info = vega12_cache_info; -		num_of_cache_types = ARRAY_SIZE(vega12_cache_info); -		break; -	case CHIP_VEGA20: -	case CHIP_ARCTURUS: -		pcache_info = vega20_cache_info; -		num_of_cache_types = ARRAY_SIZE(vega20_cache_info); -		break; -	case CHIP_ALDEBARAN: -		pcache_info = aldebaran_cache_info; -		num_of_cache_types = ARRAY_SIZE(aldebaran_cache_info); -		break; -	case CHIP_RAVEN: -		pcache_info = raven_cache_info; -		num_of_cache_types = ARRAY_SIZE(raven_cache_info); -		break; -	case CHIP_RENOIR: -		pcache_info = renoir_cache_info; -		num_of_cache_types = ARRAY_SIZE(renoir_cache_info); -		break; -	case CHIP_NAVI10: -	case CHIP_NAVI12: -	case CHIP_CYAN_SKILLFISH: -		pcache_info = navi10_cache_info; -		num_of_cache_types = ARRAY_SIZE(navi10_cache_info); -		break; -	case CHIP_NAVI14: -		pcache_info = navi14_cache_info; -		num_of_cache_types = ARRAY_SIZE(navi14_cache_info); -		break; -	case CHIP_SIENNA_CICHLID: -		pcache_info = sienna_cichlid_cache_info; -		num_of_cache_types = ARRAY_SIZE(sienna_cichlid_cache_info); -		break; -	case CHIP_NAVY_FLOUNDER: -		pcache_info = navy_flounder_cache_info; -		num_of_cache_types = ARRAY_SIZE(navy_flounder_cache_info); -		break; -	case CHIP_DIMGREY_CAVEFISH: -		pcache_info = dimgrey_cavefish_cache_info; -		num_of_cache_types = ARRAY_SIZE(dimgrey_cavefish_cache_info); -		break; -	case CHIP_VANGOGH: -		pcache_info = vangogh_cache_info; -		num_of_cache_types = 
ARRAY_SIZE(vangogh_cache_info); -		break; -	case CHIP_BEIGE_GOBY: -		pcache_info = beige_goby_cache_info; -		num_of_cache_types = ARRAY_SIZE(beige_goby_cache_info); -		break; -	case CHIP_YELLOW_CARP: -		pcache_info = yellow_carp_cache_info; -		num_of_cache_types = ARRAY_SIZE(yellow_carp_cache_info); -		break;  	default: -		return -EINVAL; +		switch(KFD_GC_VERSION(kdev)) { +		case IP_VERSION(9, 0, 1): +			pcache_info = vega10_cache_info; +			num_of_cache_types = ARRAY_SIZE(vega10_cache_info); +			break; +		case IP_VERSION(9, 2, 1): +			pcache_info = vega12_cache_info; +			num_of_cache_types = ARRAY_SIZE(vega12_cache_info); +			break; +		case IP_VERSION(9, 4, 0): +		case IP_VERSION(9, 4, 1): +			pcache_info = vega20_cache_info; +			num_of_cache_types = ARRAY_SIZE(vega20_cache_info); +			break; +		case IP_VERSION(9, 4, 2): +			pcache_info = aldebaran_cache_info; +			num_of_cache_types = ARRAY_SIZE(aldebaran_cache_info); +			break; +		case IP_VERSION(9, 1, 0): +		case IP_VERSION(9, 2, 2): +			pcache_info = raven_cache_info; +			num_of_cache_types = ARRAY_SIZE(raven_cache_info); +			break; +		case IP_VERSION(9, 3, 0): +			pcache_info = renoir_cache_info; +			num_of_cache_types = ARRAY_SIZE(renoir_cache_info); +			break; +		case IP_VERSION(10, 1, 10): +		case IP_VERSION(10, 1, 2): +		case IP_VERSION(10, 1, 3): +			pcache_info = navi10_cache_info; +			num_of_cache_types = ARRAY_SIZE(navi10_cache_info); +			break; +		case IP_VERSION(10, 1, 1): +			pcache_info = navi14_cache_info; +			num_of_cache_types = ARRAY_SIZE(navi14_cache_info); +			break; +		case IP_VERSION(10, 3, 0): +			pcache_info = sienna_cichlid_cache_info; +			num_of_cache_types = ARRAY_SIZE(sienna_cichlid_cache_info); +			break; +		case IP_VERSION(10, 3, 2): +			pcache_info = navy_flounder_cache_info; +			num_of_cache_types = ARRAY_SIZE(navy_flounder_cache_info); +			break; +		case IP_VERSION(10, 3, 4): +			pcache_info = dimgrey_cavefish_cache_info; +			num_of_cache_types = ARRAY_SIZE(dimgrey_cavefish_cache_info); +			break; +		case IP_VERSION(10, 3, 1): +			pcache_info = vangogh_cache_info; +			num_of_cache_types = ARRAY_SIZE(vangogh_cache_info); +			break; +		case IP_VERSION(10, 3, 5): +			pcache_info = beige_goby_cache_info; +			num_of_cache_types = ARRAY_SIZE(beige_goby_cache_info); +			break; +		case IP_VERSION(10, 3, 3): +			pcache_info = yellow_carp_cache_info; +			num_of_cache_types = ARRAY_SIZE(yellow_carp_cache_info); +			break; +		default: +			return -EINVAL; +		}  	}  	*size_filled = 0; @@ -1963,8 +1970,6 @@ static int kfd_fill_gpu_direct_io_link_to_cpu(int *avail_size,  			struct crat_subtype_iolink *sub_type_hdr,  			uint32_t proximity_domain)  { -	struct amdgpu_device *adev = (struct amdgpu_device *)kdev->kgd; -  	*avail_size -= sizeof(struct crat_subtype_iolink);  	if (*avail_size < 0)  		return -ENOMEM; @@ -1981,7 +1986,7 @@ static int kfd_fill_gpu_direct_io_link_to_cpu(int *avail_size,  	/* Fill in IOLINK subtype.  	 
* TODO: Fill-in other fields of iolink subtype  	 */ -	if (adev->gmc.xgmi.connected_to_cpu) { +	if (kdev->adev->gmc.xgmi.connected_to_cpu) {  		/*  		 * with host gpu xgmi link, host can access gpu memory whether  		 * or not pcie bar type is large, so always create bidirectional @@ -1990,19 +1995,19 @@ static int kfd_fill_gpu_direct_io_link_to_cpu(int *avail_size,  		sub_type_hdr->flags |= CRAT_IOLINK_FLAGS_BI_DIRECTIONAL;  		sub_type_hdr->io_interface_type = CRAT_IOLINK_TYPE_XGMI;  		sub_type_hdr->num_hops_xgmi = 1; -		if (adev->asic_type == CHIP_ALDEBARAN) { +		if (KFD_GC_VERSION(kdev) == IP_VERSION(9, 4, 2)) {  			sub_type_hdr->minimum_bandwidth_mbs =  					amdgpu_amdkfd_get_xgmi_bandwidth_mbytes( -							kdev->kgd, NULL, true); +							kdev->adev, NULL, true);  			sub_type_hdr->maximum_bandwidth_mbs =  					sub_type_hdr->minimum_bandwidth_mbs;  		}  	} else {  		sub_type_hdr->io_interface_type = CRAT_IOLINK_TYPE_PCIEXPRESS;  		sub_type_hdr->minimum_bandwidth_mbs = -				amdgpu_amdkfd_get_pcie_bandwidth_mbytes(kdev->kgd, true); +				amdgpu_amdkfd_get_pcie_bandwidth_mbytes(kdev->adev, true);  		sub_type_hdr->maximum_bandwidth_mbs = -				amdgpu_amdkfd_get_pcie_bandwidth_mbytes(kdev->kgd, false); +				amdgpu_amdkfd_get_pcie_bandwidth_mbytes(kdev->adev, false);  	}  	sub_type_hdr->proximity_domain_from = proximity_domain; @@ -2044,11 +2049,11 @@ static int kfd_fill_gpu_xgmi_link_to_gpu(int *avail_size,  	sub_type_hdr->proximity_domain_from = proximity_domain_from;  	sub_type_hdr->proximity_domain_to = proximity_domain_to;  	sub_type_hdr->num_hops_xgmi = -		amdgpu_amdkfd_get_xgmi_hops_count(kdev->kgd, peer_kdev->kgd); +		amdgpu_amdkfd_get_xgmi_hops_count(kdev->adev, peer_kdev->adev);  	sub_type_hdr->maximum_bandwidth_mbs = -		amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(kdev->kgd, peer_kdev->kgd, false); +		amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(kdev->adev, peer_kdev->adev, false);  	sub_type_hdr->minimum_bandwidth_mbs = sub_type_hdr->maximum_bandwidth_mbs ? -		amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(kdev->kgd, NULL, true) : 0; +		amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(kdev->adev, NULL, true) : 0;  	return 0;  } @@ -2114,7 +2119,7 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image,  	cu->flags |= CRAT_CU_FLAGS_GPU_PRESENT;  	cu->proximity_domain = proximity_domain; -	amdgpu_amdkfd_get_cu_info(kdev->kgd, &cu_info); +	amdgpu_amdkfd_get_cu_info(kdev->adev, &cu_info);  	cu->num_simd_per_cu = cu_info.simd_per_cu;  	cu->num_simd_cores = cu_info.simd_per_cu * cu_info.cu_active_number;  	cu->max_waves_simd = cu_info.max_waves_per_simd; @@ -2145,7 +2150,7 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image,  	 * report the total FB size (public+private) as a single  	 * private heap.  	 
*/ -	amdgpu_amdkfd_get_local_mem_info(kdev->kgd, &local_mem_info); +	amdgpu_amdkfd_get_local_mem_info(kdev->adev, &local_mem_info);  	sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +  			sub_type_hdr->length); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c index 159add0f5aaa..1e30717b5253 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c @@ -41,7 +41,7 @@  static void dbgdev_address_watch_disable_nodiq(struct kfd_dev *dev)  { -	dev->kfd2kgd->address_watch_disable(dev->kgd); +	dev->kfd2kgd->address_watch_disable(dev->adev);  }  static int dbgdev_diq_submit_ib(struct kfd_dbgdev *dbgdev, @@ -322,7 +322,7 @@ static int dbgdev_address_watch_nodiq(struct kfd_dbgdev *dbgdev,  		pr_debug("\t\t%30s\n", "* * * * * * * * * * * * * * * * * *");  		pdd->dev->kfd2kgd->address_watch_execute( -						dbgdev->dev->kgd, +						dbgdev->dev->adev,  						i,  						cntl.u32All,  						addrHi.u32All, @@ -420,7 +420,7 @@ static int dbgdev_address_watch_diq(struct kfd_dbgdev *dbgdev,  		aw_reg_add_dword =  				dbgdev->dev->kfd2kgd->address_watch_get_offset( -					dbgdev->dev->kgd, +					dbgdev->dev->adev,  					i,  					ADDRESS_WATCH_REG_CNTL); @@ -431,7 +431,7 @@ static int dbgdev_address_watch_diq(struct kfd_dbgdev *dbgdev,  		aw_reg_add_dword =  				dbgdev->dev->kfd2kgd->address_watch_get_offset( -					dbgdev->dev->kgd, +					dbgdev->dev->adev,  					i,  					ADDRESS_WATCH_REG_ADDR_HI); @@ -441,7 +441,7 @@ static int dbgdev_address_watch_diq(struct kfd_dbgdev *dbgdev,  		aw_reg_add_dword =  				dbgdev->dev->kfd2kgd->address_watch_get_offset( -					dbgdev->dev->kgd, +					dbgdev->dev->adev,  					i,  					ADDRESS_WATCH_REG_ADDR_LO); @@ -457,7 +457,7 @@ static int dbgdev_address_watch_diq(struct kfd_dbgdev *dbgdev,  		aw_reg_add_dword =  				dbgdev->dev->kfd2kgd->address_watch_get_offset( -					dbgdev->dev->kgd, +					dbgdev->dev->adev,  					i,  					ADDRESS_WATCH_REG_CNTL); @@ -752,7 +752,7 @@ static int dbgdev_wave_control_nodiq(struct kfd_dbgdev *dbgdev,  	pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *"); -	return dbgdev->dev->kfd2kgd->wave_control_execute(dbgdev->dev->kgd, +	return dbgdev->dev->kfd2kgd->wave_control_execute(dbgdev->dev->adev,  							reg_gfx_index.u32All,  							reg_sq_cmd.u32All);  } @@ -784,7 +784,7 @@ int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p)  	for (vmid = first_vmid_to_scan; vmid <= last_vmid_to_scan; vmid++) {  		status = dev->kfd2kgd->get_atc_vmid_pasid_mapping_info -				(dev->kgd, vmid, &queried_pasid); +				(dev->adev, vmid, &queried_pasid);  		if (status && queried_pasid == p->pasid) {  			pr_debug("Killing wave fronts of vmid %d and pasid 0x%x\n", @@ -811,7 +811,7 @@ int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p)  	/* for non DIQ we need to patch the VMID: */  	reg_sq_cmd.bits.vm_id = vmid; -	dev->kfd2kgd->wave_control_execute(dev->kgd, +	dev->kfd2kgd->wave_control_execute(dev->adev,  					reg_gfx_index.u32All,  					reg_sq_cmd.u32All); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 3b119db16003..2b65d0acae2c 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -53,770 +53,310 @@ extern const struct kfd2kgd_calls aldebaran_kfd2kgd;  extern const struct kfd2kgd_calls gfx_v10_kfd2kgd;  extern const struct kfd2kgd_calls gfx_v10_3_kfd2kgd; -#ifdef KFD_SUPPORT_IOMMU_V2 -static const struct 
kfd_device_info kaveri_device_info = { -	.asic_family = CHIP_KAVERI, -	.asic_name = "kaveri", -	.gfx_target_version = 70000, -	.max_pasid_bits = 16, -	/* max num of queues for KV.TODO should be a dynamic value */ -	.max_no_of_hqd	= 24, -	.doorbell_size  = 4, -	.ih_ring_entry_size = 4 * sizeof(uint32_t), -	.event_interrupt_class = &event_interrupt_class_cik, -	.num_of_watch_points = 4, -	.mqd_size_aligned = MQD_SIZE_ALIGNED, -	.supports_cwsr = false, -	.needs_iommu_device = true, -	.needs_pci_atomics = false, -	.num_sdma_engines = 2, -	.num_xgmi_sdma_engines = 0, -	.num_sdma_queues_per_engine = 2, -}; - -static const struct kfd_device_info carrizo_device_info = { -	.asic_family = CHIP_CARRIZO, -	.asic_name = "carrizo", -	.gfx_target_version = 80001, -	.max_pasid_bits = 16, -	/* max num of queues for CZ.TODO should be a dynamic value */ -	.max_no_of_hqd	= 24, -	.doorbell_size  = 4, -	.ih_ring_entry_size = 4 * sizeof(uint32_t), -	.event_interrupt_class = &event_interrupt_class_cik, -	.num_of_watch_points = 4, -	.mqd_size_aligned = MQD_SIZE_ALIGNED, -	.supports_cwsr = true, -	.needs_iommu_device = true, -	.needs_pci_atomics = false, -	.num_sdma_engines = 2, -	.num_xgmi_sdma_engines = 0, -	.num_sdma_queues_per_engine = 2, -}; - -static const struct kfd_device_info raven_device_info = { -	.asic_family = CHIP_RAVEN, -	.asic_name = "raven", -	.gfx_target_version = 90002, -	.max_pasid_bits = 16, -	.max_no_of_hqd  = 24, -	.doorbell_size  = 8, -	.ih_ring_entry_size = 8 * sizeof(uint32_t), -	.event_interrupt_class = &event_interrupt_class_v9, -	.num_of_watch_points = 4, -	.mqd_size_aligned = MQD_SIZE_ALIGNED, -	.supports_cwsr = true, -	.needs_iommu_device = true, -	.needs_pci_atomics = true, -	.num_sdma_engines = 1, -	.num_xgmi_sdma_engines = 0, -	.num_sdma_queues_per_engine = 2, -}; -#endif - -#ifdef CONFIG_DRM_AMDGPU_CIK -static const struct kfd_device_info hawaii_device_info = { -	.asic_family = CHIP_HAWAII, -	.asic_name = "hawaii", -	.gfx_target_version = 70001, -	.max_pasid_bits = 16, -	/* max num of queues for KV.TODO should be a dynamic value */ -	.max_no_of_hqd	= 24, -	.doorbell_size  = 4, -	.ih_ring_entry_size = 4 * sizeof(uint32_t), -	.event_interrupt_class = &event_interrupt_class_cik, -	.num_of_watch_points = 4, -	.mqd_size_aligned = MQD_SIZE_ALIGNED, -	.supports_cwsr = false, -	.needs_iommu_device = false, -	.needs_pci_atomics = false, -	.num_sdma_engines = 2, -	.num_xgmi_sdma_engines = 0, -	.num_sdma_queues_per_engine = 2, -}; -#endif - -static const struct kfd_device_info tonga_device_info = { -	.asic_family = CHIP_TONGA, -	.asic_name = "tonga", -	.gfx_target_version = 80002, -	.max_pasid_bits = 16, -	.max_no_of_hqd  = 24, -	.doorbell_size  = 4, -	.ih_ring_entry_size = 4 * sizeof(uint32_t), -	.event_interrupt_class = &event_interrupt_class_cik, -	.num_of_watch_points = 4, -	.mqd_size_aligned = MQD_SIZE_ALIGNED, -	.supports_cwsr = false, -	.needs_iommu_device = false, -	.needs_pci_atomics = true, -	.num_sdma_engines = 2, -	.num_xgmi_sdma_engines = 0, -	.num_sdma_queues_per_engine = 2, -}; - -static const struct kfd_device_info fiji_device_info = { -	.asic_family = CHIP_FIJI, -	.asic_name = "fiji", -	.gfx_target_version = 80003, -	.max_pasid_bits = 16, -	.max_no_of_hqd  = 24, -	.doorbell_size  = 4, -	.ih_ring_entry_size = 4 * sizeof(uint32_t), -	.event_interrupt_class = &event_interrupt_class_cik, -	.num_of_watch_points = 4, -	.mqd_size_aligned = MQD_SIZE_ALIGNED, -	.supports_cwsr = true, -	.needs_iommu_device = false, -	.needs_pci_atomics = true, -	.num_sdma_engines = 2, -	
.num_xgmi_sdma_engines = 0, -	.num_sdma_queues_per_engine = 2, -}; - -static const struct kfd_device_info fiji_vf_device_info = { -	.asic_family = CHIP_FIJI, -	.asic_name = "fiji", -	.gfx_target_version = 80003, -	.max_pasid_bits = 16, -	.max_no_of_hqd  = 24, -	.doorbell_size  = 4, -	.ih_ring_entry_size = 4 * sizeof(uint32_t), -	.event_interrupt_class = &event_interrupt_class_cik, -	.num_of_watch_points = 4, -	.mqd_size_aligned = MQD_SIZE_ALIGNED, -	.supports_cwsr = true, -	.needs_iommu_device = false, -	.needs_pci_atomics = false, -	.num_sdma_engines = 2, -	.num_xgmi_sdma_engines = 0, -	.num_sdma_queues_per_engine = 2, -}; - - -static const struct kfd_device_info polaris10_device_info = { -	.asic_family = CHIP_POLARIS10, -	.asic_name = "polaris10", -	.gfx_target_version = 80003, -	.max_pasid_bits = 16, -	.max_no_of_hqd  = 24, -	.doorbell_size  = 4, -	.ih_ring_entry_size = 4 * sizeof(uint32_t), -	.event_interrupt_class = &event_interrupt_class_cik, -	.num_of_watch_points = 4, -	.mqd_size_aligned = MQD_SIZE_ALIGNED, -	.supports_cwsr = true, -	.needs_iommu_device = false, -	.needs_pci_atomics = true, -	.num_sdma_engines = 2, -	.num_xgmi_sdma_engines = 0, -	.num_sdma_queues_per_engine = 2, -}; - -static const struct kfd_device_info polaris10_vf_device_info = { -	.asic_family = CHIP_POLARIS10, -	.asic_name = "polaris10", -	.gfx_target_version = 80003, -	.max_pasid_bits = 16, -	.max_no_of_hqd  = 24, -	.doorbell_size  = 4, -	.ih_ring_entry_size = 4 * sizeof(uint32_t), -	.event_interrupt_class = &event_interrupt_class_cik, -	.num_of_watch_points = 4, -	.mqd_size_aligned = MQD_SIZE_ALIGNED, -	.supports_cwsr = true, -	.needs_iommu_device = false, -	.needs_pci_atomics = false, -	.num_sdma_engines = 2, -	.num_xgmi_sdma_engines = 0, -	.num_sdma_queues_per_engine = 2, -}; - -static const struct kfd_device_info polaris11_device_info = { -	.asic_family = CHIP_POLARIS11, -	.asic_name = "polaris11", -	.gfx_target_version = 80003, -	.max_pasid_bits = 16, -	.max_no_of_hqd  = 24, -	.doorbell_size  = 4, -	.ih_ring_entry_size = 4 * sizeof(uint32_t), -	.event_interrupt_class = &event_interrupt_class_cik, -	.num_of_watch_points = 4, -	.mqd_size_aligned = MQD_SIZE_ALIGNED, -	.supports_cwsr = true, -	.needs_iommu_device = false, -	.needs_pci_atomics = true, -	.num_sdma_engines = 2, -	.num_xgmi_sdma_engines = 0, -	.num_sdma_queues_per_engine = 2, -}; - -static const struct kfd_device_info polaris12_device_info = { -	.asic_family = CHIP_POLARIS12, -	.asic_name = "polaris12", -	.gfx_target_version = 80003, -	.max_pasid_bits = 16, -	.max_no_of_hqd  = 24, -	.doorbell_size  = 4, -	.ih_ring_entry_size = 4 * sizeof(uint32_t), -	.event_interrupt_class = &event_interrupt_class_cik, -	.num_of_watch_points = 4, -	.mqd_size_aligned = MQD_SIZE_ALIGNED, -	.supports_cwsr = true, -	.needs_iommu_device = false, -	.needs_pci_atomics = true, -	.num_sdma_engines = 2, -	.num_xgmi_sdma_engines = 0, -	.num_sdma_queues_per_engine = 2, -}; - -static const struct kfd_device_info vegam_device_info = { -	.asic_family = CHIP_VEGAM, -	.asic_name = "vegam", -	.gfx_target_version = 80003, -	.max_pasid_bits = 16, -	.max_no_of_hqd  = 24, -	.doorbell_size  = 4, -	.ih_ring_entry_size = 4 * sizeof(uint32_t), -	.event_interrupt_class = &event_interrupt_class_cik, -	.num_of_watch_points = 4, -	.mqd_size_aligned = MQD_SIZE_ALIGNED, -	.supports_cwsr = true, -	.needs_iommu_device = false, -	.needs_pci_atomics = true, -	.num_sdma_engines = 2, -	.num_xgmi_sdma_engines = 0, -	.num_sdma_queues_per_engine = 2, -}; - -static const struct kfd_device_info 
vega10_device_info = { -	.asic_family = CHIP_VEGA10, -	.asic_name = "vega10", -	.gfx_target_version = 90000, -	.max_pasid_bits = 16, -	.max_no_of_hqd  = 24, -	.doorbell_size  = 8, -	.ih_ring_entry_size = 8 * sizeof(uint32_t), -	.event_interrupt_class = &event_interrupt_class_v9, -	.num_of_watch_points = 4, -	.mqd_size_aligned = MQD_SIZE_ALIGNED, -	.supports_cwsr = true, -	.needs_iommu_device = false, -	.needs_pci_atomics = false, -	.num_sdma_engines = 2, -	.num_xgmi_sdma_engines = 0, -	.num_sdma_queues_per_engine = 2, -}; - -static const struct kfd_device_info vega10_vf_device_info = { -	.asic_family = CHIP_VEGA10, -	.asic_name = "vega10", -	.gfx_target_version = 90000, -	.max_pasid_bits = 16, -	.max_no_of_hqd  = 24, -	.doorbell_size  = 8, -	.ih_ring_entry_size = 8 * sizeof(uint32_t), -	.event_interrupt_class = &event_interrupt_class_v9, -	.num_of_watch_points = 4, -	.mqd_size_aligned = MQD_SIZE_ALIGNED, -	.supports_cwsr = true, -	.needs_iommu_device = false, -	.needs_pci_atomics = false, -	.num_sdma_engines = 2, -	.num_xgmi_sdma_engines = 0, -	.num_sdma_queues_per_engine = 2, -}; - -static const struct kfd_device_info vega12_device_info = { -	.asic_family = CHIP_VEGA12, -	.asic_name = "vega12", -	.gfx_target_version = 90004, -	.max_pasid_bits = 16, -	.max_no_of_hqd  = 24, -	.doorbell_size  = 8, -	.ih_ring_entry_size = 8 * sizeof(uint32_t), -	.event_interrupt_class = &event_interrupt_class_v9, -	.num_of_watch_points = 4, -	.mqd_size_aligned = MQD_SIZE_ALIGNED, -	.supports_cwsr = true, -	.needs_iommu_device = false, -	.needs_pci_atomics = false, -	.num_sdma_engines = 2, -	.num_xgmi_sdma_engines = 0, -	.num_sdma_queues_per_engine = 2, -}; - -static const struct kfd_device_info vega20_device_info = { -	.asic_family = CHIP_VEGA20, -	.asic_name = "vega20", -	.gfx_target_version = 90006, -	.max_pasid_bits = 16, -	.max_no_of_hqd	= 24, -	.doorbell_size	= 8, -	.ih_ring_entry_size = 8 * sizeof(uint32_t), -	.event_interrupt_class = &event_interrupt_class_v9, -	.num_of_watch_points = 4, -	.mqd_size_aligned = MQD_SIZE_ALIGNED, -	.supports_cwsr = true, -	.needs_iommu_device = false, -	.needs_pci_atomics = false, -	.num_sdma_engines = 2, -	.num_xgmi_sdma_engines = 0, -	.num_sdma_queues_per_engine = 8, -}; - -static const struct kfd_device_info arcturus_device_info = { -	.asic_family = CHIP_ARCTURUS, -	.asic_name = "arcturus", -	.gfx_target_version = 90008, -	.max_pasid_bits = 16, -	.max_no_of_hqd	= 24, -	.doorbell_size	= 8, -	.ih_ring_entry_size = 8 * sizeof(uint32_t), -	.event_interrupt_class = &event_interrupt_class_v9, -	.num_of_watch_points = 4, -	.mqd_size_aligned = MQD_SIZE_ALIGNED, -	.supports_cwsr = true, -	.needs_iommu_device = false, -	.needs_pci_atomics = false, -	.num_sdma_engines = 2, -	.num_xgmi_sdma_engines = 6, -	.num_sdma_queues_per_engine = 8, -}; - -static const struct kfd_device_info aldebaran_device_info = { -	.asic_family = CHIP_ALDEBARAN, -	.asic_name = "aldebaran", -	.gfx_target_version = 90010, -	.max_pasid_bits = 16, -	.max_no_of_hqd	= 24, -	.doorbell_size	= 8, -	.ih_ring_entry_size = 8 * sizeof(uint32_t), -	.event_interrupt_class = &event_interrupt_class_v9, -	.num_of_watch_points = 4, -	.mqd_size_aligned = MQD_SIZE_ALIGNED, -	.supports_cwsr = true, -	.needs_iommu_device = false, -	.needs_pci_atomics = false, -	.num_sdma_engines = 2, -	.num_xgmi_sdma_engines = 3, -	.num_sdma_queues_per_engine = 8, -}; - -static const struct kfd_device_info renoir_device_info = { -	.asic_family = CHIP_RENOIR, -	.asic_name = "renoir", -	.gfx_target_version = 90012, -	.max_pasid_bits = 16, -	
.max_no_of_hqd  = 24, -	.doorbell_size  = 8, -	.ih_ring_entry_size = 8 * sizeof(uint32_t), -	.event_interrupt_class = &event_interrupt_class_v9, -	.num_of_watch_points = 4, -	.mqd_size_aligned = MQD_SIZE_ALIGNED, -	.supports_cwsr = true, -	.needs_iommu_device = false, -	.needs_pci_atomics = false, -	.num_sdma_engines = 1, -	.num_xgmi_sdma_engines = 0, -	.num_sdma_queues_per_engine = 2, -}; - -static const struct kfd_device_info navi10_device_info = { -	.asic_family = CHIP_NAVI10, -	.asic_name = "navi10", -	.gfx_target_version = 100100, -	.max_pasid_bits = 16, -	.max_no_of_hqd  = 24, -	.doorbell_size  = 8, -	.ih_ring_entry_size = 8 * sizeof(uint32_t), -	.event_interrupt_class = &event_interrupt_class_v9, -	.num_of_watch_points = 4, -	.mqd_size_aligned = MQD_SIZE_ALIGNED, -	.needs_iommu_device = false, -	.supports_cwsr = true, -	.needs_pci_atomics = true, -	.no_atomic_fw_version = 145, -	.num_sdma_engines = 2, -	.num_xgmi_sdma_engines = 0, -	.num_sdma_queues_per_engine = 8, -}; - -static const struct kfd_device_info navi12_device_info = { -	.asic_family = CHIP_NAVI12, -	.asic_name = "navi12", -	.gfx_target_version = 100101, -	.max_pasid_bits = 16, -	.max_no_of_hqd  = 24, -	.doorbell_size  = 8, -	.ih_ring_entry_size = 8 * sizeof(uint32_t), -	.event_interrupt_class = &event_interrupt_class_v9, -	.num_of_watch_points = 4, -	.mqd_size_aligned = MQD_SIZE_ALIGNED, -	.needs_iommu_device = false, -	.supports_cwsr = true, -	.needs_pci_atomics = true, -	.no_atomic_fw_version = 145, -	.num_sdma_engines = 2, -	.num_xgmi_sdma_engines = 0, -	.num_sdma_queues_per_engine = 8, -}; - -static const struct kfd_device_info navi14_device_info = { -	.asic_family = CHIP_NAVI14, -	.asic_name = "navi14", -	.gfx_target_version = 100102, -	.max_pasid_bits = 16, -	.max_no_of_hqd  = 24, -	.doorbell_size  = 8, -	.ih_ring_entry_size = 8 * sizeof(uint32_t), -	.event_interrupt_class = &event_interrupt_class_v9, -	.num_of_watch_points = 4, -	.mqd_size_aligned = MQD_SIZE_ALIGNED, -	.needs_iommu_device = false, -	.supports_cwsr = true, -	.needs_pci_atomics = true, -	.no_atomic_fw_version = 145, -	.num_sdma_engines = 2, -	.num_xgmi_sdma_engines = 0, -	.num_sdma_queues_per_engine = 8, -}; - -static const struct kfd_device_info sienna_cichlid_device_info = { -	.asic_family = CHIP_SIENNA_CICHLID, -	.asic_name = "sienna_cichlid", -	.gfx_target_version = 100300, -	.max_pasid_bits = 16, -	.max_no_of_hqd  = 24, -	.doorbell_size  = 8, -	.ih_ring_entry_size = 8 * sizeof(uint32_t), -	.event_interrupt_class = &event_interrupt_class_v9, -	.num_of_watch_points = 4, -	.mqd_size_aligned = MQD_SIZE_ALIGNED, -	.needs_iommu_device = false, -	.supports_cwsr = true, -	.needs_pci_atomics = true, -	.no_atomic_fw_version = 92, -	.num_sdma_engines = 4, -	.num_xgmi_sdma_engines = 0, -	.num_sdma_queues_per_engine = 8, -}; - -static const struct kfd_device_info navy_flounder_device_info = { -	.asic_family = CHIP_NAVY_FLOUNDER, -	.asic_name = "navy_flounder", -	.gfx_target_version = 100301, -	.max_pasid_bits = 16, -	.max_no_of_hqd  = 24, -	.doorbell_size  = 8, -	.ih_ring_entry_size = 8 * sizeof(uint32_t), -	.event_interrupt_class = &event_interrupt_class_v9, -	.num_of_watch_points = 4, -	.mqd_size_aligned = MQD_SIZE_ALIGNED, -	.needs_iommu_device = false, -	.supports_cwsr = true, -	.needs_pci_atomics = true, -	.no_atomic_fw_version = 92, -	.num_sdma_engines = 2, -	.num_xgmi_sdma_engines = 0, -	.num_sdma_queues_per_engine = 8, -}; - -static const struct kfd_device_info vangogh_device_info = { -	.asic_family = CHIP_VANGOGH, -	.asic_name = "vangogh", -	
.gfx_target_version = 100303, -	.max_pasid_bits = 16, -	.max_no_of_hqd  = 24, -	.doorbell_size  = 8, -	.ih_ring_entry_size = 8 * sizeof(uint32_t), -	.event_interrupt_class = &event_interrupt_class_v9, -	.num_of_watch_points = 4, -	.mqd_size_aligned = MQD_SIZE_ALIGNED, -	.needs_iommu_device = false, -	.supports_cwsr = true, -	.needs_pci_atomics = true, -	.no_atomic_fw_version = 92, -	.num_sdma_engines = 1, -	.num_xgmi_sdma_engines = 0, -	.num_sdma_queues_per_engine = 2, -}; - -static const struct kfd_device_info dimgrey_cavefish_device_info = { -	.asic_family = CHIP_DIMGREY_CAVEFISH, -	.asic_name = "dimgrey_cavefish", -	.gfx_target_version = 100302, -	.max_pasid_bits = 16, -	.max_no_of_hqd  = 24, -	.doorbell_size  = 8, -	.ih_ring_entry_size = 8 * sizeof(uint32_t), -	.event_interrupt_class = &event_interrupt_class_v9, -	.num_of_watch_points = 4, -	.mqd_size_aligned = MQD_SIZE_ALIGNED, -	.needs_iommu_device = false, -	.supports_cwsr = true, -	.needs_pci_atomics = true, -	.no_atomic_fw_version = 92, -	.num_sdma_engines = 2, -	.num_xgmi_sdma_engines = 0, -	.num_sdma_queues_per_engine = 8, -}; - -static const struct kfd_device_info beige_goby_device_info = { -	.asic_family = CHIP_BEIGE_GOBY, -	.asic_name = "beige_goby", -	.gfx_target_version = 100304, -	.max_pasid_bits = 16, -	.max_no_of_hqd  = 24, -	.doorbell_size  = 8, -	.ih_ring_entry_size = 8 * sizeof(uint32_t), -	.event_interrupt_class = &event_interrupt_class_v9, -	.num_of_watch_points = 4, -	.mqd_size_aligned = MQD_SIZE_ALIGNED, -	.needs_iommu_device = false, -	.supports_cwsr = true, -	.needs_pci_atomics = true, -	.no_atomic_fw_version = 92, -	.num_sdma_engines = 1, -	.num_xgmi_sdma_engines = 0, -	.num_sdma_queues_per_engine = 8, -}; - -static const struct kfd_device_info yellow_carp_device_info = { -	.asic_family = CHIP_YELLOW_CARP, -	.asic_name = "yellow_carp", -	.gfx_target_version = 100305, -	.max_pasid_bits = 16, -	.max_no_of_hqd  = 24, -	.doorbell_size  = 8, -	.ih_ring_entry_size = 8 * sizeof(uint32_t), -	.event_interrupt_class = &event_interrupt_class_v9, -	.num_of_watch_points = 4, -	.mqd_size_aligned = MQD_SIZE_ALIGNED, -	.needs_iommu_device = false, -	.supports_cwsr = true, -	.needs_pci_atomics = true, -	.no_atomic_fw_version = 92, -	.num_sdma_engines = 1, -	.num_xgmi_sdma_engines = 0, -	.num_sdma_queues_per_engine = 2, -}; - -static const struct kfd_device_info cyan_skillfish_device_info = { -	.asic_family = CHIP_CYAN_SKILLFISH, -	.asic_name = "cyan_skillfish", -	.gfx_target_version = 100103, -	.max_pasid_bits = 16, -	.max_no_of_hqd  = 24, -	.doorbell_size  = 8, -	.ih_ring_entry_size = 8 * sizeof(uint32_t), -	.event_interrupt_class = &event_interrupt_class_v9, -	.num_of_watch_points = 4, -	.mqd_size_aligned = MQD_SIZE_ALIGNED, -	.needs_iommu_device = false, -	.supports_cwsr = true, -	.needs_pci_atomics = true, -	.num_sdma_engines = 2, -	.num_xgmi_sdma_engines = 0, -	.num_sdma_queues_per_engine = 8, -}; -  static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size,  				unsigned int chunk_size);  static void kfd_gtt_sa_fini(struct kfd_dev *kfd);  static int kfd_resume(struct kfd_dev *kfd); -struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, bool vf) +static void kfd_device_info_set_sdma_queue_num(struct kfd_dev *kfd)  { -	struct kfd_dev *kfd; -	const struct kfd_device_info *device_info; -	const struct kfd2kgd_calls *f2g; -	struct amdgpu_device *adev = (struct amdgpu_device *)kgd; +	uint32_t sdma_version = kfd->adev->ip_versions[SDMA0_HWIP][0]; + +	switch (sdma_version) { +		case IP_VERSION(4, 0, 0):/* VEGA10 */ +		
case IP_VERSION(4, 0, 1):/* VEGA12 */ +		case IP_VERSION(4, 1, 0):/* RAVEN */ +		case IP_VERSION(4, 1, 1):/* RAVEN */ +		case IP_VERSION(4, 1, 2):/* RENOIR */ +		case IP_VERSION(5, 2, 1):/* VANGOGH */ +		case IP_VERSION(5, 2, 3):/* YELLOW_CARP */ +			kfd->device_info.num_sdma_queues_per_engine = 2; +			break; +		case IP_VERSION(4, 2, 0):/* VEGA20 */ +		case IP_VERSION(4, 2, 2):/* ARCTURUS */ +		case IP_VERSION(4, 4, 0):/* ALDEBARAN */ +		case IP_VERSION(5, 0, 0):/* NAVI10 */ +		case IP_VERSION(5, 0, 1):/* CYAN_SKILLFISH */ +		case IP_VERSION(5, 0, 2):/* NAVI14 */ +		case IP_VERSION(5, 0, 5):/* NAVI12 */ +		case IP_VERSION(5, 2, 0):/* SIENNA_CICHLID */ +		case IP_VERSION(5, 2, 2):/* NAVY_FLOUNDER */ +		case IP_VERSION(5, 2, 4):/* DIMGREY_CAVEFISH */ +		case IP_VERSION(5, 2, 5):/* BEIGE_GOBY */ +			kfd->device_info.num_sdma_queues_per_engine = 8; +			break; +		default: +			dev_warn(kfd_device, +				"Default sdma queue per engine(8) is set due to " +				"mismatch of sdma ip block(SDMA_HWIP:0x%x).\n", +                                sdma_version); +			kfd->device_info.num_sdma_queues_per_engine = 8; +	} +} + +static void kfd_device_info_set_event_interrupt_class(struct kfd_dev *kfd) +{ +	uint32_t gc_version = KFD_GC_VERSION(kfd); + +	switch (gc_version) { +	case IP_VERSION(9, 0, 1): /* VEGA10 */ +	case IP_VERSION(9, 1, 0): /* RAVEN */ +	case IP_VERSION(9, 2, 1): /* VEGA12 */ +	case IP_VERSION(9, 2, 2): /* RAVEN */ +	case IP_VERSION(9, 3, 0): /* RENOIR */ +	case IP_VERSION(9, 4, 0): /* VEGA20 */ +	case IP_VERSION(9, 4, 1): /* ARCTURUS */ +	case IP_VERSION(9, 4, 2): /* ALDEBARAN */ +	case IP_VERSION(10, 3, 1): /* VANGOGH */ +	case IP_VERSION(10, 3, 3): /* YELLOW_CARP */ +	case IP_VERSION(10, 1, 3): /* CYAN_SKILLFISH */ +	case IP_VERSION(10, 1, 10): /* NAVI10 */ +	case IP_VERSION(10, 1, 2): /* NAVI12 */ +	case IP_VERSION(10, 1, 1): /* NAVI14 */ +	case IP_VERSION(10, 3, 0): /* SIENNA_CICHLID */ +	case IP_VERSION(10, 3, 2): /* NAVY_FLOUNDER */ +	case IP_VERSION(10, 3, 4): /* DIMGREY_CAVEFISH */ +	case IP_VERSION(10, 3, 5): /* BEIGE_GOBY */ +		kfd->device_info.event_interrupt_class = &event_interrupt_class_v9; +		break; +	default: +		dev_warn(kfd_device, "v9 event interrupt handler is set due to " +			"mismatch of gc ip block(GC_HWIP:0x%x).\n", gc_version); +		kfd->device_info.event_interrupt_class = &event_interrupt_class_v9; +	} +} + +static void kfd_device_info_init(struct kfd_dev *kfd, +				 bool vf, uint32_t gfx_target_version) +{ +	uint32_t gc_version = KFD_GC_VERSION(kfd); +	uint32_t asic_type = kfd->adev->asic_type; + +	kfd->device_info.max_pasid_bits = 16; +	kfd->device_info.max_no_of_hqd = 24; +	kfd->device_info.num_of_watch_points = 4; +	kfd->device_info.mqd_size_aligned = MQD_SIZE_ALIGNED; +	kfd->device_info.gfx_target_version = gfx_target_version; + +	if (KFD_IS_SOC15(kfd)) { +		kfd->device_info.doorbell_size = 8; +		kfd->device_info.ih_ring_entry_size = 8 * sizeof(uint32_t); +		kfd->device_info.supports_cwsr = true; + +		kfd_device_info_set_sdma_queue_num(kfd); + +		kfd_device_info_set_event_interrupt_class(kfd); + +		/* Raven */ +		if (gc_version == IP_VERSION(9, 1, 0) || +		    gc_version == IP_VERSION(9, 2, 2)) +			kfd->device_info.needs_iommu_device = true; + +		if (gc_version < IP_VERSION(11, 0, 0)) { +			/* Navi2x+, Navi1x+ */ +			if (gc_version >= IP_VERSION(10, 3, 0)) +				kfd->device_info.no_atomic_fw_version = 92; +			else if (gc_version >= IP_VERSION(10, 1, 1)) +				kfd->device_info.no_atomic_fw_version = 145; + +			/* Navi1x+ */ +			if (gc_version >= IP_VERSION(10, 1, 1)) 
+				kfd->device_info.needs_pci_atomics = true; +		} +	} else { +		kfd->device_info.doorbell_size = 4; +		kfd->device_info.ih_ring_entry_size = 4 * sizeof(uint32_t); +		kfd->device_info.event_interrupt_class = &event_interrupt_class_cik; +		kfd->device_info.num_sdma_queues_per_engine = 2; + +		if (asic_type != CHIP_KAVERI && +		    asic_type != CHIP_HAWAII && +		    asic_type != CHIP_TONGA) +			kfd->device_info.supports_cwsr = true; + +		if (asic_type == CHIP_KAVERI || +		    asic_type == CHIP_CARRIZO) +			kfd->device_info.needs_iommu_device = true; + +		if (asic_type != CHIP_HAWAII && !vf) +			kfd->device_info.needs_pci_atomics = true; +	} +} + +struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf) +{ +	struct kfd_dev *kfd = NULL; +	const struct kfd2kgd_calls *f2g = NULL;  	struct pci_dev *pdev = adev->pdev; +	uint32_t gfx_target_version = 0;  	switch (adev->asic_type) {  #ifdef KFD_SUPPORT_IOMMU_V2  #ifdef CONFIG_DRM_AMDGPU_CIK  	case CHIP_KAVERI: -		if (vf) -			device_info = NULL; -		else -			device_info = &kaveri_device_info; -		f2g = &gfx_v7_kfd2kgd; +		gfx_target_version = 70000; +		if (!vf) +			f2g = &gfx_v7_kfd2kgd;  		break;  #endif  	case CHIP_CARRIZO: -		if (vf) -			device_info = NULL; -		else -			device_info = &carrizo_device_info; -		f2g = &gfx_v8_kfd2kgd; +		gfx_target_version = 80001; +		if (!vf) +			f2g = &gfx_v8_kfd2kgd;  		break;  #endif  #ifdef CONFIG_DRM_AMDGPU_CIK  	case CHIP_HAWAII: -		if (vf) -			device_info = NULL; -		else -			device_info = &hawaii_device_info; -		f2g = &gfx_v7_kfd2kgd; +		gfx_target_version = 70001; +		if (!amdgpu_exp_hw_support) +			pr_info( +	"KFD support on Hawaii is experimental. See modparam exp_hw_support\n" +				); +		else if (!vf) +			f2g = &gfx_v7_kfd2kgd;  		break;  #endif  	case CHIP_TONGA: -		if (vf) -			device_info = NULL; -		else -			device_info = &tonga_device_info; -		f2g = &gfx_v8_kfd2kgd; +		gfx_target_version = 80002; +		if (!vf) +			f2g = &gfx_v8_kfd2kgd;  		break;  	case CHIP_FIJI: -		if (vf) -			device_info = &fiji_vf_device_info; -		else -			device_info = &fiji_device_info; +		gfx_target_version = 80003;  		f2g = &gfx_v8_kfd2kgd;  		break;  	case CHIP_POLARIS10: -		if (vf) -			device_info = &polaris10_vf_device_info; -		else -			device_info = &polaris10_device_info; +		gfx_target_version = 80003;  		f2g = &gfx_v8_kfd2kgd;  		break;  	case CHIP_POLARIS11: -		if (vf) -			device_info = NULL; -		else -			device_info = &polaris11_device_info; -		f2g = &gfx_v8_kfd2kgd; +		gfx_target_version = 80003; +		if (!vf) +			f2g = &gfx_v8_kfd2kgd;  		break;  	case CHIP_POLARIS12: -		if (vf) -			device_info = NULL; -		else -			device_info = &polaris12_device_info; -		f2g = &gfx_v8_kfd2kgd; +		gfx_target_version = 80003; +		if (!vf) +			f2g = &gfx_v8_kfd2kgd;  		break;  	case CHIP_VEGAM: -		if (vf) -			device_info = NULL; -		else -			device_info = &vegam_device_info; -		f2g = &gfx_v8_kfd2kgd; +		gfx_target_version = 80003; +		if (!vf) +			f2g = &gfx_v8_kfd2kgd;  		break;  	default:  		switch (adev->ip_versions[GC_HWIP][0]) { +		/* Vega 10 */  		case IP_VERSION(9, 0, 1): -			if (vf) -				device_info = &vega10_vf_device_info; -			else -				device_info = &vega10_device_info; +			gfx_target_version = 90000;  			f2g = &gfx_v9_kfd2kgd;  			break;  #ifdef KFD_SUPPORT_IOMMU_V2 +		/* Raven */  		case IP_VERSION(9, 1, 0):  		case IP_VERSION(9, 2, 2): -			if (vf) -				device_info = NULL; -			else -				device_info = &raven_device_info; -			f2g = &gfx_v9_kfd2kgd; +			gfx_target_version = 90002; +			if (!vf) +				f2g = &gfx_v9_kfd2kgd;  			
break;  #endif +		/* Vega12 */  		case IP_VERSION(9, 2, 1): -			if (vf) -				device_info = NULL; -			else -				device_info = &vega12_device_info; -			f2g = &gfx_v9_kfd2kgd; +			gfx_target_version = 90004; +			if (!vf) +				f2g = &gfx_v9_kfd2kgd;  			break; +		/* Renoir */  		case IP_VERSION(9, 3, 0): -			if (vf) -				device_info = NULL; -			else -				device_info = &renoir_device_info; -			f2g = &gfx_v9_kfd2kgd; +			gfx_target_version = 90012; +			if (!vf) +				f2g = &gfx_v9_kfd2kgd;  			break; +		/* Vega20 */  		case IP_VERSION(9, 4, 0): -			if (vf) -				device_info = NULL; -			else -				device_info = &vega20_device_info; -			f2g = &gfx_v9_kfd2kgd; +			gfx_target_version = 90006; +			if (!vf) +				f2g = &gfx_v9_kfd2kgd;  			break; +		/* Arcturus */  		case IP_VERSION(9, 4, 1): -			device_info = &arcturus_device_info; +			gfx_target_version = 90008;  			f2g = &arcturus_kfd2kgd;  			break; +		/* Aldebaran */  		case IP_VERSION(9, 4, 2): -			device_info = &aldebaran_device_info; +			gfx_target_version = 90010;  			f2g = &aldebaran_kfd2kgd;  			break; +		/* Navi10 */  		case IP_VERSION(10, 1, 10): -			if (vf) -				device_info = NULL; -			else -				device_info = &navi10_device_info; -			f2g = &gfx_v10_kfd2kgd; +			gfx_target_version = 100100; +			if (!vf) +				f2g = &gfx_v10_kfd2kgd;  			break; +		/* Navi12 */  		case IP_VERSION(10, 1, 2): -			device_info = &navi12_device_info; +			gfx_target_version = 100101;  			f2g = &gfx_v10_kfd2kgd;  			break; +		/* Navi14 */  		case IP_VERSION(10, 1, 1): -			if (vf) -				device_info = NULL; -			else -				device_info = &navi14_device_info; -			f2g = &gfx_v10_kfd2kgd; +			gfx_target_version = 100102; +			if (!vf) +				f2g = &gfx_v10_kfd2kgd;  			break; +		/* Cyan Skillfish */  		case IP_VERSION(10, 1, 3): -			if (vf) -				device_info = NULL; -			else -				device_info = &cyan_skillfish_device_info; -			f2g = &gfx_v10_kfd2kgd; +			gfx_target_version = 100103; +			if (!vf) +				f2g = &gfx_v10_kfd2kgd;  			break; +		/* Sienna Cichlid */  		case IP_VERSION(10, 3, 0): -			device_info = &sienna_cichlid_device_info; +			gfx_target_version = 100300;  			f2g = &gfx_v10_3_kfd2kgd;  			break; +		/* Navy Flounder */  		case IP_VERSION(10, 3, 2): -			device_info = &navy_flounder_device_info; +			gfx_target_version = 100301;  			f2g = &gfx_v10_3_kfd2kgd;  			break; +		/* Van Gogh */  		case IP_VERSION(10, 3, 1): -			if (vf) -				device_info = NULL; -			else -				device_info = &vangogh_device_info; -			f2g = &gfx_v10_3_kfd2kgd; +			gfx_target_version = 100303; +			if (!vf) +				f2g = &gfx_v10_3_kfd2kgd;  			break; +		/* Dimgrey Cavefish */  		case IP_VERSION(10, 3, 4): -			device_info = &dimgrey_cavefish_device_info; +			gfx_target_version = 100302;  			f2g = &gfx_v10_3_kfd2kgd;  			break; +		/* Beige Goby */  		case IP_VERSION(10, 3, 5): -			device_info = &beige_goby_device_info; +			gfx_target_version = 100304;  			f2g = &gfx_v10_3_kfd2kgd;  			break; +		/* Yellow Carp */  		case IP_VERSION(10, 3, 3): -			if (vf) -				device_info = NULL; -			else -				device_info = &yellow_carp_device_info; -			f2g = &gfx_v10_3_kfd2kgd; +			gfx_target_version = 100305; +			if (!vf) +				f2g = &gfx_v10_3_kfd2kgd;  			break;  		default: -			return NULL; +			break;  		}  		break;  	} -	if (!device_info || !f2g) { -		dev_err(kfd_device, "%s %s not supported in kfd\n", -			amdgpu_asic_name[adev->asic_type], vf ? "VF" : ""); +	if (!f2g) { +		if (adev->ip_versions[GC_HWIP][0]) +			dev_err(kfd_device, "GC IP %06x %s not supported in kfd\n", +				adev->ip_versions[GC_HWIP][0], vf ? 
"VF" : ""); +		else +			dev_err(kfd_device, "%s %s not supported in kfd\n", +				amdgpu_asic_name[adev->asic_type], vf ? "VF" : "");  		return NULL;  	} @@ -824,8 +364,8 @@ struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, bool vf)  	if (!kfd)  		return NULL; -	kfd->kgd = kgd; -	kfd->device_info = device_info; +	kfd->adev = adev; +	kfd_device_info_init(kfd, vf, gfx_target_version);  	kfd->pdev = pdev;  	kfd->init_complete = false;  	kfd->kfd2kgd = f2g; @@ -844,24 +384,24 @@ struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, bool vf)  static void kfd_cwsr_init(struct kfd_dev *kfd)  { -	if (cwsr_enable && kfd->device_info->supports_cwsr) { -		if (kfd->device_info->asic_family < CHIP_VEGA10) { +	if (cwsr_enable && kfd->device_info.supports_cwsr) { +		if (KFD_GC_VERSION(kfd) < IP_VERSION(9, 0, 1)) {  			BUILD_BUG_ON(sizeof(cwsr_trap_gfx8_hex) > PAGE_SIZE);  			kfd->cwsr_isa = cwsr_trap_gfx8_hex;  			kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx8_hex); -		} else if (kfd->device_info->asic_family == CHIP_ARCTURUS) { +		} else if (KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 1)) {  			BUILD_BUG_ON(sizeof(cwsr_trap_arcturus_hex) > PAGE_SIZE);  			kfd->cwsr_isa = cwsr_trap_arcturus_hex;  			kfd->cwsr_isa_size = sizeof(cwsr_trap_arcturus_hex); -		} else if (kfd->device_info->asic_family == CHIP_ALDEBARAN) { +		} else if (KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 2)) {  			BUILD_BUG_ON(sizeof(cwsr_trap_aldebaran_hex) > PAGE_SIZE);  			kfd->cwsr_isa = cwsr_trap_aldebaran_hex;  			kfd->cwsr_isa_size = sizeof(cwsr_trap_aldebaran_hex); -		} else if (kfd->device_info->asic_family < CHIP_NAVI10) { +		} else if (KFD_GC_VERSION(kfd) < IP_VERSION(10, 1, 1)) {  			BUILD_BUG_ON(sizeof(cwsr_trap_gfx9_hex) > PAGE_SIZE);  			kfd->cwsr_isa = cwsr_trap_gfx9_hex;  			kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx9_hex); -		} else if (kfd->device_info->asic_family < CHIP_SIENNA_CICHLID) { +		} else if (KFD_GC_VERSION(kfd) < IP_VERSION(10, 3, 0)) {  			BUILD_BUG_ON(sizeof(cwsr_trap_nv1x_hex) > PAGE_SIZE);  			kfd->cwsr_isa = cwsr_trap_nv1x_hex;  			kfd->cwsr_isa_size = sizeof(cwsr_trap_nv1x_hex); @@ -882,18 +422,17 @@ static int kfd_gws_init(struct kfd_dev *kfd)  	if (kfd->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS)  		return 0; -	if (hws_gws_support -		|| (kfd->device_info->asic_family == CHIP_VEGA10 -			&& kfd->mec2_fw_version >= 0x81b3) -		|| (kfd->device_info->asic_family >= CHIP_VEGA12 -			&& kfd->device_info->asic_family <= CHIP_RAVEN -			&& kfd->mec2_fw_version >= 0x1b3) -		|| (kfd->device_info->asic_family == CHIP_ARCTURUS -			&& kfd->mec2_fw_version >= 0x30) -		|| (kfd->device_info->asic_family == CHIP_ALDEBARAN -			&& kfd->mec2_fw_version >= 0x28)) -		ret = amdgpu_amdkfd_alloc_gws(kfd->kgd, -				amdgpu_amdkfd_get_num_gws(kfd->kgd), &kfd->gws); +	if (hws_gws_support || (KFD_IS_SOC15(kfd) && +		((KFD_GC_VERSION(kfd) == IP_VERSION(9, 0, 1) +			&& kfd->mec2_fw_version >= 0x81b3) || +		(KFD_GC_VERSION(kfd) <= IP_VERSION(9, 4, 0) +			&& kfd->mec2_fw_version >= 0x1b3)  || +		(KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 1) +			&& kfd->mec2_fw_version >= 0x30)   || +		(KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 2) +			&& kfd->mec2_fw_version >= 0x28)))) +		ret = amdgpu_amdkfd_alloc_gws(kfd->adev, +				kfd->adev->gds.gws_size, &kfd->gws);  	return ret;  } @@ -910,11 +449,11 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,  	unsigned int size, map_process_packet_size;  	kfd->ddev = ddev; -	kfd->mec_fw_version = amdgpu_amdkfd_get_fw_version(kfd->kgd, +	kfd->mec_fw_version = amdgpu_amdkfd_get_fw_version(kfd->adev,  			KGD_ENGINE_MEC1); -	
kfd->mec2_fw_version = amdgpu_amdkfd_get_fw_version(kfd->kgd, +	kfd->mec2_fw_version = amdgpu_amdkfd_get_fw_version(kfd->adev,  			KGD_ENGINE_MEC2); -	kfd->sdma_fw_version = amdgpu_amdkfd_get_fw_version(kfd->kgd, +	kfd->sdma_fw_version = amdgpu_amdkfd_get_fw_version(kfd->adev,  			KGD_ENGINE_SDMA1);  	kfd->shared_resources = *gpu_resources; @@ -927,16 +466,16 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,  	 * 32 and 64-bit requests are possible and must be  	 * supported.  	 */ -	kfd->pci_atomic_requested = amdgpu_amdkfd_have_atomics_support(kfd->kgd); +	kfd->pci_atomic_requested = amdgpu_amdkfd_have_atomics_support(kfd->adev);  	if (!kfd->pci_atomic_requested && -	    kfd->device_info->needs_pci_atomics && -	    (!kfd->device_info->no_atomic_fw_version || -	     kfd->mec_fw_version < kfd->device_info->no_atomic_fw_version)) { +	    kfd->device_info.needs_pci_atomics && +	    (!kfd->device_info.no_atomic_fw_version || +	     kfd->mec_fw_version < kfd->device_info.no_atomic_fw_version)) {  		dev_info(kfd_device,  			 "skipped device %x:%x, PCI rejects atomics %d<%d\n",  			 kfd->pdev->vendor, kfd->pdev->device,  			 kfd->mec_fw_version, -			 kfd->device_info->no_atomic_fw_version); +			 kfd->device_info.no_atomic_fw_version);  		return false;  	} @@ -953,16 +492,15 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,  	/* calculate max size of mqds needed for queues */  	size = max_num_of_queues_per_device * -			kfd->device_info->mqd_size_aligned; +			kfd->device_info.mqd_size_aligned;  	/*  	 * calculate max size of runlist packet.  	 * There can be only 2 packets at once  	 */ -	map_process_packet_size = -			kfd->device_info->asic_family == CHIP_ALDEBARAN ? +	map_process_packet_size = KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 2) ?  				sizeof(struct pm4_mes_map_process_aldebaran) : -					sizeof(struct pm4_mes_map_process); +				sizeof(struct pm4_mes_map_process);  	size += (KFD_MAX_NUM_OF_PROCESSES * map_process_packet_size +  		max_num_of_queues_per_device * sizeof(struct pm4_mes_map_queues)  		+ sizeof(struct pm4_mes_runlist)) * 2; @@ -974,7 +512,7 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,  	size += 512 * 1024;  	if (amdgpu_amdkfd_alloc_gtt_mem( -			kfd->kgd, size, &kfd->gtt_mem, +			kfd->adev, size, &kfd->gtt_mem,  			&kfd->gtt_start_gpu_addr, &kfd->gtt_start_cpu_ptr,  			false)) {  		dev_err(kfd_device, "Could not allocate %d bytes\n", size); @@ -995,9 +533,9 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,  		goto kfd_doorbell_error;  	} -	kfd->hive_id = amdgpu_amdkfd_get_hive_id(kfd->kgd); +	kfd->hive_id = kfd->adev->gmc.xgmi.hive_id; -	kfd->noretry = amdgpu_amdkfd_get_noretry(kfd->kgd); +	kfd->noretry = kfd->adev->gmc.noretry;  	if (kfd_interrupt_init(kfd)) {  		dev_err(kfd_device, "Error initializing interrupts\n"); @@ -1015,7 +553,7 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,  	 */  	if (kfd_gws_init(kfd)) {  		dev_err(kfd_device, "Could not allocate %d gws\n", -			amdgpu_amdkfd_get_num_gws(kfd->kgd)); +			kfd->adev->gds.gws_size);  		goto gws_error;  	} @@ -1030,7 +568,7 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,  	kfd_cwsr_init(kfd); -	svm_migrate_init((struct amdgpu_device *)kfd->kgd); +	svm_migrate_init(kfd->adev);  	if(kgd2kfd_resume_iommu(kfd))  		goto device_iommu_error; @@ -1068,10 +606,10 @@ kfd_interrupt_error:  kfd_doorbell_error:  	kfd_gtt_sa_fini(kfd);  kfd_gtt_sa_init_error: -	amdgpu_amdkfd_free_gtt_mem(kfd->kgd, kfd->gtt_mem); +	amdgpu_amdkfd_free_gtt_mem(kfd->adev, kfd->gtt_mem);  alloc_gtt_mem_failure:  	if (kfd->gws) -		
amdgpu_amdkfd_free_gws(kfd->kgd, kfd->gws); +		amdgpu_amdkfd_free_gws(kfd->adev, kfd->gws);  	dev_err(kfd_device,  		"device %x:%x NOT added due to errors\n",  		kfd->pdev->vendor, kfd->pdev->device); @@ -1088,9 +626,9 @@ void kgd2kfd_device_exit(struct kfd_dev *kfd)  		kfd_doorbell_fini(kfd);  		ida_destroy(&kfd->doorbell_ida);  		kfd_gtt_sa_fini(kfd); -		amdgpu_amdkfd_free_gtt_mem(kfd->kgd, kfd->gtt_mem); +		amdgpu_amdkfd_free_gtt_mem(kfd->adev, kfd->gtt_mem);  		if (kfd->gws) -			amdgpu_amdkfd_free_gws(kfd->kgd, kfd->gws); +			amdgpu_amdkfd_free_gws(kfd->adev, kfd->gws);  	}  	kfree(kfd); @@ -1229,7 +767,7 @@ void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry)  	if (!kfd->init_complete)  		return; -	if (kfd->device_info->ih_ring_entry_size > sizeof(patched_ihre)) { +	if (kfd->device_info.ih_ring_entry_size > sizeof(patched_ihre)) {  		dev_err_once(kfd_device, "Ring entry too small\n");  		return;  	} @@ -1526,7 +1064,7 @@ void kgd2kfd_set_sram_ecc_flag(struct kfd_dev *kfd)  void kfd_inc_compute_active(struct kfd_dev *kfd)  {  	if (atomic_inc_return(&kfd->compute_profile) == 1) -		amdgpu_amdkfd_set_compute_idle(kfd->kgd, false); +		amdgpu_amdkfd_set_compute_idle(kfd->adev, false);  }  void kfd_dec_compute_active(struct kfd_dev *kfd) @@ -1534,7 +1072,7 @@ void kfd_dec_compute_active(struct kfd_dev *kfd)  	int count = atomic_dec_return(&kfd->compute_profile);  	if (count == 0) -		amdgpu_amdkfd_set_compute_idle(kfd->kgd, true); +		amdgpu_amdkfd_set_compute_idle(kfd->adev, true);  	WARN_ONCE(count < 0, "Compute profile ref. count error");  } @@ -1544,6 +1082,26 @@ void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint64_t throttle_bitmask)  		kfd_smi_event_update_thermal_throttling(kfd, throttle_bitmask);  } +/* kfd_get_num_sdma_engines returns the number of PCIe optimized SDMA and + * kfd_get_num_xgmi_sdma_engines returns the number of XGMI SDMA. + * When the device has more than two engines, we reserve two for PCIe to enable + * full-duplex and the rest are used as XGMI. 
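+ * For example (editor's illustration, not part of the original patch): with + * adev->sdma.num_instances == 8 and XGMI supported, these helpers report + * 2 PCIe-optimized engines and 6 XGMI engines; without XGMI support all + * 8 instances count as PCIe.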
+ */ +unsigned int kfd_get_num_sdma_engines(struct kfd_dev *kdev) +{ +	/* If XGMI is not supported, all SDMA engines are PCIe */ +	if (!kdev->adev->gmc.xgmi.supported) +		return kdev->adev->sdma.num_instances; + +	return min(kdev->adev->sdma.num_instances, 2); +} + +unsigned int kfd_get_num_xgmi_sdma_engines(struct kfd_dev *kdev) +{ +	/* After reserved for PCIe, the rest of engines are XGMI */ +	return kdev->adev->sdma.num_instances - kfd_get_num_sdma_engines(kdev); +} +  #if defined(CONFIG_DEBUG_FS)  /* This function will send a package to HIQ to hang the HWS diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 003ba6a373ff..4b6814949aad 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -47,7 +47,7 @@ static int execute_queues_cpsch(struct device_queue_manager *dqm,  				uint32_t filter_param);  static int unmap_queues_cpsch(struct device_queue_manager *dqm,  				enum kfd_unmap_queues_filter filter, -				uint32_t filter_param); +				uint32_t filter_param, bool reset);  static int map_queues_cpsch(struct device_queue_manager *dqm); @@ -99,38 +99,29 @@ unsigned int get_pipes_per_mec(struct device_queue_manager *dqm)  	return dqm->dev->shared_resources.num_pipe_per_mec;  } -static unsigned int get_num_sdma_engines(struct device_queue_manager *dqm) -{ -	return dqm->dev->device_info->num_sdma_engines; -} - -static unsigned int get_num_xgmi_sdma_engines(struct device_queue_manager *dqm) -{ -	return dqm->dev->device_info->num_xgmi_sdma_engines; -} -  static unsigned int get_num_all_sdma_engines(struct device_queue_manager *dqm)  { -	return get_num_sdma_engines(dqm) + get_num_xgmi_sdma_engines(dqm); +	return kfd_get_num_sdma_engines(dqm->dev) + +		kfd_get_num_xgmi_sdma_engines(dqm->dev);  }  unsigned int get_num_sdma_queues(struct device_queue_manager *dqm)  { -	return dqm->dev->device_info->num_sdma_engines -			* dqm->dev->device_info->num_sdma_queues_per_engine; +	return kfd_get_num_sdma_engines(dqm->dev) * +		dqm->dev->device_info.num_sdma_queues_per_engine;  }  unsigned int get_num_xgmi_sdma_queues(struct device_queue_manager *dqm)  { -	return dqm->dev->device_info->num_xgmi_sdma_engines -			* dqm->dev->device_info->num_sdma_queues_per_engine; +	return kfd_get_num_xgmi_sdma_engines(dqm->dev) * +		dqm->dev->device_info.num_sdma_queues_per_engine;  }  void program_sh_mem_settings(struct device_queue_manager *dqm,  					struct qcm_process_device *qpd)  {  	return dqm->dev->kfd2kgd->program_sh_mem_settings( -						dqm->dev->kgd, qpd->vmid, +						dqm->dev->adev, qpd->vmid,  						qpd->sh_mem_config,  						qpd->sh_mem_ape1_base,  						qpd->sh_mem_ape1_limit, @@ -157,7 +148,7 @@ static int allocate_doorbell(struct qcm_process_device *qpd, struct queue *q)  {  	struct kfd_dev *dev = qpd->dqm->dev; -	if (!KFD_IS_SOC15(dev->device_info->asic_family)) { +	if (!KFD_IS_SOC15(dev)) {  		/* On pre-SOC15 chips we need to use the queue ID to  		 * preserve the user mode ABI.  		 
*/ @@ -202,7 +193,7 @@ static void deallocate_doorbell(struct qcm_process_device *qpd,  	unsigned int old;  	struct kfd_dev *dev = qpd->dqm->dev; -	if (!KFD_IS_SOC15(dev->device_info->asic_family) || +	if (!KFD_IS_SOC15(dev) ||  	    q->properties.type == KFD_QUEUE_TYPE_SDMA ||  	    q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)  		return; @@ -216,7 +207,7 @@ static void program_trap_handler_settings(struct device_queue_manager *dqm,  {  	if (dqm->dev->kfd2kgd->program_trap_handler_settings)  		dqm->dev->kfd2kgd->program_trap_handler_settings( -						dqm->dev->kgd, qpd->vmid, +						dqm->dev->adev, qpd->vmid,  						qpd->tba_addr, qpd->tma_addr);  } @@ -250,21 +241,20 @@ static int allocate_vmid(struct device_queue_manager *dqm,  	program_sh_mem_settings(dqm, qpd); -	if (dqm->dev->device_info->asic_family >= CHIP_VEGA10 && -	    dqm->dev->cwsr_enabled) +	if (KFD_IS_SOC15(dqm->dev) && dqm->dev->cwsr_enabled)  		program_trap_handler_settings(dqm, qpd);  	/* qpd->page_table_base is set earlier when register_process()  	 * is called, i.e. when the first queue is created.  	 */ -	dqm->dev->kfd2kgd->set_vm_context_page_table_base(dqm->dev->kgd, +	dqm->dev->kfd2kgd->set_vm_context_page_table_base(dqm->dev->adev,  			qpd->vmid,  			qpd->page_table_base);  	/* invalidate the VM context after pasid and vmid mapping is set up */  	kfd_flush_tlb(qpd_to_pdd(qpd), TLB_FLUSH_LEGACY);  	if (dqm->dev->kfd2kgd->set_scratch_backing_va) -		dqm->dev->kfd2kgd->set_scratch_backing_va(dqm->dev->kgd, +		dqm->dev->kfd2kgd->set_scratch_backing_va(dqm->dev->adev,  				qpd->sh_hidden_private_base, qpd->vmid);  	return 0; @@ -283,7 +273,7 @@ static int flush_texture_cache_nocpsch(struct kfd_dev *kdev,  	if (ret)  		return ret; -	return amdgpu_amdkfd_submit_ib(kdev->kgd, KGD_ENGINE_MEC1, qpd->vmid, +	return amdgpu_amdkfd_submit_ib(kdev->adev, KGD_ENGINE_MEC1, qpd->vmid,  				qpd->ib_base, (uint32_t *)qpd->ib_kaddr,  				pmf->release_mem_size / sizeof(uint32_t));  } @@ -293,7 +283,7 @@ static void deallocate_vmid(struct device_queue_manager *dqm,  				struct queue *q)  {  	/* On GFX v7, CP doesn't flush TC at dequeue */ -	if (q->device->device_info->asic_family == CHIP_HAWAII) +	if (q->device->adev->asic_type == CHIP_HAWAII)  		if (flush_texture_cache_nocpsch(q->device, qpd))  			pr_err("Failed to flush TC\n"); @@ -580,7 +570,7 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q,  	/* Make sure the queue is unmapped before updating the MQD */  	if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) {  		retval = unmap_queues_cpsch(dqm, -				KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); +				KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, false);  		if (retval) {  			pr_err("unmap queue failed\n");  			goto out_unlock; @@ -776,7 +766,7 @@ static int restore_process_queues_nocpsch(struct device_queue_manager *dqm,  	if (!list_empty(&qpd->queues_list)) {  		dqm->dev->kfd2kgd->set_vm_context_page_table_base( -				dqm->dev->kgd, +				dqm->dev->adev,  				qpd->vmid,  				qpd->page_table_base);  		kfd_flush_tlb(pdd, TLB_FLUSH_LEGACY); @@ -954,7 +944,7 @@ set_pasid_vmid_mapping(struct device_queue_manager *dqm, u32 pasid,  			unsigned int vmid)  {  	return dqm->dev->kfd2kgd->set_pasid_vmid_mapping( -						dqm->dev->kgd, pasid, vmid); +						dqm->dev->adev, pasid, vmid);  }  static void init_interrupts(struct device_queue_manager *dqm) @@ -963,7 +953,7 @@ static void init_interrupts(struct device_queue_manager *dqm)  	for (i = 0 ; i < get_pipes_per_mec(dqm) ; i++)  		if (is_pipe_enabled(dqm, 0, i)) -			
dqm->dev->kfd2kgd->init_interrupts(dqm->dev->kgd, i); +			dqm->dev->kfd2kgd->init_interrupts(dqm->dev->adev, i);  }  static int initialize_nocpsch(struct device_queue_manager *dqm) @@ -1014,19 +1004,22 @@ static void uninitialize(struct device_queue_manager *dqm)  static int start_nocpsch(struct device_queue_manager *dqm)  { +	int r = 0; +  	pr_info("SW scheduler is used");  	init_interrupts(dqm); -	if (dqm->dev->device_info->asic_family == CHIP_HAWAII) -		return pm_init(&dqm->packet_mgr, dqm); -	dqm->sched_running = true; +	if (dqm->dev->adev->asic_type == CHIP_HAWAII) +		r = pm_init(&dqm->packet_mgr, dqm); +	if (!r) +		dqm->sched_running = true; -	return 0; +	return r;  }  static int stop_nocpsch(struct device_queue_manager *dqm)  { -	if (dqm->dev->device_info->asic_family == CHIP_HAWAII) +	if (dqm->dev->adev->asic_type == CHIP_HAWAII)  		pm_uninit(&dqm->packet_mgr, false);  	dqm->sched_running = false; @@ -1055,9 +1048,9 @@ static int allocate_sdma_queue(struct device_queue_manager *dqm,  		dqm->sdma_bitmap &= ~(1ULL << bit);  		q->sdma_id = bit;  		q->properties.sdma_engine_id = q->sdma_id % -				get_num_sdma_engines(dqm); +				kfd_get_num_sdma_engines(dqm->dev);  		q->properties.sdma_queue_id = q->sdma_id / -				get_num_sdma_engines(dqm); +				kfd_get_num_sdma_engines(dqm->dev);  	} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {  		if (dqm->xgmi_sdma_bitmap == 0) {  			pr_err("No more XGMI SDMA queue to allocate\n"); @@ -1072,10 +1065,11 @@ static int allocate_sdma_queue(struct device_queue_manager *dqm,  		 * assumes the first N engines are always  		 * PCIe-optimized ones  		 */ -		q->properties.sdma_engine_id = get_num_sdma_engines(dqm) + -				q->sdma_id % get_num_xgmi_sdma_engines(dqm); +		q->properties.sdma_engine_id = +			kfd_get_num_sdma_engines(dqm->dev) + +			q->sdma_id % kfd_get_num_xgmi_sdma_engines(dqm->dev);  		q->properties.sdma_queue_id = q->sdma_id / -				get_num_xgmi_sdma_engines(dqm); +			kfd_get_num_xgmi_sdma_engines(dqm->dev);  	}  	pr_debug("SDMA engine id: %d\n", q->properties.sdma_engine_id); @@ -1132,7 +1126,7 @@ static int set_sched_resources(struct device_queue_manager *dqm)  		res.queue_mask |= 1ull  			<< amdgpu_queue_mask_bit_to_set_resource_bit( -				(struct amdgpu_device *)dqm->dev->kgd, i); +				dqm->dev->adev, i);  	}  	res.gws_mask = ~0ull;  	res.oac_mask = res.gds_heap_base = res.gds_heap_size = 0; @@ -1226,8 +1220,13 @@ static int stop_cpsch(struct device_queue_manager *dqm)  	bool hanging;  	dqm_lock(dqm); +	if (!dqm->sched_running) { +		dqm_unlock(dqm); +		return 0; +	} +  	if (!dqm->is_hws_hang) -		unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0); +		unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0, false);  	hanging = dqm->is_hws_hang || dqm->is_resetting;  	dqm->sched_running = false; @@ -1423,7 +1422,7 @@ static int map_queues_cpsch(struct device_queue_manager *dqm)  /* dqm->lock mutex has to be locked before calling this function */  static int unmap_queues_cpsch(struct device_queue_manager *dqm,  				enum kfd_unmap_queues_filter filter, -				uint32_t filter_param) +				uint32_t filter_param, bool reset)  {  	int retval = 0;  	struct mqd_manager *mqd_mgr; @@ -1436,7 +1435,7 @@ static int unmap_queues_cpsch(struct device_queue_manager *dqm,  		return retval;  	retval = pm_send_unmap_queue(&dqm->packet_mgr, KFD_QUEUE_TYPE_COMPUTE, -			filter, filter_param, false, 0); +			filter, filter_param, reset, 0);  	if (retval)  		return retval; @@ -1480,6 +1479,21 @@ static int unmap_queues_cpsch(struct 
device_queue_manager *dqm,  	return retval;  } +/* only for compute queue */ +static int reset_queues_cpsch(struct device_queue_manager *dqm, +			uint16_t pasid) +{ +	int retval; + +	dqm_lock(dqm); + +	retval = unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_BY_PASID, +			pasid, true); + +	dqm_unlock(dqm); +	return retval; +} +  /* dqm->lock mutex has to be locked before calling this function */  static int execute_queues_cpsch(struct device_queue_manager *dqm,  				enum kfd_unmap_queues_filter filter, @@ -1489,7 +1503,7 @@ static int execute_queues_cpsch(struct device_queue_manager *dqm,  	if (dqm->is_hws_hang)  		return -EIO; -	retval = unmap_queues_cpsch(dqm, filter, filter_param); +	retval = unmap_queues_cpsch(dqm, filter, filter_param, false);  	if (retval)  		return retval; @@ -1842,10 +1856,10 @@ static int allocate_hiq_sdma_mqd(struct device_queue_manager *dqm)  	struct kfd_mem_obj *mem_obj = &dqm->hiq_sdma_mqd;  	uint32_t size = dqm->mqd_mgrs[KFD_MQD_TYPE_SDMA]->mqd_size *  		get_num_all_sdma_engines(dqm) * -		dev->device_info->num_sdma_queues_per_engine + +		dev->device_info.num_sdma_queues_per_engine +  		dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]->mqd_size; -	retval = amdgpu_amdkfd_alloc_gtt_mem(dev->kgd, size, +	retval = amdgpu_amdkfd_alloc_gtt_mem(dev->adev, size,  		&(mem_obj->gtt_mem), &(mem_obj->gpu_addr),  		(void *)&(mem_obj->cpu_ptr), false); @@ -1862,7 +1876,7 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)  	if (!dqm)  		return NULL; -	switch (dev->device_info->asic_family) { +	switch (dev->adev->asic_type) {  	/* HWS is not available on Hawaii. */  	case CHIP_HAWAII:  	/* HWS depends on CWSR for timely dequeue. CWSR is not @@ -1900,6 +1914,7 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)  		dqm->ops.evict_process_queues = evict_process_queues_cpsch;  		dqm->ops.restore_process_queues = restore_process_queues_cpsch;  		dqm->ops.get_wave_state = get_wave_state; +		dqm->ops.reset_queues = reset_queues_cpsch;  		break;  	case KFD_SCHED_POLICY_NO_HWS:  		/* initialize dqm for no cp scheduling */ @@ -1925,7 +1940,7 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)  		goto out_free;  	} -	switch (dev->device_info->asic_family) { +	switch (dev->adev->asic_type) {  	case CHIP_CARRIZO:  		device_queue_manager_init_vi(&dqm->asic_ops);  		break; @@ -1947,31 +1962,16 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)  		device_queue_manager_init_vi_tonga(&dqm->asic_ops);  		break; -	case CHIP_VEGA10: -	case CHIP_VEGA12: -	case CHIP_VEGA20: -	case CHIP_RAVEN: -	case CHIP_RENOIR: -	case CHIP_ARCTURUS: -	case CHIP_ALDEBARAN: -		device_queue_manager_init_v9(&dqm->asic_ops); -		break; -	case CHIP_NAVI10: -	case CHIP_NAVI12: -	case CHIP_NAVI14: -	case CHIP_SIENNA_CICHLID: -	case CHIP_NAVY_FLOUNDER: -	case CHIP_VANGOGH: -	case CHIP_DIMGREY_CAVEFISH: -	case CHIP_BEIGE_GOBY: -	case CHIP_YELLOW_CARP: -	case CHIP_CYAN_SKILLFISH: -		device_queue_manager_init_v10_navi10(&dqm->asic_ops); -		break;  	default: -		WARN(1, "Unexpected ASIC family %u", -		     dev->device_info->asic_family); -		goto out_free; +		if (KFD_GC_VERSION(dev) >= IP_VERSION(10, 1, 1)) +			device_queue_manager_init_v10_navi10(&dqm->asic_ops); +		else if (KFD_GC_VERSION(dev) >= IP_VERSION(9, 0, 1)) +			device_queue_manager_init_v9(&dqm->asic_ops); +		else { +			WARN(1, "Unexpected ASIC family %u", +			     dev->adev->asic_type); +			goto out_free; +		}  	}  	if (init_mqd_managers(dqm)) @@ -1995,7 +1995,7 @@ static 
void deallocate_hiq_sdma_mqd(struct kfd_dev *dev,  {  	WARN(!mqd, "No hiq sdma mqd trunk to free"); -	amdgpu_amdkfd_free_gtt_mem(dev->kgd, mqd->gtt_mem); +	amdgpu_amdkfd_free_gtt_mem(dev->adev, mqd->gtt_mem);  }  void device_queue_manager_uninit(struct device_queue_manager *dqm) @@ -2026,7 +2026,7 @@ static void kfd_process_hw_exception(struct work_struct *work)  {  	struct device_queue_manager *dqm = container_of(work,  			struct device_queue_manager, hw_exception_work); -	amdgpu_amdkfd_gpu_reset(dqm->dev->kgd); +	amdgpu_amdkfd_gpu_reset(dqm->dev->adev);  }  #if defined(CONFIG_DEBUG_FS) @@ -2065,7 +2065,7 @@ int dqm_debugfs_hqds(struct seq_file *m, void *data)  		return 0;  	} -	r = dqm->dev->kfd2kgd->hqd_dump(dqm->dev->kgd, +	r = dqm->dev->kfd2kgd->hqd_dump(dqm->dev->adev,  					KFD_CIK_HIQ_PIPE, KFD_CIK_HIQ_QUEUE,  					&dump, &n_regs);  	if (!r) { @@ -2087,7 +2087,7 @@ int dqm_debugfs_hqds(struct seq_file *m, void *data)  				continue;  			r = dqm->dev->kfd2kgd->hqd_dump( -				dqm->dev->kgd, pipe, queue, &dump, &n_regs); +				dqm->dev->adev, pipe, queue, &dump, &n_regs);  			if (r)  				break; @@ -2101,10 +2101,10 @@ int dqm_debugfs_hqds(struct seq_file *m, void *data)  	for (pipe = 0; pipe < get_num_all_sdma_engines(dqm); pipe++) {  		for (queue = 0; -		     queue < dqm->dev->device_info->num_sdma_queues_per_engine; +		     queue < dqm->dev->device_info.num_sdma_queues_per_engine;  		     queue++) {  			r = dqm->dev->kfd2kgd->hqd_sdma_dump( -				dqm->dev->kgd, pipe, queue, &dump, &n_regs); +				dqm->dev->adev, pipe, queue, &dump, &n_regs);  			if (r)  				break; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h index 499fc0ea387f..e145e4deb53a 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h @@ -81,6 +81,8 @@ struct device_process_node {   *   * @get_wave_state: Retrieves context save state and optionally copies the   * control stack, if kept in the MQD, to the given userspace address. + * + * @reset_queues: reset queues which consume RAS poison   */  struct device_queue_manager_ops { @@ -134,6 +136,9 @@ struct device_queue_manager_ops {  				  void __user *ctl_stack,  				  u32 *ctl_stack_used_size,  				  u32 *save_area_used_size); + +	int (*reset_queues)(struct device_queue_manager *dqm, +					uint16_t pasid);  };  struct device_queue_manager_asic_ops { diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c index b5c3d13643f1..f20434d9980e 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c @@ -62,7 +62,7 @@ static int update_qpd_v9(struct device_queue_manager *dqm,  				SH_MEM_ALIGNMENT_MODE_UNALIGNED <<  					SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT; -		if (dqm->dev->device_info->asic_family == CHIP_ALDEBARAN) { +		if (KFD_GC_VERSION(dqm->dev) == IP_VERSION(9, 4, 2)) {  			/* Aldebaran can safely support different XNACK modes  			 * per process  			 */ diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c index 768d153acff4..0dbcf54657ed 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c @@ -48,7 +48,7 @@  /* # of doorbell bytes allocated for each process. 
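(Editor's note, not in the source: with the typical 8-byte SOC15 doorbell size and 1024 queues per process, this rounds up to 8 KiB, i.e. two 4 KiB pages.)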
*/  size_t kfd_doorbell_process_slice(struct kfd_dev *kfd)  { -	return roundup(kfd->device_info->doorbell_size * +	return roundup(kfd->device_info.doorbell_size *  			KFD_MAX_NUM_OF_QUEUES_PER_PROCESS,  			PAGE_SIZE);  } @@ -180,7 +180,7 @@ void __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd,  	if (inx >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS)  		return NULL; -	inx *= kfd->device_info->doorbell_size / sizeof(u32); +	inx *= kfd->device_info.doorbell_size / sizeof(u32);  	/*  	 * Calculating the kernel doorbell offset using the first @@ -201,7 +201,7 @@ void kfd_release_kernel_doorbell(struct kfd_dev *kfd, u32 __iomem *db_addr)  	unsigned int inx;  	inx = (unsigned int)(db_addr - kfd->doorbell_kernel_ptr) -		* sizeof(u32) / kfd->device_info->doorbell_size; +		* sizeof(u32) / kfd->device_info.doorbell_size;  	mutex_lock(&kfd->doorbell_mutex);  	__clear_bit(inx, kfd->doorbell_available_index); @@ -239,7 +239,7 @@ unsigned int kfd_get_doorbell_dw_offset_in_bar(struct kfd_dev *kfd,  	return kfd->doorbell_base_dw_offset +  		pdd->doorbell_index  		* kfd_doorbell_process_slice(kfd) / sizeof(u32) + -		doorbell_id * kfd->device_info->doorbell_size / sizeof(u32); +		doorbell_id * kfd->device_info.doorbell_size / sizeof(u32);  }  uint64_t kfd_get_number_elems(struct kfd_dev *kfd) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c index 3eea4edee355..afe72dd11325 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c @@ -935,8 +935,10 @@ void kfd_signal_iommu_event(struct kfd_dev *dev, u32 pasid,  	/* Workaround on Raven to not kill the process when memory is freed  	 * before IOMMU is able to finish processing all the excessive PPRs  	 */ -	if (dev->device_info->asic_family != CHIP_RAVEN && -	    dev->device_info->asic_family != CHIP_RENOIR) { + +	if (KFD_GC_VERSION(dev) != IP_VERSION(9, 1, 0) && +	    KFD_GC_VERSION(dev) != IP_VERSION(9, 2, 2) && +	    KFD_GC_VERSION(dev) != IP_VERSION(9, 3, 0)) {  		mutex_lock(&p->event_mutex);  		/* Lookup events by type and signal them */ diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c index d1388896f9c1..2e2b7ceb71db 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c @@ -394,7 +394,7 @@ int kfd_init_apertures(struct kfd_process *process)  			pdd->gpuvm_base = pdd->gpuvm_limit = 0;  			pdd->scratch_base = pdd->scratch_limit = 0;  		} else { -			switch (dev->device_info->asic_family) { +			switch (dev->adev->asic_type) {  			case CHIP_KAVERI:  			case CHIP_HAWAII:  			case CHIP_CARRIZO: @@ -406,29 +406,14 @@ int kfd_init_apertures(struct kfd_process *process)  			case CHIP_VEGAM:  				kfd_init_apertures_vi(pdd, id);  				break; -			case CHIP_VEGA10: -			case CHIP_VEGA12: -			case CHIP_VEGA20: -			case CHIP_RAVEN: -			case CHIP_RENOIR: -			case CHIP_ARCTURUS: -			case CHIP_ALDEBARAN: -			case CHIP_NAVI10: -			case CHIP_NAVI12: -			case CHIP_NAVI14: -			case CHIP_SIENNA_CICHLID: -			case CHIP_NAVY_FLOUNDER: -			case CHIP_VANGOGH: -			case CHIP_DIMGREY_CAVEFISH: -			case CHIP_BEIGE_GOBY: -			case CHIP_YELLOW_CARP: -			case CHIP_CYAN_SKILLFISH: -				kfd_init_apertures_v9(pdd, id); -				break;  			default: -				WARN(1, "Unexpected ASIC family %u", -				     dev->device_info->asic_family); -				return -EINVAL; +				if (KFD_GC_VERSION(dev) >= IP_VERSION(9, 0, 1)) +					kfd_init_apertures_v9(pdd, id); +				else { +					WARN(1, "Unexpected ASIC family %u", +					     
dev->adev->asic_type); +					return -EINVAL; +				}  			}  			if (!dev->use_iommu_v2) { diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c index 543e7ea75593..e8bc28009c22 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c @@ -89,6 +89,44 @@ enum SQ_INTERRUPT_ERROR_TYPE {  #define KFD_SQ_INT_DATA__ERR_TYPE_MASK 0xF00000  #define KFD_SQ_INT_DATA__ERR_TYPE__SHIFT 20 +static void event_interrupt_poison_consumption(struct kfd_dev *dev, +				uint16_t pasid, uint16_t source_id) +{ +	int ret = -EINVAL; +	struct kfd_process *p = kfd_lookup_process_by_pasid(pasid); + +	if (!p) +		return; + +	/* all queues of a process will be unmapped in one time */ +	if (atomic_read(&p->poison)) { +		kfd_unref_process(p); +		return; +	} + +	atomic_set(&p->poison, 1); +	kfd_unref_process(p); + +	switch (source_id) { +	case SOC15_INTSRC_SQ_INTERRUPT_MSG: +		if (dev->dqm->ops.reset_queues) +			ret = dev->dqm->ops.reset_queues(dev->dqm, pasid); +		break; +	case SOC15_INTSRC_SDMA_ECC: +	default: +		break; +	} + +	kfd_signal_poison_consumed_event(dev, pasid); + +	/* resetting queue passes, do page retirement without gpu reset +	   resetting queue fails, fallback to gpu reset solution */ +	if (!ret) +		amdgpu_amdkfd_ras_poison_consumption_handler(dev->adev, false); +	else +		amdgpu_amdkfd_ras_poison_consumption_handler(dev->adev, true); +} +  static bool event_interrupt_isr_v9(struct kfd_dev *dev,  					const uint32_t *ih_ring_entry,  					uint32_t *patched_ihre, @@ -135,7 +173,7 @@ static bool event_interrupt_isr_v9(struct kfd_dev *dev,  		*patched_flag = true;  		memcpy(patched_ihre, ih_ring_entry, -				dev->device_info->ih_ring_entry_size); +				dev->device_info.ih_ring_entry_size);  		pasid = dev->dqm->vmid_pasid[vmid]; @@ -159,6 +197,7 @@ static bool event_interrupt_isr_v9(struct kfd_dev *dev,  	 */  	return source_id == SOC15_INTSRC_CP_END_OF_PIPE ||  		source_id == SOC15_INTSRC_SDMA_TRAP || +		source_id == SOC15_INTSRC_SDMA_ECC ||  		source_id == SOC15_INTSRC_SQ_INTERRUPT_MSG ||  		source_id == SOC15_INTSRC_CP_BAD_OPCODE ||  		((client_id == SOC15_IH_CLIENTID_VMC || @@ -230,8 +269,7 @@ static void event_interrupt_wq_v9(struct kfd_dev *dev,  					sq_intr_err);  				if (sq_intr_err != SQ_INTERRUPT_ERROR_TYPE_ILLEGAL_INST &&  					sq_intr_err != SQ_INTERRUPT_ERROR_TYPE_MEMVIOL) { -					kfd_signal_poison_consumed_event(dev, pasid); -					amdgpu_amdkfd_ras_poison_consumption_handler(dev->kgd); +					event_interrupt_poison_consumption(dev, pasid, source_id);  					return;  				}  				break; @@ -252,8 +290,7 @@ static void event_interrupt_wq_v9(struct kfd_dev *dev,  		if (source_id == SOC15_INTSRC_SDMA_TRAP) {  			kfd_signal_event_interrupt(pasid, context_id0 & 0xfffffff, 28);  		} else if (source_id == SOC15_INTSRC_SDMA_ECC) { -			kfd_signal_poison_consumed_event(dev, pasid); -			amdgpu_amdkfd_ras_poison_consumption_handler(dev->kgd); +			event_interrupt_poison_consumption(dev, pasid, source_id);  			return;  		}  	} else if (client_id == SOC15_IH_CLIENTID_VMC || diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c b/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c index bc47f6a44456..81887c2013c9 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c @@ -54,7 +54,7 @@ int kfd_interrupt_init(struct kfd_dev *kfd)  	int r;  	r = kfifo_alloc(&kfd->ih_fifo, -		KFD_IH_NUM_ENTRIES * kfd->device_info->ih_ring_entry_size, +		KFD_IH_NUM_ENTRIES * 
kfd->device_info.ih_ring_entry_size,  		GFP_KERNEL);  	if (r) {  		dev_err(kfd_chardev(), "Failed to allocate IH fifo\n"); @@ -114,8 +114,8 @@ bool enqueue_ih_ring_entry(struct kfd_dev *kfd,	const void *ih_ring_entry)  	int count;  	count = kfifo_in(&kfd->ih_fifo, ih_ring_entry, -				kfd->device_info->ih_ring_entry_size); -	if (count != kfd->device_info->ih_ring_entry_size) { +				kfd->device_info.ih_ring_entry_size); +	if (count != kfd->device_info.ih_ring_entry_size) {  		dev_err_ratelimited(kfd_chardev(),  			"Interrupt ring overflow, dropping interrupt %d\n",  			count); @@ -133,11 +133,11 @@ static bool dequeue_ih_ring_entry(struct kfd_dev *kfd, void *ih_ring_entry)  	int count;  	count = kfifo_out(&kfd->ih_fifo, ih_ring_entry, -				kfd->device_info->ih_ring_entry_size); +				kfd->device_info.ih_ring_entry_size); -	WARN_ON(count && count != kfd->device_info->ih_ring_entry_size); +	WARN_ON(count && count != kfd->device_info.ih_ring_entry_size); -	return count == kfd->device_info->ih_ring_entry_size; +	return count == kfd->device_info.ih_ring_entry_size;  }  static void interrupt_wq(struct work_struct *work) @@ -146,13 +146,13 @@ static void interrupt_wq(struct work_struct *work)  						interrupt_work);  	uint32_t ih_ring_entry[KFD_MAX_RING_ENTRY_SIZE]; -	if (dev->device_info->ih_ring_entry_size > sizeof(ih_ring_entry)) { +	if (dev->device_info.ih_ring_entry_size > sizeof(ih_ring_entry)) {  		dev_err_once(kfd_chardev(), "Ring entry too small\n");  		return;  	}  	while (dequeue_ih_ring_entry(dev, ih_ring_entry)) -		dev->device_info->event_interrupt_class->interrupt_wq(dev, +		dev->device_info.event_interrupt_class->interrupt_wq(dev,  								ih_ring_entry);  } @@ -163,7 +163,7 @@ bool interrupt_is_wanted(struct kfd_dev *dev,  	/* integer and bitwise OR so there is no boolean short-circuiting */  	unsigned int wanted = 0; -	wanted |= dev->device_info->event_interrupt_class->interrupt_isr(dev, +	wanted |= dev->device_info.event_interrupt_class->interrupt_isr(dev,  					 ih_ring_entry, patched_ihre, flag);  	return wanted != 0; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_iommu.c b/drivers/gpu/drm/amd/amdkfd/kfd_iommu.c index 73f2257acc23..66ad8d0b8f7f 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_iommu.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_iommu.c @@ -89,7 +89,7 @@ int kfd_iommu_device_init(struct kfd_dev *kfd)  	}  	pasid_limit = min_t(unsigned int, -			(unsigned int)(1 << kfd->device_info->max_pasid_bits), +			(unsigned int)(1 << kfd->device_info.max_pasid_bits),  			iommu_info.max_pasids);  	if (!kfd_set_pasid_limit(pasid_limit)) { diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c index 64b4ac339904..16f8bc4ca7f6 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c @@ -91,7 +91,7 @@ static bool kq_initialize(struct kernel_queue *kq, struct kfd_dev *dev,  	kq->pq_gpu_addr = kq->pq->gpu_addr;  	/* For CIK family asics, kq->eop_mem is not needed */ -	if (dev->device_info->asic_family > CHIP_MULLINS) { +	if (dev->adev->asic_type > CHIP_MULLINS) {  		retval = kfd_gtt_sa_allocate(dev, PAGE_SIZE, &kq->eop_mem);  		if (retval != 0)  			goto err_eop_allocate_vidmem; @@ -111,7 +111,7 @@ static bool kq_initialize(struct kernel_queue *kq, struct kfd_dev *dev,  	kq->rptr_kernel = kq->rptr_mem->cpu_ptr;  	kq->rptr_gpu_addr = kq->rptr_mem->gpu_addr; -	retval = kfd_gtt_sa_allocate(dev, dev->device_info->doorbell_size, +	retval = kfd_gtt_sa_allocate(dev, dev->device_info.doorbell_size,  					
&kq->wptr_mem);  	if (retval != 0) @@ -297,7 +297,7 @@ void kq_submit_packet(struct kernel_queue *kq)  	}  	pr_debug("\n");  #endif -	if (kq->dev->device_info->doorbell_size == 8) { +	if (kq->dev->device_info.doorbell_size == 8) {  		*kq->wptr64_kernel = kq->pending_wptr64;  		write_kernel_doorbell64(kq->queue->properties.doorbell_ptr,  					kq->pending_wptr64); @@ -310,7 +310,7 @@ void kq_submit_packet(struct kernel_queue *kq)  void kq_rollback_packet(struct kernel_queue *kq)  { -	if (kq->dev->device_info->doorbell_size == 8) { +	if (kq->dev->device_info.doorbell_size == 8) {  		kq->pending_wptr64 = *kq->wptr64_kernel;  		kq->pending_wptr = *kq->wptr_kernel %  			(kq->queue->properties.queue_size / 4); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c index 9b9c2b9bf2ef..ed5385137f48 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c @@ -108,8 +108,8 @@ error_free:   * svm_migrate_copy_memory_gart - sdma copy data between ram and vram   *   * @adev: amdgpu device the sdma ring running - * @src: source page address array - * @dst: destination page address array + * @sys: system DMA pointer to be copied + * @vram: vram destination DMA pointer   * @npages: number of pages to copy   * @direction: enum MIGRATION_COPY_DIR   * @mfence: output, sdma fence to signal after sdma is done @@ -549,7 +549,7 @@ static void svm_migrate_page_free(struct page *page)  	if (svm_bo) {  		pr_debug_ratelimited("ref: %d\n", kref_read(&svm_bo->kref)); -		svm_range_bo_unref(svm_bo); +		svm_range_bo_unref_async(svm_bo);  	}  } @@ -938,7 +938,7 @@ int svm_migrate_init(struct amdgpu_device *adev)  	void *r;  	/* Page migration works on Vega10 or newer */ -	if (kfddev->device_info->asic_family < CHIP_VEGA10) +	if (!KFD_IS_SOC15(kfddev))  		return -EINVAL;  	pgmap = &kfddev->pgmap; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c index c021519af810..e2825ad4d699 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c @@ -71,7 +71,7 @@ struct kfd_mem_obj *allocate_sdma_mqd(struct kfd_dev *dev,  		return NULL;  	offset = (q->sdma_engine_id * -		dev->device_info->num_sdma_queues_per_engine + +		dev->device_info.num_sdma_queues_per_engine +  		q->sdma_queue_id) *  		dev->dqm->mqd_mgrs[KFD_MQD_TYPE_SDMA]->mqd_size; @@ -100,7 +100,7 @@ void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm,  	struct kfd_cu_info cu_info;  	uint32_t cu_per_sh[KFD_MAX_NUM_SE][KFD_MAX_NUM_SH_PER_SE] = {0};  	int i, se, sh, cu; -	amdgpu_amdkfd_get_cu_info(mm->dev->kgd, &cu_info); +	amdgpu_amdkfd_get_cu_info(mm->dev->adev, &cu_info);  	if (cu_mask_count > cu_info.cu_active_number)  		cu_mask_count = cu_info.cu_active_number; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c index 8128f4d312f1..e9a8e21e144e 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c @@ -171,7 +171,7 @@ static int load_mqd(struct mqd_manager *mm, void *mqd, uint32_t pipe_id,  	uint32_t wptr_shift = (p->format == KFD_QUEUE_FORMAT_AQL ? 
4 : 0);  	uint32_t wptr_mask = (uint32_t)((p->queue_size / 4) - 1); -	return mm->dev->kfd2kgd->hqd_load(mm->dev->kgd, mqd, pipe_id, queue_id, +	return mm->dev->kfd2kgd->hqd_load(mm->dev->adev, mqd, pipe_id, queue_id,  					  (uint32_t __user *)p->write_ptr,  					  wptr_shift, wptr_mask, mms);  } @@ -180,7 +180,7 @@ static int load_mqd_sdma(struct mqd_manager *mm, void *mqd,  			 uint32_t pipe_id, uint32_t queue_id,  			 struct queue_properties *p, struct mm_struct *mms)  { -	return mm->dev->kfd2kgd->hqd_sdma_load(mm->dev->kgd, mqd, +	return mm->dev->kfd2kgd->hqd_sdma_load(mm->dev->adev, mqd,  					       (uint32_t __user *)p->write_ptr,  					       mms);  } @@ -276,7 +276,7 @@ static int destroy_mqd(struct mqd_manager *mm, void *mqd,  			unsigned int timeout, uint32_t pipe_id,  			uint32_t queue_id)  { -	return mm->dev->kfd2kgd->hqd_destroy(mm->dev->kgd, mqd, type, timeout, +	return mm->dev->kfd2kgd->hqd_destroy(mm->dev->adev, mqd, type, timeout,  					pipe_id, queue_id);  } @@ -289,7 +289,7 @@ static int destroy_mqd_sdma(struct mqd_manager *mm, void *mqd,  				unsigned int timeout, uint32_t pipe_id,  				uint32_t queue_id)  { -	return mm->dev->kfd2kgd->hqd_sdma_destroy(mm->dev->kgd, mqd, timeout); +	return mm->dev->kfd2kgd->hqd_sdma_destroy(mm->dev->adev, mqd, timeout);  }  static bool is_occupied(struct mqd_manager *mm, void *mqd, @@ -297,7 +297,7 @@ static bool is_occupied(struct mqd_manager *mm, void *mqd,  			uint32_t queue_id)  { -	return mm->dev->kfd2kgd->hqd_is_occupied(mm->dev->kgd, queue_address, +	return mm->dev->kfd2kgd->hqd_is_occupied(mm->dev->adev, queue_address,  					pipe_id, queue_id);  } @@ -306,7 +306,7 @@ static bool is_occupied_sdma(struct mqd_manager *mm, void *mqd,  			uint64_t queue_address,	uint32_t pipe_id,  			uint32_t queue_id)  { -	return mm->dev->kfd2kgd->hqd_sdma_is_occupied(mm->dev->kgd, mqd); +	return mm->dev->kfd2kgd->hqd_sdma_is_occupied(mm->dev->adev, mqd);  }  /* diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c index 270160fc401b..d74d8a6ac27a 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c @@ -148,7 +148,7 @@ static int load_mqd(struct mqd_manager *mm, void *mqd,  	/* AQL write pointer counts in 64B packets, PM4/CP counts in dwords. */  	uint32_t wptr_shift = (p->format == KFD_QUEUE_FORMAT_AQL ? 
4 : 0); -	r = mm->dev->kfd2kgd->hqd_load(mm->dev->kgd, mqd, pipe_id, queue_id, +	r = mm->dev->kfd2kgd->hqd_load(mm->dev->adev, mqd, pipe_id, queue_id,  					  (uint32_t __user *)p->write_ptr,  					  wptr_shift, 0, mms);  	return r; @@ -158,7 +158,7 @@ static int hiq_load_mqd_kiq(struct mqd_manager *mm, void *mqd,  			    uint32_t pipe_id, uint32_t queue_id,  			    struct queue_properties *p, struct mm_struct *mms)  { -	return mm->dev->kfd2kgd->hiq_mqd_load(mm->dev->kgd, mqd, pipe_id, +	return mm->dev->kfd2kgd->hiq_mqd_load(mm->dev->adev, mqd, pipe_id,  					      queue_id, p->doorbell_off);  } @@ -239,7 +239,7 @@ static int destroy_mqd(struct mqd_manager *mm, void *mqd,  		       uint32_t queue_id)  {  	return mm->dev->kfd2kgd->hqd_destroy -		(mm->dev->kgd, mqd, type, timeout, +		(mm->dev->adev, mqd, type, timeout,  		 pipe_id, queue_id);  } @@ -254,7 +254,7 @@ static bool is_occupied(struct mqd_manager *mm, void *mqd,  			uint32_t queue_id)  {  	return mm->dev->kfd2kgd->hqd_is_occupied( -		mm->dev->kgd, queue_address, +		mm->dev->adev, queue_address,  		pipe_id, queue_id);  } @@ -320,7 +320,7 @@ static int load_mqd_sdma(struct mqd_manager *mm, void *mqd,  		uint32_t pipe_id, uint32_t queue_id,  		struct queue_properties *p, struct mm_struct *mms)  { -	return mm->dev->kfd2kgd->hqd_sdma_load(mm->dev->kgd, mqd, +	return mm->dev->kfd2kgd->hqd_sdma_load(mm->dev->adev, mqd,  					       (uint32_t __user *)p->write_ptr,  					       mms);  } @@ -363,14 +363,14 @@ static int destroy_mqd_sdma(struct mqd_manager *mm, void *mqd,  		unsigned int timeout, uint32_t pipe_id,  		uint32_t queue_id)  { -	return mm->dev->kfd2kgd->hqd_sdma_destroy(mm->dev->kgd, mqd, timeout); +	return mm->dev->kfd2kgd->hqd_sdma_destroy(mm->dev->adev, mqd, timeout);  }  static bool is_occupied_sdma(struct mqd_manager *mm, void *mqd,  		uint64_t queue_address, uint32_t pipe_id,  		uint32_t queue_id)  { -	return mm->dev->kfd2kgd->hqd_sdma_is_occupied(mm->dev->kgd, mqd); +	return mm->dev->kfd2kgd->hqd_sdma_is_occupied(mm->dev->adev, mqd);  }  #if defined(CONFIG_DEBUG_FS) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c index 4e5932f54b5a..326eb2285029 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c @@ -108,7 +108,7 @@ static struct kfd_mem_obj *allocate_mqd(struct kfd_dev *kfd,  		mqd_mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_KERNEL);  		if (!mqd_mem_obj)  			return NULL; -		retval = amdgpu_amdkfd_alloc_gtt_mem(kfd->kgd, +		retval = amdgpu_amdkfd_alloc_gtt_mem(kfd->adev,  			ALIGN(q->ctl_stack_size, PAGE_SIZE) +  				ALIGN(sizeof(struct v9_mqd), PAGE_SIZE),  			&(mqd_mem_obj->gtt_mem), @@ -199,7 +199,7 @@ static int load_mqd(struct mqd_manager *mm, void *mqd,  	/* AQL write pointer counts in 64B packets, PM4/CP counts in dwords. */  	uint32_t wptr_shift = (p->format == KFD_QUEUE_FORMAT_AQL ? 
4 : 0); -	return mm->dev->kfd2kgd->hqd_load(mm->dev->kgd, mqd, pipe_id, queue_id, +	return mm->dev->kfd2kgd->hqd_load(mm->dev->adev, mqd, pipe_id, queue_id,  					  (uint32_t __user *)p->write_ptr,  					  wptr_shift, 0, mms);  } @@ -208,7 +208,7 @@ static int hiq_load_mqd_kiq(struct mqd_manager *mm, void *mqd,  			    uint32_t pipe_id, uint32_t queue_id,  			    struct queue_properties *p, struct mm_struct *mms)  { -	return mm->dev->kfd2kgd->hiq_mqd_load(mm->dev->kgd, mqd, pipe_id, +	return mm->dev->kfd2kgd->hiq_mqd_load(mm->dev->adev, mqd, pipe_id,  					      queue_id, p->doorbell_off);  } @@ -291,7 +291,7 @@ static int destroy_mqd(struct mqd_manager *mm, void *mqd,  			uint32_t queue_id)  {  	return mm->dev->kfd2kgd->hqd_destroy -		(mm->dev->kgd, mqd, type, timeout, +		(mm->dev->adev, mqd, type, timeout,  		pipe_id, queue_id);  } @@ -301,7 +301,7 @@ static void free_mqd(struct mqd_manager *mm, void *mqd,  	struct kfd_dev *kfd = mm->dev;  	if (mqd_mem_obj->gtt_mem) { -		amdgpu_amdkfd_free_gtt_mem(kfd->kgd, mqd_mem_obj->gtt_mem); +		amdgpu_amdkfd_free_gtt_mem(kfd->adev, mqd_mem_obj->gtt_mem);  		kfree(mqd_mem_obj);  	} else {  		kfd_gtt_sa_free(mm->dev, mqd_mem_obj); @@ -313,7 +313,7 @@ static bool is_occupied(struct mqd_manager *mm, void *mqd,  			uint32_t queue_id)  {  	return mm->dev->kfd2kgd->hqd_is_occupied( -		mm->dev->kgd, queue_address, +		mm->dev->adev, queue_address,  		pipe_id, queue_id);  } @@ -375,7 +375,7 @@ static int load_mqd_sdma(struct mqd_manager *mm, void *mqd,  		uint32_t pipe_id, uint32_t queue_id,  		struct queue_properties *p, struct mm_struct *mms)  { -	return mm->dev->kfd2kgd->hqd_sdma_load(mm->dev->kgd, mqd, +	return mm->dev->kfd2kgd->hqd_sdma_load(mm->dev->adev, mqd,  					       (uint32_t __user *)p->write_ptr,  					       mms);  } @@ -418,14 +418,14 @@ static int destroy_mqd_sdma(struct mqd_manager *mm, void *mqd,  		unsigned int timeout, uint32_t pipe_id,  		uint32_t queue_id)  { -	return mm->dev->kfd2kgd->hqd_sdma_destroy(mm->dev->kgd, mqd, timeout); +	return mm->dev->kfd2kgd->hqd_sdma_destroy(mm->dev->adev, mqd, timeout);  }  static bool is_occupied_sdma(struct mqd_manager *mm, void *mqd,  		uint64_t queue_address, uint32_t pipe_id,  		uint32_t queue_id)  { -	return mm->dev->kfd2kgd->hqd_sdma_is_occupied(mm->dev->kgd, mqd); +	return mm->dev->kfd2kgd->hqd_sdma_is_occupied(mm->dev->adev, mqd);  }  #if defined(CONFIG_DEBUG_FS) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c index cd9220eb8a7a..d456e950ce1d 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c @@ -162,7 +162,7 @@ static int load_mqd(struct mqd_manager *mm, void *mqd,  	uint32_t wptr_shift = (p->format == KFD_QUEUE_FORMAT_AQL ? 
4 : 0);  	uint32_t wptr_mask = (uint32_t)((p->queue_size / 4) - 1); -	return mm->dev->kfd2kgd->hqd_load(mm->dev->kgd, mqd, pipe_id, queue_id, +	return mm->dev->kfd2kgd->hqd_load(mm->dev->adev, mqd, pipe_id, queue_id,  					  (uint32_t __user *)p->write_ptr,  					  wptr_shift, wptr_mask, mms);  } @@ -265,7 +265,7 @@ static int destroy_mqd(struct mqd_manager *mm, void *mqd,  			uint32_t queue_id)  {  	return mm->dev->kfd2kgd->hqd_destroy -		(mm->dev->kgd, mqd, type, timeout, +		(mm->dev->adev, mqd, type, timeout,  		pipe_id, queue_id);  } @@ -280,7 +280,7 @@ static bool is_occupied(struct mqd_manager *mm, void *mqd,  			uint32_t queue_id)  {  	return mm->dev->kfd2kgd->hqd_is_occupied( -		mm->dev->kgd, queue_address, +		mm->dev->adev, queue_address,  		pipe_id, queue_id);  } @@ -347,7 +347,7 @@ static int load_mqd_sdma(struct mqd_manager *mm, void *mqd,  		uint32_t pipe_id, uint32_t queue_id,  		struct queue_properties *p, struct mm_struct *mms)  { -	return mm->dev->kfd2kgd->hqd_sdma_load(mm->dev->kgd, mqd, +	return mm->dev->kfd2kgd->hqd_sdma_load(mm->dev->adev, mqd,  					       (uint32_t __user *)p->write_ptr,  					       mms);  } @@ -389,14 +389,14 @@ static int destroy_mqd_sdma(struct mqd_manager *mm, void *mqd,  		unsigned int timeout, uint32_t pipe_id,  		uint32_t queue_id)  { -	return mm->dev->kfd2kgd->hqd_sdma_destroy(mm->dev->kgd, mqd, timeout); +	return mm->dev->kfd2kgd->hqd_sdma_destroy(mm->dev->adev, mqd, timeout);  }  static bool is_occupied_sdma(struct mqd_manager *mm, void *mqd,  		uint64_t queue_address, uint32_t pipe_id,  		uint32_t queue_id)  { -	return mm->dev->kfd2kgd->hqd_sdma_is_occupied(mm->dev->kgd, mqd); +	return mm->dev->kfd2kgd->hqd_sdma_is_occupied(mm->dev->adev, mqd);  }  #if defined(CONFIG_DEBUG_FS) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c index e547f1f8c49f..1439420925a0 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c @@ -223,7 +223,7 @@ static int pm_create_runlist_ib(struct packet_manager *pm,  int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm)  { -	switch (dqm->dev->device_info->asic_family) { +	switch (dqm->dev->adev->asic_type) {  	case CHIP_KAVERI:  	case CHIP_HAWAII:  		/* PM4 packet structures on CIK are the same as on VI */ @@ -236,31 +236,16 @@ int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm)  	case CHIP_VEGAM:  		pm->pmf = &kfd_vi_pm_funcs;  		break; -	case CHIP_VEGA10: -	case CHIP_VEGA12: -	case CHIP_VEGA20: -	case CHIP_RAVEN: -	case CHIP_RENOIR: -	case CHIP_ARCTURUS: -	case CHIP_NAVI10: -	case CHIP_NAVI12: -	case CHIP_NAVI14: -	case CHIP_SIENNA_CICHLID: -	case CHIP_NAVY_FLOUNDER: -	case CHIP_VANGOGH: -	case CHIP_DIMGREY_CAVEFISH: -	case CHIP_BEIGE_GOBY: -	case CHIP_YELLOW_CARP: -	case CHIP_CYAN_SKILLFISH: -		pm->pmf = &kfd_v9_pm_funcs; -		break; -	case CHIP_ALDEBARAN: -		pm->pmf = &kfd_aldebaran_pm_funcs; -		break;  	default: -		WARN(1, "Unexpected ASIC family %u", -		     dqm->dev->device_info->asic_family); -		return -EINVAL; +		if (KFD_GC_VERSION(dqm->dev) == IP_VERSION(9, 4, 2)) +			pm->pmf = &kfd_aldebaran_pm_funcs; +		else if (KFD_GC_VERSION(dqm->dev) >= IP_VERSION(9, 0, 1)) +			pm->pmf = &kfd_v9_pm_funcs; +		else { +			WARN(1, "Unexpected ASIC family %u", +			     dqm->dev->adev->asic_type); +			return -EINVAL; +		}  	}  	pm->dqm = dqm; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_vi.c index 
08442e7d9944..3c0658e32e93 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_vi.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_vi.c @@ -110,8 +110,8 @@ static int pm_runlist_vi(struct packet_manager *pm, uint32_t *buffer,  	return 0;  } -int pm_set_resources_vi(struct packet_manager *pm, uint32_t *buffer, -				struct scheduling_resources *res) +static int pm_set_resources_vi(struct packet_manager *pm, uint32_t *buffer, +			       struct scheduling_resources *res)  {  	struct pm4_mes_set_resources *packet; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 94e92c0812db..ea68f3b3a4e9 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -183,7 +183,8 @@ enum cache_policy {  	cache_policy_noncoherent  }; -#define KFD_IS_SOC15(chip) ((chip) >= CHIP_VEGA10) +#define KFD_GC_VERSION(dev) ((dev)->adev->ip_versions[GC_HWIP][0]) +#define KFD_IS_SOC15(dev)   ((KFD_GC_VERSION(dev)) >= (IP_VERSION(9, 0, 1)))  struct kfd_event_interrupt_class {  	bool (*interrupt_isr)(struct kfd_dev *dev, @@ -194,8 +195,6 @@ struct kfd_event_interrupt_class {  };  struct kfd_device_info { -	enum amd_asic_type asic_family; -	const char *asic_name;  	uint32_t gfx_target_version;  	const struct kfd_event_interrupt_class *event_interrupt_class;  	unsigned int max_pasid_bits; @@ -208,11 +207,12 @@ struct kfd_device_info {  	bool needs_iommu_device;  	bool needs_pci_atomics;  	uint32_t no_atomic_fw_version; -	unsigned int num_sdma_engines; -	unsigned int num_xgmi_sdma_engines;  	unsigned int num_sdma_queues_per_engine;  }; +unsigned int kfd_get_num_sdma_engines(struct kfd_dev *kdev); +unsigned int kfd_get_num_xgmi_sdma_engines(struct kfd_dev *kdev); +  struct kfd_mem_obj {  	uint32_t range_start;  	uint32_t range_end; @@ -228,9 +228,9 @@ struct kfd_vmid_info {  };  struct kfd_dev { -	struct kgd_dev *kgd; +	struct amdgpu_device *adev; -	const struct kfd_device_info *device_info; +	struct kfd_device_info device_info;  	struct pci_dev *pdev;  	struct drm_device *ddev; @@ -766,7 +766,7 @@ struct svm_range_list {  	struct list_head		deferred_range_list;  	spinlock_t			deferred_list_lock;  	atomic_t			evicted_ranges; -	bool				drain_pagefaults; +	atomic_t			drain_pagefaults;  	struct delayed_work		restore_work;  	DECLARE_BITMAP(bitmap_supported, MAX_GPU_INSTANCE);  	struct task_struct 		*faulting_task; @@ -856,6 +856,8 @@ struct kfd_process {  	struct svm_range_list svms;  	bool xnack_enabled; + +	atomic_t poison;  };  #define KFD_PROCESS_TABLE_SIZE 5 /* bits: 32 entries */ @@ -891,7 +893,7 @@ struct kfd_process *kfd_lookup_process_by_pasid(u32 pasid);  struct kfd_process *kfd_lookup_process_by_mm(const struct mm_struct *mm);  int kfd_process_gpuidx_from_gpuid(struct kfd_process *p, uint32_t gpu_id); -int kfd_process_gpuid_from_kgd(struct kfd_process *p, +int kfd_process_gpuid_from_adev(struct kfd_process *p,  			       struct amdgpu_device *adev, uint32_t *gpuid,  			       uint32_t *gpuidx);  static inline int kfd_process_gpuid_from_gpuidx(struct kfd_process *p, @@ -984,7 +986,7 @@ struct kfd_topology_device *kfd_topology_device_by_proximity_domain(  struct kfd_topology_device *kfd_topology_device_by_id(uint32_t gpu_id);  struct kfd_dev *kfd_device_by_id(uint32_t gpu_id);  struct kfd_dev *kfd_device_by_pci_dev(const struct pci_dev *pdev); -struct kfd_dev *kfd_device_by_kgd(const struct kgd_dev *kgd); +struct kfd_dev *kfd_device_by_adev(const struct amdgpu_device *adev);  int kfd_topology_enum_kfd_devices(uint8_t idx, struct 
kfd_dev **kdev);  int kfd_numa_node_to_apic_id(int numa_node_id);  void kfd_double_confirm_iommu_support(struct kfd_dev *gpu); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index b993011cfa64..d1145da5348f 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -251,14 +251,13 @@ cleanup:  }  /** - * @kfd_get_cu_occupancy - Collect number of waves in-flight on this device + * kfd_get_cu_occupancy - Collect number of waves in-flight on this device   * by current process. Translates acquired wave count into number of compute units   * that are occupied.   * - * @atr: Handle of attribute that allows reporting of wave count. The attribute + * @attr: Handle of attribute that allows reporting of wave count. The attribute   * handle encapsulates GPU device it is associated with, thereby allowing collection   * of waves in flight, etc - *   * @buffer: Handle of user provided buffer updated with wave count   *   * Return: Number of bytes written to user buffer or an error value @@ -288,7 +287,7 @@ static int kfd_get_cu_occupancy(struct attribute *attr, char *buffer)  	/* Collect wave count from device if it supports */  	wave_cnt = 0;  	max_waves_per_cu = 0; -	dev->kfd2kgd->get_cu_occupancy(dev->kgd, proc->pasid, &wave_cnt, +	dev->kfd2kgd->get_cu_occupancy(dev->adev, proc->pasid, &wave_cnt,  			&max_waves_per_cu);  	/* Translate wave count to number of compute units */ @@ -462,6 +461,7 @@ static struct attribute *procfs_queue_attrs[] = {  	&attr_queue_gpuid,  	NULL  }; +ATTRIBUTE_GROUPS(procfs_queue);  static const struct sysfs_ops procfs_queue_ops = {  	.show = kfd_procfs_queue_show, @@ -469,7 +469,7 @@ static const struct sysfs_ops procfs_queue_ops = {  static struct kobj_type procfs_queue_type = {  	.sysfs_ops = &procfs_queue_ops, -	.default_attrs = procfs_queue_attrs, +	.default_groups = procfs_queue_groups,  };  static const struct sysfs_ops procfs_stats_ops = { @@ -692,12 +692,12 @@ static void kfd_process_free_gpuvm(struct kgd_mem *mem,  	struct kfd_dev *dev = pdd->dev;  	if (kptr) { -		amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(dev->kgd, mem); +		amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(dev->adev, mem);  		kptr = NULL;  	} -	amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(dev->kgd, mem, pdd->drm_priv); -	amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, mem, pdd->drm_priv, +	amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(dev->adev, mem, pdd->drm_priv); +	amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->adev, mem, pdd->drm_priv,  					       NULL);  } @@ -714,24 +714,24 @@ static int kfd_process_alloc_gpuvm(struct kfd_process_device *pdd,  	struct kfd_dev *kdev = pdd->dev;  	int err; -	err = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(kdev->kgd, gpu_va, size, +	err = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(kdev->adev, gpu_va, size,  						 pdd->drm_priv, mem, NULL, flags);  	if (err)  		goto err_alloc_mem; -	err = amdgpu_amdkfd_gpuvm_map_memory_to_gpu(kdev->kgd, *mem, +	err = amdgpu_amdkfd_gpuvm_map_memory_to_gpu(kdev->adev, *mem,  			pdd->drm_priv, NULL);  	if (err)  		goto err_map_mem; -	err = amdgpu_amdkfd_gpuvm_sync_memory(kdev->kgd, *mem, true); +	err = amdgpu_amdkfd_gpuvm_sync_memory(kdev->adev, *mem, true);  	if (err) {  		pr_debug("Sync memory failed, wait interrupted by user signal\n");  		goto sync_memory_failed;  	}  	if (kptr) { -		err = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(kdev->kgd, +		err = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(kdev->adev,  				(struct kgd_mem *)*mem, kptr, NULL);  		if 
(err) {  			pr_debug("Map GTT BO to kernel failed\n"); @@ -742,10 +742,10 @@ static int kfd_process_alloc_gpuvm(struct kfd_process_device *pdd,  	return err;  sync_memory_failed: -	amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(kdev->kgd, *mem, pdd->drm_priv); +	amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(kdev->adev, *mem, pdd->drm_priv);  err_map_mem: -	amdgpu_amdkfd_gpuvm_free_memory_of_gpu(kdev->kgd, *mem, pdd->drm_priv, +	amdgpu_amdkfd_gpuvm_free_memory_of_gpu(kdev->adev, *mem, pdd->drm_priv,  					       NULL);  err_alloc_mem:  	*mem = NULL; @@ -940,10 +940,10 @@ static void kfd_process_device_free_bos(struct kfd_process_device *pdd)  			if (!peer_pdd->drm_priv)  				continue;  			amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu( -				peer_pdd->dev->kgd, mem, peer_pdd->drm_priv); +				peer_pdd->dev->adev, mem, peer_pdd->drm_priv);  		} -		amdgpu_amdkfd_gpuvm_free_memory_of_gpu(pdd->dev->kgd, mem, +		amdgpu_amdkfd_gpuvm_free_memory_of_gpu(pdd->dev->adev, mem,  						       pdd->drm_priv, NULL);  		kfd_process_device_remove_obj_handle(pdd, id);  	} @@ -974,7 +974,7 @@ static void kfd_process_kunmap_signal_bo(struct kfd_process *p)  	if (!mem)  		goto out; -	amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(kdev->kgd, mem); +	amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(kdev->adev, mem);  out:  	mutex_unlock(&p->mutex); @@ -1003,7 +1003,7 @@ static void kfd_process_destroy_pdds(struct kfd_process *p)  		if (pdd->drm_file) {  			amdgpu_amdkfd_gpuvm_release_process_vm( -					pdd->dev->kgd, pdd->drm_priv); +					pdd->dev->adev, pdd->drm_priv);  			fput(pdd->drm_file);  		} @@ -1011,7 +1011,7 @@ static void kfd_process_destroy_pdds(struct kfd_process *p)  			free_pages((unsigned long)pdd->qpd.cwsr_kaddr,  				get_order(KFD_CWSR_TBA_TMA_SIZE)); -		kfree(pdd->qpd.doorbell_bitmap); +		bitmap_free(pdd->qpd.doorbell_bitmap);  		idr_destroy(&pdd->alloc_idr);  		kfd_free_process_doorbells(pdd->dev, pdd->doorbell_index); @@ -1317,14 +1317,13 @@ bool kfd_process_xnack_mode(struct kfd_process *p, bool supported)  		 * support the SVM APIs and don't need to be considered  		 * for the XNACK mode selection.  		 */ -		if (dev->device_info->asic_family < CHIP_VEGA10) +		if (!KFD_IS_SOC15(dev))  			continue;  		/* Aldebaran can always support XNACK because it can support  		 * per-process XNACK mode selection. But let the dev->noretry  		 * setting still influence the default XNACK mode.  		 */ -		if (supported && -		    dev->device_info->asic_family == CHIP_ALDEBARAN) +		if (supported && KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 2))  			continue;  		/* GFXv10 and later GPUs do not support shader preemption @@ -1332,7 +1331,7 @@ bool kfd_process_xnack_mode(struct kfd_process *p, bool supported)  		 * management and memory-manager-related preemptions or  		 * even deadlocks.  		 
*/ -		if (dev->device_info->asic_family >= CHIP_NAVI10) +		if (KFD_GC_VERSION(dev) >= IP_VERSION(10, 1, 1))  			return false;  		if (dev->noretry) @@ -1431,12 +1430,11 @@ static int init_doorbell_bitmap(struct qcm_process_device *qpd,  	int range_start = dev->shared_resources.non_cp_doorbells_start;  	int range_end = dev->shared_resources.non_cp_doorbells_end; -	if (!KFD_IS_SOC15(dev->device_info->asic_family)) +	if (!KFD_IS_SOC15(dev))  		return 0; -	qpd->doorbell_bitmap = -		kzalloc(DIV_ROUND_UP(KFD_MAX_NUM_OF_QUEUES_PER_PROCESS, -				     BITS_PER_BYTE), GFP_KERNEL); +	qpd->doorbell_bitmap = bitmap_zalloc(KFD_MAX_NUM_OF_QUEUES_PER_PROCESS, +					     GFP_KERNEL);  	if (!qpd->doorbell_bitmap)  		return -ENOMEM; @@ -1448,9 +1446,9 @@ static int init_doorbell_bitmap(struct qcm_process_device *qpd,  	for (i = 0; i < KFD_MAX_NUM_OF_QUEUES_PER_PROCESS / 2; i++) {  		if (i >= range_start && i <= range_end) { -			set_bit(i, qpd->doorbell_bitmap); -			set_bit(i + KFD_QUEUE_DOORBELL_MIRROR_OFFSET, -				qpd->doorbell_bitmap); +			__set_bit(i, qpd->doorbell_bitmap); +			__set_bit(i + KFD_QUEUE_DOORBELL_MIRROR_OFFSET, +				  qpd->doorbell_bitmap);  		}  	} @@ -1547,7 +1545,7 @@ int kfd_process_device_init_vm(struct kfd_process_device *pdd,  	dev = pdd->dev;  	ret = amdgpu_amdkfd_gpuvm_acquire_process_vm( -		dev->kgd, drm_file, p->pasid, +		dev->adev, drm_file, p->pasid,  		&p->kgd_process_info, &p->ef);  	if (ret) {  		pr_err("Failed to create process VM object\n"); @@ -1779,14 +1777,13 @@ int kfd_process_gpuidx_from_gpuid(struct kfd_process *p, uint32_t gpu_id)  }  int -kfd_process_gpuid_from_kgd(struct kfd_process *p, struct amdgpu_device *adev, +kfd_process_gpuid_from_adev(struct kfd_process *p, struct amdgpu_device *adev,  			   uint32_t *gpuid, uint32_t *gpuidx)  { -	struct kgd_dev *kgd = (struct kgd_dev *)adev;  	int i;  	for (i = 0; i < p->n_pdds; i++) -		if (p->pdds[i] && p->pdds[i]->dev->kgd == kgd) { +		if (p->pdds[i] && p->pdds[i]->dev->adev == adev) {  			*gpuid = p->pdds[i]->dev->id;  			*gpuidx = i;  			return 0; @@ -1951,10 +1948,10 @@ void kfd_flush_tlb(struct kfd_process_device *pdd, enum TLB_FLUSH_TYPE type)  		 * only happens when the first queue is created.  		 */  		if (pdd->qpd.vmid) -			amdgpu_amdkfd_flush_gpu_tlb_vmid(dev->kgd, +			amdgpu_amdkfd_flush_gpu_tlb_vmid(dev->adev,  							pdd->qpd.vmid);  	} else { -		amdgpu_amdkfd_flush_gpu_tlb_pasid(dev->kgd, +		amdgpu_amdkfd_flush_gpu_tlb_pasid(dev->adev,  					pdd->process->pasid, type);  	}  } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c index 3627e7ac161b..5e5c84a8e1ef 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c @@ -118,7 +118,7 @@ int pqm_set_gws(struct process_queue_manager *pqm, unsigned int qid,  		return ret;  	pqn->q->gws = mem; -	pdd->qpd.num_gws = gws ? amdgpu_amdkfd_get_num_gws(dev->kgd) : 0; +	pdd->qpd.num_gws = gws ? 
dev->adev->gds.gws_size : 0;  	return pqn->q->device->dqm->ops.update_queue(pqn->q->device->dqm,  							pqn->q, NULL); @@ -135,9 +135,8 @@ void kfd_process_dequeue_from_all_devices(struct kfd_process *p)  int pqm_init(struct process_queue_manager *pqm, struct kfd_process *p)  {  	INIT_LIST_HEAD(&pqm->queues); -	pqm->queue_slot_bitmap = -			kzalloc(DIV_ROUND_UP(KFD_MAX_NUM_OF_QUEUES_PER_PROCESS, -					BITS_PER_BYTE), GFP_KERNEL); +	pqm->queue_slot_bitmap = bitmap_zalloc(KFD_MAX_NUM_OF_QUEUES_PER_PROCESS, +					       GFP_KERNEL);  	if (!pqm->queue_slot_bitmap)  		return -ENOMEM;  	pqm->process = p; @@ -159,7 +158,7 @@ void pqm_uninit(struct process_queue_manager *pqm)  		kfree(pqn);  	} -	kfree(pqm->queue_slot_bitmap); +	bitmap_free(pqm->queue_slot_bitmap);  	pqm->queue_slot_bitmap = NULL;  } @@ -220,7 +219,7 @@ int pqm_create_queue(struct process_queue_manager *pqm,  	 * Hence we also check the type as well  	 */  	if ((pdd->qpd.is_debug) || (type == KFD_QUEUE_TYPE_DIQ)) -		max_queues = dev->device_info->max_no_of_hqd/2; +		max_queues = dev->device_info.max_no_of_hqd/2;  	if (pdd->qpd.queue_count >= max_queues)  		return -ENOSPC; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c index ed4bc5f844ce..deae12dc777d 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c @@ -207,7 +207,6 @@ void kfd_smi_event_update_gpu_reset(struct kfd_dev *dev, bool post_reset)  void kfd_smi_event_update_thermal_throttling(struct kfd_dev *dev,  					     uint64_t throttle_bitmask)  { -	struct amdgpu_device *adev = (struct amdgpu_device *)dev->kgd;  	/*  	 * ThermalThrottle msg = throttle_bitmask(8):  	 * 			 thermal_interrupt_count(16): @@ -223,14 +222,13 @@ void kfd_smi_event_update_thermal_throttling(struct kfd_dev *dev,  	len = snprintf(fifo_in, sizeof(fifo_in), "%x %llx:%llx\n",  		       KFD_SMI_EVENT_THERMAL_THROTTLE, throttle_bitmask, -		       atomic64_read(&adev->smu.throttle_int_counter)); +		       atomic64_read(&dev->adev->smu.throttle_int_counter));  	add_event_to_kfifo(dev, KFD_SMI_EVENT_THERMAL_THROTTLE,	fifo_in, len);  }  void kfd_smi_event_update_vmfault(struct kfd_dev *dev, uint16_t pasid)  { -	struct amdgpu_device *adev = (struct amdgpu_device *)dev->kgd;  	struct amdgpu_task_info task_info;  	/* VmFault msg = (hex)uint32_pid(8) + :(1) + task name(16) = 25 */  	/* 1 byte event + 1 byte space + 25 bytes msg + 1 byte \n + @@ -243,7 +241,7 @@ void kfd_smi_event_update_vmfault(struct kfd_dev *dev, uint16_t pasid)  		return;  	memset(&task_info, 0, sizeof(struct amdgpu_task_info)); -	amdgpu_vm_get_task_info(adev, pasid, &task_info); +	amdgpu_vm_get_task_info(dev->adev, pasid, &task_info);  	/* Report VM faults from user applications, not retry from kernel */  	if (!task_info.pid)  		return; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 16137c4247bb..f2805ba74c80 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -107,7 +107,7 @@ static void svm_range_add_to_svms(struct svm_range *prange)  	pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx]\n", prange->svms,  		 prange, prange->start, prange->last); -	list_add_tail(&prange->list, &prange->svms->list); +	list_move_tail(&prange->list, &prange->svms->list);  	prange->it_node.start = prange->start;  	prange->it_node.last = prange->last;  	interval_tree_insert(&prange->it_node, &prange->svms->objects); @@ -193,7 +193,6 @@ svm_range_dma_map(struct svm_range 
*prange, unsigned long *bitmap,  	for_each_set_bit(gpuidx, bitmap, MAX_GPU_INSTANCE) {  		struct kfd_process_device *pdd; -		struct amdgpu_device *adev;  		pr_debug("mapping to gpu idx 0x%x\n", gpuidx);  		pdd = kfd_process_device_from_gpuidx(p, gpuidx); @@ -201,9 +200,8 @@ svm_range_dma_map(struct svm_range *prange, unsigned long *bitmap,  			pr_debug("failed to find device idx %d\n", gpuidx);  			return -EINVAL;  		} -		adev = (struct amdgpu_device *)pdd->dev->kgd; -		r = svm_range_dma_map_dev(adev, prange, offset, npages, +		r = svm_range_dma_map_dev(pdd->dev->adev, prange, offset, npages,  					  hmm_pfns, gpuidx);  		if (r)  			break; @@ -297,8 +295,6 @@ svm_range *svm_range_new(struct svm_range_list *svms, uint64_t start,  	prange->last = last;  	INIT_LIST_HEAD(&prange->list);  	INIT_LIST_HEAD(&prange->update_list); -	INIT_LIST_HEAD(&prange->remove_list); -	INIT_LIST_HEAD(&prange->insert_list);  	INIT_LIST_HEAD(&prange->svm_bo_list);  	INIT_LIST_HEAD(&prange->deferred_list);  	INIT_LIST_HEAD(&prange->child_list); @@ -334,6 +330,8 @@ static void svm_range_bo_release(struct kref *kref)  	struct svm_range_bo *svm_bo;  	svm_bo = container_of(kref, struct svm_range_bo, kref); +	pr_debug("svm_bo 0x%p\n", svm_bo); +  	spin_lock(&svm_bo->list_lock);  	while (!list_empty(&svm_bo->range_list)) {  		struct svm_range *prange = @@ -367,12 +365,33 @@ static void svm_range_bo_release(struct kref *kref)  	kfree(svm_bo);  } -void svm_range_bo_unref(struct svm_range_bo *svm_bo) +static void svm_range_bo_wq_release(struct work_struct *work)  { -	if (!svm_bo) -		return; +	struct svm_range_bo *svm_bo; + +	svm_bo = container_of(work, struct svm_range_bo, release_work); +	svm_range_bo_release(&svm_bo->kref); +} + +static void svm_range_bo_release_async(struct kref *kref) +{ +	struct svm_range_bo *svm_bo; + +	svm_bo = container_of(kref, struct svm_range_bo, kref); +	pr_debug("svm_bo 0x%p\n", svm_bo); +	INIT_WORK(&svm_bo->release_work, svm_range_bo_wq_release); +	schedule_work(&svm_bo->release_work); +} + +void svm_range_bo_unref_async(struct svm_range_bo *svm_bo) +{ +	kref_put(&svm_bo->kref, svm_range_bo_release_async); +} -	kref_put(&svm_bo->kref, svm_range_bo_release); +static void svm_range_bo_unref(struct svm_range_bo *svm_bo) +{ +	if (svm_bo) +		kref_put(&svm_bo->kref, svm_range_bo_release);  }  static bool @@ -581,7 +600,7 @@ svm_range_get_adev_by_id(struct svm_range *prange, uint32_t gpu_id)  		return NULL;  	} -	return (struct amdgpu_device *)pdd->dev->kgd; +	return pdd->dev->adev;  }  struct kfd_process_device * @@ -593,7 +612,7 @@ svm_range_get_pdd_by_adev(struct svm_range *prange, struct amdgpu_device *adev)  	p = container_of(prange->svms, struct kfd_process, svms); -	r = kfd_process_gpuid_from_kgd(p, adev, &gpuid, &gpu_idx); +	r = kfd_process_gpuid_from_adev(p, adev, &gpuid, &gpu_idx);  	if (r) {  		pr_debug("failed to get device id by adev %p\n", adev);  		return NULL; @@ -706,6 +725,61 @@ svm_range_apply_attrs(struct kfd_process *p, struct svm_range *prange,  	}  } +static bool +svm_range_is_same_attrs(struct kfd_process *p, struct svm_range *prange, +			uint32_t nattr, struct kfd_ioctl_svm_attribute *attrs) +{ +	uint32_t i; +	int gpuidx; + +	for (i = 0; i < nattr; i++) { +		switch (attrs[i].type) { +		case KFD_IOCTL_SVM_ATTR_PREFERRED_LOC: +			if (prange->preferred_loc != attrs[i].value) +				return false; +			break; +		case KFD_IOCTL_SVM_ATTR_PREFETCH_LOC: +			/* Prefetch should always trigger a migration even +			 * if the value of the attribute didn't change. 
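/*
 * An aside on the svm_range_bo_unref_async()/svm_range_bo_release_async()
 * change introduced above: the last kref_put() may now happen in a context
 * that cannot run the full teardown directly (the release path can sleep and
 * takes further locks), so the final reference only schedules a work item and
 * the real release runs later in process context.  A minimal, generic sketch
 * of that kref-to-workqueue pattern; the demo_* names are made up, not the
 * kernel's:
 */
#include <linux/kref.h>
#include <linux/slab.h>
#include <linux/workqueue.h>

struct demo_obj {
	struct kref kref;
	struct work_struct release_work;
};

static void demo_obj_release(struct kref *kref)
{
	/* Heavyweight teardown; safe to sleep here. */
	kfree(container_of(kref, struct demo_obj, kref));
}

static void demo_obj_wq_release(struct work_struct *work)
{
	struct demo_obj *obj = container_of(work, struct demo_obj, release_work);

	demo_obj_release(&obj->kref);
}

static void demo_obj_release_async(struct kref *kref)
{
	struct demo_obj *obj = container_of(kref, struct demo_obj, kref);

	INIT_WORK(&obj->release_work, demo_obj_wq_release);
	schedule_work(&obj->release_work);
}

/* Callers that cannot afford the synchronous teardown drop their ref here. */
static void demo_obj_put_async(struct demo_obj *obj)
{
	kref_put(&obj->kref, demo_obj_release_async);
}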
+			 */ +			return false; +		case KFD_IOCTL_SVM_ATTR_ACCESS: +		case KFD_IOCTL_SVM_ATTR_ACCESS_IN_PLACE: +		case KFD_IOCTL_SVM_ATTR_NO_ACCESS: +			gpuidx = kfd_process_gpuidx_from_gpuid(p, +							       attrs[i].value); +			if (attrs[i].type == KFD_IOCTL_SVM_ATTR_NO_ACCESS) { +				if (test_bit(gpuidx, prange->bitmap_access) || +				    test_bit(gpuidx, prange->bitmap_aip)) +					return false; +			} else if (attrs[i].type == KFD_IOCTL_SVM_ATTR_ACCESS) { +				if (!test_bit(gpuidx, prange->bitmap_access)) +					return false; +			} else { +				if (!test_bit(gpuidx, prange->bitmap_aip)) +					return false; +			} +			break; +		case KFD_IOCTL_SVM_ATTR_SET_FLAGS: +			if ((prange->flags & attrs[i].value) != attrs[i].value) +				return false; +			break; +		case KFD_IOCTL_SVM_ATTR_CLR_FLAGS: +			if ((prange->flags & attrs[i].value) != 0) +				return false; +			break; +		case KFD_IOCTL_SVM_ATTR_GRANULARITY: +			if (prange->granularity != attrs[i].value) +				return false; +			break; +		default: +			WARN_ONCE(1, "svm_range_check_attrs wasn't called?"); +		} +	} + +	return true; +} +  /**   * svm_range_debug_dump - print all range information from svms   * @svms: svm range list header @@ -743,14 +817,6 @@ static void svm_range_debug_dump(struct svm_range_list *svms)  	}  } -static bool -svm_range_is_same_attrs(struct svm_range *old, struct svm_range *new) -{ -	return (old->prefetch_loc == new->prefetch_loc && -		old->flags == new->flags && -		old->granularity == new->granularity); -} -  static int  svm_range_split_array(void *ppnew, void *ppold, size_t size,  		      uint64_t old_start, uint64_t old_n, @@ -943,26 +1009,26 @@ svm_range_split(struct svm_range *prange, uint64_t start, uint64_t last,  }  static int -svm_range_split_tail(struct svm_range *prange, struct svm_range *new, +svm_range_split_tail(struct svm_range *prange,  		     uint64_t new_last, struct list_head *insert_list)  {  	struct svm_range *tail;  	int r = svm_range_split(prange, prange->start, new_last, &tail);  	if (!r) -		list_add(&tail->insert_list, insert_list); +		list_add(&tail->list, insert_list);  	return r;  }  static int -svm_range_split_head(struct svm_range *prange, struct svm_range *new, +svm_range_split_head(struct svm_range *prange,  		     uint64_t new_start, struct list_head *insert_list)  {  	struct svm_range *head;  	int r = svm_range_split(prange, new_start, prange->last, &head);  	if (!r) -		list_add(&head->insert_list, insert_list); +		list_add(&head->list, insert_list);  	return r;  } @@ -1053,8 +1119,8 @@ svm_range_get_pte_flags(struct amdgpu_device *adev, struct svm_range *prange,  	if (domain == SVM_RANGE_VRAM_DOMAIN)  		bo_adev = amdgpu_ttm_adev(prange->svm_bo->bo->tbo.bdev); -	switch (adev->asic_type) { -	case CHIP_ARCTURUS: +	switch (KFD_GC_VERSION(adev->kfd.dev)) { +	case IP_VERSION(9, 4, 1):  		if (domain == SVM_RANGE_VRAM_DOMAIN) {  			if (bo_adev == adev) {  				mapping_flags |= coherent ? @@ -1070,7 +1136,7 @@ svm_range_get_pte_flags(struct amdgpu_device *adev, struct svm_range *prange,  				AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC;  		}  		break; -	case CHIP_ALDEBARAN: +	case IP_VERSION(9, 4, 2):  		if (domain == SVM_RANGE_VRAM_DOMAIN) {  			if (bo_adev == adev) {  				mapping_flags |= coherent ? 
@@ -1129,7 +1195,6 @@ svm_range_unmap_from_gpus(struct svm_range *prange, unsigned long start,  	DECLARE_BITMAP(bitmap, MAX_GPU_INSTANCE);  	struct kfd_process_device *pdd;  	struct dma_fence *fence = NULL; -	struct amdgpu_device *adev;  	struct kfd_process *p;  	uint32_t gpuidx;  	int r = 0; @@ -1145,9 +1210,9 @@ svm_range_unmap_from_gpus(struct svm_range *prange, unsigned long start,  			pr_debug("failed to find device idx %d\n", gpuidx);  			return -EINVAL;  		} -		adev = (struct amdgpu_device *)pdd->dev->kgd; -		r = svm_range_unmap_from_gpu(adev, drm_priv_to_vm(pdd->drm_priv), +		r = svm_range_unmap_from_gpu(pdd->dev->adev, +					     drm_priv_to_vm(pdd->drm_priv),  					     start, last, &fence);  		if (r)  			break; @@ -1159,7 +1224,7 @@ svm_range_unmap_from_gpus(struct svm_range *prange, unsigned long start,  			if (r)  				break;  		} -		amdgpu_amdkfd_flush_gpu_tlb_pasid((struct kgd_dev *)adev, +		amdgpu_amdkfd_flush_gpu_tlb_pasid(pdd->dev->adev,  					p->pasid, TLB_FLUSH_HEAVYWEIGHT);  	} @@ -1172,7 +1237,6 @@ svm_range_map_to_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm,  		     unsigned long npages, bool readonly, dma_addr_t *dma_addr,  		     struct amdgpu_device *bo_adev, struct dma_fence **fence)  { -	struct amdgpu_bo_va bo_va;  	bool table_freed = false;  	uint64_t pte_flags;  	unsigned long last_start; @@ -1185,9 +1249,6 @@ svm_range_map_to_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm,  	pr_debug("svms 0x%p [0x%lx 0x%lx] readonly %d\n", prange->svms,  		 last_start, last_start + npages - 1, readonly); -	if (prange->svm_bo && prange->ttm_res) -		bo_va.is_xgmi = amdgpu_xgmi_same_hive(adev, bo_adev); -  	for (i = offset; i < offset + npages; i++) {  		last_domain = dma_addr[i] & SVM_RANGE_VRAM_DOMAIN;  		dma_addr[i] &= ~SVM_RANGE_VRAM_DOMAIN; @@ -1243,8 +1304,7 @@ svm_range_map_to_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm,  		struct kfd_process *p;  		p = container_of(prange->svms, struct kfd_process, svms); -		amdgpu_amdkfd_flush_gpu_tlb_pasid((struct kgd_dev *)adev, -						p->pasid, TLB_FLUSH_LEGACY); +		amdgpu_amdkfd_flush_gpu_tlb_pasid(adev, p->pasid, TLB_FLUSH_LEGACY);  	}  out:  	return r; @@ -1257,7 +1317,6 @@ svm_range_map_to_gpus(struct svm_range *prange, unsigned long offset,  {  	struct kfd_process_device *pdd;  	struct amdgpu_device *bo_adev; -	struct amdgpu_device *adev;  	struct kfd_process *p;  	struct dma_fence *fence = NULL;  	uint32_t gpuidx; @@ -1276,19 +1335,18 @@ svm_range_map_to_gpus(struct svm_range *prange, unsigned long offset,  			pr_debug("failed to find device idx %d\n", gpuidx);  			return -EINVAL;  		} -		adev = (struct amdgpu_device *)pdd->dev->kgd;  		pdd = kfd_bind_process_to_device(pdd->dev, p);  		if (IS_ERR(pdd))  			return -EINVAL; -		if (bo_adev && adev != bo_adev && -		    !amdgpu_xgmi_same_hive(adev, bo_adev)) { +		if (bo_adev && pdd->dev->adev != bo_adev && +		    !amdgpu_xgmi_same_hive(pdd->dev->adev, bo_adev)) {  			pr_debug("cannot map to device idx %d\n", gpuidx);  			continue;  		} -		r = svm_range_map_to_gpu(adev, drm_priv_to_vm(pdd->drm_priv), +		r = svm_range_map_to_gpu(pdd->dev->adev, drm_priv_to_vm(pdd->drm_priv),  					 prange, offset, npages, readonly,  					 prange->dma_addr[gpuidx],  					 bo_adev, wait ? 
&fence : NULL); @@ -1322,7 +1380,6 @@ struct svm_validate_context {  static int svm_range_reserve_bos(struct svm_validate_context *ctx)  {  	struct kfd_process_device *pdd; -	struct amdgpu_device *adev;  	struct amdgpu_vm *vm;  	uint32_t gpuidx;  	int r; @@ -1334,7 +1391,6 @@ static int svm_range_reserve_bos(struct svm_validate_context *ctx)  			pr_debug("failed to find device idx %d\n", gpuidx);  			return -EINVAL;  		} -		adev = (struct amdgpu_device *)pdd->dev->kgd;  		vm = drm_priv_to_vm(pdd->drm_priv);  		ctx->tv[gpuidx].bo = &vm->root.bo->tbo; @@ -1356,9 +1412,9 @@ static int svm_range_reserve_bos(struct svm_validate_context *ctx)  			r = -EINVAL;  			goto unreserve_out;  		} -		adev = (struct amdgpu_device *)pdd->dev->kgd; -		r = amdgpu_vm_validate_pt_bos(adev, drm_priv_to_vm(pdd->drm_priv), +		r = amdgpu_vm_validate_pt_bos(pdd->dev->adev, +					      drm_priv_to_vm(pdd->drm_priv),  					      svm_range_bo_validate, NULL);  		if (r) {  			pr_debug("failed %d validate pt bos\n", r); @@ -1381,12 +1437,10 @@ static void svm_range_unreserve_bos(struct svm_validate_context *ctx)  static void *kfd_svm_page_owner(struct kfd_process *p, int32_t gpuidx)  {  	struct kfd_process_device *pdd; -	struct amdgpu_device *adev;  	pdd = kfd_process_device_from_gpuidx(p, gpuidx); -	adev = (struct amdgpu_device *)pdd->dev->kgd; -	return SVM_ADEV_PGMAP_OWNER(adev); +	return SVM_ADEV_PGMAP_OWNER(pdd->dev->adev);  }  /* @@ -1574,7 +1628,6 @@ retry_flush_work:  static void svm_range_restore_work(struct work_struct *work)  {  	struct delayed_work *dwork = to_delayed_work(work); -	struct amdkfd_process_info *process_info;  	struct svm_range_list *svms;  	struct svm_range *prange;  	struct kfd_process *p; @@ -1594,12 +1647,10 @@ static void svm_range_restore_work(struct work_struct *work)  	 * the lifetime of this thread, kfd_process and mm will be valid.  	 */  	p = container_of(svms, struct kfd_process, svms); -	process_info = p->kgd_process_info;  	mm = p->mm;  	if (!mm)  		return; -	mutex_lock(&process_info->lock);  	svm_range_list_lock_and_flush_work(svms, mm);  	mutex_lock(&svms->lock); @@ -1652,7 +1703,6 @@ static void svm_range_restore_work(struct work_struct *work)  out_reschedule:  	mutex_unlock(&svms->lock);  	mmap_write_unlock(mm); -	mutex_unlock(&process_info->lock);  	/* If validation failed, reschedule another attempt */  	if (evicted_ranges) { @@ -1664,6 +1714,10 @@ out_reschedule:  /**   * svm_range_evict - evict svm range + * @prange: svm range structure + * @mm: current process mm_struct + * @start: starting process queue number + * @last: last process queue number   *   * Stop all queues of the process to ensure GPU doesn't access the memory, then   * return to let CPU evict the buffer and proceed CPU pagetable update. @@ -1768,46 +1822,49 @@ static struct svm_range *svm_range_clone(struct svm_range *old)  }  /** - * svm_range_handle_overlap - split overlap ranges - * @svms: svm range list header - * @new: range added with this attributes - * @start: range added start address, in pages - * @last: range last address, in pages - * @update_list: output, the ranges attributes are updated. For set_attr, this - *               will do validation and map to GPUs. For unmap, this will be - *               removed and unmap from GPUs - * @insert_list: output, the ranges will be inserted into svms, attributes are - *               not changes. For set_attr, this will add into svms. 
- * @remove_list:output, the ranges will be removed from svms - * @left: the remaining range after overlap, For set_attr, this will be added - *        as new range. + * svm_range_add - add svm range and handle overlap + * @p: the range add to this process svms + * @start: page size aligned + * @size: page size aligned + * @nattr: number of attributes + * @attrs: array of attributes + * @update_list: output, the ranges need validate and update GPU mapping + * @insert_list: output, the ranges need insert to svms + * @remove_list: output, the ranges are replaced and need remove from svms   * - * Total have 5 overlap cases. + * Check if the virtual address range has overlap with any existing ranges, + * split partly overlapping ranges and add new ranges in the gaps. All changes + * should be applied to the range_list and interval tree transactionally. If + * any range split or allocation fails, the entire update fails. Therefore any + * existing overlapping svm_ranges are cloned and the original svm_ranges left + * unchanged.   * - * This function handles overlap of an address interval with existing - * struct svm_ranges for applying new attributes. This may require - * splitting existing struct svm_ranges. All changes should be applied to - * the range_list and interval tree transactionally. If any split operation - * fails, the entire update fails. Therefore the existing overlapping - * svm_ranges are cloned and the original svm_ranges left unchanged. If the - * transaction succeeds, the modified clones are added and the originals - * freed. Otherwise the clones are removed and the old svm_ranges remain. + * If the transaction succeeds, the caller can update and insert clones and + * new ranges, then free the originals.   * - * Context: The caller must hold svms->lock + * Otherwise the caller can free the clones and new ranges, while the old + * svm_ranges remain unchanged. 
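/*
 * A condensed sketch of how a caller is meant to consume the three lists
 * under the contract documented above; this is essentially what
 * svm_range_set_attr() does further down in this patch.  Locking (svms->lock,
 * mmap lock) and MMU-notifier registration are omitted, and the wrapper
 * itself is hypothetical; the svm_range_* helpers are the ones from this
 * file:
 */
static int example_apply_range_update(struct kfd_process *p,
				      uint64_t start, uint64_t size,
				      uint32_t nattr,
				      struct kfd_ioctl_svm_attribute *attrs)
{
	struct list_head update_list, insert_list, remove_list;
	struct svm_range *prange, *next;
	int r;

	r = svm_range_add(p, start, size, nattr, attrs,
			  &update_list, &insert_list, &remove_list);
	if (r)
		return r;	/* nothing linked into svms yet, clones already freed */

	/* Transaction succeeded: link the clones and gap-filling ranges ... */
	list_for_each_entry_safe(prange, next, &insert_list, list)
		svm_range_add_to_svms(prange);

	/* ... apply the new attributes to every range that needs updating ... */
	list_for_each_entry(prange, &update_list, update_list)
		svm_range_apply_attrs(p, prange, nattr, attrs);

	/* ... and only then drop the replaced originals. */
	list_for_each_entry_safe(prange, next, &remove_list, update_list) {
		svm_range_unlink(prange);
		svm_range_free(prange);
	}

	return 0;
}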
+ * + * Context: Process context, caller must hold svms->lock + * + * Return: + * 0 - OK, otherwise error code   */  static int -svm_range_handle_overlap(struct svm_range_list *svms, struct svm_range *new, -			 unsigned long start, unsigned long last, -			 struct list_head *update_list, -			 struct list_head *insert_list, -			 struct list_head *remove_list, -			 unsigned long *left) +svm_range_add(struct kfd_process *p, uint64_t start, uint64_t size, +	      uint32_t nattr, struct kfd_ioctl_svm_attribute *attrs, +	      struct list_head *update_list, struct list_head *insert_list, +	      struct list_head *remove_list)  { +	unsigned long last = start + size - 1UL; +	struct svm_range_list *svms = &p->svms;  	struct interval_tree_node *node;  	struct svm_range *prange;  	struct svm_range *tmp;  	int r = 0; +	pr_debug("svms 0x%p [0x%llx 0x%lx]\n", &p->svms, start, last); +  	INIT_LIST_HEAD(update_list);  	INIT_LIST_HEAD(insert_list);  	INIT_LIST_HEAD(remove_list); @@ -1815,37 +1872,44 @@ svm_range_handle_overlap(struct svm_range_list *svms, struct svm_range *new,  	node = interval_tree_iter_first(&svms->objects, start, last);  	while (node) {  		struct interval_tree_node *next; -		struct svm_range *old;  		unsigned long next_start;  		pr_debug("found overlap node [0x%lx 0x%lx]\n", node->start,  			 node->last); -		old = container_of(node, struct svm_range, it_node); +		prange = container_of(node, struct svm_range, it_node);  		next = interval_tree_iter_next(node, start, last);  		next_start = min(node->last, last) + 1; -		if (node->start < start || node->last > last) { -			/* node intersects the updated range, clone+split it */ +		if (svm_range_is_same_attrs(p, prange, nattr, attrs)) { +			/* nothing to do */ +		} else if (node->start < start || node->last > last) { +			/* node intersects the update range and its attributes +			 * will change. 
Clone and split it, apply updates only +			 * to the overlapping part +			 */ +			struct svm_range *old = prange; +  			prange = svm_range_clone(old);  			if (!prange) {  				r = -ENOMEM;  				goto out;  			} -			list_add(&old->remove_list, remove_list); -			list_add(&prange->insert_list, insert_list); +			list_add(&old->update_list, remove_list); +			list_add(&prange->list, insert_list); +			list_add(&prange->update_list, update_list);  			if (node->start < start) {  				pr_debug("change old range start\n"); -				r = svm_range_split_head(prange, new, start, +				r = svm_range_split_head(prange, start,  							 insert_list);  				if (r)  					goto out;  			}  			if (node->last > last) {  				pr_debug("change old range last\n"); -				r = svm_range_split_tail(prange, new, last, +				r = svm_range_split_tail(prange, last,  							 insert_list);  				if (r)  					goto out; @@ -1854,22 +1918,18 @@ svm_range_handle_overlap(struct svm_range_list *svms, struct svm_range *new,  			/* The node is contained within start..last,  			 * just update it  			 */ -			prange = old; -		} - -		if (!svm_range_is_same_attrs(prange, new))  			list_add(&prange->update_list, update_list); +		}  		/* insert a new node if needed */  		if (node->start > start) { -			prange = svm_range_new(prange->svms, start, -					       node->start - 1); +			prange = svm_range_new(svms, start, node->start - 1);  			if (!prange) {  				r = -ENOMEM;  				goto out;  			} -			list_add(&prange->insert_list, insert_list); +			list_add(&prange->list, insert_list);  			list_add(&prange->update_list, update_list);  		} @@ -1877,12 +1937,20 @@ svm_range_handle_overlap(struct svm_range_list *svms, struct svm_range *new,  		start = next_start;  	} -	if (left && start <= last) -		*left = last - start + 1; +	/* add a final range at the end if needed */ +	if (start <= last) { +		prange = svm_range_new(svms, start, last); +		if (!prange) { +			r = -ENOMEM; +			goto out; +		} +		list_add(&prange->list, insert_list); +		list_add(&prange->update_list, update_list); +	}  out:  	if (r) -		list_for_each_entry_safe(prange, tmp, insert_list, insert_list) +		list_for_each_entry_safe(prange, tmp, insert_list, list)  			svm_range_free(prange);  	return r; @@ -1966,23 +2034,30 @@ svm_range_handle_list_op(struct svm_range_list *svms, struct svm_range *prange)  static void svm_range_drain_retry_fault(struct svm_range_list *svms)  {  	struct kfd_process_device *pdd; -	struct amdgpu_device *adev;  	struct kfd_process *p; +	int drain;  	uint32_t i;  	p = container_of(svms, struct kfd_process, svms); +restart: +	drain = atomic_read(&svms->drain_pagefaults); +	if (!drain) +		return; +  	for_each_set_bit(i, svms->bitmap_supported, p->n_pdds) {  		pdd = p->pdds[i];  		if (!pdd)  			continue;  		pr_debug("drain retry fault gpu %d svms %p\n", i, svms); -		adev = (struct amdgpu_device *)pdd->dev->kgd; -		amdgpu_ih_wait_on_checkpoint_process(adev, &adev->irq.ih1); +		amdgpu_ih_wait_on_checkpoint_process_ts(pdd->dev->adev, +						     &pdd->dev->adev->irq.ih1);  		pr_debug("drain retry fault gpu %d svms 0x%p done\n", i, svms);  	} +	if (atomic_cmpxchg(&svms->drain_pagefaults, drain, 0) != drain) +		goto restart;  }  static void svm_range_deferred_list_work(struct work_struct *work) @@ -1990,43 +2065,41 @@ static void svm_range_deferred_list_work(struct work_struct *work)  	struct svm_range_list *svms;  	struct svm_range *prange;  	struct mm_struct *mm; +	struct kfd_process *p;  	svms = container_of(work, struct svm_range_list, deferred_list_work);  	pr_debug("enter svms 
0x%p\n", svms); +	p = container_of(svms, struct kfd_process, svms); +	/* Avoid mm is gone when inserting mmu notifier */ +	mm = get_task_mm(p->lead_thread); +	if (!mm) { +		pr_debug("svms 0x%p process mm gone\n", svms); +		return; +	} +retry: +	mmap_write_lock(mm); + +	/* Checking for the need to drain retry faults must be inside +	 * mmap write lock to serialize with munmap notifiers. +	 */ +	if (unlikely(atomic_read(&svms->drain_pagefaults))) { +		mmap_write_unlock(mm); +		svm_range_drain_retry_fault(svms); +		goto retry; +	} +  	spin_lock(&svms->deferred_list_lock);  	while (!list_empty(&svms->deferred_range_list)) {  		prange = list_first_entry(&svms->deferred_range_list,  					  struct svm_range, deferred_list); +		list_del_init(&prange->deferred_list);  		spin_unlock(&svms->deferred_list_lock); +  		pr_debug("prange 0x%p [0x%lx 0x%lx] op %d\n", prange,  			 prange->start, prange->last, prange->work_item.op); -		mm = prange->work_item.mm; -retry: -		mmap_write_lock(mm);  		mutex_lock(&svms->lock); - -		/* Checking for the need to drain retry faults must be in -		 * mmap write lock to serialize with munmap notifiers. -		 * -		 * Remove from deferred_list must be inside mmap write lock, -		 * otherwise, svm_range_list_lock_and_flush_work may hold mmap -		 * write lock, and continue because deferred_list is empty, then -		 * deferred_list handle is blocked by mmap write lock. -		 */ -		spin_lock(&svms->deferred_list_lock); -		if (unlikely(svms->drain_pagefaults)) { -			svms->drain_pagefaults = false; -			spin_unlock(&svms->deferred_list_lock); -			mutex_unlock(&svms->lock); -			mmap_write_unlock(mm); -			svm_range_drain_retry_fault(svms); -			goto retry; -		} -		list_del_init(&prange->deferred_list); -		spin_unlock(&svms->deferred_list_lock); -  		mutex_lock(&prange->migrate_mutex);  		while (!list_empty(&prange->child_list)) {  			struct svm_range *pchild; @@ -2042,12 +2115,13 @@ retry:  		svm_range_handle_list_op(svms, prange);  		mutex_unlock(&svms->lock); -		mmap_write_unlock(mm);  		spin_lock(&svms->deferred_list_lock);  	}  	spin_unlock(&svms->deferred_list_lock); +	mmap_write_unlock(mm); +	mmput(mm);  	pr_debug("exit svms 0x%p\n", svms);  } @@ -2056,12 +2130,6 @@ svm_range_add_list_work(struct svm_range_list *svms, struct svm_range *prange,  			struct mm_struct *mm, enum svm_work_list_ops op)  {  	spin_lock(&svms->deferred_list_lock); -	/* Make sure pending page faults are drained in the deferred worker -	 * before the range is freed to avoid straggler interrupts on -	 * unmapped memory causing "phantom faults". -	 */ -	if (op == SVM_OP_UNMAP_RANGE) -		svms->drain_pagefaults = true;  	/* if prange is on the deferred list */  	if (!list_empty(&prange->deferred_list)) {  		pr_debug("update exist prange 0x%p work op %d\n", prange, op); @@ -2140,6 +2208,12 @@ svm_range_unmap_from_cpu(struct mm_struct *mm, struct svm_range *prange,  	pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx] [0x%lx 0x%lx]\n", svms,  		 prange, prange->start, prange->last, start, last); +	/* Make sure pending page faults are drained in the deferred worker +	 * before the range is freed to avoid straggler interrupts on +	 * unmapped memory causing "phantom faults". 
+	 */ +	atomic_inc(&svms->drain_pagefaults); +  	unmap_parent = start <= prange->start && last >= prange->last;  	list_for_each_entry(pchild, &prange->child_list, child_list) { @@ -2169,6 +2243,9 @@ svm_range_unmap_from_cpu(struct mm_struct *mm, struct svm_range *prange,  /**   * svm_range_cpu_invalidate_pagetables - interval notifier callback + * @mni: mmu_interval_notifier struct + * @range: mmu_notifier_range struct + * @cur_seq: value to pass to mmu_interval_set_seq()   *   * If event is MMU_NOTIFY_UNMAP, this is from CPU unmap range, otherwise, it   * is from migration, or CPU page invalidation callback. @@ -2198,8 +2275,8 @@ svm_range_cpu_invalidate_pagetables(struct mmu_interval_notifier *mni,  	start = mni->interval_tree.start;  	last = mni->interval_tree.last; -	start = (start > range->start ? start : range->start) >> PAGE_SHIFT; -	last = (last < (range->end - 1) ? last : range->end - 1) >> PAGE_SHIFT; +	start = max(start, range->start) >> PAGE_SHIFT; +	last = min(last, range->end - 1) >> PAGE_SHIFT;  	pr_debug("[0x%lx 0x%lx] range[0x%lx 0x%lx] notifier[0x%lx 0x%lx] %d\n",  		 start, last, range->start >> PAGE_SHIFT,  		 (range->end - 1) >> PAGE_SHIFT, @@ -2301,7 +2378,7 @@ svm_range_best_restore_location(struct svm_range *prange,  	p = container_of(prange->svms, struct kfd_process, svms); -	r = kfd_process_gpuid_from_kgd(p, adev, &gpuid, gpuidx); +	r = kfd_process_gpuid_from_adev(p, adev, &gpuid, gpuidx);  	if (r < 0) {  		pr_debug("failed to get gpuid from kgd\n");  		return -1; @@ -2478,7 +2555,7 @@ svm_range *svm_range_create_unregistered_range(struct amdgpu_device *adev,  		pr_debug("Failed to create prange in address [0x%llx]\n", addr);  		return NULL;  	} -	if (kfd_process_gpuid_from_kgd(p, adev, &gpuid, &gpuidx)) { +	if (kfd_process_gpuid_from_adev(p, adev, &gpuid, &gpuidx)) {  		pr_debug("failed to get gpuid from kgd\n");  		svm_range_free(prange);  		return NULL; @@ -2545,7 +2622,7 @@ svm_range_count_fault(struct amdgpu_device *adev, struct kfd_process *p,  		uint32_t gpuid;  		int r; -		r = kfd_process_gpuid_from_kgd(p, adev, &gpuid, &gpuidx); +		r = kfd_process_gpuid_from_adev(p, adev, &gpuid, &gpuidx);  		if (r < 0)  			return;  	} @@ -2559,20 +2636,13 @@ svm_range_count_fault(struct amdgpu_device *adev, struct kfd_process *p,  }  static bool -svm_fault_allowed(struct mm_struct *mm, uint64_t addr, bool write_fault) +svm_fault_allowed(struct vm_area_struct *vma, bool write_fault)  {  	unsigned long requested = VM_READ; -	struct vm_area_struct *vma;  	if (write_fault)  		requested |= VM_WRITE; -	vma = find_vma(mm, addr << PAGE_SHIFT); -	if (!vma || (addr << PAGE_SHIFT) < vma->vm_start) { -		pr_debug("address 0x%llx VMA is removed\n", addr); -		return true; -	} -  	pr_debug("requested 0x%lx, vma permission flags 0x%lx\n", requested,  		vma->vm_flags);  	return (vma->vm_flags & requested) == requested; @@ -2590,6 +2660,7 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,  	int32_t best_loc;  	int32_t gpuidx = MAX_GPU_INSTANCE;  	bool write_locked = false; +	struct vm_area_struct *vma;  	int r = 0;  	if (!KFD_IS_SVM_API_SUPPORTED(adev->kfd.dev)) { @@ -2600,7 +2671,7 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,  	p = kfd_lookup_process_by_pasid(pasid);  	if (!p) {  		pr_debug("kfd process not founded pasid 0x%x\n", pasid); -		return -ESRCH; +		return 0;  	}  	if (!p->xnack_enabled) {  		pr_debug("XNACK not enabled for pasid 0x%x\n", pasid); @@ -2611,10 +2682,19 @@ svm_range_restore_pages(struct amdgpu_device *adev, 
unsigned int pasid,  	pr_debug("restoring svms 0x%p fault address 0x%llx\n", svms, addr); +	if (atomic_read(&svms->drain_pagefaults)) { +		pr_debug("draining retry fault, drop fault 0x%llx\n", addr); +		r = 0; +		goto out; +	} + +	/* p->lead_thread is available as kfd_process_wq_release flush the work +	 * before releasing task ref. +	 */  	mm = get_task_mm(p->lead_thread);  	if (!mm) {  		pr_debug("svms 0x%p failed to get mm\n", svms); -		r = -ESRCH; +		r = 0;  		goto out;  	} @@ -2652,6 +2732,7 @@ retry_write_locked:  	if (svm_range_skip_recover(prange)) {  		amdgpu_gmc_filter_faults_remove(adev, addr, pasid); +		r = 0;  		goto out_unlock_range;  	} @@ -2660,10 +2741,21 @@ retry_write_locked:  	if (timestamp < AMDGPU_SVM_RANGE_RETRY_FAULT_PENDING) {  		pr_debug("svms 0x%p [0x%lx %lx] already restored\n",  			 svms, prange->start, prange->last); +		r = 0; +		goto out_unlock_range; +	} + +	/* __do_munmap removed VMA, return success as we are handling stale +	 * retry fault. +	 */ +	vma = find_vma(mm, addr << PAGE_SHIFT); +	if (!vma || (addr << PAGE_SHIFT) < vma->vm_start) { +		pr_debug("address 0x%llx VMA is removed\n", addr); +		r = 0;  		goto out_unlock_range;  	} -	if (!svm_fault_allowed(mm, addr, write_fault)) { +	if (!svm_fault_allowed(vma, write_fault)) {  		pr_debug("fault addr 0x%llx no %s permission\n", addr,  			write_fault ? "write" : "read");  		r = -EPERM; @@ -2741,6 +2833,14 @@ void svm_range_list_fini(struct kfd_process *p)  	/* Ensure list work is finished before process is destroyed */  	flush_work(&p->svms.deferred_list_work); +	/* +	 * Ensure no retry fault comes in afterwards, as page fault handler will +	 * not find kfd process and take mm lock to recover fault. +	 */ +	atomic_inc(&p->svms.drain_pagefaults); +	svm_range_drain_retry_fault(&p->svms); + +  	list_for_each_entry_safe(prange, next, &p->svms.list, list) {  		svm_range_unlink(prange);  		svm_range_remove_notifier(prange); @@ -2761,6 +2861,7 @@ int svm_range_list_init(struct kfd_process *p)  	mutex_init(&svms->lock);  	INIT_LIST_HEAD(&svms->list);  	atomic_set(&svms->evicted_ranges, 0); +	atomic_set(&svms->drain_pagefaults, 0);  	INIT_DELAYED_WORK(&svms->restore_work, svm_range_restore_work);  	INIT_WORK(&svms->deferred_list_work, svm_range_deferred_list_work);  	INIT_LIST_HEAD(&svms->deferred_range_list); @@ -2868,59 +2969,6 @@ svm_range_is_valid(struct kfd_process *p, uint64_t start, uint64_t size)  }  /** - * svm_range_add - add svm range and handle overlap - * @p: the range add to this process svms - * @start: page size aligned - * @size: page size aligned - * @nattr: number of attributes - * @attrs: array of attributes - * @update_list: output, the ranges need validate and update GPU mapping - * @insert_list: output, the ranges need insert to svms - * @remove_list: output, the ranges are replaced and need remove from svms - * - * Check if the virtual address range has overlap with the registered ranges, - * split the overlapped range, copy and adjust pages address and vram nodes in - * old and new ranges. 
- * - * Context: Process context, caller must hold svms->lock - * - * Return: - * 0 - OK, otherwise error code - */ -static int -svm_range_add(struct kfd_process *p, uint64_t start, uint64_t size, -	      uint32_t nattr, struct kfd_ioctl_svm_attribute *attrs, -	      struct list_head *update_list, struct list_head *insert_list, -	      struct list_head *remove_list) -{ -	uint64_t last = start + size - 1UL; -	struct svm_range_list *svms; -	struct svm_range new = {0}; -	struct svm_range *prange; -	unsigned long left = 0; -	int r = 0; - -	pr_debug("svms 0x%p [0x%llx 0x%llx]\n", &p->svms, start, last); - -	svm_range_apply_attrs(p, &new, nattr, attrs); - -	svms = &p->svms; - -	r = svm_range_handle_overlap(svms, &new, start, last, update_list, -				     insert_list, remove_list, &left); -	if (r) -		return r; - -	if (left) { -		prange = svm_range_new(svms, last - left + 1, last); -		list_add(&prange->insert_list, insert_list); -		list_add(&prange->update_list, update_list); -	} - -	return 0; -} - -/**   * svm_range_best_prefetch_location - decide the best prefetch location   * @prange: svm range structure   * @@ -2953,7 +3001,6 @@ svm_range_best_prefetch_location(struct svm_range *prange)  	uint32_t best_loc = prange->prefetch_loc;  	struct kfd_process_device *pdd;  	struct amdgpu_device *bo_adev; -	struct amdgpu_device *adev;  	struct kfd_process *p;  	uint32_t gpuidx; @@ -2981,12 +3028,11 @@ svm_range_best_prefetch_location(struct svm_range *prange)  			pr_debug("failed to get device by idx 0x%x\n", gpuidx);  			continue;  		} -		adev = (struct amdgpu_device *)pdd->dev->kgd; -		if (adev == bo_adev) +		if (pdd->dev->adev == bo_adev)  			continue; -		if (!amdgpu_xgmi_same_hive(adev, bo_adev)) { +		if (!amdgpu_xgmi_same_hive(pdd->dev->adev, bo_adev)) {  			best_loc = 0;  			break;  		} @@ -3150,7 +3196,6 @@ static int  svm_range_set_attr(struct kfd_process *p, uint64_t start, uint64_t size,  		   uint32_t nattr, struct kfd_ioctl_svm_attribute *attrs)  { -	struct amdkfd_process_info *process_info = p->kgd_process_info;  	struct mm_struct *mm = current->mm;  	struct list_head update_list;  	struct list_head insert_list; @@ -3169,8 +3214,6 @@ svm_range_set_attr(struct kfd_process *p, uint64_t start, uint64_t size,  	svms = &p->svms; -	mutex_lock(&process_info->lock); -  	svm_range_list_lock_and_flush_work(svms, mm);  	r = svm_range_is_valid(p, start, size); @@ -3191,7 +3234,7 @@ svm_range_set_attr(struct kfd_process *p, uint64_t start, uint64_t size,  		goto out;  	}  	/* Apply changes as a transaction */ -	list_for_each_entry_safe(prange, next, &insert_list, insert_list) { +	list_for_each_entry_safe(prange, next, &insert_list, list) {  		svm_range_add_to_svms(prange);  		svm_range_add_notifier_locked(mm, prange);  	} @@ -3199,8 +3242,7 @@ svm_range_set_attr(struct kfd_process *p, uint64_t start, uint64_t size,  		svm_range_apply_attrs(p, prange, nattr, attrs);  		/* TODO: unmap ranges from GPU that lost access */  	} -	list_for_each_entry_safe(prange, next, &remove_list, -				remove_list) { +	list_for_each_entry_safe(prange, next, &remove_list, update_list) {  		pr_debug("unlink old 0x%p prange 0x%p [0x%lx 0x%lx]\n",  			 prange->svms, prange, prange->start,  			 prange->last); @@ -3246,8 +3288,6 @@ out_unlock_range:  	mutex_unlock(&svms->lock);  	mmap_read_unlock(mm);  out: -	mutex_unlock(&process_info->lock); -  	pr_debug("pasid 0x%x svms 0x%p [0x%llx 0x%llx] done, r=%d\n", p->pasid,  		 &p->svms, start, start + size - 1, r); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h index 6dc91c33e80f..949b477e2f4c 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h @@ -48,6 +48,7 @@ struct svm_range_bo {  	struct work_struct		eviction_work;  	struct svm_range_list		*svms;  	uint32_t			evicting; +	struct work_struct		release_work;  };  enum svm_work_list_ops { @@ -75,8 +76,6 @@ struct svm_work_list_item {   *              aligned, page size is (last - start + 1)   * @list:       link list node, used to scan all ranges of svms   * @update_list:link list node used to add to update_list - * @remove_list:link list node used to add to remove list - * @insert_list:link list node used to add to insert list   * @mapping:    bo_va mapping structure to create and update GPU page table   * @npages:     number of pages   * @dma_addr:   dma mapping address on each GPU for system memory physical page @@ -112,8 +111,6 @@ struct svm_range {  	struct interval_tree_node	it_node;  	struct list_head		list;  	struct list_head		update_list; -	struct list_head		remove_list; -	struct list_head		insert_list;  	uint64_t			npages;  	dma_addr_t			*dma_addr[MAX_GPU_INSTANCE];  	struct ttm_resource		*ttm_res; @@ -195,7 +192,7 @@ void svm_range_list_lock_and_flush_work(struct svm_range_list *svms, struct mm_s   */  #define KFD_IS_SVM_API_SUPPORTED(dev) ((dev)->pgmap.type != 0) -void svm_range_bo_unref(struct svm_range_bo *svm_bo); +void svm_range_bo_unref_async(struct svm_range_bo *svm_bo);  #else  struct kfd_process; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index dd593ad0614a..948fbb39336e 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -113,7 +113,7 @@ struct kfd_dev *kfd_device_by_pci_dev(const struct pci_dev *pdev)  	return device;  } -struct kfd_dev *kfd_device_by_kgd(const struct kgd_dev *kgd) +struct kfd_dev *kfd_device_by_adev(const struct amdgpu_device *adev)  {  	struct kfd_topology_device *top_dev;  	struct kfd_dev *device = NULL; @@ -121,7 +121,7 @@ struct kfd_dev *kfd_device_by_kgd(const struct kgd_dev *kgd)  	down_read(&topology_lock);  	list_for_each_entry(top_dev, &topology_device_list, list) -		if (top_dev->gpu && top_dev->gpu->kgd == kgd) { +		if (top_dev->gpu && top_dev->gpu->adev == adev) {  			device = top_dev->gpu;  			break;  		} @@ -503,7 +503,7 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr,  	if (dev->gpu) {  		log_max_watch_addr = -			__ilog2_u32(dev->gpu->device_info->num_of_watch_points); +			__ilog2_u32(dev->gpu->device_info.num_of_watch_points);  		if (log_max_watch_addr) {  			dev->node_props.capability |= @@ -515,7 +515,7 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr,  				HSA_CAP_WATCH_POINTS_TOTALBITS_MASK);  		} -		if (dev->gpu->device_info->asic_family == CHIP_TONGA) +		if (dev->gpu->adev->asic_type == CHIP_TONGA)  			dev->node_props.capability |=  					HSA_CAP_AQL_QUEUE_DOUBLE_MAP; @@ -531,7 +531,7 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr,  		sysfs_show_32bit_prop(buffer, offs, "sdma_fw_version",  				      dev->gpu->sdma_fw_version);  		sysfs_show_64bit_prop(buffer, offs, "unique_id", -				      amdgpu_amdkfd_get_unique_id(dev->gpu->kgd)); +				      dev->gpu->adev->unique_id);  	} @@ -1106,7 +1106,7 @@ static uint32_t kfd_generate_gpu_id(struct kfd_dev *gpu)  	if (!gpu)  		return 0; -	amdgpu_amdkfd_get_local_mem_info(gpu->kgd, &local_mem_info); +	
amdgpu_amdkfd_get_local_mem_info(gpu->adev, &local_mem_info);  	local_mem_size = local_mem_info.local_mem_size_private +  			local_mem_info.local_mem_size_public; @@ -1189,7 +1189,7 @@ static void kfd_fill_mem_clk_max_info(struct kfd_topology_device *dev)  	 * for APUs - If CRAT from ACPI reports more than one bank, then  	 *	all the banks will report the same mem_clk_max information  	 */ -	amdgpu_amdkfd_get_local_mem_info(dev->gpu->kgd, &local_mem_info); +	amdgpu_amdkfd_get_local_mem_info(dev->gpu->adev, &local_mem_info);  	list_for_each_entry(mem, &dev->mem_props, list)  		mem->mem_clk_max = local_mem_info.mem_clk_max; @@ -1217,8 +1217,7 @@ static void kfd_set_iolink_no_atomics(struct kfd_topology_device *dev,  	/* set gpu (dev) flags. */  	} else {  		if (!dev->gpu->pci_atomic_requested || -				dev->gpu->device_info->asic_family == -							CHIP_HAWAII) +				dev->gpu->adev->asic_type == CHIP_HAWAII)  			link->flags |= CRAT_IOLINK_FLAGS_NO_ATOMICS_32_BIT |  				CRAT_IOLINK_FLAGS_NO_ATOMICS_64_BIT;  	} @@ -1239,7 +1238,7 @@ static void kfd_set_iolink_non_coherent(struct kfd_topology_device *to_dev,  		 */  		if (inbound_link->iolink_type == CRAT_IOLINK_TYPE_PCIEXPRESS ||  		    (inbound_link->iolink_type == CRAT_IOLINK_TYPE_XGMI && -		    to_dev->gpu->device_info->asic_family == CHIP_VEGA20)) { +		    KFD_GC_VERSION(to_dev->gpu) == IP_VERSION(9, 4, 0))) {  			outbound_link->flags |= CRAT_IOLINK_FLAGS_NON_COHERENT;  			inbound_link->flags |= CRAT_IOLINK_FLAGS_NON_COHERENT;  		} @@ -1286,7 +1285,8 @@ int kfd_topology_add_device(struct kfd_dev *gpu)  	void *crat_image = NULL;  	size_t image_size = 0;  	int proximity_domain; -	struct amdgpu_device *adev; +	int i; +	const char *asic_name = amdgpu_asic_name[gpu->adev->asic_type];  	INIT_LIST_HEAD(&temp_topology_device_list); @@ -1296,10 +1296,8 @@ int kfd_topology_add_device(struct kfd_dev *gpu)  	proximity_domain = atomic_inc_return(&topology_crat_proximity_domain); -	adev = (struct amdgpu_device *)(gpu->kgd); -  	/* Include the CPU in xGMI hive if xGMI connected by assigning it the hive ID. 
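/*
 * For reference, the KFD_GC_VERSION()/IP_VERSION() checks that replace the
 * CHIP_* asic_family comparisons throughout this patch boil down to packed
 * integers.  Roughly (quoted from memory, so treat as illustrative; see
 * amdgpu.h and kfd_priv.h for the authoritative definitions):
 */
#define IP_VERSION(mj, mn, rv)	(((mj) << 16) | ((mn) << 8) | (rv))
#define KFD_GC_VERSION(dev)	((dev)->adev->ip_versions[GC_HWIP][0])
#define KFD_IS_SOC15(dev)	(KFD_GC_VERSION(dev) >= IP_VERSION(9, 0, 1))
/*
 * Because the major number occupies the top bits, an ordinary integer
 * comparison such as KFD_GC_VERSION(dev) >= IP_VERSION(10, 1, 1) reads as
 * "GFX 10.1.1 or newer", which is what lets the long per-CHIP_* switch
 * statements below collapse into simple range checks.
 */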
*/ -	if (gpu->hive_id && adev->gmc.xgmi.connected_to_cpu) { +	if (gpu->hive_id && gpu->adev->gmc.xgmi.connected_to_cpu) {  		struct kfd_topology_device *top_dev;  		down_read(&topology_lock); @@ -1372,45 +1370,48 @@ int kfd_topology_add_device(struct kfd_dev *gpu)  	 * needed for the topology  	 */ -	amdgpu_amdkfd_get_cu_info(dev->gpu->kgd, &cu_info); +	amdgpu_amdkfd_get_cu_info(dev->gpu->adev, &cu_info); -	strncpy(dev->node_props.name, gpu->device_info->asic_name, -			KFD_TOPOLOGY_PUBLIC_NAME_SIZE); +	for (i = 0; i < KFD_TOPOLOGY_PUBLIC_NAME_SIZE-1; i++) { +		dev->node_props.name[i] = __tolower(asic_name[i]); +		if (asic_name[i] == '\0') +			break; +	} +	dev->node_props.name[i] = '\0';  	dev->node_props.simd_arrays_per_engine =  		cu_info.num_shader_arrays_per_engine; -	dev->node_props.gfx_target_version = gpu->device_info->gfx_target_version; +	dev->node_props.gfx_target_version = gpu->device_info.gfx_target_version;  	dev->node_props.vendor_id = gpu->pdev->vendor;  	dev->node_props.device_id = gpu->pdev->device;  	dev->node_props.capability |= -		((amdgpu_amdkfd_get_asic_rev_id(dev->gpu->kgd) << -			HSA_CAP_ASIC_REVISION_SHIFT) & +		((dev->gpu->adev->rev_id << HSA_CAP_ASIC_REVISION_SHIFT) &  			HSA_CAP_ASIC_REVISION_MASK);  	dev->node_props.location_id = pci_dev_id(gpu->pdev);  	dev->node_props.domain = pci_domain_nr(gpu->pdev->bus);  	dev->node_props.max_engine_clk_fcompute = -		amdgpu_amdkfd_get_max_engine_clock_in_mhz(dev->gpu->kgd); +		amdgpu_amdkfd_get_max_engine_clock_in_mhz(dev->gpu->adev);  	dev->node_props.max_engine_clk_ccompute =  		cpufreq_quick_get_max(0) / 1000;  	dev->node_props.drm_render_minor =  		gpu->shared_resources.drm_render_minor;  	dev->node_props.hive_id = gpu->hive_id; -	dev->node_props.num_sdma_engines = gpu->device_info->num_sdma_engines; +	dev->node_props.num_sdma_engines = kfd_get_num_sdma_engines(gpu);  	dev->node_props.num_sdma_xgmi_engines = -				gpu->device_info->num_xgmi_sdma_engines; +					kfd_get_num_xgmi_sdma_engines(gpu);  	dev->node_props.num_sdma_queues_per_engine = -				gpu->device_info->num_sdma_queues_per_engine; +				gpu->device_info.num_sdma_queues_per_engine;  	dev->node_props.num_gws = (dev->gpu->gws &&  		dev->gpu->dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) ? 
-		amdgpu_amdkfd_get_num_gws(dev->gpu->kgd) : 0; +		dev->gpu->adev->gds.gws_size : 0;  	dev->node_props.num_cp_queues = get_cp_queues_num(dev->gpu->dqm);  	kfd_fill_mem_clk_max_info(dev);  	kfd_fill_iolink_non_crat_info(dev); -	switch (dev->gpu->device_info->asic_family) { +	switch (dev->gpu->adev->asic_type) {  	case CHIP_KAVERI:  	case CHIP_HAWAII:  	case CHIP_TONGA: @@ -1429,30 +1430,14 @@ int kfd_topology_add_device(struct kfd_dev *gpu)  			HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT) &  			HSA_CAP_DOORBELL_TYPE_TOTALBITS_MASK);  		break; -	case CHIP_VEGA10: -	case CHIP_VEGA12: -	case CHIP_VEGA20: -	case CHIP_RAVEN: -	case CHIP_RENOIR: -	case CHIP_ARCTURUS: -	case CHIP_ALDEBARAN: -	case CHIP_NAVI10: -	case CHIP_NAVI12: -	case CHIP_NAVI14: -	case CHIP_SIENNA_CICHLID: -	case CHIP_NAVY_FLOUNDER: -	case CHIP_VANGOGH: -	case CHIP_DIMGREY_CAVEFISH: -	case CHIP_BEIGE_GOBY: -	case CHIP_YELLOW_CARP: -	case CHIP_CYAN_SKILLFISH: -		dev->node_props.capability |= ((HSA_CAP_DOORBELL_TYPE_2_0 << -			HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT) & -			HSA_CAP_DOORBELL_TYPE_TOTALBITS_MASK); -		break;  	default: -		WARN(1, "Unexpected ASIC family %u", -		     dev->gpu->device_info->asic_family); +		if (KFD_GC_VERSION(dev->gpu) >= IP_VERSION(9, 0, 1)) +			dev->node_props.capability |= ((HSA_CAP_DOORBELL_TYPE_2_0 << +				HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT) & +				HSA_CAP_DOORBELL_TYPE_TOTALBITS_MASK); +		else +			WARN(1, "Unexpected ASIC family %u", +			     dev->gpu->adev->asic_type);  	}  	/* @@ -1469,7 +1454,7 @@ int kfd_topology_add_device(struct kfd_dev *gpu)  	 *		because it doesn't consider masked out CUs  	 * max_waves_per_simd: Carrizo reports wrong max_waves_per_simd  	 */ -	if (dev->gpu->device_info->asic_family == CHIP_CARRIZO) { +	if (dev->gpu->adev->asic_type == CHIP_CARRIZO) {  		dev->node_props.simd_count =  			cu_info.simd_per_cu * cu_info.cu_active_number;  		dev->node_props.max_waves_per_simd = 10; @@ -1477,16 +1462,17 @@ int kfd_topology_add_device(struct kfd_dev *gpu)  	/* kfd only concerns sram ecc on GFX and HBM ecc on UMC */  	dev->node_props.capability |= -		((adev->ras_enabled & BIT(AMDGPU_RAS_BLOCK__GFX)) != 0) ? +		((dev->gpu->adev->ras_enabled & BIT(AMDGPU_RAS_BLOCK__GFX)) != 0) ?  		HSA_CAP_SRAM_EDCSUPPORTED : 0; -	dev->node_props.capability |= ((adev->ras_enabled & BIT(AMDGPU_RAS_BLOCK__UMC)) != 0) ? +	dev->node_props.capability |= +		((dev->gpu->adev->ras_enabled & BIT(AMDGPU_RAS_BLOCK__UMC)) != 0) ?  		HSA_CAP_MEM_EDCSUPPORTED : 0; -	if (adev->asic_type != CHIP_VEGA10) -		dev->node_props.capability |= (adev->ras_enabled != 0) ? +	if (KFD_GC_VERSION(dev->gpu) != IP_VERSION(9, 0, 1)) +		dev->node_props.capability |= (dev->gpu->adev->ras_enabled != 0) ?  			
HSA_CAP_RASEVENTNOTIFY : 0; -	if (KFD_IS_SVM_API_SUPPORTED(adev->kfd.dev)) +	if (KFD_IS_SVM_API_SUPPORTED(dev->gpu->adev->kfd.dev))  		dev->node_props.capability |= HSA_CAP_SVMAPI_SUPPORTED;  	kfd_debug_print_topology(); @@ -1592,7 +1578,7 @@ void kfd_double_confirm_iommu_support(struct kfd_dev *gpu)  	gpu->use_iommu_v2 = false; -	if (!gpu->device_info->needs_iommu_device) +	if (!gpu->device_info.needs_iommu_device)  		return;  	down_read(&topology_lock); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h index a8db017c9b8e..f0cc59d2fd5d 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h @@ -25,38 +25,11 @@  #include <linux/types.h>  #include <linux/list.h> +#include <linux/kfd_sysfs.h>  #include "kfd_crat.h"  #define KFD_TOPOLOGY_PUBLIC_NAME_SIZE 32 -#define HSA_CAP_HOT_PLUGGABLE			0x00000001 -#define HSA_CAP_ATS_PRESENT			0x00000002 -#define HSA_CAP_SHARED_WITH_GRAPHICS		0x00000004 -#define HSA_CAP_QUEUE_SIZE_POW2			0x00000008 -#define HSA_CAP_QUEUE_SIZE_32BIT		0x00000010 -#define HSA_CAP_QUEUE_IDLE_EVENT		0x00000020 -#define HSA_CAP_VA_LIMIT			0x00000040 -#define HSA_CAP_WATCH_POINTS_SUPPORTED		0x00000080 -#define HSA_CAP_WATCH_POINTS_TOTALBITS_MASK	0x00000f00 -#define HSA_CAP_WATCH_POINTS_TOTALBITS_SHIFT	8 -#define HSA_CAP_DOORBELL_TYPE_TOTALBITS_MASK	0x00003000 -#define HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT	12 - -#define HSA_CAP_DOORBELL_TYPE_PRE_1_0		0x0 -#define HSA_CAP_DOORBELL_TYPE_1_0		0x1 -#define HSA_CAP_DOORBELL_TYPE_2_0		0x2 -#define HSA_CAP_AQL_QUEUE_DOUBLE_MAP		0x00004000 - -#define HSA_CAP_RESERVED_WAS_SRAM_EDCSUPPORTED	0x00080000 /* Old buggy user mode depends on this being 0 */ -#define HSA_CAP_MEM_EDCSUPPORTED		0x00100000 -#define HSA_CAP_RASEVENTNOTIFY			0x00200000 -#define HSA_CAP_ASIC_REVISION_MASK		0x03c00000 -#define HSA_CAP_ASIC_REVISION_SHIFT		22 -#define HSA_CAP_SRAM_EDCSUPPORTED		0x04000000 -#define HSA_CAP_SVMAPI_SUPPORTED		0x08000000 -#define HSA_CAP_FLAGS_COHERENTHOSTACCESS	0x10000000 -#define HSA_CAP_RESERVED			0xe00f8000 -  struct kfd_node_properties {  	uint64_t hive_id;  	uint32_t cpu_cores_count; @@ -93,17 +66,6 @@ struct kfd_node_properties {  	char name[KFD_TOPOLOGY_PUBLIC_NAME_SIZE];  }; -#define HSA_MEM_HEAP_TYPE_SYSTEM	0 -#define HSA_MEM_HEAP_TYPE_FB_PUBLIC	1 -#define HSA_MEM_HEAP_TYPE_FB_PRIVATE	2 -#define HSA_MEM_HEAP_TYPE_GPU_GDS	3 -#define HSA_MEM_HEAP_TYPE_GPU_LDS	4 -#define HSA_MEM_HEAP_TYPE_GPU_SCRATCH	5 - -#define HSA_MEM_FLAGS_HOT_PLUGGABLE		0x00000001 -#define HSA_MEM_FLAGS_NON_VOLATILE		0x00000002 -#define HSA_MEM_FLAGS_RESERVED			0xfffffffc -  struct kfd_mem_properties {  	struct list_head	list;  	uint32_t		heap_type; @@ -116,12 +78,6 @@ struct kfd_mem_properties {  	struct attribute	attr;  }; -#define HSA_CACHE_TYPE_DATA		0x00000001 -#define HSA_CACHE_TYPE_INSTRUCTION	0x00000002 -#define HSA_CACHE_TYPE_CPU		0x00000004 -#define HSA_CACHE_TYPE_HSACU		0x00000008 -#define HSA_CACHE_TYPE_RESERVED		0xfffffff0 -  struct kfd_cache_properties {  	struct list_head	list;  	uint32_t		processor_id_low; |