aboutsummaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/amd/amdkfd
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/amd/amdkfd')
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_chardev.c79
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device.c275
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c5
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h2
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c4
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c1
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_migrate.c153
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h3
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c32
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c19
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c19
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c35
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_priv.h26
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_process.c109
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c24
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_svm.c192
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_svm.h1
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_topology.c19
18 files changed, 700 insertions, 298 deletions
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 86afd37b098d..24ebd61395d8 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -405,7 +405,7 @@ static int kfd_ioctl_update_queue(struct file *filp, struct kfd_process *p,
mutex_lock(&p->mutex);
- retval = pqm_update_queue(&p->pqm, args->queue_id, &properties);
+ retval = pqm_update_queue_properties(&p->pqm, args->queue_id, &properties);
mutex_unlock(&p->mutex);
@@ -418,7 +418,7 @@ static int kfd_ioctl_set_cu_mask(struct file *filp, struct kfd_process *p,
int retval;
const int max_num_cus = 1024;
struct kfd_ioctl_set_cu_mask_args *args = data;
- struct queue_properties properties;
+ struct mqd_update_info minfo = {0};
uint32_t __user *cu_mask_ptr = (uint32_t __user *)args->cu_mask_ptr;
size_t cu_mask_size = sizeof(uint32_t) * (args->num_cu_mask / 32);
@@ -428,8 +428,8 @@ static int kfd_ioctl_set_cu_mask(struct file *filp, struct kfd_process *p,
return -EINVAL;
}
- properties.cu_mask_count = args->num_cu_mask;
- if (properties.cu_mask_count == 0) {
+ minfo.cu_mask.count = args->num_cu_mask;
+ if (minfo.cu_mask.count == 0) {
pr_debug("CU mask cannot be 0");
return -EINVAL;
}
@@ -438,32 +438,33 @@ static int kfd_ioctl_set_cu_mask(struct file *filp, struct kfd_process *p,
* limit of max_num_cus bits. We can then just drop any CU mask bits
* past max_num_cus bits and just use the first max_num_cus bits.
*/
- if (properties.cu_mask_count > max_num_cus) {
+ if (minfo.cu_mask.count > max_num_cus) {
pr_debug("CU mask cannot be greater than 1024 bits");
- properties.cu_mask_count = max_num_cus;
+ minfo.cu_mask.count = max_num_cus;
cu_mask_size = sizeof(uint32_t) * (max_num_cus/32);
}
- properties.cu_mask = kzalloc(cu_mask_size, GFP_KERNEL);
- if (!properties.cu_mask)
+ minfo.cu_mask.ptr = kzalloc(cu_mask_size, GFP_KERNEL);
+ if (!minfo.cu_mask.ptr)
return -ENOMEM;
- retval = copy_from_user(properties.cu_mask, cu_mask_ptr, cu_mask_size);
+ retval = copy_from_user(minfo.cu_mask.ptr, cu_mask_ptr, cu_mask_size);
if (retval) {
pr_debug("Could not copy CU mask from userspace");
- kfree(properties.cu_mask);
- return -EFAULT;
+ retval = -EFAULT;
+ goto out;
}
+ minfo.update_flag = UPDATE_FLAG_CU_MASK;
+
mutex_lock(&p->mutex);
- retval = pqm_set_cu_mask(&p->pqm, args->queue_id, &properties);
+ retval = pqm_update_mqd(&p->pqm, args->queue_id, &minfo);
mutex_unlock(&p->mutex);
- if (retval)
- kfree(properties.cu_mask);
-
+out:
+ kfree(minfo.cu_mask.ptr);
return retval;
}
@@ -1011,11 +1012,6 @@ static int kfd_ioctl_create_event(struct file *filp, struct kfd_process *p,
void *mem, *kern_addr;
uint64_t size;
- if (p->signal_page) {
- pr_err("Event page is already set\n");
- return -EINVAL;
- }
-
kfd = kfd_device_by_id(GET_GPU_ID(args->event_page_offset));
if (!kfd) {
pr_err("Getting device by id failed in %s\n", __func__);
@@ -1023,6 +1019,13 @@ static int kfd_ioctl_create_event(struct file *filp, struct kfd_process *p,
}
mutex_lock(&p->mutex);
+
+ if (p->signal_page) {
+ pr_err("Event page is already set\n");
+ err = -EINVAL;
+ goto out_unlock;
+ }
+
pdd = kfd_bind_process_to_device(kfd, p);
if (IS_ERR(pdd)) {
err = PTR_ERR(pdd);
@@ -1037,20 +1040,24 @@ static int kfd_ioctl_create_event(struct file *filp, struct kfd_process *p,
err = -EINVAL;
goto out_unlock;
}
- mutex_unlock(&p->mutex);
err = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(kfd->kgd,
mem, &kern_addr, &size);
if (err) {
pr_err("Failed to map event page to kernel\n");
- return err;
+ goto out_unlock;
}
err = kfd_event_page_set(p, kern_addr, size);
if (err) {
pr_err("Failed to set event page\n");
- return err;
+ amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(kfd->kgd, mem);
+ goto out_unlock;
}
+
+ p->signal_handle = args->event_page_offset;
+
+ mutex_unlock(&p->mutex);
}
err = kfd_event_create(filp, p, args->event_type,
@@ -1259,6 +1266,23 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
if (args->size == 0)
return -EINVAL;
+#if IS_ENABLED(CONFIG_HSA_AMD_SVM)
+ /* Flush pending deferred work to avoid racing with deferred actions
+ * from previous memory map changes (e.g. munmap).
+ */
+ svm_range_list_lock_and_flush_work(&p->svms, current->mm);
+ mutex_lock(&p->svms.lock);
+ mmap_write_unlock(current->mm);
+ if (interval_tree_iter_first(&p->svms.objects,
+ args->va_addr >> PAGE_SHIFT,
+ (args->va_addr + args->size - 1) >> PAGE_SHIFT)) {
+ pr_err("Address: 0x%llx already allocated by SVM\n",
+ args->va_addr);
+ mutex_unlock(&p->svms.lock);
+ return -EADDRINUSE;
+ }
+ mutex_unlock(&p->svms.lock);
+#endif
dev = kfd_device_by_id(args->gpu_id);
if (!dev)
return -EINVAL;
@@ -1351,6 +1375,15 @@ static int kfd_ioctl_free_memory_of_gpu(struct file *filep,
return -EINVAL;
mutex_lock(&p->mutex);
+ /*
+ * Safeguard to prevent user space from freeing signal BO.
+ * It will be freed at process termination.
+ */
+ if (p->signal_handle && (p->signal_handle == args->handle)) {
+ pr_err("Free signal BO is not allowed\n");
+ ret = -EPERM;
+ goto err_unlock;
+ }
pdd = kfd_get_process_device_data(dev, p);
if (!pdd) {
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index c2a4d920da40..0fffaf859c59 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -32,6 +32,7 @@
#include "amdgpu_amdkfd.h"
#include "kfd_smi_events.h"
#include "kfd_migrate.h"
+#include "amdgpu.h"
#define MQD_SIZE_ALIGNED 768
@@ -52,41 +53,6 @@ extern const struct kfd2kgd_calls aldebaran_kfd2kgd;
extern const struct kfd2kgd_calls gfx_v10_kfd2kgd;
extern const struct kfd2kgd_calls gfx_v10_3_kfd2kgd;
-static const struct kfd2kgd_calls *kfd2kgd_funcs[] = {
-#ifdef KFD_SUPPORT_IOMMU_V2
-#ifdef CONFIG_DRM_AMDGPU_CIK
- [CHIP_KAVERI] = &gfx_v7_kfd2kgd,
-#endif
- [CHIP_CARRIZO] = &gfx_v8_kfd2kgd,
- [CHIP_RAVEN] = &gfx_v9_kfd2kgd,
-#endif
-#ifdef CONFIG_DRM_AMDGPU_CIK
- [CHIP_HAWAII] = &gfx_v7_kfd2kgd,
-#endif
- [CHIP_TONGA] = &gfx_v8_kfd2kgd,
- [CHIP_FIJI] = &gfx_v8_kfd2kgd,
- [CHIP_POLARIS10] = &gfx_v8_kfd2kgd,
- [CHIP_POLARIS11] = &gfx_v8_kfd2kgd,
- [CHIP_POLARIS12] = &gfx_v8_kfd2kgd,
- [CHIP_VEGAM] = &gfx_v8_kfd2kgd,
- [CHIP_VEGA10] = &gfx_v9_kfd2kgd,
- [CHIP_VEGA12] = &gfx_v9_kfd2kgd,
- [CHIP_VEGA20] = &gfx_v9_kfd2kgd,
- [CHIP_RENOIR] = &gfx_v9_kfd2kgd,
- [CHIP_ARCTURUS] = &arcturus_kfd2kgd,
- [CHIP_ALDEBARAN] = &aldebaran_kfd2kgd,
- [CHIP_NAVI10] = &gfx_v10_kfd2kgd,
- [CHIP_NAVI12] = &gfx_v10_kfd2kgd,
- [CHIP_NAVI14] = &gfx_v10_kfd2kgd,
- [CHIP_SIENNA_CICHLID] = &gfx_v10_3_kfd2kgd,
- [CHIP_NAVY_FLOUNDER] = &gfx_v10_3_kfd2kgd,
- [CHIP_VANGOGH] = &gfx_v10_3_kfd2kgd,
- [CHIP_DIMGREY_CAVEFISH] = &gfx_v10_3_kfd2kgd,
- [CHIP_BEIGE_GOBY] = &gfx_v10_3_kfd2kgd,
- [CHIP_YELLOW_CARP] = &gfx_v10_3_kfd2kgd,
- [CHIP_CYAN_SKILLFISH] = &gfx_v10_kfd2kgd,
-};
-
#ifdef KFD_SUPPORT_IOMMU_V2
static const struct kfd_device_info kaveri_device_info = {
.asic_family = CHIP_KAVERI,
@@ -127,7 +93,6 @@ static const struct kfd_device_info carrizo_device_info = {
.num_xgmi_sdma_engines = 0,
.num_sdma_queues_per_engine = 2,
};
-#endif
static const struct kfd_device_info raven_device_info = {
.asic_family = CHIP_RAVEN,
@@ -147,7 +112,9 @@ static const struct kfd_device_info raven_device_info = {
.num_xgmi_sdma_engines = 0,
.num_sdma_queues_per_engine = 2,
};
+#endif
+#ifdef CONFIG_DRM_AMDGPU_CIK
static const struct kfd_device_info hawaii_device_info = {
.asic_family = CHIP_HAWAII,
.asic_name = "hawaii",
@@ -167,6 +134,7 @@ static const struct kfd_device_info hawaii_device_info = {
.num_xgmi_sdma_engines = 0,
.num_sdma_queues_per_engine = 2,
};
+#endif
static const struct kfd_device_info tonga_device_info = {
.asic_family = CHIP_TONGA,
@@ -653,63 +621,202 @@ static const struct kfd_device_info cyan_skillfish_device_info = {
.num_sdma_queues_per_engine = 8,
};
-/* For each entry, [0] is regular and [1] is virtualisation device. */
-static const struct kfd_device_info *kfd_supported_devices[][2] = {
-#ifdef KFD_SUPPORT_IOMMU_V2
- [CHIP_KAVERI] = {&kaveri_device_info, NULL},
- [CHIP_CARRIZO] = {&carrizo_device_info, NULL},
-#endif
- [CHIP_RAVEN] = {&raven_device_info, NULL},
- [CHIP_HAWAII] = {&hawaii_device_info, NULL},
- [CHIP_TONGA] = {&tonga_device_info, NULL},
- [CHIP_FIJI] = {&fiji_device_info, &fiji_vf_device_info},
- [CHIP_POLARIS10] = {&polaris10_device_info, &polaris10_vf_device_info},
- [CHIP_POLARIS11] = {&polaris11_device_info, NULL},
- [CHIP_POLARIS12] = {&polaris12_device_info, NULL},
- [CHIP_VEGAM] = {&vegam_device_info, NULL},
- [CHIP_VEGA10] = {&vega10_device_info, &vega10_vf_device_info},
- [CHIP_VEGA12] = {&vega12_device_info, NULL},
- [CHIP_VEGA20] = {&vega20_device_info, NULL},
- [CHIP_RENOIR] = {&renoir_device_info, NULL},
- [CHIP_ARCTURUS] = {&arcturus_device_info, &arcturus_device_info},
- [CHIP_ALDEBARAN] = {&aldebaran_device_info, &aldebaran_device_info},
- [CHIP_NAVI10] = {&navi10_device_info, NULL},
- [CHIP_NAVI12] = {&navi12_device_info, &navi12_device_info},
- [CHIP_NAVI14] = {&navi14_device_info, NULL},
- [CHIP_SIENNA_CICHLID] = {&sienna_cichlid_device_info, &sienna_cichlid_device_info},
- [CHIP_NAVY_FLOUNDER] = {&navy_flounder_device_info, &navy_flounder_device_info},
- [CHIP_VANGOGH] = {&vangogh_device_info, NULL},
- [CHIP_DIMGREY_CAVEFISH] = {&dimgrey_cavefish_device_info, &dimgrey_cavefish_device_info},
- [CHIP_BEIGE_GOBY] = {&beige_goby_device_info, &beige_goby_device_info},
- [CHIP_YELLOW_CARP] = {&yellow_carp_device_info, NULL},
- [CHIP_CYAN_SKILLFISH] = {&cyan_skillfish_device_info, NULL},
-};
-
static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size,
unsigned int chunk_size);
static void kfd_gtt_sa_fini(struct kfd_dev *kfd);
static int kfd_resume(struct kfd_dev *kfd);
-struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd,
- struct pci_dev *pdev, unsigned int asic_type, bool vf)
+struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, bool vf)
{
struct kfd_dev *kfd;
const struct kfd_device_info *device_info;
const struct kfd2kgd_calls *f2g;
+ struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
+ struct pci_dev *pdev = adev->pdev;
- if (asic_type >= sizeof(kfd_supported_devices) / (sizeof(void *) * 2)
- || asic_type >= sizeof(kfd2kgd_funcs) / sizeof(void *)) {
- dev_err(kfd_device, "asic_type %d out of range\n", asic_type);
- return NULL; /* asic_type out of range */
+ switch (adev->asic_type) {
+#ifdef KFD_SUPPORT_IOMMU_V2
+#ifdef CONFIG_DRM_AMDGPU_CIK
+ case CHIP_KAVERI:
+ if (vf)
+ device_info = NULL;
+ else
+ device_info = &kaveri_device_info;
+ f2g = &gfx_v7_kfd2kgd;
+ break;
+#endif
+ case CHIP_CARRIZO:
+ if (vf)
+ device_info = NULL;
+ else
+ device_info = &carrizo_device_info;
+ f2g = &gfx_v8_kfd2kgd;
+ break;
+#endif
+#ifdef CONFIG_DRM_AMDGPU_CIK
+ case CHIP_HAWAII:
+ if (vf)
+ device_info = NULL;
+ else
+ device_info = &hawaii_device_info;
+ f2g = &gfx_v7_kfd2kgd;
+ break;
+#endif
+ case CHIP_TONGA:
+ if (vf)
+ device_info = NULL;
+ else
+ device_info = &tonga_device_info;
+ f2g = &gfx_v8_kfd2kgd;
+ break;
+ case CHIP_FIJI:
+ if (vf)
+ device_info = &fiji_vf_device_info;
+ else
+ device_info = &fiji_device_info;
+ f2g = &gfx_v8_kfd2kgd;
+ break;
+ case CHIP_POLARIS10:
+ if (vf)
+ device_info = &polaris10_vf_device_info;
+ else
+ device_info = &polaris10_device_info;
+ f2g = &gfx_v8_kfd2kgd;
+ break;
+ case CHIP_POLARIS11:
+ if (vf)
+ device_info = NULL;
+ else
+ device_info = &polaris11_device_info;
+ f2g = &gfx_v8_kfd2kgd;
+ break;
+ case CHIP_POLARIS12:
+ if (vf)
+ device_info = NULL;
+ else
+ device_info = &polaris12_device_info;
+ f2g = &gfx_v8_kfd2kgd;
+ break;
+ case CHIP_VEGAM:
+ if (vf)
+ device_info = NULL;
+ else
+ device_info = &vegam_device_info;
+ f2g = &gfx_v8_kfd2kgd;
+ break;
+ default:
+ switch (adev->ip_versions[GC_HWIP][0]) {
+ case IP_VERSION(9, 0, 1):
+ if (vf)
+ device_info = &vega10_vf_device_info;
+ else
+ device_info = &vega10_device_info;
+ f2g = &gfx_v9_kfd2kgd;
+ break;
+#ifdef KFD_SUPPORT_IOMMU_V2
+ case IP_VERSION(9, 1, 0):
+ case IP_VERSION(9, 2, 2):
+ if (vf)
+ device_info = NULL;
+ else
+ device_info = &raven_device_info;
+ f2g = &gfx_v9_kfd2kgd;
+ break;
+#endif
+ case IP_VERSION(9, 2, 1):
+ if (vf)
+ device_info = NULL;
+ else
+ device_info = &vega12_device_info;
+ f2g = &gfx_v9_kfd2kgd;
+ break;
+ case IP_VERSION(9, 3, 0):
+ if (vf)
+ device_info = NULL;
+ else
+ device_info = &renoir_device_info;
+ f2g = &gfx_v9_kfd2kgd;
+ break;
+ case IP_VERSION(9, 4, 0):
+ if (vf)
+ device_info = NULL;
+ else
+ device_info = &vega20_device_info;
+ f2g = &gfx_v9_kfd2kgd;
+ break;
+ case IP_VERSION(9, 4, 1):
+ device_info = &arcturus_device_info;
+ f2g = &arcturus_kfd2kgd;
+ break;
+ case IP_VERSION(9, 4, 2):
+ device_info = &aldebaran_device_info;
+ f2g = &aldebaran_kfd2kgd;
+ break;
+ case IP_VERSION(10, 1, 10):
+ if (vf)
+ device_info = NULL;
+ else
+ device_info = &navi10_device_info;
+ f2g = &gfx_v10_kfd2kgd;
+ break;
+ case IP_VERSION(10, 1, 2):
+ device_info = &navi12_device_info;
+ f2g = &gfx_v10_kfd2kgd;
+ break;
+ case IP_VERSION(10, 1, 1):
+ if (vf)
+ device_info = NULL;
+ else
+ device_info = &navi14_device_info;
+ f2g = &gfx_v10_kfd2kgd;
+ break;
+ case IP_VERSION(10, 1, 3):
+ if (vf)
+ device_info = NULL;
+ else
+ device_info = &cyan_skillfish_device_info;
+ f2g = &gfx_v10_kfd2kgd;
+ break;
+ case IP_VERSION(10, 3, 0):
+ device_info = &sienna_cichlid_device_info;
+ f2g = &gfx_v10_3_kfd2kgd;
+ break;
+ case IP_VERSION(10, 3, 2):
+ device_info = &navy_flounder_device_info;
+ f2g = &gfx_v10_3_kfd2kgd;
+ break;
+ case IP_VERSION(10, 3, 1):
+ if (vf)
+ device_info = NULL;
+ else
+ device_info = &vangogh_device_info;
+ f2g = &gfx_v10_3_kfd2kgd;
+ break;
+ case IP_VERSION(10, 3, 4):
+ device_info = &dimgrey_cavefish_device_info;
+ f2g = &gfx_v10_3_kfd2kgd;
+ break;
+ case IP_VERSION(10, 3, 5):
+ device_info = &beige_goby_device_info;
+ f2g = &gfx_v10_3_kfd2kgd;
+ break;
+ case IP_VERSION(10, 3, 3):
+ if (vf)
+ device_info = NULL;
+ else
+ device_info = &yellow_carp_device_info;
+ f2g = &gfx_v10_3_kfd2kgd;
+ break;
+ default:
+ return NULL;
+ }
+ break;
}
- device_info = kfd_supported_devices[asic_type][vf];
- f2g = kfd2kgd_funcs[asic_type];
-
if (!device_info || !f2g) {
dev_err(kfd_device, "%s %s not supported in kfd\n",
- amdgpu_asic_name[asic_type], vf ? "VF" : "");
+ amdgpu_asic_name[adev->asic_type], vf ? "VF" : "");
return NULL;
}
@@ -916,6 +1023,7 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
kfd_double_confirm_iommu_support(kfd);
if (kfd_iommu_device_init(kfd)) {
+ kfd->use_iommu_v2 = false;
dev_err(kfd_device, "Error initializing iommuv2\n");
goto device_iommu_error;
}
@@ -924,6 +1032,9 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
svm_migrate_init((struct amdgpu_device *)kfd->kgd);
+ if(kgd2kfd_resume_iommu(kfd))
+ goto device_iommu_error;
+
if (kfd_resume(kfd))
goto kfd_resume_error;
@@ -1085,18 +1196,12 @@ static int kfd_resume(struct kfd_dev *kfd)
int err = 0;
err = kfd->dqm->ops.start(kfd->dqm);
- if (err) {
+ if (err)
dev_err(kfd_device,
"Error starting queue manager for device %x:%x\n",
kfd->pdev->vendor, kfd->pdev->device);
- goto dqm_start_error;
- }
return err;
-
-dqm_start_error:
- kfd_iommu_suspend(kfd);
- return err;
}
static inline void kfd_queue_work(struct workqueue_struct *wq,
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index f8fce9d05f50..533b27b35fc9 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -557,7 +557,8 @@ static int destroy_queue_nocpsch(struct device_queue_manager *dqm,
return retval;
}
-static int update_queue(struct device_queue_manager *dqm, struct queue *q)
+static int update_queue(struct device_queue_manager *dqm, struct queue *q,
+ struct mqd_update_info *minfo)
{
int retval = 0;
struct mqd_manager *mqd_mgr;
@@ -605,7 +606,7 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q)
}
}
- mqd_mgr->update_mqd(mqd_mgr, q->mqd, &q->properties);
+ mqd_mgr->update_mqd(mqd_mgr, q->mqd, &q->properties, minfo);
/*
* check active state vs. the previous state and modify
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
index c8719682c4da..499fc0ea387f 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
@@ -93,7 +93,7 @@ struct device_queue_manager_ops {
struct queue *q);
int (*update_queue)(struct device_queue_manager *dqm,
- struct queue *q);
+ struct queue *q, struct mqd_update_info *minfo);
int (*register_process)(struct device_queue_manager *dqm,
struct qcm_process_device *qpd);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
index 12d91e53556c..543e7ea75593 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
@@ -231,7 +231,7 @@ static void event_interrupt_wq_v9(struct kfd_dev *dev,
if (sq_intr_err != SQ_INTERRUPT_ERROR_TYPE_ILLEGAL_INST &&
sq_intr_err != SQ_INTERRUPT_ERROR_TYPE_MEMVIOL) {
kfd_signal_poison_consumed_event(dev, pasid);
- amdgpu_amdkfd_gpu_reset(dev->kgd);
+ amdgpu_amdkfd_ras_poison_consumption_handler(dev->kgd);
return;
}
break;
@@ -253,7 +253,7 @@ static void event_interrupt_wq_v9(struct kfd_dev *dev,
kfd_signal_event_interrupt(pasid, context_id0 & 0xfffffff, 28);
} else if (source_id == SOC15_INTSRC_SDMA_ECC) {
kfd_signal_poison_consumed_event(dev, pasid);
- amdgpu_amdkfd_gpu_reset(dev->kgd);
+ amdgpu_amdkfd_ras_poison_consumption_handler(dev->kgd);
return;
}
} else if (client_id == SOC15_IH_CLIENTID_VMC ||
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
index a2b77d1df854..64b4ac339904 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
@@ -136,7 +136,6 @@ static bool kq_initialize(struct kernel_queue *kq, struct kfd_dev *dev,
prop.write_ptr = (uint32_t *) kq->wptr_gpu_addr;
prop.eop_ring_buffer_address = kq->eop_gpu_addr;
prop.eop_ring_buffer_size = PAGE_SIZE;
- prop.cu_mask = NULL;
if (init_queue(&kq->queue, &prop) != 0)
goto err_init_queue;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
index 4a16e3c257b9..6d8634e40b3b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
@@ -20,7 +20,6 @@
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
-
#include <linux/types.h>
#include <linux/hmm.h>
#include <linux/dma-direction.h>
@@ -34,6 +33,11 @@
#include "kfd_svm.h"
#include "kfd_migrate.h"
+#ifdef dev_fmt
+#undef dev_fmt
+#endif
+#define dev_fmt(fmt) "kfd_migrate: %s: " fmt, __func__
+
static uint64_t
svm_migrate_direct_mapping_addr(struct amdgpu_device *adev, uint64_t addr)
{
@@ -151,14 +155,14 @@ svm_migrate_copy_memory_gart(struct amdgpu_device *adev, dma_addr_t *sys,
gart_d = svm_migrate_direct_mapping_addr(adev, *vram);
}
if (r) {
- pr_debug("failed %d to create gart mapping\n", r);
+ dev_err(adev->dev, "fail %d create gart mapping\n", r);
goto out_unlock;
}
r = amdgpu_copy_buffer(ring, gart_s, gart_d, size * PAGE_SIZE,
NULL, &next, false, true, false);
if (r) {
- pr_debug("failed %d to copy memory\n", r);
+ dev_err(adev->dev, "fail %d to copy memory\n", r);
goto out_unlock;
}
@@ -264,6 +268,19 @@ static void svm_migrate_put_sys_page(unsigned long addr)
put_page(page);
}
+static unsigned long svm_migrate_successful_pages(struct migrate_vma *migrate)
+{
+ unsigned long cpages = 0;
+ unsigned long i;
+
+ for (i = 0; i < migrate->npages; i++) {
+ if (migrate->src[i] & MIGRATE_PFN_VALID &&
+ migrate->src[i] & MIGRATE_PFN_MIGRATE)
+ cpages++;
+ }
+ return cpages;
+}
+
static int
svm_migrate_copy_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
struct migrate_vma *migrate, struct dma_fence **mfence,
@@ -285,7 +302,7 @@ svm_migrate_copy_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
r = svm_range_vram_node_new(adev, prange, true);
if (r) {
- pr_debug("failed %d get 0x%llx pages from vram\n", r, npages);
+ dev_err(adev->dev, "fail %d to alloc vram\n", r);
goto out;
}
@@ -305,7 +322,7 @@ svm_migrate_copy_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
DMA_TO_DEVICE);
r = dma_mapping_error(dev, src[i]);
if (r) {
- pr_debug("failed %d dma_map_page\n", r);
+ dev_err(adev->dev, "fail %d dma_map_page\n", r);
goto out_free_vram_pages;
}
} else {
@@ -325,8 +342,8 @@ svm_migrate_copy_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
continue;
}
- pr_debug("dma mapping src to 0x%llx, page_to_pfn 0x%lx\n",
- src[i] >> PAGE_SHIFT, page_to_pfn(spage));
+ pr_debug_ratelimited("dma mapping src to 0x%llx, pfn 0x%lx\n",
+ src[i] >> PAGE_SHIFT, page_to_pfn(spage));
if (j >= (cursor.size >> PAGE_SHIFT) - 1 && i < npages - 1) {
r = svm_migrate_copy_memory_gart(adev, src + i - j,
@@ -372,7 +389,7 @@ out:
return r;
}
-static int
+static long
svm_migrate_vma_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
struct vm_area_struct *vma, uint64_t start,
uint64_t end)
@@ -381,6 +398,7 @@ svm_migrate_vma_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
struct kfd_process_device *pdd;
struct dma_fence *mfence = NULL;
struct migrate_vma migrate;
+ unsigned long cpages = 0;
dma_addr_t *scratch;
size_t size;
void *buf;
@@ -405,23 +423,31 @@ svm_migrate_vma_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
r = migrate_vma_setup(&migrate);
if (r) {
- pr_debug("failed %d prepare migrate svms 0x%p [0x%lx 0x%lx]\n",
- r, prange->svms, prange->start, prange->last);
+ dev_err(adev->dev, "vma setup fail %d range [0x%lx 0x%lx]\n", r,
+ prange->start, prange->last);
goto out_free;
}
- if (migrate.cpages != npages) {
- pr_debug("Partial migration. 0x%lx/0x%llx pages can be migrated\n",
- migrate.cpages,
- npages);
- }
- if (migrate.cpages) {
- r = svm_migrate_copy_to_vram(adev, prange, &migrate, &mfence,
- scratch);
- migrate_vma_pages(&migrate);
- svm_migrate_copy_done(adev, mfence);
- migrate_vma_finalize(&migrate);
+ cpages = migrate.cpages;
+ if (!cpages) {
+ pr_debug("failed collect migrate sys pages [0x%lx 0x%lx]\n",
+ prange->start, prange->last);
+ goto out_free;
}
+ if (cpages != npages)
+ pr_debug("partial migration, 0x%lx/0x%llx pages migrated\n",
+ cpages, npages);
+ else
+ pr_debug("0x%lx pages migrated\n", cpages);
+
+ r = svm_migrate_copy_to_vram(adev, prange, &migrate, &mfence, scratch);
+ migrate_vma_pages(&migrate);
+
+ pr_debug("successful/cpages/npages 0x%lx/0x%lx/0x%lx\n",
+ svm_migrate_successful_pages(&migrate), cpages, migrate.npages);
+
+ svm_migrate_copy_done(adev, mfence);
+ migrate_vma_finalize(&migrate);
svm_range_dma_unmap(adev->dev, scratch, 0, npages);
svm_range_free_dma_mappings(prange);
@@ -429,12 +455,13 @@ svm_migrate_vma_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
out_free:
kvfree(buf);
out:
- if (!r) {
+ if (!r && cpages) {
pdd = svm_range_get_pdd_by_adev(prange, adev);
if (pdd)
- WRITE_ONCE(pdd->page_in, pdd->page_in + migrate.cpages);
- }
+ WRITE_ONCE(pdd->page_in, pdd->page_in + cpages);
+ return cpages;
+ }
return r;
}
@@ -456,7 +483,8 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc,
unsigned long addr, start, end;
struct vm_area_struct *vma;
struct amdgpu_device *adev;
- int r = 0;
+ unsigned long cpages = 0;
+ long r = 0;
if (prange->actual_loc == best_loc) {
pr_debug("svms 0x%p [0x%lx 0x%lx] already on best_loc 0x%x\n",
@@ -488,17 +516,19 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc,
next = min(vma->vm_end, end);
r = svm_migrate_vma_to_vram(adev, prange, vma, addr, next);
- if (r) {
- pr_debug("failed to migrate\n");
+ if (r < 0) {
+ pr_debug("failed %ld to migrate\n", r);
break;
+ } else {
+ cpages += r;
}
addr = next;
}
- if (!r)
+ if (cpages)
prange->actual_loc = best_loc;
- return r;
+ return r < 0 ? r : 0;
}
static void svm_migrate_page_free(struct page *page)
@@ -506,7 +536,7 @@ static void svm_migrate_page_free(struct page *page)
struct svm_range_bo *svm_bo = page->zone_device_data;
if (svm_bo) {
- pr_debug("svm_bo ref left: %d\n", kref_read(&svm_bo->kref));
+ pr_debug_ratelimited("ref: %d\n", kref_read(&svm_bo->kref));
svm_range_bo_unref(svm_bo);
}
}
@@ -572,12 +602,12 @@ svm_migrate_copy_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
dst[i] = dma_map_page(dev, dpage, 0, PAGE_SIZE, DMA_FROM_DEVICE);
r = dma_mapping_error(dev, dst[i]);
if (r) {
- pr_debug("failed %d dma_map_page\n", r);
+ dev_err(adev->dev, "fail %d dma_map_page\n", r);
goto out_oom;
}
- pr_debug("dma mapping dst to 0x%llx, page_to_pfn 0x%lx\n",
- dst[i] >> PAGE_SHIFT, page_to_pfn(dpage));
+ pr_debug_ratelimited("dma mapping dst to 0x%llx, pfn 0x%lx\n",
+ dst[i] >> PAGE_SHIFT, page_to_pfn(dpage));
migrate->dst[i] = migrate_pfn(page_to_pfn(dpage));
migrate->dst[i] |= MIGRATE_PFN_LOCKED;
@@ -599,7 +629,7 @@ out_oom:
return r;
}
-static int
+static long
svm_migrate_vma_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
struct vm_area_struct *vma, uint64_t start, uint64_t end)
{
@@ -607,6 +637,7 @@ svm_migrate_vma_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
struct kfd_process_device *pdd;
struct dma_fence *mfence = NULL;
struct migrate_vma migrate;
+ unsigned long cpages = 0;
dma_addr_t *scratch;
size_t size;
void *buf;
@@ -631,34 +662,43 @@ svm_migrate_vma_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
r = migrate_vma_setup(&migrate);
if (r) {
- pr_debug("failed %d prepare migrate svms 0x%p [0x%lx 0x%lx]\n",
- r, prange->svms, prange->start, prange->last);
+ dev_err(adev->dev, "vma setup fail %d range [0x%lx 0x%lx]\n", r,
+ prange->start, prange->last);
goto out_free;
}
- pr_debug("cpages %ld\n", migrate.cpages);
-
- if (migrate.cpages) {
- r = svm_migrate_copy_to_ram(adev, prange, &migrate, &mfence,
- scratch, npages);
- migrate_vma_pages(&migrate);
- svm_migrate_copy_done(adev, mfence);
- migrate_vma_finalize(&migrate);
- } else {
+ cpages = migrate.cpages;
+ if (!cpages) {
pr_debug("failed collect migrate device pages [0x%lx 0x%lx]\n",
prange->start, prange->last);
+ goto out_free;
}
+ if (cpages != npages)
+ pr_debug("partial migration, 0x%lx/0x%llx pages migrated\n",
+ cpages, npages);
+ else
+ pr_debug("0x%lx pages migrated\n", cpages);
+ r = svm_migrate_copy_to_ram(adev, prange, &migrate, &mfence,
+ scratch, npages);
+ migrate_vma_pages(&migrate);
+
+ pr_debug("successful/cpages/npages 0x%lx/0x%lx/0x%lx\n",
+ svm_migrate_successful_pages(&migrate), cpages, migrate.npages);
+
+ svm_migrate_copy_done(adev, mfence);
+ migrate_vma_finalize(&migrate);
svm_range_dma_unmap(adev->dev, scratch, 0, npages);
out_free:
kvfree(buf);
out:
- if (!r) {
+ if (!r && cpages) {
pdd = svm_range_get_pdd_by_adev(prange, adev);
if (pdd)
- WRITE_ONCE(pdd->page_out,
- pdd->page_out + migrate.cpages);
+ WRITE_ONCE(pdd->page_out, pdd->page_out + cpages);
+
+ return cpages;
}
return r;
}
@@ -680,7 +720,8 @@ int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm)
unsigned long addr;
unsigned long start;
unsigned long end;
- int r = 0;
+ unsigned long cpages = 0;
+ long r = 0;
if (!prange->actual_loc) {
pr_debug("[0x%lx 0x%lx] already migrated to ram\n",
@@ -711,18 +752,21 @@ int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm)
next = min(vma->vm_end, end);
r = svm_migrate_vma_to_ram(adev, prange, vma, addr, next);
- if (r) {
- pr_debug("failed %d to migrate\n", r);
+ if (r < 0) {
+ pr_debug("failed %ld to migrate\n", r);
break;
+ } else {
+ cpages += r;
}
addr = next;
}
- if (!r) {
+ if (cpages) {
svm_range_vram_node_free(prange);
prange->actual_loc = 0;
}
- return r;
+
+ return r < 0 ? r : 0;
}
/**
@@ -901,8 +945,7 @@ int svm_migrate_init(struct amdgpu_device *adev)
/* Disable SVM support capability */
pgmap->type = 0;
- devm_release_mem_region(adev->dev, res->start,
- res->end - res->start + 1);
+ devm_release_mem_region(adev->dev, res->start, resource_size(res));
return PTR_ERR(r);
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h
index 6e6918ccedfd..965e17c5dbb4 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h
@@ -80,7 +80,8 @@ struct mqd_manager {
struct mm_struct *mms);
void (*update_mqd)(struct mqd_manager *mm, void *mqd,
- struct queue_properties *q);
+ struct queue_properties *q,
+ struct mqd_update_info *minfo);
int (*destroy_mqd)(struct mqd_manager *mm, void *mqd,
enum kfd_preempt_type type,
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
index 064914e1e8d6..8128f4d312f1 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
@@ -42,16 +42,17 @@ static inline struct cik_sdma_rlc_registers *get_sdma_mqd(void *mqd)
}
static void update_cu_mask(struct mqd_manager *mm, void *mqd,
- struct queue_properties *q)
+ struct mqd_update_info *minfo)
{
struct cik_mqd *m;
uint32_t se_mask[4] = {0}; /* 4 is the max # of SEs */
- if (q->cu_mask_count == 0)
+ if (!minfo || (minfo->update_flag != UPDATE_FLAG_CU_MASK) ||
+ !minfo->cu_mask.ptr)
return;
mqd_symmetrically_map_cu_mask(mm,
- q->cu_mask, q->cu_mask_count, se_mask);
+ minfo->cu_mask.ptr, minfo->cu_mask.count, se_mask);
m = get_mqd(mqd);
m->compute_static_thread_mgmt_se0 = se_mask[0];
@@ -135,7 +136,7 @@ static void init_mqd(struct mqd_manager *mm, void **mqd,
*mqd = m;
if (gart_addr)
*gart_addr = addr;
- mm->update_mqd(mm, m, q);
+ mm->update_mqd(mm, m, q, NULL);
}
static void init_mqd_sdma(struct mqd_manager *mm, void **mqd,
@@ -152,7 +153,7 @@ static void init_mqd_sdma(struct mqd_manager *mm, void **mqd,
if (gart_addr)
*gart_addr = mqd_mem_obj->gpu_addr;
- mm->update_mqd(mm, m, q);
+ mm->update_mqd(mm, m, q, NULL);
}
static void free_mqd(struct mqd_manager *mm, void *mqd,
@@ -185,7 +186,8 @@ static int load_mqd_sdma(struct mqd_manager *mm, void *mqd,
}
static void __update_mqd(struct mqd_manager *mm, void *mqd,
- struct queue_properties *q, unsigned int atc_bit)
+ struct queue_properties *q, struct mqd_update_info *minfo,
+ unsigned int atc_bit)
{
struct cik_mqd *m;
@@ -214,16 +216,17 @@ static void __update_mqd(struct mqd_manager *mm, void *mqd,
if (q->format == KFD_QUEUE_FORMAT_AQL)
m->cp_hqd_pq_control |= NO_UPDATE_RPTR;
- update_cu_mask(mm, mqd, q);
+ update_cu_mask(mm, mqd, minfo);
set_priority(m, q);
q->is_active = QUEUE_IS_ACTIVE(*q);
}
static void update_mqd(struct mqd_manager *mm, void *mqd,
- struct queue_properties *q)
+ struct queue_properties *q,
+ struct mqd_update_info *minfo)
{
- __update_mqd(mm, mqd, q, 1);
+ __update_mqd(mm, mqd, q, minfo, 1);
}
static uint32_t read_doorbell_id(void *mqd)
@@ -234,13 +237,15 @@ static uint32_t read_doorbell_id(void *mqd)
}
static void update_mqd_hawaii(struct mqd_manager *mm, void *mqd,
- struct queue_properties *q)
+ struct queue_properties *q,
+ struct mqd_update_info *minfo)
{
- __update_mqd(mm, mqd, q, 0);
+ __update_mqd(mm, mqd, q, minfo, 0);
}
static void update_mqd_sdma(struct mqd_manager *mm, void *mqd,
- struct queue_properties *q)
+ struct queue_properties *q,
+ struct mqd_update_info *minfo)
{
struct cik_sdma_rlc_registers *m;
@@ -318,7 +323,8 @@ static void init_mqd_hiq(struct mqd_manager *mm, void **mqd,
}
static void update_mqd_hiq(struct mqd_manager *mm, void *mqd,
- struct queue_properties *q)
+ struct queue_properties *q,
+ struct mqd_update_info *minfo)
{
struct cik_mqd *m;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c
index c7fb59ca597f..270160fc401b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c
@@ -42,16 +42,17 @@ static inline struct v10_sdma_mqd *get_sdma_mqd(void *mqd)
}
static void update_cu_mask(struct mqd_manager *mm, void *mqd,
- struct queue_properties *q)
+ struct mqd_update_info *minfo)
{
struct v10_compute_mqd *m;
uint32_t se_mask[4] = {0}; /* 4 is the max # of SEs */
- if (q->cu_mask_count == 0)
+ if (!minfo || (minfo->update_flag != UPDATE_FLAG_CU_MASK) ||
+ !minfo->cu_mask.ptr)
return;
mqd_symmetrically_map_cu_mask(mm,
- q->cu_mask, q->cu_mask_count, se_mask);
+ minfo->cu_mask.ptr, minfo->cu_mask.count, se_mask);
m = get_mqd(mqd);
m->compute_static_thread_mgmt_se0 = se_mask[0];
@@ -136,7 +137,7 @@ static void init_mqd(struct mqd_manager *mm, void **mqd,
*mqd = m;
if (gart_addr)
*gart_addr = addr;
- mm->update_mqd(mm, m, q);
+ mm->update_mqd(mm, m, q, NULL);
}
static int load_mqd(struct mqd_manager *mm, void *mqd,
@@ -162,7 +163,8 @@ static int hiq_load_mqd_kiq(struct mqd_manager *mm, void *mqd,
}
static void update_mqd(struct mqd_manager *mm, void *mqd,
- struct queue_properties *q)
+ struct queue_properties *q,
+ struct mqd_update_info *minfo)
{
struct v10_compute_mqd *m;
@@ -218,7 +220,7 @@ static void update_mqd(struct mqd_manager *mm, void *mqd,
if (mm->dev->cwsr_enabled)
m->cp_hqd_ctx_save_control = 0;
- update_cu_mask(mm, mqd, q);
+ update_cu_mask(mm, mqd, minfo);
set_priority(m, q);
q->is_active = QUEUE_IS_ACTIVE(*q);
@@ -311,7 +313,7 @@ static void init_mqd_sdma(struct mqd_manager *mm, void **mqd,
if (gart_addr)
*gart_addr = mqd_mem_obj->gpu_addr;
- mm->update_mqd(mm, m, q);
+ mm->update_mqd(mm, m, q, NULL);
}
static int load_mqd_sdma(struct mqd_manager *mm, void *mqd,
@@ -326,7 +328,8 @@ static int load_mqd_sdma(struct mqd_manager *mm, void *mqd,
#define SDMA_RLC_DUMMY_DEFAULT 0xf
static void update_mqd_sdma(struct mqd_manager *mm, void *mqd,
- struct queue_properties *q)
+ struct queue_properties *q,
+ struct mqd_update_info *minfo)
{
struct v10_sdma_mqd *m;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
index 7f4e102ff4bd..4e5932f54b5a 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
@@ -43,16 +43,17 @@ static inline struct v9_sdma_mqd *get_sdma_mqd(void *mqd)
}
static void update_cu_mask(struct mqd_manager *mm, void *mqd,
- struct queue_properties *q)
+ struct mqd_update_info *minfo)
{
struct v9_mqd *m;
uint32_t se_mask[KFD_MAX_NUM_SE] = {0};
- if (q->cu_mask_count == 0)
+ if (!minfo || (minfo->update_flag != UPDATE_FLAG_CU_MASK) ||
+ !minfo->cu_mask.ptr)
return;
mqd_symmetrically_map_cu_mask(mm,
- q->cu_mask, q->cu_mask_count, se_mask);
+ minfo->cu_mask.ptr, minfo->cu_mask.count, se_mask);
m = get_mqd(mqd);
m->compute_static_thread_mgmt_se0 = se_mask[0];
@@ -188,7 +189,7 @@ static void init_mqd(struct mqd_manager *mm, void **mqd,
*mqd = m;
if (gart_addr)
*gart_addr = addr;
- mm->update_mqd(mm, m, q);
+ mm->update_mqd(mm, m, q, NULL);
}
static int load_mqd(struct mqd_manager *mm, void *mqd,
@@ -212,7 +213,8 @@ static int hiq_load_mqd_kiq(struct mqd_manager *mm, void *mqd,
}
static void update_mqd(struct mqd_manager *mm, void *mqd,
- struct queue_properties *q)
+ struct queue_properties *q,
+ struct mqd_update_info *minfo)
{
struct v9_mqd *m;
@@ -269,7 +271,7 @@ static void update_mqd(struct mqd_manager *mm, void *mqd,
if (mm->dev->cwsr_enabled && q->ctx_save_restore_area_address)
m->cp_hqd_ctx_save_control = 0;
- update_cu_mask(mm, mqd, q);
+ update_cu_mask(mm, mqd, minfo);
set_priority(m, q);
q->is_active = QUEUE_IS_ACTIVE(*q);
@@ -366,7 +368,7 @@ static void init_mqd_sdma(struct mqd_manager *mm, void **mqd,
if (gart_addr)
*gart_addr = mqd_mem_obj->gpu_addr;
- mm->update_mqd(mm, m, q);
+ mm->update_mqd(mm, m, q, NULL);
}
static int load_mqd_sdma(struct mqd_manager *mm, void *mqd,
@@ -381,7 +383,8 @@ static int load_mqd_sdma(struct mqd_manager *mm, void *mqd,
#define SDMA_RLC_DUMMY_DEFAULT 0xf
static void update_mqd_sdma(struct mqd_manager *mm, void *mqd,
- struct queue_properties *q)
+ struct queue_properties *q,
+ struct mqd_update_info *minfo)
{
struct v9_sdma_mqd *m;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
index 33dbd22d290f..cd9220eb8a7a 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
@@ -45,16 +45,17 @@ static inline struct vi_sdma_mqd *get_sdma_mqd(void *mqd)
}
static void update_cu_mask(struct mqd_manager *mm, void *mqd,
- struct queue_properties *q)
+ struct mqd_update_info *minfo)
{
struct vi_mqd *m;
uint32_t se_mask[4] = {0}; /* 4 is the max # of SEs */
- if (q->cu_mask_count == 0)
+ if (!minfo || (minfo->update_flag != UPDATE_FLAG_CU_MASK) ||
+ !minfo->cu_mask.ptr)
return;
mqd_symmetrically_map_cu_mask(mm,
- q->cu_mask, q->cu_mask_count, se_mask);
+ minfo->cu_mask.ptr, minfo->cu_mask.count, se_mask);
m = get_mqd(mqd);
m->compute_static_thread_mgmt_se0 = se_mask[0];
@@ -150,7 +151,7 @@ static void init_mqd(struct mqd_manager *mm, void **mqd,
*mqd = m;
if (gart_addr)
*gart_addr = addr;
- mm->update_mqd(mm, m, q);
+ mm->update_mqd(mm, m, q, NULL);
}
static int load_mqd(struct mqd_manager *mm, void *mqd,
@@ -167,8 +168,8 @@ static int load_mqd(struct mqd_manager *mm, void *mqd,
}
static void __update_mqd(struct mqd_manager *mm, void *mqd,
- struct queue_properties *q, unsigned int mtype,
- unsigned int atc_bit)
+ struct queue_properties *q, struct mqd_update_info *minfo,
+ unsigned int mtype, unsigned int atc_bit)
{
struct vi_mqd *m;
@@ -230,7 +231,7 @@ static void __update_mqd(struct mqd_manager *mm, void *mqd,
atc_bit << CP_HQD_CTX_SAVE_CONTROL__ATC__SHIFT |
mtype << CP_HQD_CTX_SAVE_CONTROL__MTYPE__SHIFT;
- update_cu_mask(mm, mqd, q);
+ update_cu_mask(mm, mqd, minfo);
set_priority(m, q);
q->is_active = QUEUE_IS_ACTIVE(*q);
@@ -238,9 +239,10 @@ static void __update_mqd(struct mqd_manager *mm, void *mqd,
static void update_mqd(struct mqd_manager *mm, void *mqd,
- struct queue_properties *q)
+ struct queue_properties *q,
+ struct mqd_update_info *minfo)
{
- __update_mqd(mm, mqd, q, MTYPE_CC, 1);
+ __update_mqd(mm, mqd, q, minfo, MTYPE_CC, 1);
}
static uint32_t read_doorbell_id(void *mqd)
@@ -251,9 +253,10 @@ static uint32_t read_doorbell_id(void *mqd)
}
static void update_mqd_tonga(struct mqd_manager *mm, void *mqd,
- struct queue_properties *q)
+ struct queue_properties *q,
+ struct mqd_update_info *minfo)
{
- __update_mqd(mm, mqd, q, MTYPE_UC, 0);
+ __update_mqd(mm, mqd, q, minfo, MTYPE_UC, 0);
}
static int destroy_mqd(struct mqd_manager *mm, void *mqd,
@@ -317,9 +320,10 @@ static void init_mqd_hiq(struct mqd_manager *mm, void **mqd,
}
static void update_mqd_hiq(struct mqd_manager *mm, void *mqd,
- struct queue_properties *q)
+ struct queue_properties *q,
+ struct mqd_update_info *minfo)
{
- __update_mqd(mm, mqd, q, MTYPE_UC, 0);
+ __update_mqd(mm, mqd, q, minfo, MTYPE_UC, 0);
}
static void init_mqd_sdma(struct mqd_manager *mm, void **mqd,
@@ -336,7 +340,7 @@ static void init_mqd_sdma(struct mqd_manager *mm, void **mqd,
if (gart_addr)
*gart_addr = mqd_mem_obj->gpu_addr;
- mm->update_mqd(mm, m, q);
+ mm->update_mqd(mm, m, q, NULL);
}
static int load_mqd_sdma(struct mqd_manager *mm, void *mqd,
@@ -349,7 +353,8 @@ static int load_mqd_sdma(struct mqd_manager *mm, void *mqd,
}
static void update_mqd_sdma(struct mqd_manager *mm, void *mqd,
- struct queue_properties *q)
+ struct queue_properties *q,
+ struct mqd_update_info *minfo)
{
struct vi_sdma_mqd *m;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 6d8f9bb2d905..4104b167e721 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -472,9 +472,6 @@ struct queue_properties {
uint32_t ctl_stack_size;
uint64_t tba_addr;
uint64_t tma_addr;
- /* Relevant for CU */
- uint32_t cu_mask_count; /* Must be a multiple of 32 */
- uint32_t *cu_mask;
};
#define QUEUE_IS_ACTIVE(q) ((q).queue_size > 0 && \
@@ -482,6 +479,20 @@ struct queue_properties {
(q).queue_percent > 0 && \
!(q).is_evicted)
+enum mqd_update_flag {
+ UPDATE_FLAG_CU_MASK = 0,
+};
+
+struct mqd_update_info {
+ union {
+ struct {
+ uint32_t count; /* Must be a multiple of 32 */
+ uint32_t *ptr;
+ } cu_mask;
+ };
+ enum mqd_update_flag update_flag;
+};
+
/**
* struct queue
*
@@ -608,12 +619,14 @@ struct qcm_process_device {
uint32_t sh_hidden_private_base;
/* CWSR memory */
+ struct kgd_mem *cwsr_mem;
void *cwsr_kaddr;
uint64_t cwsr_base;
uint64_t tba_addr;
uint64_t tma_addr;
/* IB memory */
+ struct kgd_mem *ib_mem;
uint64_t ib_base;
void *ib_kaddr;
@@ -808,6 +821,7 @@ struct kfd_process {
/* Event ID allocator and lookup */
struct idr event_idr;
/* Event page */
+ u64 signal_handle;
struct kfd_signal_page *signal_page;
size_t signal_mapped_size;
size_t signal_event_count;
@@ -1031,10 +1045,10 @@ int pqm_create_queue(struct process_queue_manager *pqm,
unsigned int *qid,
uint32_t *p_doorbell_offset_in_process);
int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid);
-int pqm_update_queue(struct process_queue_manager *pqm, unsigned int qid,
- struct queue_properties *p);
-int pqm_set_cu_mask(struct process_queue_manager *pqm, unsigned int qid,
+int pqm_update_queue_properties(struct process_queue_manager *pqm, unsigned int qid,
struct queue_properties *p);
+int pqm_update_mqd(struct process_queue_manager *pqm, unsigned int qid,
+ struct mqd_update_info *minfo);
int pqm_set_gws(struct process_queue_manager *pqm, unsigned int qid,
void *gws);
struct kernel_queue *pqm_get_kernel_queue(struct process_queue_manager *pqm,
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index 21ec8a18cad2..457863861d6f 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -72,6 +72,8 @@ static int kfd_process_init_cwsr_apu(struct kfd_process *p, struct file *filep);
static void evict_process_worker(struct work_struct *work);
static void restore_process_worker(struct work_struct *work);
+static void kfd_process_device_destroy_cwsr_dgpu(struct kfd_process_device *pdd);
+
struct kfd_procfs_tree {
struct kobject *kobj;
};
@@ -685,10 +687,15 @@ void kfd_process_destroy_wq(void)
}
static void kfd_process_free_gpuvm(struct kgd_mem *mem,
- struct kfd_process_device *pdd)
+ struct kfd_process_device *pdd, void *kptr)
{
struct kfd_dev *dev = pdd->dev;
+ if (kptr) {
+ amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(dev->kgd, mem);
+ kptr = NULL;
+ }
+
amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(dev->kgd, mem, pdd->drm_priv);
amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, mem, pdd->drm_priv,
NULL);
@@ -702,63 +709,46 @@ static void kfd_process_free_gpuvm(struct kgd_mem *mem,
*/
static int kfd_process_alloc_gpuvm(struct kfd_process_device *pdd,
uint64_t gpu_va, uint32_t size,
- uint32_t flags, void **kptr)
+ uint32_t flags, struct kgd_mem **mem, void **kptr)
{
struct kfd_dev *kdev = pdd->dev;
- struct kgd_mem *mem = NULL;
- int handle;
int err;
err = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(kdev->kgd, gpu_va, size,
- pdd->drm_priv, &mem, NULL, flags);
+ pdd->drm_priv, mem, NULL, flags);
if (err)
goto err_alloc_mem;
- err = amdgpu_amdkfd_gpuvm_map_memory_to_gpu(kdev->kgd, mem,
+ err = amdgpu_amdkfd_gpuvm_map_memory_to_gpu(kdev->kgd, *mem,
pdd->drm_priv, NULL);
if (err)
goto err_map_mem;
- err = amdgpu_amdkfd_gpuvm_sync_memory(kdev->kgd, mem, true);
+ err = amdgpu_amdkfd_gpuvm_sync_memory(kdev->kgd, *mem, true);
if (err) {
pr_debug("Sync memory failed, wait interrupted by user signal\n");
goto sync_memory_failed;
}
- /* Create an obj handle so kfd_process_device_remove_obj_handle
- * will take care of the bo removal when the process finishes.
- * We do not need to take p->mutex, because the process is just
- * created and the ioctls have not had the chance to run.
- */
- handle = kfd_process_device_create_obj_handle(pdd, mem);
-
- if (handle < 0) {
- err = handle;
- goto free_gpuvm;
- }
-
if (kptr) {
err = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(kdev->kgd,
- (struct kgd_mem *)mem, kptr, NULL);
+ (struct kgd_mem *)*mem, kptr, NULL);
if (err) {
pr_debug("Map GTT BO to kernel failed\n");
- goto free_obj_handle;
+ goto sync_memory_failed;
}
}
return err;
-free_obj_handle:
- kfd_process_device_remove_obj_handle(pdd, handle);
-free_gpuvm:
sync_memory_failed:
- kfd_process_free_gpuvm(mem, pdd);
- return err;
+ amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(kdev->kgd, *mem, pdd->drm_priv);
err_map_mem:
- amdgpu_amdkfd_gpuvm_free_memory_of_gpu(kdev->kgd, mem, pdd->drm_priv,
+ amdgpu_amdkfd_gpuvm_free_memory_of_gpu(kdev->kgd, *mem, pdd->drm_priv,
NULL);
err_alloc_mem:
+ *mem = NULL;
*kptr = NULL;
return err;
}
@@ -776,6 +766,7 @@ static int kfd_process_device_reserve_ib_mem(struct kfd_process_device *pdd)
KFD_IOC_ALLOC_MEM_FLAGS_NO_SUBSTITUTE |
KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE |
KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE;
+ struct kgd_mem *mem;
void *kaddr;
int ret;
@@ -784,15 +775,26 @@ static int kfd_process_device_reserve_ib_mem(struct kfd_process_device *pdd)
/* ib_base is only set for dGPU */
ret = kfd_process_alloc_gpuvm(pdd, qpd->ib_base, PAGE_SIZE, flags,
- &kaddr);
+ &mem, &kaddr);
if (ret)
return ret;
+ qpd->ib_mem = mem;
qpd->ib_kaddr = kaddr;
return 0;
}
+static void kfd_process_device_destroy_ib_mem(struct kfd_process_device *pdd)
+{
+ struct qcm_process_device *qpd = &pdd->qpd;
+
+ if (!qpd->ib_kaddr || !qpd->ib_base)
+ return;
+
+ kfd_process_free_gpuvm(qpd->ib_mem, pdd, qpd->ib_kaddr);
+}
+
struct kfd_process *kfd_create_process(struct file *filep)
{
struct kfd_process *process;
@@ -947,6 +949,37 @@ static void kfd_process_device_free_bos(struct kfd_process_device *pdd)
}
}
+/*
+ * Just kunmap and unpin signal BO here. It will be freed in
+ * kfd_process_free_outstanding_kfd_bos()
+ */
+static void kfd_process_kunmap_signal_bo(struct kfd_process *p)
+{
+ struct kfd_process_device *pdd;
+ struct kfd_dev *kdev;
+ void *mem;
+
+ kdev = kfd_device_by_id(GET_GPU_ID(p->signal_handle));
+ if (!kdev)
+ return;
+
+ mutex_lock(&p->mutex);
+
+ pdd = kfd_get_process_device_data(kdev, p);
+ if (!pdd)
+ goto out;
+
+ mem = kfd_process_device_translate_handle(
+ pdd, GET_IDR_HANDLE(p->signal_handle));
+ if (!mem)
+ goto out;
+
+ amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(kdev->kgd, mem);
+
+out:
+ mutex_unlock(&p->mutex);
+}
+
static void kfd_process_free_outstanding_kfd_bos(struct kfd_process *p)
{
int i;
@@ -965,6 +998,9 @@ static void kfd_process_destroy_pdds(struct kfd_process *p)
pr_debug("Releasing pdd (topology id %d) for process (pasid 0x%x)\n",
pdd->dev->id, p->pasid);
+ kfd_process_device_destroy_cwsr_dgpu(pdd);
+ kfd_process_device_destroy_ib_mem(pdd);
+
if (pdd->drm_file) {
amdgpu_amdkfd_gpuvm_release_process_vm(
pdd->dev->kgd, pdd->drm_priv);
@@ -1049,9 +1085,11 @@ static void kfd_process_wq_release(struct work_struct *work)
{
struct kfd_process *p = container_of(work, struct kfd_process,
release_work);
+
kfd_process_remove_sysfs(p);
kfd_iommu_unbind_process(p);
+ kfd_process_kunmap_signal_bo(p);
kfd_process_free_outstanding_kfd_bos(p);
svm_range_list_fini(p);
@@ -1198,6 +1236,7 @@ static int kfd_process_device_init_cwsr_dgpu(struct kfd_process_device *pdd)
uint32_t flags = KFD_IOC_ALLOC_MEM_FLAGS_GTT
| KFD_IOC_ALLOC_MEM_FLAGS_NO_SUBSTITUTE
| KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE;
+ struct kgd_mem *mem;
void *kaddr;
int ret;
@@ -1206,10 +1245,11 @@ static int kfd_process_device_init_cwsr_dgpu(struct kfd_process_device *pdd)
/* cwsr_base is only set for dGPU */
ret = kfd_process_alloc_gpuvm(pdd, qpd->cwsr_base,
- KFD_CWSR_TBA_TMA_SIZE, flags, &kaddr);
+ KFD_CWSR_TBA_TMA_SIZE, flags, &mem, &kaddr);
if (ret)
return ret;
+ qpd->cwsr_mem = mem;
qpd->cwsr_kaddr = kaddr;
qpd->tba_addr = qpd->cwsr_base;
@@ -1222,6 +1262,17 @@ static int kfd_process_device_init_cwsr_dgpu(struct kfd_process_device *pdd)
return 0;
}
+static void kfd_process_device_destroy_cwsr_dgpu(struct kfd_process_device *pdd)
+{
+ struct kfd_dev *dev = pdd->dev;
+ struct qcm_process_device *qpd = &pdd->qpd;
+
+ if (!dev->cwsr_enabled || !qpd->cwsr_kaddr || !qpd->cwsr_base)
+ return;
+
+ kfd_process_free_gpuvm(qpd->cwsr_mem, pdd, qpd->cwsr_kaddr);
+}
+
void kfd_process_set_trap_handler(struct qcm_process_device *qpd,
uint64_t tba_addr,
uint64_t tma_addr)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
index 243dd1efcdbf..3627e7ac161b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
@@ -121,7 +121,7 @@ int pqm_set_gws(struct process_queue_manager *pqm, unsigned int qid,
pdd->qpd.num_gws = gws ? amdgpu_amdkfd_get_num_gws(dev->kgd) : 0;
return pqn->q->device->dqm->ops.update_queue(pqn->q->device->dqm,
- pqn->q);
+ pqn->q, NULL);
}
void kfd_process_dequeue_from_all_devices(struct kfd_process *p)
@@ -394,8 +394,6 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid)
pdd->qpd.num_gws = 0;
}
- kfree(pqn->q->properties.cu_mask);
- pqn->q->properties.cu_mask = NULL;
uninit_queue(pqn->q);
}
@@ -411,8 +409,8 @@ err_destroy_queue:
return retval;
}
-int pqm_update_queue(struct process_queue_manager *pqm, unsigned int qid,
- struct queue_properties *p)
+int pqm_update_queue_properties(struct process_queue_manager *pqm,
+ unsigned int qid, struct queue_properties *p)
{
int retval;
struct process_queue_node *pqn;
@@ -429,15 +427,15 @@ int pqm_update_queue(struct process_queue_manager *pqm, unsigned int qid,
pqn->q->properties.priority = p->priority;
retval = pqn->q->device->dqm->ops.update_queue(pqn->q->device->dqm,
- pqn->q);
+ pqn->q, NULL);
if (retval != 0)
return retval;
return 0;
}
-int pqm_set_cu_mask(struct process_queue_manager *pqm, unsigned int qid,
- struct queue_properties *p)
+int pqm_update_mqd(struct process_queue_manager *pqm,
+ unsigned int qid, struct mqd_update_info *minfo)
{
int retval;
struct process_queue_node *pqn;
@@ -448,16 +446,8 @@ int pqm_set_cu_mask(struct process_queue_manager *pqm, unsigned int qid,
return -EFAULT;
}
- /* Free the old CU mask memory if it is already allocated, then
- * allocate memory for the new CU mask.
- */
- kfree(pqn->q->properties.cu_mask);
-
- pqn->q->properties.cu_mask_count = p->cu_mask_count;
- pqn->q->properties.cu_mask = p->cu_mask;
-
retval = pqn->q->device->dqm->ops.update_queue(pqn->q->device->dqm,
- pqn->q);
+ pqn->q, minfo);
if (retval != 0)
return retval;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 9d0f65a90002..b691c8495d66 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -33,6 +33,11 @@
#include "kfd_svm.h"
#include "kfd_migrate.h"
+#ifdef dev_fmt
+#undef dev_fmt
+#endif
+#define dev_fmt(fmt) "kfd_svm: %s: " fmt, __func__
+
#define AMDGPU_SVM_RANGE_RESTORE_DELAY_MS 1
/* Long enough to ensure no retry fault comes after svm range is restored and
@@ -45,7 +50,9 @@ static bool
svm_range_cpu_invalidate_pagetables(struct mmu_interval_notifier *mni,
const struct mmu_notifier_range *range,
unsigned long cur_seq);
-
+static int
+svm_range_check_vm(struct kfd_process *p, uint64_t start, uint64_t last,
+ uint64_t *bo_s, uint64_t *bo_l);
static const struct mmu_interval_notifier_ops svm_range_mn_ops = {
.invalidate = svm_range_cpu_invalidate_pagetables,
};
@@ -158,17 +165,17 @@ svm_range_dma_map_dev(struct amdgpu_device *adev, struct svm_range *prange,
bo_adev->vm_manager.vram_base_offset -
bo_adev->kfd.dev->pgmap.range.start;
addr[i] |= SVM_RANGE_VRAM_DOMAIN;
- pr_debug("vram address detected: 0x%llx\n", addr[i]);
+ pr_debug_ratelimited("vram address: 0x%llx\n", addr[i]);
continue;
}
addr[i] = dma_map_page(dev, page, 0, PAGE_SIZE, dir);
r = dma_mapping_error(dev, addr[i]);
if (r) {
- pr_debug("failed %d dma_map_page\n", r);
+ dev_err(dev, "failed %d dma_map_page\n", r);
return r;
}
- pr_debug("dma mapping 0x%llx for page addr 0x%lx\n",
- addr[i] >> PAGE_SHIFT, page_to_pfn(page));
+ pr_debug_ratelimited("dma mapping 0x%llx for page addr 0x%lx\n",
+ addr[i] >> PAGE_SHIFT, page_to_pfn(page));
}
return 0;
}
@@ -217,7 +224,7 @@ void svm_range_dma_unmap(struct device *dev, dma_addr_t *dma_addr,
for (i = offset; i < offset + npages; i++) {
if (!svm_is_valid_dma_mapping_addr(dev, dma_addr[i]))
continue;
- pr_debug("dma unmapping 0x%llx\n", dma_addr[i] >> PAGE_SHIFT);
+ pr_debug_ratelimited("unmap 0x%llx\n", dma_addr[i] >> PAGE_SHIFT);
dma_unmap_page(dev, dma_addr[i], PAGE_SIZE, dir);
dma_addr[i] = 0;
}
@@ -1307,7 +1314,7 @@ struct svm_validate_context {
struct svm_range *prange;
bool intr;
unsigned long bitmap[MAX_GPU_INSTANCE];
- struct ttm_validate_buffer tv[MAX_GPU_INSTANCE+1];
+ struct ttm_validate_buffer tv[MAX_GPU_INSTANCE];
struct list_head validate_list;
struct ww_acquire_ctx ticket;
};
@@ -1334,11 +1341,6 @@ static int svm_range_reserve_bos(struct svm_validate_context *ctx)
ctx->tv[gpuidx].num_shared = 4;
list_add(&ctx->tv[gpuidx].head, &ctx->validate_list);
}
- if (ctx->prange->svm_bo && ctx->prange->ttm_res) {
- ctx->tv[MAX_GPU_INSTANCE].bo = &ctx->prange->svm_bo->bo->tbo;
- ctx->tv[MAX_GPU_INSTANCE].num_shared = 1;
- list_add(&ctx->tv[MAX_GPU_INSTANCE].head, &ctx->validate_list);
- }
r = ttm_eu_reserve_buffers(&ctx->ticket, &ctx->validate_list,
ctx->intr, NULL);
@@ -1459,7 +1461,7 @@ static int svm_range_validate_and_map(struct mm_struct *mm,
/* This should never happen. actual_loc gets set by
* svm_migrate_ram_to_vram after allocating a BO.
*/
- WARN(1, "VRAM BO missing during validation\n");
+ WARN_ONCE(1, "VRAM BO missing during validation\n");
return -EINVAL;
}
@@ -1552,7 +1554,7 @@ unreserve_out:
* Context: Returns with mmap write lock held, pending deferred work flushed
*
*/
-static void
+void
svm_range_list_lock_and_flush_work(struct svm_range_list *svms,
struct mm_struct *mm)
{
@@ -2308,6 +2310,7 @@ svm_range_best_restore_location(struct svm_range *prange,
return -1;
}
+
static int
svm_range_get_range_boundaries(struct kfd_process *p, int64_t addr,
unsigned long *start, unsigned long *last)
@@ -2355,8 +2358,59 @@ svm_range_get_range_boundaries(struct kfd_process *p, int64_t addr,
vma->vm_end >> PAGE_SHIFT, *last);
return 0;
+}
+static int
+svm_range_check_vm_userptr(struct kfd_process *p, uint64_t start, uint64_t last,
+ uint64_t *bo_s, uint64_t *bo_l)
+{
+ struct amdgpu_bo_va_mapping *mapping;
+ struct interval_tree_node *node;
+ struct amdgpu_bo *bo = NULL;
+ unsigned long userptr;
+ uint32_t i;
+ int r;
+
+ for (i = 0; i < p->n_pdds; i++) {
+ struct amdgpu_vm *vm;
+
+ if (!p->pdds[i]->drm_priv)
+ continue;
+
+ vm = drm_priv_to_vm(p->pdds[i]->drm_priv);
+ r = amdgpu_bo_reserve(vm->root.bo, false);
+ if (r)
+ return r;
+
+ /* Check userptr by searching entire vm->va interval tree */
+ node = interval_tree_iter_first(&vm->va, 0, ~0ULL);
+ while (node) {
+ mapping = container_of((struct rb_node *)node,
+ struct amdgpu_bo_va_mapping, rb);
+ bo = mapping->bo_va->base.bo;
+
+ if (!amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm,
+ start << PAGE_SHIFT,
+ last << PAGE_SHIFT,
+ &userptr)) {
+ node = interval_tree_iter_next(node, 0, ~0ULL);
+ continue;
+ }
+
+ pr_debug("[0x%llx 0x%llx] already userptr mapped\n",
+ start, last);
+ if (bo_s && bo_l) {
+ *bo_s = userptr >> PAGE_SHIFT;
+ *bo_l = *bo_s + bo->tbo.ttm->num_pages - 1;
+ }
+ amdgpu_bo_unreserve(vm->root.bo);
+ return -EADDRINUSE;
+ }
+ amdgpu_bo_unreserve(vm->root.bo);
+ }
+ return 0;
}
+
static struct
svm_range *svm_range_create_unregistered_range(struct amdgpu_device *adev,
struct kfd_process *p,
@@ -2366,10 +2420,26 @@ svm_range *svm_range_create_unregistered_range(struct amdgpu_device *adev,
struct svm_range *prange = NULL;
unsigned long start, last;
uint32_t gpuid, gpuidx;
+ uint64_t bo_s = 0;
+ uint64_t bo_l = 0;
+ int r;
if (svm_range_get_range_boundaries(p, addr, &start, &last))
return NULL;
+ r = svm_range_check_vm(p, start, last, &bo_s, &bo_l);
+ if (r != -EADDRINUSE)
+ r = svm_range_check_vm_userptr(p, start, last, &bo_s, &bo_l);
+
+ if (r == -EADDRINUSE) {
+ if (addr >= bo_s && addr <= bo_l)
+ return NULL;
+
+ /* Create one page svm range if 2MB range overlapping */
+ start = addr;
+ last = addr;
+ }
+
prange = svm_range_new(&p->svms, start, last);
if (!prange) {
pr_debug("Failed to create prange in address [0x%llx]\n", addr);
@@ -2668,8 +2738,67 @@ int svm_range_list_init(struct kfd_process *p)
}
/**
+ * svm_range_check_vm - check if virtual address range mapped already
+ * @p: current kfd_process
+ * @start: range start address, in pages
+ * @last: range last address, in pages
+ * @bo_s: mapping start address in pages if address range already mapped
+ * @bo_l: mapping last address in pages if address range already mapped
+ *
+ * The purpose is to avoid virtual address ranges already allocated by
+ * kfd_ioctl_alloc_memory_of_gpu ioctl.
+ * It looks for each pdd in the kfd_process.
+ *
+ * Context: Process context
+ *
+ * Return 0 - OK, if the range is not mapped.
+ * Otherwise error code:
+ * -EADDRINUSE - if address is mapped already by kfd_ioctl_alloc_memory_of_gpu
+ * -ERESTARTSYS - A wait for the buffer to become unreserved was interrupted by
+ * a signal. Release all buffer reservations and return to user-space.
+ */
+static int
+svm_range_check_vm(struct kfd_process *p, uint64_t start, uint64_t last,
+ uint64_t *bo_s, uint64_t *bo_l)
+{
+ struct amdgpu_bo_va_mapping *mapping;
+ struct interval_tree_node *node;
+ uint32_t i;
+ int r;
+
+ for (i = 0; i < p->n_pdds; i++) {
+ struct amdgpu_vm *vm;
+
+ if (!p->pdds[i]->drm_priv)
+ continue;
+
+ vm = drm_priv_to_vm(p->pdds[i]->drm_priv);
+ r = amdgpu_bo_reserve(vm->root.bo, false);
+ if (r)
+ return r;
+
+ node = interval_tree_iter_first(&vm->va, start, last);
+ if (node) {
+ pr_debug("range [0x%llx 0x%llx] already TTM mapped\n",
+ start, last);
+ mapping = container_of((struct rb_node *)node,
+ struct amdgpu_bo_va_mapping, rb);
+ if (bo_s && bo_l) {
+ *bo_s = mapping->start;
+ *bo_l = mapping->last;
+ }
+ amdgpu_bo_unreserve(vm->root.bo);
+ return -EADDRINUSE;
+ }
+ amdgpu_bo_unreserve(vm->root.bo);
+ }
+
+ return 0;
+}
+
+/**
* svm_range_is_valid - check if virtual address range is valid
- * @mm: current process mm_struct
+ * @p: current kfd_process
* @start: range start address, in pages
* @size: range size, in pages
*
@@ -2678,28 +2807,28 @@ int svm_range_list_init(struct kfd_process *p)
* Context: Process context
*
* Return:
- * true - valid svm range
- * false - invalid svm range
+ * 0 - OK, otherwise error code
*/
-static bool
-svm_range_is_valid(struct mm_struct *mm, uint64_t start, uint64_t size)
+static int
+svm_range_is_valid(struct kfd_process *p, uint64_t start, uint64_t size)
{
const unsigned long device_vma = VM_IO | VM_PFNMAP | VM_MIXEDMAP;
struct vm_area_struct *vma;
unsigned long end;
+ unsigned long start_unchg = start;
start <<= PAGE_SHIFT;
end = start + (size << PAGE_SHIFT);
-
do {
- vma = find_vma(mm, start);
+ vma = find_vma(p->mm, start);
if (!vma || start < vma->vm_start ||
(vma->vm_flags & device_vma))
- return false;
+ return -EFAULT;
start = min(end, vma->vm_end);
} while (start < end);
- return true;
+ return svm_range_check_vm(p, start_unchg, (end - 1) >> PAGE_SHIFT, NULL,
+ NULL);
}
/**
@@ -3002,9 +3131,9 @@ svm_range_set_attr(struct kfd_process *p, uint64_t start, uint64_t size,
svm_range_list_lock_and_flush_work(svms, mm);
- if (!svm_range_is_valid(mm, start, size)) {
- pr_debug("invalid range\n");
- r = -EFAULT;
+ r = svm_range_is_valid(p, start, size);
+ if (r) {
+ pr_debug("invalid range r=%d\n", r);
mmap_write_unlock(mm);
goto out;
}
@@ -3106,6 +3235,7 @@ svm_range_get_attr(struct kfd_process *p, uint64_t start, uint64_t size,
uint32_t flags_or = 0;
int gpuidx;
uint32_t i;
+ int r = 0;
pr_debug("svms 0x%p [0x%llx 0x%llx] nattr 0x%x\n", &p->svms, start,
start + size - 1, nattr);
@@ -3119,12 +3249,12 @@ svm_range_get_attr(struct kfd_process *p, uint64_t start, uint64_t size,
flush_work(&p->svms.deferred_list_work);
mmap_read_lock(mm);
- if (!svm_range_is_valid(mm, start, size)) {
- pr_debug("invalid range\n");
- mmap_read_unlock(mm);
- return -EINVAL;
- }
+ r = svm_range_is_valid(p, start, size);
mmap_read_unlock(mm);
+ if (r) {
+ pr_debug("invalid range r=%d\n", r);
+ return r;
+ }
for (i = 0; i < nattr; i++) {
switch (attrs[i].type) {
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
index c6ec55354c7b..6dc91c33e80f 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
@@ -188,6 +188,7 @@ void svm_range_prefault(struct svm_range *prange, struct mm_struct *mm,
void *owner);
struct kfd_process_device *
svm_range_get_pdd_by_adev(struct svm_range *prange, struct amdgpu_device *adev);
+void svm_range_list_lock_and_flush_work(struct svm_range_list *svms, struct mm_struct *mm);
/* SVM API and HMM page migration work together, device memory type
* is initialized to not 0 when page migration register device memory.
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
index 98cca5f2b27f..dd593ad0614a 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
@@ -1296,6 +1296,24 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
proximity_domain = atomic_inc_return(&topology_crat_proximity_domain);
+ adev = (struct amdgpu_device *)(gpu->kgd);
+
+ /* Include the CPU in xGMI hive if xGMI connected by assigning it the hive ID. */
+ if (gpu->hive_id && adev->gmc.xgmi.connected_to_cpu) {
+ struct kfd_topology_device *top_dev;
+
+ down_read(&topology_lock);
+
+ list_for_each_entry(top_dev, &topology_device_list, list) {
+ if (top_dev->gpu)
+ break;
+
+ top_dev->node_props.hive_id = gpu->hive_id;
+ }
+
+ up_read(&topology_lock);
+ }
+
/* Check to see if this gpu device exists in the topology_device_list.
* If so, assign the gpu to that device,
* else create a Virtual CRAT for this gpu device and then parse that
@@ -1457,7 +1475,6 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
dev->node_props.max_waves_per_simd = 10;
}
- adev = (struct amdgpu_device *)(dev->gpu->kgd);
/* kfd only concerns sram ecc on GFX and HBM ecc on UMC */
dev->node_props.capability |=
((adev->ras_enabled & BIT(AMDGPU_RAS_BLOCK__GFX)) != 0) ?