aboutsummaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/amd/amdkfd
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/amd/amdkfd')
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_chardev.c27
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device.c11
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c7
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c27
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c4
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_events.c13
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_migrate.c3
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c15
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_priv.h2
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_process.c18
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_svm.c10
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_topology.c2
12 files changed, 87 insertions, 52 deletions
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 6d291aa6386b..a0e30f21e12e 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -1065,6 +1065,20 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
mutex_unlock(&p->svms.lock);
return -EADDRINUSE;
}
+
+ /* When register user buffer check if it has been registered by svm by
+ * buffer cpu virtual address.
+ */
+ if ((flags & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) &&
+ interval_tree_iter_first(&p->svms.objects,
+ args->mmap_offset >> PAGE_SHIFT,
+ (args->mmap_offset + args->size - 1) >> PAGE_SHIFT)) {
+ pr_err("User Buffer Address: 0x%llx already allocated by SVM\n",
+ args->mmap_offset);
+ mutex_unlock(&p->svms.lock);
+ return -EADDRINUSE;
+ }
+
mutex_unlock(&p->svms.lock);
#endif
mutex_lock(&p->mutex);
@@ -1127,8 +1141,13 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
}
/* Update the VRAM usage count */
- if (flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM)
- WRITE_ONCE(pdd->vram_usage, pdd->vram_usage + args->size);
+ if (flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
+ uint64_t size = args->size;
+
+ if (flags & KFD_IOC_ALLOC_MEM_FLAGS_AQL_QUEUE_MEM)
+ size >>= 1;
+ WRITE_ONCE(pdd->vram_usage, pdd->vram_usage + PAGE_ALIGN(size));
+ }
mutex_unlock(&p->mutex);
@@ -2879,8 +2898,8 @@ static int kfd_mmio_mmap(struct kfd_dev *dev, struct kfd_process *process,
address = dev->adev->rmmio_remap.bus_addr;
- vma->vm_flags |= VM_IO | VM_DONTCOPY | VM_DONTEXPAND | VM_NORESERVE |
- VM_DONTDUMP | VM_PFNMAP;
+ vm_flags_set(vma, VM_IO | VM_DONTCOPY | VM_DONTEXPAND | VM_NORESERVE |
+ VM_DONTDUMP | VM_PFNMAP);
vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index b8936340742b..3de7f616a001 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -262,23 +262,12 @@ struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf)
f2g = &gfx_v8_kfd2kgd;
break;
case CHIP_FIJI:
- gfx_target_version = 80003;
- f2g = &gfx_v8_kfd2kgd;
- break;
case CHIP_POLARIS10:
gfx_target_version = 80003;
f2g = &gfx_v8_kfd2kgd;
break;
case CHIP_POLARIS11:
- gfx_target_version = 80003;
- if (!vf)
- f2g = &gfx_v8_kfd2kgd;
- break;
case CHIP_POLARIS12:
- gfx_target_version = 80003;
- if (!vf)
- f2g = &gfx_v8_kfd2kgd;
- break;
case CHIP_VEGAM:
gfx_target_version = 80003;
if (!vf)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index ecb4c3abc629..7a95698d83f7 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -200,7 +200,7 @@ static int add_queue_mes(struct device_queue_manager *dqm, struct queue *q,
queue_input.wptr_addr = (uint64_t)q->properties.write_ptr;
if (q->wptr_bo) {
- wptr_addr_off = (uint64_t)q->properties.write_ptr - (uint64_t)q->wptr_bo->kfd_bo->va;
+ wptr_addr_off = (uint64_t)q->properties.write_ptr & (PAGE_SIZE - 1);
queue_input.wptr_mc_addr = ((uint64_t)q->wptr_bo->tbo.resource->start << PAGE_SHIFT) + wptr_addr_off;
}
@@ -2373,7 +2373,7 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
if (init_mqd_managers(dqm))
goto out_free;
- if (allocate_hiq_sdma_mqd(dqm)) {
+ if (!dev->shared_resources.enable_mes && allocate_hiq_sdma_mqd(dqm)) {
pr_err("Failed to allocate hiq sdma mqd trunk buffer\n");
goto out_free;
}
@@ -2397,7 +2397,8 @@ static void deallocate_hiq_sdma_mqd(struct kfd_dev *dev,
void device_queue_manager_uninit(struct device_queue_manager *dqm)
{
dqm->ops.uninitialize(dqm);
- deallocate_hiq_sdma_mqd(dqm->dev, &dqm->hiq_sdma_mqd);
+ if (!dqm->dev->shared_resources.enable_mes)
+ deallocate_hiq_sdma_mqd(dqm->dev, &dqm->hiq_sdma_mqd);
kfree(dqm);
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c
index d119070956fb..8b2dd2670ab7 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c
@@ -59,30 +59,27 @@ static int update_qpd_v9(struct device_queue_manager *dqm,
/* check if sh_mem_config register already configured */
if (qpd->sh_mem_config == 0) {
- qpd->sh_mem_config =
- SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
+ qpd->sh_mem_config = SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
- if (KFD_GC_VERSION(dqm->dev) == IP_VERSION(9, 4, 2)) {
- /* Aldebaran can safely support different XNACK modes
- * per process
- */
- if (!pdd->process->xnack_enabled)
- qpd->sh_mem_config |=
- 1 << SH_MEM_CONFIG__RETRY_DISABLE__SHIFT;
- } else if (dqm->dev->noretry &&
- !dqm->dev->use_iommu_v2) {
- qpd->sh_mem_config |=
- 1 << SH_MEM_CONFIG__RETRY_DISABLE__SHIFT;
- }
+ if (dqm->dev->noretry && !dqm->dev->use_iommu_v2)
+ qpd->sh_mem_config |= 1 << SH_MEM_CONFIG__RETRY_DISABLE__SHIFT;
qpd->sh_mem_ape1_limit = 0;
qpd->sh_mem_ape1_base = 0;
}
+ if (KFD_SUPPORT_XNACK_PER_PROCESS(dqm->dev)) {
+ if (!pdd->process->xnack_enabled)
+ qpd->sh_mem_config |= 1 << SH_MEM_CONFIG__RETRY_DISABLE__SHIFT;
+ else
+ qpd->sh_mem_config &= ~(1 << SH_MEM_CONFIG__RETRY_DISABLE__SHIFT);
+ }
+
qpd->sh_mem_bases = compute_sh_mem_bases_64bit(pdd);
- pr_debug("sh_mem_bases 0x%X\n", qpd->sh_mem_bases);
+ pr_debug("sh_mem_bases 0x%X sh_mem_config 0x%X\n", qpd->sh_mem_bases,
+ qpd->sh_mem_config);
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
index cd4e61bf0493..cbef2e147da5 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
@@ -159,8 +159,8 @@ int kfd_doorbell_mmap(struct kfd_dev *dev, struct kfd_process *process,
address = kfd_get_process_doorbells(pdd);
if (!address)
return -ENOMEM;
- vma->vm_flags |= VM_IO | VM_DONTCOPY | VM_DONTEXPAND | VM_NORESERVE |
- VM_DONTDUMP | VM_PFNMAP;
+ vm_flags_set(vma, VM_IO | VM_DONTCOPY | VM_DONTEXPAND | VM_NORESERVE |
+ VM_DONTDUMP | VM_PFNMAP);
vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
index 729d26d648af..c894cf8f7c50 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
@@ -778,16 +778,13 @@ static struct kfd_event_waiter *alloc_event_waiters(uint32_t num_events)
struct kfd_event_waiter *event_waiters;
uint32_t i;
- event_waiters = kmalloc_array(num_events,
- sizeof(struct kfd_event_waiter),
- GFP_KERNEL);
+ event_waiters = kcalloc(num_events, sizeof(struct kfd_event_waiter),
+ GFP_KERNEL);
if (!event_waiters)
return NULL;
- for (i = 0; (event_waiters) && (i < num_events) ; i++) {
+ for (i = 0; i < num_events; i++)
init_wait(&event_waiters[i].wait);
- event_waiters[i].activated = false;
- }
return event_waiters;
}
@@ -1052,8 +1049,8 @@ int kfd_event_mmap(struct kfd_process *p, struct vm_area_struct *vma)
pfn = __pa(page->kernel_address);
pfn >>= PAGE_SHIFT;
- vma->vm_flags |= VM_IO | VM_DONTCOPY | VM_DONTEXPAND | VM_NORESERVE
- | VM_DONTDUMP | VM_PFNMAP;
+ vm_flags_set(vma, VM_IO | VM_DONTCOPY | VM_DONTEXPAND | VM_NORESERVE
+ | VM_DONTDUMP | VM_PFNMAP);
pr_debug("Mapping signal page\n");
pr_debug(" start user address == 0x%08lx\n", vma->vm_start);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
index 10048ce16aea..de8ce72344fc 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
@@ -1027,8 +1027,7 @@ int svm_migrate_init(struct amdgpu_device *adev)
/* Disable SVM support capability */
pgmap->type = 0;
if (pgmap->type == MEMORY_DEVICE_PRIVATE)
- devm_release_mem_region(adev->dev, res->start,
- res->end - res->start + 1);
+ devm_release_mem_region(adev->dev, res->start, resource_size(res));
return PTR_ERR(r);
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c
index 4f6390f3236e..4a9af800b1f1 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c
@@ -308,11 +308,16 @@ static void init_mqd_sdma(struct mqd_manager *mm, void **mqd,
struct queue_properties *q)
{
struct v11_sdma_mqd *m;
+ int size;
m = (struct v11_sdma_mqd *) mqd_mem_obj->cpu_ptr;
- memset(m, 0, sizeof(struct v11_sdma_mqd));
+ if (mm->dev->shared_resources.enable_mes)
+ size = PAGE_SIZE;
+ else
+ size = sizeof(struct v11_sdma_mqd);
+ memset(m, 0, size);
*mqd = m;
if (gart_addr)
*gart_addr = mqd_mem_obj->gpu_addr;
@@ -443,6 +448,14 @@ struct mqd_manager *mqd_manager_init_v11(enum KFD_MQD_TYPE type,
#if defined(CONFIG_DEBUG_FS)
mqd->debugfs_show_mqd = debugfs_show_mqd_sdma;
#endif
+ /*
+ * To allocate SDMA MQDs by generic functions
+ * when MES is enabled.
+ */
+ if (dev->shared_resources.enable_mes) {
+ mqd->allocate_mqd = allocate_mqd;
+ mqd->free_mqd = kfd_free_mqd_cp;
+ }
pr_debug("%s@%i\n", __func__, __LINE__);
break;
default:
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 552c3ac85a13..bfa30d12406b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -206,6 +206,8 @@ enum cache_policy {
#define KFD_GC_VERSION(dev) ((dev)->adev->ip_versions[GC_HWIP][0])
#define KFD_IS_SOC15(dev) ((KFD_GC_VERSION(dev)) >= (IP_VERSION(9, 0, 1)))
+#define KFD_SUPPORT_XNACK_PER_PROCESS(dev)\
+ (KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 2))
struct kfd_event_interrupt_class {
bool (*interrupt_isr)(struct kfd_dev *dev,
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index 51b1683ac5c1..7acd55a814b2 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -1330,7 +1330,7 @@ bool kfd_process_xnack_mode(struct kfd_process *p, bool supported)
* per-process XNACK mode selection. But let the dev->noretry
* setting still influence the default XNACK mode.
*/
- if (supported && KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 2))
+ if (supported && KFD_SUPPORT_XNACK_PER_PROCESS(dev))
continue;
/* GFXv10 and later GPUs do not support shader preemption
@@ -1563,6 +1563,8 @@ err_free_pdd:
int kfd_process_device_init_vm(struct kfd_process_device *pdd,
struct file *drm_file)
{
+ struct amdgpu_fpriv *drv_priv;
+ struct amdgpu_vm *avm;
struct kfd_process *p;
struct kfd_dev *dev;
int ret;
@@ -1573,10 +1575,15 @@ int kfd_process_device_init_vm(struct kfd_process_device *pdd,
if (pdd->drm_priv)
return -EBUSY;
+ ret = amdgpu_file_to_fpriv(drm_file, &drv_priv);
+ if (ret)
+ return ret;
+ avm = &drv_priv->vm;
+
p = pdd->process;
dev = pdd->dev;
- ret = amdgpu_amdkfd_gpuvm_acquire_process_vm(dev->adev, drm_file,
+ ret = amdgpu_amdkfd_gpuvm_acquire_process_vm(dev->adev, avm,
&p->kgd_process_info,
&p->ef);
if (ret) {
@@ -1593,7 +1600,7 @@ int kfd_process_device_init_vm(struct kfd_process_device *pdd,
if (ret)
goto err_init_cwsr;
- ret = amdgpu_amdkfd_gpuvm_set_vm_pasid(dev->adev, drm_file, p->pasid);
+ ret = amdgpu_amdkfd_gpuvm_set_vm_pasid(dev->adev, avm, p->pasid);
if (ret)
goto err_set_pasid;
@@ -1607,6 +1614,7 @@ err_init_cwsr:
kfd_process_device_destroy_ib_mem(pdd);
err_reserve_ib_mem:
pdd->drm_priv = NULL;
+ amdgpu_amdkfd_gpuvm_destroy_cb(dev->adev, avm);
return ret;
}
@@ -1978,8 +1986,8 @@ int kfd_reserved_mem_mmap(struct kfd_dev *dev, struct kfd_process *process,
return -ENOMEM;
}
- vma->vm_flags |= VM_IO | VM_DONTCOPY | VM_DONTEXPAND
- | VM_NORESERVE | VM_DONTDUMP | VM_PFNMAP;
+ vm_flags_set(vma, VM_IO | VM_DONTCOPY | VM_DONTEXPAND
+ | VM_NORESERVE | VM_DONTDUMP | VM_PFNMAP);
/* Mapping pages to user process */
return remap_pfn_range(vma, vma->vm_start,
PFN_DOWN(__pa(qpd->cwsr_kaddr)),
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 814f99888ab1..dc6fd6967050 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -23,6 +23,7 @@
#include <linux/types.h>
#include <linux/sched/task.h>
+#include <drm/ttm/ttm_tt.h>
#include "amdgpu_sync.h"
#include "amdgpu_object.h"
#include "amdgpu_vm.h"
@@ -570,6 +571,15 @@ svm_range_vram_node_new(struct amdgpu_device *adev, struct svm_range *prange,
goto reserve_bo_failed;
}
+ if (clear) {
+ r = amdgpu_bo_sync_wait(bo, AMDGPU_FENCE_OWNER_KFD, false);
+ if (r) {
+ pr_debug("failed %d to sync bo\n", r);
+ amdgpu_bo_unreserve(bo);
+ goto reserve_bo_failed;
+ }
+ }
+
r = dma_resv_reserve_fences(bo->tbo.base.resv, 1);
if (r) {
pr_debug("failed %d to reserve bo\n", r);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
index bceb1a5b2518..3fdaba56be6f 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
@@ -801,7 +801,7 @@ static int kfd_build_sysfs_node_entry(struct kfd_topology_device *dev,
p2plink->attr.name = "properties";
p2plink->attr.mode = KFD_SYSFS_FILE_MODE;
- sysfs_attr_init(&iolink->attr);
+ sysfs_attr_init(&p2plink->attr);
ret = sysfs_create_file(p2plink->kobj, &p2plink->attr);
if (ret < 0)
return ret;