diff options
Diffstat (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_chardev.c')
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 85 |
1 files changed, 80 insertions, 5 deletions
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 1c7016958d6d..dc774ddf3445 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -65,6 +65,25 @@ static int kfd_char_dev_major = -1; static struct class *kfd_class; struct device *kfd_device; +static inline struct kfd_process_device *kfd_lock_pdd_by_id(struct kfd_process *p, __u32 gpu_id) +{ + struct kfd_process_device *pdd; + + mutex_lock(&p->mutex); + pdd = kfd_process_device_data_by_id(p, gpu_id); + + if (pdd) + return pdd; + + mutex_unlock(&p->mutex); + return NULL; +} + +static inline void kfd_unlock_pdd(struct kfd_process_device *pdd) +{ + mutex_unlock(&pdd->process->mutex); +} + int kfd_chardev_init(void) { int err = 0; @@ -280,6 +299,7 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p, struct kfd_process_device *pdd; struct queue_properties q_properties; uint32_t doorbell_offset_in_process = 0; + struct amdgpu_bo *wptr_bo = NULL; memset(&q_properties, 0, sizeof(struct queue_properties)); @@ -307,12 +327,49 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p, goto err_bind_process; } + /* Starting with GFX11, wptr BOs must be mapped to GART for MES to determine work + * on unmapped queues for usermode queue oversubscription (no aggregated doorbell) + */ + if (dev->shared_resources.enable_mes && + ((dev->adev->mes.sched_version & AMDGPU_MES_API_VERSION_MASK) + >> AMDGPU_MES_API_VERSION_SHIFT) >= 2) { + struct amdgpu_bo_va_mapping *wptr_mapping; + struct amdgpu_vm *wptr_vm; + + wptr_vm = drm_priv_to_vm(pdd->drm_priv); + err = amdgpu_bo_reserve(wptr_vm->root.bo, false); + if (err) + goto err_wptr_map_gart; + + wptr_mapping = amdgpu_vm_bo_lookup_mapping( + wptr_vm, args->write_pointer_address >> PAGE_SHIFT); + amdgpu_bo_unreserve(wptr_vm->root.bo); + if (!wptr_mapping) { + pr_err("Failed to lookup wptr bo\n"); + err = -EINVAL; + goto err_wptr_map_gart; + } + + wptr_bo = wptr_mapping->bo_va->base.bo; + if (wptr_bo->tbo.base.size > PAGE_SIZE) { + pr_err("Requested GART mapping for wptr bo larger than one page\n"); + err = -EINVAL; + goto err_wptr_map_gart; + } + + err = amdgpu_amdkfd_map_gtt_bo_to_gart(dev->adev, wptr_bo); + if (err) { + pr_err("Failed to map wptr bo to GART\n"); + goto err_wptr_map_gart; + } + } + pr_debug("Creating queue for PASID 0x%x on gpu 0x%x\n", p->pasid, dev->id); - err = pqm_create_queue(&p->pqm, dev, filep, &q_properties, &queue_id, NULL, NULL, NULL, - &doorbell_offset_in_process); + err = pqm_create_queue(&p->pqm, dev, filep, &q_properties, &queue_id, wptr_bo, + NULL, NULL, NULL, &doorbell_offset_in_process); if (err != 0) goto err_create_queue; @@ -344,6 +401,9 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p, return 0; err_create_queue: + if (wptr_bo) + amdgpu_amdkfd_free_gtt_mem(dev->adev, wptr_bo); +err_wptr_map_gart: err_bind_process: err_pdd: mutex_unlock(&p->mutex); @@ -814,7 +874,7 @@ static int kfd_ioctl_wait_events(struct file *filp, struct kfd_process *p, err = kfd_wait_on_events(p, args->num_events, (void __user *)args->events_ptr, (args->wait_for_all != 0), - args->timeout, &args->wait_result); + &args->timeout, &args->wait_result); return err; } @@ -958,6 +1018,19 @@ bool kfd_dev_is_large_bar(struct kfd_dev *dev) return false; } +static int kfd_ioctl_get_available_memory(struct file *filep, + struct kfd_process *p, void *data) +{ + struct kfd_ioctl_get_available_memory_args *args = data; + struct kfd_process_device *pdd = kfd_lock_pdd_by_id(p, args->gpu_id); + + if (!pdd) + return -EINVAL; + args->available = amdgpu_amdkfd_get_available_memory(pdd->dev->adev); + kfd_unlock_pdd(pdd); + return 0; +} + static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep, struct kfd_process *p, void *data) { @@ -2361,7 +2434,7 @@ static int criu_restore(struct file *filep, * Set the process to evicted state to avoid running any new queues before all the memory * mappings are ready. */ - ret = kfd_process_evict_queues(p); + ret = kfd_process_evict_queues(p, KFD_QUEUE_EVICTION_CRIU_RESTORE); if (ret) goto exit_unlock; @@ -2480,7 +2553,7 @@ static int criu_process_info(struct file *filep, goto err_unlock; } - ret = kfd_process_evict_queues(p); + ret = kfd_process_evict_queues(p, KFD_QUEUE_EVICTION_CRIU_CHECKPOINT); if (ret) goto err_unlock; @@ -2648,6 +2721,8 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = { AMDKFD_IOCTL_DEF(AMDKFD_IOC_CRIU_OP, kfd_ioctl_criu, KFD_IOC_FLAG_CHECKPOINT_RESTORE), + AMDKFD_IOCTL_DEF(AMDKFD_IOC_AVAILABLE_MEMORY, + kfd_ioctl_get_available_memory, 0), }; #define AMDKFD_CORE_IOCTL_COUNT ARRAY_SIZE(amdkfd_ioctls) |