diff options
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c')
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 180 |
1 files changed, 142 insertions, 38 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index f3129b912714..be1ab43473c6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -25,6 +25,7 @@ #include <linux/list.h> #include <linux/pagemap.h> #include <linux/sched/mm.h> +#include <linux/dma-buf.h> #include <drm/drmP.h> #include "amdgpu_object.h" #include "amdgpu_vm.h" @@ -110,17 +111,17 @@ void amdgpu_amdkfd_gpuvm_init_mem_limits(void) (kfd_mem_limit.max_ttm_mem_limit >> 20)); } -static int amdgpu_amdkfd_reserve_system_mem_limit(struct amdgpu_device *adev, +static int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev, uint64_t size, u32 domain, bool sg) { - size_t acc_size, system_mem_needed, ttm_mem_needed; + size_t acc_size, system_mem_needed, ttm_mem_needed, vram_needed; + uint64_t reserved_for_pt = amdgpu_amdkfd_total_mem_size >> 9; int ret = 0; acc_size = ttm_bo_dma_acc_size(&adev->mman.bdev, size, sizeof(struct amdgpu_bo)); - spin_lock(&kfd_mem_limit.mem_limit_lock); - + vram_needed = 0; if (domain == AMDGPU_GEM_DOMAIN_GTT) { /* TTM GTT memory */ system_mem_needed = acc_size + size; @@ -133,23 +134,30 @@ static int amdgpu_amdkfd_reserve_system_mem_limit(struct amdgpu_device *adev, /* VRAM and SG */ system_mem_needed = acc_size; ttm_mem_needed = acc_size; + if (domain == AMDGPU_GEM_DOMAIN_VRAM) + vram_needed = size; } + spin_lock(&kfd_mem_limit.mem_limit_lock); + if ((kfd_mem_limit.system_mem_used + system_mem_needed > - kfd_mem_limit.max_system_mem_limit) || - (kfd_mem_limit.ttm_mem_used + ttm_mem_needed > - kfd_mem_limit.max_ttm_mem_limit)) + kfd_mem_limit.max_system_mem_limit) || + (kfd_mem_limit.ttm_mem_used + ttm_mem_needed > + kfd_mem_limit.max_ttm_mem_limit) || + (adev->kfd.vram_used + vram_needed > + adev->gmc.real_vram_size - reserved_for_pt)) { ret = -ENOMEM; - else { + } else { kfd_mem_limit.system_mem_used += system_mem_needed; kfd_mem_limit.ttm_mem_used += ttm_mem_needed; + adev->kfd.vram_used += vram_needed; } spin_unlock(&kfd_mem_limit.mem_limit_lock); return ret; } -static void unreserve_system_mem_limit(struct amdgpu_device *adev, +static void unreserve_mem_limit(struct amdgpu_device *adev, uint64_t size, u32 domain, bool sg) { size_t acc_size; @@ -167,6 +175,11 @@ static void unreserve_system_mem_limit(struct amdgpu_device *adev, } else { kfd_mem_limit.system_mem_used -= acc_size; kfd_mem_limit.ttm_mem_used -= acc_size; + if (domain == AMDGPU_GEM_DOMAIN_VRAM) { + adev->kfd.vram_used -= size; + WARN_ONCE(adev->kfd.vram_used < 0, + "kfd VRAM memory accounting unbalanced"); + } } WARN_ONCE(kfd_mem_limit.system_mem_used < 0, "kfd system memory accounting unbalanced"); @@ -176,29 +189,18 @@ static void unreserve_system_mem_limit(struct amdgpu_device *adev, spin_unlock(&kfd_mem_limit.mem_limit_lock); } -void amdgpu_amdkfd_unreserve_system_memory_limit(struct amdgpu_bo *bo) +void amdgpu_amdkfd_unreserve_memory_limit(struct amdgpu_bo *bo) { - spin_lock(&kfd_mem_limit.mem_limit_lock); + struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); + u32 domain = bo->preferred_domains; + bool sg = (bo->preferred_domains == AMDGPU_GEM_DOMAIN_CPU); if (bo->flags & AMDGPU_AMDKFD_USERPTR_BO) { - kfd_mem_limit.system_mem_used -= - (bo->tbo.acc_size + amdgpu_bo_size(bo)); - kfd_mem_limit.ttm_mem_used -= bo->tbo.acc_size; - } else if (bo->preferred_domains == AMDGPU_GEM_DOMAIN_GTT) { - kfd_mem_limit.system_mem_used -= - (bo->tbo.acc_size + amdgpu_bo_size(bo)); - kfd_mem_limit.ttm_mem_used -= - (bo->tbo.acc_size + amdgpu_bo_size(bo)); - } else { - kfd_mem_limit.system_mem_used -= bo->tbo.acc_size; - kfd_mem_limit.ttm_mem_used -= bo->tbo.acc_size; + domain = AMDGPU_GEM_DOMAIN_CPU; + sg = false; } - WARN_ONCE(kfd_mem_limit.system_mem_used < 0, - "kfd system memory accounting unbalanced"); - WARN_ONCE(kfd_mem_limit.ttm_mem_used < 0, - "kfd TTM memory accounting unbalanced"); - spin_unlock(&kfd_mem_limit.mem_limit_lock); + unreserve_mem_limit(adev, amdgpu_bo_size(bo), domain, sg); } @@ -535,7 +537,7 @@ static void add_kgd_mem_to_kfd_bo_list(struct kgd_mem *mem, struct amdgpu_bo *bo = mem->bo; INIT_LIST_HEAD(&entry->head); - entry->shared = true; + entry->num_shared = 1; entry->bo = &bo->tbo; mutex_lock(&process_info->lock); if (userptr) @@ -676,7 +678,7 @@ static int reserve_bo_and_vm(struct kgd_mem *mem, ctx->kfd_bo.priority = 0; ctx->kfd_bo.tv.bo = &bo->tbo; - ctx->kfd_bo.tv.shared = true; + ctx->kfd_bo.tv.num_shared = 1; ctx->kfd_bo.user_pages = NULL; list_add(&ctx->kfd_bo.tv.head, &ctx->list); @@ -740,7 +742,7 @@ static int reserve_bo_and_cond_vms(struct kgd_mem *mem, ctx->kfd_bo.priority = 0; ctx->kfd_bo.tv.bo = &bo->tbo; - ctx->kfd_bo.tv.shared = true; + ctx->kfd_bo.tv.num_shared = 1; ctx->kfd_bo.user_pages = NULL; list_add(&ctx->kfd_bo.tv.head, &ctx->list); @@ -885,6 +887,24 @@ update_gpuvm_pte_failed: return ret; } +static struct sg_table *create_doorbell_sg(uint64_t addr, uint32_t size) +{ + struct sg_table *sg = kmalloc(sizeof(*sg), GFP_KERNEL); + + if (!sg) + return NULL; + if (sg_alloc_table(sg, 1, GFP_KERNEL)) { + kfree(sg); + return NULL; + } + sg->sgl->dma_address = addr; + sg->sgl->length = size; +#ifdef CONFIG_NEED_SG_DMA_LENGTH + sg->sgl->dma_length = size; +#endif + return sg; +} + static int process_validate_vms(struct amdkfd_process_info *process_info) { struct amdgpu_vm *peer_vm; @@ -1168,6 +1188,8 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( { struct amdgpu_device *adev = get_amdgpu_device(kgd); struct amdgpu_vm *avm = (struct amdgpu_vm *)vm; + enum ttm_bo_type bo_type = ttm_bo_type_device; + struct sg_table *sg = NULL; uint64_t user_addr = 0; struct amdgpu_bo *bo; struct amdgpu_bo_param bp; @@ -1196,13 +1218,25 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( if (!offset || !*offset) return -EINVAL; user_addr = *offset; + } else if (flags & ALLOC_MEM_FLAGS_DOORBELL) { + domain = AMDGPU_GEM_DOMAIN_GTT; + alloc_domain = AMDGPU_GEM_DOMAIN_CPU; + bo_type = ttm_bo_type_sg; + alloc_flags = 0; + if (size > UINT_MAX) + return -EINVAL; + sg = create_doorbell_sg(*offset, size); + if (!sg) + return -ENOMEM; } else { return -EINVAL; } *mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL); - if (!*mem) - return -ENOMEM; + if (!*mem) { + ret = -ENOMEM; + goto err; + } INIT_LIST_HEAD(&(*mem)->bo_va_list); mutex_init(&(*mem)->lock); (*mem)->aql_queue = !!(flags & ALLOC_MEM_FLAGS_AQL_QUEUE_MEM); @@ -1235,8 +1269,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( amdgpu_sync_create(&(*mem)->sync); - ret = amdgpu_amdkfd_reserve_system_mem_limit(adev, size, - alloc_domain, false); + ret = amdgpu_amdkfd_reserve_mem_limit(adev, size, alloc_domain, !!sg); if (ret) { pr_debug("Insufficient system memory\n"); goto err_reserve_limit; @@ -1250,7 +1283,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( bp.byte_align = byte_align; bp.domain = alloc_domain; bp.flags = alloc_flags; - bp.type = ttm_bo_type_device; + bp.type = bo_type; bp.resv = NULL; ret = amdgpu_bo_create(adev, &bp, &bo); if (ret) { @@ -1258,6 +1291,10 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( domain_string(alloc_domain), ret); goto err_bo_create; } + if (bo_type == ttm_bo_type_sg) { + bo->tbo.sg = sg; + bo->tbo.ttm->sg = sg; + } bo->kfd_bo = *mem; (*mem)->bo = bo; if (user_addr) @@ -1289,10 +1326,15 @@ allocate_init_user_pages_failed: /* Don't unreserve system mem limit twice */ goto err_reserve_limit; err_bo_create: - unreserve_system_mem_limit(adev, size, alloc_domain, false); + unreserve_mem_limit(adev, size, alloc_domain, !!sg); err_reserve_limit: mutex_destroy(&(*mem)->lock); kfree(*mem); +err: + if (sg) { + sg_free_table(sg); + kfree(sg); + } return ret; } @@ -1362,6 +1404,14 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( /* Free the sync object */ amdgpu_sync_free(&mem->sync); + /* If the SG is not NULL, it's one we created for a doorbell + * BO. We need to free it. + */ + if (mem->bo->tbo.sg) { + sg_free_table(mem->bo->tbo.sg); + kfree(mem->bo->tbo.sg); + } + /* Free the BO*/ amdgpu_bo_unref(&mem->bo); mutex_destroy(&mem->lock); @@ -1664,6 +1714,60 @@ int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct kgd_dev *kgd, return 0; } +int amdgpu_amdkfd_gpuvm_import_dmabuf(struct kgd_dev *kgd, + struct dma_buf *dma_buf, + uint64_t va, void *vm, + struct kgd_mem **mem, uint64_t *size, + uint64_t *mmap_offset) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)kgd; + struct drm_gem_object *obj; + struct amdgpu_bo *bo; + struct amdgpu_vm *avm = (struct amdgpu_vm *)vm; + + if (dma_buf->ops != &amdgpu_dmabuf_ops) + /* Can't handle non-graphics buffers */ + return -EINVAL; + + obj = dma_buf->priv; + if (obj->dev->dev_private != adev) + /* Can't handle buffers from other devices */ + return -EINVAL; + + bo = gem_to_amdgpu_bo(obj); + if (!(bo->preferred_domains & (AMDGPU_GEM_DOMAIN_VRAM | + AMDGPU_GEM_DOMAIN_GTT))) + /* Only VRAM and GTT BOs are supported */ + return -EINVAL; + + *mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL); + if (!*mem) + return -ENOMEM; + + if (size) + *size = amdgpu_bo_size(bo); + + if (mmap_offset) + *mmap_offset = amdgpu_bo_mmap_offset(bo); + + INIT_LIST_HEAD(&(*mem)->bo_va_list); + mutex_init(&(*mem)->lock); + (*mem)->mapping_flags = + AMDGPU_VM_PAGE_READABLE | AMDGPU_VM_PAGE_WRITEABLE | + AMDGPU_VM_PAGE_EXECUTABLE | AMDGPU_VM_MTYPE_NC; + + (*mem)->bo = amdgpu_bo_ref(bo); + (*mem)->va = va; + (*mem)->domain = (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) ? + AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT; + (*mem)->mapped_to_gpu_memory = 0; + (*mem)->process_info = avm->process_info; + add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info, false); + amdgpu_sync_create(&(*mem)->sync); + + return 0; +} + /* Evict a userptr BO by stopping the queues if necessary * * Runs in MMU notifier, may be in RECLAIM_FS context. This means it @@ -1830,7 +1934,7 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info) validate_list.head) { list_add_tail(&mem->resv_list.head, &resv_list); mem->resv_list.bo = mem->validate_list.bo; - mem->resv_list.shared = mem->validate_list.shared; + mem->resv_list.num_shared = mem->validate_list.num_shared; } /* Reserve all BOs and page tables for validation */ @@ -2049,7 +2153,7 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef) list_add_tail(&mem->resv_list.head, &ctx.list); mem->resv_list.bo = mem->validate_list.bo; - mem->resv_list.shared = mem->validate_list.shared; + mem->resv_list.num_shared = mem->validate_list.num_shared; } ret = ttm_eu_reserve_buffers(&ctx.ticket, &ctx.list, |