diff options
author | Alex Deucher <alexander.deucher@amd.com> | 2019-03-28 10:15:26 -0500 |
---|---|---|
committer | Alex Deucher <alexander.deucher@amd.com> | 2019-03-28 10:15:26 -0500 |
commit | 318c3f4bf337d904d442997274d8b63c8f1ea1ae (patch) | |
tree | 7f1d1eb4bd4de6e4b63c30df5b30f4dc0a039063 /drivers | |
parent | 8944042dece3ed80cd7320ef1f0de8920220ca49 (diff) |
Revert "drm/amdgpu: replace get_user_pages with HMM mirror helpers"
This reverts commit 915d3eecfa23693bac9e54cdacf84fb4efdcc5c4.
This depends on an HMM fix which is not upstream yet.
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 1 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 95 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h | 2 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 138 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 14 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c | 25 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h | 4 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 178 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h | 3 |
9 files changed, 278 insertions, 182 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index e6a503760b62..775f815f9521 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -61,6 +61,7 @@ struct kgd_mem { atomic_t invalid; struct amdkfd_process_info *process_info; + struct page **user_pages; struct amdgpu_sync sync; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 314c048fcac6..a6e5184d436c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -491,12 +491,28 @@ static int init_user_pages(struct kgd_mem *mem, struct mm_struct *mm, goto out; } - ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, bo->tbo.ttm->pages); + /* If no restore worker is running concurrently, user_pages + * should not be allocated + */ + WARN(mem->user_pages, "Leaking user_pages array"); + + mem->user_pages = kvmalloc_array(bo->tbo.ttm->num_pages, + sizeof(struct page *), + GFP_KERNEL | __GFP_ZERO); + if (!mem->user_pages) { + pr_err("%s: Failed to allocate pages array\n", __func__); + ret = -ENOMEM; + goto unregister_out; + } + + ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, mem->user_pages); if (ret) { pr_err("%s: Failed to get user pages: %d\n", __func__, ret); - goto unregister_out; + goto free_out; } + amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, mem->user_pages); + ret = amdgpu_bo_reserve(bo, true); if (ret) { pr_err("%s: Failed to reserve BO\n", __func__); @@ -509,7 +525,11 @@ static int init_user_pages(struct kgd_mem *mem, struct mm_struct *mm, amdgpu_bo_unreserve(bo); release_out: - amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm); + if (ret) + release_pages(mem->user_pages, bo->tbo.ttm->num_pages); +free_out: + kvfree(mem->user_pages); + mem->user_pages = NULL; unregister_out: if (ret) amdgpu_mn_unregister(bo); @@ -568,6 +588,7 @@ static int reserve_bo_and_vm(struct kgd_mem *mem, ctx->kfd_bo.priority = 0; ctx->kfd_bo.tv.bo = &bo->tbo; ctx->kfd_bo.tv.num_shared = 1; + ctx->kfd_bo.user_pages = NULL; list_add(&ctx->kfd_bo.tv.head, &ctx->list); amdgpu_vm_get_pd_bo(vm, &ctx->list, &ctx->vm_pd[0]); @@ -631,6 +652,7 @@ static int reserve_bo_and_cond_vms(struct kgd_mem *mem, ctx->kfd_bo.priority = 0; ctx->kfd_bo.tv.bo = &bo->tbo; ctx->kfd_bo.tv.num_shared = 1; + ctx->kfd_bo.user_pages = NULL; list_add(&ctx->kfd_bo.tv.head, &ctx->list); i = 0; @@ -1240,6 +1262,15 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( list_del(&bo_list_entry->head); mutex_unlock(&process_info->lock); + /* Free user pages if necessary */ + if (mem->user_pages) { + pr_debug("%s: Freeing user_pages array\n", __func__); + if (mem->user_pages[0]) + release_pages(mem->user_pages, + mem->bo->tbo.ttm->num_pages); + kvfree(mem->user_pages); + } + ret = reserve_bo_and_cond_vms(mem, NULL, BO_VM_ALL, &ctx); if (unlikely(ret)) return ret; @@ -1713,11 +1744,25 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info, bo = mem->bo; + if (!mem->user_pages) { + mem->user_pages = + kvmalloc_array(bo->tbo.ttm->num_pages, + sizeof(struct page *), + GFP_KERNEL | __GFP_ZERO); + if (!mem->user_pages) { + pr_err("%s: Failed to allocate pages array\n", + __func__); + return -ENOMEM; + } + } else if (mem->user_pages[0]) { + release_pages(mem->user_pages, bo->tbo.ttm->num_pages); + } + /* Get updated user pages */ ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, - bo->tbo.ttm->pages); + mem->user_pages); if (ret) { - bo->tbo.ttm->pages[0] = NULL; + mem->user_pages[0] = NULL; pr_info("%s: Failed to get user pages: %d\n", __func__, ret); /* Pretend it succeeded. It will fail later @@ -1726,6 +1771,12 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info, * stalled user mode queues. */ } + + /* Mark the BO as valid unless it was invalidated + * again concurrently + */ + if (atomic_cmpxchg(&mem->invalid, invalid, 0) != invalid) + return -EAGAIN; } return 0; @@ -1755,8 +1806,7 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info) GFP_KERNEL); if (!pd_bo_list_entries) { pr_err("%s: Failed to allocate PD BO list entries\n", __func__); - ret = -ENOMEM; - goto out_no_mem; + return -ENOMEM; } INIT_LIST_HEAD(&resv_list); @@ -1780,7 +1830,7 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info) ret = ttm_eu_reserve_buffers(&ticket, &resv_list, false, &duplicates); WARN(!list_empty(&duplicates), "Duplicates should be empty"); if (ret) - goto out_free; + goto out; amdgpu_sync_create(&sync); @@ -1796,8 +1846,10 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info) bo = mem->bo; - /* Validate the BO if we got user pages */ - if (bo->tbo.ttm->pages[0]) { + /* Copy pages array and validate the BO if we got user pages */ + if (mem->user_pages[0]) { + amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, + mem->user_pages); amdgpu_bo_placement_from_domain(bo, mem->domain); ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); if (ret) { @@ -1806,16 +1858,16 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info) } } + /* Validate succeeded, now the BO owns the pages, free + * our copy of the pointer array. Put this BO back on + * the userptr_valid_list. If we need to revalidate + * it, we need to start from scratch. + */ + kvfree(mem->user_pages); + mem->user_pages = NULL; list_move_tail(&mem->validate_list.head, &process_info->userptr_valid_list); - /* Stop HMM track the userptr update. We dont check the return - * value for concurrent CPU page table update because we will - * reschedule the restore worker if process_info->evicted_bos - * is updated. - */ - amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm); - /* Update mapping. If the BO was not validated * (because we couldn't get user pages), this will * clear the page table entries, which will result in @@ -1845,15 +1897,8 @@ unreserve_out: ttm_eu_backoff_reservation(&ticket, &resv_list); amdgpu_sync_wait(&sync, false); amdgpu_sync_free(&sync); -out_free: +out: kfree(pd_bo_list_entries); -out_no_mem: - list_for_each_entry_safe(mem, tmp_mem, - &process_info->userptr_inval_list, - validate_list.head) { - bo = mem->bo; - amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm); - } return ret; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h index a130e766cbdb..7c5f5d1601e6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h @@ -36,7 +36,7 @@ struct amdgpu_bo_list_entry { struct amdgpu_bo_va *bo_va; uint32_t priority; struct page **user_pages; - bool user_invalidated; + int user_invalidated; }; struct amdgpu_bo_list { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 545302d0955f..52a5e4fdc95b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -52,6 +52,7 @@ static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p, p->uf_entry.tv.bo = &bo->tbo; /* One for TTM and one for the CS job */ p->uf_entry.tv.num_shared = 2; + p->uf_entry.user_pages = NULL; drm_gem_object_put_unlocked(gobj); @@ -539,14 +540,14 @@ static int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p, if (usermm && usermm != current->mm) return -EPERM; - if (amdgpu_ttm_tt_is_userptr(bo->tbo.ttm) && - lobj->user_invalidated && lobj->user_pages) { + /* Check if we have user pages and nobody bound the BO already */ + if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm) && + lobj->user_pages) { amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU); r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); if (r) return r; - amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, lobj->user_pages); binding_userptr = true; @@ -577,6 +578,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, struct amdgpu_bo *gds; struct amdgpu_bo *gws; struct amdgpu_bo *oa; + unsigned tries = 10; int r; INIT_LIST_HEAD(&p->validated); @@ -612,45 +614,79 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, if (p->uf_entry.tv.bo && !ttm_to_amdgpu_bo(p->uf_entry.tv.bo)->parent) list_add(&p->uf_entry.tv.head, &p->validated); - /* Get userptr backing pages. If pages are updated after registered - * in amdgpu_gem_userptr_ioctl(), amdgpu_cs_list_validate() will do - * amdgpu_ttm_backend_bind() to flush and invalidate new pages - */ - amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) { - struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo); - bool userpage_invalidated = false; - int i; - - e->user_pages = kvmalloc_array(bo->tbo.ttm->num_pages, - sizeof(struct page *), - GFP_KERNEL | __GFP_ZERO); - if (!e->user_pages) { - DRM_ERROR("calloc failure\n"); - return -ENOMEM; + while (1) { + struct list_head need_pages; + + r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true, + &duplicates); + if (unlikely(r != 0)) { + if (r != -ERESTARTSYS) + DRM_ERROR("ttm_eu_reserve_buffers failed.\n"); + goto error_free_pages; } - r = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, e->user_pages); - if (r) { - kvfree(e->user_pages); - e->user_pages = NULL; - return r; + INIT_LIST_HEAD(&need_pages); + amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) { + struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo); + + if (amdgpu_ttm_tt_userptr_invalidated(bo->tbo.ttm, + &e->user_invalidated) && e->user_pages) { + + /* We acquired a page array, but somebody + * invalidated it. Free it and try again + */ + release_pages(e->user_pages, + bo->tbo.ttm->num_pages); + kvfree(e->user_pages); + e->user_pages = NULL; + } + + if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm) && + !e->user_pages) { + list_del(&e->tv.head); + list_add(&e->tv.head, &need_pages); + + amdgpu_bo_unreserve(bo); + } + } + + if (list_empty(&need_pages)) + break; + + /* Unreserve everything again. */ + ttm_eu_backoff_reservation(&p->ticket, &p->validated); + + /* We tried too many times, just abort */ + if (!--tries) { + r = -EDEADLK; + DRM_ERROR("deadlock in %s\n", __func__); + goto error_free_pages; } - for (i = 0; i < bo->tbo.ttm->num_pages; i++) { - if (bo->tbo.ttm->pages[i] != e->user_pages[i]) { - userpage_invalidated = true; - break; + /* Fill the page arrays for all userptrs. */ + list_for_each_entry(e, &need_pages, tv.head) { + struct ttm_tt *ttm = e->tv.bo->ttm; + + e->user_pages = kvmalloc_array(ttm->num_pages, + sizeof(struct page*), + GFP_KERNEL | __GFP_ZERO); + if (!e->user_pages) { + r = -ENOMEM; + DRM_ERROR("calloc failure in %s\n", __func__); + goto error_free_pages; + } + + r = amdgpu_ttm_tt_get_user_pages(ttm, e->user_pages); + if (r) { + DRM_ERROR("amdgpu_ttm_tt_get_user_pages failed.\n"); + kvfree(e->user_pages); + e->user_pages = NULL; + goto error_free_pages; } } - e->user_invalidated = userpage_invalidated; - } - r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true, - &duplicates); - if (unlikely(r != 0)) { - if (r != -ERESTARTSYS) - DRM_ERROR("ttm_eu_reserve_buffers failed.\n"); - goto out; + /* And try again. */ + list_splice(&need_pages, &p->validated); } amdgpu_cs_get_threshold_for_moves(p->adev, &p->bytes_moved_threshold, @@ -719,7 +755,17 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, error_validate: if (r) ttm_eu_backoff_reservation(&p->ticket, &p->validated); -out: + +error_free_pages: + + amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) { + if (!e->user_pages) + continue; + + release_pages(e->user_pages, e->tv.bo->ttm->num_pages); + kvfree(e->user_pages); + } + return r; } @@ -1178,6 +1224,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, struct amdgpu_bo_list_entry *e; struct amdgpu_job *job; uint64_t seq; + int r; job = p->job; @@ -1187,23 +1234,15 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, if (r) goto error_unlock; - /* No memory allocation is allowed while holding the mn lock. - * p->mn is hold until amdgpu_cs_submit is finished and fence is added - * to BOs. - */ + /* No memory allocation is allowed while holding the mn lock */ amdgpu_mn_lock(p->mn); - - /* If userptr are invalidated after amdgpu_cs_parser_bos(), return - * -EAGAIN, drmIoctl in libdrm will restart the amdgpu_cs_ioctl. - */ amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) { struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo); - r |= !amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm); - } - if (r) { - r = -EAGAIN; - goto error_abort; + if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm)) { + r = -ERESTARTSYS; + goto error_abort; + } } job->owner = p->filp; @@ -1299,7 +1338,6 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) out: amdgpu_cs_parser_fini(&parser, r, reserved_buffers); - return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index 9ee8d7a3c6d4..61107cfc9af6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -329,24 +329,26 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data, r = amdgpu_bo_reserve(bo, true); if (r) - goto user_pages_done; + goto free_pages; amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT); r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); amdgpu_bo_unreserve(bo); if (r) - goto user_pages_done; + goto free_pages; } r = drm_gem_handle_create(filp, gobj, &handle); + /* drop reference from allocate - handle holds it now */ + drm_gem_object_put_unlocked(gobj); if (r) - goto user_pages_done; + return r; args->handle = handle; + return 0; -user_pages_done: - if (args->flags & AMDGPU_GEM_USERPTR_VALIDATE) - amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm); +free_pages: + release_pages(bo->tbo.ttm->pages, bo->tbo.ttm->num_pages); release_object: drm_gem_object_put_unlocked(gobj); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c index 41ccee49a224..f000704f984d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c @@ -220,6 +220,8 @@ static void amdgpu_mn_invalidate_node(struct amdgpu_mn_node *node, true, false, MAX_SCHEDULE_TIMEOUT); if (r <= 0) DRM_ERROR("(%ld) failed to wait for user bo\n", r); + + amdgpu_ttm_tt_mark_user_pages(bo->tbo.ttm); } } @@ -500,26 +502,3 @@ void amdgpu_mn_unregister(struct amdgpu_bo *bo) mutex_unlock(&adev->mn_lock); } -/* flags used by HMM internal, not related to CPU/GPU PTE flags */ -static const uint64_t hmm_range_flags[HMM_PFN_FLAG_MAX] = { - (1 << 0), /* HMM_PFN_VALID */ - (1 << 1), /* HMM_PFN_WRITE */ - 0 /* HMM_PFN_DEVICE_PRIVATE */ -}; - -static const uint64_t hmm_range_values[HMM_PFN_VALUE_MAX] = { - 0xfffffffffffffffeUL, /* HMM_PFN_ERROR */ - 0, /* HMM_PFN_NONE */ - 0xfffffffffffffffcUL /* HMM_PFN_SPECIAL */ -}; - -void amdgpu_hmm_init_range(struct hmm_range *range) -{ - if (range) { - range->flags = hmm_range_flags; - range->values = hmm_range_values; - range->pfn_shift = PAGE_SHIFT; - range->pfns = NULL; - INIT_LIST_HEAD(&range->list); - } -} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h index 4803e216e174..0a51fd00021c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h @@ -25,10 +25,9 @@ #define __AMDGPU_MN_H__ /* - * HMM mirror + * MMU Notifier */ struct amdgpu_mn; -struct hmm_range; enum amdgpu_mn_type { AMDGPU_MN_TYPE_GFX, @@ -42,7 +41,6 @@ struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev, enum amdgpu_mn_type type); int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr); void amdgpu_mn_unregister(struct amdgpu_bo *bo); -void amdgpu_hmm_init_range(struct hmm_range *range); #else static inline void amdgpu_mn_lock(struct amdgpu_mn *mn) {} static inline void amdgpu_mn_unlock(struct amdgpu_mn *mn) {} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index ba781088e0e3..0c52d1f9fe0f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -43,7 +43,6 @@ #include <linux/pagemap.h> #include <linux/debugfs.h> #include <linux/iommu.h> -#include <linux/hmm.h> #include "amdgpu.h" #include "amdgpu_object.h" #include "amdgpu_trace.h" @@ -704,102 +703,98 @@ static unsigned long amdgpu_ttm_io_mem_pfn(struct ttm_buffer_object *bo, /* * TTM backend functions. */ +struct amdgpu_ttm_gup_task_list { + struct list_head list; + struct task_struct *task; +}; + struct amdgpu_ttm_tt { struct ttm_dma_tt ttm; u64 offset; uint64_t userptr; struct task_struct *usertask; uint32_t userflags; - struct hmm_range range; + spinlock_t guptasklock; + struct list_head guptasks; + atomic_t mmu_invalidations; + uint32_t last_set_pages; }; /** - * amdgpu_ttm_tt_get_user_pages - get device accessible pages that back user - * memory and start HMM tracking CPU page table update + * amdgpu_ttm_tt_get_user_pages - Pin pages of memory pointed to by a USERPTR + * pointer to memory * - * Calling function must call amdgpu_ttm_tt_userptr_range_done() once and only - * once afterwards to stop HMM tracking + * Called by amdgpu_gem_userptr_ioctl() and amdgpu_cs_parser_bos(). + * This provides a wrapper around the get_user_pages() call to provide + * device accessible pages that back user memory. */ int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages) { struct amdgpu_ttm_tt *gtt = (void *)ttm; struct mm_struct *mm = gtt->usertask->mm; - unsigned long end = gtt->userptr + ttm->num_pages * PAGE_SIZE; - struct hmm_range *range = >t->range; - int r = 0, i; + unsigned int flags = 0; + unsigned pinned = 0; + int r; if (!mm) /* Happens during process shutdown */ return -ESRCH; - amdgpu_hmm_init_range(range); + if (!(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY)) + flags |= FOLL_WRITE; down_read(&mm->mmap_sem); - range->vma = find_vma(mm, gtt->userptr); - if (!range_in_vma(range->vma, gtt->userptr, end)) - r = -EFAULT; - else if ((gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) && - range->vma->vm_file) - r = -EPERM; - if (r) - goto out; + if (gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) { + /* + * check that we only use anonymous memory to prevent problems + * with writeback + */ + unsigned long end = gtt->userptr + ttm->num_pages * PAGE_SIZE; + struct vm_area_struct *vma; - range->pfns = kvmalloc_array(ttm->num_pages, sizeof(uint64_t), - GFP_KERNEL); - if (range->pfns == NULL) { - r = -ENOMEM; - goto out; + vma = find_vma(mm, gtt->userptr); + if (!vma || vma->vm_file || vma->vm_end < end) { + up_read(&mm->mmap_sem); + return -EPERM; + } } - range->start = gtt->userptr; - range->end = end; - range->pfns[0] = range->flags[HMM_PFN_VALID]; - range->pfns[0] |= amdgpu_ttm_tt_is_readonly(ttm) ? - 0 : range->flags[HMM_PFN_WRITE]; - for (i = 1; i < ttm->num_pages; i++) - range->pfns[i] = range->pfns[0]; + /* loop enough times using contiguous pages of memory */ + do { + unsigned num_pages = ttm->num_pages - pinned; + uint64_t userptr = gtt->userptr + pinned * PAGE_SIZE; + struct page **p = pages + pinned; + struct amdgpu_ttm_gup_task_list guptask; - /* This may trigger page table update */ - r = hmm_vma_fault(range, true); - if (r) - goto out_free_pfns; + guptask.task = current; + spin_lock(>t->guptasklock); + list_add(&guptask.list, >t->guptasks); + spin_unlock(>t->guptasklock); - up_read(&mm->mmap_sem); + if (mm == current->mm) + r = get_user_pages(userptr, num_pages, flags, p, NULL); + else + r = get_user_pages_remote(gtt->usertask, + mm, userptr, num_pages, + flags, p, NULL, NULL); - for (i = 0; i < ttm->num_pages; i++) - pages[i] = hmm_pfn_to_page(range, range->pfns[i]); - - return 0; + spin_lock(>t->guptasklock); + list_del(&guptask.list); + spin_unlock(>t->guptasklock); -out_free_pfns: - kvfree(range->pfns); - range->pfns = NULL; -out: - up_read(&mm->mmap_sem); - return r; -} + if (r < 0) + goto release_pages; -/** - * amdgpu_ttm_tt_userptr_range_done - stop HMM track the CPU page table change - * Check if the pages backing this ttm range have been invalidated - * - * Returns: true if pages are still valid - */ -bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm) -{ - struct amdgpu_ttm_tt *gtt = (void *)ttm; - bool r = false; + pinned += r; - if (!gtt || !gtt->userptr) - return false; + } while (pinned < ttm->num_pages); - WARN_ONCE(!gtt->range.pfns, "No user pages to check\n"); - if (gtt->range.pfns) { - r = hmm_vma_range_done(>t->range); - kvfree(gtt->range.pfns); - gtt->range.pfns = NULL; - } + up_read(&mm->mmap_sem); + return 0; +release_pages: + release_pages(pages, pinned); + up_read(&mm->mmap_sem); return r; } @@ -812,10 +807,16 @@ bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm) */ void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages) { + struct amdgpu_ttm_tt *gtt = (void *)ttm; unsigned i; - for (i = 0; i < ttm->num_pages; ++i) + gtt->last_set_pages = atomic_read(>t->mmu_invalidations); + for (i = 0; i < ttm->num_pages; ++i) { + if (ttm->pages[i]) + put_page(ttm->pages[i]); + ttm->pages[i] = pages ? pages[i] : NULL; + } } /** @@ -900,11 +901,10 @@ static void amdgpu_ttm_tt_unpin_userptr(struct ttm_tt *ttm) /* unmap the pages mapped to the device */ dma_unmap_sg(adev->dev, ttm->sg->sgl, ttm->sg->nents, direction); - sg_free_table(ttm->sg); + /* mark the pages as dirty */ + amdgpu_ttm_tt_mark_user_pages(ttm); - if (gtt->range.pfns && - ttm->pages[0] == hmm_pfn_to_page(>t->range, gtt->range.pfns[0])) - WARN_ONCE(1, "Missing get_user_page_done\n"); + sg_free_table(ttm->sg); } int amdgpu_ttm_gart_bind(struct amdgpu_device *adev, @@ -1254,6 +1254,11 @@ int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr, gtt->usertask = current->group_leader; get_task_struct(gtt->usertask); + spin_lock_init(>t->guptasklock); + INIT_LIST_HEAD(>t->guptasks); + atomic_set(>t->mmu_invalidations, 0); + gtt->last_set_pages = 0; + return 0; } @@ -1282,6 +1287,7 @@ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start, unsigned long end) { struct amdgpu_ttm_tt *gtt = (void *)ttm; + struct amdgpu_ttm_gup_task_list *entry; unsigned long size; if (gtt == NULL || !gtt->userptr) @@ -1294,20 +1300,48 @@ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start, if (gtt->userptr > end || gtt->userptr + size <= start) return false; + /* Search the lists of tasks that hold this mapping and see + * if current is one of them. If it is return false. + */ + spin_lock(>t->guptasklock); + list_for_each_entry(entry, >t->guptasks, list) { + if (entry->task == current) { + spin_unlock(>t->guptasklock); + return false; + } + } + spin_unlock(>t->guptasklock); + + atomic_inc(>t->mmu_invalidations); + return true; } /** - * amdgpu_ttm_tt_is_userptr - Have the pages backing by userptr? + * amdgpu_ttm_tt_userptr_invalidated - Has the ttm_tt object been invalidated? */ -bool amdgpu_ttm_tt_is_userptr(struct ttm_tt *ttm) +bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm, + int *last_invalidated) +{ + struct amdgpu_ttm_tt *gtt = (void *)ttm; + int prev_invalidated = *last_invalidated; + + *last_invalidated = atomic_read(>t->mmu_invalidations); + return prev_invalidated != *last_invalidated; +} + +/** + * amdgpu_ttm_tt_userptr_needs_pages - Have the pages backing this ttm_tt object + * been invalidated since the last time they've been set? + */ +bool amdgpu_ttm_tt_userptr_needs_pages(struct ttm_tt *ttm) { struct amdgpu_ttm_tt *gtt = (void *)ttm; if (gtt == NULL || !gtt->userptr) return false; - return true; + return atomic_read(>t->mmu_invalidations) != gtt->last_set_pages; } /** diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h index 8988c87fff9d..b5b2d101f7db 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h @@ -102,7 +102,6 @@ int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo); int amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo); int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages); -bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm); void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages); void amdgpu_ttm_tt_mark_user_pages(struct ttm_tt *ttm); int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr, @@ -113,7 +112,7 @@ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start, unsigned long end); bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm, int *last_invalidated); -bool amdgpu_ttm_tt_is_userptr(struct ttm_tt *ttm); +bool amdgpu_ttm_tt_userptr_needs_pages(struct ttm_tt *ttm); bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm); uint64_t amdgpu_ttm_tt_pde_flags(struct ttm_tt *ttm, struct ttm_mem_reg *mem); uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm, |