diff options
author | Thomas Gleixner <tglx@linutronix.de> | 2019-08-12 14:36:27 +0200 |
---|---|---|
committer | Thomas Gleixner <tglx@linutronix.de> | 2019-08-12 14:36:27 +0200 |
commit | cbd32a1c56e36fedaa93a727699188bd3e6e6f67 (patch) | |
tree | 199e302eb5a66725a9d1774e47367a87098ba397 /drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | |
parent | 48c7d73b2362ce61503551ad70052617b3e8857d (diff) | |
parent | b61fbc887af7a13a1c90c84c1feaeb4c9780e1e2 (diff) |
Merge tag 'efi-urgent' of git://git.kernel.org/pub/scm/linux/kernel/git/efi/efi into efi/urgent
Pull a single EFI fix for v5.3 from Ard:
- Fix mixed mode breakage in EFI config table handling for TPM.
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c')
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 365 |
1 files changed, 200 insertions, 165 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 0c52d1f9fe0f..e51b48ac48eb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -29,20 +29,26 @@ * Thomas Hellstrom <thomas-at-tungstengraphics-dot-com> * Dave Airlie */ + +#include <linux/dma-mapping.h> +#include <linux/iommu.h> +#include <linux/hmm.h> +#include <linux/pagemap.h> +#include <linux/sched/task.h> +#include <linux/seq_file.h> +#include <linux/slab.h> +#include <linux/swap.h> +#include <linux/swiotlb.h> + #include <drm/ttm/ttm_bo_api.h> #include <drm/ttm/ttm_bo_driver.h> #include <drm/ttm/ttm_placement.h> #include <drm/ttm/ttm_module.h> #include <drm/ttm/ttm_page_alloc.h> -#include <drm/drmP.h> + +#include <drm/drm_debugfs.h> #include <drm/amdgpu_drm.h> -#include <linux/seq_file.h> -#include <linux/slab.h> -#include <linux/swiotlb.h> -#include <linux/swap.h> -#include <linux/pagemap.h> -#include <linux/debugfs.h> -#include <linux/iommu.h> + #include "amdgpu.h" #include "amdgpu_object.h" #include "amdgpu_trace.h" @@ -385,6 +391,7 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev, src_node_start = amdgpu_mm_node_addr(src->bo, ++src_mm, src->mem); src_node_size = (src_mm->size << PAGE_SHIFT); + src_page_offset = 0; } else { src_node_start += cur_size; src_page_offset = src_node_start & (PAGE_SIZE - 1); @@ -394,6 +401,7 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev, dst_node_start = amdgpu_mm_node_addr(dst->bo, ++dst_mm, dst->mem); dst_node_size = (dst_mm->size << PAGE_SHIFT); + dst_page_offset = 0; } else { dst_node_start += cur_size; dst_page_offset = dst_node_start & (PAGE_SIZE - 1); @@ -481,6 +489,7 @@ static int amdgpu_move_vram_ram(struct ttm_buffer_object *bo, bool evict, placements.flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_TT; r = ttm_bo_mem_space(bo, &placement, &tmp_mem, ctx); if (unlikely(r)) { + pr_err("Failed to find GTT space for blit from VRAM\n"); return r; } @@ -539,6 +548,7 @@ static int amdgpu_move_ram_vram(struct ttm_buffer_object *bo, bool evict, placements.flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_TT; r = ttm_bo_mem_space(bo, &placement, &tmp_mem, ctx); if (unlikely(r)) { + pr_err("Failed to find GTT space for blit to VRAM\n"); return r; } @@ -559,6 +569,30 @@ out_cleanup: } /** + * amdgpu_mem_visible - Check that memory can be accessed by ttm_bo_move_memcpy + * + * Called by amdgpu_bo_move() + */ +static bool amdgpu_mem_visible(struct amdgpu_device *adev, + struct ttm_mem_reg *mem) +{ + struct drm_mm_node *nodes = mem->mm_node; + + if (mem->mem_type == TTM_PL_SYSTEM || + mem->mem_type == TTM_PL_TT) + return true; + if (mem->mem_type != TTM_PL_VRAM) + return false; + + /* ttm_mem_reg_ioremap only supports contiguous memory */ + if (nodes->size != mem->num_pages) + return false; + + return ((nodes->start + nodes->size) << PAGE_SHIFT) + <= adev->gmc.visible_vram_size; +} + +/** * amdgpu_bo_move - Move a buffer object to a new memory location * * Called by ttm_bo_handle_move_mem() @@ -602,8 +636,10 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict, return 0; } - if (!adev->mman.buffer_funcs_enabled) + if (!adev->mman.buffer_funcs_enabled) { + r = -ENODEV; goto memcpy; + } if (old_mem->mem_type == TTM_PL_VRAM && new_mem->mem_type == TTM_PL_SYSTEM) { @@ -618,10 +654,16 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict, if (r) { memcpy: - r = ttm_bo_move_memcpy(bo, ctx, new_mem); - if (r) { + /* Check that all memory is CPU accessible */ + if (!amdgpu_mem_visible(adev, old_mem) || + !amdgpu_mem_visible(adev, new_mem)) { + pr_err("Move buffer fallback to memcpy unavailable\n"); return r; } + + r = ttm_bo_move_memcpy(bo, ctx, new_mem); + if (r) + return r; } if (bo->type == ttm_bo_type_device && @@ -703,143 +745,183 @@ static unsigned long amdgpu_ttm_io_mem_pfn(struct ttm_buffer_object *bo, /* * TTM backend functions. */ -struct amdgpu_ttm_gup_task_list { - struct list_head list; - struct task_struct *task; -}; - struct amdgpu_ttm_tt { struct ttm_dma_tt ttm; u64 offset; uint64_t userptr; struct task_struct *usertask; uint32_t userflags; - spinlock_t guptasklock; - struct list_head guptasks; - atomic_t mmu_invalidations; - uint32_t last_set_pages; +#if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR) + struct hmm_range *range; +#endif }; /** - * amdgpu_ttm_tt_get_user_pages - Pin pages of memory pointed to by a USERPTR - * pointer to memory + * amdgpu_ttm_tt_get_user_pages - get device accessible pages that back user + * memory and start HMM tracking CPU page table update * - * Called by amdgpu_gem_userptr_ioctl() and amdgpu_cs_parser_bos(). - * This provides a wrapper around the get_user_pages() call to provide - * device accessible pages that back user memory. + * Calling function must call amdgpu_ttm_tt_userptr_range_done() once and only + * once afterwards to stop HMM tracking */ -int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages) +#if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR) + +#define MAX_RETRY_HMM_RANGE_FAULT 16 + +int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages) { + struct hmm_mirror *mirror = bo->mn ? &bo->mn->mirror : NULL; + struct ttm_tt *ttm = bo->tbo.ttm; struct amdgpu_ttm_tt *gtt = (void *)ttm; struct mm_struct *mm = gtt->usertask->mm; - unsigned int flags = 0; - unsigned pinned = 0; - int r; + unsigned long start = gtt->userptr; + struct vm_area_struct *vma; + struct hmm_range *range; + unsigned long i; + uint64_t *pfns; + int retry = 0; + int r = 0; if (!mm) /* Happens during process shutdown */ return -ESRCH; - if (!(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY)) - flags |= FOLL_WRITE; + if (unlikely(!mirror)) { + DRM_DEBUG_DRIVER("Failed to get hmm_mirror\n"); + r = -EFAULT; + goto out; + } - down_read(&mm->mmap_sem); + vma = find_vma(mm, start); + if (unlikely(!vma || start < vma->vm_start)) { + r = -EFAULT; + goto out; + } + if (unlikely((gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) && + vma->vm_file)) { + r = -EPERM; + goto out; + } - if (gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) { - /* - * check that we only use anonymous memory to prevent problems - * with writeback - */ - unsigned long end = gtt->userptr + ttm->num_pages * PAGE_SIZE; - struct vm_area_struct *vma; + range = kzalloc(sizeof(*range), GFP_KERNEL); + if (unlikely(!range)) { + r = -ENOMEM; + goto out; + } - vma = find_vma(mm, gtt->userptr); - if (!vma || vma->vm_file || vma->vm_end < end) { - up_read(&mm->mmap_sem); - return -EPERM; - } + pfns = kvmalloc_array(ttm->num_pages, sizeof(*pfns), GFP_KERNEL); + if (unlikely(!pfns)) { + r = -ENOMEM; + goto out_free_ranges; } - /* loop enough times using contiguous pages of memory */ - do { - unsigned num_pages = ttm->num_pages - pinned; - uint64_t userptr = gtt->userptr + pinned * PAGE_SIZE; - struct page **p = pages + pinned; - struct amdgpu_ttm_gup_task_list guptask; + amdgpu_hmm_init_range(range); + range->default_flags = range->flags[HMM_PFN_VALID]; + range->default_flags |= amdgpu_ttm_tt_is_readonly(ttm) ? + 0 : range->flags[HMM_PFN_WRITE]; + range->pfn_flags_mask = 0; + range->pfns = pfns; + hmm_range_register(range, mirror, start, + start + ttm->num_pages * PAGE_SIZE, PAGE_SHIFT); - guptask.task = current; - spin_lock(>t->guptasklock); - list_add(&guptask.list, >t->guptasks); - spin_unlock(>t->guptasklock); +retry: + /* + * Just wait for range to be valid, safe to ignore return value as we + * will use the return value of hmm_range_fault() below under the + * mmap_sem to ascertain the validity of the range. + */ + hmm_range_wait_until_valid(range, HMM_RANGE_DEFAULT_TIMEOUT); + + down_read(&mm->mmap_sem); + + r = hmm_range_fault(range, true); + if (unlikely(r < 0)) { + if (likely(r == -EAGAIN)) { + /* + * return -EAGAIN, mmap_sem is dropped + */ + if (retry++ < MAX_RETRY_HMM_RANGE_FAULT) + goto retry; + else + pr_err("Retry hmm fault too many times\n"); + } - if (mm == current->mm) - r = get_user_pages(userptr, num_pages, flags, p, NULL); - else - r = get_user_pages_remote(gtt->usertask, - mm, userptr, num_pages, - flags, p, NULL, NULL); + goto out_up_read; + } - spin_lock(>t->guptasklock); - list_del(&guptask.list); - spin_unlock(>t->guptasklock); + up_read(&mm->mmap_sem); - if (r < 0) - goto release_pages; + for (i = 0; i < ttm->num_pages; i++) { + pages[i] = hmm_device_entry_to_page(range, pfns[i]); + if (unlikely(!pages[i])) { + pr_err("Page fault failed for pfn[%lu] = 0x%llx\n", + i, pfns[i]); + r = -ENOMEM; - pinned += r; + goto out_free_pfns; + } + } - } while (pinned < ttm->num_pages); + gtt->range = range; - up_read(&mm->mmap_sem); return 0; -release_pages: - release_pages(pages, pinned); - up_read(&mm->mmap_sem); +out_up_read: + if (likely(r != -EAGAIN)) + up_read(&mm->mmap_sem); +out_free_pfns: + hmm_range_unregister(range); + kvfree(pfns); +out_free_ranges: + kfree(range); +out: return r; } /** - * amdgpu_ttm_tt_set_user_pages - Copy pages in, putting old pages as necessary. + * amdgpu_ttm_tt_userptr_range_done - stop HMM track the CPU page table change + * Check if the pages backing this ttm range have been invalidated * - * Called by amdgpu_cs_list_validate(). This creates the page list - * that backs user memory and will ultimately be mapped into the device - * address space. + * Returns: true if pages are still valid */ -void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages) +bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm) { struct amdgpu_ttm_tt *gtt = (void *)ttm; - unsigned i; + bool r = false; - gtt->last_set_pages = atomic_read(>t->mmu_invalidations); - for (i = 0; i < ttm->num_pages; ++i) { - if (ttm->pages[i]) - put_page(ttm->pages[i]); + if (!gtt || !gtt->userptr) + return false; - ttm->pages[i] = pages ? pages[i] : NULL; + DRM_DEBUG_DRIVER("user_pages_done 0x%llx pages 0x%lx\n", + gtt->userptr, ttm->num_pages); + + WARN_ONCE(!gtt->range || !gtt->range->pfns, + "No user pages to check\n"); + + if (gtt->range) { + r = hmm_range_valid(gtt->range); + hmm_range_unregister(gtt->range); + + kvfree(gtt->range->pfns); + kfree(gtt->range); + gtt->range = NULL; } + + return r; } +#endif /** - * amdgpu_ttm_tt_mark_user_page - Mark pages as dirty + * amdgpu_ttm_tt_set_user_pages - Copy pages in, putting old pages as necessary. * - * Called while unpinning userptr pages + * Called by amdgpu_cs_list_validate(). This creates the page list + * that backs user memory and will ultimately be mapped into the device + * address space. */ -void amdgpu_ttm_tt_mark_user_pages(struct ttm_tt *ttm) +void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages) { - struct amdgpu_ttm_tt *gtt = (void *)ttm; - unsigned i; - - for (i = 0; i < ttm->num_pages; ++i) { - struct page *page = ttm->pages[i]; - - if (!page) - continue; + unsigned long i; - if (!(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY)) - set_page_dirty(page); - - mark_page_accessed(page); - } + for (i = 0; i < ttm->num_pages; ++i) + ttm->pages[i] = pages ? pages[i] : NULL; } /** @@ -901,10 +983,14 @@ static void amdgpu_ttm_tt_unpin_userptr(struct ttm_tt *ttm) /* unmap the pages mapped to the device */ dma_unmap_sg(adev->dev, ttm->sg->sgl, ttm->sg->nents, direction); - /* mark the pages as dirty */ - amdgpu_ttm_tt_mark_user_pages(ttm); - sg_free_table(ttm->sg); + +#if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR) + if (gtt->range && + ttm->pages[0] == hmm_device_entry_to_page(gtt->range, + gtt->range->pfns[0])) + WARN_ONCE(1, "Missing get_user_page_done\n"); +#endif } int amdgpu_ttm_gart_bind(struct amdgpu_device *adev, @@ -925,8 +1011,8 @@ int amdgpu_ttm_gart_bind(struct amdgpu_device *adev, goto gart_bind_fail; /* Patch mtype of the second part BO */ - flags &= ~AMDGPU_PTE_MTYPE_MASK; - flags |= AMDGPU_PTE_MTYPE(AMDGPU_MTYPE_NC); + flags &= ~AMDGPU_PTE_MTYPE_VG10_MASK; + flags |= AMDGPU_PTE_MTYPE_VG10(AMDGPU_MTYPE_NC); r = amdgpu_gart_bind(adev, gtt->offset + (page_idx << PAGE_SHIFT), @@ -1254,11 +1340,6 @@ int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr, gtt->usertask = current->group_leader; get_task_struct(gtt->usertask); - spin_lock_init(>t->guptasklock); - INIT_LIST_HEAD(>t->guptasks); - atomic_set(>t->mmu_invalidations, 0); - gtt->last_set_pages = 0; - return 0; } @@ -1287,7 +1368,6 @@ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start, unsigned long end) { struct amdgpu_ttm_tt *gtt = (void *)ttm; - struct amdgpu_ttm_gup_task_list *entry; unsigned long size; if (gtt == NULL || !gtt->userptr) @@ -1300,48 +1380,20 @@ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start, if (gtt->userptr > end || gtt->userptr + size <= start) return false; - /* Search the lists of tasks that hold this mapping and see - * if current is one of them. If it is return false. - */ - spin_lock(>t->guptasklock); - list_for_each_entry(entry, >t->guptasks, list) { - if (entry->task == current) { - spin_unlock(>t->guptasklock); - return false; - } - } - spin_unlock(>t->guptasklock); - - atomic_inc(>t->mmu_invalidations); - return true; } /** - * amdgpu_ttm_tt_userptr_invalidated - Has the ttm_tt object been invalidated? + * amdgpu_ttm_tt_is_userptr - Have the pages backing by userptr? */ -bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm, - int *last_invalidated) -{ - struct amdgpu_ttm_tt *gtt = (void *)ttm; - int prev_invalidated = *last_invalidated; - - *last_invalidated = atomic_read(>t->mmu_invalidations); - return prev_invalidated != *last_invalidated; -} - -/** - * amdgpu_ttm_tt_userptr_needs_pages - Have the pages backing this ttm_tt object - * been invalidated since the last time they've been set? - */ -bool amdgpu_ttm_tt_userptr_needs_pages(struct ttm_tt *ttm) +bool amdgpu_ttm_tt_is_userptr(struct ttm_tt *ttm) { struct amdgpu_ttm_tt *gtt = (void *)ttm; if (gtt == NULL || !gtt->userptr) return false; - return atomic_read(>t->mmu_invalidations) != gtt->last_set_pages; + return true; } /** @@ -1753,44 +1805,26 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) /* Initialize various on-chip memory pools */ r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_GDS, - adev->gds.mem.total_size); + adev->gds.gds_size); if (r) { DRM_ERROR("Failed initializing GDS heap.\n"); return r; } - r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size, - 4, AMDGPU_GEM_DOMAIN_GDS, - &adev->gds.gds_gfx_bo, NULL, NULL); - if (r) - return r; - r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_GWS, - adev->gds.gws.total_size); + adev->gds.gws_size); if (r) { DRM_ERROR("Failed initializing gws heap.\n"); return r; } - r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size, - 1, AMDGPU_GEM_DOMAIN_GWS, - &adev->gds.gws_gfx_bo, NULL, NULL); - if (r) - return r; - r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_OA, - adev->gds.oa.total_size); + adev->gds.oa_size); if (r) { DRM_ERROR("Failed initializing oa heap.\n"); return r; } - r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size, - 1, AMDGPU_GEM_DOMAIN_OA, - &adev->gds.oa_gfx_bo, NULL, NULL); - if (r) - return r; - /* Register debugfs entries for amdgpu_ttm */ r = amdgpu_ttm_debugfs_init(adev); if (r) { @@ -2061,9 +2095,9 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo, mm_node = bo->tbo.mem.mm_node; num_loops = 0; while (num_pages) { - uint32_t byte_count = mm_node->size << PAGE_SHIFT; + uint64_t byte_count = mm_node->size << PAGE_SHIFT; - num_loops += DIV_ROUND_UP(byte_count, max_bytes); + num_loops += DIV_ROUND_UP_ULL(byte_count, max_bytes); num_pages -= mm_node->size; ++mm_node; } @@ -2089,12 +2123,13 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo, mm_node = bo->tbo.mem.mm_node; while (num_pages) { - uint32_t byte_count = mm_node->size << PAGE_SHIFT; + uint64_t byte_count = mm_node->size << PAGE_SHIFT; uint64_t dst_addr; dst_addr = amdgpu_mm_node_addr(&bo->tbo, mm_node, &bo->tbo.mem); while (byte_count) { - uint32_t cur_size_in_bytes = min(byte_count, max_bytes); + uint32_t cur_size_in_bytes = min_t(uint64_t, byte_count, + max_bytes); amdgpu_emit_fill_buffer(adev, &job->ibs[0], src_data, dst_addr, cur_size_in_bytes); |