author    | Dave Airlie <airlied@redhat.com> | 2020-05-08 13:31:06 +1000
committer | Dave Airlie <airlied@redhat.com> | 2020-05-08 13:31:08 +1000
commit    | 370fb6b0aaf07c66a3317d5b35fba4345b31035c (patch)
tree      | b68550f0d7b340440dc4048ed007b59ffa563e60 /drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
parent    | 937eea297e26effac6809a0bf8c20e6ca9d90b9a (diff)
parent    | b8020b0304c8f44e5e29f0b1a04d31e0bf68d26a (diff)
Merge tag 'amd-drm-next-5.8-2020-04-30' of git://people.freedesktop.org/~agd5f/linux into drm-next
amd-drm-next-5.8-2020-04-30:
amdgpu:
- SR-IOV fixes
- SDMA fix for Navi
- VCN 2.5 DPG fixes
- Display fixes
- Display stuttering fixes for pageflip and cursor
- Add support for handling encrypted GPU memory
- Add UAPI for encrypted GPU memory
- Rework IB pool handling
amdkfd:
- Expose asic revision in topology
- Add UAPI for GWS (Global Wave Sync) resource management
UAPI:
- Add amdgpu UAPI for encrypted GPU memory (see the first sketch below)
Used by: https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4401
- Add amdkfd UAPI for GWS (Global Wave Sync) resource management (see the second sketch below)
Thunk usage of KFD ioctl: https://github.com/RadeonOpenCompute/ROCT-Thunk-Interface/blob/roc-2.8.0/src/queues.c#L840
ROCr usage of Thunk API: https://github.com/RadeonOpenCompute/ROCR-Runtime/blob/roc-3.1.0/src/core/runtime/amd_gpu_agent.cpp#L597
HCC code using ROCr API: https://github.com/RadeonOpenCompute/hcc/blob/98ee9f34945d3b5f572d7a4c15cbffa506487734/lib/hsa/mcwamp_hsa.cpp#L2161
HIP code using HCC API: https://github.com/ROCm-Developer-Tools/HIP/blob/cf8589b8c8a40ddcc55fa3a51e23390a49824130/src/hip_module.cpp#L567
Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Alex Deucher <alexdeucher@gmail.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20200430212951.3902-1-alexander.deucher@amd.com
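The two UAPI additions called out above are small enough to sketch from userspace. First, the encrypted-memory UAPI: it amounts to a new GEM creation flag, AMDGPU_GEM_CREATE_ENCRYPTED, which asks for the buffer object to be backed by TMZ-protected memory; the PTEs mapping such a BO then carry AMDGPU_PTE_TMZ, as the amdgpu_vm_bo_update() hunk in the diff below shows. A minimal sketch, assuming libdrm headers new enough to define the flag, a TMZ-capable ASIC with TMZ turned on via the amdgpu.tmz module parameter, and that renderD128 is the amdgpu render node:

```c
/* Minimal sketch of the encrypted-memory GEM UAPI, not canonical usage.
 * Assumptions: up-to-date libdrm/kernel headers with
 * AMDGPU_GEM_CREATE_ENCRYPTED, TMZ enabled (amdgpu.tmz), and that
 * /dev/dri/renderD128 is the amdgpu node.
 * Build: cc tmz.c $(pkg-config --cflags --libs libdrm)
 */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <xf86drm.h>
#include <amdgpu_drm.h>

int main(void)
{
	union drm_amdgpu_gem_create args;
	int fd = open("/dev/dri/renderD128", O_RDWR);

	if (fd < 0) {
		perror("open");
		return 1;
	}

	memset(&args, 0, sizeof(args));
	args.in.bo_size = 1 << 20;		/* 1 MiB buffer */
	args.in.alignment = 4096;
	args.in.domains = AMDGPU_GEM_DOMAIN_VRAM;
	/* the one new flag this merge exposes: back the BO with
	 * TMZ-protected (encrypted) memory */
	args.in.domain_flags = AMDGPU_GEM_CREATE_ENCRYPTED;

	if (drmIoctl(fd, DRM_IOCTL_AMDGPU_GEM_CREATE, &args)) {
		perror("DRM_IOCTL_AMDGPU_GEM_CREATE");
		return 1;
	}
	printf("encrypted BO handle: %u\n", args.out.handle);
	return 0;
}
```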
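Second, the GWS UAPI: a single new KFD ioctl, AMDKFD_IOC_ALLOC_QUEUE_GWS, which binds a number of Global Wave Sync entries to an existing user queue (the Thunk wraps this as hsaKmtAllocQueueGWS, per the links above). Another minimal sketch; the queue_id of 0 is a placeholder for a queue previously created with AMDKFD_IOC_CREATE_QUEUE, and the ioctl will fail on hardware that exposes no GWS entries:

```c
/* Minimal sketch of the new GWS ioctl.  Assumptions: a kernel and
 * <linux/kfd_ioctl.h> carrying AMDKFD_IOC_ALLOC_QUEUE_GWS, and a GPU
 * that exposes GWS.  queue_id 0 is a placeholder, not a working value;
 * a real caller passes the id of a queue it created earlier via
 * AMDKFD_IOC_CREATE_QUEUE.
 */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/kfd_ioctl.h>

int main(void)
{
	struct kfd_ioctl_alloc_queue_gws_args args;
	int fd = open("/dev/kfd", O_RDWR);

	if (fd < 0) {
		perror("open /dev/kfd");
		return 1;
	}

	memset(&args, 0, sizeof(args));
	args.queue_id = 0;	/* placeholder: an existing user queue */
	args.num_gws = 1;	/* request one global wave sync entry */

	if (ioctl(fd, AMDKFD_IOC_ALLOC_QUEUE_GWS, &args)) {
		perror("AMDKFD_IOC_ALLOC_QUEUE_GWS");
		return 1;
	}
	/* on success KFD reports the index of the first entry granted;
	 * the allocation is contiguous from there */
	printf("first GWS entry: %u\n", args.first_gws);
	return 0;
}
```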
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c')
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 91 |
1 file changed, 51 insertions, 40 deletions
```diff
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index bcdaf5204d05..414a0b1c2e5a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -726,7 +726,7 @@ bool amdgpu_vm_ready(struct amdgpu_vm *vm)
  * @adev: amdgpu_device pointer
  * @vm: VM to clear BO from
  * @bo: BO to clear
- * @direct: use a direct update
+ * @immediate: use an immediate update
  *
  * Root PD needs to be reserved when calling this.
  *
@@ -736,7 +736,7 @@
 static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
 			      struct amdgpu_vm *vm,
 			      struct amdgpu_bo *bo,
-			      bool direct)
+			      bool immediate)
 {
 	struct ttm_operation_ctx ctx = { true, false };
 	unsigned level = adev->vm_manager.root_level;
@@ -795,7 +795,7 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
 	memset(&params, 0, sizeof(params));
 	params.adev = adev;
 	params.vm = vm;
-	params.direct = direct;
+	params.immediate = immediate;
 
 	r = vm->update_funcs->prepare(&params, NULL, AMDGPU_SYNC_EXPLICIT);
 	if (r)
@@ -850,11 +850,11 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
  * @adev: amdgpu_device pointer
  * @vm: requesting vm
  * @level: the page table level
- * @direct: use a direct update
+ * @immediate: use a immediate update
  * @bp: resulting BO allocation parameters
  */
 static void amdgpu_vm_bo_param(struct amdgpu_device *adev, struct amdgpu_vm *vm,
-			       int level, bool direct,
+			       int level, bool immediate,
 			       struct amdgpu_bo_param *bp)
 {
 	memset(bp, 0, sizeof(*bp));
@@ -870,7 +870,7 @@ static void amdgpu_vm_bo_param(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 	else if (!vm->root.base.bo || vm->root.base.bo->shadow)
 		bp->flags |= AMDGPU_GEM_CREATE_SHADOW;
 	bp->type = ttm_bo_type_kernel;
-	bp->no_wait_gpu = direct;
+	bp->no_wait_gpu = immediate;
 	if (vm->root.base.bo)
 		bp->resv = vm->root.base.bo->tbo.base.resv;
 }
@@ -881,7 +881,7 @@ static void amdgpu_vm_bo_param(struct amdgpu_device *adev, struct amdgpu_vm *vm,
  * @adev: amdgpu_device pointer
  * @vm: VM to allocate page tables for
  * @cursor: Which page table to allocate
- * @direct: use a direct update
+ * @immediate: use an immediate update
  *
  * Make sure a specific page table or directory is allocated.
  *
@@ -892,7 +892,7 @@ static void amdgpu_vm_bo_param(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 static int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
 			       struct amdgpu_vm *vm,
 			       struct amdgpu_vm_pt_cursor *cursor,
-			       bool direct)
+			       bool immediate)
 {
 	struct amdgpu_vm_pt *entry = cursor->entry;
 	struct amdgpu_bo_param bp;
@@ -913,7 +913,7 @@ static int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
 	if (entry->base.bo)
 		return 0;
 
-	amdgpu_vm_bo_param(adev, vm, cursor->level, direct, &bp);
+	amdgpu_vm_bo_param(adev, vm, cursor->level, immediate, &bp);
 
 	r = amdgpu_bo_create(adev, &bp, &pt);
 	if (r)
@@ -925,7 +925,7 @@ static int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
 	pt->parent = amdgpu_bo_ref(cursor->parent->base.bo);
 	amdgpu_vm_bo_base_init(&entry->base, vm, pt);
 
-	r = amdgpu_vm_clear_bo(adev, vm, pt, direct);
+	r = amdgpu_vm_clear_bo(adev, vm, pt, immediate);
 	if (r)
 		goto error_free_pt;
 
@@ -1276,7 +1276,7 @@ static void amdgpu_vm_invalidate_pds(struct amdgpu_device *adev,
  *
  * @adev: amdgpu_device pointer
  * @vm: requested vm
- * @direct: submit directly to the paging queue
+ * @immediate: submit immediately to the paging queue
  *
  * Makes sure all directories are up to date.
  *
@@ -1284,7 +1284,7 @@ static void amdgpu_vm_invalidate_pds(struct amdgpu_device *adev,
  * 0 for success, error for failure.
  */
 int amdgpu_vm_update_pdes(struct amdgpu_device *adev,
-			  struct amdgpu_vm *vm, bool direct)
+			  struct amdgpu_vm *vm, bool immediate)
 {
 	struct amdgpu_vm_update_params params;
 	int r;
@@ -1295,7 +1295,7 @@ int amdgpu_vm_update_pdes(struct amdgpu_device *adev,
 	memset(&params, 0, sizeof(params));
 	params.adev = adev;
 	params.vm = vm;
-	params.direct = direct;
+	params.immediate = immediate;
 
 	r = vm->update_funcs->prepare(&params, NULL, AMDGPU_SYNC_EXPLICIT);
 	if (r)
@@ -1446,20 +1446,24 @@ static int amdgpu_vm_update_ptes(struct amdgpu_vm_update_params *params,
 		uint64_t incr, entry_end, pe_start;
 		struct amdgpu_bo *pt;
 
-		if (flags & (AMDGPU_PTE_VALID | AMDGPU_PTE_PRT)) {
+		if (!params->unlocked) {
 			/* make sure that the page tables covering the
 			 * address range are actually allocated
 			 */
 			r = amdgpu_vm_alloc_pts(params->adev, params->vm,
-						&cursor, params->direct);
+						&cursor, params->immediate);
 			if (r)
 				return r;
 		}
 
 		shift = amdgpu_vm_level_shift(adev, cursor.level);
 		parent_shift = amdgpu_vm_level_shift(adev, cursor.level - 1);
-		if (adev->asic_type < CHIP_VEGA10 &&
-		    (flags & AMDGPU_PTE_VALID)) {
+		if (params->unlocked) {
+			/* Unlocked updates are only allowed on the leaves */
+			if (amdgpu_vm_pt_descendant(adev, &cursor))
+				continue;
+		} else if (adev->asic_type < CHIP_VEGA10 &&
+			   (flags & AMDGPU_PTE_VALID)) {
 			/* No huge page support before GMC v9 */
 			if (cursor.level != AMDGPU_VM_PTB) {
 				if (!amdgpu_vm_pt_descendant(adev, &cursor))
@@ -1557,7 +1561,8 @@ static int amdgpu_vm_update_ptes(struct amdgpu_vm_update_params *params,
  *
  * @adev: amdgpu_device pointer
  * @vm: requested vm
- * @direct: direct submission in a page fault
+ * @immediate: immediate submission in a page fault
+ * @unlocked: unlocked invalidation during MM callback
  * @resv: fences we need to sync to
  * @start: start of mapped range
  * @last: last mapped entry
@@ -1572,8 +1577,8 @@ static int amdgpu_vm_update_ptes(struct amdgpu_vm_update_params *params,
  * 0 for success, -EINVAL for failure.
  */
 static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
-				       struct amdgpu_vm *vm, bool direct,
-				       struct dma_resv *resv,
+				       struct amdgpu_vm *vm, bool immediate,
+				       bool unlocked, struct dma_resv *resv,
 				       uint64_t start, uint64_t last,
 				       uint64_t flags, uint64_t addr,
 				       dma_addr_t *pages_addr,
@@ -1586,8 +1591,9 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 	memset(&params, 0, sizeof(params));
 	params.adev = adev;
 	params.vm = vm;
-	params.direct = direct;
+	params.immediate = immediate;
 	params.pages_addr = pages_addr;
+	params.unlocked = unlocked;
 
 	/* Implicitly sync to command submissions in the same VM before
 	 * unmapping. Sync to moving fences before mapping.
@@ -1603,11 +1609,12 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 		goto error_unlock;
 	}
 
-	if (flags & (AMDGPU_PTE_VALID | AMDGPU_PTE_PRT)) {
-		struct amdgpu_bo *root = vm->root.base.bo;
+	if (!unlocked && !dma_fence_is_signaled(vm->last_unlocked)) {
+		struct dma_fence *tmp = dma_fence_get_stub();
 
-		if (!dma_fence_is_signaled(vm->last_direct))
-			amdgpu_bo_fence(root, vm->last_direct, true);
+		amdgpu_bo_fence(vm->root.base.bo, vm->last_unlocked, true);
+		swap(vm->last_unlocked, tmp);
+		dma_fence_put(tmp);
 	}
 
 	r = vm->update_funcs->prepare(&params, resv, sync_mode);
@@ -1721,7 +1728,7 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
 		}
 
 		last = min((uint64_t)mapping->last, start + max_entries - 1);
-		r = amdgpu_vm_bo_update_mapping(adev, vm, false, resv,
+		r = amdgpu_vm_bo_update_mapping(adev, vm, false, false, resv,
 						start, last, flags, addr,
 						dma_addr, fence);
 		if (r)
@@ -1784,6 +1791,10 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
 
 	if (bo) {
 		flags = amdgpu_ttm_tt_pte_flags(adev, bo->tbo.ttm, mem);
+
+		if (amdgpu_bo_encrypted(bo))
+			flags |= AMDGPU_PTE_TMZ;
+
 		bo_adev = amdgpu_ttm_adev(bo->tbo.bdev);
 	} else {
 		flags = 0x0;
@@ -2014,7 +2025,7 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
 		    mapping->start < AMDGPU_GMC_HOLE_START)
 			init_pte_value = AMDGPU_PTE_DEFAULT_ATC;
 
-		r = amdgpu_vm_bo_update_mapping(adev, vm, false, resv,
+		r = amdgpu_vm_bo_update_mapping(adev, vm, false, false, resv,
 						mapping->start, mapping->last,
 						init_pte_value, 0, NULL, &f);
 		amdgpu_vm_free_mapping(adev, vm, mapping, f);
@@ -2578,7 +2589,7 @@ bool amdgpu_vm_evictable(struct amdgpu_bo *bo)
 		return false;
 
 	/* Don't evict VM page tables while they are updated */
-	if (!dma_fence_is_signaled(bo_base->vm->last_direct)) {
+	if (!dma_fence_is_signaled(bo_base->vm->last_unlocked)) {
 		amdgpu_vm_eviction_unlock(bo_base->vm);
 		return false;
 	}
@@ -2755,7 +2766,7 @@ long amdgpu_vm_wait_idle(struct amdgpu_vm *vm, long timeout)
 	if (timeout <= 0)
 		return timeout;
 
-	return dma_fence_wait_timeout(vm->last_direct, true, timeout);
+	return dma_fence_wait_timeout(vm->last_unlocked, true, timeout);
 }
 
 /**
@@ -2791,7 +2802,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 
 
 	/* create scheduler entities for page table updates */
-	r = drm_sched_entity_init(&vm->direct, DRM_SCHED_PRIORITY_NORMAL,
+	r = drm_sched_entity_init(&vm->immediate, DRM_SCHED_PRIORITY_NORMAL,
 				  adev->vm_manager.vm_pte_scheds,
 				  adev->vm_manager.vm_pte_num_scheds, NULL);
 	if (r)
@@ -2801,7 +2812,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 				  adev->vm_manager.vm_pte_scheds,
 				  adev->vm_manager.vm_pte_num_scheds, NULL);
 	if (r)
-		goto error_free_direct;
+		goto error_free_immediate;
 
 	vm->pte_support_ats = false;
 	vm->is_compute_context = false;
@@ -2827,7 +2838,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 	else
 		vm->update_funcs = &amdgpu_vm_sdma_funcs;
 	vm->last_update = NULL;
-	vm->last_direct = dma_fence_get_stub();
+	vm->last_unlocked = dma_fence_get_stub();
 
 	mutex_init(&vm->eviction_lock);
 	vm->evicting = false;
@@ -2881,11 +2892,11 @@ error_free_root:
 	vm->root.base.bo = NULL;
 
 error_free_delayed:
-	dma_fence_put(vm->last_direct);
+	dma_fence_put(vm->last_unlocked);
 	drm_sched_entity_destroy(&vm->delayed);
 
-error_free_direct:
-	drm_sched_entity_destroy(&vm->direct);
+error_free_immediate:
+	drm_sched_entity_destroy(&vm->immediate);
 
 	return r;
 }
@@ -3082,8 +3093,8 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 		vm->pasid = 0;
 	}
 
-	dma_fence_wait(vm->last_direct, false);
-	dma_fence_put(vm->last_direct);
+	dma_fence_wait(vm->last_unlocked, false);
+	dma_fence_put(vm->last_unlocked);
 
 	list_for_each_entry_safe(mapping, tmp, &vm->freed, list) {
 		if (mapping->flags & AMDGPU_PTE_PRT && prt_fini_needed) {
@@ -3100,7 +3111,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 	amdgpu_bo_unref(&root);
 	WARN_ON(vm->root.base.bo);
 
-	drm_sched_entity_destroy(&vm->direct);
+	drm_sched_entity_destroy(&vm->immediate);
 	drm_sched_entity_destroy(&vm->delayed);
 
 	if (!RB_EMPTY_ROOT(&vm->va.rb_root)) {
@@ -3333,8 +3344,8 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, unsigned int pasid,
 		value = 0;
 	}
 
-	r = amdgpu_vm_bo_update_mapping(adev, vm, true, NULL, addr, addr + 1,
-					flags, value, NULL, NULL);
+	r = amdgpu_vm_bo_update_mapping(adev, vm, true, false, NULL, addr,
+					addr + 1, flags, value, NULL, NULL);
 	if (r)
 		goto error_unlock;
```