diff options
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c')
| -rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 126 |
1 files changed, 90 insertions, 36 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 94089069c9ad..3abfa66d72a2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -333,7 +333,7 @@ void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base, base->next = bo->vm_bo; bo->vm_bo = base; - if (bo->tbo.base.resv != vm->root.bo->tbo.base.resv) + if (!amdgpu_vm_is_bo_always_valid(vm, bo)) return; dma_resv_assert_held(vm->root.bo->tbo.base.resv); @@ -886,6 +886,44 @@ static void amdgpu_vm_tlb_seq_cb(struct dma_fence *fence, } /** + * amdgpu_vm_tlb_flush - prepare TLB flush + * + * @params: parameters for update + * @fence: input fence to sync TLB flush with + * @tlb_cb: the callback structure + * + * Increments the tlb sequence to make sure that future CS execute a VM flush. + */ +static void +amdgpu_vm_tlb_flush(struct amdgpu_vm_update_params *params, + struct dma_fence **fence, + struct amdgpu_vm_tlb_seq_struct *tlb_cb) +{ + struct amdgpu_vm *vm = params->vm; + + if (!fence || !*fence) + return; + + tlb_cb->vm = vm; + if (!dma_fence_add_callback(*fence, &tlb_cb->cb, + amdgpu_vm_tlb_seq_cb)) { + dma_fence_put(vm->last_tlb_flush); + vm->last_tlb_flush = dma_fence_get(*fence); + } else { + amdgpu_vm_tlb_seq_cb(NULL, &tlb_cb->cb); + } + + /* Prepare a TLB flush fence to be attached to PTs */ + if (!params->unlocked && vm->is_compute_context) { + amdgpu_vm_tlb_fence_create(params->adev, vm, fence); + + /* Makes sure no PD/PT is freed before the flush */ + dma_resv_add_fence(vm->root.bo->tbo.base.resv, *fence, + DMA_RESV_USAGE_BOOKKEEP); + } +} + +/** * amdgpu_vm_update_range - update a range in the vm page table * * @adev: amdgpu_device pointer to use for commands @@ -916,8 +954,8 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm, struct ttm_resource *res, dma_addr_t *pages_addr, struct dma_fence **fence) { - struct amdgpu_vm_update_params params; struct amdgpu_vm_tlb_seq_struct *tlb_cb; + struct amdgpu_vm_update_params params; struct amdgpu_res_cursor cursor; enum amdgpu_sync_mode sync_mode; int r, idx; @@ -927,8 +965,8 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm, tlb_cb = kmalloc(sizeof(*tlb_cb), GFP_KERNEL); if (!tlb_cb) { - r = -ENOMEM; - goto error_unlock; + drm_dev_exit(idx); + return -ENOMEM; } /* Vega20+XGMI where PTEs get inadvertently cached in L2 texture cache, @@ -948,7 +986,9 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm, params.immediate = immediate; params.pages_addr = pages_addr; params.unlocked = unlocked; + params.needs_flush = flush_tlb; params.allow_override = allow_override; + INIT_LIST_HEAD(¶ms.tlb_flush_waitlist); /* Implicitly sync to command submissions in the same VM before * unmapping. Sync to moving fences before mapping. @@ -1015,7 +1055,7 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm, params.pages_addr = NULL; } - } else if (flags & (AMDGPU_PTE_VALID | AMDGPU_PTE_PRT)) { + } else if (flags & (AMDGPU_PTE_VALID | AMDGPU_PTE_PRT_FLAG(adev))) { addr = vram_base + cursor.start; } else { addr = 0; @@ -1031,24 +1071,18 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm, } r = vm->update_funcs->commit(¶ms, fence); + if (r) + goto error_free; - if (flush_tlb || params.table_freed) { - tlb_cb->vm = vm; - if (fence && *fence && - !dma_fence_add_callback(*fence, &tlb_cb->cb, - amdgpu_vm_tlb_seq_cb)) { - dma_fence_put(vm->last_tlb_flush); - vm->last_tlb_flush = dma_fence_get(*fence); - } else { - amdgpu_vm_tlb_seq_cb(NULL, &tlb_cb->cb); - } + if (params.needs_flush) { + amdgpu_vm_tlb_flush(¶ms, fence, tlb_cb); tlb_cb = NULL; } + amdgpu_vm_pt_free_list(adev, ¶ms); + error_free: kfree(tlb_cb); - -error_unlock: amdgpu_vm_eviction_unlock(vm); drm_dev_exit(idx); return r; @@ -1067,13 +1101,13 @@ static void amdgpu_vm_bo_get_memory(struct amdgpu_bo_va *bo_va, * For now ignore BOs which are currently locked and potentially * changing their location. */ - if (bo->tbo.base.resv != vm->root.bo->tbo.base.resv && + if (!amdgpu_vm_is_bo_always_valid(vm, bo) && !dma_resv_trylock(bo->tbo.base.resv)) return; amdgpu_bo_get_memory(bo, stats); - if (bo->tbo.base.resv != vm->root.bo->tbo.base.resv) - dma_resv_unlock(bo->tbo.base.resv); + if (!amdgpu_vm_is_bo_always_valid(vm, bo)) + dma_resv_unlock(bo->tbo.base.resv); } void amdgpu_vm_get_memory(struct amdgpu_vm *vm, @@ -1169,8 +1203,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, uncached = false; } - if (clear || (bo && bo->tbo.base.resv == - vm->root.bo->tbo.base.resv)) + if (clear || amdgpu_vm_is_bo_always_valid(vm, bo)) last_update = &vm->last_update; else last_update = &bo_va->last_pt_update; @@ -1212,7 +1245,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, * the evicted list so that it gets validated again on the * next command submission. */ - if (bo && bo->tbo.base.resv == vm->root.bo->tbo.base.resv) { + if (amdgpu_vm_is_bo_always_valid(vm, bo)) { uint32_t mem_type = bo->tbo.resource->mem_type; if (!(bo->preferred_domains & @@ -1335,7 +1368,7 @@ static void amdgpu_vm_free_mapping(struct amdgpu_device *adev, struct amdgpu_bo_va_mapping *mapping, struct dma_fence *fence) { - if (mapping->flags & AMDGPU_PTE_PRT) + if (mapping->flags & AMDGPU_PTE_PRT_FLAG(adev)) amdgpu_vm_add_prt_cb(adev, fence); kfree(mapping); } @@ -1603,13 +1636,12 @@ static void amdgpu_vm_bo_insert_map(struct amdgpu_device *adev, list_add(&mapping->list, &bo_va->invalids); amdgpu_vm_it_insert(mapping, &vm->va); - if (mapping->flags & AMDGPU_PTE_PRT) + if (mapping->flags & AMDGPU_PTE_PRT_FLAG(adev)) amdgpu_vm_prt_get(adev); - if (bo && bo->tbo.base.resv == vm->root.bo->tbo.base.resv && - !bo_va->base.moved) { + if (amdgpu_vm_is_bo_always_valid(vm, bo) && !bo_va->base.moved) amdgpu_vm_bo_moved(&bo_va->base); - } + trace_amdgpu_vm_bo_map(bo_va, mapping); } @@ -1905,10 +1937,10 @@ int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev, struct amdgpu_bo *bo = before->bo_va->base.bo; amdgpu_vm_it_insert(before, &vm->va); - if (before->flags & AMDGPU_PTE_PRT) + if (before->flags & AMDGPU_PTE_PRT_FLAG(adev)) amdgpu_vm_prt_get(adev); - if (bo && bo->tbo.base.resv == vm->root.bo->tbo.base.resv && + if (amdgpu_vm_is_bo_always_valid(vm, bo) && !before->bo_va->base.moved) amdgpu_vm_bo_moved(&before->bo_va->base); } else { @@ -1920,10 +1952,10 @@ int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev, struct amdgpu_bo *bo = after->bo_va->base.bo; amdgpu_vm_it_insert(after, &vm->va); - if (after->flags & AMDGPU_PTE_PRT) + if (after->flags & AMDGPU_PTE_PRT_FLAG(adev)) amdgpu_vm_prt_get(adev); - if (bo && bo->tbo.base.resv == vm->root.bo->tbo.base.resv && + if (amdgpu_vm_is_bo_always_valid(vm, bo) && !after->bo_va->base.moved) amdgpu_vm_bo_moved(&after->bo_va->base); } else { @@ -2003,7 +2035,7 @@ void amdgpu_vm_bo_del(struct amdgpu_device *adev, if (bo) { dma_resv_assert_held(bo->tbo.base.resv); - if (bo->tbo.base.resv == vm->root.bo->tbo.base.resv) + if (amdgpu_vm_is_bo_always_valid(vm, bo)) ttm_bo_set_bulk_move(&bo->tbo, NULL); for (base = &bo_va->base.bo->vm_bo; *base; @@ -2097,7 +2129,7 @@ void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev, for (bo_base = bo->vm_bo; bo_base; bo_base = bo_base->next) { struct amdgpu_vm *vm = bo_base->vm; - if (evicted && bo->tbo.base.resv == vm->root.bo->tbo.base.resv) { + if (evicted && amdgpu_vm_is_bo_always_valid(vm, bo)) { amdgpu_vm_bo_evicted(bo_base); continue; } @@ -2108,7 +2140,7 @@ void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev, if (bo->tbo.type == ttm_bo_type_kernel) amdgpu_vm_bo_relocated(bo_base); - else if (bo->tbo.base.resv == vm->root.bo->tbo.base.resv) + else if (amdgpu_vm_is_bo_always_valid(vm, bo)) amdgpu_vm_bo_moved(bo_base); else amdgpu_vm_bo_invalidated(bo_base); @@ -2411,6 +2443,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, mutex_init(&vm->eviction_lock); vm->evicting = false; + vm->tlb_fence_context = dma_fence_context_alloc(1); r = amdgpu_vm_pt_create(adev, vm, adev->vm_manager.root_level, false, &root, xcp_id); @@ -2570,7 +2603,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) dma_fence_put(vm->last_tlb_flush); list_for_each_entry_safe(mapping, tmp, &vm->freed, list) { - if (mapping->flags & AMDGPU_PTE_PRT && prt_fini_needed) { + if (mapping->flags & AMDGPU_PTE_PRT_FLAG(adev) && prt_fini_needed) { amdgpu_vm_prt_fini(adev, vm); prt_fini_needed = false; } @@ -2944,6 +2977,14 @@ void amdgpu_vm_update_fault_cache(struct amdgpu_device *adev, if (vm && status) { vm->fault_info.addr = addr; vm->fault_info.status = status; + /* + * Update the fault information globally for later usage + * when vm could be stale or freed. + */ + adev->vm_manager.fault_info.addr = addr; + adev->vm_manager.fault_info.vmhub = vmhub; + adev->vm_manager.fault_info.status = status; + if (AMDGPU_IS_GFXHUB(vmhub)) { vm->fault_info.vmhub = AMDGPU_VMHUB_TYPE_GFX; vm->fault_info.vmhub |= @@ -2963,3 +3004,16 @@ void amdgpu_vm_update_fault_cache(struct amdgpu_device *adev, xa_unlock_irqrestore(&adev->vm_manager.pasids, flags); } +/** + * amdgpu_vm_is_bo_always_valid - check if the BO is VM always valid + * + * @vm: VM to test against. + * @bo: BO to be tested. + * + * Returns true if the BO shares the dma_resv object with the root PD and is + * always guaranteed to be valid inside the VM. + */ +bool amdgpu_vm_is_bo_always_valid(struct amdgpu_vm *vm, struct amdgpu_bo *bo) +{ + return bo && bo->tbo.base.resv == vm->root.bo->tbo.base.resv; +} |