diff options
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c')
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 1062 |
1 files changed, 730 insertions, 332 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 8ecf82c5fe74..bd20ff018512 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -77,8 +77,12 @@ struct amdgpu_pte_update_params { void (*func)(struct amdgpu_pte_update_params *params, uint64_t pe, uint64_t addr, unsigned count, uint32_t incr, uint64_t flags); - /* indicate update pt or its shadow */ - bool shadow; + /* The next two are used during VM update by CPU + * DMA addresses to use for mapping + * Kernel pointer of PD/PT BO that needs to be updated + */ + dma_addr_t *pages_addr; + void *kptr; }; /* Helper to disable partial resident texture feature from a fence callback */ @@ -155,11 +159,18 @@ void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm, */ static int amdgpu_vm_validate_level(struct amdgpu_vm_pt *parent, int (*validate)(void *, struct amdgpu_bo *), - void *param) + void *param, bool use_cpu_for_update, + struct ttm_bo_global *glob) { unsigned i; int r; + if (use_cpu_for_update) { + r = amdgpu_bo_kmap(parent->bo, NULL); + if (r) + return r; + } + if (!parent->entries) return 0; @@ -173,11 +184,18 @@ static int amdgpu_vm_validate_level(struct amdgpu_vm_pt *parent, if (r) return r; + spin_lock(&glob->lru_lock); + ttm_bo_move_to_lru_tail(&entry->bo->tbo); + if (entry->bo->shadow) + ttm_bo_move_to_lru_tail(&entry->bo->shadow->tbo); + spin_unlock(&glob->lru_lock); + /* * Recurse into the sub directory. This is harmless because we * have only a maximum of 5 layers. */ - r = amdgpu_vm_validate_level(entry, validate, param); + r = amdgpu_vm_validate_level(entry, validate, param, + use_cpu_for_update, glob); if (r) return r; } @@ -208,54 +226,12 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm, if (num_evictions == vm->last_eviction_counter) return 0; - return amdgpu_vm_validate_level(&vm->root, validate, param); -} - -/** - * amdgpu_vm_move_level_in_lru - move one level of PT BOs to the LRU tail - * - * @adev: amdgpu device instance - * @vm: vm providing the BOs - * - * Move the PT BOs to the tail of the LRU. - */ -static void amdgpu_vm_move_level_in_lru(struct amdgpu_vm_pt *parent) -{ - unsigned i; - - if (!parent->entries) - return; - - for (i = 0; i <= parent->last_entry_used; ++i) { - struct amdgpu_vm_pt *entry = &parent->entries[i]; - - if (!entry->bo) - continue; - - ttm_bo_move_to_lru_tail(&entry->bo->tbo); - amdgpu_vm_move_level_in_lru(entry); - } + return amdgpu_vm_validate_level(&vm->root, validate, param, + vm->use_cpu_for_update, + adev->mman.bdev.glob); } /** - * amdgpu_vm_move_pt_bos_in_lru - move the PT BOs to the LRU tail - * - * @adev: amdgpu device instance - * @vm: vm providing the BOs - * - * Move the PT BOs to the tail of the LRU. - */ -void amdgpu_vm_move_pt_bos_in_lru(struct amdgpu_device *adev, - struct amdgpu_vm *vm) -{ - struct ttm_bo_global *glob = adev->mman.bdev.glob; - - spin_lock(&glob->lru_lock); - amdgpu_vm_move_level_in_lru(&vm->root); - spin_unlock(&glob->lru_lock); -} - - /** * amdgpu_vm_alloc_levels - allocate the PD/PT levels * * @adev: amdgpu_device pointer @@ -275,12 +251,15 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev, adev->vm_manager.block_size; unsigned pt_idx, from, to; int r; + u64 flags; + uint64_t init_value = 0; if (!parent->entries) { unsigned num_entries = amdgpu_vm_num_entries(adev, level); - parent->entries = drm_calloc_large(num_entries, - sizeof(struct amdgpu_vm_pt)); + parent->entries = kvmalloc_array(num_entries, + sizeof(struct amdgpu_vm_pt), + GFP_KERNEL | __GFP_ZERO); if (!parent->entries) return -ENOMEM; memset(parent->entries, 0 , sizeof(struct amdgpu_vm_pt)); @@ -299,6 +278,20 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev, saddr = saddr & ((1 << shift) - 1); eaddr = eaddr & ((1 << shift) - 1); + flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS | + AMDGPU_GEM_CREATE_VRAM_CLEARED; + if (vm->use_cpu_for_update) + flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; + else + flags |= (AMDGPU_GEM_CREATE_NO_CPU_ACCESS | + AMDGPU_GEM_CREATE_SHADOW); + + if (vm->pte_support_ats) { + init_value = AMDGPU_PTE_SYSTEM; + if (level != adev->vm_manager.num_level - 1) + init_value |= AMDGPU_PDE_PTE; + } + /* walk over the address space and allocate the page tables */ for (pt_idx = from; pt_idx <= to; ++pt_idx) { struct reservation_object *resv = vm->root.bo->tbo.resv; @@ -310,14 +303,19 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev, amdgpu_vm_bo_size(adev, level), AMDGPU_GPU_PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_VRAM, - AMDGPU_GEM_CREATE_NO_CPU_ACCESS | - AMDGPU_GEM_CREATE_SHADOW | - AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS | - AMDGPU_GEM_CREATE_VRAM_CLEARED, - NULL, resv, &pt); + flags, + NULL, resv, init_value, &pt); if (r) return r; + if (vm->use_cpu_for_update) { + r = amdgpu_bo_kmap(pt, NULL); + if (r) { + amdgpu_bo_unref(&pt); + return r; + } + } + /* Keep a reference to the root directory to avoid * freeing them up in the wrong order. */ @@ -391,6 +389,71 @@ static bool amdgpu_vm_had_gpu_reset(struct amdgpu_device *adev, atomic_read(&adev->gpu_reset_counter); } +static bool amdgpu_vm_reserved_vmid_ready(struct amdgpu_vm *vm, unsigned vmhub) +{ + return !!vm->reserved_vmid[vmhub]; +} + +/* idr_mgr->lock must be held */ +static int amdgpu_vm_grab_reserved_vmid_locked(struct amdgpu_vm *vm, + struct amdgpu_ring *ring, + struct amdgpu_sync *sync, + struct dma_fence *fence, + struct amdgpu_job *job) +{ + struct amdgpu_device *adev = ring->adev; + unsigned vmhub = ring->funcs->vmhub; + uint64_t fence_context = adev->fence_context + ring->idx; + struct amdgpu_vm_id *id = vm->reserved_vmid[vmhub]; + struct amdgpu_vm_id_manager *id_mgr = &adev->vm_manager.id_mgr[vmhub]; + struct dma_fence *updates = sync->last_vm_update; + int r = 0; + struct dma_fence *flushed, *tmp; + bool needs_flush = vm->use_cpu_for_update; + + flushed = id->flushed_updates; + if ((amdgpu_vm_had_gpu_reset(adev, id)) || + (atomic64_read(&id->owner) != vm->client_id) || + (job->vm_pd_addr != id->pd_gpu_addr) || + (updates && (!flushed || updates->context != flushed->context || + dma_fence_is_later(updates, flushed))) || + (!id->last_flush || (id->last_flush->context != fence_context && + !dma_fence_is_signaled(id->last_flush)))) { + needs_flush = true; + /* to prevent one context starved by another context */ + id->pd_gpu_addr = 0; + tmp = amdgpu_sync_peek_fence(&id->active, ring); + if (tmp) { + r = amdgpu_sync_fence(adev, sync, tmp); + return r; + } + } + + /* Good we can use this VMID. Remember this submission as + * user of the VMID. + */ + r = amdgpu_sync_fence(ring->adev, &id->active, fence); + if (r) + goto out; + + if (updates && (!flushed || updates->context != flushed->context || + dma_fence_is_later(updates, flushed))) { + dma_fence_put(id->flushed_updates); + id->flushed_updates = dma_fence_get(updates); + } + id->pd_gpu_addr = job->vm_pd_addr; + atomic64_set(&id->owner, vm->client_id); + job->vm_needs_flush = needs_flush; + if (needs_flush) { + dma_fence_put(id->last_flush); + id->last_flush = NULL; + } + job->vm_id = id - id_mgr->ids; + trace_amdgpu_vm_grab_id(vm, ring, job); +out: + return r; +} + /** * amdgpu_vm_grab_id - allocate the next free VMID * @@ -415,12 +478,17 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring, unsigned i; int r = 0; + mutex_lock(&id_mgr->lock); + if (amdgpu_vm_reserved_vmid_ready(vm, vmhub)) { + r = amdgpu_vm_grab_reserved_vmid_locked(vm, ring, sync, fence, job); + mutex_unlock(&id_mgr->lock); + return r; + } fences = kmalloc_array(sizeof(void *), id_mgr->num_ids, GFP_KERNEL); - if (!fences) + if (!fences) { + mutex_unlock(&id_mgr->lock); return -ENOMEM; - - mutex_lock(&id_mgr->lock); - + } /* Check if we have an idle VMID */ i = 0; list_for_each_entry(idle, &id_mgr->ids_lru, list) { @@ -462,11 +530,11 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring, } kfree(fences); - job->vm_needs_flush = false; + job->vm_needs_flush = vm->use_cpu_for_update; /* Check if we can use a VMID already assigned to this VM */ list_for_each_entry_reverse(id, &id_mgr->ids_lru, list) { struct dma_fence *flushed; - bool needs_flush = false; + bool needs_flush = vm->use_cpu_for_update; /* Check all the prerequisites to using this VMID */ if (amdgpu_vm_had_gpu_reset(adev, id)) @@ -521,7 +589,6 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring, id->pd_gpu_addr = job->vm_pd_addr; dma_fence_put(id->flushed_updates); id->flushed_updates = dma_fence_get(updates); - id->current_gpu_reset_count = atomic_read(&adev->gpu_reset_counter); atomic64_set(&id->owner, vm->client_id); needs_flush: @@ -540,40 +607,118 @@ error: return r; } -static bool amdgpu_vm_ring_has_compute_vm_bug(struct amdgpu_ring *ring) +static void amdgpu_vm_free_reserved_vmid(struct amdgpu_device *adev, + struct amdgpu_vm *vm, + unsigned vmhub) +{ + struct amdgpu_vm_id_manager *id_mgr = &adev->vm_manager.id_mgr[vmhub]; + + mutex_lock(&id_mgr->lock); + if (vm->reserved_vmid[vmhub]) { + list_add(&vm->reserved_vmid[vmhub]->list, + &id_mgr->ids_lru); + vm->reserved_vmid[vmhub] = NULL; + atomic_dec(&id_mgr->reserved_vmid_num); + } + mutex_unlock(&id_mgr->lock); +} + +static int amdgpu_vm_alloc_reserved_vmid(struct amdgpu_device *adev, + struct amdgpu_vm *vm, + unsigned vmhub) +{ + struct amdgpu_vm_id_manager *id_mgr; + struct amdgpu_vm_id *idle; + int r = 0; + + id_mgr = &adev->vm_manager.id_mgr[vmhub]; + mutex_lock(&id_mgr->lock); + if (vm->reserved_vmid[vmhub]) + goto unlock; + if (atomic_inc_return(&id_mgr->reserved_vmid_num) > + AMDGPU_VM_MAX_RESERVED_VMID) { + DRM_ERROR("Over limitation of reserved vmid\n"); + atomic_dec(&id_mgr->reserved_vmid_num); + r = -EINVAL; + goto unlock; + } + /* Select the first entry VMID */ + idle = list_first_entry(&id_mgr->ids_lru, struct amdgpu_vm_id, list); + list_del_init(&idle->list); + vm->reserved_vmid[vmhub] = idle; + mutex_unlock(&id_mgr->lock); + + return 0; +unlock: + mutex_unlock(&id_mgr->lock); + return r; +} + +/** + * amdgpu_vm_check_compute_bug - check whether asic has compute vm bug + * + * @adev: amdgpu_device pointer + */ +void amdgpu_vm_check_compute_bug(struct amdgpu_device *adev) { - struct amdgpu_device *adev = ring->adev; const struct amdgpu_ip_block *ip_block; + bool has_compute_vm_bug; + struct amdgpu_ring *ring; + int i; - if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE) - /* only compute rings */ - return false; + has_compute_vm_bug = false; ip_block = amdgpu_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GFX); - if (!ip_block) - return false; + if (ip_block) { + /* Compute has a VM bug for GFX version < 7. + Compute has a VM bug for GFX 8 MEC firmware version < 673.*/ + if (ip_block->version->major <= 7) + has_compute_vm_bug = true; + else if (ip_block->version->major == 8) + if (adev->gfx.mec_fw_version < 673) + has_compute_vm_bug = true; + } - if (ip_block->version->major <= 7) { - /* gfx7 has no workaround */ - return true; - } else if (ip_block->version->major == 8) { - if (adev->gfx.mec_fw_version >= 673) - /* gfx8 is fixed in MEC firmware 673 */ - return false; + for (i = 0; i < adev->num_rings; i++) { + ring = adev->rings[i]; + if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) + /* only compute rings */ + ring->has_compute_vm_bug = has_compute_vm_bug; else - return true; + ring->has_compute_vm_bug = false; } - return false; } -static u64 amdgpu_vm_adjust_mc_addr(struct amdgpu_device *adev, u64 mc_addr) +bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring, + struct amdgpu_job *job) { - u64 addr = mc_addr; + struct amdgpu_device *adev = ring->adev; + unsigned vmhub = ring->funcs->vmhub; + struct amdgpu_vm_id_manager *id_mgr = &adev->vm_manager.id_mgr[vmhub]; + struct amdgpu_vm_id *id; + bool gds_switch_needed; + bool vm_flush_needed = job->vm_needs_flush || ring->has_compute_vm_bug; + + if (job->vm_id == 0) + return false; + id = &id_mgr->ids[job->vm_id]; + gds_switch_needed = ring->funcs->emit_gds_switch && ( + id->gds_base != job->gds_base || + id->gds_size != job->gds_size || + id->gws_base != job->gws_base || + id->gws_size != job->gws_size || + id->oa_base != job->oa_base || + id->oa_size != job->oa_size); + + if (amdgpu_vm_had_gpu_reset(adev, id)) + return true; - if (adev->gart.gart_funcs->adjust_mc_addr) - addr = adev->gart.gart_funcs->adjust_mc_addr(adev, addr); + return vm_flush_needed || gds_switch_needed; +} - return addr; +static bool amdgpu_vm_is_large_bar(struct amdgpu_device *adev) +{ + return (adev->mc.real_vram_size == adev->mc.visible_vram_size); } /** @@ -585,7 +730,7 @@ static u64 amdgpu_vm_adjust_mc_addr(struct amdgpu_device *adev, u64 mc_addr) * * Emit a VM flush when it is necessary. */ -int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job) +int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_pipe_sync) { struct amdgpu_device *adev = ring->adev; unsigned vmhub = ring->funcs->vmhub; @@ -598,8 +743,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job) id->gws_size != job->gws_size || id->oa_base != job->oa_base || id->oa_size != job->oa_size); - bool vm_flush_needed = job->vm_needs_flush || - amdgpu_vm_ring_has_compute_vm_bug(ring); + bool vm_flush_needed = job->vm_needs_flush; unsigned patch_offset = 0; int r; @@ -608,21 +752,20 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job) vm_flush_needed = true; } - if (!vm_flush_needed && !gds_switch_needed) + if (!vm_flush_needed && !gds_switch_needed && !need_pipe_sync) return 0; if (ring->funcs->init_cond_exec) patch_offset = amdgpu_ring_init_cond_exec(ring); - if (ring->funcs->emit_pipeline_sync && !job->need_pipeline_sync) + if (need_pipe_sync) amdgpu_ring_emit_pipeline_sync(ring); if (ring->funcs->emit_vm_flush && vm_flush_needed) { - u64 pd_addr = amdgpu_vm_adjust_mc_addr(adev, job->vm_pd_addr); struct dma_fence *fence; - trace_amdgpu_vm_flush(ring, job->vm_id, pd_addr); - amdgpu_ring_emit_vm_flush(ring, job->vm_id, pd_addr); + trace_amdgpu_vm_flush(ring, job->vm_id, job->vm_pd_addr); + amdgpu_ring_emit_vm_flush(ring, job->vm_id, job->vm_pd_addr); r = amdgpu_fence_emit(ring, &fence); if (r) @@ -631,6 +774,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job) mutex_lock(&id_mgr->lock); dma_fence_put(id->last_flush); id->last_flush = fence; + id->current_gpu_reset_count = atomic_read(&adev->gpu_reset_counter); mutex_unlock(&id_mgr->lock); } @@ -718,8 +862,8 @@ struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm, { struct amdgpu_bo_va *bo_va; - list_for_each_entry(bo_va, &bo->va, bo_list) { - if (bo_va->vm == vm) { + list_for_each_entry(bo_va, &bo->va, base.bo_list) { + if (bo_va->base.vm == vm) { return bo_va; } } @@ -805,6 +949,52 @@ static uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr) return result; } +/** + * amdgpu_vm_cpu_set_ptes - helper to update page tables via CPU + * + * @params: see amdgpu_pte_update_params definition + * @pe: kmap addr of the page entry + * @addr: dst addr to write into pe + * @count: number of page entries to update + * @incr: increase next addr by incr bytes + * @flags: hw access flags + * + * Write count number of PT/PD entries directly. + */ +static void amdgpu_vm_cpu_set_ptes(struct amdgpu_pte_update_params *params, + uint64_t pe, uint64_t addr, + unsigned count, uint32_t incr, + uint64_t flags) +{ + unsigned int i; + uint64_t value; + + trace_amdgpu_vm_set_ptes(pe, addr, count, incr, flags); + + for (i = 0; i < count; i++) { + value = params->pages_addr ? + amdgpu_vm_map_gart(params->pages_addr, addr) : + addr; + amdgpu_gart_set_pte_pde(params->adev, (void *)(uintptr_t)pe, + i, value, flags); + addr += incr; + } +} + +static int amdgpu_vm_wait_pd(struct amdgpu_device *adev, struct amdgpu_vm *vm, + void *owner) +{ + struct amdgpu_sync sync; + int r; + + amdgpu_sync_create(&sync); + amdgpu_sync_resv(adev, &sync, vm->root.bo->tbo.resv, owner); + r = amdgpu_sync_wait(&sync, true); + amdgpu_sync_free(&sync); + + return r; +} + /* * amdgpu_vm_update_level - update a single level in the hierarchy * @@ -821,11 +1011,11 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev, unsigned level) { struct amdgpu_bo *shadow; - struct amdgpu_ring *ring; - uint64_t pd_addr, shadow_addr; + struct amdgpu_ring *ring = NULL; + uint64_t pd_addr, shadow_addr = 0; uint32_t incr = amdgpu_vm_bo_size(adev, level + 1); uint64_t last_pde = ~0, last_pt = ~0, last_shadow = ~0; - unsigned count = 0, pt_idx, ndw; + unsigned count = 0, pt_idx, ndw = 0; struct amdgpu_job *job; struct amdgpu_pte_update_params params; struct dma_fence *fence = NULL; @@ -834,34 +1024,45 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev, if (!parent->entries) return 0; - ring = container_of(vm->entity.sched, struct amdgpu_ring, sched); - /* padding, etc. */ - ndw = 64; + memset(¶ms, 0, sizeof(params)); + params.adev = adev; + shadow = parent->bo->shadow; - /* assume the worst case */ - ndw += parent->last_entry_used * 6; + if (vm->use_cpu_for_update) { + pd_addr = (unsigned long)amdgpu_bo_kptr(parent->bo); + r = amdgpu_vm_wait_pd(adev, vm, AMDGPU_FENCE_OWNER_VM); + if (unlikely(r)) + return r; - pd_addr = amdgpu_bo_gpu_offset(parent->bo); + params.func = amdgpu_vm_cpu_set_ptes; + } else { + ring = container_of(vm->entity.sched, struct amdgpu_ring, + sched); - shadow = parent->bo->shadow; - if (shadow) { - r = amdgpu_ttm_bind(&shadow->tbo, &shadow->tbo.mem); + /* padding, etc. */ + ndw = 64; + + /* assume the worst case */ + ndw += parent->last_entry_used * 6; + + pd_addr = amdgpu_bo_gpu_offset(parent->bo); + + if (shadow) { + shadow_addr = amdgpu_bo_gpu_offset(shadow); + ndw *= 2; + } else { + shadow_addr = 0; + } + + r = amdgpu_job_alloc_with_ib(adev, ndw * 4, &job); if (r) return r; - shadow_addr = amdgpu_bo_gpu_offset(shadow); - ndw *= 2; - } else { - shadow_addr = 0; - } - r = amdgpu_job_alloc_with_ib(adev, ndw * 4, &job); - if (r) - return r; + params.ib = &job->ibs[0]; + params.func = amdgpu_vm_do_set_ptes; + } - memset(¶ms, 0, sizeof(params)); - params.adev = adev; - params.ib = &job->ibs[0]; /* walk over the address space and update the directory */ for (pt_idx = 0; pt_idx <= parent->last_entry_used; ++pt_idx) { @@ -871,20 +1072,14 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev, if (bo == NULL) continue; - if (bo->shadow) { - struct amdgpu_bo *pt_shadow = bo->shadow; - - r = amdgpu_ttm_bind(&pt_shadow->tbo, - &pt_shadow->tbo.mem); - if (r) - return r; - } - pt = amdgpu_bo_gpu_offset(bo); - if (parent->entries[pt_idx].addr == pt) + pt = amdgpu_gart_get_vm_pde(adev, pt); + /* Don't update huge pages here */ + if ((parent->entries[pt_idx].addr & AMDGPU_PDE_PTE) || + parent->entries[pt_idx].addr == (pt | AMDGPU_PTE_VALID)) continue; - parent->entries[pt_idx].addr = pt; + parent->entries[pt_idx].addr = pt | AMDGPU_PTE_VALID; pde = pd_addr + pt_idx * 8; if (((last_pde + 8 * count) != pde) || @@ -892,19 +1087,16 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev, (count == AMDGPU_VM_MAX_UPDATE_SIZE)) { if (count) { - uint64_t pt_addr = - amdgpu_vm_adjust_mc_addr(adev, last_pt); - if (shadow) - amdgpu_vm_do_set_ptes(¶ms, - last_shadow, - pt_addr, count, - incr, - AMDGPU_PTE_VALID); - - amdgpu_vm_do_set_ptes(¶ms, last_pde, - pt_addr, count, incr, - AMDGPU_PTE_VALID); + params.func(¶ms, + last_shadow, + last_pt, count, + incr, + AMDGPU_PTE_VALID); + + params.func(¶ms, last_pde, + last_pt, count, incr, + AMDGPU_PTE_VALID); } count = 1; @@ -917,36 +1109,37 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev, } if (count) { - uint64_t pt_addr = amdgpu_vm_adjust_mc_addr(adev, last_pt); - if (vm->root.bo->shadow) - amdgpu_vm_do_set_ptes(¶ms, last_shadow, pt_addr, - count, incr, AMDGPU_PTE_VALID); + params.func(¶ms, last_shadow, last_pt, + count, incr, AMDGPU_PTE_VALID); - amdgpu_vm_do_set_ptes(¶ms, last_pde, pt_addr, - count, incr, AMDGPU_PTE_VALID); + params.func(¶ms, last_pde, last_pt, + count, incr, AMDGPU_PTE_VALID); } - if (params.ib->length_dw == 0) { - amdgpu_job_free(job); - } else { - amdgpu_ring_pad_ib(ring, params.ib); - amdgpu_sync_resv(adev, &job->sync, parent->bo->tbo.resv, - AMDGPU_FENCE_OWNER_VM); - if (shadow) - amdgpu_sync_resv(adev, &job->sync, shadow->tbo.resv, + if (!vm->use_cpu_for_update) { + if (params.ib->length_dw == 0) { + amdgpu_job_free(job); + } else { + amdgpu_ring_pad_ib(ring, params.ib); + amdgpu_sync_resv(adev, &job->sync, parent->bo->tbo.resv, AMDGPU_FENCE_OWNER_VM); + if (shadow) + amdgpu_sync_resv(adev, &job->sync, + shadow->tbo.resv, + AMDGPU_FENCE_OWNER_VM); + + WARN_ON(params.ib->length_dw > ndw); + r = amdgpu_job_submit(job, ring, &vm->entity, + AMDGPU_FENCE_OWNER_VM, &fence); + if (r) + goto error_free; - WARN_ON(params.ib->length_dw > ndw); - r = amdgpu_job_submit(job, ring, &vm->entity, - AMDGPU_FENCE_OWNER_VM, &fence); - if (r) - goto error_free; - - amdgpu_bo_fence(parent->bo, fence, true); - dma_fence_put(vm->last_dir_update); - vm->last_dir_update = dma_fence_get(fence); - dma_fence_put(fence); + amdgpu_bo_fence(parent->bo, fence, true); + dma_fence_put(vm->last_dir_update); + vm->last_dir_update = dma_fence_get(fence); + dma_fence_put(fence); + } } /* * Recurse into the subdirectories. This recursion is harmless because @@ -971,6 +1164,32 @@ error_free: } /* + * amdgpu_vm_invalidate_level - mark all PD levels as invalid + * + * @parent: parent PD + * + * Mark all PD level as invalid after an error. + */ +static void amdgpu_vm_invalidate_level(struct amdgpu_vm_pt *parent) +{ + unsigned pt_idx; + + /* + * Recurse into the subdirectories. This recursion is harmless because + * we only have a maximum of 5 layers. + */ + for (pt_idx = 0; pt_idx <= parent->last_entry_used; ++pt_idx) { + struct amdgpu_vm_pt *entry = &parent->entries[pt_idx]; + + if (!entry->bo) + continue; + + entry->addr = ~0ULL; + amdgpu_vm_invalidate_level(entry); + } +} + +/* * amdgpu_vm_update_directories - make sure that all directories are valid * * @adev: amdgpu_device pointer @@ -982,33 +1201,118 @@ error_free: int amdgpu_vm_update_directories(struct amdgpu_device *adev, struct amdgpu_vm *vm) { - return amdgpu_vm_update_level(adev, vm, &vm->root, 0); + int r; + + r = amdgpu_vm_update_level(adev, vm, &vm->root, 0); + if (r) + amdgpu_vm_invalidate_level(&vm->root); + + if (vm->use_cpu_for_update) { + /* Flush HDP */ + mb(); + amdgpu_gart_flush_gpu_tlb(adev, 0); + } + + return r; } /** - * amdgpu_vm_find_pt - find the page table for an address + * amdgpu_vm_find_entry - find the entry for an address * * @p: see amdgpu_pte_update_params definition * @addr: virtual address in question + * @entry: resulting entry or NULL + * @parent: parent entry * - * Find the page table BO for a virtual address, return NULL when none found. + * Find the vm_pt entry and it's parent for the given address. */ -static struct amdgpu_bo *amdgpu_vm_get_pt(struct amdgpu_pte_update_params *p, - uint64_t addr) +void amdgpu_vm_get_entry(struct amdgpu_pte_update_params *p, uint64_t addr, + struct amdgpu_vm_pt **entry, + struct amdgpu_vm_pt **parent) { - struct amdgpu_vm_pt *entry = &p->vm->root; unsigned idx, level = p->adev->vm_manager.num_level; - while (entry->entries) { + *parent = NULL; + *entry = &p->vm->root; + while ((*entry)->entries) { idx = addr >> (p->adev->vm_manager.block_size * level--); - idx %= amdgpu_bo_size(entry->bo) / 8; - entry = &entry->entries[idx]; + idx %= amdgpu_bo_size((*entry)->bo) / 8; + *parent = *entry; + *entry = &(*entry)->entries[idx]; } if (level) - return NULL; + *entry = NULL; +} + +/** + * amdgpu_vm_handle_huge_pages - handle updating the PD with huge pages + * + * @p: see amdgpu_pte_update_params definition + * @entry: vm_pt entry to check + * @parent: parent entry + * @nptes: number of PTEs updated with this operation + * @dst: destination address where the PTEs should point to + * @flags: access flags fro the PTEs + * + * Check if we can update the PD with a huge page. + */ +static void amdgpu_vm_handle_huge_pages(struct amdgpu_pte_update_params *p, + struct amdgpu_vm_pt *entry, + struct amdgpu_vm_pt *parent, + unsigned nptes, uint64_t dst, + uint64_t flags) +{ + bool use_cpu_update = (p->func == amdgpu_vm_cpu_set_ptes); + uint64_t pd_addr, pde; + + /* In the case of a mixed PT the PDE must point to it*/ + if (p->adev->asic_type < CHIP_VEGA10 || + nptes != AMDGPU_VM_PTE_COUNT(p->adev) || + p->src || + !(flags & AMDGPU_PTE_VALID)) { + + dst = amdgpu_bo_gpu_offset(entry->bo); + dst = amdgpu_gart_get_vm_pde(p->adev, dst); + flags = AMDGPU_PTE_VALID; + } else { + /* Set the huge page flag to stop scanning at this PDE */ + flags |= AMDGPU_PDE_PTE; + } + + if (entry->addr == (dst | flags)) + return; + + entry->addr = (dst | flags); + + if (use_cpu_update) { + /* In case a huge page is replaced with a system + * memory mapping, p->pages_addr != NULL and + * amdgpu_vm_cpu_set_ptes would try to translate dst + * through amdgpu_vm_map_gart. But dst is already a + * GPU address (of the page table). Disable + * amdgpu_vm_map_gart temporarily. + */ + dma_addr_t *tmp; - return entry->bo; + tmp = p->pages_addr; + p->pages_addr = NULL; + + pd_addr = (unsigned long)amdgpu_bo_kptr(parent->bo); + pde = pd_addr + (entry - parent->entries) * 8; + amdgpu_vm_cpu_set_ptes(p, pde, dst, 1, 0, flags); + + p->pages_addr = tmp; + } else { + if (parent->bo->shadow) { + pd_addr = amdgpu_bo_gpu_offset(parent->bo->shadow); + pde = pd_addr + (entry - parent->entries) * 8; + amdgpu_vm_do_set_ptes(p, pde, dst, 1, 0, flags); + } + pd_addr = amdgpu_bo_gpu_offset(parent->bo); + pde = pd_addr + (entry - parent->entries) * 8; + amdgpu_vm_do_set_ptes(p, pde, dst, 1, 0, flags); + } } /** @@ -1022,92 +1326,59 @@ static struct amdgpu_bo *amdgpu_vm_get_pt(struct amdgpu_pte_update_params *p, * @flags: mapping flags * * Update the page tables in the range @start - @end. + * Returns 0 for success, -EINVAL for failure. */ -static void amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params, +static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params, uint64_t start, uint64_t end, uint64_t dst, uint64_t flags) { struct amdgpu_device *adev = params->adev; const uint64_t mask = AMDGPU_VM_PTE_COUNT(adev) - 1; - uint64_t cur_pe_start, cur_nptes, cur_dst; - uint64_t addr; /* next GPU address to be updated */ + uint64_t addr, pe_start; struct amdgpu_bo *pt; - unsigned nptes; /* next number of ptes to be updated */ - uint64_t next_pe_start; - - /* initialize the variables */ - addr = start; - pt = amdgpu_vm_get_pt(params, addr); - if (!pt) { - pr_err("PT not found, aborting update_ptes\n"); - return; - } - - if (params->shadow) { - if (!pt->shadow) - return; - pt = pt->shadow; - } - if ((addr & ~mask) == (end & ~mask)) - nptes = end - addr; - else - nptes = AMDGPU_VM_PTE_COUNT(adev) - (addr & mask); - - cur_pe_start = amdgpu_bo_gpu_offset(pt); - cur_pe_start += (addr & mask) * 8; - cur_nptes = nptes; - cur_dst = dst; - - /* for next ptb*/ - addr += nptes; - dst += nptes * AMDGPU_GPU_PAGE_SIZE; + unsigned nptes; + bool use_cpu_update = (params->func == amdgpu_vm_cpu_set_ptes); /* walk over the address space and update the page tables */ - while (addr < end) { - pt = amdgpu_vm_get_pt(params, addr); - if (!pt) { - pr_err("PT not found, aborting update_ptes\n"); - return; - } + for (addr = start; addr < end; addr += nptes, + dst += nptes * AMDGPU_GPU_PAGE_SIZE) { + struct amdgpu_vm_pt *entry, *parent; - if (params->shadow) { - if (!pt->shadow) - return; - pt = pt->shadow; - } + amdgpu_vm_get_entry(params, addr, &entry, &parent); + if (!entry) + return -ENOENT; if ((addr & ~mask) == (end & ~mask)) nptes = end - addr; else nptes = AMDGPU_VM_PTE_COUNT(adev) - (addr & mask); - next_pe_start = amdgpu_bo_gpu_offset(pt); - next_pe_start += (addr & mask) * 8; + amdgpu_vm_handle_huge_pages(params, entry, parent, + nptes, dst, flags); + /* We don't need to update PTEs for huge pages */ + if (entry->addr & AMDGPU_PDE_PTE) + continue; - if ((cur_pe_start + 8 * cur_nptes) == next_pe_start && - ((cur_nptes + nptes) <= AMDGPU_VM_MAX_UPDATE_SIZE)) { - /* The next ptb is consecutive to current ptb. - * Don't call the update function now. - * Will update two ptbs together in future. - */ - cur_nptes += nptes; + pt = entry->bo; + if (use_cpu_update) { + pe_start = (unsigned long)amdgpu_bo_kptr(pt); } else { - params->func(params, cur_pe_start, cur_dst, cur_nptes, - AMDGPU_GPU_PAGE_SIZE, flags); - - cur_pe_start = next_pe_start; - cur_nptes = nptes; - cur_dst = dst; + if (pt->shadow) { + pe_start = amdgpu_bo_gpu_offset(pt->shadow); + pe_start += (addr & mask) * 8; + params->func(params, pe_start, dst, nptes, + AMDGPU_GPU_PAGE_SIZE, flags); + } + pe_start = amdgpu_bo_gpu_offset(pt); } - /* for next ptb*/ - addr += nptes; - dst += nptes * AMDGPU_GPU_PAGE_SIZE; + pe_start += (addr & mask) * 8; + params->func(params, pe_start, dst, nptes, + AMDGPU_GPU_PAGE_SIZE, flags); } - params->func(params, cur_pe_start, cur_dst, cur_nptes, - AMDGPU_GPU_PAGE_SIZE, flags); + return 0; } /* @@ -1119,11 +1390,14 @@ static void amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params, * @end: last PTE to handle * @dst: addr those PTEs should point to * @flags: hw mapping flags + * Returns 0 for success, -EINVAL for failure. */ -static void amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params *params, +static int amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params *params, uint64_t start, uint64_t end, uint64_t dst, uint64_t flags) { + int r; + /** * The MC L1 TLB supports variable sized pages, based on a fragment * field in the PTE. When this field is set to a non-zero value, page @@ -1142,38 +1416,39 @@ static void amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params *params, * Userspace can support this by aligning virtual base address and * allocation size to the fragment size. */ - - /* SI and newer are optimized for 64KB */ - uint64_t frag_flags = AMDGPU_PTE_FRAG(AMDGPU_LOG2_PAGES_PER_FRAG); - uint64_t frag_align = 1 << AMDGPU_LOG2_PAGES_PER_FRAG; + unsigned pages_per_frag = params->adev->vm_manager.fragment_size; + uint64_t frag_flags = AMDGPU_PTE_FRAG(pages_per_frag); + uint64_t frag_align = 1 << pages_per_frag; uint64_t frag_start = ALIGN(start, frag_align); uint64_t frag_end = end & ~(frag_align - 1); /* system pages are non continuously */ if (params->src || !(flags & AMDGPU_PTE_VALID) || - (frag_start >= frag_end)) { - - amdgpu_vm_update_ptes(params, start, end, dst, flags); - return; - } + (frag_start >= frag_end)) + return amdgpu_vm_update_ptes(params, start, end, dst, flags); /* handle the 4K area at the beginning */ if (start != frag_start) { - amdgpu_vm_update_ptes(params, start, frag_start, - dst, flags); + r = amdgpu_vm_update_ptes(params, start, frag_start, + dst, flags); + if (r) + return r; dst += (frag_start - start) * AMDGPU_GPU_PAGE_SIZE; } /* handle the area in the middle */ - amdgpu_vm_update_ptes(params, frag_start, frag_end, dst, - flags | frag_flags); + r = amdgpu_vm_update_ptes(params, frag_start, frag_end, dst, + flags | frag_flags); + if (r) + return r; /* handle the 4K area at the end */ if (frag_end != end) { dst += (frag_end - frag_start) * AMDGPU_GPU_PAGE_SIZE; - amdgpu_vm_update_ptes(params, frag_end, end, dst, flags); + r = amdgpu_vm_update_ptes(params, frag_end, end, dst, flags); } + return r; } /** @@ -1215,12 +1490,30 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, params.vm = vm; params.src = src; - ring = container_of(vm->entity.sched, struct amdgpu_ring, sched); - /* sync to everything on unmapping */ if (!(flags & AMDGPU_PTE_VALID)) owner = AMDGPU_FENCE_OWNER_UNDEFINED; + if (vm->use_cpu_for_update) { + /* params.src is used as flag to indicate system Memory */ + if (pages_addr) + params.src = ~0; + + /* Wait for PT BOs to be free. PTs share the same resv. object + * as the root PD BO + */ + r = amdgpu_vm_wait_pd(adev, vm, owner); + if (unlikely(r)) + return r; + + params.func = amdgpu_vm_cpu_set_ptes; + params.pages_addr = pages_addr; + return amdgpu_vm_frag_ptes(¶ms, start, last + 1, + addr, flags); + } + + ring = container_of(vm->entity.sched, struct amdgpu_ring, sched); + nptes = last - start + 1; /* @@ -1232,6 +1525,9 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, /* padding, etc. */ ndw = 64; + /* one PDE write for each huge page */ + ndw += ((nptes >> adev->vm_manager.block_size) + 1) * 6; + if (src) { /* only copy commands needed */ ndw += ncmds * 7; @@ -1293,10 +1589,9 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, if (r) goto error_free; - params.shadow = true; - amdgpu_vm_frag_ptes(¶ms, start, last + 1, addr, flags); - params.shadow = false; - amdgpu_vm_frag_ptes(¶ms, start, last + 1, addr, flags); + r = amdgpu_vm_frag_ptes(¶ms, start, last + 1, addr, flags); + if (r) + goto error_free; amdgpu_ring_pad_ib(ring, params.ib); WARN_ON(params.ib->length_dw > ndw); @@ -1312,6 +1607,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, error_free: amdgpu_job_free(job); + amdgpu_vm_invalidate_level(&vm->root); return r; } @@ -1320,7 +1616,6 @@ error_free: * * @adev: amdgpu_device pointer * @exclusive: fence we need to sync to - * @gtt_flags: flags as they are used for GTT * @pages_addr: DMA addresses to use for mapping * @vm: requested vm * @mapping: mapped range and flags to use for the update @@ -1334,7 +1629,6 @@ error_free: */ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev, struct dma_fence *exclusive, - uint64_t gtt_flags, dma_addr_t *pages_addr, struct amdgpu_vm *vm, struct amdgpu_bo_va_mapping *mapping, @@ -1389,11 +1683,7 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev, } if (pages_addr) { - if (flags == gtt_flags) - src = adev->gart.table_addr + - (addr >> AMDGPU_GPU_PAGE_SHIFT) * 8; - else - max_entries = min(max_entries, 16ull * 1024ull); + max_entries = min(max_entries, 16ull * 1024ull); addr = 0; } else if (flags & AMDGPU_PTE_VALID) { addr += adev->vm_manager.vram_base_offset; @@ -1434,50 +1724,45 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, bool clear) { - struct amdgpu_vm *vm = bo_va->vm; + struct amdgpu_bo *bo = bo_va->base.bo; + struct amdgpu_vm *vm = bo_va->base.vm; struct amdgpu_bo_va_mapping *mapping; dma_addr_t *pages_addr = NULL; - uint64_t gtt_flags, flags; struct ttm_mem_reg *mem; struct drm_mm_node *nodes; struct dma_fence *exclusive; + uint64_t flags; int r; - if (clear || !bo_va->bo) { + if (clear || !bo_va->base.bo) { mem = NULL; nodes = NULL; exclusive = NULL; } else { struct ttm_dma_tt *ttm; - mem = &bo_va->bo->tbo.mem; + mem = &bo_va->base.bo->tbo.mem; nodes = mem->mm_node; if (mem->mem_type == TTM_PL_TT) { - ttm = container_of(bo_va->bo->tbo.ttm, struct - ttm_dma_tt, ttm); + ttm = container_of(bo_va->base.bo->tbo.ttm, + struct ttm_dma_tt, ttm); pages_addr = ttm->dma_address; } - exclusive = reservation_object_get_excl(bo_va->bo->tbo.resv); + exclusive = reservation_object_get_excl(bo->tbo.resv); } - if (bo_va->bo) { - flags = amdgpu_ttm_tt_pte_flags(adev, bo_va->bo->tbo.ttm, mem); - gtt_flags = (amdgpu_ttm_is_bound(bo_va->bo->tbo.ttm) && - adev == amdgpu_ttm_adev(bo_va->bo->tbo.bdev)) ? - flags : 0; - } else { + if (bo) + flags = amdgpu_ttm_tt_pte_flags(adev, bo->tbo.ttm, mem); + else flags = 0x0; - gtt_flags = ~0x0; - } spin_lock(&vm->status_lock); - if (!list_empty(&bo_va->vm_status)) + if (!list_empty(&bo_va->base.vm_status)) list_splice_init(&bo_va->valids, &bo_va->invalids); spin_unlock(&vm->status_lock); list_for_each_entry(mapping, &bo_va->invalids, list) { - r = amdgpu_vm_bo_split_mapping(adev, exclusive, - gtt_flags, pages_addr, vm, + r = amdgpu_vm_bo_split_mapping(adev, exclusive, pages_addr, vm, mapping, flags, nodes, &bo_va->last_pt_update); if (r) @@ -1494,11 +1779,17 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, spin_lock(&vm->status_lock); list_splice_init(&bo_va->invalids, &bo_va->valids); - list_del_init(&bo_va->vm_status); + list_del_init(&bo_va->base.vm_status); if (clear) - list_add(&bo_va->vm_status, &vm->cleared); + list_add(&bo_va->base.vm_status, &vm->cleared); spin_unlock(&vm->status_lock); + if (vm->use_cpu_for_update) { + /* Flush HDP */ + mb(); + amdgpu_gart_flush_gpu_tlb(adev, 0); + } + return 0; } @@ -1652,15 +1943,19 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev, struct amdgpu_bo_va_mapping *mapping; struct dma_fence *f = NULL; int r; + uint64_t init_pte_value = 0; while (!list_empty(&vm->freed)) { mapping = list_first_entry(&vm->freed, struct amdgpu_bo_va_mapping, list); list_del(&mapping->list); + if (vm->pte_support_ats) + init_pte_value = AMDGPU_PTE_SYSTEM; + r = amdgpu_vm_bo_update_mapping(adev, NULL, 0, NULL, vm, mapping->start, mapping->last, - 0, 0, &f); + init_pte_value, 0, &f); amdgpu_vm_free_mapping(adev, vm, mapping, f); if (r) { dma_fence_put(f); @@ -1680,26 +1975,26 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev, } /** - * amdgpu_vm_clear_invalids - clear invalidated BOs in the PT + * amdgpu_vm_clear_moved - clear moved BOs in the PT * * @adev: amdgpu_device pointer * @vm: requested vm * - * Make sure all invalidated BOs are cleared in the PT. + * Make sure all moved BOs are cleared in the PT. * Returns 0 for success. * * PTs have to be reserved and mutex must be locked! */ -int amdgpu_vm_clear_invalids(struct amdgpu_device *adev, - struct amdgpu_vm *vm, struct amdgpu_sync *sync) +int amdgpu_vm_clear_moved(struct amdgpu_device *adev, struct amdgpu_vm *vm, + struct amdgpu_sync *sync) { struct amdgpu_bo_va *bo_va = NULL; int r = 0; spin_lock(&vm->status_lock); - while (!list_empty(&vm->invalidated)) { - bo_va = list_first_entry(&vm->invalidated, - struct amdgpu_bo_va, vm_status); + while (!list_empty(&vm->moved)) { + bo_va = list_first_entry(&vm->moved, + struct amdgpu_bo_va, base.vm_status); spin_unlock(&vm->status_lock); r = amdgpu_vm_bo_update(adev, bo_va, true); @@ -1739,16 +2034,17 @@ struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev, if (bo_va == NULL) { return NULL; } - bo_va->vm = vm; - bo_va->bo = bo; + bo_va->base.vm = vm; + bo_va->base.bo = bo; + INIT_LIST_HEAD(&bo_va->base.bo_list); + INIT_LIST_HEAD(&bo_va->base.vm_status); + bo_va->ref_count = 1; - INIT_LIST_HEAD(&bo_va->bo_list); INIT_LIST_HEAD(&bo_va->valids); INIT_LIST_HEAD(&bo_va->invalids); - INIT_LIST_HEAD(&bo_va->vm_status); if (bo) - list_add_tail(&bo_va->bo_list, &bo->va); + list_add_tail(&bo_va->base.bo_list, &bo->va); return bo_va; } @@ -1773,7 +2069,8 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev, uint64_t size, uint64_t flags) { struct amdgpu_bo_va_mapping *mapping, *tmp; - struct amdgpu_vm *vm = bo_va->vm; + struct amdgpu_bo *bo = bo_va->base.bo; + struct amdgpu_vm *vm = bo_va->base.vm; uint64_t eaddr; /* validate the parameters */ @@ -1784,7 +2081,7 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev, /* make sure object fit at this offset */ eaddr = saddr + size - 1; if (saddr >= eaddr || - (bo_va->bo && offset + size > amdgpu_bo_size(bo_va->bo))) + (bo && offset + size > amdgpu_bo_size(bo))) return -EINVAL; saddr /= AMDGPU_GPU_PAGE_SIZE; @@ -1794,7 +2091,7 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev, if (tmp) { /* bo and tmp overlap, invalid addr */ dev_err(adev->dev, "bo %p va 0x%010Lx-0x%010Lx conflict with " - "0x%010Lx-0x%010Lx\n", bo_va->bo, saddr, eaddr, + "0x%010Lx-0x%010Lx\n", bo, saddr, eaddr, tmp->start, tmp->last + 1); return -EINVAL; } @@ -1839,7 +2136,8 @@ int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev, uint64_t size, uint64_t flags) { struct amdgpu_bo_va_mapping *mapping; - struct amdgpu_vm *vm = bo_va->vm; + struct amdgpu_bo *bo = bo_va->base.bo; + struct amdgpu_vm *vm = bo_va->base.vm; uint64_t eaddr; int r; @@ -1851,7 +2149,7 @@ int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev, /* make sure object fit at this offset */ eaddr = saddr + size - 1; if (saddr >= eaddr || - (bo_va->bo && offset + size > amdgpu_bo_size(bo_va->bo))) + (bo && offset + size > amdgpu_bo_size(bo))) return -EINVAL; /* Allocate all the needed memory */ @@ -1859,7 +2157,7 @@ int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev, if (!mapping) return -ENOMEM; - r = amdgpu_vm_bo_clear_mappings(adev, bo_va->vm, saddr, size); + r = amdgpu_vm_bo_clear_mappings(adev, bo_va->base.vm, saddr, size); if (r) { kfree(mapping); return r; @@ -1899,7 +2197,7 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev, uint64_t saddr) { struct amdgpu_bo_va_mapping *mapping; - struct amdgpu_vm *vm = bo_va->vm; + struct amdgpu_vm *vm = bo_va->base.vm; bool valid = true; saddr /= AMDGPU_GPU_PAGE_SIZE; @@ -2047,12 +2345,12 @@ void amdgpu_vm_bo_rmv(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va) { struct amdgpu_bo_va_mapping *mapping, *next; - struct amdgpu_vm *vm = bo_va->vm; + struct amdgpu_vm *vm = bo_va->base.vm; - list_del(&bo_va->bo_list); + list_del(&bo_va->base.bo_list); spin_lock(&vm->status_lock); - list_del(&bo_va->vm_status); + list_del(&bo_va->base.vm_status); spin_unlock(&vm->status_lock); list_for_each_entry_safe(mapping, next, &bo_va->valids, list) { @@ -2084,13 +2382,14 @@ void amdgpu_vm_bo_rmv(struct amdgpu_device *adev, void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev, struct amdgpu_bo *bo) { - struct amdgpu_bo_va *bo_va; - - list_for_each_entry(bo_va, &bo->va, bo_list) { - spin_lock(&bo_va->vm->status_lock); - if (list_empty(&bo_va->vm_status)) - list_add(&bo_va->vm_status, &bo_va->vm->invalidated); - spin_unlock(&bo_va->vm->status_lock); + struct amdgpu_vm_bo_base *bo_base; + + list_for_each_entry(bo_base, &bo->va, bo_list) { + spin_lock(&bo_base->vm->status_lock); + if (list_empty(&bo_base->vm_status)) + list_add(&bo_base->vm_status, + &bo_base->vm->moved); + spin_unlock(&bo_base->vm->status_lock); } } @@ -2108,12 +2407,26 @@ static uint32_t amdgpu_vm_get_block_size(uint64_t vm_size) } /** - * amdgpu_vm_adjust_size - adjust vm size and block size + * amdgpu_vm_set_fragment_size - adjust fragment size in PTE + * + * @adev: amdgpu_device pointer + * @fragment_size_default: the default fragment size if it's set auto + */ +void amdgpu_vm_set_fragment_size(struct amdgpu_device *adev, uint32_t fragment_size_default) +{ + if (amdgpu_vm_fragment_size == -1) + adev->vm_manager.fragment_size = fragment_size_default; + else + adev->vm_manager.fragment_size = amdgpu_vm_fragment_size; +} + +/** + * amdgpu_vm_adjust_size - adjust vm size, block size and fragment size * * @adev: amdgpu_device pointer * @vm_size: the default vm size if it's set auto */ -void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint64_t vm_size) +void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint64_t vm_size, uint32_t fragment_size_default) { /* adjust vm size firstly */ if (amdgpu_vm_size == -1) @@ -2128,8 +2441,11 @@ void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint64_t vm_size) else adev->vm_manager.block_size = amdgpu_vm_block_size; - DRM_INFO("vm size is %llu GB, block size is %u-bit\n", - adev->vm_manager.vm_size, adev->vm_manager.block_size); + amdgpu_vm_set_fragment_size(adev, fragment_size_default); + + DRM_INFO("vm size is %llu GB, block size is %u-bit, fragment size is %u-bit\n", + adev->vm_manager.vm_size, adev->vm_manager.block_size, + adev->vm_manager.fragment_size); } /** @@ -2137,22 +2453,28 @@ void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint64_t vm_size) * * @adev: amdgpu_device pointer * @vm: requested vm + * @vm_context: Indicates if it GFX or Compute context * * Init @vm fields. */ -int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm) +int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, + int vm_context) { const unsigned align = min(AMDGPU_VM_PTB_ALIGN_SIZE, AMDGPU_VM_PTE_COUNT(adev) * 8); unsigned ring_instance; struct amdgpu_ring *ring; struct amd_sched_rq *rq; - int r; + int r, i; + u64 flags; + uint64_t init_pde_value = 0; - vm->va = RB_ROOT; + vm->va = RB_ROOT_CACHED; vm->client_id = atomic64_inc_return(&adev->vm_manager.client_counter); + for (i = 0; i < AMDGPU_MAX_VMHUBS; i++) + vm->reserved_vmid[i] = NULL; spin_lock_init(&vm->status_lock); - INIT_LIST_HEAD(&vm->invalidated); + INIT_LIST_HEAD(&vm->moved); INIT_LIST_HEAD(&vm->cleared); INIT_LIST_HEAD(&vm->freed); @@ -2167,15 +2489,37 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm) if (r) return r; + vm->pte_support_ats = false; + + if (vm_context == AMDGPU_VM_CONTEXT_COMPUTE) { + vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode & + AMDGPU_VM_USE_CPU_FOR_COMPUTE); + + if (adev->asic_type == CHIP_RAVEN) { + vm->pte_support_ats = true; + init_pde_value = AMDGPU_PTE_SYSTEM | AMDGPU_PDE_PTE; + } + } else + vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode & + AMDGPU_VM_USE_CPU_FOR_GFX); + DRM_DEBUG_DRIVER("VM update mode is %s\n", + vm->use_cpu_for_update ? "CPU" : "SDMA"); + WARN_ONCE((vm->use_cpu_for_update & !amdgpu_vm_is_large_bar(adev)), + "CPU update of VM recommended only for large BAR system\n"); vm->last_dir_update = NULL; + flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS | + AMDGPU_GEM_CREATE_VRAM_CLEARED; + if (vm->use_cpu_for_update) + flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; + else + flags |= (AMDGPU_GEM_CREATE_NO_CPU_ACCESS | + AMDGPU_GEM_CREATE_SHADOW); + r = amdgpu_bo_create(adev, amdgpu_vm_bo_size(adev, 0), align, true, AMDGPU_GEM_DOMAIN_VRAM, - AMDGPU_GEM_CREATE_NO_CPU_ACCESS | - AMDGPU_GEM_CREATE_SHADOW | - AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS | - AMDGPU_GEM_CREATE_VRAM_CLEARED, - NULL, NULL, &vm->root.bo); + flags, + NULL, NULL, init_pde_value, &vm->root.bo); if (r) goto error_free_sched_entity; @@ -2184,6 +2528,13 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm) goto error_free_root; vm->last_eviction_counter = atomic64_read(&adev->num_evictions); + + if (vm->use_cpu_for_update) { + r = amdgpu_bo_kmap(vm->root.bo, NULL); + if (r) + goto error_free_root; + } + amdgpu_bo_unreserve(vm->root.bo); return 0; @@ -2219,7 +2570,7 @@ static void amdgpu_vm_free_levels(struct amdgpu_vm_pt *level) for (i = 0; i <= level->last_entry_used; i++) amdgpu_vm_free_levels(&level->entries[i]); - drm_free_large(level->entries); + kvfree(level->entries); } /** @@ -2235,13 +2586,15 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) { struct amdgpu_bo_va_mapping *mapping, *tmp; bool prt_fini_needed = !!adev->gart.gart_funcs->set_prt; + int i; amd_sched_entity_fini(vm->entity.sched, &vm->entity); - if (!RB_EMPTY_ROOT(&vm->va)) { + if (!RB_EMPTY_ROOT(&vm->va.rb_root)) { dev_err(adev->dev, "still active bo inside vm\n"); } - rbtree_postorder_for_each_entry_safe(mapping, tmp, &vm->va, rb) { + rbtree_postorder_for_each_entry_safe(mapping, tmp, + &vm->va.rb_root, rb) { list_del(&mapping->list); amdgpu_vm_it_remove(mapping, &vm->va); kfree(mapping); @@ -2258,6 +2611,8 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) amdgpu_vm_free_levels(&vm->root); dma_fence_put(vm->last_dir_update); + for (i = 0; i < AMDGPU_MAX_VMHUBS; i++) + amdgpu_vm_free_reserved_vmid(adev, vm, i); } /** @@ -2277,6 +2632,7 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev) mutex_init(&id_mgr->lock); INIT_LIST_HEAD(&id_mgr->ids_lru); + atomic_set(&id_mgr->reserved_vmid_num, 0); /* skip over VMID 0, since it is the system VM */ for (j = 1; j < id_mgr->num_ids; ++j) { @@ -2295,6 +2651,23 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev) atomic64_set(&adev->vm_manager.client_counter, 0); spin_lock_init(&adev->vm_manager.prt_lock); atomic_set(&adev->vm_manager.num_prt_users, 0); + + /* If not overridden by the user, by default, only in large BAR systems + * Compute VM tables will be updated by CPU + */ +#ifdef CONFIG_X86_64 + if (amdgpu_vm_update_mode == -1) { + if (amdgpu_vm_is_large_bar(adev)) + adev->vm_manager.vm_update_mode = + AMDGPU_VM_USE_CPU_FOR_COMPUTE; + else + adev->vm_manager.vm_update_mode = 0; + } else + adev->vm_manager.vm_update_mode = amdgpu_vm_update_mode; +#else + adev->vm_manager.vm_update_mode = 0; +#endif + } /** @@ -2322,3 +2695,28 @@ void amdgpu_vm_manager_fini(struct amdgpu_device *adev) } } } + +int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) +{ + union drm_amdgpu_vm *args = data; + struct amdgpu_device *adev = dev->dev_private; + struct amdgpu_fpriv *fpriv = filp->driver_priv; + int r; + + switch (args->in.op) { + case AMDGPU_VM_OP_RESERVE_VMID: + /* current, we only have requirement to reserve vmid from gfxhub */ + r = amdgpu_vm_alloc_reserved_vmid(adev, &fpriv->vm, + AMDGPU_GFXHUB); + if (r) + return r; + break; + case AMDGPU_VM_OP_UNRESERVE_VMID: + amdgpu_vm_free_reserved_vmid(adev, &fpriv->vm, AMDGPU_GFXHUB); + break; + default: + return -EINVAL; + } + + return 0; +} |