diff options
author | Mark Brown <broonie@kernel.org> | 2016-11-04 12:16:38 -0600 |
---|---|---|
committer | Mark Brown <broonie@kernel.org> | 2016-11-04 12:16:38 -0600 |
commit | cc9b94029e9ef51787af908e9856b1eed314bc00 (patch) | |
tree | 9675310b89d0f6fb1f7bd9423f0638c4ee5226fd /drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | |
parent | 13bed58ce8748d430a26e353a09b89f9d613a71f (diff) | |
parent | 1b5b42216469b05ef4b5916cb40b127dfab1da88 (diff) |
Merge branch 'topic/error' of git://git.kernel.org/pub/scm/linux/kernel/git/broonie/regulator into regulator-fixed
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c')
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 815 |
1 files changed, 518 insertions, 297 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 9f36ed30ba11..06f24322e7c3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -25,6 +25,7 @@ * Alex Deucher * Jerome Glisse */ +#include <linux/fence-array.h> #include <drm/drmP.h> #include <drm/amdgpu_drm.h> #include "amdgpu.h" @@ -50,19 +51,22 @@ * SI supports 16. */ -/* Special value that no flush is necessary */ -#define AMDGPU_VM_NO_FLUSH (~0ll) - /* Local structure. Encapsulate some VM table update parameters to reduce * the number of function parameters */ -struct amdgpu_vm_update_params { +struct amdgpu_pte_update_params { + /* amdgpu device we do this update for */ + struct amdgpu_device *adev; /* address where to copy page table entries from */ uint64_t src; - /* DMA addresses to use for mapping */ - dma_addr_t *pages_addr; /* indirect buffer to fill with commands */ struct amdgpu_ib *ib; + /* Function which actually does the update */ + void (*func)(struct amdgpu_pte_update_params *params, uint64_t pe, + uint64_t addr, unsigned count, uint32_t incr, + uint32_t flags); + /* indicate update pt or its shadow */ + bool shadow; }; /** @@ -114,16 +118,26 @@ void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm, /** * amdgpu_vm_get_bos - add the vm BOs to a duplicates list * + * @adev: amdgpu device pointer * @vm: vm providing the BOs * @duplicates: head of duplicates list * * Add the page directory to the BO duplicates list * for command submission. */ -void amdgpu_vm_get_pt_bos(struct amdgpu_vm *vm, struct list_head *duplicates) +void amdgpu_vm_get_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm, + struct list_head *duplicates) { + uint64_t num_evictions; unsigned i; + /* We only need to validate the page tables + * if they aren't already valid. + */ + num_evictions = atomic64_read(&adev->num_evictions); + if (num_evictions == vm->last_eviction_counter) + return; + /* add the vm page table to the list */ for (i = 0; i <= vm->max_pde_used; ++i) { struct amdgpu_bo_list_entry *entry = &vm->page_tables[i].entry; @@ -162,6 +176,13 @@ void amdgpu_vm_move_pt_bos_in_lru(struct amdgpu_device *adev, spin_unlock(&glob->lru_lock); } +static bool amdgpu_vm_is_gpu_reset(struct amdgpu_device *adev, + struct amdgpu_vm_id *id) +{ + return id->current_gpu_reset_count != + atomic_read(&adev->gpu_reset_counter) ? true : false; +} + /** * amdgpu_vm_grab_id - allocate the next free VMID * @@ -174,18 +195,67 @@ void amdgpu_vm_move_pt_bos_in_lru(struct amdgpu_device *adev, */ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring, struct amdgpu_sync *sync, struct fence *fence, - unsigned *vm_id, uint64_t *vm_pd_addr) + struct amdgpu_job *job) { - uint64_t pd_addr = amdgpu_bo_gpu_offset(vm->page_directory); struct amdgpu_device *adev = ring->adev; + uint64_t fence_context = adev->fence_context + ring->idx; struct fence *updates = sync->last_vm_update; - struct amdgpu_vm_id *id; - unsigned i = ring->idx; - int r; + struct amdgpu_vm_id *id, *idle; + struct fence **fences; + unsigned i; + int r = 0; + + fences = kmalloc_array(sizeof(void *), adev->vm_manager.num_ids, + GFP_KERNEL); + if (!fences) + return -ENOMEM; mutex_lock(&adev->vm_manager.lock); + /* Check if we have an idle VMID */ + i = 0; + list_for_each_entry(idle, &adev->vm_manager.ids_lru, list) { + fences[i] = amdgpu_sync_peek_fence(&idle->active, ring); + if (!fences[i]) + break; + ++i; + } + + /* If we can't find a idle VMID to use, wait till one becomes available */ + if (&idle->list == &adev->vm_manager.ids_lru) { + u64 fence_context = adev->vm_manager.fence_context + ring->idx; + unsigned seqno = ++adev->vm_manager.seqno[ring->idx]; + struct fence_array *array; + unsigned j; + + for (j = 0; j < i; ++j) + fence_get(fences[j]); + + array = fence_array_create(i, fences, fence_context, + seqno, true); + if (!array) { + for (j = 0; j < i; ++j) + fence_put(fences[j]); + kfree(fences); + r = -ENOMEM; + goto error; + } + + + r = amdgpu_sync_fence(ring->adev, sync, &array->base); + fence_put(&array->base); + if (r) + goto error; + + mutex_unlock(&adev->vm_manager.lock); + return 0; + + } + kfree(fences); + + job->vm_needs_flush = true; /* Check if we can use a VMID already assigned to this VM */ + i = ring->idx; do { struct fence *flushed; @@ -196,67 +266,52 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring, /* Check all the prerequisites to using this VMID */ if (!id) continue; + if (amdgpu_vm_is_gpu_reset(adev, id)) + continue; if (atomic64_read(&id->owner) != vm->client_id) continue; - if (pd_addr != id->pd_gpu_addr) + if (job->vm_pd_addr != id->pd_gpu_addr) continue; - if (id->last_user != ring && - (!id->last_flush || !fence_is_signaled(id->last_flush))) + if (!id->last_flush) continue; - flushed = id->flushed_updates; - if (updates && (!flushed || fence_is_later(updates, flushed))) + if (id->last_flush->context != fence_context && + !fence_is_signaled(id->last_flush)) continue; - /* Good we can use this VMID */ - if (id->last_user == ring) { - r = amdgpu_sync_fence(ring->adev, sync, - id->first); - if (r) - goto error; - } + flushed = id->flushed_updates; + if (updates && + (!flushed || fence_is_later(updates, flushed))) + continue; - /* And remember this submission as user of the VMID */ + /* Good we can use this VMID. Remember this submission as + * user of the VMID. + */ r = amdgpu_sync_fence(ring->adev, &id->active, fence); if (r) goto error; + id->current_gpu_reset_count = atomic_read(&adev->gpu_reset_counter); list_move_tail(&id->list, &adev->vm_manager.ids_lru); vm->ids[ring->idx] = id; - *vm_id = id - adev->vm_manager.ids; - *vm_pd_addr = AMDGPU_VM_NO_FLUSH; - trace_amdgpu_vm_grab_id(vm, ring->idx, *vm_id, *vm_pd_addr); + job->vm_id = id - adev->vm_manager.ids; + job->vm_needs_flush = false; + trace_amdgpu_vm_grab_id(vm, ring->idx, job); mutex_unlock(&adev->vm_manager.lock); return 0; } while (i != ring->idx); - id = list_first_entry(&adev->vm_manager.ids_lru, - struct amdgpu_vm_id, - list); - - if (!amdgpu_sync_is_idle(&id->active)) { - struct list_head *head = &adev->vm_manager.ids_lru; - struct amdgpu_vm_id *tmp; - - list_for_each_entry_safe(id, tmp, &adev->vm_manager.ids_lru, - list) { - if (amdgpu_sync_is_idle(&id->active)) { - list_move(&id->list, head); - head = &id->list; - } - } - id = list_first_entry(&adev->vm_manager.ids_lru, - struct amdgpu_vm_id, - list); - } + /* Still no ID to use? Then use the idle one found earlier */ + id = idle; - r = amdgpu_sync_cycle_fences(sync, &id->active, fence); + /* Remember this submission as user of the VMID */ + r = amdgpu_sync_fence(ring->adev, &id->active, fence); if (r) goto error; @@ -269,22 +324,46 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring, fence_put(id->flushed_updates); id->flushed_updates = fence_get(updates); - id->pd_gpu_addr = pd_addr; - + id->pd_gpu_addr = job->vm_pd_addr; + id->current_gpu_reset_count = atomic_read(&adev->gpu_reset_counter); list_move_tail(&id->list, &adev->vm_manager.ids_lru); - id->last_user = ring; atomic64_set(&id->owner, vm->client_id); vm->ids[ring->idx] = id; - *vm_id = id - adev->vm_manager.ids; - *vm_pd_addr = pd_addr; - trace_amdgpu_vm_grab_id(vm, ring->idx, *vm_id, *vm_pd_addr); + job->vm_id = id - adev->vm_manager.ids; + trace_amdgpu_vm_grab_id(vm, ring->idx, job); error: mutex_unlock(&adev->vm_manager.lock); return r; } +static bool amdgpu_vm_ring_has_compute_vm_bug(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + const struct amdgpu_ip_block_version *ip_block; + + if (ring->type != AMDGPU_RING_TYPE_COMPUTE) + /* only compute rings */ + return false; + + ip_block = amdgpu_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GFX); + if (!ip_block) + return false; + + if (ip_block->major <= 7) { + /* gfx7 has no workaround */ + return true; + } else if (ip_block->major == 8) { + if (adev->gfx.mec_fw_version >= 673) + /* gfx8 is fixed in MEC firmware 673 */ + return false; + else + return true; + } + return false; +} + /** * amdgpu_vm_flush - hardware flush the vm * @@ -294,59 +373,52 @@ error: * * Emit a VM flush when it is necessary. */ -int amdgpu_vm_flush(struct amdgpu_ring *ring, - unsigned vm_id, uint64_t pd_addr, - uint32_t gds_base, uint32_t gds_size, - uint32_t gws_base, uint32_t gws_size, - uint32_t oa_base, uint32_t oa_size) +int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job) { struct amdgpu_device *adev = ring->adev; - struct amdgpu_vm_id *id = &adev->vm_manager.ids[vm_id]; + struct amdgpu_vm_id *id = &adev->vm_manager.ids[job->vm_id]; bool gds_switch_needed = ring->funcs->emit_gds_switch && ( - id->gds_base != gds_base || - id->gds_size != gds_size || - id->gws_base != gws_base || - id->gws_size != gws_size || - id->oa_base != oa_base || - id->oa_size != oa_size); + id->gds_base != job->gds_base || + id->gds_size != job->gds_size || + id->gws_base != job->gws_base || + id->gws_size != job->gws_size || + id->oa_base != job->oa_base || + id->oa_size != job->oa_size); int r; if (ring->funcs->emit_pipeline_sync && ( - pd_addr != AMDGPU_VM_NO_FLUSH || gds_switch_needed || - ring->type == AMDGPU_RING_TYPE_COMPUTE)) + job->vm_needs_flush || gds_switch_needed || + amdgpu_vm_ring_has_compute_vm_bug(ring))) amdgpu_ring_emit_pipeline_sync(ring); - if (ring->funcs->emit_vm_flush && - pd_addr != AMDGPU_VM_NO_FLUSH) { + if (ring->funcs->emit_vm_flush && (job->vm_needs_flush || + amdgpu_vm_is_gpu_reset(adev, id))) { struct fence *fence; - trace_amdgpu_vm_flush(pd_addr, ring->idx, vm_id); - amdgpu_ring_emit_vm_flush(ring, vm_id, pd_addr); + trace_amdgpu_vm_flush(job->vm_pd_addr, ring->idx, job->vm_id); + amdgpu_ring_emit_vm_flush(ring, job->vm_id, job->vm_pd_addr); + + r = amdgpu_fence_emit(ring, &fence); + if (r) + return r; mutex_lock(&adev->vm_manager.lock); - if ((id->pd_gpu_addr == pd_addr) && (id->last_user == ring)) { - r = amdgpu_fence_emit(ring, &fence); - if (r) { - mutex_unlock(&adev->vm_manager.lock); - return r; - } - fence_put(id->last_flush); - id->last_flush = fence; - } + fence_put(id->last_flush); + id->last_flush = fence; mutex_unlock(&adev->vm_manager.lock); } if (gds_switch_needed) { - id->gds_base = gds_base; - id->gds_size = gds_size; - id->gws_base = gws_base; - id->gws_size = gws_size; - id->oa_base = oa_base; - id->oa_size = oa_size; - amdgpu_ring_emit_gds_switch(ring, vm_id, - gds_base, gds_size, - gws_base, gws_size, - oa_base, oa_size); + id->gds_base = job->gds_base; + id->gds_size = job->gds_size; + id->gws_base = job->gws_base; + id->gws_size = job->gws_size; + id->oa_base = job->oa_base; + id->oa_size = job->oa_size; + amdgpu_ring_emit_gds_switch(ring, job->vm_id, + job->gds_base, job->gds_size, + job->gws_base, job->gws_size, + job->oa_base, job->oa_size); } return 0; @@ -398,10 +470,9 @@ struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm, } /** - * amdgpu_vm_update_pages - helper to call the right asic function + * amdgpu_vm_do_set_ptes - helper to call the right asic function * - * @adev: amdgpu_device pointer - * @vm_update_params: see amdgpu_vm_update_params definition + * @params: see amdgpu_pte_update_params definition * @pe: addr of the page entry * @addr: dst addr to write into pe * @count: number of page entries to update @@ -411,32 +482,46 @@ struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm, * Traces the parameters and calls the right asic functions * to setup the page table using the DMA. */ -static void amdgpu_vm_update_pages(struct amdgpu_device *adev, - struct amdgpu_vm_update_params - *vm_update_params, +static void amdgpu_vm_do_set_ptes(struct amdgpu_pte_update_params *params, + uint64_t pe, uint64_t addr, + unsigned count, uint32_t incr, + uint32_t flags) +{ + trace_amdgpu_vm_set_ptes(pe, addr, count, incr, flags); + + if (count < 3) { + amdgpu_vm_write_pte(params->adev, params->ib, pe, + addr | flags, count, incr); + + } else { + amdgpu_vm_set_pte_pde(params->adev, params->ib, pe, addr, + count, incr, flags); + } +} + +/** + * amdgpu_vm_do_copy_ptes - copy the PTEs from the GART + * + * @params: see amdgpu_pte_update_params definition + * @pe: addr of the page entry + * @addr: dst addr to write into pe + * @count: number of page entries to update + * @incr: increase next addr by incr bytes + * @flags: hw access flags + * + * Traces the parameters and calls the DMA function to copy the PTEs. + */ +static void amdgpu_vm_do_copy_ptes(struct amdgpu_pte_update_params *params, uint64_t pe, uint64_t addr, unsigned count, uint32_t incr, uint32_t flags) { - trace_amdgpu_vm_set_page(pe, addr, count, incr, flags); + uint64_t src = (params->src + (addr >> 12) * 8); - if (vm_update_params->src) { - amdgpu_vm_copy_pte(adev, vm_update_params->ib, - pe, (vm_update_params->src + (addr >> 12) * 8), count); - } else if (vm_update_params->pages_addr) { - amdgpu_vm_write_pte(adev, vm_update_params->ib, - vm_update_params->pages_addr, - pe, addr, count, incr, flags); + trace_amdgpu_vm_copy_ptes(pe, src, count); - } else if (count < 3) { - amdgpu_vm_write_pte(adev, vm_update_params->ib, NULL, pe, addr, - count, incr, flags); - - } else { - amdgpu_vm_set_pte_pde(adev, vm_update_params->ib, pe, addr, - count, incr, flags); - } + amdgpu_vm_copy_pte(params->adev, params->ib, pe, src, count); } /** @@ -454,12 +539,11 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev, struct amdgpu_ring *ring; struct fence *fence = NULL; struct amdgpu_job *job; - struct amdgpu_vm_update_params vm_update_params; + struct amdgpu_pte_update_params params; unsigned entries; uint64_t addr; int r; - memset(&vm_update_params, 0, sizeof(vm_update_params)); ring = container_of(vm->entity.sched, struct amdgpu_ring, sched); r = reservation_object_reserve_shared(bo->tbo.resv); @@ -470,6 +554,10 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev, if (r) goto error; + r = amdgpu_ttm_bind(&bo->tbo, &bo->tbo.mem); + if (r) + goto error; + addr = amdgpu_bo_gpu_offset(bo); entries = amdgpu_bo_size(bo) / 8; @@ -477,9 +565,10 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev, if (r) goto error; - vm_update_params.ib = &job->ibs[0]; - amdgpu_vm_update_pages(adev, &vm_update_params, addr, 0, entries, - 0, 0); + memset(¶ms, 0, sizeof(params)); + params.adev = adev; + params.ib = &job->ibs[0]; + amdgpu_vm_do_set_ptes(¶ms, addr, 0, entries, 0, 0); amdgpu_ring_pad_ib(ring, &job->ibs[0]); WARN_ON(job->ibs[0].length_dw > 64); @@ -508,55 +597,46 @@ error: * Look up the physical address of the page that the pte resolves * to and return the pointer for the page table entry. */ -uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr) +static uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr) { uint64_t result; - if (pages_addr) { - /* page table offset */ - result = pages_addr[addr >> PAGE_SHIFT]; - - /* in case cpu page size != gpu page size*/ - result |= addr & (~PAGE_MASK); + /* page table offset */ + result = pages_addr[addr >> PAGE_SHIFT]; - } else { - /* No mapping required */ - result = addr; - } + /* in case cpu page size != gpu page size*/ + result |= addr & (~PAGE_MASK); result &= 0xFFFFFFFFFFFFF000ULL; return result; } -/** - * amdgpu_vm_update_pdes - make sure that page directory is valid - * - * @adev: amdgpu_device pointer - * @vm: requested vm - * @start: start of GPU address range - * @end: end of GPU address range - * - * Allocates new page tables if necessary - * and updates the page directory. - * Returns 0 for success, error for failure. - */ -int amdgpu_vm_update_page_directory(struct amdgpu_device *adev, - struct amdgpu_vm *vm) +static int amdgpu_vm_update_pd_or_shadow(struct amdgpu_device *adev, + struct amdgpu_vm *vm, + bool shadow) { struct amdgpu_ring *ring; - struct amdgpu_bo *pd = vm->page_directory; - uint64_t pd_addr = amdgpu_bo_gpu_offset(pd); + struct amdgpu_bo *pd = shadow ? vm->page_directory->shadow : + vm->page_directory; + uint64_t pd_addr; uint32_t incr = AMDGPU_VM_PTE_COUNT * 8; uint64_t last_pde = ~0, last_pt = ~0; unsigned count = 0, pt_idx, ndw; struct amdgpu_job *job; - struct amdgpu_vm_update_params vm_update_params; + struct amdgpu_pte_update_params params; struct fence *fence = NULL; int r; - memset(&vm_update_params, 0, sizeof(vm_update_params)); + if (!pd) + return 0; + + r = amdgpu_ttm_bind(&pd->tbo, &pd->tbo.mem); + if (r) + return r; + + pd_addr = amdgpu_bo_gpu_offset(pd); ring = container_of(vm->entity.sched, struct amdgpu_ring, sched); /* padding, etc. */ @@ -569,7 +649,9 @@ int amdgpu_vm_update_page_directory(struct amdgpu_device *adev, if (r) return r; - vm_update_params.ib = &job->ibs[0]; + memset(¶ms, 0, sizeof(params)); + params.adev = adev; + params.ib = &job->ibs[0]; /* walk over the address space and update the page directory */ for (pt_idx = 0; pt_idx <= vm->max_pde_used; ++pt_idx) { @@ -579,20 +661,34 @@ int amdgpu_vm_update_page_directory(struct amdgpu_device *adev, if (bo == NULL) continue; + if (bo->shadow) { + struct amdgpu_bo *shadow = bo->shadow; + + r = amdgpu_ttm_bind(&shadow->tbo, &shadow->tbo.mem); + if (r) + return r; + } + pt = amdgpu_bo_gpu_offset(bo); - if (vm->page_tables[pt_idx].addr == pt) - continue; - vm->page_tables[pt_idx].addr = pt; + if (!shadow) { + if (vm->page_tables[pt_idx].addr == pt) + continue; + vm->page_tables[pt_idx].addr = pt; + } else { + if (vm->page_tables[pt_idx].shadow_addr == pt) + continue; + vm->page_tables[pt_idx].shadow_addr = pt; + } pde = pd_addr + pt_idx * 8; if (((last_pde + 8 * count) != pde) || - ((last_pt + incr * count) != pt)) { + ((last_pt + incr * count) != pt) || + (count == AMDGPU_VM_MAX_UPDATE_SIZE)) { if (count) { - amdgpu_vm_update_pages(adev, &vm_update_params, - last_pde, last_pt, - count, incr, - AMDGPU_PTE_VALID); + amdgpu_vm_do_set_ptes(¶ms, last_pde, + last_pt, count, incr, + AMDGPU_PTE_VALID); } count = 1; @@ -604,15 +700,14 @@ int amdgpu_vm_update_page_directory(struct amdgpu_device *adev, } if (count) - amdgpu_vm_update_pages(adev, &vm_update_params, - last_pde, last_pt, - count, incr, AMDGPU_PTE_VALID); + amdgpu_vm_do_set_ptes(¶ms, last_pde, last_pt, + count, incr, AMDGPU_PTE_VALID); - if (vm_update_params.ib->length_dw != 0) { - amdgpu_ring_pad_ib(ring, vm_update_params.ib); + if (params.ib->length_dw != 0) { + amdgpu_ring_pad_ib(ring, params.ib); amdgpu_sync_resv(adev, &job->sync, pd->tbo.resv, AMDGPU_FENCE_OWNER_VM); - WARN_ON(vm_update_params.ib->length_dw > ndw); + WARN_ON(params.ib->length_dw > ndw); r = amdgpu_job_submit(job, ring, &vm->entity, AMDGPU_FENCE_OWNER_VM, &fence); if (r) @@ -634,21 +729,135 @@ error_free: return r; } +/* + * amdgpu_vm_update_pdes - make sure that page directory is valid + * + * @adev: amdgpu_device pointer + * @vm: requested vm + * @start: start of GPU address range + * @end: end of GPU address range + * + * Allocates new page tables if necessary + * and updates the page directory. + * Returns 0 for success, error for failure. + */ +int amdgpu_vm_update_page_directory(struct amdgpu_device *adev, + struct amdgpu_vm *vm) +{ + int r; + + r = amdgpu_vm_update_pd_or_shadow(adev, vm, true); + if (r) + return r; + return amdgpu_vm_update_pd_or_shadow(adev, vm, false); +} + /** + * amdgpu_vm_update_ptes - make sure that page tables are valid + * + * @params: see amdgpu_pte_update_params definition + * @vm: requested vm + * @start: start of GPU address range + * @end: end of GPU address range + * @dst: destination address to map to, the next dst inside the function + * @flags: mapping flags + * + * Update the page tables in the range @start - @end. + */ +static void amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params, + struct amdgpu_vm *vm, + uint64_t start, uint64_t end, + uint64_t dst, uint32_t flags) +{ + const uint64_t mask = AMDGPU_VM_PTE_COUNT - 1; + + uint64_t cur_pe_start, cur_nptes, cur_dst; + uint64_t addr; /* next GPU address to be updated */ + uint64_t pt_idx; + struct amdgpu_bo *pt; + unsigned nptes; /* next number of ptes to be updated */ + uint64_t next_pe_start; + + /* initialize the variables */ + addr = start; + pt_idx = addr >> amdgpu_vm_block_size; + pt = vm->page_tables[pt_idx].entry.robj; + if (params->shadow) { + if (!pt->shadow) + return; + pt = vm->page_tables[pt_idx].entry.robj->shadow; + } + if ((addr & ~mask) == (end & ~mask)) + nptes = end - addr; + else + nptes = AMDGPU_VM_PTE_COUNT - (addr & mask); + + cur_pe_start = amdgpu_bo_gpu_offset(pt); + cur_pe_start += (addr & mask) * 8; + cur_nptes = nptes; + cur_dst = dst; + + /* for next ptb*/ + addr += nptes; + dst += nptes * AMDGPU_GPU_PAGE_SIZE; + + /* walk over the address space and update the page tables */ + while (addr < end) { + pt_idx = addr >> amdgpu_vm_block_size; + pt = vm->page_tables[pt_idx].entry.robj; + if (params->shadow) { + if (!pt->shadow) + return; + pt = vm->page_tables[pt_idx].entry.robj->shadow; + } + + if ((addr & ~mask) == (end & ~mask)) + nptes = end - addr; + else + nptes = AMDGPU_VM_PTE_COUNT - (addr & mask); + + next_pe_start = amdgpu_bo_gpu_offset(pt); + next_pe_start += (addr & mask) * 8; + + if ((cur_pe_start + 8 * cur_nptes) == next_pe_start && + ((cur_nptes + nptes) <= AMDGPU_VM_MAX_UPDATE_SIZE)) { + /* The next ptb is consecutive to current ptb. + * Don't call the update function now. + * Will update two ptbs together in future. + */ + cur_nptes += nptes; + } else { + params->func(params, cur_pe_start, cur_dst, cur_nptes, + AMDGPU_GPU_PAGE_SIZE, flags); + + cur_pe_start = next_pe_start; + cur_nptes = nptes; + cur_dst = dst; + } + + /* for next ptb*/ + addr += nptes; + dst += nptes * AMDGPU_GPU_PAGE_SIZE; + } + + params->func(params, cur_pe_start, cur_dst, cur_nptes, + AMDGPU_GPU_PAGE_SIZE, flags); +} + +/* * amdgpu_vm_frag_ptes - add fragment information to PTEs * - * @adev: amdgpu_device pointer - * @vm_update_params: see amdgpu_vm_update_params definition - * @pe_start: first PTE to handle - * @pe_end: last PTE to handle - * @addr: addr those PTEs should point to + * @params: see amdgpu_pte_update_params definition + * @vm: requested vm + * @start: first PTE to handle + * @end: last PTE to handle + * @dst: addr those PTEs should point to * @flags: hw mapping flags */ -static void amdgpu_vm_frag_ptes(struct amdgpu_device *adev, - struct amdgpu_vm_update_params - *vm_update_params, - uint64_t pe_start, uint64_t pe_end, - uint64_t addr, uint32_t flags) +static void amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params *params, + struct amdgpu_vm *vm, + uint64_t start, uint64_t end, + uint64_t dst, uint32_t flags) { /** * The MC L1 TLB supports variable sized pages, based on a fragment @@ -670,116 +879,43 @@ static void amdgpu_vm_frag_ptes(struct amdgpu_device *adev, */ /* SI and newer are optimized for 64KB */ - uint64_t frag_flags = AMDGPU_PTE_FRAG_64KB; - uint64_t frag_align = 0x80; - - uint64_t frag_start = ALIGN(pe_start, frag_align); - uint64_t frag_end = pe_end & ~(frag_align - 1); + uint64_t frag_flags = AMDGPU_PTE_FRAG(AMDGPU_LOG2_PAGES_PER_FRAG); + uint64_t frag_align = 1 << AMDGPU_LOG2_PAGES_PER_FRAG; - unsigned count; - - /* Abort early if there isn't anything to do */ - if (pe_start == pe_end) - return; + uint64_t frag_start = ALIGN(start, frag_align); + uint64_t frag_end = end & ~(frag_align - 1); /* system pages are non continuously */ - if (vm_update_params->src || vm_update_params->pages_addr || - !(flags & AMDGPU_PTE_VALID) || (frag_start >= frag_end)) { + if (params->src || !(flags & AMDGPU_PTE_VALID) || + (frag_start >= frag_end)) { - count = (pe_end - pe_start) / 8; - amdgpu_vm_update_pages(adev, vm_update_params, pe_start, - addr, count, AMDGPU_GPU_PAGE_SIZE, - flags); + amdgpu_vm_update_ptes(params, vm, start, end, dst, flags); return; } /* handle the 4K area at the beginning */ - if (pe_start != frag_start) { - count = (frag_start - pe_start) / 8; - amdgpu_vm_update_pages(adev, vm_update_params, pe_start, addr, - count, AMDGPU_GPU_PAGE_SIZE, flags); - addr += AMDGPU_GPU_PAGE_SIZE * count; + if (start != frag_start) { + amdgpu_vm_update_ptes(params, vm, start, frag_start, + dst, flags); + dst += (frag_start - start) * AMDGPU_GPU_PAGE_SIZE; } /* handle the area in the middle */ - count = (frag_end - frag_start) / 8; - amdgpu_vm_update_pages(adev, vm_update_params, frag_start, addr, count, - AMDGPU_GPU_PAGE_SIZE, flags | frag_flags); + amdgpu_vm_update_ptes(params, vm, frag_start, frag_end, dst, + flags | frag_flags); /* handle the 4K area at the end */ - if (frag_end != pe_end) { - addr += AMDGPU_GPU_PAGE_SIZE * count; - count = (pe_end - frag_end) / 8; - amdgpu_vm_update_pages(adev, vm_update_params, frag_end, addr, - count, AMDGPU_GPU_PAGE_SIZE, flags); + if (frag_end != end) { + dst += (frag_end - frag_start) * AMDGPU_GPU_PAGE_SIZE; + amdgpu_vm_update_ptes(params, vm, frag_end, end, dst, flags); } } /** - * amdgpu_vm_update_ptes - make sure that page tables are valid - * - * @adev: amdgpu_device pointer - * @vm_update_params: see amdgpu_vm_update_params definition - * @vm: requested vm - * @start: start of GPU address range - * @end: end of GPU address range - * @dst: destination address to map to - * @flags: mapping flags - * - * Update the page tables in the range @start - @end. - */ -static void amdgpu_vm_update_ptes(struct amdgpu_device *adev, - struct amdgpu_vm_update_params - *vm_update_params, - struct amdgpu_vm *vm, - uint64_t start, uint64_t end, - uint64_t dst, uint32_t flags) -{ - const uint64_t mask = AMDGPU_VM_PTE_COUNT - 1; - - uint64_t last_pe_start = ~0, last_pe_end = ~0, last_dst = ~0; - uint64_t addr; - - /* walk over the address space and update the page tables */ - for (addr = start; addr < end; ) { - uint64_t pt_idx = addr >> amdgpu_vm_block_size; - struct amdgpu_bo *pt = vm->page_tables[pt_idx].entry.robj; - unsigned nptes; - uint64_t pe_start; - - if ((addr & ~mask) == (end & ~mask)) - nptes = end - addr; - else - nptes = AMDGPU_VM_PTE_COUNT - (addr & mask); - - pe_start = amdgpu_bo_gpu_offset(pt); - pe_start += (addr & mask) * 8; - - if (last_pe_end != pe_start) { - - amdgpu_vm_frag_ptes(adev, vm_update_params, - last_pe_start, last_pe_end, - last_dst, flags); - - last_pe_start = pe_start; - last_pe_end = pe_start + 8 * nptes; - last_dst = dst; - } else { - last_pe_end += 8 * nptes; - } - - addr += nptes; - dst += nptes * AMDGPU_GPU_PAGE_SIZE; - } - - amdgpu_vm_frag_ptes(adev, vm_update_params, last_pe_start, - last_pe_end, last_dst, flags); -} - -/** * amdgpu_vm_bo_update_mapping - update a mapping in the vm page table * * @adev: amdgpu_device pointer + * @exclusive: fence we need to sync to * @src: address where to copy page table entries from * @pages_addr: DMA addresses to use for mapping * @vm: requested vm @@ -793,6 +929,7 @@ static void amdgpu_vm_update_ptes(struct amdgpu_device *adev, * Returns 0 for success, -EINVAL for failure. */ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, + struct fence *exclusive, uint64_t src, dma_addr_t *pages_addr, struct amdgpu_vm *vm, @@ -804,14 +941,19 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, void *owner = AMDGPU_FENCE_OWNER_VM; unsigned nptes, ncmds, ndw; struct amdgpu_job *job; - struct amdgpu_vm_update_params vm_update_params; + struct amdgpu_pte_update_params params; struct fence *f = NULL; int r; + memset(¶ms, 0, sizeof(params)); + params.adev = adev; + params.src = src; + ring = container_of(vm->entity.sched, struct amdgpu_ring, sched); - memset(&vm_update_params, 0, sizeof(vm_update_params)); - vm_update_params.src = src; - vm_update_params.pages_addr = pages_addr; + + memset(¶ms, 0, sizeof(params)); + params.adev = adev; + params.src = src; /* sync to everything on unmapping */ if (!(flags & AMDGPU_PTE_VALID)) @@ -828,30 +970,57 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, /* padding, etc. */ ndw = 64; - if (vm_update_params.src) { + if (src) { /* only copy commands needed */ ndw += ncmds * 7; - } else if (vm_update_params.pages_addr) { - /* header for write data commands */ - ndw += ncmds * 4; + params.func = amdgpu_vm_do_copy_ptes; + + } else if (pages_addr) { + /* copy commands needed */ + ndw += ncmds * 7; - /* body of write data command */ + /* and also PTEs */ ndw += nptes * 2; + params.func = amdgpu_vm_do_copy_ptes; + } else { /* set page commands needed */ ndw += ncmds * 10; /* two extra commands for begin/end of fragment */ ndw += 2 * 10; + + params.func = amdgpu_vm_do_set_ptes; } r = amdgpu_job_alloc_with_ib(adev, ndw * 4, &job); if (r) return r; - vm_update_params.ib = &job->ibs[0]; + params.ib = &job->ibs[0]; + + if (!src && pages_addr) { + uint64_t *pte; + unsigned i; + + /* Put the PTEs at the end of the IB. */ + i = ndw - nptes * 2; + pte= (uint64_t *)&(job->ibs->ptr[i]); + params.src = job->ibs->gpu_addr + i * 4; + + for (i = 0; i < nptes; ++i) { + pte[i] = amdgpu_vm_map_gart(pages_addr, addr + i * + AMDGPU_GPU_PAGE_SIZE); + pte[i] |= flags; + } + addr = 0; + } + + r = amdgpu_sync_fence(adev, &job->sync, exclusive); + if (r) + goto error_free; r = amdgpu_sync_resv(adev, &job->sync, vm->page_directory->tbo.resv, owner); @@ -862,11 +1031,13 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, if (r) goto error_free; - amdgpu_vm_update_ptes(adev, &vm_update_params, vm, start, - last + 1, addr, flags); + params.shadow = true; + amdgpu_vm_frag_ptes(¶ms, vm, start, last + 1, addr, flags); + params.shadow = false; + amdgpu_vm_frag_ptes(¶ms, vm, start, last + 1, addr, flags); - amdgpu_ring_pad_ib(ring, vm_update_params.ib); - WARN_ON(vm_update_params.ib->length_dw > ndw); + amdgpu_ring_pad_ib(ring, params.ib); + WARN_ON(params.ib->length_dw > ndw); r = amdgpu_job_submit(job, ring, &vm->entity, AMDGPU_FENCE_OWNER_VM, &f); if (r) @@ -889,6 +1060,7 @@ error_free: * amdgpu_vm_bo_split_mapping - split a mapping into smaller chunks * * @adev: amdgpu_device pointer + * @exclusive: fence we need to sync to * @gtt_flags: flags as they are used for GTT * @pages_addr: DMA addresses to use for mapping * @vm: requested vm @@ -902,6 +1074,7 @@ error_free: * Returns 0 for success, -EINVAL for failure. */ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev, + struct fence *exclusive, uint32_t gtt_flags, dma_addr_t *pages_addr, struct amdgpu_vm *vm, @@ -932,7 +1105,8 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev, addr += mapping->offset; if (!pages_addr || src) - return amdgpu_vm_bo_update_mapping(adev, src, pages_addr, vm, + return amdgpu_vm_bo_update_mapping(adev, exclusive, + src, pages_addr, vm, start, mapping->it.last, flags, addr, fence); @@ -940,7 +1114,8 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev, uint64_t last; last = min((uint64_t)mapping->it.last, start + max_size - 1); - r = amdgpu_vm_bo_update_mapping(adev, src, pages_addr, vm, + r = amdgpu_vm_bo_update_mapping(adev, exclusive, + src, pages_addr, vm, start, last, flags, addr, fence); if (r) @@ -958,27 +1133,32 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev, * * @adev: amdgpu_device pointer * @bo_va: requested BO and VM object - * @mem: ttm mem + * @clear: if true clear the entries * * Fill in the page table entries for @bo_va. * Returns 0 for success, -EINVAL for failure. - * - * Object have to be reserved and mutex must be locked! */ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, - struct ttm_mem_reg *mem) + bool clear) { struct amdgpu_vm *vm = bo_va->vm; struct amdgpu_bo_va_mapping *mapping; dma_addr_t *pages_addr = NULL; uint32_t gtt_flags, flags; + struct ttm_mem_reg *mem; + struct fence *exclusive; uint64_t addr; int r; - if (mem) { + if (clear) { + mem = NULL; + addr = 0; + exclusive = NULL; + } else { struct ttm_dma_tt *ttm; + mem = &bo_va->bo->tbo.mem; addr = (u64)mem->start << PAGE_SHIFT; switch (mem->mem_type) { case TTM_PL_TT: @@ -994,12 +1174,13 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, default: break; } - } else { - addr = 0; + + exclusive = reservation_object_get_excl(bo_va->bo->tbo.resv); } flags = amdgpu_ttm_tt_pte_flags(adev, bo_va->bo->tbo.ttm, mem); - gtt_flags = (adev == bo_va->bo->adev) ? flags : 0; + gtt_flags = (amdgpu_ttm_is_bound(bo_va->bo->tbo.ttm) && + adev == bo_va->bo->adev) ? flags : 0; spin_lock(&vm->status_lock); if (!list_empty(&bo_va->vm_status)) @@ -1007,7 +1188,8 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, spin_unlock(&vm->status_lock); list_for_each_entry(mapping, &bo_va->invalids, list) { - r = amdgpu_vm_bo_split_mapping(adev, gtt_flags, pages_addr, vm, + r = amdgpu_vm_bo_split_mapping(adev, exclusive, + gtt_flags, pages_addr, vm, mapping, flags, addr, &bo_va->last_pt_update); if (r) @@ -1025,7 +1207,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, spin_lock(&vm->status_lock); list_splice_init(&bo_va->invalids, &bo_va->valids); list_del_init(&bo_va->vm_status); - if (!mem) + if (clear) list_add(&bo_va->vm_status, &vm->cleared); spin_unlock(&vm->status_lock); @@ -1054,7 +1236,7 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev, struct amdgpu_bo_va_mapping, list); list_del(&mapping->list); - r = amdgpu_vm_bo_split_mapping(adev, 0, NULL, vm, mapping, + r = amdgpu_vm_bo_split_mapping(adev, NULL, 0, NULL, vm, mapping, 0, 0, NULL); kfree(mapping); if (r) @@ -1088,7 +1270,7 @@ int amdgpu_vm_clear_invalids(struct amdgpu_device *adev, struct amdgpu_bo_va, vm_status); spin_unlock(&vm->status_lock); - r = amdgpu_vm_bo_update(adev, bo_va, NULL); + r = amdgpu_vm_bo_update(adev, bo_va, true); if (r) return r; @@ -1233,7 +1415,8 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev, r = amdgpu_bo_create(adev, AMDGPU_VM_PTE_COUNT * 8, AMDGPU_GPU_PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_VRAM, - AMDGPU_GEM_CREATE_NO_CPU_ACCESS, + AMDGPU_GEM_CREATE_NO_CPU_ACCESS | + AMDGPU_GEM_CREATE_SHADOW, NULL, resv, &pt); if (r) goto error_free; @@ -1245,10 +1428,20 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev, r = amdgpu_vm_clear_bo(adev, vm, pt); if (r) { + amdgpu_bo_unref(&pt->shadow); amdgpu_bo_unref(&pt); goto error_free; } + if (pt->shadow) { + r = amdgpu_vm_clear_bo(adev, vm, pt->shadow); + if (r) { + amdgpu_bo_unref(&pt->shadow); + amdgpu_bo_unref(&pt); + goto error_free; + } + } + entry->robj = pt; entry->priority = 0; entry->tv.bo = &entry->robj->tbo; @@ -1426,13 +1619,14 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm) r = amd_sched_entity_init(&ring->sched, &vm->entity, rq, amdgpu_sched_jobs); if (r) - return r; + goto err; vm->page_directory_fence = NULL; r = amdgpu_bo_create(adev, pd_size, align, true, AMDGPU_GEM_DOMAIN_VRAM, - AMDGPU_GEM_CREATE_NO_CPU_ACCESS, + AMDGPU_GEM_CREATE_NO_CPU_ACCESS | + AMDGPU_GEM_CREATE_SHADOW, NULL, NULL, &vm->page_directory); if (r) goto error_free_sched_entity; @@ -1442,19 +1636,34 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm) goto error_free_page_directory; r = amdgpu_vm_clear_bo(adev, vm, vm->page_directory); - amdgpu_bo_unreserve(vm->page_directory); if (r) - goto error_free_page_directory; + goto error_unreserve; + + if (vm->page_directory->shadow) { + r = amdgpu_vm_clear_bo(adev, vm, vm->page_directory->shadow); + if (r) + goto error_unreserve; + } + + vm->last_eviction_counter = atomic64_read(&adev->num_evictions); + amdgpu_bo_unreserve(vm->page_directory); return 0; +error_unreserve: + amdgpu_bo_unreserve(vm->page_directory); + error_free_page_directory: + amdgpu_bo_unref(&vm->page_directory->shadow); amdgpu_bo_unref(&vm->page_directory); vm->page_directory = NULL; error_free_sched_entity: amd_sched_entity_fini(&ring->sched, &vm->entity); +err: + drm_free_large(vm->page_tables); + return r; } @@ -1487,10 +1696,18 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) kfree(mapping); } - for (i = 0; i < amdgpu_vm_num_pdes(adev); i++) - amdgpu_bo_unref(&vm->page_tables[i].entry.robj); + for (i = 0; i < amdgpu_vm_num_pdes(adev); i++) { + struct amdgpu_bo *pt = vm->page_tables[i].entry.robj; + + if (!pt) + continue; + + amdgpu_bo_unref(&pt->shadow); + amdgpu_bo_unref(&pt); + } drm_free_large(vm->page_tables); + amdgpu_bo_unref(&vm->page_directory->shadow); amdgpu_bo_unref(&vm->page_directory); fence_put(vm->page_directory_fence); } @@ -1516,6 +1733,10 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev) &adev->vm_manager.ids_lru); } + adev->vm_manager.fence_context = fence_context_alloc(AMDGPU_MAX_RINGS); + for (i = 0; i < AMDGPU_MAX_RINGS; ++i) + adev->vm_manager.seqno[i] = 0; + atomic_set(&adev->vm_manager.vm_pte_next_ring, 0); atomic64_set(&adev->vm_manager.client_counter, 0); } |