Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c')
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  234
1 file changed, 105 insertions(+), 129 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 6904d794d60a..ead851413c0a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -107,14 +107,6 @@ struct amdgpu_pte_update_params {
* DMA addresses to use for mapping, used during VM update by CPU
*/
dma_addr_t *pages_addr;
-
- /**
- * @kptr:
- *
- * Kernel pointer of PD/PT BO that needs to be updated,
- * used during VM update by CPU
- */
- void *kptr;
};
/**
@@ -181,7 +173,7 @@ static unsigned amdgpu_vm_num_entries(struct amdgpu_device *adev,
if (level == adev->vm_manager.root_level)
/* For the root directory */
- return round_up(adev->vm_manager.max_pfn, 1 << shift) >> shift;
+ return round_up(adev->vm_manager.max_pfn, 1ULL << shift) >> shift;
else if (level != AMDGPU_VM_PTB)
/* Everything in between */
return 512;
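Note on the hunk above: the fix matters because `1` is a 32-bit int, so `1 << shift` is evaluated in 32 bits and becomes undefined once the root-level shift reaches 31, which large address spaces can hit. A minimal userspace sketch of the difference (the shift value is made up, not taken from the driver):

	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		unsigned shift = 36;		/* hypothetical root-level shift */
		uint64_t bad  = 1 << shift;	/* 32-bit shift: undefined behaviour */
		uint64_t good = 1ULL << shift;	/* 64-bit shift: 0x1000000000 */

		printf("bad=%#llx good=%#llx\n",
		       (unsigned long long)bad, (unsigned long long)good);
		return 0;
	}

On x86 the 32-bit shift typically wraps the count to 36 & 31 = 4 and yields 16 instead of the intended power of two.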
@@ -542,7 +534,8 @@ static void amdgpu_vm_pt_next_leaf(struct amdgpu_device *adev,
struct amdgpu_vm_pt_cursor *cursor)
{
amdgpu_vm_pt_next(adev, cursor);
- while (amdgpu_vm_pt_descendant(adev, cursor));
+ if (cursor->pfn != ~0ll)
+ while (amdgpu_vm_pt_descendant(adev, cursor));
}
/**
@@ -616,11 +609,34 @@ void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm,
{
entry->priority = 0;
entry->tv.bo = &vm->root.base.bo->tbo;
- entry->tv.shared = true;
+ /* One for the VM updates, one for TTM and one for the CS job */
+ entry->tv.num_shared = 3;
entry->user_pages = NULL;
list_add(&entry->tv.head, validated);
}
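Note on the `num_shared` change above: `tv.shared` became a count because the reservation-object rework allows reserving several shared-fence slots up front, and the new comment spells out who needs one. A hedged sketch of how such a count is presumably consumed when the validation list is reserved (modelled on `ttm_eu_reserve_buffers()`, not copied from it):

	struct ttm_validate_buffer *entry;
	int ret;

	list_for_each_entry(entry, validated, head) {
		/* Pre-reserve one shared-fence slot per expected user so
		 * that adding the fences later cannot fail. */
		ret = reservation_object_reserve_shared(entry->bo->resv,
							entry->num_shared);
		if (ret)
			return ret;
	}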
+void amdgpu_vm_del_from_lru_notify(struct ttm_buffer_object *bo)
+{
+ struct amdgpu_bo *abo;
+ struct amdgpu_vm_bo_base *bo_base;
+
+ if (!amdgpu_bo_is_amdgpu_bo(bo))
+ return;
+
+ if (bo->mem.placement & TTM_PL_FLAG_NO_EVICT)
+ return;
+
+ abo = ttm_to_amdgpu_bo(bo);
+ if (!abo->parent)
+ return;
+ for (bo_base = abo->vm_bo; bo_base; bo_base = bo_base->next) {
+ struct amdgpu_vm *vm = bo_base->vm;
+
+ if (abo->tbo.resv == vm->root.base.bo->tbo.resv)
+ vm->bulk_moveable = false;
+ }
+
+}
/**
* amdgpu_vm_move_to_lru_tail - move all BOs to the end of LRU
*
@@ -636,12 +652,14 @@ void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev,
struct ttm_bo_global *glob = adev->mman.bdev.glob;
struct amdgpu_vm_bo_base *bo_base;
+#if 0
if (vm->bulk_moveable) {
spin_lock(&glob->lru_lock);
ttm_bo_bulk_move_lru_tail(&vm->lru_bulk_move);
spin_unlock(&glob->lru_lock);
return;
}
+#endif
memset(&vm->lru_bulk_move, 0, sizeof(vm->lru_bulk_move));
@@ -682,8 +700,6 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
struct amdgpu_vm_bo_base *bo_base, *tmp;
int r = 0;
- vm->bulk_moveable &= list_empty(&vm->evicted);
-
list_for_each_entry_safe(bo_base, tmp, &vm->evicted, vm_status) {
struct amdgpu_bo *bo = bo_base->bo;
@@ -772,10 +788,6 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
ring = container_of(vm->entity.rq->sched, struct amdgpu_ring, sched);
- r = reservation_object_reserve_shared(bo->tbo.resv);
- if (r)
- return r;
-
r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
if (r)
goto error;
@@ -801,15 +813,22 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
addr += ats_entries * 8;
}
- if (entries)
+ if (entries) {
+ uint64_t value = 0;
+
+ /* Workaround for fault priority problem on GMC9 */
+ if (level == AMDGPU_VM_PTB && adev->asic_type >= CHIP_VEGA10)
+ value = AMDGPU_PTE_EXECUTABLE;
+
amdgpu_vm_set_pte_pde(adev, &job->ibs[0], addr, 0,
- entries, 0, 0);
+ entries, 0, value);
+ }
amdgpu_ring_pad_ib(ring, &job->ibs[0]);
WARN_ON(job->ibs[0].length_dw > 64);
r = amdgpu_sync_resv(adev, &job->sync, bo->tbo.resv,
- AMDGPU_FENCE_OWNER_UNDEFINED, false);
+ AMDGPU_FENCE_OWNER_KFD, false);
if (r)
goto error_free;
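Note on the owner change above: syncing with `AMDGPU_FENCE_OWNER_KFD` instead of `AMDGPU_FENCE_OWNER_UNDEFINED` makes the clear job wait on everything except KFD eviction fences (compare the "sync to everything except eviction fences" comment later in this patch), so clearing a PD/PT cannot block on an eviction in flight. A hedged restatement of the filter this implies (illustrative; the real rules live in amdgpu_sync.c and are more involved):

	/* Skip only fences that the KFD eviction machinery owns;
	 * sync to everything else.  Other owner rules elided. */
	static bool want_sync(void *fence_owner, void *sync_owner)
	{
		return !(sync_owner == AMDGPU_FENCE_OWNER_KFD &&
			 fence_owner == AMDGPU_FENCE_OWNER_KFD);
	}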
@@ -849,9 +868,6 @@ static void amdgpu_vm_bo_param(struct amdgpu_device *adev, struct amdgpu_vm *vm,
bp->size = amdgpu_vm_bo_size(adev, level);
bp->byte_align = AMDGPU_GPU_PAGE_SIZE;
bp->domain = AMDGPU_GEM_DOMAIN_VRAM;
- if (bp->size <= PAGE_SIZE && adev->asic_type >= CHIP_VEGA10 &&
- adev->flags & AMD_IS_APU)
- bp->domain |= AMDGPU_GEM_DOMAIN_GTT;
bp->domain = amdgpu_bo_get_preferred_pin_domain(adev, bp->domain);
bp->flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
AMDGPU_GEM_CREATE_CPU_GTT_USWC;
@@ -1316,31 +1332,6 @@ static void amdgpu_vm_cpu_set_ptes(struct amdgpu_pte_update_params *params,
}
}
-
-/**
- * amdgpu_vm_wait_pd - Wait for PT BOs to be free.
- *
- * @adev: amdgpu_device pointer
- * @vm: related vm
- * @owner: fence owner
- *
- * Returns:
- * 0 on success, errno otherwise.
- */
-static int amdgpu_vm_wait_pd(struct amdgpu_device *adev, struct amdgpu_vm *vm,
- void *owner)
-{
- struct amdgpu_sync sync;
- int r;
-
- amdgpu_sync_create(&sync);
- amdgpu_sync_resv(adev, &sync, vm->root.base.bo->tbo.resv, owner, false);
- r = amdgpu_sync_wait(&sync, true);
- amdgpu_sync_free(&sync);
-
- return r;
-}
-
/**
* amdgpu_vm_update_func - helper to call update function
*
@@ -1435,7 +1426,8 @@ restart:
params.adev = adev;
if (vm->use_cpu_for_update) {
- r = amdgpu_vm_wait_pd(adev, vm, AMDGPU_FENCE_OWNER_VM);
+ r = amdgpu_bo_sync_wait(vm->root.base.bo,
+ AMDGPU_FENCE_OWNER_VM, true);
if (unlikely(r))
return r;
@@ -1508,20 +1500,27 @@ error:
}
/**
- * amdgpu_vm_update_huge - figure out parameters for PTE updates
+ * amdgpu_vm_update_flags - figure out flags for PTE updates
*
* Make sure to set the right flags for the PTEs at the desired level.
*/
-static void amdgpu_vm_update_huge(struct amdgpu_pte_update_params *params,
- struct amdgpu_bo *bo, unsigned level,
- uint64_t pe, uint64_t addr,
- unsigned count, uint32_t incr,
- uint64_t flags)
+static void amdgpu_vm_update_flags(struct amdgpu_pte_update_params *params,
+ struct amdgpu_bo *bo, unsigned level,
+ uint64_t pe, uint64_t addr,
+ unsigned count, uint32_t incr,
+ uint64_t flags)
{
if (level != AMDGPU_VM_PTB) {
flags |= AMDGPU_PDE_PTE;
amdgpu_gmc_get_vm_pde(params->adev, level, &addr, &flags);
+
+ } else if (params->adev->asic_type >= CHIP_VEGA10 &&
+ !(flags & AMDGPU_PTE_VALID) &&
+ !(flags & AMDGPU_PTE_PRT)) {
+
+ /* Workaround for fault priority problem on GMC9 */
+ flags |= AMDGPU_PTE_EXECUTABLE;
}
amdgpu_vm_update_func(params, bo, pe, addr, count, incr, flags);
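Note on the new `else if` above: it mirrors the workaround added to `amdgpu_vm_clear_bo()` earlier in this patch — on GMC9-era parts, invalid non-PRT leaf PTEs are written with the execute bit set so faults on them are handled with the right priority. A standalone restatement (the `PTE_*` bit positions below are assumptions, double-check them against amdgpu.h):

	#include <stdbool.h>
	#include <stdint.h>

	#define PTE_VALID	(1ULL << 0)	/* assumed: AMDGPU_PTE_VALID */
	#define PTE_EXECUTABLE	(1ULL << 4)	/* assumed: AMDGPU_PTE_EXECUTABLE */
	#define PTE_PRT		(1ULL << 51)	/* assumed: AMDGPU_PTE_PRT */

	/* Invalid, non-PRT leaf PTEs get the execute bit on GMC9+. */
	static uint64_t gmc9_fixup(uint64_t flags, bool gmc9_or_later)
	{
		if (gmc9_or_later && !(flags & (PTE_VALID | PTE_PRT)))
			flags |= PTE_EXECUTABLE;
		return flags;
	}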
@@ -1631,13 +1630,6 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
continue;
}
- /* First check if the entry is already handled */
- if (cursor.pfn < frag_start) {
- cursor.entry->huge = true;
- amdgpu_vm_pt_next(adev, &cursor);
- continue;
- }
-
/* If it isn't already handled it can't be a huge page */
if (cursor.entry->huge) {
/* Add the entry to the relocated list to update it. */
@@ -1662,9 +1654,11 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
if (!amdgpu_vm_pt_descendant(adev, &cursor))
return -ENOENT;
continue;
- } else if (frag >= parent_shift) {
+ } else if (frag >= parent_shift &&
+ cursor.level - 1 != adev->vm_manager.root_level) {
/* If the fragment size is even larger than the parent
- * shift we should go up one level and check it again.
+ * shift we should go up one level and check it again
+ * unless one level up is the root level.
*/
if (!amdgpu_vm_pt_ancestor(&cursor))
return -ENOENT;
@@ -1672,10 +1666,10 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
}
/* Looks good so far, calculate parameters for the update */
- incr = AMDGPU_GPU_PAGE_SIZE << shift;
+ incr = (uint64_t)AMDGPU_GPU_PAGE_SIZE << shift;
mask = amdgpu_vm_entries_mask(adev, cursor.level);
pe_start = ((cursor.pfn >> shift) & mask) * 8;
- entry_end = (mask + 1) << shift;
+ entry_end = (uint64_t)(mask + 1) << shift;
entry_end += cursor.pfn & ~(entry_end - 1);
entry_end = min(entry_end, end);
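Note on the casts above: both force the arithmetic into 64 bits before the shift; without them the expressions are evaluated as 32-bit ints and overflow for the upper page-table levels. Worked numbers (plausible but hypothetical level parameters):

	/* mask = 511 (9 bits per level), shift = 27 for an upper PD:
	 *
	 *	(mask + 1) << shift            512 << 27 == 2^36, which
	 *	                               overflows a 32-bit int
	 *	(uint64_t)(mask + 1) << shift  == 0x1000000000, as intended
	 *
	 * incr has the same failure mode once
	 * AMDGPU_GPU_PAGE_SIZE << shift exceeds 2^31.
	 */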
@@ -1683,12 +1677,12 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
uint64_t upd_end = min(entry_end, frag_end);
unsigned nptes = (upd_end - frag_start) >> shift;
- amdgpu_vm_update_huge(params, pt, cursor.level,
- pe_start, dst, nptes, incr,
- flags | AMDGPU_PTE_FRAG(frag));
+ amdgpu_vm_update_flags(params, pt, cursor.level,
+ pe_start, dst, nptes, incr,
+ flags | AMDGPU_PTE_FRAG(frag));
pe_start += nptes * 8;
- dst += nptes * AMDGPU_GPU_PAGE_SIZE << shift;
+ dst += (uint64_t)nptes * AMDGPU_GPU_PAGE_SIZE << shift;
frag_start = upd_end;
if (frag_start >= frag_end) {
@@ -1700,8 +1694,17 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
}
} while (frag_start < entry_end);
- if (frag >= shift)
+ if (amdgpu_vm_pt_descendant(adev, &cursor)) {
+ /* Mark all child entries as huge */
+ while (cursor.pfn < frag_start) {
+ cursor.entry->huge = true;
+ amdgpu_vm_pt_next(adev, &cursor);
+ }
+
+ } else if (frag >= shift) {
+ /* or just move on to the next on the same level. */
amdgpu_vm_pt_next(adev, &cursor);
+ }
}
return 0;
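Note on the rework above: instead of the removed early-out at the top of the loop (`cursor.pfn < frag_start`), the walk now descends immediately after writing a huge PDE and flags every child entry the fragment covered, then moves on. A toy model of that marking pass (hypothetical types; the real walk uses `struct amdgpu_vm_pt_cursor`):

	#include <stdbool.h>
	#include <stdint.h>

	struct toy_pt {
		uint64_t pfn;		/* first pfn the entry maps */
		bool huge;		/* covered by a parent huge PDE */
		struct toy_pt *next;	/* next entry in walk order */
	};

	/* Children starting below frag_start were fully covered by the
	 * huge mapping just written into the parent PDE. */
	static void mark_covered_children(struct toy_pt *child,
					  uint64_t frag_start)
	{
		for (; child && child->pfn < frag_start; child = child->next)
			child->huge = true;
	}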
@@ -1745,22 +1748,29 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
params.adev = adev;
params.vm = vm;
- /* sync to everything on unmapping */
+ /* sync to everything except eviction fences on unmapping */
if (!(flags & AMDGPU_PTE_VALID))
- owner = AMDGPU_FENCE_OWNER_UNDEFINED;
+ owner = AMDGPU_FENCE_OWNER_KFD;
if (vm->use_cpu_for_update) {
/* params.src is used as flag to indicate system Memory */
if (pages_addr)
params.src = ~0;
- /* Wait for PT BOs to be free. PTs share the same resv. object
+ /* Wait for PT BOs to be idle. PTs share the same resv. object
* as the root PD BO
*/
- r = amdgpu_vm_wait_pd(adev, vm, owner);
+ r = amdgpu_bo_sync_wait(vm->root.base.bo, owner, true);
if (unlikely(r))
return r;
+ /* Wait for any BO move to be completed */
+ if (exclusive) {
+ r = dma_fence_wait(exclusive, true);
+ if (unlikely(r))
+ return r;
+ }
+
params.func = amdgpu_vm_cpu_set_ptes;
params.pages_addr = pages_addr;
return amdgpu_vm_update_ptes(&params, start, last + 1,
@@ -1774,13 +1784,12 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
/*
* reserve space for two commands every (1 << BLOCK_SIZE)
* entries or 2k dwords (whatever is smaller)
- *
- * The second command is for the shadow pagetables.
*/
+ ncmds = ((nptes >> min(adev->vm_manager.block_size, 11u)) + 1);
+
+ /* The second command is for the shadow pagetables. */
if (vm->root.base.bo->shadow)
- ncmds = ((nptes >> min(adev->vm_manager.block_size, 11u)) + 1) * 2;
- else
- ncmds = ((nptes >> min(adev->vm_manager.block_size, 11u)) + 1);
+ ncmds *= 2;
/* padding, etc. */
ndw = 64;
@@ -1799,10 +1808,11 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
ndw += ncmds * 10;
/* extra commands for begin/end fragments */
+ ncmds = 2 * adev->vm_manager.fragment_size;
if (vm->root.base.bo->shadow)
- ndw += 2 * 10 * adev->vm_manager.fragment_size * 2;
- else
- ndw += 2 * 10 * adev->vm_manager.fragment_size;
+ ncmds *= 2;
+
+ ndw += 10 * ncmds;
params.func = amdgpu_vm_do_set_ptes;
}
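Note on the restructured sizing above: factoring out `ncmds` makes the doubling for shadow page tables explicit instead of duplicating each formula. Worked example with made-up but plausible numbers: for nptes = 524288 (a 2 GiB range of 4 KiB GPU pages) and a block size capped at 11, the first term gives (524288 >> 11) + 1 = 257 commands, 514 with a shadow, i.e. 5140 dwords at 10 dwords per command; the fragment term adds 2 * fragment_size commands (again doubled for the shadow), e.g. 2 * 9 * 2 * 10 = 360 dwords if the fragment size defaults to 9 — that default is an assumption about the amdgpu.vm_fragment_size module parameter, not something this diff states.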
@@ -1839,10 +1849,6 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
if (r)
goto error_free;
- r = reservation_object_reserve_shared(vm->root.base.bo->tbo.resv);
- if (r)
- goto error_free;
-
r = amdgpu_vm_update_ptes(&params, start, last + 1, addr, flags);
if (r)
goto error_free;
@@ -3008,7 +3014,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
}
DRM_DEBUG_DRIVER("VM update mode is %s\n",
vm->use_cpu_for_update ? "CPU" : "SDMA");
- WARN_ONCE((vm->use_cpu_for_update & !amdgpu_gmc_vram_full_visible(&adev->gmc)),
+ WARN_ONCE((vm->use_cpu_for_update && !amdgpu_gmc_vram_full_visible(&adev->gmc)),
"CPU update of VM recommended only for large BAR system\n");
vm->last_update = NULL;
@@ -3023,6 +3029,10 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
if (r)
goto error_free_root;
+ r = reservation_object_reserve_shared(root->tbo.resv, 1);
+ if (r)
+ goto error_unreserve;
+
r = amdgpu_vm_clear_bo(adev, vm, root,
adev->vm_manager.root_level,
vm->pte_support_ats);
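Note on the new reservation above: it replaces the per-call `reservation_object_reserve_shared()` that an earlier hunk removed from `amdgpu_vm_clear_bo()` — the clear submits a job whose fence lands in a shared slot, and the slot must exist before the fence is added. A minimal sketch of the contract being satisfied (assuming the usual reservation-object rules):

	reservation_object_lock(resv, NULL);
	/* Reserving may allocate and fail; adding must not fail. */
	r = reservation_object_reserve_shared(resv, 1);
	if (!r)
		reservation_object_add_shared_fence(resv, fence);
	reservation_object_unlock(resv);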
@@ -3052,7 +3062,6 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
}
INIT_KFIFO(vm->faults);
- vm->fault_credit = 16;
return 0;
@@ -3135,7 +3144,7 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm, uns
vm->pte_support_ats = pte_support_ats;
DRM_DEBUG_DRIVER("VM update mode is %s\n",
vm->use_cpu_for_update ? "CPU" : "SDMA");
- WARN_ONCE((vm->use_cpu_for_update & !amdgpu_gmc_vram_full_visible(&adev->gmc)),
+ WARN_ONCE((vm->use_cpu_for_update && !amdgpu_gmc_vram_full_visible(&adev->gmc)),
"CPU update of VM recommended only for large BAR system\n");
if (vm->pasid) {
@@ -3234,8 +3243,10 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
}
rbtree_postorder_for_each_entry_safe(mapping, tmp,
&vm->va.rb_root, rb) {
+ /* Don't remove the mapping here, we don't want to trigger a
+ * rebalance and the tree is about to be destroyed anyway.
+ */
list_del(&mapping->list);
- amdgpu_vm_it_remove(mapping, &vm->va);
kfree(mapping);
}
list_for_each_entry_safe(mapping, tmp, &vm->freed, list) {
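Note on the hunk above: the new comment makes the subtlety explicit — `rbtree_postorder_for_each_entry_safe()` is only safe while nothing rebalances the tree, so the old `amdgpu_vm_it_remove()` call was not merely wasted work, since an interval-tree removal can rebalance and break the walk. The general teardown pattern (a generic sketch, not driver code):

	struct my_node {
		struct rb_node rb;
		/* payload */
	};

	struct rb_root root = RB_ROOT;	/* populated elsewhere */
	struct my_node *pos, *tmp;

	/* Postorder visits children before parents, so each node may be
	 * freed as soon as it is visited -- provided the tree is never
	 * modified (no rb_erase(), no rebalance) during the walk. */
	rbtree_postorder_for_each_entry_safe(pos, tmp, &root, rb)
		kfree(pos);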
@@ -3263,42 +3274,6 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
}
/**
- * amdgpu_vm_pasid_fault_credit - Check fault credit for given PASID
- *
- * @adev: amdgpu_device pointer
- * @pasid: PASID do identify the VM
- *
- * This function is expected to be called in interrupt context.
- *
- * Returns:
- * True if there was fault credit, false otherwise
- */
-bool amdgpu_vm_pasid_fault_credit(struct amdgpu_device *adev,
- unsigned int pasid)
-{
- struct amdgpu_vm *vm;
-
- spin_lock(&adev->vm_manager.pasid_lock);
- vm = idr_find(&adev->vm_manager.pasid_idr, pasid);
- if (!vm) {
- /* VM not found, can't track fault credit */
- spin_unlock(&adev->vm_manager.pasid_lock);
- return true;
- }
-
- /* No lock needed. only accessed by IRQ handler */
- if (!vm->fault_credit) {
- /* Too many faults in this VM */
- spin_unlock(&adev->vm_manager.pasid_lock);
- return false;
- }
-
- vm->fault_credit--;
- spin_unlock(&adev->vm_manager.pasid_lock);
- return true;
-}
-
-/**
* amdgpu_vm_manager_init - init the VM manager
*
* @adev: amdgpu_device pointer
@@ -3399,14 +3374,15 @@ void amdgpu_vm_get_task_info(struct amdgpu_device *adev, unsigned int pasid,
struct amdgpu_task_info *task_info)
{
struct amdgpu_vm *vm;
+ unsigned long flags;
- spin_lock(&adev->vm_manager.pasid_lock);
+ spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags);
vm = idr_find(&adev->vm_manager.pasid_idr, pasid);
if (vm)
*task_info = vm->task_info;
- spin_unlock(&adev->vm_manager.pasid_lock);
+ spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
}
/**
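Note on the irqsave conversion above: `amdgpu_vm_get_task_info()` is called from the GPU page-fault interrupt path, while other `pasid_lock` users run in process context; if a process-context holder is interrupted on the same CPU by an IRQ that takes the same lock, the CPU deadlocks. Saving and restoring the interrupt state is the standard fix (generic sketch, hypothetical lock):

	#include <linux/spinlock.h>

	static DEFINE_SPINLOCK(example_lock);

	static void process_context_path(void)
	{
		unsigned long flags;

		/* Disable local interrupts while holding the lock so an
		 * IRQ on this CPU cannot re-enter and deadlock on it. */
		spin_lock_irqsave(&example_lock, flags);
		/* ... critical section shared with IRQ context ... */
		spin_unlock_irqrestore(&example_lock, flags);
	}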