Diffstat (limited to 'drivers/gpu/drm/xe/xe_vm.c')
-rw-r--r-- | drivers/gpu/drm/xe/xe_vm.c | 287
1 file changed, 241 insertions, 46 deletions
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index 30db264d34a3..e3bde897f6e8 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -13,11 +13,14 @@
 #include <drm/ttm/ttm_execbuf_util.h>
 #include <drm/ttm/ttm_tt.h>
 #include <drm/xe_drm.h>
+#include <linux/ascii85.h>
 #include <linux/delay.h>
 #include <linux/kthread.h>
 #include <linux/mm.h>
 #include <linux/swap.h>
 
+#include <generated/xe_wa_oob.h>
+
 #include "xe_assert.h"
 #include "xe_bo.h"
 #include "xe_device.h"
@@ -34,11 +37,8 @@
 #include "xe_res_cursor.h"
 #include "xe_sync.h"
 #include "xe_trace.h"
-#include "generated/xe_wa_oob.h"
 #include "xe_wa.h"
 
-#define TEST_VM_ASYNC_OPS_ERROR
-
 static struct drm_gem_object *xe_vm_obj(struct xe_vm *vm)
 {
 	return vm->gpuvm.r_obj;
 }
@@ -114,11 +114,8 @@ retry:
 					  num_pages - pinned,
 					  read_only ? 0 : FOLL_WRITE,
 					  &pages[pinned]);
-		if (ret < 0) {
-			if (in_kthread)
-				ret = 0;
+		if (ret < 0)
 			break;
-		}
 
 		pinned += ret;
 		ret = 0;
@@ -797,6 +794,7 @@ static void xe_vma_free(struct xe_vma *vma)
 
 #define VMA_CREATE_FLAG_READ_ONLY	BIT(0)
 #define VMA_CREATE_FLAG_IS_NULL		BIT(1)
+#define VMA_CREATE_FLAG_DUMPABLE	BIT(2)
 
 static struct xe_vma *xe_vma_create(struct xe_vm *vm,
 				    struct xe_bo *bo,
@@ -809,6 +807,7 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm,
 	u8 id;
 	bool read_only = (flags & VMA_CREATE_FLAG_READ_ONLY);
 	bool is_null = (flags & VMA_CREATE_FLAG_IS_NULL);
+	bool dumpable = (flags & VMA_CREATE_FLAG_DUMPABLE);
 
 	xe_assert(vm->xe, start < end);
 	xe_assert(vm->xe, end < vm->size);
@@ -843,6 +842,8 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm,
 	vma->gpuva.va.range = end - start + 1;
 	if (read_only)
 		vma->gpuva.flags |= XE_VMA_READ_ONLY;
+	if (dumpable)
+		vma->gpuva.flags |= XE_VMA_DUMPABLE;
 
 	for_each_tile(tile, vm->xe, id)
 		vma->tile_mask |= 0x1 << id;
@@ -1000,9 +1001,16 @@ int xe_vm_prepare_vma(struct drm_exec *exec, struct xe_vma *vma,
 	int err;
 
 	XE_WARN_ON(!vm);
-	err = drm_exec_prepare_obj(exec, xe_vm_obj(vm), num_shared);
-	if (!err && bo && !bo->vm)
-		err = drm_exec_prepare_obj(exec, &bo->ttm.base, num_shared);
+	if (num_shared)
+		err = drm_exec_prepare_obj(exec, xe_vm_obj(vm), num_shared);
+	else
+		err = drm_exec_lock_obj(exec, xe_vm_obj(vm));
+	if (!err && bo && !bo->vm) {
+		if (num_shared)
+			err = drm_exec_prepare_obj(exec, &bo->ttm.base, num_shared);
+		else
+			err = drm_exec_lock_obj(exec, &bo->ttm.base);
+	}
 
 	return err;
 }
@@ -1049,7 +1057,9 @@ static int xe_vm_insert_vma(struct xe_vm *vm, struct xe_vma *vma)
 	xe_assert(vm->xe, xe_vma_vm(vma) == vm);
 	lockdep_assert_held(&vm->lock);
 
+	mutex_lock(&vm->snap_mutex);
 	err = drm_gpuva_insert(&vm->gpuvm, &vma->gpuva);
+	mutex_unlock(&vm->snap_mutex);
 	XE_WARN_ON(err); /* Shouldn't be possible */
 
 	return err;
@@ -1060,7 +1070,9 @@ static void xe_vm_remove_vma(struct xe_vm *vm, struct xe_vma *vma)
 	xe_assert(vm->xe, xe_vma_vm(vma) == vm);
 	lockdep_assert_held(&vm->lock);
 
+	mutex_lock(&vm->snap_mutex);
 	drm_gpuva_remove(&vma->gpuva);
+	mutex_unlock(&vm->snap_mutex);
 	if (vm->usm.last_fault_vma == vma)
 		vm->usm.last_fault_vma = NULL;
 }
@@ -1079,7 +1091,7 @@ static struct drm_gpuva_op *xe_vm_op_alloc(void)
 
 static void xe_vm_free(struct drm_gpuvm *gpuvm);
 
-static struct drm_gpuvm_ops gpuvm_ops = {
+static const struct drm_gpuvm_ops gpuvm_ops = {
 	.op_alloc = xe_vm_op_alloc,
 	.vm_bo_validate = xe_gpuvm_validate,
 	.vm_free = xe_vm_free,
 };
@@ -1287,6 +1299,7 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags)
 	vm->flags = flags;
 
 	init_rwsem(&vm->lock);
+	mutex_init(&vm->snap_mutex);
 
 	INIT_LIST_HEAD(&vm->rebind_list);
 
@@ -1412,6 +1425,7 @@ err_close:
 	return ERR_PTR(err);
 
 err_no_resv:
+	mutex_destroy(&vm->snap_mutex);
 	for_each_tile(tile, xe, id)
 		xe_range_fence_tree_fini(&vm->rftree[id]);
 	kfree(vm);
@@ -1536,6 +1550,8 @@ static void vm_destroy_work_func(struct work_struct *w)
 	/* xe_vm_close_and_put was not called? */
 	xe_assert(xe, !vm->size);
 
+	mutex_destroy(&vm->snap_mutex);
+
 	if (!(vm->flags & XE_VM_FLAG_MIGRATION)) {
 		xe_device_mem_access_put(xe);
 
@@ -1984,6 +2000,7 @@ static int xe_vm_prefetch(struct xe_vm *vm, struct xe_vma *vma,
 				xe_exec_queue_last_fence_get(wait_exec_queue, vm);
 
 			xe_sync_entry_signal(&syncs[i], NULL, fence);
+			dma_fence_put(fence);
 		}
 	}
 
@@ -2064,7 +2081,6 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo,
 	struct drm_gem_object *obj = bo ? &bo->ttm.base : NULL;
 	struct drm_gpuva_ops *ops;
 	struct drm_gpuva_op *__op;
-	struct xe_vma_op *op;
 	struct drm_gpuvm_bo *vm_bo;
 	int err;
 
@@ -2111,15 +2127,6 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo,
 	if (IS_ERR(ops))
 		return ops;
 
-#ifdef TEST_VM_ASYNC_OPS_ERROR
-	if (operation & FORCE_ASYNC_OP_ERROR) {
-		op = list_first_entry_or_null(&ops->list, struct xe_vma_op,
-					      base.entry);
-		if (op)
-			op->inject_error = true;
-	}
-#endif
-
 	drm_gpuva_for_each_op(__op, ops) {
 		struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
 
@@ -2129,6 +2136,7 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo,
 			op->map.read_only =
 				flags & DRM_XE_VM_BIND_FLAG_READONLY;
 			op->map.is_null = flags & DRM_XE_VM_BIND_FLAG_NULL;
+			op->map.dumpable = flags & DRM_XE_VM_BIND_FLAG_DUMPABLE;
 			op->map.pat_index = pat_index;
 		} else if (__op->op == DRM_GPUVA_OP_PREFETCH) {
 			op->prefetch.region = prefetch_region;
@@ -2197,13 +2205,17 @@ static u64 xe_vma_max_pte_size(struct xe_vma *vma)
 {
 	if (vma->gpuva.flags & XE_VMA_PTE_1G)
 		return SZ_1G;
-	else if (vma->gpuva.flags & XE_VMA_PTE_2M)
+	else if (vma->gpuva.flags & (XE_VMA_PTE_2M | XE_VMA_PTE_COMPACT))
 		return SZ_2M;
+	else if (vma->gpuva.flags & XE_VMA_PTE_64K)
+		return SZ_64K;
+	else if (vma->gpuva.flags & XE_VMA_PTE_4K)
+		return SZ_4K;
 
-	return SZ_4K;
+	return SZ_1G; /* Uninitialized, used max size */
 }
 
-static u64 xe_vma_set_pte_size(struct xe_vma *vma, u64 size)
+static void xe_vma_set_pte_size(struct xe_vma *vma, u64 size)
 {
 	switch (size) {
 	case SZ_1G:
@@ -2212,9 +2224,13 @@ static u64 xe_vma_set_pte_size(struct xe_vma *vma, u64 size)
 	case SZ_2M:
 		vma->gpuva.flags |= XE_VMA_PTE_2M;
 		break;
+	case SZ_64K:
+		vma->gpuva.flags |= XE_VMA_PTE_64K;
+		break;
+	case SZ_4K:
+		vma->gpuva.flags |= XE_VMA_PTE_4K;
+		break;
 	}
-
-	return SZ_4K;
 }
 
 static int xe_vma_op_commit(struct xe_vm *vm, struct xe_vma_op *op)
@@ -2286,6 +2302,7 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q,
 				   struct xe_sync_entry *syncs, u32 num_syncs,
 				   struct list_head *ops_list, bool last)
 {
+	struct xe_device *xe = vm->xe;
 	struct xe_vma_op *last_op = NULL;
 	struct drm_gpuva_op *__op;
 	int err = 0;
@@ -2316,6 +2333,8 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q,
 				VMA_CREATE_FLAG_READ_ONLY : 0;
 			flags |= op->map.is_null ?
 				VMA_CREATE_FLAG_IS_NULL : 0;
+			flags |= op->map.dumpable ?
+				VMA_CREATE_FLAG_DUMPABLE : 0;
 
 			vma = new_vma(vm, &op->base.map, op->map.pat_index,
 				      flags);
@@ -2340,6 +2359,9 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q,
 				flags |= op->base.remap.unmap->va->flags &
 					DRM_GPUVA_SPARSE ?
 					VMA_CREATE_FLAG_IS_NULL : 0;
+				flags |= op->base.remap.unmap->va->flags &
+					XE_VMA_DUMPABLE ?
+					VMA_CREATE_FLAG_DUMPABLE : 0;
 
 				vma = new_vma(vm, op->base.remap.prev,
 					      old->pat_index, flags);
@@ -2361,6 +2383,9 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q,
 						xe_vma_end(vma) -
 						xe_vma_start(old);
 					op->remap.start = xe_vma_end(vma);
+					vm_dbg(&xe->drm, "REMAP:SKIP_PREV: addr=0x%016llx, range=0x%016llx",
+					       (ULL)op->remap.start,
+					       (ULL)op->remap.range);
 				}
 			}
 
@@ -2371,6 +2396,9 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q,
 				flags |= op->base.remap.unmap->va->flags &
 					DRM_GPUVA_SPARSE ?
 					VMA_CREATE_FLAG_IS_NULL : 0;
+				flags |= op->base.remap.unmap->va->flags &
+					XE_VMA_DUMPABLE ?
+					VMA_CREATE_FLAG_DUMPABLE : 0;
 
 				vma = new_vma(vm, op->base.remap.next,
 					      old->pat_index, flags);
@@ -2391,6 +2419,9 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct xe_exec_queue *q,
 					op->remap.range -=
 						xe_vma_end(old) -
 						xe_vma_start(vma);
+					vm_dbg(&xe->drm, "REMAP:SKIP_NEXT: addr=0x%016llx, range=0x%016llx",
+					       (ULL)op->remap.start,
+					       (ULL)op->remap.range);
 				}
 			}
 			break;
@@ -2530,13 +2561,25 @@ retry_userptr:
 	}
 	drm_exec_fini(&exec);
 
-	if (err == -EAGAIN && xe_vma_is_userptr(vma)) {
+	if (err == -EAGAIN) {
 		lockdep_assert_held_write(&vm->lock);
-		err = xe_vma_userptr_pin_pages(to_userptr_vma(vma));
-		if (!err)
-			goto retry_userptr;
 
-		trace_xe_vma_fail(vma);
+		if (op->base.op == DRM_GPUVA_OP_REMAP) {
+			if (!op->remap.unmap_done)
+				vma = gpuva_to_vma(op->base.remap.unmap->va);
+			else if (op->remap.prev)
+				vma = op->remap.prev;
+			else
+				vma = op->remap.next;
+		}
+
+		if (xe_vma_is_userptr(vma)) {
+			err = xe_vma_userptr_pin_pages(to_userptr_vma(vma));
+			if (!err)
+				goto retry_userptr;
+
+			trace_xe_vma_fail(vma);
+		}
 	}
 
 	return err;
@@ -2548,13 +2591,6 @@ static int xe_vma_op_execute(struct xe_vm *vm, struct xe_vma_op *op)
 
 	lockdep_assert_held_write(&vm->lock);
 
-#ifdef TEST_VM_ASYNC_OPS_ERROR
-	if (op->inject_error) {
-		op->inject_error = false;
-		return -ENOMEM;
-	}
-#endif
-
 	switch (op->base.op) {
 	case DRM_GPUVA_OP_MAP:
 		ret = __xe_vma_op_execute(vm, op->map.vma, op);
@@ -2669,7 +2705,7 @@ static void vm_bind_ioctl_ops_unwind(struct xe_vm *vm,
 {
 	int i;
 
-	for (i = num_ops_list - 1; i; ++i) {
+	for (i = num_ops_list - 1; i >= 0; --i) {
 		struct drm_gpuva_ops *__ops = ops[i];
 		struct drm_gpuva_op *__op;
 
@@ -2714,16 +2750,10 @@ static int vm_bind_ioctl_ops_execute(struct xe_vm *vm,
 	return 0;
 }
 
-#ifdef TEST_VM_ASYNC_OPS_ERROR
-#define SUPPORTED_FLAGS \
-	(FORCE_ASYNC_OP_ERROR | DRM_XE_VM_BIND_FLAG_READONLY | \
-	 DRM_XE_VM_BIND_FLAG_IMMEDIATE | DRM_XE_VM_BIND_FLAG_NULL | 0xffff)
-#else
 #define SUPPORTED_FLAGS \
 	(DRM_XE_VM_BIND_FLAG_READONLY | \
 	 DRM_XE_VM_BIND_FLAG_IMMEDIATE | DRM_XE_VM_BIND_FLAG_NULL | \
-	 0xffff)
-#endif
+	 DRM_XE_VM_BIND_FLAG_DUMPABLE)
#define XE_64K_PAGE_MASK 0xffffull
 #define ALL_DRM_XE_SYNCS_FLAGS (DRM_XE_SYNCS_FLAG_WAIT_FOR_OP)
 
@@ -3248,3 +3278,168 @@ int xe_analyze_vm(struct drm_printer *p, struct xe_vm *vm, int gt_id)
 
 	return 0;
 }
+
+struct xe_vm_snapshot {
+	unsigned long num_snaps;
+	struct {
+		u64 ofs, bo_ofs;
+		unsigned long len;
+		struct xe_bo *bo;
+		void *data;
+		struct mm_struct *mm;
+	} snap[];
+};
+
+struct xe_vm_snapshot *xe_vm_snapshot_capture(struct xe_vm *vm)
+{
+	unsigned long num_snaps = 0, i;
+	struct xe_vm_snapshot *snap = NULL;
+	struct drm_gpuva *gpuva;
+
+	if (!vm)
+		return NULL;
+
+	mutex_lock(&vm->snap_mutex);
+	drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) {
+		if (gpuva->flags & XE_VMA_DUMPABLE)
+			num_snaps++;
+	}
+
+	if (num_snaps)
+		snap = kvzalloc(offsetof(struct xe_vm_snapshot, snap[num_snaps]), GFP_NOWAIT);
+	if (!snap)
+		goto out_unlock;
+
+	snap->num_snaps = num_snaps;
+	i = 0;
+	drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) {
+		struct xe_vma *vma = gpuva_to_vma(gpuva);
+		struct xe_bo *bo = vma->gpuva.gem.obj ?
+			gem_to_xe_bo(vma->gpuva.gem.obj) : NULL;
+
+		if (!(gpuva->flags & XE_VMA_DUMPABLE))
+			continue;
+
+		snap->snap[i].ofs = xe_vma_start(vma);
+		snap->snap[i].len = xe_vma_size(vma);
+		if (bo) {
+			snap->snap[i].bo = xe_bo_get(bo);
+			snap->snap[i].bo_ofs = xe_vma_bo_offset(vma);
+		} else if (xe_vma_is_userptr(vma)) {
+			struct mm_struct *mm =
+				to_userptr_vma(vma)->userptr.notifier.mm;
+
+			if (mmget_not_zero(mm))
+				snap->snap[i].mm = mm;
+			else
+				snap->snap[i].data = ERR_PTR(-EFAULT);
+
+			snap->snap[i].bo_ofs = xe_vma_userptr(vma);
+		} else {
+			snap->snap[i].data = ERR_PTR(-ENOENT);
+		}
+		i++;
+	}
+
+out_unlock:
+	mutex_unlock(&vm->snap_mutex);
+	return snap;
+}
+
+void xe_vm_snapshot_capture_delayed(struct xe_vm_snapshot *snap)
+{
+	for (int i = 0; i < snap->num_snaps; i++) {
+		struct xe_bo *bo = snap->snap[i].bo;
+		struct iosys_map src;
+		int err;
+
+		if (IS_ERR(snap->snap[i].data))
+			continue;
+
+		snap->snap[i].data = kvmalloc(snap->snap[i].len, GFP_USER);
+		if (!snap->snap[i].data) {
+			snap->snap[i].data = ERR_PTR(-ENOMEM);
+			goto cleanup_bo;
+		}
+
+		if (bo) {
+			dma_resv_lock(bo->ttm.base.resv, NULL);
+			err = ttm_bo_vmap(&bo->ttm, &src);
+			if (!err) {
+				xe_map_memcpy_from(xe_bo_device(bo),
+						   snap->snap[i].data,
+						   &src, snap->snap[i].bo_ofs,
+						   snap->snap[i].len);
+				ttm_bo_vunmap(&bo->ttm, &src);
+			}
+			dma_resv_unlock(bo->ttm.base.resv);
+		} else {
+			void __user *userptr = (void __user *)(size_t)snap->snap[i].bo_ofs;
+
+			kthread_use_mm(snap->snap[i].mm);
+			if (!copy_from_user(snap->snap[i].data, userptr, snap->snap[i].len))
+				err = 0;
+			else
+				err = -EFAULT;
+			kthread_unuse_mm(snap->snap[i].mm);
+
+			mmput(snap->snap[i].mm);
+			snap->snap[i].mm = NULL;
+		}
+
+		if (err) {
+			kvfree(snap->snap[i].data);
+			snap->snap[i].data = ERR_PTR(err);
+		}
+
+cleanup_bo:
+		xe_bo_put(bo);
+		snap->snap[i].bo = NULL;
+	}
+}
+
+void xe_vm_snapshot_print(struct xe_vm_snapshot *snap, struct drm_printer *p)
+{
+	unsigned long i, j;
+
+	for (i = 0; i < snap->num_snaps; i++) {
+		if (IS_ERR(snap->snap[i].data))
+			goto uncaptured;
+
+		drm_printf(p, "[%llx].length: 0x%lx\n", snap->snap[i].ofs, snap->snap[i].len);
+		drm_printf(p, "[%llx].data: ",
+			   snap->snap[i].ofs);
+
+		for (j = 0; j < snap->snap[i].len; j += sizeof(u32)) {
+			u32 *val = snap->snap[i].data + j;
+			char dumped[ASCII85_BUFSZ];
+
+			drm_puts(p, ascii85_encode(*val, dumped));
+		}
+
+		drm_puts(p, "\n");
+		continue;
+
+uncaptured:
+		drm_printf(p, "Unable to capture range [%llx-%llx]: %li\n",
+			   snap->snap[i].ofs, snap->snap[i].ofs + snap->snap[i].len - 1,
+			   PTR_ERR(snap->snap[i].data));
+	}
+}
+
+void xe_vm_snapshot_free(struct xe_vm_snapshot *snap)
+{
+	unsigned long i;
+
+	if (!snap)
+		return;
+
+	for (i = 0; i < snap->num_snaps; i++) {
+		if (!IS_ERR(snap->snap[i].data))
+			kvfree(snap->snap[i].data);
+		xe_bo_put(snap->snap[i].bo);
+		if (snap->snap[i].mm)
+			mmput(snap->snap[i].mm);
+	}
+	kvfree(snap);
+}
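
Note: the main addition in this diff is the VM snapshot API at the end of the file (xe_vm_snapshot_capture(), xe_vm_snapshot_capture_delayed(), xe_vm_snapshot_print() and xe_vm_snapshot_free()), keyed off the new DRM_XE_VM_BIND_FLAG_DUMPABLE bind flag and XE_VMA_DUMPABLE VMA flag. A minimal usage sketch of the intended call sequence follows; the caller shown here is hypothetical (in the driver the consumer would be the devcoredump capture path, which is not part of this diff):

/* Hypothetical caller, for illustration only. */
static struct xe_vm_snapshot *vm_snap;

/* Stage 1: at hang/reset time, possibly from a non-sleeping context.
 * Only bookkeeping happens here: VMAs marked XE_VMA_DUMPABLE are
 * recorded and BO/mm references are taken (allocation uses GFP_NOWAIT,
 * under vm->snap_mutex).
 */
static void capture_stage(struct xe_vm *vm)
{
	vm_snap = xe_vm_snapshot_capture(vm);
}

/* Stage 2: later, from a worker where sleeping is allowed, copy the
 * actual BO/userptr contents into kernel buffers.
 */
static void delayed_stage(void)
{
	if (vm_snap)
		xe_vm_snapshot_capture_delayed(vm_snap);
}

/* Stage 3: emit the ascii85-encoded ranges into a drm_printer (for
 * example the coredump read handler) and release everything.
 */
static void print_and_free_stage(struct drm_printer *p)
{
	if (vm_snap)
		xe_vm_snapshot_print(vm_snap, p);
	xe_vm_snapshot_free(vm_snap);	/* NULL-safe per the diff */
	vm_snap = NULL;
}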