diff options
Diffstat (limited to 'drivers/gpu/drm/xe/xe_exec_queue.c')
-rw-r--r-- | drivers/gpu/drm/xe/xe_exec_queue.c | 240 |
1 files changed, 143 insertions, 97 deletions
diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index 9731dcd0b1bd..7f28b7fc68d5 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -9,11 +9,12 @@ #include <drm/drm_device.h> #include <drm/drm_file.h> -#include <drm/xe_drm.h> +#include <uapi/drm/xe_drm.h> #include "xe_device.h" #include "xe_gt.h" #include "xe_hw_engine_class_sysfs.h" +#include "xe_hw_engine_group.h" #include "xe_hw_fence.h" #include "xe_lrc.h" #include "xe_macros.h" @@ -73,6 +74,7 @@ static struct xe_exec_queue *__xe_exec_queue_alloc(struct xe_device *xe, q->ops = gt->exec_queue_ops; INIT_LIST_HEAD(&q->lr.link); INIT_LIST_HEAD(&q->multi_gt_link); + INIT_LIST_HEAD(&q->hw_engine_group_link); q->sched_props.timeslice_us = hwe->eclass->sched_props.timeslice_us; q->sched_props.preempt_timeout_us = @@ -166,7 +168,8 @@ err_post_alloc: struct xe_exec_queue *xe_exec_queue_create_class(struct xe_device *xe, struct xe_gt *gt, struct xe_vm *vm, - enum xe_engine_class class, u32 flags) + enum xe_engine_class class, + u32 flags, u64 extensions) { struct xe_hw_engine *hwe, *hwe0 = NULL; enum xe_hw_engine_id id; @@ -186,7 +189,56 @@ struct xe_exec_queue *xe_exec_queue_create_class(struct xe_device *xe, struct xe if (!logical_mask) return ERR_PTR(-ENODEV); - return xe_exec_queue_create(xe, vm, logical_mask, 1, hwe0, flags, 0); + return xe_exec_queue_create(xe, vm, logical_mask, 1, hwe0, flags, extensions); +} + +/** + * xe_exec_queue_create_bind() - Create bind exec queue. + * @xe: Xe device. + * @tile: tile which bind exec queue belongs to. + * @flags: exec queue creation flags + * @extensions: exec queue creation extensions + * + * Normalize bind exec queue creation. Bind exec queue is tied to migration VM + * for access to physical memory required for page table programming. On a + * faulting devices the reserved copy engine instance must be used to avoid + * deadlocking (user binds cannot get stuck behind faults as kernel binds which + * resolve faults depend on user binds). On non-faulting devices any copy engine + * can be used. + * + * Returns exec queue on success, ERR_PTR on failure + */ +struct xe_exec_queue *xe_exec_queue_create_bind(struct xe_device *xe, + struct xe_tile *tile, + u32 flags, u64 extensions) +{ + struct xe_gt *gt = tile->primary_gt; + struct xe_exec_queue *q; + struct xe_vm *migrate_vm; + + migrate_vm = xe_migrate_get_vm(tile->migrate); + if (xe->info.has_usm) { + struct xe_hw_engine *hwe = xe_gt_hw_engine(gt, + XE_ENGINE_CLASS_COPY, + gt->usm.reserved_bcs_instance, + false); + + if (!hwe) { + xe_vm_put(migrate_vm); + return ERR_PTR(-EINVAL); + } + + q = xe_exec_queue_create(xe, migrate_vm, + BIT(hwe->logical_instance), 1, hwe, + flags, extensions); + } else { + q = xe_exec_queue_create_class(xe, gt, migrate_vm, + XE_ENGINE_CLASS_COPY, flags, + extensions); + } + xe_vm_put(migrate_vm); + + return q; } void xe_exec_queue_destroy(struct kref *ref) @@ -418,63 +470,6 @@ static int exec_queue_user_extensions(struct xe_device *xe, struct xe_exec_queue return 0; } -static const enum xe_engine_class user_to_xe_engine_class[] = { - [DRM_XE_ENGINE_CLASS_RENDER] = XE_ENGINE_CLASS_RENDER, - [DRM_XE_ENGINE_CLASS_COPY] = XE_ENGINE_CLASS_COPY, - [DRM_XE_ENGINE_CLASS_VIDEO_DECODE] = XE_ENGINE_CLASS_VIDEO_DECODE, - [DRM_XE_ENGINE_CLASS_VIDEO_ENHANCE] = XE_ENGINE_CLASS_VIDEO_ENHANCE, - [DRM_XE_ENGINE_CLASS_COMPUTE] = XE_ENGINE_CLASS_COMPUTE, -}; - -static struct xe_hw_engine * -find_hw_engine(struct xe_device *xe, - struct drm_xe_engine_class_instance eci) -{ - u32 idx; - - if (eci.engine_class >= ARRAY_SIZE(user_to_xe_engine_class)) - return NULL; - - if (eci.gt_id >= xe->info.gt_count) - return NULL; - - idx = array_index_nospec(eci.engine_class, - ARRAY_SIZE(user_to_xe_engine_class)); - - return xe_gt_hw_engine(xe_device_get_gt(xe, eci.gt_id), - user_to_xe_engine_class[idx], - eci.engine_instance, true); -} - -static u32 bind_exec_queue_logical_mask(struct xe_device *xe, struct xe_gt *gt, - struct drm_xe_engine_class_instance *eci, - u16 width, u16 num_placements) -{ - struct xe_hw_engine *hwe; - enum xe_hw_engine_id id; - u32 logical_mask = 0; - - if (XE_IOCTL_DBG(xe, width != 1)) - return 0; - if (XE_IOCTL_DBG(xe, num_placements != 1)) - return 0; - if (XE_IOCTL_DBG(xe, eci[0].engine_instance != 0)) - return 0; - - eci[0].engine_class = DRM_XE_ENGINE_CLASS_COPY; - - for_each_hw_engine(hwe, gt, id) { - if (xe_hw_engine_is_reserved(hwe)) - continue; - - if (hwe->class == - user_to_xe_engine_class[DRM_XE_ENGINE_CLASS_COPY]) - logical_mask |= BIT(hwe->logical_instance); - } - - return logical_mask; -} - static u32 calc_validate_logical_mask(struct xe_device *xe, struct xe_gt *gt, struct drm_xe_engine_class_instance *eci, u16 width, u16 num_placements) @@ -497,7 +492,7 @@ static u32 calc_validate_logical_mask(struct xe_device *xe, struct xe_gt *gt, n = j * width + i; - hwe = find_hw_engine(xe, eci[n]); + hwe = xe_hw_engine_lookup(xe, eci[n]); if (XE_IOCTL_DBG(xe, !hwe)) return 0; @@ -536,8 +531,9 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data, struct drm_xe_engine_class_instance __user *user_eci = u64_to_user_ptr(args->instances); struct xe_hw_engine *hwe; - struct xe_vm *vm, *migrate_vm; + struct xe_vm *vm; struct xe_gt *gt; + struct xe_tile *tile; struct xe_exec_queue *q = NULL; u32 logical_mask; u32 id; @@ -562,37 +558,20 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data, return -EINVAL; if (eci[0].engine_class == DRM_XE_ENGINE_CLASS_VM_BIND) { - for_each_gt(gt, xe, id) { - struct xe_exec_queue *new; - u32 flags; - - if (xe_gt_is_media_type(gt)) - continue; - - eci[0].gt_id = gt->info.id; - logical_mask = bind_exec_queue_logical_mask(xe, gt, eci, - args->width, - args->num_placements); - if (XE_IOCTL_DBG(xe, !logical_mask)) - return -EINVAL; - - hwe = find_hw_engine(xe, eci[0]); - if (XE_IOCTL_DBG(xe, !hwe)) - return -EINVAL; - - /* The migration vm doesn't hold rpm ref */ - xe_pm_runtime_get_noresume(xe); - - flags = EXEC_QUEUE_FLAG_VM | (id ? EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD : 0); + if (XE_IOCTL_DBG(xe, args->width != 1) || + XE_IOCTL_DBG(xe, args->num_placements != 1) || + XE_IOCTL_DBG(xe, eci[0].engine_instance != 0)) + return -EINVAL; - migrate_vm = xe_migrate_get_vm(gt_to_tile(gt)->migrate); - new = xe_exec_queue_create(xe, migrate_vm, logical_mask, - args->width, hwe, flags, - args->extensions); + for_each_tile(tile, xe, id) { + struct xe_exec_queue *new; + u32 flags = EXEC_QUEUE_FLAG_VM; - xe_pm_runtime_put(xe); /* now held by engine */ + if (id) + flags |= EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD; - xe_vm_put(migrate_vm); + new = xe_exec_queue_create_bind(xe, tile, flags, + args->extensions); if (IS_ERR(new)) { err = PTR_ERR(new); if (q) @@ -613,7 +592,7 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data, if (XE_IOCTL_DBG(xe, !logical_mask)) return -EINVAL; - hwe = find_hw_engine(xe, eci[0]); + hwe = xe_hw_engine_lookup(xe, eci[0]); if (XE_IOCTL_DBG(xe, !hwe)) return -EINVAL; @@ -648,6 +627,12 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data, if (XE_IOCTL_DBG(xe, err)) goto put_exec_queue; } + + if (q->vm && q->hwe->hw_engine_group) { + err = xe_hw_engine_group_add_exec_queue(q->hwe->hw_engine_group, q); + if (err) + goto put_exec_queue; + } } mutex_lock(&xef->exec_queue.lock); @@ -798,6 +783,15 @@ void xe_exec_queue_update_run_ticks(struct xe_exec_queue *q) xef->run_ticks[q->class] += (new_ts - old_ts) * q->width; } +/** + * xe_exec_queue_kill - permanently stop all execution from an exec queue + * @q: The exec queue + * + * This function permanently stops all activity on an exec queue. If the queue + * is actively executing on the HW, it will be kicked off the engine; any + * pending jobs are discarded and all future submissions are rejected. + * This function is safe to call multiple times. + */ void xe_exec_queue_kill(struct xe_exec_queue *q) { struct xe_exec_queue *eq = q, *next; @@ -830,6 +824,9 @@ int xe_exec_queue_destroy_ioctl(struct drm_device *dev, void *data, if (XE_IOCTL_DBG(xe, !q)) return -ENOENT; + if (q->vm && q->hwe->hw_engine_group) + xe_hw_engine_group_del_exec_queue(q->hwe->hw_engine_group, q); + xe_exec_queue_kill(q); trace_xe_exec_queue_close(q); @@ -841,10 +838,12 @@ int xe_exec_queue_destroy_ioctl(struct drm_device *dev, void *data, static void xe_exec_queue_last_fence_lockdep_assert(struct xe_exec_queue *q, struct xe_vm *vm) { - if (q->flags & EXEC_QUEUE_FLAG_VM) + if (q->flags & EXEC_QUEUE_FLAG_VM) { lockdep_assert_held(&vm->lock); - else + } else { xe_vm_assert_held(vm); + lockdep_assert_held(&q->hwe->hw_engine_group->mode_sem); + } } /** @@ -856,10 +855,7 @@ void xe_exec_queue_last_fence_put(struct xe_exec_queue *q, struct xe_vm *vm) { xe_exec_queue_last_fence_lockdep_assert(q, vm); - if (q->last_fence) { - dma_fence_put(q->last_fence); - q->last_fence = NULL; - } + xe_exec_queue_last_fence_put_unlocked(q); } /** @@ -902,6 +898,33 @@ struct dma_fence *xe_exec_queue_last_fence_get(struct xe_exec_queue *q, } /** + * xe_exec_queue_last_fence_get_for_resume() - Get last fence + * @q: The exec queue + * @vm: The VM the engine does a bind or exec for + * + * Get last fence, takes a ref. Only safe to be called in the context of + * resuming the hw engine group's long-running exec queue, when the group + * semaphore is held. + * + * Returns: last fence if not signaled, dma fence stub if signaled + */ +struct dma_fence *xe_exec_queue_last_fence_get_for_resume(struct xe_exec_queue *q, + struct xe_vm *vm) +{ + struct dma_fence *fence; + + lockdep_assert_held_write(&q->hwe->hw_engine_group->mode_sem); + + if (q->last_fence && + test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &q->last_fence->flags)) + xe_exec_queue_last_fence_put_unlocked(q); + + fence = q->last_fence ? q->last_fence : dma_fence_get_stub(); + dma_fence_get(fence); + return fence; +} + +/** * xe_exec_queue_last_fence_set() - Set last fence * @q: The exec queue * @vm: The VM the engine does a bind or exec for @@ -918,3 +941,26 @@ void xe_exec_queue_last_fence_set(struct xe_exec_queue *q, struct xe_vm *vm, xe_exec_queue_last_fence_put(q, vm); q->last_fence = dma_fence_get(fence); } + +/** + * xe_exec_queue_last_fence_test_dep - Test last fence dependency of queue + * @q: The exec queue + * @vm: The VM the engine does a bind or exec for + * + * Returns: + * -ETIME if there exists an unsignalled last fence dependency, zero otherwise. + */ +int xe_exec_queue_last_fence_test_dep(struct xe_exec_queue *q, struct xe_vm *vm) +{ + struct dma_fence *fence; + int err = 0; + + fence = xe_exec_queue_last_fence_get(q, vm); + if (fence) { + err = test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags) ? + 0 : -ETIME; + dma_fence_put(fence); + } + + return err; +} |