| author | Linus Torvalds <[email protected]> | 2024-05-24 17:28:02 -0700 |
|---|---|---|
| committer | Linus Torvalds <[email protected]> | 2024-05-24 17:28:02 -0700 |
| commit | 56fb6f92854f29dcb6c3dc3ba92eeda1b615e88c | |
| tree | 85e95788af18acd2db7ba58faafed4058949e662 /drivers/gpu/drm/panthor/panthor_sched.c | |
| parent | 0b32d436c015d5a88b3368405e3d8fe82f195a54 | |
| parent | 32a0bb7ef217aa37e6b67ca7950f5e504312ed72 | |
Merge tag 'drm-next-2024-05-25' of https://gitlab.freedesktop.org/drm/kernel
Pull drm fixes from Dave Airlie:
"Some fixes for the end of the merge window, mostly amdgpu and panthor,
with one nouveau uAPI change that fixes a bad decision we made a few
months back.
nouveau:
- Fix bo metadata uAPI for VM_BIND
panthor:
- Fixes for panthor's tiler heap logic
- Reset on unrecoverable fault
- Fix VM references
- Reset fix
xlnx:
- xlnx compile and doc fixes.
amdgpu:
- Handle vbios table integrated info v2.3
amdkfd:
- Handle duplicate BOs in reserve_bo_and_cond_vms
- Handle memory limitations on small APUs
dp/mst:
- MST null deref fix.
bridge:
- Don't let next bridge create connector in adv7511 to make probe
work"
* tag 'drm-next-2024-05-25' of https://gitlab.freedesktop.org/drm/kernel:
drm/amdgpu/atomfirmware: add intergrated info v2.3 table
drm/mst: Fix NULL pointer dereference at drm_dp_add_payload_part2
drm/amdkfd: Let VRAM allocations go to GTT domain on small APUs
drm/amdkfd: handle duplicate BOs in reserve_bo_and_cond_vms
drm/bridge: adv7511: Attach next bridge without creating connector
drm/buddy: Fix the warn on's during force merge
drm/nouveau: use tile_mode and pte_kind for VM_BIND bo allocations
drm/panthor: Call panthor_sched_post_reset() even if the reset failed
drm/panthor: Reset the FW VM to NULL on unplug
drm/panthor: Keep a ref to the VM at the panthor_kernel_bo level
drm/panthor: Force an immediate reset on unrecoverable faults
drm/panthor: Document drm_panthor_tiler_heap_destroy::handle validity constraints
drm/panthor: Fix an off-by-one in the heap context retrieval logic
drm/panthor: Relax the constraints on the tiler chunk size
drm/panthor: Make sure the tiler initial/max chunks are consistent
drm/panthor: Fix tiler OOM handling to allow incremental rendering
drm: xlnx: zynqmp_dpsub: Fix compilation error
drm: xlnx: zynqmp_dpsub: Fix few function comments
Diffstat (limited to 'drivers/gpu/drm/panthor/panthor_sched.c')
-rw-r--r--  drivers/gpu/drm/panthor/panthor_sched.c | 48
1 file changed, 35 insertions(+), 13 deletions(-)
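The first two hunks in the diff below come from the "Keep a ref to the VM at the panthor_kernel_bo level" commit: panthor_kernel_bo_destroy() loses its VM argument because the BO now remembers, and holds a reference to, the VM it is mapped in. A hedged sketch of the shape that implies — the struct layout and the IS_ERR_OR_NULL guard are assumptions; only the one-argument destroy and panthor_vm_put() are visible in the diff:

```c
/* Assumed shape of the refactor: the kernel BO caches the VM it is
 * mapped in and owns a reference to it, so destruction no longer needs
 * the caller to name the right VM.
 */
struct panthor_kernel_bo {
	struct drm_gem_object *obj;
	struct panthor_vm *vm;	/* new: ref held for the BO's lifetime */
	u64 va_node_start;
};

void panthor_kernel_bo_destroy(struct panthor_kernel_bo *bo)
{
	if (IS_ERR_OR_NULL(bo))
		return;

	/* ... unmap from bo->vm and release the GEM object ... */

	panthor_vm_put(bo->vm);	/* drop the ref taken at creation */
	kfree(bo);
}
```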
```diff
diff --git a/drivers/gpu/drm/panthor/panthor_sched.c b/drivers/gpu/drm/panthor/panthor_sched.c
index 7f16a4a14e9a..79ffcbc41d78 100644
--- a/drivers/gpu/drm/panthor/panthor_sched.c
+++ b/drivers/gpu/drm/panthor/panthor_sched.c
@@ -826,8 +826,8 @@ static void group_free_queue(struct panthor_group *group, struct panthor_queue *
 	panthor_queue_put_syncwait_obj(queue);
 
-	panthor_kernel_bo_destroy(group->vm, queue->ringbuf);
-	panthor_kernel_bo_destroy(panthor_fw_vm(group->ptdev), queue->iface.mem);
+	panthor_kernel_bo_destroy(queue->ringbuf);
+	panthor_kernel_bo_destroy(queue->iface.mem);
 
 	kfree(queue);
 }
@@ -837,15 +837,14 @@ static void group_release_work(struct work_struct *work)
 	struct panthor_group *group =
 		container_of(work, struct panthor_group, release_work);
-	struct panthor_device *ptdev = group->ptdev;
 	u32 i;
 
 	for (i = 0; i < group->queue_count; i++)
 		group_free_queue(group, group->queues[i]);
 
-	panthor_kernel_bo_destroy(panthor_fw_vm(ptdev), group->suspend_buf);
-	panthor_kernel_bo_destroy(panthor_fw_vm(ptdev), group->protm_suspend_buf);
-	panthor_kernel_bo_destroy(group->vm, group->syncobjs);
+	panthor_kernel_bo_destroy(group->suspend_buf);
+	panthor_kernel_bo_destroy(group->protm_suspend_buf);
+	panthor_kernel_bo_destroy(group->syncobjs);
 
 	panthor_vm_put(group->vm);
 	kfree(group);
@@ -1281,7 +1280,16 @@ cs_slot_process_fatal_event_locked(struct panthor_device *ptdev,
 	if (group)
 		group->fatal_queues |= BIT(cs_id);
 
-	sched_queue_delayed_work(sched, tick, 0);
+	if (CS_EXCEPTION_TYPE(fatal) == DRM_PANTHOR_EXCEPTION_CS_UNRECOVERABLE) {
+		/* If this exception is unrecoverable, queue a reset, and make
+		 * sure we stop scheduling groups until the reset has happened.
+		 */
+		panthor_device_schedule_reset(ptdev);
+		cancel_delayed_work(&sched->tick_work);
+	} else {
+		sched_queue_delayed_work(sched, tick, 0);
+	}
+
 	drm_warn(&ptdev->base,
 		 "CSG slot %d CS slot: %d\n"
 		 "CS_FATAL.EXCEPTION_TYPE: 0x%x (%s)\n"
@@ -1385,7 +1393,12 @@ static int group_process_tiler_oom(struct panthor_group *group, u32 cs_id)
 						      pending_frag_count, &new_chunk_va);
 	}
 
-	if (ret && ret != -EBUSY) {
+	/* If the heap context doesn't have memory for us, we want to let the
+	 * FW try to reclaim memory by waiting for fragment jobs to land or by
+	 * executing the tiler OOM exception handler, which is supposed to
+	 * implement incremental rendering.
+	 */
+	if (ret && ret != -ENOMEM) {
 		drm_warn(&ptdev->base, "Failed to extend the tiler heap\n");
 		group->fatal_queues |= BIT(cs_id);
 		sched_queue_delayed_work(sched, tick, 0);
@@ -2720,15 +2733,22 @@ void panthor_sched_pre_reset(struct panthor_device *ptdev)
 	mutex_unlock(&sched->reset.lock);
 }
 
-void panthor_sched_post_reset(struct panthor_device *ptdev)
+void panthor_sched_post_reset(struct panthor_device *ptdev, bool reset_failed)
 {
 	struct panthor_scheduler *sched = ptdev->scheduler;
 	struct panthor_group *group, *group_tmp;
 
 	mutex_lock(&sched->reset.lock);
 
-	list_for_each_entry_safe(group, group_tmp, &sched->reset.stopped_groups, run_node)
+	list_for_each_entry_safe(group, group_tmp, &sched->reset.stopped_groups, run_node) {
+		/* Consider all previously running group as terminated if the
+		 * reset failed.
+		 */
+		if (reset_failed)
+			group->state = PANTHOR_CS_GROUP_TERMINATED;
+
 		panthor_group_start(group);
+	}
 
 	/* We're done resetting the GPU, clear the reset.in_progress bit so we can
 	 * kick the scheduler.
 	 */
@@ -2736,9 +2756,11 @@ void panthor_sched_post_reset(struct panthor_device *ptdev)
 	atomic_set(&sched->reset.in_progress, false);
 	mutex_unlock(&sched->reset.lock);
 
-	sched_queue_delayed_work(sched, tick, 0);
-
-	sched_queue_work(sched, sync_upd);
+	/* No need to queue a tick and update syncs if the reset failed. */
+	if (!reset_failed) {
+		sched_queue_delayed_work(sched, tick, 0);
+		sched_queue_work(sched, sync_upd);
+	}
 }
 
 static void group_sync_upd_work(struct work_struct *work)
```
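The companion change this diff implies lives on the caller side: the reset worker must now call panthor_sched_post_reset() unconditionally and tell it whether the reset worked. A simplified sketch of that calling convention — the worker body and example_do_hw_reset() are hypothetical stand-ins for the actual panthor_device.c reset path:

```c
/* Hypothetical, simplified reset worker showing the new contract:
 * panthor_sched_post_reset() is always reached, and it is told whether
 * the hardware reset actually succeeded.
 */
static void example_reset_work(struct work_struct *work)
{
	struct panthor_device *ptdev =
		container_of(work, struct panthor_device, reset.work);
	int ret;

	panthor_sched_pre_reset(ptdev);

	ret = example_do_hw_reset(ptdev);	/* soft reset + FW reboot */

	/* Called even on failure, so stopped groups are restarted (or
	 * marked PANTHOR_CS_GROUP_TERMINATED when reset_failed is true)
	 * instead of being left dangling.
	 */
	panthor_sched_post_reset(ptdev, ret != 0);

	if (ret)
		drm_err(&ptdev->base, "GPU reset failed, device is unusable");
}
```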