diff options
author | Daniel Vetter <daniel.vetter@ffwll.ch> | 2024-08-27 14:33:12 +0200 |
---|---|---|
committer | Daniel Vetter <daniel.vetter@ffwll.ch> | 2024-08-27 14:33:12 +0200 |
commit | e55ef65510a401862b902dc979441ea10ae25c61 (patch) | |
tree | 2fefc98a3f5a81f9c648d09a826e46fde9dc338c /drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | |
parent | 4461e9e5c374f8c11fee8e4a0e3290b072cfd538 (diff) | |
parent | 3376f922bfe070eff762164b3fc66981e3079417 (diff) |
Merge tag 'amd-drm-next-6.12-2024-08-26' of https://gitlab.freedesktop.org/agd5f/linux into drm-next
amd-drm-next-6.12-2024-08-26:
amdgpu:
- SDMA devcoredump support
- DCN 4.0.1 updates
- DC SUBVP fixes
- Refactor OPP in DC
- Refactor MMHUBBUB in DC
- DC DML 2.1 updates
- DC FAMS2 updates
- RAS updates
- GFX12 updates
- VCN 4.0.3 updates
- JPEG 4.0.3 updates
- Enable wave kill (soft recovery) for compute queues
- Clean up CP error interrupt handling
- Enable CP bad opcode interrupts
- VCN 4.x fixes
- VCN 5.x fixes
- GPU reset fixes
- Fix vbios embedded EDID size handling
- SMU 14.x updates
- Misc code cleanups and spelling fixes
- VCN devcoredump support
- ISP MFD i2c support
- DC vblank fixes
- GFX 12 fixes
- PSR fixes
- Convert vbios embedded EDID to drm_edid
- DCN 3.5 updates
- DMCUB updates
- Cursor fixes
- Overdrive support for SMU 14.x
- GFX CP padding optimizations
- DCC fixes
- DSC fixes
- Preliminary per queue reset infrastructure
- Initial per queue reset support for GFX 9
- Initial per queue reset support for GFX 7, 8
- DCN 3.2 fixes
- DP MST fixes
- SR-IOV fixes
- GFX 9.4.3/4 devcoredump support
- Add process isolation framework
- Enable process isolation support for GFX 9.4.3/4
- Take IOMMU remapping into account for P2P DMA checks
amdkfd:
- CRIU fixes
- Improved input validation for user queues
- HMM fix
- Enable process isolation support for GFX 9.4.3/4
- Initial per queue reset support for GFX 9
- Allow users to target recommended SDMA engines
radeon:
- remove .load and drm_dev_alloc
- Fix vbios embedded EDID size handling
- Convert vbios embedded EDID to drm_edid
- Use GEM references instead of TTM
- r100 cp init cleanup
- Fix potential overflows in evergreen CS offset tracking
UAPI:
- KFD support for targetting queues on recommended SDMA engines
Proposed userspace:
https://github.com/ROCm/ROCR-Runtime/commit/2f588a24065f41c208c3701945e20be746d8faf7
https://github.com/ROCm/ROCR-Runtime/commit/eb30a5bbc7719c6ffcf2d2dd2878bc53a47b3f30
drm/buddy:
- Add start address support for trim function
From: Alex Deucher <alexander.deucher@amd.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240826201528.55307-1-alexander.deucher@amd.com
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_job.c')
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 20 |
1 files changed, 20 insertions, 0 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index 908e13455152..597489dea114 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -72,6 +72,26 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job) dma_fence_set_error(&s_job->s_fence->finished, -ETIME); + /* attempt a per ring reset */ + if (amdgpu_gpu_recovery && + ring->funcs->reset) { + /* stop the scheduler, but don't mess with the + * bad job yet because if ring reset fails + * we'll fall back to full GPU reset. + */ + drm_sched_wqueue_stop(&ring->sched); + r = amdgpu_ring_reset(ring, job->vmid); + if (!r) { + if (amdgpu_ring_sched_ready(ring)) + drm_sched_stop(&ring->sched, s_job); + atomic_inc(&ring->adev->gpu_reset_counter); + amdgpu_fence_driver_force_completion(ring); + if (amdgpu_ring_sched_ready(ring)) + drm_sched_start(&ring->sched); + goto exit; + } + } + if (amdgpu_device_should_recover_gpu(ring->adev)) { struct amdgpu_reset_context reset_context; memset(&reset_context, 0, sizeof(reset_context)); |