aboutsummaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/i915/i915_gpu_error.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2023-02-22 18:28:03 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2023-02-22 18:28:03 -0800
commita5c95ca18a98d742d0a4a04063c32556b5b66378 (patch)
treefdd897b23a1c45b3d03bd1e75e5df42057f339d1 /drivers/gpu/drm/i915/i915_gpu_error.c
parent307e14c039063f0c9bd7a18a7add8f940580dcc9 (diff)
parenta48bba98380cb0b43dcd01d276c7efc282e3c33f (diff)
Merge tag 'drm-next-2023-02-23' of git://anongit.freedesktop.org/drm/drm
Pull drm updates from Dave Airlie: "There are a bunch of changes all over in the usual places. Highlights: - habanalabs moves from misc to accel - first accel driver for Intel VPU (Versatile Processing Unit) inference engine - dropped all the ancient legacy DRI1 drivers. I think it's been at least 10 years since anyone has heard about these. - Intel DG2 updates and prelim Meteorlake enablement - etnaviv adds support for Versilicon NPU device (a GPU like engine with inference accelerators) Detailed summary: Removals: - remove legacy dri1 drivers: i810, mga, r128, savage, sis, tdfx, via New driver: - intel VPU accelerator driver - habanalabs comes via drm tree now drm/core: - use drm_dbg_ helpers in several places - Document defaults for CRTC backgrounds - Document use of drm_minor edid: - improve mode parsing and refactoring connector: - support analog TV mode property media: - add some common formats udmabuf: - add vmap/vunmap methods fourcc: - add XRGB1555 and RGB565 formats - document open source user waiver firmware: - fix color-format selection for system framebuffer format-helper: - Add conversion from XRGB8888 to various sysfb formats - Make XRGB8888 the only driver-emulated legacy format - Add conversion from XRGB8888 to XBGR8888 and ABGR8888 fb-helper: - fix preferred depth and bpp values across drivers - Avoid blank consoles from selecting an incorrect color format probe-helper: - Enable/disable HPD on connectors scheduler: - Fix lockup in drm_sched_entity_kill() - Deprecate drm_sched_resubmit_jobs() bridge: - remove unused functions - implement i2c probe_new in various drivers - ite-it6505: Locking fixes, Cache EDID data - ite-it66121: Support IT6610 chip - lontium-tl9611: Fix HDMI on DragonBoard 845c - parade-ps8640: Use atomic bridge functions - Support i.MX93 LDB plus DT bindings debugfs: - add per device helpers and convert drivers displayport: - mst fixes - add DP adaptive sync DPCD definitions fbdev: - always pick 32bpp as default - remove some unused code simpledrm: - support system memory framebuffers panel: - add orientation quirks for Lenovo Yoga Tab 3 X90F and DynaBook K50 - Use ktime_get_boottime() to measure power-down delay - Fix auto-suspend delay - Visionox VTDR6130 AMOLED DSI - Support Himax HX8394 - Convert many drivers to common generic DSI write-sequence helper - AUO A030JTN01 ttm: - drop bo wait wrapper - fix MIPS build habanalabs: - moved driver to accel subsystem - gaudi2 decoder error improvement - more trace events - Gaudi2 abrupt reset by firmware support - add uAPI to flush memory transactions - add uAPI to pass through userspace reqs to fw - remove dma-buf export by handle amdgpu: - add new INFO queries for peak and min sclk/mclk for profile modes - Add PCIe info to the INFO IOCTL - secure display support for multiple displays - DML optimizations - DCN 3.2 updates - PSR updates - DP 2.1 updates - SR-IOV RAS updates - VCN RAS support - SMU 13.x updates - Switch 1 element arrays to flexible arrays - Add RAS support for DF 4.3 - Stack size improvements - S0ix rework - Allow 0 as a vram limit on APUs - Handle profiling modes for SMU13.x - Fix possible segfault in failure case - Rework FW requests to happen in early_init for all IPs so that we don't lose the sbios console if FW is missing - Fix power reporting on certain firmwares for CZN/RN - Allow S0ix without BIOS support - Enable freesync over PCon - Re-enable the AGP aperture on GMC 11.x amdkfd: - Error handling fixes - PASID fixes - Fix for cleared VRAM BOs - Fix cleanup if GPUVM creation fails - Memory accounting fix - Use resource_size rather than open codeing it - GC11 mGPU fix radeon: - Switch 1 element arrays to flexible arrays - Fix memory leak on shutdown - move to new logging i915: - Meteorlake display/OA/GSC fw/workarounds enabling - DP MST DSC support - Gamma/degamma readout support for the state checker - Enable SDP split support for DP 2.0 - Add probe blocking support to i915.force_probe parameter - Enable Xe HP 4tile support - Avoid display direct calls to uncore - Fix HuC delayed load memory leaks - Add DG2 workarounds Wa_18018764978 and Wa_18019271663 - Improve suspend / resume times with VT-d scanout workaround active - Fix DG2 visual corruption on small BAR systems by not forgetting to copy CCS aux state - Fix TLB invalidation for Gen12.50 video and compute engines - Enable HF-EEODB by switching HDMI, DP and LVDS to use struct drm_edid - Start using unversioned DMC firmware paths for new platforms - ELD refactor: Stop using hardware buffer, precompute ELD - lots of display code refactoring nouveau: - drop legacy ioctl support - replace 0-sized array msm: - dpu/dsi/mdss: Support for SM8350, SM8450 SM8550 and SC8280XP platform - Added bindings for SM8150 - dpu: Partial support for DSC on SM8150 and SM8250 - dpu: Fixed color transformation matrix being lost on suspend/resume - dp: Support SDM845 and SC8280XP platforms - dp: Support for limiting DP link rate via DT property - dsi: Validate display modes according to the DSI OPP table - dsi: DSI PHY support for the SM6375 platform - Add MSM_SUBMIT_BO_NO_IMPLICI - a2xx: Support to load legacy firmware - a6xx: GPU devcore dump updates for a650/a660 - GPU devfreq tuning and fixes - Turn 8960 HDMI PHY into clock provider, - Make 8960 HDMI PHY use PXO clock from DT etnaviv: - experimental versilicon NPU support - report GPU load via fdinfo format - MMU fault message improvements tegra: - rework syncpoint interrupt mediatek: - DSI timing fix - fix config deps ast: - various fixes exynos: - restore bridge chain order fixes gud: - convert to shadow plane buffers - perform flushing synchronously during atomic update - Use new debugfs helpers arm/hdlcd: - Use new debugfs helper ili9486: - Support 16-bit pixel data imx: - Split off IPUv3 driver mipi-dbi: - convert to DRM shadow-plane helpers - rsp driver changes - Support separate I/O-voltage supply mxsfb: - Depend on ARCH_MXS or ARCH_MXC sun4i: - convert to new TV mode property vc4: - convert to new TV mode property - kunit tests - Support RGB565 and RGB666 formats - convert dsi driver to bridge - Various HVS an CRTC fixes v3d: - Do not opencode drm_gem_object_lookup() virtio: - improve tracing vkms: - support small cursors in IGT tests - Fix SEGFAULT from incorrect GEM-buffer mapping rcar-du: - fixes and improvements" * tag 'drm-next-2023-02-23' of git://anongit.freedesktop.org/drm/drm: (1455 commits) msm/fbdev: fix unused variable warning with clang. drm/fb-helper: Remove drm_fb_helper_unprepare() from drm_fb_helper_fini() dma-buf: make kobj_type structure constant drm/shmem-helper: Fix locking for drm_gem_shmem_get_pages_sgt() drm/amd/display: disable SubVP + DRR to prevent underflow drm/amd/display: Fail atomic_check early on normalize_zpos error drm/amd/pm: avoid unaligned access warnings drm/amd/display: avoid unaligned access warnings drm/amd/display: Remove duplicate/repeating expressions drm/amd/display: Remove duplicate/repeating expression drm/amd/display: Make variables declaration inside ifdef guard drm/amd/display: Fix excess arguments on kernel-doc drm/amd/display: Add previously missing includes drm/amd/amdgpu: Add function prototypes to headers drm/amd/display: Add function prototypes to headers drm/amd/display: Turn global functions into static drm/amd/display: remove unused _calculate_degamma_curve function drm/amd/display: remove unused func declaration from resource headers drm/amd/display: unset initial value for tf since it's never used drm/amd/display: camel case cleanup in color_gamma file ...
Diffstat (limited to 'drivers/gpu/drm/i915/i915_gpu_error.c')
-rw-r--r--drivers/gpu/drm/i915/i915_gpu_error.c71
1 files changed, 47 insertions, 24 deletions
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index b20bd6365615..904f21e1380c 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -1370,14 +1370,14 @@ static void engine_record_execlists(struct intel_engine_coredump *ee)
}
static bool record_context(struct i915_gem_context_coredump *e,
- const struct i915_request *rq)
+ struct intel_context *ce)
{
struct i915_gem_context *ctx;
struct task_struct *task;
bool simulated;
rcu_read_lock();
- ctx = rcu_dereference(rq->context->gem_context);
+ ctx = rcu_dereference(ce->gem_context);
if (ctx && !kref_get_unless_zero(&ctx->ref))
ctx = NULL;
rcu_read_unlock();
@@ -1396,8 +1396,8 @@ static bool record_context(struct i915_gem_context_coredump *e,
e->guilty = atomic_read(&ctx->guilty_count);
e->active = atomic_read(&ctx->active_count);
- e->total_runtime = intel_context_get_total_runtime_ns(rq->context);
- e->avg_runtime = intel_context_get_avg_runtime_ns(rq->context);
+ e->total_runtime = intel_context_get_total_runtime_ns(ce);
+ e->avg_runtime = intel_context_get_avg_runtime_ns(ce);
simulated = i915_gem_context_no_error_capture(ctx);
@@ -1532,15 +1532,37 @@ intel_engine_coredump_alloc(struct intel_engine_cs *engine, gfp_t gfp, u32 dump_
return ee;
}
+static struct intel_engine_capture_vma *
+engine_coredump_add_context(struct intel_engine_coredump *ee,
+ struct intel_context *ce,
+ gfp_t gfp)
+{
+ struct intel_engine_capture_vma *vma = NULL;
+
+ ee->simulated |= record_context(&ee->context, ce);
+ if (ee->simulated)
+ return NULL;
+
+ /*
+ * We need to copy these to an anonymous buffer
+ * as the simplest method to avoid being overwritten
+ * by userspace.
+ */
+ vma = capture_vma(vma, ce->ring->vma, "ring", gfp);
+ vma = capture_vma(vma, ce->state, "HW context", gfp);
+
+ return vma;
+}
+
struct intel_engine_capture_vma *
intel_engine_coredump_add_request(struct intel_engine_coredump *ee,
struct i915_request *rq,
gfp_t gfp)
{
- struct intel_engine_capture_vma *vma = NULL;
+ struct intel_engine_capture_vma *vma;
- ee->simulated |= record_context(&ee->context, rq);
- if (ee->simulated)
+ vma = engine_coredump_add_context(ee, rq->context, gfp);
+ if (!vma)
return NULL;
/*
@@ -1550,8 +1572,6 @@ intel_engine_coredump_add_request(struct intel_engine_coredump *ee,
*/
vma = capture_vma_snapshot(vma, rq->batch_res, gfp, "batch");
vma = capture_user(vma, rq, gfp);
- vma = capture_vma(vma, rq->ring->vma, "ring", gfp);
- vma = capture_vma(vma, rq->context->state, "HW context", gfp);
ee->rq_head = rq->head;
ee->rq_post = rq->postfix;
@@ -1604,25 +1624,28 @@ capture_engine(struct intel_engine_cs *engine,
return NULL;
intel_engine_get_hung_entity(engine, &ce, &rq);
- if (!rq || !i915_request_started(rq))
- goto no_request_capture;
+ if (rq && !i915_request_started(rq))
+ drm_info(&engine->gt->i915->drm, "Got hung context on %s with active request %lld:%lld [0x%04X] not yet started\n",
+ engine->name, rq->fence.context, rq->fence.seqno, ce->guc_id.id);
+
+ if (rq) {
+ capture = intel_engine_coredump_add_request(ee, rq, ATOMIC_MAYFAIL);
+ i915_request_put(rq);
+ } else if (ce) {
+ capture = engine_coredump_add_context(ee, ce, ATOMIC_MAYFAIL);
+ }
- capture = intel_engine_coredump_add_request(ee, rq, ATOMIC_MAYFAIL);
- if (!capture)
- goto no_request_capture;
- if (dump_flags & CORE_DUMP_FLAG_IS_GUC_CAPTURE)
- intel_guc_capture_get_matching_node(engine->gt, ee, ce);
+ if (capture) {
+ intel_engine_coredump_add_vma(ee, capture, compress);
- intel_engine_coredump_add_vma(ee, capture, compress);
- i915_request_put(rq);
+ if (dump_flags & CORE_DUMP_FLAG_IS_GUC_CAPTURE)
+ intel_guc_capture_get_matching_node(engine->gt, ee, ce);
+ } else {
+ kfree(ee);
+ ee = NULL;
+ }
return ee;
-
-no_request_capture:
- if (rq)
- i915_request_put(rq);
- kfree(ee);
- return NULL;
}
static void