aboutsummaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/i915/i915_gpu_error.c
diff options
context:
space:
mode:
authorDave Airlie <airlied@redhat.com>2020-02-27 08:59:19 +1000
committerDave Airlie <airlied@redhat.com>2020-02-27 09:00:25 +1000
commit4825b61a3d39eceef7db723808103aa60fc24520 (patch)
tree87e1a754aef38ef088a5ec8e1613790c5a17c078 /drivers/gpu/drm/i915/i915_gpu_error.c
parentaaa9d265a21e7c4fcec12b1203cbfa516277e4ad (diff)
parent53e3ca6749186b5c147964bddc4eb47ba8b5f69e (diff)
Merge tag 'drm-intel-next-2020-02-25' of git://anongit.freedesktop.org/drm/drm-intel into drm-next
- A backmerge of drm-next solving conflicts on i915/gt/intel_lrc.c - Clean up shadow batch after I915_EXEC_SECURE - Drop assertion that active->fence is unchanged Here goes drm-intel-next-2020-02-25: - A backmerge of drm-next solving conflicts on i915/gt/intel_lrc.c - Clean up shadow batch after I915_EXEC_SECURE - Drop assertion that active->fence is unchanged drm-intel-next-2020-02-24-1: - RC6 fixes - Chris - Add extra slice common debug register - Lionel - Align virtual engines uabi_class/instance with i915_drm.h - Tvrtko - Avoid potential division by zero in computing CS timestamp - Chris - Avoid using various globals - Michal Winiarski, Matt Auld - Break up long lists of GEM object reclaim - Chris - Check that the vma hasn't been closed before we insert it - Chris - Consolidate SDVO HDMI force_dvi handling - Ville - Conversion to new logging and warn macros and functions - Pankaj, Wambul, Chris - DC3CO fixes - Jose - Disable use of hwsp_cacheline for kernel_context - Chris - Display IRQ pre/post uninstall refactor - Jani - Display port sync refactor for robustness and fixes - Ville, Manasi - Do not attempt to reprogram IA/ring frequencies for dgfx - Chris - Drop alpha_support for good in favor of force_probe - Jani - DSI ACPI related fixes and refactors - Vivek, Jani, Rajat - Encoder refactor for flexibility to add more information, especiallly DSI related - Jani, Vandita - Engine workarounds refactor for robustness around resue - Daniele - FBC simplification and tracepoints - Various fixes for build - Jani, Kees Cook, Chris, Zhang Xiaoxu - Fix cmdparser - Chris - Fix DRM_I915_GEM_MMAP_OFFFSET - Chris - Fix i915_request flags - Chris - Fix inconsistency between pfit enable and scaler freeing - Stanislav - Fix inverted warn_on on display code - Chris - Fix modeset locks in sanitize_watermarks - Ville - Fix OA context id overlap with idle context id - Umesh - Fix pipe and vblank enable for MST - Jani - Fix VBT handling for timing parameters - Vandita - Fixes o kernel doc - Chris, Ville - Force full modeset whenever DSC is enabled at probe - Jani - Various GEM locking simplification and fixes - Jani , Chris, Jose - Including some changes in preparation for making GEM execbuf parallel - Chris - Gen11 pcode error codes - Matt Roper - Gen8+ interrupt handler refactor - Chris - Many fixes and improvements around GuC code - Daniele, Michal Wajdeczko - i915 parameters improvements sfor flexible input and better debugability - Chris, Jani - Ice Lake and Elkhart Lake Fixes and workarounds - Matt Roper, Jose, Vivek, Matt Atwood - Improvements on execlists, requests and other areas, fixing hangs and also improving hang detection, recover and debugability - Chris - Also introducing offline GT error capture - Chris - Introduce encoder->compute_config_late() to help MST - Ville - Make dbuf configuration const - Jani - Few misc clean ups - Ville, Chris - Never allow userptr into the new mapping types - Janusz - Poison rings after use and GTT scratch pages - Chris - Protect signaler walk with RCU - Chris - PSR fixes - Jose - Pull sseu context updates under gt - Chris - Read rawclk_freq earlier - Chris - Refactor around VBT handling to allow geting information through the encoder - Jani - Refactor l3cc/mocs availability - Chris - Refactor to use intel_connector over drm_connector - Ville - Remove i915_energy_uJ from debugfs - Tvrtko - Remove lite restore defines - Mika Kuoppala - Remove prefault_disable modparam - Chris - Many selftests fixes and improvements - Chris - Set intel_dp_set_m_n() for MST slaves - Jose - Simplify hot plug pin handling and other fixes around pin and polled modes - Ville - Skip CPU synchronization on dma-buf attachments - chris - Skip global serialization of clear_range for bxt vtd - Chris - Skip rmw for marked register - Chris - Some other GEM Fixes - Chris - Some small changes for satisfying static code analysis - Colin, Chris - Suppress warnings for unused debugging locals - Tiger Lake enabling, including re-enable -f RPS, workarounds and other display fixes and changes - Chris, Matt Roper, Mika Kuoppala, Anshuman, Jose, Radhakrishna, Rafael. - Track hw reported context runtime - Tvrtko - Update bug filling URL - Jani - Use async bind for PIN_USER into bsw/bxt ggtt - Chris - Use the kernel_context to measuer the breadcrumb size - Chris - Userptr fixes and robustness for big pages - Matt Auld - Various Display refactors and clean-ups, specially around logs and use of drm_i915_private - Jani, Ville - Various display refactors and fixes, especially around cdclk, modeset, and encoder - Chris, Jani - Various eDP/DP fixes around DPCD - Lyude - Various fixes and refactors for better Display watermark handling - Ville, Stanislav - Various other display refactors - Ville - Various refactor for better handling of display plane states - Ville - Wean off drm_pci_alloc/drm_pci_free - Chris - Correctly terminate connector iteration- Ville - Downgrade gen7 (ivb, byt, hsw) back to aliasing-ppgtt - Chris Signed-off-by: Dave Airlie <airlied@redhat.com> From: Rodrigo Vivi <rodrigo.vivi@intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20200225185853.GA3282832@intel.com
Diffstat (limited to 'drivers/gpu/drm/i915/i915_gpu_error.c')
-rw-r--r--drivers/gpu/drm/i915/i915_gpu_error.c34
1 files changed, 26 insertions, 8 deletions
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 594341e27a47..2a4cd0ba5464 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -37,6 +37,7 @@
#include <drm/drm_print.h>
#include "display/intel_atomic.h"
+#include "display/intel_csr.h"
#include "display/intel_overlay.h"
#include "gem/i915_gem_context.h"
@@ -47,7 +48,6 @@
#include "i915_gpu_error.h"
#include "i915_memcpy.h"
#include "i915_scatterlist.h"
-#include "intel_csr.h"
#define ALLOW_FAIL (GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN)
#define ATOMIC_MAYFAIL (GFP_ATOMIC | __GFP_NOWARN)
@@ -450,6 +450,14 @@ static void error_print_instdone(struct drm_i915_error_state_buf *m,
err_printf(m, " ROW_INSTDONE[%d][%d]: 0x%08x\n",
slice, subslice,
ee->instdone.row[slice][subslice]);
+
+ if (INTEL_GEN(m->i915) < 12)
+ return;
+
+ err_printf(m, " SC_INSTDONE_EXTRA: 0x%08x\n",
+ ee->instdone.slice_common_extra[0]);
+ err_printf(m, " SC_INSTDONE_EXTRA2: 0x%08x\n",
+ ee->instdone.slice_common_extra[1]);
}
static void error_print_request(struct drm_i915_error_state_buf *m,
@@ -473,9 +481,13 @@ static void error_print_context(struct drm_i915_error_state_buf *m,
const char *header,
const struct i915_gem_context_coredump *ctx)
{
- err_printf(m, "%s%s[%d] prio %d, guilty %d active %d\n",
+ const u32 period = RUNTIME_INFO(m->i915)->cs_timestamp_period_ns;
+
+ err_printf(m, "%s%s[%d] prio %d, guilty %d active %d, runtime total %lluns, avg %lluns\n",
header, ctx->comm, ctx->pid, ctx->sched_attr.priority,
- ctx->guilty, ctx->active);
+ ctx->guilty, ctx->active,
+ ctx->total_runtime * period,
+ mul_u32_u32(ctx->avg_runtime, period));
}
static struct i915_vma_coredump *
@@ -515,6 +527,7 @@ static void error_print_engine(struct drm_i915_error_state_buf *m,
(u32)(ee->acthd>>32), (u32)ee->acthd);
err_printf(m, " IPEIR: 0x%08x\n", ee->ipeir);
err_printf(m, " IPEHR: 0x%08x\n", ee->ipehr);
+ err_printf(m, " ESR: 0x%08x\n", ee->esr);
error_print_instdone(m, ee);
@@ -1102,6 +1115,7 @@ static void engine_record_registers(struct intel_engine_coredump *ee)
}
if (INTEL_GEN(i915) >= 4) {
+ ee->esr = ENGINE_READ(engine, RING_ESR);
ee->faddr = ENGINE_READ(engine, RING_DMA_FADD);
ee->ipeir = ENGINE_READ(engine, RING_IPEIR);
ee->ipehr = ENGINE_READ(engine, RING_IPEHR);
@@ -1228,7 +1242,7 @@ static bool record_context(struct i915_gem_context_coredump *e,
{
struct i915_gem_context *ctx;
struct task_struct *task;
- bool capture;
+ bool simulated;
rcu_read_lock();
ctx = rcu_dereference(rq->context->gem_context);
@@ -1236,7 +1250,7 @@ static bool record_context(struct i915_gem_context_coredump *e,
ctx = NULL;
rcu_read_unlock();
if (!ctx)
- return false;
+ return true;
rcu_read_lock();
task = pid_task(ctx->pid, PIDTYPE_PID);
@@ -1250,10 +1264,13 @@ static bool record_context(struct i915_gem_context_coredump *e,
e->guilty = atomic_read(&ctx->guilty_count);
e->active = atomic_read(&ctx->active_count);
- capture = i915_gem_context_no_error_capture(ctx);
+ e->total_runtime = rq->context->runtime.total;
+ e->avg_runtime = ewma_runtime_read(&rq->context->runtime.avg);
+
+ simulated = i915_gem_context_no_error_capture(ctx);
i915_gem_context_put(ctx);
- return capture;
+ return simulated;
}
struct intel_engine_capture_vma {
@@ -1852,7 +1869,8 @@ void i915_error_state_store(struct i915_gpu_coredump *error)
if (!xchg(&warned, true) &&
ktime_get_real_seconds() - DRIVER_TIMESTAMP < DAY_AS_SECONDS(180)) {
pr_info("GPU hangs can indicate a bug anywhere in the entire gfx stack, including userspace.\n");
- pr_info("Please file a _new_ bug report on bugs.freedesktop.org against DRI -> DRM/Intel\n");
+ pr_info("Please file a _new_ bug report at https://gitlab.freedesktop.org/drm/intel/issues/new.\n");
+ pr_info("Please see https://gitlab.freedesktop.org/drm/intel/-/wikis/How-to-file-i915-bugs for details.\n");
pr_info("drm/i915 developers can then reassign to the right component if it's not a kernel issue.\n");
pr_info("The GPU crash dump is required to analyze GPU hangs, so please always attach it.\n");
pr_info("GPU crash dump saved to /sys/class/drm/card%d/error\n",