From c96b63a6a7ac4bd670ec2e663793a9a31418b790 Mon Sep 17 00:00:00 2001 From: Ander Conselvan de Oliveira Date: Fri, 20 Jan 2017 16:28:42 +0200 Subject: drm/i915: Don't leak edid in intel_crt_detect_ddc() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In the path where intel_crt_detect_ddc() detects a CRT, if would return true without freeing the edid. Fixes: a2bd1f541f19 ("drm/i915: check whether we actually received an edid in detect_ddc") Cc: Chris Wilson Cc: Daniel Vetter Cc: Daniel Vetter Cc: Jani Nikula Cc: intel-gfx@lists.freedesktop.org Cc: # v3.6+ Signed-off-by: Ander Conselvan de Oliveira Reviewed-by: Ville Syrjälä Reviewed-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/1484922525-6131-1-git-send-email-ander.conselvan.de.oliveira@intel.com --- drivers/gpu/drm/i915/intel_crt.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_crt.c b/drivers/gpu/drm/i915/intel_crt.c index 385e29af8baa..2bf5aca6e37c 100644 --- a/drivers/gpu/drm/i915/intel_crt.c +++ b/drivers/gpu/drm/i915/intel_crt.c @@ -499,6 +499,7 @@ static bool intel_crt_detect_ddc(struct drm_connector *connector) struct drm_i915_private *dev_priv = to_i915(crt->base.base.dev); struct edid *edid; struct i2c_adapter *i2c; + bool ret = false; BUG_ON(crt->base.type != INTEL_OUTPUT_ANALOG); @@ -515,17 +516,17 @@ static bool intel_crt_detect_ddc(struct drm_connector *connector) */ if (!is_digital) { DRM_DEBUG_KMS("CRT detected via DDC:0x50 [EDID]\n"); - return true; + ret = true; + } else { + DRM_DEBUG_KMS("CRT not detected via DDC:0x50 [EDID reports a digital panel]\n"); } - - DRM_DEBUG_KMS("CRT not detected via DDC:0x50 [EDID reports a digital panel]\n"); } else { DRM_DEBUG_KMS("CRT not detected via DDC:0x50 [no valid EDID found]\n"); } kfree(edid); - return false; + return ret; } static enum drm_connector_status -- cgit From 04313b00b79405f86d815100f85c47a2ee5b8ca0 Mon Sep 17 00:00:00 2001 From: Ander Conselvan de Oliveira Date: Fri, 20 Jan 2017 16:28:43 +0200 Subject: drm/i915: Don't init hpd polling for vlv and chv from runtime_suspend() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit An error in the condition for avoiding the call to intel_hpd_poll_init() for valleyview and cherryview from intel_runtime_suspend() caused it to be called unconditionally. Fix it. Fixes: 19625e85c6ec ("drm/i915: Enable polling when we don't have hpd") Cc: stable@vger.kernel.org Cc: Ville Syrjälä Cc: Daniel Vetter Cc: Lyude Cc: Daniel Vetter Cc: Jani Nikula Cc: intel-gfx@lists.freedesktop.org Cc: # v4.9+ Signed-off-by: Ander Conselvan de Oliveira Reviewed-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/1484922525-6131-2-git-send-email-ander.conselvan.de.oliveira@intel.com --- drivers/gpu/drm/i915/i915_drv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 4ae69ebe166e..ca168b22ee26 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -2377,7 +2377,7 @@ static int intel_runtime_suspend(struct device *kdev) assert_forcewakes_inactive(dev_priv); - if (!IS_VALLEYVIEW(dev_priv) || !IS_CHERRYVIEW(dev_priv)) + if (!IS_VALLEYVIEW(dev_priv) && !IS_CHERRYVIEW(dev_priv)) intel_hpd_poll_init(dev_priv); DRM_DEBUG_KMS("Device suspended\n"); -- cgit From 46a1bd289507dfcc428fb9daf65421ed6be6af8b Mon Sep 17 00:00:00 2001 From: Ander Conselvan de Oliveira Date: Fri, 20 Jan 2017 16:28:44 +0200 Subject: drm/i915: Fix calculation of rotated x and y offsets for planar formats MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Parameters tile_size, tile_width and tile_height were passed in the wrong order to _intel_adjust_tile_offset() when calculating the rotated offsets. This doesn't fix any user visible bug, since for packed formats new and old offset are the same and the rotated offsets are within a tile before they are fed to _intel_adjust_tile_offset(). In that case, the offsets are unchanged. That is not true for planar formats, but those are currently not supported. Fixes: 66a2d927cb0e ("drm/i915: Make intel_adjust_tile_offset() work for linear buffers") Cc: Ville Syrjälä Cc: Sivakumar Thulasimani Cc: Daniel Vetter Cc: Jani Nikula Cc: intel-gfx@lists.freedesktop.org Cc: # v4.9+ Signed-off-by: Ander Conselvan de Oliveira Reviewed-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/1484922525-6131-3-git-send-email-ander.conselvan.de.oliveira@intel.com --- drivers/gpu/drm/i915/intel_display.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 0f4272f98648..0d2dad1bb86b 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -2578,8 +2578,9 @@ intel_fill_fb_info(struct drm_i915_private *dev_priv, * We only keep the x/y offsets, so push all of the * gtt offset into the x/y offsets. */ - _intel_adjust_tile_offset(&x, &y, tile_size, - tile_width, tile_height, pitch_tiles, + _intel_adjust_tile_offset(&x, &y, + tile_width, tile_height, + tile_size, pitch_tiles, gtt_offset_rotated * tile_size, 0); gtt_offset_rotated += rot_info->plane[i].width * rot_info->plane[i].height; -- cgit From 31bb2ef97ea9db343348f9b5ccaa9bb6f48fc655 Mon Sep 17 00:00:00 2001 From: Ander Conselvan de Oliveira Date: Fri, 20 Jan 2017 16:28:45 +0200 Subject: drm/i915: Check for NULL atomic state in intel_crtc_disable_noatomic() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In intel_crtc_disable_noatomic(), bail on a failure to allocate an atomic state to avoid a NULL pointer dereference. Fixes: 4a80655827af ("drm/i915: Pass atomic state to crtc enable/disable functions") Cc: Maarten Lankhorst Cc: Daniel Vetter Cc: Daniel Vetter Cc: Jani Nikula Cc: intel-gfx@lists.freedesktop.org Cc: # v4.9+ Signed-off-by: Ander Conselvan de Oliveira Reviewed-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/1484922525-6131-4-git-send-email-ander.conselvan.de.oliveira@intel.com --- drivers/gpu/drm/i915/intel_display.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 0d2dad1bb86b..09edff4633f6 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -6882,6 +6882,12 @@ static void intel_crtc_disable_noatomic(struct drm_crtc *crtc) } state = drm_atomic_state_alloc(crtc->dev); + if (!state) { + DRM_DEBUG_KMS("failed to disable [CRTC:%d:%s], out of memory", + crtc->base.id, crtc->name); + return; + } + state->acquire_ctx = crtc->dev->mode_config.acquire_ctx; /* Everything's already locked, -EDEADLK can't happen. */ -- cgit From 62fa0ce2bd590ef95b4557fca780441dc33ba427 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sat, 21 Jan 2017 18:22:33 +0000 Subject: agp/intel: Move intel_fake_agp_sizes into #ifdef block MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move the intel_fake_agp_sizes array into the same #ifdef block as it is used to avoid instantiation when not used, and so triggering a compiler warning drivers/char/agp/intel-gtt.c:335:42: warning: ‘intel_fake_agp_sizes’ defined but not used [-Wunused-const-variable=] Signed-off-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20170121182233.30852-1-chris@chris-wilson.co.uk Reviewed-by: Jani Nikula Reviewed-by: Joonas Lahtinen --- drivers/char/agp/intel-gtt.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/char/agp/intel-gtt.c b/drivers/char/agp/intel-gtt.c index 9702c78f458d..7fcc2a9d1d5a 100644 --- a/drivers/char/agp/intel-gtt.c +++ b/drivers/char/agp/intel-gtt.c @@ -332,14 +332,6 @@ static void i810_write_entry(dma_addr_t addr, unsigned int entry, writel_relaxed(addr | pte_flags, intel_private.gtt + entry); } -static const struct aper_size_info_fixed intel_fake_agp_sizes[] = { - {32, 8192, 3}, - {64, 16384, 4}, - {128, 32768, 5}, - {256, 65536, 6}, - {512, 131072, 7}, -}; - static unsigned int intel_gtt_stolen_size(void) { u16 gmch_ctrl; @@ -670,6 +662,14 @@ static int intel_gtt_init(void) } #if IS_ENABLED(CONFIG_AGP_INTEL) +static const struct aper_size_info_fixed intel_fake_agp_sizes[] = { + {32, 8192, 3}, + {64, 16384, 4}, + {128, 32768, 5}, + {256, 65536, 6}, + {512, 131072, 7}, +}; + static int intel_fake_agp_fetch_size(void) { int num_sizes = ARRAY_SIZE(intel_fake_agp_sizes); -- cgit From ec78828ed60a9833582cbb582248bd1b0200f50e Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 18 Jan 2017 12:18:08 +0000 Subject: drm/i915: Fix W=1 warning for csr_load_work_fn() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit drivers/gpu/drm/i915/intel_csr.c: In function ‘csr_load_work_fn’: drivers/gpu/drm/i915/intel_csr.c:399:6: error: variable ‘ret’ set but not used [-Werror=unused-but-set-variable] Signed-off-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20170118121808.27869-2-chris@chris-wilson.co.uk Reviewed-by: Mika Kuoppala --- drivers/gpu/drm/i915/intel_csr.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_csr.c b/drivers/gpu/drm/i915/intel_csr.c index 0085bc745f6a..9dcc434d3b74 100644 --- a/drivers/gpu/drm/i915/intel_csr.c +++ b/drivers/gpu/drm/i915/intel_csr.c @@ -396,13 +396,11 @@ static void csr_load_work_fn(struct work_struct *work) struct drm_i915_private *dev_priv; struct intel_csr *csr; const struct firmware *fw = NULL; - int ret; dev_priv = container_of(work, typeof(*dev_priv), csr.work); csr = &dev_priv->csr; - ret = request_firmware(&fw, dev_priv->csr.fw_path, - &dev_priv->drm.pdev->dev); + request_firmware(&fw, dev_priv->csr.fw_path, &dev_priv->drm.pdev->dev); if (fw) dev_priv->csr.dmc_payload = parse_csr_fw(dev_priv, fw); -- cgit From e27414a04f1a571eeb0b214ebfc77e6dc9bb8a08 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Fri, 20 Jan 2017 22:36:55 +0800 Subject: drm/i915/error: use rb_entry() To make the code clearer, use rb_entry() instead of container_of() to deal with rbtree. Signed-off-by: Geliang Tang Link: http://patchwork.freedesktop.org/patch/msgid/b08fd4be9d4c45d88c158a17b854c3fd628840ed.1484816339.git.geliangtang@gmail.com Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_gpu_error.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 9cd22cda17af..e5375323eb06 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -1080,7 +1080,7 @@ static void error_record_engine_waiters(struct intel_engine_cs *engine, ee->waiters = waiter; for (rb = rb_first(&b->waiters); rb; rb = rb_next(rb)) { - struct intel_wait *w = container_of(rb, typeof(*w), node); + struct intel_wait *w = rb_entry(rb, typeof(*w), node); strcpy(waiter->comm, w->tsk->comm); waiter->pid = w->tsk->pid; -- cgit From 2f1ac9cc68e0ca569405bd854979aa7eb787f774 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 23 Jan 2017 09:37:24 +0000 Subject: drm/i915: Queue hangcheck when irqs are disabled Ensure that the hangcheck is queued even in the absence of interrupts. Signed-off-by: Chris Wilson Cc: Mika Kuoppala Link: http://patchwork.freedesktop.org/patch/msgid/20170123093724.18592-1-chris@chris-wilson.co.uk Reviewed-by: Mika Kuoppala --- drivers/gpu/drm/i915/intel_breadcrumbs.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c index fcfa423d08bd..c6fa77177615 100644 --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c @@ -127,6 +127,7 @@ static void __intel_breadcrumbs_enable_irq(struct intel_breadcrumbs *b) if (!b->irq_enabled || test_bit(engine->id, &i915->gpu_error.missed_irq_rings)) { mod_timer(&b->fake_irq, jiffies + 1); + i915_queue_hangcheck(i915); } else { /* Ensure we never sleep indefinitely */ GEM_BUG_ON(!time_after(b->timeout, jiffies)); -- cgit From 5d12fcef4ef15d00ae3228cc1ed80d2d81bc2c06 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 23 Jan 2017 11:31:31 +0000 Subject: drm/i915: Assert that the kernel_context is hw-id 0 For easy recognisability, we want the kernel context to have id 0 and all user contexts to have non-zero ids. Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170123113132.18665-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_context.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 17f90c618208..77458da9627d 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -451,6 +451,11 @@ int i915_gem_context_init(struct drm_i915_private *dev_priv) return PTR_ERR(ctx); } + /* For easy recognisablity, we want the kernel context to be 0 and then + * all user contexts will have non-zero hw_id. + */ + GEM_BUG_ON(ctx->hw_id); + i915_gem_context_clear_bannable(ctx); ctx->priority = I915_PRIORITY_MIN; /* lowest priority; idle task */ dev_priv->kernel_context = ctx; -- cgit From 86aa7e760a6709789a21bdf186bd0c3635da6b9a Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 23 Jan 2017 11:31:32 +0000 Subject: drm/i915: Assert that the context-switch completion matches our context When execlists signals the context completion, it also provides the context id for the status event. Assert that id matches the one we expect. v2: The upper dword of the context status is a duplicate of the upper dword from elsp submission (i.e. includes the group id as well as the context id). Include this check as well. v3: Only check against lrc_desc (as this contains the hw_id check) Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Cc: Tvrtko Ursulin Cc: Mika Kuoppala Reviewed-by: Mika Kuoppala Reviewed-by: Joonas Lahtinen Reviewed-by: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170123113132.18665-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_lrc.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 432ee495dec2..eceffe25c022 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -595,6 +595,11 @@ static void intel_lrc_irq_handler(unsigned long data) if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK)) continue; + /* Check the context/desc id for this event matches */ + GEM_BUG_ON(readl(buf + 2 * idx + 1) != + upper_32_bits(intel_lr_context_descriptor(port[0].request->ctx, + engine))); + GEM_BUG_ON(port[0].count == 0); if (--port[0].count == 0) { GEM_BUG_ON(status & GEN8_CTX_STATUS_PREEMPTED); -- cgit From 4c01ded5732d6533a2858fae30c197f734745062 Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Thu, 22 Dec 2016 11:33:23 +0100 Subject: drm/i915: Use atomic page flip for intel again. This reverts commit 527b6abe5fd2d2 (Revert "drm/i915: Use atomic commits for legacy page_flips") and reapplies commit ee042aa40b66d1. ("drm/i915: Use atomic commits for legacy page_flips") The reason for the revert was because legacy cursor updates were forced to wait for pending page flips and rendering after they were converted to atomic. Commit f79f26921ee12c6f (drm/i915: Add a cursor hack to allow converting legacy page flip to atomic, v3) adds a fastpath to cursor updates, which fixes the stuttering issues. With these changes I feel confident enough to re-enable cursor updates. Legacy cursor update won't block in the following cases: - Moving cursor - Changing cursor fb The legacy cursor update will still block in the following cases: - Showing/hiding cursor. - Cursor size or scaling changes. - cursor update while cursor is invisible (could be fixed, if it turns out to be important). - Cursor tiling changes (Not sure we support tiled cursors.) - Last update was a modeset. Cc: Steven Newbury Cc: Rafael Ristovski Cc: Chris Wilson Cc: Daniel Vetter Tested-by: Rafael Ristovski Testcase: igt/kms_cursor_legacy Signed-off-by: Maarten Lankhorst Reviewed-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_display.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 09edff4633f6..0bf8e1bfbe7e 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -12146,6 +12146,7 @@ void intel_check_page_flip(struct drm_i915_private *dev_priv, int pipe) spin_unlock(&dev->event_lock); } +__maybe_unused static int intel_crtc_page_flip(struct drm_crtc *crtc, struct drm_framebuffer *fb, struct drm_pending_vblank_event *event, @@ -14732,7 +14733,7 @@ static const struct drm_crtc_funcs intel_crtc_funcs = { .set_config = drm_atomic_helper_set_config, .set_property = drm_atomic_helper_crtc_set_property, .destroy = intel_crtc_destroy, - .page_flip = intel_crtc_page_flip, + .page_flip = drm_atomic_helper_page_flip, .atomic_duplicate_state = intel_crtc_duplicate_state, .atomic_destroy_state = intel_crtc_destroy_state, .set_crc_source = intel_crtc_set_crc_source, -- cgit From 7c518460303353084ebcfca99bc4b67ce33745a1 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 23 Jan 2017 14:52:45 +0000 Subject: drm/i915: Pevent copying uninitialised garbage into vma->ggtt_view Since tweaking i915_vma_compare() we allowed constructors to skip clearing the ggtt_view believing that we didn't access the unused members. That, as it turns out, was not entirely true. In particular, i915_gem_fault() uses ret = remap_io_mapping(area, area->vm_start + (vma->ggtt_view.partial.offset << PAGE_SHIFT), (ggtt->mappable_base + vma->node.start) >> PAGE_SHIFT, min_t(u64, vma->size, area->vm_end - area->vm_start), &ggtt->mappable); i.e. the ggtt_view.partial for both normal and partial views. If we allowed garbage into the normal vma->ggtt_view and then try userspace tried to mmap it, we could explode in an unobvious fashion. Fixes: 7b92c047bae2 ("drm/i915: Eliminate superfluous i915_ggtt_view_rotated") Fixes: 3bf4d5751943 ("drm/i915: Stop clearing i915_ggtt_view") Reported-by: Matthew Auld Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Cc: Matthew Auld Link: http://patchwork.freedesktop.org/patch/msgid/20170123145245.3972-1-chris@chris-wilson.co.uk Tested-by: Matthew Auld Reviewed-by: Matthew Auld --- drivers/gpu/drm/i915/i915_vma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 307b22ae7791..155906e84812 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -91,7 +91,7 @@ vma_create(struct drm_i915_gem_object *obj, vma->size = obj->base.size; vma->display_alignment = I915_GTT_MIN_ALIGNMENT; - if (view) { + if (view && view->type != I915_GGTT_VIEW_NORMAL) { vma->ggtt_view = *view; if (view->type == I915_GGTT_VIEW_PARTIAL) { GEM_BUG_ON(range_overflows_t(u64, -- cgit From 70962fbe5c75e785d250c04db4d01c18b7316c13 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 23 Jan 2017 13:05:56 +0000 Subject: drm/i915: Remove disable_lite_restore_wa This w/a (WaEnableForceRestoreInCtxtDescForVCS) was only used for preproduction hw, which is no longer in use. Remove the workaround to simplify the code. Signed-off-by: Chris Wilson Cc: Mika Kuoppala Cc: Joonas Lahtinen Cc: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170123130601.2281-1-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen --- drivers/gpu/drm/i915/intel_lrc.c | 11 +---------- drivers/gpu/drm/i915/intel_ringbuffer.h | 1 - 2 files changed, 1 insertion(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index eceffe25c022..8ffa4961aa40 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -272,10 +272,6 @@ logical_ring_init_platform_invariants(struct intel_engine_cs *engine) { struct drm_i915_private *dev_priv = engine->i915; - engine->disable_lite_restore_wa = - IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1) && - (engine->id == VCS || engine->id == VCS2); - engine->ctx_desc_template = GEN8_CTX_VALID; if (IS_GEN8(dev_priv)) engine->ctx_desc_template |= GEN8_CTX_L3LLC_COHERENT; @@ -284,11 +280,6 @@ logical_ring_init_platform_invariants(struct intel_engine_cs *engine) /* TODO: WaDisableLiteRestore when we start using semaphore * signalling between Command Streamers */ /* ring->ctx_desc_template |= GEN8_CTX_FORCE_RESTORE; */ - - /* WaEnableForceRestoreInCtxtDescForVCS:skl */ - /* WaEnableForceRestoreInCtxtDescForVCS:bxt */ - if (engine->disable_lite_restore_wa) - engine->ctx_desc_template |= GEN8_CTX_FORCE_RESTORE; } /** @@ -558,7 +549,7 @@ static bool execlists_elsp_ready(struct intel_engine_cs *engine) int port; port = 1; /* wait for a free slot */ - if (engine->disable_lite_restore_wa || engine->preempt_wa) + if (engine->preempt_wa) port = 0; /* wait for GPU to be idle before continuing */ return !engine->execlist_port[port].request; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 79c2b8d72322..9183d148b36a 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -380,7 +380,6 @@ struct intel_engine_cs { struct rb_root execlist_queue; struct rb_node *execlist_first; unsigned int fw_domains; - bool disable_lite_restore_wa; bool preempt_wa; u32 ctx_desc_template; -- cgit From f8dd2934c4ec03a56cfc1b7d23c0248aa6e4adf0 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 23 Jan 2017 13:05:57 +0000 Subject: drm/i915: Remove BXT incoherent seqno write workaround This w/a was only used for preproduction hw, which is no longer in use. Remove the workaround to simplify the code. Signed-off-by: Chris Wilson Cc: Mika Kuoppala Cc: Joonas Lahtinen Cc: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170123130601.2281-2-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen --- drivers/gpu/drm/i915/intel_lrc.c | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 8ffa4961aa40..202ce1e6e499 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -1638,21 +1638,6 @@ static int gen8_emit_flush_render(struct drm_i915_gem_request *request, return 0; } -static void bxt_a_seqno_barrier(struct intel_engine_cs *engine) -{ - /* - * On BXT A steppings there is a HW coherency issue whereby the - * MI_STORE_DATA_IMM storing the completed request's seqno - * occasionally doesn't invalidate the CPU cache. Work around this by - * clflushing the corresponding cacheline whenever the caller wants - * the coherency to be guaranteed. Note that this cacheline is known - * to be clean at this point, since we only write it in - * bxt_a_set_seqno(), where we also do a clflush after the write. So - * this clflush in practice becomes an invalidate operation. - */ - intel_flush_status_page(engine, I915_GEM_HWS_INDEX); -} - /* * Reserve space for 2 NOOPs at the end of each request to be * used as a workaround for not being allowed to do lite @@ -1800,8 +1785,6 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine) engine->irq_enable = gen8_logical_ring_enable_irq; engine->irq_disable = gen8_logical_ring_disable_irq; engine->emit_bb_start = gen8_emit_bb_start; - if (IS_BXT_REVID(engine->i915, 0, BXT_REVID_A1)) - engine->irq_seqno_barrier = bxt_a_seqno_barrier; } static inline void -- cgit From 32ebc292119a92e3f753a287923d2d31846247f6 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 23 Jan 2017 13:05:58 +0000 Subject: drm/i915: Remove BXT restore arbitration around ctx switch This w/a (WaDisableCtxRestoreArbitration) was only used for preproduction hw, which is no longer in use. Remove the workaround to simplify the code. Signed-off-by: Chris Wilson Cc: Mika Kuoppala Cc: Joonas Lahtinen Cc: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170123130601.2281-3-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen --- drivers/gpu/drm/i915/intel_lrc.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 202ce1e6e499..b3d1a9520616 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -1102,10 +1102,6 @@ static int gen9_init_indirectctx_bb(struct intel_engine_cs *engine, struct drm_i915_private *dev_priv = engine->i915; uint32_t index = wa_ctx_start(wa_ctx, *offset, CACHELINE_DWORDS); - /* WaDisableCtxRestoreArbitration:bxt */ - if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) - wa_ctx_emit(batch, index, MI_ARB_ON_OFF | MI_ARB_DISABLE); - /* WaFlushCoherentL3CacheLinesAtContextSwitch:skl,bxt */ ret = gen8_emit_flush_coherentl3_wa(engine, batch, index); if (ret < 0) @@ -1202,10 +1198,6 @@ static int gen9_init_perctx_bb(struct intel_engine_cs *engine, wa_ctx_emit(batch, index, MI_NOOP); } - /* WaDisableCtxRestoreArbitration:bxt */ - if (IS_BXT_REVID(engine->i915, 0, BXT_REVID_A1)) - wa_ctx_emit(batch, index, MI_ARB_ON_OFF | MI_ARB_ENABLE); - wa_ctx_emit(batch, index, MI_BATCH_BUFFER_END); return wa_ctx_end(wa_ctx, *offset = index, 1); -- cgit From 68bee6157384017521e881c9436b3b6336cfac96 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 23 Jan 2017 13:05:59 +0000 Subject: drm/i915: Remove BXT disable pixel mask clamping w/a This w/a (WaSetDisablePixMaskCammingAndRhwoInCommonSliceChicken) was only used for preproduction hw, which is no longer in use. Remove the workaround to simplify the code. Signed-off-by: Chris Wilson Cc: Mika Kuoppala Cc: Joonas Lahtinen Cc: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170123130601.2281-4-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen --- drivers/gpu/drm/i915/intel_lrc.c | 9 --------- 1 file changed, 9 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index b3d1a9520616..029a60e8683d 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -1170,15 +1170,6 @@ static int gen9_init_perctx_bb(struct intel_engine_cs *engine, { uint32_t index = wa_ctx_start(wa_ctx, *offset, CACHELINE_DWORDS); - /* WaSetDisablePixMaskCammingAndRhwoInCommonSliceChicken:bxt */ - if (IS_BXT_REVID(engine->i915, 0, BXT_REVID_A1)) { - wa_ctx_emit(batch, index, MI_LOAD_REGISTER_IMM(1)); - wa_ctx_emit_reg(batch, index, GEN9_SLICE_COMMON_ECO_CHICKEN0); - wa_ctx_emit(batch, index, - _MASKED_BIT_ENABLE(DISABLE_PIXEL_MASK_CAMMING)); - wa_ctx_emit(batch, index, MI_NOOP); - } - /* WaClearTdlStateAckDirtyBits:bxt */ if (IS_BXT_REVID(engine->i915, 0, BXT_REVID_B0)) { wa_ctx_emit(batch, index, MI_LOAD_REGISTER_IMM(4)); -- cgit From b403c8feaf4838ad54cb9c15d0ad8453aa90af50 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 23 Jan 2017 13:06:00 +0000 Subject: drm/i915: Remove BXT TDL state w/a This w/a (WaClearTdlStateAckDirtyBits) was only used for preproduction hw, which is no longer in use. Remove the workaround to simplify the code. Signed-off-by: Chris Wilson Cc: Mika Kuoppala Cc: Joonas Lahtinen Cc: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170123130601.2281-5-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen --- drivers/gpu/drm/i915/intel_lrc.c | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 029a60e8683d..32096d141d57 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -1170,25 +1170,6 @@ static int gen9_init_perctx_bb(struct intel_engine_cs *engine, { uint32_t index = wa_ctx_start(wa_ctx, *offset, CACHELINE_DWORDS); - /* WaClearTdlStateAckDirtyBits:bxt */ - if (IS_BXT_REVID(engine->i915, 0, BXT_REVID_B0)) { - wa_ctx_emit(batch, index, MI_LOAD_REGISTER_IMM(4)); - - wa_ctx_emit_reg(batch, index, GEN8_STATE_ACK); - wa_ctx_emit(batch, index, _MASKED_BIT_DISABLE(GEN9_SUBSLICE_TDL_ACK_BITS)); - - wa_ctx_emit_reg(batch, index, GEN9_STATE_ACK_SLICE1); - wa_ctx_emit(batch, index, _MASKED_BIT_DISABLE(GEN9_SUBSLICE_TDL_ACK_BITS)); - - wa_ctx_emit_reg(batch, index, GEN9_STATE_ACK_SLICE2); - wa_ctx_emit(batch, index, _MASKED_BIT_DISABLE(GEN9_SUBSLICE_TDL_ACK_BITS)); - - wa_ctx_emit_reg(batch, index, GEN7_ROW_CHICKEN2); - /* dummy write to CS, mask bits are 0 to ensure the register is not modified */ - wa_ctx_emit(batch, index, 0x0); - wa_ctx_emit(batch, index, MI_NOOP); - } - wa_ctx_emit(batch, index, MI_BATCH_BUFFER_END); return wa_ctx_end(wa_ctx, *offset = index, 1); -- cgit From bb8f0f5abdd7845175962a3fb99a5681290f9566 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 24 Jan 2017 11:01:34 +0000 Subject: drm/i915: Split intel_engine allocation and initialisation In order to reset the GPU early on in the module load sequence, we need to allocate the basic engine structs (to populate the mmio offsets etc). Currently, the engine initialisation allocates both the base struct and also allocate auxiliary objects, which depend upon state setup quite late in the load sequence. We split off the allocation callback for later and allow ourselves to allocate the engine structs themselves early. v2: Different paint for the unwind following error. Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170124110135.6418-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_drv.c | 19 +++++++- drivers/gpu/drm/i915/i915_drv.h | 3 ++ drivers/gpu/drm/i915/intel_engine_cs.c | 80 ++++++++++++++++++++++++++-------- drivers/gpu/drm/i915/intel_lrc.h | 2 - 4 files changed, 81 insertions(+), 23 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index ca168b22ee26..f8d1ffeccc69 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -755,6 +755,15 @@ out_err: return -ENOMEM; } +static void i915_engines_cleanup(struct drm_i915_private *i915) +{ + struct intel_engine_cs *engine; + enum intel_engine_id id; + + for_each_engine(engine, i915, id) + kfree(engine); +} + static void i915_workqueues_cleanup(struct drm_i915_private *dev_priv) { destroy_workqueue(dev_priv->hotplug.dp_wq); @@ -817,12 +826,15 @@ static int i915_driver_init_early(struct drm_i915_private *dev_priv, mutex_init(&dev_priv->pps_mutex); intel_uc_init_early(dev_priv); - i915_memcpy_init_early(dev_priv); + ret = intel_engines_init_early(dev_priv); + if (ret) + return ret; + ret = i915_workqueues_init(dev_priv); if (ret < 0) - return ret; + goto err_engines; ret = intel_gvt_init(dev_priv); if (ret < 0) @@ -857,6 +869,8 @@ err_gvt: intel_gvt_cleanup(dev_priv); err_workqueues: i915_workqueues_cleanup(dev_priv); +err_engines: + i915_engines_cleanup(dev_priv); return ret; } @@ -869,6 +883,7 @@ static void i915_driver_cleanup_early(struct drm_i915_private *dev_priv) i915_perf_fini(dev_priv); i915_gem_load_cleanup(dev_priv); i915_workqueues_cleanup(dev_priv); + i915_engines_cleanup(dev_priv); } static int i915_mmio_setup(struct drm_i915_private *dev_priv) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index ead3b6417c54..628f0fd53827 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2940,6 +2940,9 @@ extern unsigned long i915_gfx_val(struct drm_i915_private *dev_priv); extern void i915_update_gfx_val(struct drm_i915_private *dev_priv); int vlv_force_gfx_clock(struct drm_i915_private *dev_priv, bool on); +int intel_engines_init_early(struct drm_i915_private *dev_priv); +int intel_engines_init(struct drm_i915_private *dev_priv); + /* intel_hotplug.c */ void intel_hpd_irq_handler(struct drm_i915_private *dev_priv, u32 pin_mask, u32 long_mask); diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 371acf109e34..69a6416d1223 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -110,21 +110,20 @@ intel_engine_setup(struct drm_i915_private *dev_priv, } /** - * intel_engines_init() - allocate, populate and init the Engine Command Streamers + * intel_engines_init_early() - allocate the Engine Command Streamers * @dev_priv: i915 device private * * Return: non-zero if the initialization failed. */ -int intel_engines_init(struct drm_i915_private *dev_priv) +int intel_engines_init_early(struct drm_i915_private *dev_priv) { struct intel_device_info *device_info = mkwrite_device_info(dev_priv); unsigned int ring_mask = INTEL_INFO(dev_priv)->ring_mask; unsigned int mask = 0; - int (*init)(struct intel_engine_cs *engine); struct intel_engine_cs *engine; enum intel_engine_id id; unsigned int i; - int ret; + int err; WARN_ON(ring_mask == 0); WARN_ON(ring_mask & @@ -134,23 +133,65 @@ int intel_engines_init(struct drm_i915_private *dev_priv) if (!HAS_ENGINE(dev_priv, i)) continue; + err = intel_engine_setup(dev_priv, i); + if (err) + goto cleanup; + + mask |= ENGINE_MASK(i); + } + + /* + * Catch failures to update intel_engines table when the new engines + * are added to the driver by a warning and disabling the forgotten + * engines. + */ + if (WARN_ON(mask != ring_mask)) + device_info->ring_mask = mask; + + device_info->num_rings = hweight32(mask); + + return 0; + +cleanup: + for_each_engine(engine, dev_priv, id) + kfree(engine); + return err; +} + +/** + * intel_engines_init() - allocate, populate and init the Engine Command Streamers + * @dev_priv: i915 device private + * + * Return: non-zero if the initialization failed. + */ +int intel_engines_init(struct drm_i915_private *dev_priv) +{ + struct intel_device_info *device_info = mkwrite_device_info(dev_priv); + struct intel_engine_cs *engine; + enum intel_engine_id id, err_id; + unsigned int mask = 0; + int err = 0; + + for_each_engine(engine, dev_priv, id) { + int (*init)(struct intel_engine_cs *engine); + if (i915.enable_execlists) - init = intel_engines[i].init_execlists; + init = intel_engines[id].init_execlists; else - init = intel_engines[i].init_legacy; - - if (!init) + init = intel_engines[id].init_legacy; + if (!init) { + kfree(engine); + dev_priv->engine[id] = NULL; continue; + } - ret = intel_engine_setup(dev_priv, i); - if (ret) - goto cleanup; - - ret = init(dev_priv->engine[i]); - if (ret) + err = init(engine); + if (err) { + err_id = id; goto cleanup; + } - mask |= ENGINE_MASK(i); + mask |= ENGINE_MASK(id); } /* @@ -158,7 +199,7 @@ int intel_engines_init(struct drm_i915_private *dev_priv) * are added to the driver by a warning and disabling the forgotten * engines. */ - if (WARN_ON(mask != ring_mask)) + if (WARN_ON(mask != INTEL_INFO(dev_priv)->ring_mask)) device_info->ring_mask = mask; device_info->num_rings = hweight32(mask); @@ -167,13 +208,14 @@ int intel_engines_init(struct drm_i915_private *dev_priv) cleanup: for_each_engine(engine, dev_priv, id) { - if (i915.enable_execlists) + if (id >= err_id) + kfree(engine); + else if (i915.enable_execlists) intel_logical_ring_cleanup(engine); else intel_engine_cleanup(engine); } - - return ret; + return err; } void intel_engine_init_global_seqno(struct intel_engine_cs *engine, u32 seqno) diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h index 0c852c024227..c8009c7bfbdd 100644 --- a/drivers/gpu/drm/i915/intel_lrc.h +++ b/drivers/gpu/drm/i915/intel_lrc.h @@ -68,8 +68,6 @@ void intel_logical_ring_cleanup(struct intel_engine_cs *engine); int logical_render_ring_init(struct intel_engine_cs *engine); int logical_xcs_ring_init(struct intel_engine_cs *engine); -int intel_engines_init(struct drm_i915_private *dev_priv); - /* Logical Ring Contexts */ /* One extra page is added before LRC for GuC as shared data */ -- cgit From 241455172b8683f65a325490baf2d8dd683b3832 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 24 Jan 2017 11:01:35 +0000 Subject: drm/i915: Reset the gpu on takeover The GPU may be in an unknown state following resume and module load. The previous occupant may have left contexts loaded, or other dangerous state, which can cause an immediate GPU hang for us. The only save course of action is to reset the GPU prior to using it - similarly to how we reset the GPU prior to unload (before a second user may be affected by our leftover state). We need to reset the GPU very early in our load/resume sequence so that any stale HW pointers are revoked prior to any resource allocations we make (that may conflict). A reset should only be a couple of milliseconds on a slow device, a cost we should easily be able to absorb into our initialisation times. Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170124110135.6418-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_drv.c | 3 +++ drivers/gpu/drm/i915/i915_drv.h | 2 ++ drivers/gpu/drm/i915/i915_gem.c | 27 +++++++++++++++++++++++---- 3 files changed, 28 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index f8d1ffeccc69..507a6f69426f 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -950,6 +950,7 @@ static int i915_driver_init_mmio(struct drm_i915_private *dev_priv) goto put_bridge; intel_uncore_init(dev_priv); + i915_gem_init_mmio(dev_priv); return 0; @@ -1731,6 +1732,8 @@ static int i915_drm_resume_early(struct drm_device *dev) !(dev_priv->suspended_to_idle && dev_priv->csr.dmc_payload)) intel_power_domains_init_hw(dev_priv, true); + i915_gem_sanitize(dev_priv); + enable_rpm_wakeref_asserts(dev_priv); out: diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 628f0fd53827..a53f6b30b695 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3120,6 +3120,7 @@ int i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); int i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); +void i915_gem_sanitize(struct drm_i915_private *i915); int i915_gem_load_init(struct drm_i915_private *dev_priv); void i915_gem_load_cleanup(struct drm_i915_private *dev_priv); void i915_gem_load_init_fences(struct drm_i915_private *dev_priv); @@ -3335,6 +3336,7 @@ int i915_gem_reset_prepare(struct drm_i915_private *dev_priv); void i915_gem_reset_finish(struct drm_i915_private *dev_priv); void i915_gem_set_wedged(struct drm_i915_private *dev_priv); void i915_gem_clflush_object(struct drm_i915_gem_object *obj, bool force); +void i915_gem_init_mmio(struct drm_i915_private *i915); int __must_check i915_gem_init(struct drm_i915_private *dev_priv); int __must_check i915_gem_init_hw(struct drm_i915_private *dev_priv); void i915_gem_init_swizzling(struct drm_i915_private *dev_priv); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index a07b62732923..2522d4895ff7 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4201,6 +4201,23 @@ static void assert_kernel_context_is_current(struct drm_i915_private *dev_priv) !i915_gem_context_is_kernel(engine->last_retired_context)); } +void i915_gem_sanitize(struct drm_i915_private *i915) +{ + /* + * If we inherit context state from the BIOS or earlier occupants + * of the GPU, the GPU may be in an inconsistent state when we + * try to take over. The only way to remove the earlier state + * is by resetting. However, resetting on earlier gen is tricky as + * it may impact the display and we are uncertain about the stability + * of the reset, so we only reset recent machines with logical + * context support (that must be reset to remove any stray contexts). + */ + if (HAS_HW_CONTEXTS(i915)) { + int reset = intel_gpu_reset(i915, ALL_ENGINES); + WARN_ON(reset && reset != -ENODEV); + } +} + int i915_gem_suspend(struct drm_i915_private *dev_priv) { struct drm_device *dev = &dev_priv->drm; @@ -4271,10 +4288,7 @@ int i915_gem_suspend(struct drm_i915_private *dev_priv) * machines is a good idea, we don't - just in case it leaves the * machine in an unusable condition. */ - if (HAS_HW_CONTEXTS(dev_priv)) { - int reset = intel_gpu_reset(dev_priv, ALL_ENGINES); - WARN_ON(reset && reset != -ENODEV); - } + i915_gem_sanitize(dev_priv); return 0; @@ -4492,6 +4506,11 @@ out_unlock: return ret; } +void i915_gem_init_mmio(struct drm_i915_private *i915) +{ + i915_gem_sanitize(i915); +} + void i915_gem_cleanup_engines(struct drm_i915_private *dev_priv) { -- cgit From 5ec63bbdf527d26606c8ba9d3a841755d3887fbe Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Fri, 20 Jan 2017 19:04:06 +0200 Subject: drm/i915/dp: do not proceed with autotests if we don't ACK them There is no point in setting intel_dp->compliance.test_type, and proceeding with the autotests, if we're about to NAK the request. Some drive-by cleanups while at it. v2: look at the ACK bit, as the result may also contain TEST_EDID_CHECKSUM_WRITE Cc: Manasi Navare Reviewed-by: Manasi Navare Tested-by: Manasi Navare Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/1484931846-25390-1-git-send-email-jani.nikula@intel.com --- drivers/gpu/drm/i915/intel_dp.c | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index e80d620846c8..247fbf352576 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -3985,45 +3985,42 @@ static uint8_t intel_dp_autotest_phy_pattern(struct intel_dp *intel_dp) static void intel_dp_handle_test_request(struct intel_dp *intel_dp) { uint8_t response = DP_TEST_NAK; - uint8_t rxdata = 0; - int status = 0; + uint8_t request = 0; + int status; - status = drm_dp_dpcd_read(&intel_dp->aux, DP_TEST_REQUEST, &rxdata, 1); + status = drm_dp_dpcd_readb(&intel_dp->aux, DP_TEST_REQUEST, &request); if (status <= 0) { DRM_DEBUG_KMS("Could not read test request from sink\n"); goto update_status; } - switch (rxdata) { + switch (request) { case DP_TEST_LINK_TRAINING: DRM_DEBUG_KMS("LINK_TRAINING test requested\n"); - intel_dp->compliance.test_type = DP_TEST_LINK_TRAINING; response = intel_dp_autotest_link_training(intel_dp); break; case DP_TEST_LINK_VIDEO_PATTERN: DRM_DEBUG_KMS("TEST_PATTERN test requested\n"); - intel_dp->compliance.test_type = DP_TEST_LINK_VIDEO_PATTERN; response = intel_dp_autotest_video_pattern(intel_dp); break; case DP_TEST_LINK_EDID_READ: DRM_DEBUG_KMS("EDID test requested\n"); - intel_dp->compliance.test_type = DP_TEST_LINK_EDID_READ; response = intel_dp_autotest_edid(intel_dp); break; case DP_TEST_LINK_PHY_TEST_PATTERN: DRM_DEBUG_KMS("PHY_PATTERN test requested\n"); - intel_dp->compliance.test_type = DP_TEST_LINK_PHY_TEST_PATTERN; response = intel_dp_autotest_phy_pattern(intel_dp); break; default: - DRM_DEBUG_KMS("Invalid test request '%02x'\n", rxdata); + DRM_DEBUG_KMS("Invalid test request '%02x'\n", request); break; } + if (response & DP_TEST_ACK) + intel_dp->compliance.test_type = request; + update_status: - status = drm_dp_dpcd_write(&intel_dp->aux, - DP_TEST_RESPONSE, - &response, 1); + status = drm_dp_dpcd_writeb(&intel_dp->aux, DP_TEST_RESPONSE, response); if (status <= 0) DRM_DEBUG_KMS("Could not write test response to sink\n"); } -- cgit From c816e605ffb26ce1d3c06238c7de6662569ecb1e Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 24 Jan 2017 11:00:02 +0000 Subject: drm/i915: Assert that we don't submit to execlists whilst a preempt is pending To complement the check in execlists_elsp_ready(), also assert that we don't submit the same context while it has a lite restore still pending. Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Link: http://patchwork.freedesktop.org/patch/msgid/20170124110009.28947-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_lrc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 32096d141d57..9dd612a2df16 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -375,6 +375,7 @@ static void execlists_submit_ports(struct intel_engine_cs *engine) dev_priv->regs + i915_mmio_reg_offset(RING_ELSP(engine)); u64 desc[2]; + GEM_BUG_ON(port[0].count > 1); if (!port[0].count) execlists_context_status_change(port[0].request, INTEL_CONTEXT_SCHEDULE_IN); -- cgit From 816ee798ec2b46b1f92aaebb1c7b5d2e1abdc43e Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 24 Jan 2017 11:00:03 +0000 Subject: drm/i915: Only disable execlist preemption for the duration of the request We need to prevent resubmission of the context immediately following an initial resubmit (which does a lite-restore preemption). Currently we do this by disabling all submission whilst the context is still active, but we can improve this by limiting the restriction to only until we receive notification from the context-switch interrupt that the lite-restore preemption is complete. Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Link: http://patchwork.freedesktop.org/patch/msgid/20170124110009.28947-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_debugfs.c | 18 ++++++++++++------ drivers/gpu/drm/i915/intel_lrc.c | 14 ++++---------- drivers/gpu/drm/i915/intel_ringbuffer.h | 1 - 3 files changed, 16 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index fa69d72fdcb9..9d7a77ecec3d 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -3320,15 +3320,21 @@ static int i915_engine_info(struct seq_file *m, void *unused) rcu_read_lock(); rq = READ_ONCE(engine->execlist_port[0].request); - if (rq) - print_request(m, rq, "\t\tELSP[0] "); - else + if (rq) { + seq_printf(m, "\t\tELSP[0] count=%d, ", + engine->execlist_port[0].count); + print_request(m, rq, "rq: "); + } else { seq_printf(m, "\t\tELSP[0] idle\n"); + } rq = READ_ONCE(engine->execlist_port[1].request); - if (rq) - print_request(m, rq, "\t\tELSP[1] "); - else + if (rq) { + seq_printf(m, "\t\tELSP[1] count=%d, ", + engine->execlist_port[1].count); + print_request(m, rq, "rq: "); + } else { seq_printf(m, "\t\tELSP[1] idle\n"); + } rcu_read_unlock(); spin_lock_irq(&engine->timeline->lock); diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 9dd612a2df16..9896027880ea 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -380,7 +380,7 @@ static void execlists_submit_ports(struct intel_engine_cs *engine) execlists_context_status_change(port[0].request, INTEL_CONTEXT_SCHEDULE_IN); desc[0] = execlists_update_context(port[0].request); - engine->preempt_wa = port[0].count++; /* bdw only? fixed on skl? */ + port[0].count++; if (port[1].request) { GEM_BUG_ON(port[1].count); @@ -545,15 +545,11 @@ bool intel_execlists_idle(struct drm_i915_private *dev_priv) return true; } -static bool execlists_elsp_ready(struct intel_engine_cs *engine) +static bool execlists_elsp_ready(const struct intel_engine_cs *engine) { - int port; + const struct execlist_port *port = engine->execlist_port; - port = 1; /* wait for a free slot */ - if (engine->preempt_wa) - port = 0; /* wait for GPU to be idle before continuing */ - - return !engine->execlist_port[port].request; + return port[0].count + port[1].count < 2; } /* @@ -601,8 +597,6 @@ static void intel_lrc_irq_handler(unsigned long data) i915_gem_request_put(port[0].request); port[0] = port[1]; memset(&port[1], 0, sizeof(port[1])); - - engine->preempt_wa = false; } GEM_BUG_ON(port[0].count == 0 && diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 9183d148b36a..dbd32585f27a 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -380,7 +380,6 @@ struct intel_engine_cs { struct rb_root execlist_queue; struct rb_node *execlist_first; unsigned int fw_domains; - bool preempt_wa; u32 ctx_desc_template; /* Contexts are pinned whilst they are active on the GPU. The last -- cgit From 538b257dae83268cc3536fb4c4ab4f57901d449d Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 24 Jan 2017 15:18:05 +0000 Subject: drm/i915: Move breadcrumbs irq_posted up a level to engine In the next patch, we will use the irq_posted technique for another engine interrupt, rather than use two members for the atomic updates, we can use two bits of one instead. First, we need to update the breadcrumbs to use the new common engine->irq_posted. v2: Use set_bit() rather than __set_bit() to ensure atomicity with respect to other bits in the mask Signed-off-by: Chris Wilson Cc: Mika Kuoppala Cc: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170124151805.26146-1-chris@chris-wilson.co.uk Reviewed-by: Mika Kuoppala --- drivers/gpu/drm/i915/i915_drv.h | 2 +- drivers/gpu/drm/i915/i915_irq.c | 2 +- drivers/gpu/drm/i915/intel_breadcrumbs.c | 9 ++++----- drivers/gpu/drm/i915/intel_ringbuffer.h | 4 +++- 4 files changed, 9 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index a53f6b30b695..6eff81fb939c 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3963,7 +3963,7 @@ __i915_request_irq_complete(struct drm_i915_gem_request *req) */ if (engine->irq_seqno_barrier && rcu_access_pointer(engine->breadcrumbs.irq_seqno_bh) == current && - cmpxchg_relaxed(&engine->breadcrumbs.irq_posted, 1, 0)) { + test_and_clear_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted)) { struct task_struct *tsk; /* The ordering of irq_posted versus applying the barrier diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 6fefc34ef602..7e087c344265 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -1033,7 +1033,7 @@ static void ironlake_rps_change_irq_handler(struct drm_i915_private *dev_priv) static void notify_ring(struct intel_engine_cs *engine) { - smp_store_mb(engine->breadcrumbs.irq_posted, true); + set_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted); if (intel_engine_wakeup(engine)) trace_i915_gem_request_notify(engine); } diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c index c6fa77177615..6b24f2544b6b 100644 --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c @@ -81,7 +81,7 @@ static void irq_enable(struct intel_engine_cs *engine) * we still need to force the barrier before reading the seqno, * just in case. */ - engine->breadcrumbs.irq_posted = true; + set_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted); /* Caller disables interrupts */ spin_lock(&engine->i915->irq_lock); @@ -95,8 +95,6 @@ static void irq_disable(struct intel_engine_cs *engine) spin_lock(&engine->i915->irq_lock); engine->irq_disable(engine); spin_unlock(&engine->i915->irq_lock); - - engine->breadcrumbs.irq_posted = false; } static void __intel_breadcrumbs_enable_irq(struct intel_breadcrumbs *b) @@ -257,7 +255,8 @@ static bool __intel_engine_add_wait(struct intel_engine_cs *engine, * in case the seqno passed. */ __intel_breadcrumbs_enable_irq(b); - if (READ_ONCE(b->irq_posted)) + if (test_bit(ENGINE_IRQ_BREADCRUMB, + &engine->irq_posted)) wake_up_process(to_wait(next)->tsk); } @@ -610,7 +609,7 @@ void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine) if (intel_engine_has_waiter(engine)) { b->timeout = wait_timeout(); __intel_breadcrumbs_enable_irq(b); - if (READ_ONCE(b->irq_posted)) + if (test_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted)) wake_up_process(b->first_wait->tsk); } else { /* sanitize the IMR and unmask any auxiliary interrupts */ diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index dbd32585f27a..a9ea84ea3155 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -211,6 +211,9 @@ struct intel_engine_cs { struct intel_render_state *render_state; + unsigned long irq_posted; +#define ENGINE_IRQ_BREADCRUMB 0 + /* Rather than have every client wait upon all user interrupts, * with the herd waking after every interrupt and each doing the * heavyweight seqno dance, we delegate the task (of being the @@ -229,7 +232,6 @@ struct intel_engine_cs { */ struct intel_breadcrumbs { struct task_struct __rcu *irq_seqno_bh; /* bh for interrupts */ - bool irq_posted; spinlock_t lock; /* protects the lists of requests; irqsafe */ struct rb_root waiters; /* sorted by retirement, priority */ -- cgit From f747026c2b350fdb3c2d6fad51b7ebed4851183e Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 24 Jan 2017 15:20:21 +0000 Subject: drm/i915: Only run execlist context-switch handler after an interrupt Mark when we run the execlist tasklet following the interrupt, so we don't probe a potentially uninitialised register when submitting the contexts multiple times before the hardware responds. v2: Use a shared engine->irq_posted v3: Always use locked bitops to be sure of atomicity wrt to other bits in the mask. Signed-off-by: Chris Wilson Cc: Mika Kuoppala Cc: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170124152021.26587-1-chris@chris-wilson.co.uk Reviewed-by: Mika Kuoppala --- drivers/gpu/drm/i915/i915_irq.c | 7 +++++-- drivers/gpu/drm/i915/intel_lrc.c | 3 ++- drivers/gpu/drm/i915/intel_ringbuffer.h | 1 + 3 files changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 7e087c344265..3f3c9082b0f8 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -1349,8 +1349,11 @@ gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 iir, int test_shift) { if (iir & (GT_RENDER_USER_INTERRUPT << test_shift)) notify_ring(engine); - if (iir & (GT_CONTEXT_SWITCH_INTERRUPT << test_shift)) - tasklet_schedule(&engine->irq_tasklet); + + if (iir & (GT_CONTEXT_SWITCH_INTERRUPT << test_shift)) { + set_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted); + tasklet_hi_schedule(&engine->irq_tasklet); + } } static irqreturn_t gen8_gt_irq_ack(struct drm_i915_private *dev_priv, diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 9896027880ea..f729568e5e54 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -564,7 +564,7 @@ static void intel_lrc_irq_handler(unsigned long data) intel_uncore_forcewake_get(dev_priv, engine->fw_domains); - if (!execlists_elsp_idle(engine)) { + while (test_and_clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted)) { u32 __iomem *csb_mmio = dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)); u32 __iomem *buf = @@ -1297,6 +1297,7 @@ static int gen8_init_common_ring(struct intel_engine_cs *engine) DRM_DEBUG_DRIVER("Execlists enabled for %s\n", engine->name); /* After a GPU reset, we may have requests to replay */ + clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted); if (!execlists_elsp_idle(engine)) { engine->execlist_port[0].count = 0; engine->execlist_port[1].count = 0; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index a9ea84ea3155..8e872730f8eb 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -213,6 +213,7 @@ struct intel_engine_cs { unsigned long irq_posted; #define ENGINE_IRQ_BREADCRUMB 0 +#define ENGINE_IRQ_EXECLIST 1 /* Rather than have every client wait upon all user interrupts, * with the herd waking after every interrupt and each doing the -- cgit From a37951ac9f9642819e400814f6f30689db233c24 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 24 Jan 2017 11:00:06 +0000 Subject: drm/i915: Skip the execlists CSB scan and rewrite if the ring is untouched If the CSB head/tail pointers are unchanged, we can skip the update of the CSB register afterwards. Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170124110009.28947-5-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_lrc.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index f729568e5e54..99ab2d70be37 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -574,9 +574,12 @@ static void intel_lrc_irq_handler(unsigned long data) csb = readl(csb_mmio); head = GEN8_CSB_READ_PTR(csb); tail = GEN8_CSB_WRITE_PTR(csb); + if (head == tail) + break; + if (tail < head) tail += GEN8_CSB_ENTRIES; - while (head < tail) { + do { unsigned int idx = ++head % GEN8_CSB_ENTRIES; unsigned int status = readl(buf + 2 * idx); @@ -601,7 +604,7 @@ static void intel_lrc_irq_handler(unsigned long data) GEM_BUG_ON(port[0].count == 0 && !(status & GEN8_CTX_STATUS_ACTIVE_IDLE)); - } + } while (head < tail); writel(_MASKED_FIELD(GEN8_CSB_READ_PTR_MASK, GEN8_CSB_WRITE_PTR(csb) << 8), -- cgit From 3833281adb0f01ca5b279664d14611d8ed1deb98 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 24 Jan 2017 11:00:07 +0000 Subject: drm/i915: Only attempt to pass the first request to execlists Only the first request added to the execlist queue can be submitted. If this request is not the first request on the queue, it means that there are already higher priority requests waiting upon the tasklet and kicking it will make no difference. This is more relevant for a later patch, where we more eagerly try and kick the tasklet to handle the submission of new requests. Signed-off-by: Chris Wilson Cc: Mika Kuoppala Cc: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170124110009.28947-6-chris@chris-wilson.co.uk Reviewed-by: Mika Kuoppala --- drivers/gpu/drm/i915/intel_lrc.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 99ab2d70be37..11568ab9f248 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -651,10 +651,11 @@ static void execlists_submit_request(struct drm_i915_gem_request *request) /* Will be called from irq-context when using foreign fences. */ spin_lock_irqsave(&engine->timeline->lock, flags); - if (insert_request(&request->priotree, &engine->execlist_queue)) + if (insert_request(&request->priotree, &engine->execlist_queue)) { engine->execlist_first = &request->priotree.node; - if (execlists_elsp_idle(engine)) - tasklet_hi_schedule(&engine->irq_tasklet); + if (execlists_elsp_idle(engine)) + tasklet_hi_schedule(&engine->irq_tasklet); + } spin_unlock_irqrestore(&engine->timeline->lock, flags); } -- cgit From 48ea2554f46e7c1771dba1529a17aa04792871b2 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 24 Jan 2017 11:00:08 +0000 Subject: drm/i915: Dequeue execlists on a new request if any port is available If the second ELSP port is available, schedule the execlists tasklet to see if the new request can occupy it. Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170124110009.28947-7-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_lrc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 11568ab9f248..bee9d565b8f3 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -653,7 +653,7 @@ static void execlists_submit_request(struct drm_i915_gem_request *request) if (insert_request(&request->priotree, &engine->execlist_queue)) { engine->execlist_first = &request->priotree.node; - if (execlists_elsp_idle(engine)) + if (execlists_elsp_ready(engine)) tasklet_hi_schedule(&engine->irq_tasklet); } -- cgit From 7c9e934ef8a09a1a42f15ce9f0f872fdfdb67b97 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 24 Jan 2017 11:00:09 +0000 Subject: drm/i915: Emit dma-fence (and execlists submit) first from signaler When introduced, I thought that reducing client latency from the signaler was the priority. Since its inception the signaler has become responsible for keeping the execlists full, via the dma-fence. As this is very important to minimise overall execution time, signal the dma-fence first and then signal any waiting clients. Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170124110009.28947-8-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_breadcrumbs.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c index 6b24f2544b6b..9fd002bcebb6 100644 --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c @@ -459,16 +459,16 @@ static int intel_breadcrumbs_signaler(void *arg) */ request = READ_ONCE(b->first_signal); if (signal_complete(request)) { + local_bh_disable(); + dma_fence_signal(&request->fence); + local_bh_enable(); /* kick start the tasklets */ + /* Wake up all other completed waiters and select the * next bottom-half for the next user interrupt. */ intel_engine_remove_wait(engine, &request->signaling.wait); - local_bh_disable(); - dma_fence_signal(&request->fence); - local_bh_enable(); /* kick start the tasklets */ - /* Find the next oldest signal. Note that as we have * not been holding the lock, another client may * have installed an even older signal than the one -- cgit From eb955eee27d9dc176871540c43c9070ee4701642 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 23 Jan 2017 21:29:39 +0000 Subject: drm/i915: Move atomic state free from out of fence release Fences are required to support being released from under an atomic context. The drm_atomic_state struct may take a mutex when being released and so we cannot drop a reference to the drm_atomic_state from the fence release path directly, and so we need to defer that unreference to a worker. [ 326.576697] WARNING: CPU: 2 PID: 366 at kernel/sched/core.c:7737 __might_sleep+0x5d/0x80 [ 326.576816] do not call blocking ops when !TASK_RUNNING; state=1 set at [] intel_breadcrumbs_signaler+0x59/0x270 [i915] [ 326.576818] Modules linked in: rfcomm fuse snd_hda_codec_hdmi bnep snd_hda_codec_realtek snd_hda_codec_generic snd_hda_intel snd_hda_codec snd_hwdep snd_hda_core snd_pcm snd_seq_midi snd_seq_midi_event snd_rawmidi snd_seq snd_seq_device snd_timer input_leds led_class snd punit_atom_debug btusb btrtl btbcm btintel intel_rapl bluetooth i915 drm_kms_helper syscopyarea sysfillrect iwlwifi sysimgblt soundcore fb_sys_fops mei_txe cfg80211 drm pwm_lpss_platform pwm_lpss pinctrl_cherryview fjes acpi_pad parport_pc ppdev parport autofs4 [ 326.576899] CPU: 2 PID: 366 Comm: i915/signal:0 Tainted: G U 4.10.0-rc3-patser+ #5030 [ 326.576902] Hardware name: /NUC5PPYB, BIOS PYBSWCEL.86A.0031.2015.0601.1712 06/01/2015 [ 326.576905] Call Trace: [ 326.576920] dump_stack+0x4d/0x6d [ 326.576926] __warn+0xc0/0xe0 [ 326.576931] warn_slowpath_fmt+0x5a/0x80 [ 326.577004] ? intel_breadcrumbs_signaler+0x59/0x270 [i915] [ 326.577075] ? intel_breadcrumbs_signaler+0x59/0x270 [i915] [ 326.577079] __might_sleep+0x5d/0x80 [ 326.577087] mutex_lock+0x1b/0x40 [ 326.577133] drm_property_free_blob+0x1e/0x80 [drm] [ 326.577167] ? drm_property_destroy+0xe0/0xe0 [drm] [ 326.577200] drm_mode_object_unreference+0x5c/0x70 [drm] [ 326.577233] drm_property_unreference_blob+0xe/0x10 [drm] [ 326.577260] __drm_atomic_helper_crtc_destroy_state+0x14/0x40 [drm_kms_helper] [ 326.577278] drm_atomic_helper_crtc_destroy_state+0x10/0x20 [drm_kms_helper] [ 326.577352] intel_crtc_destroy_state+0x9/0x10 [i915] [ 326.577388] drm_atomic_state_default_clear+0xea/0x1d0 [drm] [ 326.577462] intel_atomic_state_clear+0xd/0x20 [i915] [ 326.577497] drm_atomic_state_clear+0x1a/0x30 [drm] [ 326.577532] __drm_atomic_state_free+0x13/0x60 [drm] [ 326.577607] intel_atomic_commit_ready+0x6f/0x78 [i915] [ 326.577670] i915_sw_fence_release+0x3a/0x50 [i915] [ 326.577733] dma_i915_sw_fence_wake+0x39/0x80 [i915] [ 326.577741] dma_fence_signal+0xda/0x120 [ 326.577812] ? intel_breadcrumbs_signaler+0x59/0x270 [i915] [ 326.577884] intel_breadcrumbs_signaler+0xb1/0x270 [i915] [ 326.577889] kthread+0x127/0x130 [ 326.577961] ? intel_engine_remove_wait+0x1a0/0x1a0 [i915] [ 326.577964] ? kthread_stop+0x120/0x120 [ 326.577970] ret_from_fork+0x22/0x30 Fixes: c004a90b7263 ("drm/i915: Restore nonblocking awaits for modesetting") Reported-by: Maarten Lankhorst Signed-off-by: Chris Wilson Cc: Chris Wilson Cc: Joonas Lahtinen Cc: Maarten Lankhorst Cc: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/20170123212939.30345-1-chris@chris-wilson.co.uk Cc: # v4.10-rc1+ Reviewed-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_drv.h | 5 +++++ drivers/gpu/drm/i915/intel_display.c | 28 ++++++++++++++++++++++++++-- drivers/gpu/drm/i915/intel_drv.h | 2 ++ 3 files changed, 33 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 6eff81fb939c..3e07b8b0ec89 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2241,6 +2241,11 @@ struct drm_i915_private { struct i915_frontbuffer_tracking fb_tracking; + struct intel_atomic_helper { + struct llist_head free_list; + struct work_struct free_work; + } atomic_helper; + u16 orig_clock; bool mchbar_need_disable; diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 0bf8e1bfbe7e..5f55bc37c538 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -14570,8 +14570,14 @@ intel_atomic_commit_ready(struct i915_sw_fence *fence, break; case FENCE_FREE: - drm_atomic_state_put(&state->base); - break; + { + struct intel_atomic_helper *helper = + &to_i915(state->base.dev)->atomic_helper; + + if (llist_add(&state->freed, &helper->free_list)) + schedule_work(&helper->free_work); + break; + } } return NOTIFY_DONE; @@ -16594,6 +16600,18 @@ fail: drm_modeset_acquire_fini(&ctx); } +static void intel_atomic_helper_free_state(struct work_struct *work) +{ + struct drm_i915_private *dev_priv = + container_of(work, typeof(*dev_priv), atomic_helper.free_work); + struct intel_atomic_state *state, *next; + struct llist_node *freed; + + freed = llist_del_all(&dev_priv->atomic_helper.free_list); + llist_for_each_entry_safe(state, next, freed, freed) + drm_atomic_state_put(&state->base); +} + int intel_modeset_init(struct drm_device *dev) { struct drm_i915_private *dev_priv = to_i915(dev); @@ -16613,6 +16631,9 @@ int intel_modeset_init(struct drm_device *dev) dev->mode_config.funcs = &intel_mode_funcs; + INIT_WORK(&dev_priv->atomic_helper.free_work, + intel_atomic_helper_free_state); + intel_init_quirks(dev); intel_init_pm(dev_priv); @@ -17270,6 +17291,9 @@ void intel_modeset_cleanup(struct drm_device *dev) { struct drm_i915_private *dev_priv = to_i915(dev); + flush_work(&dev_priv->atomic_helper.free_work); + WARN_ON(!llist_empty(&dev_priv->atomic_helper.free_list)); + intel_disable_gt_powersave(dev_priv); /* diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 0cec0013ace0..396356ce4206 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -371,6 +371,8 @@ struct intel_atomic_state { struct skl_wm_values wm_results; struct i915_sw_fence commit_ready; + + struct llist_node freed; }; struct intel_plane_state { -- cgit From 8da53efaa228f12f709b23d7cd08c5644af474c4 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Mon, 23 Jan 2017 10:32:36 -0800 Subject: drm/i915/kbl: Apply WaIncreaseDefaultTLBEntries on KBL. According to wa_database this Wa persist on KBL as it was on SKL. Cc: Tim Gore Cc: Mika Kuoppala Cc: Tvrtko Ursulin Signed-off-by: Rodrigo Vivi Reviewed-by: Ander Conselvan de Oliveira Acked-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/1485196357-30599-1-git-send-email-rodrigo.vivi@intel.com --- drivers/gpu/drm/i915/i915_gem_gtt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index e808aad203d8..cc0bb7bb6775 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -2184,12 +2184,12 @@ static void gtt_write_workarounds(struct drm_i915_private *dev_priv) * called on driver load and after a GPU reset, so you can place * workarounds here even if they get overwritten by GPU reset. */ - /* WaIncreaseDefaultTLBEntries:chv,bdw,skl,bxt */ + /* WaIncreaseDefaultTLBEntries:chv,bdw,skl,bxt,kbl */ if (IS_BROADWELL(dev_priv)) I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_BDW); else if (IS_CHERRYVIEW(dev_priv)) I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_CHV); - else if (IS_SKYLAKE(dev_priv)) + else if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_SKL); else if (IS_BROXTON(dev_priv)) I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_BXT); -- cgit From b976dc53ec43da887fb5731f2f7e1f770cec6074 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Mon, 23 Jan 2017 10:32:37 -0800 Subject: drm/i915: Introduce IS_GEN9_BC for Skylake and Kabylake. Along with GLK it was introduced the .is_lp and IS_GEN9_LP. So, following the same simplification standard we can put Skylake and Kabylake under the same bucket for most of the things. So let's add the IS_GEN9_BC for "Big Core" (non Atom based platforms). The i915_drv.c was let out of this patch on purpose because that is really a decision per platform, just like other cases where IS_KABYLAKE is different from IS_SKYLAKE. v2: fix conflict with IS_LP and 3 new cases for this big core bucket: - intel_ddi.c: intel_ddi_get_link_dpll - intel_fbc.c: find_compression_threshold - i915_gem_gtt.c: gtt_write_workarounds Cc: Anusha Srivatsa Cc: Ander Conselvan de Oliveira Cc: Jani Nikula Signed-off-by: Rodrigo Vivi Reviewed-by: Ander Conselvan de Oliveira Acked-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/1485196357-30599-2-git-send-email-rodrigo.vivi@intel.com --- drivers/gpu/drm/i915/i915_debugfs.c | 17 +++++++---------- drivers/gpu/drm/i915/i915_drv.h | 3 ++- drivers/gpu/drm/i915/i915_gem_gtt.c | 2 +- drivers/gpu/drm/i915/intel_audio.c | 2 +- drivers/gpu/drm/i915/intel_color.c | 4 ++-- drivers/gpu/drm/i915/intel_ddi.c | 20 ++++++++++---------- drivers/gpu/drm/i915/intel_display.c | 12 ++++++------ drivers/gpu/drm/i915/intel_dp.c | 5 ++--- drivers/gpu/drm/i915/intel_dpll_mgr.c | 2 +- drivers/gpu/drm/i915/intel_fbc.c | 3 +-- drivers/gpu/drm/i915/intel_i2c.c | 4 ++-- drivers/gpu/drm/i915/intel_mocs.c | 2 +- drivers/gpu/drm/i915/intel_pm.c | 13 ++++++------- drivers/gpu/drm/i915/intel_runtime_pm.c | 10 +++++----- 14 files changed, 47 insertions(+), 52 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 9d7a77ecec3d..6d9914648970 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -1224,21 +1224,18 @@ static int i915_frequency_info(struct seq_file *m, void *unused) max_freq = (IS_GEN9_LP(dev_priv) ? rp_state_cap >> 0 : rp_state_cap >> 16) & 0xff; - max_freq *= (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv) ? - GEN9_FREQ_SCALER : 1); + max_freq *= (IS_GEN9_BC(dev_priv) ? GEN9_FREQ_SCALER : 1); seq_printf(m, "Lowest (RPN) frequency: %dMHz\n", intel_gpu_freq(dev_priv, max_freq)); max_freq = (rp_state_cap & 0xff00) >> 8; - max_freq *= (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv) ? - GEN9_FREQ_SCALER : 1); + max_freq *= (IS_GEN9_BC(dev_priv) ? GEN9_FREQ_SCALER : 1); seq_printf(m, "Nominal (RP1) frequency: %dMHz\n", intel_gpu_freq(dev_priv, max_freq)); max_freq = (IS_GEN9_LP(dev_priv) ? rp_state_cap >> 16 : rp_state_cap >> 0) & 0xff; - max_freq *= (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv) ? - GEN9_FREQ_SCALER : 1); + max_freq *= (IS_GEN9_BC(dev_priv) ? GEN9_FREQ_SCALER : 1); seq_printf(m, "Max non-overclocked (RP0) frequency: %dMHz\n", intel_gpu_freq(dev_priv, max_freq)); seq_printf(m, "Max overclocked frequency: %dMHz\n", @@ -1814,7 +1811,7 @@ static int i915_ring_freq_table(struct seq_file *m, void *unused) if (ret) goto out; - if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { + if (IS_GEN9_BC(dev_priv)) { /* Convert GT frequency to 50 HZ units */ min_gpu_freq = dev_priv->rps.min_freq_softlimit / GEN9_FREQ_SCALER; @@ -1834,8 +1831,8 @@ static int i915_ring_freq_table(struct seq_file *m, void *unused) &ia_freq); seq_printf(m, "%d\t\t%d\t\t\t\t%d\n", intel_gpu_freq(dev_priv, (gpu_freq * - (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv) ? - GEN9_FREQ_SCALER : 1))), + (IS_GEN9_BC(dev_priv) ? + GEN9_FREQ_SCALER : 1))), ((ia_freq >> 0) & 0xff) * 100, ((ia_freq >> 8) & 0xff) * 100); } @@ -4450,7 +4447,7 @@ static void gen9_sseu_device_status(struct drm_i915_private *dev_priv, sseu->slice_mask |= BIT(s); - if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) + if (IS_GEN9_BC(dev_priv)) sseu->subslice_mask = INTEL_INFO(dev_priv)->sseu.subslice_mask; diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 3e07b8b0ec89..3623a6e46283 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2760,8 +2760,9 @@ intel_info(const struct drm_i915_private *dev_priv) #define IS_GEN8(dev_priv) (!!((dev_priv)->info.gen_mask & BIT(7))) #define IS_GEN9(dev_priv) (!!((dev_priv)->info.gen_mask & BIT(8))) -#define IS_GEN9_LP(dev_priv) (IS_GEN9(dev_priv) && INTEL_INFO(dev_priv)->is_lp) #define IS_LP(dev_priv) (INTEL_INFO(dev_priv)->is_lp) +#define IS_GEN9_LP(dev_priv) (IS_GEN9(dev_priv) && IS_LP(dev_priv)) +#define IS_GEN9_BC(dev_priv) (IS_GEN9(dev_priv) && !IS_LP(dev_priv)) #define ENGINE_MASK(id) BIT(id) #define RENDER_RING ENGINE_MASK(RCS) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index cc0bb7bb6775..40685c68260b 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -2189,7 +2189,7 @@ static void gtt_write_workarounds(struct drm_i915_private *dev_priv) I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_BDW); else if (IS_CHERRYVIEW(dev_priv)) I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_CHV); - else if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) + else if (IS_GEN9_BC(dev_priv)) I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_SKL); else if (IS_BROXTON(dev_priv)) I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_BXT); diff --git a/drivers/gpu/drm/i915/intel_audio.c b/drivers/gpu/drm/i915/intel_audio.c index 16c202781db0..cd9d20763140 100644 --- a/drivers/gpu/drm/i915/intel_audio.c +++ b/drivers/gpu/drm/i915/intel_audio.c @@ -702,7 +702,7 @@ static void i915_audio_component_codec_wake_override(struct device *kdev, struct drm_i915_private *dev_priv = kdev_to_i915(kdev); u32 tmp; - if (!IS_SKYLAKE(dev_priv) && !IS_KABYLAKE(dev_priv)) + if (!IS_GEN9_BC(dev_priv)) return; i915_audio_component_get_power(kdev); diff --git a/drivers/gpu/drm/i915/intel_color.c b/drivers/gpu/drm/i915/intel_color.c index d81232b79f00..34952d04485e 100644 --- a/drivers/gpu/drm/i915/intel_color.c +++ b/drivers/gpu/drm/i915/intel_color.c @@ -536,8 +536,8 @@ void intel_color_init(struct drm_crtc *crtc) } else if (IS_HASWELL(dev_priv)) { dev_priv->display.load_csc_matrix = i9xx_load_csc_matrix; dev_priv->display.load_luts = haswell_load_luts; - } else if (IS_BROADWELL(dev_priv) || IS_SKYLAKE(dev_priv) || - IS_BROXTON(dev_priv) || IS_KABYLAKE(dev_priv)) { + } else if (IS_BROADWELL(dev_priv) || IS_GEN9_BC(dev_priv) || + IS_BROXTON(dev_priv)) { dev_priv->display.load_csc_matrix = i9xx_load_csc_matrix; dev_priv->display.load_luts = broadwell_load_luts; } else { diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index 66b367d0771a..9a9a670f8549 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -445,7 +445,7 @@ static int intel_ddi_hdmi_level(struct drm_i915_private *dev_priv, enum port por if (IS_GEN9_LP(dev_priv)) return hdmi_level; - if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { + if (IS_GEN9_BC(dev_priv)) { skl_get_buf_trans_hdmi(dev_priv, &n_hdmi_entries); hdmi_default_entry = 8; } else if (IS_BROADWELL(dev_priv)) { @@ -518,7 +518,7 @@ void intel_prepare_dp_ddi_buffers(struct intel_encoder *encoder) n_dp_entries = ARRAY_SIZE(bdw_ddi_translations_dp); } - if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { + if (IS_GEN9_BC(dev_priv)) { /* If we're boosting the current, set bit 31 of trans1 */ if (dev_priv->vbt.ddi_port_info[port].dp_boost_level) iboost_bit = DDI_BUF_BALANCE_LEG_ENABLE; @@ -572,7 +572,7 @@ static void intel_prepare_hdmi_ddi_buffers(struct intel_encoder *encoder) hdmi_level = intel_ddi_hdmi_level(dev_priv, port); - if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { + if (IS_GEN9_BC(dev_priv)) { ddi_translations_hdmi = skl_get_buf_trans_hdmi(dev_priv, &n_hdmi_entries); /* If we're boosting the current, set bit 31 of trans1 */ @@ -1089,7 +1089,7 @@ void intel_ddi_clock_get(struct intel_encoder *encoder, if (INTEL_GEN(dev_priv) <= 8) hsw_ddi_clock_get(encoder, pipe_config); - else if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) + else if (IS_GEN9_BC(dev_priv)) skl_ddi_clock_get(encoder, pipe_config); else if (IS_GEN9_LP(dev_priv)) bxt_ddi_clock_get(encoder, pipe_config); @@ -1150,7 +1150,7 @@ bool intel_ddi_pll_select(struct intel_crtc *intel_crtc, struct intel_encoder *intel_encoder = intel_ddi_get_crtc_new_encoder(crtc_state); - if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) + if (IS_GEN9_BC(dev_priv)) return skl_ddi_pll_select(intel_crtc, crtc_state, intel_encoder); else if (IS_GEN9_LP(dev_priv)) @@ -1641,7 +1641,7 @@ uint32_t ddi_signal_levels(struct intel_dp *intel_dp) level = translate_signal_level(signal_levels); - if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) + if (IS_GEN9_BC(dev_priv)) skl_ddi_set_iboost(encoder, level); else if (IS_GEN9_LP(dev_priv)) bxt_ddi_vswing_sequence(dev_priv, level, port, encoder->type); @@ -1658,7 +1658,7 @@ void intel_ddi_clk_select(struct intel_encoder *encoder, if (WARN_ON(!pll)) return; - if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { + if (IS_GEN9_BC(dev_priv)) { uint32_t val; /* DDI -> PLL mapping */ @@ -1714,7 +1714,7 @@ static void intel_ddi_pre_enable_hdmi(struct intel_encoder *encoder, intel_dp_dual_mode_set_tmds_output(intel_hdmi, true); intel_ddi_clk_select(encoder, pll); intel_prepare_hdmi_ddi_buffers(encoder); - if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) + if (IS_GEN9_BC(dev_priv)) skl_ddi_set_iboost(encoder, level); else if (IS_GEN9_LP(dev_priv)) bxt_ddi_vswing_sequence(dev_priv, level, port, @@ -1784,7 +1784,7 @@ static void intel_ddi_post_disable(struct intel_encoder *intel_encoder, intel_edp_panel_off(intel_dp); } - if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) + if (IS_GEN9_BC(dev_priv)) I915_WRITE(DPLL_CTRL2, (I915_READ(DPLL_CTRL2) | DPLL_CTRL2_DDI_CLK_OFF(port))); else if (INTEL_GEN(dev_priv) < 9) @@ -2157,7 +2157,7 @@ intel_ddi_get_link_dpll(struct intel_dp *intel_dp, int clock) pll->state = tmp_pll_state; return NULL; } - } else if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { + } else if (IS_GEN9_BC(dev_priv)) { pll = skl_find_link_pll(dev_priv, clock); } else if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) { pll = hsw_ddi_dp_get_dpll(encoder, clock); diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 5f55bc37c538..5a9da484bb23 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -5800,7 +5800,7 @@ static int skl_calc_cdclk(int max_pixclk, int vco); static void intel_update_max_cdclk(struct drm_i915_private *dev_priv) { - if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { + if (IS_GEN9_BC(dev_priv)) { u32 limit = I915_READ(SKL_DFSM) & SKL_DFSM_CDCLK_LIMIT_MASK; int max_cdclk, vco; @@ -10673,7 +10673,7 @@ static void haswell_get_ddi_port_state(struct intel_crtc *crtc, port = (tmp & TRANS_DDI_PORT_MASK) >> TRANS_DDI_PORT_SHIFT; - if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) + if (IS_GEN9_BC(dev_priv)) skylake_get_ddi_pll(dev_priv, port, pipe_config); else if (IS_GEN9_LP(dev_priv)) bxt_get_ddi_pll(dev_priv, port, pipe_config); @@ -15681,7 +15681,7 @@ static void intel_setup_outputs(struct drm_i915_private *dev_priv) */ found = I915_READ(DDI_BUF_CTL(PORT_A)) & DDI_INIT_DISPLAY_DETECTED; /* WaIgnoreDDIAStrap: skl */ - if (found || IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) + if (found || IS_GEN9_BC(dev_priv)) intel_ddi_init(dev_priv, PORT_A); /* DDI B, C and D detection is indicated by the SFUSE_STRAP @@ -15697,7 +15697,7 @@ static void intel_setup_outputs(struct drm_i915_private *dev_priv) /* * On SKL we don't have a way to detect DDI-E so we rely on VBT. */ - if ((IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) && + if (IS_GEN9_BC(dev_priv) && (dev_priv->vbt.ddi_port_info[PORT_E].supports_dp || dev_priv->vbt.ddi_port_info[PORT_E].supports_dvi || dev_priv->vbt.ddi_port_info[PORT_E].supports_hdmi)) @@ -16196,7 +16196,7 @@ void intel_init_display_hooks(struct drm_i915_private *dev_priv) } /* Returns the core display clock speed */ - if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) + if (IS_GEN9_BC(dev_priv)) dev_priv->display.get_display_clock_speed = skylake_get_display_clock_speed; else if (IS_GEN9_LP(dev_priv)) @@ -16277,7 +16277,7 @@ void intel_init_display_hooks(struct drm_i915_private *dev_priv) bxt_modeset_commit_cdclk; dev_priv->display.modeset_calc_cdclk = bxt_modeset_calc_cdclk; - } else if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { + } else if (IS_GEN9_BC(dev_priv)) { dev_priv->display.modeset_commit_cdclk = skl_modeset_commit_cdclk; dev_priv->display.modeset_calc_cdclk = diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 247fbf352576..e0f9b9e49acc 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -226,7 +226,7 @@ intel_dp_source_rates(struct intel_dp *intel_dp, const int **source_rates) if (IS_GEN9_LP(dev_priv)) { *source_rates = bxt_rates; size = ARRAY_SIZE(bxt_rates); - } else if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { + } else if (IS_GEN9_BC(dev_priv)) { *source_rates = skl_rates; size = ARRAY_SIZE(skl_rates); } else { @@ -1751,8 +1751,7 @@ found: * DPLL0 VCO may need to be adjusted to get the correct * clock for eDP. This will affect cdclk as well. */ - if (is_edp(intel_dp) && - (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv))) { + if (is_edp(intel_dp) && IS_GEN9_BC(dev_priv)) { int vco; switch (pipe_config->port_clock / 2) { diff --git a/drivers/gpu/drm/i915/intel_dpll_mgr.c b/drivers/gpu/drm/i915/intel_dpll_mgr.c index c92a2558beb4..368eff93a0a6 100644 --- a/drivers/gpu/drm/i915/intel_dpll_mgr.c +++ b/drivers/gpu/drm/i915/intel_dpll_mgr.c @@ -2015,7 +2015,7 @@ void intel_shared_dpll_init(struct drm_device *dev) const struct dpll_info *dpll_info; int i; - if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) + if (IS_GEN9_BC(dev_priv)) dpll_mgr = &skl_pll_mgr; else if (IS_GEN9_LP(dev_priv)) dpll_mgr = &bxt_pll_mgr; diff --git a/drivers/gpu/drm/i915/intel_fbc.c b/drivers/gpu/drm/i915/intel_fbc.c index 89fe5c8464df..51585008fb9d 100644 --- a/drivers/gpu/drm/i915/intel_fbc.c +++ b/drivers/gpu/drm/i915/intel_fbc.c @@ -537,8 +537,7 @@ static int find_compression_threshold(struct drm_i915_private *dev_priv, * reserved range size, so it always assumes the maximum (8mb) is used. * If we enable FBC using a CFB on that memory range we'll get FIFO * underruns, even if that range is not reserved by the BIOS. */ - if (IS_BROADWELL(dev_priv) || - IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) + if (IS_BROADWELL(dev_priv) || IS_GEN9_BC(dev_priv)) end = ggtt->stolen_size - 8 * 1024 * 1024; else end = U64_MAX; diff --git a/drivers/gpu/drm/i915/intel_i2c.c b/drivers/gpu/drm/i915/intel_i2c.c index bce1ba80f277..b6401e8f1bd6 100644 --- a/drivers/gpu/drm/i915/intel_i2c.c +++ b/drivers/gpu/drm/i915/intel_i2c.c @@ -74,7 +74,7 @@ static const struct gmbus_pin *get_gmbus_pin(struct drm_i915_private *dev_priv, { if (IS_GEN9_LP(dev_priv)) return &gmbus_pins_bxt[pin]; - else if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) + else if (IS_GEN9_BC(dev_priv)) return &gmbus_pins_skl[pin]; else if (IS_BROADWELL(dev_priv)) return &gmbus_pins_bdw[pin]; @@ -89,7 +89,7 @@ bool intel_gmbus_is_valid_pin(struct drm_i915_private *dev_priv, if (IS_GEN9_LP(dev_priv)) size = ARRAY_SIZE(gmbus_pins_bxt); - else if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) + else if (IS_GEN9_BC(dev_priv)) size = ARRAY_SIZE(gmbus_pins_skl); else if (IS_BROADWELL(dev_priv)) size = ARRAY_SIZE(gmbus_pins_bdw); diff --git a/drivers/gpu/drm/i915/intel_mocs.c b/drivers/gpu/drm/i915/intel_mocs.c index c787fc4e6eb9..8f98fc714fe9 100644 --- a/drivers/gpu/drm/i915/intel_mocs.c +++ b/drivers/gpu/drm/i915/intel_mocs.c @@ -178,7 +178,7 @@ static bool get_mocs_settings(struct drm_i915_private *dev_priv, { bool result = false; - if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { + if (IS_GEN9_BC(dev_priv)) { table->size = ARRAY_SIZE(skylake_mocs_table); table->table = skylake_mocs_table; result = true; diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 249623d45be0..d4db14b53b61 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -2895,8 +2895,7 @@ static bool skl_needs_memory_bw_wa(struct intel_atomic_state *state) { struct drm_i915_private *dev_priv = to_i915(state->base.dev); - if (IS_SKYLAKE(dev_priv) || IS_BROXTON(dev_priv) || - IS_KABYLAKE(dev_priv)) + if (IS_GEN9_BC(dev_priv) || IS_BROXTON(dev_priv)) return true; return false; @@ -5294,7 +5293,7 @@ static void gen6_init_rps_frequencies(struct drm_i915_private *dev_priv) dev_priv->rps.efficient_freq = dev_priv->rps.rp1_freq; if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv) || - IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { + IS_GEN9_BC(dev_priv)) { u32 ddcc_status = 0; if (sandybridge_pcode_read(dev_priv, @@ -5307,7 +5306,7 @@ static void gen6_init_rps_frequencies(struct drm_i915_private *dev_priv) dev_priv->rps.max_freq); } - if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { + if (IS_GEN9_BC(dev_priv)) { /* Store the frequency values in 16.66 MHZ units, which is * the natural hardware unit for SKL */ @@ -5637,7 +5636,7 @@ static void gen6_update_ring_freq(struct drm_i915_private *dev_priv) /* convert DDR frequency from units of 266.6MHz to bandwidth */ min_ring_freq = mult_frac(min_ring_freq, 8, 3); - if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { + if (IS_GEN9_BC(dev_priv)) { /* Convert GT frequency to 50 HZ units */ min_gpu_freq = dev_priv->rps.min_freq / GEN9_FREQ_SCALER; max_gpu_freq = dev_priv->rps.max_freq / GEN9_FREQ_SCALER; @@ -5655,7 +5654,7 @@ static void gen6_update_ring_freq(struct drm_i915_private *dev_priv) int diff = max_gpu_freq - gpu_freq; unsigned int ia_freq = 0, ring_freq = 0; - if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { + if (IS_GEN9_BC(dev_priv)) { /* * ring_freq = 2 * GT. ring_freq is in 100MHz units * No floor required for ring frequency on SKL. @@ -6775,7 +6774,7 @@ void intel_enable_gt_powersave(struct drm_i915_private *dev_priv) } else if (INTEL_GEN(dev_priv) >= 9) { gen9_enable_rc6(dev_priv); gen9_enable_rps(dev_priv); - if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) + if (IS_GEN9_BC(dev_priv)) gen6_update_ring_freq(dev_priv); } else if (IS_BROADWELL(dev_priv)) { gen8_enable_rps(dev_priv); diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c index c0b7e95b5b8e..66aa1bbb42f0 100644 --- a/drivers/gpu/drm/i915/intel_runtime_pm.c +++ b/drivers/gpu/drm/i915/intel_runtime_pm.c @@ -732,7 +732,7 @@ gen9_sanitize_power_well_requests(struct drm_i915_private *dev_priv, * other request bits to be set, so WARN for those. */ if (power_well_id == SKL_DISP_PW_1 || - ((IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) && + (IS_GEN9_BC(dev_priv) && power_well_id == SKL_DISP_PW_MISC_IO)) DRM_DEBUG_DRIVER("Clearing auxiliary requests for %s forced on " "by DMC\n", power_well->name); @@ -2323,7 +2323,7 @@ static uint32_t get_allowed_dc_mask(const struct drm_i915_private *dev_priv, int requested_dc; int max_dc; - if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { + if (IS_GEN9_BC(dev_priv)) { max_dc = 2; mask = 0; } else if (IS_GEN9_LP(dev_priv)) { @@ -2398,7 +2398,7 @@ int intel_power_domains_init(struct drm_i915_private *dev_priv) set_power_wells(power_domains, hsw_power_wells); } else if (IS_BROADWELL(dev_priv)) { set_power_wells(power_domains, bdw_power_wells); - } else if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { + } else if (IS_GEN9_BC(dev_priv)) { set_power_wells(power_domains, skl_power_wells); } else if (IS_BROXTON(dev_priv)) { set_power_wells(power_domains, bxt_power_wells); @@ -2730,7 +2730,7 @@ void intel_power_domains_init_hw(struct drm_i915_private *dev_priv, bool resume) power_domains->initializing = true; - if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { + if (IS_GEN9_BC(dev_priv)) { skl_display_core_init(dev_priv, resume); } else if (IS_GEN9_LP(dev_priv)) { bxt_display_core_init(dev_priv, resume); @@ -2769,7 +2769,7 @@ void intel_power_domains_suspend(struct drm_i915_private *dev_priv) if (!i915.disable_power_well) intel_display_power_put(dev_priv, POWER_DOMAIN_INIT); - if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) + if (IS_GEN9_BC(dev_priv)) skl_display_core_uninit(dev_priv); else if (IS_GEN9_LP(dev_priv)) bxt_display_core_uninit(dev_priv); -- cgit From 1c044f9bbc107313af5f3c6bfa21300a6d0a08cd Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 25 Jan 2017 17:26:01 +0000 Subject: drm/i915: Remove early pre-production RPS workarounds for BXT Remove WaGsvDisableTurbo and WaRsUseTimeoutMode as these were only for pre-production Broxton devices, and this code is now defunct. Signed-off-by: Chris Wilson Cc: Mika Kuoppala Cc: Joonas Lahtinen Reviewed-by: Joonas Lahtinen --- drivers/gpu/drm/i915/intel_pm.c | 35 +++-------------------------------- 1 file changed, 3 insertions(+), 32 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index d4db14b53b61..a04d81aa1fc1 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -4935,10 +4935,6 @@ static u32 gen6_rps_pm_mask(struct drm_i915_private *dev_priv, u8 val) * update the GEN6_RP_INTERRUPT_LIMITS register accordingly. */ static void gen6_set_rps(struct drm_i915_private *dev_priv, u8 val) { - /* WaGsvDisableTurbo: Workaround to disable turbo on BXT A* */ - if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) - return; - WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); WARN_ON(val > dev_priv->rps.max_freq); WARN_ON(val < dev_priv->rps.min_freq); @@ -5335,22 +5331,6 @@ static void gen9_enable_rps(struct drm_i915_private *dev_priv) { intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); - /* WaGsvDisableTurbo: Workaround to disable turbo on BXT A* */ - if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) { - /* - * BIOS could leave the Hw Turbo enabled, so need to explicitly - * clear out the Control register just to avoid inconsitency - * with debugfs interface, which will show Turbo as enabled - * only and that is not expected by the User after adding the - * WaGsvDisableTurbo. Apart from this there is no problem even - * if the Turbo is left enabled in the Control register, as the - * Up/Down interrupts would remain masked. - */ - gen9_disable_rps(dev_priv); - intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); - return; - } - /* Program defaults and thresholds for RPS*/ I915_WRITE(GEN6_RC_VIDEO_FREQ, GEN9_FREQUENCY(dev_priv->rps.rp1_freq)); @@ -5410,18 +5390,9 @@ static void gen9_enable_rc6(struct drm_i915_private *dev_priv) if (intel_enable_rc6() & INTEL_RC6_ENABLE) rc6_mask = GEN6_RC_CTL_RC6_ENABLE; DRM_INFO("RC6 %s\n", onoff(rc6_mask & GEN6_RC_CTL_RC6_ENABLE)); - /* WaRsUseTimeoutMode:bxt */ - if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) { - I915_WRITE(GEN6_RC6_THRESHOLD, 625); /* 800us */ - I915_WRITE(GEN6_RC_CONTROL, GEN6_RC_CTL_HW_ENABLE | - GEN7_RC_CTL_TO_MODE | - rc6_mask); - } else { - I915_WRITE(GEN6_RC6_THRESHOLD, 37500); /* 37.5/125ms per EI */ - I915_WRITE(GEN6_RC_CONTROL, GEN6_RC_CTL_HW_ENABLE | - GEN6_RC_CTL_EI_MODE(1) | - rc6_mask); - } + I915_WRITE(GEN6_RC6_THRESHOLD, 37500); /* 37.5/125ms per EI */ + I915_WRITE(GEN6_RC_CONTROL, + GEN6_RC_CTL_HW_ENABLE | GEN6_RC_CTL_EI_MODE(1) | rc6_mask); /* * 3b: Enable Coarse Power Gating only when RC6 is enabled. -- cgit From da15f7cb1914aa6ca3efb874e04ae3d96038641a Mon Sep 17 00:00:00 2001 From: Manasi Navare Date: Tue, 24 Jan 2017 08:16:34 -0800 Subject: drm/i915: Add support for DP link training compliance This patch adds support to handle automated DP compliance link training test requests. This patch has been tested with Unigraf DPR-120 DP Compliance device for testing Link Training Compliance. After we get a short pulse Compliance test request, test request values are read and hotplug uevent is sent in order to trigger another modeset during which the pipe is configured and link is retrained and enabled for link parameters requested by the test. v5: * Only modify the compliance structure after all validation is done (Jani Nikula) * Remove the variable test_result (Jani Nikula) v4: * Return TEST_NAK for read failures and invalid values (Jani Nikula) * Conver the test link BW to link rate before storing (Jani Nikula) v3: * Validate the test link rate and lane count as soon as the request comes (Jani Nikula) v2: * Validate the test lane count before using it in intel_dp_compute_config (Jani Nikula) Signed-off-by: Manasi Navare Cc: Jani Nikula Cc: Daniel Vetter Cc: Ville Syrjala Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/1485274594-17361-1-git-send-email-manasi.d.navare@intel.com --- drivers/gpu/drm/i915/intel_dp.c | 65 ++++++++++++++++++++++++++++++++++++---- drivers/gpu/drm/i915/intel_drv.h | 2 ++ 2 files changed, 61 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index e0f9b9e49acc..26d7fc1bc541 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -1613,6 +1613,7 @@ intel_dp_compute_config(struct intel_encoder *encoder, /* Conveniently, the link BW constants become indices with a shift...*/ int min_clock = 0; int max_clock; + int link_rate_index; int bpp, mode_rate; int link_avail, link_clock; int common_rates[DP_MAX_SUPPORTED_RATES] = {}; @@ -1654,6 +1655,15 @@ intel_dp_compute_config(struct intel_encoder *encoder, if (adjusted_mode->flags & DRM_MODE_FLAG_DBLCLK) return false; + /* Use values requested by Compliance Test Request */ + if (intel_dp->compliance.test_type == DP_TEST_LINK_TRAINING) { + link_rate_index = intel_dp_link_rate_index(intel_dp, + common_rates, + intel_dp->compliance.test_link_rate); + if (link_rate_index >= 0) + min_clock = max_clock = link_rate_index; + min_lane_count = max_lane_count = intel_dp->compliance.test_lane_count; + } DRM_DEBUG_KMS("DP link computation with max lane count %i " "max bw %d pixel clock %iKHz\n", max_lane_count, common_rates[max_clock], @@ -3919,8 +3929,46 @@ intel_dp_get_sink_irq_esi(struct intel_dp *intel_dp, u8 *sink_irq_vector) static uint8_t intel_dp_autotest_link_training(struct intel_dp *intel_dp) { - uint8_t test_result = DP_TEST_ACK; - return test_result; + int status = 0; + int min_lane_count = 1; + int common_rates[DP_MAX_SUPPORTED_RATES] = {}; + int link_rate_index, test_link_rate; + uint8_t test_lane_count, test_link_bw; + /* (DP CTS 1.2) + * 4.3.1.11 + */ + /* Read the TEST_LANE_COUNT and TEST_LINK_RTAE fields (DP CTS 3.1.4) */ + status = drm_dp_dpcd_readb(&intel_dp->aux, DP_TEST_LANE_COUNT, + &test_lane_count); + + if (status <= 0) { + DRM_DEBUG_KMS("Lane count read failed\n"); + return DP_TEST_NAK; + } + test_lane_count &= DP_MAX_LANE_COUNT_MASK; + /* Validate the requested lane count */ + if (test_lane_count < min_lane_count || + test_lane_count > intel_dp->max_sink_lane_count) + return DP_TEST_NAK; + + status = drm_dp_dpcd_readb(&intel_dp->aux, DP_TEST_LINK_RATE, + &test_link_bw); + if (status <= 0) { + DRM_DEBUG_KMS("Link Rate read failed\n"); + return DP_TEST_NAK; + } + /* Validate the requested link rate */ + test_link_rate = drm_dp_bw_code_to_link_rate(test_link_bw); + link_rate_index = intel_dp_link_rate_index(intel_dp, + common_rates, + test_link_rate); + if (link_rate_index < 0) + return DP_TEST_NAK; + + intel_dp->compliance.test_lane_count = test_lane_count; + intel_dp->compliance.test_link_rate = test_link_rate; + + return DP_TEST_ACK; } static uint8_t intel_dp_autotest_video_pattern(struct intel_dp *intel_dp) @@ -4131,9 +4179,8 @@ intel_dp_check_link_status(struct intel_dp *intel_dp) if (!intel_dp->lane_count) return; - /* if link training is requested we should perform it always */ - if ((intel_dp->compliance.test_type == DP_TEST_LINK_TRAINING) || - (!drm_dp_channel_eq_ok(link_status, intel_dp->lane_count))) { + /* Retrain if Channel EQ or CR not ok */ + if (!drm_dp_channel_eq_ok(link_status, intel_dp->lane_count)) { DRM_DEBUG_KMS("%s: channel EQ not ok, retraining\n", intel_encoder->base.name); @@ -4158,6 +4205,7 @@ static bool intel_dp_short_pulse(struct intel_dp *intel_dp) { struct drm_device *dev = intel_dp_to_dev(intel_dp); + struct intel_encoder *intel_encoder = &dp_to_dig_port(intel_dp)->base; u8 sink_irq_vector = 0; u8 old_sink_count = intel_dp->sink_count; bool ret; @@ -4191,7 +4239,7 @@ intel_dp_short_pulse(struct intel_dp *intel_dp) sink_irq_vector); if (sink_irq_vector & DP_AUTOMATED_TEST_REQUEST) - DRM_DEBUG_DRIVER("Test request in short pulse not handled\n"); + intel_dp_handle_test_request(intel_dp); if (sink_irq_vector & (DP_CP_IRQ | DP_SINK_SPECIFIC_IRQ)) DRM_DEBUG_DRIVER("CP or sink specific irq unhandled\n"); } @@ -4199,6 +4247,11 @@ intel_dp_short_pulse(struct intel_dp *intel_dp) drm_modeset_lock(&dev->mode_config.connection_mutex, NULL); intel_dp_check_link_status(intel_dp); drm_modeset_unlock(&dev->mode_config.connection_mutex); + if (intel_dp->compliance.test_type == DP_TEST_LINK_TRAINING) { + DRM_DEBUG_KMS("Link Training Compliance Test requested\n"); + /* Send a Hotplug Uevent to userspace to start modeset */ + drm_kms_helper_hotplug_event(intel_encoder->base.dev); + } return true; } diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 396356ce4206..fbd20dab5b45 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -896,6 +896,8 @@ struct intel_dp_compliance { unsigned long test_type; struct intel_dp_compliance_data test_data; bool test_active; + int test_link_rate; + u8 test_lane_count; }; struct intel_dp { -- cgit From b48a5ba9710f83d998f16917d57bfa622991e9d6 Mon Sep 17 00:00:00 2001 From: Manasi Navare Date: Fri, 20 Jan 2017 19:09:28 -0800 Subject: drm/i915: Fixes to support DP Compliance EDID tests This patch addresses a few issues from the original patch for DP Compliance EDID test support submitted by Todd Previte Video Mode requested in the EDID test handler for the EDID Read test (CTS 4.2.2.3) should be set to PREFERRED as per the CTS spec. v2: * Added read debugfs data from test_data.edid if its EDID test (Jani NIkula) Signed-off-by: Manasi Navare Cc: Jani Nikula Cc: Daniel Vetter Cc: Ville Syrjala Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/1484968170-12467-3-git-send-email-manasi.d.navare@intel.com --- drivers/gpu/drm/i915/i915_debugfs.c | 5 ++++- drivers/gpu/drm/i915/intel_dp.c | 4 ++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 6d9914648970..726256fa35ce 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -3775,7 +3775,10 @@ static int i915_displayport_test_data_show(struct seq_file *m, void *data) if (connector->status == connector_status_connected && connector->encoder != NULL) { intel_dp = enc_to_intel_dp(connector->encoder); - seq_printf(m, "%lx", intel_dp->compliance.test_data.edid); + if (intel_dp->compliance.test_type == + DP_TEST_LINK_EDID_READ) + seq_printf(m, "%lx", + intel_dp->compliance.test_data.edid); } else seq_puts(m, "0"); } diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 26d7fc1bc541..e245802c5727 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -3979,7 +3979,7 @@ static uint8_t intel_dp_autotest_video_pattern(struct intel_dp *intel_dp) static uint8_t intel_dp_autotest_edid(struct intel_dp *intel_dp) { - uint8_t test_result = DP_TEST_NAK; + uint8_t test_result = DP_TEST_ACK; struct intel_connector *intel_connector = intel_dp->attached_connector; struct drm_connector *connector = &intel_connector->base; @@ -4014,7 +4014,7 @@ static uint8_t intel_dp_autotest_edid(struct intel_dp *intel_dp) DRM_DEBUG_KMS("Failed to write EDID checksum\n"); test_result = DP_TEST_ACK | DP_TEST_EDID_CHECKSUM_WRITE; - intel_dp->compliance.test_data.edid = INTEL_DP_RESOLUTION_STANDARD; + intel_dp->compliance.test_data.edid = INTEL_DP_RESOLUTION_PREFERRED; } /* Set test active flag here so userspace doesn't interrupt things */ -- cgit From 08b79f62a1a3f6774cf27db4d12f3f938486c4f9 Mon Sep 17 00:00:00 2001 From: Manasi Navare Date: Fri, 20 Jan 2017 19:09:29 -0800 Subject: drm: Add definitions for DP compliance Video pattern tests v4: * Remove redundant single bit defs (Jani Nikula) v3: * Fix the conventions in bit definitions (Jani Nikula) v2: * Add all the other DP Complianec TEST register defs (Jani Nikula) Cc: dri-devel@lists.freedesktop.org Cc: Jani Nikula Cc: Daniel Vetter Cc: Ville Syrjala Signed-off-by: Manasi Navare Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/1484968170-12467-4-git-send-email-manasi.d.navare@intel.com --- include/drm/drm_dp_helper.h | 57 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) diff --git a/include/drm/drm_dp_helper.h b/include/drm/drm_dp_helper.h index 04681359a6f5..ba89295c8651 100644 --- a/include/drm/drm_dp_helper.h +++ b/include/drm/drm_dp_helper.h @@ -417,6 +417,63 @@ #define DP_TEST_LANE_COUNT 0x220 #define DP_TEST_PATTERN 0x221 +# define DP_NO_TEST_PATTERN 0x0 +# define DP_COLOR_RAMP 0x1 +# define DP_BLACK_AND_WHITE_VERTICAL_LINES 0x2 +# define DP_COLOR_SQUARE 0x3 + +#define DP_TEST_H_TOTAL_HI 0x222 +#define DP_TEST_H_TOTAL_LO 0x223 + +#define DP_TEST_V_TOTAL_HI 0x224 +#define DP_TEST_V_TOTAL_LO 0x225 + +#define DP_TEST_H_START_HI 0x226 +#define DP_TEST_H_START_LO 0x227 + +#define DP_TEST_V_START_HI 0x228 +#define DP_TEST_V_START_LO 0x229 + +#define DP_TEST_HSYNC_HI 0x22A +# define DP_TEST_HSYNC_POLARITY (1 << 7) +# define DP_TEST_HSYNC_WIDTH_HI_MASK (127 << 0) +#define DP_TEST_HSYNC_WIDTH_LO 0x22B + +#define DP_TEST_VSYNC_HI 0x22C +# define DP_TEST_VSYNC_POLARITY (1 << 7) +# define DP_TEST_VSYNC_WIDTH_HI_MASK (127 << 0) +#define DP_TEST_VSYNC_WIDTH_LO 0x22D + +#define DP_TEST_H_WIDTH_HI 0x22E +#define DP_TEST_H_WIDTH_LO 0x22F + +#define DP_TEST_V_HEIGHT_HI 0x230 +#define DP_TEST_V_HEIGHT_LO 0x231 + +#define DP_TEST_MISC0 0x232 +# define DP_TEST_SYNC_CLOCK (1 << 0) +# define DP_TEST_COLOR_FORMAT_MASK (3 << 1) +# define DP_TEST_COLOR_FORMAT_SHIFT 1 +# define DP_COLOR_FORMAT_RGB (0 << 1) +# define DP_COLOR_FORMAT_YCbCr422 (1 << 1) +# define DP_COLOR_FORMAT_YCbCr444 (2 << 1) +# define DP_TEST_DYNAMIC_RANGE_CEA (1 << 3) +# define DP_TEST_YCBCR_COEFFICIENTS (1 << 4) +# define DP_YCBCR_COEFFICIENTS_ITU601 (0 << 4) +# define DP_YCBCR_COEFFICIENTS_ITU709 (1 << 4) +# define DP_TEST_BIT_DEPTH_MASK (7 << 5) +# define DP_TEST_BIT_DEPTH_SHIFT 5 +# define DP_TEST_BIT_DEPTH_6 (0 << 5) +# define DP_TEST_BIT_DEPTH_8 (1 << 5) +# define DP_TEST_BIT_DEPTH_10 (2 << 5) +# define DP_TEST_BIT_DEPTH_12 (3 << 5) +# define DP_TEST_BIT_DEPTH_16 (4 << 5) + +#define DP_TEST_MISC1 0x233 +# define DP_TEST_REFRESH_DENOMINATOR (1 << 0) +# define DP_TEST_INTERLACED (1 << 1) + +#define DP_TEST_REFRESH_RATE_NUMERATOR 0x234 #define DP_TEST_CRC_R_CR 0x240 #define DP_TEST_CRC_G_Y 0x242 -- cgit From 611032bfa71a7634e801142016f2d41485f8638f Mon Sep 17 00:00:00 2001 From: Manasi Navare Date: Tue, 24 Jan 2017 08:21:49 -0800 Subject: drm/i915: Add support for DP Video pattern compliance tests The intel_dp_autotest_video_pattern() function gets invoked through the compliance test handler on a HPD short pulse if the test type is set to DP_TEST_VIDEO_PATTERN. This performs the DPCD registers reads to read the requested test pattern, video pattern resolution, frame rate and bits per color value. The results of this analysis are handed off to userspace so that the userspace app can set the video pattern mode appropriately for the test result/response. When the test is requested with specific BPC value, we read the BPC value from the DPCD register. If this BPC value in intel_dp structure has a non-zero value and we're on a display port connector, then we use the value to calculate the bpp for the pipe. Also in this case if its a 18bpp video pattern request, then we force the dithering on pipe to be disabled since it causes CRC mismatches. The compliance_test_active flag is set at the end of the individual test handling functions. This is so that the kernel-side operations can be completed without the risk of interruption from the userspace app that is polling on that flag. v5: * Remove test_result variable * Populate the compliance test data at the end of the function (Jani Nikula) v4: *Return TEST_NAK on read failures and invalid values (Jani Nikula) * Address CRC mismatch errors v3: * Use the updated properly shifted bit definitions (Jani Nikula) * Force dithering to be disabled on 18bpp compliance test request (Manasi Navare) v2: * Updated the DPCD Register reads based on proper defines in header (Jani Nikula) * Squahsed the patch that forced the pipe bpp to compliance test bpp (Jani Nikula) Signed-off-by: Manasi Navare Cc: Jani Nikula Cc: Daniel Vetter Cc: Ville Syrjala Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/1485274909-17470-1-git-send-email-manasi.d.navare@intel.com --- drivers/gpu/drm/i915/i915_debugfs.c | 9 +++++ drivers/gpu/drm/i915/intel_display.c | 7 ++-- drivers/gpu/drm/i915/intel_dp.c | 68 ++++++++++++++++++++++++++++++++++-- drivers/gpu/drm/i915/intel_dp_mst.c | 7 +++- drivers/gpu/drm/i915/intel_drv.h | 11 ++++++ 5 files changed, 97 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 726256fa35ce..0a39487386ad 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -3779,6 +3779,15 @@ static int i915_displayport_test_data_show(struct seq_file *m, void *data) DP_TEST_LINK_EDID_READ) seq_printf(m, "%lx", intel_dp->compliance.test_data.edid); + else if (intel_dp->compliance.test_type == + DP_TEST_LINK_VIDEO_PATTERN) { + seq_printf(m, "hdisplay: %d\n", + intel_dp->compliance.test_data.hdisplay); + seq_printf(m, "vdisplay: %d\n", + intel_dp->compliance.test_data.vdisplay); + seq_printf(m, "bpc: %u\n", + intel_dp->compliance.test_data.bpc); + } } else seq_puts(m, "0"); } diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 5a9da484bb23..ef65b4be5e56 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -13060,8 +13060,11 @@ encoder_retry: } /* Dithering seems to not pass-through bits correctly when it should, so - * only enable it on 6bpc panels. */ - pipe_config->dither = pipe_config->pipe_bpp == 6*3; + * only enable it on 6bpc panels and when its not a compliance + * test requesting 6bpc video pattern. + */ + pipe_config->dither = (pipe_config->pipe_bpp == 6*3) && + !pipe_config->dither_force_disable; DRM_DEBUG_KMS("hw max bpp: %i, pipe bpp: %i, dithering: %i\n", base_bpp, pipe_config->pipe_bpp, pipe_config->dither); diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index e245802c5727..0fafbec6dacf 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -28,8 +28,10 @@ #include #include #include +#include #include #include +#include #include #include #include @@ -1593,6 +1595,13 @@ static int intel_dp_compute_bpp(struct intel_dp *intel_dp, if (bpc > 0) bpp = min(bpp, 3*bpc); + /* For DP Compliance we override the computed bpp for the pipe */ + if (intel_dp->compliance.test_data.bpc != 0) { + pipe_config->pipe_bpp = 3*intel_dp->compliance.test_data.bpc; + pipe_config->dither_force_disable = pipe_config->pipe_bpp == 6*3; + DRM_DEBUG_KMS("Setting pipe_bpp to %d\n", + pipe_config->pipe_bpp); + } return bpp; } @@ -3973,8 +3982,63 @@ static uint8_t intel_dp_autotest_link_training(struct intel_dp *intel_dp) static uint8_t intel_dp_autotest_video_pattern(struct intel_dp *intel_dp) { - uint8_t test_result = DP_TEST_NAK; - return test_result; + uint8_t test_pattern; + uint16_t test_misc; + __be16 h_width, v_height; + int status = 0; + + /* Read the TEST_PATTERN (DP CTS 3.1.5) */ + status = drm_dp_dpcd_read(&intel_dp->aux, DP_TEST_PATTERN, + &test_pattern, 1); + if (status <= 0) { + DRM_DEBUG_KMS("Test pattern read failed\n"); + return DP_TEST_NAK; + } + if (test_pattern != DP_COLOR_RAMP) + return DP_TEST_NAK; + + status = drm_dp_dpcd_read(&intel_dp->aux, DP_TEST_H_WIDTH_HI, + &h_width, 2); + if (status <= 0) { + DRM_DEBUG_KMS("H Width read failed\n"); + return DP_TEST_NAK; + } + + status = drm_dp_dpcd_read(&intel_dp->aux, DP_TEST_V_HEIGHT_HI, + &v_height, 2); + if (status <= 0) { + DRM_DEBUG_KMS("V Height read failed\n"); + return DP_TEST_NAK; + } + + status = drm_dp_dpcd_read(&intel_dp->aux, DP_TEST_MISC0, + &test_misc, 1); + if (status <= 0) { + DRM_DEBUG_KMS("TEST MISC read failed\n"); + return DP_TEST_NAK; + } + if ((test_misc & DP_TEST_COLOR_FORMAT_MASK) != DP_COLOR_FORMAT_RGB) + return DP_TEST_NAK; + if (test_misc & DP_TEST_DYNAMIC_RANGE_CEA) + return DP_TEST_NAK; + switch (test_misc & DP_TEST_BIT_DEPTH_MASK) { + case DP_TEST_BIT_DEPTH_6: + intel_dp->compliance.test_data.bpc = 6; + break; + case DP_TEST_BIT_DEPTH_8: + intel_dp->compliance.test_data.bpc = 8; + break; + default: + return DP_TEST_NAK; + } + + intel_dp->compliance.test_data.video_pattern = test_pattern; + intel_dp->compliance.test_data.hdisplay = be16_to_cpu(h_width); + intel_dp->compliance.test_data.vdisplay = be16_to_cpu(v_height); + /* Set test active flag here so userspace doesn't interrupt things */ + intel_dp->compliance.test_active = 1; + + return DP_TEST_ACK; } static uint8_t intel_dp_autotest_edid(struct intel_dp *intel_dp) diff --git a/drivers/gpu/drm/i915/intel_dp_mst.c b/drivers/gpu/drm/i915/intel_dp_mst.c index 205fe4748ec5..29a9af177734 100644 --- a/drivers/gpu/drm/i915/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/intel_dp_mst.c @@ -47,6 +47,11 @@ static bool intel_dp_mst_compute_config(struct intel_encoder *encoder, pipe_config->has_pch_encoder = false; bpp = 24; + if (intel_dp->compliance.test_data.bpc) { + bpp = intel_dp->compliance.test_data.bpc * 3; + DRM_DEBUG_KMS("Setting pipe bpp to %d\n", + bpp); + } /* * for MST we always configure max link bw - the spec doesn't * seem to suggest we should do otherwise. @@ -55,7 +60,7 @@ static bool intel_dp_mst_compute_config(struct intel_encoder *encoder, pipe_config->lane_count = lane_count; - pipe_config->pipe_bpp = 24; + pipe_config->pipe_bpp = bpp; pipe_config->port_clock = intel_dp_max_link_rate(intel_dp); state = pipe_config->base.state; diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index fbd20dab5b45..1fb593ecc60c 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -580,6 +580,14 @@ struct intel_crtc_state { */ bool dither; + /* + * Dither gets enabled for 18bpp which causes CRC mismatch errors for + * compliance video pattern tests. + * Disable dither only if it is a compliance test request for + * 18bpp. + */ + bool dither_force_disable; + /* Controls for the clock computation, to override various stages. */ bool clock_set; @@ -890,6 +898,9 @@ struct intel_dp_desc { struct intel_dp_compliance_data { unsigned long edid; + uint8_t video_pattern; + uint16_t hdisplay, vdisplay; + uint8_t bpc; }; struct intel_dp_compliance { -- cgit From 9fcee2f77e88f1d7c3f72fca418cdae3e79a6e83 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 26 Jan 2017 10:19:19 +0000 Subject: drm/i915: Report the failure to write to the punit The write to the punit may fail, so propagate the error code back to its callers. Of particular interest are the RPS writes, so add appropriate user error codes and logging. v2: Add DEBUG for failed frequency changes during RPS. Signed-off-by: Chris Wilson Cc: Mika Kuoppala Link: http://patchwork.freedesktop.org/patch/msgid/20170126101919.13211-1-chris@chris-wilson.co.uk Reviewed-by: Mika Kuoppala --- drivers/gpu/drm/i915/i915_debugfs.c | 6 +++-- drivers/gpu/drm/i915/i915_drv.h | 4 ++-- drivers/gpu/drm/i915/i915_irq.c | 5 +++- drivers/gpu/drm/i915/i915_sysfs.c | 9 ++++--- drivers/gpu/drm/i915/intel_pm.c | 45 +++++++++++++++++++++++++---------- drivers/gpu/drm/i915/intel_sideband.c | 10 +++++--- 6 files changed, 53 insertions(+), 26 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 0a39487386ad..3ae06568df7b 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -4278,7 +4278,8 @@ i915_max_freq_set(void *data, u64 val) dev_priv->rps.max_freq_softlimit = val; - intel_set_rps(dev_priv, val); + if (intel_set_rps(dev_priv, val)) + DRM_DEBUG_DRIVER("failed to update RPS to new softlimit\n"); mutex_unlock(&dev_priv->rps.hw_lock); @@ -4333,7 +4334,8 @@ i915_min_freq_set(void *data, u64 val) dev_priv->rps.min_freq_softlimit = val; - intel_set_rps(dev_priv, val); + if (intel_set_rps(dev_priv, val)) + DRM_DEBUG_DRIVER("failed to update RPS to new softlimit\n"); mutex_unlock(&dev_priv->rps.hw_lock); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 3623a6e46283..457129b64d79 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3701,7 +3701,7 @@ extern void i915_redisable_vga(struct drm_i915_private *dev_priv); extern void i915_redisable_vga_power_on(struct drm_i915_private *dev_priv); extern bool ironlake_set_drps(struct drm_i915_private *dev_priv, u8 val); extern void intel_init_pch_refclk(struct drm_i915_private *dev_priv); -extern void intel_set_rps(struct drm_i915_private *dev_priv, u8 val); +extern int intel_set_rps(struct drm_i915_private *dev_priv, u8 val); extern bool intel_set_memory_cxsr(struct drm_i915_private *dev_priv, bool enable); @@ -3727,7 +3727,7 @@ int skl_pcode_request(struct drm_i915_private *dev_priv, u32 mbox, u32 request, /* intel_sideband.c */ u32 vlv_punit_read(struct drm_i915_private *dev_priv, u32 addr); -void vlv_punit_write(struct drm_i915_private *dev_priv, u32 addr, u32 val); +int vlv_punit_write(struct drm_i915_private *dev_priv, u32 addr, u32 val); u32 vlv_nc_read(struct drm_i915_private *dev_priv, u8 addr); u32 vlv_iosf_sb_read(struct drm_i915_private *dev_priv, u8 port, u32 reg); void vlv_iosf_sb_write(struct drm_i915_private *dev_priv, u8 port, u32 reg, u32 val); diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 3f3c9082b0f8..059d66b46e1a 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -1209,7 +1209,10 @@ static void gen6_pm_rps_work(struct work_struct *work) new_delay += adj; new_delay = clamp_t(int, new_delay, min, max); - intel_set_rps(dev_priv, new_delay); + if (intel_set_rps(dev_priv, new_delay)) { + DRM_DEBUG_DRIVER("Failed to set new GPU frequency\n"); + dev_priv->rps.last_adj = 0; + } mutex_unlock(&dev_priv->rps.hw_lock); } diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c index 376ac957cd1c..a721ff116101 100644 --- a/drivers/gpu/drm/i915/i915_sysfs.c +++ b/drivers/gpu/drm/i915/i915_sysfs.c @@ -395,13 +395,13 @@ static ssize_t gt_max_freq_mhz_store(struct device *kdev, /* We still need *_set_rps to process the new max_delay and * update the interrupt limits and PMINTRMSK even though * frequency request may be unchanged. */ - intel_set_rps(dev_priv, val); + ret = intel_set_rps(dev_priv, val); mutex_unlock(&dev_priv->rps.hw_lock); intel_runtime_pm_put(dev_priv); - return count; + return ret ?: count; } static ssize_t gt_min_freq_mhz_show(struct device *kdev, struct device_attribute *attr, char *buf) @@ -448,14 +448,13 @@ static ssize_t gt_min_freq_mhz_store(struct device *kdev, /* We still need *_set_rps to process the new min_delay and * update the interrupt limits and PMINTRMSK even though * frequency request may be unchanged. */ - intel_set_rps(dev_priv, val); + ret = intel_set_rps(dev_priv, val); mutex_unlock(&dev_priv->rps.hw_lock); intel_runtime_pm_put(dev_priv); - return count; - + return ret ?: count; } static DEVICE_ATTR(gt_act_freq_mhz, S_IRUGO, gt_act_freq_mhz_show, NULL); diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index a04d81aa1fc1..bec4283cc228 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -4933,7 +4933,7 @@ static u32 gen6_rps_pm_mask(struct drm_i915_private *dev_priv, u8 val) /* gen6_set_rps is called to update the frequency request, but should also be * called when the range (min_delay and max_delay) is modified so that we can * update the GEN6_RP_INTERRUPT_LIMITS register accordingly. */ -static void gen6_set_rps(struct drm_i915_private *dev_priv, u8 val) +static int gen6_set_rps(struct drm_i915_private *dev_priv, u8 val) { WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); WARN_ON(val > dev_priv->rps.max_freq); @@ -4968,10 +4968,14 @@ static void gen6_set_rps(struct drm_i915_private *dev_priv, u8 val) dev_priv->rps.cur_freq = val; trace_intel_gpu_freq_change(intel_gpu_freq(dev_priv, val)); + + return 0; } -static void valleyview_set_rps(struct drm_i915_private *dev_priv, u8 val) +static int valleyview_set_rps(struct drm_i915_private *dev_priv, u8 val) { + int err; + WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); WARN_ON(val > dev_priv->rps.max_freq); WARN_ON(val < dev_priv->rps.min_freq); @@ -4983,13 +4987,18 @@ static void valleyview_set_rps(struct drm_i915_private *dev_priv, u8 val) I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val)); if (val != dev_priv->rps.cur_freq) { - vlv_punit_write(dev_priv, PUNIT_REG_GPU_FREQ_REQ, val); + err = vlv_punit_write(dev_priv, PUNIT_REG_GPU_FREQ_REQ, val); + if (err) + return err; + if (!IS_CHERRYVIEW(dev_priv)) gen6_set_rps_thresholds(dev_priv, val); } dev_priv->rps.cur_freq = val; trace_intel_gpu_freq_change(intel_gpu_freq(dev_priv, val)); + + return 0; } /* vlv_set_rps_idle: Set the frequency to idle, if Gfx clocks are down @@ -5002,6 +5011,7 @@ static void valleyview_set_rps(struct drm_i915_private *dev_priv, u8 val) static void vlv_set_rps_idle(struct drm_i915_private *dev_priv) { u32 val = dev_priv->rps.idle_freq; + int err; if (dev_priv->rps.cur_freq <= val) return; @@ -5019,8 +5029,11 @@ static void vlv_set_rps_idle(struct drm_i915_private *dev_priv) * power than the render powerwell. */ intel_uncore_forcewake_get(dev_priv, FORCEWAKE_MEDIA); - valleyview_set_rps(dev_priv, val); + err = valleyview_set_rps(dev_priv, val); intel_uncore_forcewake_put(dev_priv, FORCEWAKE_MEDIA); + + if (err) + DRM_ERROR("Failed to set RPS for idle\n"); } void gen6_rps_busy(struct drm_i915_private *dev_priv) @@ -5035,10 +5048,11 @@ void gen6_rps_busy(struct drm_i915_private *dev_priv) gen6_enable_rps_interrupts(dev_priv); /* Ensure we start at the user's desired frequency */ - intel_set_rps(dev_priv, - clamp(dev_priv->rps.cur_freq, - dev_priv->rps.min_freq_softlimit, - dev_priv->rps.max_freq_softlimit)); + if (intel_set_rps(dev_priv, + clamp(dev_priv->rps.cur_freq, + dev_priv->rps.min_freq_softlimit, + dev_priv->rps.max_freq_softlimit))) + DRM_DEBUG_DRIVER("Failed to set idle frequency\n"); } mutex_unlock(&dev_priv->rps.hw_lock); } @@ -5106,12 +5120,16 @@ void gen6_rps_boost(struct drm_i915_private *dev_priv, spin_unlock(&dev_priv->rps.client_lock); } -void intel_set_rps(struct drm_i915_private *dev_priv, u8 val) +int intel_set_rps(struct drm_i915_private *dev_priv, u8 val) { + int err; + if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) - valleyview_set_rps(dev_priv, val); + err = valleyview_set_rps(dev_priv, val); else - gen6_set_rps(dev_priv, val); + err = gen6_set_rps(dev_priv, val); + + return err; } static void gen9_disable_rc6(struct drm_i915_private *dev_priv) @@ -5315,7 +5333,7 @@ static void gen6_init_rps_frequencies(struct drm_i915_private *dev_priv) } static void reset_rps(struct drm_i915_private *dev_priv, - void (*set)(struct drm_i915_private *, u8)) + int (*set)(struct drm_i915_private *, u8)) { u8 freq = dev_priv->rps.cur_freq; @@ -5323,7 +5341,8 @@ static void reset_rps(struct drm_i915_private *dev_priv, dev_priv->rps.power = -1; dev_priv->rps.cur_freq = -1; - set(dev_priv, freq); + if (set(dev_priv, freq)) + DRM_ERROR("Failed to reset RPS to initial values\n"); } /* See the Gen9_GT_PM_Programming_Guide doc for the below */ diff --git a/drivers/gpu/drm/i915/intel_sideband.c b/drivers/gpu/drm/i915/intel_sideband.c index 1a840bf92eea..d3d49a09c919 100644 --- a/drivers/gpu/drm/i915/intel_sideband.c +++ b/drivers/gpu/drm/i915/intel_sideband.c @@ -93,14 +93,18 @@ u32 vlv_punit_read(struct drm_i915_private *dev_priv, u32 addr) return val; } -void vlv_punit_write(struct drm_i915_private *dev_priv, u32 addr, u32 val) +int vlv_punit_write(struct drm_i915_private *dev_priv, u32 addr, u32 val) { + int err; + WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); mutex_lock(&dev_priv->sb_lock); - vlv_sideband_rw(dev_priv, PCI_DEVFN(0, 0), IOSF_PORT_PUNIT, - SB_CRWRDA_NP, addr, &val); + err = vlv_sideband_rw(dev_priv, PCI_DEVFN(0, 0), IOSF_PORT_PUNIT, + SB_CRWRDA_NP, addr, &val); mutex_unlock(&dev_priv->sb_lock); + + return err; } u32 vlv_bunit_read(struct drm_i915_private *dev_priv, u32 reg) -- cgit From ed576a5ca47fbe374cd762cb1fde4bdc41e233b4 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 25 Jan 2017 13:48:08 +0000 Subject: drm/i915: Also clear the punit's PDATA sideband register As the previous punit i/o may have failed, the contents of the PDATA are undefined. Always clear it to 0 prior to sending the command. Signed-off-by: Chris Wilson Cc: Mika Kuoppala Link: http://patchwork.freedesktop.org/patch/msgid/20170125134808.6152-2-chris@chris-wilson.co.uk Reviewed-by: Mika Kuoppala --- drivers/gpu/drm/i915/intel_sideband.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_sideband.c b/drivers/gpu/drm/i915/intel_sideband.c index d3d49a09c919..9f782b5eb6e6 100644 --- a/drivers/gpu/drm/i915/intel_sideband.c +++ b/drivers/gpu/drm/i915/intel_sideband.c @@ -60,8 +60,7 @@ static int vlv_sideband_rw(struct drm_i915_private *dev_priv, u32 devfn, } I915_WRITE(VLV_IOSF_ADDR, addr); - if (!is_read) - I915_WRITE(VLV_IOSF_DATA, *val); + I915_WRITE(VLV_IOSF_DATA, is_read ? 0 : *val); I915_WRITE(VLV_IOSF_DOORBELL_REQ, cmd); if (intel_wait_for_register(dev_priv, @@ -74,7 +73,6 @@ static int vlv_sideband_rw(struct drm_i915_private *dev_priv, u32 devfn, if (is_read) *val = I915_READ(VLV_IOSF_DATA); - I915_WRITE(VLV_IOSF_DATA, 0); return 0; } -- cgit From bc384c77e3bb3df2b396f4fb246b452571896c5e Mon Sep 17 00:00:00 2001 From: Paulo Zanoni Date: Tue, 24 Jan 2017 16:47:22 -0200 Subject: x86/gpu: GLK uses the same GMS values as SKL So don't forget to reserve its stolen memory bits. Cc: Ingo Molnar Cc: H. Peter Anvin Cc: Ander Conselvan de Oliveira Cc: x86@kernel.org Reviewed-by: Ander Conselvan de Oliveira Signed-off-by: Paulo Zanoni Acked-by: Ingo Molnar Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/1485283642-14401-1-git-send-email-paulo.r.zanoni@intel.com --- arch/x86/kernel/early-quirks.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c index 6a08e25a48d8..23c4f1ce0718 100644 --- a/arch/x86/kernel/early-quirks.c +++ b/arch/x86/kernel/early-quirks.c @@ -526,6 +526,7 @@ static const struct pci_device_id intel_early_ids[] __initconst = { INTEL_SKL_IDS(&gen9_early_ops), INTEL_BXT_IDS(&gen9_early_ops), INTEL_KBL_IDS(&gen9_early_ops), + INTEL_GLK_IDS(&gen9_early_ops), }; static void __init -- cgit From 2eebe4f2d5f4c91edc37801d828ba29edfbc7722 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Mon, 23 Jan 2017 11:42:59 +0200 Subject: drm/color: un-inline drm_color_lut_extract() The function is not that big, but it's also not used for anything performance critical. Make it a normal function. As a side effect, this apparently makes sparse smarter about what it's doing, and gets rid of the warning: ./include/drm/drm_color_mgmt.h:53:28: warning: shift too big (4294967295) for type unsigned long ./include/drm/drm_color_mgmt.h:53:28: warning: cast truncates bits from constant value (8000000000000000 becomes 0) Cc: Lionel Landwerlin Reviewed-by: Daniel Vetter Reviewed-by: Lionel Landwerlin Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/1485164579-16250-1-git-send-email-jani.nikula@intel.com --- drivers/gpu/drm/drm_color_mgmt.c | 24 ++++++++++++++++++++++++ include/drm/drm_color_mgmt.h | 27 ++------------------------- 2 files changed, 26 insertions(+), 25 deletions(-) diff --git a/drivers/gpu/drm/drm_color_mgmt.c b/drivers/gpu/drm/drm_color_mgmt.c index 789b4c65cd69..5618f60c7690 100644 --- a/drivers/gpu/drm/drm_color_mgmt.c +++ b/drivers/gpu/drm/drm_color_mgmt.c @@ -87,6 +87,30 @@ * "GAMMA_LUT" property above. */ +/** + * drm_color_lut_extract - clamp&round LUT entries + * @user_input: input value + * @bit_precision: number of bits the hw LUT supports + * + * Extract a degamma/gamma LUT value provided by user (in the form of + * &drm_color_lut entries) and round it to the precision supported by the + * hardware. + */ +uint32_t drm_color_lut_extract(uint32_t user_input, uint32_t bit_precision) +{ + uint32_t val = user_input; + uint32_t max = 0xffff >> (16 - bit_precision); + + /* Round only if we're not using full precision. */ + if (bit_precision < 16) { + val += 1UL << (16 - bit_precision - 1); + val >>= 16 - bit_precision; + } + + return clamp_val(val, 0, max); +} +EXPORT_SYMBOL(drm_color_lut_extract); + /** * drm_crtc_enable_color_mgmt - enable color management properties * @crtc: DRM CRTC diff --git a/include/drm/drm_color_mgmt.h b/include/drm/drm_color_mgmt.h index c767238ac9d5..bce4a532836d 100644 --- a/include/drm/drm_color_mgmt.h +++ b/include/drm/drm_color_mgmt.h @@ -25,6 +25,8 @@ #include +uint32_t drm_color_lut_extract(uint32_t user_input, uint32_t bit_precision); + void drm_crtc_enable_color_mgmt(struct drm_crtc *crtc, uint degamma_lut_size, bool has_ctm, @@ -33,29 +35,4 @@ void drm_crtc_enable_color_mgmt(struct drm_crtc *crtc, int drm_mode_crtc_set_gamma_size(struct drm_crtc *crtc, int gamma_size); -/** - * drm_color_lut_extract - clamp&round LUT entries - * @user_input: input value - * @bit_precision: number of bits the hw LUT supports - * - * Extract a degamma/gamma LUT value provided by user (in the form of - * &drm_color_lut entries) and round it to the precision supported by the - * hardware. - */ -static inline uint32_t drm_color_lut_extract(uint32_t user_input, - uint32_t bit_precision) -{ - uint32_t val = user_input; - uint32_t max = 0xffff >> (16 - bit_precision); - - /* Round only if we're not using full precision. */ - if (bit_precision < 16) { - val += 1UL << (16 - bit_precision - 1); - val >>= 16 - bit_precision; - } - - return clamp_val(val, 0, max); -} - - #endif -- cgit From 920bcd1820a6966b6224f62eadcb4e931bb72e8e Mon Sep 17 00:00:00 2001 From: Paulo Zanoni Date: Thu, 26 Jan 2017 18:19:07 -0200 Subject: drm/i915: make i915_stolen_to_physical() return phys_addr_t The i915_stolen_to_physical() function has 'unsigned long' as its return type but it returns the 'base' variable, which is of type 'u32'. The only place where this function is called assigns the returned value to dev_priv->mm.stolen_base, which is of type 'phys_addr_t'. The return value is actually a physical address and everything else in the stolen memory code seems to be using phys_addr_t, so fix i915_stolen_to_physical() to use phys_addr_t. v2: Add missing blank lines after declarations (Chris, checkpatch.pl). Reviewed-by: Chris Wilson Signed-off-by: Paulo Zanoni Link: http://patchwork.freedesktop.org/patch/msgid/1485461947-16030-1-git-send-email-paulo.r.zanoni@intel.com --- drivers/gpu/drm/i915/i915_gem_stolen.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c index 127d698e7c84..729dba017b78 100644 --- a/drivers/gpu/drm/i915/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/i915_gem_stolen.c @@ -79,12 +79,12 @@ void i915_gem_stolen_remove_node(struct drm_i915_private *dev_priv, mutex_unlock(&dev_priv->mm.stolen_lock); } -static unsigned long i915_stolen_to_physical(struct drm_i915_private *dev_priv) +static phys_addr_t i915_stolen_to_physical(struct drm_i915_private *dev_priv) { struct pci_dev *pdev = dev_priv->drm.pdev; struct i915_ggtt *ggtt = &dev_priv->ggtt; struct resource *r; - u32 base; + phys_addr_t base; /* Almost universally we can find the Graphics Base of Stolen Memory * at register BSM (0x5c) in the igfx configuration space. On a few @@ -196,7 +196,7 @@ static unsigned long i915_stolen_to_physical(struct drm_i915_private *dev_priv) if (INTEL_GEN(dev_priv) <= 4 && !IS_G33(dev_priv) && !IS_PINEVIEW(dev_priv) && !IS_G4X(dev_priv)) { struct { - u32 start, end; + phys_addr_t start, end; } stolen[2] = { { .start = base, .end = base + ggtt->stolen_size, }, { .start = base, .end = base + ggtt->stolen_size, }, @@ -228,11 +228,13 @@ static unsigned long i915_stolen_to_physical(struct drm_i915_private *dev_priv) if (stolen[0].start != stolen[1].start || stolen[0].end != stolen[1].end) { + phys_addr_t end = base + ggtt->stolen_size - 1; + DRM_DEBUG_KMS("GTT within stolen memory at 0x%llx-0x%llx\n", (unsigned long long)ggtt_start, (unsigned long long)ggtt_end - 1); - DRM_DEBUG_KMS("Stolen memory adjusted to 0x%x-0x%x\n", - base, base + (u32)ggtt->stolen_size - 1); + DRM_DEBUG_KMS("Stolen memory adjusted to %pa - %pa\n", + &base, &end); } } @@ -261,8 +263,10 @@ static unsigned long i915_stolen_to_physical(struct drm_i915_private *dev_priv) * range. Apparently this works. */ if (r == NULL && !IS_GEN3(dev_priv)) { - DRM_ERROR("conflict detected with stolen region: [0x%08x - 0x%08x]\n", - base, base + (uint32_t)ggtt->stolen_size); + phys_addr_t end = base + ggtt->stolen_size; + + DRM_ERROR("conflict detected with stolen region: [%pa - %pa]\n", + &base, &end); base = 0; } } -- cgit From 05c41f926fcc7ef838c80a6a99d84f67b4e0b824 Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Thu, 26 Jan 2017 17:32:11 +0300 Subject: drm/i915: fix use-after-free in page_flip_completed() page_flip_completed() dereferences 'work' variable after executing queue_work(). This is not safe as the 'work' item might be already freed by queued work: BUG: KASAN: use-after-free in page_flip_completed+0x3ff/0x490 at addr ffff8803dc010f90 Call Trace: __asan_report_load8_noabort+0x59/0x80 page_flip_completed+0x3ff/0x490 intel_finish_page_flip_mmio+0xe3/0x130 intel_pipe_handle_vblank+0x2d/0x40 gen8_irq_handler+0x4a7/0xed0 __handle_irq_event_percpu+0xf6/0x860 handle_irq_event_percpu+0x6b/0x160 handle_irq_event+0xc7/0x1b0 handle_edge_irq+0x1f4/0xa50 handle_irq+0x41/0x70 do_IRQ+0x9a/0x200 common_interrupt+0x89/0x89 Freed: kfree+0x113/0x4d0 intel_unpin_work_fn+0x29a/0x3b0 process_one_work+0x79e/0x1b70 worker_thread+0x611/0x1460 kthread+0x241/0x3a0 ret_from_fork+0x27/0x40 Move queue_work() after trace_i915_flip_complete() to fix this. Fixes: e5510fac98a7 ("drm/i915: add tracepoints for flip requests & completions") Signed-off-by: Andrey Ryabinin Cc: # v2.6.36+ Reviewed-by: Chris Wilson Signed-off-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/20170126143211.24013-1-aryabinin@virtuozzo.com --- drivers/gpu/drm/i915/intel_display.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index ef65b4be5e56..37f71e992e63 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -4253,10 +4253,10 @@ static void page_flip_completed(struct intel_crtc *intel_crtc) drm_crtc_vblank_put(&intel_crtc->base); wake_up_all(&dev_priv->pending_flip_queue); - queue_work(dev_priv->wq, &work->unpin_work); - trace_i915_flip_complete(intel_crtc->plane, work->pending_flip_obj); + + queue_work(dev_priv->wq, &work->unpin_work); } static int intel_crtc_wait_for_pending_flips(struct drm_crtc *crtc) -- cgit From 40f62bb75e354dddfd8d7b6d7b96c4ab93cb00f1 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Fri, 27 Jan 2017 16:11:29 +0200 Subject: Revert "drm/color: un-inline drm_color_lut_extract()" This reverts commit 2eebe4f2d5f4c91edc37801d828ba29edfbc7722, because I screwed up and applied it to the wrong branch. Signed-off-by: Jani Nikula --- drivers/gpu/drm/drm_color_mgmt.c | 24 ------------------------ include/drm/drm_color_mgmt.h | 27 +++++++++++++++++++++++++-- 2 files changed, 25 insertions(+), 26 deletions(-) diff --git a/drivers/gpu/drm/drm_color_mgmt.c b/drivers/gpu/drm/drm_color_mgmt.c index 5618f60c7690..789b4c65cd69 100644 --- a/drivers/gpu/drm/drm_color_mgmt.c +++ b/drivers/gpu/drm/drm_color_mgmt.c @@ -87,30 +87,6 @@ * "GAMMA_LUT" property above. */ -/** - * drm_color_lut_extract - clamp&round LUT entries - * @user_input: input value - * @bit_precision: number of bits the hw LUT supports - * - * Extract a degamma/gamma LUT value provided by user (in the form of - * &drm_color_lut entries) and round it to the precision supported by the - * hardware. - */ -uint32_t drm_color_lut_extract(uint32_t user_input, uint32_t bit_precision) -{ - uint32_t val = user_input; - uint32_t max = 0xffff >> (16 - bit_precision); - - /* Round only if we're not using full precision. */ - if (bit_precision < 16) { - val += 1UL << (16 - bit_precision - 1); - val >>= 16 - bit_precision; - } - - return clamp_val(val, 0, max); -} -EXPORT_SYMBOL(drm_color_lut_extract); - /** * drm_crtc_enable_color_mgmt - enable color management properties * @crtc: DRM CRTC diff --git a/include/drm/drm_color_mgmt.h b/include/drm/drm_color_mgmt.h index bce4a532836d..c767238ac9d5 100644 --- a/include/drm/drm_color_mgmt.h +++ b/include/drm/drm_color_mgmt.h @@ -25,8 +25,6 @@ #include -uint32_t drm_color_lut_extract(uint32_t user_input, uint32_t bit_precision); - void drm_crtc_enable_color_mgmt(struct drm_crtc *crtc, uint degamma_lut_size, bool has_ctm, @@ -35,4 +33,29 @@ void drm_crtc_enable_color_mgmt(struct drm_crtc *crtc, int drm_mode_crtc_set_gamma_size(struct drm_crtc *crtc, int gamma_size); +/** + * drm_color_lut_extract - clamp&round LUT entries + * @user_input: input value + * @bit_precision: number of bits the hw LUT supports + * + * Extract a degamma/gamma LUT value provided by user (in the form of + * &drm_color_lut entries) and round it to the precision supported by the + * hardware. + */ +static inline uint32_t drm_color_lut_extract(uint32_t user_input, + uint32_t bit_precision) +{ + uint32_t val = user_input; + uint32_t max = 0xffff >> (16 - bit_precision); + + /* Round only if we're not using full precision. */ + if (bit_precision < 16) { + val += 1UL << (16 - bit_precision - 1); + val >>= 16 - bit_precision; + } + + return clamp_val(val, 0, max); +} + + #endif -- cgit From 77ae9957897df86e627089688265e0db029dd0df Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 27 Jan 2017 09:40:07 +0000 Subject: drm/i915: Enable userspace to opt-out of implicit fencing Userspace is faced with a dilemma. The kernel requires implicit fencing to manage resource usage (we always must wait for the GPU to finish before releasing its PTE) and for third parties. However, userspace may wish to avoid this serialisation if it is either using explicit fencing between parties and wants more fine-grained access to buffers (e.g. it may partition the buffer between uses and track fences on ranges rather than the implicit fences tracking the whole object). It follows that userspace needs a mechanism to avoid the kernel's serialisation on its implicit fences before execbuf execution. The next question is whether this is an object, execbuf or context flag. Hybrid users (such as using explicit EGL_ANDROID_native_sync fencing on shared winsys buffers, but implicit fencing on internal surfaces) require a per-object level flag. Given that this flag need to be only set once for the lifetime of the object, this reduces the convenience of having an execbuf or context level flag (and avoids having multiple pieces of uABI controlling the same feature). Incorrect use of this flag will result in rendering corruption and GPU hangs - but will not result in use-after-free or similar resource tracking issues. Serious caveat: write ordering is not strictly correct after setting this flag on a render target on multiple engines. This affects all subsequent GEM operations (execbuf, set-domain, pread) and shared dma-buf operations. A fix is possible - but costly (both in terms of further ABI changes and runtime overhead). Testcase: igt/gem_exec_async Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Acked-by: Chad Versace Link: http://patchwork.freedesktop.org/patch/msgid/20170127094008.27489-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_drv.c | 1 + drivers/gpu/drm/i915/i915_gem_execbuffer.c | 3 +++ include/uapi/drm/i915_drm.h | 29 ++++++++++++++++++++++++++++- 3 files changed, 32 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 507a6f69426f..6c9c2037fadf 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -349,6 +349,7 @@ static int i915_getparam(struct drm_device *dev, void *data, case I915_PARAM_HAS_EXEC_HANDLE_LUT: case I915_PARAM_HAS_COHERENT_PHYS_GTT: case I915_PARAM_HAS_EXEC_SOFTPIN: + case I915_PARAM_HAS_EXEC_ASYNC: /* For the time being all of these are always true; * if some supported hardware does not have one of these * features this value needs to be provided from diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index c66e90571031..6fd60682bf93 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -1111,6 +1111,9 @@ i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req, list_for_each_entry(vma, vmas, exec_list) { struct drm_i915_gem_object *obj = vma->obj; + if (vma->exec_entry->flags & EXEC_OBJECT_ASYNC) + continue; + ret = i915_gem_request_await_object (req, obj, obj->base.pending_write_domain); if (ret) diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 57093b455db6..6620b6ad76ed 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -397,6 +397,12 @@ typedef struct drm_i915_irq_wait { #define I915_PARAM_HAS_SCHEDULER 41 #define I915_PARAM_HUC_STATUS 42 +/* Query whether DRM_I915_GEM_EXECBUFFER2 supports the ability to opt-out of + * synchronisation with implicit fencing on individual objects. + * See EXEC_OBJECT_ASYNC. + */ +#define I915_PARAM_HAS_EXEC_ASYNC 43 + typedef struct drm_i915_getparam { __s32 param; /* @@ -737,8 +743,29 @@ struct drm_i915_gem_exec_object2 { #define EXEC_OBJECT_SUPPORTS_48B_ADDRESS (1<<3) #define EXEC_OBJECT_PINNED (1<<4) #define EXEC_OBJECT_PAD_TO_SIZE (1<<5) +/* The kernel implicitly tracks GPU activity on all GEM objects, and + * synchronises operations with outstanding rendering. This includes + * rendering on other devices if exported via dma-buf. However, sometimes + * this tracking is too coarse and the user knows better. For example, + * if the object is split into non-overlapping ranges shared between different + * clients or engines (i.e. suballocating objects), the implicit tracking + * by kernel assumes that each operation affects the whole object rather + * than an individual range, causing needless synchronisation between clients. + * The kernel will also forgo any CPU cache flushes prior to rendering from + * the object as the client is expected to be also handling such domain + * tracking. + * + * The kernel maintains the implicit tracking in order to manage resources + * used by the GPU - this flag only disables the synchronisation prior to + * rendering with this object in this execbuf. + * + * Opting out of implicit synhronisation requires the user to do its own + * explicit tracking to avoid rendering corruption. See, for example, + * I915_PARAM_HAS_EXEC_FENCE to order execbufs and execute them asynchronously. + */ +#define EXEC_OBJECT_ASYNC (1<<6) /* All remaining bits are MBZ and RESERVED FOR FUTURE USE */ -#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_PAD_TO_SIZE<<1) +#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_ASYNC<<1) __u64 flags; union { -- cgit From fec0445caa273209d2809760ac7c63e743d6f512 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 27 Jan 2017 09:40:08 +0000 Subject: drm/i915: Support explicit fencing for execbuf Now that the user can opt-out of implicit fencing, we need to give them back control over the fencing. We employ sync_file to wrap our drm_i915_gem_request and provide an fd that userspace can merge with other sync_file fds and pass back to the kernel to wait upon before future execution. Testcase: igt/gem_exec_fence Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Acked-by: Chad Versace Link: http://patchwork.freedesktop.org/patch/msgid/20170127094008.27489-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/Kconfig | 1 + drivers/gpu/drm/i915/i915_drv.c | 3 +- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 54 +++++++++++++++++++++++++++--- include/uapi/drm/i915_drm.h | 36 +++++++++++++++++++- 4 files changed, 87 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig index 183f5dc1c3f2..1ae0bb91ee60 100644 --- a/drivers/gpu/drm/i915/Kconfig +++ b/drivers/gpu/drm/i915/Kconfig @@ -19,6 +19,7 @@ config DRM_I915 select INPUT if ACPI select ACPI_VIDEO if ACPI select ACPI_BUTTON if ACPI + select SYNC_FILE help Choose this option if you have a system that has "Intel Graphics Media Accelerator" or "HD Graphics" integrated graphics, diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 6c9c2037fadf..3aa5bf58cf32 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -350,6 +350,7 @@ static int i915_getparam(struct drm_device *dev, void *data, case I915_PARAM_HAS_COHERENT_PHYS_GTT: case I915_PARAM_HAS_EXEC_SOFTPIN: case I915_PARAM_HAS_EXEC_ASYNC: + case I915_PARAM_HAS_EXEC_FENCE: /* For the time being all of these are always true; * if some supported hardware does not have one of these * features this value needs to be provided from @@ -2550,7 +2551,7 @@ static const struct drm_ioctl_desc i915_ioctls[] = { DRM_IOCTL_DEF_DRV(I915_HWS_ADDR, drm_noop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), DRM_IOCTL_DEF_DRV(I915_GEM_INIT, drm_noop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), DRM_IOCTL_DEF_DRV(I915_GEM_EXECBUFFER, i915_gem_execbuffer, DRM_AUTH), - DRM_IOCTL_DEF_DRV(I915_GEM_EXECBUFFER2, i915_gem_execbuffer2, DRM_AUTH|DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(I915_GEM_EXECBUFFER2_WR, i915_gem_execbuffer2, DRM_AUTH|DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(I915_GEM_PIN, i915_gem_reject_pin_ioctl, DRM_AUTH|DRM_ROOT_ONLY), DRM_IOCTL_DEF_DRV(I915_GEM_UNPIN, i915_gem_reject_pin_ioctl, DRM_AUTH|DRM_ROOT_ONLY), DRM_IOCTL_DEF_DRV(I915_GEM_BUSY, i915_gem_busy_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 6fd60682bf93..91c2393199a3 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -28,6 +28,7 @@ #include #include +#include #include #include @@ -1595,6 +1596,9 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, struct i915_execbuffer_params *params = ¶ms_master; const u32 ctx_id = i915_execbuffer2_get_context_id(*args); u32 dispatch_flags; + struct dma_fence *in_fence = NULL; + struct sync_file *out_fence = NULL; + int out_fence_fd = -1; int ret; bool need_relocs; @@ -1638,6 +1642,23 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, dispatch_flags |= I915_DISPATCH_RS; } + if (args->flags & I915_EXEC_FENCE_IN) { + in_fence = sync_file_get_fence(lower_32_bits(args->rsvd2)); + if (!in_fence) { + ret = -EINVAL; + goto pre_mutex_err; + } + } + + if (args->flags & I915_EXEC_FENCE_OUT) { + out_fence_fd = get_unused_fd_flags(O_CLOEXEC); + if (out_fence_fd < 0) { + ret = out_fence_fd; + out_fence_fd = -1; + goto pre_mutex_err; + } + } + /* Take a local wakeref for preparing to dispatch the execbuf as * we expect to access the hardware fairly frequently in the * process. Upon first dispatch, we acquire another prolonged @@ -1782,6 +1803,21 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, goto err_batch_unpin; } + if (in_fence) { + ret = i915_gem_request_await_dma_fence(params->request, + in_fence); + if (ret < 0) + goto err_request; + } + + if (out_fence_fd != -1) { + out_fence = sync_file_create(¶ms->request->fence); + if (!out_fence) { + ret = -ENOMEM; + goto err_request; + } + } + /* Whilst this request exists, batch_obj will be on the * active_list, and so will hold the active reference. Only when this * request is retired will the the batch_obj be moved onto the @@ -1809,6 +1845,16 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, ret = execbuf_submit(params, args, &eb->vmas); err_request: __i915_add_request(params->request, ret == 0); + if (out_fence) { + if (ret == 0) { + fd_install(out_fence_fd, out_fence->file); + args->rsvd2 &= GENMASK_ULL(0, 31); /* keep in-fence */ + args->rsvd2 |= (u64)out_fence_fd << 32; + out_fence_fd = -1; + } else { + fput(out_fence->file); + } + } err_batch_unpin: /* @@ -1830,6 +1876,9 @@ pre_mutex_err: /* intel_gpu_busy should also get a ref, so it will free when the device * is really idle. */ intel_runtime_pm_put(dev_priv); + if (out_fence_fd != -1) + put_unused_fd(out_fence_fd); + dma_fence_put(in_fence); return ret; } @@ -1937,11 +1986,6 @@ i915_gem_execbuffer2(struct drm_device *dev, void *data, return -EINVAL; } - if (args->rsvd2 != 0) { - DRM_DEBUG("dirty rvsd2 field\n"); - return -EINVAL; - } - exec2_list = drm_malloc_gfp(args->buffer_count, sizeof(*exec2_list), GFP_TEMPORARY); diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 6620b6ad76ed..3554495bef13 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -246,6 +246,7 @@ typedef struct _drm_i915_sarea { #define DRM_I915_OVERLAY_PUT_IMAGE 0x27 #define DRM_I915_OVERLAY_ATTRS 0x28 #define DRM_I915_GEM_EXECBUFFER2 0x29 +#define DRM_I915_GEM_EXECBUFFER2_WR DRM_I915_GEM_EXECBUFFER2 #define DRM_I915_GET_SPRITE_COLORKEY 0x2a #define DRM_I915_SET_SPRITE_COLORKEY 0x2b #define DRM_I915_GEM_WAIT 0x2c @@ -280,6 +281,7 @@ typedef struct _drm_i915_sarea { #define DRM_IOCTL_I915_GEM_INIT DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_INIT, struct drm_i915_gem_init) #define DRM_IOCTL_I915_GEM_EXECBUFFER DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_EXECBUFFER, struct drm_i915_gem_execbuffer) #define DRM_IOCTL_I915_GEM_EXECBUFFER2 DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_EXECBUFFER2, struct drm_i915_gem_execbuffer2) +#define DRM_IOCTL_I915_GEM_EXECBUFFER2_WR DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_EXECBUFFER2_WR, struct drm_i915_gem_execbuffer2) #define DRM_IOCTL_I915_GEM_PIN DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_PIN, struct drm_i915_gem_pin) #define DRM_IOCTL_I915_GEM_UNPIN DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_UNPIN, struct drm_i915_gem_unpin) #define DRM_IOCTL_I915_GEM_BUSY DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_BUSY, struct drm_i915_gem_busy) @@ -403,6 +405,13 @@ typedef struct drm_i915_irq_wait { */ #define I915_PARAM_HAS_EXEC_ASYNC 43 +/* Query whether DRM_I915_GEM_EXECBUFFER2 supports explicit fence support - + * both being able to pass in a sync_file fd to wait upon before executing, + * and being able to return a new sync_file fd that is signaled when the + * current request is complete. See I915_EXEC_FENCE_IN and I915_EXEC_FENCE_OUT. + */ +#define I915_PARAM_HAS_EXEC_FENCE 44 + typedef struct drm_i915_getparam { __s32 param; /* @@ -855,7 +864,32 @@ struct drm_i915_gem_execbuffer2 { */ #define I915_EXEC_RESOURCE_STREAMER (1<<15) -#define __I915_EXEC_UNKNOWN_FLAGS -(I915_EXEC_RESOURCE_STREAMER<<1) +/* Setting I915_EXEC_FENCE_IN implies that lower_32_bits(rsvd2) represent + * a sync_file fd to wait upon (in a nonblocking manner) prior to executing + * the batch. + * + * Returns -EINVAL if the sync_file fd cannot be found. + */ +#define I915_EXEC_FENCE_IN (1<<16) + +/* Setting I915_EXEC_FENCE_OUT causes the ioctl to return a sync_file fd + * in the upper_32_bits(rsvd2) upon success. Ownership of the fd is given + * to the caller, and it should be close() after use. (The fd is a regular + * file descriptor and will be cleaned up on process termination. It holds + * a reference to the request, but nothing else.) + * + * The sync_file fd can be combined with other sync_file and passed either + * to execbuf using I915_EXEC_FENCE_IN, to atomic KMS ioctls (so that a flip + * will only occur after this request completes), or to other devices. + * + * Using I915_EXEC_FENCE_OUT requires use of + * DRM_IOCTL_I915_GEM_EXECBUFFER2_WR ioctl so that the result is written + * back to userspace. Failure to do so will cause the out-fence to always + * be reported as zero, and the real fence fd to be leaked. + */ +#define I915_EXEC_FENCE_OUT (1<<17) + +#define __I915_EXEC_UNKNOWN_FLAGS (-(I915_EXEC_FENCE_OUT<<1)) #define I915_EXEC_CONTEXT_ID_MASK (0xffffffff) #define i915_execbuffer2_set_context_id(eb2, context) \ -- cgit From 9fb5026f857dc3145cf13eedabadb28ed028d093 Mon Sep 17 00:00:00 2001 From: Ander Conselvan de Oliveira Date: Thu, 26 Jan 2017 11:16:58 +0200 Subject: drm/i915/glk: Turn on workarounds that apply to Geminilake too Apply workarounds to Geminilake, and annotate those that are applied unconditionally when they apply to GLK based on the workaround database. v2: Fix commit message typos. (David) v3: Rebase. Cc: David Weinehall Cc: Rodrigo Vivi Signed-off-by: Ander Conselvan de Oliveira Reviewed-by: David Weinehall Link: http://patchwork.freedesktop.org/patch/msgid/1485422218-9102-1-git-send-email-ander.conselvan.de.oliveira@intel.com --- drivers/gpu/drm/i915/i915_gem_gtt.c | 4 ++-- drivers/gpu/drm/i915/intel_lrc.c | 6 +++--- drivers/gpu/drm/i915/intel_mocs.c | 2 +- drivers/gpu/drm/i915/intel_pm.c | 23 ++++++++++++++++---- drivers/gpu/drm/i915/intel_ringbuffer.c | 37 +++++++++++++++++++++++++-------- 5 files changed, 53 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 40685c68260b..048040efc3f0 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -2184,14 +2184,14 @@ static void gtt_write_workarounds(struct drm_i915_private *dev_priv) * called on driver load and after a GPU reset, so you can place * workarounds here even if they get overwritten by GPU reset. */ - /* WaIncreaseDefaultTLBEntries:chv,bdw,skl,bxt,kbl */ + /* WaIncreaseDefaultTLBEntries:chv,bdw,skl,bxt,kbl,glk */ if (IS_BROADWELL(dev_priv)) I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_BDW); else if (IS_CHERRYVIEW(dev_priv)) I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_CHV); else if (IS_GEN9_BC(dev_priv)) I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_SKL); - else if (IS_BROXTON(dev_priv)) + else if (IS_GEN9_LP(dev_priv)) I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_BXT); } diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index bee9d565b8f3..0e7b950bceac 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -1101,13 +1101,13 @@ static int gen9_init_indirectctx_bb(struct intel_engine_cs *engine, struct drm_i915_private *dev_priv = engine->i915; uint32_t index = wa_ctx_start(wa_ctx, *offset, CACHELINE_DWORDS); - /* WaFlushCoherentL3CacheLinesAtContextSwitch:skl,bxt */ + /* WaFlushCoherentL3CacheLinesAtContextSwitch:skl,bxt,glk */ ret = gen8_emit_flush_coherentl3_wa(engine, batch, index); if (ret < 0) return ret; index = ret; - /* WaDisableGatherAtSetShaderCommonSlice:skl,bxt,kbl */ + /* WaDisableGatherAtSetShaderCommonSlice:skl,bxt,kbl,glk */ wa_ctx_emit(batch, index, MI_LOAD_REGISTER_IMM(1)); wa_ctx_emit_reg(batch, index, COMMON_SLICE_CHICKEN2); wa_ctx_emit(batch, index, _MASKED_BIT_DISABLE( @@ -1131,7 +1131,7 @@ static int gen9_init_indirectctx_bb(struct intel_engine_cs *engine, wa_ctx_emit(batch, index, 0); } - /* WaMediaPoolStateCmdInWABB:bxt */ + /* WaMediaPoolStateCmdInWABB:bxt,glk */ if (HAS_POOLED_EU(engine->i915)) { /* * EU pool configuration is setup along with golden context diff --git a/drivers/gpu/drm/i915/intel_mocs.c b/drivers/gpu/drm/i915/intel_mocs.c index 8f98fc714fe9..773e36253e7c 100644 --- a/drivers/gpu/drm/i915/intel_mocs.c +++ b/drivers/gpu/drm/i915/intel_mocs.c @@ -191,7 +191,7 @@ static bool get_mocs_settings(struct drm_i915_private *dev_priv, "Platform that should have a MOCS table does not.\n"); } - /* WaDisableSkipCaching:skl,bxt,kbl */ + /* WaDisableSkipCaching:skl,bxt,kbl,glk */ if (IS_GEN9(dev_priv)) { int i; diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index bec4283cc228..ec16f3d6dd2e 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -65,12 +65,12 @@ static void gen9_init_clock_gating(struct drm_i915_private *dev_priv) I915_WRITE(GEN8_CONFIG0, I915_READ(GEN8_CONFIG0) | GEN9_DEFAULT_FIXES); - /* WaEnableChickenDCPR:skl,bxt,kbl */ + /* WaEnableChickenDCPR:skl,bxt,kbl,glk */ I915_WRITE(GEN8_CHICKEN_DCPR_1, I915_READ(GEN8_CHICKEN_DCPR_1) | MASK_WAKEMEM); /* WaFbcTurnOffFbcWatermark:skl,bxt,kbl */ - /* WaFbcWakeMemOn:skl,bxt,kbl */ + /* WaFbcWakeMemOn:skl,bxt,kbl,glk */ I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) | DISP_FBC_WM_DIS | DISP_FBC_MEMORY_WAKE); @@ -104,6 +104,19 @@ static void bxt_init_clock_gating(struct drm_i915_private *dev_priv) PWM1_GATING_DIS | PWM2_GATING_DIS); } +static void glk_init_clock_gating(struct drm_i915_private *dev_priv) +{ + gen9_init_clock_gating(dev_priv); + + /* + * WaDisablePWMClockGating:glk + * Backlight PWM may stop in the asserted state, causing backlight + * to stay fully on. + */ + I915_WRITE(GEN9_CLKGATE_DIS_0, I915_READ(GEN9_CLKGATE_DIS_0) | + PWM1_GATING_DIS | PWM2_GATING_DIS); +} + static void i915_pineview_get_mem_freq(struct drm_i915_private *dev_priv) { u32 tmp; @@ -2173,7 +2186,7 @@ static void intel_read_wm_latency(struct drm_i915_private *dev_priv, } /* - * WaWmMemoryReadLatency:skl + * WaWmMemoryReadLatency:skl,glk * * punit doesn't take into account the read latency so we need * to add 2us to the various latency levels we retrieve from the @@ -7645,8 +7658,10 @@ void intel_init_clock_gating_hooks(struct drm_i915_private *dev_priv) dev_priv->display.init_clock_gating = skylake_init_clock_gating; else if (IS_KABYLAKE(dev_priv)) dev_priv->display.init_clock_gating = kabylake_init_clock_gating; - else if (IS_GEN9_LP(dev_priv)) + else if (IS_BROXTON(dev_priv)) dev_priv->display.init_clock_gating = bxt_init_clock_gating; + else if (IS_GEMINILAKE(dev_priv)) + dev_priv->display.init_clock_gating = glk_init_clock_gating; else if (IS_BROADWELL(dev_priv)) dev_priv->display.init_clock_gating = broadwell_init_clock_gating; else if (IS_CHERRYVIEW(dev_priv)) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 69035e4f9b3b..d32cbba25d98 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -812,10 +812,10 @@ static int gen9_init_workarounds(struct intel_engine_cs *engine) struct drm_i915_private *dev_priv = engine->i915; int ret; - /* WaConextSwitchWithConcurrentTLBInvalidate:skl,bxt,kbl */ + /* WaConextSwitchWithConcurrentTLBInvalidate:skl,bxt,kbl,glk */ I915_WRITE(GEN9_CSFE_CHICKEN1_RCS, _MASKED_BIT_ENABLE(GEN9_PREEMPT_GPGPU_SYNC_SWITCH_DISABLE)); - /* WaEnableLbsSlaRetryTimerDecrement:skl,bxt,kbl */ + /* WaEnableLbsSlaRetryTimerDecrement:skl,bxt,kbl,glk */ I915_WRITE(BDW_SCRATCH1, I915_READ(BDW_SCRATCH1) | GEN9_LBS_SLA_RETRY_TIMER_DECREMENT_ENABLE); @@ -823,8 +823,8 @@ static int gen9_init_workarounds(struct intel_engine_cs *engine) I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | ECOCHK_DIS_TLB); - /* WaClearFlowControlGpgpuContextSave:skl,bxt,kbl */ - /* WaDisablePartialInstShootdown:skl,bxt,kbl */ + /* WaClearFlowControlGpgpuContextSave:skl,bxt,kbl,glk */ + /* WaDisablePartialInstShootdown:skl,bxt,kbl,glk */ WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, FLOW_CONTROL_ENABLE | PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE); @@ -853,12 +853,12 @@ static int gen9_init_workarounds(struct intel_engine_cs *engine) WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7, GEN9_ENABLE_GPGPU_PREEMPTION); - /* Wa4x4STCOptimizationDisable:skl,bxt,kbl */ + /* Wa4x4STCOptimizationDisable:skl,bxt,kbl,glk */ /* WaDisablePartialResolveInVc:skl,bxt,kbl */ WA_SET_BIT_MASKED(CACHE_MODE_1, (GEN8_4x4_STC_OPTIMIZATION_DISABLE | GEN9_PARTIAL_RESOLVE_IN_VC_DISABLE)); - /* WaCcsTlbPrefetchDisable:skl,bxt,kbl */ + /* WaCcsTlbPrefetchDisable:skl,bxt,kbl,glk */ WA_CLR_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN5, GEN9_CCS_TLB_PREFETCH_ENABLE); @@ -900,14 +900,14 @@ static int gen9_init_workarounds(struct intel_engine_cs *engine) WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, GEN8_SAMPLER_POWER_BYPASS_DIS); - /* WaDisableSTUnitPowerOptimization:skl,bxt,kbl */ + /* WaDisableSTUnitPowerOptimization:skl,bxt,kbl,glk */ WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN2, GEN8_ST_PO_DISABLE); /* WaOCLCoherentLineFlush:skl,bxt,kbl */ I915_WRITE(GEN8_L3SQCREG4, (I915_READ(GEN8_L3SQCREG4) | GEN8_LQSC_FLUSH_COHERENT_LINES)); - /* WaVFEStateAfterPipeControlwithMediaStateClear:skl,bxt */ + /* WaVFEStateAfterPipeControlwithMediaStateClear:skl,bxt,glk */ ret = wa_ring_whitelist_reg(engine, GEN9_CTX_PREEMPT_REG); if (ret) return ret; @@ -917,7 +917,7 @@ static int gen9_init_workarounds(struct intel_engine_cs *engine) if (ret) return ret; - /* WaAllowUMDToModifyHDCChicken1:skl,bxt,kbl */ + /* WaAllowUMDToModifyHDCChicken1:skl,bxt,kbl,glk */ ret = wa_ring_whitelist_reg(engine, GEN8_HDC_CHICKEN1); if (ret) return ret; @@ -1120,6 +1120,22 @@ static int kbl_init_workarounds(struct intel_engine_cs *engine) return 0; } +static int glk_init_workarounds(struct intel_engine_cs *engine) +{ + struct drm_i915_private *dev_priv = engine->i915; + int ret; + + ret = gen9_init_workarounds(engine); + if (ret) + return ret; + + /* WaToEnableHwFixForPushConstHWBug:glk */ + WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2, + GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION); + + return 0; +} + int init_workarounds_ring(struct intel_engine_cs *engine) { struct drm_i915_private *dev_priv = engine->i915; @@ -1144,6 +1160,9 @@ int init_workarounds_ring(struct intel_engine_cs *engine) if (IS_KABYLAKE(dev_priv)) return kbl_init_workarounds(engine); + if (IS_GEMINILAKE(dev_priv)) + return glk_init_workarounds(engine); + return 0; } -- cgit From bafbcc2fd148610e897ba49a4681f088a0eb58f9 Mon Sep 17 00:00:00 2001 From: Ander Conselvan de Oliveira Date: Thu, 26 Jan 2017 13:24:21 +0200 Subject: drm/i915: Disable plane gamma in SKL+ sprite planes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The plane gamma tables are never programmed, so just disable it, like it is done for the primary plane. Signed-off-by: Ander Conselvan de Oliveira Reviewed-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/1485429865-10687-2-git-send-email-ander.conselvan.de.oliveira@intel.com --- drivers/gpu/drm/i915/intel_sprite.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c index 9ef54688872a..c05545f64f81 100644 --- a/drivers/gpu/drm/i915/intel_sprite.c +++ b/drivers/gpu/drm/i915/intel_sprite.c @@ -226,6 +226,7 @@ skl_update_plane(struct drm_plane *drm_plane, plane_ctl |= skl_plane_ctl_format(fb->format->format); plane_ctl |= skl_plane_ctl_tiling(fb->modifier); + plane_ctl |= PLANE_CTL_PLANE_GAMMA_DISABLE; plane_ctl |= skl_plane_ctl_rotation(rotation); if (key->flags) { -- cgit From 47f9ea8b9143890c9b98ad9e16e8ad793be2d9e3 Mon Sep 17 00:00:00 2001 From: Ander Conselvan de Oliveira Date: Thu, 26 Jan 2017 13:24:22 +0200 Subject: drm/i915/glk: Plane color correction register changes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In Geminilake, the bits for enabling pipe csc, pipe gamma and plane gamma moved to a new register. So update the plane update functions to set the right bits. Pipe CSC is kept disabled though, since enabling that also enables the dedicated degamma table, and that is not properly programmed yet, leading to a black screen. v2: Use plane_id. (Ville) Remove unnecessary variable. (Ville) Keep registers in offset order. (Ville) Don't set plane gamma disable twice. (Ander) Cc: Ville Syrjälä Signed-off-by: Ander Conselvan De Oliveira Reviewed-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/1485429865-10687-3-git-send-email-ander.conselvan.de.oliveira@intel.com --- drivers/gpu/drm/i915/i915_reg.h | 19 ++++++++++++++++++- drivers/gpu/drm/i915/intel_display.c | 16 ++++++++++++---- drivers/gpu/drm/i915/intel_sprite.c | 17 ++++++++++++----- 3 files changed, 42 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 72f9f36ae5ce..c220736363fe 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -5869,11 +5869,18 @@ enum { #define _PLANE_KEYMSK_2_A 0x70298 #define _PLANE_KEYMAX_1_A 0x701a0 #define _PLANE_KEYMAX_2_A 0x702a0 +#define _PLANE_COLOR_CTL_1_A 0x701CC /* GLK+ */ +#define _PLANE_COLOR_CTL_2_A 0x702CC /* GLK+ */ +#define _PLANE_COLOR_CTL_3_A 0x703CC /* GLK+ */ +#define PLANE_COLOR_PIPE_GAMMA_ENABLE (1 << 30) +#define PLANE_COLOR_PIPE_CSC_ENABLE (1 << 23) +#define PLANE_COLOR_PLANE_GAMMA_DISABLE (1 << 13) #define _PLANE_BUF_CFG_1_A 0x7027c #define _PLANE_BUF_CFG_2_A 0x7037c #define _PLANE_NV12_BUF_CFG_1_A 0x70278 #define _PLANE_NV12_BUF_CFG_2_A 0x70378 + #define _PLANE_CTL_1_B 0x71180 #define _PLANE_CTL_2_B 0x71280 #define _PLANE_CTL_3_B 0x71380 @@ -5968,7 +5975,17 @@ enum { #define PLANE_NV12_BUF_CFG(pipe, plane) \ _MMIO_PLANE(plane, _PLANE_NV12_BUF_CFG_1(pipe), _PLANE_NV12_BUF_CFG_2(pipe)) -/* SKL new cursor registers */ +#define _PLANE_COLOR_CTL_1_B 0x711CC +#define _PLANE_COLOR_CTL_2_B 0x712CC +#define _PLANE_COLOR_CTL_3_B 0x713CC +#define _PLANE_COLOR_CTL_1(pipe) \ + _PIPE(pipe, _PLANE_COLOR_CTL_1_A, _PLANE_COLOR_CTL_1_B) +#define _PLANE_COLOR_CTL_2(pipe) \ + _PIPE(pipe, _PLANE_COLOR_CTL_2_A, _PLANE_COLOR_CTL_2_B) +#define PLANE_COLOR_CTL(pipe, plane) \ + _MMIO_PLANE(plane, _PLANE_COLOR_CTL_1(pipe), _PLANE_COLOR_CTL_2(pipe)) + +#/* SKL new cursor registers */ #define _CUR_BUF_CFG_A 0x7017c #define _CUR_BUF_CFG_B 0x7117c #define CUR_BUF_CFG(pipe) _MMIO_PIPE(pipe, _CUR_BUF_CFG_A, _CUR_BUF_CFG_B) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 37f71e992e63..ac25706b7d4d 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -3386,13 +3386,21 @@ static void skylake_update_primary_plane(struct drm_plane *plane, int dst_w = drm_rect_width(&plane_state->base.dst); int dst_h = drm_rect_height(&plane_state->base.dst); - plane_ctl = PLANE_CTL_ENABLE | - PLANE_CTL_PIPE_GAMMA_ENABLE | - PLANE_CTL_PIPE_CSC_ENABLE; + plane_ctl = PLANE_CTL_ENABLE; + + if (IS_GEMINILAKE(dev_priv)) { + I915_WRITE(PLANE_COLOR_CTL(pipe, plane_id), + PLANE_COLOR_PIPE_GAMMA_ENABLE | + PLANE_COLOR_PLANE_GAMMA_DISABLE); + } else { + plane_ctl |= + PLANE_CTL_PIPE_GAMMA_ENABLE | + PLANE_CTL_PIPE_CSC_ENABLE | + PLANE_CTL_PLANE_GAMMA_DISABLE; + } plane_ctl |= skl_plane_ctl_format(fb->format->format); plane_ctl |= skl_plane_ctl_tiling(fb->modifier); - plane_ctl |= PLANE_CTL_PLANE_GAMMA_DISABLE; plane_ctl |= skl_plane_ctl_rotation(rotation); /* Sizes are 0 based */ diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c index c05545f64f81..b16a29591803 100644 --- a/drivers/gpu/drm/i915/intel_sprite.c +++ b/drivers/gpu/drm/i915/intel_sprite.c @@ -219,14 +219,21 @@ skl_update_plane(struct drm_plane *drm_plane, uint32_t src_w = drm_rect_width(&plane_state->base.src) >> 16; uint32_t src_h = drm_rect_height(&plane_state->base.src) >> 16; - plane_ctl = PLANE_CTL_ENABLE | - PLANE_CTL_PIPE_GAMMA_ENABLE | - PLANE_CTL_PIPE_CSC_ENABLE; + plane_ctl = PLANE_CTL_ENABLE; + + if (IS_GEMINILAKE(dev_priv)) { + I915_WRITE(PLANE_COLOR_CTL(pipe, plane_id), + PLANE_COLOR_PIPE_GAMMA_ENABLE | + PLANE_COLOR_PLANE_GAMMA_DISABLE); + } else { + plane_ctl |= + PLANE_CTL_PIPE_GAMMA_ENABLE | + PLANE_CTL_PIPE_CSC_ENABLE | + PLANE_CTL_PLANE_GAMMA_DISABLE; + } plane_ctl |= skl_plane_ctl_format(fb->format->format); plane_ctl |= skl_plane_ctl_tiling(fb->modifier); - - plane_ctl |= PLANE_CTL_PLANE_GAMMA_DISABLE; plane_ctl |= skl_plane_ctl_rotation(rotation); if (key->flags) { -- cgit From 2fcb206654e8b3826424ffa7ef65bd74cc65ff16 Mon Sep 17 00:00:00 2001 From: Ander Conselvan de Oliveira Date: Thu, 26 Jan 2017 13:24:23 +0200 Subject: drm/i915: Split broadwell_load_luts() into smaller functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Split the logic for progamming each LUT out of broadwell_load_luts(), so we can reuse part of it for geminilake. Cc: Ville Syrjälä Signed-off-by: Ander Conselvan de Oliveira Reviewed-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/1485429865-10687-4-git-send-email-ander.conselvan.de.oliveira@intel.com --- drivers/gpu/drm/i915/i915_reg.h | 1 + drivers/gpu/drm/i915/intel_color.c | 43 ++++++++++++++++++++++++++++---------- 2 files changed, 33 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index c220736363fe..d16627a72ccb 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -8166,6 +8166,7 @@ enum { #define PAL_PREC_10_12_BIT (0 << 31) #define PAL_PREC_SPLIT_MODE (1 << 31) #define PAL_PREC_AUTO_INCREMENT (1 << 15) +#define PAL_PREC_INDEX_VALUE_MASK (0x3ff << 0) #define _PAL_PREC_DATA_A 0x4A404 #define _PAL_PREC_DATA_B 0x4AC04 #define _PAL_PREC_DATA_C 0x4B404 diff --git a/drivers/gpu/drm/i915/intel_color.c b/drivers/gpu/drm/i915/intel_color.c index 34952d04485e..82a3bc9befa8 100644 --- a/drivers/gpu/drm/i915/intel_color.c +++ b/drivers/gpu/drm/i915/intel_color.c @@ -340,20 +340,12 @@ static void haswell_load_luts(struct drm_crtc_state *crtc_state) hsw_enable_ips(intel_crtc); } -/* Loads the palette/gamma unit for the CRTC on Broadwell+. */ -static void broadwell_load_luts(struct drm_crtc_state *state) +static void bdw_load_degamma_lut(struct drm_crtc_state *state) { - struct drm_crtc *crtc = state->crtc; - struct drm_i915_private *dev_priv = to_i915(crtc->dev); - struct intel_crtc_state *intel_state = to_intel_crtc_state(state); - enum pipe pipe = to_intel_crtc(crtc)->pipe; + struct drm_i915_private *dev_priv = to_i915(state->crtc->dev); + enum pipe pipe = to_intel_crtc(state->crtc)->pipe; uint32_t i, lut_size = INTEL_INFO(dev_priv)->color.degamma_lut_size; - if (crtc_state_is_legacy(state)) { - haswell_load_luts(state); - return; - } - I915_WRITE(PREC_PAL_INDEX(pipe), PAL_PREC_SPLIT_MODE | PAL_PREC_AUTO_INCREMENT); @@ -377,6 +369,18 @@ static void broadwell_load_luts(struct drm_crtc_state *state) (v << 20) | (v << 10) | v); } } +} + +static void bdw_load_gamma_lut(struct drm_crtc_state *state, u32 offset) +{ + struct drm_i915_private *dev_priv = to_i915(state->crtc->dev); + enum pipe pipe = to_intel_crtc(state->crtc)->pipe; + uint32_t i, lut_size = INTEL_INFO(dev_priv)->color.gamma_lut_size; + + WARN_ON(offset & ~PAL_PREC_INDEX_VALUE_MASK); + + I915_WRITE(PREC_PAL_INDEX(pipe), + PAL_PREC_SPLIT_MODE | PAL_PREC_AUTO_INCREMENT | offset); if (state->gamma_lut) { struct drm_color_lut *lut = @@ -410,6 +414,23 @@ static void broadwell_load_luts(struct drm_crtc_state *state) I915_WRITE(PREC_PAL_GC_MAX(pipe, 1), (1 << 16) - 1); I915_WRITE(PREC_PAL_GC_MAX(pipe, 2), (1 << 16) - 1); } +} + +/* Loads the palette/gamma unit for the CRTC on Broadwell+. */ +static void broadwell_load_luts(struct drm_crtc_state *state) +{ + struct drm_i915_private *dev_priv = to_i915(state->crtc->dev); + struct intel_crtc_state *intel_state = to_intel_crtc_state(state); + enum pipe pipe = to_intel_crtc(state->crtc)->pipe; + + if (crtc_state_is_legacy(state)) { + haswell_load_luts(state); + return; + } + + bdw_load_degamma_lut(state); + bdw_load_gamma_lut(state, + INTEL_INFO(dev_priv)->color.degamma_lut_size); intel_state->gamma_mode = GAMMA_MODE_MODE_SPLIT; I915_WRITE(GAMMA_MODE(pipe), GAMMA_MODE_MODE_SPLIT); -- cgit From 9751bafc43d599262d9de7e6d988804dc5f5d4aa Mon Sep 17 00:00:00 2001 From: Ander Conselvan de Oliveira Date: Fri, 27 Jan 2017 11:02:30 +0200 Subject: drm/i915/glk: Program pipe gamma and degamma tables MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The gamma tables in Geminilake were changed. There is no split-gamma mode. Instead, there is a dedicated degamma table that is enabled whenever pipe CSC is enabled. The dedicated gamma table has 16 bit precision but doesn't support separate channels. Since that doesn't match the per-channel format of the degamma LUT property, for now only a linear table is loaded and the property ignored. v2: Remove empty line. (Ville) Reuse broadwell code. (Ville) v3: Don't write PIPE_CSC_MODE. (Ville) Cc: Ville Syrjälä Signed-off-by: Ander Conselvan de Oliveira Reviewed-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/20170127090230.20302-1-ander.conselvan.de.oliveira@intel.com --- drivers/gpu/drm/i915/i915_pci.c | 1 + drivers/gpu/drm/i915/i915_reg.h | 14 +++++++++ drivers/gpu/drm/i915/intel_color.c | 58 +++++++++++++++++++++++++++++++++++++- 3 files changed, 72 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index ecb487b5356f..df2051b41fa1 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -403,6 +403,7 @@ static const struct intel_device_info intel_geminilake_info = { .platform = INTEL_GEMINILAKE, .is_alpha_support = 1, .ddb_size = 1024, + .color = { .degamma_lut_size = 0, .gamma_lut_size = 1024 } }; static const struct intel_device_info intel_kabylake_info = { diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index d16627a72ccb..4322c67388f6 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -8176,12 +8176,26 @@ enum { #define _PAL_PREC_EXT_GC_MAX_A 0x4A420 #define _PAL_PREC_EXT_GC_MAX_B 0x4AC20 #define _PAL_PREC_EXT_GC_MAX_C 0x4B420 +#define _PAL_PREC_EXT2_GC_MAX_A 0x4A430 +#define _PAL_PREC_EXT2_GC_MAX_B 0x4AC30 +#define _PAL_PREC_EXT2_GC_MAX_C 0x4B430 #define PREC_PAL_INDEX(pipe) _MMIO_PIPE(pipe, _PAL_PREC_INDEX_A, _PAL_PREC_INDEX_B) #define PREC_PAL_DATA(pipe) _MMIO_PIPE(pipe, _PAL_PREC_DATA_A, _PAL_PREC_DATA_B) #define PREC_PAL_GC_MAX(pipe, i) _MMIO(_PIPE(pipe, _PAL_PREC_GC_MAX_A, _PAL_PREC_GC_MAX_B) + (i) * 4) #define PREC_PAL_EXT_GC_MAX(pipe, i) _MMIO(_PIPE(pipe, _PAL_PREC_EXT_GC_MAX_A, _PAL_PREC_EXT_GC_MAX_B) + (i) * 4) +#define _PRE_CSC_GAMC_INDEX_A 0x4A484 +#define _PRE_CSC_GAMC_INDEX_B 0x4AC84 +#define _PRE_CSC_GAMC_INDEX_C 0x4B484 +#define PRE_CSC_GAMC_AUTO_INCREMENT (1 << 10) +#define _PRE_CSC_GAMC_DATA_A 0x4A488 +#define _PRE_CSC_GAMC_DATA_B 0x4AC88 +#define _PRE_CSC_GAMC_DATA_C 0x4B488 + +#define PRE_CSC_GAMC_INDEX(pipe) _MMIO_PIPE(pipe, _PRE_CSC_GAMC_INDEX_A, _PRE_CSC_GAMC_INDEX_B) +#define PRE_CSC_GAMC_DATA(pipe) _MMIO_PIPE(pipe, _PRE_CSC_GAMC_DATA_A, _PRE_CSC_GAMC_DATA_B) + /* pipe CSC & degamma/gamma LUTs on CHV */ #define _CGM_PIPE_A_CSC_COEFF01 (VLV_DISPLAY_BASE + 0x67900) #define _CGM_PIPE_A_CSC_COEFF23 (VLV_DISPLAY_BASE + 0x67904) diff --git a/drivers/gpu/drm/i915/intel_color.c b/drivers/gpu/drm/i915/intel_color.c index 82a3bc9befa8..0627eee1cb64 100644 --- a/drivers/gpu/drm/i915/intel_color.c +++ b/drivers/gpu/drm/i915/intel_color.c @@ -380,7 +380,9 @@ static void bdw_load_gamma_lut(struct drm_crtc_state *state, u32 offset) WARN_ON(offset & ~PAL_PREC_INDEX_VALUE_MASK); I915_WRITE(PREC_PAL_INDEX(pipe), - PAL_PREC_SPLIT_MODE | PAL_PREC_AUTO_INCREMENT | offset); + (offset ? PAL_PREC_SPLIT_MODE : 0) | + PAL_PREC_AUTO_INCREMENT | + offset); if (state->gamma_lut) { struct drm_color_lut *lut = @@ -443,6 +445,57 @@ static void broadwell_load_luts(struct drm_crtc_state *state) I915_WRITE(PREC_PAL_INDEX(pipe), 0); } +static void glk_load_degamma_lut(struct drm_crtc_state *state) +{ + struct drm_i915_private *dev_priv = to_i915(state->crtc->dev); + enum pipe pipe = to_intel_crtc(state->crtc)->pipe; + const uint32_t lut_size = 33; + uint32_t i; + + /* + * When setting the auto-increment bit, the hardware seems to + * ignore the index bits, so we need to reset it to index 0 + * separately. + */ + I915_WRITE(PRE_CSC_GAMC_INDEX(pipe), 0); + I915_WRITE(PRE_CSC_GAMC_INDEX(pipe), PRE_CSC_GAMC_AUTO_INCREMENT); + + /* + * FIXME: The pipe degamma table in geminilake doesn't support + * different values per channel, so this just loads a linear table. + */ + for (i = 0; i < lut_size; i++) { + uint32_t v = (i * ((1 << 16) - 1)) / (lut_size - 1); + + I915_WRITE(PRE_CSC_GAMC_DATA(pipe), v); + } + + /* Clamp values > 1.0. */ + while (i++ < 35) + I915_WRITE(PRE_CSC_GAMC_DATA(pipe), (1 << 16) - 1); +} + +static void glk_load_luts(struct drm_crtc_state *state) +{ + struct drm_crtc *crtc = state->crtc; + struct drm_device *dev = crtc->dev; + struct drm_i915_private *dev_priv = to_i915(dev); + struct intel_crtc_state *intel_state = to_intel_crtc_state(state); + enum pipe pipe = to_intel_crtc(crtc)->pipe; + + if (crtc_state_is_legacy(state)) { + haswell_load_luts(state); + return; + } + + glk_load_degamma_lut(state); + bdw_load_gamma_lut(state, 0); + + intel_state->gamma_mode = GAMMA_MODE_MODE_10BIT; + I915_WRITE(GAMMA_MODE(pipe), GAMMA_MODE_MODE_10BIT); + POSTING_READ(GAMMA_MODE(pipe)); +} + /* Loads the palette/gamma unit for the CRTC on CherryView. */ static void cherryview_load_luts(struct drm_crtc_state *state) { @@ -561,6 +614,9 @@ void intel_color_init(struct drm_crtc *crtc) IS_BROXTON(dev_priv)) { dev_priv->display.load_csc_matrix = i9xx_load_csc_matrix; dev_priv->display.load_luts = broadwell_load_luts; + } else if (IS_GEMINILAKE(dev_priv)) { + dev_priv->display.load_csc_matrix = i9xx_load_csc_matrix; + dev_priv->display.load_luts = glk_load_luts; } else { dev_priv->display.load_luts = i9xx_load_luts; } -- cgit From 2355cf088d469f65d180618c24400ded38895b92 Mon Sep 17 00:00:00 2001 From: Mika Kuoppala Date: Fri, 27 Jan 2017 15:03:09 +0200 Subject: drm/i915: Create context desc template when context is created Move the invariant parts of context desc setup from execlist init to context creation. This is advantageous when we need to create different templates based on the context parametrization, ie. for svm capable contexts. v2: s/create/default, remove engine->ctx_desc_template Cc: Chris Wilson Signed-off-by: Mika Kuoppala Reviewed-by: Chris Wilson Signed-off-by: Mika Kuoppala Link: http://patchwork.freedesktop.org/patch/msgid/1485522189-31984-1-git-send-email-mika.kuoppala@intel.com --- drivers/gpu/drm/i915/i915_gem_context.c | 23 ++++++++++++++++++++-- drivers/gpu/drm/i915/i915_reg.h | 15 ++++++++++++++ drivers/gpu/drm/i915/intel_lrc.c | 35 ++------------------------------- drivers/gpu/drm/i915/intel_ringbuffer.h | 1 - 4 files changed, 38 insertions(+), 36 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 77458da9627d..680105421bb9 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -236,6 +236,26 @@ static int assign_hw_id(struct drm_i915_private *dev_priv, unsigned *out) return 0; } +static u32 default_desc_template(const struct drm_i915_private *dev_priv) +{ + u32 desc; + + desc = GEN8_CTX_VALID | + GEN8_CTX_PRIVILEGE | + GEN8_CTX_ADDRESSING_MODE(dev_priv) << + GEN8_CTX_ADDRESSING_MODE_SHIFT; + + if (IS_GEN8(dev_priv)) + desc |= GEN8_CTX_L3LLC_COHERENT; + + /* TODO: WaDisableLiteRestore when we start using semaphore + * signalling between Command Streamers + * ring->ctx_desc_template |= GEN8_CTX_FORCE_RESTORE; + */ + + return desc; +} + static struct i915_gem_context * __create_hw_context(struct drm_i915_private *dev_priv, struct drm_i915_file_private *file_priv) @@ -309,8 +329,7 @@ __create_hw_context(struct drm_i915_private *dev_priv, i915_gem_context_set_bannable(ctx); ctx->ring_size = 4 * PAGE_SIZE; - ctx->desc_template = GEN8_CTX_ADDRESSING_MODE(dev_priv) << - GEN8_CTX_ADDRESSING_MODE_SHIFT; + ctx->desc_template = default_desc_template(dev_priv); ATOMIC_INIT_NOTIFIER_HEAD(&ctx->status_notifier); /* GuC requires the ring to be placed above GUC_WOPCM_TOP. If GuC is not diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 4322c67388f6..feefbee0a717 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -3358,11 +3358,26 @@ enum { INTEL_LEGACY_64B_CONTEXT }; +enum { + FAULT_AND_HANG = 0, + FAULT_AND_HALT, /* Debug only */ + FAULT_AND_STREAM, + FAULT_AND_CONTINUE /* Unsupported */ +}; + +#define GEN8_CTX_VALID (1<<0) +#define GEN8_CTX_FORCE_PD_RESTORE (1<<1) +#define GEN8_CTX_FORCE_RESTORE (1<<2) +#define GEN8_CTX_L3LLC_COHERENT (1<<5) +#define GEN8_CTX_PRIVILEGE (1<<8) #define GEN8_CTX_ADDRESSING_MODE_SHIFT 3 #define GEN8_CTX_ADDRESSING_MODE(dev_priv) (USES_FULL_48BIT_PPGTT(dev_priv) ?\ INTEL_LEGACY_64B_CONTEXT : \ INTEL_LEGACY_32B_CONTEXT) +#define GEN8_CTX_ID_SHIFT 32 +#define GEN8_CTX_ID_WIDTH 21 + #define CHV_CLK_CTL1 _MMIO(0x101100) #define VLV_CLK_CTL2 _MMIO(0x101104) #define CLK_CTL2_CZCOUNT_30NS_SHIFT 28 diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 0e7b950bceac..0e99d53d5523 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -190,12 +190,6 @@ #define CTX_R_PWR_CLK_STATE 0x42 #define CTX_GPGPU_CSR_BASE_ADDRESS 0x44 -#define GEN8_CTX_VALID (1<<0) -#define GEN8_CTX_FORCE_PD_RESTORE (1<<1) -#define GEN8_CTX_FORCE_RESTORE (1<<2) -#define GEN8_CTX_L3LLC_COHERENT (1<<5) -#define GEN8_CTX_PRIVILEGE (1<<8) - #define ASSIGN_CTX_REG(reg_state, pos, reg, val) do { \ (reg_state)[(pos)+0] = i915_mmio_reg_offset(reg); \ (reg_state)[(pos)+1] = (val); \ @@ -212,14 +206,6 @@ reg_state[CTX_PDP0_LDW + 1] = lower_32_bits(px_dma(&ppgtt->pml4)); \ } while (0) -enum { - FAULT_AND_HANG = 0, - FAULT_AND_HALT, /* Debug only */ - FAULT_AND_STREAM, - FAULT_AND_CONTINUE /* Unsupported */ -}; -#define GEN8_CTX_ID_SHIFT 32 -#define GEN8_CTX_ID_WIDTH 21 #define GEN8_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT 0x17 #define GEN9_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT 0x26 @@ -267,21 +253,6 @@ int intel_sanitize_enable_execlists(struct drm_i915_private *dev_priv, int enabl return 0; } -static void -logical_ring_init_platform_invariants(struct intel_engine_cs *engine) -{ - struct drm_i915_private *dev_priv = engine->i915; - - engine->ctx_desc_template = GEN8_CTX_VALID; - if (IS_GEN8(dev_priv)) - engine->ctx_desc_template |= GEN8_CTX_L3LLC_COHERENT; - engine->ctx_desc_template |= GEN8_CTX_PRIVILEGE; - - /* TODO: WaDisableLiteRestore when we start using semaphore - * signalling between Command Streamers */ - /* ring->ctx_desc_template |= GEN8_CTX_FORCE_RESTORE; */ -} - /** * intel_lr_context_descriptor_update() - calculate & cache the descriptor * descriptor for a pinned context @@ -295,7 +266,7 @@ logical_ring_init_platform_invariants(struct intel_engine_cs *engine) * * This is what a descriptor looks like, from LSB to MSB:: * - * bits 0-11: flags, GEN8_CTX_* (cached in ctx_desc_template) + * bits 0-11: flags, GEN8_CTX_* (cached in ctx->desc_template) * bits 12-31: LRCA, GTT address of (the HWSP of) this context * bits 32-52: ctx ID, a globally unique tag * bits 53-54: mbz, reserved for use by hardware @@ -310,8 +281,7 @@ intel_lr_context_descriptor_update(struct i915_gem_context *ctx, BUILD_BUG_ON(MAX_CONTEXT_HW_ID > (1<desc_template; /* bits 3-4 */ - desc |= engine->ctx_desc_template; /* bits 0-11 */ + desc = ctx->desc_template; /* bits 0-11 */ desc |= i915_ggtt_offset(ce->state) + LRC_PPHWSP_PN * PAGE_SIZE; /* bits 12-31 */ desc |= (u64)ctx->hw_id << GEN8_CTX_ID_SHIFT; /* bits 32-52 */ @@ -1805,7 +1775,6 @@ logical_ring_setup(struct intel_engine_cs *engine) tasklet_init(&engine->irq_tasklet, intel_lrc_irq_handler, (unsigned long)engine); - logical_ring_init_platform_invariants(engine); logical_ring_default_vfuncs(engine); logical_ring_default_irqs(engine); } diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 8e872730f8eb..b9c15cd40fbf 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -383,7 +383,6 @@ struct intel_engine_cs { struct rb_root execlist_queue; struct rb_node *execlist_first; unsigned int fw_domains; - u32 ctx_desc_template; /* Contexts are pinned whilst they are active on the GPU. The last * context executed remains active whilst the GPU is idle - the -- cgit From ce64645d86ac5550559f3dca07fd9ba42166e196 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Fri, 27 Jan 2017 17:57:06 +0200 Subject: drm/i915: use variadic macros and arrays to choose port/pipe based registers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This allows the use of more than 3 ports/pipes/whatever without tricks, even if the register offsets are not evenly spaced. There's the risk of out of bounds access if we're not careful; currently that would "just" lead to the wrong register offset being used. It might be possible to add build bug ons for build time constant indexing. We already have ports defined up to E, not sure if we might have bugs related to them and the current _PORT3() macro. text data bss dec hex filename 1239868 46199 4096 1290163 13afb3 drivers/gpu/drm/i915/i915.ko 1238828 46199 4096 1289123 13aba3 drivers/gpu/drm/i915/i915.ko Cc: Chris Wilson Cc: Ville Syrjälä Cc: Tvrtko Ursulin Reviewed-by: Chris Wilson Reviewed-by: Ville Syrjälä Acked-by: Daniel Vetter Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/1485532626-20923-1-git-send-email-jani.nikula@intel.com --- drivers/gpu/drm/i915/i915_reg.h | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index feefbee0a717..02a65ddae3a3 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -48,6 +48,8 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) return !i915_mmio_reg_equal(reg, INVALID_MMIO_REG); } +#define _PICK(__index, ...) (((const u32 []){ __VA_ARGS__ })[__index]) + #define _PIPE(pipe, a, b) ((a) + (pipe)*((b)-(a))) #define _MMIO_PIPE(pipe, a, b) _MMIO(_PIPE(pipe, a, b)) #define _PLANE(plane, a, b) _PIPE(plane, a, b) @@ -56,14 +58,11 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define _MMIO_TRANS(tran, a, b) _MMIO(_TRANS(tran, a, b)) #define _PORT(port, a, b) ((a) + (port)*((b)-(a))) #define _MMIO_PORT(port, a, b) _MMIO(_PORT(port, a, b)) -#define _PIPE3(pipe, a, b, c) ((pipe) == PIPE_A ? (a) : \ - (pipe) == PIPE_B ? (b) : (c)) +#define _PIPE3(pipe, ...) _PICK(pipe, __VA_ARGS__) #define _MMIO_PIPE3(pipe, a, b, c) _MMIO(_PIPE3(pipe, a, b, c)) -#define _PORT3(port, a, b, c) ((port) == PORT_A ? (a) : \ - (port) == PORT_B ? (b) : (c)) +#define _PORT3(port, ...) _PICK(port, __VA_ARGS__) #define _MMIO_PORT3(pipe, a, b, c) _MMIO(_PORT3(pipe, a, b, c)) -#define _PHY3(phy, a, b, c) ((phy) == DPIO_PHY0 ? (a) : \ - (phy) == DPIO_PHY1 ? (b) : (c)) +#define _PHY3(phy, ...) _PICK(phy, __VA_ARGS__) #define _MMIO_PHY3(phy, a, b, c) _MMIO(_PHY3(phy, a, b, c)) #define _MASKED_FIELD(mask, value) ({ \ -- cgit From 248a124d6fc0fde44b8fafc695ca4a2c24eb0856 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 30 Jan 2017 10:44:56 +0000 Subject: drm/i915: Make intel_detect_preproduction_hw easier to extend MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As we add new generations, we should keep detecting new pre-production system development platforms that were temporarily enabled to facilitate initial development and now superseded by production systems. To make it easier to add more platforms, split the if into a series of logical operations. v2: s/sdv/pre/ - not all system development vehicles are for preproduction usage. Signed-off-by: Chris Wilson Cc: "Zanoni, Paulo R" Cc: Rodrigo Vivi Cc: Jani Nikula Cc: Ville Syrjälä Cc: Matt Roper Link: http://patchwork.freedesktop.org/patch/msgid/20170130104458.2653-1-chris@chris-wilson.co.uk Acked-by: Jani Nikula Reviewed-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_drv.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 3aa5bf58cf32..b960aa929070 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -779,8 +779,12 @@ static void i915_workqueues_cleanup(struct drm_i915_private *dev_priv) */ static void intel_detect_preproduction_hw(struct drm_i915_private *dev_priv) { - if (IS_HSW_EARLY_SDV(dev_priv) || - IS_SKL_REVID(dev_priv, 0, SKL_REVID_F0)) + bool pre = false; + + pre |= IS_HSW_EARLY_SDV(dev_priv); + pre |= IS_SKL_REVID(dev_priv, 0, SKL_REVID_F0); + + if (pre) DRM_ERROR("This is a pre-production stepping. " "It may not be fully functional.\n"); } -- cgit From 7c5ff4a2c588c89d00ab0e0dd61f44942233d63d Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 30 Jan 2017 10:44:57 +0000 Subject: drm/i915: Mark the kernel as tainted if we fail the preproduction check MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Preproduction sdv are not supported beyond the release of production hardware, and continued use is ill-advised. Mark the kernel as tainted to reinforce the error. Signed-off-by: Chris Wilson Cc: "Zanoni, Paulo R" Cc: Rodrigo Vivi Cc: Jani Nikula Cc: Ville Syrjälä Cc: Matt Roper Acked-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/20170130104458.2653-2-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_drv.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index b960aa929070..0f5dbd434991 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -784,9 +784,11 @@ static void intel_detect_preproduction_hw(struct drm_i915_private *dev_priv) pre |= IS_HSW_EARLY_SDV(dev_priv); pre |= IS_SKL_REVID(dev_priv, 0, SKL_REVID_F0); - if (pre) + if (pre) { DRM_ERROR("This is a pre-production stepping. " "It may not be fully functional.\n"); + add_taint(TAINT_MACHINE_CHECK, LOCKDEP_STILL_OK); + } } /** -- cgit From 0102ba1fd8af8c2719436eaadc743f940ab525c2 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 30 Jan 2017 10:44:58 +0000 Subject: drm/i915: Add early BXT sdv to the list of preproduction machines MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extend intel_detect_preproduction_hw() to include BXT A and B steppings. Signed-off-by: Chris Wilson Cc: "Zanoni, Paulo R" Cc: Rodrigo Vivi Cc: Jani Nikula Cc: Ville Syrjälä Cc: Matt Roper Link: http://patchwork.freedesktop.org/patch/msgid/20170130104458.2653-3-chris@chris-wilson.co.uk Acked-by: Jani Nikula Reviewed-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_drv.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 0f5dbd434991..0e99cb22362c 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -783,6 +783,7 @@ static void intel_detect_preproduction_hw(struct drm_i915_private *dev_priv) pre |= IS_HSW_EARLY_SDV(dev_priv); pre |= IS_SKL_REVID(dev_priv, 0, SKL_REVID_F0); + pre |= IS_BXT_REVID(dev_priv, 0, BXT_REVID_B_LAST); if (pre) { DRM_ERROR("This is a pre-production stepping. " -- cgit From 4fa6053efdc7a30720fffddb9df353b0273ce17f Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sun, 29 Jan 2017 09:24:33 +0000 Subject: drm/i915: Record more information about the hanging contexts Include extra information such as the user_handle and hw_id so that userspace can identify which of their contexts hung, useful if they are performing self-diagnositics. Signed-off-by: Chris Wilson Cc: Mika Kuoppala Link: http://patchwork.freedesktop.org/patch/msgid/20170129092433.10483-1-chris@chris-wilson.co.uk Reviewed-by: Mika Kuoppala --- drivers/gpu/drm/i915/i915_drv.h | 14 +++++-- drivers/gpu/drm/i915/i915_gpu_error.c | 77 ++++++++++++++++++++++------------- 2 files changed, 59 insertions(+), 32 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 457129b64d79..448e5d85d1e0 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -969,6 +969,16 @@ struct drm_i915_error_state { u32 semaphore_mboxes[I915_NUM_ENGINES - 1]; struct intel_instdone instdone; + struct drm_i915_error_context { + char comm[TASK_COMM_LEN]; + pid_t pid; + u32 handle; + u32 hw_id; + int ban_score; + int active; + int guilty; + } context; + struct drm_i915_error_object { u64 gtt_offset; u64 gtt_size; @@ -1002,10 +1012,6 @@ struct drm_i915_error_state { u32 pp_dir_base; }; } vm_info; - - pid_t pid; - char comm[TASK_COMM_LEN]; - int context_bans; } engine[I915_NUM_ENGINES]; struct drm_i915_error_buffer { diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index e5375323eb06..5283fe815a4d 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -384,6 +384,15 @@ static void error_print_request(struct drm_i915_error_state_buf *m, erq->head, erq->tail); } +static void error_print_context(struct drm_i915_error_state_buf *m, + const char *header, + struct drm_i915_error_context *ctx) +{ + err_printf(m, "%s%s[%d] user_handle %d hw_id %d, ban score %d guilty %d active %d\n", + header, ctx->comm, ctx->pid, ctx->handle, ctx->hw_id, + ctx->ban_score, ctx->guilty, ctx->active); +} + static void error_print_engine(struct drm_i915_error_state_buf *m, struct drm_i915_error_engine *ee) { @@ -457,6 +466,7 @@ static void error_print_engine(struct drm_i915_error_state_buf *m, error_print_request(m, " ELSP[0]: ", &ee->execlist[0]); error_print_request(m, " ELSP[1]: ", &ee->execlist[1]); + error_print_context(m, " Active context: ", &ee->context); } void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...) @@ -562,12 +572,12 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m, for (i = 0; i < ARRAY_SIZE(error->engine); i++) { if (error->engine[i].hangcheck_stalled && - error->engine[i].pid != -1) { - err_printf(m, "Active process (on ring %s): %s [%d], context bans %d\n", + error->engine[i].context.pid) { + err_printf(m, "Active process (on ring %s): %s [%d], score %d\n", engine_str(i), - error->engine[i].comm, - error->engine[i].pid, - error->engine[i].context_bans); + error->engine[i].context.comm, + error->engine[i].context.pid, + error->engine[i].context.ban_score); } } err_printf(m, "Reset count: %u\n", error->reset_count); @@ -658,11 +668,13 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m, obj = ee->batchbuffer; if (obj) { err_puts(m, dev_priv->engine[i]->name); - if (ee->pid != -1) - err_printf(m, " (submitted by %s [%d], bans %d)", - ee->comm, - ee->pid, - ee->context_bans); + if (ee->context.pid) + err_printf(m, " (submitted by %s [%d], ctx %d [%d], score %d)", + ee->context.comm, + ee->context.pid, + ee->context.handle, + ee->context.hw_id, + ee->context.ban_score); err_printf(m, " --- gtt_offset = 0x%08x %08x\n", upper_32_bits(obj->gtt_offset), lower_32_bits(obj->gtt_offset)); @@ -1267,6 +1279,28 @@ static void error_record_engine_execlists(struct intel_engine_cs *engine, &ee->execlist[n]); } +static void record_context(struct drm_i915_error_context *e, + struct i915_gem_context *ctx) +{ + if (ctx->pid) { + struct task_struct *task; + + rcu_read_lock(); + task = pid_task(ctx->pid, PIDTYPE_PID); + if (task) { + strcpy(e->comm, task->comm); + e->pid = task->pid; + } + rcu_read_unlock(); + } + + e->handle = ctx->user_handle; + e->hw_id = ctx->hw_id; + e->ban_score = ctx->ban_score; + e->guilty = ctx->guilty_count; + e->active = ctx->active_count; +} + static void i915_gem_record_rings(struct drm_i915_private *dev_priv, struct drm_i915_error_state *error) { @@ -1281,7 +1315,6 @@ static void i915_gem_record_rings(struct drm_i915_private *dev_priv, struct drm_i915_error_engine *ee = &error->engine[i]; struct drm_i915_gem_request *request; - ee->pid = -1; ee->engine_id = -1; if (!engine) @@ -1296,11 +1329,12 @@ static void i915_gem_record_rings(struct drm_i915_private *dev_priv, request = i915_gem_find_active_request(engine); if (request) { struct intel_ring *ring; - struct pid *pid; ee->vm = request->ctx->ppgtt ? &request->ctx->ppgtt->base : &ggtt->base; + record_context(&ee->context, request->ctx); + /* We need to copy these to an anonymous buffer * as the simplest method to avoid being overwritten * by userspace. @@ -1318,19 +1352,6 @@ static void i915_gem_record_rings(struct drm_i915_private *dev_priv, i915_error_object_create(dev_priv, request->ctx->engine[i].state); - pid = request->ctx->pid; - if (pid) { - struct task_struct *task; - - rcu_read_lock(); - task = pid_task(pid, PIDTYPE_PID); - if (task) { - strcpy(ee->comm, task->comm); - ee->pid = task->pid; - } - rcu_read_unlock(); - } - error->simulated |= i915_gem_context_no_error_capture(request->ctx); @@ -1534,12 +1555,12 @@ static void i915_error_capture_msg(struct drm_i915_private *dev_priv, "GPU HANG: ecode %d:%d:0x%08x", INTEL_GEN(dev_priv), engine_id, ecode); - if (engine_id != -1 && error->engine[engine_id].pid != -1) + if (engine_id != -1 && error->engine[engine_id].context.pid) len += scnprintf(error->error_msg + len, sizeof(error->error_msg) - len, ", in %s [%d]", - error->engine[engine_id].comm, - error->engine[engine_id].pid); + error->engine[engine_id].context.comm, + error->engine[engine_id].context.pid); scnprintf(error->error_msg + len, sizeof(error->error_msg) - len, ", reason: %s, action: %s", -- cgit From 517f188c6be5bcc71f92c049d91a26539482a6f8 Mon Sep 17 00:00:00 2001 From: Weinan Li Date: Thu, 26 Jan 2017 13:20:13 +0800 Subject: drm/i915: noop forcewake get/put when vgpu activated For a virtualized GPU, the host maintains the forcewake state on the real device. As we don't control forcewake ourselves, we can simply set force_wake_get() and force_wake_put() to be no-ops. By setting the vfuncs, we adjust both the manual control of forcewake and around the mmio accessors (making our vgpu specific mmio routines redundant and to be removed in the next patch). Signed-off-by: Weinan Li Reviewed-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/1485408013-12780-1-git-send-email-weinan.z.li@intel.com Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/intel_uncore.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index abe08885a5ba..da81874dd4a5 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -132,6 +132,13 @@ fw_domains_put(struct drm_i915_private *dev_priv, enum forcewake_domains fw_doma } } +static void +vgpu_fw_domains_nop(struct drm_i915_private *dev_priv, + enum forcewake_domains fw_domains) +{ + /* Guest driver doesn't need to takes care forcewake. */ +} + static void fw_domains_posting_read(struct drm_i915_private *dev_priv) { @@ -1375,6 +1382,11 @@ static void intel_uncore_fw_domains_init(struct drm_i915_private *dev_priv) FORCEWAKE, FORCEWAKE_ACK); } + if (intel_vgpu_active(dev_priv)) { + dev_priv->uncore.funcs.force_wake_get = vgpu_fw_domains_nop; + dev_priv->uncore.funcs.force_wake_put = vgpu_fw_domains_nop; + } + /* All future platforms are expected to require complex power gating */ WARN_ON(dev_priv->uncore.fw_domains == 0); } -- cgit From 9b51b105f2a52a873107d7b28f0aca94e52ad7bd Mon Sep 17 00:00:00 2001 From: Weinan Li Date: Thu, 26 Jan 2017 13:23:48 +0800 Subject: drm/i915: clean up unused vgpu_read/write Having converted the force_wake_get/_put routines for a vGPU to be no-op, we can use the common mmio accessors and remove our specialised routines that simply skipped the calls to control force_wake. Signed-off-by: Weinan Li Reviewed-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/1485408228-12932-1-git-send-email-weinan.z.li@intel.com Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/intel_uncore.c | 58 ------------------------------------- 1 file changed, 58 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index da81874dd4a5..3d243fefe09b 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -1052,34 +1052,6 @@ __gen6_read(64) #undef GEN6_READ_FOOTER #undef GEN6_READ_HEADER -#define VGPU_READ_HEADER(x) \ - unsigned long irqflags; \ - u##x val = 0; \ - assert_rpm_device_not_suspended(dev_priv); \ - spin_lock_irqsave(&dev_priv->uncore.lock, irqflags) - -#define VGPU_READ_FOOTER \ - spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); \ - trace_i915_reg_rw(false, reg, val, sizeof(val), trace); \ - return val - -#define __vgpu_read(x) \ -static u##x \ -vgpu_read##x(struct drm_i915_private *dev_priv, i915_reg_t reg, bool trace) { \ - VGPU_READ_HEADER(x); \ - val = __raw_i915_read##x(dev_priv, reg); \ - VGPU_READ_FOOTER; \ -} - -__vgpu_read(8) -__vgpu_read(16) -__vgpu_read(32) -__vgpu_read(64) - -#undef __vgpu_read -#undef VGPU_READ_FOOTER -#undef VGPU_READ_HEADER - #define GEN2_WRITE_HEADER \ trace_i915_reg_rw(true, reg, val, sizeof(val), trace); \ assert_rpm_wakelock_held(dev_priv); \ @@ -1202,31 +1174,6 @@ __gen6_write(32) #undef GEN6_WRITE_FOOTER #undef GEN6_WRITE_HEADER -#define VGPU_WRITE_HEADER \ - unsigned long irqflags; \ - trace_i915_reg_rw(true, reg, val, sizeof(val), trace); \ - assert_rpm_device_not_suspended(dev_priv); \ - spin_lock_irqsave(&dev_priv->uncore.lock, irqflags) - -#define VGPU_WRITE_FOOTER \ - spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags) - -#define __vgpu_write(x) \ -static void vgpu_write##x(struct drm_i915_private *dev_priv, \ - i915_reg_t reg, u##x val, bool trace) { \ - VGPU_WRITE_HEADER; \ - __raw_i915_write##x(dev_priv, reg, val); \ - VGPU_WRITE_FOOTER; \ -} - -__vgpu_write(8) -__vgpu_write(16) -__vgpu_write(32) - -#undef __vgpu_write -#undef VGPU_WRITE_FOOTER -#undef VGPU_WRITE_HEADER - #define ASSIGN_WRITE_MMIO_VFUNCS(x) \ do { \ dev_priv->uncore.funcs.mmio_writeb = x##_write8; \ @@ -1461,11 +1408,6 @@ void intel_uncore_init(struct drm_i915_private *dev_priv) if (INTEL_GEN(dev_priv) >= 8) intel_shadow_table_check(); - if (intel_vgpu_active(dev_priv)) { - ASSIGN_WRITE_MMIO_VFUNCS(vgpu); - ASSIGN_READ_MMIO_VFUNCS(vgpu); - } - i915_check_and_clear_faults(dev_priv); } #undef ASSIGN_WRITE_MMIO_VFUNCS -- cgit From 4703b0472e126c715019a9671ea0fe38556114bb Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 31 Jan 2017 10:46:30 +0000 Subject: drm/i915: Be defensive when cleaning up i915_gem_internal pages If we abort the i915_gem_internal get_pages, we mark the failing sg as the last. However, that means we iterate upto and including the failing sg element and results in us trying to free the unallocated sg_page(). Signed-off-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20170131104630.3074-1-chris@chris-wilson.co.uk Reviewed-by: Matthew Auld --- drivers/gpu/drm/i915/i915_gem_internal.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_internal.c b/drivers/gpu/drm/i915/i915_gem_internal.c index 17ce53d0d092..64d8fb3fd764 100644 --- a/drivers/gpu/drm/i915/i915_gem_internal.c +++ b/drivers/gpu/drm/i915/i915_gem_internal.c @@ -35,8 +35,10 @@ static void internal_free_pages(struct sg_table *st) { struct scatterlist *sg; - for (sg = st->sgl; sg; sg = __sg_next(sg)) - __free_pages(sg_page(sg), get_order(sg->length)); + for (sg = st->sgl; sg; sg = __sg_next(sg)) { + if (sg_page(sg)) + __free_pages(sg_page(sg), get_order(sg->length)); + } sg_free_table(st); kfree(st); @@ -116,6 +118,7 @@ i915_gem_object_get_pages_internal(struct drm_i915_gem_object *obj) return st; err: + sg_set_page(sg, NULL, 0, 0); sg_mark_end(sg); internal_free_pages(st); return ERR_PTR(-ENOMEM); -- cgit From c88473878d47131ccfc67a00ba688d4d7d0f4519 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 27 Jan 2017 16:55:30 +0000 Subject: drm/i915: Treat stolen memory as DMA addresses MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The conversion of stolen to use phys_addr_t (from essentially u32) sparked an interesting discussion. We treat stolen memory as only accessible from the GPU (the DMA device) - an attempt to use it from the CPU will generate a MCE on gen6 onwards, although it is in theory a physical address that can be dereferenced from the CPU as demonstrated by earlier generations. As such, using phys_addr_t has the wrong connotations and as we pass the address into the DMA device via dma_addr_t (through the scatterlists used to program the GTT entries), we should treat it as dma_addr_t throughout. Signed-off-by: Chris Wilson Cc: Paulo Zanoni Cc: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/20170127165531.28135-1-chris@chris-wilson.co.uk Reviewed-by: Paulo Zanoni --- drivers/gpu/drm/i915/i915_drv.h | 2 +- drivers/gpu/drm/i915/i915_gem_stolen.c | 36 +++++++++++++++++----------------- 2 files changed, 19 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 448e5d85d1e0..15f0fc48c442 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1461,7 +1461,7 @@ struct i915_gem_mm { struct work_struct free_work; /** Usable portion of the GTT for GEM */ - phys_addr_t stolen_base; /* limited to low memory (32-bit) */ + dma_addr_t stolen_base; /* limited to low memory (32-bit) */ /** PPGTT used for aliasing the PPGTT with the GTT */ struct i915_hw_ppgtt *aliasing_ppgtt; diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c index 729dba017b78..a75de8c6ece0 100644 --- a/drivers/gpu/drm/i915/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/i915_gem_stolen.c @@ -79,12 +79,12 @@ void i915_gem_stolen_remove_node(struct drm_i915_private *dev_priv, mutex_unlock(&dev_priv->mm.stolen_lock); } -static phys_addr_t i915_stolen_to_physical(struct drm_i915_private *dev_priv) +static dma_addr_t i915_stolen_to_dma(struct drm_i915_private *dev_priv) { struct pci_dev *pdev = dev_priv->drm.pdev; struct i915_ggtt *ggtt = &dev_priv->ggtt; struct resource *r; - phys_addr_t base; + dma_addr_t base; /* Almost universally we can find the Graphics Base of Stolen Memory * at register BSM (0x5c) in the igfx configuration space. On a few @@ -196,7 +196,7 @@ static phys_addr_t i915_stolen_to_physical(struct drm_i915_private *dev_priv) if (INTEL_GEN(dev_priv) <= 4 && !IS_G33(dev_priv) && !IS_PINEVIEW(dev_priv) && !IS_G4X(dev_priv)) { struct { - phys_addr_t start, end; + dma_addr_t start, end; } stolen[2] = { { .start = base, .end = base + ggtt->stolen_size, }, { .start = base, .end = base + ggtt->stolen_size, }, @@ -228,12 +228,12 @@ static phys_addr_t i915_stolen_to_physical(struct drm_i915_private *dev_priv) if (stolen[0].start != stolen[1].start || stolen[0].end != stolen[1].end) { - phys_addr_t end = base + ggtt->stolen_size - 1; + dma_addr_t end = base + ggtt->stolen_size - 1; DRM_DEBUG_KMS("GTT within stolen memory at 0x%llx-0x%llx\n", (unsigned long long)ggtt_start, (unsigned long long)ggtt_end - 1); - DRM_DEBUG_KMS("Stolen memory adjusted to %pa - %pa\n", + DRM_DEBUG_KMS("Stolen memory adjusted to %pad - %pad\n", &base, &end); } } @@ -263,9 +263,9 @@ static phys_addr_t i915_stolen_to_physical(struct drm_i915_private *dev_priv) * range. Apparently this works. */ if (r == NULL && !IS_GEN3(dev_priv)) { - phys_addr_t end = base + ggtt->stolen_size; + dma_addr_t end = base + ggtt->stolen_size; - DRM_ERROR("conflict detected with stolen region: [%pa - %pa]\n", + DRM_ERROR("conflict detected with stolen region: [%pad - %pad]\n", &base, &end); base = 0; } @@ -285,13 +285,13 @@ void i915_gem_cleanup_stolen(struct drm_device *dev) } static void g4x_get_stolen_reserved(struct drm_i915_private *dev_priv, - phys_addr_t *base, u32 *size) + dma_addr_t *base, u32 *size) { struct i915_ggtt *ggtt = &dev_priv->ggtt; uint32_t reg_val = I915_READ(IS_GM45(dev_priv) ? CTG_STOLEN_RESERVED : ELK_STOLEN_RESERVED); - phys_addr_t stolen_top = dev_priv->mm.stolen_base + ggtt->stolen_size; + dma_addr_t stolen_top = dev_priv->mm.stolen_base + ggtt->stolen_size; *base = (reg_val & G4X_STOLEN_RESERVED_ADDR2_MASK) << 16; @@ -308,7 +308,7 @@ static void g4x_get_stolen_reserved(struct drm_i915_private *dev_priv, } static void gen6_get_stolen_reserved(struct drm_i915_private *dev_priv, - phys_addr_t *base, u32 *size) + dma_addr_t *base, u32 *size) { uint32_t reg_val = I915_READ(GEN6_STOLEN_RESERVED); @@ -334,7 +334,7 @@ static void gen6_get_stolen_reserved(struct drm_i915_private *dev_priv, } static void gen7_get_stolen_reserved(struct drm_i915_private *dev_priv, - phys_addr_t *base, u32 *size) + dma_addr_t *base, u32 *size) { uint32_t reg_val = I915_READ(GEN6_STOLEN_RESERVED); @@ -354,7 +354,7 @@ static void gen7_get_stolen_reserved(struct drm_i915_private *dev_priv, } static void chv_get_stolen_reserved(struct drm_i915_private *dev_priv, - phys_addr_t *base, u32 *size) + dma_addr_t *base, u32 *size) { uint32_t reg_val = I915_READ(GEN6_STOLEN_RESERVED); @@ -380,11 +380,11 @@ static void chv_get_stolen_reserved(struct drm_i915_private *dev_priv, } static void bdw_get_stolen_reserved(struct drm_i915_private *dev_priv, - phys_addr_t *base, u32 *size) + dma_addr_t *base, u32 *size) { struct i915_ggtt *ggtt = &dev_priv->ggtt; uint32_t reg_val = I915_READ(GEN6_STOLEN_RESERVED); - phys_addr_t stolen_top; + dma_addr_t stolen_top; stolen_top = dev_priv->mm.stolen_base + ggtt->stolen_size; @@ -403,7 +403,7 @@ static void bdw_get_stolen_reserved(struct drm_i915_private *dev_priv, int i915_gem_init_stolen(struct drm_i915_private *dev_priv) { struct i915_ggtt *ggtt = &dev_priv->ggtt; - phys_addr_t reserved_base, stolen_top; + dma_addr_t reserved_base, stolen_top; u32 reserved_total, reserved_size; u32 stolen_usable_start; @@ -419,7 +419,7 @@ int i915_gem_init_stolen(struct drm_i915_private *dev_priv) if (ggtt->stolen_size == 0) return 0; - dev_priv->mm.stolen_base = i915_stolen_to_physical(dev_priv); + dev_priv->mm.stolen_base = i915_stolen_to_dma(dev_priv); if (dev_priv->mm.stolen_base == 0) return 0; @@ -468,8 +468,8 @@ int i915_gem_init_stolen(struct drm_i915_private *dev_priv) if (reserved_base < dev_priv->mm.stolen_base || reserved_base + reserved_size > stolen_top) { - phys_addr_t reserved_top = reserved_base + reserved_size; - DRM_DEBUG_KMS("Stolen reserved area [%pa - %pa] outside stolen memory [%pa - %pa]\n", + dma_addr_t reserved_top = reserved_base + reserved_size; + DRM_DEBUG_KMS("Stolen reserved area [%pad - %pad] outside stolen memory [%pad - %pad]\n", &reserved_base, &reserved_top, &dev_priv->mm.stolen_base, &stolen_top); return 0; -- cgit From 1692cd60d999b00a0491692dab0286e6011abd36 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 30 Jan 2017 13:47:21 +0000 Subject: drm/i915: Sanity check the computed size and base of stolen memory Just do a quick check that the stolen memory address range doesn't overflow our chosen integer type. v2: Add add_overflows() to utils with the promise that gcc7 can do this better than C and then maybe it will have a proper definition in core. Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170130134721.5159-1-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_gem_stolen.c | 2 +- drivers/gpu/drm/i915/i915_utils.h | 11 +++++++++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c index a75de8c6ece0..cba12ee90cbf 100644 --- a/drivers/gpu/drm/i915/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/i915_gem_stolen.c @@ -189,7 +189,7 @@ static dma_addr_t i915_stolen_to_dma(struct drm_i915_private *dev_priv) base = tom - tseg_size - ggtt->stolen_size; } - if (base == 0) + if (base == 0 || add_overflows(base, ggtt->stolen_size)) return 0; /* make sure we don't clobber the GTT if it's within stolen memory */ diff --git a/drivers/gpu/drm/i915/i915_utils.h b/drivers/gpu/drm/i915/i915_utils.h index 34020873e1f6..b8ba0f2f92af 100644 --- a/drivers/gpu/drm/i915/i915_utils.h +++ b/drivers/gpu/drm/i915/i915_utils.h @@ -25,6 +25,17 @@ #ifndef __I915_UTILS_H #define __I915_UTILS_H +#if GCC_VERSION >= 70000 +#define add_overflows(A, B) \ + __builtin_add_overflow_p((A), (B), (typeof((A) + (B)))0) +#else +#define add_overflows(A, B) ({ \ + typeof(A) a = (A); \ + typeof(B) b = (B); \ + a + b < a; \ +}) +#endif + #define range_overflows(start, size, max) ({ \ typeof(start) start__ = (start); \ typeof(size) size__ = (size); \ -- cgit From 69aeafeae9b30d797c439a30d1a4ccc8dc5b0eb0 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 9 Jan 2017 11:19:32 +0000 Subject: drm/i915: Flush untouched framebuffers before display on !llc On a non-llc system, the objects are created with .cache_level = CACHE_NONE and so the transition to uncached for scanout is a no-op. However, if the object was never written to, it will still be in the CPU domain (having been zeroed out by shmemfs). Those cachelines need to be flushed prior to display. Reported-and-tested-by: Vito Caputo Fixes: a6a7cc4b7db6 ("drm/i915: Always flush the dirty CPU cache when pinning the scanout") Signed-off-by: Chris Wilson Cc: # v4.10-rc1+ Link: http://patchwork.freedesktop.org/patch/msgid/20170109111932.6342-1-chris@chris-wilson.co.uk Reviewed-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 2522d4895ff7..f3b5cb497921 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3519,7 +3519,7 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, vma->display_alignment = max_t(u64, vma->display_alignment, alignment); /* Treat this as an end-of-frame, like intel_user_framebuffer_dirty() */ - if (obj->cache_dirty) { + if (obj->cache_dirty || obj->base.write_domain == I915_GEM_DOMAIN_CPU) { i915_gem_clflush_object(obj, true); intel_fb_obj_flush(obj, false, ORIGIN_DIRTYFB); } -- cgit From 1881a4234ef03751daf55b62b17e6bb0dbf7792a Mon Sep 17 00:00:00 2001 From: Uma Shankar Date: Wed, 25 Jan 2017 19:43:23 +0530 Subject: drm/i915: Add MIPI_IO WA and program DSI regulators Enable MIPI IO WA for BXT DSI as per bspec and program the DSI regulators. v2: Moved IO enable to pre-enable as per Mika's review comments. Also reused the existing register definition for BXT_P_CR_GT_DISP_PWRON. v3: Added Programming the DSI regulators as per disable/enable sequences. v4: Restricting regulator changes to BXT as suggested by Jani/Mika v5: Removed redundant read/modify for regulator register as per Jani's comment. Maintain enable/disable symmetry as per spec. Signed-off-by: Uma Shankar Signed-off-by: Vidya Srinivas Acked-by: Mika Kahola Reviewed-by: Jani Nikula Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/1485353603-11260-1-git-send-email-vidya.srinivas@intel.com --- drivers/gpu/drm/i915/i915_reg.h | 7 +++++++ drivers/gpu/drm/i915/intel_dsi.c | 24 ++++++++++++++++++++++++ 2 files changed, 31 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 02a65ddae3a3..a9538ae81673 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -1552,6 +1552,7 @@ enum skl_disp_power_wells { _MMIO(_BXT_PHY_CH(phy, ch, reg_ch0, reg_ch1)) #define BXT_P_CR_GT_DISP_PWRON _MMIO(0x138090) +#define MIPIO_RST_CTRL (1 << 2) #define _BXT_PHY_CTL_DDI_A 0x64C00 #define _BXT_PHY_CTL_DDI_B 0x64C10 @@ -8361,6 +8362,12 @@ enum { #define _BXT_MIPIC_PORT_CTRL 0x6B8C0 #define BXT_MIPI_PORT_CTRL(tc) _MMIO_MIPI(tc, _BXT_MIPIA_PORT_CTRL, _BXT_MIPIC_PORT_CTRL) +#define BXT_P_DSI_REGULATOR_CFG _MMIO(0x160020) +#define STAP_SELECT (1 << 0) + +#define BXT_P_DSI_REGULATOR_TX_CTRL _MMIO(0x160054) +#define HS_IO_CTRL_SELECT (1 << 0) + #define DPI_ENABLE (1 << 31) /* A + C */ #define MIPIA_MIPI4DPHY_DELAY_COUNT_SHIFT 27 #define MIPIA_MIPI4DPHY_DELAY_COUNT_MASK (0xf << 27) diff --git a/drivers/gpu/drm/i915/intel_dsi.c b/drivers/gpu/drm/i915/intel_dsi.c index 16732e7bc08e..c98234eca2a6 100644 --- a/drivers/gpu/drm/i915/intel_dsi.c +++ b/drivers/gpu/drm/i915/intel_dsi.c @@ -548,6 +548,7 @@ static void intel_dsi_pre_enable(struct intel_encoder *encoder, struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); enum port port; + u32 val; DRM_DEBUG_KMS("\n"); @@ -558,6 +559,17 @@ static void intel_dsi_pre_enable(struct intel_encoder *encoder, intel_disable_dsi_pll(encoder); intel_enable_dsi_pll(encoder, pipe_config); + if (IS_BROXTON(dev_priv)) { + /* Add MIPI IO reset programming for modeset */ + val = I915_READ(BXT_P_CR_GT_DISP_PWRON); + I915_WRITE(BXT_P_CR_GT_DISP_PWRON, + val | MIPIO_RST_CTRL); + + /* Power up DSI regulator */ + I915_WRITE(BXT_P_DSI_REGULATOR_CFG, STAP_SELECT); + I915_WRITE(BXT_P_DSI_REGULATOR_TX_CTRL, 0); + } + intel_dsi_prepare(encoder, pipe_config); /* Panel Enable over CRC PMIC */ @@ -707,6 +719,7 @@ static void intel_dsi_post_disable(struct intel_encoder *encoder, { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); + u32 val; DRM_DEBUG_KMS("\n"); @@ -714,6 +727,17 @@ static void intel_dsi_post_disable(struct intel_encoder *encoder, intel_dsi_clear_device_ready(encoder); + if (IS_BROXTON(dev_priv)) { + /* Power down DSI regulator to save power */ + I915_WRITE(BXT_P_DSI_REGULATOR_CFG, STAP_SELECT); + I915_WRITE(BXT_P_DSI_REGULATOR_TX_CTRL, HS_IO_CTRL_SELECT); + + /* Add MIPI IO reset programming for modeset */ + val = I915_READ(BXT_P_CR_GT_DISP_PWRON); + I915_WRITE(BXT_P_CR_GT_DISP_PWRON, + val & ~MIPIO_RST_CTRL); + } + intel_disable_dsi_pll(encoder); if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) { -- cgit From e2de845e0b84ebb7e910bb898febbf813b7df483 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 1 Feb 2017 12:53:38 +0000 Subject: drm/i915/execlists: Skip resetting RING_CONTEXT_STATUS_PTR As we now flag when the GPU signals a context-switch and do not read the status register before we see that signal, we do not have to ensure that it is cleared upon reset (and can leave it to the GPU to reset it from the power context). Signed-off-by: Chris Wilson Cc: Mika Kuoppala Cc: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170201125338.12932-1-chris@chris-wilson.co.uk Reviewed-by: Mika Kuoppala --- drivers/gpu/drm/i915/intel_lrc.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 0e99d53d5523..753458452997 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -1317,7 +1317,6 @@ static int gen9_init_render_ring(struct intel_engine_cs *engine) static void reset_common_ring(struct intel_engine_cs *engine, struct drm_i915_gem_request *request) { - struct drm_i915_private *dev_priv = engine->i915; struct execlist_port *port = engine->execlist_port; struct intel_context *ce = &request->ctx->engine[engine->id]; @@ -1344,7 +1343,6 @@ static void reset_common_ring(struct intel_engine_cs *engine, return; /* Catch up with any missed context-switch interrupts */ - I915_WRITE(RING_CONTEXT_STATUS_PTR(engine), _MASKED_FIELD(0xffff, 0)); if (request->ctx != port[0].request->ctx) { i915_gem_request_put(port[0].request); port[0] = port[1]; -- cgit From 453cfe21711f644e744e834d5c61042b2fac218d Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 1 Feb 2017 13:12:22 +0000 Subject: drm/i915/execlists: Add interrupt-pending check to intel_execlists_idle() Primarily this serves as a sanity check that the bit has been cleared before we suspend (and hasn't reappeared after resume). Signed-off-by: Chris Wilson Cc: Mika Kuoppala Cc: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170201131222.11882-1-chris@chris-wilson.co.uk Reviewed-by: Mika Kuoppala --- drivers/gpu/drm/i915/intel_lrc.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 753458452997..44a92ea464ba 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -508,9 +508,15 @@ bool intel_execlists_idle(struct drm_i915_private *dev_priv) if (!i915.enable_execlists) return true; - for_each_engine(engine, dev_priv, id) + for_each_engine(engine, dev_priv, id) { + /* Interrupt/tasklet pending? */ + if (test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted)) + return false; + + /* Both ports drained, no more ELSP submission? */ if (!execlists_elsp_idle(engine)) return false; + } return true; } -- cgit From a667fb402c1e856209bf9e77ba41fc1cf356b867 Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Thu, 15 Dec 2016 15:29:44 +0100 Subject: drm/i915: Disable all crtcs during driver unload, v2. We may keep the crtc's enabled when userspace unsets all framebuffers but keeps the crtc active. This exposes a WARN in fbc_global disable, and a lot of bugs in our hardware readout code. Solve this by disabling all crtc's for now. Changes since v1: - Use lock_all_ctx instead of lock_all. Signed-off-by: Maarten Lankhorst Link: http://patchwork.freedesktop.org/patch/msgid/1481812185-19098-4-git-send-email-maarten.lankhorst@linux.intel.com Reviewed-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 0e99cb22362c..0667f480df97 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -43,6 +43,7 @@ #include #include +#include #include #include "i915_drv.h" @@ -1307,6 +1308,8 @@ void i915_driver_unload(struct drm_device *dev) { struct drm_i915_private *dev_priv = to_i915(dev); struct pci_dev *pdev = dev_priv->drm.pdev; + struct drm_modeset_acquire_ctx ctx; + int ret; intel_fbdev_fini(dev); @@ -1315,6 +1318,24 @@ void i915_driver_unload(struct drm_device *dev) intel_display_power_get(dev_priv, POWER_DOMAIN_INIT); + drm_modeset_acquire_init(&ctx, 0); + while (1) { + ret = drm_modeset_lock_all_ctx(dev, &ctx); + if (!ret) + ret = drm_atomic_helper_disable_all(dev, &ctx); + + if (ret != -EDEADLK) + break; + + drm_modeset_backoff(&ctx); + } + + if (ret) + DRM_ERROR("Disabling all crtc's during unload failed with %i\n", ret); + + drm_modeset_drop_locks(&ctx); + drm_modeset_acquire_fini(&ctx); + i915_driver_unregister(dev_priv); drm_vblank_cleanup(dev); -- cgit From 37255d8d3f4d0c88fc9f60f50bb4c3c30e6688cc Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Thu, 15 Dec 2016 15:29:43 +0100 Subject: drm/i915: Fix POWER_DOMAIN_AUDIO refcounting. If the crtc was brought up with audio before the driver loads, then crtc_disable will remove a refcount to audio that doesn't exist before. Fortunately we already set power domains on readout, so we can just add the power domain handling to get_crtc_power_domains, which will update the power domains correctly in all cases. This was found when testing module reload on CI with the crtc enabled, which resulted in the following warn after module reload + modeset: [ 24.197041] ------------[ cut here ]------------ [ 24.197075] WARNING: CPU: 0 PID: 99 at drivers/gpu/drm/i915/intel_runtime_pm.c:1790 intel_display_power_put+0x134/0x140 [i915] [ 24.197076] Use count on domain AUDIO is already zero [ 24.197098] CPU: 0 PID: 99 Comm: kworker/u8:2 Not tainted 4.9.0-CI-Trybot_393+ #1 [ 24.197099] Hardware name: /NUC6i5SYB, BIOS SYSKLi35.86A.0042.2016.0409.1246 04/09/2016 [ 24.197102] Workqueue: events_unbound async_run_entry_fn [ 24.197105] ffffc900003c7688 ffffffff81435b35 ffffc900003c76d8 0000000000000000 [ 24.197107] ffffc900003c76c8 ffffffff8107e4d6 000006fe5dc36f28 ffff88025dc30054 [ 24.197109] ffff88025dc36f28 ffff88025dc30000 ffff88025dc30000 0000000000000015 [ 24.197110] Call Trace: [ 24.197113] [] dump_stack+0x67/0x92 [ 24.197116] [] __warn+0xc6/0xe0 [ 24.197118] [] warn_slowpath_fmt+0x4a/0x50 [ 24.197149] [] intel_display_power_put+0x134/0x140 [i915] [ 24.197187] [] intel_disable_ddi+0x4d/0x80 [i915] [ 24.197223] [] intel_encoders_disable.isra.74+0x7f/0x90 [i915] [ 24.197257] [] haswell_crtc_disable+0x55/0x170 [i915] [ 24.197292] [] intel_atomic_commit_tail+0x108/0xfd0 [i915] [ 24.197295] [] ? __lock_is_held+0x66/0x90 [ 24.197330] [] intel_atomic_commit+0x429/0x560 [i915] [ 24.197332] [] ?drm_atomic_add_affected_connectors+0x56/0xf0 [ 24.197334] [] drm_atomic_commit+0x46/0x50 [ 24.197336] [] restore_fbdev_mode+0x147/0x270 [ 24.197337] [] drm_fb_helper_restore_fbdev_mode_unlocked+0x2e/0x70 [ 24.197339] [] drm_fb_helper_set_par+0x28/0x50 [ 24.197374] [] intel_fbdev_set_par+0x13/0x70 [i915] [ 24.197376] [] fbcon_init+0x57a/0x600 [ 24.197379] [] visual_init+0xd1/0x130 [ 24.197381] [] do_bind_con_driver+0x1bc/0x3a0 [ 24.197384] [] do_take_over_console+0x111/0x180 [ 24.197386] [] do_fbcon_takeover+0x52/0xb0 [ 24.197387] [] fbcon_event_notify+0x723/0x850 [ 24.197390] [] ?__blocking_notifier_call_chain+0x30/0x70 [ 24.197392] [] notifier_call_chain+0x34/0xa0 [ 24.197394] [] __blocking_notifier_call_chain+0x48/0x70 [ 24.197397] [] blocking_notifier_call_chain+0x11/0x20 [ 24.197398] [] fb_notifier_call_chain+0x16/0x20 [ 24.197400] [] register_framebuffer+0x24c/0x330 [ 24.197402] [] drm_fb_helper_initial_config+0x219/0x3c0 [ 24.197436] [] intel_fbdev_initial_config+0x13/0x30 [i915] [ 24.197438] [] async_run_entry_fn+0x34/0x140 [ 24.197440] [] process_one_work+0x1ec/0x6b0 [ 24.197442] [] ? process_one_work+0x166/0x6b0 [ 24.197445] [] worker_thread+0x49/0x490 [ 24.197447] [] ? process_one_work+0x6b0/0x6b0 [ 24.197448] [] kthread+0xeb/0x110 [ 24.197451] [] ? kthread_park+0x60/0x60 [ 24.197453] [] ret_from_fork+0x27/0x40 [ 24.197476] ---[ end trace bda64b683b8e8162 ]--- Signed-off-by: Maarten Lankhorst Link: http://patchwork.freedesktop.org/patch/msgid/1481812185-19098-3-git-send-email-maarten.lankhorst@linux.intel.com Reviewed-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_ddi.c | 14 ++------------ drivers/gpu/drm/i915/intel_display.c | 4 ++++ drivers/gpu/drm/i915/intel_dp_mst.c | 9 ++------- 3 files changed, 8 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index 9a9a670f8549..d957211c76ad 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -1835,8 +1835,6 @@ static void intel_enable_ddi(struct intel_encoder *intel_encoder, struct drm_connector_state *conn_state) { struct drm_encoder *encoder = &intel_encoder->base; - struct drm_crtc *crtc = encoder->crtc; - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); struct drm_i915_private *dev_priv = to_i915(encoder->dev); enum port port = intel_ddi_get_encoder_port(intel_encoder); int type = intel_encoder->type; @@ -1863,10 +1861,8 @@ static void intel_enable_ddi(struct intel_encoder *intel_encoder, intel_edp_drrs_enable(intel_dp, pipe_config); } - if (intel_crtc->config->has_audio) { - intel_display_power_get(dev_priv, POWER_DOMAIN_AUDIO); + if (pipe_config->has_audio) intel_audio_codec_enable(intel_encoder, pipe_config, conn_state); - } } static void intel_disable_ddi(struct intel_encoder *intel_encoder, @@ -1874,16 +1870,10 @@ static void intel_disable_ddi(struct intel_encoder *intel_encoder, struct drm_connector_state *old_conn_state) { struct drm_encoder *encoder = &intel_encoder->base; - struct drm_crtc *crtc = encoder->crtc; - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); int type = intel_encoder->type; - struct drm_device *dev = encoder->dev; - struct drm_i915_private *dev_priv = to_i915(dev); - if (intel_crtc->config->has_audio) { + if (old_crtc_state->has_audio) intel_audio_codec_disable(intel_encoder); - intel_display_power_put(dev_priv, POWER_DOMAIN_AUDIO); - } if (type == INTEL_OUTPUT_EDP) { struct intel_dp *intel_dp = enc_to_intel_dp(encoder); diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index b44e9466d394..88689a0b4183 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -5730,6 +5730,7 @@ static unsigned long get_crtc_power_domains(struct drm_crtc *crtc, struct intel_crtc_state *crtc_state) { struct drm_device *dev = crtc->dev; + struct drm_i915_private *dev_priv = to_i915(dev); struct drm_encoder *encoder; struct intel_crtc *intel_crtc = to_intel_crtc(crtc); enum pipe pipe = intel_crtc->pipe; @@ -5751,6 +5752,9 @@ static unsigned long get_crtc_power_domains(struct drm_crtc *crtc, mask |= BIT(intel_display_port_power_domain(intel_encoder)); } + if (HAS_DDI(dev_priv) && crtc_state->has_audio) + mask |= BIT(POWER_DOMAIN_AUDIO); + if (crtc_state->shared_dpll) mask |= BIT(POWER_DOMAIN_PLLS); diff --git a/drivers/gpu/drm/i915/intel_dp_mst.c b/drivers/gpu/drm/i915/intel_dp_mst.c index 95267fcef71b..6a85d388f936 100644 --- a/drivers/gpu/drm/i915/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/intel_dp_mst.c @@ -92,7 +92,6 @@ static void intel_mst_disable_dp(struct intel_encoder *encoder, struct intel_dp *intel_dp = &intel_dig_port->dp; struct intel_connector *connector = to_intel_connector(old_conn_state->connector); - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); int ret; DRM_DEBUG_KMS("%d\n", intel_dp->active_mst_links); @@ -103,10 +102,8 @@ static void intel_mst_disable_dp(struct intel_encoder *encoder, if (ret) { DRM_ERROR("failed to update payload %d\n", ret); } - if (old_crtc_state->has_audio) { + if (old_crtc_state->has_audio) intel_audio_codec_disable(encoder); - intel_display_power_put(dev_priv, POWER_DOMAIN_AUDIO); - } } static void intel_mst_post_disable_dp(struct intel_encoder *encoder, @@ -219,10 +216,8 @@ static void intel_mst_enable_dp(struct intel_encoder *encoder, ret = drm_dp_check_act_status(&intel_dp->mst_mgr); ret = drm_dp_update_payload_part2(&intel_dp->mst_mgr); - if (pipe_config->has_audio) { - intel_display_power_get(dev_priv, POWER_DOMAIN_AUDIO); + if (pipe_config->has_audio) intel_audio_codec_enable(encoder, pipe_config, conn_state); - } } static bool intel_dp_mst_enc_get_hw_state(struct intel_encoder *encoder, -- cgit From 5584f1b1d73e9cc95092734c316e467c6c4468f9 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Thu, 2 Feb 2017 10:47:11 +0100 Subject: drm/i915: fix i915 running as dom0 under Xen Commit 920cf4194954ec ("drm/i915: Introduce an internal allocator for disposable private objects") introduced a regression for the kernel running as Xen dom0: when switching to graphics mode a GPU HANG occurred. Reason seems to be a missing adaption similar to that done in commit 7453c549f5f648 ("swiotlb: Export swiotlb_max_segment to users") to i915_gem_object_get_pages_internal(). So limit the maximum page order to be used according to the maximum swiotlb segment size instead to the complete swiotlb size. Fixes: 920cf4194954 ("drm/i915: Introduce an internal allocator for disposable private objects") Signed-off-by: Juergen Gross Link: http://patchwork.freedesktop.org/patch/msgid/20170202094711.939-1-jgross@suse.com Cc: Chris Wilson Cc: Tvrtko Ursulin Cc: Daniel Vetter Cc: Jani Nikula Cc: intel-gfx@lists.freedesktop.org Cc: # v4.10-rc1+ Reviewed-by: Tvrtko Ursulin Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_gem_internal.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_internal.c b/drivers/gpu/drm/i915/i915_gem_internal.c index 64d8fb3fd764..2b9d5e94a8ae 100644 --- a/drivers/gpu/drm/i915/i915_gem_internal.c +++ b/drivers/gpu/drm/i915/i915_gem_internal.c @@ -68,8 +68,16 @@ i915_gem_object_get_pages_internal(struct drm_i915_gem_object *obj) max_order = MAX_ORDER; #ifdef CONFIG_SWIOTLB - if (swiotlb_nr_tbl()) /* minimum max swiotlb size is IO_TLB_SEGSIZE */ - max_order = min(max_order, ilog2(IO_TLB_SEGPAGES)); + if (swiotlb_nr_tbl()) { + unsigned int max_segment; + + max_segment = swiotlb_max_segment(); + if (max_segment) { + max_segment = max_t(unsigned int, max_segment, + PAGE_SIZE) >> PAGE_SHIFT; + max_order = min(max_order, ilog2(max_segment)); + } + } #endif gfp = GFP_KERNEL | __GFP_HIGHMEM | __GFP_RECLAIMABLE; -- cgit From 170b03c4a88298e71f3a76ab79f6895f6949f3a1 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 25 Jan 2017 10:11:02 +0000 Subject: drm/i915: Build DRM range manager selftests for CI Build the struct drm_mm selftests so that we can trivially run them within our CI. "Enable debug, become developer." - Joonas Lahtinen Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170125101102.9010-3-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/Kconfig.debug | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/i915/Kconfig.debug b/drivers/gpu/drm/i915/Kconfig.debug index 597648c7a645..598551dbf62c 100644 --- a/drivers/gpu/drm/i915/Kconfig.debug +++ b/drivers/gpu/drm/i915/Kconfig.debug @@ -24,6 +24,7 @@ config DRM_I915_DEBUG select X86_MSR # used by igt/pm_rpm select DRM_VGEM # used by igt/prime_vgem (dmabuf interop checks) select DRM_DEBUG_MM if DRM=y + select DRM_DEBUG_MM_SELFTEST select DRM_I915_SW_FENCE_DEBUG_OBJECTS default n help -- cgit From 3aac4acb89710fe782c9e78e7b1febf76e112c6c Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Wed, 1 Feb 2017 15:46:09 +0200 Subject: drm/i915: don't warn about Skylake CPU - KabyPoint PCH combo Apparently there are machines out there with Skylake CPU and KabyPoint PCH. Judging from our driver code, there doesn't seem to be any code paths that would do anything different between SunrisePoint and KabyPoint PCHs, so it would seem okay to accept the combo without warnings. Fixes: 22dea0be50b2 ("drm/i915: Introduce Kabypoint PCH for Kabylake H/DT.") References: https://lists.freedesktop.org/archives/intel-gfx/2017-February/118611.html Reported-by: Rainer Koenig Cc: Rainer Koenig Cc: Rodrigo Vivi Cc: # v4.8+ Reviewed-by: Rodrigo Vivi Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/1485956769-26015-1-git-send-email-jani.nikula@intel.com --- drivers/gpu/drm/i915/i915_drv.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 0667f480df97..276676b2bbc8 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -214,7 +214,8 @@ static void intel_detect_pch(struct drm_i915_private *dev_priv) } else if (id == INTEL_PCH_KBP_DEVICE_ID_TYPE) { dev_priv->pch_type = PCH_KBP; DRM_DEBUG_KMS("Found KabyPoint PCH\n"); - WARN_ON(!IS_KABYLAKE(dev_priv)); + WARN_ON(!IS_SKYLAKE(dev_priv) && + !IS_KABYLAKE(dev_priv)); } else if ((id == INTEL_PCH_P2X_DEVICE_ID_TYPE) || (id == INTEL_PCH_P3X_DEVICE_ID_TYPE) || ((id == INTEL_PCH_QEMU_DEVICE_ID_TYPE) && -- cgit From bb96dcf5830e5d81a1da2e2a14e6c0f7dfc64348 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 2 Feb 2017 13:27:21 +0000 Subject: drm/i915: Recreate internal objects with single page segments if dmar fails If we fail to dma-map the object, the most common cause is lack of space inside the SW-IOTLB due to fragmentation. If we recreate the_sg_table using segments of PAGE_SIZE (and single page allocations), we may succeed in remapping the scatterlist. First became a significant problem for the mock selftests after commit 5584f1b1d73e ("drm/i915: fix i915 running as dom0 under Xen") increased the max_order. Fixes: 920cf4194954 ("drm/i915: Introduce an internal allocator for disposable private objects") Fixes: 5584f1b1d73e ("drm/i915: fix i915 running as dom0 under Xen") Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170202132721.12711-1-chris@chris-wilson.co.uk Reviewed-by: Tvrtko Ursulin Cc: # v4.10-rc1+ --- drivers/gpu/drm/i915/i915_gem_internal.c | 37 ++++++++++++++++++++------------ 1 file changed, 23 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_internal.c b/drivers/gpu/drm/i915/i915_gem_internal.c index 2b9d5e94a8ae..fc950abbe400 100644 --- a/drivers/gpu/drm/i915/i915_gem_internal.c +++ b/drivers/gpu/drm/i915/i915_gem_internal.c @@ -48,24 +48,12 @@ static struct sg_table * i915_gem_object_get_pages_internal(struct drm_i915_gem_object *obj) { struct drm_i915_private *i915 = to_i915(obj->base.dev); - unsigned int npages = obj->base.size / PAGE_SIZE; struct sg_table *st; struct scatterlist *sg; + unsigned int npages; int max_order; gfp_t gfp; - st = kmalloc(sizeof(*st), GFP_KERNEL); - if (!st) - return ERR_PTR(-ENOMEM); - - if (sg_alloc_table(st, npages, GFP_KERNEL)) { - kfree(st); - return ERR_PTR(-ENOMEM); - } - - sg = st->sgl; - st->nents = 0; - max_order = MAX_ORDER; #ifdef CONFIG_SWIOTLB if (swiotlb_nr_tbl()) { @@ -87,6 +75,20 @@ i915_gem_object_get_pages_internal(struct drm_i915_gem_object *obj) gfp |= __GFP_DMA32; } +create_st: + st = kmalloc(sizeof(*st), GFP_KERNEL); + if (!st) + return ERR_PTR(-ENOMEM); + + npages = obj->base.size / PAGE_SIZE; + if (sg_alloc_table(st, npages, GFP_KERNEL)) { + kfree(st); + return ERR_PTR(-ENOMEM); + } + + sg = st->sgl; + st->nents = 0; + do { int order = min(fls(npages) - 1, max_order); struct page *page; @@ -114,8 +116,15 @@ i915_gem_object_get_pages_internal(struct drm_i915_gem_object *obj) sg = __sg_next(sg); } while (1); - if (i915_gem_gtt_prepare_pages(obj, st)) + if (i915_gem_gtt_prepare_pages(obj, st)) { + /* Failed to dma-map try again with single page sg segments */ + if (get_order(st->sgl->length)) { + internal_free_pages(st); + max_order = 0; + goto create_st; + } goto err; + } /* Mark the pages as dontneed whilst they are still pinned. As soon * as they are unpinned they are allowed to be reaped by the shrinker, -- cgit From e32e836afe39691848270f1ac2a9d0e09ad173fd Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Thu, 2 Feb 2017 14:55:00 +0000 Subject: drm/i915: remove 512GB allocation warning Now that we have selftests in place exercising truly huge allocations we will start to hit the 512GB warning, so now seems like a good time to remove this user-triggerable WARN. Cc: Joonas Lahtinen Cc: Chris Wilson Signed-off-by: Matthew Auld Link: http://patchwork.freedesktop.org/patch/msgid/1486047300-13198-1-git-send-email-matthew.auld@intel.com Reviewed-by: Joonas Lahtinen Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_gem_gtt.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 048040efc3f0..c567b34800cf 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -1423,10 +1423,6 @@ static int gen8_alloc_va_range_4lvl(struct i915_address_space *vm, if (ret) return ret; - WARN(bitmap_weight(new_pdps, GEN8_PML4ES_PER_PML4) > 2, - "The allocation has spanned more than 512GB. " - "It is highly likely this is incorrect."); - gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) { WARN_ON(!pdp); -- cgit From 0aab2c721d81590012a5021a516f00666646741f Mon Sep 17 00:00:00 2001 From: "Lee, Shawn C" Date: Fri, 3 Feb 2017 12:32:09 +0800 Subject: drm/i915/bxt: Add MST support when do DPLL calculation Add the missing INTEL_OUTPUT_DP_MST case in bxt_get_dpll() to correctly initialize the crtc_state and port plls when link training a DP MST monitor on BXT/APL devices. Fixes: a277ca7dc01d ("drm/i915: Split bxt_ddi_pll_select()") Bugs: https://bugs.freedesktop.org/show_bug.cgi?id=99572 Reviewed-by: Cooper Chiou Reviewed-by: Gary C Wang Reviewed-by: Ciobanu, Nathan D Reviewed-by: Herbert, Marc Reviewed-by: Bride, Jim Reviewed-by: Navare, Manasi D Cc: Jani Nikula Cc: # v4.9+ Signed-off-by: Lee, Shawn C Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/1486096329-6255-1-git-send-email-shawn.c.lee@intel.com --- drivers/gpu/drm/i915/intel_dpll_mgr.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_dpll_mgr.c b/drivers/gpu/drm/i915/intel_dpll_mgr.c index 368eff93a0a6..79f656ca593d 100644 --- a/drivers/gpu/drm/i915/intel_dpll_mgr.c +++ b/drivers/gpu/drm/i915/intel_dpll_mgr.c @@ -1855,7 +1855,8 @@ bxt_get_dpll(struct intel_crtc *crtc, return NULL; if ((encoder->type == INTEL_OUTPUT_DP || - encoder->type == INTEL_OUTPUT_EDP) && + encoder->type == INTEL_OUTPUT_EDP || + encoder->type == INTEL_OUTPUT_DP_MST) && !bxt_ddi_dp_set_dpll_hw_state(clock, &dpll_hw_state)) return NULL; -- cgit From 52da22e7aba155be238faff4f6e97b2eb9de64f3 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 3 Feb 2017 10:56:52 +0000 Subject: drm/i915: Reject set-tiling-ioctl with stride==0 and a tiling mode In commit 957870f93412 ("drm/i915: Split out i915_gem_object_set_tiling()"), I swapped an alignment check for IS_ALIGNED and in the process removed the less-than check. That check turns out to be important as it was the only rejection for stride == 0. Tvrtko did spot it, but I was overconfident in the IS_ALIGNED() conversion. Fixes: 957870f93412 ("drm/i915: Split out i915_gem_object_set_tiling()") Testcase: igt/gem_tiling_max_stride Signed-off-by: Chris Wilson Cc: Chris Wilson Cc: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170203105652.27819-1-chris@chris-wilson.co.uk Reviewed-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/i915_gem_tiling.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c index b1361cfd4c5c..974ac08df473 100644 --- a/drivers/gpu/drm/i915/i915_gem_tiling.c +++ b/drivers/gpu/drm/i915/i915_gem_tiling.c @@ -173,7 +173,7 @@ i915_tiling_ok(struct drm_i915_gem_object *obj, else tile_width = 512; - if (!IS_ALIGNED(stride, tile_width)) + if (!stride || !IS_ALIGNED(stride, tile_width)) return false; /* 965+ just needs multiples of tile width */ -- cgit From a3a1e5336c41ab3763c64152df5f868cb91d20bf Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 3 Feb 2017 11:50:35 +0000 Subject: drm/i915: Tidy the tail of i915_tiling_ok() The current tail breaks the pattern of if (check) return false, which can catch the reader out. If we move the gen2/3 power-of-two test into the earlier gen2/3 branch, we can eliminate the contrary tail. Suggested-by: Tvrtko Ursulin Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170203115036.24743-1-chris@chris-wilson.co.uk Reviewed-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/i915_gem_tiling.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c index 974ac08df473..152bed2dcb11 100644 --- a/drivers/gpu/drm/i915/i915_gem_tiling.c +++ b/drivers/gpu/drm/i915/i915_gem_tiling.c @@ -158,6 +158,9 @@ i915_tiling_ok(struct drm_i915_gem_object *obj, if (stride > 8192) return false; + if (!is_power_of_2(stride)) + return false; + if (IS_GEN3(i915)) { if (obj->base.size > I830_FENCE_MAX_SIZE_VAL << 20) return false; @@ -176,12 +179,7 @@ i915_tiling_ok(struct drm_i915_gem_object *obj, if (!stride || !IS_ALIGNED(stride, tile_width)) return false; - /* 965+ just needs multiples of tile width */ - if (INTEL_GEN(i915) >= 4) - return true; - - /* Pre-965 needs power of two tile widths */ - return is_power_of_2(stride); + return true; } static bool i915_vma_fence_prepare(struct i915_vma *vma, -- cgit From 955b8e9ca8b924f8c21d43797f8bf8f39908efd3 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 3 Feb 2017 11:46:00 +0000 Subject: drm/i915: Allow large objects to be tiled on gen2/3 We now have partial VMA support to break large objects into fence sized regions and no longer have to restrict tiling to small objects on gen2/3 Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170203115036.24743-2-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_gem_tiling.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c index 152bed2dcb11..8b2b507bdf7e 100644 --- a/drivers/gpu/drm/i915/i915_gem_tiling.c +++ b/drivers/gpu/drm/i915/i915_gem_tiling.c @@ -160,14 +160,6 @@ i915_tiling_ok(struct drm_i915_gem_object *obj, if (!is_power_of_2(stride)) return false; - - if (IS_GEN3(i915)) { - if (obj->base.size > I830_FENCE_MAX_SIZE_VAL << 20) - return false; - } else { - if (obj->base.size > I830_FENCE_MAX_SIZE_VAL << 19) - return false; - } } if (IS_GEN2(i915) || -- cgit From 4a04e371228937d104fd03d5219f8c25fda29757 Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Fri, 3 Feb 2017 14:45:29 -0800 Subject: drm/i915: fix pm refcounting on fence error in execbuf Fences are creted/checked before the pm ref is taken, so if we jump to pre_mutex_err we will uncorrectly call intel_runtime_pm_put. v2: Massage unwind error paths Fixes: fec0445caa27 (drm/i915: Support explicit fencing for execbuf) Testcase: igt/gem_exec_params Cc: Chris Wilson Signed-off-by: Daniele Ceraolo Spurio Link: http://patchwork.freedesktop.org/patch/msgid/1486161930-11764-1-git-send-email-daniele.ceraolospurio@intel.com Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 91c2393199a3..26c26a6675df 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -1644,18 +1644,15 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, if (args->flags & I915_EXEC_FENCE_IN) { in_fence = sync_file_get_fence(lower_32_bits(args->rsvd2)); - if (!in_fence) { - ret = -EINVAL; - goto pre_mutex_err; - } + if (!in_fence) + return -EINVAL; } if (args->flags & I915_EXEC_FENCE_OUT) { out_fence_fd = get_unused_fd_flags(O_CLOEXEC); if (out_fence_fd < 0) { ret = out_fence_fd; - out_fence_fd = -1; - goto pre_mutex_err; + goto err_in_fence; } } @@ -1878,6 +1875,7 @@ pre_mutex_err: intel_runtime_pm_put(dev_priv); if (out_fence_fd != -1) put_unused_fd(out_fence_fd); +err_in_fence: dma_fence_put(in_fence); return ret; } -- cgit From ccfceda22cc018ce78c8523c102937c39272e543 Mon Sep 17 00:00:00 2001 From: Daniele Ceraolo Spurio Date: Fri, 3 Feb 2017 17:23:29 -0800 Subject: drm/i915: refactor register fw read/write macros for recent GENs The only difference for the more recent of those macros is the version of the *_reg__fw_domains function. Passing the function prefix in allows us to re-use the same macro to generate functions for different GENs and will make it easier to add new accessors in the future Cc: Tvrtko Ursulin Signed-off-by: Daniele Ceraolo Spurio Link: http://patchwork.freedesktop.org/patch/msgid/1486171409-21542-1-git-send-email-daniele.ceraolospurio@intel.com Reviewed-by: Tvrtko Ursulin Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/intel_uncore.c | 40 ++++++++++--------------------------- 1 file changed, 10 insertions(+), 30 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index 3d243fefe09b..1ff8fd9911d7 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -992,29 +992,19 @@ static inline void __force_wake_auto(struct drm_i915_private *dev_priv, ___force_wake_auto(dev_priv, fw_domains); } -#define __gen6_read(x) \ +#define __gen_read(func, x) \ static u##x \ -gen6_read##x(struct drm_i915_private *dev_priv, i915_reg_t reg, bool trace) { \ +func##_read##x(struct drm_i915_private *dev_priv, i915_reg_t reg, bool trace) { \ enum forcewake_domains fw_engine; \ GEN6_READ_HEADER(x); \ - fw_engine = __gen6_reg_read_fw_domains(offset); \ - if (fw_engine) \ - __force_wake_auto(dev_priv, fw_engine); \ - val = __raw_i915_read##x(dev_priv, reg); \ - GEN6_READ_FOOTER; \ -} - -#define __fwtable_read(x) \ -static u##x \ -fwtable_read##x(struct drm_i915_private *dev_priv, i915_reg_t reg, bool trace) { \ - enum forcewake_domains fw_engine; \ - GEN6_READ_HEADER(x); \ - fw_engine = __fwtable_reg_read_fw_domains(offset); \ + fw_engine = __##func##_reg_read_fw_domains(offset); \ if (fw_engine) \ __force_wake_auto(dev_priv, fw_engine); \ val = __raw_i915_read##x(dev_priv, reg); \ GEN6_READ_FOOTER; \ } +#define __gen6_read(x) __gen_read(gen6, x) +#define __fwtable_read(x) __gen_read(fwtable, x) #define __gen9_decoupled_read(x) \ static u##x \ @@ -1115,29 +1105,19 @@ gen6_write##x(struct drm_i915_private *dev_priv, i915_reg_t reg, u##x val, bool GEN6_WRITE_FOOTER; \ } -#define __gen8_write(x) \ +#define __gen_write(func, x) \ static void \ -gen8_write##x(struct drm_i915_private *dev_priv, i915_reg_t reg, u##x val, bool trace) { \ +func##_write##x(struct drm_i915_private *dev_priv, i915_reg_t reg, u##x val, bool trace) { \ enum forcewake_domains fw_engine; \ GEN6_WRITE_HEADER; \ - fw_engine = __gen8_reg_write_fw_domains(offset); \ - if (fw_engine) \ - __force_wake_auto(dev_priv, fw_engine); \ - __raw_i915_write##x(dev_priv, reg, val); \ - GEN6_WRITE_FOOTER; \ -} - -#define __fwtable_write(x) \ -static void \ -fwtable_write##x(struct drm_i915_private *dev_priv, i915_reg_t reg, u##x val, bool trace) { \ - enum forcewake_domains fw_engine; \ - GEN6_WRITE_HEADER; \ - fw_engine = __fwtable_reg_write_fw_domains(offset); \ + fw_engine = __##func##_reg_write_fw_domains(offset); \ if (fw_engine) \ __force_wake_auto(dev_priv, fw_engine); \ __raw_i915_write##x(dev_priv, reg, val); \ GEN6_WRITE_FOOTER; \ } +#define __gen8_write(x) __gen_write(gen8, x) +#define __fwtable_write(x) __gen_write(fwtable, x) #define __gen9_decoupled_write(x) \ static void \ -- cgit From 28b6def6ec6408e08970a0f1107b483961bfc7ae Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Mon, 6 Feb 2017 10:23:13 +0100 Subject: drm/i915: Update DRIVER_DATE to 20170206 Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index bd5a38be5b83..1e0d9561fd67 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -79,8 +79,8 @@ #define DRIVER_NAME "i915" #define DRIVER_DESC "Intel Graphics" -#define DRIVER_DATE "20170123" -#define DRIVER_TIMESTAMP 1485156432 +#define DRIVER_DATE "20170206" +#define DRIVER_TIMESTAMP 1486372993 #undef WARN_ON /* Many gcc seem to no see through this and fall over :( */ -- cgit From ba318c61a9719577b6f451c055f364e4116874b2 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 2 Feb 2017 20:47:41 +0000 Subject: drm/i915: Drain the freed state from the tail of the next commit If we have any residual freed atomic state from earlier commits, flush the freed list after performing the current modeset. This prevents the freed list from ever-growing if userspace manages to starve the kernel threads (i.e. we are never able to run our free state worker and eventually the system may even oom). Fixes: eb955eee27d9 ("drm/i915: Move atomic state free from out of fence release") Testcase: igt/kms_cursor/legacy/all-pipes-single-bo Reported-by: Maarten Lankhorst Signed-off-by: Chris Wilson Cc: Maarten Lankhorst Cc: Joonas Lahtinen Cc: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/20170202204741.18231-1-chris@chris-wilson.co.uk Reviewed-by: Maarten Lankhorst --- drivers/gpu/drm/i915/intel_display.c | 34 +++++++++++++++++++++------------- 1 file changed, 21 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 88689a0b4183..45e587496886 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -14395,6 +14395,24 @@ static void skl_update_crtcs(struct drm_atomic_state *state, } while (progress); } +static void intel_atomic_helper_free_state(struct drm_i915_private *dev_priv) +{ + struct intel_atomic_state *state, *next; + struct llist_node *freed; + + freed = llist_del_all(&dev_priv->atomic_helper.free_list); + llist_for_each_entry_safe(state, next, freed, freed) + drm_atomic_state_put(&state->base); +} + +static void intel_atomic_helper_free_state_worker(struct work_struct *work) +{ + struct drm_i915_private *dev_priv = + container_of(work, typeof(*dev_priv), atomic_helper.free_work); + + intel_atomic_helper_free_state(dev_priv); +} + static void intel_atomic_commit_tail(struct drm_atomic_state *state) { struct drm_device *dev = state->dev; @@ -14561,6 +14579,8 @@ static void intel_atomic_commit_tail(struct drm_atomic_state *state) * can happen also when the device is completely off. */ intel_uncore_arm_unclaimed_mmio_detection(dev_priv); + + intel_atomic_helper_free_state(dev_priv); } static void intel_atomic_commit_work(struct work_struct *work) @@ -16615,18 +16635,6 @@ fail: drm_modeset_acquire_fini(&ctx); } -static void intel_atomic_helper_free_state(struct work_struct *work) -{ - struct drm_i915_private *dev_priv = - container_of(work, typeof(*dev_priv), atomic_helper.free_work); - struct intel_atomic_state *state, *next; - struct llist_node *freed; - - freed = llist_del_all(&dev_priv->atomic_helper.free_list); - llist_for_each_entry_safe(state, next, freed, freed) - drm_atomic_state_put(&state->base); -} - int intel_modeset_init(struct drm_device *dev) { struct drm_i915_private *dev_priv = to_i915(dev); @@ -16647,7 +16655,7 @@ int intel_modeset_init(struct drm_device *dev) dev->mode_config.funcs = &intel_mode_funcs; INIT_WORK(&dev_priv->atomic_helper.free_work, - intel_atomic_helper_free_state); + intel_atomic_helper_free_state_worker); intel_init_quirks(dev); -- cgit From 22cc440eae52d805ced3b9c3fcd42713d748140f Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sat, 4 Feb 2017 11:05:19 +0000 Subject: drm/i915: Print execlists restart after reset After resetting, show the requests that each engine restarts from in the debug log. Signed-off-by: Chris Wilson Cc: Mika Kuoppala Link: http://patchwork.freedesktop.org/patch/msgid/20170204110519.7645-1-chris@chris-wilson.co.uk Reviewed-by: Mika Kuoppala --- drivers/gpu/drm/i915/intel_lrc.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 44a92ea464ba..754f77c394fb 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -1254,6 +1254,11 @@ out: return ret; } +static u32 port_seqno(struct execlist_port *port) +{ + return port->request ? port->request->global_seqno : 0; +} + static int gen8_init_common_ring(struct intel_engine_cs *engine) { struct drm_i915_private *dev_priv = engine->i915; @@ -1279,6 +1284,10 @@ static int gen8_init_common_ring(struct intel_engine_cs *engine) /* After a GPU reset, we may have requests to replay */ clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted); if (!execlists_elsp_idle(engine)) { + DRM_DEBUG_DRIVER("Restarting %s from requests [0x%x, 0x%x]\n", + engine->name, + port_seqno(&engine->execlist_port[0]), + port_seqno(&engine->execlist_port[1])); engine->execlist_port[0].count = 0; engine->execlist_port[1].count = 0; execlists_submit_ports(engine); -- cgit From b196fbc719871e2746e71ac9e62cde2262555c9b Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 6 Feb 2017 08:45:45 +0000 Subject: drm/i915: Manipulate the Global GTT size using I915_GTT_PAGE_SIZE I incorrectly converted the exclusion of the last 4096 bytes (that avoids any potential prefetching past the end of the GTT) to PAGE_SIZE and not to I915_GTT_PAGE_SIZE as it should be. Signed-off-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20170206084547.27921-1-chris@chris-wilson.co.uk Reviewed-by: Matthew Auld --- drivers/gpu/drm/i915/i915_gem_gtt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index c567b34800cf..5cf1ac4f070b 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -3243,9 +3243,9 @@ int i915_ggtt_init_hw(struct drm_i915_private *dev_priv) * shrink the range used by drm_mm. */ mutex_lock(&dev_priv->drm.struct_mutex); - ggtt->base.total -= PAGE_SIZE; + ggtt->base.total -= I915_GTT_PAGE_SIZE; i915_address_space_init(&ggtt->base, dev_priv, "[global]"); - ggtt->base.total += PAGE_SIZE; + ggtt->base.total += I915_GTT_PAGE_SIZE; if (!HAS_LLC(dev_priv)) ggtt->base.mm.color_adjust = i915_gtt_color_adjust; mutex_unlock(&dev_priv->drm.struct_mutex); -- cgit From 47db922fa1c8498c61cbcae4e5faba84713cae70 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 6 Feb 2017 08:45:46 +0000 Subject: drm/i915: Assign I915_COLOR_UNEVICTABLE to the address space head_node The drm_mm range manager (within i915_address_space) uses a special drm_mm_node that excludes the unavailable range (beyond the end of the drm_mm). However, we play games with the global GTT to use the head_node to exclude the tail page but tell ourselves that the whole range is available. This causes an issue when we try to evict using the full range of the global GTT which is wider than the drm_mm, resulting in complete confusion and catastrophe. One way to resolve this would be to use a reserved node to exclude the guard page, or we can treat the drm_mm's head_node as our guard page and assign it the appropriate colour. Signed-off-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20170206084547.27921-2-chris@chris-wilson.co.uk Reviewed-by: Matthew Auld --- drivers/gpu/drm/i915/i915_gem_gtt.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 5cf1ac4f070b..9f1f3bfead59 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -2160,10 +2160,14 @@ static void i915_address_space_init(struct i915_address_space *vm, const char *name) { i915_gem_timeline_init(dev_priv, &vm->timeline, name); + drm_mm_init(&vm->mm, vm->start, vm->total); + vm->mm.head_node.color = I915_COLOR_UNEVICTABLE; + INIT_LIST_HEAD(&vm->active_list); INIT_LIST_HEAD(&vm->inactive_list); INIT_LIST_HEAD(&vm->unbound_list); + list_add_tail(&vm->global_link, &dev_priv->vm_list); } -- cgit From a6508ded2a6601fea903185034adc3622d94da0b Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 6 Feb 2017 08:45:47 +0000 Subject: drm/i915: Use page coloring to provide the guard page at the end of the GTT As we now mark the reserved hole (drm_mm.head_node) with the special UNEVICTABLE color, we can use the page coloring to avoid prefetching of the CS beyond the end of the GTT. Signed-off-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20170206084547.27921-3-chris@chris-wilson.co.uk Reviewed-by: Matthew Auld --- drivers/gpu/drm/i915/i915_gem_evict.c | 10 ++++++++-- drivers/gpu/drm/i915/i915_gem_gtt.c | 19 ++++++++++++------- 2 files changed, 20 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c index a43e44e18042..6d2fcfafd3c5 100644 --- a/drivers/gpu/drm/i915/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/i915_gem_evict.c @@ -253,6 +253,9 @@ int i915_gem_evict_for_node(struct i915_address_space *vm, int ret = 0; lockdep_assert_held(&vm->i915->drm.struct_mutex); + GEM_BUG_ON(!IS_ALIGNED(start, I915_GTT_PAGE_SIZE)); + GEM_BUG_ON(!IS_ALIGNED(end, I915_GTT_PAGE_SIZE)); + trace_i915_gem_evict_node(vm, target, flags); /* Retire before we search the active list. Although we have @@ -268,9 +271,11 @@ int i915_gem_evict_for_node(struct i915_address_space *vm, /* Expand search to cover neighbouring guard pages (or lack!) */ if (start > vm->start) start -= I915_GTT_PAGE_SIZE; - if (end < vm->start + vm->total) - end += I915_GTT_PAGE_SIZE; + + /* Always look at the page afterwards to avoid the end-of-GTT */ + end += I915_GTT_PAGE_SIZE; } + GEM_BUG_ON(start >= end); drm_mm_for_each_node_in_range(node, &vm->mm, start, end) { /* If we find any non-objects (!vma), we cannot evict them */ @@ -279,6 +284,7 @@ int i915_gem_evict_for_node(struct i915_address_space *vm, break; } + GEM_BUG_ON(!node->allocated); vma = container_of(node, typeof(*vma), node); /* If we are using coloring to insert guard pages between diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 9f1f3bfead59..850d40ec77af 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -2718,11 +2718,16 @@ static void i915_gtt_color_adjust(const struct drm_mm_node *node, u64 *start, u64 *end) { - if (node->color != color) + if (node->allocated && node->color != color) *start += I915_GTT_PAGE_SIZE; + /* Also leave a space between the unallocated reserved node after the + * GTT and any objects within the GTT, i.e. we use the color adjustment + * to insert a guard page to prevent prefetches crossing over the + * GTT boundary. + */ node = list_next_entry(node, node_list); - if (node->allocated && node->color != color) + if (node->color != color) *end -= I915_GTT_PAGE_SIZE; } @@ -3243,14 +3248,14 @@ int i915_ggtt_init_hw(struct drm_i915_private *dev_priv) INIT_LIST_HEAD(&dev_priv->vm_list); - /* Subtract the guard page before address space initialization to - * shrink the range used by drm_mm. + /* Note that we use page colouring to enforce a guard page at the + * end of the address space. This is required as the CS may prefetch + * beyond the end of the batch buffer, across the page boundary, + * and beyond the end of the GTT if we do not provide a guard. */ mutex_lock(&dev_priv->drm.struct_mutex); - ggtt->base.total -= I915_GTT_PAGE_SIZE; i915_address_space_init(&ggtt->base, dev_priv, "[global]"); - ggtt->base.total += I915_GTT_PAGE_SIZE; - if (!HAS_LLC(dev_priv)) + if (!HAS_LLC(dev_priv) && !USES_PPGTT(dev_priv)) ggtt->base.mm.color_adjust = i915_gtt_color_adjust; mutex_unlock(&dev_priv->drm.struct_mutex); -- cgit From 7fff8126d9cc902b2636d05d5d34894a75174993 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Fri, 27 Jan 2017 11:39:18 +0200 Subject: drm/i915/gen9+: Enable hotplug detection early MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For LSPCON resume time initialization we need to sample the corresponding pin's HPD level, but this is only available when HPD detection is enabled. Currently we enable detection only when enabling HPD interrupts which is too late, so bring the enabling of detection earlier. This is needed by the next patch. Cc: Shashank Sharma Cc: Jani Nikula Cc: Ville Syrjälä Cc: Daniel Vetter Cc: # v4.9+ Signed-off-by: Imre Deak Reviewed-by: Shashank Sharma Link: http://patchwork.freedesktop.org/patch/msgid/1485509961-9010-2-git-send-email-imre.deak@intel.com --- drivers/gpu/drm/i915/i915_irq.c | 69 +++++++++++++++++++++++++++++------------ 1 file changed, 50 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 059d66b46e1a..8b868405e99b 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -3129,19 +3129,16 @@ static void ibx_hpd_irq_setup(struct drm_i915_private *dev_priv) I915_WRITE(PCH_PORT_HOTPLUG, hotplug); } -static void spt_hpd_irq_setup(struct drm_i915_private *dev_priv) +static void spt_hpd_detection_setup(struct drm_i915_private *dev_priv) { - u32 hotplug_irqs, hotplug, enabled_irqs; - - hotplug_irqs = SDE_HOTPLUG_MASK_SPT; - enabled_irqs = intel_hpd_enabled_irqs(dev_priv, hpd_spt); - - ibx_display_interrupt_update(dev_priv, hotplug_irqs, enabled_irqs); + u32 hotplug; /* Enable digital hotplug on the PCH */ hotplug = I915_READ(PCH_PORT_HOTPLUG); - hotplug |= PORTD_HOTPLUG_ENABLE | PORTC_HOTPLUG_ENABLE | - PORTB_HOTPLUG_ENABLE | PORTA_HOTPLUG_ENABLE; + hotplug |= PORTA_HOTPLUG_ENABLE | + PORTB_HOTPLUG_ENABLE | + PORTC_HOTPLUG_ENABLE | + PORTD_HOTPLUG_ENABLE; I915_WRITE(PCH_PORT_HOTPLUG, hotplug); hotplug = I915_READ(PCH_PORT_HOTPLUG2); @@ -3149,6 +3146,18 @@ static void spt_hpd_irq_setup(struct drm_i915_private *dev_priv) I915_WRITE(PCH_PORT_HOTPLUG2, hotplug); } +static void spt_hpd_irq_setup(struct drm_i915_private *dev_priv) +{ + u32 hotplug_irqs, enabled_irqs; + + hotplug_irqs = SDE_HOTPLUG_MASK_SPT; + enabled_irqs = intel_hpd_enabled_irqs(dev_priv, hpd_spt); + + ibx_display_interrupt_update(dev_priv, hotplug_irqs, enabled_irqs); + + spt_hpd_detection_setup(dev_priv); +} + static void ilk_hpd_irq_setup(struct drm_i915_private *dev_priv) { u32 hotplug_irqs, hotplug, enabled_irqs; @@ -3183,18 +3192,15 @@ static void ilk_hpd_irq_setup(struct drm_i915_private *dev_priv) ibx_hpd_irq_setup(dev_priv); } -static void bxt_hpd_irq_setup(struct drm_i915_private *dev_priv) +static void __bxt_hpd_detection_setup(struct drm_i915_private *dev_priv, + u32 enabled_irqs) { - u32 hotplug_irqs, hotplug, enabled_irqs; - - enabled_irqs = intel_hpd_enabled_irqs(dev_priv, hpd_bxt); - hotplug_irqs = BXT_DE_PORT_HOTPLUG_MASK; - - bdw_update_port_irq(dev_priv, hotplug_irqs, enabled_irqs); + u32 hotplug; hotplug = I915_READ(PCH_PORT_HOTPLUG); - hotplug |= PORTC_HOTPLUG_ENABLE | PORTB_HOTPLUG_ENABLE | - PORTA_HOTPLUG_ENABLE; + hotplug |= PORTA_HOTPLUG_ENABLE | + PORTB_HOTPLUG_ENABLE | + PORTC_HOTPLUG_ENABLE; DRM_DEBUG_KMS("Invert bit setting: hp_ctl:%x hp_port:%x\n", hotplug, enabled_irqs); @@ -3204,7 +3210,6 @@ static void bxt_hpd_irq_setup(struct drm_i915_private *dev_priv) * For BXT invert bit has to be set based on AOB design * for HPD detection logic, update it based on VBT fields. */ - if ((enabled_irqs & BXT_DE_PORT_HP_DDIA) && intel_bios_is_port_hpd_inverted(dev_priv, PORT_A)) hotplug |= BXT_DDIA_HPD_INVERT; @@ -3218,6 +3223,23 @@ static void bxt_hpd_irq_setup(struct drm_i915_private *dev_priv) I915_WRITE(PCH_PORT_HOTPLUG, hotplug); } +static void bxt_hpd_detection_setup(struct drm_i915_private *dev_priv) +{ + __bxt_hpd_detection_setup(dev_priv, BXT_DE_PORT_HOTPLUG_MASK); +} + +static void bxt_hpd_irq_setup(struct drm_i915_private *dev_priv) +{ + u32 hotplug_irqs, enabled_irqs; + + enabled_irqs = intel_hpd_enabled_irqs(dev_priv, hpd_bxt); + hotplug_irqs = BXT_DE_PORT_HOTPLUG_MASK; + + bdw_update_port_irq(dev_priv, hotplug_irqs, enabled_irqs); + + __bxt_hpd_detection_setup(dev_priv, enabled_irqs); +} + static void ibx_irq_postinstall(struct drm_device *dev) { struct drm_i915_private *dev_priv = to_i915(dev); @@ -3233,6 +3255,12 @@ static void ibx_irq_postinstall(struct drm_device *dev) gen5_assert_iir_is_zero(dev_priv, SDEIIR); I915_WRITE(SDEIMR, ~mask); + + if (HAS_PCH_IBX(dev_priv) || HAS_PCH_CPT(dev_priv) || + HAS_PCH_LPT(dev_priv)) + ; /* TODO: Enable HPD detection on older PCH platforms too */ + else + spt_hpd_detection_setup(dev_priv); } static void gen5_gt_irq_postinstall(struct drm_device *dev) @@ -3444,6 +3472,9 @@ static void gen8_de_irq_postinstall(struct drm_i915_private *dev_priv) GEN5_IRQ_INIT(GEN8_DE_PORT_, ~de_port_masked, de_port_enables); GEN5_IRQ_INIT(GEN8_DE_MISC_, ~de_misc_masked, de_misc_masked); + + if (IS_GEN9_LP(dev_priv)) + bxt_hpd_detection_setup(dev_priv); } static int gen8_irq_postinstall(struct drm_device *dev) -- cgit From 390b4e00241ce14ca3967c4698c8f6a158c5a674 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Fri, 27 Jan 2017 11:39:19 +0200 Subject: drm/i915/lspcon: Fix resume time initialization due to unasserted HPD MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit During system resume time initialization the HPD level on LSPCON ports can stay low for an extended amount of time, leading to failed AUX transfers and LSPCON initialization. Fix this by waiting for HPD to get asserted. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=99178 Cc: Shashank Sharma Cc: Jani Nikula Cc: Ville Syrjälä Cc: Daniel Vetter Cc: # v4.9+ Signed-off-by: Imre Deak Reviewed-by: Shashank Sharma Link: http://patchwork.freedesktop.org/patch/msgid/1485509961-9010-3-git-send-email-imre.deak@intel.com --- drivers/gpu/drm/i915/intel_dp.c | 4 ++-- drivers/gpu/drm/i915/intel_drv.h | 2 ++ drivers/gpu/drm/i915/intel_lspcon.c | 5 ++++- 3 files changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 2ab0192595c2..21924cf4c8ad 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -4519,8 +4519,8 @@ static bool bxt_digital_port_connected(struct drm_i915_private *dev_priv, * * Return %true if @port is connected, %false otherwise. */ -static bool intel_digital_port_connected(struct drm_i915_private *dev_priv, - struct intel_digital_port *port) +bool intel_digital_port_connected(struct drm_i915_private *dev_priv, + struct intel_digital_port *port) { if (HAS_PCH_IBX(dev_priv)) return ibx_digital_port_connected(dev_priv, port); diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 1fb593ecc60c..424f7ea04b68 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -1500,6 +1500,8 @@ bool __intel_dp_read_desc(struct intel_dp *intel_dp, bool intel_dp_read_desc(struct intel_dp *intel_dp); int intel_dp_link_required(int pixel_clock, int bpp); int intel_dp_max_data_rate(int max_link_clock, int max_lanes); +bool intel_digital_port_connected(struct drm_i915_private *dev_priv, + struct intel_digital_port *port); /* intel_dp_aux_backlight.c */ int intel_dp_aux_init_backlight_funcs(struct intel_connector *intel_connector); diff --git a/drivers/gpu/drm/i915/intel_lspcon.c b/drivers/gpu/drm/i915/intel_lspcon.c index f6d4e6940257..c300647ef604 100644 --- a/drivers/gpu/drm/i915/intel_lspcon.c +++ b/drivers/gpu/drm/i915/intel_lspcon.c @@ -158,6 +158,8 @@ static bool lspcon_probe(struct intel_lspcon *lspcon) static void lspcon_resume_in_pcon_wa(struct intel_lspcon *lspcon) { struct intel_dp *intel_dp = lspcon_to_intel_dp(lspcon); + struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); + struct drm_i915_private *dev_priv = to_i915(dig_port->base.base.dev); unsigned long start = jiffies; if (!lspcon->desc_valid) @@ -173,7 +175,8 @@ static void lspcon_resume_in_pcon_wa(struct intel_lspcon *lspcon) if (!__intel_dp_read_desc(intel_dp, &desc)) return; - if (!memcmp(&intel_dp->desc, &desc, sizeof(desc))) { + if (intel_digital_port_connected(dev_priv, dig_port) && + !memcmp(&intel_dp->desc, &desc, sizeof(desc))) { DRM_DEBUG_KMS("LSPCON recovering in PCON mode after %u ms\n", jiffies_to_msecs(jiffies - start)); return; -- cgit From f123959594c000a43b267c75695bd3ffd0748eb1 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Fri, 27 Jan 2017 11:39:20 +0200 Subject: drm/i915/lspcon: Remove DPCD compare based resume time workaround MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This effectively reverts commit 489375c866c111f16cea93b2467ebe59c9022cc7 Author: Imre Deak Date: Mon Oct 24 19:33:31 2016 +0300 drm/i915/lspcon: Add workaround for resuming in PCON mode The workaround was added without considering that HPD is low during the failed AUX transfers the WA fixed. Since the previous patch we wait for HPD to get asserted. My tests also show that this happens _after_ the DPCD reads start to return correct values. This suggests that we don't need this WA any more, let's try to remove it to reduce the clutter. Cc: Shashank Sharma Cc: Jani Nikula Cc: Ville Syrjälä Cc: Daniel Vetter Signed-off-by: Imre Deak Reviewed-by: Shashank Sharma Link: http://patchwork.freedesktop.org/patch/msgid/1485509961-9010-4-git-send-email-imre.deak@intel.com --- drivers/gpu/drm/i915/intel_drv.h | 1 - drivers/gpu/drm/i915/intel_lspcon.c | 17 ++--------------- 2 files changed, 2 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 424f7ea04b68..973d7b6a7d5c 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -1002,7 +1002,6 @@ struct intel_dp { struct intel_lspcon { bool active; enum drm_lspcon_mode mode; - bool desc_valid; }; struct intel_digital_port { diff --git a/drivers/gpu/drm/i915/intel_lspcon.c b/drivers/gpu/drm/i915/intel_lspcon.c index c300647ef604..71cbe9c08932 100644 --- a/drivers/gpu/drm/i915/intel_lspcon.c +++ b/drivers/gpu/drm/i915/intel_lspcon.c @@ -162,21 +162,8 @@ static void lspcon_resume_in_pcon_wa(struct intel_lspcon *lspcon) struct drm_i915_private *dev_priv = to_i915(dig_port->base.base.dev); unsigned long start = jiffies; - if (!lspcon->desc_valid) - return; - while (1) { - struct intel_dp_desc desc; - - /* - * The w/a only applies in PCON mode and we don't expect any - * AUX errors. - */ - if (!__intel_dp_read_desc(intel_dp, &desc)) - return; - - if (intel_digital_port_connected(dev_priv, dig_port) && - !memcmp(&intel_dp->desc, &desc, sizeof(desc))) { + if (intel_digital_port_connected(dev_priv, dig_port)) { DRM_DEBUG_KMS("LSPCON recovering in PCON mode after %u ms\n", jiffies_to_msecs(jiffies - start)); return; @@ -253,7 +240,7 @@ bool lspcon_init(struct intel_digital_port *intel_dig_port) return false; } - lspcon->desc_valid = intel_dp_read_desc(dp); + intel_dp_read_desc(dp); DRM_DEBUG_KMS("Success: LSPCON init\n"); return true; -- cgit From 1a56b1a2db2d0c6422973fd4145d05135d8fb97c Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Fri, 27 Jan 2017 11:39:21 +0200 Subject: drm/i915/gen5+, pch: Enable hotplug detection early MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To be consistent with the recent change to enable hotplug detection early on GEN9 platforms do the same on all non-GMCH platforms starting from GEN5. On GMCH platforms enabling detection without interrupts isn't trivial, since AUX and HPD have a shared interrupt line. It could be done there too by using a SW interrupt mask, but I punt on that for now. Cc: Shashank Sharma Cc: Jani Nikula Cc: Ville Syrjälä Cc: Daniel Vetter Signed-off-by: Imre Deak Reviewed-by: Shashank Sharma Link: http://patchwork.freedesktop.org/patch/msgid/1485509961-9010-5-git-send-email-imre.deak@intel.com --- drivers/gpu/drm/i915/i915_irq.c | 73 ++++++++++++++++++++++++++--------------- 1 file changed, 47 insertions(+), 26 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 8b868405e99b..9eb4c9c8a10b 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -3096,19 +3096,9 @@ static u32 intel_hpd_enabled_irqs(struct drm_i915_private *dev_priv, return enabled_irqs; } -static void ibx_hpd_irq_setup(struct drm_i915_private *dev_priv) +static void ibx_hpd_detection_setup(struct drm_i915_private *dev_priv) { - u32 hotplug_irqs, hotplug, enabled_irqs; - - if (HAS_PCH_IBX(dev_priv)) { - hotplug_irqs = SDE_HOTPLUG_MASK; - enabled_irqs = intel_hpd_enabled_irqs(dev_priv, hpd_ibx); - } else { - hotplug_irqs = SDE_HOTPLUG_MASK_CPT; - enabled_irqs = intel_hpd_enabled_irqs(dev_priv, hpd_cpt); - } - - ibx_display_interrupt_update(dev_priv, hotplug_irqs, enabled_irqs); + u32 hotplug; /* * Enable digital hotplug on the PCH, and configure the DP short pulse @@ -3116,10 +3106,12 @@ static void ibx_hpd_irq_setup(struct drm_i915_private *dev_priv) * The pulse duration bits are reserved on LPT+. */ hotplug = I915_READ(PCH_PORT_HOTPLUG); - hotplug &= ~(PORTD_PULSE_DURATION_MASK|PORTC_PULSE_DURATION_MASK|PORTB_PULSE_DURATION_MASK); - hotplug |= PORTD_HOTPLUG_ENABLE | PORTD_PULSE_DURATION_2ms; - hotplug |= PORTC_HOTPLUG_ENABLE | PORTC_PULSE_DURATION_2ms; + hotplug &= ~(PORTB_PULSE_DURATION_MASK | + PORTC_PULSE_DURATION_MASK | + PORTD_PULSE_DURATION_MASK); hotplug |= PORTB_HOTPLUG_ENABLE | PORTB_PULSE_DURATION_2ms; + hotplug |= PORTC_HOTPLUG_ENABLE | PORTC_PULSE_DURATION_2ms; + hotplug |= PORTD_HOTPLUG_ENABLE | PORTD_PULSE_DURATION_2ms; /* * When CPU and PCH are on the same package, port A * HPD must be enabled in both north and south. @@ -3129,6 +3121,23 @@ static void ibx_hpd_irq_setup(struct drm_i915_private *dev_priv) I915_WRITE(PCH_PORT_HOTPLUG, hotplug); } +static void ibx_hpd_irq_setup(struct drm_i915_private *dev_priv) +{ + u32 hotplug_irqs, enabled_irqs; + + if (HAS_PCH_IBX(dev_priv)) { + hotplug_irqs = SDE_HOTPLUG_MASK; + enabled_irqs = intel_hpd_enabled_irqs(dev_priv, hpd_ibx); + } else { + hotplug_irqs = SDE_HOTPLUG_MASK_CPT; + enabled_irqs = intel_hpd_enabled_irqs(dev_priv, hpd_cpt); + } + + ibx_display_interrupt_update(dev_priv, hotplug_irqs, enabled_irqs); + + ibx_hpd_detection_setup(dev_priv); +} + static void spt_hpd_detection_setup(struct drm_i915_private *dev_priv) { u32 hotplug; @@ -3158,9 +3167,25 @@ static void spt_hpd_irq_setup(struct drm_i915_private *dev_priv) spt_hpd_detection_setup(dev_priv); } +static void ilk_hpd_detection_setup(struct drm_i915_private *dev_priv) +{ + u32 hotplug; + + /* + * Enable digital hotplug on the CPU, and configure the DP short pulse + * duration to 2ms (which is the minimum in the Display Port spec) + * The pulse duration bits are reserved on HSW+. + */ + hotplug = I915_READ(DIGITAL_PORT_HOTPLUG_CNTRL); + hotplug &= ~DIGITAL_PORTA_PULSE_DURATION_MASK; + hotplug |= DIGITAL_PORTA_HOTPLUG_ENABLE | + DIGITAL_PORTA_PULSE_DURATION_2ms; + I915_WRITE(DIGITAL_PORT_HOTPLUG_CNTRL, hotplug); +} + static void ilk_hpd_irq_setup(struct drm_i915_private *dev_priv) { - u32 hotplug_irqs, hotplug, enabled_irqs; + u32 hotplug_irqs, enabled_irqs; if (INTEL_GEN(dev_priv) >= 8) { hotplug_irqs = GEN8_PORT_DP_A_HOTPLUG; @@ -3179,15 +3204,7 @@ static void ilk_hpd_irq_setup(struct drm_i915_private *dev_priv) ilk_update_display_irq(dev_priv, hotplug_irqs, enabled_irqs); } - /* - * Enable digital hotplug on the CPU, and configure the DP short pulse - * duration to 2ms (which is the minimum in the Display Port spec) - * The pulse duration bits are reserved on HSW+. - */ - hotplug = I915_READ(DIGITAL_PORT_HOTPLUG_CNTRL); - hotplug &= ~DIGITAL_PORTA_PULSE_DURATION_MASK; - hotplug |= DIGITAL_PORTA_HOTPLUG_ENABLE | DIGITAL_PORTA_PULSE_DURATION_2ms; - I915_WRITE(DIGITAL_PORT_HOTPLUG_CNTRL, hotplug); + ilk_hpd_detection_setup(dev_priv); ibx_hpd_irq_setup(dev_priv); } @@ -3258,7 +3275,7 @@ static void ibx_irq_postinstall(struct drm_device *dev) if (HAS_PCH_IBX(dev_priv) || HAS_PCH_CPT(dev_priv) || HAS_PCH_LPT(dev_priv)) - ; /* TODO: Enable HPD detection on older PCH platforms too */ + ibx_hpd_detection_setup(dev_priv); else spt_hpd_detection_setup(dev_priv); } @@ -3335,6 +3352,8 @@ static int ironlake_irq_postinstall(struct drm_device *dev) gen5_gt_irq_postinstall(dev); + ilk_hpd_detection_setup(dev_priv); + ibx_irq_postinstall(dev); if (IS_IRONLAKE_M(dev_priv)) { @@ -3475,6 +3494,8 @@ static void gen8_de_irq_postinstall(struct drm_i915_private *dev_priv) if (IS_GEN9_LP(dev_priv)) bxt_hpd_detection_setup(dev_priv); + else if (IS_BROADWELL(dev_priv)) + ilk_hpd_detection_setup(dev_priv); } static int gen8_irq_postinstall(struct drm_device *dev) -- cgit From 04da811b3d821567e7a9a8a0baf48a6c1718b582 Mon Sep 17 00:00:00 2001 From: Zhi Wang Date: Mon, 6 Feb 2017 18:37:16 +0800 Subject: drm/i915: Let execlist_update_context() cover !FULL_PPGTT mode. execlist_update_context() will try to update PDPs in a context before a ELSP submission only for full PPGTT mode, while PDPs was populated during context initialization. Now the latter code path is removed. Let execlist_update_context() also cover !FULL_PPGTT mode. Fixes: 34869776c76b ("drm/i915: check ppgtt validity when init reg state") Cc: Tvrtko Ursulin Cc: Michal Winiarski Cc: Michel Thierry Cc: Joonas Lahtinen Cc: Chris Wilson Cc: Zhenyu Wang Cc: Zhiyuan Lv Signed-off-by: Zhi Wang Link: http://patchwork.freedesktop.org/patch/msgid/1486377436-15380-1-git-send-email-zhi.a.wang@intel.com Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/intel_lrc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 754f77c394fb..15b8a132b8e0 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -321,7 +321,8 @@ execlists_update_context_pdps(struct i915_hw_ppgtt *ppgtt, u32 *reg_state) static u64 execlists_update_context(struct drm_i915_gem_request *rq) { struct intel_context *ce = &rq->ctx->engine[rq->engine->id]; - struct i915_hw_ppgtt *ppgtt = rq->ctx->ppgtt; + struct i915_hw_ppgtt *ppgtt = + rq->ctx->ppgtt ?: rq->i915->mm.aliasing_ppgtt; u32 *reg_state = ce->lrc_reg_state; reg_state[CTX_RING_TAIL+1] = rq->tail; -- cgit From eca56a35111c9e6663fbcd7dc37bcc572367efa3 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 6 Feb 2017 17:05:01 +0000 Subject: drm/i915: Mark the end of intel_ring_begin() and check in intel_ring_advance() It is required that the caller declare the exact number of dwords they wish to write into the ring. This is required for two reasons, we need to allocate sufficient space for the entire command packet and we need to be sure that the contents are completely written to avoid executing stale data. The current interface requires for any bug to be caught in review, the reader has to carefully count the number of intel_ring_emit() between intel_ring_begin() and intel_ring_advance(). If we record the end of the packet of each intel_ring_begin() we can also have CI check for us. Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: Joonas Lahtinen Reviewed-by: Mika Kuoppala Link: http://patchwork.freedesktop.org/patch/msgid/20170206170502.30944-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem.h | 9 +++++++++ drivers/gpu/drm/i915/intel_ringbuffer.c | 1 + drivers/gpu/drm/i915/intel_ringbuffer.h | 3 +++ 3 files changed, 13 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_gem.h b/drivers/gpu/drm/i915/i915_gem.h index a585d47c420a..412fa2794cbb 100644 --- a/drivers/gpu/drm/i915/i915_gem.h +++ b/drivers/gpu/drm/i915/i915_gem.h @@ -28,9 +28,18 @@ #ifdef CONFIG_DRM_I915_DEBUG_GEM #define GEM_BUG_ON(expr) BUG_ON(expr) #define GEM_WARN_ON(expr) WARN_ON(expr) + +#define GEM_BUG_ONLY(expr) expr +#define GEM_BUG_ONLY_DECLARE(var) var +#define GEM_BUG_ONLY_ON(expr) GEM_BUG_ON(expr) + #else #define GEM_BUG_ON(expr) BUILD_BUG_ON_INVALID(expr) #define GEM_WARN_ON(expr) (BUILD_BUG_ON_INVALID(expr), 0) + +#define GEM_BUG_ONLY(expr) do { } while (0) +#define GEM_BUG_ONLY_DECLARE(var) +#define GEM_BUG_ONLY_ON(expr) #endif #define I915_NUM_ENGINES 5 diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index d32cbba25d98..383083ef2210 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -2238,6 +2238,7 @@ int intel_ring_begin(struct drm_i915_gem_request *req, int num_dwords) ring->space -= bytes; GEM_BUG_ON(ring->space < 0); + GEM_BUG_ONLY(ring->advance = ring->tail + bytes); return 0; } diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index b9c15cd40fbf..2c6d3655985e 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -144,6 +144,8 @@ struct intel_ring { u32 head; u32 tail; + GEM_BUG_ONLY_DECLARE(u32 advance); + int space; int size; int effective_size; @@ -516,6 +518,7 @@ static inline void intel_ring_advance(struct intel_ring *ring) * reserved for the command packet (i.e. the value passed to * intel_ring_begin()). */ + GEM_BUG_ONLY_ON(ring->tail != ring->advance); } static inline u32 intel_ring_offset(struct intel_ring *ring, void *addr) -- cgit From 2ffe80aa442461eb2fa3cd4c5dda81832e5dd291 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 6 Feb 2017 17:05:02 +0000 Subject: drm/i915: Avoid unguarded reads from the request pointer In commit 86aa7e760a67 ("drm/i915: Assert that the context-switch completion matches our context") I added a read to the irq tasklet handler that compared the on-chip status with that of our sw tracking, using an unguarded read of the request pointer to get the context and beyond. Whilst we hold a reference to the request, we do not hold anything on the context and if we are unlucky it may be reaped from a second thread retiring the request (since it may retire the request as soon as the breadcrumb is complete, even before we finish processing the context switch) as we try to read from the context pointer. Avoid the racy read from underneath the request by storing the expected result in the execlist_port[]. v2: Include commentary about port[].request being unprotected. Fixes: 86aa7e760a67 ("drm/i915: Assert that the context-switch completion matches our context") Reported-by: Mika Kuoppala Testcase: igt/gem_ctx_create Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Cc: Tvrtko Ursulin Cc: Mika Kuoppala Reviewed-by: Mika Kuoppala Link: http://patchwork.freedesktop.org/patch/msgid/20170206170502.30944-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_lrc.c | 24 +++++++++++++++++++++--- drivers/gpu/drm/i915/intel_ringbuffer.h | 1 + 2 files changed, 22 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 15b8a132b8e0..df8e6f74dc7e 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -351,6 +351,7 @@ static void execlists_submit_ports(struct intel_engine_cs *engine) execlists_context_status_change(port[0].request, INTEL_CONTEXT_SCHEDULE_IN); desc[0] = execlists_update_context(port[0].request); + GEM_BUG_ONLY(port[0].context_id = upper_32_bits(desc[0])); port[0].count++; if (port[1].request) { @@ -358,6 +359,7 @@ static void execlists_submit_ports(struct intel_engine_cs *engine) execlists_context_status_change(port[1].request, INTEL_CONTEXT_SCHEDULE_IN); desc[1] = execlists_update_context(port[1].request); + GEM_BUG_ONLY(port[1].context_id = upper_32_bits(desc[1])); port[1].count = 1; } else { desc[1] = 0; @@ -560,13 +562,29 @@ static void intel_lrc_irq_handler(unsigned long data) unsigned int idx = ++head % GEN8_CSB_ENTRIES; unsigned int status = readl(buf + 2 * idx); + /* We are flying near dragons again. + * + * We hold a reference to the request in execlist_port[] + * but no more than that. We are operating in softirq + * context and so cannot hold any mutex or sleep. That + * prevents us stopping the requests we are processing + * in port[] from being retired simultaneously (the + * breadcrumb will be complete before we see the + * context-switch). As we only hold the reference to the + * request, any pointer chasing underneath the request + * is subject to a potential use-after-free. Thus we + * store all of the bookkeeping within port[] as + * required, and avoid using unguarded pointers beneath + * request itself. The same applies to the atomic + * status notifier. + */ + if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK)) continue; /* Check the context/desc id for this event matches */ - GEM_BUG_ON(readl(buf + 2 * idx + 1) != - upper_32_bits(intel_lr_context_descriptor(port[0].request->ctx, - engine))); + GEM_BUG_ONLY_ON(readl(buf + 2 * idx + 1) != + port[0].context_id); GEM_BUG_ON(port[0].count == 0); if (--port[0].count == 0) { diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 2c6d3655985e..896838ca502c 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -381,6 +381,7 @@ struct intel_engine_cs { struct execlist_port { struct drm_i915_gem_request *request; unsigned int count; + GEM_BUG_ONLY_DECLARE(u32 context_id); } execlist_port[2]; struct rb_root execlist_queue; struct rb_node *execlist_first; -- cgit From b8cf691e28fa48f36f3a318385e1edc9a0a291fd Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 6 Feb 2017 21:36:05 +0000 Subject: drm/i915: Generate i915_params {} using a macro I want to print the struct from the error state and so would like to use the existing struct definition as the template ala DEV_INFO* v2: Use MEMBER() rather than p(). Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Acked-by: Jani Nikula Acked-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/20170206213608.31328-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_params.h | 81 ++++++++++++++++++++------------------ 1 file changed, 43 insertions(+), 38 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_params.h b/drivers/gpu/drm/i915/i915_params.h index 8e433de04679..9a8c60342a82 100644 --- a/drivers/gpu/drm/i915/i915_params.h +++ b/drivers/gpu/drm/i915/i915_params.h @@ -27,46 +27,51 @@ #include /* for __read_mostly */ +#define I915_PARAMS_FOR_EACH(func) \ + func(int, modeset); \ + func(int, panel_ignore_lid); \ + func(int, semaphores); \ + func(int, lvds_channel_mode); \ + func(int, panel_use_ssc); \ + func(int, vbt_sdvo_panel_type); \ + func(int, enable_rc6); \ + func(int, enable_dc); \ + func(int, enable_fbc); \ + func(int, enable_ppgtt); \ + func(int, enable_execlists); \ + func(int, enable_psr); \ + func(unsigned int, alpha_support); \ + func(int, disable_power_well); \ + func(int, enable_ips); \ + func(int, invert_brightness); \ + func(int, enable_guc_loading); \ + func(int, enable_guc_submission); \ + func(int, guc_log_level); \ + func(int, use_mmio_flip); \ + func(int, mmio_debug); \ + func(int, edp_vswing); \ + func(unsigned int, inject_load_failure); \ + /* leave bools at the end to not create holes */ \ + func(bool, enable_cmd_parser); \ + func(bool, enable_hangcheck); \ + func(bool, fastboot); \ + func(bool, prefault_disable); \ + func(bool, load_detect_test); \ + func(bool, force_reset_modeset_test); \ + func(bool, reset); \ + func(bool, error_capture); \ + func(bool, disable_display); \ + func(bool, verbose_state_checks); \ + func(bool, nuclear_pageflip); \ + func(bool, enable_dp_mst); \ + func(bool, enable_dpcd_backlight); \ + func(bool, enable_gvt) + +#define MEMBER(T, member) T member struct i915_params { - int modeset; - int panel_ignore_lid; - int semaphores; - int lvds_channel_mode; - int panel_use_ssc; - int vbt_sdvo_panel_type; - int enable_rc6; - int enable_dc; - int enable_fbc; - int enable_ppgtt; - int enable_execlists; - int enable_psr; - unsigned int alpha_support; - int disable_power_well; - int enable_ips; - int invert_brightness; - int enable_guc_loading; - int enable_guc_submission; - int guc_log_level; - int use_mmio_flip; - int mmio_debug; - int edp_vswing; - unsigned int inject_load_failure; - /* leave bools at the end to not create holes */ - bool enable_cmd_parser; - bool enable_hangcheck; - bool fastboot; - bool prefault_disable; - bool load_detect_test; - bool force_reset_modeset_test; - bool reset; - bool error_capture; - bool disable_display; - bool verbose_state_checks; - bool nuclear_pageflip; - bool enable_dp_mst; - bool enable_dpcd_backlight; - bool enable_gvt; + I915_PARAMS_FOR_EACH(MEMBER); }; +#undef MEMBER extern struct i915_params i915 __read_mostly; -- cgit From 1a2010ca52f951b7a9e0cadb92d4bbcee643194c Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 6 Feb 2017 21:36:06 +0000 Subject: drm/i915: Use bool i915_param.alpha_support The alpha_support module option can only take one of two values, so assign it to a boolean type. The only advantage is in pretty printing via /sys/module/i915/parameters/alpha_support and elsewhere. Signed-off-by: Chris Wilson Cc: Jani Nikula Cc: Rodrigo Vivi Cc: Daniel Vetter Acked-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/20170206213608.31328-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_params.c | 2 +- drivers/gpu/drm/i915/i915_params.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c index 0e280fbd52f1..c2679fa7ed11 100644 --- a/drivers/gpu/drm/i915/i915_params.c +++ b/drivers/gpu/drm/i915/i915_params.c @@ -145,7 +145,7 @@ MODULE_PARM_DESC(enable_psr, "Enable PSR " "(0=disabled, 1=enabled - link mode chosen per-platform, 2=force link-standby mode, 3=force link-off mode) " "Default: -1 (use per-chip default)"); -module_param_named_unsafe(alpha_support, i915.alpha_support, int, 0400); +module_param_named_unsafe(alpha_support, i915.alpha_support, bool, 0400); MODULE_PARM_DESC(alpha_support, "Enable alpha quality driver support for latest hardware. " "See also CONFIG_DRM_I915_ALPHA_SUPPORT."); diff --git a/drivers/gpu/drm/i915/i915_params.h b/drivers/gpu/drm/i915/i915_params.h index 9a8c60342a82..55d47eea172e 100644 --- a/drivers/gpu/drm/i915/i915_params.h +++ b/drivers/gpu/drm/i915/i915_params.h @@ -40,7 +40,6 @@ func(int, enable_ppgtt); \ func(int, enable_execlists); \ func(int, enable_psr); \ - func(unsigned int, alpha_support); \ func(int, disable_power_well); \ func(int, enable_ips); \ func(int, invert_brightness); \ @@ -52,6 +51,7 @@ func(int, edp_vswing); \ func(unsigned int, inject_load_failure); \ /* leave bools at the end to not create holes */ \ + func(bool, alpha_support); \ func(bool, enable_cmd_parser); \ func(bool, enable_hangcheck); \ func(bool, fastboot); \ -- cgit From 642c8a72533f26b1614d9f11361947f368fb3e57 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 6 Feb 2017 21:36:07 +0000 Subject: drm/i915: Capture module parameters for the GPU error state They include useful material such as what mode the VM address space is running in, what submission mode, extra quirks, etc. v2: Undef the right macro, use type specific pretty printers v3: Use strcmp(TYPENAME) rather than creating per-type pretty printers v4: Use __always_inline to force GCC to eliminate the calls to strcmp and generate the right call to seq_printf for each parameter. v5: With the strcmp elimination, we can now use BUILD_BUG to ensure there are no unhandled types, also use __builtin_strcmp to make it look even more magic. Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Acked-by: Mika Kuoppala Acked-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/20170206213608.31328-3-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/i915_gpu_error.c | 42 +++++++++++++++++++++++++++++------ 2 files changed, 36 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 1e0d9561fd67..6006694bffb5 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -904,6 +904,7 @@ struct drm_i915_error_state { u32 reset_count; u32 suspend_count; struct intel_device_info device_info; + struct i915_params params; /* Generic register state */ u32 eir; diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 5283fe815a4d..c28ccfef84ab 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -546,6 +546,29 @@ static void err_print_capabilities(struct drm_i915_error_state_buf *m, #undef PRINT_FLAG } +static __always_inline void err_print_param(struct drm_i915_error_state_buf *m, + const char *name, + const char *type, + const void *x) +{ + if (!__builtin_strcmp(type, "bool")) + err_printf(m, "i915.%s=%s\n", name, yesno(*(const bool *)x)); + else if (!__builtin_strcmp(type, "int")) + err_printf(m, "i915.%s=%d\n", name, *(const int *)x); + else if (!__builtin_strcmp(type, "unsigned int")) + err_printf(m, "i915.%s=%u\n", name, *(const unsigned int *)x); + else + BUILD_BUG(); +} + +static void err_print_params(struct drm_i915_error_state_buf *m, + const struct i915_params *p) +{ +#define PRINT(T, x) err_print_param(m, #x, #T, &p->x); + I915_PARAMS_FOR_EACH(PRINT); +#undef PRINT +} + int i915_error_state_to_str(struct drm_i915_error_state_buf *m, const struct i915_error_state_file_priv *error_priv) { @@ -568,7 +591,6 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m, error->boottime.tv_sec, error->boottime.tv_usec); err_printf(m, "Uptime: %ld s %ld us\n", error->uptime.tv_sec, error->uptime.tv_usec); - err_print_capabilities(m, &error->device_info); for (i = 0; i < ARRAY_SIZE(error->engine); i++) { if (error->engine[i].hangcheck_stalled && @@ -588,6 +610,7 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m, err_printf(m, "PCI Subsystem: %04x:%04x\n", pdev->subsystem_vendor, pdev->subsystem_device); + err_printf(m, "IOMMU enabled?: %d\n", error->iommu); if (HAS_CSR(dev_priv)) { @@ -730,6 +753,9 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m, if (error->display) intel_display_print_error_state(m, dev_priv, error->display); + err_print_capabilities(m, &error->device_info); + err_print_params(m, &error->params); + out: if (m->bytes == 0 && m->err) return m->err; @@ -1587,6 +1613,14 @@ static int capture(void *data) { struct drm_i915_error_state *error = data; + do_gettimeofday(&error->time); + error->boottime = ktime_to_timeval(ktime_get_boottime()); + error->uptime = + ktime_to_timeval(ktime_sub(ktime_get(), + error->i915->gt.last_init_time)); + + error->params = i915; + i915_capture_gen_state(error->i915, error); i915_capture_reg_state(error->i915, error); i915_gem_record_fences(error->i915, error); @@ -1595,12 +1629,6 @@ static int capture(void *data) i915_capture_pinned_buffers(error->i915, error); i915_gem_capture_guc_log_buffer(error->i915, error); - do_gettimeofday(&error->time); - error->boottime = ktime_to_timeval(ktime_get_boottime()); - error->uptime = - ktime_to_timeval(ktime_sub(ktime_get(), - error->i915->gt.last_init_time)); - error->overlay = intel_overlay_capture_error_state(error->i915); error->display = intel_display_capture_error_state(error->i915); -- cgit From 418e3cd8005121601facd46dfc452ef9e40894f6 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 6 Feb 2017 21:36:08 +0000 Subject: drm/i915: Show the current i915_params in debugfs/i915_capabilites Alongside the hw capabilities, it is useful to know which of those have been overridden by the user setting module parameters. v2: Use __always_inline and BUILD_BUG magic Signed-off-by: Chris Wilson Acked-by: Jani Nikula Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170206213608.31328-4-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_debugfs.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 3ae06568df7b..152f23db4ec2 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -61,6 +61,21 @@ drm_add_fake_info_node(struct drm_minor *minor, return 0; } +static __always_inline void seq_print_param(struct seq_file *m, + const char *name, + const char *type, + const void *x) +{ + if (!__builtin_strcmp(type, "bool")) + seq_printf(m, "i915.%s=%s\n", name, yesno(*(const bool *)x)); + else if (!__builtin_strcmp(type, "int")) + seq_printf(m, "i915.%s=%d\n", name, *(const int *)x); + else if (!__builtin_strcmp(type, "unsigned int")) + seq_printf(m, "i915.%s=%u\n", name, *(const unsigned int *)x); + else + BUILD_BUG(); +} + static int i915_capabilities(struct seq_file *m, void *data) { struct drm_i915_private *dev_priv = node_to_i915(m->private); @@ -69,10 +84,17 @@ static int i915_capabilities(struct seq_file *m, void *data) seq_printf(m, "gen: %d\n", INTEL_GEN(dev_priv)); seq_printf(m, "platform: %s\n", intel_platform_name(info->platform)); seq_printf(m, "pch: %d\n", INTEL_PCH_TYPE(dev_priv)); + #define PRINT_FLAG(x) seq_printf(m, #x ": %s\n", yesno(info->x)) DEV_INFO_FOR_EACH_FLAG(PRINT_FLAG); #undef PRINT_FLAG + kernel_param_lock(THIS_MODULE); +#define PRINT_PARAM(T, x) seq_print_param(m, #x, #T, &i915.x); + I915_PARAMS_FOR_EACH(PRINT_PARAM); +#undef PRINT_PARAM + kernel_param_unlock(THIS_MODULE); + return 0; } -- cgit From e0ec3ec698851a6c97a12d696407b3ff77700c23 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 3 Feb 2017 12:57:17 +0000 Subject: drm/i915: Remove overzealous fence warn on runtime suspend The goal of the WARN was to catch when we are still actively using the fence as we go into the runtime suspend. However, the reg->pin_count is too coarse as it does not distinguish between exclusive ownership of the fence register from activity. I've not improved on the WARN, nor have we captured this WARN in an exact igt, but it is showing up regularly in the wild: [ 1915.935332] WARNING: CPU: 1 PID: 10861 at drivers/gpu/drm/i915/i915_gem.c:2022 i915_gem_runtime_suspend+0x116/0x130 [i915] [ 1915.935383] WARN_ON(reg->pin_count)[ 1915.935399] Modules linked in: snd_hda_intel i915 drm_kms_helper vgem netconsole scsi_transport_iscsi fuse vfat fat x86_pkg_temp_thermal coretemp intel_cstate intel_uncore snd_hda_codec_hdmi snd_hda_codec_generic snd_hda_codec snd_hwdep snd_hda_core snd_pcm snd_timer snd mei_me mei serio_raw intel_rapl_perf intel_pch_thermal soundcore wmi acpi_pad i2c_algo_bit syscopyarea sysfillrect sysimgblt fb_sys_fops drm r8169 mii video [last unloaded: drm_kms_helper] [ 1915.935785] CPU: 1 PID: 10861 Comm: kworker/1:0 Tainted: G U W 4.9.0-rc5+ #170 [ 1915.935799] Hardware name: LENOVO 80MX/Lenovo E31-80, BIOS DCCN34WW(V2.03) 12/01/2015 [ 1915.935822] Workqueue: pm pm_runtime_work [ 1915.935845] ffffc900044fbbf0 ffffffffac3220bc ffffc900044fbc40 0000000000000000 [ 1915.935890] ffffc900044fbc30 ffffffffac059bcb 000007e6044fbc60 ffff8801626e3198 [ 1915.935937] ffff8801626e0000 0000000000000002 ffffffffc05e5d4e 0000000000000000 [ 1915.935985] Call Trace: [ 1915.936013] [] dump_stack+0x4f/0x73 [ 1915.936038] [] __warn+0xcb/0xf0 [ 1915.936060] [] warn_slowpath_fmt+0x5f/0x80 [ 1915.936158] [] i915_gem_runtime_suspend+0x116/0x130 [i915] [ 1915.936251] [] intel_runtime_suspend+0x64/0x280 [i915] [ 1915.936277] [] ? dequeue_entity+0x241/0xbc0 [ 1915.936298] [] pci_pm_runtime_suspend+0x55/0x180 [ 1915.936317] [] ? pci_pm_runtime_resume+0xa0/0xa0 [ 1915.936339] [] __rpm_callback+0x32/0x70 [ 1915.936356] [] rpm_callback+0x24/0x80 [ 1915.936375] [] ? pci_pm_runtime_resume+0xa0/0xa0 [ 1915.936392] [] rpm_suspend+0x12d/0x680 [ 1915.936415] [] ? _raw_spin_unlock_irq+0x17/0x30 [ 1915.936435] [] ? finish_task_switch+0x88/0x220 [ 1915.936455] [] pm_runtime_work+0x6f/0xb0 [ 1915.936477] [] process_one_work+0x1f3/0x4d0 [ 1915.936501] [] worker_thread+0x48/0x4e0 [ 1915.936523] [] ? process_one_work+0x4d0/0x4d0 [ 1915.936542] [] ? process_one_work+0x4d0/0x4d0 [ 1915.936559] [] kthread+0xd9/0xf0 [ 1915.936580] [] ? kthread_park+0x60/0x60 [ 1915.936600] [] ret_from_fork+0x22/0x30 In the case the register is pinned, it should be present and we will need to invalidate them to be restored upon resume as we cannot expect the owner of the pin to call get_fence prior to use after resume. Fixes: 7c108fd8feac ("drm/i915: Move fence cancellation to runtime suspend") Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=98804 Reported-by: Lionel Landwerlin Signed-off-by: Chris Wilson Cc: Daniel Vetter Cc: Imre Deak Cc: Jani Nikula Cc: # v4.10-rc1+ Link: http://patchwork.freedesktop.org/patch/msgid/20170203125717.8431-1-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_drv.c | 1 + drivers/gpu/drm/i915/i915_gem.c | 12 ++++++++++-- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 276676b2bbc8..78191e00a00c 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -2475,6 +2475,7 @@ static int intel_runtime_resume(struct device *kdev) * we can do is to hope that things will still work (and disable RPM). */ i915_gem_init_swizzling(dev_priv); + i915_gem_restore_fences(dev_priv); intel_runtime_pm_enable_interrupts(dev_priv); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index f3b5cb497921..495f1176d45e 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2011,8 +2011,16 @@ void i915_gem_runtime_suspend(struct drm_i915_private *dev_priv) for (i = 0; i < dev_priv->num_fence_regs; i++) { struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i]; - if (WARN_ON(reg->pin_count)) - continue; + /* Ideally we want to assert that the fence register is not + * live at this point (i.e. that no piece of code will be + * trying to write through fence + GTT, as that both violates + * our tracking of activity and associated locking/barriers, + * but also is illegal given that the hw is powered down). + * + * Previously we used reg->pin_count as a "liveness" indicator. + * That is not sufficient, and we need a more fine-grained + * tool if we want to have a sanity check here. + */ if (!reg->vma) continue; -- cgit From 6248017ae5301ccb51cda92c4117b573b9aff6bb Mon Sep 17 00:00:00 2001 From: Arthur Heymans Date: Wed, 1 Feb 2017 00:50:26 +0100 Subject: drm/i915: Get correct display clock on 945gm MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is according to Mobile Intel® 945 Express Chipset Family datasheet. Signed-off-by: Arthur Heymans Link: http://patchwork.freedesktop.org/patch/msgid/20170131235026.26003-1-arthur@aheymans.xyz Reviewed-by: Ville Syrjälä Signed-off-by: Ville Syrjälä --- drivers/gpu/drm/i915/i915_reg.h | 2 +- drivers/gpu/drm/i915/intel_display.c | 27 +++++++++++++++++++++++++-- 2 files changed, 26 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index a9538ae81673..4b5761d7716c 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -119,7 +119,7 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define GCFGC 0xf0 /* 915+ only */ #define GC_LOW_FREQUENCY_ENABLE (1 << 7) #define GC_DISPLAY_CLOCK_190_200_MHZ (0 << 4) -#define GC_DISPLAY_CLOCK_333_MHZ (4 << 4) +#define GC_DISPLAY_CLOCK_333_320_MHZ (4 << 4) #define GC_DISPLAY_CLOCK_267_MHZ_PNV (0 << 4) #define GC_DISPLAY_CLOCK_333_MHZ_PNV (1 << 4) #define GC_DISPLAY_CLOCK_444_MHZ_PNV (2 << 4) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 45e587496886..d0d042495dc7 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -7411,6 +7411,26 @@ static int i945_get_display_clock_speed(struct drm_i915_private *dev_priv) return 400000; } +static int i945gm_get_display_clock_speed(struct drm_i915_private *dev_priv) +{ + struct pci_dev *pdev = dev_priv->drm.pdev; + u16 gcfgc = 0; + + pci_read_config_word(pdev, GCFGC, &gcfgc); + + if (gcfgc & GC_LOW_FREQUENCY_ENABLE) + return 133333; + else { + switch (gcfgc & GC_DISPLAY_CLOCK_MASK) { + case GC_DISPLAY_CLOCK_333_320_MHZ: + return 320000; + default: + case GC_DISPLAY_CLOCK_190_200_MHZ: + return 200000; + } + } +} + static int i915_get_display_clock_speed(struct drm_i915_private *dev_priv) { return 333333; @@ -7457,7 +7477,7 @@ static int i915gm_get_display_clock_speed(struct drm_i915_private *dev_priv) return 133333; else { switch (gcfgc & GC_DISPLAY_CLOCK_MASK) { - case GC_DISPLAY_CLOCK_333_MHZ: + case GC_DISPLAY_CLOCK_333_320_MHZ: return 333333; default: case GC_DISPLAY_CLOCK_190_200_MHZ: @@ -16268,9 +16288,12 @@ void intel_init_display_hooks(struct drm_i915_private *dev_priv) else if (IS_I915G(dev_priv)) dev_priv->display.get_display_clock_speed = i915_get_display_clock_speed; - else if (IS_I945GM(dev_priv) || IS_I845G(dev_priv)) + else if (IS_I845G(dev_priv)) dev_priv->display.get_display_clock_speed = i9xx_misc_get_display_clock_speed; + else if (IS_I945GM(dev_priv)) + dev_priv->display.get_display_clock_speed = + i945gm_get_display_clock_speed; else if (IS_I915GM(dev_priv)) dev_priv->display.get_display_clock_speed = i915gm_get_display_clock_speed; -- cgit From c0dcb203fb009678e5be9e7782329dcfbbf16439 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 7 Feb 2017 15:24:37 +0000 Subject: drm/i915: Restore context and pd for ringbuffer submission after reset Following a reset, the context and page directory registers are lost. However, the queue of requests that we resubmit after the reset may depend upon them - the registers are restored from a context image, but that restore may be inhibited and may simply be absent from the request if it was in the middle of a sequence using the same context. If we prime the CCID/PD registers with the first request in the queue (even for the hung request), we prevent invalid memory access for the following requests (and continually hung engines). v2: Magic BIT(8), reserved for future use but still appears unused. v3: Some commentary on handling innocent vs guilty requests v4: Add a wait for PD_BASE fetch. The reload appears to be instant on my Ivybridge, but this bit probably exists for a reason. Fixes: 821ed7df6e2a ("drm/i915: Update reset path to fix incomplete requests") Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: Mika Kuoppala Link: http://patchwork.freedesktop.org/patch/msgid/20170207152437.4252-1-chris@chris-wilson.co.uk Reviewed-by: Mika Kuoppala --- drivers/gpu/drm/i915/i915_gem.c | 18 ++++------ drivers/gpu/drm/i915/i915_reg.h | 6 ++-- drivers/gpu/drm/i915/intel_lrc.c | 16 ++++++++- drivers/gpu/drm/i915/intel_ringbuffer.c | 58 +++++++++++++++++++++++++++++++-- 4 files changed, 81 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 495f1176d45e..d1841bc1cb50 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2745,21 +2745,17 @@ static void i915_gem_reset_engine(struct intel_engine_cs *engine) engine->irq_seqno_barrier(engine); request = i915_gem_find_active_request(engine); - if (!request) - return; - - if (!i915_gem_reset_request(request)) - return; + if (request && i915_gem_reset_request(request)) { + DRM_DEBUG_DRIVER("resetting %s to restart from tail of request 0x%x\n", + engine->name, request->global_seqno); - DRM_DEBUG_DRIVER("resetting %s to restart from tail of request 0x%x\n", - engine->name, request->global_seqno); + /* If this context is now banned, skip all pending requests. */ + if (i915_gem_context_is_banned(request->ctx)) + engine_skip_context(request); + } /* Setup the CS to resume from the breadcrumb of the hung request */ engine->reset_hw(engine, request); - - /* If this context is now banned, skip all of its pending requests. */ - if (i915_gem_context_is_banned(request->ctx)) - engine_skip_context(request); } void i915_gem_reset_finish(struct drm_i915_private *dev_priv) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 4b5761d7716c..bd2ff1f87f32 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -3307,8 +3307,10 @@ enum skl_disp_power_wells { /* * Logical Context regs */ -#define CCID _MMIO(0x2180) -#define CCID_EN (1<<0) +#define CCID _MMIO(0x2180) +#define CCID_EN BIT(0) +#define CCID_EXTENDED_STATE_RESTORE BIT(2) +#define CCID_EXTENDED_STATE_SAVE BIT(3) /* * Notes on SNB/IVB/VLV context size: * - Power context is saved elsewhere (LLC or stolen) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index df8e6f74dc7e..e42990b56fa8 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -1352,7 +1352,20 @@ static void reset_common_ring(struct intel_engine_cs *engine, struct drm_i915_gem_request *request) { struct execlist_port *port = engine->execlist_port; - struct intel_context *ce = &request->ctx->engine[engine->id]; + struct intel_context *ce; + + /* If the request was innocent, we leave the request in the ELSP + * and will try to replay it on restarting. The context image may + * have been corrupted by the reset, in which case we may have + * to service a new GPU hang, but more likely we can continue on + * without impact. + * + * If the request was guilty, we presume the context is corrupt + * and have to at least restore the RING register in the context + * image back to the expected values to skip over the guilty request. + */ + if (!request || request->fence.error != -EIO) + return; /* We want a simple context + ring to execute the breadcrumb update. * We cannot rely on the context being intact across the GPU hang, @@ -1361,6 +1374,7 @@ static void reset_common_ring(struct intel_engine_cs *engine, * future request will be after userspace has had the opportunity * to recreate its own state. */ + ce = &request->ctx->engine[engine->id]; execlists_init_reg_state(ce->lrc_reg_state, request->ctx, engine, ce->ring); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 383083ef2210..d3d1e64f2498 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -599,10 +599,62 @@ out: static void reset_ring_common(struct intel_engine_cs *engine, struct drm_i915_gem_request *request) { - struct intel_ring *ring = request->ring; + /* Try to restore the logical GPU state to match the continuation + * of the request queue. If we skip the context/PD restore, then + * the next request may try to execute assuming that its context + * is valid and loaded on the GPU and so may try to access invalid + * memory, prompting repeated GPU hangs. + * + * If the request was guilty, we still restore the logical state + * in case the next request requires it (e.g. the aliasing ppgtt), + * but skip over the hung batch. + * + * If the request was innocent, we try to replay the request with + * the restored context. + */ + if (request) { + struct drm_i915_private *dev_priv = request->i915; + struct intel_context *ce = &request->ctx->engine[engine->id]; + struct i915_hw_ppgtt *ppgtt; + + /* FIXME consider gen8 reset */ + + if (ce->state) { + I915_WRITE(CCID, + i915_ggtt_offset(ce->state) | + BIT(8) /* must be set! */ | + CCID_EXTENDED_STATE_SAVE | + CCID_EXTENDED_STATE_RESTORE | + CCID_EN); + } - ring->head = request->postfix; - ring->last_retired_head = -1; + ppgtt = request->ctx->ppgtt ?: engine->i915->mm.aliasing_ppgtt; + if (ppgtt) { + u32 pd_offset = ppgtt->pd.base.ggtt_offset << 10; + + I915_WRITE(RING_PP_DIR_DCLV(engine), PP_DIR_DCLV_2G); + I915_WRITE(RING_PP_DIR_BASE(engine), pd_offset); + + /* Wait for the PD reload to complete */ + if (intel_wait_for_register(dev_priv, + RING_PP_DIR_BASE(engine), + BIT(0), 0, + 10)) + DRM_ERROR("Wait for reload of ppgtt page-directory timed out\n"); + + ppgtt->pd_dirty_rings &= ~intel_engine_flag(engine); + } + + /* If the rq hung, jump to its breadcrumb and skip the batch */ + if (request->fence.error == -EIO) { + struct intel_ring *ring = request->ring; + + ring->head = request->postfix; + ring->last_retired_head = -1; + } + } else { + engine->legacy_active_context = NULL; + } } static int intel_ring_workarounds_emit(struct drm_i915_gem_request *req) -- cgit From 038c95a313e4ca954ee5ab8a0c7559a646b0f462 Mon Sep 17 00:00:00 2001 From: Michał Winiarski Date: Tue, 7 Feb 2017 20:55:59 +0100 Subject: drm/i915: Always convert incoming exec offsets to non-canonical MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We're using non-canonical addresses in drm_mm, and we're making sure that userspace is using canonical addressing - both in case of softpin (verifying incoming offset) and when relocating (converting to canonical when updating offset returned to userspace). Unfortunately when considering the need for relocations, we're comparing offset from userspace (in canonical form) with drm_mm node (in non-canonical form), and as a result, we end up always relocating if our offsets are in the "problematic" range. Let's always convert the offsets to avoid the performance impact of relocations. Fixes: a5f0edf63bdf ("drm/i915: Avoid writing relocs with addresses in non-canonical form") Cc: Chris Wilson Cc: Michel Thierry Reported-by: Michał Pyrzowski Signed-off-by: Michał Winiarski Link: http://patchwork.freedesktop.org/patch/msgid/20170207195559.18798-1-michal.winiarski@intel.com Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 26c26a6675df..ae31065ab708 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -1185,14 +1185,14 @@ validate_exec_list(struct drm_device *dev, if (exec[i].offset != gen8_canonical_addr(exec[i].offset & PAGE_MASK)) return -EINVAL; - - /* From drm_mm perspective address space is continuous, - * so from this point we're always using non-canonical - * form internally. - */ - exec[i].offset = gen8_noncanonical_addr(exec[i].offset); } + /* From drm_mm perspective address space is continuous, + * so from this point we're always using non-canonical + * form internally. + */ + exec[i].offset = gen8_noncanonical_addr(exec[i].offset); + if (exec[i].alignment && !is_power_of_2(exec[i].alignment)) return -EINVAL; -- cgit From 3c779a49bd7ce6c9ecc35a679f28f94fe3cfaeab Mon Sep 17 00:00:00 2001 From: Joonas Lahtinen Date: Wed, 8 Feb 2017 15:12:09 +0200 Subject: drm/i915: Avoid BIT(max) - 1 and use GENMASK(max - 1, 0) "BIT(max) - 1" will overflow when max = 32, and GCC will complain. We already have GENMASK for generating the mask, use it! v2: Majestic off by one spotted (Chris) Signed-off-by: Joonas Lahtinen Cc: Chris Wilson Reviewed-by: Chris Wilson --- drivers/gpu/drm/i915/intel_device_info.c | 2 +- drivers/gpu/drm/i915/intel_fbdev.c | 2 +- drivers/gpu/drm/i915/intel_runtime_pm.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c index fcf81815daff..0891cc0e8626 100644 --- a/drivers/gpu/drm/i915/intel_device_info.c +++ b/drivers/gpu/drm/i915/intel_device_info.c @@ -234,7 +234,7 @@ static void broadwell_sseu_info_init(struct drm_i915_private *dev_priv) * The subslice disable field is global, i.e. it applies * to each of the enabled slices. */ - sseu->subslice_mask = BIT(ss_max) - 1; + sseu->subslice_mask = GENMASK(ss_max - 1, 0); sseu->subslice_mask &= ~((fuse2 & GEN8_F2_SS_DIS_MASK) >> GEN8_F2_SS_DIS_SHIFT); diff --git a/drivers/gpu/drm/i915/intel_fbdev.c b/drivers/gpu/drm/i915/intel_fbdev.c index e0d9e72cf3d1..03b7391fb566 100644 --- a/drivers/gpu/drm/i915/intel_fbdev.c +++ b/drivers/gpu/drm/i915/intel_fbdev.c @@ -371,7 +371,7 @@ static bool intel_fb_initial_config(struct drm_fb_helper *fb_helper, return false; memcpy(save_enabled, enabled, count); - mask = BIT(count) - 1; + mask = GENMASK(count - 1, 0); conn_configured = 0; retry: for (i = 0; i < count; i++) { diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c index 66aa1bbb42f0..94df466a4801 100644 --- a/drivers/gpu/drm/i915/intel_runtime_pm.c +++ b/drivers/gpu/drm/i915/intel_runtime_pm.c @@ -1249,7 +1249,7 @@ static void vlv_dpio_cmn_power_well_disable(struct drm_i915_private *dev_priv, vlv_set_power_well(dev_priv, power_well, false); } -#define POWER_DOMAIN_MASK (BIT(POWER_DOMAIN_NUM) - 1) +#define POWER_DOMAIN_MASK (GENMASK(POWER_DOMAIN_NUM - 1, 0)) static struct i915_power_well *lookup_power_well(struct drm_i915_private *dev_priv, int power_well_id) -- cgit From 8385c2ecd47941072bdb3900bc272553ceb957e3 Mon Sep 17 00:00:00 2001 From: Joonas Lahtinen Date: Wed, 8 Feb 2017 15:12:10 +0200 Subject: drm/i915: Use for_each_power_domain() in i915_power_domain_info() Macro seems to do exactly the same thing. Signed-off-by: Joonas Lahtinen Cc: Chris Wilson Reviewed-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/1486559530-15141-1-git-send-email-joonas.lahtinen@linux.intel.com --- drivers/gpu/drm/i915/i915_debugfs.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 152f23db4ec2..6400f83d3ad1 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -2822,15 +2822,10 @@ static int i915_power_domain_info(struct seq_file *m, void *unused) seq_printf(m, "%-25s %d\n", power_well->name, power_well->count); - for (power_domain = 0; power_domain < POWER_DOMAIN_NUM; - power_domain++) { - if (!(BIT(power_domain) & power_well->domains)) - continue; - + for_each_power_domain(power_domain, power_well->domains) seq_printf(m, " %-23s %d\n", intel_display_power_domain_str(power_domain), power_domains->domain_use_count[power_domain]); - } } mutex_unlock(&power_domains->lock); -- cgit From fb51ff40956ddef8f7d9448265ad3edc67eb0b2b Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Tue, 7 Feb 2017 08:50:25 +0000 Subject: drm/i915/guc: Log significant events at the info level Currently to establish whether GuC firmware has been loaded or submission enabled (default DRM log level), one has to detect the absence of the message saying that the load has been skipped and infer the opposite. It is better to log the fact GuC firmware has been loaded and/or submission enabled explicitly to avoid any guesswork when looking at the logs. v2: * Log message polish. (Chris) * Future proof by reporting found firmware version. (Michal) Signed-off-by: Tvrtko Ursulin Cc: Arkadiusz Hiler Cc: Chris Wilson Cc: Michal Wajdeczko Reviewed-by: Chris Wilson Reviewed-by: Michal Wajdeczko (v1) Link: http://patchwork.freedesktop.org/patch/msgid/1486457425-32548-1-git-send-email-tvrtko.ursulin@linux.intel.com --- drivers/gpu/drm/i915/intel_guc_loader.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_guc_loader.c b/drivers/gpu/drm/i915/intel_guc_loader.c index 2f1cf9aea04e..8ef33d88d5a0 100644 --- a/drivers/gpu/drm/i915/intel_guc_loader.c +++ b/drivers/gpu/drm/i915/intel_guc_loader.c @@ -520,10 +520,6 @@ int intel_guc_setup(struct drm_i915_private *dev_priv) guc_fw->load_status = INTEL_UC_FIRMWARE_SUCCESS; - DRM_DEBUG_DRIVER("GuC fw status: fetch %s, load %s\n", - intel_uc_fw_status_repr(guc_fw->fetch_status), - intel_uc_fw_status_repr(guc_fw->load_status)); - intel_guc_auth_huc(dev_priv); if (i915.enable_guc_submission) { @@ -536,6 +532,11 @@ int intel_guc_setup(struct drm_i915_private *dev_priv) guc_interrupts_capture(dev_priv); } + DRM_INFO("GuC %s (firmware %s [version %u.%u])\n", + i915.enable_guc_submission ? "submission enabled" : "loaded", + guc_fw->path, + guc_fw->major_ver_found, guc_fw->minor_ver_found); + return 0; fail: -- cgit From a7d1b3f41a2d3246736816ed1b966e1904760f7f Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Thu, 26 Jan 2017 21:50:31 +0200 Subject: drm/i915: Store the pipe pixel rate in the crtc state MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rather than recomputing the pipe pixel rate on demand everywhere, let's just stick the precomputed value into the crtc state. v2: Rebase due to min_pixclk[] code movement Document the new pixel_rate struct member (Ander) Combine vlv/chv with bdw+ in intel_modeset_readout_hw_state() v3: Fix typos in commit message (David) Cc: Ander Conselvan De Oliveira Signed-off-by: Ville Syrjälä Reviewed-by: Ander Conselvan de Oliveira Link: http://patchwork.freedesktop.org/patch/msgid/20170126195031.32343-1-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/intel_display.c | 36 ++++++++++++++++++++++++++++-------- drivers/gpu/drm/i915/intel_drv.h | 6 ++++++ drivers/gpu/drm/i915/intel_fbc.c | 3 +-- drivers/gpu/drm/i915/intel_pm.c | 14 +++++++------- 4 files changed, 42 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index d0d042495dc7..cd1248bc8753 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -7184,7 +7184,7 @@ static bool pipe_config_supports_ips(struct drm_i915_private *dev_priv, * * Should measure whether using a lower cdclk w/o IPS */ - return ilk_pipe_pixel_rate(pipe_config) <= + return pipe_config->pixel_rate <= dev_priv->max_cdclk_freq * 95 / 100; } @@ -7208,6 +7208,19 @@ static bool intel_crtc_supports_double_wide(const struct intel_crtc *crtc) (crtc->pipe == PIPE_A || IS_I915G(dev_priv)); } +static void intel_crtc_compute_pixel_rate(struct intel_crtc_state *crtc_state) +{ + struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev); + + if (HAS_GMCH_DISPLAY(dev_priv)) + /* FIXME calculate proper pipe pixel rate for GMCH pfit */ + crtc_state->pixel_rate = + crtc_state->base.adjusted_mode.crtc_clock; + else + crtc_state->pixel_rate = + ilk_pipe_pixel_rate(crtc_state); +} + static int intel_crtc_compute_config(struct intel_crtc *crtc, struct intel_crtc_state *pipe_config) { @@ -7254,6 +7267,8 @@ static int intel_crtc_compute_config(struct intel_crtc *crtc, adjusted_mode->crtc_hsync_start == adjusted_mode->crtc_hdisplay) return -EINVAL; + intel_crtc_compute_pixel_rate(pipe_config); + if (HAS_IPS(dev_priv)) hsw_compute_ips_config(crtc, pipe_config); @@ -10322,7 +10337,7 @@ static int ilk_max_pixel_rate(struct drm_atomic_state *state) continue; } - pixel_rate = ilk_pipe_pixel_rate(crtc_state); + pixel_rate = crtc_state->pixel_rate; if (IS_BROADWELL(dev_priv) || IS_GEN9(dev_priv)) pixel_rate = bdw_adjust_min_pipe_pixel_rate(crtc_state, @@ -12832,9 +12847,10 @@ static void intel_dump_pipe_config(struct intel_crtc *crtc, DRM_DEBUG_KMS("adjusted mode:\n"); drm_mode_debug_printmodeline(&pipe_config->base.adjusted_mode); intel_dump_crtc_timings(&pipe_config->base.adjusted_mode); - DRM_DEBUG_KMS("port clock: %d, pipe src size: %dx%d\n", + DRM_DEBUG_KMS("port clock: %d, pipe src size: %dx%d, pixel rate %d\n", pipe_config->port_clock, - pipe_config->pipe_src_w, pipe_config->pipe_src_h); + pipe_config->pipe_src_w, pipe_config->pipe_src_h, + pipe_config->pixel_rate); if (INTEL_GEN(dev_priv) >= 9) DRM_DEBUG_KMS("num_scalers: %d, scaler_users: 0x%x, scaler_id: %d\n", @@ -13410,6 +13426,7 @@ intel_pipe_config_compare(struct drm_i915_private *dev_priv, } PIPE_CONF_CHECK_I(scaler_state.scaler_id); + PIPE_CONF_CHECK_CLOCK_FUZZY(pixel_rate); } /* BDW+ don't expose a synchronous way to read the state */ @@ -13701,6 +13718,8 @@ verify_crtc_state(struct drm_crtc *crtc, } } + intel_crtc_compute_pixel_rate(pipe_config); + if (!new_crtc_state->active) return; @@ -17177,10 +17196,11 @@ static void intel_modeset_readout_hw_state(struct drm_device *dev) */ crtc_state->base.mode.private_flags = I915_MODE_FLAG_INHERITED; - if (INTEL_GEN(dev_priv) >= 9 || IS_BROADWELL(dev_priv)) - pixclk = ilk_pipe_pixel_rate(crtc_state); - else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) - pixclk = crtc_state->base.adjusted_mode.crtc_clock; + intel_crtc_compute_pixel_rate(crtc_state); + + if (INTEL_GEN(dev_priv) >= 9 || IS_BROADWELL(dev_priv) || + IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) + pixclk = crtc_state->pixel_rate; else WARN_ON(dev_priv->display.modeset_calc_cdclk); diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 973d7b6a7d5c..e1dbd9aa5701 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -544,6 +544,12 @@ struct intel_crtc_state { * and get clipped at the edges. */ int pipe_src_w, pipe_src_h; + /* + * Pipe pixel rate, adjusted for + * panel fitter/pipe scaler downscaling. + */ + unsigned int pixel_rate; + /* Whether to set up the PCH/FDI. Note that we never allow sharing * between pch encoders and cpu encoders. */ bool has_pch_encoder; diff --git a/drivers/gpu/drm/i915/intel_fbc.c b/drivers/gpu/drm/i915/intel_fbc.c index 51585008fb9d..c66ba7e36289 100644 --- a/drivers/gpu/drm/i915/intel_fbc.c +++ b/drivers/gpu/drm/i915/intel_fbc.c @@ -742,8 +742,7 @@ static void intel_fbc_update_state_cache(struct intel_crtc *crtc, cache->crtc.mode_flags = crtc_state->base.adjusted_mode.flags; if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) - cache->crtc.hsw_bdw_pixel_rate = - ilk_pipe_pixel_rate(crtc_state); + cache->crtc.hsw_bdw_pixel_rate = crtc_state->pixel_rate; cache->plane.rotation = plane_state->base.rotation; cache->plane.src_w = drm_rect_width(&plane_state->base.src) >> 16; diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index ec16f3d6dd2e..79cbe5736ed1 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -1820,12 +1820,12 @@ static uint32_t ilk_compute_pri_wm(const struct intel_crtc_state *cstate, cpp = pstate->base.fb->format->cpp[0]; - method1 = ilk_wm_method1(ilk_pipe_pixel_rate(cstate), cpp, mem_value); + method1 = ilk_wm_method1(cstate->pixel_rate, cpp, mem_value); if (!is_lp) return method1; - method2 = ilk_wm_method2(ilk_pipe_pixel_rate(cstate), + method2 = ilk_wm_method2(cstate->pixel_rate, cstate->base.adjusted_mode.crtc_htotal, drm_rect_width(&pstate->base.dst), cpp, mem_value); @@ -1849,8 +1849,8 @@ static uint32_t ilk_compute_spr_wm(const struct intel_crtc_state *cstate, cpp = pstate->base.fb->format->cpp[0]; - method1 = ilk_wm_method1(ilk_pipe_pixel_rate(cstate), cpp, mem_value); - method2 = ilk_wm_method2(ilk_pipe_pixel_rate(cstate), + method1 = ilk_wm_method1(cstate->pixel_rate, cpp, mem_value); + method2 = ilk_wm_method2(cstate->pixel_rate, cstate->base.adjusted_mode.crtc_htotal, drm_rect_width(&pstate->base.dst), cpp, mem_value); @@ -1876,7 +1876,7 @@ static uint32_t ilk_compute_cur_wm(const struct intel_crtc_state *cstate, if (!cstate->base.active) return 0; - return ilk_wm_method2(ilk_pipe_pixel_rate(cstate), + return ilk_wm_method2(cstate->pixel_rate, cstate->base.adjusted_mode.crtc_htotal, width, cpp, mem_value); } @@ -3559,7 +3559,7 @@ static uint32_t skl_adjusted_plane_pixel_rate(const struct intel_crtc_state *cst * Adjusted plane pixel rate is just the pipe's adjusted pixel rate * with additional adjustments for plane-specific scaling. */ - adjusted_pixel_rate = ilk_pipe_pixel_rate(cstate); + adjusted_pixel_rate = cstate->pixel_rate; downscale_amount = skl_plane_downscale_amount(pstate); pixel_rate = adjusted_pixel_rate * downscale_amount >> 16; @@ -3787,7 +3787,7 @@ skl_compute_linetime_wm(struct intel_crtc_state *cstate) if (!cstate->base.active) return 0; - pixel_rate = ilk_pipe_pixel_rate(cstate); + pixel_rate = cstate->pixel_rate; if (WARN_ON(pixel_rate == 0)) return 0; -- cgit From 4e841ecd4e185af2c6353f6d4ef480fb1f533d8a Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Fri, 20 Jan 2017 20:21:53 +0200 Subject: drm/i915: Nuke intel_mode_max_pixclk() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ilk_max_pixel_rate() will now give the "correct" pixel rate for all platforms, so let's rename it to intel_max_pixel_rate() and kill off intel_mode_max_pixclk(). v2: Fix typo in commit message (Ander) Signed-off-by: Ville Syrjälä Reviewed-by: Ander Conselvan de Oliveira Link: http://patchwork.freedesktop.org/patch/msgid/20170120182205.8141-3-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/intel_display.c | 41 ++++++------------------------------ 1 file changed, 6 insertions(+), 35 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index cd1248bc8753..07d74e8e4ce7 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -122,7 +122,7 @@ static void ironlake_pfit_disable(struct intel_crtc *crtc, bool force); static void ironlake_pfit_enable(struct intel_crtc *crtc); static void intel_modeset_setup_hw_state(struct drm_device *dev); static void intel_pre_disable_primary_noatomic(struct drm_crtc *crtc); -static int ilk_max_pixel_rate(struct drm_atomic_state *state); +static int intel_max_pixel_rate(struct drm_atomic_state *state); static int glk_calc_cdclk(int max_pixclk); static int bxt_calc_cdclk(int max_pixclk); @@ -6566,40 +6566,11 @@ static int bxt_calc_cdclk(int max_pixclk) return 144000; } -/* Compute the max pixel clock for new configuration. */ -static int intel_mode_max_pixclk(struct drm_device *dev, - struct drm_atomic_state *state) -{ - struct intel_atomic_state *intel_state = to_intel_atomic_state(state); - struct drm_i915_private *dev_priv = to_i915(dev); - struct drm_crtc *crtc; - struct drm_crtc_state *crtc_state; - unsigned max_pixclk = 0, i; - enum pipe pipe; - - memcpy(intel_state->min_pixclk, dev_priv->min_pixclk, - sizeof(intel_state->min_pixclk)); - - for_each_crtc_in_state(state, crtc, crtc_state, i) { - int pixclk = 0; - - if (crtc_state->enable) - pixclk = crtc_state->adjusted_mode.crtc_clock; - - intel_state->min_pixclk[i] = pixclk; - } - - for_each_pipe(dev_priv, pipe) - max_pixclk = max(intel_state->min_pixclk[pipe], max_pixclk); - - return max_pixclk; -} - static int valleyview_modeset_calc_cdclk(struct drm_atomic_state *state) { struct drm_device *dev = state->dev; struct drm_i915_private *dev_priv = to_i915(dev); - int max_pixclk = intel_mode_max_pixclk(dev, state); + int max_pixclk = intel_max_pixel_rate(state); struct intel_atomic_state *intel_state = to_intel_atomic_state(state); @@ -6615,7 +6586,7 @@ static int valleyview_modeset_calc_cdclk(struct drm_atomic_state *state) static int bxt_modeset_calc_cdclk(struct drm_atomic_state *state) { struct drm_i915_private *dev_priv = to_i915(state->dev); - int max_pixclk = ilk_max_pixel_rate(state); + int max_pixclk = intel_max_pixel_rate(state); struct intel_atomic_state *intel_state = to_intel_atomic_state(state); int cdclk; @@ -10315,7 +10286,7 @@ static int bdw_adjust_min_pipe_pixel_rate(struct intel_crtc_state *crtc_state, } /* compute the max rate for new configuration */ -static int ilk_max_pixel_rate(struct drm_atomic_state *state) +static int intel_max_pixel_rate(struct drm_atomic_state *state) { struct intel_atomic_state *intel_state = to_intel_atomic_state(state); struct drm_i915_private *dev_priv = to_i915(state->dev); @@ -10447,7 +10418,7 @@ static int broadwell_modeset_calc_cdclk(struct drm_atomic_state *state) { struct drm_i915_private *dev_priv = to_i915(state->dev); struct intel_atomic_state *intel_state = to_intel_atomic_state(state); - int max_pixclk = ilk_max_pixel_rate(state); + int max_pixclk = intel_max_pixel_rate(state); int cdclk; /* @@ -10483,7 +10454,7 @@ static int skl_modeset_calc_cdclk(struct drm_atomic_state *state) { struct intel_atomic_state *intel_state = to_intel_atomic_state(state); struct drm_i915_private *dev_priv = to_i915(state->dev); - const int max_pixclk = ilk_max_pixel_rate(state); + const int max_pixclk = intel_max_pixel_rate(state); int vco = intel_state->cdclk_pll_vco; int cdclk; -- cgit From c49a0d054a054cc07f9ef4982a72e55a710e87d0 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Tue, 7 Feb 2017 20:31:46 +0200 Subject: drm/i915: s/get_display_clock_speed/get_cdclk/ MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rename the .get_display_clock_speed() hook to .get_cdclk(). .get_cdclk() is more specific (which clock) and it's much shorter. v2: Deal with IS_GEN9_BC() v3: Deal with i945gm_get_display_clock_speed() Signed-off-by: Ville Syrjälä Reviewed-by: Ander Conselvan de Oliveira Link: http://patchwork.freedesktop.org/patch/msgid/20170207183146.19420-1-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/i915_drv.h | 2 +- drivers/gpu/drm/i915/intel_display.c | 98 +++++++++++++-------------------- drivers/gpu/drm/i915/intel_runtime_pm.c | 3 +- 3 files changed, 41 insertions(+), 62 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 6006694bffb5..f41496405484 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -602,7 +602,7 @@ struct intel_limit; struct dpll; struct drm_i915_display_funcs { - int (*get_display_clock_speed)(struct drm_i915_private *dev_priv); + int (*get_cdclk)(struct drm_i915_private *dev_priv); int (*get_fifo_size)(struct drm_i915_private *dev_priv, int plane); int (*compute_pipe_wm)(struct intel_crtc_state *cstate); int (*compute_intermediate_wm)(struct drm_device *dev, diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 07d74e8e4ce7..8f560737c309 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -5873,7 +5873,7 @@ static void intel_update_max_cdclk(struct drm_i915_private *dev_priv) static void intel_update_cdclk(struct drm_i915_private *dev_priv) { - dev_priv->cdclk_freq = dev_priv->display.get_display_clock_speed(dev_priv); + dev_priv->cdclk_freq = dev_priv->display.get_cdclk(dev_priv); if (INTEL_GEN(dev_priv) >= 9) DRM_DEBUG_DRIVER("Current CD clock rate: %d kHz, VCO: %d kHz, ref: %d kHz\n", @@ -6411,8 +6411,7 @@ static void valleyview_set_cdclk(struct drm_device *dev, int cdclk) struct drm_i915_private *dev_priv = to_i915(dev); u32 val, cmd; - WARN_ON(dev_priv->display.get_display_clock_speed(dev_priv) - != dev_priv->cdclk_freq); + WARN_ON(dev_priv->display.get_cdclk(dev_priv) != dev_priv->cdclk_freq); if (cdclk >= 320000) /* jump to highest voltage for 400MHz too */ cmd = 2; @@ -6476,8 +6475,7 @@ static void cherryview_set_cdclk(struct drm_device *dev, int cdclk) struct drm_i915_private *dev_priv = to_i915(dev); u32 val, cmd; - WARN_ON(dev_priv->display.get_display_clock_speed(dev_priv) - != dev_priv->cdclk_freq); + WARN_ON(dev_priv->display.get_cdclk(dev_priv) != dev_priv->cdclk_freq); switch (cdclk) { case 333333: @@ -7249,7 +7247,7 @@ static int intel_crtc_compute_config(struct intel_crtc *crtc, return 0; } -static int skylake_get_display_clock_speed(struct drm_i915_private *dev_priv) +static int skylake_get_cdclk(struct drm_i915_private *dev_priv) { u32 cdctl; @@ -7310,7 +7308,7 @@ static void bxt_de_pll_update(struct drm_i915_private *dev_priv) dev_priv->cdclk_pll.ref; } -static int broxton_get_display_clock_speed(struct drm_i915_private *dev_priv) +static int broxton_get_cdclk(struct drm_i915_private *dev_priv) { u32 divider; int div, vco; @@ -7345,7 +7343,7 @@ static int broxton_get_display_clock_speed(struct drm_i915_private *dev_priv) return DIV_ROUND_CLOSEST(vco, div); } -static int broadwell_get_display_clock_speed(struct drm_i915_private *dev_priv) +static int broadwell_get_cdclk(struct drm_i915_private *dev_priv) { uint32_t lcpll = I915_READ(LCPLL_CTL); uint32_t freq = lcpll & LCPLL_CLK_FREQ_MASK; @@ -7364,7 +7362,7 @@ static int broadwell_get_display_clock_speed(struct drm_i915_private *dev_priv) return 675000; } -static int haswell_get_display_clock_speed(struct drm_i915_private *dev_priv) +static int haswell_get_cdclk(struct drm_i915_private *dev_priv) { uint32_t lcpll = I915_READ(LCPLL_CTL); uint32_t freq = lcpll & LCPLL_CLK_FREQ_MASK; @@ -7381,23 +7379,23 @@ static int haswell_get_display_clock_speed(struct drm_i915_private *dev_priv) return 540000; } -static int valleyview_get_display_clock_speed(struct drm_i915_private *dev_priv) +static int valleyview_get_cdclk(struct drm_i915_private *dev_priv) { return vlv_get_cck_clock_hpll(dev_priv, "cdclk", CCK_DISPLAY_CLOCK_CONTROL); } -static int ilk_get_display_clock_speed(struct drm_i915_private *dev_priv) +static int ilk_get_cdclk(struct drm_i915_private *dev_priv) { return 450000; } -static int i945_get_display_clock_speed(struct drm_i915_private *dev_priv) +static int i945_get_cdclk(struct drm_i915_private *dev_priv) { return 400000; } -static int i945gm_get_display_clock_speed(struct drm_i915_private *dev_priv) +static int i945gm_get_cdclk(struct drm_i915_private *dev_priv) { struct pci_dev *pdev = dev_priv->drm.pdev; u16 gcfgc = 0; @@ -7417,17 +7415,17 @@ static int i945gm_get_display_clock_speed(struct drm_i915_private *dev_priv) } } -static int i915_get_display_clock_speed(struct drm_i915_private *dev_priv) +static int i915_get_cdclk(struct drm_i915_private *dev_priv) { return 333333; } -static int i9xx_misc_get_display_clock_speed(struct drm_i915_private *dev_priv) +static int i9xx_misc_get_cdclk(struct drm_i915_private *dev_priv) { return 200000; } -static int pnv_get_display_clock_speed(struct drm_i915_private *dev_priv) +static int pnv_get_cdclk(struct drm_i915_private *dev_priv) { struct pci_dev *pdev = dev_priv->drm.pdev; u16 gcfgc = 0; @@ -7452,7 +7450,7 @@ static int pnv_get_display_clock_speed(struct drm_i915_private *dev_priv) } } -static int i915gm_get_display_clock_speed(struct drm_i915_private *dev_priv) +static int i915gm_get_cdclk(struct drm_i915_private *dev_priv) { struct pci_dev *pdev = dev_priv->drm.pdev; u16 gcfgc = 0; @@ -7472,12 +7470,12 @@ static int i915gm_get_display_clock_speed(struct drm_i915_private *dev_priv) } } -static int i865_get_display_clock_speed(struct drm_i915_private *dev_priv) +static int i865_get_cdclk(struct drm_i915_private *dev_priv) { return 266667; } -static int i85x_get_display_clock_speed(struct drm_i915_private *dev_priv) +static int i85x_get_cdclk(struct drm_i915_private *dev_priv) { struct pci_dev *pdev = dev_priv->drm.pdev; u16 hpllcc = 0; @@ -7515,7 +7513,7 @@ static int i85x_get_display_clock_speed(struct drm_i915_private *dev_priv) return 0; } -static int i830_get_display_clock_speed(struct drm_i915_private *dev_priv) +static int i830_get_cdclk(struct drm_i915_private *dev_priv) { return 133333; } @@ -7588,7 +7586,7 @@ static unsigned int intel_hpll_vco(struct drm_i915_private *dev_priv) return vco; } -static int gm45_get_display_clock_speed(struct drm_i915_private *dev_priv) +static int gm45_get_cdclk(struct drm_i915_private *dev_priv) { struct pci_dev *pdev = dev_priv->drm.pdev; unsigned int cdclk_sel, vco = intel_hpll_vco(dev_priv); @@ -7611,7 +7609,7 @@ static int gm45_get_display_clock_speed(struct drm_i915_private *dev_priv) } } -static int i965gm_get_display_clock_speed(struct drm_i915_private *dev_priv) +static int i965gm_get_cdclk(struct drm_i915_private *dev_priv) { struct pci_dev *pdev = dev_priv->drm.pdev; static const uint8_t div_3200[] = { 16, 10, 8 }; @@ -7649,7 +7647,7 @@ fail: return 200000; } -static int g33_get_display_clock_speed(struct drm_i915_private *dev_priv) +static int g33_get_cdclk(struct drm_i915_private *dev_priv) { struct pci_dev *pdev = dev_priv->drm.pdev; static const uint8_t div_3200[] = { 12, 10, 8, 7, 5, 16 }; @@ -16242,61 +16240,43 @@ void intel_init_display_hooks(struct drm_i915_private *dev_priv) /* Returns the core display clock speed */ if (IS_GEN9_BC(dev_priv)) - dev_priv->display.get_display_clock_speed = - skylake_get_display_clock_speed; + dev_priv->display.get_cdclk = skylake_get_cdclk; else if (IS_GEN9_LP(dev_priv)) - dev_priv->display.get_display_clock_speed = - broxton_get_display_clock_speed; + dev_priv->display.get_cdclk = broxton_get_cdclk; else if (IS_BROADWELL(dev_priv)) - dev_priv->display.get_display_clock_speed = - broadwell_get_display_clock_speed; + dev_priv->display.get_cdclk = broadwell_get_cdclk; else if (IS_HASWELL(dev_priv)) - dev_priv->display.get_display_clock_speed = - haswell_get_display_clock_speed; + dev_priv->display.get_cdclk = haswell_get_cdclk; else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) - dev_priv->display.get_display_clock_speed = - valleyview_get_display_clock_speed; + dev_priv->display.get_cdclk = valleyview_get_cdclk; else if (IS_GEN5(dev_priv)) - dev_priv->display.get_display_clock_speed = - ilk_get_display_clock_speed; + dev_priv->display.get_cdclk = ilk_get_cdclk; else if (IS_I945G(dev_priv) || IS_I965G(dev_priv) || IS_GEN6(dev_priv) || IS_IVYBRIDGE(dev_priv)) - dev_priv->display.get_display_clock_speed = - i945_get_display_clock_speed; + dev_priv->display.get_cdclk = i945_get_cdclk; else if (IS_GM45(dev_priv)) - dev_priv->display.get_display_clock_speed = - gm45_get_display_clock_speed; + dev_priv->display.get_cdclk = gm45_get_cdclk; else if (IS_I965GM(dev_priv)) - dev_priv->display.get_display_clock_speed = - i965gm_get_display_clock_speed; + dev_priv->display.get_cdclk = i965gm_get_cdclk; else if (IS_PINEVIEW(dev_priv)) - dev_priv->display.get_display_clock_speed = - pnv_get_display_clock_speed; + dev_priv->display.get_cdclk = pnv_get_cdclk; else if (IS_G33(dev_priv) || IS_G4X(dev_priv)) - dev_priv->display.get_display_clock_speed = - g33_get_display_clock_speed; + dev_priv->display.get_cdclk = g33_get_cdclk; else if (IS_I915G(dev_priv)) - dev_priv->display.get_display_clock_speed = - i915_get_display_clock_speed; + dev_priv->display.get_cdclk = i915_get_cdclk; else if (IS_I845G(dev_priv)) - dev_priv->display.get_display_clock_speed = - i9xx_misc_get_display_clock_speed; + dev_priv->display.get_cdclk = i9xx_misc_get_cdclk; else if (IS_I945GM(dev_priv)) - dev_priv->display.get_display_clock_speed = - i945gm_get_display_clock_speed; + dev_priv->display.get_cdclk = i945gm_get_cdclk; else if (IS_I915GM(dev_priv)) - dev_priv->display.get_display_clock_speed = - i915gm_get_display_clock_speed; + dev_priv->display.get_cdclk = i915gm_get_cdclk; else if (IS_I865G(dev_priv)) - dev_priv->display.get_display_clock_speed = - i865_get_display_clock_speed; + dev_priv->display.get_cdclk = i865_get_cdclk; else if (IS_I85X(dev_priv)) - dev_priv->display.get_display_clock_speed = - i85x_get_display_clock_speed; + dev_priv->display.get_cdclk = i85x_get_cdclk; else { /* 830 */ WARN(!IS_I830(dev_priv), "Unknown platform. Assuming 133 MHz CDCLK\n"); - dev_priv->display.get_display_clock_speed = - i830_get_display_clock_speed; + dev_priv->display.get_cdclk = i830_get_cdclk; } if (IS_GEN5(dev_priv)) { diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c index 94df466a4801..915914f9444c 100644 --- a/drivers/gpu/drm/i915/intel_runtime_pm.c +++ b/drivers/gpu/drm/i915/intel_runtime_pm.c @@ -966,8 +966,7 @@ static void gen9_dc_off_power_well_enable(struct drm_i915_private *dev_priv, { gen9_set_dc_state(dev_priv, DC_STATE_DISABLE); - WARN_ON(dev_priv->cdclk_freq != - dev_priv->display.get_display_clock_speed(dev_priv)); + WARN_ON(dev_priv->cdclk_freq != dev_priv->display.get_cdclk(dev_priv)); gen9_assert_dbuf_enabled(dev_priv); -- cgit From 4717e8bb7341a00f44da3d1814ac53c5a2a1ce75 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Tue, 7 Feb 2017 20:32:26 +0200 Subject: drm/i915: Clean up the .get_cdclk() assignment if ladder MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Let's clean up the mess we have in the if ladder that assigns the .get_cdclk() hooks. The grouping of the platforms by the function results in a thing that's not really legible, so let's do it the other way around and order the if ladder by platform and duplicate whatever assignments we need. To further avoid confusion with the function names let's rename them to just fixed__get_cdclk(). The other option would be to duplicate the functions entirely but it seems quite pointless to do that since each one just returns a fixed value. v2: Deal with i945gm_get_cdclk() Signed-off-by: Ville Syrjälä Reviewed-by: Ander Conselvan de Oliveira Link: http://patchwork.freedesktop.org/patch/msgid/20170207183226.19537-1-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/intel_display.c | 39 ++++++++++++++++++++---------------- 1 file changed, 22 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 8f560737c309..c407cc41c0c9 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -7385,12 +7385,12 @@ static int valleyview_get_cdclk(struct drm_i915_private *dev_priv) CCK_DISPLAY_CLOCK_CONTROL); } -static int ilk_get_cdclk(struct drm_i915_private *dev_priv) +static int fixed_450mhz_get_cdclk(struct drm_i915_private *dev_priv) { return 450000; } -static int i945_get_cdclk(struct drm_i915_private *dev_priv) +static int fixed_400mhz_get_cdclk(struct drm_i915_private *dev_priv) { return 400000; } @@ -7415,12 +7415,12 @@ static int i945gm_get_cdclk(struct drm_i915_private *dev_priv) } } -static int i915_get_cdclk(struct drm_i915_private *dev_priv) +static int fixed_333mhz_get_cdclk(struct drm_i915_private *dev_priv) { return 333333; } -static int i9xx_misc_get_cdclk(struct drm_i915_private *dev_priv) +static int fixed_200mhz_get_cdclk(struct drm_i915_private *dev_priv) { return 200000; } @@ -7470,7 +7470,7 @@ static int i915gm_get_cdclk(struct drm_i915_private *dev_priv) } } -static int i865_get_cdclk(struct drm_i915_private *dev_priv) +static int fixed_266mhz_get_cdclk(struct drm_i915_private *dev_priv) { return 266667; } @@ -7513,7 +7513,7 @@ static int i85x_get_cdclk(struct drm_i915_private *dev_priv) return 0; } -static int i830_get_cdclk(struct drm_i915_private *dev_priv) +static int fixed_133mhz_get_cdclk(struct drm_i915_private *dev_priv) { return 133333; } @@ -16249,34 +16249,39 @@ void intel_init_display_hooks(struct drm_i915_private *dev_priv) dev_priv->display.get_cdclk = haswell_get_cdclk; else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) dev_priv->display.get_cdclk = valleyview_get_cdclk; + else if (IS_GEN6(dev_priv) || IS_IVYBRIDGE(dev_priv)) + dev_priv->display.get_cdclk = fixed_400mhz_get_cdclk; else if (IS_GEN5(dev_priv)) - dev_priv->display.get_cdclk = ilk_get_cdclk; - else if (IS_I945G(dev_priv) || IS_I965G(dev_priv) || - IS_GEN6(dev_priv) || IS_IVYBRIDGE(dev_priv)) - dev_priv->display.get_cdclk = i945_get_cdclk; + dev_priv->display.get_cdclk = fixed_450mhz_get_cdclk; else if (IS_GM45(dev_priv)) dev_priv->display.get_cdclk = gm45_get_cdclk; + else if (IS_G4X(dev_priv)) + dev_priv->display.get_cdclk = g33_get_cdclk; else if (IS_I965GM(dev_priv)) dev_priv->display.get_cdclk = i965gm_get_cdclk; + else if (IS_I965G(dev_priv)) + dev_priv->display.get_cdclk = fixed_400mhz_get_cdclk; else if (IS_PINEVIEW(dev_priv)) dev_priv->display.get_cdclk = pnv_get_cdclk; - else if (IS_G33(dev_priv) || IS_G4X(dev_priv)) + else if (IS_G33(dev_priv)) dev_priv->display.get_cdclk = g33_get_cdclk; - else if (IS_I915G(dev_priv)) - dev_priv->display.get_cdclk = i915_get_cdclk; - else if (IS_I845G(dev_priv)) - dev_priv->display.get_cdclk = i9xx_misc_get_cdclk; else if (IS_I945GM(dev_priv)) dev_priv->display.get_cdclk = i945gm_get_cdclk; + else if (IS_I945G(dev_priv)) + dev_priv->display.get_cdclk = fixed_400mhz_get_cdclk; else if (IS_I915GM(dev_priv)) dev_priv->display.get_cdclk = i915gm_get_cdclk; + else if (IS_I915G(dev_priv)) + dev_priv->display.get_cdclk = fixed_333mhz_get_cdclk; else if (IS_I865G(dev_priv)) - dev_priv->display.get_cdclk = i865_get_cdclk; + dev_priv->display.get_cdclk = fixed_266mhz_get_cdclk; else if (IS_I85X(dev_priv)) dev_priv->display.get_cdclk = i85x_get_cdclk; + else if (IS_I845G(dev_priv)) + dev_priv->display.get_cdclk = fixed_200mhz_get_cdclk; else { /* 830 */ WARN(!IS_I830(dev_priv), "Unknown platform. Assuming 133 MHz CDCLK\n"); - dev_priv->display.get_cdclk = i830_get_cdclk; + dev_priv->display.get_cdclk = fixed_133mhz_get_cdclk; } if (IS_GEN5(dev_priv)) { -- cgit From 7ff89ca2135814db74d7a1706b9abfb1448b8bed Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Tue, 7 Feb 2017 20:33:05 +0200 Subject: drm/i915: Move most cdclk/rawclk related code to intel_cdclk.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Let's try to shrink intel_display.c a bit by moving the cdclk/rawclk stuff to a new file. It's all reasonably self contained so we don't even have to add that many non-static symbols. We'll also take the opportunity to shuffle around the functions a bit to get things in a more consistent order based on the platform. v2: Add kernel-docs (Ander) v3: Deal with IS_GEN9_BC() v4: Deal with i945gm_get_cdclk() Signed-off-by: Ville Syrjälä Reviewed-by: Ander Conselvan de Oliveira Link: http://patchwork.freedesktop.org/patch/msgid/20170207183305.19656-1-ville.syrjala@linux.intel.com --- Documentation/gpu/i915.rst | 9 + drivers/gpu/drm/i915/Makefile | 1 + drivers/gpu/drm/i915/intel_cdclk.c | 1794 ++++++++++++++++++++++++++++ drivers/gpu/drm/i915/intel_display.c | 2137 ++++------------------------------ drivers/gpu/drm/i915/intel_drv.h | 9 +- 5 files changed, 2036 insertions(+), 1914 deletions(-) create mode 100644 drivers/gpu/drm/i915/intel_cdclk.c diff --git a/Documentation/gpu/i915.rst b/Documentation/gpu/i915.rst index 104296dffad1..1cd0ee518dee 100644 --- a/Documentation/gpu/i915.rst +++ b/Documentation/gpu/i915.rst @@ -213,6 +213,15 @@ Video BIOS Table (VBT) .. kernel-doc:: drivers/gpu/drm/i915/intel_vbt_defs.h :internal: +Display clocks +-------------- + +.. kernel-doc:: drivers/gpu/drm/i915/intel_cdclk.c + :doc: CDCLK / RAWCLK + +.. kernel-doc:: drivers/gpu/drm/i915/intel_cdclk.c + :internal: + Display PLLs ------------ diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 74ca2e8b2494..e58fc8f1b83b 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -72,6 +72,7 @@ i915-y += intel_audio.o \ intel_atomic.o \ intel_atomic_plane.o \ intel_bios.o \ + intel_cdclk.o \ intel_color.o \ intel_display.o \ intel_dpio_phy.o \ diff --git a/drivers/gpu/drm/i915/intel_cdclk.c b/drivers/gpu/drm/i915/intel_cdclk.c new file mode 100644 index 000000000000..04a46c14c898 --- /dev/null +++ b/drivers/gpu/drm/i915/intel_cdclk.c @@ -0,0 +1,1794 @@ +/* + * Copyright © 2006-2017 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include "intel_drv.h" + +/** + * DOC: CDCLK / RAWCLK + * + * The display engine uses several different clocks to do its work. There + * are two main clocks involved that aren't directly related to the actual + * pixel clock or any symbol/bit clock of the actual output port. These + * are the core display clock (CDCLK) and RAWCLK. + * + * CDCLK clocks most of the display pipe logic, and thus its frequency + * must be high enough to support the rate at which pixels are flowing + * through the pipes. Downscaling must also be accounted as that increases + * the effective pixel rate. + * + * On several platforms the CDCLK frequency can be changed dynamically + * to minimize power consumption for a given display configuration. + * Typically changes to the CDCLK frequency require all the display pipes + * to be shut down while the frequency is being changed. + * + * On SKL+ the DMC will toggle the CDCLK off/on during DC5/6 entry/exit. + * DMC will not change the active CDCLK frequency however, so that part + * will still be performed by the driver directly. + * + * RAWCLK is a fixed frequency clock, often used by various auxiliary + * blocks such as AUX CH or backlight PWM. Hence the only thing we + * really need to know about RAWCLK is its frequency so that various + * dividers can be programmed correctly. + */ + +static int fixed_133mhz_get_cdclk(struct drm_i915_private *dev_priv) +{ + return 133333; +} + +static int fixed_200mhz_get_cdclk(struct drm_i915_private *dev_priv) +{ + return 200000; +} + +static int fixed_266mhz_get_cdclk(struct drm_i915_private *dev_priv) +{ + return 266667; +} + +static int fixed_333mhz_get_cdclk(struct drm_i915_private *dev_priv) +{ + return 333333; +} + +static int fixed_400mhz_get_cdclk(struct drm_i915_private *dev_priv) +{ + return 400000; +} + +static int fixed_450mhz_get_cdclk(struct drm_i915_private *dev_priv) +{ + return 450000; +} + +static int i85x_get_cdclk(struct drm_i915_private *dev_priv) +{ + struct pci_dev *pdev = dev_priv->drm.pdev; + u16 hpllcc = 0; + + /* + * 852GM/852GMV only supports 133 MHz and the HPLLCC + * encoding is different :( + * FIXME is this the right way to detect 852GM/852GMV? + */ + if (pdev->revision == 0x1) + return 133333; + + pci_bus_read_config_word(pdev->bus, + PCI_DEVFN(0, 3), HPLLCC, &hpllcc); + + /* Assume that the hardware is in the high speed state. This + * should be the default. + */ + switch (hpllcc & GC_CLOCK_CONTROL_MASK) { + case GC_CLOCK_133_200: + case GC_CLOCK_133_200_2: + case GC_CLOCK_100_200: + return 200000; + case GC_CLOCK_166_250: + return 250000; + case GC_CLOCK_100_133: + return 133333; + case GC_CLOCK_133_266: + case GC_CLOCK_133_266_2: + case GC_CLOCK_166_266: + return 266667; + } + + /* Shouldn't happen */ + return 0; +} + +static int i915gm_get_cdclk(struct drm_i915_private *dev_priv) +{ + struct pci_dev *pdev = dev_priv->drm.pdev; + u16 gcfgc = 0; + + pci_read_config_word(pdev, GCFGC, &gcfgc); + + if (gcfgc & GC_LOW_FREQUENCY_ENABLE) + return 133333; + + switch (gcfgc & GC_DISPLAY_CLOCK_MASK) { + case GC_DISPLAY_CLOCK_333_320_MHZ: + return 333333; + default: + case GC_DISPLAY_CLOCK_190_200_MHZ: + return 190000; + } +} + +static int i945gm_get_cdclk(struct drm_i915_private *dev_priv) +{ + struct pci_dev *pdev = dev_priv->drm.pdev; + u16 gcfgc = 0; + + pci_read_config_word(pdev, GCFGC, &gcfgc); + + if (gcfgc & GC_LOW_FREQUENCY_ENABLE) + return 133333; + + switch (gcfgc & GC_DISPLAY_CLOCK_MASK) { + case GC_DISPLAY_CLOCK_333_320_MHZ: + return 320000; + default: + case GC_DISPLAY_CLOCK_190_200_MHZ: + return 200000; + } +} + +static unsigned int intel_hpll_vco(struct drm_i915_private *dev_priv) +{ + static const unsigned int blb_vco[8] = { + [0] = 3200000, + [1] = 4000000, + [2] = 5333333, + [3] = 4800000, + [4] = 6400000, + }; + static const unsigned int pnv_vco[8] = { + [0] = 3200000, + [1] = 4000000, + [2] = 5333333, + [3] = 4800000, + [4] = 2666667, + }; + static const unsigned int cl_vco[8] = { + [0] = 3200000, + [1] = 4000000, + [2] = 5333333, + [3] = 6400000, + [4] = 3333333, + [5] = 3566667, + [6] = 4266667, + }; + static const unsigned int elk_vco[8] = { + [0] = 3200000, + [1] = 4000000, + [2] = 5333333, + [3] = 4800000, + }; + static const unsigned int ctg_vco[8] = { + [0] = 3200000, + [1] = 4000000, + [2] = 5333333, + [3] = 6400000, + [4] = 2666667, + [5] = 4266667, + }; + const unsigned int *vco_table; + unsigned int vco; + uint8_t tmp = 0; + + /* FIXME other chipsets? */ + if (IS_GM45(dev_priv)) + vco_table = ctg_vco; + else if (IS_G4X(dev_priv)) + vco_table = elk_vco; + else if (IS_I965GM(dev_priv)) + vco_table = cl_vco; + else if (IS_PINEVIEW(dev_priv)) + vco_table = pnv_vco; + else if (IS_G33(dev_priv)) + vco_table = blb_vco; + else + return 0; + + tmp = I915_READ(IS_MOBILE(dev_priv) ? HPLLVCO_MOBILE : HPLLVCO); + + vco = vco_table[tmp & 0x7]; + if (vco == 0) + DRM_ERROR("Bad HPLL VCO (HPLLVCO=0x%02x)\n", tmp); + else + DRM_DEBUG_KMS("HPLL VCO %u kHz\n", vco); + + return vco; +} + +static int g33_get_cdclk(struct drm_i915_private *dev_priv) +{ + struct pci_dev *pdev = dev_priv->drm.pdev; + static const uint8_t div_3200[] = { 12, 10, 8, 7, 5, 16 }; + static const uint8_t div_4000[] = { 14, 12, 10, 8, 6, 20 }; + static const uint8_t div_4800[] = { 20, 14, 12, 10, 8, 24 }; + static const uint8_t div_5333[] = { 20, 16, 12, 12, 8, 28 }; + const uint8_t *div_table; + unsigned int cdclk_sel, vco = intel_hpll_vco(dev_priv); + uint16_t tmp = 0; + + pci_read_config_word(pdev, GCFGC, &tmp); + + cdclk_sel = (tmp >> 4) & 0x7; + + if (cdclk_sel >= ARRAY_SIZE(div_3200)) + goto fail; + + switch (vco) { + case 3200000: + div_table = div_3200; + break; + case 4000000: + div_table = div_4000; + break; + case 4800000: + div_table = div_4800; + break; + case 5333333: + div_table = div_5333; + break; + default: + goto fail; + } + + return DIV_ROUND_CLOSEST(vco, div_table[cdclk_sel]); + +fail: + DRM_ERROR("Unable to determine CDCLK. HPLL VCO=%u kHz, CFGC=0x%08x\n", + vco, tmp); + return 190476; +} + +static int pnv_get_cdclk(struct drm_i915_private *dev_priv) +{ + struct pci_dev *pdev = dev_priv->drm.pdev; + u16 gcfgc = 0; + + pci_read_config_word(pdev, GCFGC, &gcfgc); + + switch (gcfgc & GC_DISPLAY_CLOCK_MASK) { + case GC_DISPLAY_CLOCK_267_MHZ_PNV: + return 266667; + case GC_DISPLAY_CLOCK_333_MHZ_PNV: + return 333333; + case GC_DISPLAY_CLOCK_444_MHZ_PNV: + return 444444; + case GC_DISPLAY_CLOCK_200_MHZ_PNV: + return 200000; + default: + DRM_ERROR("Unknown pnv display core clock 0x%04x\n", gcfgc); + case GC_DISPLAY_CLOCK_133_MHZ_PNV: + return 133333; + case GC_DISPLAY_CLOCK_167_MHZ_PNV: + return 166667; + } +} + +static int i965gm_get_cdclk(struct drm_i915_private *dev_priv) +{ + struct pci_dev *pdev = dev_priv->drm.pdev; + static const uint8_t div_3200[] = { 16, 10, 8 }; + static const uint8_t div_4000[] = { 20, 12, 10 }; + static const uint8_t div_5333[] = { 24, 16, 14 }; + const uint8_t *div_table; + unsigned int cdclk_sel, vco = intel_hpll_vco(dev_priv); + uint16_t tmp = 0; + + pci_read_config_word(pdev, GCFGC, &tmp); + + cdclk_sel = ((tmp >> 8) & 0x1f) - 1; + + if (cdclk_sel >= ARRAY_SIZE(div_3200)) + goto fail; + + switch (vco) { + case 3200000: + div_table = div_3200; + break; + case 4000000: + div_table = div_4000; + break; + case 5333333: + div_table = div_5333; + break; + default: + goto fail; + } + + return DIV_ROUND_CLOSEST(vco, div_table[cdclk_sel]); + +fail: + DRM_ERROR("Unable to determine CDCLK. HPLL VCO=%u kHz, CFGC=0x%04x\n", + vco, tmp); + return 200000; +} + +static int gm45_get_cdclk(struct drm_i915_private *dev_priv) +{ + struct pci_dev *pdev = dev_priv->drm.pdev; + unsigned int cdclk_sel, vco = intel_hpll_vco(dev_priv); + uint16_t tmp = 0; + + pci_read_config_word(pdev, GCFGC, &tmp); + + cdclk_sel = (tmp >> 12) & 0x1; + + switch (vco) { + case 2666667: + case 4000000: + case 5333333: + return cdclk_sel ? 333333 : 222222; + case 3200000: + return cdclk_sel ? 320000 : 228571; + default: + DRM_ERROR("Unable to determine CDCLK. HPLL VCO=%u, CFGC=0x%04x\n", + vco, tmp); + return 222222; + } +} + +static int hsw_get_cdclk(struct drm_i915_private *dev_priv) +{ + uint32_t lcpll = I915_READ(LCPLL_CTL); + uint32_t freq = lcpll & LCPLL_CLK_FREQ_MASK; + + if (lcpll & LCPLL_CD_SOURCE_FCLK) + return 800000; + else if (I915_READ(FUSE_STRAP) & HSW_CDCLK_LIMIT) + return 450000; + else if (freq == LCPLL_CLK_FREQ_450) + return 450000; + else if (IS_HSW_ULT(dev_priv)) + return 337500; + else + return 540000; +} + +static int vlv_calc_cdclk(struct drm_i915_private *dev_priv, + int max_pixclk) +{ + int freq_320 = (dev_priv->hpll_freq << 1) % 320000 != 0 ? + 333333 : 320000; + int limit = IS_CHERRYVIEW(dev_priv) ? 95 : 90; + + /* + * We seem to get an unstable or solid color picture at 200MHz. + * Not sure what's wrong. For now use 200MHz only when all pipes + * are off. + */ + if (!IS_CHERRYVIEW(dev_priv) && + max_pixclk > freq_320*limit/100) + return 400000; + else if (max_pixclk > 266667*limit/100) + return freq_320; + else if (max_pixclk > 0) + return 266667; + else + return 200000; +} + +static int vlv_get_cdclk(struct drm_i915_private *dev_priv) +{ + return vlv_get_cck_clock_hpll(dev_priv, "cdclk", + CCK_DISPLAY_CLOCK_CONTROL); +} + +static void vlv_program_pfi_credits(struct drm_i915_private *dev_priv) +{ + unsigned int credits, default_credits; + + if (IS_CHERRYVIEW(dev_priv)) + default_credits = PFI_CREDIT(12); + else + default_credits = PFI_CREDIT(8); + + if (dev_priv->cdclk_freq >= dev_priv->czclk_freq) { + /* CHV suggested value is 31 or 63 */ + if (IS_CHERRYVIEW(dev_priv)) + credits = PFI_CREDIT_63; + else + credits = PFI_CREDIT(15); + } else { + credits = default_credits; + } + + /* + * WA - write default credits before re-programming + * FIXME: should we also set the resend bit here? + */ + I915_WRITE(GCI_CONTROL, VGA_FAST_MODE_DISABLE | + default_credits); + + I915_WRITE(GCI_CONTROL, VGA_FAST_MODE_DISABLE | + credits | PFI_CREDIT_RESEND); + + /* + * FIXME is this guaranteed to clear + * immediately or should we poll for it? + */ + WARN_ON(I915_READ(GCI_CONTROL) & PFI_CREDIT_RESEND); +} + +static void vlv_set_cdclk(struct drm_device *dev, int cdclk) +{ + struct drm_i915_private *dev_priv = to_i915(dev); + u32 val, cmd; + + WARN_ON(dev_priv->display.get_cdclk(dev_priv) != dev_priv->cdclk_freq); + + if (cdclk >= 320000) /* jump to highest voltage for 400MHz too */ + cmd = 2; + else if (cdclk == 266667) + cmd = 1; + else + cmd = 0; + + mutex_lock(&dev_priv->rps.hw_lock); + val = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ); + val &= ~DSPFREQGUAR_MASK; + val |= (cmd << DSPFREQGUAR_SHIFT); + vlv_punit_write(dev_priv, PUNIT_REG_DSPFREQ, val); + if (wait_for((vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ) & + DSPFREQSTAT_MASK) == (cmd << DSPFREQSTAT_SHIFT), + 50)) { + DRM_ERROR("timed out waiting for CDclk change\n"); + } + mutex_unlock(&dev_priv->rps.hw_lock); + + mutex_lock(&dev_priv->sb_lock); + + if (cdclk == 400000) { + u32 divider; + + divider = DIV_ROUND_CLOSEST(dev_priv->hpll_freq << 1, + cdclk) - 1; + + /* adjust cdclk divider */ + val = vlv_cck_read(dev_priv, CCK_DISPLAY_CLOCK_CONTROL); + val &= ~CCK_FREQUENCY_VALUES; + val |= divider; + vlv_cck_write(dev_priv, CCK_DISPLAY_CLOCK_CONTROL, val); + + if (wait_for((vlv_cck_read(dev_priv, CCK_DISPLAY_CLOCK_CONTROL) & + CCK_FREQUENCY_STATUS) == (divider << CCK_FREQUENCY_STATUS_SHIFT), + 50)) + DRM_ERROR("timed out waiting for CDclk change\n"); + } + + /* adjust self-refresh exit latency value */ + val = vlv_bunit_read(dev_priv, BUNIT_REG_BISOC); + val &= ~0x7f; + + /* + * For high bandwidth configs, we set a higher latency in the bunit + * so that the core display fetch happens in time to avoid underruns. + */ + if (cdclk == 400000) + val |= 4500 / 250; /* 4.5 usec */ + else + val |= 3000 / 250; /* 3.0 usec */ + vlv_bunit_write(dev_priv, BUNIT_REG_BISOC, val); + + mutex_unlock(&dev_priv->sb_lock); + + intel_update_cdclk(dev_priv); +} + +static void chv_set_cdclk(struct drm_device *dev, int cdclk) +{ + struct drm_i915_private *dev_priv = to_i915(dev); + u32 val, cmd; + + WARN_ON(dev_priv->display.get_cdclk(dev_priv) != dev_priv->cdclk_freq); + + switch (cdclk) { + case 333333: + case 320000: + case 266667: + case 200000: + break; + default: + MISSING_CASE(cdclk); + return; + } + + /* + * Specs are full of misinformation, but testing on actual + * hardware has shown that we just need to write the desired + * CCK divider into the Punit register. + */ + cmd = DIV_ROUND_CLOSEST(dev_priv->hpll_freq << 1, cdclk) - 1; + + mutex_lock(&dev_priv->rps.hw_lock); + val = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ); + val &= ~DSPFREQGUAR_MASK_CHV; + val |= (cmd << DSPFREQGUAR_SHIFT_CHV); + vlv_punit_write(dev_priv, PUNIT_REG_DSPFREQ, val); + if (wait_for((vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ) & + DSPFREQSTAT_MASK_CHV) == (cmd << DSPFREQSTAT_SHIFT_CHV), + 50)) { + DRM_ERROR("timed out waiting for CDclk change\n"); + } + mutex_unlock(&dev_priv->rps.hw_lock); + + intel_update_cdclk(dev_priv); +} + +static int bdw_calc_cdclk(int max_pixclk) +{ + if (max_pixclk > 540000) + return 675000; + else if (max_pixclk > 450000) + return 540000; + else if (max_pixclk > 337500) + return 450000; + else + return 337500; +} + +static int bdw_get_cdclk(struct drm_i915_private *dev_priv) +{ + uint32_t lcpll = I915_READ(LCPLL_CTL); + uint32_t freq = lcpll & LCPLL_CLK_FREQ_MASK; + + if (lcpll & LCPLL_CD_SOURCE_FCLK) + return 800000; + else if (I915_READ(FUSE_STRAP) & HSW_CDCLK_LIMIT) + return 450000; + else if (freq == LCPLL_CLK_FREQ_450) + return 450000; + else if (freq == LCPLL_CLK_FREQ_54O_BDW) + return 540000; + else if (freq == LCPLL_CLK_FREQ_337_5_BDW) + return 337500; + else + return 675000; +} + +static void bdw_set_cdclk(struct drm_device *dev, int cdclk) +{ + struct drm_i915_private *dev_priv = to_i915(dev); + uint32_t val, data; + int ret; + + if (WARN((I915_READ(LCPLL_CTL) & + (LCPLL_PLL_DISABLE | LCPLL_PLL_LOCK | + LCPLL_CD_CLOCK_DISABLE | LCPLL_ROOT_CD_CLOCK_DISABLE | + LCPLL_CD2X_CLOCK_DISABLE | LCPLL_POWER_DOWN_ALLOW | + LCPLL_CD_SOURCE_FCLK)) != LCPLL_PLL_LOCK, + "trying to change cdclk frequency with cdclk not enabled\n")) + return; + + mutex_lock(&dev_priv->rps.hw_lock); + ret = sandybridge_pcode_write(dev_priv, + BDW_PCODE_DISPLAY_FREQ_CHANGE_REQ, 0x0); + mutex_unlock(&dev_priv->rps.hw_lock); + if (ret) { + DRM_ERROR("failed to inform pcode about cdclk change\n"); + return; + } + + val = I915_READ(LCPLL_CTL); + val |= LCPLL_CD_SOURCE_FCLK; + I915_WRITE(LCPLL_CTL, val); + + if (wait_for_us(I915_READ(LCPLL_CTL) & + LCPLL_CD_SOURCE_FCLK_DONE, 1)) + DRM_ERROR("Switching to FCLK failed\n"); + + val = I915_READ(LCPLL_CTL); + val &= ~LCPLL_CLK_FREQ_MASK; + + switch (cdclk) { + case 450000: + val |= LCPLL_CLK_FREQ_450; + data = 0; + break; + case 540000: + val |= LCPLL_CLK_FREQ_54O_BDW; + data = 1; + break; + case 337500: + val |= LCPLL_CLK_FREQ_337_5_BDW; + data = 2; + break; + case 675000: + val |= LCPLL_CLK_FREQ_675_BDW; + data = 3; + break; + default: + WARN(1, "invalid cdclk frequency\n"); + return; + } + + I915_WRITE(LCPLL_CTL, val); + + val = I915_READ(LCPLL_CTL); + val &= ~LCPLL_CD_SOURCE_FCLK; + I915_WRITE(LCPLL_CTL, val); + + if (wait_for_us((I915_READ(LCPLL_CTL) & + LCPLL_CD_SOURCE_FCLK_DONE) == 0, 1)) + DRM_ERROR("Switching back to LCPLL failed\n"); + + mutex_lock(&dev_priv->rps.hw_lock); + sandybridge_pcode_write(dev_priv, HSW_PCODE_DE_WRITE_FREQ_REQ, data); + mutex_unlock(&dev_priv->rps.hw_lock); + + I915_WRITE(CDCLK_FREQ, DIV_ROUND_CLOSEST(cdclk, 1000) - 1); + + intel_update_cdclk(dev_priv); + + WARN(cdclk != dev_priv->cdclk_freq, + "cdclk requested %d kHz but got %d kHz\n", + cdclk, dev_priv->cdclk_freq); +} + +static int skl_calc_cdclk(int max_pixclk, int vco) +{ + if (vco == 8640000) { + if (max_pixclk > 540000) + return 617143; + else if (max_pixclk > 432000) + return 540000; + else if (max_pixclk > 308571) + return 432000; + else + return 308571; + } else { + if (max_pixclk > 540000) + return 675000; + else if (max_pixclk > 450000) + return 540000; + else if (max_pixclk > 337500) + return 450000; + else + return 337500; + } +} + +static void skl_dpll0_update(struct drm_i915_private *dev_priv) +{ + u32 val; + + dev_priv->cdclk_pll.ref = 24000; + dev_priv->cdclk_pll.vco = 0; + + val = I915_READ(LCPLL1_CTL); + if ((val & LCPLL_PLL_ENABLE) == 0) + return; + + if (WARN_ON((val & LCPLL_PLL_LOCK) == 0)) + return; + + val = I915_READ(DPLL_CTRL1); + + if (WARN_ON((val & (DPLL_CTRL1_HDMI_MODE(SKL_DPLL0) | + DPLL_CTRL1_SSC(SKL_DPLL0) | + DPLL_CTRL1_OVERRIDE(SKL_DPLL0))) != + DPLL_CTRL1_OVERRIDE(SKL_DPLL0))) + return; + + switch (val & DPLL_CTRL1_LINK_RATE_MASK(SKL_DPLL0)) { + case DPLL_CTRL1_LINK_RATE(DPLL_CTRL1_LINK_RATE_810, SKL_DPLL0): + case DPLL_CTRL1_LINK_RATE(DPLL_CTRL1_LINK_RATE_1350, SKL_DPLL0): + case DPLL_CTRL1_LINK_RATE(DPLL_CTRL1_LINK_RATE_1620, SKL_DPLL0): + case DPLL_CTRL1_LINK_RATE(DPLL_CTRL1_LINK_RATE_2700, SKL_DPLL0): + dev_priv->cdclk_pll.vco = 8100000; + break; + case DPLL_CTRL1_LINK_RATE(DPLL_CTRL1_LINK_RATE_1080, SKL_DPLL0): + case DPLL_CTRL1_LINK_RATE(DPLL_CTRL1_LINK_RATE_2160, SKL_DPLL0): + dev_priv->cdclk_pll.vco = 8640000; + break; + default: + MISSING_CASE(val & DPLL_CTRL1_LINK_RATE_MASK(SKL_DPLL0)); + break; + } +} + +static int skl_get_cdclk(struct drm_i915_private *dev_priv) +{ + u32 cdctl; + + skl_dpll0_update(dev_priv); + + if (dev_priv->cdclk_pll.vco == 0) + return dev_priv->cdclk_pll.ref; + + cdctl = I915_READ(CDCLK_CTL); + + if (dev_priv->cdclk_pll.vco == 8640000) { + switch (cdctl & CDCLK_FREQ_SEL_MASK) { + case CDCLK_FREQ_450_432: + return 432000; + case CDCLK_FREQ_337_308: + return 308571; + case CDCLK_FREQ_540: + return 540000; + case CDCLK_FREQ_675_617: + return 617143; + default: + MISSING_CASE(cdctl & CDCLK_FREQ_SEL_MASK); + } + } else { + switch (cdctl & CDCLK_FREQ_SEL_MASK) { + case CDCLK_FREQ_450_432: + return 450000; + case CDCLK_FREQ_337_308: + return 337500; + case CDCLK_FREQ_540: + return 540000; + case CDCLK_FREQ_675_617: + return 675000; + default: + MISSING_CASE(cdctl & CDCLK_FREQ_SEL_MASK); + } + } + + return dev_priv->cdclk_pll.ref; +} + +/* convert from kHz to .1 fixpoint MHz with -1MHz offset */ +static int skl_cdclk_decimal(int cdclk) +{ + return DIV_ROUND_CLOSEST(cdclk - 1000, 500); +} + +static void skl_set_preferred_cdclk_vco(struct drm_i915_private *dev_priv, + int vco) +{ + bool changed = dev_priv->skl_preferred_vco_freq != vco; + + dev_priv->skl_preferred_vco_freq = vco; + + if (changed) + intel_update_max_cdclk(dev_priv); +} + +static void skl_dpll0_enable(struct drm_i915_private *dev_priv, int vco) +{ + int min_cdclk = skl_calc_cdclk(0, vco); + u32 val; + + WARN_ON(vco != 8100000 && vco != 8640000); + + /* select the minimum CDCLK before enabling DPLL 0 */ + val = CDCLK_FREQ_337_308 | skl_cdclk_decimal(min_cdclk); + I915_WRITE(CDCLK_CTL, val); + POSTING_READ(CDCLK_CTL); + + /* + * We always enable DPLL0 with the lowest link rate possible, but still + * taking into account the VCO required to operate the eDP panel at the + * desired frequency. The usual DP link rates operate with a VCO of + * 8100 while the eDP 1.4 alternate link rates need a VCO of 8640. + * The modeset code is responsible for the selection of the exact link + * rate later on, with the constraint of choosing a frequency that + * works with vco. + */ + val = I915_READ(DPLL_CTRL1); + + val &= ~(DPLL_CTRL1_HDMI_MODE(SKL_DPLL0) | DPLL_CTRL1_SSC(SKL_DPLL0) | + DPLL_CTRL1_LINK_RATE_MASK(SKL_DPLL0)); + val |= DPLL_CTRL1_OVERRIDE(SKL_DPLL0); + if (vco == 8640000) + val |= DPLL_CTRL1_LINK_RATE(DPLL_CTRL1_LINK_RATE_1080, + SKL_DPLL0); + else + val |= DPLL_CTRL1_LINK_RATE(DPLL_CTRL1_LINK_RATE_810, + SKL_DPLL0); + + I915_WRITE(DPLL_CTRL1, val); + POSTING_READ(DPLL_CTRL1); + + I915_WRITE(LCPLL1_CTL, I915_READ(LCPLL1_CTL) | LCPLL_PLL_ENABLE); + + if (intel_wait_for_register(dev_priv, + LCPLL1_CTL, LCPLL_PLL_LOCK, LCPLL_PLL_LOCK, + 5)) + DRM_ERROR("DPLL0 not locked\n"); + + dev_priv->cdclk_pll.vco = vco; + + /* We'll want to keep using the current vco from now on. */ + skl_set_preferred_cdclk_vco(dev_priv, vco); +} + +static void skl_dpll0_disable(struct drm_i915_private *dev_priv) +{ + I915_WRITE(LCPLL1_CTL, I915_READ(LCPLL1_CTL) & ~LCPLL_PLL_ENABLE); + if (intel_wait_for_register(dev_priv, + LCPLL1_CTL, LCPLL_PLL_LOCK, 0, + 1)) + DRM_ERROR("Couldn't disable DPLL0\n"); + + dev_priv->cdclk_pll.vco = 0; +} + +static void skl_set_cdclk(struct drm_i915_private *dev_priv, + int cdclk, int vco) +{ + u32 freq_select, pcu_ack; + int ret; + + WARN_ON((cdclk == 24000) != (vco == 0)); + + DRM_DEBUG_DRIVER("Changing CDCLK to %d kHz (VCO %d kHz)\n", + cdclk, vco); + + mutex_lock(&dev_priv->rps.hw_lock); + ret = skl_pcode_request(dev_priv, SKL_PCODE_CDCLK_CONTROL, + SKL_CDCLK_PREPARE_FOR_CHANGE, + SKL_CDCLK_READY_FOR_CHANGE, + SKL_CDCLK_READY_FOR_CHANGE, 3); + mutex_unlock(&dev_priv->rps.hw_lock); + if (ret) { + DRM_ERROR("Failed to inform PCU about cdclk change (%d)\n", + ret); + return; + } + + /* set CDCLK_CTL */ + switch (cdclk) { + case 450000: + case 432000: + freq_select = CDCLK_FREQ_450_432; + pcu_ack = 1; + break; + case 540000: + freq_select = CDCLK_FREQ_540; + pcu_ack = 2; + break; + case 308571: + case 337500: + default: + freq_select = CDCLK_FREQ_337_308; + pcu_ack = 0; + break; + case 617143: + case 675000: + freq_select = CDCLK_FREQ_675_617; + pcu_ack = 3; + break; + } + + if (dev_priv->cdclk_pll.vco != 0 && + dev_priv->cdclk_pll.vco != vco) + skl_dpll0_disable(dev_priv); + + if (dev_priv->cdclk_pll.vco != vco) + skl_dpll0_enable(dev_priv, vco); + + I915_WRITE(CDCLK_CTL, freq_select | skl_cdclk_decimal(cdclk)); + POSTING_READ(CDCLK_CTL); + + /* inform PCU of the change */ + mutex_lock(&dev_priv->rps.hw_lock); + sandybridge_pcode_write(dev_priv, SKL_PCODE_CDCLK_CONTROL, pcu_ack); + mutex_unlock(&dev_priv->rps.hw_lock); + + intel_update_cdclk(dev_priv); +} + +static void skl_sanitize_cdclk(struct drm_i915_private *dev_priv) +{ + uint32_t cdctl, expected; + + /* + * check if the pre-os initialized the display + * There is SWF18 scratchpad register defined which is set by the + * pre-os which can be used by the OS drivers to check the status + */ + if ((I915_READ(SWF_ILK(0x18)) & 0x00FFFFFF) == 0) + goto sanitize; + + intel_update_cdclk(dev_priv); + /* Is PLL enabled and locked ? */ + if (dev_priv->cdclk_pll.vco == 0 || + dev_priv->cdclk_freq == dev_priv->cdclk_pll.ref) + goto sanitize; + + /* DPLL okay; verify the cdclock + * + * Noticed in some instances that the freq selection is correct but + * decimal part is programmed wrong from BIOS where pre-os does not + * enable display. Verify the same as well. + */ + cdctl = I915_READ(CDCLK_CTL); + expected = (cdctl & CDCLK_FREQ_SEL_MASK) | + skl_cdclk_decimal(dev_priv->cdclk_freq); + if (cdctl == expected) + /* All well; nothing to sanitize */ + return; + +sanitize: + DRM_DEBUG_KMS("Sanitizing cdclk programmed by pre-os\n"); + + /* force cdclk programming */ + dev_priv->cdclk_freq = 0; + /* force full PLL disable + enable */ + dev_priv->cdclk_pll.vco = -1; +} + +/** + * skl_init_cdclk - Initialize CDCLK on SKL + * @dev_priv: i915 device + * + * Initialize CDCLK for SKL and derivatives. This is generally + * done only during the display core initialization sequence, + * after which the DMC will take care of turning CDCLK off/on + * as needed. + */ +void skl_init_cdclk(struct drm_i915_private *dev_priv) +{ + int cdclk, vco; + + skl_sanitize_cdclk(dev_priv); + + if (dev_priv->cdclk_freq != 0 && dev_priv->cdclk_pll.vco != 0) { + /* + * Use the current vco as our initial + * guess as to what the preferred vco is. + */ + if (dev_priv->skl_preferred_vco_freq == 0) + skl_set_preferred_cdclk_vco(dev_priv, + dev_priv->cdclk_pll.vco); + return; + } + + vco = dev_priv->skl_preferred_vco_freq; + if (vco == 0) + vco = 8100000; + cdclk = skl_calc_cdclk(0, vco); + + skl_set_cdclk(dev_priv, cdclk, vco); +} + +/** + * skl_uninit_cdclk - Uninitialize CDCLK on SKL + * @dev_priv: i915 device + * + * Uninitialize CDCLK for SKL and derivatives. This is done only + * during the display core uninitialization sequence. + */ +void skl_uninit_cdclk(struct drm_i915_private *dev_priv) +{ + skl_set_cdclk(dev_priv, dev_priv->cdclk_pll.ref, 0); +} + +static int bxt_calc_cdclk(int max_pixclk) +{ + if (max_pixclk > 576000) + return 624000; + else if (max_pixclk > 384000) + return 576000; + else if (max_pixclk > 288000) + return 384000; + else if (max_pixclk > 144000) + return 288000; + else + return 144000; +} + +static int glk_calc_cdclk(int max_pixclk) +{ + if (max_pixclk > 2 * 158400) + return 316800; + else if (max_pixclk > 2 * 79200) + return 158400; + else + return 79200; +} + +static int bxt_de_pll_vco(struct drm_i915_private *dev_priv, int cdclk) +{ + int ratio; + + if (cdclk == dev_priv->cdclk_pll.ref) + return 0; + + switch (cdclk) { + default: + MISSING_CASE(cdclk); + case 144000: + case 288000: + case 384000: + case 576000: + ratio = 60; + break; + case 624000: + ratio = 65; + break; + } + + return dev_priv->cdclk_pll.ref * ratio; +} + +static int glk_de_pll_vco(struct drm_i915_private *dev_priv, int cdclk) +{ + int ratio; + + if (cdclk == dev_priv->cdclk_pll.ref) + return 0; + + switch (cdclk) { + default: + MISSING_CASE(cdclk); + case 79200: + case 158400: + case 316800: + ratio = 33; + break; + } + + return dev_priv->cdclk_pll.ref * ratio; +} + +static void bxt_de_pll_update(struct drm_i915_private *dev_priv) +{ + u32 val; + + dev_priv->cdclk_pll.ref = 19200; + dev_priv->cdclk_pll.vco = 0; + + val = I915_READ(BXT_DE_PLL_ENABLE); + if ((val & BXT_DE_PLL_PLL_ENABLE) == 0) + return; + + if (WARN_ON((val & BXT_DE_PLL_LOCK) == 0)) + return; + + val = I915_READ(BXT_DE_PLL_CTL); + dev_priv->cdclk_pll.vco = (val & BXT_DE_PLL_RATIO_MASK) * + dev_priv->cdclk_pll.ref; +} + +static int bxt_get_cdclk(struct drm_i915_private *dev_priv) +{ + u32 divider; + int div, vco; + + bxt_de_pll_update(dev_priv); + + vco = dev_priv->cdclk_pll.vco; + if (vco == 0) + return dev_priv->cdclk_pll.ref; + + divider = I915_READ(CDCLK_CTL) & BXT_CDCLK_CD2X_DIV_SEL_MASK; + + switch (divider) { + case BXT_CDCLK_CD2X_DIV_SEL_1: + div = 2; + break; + case BXT_CDCLK_CD2X_DIV_SEL_1_5: + WARN(IS_GEMINILAKE(dev_priv), "Unsupported divider\n"); + div = 3; + break; + case BXT_CDCLK_CD2X_DIV_SEL_2: + div = 4; + break; + case BXT_CDCLK_CD2X_DIV_SEL_4: + div = 8; + break; + default: + MISSING_CASE(divider); + return dev_priv->cdclk_pll.ref; + } + + return DIV_ROUND_CLOSEST(vco, div); +} + +static void bxt_de_pll_disable(struct drm_i915_private *dev_priv) +{ + I915_WRITE(BXT_DE_PLL_ENABLE, 0); + + /* Timeout 200us */ + if (intel_wait_for_register(dev_priv, + BXT_DE_PLL_ENABLE, BXT_DE_PLL_LOCK, 0, + 1)) + DRM_ERROR("timeout waiting for DE PLL unlock\n"); + + dev_priv->cdclk_pll.vco = 0; +} + +static void bxt_de_pll_enable(struct drm_i915_private *dev_priv, int vco) +{ + int ratio = DIV_ROUND_CLOSEST(vco, dev_priv->cdclk_pll.ref); + u32 val; + + val = I915_READ(BXT_DE_PLL_CTL); + val &= ~BXT_DE_PLL_RATIO_MASK; + val |= BXT_DE_PLL_RATIO(ratio); + I915_WRITE(BXT_DE_PLL_CTL, val); + + I915_WRITE(BXT_DE_PLL_ENABLE, BXT_DE_PLL_PLL_ENABLE); + + /* Timeout 200us */ + if (intel_wait_for_register(dev_priv, + BXT_DE_PLL_ENABLE, + BXT_DE_PLL_LOCK, + BXT_DE_PLL_LOCK, + 1)) + DRM_ERROR("timeout waiting for DE PLL lock\n"); + + dev_priv->cdclk_pll.vco = vco; +} + +static void bxt_set_cdclk(struct drm_i915_private *dev_priv, int cdclk) +{ + u32 val, divider; + int vco, ret; + + if (IS_GEMINILAKE(dev_priv)) + vco = glk_de_pll_vco(dev_priv, cdclk); + else + vco = bxt_de_pll_vco(dev_priv, cdclk); + + DRM_DEBUG_DRIVER("Changing CDCLK to %d kHz (VCO %d kHz)\n", + cdclk, vco); + + /* cdclk = vco / 2 / div{1,1.5,2,4} */ + switch (DIV_ROUND_CLOSEST(vco, cdclk)) { + case 8: + divider = BXT_CDCLK_CD2X_DIV_SEL_4; + break; + case 4: + divider = BXT_CDCLK_CD2X_DIV_SEL_2; + break; + case 3: + WARN(IS_GEMINILAKE(dev_priv), "Unsupported divider\n"); + divider = BXT_CDCLK_CD2X_DIV_SEL_1_5; + break; + case 2: + divider = BXT_CDCLK_CD2X_DIV_SEL_1; + break; + default: + WARN_ON(cdclk != dev_priv->cdclk_pll.ref); + WARN_ON(vco != 0); + + divider = BXT_CDCLK_CD2X_DIV_SEL_1; + break; + } + + /* Inform power controller of upcoming frequency change */ + mutex_lock(&dev_priv->rps.hw_lock); + ret = sandybridge_pcode_write(dev_priv, HSW_PCODE_DE_WRITE_FREQ_REQ, + 0x80000000); + mutex_unlock(&dev_priv->rps.hw_lock); + + if (ret) { + DRM_ERROR("PCode CDCLK freq change notify failed (err %d, freq %d)\n", + ret, cdclk); + return; + } + + if (dev_priv->cdclk_pll.vco != 0 && + dev_priv->cdclk_pll.vco != vco) + bxt_de_pll_disable(dev_priv); + + if (dev_priv->cdclk_pll.vco != vco) + bxt_de_pll_enable(dev_priv, vco); + + val = divider | skl_cdclk_decimal(cdclk); + /* + * FIXME if only the cd2x divider needs changing, it could be done + * without shutting off the pipe (if only one pipe is active). + */ + val |= BXT_CDCLK_CD2X_PIPE_NONE; + /* + * Disable SSA Precharge when CD clock frequency < 500 MHz, + * enable otherwise. + */ + if (cdclk >= 500000) + val |= BXT_CDCLK_SSA_PRECHARGE_ENABLE; + I915_WRITE(CDCLK_CTL, val); + + mutex_lock(&dev_priv->rps.hw_lock); + ret = sandybridge_pcode_write(dev_priv, HSW_PCODE_DE_WRITE_FREQ_REQ, + DIV_ROUND_UP(cdclk, 25000)); + mutex_unlock(&dev_priv->rps.hw_lock); + + if (ret) { + DRM_ERROR("PCode CDCLK freq set failed, (err %d, freq %d)\n", + ret, cdclk); + return; + } + + intel_update_cdclk(dev_priv); +} + +static void bxt_sanitize_cdclk(struct drm_i915_private *dev_priv) +{ + u32 cdctl, expected; + + intel_update_cdclk(dev_priv); + + if (dev_priv->cdclk_pll.vco == 0 || + dev_priv->cdclk_freq == dev_priv->cdclk_pll.ref) + goto sanitize; + + /* DPLL okay; verify the cdclock + * + * Some BIOS versions leave an incorrect decimal frequency value and + * set reserved MBZ bits in CDCLK_CTL at least during exiting from S4, + * so sanitize this register. + */ + cdctl = I915_READ(CDCLK_CTL); + /* + * Let's ignore the pipe field, since BIOS could have configured the + * dividers both synching to an active pipe, or asynchronously + * (PIPE_NONE). + */ + cdctl &= ~BXT_CDCLK_CD2X_PIPE_NONE; + + expected = (cdctl & BXT_CDCLK_CD2X_DIV_SEL_MASK) | + skl_cdclk_decimal(dev_priv->cdclk_freq); + /* + * Disable SSA Precharge when CD clock frequency < 500 MHz, + * enable otherwise. + */ + if (dev_priv->cdclk_freq >= 500000) + expected |= BXT_CDCLK_SSA_PRECHARGE_ENABLE; + + if (cdctl == expected) + /* All well; nothing to sanitize */ + return; + +sanitize: + DRM_DEBUG_KMS("Sanitizing cdclk programmed by pre-os\n"); + + /* force cdclk programming */ + dev_priv->cdclk_freq = 0; + + /* force full PLL disable + enable */ + dev_priv->cdclk_pll.vco = -1; +} + +/** + * bxt_init_cdclk - Initialize CDCLK on BXT + * @dev_priv: i915 device + * + * Initialize CDCLK for BXT and derivatives. This is generally + * done only during the display core initialization sequence, + * after which the DMC will take care of turning CDCLK off/on + * as needed. + */ +void bxt_init_cdclk(struct drm_i915_private *dev_priv) +{ + int cdclk; + + bxt_sanitize_cdclk(dev_priv); + + if (dev_priv->cdclk_freq != 0 && dev_priv->cdclk_pll.vco != 0) + return; + + /* + * FIXME: + * - The initial CDCLK needs to be read from VBT. + * Need to make this change after VBT has changes for BXT. + */ + if (IS_GEMINILAKE(dev_priv)) + cdclk = glk_calc_cdclk(0); + else + cdclk = bxt_calc_cdclk(0); + + bxt_set_cdclk(dev_priv, cdclk); +} + +/** + * bxt_uninit_cdclk - Uninitialize CDCLK on BXT + * @dev_priv: i915 device + * + * Uninitialize CDCLK for BXT and derivatives. This is done only + * during the display core uninitialization sequence. + */ +void bxt_uninit_cdclk(struct drm_i915_private *dev_priv) +{ + bxt_set_cdclk(dev_priv, dev_priv->cdclk_pll.ref); +} + +static int bdw_adjust_min_pipe_pixel_rate(struct intel_crtc_state *crtc_state, + int pixel_rate) +{ + struct drm_i915_private *dev_priv = + to_i915(crtc_state->base.crtc->dev); + + /* pixel rate mustn't exceed 95% of cdclk with IPS on BDW */ + if (IS_BROADWELL(dev_priv) && crtc_state->ips_enabled) + pixel_rate = DIV_ROUND_UP(pixel_rate * 100, 95); + + /* BSpec says "Do not use DisplayPort with CDCLK less than + * 432 MHz, audio enabled, port width x4, and link rate + * HBR2 (5.4 GHz), or else there may be audio corruption or + * screen corruption." + */ + if (intel_crtc_has_dp_encoder(crtc_state) && + crtc_state->has_audio && + crtc_state->port_clock >= 540000 && + crtc_state->lane_count == 4) + pixel_rate = max(432000, pixel_rate); + + return pixel_rate; +} + +/* compute the max rate for new configuration */ +static int intel_max_pixel_rate(struct drm_atomic_state *state) +{ + struct intel_atomic_state *intel_state = to_intel_atomic_state(state); + struct drm_i915_private *dev_priv = to_i915(state->dev); + struct drm_crtc *crtc; + struct drm_crtc_state *cstate; + struct intel_crtc_state *crtc_state; + unsigned int max_pixel_rate = 0, i; + enum pipe pipe; + + memcpy(intel_state->min_pixclk, dev_priv->min_pixclk, + sizeof(intel_state->min_pixclk)); + + for_each_crtc_in_state(state, crtc, cstate, i) { + int pixel_rate; + + crtc_state = to_intel_crtc_state(cstate); + if (!crtc_state->base.enable) { + intel_state->min_pixclk[i] = 0; + continue; + } + + pixel_rate = crtc_state->pixel_rate; + + if (IS_BROADWELL(dev_priv) || IS_GEN9(dev_priv)) + pixel_rate = + bdw_adjust_min_pipe_pixel_rate(crtc_state, + pixel_rate); + + intel_state->min_pixclk[i] = pixel_rate; + } + + for_each_pipe(dev_priv, pipe) + max_pixel_rate = max(intel_state->min_pixclk[pipe], + max_pixel_rate); + + return max_pixel_rate; +} + +static int vlv_modeset_calc_cdclk(struct drm_atomic_state *state) +{ + struct drm_device *dev = state->dev; + struct drm_i915_private *dev_priv = to_i915(dev); + int max_pixclk = intel_max_pixel_rate(state); + struct intel_atomic_state *intel_state = + to_intel_atomic_state(state); + + intel_state->cdclk = intel_state->dev_cdclk = + vlv_calc_cdclk(dev_priv, max_pixclk); + + if (!intel_state->active_crtcs) + intel_state->dev_cdclk = vlv_calc_cdclk(dev_priv, 0); + + return 0; +} + +static void vlv_modeset_commit_cdclk(struct drm_atomic_state *old_state) +{ + struct drm_device *dev = old_state->dev; + struct drm_i915_private *dev_priv = to_i915(dev); + struct intel_atomic_state *old_intel_state = + to_intel_atomic_state(old_state); + unsigned int req_cdclk = old_intel_state->dev_cdclk; + + /* + * FIXME: We can end up here with all power domains off, yet + * with a CDCLK frequency other than the minimum. To account + * for this take the PIPE-A power domain, which covers the HW + * blocks needed for the following programming. This can be + * removed once it's guaranteed that we get here either with + * the minimum CDCLK set, or the required power domains + * enabled. + */ + intel_display_power_get(dev_priv, POWER_DOMAIN_PIPE_A); + + if (IS_CHERRYVIEW(dev_priv)) + chv_set_cdclk(dev, req_cdclk); + else + vlv_set_cdclk(dev, req_cdclk); + + vlv_program_pfi_credits(dev_priv); + + intel_display_power_put(dev_priv, POWER_DOMAIN_PIPE_A); +} + +static int bdw_modeset_calc_cdclk(struct drm_atomic_state *state) +{ + struct drm_i915_private *dev_priv = to_i915(state->dev); + struct intel_atomic_state *intel_state = to_intel_atomic_state(state); + int max_pixclk = intel_max_pixel_rate(state); + int cdclk; + + /* + * FIXME should also account for plane ratio + * once 64bpp pixel formats are supported. + */ + cdclk = bdw_calc_cdclk(max_pixclk); + + if (cdclk > dev_priv->max_cdclk_freq) { + DRM_DEBUG_KMS("requested cdclk (%d kHz) exceeds max (%d kHz)\n", + cdclk, dev_priv->max_cdclk_freq); + return -EINVAL; + } + + intel_state->cdclk = intel_state->dev_cdclk = cdclk; + if (!intel_state->active_crtcs) + intel_state->dev_cdclk = bdw_calc_cdclk(0); + + return 0; +} + +static void bdw_modeset_commit_cdclk(struct drm_atomic_state *old_state) +{ + struct drm_device *dev = old_state->dev; + struct intel_atomic_state *old_intel_state = + to_intel_atomic_state(old_state); + unsigned int req_cdclk = old_intel_state->dev_cdclk; + + bdw_set_cdclk(dev, req_cdclk); +} + +static int skl_modeset_calc_cdclk(struct drm_atomic_state *state) +{ + struct intel_atomic_state *intel_state = to_intel_atomic_state(state); + struct drm_i915_private *dev_priv = to_i915(state->dev); + const int max_pixclk = intel_max_pixel_rate(state); + int vco = intel_state->cdclk_pll_vco; + int cdclk; + + /* + * FIXME should also account for plane ratio + * once 64bpp pixel formats are supported. + */ + cdclk = skl_calc_cdclk(max_pixclk, vco); + + /* + * FIXME move the cdclk caclulation to + * compute_config() so we can fail gracegully. + */ + if (cdclk > dev_priv->max_cdclk_freq) { + DRM_ERROR("requested cdclk (%d kHz) exceeds max (%d kHz)\n", + cdclk, dev_priv->max_cdclk_freq); + cdclk = dev_priv->max_cdclk_freq; + } + + intel_state->cdclk = intel_state->dev_cdclk = cdclk; + if (!intel_state->active_crtcs) + intel_state->dev_cdclk = skl_calc_cdclk(0, vco); + + return 0; +} + +static void skl_modeset_commit_cdclk(struct drm_atomic_state *old_state) +{ + struct drm_i915_private *dev_priv = to_i915(old_state->dev); + struct intel_atomic_state *intel_state = + to_intel_atomic_state(old_state); + unsigned int req_cdclk = intel_state->dev_cdclk; + unsigned int req_vco = intel_state->cdclk_pll_vco; + + skl_set_cdclk(dev_priv, req_cdclk, req_vco); +} + +static int bxt_modeset_calc_cdclk(struct drm_atomic_state *state) +{ + struct drm_i915_private *dev_priv = to_i915(state->dev); + int max_pixclk = intel_max_pixel_rate(state); + struct intel_atomic_state *intel_state = + to_intel_atomic_state(state); + int cdclk; + + if (IS_GEMINILAKE(dev_priv)) + cdclk = glk_calc_cdclk(max_pixclk); + else + cdclk = bxt_calc_cdclk(max_pixclk); + + intel_state->cdclk = intel_state->dev_cdclk = cdclk; + + if (!intel_state->active_crtcs) { + if (IS_GEMINILAKE(dev_priv)) + cdclk = glk_calc_cdclk(0); + else + cdclk = bxt_calc_cdclk(0); + + intel_state->dev_cdclk = cdclk; + } + + return 0; +} + +static void bxt_modeset_commit_cdclk(struct drm_atomic_state *old_state) +{ + struct drm_device *dev = old_state->dev; + struct intel_atomic_state *old_intel_state = + to_intel_atomic_state(old_state); + unsigned int req_cdclk = old_intel_state->dev_cdclk; + + bxt_set_cdclk(to_i915(dev), req_cdclk); +} + +static int intel_compute_max_dotclk(struct drm_i915_private *dev_priv) +{ + int max_cdclk_freq = dev_priv->max_cdclk_freq; + + if (IS_GEMINILAKE(dev_priv)) + return 2 * max_cdclk_freq; + else if (INTEL_INFO(dev_priv)->gen >= 9 || + IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) + return max_cdclk_freq; + else if (IS_CHERRYVIEW(dev_priv)) + return max_cdclk_freq*95/100; + else if (INTEL_INFO(dev_priv)->gen < 4) + return 2*max_cdclk_freq*90/100; + else + return max_cdclk_freq*90/100; +} + +/** + * intel_update_max_cdclk - Determine the maximum support CDCLK frequency + * @dev_priv: i915 device + * + * Determine the maximum CDCLK frequency the platform supports, and also + * derive the maximum dot clock frequency the maximum CDCLK frequency + * allows. + */ +void intel_update_max_cdclk(struct drm_i915_private *dev_priv) +{ + if (IS_GEN9_BC(dev_priv)) { + u32 limit = I915_READ(SKL_DFSM) & SKL_DFSM_CDCLK_LIMIT_MASK; + int max_cdclk, vco; + + vco = dev_priv->skl_preferred_vco_freq; + WARN_ON(vco != 8100000 && vco != 8640000); + + /* + * Use the lower (vco 8640) cdclk values as a + * first guess. skl_calc_cdclk() will correct it + * if the preferred vco is 8100 instead. + */ + if (limit == SKL_DFSM_CDCLK_LIMIT_675) + max_cdclk = 617143; + else if (limit == SKL_DFSM_CDCLK_LIMIT_540) + max_cdclk = 540000; + else if (limit == SKL_DFSM_CDCLK_LIMIT_450) + max_cdclk = 432000; + else + max_cdclk = 308571; + + dev_priv->max_cdclk_freq = skl_calc_cdclk(max_cdclk, vco); + } else if (IS_GEMINILAKE(dev_priv)) { + dev_priv->max_cdclk_freq = 316800; + } else if (IS_BROXTON(dev_priv)) { + dev_priv->max_cdclk_freq = 624000; + } else if (IS_BROADWELL(dev_priv)) { + /* + * FIXME with extra cooling we can allow + * 540 MHz for ULX and 675 Mhz for ULT. + * How can we know if extra cooling is + * available? PCI ID, VTB, something else? + */ + if (I915_READ(FUSE_STRAP) & HSW_CDCLK_LIMIT) + dev_priv->max_cdclk_freq = 450000; + else if (IS_BDW_ULX(dev_priv)) + dev_priv->max_cdclk_freq = 450000; + else if (IS_BDW_ULT(dev_priv)) + dev_priv->max_cdclk_freq = 540000; + else + dev_priv->max_cdclk_freq = 675000; + } else if (IS_CHERRYVIEW(dev_priv)) { + dev_priv->max_cdclk_freq = 320000; + } else if (IS_VALLEYVIEW(dev_priv)) { + dev_priv->max_cdclk_freq = 400000; + } else { + /* otherwise assume cdclk is fixed */ + dev_priv->max_cdclk_freq = dev_priv->cdclk_freq; + } + + dev_priv->max_dotclk_freq = intel_compute_max_dotclk(dev_priv); + + DRM_DEBUG_DRIVER("Max CD clock rate: %d kHz\n", + dev_priv->max_cdclk_freq); + + DRM_DEBUG_DRIVER("Max dotclock rate: %d kHz\n", + dev_priv->max_dotclk_freq); +} + +/** + * intel_update_cdclk - Determine the current CDCLK frequency + * @dev_priv: i915 device + * + * Determine the current CDCLK frequency. + */ +void intel_update_cdclk(struct drm_i915_private *dev_priv) +{ + dev_priv->cdclk_freq = dev_priv->display.get_cdclk(dev_priv); + + if (INTEL_GEN(dev_priv) >= 9) + DRM_DEBUG_DRIVER("Current CD clock rate: %d kHz, VCO: %d kHz, ref: %d kHz\n", + dev_priv->cdclk_freq, dev_priv->cdclk_pll.vco, + dev_priv->cdclk_pll.ref); + else + DRM_DEBUG_DRIVER("Current CD clock rate: %d kHz\n", + dev_priv->cdclk_freq); + + /* + * 9:0 CMBUS [sic] CDCLK frequency (cdfreq): + * Programmng [sic] note: bit[9:2] should be programmed to the number + * of cdclk that generates 4MHz reference clock freq which is used to + * generate GMBus clock. This will vary with the cdclk freq. + */ + if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) + I915_WRITE(GMBUSFREQ_VLV, + DIV_ROUND_UP(dev_priv->cdclk_freq, 1000)); +} + +static int pch_rawclk(struct drm_i915_private *dev_priv) +{ + return (I915_READ(PCH_RAWCLK_FREQ) & RAWCLK_FREQ_MASK) * 1000; +} + +static int vlv_hrawclk(struct drm_i915_private *dev_priv) +{ + /* RAWCLK_FREQ_VLV register updated from power well code */ + return vlv_get_cck_clock_hpll(dev_priv, "hrawclk", + CCK_DISPLAY_REF_CLOCK_CONTROL); +} + +static int g4x_hrawclk(struct drm_i915_private *dev_priv) +{ + uint32_t clkcfg; + + /* hrawclock is 1/4 the FSB frequency */ + clkcfg = I915_READ(CLKCFG); + switch (clkcfg & CLKCFG_FSB_MASK) { + case CLKCFG_FSB_400: + return 100000; + case CLKCFG_FSB_533: + return 133333; + case CLKCFG_FSB_667: + return 166667; + case CLKCFG_FSB_800: + return 200000; + case CLKCFG_FSB_1067: + return 266667; + case CLKCFG_FSB_1333: + return 333333; + /* these two are just a guess; one of them might be right */ + case CLKCFG_FSB_1600: + case CLKCFG_FSB_1600_ALT: + return 400000; + default: + return 133333; + } +} + +/** + * intel_update_rawclk - Determine the current RAWCLK frequency + * @dev_priv: i915 device + * + * Determine the current RAWCLK frequency. RAWCLK is a fixed + * frequency clock so this needs to done only once. + */ +void intel_update_rawclk(struct drm_i915_private *dev_priv) +{ + if (HAS_PCH_SPLIT(dev_priv)) + dev_priv->rawclk_freq = pch_rawclk(dev_priv); + else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) + dev_priv->rawclk_freq = vlv_hrawclk(dev_priv); + else if (IS_G4X(dev_priv) || IS_PINEVIEW(dev_priv)) + dev_priv->rawclk_freq = g4x_hrawclk(dev_priv); + else + /* no rawclk on other platforms, or no need to know it */ + return; + + DRM_DEBUG_DRIVER("rawclk rate: %d kHz\n", dev_priv->rawclk_freq); +} + +/** + * intel_init_cdclk_hooks - Initialize CDCLK related modesetting hooks + * @dev_priv: i915 device + */ +void intel_init_cdclk_hooks(struct drm_i915_private *dev_priv) +{ + if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) { + dev_priv->display.modeset_commit_cdclk = + vlv_modeset_commit_cdclk; + dev_priv->display.modeset_calc_cdclk = + vlv_modeset_calc_cdclk; + } else if (IS_BROADWELL(dev_priv)) { + dev_priv->display.modeset_commit_cdclk = + bdw_modeset_commit_cdclk; + dev_priv->display.modeset_calc_cdclk = + bdw_modeset_calc_cdclk; + } else if (IS_GEN9_LP(dev_priv)) { + dev_priv->display.modeset_commit_cdclk = + bxt_modeset_commit_cdclk; + dev_priv->display.modeset_calc_cdclk = + bxt_modeset_calc_cdclk; + } else if (IS_GEN9_BC(dev_priv)) { + dev_priv->display.modeset_commit_cdclk = + skl_modeset_commit_cdclk; + dev_priv->display.modeset_calc_cdclk = + skl_modeset_calc_cdclk; + } + + if (IS_GEN9_BC(dev_priv)) + dev_priv->display.get_cdclk = skl_get_cdclk; + else if (IS_GEN9_LP(dev_priv)) + dev_priv->display.get_cdclk = bxt_get_cdclk; + else if (IS_BROADWELL(dev_priv)) + dev_priv->display.get_cdclk = bdw_get_cdclk; + else if (IS_HASWELL(dev_priv)) + dev_priv->display.get_cdclk = hsw_get_cdclk; + else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) + dev_priv->display.get_cdclk = vlv_get_cdclk; + else if (IS_GEN6(dev_priv) || IS_IVYBRIDGE(dev_priv)) + dev_priv->display.get_cdclk = fixed_400mhz_get_cdclk; + else if (IS_GEN5(dev_priv)) + dev_priv->display.get_cdclk = fixed_450mhz_get_cdclk; + else if (IS_GM45(dev_priv)) + dev_priv->display.get_cdclk = gm45_get_cdclk; + else if (IS_G4X(dev_priv)) + dev_priv->display.get_cdclk = g33_get_cdclk; + else if (IS_I965GM(dev_priv)) + dev_priv->display.get_cdclk = i965gm_get_cdclk; + else if (IS_I965G(dev_priv)) + dev_priv->display.get_cdclk = fixed_400mhz_get_cdclk; + else if (IS_PINEVIEW(dev_priv)) + dev_priv->display.get_cdclk = pnv_get_cdclk; + else if (IS_G33(dev_priv)) + dev_priv->display.get_cdclk = g33_get_cdclk; + else if (IS_I945GM(dev_priv)) + dev_priv->display.get_cdclk = i945gm_get_cdclk; + else if (IS_I945G(dev_priv)) + dev_priv->display.get_cdclk = fixed_400mhz_get_cdclk; + else if (IS_I915GM(dev_priv)) + dev_priv->display.get_cdclk = i915gm_get_cdclk; + else if (IS_I915G(dev_priv)) + dev_priv->display.get_cdclk = fixed_333mhz_get_cdclk; + else if (IS_I865G(dev_priv)) + dev_priv->display.get_cdclk = fixed_266mhz_get_cdclk; + else if (IS_I85X(dev_priv)) + dev_priv->display.get_cdclk = i85x_get_cdclk; + else if (IS_I845G(dev_priv)) + dev_priv->display.get_cdclk = fixed_200mhz_get_cdclk; + else { /* 830 */ + WARN(!IS_I830(dev_priv), + "Unknown platform. Assuming 133 MHz CDCLK\n"); + dev_priv->display.get_cdclk = fixed_133mhz_get_cdclk; + } +} diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index c407cc41c0c9..e996f8ec8f08 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -122,9 +122,6 @@ static void ironlake_pfit_disable(struct intel_crtc *crtc, bool force); static void ironlake_pfit_enable(struct intel_crtc *crtc); static void intel_modeset_setup_hw_state(struct drm_device *dev); static void intel_pre_disable_primary_noatomic(struct drm_crtc *crtc); -static int intel_max_pixel_rate(struct drm_atomic_state *state); -static int glk_calc_cdclk(int max_pixclk); -static int bxt_calc_cdclk(int max_pixclk); struct intel_limit { struct { @@ -170,8 +167,8 @@ int vlv_get_cck_clock(struct drm_i915_private *dev_priv, return DIV_ROUND_CLOSEST(ref_freq << 1, divider + 1); } -static int vlv_get_cck_clock_hpll(struct drm_i915_private *dev_priv, - const char *name, u32 reg) +int vlv_get_cck_clock_hpll(struct drm_i915_private *dev_priv, + const char *name, u32 reg) { if (dev_priv->hpll_freq == 0) dev_priv->hpll_freq = valleyview_get_vco(dev_priv); @@ -180,63 +177,6 @@ static int vlv_get_cck_clock_hpll(struct drm_i915_private *dev_priv, dev_priv->hpll_freq); } -static int -intel_pch_rawclk(struct drm_i915_private *dev_priv) -{ - return (I915_READ(PCH_RAWCLK_FREQ) & RAWCLK_FREQ_MASK) * 1000; -} - -static int -intel_vlv_hrawclk(struct drm_i915_private *dev_priv) -{ - /* RAWCLK_FREQ_VLV register updated from power well code */ - return vlv_get_cck_clock_hpll(dev_priv, "hrawclk", - CCK_DISPLAY_REF_CLOCK_CONTROL); -} - -static int -intel_g4x_hrawclk(struct drm_i915_private *dev_priv) -{ - uint32_t clkcfg; - - /* hrawclock is 1/4 the FSB frequency */ - clkcfg = I915_READ(CLKCFG); - switch (clkcfg & CLKCFG_FSB_MASK) { - case CLKCFG_FSB_400: - return 100000; - case CLKCFG_FSB_533: - return 133333; - case CLKCFG_FSB_667: - return 166667; - case CLKCFG_FSB_800: - return 200000; - case CLKCFG_FSB_1067: - return 266667; - case CLKCFG_FSB_1333: - return 333333; - /* these two are just a guess; one of them might be right */ - case CLKCFG_FSB_1600: - case CLKCFG_FSB_1600_ALT: - return 400000; - default: - return 133333; - } -} - -void intel_update_rawclk(struct drm_i915_private *dev_priv) -{ - if (HAS_PCH_SPLIT(dev_priv)) - dev_priv->rawclk_freq = intel_pch_rawclk(dev_priv); - else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) - dev_priv->rawclk_freq = intel_vlv_hrawclk(dev_priv); - else if (IS_G4X(dev_priv) || IS_PINEVIEW(dev_priv)) - dev_priv->rawclk_freq = intel_g4x_hrawclk(dev_priv); - else - return; /* no rawclk on other platforms, or no need to know it */ - - DRM_DEBUG_DRIVER("rawclk rate: %d kHz\n", dev_priv->rawclk_freq); -} - static void intel_update_czclk(struct drm_i915_private *dev_priv) { if (!(IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))) @@ -5791,1093 +5731,211 @@ static void modeset_put_power_domains(struct drm_i915_private *dev_priv, intel_display_power_put(dev_priv, domain); } -static int intel_compute_max_dotclk(struct drm_i915_private *dev_priv) +static void valleyview_crtc_enable(struct intel_crtc_state *pipe_config, + struct drm_atomic_state *old_state) { - int max_cdclk_freq = dev_priv->max_cdclk_freq; + struct drm_crtc *crtc = pipe_config->base.crtc; + struct drm_device *dev = crtc->dev; + struct drm_i915_private *dev_priv = to_i915(dev); + struct intel_crtc *intel_crtc = to_intel_crtc(crtc); + int pipe = intel_crtc->pipe; - if (IS_GEMINILAKE(dev_priv)) - return 2 * max_cdclk_freq; - else if (INTEL_INFO(dev_priv)->gen >= 9 || - IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) - return max_cdclk_freq; - else if (IS_CHERRYVIEW(dev_priv)) - return max_cdclk_freq*95/100; - else if (INTEL_INFO(dev_priv)->gen < 4) - return 2*max_cdclk_freq*90/100; - else - return max_cdclk_freq*90/100; -} + if (WARN_ON(intel_crtc->active)) + return; -static int skl_calc_cdclk(int max_pixclk, int vco); + if (intel_crtc_has_dp_encoder(intel_crtc->config)) + intel_dp_set_m_n(intel_crtc, M1_N1); -static void intel_update_max_cdclk(struct drm_i915_private *dev_priv) -{ - if (IS_GEN9_BC(dev_priv)) { - u32 limit = I915_READ(SKL_DFSM) & SKL_DFSM_CDCLK_LIMIT_MASK; - int max_cdclk, vco; + intel_set_pipe_timings(intel_crtc); + intel_set_pipe_src_size(intel_crtc); - vco = dev_priv->skl_preferred_vco_freq; - WARN_ON(vco != 8100000 && vco != 8640000); + if (IS_CHERRYVIEW(dev_priv) && pipe == PIPE_B) { + struct drm_i915_private *dev_priv = to_i915(dev); - /* - * Use the lower (vco 8640) cdclk values as a - * first guess. skl_calc_cdclk() will correct it - * if the preferred vco is 8100 instead. - */ - if (limit == SKL_DFSM_CDCLK_LIMIT_675) - max_cdclk = 617143; - else if (limit == SKL_DFSM_CDCLK_LIMIT_540) - max_cdclk = 540000; - else if (limit == SKL_DFSM_CDCLK_LIMIT_450) - max_cdclk = 432000; - else - max_cdclk = 308571; - - dev_priv->max_cdclk_freq = skl_calc_cdclk(max_cdclk, vco); - } else if (IS_GEMINILAKE(dev_priv)) { - dev_priv->max_cdclk_freq = 316800; - } else if (IS_BROXTON(dev_priv)) { - dev_priv->max_cdclk_freq = 624000; - } else if (IS_BROADWELL(dev_priv)) { - /* - * FIXME with extra cooling we can allow - * 540 MHz for ULX and 675 Mhz for ULT. - * How can we know if extra cooling is - * available? PCI ID, VTB, something else? - */ - if (I915_READ(FUSE_STRAP) & HSW_CDCLK_LIMIT) - dev_priv->max_cdclk_freq = 450000; - else if (IS_BDW_ULX(dev_priv)) - dev_priv->max_cdclk_freq = 450000; - else if (IS_BDW_ULT(dev_priv)) - dev_priv->max_cdclk_freq = 540000; - else - dev_priv->max_cdclk_freq = 675000; - } else if (IS_CHERRYVIEW(dev_priv)) { - dev_priv->max_cdclk_freq = 320000; - } else if (IS_VALLEYVIEW(dev_priv)) { - dev_priv->max_cdclk_freq = 400000; - } else { - /* otherwise assume cdclk is fixed */ - dev_priv->max_cdclk_freq = dev_priv->cdclk_freq; + I915_WRITE(CHV_BLEND(pipe), CHV_BLEND_LEGACY); + I915_WRITE(CHV_CANVAS(pipe), 0); } - dev_priv->max_dotclk_freq = intel_compute_max_dotclk(dev_priv); - - DRM_DEBUG_DRIVER("Max CD clock rate: %d kHz\n", - dev_priv->max_cdclk_freq); - - DRM_DEBUG_DRIVER("Max dotclock rate: %d kHz\n", - dev_priv->max_dotclk_freq); -} - -static void intel_update_cdclk(struct drm_i915_private *dev_priv) -{ - dev_priv->cdclk_freq = dev_priv->display.get_cdclk(dev_priv); - - if (INTEL_GEN(dev_priv) >= 9) - DRM_DEBUG_DRIVER("Current CD clock rate: %d kHz, VCO: %d kHz, ref: %d kHz\n", - dev_priv->cdclk_freq, dev_priv->cdclk_pll.vco, - dev_priv->cdclk_pll.ref); - else - DRM_DEBUG_DRIVER("Current CD clock rate: %d kHz\n", - dev_priv->cdclk_freq); - - /* - * 9:0 CMBUS [sic] CDCLK frequency (cdfreq): - * Programmng [sic] note: bit[9:2] should be programmed to the number - * of cdclk that generates 4MHz reference clock freq which is used to - * generate GMBus clock. This will vary with the cdclk freq. - */ - if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) - I915_WRITE(GMBUSFREQ_VLV, DIV_ROUND_UP(dev_priv->cdclk_freq, 1000)); -} + i9xx_set_pipeconf(intel_crtc); -/* convert from kHz to .1 fixpoint MHz with -1MHz offset */ -static int skl_cdclk_decimal(int cdclk) -{ - return DIV_ROUND_CLOSEST(cdclk - 1000, 500); -} + intel_crtc->active = true; -static int bxt_de_pll_vco(struct drm_i915_private *dev_priv, int cdclk) -{ - int ratio; + intel_set_cpu_fifo_underrun_reporting(dev_priv, pipe, true); - if (cdclk == dev_priv->cdclk_pll.ref) - return 0; + intel_encoders_pre_pll_enable(crtc, pipe_config, old_state); - switch (cdclk) { - default: - MISSING_CASE(cdclk); - case 144000: - case 288000: - case 384000: - case 576000: - ratio = 60; - break; - case 624000: - ratio = 65; - break; + if (IS_CHERRYVIEW(dev_priv)) { + chv_prepare_pll(intel_crtc, intel_crtc->config); + chv_enable_pll(intel_crtc, intel_crtc->config); + } else { + vlv_prepare_pll(intel_crtc, intel_crtc->config); + vlv_enable_pll(intel_crtc, intel_crtc->config); } - return dev_priv->cdclk_pll.ref * ratio; -} + intel_encoders_pre_enable(crtc, pipe_config, old_state); -static int glk_de_pll_vco(struct drm_i915_private *dev_priv, int cdclk) -{ - int ratio; + i9xx_pfit_enable(intel_crtc); - if (cdclk == dev_priv->cdclk_pll.ref) - return 0; + intel_color_load_luts(&pipe_config->base); - switch (cdclk) { - default: - MISSING_CASE(cdclk); - case 79200: - case 158400: - case 316800: - ratio = 33; - break; - } + intel_update_watermarks(intel_crtc); + intel_enable_pipe(intel_crtc); + + assert_vblank_disabled(crtc); + drm_crtc_vblank_on(crtc); - return dev_priv->cdclk_pll.ref * ratio; + intel_encoders_enable(crtc, pipe_config, old_state); } -static void bxt_de_pll_disable(struct drm_i915_private *dev_priv) +static void i9xx_set_pll_dividers(struct intel_crtc *crtc) { - I915_WRITE(BXT_DE_PLL_ENABLE, 0); - - /* Timeout 200us */ - if (intel_wait_for_register(dev_priv, - BXT_DE_PLL_ENABLE, BXT_DE_PLL_LOCK, 0, - 1)) - DRM_ERROR("timeout waiting for DE PLL unlock\n"); + struct drm_device *dev = crtc->base.dev; + struct drm_i915_private *dev_priv = to_i915(dev); - dev_priv->cdclk_pll.vco = 0; + I915_WRITE(FP0(crtc->pipe), crtc->config->dpll_hw_state.fp0); + I915_WRITE(FP1(crtc->pipe), crtc->config->dpll_hw_state.fp1); } -static void bxt_de_pll_enable(struct drm_i915_private *dev_priv, int vco) +static void i9xx_crtc_enable(struct intel_crtc_state *pipe_config, + struct drm_atomic_state *old_state) { - int ratio = DIV_ROUND_CLOSEST(vco, dev_priv->cdclk_pll.ref); - u32 val; + struct drm_crtc *crtc = pipe_config->base.crtc; + struct drm_device *dev = crtc->dev; + struct drm_i915_private *dev_priv = to_i915(dev); + struct intel_crtc *intel_crtc = to_intel_crtc(crtc); + enum pipe pipe = intel_crtc->pipe; - val = I915_READ(BXT_DE_PLL_CTL); - val &= ~BXT_DE_PLL_RATIO_MASK; - val |= BXT_DE_PLL_RATIO(ratio); - I915_WRITE(BXT_DE_PLL_CTL, val); + if (WARN_ON(intel_crtc->active)) + return; - I915_WRITE(BXT_DE_PLL_ENABLE, BXT_DE_PLL_PLL_ENABLE); + i9xx_set_pll_dividers(intel_crtc); - /* Timeout 200us */ - if (intel_wait_for_register(dev_priv, - BXT_DE_PLL_ENABLE, - BXT_DE_PLL_LOCK, - BXT_DE_PLL_LOCK, - 1)) - DRM_ERROR("timeout waiting for DE PLL lock\n"); + if (intel_crtc_has_dp_encoder(intel_crtc->config)) + intel_dp_set_m_n(intel_crtc, M1_N1); - dev_priv->cdclk_pll.vco = vco; -} + intel_set_pipe_timings(intel_crtc); + intel_set_pipe_src_size(intel_crtc); -static void bxt_set_cdclk(struct drm_i915_private *dev_priv, int cdclk) -{ - u32 val, divider; - int vco, ret; + i9xx_set_pipeconf(intel_crtc); - if (IS_GEMINILAKE(dev_priv)) - vco = glk_de_pll_vco(dev_priv, cdclk); - else - vco = bxt_de_pll_vco(dev_priv, cdclk); + intel_crtc->active = true; - DRM_DEBUG_DRIVER("Changing CDCLK to %d kHz (VCO %d kHz)\n", cdclk, vco); + if (!IS_GEN2(dev_priv)) + intel_set_cpu_fifo_underrun_reporting(dev_priv, pipe, true); - /* cdclk = vco / 2 / div{1,1.5,2,4} */ - switch (DIV_ROUND_CLOSEST(vco, cdclk)) { - case 8: - divider = BXT_CDCLK_CD2X_DIV_SEL_4; - break; - case 4: - divider = BXT_CDCLK_CD2X_DIV_SEL_2; - break; - case 3: - WARN(IS_GEMINILAKE(dev_priv), "Unsupported divider\n"); - divider = BXT_CDCLK_CD2X_DIV_SEL_1_5; - break; - case 2: - divider = BXT_CDCLK_CD2X_DIV_SEL_1; - break; - default: - WARN_ON(cdclk != dev_priv->cdclk_pll.ref); - WARN_ON(vco != 0); + intel_encoders_pre_enable(crtc, pipe_config, old_state); - divider = BXT_CDCLK_CD2X_DIV_SEL_1; - break; - } + i9xx_enable_pll(intel_crtc); - /* Inform power controller of upcoming frequency change */ - mutex_lock(&dev_priv->rps.hw_lock); - ret = sandybridge_pcode_write(dev_priv, HSW_PCODE_DE_WRITE_FREQ_REQ, - 0x80000000); - mutex_unlock(&dev_priv->rps.hw_lock); + i9xx_pfit_enable(intel_crtc); - if (ret) { - DRM_ERROR("PCode CDCLK freq change notify failed (err %d, freq %d)\n", - ret, cdclk); - return; - } + intel_color_load_luts(&pipe_config->base); - if (dev_priv->cdclk_pll.vco != 0 && - dev_priv->cdclk_pll.vco != vco) - bxt_de_pll_disable(dev_priv); + intel_update_watermarks(intel_crtc); + intel_enable_pipe(intel_crtc); - if (dev_priv->cdclk_pll.vco != vco) - bxt_de_pll_enable(dev_priv, vco); + assert_vblank_disabled(crtc); + drm_crtc_vblank_on(crtc); - val = divider | skl_cdclk_decimal(cdclk); - /* - * FIXME if only the cd2x divider needs changing, it could be done - * without shutting off the pipe (if only one pipe is active). - */ - val |= BXT_CDCLK_CD2X_PIPE_NONE; - /* - * Disable SSA Precharge when CD clock frequency < 500 MHz, - * enable otherwise. - */ - if (cdclk >= 500000) - val |= BXT_CDCLK_SSA_PRECHARGE_ENABLE; - I915_WRITE(CDCLK_CTL, val); + intel_encoders_enable(crtc, pipe_config, old_state); +} - mutex_lock(&dev_priv->rps.hw_lock); - ret = sandybridge_pcode_write(dev_priv, HSW_PCODE_DE_WRITE_FREQ_REQ, - DIV_ROUND_UP(cdclk, 25000)); - mutex_unlock(&dev_priv->rps.hw_lock); +static void i9xx_pfit_disable(struct intel_crtc *crtc) +{ + struct drm_device *dev = crtc->base.dev; + struct drm_i915_private *dev_priv = to_i915(dev); - if (ret) { - DRM_ERROR("PCode CDCLK freq set failed, (err %d, freq %d)\n", - ret, cdclk); + if (!crtc->config->gmch_pfit.control) return; - } - intel_update_cdclk(dev_priv); + assert_pipe_disabled(dev_priv, crtc->pipe); + + DRM_DEBUG_DRIVER("disabling pfit, current: 0x%08x\n", + I915_READ(PFIT_CONTROL)); + I915_WRITE(PFIT_CONTROL, 0); } -static void bxt_sanitize_cdclk(struct drm_i915_private *dev_priv) +static void i9xx_crtc_disable(struct intel_crtc_state *old_crtc_state, + struct drm_atomic_state *old_state) { - u32 cdctl, expected; - - intel_update_cdclk(dev_priv); - - if (dev_priv->cdclk_pll.vco == 0 || - dev_priv->cdclk_freq == dev_priv->cdclk_pll.ref) - goto sanitize; - - /* DPLL okay; verify the cdclock - * - * Some BIOS versions leave an incorrect decimal frequency value and - * set reserved MBZ bits in CDCLK_CTL at least during exiting from S4, - * so sanitize this register. - */ - cdctl = I915_READ(CDCLK_CTL); - /* - * Let's ignore the pipe field, since BIOS could have configured the - * dividers both synching to an active pipe, or asynchronously - * (PIPE_NONE). - */ - cdctl &= ~BXT_CDCLK_CD2X_PIPE_NONE; + struct drm_crtc *crtc = old_crtc_state->base.crtc; + struct drm_device *dev = crtc->dev; + struct drm_i915_private *dev_priv = to_i915(dev); + struct intel_crtc *intel_crtc = to_intel_crtc(crtc); + int pipe = intel_crtc->pipe; - expected = (cdctl & BXT_CDCLK_CD2X_DIV_SEL_MASK) | - skl_cdclk_decimal(dev_priv->cdclk_freq); /* - * Disable SSA Precharge when CD clock frequency < 500 MHz, - * enable otherwise. + * On gen2 planes are double buffered but the pipe isn't, so we must + * wait for planes to fully turn off before disabling the pipe. */ - if (dev_priv->cdclk_freq >= 500000) - expected |= BXT_CDCLK_SSA_PRECHARGE_ENABLE; - - if (cdctl == expected) - /* All well; nothing to sanitize */ - return; + if (IS_GEN2(dev_priv)) + intel_wait_for_vblank(dev_priv, pipe); -sanitize: - DRM_DEBUG_KMS("Sanitizing cdclk programmed by pre-os\n"); + intel_encoders_disable(crtc, old_crtc_state, old_state); - /* force cdclk programming */ - dev_priv->cdclk_freq = 0; + drm_crtc_vblank_off(crtc); + assert_vblank_disabled(crtc); - /* force full PLL disable + enable */ - dev_priv->cdclk_pll.vco = -1; -} + intel_disable_pipe(intel_crtc); -void bxt_init_cdclk(struct drm_i915_private *dev_priv) -{ - int cdclk; + i9xx_pfit_disable(intel_crtc); - bxt_sanitize_cdclk(dev_priv); + intel_encoders_post_disable(crtc, old_crtc_state, old_state); - if (dev_priv->cdclk_freq != 0 && dev_priv->cdclk_pll.vco != 0) - return; + if (!intel_crtc_has_type(intel_crtc->config, INTEL_OUTPUT_DSI)) { + if (IS_CHERRYVIEW(dev_priv)) + chv_disable_pll(dev_priv, pipe); + else if (IS_VALLEYVIEW(dev_priv)) + vlv_disable_pll(dev_priv, pipe); + else + i9xx_disable_pll(intel_crtc); + } - /* - * FIXME: - * - The initial CDCLK needs to be read from VBT. - * Need to make this change after VBT has changes for BXT. - */ - if (IS_GEMINILAKE(dev_priv)) - cdclk = glk_calc_cdclk(0); - else - cdclk = bxt_calc_cdclk(0); + intel_encoders_post_pll_disable(crtc, old_crtc_state, old_state); - bxt_set_cdclk(dev_priv, cdclk); + if (!IS_GEN2(dev_priv)) + intel_set_cpu_fifo_underrun_reporting(dev_priv, pipe, false); } -void bxt_uninit_cdclk(struct drm_i915_private *dev_priv) +static void intel_crtc_disable_noatomic(struct drm_crtc *crtc) { - bxt_set_cdclk(dev_priv, dev_priv->cdclk_pll.ref); -} + struct intel_encoder *encoder; + struct intel_crtc *intel_crtc = to_intel_crtc(crtc); + struct drm_i915_private *dev_priv = to_i915(crtc->dev); + enum intel_display_power_domain domain; + unsigned long domains; + struct drm_atomic_state *state; + struct intel_crtc_state *crtc_state; + int ret; -static int skl_calc_cdclk(int max_pixclk, int vco) -{ - if (vco == 8640000) { - if (max_pixclk > 540000) - return 617143; - else if (max_pixclk > 432000) - return 540000; - else if (max_pixclk > 308571) - return 432000; - else - return 308571; - } else { - if (max_pixclk > 540000) - return 675000; - else if (max_pixclk > 450000) - return 540000; - else if (max_pixclk > 337500) - return 450000; - else - return 337500; - } -} + if (!intel_crtc->active) + return; -static void -skl_dpll0_update(struct drm_i915_private *dev_priv) -{ - u32 val; + if (crtc->primary->state->visible) { + WARN_ON(intel_crtc->flip_work); - dev_priv->cdclk_pll.ref = 24000; - dev_priv->cdclk_pll.vco = 0; + intel_pre_disable_primary_noatomic(crtc); - val = I915_READ(LCPLL1_CTL); - if ((val & LCPLL_PLL_ENABLE) == 0) - return; + intel_crtc_disable_planes(crtc, 1 << drm_plane_index(crtc->primary)); + crtc->primary->state->visible = false; + } - if (WARN_ON((val & LCPLL_PLL_LOCK) == 0)) + state = drm_atomic_state_alloc(crtc->dev); + if (!state) { + DRM_DEBUG_KMS("failed to disable [CRTC:%d:%s], out of memory", + crtc->base.id, crtc->name); return; + } - val = I915_READ(DPLL_CTRL1); + state->acquire_ctx = crtc->dev->mode_config.acquire_ctx; - if (WARN_ON((val & (DPLL_CTRL1_HDMI_MODE(SKL_DPLL0) | - DPLL_CTRL1_SSC(SKL_DPLL0) | - DPLL_CTRL1_OVERRIDE(SKL_DPLL0))) != - DPLL_CTRL1_OVERRIDE(SKL_DPLL0))) - return; + /* Everything's already locked, -EDEADLK can't happen. */ + crtc_state = intel_atomic_get_crtc_state(state, intel_crtc); + ret = drm_atomic_add_affected_connectors(state, crtc); - switch (val & DPLL_CTRL1_LINK_RATE_MASK(SKL_DPLL0)) { - case DPLL_CTRL1_LINK_RATE(DPLL_CTRL1_LINK_RATE_810, SKL_DPLL0): - case DPLL_CTRL1_LINK_RATE(DPLL_CTRL1_LINK_RATE_1350, SKL_DPLL0): - case DPLL_CTRL1_LINK_RATE(DPLL_CTRL1_LINK_RATE_1620, SKL_DPLL0): - case DPLL_CTRL1_LINK_RATE(DPLL_CTRL1_LINK_RATE_2700, SKL_DPLL0): - dev_priv->cdclk_pll.vco = 8100000; - break; - case DPLL_CTRL1_LINK_RATE(DPLL_CTRL1_LINK_RATE_1080, SKL_DPLL0): - case DPLL_CTRL1_LINK_RATE(DPLL_CTRL1_LINK_RATE_2160, SKL_DPLL0): - dev_priv->cdclk_pll.vco = 8640000; - break; - default: - MISSING_CASE(val & DPLL_CTRL1_LINK_RATE_MASK(SKL_DPLL0)); - break; - } -} + WARN_ON(IS_ERR(crtc_state) || ret); -void skl_set_preferred_cdclk_vco(struct drm_i915_private *dev_priv, int vco) -{ - bool changed = dev_priv->skl_preferred_vco_freq != vco; - - dev_priv->skl_preferred_vco_freq = vco; - - if (changed) - intel_update_max_cdclk(dev_priv); -} - -static void -skl_dpll0_enable(struct drm_i915_private *dev_priv, int vco) -{ - int min_cdclk = skl_calc_cdclk(0, vco); - u32 val; - - WARN_ON(vco != 8100000 && vco != 8640000); - - /* select the minimum CDCLK before enabling DPLL 0 */ - val = CDCLK_FREQ_337_308 | skl_cdclk_decimal(min_cdclk); - I915_WRITE(CDCLK_CTL, val); - POSTING_READ(CDCLK_CTL); - - /* - * We always enable DPLL0 with the lowest link rate possible, but still - * taking into account the VCO required to operate the eDP panel at the - * desired frequency. The usual DP link rates operate with a VCO of - * 8100 while the eDP 1.4 alternate link rates need a VCO of 8640. - * The modeset code is responsible for the selection of the exact link - * rate later on, with the constraint of choosing a frequency that - * works with vco. - */ - val = I915_READ(DPLL_CTRL1); - - val &= ~(DPLL_CTRL1_HDMI_MODE(SKL_DPLL0) | DPLL_CTRL1_SSC(SKL_DPLL0) | - DPLL_CTRL1_LINK_RATE_MASK(SKL_DPLL0)); - val |= DPLL_CTRL1_OVERRIDE(SKL_DPLL0); - if (vco == 8640000) - val |= DPLL_CTRL1_LINK_RATE(DPLL_CTRL1_LINK_RATE_1080, - SKL_DPLL0); - else - val |= DPLL_CTRL1_LINK_RATE(DPLL_CTRL1_LINK_RATE_810, - SKL_DPLL0); - - I915_WRITE(DPLL_CTRL1, val); - POSTING_READ(DPLL_CTRL1); - - I915_WRITE(LCPLL1_CTL, I915_READ(LCPLL1_CTL) | LCPLL_PLL_ENABLE); - - if (intel_wait_for_register(dev_priv, - LCPLL1_CTL, LCPLL_PLL_LOCK, LCPLL_PLL_LOCK, - 5)) - DRM_ERROR("DPLL0 not locked\n"); - - dev_priv->cdclk_pll.vco = vco; - - /* We'll want to keep using the current vco from now on. */ - skl_set_preferred_cdclk_vco(dev_priv, vco); -} - -static void -skl_dpll0_disable(struct drm_i915_private *dev_priv) -{ - I915_WRITE(LCPLL1_CTL, I915_READ(LCPLL1_CTL) & ~LCPLL_PLL_ENABLE); - if (intel_wait_for_register(dev_priv, - LCPLL1_CTL, LCPLL_PLL_LOCK, 0, - 1)) - DRM_ERROR("Couldn't disable DPLL0\n"); - - dev_priv->cdclk_pll.vco = 0; -} - -static void skl_set_cdclk(struct drm_i915_private *dev_priv, int cdclk, int vco) -{ - u32 freq_select, pcu_ack; - int ret; - - WARN_ON((cdclk == 24000) != (vco == 0)); - - DRM_DEBUG_DRIVER("Changing CDCLK to %d kHz (VCO %d kHz)\n", cdclk, vco); - - mutex_lock(&dev_priv->rps.hw_lock); - ret = skl_pcode_request(dev_priv, SKL_PCODE_CDCLK_CONTROL, - SKL_CDCLK_PREPARE_FOR_CHANGE, - SKL_CDCLK_READY_FOR_CHANGE, - SKL_CDCLK_READY_FOR_CHANGE, 3); - mutex_unlock(&dev_priv->rps.hw_lock); - if (ret) { - DRM_ERROR("Failed to inform PCU about cdclk change (%d)\n", - ret); - return; - } - - /* set CDCLK_CTL */ - switch (cdclk) { - case 450000: - case 432000: - freq_select = CDCLK_FREQ_450_432; - pcu_ack = 1; - break; - case 540000: - freq_select = CDCLK_FREQ_540; - pcu_ack = 2; - break; - case 308571: - case 337500: - default: - freq_select = CDCLK_FREQ_337_308; - pcu_ack = 0; - break; - case 617143: - case 675000: - freq_select = CDCLK_FREQ_675_617; - pcu_ack = 3; - break; - } - - if (dev_priv->cdclk_pll.vco != 0 && - dev_priv->cdclk_pll.vco != vco) - skl_dpll0_disable(dev_priv); - - if (dev_priv->cdclk_pll.vco != vco) - skl_dpll0_enable(dev_priv, vco); - - I915_WRITE(CDCLK_CTL, freq_select | skl_cdclk_decimal(cdclk)); - POSTING_READ(CDCLK_CTL); - - /* inform PCU of the change */ - mutex_lock(&dev_priv->rps.hw_lock); - sandybridge_pcode_write(dev_priv, SKL_PCODE_CDCLK_CONTROL, pcu_ack); - mutex_unlock(&dev_priv->rps.hw_lock); - - intel_update_cdclk(dev_priv); -} - -static void skl_sanitize_cdclk(struct drm_i915_private *dev_priv); - -void skl_uninit_cdclk(struct drm_i915_private *dev_priv) -{ - skl_set_cdclk(dev_priv, dev_priv->cdclk_pll.ref, 0); -} - -void skl_init_cdclk(struct drm_i915_private *dev_priv) -{ - int cdclk, vco; - - skl_sanitize_cdclk(dev_priv); - - if (dev_priv->cdclk_freq != 0 && dev_priv->cdclk_pll.vco != 0) { - /* - * Use the current vco as our initial - * guess as to what the preferred vco is. - */ - if (dev_priv->skl_preferred_vco_freq == 0) - skl_set_preferred_cdclk_vco(dev_priv, - dev_priv->cdclk_pll.vco); - return; - } - - vco = dev_priv->skl_preferred_vco_freq; - if (vco == 0) - vco = 8100000; - cdclk = skl_calc_cdclk(0, vco); - - skl_set_cdclk(dev_priv, cdclk, vco); -} - -static void skl_sanitize_cdclk(struct drm_i915_private *dev_priv) -{ - uint32_t cdctl, expected; - - /* - * check if the pre-os intialized the display - * There is SWF18 scratchpad register defined which is set by the - * pre-os which can be used by the OS drivers to check the status - */ - if ((I915_READ(SWF_ILK(0x18)) & 0x00FFFFFF) == 0) - goto sanitize; - - intel_update_cdclk(dev_priv); - /* Is PLL enabled and locked ? */ - if (dev_priv->cdclk_pll.vco == 0 || - dev_priv->cdclk_freq == dev_priv->cdclk_pll.ref) - goto sanitize; - - /* DPLL okay; verify the cdclock - * - * Noticed in some instances that the freq selection is correct but - * decimal part is programmed wrong from BIOS where pre-os does not - * enable display. Verify the same as well. - */ - cdctl = I915_READ(CDCLK_CTL); - expected = (cdctl & CDCLK_FREQ_SEL_MASK) | - skl_cdclk_decimal(dev_priv->cdclk_freq); - if (cdctl == expected) - /* All well; nothing to sanitize */ - return; - -sanitize: - DRM_DEBUG_KMS("Sanitizing cdclk programmed by pre-os\n"); - - /* force cdclk programming */ - dev_priv->cdclk_freq = 0; - /* force full PLL disable + enable */ - dev_priv->cdclk_pll.vco = -1; -} - -/* Adjust CDclk dividers to allow high res or save power if possible */ -static void valleyview_set_cdclk(struct drm_device *dev, int cdclk) -{ - struct drm_i915_private *dev_priv = to_i915(dev); - u32 val, cmd; - - WARN_ON(dev_priv->display.get_cdclk(dev_priv) != dev_priv->cdclk_freq); - - if (cdclk >= 320000) /* jump to highest voltage for 400MHz too */ - cmd = 2; - else if (cdclk == 266667) - cmd = 1; - else - cmd = 0; - - mutex_lock(&dev_priv->rps.hw_lock); - val = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ); - val &= ~DSPFREQGUAR_MASK; - val |= (cmd << DSPFREQGUAR_SHIFT); - vlv_punit_write(dev_priv, PUNIT_REG_DSPFREQ, val); - if (wait_for((vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ) & - DSPFREQSTAT_MASK) == (cmd << DSPFREQSTAT_SHIFT), - 50)) { - DRM_ERROR("timed out waiting for CDclk change\n"); - } - mutex_unlock(&dev_priv->rps.hw_lock); - - mutex_lock(&dev_priv->sb_lock); - - if (cdclk == 400000) { - u32 divider; - - divider = DIV_ROUND_CLOSEST(dev_priv->hpll_freq << 1, cdclk) - 1; - - /* adjust cdclk divider */ - val = vlv_cck_read(dev_priv, CCK_DISPLAY_CLOCK_CONTROL); - val &= ~CCK_FREQUENCY_VALUES; - val |= divider; - vlv_cck_write(dev_priv, CCK_DISPLAY_CLOCK_CONTROL, val); - - if (wait_for((vlv_cck_read(dev_priv, CCK_DISPLAY_CLOCK_CONTROL) & - CCK_FREQUENCY_STATUS) == (divider << CCK_FREQUENCY_STATUS_SHIFT), - 50)) - DRM_ERROR("timed out waiting for CDclk change\n"); - } - - /* adjust self-refresh exit latency value */ - val = vlv_bunit_read(dev_priv, BUNIT_REG_BISOC); - val &= ~0x7f; - - /* - * For high bandwidth configs, we set a higher latency in the bunit - * so that the core display fetch happens in time to avoid underruns. - */ - if (cdclk == 400000) - val |= 4500 / 250; /* 4.5 usec */ - else - val |= 3000 / 250; /* 3.0 usec */ - vlv_bunit_write(dev_priv, BUNIT_REG_BISOC, val); - - mutex_unlock(&dev_priv->sb_lock); - - intel_update_cdclk(dev_priv); -} - -static void cherryview_set_cdclk(struct drm_device *dev, int cdclk) -{ - struct drm_i915_private *dev_priv = to_i915(dev); - u32 val, cmd; - - WARN_ON(dev_priv->display.get_cdclk(dev_priv) != dev_priv->cdclk_freq); - - switch (cdclk) { - case 333333: - case 320000: - case 266667: - case 200000: - break; - default: - MISSING_CASE(cdclk); - return; - } - - /* - * Specs are full of misinformation, but testing on actual - * hardware has shown that we just need to write the desired - * CCK divider into the Punit register. - */ - cmd = DIV_ROUND_CLOSEST(dev_priv->hpll_freq << 1, cdclk) - 1; - - mutex_lock(&dev_priv->rps.hw_lock); - val = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ); - val &= ~DSPFREQGUAR_MASK_CHV; - val |= (cmd << DSPFREQGUAR_SHIFT_CHV); - vlv_punit_write(dev_priv, PUNIT_REG_DSPFREQ, val); - if (wait_for((vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ) & - DSPFREQSTAT_MASK_CHV) == (cmd << DSPFREQSTAT_SHIFT_CHV), - 50)) { - DRM_ERROR("timed out waiting for CDclk change\n"); - } - mutex_unlock(&dev_priv->rps.hw_lock); - - intel_update_cdclk(dev_priv); -} - -static int valleyview_calc_cdclk(struct drm_i915_private *dev_priv, - int max_pixclk) -{ - int freq_320 = (dev_priv->hpll_freq << 1) % 320000 != 0 ? 333333 : 320000; - int limit = IS_CHERRYVIEW(dev_priv) ? 95 : 90; - - /* - * Really only a few cases to deal with, as only 4 CDclks are supported: - * 200MHz - * 267MHz - * 320/333MHz (depends on HPLL freq) - * 400MHz (VLV only) - * So we check to see whether we're above 90% (VLV) or 95% (CHV) - * of the lower bin and adjust if needed. - * - * We seem to get an unstable or solid color picture at 200MHz. - * Not sure what's wrong. For now use 200MHz only when all pipes - * are off. - */ - if (!IS_CHERRYVIEW(dev_priv) && - max_pixclk > freq_320*limit/100) - return 400000; - else if (max_pixclk > 266667*limit/100) - return freq_320; - else if (max_pixclk > 0) - return 266667; - else - return 200000; -} - -static int glk_calc_cdclk(int max_pixclk) -{ - if (max_pixclk > 2 * 158400) - return 316800; - else if (max_pixclk > 2 * 79200) - return 158400; - else - return 79200; -} - -static int bxt_calc_cdclk(int max_pixclk) -{ - if (max_pixclk > 576000) - return 624000; - else if (max_pixclk > 384000) - return 576000; - else if (max_pixclk > 288000) - return 384000; - else if (max_pixclk > 144000) - return 288000; - else - return 144000; -} - -static int valleyview_modeset_calc_cdclk(struct drm_atomic_state *state) -{ - struct drm_device *dev = state->dev; - struct drm_i915_private *dev_priv = to_i915(dev); - int max_pixclk = intel_max_pixel_rate(state); - struct intel_atomic_state *intel_state = - to_intel_atomic_state(state); - - intel_state->cdclk = intel_state->dev_cdclk = - valleyview_calc_cdclk(dev_priv, max_pixclk); - - if (!intel_state->active_crtcs) - intel_state->dev_cdclk = valleyview_calc_cdclk(dev_priv, 0); - - return 0; -} - -static int bxt_modeset_calc_cdclk(struct drm_atomic_state *state) -{ - struct drm_i915_private *dev_priv = to_i915(state->dev); - int max_pixclk = intel_max_pixel_rate(state); - struct intel_atomic_state *intel_state = - to_intel_atomic_state(state); - int cdclk; - - if (IS_GEMINILAKE(dev_priv)) - cdclk = glk_calc_cdclk(max_pixclk); - else - cdclk = bxt_calc_cdclk(max_pixclk); - - intel_state->cdclk = intel_state->dev_cdclk = cdclk; - - if (!intel_state->active_crtcs) { - if (IS_GEMINILAKE(dev_priv)) - cdclk = glk_calc_cdclk(0); - else - cdclk = bxt_calc_cdclk(0); - - intel_state->dev_cdclk = cdclk; - } - - return 0; -} - -static void vlv_program_pfi_credits(struct drm_i915_private *dev_priv) -{ - unsigned int credits, default_credits; - - if (IS_CHERRYVIEW(dev_priv)) - default_credits = PFI_CREDIT(12); - else - default_credits = PFI_CREDIT(8); - - if (dev_priv->cdclk_freq >= dev_priv->czclk_freq) { - /* CHV suggested value is 31 or 63 */ - if (IS_CHERRYVIEW(dev_priv)) - credits = PFI_CREDIT_63; - else - credits = PFI_CREDIT(15); - } else { - credits = default_credits; - } - - /* - * WA - write default credits before re-programming - * FIXME: should we also set the resend bit here? - */ - I915_WRITE(GCI_CONTROL, VGA_FAST_MODE_DISABLE | - default_credits); - - I915_WRITE(GCI_CONTROL, VGA_FAST_MODE_DISABLE | - credits | PFI_CREDIT_RESEND); - - /* - * FIXME is this guaranteed to clear - * immediately or should we poll for it? - */ - WARN_ON(I915_READ(GCI_CONTROL) & PFI_CREDIT_RESEND); -} - -static void valleyview_modeset_commit_cdclk(struct drm_atomic_state *old_state) -{ - struct drm_device *dev = old_state->dev; - struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_atomic_state *old_intel_state = - to_intel_atomic_state(old_state); - unsigned req_cdclk = old_intel_state->dev_cdclk; - - /* - * FIXME: We can end up here with all power domains off, yet - * with a CDCLK frequency other than the minimum. To account - * for this take the PIPE-A power domain, which covers the HW - * blocks needed for the following programming. This can be - * removed once it's guaranteed that we get here either with - * the minimum CDCLK set, or the required power domains - * enabled. - */ - intel_display_power_get(dev_priv, POWER_DOMAIN_PIPE_A); - - if (IS_CHERRYVIEW(dev_priv)) - cherryview_set_cdclk(dev, req_cdclk); - else - valleyview_set_cdclk(dev, req_cdclk); - - vlv_program_pfi_credits(dev_priv); - - intel_display_power_put(dev_priv, POWER_DOMAIN_PIPE_A); -} - -static void valleyview_crtc_enable(struct intel_crtc_state *pipe_config, - struct drm_atomic_state *old_state) -{ - struct drm_crtc *crtc = pipe_config->base.crtc; - struct drm_device *dev = crtc->dev; - struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - int pipe = intel_crtc->pipe; - - if (WARN_ON(intel_crtc->active)) - return; - - if (intel_crtc_has_dp_encoder(intel_crtc->config)) - intel_dp_set_m_n(intel_crtc, M1_N1); - - intel_set_pipe_timings(intel_crtc); - intel_set_pipe_src_size(intel_crtc); - - if (IS_CHERRYVIEW(dev_priv) && pipe == PIPE_B) { - struct drm_i915_private *dev_priv = to_i915(dev); - - I915_WRITE(CHV_BLEND(pipe), CHV_BLEND_LEGACY); - I915_WRITE(CHV_CANVAS(pipe), 0); - } - - i9xx_set_pipeconf(intel_crtc); - - intel_crtc->active = true; - - intel_set_cpu_fifo_underrun_reporting(dev_priv, pipe, true); - - intel_encoders_pre_pll_enable(crtc, pipe_config, old_state); - - if (IS_CHERRYVIEW(dev_priv)) { - chv_prepare_pll(intel_crtc, intel_crtc->config); - chv_enable_pll(intel_crtc, intel_crtc->config); - } else { - vlv_prepare_pll(intel_crtc, intel_crtc->config); - vlv_enable_pll(intel_crtc, intel_crtc->config); - } - - intel_encoders_pre_enable(crtc, pipe_config, old_state); - - i9xx_pfit_enable(intel_crtc); - - intel_color_load_luts(&pipe_config->base); - - intel_update_watermarks(intel_crtc); - intel_enable_pipe(intel_crtc); - - assert_vblank_disabled(crtc); - drm_crtc_vblank_on(crtc); - - intel_encoders_enable(crtc, pipe_config, old_state); -} - -static void i9xx_set_pll_dividers(struct intel_crtc *crtc) -{ - struct drm_device *dev = crtc->base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); - - I915_WRITE(FP0(crtc->pipe), crtc->config->dpll_hw_state.fp0); - I915_WRITE(FP1(crtc->pipe), crtc->config->dpll_hw_state.fp1); -} - -static void i9xx_crtc_enable(struct intel_crtc_state *pipe_config, - struct drm_atomic_state *old_state) -{ - struct drm_crtc *crtc = pipe_config->base.crtc; - struct drm_device *dev = crtc->dev; - struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - enum pipe pipe = intel_crtc->pipe; - - if (WARN_ON(intel_crtc->active)) - return; - - i9xx_set_pll_dividers(intel_crtc); - - if (intel_crtc_has_dp_encoder(intel_crtc->config)) - intel_dp_set_m_n(intel_crtc, M1_N1); - - intel_set_pipe_timings(intel_crtc); - intel_set_pipe_src_size(intel_crtc); - - i9xx_set_pipeconf(intel_crtc); - - intel_crtc->active = true; - - if (!IS_GEN2(dev_priv)) - intel_set_cpu_fifo_underrun_reporting(dev_priv, pipe, true); - - intel_encoders_pre_enable(crtc, pipe_config, old_state); - - i9xx_enable_pll(intel_crtc); - - i9xx_pfit_enable(intel_crtc); - - intel_color_load_luts(&pipe_config->base); - - intel_update_watermarks(intel_crtc); - intel_enable_pipe(intel_crtc); - - assert_vblank_disabled(crtc); - drm_crtc_vblank_on(crtc); - - intel_encoders_enable(crtc, pipe_config, old_state); -} - -static void i9xx_pfit_disable(struct intel_crtc *crtc) -{ - struct drm_device *dev = crtc->base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); - - if (!crtc->config->gmch_pfit.control) - return; - - assert_pipe_disabled(dev_priv, crtc->pipe); - - DRM_DEBUG_DRIVER("disabling pfit, current: 0x%08x\n", - I915_READ(PFIT_CONTROL)); - I915_WRITE(PFIT_CONTROL, 0); -} - -static void i9xx_crtc_disable(struct intel_crtc_state *old_crtc_state, - struct drm_atomic_state *old_state) -{ - struct drm_crtc *crtc = old_crtc_state->base.crtc; - struct drm_device *dev = crtc->dev; - struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - int pipe = intel_crtc->pipe; - - /* - * On gen2 planes are double buffered but the pipe isn't, so we must - * wait for planes to fully turn off before disabling the pipe. - */ - if (IS_GEN2(dev_priv)) - intel_wait_for_vblank(dev_priv, pipe); - - intel_encoders_disable(crtc, old_crtc_state, old_state); - - drm_crtc_vblank_off(crtc); - assert_vblank_disabled(crtc); - - intel_disable_pipe(intel_crtc); - - i9xx_pfit_disable(intel_crtc); - - intel_encoders_post_disable(crtc, old_crtc_state, old_state); - - if (!intel_crtc_has_type(intel_crtc->config, INTEL_OUTPUT_DSI)) { - if (IS_CHERRYVIEW(dev_priv)) - chv_disable_pll(dev_priv, pipe); - else if (IS_VALLEYVIEW(dev_priv)) - vlv_disable_pll(dev_priv, pipe); - else - i9xx_disable_pll(intel_crtc); - } - - intel_encoders_post_pll_disable(crtc, old_crtc_state, old_state); - - if (!IS_GEN2(dev_priv)) - intel_set_cpu_fifo_underrun_reporting(dev_priv, pipe, false); -} - -static void intel_crtc_disable_noatomic(struct drm_crtc *crtc) -{ - struct intel_encoder *encoder; - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - struct drm_i915_private *dev_priv = to_i915(crtc->dev); - enum intel_display_power_domain domain; - unsigned long domains; - struct drm_atomic_state *state; - struct intel_crtc_state *crtc_state; - int ret; - - if (!intel_crtc->active) - return; - - if (crtc->primary->state->visible) { - WARN_ON(intel_crtc->flip_work); - - intel_pre_disable_primary_noatomic(crtc); - - intel_crtc_disable_planes(crtc, 1 << drm_plane_index(crtc->primary)); - crtc->primary->state->visible = false; - } - - state = drm_atomic_state_alloc(crtc->dev); - if (!state) { - DRM_DEBUG_KMS("failed to disable [CRTC:%d:%s], out of memory", - crtc->base.id, crtc->name); - return; - } - - state->acquire_ctx = crtc->dev->mode_config.acquire_ctx; - - /* Everything's already locked, -EDEADLK can't happen. */ - crtc_state = intel_atomic_get_crtc_state(state, intel_crtc); - ret = drm_atomic_add_affected_connectors(state, crtc); - - WARN_ON(IS_ERR(crtc_state) || ret); - - dev_priv->display.crtc_disable(crtc_state, state); + dev_priv->display.crtc_disable(crtc_state, state); drm_atomic_state_put(state); @@ -7130,563 +6188,121 @@ retry: goto retry; } - if (needs_recompute) - return RETRY; - - return ret; -} - -static bool pipe_config_supports_ips(struct drm_i915_private *dev_priv, - struct intel_crtc_state *pipe_config) -{ - if (pipe_config->pipe_bpp > 24) - return false; - - /* HSW can handle pixel rate up to cdclk? */ - if (IS_HASWELL(dev_priv)) - return true; - - /* - * We compare against max which means we must take - * the increased cdclk requirement into account when - * calculating the new cdclk. - * - * Should measure whether using a lower cdclk w/o IPS - */ - return pipe_config->pixel_rate <= - dev_priv->max_cdclk_freq * 95 / 100; -} - -static void hsw_compute_ips_config(struct intel_crtc *crtc, - struct intel_crtc_state *pipe_config) -{ - struct drm_device *dev = crtc->base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); - - pipe_config->ips_enabled = i915.enable_ips && - hsw_crtc_supports_ips(crtc) && - pipe_config_supports_ips(dev_priv, pipe_config); -} - -static bool intel_crtc_supports_double_wide(const struct intel_crtc *crtc) -{ - const struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); - - /* GDG double wide on either pipe, otherwise pipe A only */ - return INTEL_INFO(dev_priv)->gen < 4 && - (crtc->pipe == PIPE_A || IS_I915G(dev_priv)); -} - -static void intel_crtc_compute_pixel_rate(struct intel_crtc_state *crtc_state) -{ - struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev); - - if (HAS_GMCH_DISPLAY(dev_priv)) - /* FIXME calculate proper pipe pixel rate for GMCH pfit */ - crtc_state->pixel_rate = - crtc_state->base.adjusted_mode.crtc_clock; - else - crtc_state->pixel_rate = - ilk_pipe_pixel_rate(crtc_state); -} - -static int intel_crtc_compute_config(struct intel_crtc *crtc, - struct intel_crtc_state *pipe_config) -{ - struct drm_device *dev = crtc->base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); - const struct drm_display_mode *adjusted_mode = &pipe_config->base.adjusted_mode; - int clock_limit = dev_priv->max_dotclk_freq; - - if (INTEL_GEN(dev_priv) < 4) { - clock_limit = dev_priv->max_cdclk_freq * 9 / 10; - - /* - * Enable double wide mode when the dot clock - * is > 90% of the (display) core speed. - */ - if (intel_crtc_supports_double_wide(crtc) && - adjusted_mode->crtc_clock > clock_limit) { - clock_limit = dev_priv->max_dotclk_freq; - pipe_config->double_wide = true; - } - } - - if (adjusted_mode->crtc_clock > clock_limit) { - DRM_DEBUG_KMS("requested pixel clock (%d kHz) too high (max: %d kHz, double wide: %s)\n", - adjusted_mode->crtc_clock, clock_limit, - yesno(pipe_config->double_wide)); - return -EINVAL; - } - - /* - * Pipe horizontal size must be even in: - * - DVO ganged mode - * - LVDS dual channel mode - * - Double wide pipe - */ - if ((intel_crtc_has_type(pipe_config, INTEL_OUTPUT_LVDS) && - intel_is_dual_link_lvds(dev)) || pipe_config->double_wide) - pipe_config->pipe_src_w &= ~1; - - /* Cantiga+ cannot handle modes with a hsync front porch of 0. - * WaPruneModeWithIncorrectHsyncOffset:ctg,elk,ilk,snb,ivb,vlv,hsw. - */ - if ((INTEL_GEN(dev_priv) > 4 || IS_G4X(dev_priv)) && - adjusted_mode->crtc_hsync_start == adjusted_mode->crtc_hdisplay) - return -EINVAL; - - intel_crtc_compute_pixel_rate(pipe_config); - - if (HAS_IPS(dev_priv)) - hsw_compute_ips_config(crtc, pipe_config); - - if (pipe_config->has_pch_encoder) - return ironlake_fdi_compute_config(crtc, pipe_config); - - return 0; -} - -static int skylake_get_cdclk(struct drm_i915_private *dev_priv) -{ - u32 cdctl; - - skl_dpll0_update(dev_priv); - - if (dev_priv->cdclk_pll.vco == 0) - return dev_priv->cdclk_pll.ref; - - cdctl = I915_READ(CDCLK_CTL); - - if (dev_priv->cdclk_pll.vco == 8640000) { - switch (cdctl & CDCLK_FREQ_SEL_MASK) { - case CDCLK_FREQ_450_432: - return 432000; - case CDCLK_FREQ_337_308: - return 308571; - case CDCLK_FREQ_540: - return 540000; - case CDCLK_FREQ_675_617: - return 617143; - default: - MISSING_CASE(cdctl & CDCLK_FREQ_SEL_MASK); - } - } else { - switch (cdctl & CDCLK_FREQ_SEL_MASK) { - case CDCLK_FREQ_450_432: - return 450000; - case CDCLK_FREQ_337_308: - return 337500; - case CDCLK_FREQ_540: - return 540000; - case CDCLK_FREQ_675_617: - return 675000; - default: - MISSING_CASE(cdctl & CDCLK_FREQ_SEL_MASK); - } - } - - return dev_priv->cdclk_pll.ref; -} - -static void bxt_de_pll_update(struct drm_i915_private *dev_priv) -{ - u32 val; - - dev_priv->cdclk_pll.ref = 19200; - dev_priv->cdclk_pll.vco = 0; - - val = I915_READ(BXT_DE_PLL_ENABLE); - if ((val & BXT_DE_PLL_PLL_ENABLE) == 0) - return; - - if (WARN_ON((val & BXT_DE_PLL_LOCK) == 0)) - return; - - val = I915_READ(BXT_DE_PLL_CTL); - dev_priv->cdclk_pll.vco = (val & BXT_DE_PLL_RATIO_MASK) * - dev_priv->cdclk_pll.ref; -} - -static int broxton_get_cdclk(struct drm_i915_private *dev_priv) -{ - u32 divider; - int div, vco; - - bxt_de_pll_update(dev_priv); - - vco = dev_priv->cdclk_pll.vco; - if (vco == 0) - return dev_priv->cdclk_pll.ref; - - divider = I915_READ(CDCLK_CTL) & BXT_CDCLK_CD2X_DIV_SEL_MASK; - - switch (divider) { - case BXT_CDCLK_CD2X_DIV_SEL_1: - div = 2; - break; - case BXT_CDCLK_CD2X_DIV_SEL_1_5: - WARN(IS_GEMINILAKE(dev_priv), "Unsupported divider\n"); - div = 3; - break; - case BXT_CDCLK_CD2X_DIV_SEL_2: - div = 4; - break; - case BXT_CDCLK_CD2X_DIV_SEL_4: - div = 8; - break; - default: - MISSING_CASE(divider); - return dev_priv->cdclk_pll.ref; - } - - return DIV_ROUND_CLOSEST(vco, div); -} - -static int broadwell_get_cdclk(struct drm_i915_private *dev_priv) -{ - uint32_t lcpll = I915_READ(LCPLL_CTL); - uint32_t freq = lcpll & LCPLL_CLK_FREQ_MASK; - - if (lcpll & LCPLL_CD_SOURCE_FCLK) - return 800000; - else if (I915_READ(FUSE_STRAP) & HSW_CDCLK_LIMIT) - return 450000; - else if (freq == LCPLL_CLK_FREQ_450) - return 450000; - else if (freq == LCPLL_CLK_FREQ_54O_BDW) - return 540000; - else if (freq == LCPLL_CLK_FREQ_337_5_BDW) - return 337500; - else - return 675000; -} - -static int haswell_get_cdclk(struct drm_i915_private *dev_priv) -{ - uint32_t lcpll = I915_READ(LCPLL_CTL); - uint32_t freq = lcpll & LCPLL_CLK_FREQ_MASK; - - if (lcpll & LCPLL_CD_SOURCE_FCLK) - return 800000; - else if (I915_READ(FUSE_STRAP) & HSW_CDCLK_LIMIT) - return 450000; - else if (freq == LCPLL_CLK_FREQ_450) - return 450000; - else if (IS_HSW_ULT(dev_priv)) - return 337500; - else - return 540000; -} - -static int valleyview_get_cdclk(struct drm_i915_private *dev_priv) -{ - return vlv_get_cck_clock_hpll(dev_priv, "cdclk", - CCK_DISPLAY_CLOCK_CONTROL); -} - -static int fixed_450mhz_get_cdclk(struct drm_i915_private *dev_priv) -{ - return 450000; -} - -static int fixed_400mhz_get_cdclk(struct drm_i915_private *dev_priv) -{ - return 400000; -} - -static int i945gm_get_cdclk(struct drm_i915_private *dev_priv) -{ - struct pci_dev *pdev = dev_priv->drm.pdev; - u16 gcfgc = 0; - - pci_read_config_word(pdev, GCFGC, &gcfgc); - - if (gcfgc & GC_LOW_FREQUENCY_ENABLE) - return 133333; - else { - switch (gcfgc & GC_DISPLAY_CLOCK_MASK) { - case GC_DISPLAY_CLOCK_333_320_MHZ: - return 320000; - default: - case GC_DISPLAY_CLOCK_190_200_MHZ: - return 200000; - } - } -} - -static int fixed_333mhz_get_cdclk(struct drm_i915_private *dev_priv) -{ - return 333333; -} - -static int fixed_200mhz_get_cdclk(struct drm_i915_private *dev_priv) -{ - return 200000; -} - -static int pnv_get_cdclk(struct drm_i915_private *dev_priv) -{ - struct pci_dev *pdev = dev_priv->drm.pdev; - u16 gcfgc = 0; - - pci_read_config_word(pdev, GCFGC, &gcfgc); - - switch (gcfgc & GC_DISPLAY_CLOCK_MASK) { - case GC_DISPLAY_CLOCK_267_MHZ_PNV: - return 266667; - case GC_DISPLAY_CLOCK_333_MHZ_PNV: - return 333333; - case GC_DISPLAY_CLOCK_444_MHZ_PNV: - return 444444; - case GC_DISPLAY_CLOCK_200_MHZ_PNV: - return 200000; - default: - DRM_ERROR("Unknown pnv display core clock 0x%04x\n", gcfgc); - case GC_DISPLAY_CLOCK_133_MHZ_PNV: - return 133333; - case GC_DISPLAY_CLOCK_167_MHZ_PNV: - return 166667; - } -} - -static int i915gm_get_cdclk(struct drm_i915_private *dev_priv) -{ - struct pci_dev *pdev = dev_priv->drm.pdev; - u16 gcfgc = 0; - - pci_read_config_word(pdev, GCFGC, &gcfgc); - - if (gcfgc & GC_LOW_FREQUENCY_ENABLE) - return 133333; - else { - switch (gcfgc & GC_DISPLAY_CLOCK_MASK) { - case GC_DISPLAY_CLOCK_333_320_MHZ: - return 333333; - default: - case GC_DISPLAY_CLOCK_190_200_MHZ: - return 190000; - } - } -} - -static int fixed_266mhz_get_cdclk(struct drm_i915_private *dev_priv) -{ - return 266667; + if (needs_recompute) + return RETRY; + + return ret; } -static int i85x_get_cdclk(struct drm_i915_private *dev_priv) +static bool pipe_config_supports_ips(struct drm_i915_private *dev_priv, + struct intel_crtc_state *pipe_config) { - struct pci_dev *pdev = dev_priv->drm.pdev; - u16 hpllcc = 0; - - /* - * 852GM/852GMV only supports 133 MHz and the HPLLCC - * encoding is different :( - * FIXME is this the right way to detect 852GM/852GMV? - */ - if (pdev->revision == 0x1) - return 133333; + if (pipe_config->pipe_bpp > 24) + return false; - pci_bus_read_config_word(pdev->bus, - PCI_DEVFN(0, 3), HPLLCC, &hpllcc); + /* HSW can handle pixel rate up to cdclk? */ + if (IS_HASWELL(dev_priv)) + return true; - /* Assume that the hardware is in the high speed state. This - * should be the default. + /* + * We compare against max which means we must take + * the increased cdclk requirement into account when + * calculating the new cdclk. + * + * Should measure whether using a lower cdclk w/o IPS */ - switch (hpllcc & GC_CLOCK_CONTROL_MASK) { - case GC_CLOCK_133_200: - case GC_CLOCK_133_200_2: - case GC_CLOCK_100_200: - return 200000; - case GC_CLOCK_166_250: - return 250000; - case GC_CLOCK_100_133: - return 133333; - case GC_CLOCK_133_266: - case GC_CLOCK_133_266_2: - case GC_CLOCK_166_266: - return 266667; - } - - /* Shouldn't happen */ - return 0; + return pipe_config->pixel_rate <= + dev_priv->max_cdclk_freq * 95 / 100; } -static int fixed_133mhz_get_cdclk(struct drm_i915_private *dev_priv) +static void hsw_compute_ips_config(struct intel_crtc *crtc, + struct intel_crtc_state *pipe_config) { - return 133333; + struct drm_device *dev = crtc->base.dev; + struct drm_i915_private *dev_priv = to_i915(dev); + + pipe_config->ips_enabled = i915.enable_ips && + hsw_crtc_supports_ips(crtc) && + pipe_config_supports_ips(dev_priv, pipe_config); } -static unsigned int intel_hpll_vco(struct drm_i915_private *dev_priv) +static bool intel_crtc_supports_double_wide(const struct intel_crtc *crtc) { - static const unsigned int blb_vco[8] = { - [0] = 3200000, - [1] = 4000000, - [2] = 5333333, - [3] = 4800000, - [4] = 6400000, - }; - static const unsigned int pnv_vco[8] = { - [0] = 3200000, - [1] = 4000000, - [2] = 5333333, - [3] = 4800000, - [4] = 2666667, - }; - static const unsigned int cl_vco[8] = { - [0] = 3200000, - [1] = 4000000, - [2] = 5333333, - [3] = 6400000, - [4] = 3333333, - [5] = 3566667, - [6] = 4266667, - }; - static const unsigned int elk_vco[8] = { - [0] = 3200000, - [1] = 4000000, - [2] = 5333333, - [3] = 4800000, - }; - static const unsigned int ctg_vco[8] = { - [0] = 3200000, - [1] = 4000000, - [2] = 5333333, - [3] = 6400000, - [4] = 2666667, - [5] = 4266667, - }; - const unsigned int *vco_table; - unsigned int vco; - uint8_t tmp = 0; - - /* FIXME other chipsets? */ - if (IS_GM45(dev_priv)) - vco_table = ctg_vco; - else if (IS_G4X(dev_priv)) - vco_table = elk_vco; - else if (IS_I965GM(dev_priv)) - vco_table = cl_vco; - else if (IS_PINEVIEW(dev_priv)) - vco_table = pnv_vco; - else if (IS_G33(dev_priv)) - vco_table = blb_vco; - else - return 0; - - tmp = I915_READ(IS_MOBILE(dev_priv) ? HPLLVCO_MOBILE : HPLLVCO); - - vco = vco_table[tmp & 0x7]; - if (vco == 0) - DRM_ERROR("Bad HPLL VCO (HPLLVCO=0x%02x)\n", tmp); - else - DRM_DEBUG_KMS("HPLL VCO %u kHz\n", vco); + const struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); - return vco; + /* GDG double wide on either pipe, otherwise pipe A only */ + return INTEL_INFO(dev_priv)->gen < 4 && + (crtc->pipe == PIPE_A || IS_I915G(dev_priv)); } -static int gm45_get_cdclk(struct drm_i915_private *dev_priv) +static void intel_crtc_compute_pixel_rate(struct intel_crtc_state *crtc_state) { - struct pci_dev *pdev = dev_priv->drm.pdev; - unsigned int cdclk_sel, vco = intel_hpll_vco(dev_priv); - uint16_t tmp = 0; - - pci_read_config_word(pdev, GCFGC, &tmp); - - cdclk_sel = (tmp >> 12) & 0x1; + struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev); - switch (vco) { - case 2666667: - case 4000000: - case 5333333: - return cdclk_sel ? 333333 : 222222; - case 3200000: - return cdclk_sel ? 320000 : 228571; - default: - DRM_ERROR("Unable to determine CDCLK. HPLL VCO=%u, CFGC=0x%04x\n", vco, tmp); - return 222222; - } + if (HAS_GMCH_DISPLAY(dev_priv)) + /* FIXME calculate proper pipe pixel rate for GMCH pfit */ + crtc_state->pixel_rate = + crtc_state->base.adjusted_mode.crtc_clock; + else + crtc_state->pixel_rate = + ilk_pipe_pixel_rate(crtc_state); } -static int i965gm_get_cdclk(struct drm_i915_private *dev_priv) +static int intel_crtc_compute_config(struct intel_crtc *crtc, + struct intel_crtc_state *pipe_config) { - struct pci_dev *pdev = dev_priv->drm.pdev; - static const uint8_t div_3200[] = { 16, 10, 8 }; - static const uint8_t div_4000[] = { 20, 12, 10 }; - static const uint8_t div_5333[] = { 24, 16, 14 }; - const uint8_t *div_table; - unsigned int cdclk_sel, vco = intel_hpll_vco(dev_priv); - uint16_t tmp = 0; - - pci_read_config_word(pdev, GCFGC, &tmp); - - cdclk_sel = ((tmp >> 8) & 0x1f) - 1; + struct drm_device *dev = crtc->base.dev; + struct drm_i915_private *dev_priv = to_i915(dev); + const struct drm_display_mode *adjusted_mode = &pipe_config->base.adjusted_mode; + int clock_limit = dev_priv->max_dotclk_freq; - if (cdclk_sel >= ARRAY_SIZE(div_3200)) - goto fail; + if (INTEL_GEN(dev_priv) < 4) { + clock_limit = dev_priv->max_cdclk_freq * 9 / 10; - switch (vco) { - case 3200000: - div_table = div_3200; - break; - case 4000000: - div_table = div_4000; - break; - case 5333333: - div_table = div_5333; - break; - default: - goto fail; + /* + * Enable double wide mode when the dot clock + * is > 90% of the (display) core speed. + */ + if (intel_crtc_supports_double_wide(crtc) && + adjusted_mode->crtc_clock > clock_limit) { + clock_limit = dev_priv->max_dotclk_freq; + pipe_config->double_wide = true; + } } - return DIV_ROUND_CLOSEST(vco, div_table[cdclk_sel]); - -fail: - DRM_ERROR("Unable to determine CDCLK. HPLL VCO=%u kHz, CFGC=0x%04x\n", vco, tmp); - return 200000; -} - -static int g33_get_cdclk(struct drm_i915_private *dev_priv) -{ - struct pci_dev *pdev = dev_priv->drm.pdev; - static const uint8_t div_3200[] = { 12, 10, 8, 7, 5, 16 }; - static const uint8_t div_4000[] = { 14, 12, 10, 8, 6, 20 }; - static const uint8_t div_4800[] = { 20, 14, 12, 10, 8, 24 }; - static const uint8_t div_5333[] = { 20, 16, 12, 12, 8, 28 }; - const uint8_t *div_table; - unsigned int cdclk_sel, vco = intel_hpll_vco(dev_priv); - uint16_t tmp = 0; + if (adjusted_mode->crtc_clock > clock_limit) { + DRM_DEBUG_KMS("requested pixel clock (%d kHz) too high (max: %d kHz, double wide: %s)\n", + adjusted_mode->crtc_clock, clock_limit, + yesno(pipe_config->double_wide)); + return -EINVAL; + } - pci_read_config_word(pdev, GCFGC, &tmp); + /* + * Pipe horizontal size must be even in: + * - DVO ganged mode + * - LVDS dual channel mode + * - Double wide pipe + */ + if ((intel_crtc_has_type(pipe_config, INTEL_OUTPUT_LVDS) && + intel_is_dual_link_lvds(dev)) || pipe_config->double_wide) + pipe_config->pipe_src_w &= ~1; - cdclk_sel = (tmp >> 4) & 0x7; + /* Cantiga+ cannot handle modes with a hsync front porch of 0. + * WaPruneModeWithIncorrectHsyncOffset:ctg,elk,ilk,snb,ivb,vlv,hsw. + */ + if ((INTEL_GEN(dev_priv) > 4 || IS_G4X(dev_priv)) && + adjusted_mode->crtc_hsync_start == adjusted_mode->crtc_hdisplay) + return -EINVAL; - if (cdclk_sel >= ARRAY_SIZE(div_3200)) - goto fail; + intel_crtc_compute_pixel_rate(pipe_config); - switch (vco) { - case 3200000: - div_table = div_3200; - break; - case 4000000: - div_table = div_4000; - break; - case 4800000: - div_table = div_4800; - break; - case 5333333: - div_table = div_5333; - break; - default: - goto fail; - } + if (HAS_IPS(dev_priv)) + hsw_compute_ips_config(crtc, pipe_config); - return DIV_ROUND_CLOSEST(vco, div_table[cdclk_sel]); + if (pipe_config->has_pch_encoder) + return ironlake_fdi_compute_config(crtc, pipe_config); -fail: - DRM_ERROR("Unable to determine CDCLK. HPLL VCO=%u kHz, CFGC=0x%08x\n", vco, tmp); - return 190476; + return 0; } static void @@ -10250,245 +8866,6 @@ void hsw_disable_pc8(struct drm_i915_private *dev_priv) } } -static void bxt_modeset_commit_cdclk(struct drm_atomic_state *old_state) -{ - struct drm_device *dev = old_state->dev; - struct intel_atomic_state *old_intel_state = - to_intel_atomic_state(old_state); - unsigned int req_cdclk = old_intel_state->dev_cdclk; - - bxt_set_cdclk(to_i915(dev), req_cdclk); -} - -static int bdw_adjust_min_pipe_pixel_rate(struct intel_crtc_state *crtc_state, - int pixel_rate) -{ - struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev); - - /* pixel rate mustn't exceed 95% of cdclk with IPS on BDW */ - if (IS_BROADWELL(dev_priv) && crtc_state->ips_enabled) - pixel_rate = DIV_ROUND_UP(pixel_rate * 100, 95); - - /* BSpec says "Do not use DisplayPort with CDCLK less than - * 432 MHz, audio enabled, port width x4, and link rate - * HBR2 (5.4 GHz), or else there may be audio corruption or - * screen corruption." - */ - if (intel_crtc_has_dp_encoder(crtc_state) && - crtc_state->has_audio && - crtc_state->port_clock >= 540000 && - crtc_state->lane_count == 4) - pixel_rate = max(432000, pixel_rate); - - return pixel_rate; -} - -/* compute the max rate for new configuration */ -static int intel_max_pixel_rate(struct drm_atomic_state *state) -{ - struct intel_atomic_state *intel_state = to_intel_atomic_state(state); - struct drm_i915_private *dev_priv = to_i915(state->dev); - struct drm_crtc *crtc; - struct drm_crtc_state *cstate; - struct intel_crtc_state *crtc_state; - unsigned max_pixel_rate = 0, i; - enum pipe pipe; - - memcpy(intel_state->min_pixclk, dev_priv->min_pixclk, - sizeof(intel_state->min_pixclk)); - - for_each_crtc_in_state(state, crtc, cstate, i) { - int pixel_rate; - - crtc_state = to_intel_crtc_state(cstate); - if (!crtc_state->base.enable) { - intel_state->min_pixclk[i] = 0; - continue; - } - - pixel_rate = crtc_state->pixel_rate; - - if (IS_BROADWELL(dev_priv) || IS_GEN9(dev_priv)) - pixel_rate = bdw_adjust_min_pipe_pixel_rate(crtc_state, - pixel_rate); - - intel_state->min_pixclk[i] = pixel_rate; - } - - for_each_pipe(dev_priv, pipe) - max_pixel_rate = max(intel_state->min_pixclk[pipe], max_pixel_rate); - - return max_pixel_rate; -} - -static void broadwell_set_cdclk(struct drm_device *dev, int cdclk) -{ - struct drm_i915_private *dev_priv = to_i915(dev); - uint32_t val, data; - int ret; - - if (WARN((I915_READ(LCPLL_CTL) & - (LCPLL_PLL_DISABLE | LCPLL_PLL_LOCK | - LCPLL_CD_CLOCK_DISABLE | LCPLL_ROOT_CD_CLOCK_DISABLE | - LCPLL_CD2X_CLOCK_DISABLE | LCPLL_POWER_DOWN_ALLOW | - LCPLL_CD_SOURCE_FCLK)) != LCPLL_PLL_LOCK, - "trying to change cdclk frequency with cdclk not enabled\n")) - return; - - mutex_lock(&dev_priv->rps.hw_lock); - ret = sandybridge_pcode_write(dev_priv, - BDW_PCODE_DISPLAY_FREQ_CHANGE_REQ, 0x0); - mutex_unlock(&dev_priv->rps.hw_lock); - if (ret) { - DRM_ERROR("failed to inform pcode about cdclk change\n"); - return; - } - - val = I915_READ(LCPLL_CTL); - val |= LCPLL_CD_SOURCE_FCLK; - I915_WRITE(LCPLL_CTL, val); - - if (wait_for_us(I915_READ(LCPLL_CTL) & - LCPLL_CD_SOURCE_FCLK_DONE, 1)) - DRM_ERROR("Switching to FCLK failed\n"); - - val = I915_READ(LCPLL_CTL); - val &= ~LCPLL_CLK_FREQ_MASK; - - switch (cdclk) { - case 450000: - val |= LCPLL_CLK_FREQ_450; - data = 0; - break; - case 540000: - val |= LCPLL_CLK_FREQ_54O_BDW; - data = 1; - break; - case 337500: - val |= LCPLL_CLK_FREQ_337_5_BDW; - data = 2; - break; - case 675000: - val |= LCPLL_CLK_FREQ_675_BDW; - data = 3; - break; - default: - WARN(1, "invalid cdclk frequency\n"); - return; - } - - I915_WRITE(LCPLL_CTL, val); - - val = I915_READ(LCPLL_CTL); - val &= ~LCPLL_CD_SOURCE_FCLK; - I915_WRITE(LCPLL_CTL, val); - - if (wait_for_us((I915_READ(LCPLL_CTL) & - LCPLL_CD_SOURCE_FCLK_DONE) == 0, 1)) - DRM_ERROR("Switching back to LCPLL failed\n"); - - mutex_lock(&dev_priv->rps.hw_lock); - sandybridge_pcode_write(dev_priv, HSW_PCODE_DE_WRITE_FREQ_REQ, data); - mutex_unlock(&dev_priv->rps.hw_lock); - - I915_WRITE(CDCLK_FREQ, DIV_ROUND_CLOSEST(cdclk, 1000) - 1); - - intel_update_cdclk(dev_priv); - - WARN(cdclk != dev_priv->cdclk_freq, - "cdclk requested %d kHz but got %d kHz\n", - cdclk, dev_priv->cdclk_freq); -} - -static int broadwell_calc_cdclk(int max_pixclk) -{ - if (max_pixclk > 540000) - return 675000; - else if (max_pixclk > 450000) - return 540000; - else if (max_pixclk > 337500) - return 450000; - else - return 337500; -} - -static int broadwell_modeset_calc_cdclk(struct drm_atomic_state *state) -{ - struct drm_i915_private *dev_priv = to_i915(state->dev); - struct intel_atomic_state *intel_state = to_intel_atomic_state(state); - int max_pixclk = intel_max_pixel_rate(state); - int cdclk; - - /* - * FIXME should also account for plane ratio - * once 64bpp pixel formats are supported. - */ - cdclk = broadwell_calc_cdclk(max_pixclk); - - if (cdclk > dev_priv->max_cdclk_freq) { - DRM_DEBUG_KMS("requested cdclk (%d kHz) exceeds max (%d kHz)\n", - cdclk, dev_priv->max_cdclk_freq); - return -EINVAL; - } - - intel_state->cdclk = intel_state->dev_cdclk = cdclk; - if (!intel_state->active_crtcs) - intel_state->dev_cdclk = broadwell_calc_cdclk(0); - - return 0; -} - -static void broadwell_modeset_commit_cdclk(struct drm_atomic_state *old_state) -{ - struct drm_device *dev = old_state->dev; - struct intel_atomic_state *old_intel_state = - to_intel_atomic_state(old_state); - unsigned req_cdclk = old_intel_state->dev_cdclk; - - broadwell_set_cdclk(dev, req_cdclk); -} - -static int skl_modeset_calc_cdclk(struct drm_atomic_state *state) -{ - struct intel_atomic_state *intel_state = to_intel_atomic_state(state); - struct drm_i915_private *dev_priv = to_i915(state->dev); - const int max_pixclk = intel_max_pixel_rate(state); - int vco = intel_state->cdclk_pll_vco; - int cdclk; - - /* - * FIXME should also account for plane ratio - * once 64bpp pixel formats are supported. - */ - cdclk = skl_calc_cdclk(max_pixclk, vco); - - /* - * FIXME move the cdclk caclulation to - * compute_config() so we can fail gracegully. - */ - if (cdclk > dev_priv->max_cdclk_freq) { - DRM_ERROR("requested cdclk (%d kHz) exceeds max (%d kHz)\n", - cdclk, dev_priv->max_cdclk_freq); - cdclk = dev_priv->max_cdclk_freq; - } - - intel_state->cdclk = intel_state->dev_cdclk = cdclk; - if (!intel_state->active_crtcs) - intel_state->dev_cdclk = skl_calc_cdclk(0, vco); - - return 0; -} - -static void skl_modeset_commit_cdclk(struct drm_atomic_state *old_state) -{ - struct drm_i915_private *dev_priv = to_i915(old_state->dev); - struct intel_atomic_state *intel_state = to_intel_atomic_state(old_state); - unsigned int req_cdclk = intel_state->dev_cdclk; - unsigned int req_vco = intel_state->cdclk_pll_vco; - - skl_set_cdclk(dev_priv, req_cdclk, req_vco); -} - static int haswell_crtc_compute_clock(struct intel_crtc *crtc, struct intel_crtc_state *crtc_state) { @@ -16170,6 +14547,8 @@ static const struct drm_mode_config_funcs intel_mode_funcs = { */ void intel_init_display_hooks(struct drm_i915_private *dev_priv) { + intel_init_cdclk_hooks(dev_priv); + if (INTEL_INFO(dev_priv)->gen >= 9) { dev_priv->display.get_pipe_config = haswell_get_pipe_config; dev_priv->display.get_initial_plane_config = @@ -16238,52 +14617,6 @@ void intel_init_display_hooks(struct drm_i915_private *dev_priv) dev_priv->display.crtc_disable = i9xx_crtc_disable; } - /* Returns the core display clock speed */ - if (IS_GEN9_BC(dev_priv)) - dev_priv->display.get_cdclk = skylake_get_cdclk; - else if (IS_GEN9_LP(dev_priv)) - dev_priv->display.get_cdclk = broxton_get_cdclk; - else if (IS_BROADWELL(dev_priv)) - dev_priv->display.get_cdclk = broadwell_get_cdclk; - else if (IS_HASWELL(dev_priv)) - dev_priv->display.get_cdclk = haswell_get_cdclk; - else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) - dev_priv->display.get_cdclk = valleyview_get_cdclk; - else if (IS_GEN6(dev_priv) || IS_IVYBRIDGE(dev_priv)) - dev_priv->display.get_cdclk = fixed_400mhz_get_cdclk; - else if (IS_GEN5(dev_priv)) - dev_priv->display.get_cdclk = fixed_450mhz_get_cdclk; - else if (IS_GM45(dev_priv)) - dev_priv->display.get_cdclk = gm45_get_cdclk; - else if (IS_G4X(dev_priv)) - dev_priv->display.get_cdclk = g33_get_cdclk; - else if (IS_I965GM(dev_priv)) - dev_priv->display.get_cdclk = i965gm_get_cdclk; - else if (IS_I965G(dev_priv)) - dev_priv->display.get_cdclk = fixed_400mhz_get_cdclk; - else if (IS_PINEVIEW(dev_priv)) - dev_priv->display.get_cdclk = pnv_get_cdclk; - else if (IS_G33(dev_priv)) - dev_priv->display.get_cdclk = g33_get_cdclk; - else if (IS_I945GM(dev_priv)) - dev_priv->display.get_cdclk = i945gm_get_cdclk; - else if (IS_I945G(dev_priv)) - dev_priv->display.get_cdclk = fixed_400mhz_get_cdclk; - else if (IS_I915GM(dev_priv)) - dev_priv->display.get_cdclk = i915gm_get_cdclk; - else if (IS_I915G(dev_priv)) - dev_priv->display.get_cdclk = fixed_333mhz_get_cdclk; - else if (IS_I865G(dev_priv)) - dev_priv->display.get_cdclk = fixed_266mhz_get_cdclk; - else if (IS_I85X(dev_priv)) - dev_priv->display.get_cdclk = i85x_get_cdclk; - else if (IS_I845G(dev_priv)) - dev_priv->display.get_cdclk = fixed_200mhz_get_cdclk; - else { /* 830 */ - WARN(!IS_I830(dev_priv), "Unknown platform. Assuming 133 MHz CDCLK\n"); - dev_priv->display.get_cdclk = fixed_133mhz_get_cdclk; - } - if (IS_GEN5(dev_priv)) { dev_priv->display.fdi_link_train = ironlake_fdi_link_train; } else if (IS_GEN6(dev_priv)) { @@ -16295,28 +14628,6 @@ void intel_init_display_hooks(struct drm_i915_private *dev_priv) dev_priv->display.fdi_link_train = hsw_fdi_link_train; } - if (IS_BROADWELL(dev_priv)) { - dev_priv->display.modeset_commit_cdclk = - broadwell_modeset_commit_cdclk; - dev_priv->display.modeset_calc_cdclk = - broadwell_modeset_calc_cdclk; - } else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) { - dev_priv->display.modeset_commit_cdclk = - valleyview_modeset_commit_cdclk; - dev_priv->display.modeset_calc_cdclk = - valleyview_modeset_calc_cdclk; - } else if (IS_GEN9_LP(dev_priv)) { - dev_priv->display.modeset_commit_cdclk = - bxt_modeset_commit_cdclk; - dev_priv->display.modeset_calc_cdclk = - bxt_modeset_calc_cdclk; - } else if (IS_GEN9_BC(dev_priv)) { - dev_priv->display.modeset_commit_cdclk = - skl_modeset_commit_cdclk; - dev_priv->display.modeset_calc_cdclk = - skl_modeset_calc_cdclk; - } - if (dev_priv->info.gen >= 9) dev_priv->display.update_crtcs = skl_update_crtcs; else diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index e1dbd9aa5701..4bdb1f3cb0b5 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -1246,12 +1246,19 @@ void intel_audio_codec_disable(struct intel_encoder *encoder); void i915_audio_component_init(struct drm_i915_private *dev_priv); void i915_audio_component_cleanup(struct drm_i915_private *dev_priv); +/* intel_cdclk.c */ +void intel_init_cdclk_hooks(struct drm_i915_private *dev_priv); +void intel_update_max_cdclk(struct drm_i915_private *dev_priv); +void intel_update_cdclk(struct drm_i915_private *dev_priv); +void intel_update_rawclk(struct drm_i915_private *dev_priv); + /* intel_display.c */ enum transcoder intel_crtc_pch_transcoder(struct intel_crtc *crtc); -void skl_set_preferred_cdclk_vco(struct drm_i915_private *dev_priv, int vco); void intel_update_rawclk(struct drm_i915_private *dev_priv); int vlv_get_cck_clock(struct drm_i915_private *dev_priv, const char *name, u32 reg, int ref_freq); +int vlv_get_cck_clock_hpll(struct drm_i915_private *dev_priv, + const char *name, u32 reg); void lpt_disable_pch_transcoder(struct drm_i915_private *dev_priv); void lpt_disable_iclkip(struct drm_i915_private *dev_priv); extern const struct drm_plane_funcs intel_plane_funcs; -- cgit From 8f0cfa4d2a628a53e469f98aff7988555d654bc6 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Fri, 20 Jan 2017 20:21:57 +0200 Subject: drm/i915: Pass computed vco to bxt_set_cdclk() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rather than compute the vco inside bxt_set_cdclk() let's precompute it outside and pass it in. A small step towards a fully precomputed cdclk state. Signed-off-by: Ville Syrjälä Reviewed-by: Ander Conselvan de Oliveira Link: http://patchwork.freedesktop.org/patch/msgid/20170120182205.8141-7-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/intel_cdclk.c | 33 +++++++++++++++++++-------------- 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_cdclk.c b/drivers/gpu/drm/i915/intel_cdclk.c index 04a46c14c898..a0736b5f2d47 100644 --- a/drivers/gpu/drm/i915/intel_cdclk.c +++ b/drivers/gpu/drm/i915/intel_cdclk.c @@ -1143,15 +1143,11 @@ static void bxt_de_pll_enable(struct drm_i915_private *dev_priv, int vco) dev_priv->cdclk_pll.vco = vco; } -static void bxt_set_cdclk(struct drm_i915_private *dev_priv, int cdclk) +static void bxt_set_cdclk(struct drm_i915_private *dev_priv, + int cdclk, int vco) { u32 val, divider; - int vco, ret; - - if (IS_GEMINILAKE(dev_priv)) - vco = glk_de_pll_vco(dev_priv, cdclk); - else - vco = bxt_de_pll_vco(dev_priv, cdclk); + int ret; DRM_DEBUG_DRIVER("Changing CDCLK to %d kHz (VCO %d kHz)\n", cdclk, vco); @@ -1284,7 +1280,7 @@ sanitize: */ void bxt_init_cdclk(struct drm_i915_private *dev_priv) { - int cdclk; + int cdclk, vco; bxt_sanitize_cdclk(dev_priv); @@ -1296,12 +1292,15 @@ void bxt_init_cdclk(struct drm_i915_private *dev_priv) * - The initial CDCLK needs to be read from VBT. * Need to make this change after VBT has changes for BXT. */ - if (IS_GEMINILAKE(dev_priv)) + if (IS_GEMINILAKE(dev_priv)) { cdclk = glk_calc_cdclk(0); - else + vco = glk_de_pll_vco(dev_priv, cdclk); + } else { cdclk = bxt_calc_cdclk(0); + vco = bxt_de_pll_vco(dev_priv, cdclk); + } - bxt_set_cdclk(dev_priv, cdclk); + bxt_set_cdclk(dev_priv, cdclk, vco); } /** @@ -1313,7 +1312,7 @@ void bxt_init_cdclk(struct drm_i915_private *dev_priv) */ void bxt_uninit_cdclk(struct drm_i915_private *dev_priv) { - bxt_set_cdclk(dev_priv, dev_priv->cdclk_pll.ref); + bxt_set_cdclk(dev_priv, dev_priv->cdclk_pll.ref, 0); } static int bdw_adjust_min_pipe_pixel_rate(struct intel_crtc_state *crtc_state, @@ -1533,12 +1532,18 @@ static int bxt_modeset_calc_cdclk(struct drm_atomic_state *state) static void bxt_modeset_commit_cdclk(struct drm_atomic_state *old_state) { - struct drm_device *dev = old_state->dev; + struct drm_i915_private *dev_priv = to_i915(old_state->dev); struct intel_atomic_state *old_intel_state = to_intel_atomic_state(old_state); unsigned int req_cdclk = old_intel_state->dev_cdclk; + unsigned int req_vco; + + if (IS_GEMINILAKE(dev_priv)) + req_vco = glk_de_pll_vco(dev_priv, req_cdclk); + else + req_vco = bxt_de_pll_vco(dev_priv, req_cdclk); - bxt_set_cdclk(to_i915(dev), req_cdclk); + bxt_set_cdclk(dev_priv, req_cdclk, req_vco); } static int intel_compute_max_dotclk(struct drm_i915_private *dev_priv) -- cgit From 49cd97a35d9041b53ecf39d447f6a0f8f2de75eb Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Tue, 7 Feb 2017 20:33:45 +0200 Subject: drm/i915: Start moving the cdclk stuff into a distinct state structure MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce intel_cdclk state which for now will track the cdclk frequency, the vco frequency and the reference frequency (not sure we want the last one, but I put it there anyway). We'll also make the .get_cdclk() function fill out this state structure rather than just returning the current cdclk frequency. One immediate benefit is that calling .get_cdclk() will no longer clobber state stored under dev_priv unless ex[plicitly told to do so. Previously it clobbered the vco and reference clocks stored there on some platforms. We'll expand the use of this structure to actually precomputing the state and whatnot later. v2: Constify intel_cdclk_state_compare() v3: Document intel_cdclk_state_compare() v4: Deal with i945gm_get_cdclk() Signed-off-by: Ville Syrjälä Reviewed-by: Ander Conselvan de Oliveira Link: http://patchwork.freedesktop.org/patch/msgid/20170207183345.19763-1-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/i915_debugfs.c | 2 +- drivers/gpu/drm/i915/i915_drv.h | 14 +- drivers/gpu/drm/i915/intel_audio.c | 2 +- drivers/gpu/drm/i915/intel_cdclk.c | 382 +++++++++++++++++++------------- drivers/gpu/drm/i915/intel_display.c | 18 +- drivers/gpu/drm/i915/intel_dp.c | 2 +- drivers/gpu/drm/i915/intel_drv.h | 3 + drivers/gpu/drm/i915/intel_fbc.c | 2 +- drivers/gpu/drm/i915/intel_panel.c | 4 +- drivers/gpu/drm/i915/intel_runtime_pm.c | 5 +- 10 files changed, 258 insertions(+), 176 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 6400f83d3ad1..1ccc2978a21a 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -1281,7 +1281,7 @@ static int i915_frequency_info(struct seq_file *m, void *unused) seq_puts(m, "no P-state info available\n"); } - seq_printf(m, "Current CD clock frequency: %d kHz\n", dev_priv->cdclk_freq); + seq_printf(m, "Current CD clock frequency: %d kHz\n", dev_priv->cdclk.hw.cdclk); seq_printf(m, "Max CD clock frequency: %d kHz\n", dev_priv->max_cdclk_freq); seq_printf(m, "Max pixel clock frequency: %d kHz\n", dev_priv->max_dotclk_freq); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index f41496405484..aa59b2153374 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -600,9 +600,11 @@ struct intel_initial_plane_config; struct intel_crtc; struct intel_limit; struct dpll; +struct intel_cdclk_state; struct drm_i915_display_funcs { - int (*get_cdclk)(struct drm_i915_private *dev_priv); + void (*get_cdclk)(struct drm_i915_private *dev_priv, + struct intel_cdclk_state *cdclk_state); int (*get_fifo_size)(struct drm_i915_private *dev_priv, int plane); int (*compute_pipe_wm)(struct intel_crtc_state *cstate); int (*compute_intermediate_wm)(struct drm_device *dev, @@ -2060,6 +2062,10 @@ struct i915_oa_ops { bool (*oa_buffer_is_empty)(struct drm_i915_private *dev_priv); }; +struct intel_cdclk_state { + unsigned int cdclk, vco, ref; +}; + struct drm_i915_private { struct drm_device drm; @@ -2164,7 +2170,7 @@ struct drm_i915_private { unsigned int fsb_freq, mem_freq, is_ddr3; unsigned int skl_preferred_vco_freq; - unsigned int cdclk_freq, max_cdclk_freq; + unsigned int max_cdclk_freq; /* * For reading holding any crtc lock is sufficient, @@ -2178,8 +2184,8 @@ struct drm_i915_private { unsigned int czclk_freq; struct { - unsigned int vco, ref; - } cdclk_pll; + struct intel_cdclk_state hw; + } cdclk; /** * wq - Driver workqueue for GEM. diff --git a/drivers/gpu/drm/i915/intel_audio.c b/drivers/gpu/drm/i915/intel_audio.c index cd9d20763140..1ab401faed34 100644 --- a/drivers/gpu/drm/i915/intel_audio.c +++ b/drivers/gpu/drm/i915/intel_audio.c @@ -734,7 +734,7 @@ static int i915_audio_component_get_cdclk_freq(struct device *kdev) if (WARN_ON_ONCE(!HAS_DDI(dev_priv))) return -ENODEV; - return dev_priv->cdclk_freq; + return dev_priv->cdclk.hw.cdclk; } /* diff --git a/drivers/gpu/drm/i915/intel_cdclk.c b/drivers/gpu/drm/i915/intel_cdclk.c index a0736b5f2d47..4e74e87a8676 100644 --- a/drivers/gpu/drm/i915/intel_cdclk.c +++ b/drivers/gpu/drm/i915/intel_cdclk.c @@ -51,37 +51,44 @@ * dividers can be programmed correctly. */ -static int fixed_133mhz_get_cdclk(struct drm_i915_private *dev_priv) +static void fixed_133mhz_get_cdclk(struct drm_i915_private *dev_priv, + struct intel_cdclk_state *cdclk_state) { - return 133333; + cdclk_state->cdclk = 133333; } -static int fixed_200mhz_get_cdclk(struct drm_i915_private *dev_priv) +static void fixed_200mhz_get_cdclk(struct drm_i915_private *dev_priv, + struct intel_cdclk_state *cdclk_state) { - return 200000; + cdclk_state->cdclk = 200000; } -static int fixed_266mhz_get_cdclk(struct drm_i915_private *dev_priv) +static void fixed_266mhz_get_cdclk(struct drm_i915_private *dev_priv, + struct intel_cdclk_state *cdclk_state) { - return 266667; + cdclk_state->cdclk = 266667; } -static int fixed_333mhz_get_cdclk(struct drm_i915_private *dev_priv) +static void fixed_333mhz_get_cdclk(struct drm_i915_private *dev_priv, + struct intel_cdclk_state *cdclk_state) { - return 333333; + cdclk_state->cdclk = 333333; } -static int fixed_400mhz_get_cdclk(struct drm_i915_private *dev_priv) +static void fixed_400mhz_get_cdclk(struct drm_i915_private *dev_priv, + struct intel_cdclk_state *cdclk_state) { - return 400000; + cdclk_state->cdclk = 400000; } -static int fixed_450mhz_get_cdclk(struct drm_i915_private *dev_priv) +static void fixed_450mhz_get_cdclk(struct drm_i915_private *dev_priv, + struct intel_cdclk_state *cdclk_state) { - return 450000; + cdclk_state->cdclk = 450000; } -static int i85x_get_cdclk(struct drm_i915_private *dev_priv) +static void i85x_get_cdclk(struct drm_i915_private *dev_priv, + struct intel_cdclk_state *cdclk_state) { struct pci_dev *pdev = dev_priv->drm.pdev; u16 hpllcc = 0; @@ -91,8 +98,10 @@ static int i85x_get_cdclk(struct drm_i915_private *dev_priv) * encoding is different :( * FIXME is this the right way to detect 852GM/852GMV? */ - if (pdev->revision == 0x1) - return 133333; + if (pdev->revision == 0x1) { + cdclk_state->cdclk = 133333; + return; + } pci_bus_read_config_word(pdev->bus, PCI_DEVFN(0, 3), HPLLCC, &hpllcc); @@ -104,56 +113,67 @@ static int i85x_get_cdclk(struct drm_i915_private *dev_priv) case GC_CLOCK_133_200: case GC_CLOCK_133_200_2: case GC_CLOCK_100_200: - return 200000; + cdclk_state->cdclk = 200000; + break; case GC_CLOCK_166_250: - return 250000; + cdclk_state->cdclk = 250000; + break; case GC_CLOCK_100_133: - return 133333; + cdclk_state->cdclk = 133333; + break; case GC_CLOCK_133_266: case GC_CLOCK_133_266_2: case GC_CLOCK_166_266: - return 266667; + cdclk_state->cdclk = 266667; + break; } - - /* Shouldn't happen */ - return 0; } -static int i915gm_get_cdclk(struct drm_i915_private *dev_priv) +static void i915gm_get_cdclk(struct drm_i915_private *dev_priv, + struct intel_cdclk_state *cdclk_state) { struct pci_dev *pdev = dev_priv->drm.pdev; u16 gcfgc = 0; pci_read_config_word(pdev, GCFGC, &gcfgc); - if (gcfgc & GC_LOW_FREQUENCY_ENABLE) - return 133333; + if (gcfgc & GC_LOW_FREQUENCY_ENABLE) { + cdclk_state->cdclk = 133333; + return; + } switch (gcfgc & GC_DISPLAY_CLOCK_MASK) { case GC_DISPLAY_CLOCK_333_320_MHZ: - return 333333; + cdclk_state->cdclk = 333333; + break; default: case GC_DISPLAY_CLOCK_190_200_MHZ: - return 190000; + cdclk_state->cdclk = 190000; + break; } } -static int i945gm_get_cdclk(struct drm_i915_private *dev_priv) +static void i945gm_get_cdclk(struct drm_i915_private *dev_priv, + struct intel_cdclk_state *cdclk_state) { struct pci_dev *pdev = dev_priv->drm.pdev; u16 gcfgc = 0; pci_read_config_word(pdev, GCFGC, &gcfgc); - if (gcfgc & GC_LOW_FREQUENCY_ENABLE) - return 133333; + if (gcfgc & GC_LOW_FREQUENCY_ENABLE) { + cdclk_state->cdclk = 133333; + return; + } switch (gcfgc & GC_DISPLAY_CLOCK_MASK) { case GC_DISPLAY_CLOCK_333_320_MHZ: - return 320000; + cdclk_state->cdclk = 320000; + break; default: case GC_DISPLAY_CLOCK_190_200_MHZ: - return 200000; + cdclk_state->cdclk = 200000; + break; } } @@ -225,7 +245,8 @@ static unsigned int intel_hpll_vco(struct drm_i915_private *dev_priv) return vco; } -static int g33_get_cdclk(struct drm_i915_private *dev_priv) +static void g33_get_cdclk(struct drm_i915_private *dev_priv, + struct intel_cdclk_state *cdclk_state) { struct pci_dev *pdev = dev_priv->drm.pdev; static const uint8_t div_3200[] = { 12, 10, 8, 7, 5, 16 }; @@ -233,9 +254,11 @@ static int g33_get_cdclk(struct drm_i915_private *dev_priv) static const uint8_t div_4800[] = { 20, 14, 12, 10, 8, 24 }; static const uint8_t div_5333[] = { 20, 16, 12, 12, 8, 28 }; const uint8_t *div_table; - unsigned int cdclk_sel, vco = intel_hpll_vco(dev_priv); + unsigned int cdclk_sel; uint16_t tmp = 0; + cdclk_state->vco = intel_hpll_vco(dev_priv); + pci_read_config_word(pdev, GCFGC, &tmp); cdclk_sel = (tmp >> 4) & 0x7; @@ -243,7 +266,7 @@ static int g33_get_cdclk(struct drm_i915_private *dev_priv) if (cdclk_sel >= ARRAY_SIZE(div_3200)) goto fail; - switch (vco) { + switch (cdclk_state->vco) { case 3200000: div_table = div_3200; break; @@ -260,15 +283,18 @@ static int g33_get_cdclk(struct drm_i915_private *dev_priv) goto fail; } - return DIV_ROUND_CLOSEST(vco, div_table[cdclk_sel]); + cdclk_state->cdclk = DIV_ROUND_CLOSEST(cdclk_state->vco, + div_table[cdclk_sel]); + return; fail: DRM_ERROR("Unable to determine CDCLK. HPLL VCO=%u kHz, CFGC=0x%08x\n", - vco, tmp); - return 190476; + cdclk_state->vco, tmp); + cdclk_state->cdclk = 190476; } -static int pnv_get_cdclk(struct drm_i915_private *dev_priv) +static void pnv_get_cdclk(struct drm_i915_private *dev_priv, + struct intel_cdclk_state *cdclk_state) { struct pci_dev *pdev = dev_priv->drm.pdev; u16 gcfgc = 0; @@ -277,32 +303,41 @@ static int pnv_get_cdclk(struct drm_i915_private *dev_priv) switch (gcfgc & GC_DISPLAY_CLOCK_MASK) { case GC_DISPLAY_CLOCK_267_MHZ_PNV: - return 266667; + cdclk_state->cdclk = 266667; + break; case GC_DISPLAY_CLOCK_333_MHZ_PNV: - return 333333; + cdclk_state->cdclk = 333333; + break; case GC_DISPLAY_CLOCK_444_MHZ_PNV: - return 444444; + cdclk_state->cdclk = 444444; + break; case GC_DISPLAY_CLOCK_200_MHZ_PNV: - return 200000; + cdclk_state->cdclk = 200000; + break; default: DRM_ERROR("Unknown pnv display core clock 0x%04x\n", gcfgc); case GC_DISPLAY_CLOCK_133_MHZ_PNV: - return 133333; + cdclk_state->cdclk = 133333; + break; case GC_DISPLAY_CLOCK_167_MHZ_PNV: - return 166667; + cdclk_state->cdclk = 166667; + break; } } -static int i965gm_get_cdclk(struct drm_i915_private *dev_priv) +static void i965gm_get_cdclk(struct drm_i915_private *dev_priv, + struct intel_cdclk_state *cdclk_state) { struct pci_dev *pdev = dev_priv->drm.pdev; static const uint8_t div_3200[] = { 16, 10, 8 }; static const uint8_t div_4000[] = { 20, 12, 10 }; static const uint8_t div_5333[] = { 24, 16, 14 }; const uint8_t *div_table; - unsigned int cdclk_sel, vco = intel_hpll_vco(dev_priv); + unsigned int cdclk_sel; uint16_t tmp = 0; + cdclk_state->vco = intel_hpll_vco(dev_priv); + pci_read_config_word(pdev, GCFGC, &tmp); cdclk_sel = ((tmp >> 8) & 0x1f) - 1; @@ -310,7 +345,7 @@ static int i965gm_get_cdclk(struct drm_i915_private *dev_priv) if (cdclk_sel >= ARRAY_SIZE(div_3200)) goto fail; - switch (vco) { + switch (cdclk_state->vco) { case 3200000: div_table = div_3200; break; @@ -324,53 +359,62 @@ static int i965gm_get_cdclk(struct drm_i915_private *dev_priv) goto fail; } - return DIV_ROUND_CLOSEST(vco, div_table[cdclk_sel]); + cdclk_state->cdclk = DIV_ROUND_CLOSEST(cdclk_state->vco, + div_table[cdclk_sel]); + return; fail: DRM_ERROR("Unable to determine CDCLK. HPLL VCO=%u kHz, CFGC=0x%04x\n", - vco, tmp); - return 200000; + cdclk_state->vco, tmp); + cdclk_state->cdclk = 200000; } -static int gm45_get_cdclk(struct drm_i915_private *dev_priv) +static void gm45_get_cdclk(struct drm_i915_private *dev_priv, + struct intel_cdclk_state *cdclk_state) { struct pci_dev *pdev = dev_priv->drm.pdev; - unsigned int cdclk_sel, vco = intel_hpll_vco(dev_priv); + unsigned int cdclk_sel; uint16_t tmp = 0; + cdclk_state->vco = intel_hpll_vco(dev_priv); + pci_read_config_word(pdev, GCFGC, &tmp); cdclk_sel = (tmp >> 12) & 0x1; - switch (vco) { + switch (cdclk_state->vco) { case 2666667: case 4000000: case 5333333: - return cdclk_sel ? 333333 : 222222; + cdclk_state->cdclk = cdclk_sel ? 333333 : 222222; + break; case 3200000: - return cdclk_sel ? 320000 : 228571; + cdclk_state->cdclk = cdclk_sel ? 320000 : 228571; + break; default: DRM_ERROR("Unable to determine CDCLK. HPLL VCO=%u, CFGC=0x%04x\n", - vco, tmp); - return 222222; + cdclk_state->vco, tmp); + cdclk_state->cdclk = 222222; + break; } } -static int hsw_get_cdclk(struct drm_i915_private *dev_priv) +static void hsw_get_cdclk(struct drm_i915_private *dev_priv, + struct intel_cdclk_state *cdclk_state) { uint32_t lcpll = I915_READ(LCPLL_CTL); uint32_t freq = lcpll & LCPLL_CLK_FREQ_MASK; if (lcpll & LCPLL_CD_SOURCE_FCLK) - return 800000; + cdclk_state->cdclk = 800000; else if (I915_READ(FUSE_STRAP) & HSW_CDCLK_LIMIT) - return 450000; + cdclk_state->cdclk = 450000; else if (freq == LCPLL_CLK_FREQ_450) - return 450000; + cdclk_state->cdclk = 450000; else if (IS_HSW_ULT(dev_priv)) - return 337500; + cdclk_state->cdclk = 337500; else - return 540000; + cdclk_state->cdclk = 540000; } static int vlv_calc_cdclk(struct drm_i915_private *dev_priv, @@ -396,10 +440,13 @@ static int vlv_calc_cdclk(struct drm_i915_private *dev_priv, return 200000; } -static int vlv_get_cdclk(struct drm_i915_private *dev_priv) +static void vlv_get_cdclk(struct drm_i915_private *dev_priv, + struct intel_cdclk_state *cdclk_state) { - return vlv_get_cck_clock_hpll(dev_priv, "cdclk", - CCK_DISPLAY_CLOCK_CONTROL); + cdclk_state->vco = vlv_get_hpll_vco(dev_priv); + cdclk_state->cdclk = vlv_get_cck_clock(dev_priv, "cdclk", + CCK_DISPLAY_CLOCK_CONTROL, + cdclk_state->vco); } static void vlv_program_pfi_credits(struct drm_i915_private *dev_priv) @@ -411,7 +458,7 @@ static void vlv_program_pfi_credits(struct drm_i915_private *dev_priv) else default_credits = PFI_CREDIT(8); - if (dev_priv->cdclk_freq >= dev_priv->czclk_freq) { + if (dev_priv->cdclk.hw.cdclk >= dev_priv->czclk_freq) { /* CHV suggested value is 31 or 63 */ if (IS_CHERRYVIEW(dev_priv)) credits = PFI_CREDIT_63; @@ -443,8 +490,6 @@ static void vlv_set_cdclk(struct drm_device *dev, int cdclk) struct drm_i915_private *dev_priv = to_i915(dev); u32 val, cmd; - WARN_ON(dev_priv->display.get_cdclk(dev_priv) != dev_priv->cdclk_freq); - if (cdclk >= 320000) /* jump to highest voltage for 400MHz too */ cmd = 2; else if (cdclk == 266667) @@ -508,8 +553,6 @@ static void chv_set_cdclk(struct drm_device *dev, int cdclk) struct drm_i915_private *dev_priv = to_i915(dev); u32 val, cmd; - WARN_ON(dev_priv->display.get_cdclk(dev_priv) != dev_priv->cdclk_freq); - switch (cdclk) { case 333333: case 320000: @@ -555,23 +598,24 @@ static int bdw_calc_cdclk(int max_pixclk) return 337500; } -static int bdw_get_cdclk(struct drm_i915_private *dev_priv) +static void bdw_get_cdclk(struct drm_i915_private *dev_priv, + struct intel_cdclk_state *cdclk_state) { uint32_t lcpll = I915_READ(LCPLL_CTL); uint32_t freq = lcpll & LCPLL_CLK_FREQ_MASK; if (lcpll & LCPLL_CD_SOURCE_FCLK) - return 800000; + cdclk_state->cdclk = 800000; else if (I915_READ(FUSE_STRAP) & HSW_CDCLK_LIMIT) - return 450000; + cdclk_state->cdclk = 450000; else if (freq == LCPLL_CLK_FREQ_450) - return 450000; + cdclk_state->cdclk = 450000; else if (freq == LCPLL_CLK_FREQ_54O_BDW) - return 540000; + cdclk_state->cdclk = 540000; else if (freq == LCPLL_CLK_FREQ_337_5_BDW) - return 337500; + cdclk_state->cdclk = 337500; else - return 675000; + cdclk_state->cdclk = 675000; } static void bdw_set_cdclk(struct drm_device *dev, int cdclk) @@ -648,9 +692,9 @@ static void bdw_set_cdclk(struct drm_device *dev, int cdclk) intel_update_cdclk(dev_priv); - WARN(cdclk != dev_priv->cdclk_freq, + WARN(cdclk != dev_priv->cdclk.hw.cdclk, "cdclk requested %d kHz but got %d kHz\n", - cdclk, dev_priv->cdclk_freq); + cdclk, dev_priv->cdclk.hw.cdclk); } static int skl_calc_cdclk(int max_pixclk, int vco) @@ -676,12 +720,13 @@ static int skl_calc_cdclk(int max_pixclk, int vco) } } -static void skl_dpll0_update(struct drm_i915_private *dev_priv) +static void skl_dpll0_update(struct drm_i915_private *dev_priv, + struct intel_cdclk_state *cdclk_state) { u32 val; - dev_priv->cdclk_pll.ref = 24000; - dev_priv->cdclk_pll.vco = 0; + cdclk_state->ref = 24000; + cdclk_state->vco = 0; val = I915_READ(LCPLL1_CTL); if ((val & LCPLL_PLL_ENABLE) == 0) @@ -703,11 +748,11 @@ static void skl_dpll0_update(struct drm_i915_private *dev_priv) case DPLL_CTRL1_LINK_RATE(DPLL_CTRL1_LINK_RATE_1350, SKL_DPLL0): case DPLL_CTRL1_LINK_RATE(DPLL_CTRL1_LINK_RATE_1620, SKL_DPLL0): case DPLL_CTRL1_LINK_RATE(DPLL_CTRL1_LINK_RATE_2700, SKL_DPLL0): - dev_priv->cdclk_pll.vco = 8100000; + cdclk_state->vco = 8100000; break; case DPLL_CTRL1_LINK_RATE(DPLL_CTRL1_LINK_RATE_1080, SKL_DPLL0): case DPLL_CTRL1_LINK_RATE(DPLL_CTRL1_LINK_RATE_2160, SKL_DPLL0): - dev_priv->cdclk_pll.vco = 8640000; + cdclk_state->vco = 8640000; break; default: MISSING_CASE(val & DPLL_CTRL1_LINK_RATE_MASK(SKL_DPLL0)); @@ -715,46 +760,57 @@ static void skl_dpll0_update(struct drm_i915_private *dev_priv) } } -static int skl_get_cdclk(struct drm_i915_private *dev_priv) +static void skl_get_cdclk(struct drm_i915_private *dev_priv, + struct intel_cdclk_state *cdclk_state) { u32 cdctl; - skl_dpll0_update(dev_priv); + skl_dpll0_update(dev_priv, cdclk_state); - if (dev_priv->cdclk_pll.vco == 0) - return dev_priv->cdclk_pll.ref; + cdclk_state->cdclk = cdclk_state->ref; + + if (cdclk_state->vco == 0) + return; cdctl = I915_READ(CDCLK_CTL); - if (dev_priv->cdclk_pll.vco == 8640000) { + if (cdclk_state->vco == 8640000) { switch (cdctl & CDCLK_FREQ_SEL_MASK) { case CDCLK_FREQ_450_432: - return 432000; + cdclk_state->cdclk = 432000; + break; case CDCLK_FREQ_337_308: - return 308571; + cdclk_state->cdclk = 308571; + break; case CDCLK_FREQ_540: - return 540000; + cdclk_state->cdclk = 540000; + break; case CDCLK_FREQ_675_617: - return 617143; + cdclk_state->cdclk = 617143; + break; default: MISSING_CASE(cdctl & CDCLK_FREQ_SEL_MASK); + break; } } else { switch (cdctl & CDCLK_FREQ_SEL_MASK) { case CDCLK_FREQ_450_432: - return 450000; + cdclk_state->cdclk = 450000; + break; case CDCLK_FREQ_337_308: - return 337500; + cdclk_state->cdclk = 337500; + break; case CDCLK_FREQ_540: - return 540000; + cdclk_state->cdclk = 540000; + break; case CDCLK_FREQ_675_617: - return 675000; + cdclk_state->cdclk = 675000; + break; default: MISSING_CASE(cdctl & CDCLK_FREQ_SEL_MASK); + break; } } - - return dev_priv->cdclk_pll.ref; } /* convert from kHz to .1 fixpoint MHz with -1MHz offset */ @@ -817,7 +873,7 @@ static void skl_dpll0_enable(struct drm_i915_private *dev_priv, int vco) 5)) DRM_ERROR("DPLL0 not locked\n"); - dev_priv->cdclk_pll.vco = vco; + dev_priv->cdclk.hw.vco = vco; /* We'll want to keep using the current vco from now on. */ skl_set_preferred_cdclk_vco(dev_priv, vco); @@ -831,7 +887,7 @@ static void skl_dpll0_disable(struct drm_i915_private *dev_priv) 1)) DRM_ERROR("Couldn't disable DPLL0\n"); - dev_priv->cdclk_pll.vco = 0; + dev_priv->cdclk.hw.vco = 0; } static void skl_set_cdclk(struct drm_i915_private *dev_priv, @@ -881,11 +937,11 @@ static void skl_set_cdclk(struct drm_i915_private *dev_priv, break; } - if (dev_priv->cdclk_pll.vco != 0 && - dev_priv->cdclk_pll.vco != vco) + if (dev_priv->cdclk.hw.vco != 0 && + dev_priv->cdclk.hw.vco != vco) skl_dpll0_disable(dev_priv); - if (dev_priv->cdclk_pll.vco != vco) + if (dev_priv->cdclk.hw.vco != vco) skl_dpll0_enable(dev_priv, vco); I915_WRITE(CDCLK_CTL, freq_select | skl_cdclk_decimal(cdclk)); @@ -913,8 +969,8 @@ static void skl_sanitize_cdclk(struct drm_i915_private *dev_priv) intel_update_cdclk(dev_priv); /* Is PLL enabled and locked ? */ - if (dev_priv->cdclk_pll.vco == 0 || - dev_priv->cdclk_freq == dev_priv->cdclk_pll.ref) + if (dev_priv->cdclk.hw.vco == 0 || + dev_priv->cdclk.hw.cdclk == dev_priv->cdclk.hw.ref) goto sanitize; /* DPLL okay; verify the cdclock @@ -925,7 +981,7 @@ static void skl_sanitize_cdclk(struct drm_i915_private *dev_priv) */ cdctl = I915_READ(CDCLK_CTL); expected = (cdctl & CDCLK_FREQ_SEL_MASK) | - skl_cdclk_decimal(dev_priv->cdclk_freq); + skl_cdclk_decimal(dev_priv->cdclk.hw.cdclk); if (cdctl == expected) /* All well; nothing to sanitize */ return; @@ -934,9 +990,9 @@ sanitize: DRM_DEBUG_KMS("Sanitizing cdclk programmed by pre-os\n"); /* force cdclk programming */ - dev_priv->cdclk_freq = 0; + dev_priv->cdclk.hw.cdclk = 0; /* force full PLL disable + enable */ - dev_priv->cdclk_pll.vco = -1; + dev_priv->cdclk.hw.vco = -1; } /** @@ -954,14 +1010,15 @@ void skl_init_cdclk(struct drm_i915_private *dev_priv) skl_sanitize_cdclk(dev_priv); - if (dev_priv->cdclk_freq != 0 && dev_priv->cdclk_pll.vco != 0) { + if (dev_priv->cdclk.hw.cdclk != 0 && + dev_priv->cdclk.hw.vco != 0) { /* * Use the current vco as our initial * guess as to what the preferred vco is. */ if (dev_priv->skl_preferred_vco_freq == 0) skl_set_preferred_cdclk_vco(dev_priv, - dev_priv->cdclk_pll.vco); + dev_priv->cdclk.hw.vco); return; } @@ -982,7 +1039,7 @@ void skl_init_cdclk(struct drm_i915_private *dev_priv) */ void skl_uninit_cdclk(struct drm_i915_private *dev_priv) { - skl_set_cdclk(dev_priv, dev_priv->cdclk_pll.ref, 0); + skl_set_cdclk(dev_priv, dev_priv->cdclk.hw.ref, 0); } static int bxt_calc_cdclk(int max_pixclk) @@ -1013,7 +1070,7 @@ static int bxt_de_pll_vco(struct drm_i915_private *dev_priv, int cdclk) { int ratio; - if (cdclk == dev_priv->cdclk_pll.ref) + if (cdclk == dev_priv->cdclk.hw.ref) return 0; switch (cdclk) { @@ -1030,14 +1087,14 @@ static int bxt_de_pll_vco(struct drm_i915_private *dev_priv, int cdclk) break; } - return dev_priv->cdclk_pll.ref * ratio; + return dev_priv->cdclk.hw.ref * ratio; } static int glk_de_pll_vco(struct drm_i915_private *dev_priv, int cdclk) { int ratio; - if (cdclk == dev_priv->cdclk_pll.ref) + if (cdclk == dev_priv->cdclk.hw.ref) return 0; switch (cdclk) { @@ -1050,15 +1107,16 @@ static int glk_de_pll_vco(struct drm_i915_private *dev_priv, int cdclk) break; } - return dev_priv->cdclk_pll.ref * ratio; + return dev_priv->cdclk.hw.ref * ratio; } -static void bxt_de_pll_update(struct drm_i915_private *dev_priv) +static void bxt_de_pll_update(struct drm_i915_private *dev_priv, + struct intel_cdclk_state *cdclk_state) { u32 val; - dev_priv->cdclk_pll.ref = 19200; - dev_priv->cdclk_pll.vco = 0; + cdclk_state->ref = 19200; + cdclk_state->vco = 0; val = I915_READ(BXT_DE_PLL_ENABLE); if ((val & BXT_DE_PLL_PLL_ENABLE) == 0) @@ -1068,20 +1126,21 @@ static void bxt_de_pll_update(struct drm_i915_private *dev_priv) return; val = I915_READ(BXT_DE_PLL_CTL); - dev_priv->cdclk_pll.vco = (val & BXT_DE_PLL_RATIO_MASK) * - dev_priv->cdclk_pll.ref; + cdclk_state->vco = (val & BXT_DE_PLL_RATIO_MASK) * cdclk_state->ref; } -static int bxt_get_cdclk(struct drm_i915_private *dev_priv) +static void bxt_get_cdclk(struct drm_i915_private *dev_priv, + struct intel_cdclk_state *cdclk_state) { u32 divider; - int div, vco; + int div; - bxt_de_pll_update(dev_priv); + bxt_de_pll_update(dev_priv, cdclk_state); - vco = dev_priv->cdclk_pll.vco; - if (vco == 0) - return dev_priv->cdclk_pll.ref; + cdclk_state->cdclk = cdclk_state->ref; + + if (cdclk_state->vco == 0) + return; divider = I915_READ(CDCLK_CTL) & BXT_CDCLK_CD2X_DIV_SEL_MASK; @@ -1101,10 +1160,10 @@ static int bxt_get_cdclk(struct drm_i915_private *dev_priv) break; default: MISSING_CASE(divider); - return dev_priv->cdclk_pll.ref; + return; } - return DIV_ROUND_CLOSEST(vco, div); + cdclk_state->cdclk = DIV_ROUND_CLOSEST(cdclk_state->vco, div); } static void bxt_de_pll_disable(struct drm_i915_private *dev_priv) @@ -1117,12 +1176,12 @@ static void bxt_de_pll_disable(struct drm_i915_private *dev_priv) 1)) DRM_ERROR("timeout waiting for DE PLL unlock\n"); - dev_priv->cdclk_pll.vco = 0; + dev_priv->cdclk.hw.vco = 0; } static void bxt_de_pll_enable(struct drm_i915_private *dev_priv, int vco) { - int ratio = DIV_ROUND_CLOSEST(vco, dev_priv->cdclk_pll.ref); + int ratio = DIV_ROUND_CLOSEST(vco, dev_priv->cdclk.hw.ref); u32 val; val = I915_READ(BXT_DE_PLL_CTL); @@ -1140,7 +1199,7 @@ static void bxt_de_pll_enable(struct drm_i915_private *dev_priv, int vco) 1)) DRM_ERROR("timeout waiting for DE PLL lock\n"); - dev_priv->cdclk_pll.vco = vco; + dev_priv->cdclk.hw.vco = vco; } static void bxt_set_cdclk(struct drm_i915_private *dev_priv, @@ -1168,7 +1227,7 @@ static void bxt_set_cdclk(struct drm_i915_private *dev_priv, divider = BXT_CDCLK_CD2X_DIV_SEL_1; break; default: - WARN_ON(cdclk != dev_priv->cdclk_pll.ref); + WARN_ON(cdclk != dev_priv->cdclk.hw.ref); WARN_ON(vco != 0); divider = BXT_CDCLK_CD2X_DIV_SEL_1; @@ -1187,11 +1246,11 @@ static void bxt_set_cdclk(struct drm_i915_private *dev_priv, return; } - if (dev_priv->cdclk_pll.vco != 0 && - dev_priv->cdclk_pll.vco != vco) + if (dev_priv->cdclk.hw.vco != 0 && + dev_priv->cdclk.hw.vco != vco) bxt_de_pll_disable(dev_priv); - if (dev_priv->cdclk_pll.vco != vco) + if (dev_priv->cdclk.hw.vco != vco) bxt_de_pll_enable(dev_priv, vco); val = divider | skl_cdclk_decimal(cdclk); @@ -1228,8 +1287,8 @@ static void bxt_sanitize_cdclk(struct drm_i915_private *dev_priv) intel_update_cdclk(dev_priv); - if (dev_priv->cdclk_pll.vco == 0 || - dev_priv->cdclk_freq == dev_priv->cdclk_pll.ref) + if (dev_priv->cdclk.hw.vco == 0 || + dev_priv->cdclk.hw.cdclk == dev_priv->cdclk.hw.ref) goto sanitize; /* DPLL okay; verify the cdclock @@ -1247,12 +1306,12 @@ static void bxt_sanitize_cdclk(struct drm_i915_private *dev_priv) cdctl &= ~BXT_CDCLK_CD2X_PIPE_NONE; expected = (cdctl & BXT_CDCLK_CD2X_DIV_SEL_MASK) | - skl_cdclk_decimal(dev_priv->cdclk_freq); + skl_cdclk_decimal(dev_priv->cdclk.hw.cdclk); /* * Disable SSA Precharge when CD clock frequency < 500 MHz, * enable otherwise. */ - if (dev_priv->cdclk_freq >= 500000) + if (dev_priv->cdclk.hw.cdclk >= 500000) expected |= BXT_CDCLK_SSA_PRECHARGE_ENABLE; if (cdctl == expected) @@ -1263,10 +1322,10 @@ sanitize: DRM_DEBUG_KMS("Sanitizing cdclk programmed by pre-os\n"); /* force cdclk programming */ - dev_priv->cdclk_freq = 0; + dev_priv->cdclk.hw.cdclk = 0; /* force full PLL disable + enable */ - dev_priv->cdclk_pll.vco = -1; + dev_priv->cdclk.hw.vco = -1; } /** @@ -1284,7 +1343,8 @@ void bxt_init_cdclk(struct drm_i915_private *dev_priv) bxt_sanitize_cdclk(dev_priv); - if (dev_priv->cdclk_freq != 0 && dev_priv->cdclk_pll.vco != 0) + if (dev_priv->cdclk.hw.cdclk != 0 && + dev_priv->cdclk.hw.vco != 0) return; /* @@ -1312,7 +1372,21 @@ void bxt_init_cdclk(struct drm_i915_private *dev_priv) */ void bxt_uninit_cdclk(struct drm_i915_private *dev_priv) { - bxt_set_cdclk(dev_priv, dev_priv->cdclk_pll.ref, 0); + bxt_set_cdclk(dev_priv, dev_priv->cdclk.hw.ref, 0); +} + +/** + * intel_cdclk_state_compare - Determine if two CDCLK states differ + * @a: first CDCLK state + * @b: second CDCLK state + * + * Returns: + * True if the CDCLK states are identical, false if they differ. + */ +bool intel_cdclk_state_compare(const struct intel_cdclk_state *a, + const struct intel_cdclk_state *b) +{ + return memcmp(a, b, sizeof(*a)) == 0; } static int bdw_adjust_min_pipe_pixel_rate(struct intel_crtc_state *crtc_state, @@ -1620,7 +1694,7 @@ void intel_update_max_cdclk(struct drm_i915_private *dev_priv) dev_priv->max_cdclk_freq = 400000; } else { /* otherwise assume cdclk is fixed */ - dev_priv->max_cdclk_freq = dev_priv->cdclk_freq; + dev_priv->max_cdclk_freq = dev_priv->cdclk.hw.cdclk; } dev_priv->max_dotclk_freq = intel_compute_max_dotclk(dev_priv); @@ -1640,15 +1714,11 @@ void intel_update_max_cdclk(struct drm_i915_private *dev_priv) */ void intel_update_cdclk(struct drm_i915_private *dev_priv) { - dev_priv->cdclk_freq = dev_priv->display.get_cdclk(dev_priv); + dev_priv->display.get_cdclk(dev_priv, &dev_priv->cdclk.hw); - if (INTEL_GEN(dev_priv) >= 9) - DRM_DEBUG_DRIVER("Current CD clock rate: %d kHz, VCO: %d kHz, ref: %d kHz\n", - dev_priv->cdclk_freq, dev_priv->cdclk_pll.vco, - dev_priv->cdclk_pll.ref); - else - DRM_DEBUG_DRIVER("Current CD clock rate: %d kHz\n", - dev_priv->cdclk_freq); + DRM_DEBUG_DRIVER("Current CD clock rate: %d kHz, VCO: %d kHz, ref: %d kHz\n", + dev_priv->cdclk.hw.cdclk, dev_priv->cdclk.hw.vco, + dev_priv->cdclk.hw.ref); /* * 9:0 CMBUS [sic] CDCLK frequency (cdfreq): @@ -1658,7 +1728,7 @@ void intel_update_cdclk(struct drm_i915_private *dev_priv) */ if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) I915_WRITE(GMBUSFREQ_VLV, - DIV_ROUND_UP(dev_priv->cdclk_freq, 1000)); + DIV_ROUND_UP(dev_priv->cdclk.hw.cdclk, 1000)); } static int pch_rawclk(struct drm_i915_private *dev_priv) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index e996f8ec8f08..691db36d8388 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -135,7 +135,7 @@ struct intel_limit { }; /* returns HPLL frequency in kHz */ -static int valleyview_get_vco(struct drm_i915_private *dev_priv) +int vlv_get_hpll_vco(struct drm_i915_private *dev_priv) { int hpll_freq, vco_freq[] = { 800, 1600, 2000, 2400 }; @@ -171,7 +171,7 @@ int vlv_get_cck_clock_hpll(struct drm_i915_private *dev_priv, const char *name, u32 reg) { if (dev_priv->hpll_freq == 0) - dev_priv->hpll_freq = valleyview_get_vco(dev_priv); + dev_priv->hpll_freq = vlv_get_hpll_vco(dev_priv); return vlv_get_cck_clock(dev_priv, name, reg, dev_priv->hpll_freq); @@ -12413,7 +12413,7 @@ static int intel_modeset_checks(struct drm_atomic_state *state) */ if (dev_priv->display.modeset_calc_cdclk) { if (!intel_state->cdclk_pll_vco) - intel_state->cdclk_pll_vco = dev_priv->cdclk_pll.vco; + intel_state->cdclk_pll_vco = dev_priv->cdclk.hw.vco; if (!intel_state->cdclk_pll_vco) intel_state->cdclk_pll_vco = dev_priv->skl_preferred_vco_freq; @@ -12433,8 +12433,8 @@ static int intel_modeset_checks(struct drm_atomic_state *state) } /* All pipes must be switched off while we change the cdclk. */ - if (intel_state->dev_cdclk != dev_priv->cdclk_freq || - intel_state->cdclk_pll_vco != dev_priv->cdclk_pll.vco) { + if (intel_state->dev_cdclk != dev_priv->cdclk.hw.cdclk || + intel_state->cdclk_pll_vco != dev_priv->cdclk.hw.vco) { ret = intel_modeset_all_pipes(state); if (ret < 0) return ret; @@ -12869,8 +12869,8 @@ static void intel_atomic_commit_tail(struct drm_atomic_state *state) drm_atomic_helper_update_legacy_modeset_state(state->dev, state); if (dev_priv->display.modeset_commit_cdclk && - (intel_state->dev_cdclk != dev_priv->cdclk_freq || - intel_state->cdclk_pll_vco != dev_priv->cdclk_pll.vco)) + (intel_state->dev_cdclk != dev_priv->cdclk.hw.cdclk || + intel_state->cdclk_pll_vco != dev_priv->cdclk.hw.vco)) dev_priv->display.modeset_commit_cdclk(state); /* @@ -14855,7 +14855,7 @@ void intel_modeset_init_hw(struct drm_device *dev) intel_update_cdclk(dev_priv); - dev_priv->atomic_cdclk_freq = dev_priv->cdclk_freq; + dev_priv->atomic_cdclk_freq = dev_priv->cdclk.hw.cdclk; intel_init_clock_gating(dev_priv); } @@ -15031,7 +15031,7 @@ int intel_modeset_init(struct drm_device *dev) intel_update_czclk(dev_priv); intel_update_cdclk(dev_priv); - dev_priv->atomic_cdclk_freq = dev_priv->cdclk_freq; + dev_priv->atomic_cdclk_freq = dev_priv->cdclk.hw.cdclk; intel_shared_dpll_init(dev); diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 21924cf4c8ad..f8c23fed4a2c 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -918,7 +918,7 @@ static uint32_t ilk_get_aux_clock_divider(struct intel_dp *intel_dp, int index) * divide by 2000 and use that */ if (intel_dig_port->port == PORT_A) - return DIV_ROUND_CLOSEST(dev_priv->cdclk_freq, 2000); + return DIV_ROUND_CLOSEST(dev_priv->cdclk.hw.cdclk, 2000); else return DIV_ROUND_CLOSEST(dev_priv->rawclk_freq, 2000); } diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 4bdb1f3cb0b5..ba0728ccff75 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -1251,10 +1251,13 @@ void intel_init_cdclk_hooks(struct drm_i915_private *dev_priv); void intel_update_max_cdclk(struct drm_i915_private *dev_priv); void intel_update_cdclk(struct drm_i915_private *dev_priv); void intel_update_rawclk(struct drm_i915_private *dev_priv); +bool intel_cdclk_state_compare(const struct intel_cdclk_state *a, + const struct intel_cdclk_state *b); /* intel_display.c */ enum transcoder intel_crtc_pch_transcoder(struct intel_crtc *crtc); void intel_update_rawclk(struct drm_i915_private *dev_priv); +int vlv_get_hpll_vco(struct drm_i915_private *dev_priv); int vlv_get_cck_clock(struct drm_i915_private *dev_priv, const char *name, u32 reg, int ref_freq); int vlv_get_cck_clock_hpll(struct drm_i915_private *dev_priv, diff --git a/drivers/gpu/drm/i915/intel_fbc.c b/drivers/gpu/drm/i915/intel_fbc.c index c66ba7e36289..22c56ae086f2 100644 --- a/drivers/gpu/drm/i915/intel_fbc.c +++ b/drivers/gpu/drm/i915/intel_fbc.c @@ -817,7 +817,7 @@ static bool intel_fbc_can_activate(struct intel_crtc *crtc) /* WaFbcExceedCdClockThreshold:hsw,bdw */ if ((IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) && - cache->crtc.hsw_bdw_pixel_rate >= dev_priv->cdclk_freq * 95 / 100) { + cache->crtc.hsw_bdw_pixel_rate >= dev_priv->cdclk.hw.cdclk * 95 / 100) { fbc->no_fbc_reason = "pixel rate is too big"; return false; } diff --git a/drivers/gpu/drm/i915/intel_panel.c b/drivers/gpu/drm/i915/intel_panel.c index 1a6ff26dea20..cb50c527401f 100644 --- a/drivers/gpu/drm/i915/intel_panel.c +++ b/drivers/gpu/drm/i915/intel_panel.c @@ -1315,7 +1315,7 @@ static u32 i9xx_hz_to_pwm(struct intel_connector *connector, u32 pwm_freq_hz) if (IS_PINEVIEW(dev_priv)) clock = KHz(dev_priv->rawclk_freq); else - clock = KHz(dev_priv->cdclk_freq); + clock = KHz(dev_priv->cdclk.hw.cdclk); return DIV_ROUND_CLOSEST(clock, pwm_freq_hz * 32); } @@ -1333,7 +1333,7 @@ static u32 i965_hz_to_pwm(struct intel_connector *connector, u32 pwm_freq_hz) if (IS_G4X(dev_priv)) clock = KHz(dev_priv->rawclk_freq); else - clock = KHz(dev_priv->cdclk_freq); + clock = KHz(dev_priv->cdclk.hw.cdclk); return DIV_ROUND_CLOSEST(clock, pwm_freq_hz * 128); } diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c index 915914f9444c..0f00a5aab69c 100644 --- a/drivers/gpu/drm/i915/intel_runtime_pm.c +++ b/drivers/gpu/drm/i915/intel_runtime_pm.c @@ -964,9 +964,12 @@ static void gen9_assert_dbuf_enabled(struct drm_i915_private *dev_priv) static void gen9_dc_off_power_well_enable(struct drm_i915_private *dev_priv, struct i915_power_well *power_well) { + struct intel_cdclk_state cdclk_state = {}; + gen9_set_dc_state(dev_priv, DC_STATE_DISABLE); - WARN_ON(dev_priv->cdclk_freq != dev_priv->display.get_cdclk(dev_priv)); + dev_priv->display.get_cdclk(dev_priv, &cdclk_state); + WARN_ON(!intel_cdclk_state_compare(&dev_priv->cdclk.hw, &cdclk_state)); gen9_assert_dbuf_enabled(dev_priv); -- cgit From bb0f4aab0e7677e91cde443fecc18e71fbb85038 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Fri, 20 Jan 2017 20:21:59 +0200 Subject: drm/i915: Track full cdclk state for the logical and actual cdclk frequencies MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The current dev_cdclk vs. cdclk vs. atomic_cdclk_freq is quite a mess. So here I'm introducing the "actual" and "logical" naming for our cdclk state. "actual" is what we'll bash into the hardware and "logical" is what everyone should use for state computaion/checking and whatnot. We'll track both using the intel_cdclk_state as both will need other differing parameters than just the actual cdclk frequency. While doing that we can at the same time unify the appearance of the .modeset_calc_cdclk() implementations a little bit. v2: Commit dev_priv->cdclk.actual since that already has the new state by the time .modeset_commit_cdclk() is called. v3: s/locical/logical/ and improve the docs a bit Signed-off-by: Ville Syrjälä Reviewed-by: Ander Conselvan de Oliveira Link: http://patchwork.freedesktop.org/patch/msgid/20170120182205.8141-9-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/i915_drv.h | 20 ++++-- drivers/gpu/drm/i915/intel_cdclk.c | 123 ++++++++++++++++++++++------------- drivers/gpu/drm/i915/intel_display.c | 39 ++++++----- drivers/gpu/drm/i915/intel_dp.c | 2 +- drivers/gpu/drm/i915/intel_drv.h | 24 ++++--- drivers/gpu/drm/i915/intel_pm.c | 4 +- 6 files changed, 128 insertions(+), 84 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index aa59b2153374..323bf93c3e92 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2172,18 +2172,26 @@ struct drm_i915_private { unsigned int skl_preferred_vco_freq; unsigned int max_cdclk_freq; - /* - * For reading holding any crtc lock is sufficient, - * for writing must hold all of them. - */ - unsigned int atomic_cdclk_freq; - unsigned int max_dotclk_freq; unsigned int rawclk_freq; unsigned int hpll_freq; unsigned int czclk_freq; struct { + /* + * The current logical cdclk state. + * See intel_atomic_state.cdclk.logical + * + * For reading holding any crtc lock is sufficient, + * for writing must hold all of them. + */ + struct intel_cdclk_state logical; + /* + * The current actual cdclk state. + * See intel_atomic_state.cdclk.actual + */ + struct intel_cdclk_state actual; + /* The current hardware cdclk state */ struct intel_cdclk_state hw; } cdclk; diff --git a/drivers/gpu/drm/i915/intel_cdclk.c b/drivers/gpu/drm/i915/intel_cdclk.c index 4e74e87a8676..6529a2687fdd 100644 --- a/drivers/gpu/drm/i915/intel_cdclk.c +++ b/drivers/gpu/drm/i915/intel_cdclk.c @@ -1460,12 +1460,26 @@ static int vlv_modeset_calc_cdclk(struct drm_atomic_state *state) int max_pixclk = intel_max_pixel_rate(state); struct intel_atomic_state *intel_state = to_intel_atomic_state(state); + int cdclk; + + cdclk = vlv_calc_cdclk(dev_priv, max_pixclk); + + if (cdclk > dev_priv->max_cdclk_freq) { + DRM_DEBUG_KMS("requested cdclk (%d kHz) exceeds max (%d kHz)\n", + cdclk, dev_priv->max_cdclk_freq); + return -EINVAL; + } - intel_state->cdclk = intel_state->dev_cdclk = - vlv_calc_cdclk(dev_priv, max_pixclk); + intel_state->cdclk.logical.cdclk = cdclk; - if (!intel_state->active_crtcs) - intel_state->dev_cdclk = vlv_calc_cdclk(dev_priv, 0); + if (!intel_state->active_crtcs) { + cdclk = vlv_calc_cdclk(dev_priv, 0); + + intel_state->cdclk.actual.cdclk = cdclk; + } else { + intel_state->cdclk.actual = + intel_state->cdclk.logical; + } return 0; } @@ -1474,9 +1488,7 @@ static void vlv_modeset_commit_cdclk(struct drm_atomic_state *old_state) { struct drm_device *dev = old_state->dev; struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_atomic_state *old_intel_state = - to_intel_atomic_state(old_state); - unsigned int req_cdclk = old_intel_state->dev_cdclk; + unsigned int req_cdclk = dev_priv->cdclk.actual.cdclk; /* * FIXME: We can end up here with all power domains off, yet @@ -1518,9 +1530,16 @@ static int bdw_modeset_calc_cdclk(struct drm_atomic_state *state) return -EINVAL; } - intel_state->cdclk = intel_state->dev_cdclk = cdclk; - if (!intel_state->active_crtcs) - intel_state->dev_cdclk = bdw_calc_cdclk(0); + intel_state->cdclk.logical.cdclk = cdclk; + + if (!intel_state->active_crtcs) { + cdclk = bdw_calc_cdclk(0); + + intel_state->cdclk.actual.cdclk = cdclk; + } else { + intel_state->cdclk.actual = + intel_state->cdclk.logical; + } return 0; } @@ -1528,9 +1547,7 @@ static int bdw_modeset_calc_cdclk(struct drm_atomic_state *state) static void bdw_modeset_commit_cdclk(struct drm_atomic_state *old_state) { struct drm_device *dev = old_state->dev; - struct intel_atomic_state *old_intel_state = - to_intel_atomic_state(old_state); - unsigned int req_cdclk = old_intel_state->dev_cdclk; + unsigned int req_cdclk = to_i915(dev)->cdclk.actual.cdclk; bdw_set_cdclk(dev, req_cdclk); } @@ -1540,8 +1557,11 @@ static int skl_modeset_calc_cdclk(struct drm_atomic_state *state) struct intel_atomic_state *intel_state = to_intel_atomic_state(state); struct drm_i915_private *dev_priv = to_i915(state->dev); const int max_pixclk = intel_max_pixel_rate(state); - int vco = intel_state->cdclk_pll_vco; - int cdclk; + int cdclk, vco; + + vco = intel_state->cdclk.logical.vco; + if (!vco) + vco = dev_priv->skl_preferred_vco_freq; /* * FIXME should also account for plane ratio @@ -1549,19 +1569,24 @@ static int skl_modeset_calc_cdclk(struct drm_atomic_state *state) */ cdclk = skl_calc_cdclk(max_pixclk, vco); - /* - * FIXME move the cdclk caclulation to - * compute_config() so we can fail gracegully. - */ if (cdclk > dev_priv->max_cdclk_freq) { - DRM_ERROR("requested cdclk (%d kHz) exceeds max (%d kHz)\n", - cdclk, dev_priv->max_cdclk_freq); - cdclk = dev_priv->max_cdclk_freq; + DRM_DEBUG_KMS("requested cdclk (%d kHz) exceeds max (%d kHz)\n", + cdclk, dev_priv->max_cdclk_freq); + return -EINVAL; } - intel_state->cdclk = intel_state->dev_cdclk = cdclk; - if (!intel_state->active_crtcs) - intel_state->dev_cdclk = skl_calc_cdclk(0, vco); + intel_state->cdclk.logical.vco = vco; + intel_state->cdclk.logical.cdclk = cdclk; + + if (!intel_state->active_crtcs) { + cdclk = skl_calc_cdclk(0, vco); + + intel_state->cdclk.actual.vco = vco; + intel_state->cdclk.actual.cdclk = cdclk; + } else { + intel_state->cdclk.actual = + intel_state->cdclk.logical; + } return 0; } @@ -1569,10 +1594,8 @@ static int skl_modeset_calc_cdclk(struct drm_atomic_state *state) static void skl_modeset_commit_cdclk(struct drm_atomic_state *old_state) { struct drm_i915_private *dev_priv = to_i915(old_state->dev); - struct intel_atomic_state *intel_state = - to_intel_atomic_state(old_state); - unsigned int req_cdclk = intel_state->dev_cdclk; - unsigned int req_vco = intel_state->cdclk_pll_vco; + unsigned int req_cdclk = dev_priv->cdclk.actual.cdclk; + unsigned int req_vco = dev_priv->cdclk.actual.vco; skl_set_cdclk(dev_priv, req_cdclk, req_vco); } @@ -1583,22 +1606,39 @@ static int bxt_modeset_calc_cdclk(struct drm_atomic_state *state) int max_pixclk = intel_max_pixel_rate(state); struct intel_atomic_state *intel_state = to_intel_atomic_state(state); - int cdclk; + int cdclk, vco; - if (IS_GEMINILAKE(dev_priv)) + if (IS_GEMINILAKE(dev_priv)) { cdclk = glk_calc_cdclk(max_pixclk); - else + vco = glk_de_pll_vco(dev_priv, cdclk); + } else { cdclk = bxt_calc_cdclk(max_pixclk); + vco = bxt_de_pll_vco(dev_priv, cdclk); + } + + if (cdclk > dev_priv->max_cdclk_freq) { + DRM_DEBUG_KMS("requested cdclk (%d kHz) exceeds max (%d kHz)\n", + cdclk, dev_priv->max_cdclk_freq); + return -EINVAL; + } - intel_state->cdclk = intel_state->dev_cdclk = cdclk; + intel_state->cdclk.logical.vco = vco; + intel_state->cdclk.logical.cdclk = cdclk; if (!intel_state->active_crtcs) { - if (IS_GEMINILAKE(dev_priv)) + if (IS_GEMINILAKE(dev_priv)) { cdclk = glk_calc_cdclk(0); - else + vco = glk_de_pll_vco(dev_priv, cdclk); + } else { cdclk = bxt_calc_cdclk(0); + vco = bxt_de_pll_vco(dev_priv, cdclk); + } - intel_state->dev_cdclk = cdclk; + intel_state->cdclk.actual.vco = vco; + intel_state->cdclk.actual.cdclk = cdclk; + } else { + intel_state->cdclk.actual = + intel_state->cdclk.logical; } return 0; @@ -1607,15 +1647,8 @@ static int bxt_modeset_calc_cdclk(struct drm_atomic_state *state) static void bxt_modeset_commit_cdclk(struct drm_atomic_state *old_state) { struct drm_i915_private *dev_priv = to_i915(old_state->dev); - struct intel_atomic_state *old_intel_state = - to_intel_atomic_state(old_state); - unsigned int req_cdclk = old_intel_state->dev_cdclk; - unsigned int req_vco; - - if (IS_GEMINILAKE(dev_priv)) - req_vco = glk_de_pll_vco(dev_priv, req_cdclk); - else - req_vco = bxt_de_pll_vco(dev_priv, req_cdclk); + unsigned int req_cdclk = dev_priv->cdclk.actual.cdclk; + unsigned int req_vco = dev_priv->cdclk.actual.vco; bxt_set_cdclk(dev_priv, req_cdclk, req_vco); } diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 691db36d8388..c0ad2ddaed9b 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -12393,6 +12393,8 @@ static int intel_modeset_checks(struct drm_atomic_state *state) intel_state->modeset = true; intel_state->active_crtcs = dev_priv->active_crtcs; + intel_state->cdclk.logical = dev_priv->cdclk.logical; + intel_state->cdclk.actual = dev_priv->cdclk.actual; for_each_crtc_in_state(state, crtc, crtc_state, i) { if (crtc_state->active) @@ -12412,38 +12414,35 @@ static int intel_modeset_checks(struct drm_atomic_state *state) * adjusted_mode bits in the crtc directly. */ if (dev_priv->display.modeset_calc_cdclk) { - if (!intel_state->cdclk_pll_vco) - intel_state->cdclk_pll_vco = dev_priv->cdclk.hw.vco; - if (!intel_state->cdclk_pll_vco) - intel_state->cdclk_pll_vco = dev_priv->skl_preferred_vco_freq; - ret = dev_priv->display.modeset_calc_cdclk(state); if (ret < 0) return ret; /* - * Writes to dev_priv->atomic_cdclk_freq must protected by + * Writes to dev_priv->cdclk.logical must protected by * holding all the crtc locks, even if we don't end up * touching the hardware */ - if (intel_state->cdclk != dev_priv->atomic_cdclk_freq) { + if (!intel_cdclk_state_compare(&dev_priv->cdclk.logical, + &intel_state->cdclk.logical)) { ret = intel_lock_all_pipes(state); if (ret < 0) return ret; } /* All pipes must be switched off while we change the cdclk. */ - if (intel_state->dev_cdclk != dev_priv->cdclk.hw.cdclk || - intel_state->cdclk_pll_vco != dev_priv->cdclk.hw.vco) { + if (!intel_cdclk_state_compare(&dev_priv->cdclk.actual, + &intel_state->cdclk.actual)) { ret = intel_modeset_all_pipes(state); if (ret < 0) return ret; } - DRM_DEBUG_KMS("New cdclk calculated to be atomic %u, actual %u\n", - intel_state->cdclk, intel_state->dev_cdclk); + DRM_DEBUG_KMS("New cdclk calculated to be logical %u kHz, actual %u kHz\n", + intel_state->cdclk.logical.cdclk, + intel_state->cdclk.actual.cdclk); } else { - to_intel_atomic_state(state)->cdclk = dev_priv->atomic_cdclk_freq; + to_intel_atomic_state(state)->cdclk.logical = dev_priv->cdclk.logical; } intel_modeset_clear_plls(state); @@ -12546,7 +12545,7 @@ static int intel_atomic_check(struct drm_device *dev, if (ret) return ret; } else { - intel_state->cdclk = dev_priv->atomic_cdclk_freq; + intel_state->cdclk.logical = dev_priv->cdclk.logical; } ret = drm_atomic_helper_check_planes(dev, state); @@ -12869,8 +12868,8 @@ static void intel_atomic_commit_tail(struct drm_atomic_state *state) drm_atomic_helper_update_legacy_modeset_state(state->dev, state); if (dev_priv->display.modeset_commit_cdclk && - (intel_state->dev_cdclk != dev_priv->cdclk.hw.cdclk || - intel_state->cdclk_pll_vco != dev_priv->cdclk.hw.vco)) + !intel_cdclk_state_compare(&dev_priv->cdclk.hw, + &dev_priv->cdclk.actual)) dev_priv->display.modeset_commit_cdclk(state); /* @@ -13059,7 +13058,8 @@ static int intel_atomic_commit(struct drm_device *dev, memcpy(dev_priv->min_pixclk, intel_state->min_pixclk, sizeof(intel_state->min_pixclk)); dev_priv->active_crtcs = intel_state->active_crtcs; - dev_priv->atomic_cdclk_freq = intel_state->cdclk; + dev_priv->cdclk.logical = intel_state->cdclk.logical; + dev_priv->cdclk.actual = intel_state->cdclk.actual; } drm_atomic_state_get(state); @@ -13297,7 +13297,7 @@ skl_max_scale(struct intel_crtc *intel_crtc, struct intel_crtc_state *crtc_state return DRM_PLANE_HELPER_NO_SCALING; crtc_clock = crtc_state->base.adjusted_mode.crtc_clock; - cdclk = to_intel_atomic_state(crtc_state->base.state)->cdclk; + cdclk = to_intel_atomic_state(crtc_state->base.state)->cdclk.logical.cdclk; if (WARN_ON_ONCE(!crtc_clock || cdclk < crtc_clock)) return DRM_PLANE_HELPER_NO_SCALING; @@ -14854,8 +14854,7 @@ void intel_modeset_init_hw(struct drm_device *dev) struct drm_i915_private *dev_priv = to_i915(dev); intel_update_cdclk(dev_priv); - - dev_priv->atomic_cdclk_freq = dev_priv->cdclk.hw.cdclk; + dev_priv->cdclk.logical = dev_priv->cdclk.actual = dev_priv->cdclk.hw; intel_init_clock_gating(dev_priv); } @@ -15031,7 +15030,7 @@ int intel_modeset_init(struct drm_device *dev) intel_update_czclk(dev_priv); intel_update_cdclk(dev_priv); - dev_priv->atomic_cdclk_freq = dev_priv->cdclk.hw.cdclk; + dev_priv->cdclk.logical = dev_priv->cdclk.actual = dev_priv->cdclk.hw; intel_shared_dpll_init(dev); diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index f8c23fed4a2c..0f14e97e519b 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -1785,7 +1785,7 @@ found: break; } - to_intel_atomic_state(pipe_config->base.state)->cdclk_pll_vco = vco; + to_intel_atomic_state(pipe_config->base.state)->cdclk.logical.vco = vco; } if (!HAS_DDI(dev_priv)) diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index ba0728ccff75..97d848197ff3 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -333,13 +333,20 @@ struct dpll { struct intel_atomic_state { struct drm_atomic_state base; - unsigned int cdclk; - - /* - * Calculated device cdclk, can be different from cdclk - * only when all crtc's are DPMS off. - */ - unsigned int dev_cdclk; + struct { + /* + * Logical state of cdclk (used for all scaling, watermark, + * etc. calculations and checks). This is computed as if all + * enabled crtcs were active. + */ + struct intel_cdclk_state logical; + + /* + * Actual state of cdclk, can be different from the logical + * state only when all crtc's are DPMS off. + */ + struct intel_cdclk_state actual; + } cdclk; bool dpll_set, modeset; @@ -356,9 +363,6 @@ struct intel_atomic_state { unsigned int active_crtcs; unsigned int min_pixclk[I915_MAX_PIPES]; - /* SKL/KBL Only */ - unsigned int cdclk_pll_vco; - struct intel_shared_dpll_state shared_dpll[I915_NUM_PLLS]; /* diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 79cbe5736ed1..b1d07e63ff8b 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -2108,7 +2108,7 @@ hsw_compute_linetime_wm(const struct intel_crtc_state *cstate) return 0; if (WARN_ON(adjusted_mode->crtc_clock == 0)) return 0; - if (WARN_ON(intel_state->cdclk == 0)) + if (WARN_ON(intel_state->cdclk.logical.cdclk == 0)) return 0; /* The WM are computed with base on how long it takes to fill a single @@ -2117,7 +2117,7 @@ hsw_compute_linetime_wm(const struct intel_crtc_state *cstate) linetime = DIV_ROUND_CLOSEST(adjusted_mode->crtc_htotal * 1000 * 8, adjusted_mode->crtc_clock); ips_linetime = DIV_ROUND_CLOSEST(adjusted_mode->crtc_htotal * 1000 * 8, - intel_state->cdclk); + intel_state->cdclk.logical.cdclk); return PIPE_WM_LINETIME_IPS_LINETIME(ips_linetime) | PIPE_WM_LINETIME_TIME(linetime); -- cgit From 3d5dbb10f34ad0521bfb7091bd5b47f6c984b9aa Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Fri, 20 Jan 2017 20:22:00 +0200 Subject: drm/i915: Pass dev_priv to remainder of the cdclk functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Clean up the dev vs. dev_priv straggles that are making things look inconsistentt. v2: Deal with churn Signed-off-by: Ville Syrjälä Reviewed-by: Ander Conselvan de Oliveira Link: http://patchwork.freedesktop.org/patch/msgid/20170120182205.8141-10-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/intel_cdclk.c | 25 ++++++++++--------------- 1 file changed, 10 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_cdclk.c b/drivers/gpu/drm/i915/intel_cdclk.c index 6529a2687fdd..9d1e2a4859ab 100644 --- a/drivers/gpu/drm/i915/intel_cdclk.c +++ b/drivers/gpu/drm/i915/intel_cdclk.c @@ -485,9 +485,8 @@ static void vlv_program_pfi_credits(struct drm_i915_private *dev_priv) WARN_ON(I915_READ(GCI_CONTROL) & PFI_CREDIT_RESEND); } -static void vlv_set_cdclk(struct drm_device *dev, int cdclk) +static void vlv_set_cdclk(struct drm_i915_private *dev_priv, int cdclk) { - struct drm_i915_private *dev_priv = to_i915(dev); u32 val, cmd; if (cdclk >= 320000) /* jump to highest voltage for 400MHz too */ @@ -548,9 +547,8 @@ static void vlv_set_cdclk(struct drm_device *dev, int cdclk) intel_update_cdclk(dev_priv); } -static void chv_set_cdclk(struct drm_device *dev, int cdclk) +static void chv_set_cdclk(struct drm_i915_private *dev_priv, int cdclk) { - struct drm_i915_private *dev_priv = to_i915(dev); u32 val, cmd; switch (cdclk) { @@ -618,9 +616,8 @@ static void bdw_get_cdclk(struct drm_i915_private *dev_priv, cdclk_state->cdclk = 675000; } -static void bdw_set_cdclk(struct drm_device *dev, int cdclk) +static void bdw_set_cdclk(struct drm_i915_private *dev_priv, int cdclk) { - struct drm_i915_private *dev_priv = to_i915(dev); uint32_t val, data; int ret; @@ -1455,8 +1452,7 @@ static int intel_max_pixel_rate(struct drm_atomic_state *state) static int vlv_modeset_calc_cdclk(struct drm_atomic_state *state) { - struct drm_device *dev = state->dev; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = to_i915(state->dev); int max_pixclk = intel_max_pixel_rate(state); struct intel_atomic_state *intel_state = to_intel_atomic_state(state); @@ -1486,8 +1482,7 @@ static int vlv_modeset_calc_cdclk(struct drm_atomic_state *state) static void vlv_modeset_commit_cdclk(struct drm_atomic_state *old_state) { - struct drm_device *dev = old_state->dev; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = to_i915(old_state->dev); unsigned int req_cdclk = dev_priv->cdclk.actual.cdclk; /* @@ -1502,9 +1497,9 @@ static void vlv_modeset_commit_cdclk(struct drm_atomic_state *old_state) intel_display_power_get(dev_priv, POWER_DOMAIN_PIPE_A); if (IS_CHERRYVIEW(dev_priv)) - chv_set_cdclk(dev, req_cdclk); + chv_set_cdclk(dev_priv, req_cdclk); else - vlv_set_cdclk(dev, req_cdclk); + vlv_set_cdclk(dev_priv, req_cdclk); vlv_program_pfi_credits(dev_priv); @@ -1546,10 +1541,10 @@ static int bdw_modeset_calc_cdclk(struct drm_atomic_state *state) static void bdw_modeset_commit_cdclk(struct drm_atomic_state *old_state) { - struct drm_device *dev = old_state->dev; - unsigned int req_cdclk = to_i915(dev)->cdclk.actual.cdclk; + struct drm_i915_private *dev_priv = to_i915(old_state->dev); + unsigned int req_cdclk = dev_priv->cdclk.actual.cdclk; - bdw_set_cdclk(dev, req_cdclk); + bdw_set_cdclk(dev_priv, req_cdclk); } static int skl_modeset_calc_cdclk(struct drm_atomic_state *state) -- cgit From 83c5fda74f98307a19189414889363341ede8067 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Fri, 20 Jan 2017 20:22:01 +0200 Subject: drm/i915: Pass the cdclk state to the set_cdclk() functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rather than passing all the different parameters (cdclk,vco so far) sparately to the set_cdclk() functions, just pass the entire cdclk state. v2: Deal with churn v3: Drop the usless .ref assignment (Ander) Signed-off-by: Ville Syrjälä Reviewed-by: Ander Conselvan de Oliveira Link: http://patchwork.freedesktop.org/patch/msgid/20170120182205.8141-11-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/intel_cdclk.c | 78 +++++++++++++++++++++++--------------- 1 file changed, 48 insertions(+), 30 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_cdclk.c b/drivers/gpu/drm/i915/intel_cdclk.c index 9d1e2a4859ab..d0dc6f94277c 100644 --- a/drivers/gpu/drm/i915/intel_cdclk.c +++ b/drivers/gpu/drm/i915/intel_cdclk.c @@ -485,8 +485,10 @@ static void vlv_program_pfi_credits(struct drm_i915_private *dev_priv) WARN_ON(I915_READ(GCI_CONTROL) & PFI_CREDIT_RESEND); } -static void vlv_set_cdclk(struct drm_i915_private *dev_priv, int cdclk) +static void vlv_set_cdclk(struct drm_i915_private *dev_priv, + const struct intel_cdclk_state *cdclk_state) { + int cdclk = cdclk_state->cdclk; u32 val, cmd; if (cdclk >= 320000) /* jump to highest voltage for 400MHz too */ @@ -547,8 +549,10 @@ static void vlv_set_cdclk(struct drm_i915_private *dev_priv, int cdclk) intel_update_cdclk(dev_priv); } -static void chv_set_cdclk(struct drm_i915_private *dev_priv, int cdclk) +static void chv_set_cdclk(struct drm_i915_private *dev_priv, + const struct intel_cdclk_state *cdclk_state) { + int cdclk = cdclk_state->cdclk; u32 val, cmd; switch (cdclk) { @@ -616,8 +620,10 @@ static void bdw_get_cdclk(struct drm_i915_private *dev_priv, cdclk_state->cdclk = 675000; } -static void bdw_set_cdclk(struct drm_i915_private *dev_priv, int cdclk) +static void bdw_set_cdclk(struct drm_i915_private *dev_priv, + const struct intel_cdclk_state *cdclk_state) { + int cdclk = cdclk_state->cdclk; uint32_t val, data; int ret; @@ -888,8 +894,10 @@ static void skl_dpll0_disable(struct drm_i915_private *dev_priv) } static void skl_set_cdclk(struct drm_i915_private *dev_priv, - int cdclk, int vco) + const struct intel_cdclk_state *cdclk_state) { + int cdclk = cdclk_state->cdclk; + int vco = cdclk_state->vco; u32 freq_select, pcu_ack; int ret; @@ -1003,7 +1011,7 @@ sanitize: */ void skl_init_cdclk(struct drm_i915_private *dev_priv) { - int cdclk, vco; + struct intel_cdclk_state cdclk_state; skl_sanitize_cdclk(dev_priv); @@ -1019,12 +1027,14 @@ void skl_init_cdclk(struct drm_i915_private *dev_priv) return; } - vco = dev_priv->skl_preferred_vco_freq; - if (vco == 0) - vco = 8100000; - cdclk = skl_calc_cdclk(0, vco); + cdclk_state = dev_priv->cdclk.hw; + + cdclk_state.vco = dev_priv->skl_preferred_vco_freq; + if (cdclk_state.vco == 0) + cdclk_state.vco = 8100000; + cdclk_state.cdclk = skl_calc_cdclk(0, cdclk_state.vco); - skl_set_cdclk(dev_priv, cdclk, vco); + skl_set_cdclk(dev_priv, &cdclk_state); } /** @@ -1036,7 +1046,12 @@ void skl_init_cdclk(struct drm_i915_private *dev_priv) */ void skl_uninit_cdclk(struct drm_i915_private *dev_priv) { - skl_set_cdclk(dev_priv, dev_priv->cdclk.hw.ref, 0); + struct intel_cdclk_state cdclk_state = dev_priv->cdclk.hw; + + cdclk_state.cdclk = cdclk_state.ref; + cdclk_state.vco = 0; + + skl_set_cdclk(dev_priv, &cdclk_state); } static int bxt_calc_cdclk(int max_pixclk) @@ -1200,8 +1215,10 @@ static void bxt_de_pll_enable(struct drm_i915_private *dev_priv, int vco) } static void bxt_set_cdclk(struct drm_i915_private *dev_priv, - int cdclk, int vco) + const struct intel_cdclk_state *cdclk_state) { + int cdclk = cdclk_state->cdclk; + int vco = cdclk_state->vco; u32 val, divider; int ret; @@ -1336,7 +1353,7 @@ sanitize: */ void bxt_init_cdclk(struct drm_i915_private *dev_priv) { - int cdclk, vco; + struct intel_cdclk_state cdclk_state; bxt_sanitize_cdclk(dev_priv); @@ -1344,20 +1361,22 @@ void bxt_init_cdclk(struct drm_i915_private *dev_priv) dev_priv->cdclk.hw.vco != 0) return; + cdclk_state = dev_priv->cdclk.hw; + /* * FIXME: * - The initial CDCLK needs to be read from VBT. * Need to make this change after VBT has changes for BXT. */ if (IS_GEMINILAKE(dev_priv)) { - cdclk = glk_calc_cdclk(0); - vco = glk_de_pll_vco(dev_priv, cdclk); + cdclk_state.cdclk = glk_calc_cdclk(0); + cdclk_state.vco = glk_de_pll_vco(dev_priv, cdclk_state.cdclk); } else { - cdclk = bxt_calc_cdclk(0); - vco = bxt_de_pll_vco(dev_priv, cdclk); + cdclk_state.cdclk = bxt_calc_cdclk(0); + cdclk_state.vco = bxt_de_pll_vco(dev_priv, cdclk_state.cdclk); } - bxt_set_cdclk(dev_priv, cdclk, vco); + bxt_set_cdclk(dev_priv, &cdclk_state); } /** @@ -1369,7 +1388,12 @@ void bxt_init_cdclk(struct drm_i915_private *dev_priv) */ void bxt_uninit_cdclk(struct drm_i915_private *dev_priv) { - bxt_set_cdclk(dev_priv, dev_priv->cdclk.hw.ref, 0); + struct intel_cdclk_state cdclk_state = dev_priv->cdclk.hw; + + cdclk_state.cdclk = cdclk_state.ref; + cdclk_state.vco = 0; + + bxt_set_cdclk(dev_priv, &cdclk_state); } /** @@ -1483,7 +1507,6 @@ static int vlv_modeset_calc_cdclk(struct drm_atomic_state *state) static void vlv_modeset_commit_cdclk(struct drm_atomic_state *old_state) { struct drm_i915_private *dev_priv = to_i915(old_state->dev); - unsigned int req_cdclk = dev_priv->cdclk.actual.cdclk; /* * FIXME: We can end up here with all power domains off, yet @@ -1497,9 +1520,9 @@ static void vlv_modeset_commit_cdclk(struct drm_atomic_state *old_state) intel_display_power_get(dev_priv, POWER_DOMAIN_PIPE_A); if (IS_CHERRYVIEW(dev_priv)) - chv_set_cdclk(dev_priv, req_cdclk); + chv_set_cdclk(dev_priv, &dev_priv->cdclk.actual); else - vlv_set_cdclk(dev_priv, req_cdclk); + vlv_set_cdclk(dev_priv, &dev_priv->cdclk.actual); vlv_program_pfi_credits(dev_priv); @@ -1542,9 +1565,8 @@ static int bdw_modeset_calc_cdclk(struct drm_atomic_state *state) static void bdw_modeset_commit_cdclk(struct drm_atomic_state *old_state) { struct drm_i915_private *dev_priv = to_i915(old_state->dev); - unsigned int req_cdclk = dev_priv->cdclk.actual.cdclk; - bdw_set_cdclk(dev_priv, req_cdclk); + bdw_set_cdclk(dev_priv, &dev_priv->cdclk.actual); } static int skl_modeset_calc_cdclk(struct drm_atomic_state *state) @@ -1589,10 +1611,8 @@ static int skl_modeset_calc_cdclk(struct drm_atomic_state *state) static void skl_modeset_commit_cdclk(struct drm_atomic_state *old_state) { struct drm_i915_private *dev_priv = to_i915(old_state->dev); - unsigned int req_cdclk = dev_priv->cdclk.actual.cdclk; - unsigned int req_vco = dev_priv->cdclk.actual.vco; - skl_set_cdclk(dev_priv, req_cdclk, req_vco); + skl_set_cdclk(dev_priv, &dev_priv->cdclk.actual); } static int bxt_modeset_calc_cdclk(struct drm_atomic_state *state) @@ -1642,10 +1662,8 @@ static int bxt_modeset_calc_cdclk(struct drm_atomic_state *state) static void bxt_modeset_commit_cdclk(struct drm_atomic_state *old_state) { struct drm_i915_private *dev_priv = to_i915(old_state->dev); - unsigned int req_cdclk = dev_priv->cdclk.actual.cdclk; - unsigned int req_vco = dev_priv->cdclk.actual.vco; - bxt_set_cdclk(dev_priv, req_cdclk, req_vco); + bxt_set_cdclk(dev_priv, &dev_priv->cdclk.actual); } static int intel_compute_max_dotclk(struct drm_i915_private *dev_priv) -- cgit From 1a5301a58e5fca15598a1b3e79706b522d763574 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Thu, 26 Jan 2017 21:57:19 +0200 Subject: drm/i915: Move PFI credit reprogramming into vlv/chv_set_cdclk() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move the vlv_program_pfi_credits() into vlv_set_cdclk() and chv_set_cdclk() so that we can neuter vlv_modeset_commit_cdclk(). v2: Do the PFI programming after cdclk readout since it currently depends on the readout to fill dev_priv->cdclk.hw Signed-off-by: Ville Syrjälä Reviewed-by: Ander Conselvan de Oliveira Link: http://patchwork.freedesktop.org/patch/msgid/20170126195719.309-1-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/intel_cdclk.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_cdclk.c b/drivers/gpu/drm/i915/intel_cdclk.c index d0dc6f94277c..305c07820cae 100644 --- a/drivers/gpu/drm/i915/intel_cdclk.c +++ b/drivers/gpu/drm/i915/intel_cdclk.c @@ -547,6 +547,8 @@ static void vlv_set_cdclk(struct drm_i915_private *dev_priv, mutex_unlock(&dev_priv->sb_lock); intel_update_cdclk(dev_priv); + + vlv_program_pfi_credits(dev_priv); } static void chv_set_cdclk(struct drm_i915_private *dev_priv, @@ -586,6 +588,8 @@ static void chv_set_cdclk(struct drm_i915_private *dev_priv, mutex_unlock(&dev_priv->rps.hw_lock); intel_update_cdclk(dev_priv); + + vlv_program_pfi_credits(dev_priv); } static int bdw_calc_cdclk(int max_pixclk) @@ -1524,7 +1528,6 @@ static void vlv_modeset_commit_cdclk(struct drm_atomic_state *old_state) else vlv_set_cdclk(dev_priv, &dev_priv->cdclk.actual); - vlv_program_pfi_credits(dev_priv); intel_display_power_put(dev_priv, POWER_DOMAIN_PIPE_A); } -- cgit From 63ff30442519bb40307f940c5a89a95ca904723a Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Fri, 20 Jan 2017 20:22:03 +0200 Subject: drm/i915: Nuke the VLV/CHV PFI programming power domain workaround MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The hack to grab the pipe A power domain around VLV/CHV cdclk programming has surely outlived its usefulness. We should be holding sufficient power domains during any modeset, so let's just nuke this hack. v2: Fix typo in commit message (Ander) Signed-off-by: Ville Syrjälä Reviewed-by: Ander Conselvan de Oliveira Link: http://patchwork.freedesktop.org/patch/msgid/20170120182205.8141-13-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/intel_cdclk.c | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_cdclk.c b/drivers/gpu/drm/i915/intel_cdclk.c index 305c07820cae..1d4799124e39 100644 --- a/drivers/gpu/drm/i915/intel_cdclk.c +++ b/drivers/gpu/drm/i915/intel_cdclk.c @@ -1512,24 +1512,10 @@ static void vlv_modeset_commit_cdclk(struct drm_atomic_state *old_state) { struct drm_i915_private *dev_priv = to_i915(old_state->dev); - /* - * FIXME: We can end up here with all power domains off, yet - * with a CDCLK frequency other than the minimum. To account - * for this take the PIPE-A power domain, which covers the HW - * blocks needed for the following programming. This can be - * removed once it's guaranteed that we get here either with - * the minimum CDCLK set, or the required power domains - * enabled. - */ - intel_display_power_get(dev_priv, POWER_DOMAIN_PIPE_A); - if (IS_CHERRYVIEW(dev_priv)) chv_set_cdclk(dev_priv, &dev_priv->cdclk.actual); else vlv_set_cdclk(dev_priv, &dev_priv->cdclk.actual); - - - intel_display_power_put(dev_priv, POWER_DOMAIN_PIPE_A); } static int bdw_modeset_calc_cdclk(struct drm_atomic_state *state) -- cgit From b0587e4d20dcf8af3e350b010b4d2a21f95f2702 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Thu, 26 Jan 2017 21:52:01 +0200 Subject: drm/i915: Replace the .modeset_commit_cdclk() hook with a more direct .set_cdclk() hook MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With the cdclk state, all the .modeset_commit_cdclk() hooks are now pointless wrappers. Let's replace them with just a .set_cdclk() function pointer. However let's wrap that in a small helper that does the state comparison and prints a unified debug message across all platforms. We didn't even have the debug print on all platforms previously. This reduces the clutter in intel_atomic_commit_tail() a little bit. v2: Wrap .set_cdclk() in intel_set_cdclk() v3: Add kernel-docs v4: Deal with IS_GEN9_BC() Signed-off-by: Ville Syrjälä Reviewed-by: Ander Conselvan de Oliveira Link: http://patchwork.freedesktop.org/patch/msgid/20170126195201.32638-1-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/i915_drv.h | 3 +- drivers/gpu/drm/i915/intel_cdclk.c | 79 +++++++++++++++--------------------- drivers/gpu/drm/i915/intel_display.c | 5 +-- drivers/gpu/drm/i915/intel_drv.h | 2 + 4 files changed, 38 insertions(+), 51 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 323bf93c3e92..7fb362f1fba5 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -605,6 +605,8 @@ struct intel_cdclk_state; struct drm_i915_display_funcs { void (*get_cdclk)(struct drm_i915_private *dev_priv, struct intel_cdclk_state *cdclk_state); + void (*set_cdclk)(struct drm_i915_private *dev_priv, + const struct intel_cdclk_state *cdclk_state); int (*get_fifo_size)(struct drm_i915_private *dev_priv, int plane); int (*compute_pipe_wm)(struct intel_crtc_state *cstate); int (*compute_intermediate_wm)(struct drm_device *dev, @@ -619,7 +621,6 @@ struct drm_i915_display_funcs { int (*compute_global_watermarks)(struct drm_atomic_state *state); void (*update_wm)(struct intel_crtc *crtc); int (*modeset_calc_cdclk)(struct drm_atomic_state *state); - void (*modeset_commit_cdclk)(struct drm_atomic_state *state); /* Returns the active state of the crtc, and if the crtc is active, * fills out the pipe-config with the hw state. */ bool (*get_pipe_config)(struct intel_crtc *, diff --git a/drivers/gpu/drm/i915/intel_cdclk.c b/drivers/gpu/drm/i915/intel_cdclk.c index 1d4799124e39..d643c0c5321b 100644 --- a/drivers/gpu/drm/i915/intel_cdclk.c +++ b/drivers/gpu/drm/i915/intel_cdclk.c @@ -907,9 +907,6 @@ static void skl_set_cdclk(struct drm_i915_private *dev_priv, WARN_ON((cdclk == 24000) != (vco == 0)); - DRM_DEBUG_DRIVER("Changing CDCLK to %d kHz (VCO %d kHz)\n", - cdclk, vco); - mutex_lock(&dev_priv->rps.hw_lock); ret = skl_pcode_request(dev_priv, SKL_PCODE_CDCLK_CONTROL, SKL_CDCLK_PREPARE_FOR_CHANGE, @@ -1226,9 +1223,6 @@ static void bxt_set_cdclk(struct drm_i915_private *dev_priv, u32 val, divider; int ret; - DRM_DEBUG_DRIVER("Changing CDCLK to %d kHz (VCO %d kHz)\n", - cdclk, vco); - /* cdclk = vco / 2 / div{1,1.5,2,4} */ switch (DIV_ROUND_CLOSEST(vco, cdclk)) { case 8: @@ -1414,6 +1408,30 @@ bool intel_cdclk_state_compare(const struct intel_cdclk_state *a, return memcmp(a, b, sizeof(*a)) == 0; } +/** + * intel_set_cdclk - Push the CDCLK state to the hardware + * @dev_priv: i915 device + * @cdclk_state: new CDCLK state + * + * Program the hardware based on the passed in CDCLK state, + * if necessary. + */ +void intel_set_cdclk(struct drm_i915_private *dev_priv, + const struct intel_cdclk_state *cdclk_state) +{ + if (intel_cdclk_state_compare(&dev_priv->cdclk.hw, cdclk_state)) + return; + + if (WARN_ON_ONCE(!dev_priv->display.set_cdclk)) + return; + + DRM_DEBUG_DRIVER("Changing CDCLK to %d kHz, VCO %d kHz, ref %d kHz\n", + cdclk_state->cdclk, cdclk_state->vco, + cdclk_state->ref); + + dev_priv->display.set_cdclk(dev_priv, cdclk_state); +} + static int bdw_adjust_min_pipe_pixel_rate(struct intel_crtc_state *crtc_state, int pixel_rate) { @@ -1508,16 +1526,6 @@ static int vlv_modeset_calc_cdclk(struct drm_atomic_state *state) return 0; } -static void vlv_modeset_commit_cdclk(struct drm_atomic_state *old_state) -{ - struct drm_i915_private *dev_priv = to_i915(old_state->dev); - - if (IS_CHERRYVIEW(dev_priv)) - chv_set_cdclk(dev_priv, &dev_priv->cdclk.actual); - else - vlv_set_cdclk(dev_priv, &dev_priv->cdclk.actual); -} - static int bdw_modeset_calc_cdclk(struct drm_atomic_state *state) { struct drm_i915_private *dev_priv = to_i915(state->dev); @@ -1551,13 +1559,6 @@ static int bdw_modeset_calc_cdclk(struct drm_atomic_state *state) return 0; } -static void bdw_modeset_commit_cdclk(struct drm_atomic_state *old_state) -{ - struct drm_i915_private *dev_priv = to_i915(old_state->dev); - - bdw_set_cdclk(dev_priv, &dev_priv->cdclk.actual); -} - static int skl_modeset_calc_cdclk(struct drm_atomic_state *state) { struct intel_atomic_state *intel_state = to_intel_atomic_state(state); @@ -1597,13 +1598,6 @@ static int skl_modeset_calc_cdclk(struct drm_atomic_state *state) return 0; } -static void skl_modeset_commit_cdclk(struct drm_atomic_state *old_state) -{ - struct drm_i915_private *dev_priv = to_i915(old_state->dev); - - skl_set_cdclk(dev_priv, &dev_priv->cdclk.actual); -} - static int bxt_modeset_calc_cdclk(struct drm_atomic_state *state) { struct drm_i915_private *dev_priv = to_i915(state->dev); @@ -1648,13 +1642,6 @@ static int bxt_modeset_calc_cdclk(struct drm_atomic_state *state) return 0; } -static void bxt_modeset_commit_cdclk(struct drm_atomic_state *old_state) -{ - struct drm_i915_private *dev_priv = to_i915(old_state->dev); - - bxt_set_cdclk(dev_priv, &dev_priv->cdclk.actual); -} - static int intel_compute_max_dotclk(struct drm_i915_private *dev_priv) { int max_cdclk_freq = dev_priv->max_cdclk_freq; @@ -1834,24 +1821,24 @@ void intel_update_rawclk(struct drm_i915_private *dev_priv) */ void intel_init_cdclk_hooks(struct drm_i915_private *dev_priv) { - if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) { - dev_priv->display.modeset_commit_cdclk = - vlv_modeset_commit_cdclk; + if (IS_CHERRYVIEW(dev_priv)) { + dev_priv->display.set_cdclk = chv_set_cdclk; + dev_priv->display.modeset_calc_cdclk = + vlv_modeset_calc_cdclk; + } else if (IS_VALLEYVIEW(dev_priv)) { + dev_priv->display.set_cdclk = vlv_set_cdclk; dev_priv->display.modeset_calc_cdclk = vlv_modeset_calc_cdclk; } else if (IS_BROADWELL(dev_priv)) { - dev_priv->display.modeset_commit_cdclk = - bdw_modeset_commit_cdclk; + dev_priv->display.set_cdclk = bdw_set_cdclk; dev_priv->display.modeset_calc_cdclk = bdw_modeset_calc_cdclk; } else if (IS_GEN9_LP(dev_priv)) { - dev_priv->display.modeset_commit_cdclk = - bxt_modeset_commit_cdclk; + dev_priv->display.set_cdclk = bxt_set_cdclk; dev_priv->display.modeset_calc_cdclk = bxt_modeset_calc_cdclk; } else if (IS_GEN9_BC(dev_priv)) { - dev_priv->display.modeset_commit_cdclk = - skl_modeset_commit_cdclk; + dev_priv->display.set_cdclk = skl_set_cdclk; dev_priv->display.modeset_calc_cdclk = skl_modeset_calc_cdclk; } diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index c0ad2ddaed9b..ff764878dd6b 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -12867,10 +12867,7 @@ static void intel_atomic_commit_tail(struct drm_atomic_state *state) if (intel_state->modeset) { drm_atomic_helper_update_legacy_modeset_state(state->dev, state); - if (dev_priv->display.modeset_commit_cdclk && - !intel_cdclk_state_compare(&dev_priv->cdclk.hw, - &dev_priv->cdclk.actual)) - dev_priv->display.modeset_commit_cdclk(state); + intel_set_cdclk(dev_priv, &dev_priv->cdclk.actual); /* * SKL workaround: bspec recommends we disable the SAGV when we diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 97d848197ff3..41fc4274afa4 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -1257,6 +1257,8 @@ void intel_update_cdclk(struct drm_i915_private *dev_priv); void intel_update_rawclk(struct drm_i915_private *dev_priv); bool intel_cdclk_state_compare(const struct intel_cdclk_state *a, const struct intel_cdclk_state *b); +void intel_set_cdclk(struct drm_i915_private *dev_priv, + const struct intel_cdclk_state *cdclk_state); /* intel_display.c */ enum transcoder intel_crtc_pch_transcoder(struct intel_crtc *crtc); -- cgit From ceb993201cf270a178671611a15a3b2037434ef1 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Fri, 20 Jan 2017 20:22:05 +0200 Subject: drm/i915: Move ilk_pipe_pixel_rate() to intel_display.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move ilk_pipe_pixel_rate() next to its only caller (intel_crtc_compute_pixel_rate()). Signed-off-by: Ville Syrjälä Reviewed-by: Ander Conselvan de Oliveira Link: http://patchwork.freedesktop.org/patch/msgid/20170120182205.8141-15-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/intel_display.c | 35 +++++++++++++++++++++++++++++++++++ drivers/gpu/drm/i915/intel_drv.h | 1 - drivers/gpu/drm/i915/intel_pm.c | 33 --------------------------------- 3 files changed, 35 insertions(+), 34 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index ff764878dd6b..f6259c949da2 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -6235,6 +6235,41 @@ static bool intel_crtc_supports_double_wide(const struct intel_crtc *crtc) (crtc->pipe == PIPE_A || IS_I915G(dev_priv)); } +static uint32_t ilk_pipe_pixel_rate(const struct intel_crtc_state *pipe_config) +{ + uint32_t pixel_rate; + + pixel_rate = pipe_config->base.adjusted_mode.crtc_clock; + + /* + * We only use IF-ID interlacing. If we ever use + * PF-ID we'll need to adjust the pixel_rate here. + */ + + if (pipe_config->pch_pfit.enabled) { + uint64_t pipe_w, pipe_h, pfit_w, pfit_h; + uint32_t pfit_size = pipe_config->pch_pfit.size; + + pipe_w = pipe_config->pipe_src_w; + pipe_h = pipe_config->pipe_src_h; + + pfit_w = (pfit_size >> 16) & 0xFFFF; + pfit_h = pfit_size & 0xFFFF; + if (pipe_w < pfit_w) + pipe_w = pfit_w; + if (pipe_h < pfit_h) + pipe_h = pfit_h; + + if (WARN_ON(!pfit_w || !pfit_h)) + return pixel_rate; + + pixel_rate = div_u64((uint64_t) pixel_rate * pipe_w * pipe_h, + pfit_w * pfit_h); + } + + return pixel_rate; +} + static void intel_crtc_compute_pixel_rate(struct intel_crtc_state *crtc_state) { struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev); diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 41fc4274afa4..321ba45967aa 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -1822,7 +1822,6 @@ bool skl_wm_level_equals(const struct skl_wm_level *l1, bool skl_ddb_allocation_overlaps(const struct skl_ddb_entry **entries, const struct skl_ddb_entry *ddb, int ignore); -uint32_t ilk_pipe_pixel_rate(const struct intel_crtc_state *pipe_config); bool ilk_disable_lp_wm(struct drm_device *dev); int sanitize_rc6_option(struct drm_i915_private *dev_priv, int enable_rc6); static inline int intel_enable_rc6(void) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index b1d07e63ff8b..c0b0f5a4b9f1 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -1714,39 +1714,6 @@ static void i845_update_wm(struct intel_crtc *unused_crtc) I915_WRITE(FW_BLC, fwater_lo); } -uint32_t ilk_pipe_pixel_rate(const struct intel_crtc_state *pipe_config) -{ - uint32_t pixel_rate; - - pixel_rate = pipe_config->base.adjusted_mode.crtc_clock; - - /* We only use IF-ID interlacing. If we ever use PF-ID we'll need to - * adjust the pixel_rate here. */ - - if (pipe_config->pch_pfit.enabled) { - uint64_t pipe_w, pipe_h, pfit_w, pfit_h; - uint32_t pfit_size = pipe_config->pch_pfit.size; - - pipe_w = pipe_config->pipe_src_w; - pipe_h = pipe_config->pipe_src_h; - - pfit_w = (pfit_size >> 16) & 0xFFFF; - pfit_h = pfit_size & 0xFFFF; - if (pipe_w < pfit_w) - pipe_w = pfit_w; - if (pipe_h < pfit_h) - pipe_h = pfit_h; - - if (WARN_ON(!pfit_w || !pfit_h)) - return pixel_rate; - - pixel_rate = div_u64((uint64_t) pixel_rate * pipe_w * pipe_h, - pfit_w * pfit_h); - } - - return pixel_rate; -} - /* latency must be in 0.1us units. */ static uint32_t ilk_wm_method1(uint32_t pixel_rate, uint8_t cpp, uint32_t latency) { -- cgit From 519d52498156b07c4bc69ad28ab8f784cc124628 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 8 Feb 2017 10:47:10 +0000 Subject: drm/i915: i915_gem_shrink_all() needs an awake device Since to unbind an object, we may need a powered up device to access the GTT entries, we only shrink bound objects if awake. Callers to i915_gem_shrink_all() had to take this into account and take the rpm wakeref, but we can move this wakeref into the shrink_all itself for convenience and making the function live up to its name. Signed-off-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20170208104710.18089-1-chris@chris-wilson.co.uk Reviewed-by: Mika Kuoppala --- drivers/gpu/drm/i915/i915_gem.c | 4 ---- drivers/gpu/drm/i915/i915_gem_shrinker.c | 5 +++-- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index d1841bc1cb50..ee62f66ea540 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4648,14 +4648,10 @@ void i915_gem_load_cleanup(struct drm_i915_private *dev_priv) int i915_gem_freeze(struct drm_i915_private *dev_priv) { - intel_runtime_pm_get(dev_priv); - mutex_lock(&dev_priv->drm.struct_mutex); i915_gem_shrink_all(dev_priv); mutex_unlock(&dev_priv->drm.struct_mutex); - intel_runtime_pm_put(dev_priv); - return 0; } diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c index 401006b4c6a3..f249a1eb46ac 100644 --- a/drivers/gpu/drm/i915/i915_gem_shrinker.c +++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c @@ -259,10 +259,13 @@ unsigned long i915_gem_shrink_all(struct drm_i915_private *dev_priv) { unsigned long freed; + intel_runtime_pm_get(dev_priv); freed = i915_gem_shrink(dev_priv, -1UL, I915_SHRINK_BOUND | I915_SHRINK_UNBOUND | I915_SHRINK_ACTIVE); + intel_runtime_pm_put(dev_priv); + rcu_barrier(); /* wait until our RCU delayed slab frees are completed */ return freed; @@ -380,9 +383,7 @@ i915_gem_shrinker_oom(struct notifier_block *nb, unsigned long event, void *ptr) if (!i915_gem_shrinker_lock_uninterruptible(dev_priv, &slu, 5000)) return NOTIFY_DONE; - intel_runtime_pm_get(dev_priv); freed_pages = i915_gem_shrink_all(dev_priv); - intel_runtime_pm_put(dev_priv); /* Because we may be allocating inside our own driver, we cannot * assert that there are no objects with pinned pages that are not -- cgit From 20a8a74ad9473e498da47e3094fa73057f3246d1 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 8 Feb 2017 14:30:31 +0000 Subject: drm/i915: Move calling engine->init_hw() to its own function Just a simple refactor to move a loop and some support code out of i915_gem_init_hw(). This is in preparation for avoiding a race between the tasklet writing to ELSP whilst simultaneously being written by engine->init_hw() following a GPU reset. Signed-off-by: Chris Wilson Cc: Mika Kuoppala Reviewed-by: Mika Kuoppala Link: http://patchwork.freedesktop.org/patch/msgid/20170208143033.11651-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem.c | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index ee62f66ea540..85251964ed8d 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4367,11 +4367,24 @@ static void init_unused_rings(struct drm_i915_private *dev_priv) } } -int -i915_gem_init_hw(struct drm_i915_private *dev_priv) +static int __i915_gem_restart_engines(void *data) { + struct drm_i915_private *i915 = data; struct intel_engine_cs *engine; enum intel_engine_id id; + int err; + + for_each_engine(engine, i915, id) { + err = engine->init_hw(engine); + if (err) + return err; + } + + return 0; +} + +int i915_gem_init_hw(struct drm_i915_private *dev_priv) +{ int ret; dev_priv->gt.last_init_time = ktime_get(); @@ -4417,11 +4430,9 @@ i915_gem_init_hw(struct drm_i915_private *dev_priv) } /* Need to do basic initialisation of all rings first: */ - for_each_engine(engine, dev_priv, id) { - ret = engine->init_hw(engine); - if (ret) - goto out; - } + ret = __i915_gem_restart_engines(dev_priv); + if (ret) + goto out; intel_mocs_init_l3cc_table(dev_priv); -- cgit From d80270931314a88d79d9bd5e0a5df93c12196375 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 8 Feb 2017 14:30:32 +0000 Subject: drm/i915: Split GEM resetting into 3 phases Currently we do a reset prepare/finish around the call to reset the GPU, but it looks like we need a later stage after the hw has been reinitialised to allow GEM to restart itself. Start by splitting the 2 GEM phases into 3: prepare - before the reset, check if GEM recovered, then stop GEM reset - after the reset, update GEM bookkeeping finish - after the re-initialisation following the reset, restart GEM Signed-off-by: Chris Wilson Cc: Mika Kuoppala Reviewed-by: Mika Kuoppala Link: http://patchwork.freedesktop.org/patch/msgid/20170208143033.11651-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_drv.c | 3 ++- drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/i915_gem.c | 7 ++++++- 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 78191e00a00c..d48c02a7933f 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -1836,7 +1836,7 @@ void i915_reset(struct drm_i915_private *dev_priv) goto error; } - i915_gem_reset_finish(dev_priv); + i915_gem_reset(dev_priv); intel_overlay_reset(dev_priv); /* Ok, now get things going again... */ @@ -1859,6 +1859,7 @@ void i915_reset(struct drm_i915_private *dev_priv) goto error; } + i915_gem_reset_finish(dev_priv); i915_queue_hangcheck(dev_priv); wakeup: diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 7fb362f1fba5..604b20ff0390 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3362,6 +3362,7 @@ static inline u32 i915_reset_count(struct i915_gpu_error *error) } int i915_gem_reset_prepare(struct drm_i915_private *dev_priv); +void i915_gem_reset(struct drm_i915_private *dev_priv); void i915_gem_reset_finish(struct drm_i915_private *dev_priv); void i915_gem_set_wedged(struct drm_i915_private *dev_priv); void i915_gem_clflush_object(struct drm_i915_gem_object *obj, bool force); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 85251964ed8d..e8129935d78b 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2758,7 +2758,7 @@ static void i915_gem_reset_engine(struct intel_engine_cs *engine) engine->reset_hw(engine, request); } -void i915_gem_reset_finish(struct drm_i915_private *dev_priv) +void i915_gem_reset(struct drm_i915_private *dev_priv) { struct intel_engine_cs *engine; enum intel_engine_id id; @@ -2780,6 +2780,11 @@ void i915_gem_reset_finish(struct drm_i915_private *dev_priv) } } +void i915_gem_reset_finish(struct drm_i915_private *dev_priv) +{ + lockdep_assert_held(&dev_priv->drm.struct_mutex); +} + static void nop_submit_request(struct drm_i915_gem_request *request) { dma_fence_set_error(&request->fence, -EIO); -- cgit From 1f7b847d72c3583df5048d83bd945d0c2c524c28 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 8 Feb 2017 14:30:33 +0000 Subject: drm/i915: Disable engine->irq_tasklet around resets When we restart the engines, and we have active requests, a request on the first engine may complete and queue a request to the second engine before we try to restart the second engine. That queueing of the request may race with the engine to restart, and so may corrupt the current state. Disabling the engine->irq_tasklet prevents the two paths from writing into ELSP simultaneously (and modifyin the execlists_port[] at the same time). Fixes: 821ed7df6e2a ("drm/i915: Update reset path to fix incomplete requests") Testcase: igt/gem_exec_fence/await-hang Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: Mika Kuoppala Reviewed-by: Mika Kuoppala Link: http://patchwork.freedesktop.org/patch/msgid/20170208143033.11651-3-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index e8129935d78b..239e5144dbda 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2643,6 +2643,15 @@ int i915_gem_reset_prepare(struct drm_i915_private *dev_priv) for_each_engine(engine, dev_priv, id) { struct drm_i915_gem_request *request; + /* Prevent request submission to the hardware until we have + * completed the reset in i915_gem_reset_finish(). If a request + * is completed by one engine, it may then queue a request + * to a second via its engine->irq_tasklet *just* as we are + * calling engine->init_hw() and also writing the ELSP. + * Turning off the engine->irq_tasklet until the reset is over + * prevents the race. + */ + tasklet_disable(&engine->irq_tasklet); tasklet_kill(&engine->irq_tasklet); if (engine_stalled(engine)) { @@ -2782,7 +2791,13 @@ void i915_gem_reset(struct drm_i915_private *dev_priv) void i915_gem_reset_finish(struct drm_i915_private *dev_priv) { + struct intel_engine_cs *engine; + enum intel_engine_id id; + lockdep_assert_held(&dev_priv->drm.struct_mutex); + + for_each_engine(engine, dev_priv, id) + tasklet_enable(&engine->irq_tasklet); } static void nop_submit_request(struct drm_i915_gem_request *request) -- cgit From ca4c38909fabb1bf18e2cb805569c9559d7a56dc Mon Sep 17 00:00:00 2001 From: Ander Conselvan de Oliveira Date: Fri, 3 Feb 2017 16:03:13 +0200 Subject: drm/i915: Remove WA for swapped HPD pins in broxton A stepping Remove workaround for swapped HPD pins in broxton A stepping, which is pre-production hardware. Cc: Jani Nikula Signed-off-by: Ander Conselvan de Oliveira Reviewed-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/20170203140316.20792-1-ander.conselvan.de.oliveira@intel.com --- drivers/gpu/drm/i915/intel_ddi.c | 9 +-------- drivers/gpu/drm/i915/intel_dp.c | 2 -- drivers/gpu/drm/i915/intel_hdmi.c | 9 +-------- 3 files changed, 2 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index d957211c76ad..b5b3065cb60c 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -2264,14 +2264,7 @@ void intel_ddi_init(struct drm_i915_private *dev_priv, enum port port) goto err; intel_dig_port->hpd_pulse = intel_dp_hpd_pulse; - /* - * On BXT A0/A1, sw needs to activate DDIA HPD logic and - * interrupts to check the external panel connection. - */ - if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1) && port == PORT_B) - dev_priv->hotplug.irq_port[PORT_A] = intel_dig_port; - else - dev_priv->hotplug.irq_port[port] = intel_dig_port; + dev_priv->hotplug.irq_port[port] = intel_dig_port; } /* In theory we don't need the encoder->type check, but leave it just in diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 0f14e97e519b..dee5fc758109 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -5995,8 +5995,6 @@ intel_dp_init_connector(struct intel_digital_port *intel_dig_port, break; case PORT_B: intel_encoder->hpd_pin = HPD_PORT_B; - if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) - intel_encoder->hpd_pin = HPD_PORT_A; break; case PORT_C: intel_encoder->hpd_pin = HPD_PORT_C; diff --git a/drivers/gpu/drm/i915/intel_hdmi.c b/drivers/gpu/drm/i915/intel_hdmi.c index af16b0fa6b69..6c83442e2152 100644 --- a/drivers/gpu/drm/i915/intel_hdmi.c +++ b/drivers/gpu/drm/i915/intel_hdmi.c @@ -1868,14 +1868,7 @@ void intel_hdmi_init_connector(struct intel_digital_port *intel_dig_port, switch (port) { case PORT_B: - /* - * On BXT A0/A1, sw needs to activate DDIA HPD logic and - * interrupts to check the external panel connection. - */ - if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) - intel_encoder->hpd_pin = HPD_PORT_A; - else - intel_encoder->hpd_pin = HPD_PORT_B; + intel_encoder->hpd_pin = HPD_PORT_B; break; case PORT_C: intel_encoder->hpd_pin = HPD_PORT_C; -- cgit From b71953a16da6fd2df2cb73824064f885e9a582d0 Mon Sep 17 00:00:00 2001 From: Ander Conselvan de Oliveira Date: Fri, 3 Feb 2017 16:03:14 +0200 Subject: drm/i915/dp: Move initialization of hpd_pin to a new function This shaves a few lines from intel_dp_init_connector() and will serve as a good place to add other port specific information in a follow up patch. While at it, convert BUG() to MISSING_CASE() in the default case. v2: s/BUG/MISSING_CASE. (Chris) Cc: Chris Wilson Cc: Jani Nikula Signed-off-by: Ander Conselvan de Oliveira Reviewed-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/20170203140316.20792-2-ander.conselvan.de.oliveira@intel.com --- drivers/gpu/drm/i915/intel_dp.c | 48 ++++++++++++++++++++++++----------------- 1 file changed, 28 insertions(+), 20 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index dee5fc758109..fa77e96c49c7 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -5903,6 +5903,33 @@ out_vdd_off: return false; } +static void +intel_dp_init_connector_port_info(struct intel_digital_port *intel_dig_port) +{ + struct intel_encoder *encoder = &intel_dig_port->base; + + /* Set up the hotplug pin. */ + switch (intel_dig_port->port) { + case PORT_A: + encoder->hpd_pin = HPD_PORT_A; + break; + case PORT_B: + encoder->hpd_pin = HPD_PORT_B; + break; + case PORT_C: + encoder->hpd_pin = HPD_PORT_C; + break; + case PORT_D: + encoder->hpd_pin = HPD_PORT_D; + break; + case PORT_E: + encoder->hpd_pin = HPD_PORT_E; + break; + default: + MISSING_CASE(intel_dig_port->port); + } +} + bool intel_dp_init_connector(struct intel_digital_port *intel_dig_port, struct intel_connector *intel_connector) @@ -5988,26 +6015,7 @@ intel_dp_init_connector(struct intel_digital_port *intel_dig_port, else intel_connector->get_hw_state = intel_connector_get_hw_state; - /* Set up the hotplug pin. */ - switch (port) { - case PORT_A: - intel_encoder->hpd_pin = HPD_PORT_A; - break; - case PORT_B: - intel_encoder->hpd_pin = HPD_PORT_B; - break; - case PORT_C: - intel_encoder->hpd_pin = HPD_PORT_C; - break; - case PORT_D: - intel_encoder->hpd_pin = HPD_PORT_D; - break; - case PORT_E: - intel_encoder->hpd_pin = HPD_PORT_E; - break; - default: - BUG(); - } + intel_dp_init_connector_port_info(intel_dig_port); /* init MST on ports that can support it */ if (HAS_DP_MST(dev_priv) && !is_edp(intel_dp) && -- cgit From e81ecb5e31db6c2a259d694738cf620d9fa70861 Mon Sep 17 00:00:00 2001 From: Zhi Wang Date: Wed, 8 Feb 2017 21:03:33 +0800 Subject: drm/i915: A hotfix for making aliasing PPGTT work for GVT-g This patch makes PPGTT page table non-shrinkable when using aliasing PPGTT mode. It's just a temporary solution for making GVT-g work. Fixes: 2ce5179fe826 ("drm/i915/gtt: Free unused lower-level page tables") Cc: Tvrtko Ursulin Cc: Michal Winiarski Cc: Michel Thierry Cc: Mika Kuoppala Cc: Joonas Lahtinen Cc: Chris Wilson Cc: Daniel Vetter Cc: Zhenyu Wang Cc: Zhiyuan Lv Signed-off-by: Zhi Wang Link: http://patchwork.freedesktop.org/patch/msgid/1486559013-25251-2-git-send-email-zhi.a.wang@intel.com Reviewed-by: Chris Wilson Cc: # v4.10-rc1+ Cc: stable@vger.kernel.org Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_gem_gtt.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 850d40ec77af..90acddbaaa5e 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -755,9 +755,10 @@ static bool gen8_ppgtt_clear_pt(struct i915_address_space *vm, GEM_BUG_ON(pte_end > GEN8_PTES); bitmap_clear(pt->used_ptes, pte, num_entries); - - if (bitmap_empty(pt->used_ptes, GEN8_PTES)) - return true; + if (USES_FULL_PPGTT(vm->i915)) { + if (bitmap_empty(pt->used_ptes, GEN8_PTES)) + return true; + } pt_vaddr = kmap_px(pt); -- cgit From 969bb72cbfd906d347cf76dc9b8c8dbaf83ba27a Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 8 Feb 2017 18:12:38 +0000 Subject: drm/i915: Check for timeout completion when waiting for the rq to submitted We first wait for a request to be submitted to hw and assigned a seqno, before we can wait for the hw to signal completion (otherwise we don't know the hw id we need to wait upon). Whilst waiting for the request to be submitted, we may exceed the user's timeout and need to propagate the error back. v2: Make ETIME into an error from wait_for_execute for consistent exit handling. Reported-by: Tvrtko Ursulin Fixes: 4680816be336 ("drm/i915: Wait first for submission, before waiting for request completion") Testcase: igt/gem_wait/basic-await Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: Joonas Lahtinen Cc: # v4.10-rc1+ Cc: stable@vger.kernel.org Link: http://patchwork.freedesktop.org/patch/msgid/20170208181238.7232-1-chris@chris-wilson.co.uk Reviewed-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/i915_gem_request.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index 72b7f7d9461d..f31deeb72703 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -1025,8 +1025,13 @@ __i915_request_wait_for_execute(struct drm_i915_gem_request *request, break; } + if (!timeout) { + timeout = -ETIME; + break; + } + timeout = io_schedule_timeout(timeout); - } while (timeout); + } while (1); finish_wait(&request->execute.wait, &wait); if (flags & I915_WAIT_LOCKED) -- cgit From e1cc3db020c7ef864b7aa02d5e193a110f8f3450 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 9 Feb 2017 11:19:33 +0000 Subject: drm/i915: Assert that we never create a vma for the aliasing_ppgtt The aliasing_ppgtt is just a container for the HW context that mirrors the global gtt. It should never be used directly, so assert if we make the mistake of trying to allocate a VMA for it. Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170209111933.12420-1-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_vma.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 155906e84812..e75494c1ef52 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -78,6 +78,9 @@ vma_create(struct drm_i915_gem_object *obj, struct rb_node *rb, **p; int i; + /* The aliasing_ppgtt should never be used directly! */ + GEM_BUG_ON(vm == &vm->i915->mm.aliasing_ppgtt->base); + vma = kmem_cache_zalloc(vm->i915->vmas, GFP_KERNEL); if (vma == NULL) return ERR_PTR(-ENOMEM); -- cgit From 6d1f9fb3120c9865c28f56ceba8d706a9ecb4b56 Mon Sep 17 00:00:00 2001 From: Joonas Lahtinen Date: Thu, 9 Feb 2017 13:34:25 +0200 Subject: drm/i915: Add __destroy_hw_context __create_hw_context can use a good counterpart. Signed-off-by: Joonas Lahtinen Cc: Chris Wilson Reviewed-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/1486640065-13695-1-git-send-email-joonas.lahtinen@linux.intel.com --- drivers/gpu/drm/i915/i915_gem_context.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 680105421bb9..0c9acd29c4d1 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -351,6 +351,13 @@ err_out: return ERR_PTR(ret); } +static void __destroy_hw_context(struct i915_gem_context *ctx, + struct drm_i915_file_private *file_priv) +{ + idr_remove(&file_priv->context_idr, ctx->user_handle); + context_close(ctx); +} + /** * The default context needs to exist per ring that uses contexts. It stores the * context state of the GPU for applications that don't utilize HW contexts, as @@ -375,8 +382,7 @@ i915_gem_create_context(struct drm_i915_private *dev_priv, if (IS_ERR(ppgtt)) { DRM_DEBUG_DRIVER("PPGTT setup failed (%ld)\n", PTR_ERR(ppgtt)); - idr_remove(&file_priv->context_idr, ctx->user_handle); - context_close(ctx); + __destroy_hw_context(ctx, file_priv); return ERR_CAST(ppgtt); } @@ -1038,8 +1044,7 @@ int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data, return PTR_ERR(ctx); } - idr_remove(&file_priv->context_idr, ctx->user_handle); - context_close(ctx); + __destroy_hw_context(ctx, file_priv); mutex_unlock(&dev->struct_mutex); DRM_DEBUG("HW context %d destroyed\n", args->ctx_id); -- cgit From 8d2b47dde8a097e6fef2ebb5042cbb267cc75adf Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Thu, 2 Feb 2017 08:41:42 +0100 Subject: drm/i915: Enable atomic support by default on supported platforms. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit i915 is pretty much feature complete. Support for atomic i915-specific connector properties is still missing; those properties can (for now) only be set through the legacy ioctl. ILK style watermarks and gen9+ watermarks are handled atomically, and nonblocking modesets work. FBC has also been made to work with atomic. gen4x- and vlv/chv watermarks still need to be fixed, so disable atomic by default there for now. Flip the switch!! Signed-off-by: Maarten Lankhorst Cc: Ander Conselvan de Oliveira Cc: Chris Wilson Cc: Daniel Vetter Cc: Lyude Cc: Matt Roper Cc: Paulo Zanoni Cc: Rodrigo Vivi Cc: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/1486021302-24910-1-git-send-email-maarten.lankhorst@linux.intel.com [mlankhorst: Fix checkpatch warning about extra space in match_info] Acked-by: Daniel Stone Reviewed-by: Lyude --- drivers/gpu/drm/i915/i915_drv.c | 10 +++++++--- drivers/gpu/drm/i915/i915_params.c | 4 ++-- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index d48c02a7933f..a17dde86dc8c 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -1214,11 +1214,15 @@ static void i915_driver_unregister(struct drm_i915_private *dev_priv) */ int i915_driver_load(struct pci_dev *pdev, const struct pci_device_id *ent) { + const struct intel_device_info *match_info = + (struct intel_device_info *)ent->driver_data; struct drm_i915_private *dev_priv; int ret; - if (i915.nuclear_pageflip) - driver.driver_features |= DRIVER_ATOMIC; + /* Enable nuclear pageflip on ILK+, except vlv/chv */ + if (!i915.nuclear_pageflip && + (match_info->gen < 5 || match_info->has_gmch_display)) + driver.driver_features &= ~DRIVER_ATOMIC; ret = -ENOMEM; dev_priv = kzalloc(sizeof(*dev_priv), GFP_KERNEL); @@ -2624,7 +2628,7 @@ static struct drm_driver driver = { */ .driver_features = DRIVER_HAVE_IRQ | DRIVER_IRQ_SHARED | DRIVER_GEM | DRIVER_PRIME | - DRIVER_RENDER | DRIVER_MODESET, + DRIVER_RENDER | DRIVER_MODESET | DRIVER_ATOMIC, .open = i915_driver_open, .lastclose = i915_driver_lastclose, .preclose = i915_driver_preclose, diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c index c2679fa7ed11..2e9645e6555a 100644 --- a/drivers/gpu/drm/i915/i915_params.c +++ b/drivers/gpu/drm/i915/i915_params.c @@ -205,9 +205,9 @@ module_param_named(verbose_state_checks, i915.verbose_state_checks, bool, 0600); MODULE_PARM_DESC(verbose_state_checks, "Enable verbose logs (ie. WARN_ON()) in case of unexpected hw state conditions."); -module_param_named_unsafe(nuclear_pageflip, i915.nuclear_pageflip, bool, 0600); +module_param_named_unsafe(nuclear_pageflip, i915.nuclear_pageflip, bool, 0400); MODULE_PARM_DESC(nuclear_pageflip, - "Force atomic modeset functionality; asynchronous mode is not yet supported. (default: false)."); + "Force enable atomic functionality on platforms that don't have full support yet."); /* WA to get away with the default setting in VBT for early platforms.Will be removed */ module_param_named_unsafe(edp_vswing, i915.edp_vswing, int, 0400); -- cgit From 949e8ab3a94befd514eb79f2535f6017d8b4488f Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 9 Feb 2017 14:40:36 +0000 Subject: drm/i915: Use the size/type of address space to make decisions Once the address space has been created (using 3 or 4 levels of page tables), we should use that to program the appropriate type into the contexts. This gives us the flexibility to handle different types of address spaces at runtime. Signed-off-by: Chris Wilson Cc: Mika Kuoppala Cc: Matthew Auld Cc: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170209144036.23664-1-chris@chris-wilson.co.uk Reviewed-by: Mika Kuoppala --- drivers/gpu/drm/i915/i915_gem_context.c | 20 +++++++++++++------- drivers/gpu/drm/i915/i915_gem_gtt.h | 6 ++++++ drivers/gpu/drm/i915/i915_reg.h | 3 --- drivers/gpu/drm/i915/intel_lrc.c | 6 +++--- 4 files changed, 22 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 0c9acd29c4d1..fb11b674f319 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -236,16 +236,20 @@ static int assign_hw_id(struct drm_i915_private *dev_priv, unsigned *out) return 0; } -static u32 default_desc_template(const struct drm_i915_private *dev_priv) +static u32 default_desc_template(const struct drm_i915_private *i915, + const struct i915_hw_ppgtt *ppgtt) { + u32 address_mode; u32 desc; - desc = GEN8_CTX_VALID | - GEN8_CTX_PRIVILEGE | - GEN8_CTX_ADDRESSING_MODE(dev_priv) << - GEN8_CTX_ADDRESSING_MODE_SHIFT; + desc = GEN8_CTX_VALID | GEN8_CTX_PRIVILEGE; - if (IS_GEN8(dev_priv)) + address_mode = INTEL_LEGACY_32B_CONTEXT; + if (ppgtt && i915_vm_is_48bit(&ppgtt->base)) + address_mode = INTEL_LEGACY_64B_CONTEXT; + desc |= address_mode << GEN8_CTX_ADDRESSING_MODE_SHIFT; + + if (IS_GEN8(i915)) desc |= GEN8_CTX_L3LLC_COHERENT; /* TODO: WaDisableLiteRestore when we start using semaphore @@ -329,7 +333,8 @@ __create_hw_context(struct drm_i915_private *dev_priv, i915_gem_context_set_bannable(ctx); ctx->ring_size = 4 * PAGE_SIZE; - ctx->desc_template = default_desc_template(dev_priv); + ctx->desc_template = + default_desc_template(dev_priv, dev_priv->mm.aliasing_ppgtt); ATOMIC_INIT_NOTIFIER_HEAD(&ctx->status_notifier); /* GuC requires the ring to be placed above GUC_WOPCM_TOP. If GuC is not @@ -387,6 +392,7 @@ i915_gem_create_context(struct drm_i915_private *dev_priv, } ctx->ppgtt = ppgtt; + ctx->desc_template = default_desc_template(dev_priv, ppgtt); } trace_i915_context_create(ctx); diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index 3c5ef5358cef..7e678ce5a9c7 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -525,6 +525,12 @@ i915_vm_to_ggtt(struct i915_address_space *vm) return container_of(vm, struct i915_ggtt, base); } +static inline bool +i915_vm_is_48bit(const struct i915_address_space *vm) +{ + return (vm->total - 1) >> 32; +} + int i915_ggtt_probe_hw(struct drm_i915_private *dev_priv); int i915_ggtt_init_hw(struct drm_i915_private *dev_priv); int i915_ggtt_enable_hw(struct drm_i915_private *dev_priv); diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index bd2ff1f87f32..ad666933a88d 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -3373,9 +3373,6 @@ enum { #define GEN8_CTX_L3LLC_COHERENT (1<<5) #define GEN8_CTX_PRIVILEGE (1<<8) #define GEN8_CTX_ADDRESSING_MODE_SHIFT 3 -#define GEN8_CTX_ADDRESSING_MODE(dev_priv) (USES_FULL_48BIT_PPGTT(dev_priv) ?\ - INTEL_LEGACY_64B_CONTEXT : \ - INTEL_LEGACY_32B_CONTEXT) #define GEN8_CTX_ID_SHIFT 32 #define GEN8_CTX_ID_WIDTH 21 diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index e42990b56fa8..b21dbd44045e 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -332,7 +332,7 @@ static u64 execlists_update_context(struct drm_i915_gem_request *rq) * PML4 is allocated during ppgtt init, so this is not needed * in 48-bit mode. */ - if (ppgtt && !USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) + if (ppgtt && !i915_vm_is_48bit(&ppgtt->base)) execlists_update_context_pdps(ppgtt, reg_state); return ce->lrc_desc; @@ -1447,7 +1447,7 @@ static int gen8_emit_bb_start(struct drm_i915_gem_request *req, * not needed in 48-bit.*/ if (req->ctx->ppgtt && (intel_engine_flag(req->engine) & req->ctx->ppgtt->pd_dirty_rings)) { - if (!USES_FULL_48BIT_PPGTT(req->i915) && + if (!i915_vm_is_48bit(&req->ctx->ppgtt->base) && !intel_vgpu_active(req->i915)) { ret = intel_logical_ring_emit_pdps(req); if (ret) @@ -2045,7 +2045,7 @@ static void execlists_init_reg_state(u32 *reg_state, ASSIGN_CTX_REG(reg_state, CTX_PDP0_LDW, GEN8_RING_PDP_LDW(engine, 0), 0); - if (ppgtt && USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) { + if (ppgtt && i915_vm_is_48bit(&ppgtt->base)) { /* 64b PPGTT (48bit canonical) * PDP0_DESCRIPTOR contains the base address to PML4 and * other PDP Descriptors are ignored. -- cgit From d8fc70b7367b86ca14e825f8508f91bbbf237f3c Mon Sep 17 00:00:00 2001 From: Ander Conselvan de Oliveira Date: Thu, 9 Feb 2017 11:31:21 +0200 Subject: drm/i915: Make power domain masks 64 bit long MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There are currently 30 power domains, which puts us pretty close to the limit with 32 bit masks. Prepare for the future and increase the limit to 64 bit. v2: Rebase v3: s/unsigned long long/u64/ (Joonas) Allow the 64th bit of the mask to be used. (Joonas) Cc: Jani Nikula Cc: Joonas Lahtinen Cc: Rodrigo Vivi Cc: Ville Syrjälä Signed-off-by: Ander Conselvan de Oliveira Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170209093121.24410-1-ander.conselvan.de.oliveira@intel.com --- drivers/gpu/drm/i915/i915_drv.h | 4 +- drivers/gpu/drm/i915/intel_display.c | 34 +-- drivers/gpu/drm/i915/intel_drv.h | 2 +- drivers/gpu/drm/i915/intel_runtime_pm.c | 378 ++++++++++++++++---------------- 4 files changed, 209 insertions(+), 209 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 604b20ff0390..03573f084749 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -494,7 +494,7 @@ struct i915_hotplug { #define for_each_power_domain(domain, mask) \ for ((domain) = 0; (domain) < POWER_DOMAIN_NUM; (domain)++) \ - for_each_if ((1 << (domain)) & (mask)) + for_each_if (BIT_ULL(domain) & (mask)) struct drm_i915_private; struct i915_mm_struct; @@ -1405,7 +1405,7 @@ struct i915_power_well { int count; /* cached hw enabled state */ bool hw_enabled; - unsigned long domains; + u64 domains; /* unique identifier for this power well */ unsigned long id; /* diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index f6259c949da2..ab78df9191f9 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -5666,15 +5666,15 @@ intel_display_port_aux_power_domain(struct intel_encoder *intel_encoder) } } -static unsigned long get_crtc_power_domains(struct drm_crtc *crtc, - struct intel_crtc_state *crtc_state) +static u64 get_crtc_power_domains(struct drm_crtc *crtc, + struct intel_crtc_state *crtc_state) { struct drm_device *dev = crtc->dev; struct drm_i915_private *dev_priv = to_i915(dev); struct drm_encoder *encoder; struct intel_crtc *intel_crtc = to_intel_crtc(crtc); enum pipe pipe = intel_crtc->pipe; - unsigned long mask; + u64 mask; enum transcoder transcoder = crtc_state->cpu_transcoder; if (!crtc_state->base.active) @@ -5684,19 +5684,19 @@ static unsigned long get_crtc_power_domains(struct drm_crtc *crtc, mask |= BIT(POWER_DOMAIN_TRANSCODER(transcoder)); if (crtc_state->pch_pfit.enabled || crtc_state->pch_pfit.force_thru) - mask |= BIT(POWER_DOMAIN_PIPE_PANEL_FITTER(pipe)); + mask |= BIT_ULL(POWER_DOMAIN_PIPE_PANEL_FITTER(pipe)); drm_for_each_encoder_mask(encoder, dev, crtc_state->base.encoder_mask) { struct intel_encoder *intel_encoder = to_intel_encoder(encoder); - mask |= BIT(intel_display_port_power_domain(intel_encoder)); + mask |= BIT_ULL(intel_display_port_power_domain(intel_encoder)); } if (HAS_DDI(dev_priv) && crtc_state->has_audio) mask |= BIT(POWER_DOMAIN_AUDIO); if (crtc_state->shared_dpll) - mask |= BIT(POWER_DOMAIN_PLLS); + mask |= BIT_ULL(POWER_DOMAIN_PLLS); return mask; } @@ -5708,7 +5708,7 @@ modeset_get_crtc_power_domains(struct drm_crtc *crtc, struct drm_i915_private *dev_priv = to_i915(crtc->dev); struct intel_crtc *intel_crtc = to_intel_crtc(crtc); enum intel_display_power_domain domain; - unsigned long domains, new_domains, old_domains; + u64 domains, new_domains, old_domains; old_domains = intel_crtc->enabled_power_domains; intel_crtc->enabled_power_domains = new_domains = @@ -5723,7 +5723,7 @@ modeset_get_crtc_power_domains(struct drm_crtc *crtc, } static void modeset_put_power_domains(struct drm_i915_private *dev_priv, - unsigned long domains) + u64 domains) { enum intel_display_power_domain domain; @@ -8992,7 +8992,7 @@ static void haswell_get_ddi_pll(struct drm_i915_private *dev_priv, static bool hsw_get_transcoder_state(struct intel_crtc *crtc, struct intel_crtc_state *pipe_config, - unsigned long *power_domain_mask) + u64 *power_domain_mask) { struct drm_device *dev = crtc->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); @@ -9034,7 +9034,7 @@ static bool hsw_get_transcoder_state(struct intel_crtc *crtc, power_domain = POWER_DOMAIN_TRANSCODER(pipe_config->cpu_transcoder); if (!intel_display_power_get_if_enabled(dev_priv, power_domain)) return false; - *power_domain_mask |= BIT(power_domain); + *power_domain_mask |= BIT_ULL(power_domain); tmp = I915_READ(PIPECONF(pipe_config->cpu_transcoder)); @@ -9043,7 +9043,7 @@ static bool hsw_get_transcoder_state(struct intel_crtc *crtc, static bool bxt_get_dsi_transcoder_state(struct intel_crtc *crtc, struct intel_crtc_state *pipe_config, - unsigned long *power_domain_mask) + u64 *power_domain_mask) { struct drm_device *dev = crtc->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); @@ -9061,7 +9061,7 @@ static bool bxt_get_dsi_transcoder_state(struct intel_crtc *crtc, power_domain = POWER_DOMAIN_TRANSCODER(cpu_transcoder); if (!intel_display_power_get_if_enabled(dev_priv, power_domain)) continue; - *power_domain_mask |= BIT(power_domain); + *power_domain_mask |= BIT_ULL(power_domain); /* * The PLL needs to be enabled with a valid divider @@ -9136,13 +9136,13 @@ static bool haswell_get_pipe_config(struct intel_crtc *crtc, { struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); enum intel_display_power_domain power_domain; - unsigned long power_domain_mask; + u64 power_domain_mask; bool active; power_domain = POWER_DOMAIN_PIPE(crtc->pipe); if (!intel_display_power_get_if_enabled(dev_priv, power_domain)) return false; - power_domain_mask = BIT(power_domain); + power_domain_mask = BIT_ULL(power_domain); pipe_config->shared_dpll = NULL; @@ -9176,7 +9176,7 @@ static bool haswell_get_pipe_config(struct intel_crtc *crtc, power_domain = POWER_DOMAIN_PIPE_PANEL_FITTER(crtc->pipe); if (intel_display_power_get_if_enabled(dev_priv, power_domain)) { - power_domain_mask |= BIT(power_domain); + power_domain_mask |= BIT_ULL(power_domain); if (INTEL_GEN(dev_priv) >= 9) skylake_get_pfit_config(crtc, pipe_config); else @@ -12841,7 +12841,7 @@ static void intel_atomic_commit_tail(struct drm_atomic_state *state) struct drm_crtc *crtc; struct intel_crtc_state *intel_cstate; bool hw_check = intel_state->modeset; - unsigned long put_domains[I915_MAX_PIPES] = {}; + u64 put_domains[I915_MAX_PIPES] = {}; unsigned crtc_vblank_mask = 0; int i; @@ -15565,7 +15565,7 @@ intel_modeset_setup_hw_state(struct drm_device *dev) ilk_wm_get_hw_state(dev); for_each_intel_crtc(dev, crtc) { - unsigned long put_domains; + u64 put_domains; put_domains = modeset_get_crtc_power_domains(&crtc->base, crtc->config); if (WARN_ON(put_domains)) diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 321ba45967aa..1298aad7d832 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -715,7 +715,7 @@ struct intel_crtc { bool active; bool lowfreq_avail; u8 plane_ids_mask; - unsigned long enabled_power_domains; + unsigned long long enabled_power_domains; struct intel_overlay *overlay; struct intel_flip_work *flip_work; diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c index 0f00a5aab69c..879567987201 100644 --- a/drivers/gpu/drm/i915/intel_runtime_pm.c +++ b/drivers/gpu/drm/i915/intel_runtime_pm.c @@ -210,7 +210,7 @@ bool __intel_display_power_is_enabled(struct drm_i915_private *dev_priv, is_enabled = true; - for_each_power_well_rev(i, power_well, BIT(domain), power_domains) { + for_each_power_well_rev(i, power_well, BIT_ULL(domain), power_domains) { if (power_well->always_on) continue; @@ -385,124 +385,124 @@ static void hsw_set_power_well(struct drm_i915_private *dev_priv, } #define SKL_DISPLAY_POWERWELL_2_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_TRANSCODER_A) | \ - BIT(POWER_DOMAIN_PIPE_B) | \ - BIT(POWER_DOMAIN_TRANSCODER_B) | \ - BIT(POWER_DOMAIN_PIPE_C) | \ - BIT(POWER_DOMAIN_TRANSCODER_C) | \ - BIT(POWER_DOMAIN_PIPE_B_PANEL_FITTER) | \ - BIT(POWER_DOMAIN_PIPE_C_PANEL_FITTER) | \ - BIT(POWER_DOMAIN_PORT_DDI_B_LANES) | \ - BIT(POWER_DOMAIN_PORT_DDI_C_LANES) | \ - BIT(POWER_DOMAIN_PORT_DDI_D_LANES) | \ - BIT(POWER_DOMAIN_PORT_DDI_E_LANES) | \ - BIT(POWER_DOMAIN_AUX_B) | \ - BIT(POWER_DOMAIN_AUX_C) | \ - BIT(POWER_DOMAIN_AUX_D) | \ - BIT(POWER_DOMAIN_AUDIO) | \ - BIT(POWER_DOMAIN_VGA) | \ - BIT(POWER_DOMAIN_INIT)) + BIT_ULL(POWER_DOMAIN_TRANSCODER_A) | \ + BIT_ULL(POWER_DOMAIN_PIPE_B) | \ + BIT_ULL(POWER_DOMAIN_TRANSCODER_B) | \ + BIT_ULL(POWER_DOMAIN_PIPE_C) | \ + BIT_ULL(POWER_DOMAIN_TRANSCODER_C) | \ + BIT_ULL(POWER_DOMAIN_PIPE_B_PANEL_FITTER) | \ + BIT_ULL(POWER_DOMAIN_PIPE_C_PANEL_FITTER) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_B_LANES) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_C_LANES) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_D_LANES) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_E_LANES) | \ + BIT_ULL(POWER_DOMAIN_AUX_B) | \ + BIT_ULL(POWER_DOMAIN_AUX_C) | \ + BIT_ULL(POWER_DOMAIN_AUX_D) | \ + BIT_ULL(POWER_DOMAIN_AUDIO) | \ + BIT_ULL(POWER_DOMAIN_VGA) | \ + BIT_ULL(POWER_DOMAIN_INIT)) #define SKL_DISPLAY_DDI_A_E_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_PORT_DDI_A_LANES) | \ - BIT(POWER_DOMAIN_PORT_DDI_E_LANES) | \ - BIT(POWER_DOMAIN_INIT)) + BIT_ULL(POWER_DOMAIN_PORT_DDI_A_LANES) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_E_LANES) | \ + BIT_ULL(POWER_DOMAIN_INIT)) #define SKL_DISPLAY_DDI_B_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_PORT_DDI_B_LANES) | \ - BIT(POWER_DOMAIN_INIT)) + BIT_ULL(POWER_DOMAIN_PORT_DDI_B_LANES) | \ + BIT_ULL(POWER_DOMAIN_INIT)) #define SKL_DISPLAY_DDI_C_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_PORT_DDI_C_LANES) | \ - BIT(POWER_DOMAIN_INIT)) + BIT_ULL(POWER_DOMAIN_PORT_DDI_C_LANES) | \ + BIT_ULL(POWER_DOMAIN_INIT)) #define SKL_DISPLAY_DDI_D_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_PORT_DDI_D_LANES) | \ - BIT(POWER_DOMAIN_INIT)) + BIT_ULL(POWER_DOMAIN_PORT_DDI_D_LANES) | \ + BIT_ULL(POWER_DOMAIN_INIT)) #define SKL_DISPLAY_DC_OFF_POWER_DOMAINS ( \ SKL_DISPLAY_POWERWELL_2_POWER_DOMAINS | \ - BIT(POWER_DOMAIN_MODESET) | \ - BIT(POWER_DOMAIN_AUX_A) | \ - BIT(POWER_DOMAIN_INIT)) + BIT_ULL(POWER_DOMAIN_MODESET) | \ + BIT_ULL(POWER_DOMAIN_AUX_A) | \ + BIT_ULL(POWER_DOMAIN_INIT)) #define BXT_DISPLAY_POWERWELL_2_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_TRANSCODER_A) | \ - BIT(POWER_DOMAIN_PIPE_B) | \ - BIT(POWER_DOMAIN_TRANSCODER_B) | \ - BIT(POWER_DOMAIN_PIPE_C) | \ - BIT(POWER_DOMAIN_TRANSCODER_C) | \ - BIT(POWER_DOMAIN_PIPE_B_PANEL_FITTER) | \ - BIT(POWER_DOMAIN_PIPE_C_PANEL_FITTER) | \ - BIT(POWER_DOMAIN_PORT_DDI_B_LANES) | \ - BIT(POWER_DOMAIN_PORT_DDI_C_LANES) | \ - BIT(POWER_DOMAIN_AUX_B) | \ - BIT(POWER_DOMAIN_AUX_C) | \ - BIT(POWER_DOMAIN_AUDIO) | \ - BIT(POWER_DOMAIN_VGA) | \ - BIT(POWER_DOMAIN_GMBUS) | \ - BIT(POWER_DOMAIN_INIT)) + BIT_ULL(POWER_DOMAIN_TRANSCODER_A) | \ + BIT_ULL(POWER_DOMAIN_PIPE_B) | \ + BIT_ULL(POWER_DOMAIN_TRANSCODER_B) | \ + BIT_ULL(POWER_DOMAIN_PIPE_C) | \ + BIT_ULL(POWER_DOMAIN_TRANSCODER_C) | \ + BIT_ULL(POWER_DOMAIN_PIPE_B_PANEL_FITTER) | \ + BIT_ULL(POWER_DOMAIN_PIPE_C_PANEL_FITTER) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_B_LANES) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_C_LANES) | \ + BIT_ULL(POWER_DOMAIN_AUX_B) | \ + BIT_ULL(POWER_DOMAIN_AUX_C) | \ + BIT_ULL(POWER_DOMAIN_AUDIO) | \ + BIT_ULL(POWER_DOMAIN_VGA) | \ + BIT_ULL(POWER_DOMAIN_GMBUS) | \ + BIT_ULL(POWER_DOMAIN_INIT)) #define BXT_DISPLAY_DC_OFF_POWER_DOMAINS ( \ BXT_DISPLAY_POWERWELL_2_POWER_DOMAINS | \ - BIT(POWER_DOMAIN_MODESET) | \ - BIT(POWER_DOMAIN_AUX_A) | \ - BIT(POWER_DOMAIN_INIT)) + BIT_ULL(POWER_DOMAIN_MODESET) | \ + BIT_ULL(POWER_DOMAIN_AUX_A) | \ + BIT_ULL(POWER_DOMAIN_INIT)) #define BXT_DPIO_CMN_A_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_PORT_DDI_A_LANES) | \ - BIT(POWER_DOMAIN_AUX_A) | \ - BIT(POWER_DOMAIN_INIT)) + BIT_ULL(POWER_DOMAIN_PORT_DDI_A_LANES) | \ + BIT_ULL(POWER_DOMAIN_AUX_A) | \ + BIT_ULL(POWER_DOMAIN_INIT)) #define BXT_DPIO_CMN_BC_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_PORT_DDI_B_LANES) | \ - BIT(POWER_DOMAIN_PORT_DDI_C_LANES) | \ - BIT(POWER_DOMAIN_AUX_B) | \ - BIT(POWER_DOMAIN_AUX_C) | \ - BIT(POWER_DOMAIN_INIT)) + BIT_ULL(POWER_DOMAIN_PORT_DDI_B_LANES) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_C_LANES) | \ + BIT_ULL(POWER_DOMAIN_AUX_B) | \ + BIT_ULL(POWER_DOMAIN_AUX_C) | \ + BIT_ULL(POWER_DOMAIN_INIT)) #define GLK_DISPLAY_POWERWELL_2_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_TRANSCODER_A) | \ - BIT(POWER_DOMAIN_PIPE_B) | \ - BIT(POWER_DOMAIN_TRANSCODER_B) | \ - BIT(POWER_DOMAIN_PIPE_C) | \ - BIT(POWER_DOMAIN_TRANSCODER_C) | \ - BIT(POWER_DOMAIN_PIPE_B_PANEL_FITTER) | \ - BIT(POWER_DOMAIN_PIPE_C_PANEL_FITTER) | \ - BIT(POWER_DOMAIN_PORT_DDI_B_LANES) | \ - BIT(POWER_DOMAIN_PORT_DDI_C_LANES) | \ - BIT(POWER_DOMAIN_AUX_B) | \ - BIT(POWER_DOMAIN_AUX_C) | \ - BIT(POWER_DOMAIN_AUDIO) | \ - BIT(POWER_DOMAIN_VGA) | \ - BIT(POWER_DOMAIN_INIT)) + BIT_ULL(POWER_DOMAIN_TRANSCODER_A) | \ + BIT_ULL(POWER_DOMAIN_PIPE_B) | \ + BIT_ULL(POWER_DOMAIN_TRANSCODER_B) | \ + BIT_ULL(POWER_DOMAIN_PIPE_C) | \ + BIT_ULL(POWER_DOMAIN_TRANSCODER_C) | \ + BIT_ULL(POWER_DOMAIN_PIPE_B_PANEL_FITTER) | \ + BIT_ULL(POWER_DOMAIN_PIPE_C_PANEL_FITTER) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_B_LANES) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_C_LANES) | \ + BIT_ULL(POWER_DOMAIN_AUX_B) | \ + BIT_ULL(POWER_DOMAIN_AUX_C) | \ + BIT_ULL(POWER_DOMAIN_AUDIO) | \ + BIT_ULL(POWER_DOMAIN_VGA) | \ + BIT_ULL(POWER_DOMAIN_INIT)) #define GLK_DISPLAY_DDI_A_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_PORT_DDI_A_LANES) | \ - BIT(POWER_DOMAIN_INIT)) + BIT_ULL(POWER_DOMAIN_PORT_DDI_A_LANES) | \ + BIT_ULL(POWER_DOMAIN_INIT)) #define GLK_DISPLAY_DDI_B_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_PORT_DDI_B_LANES) | \ - BIT(POWER_DOMAIN_INIT)) + BIT_ULL(POWER_DOMAIN_PORT_DDI_B_LANES) | \ + BIT_ULL(POWER_DOMAIN_INIT)) #define GLK_DISPLAY_DDI_C_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_PORT_DDI_C_LANES) | \ - BIT(POWER_DOMAIN_INIT)) + BIT_ULL(POWER_DOMAIN_PORT_DDI_C_LANES) | \ + BIT_ULL(POWER_DOMAIN_INIT)) #define GLK_DPIO_CMN_A_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_PORT_DDI_A_LANES) | \ - BIT(POWER_DOMAIN_AUX_A) | \ - BIT(POWER_DOMAIN_INIT)) + BIT_ULL(POWER_DOMAIN_PORT_DDI_A_LANES) | \ + BIT_ULL(POWER_DOMAIN_AUX_A) | \ + BIT_ULL(POWER_DOMAIN_INIT)) #define GLK_DPIO_CMN_B_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_PORT_DDI_B_LANES) | \ - BIT(POWER_DOMAIN_AUX_B) | \ - BIT(POWER_DOMAIN_INIT)) + BIT_ULL(POWER_DOMAIN_PORT_DDI_B_LANES) | \ + BIT_ULL(POWER_DOMAIN_AUX_B) | \ + BIT_ULL(POWER_DOMAIN_INIT)) #define GLK_DPIO_CMN_C_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_PORT_DDI_C_LANES) | \ - BIT(POWER_DOMAIN_AUX_C) | \ - BIT(POWER_DOMAIN_INIT)) + BIT_ULL(POWER_DOMAIN_PORT_DDI_C_LANES) | \ + BIT_ULL(POWER_DOMAIN_AUX_C) | \ + BIT_ULL(POWER_DOMAIN_INIT)) #define GLK_DISPLAY_AUX_A_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_AUX_A) | \ - BIT(POWER_DOMAIN_INIT)) + BIT_ULL(POWER_DOMAIN_AUX_A) | \ + BIT_ULL(POWER_DOMAIN_INIT)) #define GLK_DISPLAY_AUX_B_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_AUX_B) | \ - BIT(POWER_DOMAIN_INIT)) + BIT_ULL(POWER_DOMAIN_AUX_B) | \ + BIT_ULL(POWER_DOMAIN_INIT)) #define GLK_DISPLAY_AUX_C_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_AUX_C) | \ - BIT(POWER_DOMAIN_INIT)) + BIT_ULL(POWER_DOMAIN_AUX_C) | \ + BIT_ULL(POWER_DOMAIN_INIT)) #define GLK_DISPLAY_DC_OFF_POWER_DOMAINS ( \ GLK_DISPLAY_POWERWELL_2_POWER_DOMAINS | \ - BIT(POWER_DOMAIN_MODESET) | \ - BIT(POWER_DOMAIN_AUX_A) | \ - BIT(POWER_DOMAIN_INIT)) + BIT_ULL(POWER_DOMAIN_MODESET) | \ + BIT_ULL(POWER_DOMAIN_AUX_A) | \ + BIT_ULL(POWER_DOMAIN_INIT)) static void assert_can_enable_dc9(struct drm_i915_private *dev_priv) { @@ -1251,7 +1251,7 @@ static void vlv_dpio_cmn_power_well_disable(struct drm_i915_private *dev_priv, vlv_set_power_well(dev_priv, power_well, false); } -#define POWER_DOMAIN_MASK (GENMASK(POWER_DOMAIN_NUM - 1, 0)) +#define POWER_DOMAIN_MASK (GENMASK_ULL(POWER_DOMAIN_NUM - 1, 0)) static struct i915_power_well *lookup_power_well(struct drm_i915_private *dev_priv, int power_well_id) @@ -1697,7 +1697,7 @@ __intel_display_power_get_domain(struct drm_i915_private *dev_priv, struct i915_power_well *power_well; int i; - for_each_power_well(i, power_well, BIT(domain), power_domains) + for_each_power_well(i, power_well, BIT_ULL(domain), power_domains) intel_power_well_get(dev_priv, power_well); power_domains->domain_use_count[domain]++; @@ -1792,7 +1792,7 @@ void intel_display_power_put(struct drm_i915_private *dev_priv, intel_display_power_domain_str(domain)); power_domains->domain_use_count[domain]--; - for_each_power_well_rev(i, power_well, BIT(domain), power_domains) + for_each_power_well_rev(i, power_well, BIT_ULL(domain), power_domains) intel_power_well_put(dev_priv, power_well); mutex_unlock(&power_domains->lock); @@ -1801,117 +1801,117 @@ void intel_display_power_put(struct drm_i915_private *dev_priv, } #define HSW_DISPLAY_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_PIPE_B) | \ - BIT(POWER_DOMAIN_PIPE_C) | \ - BIT(POWER_DOMAIN_PIPE_A_PANEL_FITTER) | \ - BIT(POWER_DOMAIN_PIPE_B_PANEL_FITTER) | \ - BIT(POWER_DOMAIN_PIPE_C_PANEL_FITTER) | \ - BIT(POWER_DOMAIN_TRANSCODER_A) | \ - BIT(POWER_DOMAIN_TRANSCODER_B) | \ - BIT(POWER_DOMAIN_TRANSCODER_C) | \ - BIT(POWER_DOMAIN_PORT_DDI_B_LANES) | \ - BIT(POWER_DOMAIN_PORT_DDI_C_LANES) | \ - BIT(POWER_DOMAIN_PORT_DDI_D_LANES) | \ - BIT(POWER_DOMAIN_PORT_CRT) | /* DDI E */ \ - BIT(POWER_DOMAIN_VGA) | \ - BIT(POWER_DOMAIN_AUDIO) | \ - BIT(POWER_DOMAIN_INIT)) + BIT_ULL(POWER_DOMAIN_PIPE_B) | \ + BIT_ULL(POWER_DOMAIN_PIPE_C) | \ + BIT_ULL(POWER_DOMAIN_PIPE_A_PANEL_FITTER) | \ + BIT_ULL(POWER_DOMAIN_PIPE_B_PANEL_FITTER) | \ + BIT_ULL(POWER_DOMAIN_PIPE_C_PANEL_FITTER) | \ + BIT_ULL(POWER_DOMAIN_TRANSCODER_A) | \ + BIT_ULL(POWER_DOMAIN_TRANSCODER_B) | \ + BIT_ULL(POWER_DOMAIN_TRANSCODER_C) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_B_LANES) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_C_LANES) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_D_LANES) | \ + BIT_ULL(POWER_DOMAIN_PORT_CRT) | /* DDI E */ \ + BIT_ULL(POWER_DOMAIN_VGA) | \ + BIT_ULL(POWER_DOMAIN_AUDIO) | \ + BIT_ULL(POWER_DOMAIN_INIT)) #define BDW_DISPLAY_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_PIPE_B) | \ - BIT(POWER_DOMAIN_PIPE_C) | \ - BIT(POWER_DOMAIN_PIPE_B_PANEL_FITTER) | \ - BIT(POWER_DOMAIN_PIPE_C_PANEL_FITTER) | \ - BIT(POWER_DOMAIN_TRANSCODER_A) | \ - BIT(POWER_DOMAIN_TRANSCODER_B) | \ - BIT(POWER_DOMAIN_TRANSCODER_C) | \ - BIT(POWER_DOMAIN_PORT_DDI_B_LANES) | \ - BIT(POWER_DOMAIN_PORT_DDI_C_LANES) | \ - BIT(POWER_DOMAIN_PORT_DDI_D_LANES) | \ - BIT(POWER_DOMAIN_PORT_CRT) | /* DDI E */ \ - BIT(POWER_DOMAIN_VGA) | \ - BIT(POWER_DOMAIN_AUDIO) | \ - BIT(POWER_DOMAIN_INIT)) + BIT_ULL(POWER_DOMAIN_PIPE_B) | \ + BIT_ULL(POWER_DOMAIN_PIPE_C) | \ + BIT_ULL(POWER_DOMAIN_PIPE_B_PANEL_FITTER) | \ + BIT_ULL(POWER_DOMAIN_PIPE_C_PANEL_FITTER) | \ + BIT_ULL(POWER_DOMAIN_TRANSCODER_A) | \ + BIT_ULL(POWER_DOMAIN_TRANSCODER_B) | \ + BIT_ULL(POWER_DOMAIN_TRANSCODER_C) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_B_LANES) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_C_LANES) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_D_LANES) | \ + BIT_ULL(POWER_DOMAIN_PORT_CRT) | /* DDI E */ \ + BIT_ULL(POWER_DOMAIN_VGA) | \ + BIT_ULL(POWER_DOMAIN_AUDIO) | \ + BIT_ULL(POWER_DOMAIN_INIT)) #define VLV_DISPLAY_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_PIPE_A) | \ - BIT(POWER_DOMAIN_PIPE_B) | \ - BIT(POWER_DOMAIN_PIPE_A_PANEL_FITTER) | \ - BIT(POWER_DOMAIN_PIPE_B_PANEL_FITTER) | \ - BIT(POWER_DOMAIN_TRANSCODER_A) | \ - BIT(POWER_DOMAIN_TRANSCODER_B) | \ - BIT(POWER_DOMAIN_PORT_DDI_B_LANES) | \ - BIT(POWER_DOMAIN_PORT_DDI_C_LANES) | \ - BIT(POWER_DOMAIN_PORT_DSI) | \ - BIT(POWER_DOMAIN_PORT_CRT) | \ - BIT(POWER_DOMAIN_VGA) | \ - BIT(POWER_DOMAIN_AUDIO) | \ - BIT(POWER_DOMAIN_AUX_B) | \ - BIT(POWER_DOMAIN_AUX_C) | \ - BIT(POWER_DOMAIN_GMBUS) | \ - BIT(POWER_DOMAIN_INIT)) + BIT_ULL(POWER_DOMAIN_PIPE_A) | \ + BIT_ULL(POWER_DOMAIN_PIPE_B) | \ + BIT_ULL(POWER_DOMAIN_PIPE_A_PANEL_FITTER) | \ + BIT_ULL(POWER_DOMAIN_PIPE_B_PANEL_FITTER) | \ + BIT_ULL(POWER_DOMAIN_TRANSCODER_A) | \ + BIT_ULL(POWER_DOMAIN_TRANSCODER_B) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_B_LANES) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_C_LANES) | \ + BIT_ULL(POWER_DOMAIN_PORT_DSI) | \ + BIT_ULL(POWER_DOMAIN_PORT_CRT) | \ + BIT_ULL(POWER_DOMAIN_VGA) | \ + BIT_ULL(POWER_DOMAIN_AUDIO) | \ + BIT_ULL(POWER_DOMAIN_AUX_B) | \ + BIT_ULL(POWER_DOMAIN_AUX_C) | \ + BIT_ULL(POWER_DOMAIN_GMBUS) | \ + BIT_ULL(POWER_DOMAIN_INIT)) #define VLV_DPIO_CMN_BC_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_PORT_DDI_B_LANES) | \ - BIT(POWER_DOMAIN_PORT_DDI_C_LANES) | \ - BIT(POWER_DOMAIN_PORT_CRT) | \ - BIT(POWER_DOMAIN_AUX_B) | \ - BIT(POWER_DOMAIN_AUX_C) | \ - BIT(POWER_DOMAIN_INIT)) + BIT_ULL(POWER_DOMAIN_PORT_DDI_B_LANES) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_C_LANES) | \ + BIT_ULL(POWER_DOMAIN_PORT_CRT) | \ + BIT_ULL(POWER_DOMAIN_AUX_B) | \ + BIT_ULL(POWER_DOMAIN_AUX_C) | \ + BIT_ULL(POWER_DOMAIN_INIT)) #define VLV_DPIO_TX_B_LANES_01_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_PORT_DDI_B_LANES) | \ - BIT(POWER_DOMAIN_AUX_B) | \ - BIT(POWER_DOMAIN_INIT)) + BIT_ULL(POWER_DOMAIN_PORT_DDI_B_LANES) | \ + BIT_ULL(POWER_DOMAIN_AUX_B) | \ + BIT_ULL(POWER_DOMAIN_INIT)) #define VLV_DPIO_TX_B_LANES_23_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_PORT_DDI_B_LANES) | \ - BIT(POWER_DOMAIN_AUX_B) | \ - BIT(POWER_DOMAIN_INIT)) + BIT_ULL(POWER_DOMAIN_PORT_DDI_B_LANES) | \ + BIT_ULL(POWER_DOMAIN_AUX_B) | \ + BIT_ULL(POWER_DOMAIN_INIT)) #define VLV_DPIO_TX_C_LANES_01_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_PORT_DDI_C_LANES) | \ - BIT(POWER_DOMAIN_AUX_C) | \ - BIT(POWER_DOMAIN_INIT)) + BIT_ULL(POWER_DOMAIN_PORT_DDI_C_LANES) | \ + BIT_ULL(POWER_DOMAIN_AUX_C) | \ + BIT_ULL(POWER_DOMAIN_INIT)) #define VLV_DPIO_TX_C_LANES_23_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_PORT_DDI_C_LANES) | \ - BIT(POWER_DOMAIN_AUX_C) | \ - BIT(POWER_DOMAIN_INIT)) + BIT_ULL(POWER_DOMAIN_PORT_DDI_C_LANES) | \ + BIT_ULL(POWER_DOMAIN_AUX_C) | \ + BIT_ULL(POWER_DOMAIN_INIT)) #define CHV_DISPLAY_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_PIPE_A) | \ - BIT(POWER_DOMAIN_PIPE_B) | \ - BIT(POWER_DOMAIN_PIPE_C) | \ - BIT(POWER_DOMAIN_PIPE_A_PANEL_FITTER) | \ - BIT(POWER_DOMAIN_PIPE_B_PANEL_FITTER) | \ - BIT(POWER_DOMAIN_PIPE_C_PANEL_FITTER) | \ - BIT(POWER_DOMAIN_TRANSCODER_A) | \ - BIT(POWER_DOMAIN_TRANSCODER_B) | \ - BIT(POWER_DOMAIN_TRANSCODER_C) | \ - BIT(POWER_DOMAIN_PORT_DDI_B_LANES) | \ - BIT(POWER_DOMAIN_PORT_DDI_C_LANES) | \ - BIT(POWER_DOMAIN_PORT_DDI_D_LANES) | \ - BIT(POWER_DOMAIN_PORT_DSI) | \ - BIT(POWER_DOMAIN_VGA) | \ - BIT(POWER_DOMAIN_AUDIO) | \ - BIT(POWER_DOMAIN_AUX_B) | \ - BIT(POWER_DOMAIN_AUX_C) | \ - BIT(POWER_DOMAIN_AUX_D) | \ - BIT(POWER_DOMAIN_GMBUS) | \ - BIT(POWER_DOMAIN_INIT)) + BIT_ULL(POWER_DOMAIN_PIPE_A) | \ + BIT_ULL(POWER_DOMAIN_PIPE_B) | \ + BIT_ULL(POWER_DOMAIN_PIPE_C) | \ + BIT_ULL(POWER_DOMAIN_PIPE_A_PANEL_FITTER) | \ + BIT_ULL(POWER_DOMAIN_PIPE_B_PANEL_FITTER) | \ + BIT_ULL(POWER_DOMAIN_PIPE_C_PANEL_FITTER) | \ + BIT_ULL(POWER_DOMAIN_TRANSCODER_A) | \ + BIT_ULL(POWER_DOMAIN_TRANSCODER_B) | \ + BIT_ULL(POWER_DOMAIN_TRANSCODER_C) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_B_LANES) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_C_LANES) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_D_LANES) | \ + BIT_ULL(POWER_DOMAIN_PORT_DSI) | \ + BIT_ULL(POWER_DOMAIN_VGA) | \ + BIT_ULL(POWER_DOMAIN_AUDIO) | \ + BIT_ULL(POWER_DOMAIN_AUX_B) | \ + BIT_ULL(POWER_DOMAIN_AUX_C) | \ + BIT_ULL(POWER_DOMAIN_AUX_D) | \ + BIT_ULL(POWER_DOMAIN_GMBUS) | \ + BIT_ULL(POWER_DOMAIN_INIT)) #define CHV_DPIO_CMN_BC_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_PORT_DDI_B_LANES) | \ - BIT(POWER_DOMAIN_PORT_DDI_C_LANES) | \ - BIT(POWER_DOMAIN_AUX_B) | \ - BIT(POWER_DOMAIN_AUX_C) | \ - BIT(POWER_DOMAIN_INIT)) + BIT_ULL(POWER_DOMAIN_PORT_DDI_B_LANES) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_C_LANES) | \ + BIT_ULL(POWER_DOMAIN_AUX_B) | \ + BIT_ULL(POWER_DOMAIN_AUX_C) | \ + BIT_ULL(POWER_DOMAIN_INIT)) #define CHV_DPIO_CMN_D_POWER_DOMAINS ( \ - BIT(POWER_DOMAIN_PORT_DDI_D_LANES) | \ - BIT(POWER_DOMAIN_AUX_D) | \ - BIT(POWER_DOMAIN_INIT)) + BIT_ULL(POWER_DOMAIN_PORT_DDI_D_LANES) | \ + BIT_ULL(POWER_DOMAIN_AUX_D) | \ + BIT_ULL(POWER_DOMAIN_INIT)) static const struct i915_power_well_ops i9xx_always_on_power_well_ops = { .sync_hw = i9xx_always_on_power_well_noop, @@ -2388,7 +2388,7 @@ int intel_power_domains_init(struct drm_i915_private *dev_priv) dev_priv->csr.allowed_dc_mask = get_allowed_dc_mask(dev_priv, i915.enable_dc); - BUILD_BUG_ON(POWER_DOMAIN_NUM > 31); + BUILD_BUG_ON(POWER_DOMAIN_NUM > 64); mutex_init(&power_domains->lock); -- cgit From 370a81fb89cbb1a7f3ef119f84b3bd6d0ffebcf6 Mon Sep 17 00:00:00 2001 From: Ander Conselvan de Oliveira Date: Fri, 13 Jan 2017 14:20:32 +0200 Subject: drm/i915: Remove unused function intel_ddi_get_link_dpll() The function intel_ddi_get_link_dpll() was added in f169660ed4e5 ("drm/i915/dp: Add a standalone function to obtain shared dpll for HSW/BDW/SKL/BXT") to "allow for the implementation of a platform neutral upfront link training function", but such implementation never landed. So remove that function and clean up the exported shared DPLL interface. Fixes: f169660ed4e5 ("drm/i915/dp: Add a standalone function to obtain shared dpll for HSW/BDW/SKL/BXT") Cc: Durgadoss R Cc: Manasi Navare Cc: Jim Bride Cc: Rodrigo Vivi Cc: Daniel Vetter Cc: Jani Nikula Cc: intel-gfx@lists.freedesktop.org Signed-off-by: Ander Conselvan de Oliveira Acked-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/1484310032-1863-1-git-send-email-ander.conselvan.de.oliveira@intel.com --- drivers/gpu/drm/i915/intel_ddi.c | 39 -------------------------- drivers/gpu/drm/i915/intel_dpll_mgr.c | 52 ++++++----------------------------- drivers/gpu/drm/i915/intel_dpll_mgr.h | 16 ----------- drivers/gpu/drm/i915/intel_drv.h | 2 -- 4 files changed, 8 insertions(+), 101 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index b5b3065cb60c..cd6fedd229a0 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -2116,45 +2116,6 @@ intel_ddi_init_hdmi_connector(struct intel_digital_port *intel_dig_port) return connector; } -struct intel_shared_dpll * -intel_ddi_get_link_dpll(struct intel_dp *intel_dp, int clock) -{ - struct intel_connector *connector = intel_dp->attached_connector; - struct intel_encoder *encoder = connector->encoder; - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); - struct intel_shared_dpll *pll = NULL; - struct intel_shared_dpll_state tmp_pll_state; - enum intel_dpll_id dpll_id; - - if (IS_GEN9_LP(dev_priv)) { - dpll_id = (enum intel_dpll_id)dig_port->port; - /* - * Select the required PLL. This works for platforms where - * there is no shared DPLL. - */ - pll = &dev_priv->shared_dplls[dpll_id]; - if (WARN_ON(pll->active_mask)) { - - DRM_ERROR("Shared DPLL in use. active_mask:%x\n", - pll->active_mask); - return NULL; - } - tmp_pll_state = pll->state; - if (!bxt_ddi_dp_set_dpll_hw_state(clock, - &pll->state.hw_state)) { - DRM_ERROR("Could not setup DPLL\n"); - pll->state = tmp_pll_state; - return NULL; - } - } else if (IS_GEN9_BC(dev_priv)) { - pll = skl_find_link_pll(dev_priv, clock); - } else if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) { - pll = hsw_ddi_dp_get_dpll(encoder, clock); - } - return pll; -} - void intel_ddi_init(struct drm_i915_private *dev_priv, enum port port) { struct intel_digital_port *intel_dig_port; diff --git a/drivers/gpu/drm/i915/intel_dpll_mgr.c b/drivers/gpu/drm/i915/intel_dpll_mgr.c index 79f656ca593d..b4de632f1158 100644 --- a/drivers/gpu/drm/i915/intel_dpll_mgr.c +++ b/drivers/gpu/drm/i915/intel_dpll_mgr.c @@ -42,44 +42,6 @@ * commit phase. */ -struct intel_shared_dpll * -skl_find_link_pll(struct drm_i915_private *dev_priv, int clock) -{ - struct intel_shared_dpll *pll = NULL; - struct intel_dpll_hw_state dpll_hw_state; - enum intel_dpll_id i; - bool found = false; - - if (!skl_ddi_dp_set_dpll_hw_state(clock, &dpll_hw_state)) - return pll; - - for (i = DPLL_ID_SKL_DPLL1; i <= DPLL_ID_SKL_DPLL3; i++) { - pll = &dev_priv->shared_dplls[i]; - - /* Only want to check enabled timings first */ - if (pll->state.crtc_mask == 0) - continue; - - if (memcmp(&dpll_hw_state, &pll->state.hw_state, - sizeof(pll->state.hw_state)) == 0) { - found = true; - break; - } - } - - /* Ok no matching timings, maybe there's a free one? */ - for (i = DPLL_ID_SKL_DPLL1; - ((found == false) && (i <= DPLL_ID_SKL_DPLL3)); i++) { - pll = &dev_priv->shared_dplls[i]; - if (pll->state.crtc_mask == 0) { - pll->state.hw_state = dpll_hw_state; - break; - } - } - - return pll; -} - static void intel_atomic_duplicate_dpll_state(struct drm_i915_private *dev_priv, struct intel_shared_dpll_state *shared_dpll) @@ -811,8 +773,8 @@ static struct intel_shared_dpll *hsw_ddi_hdmi_get_dpll(int clock, return pll; } -struct intel_shared_dpll *hsw_ddi_dp_get_dpll(struct intel_encoder *encoder, - int clock) +static struct intel_shared_dpll * +hsw_ddi_dp_get_dpll(struct intel_encoder *encoder, int clock) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_shared_dpll *pll; @@ -1360,8 +1322,9 @@ static bool skl_ddi_hdmi_pll_dividers(struct intel_crtc *crtc, } -bool skl_ddi_dp_set_dpll_hw_state(int clock, - struct intel_dpll_hw_state *dpll_hw_state) +static bool +skl_ddi_dp_set_dpll_hw_state(int clock, + struct intel_dpll_hw_state *dpll_hw_state) { uint32_t ctrl1; @@ -1816,8 +1779,9 @@ static bool bxt_ddi_set_dpll_hw_state(int clock, return true; } -bool bxt_ddi_dp_set_dpll_hw_state(int clock, - struct intel_dpll_hw_state *dpll_hw_state) +static bool +bxt_ddi_dp_set_dpll_hw_state(int clock, + struct intel_dpll_hw_state *dpll_hw_state) { struct bxt_clk_div clk_div = {0}; diff --git a/drivers/gpu/drm/i915/intel_dpll_mgr.h b/drivers/gpu/drm/i915/intel_dpll_mgr.h index af1497eb4f9c..f8d13a947c13 100644 --- a/drivers/gpu/drm/i915/intel_dpll_mgr.h +++ b/drivers/gpu/drm/i915/intel_dpll_mgr.h @@ -282,20 +282,4 @@ void intel_shared_dpll_init(struct drm_device *dev); void intel_dpll_dump_hw_state(struct drm_i915_private *dev_priv, struct intel_dpll_hw_state *hw_state); -/* BXT dpll related functions */ -bool bxt_ddi_dp_set_dpll_hw_state(int clock, - struct intel_dpll_hw_state *dpll_hw_state); - - -/* SKL dpll related functions */ -bool skl_ddi_dp_set_dpll_hw_state(int clock, - struct intel_dpll_hw_state *dpll_hw_state); -struct intel_shared_dpll *skl_find_link_pll(struct drm_i915_private *dev_priv, - int clock); - - -/* HSW dpll related functions */ -struct intel_shared_dpll *hsw_ddi_dp_get_dpll(struct intel_encoder *encoder, - int clock); - #endif /* _INTEL_DPLL_MGR_H_ */ diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 1298aad7d832..7845d40f203e 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -1232,8 +1232,6 @@ void intel_ddi_clock_get(struct intel_encoder *encoder, struct intel_crtc_state *pipe_config); void intel_ddi_set_vc_payload_alloc(struct drm_crtc *crtc, bool state); uint32_t ddi_signal_levels(struct intel_dp *intel_dp); -struct intel_shared_dpll *intel_ddi_get_link_dpll(struct intel_dp *intel_dp, - int clock); unsigned int intel_fb_align_height(struct drm_device *dev, unsigned int height, uint32_t pixel_format, -- cgit From 72b72ae47324a0b9851e254bb525d1dc47995e4e Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 10 Feb 2017 10:14:22 +0000 Subject: drm/i915: Always pin contexts into the high GGTT Now that we have fast top-down insertion into the drm_mm, we can use it for frequent runtime operations like insertion of the context object, whereas before we limited it to the one-off insertion of the pinned kernel context. Keeping the active context objects out of the mappable region of the global GTT (except under memory pressure) improves our ability to allocate mappable aperture region without triggering a GPU stall. Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170210101422.1598-1-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen Reviewed-by: Mika Kuoppala --- drivers/gpu/drm/i915/intel_lrc.c | 4 +--- drivers/gpu/drm/i915/intel_ringbuffer.c | 12 +++--------- 2 files changed, 4 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index b21dbd44045e..697776d427b9 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -773,11 +773,9 @@ static int execlists_context_pin(struct intel_engine_cs *engine, } GEM_BUG_ON(!ce->state); - flags = PIN_GLOBAL; + flags = PIN_GLOBAL | PIN_HIGH; if (ctx->ggtt_offset_bias) flags |= PIN_OFFSET_BIAS | ctx->ggtt_offset_bias; - if (i915_gem_context_is_kernel(ctx)) - flags |= PIN_HIGH; ret = i915_vma_pin(ce->state, 0, GEN8_LR_CONTEXT_ALIGN, flags); if (ret) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index d3d1e64f2498..8ae78b79178f 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -2004,7 +2004,7 @@ intel_ring_free(struct intel_ring *ring) kfree(ring); } -static int context_pin(struct i915_gem_context *ctx, unsigned int flags) +static int context_pin(struct i915_gem_context *ctx) { struct i915_vma *vma = ctx->engine[RCS].state; int ret; @@ -2019,7 +2019,7 @@ static int context_pin(struct i915_gem_context *ctx, unsigned int flags) return ret; } - return i915_vma_pin(vma, 0, ctx->ggtt_alignment, PIN_GLOBAL | flags); + return i915_vma_pin(vma, 0, ctx->ggtt_alignment, PIN_GLOBAL | PIN_HIGH); } static int intel_ring_context_pin(struct intel_engine_cs *engine, @@ -2034,13 +2034,7 @@ static int intel_ring_context_pin(struct intel_engine_cs *engine, return 0; if (ce->state) { - unsigned int flags; - - flags = 0; - if (i915_gem_context_is_kernel(ctx)) - flags = PIN_HIGH; - - ret = context_pin(ctx, flags); + ret = context_pin(ctx); if (ret) goto error; } -- cgit From 4f4631af8faf791a60264cf028a16d61804414c8 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 10 Feb 2017 13:36:32 +0000 Subject: drm/i915/byt: Take powerwell for reading PIPESTAT in debugfs [12493.693827] WARNING: CPU: 1 PID: 14860 at drivers/gpu/drm/i915/intel_uncore.c:795 __unclaimed_reg_debug+0x5d/0x80 [i915] [12493.693868] Unclaimed read from register 0x1f0024 [12493.693905] Modules linked in: vgem i915 drm_kms_helper drm intel_gtt i2c_algo_bit syscopyarea sysfillrect sysimgblt fb_sys_fops prime_numbers intel_powerclamp crct10dif_pclmul crc32_pclmul crc32c_intel ghash_clmulni_intel cryptd lpc_ich i2c_i801 mfd_core video i2c_designware_platform i2c_designware_core i2c_core button autofs4 sd_mod ahci libahci libata scsi_mod [last unloaded: i915] [12493.694039] CPU: 1 PID: 14860 Comm: intel-gpu-overl Tainted: G U 4.10.0-rc7+ #11 [12493.694079] Hardware name: GIGABYTE GB-BXBT-1900/MZBAYAB-00, BIOS F8 03/02/2016 [12493.694121] Call Trace: [12493.694169] dump_stack+0x67/0x9d [12493.694235] __warn+0x117/0x140 [12493.694288] warn_slowpath_fmt+0x4f/0x60 [12493.694344] ? do_raw_spin_lock+0x116/0x180 [12493.694533] ? check_for_unclaimed_mmio+0x98/0xe0 [i915] [12493.694727] __unclaimed_reg_debug+0x5d/0x80 [i915] [12493.694923] fwtable_read32+0x2c5/0x330 [i915] [12493.695108] i915_interrupt_info+0xd52/0xf80 [i915] [12493.695302] ? gen6_write16+0x310/0x310 [i915] [12493.695357] seq_read+0x187/0x710 [12493.695412] full_proxy_read+0x75/0xc0 [12493.695472] __vfs_read+0x5a/0x220 [12493.695524] ? kmem_cache_free+0x6c/0x260 [12493.695577] ? putname+0x97/0xa0 [12493.695629] ? putname+0x97/0xa0 [12493.695682] ? rcu_read_lock_sched_held+0xb8/0xd0 [12493.695735] ? rw_verify_area+0x65/0x140 [12493.695787] vfs_read+0xd1/0x1f0 [12493.695840] SyS_read+0x62/0xc0 [12493.695893] entry_SYSCALL_64_fastpath+0x1c/0xb1 [12493.695943] RIP: 0033:0x7f82dca99ba0 [12493.695985] RSP: 002b:00007ffc0bdfd4f8 EFLAGS: 00000246 ORIG_RAX: 0000000000000000 [12493.696031] RAX: ffffffffffffffda RBX: 00007ffc0be005a0 RCX: 00007f82dca99ba0 [12493.696073] RDX: 0000000000001fff RSI: 00007ffc0bdfd500 RDI: 000000000000001a [12493.696115] RBP: ffffffff810fb639 R08: 302f6972642f6775 R09: 00007f82dca0999a [12493.696157] R10: 00007f82dcd62760 R11: 0000000000000246 R12: ffff880069a17f98 [12493.696199] R13: 00007ffc0bdfd428 R14: 0000000000000003 R15: 00007ffc0bdfd428 [12493.696250] ? trace_hardirqs_off_caller+0xd9/0x130 [12493.696300] ---[ end trace 52ccf4d39793cc59 ]--- Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=99761 Signed-off-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20170210133632.16946-1-chris@chris-wilson.co.uk Reviewed-by: Mika Kuoppala --- drivers/gpu/drm/i915/i915_debugfs.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 1ccc2978a21a..641527701123 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -875,10 +875,22 @@ static int i915_interrupt_info(struct seq_file *m, void *data) I915_READ(VLV_IIR_RW)); seq_printf(m, "Display IMR:\t%08x\n", I915_READ(VLV_IMR)); - for_each_pipe(dev_priv, pipe) + for_each_pipe(dev_priv, pipe) { + enum intel_display_power_domain power_domain; + + power_domain = POWER_DOMAIN_PIPE(pipe); + if (!intel_display_power_get_if_enabled(dev_priv, + power_domain)) { + seq_printf(m, "Pipe %c power disabled\n", + pipe_name(pipe)); + continue; + } + seq_printf(m, "Pipe %c stat:\t%08x\n", pipe_name(pipe), I915_READ(PIPESTAT(pipe))); + intel_display_power_put(dev_priv, power_domain); + } seq_printf(m, "Master IER:\t%08x\n", I915_READ(VLV_MASTER_IER)); -- cgit From d158694f452252d0fef335a775aeb3eb74fe7af0 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Wed, 8 Feb 2017 19:52:54 +0200 Subject: drm/i915: Avoid spurious WARNs about the wrong pipe in the PPS code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Until recently vlv_steal_power_sequencer() wasn't being called for normal DP ports, and hence it could assert that it should only be called for pipe A and B (since pipe C doesn't support eDP). However that changed when we started to consider normal DP ports as well when choosing a PPS. So we will now get spurious warnings when vlv_steal_power_sequencer() does get called for pipe C. Avoid this by moving the WARN down into vlv_detach_power_sequencer() where this assertion should still hold. Cc: Imre Deak Cc: stable@vger.kernel.org Fixes: 9f2bdb006a7e ("drm/i915: Prevent PPS stealing from a normal DP port on VLV/CHV") References: https://bugs.freedesktop.org/show_bug.cgi?id=95287 Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/20170208175254.10958-1-ville.syrjala@linux.intel.com Reviewed-by: Imre Deak --- drivers/gpu/drm/i915/intel_dp.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index fa77e96c49c7..6ef4f3810082 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -2905,6 +2905,9 @@ static void vlv_detach_power_sequencer(struct intel_dp *intel_dp) WARN_ON(intel_dp->active_pipe != INVALID_PIPE); + if (WARN_ON(pipe != PIPE_A && pipe != PIPE_B)) + return; + edp_panel_vdd_off_sync(intel_dp); /* @@ -2932,9 +2935,6 @@ static void vlv_steal_power_sequencer(struct drm_device *dev, lockdep_assert_held(&dev_priv->pps_mutex); - if (WARN_ON(pipe != PIPE_A && pipe != PIPE_B)) - return; - for_each_intel_encoder(dev, encoder) { struct intel_dp *intel_dp; enum port port; -- cgit From 6401c37dbd6264b045961868c1438bf03efdd104 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Wed, 8 Feb 2017 19:53:28 +0200 Subject: drm/i915: Simplify platform checks in intel_fb_pitch_limit() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace the VLV/CHV check with a HAS_GMCH_DISPLAY check in intel_fb_pitch_limit(), because it's shorter. Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/20170208175328.11064-1-ville.syrjala@linux.intel.com Reviewed-by: Ander Conselvan de Oliveira --- drivers/gpu/drm/i915/intel_display.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index ab78df9191f9..88b7d96c46a4 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -14346,8 +14346,7 @@ u32 intel_fb_pitch_limit(struct drm_i915_private *dev_priv, * pixels and 32K bytes." */ return min(8192 * cpp, 32768); - } else if (gen >= 5 && !IS_VALLEYVIEW(dev_priv) && - !IS_CHERRYVIEW(dev_priv)) { + } else if (gen >= 5 && !HAS_GMCH_DISPLAY(dev_priv)) { return 32*1024; } else if (gen >= 4) { if (fb_modifier == I915_FORMAT_MOD_X_TILED) -- cgit From 317eaa95081bfa081a5bf147e175b4e007e5a105 Mon Sep 17 00:00:00 2001 From: Lyude Date: Fri, 3 Feb 2017 21:18:25 -0500 Subject: drm/i915/debugfs: Add i915_hpd_storm_ctl This adds a file in i915's debugfs directory that allows userspace to manually control HPD storm detection. This is mainly for hotplugging tests, where we might want to test HPD storm functionality or disable storm detection to speed up hotplugging tests without breaking anything. Changes since v1: - Make HPD storm interval configurable - Misc code cleanup Signed-off-by: Lyude Acked-by: Jani Nikula Cc: Tomeu Vizoso --- drivers/gpu/drm/i915/i915_debugfs.c | 78 +++++++++++++++++++++++++++++++++++- drivers/gpu/drm/i915/i915_drv.c | 1 + drivers/gpu/drm/i915/i915_drv.h | 4 ++ drivers/gpu/drm/i915/i915_irq.c | 2 + drivers/gpu/drm/i915/intel_hotplug.c | 15 ++++--- 5 files changed, 94 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 641527701123..cd023fda1a3b 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -4653,6 +4653,81 @@ static int i915_forcewake_create(struct dentry *root, struct drm_minor *minor) return drm_add_fake_info_node(minor, ent, &i915_forcewake_fops); } +static int i915_hpd_storm_ctl_show(struct seq_file *m, void *data) +{ + struct drm_i915_private *dev_priv = m->private; + struct i915_hotplug *hotplug = &dev_priv->hotplug; + + seq_printf(m, "Threshold: %d\n", hotplug->hpd_storm_threshold); + seq_printf(m, "Detected: %s\n", + yesno(delayed_work_pending(&hotplug->reenable_work))); + + return 0; +} + +static ssize_t i915_hpd_storm_ctl_write(struct file *file, + const char __user *ubuf, size_t len, + loff_t *offp) +{ + struct seq_file *m = file->private_data; + struct drm_i915_private *dev_priv = m->private; + struct i915_hotplug *hotplug = &dev_priv->hotplug; + unsigned int new_threshold; + int i; + char *newline; + char tmp[16]; + + if (len >= sizeof(tmp)) + return -EINVAL; + + if (copy_from_user(tmp, ubuf, len)) + return -EFAULT; + + tmp[len] = '\0'; + + /* Strip newline, if any */ + newline = strchr(tmp, '\n'); + if (newline) + *newline = '\0'; + + if (strcmp(tmp, "reset") == 0) + new_threshold = HPD_STORM_DEFAULT_THRESHOLD; + else if (kstrtouint(tmp, 10, &new_threshold) != 0) + return -EINVAL; + + if (new_threshold > 0) + DRM_DEBUG_KMS("Setting HPD storm detection threshold to %d\n", + new_threshold); + else + DRM_DEBUG_KMS("Disabling HPD storm detection\n"); + + spin_lock_irq(&dev_priv->irq_lock); + hotplug->hpd_storm_threshold = new_threshold; + /* Reset the HPD storm stats so we don't accidentally trigger a storm */ + for_each_hpd_pin(i) + hotplug->stats[i].count = 0; + spin_unlock_irq(&dev_priv->irq_lock); + + /* Re-enable hpd immediately if we were in an irq storm */ + flush_delayed_work(&dev_priv->hotplug.reenable_work); + + return len; +} + +static int i915_hpd_storm_ctl_open(struct inode *inode, struct file *file) +{ + return single_open(file, i915_hpd_storm_ctl_show, inode->i_private); +} + +static const struct file_operations i915_hpd_storm_ctl_fops = { + .owner = THIS_MODULE, + .open = i915_hpd_storm_ctl_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, + .write = i915_hpd_storm_ctl_write +}; + static int i915_debugfs_create(struct dentry *root, struct drm_minor *minor, const char *name, @@ -4746,7 +4821,8 @@ static const struct i915_debugfs_files { {"i915_dp_test_data", &i915_displayport_test_data_fops}, {"i915_dp_test_type", &i915_displayport_test_type_fops}, {"i915_dp_test_active", &i915_displayport_test_active_fops}, - {"i915_guc_log_control", &i915_guc_log_control_fops} + {"i915_guc_log_control", &i915_guc_log_control_fops}, + {"i915_hpd_storm_ctl", &i915_hpd_storm_ctl_fops} }; int i915_debugfs_register(struct drm_i915_private *dev_priv) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index a17dde86dc8c..2394de1b19df 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -827,6 +827,7 @@ static int i915_driver_init_early(struct drm_i915_private *dev_priv, spin_lock_init(&dev_priv->gpu_error.lock); mutex_init(&dev_priv->backlight_lock); spin_lock_init(&dev_priv->uncore.lock); + spin_lock_init(&dev_priv->mm.object_stat_lock); spin_lock_init(&dev_priv->mmio_flip_lock); spin_lock_init(&dev_priv->wm.dsparb_lock); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 03573f084749..f7ff2df0c0c5 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -384,6 +384,8 @@ enum hpd_pin { #define for_each_hpd_pin(__pin) \ for ((__pin) = (HPD_NONE + 1); (__pin) < HPD_NUM_PINS; (__pin)++) +#define HPD_STORM_DEFAULT_THRESHOLD 5 + struct i915_hotplug { struct work_struct hotplug_work; @@ -407,6 +409,8 @@ struct i915_hotplug { struct work_struct poll_init_work; bool poll_enabled; + unsigned int hpd_storm_threshold; + /* * if we get a HPD irq from DP and a HPD irq from non-DP * the non-DP HPD could block the workqueue on a mode config diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 9eb4c9c8a10b..27c13193f7a2 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -4277,6 +4277,8 @@ void intel_irq_init(struct drm_i915_private *dev_priv) if (!IS_GEN2(dev_priv)) dev->vblank_disable_immediate = true; + dev_priv->hotplug.hpd_storm_threshold = HPD_STORM_DEFAULT_THRESHOLD; + dev->driver->get_vblank_timestamp = i915_get_vblank_timestamp; dev->driver->get_scanout_position = i915_get_crtc_scanoutpos; diff --git a/drivers/gpu/drm/i915/intel_hotplug.c b/drivers/gpu/drm/i915/intel_hotplug.c index b62e3f8ad415..6a9c16508ab5 100644 --- a/drivers/gpu/drm/i915/intel_hotplug.c +++ b/drivers/gpu/drm/i915/intel_hotplug.c @@ -100,7 +100,6 @@ bool intel_hpd_pin_to_port(enum hpd_pin pin, enum port *port) } #define HPD_STORM_DETECT_PERIOD 1000 -#define HPD_STORM_THRESHOLD 5 #define HPD_STORM_REENABLE_DELAY (2 * 60 * 1000) /** @@ -112,9 +111,13 @@ bool intel_hpd_pin_to_port(enum hpd_pin pin, enum port *port) * storms. Only the pin specific stats and state are changed, the caller is * responsible for further action. * - * @HPD_STORM_THRESHOLD irqs are allowed within @HPD_STORM_DETECT_PERIOD ms, - * otherwise it's considered an irq storm, and the irq state is set to - * @HPD_MARK_DISABLED. + * The number of irqs that are allowed within @HPD_STORM_DETECT_PERIOD is + * stored in @dev_priv->hotplug.hpd_storm_threshold which defaults to + * @HPD_STORM_DEFAULT_THRESHOLD. If this threshold is exceeded, it's + * considered an irq storm and the irq state is set to @HPD_MARK_DISABLED. + * + * The HPD threshold can be controlled through i915_hpd_storm_ctl in debugfs, + * and should only be adjusted for automated hotplug testing. * * Return true if an irq storm was detected on @pin. */ @@ -123,13 +126,15 @@ static bool intel_hpd_irq_storm_detect(struct drm_i915_private *dev_priv, { unsigned long start = dev_priv->hotplug.stats[pin].last_jiffies; unsigned long end = start + msecs_to_jiffies(HPD_STORM_DETECT_PERIOD); + const int threshold = dev_priv->hotplug.hpd_storm_threshold; bool storm = false; if (!time_in_range(jiffies, start, end)) { dev_priv->hotplug.stats[pin].last_jiffies = jiffies; dev_priv->hotplug.stats[pin].count = 0; DRM_DEBUG_KMS("Received HPD interrupt on PIN %d - cnt: 0\n", pin); - } else if (dev_priv->hotplug.stats[pin].count > HPD_STORM_THRESHOLD) { + } else if (dev_priv->hotplug.stats[pin].count > threshold && + threshold) { dev_priv->hotplug.stats[pin].state = HPD_MARK_DISABLED; DRM_DEBUG_KMS("HPD interrupt storm detected on PIN %d\n", pin); storm = true; -- cgit From cad3688ff00656face6c78a1028fd02288b4a960 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 10 Feb 2017 16:35:21 +0000 Subject: drm/i915: Split device release from unload We may need to keep our memory management alive after we have unloaded the physical pci device. For example, if we have exported an object via dmabuf, that will keep the device around but the pci device may be removed before the dmabuf itself is released, use of the pci hardware will be revoked, but the memory and object management needs to persist for the dmabuf. Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170210163523.17533-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_drv.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 2394de1b19df..221f1c56fd9c 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -1231,8 +1231,7 @@ int i915_driver_load(struct pci_dev *pdev, const struct pci_device_id *ent) ret = drm_dev_init(&dev_priv->drm, &driver, &pdev->dev); if (ret) { DRM_DEV_ERROR(&pdev->dev, "allocation failed\n"); - kfree(dev_priv); - return ret; + goto out_free; } dev_priv->drm.pdev = pdev; @@ -1240,7 +1239,7 @@ int i915_driver_load(struct pci_dev *pdev, const struct pci_device_id *ent) ret = pci_enable_device(pdev); if (ret) - goto out_free_priv; + goto out_fini; pci_set_drvdata(pdev, &dev_priv->drm); @@ -1304,9 +1303,11 @@ out_runtime_pm_put: i915_driver_cleanup_early(dev_priv); out_pci_disable: pci_disable_device(pdev); -out_free_priv: +out_fini: i915_load_error(dev_priv, "Device initialization failed (%d)\n", ret); - drm_dev_unref(&dev_priv->drm); + drm_dev_fini(&dev_priv->drm); +out_free: + kfree(dev_priv); return ret; } @@ -1385,8 +1386,16 @@ void i915_driver_unload(struct drm_device *dev) i915_driver_cleanup_mmio(dev_priv); intel_display_power_put(dev_priv, POWER_DOMAIN_INIT); +} + +static void i915_driver_release(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = to_i915(dev); i915_driver_cleanup_early(dev_priv); + drm_dev_fini(&dev_priv->drm); + + kfree(dev_priv); } static int i915_driver_open(struct drm_device *dev, struct drm_file *file) @@ -2630,6 +2639,7 @@ static struct drm_driver driver = { .driver_features = DRIVER_HAVE_IRQ | DRIVER_IRQ_SHARED | DRIVER_GEM | DRIVER_PRIME | DRIVER_RENDER | DRIVER_MODESET | DRIVER_ATOMIC, + .release = i915_driver_release, .open = i915_driver_open, .lastclose = i915_driver_lastclose, .preclose = i915_driver_preclose, -- cgit From 94d4a2a9aeec18d9a67f3353bfb9861402719fbd Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 10 Feb 2017 16:35:22 +0000 Subject: drm/i915: Unbind any residual objects/vma from the Global GTT on shutdown We may unload the PCI device before all users (such as dma-buf) are completely shutdown. This may leave VMA in the global GTT which we want to revoke, whilst keeping the objects themselves around to service the dma-buf. Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170210163523.17533-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_gtt.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 3c83f2b91af0..d580e2b4081b 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -2818,6 +2818,15 @@ err: void i915_ggtt_cleanup_hw(struct drm_i915_private *dev_priv) { struct i915_ggtt *ggtt = &dev_priv->ggtt; + struct i915_vma *vma, *vn; + + ggtt->base.closed = true; + + mutex_lock(&dev_priv->drm.struct_mutex); + WARN_ON(!list_empty(&ggtt->base.active_list)); + list_for_each_entry_safe(vma, vn, &ggtt->base.inactive_list, vm_link) + WARN_ON(i915_vma_unbind(vma)); + mutex_unlock(&dev_priv->drm.struct_mutex); if (dev_priv->mm.aliasing_ppgtt) { struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt; -- cgit From c4d4c1c66be58e1e91a2eeada02c0cbdeb5fb350 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 10 Feb 2017 16:35:23 +0000 Subject: drm/i915: Flush the freed object queue on device release As dmabufs may live beyond the PCI device removal, we need to flush the freed object worker on device release, and include a warning in case there is a leak. Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170210163523.17533-3-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 306a2fb362a2..303c079b75e8 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4659,7 +4659,9 @@ err_out: void i915_gem_load_cleanup(struct drm_i915_private *dev_priv) { + i915_gem_drain_freed_objects(dev_priv); WARN_ON(!llist_empty(&dev_priv->mm.free_list)); + WARN_ON(dev_priv->mm.object_count); mutex_lock(&dev_priv->drm.struct_mutex); i915_gem_timeline_fini(&dev_priv->gt.global_timeline); -- cgit From 8c12d121590ebe5a43bf9a0aedbbeb192f257846 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 10 Feb 2017 18:52:14 +0000 Subject: drm/i915: Move the irq_barrier for reset earlier into reset_prepare When updating the bookkeeping following the reset, we need the seqno to be coherent on the CPU prior to trusting its result for deciding whether any request is completed. We need the irq_barrier before we start making these decisions, i.e. in reset_prepare. References: https://bugs.freedesktop.org/show_bug.cgi?id=99733 Signed-off-by: Chris Wilson Cc: Mika Kuoppala Link: http://patchwork.freedesktop.org/patch/msgid/20170210185214.23463-1-chris@chris-wilson.co.uk Reviewed-by: Michel Thierry --- drivers/gpu/drm/i915/i915_gem.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 303c079b75e8..b8d869d7937d 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2652,6 +2652,9 @@ int i915_gem_reset_prepare(struct drm_i915_private *dev_priv) tasklet_disable(&engine->irq_tasklet); tasklet_kill(&engine->irq_tasklet); + if (engine->irq_seqno_barrier) + engine->irq_seqno_barrier(engine); + if (engine_stalled(engine)) { request = i915_gem_find_active_request(engine); if (request && request->fence.error == -EIO) @@ -2748,9 +2751,6 @@ static void i915_gem_reset_engine(struct intel_engine_cs *engine) { struct drm_i915_gem_request *request; - if (engine->irq_seqno_barrier) - engine->irq_seqno_barrier(engine); - request = i915_gem_find_active_request(engine); if (request && i915_gem_reset_request(request)) { DRM_DEBUG_DRIVER("resetting %s to restart from tail of request 0x%x\n", -- cgit From ae9a043b0c5b0d90dcd9fb8b3a2d27f34f0725fc Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 7 Feb 2017 10:23:19 +0000 Subject: drm/i915: Rename conditional GEM execution macros After a brief discussion, we settled on a naming convention for the conditional GEM debugging data that should be clearer to the casual user: GEM_DEBUG Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Cc: Tvrtko Ursulin Cc: Mika Kuoppala Link: http://patchwork.freedesktop.org/patch/msgid/20170207102319.10910-1-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_gem.h | 12 ++++++------ drivers/gpu/drm/i915/intel_lrc.c | 8 ++++---- drivers/gpu/drm/i915/intel_ringbuffer.c | 2 +- drivers/gpu/drm/i915/intel_ringbuffer.h | 6 +++--- 4 files changed, 14 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.h b/drivers/gpu/drm/i915/i915_gem.h index 412fa2794cbb..5a49487368ca 100644 --- a/drivers/gpu/drm/i915/i915_gem.h +++ b/drivers/gpu/drm/i915/i915_gem.h @@ -29,17 +29,17 @@ #define GEM_BUG_ON(expr) BUG_ON(expr) #define GEM_WARN_ON(expr) WARN_ON(expr) -#define GEM_BUG_ONLY(expr) expr -#define GEM_BUG_ONLY_DECLARE(var) var -#define GEM_BUG_ONLY_ON(expr) GEM_BUG_ON(expr) +#define GEM_DEBUG_DECL(var) var +#define GEM_DEBUG_EXEC(expr) expr +#define GEM_DEBUG_BUG_ON(expr) GEM_BUG_ON(expr) #else #define GEM_BUG_ON(expr) BUILD_BUG_ON_INVALID(expr) #define GEM_WARN_ON(expr) (BUILD_BUG_ON_INVALID(expr), 0) -#define GEM_BUG_ONLY(expr) do { } while (0) -#define GEM_BUG_ONLY_DECLARE(var) -#define GEM_BUG_ONLY_ON(expr) +#define GEM_DEBUG_DECL(var) +#define GEM_DEBUG_EXEC(expr) do { } while (0) +#define GEM_DEBUG_BUG_ON(expr) #endif #define I915_NUM_ENGINES 5 diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 697776d427b9..af717e1356a9 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -351,7 +351,7 @@ static void execlists_submit_ports(struct intel_engine_cs *engine) execlists_context_status_change(port[0].request, INTEL_CONTEXT_SCHEDULE_IN); desc[0] = execlists_update_context(port[0].request); - GEM_BUG_ONLY(port[0].context_id = upper_32_bits(desc[0])); + GEM_DEBUG_EXEC(port[0].context_id = upper_32_bits(desc[0])); port[0].count++; if (port[1].request) { @@ -359,7 +359,7 @@ static void execlists_submit_ports(struct intel_engine_cs *engine) execlists_context_status_change(port[1].request, INTEL_CONTEXT_SCHEDULE_IN); desc[1] = execlists_update_context(port[1].request); - GEM_BUG_ONLY(port[1].context_id = upper_32_bits(desc[1])); + GEM_DEBUG_EXEC(port[1].context_id = upper_32_bits(desc[1])); port[1].count = 1; } else { desc[1] = 0; @@ -583,8 +583,8 @@ static void intel_lrc_irq_handler(unsigned long data) continue; /* Check the context/desc id for this event matches */ - GEM_BUG_ONLY_ON(readl(buf + 2 * idx + 1) != - port[0].context_id); + GEM_DEBUG_BUG_ON(readl(buf + 2 * idx + 1) != + port[0].context_id); GEM_BUG_ON(port[0].count == 0); if (--port[0].count == 0) { diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 8ae78b79178f..9fba5664376e 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -2284,7 +2284,7 @@ int intel_ring_begin(struct drm_i915_gem_request *req, int num_dwords) ring->space -= bytes; GEM_BUG_ON(ring->space < 0); - GEM_BUG_ONLY(ring->advance = ring->tail + bytes); + GEM_DEBUG_EXEC(ring->advance = ring->tail + bytes); return 0; } diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 896838ca502c..b122c3cc6dbb 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -144,7 +144,7 @@ struct intel_ring { u32 head; u32 tail; - GEM_BUG_ONLY_DECLARE(u32 advance); + GEM_DEBUG_DECL(u32 advance); int space; int size; @@ -381,7 +381,7 @@ struct intel_engine_cs { struct execlist_port { struct drm_i915_gem_request *request; unsigned int count; - GEM_BUG_ONLY_DECLARE(u32 context_id); + GEM_DEBUG_DECL(u32 context_id); } execlist_port[2]; struct rb_root execlist_queue; struct rb_node *execlist_first; @@ -519,7 +519,7 @@ static inline void intel_ring_advance(struct intel_ring *ring) * reserved for the command packet (i.e. the value passed to * intel_ring_begin()). */ - GEM_BUG_ONLY_ON(ring->tail != ring->advance); + GEM_DEBUG_BUG_ON(ring->tail != ring->advance); } static inline u32 intel_ring_offset(struct intel_ring *ring, void *addr) -- cgit From c00122f33fe65c00103330e9f85dd2d3a63a3d2c Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sun, 12 Feb 2017 17:19:58 +0000 Subject: drm/i915: Assert that the active request hasn't been signaled As the request is not complete, it should not be signaled. Assert that this is true before we process the request for a reset. References: https://bugs.freedesktop.org/show_bug.cgi?id=99671 Signed-off-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20170212172002.23072-1-chris@chris-wilson.co.uk Reviewed-by: Mika Kuoppala --- drivers/gpu/drm/i915/i915_gem.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index b8d869d7937d..48922ff454e6 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2611,6 +2611,8 @@ i915_gem_find_active_request(struct intel_engine_cs *engine) continue; GEM_BUG_ON(request->engine != engine); + GEM_BUG_ON(test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, + &request->fence.flags)); return request; } -- cgit From 8d613c539c74fa9055f88f4116196d7c820bd98f Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sun, 12 Feb 2017 17:19:59 +0000 Subject: drm/i915: Always call i915_gem_reset_finish() following i915_gem_reset_prepare() As i915_gem_reset_finish() undoes the steps from i915_gem_reset_prepare() to leave the system in a fully-working state, e.g. to be able to free the breadcrumb signal threads, make sure that we always call it even on the error path. Signed-off-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20170212172002.23072-2-chris@chris-wilson.co.uk Reviewed-by: Mika Kuoppala --- drivers/gpu/drm/i915/i915_drv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 221f1c56fd9c..ac1b3582ca55 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -1873,10 +1873,10 @@ void i915_reset(struct drm_i915_private *dev_priv) goto error; } - i915_gem_reset_finish(dev_priv); i915_queue_hangcheck(dev_priv); wakeup: + i915_gem_reset_finish(dev_priv); enable_irq(dev_priv->drm.irq); wake_up_bit(&error->flags, I915_RESET_IN_PROGRESS); return; -- cgit From 1d309634bcf4e3ce1c2b112bb04fcdec82ac70e1 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sun, 12 Feb 2017 17:20:00 +0000 Subject: drm/i915: Kill the tasklet then disable Disabling the tasklet leaves it if scheduled on the ready to run list until it is re-enabled. This will leave the ksoftird thread spinning until satisfied. To prevent this situation on starting the GPU reset, we want to kill the tasklet first and then disable. The same problem will arise when a tasklet is scheduled from another device, so a better solution is required for the general case. Reported-by: Tvrtko Ursulin Fixes: 1f7b847d72c3 ("drm/i915: Disable engine->irq_tasklet around resets") Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: Mika Kuoppala Link: http://patchwork.freedesktop.org/patch/msgid/20170212172002.23072-3-chris@chris-wilson.co.uk Reviewed-by: Mika Kuoppala --- drivers/gpu/drm/i915/i915_gem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 48922ff454e6..3c2c2296c1dc 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2651,8 +2651,8 @@ int i915_gem_reset_prepare(struct drm_i915_private *dev_priv) * Turning off the engine->irq_tasklet until the reset is over * prevents the race. */ - tasklet_disable(&engine->irq_tasklet); tasklet_kill(&engine->irq_tasklet); + tasklet_disable(&engine->irq_tasklet); if (engine->irq_seqno_barrier) engine->irq_seqno_barrier(engine); -- cgit From fe3288b5da2c1286a7aac1fb1b2234caa752a81b Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sun, 12 Feb 2017 17:20:01 +0000 Subject: drm/i915: Park the breadcrumbs signaler across a GPU reset The signal threads may be running concurrently with the GPU reset. The completion from the GPU run asynchronous with the reset and two threads may see different snapshots of the state, and the signaler may mark a request as complete as we try to reset it. We don't tolerate 2 different views of the same state and complain if we try to mark a request as failed if it is already complete. Disable the signal threads during reset to prevent this conflict (even though the conflict implies that the state we resetting to is invalid, we have already made our decision!). Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=99733 References: https://bugs.freedesktop.org/show_bug.cgi?id=99671 Signed-off-by: Chris Wilson Cc: Mika Kuoppala Cc: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170212172002.23072-4-chris@chris-wilson.co.uk Reviewed-by: Mika Kuoppala --- drivers/gpu/drm/i915/i915_gem.c | 16 +++++++++++++++- drivers/gpu/drm/i915/intel_breadcrumbs.c | 3 +++ 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 3c2c2296c1dc..730804ce9610 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -35,6 +35,7 @@ #include "intel_frontbuffer.h" #include "intel_mocs.h" #include +#include #include #include #include @@ -2643,6 +2644,17 @@ int i915_gem_reset_prepare(struct drm_i915_private *dev_priv) for_each_engine(engine, dev_priv, id) { struct drm_i915_gem_request *request; + /* Prevent the signaler thread from updating the request + * state (by calling dma_fence_signal) as we are processing + * the reset. The write from the GPU of the seqno is + * asynchronous and the signaler thread may see a different + * value to us and declare the request complete, even though + * the reset routine have picked that request as the active + * (incomplete) request. This conflict is not handled + * gracefully! + */ + kthread_park(engine->breadcrumbs.signaler); + /* Prevent request submission to the hardware until we have * completed the reset in i915_gem_reset_finish(). If a request * is completed by one engine, it may then queue a request @@ -2796,8 +2808,10 @@ void i915_gem_reset_finish(struct drm_i915_private *dev_priv) lockdep_assert_held(&dev_priv->drm.struct_mutex); - for_each_engine(engine, dev_priv, id) + for_each_engine(engine, dev_priv, id) { tasklet_enable(&engine->irq_tasklet); + kthread_unpark(engine->breadcrumbs.signaler); + } } static void nop_submit_request(struct drm_i915_gem_request *request) diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c index 9fd002bcebb6..f5e05343110a 100644 --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c @@ -490,6 +490,9 @@ static int intel_breadcrumbs_signaler(void *arg) break; schedule(); + + if (kthread_should_park()) + kthread_parkme(); } } while (1); __set_current_state(TASK_RUNNING); -- cgit From 2ae557388d9feb7049ee311ab1b36bb4c6f6feb6 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sun, 12 Feb 2017 17:20:02 +0000 Subject: drm/i915: Clear the last_retired_context following a hang/reset Following a hang and reset, we know that the engine is idle and all context state has been saved or lost. Consequently, we know that the engine is no longer referencing the last context and we can relinquish our tracking. Signed-off-by: Chris Wilson Cc: Mika Kuoppala Cc: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170212172002.23072-5-chris@chris-wilson.co.uk Reviewed-by: Mika Kuoppala --- drivers/gpu/drm/i915/i915_gem.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 730804ce9610..638b335e72b6 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2788,8 +2788,14 @@ void i915_gem_reset(struct drm_i915_private *dev_priv) i915_gem_retire_requests(dev_priv); - for_each_engine(engine, dev_priv, id) + for_each_engine(engine, dev_priv, id) { + struct i915_gem_context *ctx; + i915_gem_reset_engine(engine); + ctx = fetch_and_zero(&engine->last_retired_context); + if (ctx) + engine->context_unpin(engine, ctx); + } i915_gem_restore_fences(dev_priv); -- cgit From 953c7f82eb890085c60dbe22578e883d6837c674 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 13 Feb 2017 17:15:12 +0000 Subject: drm/i915: Provide a hook for selftests Some pieces of code are independent of hardware but are very tricky to exercise through the normal userspace ABI or via debugfs hooks. Being able to create mock unit tests and execute them through CI is vital. Start by adding a central point where we can execute unit tests and a parameter to enable them. This is disabled by default as the expectation is that these tests will occasionally explode. To facilitate integration with igt, any parameter beginning with i915.igt__ is interpreted as a subtest executable independently via igt/drv_selftest. Two classes of selftests are recognised: mock unit tests and integration tests. Mock unit tests are run as soon as the module is loaded, before the device is probed. At that point there is no driver instantiated and all hw interactions must be "mocked". This is very useful for writing universal tests to exercise code not typically run on a broad range of architectures. Alternatively, you can hook into the live selftests and run when the device has been instantiated - hw interactions are real. v2: Add a macro for compiling conditional code for mock objects inside real objects. v3: Differentiate between mock unit tests and late integration test. v4: List the tests in natural order, use igt to sort after modparam. v5: s/late/live/ v6: s/unsigned long/unsigned int/ v7: Use igt_ prefixes for long helpers. v8: Deobfuscate macros overriding functions, stop using -I$(src) Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170213171558.20942-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/Kconfig.debug | 16 ++ drivers/gpu/drm/i915/Makefile | 3 + drivers/gpu/drm/i915/i915_pci.c | 31 ++- drivers/gpu/drm/i915/i915_selftest.h | 102 +++++++++ .../gpu/drm/i915/selftests/i915_live_selftests.h | 11 + .../gpu/drm/i915/selftests/i915_mock_selftests.h | 11 + drivers/gpu/drm/i915/selftests/i915_random.c | 63 ++++++ drivers/gpu/drm/i915/selftests/i915_random.h | 50 +++++ drivers/gpu/drm/i915/selftests/i915_selftest.c | 250 +++++++++++++++++++++ tools/testing/selftests/drivers/gpu/i915.sh | 1 + 10 files changed, 531 insertions(+), 7 deletions(-) create mode 100644 drivers/gpu/drm/i915/i915_selftest.h create mode 100644 drivers/gpu/drm/i915/selftests/i915_live_selftests.h create mode 100644 drivers/gpu/drm/i915/selftests/i915_mock_selftests.h create mode 100644 drivers/gpu/drm/i915/selftests/i915_random.c create mode 100644 drivers/gpu/drm/i915/selftests/i915_random.h create mode 100644 drivers/gpu/drm/i915/selftests/i915_selftest.c diff --git a/drivers/gpu/drm/i915/Kconfig.debug b/drivers/gpu/drm/i915/Kconfig.debug index 598551dbf62c..a4d8cfd77c3c 100644 --- a/drivers/gpu/drm/i915/Kconfig.debug +++ b/drivers/gpu/drm/i915/Kconfig.debug @@ -26,6 +26,7 @@ config DRM_I915_DEBUG select DRM_DEBUG_MM if DRM=y select DRM_DEBUG_MM_SELFTEST select DRM_I915_SW_FENCE_DEBUG_OBJECTS + select DRM_I915_SELFTEST default n help Choose this option to turn on extra driver debugging that may affect @@ -59,3 +60,18 @@ config DRM_I915_SW_FENCE_DEBUG_OBJECTS Recommended for driver developers only. If in doubt, say "N". + +config DRM_I915_SELFTEST + bool "Enable selftests upon driver load" + depends on DRM_I915 + default n + select PRIME_NUMBERS + help + Choose this option to allow the driver to perform selftests upon + loading; also requires the i915.selftest=1 module parameter. To + exit the module after running the selftests (i.e. to prevent normal + module initialisation afterwards) use i915.selftest=-1. + + Recommended for driver developers only. + + If in doubt, say "N". diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index e58fc8f1b83b..d0c9577c7533 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -117,6 +117,9 @@ i915-y += dvo_ch7017.o \ # Post-mortem debug and GPU hang state capture i915-$(CONFIG_DRM_I915_CAPTURE_ERROR) += i915_gpu_error.o +i915-$(CONFIG_DRM_I915_SELFTEST) += \ + selftests/i915_random.o \ + selftests/i915_selftest.o # virtual gpu code i915-y += i915_vgpu.o diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index df2051b41fa1..732101ed57fb 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -27,6 +27,7 @@ #include #include "i915_drv.h" +#include "i915_selftest.h" #define GEN_DEFAULT_PIPEOFFSETS \ .pipe_offsets = { PIPE_A_OFFSET, PIPE_B_OFFSET, \ @@ -473,10 +474,19 @@ static const struct pci_device_id pciidlist[] = { }; MODULE_DEVICE_TABLE(pci, pciidlist); +static void i915_pci_remove(struct pci_dev *pdev) +{ + struct drm_device *dev = pci_get_drvdata(pdev); + + i915_driver_unload(dev); + drm_dev_unref(dev); +} + static int i915_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) { struct intel_device_info *intel_info = (struct intel_device_info *) ent->driver_data; + int err; if (IS_ALPHA_SUPPORT(intel_info) && !i915.alpha_support) { DRM_INFO("The driver support for your hardware in this kernel version is alpha quality\n" @@ -500,15 +510,17 @@ static int i915_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (vga_switcheroo_client_probe_defer(pdev)) return -EPROBE_DEFER; - return i915_driver_load(pdev, ent); -} + err = i915_driver_load(pdev, ent); + if (err) + return err; -static void i915_pci_remove(struct pci_dev *pdev) -{ - struct drm_device *dev = pci_get_drvdata(pdev); + err = i915_live_selftests(pdev); + if (err) { + i915_pci_remove(pdev); + return err > 0 ? -ENOTTY : err; + } - i915_driver_unload(dev); - drm_dev_unref(dev); + return 0; } static struct pci_driver i915_pci_driver = { @@ -522,6 +534,11 @@ static struct pci_driver i915_pci_driver = { static int __init i915_init(void) { bool use_kms = true; + int err; + + err = i915_mock_selftests(); + if (err) + return err > 0 ? 0 : err; /* * Enable KMS by default, unless explicitly overriden by diff --git a/drivers/gpu/drm/i915/i915_selftest.h b/drivers/gpu/drm/i915/i915_selftest.h new file mode 100644 index 000000000000..8b5994caa301 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_selftest.h @@ -0,0 +1,102 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef __I915_SELFTEST_H__ +#define __I915_SELFTEST_H__ + +struct pci_dev; +struct drm_i915_private; + +struct i915_selftest { + unsigned long timeout_jiffies; + unsigned int timeout_ms; + unsigned int random_seed; + int mock; + int live; +}; + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +extern struct i915_selftest i915_selftest; + +int i915_mock_selftests(void); +int i915_live_selftests(struct pci_dev *pdev); + +/* We extract the function declarations from i915_mock_selftests.h and + * i915_live_selftests.h Add your unit test declarations there! + * + * Mock unit tests are run very early upon module load, before the driver + * is probed. All hardware interactions, as well as other subsystems, must + * be "mocked". + * + * Live unit tests are run after the driver is loaded - all hardware + * interactions are real. + */ +#define selftest(name, func) int func(void); +#include "selftests/i915_mock_selftests.h" +#undef selftest +#define selftest(name, func) int func(struct drm_i915_private *i915); +#include "selftests/i915_live_selftests.h" +#undef selftest + +struct i915_subtest { + int (*func)(void *data); + const char *name; +}; + +int __i915_subtests(const char *caller, + const struct i915_subtest *st, + unsigned int count, + void *data); +#define i915_subtests(T, data) \ + __i915_subtests(__func__, T, ARRAY_SIZE(T), data) + +#define SUBTEST(x) { x, #x } + +#define I915_SELFTEST_DECLARE(x) x +#define I915_SELFTEST_ONLY(x) unlikely(x) + +#else /* !IS_ENABLED(CONFIG_DRM_I915_SELFTEST) */ + +static inline int i915_mock_selftests(void) { return 0; } +static inline int i915_live_selftests(struct pci_dev *pdev) { return 0; } + +#define I915_SELFTEST_DECLARE(x) +#define I915_SELFTEST_ONLY(x) 0 + +#endif + +/* Using the i915_selftest_ prefix becomes a little unwieldy with the helpers. + * Instead we use the igt_ shorthand, in reference to the intel-gpu-tools + * suite of uabi test cases (which includes a test runner for our selftests). + */ + +#define IGT_TIMEOUT(name__) \ + unsigned long name__ = jiffies + i915_selftest.timeout_jiffies + +__printf(2, 3) +bool __igt_timeout(unsigned long timeout, const char *fmt, ...); + +#define igt_timeout(t, fmt, ...) \ + __igt_timeout((t), KERN_WARNING pr_fmt(fmt), ##__VA_ARGS__) + +#endif /* !__I915_SELFTEST_H__ */ diff --git a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h new file mode 100644 index 000000000000..f3e17cb10e05 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h @@ -0,0 +1,11 @@ +/* List each unit test as selftest(name, function) + * + * The name is used as both an enum and expanded as subtest__name to create + * a module parameter. It must be unique and legal for a C identifier. + * + * The function should be of type int function(void). It may be conditionally + * compiled using #if IS_ENABLED(DRM_I915_SELFTEST). + * + * Tests are executed in order by igt/drv_selftest + */ +selftest(sanitycheck, i915_live_sanitycheck) /* keep first (igt selfcheck) */ diff --git a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h new file mode 100644 index 000000000000..69e97a2ba4a6 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h @@ -0,0 +1,11 @@ +/* List each unit test as selftest(name, function) + * + * The name is used as both an enum and expanded as subtest__name to create + * a module parameter. It must be unique and legal for a C identifier. + * + * The function should be of type int function(void). It may be conditionally + * compiled using #if IS_ENABLED(DRM_I915_SELFTEST). + * + * Tests are executed in order by igt/drv_selftest + */ +selftest(sanitycheck, i915_mock_sanitycheck) /* keep first (igt selfcheck) */ diff --git a/drivers/gpu/drm/i915/selftests/i915_random.c b/drivers/gpu/drm/i915/selftests/i915_random.c new file mode 100644 index 000000000000..c17c83c30637 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/i915_random.c @@ -0,0 +1,63 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include +#include +#include +#include +#include + +#include "i915_random.h" + +static inline u32 i915_prandom_u32_max_state(u32 ep_ro, struct rnd_state *state) +{ + return upper_32_bits((u64)prandom_u32_state(state) * ep_ro); +} + +void i915_random_reorder(unsigned int *order, unsigned int count, + struct rnd_state *state) +{ + unsigned int i, j; + + for (i = 0; i < count; i++) { + BUILD_BUG_ON(sizeof(unsigned int) > sizeof(u32)); + j = i915_prandom_u32_max_state(count, state); + swap(order[i], order[j]); + } +} + +unsigned int *i915_random_order(unsigned int count, struct rnd_state *state) +{ + unsigned int *order, i; + + order = kmalloc_array(count, sizeof(*order), GFP_TEMPORARY); + if (!order) + return order; + + for (i = 0; i < count; i++) + order[i] = i; + + i915_random_reorder(order, count, state); + return order; +} diff --git a/drivers/gpu/drm/i915/selftests/i915_random.h b/drivers/gpu/drm/i915/selftests/i915_random.h new file mode 100644 index 000000000000..b9c334ce6cd9 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/i915_random.h @@ -0,0 +1,50 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#ifndef __I915_SELFTESTS_RANDOM_H__ +#define __I915_SELFTESTS_RANDOM_H__ + +#include + +#include "../i915_selftest.h" + +#define I915_RND_STATE_INITIALIZER(x) ({ \ + struct rnd_state state__; \ + prandom_seed_state(&state__, (x)); \ + state__; \ +}) + +#define I915_RND_STATE(name__) \ + struct rnd_state name__ = I915_RND_STATE_INITIALIZER(i915_selftest.random_seed) + +#define I915_RND_SUBSTATE(name__, parent__) \ + struct rnd_state name__ = I915_RND_STATE_INITIALIZER(prandom_u32_state(&(parent__))) + +unsigned int *i915_random_order(unsigned int count, + struct rnd_state *state); +void i915_random_reorder(unsigned int *order, + unsigned int count, + struct rnd_state *state); + +#endif /* !__I915_SELFTESTS_RANDOM_H__ */ diff --git a/drivers/gpu/drm/i915/selftests/i915_selftest.c b/drivers/gpu/drm/i915/selftests/i915_selftest.c new file mode 100644 index 000000000000..6ba3abb10c6f --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/i915_selftest.c @@ -0,0 +1,250 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include + +#include "../i915_drv.h" +#include "../i915_selftest.h" + +struct i915_selftest i915_selftest __read_mostly = { + .timeout_ms = 1000, +}; + +int i915_mock_sanitycheck(void) +{ + pr_info(DRIVER_NAME ": %s() - ok!\n", __func__); + return 0; +} + +int i915_live_sanitycheck(struct drm_i915_private *i915) +{ + pr_info("%s: %s() - ok!\n", i915->drm.driver->name, __func__); + return 0; +} + +enum { +#define selftest(name, func) mock_##name, +#include "i915_mock_selftests.h" +#undef selftest +}; + +enum { +#define selftest(name, func) live_##name, +#include "i915_live_selftests.h" +#undef selftest +}; + +struct selftest { + bool enabled; + const char *name; + union { + int (*mock)(void); + int (*live)(struct drm_i915_private *); + }; +}; + +#define selftest(n, f) [mock_##n] = { .name = #n, .mock = f }, +static struct selftest mock_selftests[] = { +#include "i915_mock_selftests.h" +}; +#undef selftest + +#define selftest(n, f) [live_##n] = { .name = #n, .live = f }, +static struct selftest live_selftests[] = { +#include "i915_live_selftests.h" +}; +#undef selftest + +/* Embed the line number into the parameter name so that we can order tests */ +#define selftest(n, func) selftest_0(n, func, param(n)) +#define param(n) __PASTE(igt__, __PASTE(__LINE__, __mock_##n)) +#define selftest_0(n, func, id) \ +module_param_named(id, mock_selftests[mock_##n].enabled, bool, 0400); +#include "i915_mock_selftests.h" +#undef selftest_0 +#undef param + +#define param(n) __PASTE(igt__, __PASTE(__LINE__, __live_##n)) +#define selftest_0(n, func, id) \ +module_param_named(id, live_selftests[live_##n].enabled, bool, 0400); +#include "i915_live_selftests.h" +#undef selftest_0 +#undef param +#undef selftest + +static void set_default_test_all(struct selftest *st, unsigned int count) +{ + unsigned int i; + + for (i = 0; i < count; i++) + if (st[i].enabled) + return; + + for (i = 0; i < count; i++) + st[i].enabled = true; +} + +static int __run_selftests(const char *name, + struct selftest *st, + unsigned int count, + void *data) +{ + int err = 0; + + while (!i915_selftest.random_seed) + i915_selftest.random_seed = get_random_int(); + + i915_selftest.timeout_jiffies = + i915_selftest.timeout_ms ? + msecs_to_jiffies_timeout(i915_selftest.timeout_ms) : + MAX_SCHEDULE_TIMEOUT; + + set_default_test_all(st, count); + + pr_info(DRIVER_NAME ": Performing %s selftests with st_random_seed=0x%x st_timeout=%u\n", + name, i915_selftest.random_seed, i915_selftest.timeout_ms); + + /* Tests are listed in order in i915_*_selftests.h */ + for (; count--; st++) { + if (!st->enabled) + continue; + + cond_resched(); + if (signal_pending(current)) + return -EINTR; + + pr_debug(DRIVER_NAME ": Running %s\n", st->name); + if (data) + err = st->live(data); + else + err = st->mock(); + if (err == -EINTR && !signal_pending(current)) + err = 0; + if (err) + break; + } + + if (WARN(err > 0 || err == -ENOTTY, + "%s returned %d, conflicting with selftest's magic values!\n", + st->name, err)) + err = -1; + + return err; +} + +#define run_selftests(x, data) \ + __run_selftests(#x, x##_selftests, ARRAY_SIZE(x##_selftests), data) + +int i915_mock_selftests(void) +{ + int err; + + if (!i915_selftest.mock) + return 0; + + err = run_selftests(mock, NULL); + if (err) { + i915_selftest.mock = err; + return err; + } + + if (i915_selftest.mock < 0) { + i915_selftest.mock = -ENOTTY; + return 1; + } + + return 0; +} + +int i915_live_selftests(struct pci_dev *pdev) +{ + int err; + + if (!i915_selftest.live) + return 0; + + err = run_selftests(live, to_i915(pci_get_drvdata(pdev))); + if (err) { + i915_selftest.live = err; + return err; + } + + if (i915_selftest.live < 0) { + i915_selftest.live = -ENOTTY; + return 1; + } + + return 0; +} + +int __i915_subtests(const char *caller, + const struct i915_subtest *st, + unsigned int count, + void *data) +{ + int err; + + for (; count--; st++) { + cond_resched(); + if (signal_pending(current)) + return -EINTR; + + pr_debug(DRIVER_NAME ": Running %s/%s\n", caller, st->name); + err = st->func(data); + if (err && err != -EINTR) { + pr_err(DRIVER_NAME "/%s: %s failed with error %d\n", + caller, st->name, err); + return err; + } + } + + return 0; +} + +bool __igt_timeout(unsigned long timeout, const char *fmt, ...) +{ + va_list va; + + if (!signal_pending(current)) { + cond_resched(); + if (time_before(jiffies, timeout)) + return false; + } + + if (fmt) { + va_start(va, fmt); + vprintk(fmt, va); + va_end(va); + } + + return true; +} + +module_param_named(st_random_seed, i915_selftest.random_seed, uint, 0400); +module_param_named(st_timeout, i915_selftest.timeout_ms, uint, 0400); + +module_param_named_unsafe(mock_selftests, i915_selftest.mock, int, 0400); +MODULE_PARM_DESC(mock_selftests, "Run selftests before loading, using mock hardware (0:disabled [default], 1:run tests then load driver, -1:run tests then exit module)"); + +module_param_named_unsafe(live_selftests, i915_selftest.live, int, 0400); +MODULE_PARM_DESC(live_selftests, "Run selftests after driver initialisation on the live system (0:disabled [default], 1:run tests then continue, -1:run tests then exit module)"); diff --git a/tools/testing/selftests/drivers/gpu/i915.sh b/tools/testing/selftests/drivers/gpu/i915.sh index d407f0fa1e3a..c06d6e8a8dcc 100755 --- a/tools/testing/selftests/drivers/gpu/i915.sh +++ b/tools/testing/selftests/drivers/gpu/i915.sh @@ -7,6 +7,7 @@ if ! /sbin/modprobe -q -r i915; then fi if /sbin/modprobe -q i915 mock_selftests=-1; then + /sbin/modprobe -q -r i915 echo "drivers/gpu/i915: ok" else echo "drivers/gpu/i915: [FAIL]" -- cgit From 935a2f776aa523cd6ad20c9b7c7d019ba9418759 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 13 Feb 2017 17:15:13 +0000 Subject: drm/i915: Add some selftests for sg_table manipulation Start exercising the scattergather lists, especially looking at iteration after coalescing. v2: Comment on the peculiarity of table construction (i.e. why this sg_table might be interesting). v3: Added one __func__ to identify expect_pfn_sg() v4: Loop until we have crossed the chain boundary (forcing sg_table to do multiple allocations) before squelching a potential ENOMEM from oom. Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170213171558.20942-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem.c | 11 +- .../gpu/drm/i915/selftests/i915_mock_selftests.h | 1 + drivers/gpu/drm/i915/selftests/scatterlist.c | 355 +++++++++++++++++++++ 3 files changed, 364 insertions(+), 3 deletions(-) create mode 100644 drivers/gpu/drm/i915/selftests/scatterlist.c diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 638b335e72b6..c91e5d10cc58 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2224,17 +2224,17 @@ unlock: mutex_unlock(&obj->mm.lock); } -static void i915_sg_trim(struct sg_table *orig_st) +static bool i915_sg_trim(struct sg_table *orig_st) { struct sg_table new_st; struct scatterlist *sg, *new_sg; unsigned int i; if (orig_st->nents == orig_st->orig_nents) - return; + return false; if (sg_alloc_table(&new_st, orig_st->nents, GFP_KERNEL | __GFP_NOWARN)) - return; + return false; new_sg = new_st.sgl; for_each_sg(orig_st->sgl, sg, orig_st->nents, i) { @@ -2247,6 +2247,7 @@ static void i915_sg_trim(struct sg_table *orig_st) sg_free_table(orig_st); *orig_st = new_st; + return true; } static struct sg_table * @@ -5019,3 +5020,7 @@ i915_gem_object_get_dma_address(struct drm_i915_gem_object *obj, sg = i915_gem_object_get_sg(obj, n, &offset); return sg_dma_address(sg) + (offset << PAGE_SHIFT); } + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftests/scatterlist.c" +#endif diff --git a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h index 69e97a2ba4a6..5f0bdda42ed8 100644 --- a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h +++ b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h @@ -9,3 +9,4 @@ * Tests are executed in order by igt/drv_selftest */ selftest(sanitycheck, i915_mock_sanitycheck) /* keep first (igt selfcheck) */ +selftest(scatterlist, scatterlist_mock_selftests) diff --git a/drivers/gpu/drm/i915/selftests/scatterlist.c b/drivers/gpu/drm/i915/selftests/scatterlist.c new file mode 100644 index 000000000000..eb2cda8e2b9f --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/scatterlist.c @@ -0,0 +1,355 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include +#include + +#include "../i915_selftest.h" + +#define PFN_BIAS (1 << 10) + +struct pfn_table { + struct sg_table st; + unsigned long start, end; +}; + +typedef unsigned int (*npages_fn_t)(unsigned long n, + unsigned long count, + struct rnd_state *rnd); + +static noinline int expect_pfn_sg(struct pfn_table *pt, + npages_fn_t npages_fn, + struct rnd_state *rnd, + const char *who, + unsigned long timeout) +{ + struct scatterlist *sg; + unsigned long pfn, n; + + pfn = pt->start; + for_each_sg(pt->st.sgl, sg, pt->st.nents, n) { + struct page *page = sg_page(sg); + unsigned int npages = npages_fn(n, pt->st.nents, rnd); + + if (page_to_pfn(page) != pfn) { + pr_err("%s: %s left pages out of order, expected pfn %lu, found pfn %lu (using for_each_sg)\n", + __func__, who, pfn, page_to_pfn(page)); + return -EINVAL; + } + + if (sg->length != npages * PAGE_SIZE) { + pr_err("%s: %s copied wrong sg length, expected size %lu, found %u (using for_each_sg)\n", + __func__, who, npages * PAGE_SIZE, sg->length); + return -EINVAL; + } + + if (igt_timeout(timeout, "%s timed out\n", who)) + return -EINTR; + + pfn += npages; + } + if (pfn != pt->end) { + pr_err("%s: %s finished on wrong pfn, expected %lu, found %lu\n", + __func__, who, pt->end, pfn); + return -EINVAL; + } + + return 0; +} + +static noinline int expect_pfn_sg_page_iter(struct pfn_table *pt, + const char *who, + unsigned long timeout) +{ + struct sg_page_iter sgiter; + unsigned long pfn; + + pfn = pt->start; + for_each_sg_page(pt->st.sgl, &sgiter, pt->st.nents, 0) { + struct page *page = sg_page_iter_page(&sgiter); + + if (page != pfn_to_page(pfn)) { + pr_err("%s: %s left pages out of order, expected pfn %lu, found pfn %lu (using for_each_sg_page)\n", + __func__, who, pfn, page_to_pfn(page)); + return -EINVAL; + } + + if (igt_timeout(timeout, "%s timed out\n", who)) + return -EINTR; + + pfn++; + } + if (pfn != pt->end) { + pr_err("%s: %s finished on wrong pfn, expected %lu, found %lu\n", + __func__, who, pt->end, pfn); + return -EINVAL; + } + + return 0; +} + +static noinline int expect_pfn_sgtiter(struct pfn_table *pt, + const char *who, + unsigned long timeout) +{ + struct sgt_iter sgt; + struct page *page; + unsigned long pfn; + + pfn = pt->start; + for_each_sgt_page(page, sgt, &pt->st) { + if (page != pfn_to_page(pfn)) { + pr_err("%s: %s left pages out of order, expected pfn %lu, found pfn %lu (using for_each_sgt_page)\n", + __func__, who, pfn, page_to_pfn(page)); + return -EINVAL; + } + + if (igt_timeout(timeout, "%s timed out\n", who)) + return -EINTR; + + pfn++; + } + if (pfn != pt->end) { + pr_err("%s: %s finished on wrong pfn, expected %lu, found %lu\n", + __func__, who, pt->end, pfn); + return -EINVAL; + } + + return 0; +} + +static int expect_pfn_sgtable(struct pfn_table *pt, + npages_fn_t npages_fn, + struct rnd_state *rnd, + const char *who, + unsigned long timeout) +{ + int err; + + err = expect_pfn_sg(pt, npages_fn, rnd, who, timeout); + if (err) + return err; + + err = expect_pfn_sg_page_iter(pt, who, timeout); + if (err) + return err; + + err = expect_pfn_sgtiter(pt, who, timeout); + if (err) + return err; + + return 0; +} + +static unsigned int one(unsigned long n, + unsigned long count, + struct rnd_state *rnd) +{ + return 1; +} + +static unsigned int grow(unsigned long n, + unsigned long count, + struct rnd_state *rnd) +{ + return n + 1; +} + +static unsigned int shrink(unsigned long n, + unsigned long count, + struct rnd_state *rnd) +{ + return count - n; +} + +static unsigned int random(unsigned long n, + unsigned long count, + struct rnd_state *rnd) +{ + return 1 + (prandom_u32_state(rnd) % 1024); +} + +static int alloc_table(struct pfn_table *pt, + unsigned long count, unsigned long max, + npages_fn_t npages_fn, + struct rnd_state *rnd, + int alloc_error) +{ + struct scatterlist *sg; + unsigned long n, pfn; + + if (sg_alloc_table(&pt->st, max, + GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN)) + return alloc_error; + + /* count should be less than 20 to prevent overflowing sg->length */ + GEM_BUG_ON(overflows_type(count * PAGE_SIZE, sg->length)); + + /* Construct a table where each scatterlist contains different number + * of entries. The idea is to check that we can iterate the individual + * pages from inside the coalesced lists. + */ + pt->start = PFN_BIAS; + pfn = pt->start; + sg = pt->st.sgl; + for (n = 0; n < count; n++) { + unsigned long npages = npages_fn(n, count, rnd); + + /* Nobody expects the Sparse Memmap! */ + if (pfn_to_page(pfn + npages) != pfn_to_page(pfn) + npages) { + sg_free_table(&pt->st); + return -ENOSPC; + } + + if (n) + sg = sg_next(sg); + sg_set_page(sg, pfn_to_page(pfn), npages * PAGE_SIZE, 0); + + GEM_BUG_ON(page_to_pfn(sg_page(sg)) != pfn); + GEM_BUG_ON(sg->length != npages * PAGE_SIZE); + GEM_BUG_ON(sg->offset != 0); + + pfn += npages; + } + sg_mark_end(sg); + pt->st.nents = n; + pt->end = pfn; + + return 0; +} + +static const npages_fn_t npages_funcs[] = { + one, + grow, + shrink, + random, + NULL, +}; + +static int igt_sg_alloc(void *ignored) +{ + IGT_TIMEOUT(end_time); + const unsigned long max_order = 20; /* approximating a 4GiB object */ + struct rnd_state prng; + unsigned long prime; + int alloc_error = -ENOMEM; + + for_each_prime_number(prime, max_order) { + unsigned long size = BIT(prime); + int offset; + + for (offset = -1; offset <= 1; offset++) { + unsigned long sz = size + offset; + const npages_fn_t *npages; + struct pfn_table pt; + int err; + + for (npages = npages_funcs; *npages; npages++) { + prandom_seed_state(&prng, + i915_selftest.random_seed); + err = alloc_table(&pt, sz, sz, *npages, &prng, + alloc_error); + if (err == -ENOSPC) + break; + if (err) + return err; + + prandom_seed_state(&prng, + i915_selftest.random_seed); + err = expect_pfn_sgtable(&pt, *npages, &prng, + "sg_alloc_table", + end_time); + sg_free_table(&pt.st); + if (err) + return err; + } + } + + /* Test at least one continuation before accepting oom */ + if (size > SG_MAX_SINGLE_ALLOC) + alloc_error = -ENOSPC; + } + + return 0; +} + +static int igt_sg_trim(void *ignored) +{ + IGT_TIMEOUT(end_time); + const unsigned long max = PAGE_SIZE; /* not prime! */ + struct pfn_table pt; + unsigned long prime; + int alloc_error = -ENOMEM; + + for_each_prime_number(prime, max) { + const npages_fn_t *npages; + int err; + + for (npages = npages_funcs; *npages; npages++) { + struct rnd_state prng; + + prandom_seed_state(&prng, i915_selftest.random_seed); + err = alloc_table(&pt, prime, max, *npages, &prng, + alloc_error); + if (err == -ENOSPC) + break; + if (err) + return err; + + if (i915_sg_trim(&pt.st)) { + if (pt.st.orig_nents != prime || + pt.st.nents != prime) { + pr_err("i915_sg_trim failed (nents %u, orig_nents %u), expected %lu\n", + pt.st.nents, pt.st.orig_nents, prime); + err = -EINVAL; + } else { + prandom_seed_state(&prng, + i915_selftest.random_seed); + err = expect_pfn_sgtable(&pt, + *npages, &prng, + "i915_sg_trim", + end_time); + } + } + sg_free_table(&pt.st); + if (err) + return err; + } + + /* Test at least one continuation before accepting oom */ + if (prime > SG_MAX_SINGLE_ALLOC) + alloc_error = -ENOSPC; + } + + return 0; +} + +int scatterlist_mock_selftests(void) +{ + static const struct i915_subtest tests[] = { + SUBTEST(igt_sg_alloc), + SUBTEST(igt_sg_trim), + }; + + return i915_subtests(tests, NULL); +} -- cgit From f97fbf9606a1a96d61d171e1ed1606b2b812fb01 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 13 Feb 2017 17:15:14 +0000 Subject: drm/i915: Add unit tests for the breadcrumb rbtree, insert/remove First retroactive test, make sure that the waiters are in global seqno order after random inserts and removals. Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170213171558.20942-3-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_breadcrumbs.c | 21 +++ drivers/gpu/drm/i915/intel_engine_cs.c | 4 + drivers/gpu/drm/i915/intel_ringbuffer.h | 2 + .../gpu/drm/i915/selftests/i915_mock_selftests.h | 1 + drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c | 172 +++++++++++++++++++++ drivers/gpu/drm/i915/selftests/mock_engine.c | 55 +++++++ drivers/gpu/drm/i915/selftests/mock_engine.h | 32 ++++ 7 files changed, 287 insertions(+) create mode 100644 drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c create mode 100644 drivers/gpu/drm/i915/selftests/mock_engine.c create mode 100644 drivers/gpu/drm/i915/selftests/mock_engine.h diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c index f5e05343110a..74cb7b91b5db 100644 --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c @@ -107,6 +107,18 @@ static void __intel_breadcrumbs_enable_irq(struct intel_breadcrumbs *b) if (b->rpm_wakelock) return; + if (I915_SELFTEST_ONLY(b->mock)) { + /* For our mock objects we want to avoid interaction + * with the real hardware (which is not set up). So + * we simply pretend we have enabled the powerwell + * and the irq, and leave it up to the mock + * implementation to call intel_engine_wakeup() + * itself when it wants to simulate a user interrupt, + */ + b->rpm_wakelock = true; + return; + } + /* Since we are waiting on a request, the GPU should be busy * and should have its own rpm reference. For completeness, * record an rpm reference for ourselves to cover the @@ -142,6 +154,11 @@ static void __intel_breadcrumbs_disable_irq(struct intel_breadcrumbs *b) if (!b->rpm_wakelock) return; + if (I915_SELFTEST_ONLY(b->mock)) { + b->rpm_wakelock = false; + return; + } + if (b->irq_enabled) { irq_disable(engine); b->irq_enabled = false; @@ -664,3 +681,7 @@ unsigned int intel_breadcrumbs_busy(struct drm_i915_private *i915) return mask; } + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftests/intel_breadcrumbs.c" +#endif diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 69a6416d1223..538d845d7251 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -524,3 +524,7 @@ void intel_engine_get_instdone(struct intel_engine_cs *engine, break; } } + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftests/mock_engine.c" +#endif diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index b122c3cc6dbb..cc62e89010d3 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -5,6 +5,7 @@ #include "i915_gem_batch_pool.h" #include "i915_gem_request.h" #include "i915_gem_timeline.h" +#include "i915_selftest.h" #define I915_CMD_HASH_ORDER 9 @@ -249,6 +250,7 @@ struct intel_engine_cs { bool irq_enabled : 1; bool rpm_wakelock : 1; + I915_SELFTEST_DECLARE(bool mock : 1); } breadcrumbs; /* diff --git a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h index 5f0bdda42ed8..80458e2a2b04 100644 --- a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h +++ b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h @@ -10,3 +10,4 @@ */ selftest(sanitycheck, i915_mock_sanitycheck) /* keep first (igt selfcheck) */ selftest(scatterlist, scatterlist_mock_selftests) +selftest(breadcrumbs, intel_breadcrumbs_mock_selftests) diff --git a/drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c b/drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c new file mode 100644 index 000000000000..6b5acf9de65b --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c @@ -0,0 +1,172 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include "../i915_selftest.h" +#include "i915_random.h" + +#include "mock_engine.h" + +static int check_rbtree(struct intel_engine_cs *engine, + const unsigned long *bitmap, + const struct intel_wait *waiters, + const int count) +{ + struct intel_breadcrumbs *b = &engine->breadcrumbs; + struct rb_node *rb; + int n; + + if (&b->first_wait->node != rb_first(&b->waiters)) { + pr_err("First waiter does not match first element of wait-tree\n"); + return -EINVAL; + } + + n = find_first_bit(bitmap, count); + for (rb = rb_first(&b->waiters); rb; rb = rb_next(rb)) { + struct intel_wait *w = container_of(rb, typeof(*w), node); + int idx = w - waiters; + + if (!test_bit(idx, bitmap)) { + pr_err("waiter[%d, seqno=%d] removed but still in wait-tree\n", + idx, w->seqno); + return -EINVAL; + } + + if (n != idx) { + pr_err("waiter[%d, seqno=%d] does not match expected next element in tree [%d]\n", + idx, w->seqno, n); + return -EINVAL; + } + + n = find_next_bit(bitmap, count, n + 1); + } + + return 0; +} + +static int check_rbtree_empty(struct intel_engine_cs *engine) +{ + struct intel_breadcrumbs *b = &engine->breadcrumbs; + + if (b->first_wait) { + pr_err("Empty breadcrumbs still has a waiter\n"); + return -EINVAL; + } + + if (!RB_EMPTY_ROOT(&b->waiters)) { + pr_err("Empty breadcrumbs, but wait-tree not empty\n"); + return -EINVAL; + } + + return 0; +} + +static int igt_random_insert_remove(void *arg) +{ + const u32 seqno_bias = 0x1000; + I915_RND_STATE(prng); + struct intel_engine_cs *engine = arg; + struct intel_wait *waiters; + const int count = 4096; + unsigned int *order; + unsigned long *bitmap; + int err = -ENOMEM; + int n; + + mock_engine_reset(engine); + + waiters = drm_malloc_gfp(count, sizeof(*waiters), GFP_TEMPORARY); + if (!waiters) + goto out_engines; + + bitmap = kcalloc(DIV_ROUND_UP(count, BITS_PER_LONG), sizeof(*bitmap), + GFP_TEMPORARY); + if (!bitmap) + goto out_waiters; + + order = i915_random_order(count, &prng); + if (!order) + goto out_bitmap; + + for (n = 0; n < count; n++) + intel_wait_init(&waiters[n], seqno_bias + n); + + err = check_rbtree(engine, bitmap, waiters, count); + if (err) + goto out_order; + + /* Add and remove waiters into the rbtree in random order. At each + * step, we verify that the rbtree is correctly ordered. + */ + for (n = 0; n < count; n++) { + int i = order[n]; + + intel_engine_add_wait(engine, &waiters[i]); + __set_bit(i, bitmap); + + err = check_rbtree(engine, bitmap, waiters, count); + if (err) + goto out_order; + } + + i915_random_reorder(order, count, &prng); + for (n = 0; n < count; n++) { + int i = order[n]; + + intel_engine_remove_wait(engine, &waiters[i]); + __clear_bit(i, bitmap); + + err = check_rbtree(engine, bitmap, waiters, count); + if (err) + goto out_order; + } + + err = check_rbtree_empty(engine); +out_order: + kfree(order); +out_bitmap: + kfree(bitmap); +out_waiters: + drm_free_large(waiters); +out_engines: + mock_engine_flush(engine); + return err; +} + +int intel_breadcrumbs_mock_selftests(void) +{ + static const struct i915_subtest tests[] = { + SUBTEST(igt_random_insert_remove), + }; + struct intel_engine_cs *engine; + int err; + + engine = mock_engine("mock"); + if (!engine) + return -ENOMEM; + + err = i915_subtests(tests, engine); + kfree(engine); + + return err; +} diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.c b/drivers/gpu/drm/i915/selftests/mock_engine.c new file mode 100644 index 000000000000..4a090bbe807b --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/mock_engine.c @@ -0,0 +1,55 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include "mock_engine.h" + +struct intel_engine_cs *mock_engine(const char *name) +{ + struct intel_engine_cs *engine; + static int id; + + engine = kzalloc(sizeof(*engine) + PAGE_SIZE, GFP_KERNEL); + if (!engine) + return NULL; + + /* minimal engine setup for seqno */ + engine->name = name; + engine->id = id++; + engine->status_page.page_addr = (void *)(engine + 1); + + /* minimal breadcrumbs init */ + spin_lock_init(&engine->breadcrumbs.lock); + engine->breadcrumbs.mock = true; + + return engine; +} + +void mock_engine_flush(struct intel_engine_cs *engine) +{ +} + +void mock_engine_reset(struct intel_engine_cs *engine) +{ + intel_write_status_page(engine, I915_GEM_HWS_INDEX, 0); +} diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.h b/drivers/gpu/drm/i915/selftests/mock_engine.h new file mode 100644 index 000000000000..0ae9a94aaa1e --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/mock_engine.h @@ -0,0 +1,32 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#ifndef __MOCK_ENGINE_H__ +#define __MOCK_ENGINE_H__ + +struct intel_engine_cs *mock_engine(const char *name); +void mock_engine_flush(struct intel_engine_cs *engine); +void mock_engine_reset(struct intel_engine_cs *engine); + +#endif /* !__MOCK_ENGINE_H__ */ -- cgit From ae1f8090b1a7f163a16d72ddc4214a327ce989e0 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 13 Feb 2017 17:15:15 +0000 Subject: drm/i915: Add unit tests for the breadcrumb rbtree, completion Second retroactive test, make sure that the waiters are removed from the global wait-tree when their seqno completes. Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170213171558.20942-4-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c | 107 +++++++++++++++++++++ drivers/gpu/drm/i915/selftests/mock_engine.h | 6 ++ 2 files changed, 113 insertions(+) diff --git a/drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c b/drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c index 6b5acf9de65b..32a27e56c353 100644 --- a/drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c @@ -64,6 +64,27 @@ static int check_rbtree(struct intel_engine_cs *engine, return 0; } +static int check_completion(struct intel_engine_cs *engine, + const unsigned long *bitmap, + const struct intel_wait *waiters, + const int count) +{ + int n; + + for (n = 0; n < count; n++) { + if (intel_wait_complete(&waiters[n]) != !!test_bit(n, bitmap)) + continue; + + pr_err("waiter[%d, seqno=%d] is %s, but expected %s\n", + n, waiters[n].seqno, + intel_wait_complete(&waiters[n]) ? "complete" : "active", + test_bit(n, bitmap) ? "active" : "complete"); + return -EINVAL; + } + + return 0; +} + static int check_rbtree_empty(struct intel_engine_cs *engine) { struct intel_breadcrumbs *b = &engine->breadcrumbs; @@ -153,10 +174,96 @@ out_engines: return err; } +static int igt_insert_complete(void *arg) +{ + const u32 seqno_bias = 0x1000; + struct intel_engine_cs *engine = arg; + struct intel_wait *waiters; + const int count = 4096; + unsigned long *bitmap; + int err = -ENOMEM; + int n, m; + + mock_engine_reset(engine); + + waiters = drm_malloc_gfp(count, sizeof(*waiters), GFP_TEMPORARY); + if (!waiters) + goto out_engines; + + bitmap = kcalloc(DIV_ROUND_UP(count, BITS_PER_LONG), sizeof(*bitmap), + GFP_TEMPORARY); + if (!bitmap) + goto out_waiters; + + for (n = 0; n < count; n++) { + intel_wait_init(&waiters[n], n + seqno_bias); + intel_engine_add_wait(engine, &waiters[n]); + __set_bit(n, bitmap); + } + err = check_rbtree(engine, bitmap, waiters, count); + if (err) + goto out_bitmap; + + /* On each step, we advance the seqno so that several waiters are then + * complete (we increase the seqno by increasingly larger values to + * retire more and more waiters at once). All retired waiters should + * be woken and removed from the rbtree, and so that we check. + */ + for (n = 0; n < count; n = m) { + int seqno = 2 * n; + + GEM_BUG_ON(find_first_bit(bitmap, count) != n); + + if (intel_wait_complete(&waiters[n])) { + pr_err("waiter[%d, seqno=%d] completed too early\n", + n, waiters[n].seqno); + err = -EINVAL; + goto out_bitmap; + } + + /* complete the following waiters */ + mock_seqno_advance(engine, seqno + seqno_bias); + for (m = n; m <= seqno; m++) { + if (m == count) + break; + + GEM_BUG_ON(!test_bit(m, bitmap)); + __clear_bit(m, bitmap); + } + + intel_engine_remove_wait(engine, &waiters[n]); + RB_CLEAR_NODE(&waiters[n].node); + + err = check_rbtree(engine, bitmap, waiters, count); + if (err) { + pr_err("rbtree corrupt after seqno advance to %d\n", + seqno + seqno_bias); + goto out_bitmap; + } + + err = check_completion(engine, bitmap, waiters, count); + if (err) { + pr_err("completions after seqno advance to %d failed\n", + seqno + seqno_bias); + goto out_bitmap; + } + } + + err = check_rbtree_empty(engine); +out_bitmap: + kfree(bitmap); +out_waiters: + drm_free_large(waiters); +out_engines: + mock_engine_flush(engine); + return err; +} + int intel_breadcrumbs_mock_selftests(void) { static const struct i915_subtest tests[] = { SUBTEST(igt_random_insert_remove), + SUBTEST(igt_insert_complete), }; struct intel_engine_cs *engine; int err; diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.h b/drivers/gpu/drm/i915/selftests/mock_engine.h index 0ae9a94aaa1e..9cfe9671f860 100644 --- a/drivers/gpu/drm/i915/selftests/mock_engine.h +++ b/drivers/gpu/drm/i915/selftests/mock_engine.h @@ -29,4 +29,10 @@ struct intel_engine_cs *mock_engine(const char *name); void mock_engine_flush(struct intel_engine_cs *engine); void mock_engine_reset(struct intel_engine_cs *engine); +static inline void mock_seqno_advance(struct intel_engine_cs *engine, u32 seqno) +{ + intel_write_status_page(engine, I915_GEM_HWS_INDEX, seqno); + intel_engine_wakeup(engine); +} + #endif /* !__MOCK_ENGINE_H__ */ -- cgit From e62e8ad1ba5701e0e0c1371041cbb677f75ba3fb Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 13 Feb 2017 17:15:16 +0000 Subject: drm/i915: Add unit tests for the breadcrumb rbtree, wakeups Third retroactive test, make sure that the seqno waiters are woken. v2: Smattering of comments, rearrange code v3: Fix IDLE assert to avoid startup/sleep races Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170213171558.20942-5-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c | 201 +++++++++++++++++++++ 1 file changed, 201 insertions(+) diff --git a/drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c b/drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c index 32a27e56c353..d1b99b565500 100644 --- a/drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c @@ -259,11 +259,212 @@ out_engines: return err; } +struct igt_wakeup { + struct task_struct *tsk; + atomic_t *ready, *set, *done; + struct intel_engine_cs *engine; + unsigned long flags; +#define STOP 0 +#define IDLE 1 + wait_queue_head_t *wq; + u32 seqno; +}; + +static int wait_atomic(atomic_t *p) +{ + schedule(); + return 0; +} + +static int wait_atomic_timeout(atomic_t *p) +{ + return schedule_timeout(10 * HZ) ? 0 : -ETIMEDOUT; +} + +static bool wait_for_ready(struct igt_wakeup *w) +{ + DEFINE_WAIT(ready); + + set_bit(IDLE, &w->flags); + if (atomic_dec_and_test(w->done)) + wake_up_atomic_t(w->done); + + if (test_bit(STOP, &w->flags)) + goto out; + + for (;;) { + prepare_to_wait(w->wq, &ready, TASK_INTERRUPTIBLE); + if (atomic_read(w->ready) == 0) + break; + + schedule(); + } + finish_wait(w->wq, &ready); + +out: + clear_bit(IDLE, &w->flags); + if (atomic_dec_and_test(w->set)) + wake_up_atomic_t(w->set); + + return !test_bit(STOP, &w->flags); +} + +static int igt_wakeup_thread(void *arg) +{ + struct igt_wakeup *w = arg; + struct intel_wait wait; + + while (wait_for_ready(w)) { + GEM_BUG_ON(kthread_should_stop()); + + intel_wait_init(&wait, w->seqno); + intel_engine_add_wait(w->engine, &wait); + for (;;) { + set_current_state(TASK_UNINTERRUPTIBLE); + if (i915_seqno_passed(intel_engine_get_seqno(w->engine), + w->seqno)) + break; + + if (test_bit(STOP, &w->flags)) /* emergency escape */ + break; + + schedule(); + } + intel_engine_remove_wait(w->engine, &wait); + __set_current_state(TASK_RUNNING); + } + + return 0; +} + +static void igt_wake_all_sync(atomic_t *ready, + atomic_t *set, + atomic_t *done, + wait_queue_head_t *wq, + int count) +{ + atomic_set(set, count); + atomic_set(ready, 0); + wake_up_all(wq); + + wait_on_atomic_t(set, wait_atomic, TASK_UNINTERRUPTIBLE); + atomic_set(ready, count); + atomic_set(done, count); +} + +static int igt_wakeup(void *arg) +{ + I915_RND_STATE(prng); + const int state = TASK_UNINTERRUPTIBLE; + struct intel_engine_cs *engine = arg; + struct igt_wakeup *waiters; + DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq); + const int count = 4096; + const u32 max_seqno = count / 4; + atomic_t ready, set, done; + int err = -ENOMEM; + int n, step; + + mock_engine_reset(engine); + + waiters = drm_malloc_gfp(count, sizeof(*waiters), GFP_TEMPORARY); + if (!waiters) + goto out_engines; + + /* Create a large number of threads, each waiting on a random seqno. + * Multiple waiters will be waiting for the same seqno. + */ + atomic_set(&ready, count); + for (n = 0; n < count; n++) { + waiters[n].wq = &wq; + waiters[n].ready = &ready; + waiters[n].set = &set; + waiters[n].done = &done; + waiters[n].engine = engine; + waiters[n].flags = BIT(IDLE); + + waiters[n].tsk = kthread_run(igt_wakeup_thread, &waiters[n], + "i915/igt:%d", n); + if (IS_ERR(waiters[n].tsk)) + goto out_waiters; + + get_task_struct(waiters[n].tsk); + } + + for (step = 1; step <= max_seqno; step <<= 1) { + u32 seqno; + + /* The waiter threads start paused as we assign them a random + * seqno and reset the engine. Once the engine is reset, + * we signal that the threads may begin their wait upon their + * seqno. + */ + for (n = 0; n < count; n++) { + GEM_BUG_ON(!test_bit(IDLE, &waiters[n].flags)); + waiters[n].seqno = + 1 + prandom_u32_state(&prng) % max_seqno; + } + mock_seqno_advance(engine, 0); + igt_wake_all_sync(&ready, &set, &done, &wq, count); + + /* Simulate the GPU doing chunks of work, with one or more + * seqno appearing to finish at the same time. A random number + * of threads will be waiting upon the update and hopefully be + * woken. + */ + for (seqno = 1; seqno <= max_seqno + step; seqno += step) { + usleep_range(50, 500); + mock_seqno_advance(engine, seqno); + } + GEM_BUG_ON(intel_engine_get_seqno(engine) < 1 + max_seqno); + + /* With the seqno now beyond any of the waiting threads, they + * should all be woken, see that they are complete and signal + * that they are ready for the next test. We wait until all + * threads are complete and waiting for us (i.e. not a seqno). + */ + err = wait_on_atomic_t(&done, wait_atomic_timeout, state); + if (err) { + pr_err("Timed out waiting for %d remaining waiters\n", + atomic_read(&done)); + break; + } + + err = check_rbtree_empty(engine); + if (err) + break; + } + +out_waiters: + for (n = 0; n < count; n++) { + if (IS_ERR(waiters[n].tsk)) + break; + + set_bit(STOP, &waiters[n].flags); + } + mock_seqno_advance(engine, INT_MAX); /* wakeup any broken waiters */ + igt_wake_all_sync(&ready, &set, &done, &wq, n); + + for (n = 0; n < count; n++) { + if (IS_ERR(waiters[n].tsk)) + break; + + kthread_stop(waiters[n].tsk); + put_task_struct(waiters[n].tsk); + } + + drm_free_large(waiters); +out_engines: + mock_engine_flush(engine); + return err; +} + int intel_breadcrumbs_mock_selftests(void) { static const struct i915_subtest tests[] = { SUBTEST(igt_random_insert_remove), SUBTEST(igt_insert_complete), + SUBTEST(igt_wakeup), }; struct intel_engine_cs *engine; int err; -- cgit From 66d9cb5d805af70229ffe6f961bf06adc511f469 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 13 Feb 2017 17:15:17 +0000 Subject: drm/i915: Mock the GEM device for self-testing A simulacrum of drm_i915_private to let us pretend interactions with the device. v2: Tidy init error paths Signed-off-by: Chris Wilson Reviewed-by: Matthew Auld Link: http://patchwork.freedesktop.org/patch/msgid/20170213171558.20942-6-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_drv.c | 4 + drivers/gpu/drm/i915/i915_gem.c | 1 + drivers/gpu/drm/i915/selftests/mock_drm.c | 54 ++++++++++++ drivers/gpu/drm/i915/selftests/mock_drm.h | 31 +++++++ drivers/gpu/drm/i915/selftests/mock_gem_device.c | 104 +++++++++++++++++++++++ drivers/gpu/drm/i915/selftests/mock_gem_device.h | 8 ++ drivers/gpu/drm/i915/selftests/mock_gem_object.h | 8 ++ 7 files changed, 210 insertions(+) create mode 100644 drivers/gpu/drm/i915/selftests/mock_drm.c create mode 100644 drivers/gpu/drm/i915/selftests/mock_drm.h create mode 100644 drivers/gpu/drm/i915/selftests/mock_gem_device.c create mode 100644 drivers/gpu/drm/i915/selftests/mock_gem_device.h create mode 100644 drivers/gpu/drm/i915/selftests/mock_gem_object.h diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index ac1b3582ca55..efb1f64f0e41 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -2668,3 +2668,7 @@ static struct drm_driver driver = { .minor = DRIVER_MINOR, .patchlevel = DRIVER_PATCHLEVEL, }; + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftests/mock_drm.c" +#endif diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index c91e5d10cc58..db240a8f6b82 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -5023,4 +5023,5 @@ i915_gem_object_get_dma_address(struct drm_i915_gem_object *obj, #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) #include "selftests/scatterlist.c" +#include "selftests/mock_gem_device.c" #endif diff --git a/drivers/gpu/drm/i915/selftests/mock_drm.c b/drivers/gpu/drm/i915/selftests/mock_drm.c new file mode 100644 index 000000000000..113dec05c7dc --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/mock_drm.c @@ -0,0 +1,54 @@ +/* + * Copyright © 2017 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include "mock_drm.h" + +static inline struct inode fake_inode(struct drm_i915_private *i915) +{ + return (struct inode){ .i_rdev = i915->drm.primary->index }; +} + +struct drm_file *mock_file(struct drm_i915_private *i915) +{ + struct inode inode = fake_inode(i915); + struct file filp = {}; + struct drm_file *file; + int err; + + err = drm_open(&inode, &filp); + if (unlikely(err)) + return ERR_PTR(err); + + file = filp.private_data; + file->authenticated = true; + return file; +} + +void mock_file_free(struct drm_i915_private *i915, struct drm_file *file) +{ + struct inode inode = fake_inode(i915); + struct file filp = { .private_data = file }; + + drm_release(&inode, &filp); +} diff --git a/drivers/gpu/drm/i915/selftests/mock_drm.h b/drivers/gpu/drm/i915/selftests/mock_drm.h new file mode 100644 index 000000000000..b39beee9f8f6 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/mock_drm.h @@ -0,0 +1,31 @@ +/* + * Copyright © 2017 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#ifndef __MOCK_DRM_H +#define __MOCK_DRM_H + +struct drm_file *mock_file(struct drm_i915_private *i915); +void mock_file_free(struct drm_i915_private *i915, struct drm_file *file); + +#endif /* !__MOCK_DRM_H */ diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c new file mode 100644 index 000000000000..15d0a7ccc9d1 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c @@ -0,0 +1,104 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include + +#include "mock_gem_device.h" +#include "mock_gem_object.h" + +static void mock_device_release(struct drm_device *dev) +{ + struct drm_i915_private *i915 = to_i915(dev); + + i915_gem_drain_freed_objects(i915); + + kmem_cache_destroy(i915->objects); + + drm_dev_fini(&i915->drm); + put_device(&i915->drm.pdev->dev); +} + +static struct drm_driver mock_driver = { + .name = "mock", + .driver_features = DRIVER_GEM, + .release = mock_device_release, + + .gem_close_object = i915_gem_close_object, + .gem_free_object_unlocked = i915_gem_free_object, +}; + +static void release_dev(struct device *dev) +{ + struct pci_dev *pdev = to_pci_dev(dev); + + kfree(pdev); +} + +struct drm_i915_private *mock_gem_device(void) +{ + struct drm_i915_private *i915; + struct pci_dev *pdev; + int err; + + pdev = kzalloc(sizeof(*pdev) + sizeof(*i915), GFP_KERNEL); + if (!pdev) + goto err; + + device_initialize(&pdev->dev); + pdev->dev.release = release_dev; + dev_set_name(&pdev->dev, "mock"); + dma_coerce_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); + + pm_runtime_dont_use_autosuspend(&pdev->dev); + pm_runtime_get_sync(&pdev->dev); + + i915 = (struct drm_i915_private *)(pdev + 1); + pci_set_drvdata(pdev, i915); + + err = drm_dev_init(&i915->drm, &mock_driver, &pdev->dev); + if (err) { + pr_err("Failed to initialise mock GEM device: err=%d\n", err); + goto put_device; + } + i915->drm.pdev = pdev; + i915->drm.dev_private = i915; + + mkwrite_device_info(i915)->gen = -1; + + spin_lock_init(&i915->mm.object_stat_lock); + + INIT_WORK(&i915->mm.free_work, __i915_gem_free_work); + init_llist_head(&i915->mm.free_list); + + i915->objects = KMEM_CACHE(mock_object, SLAB_HWCACHE_ALIGN); + if (!i915->objects) + goto put_device; + + return i915; + +put_device: + put_device(&pdev->dev); +err: + return NULL; +} diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.h b/drivers/gpu/drm/i915/selftests/mock_gem_device.h new file mode 100644 index 000000000000..c557e33c3953 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.h @@ -0,0 +1,8 @@ +#ifndef __MOCK_GEM_DEVICE_H__ +#define __MOCK_GEM_DEVICE_H__ + +struct drm_i915_private; + +struct drm_i915_private *mock_gem_device(void); + +#endif /* !__MOCK_GEM_DEVICE_H__ */ diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_object.h b/drivers/gpu/drm/i915/selftests/mock_gem_object.h new file mode 100644 index 000000000000..9fbf67321662 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/mock_gem_object.h @@ -0,0 +1,8 @@ +#ifndef __MOCK_GEM_OBJECT_H__ +#define __MOCK_GEM_OBJECT_H__ + +struct mock_object { + struct drm_i915_gem_object base; +}; + +#endif /* !__MOCK_GEM_OBJECT_H__ */ -- cgit From 3b5bb0a37665ce1efaf3b9a551c945c09a726504 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 13 Feb 2017 17:15:18 +0000 Subject: drm/i915: Mock a GGTT for self-testing A very simple mockery, just a random manager and timeline. Useful for inserting objects and ordering retirement; and not much else. v2: mock_fini_ggtt() to complement mock_init_ggtt(). Signed-off-by: Chris Wilson Reviewed-by: Matthew Auld Link: http://patchwork.freedesktop.org/patch/msgid/20170213171558.20942-7-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_gtt.c | 4 + drivers/gpu/drm/i915/selftests/mock_gem_device.c | 31 +++++ drivers/gpu/drm/i915/selftests/mock_gtt.c | 138 +++++++++++++++++++++++ drivers/gpu/drm/i915/selftests/mock_gtt.h | 35 ++++++ 4 files changed, 208 insertions(+) create mode 100644 drivers/gpu/drm/i915/selftests/mock_gtt.c create mode 100644 drivers/gpu/drm/i915/selftests/mock_gtt.h diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index d580e2b4081b..520bcf63d271 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -3757,3 +3757,7 @@ int i915_gem_gtt_insert(struct i915_address_space *vm, size, alignment, color, start, end, DRM_MM_INSERT_EVICT); } + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftests/mock_gtt.c" +#endif diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c index 15d0a7ccc9d1..dbd32b125d15 100644 --- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c @@ -26,6 +26,7 @@ #include "mock_gem_device.h" #include "mock_gem_object.h" +#include "mock_gtt.h" static void mock_device_release(struct drm_device *dev) { @@ -33,6 +34,12 @@ static void mock_device_release(struct drm_device *dev) i915_gem_drain_freed_objects(i915); + mutex_lock(&i915->drm.struct_mutex); + mock_fini_ggtt(i915); + i915_gem_timeline_fini(&i915->gt.global_timeline); + mutex_unlock(&i915->drm.struct_mutex); + + kmem_cache_destroy(i915->vmas); kmem_cache_destroy(i915->objects); drm_dev_fini(&i915->drm); @@ -84,19 +91,43 @@ struct drm_i915_private *mock_gem_device(void) i915->drm.pdev = pdev; i915->drm.dev_private = i915; + /* Using the global GTT may ask questions about KMS users, so prepare */ + drm_mode_config_init(&i915->drm); + mkwrite_device_info(i915)->gen = -1; spin_lock_init(&i915->mm.object_stat_lock); INIT_WORK(&i915->mm.free_work, __i915_gem_free_work); init_llist_head(&i915->mm.free_list); + INIT_LIST_HEAD(&i915->mm.unbound_list); + INIT_LIST_HEAD(&i915->mm.bound_list); i915->objects = KMEM_CACHE(mock_object, SLAB_HWCACHE_ALIGN); if (!i915->objects) goto put_device; + i915->vmas = KMEM_CACHE(i915_vma, SLAB_HWCACHE_ALIGN); + if (!i915->vmas) + goto err_objects; + + mutex_lock(&i915->drm.struct_mutex); + INIT_LIST_HEAD(&i915->gt.timelines); + err = i915_gem_timeline_init__global(i915); + if (err) { + mutex_unlock(&i915->drm.struct_mutex); + goto err_vmas; + } + + mock_init_ggtt(i915); + mutex_unlock(&i915->drm.struct_mutex); + return i915; +err_vmas: + kmem_cache_destroy(i915->vmas); +err_objects: + kmem_cache_destroy(i915->objects); put_device: put_device(&pdev->dev); err: diff --git a/drivers/gpu/drm/i915/selftests/mock_gtt.c b/drivers/gpu/drm/i915/selftests/mock_gtt.c new file mode 100644 index 000000000000..a61309c7cb3e --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/mock_gtt.c @@ -0,0 +1,138 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include "mock_gtt.h" + +static void mock_insert_page(struct i915_address_space *vm, + dma_addr_t addr, + u64 offset, + enum i915_cache_level level, + u32 flags) +{ +} + +static void mock_insert_entries(struct i915_address_space *vm, + struct sg_table *st, + u64 start, + enum i915_cache_level level, u32 flags) +{ +} + +static int mock_bind_ppgtt(struct i915_vma *vma, + enum i915_cache_level cache_level, + u32 flags) +{ + GEM_BUG_ON(flags & I915_VMA_GLOBAL_BIND); + vma->pages = vma->obj->mm.pages; + vma->flags |= I915_VMA_LOCAL_BIND; + return 0; +} + +static void mock_unbind_ppgtt(struct i915_vma *vma) +{ +} + +static void mock_cleanup(struct i915_address_space *vm) +{ +} + +struct i915_hw_ppgtt * +mock_ppgtt(struct drm_i915_private *i915, + const char *name) +{ + struct i915_hw_ppgtt *ppgtt; + + ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL); + if (!ppgtt) + return NULL; + + kref_init(&ppgtt->ref); + ppgtt->base.i915 = i915; + ppgtt->base.total = round_down(U64_MAX, PAGE_SIZE); + ppgtt->base.file = ERR_PTR(-ENODEV); + + INIT_LIST_HEAD(&ppgtt->base.active_list); + INIT_LIST_HEAD(&ppgtt->base.inactive_list); + INIT_LIST_HEAD(&ppgtt->base.unbound_list); + + INIT_LIST_HEAD(&ppgtt->base.global_link); + drm_mm_init(&ppgtt->base.mm, 0, ppgtt->base.total); + i915_gem_timeline_init(i915, &ppgtt->base.timeline, name); + + ppgtt->base.clear_range = nop_clear_range; + ppgtt->base.insert_page = mock_insert_page; + ppgtt->base.insert_entries = mock_insert_entries; + ppgtt->base.bind_vma = mock_bind_ppgtt; + ppgtt->base.unbind_vma = mock_unbind_ppgtt; + ppgtt->base.cleanup = mock_cleanup; + + return ppgtt; +} + +static int mock_bind_ggtt(struct i915_vma *vma, + enum i915_cache_level cache_level, + u32 flags) +{ + int err; + + err = i915_get_ggtt_vma_pages(vma); + if (err) + return err; + + vma->flags |= I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND; + return 0; +} + +static void mock_unbind_ggtt(struct i915_vma *vma) +{ +} + +void mock_init_ggtt(struct drm_i915_private *i915) +{ + struct i915_ggtt *ggtt = &i915->ggtt; + + INIT_LIST_HEAD(&i915->vm_list); + + ggtt->base.i915 = i915; + + ggtt->mappable_base = 0; + ggtt->mappable_end = 2048 * PAGE_SIZE; + ggtt->base.total = 4096 * PAGE_SIZE; + + ggtt->base.clear_range = nop_clear_range; + ggtt->base.insert_page = mock_insert_page; + ggtt->base.insert_entries = mock_insert_entries; + ggtt->base.bind_vma = mock_bind_ggtt; + ggtt->base.unbind_vma = mock_unbind_ggtt; + ggtt->base.cleanup = mock_cleanup; + + i915_address_space_init(&ggtt->base, i915, "global"); +} + +void mock_fini_ggtt(struct drm_i915_private *i915) +{ + struct i915_ggtt *ggtt = &i915->ggtt; + + i915_address_space_fini(&ggtt->base); +} diff --git a/drivers/gpu/drm/i915/selftests/mock_gtt.h b/drivers/gpu/drm/i915/selftests/mock_gtt.h new file mode 100644 index 000000000000..9a0a833bb545 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/mock_gtt.h @@ -0,0 +1,35 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#ifndef __MOCK_GTT_H +#define __MOCK_GTT_H + +void mock_init_ggtt(struct drm_i915_private *i915); +void mock_fini_ggtt(struct drm_i915_private *i915); + +struct i915_hw_ppgtt * +mock_ppgtt(struct drm_i915_private *i915, + const char *name); + +#endif /* !__MOCK_GTT_H */ -- cgit From 0daf0113cff6884ed947ffe0870a926e73d52f79 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 13 Feb 2017 17:15:19 +0000 Subject: drm/i915: Mock infrastructure for request emission Create a fake engine that runs requests using a timer to simulate hw. v2: Prevent leaks of ctx->name along error paths Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170213171558.20942-8-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_context.c | 4 + drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c | 11 +- drivers/gpu/drm/i915/selftests/mock_context.c | 78 ++++++++++ drivers/gpu/drm/i915/selftests/mock_context.h | 34 ++++ drivers/gpu/drm/i915/selftests/mock_engine.c | 172 +++++++++++++++++++-- drivers/gpu/drm/i915/selftests/mock_engine.h | 18 ++- drivers/gpu/drm/i915/selftests/mock_gem_device.c | 95 +++++++++++- drivers/gpu/drm/i915/selftests/mock_gem_device.h | 1 + drivers/gpu/drm/i915/selftests/mock_request.c | 44 ++++++ drivers/gpu/drm/i915/selftests/mock_request.h | 44 ++++++ 10 files changed, 483 insertions(+), 18 deletions(-) create mode 100644 drivers/gpu/drm/i915/selftests/mock_context.c create mode 100644 drivers/gpu/drm/i915/selftests/mock_context.h create mode 100644 drivers/gpu/drm/i915/selftests/mock_request.c create mode 100644 drivers/gpu/drm/i915/selftests/mock_request.h diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index fb11b674f319..d27c4050b4c5 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -1199,3 +1199,7 @@ int i915_gem_context_reset_stats_ioctl(struct drm_device *dev, return 0; } + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftests/mock_context.c" +#endif diff --git a/drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c b/drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c index d1b99b565500..6426acc9fdca 100644 --- a/drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c @@ -25,6 +25,7 @@ #include "../i915_selftest.h" #include "i915_random.h" +#include "mock_gem_device.h" #include "mock_engine.h" static int check_rbtree(struct intel_engine_cs *engine, @@ -466,15 +467,15 @@ int intel_breadcrumbs_mock_selftests(void) SUBTEST(igt_insert_complete), SUBTEST(igt_wakeup), }; - struct intel_engine_cs *engine; + struct drm_i915_private *i915; int err; - engine = mock_engine("mock"); - if (!engine) + i915 = mock_gem_device(); + if (!i915) return -ENOMEM; - err = i915_subtests(tests, engine); - kfree(engine); + err = i915_subtests(tests, i915->engine[RCS]); + drm_dev_unref(&i915->drm); return err; } diff --git a/drivers/gpu/drm/i915/selftests/mock_context.c b/drivers/gpu/drm/i915/selftests/mock_context.c new file mode 100644 index 000000000000..8d3a90c3f8ac --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/mock_context.c @@ -0,0 +1,78 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include "mock_context.h" +#include "mock_gtt.h" + +struct i915_gem_context * +mock_context(struct drm_i915_private *i915, + const char *name) +{ + struct i915_gem_context *ctx; + int ret; + + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); + if (!ctx) + return NULL; + + kref_init(&ctx->ref); + INIT_LIST_HEAD(&ctx->link); + ctx->i915 = i915; + + ret = ida_simple_get(&i915->context_hw_ida, + 0, MAX_CONTEXT_HW_ID, GFP_KERNEL); + if (ret < 0) + goto err_free; + ctx->hw_id = ret; + + if (name) { + ctx->name = kstrdup(name, GFP_KERNEL); + if (!ctx->name) + goto err_put; + + ctx->ppgtt = mock_ppgtt(i915, name); + if (!ctx->ppgtt) + goto err_put; + } + + return ctx; + +err_free: + kfree(ctx); + return NULL; + +err_put: + i915_gem_context_set_closed(ctx); + i915_gem_context_put(ctx); + return NULL; +} + +void mock_context_close(struct i915_gem_context *ctx) +{ + i915_gem_context_set_closed(ctx); + + i915_ppgtt_close(&ctx->ppgtt->base); + + i915_gem_context_put(ctx); +} diff --git a/drivers/gpu/drm/i915/selftests/mock_context.h b/drivers/gpu/drm/i915/selftests/mock_context.h new file mode 100644 index 000000000000..2427e5c0916a --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/mock_context.h @@ -0,0 +1,34 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#ifndef __MOCK_CONTEXT_H +#define __MOCK_CONTEXT_H + +struct i915_gem_context * +mock_context(struct drm_i915_private *i915, + const char *name); + +void mock_context_close(struct i915_gem_context *ctx); + +#endif /* !__MOCK_CONTEXT_H */ diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.c b/drivers/gpu/drm/i915/selftests/mock_engine.c index 4a090bbe807b..8d5ba037064c 100644 --- a/drivers/gpu/drm/i915/selftests/mock_engine.c +++ b/drivers/gpu/drm/i915/selftests/mock_engine.c @@ -23,33 +23,185 @@ */ #include "mock_engine.h" +#include "mock_request.h" -struct intel_engine_cs *mock_engine(const char *name) +static struct mock_request *first_request(struct mock_engine *engine) { - struct intel_engine_cs *engine; + return list_first_entry_or_null(&engine->hw_queue, + struct mock_request, + link); +} + +static void hw_delay_complete(unsigned long data) +{ + struct mock_engine *engine = (typeof(engine))data; + struct mock_request *request; + + spin_lock(&engine->hw_lock); + + request = first_request(engine); + if (request) { + list_del_init(&request->link); + mock_seqno_advance(&engine->base, request->base.global_seqno); + } + + request = first_request(engine); + if (request) + mod_timer(&engine->hw_delay, jiffies + request->delay); + + spin_unlock(&engine->hw_lock); +} + +static int mock_context_pin(struct intel_engine_cs *engine, + struct i915_gem_context *ctx) +{ + i915_gem_context_get(ctx); + return 0; +} + +static void mock_context_unpin(struct intel_engine_cs *engine, + struct i915_gem_context *ctx) +{ + i915_gem_context_put(ctx); +} + +static int mock_request_alloc(struct drm_i915_gem_request *request) +{ + struct mock_request *mock = container_of(request, typeof(*mock), base); + + INIT_LIST_HEAD(&mock->link); + mock->delay = 0; + + request->ring = request->engine->buffer; + return 0; +} + +static int mock_emit_flush(struct drm_i915_gem_request *request, + unsigned int flags) +{ + return 0; +} + +static void mock_emit_breadcrumb(struct drm_i915_gem_request *request, + u32 *flags) +{ +} + +static void mock_submit_request(struct drm_i915_gem_request *request) +{ + struct mock_request *mock = container_of(request, typeof(*mock), base); + struct mock_engine *engine = + container_of(request->engine, typeof(*engine), base); + + i915_gem_request_submit(request); + GEM_BUG_ON(!request->global_seqno); + + spin_lock_irq(&engine->hw_lock); + list_add_tail(&mock->link, &engine->hw_queue); + if (mock->link.prev == &engine->hw_queue) + mod_timer(&engine->hw_delay, jiffies + mock->delay); + spin_unlock_irq(&engine->hw_lock); +} + +static struct intel_ring *mock_ring(struct intel_engine_cs *engine) +{ + const unsigned long sz = roundup_pow_of_two(sizeof(struct intel_ring)); + struct intel_ring *ring; + + ring = kzalloc(sizeof(*ring) + sz, GFP_KERNEL); + if (!ring) + return NULL; + + ring->engine = engine; + ring->size = sz; + ring->effective_size = sz; + ring->vaddr = (void *)(ring + 1); + + INIT_LIST_HEAD(&ring->request_list); + ring->last_retired_head = -1; + intel_ring_update_space(ring); + + return ring; +} + +struct intel_engine_cs *mock_engine(struct drm_i915_private *i915, + const char *name) +{ + struct mock_engine *engine; static int id; engine = kzalloc(sizeof(*engine) + PAGE_SIZE, GFP_KERNEL); if (!engine) return NULL; - /* minimal engine setup for seqno */ - engine->name = name; - engine->id = id++; - engine->status_page.page_addr = (void *)(engine + 1); + engine->base.buffer = mock_ring(&engine->base); + if (!engine->base.buffer) { + kfree(engine); + return NULL; + } - /* minimal breadcrumbs init */ - spin_lock_init(&engine->breadcrumbs.lock); - engine->breadcrumbs.mock = true; + /* minimal engine setup for requests */ + engine->base.i915 = i915; + engine->base.name = name; + engine->base.id = id++; + engine->base.status_page.page_addr = (void *)(engine + 1); - return engine; + engine->base.context_pin = mock_context_pin; + engine->base.context_unpin = mock_context_unpin; + engine->base.request_alloc = mock_request_alloc; + engine->base.emit_flush = mock_emit_flush; + engine->base.emit_breadcrumb = mock_emit_breadcrumb; + engine->base.submit_request = mock_submit_request; + + engine->base.timeline = + &i915->gt.global_timeline.engine[engine->base.id]; + + intel_engine_init_breadcrumbs(&engine->base); + engine->base.breadcrumbs.mock = true; /* prevent touching HW for irqs */ + + /* fake hw queue */ + spin_lock_init(&engine->hw_lock); + setup_timer(&engine->hw_delay, + hw_delay_complete, + (unsigned long)engine); + INIT_LIST_HEAD(&engine->hw_queue); + + return &engine->base; } void mock_engine_flush(struct intel_engine_cs *engine) { + struct mock_engine *mock = + container_of(engine, typeof(*mock), base); + struct mock_request *request, *rn; + + del_timer_sync(&mock->hw_delay); + + spin_lock_irq(&mock->hw_lock); + list_for_each_entry_safe(request, rn, &mock->hw_queue, link) { + list_del_init(&request->link); + mock_seqno_advance(&mock->base, request->base.global_seqno); + } + spin_unlock_irq(&mock->hw_lock); } void mock_engine_reset(struct intel_engine_cs *engine) { intel_write_status_page(engine, I915_GEM_HWS_INDEX, 0); } + +void mock_engine_free(struct intel_engine_cs *engine) +{ + struct mock_engine *mock = + container_of(engine, typeof(*mock), base); + + GEM_BUG_ON(timer_pending(&mock->hw_delay)); + + if (engine->last_retired_context) + engine->context_unpin(engine, engine->last_retired_context); + + intel_engine_fini_breadcrumbs(engine); + + kfree(engine->buffer); + kfree(engine); +} diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.h b/drivers/gpu/drm/i915/selftests/mock_engine.h index 9cfe9671f860..e5e240216ba3 100644 --- a/drivers/gpu/drm/i915/selftests/mock_engine.h +++ b/drivers/gpu/drm/i915/selftests/mock_engine.h @@ -25,9 +25,25 @@ #ifndef __MOCK_ENGINE_H__ #define __MOCK_ENGINE_H__ -struct intel_engine_cs *mock_engine(const char *name); +#include +#include +#include + +#include "../intel_ringbuffer.h" + +struct mock_engine { + struct intel_engine_cs base; + + spinlock_t hw_lock; + struct list_head hw_queue; + struct timer_list hw_delay; +}; + +struct intel_engine_cs *mock_engine(struct drm_i915_private *i915, + const char *name); void mock_engine_flush(struct intel_engine_cs *engine); void mock_engine_reset(struct intel_engine_cs *engine); +void mock_engine_free(struct intel_engine_cs *engine); static inline void mock_seqno_advance(struct intel_engine_cs *engine, u32 seqno) { diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c index dbd32b125d15..6a8258eacdcb 100644 --- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c @@ -24,14 +24,46 @@ #include +#include "mock_engine.h" +#include "mock_context.h" +#include "mock_request.h" #include "mock_gem_device.h" #include "mock_gem_object.h" #include "mock_gtt.h" +void mock_device_flush(struct drm_i915_private *i915) +{ + struct intel_engine_cs *engine; + enum intel_engine_id id; + + lockdep_assert_held(&i915->drm.struct_mutex); + + for_each_engine(engine, i915, id) + mock_engine_flush(engine); + + i915_gem_retire_requests(i915); +} + static void mock_device_release(struct drm_device *dev) { struct drm_i915_private *i915 = to_i915(dev); + struct intel_engine_cs *engine; + enum intel_engine_id id; + + mutex_lock(&i915->drm.struct_mutex); + mock_device_flush(i915); + mutex_unlock(&i915->drm.struct_mutex); + cancel_delayed_work_sync(&i915->gt.retire_work); + cancel_delayed_work_sync(&i915->gt.idle_work); + + mutex_lock(&i915->drm.struct_mutex); + for_each_engine(engine, i915, id) + mock_engine_free(engine); + i915_gem_context_fini(i915); + mutex_unlock(&i915->drm.struct_mutex); + + drain_workqueue(i915->wq); i915_gem_drain_freed_objects(i915); mutex_lock(&i915->drm.struct_mutex); @@ -39,6 +71,10 @@ static void mock_device_release(struct drm_device *dev) i915_gem_timeline_fini(&i915->gt.global_timeline); mutex_unlock(&i915->drm.struct_mutex); + destroy_workqueue(i915->wq); + + kmem_cache_destroy(i915->dependencies); + kmem_cache_destroy(i915->requests); kmem_cache_destroy(i915->vmas); kmem_cache_destroy(i915->objects); @@ -62,9 +98,19 @@ static void release_dev(struct device *dev) kfree(pdev); } +static void mock_retire_work_handler(struct work_struct *work) +{ +} + +static void mock_idle_work_handler(struct work_struct *work) +{ +} + struct drm_i915_private *mock_gem_device(void) { struct drm_i915_private *i915; + struct intel_engine_cs *engine; + enum intel_engine_id id; struct pci_dev *pdev; int err; @@ -98,36 +144,81 @@ struct drm_i915_private *mock_gem_device(void) spin_lock_init(&i915->mm.object_stat_lock); + init_waitqueue_head(&i915->gpu_error.wait_queue); + init_waitqueue_head(&i915->gpu_error.reset_queue); + + i915->wq = alloc_ordered_workqueue("mock", 0); + if (!i915->wq) + goto put_device; + INIT_WORK(&i915->mm.free_work, __i915_gem_free_work); init_llist_head(&i915->mm.free_list); INIT_LIST_HEAD(&i915->mm.unbound_list); INIT_LIST_HEAD(&i915->mm.bound_list); + ida_init(&i915->context_hw_ida); + + INIT_DELAYED_WORK(&i915->gt.retire_work, mock_retire_work_handler); + INIT_DELAYED_WORK(&i915->gt.idle_work, mock_idle_work_handler); + + i915->gt.awake = true; + i915->objects = KMEM_CACHE(mock_object, SLAB_HWCACHE_ALIGN); if (!i915->objects) - goto put_device; + goto err_wq; i915->vmas = KMEM_CACHE(i915_vma, SLAB_HWCACHE_ALIGN); if (!i915->vmas) goto err_objects; + i915->requests = KMEM_CACHE(mock_request, + SLAB_HWCACHE_ALIGN | + SLAB_RECLAIM_ACCOUNT | + SLAB_DESTROY_BY_RCU); + if (!i915->requests) + goto err_vmas; + + i915->dependencies = KMEM_CACHE(i915_dependency, + SLAB_HWCACHE_ALIGN | + SLAB_RECLAIM_ACCOUNT); + if (!i915->dependencies) + goto err_requests; + mutex_lock(&i915->drm.struct_mutex); INIT_LIST_HEAD(&i915->gt.timelines); err = i915_gem_timeline_init__global(i915); if (err) { mutex_unlock(&i915->drm.struct_mutex); - goto err_vmas; + goto err_dependencies; } mock_init_ggtt(i915); mutex_unlock(&i915->drm.struct_mutex); + mkwrite_device_info(i915)->ring_mask = BIT(0); + i915->engine[RCS] = mock_engine(i915, "mock"); + if (!i915->engine[RCS]) + goto err_dependencies; + + i915->kernel_context = mock_context(i915, NULL); + if (!i915->kernel_context) + goto err_engine; + return i915; +err_engine: + for_each_engine(engine, i915, id) + mock_engine_free(engine); +err_dependencies: + kmem_cache_destroy(i915->dependencies); +err_requests: + kmem_cache_destroy(i915->requests); err_vmas: kmem_cache_destroy(i915->vmas); err_objects: kmem_cache_destroy(i915->objects); +err_wq: + destroy_workqueue(i915->wq); put_device: put_device(&pdev->dev); err: diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.h b/drivers/gpu/drm/i915/selftests/mock_gem_device.h index c557e33c3953..4cca4d57f52c 100644 --- a/drivers/gpu/drm/i915/selftests/mock_gem_device.h +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.h @@ -4,5 +4,6 @@ struct drm_i915_private; struct drm_i915_private *mock_gem_device(void); +void mock_device_flush(struct drm_i915_private *i915); #endif /* !__MOCK_GEM_DEVICE_H__ */ diff --git a/drivers/gpu/drm/i915/selftests/mock_request.c b/drivers/gpu/drm/i915/selftests/mock_request.c new file mode 100644 index 000000000000..e23242d1b88a --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/mock_request.c @@ -0,0 +1,44 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include "mock_request.h" + +struct drm_i915_gem_request * +mock_request(struct intel_engine_cs *engine, + struct i915_gem_context *context, + unsigned long delay) +{ + struct drm_i915_gem_request *request; + struct mock_request *mock; + + /* NB the i915->requests slab cache is enlarged to fit mock_request */ + request = i915_gem_request_alloc(engine, context); + if (!request) + return NULL; + + mock = container_of(request, typeof(*mock), base); + mock->delay = delay; + + return &mock->base; +} diff --git a/drivers/gpu/drm/i915/selftests/mock_request.h b/drivers/gpu/drm/i915/selftests/mock_request.h new file mode 100644 index 000000000000..cc76d4f4eb4e --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/mock_request.h @@ -0,0 +1,44 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#ifndef __MOCK_REQUEST__ +#define __MOCK_REQUEST__ + +#include + +#include "../i915_gem_request.h" + +struct mock_request { + struct drm_i915_gem_request base; + + struct list_head link; + unsigned long delay; +}; + +struct drm_i915_gem_request * +mock_request(struct intel_engine_cs *engine, + struct i915_gem_context *context, + unsigned long delay); + +#endif /* !__MOCK_REQUEST__ */ -- cgit From 44653988ef7c70bf7c55a5e642e22ce5446fb6b1 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 13 Feb 2017 17:15:20 +0000 Subject: drm/i915: Create a fake object for testing huge allocations We would like to be able to exercise huge allocations even on memory constrained devices. To do this we create an object that allocates only a few pages and remaps them across its whole range - each page is reused multiple times. We can therefore pretend we are rendering into a much larger object. Signed-off-by: Chris Wilson Reviewed-by: Matthew Auld Link: http://patchwork.freedesktop.org/patch/msgid/20170213171558.20942-9-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem.c | 1 + drivers/gpu/drm/i915/i915_gem_object.h | 20 ++-- drivers/gpu/drm/i915/selftests/huge_gem_object.c | 135 +++++++++++++++++++++++ drivers/gpu/drm/i915/selftests/huge_gem_object.h | 45 ++++++++ 4 files changed, 193 insertions(+), 8 deletions(-) create mode 100644 drivers/gpu/drm/i915/selftests/huge_gem_object.c create mode 100644 drivers/gpu/drm/i915/selftests/huge_gem_object.h diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index db240a8f6b82..59e10b364ad0 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -5024,4 +5024,5 @@ i915_gem_object_get_dma_address(struct drm_i915_gem_object *obj, #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) #include "selftests/scatterlist.c" #include "selftests/mock_gem_device.c" +#include "selftests/huge_gem_object.c" #endif diff --git a/drivers/gpu/drm/i915/i915_gem_object.h b/drivers/gpu/drm/i915/i915_gem_object.h index 290eaa7fc9eb..4114cc8a0b9b 100644 --- a/drivers/gpu/drm/i915/i915_gem_object.h +++ b/drivers/gpu/drm/i915/i915_gem_object.h @@ -167,14 +167,18 @@ struct drm_i915_gem_object { /** Record of address bit 17 of each page at last unbind. */ unsigned long *bit_17; - struct i915_gem_userptr { - uintptr_t ptr; - unsigned read_only :1; - - struct i915_mm_struct *mm; - struct i915_mmu_object *mmu_object; - struct work_struct *work; - } userptr; + union { + struct i915_gem_userptr { + uintptr_t ptr; + unsigned read_only :1; + + struct i915_mm_struct *mm; + struct i915_mmu_object *mmu_object; + struct work_struct *work; + } userptr; + + unsigned long scratch; + }; /** for phys allocated objects */ struct drm_dma_handle *phys_handle; diff --git a/drivers/gpu/drm/i915/selftests/huge_gem_object.c b/drivers/gpu/drm/i915/selftests/huge_gem_object.c new file mode 100644 index 000000000000..4e681fc13be4 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/huge_gem_object.c @@ -0,0 +1,135 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include "huge_gem_object.h" + +static void huge_free_pages(struct drm_i915_gem_object *obj, + struct sg_table *pages) +{ + unsigned long nreal = obj->scratch / PAGE_SIZE; + struct scatterlist *sg; + + for (sg = pages->sgl; sg && nreal--; sg = __sg_next(sg)) + __free_page(sg_page(sg)); + + sg_free_table(pages); + kfree(pages); +} + +static struct sg_table * +huge_get_pages(struct drm_i915_gem_object *obj) +{ +#define GFP (GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY) + const unsigned long nreal = obj->scratch / PAGE_SIZE; + const unsigned long npages = obj->base.size / PAGE_SIZE; + struct scatterlist *sg, *src, *end; + struct sg_table *pages; + unsigned long n; + + pages = kmalloc(sizeof(*pages), GFP); + if (!pages) + return ERR_PTR(-ENOMEM); + + if (sg_alloc_table(pages, npages, GFP)) { + kfree(pages); + return ERR_PTR(-ENOMEM); + } + + sg = pages->sgl; + for (n = 0; n < nreal; n++) { + struct page *page; + + page = alloc_page(GFP | __GFP_HIGHMEM); + if (!page) { + sg_mark_end(sg); + goto err; + } + + sg_set_page(sg, page, PAGE_SIZE, 0); + sg = __sg_next(sg); + } + if (nreal < npages) { + for (end = sg, src = pages->sgl; sg; sg = __sg_next(sg)) { + sg_set_page(sg, sg_page(src), PAGE_SIZE, 0); + src = __sg_next(src); + if (src == end) + src = pages->sgl; + } + } + + if (i915_gem_gtt_prepare_pages(obj, pages)) + goto err; + + return pages; + +err: + huge_free_pages(obj, pages); + return ERR_PTR(-ENOMEM); +#undef GFP +} + +static void huge_put_pages(struct drm_i915_gem_object *obj, + struct sg_table *pages) +{ + i915_gem_gtt_finish_pages(obj, pages); + huge_free_pages(obj, pages); + + obj->mm.dirty = false; +} + +static const struct drm_i915_gem_object_ops huge_ops = { + .flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE | + I915_GEM_OBJECT_IS_SHRINKABLE, + .get_pages = huge_get_pages, + .put_pages = huge_put_pages, +}; + +struct drm_i915_gem_object * +huge_gem_object(struct drm_i915_private *i915, + phys_addr_t phys_size, + dma_addr_t dma_size) +{ + struct drm_i915_gem_object *obj; + + GEM_BUG_ON(!phys_size || phys_size > dma_size); + GEM_BUG_ON(!IS_ALIGNED(phys_size, PAGE_SIZE)); + GEM_BUG_ON(!IS_ALIGNED(dma_size, I915_GTT_PAGE_SIZE)); + + if (overflows_type(dma_size, obj->base.size)) + return ERR_PTR(-E2BIG); + + obj = i915_gem_object_alloc(i915); + if (!obj) + return ERR_PTR(-ENOMEM); + + drm_gem_private_object_init(&i915->drm, &obj->base, dma_size); + i915_gem_object_init(obj, &huge_ops); + + obj->base.write_domain = I915_GEM_DOMAIN_CPU; + obj->base.read_domains = I915_GEM_DOMAIN_CPU; + obj->cache_level = HAS_LLC(i915) ? I915_CACHE_LLC : I915_CACHE_NONE; + obj->scratch = phys_size; + + return obj; +} diff --git a/drivers/gpu/drm/i915/selftests/huge_gem_object.h b/drivers/gpu/drm/i915/selftests/huge_gem_object.h new file mode 100644 index 000000000000..a6133a9e8029 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/huge_gem_object.h @@ -0,0 +1,45 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#ifndef __HUGE_GEM_OBJECT_H +#define __HUGE_GEM_OBJECT_H + +struct drm_i915_gem_object * +huge_gem_object(struct drm_i915_private *i915, + phys_addr_t phys_size, + dma_addr_t dma_size); + +static inline phys_addr_t +huge_gem_object_phys_size(struct drm_i915_gem_object *obj) +{ + return obj->scratch; +} + +static inline dma_addr_t +huge_gem_object_dma_size(struct drm_i915_gem_object *obj) +{ + return obj->base.size; +} + +#endif /* !__HUGE_GEM_OBJECT_H */ -- cgit From c835c5508358311d7922ea7b18bcaf611d1f8bb9 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 13 Feb 2017 17:15:21 +0000 Subject: drm/i915: Add selftests for i915_gem_request Simple starting point for adding seltests for i915_gem_request, first mock a device (with engines and contexts) that allows us to construct and execute a request, along with waiting for the request to complete. Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170213171558.20942-10-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_request.c | 5 ++ drivers/gpu/drm/i915/selftests/i915_gem_request.c | 68 ++++++++++++++++++++++ .../gpu/drm/i915/selftests/i915_mock_selftests.h | 1 + 3 files changed, 74 insertions(+) create mode 100644 drivers/gpu/drm/i915/selftests/i915_gem_request.c diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index f31deeb72703..24571a5289a4 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -1198,3 +1198,8 @@ void i915_gem_retire_requests(struct drm_i915_private *dev_priv) for_each_engine(engine, dev_priv, id) engine_retire_requests(engine); } + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftests/mock_request.c" +#include "selftests/i915_gem_request.c" +#endif diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_request.c b/drivers/gpu/drm/i915/selftests/i915_gem_request.c new file mode 100644 index 000000000000..9921d1c317c0 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/i915_gem_request.c @@ -0,0 +1,68 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include "../i915_selftest.h" + +#include "mock_gem_device.h" + +static int igt_add_request(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct drm_i915_gem_request *request; + int err = -ENOMEM; + + /* Basic preliminary test to create a request and let it loose! */ + + mutex_lock(&i915->drm.struct_mutex); + request = mock_request(i915->engine[RCS], + i915->kernel_context, + HZ / 10); + if (!request) + goto out_unlock; + + i915_add_request(request); + + err = 0; +out_unlock: + mutex_unlock(&i915->drm.struct_mutex); + return err; +} + +int i915_gem_request_mock_selftests(void) +{ + static const struct i915_subtest tests[] = { + SUBTEST(igt_add_request), + }; + struct drm_i915_private *i915; + int err; + + i915 = mock_gem_device(); + if (!i915) + return -ENOMEM; + + err = i915_subtests(tests, i915); + drm_dev_unref(&i915->drm); + + return err; +} diff --git a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h index 80458e2a2b04..bda982404ad3 100644 --- a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h +++ b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h @@ -11,3 +11,4 @@ selftest(sanitycheck, i915_mock_sanitycheck) /* keep first (igt selfcheck) */ selftest(scatterlist, scatterlist_mock_selftests) selftest(breadcrumbs, intel_breadcrumbs_mock_selftests) +selftest(requests, i915_gem_request_mock_selftests) -- cgit From f1ae924d1871391f5a8b21d8220531830475a902 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 13 Feb 2017 17:15:22 +0000 Subject: drm/i915: Add a simple request selftest for waiting A trivial kselftest to submit a request and wait upon it. Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170213171558.20942-11-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/selftests/i915_gem_request.c | 71 +++++++++++++++++++++++ 1 file changed, 71 insertions(+) diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_request.c b/drivers/gpu/drm/i915/selftests/i915_gem_request.c index 9921d1c317c0..3cbf4735fe2f 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_request.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_request.c @@ -49,10 +49,81 @@ out_unlock: return err; } +static int igt_wait_request(void *arg) +{ + const long T = HZ / 4; + struct drm_i915_private *i915 = arg; + struct drm_i915_gem_request *request; + int err = -EINVAL; + + /* Submit a request, then wait upon it */ + + mutex_lock(&i915->drm.struct_mutex); + request = mock_request(i915->engine[RCS], i915->kernel_context, T); + if (!request) { + err = -ENOMEM; + goto out_unlock; + } + + if (i915_wait_request(request, I915_WAIT_LOCKED, 0) != -ETIME) { + pr_err("request wait (busy query) succeeded (expected timeout before submit!)\n"); + goto out_unlock; + } + + if (i915_wait_request(request, I915_WAIT_LOCKED, T) != -ETIME) { + pr_err("request wait succeeded (expected timeout before submit!)\n"); + goto out_unlock; + } + + if (i915_gem_request_completed(request)) { + pr_err("request completed before submit!!\n"); + goto out_unlock; + } + + i915_add_request(request); + + if (i915_wait_request(request, I915_WAIT_LOCKED, 0) != -ETIME) { + pr_err("request wait (busy query) succeeded (expected timeout after submit!)\n"); + goto out_unlock; + } + + if (i915_gem_request_completed(request)) { + pr_err("request completed immediately!\n"); + goto out_unlock; + } + + if (i915_wait_request(request, I915_WAIT_LOCKED, T / 2) != -ETIME) { + pr_err("request wait succeeded (expected timeout!)\n"); + goto out_unlock; + } + + if (i915_wait_request(request, I915_WAIT_LOCKED, T) == -ETIME) { + pr_err("request wait timed out!\n"); + goto out_unlock; + } + + if (!i915_gem_request_completed(request)) { + pr_err("request not complete after waiting!\n"); + goto out_unlock; + } + + if (i915_wait_request(request, I915_WAIT_LOCKED, T) == -ETIME) { + pr_err("request wait timed out when already complete!\n"); + goto out_unlock; + } + + err = 0; +out_unlock: + mock_device_flush(i915); + mutex_unlock(&i915->drm.struct_mutex); + return err; +} + int i915_gem_request_mock_selftests(void) { static const struct i915_subtest tests[] = { SUBTEST(igt_add_request), + SUBTEST(igt_wait_request), }; struct drm_i915_private *i915; int err; -- cgit From 5fd4d112d26cae71c0f92f9d83c0d29a22510025 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 13 Feb 2017 17:15:23 +0000 Subject: drm/i915: Add a simple fence selftest to i915_gem_request Do a quick selftest on in the interoperability of dma_fence_wait on a i915_gem_request. Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170213171558.20942-12-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/selftests/i915_gem_request.c | 61 +++++++++++++++++++++++ 1 file changed, 61 insertions(+) diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_request.c b/drivers/gpu/drm/i915/selftests/i915_gem_request.c index 3cbf4735fe2f..6fa148b4ff30 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_request.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_request.c @@ -119,11 +119,72 @@ out_unlock: return err; } +static int igt_fence_wait(void *arg) +{ + const long T = HZ / 4; + struct drm_i915_private *i915 = arg; + struct drm_i915_gem_request *request; + int err = -EINVAL; + + /* Submit a request, treat it as a fence and wait upon it */ + + mutex_lock(&i915->drm.struct_mutex); + request = mock_request(i915->engine[RCS], i915->kernel_context, T); + if (!request) { + err = -ENOMEM; + goto out_locked; + } + mutex_unlock(&i915->drm.struct_mutex); /* safe as we are single user */ + + if (dma_fence_wait_timeout(&request->fence, false, T) != -ETIME) { + pr_err("fence wait success before submit (expected timeout)!\n"); + goto out_device; + } + + mutex_lock(&i915->drm.struct_mutex); + i915_add_request(request); + mutex_unlock(&i915->drm.struct_mutex); + + if (dma_fence_is_signaled(&request->fence)) { + pr_err("fence signaled immediately!\n"); + goto out_device; + } + + if (dma_fence_wait_timeout(&request->fence, false, T / 2) != -ETIME) { + pr_err("fence wait success after submit (expected timeout)!\n"); + goto out_device; + } + + if (dma_fence_wait_timeout(&request->fence, false, T) <= 0) { + pr_err("fence wait timed out (expected success)!\n"); + goto out_device; + } + + if (!dma_fence_is_signaled(&request->fence)) { + pr_err("fence unsignaled after waiting!\n"); + goto out_device; + } + + if (dma_fence_wait_timeout(&request->fence, false, T) <= 0) { + pr_err("fence wait timed out when complete (expected success)!\n"); + goto out_device; + } + + err = 0; +out_device: + mutex_lock(&i915->drm.struct_mutex); +out_locked: + mock_device_flush(i915); + mutex_unlock(&i915->drm.struct_mutex); + return err; +} + int i915_gem_request_mock_selftests(void) { static const struct i915_subtest tests[] = { SUBTEST(igt_add_request), SUBTEST(igt_wait_request), + SUBTEST(igt_fence_wait), }; struct drm_i915_private *i915; int err; -- cgit From b348090d6758cc391dc91f8a8233b18f0acc8458 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 13 Feb 2017 17:15:24 +0000 Subject: drm/i915: Simple selftest to exercise live requests Just create several batches of requests and expect it to not fall over! Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170213171558.20942-13-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/selftests/i915_gem_request.c | 147 +++++++++++++++++++++ .../gpu/drm/i915/selftests/i915_live_selftests.h | 1 + 2 files changed, 148 insertions(+) diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_request.c b/drivers/gpu/drm/i915/selftests/i915_gem_request.c index 6fa148b4ff30..610f89e3696c 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_request.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_request.c @@ -22,6 +22,8 @@ * */ +#include + #include "../i915_selftest.h" #include "mock_gem_device.h" @@ -198,3 +200,148 @@ int i915_gem_request_mock_selftests(void) return err; } + +struct live_test { + struct drm_i915_private *i915; + const char *func; + const char *name; + + unsigned int reset_count; +}; + +static int begin_live_test(struct live_test *t, + struct drm_i915_private *i915, + const char *func, + const char *name) +{ + int err; + + t->i915 = i915; + t->func = func; + t->name = name; + + err = i915_gem_wait_for_idle(i915, I915_WAIT_LOCKED); + if (err) { + pr_err("%s(%s): failed to idle before, with err=%d!", + func, name, err); + return err; + } + + i915_gem_retire_requests(i915); + + i915->gpu_error.missed_irq_rings = 0; + t->reset_count = i915_reset_count(&i915->gpu_error); + + return 0; +} + +static int end_live_test(struct live_test *t) +{ + struct drm_i915_private *i915 = t->i915; + + if (wait_for(intel_execlists_idle(i915), 1)) { + pr_err("%s(%s): GPU not idle\n", t->func, t->name); + return -EIO; + } + + if (t->reset_count != i915_reset_count(&i915->gpu_error)) { + pr_err("%s(%s): GPU was reset %d times!\n", + t->func, t->name, + i915_reset_count(&i915->gpu_error) - t->reset_count); + return -EIO; + } + + if (i915->gpu_error.missed_irq_rings) { + pr_err("%s(%s): Missed interrupts on engines %lx\n", + t->func, t->name, i915->gpu_error.missed_irq_rings); + return -EIO; + } + + return 0; +} + +static int live_nop_request(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct intel_engine_cs *engine; + struct live_test t; + unsigned int id; + int err; + + /* Submit various sized batches of empty requests, to each engine + * (individually), and wait for the batch to complete. We can check + * the overhead of submitting requests to the hardware. + */ + + mutex_lock(&i915->drm.struct_mutex); + + for_each_engine(engine, i915, id) { + IGT_TIMEOUT(end_time); + struct drm_i915_gem_request *request; + unsigned long n, prime; + ktime_t times[2] = {}; + + err = begin_live_test(&t, i915, __func__, engine->name); + if (err) + goto out_unlock; + + for_each_prime_number_from(prime, 1, 8192) { + times[1] = ktime_get_raw(); + + for (n = 0; n < prime; n++) { + request = i915_gem_request_alloc(engine, + i915->kernel_context); + if (IS_ERR(request)) { + err = PTR_ERR(request); + goto out_unlock; + } + + /* This space is left intentionally blank. + * + * We do not actually want to perform any + * action with this request, we just want + * to measure the latency in allocation + * and submission of our breadcrumbs - + * ensuring that the bare request is sufficient + * for the system to work (i.e. proper HEAD + * tracking of the rings, interrupt handling, + * etc). It also gives us the lowest bounds + * for latency. + */ + + i915_add_request(request); + } + i915_wait_request(request, + I915_WAIT_LOCKED, + MAX_SCHEDULE_TIMEOUT); + + times[1] = ktime_sub(ktime_get_raw(), times[1]); + if (prime == 1) + times[0] = times[1]; + + if (__igt_timeout(end_time, NULL)) + break; + } + + err = end_live_test(&t); + if (err) + goto out_unlock; + + pr_info("Request latencies on %s: 1 = %lluns, %lu = %lluns\n", + engine->name, + ktime_to_ns(times[0]), + prime, div64_u64(ktime_to_ns(times[1]), prime)); + } + +out_unlock: + mutex_unlock(&i915->drm.struct_mutex); + return err; +} + +int i915_gem_request_live_selftests(struct drm_i915_private *i915) +{ + static const struct i915_subtest tests[] = { + SUBTEST(live_nop_request), + }; + return i915_subtests(tests, i915); +} diff --git a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h index f3e17cb10e05..09bf538826df 100644 --- a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h +++ b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h @@ -9,3 +9,4 @@ * Tests are executed in order by igt/drv_selftest */ selftest(sanitycheck, i915_live_sanitycheck) /* keep first (igt selfcheck) */ +selftest(requests, i915_gem_request_live_selftests) -- cgit From cf8be13df2ce232ccf0a7a7220613b476f7fedfa Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 13 Feb 2017 17:15:25 +0000 Subject: drm/i915: Test simultaneously submitting requests to all engines Use a recursive-batch to busy spin on each to ensure that each is being run simultaneously. Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170213171558.20942-14-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/selftests/i915_gem_request.c | 184 ++++++++++++++++++++++ 1 file changed, 184 insertions(+) diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_request.c b/drivers/gpu/drm/i915/selftests/i915_gem_request.c index 610f89e3696c..880a1e82393a 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_request.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_request.c @@ -338,10 +338,194 @@ out_unlock: return err; } +static struct i915_vma *recursive_batch(struct drm_i915_private *i915) +{ + struct i915_gem_context *ctx = i915->kernel_context; + struct i915_address_space *vm = ctx->ppgtt ? &ctx->ppgtt->base : &i915->ggtt.base; + struct drm_i915_gem_object *obj; + const int gen = INTEL_GEN(i915); + struct i915_vma *vma; + u32 *cmd; + int err; + + obj = i915_gem_object_create_internal(i915, PAGE_SIZE); + if (IS_ERR(obj)) + return ERR_CAST(obj); + + vma = i915_vma_instance(obj, vm, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto err; + } + + err = i915_vma_pin(vma, 0, 0, PIN_USER); + if (err) + goto err; + + err = i915_gem_object_set_to_gtt_domain(obj, true); + if (err) + goto err; + + cmd = i915_gem_object_pin_map(obj, I915_MAP_WC); + if (IS_ERR(cmd)) { + err = PTR_ERR(cmd); + goto err; + } + + if (gen >= 8) { + *cmd++ = MI_BATCH_BUFFER_START | 1 << 8 | 1; + *cmd++ = lower_32_bits(vma->node.start); + *cmd++ = upper_32_bits(vma->node.start); + } else if (gen >= 6) { + *cmd++ = MI_BATCH_BUFFER_START | 1 << 8; + *cmd++ = lower_32_bits(vma->node.start); + } else if (gen >= 4) { + *cmd++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT; + *cmd++ = lower_32_bits(vma->node.start); + } else { + *cmd++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT | 1; + *cmd++ = lower_32_bits(vma->node.start); + } + *cmd++ = MI_BATCH_BUFFER_END; /* terminate early in case of error */ + + wmb(); + i915_gem_object_unpin_map(obj); + + return vma; + +err: + i915_gem_object_put(obj); + return ERR_PTR(err); +} + +static int recursive_batch_resolve(struct i915_vma *batch) +{ + u32 *cmd; + + cmd = i915_gem_object_pin_map(batch->obj, I915_MAP_WC); + if (IS_ERR(cmd)) + return PTR_ERR(cmd); + + *cmd = MI_BATCH_BUFFER_END; + wmb(); + + i915_gem_object_unpin_map(batch->obj); + + return 0; +} + +static int live_all_engines(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct intel_engine_cs *engine; + struct drm_i915_gem_request *request[I915_NUM_ENGINES]; + struct i915_vma *batch; + struct live_test t; + unsigned int id; + int err; + + /* Check we can submit requests to all engines simultaneously. We + * send a recursive batch to each engine - checking that we don't + * block doing so, and that they don't complete too soon. + */ + + mutex_lock(&i915->drm.struct_mutex); + + err = begin_live_test(&t, i915, __func__, ""); + if (err) + goto out_unlock; + + batch = recursive_batch(i915); + if (IS_ERR(batch)) { + err = PTR_ERR(batch); + pr_err("%s: Unable to create batch, err=%d\n", __func__, err); + goto out_unlock; + } + + for_each_engine(engine, i915, id) { + request[id] = i915_gem_request_alloc(engine, + i915->kernel_context); + if (IS_ERR(request[id])) { + err = PTR_ERR(request[id]); + pr_err("%s: Request allocation failed with err=%d\n", + __func__, err); + goto out_request; + } + + err = engine->emit_flush(request[id], EMIT_INVALIDATE); + GEM_BUG_ON(err); + + err = i915_switch_context(request[id]); + GEM_BUG_ON(err); + + err = engine->emit_bb_start(request[id], + batch->node.start, + batch->node.size, + 0); + GEM_BUG_ON(err); + request[id]->batch = batch; + + if (!i915_gem_object_has_active_reference(batch->obj)) { + i915_gem_object_get(batch->obj); + i915_gem_object_set_active_reference(batch->obj); + } + + i915_vma_move_to_active(batch, request[id], 0); + i915_gem_request_get(request[id]); + i915_add_request(request[id]); + } + + for_each_engine(engine, i915, id) { + if (i915_gem_request_completed(request[id])) { + pr_err("%s(%s): request completed too early!\n", + __func__, engine->name); + err = -EINVAL; + goto out_request; + } + } + + err = recursive_batch_resolve(batch); + if (err) { + pr_err("%s: failed to resolve batch, err=%d\n", __func__, err); + goto out_request; + } + + for_each_engine(engine, i915, id) { + long timeout; + + timeout = i915_wait_request(request[id], + I915_WAIT_LOCKED, + MAX_SCHEDULE_TIMEOUT); + if (timeout < 0) { + err = timeout; + pr_err("%s: error waiting for request on %s, err=%d\n", + __func__, engine->name, err); + goto out_request; + } + + GEM_BUG_ON(!i915_gem_request_completed(request[id])); + i915_gem_request_put(request[id]); + request[id] = NULL; + } + + err = end_live_test(&t); + +out_request: + for_each_engine(engine, i915, id) + if (request[id]) + i915_gem_request_put(request[id]); + i915_vma_unpin(batch); + i915_vma_put(batch); +out_unlock: + mutex_unlock(&i915->drm.struct_mutex); + return err; +} + int i915_gem_request_live_selftests(struct drm_i915_private *i915) { static const struct i915_subtest tests[] = { SUBTEST(live_nop_request), + SUBTEST(live_all_engines), }; return i915_subtests(tests, i915); } -- cgit From 97b592b11a262356079aaa66eeb97c077515212c Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 13 Feb 2017 17:15:26 +0000 Subject: drm/i915: Test request ordering between engines A request on one engine with a dependency on a request on another engine must wait for completion of the first request before starting. Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170213171558.20942-15-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/selftests/i915_gem_request.c | 132 ++++++++++++++++++++++ 1 file changed, 132 insertions(+) diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_request.c b/drivers/gpu/drm/i915/selftests/i915_gem_request.c index 880a1e82393a..e946488d6af2 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_request.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_request.c @@ -521,11 +521,143 @@ out_unlock: return err; } +static int live_sequential_engines(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct drm_i915_gem_request *request[I915_NUM_ENGINES] = {}; + struct drm_i915_gem_request *prev = NULL; + struct intel_engine_cs *engine; + struct live_test t; + unsigned int id; + int err; + + /* Check we can submit requests to all engines sequentially, such + * that each successive request waits for the earlier ones. This + * tests that we don't execute requests out of order, even though + * they are running on independent engines. + */ + + mutex_lock(&i915->drm.struct_mutex); + + err = begin_live_test(&t, i915, __func__, ""); + if (err) + goto out_unlock; + + for_each_engine(engine, i915, id) { + struct i915_vma *batch; + + batch = recursive_batch(i915); + if (IS_ERR(batch)) { + err = PTR_ERR(batch); + pr_err("%s: Unable to create batch for %s, err=%d\n", + __func__, engine->name, err); + goto out_unlock; + } + + request[id] = i915_gem_request_alloc(engine, + i915->kernel_context); + if (IS_ERR(request[id])) { + err = PTR_ERR(request[id]); + pr_err("%s: Request allocation failed for %s with err=%d\n", + __func__, engine->name, err); + goto out_request; + } + + if (prev) { + err = i915_gem_request_await_dma_fence(request[id], + &prev->fence); + if (err) { + i915_add_request(request[id]); + pr_err("%s: Request await failed for %s with err=%d\n", + __func__, engine->name, err); + goto out_request; + } + } + + err = engine->emit_flush(request[id], EMIT_INVALIDATE); + GEM_BUG_ON(err); + + err = i915_switch_context(request[id]); + GEM_BUG_ON(err); + + err = engine->emit_bb_start(request[id], + batch->node.start, + batch->node.size, + 0); + GEM_BUG_ON(err); + request[id]->batch = batch; + + i915_vma_move_to_active(batch, request[id], 0); + i915_gem_object_set_active_reference(batch->obj); + i915_vma_get(batch); + + i915_gem_request_get(request[id]); + i915_add_request(request[id]); + + prev = request[id]; + } + + for_each_engine(engine, i915, id) { + long timeout; + + if (i915_gem_request_completed(request[id])) { + pr_err("%s(%s): request completed too early!\n", + __func__, engine->name); + err = -EINVAL; + goto out_request; + } + + err = recursive_batch_resolve(request[id]->batch); + if (err) { + pr_err("%s: failed to resolve batch, err=%d\n", + __func__, err); + goto out_request; + } + + timeout = i915_wait_request(request[id], + I915_WAIT_LOCKED, + MAX_SCHEDULE_TIMEOUT); + if (timeout < 0) { + err = timeout; + pr_err("%s: error waiting for request on %s, err=%d\n", + __func__, engine->name, err); + goto out_request; + } + + GEM_BUG_ON(!i915_gem_request_completed(request[id])); + } + + err = end_live_test(&t); + +out_request: + for_each_engine(engine, i915, id) { + u32 *cmd; + + if (!request[id]) + break; + + cmd = i915_gem_object_pin_map(request[id]->batch->obj, + I915_MAP_WC); + if (!IS_ERR(cmd)) { + *cmd = MI_BATCH_BUFFER_END; + wmb(); + i915_gem_object_unpin_map(request[id]->batch->obj); + } + + i915_vma_put(request[id]->batch); + i915_gem_request_put(request[id]); + } +out_unlock: + mutex_unlock(&i915->drm.struct_mutex); + return err; +} + int i915_gem_request_live_selftests(struct drm_i915_private *i915) { static const struct i915_subtest tests[] = { SUBTEST(live_nop_request), SUBTEST(live_all_engines), + SUBTEST(live_sequential_engines), }; return i915_subtests(tests, i915); } -- cgit From cd3862dc6e89817dbeefa02881400da926ffceeb Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 13 Feb 2017 17:15:27 +0000 Subject: drm/i915: Live testing of empty requests Primarily to emphasize the difference between just advancing the breadcrumb using a bare request and the overhead of dispatching an execbuffer. Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170213171558.20942-16-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/selftests/i915_gem_request.c | 155 ++++++++++++++++++++++ 1 file changed, 155 insertions(+) diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_request.c b/drivers/gpu/drm/i915/selftests/i915_gem_request.c index e946488d6af2..9d056a86723d 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_request.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_request.c @@ -338,6 +338,160 @@ out_unlock: return err; } +static struct i915_vma *empty_batch(struct drm_i915_private *i915) +{ + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + u32 *cmd; + int err; + + obj = i915_gem_object_create_internal(i915, PAGE_SIZE); + if (IS_ERR(obj)) + return ERR_CAST(obj); + + cmd = i915_gem_object_pin_map(obj, I915_MAP_WB); + if (IS_ERR(cmd)) { + err = PTR_ERR(cmd); + goto err; + } + *cmd = MI_BATCH_BUFFER_END; + i915_gem_object_unpin_map(obj); + + err = i915_gem_object_set_to_gtt_domain(obj, false); + if (err) + goto err; + + vma = i915_vma_instance(obj, &i915->ggtt.base, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto err; + } + + err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_GLOBAL); + if (err) + goto err; + + return vma; + +err: + i915_gem_object_put(obj); + return ERR_PTR(err); +} + +static struct drm_i915_gem_request * +empty_request(struct intel_engine_cs *engine, + struct i915_vma *batch) +{ + struct drm_i915_gem_request *request; + int err; + + request = i915_gem_request_alloc(engine, + engine->i915->kernel_context); + if (IS_ERR(request)) + return request; + + err = engine->emit_flush(request, EMIT_INVALIDATE); + if (err) + goto out_request; + + err = i915_switch_context(request); + if (err) + goto out_request; + + err = engine->emit_bb_start(request, + batch->node.start, + batch->node.size, + I915_DISPATCH_SECURE); + if (err) + goto out_request; + +out_request: + __i915_add_request(request, err == 0); + return err ? ERR_PTR(err) : request; +} + +static int live_empty_request(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct intel_engine_cs *engine; + struct live_test t; + struct i915_vma *batch; + unsigned int id; + int err = 0; + + /* Submit various sized batches of empty requests, to each engine + * (individually), and wait for the batch to complete. We can check + * the overhead of submitting requests to the hardware. + */ + + mutex_lock(&i915->drm.struct_mutex); + + batch = empty_batch(i915); + if (IS_ERR(batch)) { + err = PTR_ERR(batch); + goto out_unlock; + } + + for_each_engine(engine, i915, id) { + IGT_TIMEOUT(end_time); + struct drm_i915_gem_request *request; + unsigned long n, prime; + ktime_t times[2] = {}; + + err = begin_live_test(&t, i915, __func__, engine->name); + if (err) + goto out_batch; + + /* Warmup / preload */ + request = empty_request(engine, batch); + if (IS_ERR(request)) { + err = PTR_ERR(request); + goto out_batch; + } + i915_wait_request(request, + I915_WAIT_LOCKED, + MAX_SCHEDULE_TIMEOUT); + + for_each_prime_number_from(prime, 1, 8192) { + times[1] = ktime_get_raw(); + + for (n = 0; n < prime; n++) { + request = empty_request(engine, batch); + if (IS_ERR(request)) { + err = PTR_ERR(request); + goto out_batch; + } + } + i915_wait_request(request, + I915_WAIT_LOCKED, + MAX_SCHEDULE_TIMEOUT); + + times[1] = ktime_sub(ktime_get_raw(), times[1]); + if (prime == 1) + times[0] = times[1]; + + if (__igt_timeout(end_time, NULL)) + break; + } + + err = end_live_test(&t); + if (err) + goto out_batch; + + pr_info("Batch latencies on %s: 1 = %lluns, %lu = %lluns\n", + engine->name, + ktime_to_ns(times[0]), + prime, div64_u64(ktime_to_ns(times[1]), prime)); + } + +out_batch: + i915_vma_unpin(batch); + i915_vma_put(batch); +out_unlock: + mutex_unlock(&i915->drm.struct_mutex); + return err; +} + static struct i915_vma *recursive_batch(struct drm_i915_private *i915) { struct i915_gem_context *ctx = i915->kernel_context; @@ -658,6 +812,7 @@ int i915_gem_request_live_selftests(struct drm_i915_private *i915) SUBTEST(live_nop_request), SUBTEST(live_all_engines), SUBTEST(live_sequential_engines), + SUBTEST(live_empty_request), }; return i915_subtests(tests, i915); } -- cgit From 8335fd65ce6c880876ea0f94f427aa2499bcd05a Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 13 Feb 2017 17:15:28 +0000 Subject: drm/i915: Add selftests for object allocation, phys The phys object is a rarely used device (only very old machines require a chunk of physically contiguous pages for a few hardware interactions). As such, it is not exercised by CI and to combat that we want to add a test that exercises the phys object on all platforms. v2: Always set err on error paths and not rely on inheriting the err. Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170213171558.20942-17-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem.c | 1 + drivers/gpu/drm/i915/selftests/i915_gem_object.c | 120 +++++++++++++++++++++ .../gpu/drm/i915/selftests/i915_mock_selftests.h | 1 + 3 files changed, 122 insertions(+) create mode 100644 drivers/gpu/drm/i915/selftests/i915_gem_object.c diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 59e10b364ad0..ec455f47958a 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -5025,4 +5025,5 @@ i915_gem_object_get_dma_address(struct drm_i915_gem_object *obj, #include "selftests/scatterlist.c" #include "selftests/mock_gem_device.c" #include "selftests/huge_gem_object.c" +#include "selftests/i915_gem_object.c" #endif diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_object.c b/drivers/gpu/drm/i915/selftests/i915_gem_object.c new file mode 100644 index 000000000000..0d76c5e0e2ff --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/i915_gem_object.c @@ -0,0 +1,120 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include "../i915_selftest.h" + +#include "mock_gem_device.h" + +static int igt_gem_object(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct drm_i915_gem_object *obj; + int err = -ENOMEM; + + /* Basic test to ensure we can create an object */ + + obj = i915_gem_object_create(i915, PAGE_SIZE); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + pr_err("i915_gem_object_create failed, err=%d\n", err); + goto out; + } + + err = 0; + i915_gem_object_put(obj); +out: + return err; +} + +static int igt_phys_object(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct drm_i915_gem_object *obj; + int err; + + /* Create an object and bind it to a contiguous set of physical pages, + * i.e. exercise the i915_gem_object_phys API. + */ + + obj = i915_gem_object_create(i915, PAGE_SIZE); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + pr_err("i915_gem_object_create failed, err=%d\n", err); + goto out; + } + + mutex_lock(&i915->drm.struct_mutex); + err = i915_gem_object_attach_phys(obj, PAGE_SIZE); + mutex_unlock(&i915->drm.struct_mutex); + if (err) { + pr_err("i915_gem_object_attach_phys failed, err=%d\n", err); + goto out_obj; + } + + if (obj->ops != &i915_gem_phys_ops) { + pr_err("i915_gem_object_attach_phys did not create a phys object\n"); + err = -EINVAL; + goto out_obj; + } + + if (!atomic_read(&obj->mm.pages_pin_count)) { + pr_err("i915_gem_object_attach_phys did not pin its phys pages\n"); + err = -EINVAL; + goto out_obj; + } + + /* Make the object dirty so that put_pages must do copy back the data */ + mutex_lock(&i915->drm.struct_mutex); + err = i915_gem_object_set_to_gtt_domain(obj, true); + mutex_unlock(&i915->drm.struct_mutex); + if (err) { + pr_err("i915_gem_object_set_to_gtt_domain failed with err=%d\n", + err); + goto out_obj; + } + +out_obj: + i915_gem_object_put(obj); +out: + return err; +} + +int i915_gem_object_mock_selftests(void) +{ + static const struct i915_subtest tests[] = { + SUBTEST(igt_gem_object), + SUBTEST(igt_phys_object), + }; + struct drm_i915_private *i915; + int err; + + i915 = mock_gem_device(); + if (!i915) + return -ENOMEM; + + err = i915_subtests(tests, i915); + + drm_dev_unref(&i915->drm); + return err; +} diff --git a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h index bda982404ad3..2ed94e3a71b7 100644 --- a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h +++ b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h @@ -12,3 +12,4 @@ selftest(sanitycheck, i915_mock_sanitycheck) /* keep first (igt selfcheck) */ selftest(scatterlist, scatterlist_mock_selftests) selftest(breadcrumbs, intel_breadcrumbs_mock_selftests) selftest(requests, i915_gem_request_mock_selftests) +selftest(objects, i915_gem_object_mock_selftests) -- cgit From 12d30d879398acf14cb2d5300a1c4b2ec777ee02 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 13 Feb 2017 17:15:29 +0000 Subject: drm/i915: Add a live seftest for GEM objects Starting with a placeholder test just to reassure that we can create a test object, Signed-off-by: Chris Wilson Reviewed-by: Matthew Auld Link: http://patchwork.freedesktop.org/patch/msgid/20170213171558.20942-18-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/selftests/i915_gem_object.c | 49 ++++++++++++++++++++++ .../gpu/drm/i915/selftests/i915_live_selftests.h | 1 + 2 files changed, 50 insertions(+) diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_object.c b/drivers/gpu/drm/i915/selftests/i915_gem_object.c index 0d76c5e0e2ff..895e1e85653f 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_object.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_object.c @@ -100,6 +100,46 @@ out: return err; } +static int igt_gem_huge(void *arg) +{ + const unsigned int nreal = 509; /* just to be awkward */ + struct drm_i915_private *i915 = arg; + struct drm_i915_gem_object *obj; + unsigned int n; + int err; + + /* Basic sanitycheck of our huge fake object allocation */ + + obj = huge_gem_object(i915, + nreal * PAGE_SIZE, + i915->ggtt.base.total + PAGE_SIZE); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + err = i915_gem_object_pin_pages(obj); + if (err) { + pr_err("Failed to allocate %u pages (%lu total), err=%d\n", + nreal, obj->base.size / PAGE_SIZE, err); + goto out; + } + + for (n = 0; n < obj->base.size / PAGE_SIZE; n++) { + if (i915_gem_object_get_page(obj, n) != + i915_gem_object_get_page(obj, n % nreal)) { + pr_err("Page lookup mismatch at index %u [%u]\n", + n, n % nreal); + err = -EINVAL; + goto out_unpin; + } + } + +out_unpin: + i915_gem_object_unpin_pages(obj); +out: + i915_gem_object_put(obj); + return err; +} + int i915_gem_object_mock_selftests(void) { static const struct i915_subtest tests[] = { @@ -118,3 +158,12 @@ int i915_gem_object_mock_selftests(void) drm_dev_unref(&i915->drm); return err; } + +int i915_gem_object_live_selftests(struct drm_i915_private *i915) +{ + static const struct i915_subtest tests[] = { + SUBTEST(igt_gem_huge), + }; + + return i915_subtests(tests, i915); +} diff --git a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h index 09bf538826df..d720b03e2c01 100644 --- a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h +++ b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h @@ -10,3 +10,4 @@ */ selftest(sanitycheck, i915_live_sanitycheck) /* keep first (igt selfcheck) */ selftest(requests, i915_gem_request_live_selftests) +selftest(objects, i915_gem_object_live_selftests) -- cgit From 48d898172075e55cb6fdea52410795b4aeff1352 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 13 Feb 2017 17:15:30 +0000 Subject: drm/i915: Test partial mappings Create partial mappings to cover a large object, investigating tiling (fenced regions) and VMA reuse. Signed-off-by: Chris Wilson Reviewed-by: Matthew Auld Link: http://patchwork.freedesktop.org/patch/msgid/20170213171558.20942-19-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/selftests/i915_gem_object.c | 293 +++++++++++++++++++++++ 1 file changed, 293 insertions(+) diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_object.c b/drivers/gpu/drm/i915/selftests/i915_gem_object.c index 895e1e85653f..99648f91194f 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_object.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_object.c @@ -140,6 +140,298 @@ out: return err; } +struct tile { + unsigned int width; + unsigned int height; + unsigned int stride; + unsigned int size; + unsigned int tiling; + unsigned int swizzle; +}; + +static u64 swizzle_bit(unsigned int bit, u64 offset) +{ + return (offset & BIT_ULL(bit)) >> (bit - 6); +} + +static u64 tiled_offset(const struct tile *tile, u64 v) +{ + u64 x, y; + + if (tile->tiling == I915_TILING_NONE) + return v; + + y = div64_u64_rem(v, tile->stride, &x); + v = div64_u64_rem(y, tile->height, &y) * tile->stride * tile->height; + + if (tile->tiling == I915_TILING_X) { + v += y * tile->width; + v += div64_u64_rem(x, tile->width, &x) << tile->size; + v += x; + } else { + const unsigned int ytile_span = 16; + const unsigned int ytile_height = 32 * ytile_span; + + v += y * ytile_span; + v += div64_u64_rem(x, ytile_span, &x) * ytile_height; + v += x; + } + + switch (tile->swizzle) { + case I915_BIT_6_SWIZZLE_9: + v ^= swizzle_bit(9, v); + break; + case I915_BIT_6_SWIZZLE_9_10: + v ^= swizzle_bit(9, v) ^ swizzle_bit(10, v); + break; + case I915_BIT_6_SWIZZLE_9_11: + v ^= swizzle_bit(9, v) ^ swizzle_bit(11, v); + break; + case I915_BIT_6_SWIZZLE_9_10_11: + v ^= swizzle_bit(9, v) ^ swizzle_bit(10, v) ^ swizzle_bit(11, v); + break; + } + + return v; +} + +static int check_partial_mapping(struct drm_i915_gem_object *obj, + const struct tile *tile, + unsigned long end_time) +{ + const unsigned int nreal = obj->scratch / PAGE_SIZE; + const unsigned long npages = obj->base.size / PAGE_SIZE; + struct i915_vma *vma; + unsigned long page; + int err; + + if (igt_timeout(end_time, + "%s: timed out before tiling=%d stride=%d\n", + __func__, tile->tiling, tile->stride)) + return -EINTR; + + err = i915_gem_object_set_tiling(obj, tile->tiling, tile->stride); + if (err) + return err; + + GEM_BUG_ON(i915_gem_object_get_tiling(obj) != tile->tiling); + GEM_BUG_ON(i915_gem_object_get_stride(obj) != tile->stride); + + for_each_prime_number_from(page, 1, npages) { + struct i915_ggtt_view view = + compute_partial_view(obj, page, MIN_CHUNK_PAGES); + u32 __iomem *io; + struct page *p; + unsigned int n; + u64 offset; + u32 *cpu; + + GEM_BUG_ON(view.partial.size > nreal); + + err = i915_gem_object_set_to_gtt_domain(obj, true); + if (err) + return err; + + vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, PIN_MAPPABLE); + if (IS_ERR(vma)) { + pr_err("Failed to pin partial view: offset=%lu\n", + page); + return PTR_ERR(vma); + } + + n = page - view.partial.offset; + GEM_BUG_ON(n >= view.partial.size); + + io = i915_vma_pin_iomap(vma); + i915_vma_unpin(vma); + if (IS_ERR(io)) { + pr_err("Failed to iomap partial view: offset=%lu\n", + page); + return PTR_ERR(io); + } + + err = i915_vma_get_fence(vma); + if (err) { + pr_err("Failed to get fence for partial view: offset=%lu\n", + page); + i915_vma_unpin_iomap(vma); + return err; + } + + iowrite32(page, io + n * PAGE_SIZE/sizeof(*io)); + i915_vma_unpin_iomap(vma); + + offset = tiled_offset(tile, page << PAGE_SHIFT); + if (offset >= obj->base.size) + continue; + + i915_gem_object_flush_gtt_write_domain(obj); + + p = i915_gem_object_get_page(obj, offset >> PAGE_SHIFT); + cpu = kmap(p) + offset_in_page(offset); + drm_clflush_virt_range(cpu, sizeof(*cpu)); + if (*cpu != (u32)page) { + pr_err("Partial view for %lu [%u] (offset=%llu, size=%u [%llu, row size %u], fence=%d, tiling=%d, stride=%d) misalignment, expected write to page (%llu + %u [0x%llx]) of 0x%x, found 0x%x\n", + page, n, + view.partial.offset, + view.partial.size, + vma->size >> PAGE_SHIFT, + tile_row_pages(obj), + vma->fence ? vma->fence->id : -1, tile->tiling, tile->stride, + offset >> PAGE_SHIFT, + (unsigned int)offset_in_page(offset), + offset, + (u32)page, *cpu); + err = -EINVAL; + } + *cpu = 0; + drm_clflush_virt_range(cpu, sizeof(*cpu)); + kunmap(p); + if (err) + return err; + } + + return 0; +} + +static int igt_partial_tiling(void *arg) +{ + const unsigned int nreal = 1 << 12; /* largest tile row x2 */ + struct drm_i915_private *i915 = arg; + struct drm_i915_gem_object *obj; + int tiling; + int err; + + /* We want to check the page mapping and fencing of a large object + * mmapped through the GTT. The object we create is larger than can + * possibly be mmaped as a whole, and so we must use partial GGTT vma. + * We then check that a write through each partial GGTT vma ends up + * in the right set of pages within the object, and with the expected + * tiling, which we verify by manual swizzling. + */ + + obj = huge_gem_object(i915, + nreal << PAGE_SHIFT, + (1 + next_prime_number(i915->ggtt.base.total >> PAGE_SHIFT)) << PAGE_SHIFT); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + err = i915_gem_object_pin_pages(obj); + if (err) { + pr_err("Failed to allocate %u pages (%lu total), err=%d\n", + nreal, obj->base.size / PAGE_SIZE, err); + goto out; + } + + mutex_lock(&i915->drm.struct_mutex); + + if (1) { + IGT_TIMEOUT(end); + struct tile tile; + + tile.height = 1; + tile.width = 1; + tile.size = 0; + tile.stride = 0; + tile.swizzle = I915_BIT_6_SWIZZLE_NONE; + tile.tiling = I915_TILING_NONE; + + err = check_partial_mapping(obj, &tile, end); + if (err && err != -EINTR) + goto out_unlock; + } + + for (tiling = I915_TILING_X; tiling <= I915_TILING_Y; tiling++) { + IGT_TIMEOUT(end); + unsigned int max_pitch; + unsigned int pitch; + struct tile tile; + + tile.tiling = tiling; + switch (tiling) { + case I915_TILING_X: + tile.swizzle = i915->mm.bit_6_swizzle_x; + break; + case I915_TILING_Y: + tile.swizzle = i915->mm.bit_6_swizzle_y; + break; + } + + if (tile.swizzle == I915_BIT_6_SWIZZLE_UNKNOWN || + tile.swizzle == I915_BIT_6_SWIZZLE_9_10_17) + continue; + + if (INTEL_GEN(i915) <= 2) { + tile.height = 16; + tile.width = 128; + tile.size = 11; + } else if (tile.tiling == I915_TILING_Y && + HAS_128_BYTE_Y_TILING(i915)) { + tile.height = 32; + tile.width = 128; + tile.size = 12; + } else { + tile.height = 8; + tile.width = 512; + tile.size = 12; + } + + if (INTEL_GEN(i915) < 4) + max_pitch = 8192 / tile.width; + else if (INTEL_GEN(i915) < 7) + max_pitch = 128 * I965_FENCE_MAX_PITCH_VAL / tile.width; + else + max_pitch = 128 * GEN7_FENCE_MAX_PITCH_VAL / tile.width; + + for (pitch = max_pitch; pitch; pitch >>= 1) { + tile.stride = tile.width * pitch; + err = check_partial_mapping(obj, &tile, end); + if (err == -EINTR) + goto next_tiling; + if (err) + goto out_unlock; + + if (pitch > 2 && INTEL_GEN(i915) >= 4) { + tile.stride = tile.width * (pitch - 1); + err = check_partial_mapping(obj, &tile, end); + if (err == -EINTR) + goto next_tiling; + if (err) + goto out_unlock; + } + + if (pitch < max_pitch && INTEL_GEN(i915) >= 4) { + tile.stride = tile.width * (pitch + 1); + err = check_partial_mapping(obj, &tile, end); + if (err == -EINTR) + goto next_tiling; + if (err) + goto out_unlock; + } + } + + if (INTEL_GEN(i915) >= 4) { + for_each_prime_number(pitch, max_pitch) { + tile.stride = tile.width * pitch; + err = check_partial_mapping(obj, &tile, end); + if (err == -EINTR) + goto next_tiling; + if (err) + goto out_unlock; + } + } + +next_tiling: ; + } + +out_unlock: + mutex_unlock(&i915->drm.struct_mutex); + i915_gem_object_unpin_pages(obj); +out: + i915_gem_object_put(obj); + return err; +} + int i915_gem_object_mock_selftests(void) { static const struct i915_subtest tests[] = { @@ -163,6 +455,7 @@ int i915_gem_object_live_selftests(struct drm_i915_private *i915) { static const struct i915_subtest tests[] = { SUBTEST(igt_gem_huge), + SUBTEST(igt_partial_tiling), }; return i915_subtests(tests, i915); -- cgit From 3d81d589d6e3c89b687771074f65cb8f3b59ccf3 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 13 Feb 2017 17:15:31 +0000 Subject: drm/i915: Test exhaustion of the mmap space An unlikely error condition that we can simulate by stealing most of the range before trying to insert new objects. Signed-off-by: Chris Wilson Reviewed-by: Matthew Auld Link: http://patchwork.freedesktop.org/patch/msgid/20170213171558.20942-20-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/selftests/i915_gem_object.c | 138 +++++++++++++++++++++++ 1 file changed, 138 insertions(+) diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_object.c b/drivers/gpu/drm/i915/selftests/i915_gem_object.c index 99648f91194f..67d82bf1407f 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_object.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_object.c @@ -25,6 +25,7 @@ #include "../i915_selftest.h" #include "mock_gem_device.h" +#include "huge_gem_object.h" static int igt_gem_object(void *arg) { @@ -432,6 +433,142 @@ out: return err; } +static int make_obj_busy(struct drm_i915_gem_object *obj) +{ + struct drm_i915_private *i915 = to_i915(obj->base.dev); + struct drm_i915_gem_request *rq; + struct i915_vma *vma; + int err; + + vma = i915_vma_instance(obj, &i915->ggtt.base, NULL); + if (IS_ERR(vma)) + return PTR_ERR(vma); + + err = i915_vma_pin(vma, 0, 0, PIN_USER); + if (err) + return err; + + rq = i915_gem_request_alloc(i915->engine[RCS], i915->kernel_context); + if (IS_ERR(rq)) { + i915_vma_unpin(vma); + return PTR_ERR(rq); + } + + i915_vma_move_to_active(vma, rq, 0); + i915_add_request(rq); + + i915_gem_object_set_active_reference(obj); + i915_vma_unpin(vma); + return 0; +} + +static bool assert_mmap_offset(struct drm_i915_private *i915, + unsigned long size, + int expected) +{ + struct drm_i915_gem_object *obj; + int err; + + obj = i915_gem_object_create_internal(i915, size); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + err = i915_gem_object_create_mmap_offset(obj); + i915_gem_object_put(obj); + + return err == expected; +} + +static int igt_mmap_offset_exhaustion(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct drm_mm *mm = &i915->drm.vma_offset_manager->vm_addr_space_mm; + struct drm_i915_gem_object *obj; + struct drm_mm_node resv, *hole; + u64 hole_start, hole_end; + int loop, err; + + /* Trim the device mmap space to only a page */ + memset(&resv, 0, sizeof(resv)); + drm_mm_for_each_hole(hole, mm, hole_start, hole_end) { + resv.start = hole_start; + resv.size = hole_end - hole_start - 1; /* PAGE_SIZE units */ + err = drm_mm_reserve_node(mm, &resv); + if (err) { + pr_err("Failed to trim VMA manager, err=%d\n", err); + return err; + } + break; + } + + /* Just fits! */ + if (!assert_mmap_offset(i915, PAGE_SIZE, 0)) { + pr_err("Unable to insert object into single page hole\n"); + err = -EINVAL; + goto out; + } + + /* Too large */ + if (!assert_mmap_offset(i915, 2*PAGE_SIZE, -ENOSPC)) { + pr_err("Unexpectedly succeeded in inserting too large object into single page hole\n"); + err = -EINVAL; + goto out; + } + + /* Fill the hole, further allocation attempts should then fail */ + obj = i915_gem_object_create_internal(i915, PAGE_SIZE); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + goto out; + } + + err = i915_gem_object_create_mmap_offset(obj); + if (err) { + pr_err("Unable to insert object into reclaimed hole\n"); + goto err_obj; + } + + if (!assert_mmap_offset(i915, PAGE_SIZE, -ENOSPC)) { + pr_err("Unexpectedly succeeded in inserting object into no holes!\n"); + err = -EINVAL; + goto err_obj; + } + + i915_gem_object_put(obj); + + /* Now fill with busy dead objects that we expect to reap */ + for (loop = 0; loop < 3; loop++) { + obj = i915_gem_object_create_internal(i915, PAGE_SIZE); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + goto out; + } + + mutex_lock(&i915->drm.struct_mutex); + err = make_obj_busy(obj); + mutex_unlock(&i915->drm.struct_mutex); + if (err) { + pr_err("[loop %d] Failed to busy the object\n", loop); + goto err_obj; + } + + GEM_BUG_ON(!i915_gem_object_is_active(obj)); + err = i915_gem_object_create_mmap_offset(obj); + if (err) { + pr_err("[loop %d] i915_gem_object_create_mmap_offset failed with err=%d\n", + loop, err); + goto out; + } + } + +out: + drm_mm_remove_node(&resv); + return err; +err_obj: + i915_gem_object_put(obj); + goto out; +} + int i915_gem_object_mock_selftests(void) { static const struct i915_subtest tests[] = { @@ -456,6 +593,7 @@ int i915_gem_object_live_selftests(struct drm_i915_private *i915) static const struct i915_subtest tests[] = { SUBTEST(igt_gem_huge), SUBTEST(igt_partial_tiling), + SUBTEST(igt_mmap_offset_exhaustion), }; return i915_subtests(tests, i915); -- cgit From 170594502cf591fd0789d7e5239937b1a87af4c6 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 13 Feb 2017 17:15:32 +0000 Subject: drm/i915: Test coherency of and barriers between cache domains Write into an object using WB, WC, GTT, and GPU paths and make sure that our internal API is sufficient to ensure coherent reads and writes. v2: Avoid invalid free upon allocation error Signed-off-by: Chris Wilson Reviewed-by: Matthew Auld Link: http://patchwork.freedesktop.org/patch/msgid/20170213171558.20942-21-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem.c | 1 + drivers/gpu/drm/i915/i915_selftest.h | 2 + .../gpu/drm/i915/selftests/i915_gem_coherency.c | 384 +++++++++++++++++++++ .../gpu/drm/i915/selftests/i915_live_selftests.h | 1 + 4 files changed, 388 insertions(+) create mode 100644 drivers/gpu/drm/i915/selftests/i915_gem_coherency.c diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index ec455f47958a..82489ed10373 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -5026,4 +5026,5 @@ i915_gem_object_get_dma_address(struct drm_i915_gem_object *obj, #include "selftests/mock_gem_device.c" #include "selftests/huge_gem_object.c" #include "selftests/i915_gem_object.c" +#include "selftests/i915_gem_coherency.c" #endif diff --git a/drivers/gpu/drm/i915/i915_selftest.h b/drivers/gpu/drm/i915/i915_selftest.h index 8b5994caa301..e43777b72211 100644 --- a/drivers/gpu/drm/i915/i915_selftest.h +++ b/drivers/gpu/drm/i915/i915_selftest.h @@ -99,4 +99,6 @@ bool __igt_timeout(unsigned long timeout, const char *fmt, ...); #define igt_timeout(t, fmt, ...) \ __igt_timeout((t), KERN_WARNING pr_fmt(fmt), ##__VA_ARGS__) +#define igt_can_mi_store_dword_imm(D) (INTEL_GEN(D) > 2) + #endif /* !__I915_SELFTEST_H__ */ diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c b/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c new file mode 100644 index 000000000000..483cbe02d983 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c @@ -0,0 +1,384 @@ +/* + * Copyright © 2017 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include + +#include "../i915_selftest.h" +#include "i915_random.h" + +static int cpu_set(struct drm_i915_gem_object *obj, + unsigned long offset, + u32 v) +{ + unsigned int needs_clflush; + struct page *page; + typeof(v) *map; + int err; + + err = i915_gem_obj_prepare_shmem_write(obj, &needs_clflush); + if (err) + return err; + + page = i915_gem_object_get_page(obj, offset >> PAGE_SHIFT); + map = kmap_atomic(page); + if (needs_clflush & CLFLUSH_BEFORE) + clflush(map+offset_in_page(offset) / sizeof(*map)); + map[offset_in_page(offset) / sizeof(*map)] = v; + if (needs_clflush & CLFLUSH_AFTER) + clflush(map+offset_in_page(offset) / sizeof(*map)); + kunmap_atomic(map); + + i915_gem_obj_finish_shmem_access(obj); + return 0; +} + +static int cpu_get(struct drm_i915_gem_object *obj, + unsigned long offset, + u32 *v) +{ + unsigned int needs_clflush; + struct page *page; + typeof(v) map; + int err; + + err = i915_gem_obj_prepare_shmem_read(obj, &needs_clflush); + if (err) + return err; + + page = i915_gem_object_get_page(obj, offset >> PAGE_SHIFT); + map = kmap_atomic(page); + if (needs_clflush & CLFLUSH_BEFORE) + clflush(map+offset_in_page(offset) / sizeof(*map)); + *v = map[offset_in_page(offset) / sizeof(*map)]; + kunmap_atomic(map); + + i915_gem_obj_finish_shmem_access(obj); + return 0; +} + +static int gtt_set(struct drm_i915_gem_object *obj, + unsigned long offset, + u32 v) +{ + struct i915_vma *vma; + typeof(v) *map; + int err; + + err = i915_gem_object_set_to_gtt_domain(obj, true); + if (err) + return err; + + vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, PIN_MAPPABLE); + if (IS_ERR(vma)) + return PTR_ERR(vma); + + map = i915_vma_pin_iomap(vma); + i915_vma_unpin(vma); + if (IS_ERR(map)) + return PTR_ERR(map); + + map[offset / sizeof(*map)] = v; + i915_vma_unpin_iomap(vma); + + return 0; +} + +static int gtt_get(struct drm_i915_gem_object *obj, + unsigned long offset, + u32 *v) +{ + struct i915_vma *vma; + typeof(v) map; + int err; + + err = i915_gem_object_set_to_gtt_domain(obj, false); + if (err) + return err; + + vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, PIN_MAPPABLE); + if (IS_ERR(vma)) + return PTR_ERR(vma); + + map = i915_vma_pin_iomap(vma); + i915_vma_unpin(vma); + if (IS_ERR(map)) + return PTR_ERR(map); + + *v = map[offset / sizeof(*map)]; + i915_vma_unpin_iomap(vma); + + return 0; +} + +static int wc_set(struct drm_i915_gem_object *obj, + unsigned long offset, + u32 v) +{ + typeof(v) *map; + int err; + + /* XXX GTT write followed by WC write go missing */ + i915_gem_object_flush_gtt_write_domain(obj); + + err = i915_gem_object_set_to_gtt_domain(obj, true); + if (err) + return err; + + map = i915_gem_object_pin_map(obj, I915_MAP_WC); + if (IS_ERR(map)) + return PTR_ERR(map); + + map[offset / sizeof(*map)] = v; + i915_gem_object_unpin_map(obj); + + return 0; +} + +static int wc_get(struct drm_i915_gem_object *obj, + unsigned long offset, + u32 *v) +{ + typeof(v) map; + int err; + + /* XXX WC write followed by GTT write go missing */ + i915_gem_object_flush_gtt_write_domain(obj); + + err = i915_gem_object_set_to_gtt_domain(obj, false); + if (err) + return err; + + map = i915_gem_object_pin_map(obj, I915_MAP_WC); + if (IS_ERR(map)) + return PTR_ERR(map); + + *v = map[offset / sizeof(*map)]; + i915_gem_object_unpin_map(obj); + + return 0; +} + +static int gpu_set(struct drm_i915_gem_object *obj, + unsigned long offset, + u32 v) +{ + struct drm_i915_private *i915 = to_i915(obj->base.dev); + struct drm_i915_gem_request *rq; + struct i915_vma *vma; + int err; + + err = i915_gem_object_set_to_gtt_domain(obj, true); + if (err) + return err; + + vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 0); + if (IS_ERR(vma)) + return PTR_ERR(vma); + + rq = i915_gem_request_alloc(i915->engine[RCS], i915->kernel_context); + if (IS_ERR(rq)) { + i915_vma_unpin(vma); + return PTR_ERR(rq); + } + + err = intel_ring_begin(rq, 4); + if (err) { + __i915_add_request(rq, false); + i915_vma_unpin(vma); + return err; + } + + if (INTEL_GEN(i915) >= 8) { + intel_ring_emit(rq->ring, MI_STORE_DWORD_IMM_GEN4 | 1 << 22); + intel_ring_emit(rq->ring, lower_32_bits(i915_ggtt_offset(vma) + offset)); + intel_ring_emit(rq->ring, upper_32_bits(i915_ggtt_offset(vma) + offset)); + intel_ring_emit(rq->ring, v); + } else if (INTEL_GEN(i915) >= 4) { + intel_ring_emit(rq->ring, MI_STORE_DWORD_IMM_GEN4 | 1 << 22); + intel_ring_emit(rq->ring, 0); + intel_ring_emit(rq->ring, i915_ggtt_offset(vma) + offset); + intel_ring_emit(rq->ring, v); + } else { + intel_ring_emit(rq->ring, MI_STORE_DWORD_IMM | 1 << 22); + intel_ring_emit(rq->ring, i915_ggtt_offset(vma) + offset); + intel_ring_emit(rq->ring, v); + intel_ring_emit(rq->ring, MI_NOOP); + } + intel_ring_advance(rq->ring); + + i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); + i915_vma_unpin(vma); + + reservation_object_lock(obj->resv, NULL); + reservation_object_add_excl_fence(obj->resv, &rq->fence); + reservation_object_unlock(obj->resv); + + __i915_add_request(rq, true); + + return 0; +} + +static bool always_valid(struct drm_i915_private *i915) +{ + return true; +} + +static bool needs_mi_store_dword(struct drm_i915_private *i915) +{ + return igt_can_mi_store_dword_imm(i915); +} + +static const struct igt_coherency_mode { + const char *name; + int (*set)(struct drm_i915_gem_object *, unsigned long offset, u32 v); + int (*get)(struct drm_i915_gem_object *, unsigned long offset, u32 *v); + bool (*valid)(struct drm_i915_private *i915); +} igt_coherency_mode[] = { + { "cpu", cpu_set, cpu_get, always_valid }, + { "gtt", gtt_set, gtt_get, always_valid }, + { "wc", wc_set, wc_get, always_valid }, + { "gpu", gpu_set, NULL, needs_mi_store_dword }, + { }, +}; + +static int igt_gem_coherency(void *arg) +{ + const unsigned int ncachelines = PAGE_SIZE/64; + I915_RND_STATE(prng); + struct drm_i915_private *i915 = arg; + const struct igt_coherency_mode *read, *write, *over; + struct drm_i915_gem_object *obj; + unsigned long count, n; + u32 *offsets, *values; + int err; + + /* We repeatedly write, overwrite and read from a sequence of + * cachelines in order to try and detect incoherency (unflushed writes + * from either the CPU or GPU). Each setter/getter uses our cache + * domain API which should prevent incoherency. + */ + + offsets = kmalloc_array(ncachelines, 2*sizeof(u32), GFP_KERNEL); + if (!offsets) + return -ENOMEM; + for (count = 0; count < ncachelines; count++) + offsets[count] = count * 64 + 4 * (count % 16); + + values = offsets + ncachelines; + + mutex_lock(&i915->drm.struct_mutex); + for (over = igt_coherency_mode; over->name; over++) { + if (!over->set) + continue; + + if (!over->valid(i915)) + continue; + + for (write = igt_coherency_mode; write->name; write++) { + if (!write->set) + continue; + + if (!write->valid(i915)) + continue; + + for (read = igt_coherency_mode; read->name; read++) { + if (!read->get) + continue; + + if (!read->valid(i915)) + continue; + + for_each_prime_number_from(count, 1, ncachelines) { + obj = i915_gem_object_create_internal(i915, PAGE_SIZE); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + goto unlock; + } + + i915_random_reorder(offsets, ncachelines, &prng); + for (n = 0; n < count; n++) + values[n] = prandom_u32_state(&prng); + + for (n = 0; n < count; n++) { + err = over->set(obj, offsets[n], ~values[n]); + if (err) { + pr_err("Failed to set stale value[%ld/%ld] in object using %s, err=%d\n", + n, count, over->name, err); + goto put_object; + } + } + + for (n = 0; n < count; n++) { + err = write->set(obj, offsets[n], values[n]); + if (err) { + pr_err("Failed to set value[%ld/%ld] in object using %s, err=%d\n", + n, count, write->name, err); + goto put_object; + } + } + + for (n = 0; n < count; n++) { + u32 found; + + err = read->get(obj, offsets[n], &found); + if (err) { + pr_err("Failed to get value[%ld/%ld] in object using %s, err=%d\n", + n, count, read->name, err); + goto put_object; + } + + if (found != values[n]) { + pr_err("Value[%ld/%ld] mismatch, (overwrite with %s) wrote [%s] %x read [%s] %x (inverse %x), at offset %x\n", + n, count, over->name, + write->name, values[n], + read->name, found, + ~values[n], offsets[n]); + err = -EINVAL; + goto put_object; + } + } + + __i915_gem_object_release_unless_active(obj); + } + } + } + } +unlock: + mutex_unlock(&i915->drm.struct_mutex); + kfree(offsets); + return err; + +put_object: + __i915_gem_object_release_unless_active(obj); + goto unlock; +} + +int i915_gem_coherency_live_selftests(struct drm_i915_private *i915) +{ + static const struct i915_subtest tests[] = { + SUBTEST(igt_gem_coherency), + }; + + return i915_subtests(tests, i915); +} diff --git a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h index d720b03e2c01..8f0c207dbe22 100644 --- a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h +++ b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h @@ -11,3 +11,4 @@ selftest(sanitycheck, i915_live_sanitycheck) /* keep first (igt selfcheck) */ selftest(requests, i915_gem_request_live_selftests) selftest(objects, i915_gem_object_live_selftests) +selftest(coherency, i915_gem_coherency_live_selftests) -- cgit From 26e7a2a17971400fee4fcacc0132439c043fd0e6 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 13 Feb 2017 17:15:33 +0000 Subject: drm/i915: Move uncore selfchecks to live selftest infrastructure Now that the kselftest infrastructure exists, put it to use and add to it the existing consistency checks on the fw register lookup tables. v2: s/tabke/table/ Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Matthew Auld Link: http://patchwork.freedesktop.org/patch/msgid/20170213171558.20942-22-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_uncore.c | 52 +---------- .../gpu/drm/i915/selftests/i915_live_selftests.h | 1 + drivers/gpu/drm/i915/selftests/intel_uncore.c | 100 +++++++++++++++++++++ 3 files changed, 105 insertions(+), 48 deletions(-) create mode 100644 drivers/gpu/drm/i915/selftests/intel_uncore.c diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index 1ff8fd9911d7..441c51fd9746 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -642,33 +642,6 @@ find_fw_domain(struct drm_i915_private *dev_priv, u32 offset) return entry->domains; } -static void -intel_fw_table_check(struct drm_i915_private *dev_priv) -{ - const struct intel_forcewake_range *ranges; - unsigned int num_ranges; - s32 prev; - unsigned int i; - - if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG)) - return; - - ranges = dev_priv->uncore.fw_domains_table; - if (!ranges) - return; - - num_ranges = dev_priv->uncore.fw_domains_table_entries; - - for (i = 0, prev = -1; i < num_ranges; i++, ranges++) { - WARN_ON_ONCE(IS_GEN9(dev_priv) && - (prev + 1) != (s32)ranges->start); - WARN_ON_ONCE(prev >= (s32)ranges->start); - prev = ranges->start; - WARN_ON_ONCE(prev >= (s32)ranges->end); - prev = ranges->end; - } -} - #define GEN_FW_RANGE(s, e, d) \ { .start = (s), .end = (e), .domains = (d) } @@ -707,23 +680,6 @@ static const i915_reg_t gen8_shadowed_regs[] = { /* TODO: Other registers are not yet used */ }; -static void intel_shadow_table_check(void) -{ - const i915_reg_t *reg = gen8_shadowed_regs; - s32 prev; - u32 offset; - unsigned int i; - - if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG)) - return; - - for (i = 0, prev = -1; i < ARRAY_SIZE(gen8_shadowed_regs); i++, reg++) { - offset = i915_mmio_reg_offset(*reg); - WARN_ON_ONCE(prev >= (s32)offset); - prev = offset; - } -} - static int mmio_reg_cmp(u32 key, const i915_reg_t *reg) { u32 offset = i915_mmio_reg_offset(*reg); @@ -1384,10 +1340,6 @@ void intel_uncore_init(struct drm_i915_private *dev_priv) break; } - intel_fw_table_check(dev_priv); - if (INTEL_GEN(dev_priv) >= 8) - intel_shadow_table_check(); - i915_check_and_clear_faults(dev_priv); } #undef ASSIGN_WRITE_MMIO_VFUNCS @@ -1905,3 +1857,7 @@ intel_uncore_forcewake_for_reg(struct drm_i915_private *dev_priv, return fw_domains; } + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftests/intel_uncore.c" +#endif diff --git a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h index 8f0c207dbe22..be7892d05290 100644 --- a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h +++ b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h @@ -9,6 +9,7 @@ * Tests are executed in order by igt/drv_selftest */ selftest(sanitycheck, i915_live_sanitycheck) /* keep first (igt selfcheck) */ +selftest(uncore, intel_uncore_live_selftests) selftest(requests, i915_gem_request_live_selftests) selftest(objects, i915_gem_object_live_selftests) selftest(coherency, i915_gem_coherency_live_selftests) diff --git a/drivers/gpu/drm/i915/selftests/intel_uncore.c b/drivers/gpu/drm/i915/selftests/intel_uncore.c new file mode 100644 index 000000000000..5f7bd5c9428f --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/intel_uncore.c @@ -0,0 +1,100 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include "../i915_selftest.h" + +static int intel_fw_table_check(struct drm_i915_private *i915) +{ + const struct intel_forcewake_range *ranges; + unsigned int num_ranges, i; + s32 prev; + + ranges = i915->uncore.fw_domains_table; + if (!ranges) + return 0; + + num_ranges = i915->uncore.fw_domains_table_entries; + for (i = 0, prev = -1; i < num_ranges; i++, ranges++) { + /* Check that the table is watertight */ + if (IS_GEN9(i915) && (prev + 1) != (s32)ranges->start) { + pr_err("%s: entry[%d]:(%x, %x) is not watertight to previous (%x)\n", + __func__, i, ranges->start, ranges->end, prev); + return -EINVAL; + } + + /* Check that the table never goes backwards */ + if (prev >= (s32)ranges->start) { + pr_err("%s: entry[%d]:(%x, %x) is less than the previous (%x)\n", + __func__, i, ranges->start, ranges->end, prev); + return -EINVAL; + } + + /* Check that the entry is valid */ + if (ranges->start >= ranges->end) { + pr_err("%s: entry[%d]:(%x, %x) has negative length\n", + __func__, i, ranges->start, ranges->end); + return -EINVAL; + } + + prev = ranges->end; + } + + return 0; +} + +static int intel_shadow_table_check(void) +{ + const i915_reg_t *reg = gen8_shadowed_regs; + unsigned int i; + s32 prev; + + for (i = 0, prev = -1; i < ARRAY_SIZE(gen8_shadowed_regs); i++, reg++) { + u32 offset = i915_mmio_reg_offset(*reg); + + if (prev >= (s32)offset) { + pr_err("%s: entry[%d]:(%x) is before previous (%x)\n", + __func__, i, offset, prev); + return -EINVAL; + } + + prev = offset; + } + + return 0; +} + +int intel_uncore_live_selftests(struct drm_i915_private *i915) +{ + int err; + + err = intel_fw_table_check(i915); + if (err) + return err; + + err = intel_shadow_table_check(); + if (err) + return err; + + return 0; +} -- cgit From 9852d543a85f3aaeb408259f5e462c4ce7f1dc99 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 13 Feb 2017 17:15:34 +0000 Subject: drm/i915: Test all fw tables during mock selftests In addition to just testing the fw table we load, during the initial mock testing we can test that all tables are valid (so the testing is not limited to just the platforms that load that particular table). Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Matthew Auld Link: http://patchwork.freedesktop.org/patch/msgid/20170213171558.20942-23-chris@chris-wilson.co.uk --- .../gpu/drm/i915/selftests/i915_mock_selftests.h | 1 + drivers/gpu/drm/i915/selftests/intel_uncore.c | 49 ++++++++++++++++------ 2 files changed, 37 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h index 2ed94e3a71b7..c61e08de7913 100644 --- a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h +++ b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h @@ -10,6 +10,7 @@ */ selftest(sanitycheck, i915_mock_sanitycheck) /* keep first (igt selfcheck) */ selftest(scatterlist, scatterlist_mock_selftests) +selftest(uncore, intel_uncore_mock_selftests) selftest(breadcrumbs, intel_breadcrumbs_mock_selftests) selftest(requests, i915_gem_request_mock_selftests) selftest(objects, i915_gem_object_mock_selftests) diff --git a/drivers/gpu/drm/i915/selftests/intel_uncore.c b/drivers/gpu/drm/i915/selftests/intel_uncore.c index 5f7bd5c9428f..2c2a879857da 100644 --- a/drivers/gpu/drm/i915/selftests/intel_uncore.c +++ b/drivers/gpu/drm/i915/selftests/intel_uncore.c @@ -24,20 +24,16 @@ #include "../i915_selftest.h" -static int intel_fw_table_check(struct drm_i915_private *i915) +static int intel_fw_table_check(const struct intel_forcewake_range *ranges, + unsigned int num_ranges, + bool is_watertight) { - const struct intel_forcewake_range *ranges; - unsigned int num_ranges, i; + unsigned int i; s32 prev; - ranges = i915->uncore.fw_domains_table; - if (!ranges) - return 0; - - num_ranges = i915->uncore.fw_domains_table_entries; for (i = 0, prev = -1; i < num_ranges; i++, ranges++) { /* Check that the table is watertight */ - if (IS_GEN9(i915) && (prev + 1) != (s32)ranges->start) { + if (is_watertight && (prev + 1) != (s32)ranges->start) { pr_err("%s: entry[%d]:(%x, %x) is not watertight to previous (%x)\n", __func__, i, ranges->start, ranges->end, prev); return -EINVAL; @@ -84,15 +80,42 @@ static int intel_shadow_table_check(void) return 0; } -int intel_uncore_live_selftests(struct drm_i915_private *i915) +int intel_uncore_mock_selftests(void) { - int err; + struct { + const struct intel_forcewake_range *ranges; + unsigned int num_ranges; + bool is_watertight; + } fw[] = { + { __vlv_fw_ranges, ARRAY_SIZE(__vlv_fw_ranges), false }, + { __chv_fw_ranges, ARRAY_SIZE(__chv_fw_ranges), false }, + { __gen9_fw_ranges, ARRAY_SIZE(__gen9_fw_ranges), true }, + }; + int err, i; + + for (i = 0; i < ARRAY_SIZE(fw); i++) { + err = intel_fw_table_check(fw[i].ranges, + fw[i].num_ranges, + fw[i].is_watertight); + if (err) + return err; + } - err = intel_fw_table_check(i915); + err = intel_shadow_table_check(); if (err) return err; - err = intel_shadow_table_check(); + return 0; +} + +int intel_uncore_live_selftests(struct drm_i915_private *i915) +{ + int err; + + /* Confirm the table we load is still valid */ + err = intel_fw_table_check(i915->uncore.fw_domains_table, + i915->uncore.fw_domains_table_entries, + INTEL_GEN(i915) >= 9); if (err) return err; -- cgit From a8fb2bad82b64047d9c6090b859467cacc69e9d2 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 13 Feb 2017 17:15:35 +0000 Subject: drm/i915: Sanity check all registers for matching fw domains Add a late selftest that walks over all forcewake registers (those below 0x40000) and uses the mmio debug register to check to see if any are unclaimed. This is possible if we fail to wake the appropriate powerwells for the register. Signed-off-by: Chris Wilson Reviewed-by: Matthew Auld Link: http://patchwork.freedesktop.org/patch/msgid/20170213171558.20942-24-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/selftests/intel_uncore.c | 59 +++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) diff --git a/drivers/gpu/drm/i915/selftests/intel_uncore.c b/drivers/gpu/drm/i915/selftests/intel_uncore.c index 2c2a879857da..2d0fef2cfca6 100644 --- a/drivers/gpu/drm/i915/selftests/intel_uncore.c +++ b/drivers/gpu/drm/i915/selftests/intel_uncore.c @@ -108,6 +108,61 @@ int intel_uncore_mock_selftests(void) return 0; } +static int intel_uncore_check_forcewake_domains(struct drm_i915_private *dev_priv) +{ +#define FW_RANGE 0x40000 + unsigned long *valid; + u32 offset; + int err; + + if (!HAS_FPGA_DBG_UNCLAIMED(dev_priv) && + !IS_VALLEYVIEW(dev_priv) && + !IS_CHERRYVIEW(dev_priv)) + return 0; + + if (IS_VALLEYVIEW(dev_priv)) /* XXX system lockup! */ + return 0; + + if (IS_BROADWELL(dev_priv)) /* XXX random GPU hang afterwards! */ + return 0; + + valid = kzalloc(BITS_TO_LONGS(FW_RANGE) * sizeof(*valid), + GFP_TEMPORARY); + if (!valid) + return -ENOMEM; + + intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); + + check_for_unclaimed_mmio(dev_priv); + for (offset = 0; offset < FW_RANGE; offset += 4) { + i915_reg_t reg = { offset }; + + (void)I915_READ_FW(reg); + if (!check_for_unclaimed_mmio(dev_priv)) + set_bit(offset, valid); + } + + intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); + + err = 0; + for_each_set_bit(offset, valid, FW_RANGE) { + i915_reg_t reg = { offset }; + + intel_uncore_forcewake_reset(dev_priv, false); + check_for_unclaimed_mmio(dev_priv); + + (void)I915_READ(reg); + if (check_for_unclaimed_mmio(dev_priv)) { + pr_err("Unclaimed mmio read to register 0x%04x\n", + offset); + err = -EINVAL; + } + } + + kfree(valid); + return err; +} + int intel_uncore_live_selftests(struct drm_i915_private *i915) { int err; @@ -119,5 +174,9 @@ int intel_uncore_live_selftests(struct drm_i915_private *i915) if (err) return err; + err = intel_uncore_check_forcewake_domains(i915); + if (err) + return err; + return 0; } -- cgit From 6cca22ede8a448d341b3e4565943f54bd8b8dcd0 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 13 Feb 2017 17:15:36 +0000 Subject: drm/i915: Add some mock tests for dmabuf interop Check that we can create both dmabuf and objects from dmabuf. v2: Cleanups, correct include, fix unpin on dead path and prevent explosion on dmabuf init failure Signed-off-by: Chris Wilson Reviewed-by: Matthew Auld Link: http://patchwork.freedesktop.org/patch/msgid/20170213171558.20942-25-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_dmabuf.c | 5 + drivers/gpu/drm/i915/selftests/i915_gem_dmabuf.c | 294 +++++++++++++++++++++ .../gpu/drm/i915/selftests/i915_mock_selftests.h | 1 + drivers/gpu/drm/i915/selftests/mock_dmabuf.c | 176 ++++++++++++ drivers/gpu/drm/i915/selftests/mock_dmabuf.h | 41 +++ 5 files changed, 517 insertions(+) create mode 100644 drivers/gpu/drm/i915/selftests/i915_gem_dmabuf.c create mode 100644 drivers/gpu/drm/i915/selftests/mock_dmabuf.c create mode 100644 drivers/gpu/drm/i915/selftests/mock_dmabuf.h diff --git a/drivers/gpu/drm/i915/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/i915_gem_dmabuf.c index d037adcda6f2..3e276eee0450 100644 --- a/drivers/gpu/drm/i915/i915_gem_dmabuf.c +++ b/drivers/gpu/drm/i915/i915_gem_dmabuf.c @@ -307,3 +307,8 @@ fail_detach: return ERR_PTR(ret); } + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftests/mock_dmabuf.c" +#include "selftests/i915_gem_dmabuf.c" +#endif diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/selftests/i915_gem_dmabuf.c new file mode 100644 index 000000000000..a2393fcf9fa8 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/i915_gem_dmabuf.c @@ -0,0 +1,294 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include "../i915_selftest.h" + +#include "mock_gem_device.h" +#include "mock_dmabuf.h" + +static int igt_dmabuf_export(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct drm_i915_gem_object *obj; + struct dma_buf *dmabuf; + + obj = i915_gem_object_create(i915, PAGE_SIZE); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + dmabuf = i915_gem_prime_export(&i915->drm, &obj->base, 0); + i915_gem_object_put(obj); + if (IS_ERR(dmabuf)) { + pr_err("i915_gem_prime_export failed with err=%d\n", + (int)PTR_ERR(dmabuf)); + return PTR_ERR(dmabuf); + } + + dma_buf_put(dmabuf); + return 0; +} + +static int igt_dmabuf_import_self(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct drm_i915_gem_object *obj; + struct drm_gem_object *import; + struct dma_buf *dmabuf; + int err; + + obj = i915_gem_object_create(i915, PAGE_SIZE); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + dmabuf = i915_gem_prime_export(&i915->drm, &obj->base, 0); + if (IS_ERR(dmabuf)) { + pr_err("i915_gem_prime_export failed with err=%d\n", + (int)PTR_ERR(dmabuf)); + err = PTR_ERR(dmabuf); + goto out; + } + + import = i915_gem_prime_import(&i915->drm, dmabuf); + if (IS_ERR(import)) { + pr_err("i915_gem_prime_import failed with err=%d\n", + (int)PTR_ERR(import)); + err = PTR_ERR(import); + goto out_dmabuf; + } + + if (import != &obj->base) { + pr_err("i915_gem_prime_import created a new object!\n"); + err = -EINVAL; + goto out_import; + } + + err = 0; +out_import: + i915_gem_object_put(to_intel_bo(import)); +out_dmabuf: + dma_buf_put(dmabuf); +out: + i915_gem_object_put(obj); + return err; +} + +static int igt_dmabuf_import(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct drm_i915_gem_object *obj; + struct dma_buf *dmabuf; + void *obj_map, *dma_map; + u32 pattern[] = { 0, 0xaa, 0xcc, 0x55, 0xff }; + int err, i; + + dmabuf = mock_dmabuf(1); + if (IS_ERR(dmabuf)) + return PTR_ERR(dmabuf); + + obj = to_intel_bo(i915_gem_prime_import(&i915->drm, dmabuf)); + if (IS_ERR(obj)) { + pr_err("i915_gem_prime_import failed with err=%d\n", + (int)PTR_ERR(obj)); + err = PTR_ERR(obj); + goto out_dmabuf; + } + + if (obj->base.dev != &i915->drm) { + pr_err("i915_gem_prime_import created a non-i915 object!\n"); + err = -EINVAL; + goto out_obj; + } + + if (obj->base.size != PAGE_SIZE) { + pr_err("i915_gem_prime_import is wrong size found %lld, expected %ld\n", + (long long)obj->base.size, PAGE_SIZE); + err = -EINVAL; + goto out_obj; + } + + dma_map = dma_buf_vmap(dmabuf); + if (!dma_map) { + pr_err("dma_buf_vmap failed\n"); + err = -ENOMEM; + goto out_obj; + } + + if (0) { /* Can not yet map dmabuf */ + obj_map = i915_gem_object_pin_map(obj, I915_MAP_WB); + if (IS_ERR(obj_map)) { + err = PTR_ERR(obj_map); + pr_err("i915_gem_object_pin_map failed with err=%d\n", err); + goto out_dma_map; + } + + for (i = 0; i < ARRAY_SIZE(pattern); i++) { + memset(dma_map, pattern[i], PAGE_SIZE); + if (memchr_inv(obj_map, pattern[i], PAGE_SIZE)) { + err = -EINVAL; + pr_err("imported vmap not all set to %x!\n", pattern[i]); + i915_gem_object_unpin_map(obj); + goto out_dma_map; + } + } + + for (i = 0; i < ARRAY_SIZE(pattern); i++) { + memset(obj_map, pattern[i], PAGE_SIZE); + if (memchr_inv(dma_map, pattern[i], PAGE_SIZE)) { + err = -EINVAL; + pr_err("exported vmap not all set to %x!\n", pattern[i]); + i915_gem_object_unpin_map(obj); + goto out_dma_map; + } + } + + i915_gem_object_unpin_map(obj); + } + + err = 0; +out_dma_map: + dma_buf_vunmap(dmabuf, dma_map); +out_obj: + i915_gem_object_put(obj); +out_dmabuf: + dma_buf_put(dmabuf); + return err; +} + +static int igt_dmabuf_import_ownership(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct drm_i915_gem_object *obj; + struct dma_buf *dmabuf; + void *ptr; + int err; + + dmabuf = mock_dmabuf(1); + if (IS_ERR(dmabuf)) + return PTR_ERR(dmabuf); + + ptr = dma_buf_vmap(dmabuf); + if (!ptr) { + pr_err("dma_buf_vmap failed\n"); + err = -ENOMEM; + goto err_dmabuf; + } + + memset(ptr, 0xc5, PAGE_SIZE); + dma_buf_vunmap(dmabuf, ptr); + + obj = to_intel_bo(i915_gem_prime_import(&i915->drm, dmabuf)); + if (IS_ERR(obj)) { + pr_err("i915_gem_prime_import failed with err=%d\n", + (int)PTR_ERR(obj)); + err = PTR_ERR(obj); + goto err_dmabuf; + } + + dma_buf_put(dmabuf); + + err = i915_gem_object_pin_pages(obj); + if (err) { + pr_err("i915_gem_object_pin_pages failed with err=%d\n", err); + goto out_obj; + } + + err = 0; + i915_gem_object_unpin_pages(obj); +out_obj: + i915_gem_object_put(obj); + return err; + +err_dmabuf: + dma_buf_put(dmabuf); + return err; +} + +static int igt_dmabuf_export_vmap(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct drm_i915_gem_object *obj; + struct dma_buf *dmabuf; + void *ptr; + int err; + + obj = i915_gem_object_create(i915, PAGE_SIZE); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + dmabuf = i915_gem_prime_export(&i915->drm, &obj->base, 0); + if (IS_ERR(dmabuf)) { + pr_err("i915_gem_prime_export failed with err=%d\n", + (int)PTR_ERR(dmabuf)); + err = PTR_ERR(dmabuf); + goto err_obj; + } + i915_gem_object_put(obj); + + ptr = dma_buf_vmap(dmabuf); + if (IS_ERR(ptr)) { + err = PTR_ERR(ptr); + pr_err("dma_buf_vmap failed with err=%d\n", err); + goto out; + } + + if (memchr_inv(ptr, 0, dmabuf->size)) { + pr_err("Exported object not initialiased to zero!\n"); + err = -EINVAL; + goto out; + } + + memset(ptr, 0xc5, dmabuf->size); + + err = 0; + dma_buf_vunmap(dmabuf, ptr); +out: + dma_buf_put(dmabuf); + return err; + +err_obj: + i915_gem_object_put(obj); + return err; +} + +int i915_gem_dmabuf_mock_selftests(void) +{ + static const struct i915_subtest tests[] = { + SUBTEST(igt_dmabuf_export), + SUBTEST(igt_dmabuf_import_self), + SUBTEST(igt_dmabuf_import), + SUBTEST(igt_dmabuf_import_ownership), + SUBTEST(igt_dmabuf_export_vmap), + }; + struct drm_i915_private *i915; + int err; + + i915 = mock_gem_device(); + if (!i915) + return -ENOMEM; + + err = i915_subtests(tests, i915); + + drm_dev_unref(&i915->drm); + return err; +} diff --git a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h index c61e08de7913..955a4d6ccdaf 100644 --- a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h +++ b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h @@ -14,3 +14,4 @@ selftest(uncore, intel_uncore_mock_selftests) selftest(breadcrumbs, intel_breadcrumbs_mock_selftests) selftest(requests, i915_gem_request_mock_selftests) selftest(objects, i915_gem_object_mock_selftests) +selftest(dmabuf, i915_gem_dmabuf_mock_selftests) diff --git a/drivers/gpu/drm/i915/selftests/mock_dmabuf.c b/drivers/gpu/drm/i915/selftests/mock_dmabuf.c new file mode 100644 index 000000000000..99da8f4ef497 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/mock_dmabuf.c @@ -0,0 +1,176 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include "mock_dmabuf.h" + +static struct sg_table *mock_map_dma_buf(struct dma_buf_attachment *attachment, + enum dma_data_direction dir) +{ + struct mock_dmabuf *mock = to_mock(attachment->dmabuf); + struct sg_table *st; + struct scatterlist *sg; + int i, err; + + st = kmalloc(sizeof(*st), GFP_KERNEL); + if (!st) + return ERR_PTR(-ENOMEM); + + err = sg_alloc_table(st, mock->npages, GFP_KERNEL); + if (err) + goto err_free; + + sg = st->sgl; + for (i = 0; i < mock->npages; i++) { + sg_set_page(sg, mock->pages[i], PAGE_SIZE, 0); + sg = sg_next(sg); + } + + if (!dma_map_sg(attachment->dev, st->sgl, st->nents, dir)) { + err = -ENOMEM; + goto err_st; + } + + return st; + +err_st: + sg_free_table(st); +err_free: + kfree(st); + return ERR_PTR(err); +} + +static void mock_unmap_dma_buf(struct dma_buf_attachment *attachment, + struct sg_table *st, + enum dma_data_direction dir) +{ + dma_unmap_sg(attachment->dev, st->sgl, st->nents, dir); + sg_free_table(st); + kfree(st); +} + +static void mock_dmabuf_release(struct dma_buf *dma_buf) +{ + struct mock_dmabuf *mock = to_mock(dma_buf); + int i; + + for (i = 0; i < mock->npages; i++) + put_page(mock->pages[i]); + + kfree(mock); +} + +static void *mock_dmabuf_vmap(struct dma_buf *dma_buf) +{ + struct mock_dmabuf *mock = to_mock(dma_buf); + + return vm_map_ram(mock->pages, mock->npages, 0, PAGE_KERNEL); +} + +static void mock_dmabuf_vunmap(struct dma_buf *dma_buf, void *vaddr) +{ + struct mock_dmabuf *mock = to_mock(dma_buf); + + vm_unmap_ram(vaddr, mock->npages); +} + +static void *mock_dmabuf_kmap_atomic(struct dma_buf *dma_buf, unsigned long page_num) +{ + struct mock_dmabuf *mock = to_mock(dma_buf); + + return kmap_atomic(mock->pages[page_num]); +} + +static void mock_dmabuf_kunmap_atomic(struct dma_buf *dma_buf, unsigned long page_num, void *addr) +{ + kunmap_atomic(addr); +} + +static void *mock_dmabuf_kmap(struct dma_buf *dma_buf, unsigned long page_num) +{ + struct mock_dmabuf *mock = to_mock(dma_buf); + + return kmap(mock->pages[page_num]); +} + +static void mock_dmabuf_kunmap(struct dma_buf *dma_buf, unsigned long page_num, void *addr) +{ + struct mock_dmabuf *mock = to_mock(dma_buf); + + return kunmap(mock->pages[page_num]); +} + +static int mock_dmabuf_mmap(struct dma_buf *dma_buf, struct vm_area_struct *vma) +{ + return -ENODEV; +} + +static const struct dma_buf_ops mock_dmabuf_ops = { + .map_dma_buf = mock_map_dma_buf, + .unmap_dma_buf = mock_unmap_dma_buf, + .release = mock_dmabuf_release, + .kmap = mock_dmabuf_kmap, + .kmap_atomic = mock_dmabuf_kmap_atomic, + .kunmap = mock_dmabuf_kunmap, + .kunmap_atomic = mock_dmabuf_kunmap_atomic, + .mmap = mock_dmabuf_mmap, + .vmap = mock_dmabuf_vmap, + .vunmap = mock_dmabuf_vunmap, +}; + +static struct dma_buf *mock_dmabuf(int npages) +{ + struct mock_dmabuf *mock; + DEFINE_DMA_BUF_EXPORT_INFO(exp_info); + struct dma_buf *dmabuf; + int i; + + mock = kmalloc(sizeof(*mock) + npages * sizeof(struct page *), + GFP_KERNEL); + if (!mock) + return ERR_PTR(-ENOMEM); + + mock->npages = npages; + for (i = 0; i < npages; i++) { + mock->pages[i] = alloc_page(GFP_KERNEL); + if (!mock->pages[i]) + goto err; + } + + exp_info.ops = &mock_dmabuf_ops; + exp_info.size = npages * PAGE_SIZE; + exp_info.flags = O_CLOEXEC; + exp_info.priv = mock; + + dmabuf = dma_buf_export(&exp_info); + if (IS_ERR(dmabuf)) + goto err; + + return dmabuf; + +err: + while (i--) + put_page(mock->pages[i]); + kfree(mock); + return ERR_PTR(-ENOMEM); +} diff --git a/drivers/gpu/drm/i915/selftests/mock_dmabuf.h b/drivers/gpu/drm/i915/selftests/mock_dmabuf.h new file mode 100644 index 000000000000..ec80613159b9 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/mock_dmabuf.h @@ -0,0 +1,41 @@ + +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#ifndef __MOCK_DMABUF_H__ +#define __MOCK_DMABUF_H__ + +#include + +struct mock_dmabuf { + int npages; + struct page *pages[]; +}; + +static struct mock_dmabuf *to_mock(struct dma_buf *buf) +{ + return buf->priv; +} + +#endif /* !__MOCK_DMABUF_H__ */ -- cgit From ced01afdf672d6ceab71f14e0aefb79b914ddd34 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 13 Feb 2017 17:15:37 +0000 Subject: drm/i915: Add a live dmabuf selftest Though we have good coverage of our dmabuf interface through the mock tests, we also want to check the heavy module unload paths of the live i915 driver. Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170213171558.20942-26-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/selftests/i915_gem_dmabuf.c | 9 +++++++++ drivers/gpu/drm/i915/selftests/i915_live_selftests.h | 1 + 2 files changed, 10 insertions(+) diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/selftests/i915_gem_dmabuf.c index a2393fcf9fa8..817bef74bbcb 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_dmabuf.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_dmabuf.c @@ -292,3 +292,12 @@ int i915_gem_dmabuf_mock_selftests(void) drm_dev_unref(&i915->drm); return err; } + +int i915_gem_dmabuf_live_selftests(struct drm_i915_private *i915) +{ + static const struct i915_subtest tests[] = { + SUBTEST(igt_dmabuf_export), + }; + + return i915_subtests(tests, i915); +} diff --git a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h index be7892d05290..fc605695d118 100644 --- a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h +++ b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h @@ -12,4 +12,5 @@ selftest(sanitycheck, i915_live_sanitycheck) /* keep first (igt selfcheck) */ selftest(uncore, intel_uncore_live_selftests) selftest(requests, i915_gem_request_live_selftests) selftest(objects, i915_gem_object_live_selftests) +selftest(dmabuf, i915_gem_dmabuf_live_selftests) selftest(coherency, i915_gem_coherency_live_selftests) -- cgit From 1c42819a14a0868edd5d4b2db4594c1f7fc324d4 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 13 Feb 2017 17:15:38 +0000 Subject: drm/i915: Add initial selftests for i915_gem_gtt Simple starting point for adding selftests for i915_gem_gtt, first try creating a ppGTT and filling it. Signed-off-by: Chris Wilson Reviewed-by: Matthew Auld Link: http://patchwork.freedesktop.org/patch/msgid/20170213171558.20942-27-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_gtt.c | 1 + drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 99 ++++++++++++++++++++++ .../gpu/drm/i915/selftests/i915_live_selftests.h | 1 + 3 files changed, 101 insertions(+) create mode 100644 drivers/gpu/drm/i915/selftests/i915_gem_gtt.c diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 520bcf63d271..9daea3cd9cdc 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -3760,4 +3760,5 @@ int i915_gem_gtt_insert(struct i915_address_space *vm, #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) #include "selftests/mock_gtt.c" +#include "selftests/i915_gem_gtt.c" #endif diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c new file mode 100644 index 000000000000..f3359be62515 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c @@ -0,0 +1,99 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include "../i915_selftest.h" + +static int igt_ppgtt_alloc(void *arg) +{ + struct drm_i915_private *dev_priv = arg; + struct i915_hw_ppgtt *ppgtt; + u64 size, last; + int err; + + /* Allocate a ppggt and try to fill the entire range */ + + if (!USES_PPGTT(dev_priv)) + return 0; + + ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL); + if (!ppgtt) + return -ENOMEM; + + mutex_lock(&dev_priv->drm.struct_mutex); + err = __hw_ppgtt_init(ppgtt, dev_priv); + if (err) + goto err_ppgtt; + + if (!ppgtt->base.allocate_va_range) + goto err_ppgtt_cleanup; + + /* Check we can allocate the entire range */ + for (size = 4096; + size <= ppgtt->base.total; + size <<= 2) { + err = ppgtt->base.allocate_va_range(&ppgtt->base, 0, size); + if (err) { + if (err == -ENOMEM) { + pr_info("[1] Ran out of memory for va_range [0 + %llx] [bit %d]\n", + size, ilog2(size)); + err = 0; /* virtual space too large! */ + } + goto err_ppgtt_cleanup; + } + + ppgtt->base.clear_range(&ppgtt->base, 0, size); + } + + /* Check we can incrementally allocate the entire range */ + for (last = 0, size = 4096; + size <= ppgtt->base.total; + last = size, size <<= 2) { + err = ppgtt->base.allocate_va_range(&ppgtt->base, + last, size - last); + if (err) { + if (err == -ENOMEM) { + pr_info("[2] Ran out of memory for va_range [%llx + %llx] [bit %d]\n", + last, size - last, ilog2(size)); + err = 0; /* virtual space too large! */ + } + goto err_ppgtt_cleanup; + } + } + +err_ppgtt_cleanup: + ppgtt->base.cleanup(&ppgtt->base); +err_ppgtt: + mutex_unlock(&dev_priv->drm.struct_mutex); + kfree(ppgtt); + return err; +} + +int i915_gem_gtt_live_selftests(struct drm_i915_private *i915) +{ + static const struct i915_subtest tests[] = { + SUBTEST(igt_ppgtt_alloc), + }; + + return i915_subtests(tests, i915); +} diff --git a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h index fc605695d118..d6c869c5b54f 100644 --- a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h +++ b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h @@ -14,3 +14,4 @@ selftest(requests, i915_gem_request_live_selftests) selftest(objects, i915_gem_object_live_selftests) selftest(dmabuf, i915_gem_dmabuf_live_selftests) selftest(coherency, i915_gem_coherency_live_selftests) +selftest(gtt, i915_gem_gtt_live_selftests) -- cgit From 8d28ba4568f4973b7fc5b12100a720979c16b4f0 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 13 Feb 2017 17:15:39 +0000 Subject: drm/i915: Exercise filling the top/bottom portions of the ppgtt Allocate objects with varying number of pages (which should hopefully consist of a mixture of contiguous page chunks and so coalesced sg lists) and check that the sg walkers in insert_pages cope. v2: Check both small <-> large Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170213171558.20942-28-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_object.h | 3 + drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 362 ++++++++++++++++++++++++++ 2 files changed, 365 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_gem_object.h b/drivers/gpu/drm/i915/i915_gem_object.h index 4114cc8a0b9b..02365a5f7c80 100644 --- a/drivers/gpu/drm/i915/i915_gem_object.h +++ b/drivers/gpu/drm/i915/i915_gem_object.h @@ -33,6 +33,8 @@ #include +#include "i915_selftest.h" + struct drm_i915_gem_object_ops { unsigned int flags; #define I915_GEM_OBJECT_HAS_STRUCT_PAGE 0x1 @@ -84,6 +86,7 @@ struct drm_i915_gem_object { struct list_head obj_exec_link; struct list_head batch_pool_link; + I915_SELFTEST_DECLARE(struct list_head st_link); unsigned long flags; diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c index f3359be62515..f97580cbd7f7 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c @@ -22,8 +22,98 @@ * */ +#include + #include "../i915_selftest.h" +#include "mock_drm.h" + +static void fake_free_pages(struct drm_i915_gem_object *obj, + struct sg_table *pages) +{ + sg_free_table(pages); + kfree(pages); +} + +static struct sg_table * +fake_get_pages(struct drm_i915_gem_object *obj) +{ +#define GFP (GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY) +#define PFN_BIAS 0x1000 + struct sg_table *pages; + struct scatterlist *sg; + typeof(obj->base.size) rem; + + pages = kmalloc(sizeof(*pages), GFP); + if (!pages) + return ERR_PTR(-ENOMEM); + + rem = round_up(obj->base.size, BIT(31)) >> 31; + if (sg_alloc_table(pages, rem, GFP)) { + kfree(pages); + return ERR_PTR(-ENOMEM); + } + + rem = obj->base.size; + for (sg = pages->sgl; sg; sg = sg_next(sg)) { + unsigned long len = min_t(typeof(rem), rem, BIT(31)); + + sg_set_page(sg, pfn_to_page(PFN_BIAS), len, 0); + sg_dma_address(sg) = page_to_phys(sg_page(sg)); + sg_dma_len(sg) = len; + + rem -= len; + } + + obj->mm.madv = I915_MADV_DONTNEED; + return pages; +#undef GFP +} + +static void fake_put_pages(struct drm_i915_gem_object *obj, + struct sg_table *pages) +{ + fake_free_pages(obj, pages); + obj->mm.dirty = false; + obj->mm.madv = I915_MADV_WILLNEED; +} + +static const struct drm_i915_gem_object_ops fake_ops = { + .flags = I915_GEM_OBJECT_IS_SHRINKABLE, + .get_pages = fake_get_pages, + .put_pages = fake_put_pages, +}; + +static struct drm_i915_gem_object * +fake_dma_object(struct drm_i915_private *i915, u64 size) +{ + struct drm_i915_gem_object *obj; + + GEM_BUG_ON(!size); + GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE)); + + if (overflows_type(size, obj->base.size)) + return ERR_PTR(-E2BIG); + + obj = i915_gem_object_alloc(i915); + if (!obj) + return ERR_PTR(-ENOMEM); + + drm_gem_private_object_init(&i915->drm, &obj->base, size); + i915_gem_object_init(obj, &fake_ops); + + obj->base.write_domain = I915_GEM_DOMAIN_CPU; + obj->base.read_domains = I915_GEM_DOMAIN_CPU; + obj->cache_level = I915_CACHE_NONE; + + /* Preallocate the "backing storage" */ + if (i915_gem_object_pin_pages(obj)) + return ERR_PTR(-ENOMEM); + + i915_gem_object_unpin_pages(obj); + return obj; +} + static int igt_ppgtt_alloc(void *arg) { struct drm_i915_private *dev_priv = arg; @@ -89,10 +179,282 @@ err_ppgtt: return err; } +static void close_object_list(struct list_head *objects, + struct i915_address_space *vm) +{ + struct drm_i915_gem_object *obj, *on; + + list_for_each_entry_safe(obj, on, objects, st_link) { + struct i915_vma *vma; + + vma = i915_vma_instance(obj, vm, NULL); + if (!IS_ERR(vma)) + i915_vma_close(vma); + + list_del(&obj->st_link); + i915_gem_object_put(obj); + } +} + +static int fill_hole(struct drm_i915_private *i915, + struct i915_address_space *vm, + u64 hole_start, u64 hole_end, + unsigned long end_time) +{ + const u64 hole_size = hole_end - hole_start; + struct drm_i915_gem_object *obj; + const unsigned long max_pages = + min_t(u64, ULONG_MAX - 1, hole_size/2 >> PAGE_SHIFT); + const unsigned long max_step = max(int_sqrt(max_pages), 2UL); + unsigned long npages, prime, flags; + struct i915_vma *vma; + LIST_HEAD(objects); + int err; + + /* Try binding many VMA working inwards from either edge */ + + flags = PIN_OFFSET_FIXED | PIN_USER; + if (i915_is_ggtt(vm)) + flags |= PIN_GLOBAL; + + for_each_prime_number_from(prime, 2, max_step) { + for (npages = 1; npages <= max_pages; npages *= prime) { + const u64 full_size = npages << PAGE_SHIFT; + const struct { + const char *name; + u64 offset; + int step; + } phases[] = { + { "top-down", hole_end, -1, }, + { "bottom-up", hole_start, 1, }, + { } + }, *p; + + obj = fake_dma_object(i915, full_size); + if (IS_ERR(obj)) + break; + + list_add(&obj->st_link, &objects); + + /* Align differing sized objects against the edges, and + * check we don't walk off into the void when binding + * them into the GTT. + */ + for (p = phases; p->name; p++) { + u64 offset; + + offset = p->offset; + list_for_each_entry(obj, &objects, st_link) { + vma = i915_vma_instance(obj, vm, NULL); + if (IS_ERR(vma)) + continue; + + if (p->step < 0) { + if (offset < hole_start + obj->base.size) + break; + offset -= obj->base.size; + } + + err = i915_vma_pin(vma, 0, 0, offset | flags); + if (err) { + pr_err("%s(%s) pin (forward) failed with err=%d on size=%lu pages (prime=%lu), offset=%llx\n", + __func__, p->name, err, npages, prime, offset); + goto err; + } + + if (!drm_mm_node_allocated(&vma->node) || + i915_vma_misplaced(vma, 0, 0, offset | flags)) { + pr_err("%s(%s) (forward) insert failed: vma.node=%llx + %llx [allocated? %d], expected offset %llx\n", + __func__, p->name, vma->node.start, vma->node.size, drm_mm_node_allocated(&vma->node), + offset); + err = -EINVAL; + goto err; + } + + i915_vma_unpin(vma); + + if (p->step > 0) { + if (offset + obj->base.size > hole_end) + break; + offset += obj->base.size; + } + } + + offset = p->offset; + list_for_each_entry(obj, &objects, st_link) { + vma = i915_vma_instance(obj, vm, NULL); + if (IS_ERR(vma)) + continue; + + if (p->step < 0) { + if (offset < hole_start + obj->base.size) + break; + offset -= obj->base.size; + } + + if (!drm_mm_node_allocated(&vma->node) || + i915_vma_misplaced(vma, 0, 0, offset | flags)) { + pr_err("%s(%s) (forward) moved vma.node=%llx + %llx, expected offset %llx\n", + __func__, p->name, vma->node.start, vma->node.size, + offset); + err = -EINVAL; + goto err; + } + + err = i915_vma_unbind(vma); + if (err) { + pr_err("%s(%s) (forward) unbind of vma.node=%llx + %llx failed with err=%d\n", + __func__, p->name, vma->node.start, vma->node.size, + err); + goto err; + } + + if (p->step > 0) { + if (offset + obj->base.size > hole_end) + break; + offset += obj->base.size; + } + } + + offset = p->offset; + list_for_each_entry_reverse(obj, &objects, st_link) { + vma = i915_vma_instance(obj, vm, NULL); + if (IS_ERR(vma)) + continue; + + if (p->step < 0) { + if (offset < hole_start + obj->base.size) + break; + offset -= obj->base.size; + } + + err = i915_vma_pin(vma, 0, 0, offset | flags); + if (err) { + pr_err("%s(%s) pin (backward) failed with err=%d on size=%lu pages (prime=%lu), offset=%llx\n", + __func__, p->name, err, npages, prime, offset); + goto err; + } + + if (!drm_mm_node_allocated(&vma->node) || + i915_vma_misplaced(vma, 0, 0, offset | flags)) { + pr_err("%s(%s) (backward) insert failed: vma.node=%llx + %llx [allocated? %d], expected offset %llx\n", + __func__, p->name, vma->node.start, vma->node.size, drm_mm_node_allocated(&vma->node), + offset); + err = -EINVAL; + goto err; + } + + i915_vma_unpin(vma); + + if (p->step > 0) { + if (offset + obj->base.size > hole_end) + break; + offset += obj->base.size; + } + } + + offset = p->offset; + list_for_each_entry_reverse(obj, &objects, st_link) { + vma = i915_vma_instance(obj, vm, NULL); + if (IS_ERR(vma)) + continue; + + if (p->step < 0) { + if (offset < hole_start + obj->base.size) + break; + offset -= obj->base.size; + } + + if (!drm_mm_node_allocated(&vma->node) || + i915_vma_misplaced(vma, 0, 0, offset | flags)) { + pr_err("%s(%s) (backward) moved vma.node=%llx + %llx [allocated? %d], expected offset %llx\n", + __func__, p->name, vma->node.start, vma->node.size, drm_mm_node_allocated(&vma->node), + offset); + err = -EINVAL; + goto err; + } + + err = i915_vma_unbind(vma); + if (err) { + pr_err("%s(%s) (backward) unbind of vma.node=%llx + %llx failed with err=%d\n", + __func__, p->name, vma->node.start, vma->node.size, + err); + goto err; + } + + if (p->step > 0) { + if (offset + obj->base.size > hole_end) + break; + offset += obj->base.size; + } + } + } + + if (igt_timeout(end_time, "%s timed out (npages=%lu, prime=%lu)\n", + __func__, npages, prime)) { + err = -EINTR; + goto err; + } + } + + close_object_list(&objects, vm); + } + + return 0; + +err: + close_object_list(&objects, vm); + return err; +} + +static int exercise_ppgtt(struct drm_i915_private *dev_priv, + int (*func)(struct drm_i915_private *i915, + struct i915_address_space *vm, + u64 hole_start, u64 hole_end, + unsigned long end_time)) +{ + struct drm_file *file; + struct i915_hw_ppgtt *ppgtt; + IGT_TIMEOUT(end_time); + int err; + + if (!USES_FULL_PPGTT(dev_priv)) + return 0; + + file = mock_file(dev_priv); + if (IS_ERR(file)) + return PTR_ERR(file); + + mutex_lock(&dev_priv->drm.struct_mutex); + ppgtt = i915_ppgtt_create(dev_priv, file->driver_priv, "mock"); + if (IS_ERR(ppgtt)) { + err = PTR_ERR(ppgtt); + goto out_unlock; + } + GEM_BUG_ON(offset_in_page(ppgtt->base.total)); + GEM_BUG_ON(ppgtt->base.closed); + + err = func(dev_priv, &ppgtt->base, 0, ppgtt->base.total, end_time); + + i915_ppgtt_close(&ppgtt->base); + i915_ppgtt_put(ppgtt); +out_unlock: + mutex_unlock(&dev_priv->drm.struct_mutex); + + mock_file_free(dev_priv, file); + return err; +} + +static int igt_ppgtt_fill(void *arg) +{ + return exercise_ppgtt(arg, fill_hole); +} + int i915_gem_gtt_live_selftests(struct drm_i915_private *i915) { static const struct i915_subtest tests[] = { SUBTEST(igt_ppgtt_alloc), + SUBTEST(igt_ppgtt_fill), }; return i915_subtests(tests, i915); -- cgit From 62c981cfe78513aace9a156b0abf0246ba7c79f2 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 13 Feb 2017 17:15:40 +0000 Subject: drm/i915: Exercise filling the top/bottom portions of the global GTT Same test as previously for the per-process GTT instead applied to the global GTT. Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170213171558.20942-29-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 61 ++++++++++++++++++++++++++- 1 file changed, 60 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c index f97580cbd7f7..86b2e09d9f85 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c @@ -22,6 +22,7 @@ * */ +#include #include #include "../i915_selftest.h" @@ -188,7 +189,8 @@ static void close_object_list(struct list_head *objects, struct i915_vma *vma; vma = i915_vma_instance(obj, vm, NULL); - if (!IS_ERR(vma)) + /* Only ppgtt vma may be closed before the object is freed */ + if (!IS_ERR(vma) && !i915_vma_is_ggtt(vma)) i915_vma_close(vma); list_del(&obj->st_link); @@ -450,12 +452,69 @@ static int igt_ppgtt_fill(void *arg) return exercise_ppgtt(arg, fill_hole); } +static int sort_holes(void *priv, struct list_head *A, struct list_head *B) +{ + struct drm_mm_node *a = list_entry(A, typeof(*a), hole_stack); + struct drm_mm_node *b = list_entry(B, typeof(*b), hole_stack); + + if (a->start < b->start) + return -1; + else + return 1; +} + +static int exercise_ggtt(struct drm_i915_private *i915, + int (*func)(struct drm_i915_private *i915, + struct i915_address_space *vm, + u64 hole_start, u64 hole_end, + unsigned long end_time)) +{ + struct i915_ggtt *ggtt = &i915->ggtt; + u64 hole_start, hole_end, last = 0; + struct drm_mm_node *node; + IGT_TIMEOUT(end_time); + int err; + + mutex_lock(&i915->drm.struct_mutex); +restart: + list_sort(NULL, &ggtt->base.mm.hole_stack, sort_holes); + drm_mm_for_each_hole(node, &ggtt->base.mm, hole_start, hole_end) { + if (hole_start < last) + continue; + + if (ggtt->base.mm.color_adjust) + ggtt->base.mm.color_adjust(node, 0, + &hole_start, &hole_end); + if (hole_start >= hole_end) + continue; + + err = func(i915, &ggtt->base, hole_start, hole_end, end_time); + if (err) + break; + + /* As we have manipulated the drm_mm, the list may be corrupt */ + last = hole_end; + goto restart; + } + mutex_unlock(&i915->drm.struct_mutex); + + return err; +} + +static int igt_ggtt_fill(void *arg) +{ + return exercise_ggtt(arg, fill_hole); +} + int i915_gem_gtt_live_selftests(struct drm_i915_private *i915) { static const struct i915_subtest tests[] = { SUBTEST(igt_ppgtt_alloc), SUBTEST(igt_ppgtt_fill), + SUBTEST(igt_ggtt_fill), }; + GEM_BUG_ON(offset_in_page(i915->ggtt.base.total)); + return i915_subtests(tests, i915); } -- cgit From 6e32ab3d47776247526ca83e434d6d0a746cfd64 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 13 Feb 2017 17:15:41 +0000 Subject: drm/i915: Fill different pages of the GTT Exercise filling different pages of the GTT v2: Walk all holes until we timeout Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170213171558.20942-30-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 93 +++++++++++++++++++++++++++ 1 file changed, 93 insertions(+) diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c index 86b2e09d9f85..73845404f664 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c @@ -409,6 +409,87 @@ err: return err; } +static int walk_hole(struct drm_i915_private *i915, + struct i915_address_space *vm, + u64 hole_start, u64 hole_end, + unsigned long end_time) +{ + const u64 hole_size = hole_end - hole_start; + const unsigned long max_pages = + min_t(u64, ULONG_MAX - 1, hole_size >> PAGE_SHIFT); + unsigned long flags; + u64 size; + + /* Try binding a single VMA in different positions within the hole */ + + flags = PIN_OFFSET_FIXED | PIN_USER; + if (i915_is_ggtt(vm)) + flags |= PIN_GLOBAL; + + for_each_prime_number_from(size, 1, max_pages) { + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + u64 addr; + int err = 0; + + obj = fake_dma_object(i915, size << PAGE_SHIFT); + if (IS_ERR(obj)) + break; + + vma = i915_vma_instance(obj, vm, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto err; + } + + for (addr = hole_start; + addr + obj->base.size < hole_end; + addr += obj->base.size) { + err = i915_vma_pin(vma, 0, 0, addr | flags); + if (err) { + pr_err("%s bind failed at %llx + %llx [hole %llx- %llx] with err=%d\n", + __func__, addr, vma->size, + hole_start, hole_end, err); + goto err; + } + i915_vma_unpin(vma); + + if (!drm_mm_node_allocated(&vma->node) || + i915_vma_misplaced(vma, 0, 0, addr | flags)) { + pr_err("%s incorrect at %llx + %llx\n", + __func__, addr, vma->size); + err = -EINVAL; + goto err; + } + + err = i915_vma_unbind(vma); + if (err) { + pr_err("%s unbind failed at %llx + %llx with err=%d\n", + __func__, addr, vma->size, err); + goto err; + } + + GEM_BUG_ON(drm_mm_node_allocated(&vma->node)); + + if (igt_timeout(end_time, + "%s timed out at %llx\n", + __func__, addr)) { + err = -EINTR; + goto err; + } + } + +err: + if (!i915_vma_is_ggtt(vma)) + i915_vma_close(vma); + i915_gem_object_put(obj); + if (err) + return err; + } + + return 0; +} + static int exercise_ppgtt(struct drm_i915_private *dev_priv, int (*func)(struct drm_i915_private *i915, struct i915_address_space *vm, @@ -452,6 +533,11 @@ static int igt_ppgtt_fill(void *arg) return exercise_ppgtt(arg, fill_hole); } +static int igt_ppgtt_walk(void *arg) +{ + return exercise_ppgtt(arg, walk_hole); +} + static int sort_holes(void *priv, struct list_head *A, struct list_head *B) { struct drm_mm_node *a = list_entry(A, typeof(*a), hole_stack); @@ -506,11 +592,18 @@ static int igt_ggtt_fill(void *arg) return exercise_ggtt(arg, fill_hole); } +static int igt_ggtt_walk(void *arg) +{ + return exercise_ggtt(arg, walk_hole); +} + int i915_gem_gtt_live_selftests(struct drm_i915_private *i915) { static const struct i915_subtest tests[] = { SUBTEST(igt_ppgtt_alloc), + SUBTEST(igt_ppgtt_walk), SUBTEST(igt_ppgtt_fill), + SUBTEST(igt_ggtt_walk), SUBTEST(igt_ggtt_fill), }; -- cgit From 5c3bff482ad19657d6bb07659b8a29554a909b58 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 13 Feb 2017 17:15:42 +0000 Subject: drm/i915: Exercise filling and removing random ranges from the live GTT Test the low-level i915_address_space interfaces to sanity check the live insertion/removal of address ranges. v2: Split out the low-level operations to a new test Signed-off-by: Chris Wilson Reviewed-by: Matthew Auld Link: http://patchwork.freedesktop.org/patch/msgid/20170213171558.20942-31-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 113 ++++++++++++++++++++++++++ 1 file changed, 113 insertions(+) diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c index 73845404f664..c4ed0fd9f160 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c @@ -26,6 +26,7 @@ #include #include "../i915_selftest.h" +#include "i915_random.h" #include "mock_drm.h" @@ -490,6 +491,106 @@ err: return 0; } +static int drunk_hole(struct drm_i915_private *i915, + struct i915_address_space *vm, + u64 hole_start, u64 hole_end, + unsigned long end_time) +{ + I915_RND_STATE(prng); + unsigned int size; + unsigned long flags; + + flags = PIN_OFFSET_FIXED | PIN_USER; + if (i915_is_ggtt(vm)) + flags |= PIN_GLOBAL; + + /* Keep creating larger objects until one cannot fit into the hole */ + for (size = 12; (hole_end - hole_start) >> size; size++) { + struct drm_i915_gem_object *obj; + unsigned int *order, count, n; + struct i915_vma *vma; + u64 hole_size; + int err; + + hole_size = (hole_end - hole_start) >> size; + if (hole_size > KMALLOC_MAX_SIZE / sizeof(u32)) + hole_size = KMALLOC_MAX_SIZE / sizeof(u32); + count = hole_size; + do { + count >>= 1; + order = i915_random_order(count, &prng); + } while (!order && count); + if (!order) + break; + + /* Ignore allocation failures (i.e. don't report them as + * a test failure) as we are purposefully allocating very + * large objects without checking that we have sufficient + * memory. We expect to hit -ENOMEM. + */ + + obj = fake_dma_object(i915, BIT_ULL(size)); + if (IS_ERR(obj)) { + kfree(order); + break; + } + + vma = i915_vma_instance(obj, vm, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto err_obj; + } + + GEM_BUG_ON(vma->size != BIT_ULL(size)); + + for (n = 0; n < count; n++) { + u64 addr = hole_start + order[n] * BIT_ULL(size); + + err = i915_vma_pin(vma, 0, 0, addr | flags); + if (err) { + pr_err("%s failed to pin object at %llx + %llx in hole [%llx - %llx], with err=%d\n", + __func__, + addr, BIT_ULL(size), + hole_start, hole_end, + err); + goto err; + } + + if (!drm_mm_node_allocated(&vma->node) || + i915_vma_misplaced(vma, 0, 0, addr | flags)) { + pr_err("%s incorrect at %llx + %llx\n", + __func__, addr, BIT_ULL(size)); + i915_vma_unpin(vma); + err = i915_vma_unbind(vma); + err = -EINVAL; + goto err; + } + + i915_vma_unpin(vma); + err = i915_vma_unbind(vma); + GEM_BUG_ON(err); + + if (igt_timeout(end_time, + "%s timed out after %d/%d\n", + __func__, n, count)) { + err = -EINTR; + goto err; + } + } + +err: + if (!i915_vma_is_ggtt(vma)) + i915_vma_close(vma); +err_obj: + i915_gem_object_put(obj); + kfree(order); + if (err) + return err; + } + + return 0; +} + static int exercise_ppgtt(struct drm_i915_private *dev_priv, int (*func)(struct drm_i915_private *i915, struct i915_address_space *vm, @@ -538,6 +639,11 @@ static int igt_ppgtt_walk(void *arg) return exercise_ppgtt(arg, walk_hole); } +static int igt_ppgtt_drunk(void *arg) +{ + return exercise_ppgtt(arg, drunk_hole); +} + static int sort_holes(void *priv, struct list_head *A, struct list_head *B) { struct drm_mm_node *a = list_entry(A, typeof(*a), hole_stack); @@ -597,12 +703,19 @@ static int igt_ggtt_walk(void *arg) return exercise_ggtt(arg, walk_hole); } +static int igt_ggtt_drunk(void *arg) +{ + return exercise_ggtt(arg, drunk_hole); +} + int i915_gem_gtt_live_selftests(struct drm_i915_private *i915) { static const struct i915_subtest tests[] = { SUBTEST(igt_ppgtt_alloc), + SUBTEST(igt_ppgtt_drunk), SUBTEST(igt_ppgtt_walk), SUBTEST(igt_ppgtt_fill), + SUBTEST(igt_ggtt_drunk), SUBTEST(igt_ggtt_walk), SUBTEST(igt_ggtt_fill), }; -- cgit From 4a6f13fce1331df68e1fc7dd4eb0c0b461d5f845 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 13 Feb 2017 17:15:43 +0000 Subject: drm/i915: Live testing of lowlevel GTT operations Directly test allocating the va range and clearing it, this bypasses the use of i915_vma_bind() and inserting the pages to focus on testing of the pagetables. Signed-off-by: Chris Wilson Reviewed-by: Matthew Auld Link: http://patchwork.freedesktop.org/patch/msgid/20170213171558.20942-32-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 98 +++++++++++++++++++++++++++ 1 file changed, 98 insertions(+) diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c index c4ed0fd9f160..d0ec74671ef8 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c @@ -181,6 +181,92 @@ err_ppgtt: return err; } +static int lowlevel_hole(struct drm_i915_private *i915, + struct i915_address_space *vm, + u64 hole_start, u64 hole_end, + unsigned long end_time) +{ + I915_RND_STATE(seed_prng); + unsigned int size; + + /* Keep creating larger objects until one cannot fit into the hole */ + for (size = 12; (hole_end - hole_start) >> size; size++) { + I915_RND_SUBSTATE(prng, seed_prng); + struct drm_i915_gem_object *obj; + unsigned int *order, count, n; + u64 hole_size; + + hole_size = (hole_end - hole_start) >> size; + if (hole_size > KMALLOC_MAX_SIZE / sizeof(u32)) + hole_size = KMALLOC_MAX_SIZE / sizeof(u32); + count = hole_size; + do { + count >>= 1; + order = i915_random_order(count, &prng); + } while (!order && count); + if (!order) + break; + + GEM_BUG_ON(count * BIT_ULL(size) > vm->total); + GEM_BUG_ON(hole_start + count * BIT_ULL(size) > hole_end); + + /* Ignore allocation failures (i.e. don't report them as + * a test failure) as we are purposefully allocating very + * large objects without checking that we have sufficient + * memory. We expect to hit -ENOMEM. + */ + + obj = fake_dma_object(i915, BIT_ULL(size)); + if (IS_ERR(obj)) { + kfree(order); + break; + } + + GEM_BUG_ON(obj->base.size != BIT_ULL(size)); + + if (i915_gem_object_pin_pages(obj)) { + i915_gem_object_put(obj); + kfree(order); + break; + } + + for (n = 0; n < count; n++) { + u64 addr = hole_start + order[n] * BIT_ULL(size); + + GEM_BUG_ON(addr + BIT_ULL(size) > vm->total); + + if (vm->allocate_va_range && + vm->allocate_va_range(vm, addr, BIT_ULL(size))) + break; + + vm->insert_entries(vm, obj->mm.pages, addr, + I915_CACHE_NONE, 0); + if (igt_timeout(end_time, + "%s timed out after %d/%d\n", + __func__, n, count)) { + hole_end = hole_start; /* quit */ + break; + } + } + count = n; + + i915_random_reorder(order, count, &prng); + for (n = 0; n < count; n++) { + u64 addr = hole_start + order[n] * BIT_ULL(size); + + GEM_BUG_ON(addr + BIT_ULL(size) > vm->total); + vm->clear_range(vm, addr, BIT_ULL(size)); + } + + i915_gem_object_unpin_pages(obj); + i915_gem_object_put(obj); + + kfree(order); + } + + return 0; +} + static void close_object_list(struct list_head *objects, struct i915_address_space *vm) { @@ -644,6 +730,11 @@ static int igt_ppgtt_drunk(void *arg) return exercise_ppgtt(arg, drunk_hole); } +static int igt_ppgtt_lowlevel(void *arg) +{ + return exercise_ppgtt(arg, lowlevel_hole); +} + static int sort_holes(void *priv, struct list_head *A, struct list_head *B) { struct drm_mm_node *a = list_entry(A, typeof(*a), hole_stack); @@ -708,13 +799,20 @@ static int igt_ggtt_drunk(void *arg) return exercise_ggtt(arg, drunk_hole); } +static int igt_ggtt_lowlevel(void *arg) +{ + return exercise_ggtt(arg, lowlevel_hole); +} + int i915_gem_gtt_live_selftests(struct drm_i915_private *i915) { static const struct i915_subtest tests[] = { SUBTEST(igt_ppgtt_alloc), + SUBTEST(igt_ppgtt_lowlevel), SUBTEST(igt_ppgtt_drunk), SUBTEST(igt_ppgtt_walk), SUBTEST(igt_ppgtt_fill), + SUBTEST(igt_ggtt_lowlevel), SUBTEST(igt_ggtt_drunk), SUBTEST(igt_ggtt_walk), SUBTEST(igt_ggtt_fill), -- cgit From aae4a3d811753d3f79c43c932b8f7d1bcbf42fb0 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 13 Feb 2017 17:15:44 +0000 Subject: drm/i915: Use fault-injection to force the shrinker to run in live GTT tests It is possible whilst allocating the page-directory tree for a ppgtt bind that the shrinker may run and reap unused parts of the tree. If the shrinker happens to remove a chunk of the tree that the allocate_va_range has already processed, we may then try to insert into the dangling tree. This test uses the fault-injection framework to force the shrinker to be invoked before we allocate new pages, i.e. new chunks of the PD tree. References: https://bugs.freedesktop.org/show_bug.cgi?id=99295 Signed-off-by: Chris Wilson Reviewed-by: Matthew Auld --- drivers/gpu/drm/i915/Kconfig.debug | 1 + drivers/gpu/drm/i915/i915_drv.h | 2 + drivers/gpu/drm/i915/i915_gem_gtt.c | 6 ++ drivers/gpu/drm/i915/i915_selftest.h | 2 + drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 98 +++++++++++++++++++++++++++ 5 files changed, 109 insertions(+) diff --git a/drivers/gpu/drm/i915/Kconfig.debug b/drivers/gpu/drm/i915/Kconfig.debug index a4d8cfd77c3c..68ff072f8b76 100644 --- a/drivers/gpu/drm/i915/Kconfig.debug +++ b/drivers/gpu/drm/i915/Kconfig.debug @@ -65,6 +65,7 @@ config DRM_I915_SELFTEST bool "Enable selftests upon driver load" depends on DRM_I915 default n + select FAULT_INJECTION select PRIME_NUMBERS help Choose this option to allow the driver to perform selftests upon diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index f7ff2df0c0c5..80b653fb3722 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2485,6 +2485,8 @@ struct drm_i915_private { /* Used to save the pipe-to-encoder mapping for audio */ struct intel_encoder *av_enc_map[I915_MAX_PIPES]; + I915_SELFTEST_DECLARE(struct fault_attr vm_fault); + /* * NOTE: This is the dri1/ums dungeon, don't add stuff here. Your patch * will be rejected. Instead look for a better place. diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 9daea3cd9cdc..f8ac69380a0f 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -23,6 +23,9 @@ * */ +#include /* fault-inject.h is not standalone! */ + +#include #include #include #include @@ -345,6 +348,9 @@ static int __setup_page_dma(struct drm_i915_private *dev_priv, { struct device *kdev = &dev_priv->drm.pdev->dev; + if (I915_SELFTEST_ONLY(should_fail(&dev_priv->vm_fault, 1))) + i915_gem_shrink_all(dev_priv); + p->page = alloc_page(flags); if (!p->page) return -ENOMEM; diff --git a/drivers/gpu/drm/i915/i915_selftest.h b/drivers/gpu/drm/i915/i915_selftest.h index e43777b72211..9d7d86f1733d 100644 --- a/drivers/gpu/drm/i915/i915_selftest.h +++ b/drivers/gpu/drm/i915/i915_selftest.h @@ -36,6 +36,8 @@ struct i915_selftest { }; #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include + extern struct i915_selftest i915_selftest; int i915_mock_selftests(void); diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c index d0ec74671ef8..2e571bbe98ae 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c @@ -271,11 +271,14 @@ static void close_object_list(struct list_head *objects, struct i915_address_space *vm) { struct drm_i915_gem_object *obj, *on; + int ignored; list_for_each_entry_safe(obj, on, objects, st_link) { struct i915_vma *vma; vma = i915_vma_instance(obj, vm, NULL); + if (!IS_ERR(vma)) + ignored = i915_vma_unbind(vma); /* Only ppgtt vma may be closed before the object is freed */ if (!IS_ERR(vma) && !i915_vma_is_ggtt(vma)) i915_vma_close(vma); @@ -677,6 +680,95 @@ err_obj: return 0; } +static int __shrink_hole(struct drm_i915_private *i915, + struct i915_address_space *vm, + u64 hole_start, u64 hole_end, + unsigned long end_time) +{ + struct drm_i915_gem_object *obj; + unsigned long flags = PIN_OFFSET_FIXED | PIN_USER; + unsigned int order = 12; + LIST_HEAD(objects); + int err = 0; + u64 addr; + + /* Keep creating larger objects until one cannot fit into the hole */ + for (addr = hole_start; addr < hole_end; ) { + struct i915_vma *vma; + u64 size = BIT_ULL(order++); + + size = min(size, hole_end - addr); + obj = fake_dma_object(i915, size); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + break; + } + + list_add(&obj->st_link, &objects); + + vma = i915_vma_instance(obj, vm, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + break; + } + + GEM_BUG_ON(vma->size != size); + + err = i915_vma_pin(vma, 0, 0, addr | flags); + if (err) { + pr_err("%s failed to pin object at %llx + %llx in hole [%llx - %llx], with err=%d\n", + __func__, addr, size, hole_start, hole_end, err); + break; + } + + if (!drm_mm_node_allocated(&vma->node) || + i915_vma_misplaced(vma, 0, 0, addr | flags)) { + pr_err("%s incorrect at %llx + %llx\n", + __func__, addr, size); + i915_vma_unpin(vma); + err = i915_vma_unbind(vma); + err = -EINVAL; + break; + } + + i915_vma_unpin(vma); + addr += size; + + if (igt_timeout(end_time, + "%s timed out at ofset %llx [%llx - %llx]\n", + __func__, addr, hole_start, hole_end)) { + err = -EINTR; + break; + } + } + + close_object_list(&objects, vm); + return err; +} + +static int shrink_hole(struct drm_i915_private *i915, + struct i915_address_space *vm, + u64 hole_start, u64 hole_end, + unsigned long end_time) +{ + unsigned long prime; + int err; + + i915->vm_fault.probability = 999; + atomic_set(&i915->vm_fault.times, -1); + + for_each_prime_number_from(prime, 0, ULONG_MAX - 1) { + i915->vm_fault.interval = prime; + err = __shrink_hole(i915, vm, hole_start, hole_end, end_time); + if (err) + break; + } + + memset(&i915->vm_fault, 0, sizeof(i915->vm_fault)); + + return err; +} + static int exercise_ppgtt(struct drm_i915_private *dev_priv, int (*func)(struct drm_i915_private *i915, struct i915_address_space *vm, @@ -735,6 +827,11 @@ static int igt_ppgtt_lowlevel(void *arg) return exercise_ppgtt(arg, lowlevel_hole); } +static int igt_ppgtt_shrink(void *arg) +{ + return exercise_ppgtt(arg, shrink_hole); +} + static int sort_holes(void *priv, struct list_head *A, struct list_head *B) { struct drm_mm_node *a = list_entry(A, typeof(*a), hole_stack); @@ -812,6 +909,7 @@ int i915_gem_gtt_live_selftests(struct drm_i915_private *i915) SUBTEST(igt_ppgtt_drunk), SUBTEST(igt_ppgtt_walk), SUBTEST(igt_ppgtt_fill), + SUBTEST(igt_ppgtt_shrink), SUBTEST(igt_ggtt_lowlevel), SUBTEST(igt_ggtt_drunk), SUBTEST(igt_ggtt_walk), -- cgit From e3c7a1c5377e54f99689a18cb85ae7f3b633b58a Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 13 Feb 2017 17:15:45 +0000 Subject: drm/i915: Test creation of VMA Simple test to exercise creation and lookup of VMA within an object. Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170213171558.20942-34-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_vma.c | 3 + .../gpu/drm/i915/selftests/i915_mock_selftests.h | 1 + drivers/gpu/drm/i915/selftests/i915_vma.c | 224 +++++++++++++++++++++ 3 files changed, 228 insertions(+) create mode 100644 drivers/gpu/drm/i915/selftests/i915_vma.c diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index e75494c1ef52..e6c565fd6e61 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -690,3 +690,6 @@ destroy: return 0; } +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftests/i915_vma.c" +#endif diff --git a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h index 955a4d6ccdaf..b450eab7e6e1 100644 --- a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h +++ b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h @@ -15,3 +15,4 @@ selftest(breadcrumbs, intel_breadcrumbs_mock_selftests) selftest(requests, i915_gem_request_mock_selftests) selftest(objects, i915_gem_object_mock_selftests) selftest(dmabuf, i915_gem_dmabuf_mock_selftests) +selftest(vma, i915_vma_mock_selftests) diff --git a/drivers/gpu/drm/i915/selftests/i915_vma.c b/drivers/gpu/drm/i915/selftests/i915_vma.c new file mode 100644 index 000000000000..eb794fcc90ca --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/i915_vma.c @@ -0,0 +1,224 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include + +#include "../i915_selftest.h" + +#include "mock_gem_device.h" +#include "mock_context.h" + +static bool assert_vma(struct i915_vma *vma, + struct drm_i915_gem_object *obj, + struct i915_gem_context *ctx) +{ + bool ok = true; + + if (vma->vm != &ctx->ppgtt->base) { + pr_err("VMA created with wrong VM\n"); + ok = false; + } + + if (vma->size != obj->base.size) { + pr_err("VMA created with wrong size, found %llu, expected %zu\n", + vma->size, obj->base.size); + ok = false; + } + + if (vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL) { + pr_err("VMA created with wrong type [%d]\n", + vma->ggtt_view.type); + ok = false; + } + + return ok; +} + +static struct i915_vma * +checked_vma_instance(struct drm_i915_gem_object *obj, + struct i915_address_space *vm, + struct i915_ggtt_view *view) +{ + struct i915_vma *vma; + bool ok = true; + + vma = i915_vma_instance(obj, vm, view); + if (IS_ERR(vma)) + return vma; + + /* Manual checks, will be reinforced by i915_vma_compare! */ + if (vma->vm != vm) { + pr_err("VMA's vm [%p] does not match request [%p]\n", + vma->vm, vm); + ok = false; + } + + if (i915_is_ggtt(vm) != i915_vma_is_ggtt(vma)) { + pr_err("VMA ggtt status [%d] does not match parent [%d]\n", + i915_vma_is_ggtt(vma), i915_is_ggtt(vm)); + ok = false; + } + + if (i915_vma_compare(vma, vm, view)) { + pr_err("i915_vma_compare failed with create parmaters!\n"); + return ERR_PTR(-EINVAL); + } + + if (i915_vma_compare(vma, vma->vm, + i915_vma_is_ggtt(vma) ? &vma->ggtt_view : NULL)) { + pr_err("i915_vma_compare failed with itself\n"); + return ERR_PTR(-EINVAL); + } + + if (!ok) { + pr_err("i915_vma_compare failed to detect the difference!\n"); + return ERR_PTR(-EINVAL); + } + + return vma; +} + +static int create_vmas(struct drm_i915_private *i915, + struct list_head *objects, + struct list_head *contexts) +{ + struct drm_i915_gem_object *obj; + struct i915_gem_context *ctx; + int pinned; + + list_for_each_entry(obj, objects, st_link) { + for (pinned = 0; pinned <= 1; pinned++) { + list_for_each_entry(ctx, contexts, link) { + struct i915_address_space *vm = + &ctx->ppgtt->base; + struct i915_vma *vma; + int err; + + vma = checked_vma_instance(obj, vm, NULL); + if (IS_ERR(vma)) + return PTR_ERR(vma); + + if (!assert_vma(vma, obj, ctx)) { + pr_err("VMA lookup/create failed\n"); + return -EINVAL; + } + + if (!pinned) { + err = i915_vma_pin(vma, 0, 0, PIN_USER); + if (err) { + pr_err("Failed to pin VMA\n"); + return err; + } + } else { + i915_vma_unpin(vma); + } + } + } + } + + return 0; +} + +static int igt_vma_create(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct drm_i915_gem_object *obj, *on; + struct i915_gem_context *ctx, *cn; + unsigned long num_obj, num_ctx; + unsigned long no, nc; + IGT_TIMEOUT(end_time); + LIST_HEAD(contexts); + LIST_HEAD(objects); + int err; + + /* Exercise creating many vma amonst many objections, checking the + * vma creation and lookup routines. + */ + + no = 0; + for_each_prime_number(num_obj, ULONG_MAX - 1) { + for (; no < num_obj; no++) { + obj = i915_gem_object_create_internal(i915, PAGE_SIZE); + if (IS_ERR(obj)) + goto out; + + list_add(&obj->st_link, &objects); + } + + nc = 0; + for_each_prime_number(num_ctx, MAX_CONTEXT_HW_ID) { + for (; nc < num_ctx; nc++) { + ctx = mock_context(i915, "mock"); + if (!ctx) + goto out; + + list_move(&ctx->link, &contexts); + } + + err = create_vmas(i915, &objects, &contexts); + if (err) + goto out; + + if (igt_timeout(end_time, + "%s timed out: after %lu objects in %lu contexts\n", + __func__, no, nc)) + goto end; + } + + list_for_each_entry_safe(ctx, cn, &contexts, link) + mock_context_close(ctx); + } + +end: + /* Final pass to lookup all created contexts */ + err = create_vmas(i915, &objects, &contexts); +out: + list_for_each_entry_safe(ctx, cn, &contexts, link) + mock_context_close(ctx); + + list_for_each_entry_safe(obj, on, &objects, st_link) + i915_gem_object_put(obj); + return err; +} + +int i915_vma_mock_selftests(void) +{ + static const struct i915_subtest tests[] = { + SUBTEST(igt_vma_create), + }; + struct drm_i915_private *i915; + int err; + + i915 = mock_gem_device(); + if (!i915) + return -ENOMEM; + + mutex_lock(&i915->drm.struct_mutex); + err = i915_subtests(tests, i915); + mutex_unlock(&i915->drm.struct_mutex); + + drm_dev_unref(&i915->drm); + return err; +} + -- cgit From 782a3e9ef2051f2bec7c4fb255aa6a8f7c530da8 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 13 Feb 2017 17:15:46 +0000 Subject: drm/i915: Exercise i915_vma_pin/i915_vma_insert High-level testing of the struct drm_mm by verifying our handling of weird requests to i915_vma_pin. Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170213171558.20942-35-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_vma.c | 4 +- drivers/gpu/drm/i915/i915_vma.h | 4 +- drivers/gpu/drm/i915/selftests/i915_vma.c | 151 ++++++++++++++++++++++++++++++ 3 files changed, 155 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index e6c565fd6e61..0dc994b76924 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -327,8 +327,8 @@ void i915_vma_unpin_and_release(struct i915_vma **p_vma) __i915_gem_object_release_unless_active(obj); } -bool -i915_vma_misplaced(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) +bool i915_vma_misplaced(const struct i915_vma *vma, + u64 size, u64 alignment, u64 flags) { if (!drm_mm_node_allocated(&vma->node)) return false; diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h index e39d922cfb6f..2e03f81dddbe 100644 --- a/drivers/gpu/drm/i915/i915_vma.h +++ b/drivers/gpu/drm/i915/i915_vma.h @@ -228,8 +228,8 @@ i915_vma_compare(struct i915_vma *vma, int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level, u32 flags); bool i915_gem_valid_gtt_space(struct i915_vma *vma, unsigned long cache_level); -bool -i915_vma_misplaced(struct i915_vma *vma, u64 size, u64 alignment, u64 flags); +bool i915_vma_misplaced(const struct i915_vma *vma, + u64 size, u64 alignment, u64 flags); void __i915_vma_set_map_and_fenceable(struct i915_vma *vma); int __must_check i915_vma_unbind(struct i915_vma *vma); void i915_vma_close(struct i915_vma *vma); diff --git a/drivers/gpu/drm/i915/selftests/i915_vma.c b/drivers/gpu/drm/i915/selftests/i915_vma.c index eb794fcc90ca..111158f7daeb 100644 --- a/drivers/gpu/drm/i915/selftests/i915_vma.c +++ b/drivers/gpu/drm/i915/selftests/i915_vma.c @@ -202,10 +202,161 @@ out: return err; } +struct pin_mode { + u64 size; + u64 flags; + bool (*assert)(const struct i915_vma *, + const struct pin_mode *mode, + int result); + const char *string; +}; + +static bool assert_pin_valid(const struct i915_vma *vma, + const struct pin_mode *mode, + int result) +{ + if (result) + return false; + + if (i915_vma_misplaced(vma, mode->size, 0, mode->flags)) + return false; + + return true; +} + +__maybe_unused +static bool assert_pin_e2big(const struct i915_vma *vma, + const struct pin_mode *mode, + int result) +{ + return result == -E2BIG; +} + +__maybe_unused +static bool assert_pin_enospc(const struct i915_vma *vma, + const struct pin_mode *mode, + int result) +{ + return result == -ENOSPC; +} + +__maybe_unused +static bool assert_pin_einval(const struct i915_vma *vma, + const struct pin_mode *mode, + int result) +{ + return result == -EINVAL; +} + +static int igt_vma_pin1(void *arg) +{ + struct drm_i915_private *i915 = arg; + const struct pin_mode modes[] = { +#define VALID(sz, fl) { .size = (sz), .flags = (fl), .assert = assert_pin_valid, .string = #sz ", " #fl ", (valid) " } +#define __INVALID(sz, fl, check, eval) { .size = (sz), .flags = (fl), .assert = (check), .string = #sz ", " #fl ", (invalid " #eval ")" } +#define INVALID(sz, fl) __INVALID(sz, fl, assert_pin_einval, EINVAL) +#define TOOBIG(sz, fl) __INVALID(sz, fl, assert_pin_e2big, E2BIG) +#define NOSPACE(sz, fl) __INVALID(sz, fl, assert_pin_enospc, ENOSPC) + VALID(0, PIN_GLOBAL), + VALID(0, PIN_GLOBAL | PIN_MAPPABLE), + + VALID(0, PIN_GLOBAL | PIN_OFFSET_BIAS | 4096), + VALID(0, PIN_GLOBAL | PIN_OFFSET_BIAS | 8192), + VALID(0, PIN_GLOBAL | PIN_OFFSET_BIAS | (i915->ggtt.mappable_end - 4096)), + VALID(0, PIN_GLOBAL | PIN_MAPPABLE | PIN_OFFSET_BIAS | (i915->ggtt.mappable_end - 4096)), + VALID(0, PIN_GLOBAL | PIN_OFFSET_BIAS | (i915->ggtt.base.total - 4096)), + + VALID(0, PIN_GLOBAL | PIN_MAPPABLE | PIN_OFFSET_FIXED | (i915->ggtt.mappable_end - 4096)), + INVALID(0, PIN_GLOBAL | PIN_MAPPABLE | PIN_OFFSET_FIXED | i915->ggtt.mappable_end), + VALID(0, PIN_GLOBAL | PIN_OFFSET_FIXED | (i915->ggtt.base.total - 4096)), + INVALID(0, PIN_GLOBAL | PIN_OFFSET_FIXED | i915->ggtt.base.total), + INVALID(0, PIN_GLOBAL | PIN_OFFSET_FIXED | round_down(U64_MAX, PAGE_SIZE)), + + VALID(4096, PIN_GLOBAL), + VALID(8192, PIN_GLOBAL), + VALID(i915->ggtt.mappable_end - 4096, PIN_GLOBAL | PIN_MAPPABLE), + VALID(i915->ggtt.mappable_end, PIN_GLOBAL | PIN_MAPPABLE), + TOOBIG(i915->ggtt.mappable_end + 4096, PIN_GLOBAL | PIN_MAPPABLE), + VALID(i915->ggtt.base.total - 4096, PIN_GLOBAL), + VALID(i915->ggtt.base.total, PIN_GLOBAL), + TOOBIG(i915->ggtt.base.total + 4096, PIN_GLOBAL), + TOOBIG(round_down(U64_MAX, PAGE_SIZE), PIN_GLOBAL), + INVALID(8192, PIN_GLOBAL | PIN_MAPPABLE | PIN_OFFSET_FIXED | (i915->ggtt.mappable_end - 4096)), + INVALID(8192, PIN_GLOBAL | PIN_OFFSET_FIXED | (i915->ggtt.base.total - 4096)), + INVALID(8192, PIN_GLOBAL | PIN_OFFSET_FIXED | (round_down(U64_MAX, PAGE_SIZE) - 4096)), + + VALID(8192, PIN_GLOBAL | PIN_OFFSET_BIAS | (i915->ggtt.mappable_end - 4096)), + +#if !IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) + /* Misusing BIAS is a programming error (it is not controllable + * from userspace) so when debugging is enabled, it explodes. + * However, the tests are still quite interesting for checking + * variable start, end and size. + */ + NOSPACE(0, PIN_GLOBAL | PIN_MAPPABLE | PIN_OFFSET_BIAS | i915->ggtt.mappable_end), + NOSPACE(0, PIN_GLOBAL | PIN_OFFSET_BIAS | i915->ggtt.base.total), + NOSPACE(8192, PIN_GLOBAL | PIN_MAPPABLE | PIN_OFFSET_BIAS | (i915->ggtt.mappable_end - 4096)), + NOSPACE(8192, PIN_GLOBAL | PIN_OFFSET_BIAS | (i915->ggtt.base.total - 4096)), +#endif + { }, +#undef NOSPACE +#undef TOOBIG +#undef INVALID +#undef __INVALID +#undef VALID + }, *m; + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + int err = -EINVAL; + + /* Exercise all the weird and wonderful i915_vma_pin requests, + * focusing on error handling of boundary conditions. + */ + + GEM_BUG_ON(!drm_mm_clean(&i915->ggtt.base.mm)); + + obj = i915_gem_object_create_internal(i915, PAGE_SIZE); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + vma = checked_vma_instance(obj, &i915->ggtt.base, NULL); + if (IS_ERR(vma)) + goto out; + + for (m = modes; m->assert; m++) { + err = i915_vma_pin(vma, m->size, 0, m->flags); + if (!m->assert(vma, m, err)) { + pr_err("%s to pin single page into GGTT with mode[%d:%s]: size=%llx flags=%llx, err=%d\n", + m->assert == assert_pin_valid ? "Failed" : "Unexpectedly succeeded", + (int)(m - modes), m->string, m->size, m->flags, + err); + if (!err) + i915_vma_unpin(vma); + err = -EINVAL; + goto out; + } + + if (!err) { + i915_vma_unpin(vma); + err = i915_vma_unbind(vma); + if (err) { + pr_err("Failed to unbind single page from GGTT, err=%d\n", err); + goto out; + } + } + } + + err = 0; +out: + i915_gem_object_put(obj); + return err; +} + int i915_vma_mock_selftests(void) { static const struct i915_subtest tests[] = { SUBTEST(igt_vma_create), + SUBTEST(igt_vma_pin1), }; struct drm_i915_private *i915; int err; -- cgit From a231bf643dcc5a221afbc52093e73f23d50d96ff Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 13 Feb 2017 17:15:47 +0000 Subject: drm/i915: Verify page layout for rotated VMA Exercise creating rotated VMA and checking the page order within. v2..v3: Be more creative in rotated params Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170213171558.20942-36-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/selftests/i915_vma.c | 179 ++++++++++++++++++++++++++++++ 1 file changed, 179 insertions(+) diff --git a/drivers/gpu/drm/i915/selftests/i915_vma.c b/drivers/gpu/drm/i915/selftests/i915_vma.c index 111158f7daeb..c8869fc217fd 100644 --- a/drivers/gpu/drm/i915/selftests/i915_vma.c +++ b/drivers/gpu/drm/i915/selftests/i915_vma.c @@ -352,11 +352,190 @@ out: return err; } +static unsigned long rotated_index(const struct intel_rotation_info *r, + unsigned int n, + unsigned int x, + unsigned int y) +{ + return (r->plane[n].stride * (r->plane[n].height - y - 1) + + r->plane[n].offset + x); +} + +static struct scatterlist * +assert_rotated(struct drm_i915_gem_object *obj, + const struct intel_rotation_info *r, unsigned int n, + struct scatterlist *sg) +{ + unsigned int x, y; + + for (x = 0; x < r->plane[n].width; x++) { + for (y = 0; y < r->plane[n].height; y++) { + unsigned long src_idx; + dma_addr_t src; + + if (!sg) { + pr_err("Invalid sg table: too short at plane %d, (%d, %d)!\n", + n, x, y); + return ERR_PTR(-EINVAL); + } + + src_idx = rotated_index(r, n, x, y); + src = i915_gem_object_get_dma_address(obj, src_idx); + + if (sg_dma_len(sg) != PAGE_SIZE) { + pr_err("Invalid sg.length, found %d, expected %lu for rotated page (%d, %d) [src index %lu]\n", + sg_dma_len(sg), PAGE_SIZE, + x, y, src_idx); + return ERR_PTR(-EINVAL); + } + + if (sg_dma_address(sg) != src) { + pr_err("Invalid address for rotated page (%d, %d) [src index %lu]\n", + x, y, src_idx); + return ERR_PTR(-EINVAL); + } + + sg = sg_next(sg); + } + } + + return sg; +} + +static unsigned int rotated_size(const struct intel_rotation_plane_info *a, + const struct intel_rotation_plane_info *b) +{ + return a->width * a->height + b->width * b->height; +} + +static int igt_vma_rotate(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct i915_address_space *vm = &i915->ggtt.base; + struct drm_i915_gem_object *obj; + const struct intel_rotation_plane_info planes[] = { + { .width = 1, .height = 1, .stride = 1 }, + { .width = 2, .height = 2, .stride = 2 }, + { .width = 4, .height = 4, .stride = 4 }, + { .width = 8, .height = 8, .stride = 8 }, + + { .width = 3, .height = 5, .stride = 3 }, + { .width = 3, .height = 5, .stride = 4 }, + { .width = 3, .height = 5, .stride = 5 }, + + { .width = 5, .height = 3, .stride = 5 }, + { .width = 5, .height = 3, .stride = 7 }, + { .width = 5, .height = 3, .stride = 9 }, + + { .width = 4, .height = 6, .stride = 6 }, + { .width = 6, .height = 4, .stride = 6 }, + { } + }, *a, *b; + const unsigned int max_pages = 64; + int err = -ENOMEM; + + /* Create VMA for many different combinations of planes and check + * that the page layout within the rotated VMA match our expectations. + */ + + obj = i915_gem_object_create_internal(i915, max_pages * PAGE_SIZE); + if (IS_ERR(obj)) + goto out; + + for (a = planes; a->width; a++) { + for (b = planes + ARRAY_SIZE(planes); b-- != planes; ) { + struct i915_ggtt_view view; + unsigned int n, max_offset; + + max_offset = max(a->stride * a->height, + b->stride * b->height); + GEM_BUG_ON(max_offset > max_pages); + max_offset = max_pages - max_offset; + + view.type = I915_GGTT_VIEW_ROTATED; + view.rotated.plane[0] = *a; + view.rotated.plane[1] = *b; + + for_each_prime_number_from(view.rotated.plane[0].offset, 0, max_offset) { + for_each_prime_number_from(view.rotated.plane[1].offset, 0, max_offset) { + struct scatterlist *sg; + struct i915_vma *vma; + + vma = checked_vma_instance(obj, vm, &view); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto out_object; + } + + err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL); + if (err) { + pr_err("Failed to pin VMA, err=%d\n", err); + goto out_object; + } + + if (vma->size != rotated_size(a, b) * PAGE_SIZE) { + pr_err("VMA is wrong size, expected %lu, found %llu\n", + PAGE_SIZE * rotated_size(a, b), vma->size); + err = -EINVAL; + goto out_object; + } + + if (vma->pages->nents != rotated_size(a, b)) { + pr_err("sg table is wrong sizeo, expected %u, found %u nents\n", + rotated_size(a, b), vma->pages->nents); + err = -EINVAL; + goto out_object; + } + + if (vma->node.size < vma->size) { + pr_err("VMA binding too small, expected %llu, found %llu\n", + vma->size, vma->node.size); + err = -EINVAL; + goto out_object; + } + + if (vma->pages == obj->mm.pages) { + pr_err("VMA using unrotated object pages!\n"); + err = -EINVAL; + goto out_object; + } + + sg = vma->pages->sgl; + for (n = 0; n < ARRAY_SIZE(view.rotated.plane); n++) { + sg = assert_rotated(obj, &view.rotated, n, sg); + if (IS_ERR(sg)) { + pr_err("Inconsistent VMA pages for plane %d: [(%d, %d, %d, %d), (%d, %d, %d, %d)]\n", n, + view.rotated.plane[0].width, + view.rotated.plane[0].height, + view.rotated.plane[0].stride, + view.rotated.plane[0].offset, + view.rotated.plane[1].width, + view.rotated.plane[1].height, + view.rotated.plane[1].stride, + view.rotated.plane[1].offset); + err = -EINVAL; + goto out_object; + } + } + + i915_vma_unpin(vma); + } + } + } + } + +out_object: + i915_gem_object_put(obj); +out: + return err; +} + int i915_vma_mock_selftests(void) { static const struct i915_subtest tests[] = { SUBTEST(igt_vma_create), SUBTEST(igt_vma_pin1), + SUBTEST(igt_vma_rotate), }; struct drm_i915_private *i915; int err; -- cgit From af1f83a152b5cf6a9a780b7204805cbe0ff905da Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 13 Feb 2017 17:15:48 +0000 Subject: drm/i915: Test creation of partial VMA Mock testing to ensure we can create and lookup partial VMA. v2: Named phases Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170213171558.20942-37-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/selftests/i915_vma.c | 192 ++++++++++++++++++++++++++++++ 1 file changed, 192 insertions(+) diff --git a/drivers/gpu/drm/i915/selftests/i915_vma.c b/drivers/gpu/drm/i915/selftests/i915_vma.c index c8869fc217fd..ad56566e24db 100644 --- a/drivers/gpu/drm/i915/selftests/i915_vma.c +++ b/drivers/gpu/drm/i915/selftests/i915_vma.c @@ -530,12 +530,204 @@ out: return err; } +static bool assert_partial(struct drm_i915_gem_object *obj, + struct i915_vma *vma, + unsigned long offset, + unsigned long size) +{ + struct sgt_iter sgt; + dma_addr_t dma; + + for_each_sgt_dma(dma, sgt, vma->pages) { + dma_addr_t src; + + if (!size) { + pr_err("Partial scattergather list too long\n"); + return false; + } + + src = i915_gem_object_get_dma_address(obj, offset); + if (src != dma) { + pr_err("DMA mismatch for partial page offset %lu\n", + offset); + return false; + } + + offset++; + size--; + } + + return true; +} + +static bool assert_pin(struct i915_vma *vma, + struct i915_ggtt_view *view, + u64 size, + const char *name) +{ + bool ok = true; + + if (vma->size != size) { + pr_err("(%s) VMA is wrong size, expected %llu, found %llu\n", + name, size, vma->size); + ok = false; + } + + if (vma->node.size < vma->size) { + pr_err("(%s) VMA binding too small, expected %llu, found %llu\n", + name, vma->size, vma->node.size); + ok = false; + } + + if (view && view->type != I915_GGTT_VIEW_NORMAL) { + if (memcmp(&vma->ggtt_view, view, sizeof(*view))) { + pr_err("(%s) VMA mismatch upon creation!\n", + name); + ok = false; + } + + if (vma->pages == vma->obj->mm.pages) { + pr_err("(%s) VMA using original object pages!\n", + name); + ok = false; + } + } else { + if (vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL) { + pr_err("Not the normal ggtt view! Found %d\n", + vma->ggtt_view.type); + ok = false; + } + + if (vma->pages != vma->obj->mm.pages) { + pr_err("VMA not using object pages!\n"); + ok = false; + } + } + + return ok; +} + +static int igt_vma_partial(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct i915_address_space *vm = &i915->ggtt.base; + const unsigned int npages = 1021; /* prime! */ + struct drm_i915_gem_object *obj; + const struct phase { + const char *name; + } phases[] = { + { "create" }, + { "lookup" }, + { }, + }, *p; + unsigned int sz, offset; + struct i915_vma *vma; + int err = -ENOMEM; + + /* Create lots of different VMA for the object and check that + * we are returned the same VMA when we later request the same range. + */ + + obj = i915_gem_object_create_internal(i915, npages*PAGE_SIZE); + if (IS_ERR(obj)) + goto out; + + for (p = phases; p->name; p++) { /* exercise both create/lookup */ + unsigned int count, nvma; + + nvma = 0; + for_each_prime_number_from(sz, 1, npages) { + for_each_prime_number_from(offset, 0, npages - sz) { + struct i915_ggtt_view view; + + view.type = I915_GGTT_VIEW_PARTIAL; + view.partial.offset = offset; + view.partial.size = sz; + + if (sz == npages) + view.type = I915_GGTT_VIEW_NORMAL; + + vma = checked_vma_instance(obj, vm, &view); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto out_object; + } + + err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL); + if (err) + goto out_object; + + if (!assert_pin(vma, &view, sz*PAGE_SIZE, p->name)) { + pr_err("(%s) Inconsistent partial pinning for (offset=%d, size=%d)\n", + p->name, offset, sz); + err = -EINVAL; + goto out_object; + } + + if (!assert_partial(obj, vma, offset, sz)) { + pr_err("(%s) Inconsistent partial pages for (offset=%d, size=%d)\n", + p->name, offset, sz); + err = -EINVAL; + goto out_object; + } + + i915_vma_unpin(vma); + nvma++; + } + } + + count = 0; + list_for_each_entry(vma, &obj->vma_list, obj_link) + count++; + if (count != nvma) { + pr_err("(%s) All partial vma were not recorded on the obj->vma_list: found %u, expected %u\n", + p->name, count, nvma); + err = -EINVAL; + goto out_object; + } + + /* Check that we did create the whole object mapping */ + vma = checked_vma_instance(obj, vm, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto out_object; + } + + err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL); + if (err) + goto out_object; + + if (!assert_pin(vma, NULL, obj->base.size, p->name)) { + pr_err("(%s) inconsistent full pin\n", p->name); + err = -EINVAL; + goto out_object; + } + + i915_vma_unpin(vma); + + count = 0; + list_for_each_entry(vma, &obj->vma_list, obj_link) + count++; + if (count != nvma) { + pr_err("(%s) allocated an extra full vma!\n", p->name); + err = -EINVAL; + goto out_object; + } + } + +out_object: + i915_gem_object_put(obj); +out: + return err; +} + int i915_vma_mock_selftests(void) { static const struct i915_subtest tests[] = { SUBTEST(igt_vma_create), SUBTEST(igt_vma_pin1), SUBTEST(igt_vma_rotate), + SUBTEST(igt_vma_partial), }; struct drm_i915_private *i915; int err; -- cgit From 791ff39ae32a34816cdca40891f142f4122ad409 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 13 Feb 2017 17:15:49 +0000 Subject: drm/i915: Live testing for context execution Check we can create and execution within a context. v2: Write one set of dwords through each context/engine to exercise more contexts within the same time period. Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170213171558.20942-38-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_context.c | 1 + drivers/gpu/drm/i915/selftests/i915_gem_context.c | 404 +++++++++++++++++++++ .../gpu/drm/i915/selftests/i915_live_selftests.h | 1 + 3 files changed, 406 insertions(+) create mode 100644 drivers/gpu/drm/i915/selftests/i915_gem_context.c diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index d27c4050b4c5..c73bf02870d5 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -1202,4 +1202,5 @@ int i915_gem_context_reset_stats_ioctl(struct drm_device *dev, #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) #include "selftests/mock_context.c" +#include "selftests/i915_gem_context.c" #endif diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c new file mode 100644 index 000000000000..4cfdc2f0eda7 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c @@ -0,0 +1,404 @@ +/* + * Copyright © 2017 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include "../i915_selftest.h" + +#include "mock_drm.h" +#include "huge_gem_object.h" + +#define DW_PER_PAGE (PAGE_SIZE / sizeof(u32)) + +static struct i915_vma * +gpu_fill_dw(struct i915_vma *vma, u64 offset, unsigned long count, u32 value) +{ + struct drm_i915_gem_object *obj; + const int gen = INTEL_GEN(vma->vm->i915); + unsigned long n, size; + u32 *cmd; + int err; + + GEM_BUG_ON(!igt_can_mi_store_dword_imm(vma->vm->i915)); + + size = (4 * count + 1) * sizeof(u32); + size = round_up(size, PAGE_SIZE); + obj = i915_gem_object_create_internal(vma->vm->i915, size); + if (IS_ERR(obj)) + return ERR_CAST(obj); + + cmd = i915_gem_object_pin_map(obj, I915_MAP_WB); + if (IS_ERR(cmd)) { + err = PTR_ERR(cmd); + goto err; + } + + GEM_BUG_ON(offset + (count - 1) * PAGE_SIZE > vma->node.size); + offset += vma->node.start; + + for (n = 0; n < count; n++) { + if (gen >= 8) { + *cmd++ = MI_STORE_DWORD_IMM_GEN4; + *cmd++ = lower_32_bits(offset); + *cmd++ = upper_32_bits(offset); + *cmd++ = value; + } else if (gen >= 4) { + *cmd++ = MI_STORE_DWORD_IMM_GEN4 | + (gen < 6 ? 1 << 22 : 0); + *cmd++ = 0; + *cmd++ = offset; + *cmd++ = value; + } else { + *cmd++ = MI_STORE_DWORD_IMM | 1 << 22; + *cmd++ = offset; + *cmd++ = value; + } + offset += PAGE_SIZE; + } + *cmd = MI_BATCH_BUFFER_END; + i915_gem_object_unpin_map(obj); + + err = i915_gem_object_set_to_gtt_domain(obj, false); + if (err) + goto err; + + vma = i915_vma_instance(obj, vma->vm, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto err; + } + + err = i915_vma_pin(vma, 0, 0, PIN_USER); + if (err) + goto err; + + return vma; + +err: + i915_gem_object_put(obj); + return ERR_PTR(err); +} + +static unsigned long real_page_count(struct drm_i915_gem_object *obj) +{ + return huge_gem_object_phys_size(obj) >> PAGE_SHIFT; +} + +static unsigned long fake_page_count(struct drm_i915_gem_object *obj) +{ + return huge_gem_object_dma_size(obj) >> PAGE_SHIFT; +} + +static int gpu_fill(struct drm_i915_gem_object *obj, + struct i915_gem_context *ctx, + struct intel_engine_cs *engine, + unsigned int dw) +{ + struct drm_i915_private *i915 = to_i915(obj->base.dev); + struct i915_address_space *vm = + ctx->ppgtt ? &ctx->ppgtt->base : &i915->ggtt.base; + struct drm_i915_gem_request *rq; + struct i915_vma *vma; + struct i915_vma *batch; + unsigned int flags; + int err; + + GEM_BUG_ON(obj->base.size > vm->total); + + vma = i915_vma_instance(obj, vm, NULL); + if (IS_ERR(vma)) + return PTR_ERR(vma); + + err = i915_gem_object_set_to_gtt_domain(obj, false); + if (err) + return err; + + err = i915_vma_pin(vma, 0, 0, PIN_HIGH | PIN_USER); + if (err) + return err; + + /* Within the GTT the huge objects maps every page onto + * its 1024 real pages (using phys_pfn = dma_pfn % 1024). + * We set the nth dword within the page using the nth + * mapping via the GTT - this should exercise the GTT mapping + * whilst checking that each context provides a unique view + * into the object. + */ + batch = gpu_fill_dw(vma, + (dw * real_page_count(obj)) << PAGE_SHIFT | + (dw * sizeof(u32)), + real_page_count(obj), + dw); + if (IS_ERR(batch)) { + err = PTR_ERR(batch); + goto err_vma; + } + + rq = i915_gem_request_alloc(engine, ctx); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto err_batch; + } + + err = engine->emit_flush(rq, EMIT_INVALIDATE); + if (err) + goto err_request; + + err = i915_switch_context(rq); + if (err) + goto err_request; + + flags = 0; + if (INTEL_GEN(vm->i915) <= 5) + flags |= I915_DISPATCH_SECURE; + + err = engine->emit_bb_start(rq, + batch->node.start, batch->node.size, + flags); + if (err) + goto err_request; + + i915_vma_move_to_active(batch, rq, 0); + i915_gem_object_set_active_reference(batch->obj); + i915_vma_unpin(batch); + i915_vma_close(batch); + + i915_vma_move_to_active(vma, rq, 0); + i915_vma_unpin(vma); + + reservation_object_lock(obj->resv, NULL); + reservation_object_add_excl_fence(obj->resv, &rq->fence); + reservation_object_unlock(obj->resv); + + __i915_add_request(rq, true); + + return 0; + +err_request: + __i915_add_request(rq, false); +err_batch: + i915_vma_unpin(batch); +err_vma: + i915_vma_unpin(vma); + return err; +} + +static int cpu_fill(struct drm_i915_gem_object *obj, u32 value) +{ + const bool has_llc = HAS_LLC(to_i915(obj->base.dev)); + unsigned int n, m, need_flush; + int err; + + err = i915_gem_obj_prepare_shmem_write(obj, &need_flush); + if (err) + return err; + + for (n = 0; n < real_page_count(obj); n++) { + u32 *map; + + map = kmap_atomic(i915_gem_object_get_page(obj, n)); + for (m = 0; m < DW_PER_PAGE; m++) + map[m] = value; + if (!has_llc) + drm_clflush_virt_range(map, PAGE_SIZE); + kunmap_atomic(map); + } + + i915_gem_obj_finish_shmem_access(obj); + obj->base.read_domains = I915_GEM_DOMAIN_GTT | I915_GEM_DOMAIN_CPU; + obj->base.write_domain = 0; + return 0; +} + +static int cpu_check(struct drm_i915_gem_object *obj, unsigned int max) +{ + unsigned int n, m, needs_flush; + int err; + + err = i915_gem_obj_prepare_shmem_read(obj, &needs_flush); + if (err) + return err; + + for (n = 0; n < real_page_count(obj); n++) { + u32 *map; + + map = kmap_atomic(i915_gem_object_get_page(obj, n)); + if (needs_flush & CLFLUSH_BEFORE) + drm_clflush_virt_range(map, PAGE_SIZE); + + for (m = 0; m < max; m++) { + if (map[m] != m) { + pr_err("Invalid value at page %d, offset %d: found %x expected %x\n", + n, m, map[m], m); + err = -EINVAL; + goto out_unmap; + } + } + + for (; m < DW_PER_PAGE; m++) { + if (map[m] != 0xdeadbeef) { + pr_err("Invalid value at page %d, offset %d: found %x expected %x\n", + n, m, map[m], 0xdeadbeef); + err = -EINVAL; + goto out_unmap; + } + } + +out_unmap: + kunmap_atomic(map); + if (err) + break; + } + + i915_gem_obj_finish_shmem_access(obj); + return err; +} + +static struct drm_i915_gem_object * +create_test_object(struct i915_gem_context *ctx, + struct drm_file *file, + struct list_head *objects) +{ + struct drm_i915_gem_object *obj; + struct i915_address_space *vm = + ctx->ppgtt ? &ctx->ppgtt->base : &ctx->i915->ggtt.base; + u64 size; + u32 handle; + int err; + + size = min(vm->total / 2, 1024ull * DW_PER_PAGE * PAGE_SIZE); + size = round_down(size, DW_PER_PAGE * PAGE_SIZE); + + obj = huge_gem_object(ctx->i915, DW_PER_PAGE * PAGE_SIZE, size); + if (IS_ERR(obj)) + return obj; + + /* tie the handle to the drm_file for easy reaping */ + err = drm_gem_handle_create(file, &obj->base, &handle); + i915_gem_object_put(obj); + if (err) + return ERR_PTR(err); + + err = cpu_fill(obj, 0xdeadbeef); + if (err) { + pr_err("Failed to fill object with cpu, err=%d\n", + err); + return ERR_PTR(err); + } + + list_add_tail(&obj->st_link, objects); + return obj; +} + +static unsigned long max_dwords(struct drm_i915_gem_object *obj) +{ + unsigned long npages = fake_page_count(obj); + + GEM_BUG_ON(!IS_ALIGNED(npages, DW_PER_PAGE)); + return npages / DW_PER_PAGE; +} + +static int igt_ctx_exec(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct drm_file *file = mock_file(i915); + struct drm_i915_gem_object *obj; + IGT_TIMEOUT(end_time); + LIST_HEAD(objects); + unsigned long ncontexts, ndwords, dw; + int err; + + /* Create a few different contexts (with different mm) and write + * through each ctx/mm using the GPU making sure those writes end + * up in the expected pages of our obj. + */ + + mutex_lock(&i915->drm.struct_mutex); + + ncontexts = 0; + ndwords = 0; + dw = 0; + while (!time_after(jiffies, end_time)) { + struct intel_engine_cs *engine; + struct i915_gem_context *ctx; + unsigned int id; + + ctx = i915_gem_create_context(i915, file->driver_priv); + if (IS_ERR(ctx)) { + err = PTR_ERR(ctx); + goto out_unlock; + } + + for_each_engine(engine, i915, id) { + if (dw == 0) { + obj = create_test_object(ctx, file, &objects); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + goto out_unlock; + } + } + + err = gpu_fill(obj, ctx, engine, dw); + if (err) { + pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) in ctx %u [full-ppgtt? %s], err=%d\n", + ndwords, dw, max_dwords(obj), + engine->name, ctx->hw_id, + yesno(!!ctx->ppgtt), err); + goto out_unlock; + } + + if (++dw == max_dwords(obj)) + dw = 0; + ndwords++; + } + ncontexts++; + } + pr_info("Submitted %lu contexts (across %u engines), filling %lu dwords\n", + ncontexts, INTEL_INFO(i915)->num_rings, ndwords); + + dw = 0; + list_for_each_entry(obj, &objects, st_link) { + unsigned int rem = + min_t(unsigned int, ndwords - dw, max_dwords(obj)); + + err = cpu_check(obj, rem); + if (err) + break; + + dw += rem; + } + +out_unlock: + mutex_unlock(&i915->drm.struct_mutex); + + mock_file_free(i915, file); + return err; +} + +int i915_gem_context_live_selftests(struct drm_i915_private *i915) +{ + static const struct i915_subtest tests[] = { + SUBTEST(igt_ctx_exec), + }; + + return i915_subtests(tests, i915); +} diff --git a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h index d6c869c5b54f..eedd7901ce61 100644 --- a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h +++ b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h @@ -15,3 +15,4 @@ selftest(objects, i915_gem_object_live_selftests) selftest(dmabuf, i915_gem_dmabuf_live_selftests) selftest(coherency, i915_gem_coherency_live_selftests) selftest(gtt, i915_gem_gtt_live_selftests) +selftest(contexts, i915_gem_context_live_selftests) -- cgit From 6cde9a02e001532744a56052b2aecbda8c51bf16 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 13 Feb 2017 17:15:50 +0000 Subject: drm/i915: Extract aliasing ppgtt setup In order to force testing of the aliasing ppgtt, extract its initialisation function. v2: Also extract the cleanup function for symmetry. Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170213171558.20942-39-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_gtt.c | 90 +++++++++++++++++++++++-------------- drivers/gpu/drm/i915/i915_gem_gtt.h | 3 ++ 2 files changed, 59 insertions(+), 34 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index f8ac69380a0f..eebbffdb9a0b 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -2738,6 +2738,59 @@ static void i915_gtt_color_adjust(const struct drm_mm_node *node, *end -= I915_GTT_PAGE_SIZE; } +int i915_gem_init_aliasing_ppgtt(struct drm_i915_private *i915) +{ + struct i915_ggtt *ggtt = &i915->ggtt; + struct i915_hw_ppgtt *ppgtt; + int err; + + ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL); + if (!ppgtt) + return -ENOMEM; + + err = __hw_ppgtt_init(ppgtt, i915); + if (err) + goto err_ppgtt; + + if (ppgtt->base.allocate_va_range) { + err = ppgtt->base.allocate_va_range(&ppgtt->base, + 0, ppgtt->base.total); + if (err) + goto err_ppgtt_cleanup; + } + + ppgtt->base.clear_range(&ppgtt->base, + ppgtt->base.start, + ppgtt->base.total); + + i915->mm.aliasing_ppgtt = ppgtt; + WARN_ON(ggtt->base.bind_vma != ggtt_bind_vma); + ggtt->base.bind_vma = aliasing_gtt_bind_vma; + + return 0; + +err_ppgtt_cleanup: + ppgtt->base.cleanup(&ppgtt->base); +err_ppgtt: + kfree(ppgtt); + return err; +} + +void i915_gem_fini_aliasing_ppgtt(struct drm_i915_private *i915) +{ + struct i915_ggtt *ggtt = &i915->ggtt; + struct i915_hw_ppgtt *ppgtt; + + ppgtt = fetch_and_zero(&i915->mm.aliasing_ppgtt); + if (!ppgtt) + return; + + ppgtt->base.cleanup(&ppgtt->base); + kfree(ppgtt); + + ggtt->base.bind_vma = ggtt_bind_vma; +} + int i915_gem_init_ggtt(struct drm_i915_private *dev_priv) { /* Let GEM Manage all of the aperture. @@ -2751,7 +2804,6 @@ int i915_gem_init_ggtt(struct drm_i915_private *dev_priv) */ struct i915_ggtt *ggtt = &dev_priv->ggtt; unsigned long hole_start, hole_end; - struct i915_hw_ppgtt *ppgtt; struct drm_mm_node *entry; int ret; @@ -2780,38 +2832,13 @@ int i915_gem_init_ggtt(struct drm_i915_private *dev_priv) ggtt->base.total - PAGE_SIZE, PAGE_SIZE); if (USES_PPGTT(dev_priv) && !USES_FULL_PPGTT(dev_priv)) { - ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL); - if (!ppgtt) { - ret = -ENOMEM; - goto err; - } - - ret = __hw_ppgtt_init(ppgtt, dev_priv); + ret = i915_gem_init_aliasing_ppgtt(dev_priv); if (ret) - goto err_ppgtt; - - if (ppgtt->base.allocate_va_range) { - ret = ppgtt->base.allocate_va_range(&ppgtt->base, 0, - ppgtt->base.total); - if (ret) - goto err_ppgtt_cleanup; - } - - ppgtt->base.clear_range(&ppgtt->base, - ppgtt->base.start, - ppgtt->base.total); - - dev_priv->mm.aliasing_ppgtt = ppgtt; - WARN_ON(ggtt->base.bind_vma != ggtt_bind_vma); - ggtt->base.bind_vma = aliasing_gtt_bind_vma; + goto err; } return 0; -err_ppgtt_cleanup: - ppgtt->base.cleanup(&ppgtt->base); -err_ppgtt: - kfree(ppgtt); err: drm_mm_remove_node(&ggtt->error_capture); return ret; @@ -2834,12 +2861,7 @@ void i915_ggtt_cleanup_hw(struct drm_i915_private *dev_priv) WARN_ON(i915_vma_unbind(vma)); mutex_unlock(&dev_priv->drm.struct_mutex); - if (dev_priv->mm.aliasing_ppgtt) { - struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt; - ppgtt->base.cleanup(&ppgtt->base); - kfree(ppgtt); - } - + i915_gem_fini_aliasing_ppgtt(dev_priv); i915_gem_cleanup_stolen(&dev_priv->drm); if (drm_mm_node_allocated(&ggtt->error_capture)) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index 7e678ce5a9c7..fe922059a412 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -531,6 +531,9 @@ i915_vm_is_48bit(const struct i915_address_space *vm) return (vm->total - 1) >> 32; } +int i915_gem_init_aliasing_ppgtt(struct drm_i915_private *i915); +void i915_gem_fini_aliasing_ppgtt(struct drm_i915_private *i915); + int i915_ggtt_probe_hw(struct drm_i915_private *dev_priv); int i915_ggtt_init_hw(struct drm_i915_private *dev_priv); int i915_ggtt_enable_hw(struct drm_i915_private *dev_priv); -- cgit From 92fdf8d4a39a9545c27b7ec2d5b138ff8cde5f91 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 13 Feb 2017 17:15:51 +0000 Subject: drm/i915: Force an aliasing_ppgtt test for context execution Ensure that we minimally exercise the aliasing_ppgtt, even on a full-ppgtt, by allocating one and similarly creating a context to use it. Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170213171558.20942-40-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/selftests/i915_gem_context.c | 61 +++++++++++++++++++++-- 1 file changed, 58 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c index 4cfdc2f0eda7..3813a19a6179 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c @@ -325,6 +325,7 @@ static int igt_ctx_exec(void *arg) IGT_TIMEOUT(end_time); LIST_HEAD(objects); unsigned long ncontexts, ndwords, dw; + bool first_shared_gtt = true; int err; /* Create a few different contexts (with different mm) and write @@ -342,7 +343,12 @@ static int igt_ctx_exec(void *arg) struct i915_gem_context *ctx; unsigned int id; - ctx = i915_gem_create_context(i915, file->driver_priv); + if (first_shared_gtt) { + ctx = __create_hw_context(i915, file->driver_priv); + first_shared_gtt = false; + } else { + ctx = i915_gem_create_context(i915, file->driver_priv); + } if (IS_ERR(ctx)) { err = PTR_ERR(ctx); goto out_unlock; @@ -394,11 +400,60 @@ out_unlock: return err; } -int i915_gem_context_live_selftests(struct drm_i915_private *i915) +static int fake_aliasing_ppgtt_enable(struct drm_i915_private *i915) +{ + struct drm_i915_gem_object *obj; + int err; + + err = i915_gem_init_aliasing_ppgtt(i915); + if (err) + return err; + + list_for_each_entry(obj, &i915->mm.bound_list, global_link) { + struct i915_vma *vma; + + vma = i915_vma_instance(obj, &i915->ggtt.base, NULL); + if (IS_ERR(vma)) + continue; + + vma->flags &= ~I915_VMA_LOCAL_BIND; + } + + return 0; +} + +static void fake_aliasing_ppgtt_disable(struct drm_i915_private *i915) +{ + i915_gem_fini_aliasing_ppgtt(i915); +} + +int i915_gem_context_live_selftests(struct drm_i915_private *dev_priv) { static const struct i915_subtest tests[] = { SUBTEST(igt_ctx_exec), }; + bool fake_alias = false; + int err; + + /* Install a fake aliasing gtt for exercise */ + if (USES_PPGTT(dev_priv) && !dev_priv->mm.aliasing_ppgtt) { + mutex_lock(&dev_priv->drm.struct_mutex); + err = fake_aliasing_ppgtt_enable(dev_priv); + mutex_unlock(&dev_priv->drm.struct_mutex); + if (err) + return err; + + GEM_BUG_ON(!dev_priv->mm.aliasing_ppgtt); + fake_alias = true; + } + + err = i915_subtests(tests, dev_priv); - return i915_subtests(tests, i915); + if (fake_alias) { + mutex_lock(&dev_priv->drm.struct_mutex); + fake_aliasing_ppgtt_disable(dev_priv); + mutex_unlock(&dev_priv->drm.struct_mutex); + } + + return err; } -- cgit From f40a7b7558ef95bbd869b3c8268c0b2a239087c9 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 13 Feb 2017 17:15:52 +0000 Subject: drm/i915: Initial selftests for exercising eviction Very simple tests to just ask eviction to find some free space in a full GTT and one with some available space. Signed-off-by: Chris Wilson Reviewed-by: Matthew Auld Link: http://patchwork.freedesktop.org/patch/msgid/20170213171558.20942-41-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_evict.c | 4 + drivers/gpu/drm/i915/selftests/i915_gem_evict.c | 260 +++++++++++++++++++++ .../gpu/drm/i915/selftests/i915_mock_selftests.h | 1 + 3 files changed, 265 insertions(+) create mode 100644 drivers/gpu/drm/i915/selftests/i915_gem_evict.c diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c index 776c508cb9ae..ca25b1f7f6d1 100644 --- a/drivers/gpu/drm/i915/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/i915_gem_evict.c @@ -393,3 +393,7 @@ int i915_gem_evict_vm(struct i915_address_space *vm, bool do_idle) return 0; } + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftests/i915_gem_evict.c" +#endif diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c new file mode 100644 index 000000000000..97af353db218 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c @@ -0,0 +1,260 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include "../i915_selftest.h" + +#include "mock_gem_device.h" + +static int populate_ggtt(struct drm_i915_private *i915) +{ + struct drm_i915_gem_object *obj; + u64 size; + + for (size = 0; + size + I915_GTT_PAGE_SIZE <= i915->ggtt.base.total; + size += I915_GTT_PAGE_SIZE) { + struct i915_vma *vma; + + obj = i915_gem_object_create_internal(i915, I915_GTT_PAGE_SIZE); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 0); + if (IS_ERR(vma)) + return PTR_ERR(vma); + } + + if (!list_empty(&i915->mm.unbound_list)) { + size = 0; + list_for_each_entry(obj, &i915->mm.unbound_list, global_link) + size++; + + pr_err("Found %lld objects unbound!\n", size); + return -EINVAL; + } + + if (list_empty(&i915->ggtt.base.inactive_list)) { + pr_err("No objects on the GGTT inactive list!\n"); + return -EINVAL; + } + + return 0; +} + +static void unpin_ggtt(struct drm_i915_private *i915) +{ + struct i915_vma *vma; + + list_for_each_entry(vma, &i915->ggtt.base.inactive_list, vm_link) + i915_vma_unpin(vma); +} + +static void cleanup_objects(struct drm_i915_private *i915) +{ + struct drm_i915_gem_object *obj, *on; + + list_for_each_entry_safe(obj, on, &i915->mm.unbound_list, global_link) + i915_gem_object_put(obj); + + list_for_each_entry_safe(obj, on, &i915->mm.bound_list, global_link) + i915_gem_object_put(obj); + + mutex_unlock(&i915->drm.struct_mutex); + + i915_gem_drain_freed_objects(i915); + + mutex_lock(&i915->drm.struct_mutex); +} + +static int igt_evict_something(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct i915_ggtt *ggtt = &i915->ggtt; + int err; + + /* Fill the GGTT with pinned objects and try to evict one. */ + + err = populate_ggtt(i915); + if (err) + goto cleanup; + + /* Everything is pinned, nothing should happen */ + err = i915_gem_evict_something(&ggtt->base, + I915_GTT_PAGE_SIZE, 0, 0, + 0, U64_MAX, + 0); + if (err != -ENOSPC) { + pr_err("i915_gem_evict_something failed on a full GGTT with err=%d\n", + err); + goto cleanup; + } + + unpin_ggtt(i915); + + /* Everything is unpinned, we should be able to evict something */ + err = i915_gem_evict_something(&ggtt->base, + I915_GTT_PAGE_SIZE, 0, 0, + 0, U64_MAX, + 0); + if (err) { + pr_err("i915_gem_evict_something failed on a full GGTT with err=%d\n", + err); + goto cleanup; + } + +cleanup: + cleanup_objects(i915); + return err; +} + +static int igt_overcommit(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + int err; + + /* Fill the GGTT with pinned objects and then try to pin one more. + * We expect it to fail. + */ + + err = populate_ggtt(i915); + if (err) + goto cleanup; + + obj = i915_gem_object_create_internal(i915, I915_GTT_PAGE_SIZE); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + goto cleanup; + } + + list_move(&obj->global_link, &i915->mm.unbound_list); + + vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 0); + if (!IS_ERR(vma) || PTR_ERR(vma) != -ENOSPC) { + pr_err("Failed to evict+insert, i915_gem_object_ggtt_pin returned err=%d\n", (int)PTR_ERR(vma)); + err = -EINVAL; + goto cleanup; + } + +cleanup: + cleanup_objects(i915); + return err; +} + +static int igt_evict_for_vma(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct i915_ggtt *ggtt = &i915->ggtt; + struct drm_mm_node target = { + .start = 0, + .size = 4096, + }; + int err; + + /* Fill the GGTT with pinned objects and try to evict a range. */ + + err = populate_ggtt(i915); + if (err) + goto cleanup; + + /* Everything is pinned, nothing should happen */ + err = i915_gem_evict_for_node(&ggtt->base, &target, 0); + if (err != -ENOSPC) { + pr_err("i915_gem_evict_for_node on a full GGTT returned err=%d\n", + err); + goto cleanup; + } + + unpin_ggtt(i915); + + /* Everything is unpinned, we should be able to evict the node */ + err = i915_gem_evict_for_node(&ggtt->base, &target, 0); + if (err) { + pr_err("i915_gem_evict_for_node returned err=%d\n", + err); + goto cleanup; + } + +cleanup: + cleanup_objects(i915); + return err; +} + +static int igt_evict_vm(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct i915_ggtt *ggtt = &i915->ggtt; + int err; + + /* Fill the GGTT with pinned objects and try to evict everything. */ + + err = populate_ggtt(i915); + if (err) + goto cleanup; + + /* Everything is pinned, nothing should happen */ + err = i915_gem_evict_vm(&ggtt->base, false); + if (err) { + pr_err("i915_gem_evict_vm on a full GGTT returned err=%d]\n", + err); + goto cleanup; + } + + unpin_ggtt(i915); + + err = i915_gem_evict_vm(&ggtt->base, false); + if (err) { + pr_err("i915_gem_evict_vm on a full GGTT returned err=%d]\n", + err); + goto cleanup; + } + +cleanup: + cleanup_objects(i915); + return err; +} + +int i915_gem_evict_mock_selftests(void) +{ + static const struct i915_subtest tests[] = { + SUBTEST(igt_evict_something), + SUBTEST(igt_evict_for_vma), + SUBTEST(igt_evict_vm), + SUBTEST(igt_overcommit), + }; + struct drm_i915_private *i915; + int err; + + i915 = mock_gem_device(); + if (!i915) + return -ENOMEM; + + mutex_lock(&i915->drm.struct_mutex); + err = i915_subtests(tests, i915); + mutex_unlock(&i915->drm.struct_mutex); + + drm_dev_unref(&i915->drm); + return err; +} diff --git a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h index b450eab7e6e1..cfbd3f5486ae 100644 --- a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h +++ b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h @@ -16,3 +16,4 @@ selftest(requests, i915_gem_request_mock_selftests) selftest(objects, i915_gem_object_mock_selftests) selftest(dmabuf, i915_gem_dmabuf_mock_selftests) selftest(vma, i915_vma_mock_selftests) +selftest(evict, i915_gem_evict_mock_selftests) -- cgit From e619cd0d223d55b69732d0d61d1c497e506670ac Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 13 Feb 2017 17:15:53 +0000 Subject: drm/i915: Add mock exercise for i915_gem_gtt_reserve i915_gem_gtt_reserve should put the node exactly as requested in the GTT, evicting as required. Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170213171558.20942-42-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 195 +++++++++++++++++++++ .../gpu/drm/i915/selftests/i915_mock_selftests.h | 1 + 2 files changed, 196 insertions(+) diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c index 2e571bbe98ae..4f87e437bf1f 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c @@ -29,6 +29,7 @@ #include "i915_random.h" #include "mock_drm.h" +#include "mock_gem_device.h" static void fake_free_pages(struct drm_i915_gem_object *obj, struct sg_table *pages) @@ -901,6 +902,200 @@ static int igt_ggtt_lowlevel(void *arg) return exercise_ggtt(arg, lowlevel_hole); } +static void track_vma_bind(struct i915_vma *vma) +{ + struct drm_i915_gem_object *obj = vma->obj; + + obj->bind_count++; /* track for eviction later */ + __i915_gem_object_pin_pages(obj); + + vma->pages = obj->mm.pages; + list_move_tail(&vma->vm_link, &vma->vm->inactive_list); +} + +static int igt_gtt_reserve(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct drm_i915_gem_object *obj, *on; + LIST_HEAD(objects); + u64 total; + int err; + + /* i915_gem_gtt_reserve() tries to reserve the precise range + * for the node, and evicts if it has to. So our test checks that + * it can give us the requsted space and prevent overlaps. + */ + + /* Start by filling the GGTT */ + for (total = 0; + total + 2*I915_GTT_PAGE_SIZE <= i915->ggtt.base.total; + total += 2*I915_GTT_PAGE_SIZE) { + struct i915_vma *vma; + + obj = i915_gem_object_create_internal(i915, 2*PAGE_SIZE); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + goto out; + } + + err = i915_gem_object_pin_pages(obj); + if (err) { + i915_gem_object_put(obj); + goto out; + } + + list_add(&obj->st_link, &objects); + + vma = i915_vma_instance(obj, &i915->ggtt.base, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto out; + } + + err = i915_gem_gtt_reserve(&i915->ggtt.base, &vma->node, + obj->base.size, + total, + obj->cache_level, + 0); + if (err) { + pr_err("i915_gem_gtt_reserve (pass 1) failed at %llu/%llu with err=%d\n", + total, i915->ggtt.base.total, err); + goto out; + } + track_vma_bind(vma); + + GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); + if (vma->node.start != total || + vma->node.size != 2*I915_GTT_PAGE_SIZE) { + pr_err("i915_gem_gtt_reserve (pass 1) placement failed, found (%llx + %llx), expected (%llx + %lx)\n", + vma->node.start, vma->node.size, + total, 2*I915_GTT_PAGE_SIZE); + err = -EINVAL; + goto out; + } + } + + /* Now we start forcing evictions */ + for (total = I915_GTT_PAGE_SIZE; + total + 2*I915_GTT_PAGE_SIZE <= i915->ggtt.base.total; + total += 2*I915_GTT_PAGE_SIZE) { + struct i915_vma *vma; + + obj = i915_gem_object_create_internal(i915, 2*PAGE_SIZE); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + goto out; + } + + err = i915_gem_object_pin_pages(obj); + if (err) { + i915_gem_object_put(obj); + goto out; + } + + list_add(&obj->st_link, &objects); + + vma = i915_vma_instance(obj, &i915->ggtt.base, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto out; + } + + err = i915_gem_gtt_reserve(&i915->ggtt.base, &vma->node, + obj->base.size, + total, + obj->cache_level, + 0); + if (err) { + pr_err("i915_gem_gtt_reserve (pass 2) failed at %llu/%llu with err=%d\n", + total, i915->ggtt.base.total, err); + goto out; + } + track_vma_bind(vma); + + GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); + if (vma->node.start != total || + vma->node.size != 2*I915_GTT_PAGE_SIZE) { + pr_err("i915_gem_gtt_reserve (pass 2) placement failed, found (%llx + %llx), expected (%llx + %lx)\n", + vma->node.start, vma->node.size, + total, 2*I915_GTT_PAGE_SIZE); + err = -EINVAL; + goto out; + } + } + + /* And then try at random */ + list_for_each_entry_safe(obj, on, &objects, st_link) { + struct i915_vma *vma; + u64 offset; + + vma = i915_vma_instance(obj, &i915->ggtt.base, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto out; + } + + err = i915_vma_unbind(vma); + if (err) { + pr_err("i915_vma_unbind failed with err=%d!\n", err); + goto out; + } + + offset = random_offset(0, i915->ggtt.base.total, + 2*I915_GTT_PAGE_SIZE, + I915_GTT_MIN_ALIGNMENT); + + err = i915_gem_gtt_reserve(&i915->ggtt.base, &vma->node, + obj->base.size, + offset, + obj->cache_level, + 0); + if (err) { + pr_err("i915_gem_gtt_reserve (pass 3) failed at %llu/%llu with err=%d\n", + total, i915->ggtt.base.total, err); + goto out; + } + track_vma_bind(vma); + + GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); + if (vma->node.start != offset || + vma->node.size != 2*I915_GTT_PAGE_SIZE) { + pr_err("i915_gem_gtt_reserve (pass 3) placement failed, found (%llx + %llx), expected (%llx + %lx)\n", + vma->node.start, vma->node.size, + offset, 2*I915_GTT_PAGE_SIZE); + err = -EINVAL; + goto out; + } + } + +out: + list_for_each_entry_safe(obj, on, &objects, st_link) { + i915_gem_object_unpin_pages(obj); + i915_gem_object_put(obj); + } + return err; +} + +int i915_gem_gtt_mock_selftests(void) +{ + static const struct i915_subtest tests[] = { + SUBTEST(igt_gtt_reserve), + }; + struct drm_i915_private *i915; + int err; + + i915 = mock_gem_device(); + if (!i915) + return -ENOMEM; + + mutex_lock(&i915->drm.struct_mutex); + err = i915_subtests(tests, i915); + mutex_unlock(&i915->drm.struct_mutex); + + drm_dev_unref(&i915->drm); + return err; +} + int i915_gem_gtt_live_selftests(struct drm_i915_private *i915) { static const struct i915_subtest tests[] = { diff --git a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h index cfbd3f5486ae..be9a9ebf5692 100644 --- a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h +++ b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h @@ -17,3 +17,4 @@ selftest(objects, i915_gem_object_mock_selftests) selftest(dmabuf, i915_gem_dmabuf_mock_selftests) selftest(vma, i915_vma_mock_selftests) selftest(evict, i915_gem_evict_mock_selftests) +selftest(gtt, i915_gem_gtt_mock_selftests) -- cgit From 5f32616edb8d717f5ef99ab9391388a6185afc58 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 13 Feb 2017 17:15:54 +0000 Subject: drm/i915: Add mock exercise for i915_gem_gtt_insert i915_gem_gtt_insert should allocate from the available free space in the GTT, evicting as necessary to create space. Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170213171558.20942-43-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 208 ++++++++++++++++++++++++++ 1 file changed, 208 insertions(+) diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c index 4f87e437bf1f..349b0de1c47e 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c @@ -1076,10 +1076,218 @@ out: return err; } +static int igt_gtt_insert(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct drm_i915_gem_object *obj, *on; + struct drm_mm_node tmp = {}; + const struct invalid_insert { + u64 size; + u64 alignment; + u64 start, end; + } invalid_insert[] = { + { + i915->ggtt.base.total + I915_GTT_PAGE_SIZE, 0, + 0, i915->ggtt.base.total, + }, + { + 2*I915_GTT_PAGE_SIZE, 0, + 0, I915_GTT_PAGE_SIZE, + }, + { + -(u64)I915_GTT_PAGE_SIZE, 0, + 0, 4*I915_GTT_PAGE_SIZE, + }, + { + -(u64)2*I915_GTT_PAGE_SIZE, 2*I915_GTT_PAGE_SIZE, + 0, 4*I915_GTT_PAGE_SIZE, + }, + { + I915_GTT_PAGE_SIZE, I915_GTT_MIN_ALIGNMENT << 1, + I915_GTT_MIN_ALIGNMENT, I915_GTT_MIN_ALIGNMENT << 1, + }, + {} + }, *ii; + LIST_HEAD(objects); + u64 total; + int err; + + /* i915_gem_gtt_insert() tries to allocate some free space in the GTT + * to the node, evicting if required. + */ + + /* Check a couple of obviously invalid requests */ + for (ii = invalid_insert; ii->size; ii++) { + err = i915_gem_gtt_insert(&i915->ggtt.base, &tmp, + ii->size, ii->alignment, + I915_COLOR_UNEVICTABLE, + ii->start, ii->end, + 0); + if (err != -ENOSPC) { + pr_err("Invalid i915_gem_gtt_insert(.size=%llx, .alignment=%llx, .start=%llx, .end=%llx) succeeded (err=%d)\n", + ii->size, ii->alignment, ii->start, ii->end, + err); + return -EINVAL; + } + } + + /* Start by filling the GGTT */ + for (total = 0; + total + I915_GTT_PAGE_SIZE <= i915->ggtt.base.total; + total += I915_GTT_PAGE_SIZE) { + struct i915_vma *vma; + + obj = i915_gem_object_create_internal(i915, I915_GTT_PAGE_SIZE); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + goto out; + } + + err = i915_gem_object_pin_pages(obj); + if (err) { + i915_gem_object_put(obj); + goto out; + } + + list_add(&obj->st_link, &objects); + + vma = i915_vma_instance(obj, &i915->ggtt.base, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto out; + } + + err = i915_gem_gtt_insert(&i915->ggtt.base, &vma->node, + obj->base.size, 0, obj->cache_level, + 0, i915->ggtt.base.total, + 0); + if (err == -ENOSPC) { + /* maxed out the GGTT space */ + i915_gem_object_put(obj); + break; + } + if (err) { + pr_err("i915_gem_gtt_insert (pass 1) failed at %llu/%llu with err=%d\n", + total, i915->ggtt.base.total, err); + goto out; + } + track_vma_bind(vma); + __i915_vma_pin(vma); + + GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); + } + + list_for_each_entry(obj, &objects, st_link) { + struct i915_vma *vma; + + vma = i915_vma_instance(obj, &i915->ggtt.base, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto out; + } + + if (!drm_mm_node_allocated(&vma->node)) { + pr_err("VMA was unexpectedly evicted!\n"); + err = -EINVAL; + goto out; + } + + __i915_vma_unpin(vma); + } + + /* If we then reinsert, we should find the same hole */ + list_for_each_entry_safe(obj, on, &objects, st_link) { + struct i915_vma *vma; + u64 offset; + + vma = i915_vma_instance(obj, &i915->ggtt.base, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto out; + } + + GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); + offset = vma->node.start; + + err = i915_vma_unbind(vma); + if (err) { + pr_err("i915_vma_unbind failed with err=%d!\n", err); + goto out; + } + + err = i915_gem_gtt_insert(&i915->ggtt.base, &vma->node, + obj->base.size, 0, obj->cache_level, + 0, i915->ggtt.base.total, + 0); + if (err) { + pr_err("i915_gem_gtt_insert (pass 2) failed at %llu/%llu with err=%d\n", + total, i915->ggtt.base.total, err); + goto out; + } + track_vma_bind(vma); + + GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); + if (vma->node.start != offset) { + pr_err("i915_gem_gtt_insert did not return node to its previous location (the only hole), expected address %llx, found %llx\n", + offset, vma->node.start); + err = -EINVAL; + goto out; + } + } + + /* And then force evictions */ + for (total = 0; + total + 2*I915_GTT_PAGE_SIZE <= i915->ggtt.base.total; + total += 2*I915_GTT_PAGE_SIZE) { + struct i915_vma *vma; + + obj = i915_gem_object_create_internal(i915, 2*I915_GTT_PAGE_SIZE); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + goto out; + } + + err = i915_gem_object_pin_pages(obj); + if (err) { + i915_gem_object_put(obj); + goto out; + } + + list_add(&obj->st_link, &objects); + + vma = i915_vma_instance(obj, &i915->ggtt.base, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto out; + } + + err = i915_gem_gtt_insert(&i915->ggtt.base, &vma->node, + obj->base.size, 0, obj->cache_level, + 0, i915->ggtt.base.total, + 0); + if (err) { + pr_err("i915_gem_gtt_insert (pass 3) failed at %llu/%llu with err=%d\n", + total, i915->ggtt.base.total, err); + goto out; + } + track_vma_bind(vma); + + GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); + } + +out: + list_for_each_entry_safe(obj, on, &objects, st_link) { + i915_gem_object_unpin_pages(obj); + i915_gem_object_put(obj); + } + return err; +} + int i915_gem_gtt_mock_selftests(void) { static const struct i915_subtest tests[] = { SUBTEST(igt_gtt_reserve), + SUBTEST(igt_gtt_insert), }; struct drm_i915_private *i915; int err; -- cgit From 210e8ac48f26c965d07db16cff7a5b3d02f8e337 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 13 Feb 2017 17:15:55 +0000 Subject: drm/i915: Add mock tests for GTT/VMA handling Use the live tests against the mock ppgtt for quick testing on all platforms of the VMA layer. Signed-off-by: Chris Wilson Reviewed-by: Matthew Auld Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170213171558.20942-44-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 43 +++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c index 349b0de1c47e..5caa5153a394 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c @@ -28,6 +28,7 @@ #include "../i915_selftest.h" #include "i915_random.h" +#include "mock_context.h" #include "mock_drm.h" #include "mock_gem_device.h" @@ -913,6 +914,45 @@ static void track_vma_bind(struct i915_vma *vma) list_move_tail(&vma->vm_link, &vma->vm->inactive_list); } +static int exercise_mock(struct drm_i915_private *i915, + int (*func)(struct drm_i915_private *i915, + struct i915_address_space *vm, + u64 hole_start, u64 hole_end, + unsigned long end_time)) +{ + struct i915_gem_context *ctx; + struct i915_hw_ppgtt *ppgtt; + IGT_TIMEOUT(end_time); + int err; + + ctx = mock_context(i915, "mock"); + if (!ctx) + return -ENOMEM; + + ppgtt = ctx->ppgtt; + GEM_BUG_ON(!ppgtt); + + err = func(i915, &ppgtt->base, 0, ppgtt->base.total, end_time); + + mock_context_close(ctx); + return err; +} + +static int igt_mock_fill(void *arg) +{ + return exercise_mock(arg, fill_hole); +} + +static int igt_mock_walk(void *arg) +{ + return exercise_mock(arg, walk_hole); +} + +static int igt_mock_drunk(void *arg) +{ + return exercise_mock(arg, drunk_hole); +} + static int igt_gtt_reserve(void *arg) { struct drm_i915_private *i915 = arg; @@ -1286,6 +1326,9 @@ out: int i915_gem_gtt_mock_selftests(void) { static const struct i915_subtest tests[] = { + SUBTEST(igt_mock_drunk), + SUBTEST(igt_mock_walk), + SUBTEST(igt_mock_fill), SUBTEST(igt_gtt_reserve), SUBTEST(igt_gtt_insert), }; -- cgit From af85f50d1890e4ff29e083ad9ba2f93dbc684276 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 13 Feb 2017 17:15:56 +0000 Subject: drm/i915: Exercise manipulate of single pages in the GGTT Move a single page of an object around within the GGTT and check coherency of writes and reads. Signed-off-by: Chris Wilson Reviewed-by: Matthew Auld Link: http://patchwork.freedesktop.org/patch/msgid/20170213171558.20942-45-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 91 +++++++++++++++++++++++++++ 1 file changed, 91 insertions(+) diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c index 5caa5153a394..36e5cb582af4 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c @@ -903,6 +903,96 @@ static int igt_ggtt_lowlevel(void *arg) return exercise_ggtt(arg, lowlevel_hole); } +static int igt_ggtt_page(void *arg) +{ + const unsigned int count = PAGE_SIZE/sizeof(u32); + I915_RND_STATE(prng); + struct drm_i915_private *i915 = arg; + struct i915_ggtt *ggtt = &i915->ggtt; + struct drm_i915_gem_object *obj; + struct drm_mm_node tmp; + unsigned int *order, n; + int err; + + mutex_lock(&i915->drm.struct_mutex); + + obj = i915_gem_object_create_internal(i915, PAGE_SIZE); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + goto out_unlock; + } + + err = i915_gem_object_pin_pages(obj); + if (err) + goto out_free; + + memset(&tmp, 0, sizeof(tmp)); + err = drm_mm_insert_node_in_range(&ggtt->base.mm, &tmp, + 1024 * PAGE_SIZE, 0, + I915_COLOR_UNEVICTABLE, + 0, ggtt->mappable_end, + DRM_MM_INSERT_LOW); + if (err) + goto out_unpin; + + order = i915_random_order(count, &prng); + if (!order) { + err = -ENOMEM; + goto out_remove; + } + + for (n = 0; n < count; n++) { + u64 offset = tmp.start + order[n] * PAGE_SIZE; + u32 __iomem *vaddr; + + ggtt->base.insert_page(&ggtt->base, + i915_gem_object_get_dma_address(obj, 0), + offset, I915_CACHE_NONE, 0); + + vaddr = io_mapping_map_atomic_wc(&ggtt->mappable, offset); + iowrite32(n, vaddr + n); + io_mapping_unmap_atomic(vaddr); + + wmb(); + ggtt->base.clear_range(&ggtt->base, offset, PAGE_SIZE); + } + + i915_random_reorder(order, count, &prng); + for (n = 0; n < count; n++) { + u64 offset = tmp.start + order[n] * PAGE_SIZE; + u32 __iomem *vaddr; + u32 val; + + ggtt->base.insert_page(&ggtt->base, + i915_gem_object_get_dma_address(obj, 0), + offset, I915_CACHE_NONE, 0); + + vaddr = io_mapping_map_atomic_wc(&ggtt->mappable, offset); + val = ioread32(vaddr + n); + io_mapping_unmap_atomic(vaddr); + + ggtt->base.clear_range(&ggtt->base, offset, PAGE_SIZE); + + if (val != n) { + pr_err("insert page failed: found %d, expected %d\n", + val, n); + err = -EINVAL; + break; + } + } + + kfree(order); +out_remove: + drm_mm_remove_node(&tmp); +out_unpin: + i915_gem_object_unpin_pages(obj); +out_free: + i915_gem_object_put(obj); +out_unlock: + mutex_unlock(&i915->drm.struct_mutex); + return err; +} + static void track_vma_bind(struct i915_vma *vma) { struct drm_i915_gem_object *obj = vma->obj; @@ -1360,6 +1450,7 @@ int i915_gem_gtt_live_selftests(struct drm_i915_private *i915) SUBTEST(igt_ggtt_drunk), SUBTEST(igt_ggtt_walk), SUBTEST(igt_ggtt_fill), + SUBTEST(igt_ggtt_page), }; GEM_BUG_ON(offset_in_page(i915->ggtt.base.total)); -- cgit From 7db4dceafa443f0b6c7df90f01b16ec8b9f8b25d Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 13 Feb 2017 17:15:57 +0000 Subject: drm/i915: Exercise crossing pot boundaries in the GTT As the page-table trees within the GTT are naturally aligned to power-of-two boundaries, by inserting an object that crosses a power-of-two (and the power-of-two intervals) we can quickly check the code for errors in switching between levels in the tree. Signed-off-by: Chris Wilson Reviewed-by: Matthew Auld Link: http://patchwork.freedesktop.org/patch/msgid/20170213171558.20942-46-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 94 +++++++++++++++++++++++++++ 1 file changed, 94 insertions(+) diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c index 36e5cb582af4..5af9339087b7 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c @@ -582,6 +582,82 @@ err: return 0; } +static int pot_hole(struct drm_i915_private *i915, + struct i915_address_space *vm, + u64 hole_start, u64 hole_end, + unsigned long end_time) +{ + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + unsigned long flags; + unsigned int pot; + int err; + + flags = PIN_OFFSET_FIXED | PIN_USER; + if (i915_is_ggtt(vm)) + flags |= PIN_GLOBAL; + + obj = i915_gem_object_create_internal(i915, 2 * I915_GTT_PAGE_SIZE); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + vma = i915_vma_instance(obj, vm, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto err_obj; + } + + /* Insert a pair of pages across every pot boundary within the hole */ + for (pot = fls64(hole_end - 1) - 1; + pot > ilog2(2 * I915_GTT_PAGE_SIZE); + pot--) { + u64 step = BIT_ULL(pot); + u64 addr; + + for (addr = round_up(hole_start + I915_GTT_PAGE_SIZE, step) - I915_GTT_PAGE_SIZE; + addr + vma->size <= hole_end; + addr += step) { + err = i915_vma_pin(vma, 0, 0, addr | flags); + if (err) { + pr_err("%s failed to pin object at %llx in hole [%llx - %llx], with err=%d\n", + __func__, + addr, + hole_start, hole_end, + err); + goto err; + } + + if (!drm_mm_node_allocated(&vma->node) || + i915_vma_misplaced(vma, 0, 0, addr | flags)) { + pr_err("%s incorrect at %llx + %llx\n", + __func__, addr, vma->size); + i915_vma_unpin(vma); + err = i915_vma_unbind(vma); + err = -EINVAL; + goto err; + } + + i915_vma_unpin(vma); + err = i915_vma_unbind(vma); + GEM_BUG_ON(err); + } + + if (igt_timeout(end_time, + "%s timed out after %d/%d\n", + __func__, pot, fls64(hole_end - 1) - 1)) { + err = -EINTR; + goto err; + } + } + +err: + if (!i915_vma_is_ggtt(vma)) + i915_vma_close(vma); +err_obj: + i915_gem_object_put(obj); + return err; +} + static int drunk_hole(struct drm_i915_private *i915, struct i915_address_space *vm, u64 hole_start, u64 hole_end, @@ -819,6 +895,11 @@ static int igt_ppgtt_walk(void *arg) return exercise_ppgtt(arg, walk_hole); } +static int igt_ppgtt_pot(void *arg) +{ + return exercise_ppgtt(arg, pot_hole); +} + static int igt_ppgtt_drunk(void *arg) { return exercise_ppgtt(arg, drunk_hole); @@ -893,6 +974,11 @@ static int igt_ggtt_walk(void *arg) return exercise_ggtt(arg, walk_hole); } +static int igt_ggtt_pot(void *arg) +{ + return exercise_ggtt(arg, pot_hole); +} + static int igt_ggtt_drunk(void *arg) { return exercise_ggtt(arg, drunk_hole); @@ -1038,6 +1124,11 @@ static int igt_mock_walk(void *arg) return exercise_mock(arg, walk_hole); } +static int igt_mock_pot(void *arg) +{ + return exercise_mock(arg, pot_hole); +} + static int igt_mock_drunk(void *arg) { return exercise_mock(arg, drunk_hole); @@ -1418,6 +1509,7 @@ int i915_gem_gtt_mock_selftests(void) static const struct i915_subtest tests[] = { SUBTEST(igt_mock_drunk), SUBTEST(igt_mock_walk), + SUBTEST(igt_mock_pot), SUBTEST(igt_mock_fill), SUBTEST(igt_gtt_reserve), SUBTEST(igt_gtt_insert), @@ -1444,11 +1536,13 @@ int i915_gem_gtt_live_selftests(struct drm_i915_private *i915) SUBTEST(igt_ppgtt_lowlevel), SUBTEST(igt_ppgtt_drunk), SUBTEST(igt_ppgtt_walk), + SUBTEST(igt_ppgtt_pot), SUBTEST(igt_ppgtt_fill), SUBTEST(igt_ppgtt_shrink), SUBTEST(igt_ggtt_lowlevel), SUBTEST(igt_ggtt_drunk), SUBTEST(igt_ggtt_walk), + SUBTEST(igt_ggtt_pot), SUBTEST(igt_ggtt_fill), SUBTEST(igt_ggtt_page), }; -- cgit From 496b575e3ccbf6fbe57a674c721af43dc8826361 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 13 Feb 2017 17:15:58 +0000 Subject: drm/i915: Add initial selftests for hang detection and resets Check that we can reset the GPU and continue executing from the next request. Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Link: http://patchwork.freedesktop.org/patch/msgid/20170213171558.20942-47-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_drv.h | 4 +- drivers/gpu/drm/i915/intel_hangcheck.c | 4 + .../gpu/drm/i915/selftests/i915_live_selftests.h | 1 + drivers/gpu/drm/i915/selftests/intel_hangcheck.c | 537 +++++++++++++++++++++ 4 files changed, 544 insertions(+), 2 deletions(-) create mode 100644 drivers/gpu/drm/i915/selftests/intel_hangcheck.c diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 80b653fb3722..6cfdb64926d7 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3377,8 +3377,8 @@ int __must_check i915_gem_init(struct drm_i915_private *dev_priv); int __must_check i915_gem_init_hw(struct drm_i915_private *dev_priv); void i915_gem_init_swizzling(struct drm_i915_private *dev_priv); void i915_gem_cleanup_engines(struct drm_i915_private *dev_priv); -int __must_check i915_gem_wait_for_idle(struct drm_i915_private *dev_priv, - unsigned int flags); +int i915_gem_wait_for_idle(struct drm_i915_private *dev_priv, + unsigned int flags); int __must_check i915_gem_suspend(struct drm_i915_private *dev_priv); void i915_gem_resume(struct drm_i915_private *dev_priv); int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf); diff --git a/drivers/gpu/drm/i915/intel_hangcheck.c b/drivers/gpu/drm/i915/intel_hangcheck.c index f05971f5586f..dce742243ba6 100644 --- a/drivers/gpu/drm/i915/intel_hangcheck.c +++ b/drivers/gpu/drm/i915/intel_hangcheck.c @@ -480,3 +480,7 @@ void intel_hangcheck_init(struct drm_i915_private *i915) INIT_DELAYED_WORK(&i915->gpu_error.hangcheck_work, i915_hangcheck_elapsed); } + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftests/intel_hangcheck.c" +#endif diff --git a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h index eedd7901ce61..18b174d855ca 100644 --- a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h +++ b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h @@ -16,3 +16,4 @@ selftest(dmabuf, i915_gem_dmabuf_live_selftests) selftest(coherency, i915_gem_coherency_live_selftests) selftest(gtt, i915_gem_gtt_live_selftests) selftest(contexts, i915_gem_context_live_selftests) +selftest(hangcheck, intel_hangcheck_live_selftests) diff --git a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c new file mode 100644 index 000000000000..d4acee6730e9 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c @@ -0,0 +1,537 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include "../i915_selftest.h" + +struct hang { + struct drm_i915_private *i915; + struct drm_i915_gem_object *hws; + struct drm_i915_gem_object *obj; + u32 *seqno; + u32 *batch; +}; + +static int hang_init(struct hang *h, struct drm_i915_private *i915) +{ + void *vaddr; + int err; + + memset(h, 0, sizeof(*h)); + h->i915 = i915; + + h->hws = i915_gem_object_create_internal(i915, PAGE_SIZE); + if (IS_ERR(h->hws)) + return PTR_ERR(h->hws); + + h->obj = i915_gem_object_create_internal(i915, PAGE_SIZE); + if (IS_ERR(h->obj)) { + err = PTR_ERR(h->obj); + goto err_hws; + } + + i915_gem_object_set_cache_level(h->hws, I915_CACHE_LLC); + vaddr = i915_gem_object_pin_map(h->hws, I915_MAP_WB); + if (IS_ERR(vaddr)) { + err = PTR_ERR(vaddr); + goto err_obj; + } + h->seqno = memset(vaddr, 0xff, PAGE_SIZE); + + vaddr = i915_gem_object_pin_map(h->obj, + HAS_LLC(i915) ? I915_MAP_WB : I915_MAP_WC); + if (IS_ERR(vaddr)) { + err = PTR_ERR(vaddr); + goto err_unpin_hws; + } + h->batch = vaddr; + + return 0; + +err_unpin_hws: + i915_gem_object_unpin_map(h->hws); +err_obj: + i915_gem_object_put(h->obj); +err_hws: + i915_gem_object_put(h->hws); + return err; +} + +static u64 hws_address(const struct i915_vma *hws, + const struct drm_i915_gem_request *rq) +{ + return hws->node.start + offset_in_page(sizeof(u32)*rq->fence.context); +} + +static int emit_recurse_batch(struct hang *h, + struct drm_i915_gem_request *rq) +{ + struct drm_i915_private *i915 = h->i915; + struct i915_address_space *vm = rq->ctx->ppgtt ? &rq->ctx->ppgtt->base : &i915->ggtt.base; + struct i915_vma *hws, *vma; + unsigned int flags; + u32 *batch; + int err; + + vma = i915_vma_instance(h->obj, vm, NULL); + if (IS_ERR(vma)) + return PTR_ERR(vma); + + hws = i915_vma_instance(h->hws, vm, NULL); + if (IS_ERR(hws)) + return PTR_ERR(hws); + + err = i915_vma_pin(vma, 0, 0, PIN_USER); + if (err) + return err; + + err = i915_vma_pin(hws, 0, 0, PIN_USER); + if (err) + goto unpin_vma; + + err = rq->engine->emit_flush(rq, EMIT_INVALIDATE); + if (err) + goto unpin_hws; + + err = i915_switch_context(rq); + if (err) + goto unpin_hws; + + i915_vma_move_to_active(vma, rq, 0); + if (!i915_gem_object_has_active_reference(vma->obj)) { + i915_gem_object_get(vma->obj); + i915_gem_object_set_active_reference(vma->obj); + } + + i915_vma_move_to_active(hws, rq, 0); + if (!i915_gem_object_has_active_reference(hws->obj)) { + i915_gem_object_get(hws->obj); + i915_gem_object_set_active_reference(hws->obj); + } + + batch = h->batch; + if (INTEL_GEN(i915) >= 8) { + *batch++ = MI_STORE_DWORD_IMM_GEN4; + *batch++ = lower_32_bits(hws_address(hws, rq)); + *batch++ = upper_32_bits(hws_address(hws, rq)); + *batch++ = rq->fence.seqno; + *batch++ = MI_BATCH_BUFFER_START | 1 << 8 | 1; + *batch++ = lower_32_bits(vma->node.start); + *batch++ = upper_32_bits(vma->node.start); + } else if (INTEL_GEN(i915) >= 6) { + *batch++ = MI_STORE_DWORD_IMM_GEN4; + *batch++ = 0; + *batch++ = lower_32_bits(hws_address(hws, rq)); + *batch++ = rq->fence.seqno; + *batch++ = MI_BATCH_BUFFER_START | 1 << 8; + *batch++ = lower_32_bits(vma->node.start); + } else if (INTEL_GEN(i915) >= 4) { + *batch++ = MI_STORE_DWORD_IMM_GEN4 | 1 << 22; + *batch++ = 0; + *batch++ = lower_32_bits(hws_address(hws, rq)); + *batch++ = rq->fence.seqno; + *batch++ = MI_BATCH_BUFFER_START | 2 << 6; + *batch++ = lower_32_bits(vma->node.start); + } else { + *batch++ = MI_STORE_DWORD_IMM; + *batch++ = lower_32_bits(hws_address(hws, rq)); + *batch++ = rq->fence.seqno; + *batch++ = MI_BATCH_BUFFER_START | 2 << 6 | 1; + *batch++ = lower_32_bits(vma->node.start); + } + *batch++ = MI_BATCH_BUFFER_END; /* not reached */ + + flags = 0; + if (INTEL_GEN(vm->i915) <= 5) + flags |= I915_DISPATCH_SECURE; + + err = rq->engine->emit_bb_start(rq, vma->node.start, PAGE_SIZE, flags); + +unpin_hws: + i915_vma_unpin(hws); +unpin_vma: + i915_vma_unpin(vma); + return err; +} + +static struct drm_i915_gem_request * +hang_create_request(struct hang *h, + struct intel_engine_cs *engine, + struct i915_gem_context *ctx) +{ + struct drm_i915_gem_request *rq; + int err; + + if (i915_gem_object_is_active(h->obj)) { + struct drm_i915_gem_object *obj; + void *vaddr; + + obj = i915_gem_object_create_internal(h->i915, PAGE_SIZE); + if (IS_ERR(obj)) + return ERR_CAST(obj); + + vaddr = i915_gem_object_pin_map(obj, + HAS_LLC(h->i915) ? I915_MAP_WB : I915_MAP_WC); + if (IS_ERR(vaddr)) { + i915_gem_object_put(obj); + return ERR_CAST(vaddr); + } + + i915_gem_object_unpin_map(h->obj); + i915_gem_object_put(h->obj); + + h->obj = obj; + h->batch = vaddr; + } + + rq = i915_gem_request_alloc(engine, ctx); + if (IS_ERR(rq)) + return rq; + + err = emit_recurse_batch(h, rq); + if (err) { + __i915_add_request(rq, false); + return ERR_PTR(err); + } + + return rq; +} + +static u32 hws_seqno(const struct hang *h, + const struct drm_i915_gem_request *rq) +{ + return READ_ONCE(h->seqno[rq->fence.context % (PAGE_SIZE/sizeof(u32))]); +} + +static void hang_fini(struct hang *h) +{ + *h->batch = MI_BATCH_BUFFER_END; + wmb(); + + i915_gem_object_unpin_map(h->obj); + i915_gem_object_put(h->obj); + + i915_gem_object_unpin_map(h->hws); + i915_gem_object_put(h->hws); + + i915_gem_wait_for_idle(h->i915, I915_WAIT_LOCKED); + i915_gem_retire_requests(h->i915); +} + +static int igt_hang_sanitycheck(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct drm_i915_gem_request *rq; + struct intel_engine_cs *engine; + enum intel_engine_id id; + struct hang h; + int err; + + /* Basic check that we can execute our hanging batch */ + + if (!igt_can_mi_store_dword_imm(i915)) + return 0; + + mutex_lock(&i915->drm.struct_mutex); + err = hang_init(&h, i915); + if (err) + goto unlock; + + for_each_engine(engine, i915, id) { + long timeout; + + rq = hang_create_request(&h, engine, i915->kernel_context); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + pr_err("Failed to create request for %s, err=%d\n", + engine->name, err); + goto fini; + } + + i915_gem_request_get(rq); + + *h.batch = MI_BATCH_BUFFER_END; + __i915_add_request(rq, true); + + timeout = i915_wait_request(rq, + I915_WAIT_LOCKED, + MAX_SCHEDULE_TIMEOUT); + i915_gem_request_put(rq); + + if (timeout < 0) { + err = timeout; + pr_err("Wait for request failed on %s, err=%d\n", + engine->name, err); + goto fini; + } + } + +fini: + hang_fini(&h); +unlock: + mutex_unlock(&i915->drm.struct_mutex); + return err; +} + +static int igt_global_reset(void *arg) +{ + struct drm_i915_private *i915 = arg; + unsigned int reset_count; + int err = 0; + + /* Check that we can issue a global GPU reset */ + + set_bit(I915_RESET_IN_PROGRESS, &i915->gpu_error.flags); + + mutex_lock(&i915->drm.struct_mutex); + reset_count = i915_reset_count(&i915->gpu_error); + + i915_reset(i915); + + if (i915_reset_count(&i915->gpu_error) == reset_count) { + pr_err("No GPU reset recorded!\n"); + err = -EINVAL; + } + mutex_unlock(&i915->drm.struct_mutex); + + GEM_BUG_ON(test_bit(I915_RESET_IN_PROGRESS, &i915->gpu_error.flags)); + if (i915_terminally_wedged(&i915->gpu_error)) + err = -EIO; + + return err; +} + +static u32 fake_hangcheck(struct drm_i915_gem_request *rq) +{ + u32 reset_count; + + rq->engine->hangcheck.stalled = true; + rq->engine->hangcheck.seqno = intel_engine_get_seqno(rq->engine); + + reset_count = i915_reset_count(&rq->i915->gpu_error); + + set_bit(I915_RESET_IN_PROGRESS, &rq->i915->gpu_error.flags); + wake_up_all(&rq->i915->gpu_error.wait_queue); + + return reset_count; +} + +static bool wait_for_hang(struct hang *h, struct drm_i915_gem_request *rq) +{ + return !(wait_for_us(i915_seqno_passed(hws_seqno(h, rq), + rq->fence.seqno), + 10) && + wait_for(i915_seqno_passed(hws_seqno(h, rq), + rq->fence.seqno), + 1000)); +} + +static int igt_wait_reset(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct drm_i915_gem_request *rq; + unsigned int reset_count; + struct hang h; + long timeout; + int err; + + /* Check that we detect a stuck waiter and issue a reset */ + + set_bit(I915_RESET_IN_PROGRESS, &i915->gpu_error.flags); + + mutex_lock(&i915->drm.struct_mutex); + err = hang_init(&h, i915); + if (err) + goto unlock; + + rq = hang_create_request(&h, i915->engine[RCS], i915->kernel_context); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto fini; + } + + i915_gem_request_get(rq); + __i915_add_request(rq, true); + + if (!wait_for_hang(&h, rq)) { + pr_err("Failed to start request %x\n", rq->fence.seqno); + err = -EIO; + goto out_rq; + } + + reset_count = fake_hangcheck(rq); + + timeout = i915_wait_request(rq, I915_WAIT_LOCKED, 10); + if (timeout < 0) { + pr_err("i915_wait_request failed on a stuck request: err=%ld\n", + timeout); + err = timeout; + goto out_rq; + } + GEM_BUG_ON(test_bit(I915_RESET_IN_PROGRESS, &i915->gpu_error.flags)); + + if (i915_reset_count(&i915->gpu_error) == reset_count) { + pr_err("No GPU reset recorded!\n"); + err = -EINVAL; + goto out_rq; + } + +out_rq: + i915_gem_request_put(rq); +fini: + hang_fini(&h); +unlock: + mutex_unlock(&i915->drm.struct_mutex); + + if (i915_terminally_wedged(&i915->gpu_error)) + return -EIO; + + return err; +} + +static int igt_reset_queue(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct intel_engine_cs *engine; + enum intel_engine_id id; + struct hang h; + int err; + + /* Check that we replay pending requests following a hang */ + + if (!igt_can_mi_store_dword_imm(i915)) + return 0; + + mutex_lock(&i915->drm.struct_mutex); + err = hang_init(&h, i915); + if (err) + goto unlock; + + for_each_engine(engine, i915, id) { + struct drm_i915_gem_request *prev; + IGT_TIMEOUT(end_time); + unsigned int count; + + prev = hang_create_request(&h, engine, i915->kernel_context); + if (IS_ERR(prev)) { + err = PTR_ERR(prev); + goto fini; + } + + i915_gem_request_get(prev); + __i915_add_request(prev, true); + + count = 0; + do { + struct drm_i915_gem_request *rq; + unsigned int reset_count; + + rq = hang_create_request(&h, + engine, + i915->kernel_context); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto fini; + } + + i915_gem_request_get(rq); + __i915_add_request(rq, true); + + if (!wait_for_hang(&h, prev)) { + pr_err("Failed to start request %x\n", + prev->fence.seqno); + i915_gem_request_put(rq); + i915_gem_request_put(prev); + err = -EIO; + goto fini; + } + + reset_count = fake_hangcheck(prev); + + i915_reset(i915); + + GEM_BUG_ON(test_bit(I915_RESET_IN_PROGRESS, + &i915->gpu_error.flags)); + if (prev->fence.error != -EIO) { + pr_err("GPU reset not recorded on hanging request [fence.error=%d]!\n", + prev->fence.error); + i915_gem_request_put(rq); + i915_gem_request_put(prev); + err = -EINVAL; + goto fini; + } + + if (rq->fence.error) { + pr_err("Fence error status not zero [%d] after unrelated reset\n", + rq->fence.error); + i915_gem_request_put(rq); + i915_gem_request_put(prev); + err = -EINVAL; + goto fini; + } + + if (i915_reset_count(&i915->gpu_error) == reset_count) { + pr_err("No GPU reset recorded!\n"); + i915_gem_request_put(rq); + i915_gem_request_put(prev); + err = -EINVAL; + goto fini; + } + + i915_gem_request_put(prev); + prev = rq; + count++; + } while (time_before(jiffies, end_time)); + pr_info("%s: Completed %d resets\n", engine->name, count); + + *h.batch = MI_BATCH_BUFFER_END; + wmb(); + + i915_gem_request_put(prev); + } + +fini: + hang_fini(&h); +unlock: + mutex_unlock(&i915->drm.struct_mutex); + + if (i915_terminally_wedged(&i915->gpu_error)) + return -EIO; + + return err; +} + +int intel_hangcheck_live_selftests(struct drm_i915_private *i915) +{ + static const struct i915_subtest tests[] = { + SUBTEST(igt_hang_sanitycheck), + SUBTEST(igt_global_reset), + SUBTEST(igt_wait_reset), + SUBTEST(igt_reset_queue), + }; + + if (!intel_has_gpu_reset(i915)) + return 0; + + return i915_subtests(tests, i915); +} -- cgit From 04a68a35ce6d7b54749989f943993020f48fed62 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 9 Nov 2016 10:39:05 +0000 Subject: drm/i915/gvt: Disable access to stolen memory as a guest Explicitly disable stolen memory when running as a guest in a virtual machine, since the memory is not mediated between clients and reserved entirely for the host. The actual size should be reported as zero, but like every other quirk we want to tell the user what is happening. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=99028 Signed-off-by: Chris Wilson Cc: Zhenyu Wang Cc: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20161109103905.17860-1-chris@chris-wilson.co.uk Reviewed-by: Zhenyu Wang Cc: stable@vger.kernel.org --- drivers/gpu/drm/i915/i915_gem_stolen.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c index 82ca8f49fec1..f3abdc27c5dd 100644 --- a/drivers/gpu/drm/i915/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/i915_gem_stolen.c @@ -409,6 +409,11 @@ int i915_gem_init_stolen(struct drm_i915_private *dev_priv) mutex_init(&dev_priv->mm.stolen_lock); + if (intel_vgpu_active(dev_priv)) { + DRM_INFO("iGVT-g active, disabling use of stolen memory\n"); + return 0; + } + #ifdef CONFIG_INTEL_IOMMU if (intel_iommu_gfx_mapped && INTEL_GEN(dev_priv) < 8) { DRM_INFO("DMAR active, disabling use of stolen memory\n"); -- cgit From d892e9398ecf6defc7972a62227b77dad6be20bd Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sun, 12 Feb 2017 21:53:43 +0000 Subject: drm/i915: Pass timeout==0 on to i915_gem_object_wait_fence() The i915_gem_object_wait_fence() uses an incoming timeout=0 to query whether the current fence is busy or idle, without waiting. This can be used by the wait-ioctl to implement a busy query. Fixes: e95433c73a11 ("drm/i915: Rearrange i915_wait_request() accounting with callers") Testcase: igt/gem_wait/basic-busy-write-all Signed-off-by: Chris Wilson Cc: Matthew Auld Cc: Joonas Lahtinen Cc: # v4.10-rc1+ Cc: stable@vger.kernel.org Link: http://patchwork.freedesktop.org/patch/msgid/20170212215344.16600-1-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_gem.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 82489ed10373..71297920fdf4 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -441,7 +441,7 @@ i915_gem_object_wait_reservation(struct reservation_object *resv, timeout = i915_gem_object_wait_fence(shared[i], flags, timeout, rps); - if (timeout <= 0) + if (timeout < 0) break; dma_fence_put(shared[i]); @@ -454,7 +454,7 @@ i915_gem_object_wait_reservation(struct reservation_object *resv, excl = reservation_object_get_excl_rcu(resv); } - if (excl && timeout > 0) + if (excl && timeout >= 0) timeout = i915_gem_object_wait_fence(excl, flags, timeout, rps); dma_fence_put(excl); -- cgit From d2d1501625e96170958f38646a9fcc9b69bbc2df Mon Sep 17 00:00:00 2001 From: Ander Conselvan de Oliveira Date: Mon, 13 Feb 2017 16:57:33 +0200 Subject: drm/i915: Convert remaining users of 32bit power domain masks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit I screwed up the rebase of commit d8fc70b7367b ("drm/i915: Make power domain masks 64 bit long") before sending v2, causing a couple of conversions from 32 to 64 bit masks to be lost. Fixes: d8fc70b7367b ("drm/i915: Make power domain masks 64 bit long") Cc: Jani Nikula Cc: Joonas Lahtinen Cc: Rodrigo Vivi Cc: Ville Syrjälä Cc: Ander Conselvan de Oliveira Cc: Daniel Vetter Cc: intel-gfx@lists.freedesktop.org Signed-off-by: Ander Conselvan de Oliveira Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170213145733.8779-1-ander.conselvan.de.oliveira@intel.com --- drivers/gpu/drm/i915/intel_display.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 88b7d96c46a4..34c000859c0d 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -5701,7 +5701,7 @@ static u64 get_crtc_power_domains(struct drm_crtc *crtc, return mask; } -static unsigned long +static u64 modeset_get_crtc_power_domains(struct drm_crtc *crtc, struct intel_crtc_state *crtc_state) { @@ -5903,7 +5903,7 @@ static void intel_crtc_disable_noatomic(struct drm_crtc *crtc) struct intel_crtc *intel_crtc = to_intel_crtc(crtc); struct drm_i915_private *dev_priv = to_i915(crtc->dev); enum intel_display_power_domain domain; - unsigned long domains; + u64 domains; struct drm_atomic_state *state; struct intel_crtc_state *crtc_state; int ret; -- cgit From 73dec95e6ba37d8138bb111be5c9b8a1f3a622ae Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Tue, 14 Feb 2017 11:32:42 +0000 Subject: drm/i915: Emit to ringbuffer directly This removes the usage of intel_ring_emit in favour of directly writing to the ring buffer. intel_ring_emit was preventing the compiler for optimising fetch and increment of the current ring buffer pointer and therefore generating very verbose code for every write. It had no useful purpose since all ringbuffer operations are started and ended with intel_ring_begin and intel_ring_advance respectively, with no bail out in the middle possible, so it is fine to increment the tail in intel_ring_begin and let the code manage the pointer itself. Useless instruction removal amounts to approximately two and half kilobytes of saved text on my build. Not sure if this has any measurable performance implications but executing a ton of useless instructions on fast paths cannot be good. v2: * Change return from intel_ring_begin to error pointer by popular demand. * Move tail increment to intel_ring_advance to enable some error checking. v3: * Move tail advance back into intel_ring_begin. * Rebase and tidy. v4: * Complete rebase after a few months since v3. v5: * Remove unecessary cast and fix !debug compile. (Chris Wilson) v6: * Make intel_ring_offset take request as well. * Fix recording of request postfix plus a sprinkle of asserts. (Chris Wilson) v7: * Use intel_ring_offset to get the postfix. (Chris Wilson) * Convert GVT code as well. v8: * Rename *out++ to *cs++. v9: * Fix GVT out to cs conversion in GVT. v10: * Rebase for new intel_ring_begin in selftests. Signed-off-by: Tvrtko Ursulin Cc: Chris Wilson Cc: Joonas Lahtinen Cc: Zhi Wang Reviewed-by: Chris Wilson Acked-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170214113242.29241-1-tvrtko.ursulin@linux.intel.com --- drivers/gpu/drm/i915/gvt/cmd_parser.c | 36 +- drivers/gpu/drm/i915/i915_gem_context.c | 76 ++- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 40 +- drivers/gpu/drm/i915/i915_gem_gtt.c | 69 ++- drivers/gpu/drm/i915/i915_gem_request.c | 8 +- drivers/gpu/drm/i915/intel_display.c | 131 ++--- drivers/gpu/drm/i915/intel_lrc.c | 207 ++++--- drivers/gpu/drm/i915/intel_mocs.c | 51 +- drivers/gpu/drm/i915/intel_overlay.c | 79 ++- drivers/gpu/drm/i915/intel_ringbuffer.c | 592 ++++++++++----------- drivers/gpu/drm/i915/intel_ringbuffer.h | 30 +- .../gpu/drm/i915/selftests/i915_gem_coherency.c | 33 +- 12 files changed, 625 insertions(+), 727 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c index 9a4b23c3ee97..c3d44c03157c 100644 --- a/drivers/gpu/drm/i915/gvt/cmd_parser.c +++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c @@ -1513,7 +1513,7 @@ static int copy_gma_to_hva(struct intel_vgpu *vgpu, struct intel_vgpu_mm *mm, len += copy_len; gma += copy_len; } - return 0; + return len; } @@ -1630,7 +1630,7 @@ static int perform_bb_shadow(struct parser_exec_state *s) ret = copy_gma_to_hva(s->vgpu, s->vgpu->gtt.ggtt_mm, gma, gma + bb_size, dst); - if (ret) { + if (ret < 0) { gvt_err("fail to copy guest ring buffer\n"); goto unmap_src; } @@ -2594,11 +2594,8 @@ out: static int shadow_workload_ring_buffer(struct intel_vgpu_workload *workload) { struct intel_vgpu *vgpu = workload->vgpu; - int ring_id = workload->ring_id; - struct i915_gem_context *shadow_ctx = vgpu->shadow_ctx; - struct intel_ring *ring = shadow_ctx->engine[ring_id].ring; unsigned long gma_head, gma_tail, gma_top, guest_rb_size; - unsigned int copy_len = 0; + u32 *cs; int ret; guest_rb_size = _RING_CTL_BUF_SIZE(workload->rb_ctl); @@ -2612,36 +2609,33 @@ static int shadow_workload_ring_buffer(struct intel_vgpu_workload *workload) gma_top = workload->rb_start + guest_rb_size; /* allocate shadow ring buffer */ - ret = intel_ring_begin(workload->req, workload->rb_len / 4); - if (ret) - return ret; + cs = intel_ring_begin(workload->req, workload->rb_len / sizeof(u32)); + if (IS_ERR(cs)) + return PTR_ERR(cs); /* get shadow ring buffer va */ - workload->shadow_ring_buffer_va = ring->vaddr + ring->tail; + workload->shadow_ring_buffer_va = cs; /* head > tail --> copy head <-> top */ if (gma_head > gma_tail) { ret = copy_gma_to_hva(vgpu, vgpu->gtt.ggtt_mm, - gma_head, gma_top, - workload->shadow_ring_buffer_va); - if (ret) { + gma_head, gma_top, cs); + if (ret < 0) { gvt_err("fail to copy guest ring buffer\n"); return ret; } - copy_len = gma_top - gma_head; + cs += ret / sizeof(u32); gma_head = workload->rb_start; } /* copy head or start <-> tail */ - ret = copy_gma_to_hva(vgpu, vgpu->gtt.ggtt_mm, - gma_head, gma_tail, - workload->shadow_ring_buffer_va + copy_len); - if (ret) { + ret = copy_gma_to_hva(vgpu, vgpu->gtt.ggtt_mm, gma_head, gma_tail, cs); + if (ret < 0) { gvt_err("fail to copy guest ring buffer\n"); return ret; } - ring->tail += workload->rb_len; - intel_ring_advance(ring); + cs += ret / sizeof(u32); + intel_ring_advance(workload->req, cs); return 0; } @@ -2695,7 +2689,7 @@ static int shadow_indirect_ctx(struct intel_shadow_wa_ctx *wa_ctx) wa_ctx->workload->vgpu->gtt.ggtt_mm, guest_gma, guest_gma + ctx_size, map); - if (ret) { + if (ret < 0) { gvt_err("fail to copy guest indirect ctx\n"); goto unmap_src; } diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index c73bf02870d5..9037356536dc 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -596,10 +596,9 @@ static inline int mi_set_context(struct drm_i915_gem_request *req, u32 hw_flags) { struct drm_i915_private *dev_priv = req->i915; - struct intel_ring *ring = req->ring; struct intel_engine_cs *engine = req->engine; enum intel_engine_id id; - u32 flags = hw_flags | MI_MM_SPACE_GTT; + u32 *cs, flags = hw_flags | MI_MM_SPACE_GTT; const int num_rings = /* Use an extended w/a on ivb+ if signalling from other rings */ i915.semaphores ? @@ -629,99 +628,92 @@ mi_set_context(struct drm_i915_gem_request *req, u32 hw_flags) if (INTEL_GEN(dev_priv) >= 7) len += 2 + (num_rings ? 4*num_rings + 6 : 0); - ret = intel_ring_begin(req, len); - if (ret) - return ret; + cs = intel_ring_begin(req, len); + if (IS_ERR(cs)) + return PTR_ERR(cs); /* WaProgramMiArbOnOffAroundMiSetContext:ivb,vlv,hsw,bdw,chv */ if (INTEL_GEN(dev_priv) >= 7) { - intel_ring_emit(ring, MI_ARB_ON_OFF | MI_ARB_DISABLE); + *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE; if (num_rings) { struct intel_engine_cs *signaller; - intel_ring_emit(ring, - MI_LOAD_REGISTER_IMM(num_rings)); + *cs++ = MI_LOAD_REGISTER_IMM(num_rings); for_each_engine(signaller, dev_priv, id) { if (signaller == engine) continue; - intel_ring_emit_reg(ring, - RING_PSMI_CTL(signaller->mmio_base)); - intel_ring_emit(ring, - _MASKED_BIT_ENABLE(GEN6_PSMI_SLEEP_MSG_DISABLE)); + *cs++ = i915_mmio_reg_offset( + RING_PSMI_CTL(signaller->mmio_base)); + *cs++ = _MASKED_BIT_ENABLE( + GEN6_PSMI_SLEEP_MSG_DISABLE); } } } - intel_ring_emit(ring, MI_NOOP); - intel_ring_emit(ring, MI_SET_CONTEXT); - intel_ring_emit(ring, - i915_ggtt_offset(req->ctx->engine[RCS].state) | flags); + *cs++ = MI_NOOP; + *cs++ = MI_SET_CONTEXT; + *cs++ = i915_ggtt_offset(req->ctx->engine[RCS].state) | flags; /* * w/a: MI_SET_CONTEXT must always be followed by MI_NOOP * WaMiSetContext_Hang:snb,ivb,vlv */ - intel_ring_emit(ring, MI_NOOP); + *cs++ = MI_NOOP; if (INTEL_GEN(dev_priv) >= 7) { if (num_rings) { struct intel_engine_cs *signaller; i915_reg_t last_reg = {}; /* keep gcc quiet */ - intel_ring_emit(ring, - MI_LOAD_REGISTER_IMM(num_rings)); + *cs++ = MI_LOAD_REGISTER_IMM(num_rings); for_each_engine(signaller, dev_priv, id) { if (signaller == engine) continue; last_reg = RING_PSMI_CTL(signaller->mmio_base); - intel_ring_emit_reg(ring, last_reg); - intel_ring_emit(ring, - _MASKED_BIT_DISABLE(GEN6_PSMI_SLEEP_MSG_DISABLE)); + *cs++ = i915_mmio_reg_offset(last_reg); + *cs++ = _MASKED_BIT_DISABLE( + GEN6_PSMI_SLEEP_MSG_DISABLE); } /* Insert a delay before the next switch! */ - intel_ring_emit(ring, - MI_STORE_REGISTER_MEM | - MI_SRM_LRM_GLOBAL_GTT); - intel_ring_emit_reg(ring, last_reg); - intel_ring_emit(ring, - i915_ggtt_offset(engine->scratch)); - intel_ring_emit(ring, MI_NOOP); + *cs++ = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT; + *cs++ = i915_mmio_reg_offset(last_reg); + *cs++ = i915_ggtt_offset(engine->scratch); + *cs++ = MI_NOOP; } - intel_ring_emit(ring, MI_ARB_ON_OFF | MI_ARB_ENABLE); + *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; } - intel_ring_advance(ring); + intel_ring_advance(req, cs); return ret; } static int remap_l3(struct drm_i915_gem_request *req, int slice) { - u32 *remap_info = req->i915->l3_parity.remap_info[slice]; - struct intel_ring *ring = req->ring; - int i, ret; + u32 *cs, *remap_info = req->i915->l3_parity.remap_info[slice]; + int i; if (!remap_info) return 0; - ret = intel_ring_begin(req, GEN7_L3LOG_SIZE/4 * 2 + 2); - if (ret) - return ret; + cs = intel_ring_begin(req, GEN7_L3LOG_SIZE/4 * 2 + 2); + if (IS_ERR(cs)) + return PTR_ERR(cs); /* * Note: We do not worry about the concurrent register cacheline hang * here because no other code should access these registers other than * at initialization time. */ - intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(GEN7_L3LOG_SIZE/4)); + *cs++ = MI_LOAD_REGISTER_IMM(GEN7_L3LOG_SIZE/4); for (i = 0; i < GEN7_L3LOG_SIZE/4; i++) { - intel_ring_emit_reg(ring, GEN7_L3LOG(slice, i)); - intel_ring_emit(ring, remap_info[i]); + *cs++ = i915_mmio_reg_offset(GEN7_L3LOG(slice, i)); + *cs++ = remap_info[i]; } - intel_ring_emit(ring, MI_NOOP); - intel_ring_advance(ring); + *cs++ = MI_NOOP; + intel_ring_advance(req, cs); return 0; } diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 111b7f8c617a..da0846fe2ad6 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -1336,25 +1336,25 @@ i915_gem_execbuffer_move_to_active(struct list_head *vmas, static int i915_reset_gen7_sol_offsets(struct drm_i915_gem_request *req) { - struct intel_ring *ring = req->ring; - int ret, i; + u32 *cs; + int i; if (!IS_GEN7(req->i915) || req->engine->id != RCS) { DRM_DEBUG("sol reset is gen7/rcs only\n"); return -EINVAL; } - ret = intel_ring_begin(req, 4 * 3); - if (ret) - return ret; + cs = intel_ring_begin(req, 4 * 3); + if (IS_ERR(cs)) + return PTR_ERR(cs); for (i = 0; i < 4; i++) { - intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); - intel_ring_emit_reg(ring, GEN7_SO_WRITE_OFFSET(i)); - intel_ring_emit(ring, 0); + *cs++ = MI_LOAD_REGISTER_IMM(1); + *cs++ = i915_mmio_reg_offset(GEN7_SO_WRITE_OFFSET(i)); + *cs++ = 0; } - intel_ring_advance(ring); + intel_ring_advance(req, cs); return 0; } @@ -1415,7 +1415,7 @@ execbuf_submit(struct i915_execbuffer_params *params, struct drm_i915_private *dev_priv = params->request->i915; u64 exec_start, exec_len; int instp_mode; - u32 instp_mask; + u32 instp_mask, *cs; int ret; ret = i915_gem_execbuffer_move_to_gpu(params->request, vmas); @@ -1461,17 +1461,15 @@ execbuf_submit(struct i915_execbuffer_params *params, if (params->engine->id == RCS && instp_mode != dev_priv->relative_constants_mode) { - struct intel_ring *ring = params->request->ring; - - ret = intel_ring_begin(params->request, 4); - if (ret) - return ret; - - intel_ring_emit(ring, MI_NOOP); - intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); - intel_ring_emit_reg(ring, INSTPM); - intel_ring_emit(ring, instp_mask << 16 | instp_mode); - intel_ring_advance(ring); + cs = intel_ring_begin(params->request, 4); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + *cs++ = MI_NOOP; + *cs++ = MI_LOAD_REGISTER_IMM(1); + *cs++ = i915_mmio_reg_offset(INSTPM); + *cs++ = instp_mask << 16 | instp_mode; + intel_ring_advance(params->request, cs); dev_priv->relative_constants_mode = instp_mode; } diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index eebbffdb9a0b..cb5ea5d38f11 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -686,23 +686,22 @@ static int gen8_write_pdp(struct drm_i915_gem_request *req, unsigned entry, dma_addr_t addr) { - struct intel_ring *ring = req->ring; struct intel_engine_cs *engine = req->engine; - int ret; + u32 *cs; BUG_ON(entry >= 4); - ret = intel_ring_begin(req, 6); - if (ret) - return ret; + cs = intel_ring_begin(req, 6); + if (IS_ERR(cs)) + return PTR_ERR(cs); - intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); - intel_ring_emit_reg(ring, GEN8_RING_PDP_UDW(engine, entry)); - intel_ring_emit(ring, upper_32_bits(addr)); - intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); - intel_ring_emit_reg(ring, GEN8_RING_PDP_LDW(engine, entry)); - intel_ring_emit(ring, lower_32_bits(addr)); - intel_ring_advance(ring); + *cs++ = MI_LOAD_REGISTER_IMM(1); + *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(engine, entry)); + *cs++ = upper_32_bits(addr); + *cs++ = MI_LOAD_REGISTER_IMM(1); + *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(engine, entry)); + *cs++ = lower_32_bits(addr); + intel_ring_advance(req, cs); return 0; } @@ -1730,8 +1729,8 @@ static uint32_t get_pd_offset(struct i915_hw_ppgtt *ppgtt) static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt, struct drm_i915_gem_request *req) { - struct intel_ring *ring = req->ring; struct intel_engine_cs *engine = req->engine; + u32 *cs; int ret; /* NB: TLBs must be flushed and invalidated before a switch */ @@ -1739,17 +1738,17 @@ static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt, if (ret) return ret; - ret = intel_ring_begin(req, 6); - if (ret) - return ret; + cs = intel_ring_begin(req, 6); + if (IS_ERR(cs)) + return PTR_ERR(cs); - intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(2)); - intel_ring_emit_reg(ring, RING_PP_DIR_DCLV(engine)); - intel_ring_emit(ring, PP_DIR_DCLV_2G); - intel_ring_emit_reg(ring, RING_PP_DIR_BASE(engine)); - intel_ring_emit(ring, get_pd_offset(ppgtt)); - intel_ring_emit(ring, MI_NOOP); - intel_ring_advance(ring); + *cs++ = MI_LOAD_REGISTER_IMM(2); + *cs++ = i915_mmio_reg_offset(RING_PP_DIR_DCLV(engine)); + *cs++ = PP_DIR_DCLV_2G; + *cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine)); + *cs++ = get_pd_offset(ppgtt); + *cs++ = MI_NOOP; + intel_ring_advance(req, cs); return 0; } @@ -1757,8 +1756,8 @@ static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt, static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt, struct drm_i915_gem_request *req) { - struct intel_ring *ring = req->ring; struct intel_engine_cs *engine = req->engine; + u32 *cs; int ret; /* NB: TLBs must be flushed and invalidated before a switch */ @@ -1766,17 +1765,17 @@ static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt, if (ret) return ret; - ret = intel_ring_begin(req, 6); - if (ret) - return ret; - - intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(2)); - intel_ring_emit_reg(ring, RING_PP_DIR_DCLV(engine)); - intel_ring_emit(ring, PP_DIR_DCLV_2G); - intel_ring_emit_reg(ring, RING_PP_DIR_BASE(engine)); - intel_ring_emit(ring, get_pd_offset(ppgtt)); - intel_ring_emit(ring, MI_NOOP); - intel_ring_advance(ring); + cs = intel_ring_begin(req, 6); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + *cs++ = MI_LOAD_REGISTER_IMM(2); + *cs++ = i915_mmio_reg_offset(RING_PP_DIR_DCLV(engine)); + *cs++ = PP_DIR_DCLV_2G; + *cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine)); + *cs++ = get_pd_offset(ppgtt); + *cs++ = MI_NOOP; + intel_ring_advance(req, cs); /* XXX: RCS is the only one to auto invalidate the TLBs? */ if (engine->id != RCS) { diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index 24571a5289a4..2f6cfa47dc61 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -824,6 +824,7 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches) struct intel_ring *ring = request->ring; struct intel_timeline *timeline = request->timeline; struct drm_i915_gem_request *prev; + u32 *cs; int err; lockdep_assert_held(&request->i915->drm.struct_mutex); @@ -862,10 +863,9 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches) * GPU processing the request, we never over-estimate the * position of the ring's HEAD. */ - err = intel_ring_begin(request, engine->emit_breadcrumb_sz); - GEM_BUG_ON(err); - request->postfix = ring->tail; - ring->tail += engine->emit_breadcrumb_sz * sizeof(u32); + cs = intel_ring_begin(request, engine->emit_breadcrumb_sz); + GEM_BUG_ON(IS_ERR(cs)); + request->postfix = intel_ring_offset(request, cs); /* Seal the request and mark it as pending execution. Note that * we may inspect this state, without holding any locks, during diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 34c000859c0d..02e6bb2d614c 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -10158,14 +10158,12 @@ static int intel_gen2_queue_flip(struct drm_device *dev, struct drm_i915_gem_request *req, uint32_t flags) { - struct intel_ring *ring = req->ring; struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - u32 flip_mask; - int ret; + u32 flip_mask, *cs; - ret = intel_ring_begin(req, 6); - if (ret) - return ret; + cs = intel_ring_begin(req, 6); + if (IS_ERR(cs)) + return PTR_ERR(cs); /* Can't queue multiple flips, so wait for the previous * one to finish before executing the next. @@ -10174,13 +10172,12 @@ static int intel_gen2_queue_flip(struct drm_device *dev, flip_mask = MI_WAIT_FOR_PLANE_B_FLIP; else flip_mask = MI_WAIT_FOR_PLANE_A_FLIP; - intel_ring_emit(ring, MI_WAIT_FOR_EVENT | flip_mask); - intel_ring_emit(ring, MI_NOOP); - intel_ring_emit(ring, MI_DISPLAY_FLIP | - MI_DISPLAY_FLIP_PLANE(intel_crtc->plane)); - intel_ring_emit(ring, fb->pitches[0]); - intel_ring_emit(ring, intel_crtc->flip_work->gtt_offset); - intel_ring_emit(ring, 0); /* aux display base address, unused */ + *cs++ = MI_WAIT_FOR_EVENT | flip_mask; + *cs++ = MI_NOOP; + *cs++ = MI_DISPLAY_FLIP | MI_DISPLAY_FLIP_PLANE(intel_crtc->plane); + *cs++ = fb->pitches[0]; + *cs++ = intel_crtc->flip_work->gtt_offset; + *cs++ = 0; /* aux display base address, unused */ return 0; } @@ -10192,26 +10189,23 @@ static int intel_gen3_queue_flip(struct drm_device *dev, struct drm_i915_gem_request *req, uint32_t flags) { - struct intel_ring *ring = req->ring; struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - u32 flip_mask; - int ret; + u32 flip_mask, *cs; - ret = intel_ring_begin(req, 6); - if (ret) - return ret; + cs = intel_ring_begin(req, 6); + if (IS_ERR(cs)) + return PTR_ERR(cs); if (intel_crtc->plane) flip_mask = MI_WAIT_FOR_PLANE_B_FLIP; else flip_mask = MI_WAIT_FOR_PLANE_A_FLIP; - intel_ring_emit(ring, MI_WAIT_FOR_EVENT | flip_mask); - intel_ring_emit(ring, MI_NOOP); - intel_ring_emit(ring, MI_DISPLAY_FLIP_I915 | - MI_DISPLAY_FLIP_PLANE(intel_crtc->plane)); - intel_ring_emit(ring, fb->pitches[0]); - intel_ring_emit(ring, intel_crtc->flip_work->gtt_offset); - intel_ring_emit(ring, MI_NOOP); + *cs++ = MI_WAIT_FOR_EVENT | flip_mask; + *cs++ = MI_NOOP; + *cs++ = MI_DISPLAY_FLIP_I915 | MI_DISPLAY_FLIP_PLANE(intel_crtc->plane); + *cs++ = fb->pitches[0]; + *cs++ = intel_crtc->flip_work->gtt_offset; + *cs++ = MI_NOOP; return 0; } @@ -10223,25 +10217,22 @@ static int intel_gen4_queue_flip(struct drm_device *dev, struct drm_i915_gem_request *req, uint32_t flags) { - struct intel_ring *ring = req->ring; struct drm_i915_private *dev_priv = to_i915(dev); struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - uint32_t pf, pipesrc; - int ret; + u32 pf, pipesrc, *cs; - ret = intel_ring_begin(req, 4); - if (ret) - return ret; + cs = intel_ring_begin(req, 4); + if (IS_ERR(cs)) + return PTR_ERR(cs); /* i965+ uses the linear or tiled offsets from the * Display Registers (which do not change across a page-flip) * so we need only reprogram the base address. */ - intel_ring_emit(ring, MI_DISPLAY_FLIP | - MI_DISPLAY_FLIP_PLANE(intel_crtc->plane)); - intel_ring_emit(ring, fb->pitches[0]); - intel_ring_emit(ring, intel_crtc->flip_work->gtt_offset | - intel_fb_modifier_to_tiling(fb->modifier)); + *cs++ = MI_DISPLAY_FLIP | MI_DISPLAY_FLIP_PLANE(intel_crtc->plane); + *cs++ = fb->pitches[0]; + *cs++ = intel_crtc->flip_work->gtt_offset | + intel_fb_modifier_to_tiling(fb->modifier); /* XXX Enabling the panel-fitter across page-flip is so far * untested on non-native modes, so ignore it for now. @@ -10249,7 +10240,7 @@ static int intel_gen4_queue_flip(struct drm_device *dev, */ pf = 0; pipesrc = I915_READ(PIPESRC(intel_crtc->pipe)) & 0x0fff0fff; - intel_ring_emit(ring, pf | pipesrc); + *cs++ = pf | pipesrc; return 0; } @@ -10261,21 +10252,17 @@ static int intel_gen6_queue_flip(struct drm_device *dev, struct drm_i915_gem_request *req, uint32_t flags) { - struct intel_ring *ring = req->ring; struct drm_i915_private *dev_priv = to_i915(dev); struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - uint32_t pf, pipesrc; - int ret; + u32 pf, pipesrc, *cs; - ret = intel_ring_begin(req, 4); - if (ret) - return ret; + cs = intel_ring_begin(req, 4); + if (IS_ERR(cs)) + return PTR_ERR(cs); - intel_ring_emit(ring, MI_DISPLAY_FLIP | - MI_DISPLAY_FLIP_PLANE(intel_crtc->plane)); - intel_ring_emit(ring, fb->pitches[0] | - intel_fb_modifier_to_tiling(fb->modifier)); - intel_ring_emit(ring, intel_crtc->flip_work->gtt_offset); + *cs++ = MI_DISPLAY_FLIP | MI_DISPLAY_FLIP_PLANE(intel_crtc->plane); + *cs++ = fb->pitches[0] | intel_fb_modifier_to_tiling(fb->modifier); + *cs++ = intel_crtc->flip_work->gtt_offset; /* Contrary to the suggestions in the documentation, * "Enable Panel Fitter" does not seem to be required when page @@ -10285,7 +10272,7 @@ static int intel_gen6_queue_flip(struct drm_device *dev, */ pf = 0; pipesrc = I915_READ(PIPESRC(intel_crtc->pipe)) & 0x0fff0fff; - intel_ring_emit(ring, pf | pipesrc); + *cs++ = pf | pipesrc; return 0; } @@ -10298,9 +10285,8 @@ static int intel_gen7_queue_flip(struct drm_device *dev, uint32_t flags) { struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_ring *ring = req->ring; struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - uint32_t plane_bit = 0; + u32 *cs, plane_bit = 0; int len, ret; switch (intel_crtc->plane) { @@ -10344,9 +10330,9 @@ static int intel_gen7_queue_flip(struct drm_device *dev, if (ret) return ret; - ret = intel_ring_begin(req, len); - if (ret) - return ret; + cs = intel_ring_begin(req, len); + if (IS_ERR(cs)) + return PTR_ERR(cs); /* Unmask the flip-done completion message. Note that the bspec says that * we should do this for both the BCS and RCS, and that we must not unmask @@ -10358,31 +10344,28 @@ static int intel_gen7_queue_flip(struct drm_device *dev, * to zero does lead to lockups within MI_DISPLAY_FLIP. */ if (req->engine->id == RCS) { - intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); - intel_ring_emit_reg(ring, DERRMR); - intel_ring_emit(ring, ~(DERRMR_PIPEA_PRI_FLIP_DONE | - DERRMR_PIPEB_PRI_FLIP_DONE | - DERRMR_PIPEC_PRI_FLIP_DONE)); + *cs++ = MI_LOAD_REGISTER_IMM(1); + *cs++ = i915_mmio_reg_offset(DERRMR); + *cs++ = ~(DERRMR_PIPEA_PRI_FLIP_DONE | + DERRMR_PIPEB_PRI_FLIP_DONE | + DERRMR_PIPEC_PRI_FLIP_DONE); if (IS_GEN8(dev_priv)) - intel_ring_emit(ring, MI_STORE_REGISTER_MEM_GEN8 | - MI_SRM_LRM_GLOBAL_GTT); + *cs++ = MI_STORE_REGISTER_MEM_GEN8 | + MI_SRM_LRM_GLOBAL_GTT; else - intel_ring_emit(ring, MI_STORE_REGISTER_MEM | - MI_SRM_LRM_GLOBAL_GTT); - intel_ring_emit_reg(ring, DERRMR); - intel_ring_emit(ring, - i915_ggtt_offset(req->engine->scratch) + 256); + *cs++ = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT; + *cs++ = i915_mmio_reg_offset(DERRMR); + *cs++ = i915_ggtt_offset(req->engine->scratch) + 256; if (IS_GEN8(dev_priv)) { - intel_ring_emit(ring, 0); - intel_ring_emit(ring, MI_NOOP); + *cs++ = 0; + *cs++ = MI_NOOP; } } - intel_ring_emit(ring, MI_DISPLAY_FLIP_I915 | plane_bit); - intel_ring_emit(ring, fb->pitches[0] | - intel_fb_modifier_to_tiling(fb->modifier)); - intel_ring_emit(ring, intel_crtc->flip_work->gtt_offset); - intel_ring_emit(ring, (MI_NOOP)); + *cs++ = MI_DISPLAY_FLIP_I915 | plane_bit; + *cs++ = fb->pitches[0] | intel_fb_modifier_to_tiling(fb->modifier); + *cs++ = intel_crtc->flip_work->gtt_offset; + *cs++ = MI_NOOP; return 0; } diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index af717e1356a9..f567d4b08863 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -834,6 +834,7 @@ static int execlists_request_alloc(struct drm_i915_gem_request *request) { struct intel_engine_cs *engine = request->engine; struct intel_context *ce = &request->ctx->engine[engine->id]; + u32 *cs; int ret; GEM_BUG_ON(!ce->pin_count); @@ -858,9 +859,11 @@ static int execlists_request_alloc(struct drm_i915_gem_request *request) goto err; } - ret = intel_ring_begin(request, 0); - if (ret) + cs = intel_ring_begin(request, 0); + if (IS_ERR(cs)) { + ret = PTR_ERR(cs); goto err_unreserve; + } if (!ce->initialised) { ret = engine->init_context(request); @@ -889,9 +892,9 @@ err: static int intel_logical_ring_workarounds_emit(struct drm_i915_gem_request *req) { - int ret, i; - struct intel_ring *ring = req->ring; struct i915_workarounds *w = &req->i915->workarounds; + u32 *cs; + int ret, i; if (w->count == 0) return 0; @@ -900,18 +903,18 @@ static int intel_logical_ring_workarounds_emit(struct drm_i915_gem_request *req) if (ret) return ret; - ret = intel_ring_begin(req, w->count * 2 + 2); - if (ret) - return ret; + cs = intel_ring_begin(req, w->count * 2 + 2); + if (IS_ERR(cs)) + return PTR_ERR(cs); - intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(w->count)); + *cs++ = MI_LOAD_REGISTER_IMM(w->count); for (i = 0; i < w->count; i++) { - intel_ring_emit_reg(ring, w->reg[i].addr); - intel_ring_emit(ring, w->reg[i].value); + *cs++ = i915_mmio_reg_offset(w->reg[i].addr); + *cs++ = w->reg[i].value; } - intel_ring_emit(ring, MI_NOOP); + *cs++ = MI_NOOP; - intel_ring_advance(ring); + intel_ring_advance(req, cs); ret = req->engine->emit_flush(req, EMIT_BARRIER); if (ret) @@ -1404,27 +1407,27 @@ static void reset_common_ring(struct intel_engine_cs *engine, static int intel_logical_ring_emit_pdps(struct drm_i915_gem_request *req) { struct i915_hw_ppgtt *ppgtt = req->ctx->ppgtt; - struct intel_ring *ring = req->ring; struct intel_engine_cs *engine = req->engine; const int num_lri_cmds = GEN8_LEGACY_PDPES * 2; - int i, ret; + u32 *cs; + int i; - ret = intel_ring_begin(req, num_lri_cmds * 2 + 2); - if (ret) - return ret; + cs = intel_ring_begin(req, num_lri_cmds * 2 + 2); + if (IS_ERR(cs)) + return PTR_ERR(cs); - intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(num_lri_cmds)); + *cs++ = MI_LOAD_REGISTER_IMM(num_lri_cmds); for (i = GEN8_LEGACY_PDPES - 1; i >= 0; i--) { const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i); - intel_ring_emit_reg(ring, GEN8_RING_PDP_UDW(engine, i)); - intel_ring_emit(ring, upper_32_bits(pd_daddr)); - intel_ring_emit_reg(ring, GEN8_RING_PDP_LDW(engine, i)); - intel_ring_emit(ring, lower_32_bits(pd_daddr)); + *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(engine, i)); + *cs++ = upper_32_bits(pd_daddr); + *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(engine, i)); + *cs++ = lower_32_bits(pd_daddr); } - intel_ring_emit(ring, MI_NOOP); - intel_ring_advance(ring); + *cs++ = MI_NOOP; + intel_ring_advance(req, cs); return 0; } @@ -1433,8 +1436,8 @@ static int gen8_emit_bb_start(struct drm_i915_gem_request *req, u64 offset, u32 len, unsigned int dispatch_flags) { - struct intel_ring *ring = req->ring; bool ppgtt = !(dispatch_flags & I915_DISPATCH_SECURE); + u32 *cs; int ret; /* Don't rely in hw updating PDPs, specially in lite-restore. @@ -1455,19 +1458,17 @@ static int gen8_emit_bb_start(struct drm_i915_gem_request *req, req->ctx->ppgtt->pd_dirty_rings &= ~intel_engine_flag(req->engine); } - ret = intel_ring_begin(req, 4); - if (ret) - return ret; + cs = intel_ring_begin(req, 4); + if (IS_ERR(cs)) + return PTR_ERR(cs); /* FIXME(BDW): Address space and security selectors. */ - intel_ring_emit(ring, MI_BATCH_BUFFER_START_GEN8 | - (ppgtt<<8) | - (dispatch_flags & I915_DISPATCH_RS ? - MI_BATCH_RESOURCE_STREAMER : 0)); - intel_ring_emit(ring, lower_32_bits(offset)); - intel_ring_emit(ring, upper_32_bits(offset)); - intel_ring_emit(ring, MI_NOOP); - intel_ring_advance(ring); + *cs++ = MI_BATCH_BUFFER_START_GEN8 | (ppgtt << 8) | (dispatch_flags & + I915_DISPATCH_RS ? MI_BATCH_RESOURCE_STREAMER : 0); + *cs++ = lower_32_bits(offset); + *cs++ = upper_32_bits(offset); + *cs++ = MI_NOOP; + intel_ring_advance(req, cs); return 0; } @@ -1488,13 +1489,11 @@ static void gen8_logical_ring_disable_irq(struct intel_engine_cs *engine) static int gen8_emit_flush(struct drm_i915_gem_request *request, u32 mode) { - struct intel_ring *ring = request->ring; - u32 cmd; - int ret; + u32 cmd, *cs; - ret = intel_ring_begin(request, 4); - if (ret) - return ret; + cs = intel_ring_begin(request, 4); + if (IS_ERR(cs)) + return PTR_ERR(cs); cmd = MI_FLUSH_DW + 1; @@ -1511,13 +1510,11 @@ static int gen8_emit_flush(struct drm_i915_gem_request *request, u32 mode) cmd |= MI_INVALIDATE_BSD; } - intel_ring_emit(ring, cmd); - intel_ring_emit(ring, - I915_GEM_HWS_SCRATCH_ADDR | - MI_FLUSH_DW_USE_GTT); - intel_ring_emit(ring, 0); /* upper addr */ - intel_ring_emit(ring, 0); /* value */ - intel_ring_advance(ring); + *cs++ = cmd; + *cs++ = I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT; + *cs++ = 0; /* upper addr */ + *cs++ = 0; /* value */ + intel_ring_advance(request, cs); return 0; } @@ -1525,13 +1522,11 @@ static int gen8_emit_flush(struct drm_i915_gem_request *request, u32 mode) static int gen8_emit_flush_render(struct drm_i915_gem_request *request, u32 mode) { - struct intel_ring *ring = request->ring; struct intel_engine_cs *engine = request->engine; u32 scratch_addr = i915_ggtt_offset(engine->scratch) + 2 * CACHELINE_BYTES; bool vf_flush_wa = false, dc_flush_wa = false; - u32 flags = 0; - int ret; + u32 *cs, flags = 0; int len; flags |= PIPE_CONTROL_CS_STALL; @@ -1573,45 +1568,45 @@ static int gen8_emit_flush_render(struct drm_i915_gem_request *request, if (dc_flush_wa) len += 12; - ret = intel_ring_begin(request, len); - if (ret) - return ret; + cs = intel_ring_begin(request, len); + if (IS_ERR(cs)) + return PTR_ERR(cs); if (vf_flush_wa) { - intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(6)); - intel_ring_emit(ring, 0); - intel_ring_emit(ring, 0); - intel_ring_emit(ring, 0); - intel_ring_emit(ring, 0); - intel_ring_emit(ring, 0); + *cs++ = GFX_OP_PIPE_CONTROL(6); + *cs++ = 0; + *cs++ = 0; + *cs++ = 0; + *cs++ = 0; + *cs++ = 0; } if (dc_flush_wa) { - intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(6)); - intel_ring_emit(ring, PIPE_CONTROL_DC_FLUSH_ENABLE); - intel_ring_emit(ring, 0); - intel_ring_emit(ring, 0); - intel_ring_emit(ring, 0); - intel_ring_emit(ring, 0); + *cs++ = GFX_OP_PIPE_CONTROL(6); + *cs++ = PIPE_CONTROL_DC_FLUSH_ENABLE; + *cs++ = 0; + *cs++ = 0; + *cs++ = 0; + *cs++ = 0; } - intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(6)); - intel_ring_emit(ring, flags); - intel_ring_emit(ring, scratch_addr); - intel_ring_emit(ring, 0); - intel_ring_emit(ring, 0); - intel_ring_emit(ring, 0); + *cs++ = GFX_OP_PIPE_CONTROL(6); + *cs++ = flags; + *cs++ = scratch_addr; + *cs++ = 0; + *cs++ = 0; + *cs++ = 0; if (dc_flush_wa) { - intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(6)); - intel_ring_emit(ring, PIPE_CONTROL_CS_STALL); - intel_ring_emit(ring, 0); - intel_ring_emit(ring, 0); - intel_ring_emit(ring, 0); - intel_ring_emit(ring, 0); + *cs++ = GFX_OP_PIPE_CONTROL(6); + *cs++ = PIPE_CONTROL_CS_STALL; + *cs++ = 0; + *cs++ = 0; + *cs++ = 0; + *cs++ = 0; } - intel_ring_advance(ring); + intel_ring_advance(request, cs); return 0; } @@ -1621,34 +1616,33 @@ static int gen8_emit_flush_render(struct drm_i915_gem_request *request, * used as a workaround for not being allowed to do lite * restore with HEAD==TAIL (WaIdleLiteRestore). */ -static void gen8_emit_wa_tail(struct drm_i915_gem_request *request, u32 *out) +static void gen8_emit_wa_tail(struct drm_i915_gem_request *request, u32 *cs) { - *out++ = MI_NOOP; - *out++ = MI_NOOP; - request->wa_tail = intel_ring_offset(request->ring, out); + *cs++ = MI_NOOP; + *cs++ = MI_NOOP; + request->wa_tail = intel_ring_offset(request, cs); } -static void gen8_emit_breadcrumb(struct drm_i915_gem_request *request, - u32 *out) +static void gen8_emit_breadcrumb(struct drm_i915_gem_request *request, u32 *cs) { /* w/a: bit 5 needs to be zero for MI_FLUSH_DW address. */ BUILD_BUG_ON(I915_GEM_HWS_INDEX_ADDR & (1 << 5)); - *out++ = (MI_FLUSH_DW + 1) | MI_FLUSH_DW_OP_STOREDW; - *out++ = intel_hws_seqno_address(request->engine) | MI_FLUSH_DW_USE_GTT; - *out++ = 0; - *out++ = request->global_seqno; - *out++ = MI_USER_INTERRUPT; - *out++ = MI_NOOP; - request->tail = intel_ring_offset(request->ring, out); + *cs++ = (MI_FLUSH_DW + 1) | MI_FLUSH_DW_OP_STOREDW; + *cs++ = intel_hws_seqno_address(request->engine) | MI_FLUSH_DW_USE_GTT; + *cs++ = 0; + *cs++ = request->global_seqno; + *cs++ = MI_USER_INTERRUPT; + *cs++ = MI_NOOP; + request->tail = intel_ring_offset(request, cs); - gen8_emit_wa_tail(request, out); + gen8_emit_wa_tail(request, cs); } static const int gen8_emit_breadcrumb_sz = 6 + WA_TAIL_DWORDS; static void gen8_emit_breadcrumb_render(struct drm_i915_gem_request *request, - u32 *out) + u32 *cs) { /* We're using qword write, seqno should be aligned to 8 bytes. */ BUILD_BUG_ON(I915_GEM_HWS_INDEX & 1); @@ -1657,20 +1651,19 @@ static void gen8_emit_breadcrumb_render(struct drm_i915_gem_request *request, * need a prior CS_STALL, which is emitted by the flush * following the batch. */ - *out++ = GFX_OP_PIPE_CONTROL(6); - *out++ = (PIPE_CONTROL_GLOBAL_GTT_IVB | - PIPE_CONTROL_CS_STALL | - PIPE_CONTROL_QW_WRITE); - *out++ = intel_hws_seqno_address(request->engine); - *out++ = 0; - *out++ = request->global_seqno; + *cs++ = GFX_OP_PIPE_CONTROL(6); + *cs++ = PIPE_CONTROL_GLOBAL_GTT_IVB | PIPE_CONTROL_CS_STALL | + PIPE_CONTROL_QW_WRITE; + *cs++ = intel_hws_seqno_address(request->engine); + *cs++ = 0; + *cs++ = request->global_seqno; /* We're thrashing one dword of HWS. */ - *out++ = 0; - *out++ = MI_USER_INTERRUPT; - *out++ = MI_NOOP; - request->tail = intel_ring_offset(request->ring, out); + *cs++ = 0; + *cs++ = MI_USER_INTERRUPT; + *cs++ = MI_NOOP; + request->tail = intel_ring_offset(request, cs); - gen8_emit_wa_tail(request, out); + gen8_emit_wa_tail(request, cs); } static const int gen8_emit_breadcrumb_render_sz = 8 + WA_TAIL_DWORDS; diff --git a/drivers/gpu/drm/i915/intel_mocs.c b/drivers/gpu/drm/i915/intel_mocs.c index 773e36253e7c..92e461c68385 100644 --- a/drivers/gpu/drm/i915/intel_mocs.c +++ b/drivers/gpu/drm/i915/intel_mocs.c @@ -276,23 +276,22 @@ int intel_mocs_init_engine(struct intel_engine_cs *engine) static int emit_mocs_control_table(struct drm_i915_gem_request *req, const struct drm_i915_mocs_table *table) { - struct intel_ring *ring = req->ring; enum intel_engine_id engine = req->engine->id; unsigned int index; - int ret; + u32 *cs; if (WARN_ON(table->size > GEN9_NUM_MOCS_ENTRIES)) return -ENODEV; - ret = intel_ring_begin(req, 2 + 2 * GEN9_NUM_MOCS_ENTRIES); - if (ret) - return ret; + cs = intel_ring_begin(req, 2 + 2 * GEN9_NUM_MOCS_ENTRIES); + if (IS_ERR(cs)) + return PTR_ERR(cs); - intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(GEN9_NUM_MOCS_ENTRIES)); + *cs++ = MI_LOAD_REGISTER_IMM(GEN9_NUM_MOCS_ENTRIES); for (index = 0; index < table->size; index++) { - intel_ring_emit_reg(ring, mocs_register(engine, index)); - intel_ring_emit(ring, table->table[index].control_value); + *cs++ = i915_mmio_reg_offset(mocs_register(engine, index)); + *cs++ = table->table[index].control_value; } /* @@ -304,12 +303,12 @@ static int emit_mocs_control_table(struct drm_i915_gem_request *req, * that value to all the used entries. */ for (; index < GEN9_NUM_MOCS_ENTRIES; index++) { - intel_ring_emit_reg(ring, mocs_register(engine, index)); - intel_ring_emit(ring, table->table[0].control_value); + *cs++ = i915_mmio_reg_offset(mocs_register(engine, index)); + *cs++ = table->table[0].control_value; } - intel_ring_emit(ring, MI_NOOP); - intel_ring_advance(ring); + *cs++ = MI_NOOP; + intel_ring_advance(req, cs); return 0; } @@ -336,29 +335,27 @@ static inline u32 l3cc_combine(const struct drm_i915_mocs_table *table, static int emit_mocs_l3cc_table(struct drm_i915_gem_request *req, const struct drm_i915_mocs_table *table) { - struct intel_ring *ring = req->ring; unsigned int i; - int ret; + u32 *cs; if (WARN_ON(table->size > GEN9_NUM_MOCS_ENTRIES)) return -ENODEV; - ret = intel_ring_begin(req, 2 + GEN9_NUM_MOCS_ENTRIES); - if (ret) - return ret; + cs = intel_ring_begin(req, 2 + GEN9_NUM_MOCS_ENTRIES); + if (IS_ERR(cs)) + return PTR_ERR(cs); - intel_ring_emit(ring, - MI_LOAD_REGISTER_IMM(GEN9_NUM_MOCS_ENTRIES / 2)); + *cs++ = MI_LOAD_REGISTER_IMM(GEN9_NUM_MOCS_ENTRIES / 2); for (i = 0; i < table->size/2; i++) { - intel_ring_emit_reg(ring, GEN9_LNCFCMOCS(i)); - intel_ring_emit(ring, l3cc_combine(table, 2*i, 2*i+1)); + *cs++ = i915_mmio_reg_offset(GEN9_LNCFCMOCS(i)); + *cs++ = l3cc_combine(table, 2 * i, 2 * i + 1); } if (table->size & 0x01) { /* Odd table size - 1 left over */ - intel_ring_emit_reg(ring, GEN9_LNCFCMOCS(i)); - intel_ring_emit(ring, l3cc_combine(table, 2*i, 0)); + *cs++ = i915_mmio_reg_offset(GEN9_LNCFCMOCS(i)); + *cs++ = l3cc_combine(table, 2 * i, 0); i++; } @@ -368,12 +365,12 @@ static int emit_mocs_l3cc_table(struct drm_i915_gem_request *req, * they are reserved by the hardware. */ for (; i < GEN9_NUM_MOCS_ENTRIES / 2; i++) { - intel_ring_emit_reg(ring, GEN9_LNCFCMOCS(i)); - intel_ring_emit(ring, l3cc_combine(table, 0, 0)); + *cs++ = i915_mmio_reg_offset(GEN9_LNCFCMOCS(i)); + *cs++ = l3cc_combine(table, 0, 0); } - intel_ring_emit(ring, MI_NOOP); - intel_ring_advance(ring); + *cs++ = MI_NOOP; + intel_ring_advance(req, cs); return 0; } diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c index 0608fad7f593..5ef9f5bfb92c 100644 --- a/drivers/gpu/drm/i915/intel_overlay.c +++ b/drivers/gpu/drm/i915/intel_overlay.c @@ -267,8 +267,7 @@ static int intel_overlay_on(struct intel_overlay *overlay) { struct drm_i915_private *dev_priv = overlay->i915; struct drm_i915_gem_request *req; - struct intel_ring *ring; - int ret; + u32 *cs; WARN_ON(overlay->active); WARN_ON(IS_I830(dev_priv) && !(dev_priv->quirks & QUIRK_PIPEA_FORCE)); @@ -277,10 +276,10 @@ static int intel_overlay_on(struct intel_overlay *overlay) if (IS_ERR(req)) return PTR_ERR(req); - ret = intel_ring_begin(req, 4); - if (ret) { + cs = intel_ring_begin(req, 4); + if (IS_ERR(cs)) { i915_add_request_no_flush(req); - return ret; + return PTR_ERR(cs); } overlay->active = true; @@ -288,12 +287,11 @@ static int intel_overlay_on(struct intel_overlay *overlay) if (IS_I830(dev_priv)) i830_overlay_clock_gating(dev_priv, false); - ring = req->ring; - intel_ring_emit(ring, MI_OVERLAY_FLIP | MI_OVERLAY_ON); - intel_ring_emit(ring, overlay->flip_addr | OFC_UPDATE); - intel_ring_emit(ring, MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP); - intel_ring_emit(ring, MI_NOOP); - intel_ring_advance(ring); + *cs++ = MI_OVERLAY_FLIP | MI_OVERLAY_ON; + *cs++ = overlay->flip_addr | OFC_UPDATE; + *cs++ = MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP; + *cs++ = MI_NOOP; + intel_ring_advance(req, cs); return intel_overlay_do_wait_request(overlay, req, NULL); } @@ -326,10 +324,8 @@ static int intel_overlay_continue(struct intel_overlay *overlay, { struct drm_i915_private *dev_priv = overlay->i915; struct drm_i915_gem_request *req; - struct intel_ring *ring; u32 flip_addr = overlay->flip_addr; - u32 tmp; - int ret; + u32 tmp, *cs; WARN_ON(!overlay->active); @@ -345,16 +341,15 @@ static int intel_overlay_continue(struct intel_overlay *overlay, if (IS_ERR(req)) return PTR_ERR(req); - ret = intel_ring_begin(req, 2); - if (ret) { + cs = intel_ring_begin(req, 2); + if (IS_ERR(cs)) { i915_add_request_no_flush(req); - return ret; + return PTR_ERR(cs); } - ring = req->ring; - intel_ring_emit(ring, MI_OVERLAY_FLIP | MI_OVERLAY_CONTINUE); - intel_ring_emit(ring, flip_addr); - intel_ring_advance(ring); + *cs++ = MI_OVERLAY_FLIP | MI_OVERLAY_CONTINUE; + *cs++ = flip_addr; + intel_ring_advance(req, cs); intel_overlay_flip_prepare(overlay, vma); @@ -408,9 +403,7 @@ static void intel_overlay_off_tail(struct i915_gem_active *active, static int intel_overlay_off(struct intel_overlay *overlay) { struct drm_i915_gem_request *req; - struct intel_ring *ring; - u32 flip_addr = overlay->flip_addr; - int ret; + u32 *cs, flip_addr = overlay->flip_addr; WARN_ON(!overlay->active); @@ -424,25 +417,23 @@ static int intel_overlay_off(struct intel_overlay *overlay) if (IS_ERR(req)) return PTR_ERR(req); - ret = intel_ring_begin(req, 6); - if (ret) { + cs = intel_ring_begin(req, 6); + if (IS_ERR(cs)) { i915_add_request_no_flush(req); - return ret; + return PTR_ERR(cs); } - ring = req->ring; - /* wait for overlay to go idle */ - intel_ring_emit(ring, MI_OVERLAY_FLIP | MI_OVERLAY_CONTINUE); - intel_ring_emit(ring, flip_addr); - intel_ring_emit(ring, MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP); + *cs++ = MI_OVERLAY_FLIP | MI_OVERLAY_CONTINUE; + *cs++ = flip_addr; + *cs++ = MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP; /* turn overlay off */ - intel_ring_emit(ring, MI_OVERLAY_FLIP | MI_OVERLAY_OFF); - intel_ring_emit(ring, flip_addr); - intel_ring_emit(ring, MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP); + *cs++ = MI_OVERLAY_FLIP | MI_OVERLAY_OFF; + *cs++ = flip_addr; + *cs++ = MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP; - intel_ring_advance(ring); + intel_ring_advance(req, cs); intel_overlay_flip_prepare(overlay, NULL); @@ -465,6 +456,7 @@ static int intel_overlay_recover_from_interrupt(struct intel_overlay *overlay) static int intel_overlay_release_old_vid(struct intel_overlay *overlay) { struct drm_i915_private *dev_priv = overlay->i915; + u32 *cs; int ret; lockdep_assert_held(&dev_priv->drm.struct_mutex); @@ -478,23 +470,20 @@ static int intel_overlay_release_old_vid(struct intel_overlay *overlay) if (I915_READ(ISR) & I915_OVERLAY_PLANE_FLIP_PENDING_INTERRUPT) { /* synchronous slowpath */ struct drm_i915_gem_request *req; - struct intel_ring *ring; req = alloc_request(overlay); if (IS_ERR(req)) return PTR_ERR(req); - ret = intel_ring_begin(req, 2); - if (ret) { + cs = intel_ring_begin(req, 2); + if (IS_ERR(cs)) { i915_add_request_no_flush(req); - return ret; + return PTR_ERR(cs); } - ring = req->ring; - intel_ring_emit(ring, - MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP); - intel_ring_emit(ring, MI_NOOP); - intel_ring_advance(ring); + *cs++ = MI_WAIT_FOR_EVENT | MI_WAIT_FOR_OVERLAY_FLIP; + *cs++ = MI_NOOP; + intel_ring_advance(req, cs); ret = intel_overlay_do_wait_request(overlay, req, intel_overlay_release_old_vid_tail); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 9fba5664376e..0b9030c36625 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -61,22 +61,20 @@ void intel_ring_update_space(struct intel_ring *ring) static int gen2_render_ring_flush(struct drm_i915_gem_request *req, u32 mode) { - struct intel_ring *ring = req->ring; - u32 cmd; - int ret; + u32 cmd, *cs; cmd = MI_FLUSH; if (mode & EMIT_INVALIDATE) cmd |= MI_READ_FLUSH; - ret = intel_ring_begin(req, 2); - if (ret) - return ret; + cs = intel_ring_begin(req, 2); + if (IS_ERR(cs)) + return PTR_ERR(cs); - intel_ring_emit(ring, cmd); - intel_ring_emit(ring, MI_NOOP); - intel_ring_advance(ring); + *cs++ = cmd; + *cs++ = MI_NOOP; + intel_ring_advance(req, cs); return 0; } @@ -84,9 +82,7 @@ gen2_render_ring_flush(struct drm_i915_gem_request *req, u32 mode) static int gen4_render_ring_flush(struct drm_i915_gem_request *req, u32 mode) { - struct intel_ring *ring = req->ring; - u32 cmd; - int ret; + u32 cmd, *cs; /* * read/write caches: @@ -123,13 +119,13 @@ gen4_render_ring_flush(struct drm_i915_gem_request *req, u32 mode) cmd |= MI_INVALIDATE_ISP; } - ret = intel_ring_begin(req, 2); - if (ret) - return ret; + cs = intel_ring_begin(req, 2); + if (IS_ERR(cs)) + return PTR_ERR(cs); - intel_ring_emit(ring, cmd); - intel_ring_emit(ring, MI_NOOP); - intel_ring_advance(ring); + *cs++ = cmd; + *cs++ = MI_NOOP; + intel_ring_advance(req, cs); return 0; } @@ -174,35 +170,33 @@ gen4_render_ring_flush(struct drm_i915_gem_request *req, u32 mode) static int intel_emit_post_sync_nonzero_flush(struct drm_i915_gem_request *req) { - struct intel_ring *ring = req->ring; u32 scratch_addr = i915_ggtt_offset(req->engine->scratch) + 2 * CACHELINE_BYTES; - int ret; - - ret = intel_ring_begin(req, 6); - if (ret) - return ret; - - intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(5)); - intel_ring_emit(ring, PIPE_CONTROL_CS_STALL | - PIPE_CONTROL_STALL_AT_SCOREBOARD); - intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT); - intel_ring_emit(ring, 0); /* low dword */ - intel_ring_emit(ring, 0); /* high dword */ - intel_ring_emit(ring, MI_NOOP); - intel_ring_advance(ring); - - ret = intel_ring_begin(req, 6); - if (ret) - return ret; - - intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(5)); - intel_ring_emit(ring, PIPE_CONTROL_QW_WRITE); - intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT); - intel_ring_emit(ring, 0); - intel_ring_emit(ring, 0); - intel_ring_emit(ring, MI_NOOP); - intel_ring_advance(ring); + u32 *cs; + + cs = intel_ring_begin(req, 6); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + *cs++ = GFX_OP_PIPE_CONTROL(5); + *cs++ = PIPE_CONTROL_CS_STALL | PIPE_CONTROL_STALL_AT_SCOREBOARD; + *cs++ = scratch_addr | PIPE_CONTROL_GLOBAL_GTT; + *cs++ = 0; /* low dword */ + *cs++ = 0; /* high dword */ + *cs++ = MI_NOOP; + intel_ring_advance(req, cs); + + cs = intel_ring_begin(req, 6); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + *cs++ = GFX_OP_PIPE_CONTROL(5); + *cs++ = PIPE_CONTROL_QW_WRITE; + *cs++ = scratch_addr | PIPE_CONTROL_GLOBAL_GTT; + *cs++ = 0; + *cs++ = 0; + *cs++ = MI_NOOP; + intel_ring_advance(req, cs); return 0; } @@ -210,10 +204,9 @@ intel_emit_post_sync_nonzero_flush(struct drm_i915_gem_request *req) static int gen6_render_ring_flush(struct drm_i915_gem_request *req, u32 mode) { - struct intel_ring *ring = req->ring; u32 scratch_addr = i915_ggtt_offset(req->engine->scratch) + 2 * CACHELINE_BYTES; - u32 flags = 0; + u32 *cs, flags = 0; int ret; /* Force SNB workarounds for PIPE_CONTROL flushes */ @@ -247,15 +240,15 @@ gen6_render_ring_flush(struct drm_i915_gem_request *req, u32 mode) flags |= PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_CS_STALL; } - ret = intel_ring_begin(req, 4); - if (ret) - return ret; + cs = intel_ring_begin(req, 4); + if (IS_ERR(cs)) + return PTR_ERR(cs); - intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4)); - intel_ring_emit(ring, flags); - intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT); - intel_ring_emit(ring, 0); - intel_ring_advance(ring); + *cs++ = GFX_OP_PIPE_CONTROL(4); + *cs++ = flags; + *cs++ = scratch_addr | PIPE_CONTROL_GLOBAL_GTT; + *cs++ = 0; + intel_ring_advance(req, cs); return 0; } @@ -263,20 +256,17 @@ gen6_render_ring_flush(struct drm_i915_gem_request *req, u32 mode) static int gen7_render_ring_cs_stall_wa(struct drm_i915_gem_request *req) { - struct intel_ring *ring = req->ring; - int ret; + u32 *cs; - ret = intel_ring_begin(req, 4); - if (ret) - return ret; + cs = intel_ring_begin(req, 4); + if (IS_ERR(cs)) + return PTR_ERR(cs); - intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4)); - intel_ring_emit(ring, - PIPE_CONTROL_CS_STALL | - PIPE_CONTROL_STALL_AT_SCOREBOARD); - intel_ring_emit(ring, 0); - intel_ring_emit(ring, 0); - intel_ring_advance(ring); + *cs++ = GFX_OP_PIPE_CONTROL(4); + *cs++ = PIPE_CONTROL_CS_STALL | PIPE_CONTROL_STALL_AT_SCOREBOARD; + *cs++ = 0; + *cs++ = 0; + intel_ring_advance(req, cs); return 0; } @@ -284,11 +274,9 @@ gen7_render_ring_cs_stall_wa(struct drm_i915_gem_request *req) static int gen7_render_ring_flush(struct drm_i915_gem_request *req, u32 mode) { - struct intel_ring *ring = req->ring; u32 scratch_addr = i915_ggtt_offset(req->engine->scratch) + 2 * CACHELINE_BYTES; - u32 flags = 0; - int ret; + u32 *cs, flags = 0; /* * Ensure that any following seqno writes only happen when the render @@ -332,15 +320,15 @@ gen7_render_ring_flush(struct drm_i915_gem_request *req, u32 mode) gen7_render_ring_cs_stall_wa(req); } - ret = intel_ring_begin(req, 4); - if (ret) - return ret; + cs = intel_ring_begin(req, 4); + if (IS_ERR(cs)) + return PTR_ERR(cs); - intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4)); - intel_ring_emit(ring, flags); - intel_ring_emit(ring, scratch_addr); - intel_ring_emit(ring, 0); - intel_ring_advance(ring); + *cs++ = GFX_OP_PIPE_CONTROL(4); + *cs++ = flags; + *cs++ = scratch_addr; + *cs++ = 0; + intel_ring_advance(req, cs); return 0; } @@ -349,20 +337,19 @@ static int gen8_emit_pipe_control(struct drm_i915_gem_request *req, u32 flags, u32 scratch_addr) { - struct intel_ring *ring = req->ring; - int ret; + u32 *cs; - ret = intel_ring_begin(req, 6); - if (ret) - return ret; + cs = intel_ring_begin(req, 6); + if (IS_ERR(cs)) + return PTR_ERR(cs); - intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(6)); - intel_ring_emit(ring, flags); - intel_ring_emit(ring, scratch_addr); - intel_ring_emit(ring, 0); - intel_ring_emit(ring, 0); - intel_ring_emit(ring, 0); - intel_ring_advance(ring); + *cs++ = GFX_OP_PIPE_CONTROL(6); + *cs++ = flags; + *cs++ = scratch_addr; + *cs++ = 0; + *cs++ = 0; + *cs++ = 0; + intel_ring_advance(req, cs); return 0; } @@ -659,8 +646,8 @@ static void reset_ring_common(struct intel_engine_cs *engine, static int intel_ring_workarounds_emit(struct drm_i915_gem_request *req) { - struct intel_ring *ring = req->ring; struct i915_workarounds *w = &req->i915->workarounds; + u32 *cs; int ret, i; if (w->count == 0) @@ -670,18 +657,18 @@ static int intel_ring_workarounds_emit(struct drm_i915_gem_request *req) if (ret) return ret; - ret = intel_ring_begin(req, (w->count * 2 + 2)); - if (ret) - return ret; + cs = intel_ring_begin(req, (w->count * 2 + 2)); + if (IS_ERR(cs)) + return PTR_ERR(cs); - intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(w->count)); + *cs++ = MI_LOAD_REGISTER_IMM(w->count); for (i = 0; i < w->count; i++) { - intel_ring_emit_reg(ring, w->reg[i].addr); - intel_ring_emit(ring, w->reg[i].value); + *cs++ = i915_mmio_reg_offset(w->reg[i].addr); + *cs++ = w->reg[i].value; } - intel_ring_emit(ring, MI_NOOP); + *cs++ = MI_NOOP; - intel_ring_advance(ring); + intel_ring_advance(req, cs); ret = req->engine->emit_flush(req, EMIT_BARRIER); if (ret) @@ -1276,7 +1263,7 @@ static void render_ring_cleanup(struct intel_engine_cs *engine) i915_vma_unpin_and_release(&dev_priv->semaphore); } -static u32 *gen8_rcs_signal(struct drm_i915_gem_request *req, u32 *out) +static u32 *gen8_rcs_signal(struct drm_i915_gem_request *req, u32 *cs) { struct drm_i915_private *dev_priv = req->i915; struct intel_engine_cs *waiter; @@ -1287,23 +1274,22 @@ static u32 *gen8_rcs_signal(struct drm_i915_gem_request *req, u32 *out) if (gtt_offset == MI_SEMAPHORE_SYNC_INVALID) continue; - *out++ = GFX_OP_PIPE_CONTROL(6); - *out++ = (PIPE_CONTROL_GLOBAL_GTT_IVB | - PIPE_CONTROL_QW_WRITE | - PIPE_CONTROL_CS_STALL); - *out++ = lower_32_bits(gtt_offset); - *out++ = upper_32_bits(gtt_offset); - *out++ = req->global_seqno; - *out++ = 0; - *out++ = (MI_SEMAPHORE_SIGNAL | - MI_SEMAPHORE_TARGET(waiter->hw_id)); - *out++ = 0; + *cs++ = GFX_OP_PIPE_CONTROL(6); + *cs++ = PIPE_CONTROL_GLOBAL_GTT_IVB | PIPE_CONTROL_QW_WRITE | + PIPE_CONTROL_CS_STALL; + *cs++ = lower_32_bits(gtt_offset); + *cs++ = upper_32_bits(gtt_offset); + *cs++ = req->global_seqno; + *cs++ = 0; + *cs++ = MI_SEMAPHORE_SIGNAL | + MI_SEMAPHORE_TARGET(waiter->hw_id); + *cs++ = 0; } - return out; + return cs; } -static u32 *gen8_xcs_signal(struct drm_i915_gem_request *req, u32 *out) +static u32 *gen8_xcs_signal(struct drm_i915_gem_request *req, u32 *cs) { struct drm_i915_private *dev_priv = req->i915; struct intel_engine_cs *waiter; @@ -1314,19 +1300,19 @@ static u32 *gen8_xcs_signal(struct drm_i915_gem_request *req, u32 *out) if (gtt_offset == MI_SEMAPHORE_SYNC_INVALID) continue; - *out++ = (MI_FLUSH_DW + 1) | MI_FLUSH_DW_OP_STOREDW; - *out++ = lower_32_bits(gtt_offset) | MI_FLUSH_DW_USE_GTT; - *out++ = upper_32_bits(gtt_offset); - *out++ = req->global_seqno; - *out++ = (MI_SEMAPHORE_SIGNAL | - MI_SEMAPHORE_TARGET(waiter->hw_id)); - *out++ = 0; + *cs++ = (MI_FLUSH_DW + 1) | MI_FLUSH_DW_OP_STOREDW; + *cs++ = lower_32_bits(gtt_offset) | MI_FLUSH_DW_USE_GTT; + *cs++ = upper_32_bits(gtt_offset); + *cs++ = req->global_seqno; + *cs++ = MI_SEMAPHORE_SIGNAL | + MI_SEMAPHORE_TARGET(waiter->hw_id); + *cs++ = 0; } - return out; + return cs; } -static u32 *gen6_signal(struct drm_i915_gem_request *req, u32 *out) +static u32 *gen6_signal(struct drm_i915_gem_request *req, u32 *cs) { struct drm_i915_private *dev_priv = req->i915; struct intel_engine_cs *engine; @@ -1341,16 +1327,16 @@ static u32 *gen6_signal(struct drm_i915_gem_request *req, u32 *out) mbox_reg = req->engine->semaphore.mbox.signal[engine->hw_id]; if (i915_mmio_reg_valid(mbox_reg)) { - *out++ = MI_LOAD_REGISTER_IMM(1); - *out++ = i915_mmio_reg_offset(mbox_reg); - *out++ = req->global_seqno; + *cs++ = MI_LOAD_REGISTER_IMM(1); + *cs++ = i915_mmio_reg_offset(mbox_reg); + *cs++ = req->global_seqno; num_rings++; } } if (num_rings & 1) - *out++ = MI_NOOP; + *cs++ = MI_NOOP; - return out; + return cs; } static void i9xx_submit_request(struct drm_i915_gem_request *request) @@ -1362,15 +1348,14 @@ static void i9xx_submit_request(struct drm_i915_gem_request *request) I915_WRITE_TAIL(request->engine, request->tail); } -static void i9xx_emit_breadcrumb(struct drm_i915_gem_request *req, - u32 *out) +static void i9xx_emit_breadcrumb(struct drm_i915_gem_request *req, u32 *cs) { - *out++ = MI_STORE_DWORD_INDEX; - *out++ = I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT; - *out++ = req->global_seqno; - *out++ = MI_USER_INTERRUPT; + *cs++ = MI_STORE_DWORD_INDEX; + *cs++ = I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT; + *cs++ = req->global_seqno; + *cs++ = MI_USER_INTERRUPT; - req->tail = intel_ring_offset(req->ring, out); + req->tail = intel_ring_offset(req, cs); } static const int i9xx_emit_breadcrumb_sz = 4; @@ -1383,34 +1368,32 @@ static const int i9xx_emit_breadcrumb_sz = 4; * Update the mailbox registers in the *other* rings with the current seqno. * This acts like a signal in the canonical semaphore. */ -static void gen6_sema_emit_breadcrumb(struct drm_i915_gem_request *req, - u32 *out) +static void gen6_sema_emit_breadcrumb(struct drm_i915_gem_request *req, u32 *cs) { return i9xx_emit_breadcrumb(req, - req->engine->semaphore.signal(req, out)); + req->engine->semaphore.signal(req, cs)); } static void gen8_render_emit_breadcrumb(struct drm_i915_gem_request *req, - u32 *out) + u32 *cs) { struct intel_engine_cs *engine = req->engine; if (engine->semaphore.signal) - out = engine->semaphore.signal(req, out); - - *out++ = GFX_OP_PIPE_CONTROL(6); - *out++ = (PIPE_CONTROL_GLOBAL_GTT_IVB | - PIPE_CONTROL_CS_STALL | - PIPE_CONTROL_QW_WRITE); - *out++ = intel_hws_seqno_address(engine); - *out++ = 0; - *out++ = req->global_seqno; + cs = engine->semaphore.signal(req, cs); + + *cs++ = GFX_OP_PIPE_CONTROL(6); + *cs++ = PIPE_CONTROL_GLOBAL_GTT_IVB | PIPE_CONTROL_CS_STALL | + PIPE_CONTROL_QW_WRITE; + *cs++ = intel_hws_seqno_address(engine); + *cs++ = 0; + *cs++ = req->global_seqno; /* We're thrashing one dword of HWS. */ - *out++ = 0; - *out++ = MI_USER_INTERRUPT; - *out++ = MI_NOOP; + *cs++ = 0; + *cs++ = MI_USER_INTERRUPT; + *cs++ = MI_NOOP; - req->tail = intel_ring_offset(req->ring, out); + req->tail = intel_ring_offset(req, cs); } static const int gen8_render_emit_breadcrumb_sz = 8; @@ -1427,24 +1410,21 @@ static int gen8_ring_sync_to(struct drm_i915_gem_request *req, struct drm_i915_gem_request *signal) { - struct intel_ring *ring = req->ring; struct drm_i915_private *dev_priv = req->i915; u64 offset = GEN8_WAIT_OFFSET(req->engine, signal->engine->id); struct i915_hw_ppgtt *ppgtt; - int ret; + u32 *cs; - ret = intel_ring_begin(req, 4); - if (ret) - return ret; + cs = intel_ring_begin(req, 4); + if (IS_ERR(cs)) + return PTR_ERR(cs); - intel_ring_emit(ring, - MI_SEMAPHORE_WAIT | - MI_SEMAPHORE_GLOBAL_GTT | - MI_SEMAPHORE_SAD_GTE_SDD); - intel_ring_emit(ring, signal->global_seqno); - intel_ring_emit(ring, lower_32_bits(offset)); - intel_ring_emit(ring, upper_32_bits(offset)); - intel_ring_advance(ring); + *cs++ = MI_SEMAPHORE_WAIT | MI_SEMAPHORE_GLOBAL_GTT | + MI_SEMAPHORE_SAD_GTE_SDD; + *cs++ = signal->global_seqno; + *cs++ = lower_32_bits(offset); + *cs++ = upper_32_bits(offset); + intel_ring_advance(req, cs); /* When the !RCS engines idle waiting upon a semaphore, they lose their * pagetables and we must reload them before executing the batch. @@ -1461,28 +1441,27 @@ static int gen6_ring_sync_to(struct drm_i915_gem_request *req, struct drm_i915_gem_request *signal) { - struct intel_ring *ring = req->ring; u32 dw1 = MI_SEMAPHORE_MBOX | MI_SEMAPHORE_COMPARE | MI_SEMAPHORE_REGISTER; u32 wait_mbox = signal->engine->semaphore.mbox.wait[req->engine->hw_id]; - int ret; + u32 *cs; WARN_ON(wait_mbox == MI_SEMAPHORE_SYNC_INVALID); - ret = intel_ring_begin(req, 4); - if (ret) - return ret; + cs = intel_ring_begin(req, 4); + if (IS_ERR(cs)) + return PTR_ERR(cs); - intel_ring_emit(ring, dw1 | wait_mbox); + *cs++ = dw1 | wait_mbox; /* Throughout all of the GEM code, seqno passed implies our current * seqno is >= the last seqno executed. However for hardware the * comparison is strictly greater than. */ - intel_ring_emit(ring, signal->global_seqno - 1); - intel_ring_emit(ring, 0); - intel_ring_emit(ring, MI_NOOP); - intel_ring_advance(ring); + *cs++ = signal->global_seqno - 1; + *cs++ = 0; + *cs++ = MI_NOOP; + intel_ring_advance(req, cs); return 0; } @@ -1583,16 +1562,15 @@ i8xx_irq_disable(struct intel_engine_cs *engine) static int bsd_ring_flush(struct drm_i915_gem_request *req, u32 mode) { - struct intel_ring *ring = req->ring; - int ret; + u32 *cs; - ret = intel_ring_begin(req, 2); - if (ret) - return ret; + cs = intel_ring_begin(req, 2); + if (IS_ERR(cs)) + return PTR_ERR(cs); - intel_ring_emit(ring, MI_FLUSH); - intel_ring_emit(ring, MI_NOOP); - intel_ring_advance(ring); + *cs++ = MI_FLUSH; + *cs++ = MI_NOOP; + intel_ring_advance(req, cs); return 0; } @@ -1658,20 +1636,16 @@ i965_emit_bb_start(struct drm_i915_gem_request *req, u64 offset, u32 length, unsigned int dispatch_flags) { - struct intel_ring *ring = req->ring; - int ret; + u32 *cs; - ret = intel_ring_begin(req, 2); - if (ret) - return ret; + cs = intel_ring_begin(req, 2); + if (IS_ERR(cs)) + return PTR_ERR(cs); - intel_ring_emit(ring, - MI_BATCH_BUFFER_START | - MI_BATCH_GTT | - (dispatch_flags & I915_DISPATCH_SECURE ? - 0 : MI_BATCH_NON_SECURE_I965)); - intel_ring_emit(ring, offset); - intel_ring_advance(ring); + *cs++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT | (dispatch_flags & + I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE_I965); + *cs++ = offset; + intel_ring_advance(req, cs); return 0; } @@ -1685,59 +1659,56 @@ i830_emit_bb_start(struct drm_i915_gem_request *req, u64 offset, u32 len, unsigned int dispatch_flags) { - struct intel_ring *ring = req->ring; - u32 cs_offset = i915_ggtt_offset(req->engine->scratch); - int ret; + u32 *cs, cs_offset = i915_ggtt_offset(req->engine->scratch); - ret = intel_ring_begin(req, 6); - if (ret) - return ret; + cs = intel_ring_begin(req, 6); + if (IS_ERR(cs)) + return PTR_ERR(cs); /* Evict the invalid PTE TLBs */ - intel_ring_emit(ring, COLOR_BLT_CMD | BLT_WRITE_RGBA); - intel_ring_emit(ring, BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | 4096); - intel_ring_emit(ring, I830_TLB_ENTRIES << 16 | 4); /* load each page */ - intel_ring_emit(ring, cs_offset); - intel_ring_emit(ring, 0xdeadbeef); - intel_ring_emit(ring, MI_NOOP); - intel_ring_advance(ring); + *cs++ = COLOR_BLT_CMD | BLT_WRITE_RGBA; + *cs++ = BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | 4096; + *cs++ = I830_TLB_ENTRIES << 16 | 4; /* load each page */ + *cs++ = cs_offset; + *cs++ = 0xdeadbeef; + *cs++ = MI_NOOP; + intel_ring_advance(req, cs); if ((dispatch_flags & I915_DISPATCH_PINNED) == 0) { if (len > I830_BATCH_LIMIT) return -ENOSPC; - ret = intel_ring_begin(req, 6 + 2); - if (ret) - return ret; + cs = intel_ring_begin(req, 6 + 2); + if (IS_ERR(cs)) + return PTR_ERR(cs); /* Blit the batch (which has now all relocs applied) to the * stable batch scratch bo area (so that the CS never * stumbles over its tlb invalidation bug) ... */ - intel_ring_emit(ring, SRC_COPY_BLT_CMD | BLT_WRITE_RGBA); - intel_ring_emit(ring, - BLT_DEPTH_32 | BLT_ROP_SRC_COPY | 4096); - intel_ring_emit(ring, DIV_ROUND_UP(len, 4096) << 16 | 4096); - intel_ring_emit(ring, cs_offset); - intel_ring_emit(ring, 4096); - intel_ring_emit(ring, offset); - - intel_ring_emit(ring, MI_FLUSH); - intel_ring_emit(ring, MI_NOOP); - intel_ring_advance(ring); + *cs++ = SRC_COPY_BLT_CMD | BLT_WRITE_RGBA; + *cs++ = BLT_DEPTH_32 | BLT_ROP_SRC_COPY | 4096; + *cs++ = DIV_ROUND_UP(len, 4096) << 16 | 4096; + *cs++ = cs_offset; + *cs++ = 4096; + *cs++ = offset; + + *cs++ = MI_FLUSH; + *cs++ = MI_NOOP; + intel_ring_advance(req, cs); /* ... and execute it. */ offset = cs_offset; } - ret = intel_ring_begin(req, 2); - if (ret) - return ret; + cs = intel_ring_begin(req, 2); + if (IS_ERR(cs)) + return PTR_ERR(cs); - intel_ring_emit(ring, MI_BATCH_BUFFER_START | MI_BATCH_GTT); - intel_ring_emit(ring, offset | (dispatch_flags & I915_DISPATCH_SECURE ? - 0 : MI_BATCH_NON_SECURE)); - intel_ring_advance(ring); + *cs++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT; + *cs++ = offset | (dispatch_flags & I915_DISPATCH_SECURE ? 0 : + MI_BATCH_NON_SECURE); + intel_ring_advance(req, cs); return 0; } @@ -1747,17 +1718,16 @@ i915_emit_bb_start(struct drm_i915_gem_request *req, u64 offset, u32 len, unsigned int dispatch_flags) { - struct intel_ring *ring = req->ring; - int ret; + u32 *cs; - ret = intel_ring_begin(req, 2); - if (ret) - return ret; + cs = intel_ring_begin(req, 2); + if (IS_ERR(cs)) + return PTR_ERR(cs); - intel_ring_emit(ring, MI_BATCH_BUFFER_START | MI_BATCH_GTT); - intel_ring_emit(ring, offset | (dispatch_flags & I915_DISPATCH_SECURE ? - 0 : MI_BATCH_NON_SECURE)); - intel_ring_advance(ring); + *cs++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT; + *cs++ = offset | (dispatch_flags & I915_DISPATCH_SECURE ? 0 : + MI_BATCH_NON_SECURE); + intel_ring_advance(req, cs); return 0; } @@ -2165,7 +2135,7 @@ void intel_legacy_submission_resume(struct drm_i915_private *dev_priv) static int ring_request_alloc(struct drm_i915_gem_request *request) { - int ret; + u32 *cs; GEM_BUG_ON(!request->ctx->engine[request->engine->id].pin_count); @@ -2178,9 +2148,9 @@ static int ring_request_alloc(struct drm_i915_gem_request *request) GEM_BUG_ON(!request->engine->buffer); request->ring = request->engine->buffer; - ret = intel_ring_begin(request, 0); - if (ret) - return ret; + cs = intel_ring_begin(request, 0); + if (IS_ERR(cs)) + return PTR_ERR(cs); request->reserved_space -= LEGACY_REQUEST_SIZE; return 0; @@ -2235,7 +2205,7 @@ static int wait_for_space(struct drm_i915_gem_request *req, int bytes) return 0; } -int intel_ring_begin(struct drm_i915_gem_request *req, int num_dwords) +u32 *intel_ring_begin(struct drm_i915_gem_request *req, int num_dwords) { struct intel_ring *ring = req->ring; int remain_actual = ring->size - ring->tail; @@ -2243,6 +2213,7 @@ int intel_ring_begin(struct drm_i915_gem_request *req, int num_dwords) int bytes = num_dwords * sizeof(u32); int total_bytes, wait_bytes; bool need_wrap = false; + u32 *cs; total_bytes = bytes + req->reserved_space; @@ -2269,7 +2240,7 @@ int intel_ring_begin(struct drm_i915_gem_request *req, int num_dwords) if (wait_bytes > ring->space) { int ret = wait_for_space(req, wait_bytes); if (unlikely(ret)) - return ret; + return ERR_PTR(ret); } if (unlikely(need_wrap)) { @@ -2282,32 +2253,34 @@ int intel_ring_begin(struct drm_i915_gem_request *req, int num_dwords) ring->space -= remain_actual; } + GEM_BUG_ON(ring->tail > ring->size - bytes); + cs = ring->vaddr + ring->tail; + ring->tail += bytes; ring->space -= bytes; GEM_BUG_ON(ring->space < 0); - GEM_DEBUG_EXEC(ring->advance = ring->tail + bytes); - return 0; + + return cs; } /* Align the ring tail to a cacheline boundary */ int intel_ring_cacheline_align(struct drm_i915_gem_request *req) { - struct intel_ring *ring = req->ring; int num_dwords = - (ring->tail & (CACHELINE_BYTES - 1)) / sizeof(uint32_t); - int ret; + (req->ring->tail & (CACHELINE_BYTES - 1)) / sizeof(uint32_t); + u32 *cs; if (num_dwords == 0) return 0; num_dwords = CACHELINE_BYTES / sizeof(uint32_t) - num_dwords; - ret = intel_ring_begin(req, num_dwords); - if (ret) - return ret; + cs = intel_ring_begin(req, num_dwords); + if (IS_ERR(cs)) + return PTR_ERR(cs); while (num_dwords--) - intel_ring_emit(ring, MI_NOOP); + *cs++ = MI_NOOP; - intel_ring_advance(ring); + intel_ring_advance(req, cs); return 0; } @@ -2351,13 +2324,11 @@ static void gen6_bsd_submit_request(struct drm_i915_gem_request *request) static int gen6_bsd_ring_flush(struct drm_i915_gem_request *req, u32 mode) { - struct intel_ring *ring = req->ring; - uint32_t cmd; - int ret; + u32 cmd, *cs; - ret = intel_ring_begin(req, 4); - if (ret) - return ret; + cs = intel_ring_begin(req, 4); + if (IS_ERR(cs)) + return PTR_ERR(cs); cmd = MI_FLUSH_DW; if (INTEL_GEN(req->i915) >= 8) @@ -2379,16 +2350,16 @@ static int gen6_bsd_ring_flush(struct drm_i915_gem_request *req, u32 mode) if (mode & EMIT_INVALIDATE) cmd |= MI_INVALIDATE_TLB | MI_INVALIDATE_BSD; - intel_ring_emit(ring, cmd); - intel_ring_emit(ring, I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT); + *cs++ = cmd; + *cs++ = I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT; if (INTEL_GEN(req->i915) >= 8) { - intel_ring_emit(ring, 0); /* upper addr */ - intel_ring_emit(ring, 0); /* value */ + *cs++ = 0; /* upper addr */ + *cs++ = 0; /* value */ } else { - intel_ring_emit(ring, 0); - intel_ring_emit(ring, MI_NOOP); + *cs++ = 0; + *cs++ = MI_NOOP; } - intel_ring_advance(ring); + intel_ring_advance(req, cs); return 0; } @@ -2397,23 +2368,21 @@ gen8_emit_bb_start(struct drm_i915_gem_request *req, u64 offset, u32 len, unsigned int dispatch_flags) { - struct intel_ring *ring = req->ring; bool ppgtt = USES_PPGTT(req->i915) && !(dispatch_flags & I915_DISPATCH_SECURE); - int ret; + u32 *cs; - ret = intel_ring_begin(req, 4); - if (ret) - return ret; + cs = intel_ring_begin(req, 4); + if (IS_ERR(cs)) + return PTR_ERR(cs); /* FIXME(BDW): Address space and security selectors. */ - intel_ring_emit(ring, MI_BATCH_BUFFER_START_GEN8 | (ppgtt<<8) | - (dispatch_flags & I915_DISPATCH_RS ? - MI_BATCH_RESOURCE_STREAMER : 0)); - intel_ring_emit(ring, lower_32_bits(offset)); - intel_ring_emit(ring, upper_32_bits(offset)); - intel_ring_emit(ring, MI_NOOP); - intel_ring_advance(ring); + *cs++ = MI_BATCH_BUFFER_START_GEN8 | (ppgtt << 8) | (dispatch_flags & + I915_DISPATCH_RS ? MI_BATCH_RESOURCE_STREAMER : 0); + *cs++ = lower_32_bits(offset); + *cs++ = upper_32_bits(offset); + *cs++ = MI_NOOP; + intel_ring_advance(req, cs); return 0; } @@ -2423,22 +2392,19 @@ hsw_emit_bb_start(struct drm_i915_gem_request *req, u64 offset, u32 len, unsigned int dispatch_flags) { - struct intel_ring *ring = req->ring; - int ret; + u32 *cs; - ret = intel_ring_begin(req, 2); - if (ret) - return ret; + cs = intel_ring_begin(req, 2); + if (IS_ERR(cs)) + return PTR_ERR(cs); - intel_ring_emit(ring, - MI_BATCH_BUFFER_START | - (dispatch_flags & I915_DISPATCH_SECURE ? - 0 : MI_BATCH_PPGTT_HSW | MI_BATCH_NON_SECURE_HSW) | - (dispatch_flags & I915_DISPATCH_RS ? - MI_BATCH_RESOURCE_STREAMER : 0)); + *cs++ = MI_BATCH_BUFFER_START | (dispatch_flags & I915_DISPATCH_SECURE ? + 0 : MI_BATCH_PPGTT_HSW | MI_BATCH_NON_SECURE_HSW) | + (dispatch_flags & I915_DISPATCH_RS ? + MI_BATCH_RESOURCE_STREAMER : 0); /* bit0-7 is the length on GEN6+ */ - intel_ring_emit(ring, offset); - intel_ring_advance(ring); + *cs++ = offset; + intel_ring_advance(req, cs); return 0; } @@ -2448,20 +2414,17 @@ gen6_emit_bb_start(struct drm_i915_gem_request *req, u64 offset, u32 len, unsigned int dispatch_flags) { - struct intel_ring *ring = req->ring; - int ret; + u32 *cs; - ret = intel_ring_begin(req, 2); - if (ret) - return ret; + cs = intel_ring_begin(req, 2); + if (IS_ERR(cs)) + return PTR_ERR(cs); - intel_ring_emit(ring, - MI_BATCH_BUFFER_START | - (dispatch_flags & I915_DISPATCH_SECURE ? - 0 : MI_BATCH_NON_SECURE_I965)); + *cs++ = MI_BATCH_BUFFER_START | (dispatch_flags & I915_DISPATCH_SECURE ? + 0 : MI_BATCH_NON_SECURE_I965); /* bit0-7 is the length on GEN6+ */ - intel_ring_emit(ring, offset); - intel_ring_advance(ring); + *cs++ = offset; + intel_ring_advance(req, cs); return 0; } @@ -2470,13 +2433,11 @@ gen6_emit_bb_start(struct drm_i915_gem_request *req, static int gen6_ring_flush(struct drm_i915_gem_request *req, u32 mode) { - struct intel_ring *ring = req->ring; - uint32_t cmd; - int ret; + u32 cmd, *cs; - ret = intel_ring_begin(req, 4); - if (ret) - return ret; + cs = intel_ring_begin(req, 4); + if (IS_ERR(cs)) + return PTR_ERR(cs); cmd = MI_FLUSH_DW; if (INTEL_GEN(req->i915) >= 8) @@ -2497,17 +2458,16 @@ static int gen6_ring_flush(struct drm_i915_gem_request *req, u32 mode) */ if (mode & EMIT_INVALIDATE) cmd |= MI_INVALIDATE_TLB; - intel_ring_emit(ring, cmd); - intel_ring_emit(ring, - I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT); + *cs++ = cmd; + *cs++ = I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT; if (INTEL_GEN(req->i915) >= 8) { - intel_ring_emit(ring, 0); /* upper addr */ - intel_ring_emit(ring, 0); /* value */ + *cs++ = 0; /* upper addr */ + *cs++ = 0; /* value */ } else { - intel_ring_emit(ring, 0); - intel_ring_emit(ring, MI_NOOP); + *cs++ = 0; + *cs++ = MI_NOOP; } - intel_ring_advance(ring); + intel_ring_advance(req, cs); return 0; } diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index cc62e89010d3..4350713dbc58 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -145,7 +145,6 @@ struct intel_ring { u32 head; u32 tail; - GEM_DEBUG_DECL(u32 advance); int space; int size; @@ -292,7 +291,7 @@ struct intel_engine_cs { #define I915_DISPATCH_PINNED BIT(1) #define I915_DISPATCH_RS BIT(2) void (*emit_breadcrumb)(struct drm_i915_gem_request *req, - u32 *out); + u32 *cs); int emit_breadcrumb_sz; /* Pass the request to the hardware queue (e.g. directly into @@ -375,7 +374,7 @@ struct intel_engine_cs { /* AKA wait() */ int (*sync_to)(struct drm_i915_gem_request *req, struct drm_i915_gem_request *signal); - u32 *(*signal)(struct drm_i915_gem_request *req, u32 *out); + u32 *(*signal)(struct drm_i915_gem_request *req, u32 *cs); } semaphore; /* Execlists */ @@ -497,21 +496,12 @@ void intel_engine_cleanup(struct intel_engine_cs *engine); void intel_legacy_submission_resume(struct drm_i915_private *dev_priv); -int __must_check intel_ring_begin(struct drm_i915_gem_request *req, int n); int __must_check intel_ring_cacheline_align(struct drm_i915_gem_request *req); -static inline void intel_ring_emit(struct intel_ring *ring, u32 data) -{ - *(uint32_t *)(ring->vaddr + ring->tail) = data; - ring->tail += 4; -} +u32 __must_check *intel_ring_begin(struct drm_i915_gem_request *req, int n); -static inline void intel_ring_emit_reg(struct intel_ring *ring, i915_reg_t reg) -{ - intel_ring_emit(ring, i915_mmio_reg_offset(reg)); -} - -static inline void intel_ring_advance(struct intel_ring *ring) +static inline void +intel_ring_advance(struct drm_i915_gem_request *req, u32 *cs) { /* Dummy function. * @@ -521,14 +511,16 @@ static inline void intel_ring_advance(struct intel_ring *ring) * reserved for the command packet (i.e. the value passed to * intel_ring_begin()). */ - GEM_DEBUG_BUG_ON(ring->tail != ring->advance); + GEM_BUG_ON((req->ring->vaddr + req->ring->tail) != cs); } -static inline u32 intel_ring_offset(struct intel_ring *ring, void *addr) +static inline u32 +intel_ring_offset(struct drm_i915_gem_request *req, void *addr) { /* Don't write ring->size (equivalent to 0) as that hangs some GPUs. */ - u32 offset = addr - ring->vaddr; - return offset & (ring->size - 1); + u32 offset = addr - req->ring->vaddr; + GEM_BUG_ON(offset > req->ring->size); + return offset & (req->ring->size - 1); } int __intel_ring_space(int head, int tail, int size); diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c b/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c index 483cbe02d983..ad92c60290f1 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c @@ -186,6 +186,7 @@ static int gpu_set(struct drm_i915_gem_object *obj, struct drm_i915_private *i915 = to_i915(obj->base.dev); struct drm_i915_gem_request *rq; struct i915_vma *vma; + u32 *cs; int err; err = i915_gem_object_set_to_gtt_domain(obj, true); @@ -202,30 +203,30 @@ static int gpu_set(struct drm_i915_gem_object *obj, return PTR_ERR(rq); } - err = intel_ring_begin(rq, 4); - if (err) { + cs = intel_ring_begin(rq, 4); + if (IS_ERR(cs)) { __i915_add_request(rq, false); i915_vma_unpin(vma); - return err; + return PTR_ERR(cs); } if (INTEL_GEN(i915) >= 8) { - intel_ring_emit(rq->ring, MI_STORE_DWORD_IMM_GEN4 | 1 << 22); - intel_ring_emit(rq->ring, lower_32_bits(i915_ggtt_offset(vma) + offset)); - intel_ring_emit(rq->ring, upper_32_bits(i915_ggtt_offset(vma) + offset)); - intel_ring_emit(rq->ring, v); + *cs++ = MI_STORE_DWORD_IMM_GEN4 | 1 << 22; + *cs++ = lower_32_bits(i915_ggtt_offset(vma) + offset); + *cs++ = upper_32_bits(i915_ggtt_offset(vma) + offset); + *cs++ = v; } else if (INTEL_GEN(i915) >= 4) { - intel_ring_emit(rq->ring, MI_STORE_DWORD_IMM_GEN4 | 1 << 22); - intel_ring_emit(rq->ring, 0); - intel_ring_emit(rq->ring, i915_ggtt_offset(vma) + offset); - intel_ring_emit(rq->ring, v); + *cs++ = MI_STORE_DWORD_IMM_GEN4 | 1 << 22; + *cs++ = 0; + *cs++ = i915_ggtt_offset(vma) + offset; + *cs++ = v; } else { - intel_ring_emit(rq->ring, MI_STORE_DWORD_IMM | 1 << 22); - intel_ring_emit(rq->ring, i915_ggtt_offset(vma) + offset); - intel_ring_emit(rq->ring, v); - intel_ring_emit(rq->ring, MI_NOOP); + *cs++ = MI_STORE_DWORD_IMM | 1 << 22; + *cs++ = i915_ggtt_offset(vma) + offset; + *cs++ = v; + *cs++ = MI_NOOP; } - intel_ring_advance(rq->ring); + intel_ring_advance(rq, cs); i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); i915_vma_unpin(vma); -- cgit From a937eaf8242a44f402f63a8ed135026bdc2ab0e9 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Tue, 14 Feb 2017 15:29:01 +0000 Subject: drm/i915: Fix uninitialized return from mi_set_context For some reason my compiler (and CI as well) failed to spot the uninitialized ret in mi_set_context. Signed-off-by: Tvrtko Ursulin Fixes: 73dec95e6ba3 ("drm/i915: Emit to ringbuffer directly") Cc: Tvrtko Ursulin Cc: Chris Wilson Cc: Joonas Lahtinen Reviewed-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20170214152901.20361-1-tvrtko.ursulin@linux.intel.com --- drivers/gpu/drm/i915/i915_gem_context.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 9037356536dc..262452055563 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -604,7 +604,7 @@ mi_set_context(struct drm_i915_gem_request *req, u32 hw_flags) i915.semaphores ? INTEL_INFO(dev_priv)->num_rings - 1 : 0; - int len, ret; + int len; /* w/a: If Flush TLB Invalidation Mode is enabled, driver must do a TLB * invalidation prior to MI_SET_CONTEXT. On GEN6 we don't set the value @@ -612,7 +612,7 @@ mi_set_context(struct drm_i915_gem_request *req, u32 hw_flags) * itlb_before_ctx_switch. */ if (IS_GEN6(dev_priv)) { - ret = engine->emit_flush(req, EMIT_INVALIDATE); + int ret = engine->emit_flush(req, EMIT_INVALIDATE); if (ret) return ret; } @@ -687,7 +687,7 @@ mi_set_context(struct drm_i915_gem_request *req, u32 hw_flags) intel_ring_advance(req, cs); - return ret; + return 0; } static int remap_l3(struct drm_i915_gem_request *req, int slice) -- cgit From 9cc19733fd7c83ac0577f2b80121dae3c289351b Mon Sep 17 00:00:00 2001 From: Robert Bragg Date: Sun, 12 Feb 2017 13:32:52 +0000 Subject: drm/i915: fix for WaDisableDopClockGating:bdw MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This workaround for BDW was incomplete as it also requires EUTC clock gating to be disabled via UCGCTL1. v2: read modify write UCGTL1 in broadwell_init_clock_gating (Ville) Signed-off-by: Robert Bragg Cc: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/20170212133252.20990-1-robert@sixbynine.org Reviewed-by: Ville Syrjälä Signed-off-by: Ville Syrjälä --- drivers/gpu/drm/i915/intel_pm.c | 8 ++++++++ drivers/gpu/drm/i915/intel_ringbuffer.c | 6 +++++- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index c0b0f5a4b9f1..3c13be8985f1 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -7229,6 +7229,14 @@ static void broadwell_init_clock_gating(struct drm_i915_private *dev_priv) | KVM_CONFIG_CHANGE_NOTIFICATION_SELECT); lpt_init_clock_gating(dev_priv); + + /* WaDisableDopClockGating:bdw + * + * Also see the CHICKEN2 write in bdw_init_workarounds() to disable DOP + * clock gating. + */ + I915_WRITE(GEN6_UCGCTL1, + I915_READ(GEN6_UCGCTL1) | GEN6_EU_TCUNIT_CLOCK_GATE_DISABLE); } static void haswell_init_clock_gating(struct drm_i915_private *dev_priv) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 0b9030c36625..95906c45289b 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -812,7 +812,11 @@ static int bdw_init_workarounds(struct intel_engine_cs *engine) /* WaDisableThreadStallDopClockGating:bdw (pre-production) */ WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE); - /* WaDisableDopClockGating:bdw */ + /* WaDisableDopClockGating:bdw + * + * Also see the related UCGTCL1 write in broadwell_init_clock_gating() + * to disable EUTC clock gating. + */ WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2, DOP_CLOCK_GATING_DISABLE); -- cgit From 64d83e34264bb4eeaf4f0f0fa69b8cf0bc558a7e Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Wed, 21 Dec 2016 16:31:14 +0200 Subject: drm/i915: Dump more configuration information for DSI MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Dump out more of the DSI configuration details during init. This includes pclk, burst_mode_ratio, lane_count, pixel_overlap, video_mode_format and reset_timer_val. v2: Dump more info (Chris) v3: Use the VIDEO_MODE_ defines for consistency (Chris) Dump dphy_reg too (Chris) Cc: Chris Wilson Signed-off-by: Ville Syrjälä Reviewed-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20161221143114.23530-1-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/intel_dsi_panel_vbt.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_dsi_panel_vbt.c b/drivers/gpu/drm/i915/intel_dsi_panel_vbt.c index 8f683b8b1816..84b3683c1f46 100644 --- a/drivers/gpu/drm/i915/intel_dsi_panel_vbt.c +++ b/drivers/gpu/drm/i915/intel_dsi_panel_vbt.c @@ -801,6 +801,19 @@ struct drm_panel *vbt_panel_init(struct intel_dsi *intel_dsi, u16 panel_id) 8); intel_dsi->clk_hs_to_lp_count += extra_byte_count; + DRM_DEBUG_KMS("Pclk %d\n", intel_dsi->pclk); + DRM_DEBUG_KMS("Pixel overlap %d\n", intel_dsi->pixel_overlap); + DRM_DEBUG_KMS("Lane count %d\n", intel_dsi->lane_count); + DRM_DEBUG_KMS("DPHY param reg 0x%x\n", intel_dsi->dphy_reg); + DRM_DEBUG_KMS("Video mode format %s\n", + intel_dsi->video_mode_format == VIDEO_MODE_NON_BURST_WITH_SYNC_PULSE ? + "non-burst with sync pulse" : + intel_dsi->video_mode_format == VIDEO_MODE_NON_BURST_WITH_SYNC_EVENTS ? + "non-burst with sync events" : + intel_dsi->video_mode_format == VIDEO_MODE_BURST ? + "burst" : ""); + DRM_DEBUG_KMS("Burst mode ratio %d\n", intel_dsi->burst_mode_ratio); + DRM_DEBUG_KMS("Reset timer %d\n", intel_dsi->rst_timer_val); DRM_DEBUG_KMS("Eot %s\n", enableddisabled(intel_dsi->eotp_pkt)); DRM_DEBUG_KMS("Clockstop %s\n", enableddisabled(!intel_dsi->clock_stop)); DRM_DEBUG_KMS("Mode %s\n", intel_dsi->operation_mode ? "command" : "video"); -- cgit From 96676fe3a18e61ee2d67992da537d6b68f45df66 Mon Sep 17 00:00:00 2001 From: Deepak S Date: Fri, 12 Aug 2016 18:46:41 +0530 Subject: drm/i915/chv: Set min freq to RPn on CHV. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With latest Punit FW, vgg input voltag drop falling to minimum is fixed. So reverting the WA patch & moving to turbo freq opreation range to [RPn -> RP0] This is not a 1:1 revert of the commit 5b7c91b78b1ce6663e0f1f037f6cb4d7c9537d44. You can refer to commit 5b5929cbe3f7 ("drm/i915/chv: remove pre-production hardware workarounds") as the reason for the discrepancy commit 5b7c91b78b1ce6663e0f1f037f6cb4d7c9537d44 Author: Deepak S Date: Sat May 9 18:15:46 2015 +0530 drm/i915/chv: Set min freq to efficient frequency on chv v2: Fix inconsistent return type. (Chris) v3: drop pre-production hw case (Ville) Acked-by: Chris Wilson Signed-off-by: Deepak S Link: http://patchwork.freedesktop.org/patch/msgid/1471007801-86075-1-git-send-email-deepak.s@linux.intel.com Reviewed-by: Ville Syrjälä Signed-off-by: Ville Syrjälä --- drivers/gpu/drm/i915/intel_pm.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 3c13be8985f1..633cb0478b92 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -5708,6 +5708,17 @@ static int cherryview_rps_guar_freq(struct drm_i915_private *dev_priv) return rp1; } +static u32 cherryview_rps_min_freq(struct drm_i915_private *dev_priv) +{ + u32 val, rpn; + + val = vlv_punit_read(dev_priv, FB_GFX_FMIN_AT_VMIN_FUSE); + rpn = ((val >> FB_GFX_FMIN_AT_VMIN_FUSE_SHIFT) & + FB_GFX_FREQ_FUSE_MASK); + + return rpn; +} + static int valleyview_rps_guar_freq(struct drm_i915_private *dev_priv) { u32 val, rp1; @@ -5944,8 +5955,7 @@ static void cherryview_init_gt_powersave(struct drm_i915_private *dev_priv) intel_gpu_freq(dev_priv, dev_priv->rps.rp1_freq), dev_priv->rps.rp1_freq); - /* PUnit validated range is only [RPe, RP0] */ - dev_priv->rps.min_freq = dev_priv->rps.efficient_freq; + dev_priv->rps.min_freq = cherryview_rps_min_freq(dev_priv); DRM_DEBUG_DRIVER("min GPU freq: %d MHz (%u)\n", intel_gpu_freq(dev_priv, dev_priv->rps.min_freq), dev_priv->rps.min_freq); -- cgit From e1c5f754067b594de58d387aa5873dec83b6c9fd Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 14 Feb 2017 09:23:44 +0000 Subject: drm/i915: Avoid overflow in computing pot_hole loop termination When using the mock_ppgtt selftest, the GTT is large enough to cause an overflow in pot_hole() when adding 2 pages to the address. Avoid the overflow by computing the final valid address and iterating up to that address. Signed-off-by: Chris Wilson Cc: Matthew Auld Link: http://patchwork.freedesktop.org/patch/msgid/20170214092344.12330-1-chris@chris-wilson.co.uk Reviewed-by: Matthew Auld --- drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c index 5af9339087b7..468f0992db39 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c @@ -615,7 +615,7 @@ static int pot_hole(struct drm_i915_private *i915, u64 addr; for (addr = round_up(hole_start + I915_GTT_PAGE_SIZE, step) - I915_GTT_PAGE_SIZE; - addr + vma->size <= hole_end; + addr <= round_down(hole_end - 2*I915_GTT_PAGE_SIZE, step) - I915_GTT_PAGE_SIZE; addr += step) { err = i915_vma_pin(vma, 0, 0, addr | flags); if (err) { -- cgit From 72affdf9729d9e9a81498196ed5ada4d8f1c599e Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 14 Feb 2017 11:37:56 +0000 Subject: drm/i915: Silence compiler for GTT selftests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit gcc-4.7 spotted that In file included from drivers/gpu/drm/i915/i915_gem_gtt.c:3791:0: drivers/gpu/drm/i915/selftests/i915_gem_gtt.c: In function ‘pot_hole’: drivers/gpu/drm/i915/selftests/i915_gem_gtt.c:594:6: error: ‘err’ may be used uninitialized in this function [-Werror=maybe-uninitialized] So set it to 0 should we ever skip over a hole smaller than a few pages. Signed-off-by: Chris Wilson Cc: Matthew Auld Link: http://patchwork.freedesktop.org/patch/msgid/20170214113756.27834-1-chris@chris-wilson.co.uk Reviewed-by: Matthew Auld --- drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c index 468f0992db39..320c6879fd83 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c @@ -591,7 +591,7 @@ static int pot_hole(struct drm_i915_private *i915, struct i915_vma *vma; unsigned long flags; unsigned int pot; - int err; + int err = 0; flags = PIN_OFFSET_FIXED | PIN_USER; if (i915_is_ggtt(vm)) -- cgit From b8f2169db98eba2c5009cc67a448dc48a0fd4c48 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 14 Feb 2017 14:35:09 +0000 Subject: drm/i915: Silence compiler warning for seltests/i915_gem_coherency In general, the compiler should not be able to detect if we do any passes through the test loops: In file included from drivers/gpu/drm/i915/i915_gem.c:5029: drivers/gpu/drm/i915/selftests/i915_gem_coherency.c: In function 'igt_gem_coherency': drivers/gpu/drm/i915/selftests/i915_gem_coherency.c:274: error: 'err' may be used uninitialized in this function Reported-by: kbuild test robot Signed-off-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20170214143509.15719-1-chris@chris-wilson.co.uk Reviewed-by: Matthew Auld --- drivers/gpu/drm/i915/selftests/i915_gem_coherency.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c b/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c index ad92c60290f1..f08d0179b3df 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c @@ -272,7 +272,7 @@ static int igt_gem_coherency(void *arg) struct drm_i915_gem_object *obj; unsigned long count, n; u32 *offsets, *values; - int err; + int err = 0; /* We repeatedly write, overwrite and read from a sequence of * cachelines in order to try and detect incoherency (unflushed writes -- cgit From 65300b1f6e04a905200ac5943cb9bfdadc3651a2 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 14 Feb 2017 13:34:20 +0000 Subject: drm/i915/guc: Don't take struct_mutex for object unreference We no longer need to take the struct_mutex for freeing objects, and on the finalisation paths here the mutex is not been used for serialisation of the pointer access, so remove the BKL wart. Signed-off-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20170214133420.7977-1-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen --- drivers/gpu/drm/i915/intel_guc_loader.c | 14 ++++++-------- drivers/gpu/drm/i915/intel_huc.c | 9 ++++----- 2 files changed, 10 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_guc_loader.c b/drivers/gpu/drm/i915/intel_guc_loader.c index 8ef33d88d5a0..9885f760f2ef 100644 --- a/drivers/gpu/drm/i915/intel_guc_loader.c +++ b/drivers/gpu/drm/i915/intel_guc_loader.c @@ -714,12 +714,9 @@ fail: DRM_DEBUG_DRIVER("uC fw fetch status FAIL; err %d, fw %p, obj %p\n", err, fw, uc_fw->obj); - mutex_lock(&dev_priv->drm.struct_mutex); - obj = uc_fw->obj; + obj = fetch_and_zero(&uc_fw->obj); if (obj) i915_gem_object_put(obj); - uc_fw->obj = NULL; - mutex_unlock(&dev_priv->drm.struct_mutex); release_firmware(fw); /* OK even if fw is NULL */ uc_fw->fetch_status = INTEL_UC_FIRMWARE_FAIL; @@ -793,16 +790,17 @@ void intel_guc_init(struct drm_i915_private *dev_priv) void intel_guc_fini(struct drm_i915_private *dev_priv) { struct intel_uc_fw *guc_fw = &dev_priv->guc.fw; + struct drm_i915_gem_object *obj; mutex_lock(&dev_priv->drm.struct_mutex); guc_interrupts_release(dev_priv); i915_guc_submission_disable(dev_priv); i915_guc_submission_fini(dev_priv); - - if (guc_fw->obj) - i915_gem_object_put(guc_fw->obj); - guc_fw->obj = NULL; mutex_unlock(&dev_priv->drm.struct_mutex); + obj = fetch_and_zero(&guc_fw->obj); + if (obj) + i915_gem_object_put(obj); + guc_fw->fetch_status = INTEL_UC_FIRMWARE_NONE; } diff --git a/drivers/gpu/drm/i915/intel_huc.c b/drivers/gpu/drm/i915/intel_huc.c index c144609425f6..c28543d220a2 100644 --- a/drivers/gpu/drm/i915/intel_huc.c +++ b/drivers/gpu/drm/i915/intel_huc.c @@ -274,12 +274,11 @@ fail: void intel_huc_fini(struct drm_i915_private *dev_priv) { struct intel_uc_fw *huc_fw = &dev_priv->huc.fw; + struct drm_i915_gem_object *obj; - mutex_lock(&dev_priv->drm.struct_mutex); - if (huc_fw->obj) - i915_gem_object_put(huc_fw->obj); - huc_fw->obj = NULL; - mutex_unlock(&dev_priv->drm.struct_mutex); + obj = fetch_and_zero(&huc_fw->obj); + if (obj) + i915_gem_object_put(obj); huc_fw->fetch_status = INTEL_UC_FIRMWARE_NONE; } -- cgit From 5a4c6f1b1b2d91c5252cedf1c7b68a894c3f117e Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 14 Feb 2017 16:46:11 +0000 Subject: drm/i915: The return of i915_gpu_info to debugfs Once upon a time before we had automated GPU state capture upon hangs, we had intel_gpu_dump. Now we come almost full circle and reinstate that view of the current GPU queues and registers by using the error capture facility to snapshot the GPU state when debugfs/.../i915_gpu_info is opened - which should provided useful debugging to both the error capture routines (without having to cause a hang and avoid the error state being eaten by igt) and generally. v2: Rename drm_i915_error_state to i915_gpu_state to alleviate some name collisions between the error state dump and inspecting the gpu state. Signed-off-by: Chris Wilson Cc: Mika Kuoppala Reviewed-by: Mika Kuoppala Link: http://patchwork.freedesktop.org/patch/msgid/20170214164611.11381-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_debugfs.c | 113 ++++++++++++----------- drivers/gpu/drm/i915/i915_drv.c | 2 +- drivers/gpu/drm/i915/i915_drv.h | 46 ++++++---- drivers/gpu/drm/i915/i915_gpu_error.c | 165 ++++++++++++++++++---------------- drivers/gpu/drm/i915/i915_sysfs.c | 26 +++--- drivers/gpu/drm/i915/intel_display.c | 2 +- 6 files changed, 190 insertions(+), 164 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index cd023fda1a3b..0de0596d41ac 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -988,89 +988,93 @@ static int i915_gem_fence_regs_info(struct seq_file *m, void *data) } #if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR) - -static ssize_t -i915_error_state_write(struct file *filp, - const char __user *ubuf, - size_t cnt, - loff_t *ppos) +static ssize_t gpu_state_read(struct file *file, char __user *ubuf, + size_t count, loff_t *pos) { - struct i915_error_state_file_priv *error_priv = filp->private_data; + struct i915_gpu_state *error = file->private_data; + struct drm_i915_error_state_buf str; + ssize_t ret; + loff_t tmp; - DRM_DEBUG_DRIVER("Resetting error state\n"); - i915_destroy_error_state(error_priv->i915); - - return cnt; -} - -static int i915_error_state_open(struct inode *inode, struct file *file) -{ - struct drm_i915_private *dev_priv = inode->i_private; - struct i915_error_state_file_priv *error_priv; + if (!error) + return 0; - error_priv = kzalloc(sizeof(*error_priv), GFP_KERNEL); - if (!error_priv) - return -ENOMEM; + ret = i915_error_state_buf_init(&str, error->i915, count, *pos); + if (ret) + return ret; - error_priv->i915 = dev_priv; + ret = i915_error_state_to_str(&str, error); + if (ret) + goto out; - i915_error_state_get(&dev_priv->drm, error_priv); + tmp = 0; + ret = simple_read_from_buffer(ubuf, count, &tmp, str.buf, str.bytes); + if (ret < 0) + goto out; - file->private_data = error_priv; + *pos = str.start + ret; +out: + i915_error_state_buf_release(&str); + return ret; +} +static int gpu_state_release(struct inode *inode, struct file *file) +{ + i915_gpu_state_put(file->private_data); return 0; } -static int i915_error_state_release(struct inode *inode, struct file *file) +static int i915_gpu_info_open(struct inode *inode, struct file *file) { - struct i915_error_state_file_priv *error_priv = file->private_data; + struct i915_gpu_state *gpu; - i915_error_state_put(error_priv); - kfree(error_priv); + gpu = i915_capture_gpu_state(inode->i_private); + if (!gpu) + return -ENOMEM; + file->private_data = gpu; return 0; } -static ssize_t i915_error_state_read(struct file *file, char __user *userbuf, - size_t count, loff_t *pos) +static const struct file_operations i915_gpu_info_fops = { + .owner = THIS_MODULE, + .open = i915_gpu_info_open, + .read = gpu_state_read, + .llseek = default_llseek, + .release = gpu_state_release, +}; + +static ssize_t +i915_error_state_write(struct file *filp, + const char __user *ubuf, + size_t cnt, + loff_t *ppos) { - struct i915_error_state_file_priv *error_priv = file->private_data; - struct drm_i915_error_state_buf error_str; - loff_t tmp_pos = 0; - ssize_t ret_count = 0; - int ret; + struct i915_gpu_state *error = filp->private_data; - ret = i915_error_state_buf_init(&error_str, error_priv->i915, - count, *pos); - if (ret) - return ret; + if (!error) + return 0; - ret = i915_error_state_to_str(&error_str, error_priv); - if (ret) - goto out; + DRM_DEBUG_DRIVER("Resetting error state\n"); + i915_reset_error_state(error->i915); - ret_count = simple_read_from_buffer(userbuf, count, &tmp_pos, - error_str.buf, - error_str.bytes); + return cnt; +} - if (ret_count < 0) - ret = ret_count; - else - *pos = error_str.start + ret_count; -out: - i915_error_state_buf_release(&error_str); - return ret ?: ret_count; +static int i915_error_state_open(struct inode *inode, struct file *file) +{ + file->private_data = i915_first_error_state(inode->i_private); + return 0; } static const struct file_operations i915_error_state_fops = { .owner = THIS_MODULE, .open = i915_error_state_open, - .read = i915_error_state_read, + .read = gpu_state_read, .write = i915_error_state_write, .llseek = default_llseek, - .release = i915_error_state_release, + .release = gpu_state_release, }; - #endif static int @@ -4811,6 +4815,7 @@ static const struct i915_debugfs_files { {"i915_gem_drop_caches", &i915_drop_caches_fops}, #if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR) {"i915_error_state", &i915_error_state_fops}, + {"i915_gpu_info", &i915_gpu_info_fops}, #endif {"i915_next_seqno", &i915_next_seqno_fops}, {"i915_display_crc_ctl", &i915_display_crc_ctl_fops}, diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index efb1f64f0e41..50de00921cc9 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -1370,7 +1370,7 @@ void i915_driver_unload(struct drm_device *dev) /* Free error state after interrupts are fully disabled. */ cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work); - i915_destroy_error_state(dev_priv); + i915_reset_error_state(dev_priv); /* Flush any outstanding unpin_work. */ drain_workqueue(dev_priv->wq); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 6cfdb64926d7..992bda004f6b 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -897,7 +897,7 @@ struct intel_device_info { struct intel_display_error_state; -struct drm_i915_error_state { +struct i915_gpu_state { struct kref ref; struct timeval time; struct timeval boottime; @@ -917,7 +917,7 @@ struct drm_i915_error_state { u32 eir; u32 pgtbl_er; u32 ier; - u32 gtier[4]; + u32 gtier[4], ngtier; u32 ccid; u32 derrmr; u32 forcewake; @@ -931,6 +931,7 @@ struct drm_i915_error_state { u32 gab_ctl; u32 gfx_mode; + u32 nfence; u64 fence[I915_MAX_NUM_FENCES]; struct intel_overlay_error_state *overlay; struct intel_display_error_state *display; @@ -1512,11 +1513,6 @@ struct drm_i915_error_state_buf { loff_t pos; }; -struct i915_error_state_file_priv { - struct drm_i915_private *i915; - struct drm_i915_error_state *error; -}; - #define I915_RESET_TIMEOUT (10 * HZ) /* 10s */ #define I915_FENCE_TIMEOUT (10 * HZ) /* 10s */ @@ -1533,7 +1529,7 @@ struct i915_gpu_error { /* For reset and error_state handling. */ spinlock_t lock; /* Protected by the above dev->gpu_error.lock. */ - struct drm_i915_error_state *first_error; + struct i915_gpu_state *first_error; unsigned long missed_irq_rings; @@ -3574,7 +3570,7 @@ static inline void intel_display_crc_init(struct drm_i915_private *dev_priv) {} __printf(2, 3) void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...); int i915_error_state_to_str(struct drm_i915_error_state_buf *estr, - const struct i915_error_state_file_priv *error); + const struct i915_gpu_state *gpu); int i915_error_state_buf_init(struct drm_i915_error_state_buf *eb, struct drm_i915_private *i915, size_t count, loff_t pos); @@ -3583,13 +3579,28 @@ static inline void i915_error_state_buf_release( { kfree(eb->buf); } + +struct i915_gpu_state *i915_capture_gpu_state(struct drm_i915_private *i915); void i915_capture_error_state(struct drm_i915_private *dev_priv, u32 engine_mask, const char *error_msg); -void i915_error_state_get(struct drm_device *dev, - struct i915_error_state_file_priv *error_priv); -void i915_error_state_put(struct i915_error_state_file_priv *error_priv); -void i915_destroy_error_state(struct drm_i915_private *dev_priv); + +static inline struct i915_gpu_state * +i915_gpu_state_get(struct i915_gpu_state *gpu) +{ + kref_get(&gpu->ref); + return gpu; +} + +void __i915_gpu_state_free(struct kref *kref); +static inline void i915_gpu_state_put(struct i915_gpu_state *gpu) +{ + if (gpu) + kref_put(&gpu->ref, __i915_gpu_state_free); +} + +struct i915_gpu_state *i915_first_error_state(struct drm_i915_private *i915); +void i915_reset_error_state(struct drm_i915_private *i915); #else @@ -3599,7 +3610,13 @@ static inline void i915_capture_error_state(struct drm_i915_private *dev_priv, { } -static inline void i915_destroy_error_state(struct drm_i915_private *dev_priv) +static inline struct i915_gpu_state * +i915_first_error_state(struct drm_i915_private *i915) +{ + return NULL; +} + +static inline void i915_reset_error_state(struct drm_i915_private *i915) { } @@ -3747,7 +3764,6 @@ extern void intel_overlay_print_error_state(struct drm_i915_error_state_buf *e, extern struct intel_display_error_state * intel_display_capture_error_state(struct drm_i915_private *dev_priv); extern void intel_display_print_error_state(struct drm_i915_error_state_buf *e, - struct drm_i915_private *dev_priv, struct intel_display_error_state *error); int sandybridge_pcode_read(struct drm_i915_private *dev_priv, u32 mbox, u32 *val); diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index c28ccfef84ab..3a3c7c3c4931 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -342,7 +342,7 @@ static void print_error_buffers(struct drm_i915_error_state_buf *m, } static void error_print_instdone(struct drm_i915_error_state_buf *m, - struct drm_i915_error_engine *ee) + const struct drm_i915_error_engine *ee) { int slice; int subslice; @@ -372,7 +372,7 @@ static void error_print_instdone(struct drm_i915_error_state_buf *m, static void error_print_request(struct drm_i915_error_state_buf *m, const char *prefix, - struct drm_i915_error_request *erq) + const struct drm_i915_error_request *erq) { if (!erq->seqno) return; @@ -386,7 +386,7 @@ static void error_print_request(struct drm_i915_error_state_buf *m, static void error_print_context(struct drm_i915_error_state_buf *m, const char *header, - struct drm_i915_error_context *ctx) + const struct drm_i915_error_context *ctx) { err_printf(m, "%s%s[%d] user_handle %d hw_id %d, ban score %d guilty %d active %d\n", header, ctx->comm, ctx->pid, ctx->handle, ctx->hw_id, @@ -394,7 +394,7 @@ static void error_print_context(struct drm_i915_error_state_buf *m, } static void error_print_engine(struct drm_i915_error_state_buf *m, - struct drm_i915_error_engine *ee) + const struct drm_i915_error_engine *ee) { err_printf(m, "%s command stream:\n", engine_str(ee->engine_id)); err_printf(m, " START: 0x%08x\n", ee->start); @@ -569,21 +569,32 @@ static void err_print_params(struct drm_i915_error_state_buf *m, #undef PRINT } +static void err_print_pciid(struct drm_i915_error_state_buf *m, + struct drm_i915_private *i915) +{ + struct pci_dev *pdev = i915->drm.pdev; + + err_printf(m, "PCI ID: 0x%04x\n", pdev->device); + err_printf(m, "PCI Revision: 0x%02x\n", pdev->revision); + err_printf(m, "PCI Subsystem: %04x:%04x\n", + pdev->subsystem_vendor, + pdev->subsystem_device); +} + int i915_error_state_to_str(struct drm_i915_error_state_buf *m, - const struct i915_error_state_file_priv *error_priv) + const struct i915_gpu_state *error) { - struct drm_i915_private *dev_priv = error_priv->i915; - struct pci_dev *pdev = dev_priv->drm.pdev; - struct drm_i915_error_state *error = error_priv->error; + struct drm_i915_private *dev_priv = m->i915; struct drm_i915_error_object *obj; int i, j; if (!error) { - err_printf(m, "no error state collected\n"); - goto out; + err_printf(m, "No error state collected\n"); + return 0; } - err_printf(m, "%s\n", error->error_msg); + if (*error->error_msg) + err_printf(m, "%s\n", error->error_msg); err_printf(m, "Kernel: " UTS_RELEASE "\n"); err_printf(m, "Time: %ld s %ld us\n", error->time.tv_sec, error->time.tv_usec); @@ -605,11 +616,7 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m, err_printf(m, "Reset count: %u\n", error->reset_count); err_printf(m, "Suspend count: %u\n", error->suspend_count); err_printf(m, "Platform: %s\n", intel_platform_name(error->device_info.platform)); - err_printf(m, "PCI ID: 0x%04x\n", pdev->device); - err_printf(m, "PCI Revision: 0x%02x\n", pdev->revision); - err_printf(m, "PCI Subsystem: %04x:%04x\n", - pdev->subsystem_vendor, - pdev->subsystem_device); + err_print_pciid(m, error->i915); err_printf(m, "IOMMU enabled?: %d\n", error->iommu); @@ -625,19 +632,15 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m, err_printf(m, "EIR: 0x%08x\n", error->eir); err_printf(m, "IER: 0x%08x\n", error->ier); - if (INTEL_GEN(dev_priv) >= 8) { - for (i = 0; i < 4; i++) - err_printf(m, "GTIER gt %d: 0x%08x\n", i, - error->gtier[i]); - } else if (HAS_PCH_SPLIT(dev_priv) || IS_VALLEYVIEW(dev_priv)) - err_printf(m, "GTIER: 0x%08x\n", error->gtier[0]); + for (i = 0; i < error->ngtier; i++) + err_printf(m, "GTIER[%d]: 0x%08x\n", i, error->gtier[i]); err_printf(m, "PGTBL_ER: 0x%08x\n", error->pgtbl_er); err_printf(m, "FORCEWAKE: 0x%08x\n", error->forcewake); err_printf(m, "DERRMR: 0x%08x\n", error->derrmr); err_printf(m, "CCID: 0x%08x\n", error->ccid); err_printf(m, "Missed interrupts: 0x%08lx\n", dev_priv->gpu_error.missed_irq_rings); - for (i = 0; i < dev_priv->num_fence_regs; i++) + for (i = 0; i < error->nfence; i++) err_printf(m, " fence[%d] = %08llx\n", i, error->fence[i]); if (INTEL_GEN(dev_priv) >= 6) { @@ -686,7 +689,7 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m, error->pinned_bo_count); for (i = 0; i < ARRAY_SIZE(error->engine); i++) { - struct drm_i915_error_engine *ee = &error->engine[i]; + const struct drm_i915_error_engine *ee = &error->engine[i]; obj = ee->batchbuffer; if (obj) { @@ -751,12 +754,11 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m, intel_overlay_print_error_state(m, error->overlay); if (error->display) - intel_display_print_error_state(m, dev_priv, error->display); + intel_display_print_error_state(m, error->display); err_print_capabilities(m, &error->device_info); err_print_params(m, &error->params); -out: if (m->bytes == 0 && m->err) return m->err; @@ -808,10 +810,10 @@ static void i915_error_object_free(struct drm_i915_error_object *obj) kfree(obj); } -static void i915_error_state_free(struct kref *error_ref) +void __i915_gpu_state_free(struct kref *error_ref) { - struct drm_i915_error_state *error = container_of(error_ref, - typeof(*error), ref); + struct i915_gpu_state *error = + container_of(error_ref, typeof(*error), ref); int i; for (i = 0; i < ARRAY_SIZE(error->engine); i++) { @@ -976,7 +978,7 @@ static u32 capture_error_bo(struct drm_i915_error_buffer *err, * It's only a small step better than a random number in its current form. */ static uint32_t i915_error_generate_code(struct drm_i915_private *dev_priv, - struct drm_i915_error_state *error, + struct i915_gpu_state *error, int *engine_id) { uint32_t error_code = 0; @@ -1001,20 +1003,21 @@ static uint32_t i915_error_generate_code(struct drm_i915_private *dev_priv, } static void i915_gem_record_fences(struct drm_i915_private *dev_priv, - struct drm_i915_error_state *error) + struct i915_gpu_state *error) { int i; - if (IS_GEN3(dev_priv) || IS_GEN2(dev_priv)) { + if (INTEL_GEN(dev_priv) >= 6) { for (i = 0; i < dev_priv->num_fence_regs; i++) - error->fence[i] = I915_READ(FENCE_REG(i)); - } else if (IS_GEN5(dev_priv) || IS_GEN4(dev_priv)) { + error->fence[i] = I915_READ64(FENCE_REG_GEN6_LO(i)); + } else if (INTEL_GEN(dev_priv) >= 4) { for (i = 0; i < dev_priv->num_fence_regs; i++) error->fence[i] = I915_READ64(FENCE_REG_965_LO(i)); - } else if (INTEL_GEN(dev_priv) >= 6) { + } else { for (i = 0; i < dev_priv->num_fence_regs; i++) - error->fence[i] = I915_READ64(FENCE_REG_GEN6_LO(i)); + error->fence[i] = I915_READ(FENCE_REG(i)); } + error->nfence = i; } static inline u32 @@ -1038,7 +1041,7 @@ gen8_engine_sync_index(struct intel_engine_cs *engine, return idx; } -static void gen8_record_semaphore_state(struct drm_i915_error_state *error, +static void gen8_record_semaphore_state(struct i915_gpu_state *error, struct intel_engine_cs *engine, struct drm_i915_error_engine *ee) { @@ -1131,7 +1134,7 @@ static void error_record_engine_waiters(struct intel_engine_cs *engine, spin_unlock_irq(&b->lock); } -static void error_record_engine_registers(struct drm_i915_error_state *error, +static void error_record_engine_registers(struct i915_gpu_state *error, struct intel_engine_cs *engine, struct drm_i915_error_engine *ee) { @@ -1328,7 +1331,7 @@ static void record_context(struct drm_i915_error_context *e, } static void i915_gem_record_rings(struct drm_i915_private *dev_priv, - struct drm_i915_error_state *error) + struct i915_gpu_state *error) { struct i915_ggtt *ggtt = &dev_priv->ggtt; int i; @@ -1404,7 +1407,7 @@ static void i915_gem_record_rings(struct drm_i915_private *dev_priv, } static void i915_gem_capture_vm(struct drm_i915_private *dev_priv, - struct drm_i915_error_state *error, + struct i915_gpu_state *error, struct i915_address_space *vm, int idx) { @@ -1430,7 +1433,7 @@ static void i915_gem_capture_vm(struct drm_i915_private *dev_priv, } static void i915_capture_active_buffers(struct drm_i915_private *dev_priv, - struct drm_i915_error_state *error) + struct i915_gpu_state *error) { int cnt = 0, i, j; @@ -1455,7 +1458,7 @@ static void i915_capture_active_buffers(struct drm_i915_private *dev_priv, } static void i915_capture_pinned_buffers(struct drm_i915_private *dev_priv, - struct drm_i915_error_state *error) + struct i915_gpu_state *error) { struct i915_address_space *vm = &dev_priv->ggtt.base; struct drm_i915_error_buffer *bo; @@ -1486,7 +1489,7 @@ static void i915_capture_pinned_buffers(struct drm_i915_private *dev_priv, } static void i915_gem_capture_guc_log_buffer(struct drm_i915_private *dev_priv, - struct drm_i915_error_state *error) + struct i915_gpu_state *error) { /* Capturing log buf contents won't be useful if logging was disabled */ if (!dev_priv->guc.log.vma || (i915.guc_log_level < 0)) @@ -1498,7 +1501,7 @@ static void i915_gem_capture_guc_log_buffer(struct drm_i915_private *dev_priv, /* Capture all registers which don't fit into another category. */ static void i915_capture_reg_state(struct drm_i915_private *dev_priv, - struct drm_i915_error_state *error) + struct i915_gpu_state *error) { int i; @@ -1555,9 +1558,11 @@ static void i915_capture_reg_state(struct drm_i915_private *dev_priv, error->ier = I915_READ(GEN8_DE_MISC_IER); for (i = 0; i < 4; i++) error->gtier[i] = I915_READ(GEN8_GT_IER(i)); + error->ngtier = 4; } else if (HAS_PCH_SPLIT(dev_priv)) { error->ier = I915_READ(DEIER); error->gtier[0] = I915_READ(GTIER); + error->ngtier = 1; } else if (IS_GEN2(dev_priv)) { error->ier = I915_READ16(IER); } else if (!IS_VALLEYVIEW(dev_priv)) { @@ -1568,7 +1573,7 @@ static void i915_capture_reg_state(struct drm_i915_private *dev_priv, } static void i915_error_capture_msg(struct drm_i915_private *dev_priv, - struct drm_i915_error_state *error, + struct i915_gpu_state *error, u32 engine_mask, const char *error_msg) { @@ -1595,7 +1600,7 @@ static void i915_error_capture_msg(struct drm_i915_private *dev_priv, } static void i915_capture_gen_state(struct drm_i915_private *dev_priv, - struct drm_i915_error_state *error) + struct i915_gpu_state *error) { error->iommu = -1; #ifdef CONFIG_INTEL_IOMMU @@ -1611,7 +1616,7 @@ static void i915_capture_gen_state(struct drm_i915_private *dev_priv, static int capture(void *data) { - struct drm_i915_error_state *error = data; + struct i915_gpu_state *error = data; do_gettimeofday(&error->time); error->boottime = ktime_to_timeval(ktime_get_boottime()); @@ -1637,6 +1642,23 @@ static int capture(void *data) #define DAY_AS_SECONDS(x) (24 * 60 * 60 * (x)) +struct i915_gpu_state * +i915_capture_gpu_state(struct drm_i915_private *i915) +{ + struct i915_gpu_state *error; + + error = kzalloc(sizeof(*error), GFP_ATOMIC); + if (!error) + return NULL; + + kref_init(&error->ref); + error->i915 = i915; + + stop_machine(capture, error, NULL); + + return error; +} + /** * i915_capture_error_state - capture an error record for later analysis * @dev: drm device @@ -1651,7 +1673,7 @@ void i915_capture_error_state(struct drm_i915_private *dev_priv, const char *error_msg) { static bool warned; - struct drm_i915_error_state *error; + struct i915_gpu_state *error; unsigned long flags; if (!i915.error_capture) @@ -1660,18 +1682,12 @@ void i915_capture_error_state(struct drm_i915_private *dev_priv, if (READ_ONCE(dev_priv->gpu_error.first_error)) return; - /* Account for pipe specific data like PIPE*STAT */ - error = kzalloc(sizeof(*error), GFP_ATOMIC); + error = i915_capture_gpu_state(dev_priv); if (!error) { DRM_DEBUG_DRIVER("out of memory, not capturing error state\n"); return; } - kref_init(&error->ref); - error->i915 = dev_priv; - - stop_machine(capture, error, NULL); - i915_error_capture_msg(dev_priv, error, engine_mask, error_msg); DRM_INFO("%s\n", error->error_msg); @@ -1685,7 +1701,7 @@ void i915_capture_error_state(struct drm_i915_private *dev_priv, } if (error) { - i915_error_state_free(&error->ref); + __i915_gpu_state_free(&error->ref); return; } @@ -1701,33 +1717,28 @@ void i915_capture_error_state(struct drm_i915_private *dev_priv, } } -void i915_error_state_get(struct drm_device *dev, - struct i915_error_state_file_priv *error_priv) +struct i915_gpu_state * +i915_first_error_state(struct drm_i915_private *i915) { - struct drm_i915_private *dev_priv = to_i915(dev); + struct i915_gpu_state *error; - spin_lock_irq(&dev_priv->gpu_error.lock); - error_priv->error = dev_priv->gpu_error.first_error; - if (error_priv->error) - kref_get(&error_priv->error->ref); - spin_unlock_irq(&dev_priv->gpu_error.lock); -} + spin_lock_irq(&i915->gpu_error.lock); + error = i915->gpu_error.first_error; + if (error) + i915_gpu_state_get(error); + spin_unlock_irq(&i915->gpu_error.lock); -void i915_error_state_put(struct i915_error_state_file_priv *error_priv) -{ - if (error_priv->error) - kref_put(&error_priv->error->ref, i915_error_state_free); + return error; } -void i915_destroy_error_state(struct drm_i915_private *dev_priv) +void i915_reset_error_state(struct drm_i915_private *i915) { - struct drm_i915_error_state *error; + struct i915_gpu_state *error; - spin_lock_irq(&dev_priv->gpu_error.lock); - error = dev_priv->gpu_error.first_error; - dev_priv->gpu_error.first_error = NULL; - spin_unlock_irq(&dev_priv->gpu_error.lock); + spin_lock_irq(&i915->gpu_error.lock); + error = i915->gpu_error.first_error; + i915->gpu_error.first_error = NULL; + spin_unlock_irq(&i915->gpu_error.lock); - if (error) - kref_put(&error->ref, i915_error_state_free); + i915_gpu_state_put(error); } diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c index a721ff116101..af0ac9f261fd 100644 --- a/drivers/gpu/drm/i915/i915_sysfs.c +++ b/drivers/gpu/drm/i915/i915_sysfs.c @@ -522,33 +522,27 @@ static ssize_t error_state_read(struct file *filp, struct kobject *kobj, struct device *kdev = kobj_to_dev(kobj); struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); - struct drm_device *dev = &dev_priv->drm; - struct i915_error_state_file_priv error_priv; struct drm_i915_error_state_buf error_str; - ssize_t ret_count = 0; - int ret; - - memset(&error_priv, 0, sizeof(error_priv)); + struct i915_gpu_state *gpu; + ssize_t ret; - ret = i915_error_state_buf_init(&error_str, to_i915(dev), count, off); + ret = i915_error_state_buf_init(&error_str, dev_priv, count, off); if (ret) return ret; - error_priv.i915 = dev_priv; - i915_error_state_get(dev, &error_priv); - - ret = i915_error_state_to_str(&error_str, &error_priv); + gpu = i915_first_error_state(dev_priv); + ret = i915_error_state_to_str(&error_str, gpu); if (ret) goto out; - ret_count = count < error_str.bytes ? count : error_str.bytes; + ret = count < error_str.bytes ? count : error_str.bytes; + memcpy(buf, error_str.buf, ret); - memcpy(buf, error_str.buf, ret_count); out: - i915_error_state_put(&error_priv); + i915_gpu_state_put(gpu); i915_error_state_buf_release(&error_str); - return ret ?: ret_count; + return ret; } static ssize_t error_state_write(struct file *file, struct kobject *kobj, @@ -559,7 +553,7 @@ static ssize_t error_state_write(struct file *file, struct kobject *kobj, struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); DRM_DEBUG_DRIVER("Resetting error state\n"); - i915_destroy_error_state(dev_priv); + i915_reset_error_state(dev_priv); return count; } diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 02e6bb2d614c..660581fb329a 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -15838,9 +15838,9 @@ intel_display_capture_error_state(struct drm_i915_private *dev_priv) void intel_display_print_error_state(struct drm_i915_error_state_buf *m, - struct drm_i915_private *dev_priv, struct intel_display_error_state *error) { + struct drm_i915_private *dev_priv = m->i915; int i; if (!error) -- cgit From db4c5e0b726f021e864e887e6b49ad6ecebabf5a Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 10 Feb 2017 15:03:46 +0000 Subject: drm/i915: Enable fine-tuned RPS for cherryview When the RPS tuning was applied to Baytrail, in commit 8fb55197e64d ("drm/i915: Agressive downclocking on Baytrail"), concern was given that it might cause Cherryview excess wakeups of the common power well. However, the static thresholds perform poorly for Kodi, and the GPU is unable to deliver the video frames on time. Enabling the dynamic, finer thresholds used on all other platforms (including Skylake and Broxton that also have the same multiple powerwell concerns) allows the GPU to pick a more appropriate frequency and not drop frames. Signed-off-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20170210150348.22146-1-chris@chris-wilson.co.uk Reviewed-by: Radoslaw Szwichtenberg --- drivers/gpu/drm/i915/intel_pm.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 633cb0478b92..e801b5d26b90 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -4971,8 +4971,7 @@ static int valleyview_set_rps(struct drm_i915_private *dev_priv, u8 val) if (err) return err; - if (!IS_CHERRYVIEW(dev_priv)) - gen6_set_rps_thresholds(dev_priv, val); + gen6_set_rps_thresholds(dev_priv, val); } dev_priv->rps.cur_freq = val; -- cgit From 17136d548e1b5ecd947da996f125907a671313d9 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 10 Feb 2017 15:03:47 +0000 Subject: drm/i915: Don't accidentally increase the frequency in handling DOWN rps If we receive a DOWN_TIMEOUT rps interrupt, we respond by reducing the GPU clocks significantly. Before we do, double check that the frequency we pick is actually a decrease. Signed-off-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20170210150348.22146-2-chris@chris-wilson.co.uk Reviewed-by: Radoslaw Szwichtenberg --- drivers/gpu/drm/i915/i915_irq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 27c13193f7a2..86cca37a4e4f 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -1186,7 +1186,7 @@ static void gen6_pm_rps_work(struct work_struct *work) } else if (pm_iir & GEN6_PM_RP_DOWN_TIMEOUT) { if (dev_priv->rps.cur_freq > dev_priv->rps.efficient_freq) new_delay = dev_priv->rps.efficient_freq; - else + else if (dev_priv->rps.cur_freq > dev_priv->rps.min_freq_softlimit) new_delay = dev_priv->rps.min_freq_softlimit; adj = 0; } else if (pm_iir & GEN6_PM_RP_DOWN_THRESHOLD) { -- cgit From bd64818dfe3b19ecdcf47a7b0726c4b59ff4fb5e Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 10 Feb 2017 15:03:48 +0000 Subject: drm/i915: Only apply the jump to the "efficient RPS" frequency on startup Currently we apply the jump to rpe if we are below it and the GPU needs more power. For some GPUs, the rpe is 75% of the maximum range causing us to dramatically overshoot low power applications *and* unable to reach the low frequency that can most efficiently deliver their workload. Signed-off-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20170210150348.22146-3-chris@chris-wilson.co.uk Reviewed-by: Radoslaw Szwichtenberg --- drivers/gpu/drm/i915/i915_irq.c | 8 -------- drivers/gpu/drm/i915/intel_pm.c | 11 +++++++++-- 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 86cca37a4e4f..84fda3fce4b8 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -1173,14 +1173,6 @@ static void gen6_pm_rps_work(struct work_struct *work) if (new_delay >= dev_priv->rps.max_freq_softlimit) adj = 0; - /* - * For better performance, jump directly - * to RPe if we're below it. - */ - if (new_delay < dev_priv->rps.efficient_freq - adj) { - new_delay = dev_priv->rps.efficient_freq; - adj = 0; - } } else if (client_boost || any_waiters(dev_priv)) { adj = 0; } else if (pm_iir & GEN6_PM_RP_DOWN_TIMEOUT) { diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index e801b5d26b90..3d311e100643 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -5019,6 +5019,8 @@ void gen6_rps_busy(struct drm_i915_private *dev_priv) { mutex_lock(&dev_priv->rps.hw_lock); if (dev_priv->rps.enabled) { + u8 freq; + if (dev_priv->pm_rps_events & (GEN6_PM_RP_DOWN_EI_EXPIRED | GEN6_PM_RP_UP_EI_EXPIRED)) gen6_rps_reset_ei(dev_priv); I915_WRITE(GEN6_PMINTRMSK, @@ -5026,9 +5028,14 @@ void gen6_rps_busy(struct drm_i915_private *dev_priv) gen6_enable_rps_interrupts(dev_priv); - /* Ensure we start at the user's desired frequency */ + /* Use the user's desired frequency as a guide, but for better + * performance, jump directly to RPe as our starting frequency. + */ + freq = max(dev_priv->rps.cur_freq, + dev_priv->rps.efficient_freq); + if (intel_set_rps(dev_priv, - clamp(dev_priv->rps.cur_freq, + clamp(freq, dev_priv->rps.min_freq_softlimit, dev_priv->rps.max_freq_softlimit))) DRM_DEBUG_DRIVER("Failed to set idle frequency\n"); -- cgit From 4ac9659ef9f110a8f651e5a75c24e1ac439b44ff Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Tue, 14 Feb 2017 15:00:17 +0000 Subject: drm/i915: Remove duplicate intel_logical_ring_workarounds_emit intel_ring_workarounds_emit is exactly the same code. Signed-off-by: Tvrtko Ursulin Reviewed-by: Mika Kuoppala Reviewed-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20170214150017.16058-1-tvrtko.ursulin@linux.intel.com --- drivers/gpu/drm/i915/intel_lrc.c | 35 +-------------------------------- drivers/gpu/drm/i915/intel_ringbuffer.c | 2 +- drivers/gpu/drm/i915/intel_ringbuffer.h | 1 + 3 files changed, 3 insertions(+), 35 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index f567d4b08863..ee431d39ce06 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -890,39 +890,6 @@ err: return ret; } -static int intel_logical_ring_workarounds_emit(struct drm_i915_gem_request *req) -{ - struct i915_workarounds *w = &req->i915->workarounds; - u32 *cs; - int ret, i; - - if (w->count == 0) - return 0; - - ret = req->engine->emit_flush(req, EMIT_BARRIER); - if (ret) - return ret; - - cs = intel_ring_begin(req, w->count * 2 + 2); - if (IS_ERR(cs)) - return PTR_ERR(cs); - - *cs++ = MI_LOAD_REGISTER_IMM(w->count); - for (i = 0; i < w->count; i++) { - *cs++ = i915_mmio_reg_offset(w->reg[i].addr); - *cs++ = w->reg[i].value; - } - *cs++ = MI_NOOP; - - intel_ring_advance(req, cs); - - ret = req->engine->emit_flush(req, EMIT_BARRIER); - if (ret) - return ret; - - return 0; -} - #define wa_ctx_emit(batch, index, cmd) \ do { \ int __index = (index)++; \ @@ -1672,7 +1639,7 @@ static int gen8_init_rcs_context(struct drm_i915_gem_request *req) { int ret; - ret = intel_logical_ring_workarounds_emit(req); + ret = intel_ring_workarounds_emit(req); if (ret) return ret; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 95906c45289b..f7499829ecc2 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -644,7 +644,7 @@ static void reset_ring_common(struct intel_engine_cs *engine, } } -static int intel_ring_workarounds_emit(struct drm_i915_gem_request *req) +int intel_ring_workarounds_emit(struct drm_i915_gem_request *req) { struct i915_workarounds *w = &req->i915->workarounds; u32 *cs; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 4350713dbc58..16714096810d 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -560,6 +560,7 @@ static inline u32 intel_engine_last_submit(struct intel_engine_cs *engine) } int init_workarounds_ring(struct intel_engine_cs *engine); +int intel_ring_workarounds_emit(struct drm_i915_gem_request *req); void intel_engine_get_instdone(struct intel_engine_cs *engine, struct intel_instdone *instdone); -- cgit From dfb65e71ea2c1d97ac373cc0587dc60b3307581a Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 14 Feb 2017 18:12:38 +0200 Subject: drm/i915: Fix not finding the VBT when it overlaps with OPREGION_ASLE_EXT If there is no OPREGION_ASLE_EXT then a VBT stored in mailbox #4 may use the ASLE_EXT parts of the opregion. Adjust the vbt_size calculation for a vbt in mailbox #4 for this. This fixes the driver not finding the VBT on a jumper ezpad mini3 cherrytrail tablet and on a ACER SW5_017 machine. Cc: stable@vger.kernel.org Signed-off-by: Hans de Goede Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/1487088758-30050-1-git-send-email-jani.nikula@intel.com --- drivers/gpu/drm/i915/intel_opregion.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_opregion.c b/drivers/gpu/drm/i915/intel_opregion.c index f4429f67a4e3..4a862a358c70 100644 --- a/drivers/gpu/drm/i915/intel_opregion.c +++ b/drivers/gpu/drm/i915/intel_opregion.c @@ -982,7 +982,18 @@ int intel_opregion_setup(struct drm_i915_private *dev_priv) opregion->vbt_size = vbt_size; } else { vbt = base + OPREGION_VBT_OFFSET; - vbt_size = OPREGION_ASLE_EXT_OFFSET - OPREGION_VBT_OFFSET; + /* + * The VBT specification says that if the ASLE ext + * mailbox is not used its area is reserved, but + * on some CHT boards the VBT extends into the + * ASLE ext area. Allow this even though it is + * against the spec, so we do not end up rejecting + * the VBT on those boards (and end up not finding the + * LCD panel because of this). + */ + vbt_size = (mboxes & MBOX_ASLE_EXT) ? + OPREGION_ASLE_EXT_OFFSET : OPREGION_SIZE; + vbt_size -= OPREGION_VBT_OFFSET; if (intel_bios_is_valid_vbt(vbt, vbt_size)) { DRM_DEBUG_KMS("Found valid VBT in ACPI OpRegion (Mailbox #4)\n"); opregion->vbt = vbt; -- cgit From ba7a5741b9ee6e9f682f949c8db6a96201ab4ca1 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 15 Feb 2017 08:43:35 +0000 Subject: drm/i915: Micro-optimise i915_get_ggtt_vma_pages() The predominant VMA class is normal GTT, so allow gcc to emphasize that path and avoid unnecessary stack movement. Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Link: http://patchwork.freedesktop.org/patch/msgid/20170215084357.19977-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_gtt.c | 61 +++++++++++++++++++------------------ 1 file changed, 32 insertions(+), 29 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index cb5ea5d38f11..ae04b6ea1cc3 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -2621,14 +2621,16 @@ static int ggtt_bind_vma(struct i915_vma *vma, { struct drm_i915_private *i915 = vma->vm->i915; struct drm_i915_gem_object *obj = vma->obj; - u32 pte_flags = 0; - int ret; + u32 pte_flags; - ret = i915_get_ggtt_vma_pages(vma); - if (ret) - return ret; + if (unlikely(!vma->pages)) { + int ret = i915_get_ggtt_vma_pages(vma); + if (ret) + return ret; + } /* Currently applicable only to VLV */ + pte_flags = 0; if (obj->gt_ro) pte_flags |= PTE_READ_ONLY; @@ -2653,18 +2655,18 @@ static int aliasing_gtt_bind_vma(struct i915_vma *vma, { struct drm_i915_private *i915 = vma->vm->i915; u32 pte_flags; - int ret; - ret = i915_get_ggtt_vma_pages(vma); - if (ret) - return ret; + if (unlikely(!vma->pages)) { + int ret = i915_get_ggtt_vma_pages(vma); + if (ret) + return ret; + } /* Currently applicable only to VLV */ pte_flags = 0; if (vma->obj->gt_ro) pte_flags |= PTE_READ_ONLY; - if (flags & I915_VMA_GLOBAL_BIND) { intel_runtime_pm_get(i915); vma->vm->insert_entries(vma->vm, @@ -3430,9 +3432,9 @@ rotate_pages(const dma_addr_t *in, unsigned int offset, return sg; } -static struct sg_table * -intel_rotate_fb_obj_pages(const struct intel_rotation_info *rot_info, - struct drm_i915_gem_object *obj) +static noinline struct sg_table * +intel_rotate_pages(struct intel_rotation_info *rot_info, + struct drm_i915_gem_object *obj) { const size_t n_pages = obj->base.size / PAGE_SIZE; unsigned int size = intel_rotation_info_size(rot_info); @@ -3493,7 +3495,7 @@ err_st_alloc: return ERR_PTR(ret); } -static struct sg_table * +static noinline struct sg_table * intel_partial_pages(const struct i915_ggtt_view *view, struct drm_i915_gem_object *obj) { @@ -3547,7 +3549,7 @@ err_st_alloc: static int i915_get_ggtt_vma_pages(struct i915_vma *vma) { - int ret = 0; + int ret; /* The vma->pages are only valid within the lifespan of the borrowed * obj->mm.pages. When the obj->mm.pages sg_table is regenerated, so @@ -3556,32 +3558,33 @@ i915_get_ggtt_vma_pages(struct i915_vma *vma) */ GEM_BUG_ON(!i915_gem_object_has_pinned_pages(vma->obj)); - if (vma->pages) + switch (vma->ggtt_view.type) { + case I915_GGTT_VIEW_NORMAL: + vma->pages = vma->obj->mm.pages; return 0; - if (vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) - vma->pages = vma->obj->mm.pages; - else if (vma->ggtt_view.type == I915_GGTT_VIEW_ROTATED) + case I915_GGTT_VIEW_ROTATED: vma->pages = - intel_rotate_fb_obj_pages(&vma->ggtt_view.rotated, - vma->obj); - else if (vma->ggtt_view.type == I915_GGTT_VIEW_PARTIAL) + intel_rotate_pages(&vma->ggtt_view.rotated, vma->obj); + break; + + case I915_GGTT_VIEW_PARTIAL: vma->pages = intel_partial_pages(&vma->ggtt_view, vma->obj); - else + break; + + default: WARN_ONCE(1, "GGTT view %u not implemented!\n", vma->ggtt_view.type); + return -EINVAL; + } - if (!vma->pages) { - DRM_ERROR("Failed to get pages for GGTT view type %u!\n", - vma->ggtt_view.type); - ret = -EINVAL; - } else if (IS_ERR(vma->pages)) { + ret = 0; + if (unlikely(IS_ERR(vma->pages))) { ret = PTR_ERR(vma->pages); vma->pages = NULL; DRM_ERROR("Failed to get pages for VMA view type %u (%d)!\n", vma->ggtt_view.type, ret); } - return ret; } -- cgit From b31144c0daa8c570d3fb60ddbd0e26ba1ea62485 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 15 Feb 2017 08:43:36 +0000 Subject: drm/i915: Micro-optimise gen6_ppgtt_insert_entries() Inline the address computation to avoid the vfunc call for every page. We still have to pay the high overhead of sg_page_iter_next(), but now at least GCC can optimise the inner most loop, giving a significant boost to some thrashing Unreal Engine workloads. Signed-off-by: Chris Wilson Reviewed-by: Matthew Auld Link: http://patchwork.freedesktop.org/patch/msgid/20170215084357.19977-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_gtt.c | 68 ++++++++++++++++++------------------- 1 file changed, 33 insertions(+), 35 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index ae04b6ea1cc3..b33ff11c1345 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -1887,6 +1887,11 @@ static void gen6_ppgtt_clear_range(struct i915_address_space *vm, } } +struct sgt_dma { + struct scatterlist *sg; + dma_addr_t dma, max; +}; + static void gen6_ppgtt_insert_entries(struct i915_address_space *vm, struct sg_table *pages, uint64_t start, @@ -1896,27 +1901,34 @@ static void gen6_ppgtt_insert_entries(struct i915_address_space *vm, unsigned first_entry = start >> PAGE_SHIFT; unsigned act_pt = first_entry / GEN6_PTES; unsigned act_pte = first_entry % GEN6_PTES; - gen6_pte_t *pt_vaddr = NULL; - struct sgt_iter sgt_iter; - dma_addr_t addr; + const u32 pte_encode = vm->pte_encode(0, cache_level, flags); + struct sgt_dma iter; + gen6_pte_t *vaddr; + + vaddr = kmap_px(ppgtt->pd.page_table[act_pt]); + iter.sg = pages->sgl; + iter.dma = sg_dma_address(iter.sg); + iter.max = iter.dma + iter.sg->length; + do { + vaddr[act_pte] = pte_encode | GEN6_PTE_ADDR_ENCODE(iter.dma); - for_each_sgt_dma(addr, sgt_iter, pages) { - if (pt_vaddr == NULL) - pt_vaddr = kmap_px(ppgtt->pd.page_table[act_pt]); + iter.dma += PAGE_SIZE; + if (iter.dma == iter.max) { + iter.sg = __sg_next(iter.sg); + if (!iter.sg) + break; - pt_vaddr[act_pte] = - vm->pte_encode(addr, cache_level, flags); + iter.dma = sg_dma_address(iter.sg); + iter.max = iter.dma + iter.sg->length; + } if (++act_pte == GEN6_PTES) { - kunmap_px(ppgtt, pt_vaddr); - pt_vaddr = NULL; - act_pt++; + kunmap_px(ppgtt, vaddr); + vaddr = kmap_px(ppgtt->pd.page_table[++act_pt]); act_pte = 0; } - } - - if (pt_vaddr) - kunmap_px(ppgtt, pt_vaddr); + } while (1); + kunmap_px(ppgtt, vaddr); } static int gen6_alloc_va_range(struct i915_address_space *vm, @@ -2502,27 +2514,13 @@ static void gen6_ggtt_insert_entries(struct i915_address_space *vm, enum i915_cache_level level, u32 flags) { struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); - struct sgt_iter sgt_iter; - gen6_pte_t __iomem *gtt_entries; - gen6_pte_t gtt_entry; + gen6_pte_t __iomem *entries = (gen6_pte_t __iomem *)ggtt->gsm; + unsigned int i = start >> PAGE_SHIFT; + struct sgt_iter iter; dma_addr_t addr; - int i = 0; - - gtt_entries = (gen6_pte_t __iomem *)ggtt->gsm + (start >> PAGE_SHIFT); - - for_each_sgt_dma(addr, sgt_iter, st) { - gtt_entry = vm->pte_encode(addr, level, flags); - iowrite32(gtt_entry, >t_entries[i++]); - } - - /* XXX: This serves as a posting read to make sure that the PTE has - * actually been updated. There is some concern that even though - * registers and PTEs are within the same BAR that they are potentially - * of NUMA access patterns. Therefore, even with the way we assume - * hardware should work, we must keep this posting read for paranoia. - */ - if (i != 0) - WARN_ON(readl(>t_entries[i-1]) != gtt_entry); + for_each_sgt_dma(addr, iter, st) + iowrite32(vm->pte_encode(addr, level, flags), &entries[i++]); + wmb(); /* This next bit makes the above posting read even more important. We * want to flush the TLBs only after we're certain all the PTE updates -- cgit From 894ccebee2b0e606ba9638d20dd87b33568482d7 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 15 Feb 2017 08:43:37 +0000 Subject: drm/i915: Micro-optimise gen8_ppgtt_insert_entries() Improve the sg iteration and in hte process eliminate a bug in miscomputing the pml4 length as orig_nents< Reviewed-by: Mika Kuoppala Link: http://patchwork.freedesktop.org/patch/msgid/20170215084357.19977-3-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_gtt.c | 172 +++++++++++++++++++----------------- 1 file changed, 93 insertions(+), 79 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index b33ff11c1345..f40f9365782f 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -750,9 +750,9 @@ static bool gen8_ppgtt_clear_pt(struct i915_address_space *vm, unsigned int num_entries = gen8_pte_count(start, length); unsigned int pte = gen8_pte_index(start); unsigned int pte_end = pte + num_entries; - gen8_pte_t *pt_vaddr; - gen8_pte_t scratch_pte = gen8_pte_encode(vm->scratch_page.daddr, - I915_CACHE_LLC); + const gen8_pte_t scratch_pte = + gen8_pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC); + gen8_pte_t *vaddr; if (WARN_ON(!px_page(pt))) return false; @@ -765,12 +765,10 @@ static bool gen8_ppgtt_clear_pt(struct i915_address_space *vm, return true; } - pt_vaddr = kmap_px(pt); - + vaddr = kmap_px(pt); while (pte < pte_end) - pt_vaddr[pte++] = scratch_pte; - - kunmap_px(ppgtt, pt_vaddr); + vaddr[pte++] = scratch_pte; + kunmap_px(ppgtt, vaddr); return false; } @@ -878,71 +876,100 @@ static void gen8_ppgtt_clear_range(struct i915_address_space *vm, gen8_ppgtt_clear_pdp(vm, &ppgtt->pdp, start, length); } -static void -gen8_ppgtt_insert_pte_entries(struct i915_address_space *vm, +struct sgt_dma { + struct scatterlist *sg; + dma_addr_t dma, max; +}; + +static __always_inline bool +gen8_ppgtt_insert_pte_entries(struct i915_hw_ppgtt *ppgtt, struct i915_page_directory_pointer *pdp, - struct sg_page_iter *sg_iter, - uint64_t start, + struct sgt_dma *iter, + u64 start, enum i915_cache_level cache_level) { - struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); - gen8_pte_t *pt_vaddr; - unsigned pdpe = gen8_pdpe_index(start); - unsigned pde = gen8_pde_index(start); - unsigned pte = gen8_pte_index(start); + unsigned int pdpe = gen8_pdpe_index(start); + unsigned int pde = gen8_pde_index(start); + unsigned int pte = gen8_pte_index(start); + struct i915_page_directory *pd; + const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level); + gen8_pte_t *vaddr; + bool ret; - pt_vaddr = NULL; + pd = pdp->page_directory[pdpe]; + vaddr = kmap_px(pd->page_table[pde]); + do { + vaddr[pte] = pte_encode | iter->dma; + iter->dma += PAGE_SIZE; + if (iter->dma >= iter->max) { + iter->sg = __sg_next(iter->sg); + if (!iter->sg) { + ret = false; + break; + } - while (__sg_page_iter_next(sg_iter)) { - if (pt_vaddr == NULL) { - struct i915_page_directory *pd = pdp->page_directory[pdpe]; - struct i915_page_table *pt = pd->page_table[pde]; - pt_vaddr = kmap_px(pt); + iter->dma = sg_dma_address(iter->sg); + iter->max = iter->dma + iter->sg->length; } - pt_vaddr[pte] = - gen8_pte_encode(sg_page_iter_dma_address(sg_iter), - cache_level); if (++pte == GEN8_PTES) { - kunmap_px(ppgtt, pt_vaddr); - pt_vaddr = NULL; if (++pde == I915_PDES) { - if (++pdpe == I915_PDPES_PER_PDP(vm->i915)) + /* Limited by sg length for 3lvl */ + if (++pdpe == GEN8_PML4ES_PER_PML4) { + ret = true; break; + } + + GEM_BUG_ON(pdpe > GEN8_LEGACY_PDPES); + pd = pdp->page_directory[pdpe]; pde = 0; } + + kunmap_px(ppgtt, vaddr); + vaddr = kmap_px(pd->page_table[pde]); pte = 0; } - } + } while (1); + kunmap_px(ppgtt, vaddr); - if (pt_vaddr) - kunmap_px(ppgtt, pt_vaddr); + return ret; } -static void gen8_ppgtt_insert_entries(struct i915_address_space *vm, - struct sg_table *pages, - uint64_t start, - enum i915_cache_level cache_level, - u32 unused) +static void gen8_ppgtt_insert_3lvl(struct i915_address_space *vm, + struct sg_table *pages, + u64 start, + enum i915_cache_level cache_level, + u32 unused) { struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); - struct sg_page_iter sg_iter; + struct sgt_dma iter = { + .sg = pages->sgl, + .dma = sg_dma_address(iter.sg), + .max = iter.dma + iter.sg->length, + }; - __sg_page_iter_start(&sg_iter, pages->sgl, sg_nents(pages->sgl), 0); + gen8_ppgtt_insert_pte_entries(ppgtt, &ppgtt->pdp, &iter, + start, cache_level); +} - if (!USES_FULL_48BIT_PPGTT(vm->i915)) { - gen8_ppgtt_insert_pte_entries(vm, &ppgtt->pdp, &sg_iter, start, - cache_level); - } else { - struct i915_page_directory_pointer *pdp; - uint64_t pml4e; - uint64_t length = (uint64_t)pages->orig_nents << PAGE_SHIFT; +static void gen8_ppgtt_insert_4lvl(struct i915_address_space *vm, + struct sg_table *pages, + uint64_t start, + enum i915_cache_level cache_level, + u32 unused) +{ + struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); + struct sgt_dma iter = { + .sg = pages->sgl, + .dma = sg_dma_address(iter.sg), + .max = iter.dma + iter.sg->length, + }; + struct i915_page_directory_pointer **pdps = ppgtt->pml4.pdps; + unsigned int pml4e = gen8_pml4e_index(start); - gen8_for_each_pml4e(pdp, &ppgtt->pml4, start, length, pml4e) { - gen8_ppgtt_insert_pte_entries(vm, pdp, &sg_iter, - start, cache_level); - } - } + while (gen8_ppgtt_insert_pte_entries(ppgtt, pdps[pml4e++], &iter, + start, cache_level)) + ; } static void gen8_free_page_tables(struct drm_i915_private *dev_priv, @@ -1524,8 +1551,8 @@ static void gen8_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m) struct i915_address_space *vm = &ppgtt->base; uint64_t start = ppgtt->base.start; uint64_t length = ppgtt->base.total; - gen8_pte_t scratch_pte = gen8_pte_encode(vm->scratch_page.daddr, - I915_CACHE_LLC); + const gen8_pte_t scratch_pte = + gen8_pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC); if (!USES_FULL_48BIT_PPGTT(vm->i915)) { gen8_dump_pdp(&ppgtt->pdp, start, length, scratch_pte, m); @@ -1590,7 +1617,6 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt) ppgtt->base.start = 0; ppgtt->base.cleanup = gen8_ppgtt_cleanup; ppgtt->base.allocate_va_range = gen8_alloc_va_range; - ppgtt->base.insert_entries = gen8_ppgtt_insert_entries; ppgtt->base.clear_range = gen8_ppgtt_clear_range; ppgtt->base.unbind_vma = ppgtt_unbind_vma; ppgtt->base.bind_vma = ppgtt_bind_vma; @@ -1605,6 +1631,8 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt) ppgtt->base.total = 1ULL << 48; ppgtt->switch_mm = gen8_48b_mm_switch; + + ppgtt->base.insert_entries = gen8_ppgtt_insert_4lvl; } else { ret = __pdp_init(dev_priv, &ppgtt->pdp); if (ret) @@ -1621,6 +1649,8 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt) if (ret) goto free_scratch; } + + ppgtt->base.insert_entries = gen8_ppgtt_insert_3lvl; } if (intel_vgpu_active(dev_priv)) @@ -1887,11 +1917,6 @@ static void gen6_ppgtt_clear_range(struct i915_address_space *vm, } } -struct sgt_dma { - struct scatterlist *sg; - dma_addr_t dma, max; -}; - static void gen6_ppgtt_insert_entries(struct i915_address_space *vm, struct sg_table *pages, uint64_t start, @@ -2433,26 +2458,15 @@ static void gen8_ggtt_insert_entries(struct i915_address_space *vm, struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); struct sgt_iter sgt_iter; gen8_pte_t __iomem *gtt_entries; - gen8_pte_t gtt_entry; + const gen8_pte_t pte_encode = gen8_pte_encode(0, level); dma_addr_t addr; - int i = 0; - gtt_entries = (gen8_pte_t __iomem *)ggtt->gsm + (start >> PAGE_SHIFT); + gtt_entries = (gen8_pte_t __iomem *)ggtt->gsm; + gtt_entries += start >> PAGE_SHIFT; + for_each_sgt_dma(addr, sgt_iter, st) + gen8_set_pte(gtt_entries++, pte_encode | addr); - for_each_sgt_dma(addr, sgt_iter, st) { - gtt_entry = gen8_pte_encode(addr, level); - gen8_set_pte(>t_entries[i++], gtt_entry); - } - - /* - * XXX: This serves as a posting read to make sure that the PTE has - * actually been updated. There is some concern that even though - * registers and PTEs are within the same BAR that they are potentially - * of NUMA access patterns. Therefore, even with the way we assume - * hardware should work, we must keep this posting read for paranoia. - */ - if (i != 0) - WARN_ON(readq(>t_entries[i-1]) != gtt_entry); + wmb(); /* This next bit makes the above posting read even more important. We * want to flush the TLBs only after we're certain all the PTE updates @@ -2540,7 +2554,9 @@ static void gen8_ggtt_clear_range(struct i915_address_space *vm, struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); unsigned first_entry = start >> PAGE_SHIFT; unsigned num_entries = length >> PAGE_SHIFT; - gen8_pte_t scratch_pte, __iomem *gtt_base = + const gen8_pte_t scratch_pte = + gen8_pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC); + gen8_pte_t __iomem *gtt_base = (gen8_pte_t __iomem *)ggtt->gsm + first_entry; const int max_entries = ggtt_total_entries(ggtt) - first_entry; int i; @@ -2550,8 +2566,6 @@ static void gen8_ggtt_clear_range(struct i915_address_space *vm, first_entry, num_entries, max_entries)) num_entries = max_entries; - scratch_pte = gen8_pte_encode(vm->scratch_page.daddr, - I915_CACHE_LLC); for (i = 0; i < num_entries; i++) gen8_set_pte(>t_base[i], scratch_pte); readl(gtt_base); -- cgit From 1188bc66eb33e64ac7452b5acd62ce0395204148 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 15 Feb 2017 08:43:38 +0000 Subject: drm/i915: Don't special case teardown of aliasing_ppgtt The aliasing_ppgtt is a regular ppgtt, and we can use the regular i915_ppgtt_put() to properly tear it down. Signed-off-by: Chris Wilson Reviewed-by: Matthew Auld Link: http://patchwork.freedesktop.org/patch/msgid/20170215084357.19977-4-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_gtt.c | 50 +++++++++++-------------------------- 1 file changed, 15 insertions(+), 35 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index f40f9365782f..424a690bc17f 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -2238,23 +2238,6 @@ static void gtt_write_workarounds(struct drm_i915_private *dev_priv) I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_BXT); } -static int i915_ppgtt_init(struct i915_hw_ppgtt *ppgtt, - struct drm_i915_private *dev_priv, - struct drm_i915_file_private *file_priv, - const char *name) -{ - int ret; - - ret = __hw_ppgtt_init(ppgtt, dev_priv); - if (ret == 0) { - kref_init(&ppgtt->ref); - i915_address_space_init(&ppgtt->base, dev_priv, name); - ppgtt->base.file = file_priv; - } - - return ret; -} - int i915_ppgtt_init_hw(struct drm_i915_private *dev_priv) { gtt_write_workarounds(dev_priv); @@ -2292,12 +2275,16 @@ i915_ppgtt_create(struct drm_i915_private *dev_priv, if (!ppgtt) return ERR_PTR(-ENOMEM); - ret = i915_ppgtt_init(ppgtt, dev_priv, fpriv, name); + ret = __hw_ppgtt_init(ppgtt, dev_priv); if (ret) { kfree(ppgtt); return ERR_PTR(ret); } + kref_init(&ppgtt->ref); + i915_address_space_init(&ppgtt->base, dev_priv, name); + ppgtt->base.file = fpriv; + trace_i915_ppgtt_create(&ppgtt->base); return ppgtt; @@ -2757,19 +2744,15 @@ int i915_gem_init_aliasing_ppgtt(struct drm_i915_private *i915) struct i915_hw_ppgtt *ppgtt; int err; - ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL); - if (!ppgtt) - return -ENOMEM; - - err = __hw_ppgtt_init(ppgtt, i915); - if (err) - goto err_ppgtt; + ppgtt = i915_ppgtt_create(i915, NULL, "[alias]"); + if (IS_ERR(ppgtt)) + return PTR_ERR(ppgtt); if (ppgtt->base.allocate_va_range) { err = ppgtt->base.allocate_va_range(&ppgtt->base, 0, ppgtt->base.total); if (err) - goto err_ppgtt_cleanup; + goto err_ppgtt; } ppgtt->base.clear_range(&ppgtt->base, @@ -2782,10 +2765,8 @@ int i915_gem_init_aliasing_ppgtt(struct drm_i915_private *i915) return 0; -err_ppgtt_cleanup: - ppgtt->base.cleanup(&ppgtt->base); err_ppgtt: - kfree(ppgtt); + i915_ppgtt_put(ppgtt); return err; } @@ -2798,8 +2779,7 @@ void i915_gem_fini_aliasing_ppgtt(struct drm_i915_private *i915) if (!ppgtt) return; - ppgtt->base.cleanup(&ppgtt->base); - kfree(ppgtt); + i915_ppgtt_put(ppgtt); ggtt->base.bind_vma = ggtt_bind_vma; } @@ -2874,21 +2854,21 @@ void i915_ggtt_cleanup_hw(struct drm_i915_private *dev_priv) WARN_ON(i915_vma_unbind(vma)); mutex_unlock(&dev_priv->drm.struct_mutex); - i915_gem_fini_aliasing_ppgtt(dev_priv); i915_gem_cleanup_stolen(&dev_priv->drm); + mutex_lock(&dev_priv->drm.struct_mutex); + i915_gem_fini_aliasing_ppgtt(dev_priv); + if (drm_mm_node_allocated(&ggtt->error_capture)) drm_mm_remove_node(&ggtt->error_capture); if (drm_mm_initialized(&ggtt->base.mm)) { intel_vgt_deballoon(dev_priv); - - mutex_lock(&dev_priv->drm.struct_mutex); i915_address_space_fini(&ggtt->base); - mutex_unlock(&dev_priv->drm.struct_mutex); } ggtt->base.cleanup(&ggtt->base); + mutex_unlock(&dev_priv->drm.struct_mutex); arch_phys_wc_del(ggtt->mtrr); io_mapping_fini(&ggtt->mappable); -- cgit From cbc4e9e6a6d31fcc44921d2be41104425be8ab01 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 15 Feb 2017 08:43:39 +0000 Subject: drm/i915: Split ggtt/alasing_gtt unbind_vma Similar to how we already split the bind_vma for ggtt/aliasing_gtt, also split up the unbind for symmetry. Signed-off-by: Chris Wilson Reviewed-by: Matthew Auld Link: http://patchwork.freedesktop.org/patch/msgid/20170215084357.19977-5-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_gtt.c | 29 +++++++++++++++++++++-------- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 424a690bc17f..efc7a6b1d6f1 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -2648,6 +2648,15 @@ static int ggtt_bind_vma(struct i915_vma *vma, return 0; } +static void ggtt_unbind_vma(struct i915_vma *vma) +{ + struct drm_i915_private *i915 = vma->vm->i915; + + intel_runtime_pm_get(i915); + vma->vm->clear_range(vma->vm, vma->node.start, vma->size); + intel_runtime_pm_put(i915); +} + static int aliasing_gtt_bind_vma(struct i915_vma *vma, enum i915_cache_level cache_level, u32 flags) @@ -2684,22 +2693,21 @@ static int aliasing_gtt_bind_vma(struct i915_vma *vma, return 0; } -static void ggtt_unbind_vma(struct i915_vma *vma) +static void aliasing_gtt_unbind_vma(struct i915_vma *vma) { struct drm_i915_private *i915 = vma->vm->i915; - struct i915_hw_ppgtt *appgtt = i915->mm.aliasing_ppgtt; - const u64 size = min(vma->size, vma->node.size); if (vma->flags & I915_VMA_GLOBAL_BIND) { intel_runtime_pm_get(i915); - vma->vm->clear_range(vma->vm, - vma->node.start, size); + vma->vm->clear_range(vma->vm, vma->node.start, vma->size); intel_runtime_pm_put(i915); } - if (vma->flags & I915_VMA_LOCAL_BIND && appgtt) - appgtt->base.clear_range(&appgtt->base, - vma->node.start, size); + if (vma->flags & I915_VMA_LOCAL_BIND) { + struct i915_address_space *vm = &i915->mm.aliasing_ppgtt->base; + + vm->clear_range(vm, vma->node.start, vma->size); + } } void i915_gem_gtt_finish_pages(struct drm_i915_gem_object *obj, @@ -2760,9 +2768,13 @@ int i915_gem_init_aliasing_ppgtt(struct drm_i915_private *i915) ppgtt->base.total); i915->mm.aliasing_ppgtt = ppgtt; + WARN_ON(ggtt->base.bind_vma != ggtt_bind_vma); ggtt->base.bind_vma = aliasing_gtt_bind_vma; + WARN_ON(ggtt->base.unbind_vma != ggtt_unbind_vma); + ggtt->base.unbind_vma = aliasing_gtt_unbind_vma; + return 0; err_ppgtt: @@ -2782,6 +2794,7 @@ void i915_gem_fini_aliasing_ppgtt(struct drm_i915_private *i915) i915_ppgtt_put(ppgtt); ggtt->base.bind_vma = ggtt_bind_vma; + ggtt->base.unbind_vma = ggtt_unbind_vma; } int i915_gem_init_ggtt(struct drm_i915_private *dev_priv) -- cgit From 8448661d65f6f5dbcdb9c5cba185b284f2464b65 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 15 Feb 2017 08:43:40 +0000 Subject: drm/i915: Convert clflushed pagetables over to WC maps We flush the entire page every time we update a few bytes, making the update of a page table many, many times slower than is required. If we create a WC map of the page for our updates, we can avoid the clflush but incur additional cost for creating the pagetable. We amoritize that cost by reusing page vmappings, and only changing the page protection in batches. Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala --- drivers/gpu/drm/i915/i915_drv.h | 2 - drivers/gpu/drm/i915/i915_gem_gtt.c | 333 +++++++++++++------------- drivers/gpu/drm/i915/i915_gem_gtt.h | 8 + drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 8 +- 4 files changed, 181 insertions(+), 170 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 992bda004f6b..f582f5492e37 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2481,8 +2481,6 @@ struct drm_i915_private { /* Used to save the pipe-to-encoder mapping for audio */ struct intel_encoder *av_enc_map[I915_MAX_PIPES]; - I915_SELFTEST_DECLARE(struct fault_attr vm_fault); - /* * NOTE: This is the dri1/ums dungeon, don't add stuff here. Your patch * will be rejected. Instead look for a better place. diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index efc7a6b1d6f1..4eb0e114a295 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -343,46 +343,72 @@ static gen6_pte_t iris_pte_encode(dma_addr_t addr, return pte; } -static int __setup_page_dma(struct drm_i915_private *dev_priv, - struct i915_page_dma *p, gfp_t flags) +static struct page *vm_alloc_page(struct i915_address_space *vm, gfp_t gfp) { - struct device *kdev = &dev_priv->drm.pdev->dev; + struct page *page; - if (I915_SELFTEST_ONLY(should_fail(&dev_priv->vm_fault, 1))) - i915_gem_shrink_all(dev_priv); + if (I915_SELFTEST_ONLY(should_fail(&vm->fault_attr, 1))) + i915_gem_shrink_all(vm->i915); - p->page = alloc_page(flags); - if (!p->page) - return -ENOMEM; + if (vm->free_pages.nr) + return vm->free_pages.pages[--vm->free_pages.nr]; - p->daddr = dma_map_page(kdev, - p->page, 0, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); + page = alloc_page(gfp); + if (!page) + return NULL; - if (dma_mapping_error(kdev, p->daddr)) { - __free_page(p->page); - return -EINVAL; + if (vm->pt_kmap_wc) + set_pages_array_wc(&page, 1); + + return page; +} + +static void vm_free_pages_release(struct i915_address_space *vm) +{ + GEM_BUG_ON(!pagevec_count(&vm->free_pages)); + + if (vm->pt_kmap_wc) + set_pages_array_wb(vm->free_pages.pages, + pagevec_count(&vm->free_pages)); + + __pagevec_release(&vm->free_pages); +} + +static void vm_free_page(struct i915_address_space *vm, struct page *page) +{ + if (!pagevec_add(&vm->free_pages, page)) + vm_free_pages_release(vm); +} + +static int __setup_page_dma(struct i915_address_space *vm, + struct i915_page_dma *p, + gfp_t gfp) +{ + p->page = vm_alloc_page(vm, gfp | __GFP_NOWARN | __GFP_NORETRY); + if (unlikely(!p->page)) + return -ENOMEM; + + p->daddr = dma_map_page(vm->dma, p->page, 0, PAGE_SIZE, + PCI_DMA_BIDIRECTIONAL); + if (unlikely(dma_mapping_error(vm->dma, p->daddr))) { + vm_free_page(vm, p->page); + return -ENOMEM; } return 0; } -static int setup_page_dma(struct drm_i915_private *dev_priv, +static int setup_page_dma(struct i915_address_space *vm, struct i915_page_dma *p) { - return __setup_page_dma(dev_priv, p, I915_GFP_DMA); + return __setup_page_dma(vm, p, I915_GFP_DMA); } -static void cleanup_page_dma(struct drm_i915_private *dev_priv, +static void cleanup_page_dma(struct i915_address_space *vm, struct i915_page_dma *p) { - struct pci_dev *pdev = dev_priv->drm.pdev; - - if (WARN_ON(!p->page)) - return; - - dma_unmap_page(&pdev->dev, p->daddr, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); - __free_page(p->page); - memset(p, 0, sizeof(*p)); + dma_unmap_page(vm->dma, p->daddr, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); + vm_free_page(vm, p->page); } static void *kmap_page_dma(struct i915_page_dma *p) @@ -393,67 +419,54 @@ static void *kmap_page_dma(struct i915_page_dma *p) /* We use the flushing unmap only with ppgtt structures: * page directories, page tables and scratch pages. */ -static void kunmap_page_dma(struct drm_i915_private *dev_priv, void *vaddr) +static void kunmap_page_dma(void *vaddr) { - /* There are only few exceptions for gen >=6. chv and bxt. - * And we are not sure about the latter so play safe for now. - */ - if (IS_CHERRYVIEW(dev_priv) || IS_GEN9_LP(dev_priv)) - drm_clflush_virt_range(vaddr, PAGE_SIZE); - kunmap_atomic(vaddr); } #define kmap_px(px) kmap_page_dma(px_base(px)) -#define kunmap_px(ppgtt, vaddr) \ - kunmap_page_dma((ppgtt)->base.i915, (vaddr)) +#define kunmap_px(vaddr) kunmap_page_dma((vaddr)) -#define setup_px(dev_priv, px) setup_page_dma((dev_priv), px_base(px)) -#define cleanup_px(dev_priv, px) cleanup_page_dma((dev_priv), px_base(px)) -#define fill_px(dev_priv, px, v) fill_page_dma((dev_priv), px_base(px), (v)) -#define fill32_px(dev_priv, px, v) \ - fill_page_dma_32((dev_priv), px_base(px), (v)) +#define setup_px(vm, px) setup_page_dma((vm), px_base(px)) +#define cleanup_px(vm, px) cleanup_page_dma((vm), px_base(px)) +#define fill_px(ppgtt, px, v) fill_page_dma((vm), px_base(px), (v)) +#define fill32_px(ppgtt, px, v) fill_page_dma_32((vm), px_base(px), (v)) -static void fill_page_dma(struct drm_i915_private *dev_priv, - struct i915_page_dma *p, const uint64_t val) +static void fill_page_dma(struct i915_address_space *vm, + struct i915_page_dma *p, + const u64 val) { + u64 * const vaddr = kmap_page_dma(p); int i; - uint64_t * const vaddr = kmap_page_dma(p); for (i = 0; i < 512; i++) vaddr[i] = val; - kunmap_page_dma(dev_priv, vaddr); + kunmap_page_dma(vaddr); } -static void fill_page_dma_32(struct drm_i915_private *dev_priv, - struct i915_page_dma *p, const uint32_t val32) +static void fill_page_dma_32(struct i915_address_space *vm, + struct i915_page_dma *p, + const u32 v) { - uint64_t v = val32; - - v = v << 32 | val32; - - fill_page_dma(dev_priv, p, v); + fill_page_dma(vm, p, (u64)v << 32 | v); } static int -setup_scratch_page(struct drm_i915_private *dev_priv, - struct i915_page_dma *scratch, - gfp_t gfp) +setup_scratch_page(struct i915_address_space *vm, gfp_t gfp) { - return __setup_page_dma(dev_priv, scratch, gfp | __GFP_ZERO); + return __setup_page_dma(vm, &vm->scratch_page, gfp | __GFP_ZERO); } -static void cleanup_scratch_page(struct drm_i915_private *dev_priv, - struct i915_page_dma *scratch) +static void cleanup_scratch_page(struct i915_address_space *vm) { - cleanup_page_dma(dev_priv, scratch); + cleanup_page_dma(vm, &vm->scratch_page); } -static struct i915_page_table *alloc_pt(struct drm_i915_private *dev_priv) +static struct i915_page_table *alloc_pt(struct i915_address_space *vm) { struct i915_page_table *pt; - const size_t count = INTEL_GEN(dev_priv) >= 8 ? GEN8_PTES : GEN6_PTES; + const size_t count = INTEL_GEN(vm->i915) >= 8 ? GEN8_PTES : GEN6_PTES; int ret = -ENOMEM; pt = kzalloc(sizeof(*pt), GFP_KERNEL); @@ -466,7 +479,7 @@ static struct i915_page_table *alloc_pt(struct drm_i915_private *dev_priv) if (!pt->used_ptes) goto fail_bitmap; - ret = setup_px(dev_priv, pt); + ret = setup_px(vm, pt); if (ret) goto fail_page_m; @@ -480,10 +493,9 @@ fail_bitmap: return ERR_PTR(ret); } -static void free_pt(struct drm_i915_private *dev_priv, - struct i915_page_table *pt) +static void free_pt(struct i915_address_space *vm, struct i915_page_table *pt) { - cleanup_px(dev_priv, pt); + cleanup_px(vm, pt); kfree(pt->used_ptes); kfree(pt); } @@ -496,7 +508,7 @@ static void gen8_initialize_pt(struct i915_address_space *vm, scratch_pte = gen8_pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC); - fill_px(vm->i915, pt, scratch_pte); + fill_px(vm, pt, scratch_pte); } static void gen6_initialize_pt(struct i915_address_space *vm, @@ -509,10 +521,10 @@ static void gen6_initialize_pt(struct i915_address_space *vm, scratch_pte = vm->pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC, 0); - fill32_px(vm->i915, pt, scratch_pte); + fill32_px(vm, pt, scratch_pte); } -static struct i915_page_directory *alloc_pd(struct drm_i915_private *dev_priv) +static struct i915_page_directory *alloc_pd(struct i915_address_space *vm) { struct i915_page_directory *pd; int ret = -ENOMEM; @@ -526,7 +538,7 @@ static struct i915_page_directory *alloc_pd(struct drm_i915_private *dev_priv) if (!pd->used_pdes) goto fail_bitmap; - ret = setup_px(dev_priv, pd); + ret = setup_px(vm, pd); if (ret) goto fail_page_m; @@ -540,11 +552,11 @@ fail_bitmap: return ERR_PTR(ret); } -static void free_pd(struct drm_i915_private *dev_priv, +static void free_pd(struct i915_address_space *vm, struct i915_page_directory *pd) { if (px_page(pd)) { - cleanup_px(dev_priv, pd); + cleanup_px(vm, pd); kfree(pd->used_pdes); kfree(pd); } @@ -557,7 +569,7 @@ static void gen8_initialize_pd(struct i915_address_space *vm, scratch_pde = gen8_pde_encode(px_dma(vm->scratch_pt), I915_CACHE_LLC); - fill_px(vm->i915, pd, scratch_pde); + fill_px(vm, pd, scratch_pde); } static int __pdp_init(struct drm_i915_private *dev_priv, @@ -591,23 +603,23 @@ static void __pdp_fini(struct i915_page_directory_pointer *pdp) pdp->page_directory = NULL; } -static struct -i915_page_directory_pointer *alloc_pdp(struct drm_i915_private *dev_priv) +static struct i915_page_directory_pointer * +alloc_pdp(struct i915_address_space *vm) { struct i915_page_directory_pointer *pdp; int ret = -ENOMEM; - WARN_ON(!USES_FULL_48BIT_PPGTT(dev_priv)); + WARN_ON(!USES_FULL_48BIT_PPGTT(vm->i915)); pdp = kzalloc(sizeof(*pdp), GFP_KERNEL); if (!pdp) return ERR_PTR(-ENOMEM); - ret = __pdp_init(dev_priv, pdp); + ret = __pdp_init(vm->i915, pdp); if (ret) goto fail_bitmap; - ret = setup_px(dev_priv, pdp); + ret = setup_px(vm, pdp); if (ret) goto fail_page_m; @@ -621,12 +633,12 @@ fail_bitmap: return ERR_PTR(ret); } -static void free_pdp(struct drm_i915_private *dev_priv, +static void free_pdp(struct i915_address_space *vm, struct i915_page_directory_pointer *pdp) { __pdp_fini(pdp); - if (USES_FULL_48BIT_PPGTT(dev_priv)) { - cleanup_px(dev_priv, pdp); + if (USES_FULL_48BIT_PPGTT(vm->i915)) { + cleanup_px(vm, pdp); kfree(pdp); } } @@ -638,7 +650,7 @@ static void gen8_initialize_pdp(struct i915_address_space *vm, scratch_pdpe = gen8_pdpe_encode(px_dma(vm->scratch_pd), I915_CACHE_LLC); - fill_px(vm->i915, pdp, scratch_pdpe); + fill_px(vm, pdp, scratch_pdpe); } static void gen8_initialize_pml4(struct i915_address_space *vm, @@ -649,7 +661,7 @@ static void gen8_initialize_pml4(struct i915_address_space *vm, scratch_pml4e = gen8_pml4e_encode(px_dma(vm->scratch_pdp), I915_CACHE_LLC); - fill_px(vm->i915, pml4, scratch_pml4e); + fill_px(vm, pml4, scratch_pml4e); } static void @@ -665,20 +677,18 @@ gen8_setup_pdpe(struct i915_hw_ppgtt *ppgtt, page_directorypo = kmap_px(pdp); page_directorypo[index] = gen8_pdpe_encode(px_dma(pd), I915_CACHE_LLC); - kunmap_px(ppgtt, page_directorypo); + kunmap_px(page_directorypo); } static void -gen8_setup_pml4e(struct i915_hw_ppgtt *ppgtt, - struct i915_pml4 *pml4, +gen8_setup_pml4e(struct i915_pml4 *pml4, struct i915_page_directory_pointer *pdp, int index) { gen8_ppgtt_pml4e_t *pagemap = kmap_px(pml4); - WARN_ON(!USES_FULL_48BIT_PPGTT(to_i915(ppgtt->base.dev))); pagemap[index] = gen8_pml4e_encode(px_dma(pdp), I915_CACHE_LLC); - kunmap_px(ppgtt, pagemap); + kunmap_px(pagemap); } /* Broadwell Page Directory Pointer Descriptors */ @@ -746,7 +756,6 @@ static bool gen8_ppgtt_clear_pt(struct i915_address_space *vm, uint64_t start, uint64_t length) { - struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); unsigned int num_entries = gen8_pte_count(start, length); unsigned int pte = gen8_pte_index(start); unsigned int pte_end = pte + num_entries; @@ -768,7 +777,7 @@ static bool gen8_ppgtt_clear_pt(struct i915_address_space *vm, vaddr = kmap_px(pt); while (pte < pte_end) vaddr[pte++] = scratch_pte; - kunmap_px(ppgtt, vaddr); + kunmap_px(vaddr); return false; } @@ -781,7 +790,6 @@ static bool gen8_ppgtt_clear_pd(struct i915_address_space *vm, uint64_t start, uint64_t length) { - struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); struct i915_page_table *pt; uint64_t pde; gen8_pde_t *pde_vaddr; @@ -796,8 +804,8 @@ static bool gen8_ppgtt_clear_pd(struct i915_address_space *vm, __clear_bit(pde, pd->used_pdes); pde_vaddr = kmap_px(pd); pde_vaddr[pde] = scratch_pde; - kunmap_px(ppgtt, pde_vaddr); - free_pt(vm->i915, pt); + kunmap_px(pde_vaddr); + free_pt(vm, pt); } } @@ -826,7 +834,7 @@ static bool gen8_ppgtt_clear_pdp(struct i915_address_space *vm, if (gen8_ppgtt_clear_pd(vm, pd, start, length)) { __clear_bit(pdpe, pdp->used_pdpes); gen8_setup_pdpe(ppgtt, pdp, vm->scratch_pd, pdpe); - free_pd(vm->i915, pd); + free_pd(vm, pd); } } @@ -847,7 +855,6 @@ static void gen8_ppgtt_clear_pml4(struct i915_address_space *vm, uint64_t start, uint64_t length) { - struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); struct i915_page_directory_pointer *pdp; uint64_t pml4e; @@ -859,8 +866,8 @@ static void gen8_ppgtt_clear_pml4(struct i915_address_space *vm, if (gen8_ppgtt_clear_pdp(vm, pdp, start, length)) { __clear_bit(pml4e, pml4->used_pml4es); - gen8_setup_pml4e(ppgtt, pml4, vm->scratch_pdp, pml4e); - free_pdp(vm->i915, pdp); + gen8_setup_pml4e(pml4, vm->scratch_pdp, pml4e); + free_pdp(vm, pdp); } } } @@ -925,12 +932,12 @@ gen8_ppgtt_insert_pte_entries(struct i915_hw_ppgtt *ppgtt, pde = 0; } - kunmap_px(ppgtt, vaddr); + kunmap_px(vaddr); vaddr = kmap_px(pd->page_table[pde]); pte = 0; } } while (1); - kunmap_px(ppgtt, vaddr); + kunmap_px(vaddr); return ret; } @@ -972,7 +979,7 @@ static void gen8_ppgtt_insert_4lvl(struct i915_address_space *vm, ; } -static void gen8_free_page_tables(struct drm_i915_private *dev_priv, +static void gen8_free_page_tables(struct i915_address_space *vm, struct i915_page_directory *pd) { int i; @@ -984,34 +991,33 @@ static void gen8_free_page_tables(struct drm_i915_private *dev_priv, if (WARN_ON(!pd->page_table[i])) continue; - free_pt(dev_priv, pd->page_table[i]); + free_pt(vm, pd->page_table[i]); pd->page_table[i] = NULL; } } static int gen8_init_scratch(struct i915_address_space *vm) { - struct drm_i915_private *dev_priv = vm->i915; int ret; - ret = setup_scratch_page(dev_priv, &vm->scratch_page, I915_GFP_DMA); + ret = setup_scratch_page(vm, I915_GFP_DMA); if (ret) return ret; - vm->scratch_pt = alloc_pt(dev_priv); + vm->scratch_pt = alloc_pt(vm); if (IS_ERR(vm->scratch_pt)) { ret = PTR_ERR(vm->scratch_pt); goto free_scratch_page; } - vm->scratch_pd = alloc_pd(dev_priv); + vm->scratch_pd = alloc_pd(vm); if (IS_ERR(vm->scratch_pd)) { ret = PTR_ERR(vm->scratch_pd); goto free_pt; } - if (USES_FULL_48BIT_PPGTT(dev_priv)) { - vm->scratch_pdp = alloc_pdp(dev_priv); + if (USES_FULL_48BIT_PPGTT(dev)) { + vm->scratch_pdp = alloc_pdp(vm); if (IS_ERR(vm->scratch_pdp)) { ret = PTR_ERR(vm->scratch_pdp); goto free_pd; @@ -1026,11 +1032,11 @@ static int gen8_init_scratch(struct i915_address_space *vm) return 0; free_pd: - free_pd(dev_priv, vm->scratch_pd); + free_pd(vm, vm->scratch_pd); free_pt: - free_pt(dev_priv, vm->scratch_pt); + free_pt(vm, vm->scratch_pt); free_scratch_page: - cleanup_scratch_page(dev_priv, &vm->scratch_page); + cleanup_scratch_page(vm); return ret; } @@ -1068,44 +1074,41 @@ static int gen8_ppgtt_notify_vgt(struct i915_hw_ppgtt *ppgtt, bool create) static void gen8_free_scratch(struct i915_address_space *vm) { - struct drm_i915_private *dev_priv = vm->i915; - - if (USES_FULL_48BIT_PPGTT(dev_priv)) - free_pdp(dev_priv, vm->scratch_pdp); - free_pd(dev_priv, vm->scratch_pd); - free_pt(dev_priv, vm->scratch_pt); - cleanup_scratch_page(dev_priv, &vm->scratch_page); + if (USES_FULL_48BIT_PPGTT(vm->i915)) + free_pdp(vm, vm->scratch_pdp); + free_pd(vm, vm->scratch_pd); + free_pt(vm, vm->scratch_pt); + cleanup_scratch_page(vm); } -static void gen8_ppgtt_cleanup_3lvl(struct drm_i915_private *dev_priv, +static void gen8_ppgtt_cleanup_3lvl(struct i915_address_space *vm, struct i915_page_directory_pointer *pdp) { int i; - for_each_set_bit(i, pdp->used_pdpes, I915_PDPES_PER_PDP(dev_priv)) { + for_each_set_bit(i, pdp->used_pdpes, I915_PDPES_PER_PDP(vm->i915)) { if (WARN_ON(!pdp->page_directory[i])) continue; - gen8_free_page_tables(dev_priv, pdp->page_directory[i]); - free_pd(dev_priv, pdp->page_directory[i]); + gen8_free_page_tables(vm, pdp->page_directory[i]); + free_pd(vm, pdp->page_directory[i]); } - free_pdp(dev_priv, pdp); + free_pdp(vm, pdp); } static void gen8_ppgtt_cleanup_4lvl(struct i915_hw_ppgtt *ppgtt) { - struct drm_i915_private *dev_priv = ppgtt->base.i915; int i; for_each_set_bit(i, ppgtt->pml4.used_pml4es, GEN8_PML4ES_PER_PML4) { if (WARN_ON(!ppgtt->pml4.pdps[i])) continue; - gen8_ppgtt_cleanup_3lvl(dev_priv, ppgtt->pml4.pdps[i]); + gen8_ppgtt_cleanup_3lvl(&ppgtt->base, ppgtt->pml4.pdps[i]); } - cleanup_px(dev_priv, &ppgtt->pml4); + cleanup_px(&ppgtt->base, &ppgtt->pml4); } static void gen8_ppgtt_cleanup(struct i915_address_space *vm) @@ -1116,8 +1119,8 @@ static void gen8_ppgtt_cleanup(struct i915_address_space *vm) if (intel_vgpu_active(dev_priv)) gen8_ppgtt_notify_vgt(ppgtt, false); - if (!USES_FULL_48BIT_PPGTT(dev_priv)) - gen8_ppgtt_cleanup_3lvl(dev_priv, &ppgtt->pdp); + if (!USES_FULL_48BIT_PPGTT(vm->i915)) + gen8_ppgtt_cleanup_3lvl(&ppgtt->base, &ppgtt->pdp); else gen8_ppgtt_cleanup_4lvl(ppgtt); @@ -1148,7 +1151,6 @@ static int gen8_ppgtt_alloc_pagetabs(struct i915_address_space *vm, uint64_t length, unsigned long *new_pts) { - struct drm_i915_private *dev_priv = vm->i915; struct i915_page_table *pt; uint32_t pde; @@ -1160,7 +1162,7 @@ static int gen8_ppgtt_alloc_pagetabs(struct i915_address_space *vm, continue; } - pt = alloc_pt(dev_priv); + pt = alloc_pt(vm); if (IS_ERR(pt)) goto unwind_out; @@ -1174,7 +1176,7 @@ static int gen8_ppgtt_alloc_pagetabs(struct i915_address_space *vm, unwind_out: for_each_set_bit(pde, new_pts, I915_PDES) - free_pt(dev_priv, pd->page_table[pde]); + free_pt(vm, pd->page_table[pde]); return -ENOMEM; } @@ -1209,7 +1211,6 @@ gen8_ppgtt_alloc_page_directories(struct i915_address_space *vm, uint64_t length, unsigned long *new_pds) { - struct drm_i915_private *dev_priv = vm->i915; struct i915_page_directory *pd; uint32_t pdpe; uint32_t pdpes = I915_PDPES_PER_PDP(dev_priv); @@ -1220,7 +1221,7 @@ gen8_ppgtt_alloc_page_directories(struct i915_address_space *vm, if (test_bit(pdpe, pdp->used_pdpes)) continue; - pd = alloc_pd(dev_priv); + pd = alloc_pd(vm); if (IS_ERR(pd)) goto unwind_out; @@ -1234,7 +1235,7 @@ gen8_ppgtt_alloc_page_directories(struct i915_address_space *vm, unwind_out: for_each_set_bit(pdpe, new_pds, pdpes) - free_pd(dev_priv, pdp->page_directory[pdpe]); + free_pd(vm, pdp->page_directory[pdpe]); return -ENOMEM; } @@ -1262,7 +1263,6 @@ gen8_ppgtt_alloc_page_dirpointers(struct i915_address_space *vm, uint64_t length, unsigned long *new_pdps) { - struct drm_i915_private *dev_priv = vm->i915; struct i915_page_directory_pointer *pdp; uint32_t pml4e; @@ -1270,7 +1270,7 @@ gen8_ppgtt_alloc_page_dirpointers(struct i915_address_space *vm, gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) { if (!test_bit(pml4e, pml4->used_pml4es)) { - pdp = alloc_pdp(dev_priv); + pdp = alloc_pdp(vm); if (IS_ERR(pdp)) goto unwind_out; @@ -1288,7 +1288,7 @@ gen8_ppgtt_alloc_page_dirpointers(struct i915_address_space *vm, unwind_out: for_each_set_bit(pml4e, new_pdps, GEN8_PML4ES_PER_PML4) - free_pdp(dev_priv, pml4->pdps[pml4e]); + free_pdp(vm, pml4->pdps[pml4e]); return -ENOMEM; } @@ -1337,7 +1337,6 @@ static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm, { struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); unsigned long *new_page_dirs, *new_page_tables; - struct drm_i915_private *dev_priv = vm->i915; struct i915_page_directory *pd; const uint64_t orig_start = start; const uint64_t orig_length = length; @@ -1406,7 +1405,7 @@ static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm, * point we're still relying on insert_entries() */ } - kunmap_px(ppgtt, page_directory); + kunmap_px(page_directory); __set_bit(pdpe, pdp->used_pdpes); gen8_setup_pdpe(ppgtt, pdp, pd, pdpe); } @@ -1421,12 +1420,11 @@ err_out: for_each_set_bit(temp, new_page_tables + pdpe * BITS_TO_LONGS(I915_PDES), I915_PDES) - free_pt(dev_priv, - pdp->page_directory[pdpe]->page_table[temp]); + free_pt(vm, pdp->page_directory[pdpe]->page_table[temp]); } for_each_set_bit(pdpe, new_page_dirs, pdpes) - free_pd(dev_priv, pdp->page_directory[pdpe]); + free_pd(vm, pdp->page_directory[pdpe]); free_gen8_temp_bitmaps(new_page_dirs, new_page_tables); mark_tlbs_dirty(ppgtt); @@ -1439,7 +1437,6 @@ static int gen8_alloc_va_range_4lvl(struct i915_address_space *vm, uint64_t length) { DECLARE_BITMAP(new_pdps, GEN8_PML4ES_PER_PML4); - struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); struct i915_page_directory_pointer *pdp; uint64_t pml4e; int ret = 0; @@ -1463,7 +1460,7 @@ static int gen8_alloc_va_range_4lvl(struct i915_address_space *vm, if (ret) goto err_out; - gen8_setup_pml4e(ppgtt, pml4, pdp, pml4e); + gen8_setup_pml4e(pml4, pdp, pml4e); } bitmap_or(pml4->used_pml4es, new_pdps, pml4->used_pml4es, @@ -1473,7 +1470,7 @@ static int gen8_alloc_va_range_4lvl(struct i915_address_space *vm, err_out: for_each_set_bit(pml4e, new_pdps, GEN8_PML4ES_PER_PML4) - gen8_ppgtt_cleanup_3lvl(vm->i915, pml4->pdps[pml4e]); + gen8_ppgtt_cleanup_3lvl(vm, pml4->pdps[pml4e]); return ret; } @@ -1489,7 +1486,8 @@ static int gen8_alloc_va_range(struct i915_address_space *vm, return gen8_alloc_va_range_3lvl(vm, &ppgtt->pdp, start, length); } -static void gen8_dump_pdp(struct i915_page_directory_pointer *pdp, +static void gen8_dump_pdp(struct i915_hw_ppgtt *ppgtt, + struct i915_page_directory_pointer *pdp, uint64_t start, uint64_t length, gen8_pte_t scratch_pte, struct seq_file *m) @@ -1555,7 +1553,7 @@ static void gen8_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m) gen8_pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC); if (!USES_FULL_48BIT_PPGTT(vm->i915)) { - gen8_dump_pdp(&ppgtt->pdp, start, length, scratch_pte, m); + gen8_dump_pdp(ppgtt, &ppgtt->pdp, start, length, scratch_pte, m); } else { uint64_t pml4e; struct i915_pml4 *pml4 = &ppgtt->pml4; @@ -1566,7 +1564,7 @@ static void gen8_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m) continue; seq_printf(m, " PML4E #%llu\n", pml4e); - gen8_dump_pdp(pdp, start, length, scratch_pte, m); + gen8_dump_pdp(ppgtt, pdp, start, length, scratch_pte, m); } } } @@ -1622,8 +1620,14 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt) ppgtt->base.bind_vma = ppgtt_bind_vma; ppgtt->debug_dump = gen8_dump_ppgtt; + /* There are only few exceptions for gen >=6. chv and bxt. + * And we are not sure about the latter so play safe for now. + */ + if (IS_CHERRYVIEW(dev_priv) || IS_BROXTON(dev_priv)) + ppgtt->base.pt_kmap_wc = true; + if (USES_FULL_48BIT_PPGTT(dev_priv)) { - ret = setup_px(dev_priv, &ppgtt->pml4); + ret = setup_px(&ppgtt->base, &ppgtt->pml4); if (ret) goto free_scratch; @@ -1712,7 +1716,7 @@ static void gen6_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m) } seq_puts(m, "\n"); } - kunmap_px(ppgtt, pt_vaddr); + kunmap_px(pt_vaddr); } } @@ -1909,7 +1913,7 @@ static void gen6_ppgtt_clear_range(struct i915_address_space *vm, for (i = first_pte; i < last_pte; i++) pt_vaddr[i] = scratch_pte; - kunmap_px(ppgtt, pt_vaddr); + kunmap_px(pt_vaddr); num_entries -= last_pte - first_pte; first_pte = 0; @@ -1948,12 +1952,12 @@ static void gen6_ppgtt_insert_entries(struct i915_address_space *vm, } if (++act_pte == GEN6_PTES) { - kunmap_px(ppgtt, vaddr); + kunmap_px(vaddr); vaddr = kmap_px(ppgtt->pd.page_table[++act_pt]); act_pte = 0; } } while (1); - kunmap_px(ppgtt, vaddr); + kunmap_px(vaddr); } static int gen6_alloc_va_range(struct i915_address_space *vm, @@ -1987,7 +1991,7 @@ static int gen6_alloc_va_range(struct i915_address_space *vm, /* We've already allocated a page table */ WARN_ON(!bitmap_empty(pt->used_ptes, GEN6_PTES)); - pt = alloc_pt(dev_priv); + pt = alloc_pt(vm); if (IS_ERR(pt)) { ret = PTR_ERR(pt); goto unwind_out; @@ -2035,7 +2039,7 @@ unwind_out: struct i915_page_table *pt = ppgtt->pd.page_table[pde]; ppgtt->pd.page_table[pde] = vm->scratch_pt; - free_pt(dev_priv, pt); + free_pt(vm, pt); } mark_tlbs_dirty(ppgtt); @@ -2044,16 +2048,15 @@ unwind_out: static int gen6_init_scratch(struct i915_address_space *vm) { - struct drm_i915_private *dev_priv = vm->i915; int ret; - ret = setup_scratch_page(dev_priv, &vm->scratch_page, I915_GFP_DMA); + ret = setup_scratch_page(vm, I915_GFP_DMA); if (ret) return ret; - vm->scratch_pt = alloc_pt(dev_priv); + vm->scratch_pt = alloc_pt(vm); if (IS_ERR(vm->scratch_pt)) { - cleanup_scratch_page(dev_priv, &vm->scratch_page); + cleanup_scratch_page(vm); return PTR_ERR(vm->scratch_pt); } @@ -2064,17 +2067,14 @@ static int gen6_init_scratch(struct i915_address_space *vm) static void gen6_free_scratch(struct i915_address_space *vm) { - struct drm_i915_private *dev_priv = vm->i915; - - free_pt(dev_priv, vm->scratch_pt); - cleanup_scratch_page(dev_priv, &vm->scratch_page); + free_pt(vm, vm->scratch_pt); + cleanup_scratch_page(vm); } static void gen6_ppgtt_cleanup(struct i915_address_space *vm) { struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); struct i915_page_directory *pd = &ppgtt->pd; - struct drm_i915_private *dev_priv = vm->i915; struct i915_page_table *pt; uint32_t pde; @@ -2082,7 +2082,7 @@ static void gen6_ppgtt_cleanup(struct i915_address_space *vm) gen6_for_all_pdes(pt, pd, pde) if (pt != vm->scratch_pt) - free_pt(dev_priv, pt); + free_pt(vm, pt); gen6_free_scratch(vm); } @@ -2191,6 +2191,7 @@ static int __hw_ppgtt_init(struct i915_hw_ppgtt *ppgtt, struct drm_i915_private *dev_priv) { ppgtt->base.i915 = dev_priv; + ppgtt->base.dma = &dev_priv->drm.pdev->dev; if (INTEL_INFO(dev_priv)->gen < 8) return gen6_ppgtt_init(ppgtt); @@ -2212,10 +2213,14 @@ static void i915_address_space_init(struct i915_address_space *vm, INIT_LIST_HEAD(&vm->unbound_list); list_add_tail(&vm->global_link, &dev_priv->vm_list); + pagevec_init(&vm->free_pages, false); } static void i915_address_space_fini(struct i915_address_space *vm) { + if (pagevec_count(&vm->free_pages)) + vm_free_pages_release(vm); + i915_gem_timeline_fini(&vm->timeline); drm_mm_takedown(&vm->mm); list_del(&vm->global_link); @@ -2323,9 +2328,8 @@ void i915_ppgtt_release(struct kref *kref) WARN_ON(!list_empty(&ppgtt->base.inactive_list)); WARN_ON(!list_empty(&ppgtt->base.unbound_list)); - i915_address_space_fini(&ppgtt->base); - ppgtt->base.cleanup(&ppgtt->base); + i915_address_space_fini(&ppgtt->base); kfree(ppgtt); } @@ -2991,7 +2995,7 @@ static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size) return -ENOMEM; } - ret = setup_scratch_page(dev_priv, &ggtt->base.scratch_page, GFP_DMA32); + ret = setup_scratch_page(&ggtt->base, GFP_DMA32); if (ret) { DRM_ERROR("Scratch setup failed\n"); /* iounmap will also get called at remove, but meh */ @@ -3080,7 +3084,7 @@ static void gen6_gmch_remove(struct i915_address_space *vm) struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); iounmap(ggtt->gsm); - cleanup_scratch_page(vm->i915, &vm->scratch_page); + cleanup_scratch_page(vm); } static int gen8_gmch_probe(struct i915_ggtt *ggtt) @@ -3231,6 +3235,7 @@ int i915_ggtt_probe_hw(struct drm_i915_private *dev_priv) int ret; ggtt->base.i915 = dev_priv; + ggtt->base.dma = &dev_priv->drm.pdev->dev; if (INTEL_GEN(dev_priv) <= 5) ret = i915_gmch_probe(ggtt); diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index fe922059a412..6162bedc0811 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -36,9 +36,11 @@ #include #include +#include #include "i915_gem_timeline.h" #include "i915_gem_request.h" +#include "i915_selftest.h" #define I915_GTT_PAGE_SIZE 4096UL #define I915_GTT_MIN_ALIGNMENT I915_GTT_PAGE_SIZE @@ -247,6 +249,7 @@ struct i915_address_space { struct drm_mm mm; struct i915_gem_timeline timeline; struct drm_i915_private *i915; + struct device *dma; /* Every address space belongs to a struct file - except for the global * GTT that is owned by the driver (and so @file is set to NULL). In * principle, no information should leak from one context to another @@ -297,6 +300,9 @@ struct i915_address_space { */ struct list_head unbound_list; + struct pagevec free_pages; + bool pt_kmap_wc; + /* FIXME: Need a more generic return type */ gen6_pte_t (*pte_encode)(dma_addr_t addr, enum i915_cache_level level, @@ -326,6 +332,8 @@ struct i915_address_space { int (*bind_vma)(struct i915_vma *vma, enum i915_cache_level cache_level, u32 flags); + + I915_SELFTEST_DECLARE(struct fault_attr fault_attr); }; #define i915_is_ggtt(V) (!(V)->file) diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c index 320c6879fd83..e23753181720 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c @@ -832,17 +832,17 @@ static int shrink_hole(struct drm_i915_private *i915, unsigned long prime; int err; - i915->vm_fault.probability = 999; - atomic_set(&i915->vm_fault.times, -1); + vm->fault_attr.probability = 999; + atomic_set(&vm->fault_attr.times, -1); for_each_prime_number_from(prime, 0, ULONG_MAX - 1) { - i915->vm_fault.interval = prime; + vm->fault_attr.interval = prime; err = __shrink_hole(i915, vm, hole_start, hole_end, end_time); if (err) break; } - memset(&i915->vm_fault, 0, sizeof(i915->vm_fault)); + memset(&vm->fault_attr, 0, sizeof(vm->fault_attr)); return err; } -- cgit From 9231da70b338b336b982c74fad4afab5b55e6534 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 15 Feb 2017 08:43:41 +0000 Subject: drm/i915: Remove kmap/kunmap wrappers As these are now both plain and simple kmap_atomic/kunmap_atomic pairs, we can remove the wrappers for a small gain of clarity (in particular, not hiding the atomic critical sections!). Signed-off-by: Chris Wilson Reviewed-by: Matthew Auld Link: http://patchwork.freedesktop.org/patch/msgid/20170215084357.19977-7-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_gtt.c | 69 ++++++++++++++----------------------- 1 file changed, 26 insertions(+), 43 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 4eb0e114a295..8ac62cabf4cc 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -411,21 +411,7 @@ static void cleanup_page_dma(struct i915_address_space *vm, vm_free_page(vm, p->page); } -static void *kmap_page_dma(struct i915_page_dma *p) -{ - return kmap_atomic(p->page); -} - -/* We use the flushing unmap only with ppgtt structures: - * page directories, page tables and scratch pages. - */ -static void kunmap_page_dma(void *vaddr) -{ - kunmap_atomic(vaddr); -} - -#define kmap_px(px) kmap_page_dma(px_base(px)) -#define kunmap_px(vaddr) kunmap_page_dma((vaddr)) +#define kmap_atomic_px(px) kmap_atomic(px_base(px)->page) #define setup_px(vm, px) setup_page_dma((vm), px_base(px)) #define cleanup_px(vm, px) cleanup_page_dma((vm), px_base(px)) @@ -436,13 +422,13 @@ static void fill_page_dma(struct i915_address_space *vm, struct i915_page_dma *p, const u64 val) { - u64 * const vaddr = kmap_page_dma(p); + u64 * const vaddr = kmap_atomic(p->page); int i; for (i = 0; i < 512; i++) vaddr[i] = val; - kunmap_page_dma(vaddr); + kunmap_atomic(vaddr); } static void fill_page_dma_32(struct i915_address_space *vm, @@ -675,9 +661,9 @@ gen8_setup_pdpe(struct i915_hw_ppgtt *ppgtt, if (!USES_FULL_48BIT_PPGTT(to_i915(ppgtt->base.dev))) return; - page_directorypo = kmap_px(pdp); + page_directorypo = kmap_atomic_px(pdp); page_directorypo[index] = gen8_pdpe_encode(px_dma(pd), I915_CACHE_LLC); - kunmap_px(page_directorypo); + kunmap_atomic(page_directorypo); } static void @@ -685,10 +671,10 @@ gen8_setup_pml4e(struct i915_pml4 *pml4, struct i915_page_directory_pointer *pdp, int index) { - gen8_ppgtt_pml4e_t *pagemap = kmap_px(pml4); + gen8_ppgtt_pml4e_t *pagemap = kmap_atomic_px(pml4); pagemap[index] = gen8_pml4e_encode(px_dma(pdp), I915_CACHE_LLC); - kunmap_px(pagemap); + kunmap_atomic(pagemap); } /* Broadwell Page Directory Pointer Descriptors */ @@ -774,10 +760,10 @@ static bool gen8_ppgtt_clear_pt(struct i915_address_space *vm, return true; } - vaddr = kmap_px(pt); + vaddr = kmap_atomic_px(pt); while (pte < pte_end) vaddr[pte++] = scratch_pte; - kunmap_px(vaddr); + kunmap_atomic(vaddr); return false; } @@ -802,9 +788,9 @@ static bool gen8_ppgtt_clear_pd(struct i915_address_space *vm, if (gen8_ppgtt_clear_pt(vm, pt, start, length)) { __clear_bit(pde, pd->used_pdes); - pde_vaddr = kmap_px(pd); + pde_vaddr = kmap_atomic_px(pd); pde_vaddr[pde] = scratch_pde; - kunmap_px(pde_vaddr); + kunmap_atomic(pde_vaddr); free_pt(vm, pt); } } @@ -904,7 +890,7 @@ gen8_ppgtt_insert_pte_entries(struct i915_hw_ppgtt *ppgtt, bool ret; pd = pdp->page_directory[pdpe]; - vaddr = kmap_px(pd->page_table[pde]); + vaddr = kmap_atomic_px(pd->page_table[pde]); do { vaddr[pte] = pte_encode | iter->dma; iter->dma += PAGE_SIZE; @@ -932,12 +918,12 @@ gen8_ppgtt_insert_pte_entries(struct i915_hw_ppgtt *ppgtt, pde = 0; } - kunmap_px(vaddr); - vaddr = kmap_px(pd->page_table[pde]); + kunmap_atomic(vaddr); + vaddr = kmap_atomic_px(pd->page_table[pde]); pte = 0; } } while (1); - kunmap_px(vaddr); + kunmap_atomic(vaddr); return ret; } @@ -1370,7 +1356,7 @@ static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm, /* Allocations have completed successfully, so set the bitmaps, and do * the mappings. */ gen8_for_each_pdpe(pd, pdp, start, length, pdpe) { - gen8_pde_t *const page_directory = kmap_px(pd); + gen8_pde_t *const page_directory = kmap_atomic_px(pd); struct i915_page_table *pt; uint64_t pd_len = length; uint64_t pd_start = start; @@ -1405,7 +1391,7 @@ static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm, * point we're still relying on insert_entries() */ } - kunmap_px(page_directory); + kunmap_atomic(page_directory); __set_bit(pdpe, pdp->used_pdpes); gen8_setup_pdpe(ppgtt, pdp, pd, pdpe); } @@ -1512,7 +1498,7 @@ static void gen8_dump_pdp(struct i915_hw_ppgtt *ppgtt, if (!test_bit(pde, pd->used_pdes)) continue; - pt_vaddr = kmap_px(pt); + pt_vaddr = kmap_atomic_px(pt); for (pte = 0; pte < GEN8_PTES; pte += 4) { uint64_t va = (pdpe << GEN8_PDPE_SHIFT) | @@ -1536,9 +1522,6 @@ static void gen8_dump_pdp(struct i915_hw_ppgtt *ppgtt, } seq_puts(m, "\n"); } - /* don't use kunmap_px, it could trigger - * an unnecessary flush. - */ kunmap_atomic(pt_vaddr); } } @@ -1693,7 +1676,7 @@ static void gen6_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m) expected); seq_printf(m, "\tPDE: %x\n", pd_entry); - pt_vaddr = kmap_px(ppgtt->pd.page_table[pde]); + pt_vaddr = kmap_atomic_px(ppgtt->pd.page_table[pde]); for (pte = 0; pte < GEN6_PTES; pte+=4) { unsigned long va = @@ -1716,7 +1699,7 @@ static void gen6_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m) } seq_puts(m, "\n"); } - kunmap_px(pt_vaddr); + kunmap_atomic(pt_vaddr); } } @@ -1908,12 +1891,12 @@ static void gen6_ppgtt_clear_range(struct i915_address_space *vm, if (last_pte > GEN6_PTES) last_pte = GEN6_PTES; - pt_vaddr = kmap_px(ppgtt->pd.page_table[act_pt]); + pt_vaddr = kmap_atomic_px(ppgtt->pd.page_table[act_pt]); for (i = first_pte; i < last_pte; i++) pt_vaddr[i] = scratch_pte; - kunmap_px(pt_vaddr); + kunmap_atomic(pt_vaddr); num_entries -= last_pte - first_pte; first_pte = 0; @@ -1934,7 +1917,7 @@ static void gen6_ppgtt_insert_entries(struct i915_address_space *vm, struct sgt_dma iter; gen6_pte_t *vaddr; - vaddr = kmap_px(ppgtt->pd.page_table[act_pt]); + vaddr = kmap_atomic_px(ppgtt->pd.page_table[act_pt]); iter.sg = pages->sgl; iter.dma = sg_dma_address(iter.sg); iter.max = iter.dma + iter.sg->length; @@ -1952,12 +1935,12 @@ static void gen6_ppgtt_insert_entries(struct i915_address_space *vm, } if (++act_pte == GEN6_PTES) { - kunmap_px(vaddr); - vaddr = kmap_px(ppgtt->pd.page_table[++act_pt]); + kunmap_atomic(vaddr); + vaddr = kmap_atomic_px(ppgtt->pd.page_table[++act_pt]); act_pte = 0; } } while (1); - kunmap_px(vaddr); + kunmap_atomic(vaddr); } static int gen6_alloc_va_range(struct i915_address_space *vm, -- cgit From ff685975d97f8cb28e2fe4e6d90a71fd93244815 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 15 Feb 2017 08:43:42 +0000 Subject: drm/i915: Move allocate_va_range to GTT In the future, we need to call allocate_va_range on the aliasing-ppgtt which means moving the call down from the vma into the vm (which is more appropriate for calling the vm function). Signed-off-by: Chris Wilson Reviewed-by: Matthew Auld Link: http://patchwork.freedesktop.org/patch/msgid/20170215084357.19977-8-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_gtt.c | 39 +++++++++++++++++++++++++------------ drivers/gpu/drm/i915/i915_vma.c | 9 --------- 2 files changed, 27 insertions(+), 21 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 8ac62cabf4cc..8a24bf006a82 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -190,11 +190,18 @@ static int ppgtt_bind_vma(struct i915_vma *vma, enum i915_cache_level cache_level, u32 unused) { - u32 pte_flags = 0; + u32 pte_flags; + int ret; + + trace_i915_va_alloc(vma); + ret = vma->vm->allocate_va_range(vma->vm, vma->node.start, vma->size); + if (ret) + return ret; vma->pages = vma->obj->mm.pages; /* Currently applicable only to VLV */ + pte_flags = 0; if (vma->obj->gt_ro) pte_flags |= PTE_READ_ONLY; @@ -206,9 +213,7 @@ static int ppgtt_bind_vma(struct i915_vma *vma, static void ppgtt_unbind_vma(struct i915_vma *vma) { - vma->vm->clear_range(vma->vm, - vma->node.start, - vma->size); + vma->vm->clear_range(vma->vm, vma->node.start, vma->size); } static gen8_pte_t gen8_pte_encode(dma_addr_t addr, @@ -2650,9 +2655,10 @@ static int aliasing_gtt_bind_vma(struct i915_vma *vma, { struct drm_i915_private *i915 = vma->vm->i915; u32 pte_flags; + int ret; if (unlikely(!vma->pages)) { - int ret = i915_get_ggtt_vma_pages(vma); + ret = i915_get_ggtt_vma_pages(vma); if (ret) return ret; } @@ -2662,6 +2668,22 @@ static int aliasing_gtt_bind_vma(struct i915_vma *vma, if (vma->obj->gt_ro) pte_flags |= PTE_READ_ONLY; + if (flags & I915_VMA_LOCAL_BIND) { + struct i915_hw_ppgtt *appgtt = i915->mm.aliasing_ppgtt; + + if (appgtt->base.allocate_va_range) { + ret = appgtt->base.allocate_va_range(&appgtt->base, + vma->node.start, + vma->node.size); + if (ret) + return ret; + } + + appgtt->base.insert_entries(&appgtt->base, + vma->pages, vma->node.start, + cache_level, pte_flags); + } + if (flags & I915_VMA_GLOBAL_BIND) { intel_runtime_pm_get(i915); vma->vm->insert_entries(vma->vm, @@ -2670,13 +2692,6 @@ static int aliasing_gtt_bind_vma(struct i915_vma *vma, intel_runtime_pm_put(i915); } - if (flags & I915_VMA_LOCAL_BIND) { - struct i915_hw_ppgtt *appgtt = i915->mm.aliasing_ppgtt; - appgtt->base.insert_entries(&appgtt->base, - vma->pages, vma->node.start, - cache_level, pte_flags); - } - return 0; } diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 0dc994b76924..c1abfe7b48ea 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -263,15 +263,6 @@ int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level, vma->vm->total))) return -ENODEV; - if (vma_flags == 0 && vma->vm->allocate_va_range) { - trace_i915_va_alloc(vma); - ret = vma->vm->allocate_va_range(vma->vm, - vma->node.start, - vma->node.size); - if (ret) - return ret; - } - trace_i915_vma_bind(vma, bind_flags); ret = vma->vm->bind_vma(vma, cache_level, bind_flags); if (ret) -- cgit From 52c126ee907f436fc871bfc15836b8011f6eb76d Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 15 Feb 2017 08:43:43 +0000 Subject: drm/i915: Always preallocate gen6/7 ppgtt The hardware does not cope very well with us changing the PD within an active context (the context must be idle for it to re-read the PD). As we only check whether the page is idle before changing the entry (and on through the PD tree), we cannot reliably replace PD entries on gen6/gen7. To fully avoid changing the tree at runtime, preallocate it on init. Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Link: http://patchwork.freedesktop.org/patch/msgid/20170215084357.19977-9-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_gtt.c | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 8a24bf006a82..1c78aef5709e 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -2103,6 +2103,12 @@ static int gen6_ppgtt_allocate_page_directories(struct i915_hw_ppgtt *ppgtt) if (ppgtt->node.start < ggtt->mappable_end) DRM_DEBUG("Forced to use aperture for PDEs\n"); + ppgtt->pd.base.ggtt_offset = + ppgtt->node.start / PAGE_SIZE * sizeof(gen6_pte_t); + + ppgtt->pd_addr = (gen6_pte_t __iomem *)ggtt->gsm + + ppgtt->pd.base.ggtt_offset / sizeof(gen6_pte_t); + return 0; err_out: @@ -2145,7 +2151,6 @@ static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt) if (ret) return ret; - ppgtt->base.allocate_va_range = gen6_alloc_va_range; ppgtt->base.clear_range = gen6_ppgtt_clear_range; ppgtt->base.insert_entries = gen6_ppgtt_insert_entries; ppgtt->base.unbind_vma = ppgtt_unbind_vma; @@ -2155,22 +2160,21 @@ static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt) ppgtt->base.total = I915_PDES * GEN6_PTES * PAGE_SIZE; ppgtt->debug_dump = gen6_dump_ppgtt; - ppgtt->pd.base.ggtt_offset = - ppgtt->node.start / PAGE_SIZE * sizeof(gen6_pte_t); - - ppgtt->pd_addr = (gen6_pte_t __iomem *)ggtt->gsm + - ppgtt->pd.base.ggtt_offset / sizeof(gen6_pte_t); - gen6_scratch_va_range(ppgtt, 0, ppgtt->base.total); - gen6_write_page_range(dev_priv, &ppgtt->pd, 0, ppgtt->base.total); + ret = gen6_alloc_va_range(&ppgtt->base, 0, ppgtt->base.total); + if (ret) { + gen6_ppgtt_cleanup(&ppgtt->base); + return ret; + } + DRM_DEBUG_DRIVER("Allocated pde space (%lldM) at GTT entry: %llx\n", ppgtt->node.size >> 20, ppgtt->node.start / PAGE_SIZE); - DRM_DEBUG("Adding PPGTT at offset %x\n", - ppgtt->pd.base.ggtt_offset << 10); + DRM_DEBUG_DRIVER("Adding PPGTT at offset %x\n", + ppgtt->pd.base.ggtt_offset << 10); return 0; } -- cgit From f0a22974acbdd17b03cff4bdee880e4f08cccf6d Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 15 Feb 2017 08:43:44 +0000 Subject: drm/i915: Remove redundant clear of appgtt Upon creation of the va range, it is initialised to point at scratch. Signed-off-by: Chris Wilson Reviewed-by: Matthew Auld Link: http://patchwork.freedesktop.org/patch/msgid/20170215084357.19977-10-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_gtt.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 1c78aef5709e..52586432d5e8 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -2769,10 +2769,6 @@ int i915_gem_init_aliasing_ppgtt(struct drm_i915_private *i915) goto err_ppgtt; } - ppgtt->base.clear_range(&ppgtt->base, - ppgtt->base.start, - ppgtt->base.total); - i915->mm.aliasing_ppgtt = ppgtt; WARN_ON(ggtt->base.bind_vma != ggtt_bind_vma); -- cgit From 16a011c8be2f36f972fd9295d0c350c7dffd4779 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 15 Feb 2017 08:43:45 +0000 Subject: drm/i915: Tidy gen6_write_pde() Stop passing around unused parameters makes the code more compact. Signed-off-by: Chris Wilson Reviewed-by: Matthew Auld Link: http://patchwork.freedesktop.org/patch/msgid/20170215084357.19977-11-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_gtt.c | 39 +++++++++++++------------------------ 1 file changed, 14 insertions(+), 25 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 52586432d5e8..17c5c1a5b652 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -1709,36 +1709,28 @@ static void gen6_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m) } /* Write pde (index) from the page directory @pd to the page table @pt */ -static void gen6_write_pde(struct i915_page_directory *pd, - const int pde, struct i915_page_table *pt) +static inline void gen6_write_pde(const struct i915_hw_ppgtt *ppgtt, + const unsigned int pde, + const struct i915_page_table *pt) { /* Caller needs to make sure the write completes if necessary */ - struct i915_hw_ppgtt *ppgtt = - container_of(pd, struct i915_hw_ppgtt, pd); - u32 pd_entry; - - pd_entry = GEN6_PDE_ADDR_ENCODE(px_dma(pt)); - pd_entry |= GEN6_PDE_VALID; - - writel(pd_entry, ppgtt->pd_addr + pde); + writel_relaxed(GEN6_PDE_ADDR_ENCODE(px_dma(pt)) | GEN6_PDE_VALID, + ppgtt->pd_addr + pde); } /* Write all the page tables found in the ppgtt structure to incrementing page * directories. */ -static void gen6_write_page_range(struct drm_i915_private *dev_priv, - struct i915_page_directory *pd, +static void gen6_write_page_range(struct i915_hw_ppgtt *ppgtt, uint32_t start, uint32_t length) { - struct i915_ggtt *ggtt = &dev_priv->ggtt; struct i915_page_table *pt; - uint32_t pde; + unsigned int pde; - gen6_for_each_pde(pt, pd, start, length, pde) - gen6_write_pde(pd, pde, pt); + gen6_for_each_pde(pt, &ppgtt->pd, start, length, pde) + gen6_write_pde(ppgtt, pde, pt); + wmb(); - /* Make sure write is complete before other code can use this page - * table. Also require for WC mapped PTEs */ - readl(ggtt->gsm); + mark_tlbs_dirty(ppgtt); } static uint32_t get_pd_offset(struct i915_hw_ppgtt *ppgtt) @@ -2003,7 +1995,7 @@ static int gen6_alloc_va_range(struct i915_address_space *vm, gen6_pte_count(start, length)); if (__test_and_clear_bit(pde, new_page_tables)) - gen6_write_pde(&ppgtt->pd, pde, pt); + gen6_write_pde(ppgtt, pde, pt); trace_i915_page_table_entry_map(vm, pde, pt, gen6_pte_index(start), @@ -2161,7 +2153,7 @@ static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt) ppgtt->debug_dump = gen6_dump_ppgtt; gen6_scratch_va_range(ppgtt, 0, ppgtt->base.total); - gen6_write_page_range(dev_priv, &ppgtt->pd, 0, ppgtt->base.total); + gen6_write_page_range(ppgtt, 0, ppgtt->base.total); ret = gen6_alloc_va_range(&ppgtt->base, 0, ppgtt->base.total); if (ret) { @@ -3395,8 +3387,6 @@ void i915_gem_restore_gtt_mappings(struct drm_i915_private *dev_priv) struct i915_address_space *vm; list_for_each_entry(vm, &dev_priv->vm_list, global_link) { - /* TODO: Perhaps it shouldn't be gen6 specific */ - struct i915_hw_ppgtt *ppgtt; if (i915_is_ggtt(vm)) @@ -3404,8 +3394,7 @@ void i915_gem_restore_gtt_mappings(struct drm_i915_private *dev_priv) else ppgtt = i915_vm_to_ppgtt(vm); - gen6_write_page_range(dev_priv, &ppgtt->pd, - 0, ppgtt->base.total); + gen6_write_page_range(ppgtt, 0, ppgtt->base.total); } } -- cgit From dd19674bacba227ae5d3ce680cbc5668198894dc Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 15 Feb 2017 08:43:46 +0000 Subject: drm/i915: Remove bitmap tracking for used-ptes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We only operate on known extents (both for alloc/clear) and so we can use both the knowledge of the bind/unbind range along with the knowledge of the existing pagetable to avoid having to allocate temporary and auxiliary bitmaps. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=99295 Signed-off-by: Chris Wilson Reviewed-by: Matthew Auld Reviewed-by: Michał Winiarski Link: http://patchwork.freedesktop.org/patch/msgid/20170215084357.19977-12-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_gtt.c | 330 ++++++++++++------------------------ drivers/gpu/drm/i915/i915_gem_gtt.h | 5 +- drivers/gpu/drm/i915/i915_trace.h | 19 +-- 3 files changed, 117 insertions(+), 237 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 17c5c1a5b652..ae2ff36b275a 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -457,62 +457,38 @@ static void cleanup_scratch_page(struct i915_address_space *vm) static struct i915_page_table *alloc_pt(struct i915_address_space *vm) { struct i915_page_table *pt; - const size_t count = INTEL_GEN(vm->i915) >= 8 ? GEN8_PTES : GEN6_PTES; - int ret = -ENOMEM; - pt = kzalloc(sizeof(*pt), GFP_KERNEL); - if (!pt) + pt = kmalloc(sizeof(*pt), GFP_KERNEL | __GFP_NOWARN); + if (unlikely(!pt)) return ERR_PTR(-ENOMEM); - pt->used_ptes = kcalloc(BITS_TO_LONGS(count), sizeof(*pt->used_ptes), - GFP_KERNEL); - - if (!pt->used_ptes) - goto fail_bitmap; - - ret = setup_px(vm, pt); - if (ret) - goto fail_page_m; + if (unlikely(setup_px(vm, pt))) { + kfree(pt); + return ERR_PTR(-ENOMEM); + } + pt->used_ptes = 0; return pt; - -fail_page_m: - kfree(pt->used_ptes); -fail_bitmap: - kfree(pt); - - return ERR_PTR(ret); } static void free_pt(struct i915_address_space *vm, struct i915_page_table *pt) { cleanup_px(vm, pt); - kfree(pt->used_ptes); kfree(pt); } static void gen8_initialize_pt(struct i915_address_space *vm, struct i915_page_table *pt) { - gen8_pte_t scratch_pte; - - scratch_pte = gen8_pte_encode(vm->scratch_page.daddr, - I915_CACHE_LLC); - - fill_px(vm, pt, scratch_pte); + fill_px(vm, pt, + gen8_pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC)); } static void gen6_initialize_pt(struct i915_address_space *vm, struct i915_page_table *pt) { - gen6_pte_t scratch_pte; - - WARN_ON(vm->scratch_page.daddr == 0); - - scratch_pte = vm->pte_encode(vm->scratch_page.daddr, - I915_CACHE_LLC, 0); - - fill32_px(vm, pt, scratch_pte); + fill32_px(vm, pt, + vm->pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC, 0)); } static struct i915_page_directory *alloc_pd(struct i915_address_space *vm) @@ -556,11 +532,12 @@ static void free_pd(struct i915_address_space *vm, static void gen8_initialize_pd(struct i915_address_space *vm, struct i915_page_directory *pd) { - gen8_pde_t scratch_pde; + unsigned int i; - scratch_pde = gen8_pde_encode(px_dma(vm->scratch_pt), I915_CACHE_LLC); - - fill_px(vm, pd, scratch_pde); + fill_px(vm, pd, + gen8_pde_encode(px_dma(vm->scratch_pt), I915_CACHE_LLC)); + for (i = 0; i < I915_PDES; i++) + pd->page_table[i] = vm->scratch_pt; } static int __pdp_init(struct drm_i915_private *dev_priv, @@ -744,8 +721,7 @@ static void mark_tlbs_dirty(struct i915_hw_ppgtt *ppgtt) */ static bool gen8_ppgtt_clear_pt(struct i915_address_space *vm, struct i915_page_table *pt, - uint64_t start, - uint64_t length) + u64 start, u64 length) { unsigned int num_entries = gen8_pte_count(start, length); unsigned int pte = gen8_pte_index(start); @@ -754,16 +730,11 @@ static bool gen8_ppgtt_clear_pt(struct i915_address_space *vm, gen8_pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC); gen8_pte_t *vaddr; - if (WARN_ON(!px_page(pt))) - return false; - - GEM_BUG_ON(pte_end > GEN8_PTES); + GEM_BUG_ON(num_entries > pt->used_ptes); - bitmap_clear(pt->used_ptes, pte, num_entries); - if (USES_FULL_PPGTT(vm->i915)) { - if (bitmap_empty(pt->used_ptes, GEN8_PTES)) - return true; - } + pt->used_ptes -= num_entries; + if (!pt->used_ptes) + return true; vaddr = kmap_atomic_px(pt); while (pte < pte_end) @@ -773,31 +744,38 @@ static bool gen8_ppgtt_clear_pt(struct i915_address_space *vm, return false; } +static void gen8_ppgtt_set_pde(struct i915_address_space *vm, + struct i915_page_directory *pd, + struct i915_page_table *pt, + unsigned int pde) +{ + gen8_pde_t *vaddr; + + pd->page_table[pde] = pt; + + vaddr = kmap_atomic_px(pd); + vaddr[pde] = gen8_pde_encode(px_dma(pt), I915_CACHE_LLC); + kunmap_atomic(vaddr); +} + /* Removes entries from a single page dir, releasing it if it's empty. * Caller can use the return value to update higher-level entries */ static bool gen8_ppgtt_clear_pd(struct i915_address_space *vm, struct i915_page_directory *pd, - uint64_t start, - uint64_t length) + u64 start, u64 length) { struct i915_page_table *pt; - uint64_t pde; - gen8_pde_t *pde_vaddr; - gen8_pde_t scratch_pde = gen8_pde_encode(px_dma(vm->scratch_pt), - I915_CACHE_LLC); + u32 pde; gen8_for_each_pde(pt, pd, start, length, pde) { - if (WARN_ON(!pd->page_table[pde])) - break; + if (!gen8_ppgtt_clear_pt(vm, pt, start, length)) + continue; - if (gen8_ppgtt_clear_pt(vm, pt, start, length)) { - __clear_bit(pde, pd->used_pdes); - pde_vaddr = kmap_atomic_px(pd); - pde_vaddr[pde] = scratch_pde; - kunmap_atomic(pde_vaddr); - free_pt(vm, pt); - } + gen8_ppgtt_set_pde(vm, pd, vm->scratch_pt, pde); + __clear_bit(pde, pd->used_pdes); + + free_pt(vm, pt); } if (bitmap_empty(pd->used_pdes, I915_PDES)) @@ -1124,8 +1102,6 @@ static void gen8_ppgtt_cleanup(struct i915_address_space *vm) * @pd: Page directory for this address range. * @start: Starting virtual address to begin allocations. * @length: Size of the allocations. - * @new_pts: Bitmap set by function with new allocations. Likely used by the - * caller to free on error. * * Allocate the required number of page tables. Extremely similar to * gen8_ppgtt_alloc_page_directories(). The main difference is here we are limited by @@ -1138,37 +1114,30 @@ static void gen8_ppgtt_cleanup(struct i915_address_space *vm) */ static int gen8_ppgtt_alloc_pagetabs(struct i915_address_space *vm, struct i915_page_directory *pd, - uint64_t start, - uint64_t length, - unsigned long *new_pts) + u64 start, u64 length) { struct i915_page_table *pt; + u64 from = start; uint32_t pde; gen8_for_each_pde(pt, pd, start, length, pde) { /* Don't reallocate page tables */ - if (test_bit(pde, pd->used_pdes)) { - /* Scratch is never allocated this way */ - WARN_ON(pt == vm->scratch_pt); - continue; - } - - pt = alloc_pt(vm); - if (IS_ERR(pt)) - goto unwind_out; + if (!test_bit(pde, pd->used_pdes)) { + pt = alloc_pt(vm); + if (IS_ERR(pt)) + goto unwind; - gen8_initialize_pt(vm, pt); - pd->page_table[pde] = pt; - __set_bit(pde, new_pts); + gen8_initialize_pt(vm, pt); + pd->page_table[pde] = pt; + } + pt->used_ptes += gen8_pte_count(start, length); trace_i915_page_table_entry_alloc(vm, pde, start, GEN8_PDE_SHIFT); } return 0; -unwind_out: - for_each_set_bit(pde, new_pts, I915_PDES) - free_pt(vm, pd->page_table[pde]); - +unwind: + gen8_ppgtt_clear_pd(vm, pd, from, start - from); return -ENOMEM; } @@ -1285,9 +1254,8 @@ unwind_out: } static void -free_gen8_temp_bitmaps(unsigned long *new_pds, unsigned long *new_pts) +free_gen8_temp_bitmaps(unsigned long *new_pds) { - kfree(new_pts); kfree(new_pds); } @@ -1296,29 +1264,16 @@ free_gen8_temp_bitmaps(unsigned long *new_pds, unsigned long *new_pts) */ static int __must_check alloc_gen8_temp_bitmaps(unsigned long **new_pds, - unsigned long **new_pts, uint32_t pdpes) { unsigned long *pds; - unsigned long *pts; pds = kcalloc(BITS_TO_LONGS(pdpes), sizeof(unsigned long), GFP_TEMPORARY); if (!pds) return -ENOMEM; - pts = kcalloc(pdpes, BITS_TO_LONGS(I915_PDES) * sizeof(unsigned long), - GFP_TEMPORARY); - if (!pts) - goto err_out; - *new_pds = pds; - *new_pts = pts; - return 0; - -err_out: - free_gen8_temp_bitmaps(pds, pts); - return -ENOMEM; } static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm, @@ -1327,7 +1282,7 @@ static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm, uint64_t length) { struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); - unsigned long *new_page_dirs, *new_page_tables; + unsigned long *new_page_dirs; struct i915_page_directory *pd; const uint64_t orig_start = start; const uint64_t orig_length = length; @@ -1335,7 +1290,7 @@ static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm, uint32_t pdpes = I915_PDPES_PER_PDP(dev_priv); int ret; - ret = alloc_gen8_temp_bitmaps(&new_page_dirs, &new_page_tables, pdpes); + ret = alloc_gen8_temp_bitmaps(&new_page_dirs, pdpes); if (ret) return ret; @@ -1343,14 +1298,13 @@ static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm, ret = gen8_ppgtt_alloc_page_directories(vm, pdp, start, length, new_page_dirs); if (ret) { - free_gen8_temp_bitmaps(new_page_dirs, new_page_tables); + free_gen8_temp_bitmaps(new_page_dirs); return ret; } /* For every page directory referenced, allocate page tables */ gen8_for_each_pdpe(pd, pdp, start, length, pdpe) { - ret = gen8_ppgtt_alloc_pagetabs(vm, pd, start, length, - new_page_tables + pdpe * BITS_TO_LONGS(I915_PDES)); + ret = gen8_ppgtt_alloc_pagetabs(vm, pd, start, length); if (ret) goto err_out; } @@ -1376,11 +1330,6 @@ static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm, WARN_ON(!pd_len); WARN_ON(!gen8_pte_count(pd_start, pd_len)); - /* Set our used ptes within the page table */ - bitmap_set(pt->used_ptes, - gen8_pte_index(pd_start), - gen8_pte_count(pd_start, pd_len)); - /* Our pde is now pointing to the pagetable, pt */ __set_bit(pde, pd->used_pdes); @@ -1389,8 +1338,7 @@ static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm, I915_CACHE_LLC); trace_i915_page_table_entry_map(&ppgtt->base, pde, pt, gen8_pte_index(start), - gen8_pte_count(start, length), - GEN8_PTES); + gen8_pte_count(start, length)); /* NB: We haven't yet mapped ptes to pages. At this * point we're still relying on insert_entries() */ @@ -1401,23 +1349,15 @@ static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm, gen8_setup_pdpe(ppgtt, pdp, pd, pdpe); } - free_gen8_temp_bitmaps(new_page_dirs, new_page_tables); + free_gen8_temp_bitmaps(new_page_dirs); mark_tlbs_dirty(ppgtt); return 0; err_out: - while (pdpe--) { - unsigned long temp; - - for_each_set_bit(temp, new_page_tables + pdpe * - BITS_TO_LONGS(I915_PDES), I915_PDES) - free_pt(vm, pdp->page_directory[pdpe]->page_table[temp]); - } - for_each_set_bit(pdpe, new_page_dirs, pdpes) free_pd(vm, pdp->page_directory[pdpe]); - free_gen8_temp_bitmaps(new_page_dirs, new_page_tables); + free_gen8_temp_bitmaps(new_page_dirs); mark_tlbs_dirty(ppgtt); return ret; } @@ -1559,14 +1499,14 @@ static void gen8_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m) static int gen8_preallocate_top_level_pdps(struct i915_hw_ppgtt *ppgtt) { - unsigned long *new_page_dirs, *new_page_tables; + unsigned long *new_page_dirs; uint32_t pdpes = I915_PDPES_PER_PDP(to_i915(ppgtt->base.dev)); int ret; /* We allocate temp bitmap for page tables for no gain * but as this is for init only, lets keep the things simple */ - ret = alloc_gen8_temp_bitmaps(&new_page_dirs, &new_page_tables, pdpes); + ret = alloc_gen8_temp_bitmaps(&new_page_dirs, pdpes); if (ret) return ret; @@ -1579,7 +1519,7 @@ static int gen8_preallocate_top_level_pdps(struct i915_hw_ppgtt *ppgtt) if (!ret) *ppgtt->pdp.used_pdpes = *new_page_dirs; - free_gen8_temp_bitmaps(new_page_dirs, new_page_tables); + free_gen8_temp_bitmaps(new_page_dirs); return ret; } @@ -1728,16 +1668,15 @@ static void gen6_write_page_range(struct i915_hw_ppgtt *ppgtt, gen6_for_each_pde(pt, &ppgtt->pd, start, length, pde) gen6_write_pde(ppgtt, pde, pt); - wmb(); mark_tlbs_dirty(ppgtt); + wmb(); } -static uint32_t get_pd_offset(struct i915_hw_ppgtt *ppgtt) +static inline uint32_t get_pd_offset(struct i915_hw_ppgtt *ppgtt) { - BUG_ON(ppgtt->pd.base.ggtt_offset & 0x3f); - - return (ppgtt->pd.base.ggtt_offset / 64) << 16; + GEM_BUG_ON(ppgtt->pd.base.ggtt_offset & 0x3f); + return ppgtt->pd.base.ggtt_offset << 10; } static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt, @@ -1869,35 +1808,36 @@ static void gen6_ppgtt_enable(struct drm_i915_private *dev_priv) /* PPGTT support for Sandybdrige/Gen6 and later */ static void gen6_ppgtt_clear_range(struct i915_address_space *vm, - uint64_t start, - uint64_t length) + u64 start, u64 length) { struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); - gen6_pte_t *pt_vaddr, scratch_pte; - unsigned first_entry = start >> PAGE_SHIFT; - unsigned num_entries = length >> PAGE_SHIFT; - unsigned act_pt = first_entry / GEN6_PTES; - unsigned first_pte = first_entry % GEN6_PTES; - unsigned last_pte, i; - - scratch_pte = vm->pte_encode(vm->scratch_page.daddr, - I915_CACHE_LLC, 0); + unsigned int first_entry = start >> PAGE_SHIFT; + unsigned int pde = first_entry / GEN6_PTES; + unsigned int pte = first_entry % GEN6_PTES; + unsigned int num_entries = length >> PAGE_SHIFT; + gen6_pte_t scratch_pte = + vm->pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC, 0); while (num_entries) { - last_pte = first_pte + num_entries; - if (last_pte > GEN6_PTES) - last_pte = GEN6_PTES; + struct i915_page_table *pt = ppgtt->pd.page_table[pde++]; + unsigned int end = min(pte + num_entries, GEN6_PTES); + gen6_pte_t *vaddr; - pt_vaddr = kmap_atomic_px(ppgtt->pd.page_table[act_pt]); + num_entries -= end - pte; - for (i = first_pte; i < last_pte; i++) - pt_vaddr[i] = scratch_pte; + /* Note that the hw doesn't support removing PDE on the fly + * (they are cached inside the context with no means to + * invalidate the cache), so we can only reset the PTE + * entries back to scratch. + */ - kunmap_atomic(pt_vaddr); + vaddr = kmap_atomic_px(pt); + do { + vaddr[pte++] = scratch_pte; + } while (pte < end); + kunmap_atomic(vaddr); - num_entries -= last_pte - first_pte; - first_pte = 0; - act_pt++; + pte = 0; } } @@ -1941,89 +1881,37 @@ static void gen6_ppgtt_insert_entries(struct i915_address_space *vm, } static int gen6_alloc_va_range(struct i915_address_space *vm, - uint64_t start_in, uint64_t length_in) + u64 start, u64 length) { - DECLARE_BITMAP(new_page_tables, I915_PDES); - struct drm_i915_private *dev_priv = vm->i915; - struct i915_ggtt *ggtt = &dev_priv->ggtt; struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); struct i915_page_table *pt; - uint32_t start, length, start_save, length_save; - uint32_t pde; - int ret; - - start = start_save = start_in; - length = length_save = length_in; - - bitmap_zero(new_page_tables, I915_PDES); + u64 from = start; + unsigned int pde; + bool flush = false; - /* The allocation is done in two stages so that we can bail out with - * minimal amount of pain. The first stage finds new page tables that - * need allocation. The second stage marks use ptes within the page - * tables. - */ gen6_for_each_pde(pt, &ppgtt->pd, start, length, pde) { - if (pt != vm->scratch_pt) { - WARN_ON(bitmap_empty(pt->used_ptes, GEN6_PTES)); - continue; - } - - /* We've already allocated a page table */ - WARN_ON(!bitmap_empty(pt->used_ptes, GEN6_PTES)); + if (pt == vm->scratch_pt) { + pt = alloc_pt(vm); + if (IS_ERR(pt)) + goto unwind_out; - pt = alloc_pt(vm); - if (IS_ERR(pt)) { - ret = PTR_ERR(pt); - goto unwind_out; + gen6_initialize_pt(vm, pt); + ppgtt->pd.page_table[pde] = pt; + gen6_write_pde(ppgtt, pde, pt); + flush = true; } - - gen6_initialize_pt(vm, pt); - - ppgtt->pd.page_table[pde] = pt; - __set_bit(pde, new_page_tables); - trace_i915_page_table_entry_alloc(vm, pde, start, GEN6_PDE_SHIFT); } - start = start_save; - length = length_save; - - gen6_for_each_pde(pt, &ppgtt->pd, start, length, pde) { - DECLARE_BITMAP(tmp_bitmap, GEN6_PTES); - - bitmap_zero(tmp_bitmap, GEN6_PTES); - bitmap_set(tmp_bitmap, gen6_pte_index(start), - gen6_pte_count(start, length)); - - if (__test_and_clear_bit(pde, new_page_tables)) - gen6_write_pde(ppgtt, pde, pt); - - trace_i915_page_table_entry_map(vm, pde, pt, - gen6_pte_index(start), - gen6_pte_count(start, length), - GEN6_PTES); - bitmap_or(pt->used_ptes, tmp_bitmap, pt->used_ptes, - GEN6_PTES); + if (flush) { + mark_tlbs_dirty(ppgtt); + wmb(); } - WARN_ON(!bitmap_empty(new_page_tables, I915_PDES)); - - /* Make sure write is complete before other code can use this page - * table. Also require for WC mapped PTEs */ - readl(ggtt->gsm); - - mark_tlbs_dirty(ppgtt); return 0; unwind_out: - for_each_set_bit(pde, new_page_tables, I915_PDES) { - struct i915_page_table *pt = ppgtt->pd.page_table[pde]; - - ppgtt->pd.page_table[pde] = vm->scratch_pt; - free_pt(vm, pt); - } - - mark_tlbs_dirty(ppgtt); - return ret; + gen6_ppgtt_clear_range(vm, from, start); + return -ENOMEM; } static int gen6_init_scratch(struct i915_address_space *vm) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index 6162bedc0811..5ad5b59a01b1 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -69,7 +69,7 @@ typedef uint64_t gen8_ppgtt_pml4e_t; #define GEN6_PTE_UNCACHED (1 << 1) #define GEN6_PTE_VALID (1 << 0) -#define I915_PTES(pte_len) (PAGE_SIZE / (pte_len)) +#define I915_PTES(pte_len) ((unsigned int)(PAGE_SIZE / (pte_len))) #define I915_PTE_MASK(pte_len) (I915_PTES(pte_len) - 1) #define I915_PDES 512 #define I915_PDE_MASK (I915_PDES - 1) @@ -220,8 +220,7 @@ struct i915_page_dma { struct i915_page_table { struct i915_page_dma base; - - unsigned long *used_ptes; + unsigned int used_ptes; }; struct i915_page_directory { diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h index 4461df5a94fe..de31c49781d3 100644 --- a/drivers/gpu/drm/i915/i915_trace.h +++ b/drivers/gpu/drm/i915/i915_trace.h @@ -245,15 +245,14 @@ DEFINE_EVENT_PRINT(i915_px_entry, i915_page_directory_pointer_entry_alloc, DECLARE_EVENT_CLASS(i915_page_table_entry_update, TP_PROTO(struct i915_address_space *vm, u32 pde, - struct i915_page_table *pt, u32 first, u32 count, u32 bits), - TP_ARGS(vm, pde, pt, first, count, bits), + struct i915_page_table *pt, u32 first, u32 count), + TP_ARGS(vm, pde, pt, first, count), TP_STRUCT__entry( __field(struct i915_address_space *, vm) __field(u32, pde) __field(u32, first) __field(u32, last) - __dynamic_array(char, cur_ptes, TRACE_PT_SIZE(bits)) ), TP_fast_assign( @@ -261,22 +260,16 @@ DECLARE_EVENT_CLASS(i915_page_table_entry_update, __entry->pde = pde; __entry->first = first; __entry->last = first + count - 1; - scnprintf(__get_str(cur_ptes), - TRACE_PT_SIZE(bits), - "%*pb", - bits, - pt->used_ptes); ), - TP_printk("vm=%p, pde=%d, updating %u:%u\t%s", - __entry->vm, __entry->pde, __entry->last, __entry->first, - __get_str(cur_ptes)) + TP_printk("vm=%p, pde=%d, updating %u:%u", + __entry->vm, __entry->pde, __entry->last, __entry->first) ); DEFINE_EVENT(i915_page_table_entry_update, i915_page_table_entry_map, TP_PROTO(struct i915_address_space *vm, u32 pde, - struct i915_page_table *pt, u32 first, u32 count, u32 bits), - TP_ARGS(vm, pde, pt, first, count, bits) + struct i915_page_table *pt, u32 first, u32 count), + TP_ARGS(vm, pde, pt, first, count) ); TRACE_EVENT(i915_gem_object_change_domain, -- cgit From fe52e37fa85d8b1404bed1389dde16297dad706a Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 15 Feb 2017 08:43:47 +0000 Subject: drm/i915: Remove bitmap tracking for used-pdes We only operate on known extents (both for alloc/clear) and so we can use both the knowledge of the bind/unbind range along with the knowledge of the existing pagetable to avoid having to allocate temporary and auxiliary bitmaps. Signed-off-by: Chris Wilson Reviewed-by: Matthew Auld Link: http://patchwork.freedesktop.org/patch/msgid/20170215084357.19977-13-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_gtt.c | 246 ++++++++++++------------------------ drivers/gpu/drm/i915/i915_gem_gtt.h | 6 +- 2 files changed, 84 insertions(+), 168 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index ae2ff36b275a..f3171d56d914 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -494,39 +494,25 @@ static void gen6_initialize_pt(struct i915_address_space *vm, static struct i915_page_directory *alloc_pd(struct i915_address_space *vm) { struct i915_page_directory *pd; - int ret = -ENOMEM; - pd = kzalloc(sizeof(*pd), GFP_KERNEL); - if (!pd) + pd = kzalloc(sizeof(*pd), GFP_KERNEL | __GFP_NOWARN); + if (unlikely(!pd)) return ERR_PTR(-ENOMEM); - pd->used_pdes = kcalloc(BITS_TO_LONGS(I915_PDES), - sizeof(*pd->used_pdes), GFP_KERNEL); - if (!pd->used_pdes) - goto fail_bitmap; - - ret = setup_px(vm, pd); - if (ret) - goto fail_page_m; + if (unlikely(setup_px(vm, pd))) { + kfree(pd); + return ERR_PTR(-ENOMEM); + } + pd->used_pdes = 0; return pd; - -fail_page_m: - kfree(pd->used_pdes); -fail_bitmap: - kfree(pd); - - return ERR_PTR(ret); } static void free_pd(struct i915_address_space *vm, struct i915_page_directory *pd) { - if (px_page(pd)) { - cleanup_px(vm, pd); - kfree(pd->used_pdes); - kfree(pd); - } + cleanup_px(vm, pd); + kfree(pd); } static void gen8_initialize_pd(struct i915_address_space *vm, @@ -540,10 +526,11 @@ static void gen8_initialize_pd(struct i915_address_space *vm, pd->page_table[i] = vm->scratch_pt; } -static int __pdp_init(struct drm_i915_private *dev_priv, +static int __pdp_init(struct i915_address_space *vm, struct i915_page_directory_pointer *pdp) { - size_t pdpes = I915_PDPES_PER_PDP(dev_priv); + size_t pdpes = I915_PDPES_PER_PDP(vm->i915); + int i; pdp->used_pdpes = kcalloc(BITS_TO_LONGS(pdpes), sizeof(unsigned long), @@ -551,8 +538,8 @@ static int __pdp_init(struct drm_i915_private *dev_priv, if (!pdp->used_pdpes) return -ENOMEM; - pdp->page_directory = kcalloc(pdpes, sizeof(*pdp->page_directory), - GFP_KERNEL); + pdp->page_directory = kmalloc_array(pdpes, sizeof(*pdp->page_directory), + GFP_KERNEL); if (!pdp->page_directory) { kfree(pdp->used_pdpes); /* the PDP might be the statically allocated top level. Keep it @@ -561,6 +548,9 @@ static int __pdp_init(struct drm_i915_private *dev_priv, return -ENOMEM; } + for (i = 0; i < pdpes; i++) + pdp->page_directory[i] = vm->scratch_pd; + return 0; } @@ -583,7 +573,7 @@ alloc_pdp(struct i915_address_space *vm) if (!pdp) return ERR_PTR(-ENOMEM); - ret = __pdp_init(vm->i915, pdp); + ret = __pdp_init(vm, pdp); if (ret) goto fail_bitmap; @@ -633,25 +623,9 @@ static void gen8_initialize_pml4(struct i915_address_space *vm, } static void -gen8_setup_pdpe(struct i915_hw_ppgtt *ppgtt, - struct i915_page_directory_pointer *pdp, - struct i915_page_directory *pd, - int index) -{ - gen8_ppgtt_pdpe_t *page_directorypo; - - if (!USES_FULL_48BIT_PPGTT(to_i915(ppgtt->base.dev))) - return; - - page_directorypo = kmap_atomic_px(pdp); - page_directorypo[index] = gen8_pdpe_encode(px_dma(pd), I915_CACHE_LLC); - kunmap_atomic(page_directorypo); -} - -static void -gen8_setup_pml4e(struct i915_pml4 *pml4, - struct i915_page_directory_pointer *pdp, - int index) +gen8_ppgtt_set_pml4e(struct i915_pml4 *pml4, + struct i915_page_directory_pointer *pdp, + int index) { gen8_ppgtt_pml4e_t *pagemap = kmap_atomic_px(pml4); @@ -758,9 +732,6 @@ static void gen8_ppgtt_set_pde(struct i915_address_space *vm, kunmap_atomic(vaddr); } -/* Removes entries from a single page dir, releasing it if it's empty. - * Caller can use the return value to update higher-level entries - */ static bool gen8_ppgtt_clear_pd(struct i915_address_space *vm, struct i915_page_directory *pd, u64 start, u64 length) @@ -773,15 +744,28 @@ static bool gen8_ppgtt_clear_pd(struct i915_address_space *vm, continue; gen8_ppgtt_set_pde(vm, pd, vm->scratch_pt, pde); - __clear_bit(pde, pd->used_pdes); + pd->used_pdes--; free_pt(vm, pt); } - if (bitmap_empty(pd->used_pdes, I915_PDES)) - return true; + return !pd->used_pdes; +} - return false; +static void gen8_ppgtt_set_pdpe(struct i915_address_space *vm, + struct i915_page_directory_pointer *pdp, + struct i915_page_directory *pd, + unsigned int pdpe) +{ + gen8_ppgtt_pdpe_t *vaddr; + + pdp->page_directory[pdpe] = pd; + if (!USES_FULL_48BIT_PPGTT(vm->i915)) + return; + + vaddr = kmap_atomic_px(pdp); + vaddr[pdpe] = gen8_pdpe_encode(px_dma(pd), I915_CACHE_LLC); + kunmap_atomic(vaddr); } /* Removes entries from a single page dir pointer, releasing it if it's empty. @@ -789,25 +773,20 @@ static bool gen8_ppgtt_clear_pd(struct i915_address_space *vm, */ static bool gen8_ppgtt_clear_pdp(struct i915_address_space *vm, struct i915_page_directory_pointer *pdp, - uint64_t start, - uint64_t length) + u64 start, u64 length) { - struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); struct i915_page_directory *pd; - uint64_t pdpe; + unsigned int pdpe; gen8_for_each_pdpe(pd, pdp, start, length, pdpe) { - if (WARN_ON(!pdp->page_directory[pdpe])) - break; + if (!gen8_ppgtt_clear_pd(vm, pd, start, length)) + continue; - if (gen8_ppgtt_clear_pd(vm, pd, start, length)) { - __clear_bit(pdpe, pdp->used_pdpes); - gen8_setup_pdpe(ppgtt, pdp, vm->scratch_pd, pdpe); - free_pd(vm, pd); - } - } + gen8_ppgtt_set_pdpe(vm, pdp, vm->scratch_pd, pdpe); + __clear_bit(pdpe, pdp->used_pdpes); - mark_tlbs_dirty(ppgtt); + free_pd(vm, pd); + } if (bitmap_empty(pdp->used_pdpes, I915_PDPES_PER_PDP(dev_priv))) return true; @@ -815,15 +794,21 @@ static bool gen8_ppgtt_clear_pdp(struct i915_address_space *vm, return false; } +static void gen8_ppgtt_clear_3lvl(struct i915_address_space *vm, + u64 start, u64 length) +{ + gen8_ppgtt_clear_pdp(vm, &i915_vm_to_ppgtt(vm)->pdp, start, length); +} + /* Removes entries from a single pml4. * This is the top-level structure in 4-level page tables used on gen8+. * Empty entries are always scratch pml4e. */ -static void gen8_ppgtt_clear_pml4(struct i915_address_space *vm, - struct i915_pml4 *pml4, - uint64_t start, - uint64_t length) +static void gen8_ppgtt_clear_4lvl(struct i915_address_space *vm, + u64 start, u64 length) { + struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); + struct i915_pml4 *pml4 = &ppgtt->pml4; struct i915_page_directory_pointer *pdp; uint64_t pml4e; @@ -835,23 +820,12 @@ static void gen8_ppgtt_clear_pml4(struct i915_address_space *vm, if (gen8_ppgtt_clear_pdp(vm, pdp, start, length)) { __clear_bit(pml4e, pml4->used_pml4es); - gen8_setup_pml4e(pml4, vm->scratch_pdp, pml4e); + gen8_ppgtt_set_pml4e(pml4, vm->scratch_pdp, pml4e); free_pdp(vm, pdp); } } } -static void gen8_ppgtt_clear_range(struct i915_address_space *vm, - uint64_t start, uint64_t length) -{ - struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); - - if (USES_FULL_48BIT_PPGTT(vm->i915)) - gen8_ppgtt_clear_pml4(vm, &ppgtt->pml4, start, length); - else - gen8_ppgtt_clear_pdp(vm, &ppgtt->pdp, start, length); -} - struct sgt_dma { struct scatterlist *sg; dma_addr_t dma, max; @@ -956,12 +930,9 @@ static void gen8_free_page_tables(struct i915_address_space *vm, if (!px_page(pd)) return; - for_each_set_bit(i, pd->used_pdes, I915_PDES) { - if (WARN_ON(!pd->page_table[i])) - continue; - - free_pt(vm, pd->page_table[i]); - pd->page_table[i] = NULL; + for (i = 0; i < I915_PDES; i++) { + if (pd->page_table[i] != vm->scratch_pt) + free_pt(vm, pd->page_table[i]); } } @@ -1056,7 +1027,7 @@ static void gen8_ppgtt_cleanup_3lvl(struct i915_address_space *vm, int i; for_each_set_bit(i, pdp->used_pdpes, I915_PDPES_PER_PDP(vm->i915)) { - if (WARN_ON(!pdp->page_directory[i])) + if (pdp->page_directory[i] == vm->scratch_pd) continue; gen8_free_page_tables(vm, pdp->page_directory[i]); @@ -1096,44 +1067,28 @@ static void gen8_ppgtt_cleanup(struct i915_address_space *vm) gen8_free_scratch(vm); } -/** - * gen8_ppgtt_alloc_pagetabs() - Allocate page tables for VA range. - * @vm: Master vm structure. - * @pd: Page directory for this address range. - * @start: Starting virtual address to begin allocations. - * @length: Size of the allocations. - * - * Allocate the required number of page tables. Extremely similar to - * gen8_ppgtt_alloc_page_directories(). The main difference is here we are limited by - * the page directory boundary (instead of the page directory pointer). That - * boundary is 1GB virtual. Therefore, unlike gen8_ppgtt_alloc_page_directories(), it is - * possible, and likely that the caller will need to use multiple calls of this - * function to achieve the appropriate allocation. - * - * Return: 0 if success; negative error code otherwise. - */ -static int gen8_ppgtt_alloc_pagetabs(struct i915_address_space *vm, - struct i915_page_directory *pd, - u64 start, u64 length) +static int gen8_ppgtt_alloc_pd(struct i915_address_space *vm, + struct i915_page_directory *pd, + u64 start, u64 length) { struct i915_page_table *pt; u64 from = start; - uint32_t pde; + unsigned int pde; gen8_for_each_pde(pt, pd, start, length, pde) { - /* Don't reallocate page tables */ - if (!test_bit(pde, pd->used_pdes)) { + if (pt == vm->scratch_pt) { pt = alloc_pt(vm); if (IS_ERR(pt)) goto unwind; gen8_initialize_pt(vm, pt); - pd->page_table[pde] = pt; + + gen8_ppgtt_set_pde(vm, pd, pt, pde); + pd->used_pdes++; } + pt->used_ptes += gen8_pte_count(start, length); - trace_i915_page_table_entry_alloc(vm, pde, start, GEN8_PDE_SHIFT); } - return 0; unwind: @@ -1210,7 +1165,7 @@ unwind_out: * caller to free on error. * * Allocate the required number of page directory pointers. Extremely similar to - * gen8_ppgtt_alloc_page_directories() and gen8_ppgtt_alloc_pagetabs(). + * gen8_ppgtt_alloc_page_directories() and gen8_ppgtt_alloc_pd(). * The main difference is here we are limited by the pml4 boundary (instead of * the page directory pointer). * @@ -1262,9 +1217,8 @@ free_gen8_temp_bitmaps(unsigned long *new_pds) /* Fills in the page directory bitmap, and the array of page tables bitmap. Both * of these are based on the number of PDPEs in the system. */ -static -int __must_check alloc_gen8_temp_bitmaps(unsigned long **new_pds, - uint32_t pdpes) +static int __must_check +alloc_gen8_temp_bitmaps(unsigned long **new_pds, uint32_t pdpes) { unsigned long *pds; @@ -1284,8 +1238,6 @@ static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm, struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); unsigned long *new_page_dirs; struct i915_page_directory *pd; - const uint64_t orig_start = start; - const uint64_t orig_length = length; uint32_t pdpe; uint32_t pdpes = I915_PDPES_PER_PDP(dev_priv); int ret; @@ -1304,51 +1256,16 @@ static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm, /* For every page directory referenced, allocate page tables */ gen8_for_each_pdpe(pd, pdp, start, length, pdpe) { - ret = gen8_ppgtt_alloc_pagetabs(vm, pd, start, length); + ret = gen8_ppgtt_alloc_pd(vm, pd, start, length); if (ret) goto err_out; - } - start = orig_start; - length = orig_length; + if (test_and_set_bit(pdpe, pdp->used_pdpes)) + gen8_ppgtt_set_pdpe(vm, pdp, pd, pdpe); + } /* Allocations have completed successfully, so set the bitmaps, and do * the mappings. */ - gen8_for_each_pdpe(pd, pdp, start, length, pdpe) { - gen8_pde_t *const page_directory = kmap_atomic_px(pd); - struct i915_page_table *pt; - uint64_t pd_len = length; - uint64_t pd_start = start; - uint32_t pde; - - /* Every pd should be allocated, we just did that above. */ - WARN_ON(!pd); - - gen8_for_each_pde(pt, pd, pd_start, pd_len, pde) { - /* Same reasoning as pd */ - WARN_ON(!pt); - WARN_ON(!pd_len); - WARN_ON(!gen8_pte_count(pd_start, pd_len)); - - /* Our pde is now pointing to the pagetable, pt */ - __set_bit(pde, pd->used_pdes); - - /* Map the PDE to the page table */ - page_directory[pde] = gen8_pde_encode(px_dma(pt), - I915_CACHE_LLC); - trace_i915_page_table_entry_map(&ppgtt->base, pde, pt, - gen8_pte_index(start), - gen8_pte_count(start, length)); - - /* NB: We haven't yet mapped ptes to pages. At this - * point we're still relying on insert_entries() */ - } - - kunmap_atomic(page_directory); - __set_bit(pdpe, pdp->used_pdpes); - gen8_setup_pdpe(ppgtt, pdp, pd, pdpe); - } - free_gen8_temp_bitmaps(new_page_dirs); mark_tlbs_dirty(ppgtt); return 0; @@ -1391,7 +1308,7 @@ static int gen8_alloc_va_range_4lvl(struct i915_address_space *vm, if (ret) goto err_out; - gen8_setup_pml4e(pml4, pdp, pml4e); + gen8_ppgtt_set_pml4e(pml4, pdp, pml4e); } bitmap_or(pml4->used_pml4es, new_pdps, pml4->used_pml4es, @@ -1440,7 +1357,7 @@ static void gen8_dump_pdp(struct i915_hw_ppgtt *ppgtt, uint32_t pte; gen8_pte_t *pt_vaddr; - if (!test_bit(pde, pd->used_pdes)) + if (pd->page_table[pde] == ppgtt->base.scratch_pt) continue; pt_vaddr = kmap_atomic_px(pt); @@ -1543,7 +1460,6 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt) ppgtt->base.start = 0; ppgtt->base.cleanup = gen8_ppgtt_cleanup; ppgtt->base.allocate_va_range = gen8_alloc_va_range; - ppgtt->base.clear_range = gen8_ppgtt_clear_range; ppgtt->base.unbind_vma = ppgtt_unbind_vma; ppgtt->base.bind_vma = ppgtt_bind_vma; ppgtt->debug_dump = gen8_dump_ppgtt; @@ -1565,8 +1481,9 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt) ppgtt->switch_mm = gen8_48b_mm_switch; ppgtt->base.insert_entries = gen8_ppgtt_insert_4lvl; + ppgtt->base.clear_range = gen8_ppgtt_clear_4lvl; } else { - ret = __pdp_init(dev_priv, &ppgtt->pdp); + ret = __pdp_init(&ppgtt->base, &ppgtt->pdp); if (ret) goto free_scratch; @@ -1583,6 +1500,7 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt) } ppgtt->base.insert_entries = gen8_ppgtt_insert_3lvl; + ppgtt->base.clear_range = gen8_ppgtt_clear_3lvl; } if (intel_vgpu_active(dev_priv)) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index 5ad5b59a01b1..a62b0ef1f3fc 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -226,8 +226,8 @@ struct i915_page_table { struct i915_page_directory { struct i915_page_dma base; - unsigned long *used_pdes; struct i915_page_table *page_table[I915_PDES]; /* PDEs */ + unsigned int used_pdes; }; struct i915_page_directory_pointer { @@ -520,9 +520,7 @@ static inline size_t gen8_pte_count(uint64_t address, uint64_t length) static inline dma_addr_t i915_page_dir_dma_addr(const struct i915_hw_ppgtt *ppgtt, const unsigned n) { - return test_bit(n, ppgtt->pdp.used_pdpes) ? - px_dma(ppgtt->pdp.page_directory[n]) : - px_dma(ppgtt->base.scratch_pd); + return px_dma(ppgtt->pdp.page_directory[n]); } static inline struct i915_ggtt * -- cgit From e2b763caa6eb68ea56918ee6f79b40b82bdcf7c9 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 15 Feb 2017 08:43:48 +0000 Subject: drm/i915: Remove bitmap tracking for used-pdpes We only operate on known extents (both for alloc/clear) and so we can use both the knowledge of the bind/unbind range along with the knowledge of the existing pagetable to avoid having to allocate temporary and auxiliary bitmaps. Signed-off-by: Chris Wilson Reviewed-by: Matthew Auld Link: http://patchwork.freedesktop.org/patch/msgid/20170215084357.19977-14-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_gtt.c | 279 +++++++++++------------------------- drivers/gpu/drm/i915/i915_gem_gtt.h | 3 +- 2 files changed, 86 insertions(+), 196 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index f3171d56d914..46a791b29ae5 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -529,24 +529,13 @@ static void gen8_initialize_pd(struct i915_address_space *vm, static int __pdp_init(struct i915_address_space *vm, struct i915_page_directory_pointer *pdp) { - size_t pdpes = I915_PDPES_PER_PDP(vm->i915); - int i; - - pdp->used_pdpes = kcalloc(BITS_TO_LONGS(pdpes), - sizeof(unsigned long), - GFP_KERNEL); - if (!pdp->used_pdpes) - return -ENOMEM; + const unsigned int pdpes = I915_PDPES_PER_PDP(vm->i915); + unsigned int i; pdp->page_directory = kmalloc_array(pdpes, sizeof(*pdp->page_directory), - GFP_KERNEL); - if (!pdp->page_directory) { - kfree(pdp->used_pdpes); - /* the PDP might be the statically allocated top level. Keep it - * as clean as possible */ - pdp->used_pdpes = NULL; + GFP_KERNEL | __GFP_NOWARN); + if (unlikely(!pdp->page_directory)) return -ENOMEM; - } for (i = 0; i < pdpes; i++) pdp->page_directory[i] = vm->scratch_pd; @@ -556,7 +545,6 @@ static int __pdp_init(struct i915_address_space *vm, static void __pdp_fini(struct i915_page_directory_pointer *pdp) { - kfree(pdp->used_pdpes); kfree(pdp->page_directory); pdp->page_directory = NULL; } @@ -614,23 +602,12 @@ static void gen8_initialize_pdp(struct i915_address_space *vm, static void gen8_initialize_pml4(struct i915_address_space *vm, struct i915_pml4 *pml4) { - gen8_ppgtt_pml4e_t scratch_pml4e; - - scratch_pml4e = gen8_pml4e_encode(px_dma(vm->scratch_pdp), - I915_CACHE_LLC); - - fill_px(vm, pml4, scratch_pml4e); -} - -static void -gen8_ppgtt_set_pml4e(struct i915_pml4 *pml4, - struct i915_page_directory_pointer *pdp, - int index) -{ - gen8_ppgtt_pml4e_t *pagemap = kmap_atomic_px(pml4); + unsigned int i; - pagemap[index] = gen8_pml4e_encode(px_dma(pdp), I915_CACHE_LLC); - kunmap_atomic(pagemap); + fill_px(vm, pml4, + gen8_pml4e_encode(px_dma(vm->scratch_pdp), I915_CACHE_LLC)); + for (i = 0; i < GEN8_PML4ES_PER_PML4; i++) + pml4->pdps[i] = vm->scratch_pdp; } /* Broadwell Page Directory Pointer Descriptors */ @@ -783,15 +760,12 @@ static bool gen8_ppgtt_clear_pdp(struct i915_address_space *vm, continue; gen8_ppgtt_set_pdpe(vm, pdp, vm->scratch_pd, pdpe); - __clear_bit(pdpe, pdp->used_pdpes); + pdp->used_pdpes--; free_pd(vm, pd); } - if (bitmap_empty(pdp->used_pdpes, I915_PDPES_PER_PDP(dev_priv))) - return true; - - return false; + return !pdp->used_pdpes; } static void gen8_ppgtt_clear_3lvl(struct i915_address_space *vm, @@ -800,6 +774,19 @@ static void gen8_ppgtt_clear_3lvl(struct i915_address_space *vm, gen8_ppgtt_clear_pdp(vm, &i915_vm_to_ppgtt(vm)->pdp, start, length); } +static void gen8_ppgtt_set_pml4e(struct i915_pml4 *pml4, + struct i915_page_directory_pointer *pdp, + unsigned int pml4e) +{ + gen8_ppgtt_pml4e_t *vaddr; + + pml4->pdps[pml4e] = pdp; + + vaddr = kmap_atomic_px(pml4); + vaddr[pml4e] = gen8_pml4e_encode(px_dma(pdp), I915_CACHE_LLC); + kunmap_atomic(vaddr); +} + /* Removes entries from a single pml4. * This is the top-level structure in 4-level page tables used on gen8+. * Empty entries are always scratch pml4e. @@ -810,19 +797,18 @@ static void gen8_ppgtt_clear_4lvl(struct i915_address_space *vm, struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); struct i915_pml4 *pml4 = &ppgtt->pml4; struct i915_page_directory_pointer *pdp; - uint64_t pml4e; + unsigned int pml4e; GEM_BUG_ON(!USES_FULL_48BIT_PPGTT(vm->i915)); gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) { - if (WARN_ON(!pml4->pdps[pml4e])) - break; + if (!gen8_ppgtt_clear_pdp(vm, pdp, start, length)) + continue; - if (gen8_ppgtt_clear_pdp(vm, pdp, start, length)) { - __clear_bit(pml4e, pml4->used_pml4es); - gen8_ppgtt_set_pml4e(pml4, vm->scratch_pdp, pml4e); - free_pdp(vm, pdp); - } + gen8_ppgtt_set_pml4e(pml4, vm->scratch_pdp, pml4e); + __clear_bit(pml4e, pml4->used_pml4es); + + free_pdp(vm, pdp); } } @@ -1026,7 +1012,7 @@ static void gen8_ppgtt_cleanup_3lvl(struct i915_address_space *vm, { int i; - for_each_set_bit(i, pdp->used_pdpes, I915_PDPES_PER_PDP(vm->i915)) { + for (i = 0; i < I915_PDPES_PER_PDP(vm->i915); i++) { if (pdp->page_directory[i] == vm->scratch_pd) continue; @@ -1096,65 +1082,6 @@ unwind: return -ENOMEM; } -/** - * gen8_ppgtt_alloc_page_directories() - Allocate page directories for VA range. - * @vm: Master vm structure. - * @pdp: Page directory pointer for this address range. - * @start: Starting virtual address to begin allocations. - * @length: Size of the allocations. - * @new_pds: Bitmap set by function with new allocations. Likely used by the - * caller to free on error. - * - * Allocate the required number of page directories starting at the pde index of - * @start, and ending at the pde index @start + @length. This function will skip - * over already allocated page directories within the range, and only allocate - * new ones, setting the appropriate pointer within the pdp as well as the - * correct position in the bitmap @new_pds. - * - * The function will only allocate the pages within the range for a give page - * directory pointer. In other words, if @start + @length straddles a virtually - * addressed PDP boundary (512GB for 4k pages), there will be more allocations - * required by the caller, This is not currently possible, and the BUG in the - * code will prevent it. - * - * Return: 0 if success; negative error code otherwise. - */ -static int -gen8_ppgtt_alloc_page_directories(struct i915_address_space *vm, - struct i915_page_directory_pointer *pdp, - uint64_t start, - uint64_t length, - unsigned long *new_pds) -{ - struct i915_page_directory *pd; - uint32_t pdpe; - uint32_t pdpes = I915_PDPES_PER_PDP(dev_priv); - - WARN_ON(!bitmap_empty(new_pds, pdpes)); - - gen8_for_each_pdpe(pd, pdp, start, length, pdpe) { - if (test_bit(pdpe, pdp->used_pdpes)) - continue; - - pd = alloc_pd(vm); - if (IS_ERR(pd)) - goto unwind_out; - - gen8_initialize_pd(vm, pd); - pdp->page_directory[pdpe] = pd; - __set_bit(pdpe, new_pds); - trace_i915_page_directory_entry_alloc(vm, pdpe, start, GEN8_PDPE_SHIFT); - } - - return 0; - -unwind_out: - for_each_set_bit(pdpe, new_pds, pdpes) - free_pd(vm, pdp->page_directory[pdpe]); - - return -ENOMEM; -} - /** * gen8_ppgtt_alloc_page_dirpointers() - Allocate pdps for VA range. * @vm: Master vm structure. @@ -1175,23 +1102,19 @@ static int gen8_ppgtt_alloc_page_dirpointers(struct i915_address_space *vm, struct i915_pml4 *pml4, uint64_t start, - uint64_t length, - unsigned long *new_pdps) + uint64_t length) { struct i915_page_directory_pointer *pdp; uint32_t pml4e; - WARN_ON(!bitmap_empty(new_pdps, GEN8_PML4ES_PER_PML4)); - gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) { if (!test_bit(pml4e, pml4->used_pml4es)) { pdp = alloc_pdp(vm); if (IS_ERR(pdp)) - goto unwind_out; + return PTR_ERR(pdp); gen8_initialize_pdp(vm, pdp); pml4->pdps[pml4e] = pdp; - __set_bit(pml4e, new_pdps); trace_i915_page_directory_pointer_entry_alloc(vm, pml4e, start, @@ -1200,34 +1123,6 @@ gen8_ppgtt_alloc_page_dirpointers(struct i915_address_space *vm, } return 0; - -unwind_out: - for_each_set_bit(pml4e, new_pdps, GEN8_PML4ES_PER_PML4) - free_pdp(vm, pml4->pdps[pml4e]); - - return -ENOMEM; -} - -static void -free_gen8_temp_bitmaps(unsigned long *new_pds) -{ - kfree(new_pds); -} - -/* Fills in the page directory bitmap, and the array of page tables bitmap. Both - * of these are based on the number of PDPEs in the system. - */ -static int __must_check -alloc_gen8_temp_bitmaps(unsigned long **new_pds, uint32_t pdpes) -{ - unsigned long *pds; - - pds = kcalloc(BITS_TO_LONGS(pdpes), sizeof(unsigned long), GFP_TEMPORARY); - if (!pds) - return -ENOMEM; - - *new_pds = pds; - return 0; } static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm, @@ -1236,47 +1131,37 @@ static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm, uint64_t length) { struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); - unsigned long *new_page_dirs; struct i915_page_directory *pd; - uint32_t pdpe; - uint32_t pdpes = I915_PDPES_PER_PDP(dev_priv); + u64 from = start; + unsigned int pdpe; int ret; - ret = alloc_gen8_temp_bitmaps(&new_page_dirs, pdpes); - if (ret) - return ret; - - /* Do the allocations first so we can easily bail out */ - ret = gen8_ppgtt_alloc_page_directories(vm, pdp, start, length, - new_page_dirs); - if (ret) { - free_gen8_temp_bitmaps(new_page_dirs); - return ret; - } - - /* For every page directory referenced, allocate page tables */ gen8_for_each_pdpe(pd, pdp, start, length, pdpe) { - ret = gen8_ppgtt_alloc_pd(vm, pd, start, length); - if (ret) - goto err_out; + if (pd == vm->scratch_pd) { + pd = alloc_pd(vm); + if (IS_ERR(pd)) + goto unwind; - if (test_and_set_bit(pdpe, pdp->used_pdpes)) + gen8_initialize_pd(vm, pd); gen8_ppgtt_set_pdpe(vm, pdp, pd, pdpe); + pdp->used_pdpes++; + } + + ret = gen8_ppgtt_alloc_pd(vm, pd, start, length); + if (unlikely(ret)) { + gen8_ppgtt_set_pdpe(vm, pdp, vm->scratch_pd, pdpe); + pdp->used_pdpes--; + free_pd(vm, pd); + goto unwind; + } } - /* Allocations have completed successfully, so set the bitmaps, and do - * the mappings. */ - free_gen8_temp_bitmaps(new_page_dirs); mark_tlbs_dirty(ppgtt); return 0; -err_out: - for_each_set_bit(pdpe, new_page_dirs, pdpes) - free_pd(vm, pdp->page_directory[pdpe]); - - free_gen8_temp_bitmaps(new_page_dirs); - mark_tlbs_dirty(ppgtt); - return ret; +unwind: + gen8_ppgtt_clear_pdp(vm, pdp, from, start - from); + return -ENOMEM; } static int gen8_alloc_va_range_4lvl(struct i915_address_space *vm, @@ -1296,8 +1181,7 @@ static int gen8_alloc_va_range_4lvl(struct i915_address_space *vm, /* The pagedirectory and pagetable allocations are done in the shared 3 * and 4 level code. Just allocate the pdps. */ - ret = gen8_ppgtt_alloc_page_dirpointers(vm, pml4, start, length, - new_pdps); + ret = gen8_ppgtt_alloc_page_dirpointers(vm, pml4, start, length); if (ret) return ret; @@ -1349,7 +1233,7 @@ static void gen8_dump_pdp(struct i915_hw_ppgtt *ppgtt, uint64_t pd_start = start; uint32_t pde; - if (!test_bit(pdpe, pdp->used_pdpes)) + if (pdp->page_directory[pdpe] == ppgtt->base.scratch_pd) continue; seq_printf(m, "\tPDPE #%d\n", pdpe); @@ -1414,31 +1298,36 @@ static void gen8_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m) } } -static int gen8_preallocate_top_level_pdps(struct i915_hw_ppgtt *ppgtt) +static int gen8_preallocate_top_level_pdp(struct i915_hw_ppgtt *ppgtt) { - unsigned long *new_page_dirs; - uint32_t pdpes = I915_PDPES_PER_PDP(to_i915(ppgtt->base.dev)); - int ret; + struct i915_address_space *vm = &ppgtt->base; + struct i915_page_directory_pointer *pdp = &ppgtt->pdp; + struct i915_page_directory *pd; + u64 start = 0, length = ppgtt->base.total; + u64 from = start; + unsigned int pdpe; - /* We allocate temp bitmap for page tables for no gain - * but as this is for init only, lets keep the things simple - */ - ret = alloc_gen8_temp_bitmaps(&new_page_dirs, pdpes); - if (ret) - return ret; + gen8_for_each_pdpe(pd, pdp, start, length, pdpe) { + pd = alloc_pd(vm); + if (IS_ERR(pd)) + goto unwind; - /* Allocate for all pdps regardless of how the ppgtt - * was defined. - */ - ret = gen8_ppgtt_alloc_page_directories(&ppgtt->base, &ppgtt->pdp, - 0, 1ULL << 32, - new_page_dirs); - if (!ret) - *ppgtt->pdp.used_pdpes = *new_page_dirs; + gen8_initialize_pd(vm, pd); + gen8_ppgtt_set_pdpe(vm, pdp, pd, pdpe); + pdp->used_pdpes++; + } - free_gen8_temp_bitmaps(new_page_dirs); + pdp->used_pdpes++; /* never remove */ + return 0; - return ret; +unwind: + start -= from; + gen8_for_each_pdpe(pd, pdp, from, start, pdpe) { + gen8_ppgtt_set_pdpe(vm, pdp, vm->scratch_pd, pdpe); + free_pd(vm, pd); + } + pdp->used_pdpes = 0; + return -ENOMEM; } /* @@ -1494,9 +1383,11 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt) GEN8_PML4E_SHIFT); if (intel_vgpu_active(dev_priv)) { - ret = gen8_preallocate_top_level_pdps(ppgtt); - if (ret) + ret = gen8_preallocate_top_level_pdp(ppgtt); + if (ret) { + __pdp_fini(&ppgtt->pdp); goto free_scratch; + } } ppgtt->base.insert_entries = gen8_ppgtt_insert_3lvl; diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index a62b0ef1f3fc..3628c7c564ae 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -232,9 +232,8 @@ struct i915_page_directory { struct i915_page_directory_pointer { struct i915_page_dma base; - - unsigned long *used_pdpes; struct i915_page_directory **page_directory; + unsigned int used_pdpes; }; struct i915_pml4 { -- cgit From c5d092a4293fa33559d5464c5bd58a2e3a44d6d7 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 15 Feb 2017 08:43:49 +0000 Subject: drm/i915: Remove bitmap tracking for used-pml4 We only operate on known extents (both for alloc/clear) and so we can use both the knowledge of the bind/unbind range along with the knowledge of the existing pagetable to avoid having to allocate temporary and auxiliary bitmaps. Signed-off-by: Chris Wilson Reviewed-by: Matthew Auld Link: http://patchwork.freedesktop.org/patch/msgid/20170215084357.19977-15-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_gtt.c | 132 +++++++++++------------------------- drivers/gpu/drm/i915/i915_gem_gtt.h | 2 - 2 files changed, 38 insertions(+), 96 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 46a791b29ae5..76ec3fa0a67a 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -806,7 +806,6 @@ static void gen8_ppgtt_clear_4lvl(struct i915_address_space *vm, continue; gen8_ppgtt_set_pml4e(pml4, vm->scratch_pdp, pml4e); - __clear_bit(pml4e, pml4->used_pml4es); free_pdp(vm, pdp); } @@ -1027,8 +1026,8 @@ static void gen8_ppgtt_cleanup_4lvl(struct i915_hw_ppgtt *ppgtt) { int i; - for_each_set_bit(i, ppgtt->pml4.used_pml4es, GEN8_PML4ES_PER_PML4) { - if (WARN_ON(!ppgtt->pml4.pdps[i])) + for (i = 0; i < GEN8_PML4ES_PER_PML4; i++) { + if (ppgtt->pml4.pdps[i] == ppgtt->base.scratch_pdp) continue; gen8_ppgtt_cleanup_3lvl(&ppgtt->base, ppgtt->pml4.pdps[i]); @@ -1082,53 +1081,9 @@ unwind: return -ENOMEM; } -/** - * gen8_ppgtt_alloc_page_dirpointers() - Allocate pdps for VA range. - * @vm: Master vm structure. - * @pml4: Page map level 4 for this address range. - * @start: Starting virtual address to begin allocations. - * @length: Size of the allocations. - * @new_pdps: Bitmap set by function with new allocations. Likely used by the - * caller to free on error. - * - * Allocate the required number of page directory pointers. Extremely similar to - * gen8_ppgtt_alloc_page_directories() and gen8_ppgtt_alloc_pd(). - * The main difference is here we are limited by the pml4 boundary (instead of - * the page directory pointer). - * - * Return: 0 if success; negative error code otherwise. - */ -static int -gen8_ppgtt_alloc_page_dirpointers(struct i915_address_space *vm, - struct i915_pml4 *pml4, - uint64_t start, - uint64_t length) -{ - struct i915_page_directory_pointer *pdp; - uint32_t pml4e; - - gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) { - if (!test_bit(pml4e, pml4->used_pml4es)) { - pdp = alloc_pdp(vm); - if (IS_ERR(pdp)) - return PTR_ERR(pdp); - - gen8_initialize_pdp(vm, pdp); - pml4->pdps[pml4e] = pdp; - trace_i915_page_directory_pointer_entry_alloc(vm, - pml4e, - start, - GEN8_PML4E_SHIFT); - } - } - - return 0; -} - -static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm, - struct i915_page_directory_pointer *pdp, - uint64_t start, - uint64_t length) +static int gen8_ppgtt_alloc_pdp(struct i915_address_space *vm, + struct i915_page_directory_pointer *pdp, + u64 start, u64 length) { struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); struct i915_page_directory *pd; @@ -1164,58 +1119,46 @@ unwind: return -ENOMEM; } -static int gen8_alloc_va_range_4lvl(struct i915_address_space *vm, - struct i915_pml4 *pml4, - uint64_t start, - uint64_t length) +static int gen8_ppgtt_alloc_3lvl(struct i915_address_space *vm, + u64 start, u64 length) { - DECLARE_BITMAP(new_pdps, GEN8_PML4ES_PER_PML4); - struct i915_page_directory_pointer *pdp; - uint64_t pml4e; - int ret = 0; - - /* Do the pml4 allocations first, so we don't need to track the newly - * allocated tables below the pdp */ - bitmap_zero(new_pdps, GEN8_PML4ES_PER_PML4); + return gen8_ppgtt_alloc_pdp(vm, + &i915_vm_to_ppgtt(vm)->pdp, start, length); +} - /* The pagedirectory and pagetable allocations are done in the shared 3 - * and 4 level code. Just allocate the pdps. - */ - ret = gen8_ppgtt_alloc_page_dirpointers(vm, pml4, start, length); - if (ret) - return ret; +static int gen8_ppgtt_alloc_4lvl(struct i915_address_space *vm, + u64 start, u64 length) +{ + struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); + struct i915_pml4 *pml4 = &ppgtt->pml4; + struct i915_page_directory_pointer *pdp; + u64 from = start; + u32 pml4e; + int ret; gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) { - WARN_ON(!pdp); + if (pml4->pdps[pml4e] == vm->scratch_pdp) { + pdp = alloc_pdp(vm); + if (IS_ERR(pdp)) + goto unwind; - ret = gen8_alloc_va_range_3lvl(vm, pdp, start, length); - if (ret) - goto err_out; + gen8_initialize_pdp(vm, pdp); + gen8_ppgtt_set_pml4e(pml4, pdp, pml4e); + } - gen8_ppgtt_set_pml4e(pml4, pdp, pml4e); + ret = gen8_ppgtt_alloc_pdp(vm, pdp, start, length); + if (unlikely(ret)) { + gen8_ppgtt_set_pml4e(pml4, vm->scratch_pdp, pml4e); + free_pdp(vm, pdp); + goto unwind; + } } - bitmap_or(pml4->used_pml4es, new_pdps, pml4->used_pml4es, - GEN8_PML4ES_PER_PML4); - return 0; -err_out: - for_each_set_bit(pml4e, new_pdps, GEN8_PML4ES_PER_PML4) - gen8_ppgtt_cleanup_3lvl(vm, pml4->pdps[pml4e]); - - return ret; -} - -static int gen8_alloc_va_range(struct i915_address_space *vm, - uint64_t start, uint64_t length) -{ - struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); - - if (USES_FULL_48BIT_PPGTT(vm->i915)) - return gen8_alloc_va_range_4lvl(vm, &ppgtt->pml4, start, length); - else - return gen8_alloc_va_range_3lvl(vm, &ppgtt->pdp, start, length); +unwind: + gen8_ppgtt_clear_4lvl(vm, from, start - from); + return -ENOMEM; } static void gen8_dump_pdp(struct i915_hw_ppgtt *ppgtt, @@ -1289,7 +1232,7 @@ static void gen8_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m) struct i915_page_directory_pointer *pdp; gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) { - if (!test_bit(pml4e, pml4->used_pml4es)) + if (pml4->pdps[pml4e] == ppgtt->base.scratch_pdp) continue; seq_printf(m, " PML4E #%llu\n", pml4e); @@ -1348,7 +1291,6 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt) ppgtt->base.start = 0; ppgtt->base.cleanup = gen8_ppgtt_cleanup; - ppgtt->base.allocate_va_range = gen8_alloc_va_range; ppgtt->base.unbind_vma = ppgtt_unbind_vma; ppgtt->base.bind_vma = ppgtt_bind_vma; ppgtt->debug_dump = gen8_dump_ppgtt; @@ -1369,6 +1311,7 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt) ppgtt->base.total = 1ULL << 48; ppgtt->switch_mm = gen8_48b_mm_switch; + ppgtt->base.allocate_va_range = gen8_ppgtt_alloc_4lvl; ppgtt->base.insert_entries = gen8_ppgtt_insert_4lvl; ppgtt->base.clear_range = gen8_ppgtt_clear_4lvl; } else { @@ -1390,6 +1333,7 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt) } } + ppgtt->base.allocate_va_range = gen8_ppgtt_alloc_3lvl; ppgtt->base.insert_entries = gen8_ppgtt_insert_3lvl; ppgtt->base.clear_range = gen8_ppgtt_clear_3lvl; } diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index 3628c7c564ae..933888725368 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -238,8 +238,6 @@ struct i915_page_directory_pointer { struct i915_pml4 { struct i915_page_dma base; - - DECLARE_BITMAP(used_pml4es, GEN8_PML4ES_PER_PML4); struct i915_page_directory_pointer *pdps[GEN8_PML4ES_PER_PML4]; }; -- cgit From ec151f31cd81cc99b957d6b528709d6ecfb25801 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 15 Feb 2017 08:43:50 +0000 Subject: drm/i915: Remove superfluous posting reads after clear GGTT The barrier here is not required - we apply the barrier before the range is ever reused by the GPU instead. Signed-off-by: Chris Wilson Reviewed-by: Matthew Auld Link: http://patchwork.freedesktop.org/patch/msgid/20170215084357.19977-16-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_gtt.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 76ec3fa0a67a..234a65e8f7b1 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -2184,7 +2184,6 @@ static void gen8_ggtt_clear_range(struct i915_address_space *vm, for (i = 0; i < num_entries; i++) gen8_set_pte(>t_base[i], scratch_pte); - readl(gtt_base); } static void gen6_ggtt_clear_range(struct i915_address_space *vm, @@ -2209,7 +2208,6 @@ static void gen6_ggtt_clear_range(struct i915_address_space *vm, for (i = 0; i < num_entries; i++) iowrite32(scratch_pte, >t_base[i]); - readl(gtt_base); } static void i915_ggtt_insert_page(struct i915_address_space *vm, @@ -2233,7 +2231,6 @@ static void i915_ggtt_insert_entries(struct i915_address_space *vm, AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY; intel_gtt_insert_sg_entries(pages, start >> PAGE_SHIFT, flags); - } static void i915_ggtt_clear_range(struct i915_address_space *vm, -- cgit From 75afcf72c714cd324b0be5f23916d2d501bdb9e6 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 15 Feb 2017 08:43:51 +0000 Subject: drm/i915: Always mark the PDP as dirty when altered We want to reload the PDP (and flush the TLB) when the addresses are changed. Signed-off-by: Chris Wilson Reviewed-by: Matthew Auld Link: http://patchwork.freedesktop.org/patch/msgid/20170215084357.19977-17-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_gtt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 234a65e8f7b1..9db033c8e7c5 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -1085,7 +1085,6 @@ static int gen8_ppgtt_alloc_pdp(struct i915_address_space *vm, struct i915_page_directory_pointer *pdp, u64 start, u64 length) { - struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); struct i915_page_directory *pd; u64 from = start; unsigned int pdpe; @@ -1100,6 +1099,8 @@ static int gen8_ppgtt_alloc_pdp(struct i915_address_space *vm, gen8_initialize_pd(vm, pd); gen8_ppgtt_set_pdpe(vm, pdp, pd, pdpe); pdp->used_pdpes++; + + mark_tlbs_dirty(i915_vm_to_ppgtt(vm)); } ret = gen8_ppgtt_alloc_pd(vm, pd, start, length); @@ -1111,7 +1112,6 @@ static int gen8_ppgtt_alloc_pdp(struct i915_address_space *vm, } } - mark_tlbs_dirty(ppgtt); return 0; unwind: -- cgit From 3dc523eacef1f73c71b30babad91f028e2946b7d Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 15 Feb 2017 08:43:52 +0000 Subject: drm/i915: Remove defunct GTT tracepoints The tracepoints are now entirely synonymous with binding and unbinding the VMA (and the tracepoints there). Signed-off-by: Chris Wilson Reviewed-by: Matthew Auld Link: http://patchwork.freedesktop.org/patch/msgid/20170215084357.19977-18-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_gtt.c | 4 -- drivers/gpu/drm/i915/i915_trace.h | 97 ------------------------------------- 2 files changed, 101 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 9db033c8e7c5..0748c4406f27 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -193,7 +193,6 @@ static int ppgtt_bind_vma(struct i915_vma *vma, u32 pte_flags; int ret; - trace_i915_va_alloc(vma); ret = vma->vm->allocate_va_range(vma->vm, vma->node.start, vma->size); if (ret) return ret; @@ -1321,9 +1320,6 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt) ppgtt->base.total = 1ULL << 32; ppgtt->switch_mm = gen8_legacy_mm_switch; - trace_i915_page_directory_pointer_entry_alloc(&ppgtt->base, - 0, 0, - GEN8_PML4E_SHIFT); if (intel_vgpu_active(dev_priv)) { ret = gen8_preallocate_top_level_pdp(ppgtt); diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h index de31c49781d3..7a547cdfc381 100644 --- a/drivers/gpu/drm/i915/i915_trace.h +++ b/drivers/gpu/drm/i915/i915_trace.h @@ -175,103 +175,6 @@ TRACE_EVENT(i915_vma_unbind, __entry->obj, __entry->offset, __entry->size, __entry->vm) ); -TRACE_EVENT(i915_va_alloc, - TP_PROTO(struct i915_vma *vma), - TP_ARGS(vma), - - TP_STRUCT__entry( - __field(struct i915_address_space *, vm) - __field(u64, start) - __field(u64, end) - ), - - TP_fast_assign( - __entry->vm = vma->vm; - __entry->start = vma->node.start; - __entry->end = vma->node.start + vma->node.size - 1; - ), - - TP_printk("vm=%p (%c), 0x%llx-0x%llx", - __entry->vm, i915_is_ggtt(__entry->vm) ? 'G' : 'P', __entry->start, __entry->end) -); - -DECLARE_EVENT_CLASS(i915_px_entry, - TP_PROTO(struct i915_address_space *vm, u32 px, u64 start, u64 px_shift), - TP_ARGS(vm, px, start, px_shift), - - TP_STRUCT__entry( - __field(struct i915_address_space *, vm) - __field(u32, px) - __field(u64, start) - __field(u64, end) - ), - - TP_fast_assign( - __entry->vm = vm; - __entry->px = px; - __entry->start = start; - __entry->end = ((start + (1ULL << px_shift)) & ~((1ULL << px_shift)-1)) - 1; - ), - - TP_printk("vm=%p, pde=%d (0x%llx-0x%llx)", - __entry->vm, __entry->px, __entry->start, __entry->end) -); - -DEFINE_EVENT(i915_px_entry, i915_page_table_entry_alloc, - TP_PROTO(struct i915_address_space *vm, u32 pde, u64 start, u64 pde_shift), - TP_ARGS(vm, pde, start, pde_shift) -); - -DEFINE_EVENT_PRINT(i915_px_entry, i915_page_directory_entry_alloc, - TP_PROTO(struct i915_address_space *vm, u32 pdpe, u64 start, u64 pdpe_shift), - TP_ARGS(vm, pdpe, start, pdpe_shift), - - TP_printk("vm=%p, pdpe=%d (0x%llx-0x%llx)", - __entry->vm, __entry->px, __entry->start, __entry->end) -); - -DEFINE_EVENT_PRINT(i915_px_entry, i915_page_directory_pointer_entry_alloc, - TP_PROTO(struct i915_address_space *vm, u32 pml4e, u64 start, u64 pml4e_shift), - TP_ARGS(vm, pml4e, start, pml4e_shift), - - TP_printk("vm=%p, pml4e=%d (0x%llx-0x%llx)", - __entry->vm, __entry->px, __entry->start, __entry->end) -); - -/* Avoid extra math because we only support two sizes. The format is defined by - * bitmap_scnprintf. Each 32 bits is 8 HEX digits followed by comma */ -#define TRACE_PT_SIZE(bits) \ - ((((bits) == 1024) ? 288 : 144) + 1) - -DECLARE_EVENT_CLASS(i915_page_table_entry_update, - TP_PROTO(struct i915_address_space *vm, u32 pde, - struct i915_page_table *pt, u32 first, u32 count), - TP_ARGS(vm, pde, pt, first, count), - - TP_STRUCT__entry( - __field(struct i915_address_space *, vm) - __field(u32, pde) - __field(u32, first) - __field(u32, last) - ), - - TP_fast_assign( - __entry->vm = vm; - __entry->pde = pde; - __entry->first = first; - __entry->last = first + count - 1; - ), - - TP_printk("vm=%p, pde=%d, updating %u:%u", - __entry->vm, __entry->pde, __entry->last, __entry->first) -); - -DEFINE_EVENT(i915_page_table_entry_update, i915_page_table_entry_map, - TP_PROTO(struct i915_address_space *vm, u32 pde, - struct i915_page_table *pt, u32 first, u32 count), - TP_ARGS(vm, pde, pt, first, count) -); - TRACE_EVENT(i915_gem_object_change_domain, TP_PROTO(struct drm_i915_gem_object *obj, u32 old_read, u32 old_write), TP_ARGS(obj, old_read, old_write), -- cgit From 998f6c00a1dcb8c63de3744c37155eefcf27a421 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 15 Feb 2017 08:43:53 +0000 Subject: drm/i915: Remove unused ppgtt->enable() We never assign or use the ppgtt->enable() callback, so remove it. Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170215084357.19977-19-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_gtt.h | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index 933888725368..6210550e15cf 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -385,7 +385,6 @@ struct i915_hw_ppgtt { gen6_pte_t __iomem *pd_addr; - int (*enable)(struct i915_hw_ppgtt *ppgtt); int (*switch_mm)(struct i915_hw_ppgtt *ppgtt, struct drm_i915_gem_request *req); void (*debug_dump)(struct i915_hw_ppgtt *ppgtt, struct seq_file *m); -- cgit From 381b943b07027f54083ed685368e261c1089ce53 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 15 Feb 2017 08:43:54 +0000 Subject: drm/i915: Remove i915_address_space.start Once upon a time, back in the UMS days, we supported userspace initialising the GTT and sharing portions of the GTT with other users. Now, we own the GTT (both global and per-process) and the tables always start at 0 - so we can remove i915_address_space.start and forget about this old complication. Signed-off-by: Chris Wilson Reviewed-by: Matthew Auld Link: http://patchwork.freedesktop.org/patch/msgid/20170215084357.19977-20-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_debugfs.c | 2 +- drivers/gpu/drm/i915/i915_gem_evict.c | 2 +- drivers/gpu/drm/i915/i915_gem_gtt.c | 16 ++++++---------- drivers/gpu/drm/i915/i915_gem_gtt.h | 1 - drivers/gpu/drm/i915/i915_vgpu.c | 9 ++++----- 5 files changed, 12 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 0de0596d41ac..20b3d1328a01 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -476,7 +476,7 @@ static int i915_gem_object_info(struct seq_file *m, void *data) dpy_count, dpy_size); seq_printf(m, "%llu [%llu] gtt total\n", - ggtt->base.total, ggtt->mappable_end - ggtt->base.start); + ggtt->base.total, ggtt->mappable_end); seq_putc(m, '\n'); print_batch_pool_stats(m, dev_priv); diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c index ca25b1f7f6d1..a0de5734f7d0 100644 --- a/drivers/gpu/drm/i915/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/i915_gem_evict.c @@ -274,7 +274,7 @@ int i915_gem_evict_for_node(struct i915_address_space *vm, check_color = vm->mm.color_adjust; if (check_color) { /* Expand search to cover neighbouring guard pages (or lack!) */ - if (start > vm->start) + if (start) start -= I915_GTT_PAGE_SIZE; /* Always look at the page afterwards to avoid the end-of-GTT */ diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 0748c4406f27..ebd87c337394 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -1218,10 +1218,9 @@ static void gen8_dump_pdp(struct i915_hw_ppgtt *ppgtt, static void gen8_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m) { struct i915_address_space *vm = &ppgtt->base; - uint64_t start = ppgtt->base.start; - uint64_t length = ppgtt->base.total; const gen8_pte_t scratch_pte = gen8_pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC); + u64 start = 0, length = ppgtt->base.total; if (!USES_FULL_48BIT_PPGTT(vm->i915)) { gen8_dump_pdp(ppgtt, &ppgtt->pdp, start, length, scratch_pte, m); @@ -1288,7 +1287,6 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt) if (ret) return ret; - ppgtt->base.start = 0; ppgtt->base.cleanup = gen8_ppgtt_cleanup; ppgtt->base.unbind_vma = ppgtt_unbind_vma; ppgtt->base.bind_vma = ppgtt_bind_vma; @@ -1349,9 +1347,8 @@ static void gen6_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m) struct i915_address_space *vm = &ppgtt->base; struct i915_page_table *unused; gen6_pte_t scratch_pte; - uint32_t pd_entry; - uint32_t pte, pde; - uint32_t start = ppgtt->base.start, length = ppgtt->base.total; + u32 pd_entry, pte, pde; + u32 start = 0, length = ppgtt->base.total; scratch_pte = vm->pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC, 0); @@ -1785,7 +1782,6 @@ static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt) ppgtt->base.unbind_vma = ppgtt_unbind_vma; ppgtt->base.bind_vma = ppgtt_bind_vma; ppgtt->base.cleanup = gen6_ppgtt_cleanup; - ppgtt->base.start = 0; ppgtt->base.total = I915_PDES * GEN6_PTES * PAGE_SIZE; ppgtt->debug_dump = gen6_dump_ppgtt; @@ -1826,7 +1822,7 @@ static void i915_address_space_init(struct i915_address_space *vm, { i915_gem_timeline_init(dev_priv, &vm->timeline, name); - drm_mm_init(&vm->mm, vm->start, vm->total); + drm_mm_init(&vm->mm, 0, vm->total); vm->mm.head_node.color = I915_COLOR_UNEVICTABLE; INIT_LIST_HEAD(&vm->active_list); @@ -2012,7 +2008,7 @@ void i915_gem_suspend_gtt_mappings(struct drm_i915_private *dev_priv) i915_check_and_clear_faults(dev_priv); - ggtt->base.clear_range(&ggtt->base, ggtt->base.start, ggtt->base.total); + ggtt->base.clear_range(&ggtt->base, 0, ggtt->base.total); i915_ggtt_invalidate(dev_priv); } @@ -2980,7 +2976,7 @@ void i915_gem_restore_gtt_mappings(struct drm_i915_private *dev_priv) i915_check_and_clear_faults(dev_priv); /* First fill our portion of the GTT with scratch pages */ - ggtt->base.clear_range(&ggtt->base, ggtt->base.start, ggtt->base.total); + ggtt->base.clear_range(&ggtt->base, 0, ggtt->base.total); ggtt->base.closed = true; /* skip rewriting PTE on VMA unbind */ diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index 6210550e15cf..e85ff6c97208 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -256,7 +256,6 @@ struct i915_address_space { */ struct drm_i915_file_private *file; struct list_head global_link; - u64 start; /* Start offset always 0 for dri2 */ u64 total; /* size addr space maps (ex. 2GB for ggtt) */ bool closed; diff --git a/drivers/gpu/drm/i915/i915_vgpu.c b/drivers/gpu/drm/i915/i915_vgpu.c index d0abfd08a01c..14014068dfcf 100644 --- a/drivers/gpu/drm/i915/i915_vgpu.c +++ b/drivers/gpu/drm/i915/i915_vgpu.c @@ -179,7 +179,7 @@ static int vgt_balloon_space(struct i915_ggtt *ggtt, int intel_vgt_balloon(struct drm_i915_private *dev_priv) { struct i915_ggtt *ggtt = &dev_priv->ggtt; - unsigned long ggtt_end = ggtt->base.start + ggtt->base.total; + unsigned long ggtt_end = ggtt->base.total; unsigned long mappable_base, mappable_size, mappable_end; unsigned long unmappable_base, unmappable_size, unmappable_end; @@ -202,8 +202,7 @@ int intel_vgt_balloon(struct drm_i915_private *dev_priv) DRM_INFO("Unmappable graphic memory: base 0x%lx size %ldKiB\n", unmappable_base, unmappable_size / 1024); - if (mappable_base < ggtt->base.start || - mappable_end > ggtt->mappable_end || + if (mappable_end > ggtt->mappable_end || unmappable_base < ggtt->mappable_end || unmappable_end > ggtt_end) { DRM_ERROR("Invalid ballooning configuration!\n"); @@ -231,9 +230,9 @@ int intel_vgt_balloon(struct drm_i915_private *dev_priv) } /* Mappable graphic memory ballooning */ - if (mappable_base > ggtt->base.start) { + if (mappable_base) { ret = vgt_balloon_space(ggtt, &bl_info.space[0], - ggtt->base.start, mappable_base); + 0, mappable_base); if (ret) goto err; -- cgit From e565ceb086d70e577e41315b921014dcfb991a6b Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 15 Feb 2017 08:43:55 +0000 Subject: drm/i915: Only preallocate the aliasing GTT to the extents of the global GTT As the aliasing GTT is only accessed via the global GTT, we will never use more of it than we expose via the Global GTT and so we only need to preallocate sufficient space within the ppgtt for the full GTT. Equally, if the aliasing GTT is smaller than the global GTT, we have a serious issue and must bail. Signed-off-by: Chris Wilson Reviewed-by: Matthew Auld Reviewed-by: Mika Kuoppala Link: http://patchwork.freedesktop.org/patch/msgid/20170215084357.19977-21-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_gtt.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index ebd87c337394..7fbb10804319 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -2384,9 +2384,19 @@ int i915_gem_init_aliasing_ppgtt(struct drm_i915_private *i915) if (IS_ERR(ppgtt)) return PTR_ERR(ppgtt); + if (WARN_ON(ppgtt->base.total < ggtt->base.total)) { + err = -ENODEV; + goto err_ppgtt; + } + if (ppgtt->base.allocate_va_range) { + /* Note we only pre-allocate as far as the end of the global + * GTT. On 48b / 4-level page-tables, the difference is very, + * very significant! We have to preallocate as GVT/vgpu does + * not like the page directory disappearing. + */ err = ppgtt->base.allocate_va_range(&ppgtt->base, - 0, ppgtt->base.total); + 0, ggtt->base.total); if (err) goto err_ppgtt; } -- cgit From 57202f47af85381f31f317a2a75fe76ea4c76925 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 15 Feb 2017 08:43:56 +0000 Subject: drm/i915: Differentiate the aliasing_ppgtt with an invalid filp Use an invalid filp so that the aliasing_ppgtt can be clearly identified. Signed-off-by: Chris Wilson Reviewed-by: Matthew Auld Link: http://patchwork.freedesktop.org/patch/msgid/20170215084357.19977-22-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_gtt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 7fbb10804319..84b50d636688 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -2380,7 +2380,7 @@ int i915_gem_init_aliasing_ppgtt(struct drm_i915_private *i915) struct i915_hw_ppgtt *ppgtt; int err; - ppgtt = i915_ppgtt_create(i915, NULL, "[alias]"); + ppgtt = i915_ppgtt_create(i915, ERR_PTR(-EPERM), "[alias]"); if (IS_ERR(ppgtt)) return PTR_ERR(ppgtt); -- cgit From 75c7b0b8620ce5a672da02e721a775acb7863c64 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 15 Feb 2017 08:43:57 +0000 Subject: drm/i915: Use preferred kernel types in i915_gem_gtt.c Make checkpatch happy and make the use of u32/u64 consistent throughout i915_gem_gtt.[ch] Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170215084357.19977-23-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_gtt.c | 83 +++++++++++++++++++------------------ drivers/gpu/drm/i915/i915_gem_gtt.h | 56 ++++++++++++------------- 2 files changed, 69 insertions(+), 70 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 84b50d636688..47a38272f54c 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -888,7 +888,7 @@ static void gen8_ppgtt_insert_3lvl(struct i915_address_space *vm, static void gen8_ppgtt_insert_4lvl(struct i915_address_space *vm, struct sg_table *pages, - uint64_t start, + u64 start, enum i915_cache_level cache_level, u32 unused) { @@ -1162,25 +1162,25 @@ unwind: static void gen8_dump_pdp(struct i915_hw_ppgtt *ppgtt, struct i915_page_directory_pointer *pdp, - uint64_t start, uint64_t length, + u64 start, u64 length, gen8_pte_t scratch_pte, struct seq_file *m) { struct i915_page_directory *pd; - uint32_t pdpe; + u32 pdpe; gen8_for_each_pdpe(pd, pdp, start, length, pdpe) { struct i915_page_table *pt; - uint64_t pd_len = length; - uint64_t pd_start = start; - uint32_t pde; + u64 pd_len = length; + u64 pd_start = start; + u32 pde; if (pdp->page_directory[pdpe] == ppgtt->base.scratch_pd) continue; seq_printf(m, "\tPDPE #%d\n", pdpe); gen8_for_each_pde(pt, pd, pd_start, pd_len, pde) { - uint32_t pte; + u32 pte; gen8_pte_t *pt_vaddr; if (pd->page_table[pde] == ppgtt->base.scratch_pt) @@ -1188,10 +1188,9 @@ static void gen8_dump_pdp(struct i915_hw_ppgtt *ppgtt, pt_vaddr = kmap_atomic_px(pt); for (pte = 0; pte < GEN8_PTES; pte += 4) { - uint64_t va = - (pdpe << GEN8_PDPE_SHIFT) | - (pde << GEN8_PDE_SHIFT) | - (pte << GEN8_PTE_SHIFT); + u64 va = (pdpe << GEN8_PDPE_SHIFT | + pde << GEN8_PDE_SHIFT | + pte << GEN8_PTE_SHIFT); int i; bool found = false; @@ -1225,7 +1224,7 @@ static void gen8_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m) if (!USES_FULL_48BIT_PPGTT(vm->i915)) { gen8_dump_pdp(ppgtt, &ppgtt->pdp, start, length, scratch_pte, m); } else { - uint64_t pml4e; + u64 pml4e; struct i915_pml4 *pml4 = &ppgtt->pml4; struct i915_page_directory_pointer *pdp; @@ -1407,7 +1406,7 @@ static inline void gen6_write_pde(const struct i915_hw_ppgtt *ppgtt, /* Write all the page tables found in the ppgtt structure to incrementing page * directories. */ static void gen6_write_page_range(struct i915_hw_ppgtt *ppgtt, - uint32_t start, uint32_t length) + u32 start, u32 length) { struct i915_page_table *pt; unsigned int pde; @@ -1419,7 +1418,7 @@ static void gen6_write_page_range(struct i915_hw_ppgtt *ppgtt, wmb(); } -static inline uint32_t get_pd_offset(struct i915_hw_ppgtt *ppgtt) +static inline u32 get_pd_offset(struct i915_hw_ppgtt *ppgtt) { GEM_BUG_ON(ppgtt->pd.base.ggtt_offset & 0x3f); return ppgtt->pd.base.ggtt_offset << 10; @@ -1513,7 +1512,7 @@ static void gen8_ppgtt_enable(struct drm_i915_private *dev_priv) static void gen7_ppgtt_enable(struct drm_i915_private *dev_priv) { struct intel_engine_cs *engine; - uint32_t ecochk, ecobits; + u32 ecochk, ecobits; enum intel_engine_id id; ecobits = I915_READ(GAC_ECO_BITS); @@ -1537,7 +1536,7 @@ static void gen7_ppgtt_enable(struct drm_i915_private *dev_priv) static void gen6_ppgtt_enable(struct drm_i915_private *dev_priv) { - uint32_t ecochk, gab_ctl, ecobits; + u32 ecochk, gab_ctl, ecobits; ecobits = I915_READ(GAC_ECO_BITS); I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_SNB_BIT | @@ -1589,8 +1588,9 @@ static void gen6_ppgtt_clear_range(struct i915_address_space *vm, static void gen6_ppgtt_insert_entries(struct i915_address_space *vm, struct sg_table *pages, - uint64_t start, - enum i915_cache_level cache_level, u32 flags) + u64 start, + enum i915_cache_level cache_level, + u32 flags) { struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); unsigned first_entry = start >> PAGE_SHIFT; @@ -1690,7 +1690,7 @@ static void gen6_ppgtt_cleanup(struct i915_address_space *vm) struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); struct i915_page_directory *pd = &ppgtt->pd; struct i915_page_table *pt; - uint32_t pde; + u32 pde; drm_mm_remove_node(&ppgtt->node); @@ -1748,10 +1748,10 @@ static int gen6_ppgtt_alloc(struct i915_hw_ppgtt *ppgtt) } static void gen6_scratch_va_range(struct i915_hw_ppgtt *ppgtt, - uint64_t start, uint64_t length) + u64 start, u64 length) { struct i915_page_table *unused; - uint32_t pde; + u32 pde; gen6_for_each_pde(unused, &ppgtt->pd, start, length, pde) ppgtt->pd.page_table[pde] = ppgtt->base.scratch_pt; @@ -2045,7 +2045,7 @@ static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte) static void gen8_ggtt_insert_page(struct i915_address_space *vm, dma_addr_t addr, - uint64_t offset, + u64 offset, enum i915_cache_level level, u32 unused) { @@ -2060,8 +2060,9 @@ static void gen8_ggtt_insert_page(struct i915_address_space *vm, static void gen8_ggtt_insert_entries(struct i915_address_space *vm, struct sg_table *st, - uint64_t start, - enum i915_cache_level level, u32 unused) + u64 start, + enum i915_cache_level level, + u32 unused) { struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); struct sgt_iter sgt_iter; @@ -2086,7 +2087,7 @@ static void gen8_ggtt_insert_entries(struct i915_address_space *vm, struct insert_entries { struct i915_address_space *vm; struct sg_table *st; - uint64_t start; + u64 start; enum i915_cache_level level; u32 flags; }; @@ -2101,7 +2102,7 @@ static int gen8_ggtt_insert_entries__cb(void *_arg) static void gen8_ggtt_insert_entries__BKL(struct i915_address_space *vm, struct sg_table *st, - uint64_t start, + u64 start, enum i915_cache_level level, u32 flags) { @@ -2111,7 +2112,7 @@ static void gen8_ggtt_insert_entries__BKL(struct i915_address_space *vm, static void gen6_ggtt_insert_page(struct i915_address_space *vm, dma_addr_t addr, - uint64_t offset, + u64 offset, enum i915_cache_level level, u32 flags) { @@ -2132,8 +2133,9 @@ static void gen6_ggtt_insert_page(struct i915_address_space *vm, */ static void gen6_ggtt_insert_entries(struct i915_address_space *vm, struct sg_table *st, - uint64_t start, - enum i915_cache_level level, u32 flags) + u64 start, + enum i915_cache_level level, + u32 flags) { struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); gen6_pte_t __iomem *entries = (gen6_pte_t __iomem *)ggtt->gsm; @@ -2152,12 +2154,12 @@ static void gen6_ggtt_insert_entries(struct i915_address_space *vm, } static void nop_clear_range(struct i915_address_space *vm, - uint64_t start, uint64_t length) + u64 start, u64 length) { } static void gen8_ggtt_clear_range(struct i915_address_space *vm, - uint64_t start, uint64_t length) + u64 start, u64 length) { struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); unsigned first_entry = start >> PAGE_SHIFT; @@ -2179,8 +2181,7 @@ static void gen8_ggtt_clear_range(struct i915_address_space *vm, } static void gen6_ggtt_clear_range(struct i915_address_space *vm, - uint64_t start, - uint64_t length) + u64 start, u64 length) { struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); unsigned first_entry = start >> PAGE_SHIFT; @@ -2204,7 +2205,7 @@ static void gen6_ggtt_clear_range(struct i915_address_space *vm, static void i915_ggtt_insert_page(struct i915_address_space *vm, dma_addr_t addr, - uint64_t offset, + u64 offset, enum i915_cache_level cache_level, u32 unused) { @@ -2216,8 +2217,9 @@ static void i915_ggtt_insert_page(struct i915_address_space *vm, static void i915_ggtt_insert_entries(struct i915_address_space *vm, struct sg_table *pages, - uint64_t start, - enum i915_cache_level cache_level, u32 unused) + u64 start, + enum i915_cache_level cache_level, + u32 unused) { unsigned int flags = (cache_level == I915_CACHE_NONE) ? AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY; @@ -2226,8 +2228,7 @@ static void i915_ggtt_insert_entries(struct i915_address_space *vm, } static void i915_ggtt_clear_range(struct i915_address_space *vm, - uint64_t start, - uint64_t length) + u64 start, u64 length) { intel_gtt_clear_range(start >> PAGE_SHIFT, length >> PAGE_SHIFT); } @@ -2641,7 +2642,7 @@ static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size) * writing this data shouldn't be harmful even in those cases. */ static void bdw_setup_private_ppat(struct drm_i915_private *dev_priv) { - uint64_t pat; + u64 pat; pat = GEN8_PPAT(0, GEN8_PPAT_WB | GEN8_PPAT_LLC) | /* for normal objects, no eLLC */ GEN8_PPAT(1, GEN8_PPAT_WC | GEN8_PPAT_LLCELLC) | /* for something pointing to ptes? */ @@ -2676,7 +2677,7 @@ static void bdw_setup_private_ppat(struct drm_i915_private *dev_priv) static void chv_setup_private_ppat(struct drm_i915_private *dev_priv) { - uint64_t pat; + u64 pat; /* * Map WB on BDW to snooped on CHV. @@ -3073,7 +3074,7 @@ static noinline struct sg_table * intel_rotate_pages(struct intel_rotation_info *rot_info, struct drm_i915_gem_object *obj) { - const size_t n_pages = obj->base.size / PAGE_SIZE; + const unsigned long n_pages = obj->base.size / PAGE_SIZE; unsigned int size = intel_rotation_info_size(rot_info); struct sgt_iter sgt_iter; dma_addr_t dma_addr; diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index e85ff6c97208..f7d4e194a227 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -53,11 +53,11 @@ struct drm_i915_file_private; struct drm_i915_fence_reg; -typedef uint32_t gen6_pte_t; -typedef uint64_t gen8_pte_t; -typedef uint64_t gen8_pde_t; -typedef uint64_t gen8_ppgtt_pdpe_t; -typedef uint64_t gen8_ppgtt_pml4e_t; +typedef u32 gen6_pte_t; +typedef u64 gen8_pte_t; +typedef u64 gen8_pde_t; +typedef u64 gen8_ppgtt_pdpe_t; +typedef u64 gen8_ppgtt_pml4e_t; #define ggtt_total_entries(ggtt) ((ggtt)->base.total >> PAGE_SHIFT) @@ -143,7 +143,7 @@ typedef uint64_t gen8_ppgtt_pml4e_t; #define GEN8_PPAT_WC (1<<0) #define GEN8_PPAT_UC (0<<0) #define GEN8_PPAT_ELLC_OVERRIDE (0<<2) -#define GEN8_PPAT(i, x) ((uint64_t) (x) << ((i) * 8)) +#define GEN8_PPAT(i, x) ((u64)(x) << ((i) * 8)) struct sg_table; @@ -210,7 +210,7 @@ struct i915_page_dma { /* For gen6/gen7 only. This is the offset in the GGTT * where the page directory entries for PPGTT begin */ - uint32_t ggtt_offset; + u32 ggtt_offset; }; }; @@ -305,20 +305,19 @@ struct i915_address_space { /* flags for pte_encode */ #define PTE_READ_ONLY (1<<0) int (*allocate_va_range)(struct i915_address_space *vm, - uint64_t start, - uint64_t length); + u64 start, u64 length); void (*clear_range)(struct i915_address_space *vm, - uint64_t start, - uint64_t length); + u64 start, u64 length); void (*insert_page)(struct i915_address_space *vm, dma_addr_t addr, - uint64_t offset, + u64 offset, enum i915_cache_level cache_level, u32 flags); void (*insert_entries)(struct i915_address_space *vm, struct sg_table *st, - uint64_t start, - enum i915_cache_level cache_level, u32 flags); + u64 start, + enum i915_cache_level cache_level, + u32 flags); void (*cleanup)(struct i915_address_space *vm); /** Unmap an object from an address space. This usually consists of * setting the valid PTE entries to a reserved scratch page. */ @@ -411,9 +410,9 @@ struct i915_hw_ppgtt { (pt = (pd)->page_table[iter], true); \ ++iter) -static inline uint32_t i915_pte_index(uint64_t address, uint32_t pde_shift) +static inline u32 i915_pte_index(u64 address, unsigned int pde_shift) { - const uint32_t mask = NUM_PTE(pde_shift) - 1; + const u32 mask = NUM_PTE(pde_shift) - 1; return (address >> PAGE_SHIFT) & mask; } @@ -422,11 +421,10 @@ static inline uint32_t i915_pte_index(uint64_t address, uint32_t pde_shift) * does not cross a page table boundary, so the max value would be * GEN6_PTES for GEN6, and GEN8_PTES for GEN8. */ -static inline uint32_t i915_pte_count(uint64_t addr, size_t length, - uint32_t pde_shift) +static inline u32 i915_pte_count(u64 addr, u64 length, unsigned int pde_shift) { - const uint64_t mask = ~((1ULL << pde_shift) - 1); - uint64_t end; + const u64 mask = ~((1ULL << pde_shift) - 1); + u64 end; WARN_ON(length == 0); WARN_ON(offset_in_page(addr|length)); @@ -439,22 +437,22 @@ static inline uint32_t i915_pte_count(uint64_t addr, size_t length, return i915_pte_index(end, pde_shift) - i915_pte_index(addr, pde_shift); } -static inline uint32_t i915_pde_index(uint64_t addr, uint32_t shift) +static inline u32 i915_pde_index(u64 addr, u32 shift) { return (addr >> shift) & I915_PDE_MASK; } -static inline uint32_t gen6_pte_index(uint32_t addr) +static inline u32 gen6_pte_index(u32 addr) { return i915_pte_index(addr, GEN6_PDE_SHIFT); } -static inline size_t gen6_pte_count(uint32_t addr, uint32_t length) +static inline u32 gen6_pte_count(u32 addr, u32 length) { return i915_pte_count(addr, length, GEN6_PDE_SHIFT); } -static inline uint32_t gen6_pde_index(uint32_t addr) +static inline u32 gen6_pde_index(u32 addr) { return i915_pde_index(addr, GEN6_PDE_SHIFT); } @@ -487,27 +485,27 @@ static inline uint32_t gen6_pde_index(uint32_t addr) temp = min(temp - start, length); \ start += temp, length -= temp; }), ++iter) -static inline uint32_t gen8_pte_index(uint64_t address) +static inline u32 gen8_pte_index(u64 address) { return i915_pte_index(address, GEN8_PDE_SHIFT); } -static inline uint32_t gen8_pde_index(uint64_t address) +static inline u32 gen8_pde_index(u64 address) { return i915_pde_index(address, GEN8_PDE_SHIFT); } -static inline uint32_t gen8_pdpe_index(uint64_t address) +static inline u32 gen8_pdpe_index(u64 address) { return (address >> GEN8_PDPE_SHIFT) & GEN8_PDPE_MASK; } -static inline uint32_t gen8_pml4e_index(uint64_t address) +static inline u32 gen8_pml4e_index(u64 address) { return (address >> GEN8_PML4E_SHIFT) & GEN8_PML4E_MASK; } -static inline size_t gen8_pte_count(uint64_t address, uint64_t length) +static inline u64 gen8_pte_count(u64 address, u64 length) { return i915_pte_count(address, length, GEN8_PDE_SHIFT); } -- cgit From d7e8ef02a6b3d11f0c3ad99c0f656341151c50b9 Mon Sep 17 00:00:00 2001 From: Manasi Navare Date: Tue, 7 Feb 2017 16:54:11 -0800 Subject: drm/i915/dp: Reset the link params on HPD/connected boot/resume MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The max link parameters should be set/reset only on HPD or connected boot case or on system resume. Add a flag reset_link_params to intel_dp to decide when to reset the max link parameters. This prevents the parameters from getting reset/overwritten through all other connector->funcs->detect() calls. This is important when link training fails and the max link params are modified to the lower fallback values. Cc: Ville Syrjala Reviewed-by: Ville Syrjälä Signed-off-by: Manasi Navare Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/1486515251-23469-1-git-send-email-manasi.d.navare@intel.com --- drivers/gpu/drm/i915/intel_dp.c | 16 ++++++++++++---- drivers/gpu/drm/i915/intel_drv.h | 1 + 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 6ef4f3810082..024798a9c016 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -4624,11 +4624,15 @@ intel_dp_long_pulse(struct intel_connector *intel_connector) yesno(intel_dp_source_supports_hbr2(intel_dp)), yesno(drm_dp_tps3_supported(intel_dp->dpcd))); - /* Set the max lane count for sink */ - intel_dp->max_sink_lane_count = drm_dp_max_lane_count(intel_dp->dpcd); + if (intel_dp->reset_link_params) { + /* Set the max lane count for sink */ + intel_dp->max_sink_lane_count = drm_dp_max_lane_count(intel_dp->dpcd); - /* Set the max link BW for sink */ - intel_dp->max_sink_link_bw = intel_dp_max_link_bw(intel_dp); + /* Set the max link BW for sink */ + intel_dp->max_sink_link_bw = intel_dp_max_link_bw(intel_dp); + + intel_dp->reset_link_params = false; + } intel_dp_print_rates(intel_dp); @@ -5010,6 +5014,8 @@ void intel_dp_encoder_reset(struct drm_encoder *encoder) if (lspcon->active) lspcon_resume(lspcon); + intel_dp->reset_link_params = true; + pps_lock(intel_dp); if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) @@ -5079,6 +5085,7 @@ intel_dp_hpd_pulse(struct intel_digital_port *intel_dig_port, bool long_hpd) long_hpd ? "long" : "short"); if (long_hpd) { + intel_dp->reset_link_params = true; intel_dp->detect_done = false; return IRQ_NONE; } @@ -5947,6 +5954,7 @@ intel_dp_init_connector(struct intel_digital_port *intel_dig_port, intel_dig_port->max_lanes, port_name(port))) return false; + intel_dp->reset_link_params = true; intel_dp->pps_pipe = INVALID_PIPE; intel_dp->active_pipe = INVALID_PIPE; diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 7845d40f203e..f1cbeae5cf88 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -933,6 +933,7 @@ struct intel_dp { bool has_audio; bool detect_done; bool channel_eq_status; + bool reset_link_params; enum hdmi_force_audio force_audio; bool limited_color_range; bool color_range_auto; -- cgit From 645a2f6e03ed4a998da4b3642d80dd25d371b6b1 Mon Sep 17 00:00:00 2001 From: Uma Shankar Date: Wed, 8 Feb 2017 16:20:50 +0530 Subject: drm/i915: Check for platform specific GPIO config Panel GPIO control should be done based on platform. Add a check to restrict VLV and CHT specific GPIO confirguration, so that they dont apply to other platforms. The VBT spec fails to mention the PMIC backlight control option is valid only for VLV/CHT, and the field may be set to "PMIC" for BXT even if PMIC is not desired or possible. Signed-off-by: Uma Shankar Signed-off-by: Vidya Srinivas [Jani: amended commit message a bit and fixed indentation.] Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/1486551058-22596-2-git-send-email-vidya.srinivas@intel.com --- drivers/gpu/drm/i915/intel_dsi.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_dsi.c b/drivers/gpu/drm/i915/intel_dsi.c index c98234eca2a6..c3d97e0bc59d 100644 --- a/drivers/gpu/drm/i915/intel_dsi.c +++ b/drivers/gpu/drm/i915/intel_dsi.c @@ -1584,7 +1584,8 @@ void intel_dsi_init(struct drm_i915_private *dev_priv) * In case of BYT with CRC PMIC, we need to use GPIO for * Panel control. */ - if (dev_priv->vbt.dsi.config->pwm_blc == PPS_BLC_PMIC) { + if ((IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) && + (dev_priv->vbt.dsi.config->pwm_blc == PPS_BLC_PMIC)) { intel_dsi->gpio_panel = gpiod_get(dev->dev, "panel", GPIOD_OUT_HIGH); -- cgit From 06a20d2d2d4187b9bc1a4c2f62e989a97a086a76 Mon Sep 17 00:00:00 2001 From: Uma Shankar Date: Wed, 8 Feb 2017 16:20:51 +0530 Subject: drm/i915: Fix PLL 8x/3 divider for MIPI video mode MIPI Video Mode for high res panels (requiring dual link), need a 8X/3 divider to be programmed as 0x2. Modifying the same in this patch. Signed-off-by: Uma Shankar Signed-off-by: Vidya Srinivas Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/1486551058-22596-3-git-send-email-vidya.srinivas@intel.com --- drivers/gpu/drm/i915/intel_dsi_pll.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dsi_pll.c b/drivers/gpu/drm/i915/intel_dsi_pll.c index 61440e5c2563..3a7308681360 100644 --- a/drivers/gpu/drm/i915/intel_dsi_pll.c +++ b/drivers/gpu/drm/i915/intel_dsi_pll.c @@ -416,11 +416,7 @@ static void bxt_dsi_program_clocks(struct drm_device *dev, enum port port, rx_div_lower = rx_div & RX_DIVIDER_BIT_1_2; rx_div_upper = (rx_div & RX_DIVIDER_BIT_3_4) >> 2; - /* As per bpsec program the 8/3X clock divider to the below value */ - if (dev_priv->vbt.dsi.config->is_cmd_mode) - mipi_8by3_divider = 0x2; - else - mipi_8by3_divider = 0x3; + mipi_8by3_divider = 0x2; tmp |= BXT_MIPI_8X_BY3_DIVIDER(port, mipi_8by3_divider); tmp |= BXT_MIPI_TX_ESCLK_DIVIDER(port, tx_div); -- cgit From 8aeaf64c96a8698f05664a8726337855a4d4fdf2 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Wed, 15 Feb 2017 17:21:37 +0200 Subject: drm/i915/bxt: apply clock gating workaround to all revisions No need to cater for old A revisions. Reviewed-by: Imre Deak Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/1487172099-24873-2-git-send-email-jani.nikula@intel.com --- drivers/gpu/drm/i915/intel_pm.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 3d311e100643..fe243c65de1a 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -99,9 +99,8 @@ static void bxt_init_clock_gating(struct drm_i915_private *dev_priv) * Wa: Backlight PWM may stop in the asserted state, causing backlight * to stay fully on. */ - if (IS_BXT_REVID(dev_priv, BXT_REVID_B0, REVID_FOREVER)) - I915_WRITE(GEN9_CLKGATE_DIS_0, I915_READ(GEN9_CLKGATE_DIS_0) | - PWM1_GATING_DIS | PWM2_GATING_DIS); + I915_WRITE(GEN9_CLKGATE_DIS_0, I915_READ(GEN9_CLKGATE_DIS_0) | + PWM1_GATING_DIS | PWM2_GATING_DIS); } static void glk_init_clock_gating(struct drm_i915_private *dev_priv) -- cgit From 362f8b9edb1066b12a9bfa14e72d0ccb66fc742b Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Wed, 15 Feb 2017 17:21:38 +0200 Subject: drm/i915/bxt: remove snooping workaround on old A revisions No need to cater for old A revisions. Reviewed-by: Imre Deak Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/1487172099-24873-3-git-send-email-jani.nikula@intel.com --- drivers/gpu/drm/i915/intel_device_info.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c index 0891cc0e8626..2e1fd857e625 100644 --- a/drivers/gpu/drm/i915/intel_device_info.c +++ b/drivers/gpu/drm/i915/intel_device_info.c @@ -410,10 +410,6 @@ void intel_device_info_runtime_init(struct drm_i915_private *dev_priv) info->has_snoop = !info->has_llc; - /* Snooping is broken on BXT A stepping. */ - if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) - info->has_snoop = false; - DRM_DEBUG_DRIVER("slice mask: %04x\n", info->sseu.slice_mask); DRM_DEBUG_DRIVER("slice total: %u\n", hweight8(info->sseu.slice_mask)); DRM_DEBUG_DRIVER("subslice total: %u\n", -- cgit From f2254d293790a030ccf12ff4bb0d77b679be3f4d Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Wed, 15 Feb 2017 17:21:39 +0200 Subject: drm/i915/bxt: remove WaRsDisableCoarsePowerGating for early BXT No need to cater for old A revisions. Reviewed-by: Imre Deak Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/1487172099-24873-4-git-send-email-jani.nikula@intel.com --- drivers/gpu/drm/i915/i915_drv.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index f582f5492e37..2a7b277a86de 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2827,9 +2827,7 @@ intel_info(const struct drm_i915_private *dev_priv) /* WaRsDisableCoarsePowerGating:skl,bxt */ #define NEEDS_WaRsDisableCoarsePowerGating(dev_priv) \ - (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1) || \ - IS_SKL_GT3(dev_priv) || \ - IS_SKL_GT4(dev_priv)) + (IS_SKL_GT3(dev_priv) || IS_SKL_GT4(dev_priv)) /* * dp aux and gmbus irq on gen4 seems to be able to generate legacy interrupts -- cgit From 262fd485ac6b476479f41f00bb104f6a1766ae66 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 15 Feb 2017 13:15:47 +0000 Subject: drm/i915: Only enable hotplug interrupts if the display interrupts are enabled MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In order to prevent accessing the hpd registers outside of the display power wells, we should refrain from writing to the registers before the display interrupts are enabled. [ 4.740136] WARNING: CPU: 1 PID: 221 at drivers/gpu/drm/i915/intel_uncore.c:795 __unclaimed_reg_debug+0x44/0x50 [i915] [ 4.740155] Unclaimed read from register 0x1e1110 [ 4.740168] Modules linked in: i915(+) intel_gtt drm_kms_helper prime_numbers [ 4.740190] CPU: 1 PID: 221 Comm: systemd-udevd Not tainted 4.10.0-rc6+ #384 [ 4.740203] Hardware name: / , BIOS PYBSWCEL.86A.0027.2015.0507.1758 05/07/2015 [ 4.740220] Call Trace: [ 4.740236] dump_stack+0x4d/0x6f [ 4.740251] __warn+0xc1/0xe0 [ 4.740265] warn_slowpath_fmt+0x4a/0x50 [ 4.740281] ? insert_work+0x77/0xc0 [ 4.740355] ? fwtable_write32+0x90/0x130 [i915] [ 4.740431] __unclaimed_reg_debug+0x44/0x50 [i915] [ 4.740507] fwtable_read32+0xd8/0x130 [i915] [ 4.740575] i915_hpd_irq_setup+0xa5/0x100 [i915] [ 4.740649] intel_hpd_init+0x68/0x80 [i915] [ 4.740716] i915_driver_load+0xe19/0x1380 [i915] [ 4.740784] i915_pci_probe+0x32/0x90 [i915] [ 4.740799] pci_device_probe+0x8b/0xf0 [ 4.740815] driver_probe_device+0x2b6/0x450 [ 4.740828] __driver_attach+0xda/0xe0 [ 4.740841] ? driver_probe_device+0x450/0x450 [ 4.740853] bus_for_each_dev+0x5b/0x90 [ 4.740865] driver_attach+0x19/0x20 [ 4.740878] bus_add_driver+0x166/0x260 [ 4.740892] driver_register+0x5b/0xd0 [ 4.740906] ? 0xffffffffa0166000 [ 4.740920] __pci_register_driver+0x47/0x50 [ 4.740985] i915_init+0x5c/0x5e [i915] [ 4.740999] do_one_initcall+0x3e/0x160 [ 4.741015] ? __vunmap+0x7c/0xc0 [ 4.741029] ? kmem_cache_alloc+0xcf/0x120 [ 4.741045] do_init_module+0x55/0x1c4 [ 4.741060] load_module+0x1f3f/0x25b0 [ 4.741073] ? __symbol_put+0x40/0x40 [ 4.741086] ? kernel_read_file+0x100/0x190 [ 4.741100] SYSC_finit_module+0xbc/0xf0 [ 4.741112] SyS_finit_module+0x9/0x10 [ 4.741125] entry_SYSCALL_64_fastpath+0x17/0x98 [ 4.741135] RIP: 0033:0x7f8559a140f9 [ 4.741145] RSP: 002b:00007fff7509a3e8 EFLAGS: 00000246 ORIG_RAX: 0000000000000139 [ 4.741161] RAX: ffffffffffffffda RBX: 00007f855aba02d1 RCX: 00007f8559a140f9 [ 4.741172] RDX: 0000000000000000 RSI: 000055b6db0914f0 RDI: 0000000000000011 [ 4.741183] RBP: 0000000000020000 R08: 0000000000000000 R09: 000000000000000e [ 4.741193] R10: 0000000000000011 R11: 0000000000000246 R12: 000055b6db0854d0 [ 4.741204] R13: 000055b6db091150 R14: 0000000000000000 R15: 000055b6db035924 v2: Set dev_priv->display_irqs_enabled to true for all platforms other than vlv/chv that manually control the display power domain. Fixes: 19625e85c6ec ("drm/i915: Enable polling when we don't have hpd") Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=97798 Suggested-by: Ville Syrjälä Signed-off-by: Chris Wilson Cc: Lyude Cc: Daniel Vetter Cc: Ville Syrjälä Cc: Hans de Goede Cc: stable@vger.kernel.org Link: http://patchwork.freedesktop.org/patch/msgid/20170215131547.5064-1-chris@chris-wilson.co.uk Reviewed-by: Ville Syrjälä --- drivers/gpu/drm/i915/i915_irq.c | 10 ++++++++++ drivers/gpu/drm/i915/intel_hotplug.c | 14 ++++++++------ 2 files changed, 18 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 84fda3fce4b8..0d8ebd147da6 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -4269,6 +4269,16 @@ void intel_irq_init(struct drm_i915_private *dev_priv) if (!IS_GEN2(dev_priv)) dev->vblank_disable_immediate = true; + /* Most platforms treat the display irq block as an always-on + * power domain. vlv/chv can disable it at runtime and need + * special care to avoid writing any of the display block registers + * outside of the power domain. We defer setting up the display irqs + * in this case to the runtime pm. + */ + dev_priv->display_irqs_enabled = true; + if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) + dev_priv->display_irqs_enabled = false; + dev_priv->hotplug.hpd_storm_threshold = HPD_STORM_DEFAULT_THRESHOLD; dev->driver->get_vblank_timestamp = i915_get_vblank_timestamp; diff --git a/drivers/gpu/drm/i915/intel_hotplug.c b/drivers/gpu/drm/i915/intel_hotplug.c index 6a9c16508ab5..0756bdc672b3 100644 --- a/drivers/gpu/drm/i915/intel_hotplug.c +++ b/drivers/gpu/drm/i915/intel_hotplug.c @@ -224,7 +224,7 @@ static void intel_hpd_irq_storm_reenable_work(struct work_struct *work) } } } - if (dev_priv->display.hpd_irq_setup) + if (dev_priv->display_irqs_enabled && dev_priv->display.hpd_irq_setup) dev_priv->display.hpd_irq_setup(dev_priv); spin_unlock_irq(&dev_priv->irq_lock); @@ -430,7 +430,7 @@ void intel_hpd_irq_handler(struct drm_i915_private *dev_priv, } } - if (storm_detected) + if (storm_detected && dev_priv->display_irqs_enabled) dev_priv->display.hpd_irq_setup(dev_priv); spin_unlock(&dev_priv->irq_lock); @@ -476,10 +476,12 @@ void intel_hpd_init(struct drm_i915_private *dev_priv) * Interrupt setup is already guaranteed to be single-threaded, this is * just to make the assert_spin_locked checks happy. */ - spin_lock_irq(&dev_priv->irq_lock); - if (dev_priv->display.hpd_irq_setup) - dev_priv->display.hpd_irq_setup(dev_priv); - spin_unlock_irq(&dev_priv->irq_lock); + if (dev_priv->display_irqs_enabled && dev_priv->display.hpd_irq_setup) { + spin_lock_irq(&dev_priv->irq_lock); + if (dev_priv->display_irqs_enabled) + dev_priv->display.hpd_irq_setup(dev_priv); + spin_unlock_irq(&dev_priv->irq_lock); + } } static void i915_hpd_poll_init_work(struct work_struct *work) -- cgit From 3582ad1361644ba0f937cf6aa59d5c9ead6763ca Mon Sep 17 00:00:00 2001 From: "sagar.a.kamble@intel.com" Date: Fri, 3 Feb 2017 13:58:33 +0530 Subject: drm/i915: Do RPM Wake during GuC/HuC status read HUC_STATUS, GUC_STATUS, SOFT_SCRATCH registers are read in debugfs and getparam ioctl. This patch covers those accesses by RPM get/put. v2: Covering access in i915_getparam(I915_PARAM_HUC_STATUS) (ChrisW) Cc: Arkadiusz Hiler Cc: Anusha Srivatsa Cc: Fiedorowicz, Lukasz Signed-off-by: Sagar Arun Kamble Reviewed-by: Arkadiusz Hiler Signed-off-by: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/1486110513-12130-1-git-send-email-sagar.a.kamble@intel.com --- drivers/gpu/drm/i915/i915_debugfs.c | 6 ++++++ drivers/gpu/drm/i915/i915_drv.c | 5 ++--- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 20b3d1328a01..11f02fb1f335 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -2412,7 +2412,9 @@ static int i915_huc_load_status_info(struct seq_file *m, void *data) seq_printf(m, "\tRSA: offset is %d; size = %d\n", huc_fw->rsa_offset, huc_fw->rsa_size); + intel_runtime_pm_get(dev_priv); seq_printf(m, "\nHuC status 0x%08x:\n", I915_READ(HUC_STATUS2)); + intel_runtime_pm_put(dev_priv); return 0; } @@ -2444,6 +2446,8 @@ static int i915_guc_load_status_info(struct seq_file *m, void *data) seq_printf(m, "\tRSA: offset is %d; size = %d\n", guc_fw->rsa_offset, guc_fw->rsa_size); + intel_runtime_pm_get(dev_priv); + tmp = I915_READ(GUC_STATUS); seq_printf(m, "\nGuC status 0x%08x:\n", tmp); @@ -2457,6 +2461,8 @@ static int i915_guc_load_status_info(struct seq_file *m, void *data) for (i = 0; i < 16; i++) seq_printf(m, "\t%2d: \t0x%x\n", i, I915_READ(SOFT_SCRATCH(i))); + intel_runtime_pm_put(dev_priv); + return 0; } diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 50de00921cc9..c992d406ae74 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -319,10 +319,9 @@ static int i915_getparam(struct drm_device *dev, void *data, value = INTEL_INFO(dev_priv)->sseu.min_eu_in_pool; break; case I915_PARAM_HUC_STATUS: - /* The register is already force-woken. We dont need - * any rpm here - */ + intel_runtime_pm_get(dev_priv); value = I915_READ(HUC_STATUS2) & HUC_FW_VERIFIED; + intel_runtime_pm_put(dev_priv); break; case I915_PARAM_MMAP_GTT_VERSION: /* Though we've started our numbering from 1, and so class all -- cgit From 6043801f937ada9c9ed9dfa3c6ce542a79643401 Mon Sep 17 00:00:00 2001 From: Deepak M Date: Tue, 14 Feb 2017 18:46:16 +0530 Subject: drm/i915: Set the Z inversion overlap field Dual link Z-inversion overlap field is present in MIPI_CTRL register unlike the older platforms, hence setting the same in this patch. Signed-off-by: Deepak M Signed-off-by: Madhav Chauhan Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/1487078180-15147-5-git-send-email-madhav.chauhan@intel.com --- drivers/gpu/drm/i915/intel_dsi.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dsi.c b/drivers/gpu/drm/i915/intel_dsi.c index c3d97e0bc59d..99dd7e12abc0 100644 --- a/drivers/gpu/drm/i915/intel_dsi.c +++ b/drivers/gpu/drm/i915/intel_dsi.c @@ -456,12 +456,21 @@ static void intel_dsi_port_enable(struct intel_encoder *encoder) if (intel_dsi->dual_link == DSI_DUAL_LINK_FRONT_BACK) { u32 temp; - - temp = I915_READ(VLV_CHICKEN_3); - temp &= ~PIXEL_OVERLAP_CNT_MASK | + if (IS_GEN9_LP(dev_priv)) { + for_each_dsi_port(port, intel_dsi->ports) { + temp = I915_READ(MIPI_CTRL(port)); + temp &= ~BXT_PIXEL_OVERLAP_CNT_MASK | + intel_dsi->pixel_overlap << + BXT_PIXEL_OVERLAP_CNT_SHIFT; + I915_WRITE(MIPI_CTRL(port), temp); + } + } else { + temp = I915_READ(VLV_CHICKEN_3); + temp &= ~PIXEL_OVERLAP_CNT_MASK | intel_dsi->pixel_overlap << PIXEL_OVERLAP_CNT_SHIFT; - I915_WRITE(VLV_CHICKEN_3, temp); + I915_WRITE(VLV_CHICKEN_3, temp); + } } for_each_dsi_port(port, intel_dsi->ports) { -- cgit From eba4daf0dc5861703000f58b1d51110ced2b2fb5 Mon Sep 17 00:00:00 2001 From: Uma Shankar Date: Wed, 8 Feb 2017 16:20:54 +0530 Subject: drm/i915/bxt: Fix BXT DSI ULPS sequence Fix the Sequence to program BXT DSI Latch and ULPS. Signed-off-by: Uma Shankar Signed-off-by: Vidya Srinivas Reviewed-by: Bob Paauwe Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/1486551058-22596-6-git-send-email-vidya.srinivas@intel.com --- drivers/gpu/drm/i915/intel_dsi.c | 23 +++++------------------ 1 file changed, 5 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dsi.c b/drivers/gpu/drm/i915/intel_dsi.c index 99dd7e12abc0..e7e823d00444 100644 --- a/drivers/gpu/drm/i915/intel_dsi.c +++ b/drivers/gpu/drm/i915/intel_dsi.c @@ -366,32 +366,19 @@ static void bxt_dsi_device_ready(struct intel_encoder *encoder) DRM_DEBUG_KMS("\n"); - /* Exit Low power state in 4 steps*/ + /* Enable MIPI PHY transparent latch */ for_each_dsi_port(port, intel_dsi->ports) { - - /* 1. Enable MIPI PHY transparent latch */ val = I915_READ(BXT_MIPI_PORT_CTRL(port)); I915_WRITE(BXT_MIPI_PORT_CTRL(port), val | LP_OUTPUT_HOLD); usleep_range(2000, 2500); + } - /* 2. Enter ULPS */ - val = I915_READ(MIPI_DEVICE_READY(port)); - val &= ~ULPS_STATE_MASK; - val |= (ULPS_STATE_ENTER | DEVICE_READY); - I915_WRITE(MIPI_DEVICE_READY(port), val); - /* at least 2us - relaxed for hrtimer subsystem optimization */ - usleep_range(10, 50); - - /* 3. Exit ULPS */ + /* Clear ULPS and set device ready */ + for_each_dsi_port(port, intel_dsi->ports) { val = I915_READ(MIPI_DEVICE_READY(port)); val &= ~ULPS_STATE_MASK; - val |= (ULPS_STATE_EXIT | DEVICE_READY); I915_WRITE(MIPI_DEVICE_READY(port), val); - usleep_range(1000, 1500); - - /* Clear ULPS and set device ready */ - val = I915_READ(MIPI_DEVICE_READY(port)); - val &= ~ULPS_STATE_MASK; + usleep_range(2000, 2500); val |= DEVICE_READY; I915_WRITE(MIPI_DEVICE_READY(port), val); } -- cgit From bbdf0b2ff32aa75c7bd167569130e9391d2e6282 Mon Sep 17 00:00:00 2001 From: Uma Shankar Date: Wed, 8 Feb 2017 16:20:56 +0530 Subject: drm/i915/bxt: Disable device ready before shutdown command Disable device ready before MIPI port shutdown command. This helps to avoid mipi split screen issues. Signed-off-by: Uma Shankar Signed-off-by: Vidya Srinivas Reviewed-by: Bob Paauwe Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/1486551058-22596-8-git-send-email-vidya.srinivas@intel.com --- drivers/gpu/drm/i915/intel_dsi.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_dsi.c b/drivers/gpu/drm/i915/intel_dsi.c index e7e823d00444..c26fe4fc0819 100644 --- a/drivers/gpu/drm/i915/intel_dsi.c +++ b/drivers/gpu/drm/i915/intel_dsi.c @@ -612,6 +612,8 @@ static void intel_dsi_pre_disable(struct intel_encoder *encoder, struct intel_crtc_state *old_crtc_state, struct drm_connector_state *old_conn_state) { + struct drm_device *dev = encoder->base.dev; + struct drm_i915_private *dev_priv = dev->dev_private; struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); enum port port; @@ -619,6 +621,15 @@ static void intel_dsi_pre_disable(struct intel_encoder *encoder, intel_panel_disable_backlight(intel_dsi->attached_connector); + /* + * Disable Device ready before the port shutdown in order + * to avoid split screen + */ + if (IS_BROXTON(dev_priv)) { + for_each_dsi_port(port, intel_dsi->ports) + I915_WRITE(MIPI_DEVICE_READY(port), 0); + } + if (is_vid_mode(intel_dsi)) { /* Send Shutdown command to the panel in LP mode */ for_each_dsi_port(port, intel_dsi->ports) -- cgit From c1d2061b28c2aa25ec39b60d9c248e6beebd7315 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 16 Feb 2017 12:54:41 +0000 Subject: drm/i915: Squelch any ktime/jiffie rounding errors for wait-ioctl We wait upon jiffies, but report the time elapsed using a high-resolution timer. This discrepancy can lead to us timing out the wait prior to us reporting the elapsed time as complete. This restores the squelching lost in commit e95433c73a11 ("drm/i915: Rearrange i915_wait_request() accounting with callers"). Fixes: e95433c73a11 ("drm/i915: Rearrange i915_wait_request() accounting with callers") Signed-off-by: Chris Wilson Cc: Matthew Auld Cc: Joonas Lahtinen Cc: # v4.10-rc1+ Cc: stable@vger.kernel.org Link: http://patchwork.freedesktop.org/patch/msgid/20170216125441.30923-1-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_gem.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 71297920fdf4..e6b8a4f56bbe 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3072,6 +3072,16 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file) args->timeout_ns -= ktime_to_ns(ktime_sub(ktime_get(), start)); if (args->timeout_ns < 0) args->timeout_ns = 0; + + /* + * Apparently ktime isn't accurate enough and occasionally has a + * bit of mismatch in the jiffies<->nsecs<->ktime loop. So patch + * things up to make the test happy. We allow up to 1 jiffy. + * + * This is a regression from the timespec->ktime conversion. + */ + if (ret == -ETIME && !nsecs_to_jiffies(args->timeout_ns)) + args->timeout_ns = 0; } i915_gem_object_put(obj); -- cgit From 581ab1fe520b05447b4869fb7e3136c3a600abad Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 15 Feb 2017 16:39:00 +0000 Subject: drm/i915: Unwind conversion to i915_gem_phys_ops on failure The physical object is treated as permanently pinned. If we fail to take this initial pin during i915_gem_object_attach_phys() we need to revert it back to an ordinary shmemfs object before reporting the failure. v2: git-add Reported-by: Mika Kuoppala Signed-off-by: Chris Wilson Cc: Mika Kuoppala Link: http://patchwork.freedesktop.org/patch/msgid/20170215163900.11606-1-chris@chris-wilson.co.uk Reviewed-by: Mika Kuoppala --- drivers/gpu/drm/i915/i915_gem.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index e6b8a4f56bbe..5f7b8c88eb7e 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -313,6 +313,8 @@ static const struct drm_i915_gem_object_ops i915_gem_phys_ops = { .release = i915_gem_object_release_phys, }; +static const struct drm_i915_gem_object_ops i915_gem_object_ops; + int i915_gem_object_unbind(struct drm_i915_gem_object *obj) { struct i915_vma *vma; @@ -586,9 +588,18 @@ i915_gem_object_attach_phys(struct drm_i915_gem_object *obj, if (obj->mm.pages) return -EBUSY; + GEM_BUG_ON(obj->ops != &i915_gem_object_ops); obj->ops = &i915_gem_phys_ops; - return i915_gem_object_pin_pages(obj); + ret = i915_gem_object_pin_pages(obj); + if (ret) + goto err_xfer; + + return 0; + +err_xfer: + obj->ops = &i915_gem_object_ops; + return ret; } static int -- cgit From 70001cd256548391add796cce40bd2e2394d4297 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 16 Feb 2017 09:46:21 +0000 Subject: drm/i915: Remove struct_mutex for destroying framebuffers We do not need to hold struct_mutex for destroying drm_i915_gem_objects any longer, and with a little care taken over tracking obj->framebuffer_references, we can relinquish BKL locking around the destroy of intel_framebuffer. v2: Use atomic check for WARN_ON framebuffer miscounting Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170216094621.3426-1-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_gem_object.h | 2 +- drivers/gpu/drm/i915/i915_gem_shrinker.c | 2 +- drivers/gpu/drm/i915/i915_gem_tiling.c | 2 +- drivers/gpu/drm/i915/intel_display.c | 9 ++++----- 4 files changed, 7 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_object.h b/drivers/gpu/drm/i915/i915_gem_object.h index 02365a5f7c80..ef4893a4f08c 100644 --- a/drivers/gpu/drm/i915/i915_gem_object.h +++ b/drivers/gpu/drm/i915/i915_gem_object.h @@ -165,7 +165,7 @@ struct drm_i915_gem_object { struct reservation_object *resv; /** References from framebuffers, locks out tiling changes. */ - unsigned long framebuffer_references; + atomic_t framebuffer_references; /** Record of address bit 17 of each page at last unbind. */ unsigned long *bit_17; diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c index f249a1eb46ac..8bc515e8b2a2 100644 --- a/drivers/gpu/drm/i915/i915_gem_shrinker.c +++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c @@ -207,7 +207,7 @@ i915_gem_shrink(struct drm_i915_private *dev_priv, if (!(flags & I915_SHRINK_ACTIVE) && (i915_gem_object_is_active(obj) || - obj->framebuffer_references)) + atomic_read(&obj->framebuffer_references))) continue; if (!can_release_pages(obj)) diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c index 8b2b507bdf7e..46ade36dcee6 100644 --- a/drivers/gpu/drm/i915/i915_gem_tiling.c +++ b/drivers/gpu/drm/i915/i915_gem_tiling.c @@ -238,7 +238,7 @@ i915_gem_object_set_tiling(struct drm_i915_gem_object *obj, if ((tiling | stride) == obj->tiling_and_stride) return 0; - if (obj->framebuffer_references) + if (atomic_read(&obj->framebuffer_references)) return -EBUSY; /* We need to rebind the object if its current allocation diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 660581fb329a..e483540d2439 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -14265,14 +14265,13 @@ static void intel_setup_outputs(struct drm_i915_private *dev_priv) static void intel_user_framebuffer_destroy(struct drm_framebuffer *fb) { - struct drm_device *dev = fb->dev; struct intel_framebuffer *intel_fb = to_intel_framebuffer(fb); drm_framebuffer_cleanup(fb); - mutex_lock(&dev->struct_mutex); - WARN_ON(!intel_fb->obj->framebuffer_references--); + + WARN_ON(atomic_dec_return(&intel_fb->obj->framebuffer_references) < 0); i915_gem_object_put(intel_fb->obj); - mutex_unlock(&dev->struct_mutex); + kfree(intel_fb); } @@ -14509,7 +14508,7 @@ static int intel_framebuffer_init(struct drm_device *dev, return ret; } - intel_fb->obj->framebuffer_references++; + atomic_inc(&intel_fb->obj->framebuffer_references); return 0; } -- cgit From 24dbf51a5517b79de9585dd216488aed09fd5ee8 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 15 Feb 2017 10:59:18 +0000 Subject: drm/i915: struct_mutex is not required for allocating the framebuffer We do not need the BKL struct_mutex in order to allocate a GEM object, nor to create the framebuffer, so resist the temptation to take the BKL willy nilly. As this changes the locking contract around internal API calls, the patch is a little larger than a plain removal of a pair of mutex_lock/unlock. Signed-off-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20170215105919.7347-2-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen --- drivers/gpu/drm/i915/intel_display.c | 109 +++++++++++++++-------------------- drivers/gpu/drm/i915/intel_drv.h | 7 +-- drivers/gpu/drm/i915/intel_fbdev.c | 21 +++---- 3 files changed, 59 insertions(+), 78 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index e483540d2439..5443ba8430da 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -96,10 +96,9 @@ static void i9xx_crtc_clock_get(struct intel_crtc *crtc, static void ironlake_pch_clock_get(struct intel_crtc *crtc, struct intel_crtc_state *pipe_config); -static int intel_framebuffer_init(struct drm_device *dev, - struct intel_framebuffer *ifb, - struct drm_mode_fb_cmd2 *mode_cmd, - struct drm_i915_gem_object *obj); +static int intel_framebuffer_init(struct intel_framebuffer *ifb, + struct drm_i915_gem_object *obj, + struct drm_mode_fb_cmd2 *mode_cmd); static void i9xx_set_pipeconf(struct intel_crtc *intel_crtc); static void intel_set_pipe_timings(struct intel_crtc *intel_crtc); static void intel_set_pipe_src_size(struct intel_crtc *intel_crtc); @@ -2052,11 +2051,13 @@ static void intel_tile_dims(const struct drm_i915_private *dev_priv, } unsigned int -intel_fb_align_height(struct drm_device *dev, unsigned int height, - uint32_t pixel_format, uint64_t fb_modifier) +intel_fb_align_height(struct drm_i915_private *dev_priv, + unsigned int height, + uint32_t pixel_format, + uint64_t fb_modifier) { unsigned int cpp = drm_format_plane_cpp(pixel_format, 0); - unsigned int tile_height = intel_tile_height(to_i915(dev), fb_modifier, cpp); + unsigned int tile_height = intel_tile_height(dev_priv, fb_modifier, cpp); return ALIGN(height, tile_height); } @@ -2622,15 +2623,13 @@ intel_alloc_initial_plane_obj(struct intel_crtc *crtc, return false; mutex_lock(&dev->struct_mutex); - obj = i915_gem_object_create_stolen_for_preallocated(dev_priv, base_aligned, base_aligned, size_aligned); - if (!obj) { - mutex_unlock(&dev->struct_mutex); + mutex_unlock(&dev->struct_mutex); + if (!obj) return false; - } if (plane_config->tiling == I915_TILING_X) obj->tiling_and_stride = fb->pitches[0] | I915_TILING_X; @@ -2642,20 +2641,17 @@ intel_alloc_initial_plane_obj(struct intel_crtc *crtc, mode_cmd.modifier[0] = fb->modifier; mode_cmd.flags = DRM_MODE_FB_MODIFIERS; - if (intel_framebuffer_init(dev, to_intel_framebuffer(fb), - &mode_cmd, obj)) { + if (intel_framebuffer_init(to_intel_framebuffer(fb), obj, &mode_cmd)) { DRM_DEBUG_KMS("intel fb init failed\n"); goto out_unref_obj; } - mutex_unlock(&dev->struct_mutex); DRM_DEBUG_KMS("initial plane fb obj %p\n", obj); return true; out_unref_obj: i915_gem_object_put(obj); - mutex_unlock(&dev->struct_mutex); return false; } @@ -7434,7 +7430,8 @@ i9xx_get_initial_plane_config(struct intel_crtc *crtc, val = I915_READ(DSPSTRIDE(pipe)); fb->pitches[0] = val & 0xffffffc0; - aligned_height = intel_fb_align_height(dev, fb->height, + aligned_height = intel_fb_align_height(dev_priv, + fb->height, fb->format->format, fb->modifier); @@ -8475,7 +8472,8 @@ skylake_get_initial_plane_config(struct intel_crtc *crtc, fb->format->format); fb->pitches[0] = (val & 0x3ff) * stride_mult; - aligned_height = intel_fb_align_height(dev, fb->height, + aligned_height = intel_fb_align_height(dev_priv, + fb->height, fb->format->format, fb->modifier); @@ -8573,7 +8571,8 @@ ironlake_get_initial_plane_config(struct intel_crtc *crtc, val = I915_READ(DSPSTRIDE(pipe)); fb->pitches[0] = val & 0xffffffc0; - aligned_height = intel_fb_align_height(dev, fb->height, + aligned_height = intel_fb_align_height(dev_priv, + fb->height, fb->format->format, fb->modifier); @@ -9399,9 +9398,8 @@ static struct drm_display_mode load_detect_mode = { }; struct drm_framebuffer * -__intel_framebuffer_create(struct drm_device *dev, - struct drm_mode_fb_cmd2 *mode_cmd, - struct drm_i915_gem_object *obj) +intel_framebuffer_create(struct drm_i915_gem_object *obj, + struct drm_mode_fb_cmd2 *mode_cmd) { struct intel_framebuffer *intel_fb; int ret; @@ -9410,7 +9408,7 @@ __intel_framebuffer_create(struct drm_device *dev, if (!intel_fb) return ERR_PTR(-ENOMEM); - ret = intel_framebuffer_init(dev, intel_fb, mode_cmd, obj); + ret = intel_framebuffer_init(intel_fb, obj, mode_cmd); if (ret) goto err; @@ -9421,23 +9419,6 @@ err: return ERR_PTR(ret); } -static struct drm_framebuffer * -intel_framebuffer_create(struct drm_device *dev, - struct drm_mode_fb_cmd2 *mode_cmd, - struct drm_i915_gem_object *obj) -{ - struct drm_framebuffer *fb; - int ret; - - ret = i915_mutex_lock_interruptible(dev); - if (ret) - return ERR_PTR(ret); - fb = __intel_framebuffer_create(dev, mode_cmd, obj); - mutex_unlock(&dev->struct_mutex); - - return fb; -} - static u32 intel_framebuffer_pitch_for_width(int width, int bpp) { @@ -9472,7 +9453,7 @@ intel_framebuffer_create_for_mode(struct drm_device *dev, bpp); mode_cmd.pixel_format = drm_mode_legacy_fb_format(bpp, depth); - fb = intel_framebuffer_create(dev, &mode_cmd, obj); + fb = intel_framebuffer_create(obj, &mode_cmd); if (IS_ERR(fb)) i915_gem_object_put(obj); @@ -14319,7 +14300,7 @@ static u32 intel_fb_pitch_limit(struct drm_i915_private *dev_priv, uint64_t fb_modifier, uint32_t pixel_format) { - u32 gen = INTEL_INFO(dev_priv)->gen; + u32 gen = INTEL_GEN(dev_priv); if (gen >= 9) { int cpp = drm_format_plane_cpp(pixel_format, 0); @@ -14346,18 +14327,17 @@ u32 intel_fb_pitch_limit(struct drm_i915_private *dev_priv, } } -static int intel_framebuffer_init(struct drm_device *dev, - struct intel_framebuffer *intel_fb, - struct drm_mode_fb_cmd2 *mode_cmd, - struct drm_i915_gem_object *obj) +static int intel_framebuffer_init(struct intel_framebuffer *intel_fb, + struct drm_i915_gem_object *obj, + struct drm_mode_fb_cmd2 *mode_cmd) { - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = to_i915(obj->base.dev); unsigned int tiling = i915_gem_object_get_tiling(obj); - int ret; u32 pitch_limit, stride_alignment; struct drm_format_name_buf format_name; + int ret = -EINVAL; - WARN_ON(!mutex_is_locked(&dev->struct_mutex)); + atomic_inc(&obj->framebuffer_references); if (mode_cmd->flags & DRM_MODE_FB_MODIFIERS) { /* @@ -14367,14 +14347,14 @@ static int intel_framebuffer_init(struct drm_device *dev, if (tiling != I915_TILING_NONE && tiling != intel_fb_modifier_to_tiling(mode_cmd->modifier[0])) { DRM_DEBUG("tiling_mode doesn't match fb modifier\n"); - return -EINVAL; + goto err; } } else { if (tiling == I915_TILING_X) { mode_cmd->modifier[0] = I915_FORMAT_MOD_X_TILED; } else if (tiling == I915_TILING_Y) { DRM_DEBUG("No Y tiling for legacy addfb\n"); - return -EINVAL; + goto err; } } @@ -14385,7 +14365,7 @@ static int intel_framebuffer_init(struct drm_device *dev, if (INTEL_GEN(dev_priv) < 9) { DRM_DEBUG("Unsupported tiling 0x%llx!\n", mode_cmd->modifier[0]); - return -EINVAL; + goto err; } case DRM_FORMAT_MOD_NONE: case I915_FORMAT_MOD_X_TILED: @@ -14393,7 +14373,7 @@ static int intel_framebuffer_init(struct drm_device *dev, default: DRM_DEBUG("Unsupported fb modifier 0x%llx!\n", mode_cmd->modifier[0]); - return -EINVAL; + goto err; } /* @@ -14412,7 +14392,7 @@ static int intel_framebuffer_init(struct drm_device *dev, if (mode_cmd->pitches[0] & (stride_alignment - 1)) { DRM_DEBUG("pitch (%d) must be at least %u byte aligned\n", mode_cmd->pitches[0], stride_alignment); - return -EINVAL; + goto err; } pitch_limit = intel_fb_pitch_limit(dev_priv, mode_cmd->modifier[0], @@ -14422,7 +14402,7 @@ static int intel_framebuffer_init(struct drm_device *dev, mode_cmd->modifier[0] != DRM_FORMAT_MOD_NONE ? "tiled" : "linear", mode_cmd->pitches[0], pitch_limit); - return -EINVAL; + goto err; } /* @@ -14434,7 +14414,7 @@ static int intel_framebuffer_init(struct drm_device *dev, DRM_DEBUG("pitch (%d) must match tiling stride (%d)\n", mode_cmd->pitches[0], i915_gem_object_get_stride(obj)); - return -EINVAL; + goto err; } /* Reject formats not supported by any plane early. */ @@ -14493,24 +14473,29 @@ static int intel_framebuffer_init(struct drm_device *dev, /* FIXME need to adjust LINOFF/TILEOFF accordingly. */ if (mode_cmd->offsets[0] != 0) - return -EINVAL; + goto err; - drm_helper_mode_fill_fb_struct(dev, &intel_fb->base, mode_cmd); + drm_helper_mode_fill_fb_struct(&dev_priv->drm, + &intel_fb->base, mode_cmd); intel_fb->obj = obj; ret = intel_fill_fb_info(dev_priv, &intel_fb->base); if (ret) return ret; - ret = drm_framebuffer_init(dev, &intel_fb->base, &intel_fb_funcs); + ret = drm_framebuffer_init(obj->base.dev, + &intel_fb->base, + &intel_fb_funcs); if (ret) { DRM_ERROR("framebuffer init failed %d\n", ret); - return ret; + goto err; } - atomic_inc(&intel_fb->obj->framebuffer_references); - return 0; + +err: + atomic_dec(&obj->framebuffer_references); + return ret; } static struct drm_framebuffer * @@ -14526,7 +14511,7 @@ intel_user_framebuffer_create(struct drm_device *dev, if (!obj) return ERR_PTR(-ENOENT); - fb = intel_framebuffer_create(dev, &mode_cmd, obj); + fb = intel_framebuffer_create(obj, &mode_cmd); if (IS_ERR(fb)) i915_gem_object_put(obj); diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index f1cbeae5cf88..7e27aef0849b 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -1233,7 +1233,7 @@ void intel_ddi_clock_get(struct intel_encoder *encoder, struct intel_crtc_state *pipe_config); void intel_ddi_set_vc_payload_alloc(struct drm_crtc *crtc, bool state); uint32_t ddi_signal_levels(struct intel_dp *intel_dp); -unsigned int intel_fb_align_height(struct drm_device *dev, +unsigned int intel_fb_align_height(struct drm_i915_private *dev_priv, unsigned int height, uint32_t pixel_format, uint64_t fb_format_modifier); @@ -1341,9 +1341,8 @@ struct i915_vma * intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb, unsigned int rotation); void intel_unpin_fb_vma(struct i915_vma *vma); struct drm_framebuffer * -__intel_framebuffer_create(struct drm_device *dev, - struct drm_mode_fb_cmd2 *mode_cmd, - struct drm_i915_gem_object *obj); +intel_framebuffer_create(struct drm_i915_gem_object *obj, + struct drm_mode_fb_cmd2 *mode_cmd); void intel_finish_page_flip_cs(struct drm_i915_private *dev_priv, int pipe); void intel_finish_page_flip_mmio(struct drm_i915_private *dev_priv, int pipe); void intel_check_page_flip(struct drm_i915_private *dev_priv, int pipe); diff --git a/drivers/gpu/drm/i915/intel_fbdev.c b/drivers/gpu/drm/i915/intel_fbdev.c index 5f8e1d65a4b7..365f08ddf365 100644 --- a/drivers/gpu/drm/i915/intel_fbdev.c +++ b/drivers/gpu/drm/i915/intel_fbdev.c @@ -121,7 +121,7 @@ static int intelfb_alloc(struct drm_fb_helper *helper, struct drm_i915_private *dev_priv = to_i915(dev); struct i915_ggtt *ggtt = &dev_priv->ggtt; struct drm_mode_fb_cmd2 mode_cmd = {}; - struct drm_i915_gem_object *obj = NULL; + struct drm_i915_gem_object *obj; int size, ret; /* we don't do packed 24bpp */ @@ -136,14 +136,13 @@ static int intelfb_alloc(struct drm_fb_helper *helper, mode_cmd.pixel_format = drm_mode_legacy_fb_format(sizes->surface_bpp, sizes->surface_depth); - mutex_lock(&dev->struct_mutex); - size = mode_cmd.pitches[0] * mode_cmd.height; size = PAGE_ALIGN(size); /* If the FB is too big, just don't use it since fbdev is not very * important and we should probably use that space with FBC or other * features. */ + obj = NULL; if (size * 2 < ggtt->stolen_usable_size) obj = i915_gem_object_create_stolen(dev_priv, size); if (obj == NULL) @@ -151,24 +150,22 @@ static int intelfb_alloc(struct drm_fb_helper *helper, if (IS_ERR(obj)) { DRM_ERROR("failed to allocate framebuffer\n"); ret = PTR_ERR(obj); - goto out; + goto err; } - fb = __intel_framebuffer_create(dev, &mode_cmd, obj); + fb = intel_framebuffer_create(obj, &mode_cmd); if (IS_ERR(fb)) { - i915_gem_object_put(obj); ret = PTR_ERR(fb); - goto out; + goto err_obj; } - mutex_unlock(&dev->struct_mutex); - ifbdev->fb = to_intel_framebuffer(fb); return 0; -out: - mutex_unlock(&dev->struct_mutex); +err_obj: + i915_gem_object_put(obj); +err: return ret; } @@ -631,7 +628,7 @@ static bool intel_fbdev_init_bios(struct drm_device *dev, } cur_size = intel_crtc->config->base.adjusted_mode.crtc_vdisplay; - cur_size = intel_fb_align_height(dev, cur_size, + cur_size = intel_fb_align_height(to_i915(dev), cur_size, fb->base.format->format, fb->base.modifier); cur_size *= fb->base.pitches[0]; -- cgit From fabef825626d7bd05a321e4427fdf31a169b5173 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 15 Feb 2017 10:59:19 +0000 Subject: drm/i915: Drop struct_mutex around frontbuffer flushes Since the frontbuffer has self-contained locking, it does not require us to hold the BKL struct_mutex as we send invalidate and flush messages. Signed-off-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20170215105919.7347-3-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen --- drivers/gpu/drm/i915/intel_fbdev.c | 43 +++++++++++++++----------------------- 1 file changed, 17 insertions(+), 26 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_fbdev.c b/drivers/gpu/drm/i915/intel_fbdev.c index 365f08ddf365..c404ab524ccc 100644 --- a/drivers/gpu/drm/i915/intel_fbdev.c +++ b/drivers/gpu/drm/i915/intel_fbdev.c @@ -45,6 +45,14 @@ #include #include "i915_drv.h" +static void intel_fbdev_invalidate(struct intel_fbdev *ifbdev) +{ + struct drm_i915_gem_object *obj = ifbdev->fb->obj; + unsigned int origin = ifbdev->vma->fence ? ORIGIN_GTT : ORIGIN_CPU; + + intel_fb_obj_invalidate(obj, origin); +} + static int intel_fbdev_set_par(struct fb_info *info) { struct drm_fb_helper *fb_helper = info->par; @@ -53,12 +61,8 @@ static int intel_fbdev_set_par(struct fb_info *info) int ret; ret = drm_fb_helper_set_par(info); - - if (ret == 0) { - mutex_lock(&fb_helper->dev->struct_mutex); - intel_fb_obj_invalidate(ifbdev->fb->obj, ORIGIN_GTT); - mutex_unlock(&fb_helper->dev->struct_mutex); - } + if (ret == 0) + intel_fbdev_invalidate(ifbdev); return ret; } @@ -71,12 +75,8 @@ static int intel_fbdev_blank(int blank, struct fb_info *info) int ret; ret = drm_fb_helper_blank(blank, info); - - if (ret == 0) { - mutex_lock(&fb_helper->dev->struct_mutex); - intel_fb_obj_invalidate(ifbdev->fb->obj, ORIGIN_GTT); - mutex_unlock(&fb_helper->dev->struct_mutex); - } + if (ret == 0) + intel_fbdev_invalidate(ifbdev); return ret; } @@ -87,15 +87,11 @@ static int intel_fbdev_pan_display(struct fb_var_screeninfo *var, struct drm_fb_helper *fb_helper = info->par; struct intel_fbdev *ifbdev = container_of(fb_helper, struct intel_fbdev, helper); - int ret; - ret = drm_fb_helper_pan_display(var, info); - if (ret == 0) { - mutex_lock(&fb_helper->dev->struct_mutex); - intel_fb_obj_invalidate(ifbdev->fb->obj, ORIGIN_GTT); - mutex_unlock(&fb_helper->dev->struct_mutex); - } + ret = drm_fb_helper_pan_display(var, info); + if (ret == 0) + intel_fbdev_invalidate(ifbdev); return ret; } @@ -838,11 +834,6 @@ void intel_fbdev_restore_mode(struct drm_device *dev) if (!ifbdev->fb) return; - if (drm_fb_helper_restore_fbdev_mode_unlocked(&ifbdev->helper)) { - DRM_DEBUG("failed to restore crtc mode\n"); - } else { - mutex_lock(&dev->struct_mutex); - intel_fb_obj_invalidate(ifbdev->fb->obj, ORIGIN_GTT); - mutex_unlock(&dev->struct_mutex); - } + if (drm_fb_helper_restore_fbdev_mode_unlocked(&ifbdev->helper) == 0) + intel_fbdev_invalidate(ifbdev); } -- cgit From 2f35afe94af0f7dc451253d848af442e5d7a7715 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Thu, 16 Feb 2017 12:23:21 +0000 Subject: drm/i915: Make int __intel_ring_space static It is only used within intel_ringbuffer.c Signed-off-by: Tvrtko Ursulin Reviewed-by: Chris Wilson --- drivers/gpu/drm/i915/intel_ringbuffer.c | 2 +- drivers/gpu/drm/i915/intel_ringbuffer.h | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index f7499829ecc2..8c17db72489f 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -39,7 +39,7 @@ */ #define LEGACY_REQUEST_SIZE 200 -int __intel_ring_space(int head, int tail, int size) +static int __intel_ring_space(int head, int tail, int size) { int space = head - tail; if (space <= 0) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 16714096810d..1d9fd4e42622 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -523,7 +523,6 @@ intel_ring_offset(struct drm_i915_gem_request *req, void *addr) return offset & (req->ring->size - 1); } -int __intel_ring_space(int head, int tail, int size); void intel_ring_update_space(struct intel_ring *ring); void intel_engine_init_global_seqno(struct intel_engine_cs *engine, u32 seqno); -- cgit From 8ee7c6e23bb1b3c37ef27e81395db056bd7eac53 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Thu, 16 Feb 2017 12:23:22 +0000 Subject: drm/i915: Simplify cleanup path in intel_engines_init We can call the engine cleanup vfunc instead of duplicating the decision making here. Signed-off-by: Tvrtko Ursulin Reviewed-by: Chris Wilson --- drivers/gpu/drm/i915/intel_engine_cs.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 538d845d7251..012ac9483491 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -210,10 +210,8 @@ cleanup: for_each_engine(engine, dev_priv, id) { if (id >= err_id) kfree(engine); - else if (i915.enable_execlists) - intel_logical_ring_cleanup(engine); else - intel_engine_cleanup(engine); + dev_priv->gt.cleanup_engine(engine); } return err; } -- cgit From 133b4bd74d89220c612d4adfabb2f41f6f432184 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Thu, 16 Feb 2017 12:23:23 +0000 Subject: drm/i915: Move common workaround code to intel_engine_cs It is used by all submission backends. Signed-off-by: Tvrtko Ursulin Reviewed-by: Chris Wilson --- drivers/gpu/drm/i915/intel_engine_cs.c | 550 ++++++++++++++++++++++++++++++++ drivers/gpu/drm/i915/intel_ringbuffer.c | 550 -------------------------------- 2 files changed, 550 insertions(+), 550 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 012ac9483491..46c94740be53 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -523,6 +523,556 @@ void intel_engine_get_instdone(struct intel_engine_cs *engine, } } +static int wa_add(struct drm_i915_private *dev_priv, + i915_reg_t addr, + const u32 mask, const u32 val) +{ + const u32 idx = dev_priv->workarounds.count; + + if (WARN_ON(idx >= I915_MAX_WA_REGS)) + return -ENOSPC; + + dev_priv->workarounds.reg[idx].addr = addr; + dev_priv->workarounds.reg[idx].value = val; + dev_priv->workarounds.reg[idx].mask = mask; + + dev_priv->workarounds.count++; + + return 0; +} + +#define WA_REG(addr, mask, val) do { \ + const int r = wa_add(dev_priv, (addr), (mask), (val)); \ + if (r) \ + return r; \ + } while (0) + +#define WA_SET_BIT_MASKED(addr, mask) \ + WA_REG(addr, (mask), _MASKED_BIT_ENABLE(mask)) + +#define WA_CLR_BIT_MASKED(addr, mask) \ + WA_REG(addr, (mask), _MASKED_BIT_DISABLE(mask)) + +#define WA_SET_FIELD_MASKED(addr, mask, value) \ + WA_REG(addr, mask, _MASKED_FIELD(mask, value)) + +#define WA_SET_BIT(addr, mask) WA_REG(addr, mask, I915_READ(addr) | (mask)) +#define WA_CLR_BIT(addr, mask) WA_REG(addr, mask, I915_READ(addr) & ~(mask)) + +#define WA_WRITE(addr, val) WA_REG(addr, 0xffffffff, val) + +static int wa_ring_whitelist_reg(struct intel_engine_cs *engine, + i915_reg_t reg) +{ + struct drm_i915_private *dev_priv = engine->i915; + struct i915_workarounds *wa = &dev_priv->workarounds; + const uint32_t index = wa->hw_whitelist_count[engine->id]; + + if (WARN_ON(index >= RING_MAX_NONPRIV_SLOTS)) + return -EINVAL; + + WA_WRITE(RING_FORCE_TO_NONPRIV(engine->mmio_base, index), + i915_mmio_reg_offset(reg)); + wa->hw_whitelist_count[engine->id]++; + + return 0; +} + +static int gen8_init_workarounds(struct intel_engine_cs *engine) +{ + struct drm_i915_private *dev_priv = engine->i915; + + WA_SET_BIT_MASKED(INSTPM, INSTPM_FORCE_ORDERING); + + /* WaDisableAsyncFlipPerfMode:bdw,chv */ + WA_SET_BIT_MASKED(MI_MODE, ASYNC_FLIP_PERF_DISABLE); + + /* WaDisablePartialInstShootdown:bdw,chv */ + WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, + PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE); + + /* Use Force Non-Coherent whenever executing a 3D context. This is a + * workaround for for a possible hang in the unlikely event a TLB + * invalidation occurs during a PSD flush. + */ + /* WaForceEnableNonCoherent:bdw,chv */ + /* WaHdcDisableFetchWhenMasked:bdw,chv */ + WA_SET_BIT_MASKED(HDC_CHICKEN0, + HDC_DONOT_FETCH_MEM_WHEN_MASKED | + HDC_FORCE_NON_COHERENT); + + /* From the Haswell PRM, Command Reference: Registers, CACHE_MODE_0: + * "The Hierarchical Z RAW Stall Optimization allows non-overlapping + * polygons in the same 8x4 pixel/sample area to be processed without + * stalling waiting for the earlier ones to write to Hierarchical Z + * buffer." + * + * This optimization is off by default for BDW and CHV; turn it on. + */ + WA_CLR_BIT_MASKED(CACHE_MODE_0_GEN7, HIZ_RAW_STALL_OPT_DISABLE); + + /* Wa4x4STCOptimizationDisable:bdw,chv */ + WA_SET_BIT_MASKED(CACHE_MODE_1, GEN8_4x4_STC_OPTIMIZATION_DISABLE); + + /* + * BSpec recommends 8x4 when MSAA is used, + * however in practice 16x4 seems fastest. + * + * Note that PS/WM thread counts depend on the WIZ hashing + * disable bit, which we don't touch here, but it's good + * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM). + */ + WA_SET_FIELD_MASKED(GEN7_GT_MODE, + GEN6_WIZ_HASHING_MASK, + GEN6_WIZ_HASHING_16x4); + + return 0; +} + +static int bdw_init_workarounds(struct intel_engine_cs *engine) +{ + struct drm_i915_private *dev_priv = engine->i915; + int ret; + + ret = gen8_init_workarounds(engine); + if (ret) + return ret; + + /* WaDisableThreadStallDopClockGating:bdw (pre-production) */ + WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE); + + /* WaDisableDopClockGating:bdw + * + * Also see the related UCGTCL1 write in broadwell_init_clock_gating() + * to disable EUTC clock gating. + */ + WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2, + DOP_CLOCK_GATING_DISABLE); + + WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, + GEN8_SAMPLER_POWER_BYPASS_DIS); + + WA_SET_BIT_MASKED(HDC_CHICKEN0, + /* WaForceContextSaveRestoreNonCoherent:bdw */ + HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT | + /* WaDisableFenceDestinationToSLM:bdw (pre-prod) */ + (IS_BDW_GT3(dev_priv) ? HDC_FENCE_DEST_SLM_DISABLE : 0)); + + return 0; +} + +static int chv_init_workarounds(struct intel_engine_cs *engine) +{ + struct drm_i915_private *dev_priv = engine->i915; + int ret; + + ret = gen8_init_workarounds(engine); + if (ret) + return ret; + + /* WaDisableThreadStallDopClockGating:chv */ + WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE); + + /* Improve HiZ throughput on CHV. */ + WA_SET_BIT_MASKED(HIZ_CHICKEN, CHV_HZ_8X8_MODE_IN_1X); + + return 0; +} + +static int gen9_init_workarounds(struct intel_engine_cs *engine) +{ + struct drm_i915_private *dev_priv = engine->i915; + int ret; + + /* WaConextSwitchWithConcurrentTLBInvalidate:skl,bxt,kbl,glk */ + I915_WRITE(GEN9_CSFE_CHICKEN1_RCS, _MASKED_BIT_ENABLE(GEN9_PREEMPT_GPGPU_SYNC_SWITCH_DISABLE)); + + /* WaEnableLbsSlaRetryTimerDecrement:skl,bxt,kbl,glk */ + I915_WRITE(BDW_SCRATCH1, I915_READ(BDW_SCRATCH1) | + GEN9_LBS_SLA_RETRY_TIMER_DECREMENT_ENABLE); + + /* WaDisableKillLogic:bxt,skl,kbl */ + I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | + ECOCHK_DIS_TLB); + + /* WaClearFlowControlGpgpuContextSave:skl,bxt,kbl,glk */ + /* WaDisablePartialInstShootdown:skl,bxt,kbl,glk */ + WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, + FLOW_CONTROL_ENABLE | + PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE); + + /* Syncing dependencies between camera and graphics:skl,bxt,kbl */ + WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, + GEN9_DISABLE_OCL_OOB_SUPPRESS_LOGIC); + + /* WaDisableDgMirrorFixInHalfSliceChicken5:bxt */ + if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) + WA_CLR_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN5, + GEN9_DG_MIRROR_FIX_ENABLE); + + /* WaSetDisablePixMaskCammingAndRhwoInCommonSliceChicken:bxt */ + if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) { + WA_SET_BIT_MASKED(GEN7_COMMON_SLICE_CHICKEN1, + GEN9_RHWO_OPTIMIZATION_DISABLE); + /* + * WA also requires GEN9_SLICE_COMMON_ECO_CHICKEN0[14:14] to be set + * but we do that in per ctx batchbuffer as there is an issue + * with this register not getting restored on ctx restore + */ + } + + /* WaEnableSamplerGPGPUPreemptionSupport:skl,bxt,kbl */ + WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7, + GEN9_ENABLE_GPGPU_PREEMPTION); + + /* Wa4x4STCOptimizationDisable:skl,bxt,kbl,glk */ + /* WaDisablePartialResolveInVc:skl,bxt,kbl */ + WA_SET_BIT_MASKED(CACHE_MODE_1, (GEN8_4x4_STC_OPTIMIZATION_DISABLE | + GEN9_PARTIAL_RESOLVE_IN_VC_DISABLE)); + + /* WaCcsTlbPrefetchDisable:skl,bxt,kbl,glk */ + WA_CLR_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN5, + GEN9_CCS_TLB_PREFETCH_ENABLE); + + /* WaDisableMaskBasedCammingInRCC:bxt */ + if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) + WA_SET_BIT_MASKED(SLICE_ECO_CHICKEN0, + PIXEL_MASK_CAMMING_DISABLE); + + /* WaForceContextSaveRestoreNonCoherent:skl,bxt,kbl */ + WA_SET_BIT_MASKED(HDC_CHICKEN0, + HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT | + HDC_FORCE_CSR_NON_COHERENT_OVR_DISABLE); + + /* WaForceEnableNonCoherent and WaDisableHDCInvalidation are + * both tied to WaForceContextSaveRestoreNonCoherent + * in some hsds for skl. We keep the tie for all gen9. The + * documentation is a bit hazy and so we want to get common behaviour, + * even though there is no clear evidence we would need both on kbl/bxt. + * This area has been source of system hangs so we play it safe + * and mimic the skl regardless of what bspec says. + * + * Use Force Non-Coherent whenever executing a 3D context. This + * is a workaround for a possible hang in the unlikely event + * a TLB invalidation occurs during a PSD flush. + */ + + /* WaForceEnableNonCoherent:skl,bxt,kbl */ + WA_SET_BIT_MASKED(HDC_CHICKEN0, + HDC_FORCE_NON_COHERENT); + + /* WaDisableHDCInvalidation:skl,bxt,kbl */ + I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | + BDW_DISABLE_HDC_INVALIDATION); + + /* WaDisableSamplerPowerBypassForSOPingPong:skl,bxt,kbl */ + if (IS_SKYLAKE(dev_priv) || + IS_KABYLAKE(dev_priv) || + IS_BXT_REVID(dev_priv, 0, BXT_REVID_B0)) + WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, + GEN8_SAMPLER_POWER_BYPASS_DIS); + + /* WaDisableSTUnitPowerOptimization:skl,bxt,kbl,glk */ + WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN2, GEN8_ST_PO_DISABLE); + + /* WaOCLCoherentLineFlush:skl,bxt,kbl */ + I915_WRITE(GEN8_L3SQCREG4, (I915_READ(GEN8_L3SQCREG4) | + GEN8_LQSC_FLUSH_COHERENT_LINES)); + + /* WaVFEStateAfterPipeControlwithMediaStateClear:skl,bxt,glk */ + ret = wa_ring_whitelist_reg(engine, GEN9_CTX_PREEMPT_REG); + if (ret) + return ret; + + /* WaEnablePreemptionGranularityControlByUMD:skl,bxt,kbl */ + ret= wa_ring_whitelist_reg(engine, GEN8_CS_CHICKEN1); + if (ret) + return ret; + + /* WaAllowUMDToModifyHDCChicken1:skl,bxt,kbl,glk */ + ret = wa_ring_whitelist_reg(engine, GEN8_HDC_CHICKEN1); + if (ret) + return ret; + + return 0; +} + +static int skl_tune_iz_hashing(struct intel_engine_cs *engine) +{ + struct drm_i915_private *dev_priv = engine->i915; + u8 vals[3] = { 0, 0, 0 }; + unsigned int i; + + for (i = 0; i < 3; i++) { + u8 ss; + + /* + * Only consider slices where one, and only one, subslice has 7 + * EUs + */ + if (!is_power_of_2(INTEL_INFO(dev_priv)->sseu.subslice_7eu[i])) + continue; + + /* + * subslice_7eu[i] != 0 (because of the check above) and + * ss_max == 4 (maximum number of subslices possible per slice) + * + * -> 0 <= ss <= 3; + */ + ss = ffs(INTEL_INFO(dev_priv)->sseu.subslice_7eu[i]) - 1; + vals[i] = 3 - ss; + } + + if (vals[0] == 0 && vals[1] == 0 && vals[2] == 0) + return 0; + + /* Tune IZ hashing. See intel_device_info_runtime_init() */ + WA_SET_FIELD_MASKED(GEN7_GT_MODE, + GEN9_IZ_HASHING_MASK(2) | + GEN9_IZ_HASHING_MASK(1) | + GEN9_IZ_HASHING_MASK(0), + GEN9_IZ_HASHING(2, vals[2]) | + GEN9_IZ_HASHING(1, vals[1]) | + GEN9_IZ_HASHING(0, vals[0])); + + return 0; +} + +static int skl_init_workarounds(struct intel_engine_cs *engine) +{ + struct drm_i915_private *dev_priv = engine->i915; + int ret; + + ret = gen9_init_workarounds(engine); + if (ret) + return ret; + + /* + * Actual WA is to disable percontext preemption granularity control + * until D0 which is the default case so this is equivalent to + * !WaDisablePerCtxtPreemptionGranularityControl:skl + */ + I915_WRITE(GEN7_FF_SLICE_CS_CHICKEN1, + _MASKED_BIT_ENABLE(GEN9_FFSC_PERCTX_PREEMPT_CTRL)); + + /* WaEnableGapsTsvCreditFix:skl */ + I915_WRITE(GEN8_GARBCNTL, (I915_READ(GEN8_GARBCNTL) | + GEN9_GAPS_TSV_CREDIT_DISABLE)); + + /* WaDisableGafsUnitClkGating:skl */ + WA_SET_BIT(GEN7_UCGCTL4, GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE); + + /* WaInPlaceDecompressionHang:skl */ + if (IS_SKL_REVID(dev_priv, SKL_REVID_H0, REVID_FOREVER)) + WA_SET_BIT(GEN9_GAMT_ECO_REG_RW_IA, + GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS); + + /* WaDisableLSQCROPERFforOCL:skl */ + ret = wa_ring_whitelist_reg(engine, GEN8_L3SQCREG4); + if (ret) + return ret; + + return skl_tune_iz_hashing(engine); +} + +static int bxt_init_workarounds(struct intel_engine_cs *engine) +{ + struct drm_i915_private *dev_priv = engine->i915; + int ret; + + ret = gen9_init_workarounds(engine); + if (ret) + return ret; + + /* WaStoreMultiplePTEenable:bxt */ + /* This is a requirement according to Hardware specification */ + if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) + I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_TLBPF); + + /* WaSetClckGatingDisableMedia:bxt */ + if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) { + I915_WRITE(GEN7_MISCCPCTL, (I915_READ(GEN7_MISCCPCTL) & + ~GEN8_DOP_CLOCK_GATE_MEDIA_ENABLE)); + } + + /* WaDisableThreadStallDopClockGating:bxt */ + WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, + STALL_DOP_GATING_DISABLE); + + /* WaDisablePooledEuLoadBalancingFix:bxt */ + if (IS_BXT_REVID(dev_priv, BXT_REVID_B0, REVID_FOREVER)) { + WA_SET_BIT_MASKED(FF_SLICE_CS_CHICKEN2, + GEN9_POOLED_EU_LOAD_BALANCING_FIX_DISABLE); + } + + /* WaDisableSbeCacheDispatchPortSharing:bxt */ + if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_B0)) { + WA_SET_BIT_MASKED( + GEN7_HALF_SLICE_CHICKEN1, + GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE); + } + + /* WaDisableObjectLevelPreemptionForTrifanOrPolygon:bxt */ + /* WaDisableObjectLevelPreemptionForInstancedDraw:bxt */ + /* WaDisableObjectLevelPreemtionForInstanceId:bxt */ + /* WaDisableLSQCROPERFforOCL:bxt */ + if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) { + ret = wa_ring_whitelist_reg(engine, GEN9_CS_DEBUG_MODE1); + if (ret) + return ret; + + ret = wa_ring_whitelist_reg(engine, GEN8_L3SQCREG4); + if (ret) + return ret; + } + + /* WaProgramL3SqcReg1DefaultForPerf:bxt */ + if (IS_BXT_REVID(dev_priv, BXT_REVID_B0, REVID_FOREVER)) + I915_WRITE(GEN8_L3SQCREG1, L3_GENERAL_PRIO_CREDITS(62) | + L3_HIGH_PRIO_CREDITS(2)); + + /* WaToEnableHwFixForPushConstHWBug:bxt */ + if (IS_BXT_REVID(dev_priv, BXT_REVID_C0, REVID_FOREVER)) + WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2, + GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION); + + /* WaInPlaceDecompressionHang:bxt */ + if (IS_BXT_REVID(dev_priv, BXT_REVID_C0, REVID_FOREVER)) + WA_SET_BIT(GEN9_GAMT_ECO_REG_RW_IA, + GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS); + + return 0; +} + +static int kbl_init_workarounds(struct intel_engine_cs *engine) +{ + struct drm_i915_private *dev_priv = engine->i915; + int ret; + + ret = gen9_init_workarounds(engine); + if (ret) + return ret; + + /* WaEnableGapsTsvCreditFix:kbl */ + I915_WRITE(GEN8_GARBCNTL, (I915_READ(GEN8_GARBCNTL) | + GEN9_GAPS_TSV_CREDIT_DISABLE)); + + /* WaDisableDynamicCreditSharing:kbl */ + if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_B0)) + WA_SET_BIT(GAMT_CHKN_BIT_REG, + GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING); + + /* WaDisableFenceDestinationToSLM:kbl (pre-prod) */ + if (IS_KBL_REVID(dev_priv, KBL_REVID_A0, KBL_REVID_A0)) + WA_SET_BIT_MASKED(HDC_CHICKEN0, + HDC_FENCE_DEST_SLM_DISABLE); + + /* WaToEnableHwFixForPushConstHWBug:kbl */ + if (IS_KBL_REVID(dev_priv, KBL_REVID_C0, REVID_FOREVER)) + WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2, + GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION); + + /* WaDisableGafsUnitClkGating:kbl */ + WA_SET_BIT(GEN7_UCGCTL4, GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE); + + /* WaDisableSbeCacheDispatchPortSharing:kbl */ + WA_SET_BIT_MASKED( + GEN7_HALF_SLICE_CHICKEN1, + GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE); + + /* WaInPlaceDecompressionHang:kbl */ + WA_SET_BIT(GEN9_GAMT_ECO_REG_RW_IA, + GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS); + + /* WaDisableLSQCROPERFforOCL:kbl */ + ret = wa_ring_whitelist_reg(engine, GEN8_L3SQCREG4); + if (ret) + return ret; + + return 0; +} + +static int glk_init_workarounds(struct intel_engine_cs *engine) +{ + struct drm_i915_private *dev_priv = engine->i915; + int ret; + + ret = gen9_init_workarounds(engine); + if (ret) + return ret; + + /* WaToEnableHwFixForPushConstHWBug:glk */ + WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2, + GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION); + + return 0; +} + +int init_workarounds_ring(struct intel_engine_cs *engine) +{ + struct drm_i915_private *dev_priv = engine->i915; + + WARN_ON(engine->id != RCS); + + dev_priv->workarounds.count = 0; + dev_priv->workarounds.hw_whitelist_count[RCS] = 0; + + if (IS_BROADWELL(dev_priv)) + return bdw_init_workarounds(engine); + + if (IS_CHERRYVIEW(dev_priv)) + return chv_init_workarounds(engine); + + if (IS_SKYLAKE(dev_priv)) + return skl_init_workarounds(engine); + + if (IS_BROXTON(dev_priv)) + return bxt_init_workarounds(engine); + + if (IS_KABYLAKE(dev_priv)) + return kbl_init_workarounds(engine); + + if (IS_GEMINILAKE(dev_priv)) + return glk_init_workarounds(engine); + + return 0; +} + +int intel_ring_workarounds_emit(struct drm_i915_gem_request *req) +{ + struct i915_workarounds *w = &req->i915->workarounds; + u32 *cs; + int ret, i; + + if (w->count == 0) + return 0; + + ret = req->engine->emit_flush(req, EMIT_BARRIER); + if (ret) + return ret; + + cs = intel_ring_begin(req, (w->count * 2 + 2)); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + *cs++ = MI_LOAD_REGISTER_IMM(w->count); + for (i = 0; i < w->count; i++) { + *cs++ = i915_mmio_reg_offset(w->reg[i].addr); + *cs++ = w->reg[i].value; + } + *cs++ = MI_NOOP; + + intel_ring_advance(req, cs); + + ret = req->engine->emit_flush(req, EMIT_BARRIER); + if (ret) + return ret; + + DRM_DEBUG_DRIVER("Number of Workarounds emitted: %d\n", w->count); + + return 0; +} + #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) #include "selftests/mock_engine.c" #endif diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 8c17db72489f..629fe6584e61 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -644,41 +644,6 @@ static void reset_ring_common(struct intel_engine_cs *engine, } } -int intel_ring_workarounds_emit(struct drm_i915_gem_request *req) -{ - struct i915_workarounds *w = &req->i915->workarounds; - u32 *cs; - int ret, i; - - if (w->count == 0) - return 0; - - ret = req->engine->emit_flush(req, EMIT_BARRIER); - if (ret) - return ret; - - cs = intel_ring_begin(req, (w->count * 2 + 2)); - if (IS_ERR(cs)) - return PTR_ERR(cs); - - *cs++ = MI_LOAD_REGISTER_IMM(w->count); - for (i = 0; i < w->count; i++) { - *cs++ = i915_mmio_reg_offset(w->reg[i].addr); - *cs++ = w->reg[i].value; - } - *cs++ = MI_NOOP; - - intel_ring_advance(req, cs); - - ret = req->engine->emit_flush(req, EMIT_BARRIER); - if (ret) - return ret; - - DRM_DEBUG_DRIVER("Number of Workarounds emitted: %d\n", w->count); - - return 0; -} - static int intel_rcs_ctx_init(struct drm_i915_gem_request *req) { int ret; @@ -694,521 +659,6 @@ static int intel_rcs_ctx_init(struct drm_i915_gem_request *req) return 0; } -static int wa_add(struct drm_i915_private *dev_priv, - i915_reg_t addr, - const u32 mask, const u32 val) -{ - const u32 idx = dev_priv->workarounds.count; - - if (WARN_ON(idx >= I915_MAX_WA_REGS)) - return -ENOSPC; - - dev_priv->workarounds.reg[idx].addr = addr; - dev_priv->workarounds.reg[idx].value = val; - dev_priv->workarounds.reg[idx].mask = mask; - - dev_priv->workarounds.count++; - - return 0; -} - -#define WA_REG(addr, mask, val) do { \ - const int r = wa_add(dev_priv, (addr), (mask), (val)); \ - if (r) \ - return r; \ - } while (0) - -#define WA_SET_BIT_MASKED(addr, mask) \ - WA_REG(addr, (mask), _MASKED_BIT_ENABLE(mask)) - -#define WA_CLR_BIT_MASKED(addr, mask) \ - WA_REG(addr, (mask), _MASKED_BIT_DISABLE(mask)) - -#define WA_SET_FIELD_MASKED(addr, mask, value) \ - WA_REG(addr, mask, _MASKED_FIELD(mask, value)) - -#define WA_SET_BIT(addr, mask) WA_REG(addr, mask, I915_READ(addr) | (mask)) -#define WA_CLR_BIT(addr, mask) WA_REG(addr, mask, I915_READ(addr) & ~(mask)) - -#define WA_WRITE(addr, val) WA_REG(addr, 0xffffffff, val) - -static int wa_ring_whitelist_reg(struct intel_engine_cs *engine, - i915_reg_t reg) -{ - struct drm_i915_private *dev_priv = engine->i915; - struct i915_workarounds *wa = &dev_priv->workarounds; - const uint32_t index = wa->hw_whitelist_count[engine->id]; - - if (WARN_ON(index >= RING_MAX_NONPRIV_SLOTS)) - return -EINVAL; - - WA_WRITE(RING_FORCE_TO_NONPRIV(engine->mmio_base, index), - i915_mmio_reg_offset(reg)); - wa->hw_whitelist_count[engine->id]++; - - return 0; -} - -static int gen8_init_workarounds(struct intel_engine_cs *engine) -{ - struct drm_i915_private *dev_priv = engine->i915; - - WA_SET_BIT_MASKED(INSTPM, INSTPM_FORCE_ORDERING); - - /* WaDisableAsyncFlipPerfMode:bdw,chv */ - WA_SET_BIT_MASKED(MI_MODE, ASYNC_FLIP_PERF_DISABLE); - - /* WaDisablePartialInstShootdown:bdw,chv */ - WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, - PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE); - - /* Use Force Non-Coherent whenever executing a 3D context. This is a - * workaround for for a possible hang in the unlikely event a TLB - * invalidation occurs during a PSD flush. - */ - /* WaForceEnableNonCoherent:bdw,chv */ - /* WaHdcDisableFetchWhenMasked:bdw,chv */ - WA_SET_BIT_MASKED(HDC_CHICKEN0, - HDC_DONOT_FETCH_MEM_WHEN_MASKED | - HDC_FORCE_NON_COHERENT); - - /* From the Haswell PRM, Command Reference: Registers, CACHE_MODE_0: - * "The Hierarchical Z RAW Stall Optimization allows non-overlapping - * polygons in the same 8x4 pixel/sample area to be processed without - * stalling waiting for the earlier ones to write to Hierarchical Z - * buffer." - * - * This optimization is off by default for BDW and CHV; turn it on. - */ - WA_CLR_BIT_MASKED(CACHE_MODE_0_GEN7, HIZ_RAW_STALL_OPT_DISABLE); - - /* Wa4x4STCOptimizationDisable:bdw,chv */ - WA_SET_BIT_MASKED(CACHE_MODE_1, GEN8_4x4_STC_OPTIMIZATION_DISABLE); - - /* - * BSpec recommends 8x4 when MSAA is used, - * however in practice 16x4 seems fastest. - * - * Note that PS/WM thread counts depend on the WIZ hashing - * disable bit, which we don't touch here, but it's good - * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM). - */ - WA_SET_FIELD_MASKED(GEN7_GT_MODE, - GEN6_WIZ_HASHING_MASK, - GEN6_WIZ_HASHING_16x4); - - return 0; -} - -static int bdw_init_workarounds(struct intel_engine_cs *engine) -{ - struct drm_i915_private *dev_priv = engine->i915; - int ret; - - ret = gen8_init_workarounds(engine); - if (ret) - return ret; - - /* WaDisableThreadStallDopClockGating:bdw (pre-production) */ - WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE); - - /* WaDisableDopClockGating:bdw - * - * Also see the related UCGTCL1 write in broadwell_init_clock_gating() - * to disable EUTC clock gating. - */ - WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2, - DOP_CLOCK_GATING_DISABLE); - - WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, - GEN8_SAMPLER_POWER_BYPASS_DIS); - - WA_SET_BIT_MASKED(HDC_CHICKEN0, - /* WaForceContextSaveRestoreNonCoherent:bdw */ - HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT | - /* WaDisableFenceDestinationToSLM:bdw (pre-prod) */ - (IS_BDW_GT3(dev_priv) ? HDC_FENCE_DEST_SLM_DISABLE : 0)); - - return 0; -} - -static int chv_init_workarounds(struct intel_engine_cs *engine) -{ - struct drm_i915_private *dev_priv = engine->i915; - int ret; - - ret = gen8_init_workarounds(engine); - if (ret) - return ret; - - /* WaDisableThreadStallDopClockGating:chv */ - WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE); - - /* Improve HiZ throughput on CHV. */ - WA_SET_BIT_MASKED(HIZ_CHICKEN, CHV_HZ_8X8_MODE_IN_1X); - - return 0; -} - -static int gen9_init_workarounds(struct intel_engine_cs *engine) -{ - struct drm_i915_private *dev_priv = engine->i915; - int ret; - - /* WaConextSwitchWithConcurrentTLBInvalidate:skl,bxt,kbl,glk */ - I915_WRITE(GEN9_CSFE_CHICKEN1_RCS, _MASKED_BIT_ENABLE(GEN9_PREEMPT_GPGPU_SYNC_SWITCH_DISABLE)); - - /* WaEnableLbsSlaRetryTimerDecrement:skl,bxt,kbl,glk */ - I915_WRITE(BDW_SCRATCH1, I915_READ(BDW_SCRATCH1) | - GEN9_LBS_SLA_RETRY_TIMER_DECREMENT_ENABLE); - - /* WaDisableKillLogic:bxt,skl,kbl */ - I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | - ECOCHK_DIS_TLB); - - /* WaClearFlowControlGpgpuContextSave:skl,bxt,kbl,glk */ - /* WaDisablePartialInstShootdown:skl,bxt,kbl,glk */ - WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, - FLOW_CONTROL_ENABLE | - PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE); - - /* Syncing dependencies between camera and graphics:skl,bxt,kbl */ - WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, - GEN9_DISABLE_OCL_OOB_SUPPRESS_LOGIC); - - /* WaDisableDgMirrorFixInHalfSliceChicken5:bxt */ - if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) - WA_CLR_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN5, - GEN9_DG_MIRROR_FIX_ENABLE); - - /* WaSetDisablePixMaskCammingAndRhwoInCommonSliceChicken:bxt */ - if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) { - WA_SET_BIT_MASKED(GEN7_COMMON_SLICE_CHICKEN1, - GEN9_RHWO_OPTIMIZATION_DISABLE); - /* - * WA also requires GEN9_SLICE_COMMON_ECO_CHICKEN0[14:14] to be set - * but we do that in per ctx batchbuffer as there is an issue - * with this register not getting restored on ctx restore - */ - } - - /* WaEnableSamplerGPGPUPreemptionSupport:skl,bxt,kbl */ - WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7, - GEN9_ENABLE_GPGPU_PREEMPTION); - - /* Wa4x4STCOptimizationDisable:skl,bxt,kbl,glk */ - /* WaDisablePartialResolveInVc:skl,bxt,kbl */ - WA_SET_BIT_MASKED(CACHE_MODE_1, (GEN8_4x4_STC_OPTIMIZATION_DISABLE | - GEN9_PARTIAL_RESOLVE_IN_VC_DISABLE)); - - /* WaCcsTlbPrefetchDisable:skl,bxt,kbl,glk */ - WA_CLR_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN5, - GEN9_CCS_TLB_PREFETCH_ENABLE); - - /* WaDisableMaskBasedCammingInRCC:bxt */ - if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) - WA_SET_BIT_MASKED(SLICE_ECO_CHICKEN0, - PIXEL_MASK_CAMMING_DISABLE); - - /* WaForceContextSaveRestoreNonCoherent:skl,bxt,kbl */ - WA_SET_BIT_MASKED(HDC_CHICKEN0, - HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT | - HDC_FORCE_CSR_NON_COHERENT_OVR_DISABLE); - - /* WaForceEnableNonCoherent and WaDisableHDCInvalidation are - * both tied to WaForceContextSaveRestoreNonCoherent - * in some hsds for skl. We keep the tie for all gen9. The - * documentation is a bit hazy and so we want to get common behaviour, - * even though there is no clear evidence we would need both on kbl/bxt. - * This area has been source of system hangs so we play it safe - * and mimic the skl regardless of what bspec says. - * - * Use Force Non-Coherent whenever executing a 3D context. This - * is a workaround for a possible hang in the unlikely event - * a TLB invalidation occurs during a PSD flush. - */ - - /* WaForceEnableNonCoherent:skl,bxt,kbl */ - WA_SET_BIT_MASKED(HDC_CHICKEN0, - HDC_FORCE_NON_COHERENT); - - /* WaDisableHDCInvalidation:skl,bxt,kbl */ - I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | - BDW_DISABLE_HDC_INVALIDATION); - - /* WaDisableSamplerPowerBypassForSOPingPong:skl,bxt,kbl */ - if (IS_SKYLAKE(dev_priv) || - IS_KABYLAKE(dev_priv) || - IS_BXT_REVID(dev_priv, 0, BXT_REVID_B0)) - WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, - GEN8_SAMPLER_POWER_BYPASS_DIS); - - /* WaDisableSTUnitPowerOptimization:skl,bxt,kbl,glk */ - WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN2, GEN8_ST_PO_DISABLE); - - /* WaOCLCoherentLineFlush:skl,bxt,kbl */ - I915_WRITE(GEN8_L3SQCREG4, (I915_READ(GEN8_L3SQCREG4) | - GEN8_LQSC_FLUSH_COHERENT_LINES)); - - /* WaVFEStateAfterPipeControlwithMediaStateClear:skl,bxt,glk */ - ret = wa_ring_whitelist_reg(engine, GEN9_CTX_PREEMPT_REG); - if (ret) - return ret; - - /* WaEnablePreemptionGranularityControlByUMD:skl,bxt,kbl */ - ret= wa_ring_whitelist_reg(engine, GEN8_CS_CHICKEN1); - if (ret) - return ret; - - /* WaAllowUMDToModifyHDCChicken1:skl,bxt,kbl,glk */ - ret = wa_ring_whitelist_reg(engine, GEN8_HDC_CHICKEN1); - if (ret) - return ret; - - return 0; -} - -static int skl_tune_iz_hashing(struct intel_engine_cs *engine) -{ - struct drm_i915_private *dev_priv = engine->i915; - u8 vals[3] = { 0, 0, 0 }; - unsigned int i; - - for (i = 0; i < 3; i++) { - u8 ss; - - /* - * Only consider slices where one, and only one, subslice has 7 - * EUs - */ - if (!is_power_of_2(INTEL_INFO(dev_priv)->sseu.subslice_7eu[i])) - continue; - - /* - * subslice_7eu[i] != 0 (because of the check above) and - * ss_max == 4 (maximum number of subslices possible per slice) - * - * -> 0 <= ss <= 3; - */ - ss = ffs(INTEL_INFO(dev_priv)->sseu.subslice_7eu[i]) - 1; - vals[i] = 3 - ss; - } - - if (vals[0] == 0 && vals[1] == 0 && vals[2] == 0) - return 0; - - /* Tune IZ hashing. See intel_device_info_runtime_init() */ - WA_SET_FIELD_MASKED(GEN7_GT_MODE, - GEN9_IZ_HASHING_MASK(2) | - GEN9_IZ_HASHING_MASK(1) | - GEN9_IZ_HASHING_MASK(0), - GEN9_IZ_HASHING(2, vals[2]) | - GEN9_IZ_HASHING(1, vals[1]) | - GEN9_IZ_HASHING(0, vals[0])); - - return 0; -} - -static int skl_init_workarounds(struct intel_engine_cs *engine) -{ - struct drm_i915_private *dev_priv = engine->i915; - int ret; - - ret = gen9_init_workarounds(engine); - if (ret) - return ret; - - /* - * Actual WA is to disable percontext preemption granularity control - * until D0 which is the default case so this is equivalent to - * !WaDisablePerCtxtPreemptionGranularityControl:skl - */ - I915_WRITE(GEN7_FF_SLICE_CS_CHICKEN1, - _MASKED_BIT_ENABLE(GEN9_FFSC_PERCTX_PREEMPT_CTRL)); - - /* WaEnableGapsTsvCreditFix:skl */ - I915_WRITE(GEN8_GARBCNTL, (I915_READ(GEN8_GARBCNTL) | - GEN9_GAPS_TSV_CREDIT_DISABLE)); - - /* WaDisableGafsUnitClkGating:skl */ - WA_SET_BIT(GEN7_UCGCTL4, GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE); - - /* WaInPlaceDecompressionHang:skl */ - if (IS_SKL_REVID(dev_priv, SKL_REVID_H0, REVID_FOREVER)) - WA_SET_BIT(GEN9_GAMT_ECO_REG_RW_IA, - GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS); - - /* WaDisableLSQCROPERFforOCL:skl */ - ret = wa_ring_whitelist_reg(engine, GEN8_L3SQCREG4); - if (ret) - return ret; - - return skl_tune_iz_hashing(engine); -} - -static int bxt_init_workarounds(struct intel_engine_cs *engine) -{ - struct drm_i915_private *dev_priv = engine->i915; - int ret; - - ret = gen9_init_workarounds(engine); - if (ret) - return ret; - - /* WaStoreMultiplePTEenable:bxt */ - /* This is a requirement according to Hardware specification */ - if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) - I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_TLBPF); - - /* WaSetClckGatingDisableMedia:bxt */ - if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) { - I915_WRITE(GEN7_MISCCPCTL, (I915_READ(GEN7_MISCCPCTL) & - ~GEN8_DOP_CLOCK_GATE_MEDIA_ENABLE)); - } - - /* WaDisableThreadStallDopClockGating:bxt */ - WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, - STALL_DOP_GATING_DISABLE); - - /* WaDisablePooledEuLoadBalancingFix:bxt */ - if (IS_BXT_REVID(dev_priv, BXT_REVID_B0, REVID_FOREVER)) { - WA_SET_BIT_MASKED(FF_SLICE_CS_CHICKEN2, - GEN9_POOLED_EU_LOAD_BALANCING_FIX_DISABLE); - } - - /* WaDisableSbeCacheDispatchPortSharing:bxt */ - if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_B0)) { - WA_SET_BIT_MASKED( - GEN7_HALF_SLICE_CHICKEN1, - GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE); - } - - /* WaDisableObjectLevelPreemptionForTrifanOrPolygon:bxt */ - /* WaDisableObjectLevelPreemptionForInstancedDraw:bxt */ - /* WaDisableObjectLevelPreemtionForInstanceId:bxt */ - /* WaDisableLSQCROPERFforOCL:bxt */ - if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) { - ret = wa_ring_whitelist_reg(engine, GEN9_CS_DEBUG_MODE1); - if (ret) - return ret; - - ret = wa_ring_whitelist_reg(engine, GEN8_L3SQCREG4); - if (ret) - return ret; - } - - /* WaProgramL3SqcReg1DefaultForPerf:bxt */ - if (IS_BXT_REVID(dev_priv, BXT_REVID_B0, REVID_FOREVER)) - I915_WRITE(GEN8_L3SQCREG1, L3_GENERAL_PRIO_CREDITS(62) | - L3_HIGH_PRIO_CREDITS(2)); - - /* WaToEnableHwFixForPushConstHWBug:bxt */ - if (IS_BXT_REVID(dev_priv, BXT_REVID_C0, REVID_FOREVER)) - WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2, - GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION); - - /* WaInPlaceDecompressionHang:bxt */ - if (IS_BXT_REVID(dev_priv, BXT_REVID_C0, REVID_FOREVER)) - WA_SET_BIT(GEN9_GAMT_ECO_REG_RW_IA, - GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS); - - return 0; -} - -static int kbl_init_workarounds(struct intel_engine_cs *engine) -{ - struct drm_i915_private *dev_priv = engine->i915; - int ret; - - ret = gen9_init_workarounds(engine); - if (ret) - return ret; - - /* WaEnableGapsTsvCreditFix:kbl */ - I915_WRITE(GEN8_GARBCNTL, (I915_READ(GEN8_GARBCNTL) | - GEN9_GAPS_TSV_CREDIT_DISABLE)); - - /* WaDisableDynamicCreditSharing:kbl */ - if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_B0)) - WA_SET_BIT(GAMT_CHKN_BIT_REG, - GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING); - - /* WaDisableFenceDestinationToSLM:kbl (pre-prod) */ - if (IS_KBL_REVID(dev_priv, KBL_REVID_A0, KBL_REVID_A0)) - WA_SET_BIT_MASKED(HDC_CHICKEN0, - HDC_FENCE_DEST_SLM_DISABLE); - - /* WaToEnableHwFixForPushConstHWBug:kbl */ - if (IS_KBL_REVID(dev_priv, KBL_REVID_C0, REVID_FOREVER)) - WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2, - GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION); - - /* WaDisableGafsUnitClkGating:kbl */ - WA_SET_BIT(GEN7_UCGCTL4, GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE); - - /* WaDisableSbeCacheDispatchPortSharing:kbl */ - WA_SET_BIT_MASKED( - GEN7_HALF_SLICE_CHICKEN1, - GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE); - - /* WaInPlaceDecompressionHang:kbl */ - WA_SET_BIT(GEN9_GAMT_ECO_REG_RW_IA, - GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS); - - /* WaDisableLSQCROPERFforOCL:kbl */ - ret = wa_ring_whitelist_reg(engine, GEN8_L3SQCREG4); - if (ret) - return ret; - - return 0; -} - -static int glk_init_workarounds(struct intel_engine_cs *engine) -{ - struct drm_i915_private *dev_priv = engine->i915; - int ret; - - ret = gen9_init_workarounds(engine); - if (ret) - return ret; - - /* WaToEnableHwFixForPushConstHWBug:glk */ - WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2, - GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION); - - return 0; -} - -int init_workarounds_ring(struct intel_engine_cs *engine) -{ - struct drm_i915_private *dev_priv = engine->i915; - - WARN_ON(engine->id != RCS); - - dev_priv->workarounds.count = 0; - dev_priv->workarounds.hw_whitelist_count[RCS] = 0; - - if (IS_BROADWELL(dev_priv)) - return bdw_init_workarounds(engine); - - if (IS_CHERRYVIEW(dev_priv)) - return chv_init_workarounds(engine); - - if (IS_SKYLAKE(dev_priv)) - return skl_init_workarounds(engine); - - if (IS_BROXTON(dev_priv)) - return bxt_init_workarounds(engine); - - if (IS_KABYLAKE(dev_priv)) - return kbl_init_workarounds(engine); - - if (IS_GEMINILAKE(dev_priv)) - return glk_init_workarounds(engine); - - return 0; -} - static int init_render_ring(struct intel_engine_cs *engine) { struct drm_i915_private *dev_priv = engine->i915; -- cgit From 097d4f1c121f324d655d17b9bc5238107f7a1761 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Fri, 17 Feb 2017 07:58:59 +0000 Subject: drm/i915: Tidy workaround batch buffer emission Use the "*batch++ = " style as in the ring emission for better readability and also simplify the logic a bit by consolidating the offset and size calculations and overflow checking. The latter is a programming error so it is not required to check for it after each write to the object, but rather do it once the whole state has been written and fail the driver if something went wrong. v2: Rebase. v3: Keep track of offsets and sizes in bytes for simplicity and rename function pointer variable to _fn suffix. (Chris Wilson) v4: Fix size calc broken in v3 and add alignment warning. (Chris Wilson) v5: Fix return code. v6: I added an exit from loop in v5 but forgot to put back the object teardown. Signed-off-by: Tvrtko Ursulin Reviewed-by: Chris Wilson (v5) Cc: Chris Wilson Reviewed-by: Chris Wilson --- drivers/gpu/drm/i915/intel_lrc.c | 318 ++++++++++++++------------------------- 1 file changed, 117 insertions(+), 201 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index ee431d39ce06..5154661a38cf 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -890,18 +890,6 @@ err: return ret; } -#define wa_ctx_emit(batch, index, cmd) \ - do { \ - int __index = (index)++; \ - if (WARN_ON(__index >= (PAGE_SIZE / sizeof(uint32_t)))) { \ - return -ENOSPC; \ - } \ - batch[__index] = (cmd); \ - } while (0) - -#define wa_ctx_emit_reg(batch, index, reg) \ - wa_ctx_emit((batch), (index), i915_mmio_reg_offset(reg)) - /* * In this WA we need to set GEN8_L3SQCREG4[21:21] and reset it after * PIPE_CONTROL instruction. This is required for the flush to happen correctly @@ -918,56 +906,31 @@ err: * This WA is also required for Gen9 so extracting as a function avoids * code duplication. */ -static inline int gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine, - uint32_t *batch, - uint32_t index) -{ - uint32_t l3sqc4_flush = (0x40400000 | GEN8_LQSC_FLUSH_COHERENT_LINES); - - wa_ctx_emit(batch, index, (MI_STORE_REGISTER_MEM_GEN8 | - MI_SRM_LRM_GLOBAL_GTT)); - wa_ctx_emit_reg(batch, index, GEN8_L3SQCREG4); - wa_ctx_emit(batch, index, i915_ggtt_offset(engine->scratch) + 256); - wa_ctx_emit(batch, index, 0); - - wa_ctx_emit(batch, index, MI_LOAD_REGISTER_IMM(1)); - wa_ctx_emit_reg(batch, index, GEN8_L3SQCREG4); - wa_ctx_emit(batch, index, l3sqc4_flush); - - wa_ctx_emit(batch, index, GFX_OP_PIPE_CONTROL(6)); - wa_ctx_emit(batch, index, (PIPE_CONTROL_CS_STALL | - PIPE_CONTROL_DC_FLUSH_ENABLE)); - wa_ctx_emit(batch, index, 0); - wa_ctx_emit(batch, index, 0); - wa_ctx_emit(batch, index, 0); - wa_ctx_emit(batch, index, 0); - - wa_ctx_emit(batch, index, (MI_LOAD_REGISTER_MEM_GEN8 | - MI_SRM_LRM_GLOBAL_GTT)); - wa_ctx_emit_reg(batch, index, GEN8_L3SQCREG4); - wa_ctx_emit(batch, index, i915_ggtt_offset(engine->scratch) + 256); - wa_ctx_emit(batch, index, 0); - - return index; -} - -static inline uint32_t wa_ctx_start(struct i915_wa_ctx_bb *wa_ctx, - uint32_t offset, - uint32_t start_alignment) +static u32 * +gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine, u32 *batch) { - return wa_ctx->offset = ALIGN(offset, start_alignment); -} - -static inline int wa_ctx_end(struct i915_wa_ctx_bb *wa_ctx, - uint32_t offset, - uint32_t size_alignment) -{ - wa_ctx->size = offset - wa_ctx->offset; - - WARN(wa_ctx->size % size_alignment, - "wa_ctx_bb failed sanity checks: size %d is not aligned to %d\n", - wa_ctx->size, size_alignment); - return 0; + *batch++ = MI_STORE_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT; + *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4); + *batch++ = i915_ggtt_offset(engine->scratch) + 256; + *batch++ = 0; + + *batch++ = MI_LOAD_REGISTER_IMM(1); + *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4); + *batch++ = 0x40400000 | GEN8_LQSC_FLUSH_COHERENT_LINES; + + *batch++ = GFX_OP_PIPE_CONTROL(6); + *batch++ = PIPE_CONTROL_CS_STALL | PIPE_CONTROL_DC_FLUSH_ENABLE; + *batch++ = 0; + *batch++ = 0; + *batch++ = 0; + *batch++ = 0; + + *batch++ = MI_LOAD_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT; + *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4); + *batch++ = i915_ggtt_offset(engine->scratch) + 256; + *batch++ = 0; + + return batch; } /* @@ -985,42 +948,28 @@ static inline int wa_ctx_end(struct i915_wa_ctx_bb *wa_ctx, * MI_BATCH_BUFFER_END will be added to perctx batch and both of them together * makes a complete batch buffer. */ -static int gen8_init_indirectctx_bb(struct intel_engine_cs *engine, - struct i915_wa_ctx_bb *wa_ctx, - uint32_t *batch, - uint32_t *offset) +static u32 *gen8_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch) { - uint32_t scratch_addr; - uint32_t index = wa_ctx_start(wa_ctx, *offset, CACHELINE_DWORDS); - /* WaDisableCtxRestoreArbitration:bdw,chv */ - wa_ctx_emit(batch, index, MI_ARB_ON_OFF | MI_ARB_DISABLE); + *batch++ = MI_ARB_ON_OFF | MI_ARB_DISABLE; /* WaFlushCoherentL3CacheLinesAtContextSwitch:bdw */ - if (IS_BROADWELL(engine->i915)) { - int rc = gen8_emit_flush_coherentl3_wa(engine, batch, index); - if (rc < 0) - return rc; - index = rc; - } + if (IS_BROADWELL(engine->i915)) + batch = gen8_emit_flush_coherentl3_wa(engine, batch); + *batch++ = GFX_OP_PIPE_CONTROL(6); + *batch++ = PIPE_CONTROL_FLUSH_L3 | PIPE_CONTROL_GLOBAL_GTT_IVB | + PIPE_CONTROL_CS_STALL | PIPE_CONTROL_QW_WRITE; /* WaClearSlmSpaceAtContextSwitch:bdw,chv */ /* Actual scratch location is at 128 bytes offset */ - scratch_addr = i915_ggtt_offset(engine->scratch) + 2 * CACHELINE_BYTES; - - wa_ctx_emit(batch, index, GFX_OP_PIPE_CONTROL(6)); - wa_ctx_emit(batch, index, (PIPE_CONTROL_FLUSH_L3 | - PIPE_CONTROL_GLOBAL_GTT_IVB | - PIPE_CONTROL_CS_STALL | - PIPE_CONTROL_QW_WRITE)); - wa_ctx_emit(batch, index, scratch_addr); - wa_ctx_emit(batch, index, 0); - wa_ctx_emit(batch, index, 0); - wa_ctx_emit(batch, index, 0); + *batch++ = i915_ggtt_offset(engine->scratch) + 2 * CACHELINE_BYTES; + *batch++ = 0; + *batch++ = 0; + *batch++ = 0; /* Pad to end of cacheline */ - while (index % CACHELINE_DWORDS) - wa_ctx_emit(batch, index, MI_NOOP); + while ((unsigned long)batch % CACHELINE_BYTES) + *batch++ = MI_NOOP; /* * MI_BATCH_BUFFER_END is not required in Indirect ctx BB because @@ -1028,7 +977,7 @@ static int gen8_init_indirectctx_bb(struct intel_engine_cs *engine, * in the register CTX_RCS_INDIRECT_CTX */ - return wa_ctx_end(wa_ctx, *offset = index, CACHELINE_DWORDS); + return batch; } /* @@ -1040,58 +989,38 @@ static int gen8_init_indirectctx_bb(struct intel_engine_cs *engine, * This batch is terminated with MI_BATCH_BUFFER_END and so we need not add padding * to align it with cacheline as padding after MI_BATCH_BUFFER_END is redundant. */ -static int gen8_init_perctx_bb(struct intel_engine_cs *engine, - struct i915_wa_ctx_bb *wa_ctx, - uint32_t *batch, - uint32_t *offset) +static u32 *gen8_init_perctx_bb(struct intel_engine_cs *engine, u32 *batch) { - uint32_t index = wa_ctx_start(wa_ctx, *offset, CACHELINE_DWORDS); - /* WaDisableCtxRestoreArbitration:bdw,chv */ - wa_ctx_emit(batch, index, MI_ARB_ON_OFF | MI_ARB_ENABLE); - - wa_ctx_emit(batch, index, MI_BATCH_BUFFER_END); + *batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; + *batch++ = MI_BATCH_BUFFER_END; - return wa_ctx_end(wa_ctx, *offset = index, 1); + return batch; } -static int gen9_init_indirectctx_bb(struct intel_engine_cs *engine, - struct i915_wa_ctx_bb *wa_ctx, - uint32_t *batch, - uint32_t *offset) +static u32 *gen9_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch) { - int ret; - struct drm_i915_private *dev_priv = engine->i915; - uint32_t index = wa_ctx_start(wa_ctx, *offset, CACHELINE_DWORDS); - /* WaFlushCoherentL3CacheLinesAtContextSwitch:skl,bxt,glk */ - ret = gen8_emit_flush_coherentl3_wa(engine, batch, index); - if (ret < 0) - return ret; - index = ret; + batch = gen8_emit_flush_coherentl3_wa(engine, batch); /* WaDisableGatherAtSetShaderCommonSlice:skl,bxt,kbl,glk */ - wa_ctx_emit(batch, index, MI_LOAD_REGISTER_IMM(1)); - wa_ctx_emit_reg(batch, index, COMMON_SLICE_CHICKEN2); - wa_ctx_emit(batch, index, _MASKED_BIT_DISABLE( - GEN9_DISABLE_GATHER_AT_SET_SHADER_COMMON_SLICE)); - wa_ctx_emit(batch, index, MI_NOOP); + *batch++ = MI_LOAD_REGISTER_IMM(1); + *batch++ = i915_mmio_reg_offset(COMMON_SLICE_CHICKEN2); + *batch++ = _MASKED_BIT_DISABLE( + GEN9_DISABLE_GATHER_AT_SET_SHADER_COMMON_SLICE); + *batch++ = MI_NOOP; /* WaClearSlmSpaceAtContextSwitch:kbl */ /* Actual scratch location is at 128 bytes offset */ - if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_A0)) { - u32 scratch_addr = - i915_ggtt_offset(engine->scratch) + 2 * CACHELINE_BYTES; - - wa_ctx_emit(batch, index, GFX_OP_PIPE_CONTROL(6)); - wa_ctx_emit(batch, index, (PIPE_CONTROL_FLUSH_L3 | - PIPE_CONTROL_GLOBAL_GTT_IVB | - PIPE_CONTROL_CS_STALL | - PIPE_CONTROL_QW_WRITE)); - wa_ctx_emit(batch, index, scratch_addr); - wa_ctx_emit(batch, index, 0); - wa_ctx_emit(batch, index, 0); - wa_ctx_emit(batch, index, 0); + if (IS_KBL_REVID(engine->i915, 0, KBL_REVID_A0)) { + *batch++ = GFX_OP_PIPE_CONTROL(6); + *batch++ = PIPE_CONTROL_FLUSH_L3 | PIPE_CONTROL_GLOBAL_GTT_IVB | + PIPE_CONTROL_CS_STALL | PIPE_CONTROL_QW_WRITE; + *batch++ = i915_ggtt_offset(engine->scratch) + + 2 * CACHELINE_BYTES; + *batch++ = 0; + *batch++ = 0; + *batch++ = 0; } /* WaMediaPoolStateCmdInWABB:bxt,glk */ @@ -1109,41 +1038,37 @@ static int gen9_init_indirectctx_bb(struct intel_engine_cs *engine, * possible configurations, to avoid duplication they are * not shown here again. */ - u32 eu_pool_config = 0x00777000; - wa_ctx_emit(batch, index, GEN9_MEDIA_POOL_STATE); - wa_ctx_emit(batch, index, GEN9_MEDIA_POOL_ENABLE); - wa_ctx_emit(batch, index, eu_pool_config); - wa_ctx_emit(batch, index, 0); - wa_ctx_emit(batch, index, 0); - wa_ctx_emit(batch, index, 0); + *batch++ = GEN9_MEDIA_POOL_STATE; + *batch++ = GEN9_MEDIA_POOL_ENABLE; + *batch++ = 0x00777000; + *batch++ = 0; + *batch++ = 0; + *batch++ = 0; } /* Pad to end of cacheline */ - while (index % CACHELINE_DWORDS) - wa_ctx_emit(batch, index, MI_NOOP); + while ((unsigned long)batch % CACHELINE_BYTES) + *batch++ = MI_NOOP; - return wa_ctx_end(wa_ctx, *offset = index, CACHELINE_DWORDS); + return batch; } -static int gen9_init_perctx_bb(struct intel_engine_cs *engine, - struct i915_wa_ctx_bb *wa_ctx, - uint32_t *batch, - uint32_t *offset) +static u32 *gen9_init_perctx_bb(struct intel_engine_cs *engine, u32 *batch) { - uint32_t index = wa_ctx_start(wa_ctx, *offset, CACHELINE_DWORDS); - - wa_ctx_emit(batch, index, MI_BATCH_BUFFER_END); + *batch++ = MI_BATCH_BUFFER_END; - return wa_ctx_end(wa_ctx, *offset = index, 1); + return batch; } -static int lrc_setup_wa_ctx_obj(struct intel_engine_cs *engine, u32 size) +#define CTX_WA_BB_OBJ_SIZE (PAGE_SIZE) + +static int lrc_setup_wa_ctx(struct intel_engine_cs *engine) { struct drm_i915_gem_object *obj; struct i915_vma *vma; int err; - obj = i915_gem_object_create(engine->i915, PAGE_ALIGN(size)); + obj = i915_gem_object_create(engine->i915, CTX_WA_BB_OBJ_SIZE); if (IS_ERR(obj)) return PTR_ERR(obj); @@ -1165,78 +1090,70 @@ err: return err; } -static void lrc_destroy_wa_ctx_obj(struct intel_engine_cs *engine) +static void lrc_destroy_wa_ctx(struct intel_engine_cs *engine) { i915_vma_unpin_and_release(&engine->wa_ctx.vma); } +typedef u32 *(*wa_bb_func_t)(struct intel_engine_cs *engine, u32 *batch); + static int intel_init_workaround_bb(struct intel_engine_cs *engine) { struct i915_ctx_workarounds *wa_ctx = &engine->wa_ctx; - uint32_t *batch; - uint32_t offset; + struct i915_wa_ctx_bb *wa_bb[2] = { &wa_ctx->indirect_ctx, + &wa_ctx->per_ctx }; + wa_bb_func_t wa_bb_fn[2]; struct page *page; + void *batch, *batch_ptr; + unsigned int i; int ret; - WARN_ON(engine->id != RCS); + if (WARN_ON(engine->id != RCS || !engine->scratch)) + return -EINVAL; - /* update this when WA for higher Gen are added */ - if (INTEL_GEN(engine->i915) > 9) { - DRM_ERROR("WA batch buffer is not initialized for Gen%d\n", - INTEL_GEN(engine->i915)); + switch (INTEL_GEN(engine->i915)) { + case 9: + wa_bb_fn[0] = gen9_init_indirectctx_bb; + wa_bb_fn[1] = gen9_init_perctx_bb; + break; + case 8: + wa_bb_fn[0] = gen8_init_indirectctx_bb; + wa_bb_fn[1] = gen8_init_perctx_bb; + break; + default: + MISSING_CASE(INTEL_GEN(engine->i915)); return 0; } - /* some WA perform writes to scratch page, ensure it is valid */ - if (!engine->scratch) { - DRM_ERROR("scratch page not allocated for %s\n", engine->name); - return -EINVAL; - } - - ret = lrc_setup_wa_ctx_obj(engine, PAGE_SIZE); + ret = lrc_setup_wa_ctx(engine); if (ret) { DRM_DEBUG_DRIVER("Failed to setup context WA page: %d\n", ret); return ret; } page = i915_gem_object_get_dirty_page(wa_ctx->vma->obj, 0); - batch = kmap_atomic(page); - offset = 0; - - if (IS_GEN8(engine->i915)) { - ret = gen8_init_indirectctx_bb(engine, - &wa_ctx->indirect_ctx, - batch, - &offset); - if (ret) - goto out; + batch = batch_ptr = kmap_atomic(page); - ret = gen8_init_perctx_bb(engine, - &wa_ctx->per_ctx, - batch, - &offset); - if (ret) - goto out; - } else if (IS_GEN9(engine->i915)) { - ret = gen9_init_indirectctx_bb(engine, - &wa_ctx->indirect_ctx, - batch, - &offset); - if (ret) - goto out; - - ret = gen9_init_perctx_bb(engine, - &wa_ctx->per_ctx, - batch, - &offset); - if (ret) - goto out; + /* + * Emit the two workaround batch buffers, recording the offset from the + * start of the workaround batch buffer object for each and their + * respective sizes. + */ + for (i = 0; i < ARRAY_SIZE(wa_bb_fn); i++) { + wa_bb[i]->offset = batch_ptr - batch; + if (WARN_ON(!IS_ALIGNED(wa_bb[i]->offset, CACHELINE_BYTES))) { + ret = -EINVAL; + break; + } + batch_ptr = wa_bb_fn[i](engine, batch_ptr); + wa_bb[i]->size = batch_ptr - (batch + wa_bb[i]->offset); } -out: + BUG_ON(batch_ptr - batch > CTX_WA_BB_OBJ_SIZE); + kunmap_atomic(batch); if (ret) - lrc_destroy_wa_ctx_obj(engine); + lrc_destroy_wa_ctx(engine); return ret; } @@ -1685,7 +1602,7 @@ void intel_logical_ring_cleanup(struct intel_engine_cs *engine) intel_engine_cleanup_common(engine); - lrc_destroy_wa_ctx_obj(engine); + lrc_destroy_wa_ctx(engine); engine->i915 = NULL; dev_priv->engine[engine->id] = NULL; kfree(engine); @@ -1971,15 +1888,14 @@ static void execlists_init_reg_state(u32 *reg_state, u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma); reg_state[CTX_RCS_INDIRECT_CTX+1] = - (ggtt_offset + wa_ctx->indirect_ctx.offset * sizeof(uint32_t)) | - (wa_ctx->indirect_ctx.size / CACHELINE_DWORDS); + (ggtt_offset + wa_ctx->indirect_ctx.offset) | + (wa_ctx->indirect_ctx.size / CACHELINE_BYTES); reg_state[CTX_RCS_INDIRECT_CTX_OFFSET+1] = intel_lr_indirect_ctx_offset(engine) << 6; reg_state[CTX_BB_PER_CTX_PTR+1] = - (ggtt_offset + wa_ctx->per_ctx.offset * sizeof(uint32_t)) | - 0x01; + (ggtt_offset + wa_ctx->per_ctx.offset) | 0x01; } } reg_state[CTX_LRI_HEADER_1] = MI_LOAD_REGISTER_IMM(9) | MI_LRI_FORCE_POSTED; -- cgit From 9f235dfa495856ed7b264ee08920601df57333da Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Thu, 16 Feb 2017 12:23:25 +0000 Subject: drm/i915: Consolidate gen8_emit_pipe_control We have a few open coded instances in the execlists code and an almost suitable helper in intel_ringbuf.c We can consolidate to a single helper if we change the existing helper to emit directly to ring buffer memory and move the space reservation outside it. v2: Drop memcpy for memset. (Chris Wilson) Signed-off-by: Tvrtko Ursulin Reviewed-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20170216122325.31391-2-tvrtko.ursulin@linux.intel.com --- drivers/gpu/drm/i915/intel_lrc.c | 77 +++++++++++---------------------- drivers/gpu/drm/i915/intel_ringbuffer.c | 45 +++++++------------ drivers/gpu/drm/i915/intel_ringbuffer.h | 11 +++++ 3 files changed, 52 insertions(+), 81 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 5154661a38cf..afcb1f189162 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -918,12 +918,10 @@ gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine, u32 *batch) *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4); *batch++ = 0x40400000 | GEN8_LQSC_FLUSH_COHERENT_LINES; - *batch++ = GFX_OP_PIPE_CONTROL(6); - *batch++ = PIPE_CONTROL_CS_STALL | PIPE_CONTROL_DC_FLUSH_ENABLE; - *batch++ = 0; - *batch++ = 0; - *batch++ = 0; - *batch++ = 0; + batch = gen8_emit_pipe_control(batch, + PIPE_CONTROL_CS_STALL | + PIPE_CONTROL_DC_FLUSH_ENABLE, + 0); *batch++ = MI_LOAD_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT; *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4); @@ -957,15 +955,15 @@ static u32 *gen8_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch) if (IS_BROADWELL(engine->i915)) batch = gen8_emit_flush_coherentl3_wa(engine, batch); - *batch++ = GFX_OP_PIPE_CONTROL(6); - *batch++ = PIPE_CONTROL_FLUSH_L3 | PIPE_CONTROL_GLOBAL_GTT_IVB | - PIPE_CONTROL_CS_STALL | PIPE_CONTROL_QW_WRITE; /* WaClearSlmSpaceAtContextSwitch:bdw,chv */ /* Actual scratch location is at 128 bytes offset */ - *batch++ = i915_ggtt_offset(engine->scratch) + 2 * CACHELINE_BYTES; - *batch++ = 0; - *batch++ = 0; - *batch++ = 0; + batch = gen8_emit_pipe_control(batch, + PIPE_CONTROL_FLUSH_L3 | + PIPE_CONTROL_GLOBAL_GTT_IVB | + PIPE_CONTROL_CS_STALL | + PIPE_CONTROL_QW_WRITE, + i915_ggtt_offset(engine->scratch) + + 2 * CACHELINE_BYTES); /* Pad to end of cacheline */ while ((unsigned long)batch % CACHELINE_BYTES) @@ -1013,14 +1011,13 @@ static u32 *gen9_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch) /* WaClearSlmSpaceAtContextSwitch:kbl */ /* Actual scratch location is at 128 bytes offset */ if (IS_KBL_REVID(engine->i915, 0, KBL_REVID_A0)) { - *batch++ = GFX_OP_PIPE_CONTROL(6); - *batch++ = PIPE_CONTROL_FLUSH_L3 | PIPE_CONTROL_GLOBAL_GTT_IVB | - PIPE_CONTROL_CS_STALL | PIPE_CONTROL_QW_WRITE; - *batch++ = i915_ggtt_offset(engine->scratch) + - 2 * CACHELINE_BYTES; - *batch++ = 0; - *batch++ = 0; - *batch++ = 0; + batch = gen8_emit_pipe_control(batch, + PIPE_CONTROL_FLUSH_L3 | + PIPE_CONTROL_GLOBAL_GTT_IVB | + PIPE_CONTROL_CS_STALL | + PIPE_CONTROL_QW_WRITE, + i915_ggtt_offset(engine->scratch) + + 2 * CACHELINE_BYTES); } /* WaMediaPoolStateCmdInWABB:bxt,glk */ @@ -1456,39 +1453,17 @@ static int gen8_emit_flush_render(struct drm_i915_gem_request *request, if (IS_ERR(cs)) return PTR_ERR(cs); - if (vf_flush_wa) { - *cs++ = GFX_OP_PIPE_CONTROL(6); - *cs++ = 0; - *cs++ = 0; - *cs++ = 0; - *cs++ = 0; - *cs++ = 0; - } + if (vf_flush_wa) + cs = gen8_emit_pipe_control(cs, 0, 0); - if (dc_flush_wa) { - *cs++ = GFX_OP_PIPE_CONTROL(6); - *cs++ = PIPE_CONTROL_DC_FLUSH_ENABLE; - *cs++ = 0; - *cs++ = 0; - *cs++ = 0; - *cs++ = 0; - } + if (dc_flush_wa) + cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_DC_FLUSH_ENABLE, + 0); - *cs++ = GFX_OP_PIPE_CONTROL(6); - *cs++ = flags; - *cs++ = scratch_addr; - *cs++ = 0; - *cs++ = 0; - *cs++ = 0; + cs = gen8_emit_pipe_control(cs, flags, scratch_addr); - if (dc_flush_wa) { - *cs++ = GFX_OP_PIPE_CONTROL(6); - *cs++ = PIPE_CONTROL_CS_STALL; - *cs++ = 0; - *cs++ = 0; - *cs++ = 0; - *cs++ = 0; - } + if (dc_flush_wa) + cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_CS_STALL, 0); intel_ring_advance(request, cs); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 629fe6584e61..d56f384938f7 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -334,35 +334,16 @@ gen7_render_ring_flush(struct drm_i915_gem_request *req, u32 mode) } static int -gen8_emit_pipe_control(struct drm_i915_gem_request *req, - u32 flags, u32 scratch_addr) +gen8_render_ring_flush(struct drm_i915_gem_request *req, u32 mode) { + u32 flags; u32 *cs; - cs = intel_ring_begin(req, 6); + cs = intel_ring_begin(req, mode & EMIT_INVALIDATE ? 12 : 6); if (IS_ERR(cs)) return PTR_ERR(cs); - *cs++ = GFX_OP_PIPE_CONTROL(6); - *cs++ = flags; - *cs++ = scratch_addr; - *cs++ = 0; - *cs++ = 0; - *cs++ = 0; - intel_ring_advance(req, cs); - - return 0; -} - -static int -gen8_render_ring_flush(struct drm_i915_gem_request *req, u32 mode) -{ - u32 scratch_addr = - i915_ggtt_offset(req->engine->scratch) + 2 * CACHELINE_BYTES; - u32 flags = 0; - int ret; - - flags |= PIPE_CONTROL_CS_STALL; + flags = PIPE_CONTROL_CS_STALL; if (mode & EMIT_FLUSH) { flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH; @@ -381,15 +362,19 @@ gen8_render_ring_flush(struct drm_i915_gem_request *req, u32 mode) flags |= PIPE_CONTROL_GLOBAL_GTT_IVB; /* WaCsStallBeforeStateCacheInvalidate:bdw,chv */ - ret = gen8_emit_pipe_control(req, - PIPE_CONTROL_CS_STALL | - PIPE_CONTROL_STALL_AT_SCOREBOARD, - 0); - if (ret) - return ret; + cs = gen8_emit_pipe_control(cs, + PIPE_CONTROL_CS_STALL | + PIPE_CONTROL_STALL_AT_SCOREBOARD, + 0); } - return gen8_emit_pipe_control(req, flags, scratch_addr); + cs = gen8_emit_pipe_control(cs, flags, + i915_ggtt_offset(req->engine->scratch) + + 2 * CACHELINE_BYTES); + + intel_ring_advance(req, cs); + + return 0; } static void ring_setup_phys_status_page(struct intel_engine_cs *engine) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 1d9fd4e42622..4ca5308e12bb 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -631,4 +631,15 @@ void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine); void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine); unsigned int intel_breadcrumbs_busy(struct drm_i915_private *i915); +static inline u32 *gen8_emit_pipe_control(u32 *batch, u32 flags, u32 offset) +{ + memset(batch, 0, 6 * sizeof(u32)); + + batch[0] = GFX_OP_PIPE_CONTROL(6); + batch[1] = flags; + batch[2] = offset; + + return batch + 6; +} + #endif /* _INTEL_RINGBUFFER_H_ */ -- cgit From 8d371db4b0a7aa919df9be2be430187fe3e70399 Mon Sep 17 00:00:00 2001 From: Ander Conselvan de Oliveira Date: Fri, 17 Feb 2017 14:06:28 +0200 Subject: drm/i915/glk: Load the degamma LUT even in legacy gamma mode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In Geminilake, the degamma table is enabled or disabled by the pipe CSC enable bit, so its active even when running in the legacy gamma mode. So always set sane values for that table, since the default value is all zeroes. This fixes blank screens after a suspend/resume cycle while legacy gamma is in use. Signed-off-by: Ander Conselvan de Oliveira Reviewed-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/20170217120630.6143-2-ander.conselvan.de.oliveira@intel.com --- drivers/gpu/drm/i915/intel_color.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_color.c b/drivers/gpu/drm/i915/intel_color.c index 0627eee1cb64..b9e5266d933b 100644 --- a/drivers/gpu/drm/i915/intel_color.c +++ b/drivers/gpu/drm/i915/intel_color.c @@ -483,12 +483,13 @@ static void glk_load_luts(struct drm_crtc_state *state) struct intel_crtc_state *intel_state = to_intel_crtc_state(state); enum pipe pipe = to_intel_crtc(crtc)->pipe; + glk_load_degamma_lut(state); + if (crtc_state_is_legacy(state)) { haswell_load_luts(state); return; } - glk_load_degamma_lut(state); bdw_load_gamma_lut(state, 0); intel_state->gamma_mode = GAMMA_MODE_MODE_10BIT; -- cgit From 3bb56da78153150d72c67ceb8b7df49f36ae269c Mon Sep 17 00:00:00 2001 From: Ander Conselvan de Oliveira Date: Fri, 17 Feb 2017 14:06:29 +0200 Subject: drm/i915/glk: Enable pipe CSC MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now that the pre-csc degamma table is set up correctly in Geminilake, pipe CSC can be enabled without causing a black screen. v2: Rebase. Signed-off-by: Ander Conselvan de Oliveira Reviewed-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/20170217120630.6143-3-ander.conselvan.de.oliveira@intel.com --- drivers/gpu/drm/i915/intel_display.c | 1 + drivers/gpu/drm/i915/intel_sprite.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 5443ba8430da..b60373812d34 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -3327,6 +3327,7 @@ static void skylake_update_primary_plane(struct drm_plane *plane, if (IS_GEMINILAKE(dev_priv)) { I915_WRITE(PLANE_COLOR_CTL(pipe, plane_id), PLANE_COLOR_PIPE_GAMMA_ENABLE | + PLANE_COLOR_PIPE_CSC_ENABLE | PLANE_COLOR_PLANE_GAMMA_DISABLE); } else { plane_ctl |= diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c index b16a29591803..27e0752d1578 100644 --- a/drivers/gpu/drm/i915/intel_sprite.c +++ b/drivers/gpu/drm/i915/intel_sprite.c @@ -224,6 +224,7 @@ skl_update_plane(struct drm_plane *drm_plane, if (IS_GEMINILAKE(dev_priv)) { I915_WRITE(PLANE_COLOR_CTL(pipe, plane_id), PLANE_COLOR_PIPE_GAMMA_ENABLE | + PLANE_COLOR_PIPE_CSC_ENABLE | PLANE_COLOR_PLANE_GAMMA_DISABLE); } else { plane_ctl |= -- cgit From 2246bea6cf773552106d98ab31ba7952c4ddb3f9 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 17 Feb 2017 15:13:00 +0000 Subject: drm/i915: Postpone fake breadcrumb interrupt until real interrupts cease When the timer expires for checking on interrupt processing, check to see if any interrupts arrived within the last time period. If real interrupts are still being delivered, we can be reassured that we haven't missed the final interrupt as the waiter will still be woken. Only once all activity ceases, do we have to worry about the waiter never being woken and so need to install a timer to kick the waiter for a slow arrival of a seqno. Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170217151304.16665-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_irq.c | 1 + drivers/gpu/drm/i915/intel_breadcrumbs.c | 22 +++++++++------------- drivers/gpu/drm/i915/intel_ringbuffer.h | 3 ++- 3 files changed, 12 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 0d8ebd147da6..0657ac44a582 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -1033,6 +1033,7 @@ static void ironlake_rps_change_irq_handler(struct drm_i915_private *dev_priv) static void notify_ring(struct intel_engine_cs *engine) { + atomic_inc(&engine->irq_count); set_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted); if (intel_engine_wakeup(engine)) trace_i915_gem_request_notify(engine); diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c index 74cb7b91b5db..4395b177493e 100644 --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c @@ -26,6 +26,11 @@ #include "i915_drv.h" +static unsigned long wait_timeout(void) +{ + return round_jiffies_up(jiffies + DRM_I915_HANGCHECK_JIFFIES); +} + static void intel_breadcrumbs_hangcheck(unsigned long data) { struct intel_engine_cs *engine = (struct intel_engine_cs *)data; @@ -34,8 +39,9 @@ static void intel_breadcrumbs_hangcheck(unsigned long data) if (!b->irq_enabled) return; - if (time_before(jiffies, b->timeout)) { - mod_timer(&b->hangcheck, b->timeout); + if (b->hangcheck_interrupts != atomic_read(&engine->irq_count)) { + b->hangcheck_interrupts = atomic_read(&engine->irq_count); + mod_timer(&b->hangcheck, wait_timeout()); return; } @@ -55,11 +61,6 @@ static void intel_breadcrumbs_hangcheck(unsigned long data) i915_queue_hangcheck(engine->i915); } -static unsigned long wait_timeout(void) -{ - return round_jiffies_up(jiffies + DRM_I915_HANGCHECK_JIFFIES); -} - static void intel_breadcrumbs_fake_irq(unsigned long data) { struct intel_engine_cs *engine = (struct intel_engine_cs *)data; @@ -140,8 +141,7 @@ static void __intel_breadcrumbs_enable_irq(struct intel_breadcrumbs *b) i915_queue_hangcheck(i915); } else { /* Ensure we never sleep indefinitely */ - GEM_BUG_ON(!time_after(b->timeout, jiffies)); - mod_timer(&b->hangcheck, b->timeout); + mod_timer(&b->hangcheck, wait_timeout()); } } @@ -258,7 +258,6 @@ static bool __intel_engine_add_wait(struct intel_engine_cs *engine, GEM_BUG_ON(!next && !first); if (next && next != &wait->node) { GEM_BUG_ON(first); - b->timeout = wait_timeout(); b->first_wait = to_wait(next); rcu_assign_pointer(b->irq_seqno_bh, b->first_wait->tsk); /* As there is a delay between reading the current @@ -286,7 +285,6 @@ static bool __intel_engine_add_wait(struct intel_engine_cs *engine, if (first) { GEM_BUG_ON(rb_first(&b->waiters) != &wait->node); - b->timeout = wait_timeout(); b->first_wait = wait; rcu_assign_pointer(b->irq_seqno_bh, wait->tsk); /* After assigning ourselves as the new bottom-half, we must @@ -396,7 +394,6 @@ void intel_engine_remove_wait(struct intel_engine_cs *engine, * the interrupt, or if we have to handle an * exception rather than a seqno completion. */ - b->timeout = wait_timeout(); b->first_wait = to_wait(next); rcu_assign_pointer(b->irq_seqno_bh, b->first_wait->tsk); if (b->first_wait->seqno != wait->seqno) @@ -627,7 +624,6 @@ void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine) __intel_breadcrumbs_disable_irq(b); if (intel_engine_has_waiter(engine)) { - b->timeout = wait_timeout(); __intel_breadcrumbs_enable_irq(b); if (test_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted)) wake_up_process(b->first_wait->tsk); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 4ca5308e12bb..4091c97be6ec 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -213,6 +213,7 @@ struct intel_engine_cs { struct intel_render_state *render_state; + atomic_t irq_count; unsigned long irq_posted; #define ENGINE_IRQ_BREADCRUMB 0 #define ENGINE_IRQ_EXECLIST 1 @@ -245,7 +246,7 @@ struct intel_engine_cs { struct timer_list fake_irq; /* used after a missed interrupt */ struct timer_list hangcheck; /* detect missed interrupts */ - unsigned long timeout; + unsigned int hangcheck_interrupts; bool irq_enabled : 1; bool rpm_wakelock : 1; -- cgit From c33ed067d13b7fae9e19ef1a9e302b8fc90c43d8 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 17 Feb 2017 15:13:01 +0000 Subject: drm/i915: Break i915_spin_request() if we see an interrupt If an interrupt has been posted, and we were spinning on the active seqno waiting for it to advance but it did not, then we can expect that it will not see its advance in the immediate future and should call into the irq-seqno barrier. We can stop spinning at this point, and leave the difficulty of handling the coherency to the caller. Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: Mika Kuoppala Reviewed-by: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170217151304.16665-3-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_request.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index 2f6cfa47dc61..a5fac40d2a4f 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -963,7 +963,8 @@ static bool busywait_stop(unsigned long timeout, unsigned int cpu) bool __i915_spin_request(const struct drm_i915_gem_request *req, int state, unsigned long timeout_us) { - unsigned int cpu; + struct intel_engine_cs *engine = req->engine; + unsigned int irq, cpu; /* When waiting for high frequency requests, e.g. during synchronous * rendering split between the CPU and GPU, the finite amount of time @@ -975,11 +976,20 @@ bool __i915_spin_request(const struct drm_i915_gem_request *req, * takes to sleep on a request, on the order of a microsecond. */ + irq = atomic_read(&engine->irq_count); timeout_us += local_clock_us(&cpu); do { if (__i915_gem_request_completed(req)) return true; + /* Seqno are meant to be ordered *before* the interrupt. If + * we see an interrupt without a corresponding seqno advance, + * assume we won't see one in the near future but require + * the engine->seqno_barrier() to fixup coherency. + */ + if (atomic_read(&engine->irq_count) != irq) + break; + if (signal_pending_state(state, current)) break; -- cgit From 8998567b51141f79309d1267640c919dfd23d3a4 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 17 Feb 2017 15:13:02 +0000 Subject: drm/i915: Defer declaration of missed-interrupt until the waiter is asleep If the waiter was currently running, assume it hasn't had a chance to process the pending interrupt (e.g, low priority task on a loaded system) and wait until it sleeps before declaring a missed interrupt. References: https://bugs.freedesktop.org/show_bug.cgi?id=99816 Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: Mika Kuoppala Reviewed-by: Tvrtko Ursulin Reviewed-by: Mika Kuoppala Link: http://patchwork.freedesktop.org/patch/msgid/20170217151304.16665-4-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_breadcrumbs.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c index 4395b177493e..f78b9baf61b6 100644 --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c @@ -45,6 +45,15 @@ static void intel_breadcrumbs_hangcheck(unsigned long data) return; } + /* If the waiter was currently running, assume it hasn't had a chance + * to process the pending interrupt (e.g, low priority task on a loaded + * system) and wait until it sleeps before declaring a missed interrupt. + */ + if (!intel_engine_wakeup(engine)) { + mod_timer(&b->hangcheck, wait_timeout()); + return; + } + DRM_DEBUG("Hangcheck timer elapsed... %s idle\n", engine->name); set_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings); mod_timer(&engine->breadcrumbs.fake_irq, jiffies + 1); -- cgit From 6ef98ea0dacd8bee62d6823b1dea80269137c240 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 17 Feb 2017 15:13:03 +0000 Subject: drm/i915: Only start with the fake-irq timer if interrupts are dead As a backup to waiting on a user-interrupt from the GPU, we use a heavy and frequent timer to wake up the waiting process should we detect an inconsistency whilst waiting. After seeing a "missed interrupt", the next time we wait, we restart the heavy timer. This patch is more reluctant to restart the timer and will only do so if we have not see any interrupts since when we started the fake irq timer. If we are seeing interrupts, then the waiters are being woken normally and we had an incoherency that caused to miss last time - that is unlikely to reoccur and so taking the risk of stalling again seems pragmatic. Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: Mika Kuoppala Reviewed-by: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170217151304.16665-5-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_breadcrumbs.c | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c index f78b9baf61b6..5932e2dc0c6f 100644 --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c @@ -107,6 +107,23 @@ static void irq_disable(struct intel_engine_cs *engine) spin_unlock(&engine->i915->irq_lock); } +static bool use_fake_irq(const struct intel_breadcrumbs *b) +{ + const struct intel_engine_cs *engine = + container_of(b, struct intel_engine_cs, breadcrumbs); + + if (!test_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings)) + return false; + + /* Only start with the heavy weight fake irq timer if we have not + * seen any interrupts since enabling it the first time. If the + * interrupts are still arriving, it means we made a mistake in our + * engine->seqno_barrier(), a timing error that should be transient + * and unlikely to reoccur. + */ + return atomic_read(&engine->irq_count) == b->hangcheck_interrupts; +} + static void __intel_breadcrumbs_enable_irq(struct intel_breadcrumbs *b) { struct intel_engine_cs *engine = @@ -144,8 +161,7 @@ static void __intel_breadcrumbs_enable_irq(struct intel_breadcrumbs *b) b->irq_enabled = true; } - if (!b->irq_enabled || - test_bit(engine->id, &i915->gpu_error.missed_irq_rings)) { + if (!b->irq_enabled || use_fake_irq(b)) { mod_timer(&b->fake_irq, jiffies + 1); i915_queue_hangcheck(i915); } else { -- cgit From e54ca97747770a6b08413ae9afa776c0d8c4f6c3 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 17 Feb 2017 15:13:04 +0000 Subject: drm/i915: Remove completed fences after a wait If we wait upon the full (i.e. all shared fences, or upon an exclusive fence) reservation object successfully, we know that all fences beneath it have been signaled, so long as no new fences were added whilst we slept. If the reservation_object remains the same, as detected by its seqcount, we can then reap all the fences upon completion. Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Reviewed-by: Matthew Auld Link: http://patchwork.freedesktop.org/patch/msgid/20170217151304.16665-6-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 5f7b8c88eb7e..d93032875f28 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -427,7 +427,9 @@ i915_gem_object_wait_reservation(struct reservation_object *resv, long timeout, struct intel_rps_client *rps) { + unsigned int seq = __read_seqcount_begin(&resv->seq); struct dma_fence *excl; + bool prune_fences = false; if (flags & I915_WAIT_ALL) { struct dma_fence **shared; @@ -452,15 +454,26 @@ i915_gem_object_wait_reservation(struct reservation_object *resv, for (; i < count; i++) dma_fence_put(shared[i]); kfree(shared); + + prune_fences = count && timeout >= 0; } else { excl = reservation_object_get_excl_rcu(resv); } - if (excl && timeout >= 0) + if (excl && timeout >= 0) { timeout = i915_gem_object_wait_fence(excl, flags, timeout, rps); + prune_fences = timeout >= 0; + } dma_fence_put(excl); + if (prune_fences && !__read_seqcount_retry(&resv->seq, seq)) { + reservation_object_lock(resv, NULL); + if (!__read_seqcount_retry(&resv->seq, seq)) + reservation_object_add_excl_fence(resv, NULL); + reservation_object_unlock(resv); + } + return timeout; } -- cgit From f4a791819ed00a749a90387aa139706a507aa690 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Thu, 16 Feb 2017 17:38:35 -0800 Subject: drm/i915: DMC 1.03 for Geminilake There is a new version of DMC available for Geminilake. It's release notes only mention: - Enhancement in the FW to restore the PG2 state v2: Fixed the platform name on commit message. Noticed by Jani S. v3: cook on top of drm-tip without depending on kbl one so CI can check. v4: make v3 on top of v2. Cc: David Weinehall Cc: Jani Saarinen Cc: Ander Conselvan de Oliveira Signed-off-by: Rodrigo Vivi Reviewed-by: Ander Conselvan de Oliveira Signed-off-by: Ander Conselvan de Oliveira Link: http://patchwork.freedesktop.org/patch/msgid/1487295515-15396-1-git-send-email-rodrigo.vivi@intel.com --- drivers/gpu/drm/i915/intel_csr.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_csr.c b/drivers/gpu/drm/i915/intel_csr.c index 9dcc434d3b74..14659c7e2858 100644 --- a/drivers/gpu/drm/i915/intel_csr.c +++ b/drivers/gpu/drm/i915/intel_csr.c @@ -34,9 +34,9 @@ * low-power state and comes back to normal. */ -#define I915_CSR_GLK "i915/glk_dmc_ver1_01.bin" +#define I915_CSR_GLK "i915/glk_dmc_ver1_03.bin" MODULE_FIRMWARE(I915_CSR_GLK); -#define GLK_CSR_VERSION_REQUIRED CSR_VERSION(1, 1) +#define GLK_CSR_VERSION_REQUIRED CSR_VERSION(1, 3) #define I915_CSR_KBL "i915/kbl_dmc_ver1_01.bin" MODULE_FIRMWARE(I915_CSR_KBL); -- cgit From 5a55b527af43423fea51e9927185a1a6e9c80ee7 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 17 Feb 2017 14:14:55 +0000 Subject: drm/i915: Only apply legacy PDE overflow detection to 3lvl machines MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Prevent the overflow check from firing on machines with the full 4lvl page tables, that are not restricted to GEN8_LEGACY_PDES. v2: Also fix the off-by-one in the compare Fixes: 894ccebee2b0 ("drm/i915: Micro-optimise gen8_ppgtt_insert_entries()") Signed-off-by: Chris Wilson Cc: Mika Kuoppala Cc: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/20170217141455.19877-1-chris@chris-wilson.co.uk Reviewed-by: Mika Kuoppala --- drivers/gpu/drm/i915/i915_gem_gtt.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 47a38272f54c..a5162cbc0ea0 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -854,7 +854,8 @@ gen8_ppgtt_insert_pte_entries(struct i915_hw_ppgtt *ppgtt, break; } - GEM_BUG_ON(pdpe > GEN8_LEGACY_PDPES); + GEM_BUG_ON(!i915_vm_is_48bit(&ppgtt->base) && + pdpe >= GEN8_LEGACY_PDPES); pd = pdp->page_directory[pdpe]; pde = 0; } -- cgit From 48cb91203e1811e24532ab9204d45f645b69a0e7 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sat, 18 Feb 2017 15:00:50 +0000 Subject: drm/i915: Remove unneeded struct_mutex around rpm We don't need struct_mutex for acquiring an rpm wakeref, and do not need to serialise those register read (it's the wrong mutex for those registers in any case). Begone! Signed-off-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20170218150050.10414-1-chris@chris-wilson.co.uk Reviewed-by: Radoslaw Szwichtenberg --- drivers/gpu/drm/i915/i915_debugfs.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 11f02fb1f335..404394ef4b14 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -1110,7 +1110,6 @@ DEFINE_SIMPLE_ATTRIBUTE(i915_next_seqno_fops, static int i915_frequency_info(struct seq_file *m, void *unused) { struct drm_i915_private *dev_priv = node_to_i915(m->private); - struct drm_device *dev = &dev_priv->drm; int ret = 0; intel_runtime_pm_get(dev_priv); @@ -1173,10 +1172,6 @@ static int i915_frequency_info(struct seq_file *m, void *unused) } /* RPSTAT1 is in the GT power well */ - ret = mutex_lock_interruptible(&dev->struct_mutex); - if (ret) - goto out; - intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); reqf = I915_READ(GEN6_RPNSWREQ); @@ -1211,7 +1206,6 @@ static int i915_frequency_info(struct seq_file *m, void *unused) cagf = intel_gpu_freq(dev_priv, cagf); intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); - mutex_unlock(&dev->struct_mutex); if (IS_GEN6(dev_priv) || IS_GEN7(dev_priv)) { pm_ier = I915_READ(GEN6_PMIER); @@ -1301,7 +1295,6 @@ static int i915_frequency_info(struct seq_file *m, void *unused) seq_printf(m, "Max CD clock frequency: %d kHz\n", dev_priv->max_cdclk_freq); seq_printf(m, "Max pixel clock frequency: %d kHz\n", dev_priv->max_dotclk_freq); -out: intel_runtime_pm_put(dev_priv); return ret; } -- cgit From 23f4a287fc97d94b6110ee0e17ec92670cc8de89 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sat, 18 Feb 2017 11:27:08 +0000 Subject: drm/i915: Prevent divide-by-zero in debugfs/i915_rps_boost_info Either by chance, or by misread, the current evaluation interval may be zero. If that is the case, don't divide by it! Signed-off-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20170218112708.24504-1-chris@chris-wilson.co.uk Reviewed-by: Radoslaw Szwichtenberg --- drivers/gpu/drm/i915/i915_debugfs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 404394ef4b14..768461f7c7c6 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -2356,10 +2356,10 @@ static int i915_rps_boost_info(struct seq_file *m, void *data) seq_printf(m, "\nRPS Autotuning (current \"%s\" window):\n", rps_power_to_str(dev_priv->rps.power)); seq_printf(m, " Avg. up: %d%% [above threshold? %d%%]\n", - 100 * rpup / rpupei, + rpup && rpupei ? 100 * rpup / rpupei : 0, dev_priv->rps.up_threshold); seq_printf(m, " Avg. down: %d%% [below threshold? %d%%]\n", - 100 * rpdown / rpdownei, + rpdown && rpdownei ? 100 * rpdown / rpdownei : 0, dev_priv->rps.down_threshold); } else { seq_puts(m, "\nRPS Autotuning inactive\n"); -- cgit From 784f2f1a9da22b0ab00c1b364ee2c10573f5c248 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Mon, 20 Feb 2017 10:46:57 +0000 Subject: drm/i915: Fix typo in semaphore debug message Pronounces less rude when fixed. Signed-off-by: Tvrtko Ursulin Reviewed-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20170220104657.5237-1-tvrtko.ursulin@linux.intel.com --- drivers/gpu/drm/i915/i915_drv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index c992d406ae74..747cd5dfb910 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -999,7 +999,7 @@ static void intel_sanitize_options(struct drm_i915_private *dev_priv) DRM_DEBUG_DRIVER("ppgtt mode: %i\n", i915.enable_ppgtt); i915.semaphores = intel_sanitize_semaphores(dev_priv, i915.semaphores); - DRM_DEBUG_DRIVER("use GPU sempahores? %s\n", yesno(i915.semaphores)); + DRM_DEBUG_DRIVER("use GPU semaphores? %s\n", yesno(i915.semaphores)); } /** -- cgit From cfd1c48805208b1f1d5f4df153a15fa4a280eb54 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 20 Feb 2017 09:47:07 +0000 Subject: drm/i915: Move the common RPS warnings to intel_set_rps() Instead of having each back-end provide identical guards, just have a singular set in intel_set_rps() to verify that the caller is obeying the rules. Signed-off-by: Chris Wilson Reviewed-by: Radoslaw Szwichtenberg Link: http://patchwork.freedesktop.org/patch/msgid/20170220094713.22874-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_pm.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index fe243c65de1a..e1878cb5d569 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -4914,10 +4914,6 @@ static u32 gen6_rps_pm_mask(struct drm_i915_private *dev_priv, u8 val) * update the GEN6_RP_INTERRUPT_LIMITS register accordingly. */ static int gen6_set_rps(struct drm_i915_private *dev_priv, u8 val) { - WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); - WARN_ON(val > dev_priv->rps.max_freq); - WARN_ON(val < dev_priv->rps.min_freq); - /* min/max delay may still have been modified so be sure to * write the limits value. */ @@ -4955,10 +4951,6 @@ static int valleyview_set_rps(struct drm_i915_private *dev_priv, u8 val) { int err; - WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); - WARN_ON(val > dev_priv->rps.max_freq); - WARN_ON(val < dev_priv->rps.min_freq); - if (WARN_ONCE(IS_CHERRYVIEW(dev_priv) && (val & 1), "Odd GPU freq value\n")) val &= ~1; @@ -5109,6 +5101,10 @@ int intel_set_rps(struct drm_i915_private *dev_priv, u8 val) { int err; + lockdep_assert_held(&dev_priv->rps.hw_lock); + GEM_BUG_ON(val > dev_priv->rps.max_freq); + GEM_BUG_ON(val < dev_priv->rps.min_freq); + if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) err = valleyview_set_rps(dev_priv, val); else -- cgit From 76e4e4b532bc9f289039c569063ec4b2e0ef2cc4 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 20 Feb 2017 09:47:08 +0000 Subject: drm/i915: Store the requested frequency whilst RPS is disabled If intel_set_rps() is called whilst the hw is disabled, just store the requested frequency (from the user) for application when we wake the hw up. Signed-off-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20170220094713.22874-2-chris@chris-wilson.co.uk Reviewed-by: Radoslaw Szwichtenberg --- drivers/gpu/drm/i915/intel_pm.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index e1878cb5d569..af11c4090c07 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -5105,6 +5105,11 @@ int intel_set_rps(struct drm_i915_private *dev_priv, u8 val) GEM_BUG_ON(val > dev_priv->rps.max_freq); GEM_BUG_ON(val < dev_priv->rps.min_freq); + if (!dev_priv->rps.enabled) { + dev_priv->rps.cur_freq = val; + return 0; + } + if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) err = valleyview_set_rps(dev_priv, val); else -- cgit From e18b9431e46ac0deada6694e047cf80c043a26c0 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 20 Feb 2017 09:47:09 +0000 Subject: drm/i915: Remove unrequired POSTING_READ from gen6_set_rps() The uncached mmio is sufficient to queue the mmio writes without raising forcewake. The forced flush along with acquiring forcewake from the posting read is not required for adjusting the RPS frequency. Signed-off-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20170220094713.22874-3-chris@chris-wilson.co.uk Reviewed-by: Radoslaw Szwichtenberg --- drivers/gpu/drm/i915/intel_pm.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index af11c4090c07..169c4908ad5b 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -4939,8 +4939,6 @@ static int gen6_set_rps(struct drm_i915_private *dev_priv, u8 val) I915_WRITE(GEN6_RP_INTERRUPT_LIMITS, intel_rps_limits(dev_priv, val)); I915_WRITE(GEN6_PMINTRMSK, gen6_rps_pm_mask(dev_priv, val)); - POSTING_READ(GEN6_RPNSWREQ); - dev_priv->rps.cur_freq = val; trace_intel_gpu_freq_change(intel_gpu_freq(dev_priv, val)); -- cgit From 3c1b38e63018ea717148a0a6817eec27b020212d Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Fri, 17 Feb 2017 17:39:42 +0200 Subject: drm/i915: Remove redundant toggling from the power well sync_hw hooks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Doing an explicit enable/disable in the power well sync_hw hook based on the power well's reference count is redundant, since by the time these hooks are called all the power wells are enabled and have a reference. So remove the redundant toggling. This is needed by a follow-up patchset that adds power wells which we can't enable/disable during power domain initialization and so want to preserve their state until modeset init time. Cc: Ander Conselvan de Oliveira Cc: David Weinehall Cc: Ville Syrjälä Signed-off-by: Imre Deak Reviewed-by: Ander Conselvan de Oliveira Link: http://patchwork.freedesktop.org/patch/msgid/1487345986-26511-2-git-send-email-imre.deak@intel.com --- drivers/gpu/drm/i915/intel_runtime_pm.c | 52 +++++++-------------------------- 1 file changed, 10 insertions(+), 42 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c index 879567987201..0f64bc12f308 100644 --- a/drivers/gpu/drm/i915/intel_runtime_pm.c +++ b/drivers/gpu/drm/i915/intel_runtime_pm.c @@ -847,8 +847,6 @@ static void skl_set_power_well(struct drm_i915_private *dev_priv, static void hsw_power_well_sync_hw(struct drm_i915_private *dev_priv, struct i915_power_well *power_well) { - hsw_set_power_well(dev_priv, power_well, power_well->count > 0); - /* * We're taking over the BIOS, so clear any requests made by it since * the driver is in charge now. @@ -881,8 +879,6 @@ static bool skl_power_well_enabled(struct drm_i915_private *dev_priv, static void skl_power_well_sync_hw(struct drm_i915_private *dev_priv, struct i915_power_well *power_well) { - skl_set_power_well(dev_priv, power_well, power_well->count > 0); - /* Clear any request made by BIOS as driver is taking over */ I915_WRITE(HSW_PWR_WELL_BIOS, 0); } @@ -917,16 +913,6 @@ static bool bxt_dpio_cmn_power_well_enabled(struct drm_i915_private *dev_priv, return bxt_ddi_phy_is_enabled(dev_priv, power_well->data); } -static void bxt_dpio_cmn_power_well_sync_hw(struct drm_i915_private *dev_priv, - struct i915_power_well *power_well) -{ - if (power_well->count > 0) - bxt_dpio_cmn_power_well_enable(dev_priv, power_well); - else - bxt_dpio_cmn_power_well_disable(dev_priv, power_well); -} - - static void bxt_verify_ddi_phy_power_wells(struct drm_i915_private *dev_priv) { struct i915_power_well *power_well; @@ -989,13 +975,9 @@ static void gen9_dc_off_power_well_disable(struct drm_i915_private *dev_priv, gen9_enable_dc5(dev_priv); } -static void gen9_dc_off_power_well_sync_hw(struct drm_i915_private *dev_priv, - struct i915_power_well *power_well) +static void i9xx_power_well_sync_hw_noop(struct drm_i915_private *dev_priv, + struct i915_power_well *power_well) { - if (power_well->count > 0) - gen9_dc_off_power_well_enable(dev_priv, power_well); - else - gen9_dc_off_power_well_disable(dev_priv, power_well); } static void i9xx_always_on_power_well_noop(struct drm_i915_private *dev_priv, @@ -1045,12 +1027,6 @@ out: mutex_unlock(&dev_priv->rps.hw_lock); } -static void vlv_power_well_sync_hw(struct drm_i915_private *dev_priv, - struct i915_power_well *power_well) -{ - vlv_set_power_well(dev_priv, power_well, power_well->count > 0); -} - static void vlv_power_well_enable(struct drm_i915_private *dev_priv, struct i915_power_well *power_well) { @@ -1661,14 +1637,6 @@ out: mutex_unlock(&dev_priv->rps.hw_lock); } -static void chv_pipe_power_well_sync_hw(struct drm_i915_private *dev_priv, - struct i915_power_well *power_well) -{ - WARN_ON_ONCE(power_well->id != PIPE_A); - - chv_set_pipe_power_well(dev_priv, power_well, power_well->count > 0); -} - static void chv_pipe_power_well_enable(struct drm_i915_private *dev_priv, struct i915_power_well *power_well) { @@ -1914,21 +1882,21 @@ void intel_display_power_put(struct drm_i915_private *dev_priv, BIT_ULL(POWER_DOMAIN_INIT)) static const struct i915_power_well_ops i9xx_always_on_power_well_ops = { - .sync_hw = i9xx_always_on_power_well_noop, + .sync_hw = i9xx_power_well_sync_hw_noop, .enable = i9xx_always_on_power_well_noop, .disable = i9xx_always_on_power_well_noop, .is_enabled = i9xx_always_on_power_well_enabled, }; static const struct i915_power_well_ops chv_pipe_power_well_ops = { - .sync_hw = chv_pipe_power_well_sync_hw, + .sync_hw = i9xx_power_well_sync_hw_noop, .enable = chv_pipe_power_well_enable, .disable = chv_pipe_power_well_disable, .is_enabled = chv_pipe_power_well_enabled, }; static const struct i915_power_well_ops chv_dpio_cmn_power_well_ops = { - .sync_hw = vlv_power_well_sync_hw, + .sync_hw = i9xx_power_well_sync_hw_noop, .enable = chv_dpio_cmn_power_well_enable, .disable = chv_dpio_cmn_power_well_disable, .is_enabled = vlv_power_well_enabled, @@ -1958,14 +1926,14 @@ static const struct i915_power_well_ops skl_power_well_ops = { }; static const struct i915_power_well_ops gen9_dc_off_power_well_ops = { - .sync_hw = gen9_dc_off_power_well_sync_hw, + .sync_hw = i9xx_power_well_sync_hw_noop, .enable = gen9_dc_off_power_well_enable, .disable = gen9_dc_off_power_well_disable, .is_enabled = gen9_dc_off_power_well_enabled, }; static const struct i915_power_well_ops bxt_dpio_cmn_power_well_ops = { - .sync_hw = bxt_dpio_cmn_power_well_sync_hw, + .sync_hw = i9xx_power_well_sync_hw_noop, .enable = bxt_dpio_cmn_power_well_enable, .disable = bxt_dpio_cmn_power_well_disable, .is_enabled = bxt_dpio_cmn_power_well_enabled, @@ -2000,21 +1968,21 @@ static struct i915_power_well bdw_power_wells[] = { }; static const struct i915_power_well_ops vlv_display_power_well_ops = { - .sync_hw = vlv_power_well_sync_hw, + .sync_hw = i9xx_power_well_sync_hw_noop, .enable = vlv_display_power_well_enable, .disable = vlv_display_power_well_disable, .is_enabled = vlv_power_well_enabled, }; static const struct i915_power_well_ops vlv_dpio_cmn_power_well_ops = { - .sync_hw = vlv_power_well_sync_hw, + .sync_hw = i9xx_power_well_sync_hw_noop, .enable = vlv_dpio_cmn_power_well_enable, .disable = vlv_dpio_cmn_power_well_disable, .is_enabled = vlv_power_well_enabled, }; static const struct i915_power_well_ops vlv_dpio_power_well_ops = { - .sync_hw = vlv_power_well_sync_hw, + .sync_hw = i9xx_power_well_sync_hw_noop, .enable = vlv_power_well_enable, .disable = vlv_power_well_disable, .is_enabled = vlv_power_well_enabled, -- cgit From 75ccb2ecb8fc8cfcc676afd24ccad4777f130d58 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Fri, 17 Feb 2017 17:39:43 +0200 Subject: drm/i915: Call the sync_hw hook for power wells without a domain MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit So far the sync_hw hook wasn't called for power wells not belonging to any power domain, that is the GEN9 PW1 and MISC_IO power wells. This wasn't a problem so far since the goal of the sync_hw hook - to clear the corresponding BIOS request bit - was guaranteed by clearing the whole BIOS request register elsewhere. This will change with the next patch, so fix up the inconsistency. While at it clean up the power well iterator helpers and move them to the rest of iterators. v2: - Clean up the power well iterator helpers. (Ander) - Move the helpers to i915_drv.h. Cc: Ander Conselvan de Oliveira Cc: David Weinehall Cc: Ville Syrjälä Signed-off-by: Imre Deak Reviewed-by: Ander Conselvan de Oliveira Link: http://patchwork.freedesktop.org/patch/msgid/1487345986-26511-3-git-send-email-imre.deak@intel.com --- drivers/gpu/drm/i915/i915_drv.h | 20 ++++++++++++++++++++ drivers/gpu/drm/i915/intel_runtime_pm.c | 28 ++++------------------------ 2 files changed, 24 insertions(+), 24 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 2a7b277a86de..1fe4010d4f2b 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -500,6 +500,26 @@ struct i915_hotplug { for ((domain) = 0; (domain) < POWER_DOMAIN_NUM; (domain)++) \ for_each_if (BIT_ULL(domain) & (mask)) +#define for_each_power_well(__dev_priv, __power_well) \ + for ((__power_well) = (__dev_priv)->power_domains.power_wells; \ + (__power_well) - (__dev_priv)->power_domains.power_wells < \ + (__dev_priv)->power_domains.power_well_count; \ + (__power_well)++) + +#define for_each_power_well_rev(__dev_priv, __power_well) \ + for ((__power_well) = (__dev_priv)->power_domains.power_wells + \ + (__dev_priv)->power_domains.power_well_count - 1; \ + (__power_well) - (__dev_priv)->power_domains.power_wells >= 0; \ + (__power_well)--) + +#define for_each_power_domain_well(__dev_priv, __power_well, __domain_mask) \ + for_each_power_well(__dev_priv, __power_well) \ + for_each_if ((__power_well)->domains & (__domain_mask)) + +#define for_each_power_domain_well_rev(__dev_priv, __power_well, __domain_mask) \ + for_each_power_well_rev(__dev_priv, __power_well) \ + for_each_if ((__power_well)->domains & (__domain_mask)) + struct drm_i915_private; struct i915_mm_struct; struct i915_mmu_object; diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c index 0f64bc12f308..9bbbdbc1c843 100644 --- a/drivers/gpu/drm/i915/intel_runtime_pm.c +++ b/drivers/gpu/drm/i915/intel_runtime_pm.c @@ -49,19 +49,6 @@ * present for a given platform. */ -#define for_each_power_well(i, power_well, domain_mask, power_domains) \ - for (i = 0; \ - i < (power_domains)->power_well_count && \ - ((power_well) = &(power_domains)->power_wells[i]); \ - i++) \ - for_each_if ((power_well)->domains & (domain_mask)) - -#define for_each_power_well_rev(i, power_well, domain_mask, power_domains) \ - for (i = (power_domains)->power_well_count - 1; \ - i >= 0 && ((power_well) = &(power_domains)->power_wells[i]);\ - i--) \ - for_each_if ((power_well)->domains & (domain_mask)) - bool intel_display_power_well_is_enabled(struct drm_i915_private *dev_priv, int power_well_id); @@ -198,19 +185,15 @@ static bool hsw_power_well_enabled(struct drm_i915_private *dev_priv, bool __intel_display_power_is_enabled(struct drm_i915_private *dev_priv, enum intel_display_power_domain domain) { - struct i915_power_domains *power_domains; struct i915_power_well *power_well; bool is_enabled; - int i; if (dev_priv->pm.suspended) return false; - power_domains = &dev_priv->power_domains; - is_enabled = true; - for_each_power_well_rev(i, power_well, BIT_ULL(domain), power_domains) { + for_each_power_domain_well_rev(dev_priv, power_well, BIT_ULL(domain)) { if (power_well->always_on) continue; @@ -1663,9 +1646,8 @@ __intel_display_power_get_domain(struct drm_i915_private *dev_priv, { struct i915_power_domains *power_domains = &dev_priv->power_domains; struct i915_power_well *power_well; - int i; - for_each_power_well(i, power_well, BIT_ULL(domain), power_domains) + for_each_power_domain_well(dev_priv, power_well, BIT_ULL(domain)) intel_power_well_get(dev_priv, power_well); power_domains->domain_use_count[domain]++; @@ -1749,7 +1731,6 @@ void intel_display_power_put(struct drm_i915_private *dev_priv, { struct i915_power_domains *power_domains; struct i915_power_well *power_well; - int i; power_domains = &dev_priv->power_domains; @@ -1760,7 +1741,7 @@ void intel_display_power_put(struct drm_i915_private *dev_priv, intel_display_power_domain_str(domain)); power_domains->domain_use_count[domain]--; - for_each_power_well_rev(i, power_well, BIT_ULL(domain), power_domains) + for_each_power_domain_well_rev(dev_priv, power_well, BIT_ULL(domain)) intel_power_well_put(dev_priv, power_well); mutex_unlock(&power_domains->lock); @@ -2424,10 +2405,9 @@ static void intel_power_domains_sync_hw(struct drm_i915_private *dev_priv) { struct i915_power_domains *power_domains = &dev_priv->power_domains; struct i915_power_well *power_well; - int i; mutex_lock(&power_domains->lock); - for_each_power_well(i, power_well, POWER_DOMAIN_MASK, power_domains) { + for_each_power_well(dev_priv, power_well) { power_well->ops->sync_hw(dev_priv, power_well); power_well->hw_enabled = power_well->ops->is_enabled(dev_priv, power_well); -- cgit From 14544e134905b4276dc72122ad8c09965bbc7f41 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Fri, 17 Feb 2017 17:39:44 +0200 Subject: drm/i915/gen9: Fix clearing of the BIOS power well request register MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Atm, in the power well sync_hw hook we are clearing all BIOS request bits, not just the one corresponding to the given power well. This could turn off an unrelated power well inadvertently if it didn't have a request bit set in the driver request register. This didn't cause a problem so far, since we enabled all power wells explicitly before clearing the BIOS request register. A follow-up patchset will add power wells that won't get enabled this way, so fix up the inconsistency. Note that this patch only makes the clearing of the BIOS req register more logical. Power wells without a reference would still get disabled by the end of power domain initialization, that is fixed by the next patch. v2: - Clarify in the commit log that this patch doesn't address the case of power wells without a reference. (Ander) Cc: Ander Conselvan de Oliveira Cc: David Weinehall Cc: Ville Syrjälä Signed-off-by: Imre Deak Reviewed-by: Ander Conselvan de Oliveira Link: http://patchwork.freedesktop.org/patch/msgid/1487345986-26511-4-git-send-email-imre.deak@intel.com --- drivers/gpu/drm/i915/intel_runtime_pm.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c index 9bbbdbc1c843..62c99a96c8e7 100644 --- a/drivers/gpu/drm/i915/intel_runtime_pm.c +++ b/drivers/gpu/drm/i915/intel_runtime_pm.c @@ -862,8 +862,13 @@ static bool skl_power_well_enabled(struct drm_i915_private *dev_priv, static void skl_power_well_sync_hw(struct drm_i915_private *dev_priv, struct i915_power_well *power_well) { + uint32_t mask = SKL_POWER_WELL_REQ(power_well->id); + uint32_t bios_req = I915_READ(HSW_PWR_WELL_BIOS); + /* Clear any request made by BIOS as driver is taking over */ - I915_WRITE(HSW_PWR_WELL_BIOS, 0); + if (bios_req & mask) { + I915_WRITE(HSW_PWR_WELL_BIOS, bios_req & ~mask); + } } static void skl_power_well_enable(struct drm_i915_private *dev_priv, -- cgit From 16e849145dc2ba9d9fc2027c530fc0a682d5ae61 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Fri, 17 Feb 2017 17:39:45 +0200 Subject: drm/i915: Preserve the state of power wells not explicitly enabled MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Atm, power wells that BIOS has enabled, but which we don't explicitly enable during power domain initialization would get disabled as we clear the BIOS request bit in the given power well sync_hw hook. To prevent this copy over any set request bits in the BIOS request register to the driver request register and clear the BIOS request bit only afterwards. This doesn't make a difference now, since we enable all power wells during power domain initialization. A follow-up patchset will add power wells for which this isn't true, so fix up the inconsistency. Cc: Ander Conselvan de Oliveira Cc: David Weinehall Cc: Ville Syrjälä Signed-off-by: Imre Deak Reviewed-by: Ander Conselvan de Oliveira Link: http://patchwork.freedesktop.org/patch/msgid/1487345986-26511-5-git-send-email-imre.deak@intel.com --- drivers/gpu/drm/i915/intel_runtime_pm.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c index 62c99a96c8e7..44d4da3ad7a1 100644 --- a/drivers/gpu/drm/i915/intel_runtime_pm.c +++ b/drivers/gpu/drm/i915/intel_runtime_pm.c @@ -830,12 +830,14 @@ static void skl_set_power_well(struct drm_i915_private *dev_priv, static void hsw_power_well_sync_hw(struct drm_i915_private *dev_priv, struct i915_power_well *power_well) { - /* - * We're taking over the BIOS, so clear any requests made by it since - * the driver is in charge now. - */ - if (I915_READ(HSW_PWR_WELL_BIOS) & HSW_PWR_WELL_ENABLE_REQUEST) + /* Take over the request bit if set by BIOS. */ + if (I915_READ(HSW_PWR_WELL_BIOS) & HSW_PWR_WELL_ENABLE_REQUEST) { + if (!(I915_READ(HSW_PWR_WELL_DRIVER) & + HSW_PWR_WELL_ENABLE_REQUEST)) + I915_WRITE(HSW_PWR_WELL_DRIVER, + HSW_PWR_WELL_ENABLE_REQUEST); I915_WRITE(HSW_PWR_WELL_BIOS, 0); + } } static void hsw_power_well_enable(struct drm_i915_private *dev_priv, @@ -865,8 +867,12 @@ static void skl_power_well_sync_hw(struct drm_i915_private *dev_priv, uint32_t mask = SKL_POWER_WELL_REQ(power_well->id); uint32_t bios_req = I915_READ(HSW_PWR_WELL_BIOS); - /* Clear any request made by BIOS as driver is taking over */ + /* Take over the request bit if set by BIOS. */ if (bios_req & mask) { + uint32_t drv_req = I915_READ(HSW_PWR_WELL_DRIVER); + + if (!(drv_req & mask)) + I915_WRITE(HSW_PWR_WELL_DRIVER, drv_req | mask); I915_WRITE(HSW_PWR_WELL_BIOS, bios_req & ~mask); } } -- cgit From 8d8c386c38692c1183b2c0ecffb84de91b8b32b0 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Fri, 17 Feb 2017 17:39:46 +0200 Subject: drm/i915: Add power well SW/HW state verification MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Verify that the refcount of all power wells match their HW enabled state at the end of modeset HW state readout. Also add documentation on how the reference count for each power well is supposed to be acquired during initialization and HW state readout. Suggested by Ander. Cc: Ander Conselvan de Oliveira Cc: David Weinehall Cc: Ville Syrjälä Signed-off-by: Imre Deak Reviewed-by: Ander Conselvan de Oliveira Link: http://patchwork.freedesktop.org/patch/msgid/1487345986-26511-6-git-send-email-imre.deak@intel.com --- drivers/gpu/drm/i915/intel_display.c | 2 + drivers/gpu/drm/i915/intel_drv.h | 1 + drivers/gpu/drm/i915/intel_runtime_pm.c | 85 ++++++++++++++++++++++++++++++++- 3 files changed, 87 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index b60373812d34..e1e828a8c524 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -15540,6 +15540,8 @@ intel_modeset_setup_hw_state(struct drm_device *dev) } intel_display_set_init_power(dev_priv, false); + intel_power_domains_verify_state(dev_priv); + intel_fbc_init_pipe_state(dev_priv); } diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 7e27aef0849b..11f2e4163bd4 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -1693,6 +1693,7 @@ int intel_power_domains_init(struct drm_i915_private *); void intel_power_domains_fini(struct drm_i915_private *); void intel_power_domains_init_hw(struct drm_i915_private *dev_priv, bool resume); void intel_power_domains_suspend(struct drm_i915_private *dev_priv); +void intel_power_domains_verify_state(struct drm_i915_private *dev_priv); void bxt_display_core_init(struct drm_i915_private *dev_priv, bool resume); void bxt_display_core_uninit(struct drm_i915_private *dev_priv); void intel_runtime_pm_enable(struct drm_i915_private *dev_priv); diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c index 44d4da3ad7a1..6b52258152b7 100644 --- a/drivers/gpu/drm/i915/intel_runtime_pm.c +++ b/drivers/gpu/drm/i915/intel_runtime_pm.c @@ -2683,7 +2683,10 @@ static void vlv_cmnlane_wa(struct drm_i915_private *dev_priv) * @resume: Called from resume code paths or not * * This function initializes the hardware power domain state and enables all - * power domains using intel_display_set_init_power(). + * power wells belonging to the INIT power domain. Power wells in other + * domains (and not in the INIT domain) are referenced or disabled during the + * modeset state HW readout. After that the reference count of each power well + * must match its HW enabled state, see intel_power_domains_verify_state(). */ void intel_power_domains_init_hw(struct drm_i915_private *dev_priv, bool resume) { @@ -2736,6 +2739,86 @@ void intel_power_domains_suspend(struct drm_i915_private *dev_priv) bxt_display_core_uninit(dev_priv); } +static void intel_power_domains_dump_info(struct drm_i915_private *dev_priv) +{ + struct i915_power_domains *power_domains = &dev_priv->power_domains; + struct i915_power_well *power_well; + + for_each_power_well(dev_priv, power_well) { + enum intel_display_power_domain domain; + + DRM_DEBUG_DRIVER("%-25s %d\n", + power_well->name, power_well->count); + + for_each_power_domain(domain, power_well->domains) + DRM_DEBUG_DRIVER(" %-23s %d\n", + intel_display_power_domain_str(domain), + power_domains->domain_use_count[domain]); + } +} + +/** + * intel_power_domains_verify_state - verify the HW/SW state for all power wells + * @dev_priv: i915 device instance + * + * Verify if the reference count of each power well matches its HW enabled + * state and the total refcount of the domains it belongs to. This must be + * called after modeset HW state sanitization, which is responsible for + * acquiring reference counts for any power wells in use and disabling the + * ones left on by BIOS but not required by any active output. + */ +void intel_power_domains_verify_state(struct drm_i915_private *dev_priv) +{ + struct i915_power_domains *power_domains = &dev_priv->power_domains; + struct i915_power_well *power_well; + bool dump_domain_info; + + mutex_lock(&power_domains->lock); + + dump_domain_info = false; + for_each_power_well(dev_priv, power_well) { + enum intel_display_power_domain domain; + int domains_count; + bool enabled; + + /* + * Power wells not belonging to any domain (like the MISC_IO + * and PW1 power wells) are under FW control, so ignore them, + * since their state can change asynchronously. + */ + if (!power_well->domains) + continue; + + enabled = power_well->ops->is_enabled(dev_priv, power_well); + if ((power_well->count || power_well->always_on) != enabled) + DRM_ERROR("power well %s state mismatch (refcount %d/enabled %d)", + power_well->name, power_well->count, enabled); + + domains_count = 0; + for_each_power_domain(domain, power_well->domains) + domains_count += power_domains->domain_use_count[domain]; + + if (power_well->count != domains_count) { + DRM_ERROR("power well %s refcount/domain refcount mismatch " + "(refcount %d/domains refcount %d)\n", + power_well->name, power_well->count, + domains_count); + dump_domain_info = true; + } + } + + if (dump_domain_info) { + static bool dumped; + + if (!dumped) { + intel_power_domains_dump_info(dev_priv); + dumped = true; + } + } + + mutex_unlock(&power_domains->lock); +} + /** * intel_runtime_pm_get - grab a runtime pm reference * @dev_priv: i915 device instance -- cgit From 944a36d472be642d0d082d2480fe2b40046602a9 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 17 Feb 2017 16:38:33 +0000 Subject: drm/i915: Assert that the request->tail is always qword aligned MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The hardware requires that the tail pointer only advance in qword units, so assert that the value we write is aligned to qwords, and similarly enforce this restriction onto the request->tail. Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: Mika Kuoppala Link: http://patchwork.freedesktop.org/patch/msgid/20170217163833.731-1-chris@chris-wilson.co.uk Reviewed-by: Michał Winiarski --- drivers/gpu/drm/i915/intel_lrc.c | 4 ++++ drivers/gpu/drm/i915/intel_ringbuffer.c | 3 +++ 2 files changed, 7 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index afcb1f189162..d6b67be4715d 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -325,6 +325,7 @@ static u64 execlists_update_context(struct drm_i915_gem_request *rq) rq->ctx->ppgtt ?: rq->i915->mm.aliasing_ppgtt; u32 *reg_state = ce->lrc_reg_state; + GEM_BUG_ON(!IS_ALIGNED(rq->tail, 8)); reg_state[CTX_RING_TAIL+1] = rq->tail; /* True 32b PPGTT with dynamic page allocation: update PDP @@ -1283,6 +1284,7 @@ static void reset_common_ring(struct intel_engine_cs *engine, /* Reset WaIdleLiteRestore:bdw,skl as well */ request->tail = request->wa_tail - WA_TAIL_DWORDS * sizeof(u32); + GEM_BUG_ON(!IS_ALIGNED(request->tail, 8)); } static int intel_logical_ring_emit_pdps(struct drm_i915_gem_request *req) @@ -1494,6 +1496,7 @@ static void gen8_emit_breadcrumb(struct drm_i915_gem_request *request, u32 *cs) *cs++ = MI_USER_INTERRUPT; *cs++ = MI_NOOP; request->tail = intel_ring_offset(request, cs); + GEM_BUG_ON(!IS_ALIGNED(request->tail, 8)); gen8_emit_wa_tail(request, cs); } @@ -1521,6 +1524,7 @@ static void gen8_emit_breadcrumb_render(struct drm_i915_gem_request *request, *cs++ = MI_USER_INTERRUPT; *cs++ = MI_NOOP; request->tail = intel_ring_offset(request, cs); + GEM_BUG_ON(!IS_ALIGNED(request->tail, 8)); gen8_emit_wa_tail(request, cs); } diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index d56f384938f7..f62afffef682 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -784,6 +784,7 @@ static void i9xx_submit_request(struct drm_i915_gem_request *request) i915_gem_request_submit(request); + GEM_BUG_ON(!IS_ALIGNED(request->tail, 8)); I915_WRITE_TAIL(request->engine, request->tail); } @@ -795,6 +796,7 @@ static void i9xx_emit_breadcrumb(struct drm_i915_gem_request *req, u32 *cs) *cs++ = MI_USER_INTERRUPT; req->tail = intel_ring_offset(req, cs); + GEM_BUG_ON(!IS_ALIGNED(req->tail, 8)); } static const int i9xx_emit_breadcrumb_sz = 4; @@ -833,6 +835,7 @@ static void gen8_render_emit_breadcrumb(struct drm_i915_gem_request *req, *cs++ = MI_NOOP; req->tail = intel_ring_offset(req, cs); + GEM_BUG_ON(!IS_ALIGNED(req->tail, 8)); } static const int gen8_render_emit_breadcrumb_sz = 8; -- cgit From e2989f140e707f31afa5fa617512961b3f692aca Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 21 Feb 2017 09:17:23 +0000 Subject: drm/i915: Use reservation_object_lock() Replace the calls to ww_mutex_lock(&resv->lock) with the helper reservation_object_lock(resv) and similarly for unlock. Signed-off-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20170221091723.6219-1-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_gem_batch_pool.c | 4 ++-- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_batch_pool.c b/drivers/gpu/drm/i915/i915_gem_batch_pool.c index b3bc119ec1bb..99ceae7855f8 100644 --- a/drivers/gpu/drm/i915/i915_gem_batch_pool.c +++ b/drivers/gpu/drm/i915/i915_gem_batch_pool.c @@ -122,9 +122,9 @@ i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool, if (tmp->base.size >= size) { /* Clear the set of shared fences early */ - ww_mutex_lock(&tmp->resv->lock, NULL); + reservation_object_lock(tmp->resv, NULL); reservation_object_add_excl_fence(tmp->resv, NULL); - ww_mutex_unlock(&tmp->resv->lock); + reservation_object_unlock(tmp->resv); obj = tmp; break; diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index da0846fe2ad6..baf9039774d7 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -1301,12 +1301,12 @@ static void eb_export_fence(struct drm_i915_gem_object *obj, * handle an error right now. Worst case should be missed * synchronisation leading to rendering corruption. */ - ww_mutex_lock(&resv->lock, NULL); + reservation_object_lock(resv, NULL); if (flags & EXEC_OBJECT_WRITE) reservation_object_add_excl_fence(resv, &req->fence); else if (reservation_object_reserve_shared(resv) == 0) reservation_object_add_shared_fence(resv, &req->fence); - ww_mutex_unlock(&resv->lock); + reservation_object_unlock(resv); } static void -- cgit From 56e51bf03666728d4eb1d8319215865c8dbf4338 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Tue, 21 Feb 2017 09:58:39 +0000 Subject: drm/i915: Tidy execlists_init_reg_state Compact the name of the macro and reg_state variable, and cache some data in local variables to make the function more compact and more readable. v2: Fixup some checkpatch warnings. Signed-off-by: Tvrtko Ursulin Reviewed-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20170221095839.30525-1-tvrtko.ursulin@linux.intel.com --- drivers/gpu/drm/i915/intel_lrc.c | 127 +++++++++++++++++---------------------- 1 file changed, 56 insertions(+), 71 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index d6b67be4715d..5553f551a1bf 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -190,7 +190,7 @@ #define CTX_R_PWR_CLK_STATE 0x42 #define CTX_GPGPU_CSR_BASE_ADDRESS 0x44 -#define ASSIGN_CTX_REG(reg_state, pos, reg, val) do { \ +#define CTX_REG(reg_state, pos, reg, val) do { \ (reg_state)[(pos)+0] = i915_mmio_reg_offset(reg); \ (reg_state)[(pos)+1] = (val); \ } while (0) @@ -1812,104 +1812,89 @@ static u32 intel_lr_indirect_ctx_offset(struct intel_engine_cs *engine) return indirect_ctx_offset; } -static void execlists_init_reg_state(u32 *reg_state, +static void execlists_init_reg_state(u32 *regs, struct i915_gem_context *ctx, struct intel_engine_cs *engine, struct intel_ring *ring) { struct drm_i915_private *dev_priv = engine->i915; struct i915_hw_ppgtt *ppgtt = ctx->ppgtt ?: dev_priv->mm.aliasing_ppgtt; + u32 base = engine->mmio_base; + bool rcs = engine->id == RCS; + + /* A context is actually a big batch buffer with several + * MI_LOAD_REGISTER_IMM commands followed by (reg, value) pairs. The + * values we are setting here are only for the first context restore: + * on a subsequent save, the GPU will recreate this batchbuffer with new + * values (including all the missing MI_LOAD_REGISTER_IMM commands that + * we are not initializing here). + */ + regs[CTX_LRI_HEADER_0] = MI_LOAD_REGISTER_IMM(rcs ? 14 : 11) | + MI_LRI_FORCE_POSTED; + + CTX_REG(regs, CTX_CONTEXT_CONTROL, RING_CONTEXT_CONTROL(engine), + _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH | + CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT | + (HAS_RESOURCE_STREAMER(dev_priv) ? + CTX_CTRL_RS_CTX_ENABLE : 0))); + CTX_REG(regs, CTX_RING_HEAD, RING_HEAD(base), 0); + CTX_REG(regs, CTX_RING_TAIL, RING_TAIL(base), 0); + CTX_REG(regs, CTX_RING_BUFFER_START, RING_START(base), 0); + CTX_REG(regs, CTX_RING_BUFFER_CONTROL, RING_CTL(base), + RING_CTL_SIZE(ring->size) | RING_VALID); + CTX_REG(regs, CTX_BB_HEAD_U, RING_BBADDR_UDW(base), 0); + CTX_REG(regs, CTX_BB_HEAD_L, RING_BBADDR(base), 0); + CTX_REG(regs, CTX_BB_STATE, RING_BBSTATE(base), RING_BB_PPGTT); + CTX_REG(regs, CTX_SECOND_BB_HEAD_U, RING_SBBADDR_UDW(base), 0); + CTX_REG(regs, CTX_SECOND_BB_HEAD_L, RING_SBBADDR(base), 0); + CTX_REG(regs, CTX_SECOND_BB_STATE, RING_SBBSTATE(base), 0); + if (rcs) { + CTX_REG(regs, CTX_BB_PER_CTX_PTR, RING_BB_PER_CTX_PTR(base), 0); + CTX_REG(regs, CTX_RCS_INDIRECT_CTX, RING_INDIRECT_CTX(base), 0); + CTX_REG(regs, CTX_RCS_INDIRECT_CTX_OFFSET, + RING_INDIRECT_CTX_OFFSET(base), 0); - /* A context is actually a big batch buffer with several MI_LOAD_REGISTER_IMM - * commands followed by (reg, value) pairs. The values we are setting here are - * only for the first context restore: on a subsequent save, the GPU will - * recreate this batchbuffer with new values (including all the missing - * MI_LOAD_REGISTER_IMM commands that we are not initializing here). */ - reg_state[CTX_LRI_HEADER_0] = - MI_LOAD_REGISTER_IMM(engine->id == RCS ? 14 : 11) | MI_LRI_FORCE_POSTED; - ASSIGN_CTX_REG(reg_state, CTX_CONTEXT_CONTROL, - RING_CONTEXT_CONTROL(engine), - _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH | - CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT | - (HAS_RESOURCE_STREAMER(dev_priv) ? - CTX_CTRL_RS_CTX_ENABLE : 0))); - ASSIGN_CTX_REG(reg_state, CTX_RING_HEAD, RING_HEAD(engine->mmio_base), - 0); - ASSIGN_CTX_REG(reg_state, CTX_RING_TAIL, RING_TAIL(engine->mmio_base), - 0); - ASSIGN_CTX_REG(reg_state, CTX_RING_BUFFER_START, - RING_START(engine->mmio_base), 0); - ASSIGN_CTX_REG(reg_state, CTX_RING_BUFFER_CONTROL, - RING_CTL(engine->mmio_base), - RING_CTL_SIZE(ring->size) | RING_VALID); - ASSIGN_CTX_REG(reg_state, CTX_BB_HEAD_U, - RING_BBADDR_UDW(engine->mmio_base), 0); - ASSIGN_CTX_REG(reg_state, CTX_BB_HEAD_L, - RING_BBADDR(engine->mmio_base), 0); - ASSIGN_CTX_REG(reg_state, CTX_BB_STATE, - RING_BBSTATE(engine->mmio_base), - RING_BB_PPGTT); - ASSIGN_CTX_REG(reg_state, CTX_SECOND_BB_HEAD_U, - RING_SBBADDR_UDW(engine->mmio_base), 0); - ASSIGN_CTX_REG(reg_state, CTX_SECOND_BB_HEAD_L, - RING_SBBADDR(engine->mmio_base), 0); - ASSIGN_CTX_REG(reg_state, CTX_SECOND_BB_STATE, - RING_SBBSTATE(engine->mmio_base), 0); - if (engine->id == RCS) { - ASSIGN_CTX_REG(reg_state, CTX_BB_PER_CTX_PTR, - RING_BB_PER_CTX_PTR(engine->mmio_base), 0); - ASSIGN_CTX_REG(reg_state, CTX_RCS_INDIRECT_CTX, - RING_INDIRECT_CTX(engine->mmio_base), 0); - ASSIGN_CTX_REG(reg_state, CTX_RCS_INDIRECT_CTX_OFFSET, - RING_INDIRECT_CTX_OFFSET(engine->mmio_base), 0); if (engine->wa_ctx.vma) { struct i915_ctx_workarounds *wa_ctx = &engine->wa_ctx; u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma); - reg_state[CTX_RCS_INDIRECT_CTX+1] = + regs[CTX_RCS_INDIRECT_CTX + 1] = (ggtt_offset + wa_ctx->indirect_ctx.offset) | (wa_ctx->indirect_ctx.size / CACHELINE_BYTES); - reg_state[CTX_RCS_INDIRECT_CTX_OFFSET+1] = + regs[CTX_RCS_INDIRECT_CTX_OFFSET + 1] = intel_lr_indirect_ctx_offset(engine) << 6; - reg_state[CTX_BB_PER_CTX_PTR+1] = + regs[CTX_BB_PER_CTX_PTR + 1] = (ggtt_offset + wa_ctx->per_ctx.offset) | 0x01; } } - reg_state[CTX_LRI_HEADER_1] = MI_LOAD_REGISTER_IMM(9) | MI_LRI_FORCE_POSTED; - ASSIGN_CTX_REG(reg_state, CTX_CTX_TIMESTAMP, - RING_CTX_TIMESTAMP(engine->mmio_base), 0); + + regs[CTX_LRI_HEADER_1] = MI_LOAD_REGISTER_IMM(9) | MI_LRI_FORCE_POSTED; + + CTX_REG(regs, CTX_CTX_TIMESTAMP, RING_CTX_TIMESTAMP(base), 0); /* PDP values well be assigned later if needed */ - ASSIGN_CTX_REG(reg_state, CTX_PDP3_UDW, GEN8_RING_PDP_UDW(engine, 3), - 0); - ASSIGN_CTX_REG(reg_state, CTX_PDP3_LDW, GEN8_RING_PDP_LDW(engine, 3), - 0); - ASSIGN_CTX_REG(reg_state, CTX_PDP2_UDW, GEN8_RING_PDP_UDW(engine, 2), - 0); - ASSIGN_CTX_REG(reg_state, CTX_PDP2_LDW, GEN8_RING_PDP_LDW(engine, 2), - 0); - ASSIGN_CTX_REG(reg_state, CTX_PDP1_UDW, GEN8_RING_PDP_UDW(engine, 1), - 0); - ASSIGN_CTX_REG(reg_state, CTX_PDP1_LDW, GEN8_RING_PDP_LDW(engine, 1), - 0); - ASSIGN_CTX_REG(reg_state, CTX_PDP0_UDW, GEN8_RING_PDP_UDW(engine, 0), - 0); - ASSIGN_CTX_REG(reg_state, CTX_PDP0_LDW, GEN8_RING_PDP_LDW(engine, 0), - 0); + CTX_REG(regs, CTX_PDP3_UDW, GEN8_RING_PDP_UDW(engine, 3), 0); + CTX_REG(regs, CTX_PDP3_LDW, GEN8_RING_PDP_LDW(engine, 3), 0); + CTX_REG(regs, CTX_PDP2_UDW, GEN8_RING_PDP_UDW(engine, 2), 0); + CTX_REG(regs, CTX_PDP2_LDW, GEN8_RING_PDP_LDW(engine, 2), 0); + CTX_REG(regs, CTX_PDP1_UDW, GEN8_RING_PDP_UDW(engine, 1), 0); + CTX_REG(regs, CTX_PDP1_LDW, GEN8_RING_PDP_LDW(engine, 1), 0); + CTX_REG(regs, CTX_PDP0_UDW, GEN8_RING_PDP_UDW(engine, 0), 0); + CTX_REG(regs, CTX_PDP0_LDW, GEN8_RING_PDP_LDW(engine, 0), 0); if (ppgtt && i915_vm_is_48bit(&ppgtt->base)) { /* 64b PPGTT (48bit canonical) * PDP0_DESCRIPTOR contains the base address to PML4 and * other PDP Descriptors are ignored. */ - ASSIGN_CTX_PML4(ppgtt, reg_state); + ASSIGN_CTX_PML4(ppgtt, regs); } - if (engine->id == RCS) { - reg_state[CTX_LRI_HEADER_2] = MI_LOAD_REGISTER_IMM(1); - ASSIGN_CTX_REG(reg_state, CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE, - make_rpcs(dev_priv)); + if (rcs) { + regs[CTX_LRI_HEADER_2] = MI_LOAD_REGISTER_IMM(1); + CTX_REG(regs, CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE, + make_rpcs(dev_priv)); } } -- cgit From e235b53019a3d1225601d535795f9a4f7f44587b Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Tue, 21 Feb 2017 09:13:43 +0000 Subject: drm/i915/tracepoints: Tidy request event class At the moment only the global seqno is logged which is not set until the request is ready for submission. Add the per-contex seqno and the context hardware id which are both interesting data points. Signed-off-by: Tvrtko Ursulin Reviewed-by: Chris Wilson --- drivers/gpu/drm/i915/i915_trace.h | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h index 7a547cdfc381..4454872d487f 100644 --- a/drivers/gpu/drm/i915/i915_trace.h +++ b/drivers/gpu/drm/i915/i915_trace.h @@ -451,18 +451,23 @@ DECLARE_EVENT_CLASS(i915_gem_request, TP_STRUCT__entry( __field(u32, dev) + __field(u32, ctx) __field(u32, ring) __field(u32, seqno) + __field(u32, global) ), TP_fast_assign( __entry->dev = req->i915->drm.primary->index; + __entry->ctx = req->ctx->hw_id; __entry->ring = req->engine->id; - __entry->seqno = req->global_seqno; + __entry->seqno = req->fence.seqno; + __entry->global = req->global_seqno; ), - TP_printk("dev=%u, ring=%u, seqno=%u", - __entry->dev, __entry->ring, __entry->seqno) + TP_printk("dev=%u, ring=%u, ctx=%u, seqno=%u, global=%u", + __entry->dev, __entry->ring, __entry->ctx, __entry->seqno, + __entry->global) ); DEFINE_EVENT(i915_gem_request, i915_gem_request_add, -- cgit From 1cce8922df06ca662da9dff817cae7f3aca4276c Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Tue, 21 Feb 2017 09:13:44 +0000 Subject: drm/i915/tracepoints: Adjust i915_gem_ring_dispatch Rename it to i915_gem_request_queue and fix the logged info equivalent to the i915_gem_request even class. Also moved it a bit further apart from the i915_gem_request_add tracepoint since they otherwise provide similar information too close in time. v2: Remove sw fence singalling. We will rely on the soon to come GuC scheduling backend to enable that. (Chris Wilson) v3: Log hex with 0x prefix for clarity. Signed-off-by: Tvrtko Ursulin Reviewed-by: Chris Wilson --- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 4 ++-- drivers/gpu/drm/i915/i915_trace.h | 12 +++++++----- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index baf9039774d7..dfed503301a6 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -1493,8 +1493,6 @@ execbuf_submit(struct i915_execbuffer_params *params, if (ret) return ret; - trace_i915_gem_ring_dispatch(params->request, params->dispatch_flags); - i915_gem_execbuffer_move_to_active(vmas, params->request); return 0; @@ -1836,6 +1834,8 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, params->dispatch_flags = dispatch_flags; params->ctx = ctx; + trace_i915_gem_request_queue(params->request, dispatch_flags); + ret = execbuf_submit(params, args, &eb->vmas); err_request: __i915_add_request(params->request, ret == 0); diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h index 4454872d487f..d9ade8c38f92 100644 --- a/drivers/gpu/drm/i915/i915_trace.h +++ b/drivers/gpu/drm/i915/i915_trace.h @@ -399,13 +399,14 @@ TRACE_EVENT(i915_gem_ring_sync_to, __entry->seqno) ); -TRACE_EVENT(i915_gem_ring_dispatch, +TRACE_EVENT(i915_gem_request_queue, TP_PROTO(struct drm_i915_gem_request *req, u32 flags), TP_ARGS(req, flags), TP_STRUCT__entry( __field(u32, dev) __field(u32, ring) + __field(u32, ctx) __field(u32, seqno) __field(u32, flags) ), @@ -413,13 +414,14 @@ TRACE_EVENT(i915_gem_ring_dispatch, TP_fast_assign( __entry->dev = req->i915->drm.primary->index; __entry->ring = req->engine->id; - __entry->seqno = req->global_seqno; + __entry->ctx = req->ctx->hw_id; + __entry->seqno = req->fence.seqno; __entry->flags = flags; - dma_fence_enable_sw_signaling(&req->fence); ), - TP_printk("dev=%u, ring=%u, seqno=%u, flags=%x", - __entry->dev, __entry->ring, __entry->seqno, __entry->flags) + TP_printk("dev=%u, ring=%u, ctx=%u, seqno=%u, flags=0x%x", + __entry->dev, __entry->ring, __entry->ctx, __entry->seqno, + __entry->flags) ); TRACE_EVENT(i915_gem_ring_flush, -- cgit From 936925029808242d6cd0950736046abde5b89e0e Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Tue, 21 Feb 2017 11:00:24 +0000 Subject: drm/i915/tracepoints: Tidy i915_gem_request_wait_begin Provide the same information as the other request event classes. v2: Pass in flags so we can properly report the blocking status. (Chris Wilson) v3: Log hex with 0x prefix for clarity. v4: Derive blocking status from flags. (Chris Wilson) Signed-off-by: Tvrtko Ursulin Reviewed-by: Chris Wilson --- drivers/gpu/drm/i915/i915_gem_request.c | 2 +- drivers/gpu/drm/i915/i915_trace.h | 22 +++++++++++++--------- 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index a5fac40d2a4f..543cef57972b 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -1092,7 +1092,7 @@ long i915_wait_request(struct drm_i915_gem_request *req, if (!timeout) return -ETIME; - trace_i915_gem_request_wait_begin(req); + trace_i915_gem_request_wait_begin(req, flags); if (!i915_sw_fence_done(&req->execute)) { timeout = __i915_request_wait_for_execute(req, flags, timeout); diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h index d9ade8c38f92..557195b56087 100644 --- a/drivers/gpu/drm/i915/i915_trace.h +++ b/drivers/gpu/drm/i915/i915_trace.h @@ -508,14 +508,16 @@ DEFINE_EVENT(i915_gem_request, i915_gem_request_complete, ); TRACE_EVENT(i915_gem_request_wait_begin, - TP_PROTO(struct drm_i915_gem_request *req), - TP_ARGS(req), + TP_PROTO(struct drm_i915_gem_request *req, unsigned int flags), + TP_ARGS(req, flags), TP_STRUCT__entry( __field(u32, dev) __field(u32, ring) + __field(u32, ctx) __field(u32, seqno) - __field(bool, blocking) + __field(u32, global) + __field(unsigned int, flags) ), /* NB: the blocking information is racy since mutex_is_locked @@ -527,14 +529,16 @@ TRACE_EVENT(i915_gem_request_wait_begin, TP_fast_assign( __entry->dev = req->i915->drm.primary->index; __entry->ring = req->engine->id; - __entry->seqno = req->global_seqno; - __entry->blocking = - mutex_is_locked(&req->i915->drm.struct_mutex); + __entry->ctx = req->ctx->hw_id; + __entry->seqno = req->fence.seqno; + __entry->global = req->global_seqno; + __entry->flags = flags; ), - TP_printk("dev=%u, ring=%u, seqno=%u, blocking=%s", - __entry->dev, __entry->ring, - __entry->seqno, __entry->blocking ? "yes (NB)" : "no") + TP_printk("dev=%u, ring=%u, ctx=%u, seqno=%u, global=%u, blocking=%u, flags=0x%x", + __entry->dev, __entry->ring, __entry->ctx, __entry->seqno, + __entry->global, !!(__entry->flags & I915_WAIT_LOCKED), + __entry->flags) ); DEFINE_EVENT(i915_gem_request, i915_gem_request_wait_end, -- cgit From 90aa412d50a94b5d48de8991614877d3242f46b1 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Tue, 21 Feb 2017 09:13:46 +0000 Subject: drm/i915/tracepoints: Remove unused i915_gem_request_complete Tracepoint is not used and won't be suitable for its replacement. Signed-off-by: Tvrtko Ursulin Reviewed-by: Chris Wilson --- drivers/gpu/drm/i915/i915_trace.h | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h index 557195b56087..85accea70b4d 100644 --- a/drivers/gpu/drm/i915/i915_trace.h +++ b/drivers/gpu/drm/i915/i915_trace.h @@ -502,11 +502,6 @@ DEFINE_EVENT(i915_gem_request, i915_gem_request_retire, TP_ARGS(req) ); -DEFINE_EVENT(i915_gem_request, i915_gem_request_complete, - TP_PROTO(struct drm_i915_gem_request *req), - TP_ARGS(req) -); - TRACE_EVENT(i915_gem_request_wait_begin, TP_PROTO(struct drm_i915_gem_request *req, unsigned int flags), TP_ARGS(req, flags), -- cgit From 354d036fcf70654cff2e2cbdda54a835d219b9d2 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Tue, 21 Feb 2017 11:01:42 +0000 Subject: drm/i915/tracepoints: Add request submit and execute tracepoints These new tracepoints are emitted once the request is ready to be submitted to the GPU and once the request is about to be submitted to the GPU, respectively. Former condition triggers as soon as all the fences and dependencies have been resolved, and the latter once the backend is about to submit it to the GPU. New tracepoint are enabled via the new DRM_I915_LOW_LEVEL_TRACEPOINTS Kconfig option which is disabled by default to alleviate the performance impact concerns. v2: Move execute tracepoint to __i915_gem_request_submit. (Chris Wilson) Signed-off-by: Tvrtko Ursulin Reviewed-by: Chris Wilson --- drivers/gpu/drm/i915/Kconfig.debug | 11 +++++++++++ drivers/gpu/drm/i915/i915_gem_request.c | 2 ++ drivers/gpu/drm/i915/i915_trace.h | 24 ++++++++++++++++++++++++ 3 files changed, 37 insertions(+) diff --git a/drivers/gpu/drm/i915/Kconfig.debug b/drivers/gpu/drm/i915/Kconfig.debug index 68ff072f8b76..e091809a9a9e 100644 --- a/drivers/gpu/drm/i915/Kconfig.debug +++ b/drivers/gpu/drm/i915/Kconfig.debug @@ -76,3 +76,14 @@ config DRM_I915_SELFTEST Recommended for driver developers only. If in doubt, say "N". + +config DRM_I915_LOW_LEVEL_TRACEPOINTS + bool "Enable low level request tracing events" + depends on DRM_I915 + default n + help + Choose this option to turn on low level request tracing events. + This provides the ability to precisely monitor engine utilisation + and also analyze the request dependency resolving timeline. + + If in doubt, say "N". diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index 543cef57972b..12b06e055490 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -426,6 +426,7 @@ void __i915_gem_request_submit(struct drm_i915_gem_request *request) spin_unlock(&request->timeline->lock); i915_sw_fence_commit(&request->execute); + trace_i915_gem_request_execute(request); } void i915_gem_request_submit(struct drm_i915_gem_request *request) @@ -449,6 +450,7 @@ submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state) switch (state) { case FENCE_COMPLETE: + trace_i915_gem_request_submit(request); request->engine->submit_request(request); break; diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h index 85accea70b4d..e3c0f9e94488 100644 --- a/drivers/gpu/drm/i915/i915_trace.h +++ b/drivers/gpu/drm/i915/i915_trace.h @@ -477,6 +477,30 @@ DEFINE_EVENT(i915_gem_request, i915_gem_request_add, TP_ARGS(req) ); +#if defined(CONFIG_DRM_I915_LOW_LEVEL_TRACEPOINTS) +DEFINE_EVENT(i915_gem_request, i915_gem_request_submit, + TP_PROTO(struct drm_i915_gem_request *req), + TP_ARGS(req) +); + +DEFINE_EVENT(i915_gem_request, i915_gem_request_execute, + TP_PROTO(struct drm_i915_gem_request *req), + TP_ARGS(req) +); +#else +#if !defined(TRACE_HEADER_MULTI_READ) +static inline void +trace_i915_gem_request_submit(struct drm_i915_gem_request *req) +{ +} + +static inline void +trace_i915_gem_request_execute(struct drm_i915_gem_request *req) +{ +} +#endif +#endif + TRACE_EVENT(i915_gem_request_notify, TP_PROTO(struct intel_engine_cs *engine), TP_ARGS(engine), -- cgit From dffabc8f4ee3fce98821bc212bf3c5e31247e2b9 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Tue, 21 Feb 2017 09:13:48 +0000 Subject: drm/i915/tracepoints: Rename i915_gem_request_notify i915_gem_ring_notify is more appropriate since we do not have the request information at this point, but it is simply a signal from the engine that some request has been completed. v2: * Always trace and log if there were any waiters. * Rename to intel_engine_notify. (Chris Wilson) Signed-off-by: Tvrtko Ursulin Reviewed-by: Chris Wilson --- drivers/gpu/drm/i915/i915_irq.c | 6 ++++-- drivers/gpu/drm/i915/i915_trace.h | 13 ++++++++----- 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 0657ac44a582..312d30e96dc2 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -1033,10 +1033,12 @@ static void ironlake_rps_change_irq_handler(struct drm_i915_private *dev_priv) static void notify_ring(struct intel_engine_cs *engine) { + bool waiters; + atomic_inc(&engine->irq_count); set_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted); - if (intel_engine_wakeup(engine)) - trace_i915_gem_request_notify(engine); + waiters = intel_engine_wakeup(engine); + trace_intel_engine_notify(engine, waiters); } static void vlv_c0_read(struct drm_i915_private *dev_priv, diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h index e3c0f9e94488..ca2facac4bca 100644 --- a/drivers/gpu/drm/i915/i915_trace.h +++ b/drivers/gpu/drm/i915/i915_trace.h @@ -501,24 +501,27 @@ trace_i915_gem_request_execute(struct drm_i915_gem_request *req) #endif #endif -TRACE_EVENT(i915_gem_request_notify, - TP_PROTO(struct intel_engine_cs *engine), - TP_ARGS(engine), +TRACE_EVENT(intel_engine_notify, + TP_PROTO(struct intel_engine_cs *engine, bool waiters), + TP_ARGS(engine, waiters), TP_STRUCT__entry( __field(u32, dev) __field(u32, ring) __field(u32, seqno) + __field(bool, waiters) ), TP_fast_assign( __entry->dev = engine->i915->drm.primary->index; __entry->ring = engine->id; __entry->seqno = intel_engine_get_seqno(engine); + __entry->waiters = waiters; ), - TP_printk("dev=%u, ring=%u, seqno=%u", - __entry->dev, __entry->ring, __entry->seqno) + TP_printk("dev=%u, ring=%u, seqno=%u, waiters=%u", + __entry->dev, __entry->ring, __entry->seqno, + __entry->waiters) ); DEFINE_EVENT(i915_gem_request, i915_gem_request_retire, -- cgit From d7d96833f277827871f9103def3552fec79cf012 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Tue, 21 Feb 2017 11:03:00 +0000 Subject: drm/i915/tracepoints: Add backend level request in and out tracepoints Two new tracepoints placed at the call sites where requests are actually passed to the GPU enable userspace to track engine utilisation. These tracepoints are only enabled when the DRM_I915_LOW_LEVEL_TRACEPOINTS Kconfig option is enabled. v2: Fix compilation with !CONFIG_DRM_I915_LOW_LEVEL_TRACEPOINTS. v3: Name global seqno consistently across tracepoints. v4: Remove port info from request out tracepoint. (Chris Wilson) Signed-off-by: Tvrtko Ursulin Reviewed-by: Chris Wilson --- drivers/gpu/drm/i915/i915_guc_submission.c | 2 ++ drivers/gpu/drm/i915/i915_trace.h | 49 ++++++++++++++++++++++++++++++ drivers/gpu/drm/i915/intel_lrc.c | 2 ++ 3 files changed, 53 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c index 8ced9e26f075..beec88a30347 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -518,6 +518,8 @@ static void __i915_guc_submit(struct drm_i915_gem_request *rq) if (i915_vma_is_map_and_fenceable(rq->ring->vma)) POSTING_READ_FW(GUC_STATUS); + trace_i915_gem_request_in(rq, 0); + b_ret = guc_ring_doorbell(client); client->submissions[engine_id] += 1; diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h index ca2facac4bca..0a5f27b34a77 100644 --- a/drivers/gpu/drm/i915/i915_trace.h +++ b/drivers/gpu/drm/i915/i915_trace.h @@ -487,6 +487,45 @@ DEFINE_EVENT(i915_gem_request, i915_gem_request_execute, TP_PROTO(struct drm_i915_gem_request *req), TP_ARGS(req) ); + +DECLARE_EVENT_CLASS(i915_gem_request_hw, + TP_PROTO(struct drm_i915_gem_request *req, + unsigned int port), + TP_ARGS(req, port), + + TP_STRUCT__entry( + __field(u32, dev) + __field(u32, ring) + __field(u32, seqno) + __field(u32, global_seqno) + __field(u32, ctx) + __field(u32, port) + ), + + TP_fast_assign( + __entry->dev = req->i915->drm.primary->index; + __entry->ring = req->engine->id; + __entry->ctx = req->ctx->hw_id; + __entry->seqno = req->fence.seqno; + __entry->global_seqno = req->global_seqno; + __entry->port = port; + ), + + TP_printk("dev=%u, ring=%u, ctx=%u, seqno=%u, global=%u, port=%u", + __entry->dev, __entry->ring, __entry->ctx, + __entry->seqno, __entry->global_seqno, + __entry->port) +); + +DEFINE_EVENT(i915_gem_request_hw, i915_gem_request_in, + TP_PROTO(struct drm_i915_gem_request *req, unsigned int port), + TP_ARGS(req, port) +); + +DEFINE_EVENT(i915_gem_request, i915_gem_request_out, + TP_PROTO(struct drm_i915_gem_request *req), + TP_ARGS(req) +); #else #if !defined(TRACE_HEADER_MULTI_READ) static inline void @@ -498,6 +537,16 @@ static inline void trace_i915_gem_request_execute(struct drm_i915_gem_request *req) { } + +static inline void +trace_i915_gem_request_in(struct drm_i915_gem_request *req, unsigned int port) +{ +} + +static inline void +trace_i915_gem_request_out(struct drm_i915_gem_request *req) +{ +} #endif #endif diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 5553f551a1bf..39329d40da46 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -479,6 +479,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) cursor->priotree.priority = INT_MAX; __i915_gem_request_submit(cursor); + trace_i915_gem_request_in(cursor, port - engine->execlist_port); last = cursor; submit = true; } @@ -593,6 +594,7 @@ static void intel_lrc_irq_handler(unsigned long data) execlists_context_status_change(port[0].request, INTEL_CONTEXT_SCHEDULE_OUT); + trace_i915_gem_request_out(port[0].request); i915_gem_request_put(port[0].request); port[0] = port[1]; memset(&port[1], 0, sizeof(port[1])); -- cgit From 99c181a0fac084470842133bdeb3f78de3203ba9 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Tue, 21 Feb 2017 09:13:50 +0000 Subject: drm/i915/tracepoints: Add hw_id to context tracepoints It is useful to provide this info to match the one provided in the request tracepoints. Signed-off-by: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170221091350.14605-1-tvrtko.ursulin@linux.intel.com Reviewed-by: Chris Wilson --- drivers/gpu/drm/i915/i915_trace.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h index 0a5f27b34a77..0c2ed1289bfc 100644 --- a/drivers/gpu/drm/i915/i915_trace.h +++ b/drivers/gpu/drm/i915/i915_trace.h @@ -747,17 +747,19 @@ DECLARE_EVENT_CLASS(i915_context, TP_STRUCT__entry( __field(u32, dev) __field(struct i915_gem_context *, ctx) + __field(u32, hw_id) __field(struct i915_address_space *, vm) ), TP_fast_assign( + __entry->dev = ctx->i915->drm.primary->index; __entry->ctx = ctx; + __entry->hw_id = ctx->hw_id; __entry->vm = ctx->ppgtt ? &ctx->ppgtt->base : NULL; - __entry->dev = ctx->i915->drm.primary->index; ), - TP_printk("dev=%u, ctx=%p, ctx_vm=%p", - __entry->dev, __entry->ctx, __entry->vm) + TP_printk("dev=%u, ctx=%p, ctx_vm=%p, hw_id=%u", + __entry->dev, __entry->ctx, __entry->vm, __entry->hw_id) ) DEFINE_EVENT(i915_context, i915_context_create, -- cgit From 718e884a0183b09a2af8c06818df5d60f94243ce Mon Sep 17 00:00:00 2001 From: Chuanxiao Dong Date: Thu, 16 Feb 2017 14:36:40 +0800 Subject: drm/i915/gvt: set ring buffer size to default for guc submission When not using GuC submission, the ring buffer size for GVT context is 512KB which is the max size. When switching to GuC submission, the ring buffer size is required to be less than 16KB. So use the GVT context default ring buffer size if GuC submission is enabled. Signed-off-by: Chuanxiao Dong Link: http://patchwork.freedesktop.org/patch/msgid/20170216063639.GA17107@intel.com Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_gem_context.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 262452055563..99c46f4dbde6 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -431,7 +431,8 @@ i915_gem_context_create_gvt(struct drm_device *dev) i915_gem_context_set_closed(ctx); /* not user accessible */ i915_gem_context_clear_bannable(ctx); i915_gem_context_set_force_single_submission(ctx); - ctx->ring_size = 512 * PAGE_SIZE; /* Max ring buffer size */ + if (!i915.enable_guc_submission) + ctx->ring_size = 512 * PAGE_SIZE; /* Max ring buffer size */ GEM_BUG_ON(i915_gem_context_is_kernel(ctx)); out: -- cgit From 1d6aa7a339713179e5aa15f11a70e244b2334d32 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 21 Feb 2017 16:26:19 +0000 Subject: drm/i915: Add i915_param charp macro magic Handling the dynamic charp module parameter requires us to copy it for the error state, or remember to lock it when reading (in case it used with 0600). v2: Use __always_inline and __builtin_strcmp Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170221162619.15954-1-chris@chris-wilson.co.uk Reviewed-by: Jani Nikula --- drivers/gpu/drm/i915/i915_debugfs.c | 2 ++ drivers/gpu/drm/i915/i915_gpu_error.c | 22 ++++++++++++++++++++++ 2 files changed, 24 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 768461f7c7c6..655e60d609c2 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -72,6 +72,8 @@ static __always_inline void seq_print_param(struct seq_file *m, seq_printf(m, "i915.%s=%d\n", name, *(const int *)x); else if (!__builtin_strcmp(type, "unsigned int")) seq_printf(m, "i915.%s=%u\n", name, *(const unsigned int *)x); + else if (!__builtin_strcmp(type, "char *")) + seq_printf(m, "i915.%s=%s\n", name, *(const char **)x); else BUILD_BUG(); } diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 3a3c7c3c4931..2b1d15668192 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -557,6 +557,8 @@ static __always_inline void err_print_param(struct drm_i915_error_state_buf *m, err_printf(m, "i915.%s=%d\n", name, *(const int *)x); else if (!__builtin_strcmp(type, "unsigned int")) err_printf(m, "i915.%s=%u\n", name, *(const unsigned int *)x); + else if (!__builtin_strcmp(type, "char *")) + err_printf(m, "i915.%s=%s\n", name, *(const char **)x); else BUILD_BUG(); } @@ -810,6 +812,12 @@ static void i915_error_object_free(struct drm_i915_error_object *obj) kfree(obj); } +static __always_inline void free_param(const char *type, void *x) +{ + if (!__builtin_strcmp(type, "char *")) + kfree(*(void **)x); +} + void __i915_gpu_state_free(struct kref *error_ref) { struct i915_gpu_state *error = @@ -840,6 +848,11 @@ void __i915_gpu_state_free(struct kref *error_ref) kfree(error->overlay); kfree(error->display); + +#define FREE(T, x) free_param(#T, &error->params.x); + I915_PARAMS_FOR_EACH(FREE); +#undef FREE + kfree(error); } @@ -1614,6 +1627,12 @@ static void i915_capture_gen_state(struct drm_i915_private *dev_priv, sizeof(error->device_info)); } +static __always_inline void dup_param(const char *type, void *x) +{ + if (!__builtin_strcmp(type, "char *")) + *(void **)x = kstrdup(*(void **)x, GFP_ATOMIC); +} + static int capture(void *data) { struct i915_gpu_state *error = data; @@ -1625,6 +1644,9 @@ static int capture(void *data) error->i915->gt.last_init_time)); error->params = i915; +#define DUP(T, x) dup_param(#T, &error->params.x); + I915_PARAMS_FOR_EACH(DUP); +#undef DUP i915_capture_gen_state(error->i915, error); i915_capture_reg_state(error->i915, error); -- cgit From 208b84a3757e15a1c002820aefa941a3293b9efa Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 22 Feb 2017 11:40:44 +0000 Subject: drm/i915: Remove change_domain tracepoint The change_domain tracepoint has been inaccurate for a few years - it doesn't fully capture the domains, especially with userspace bypassing them. It is defunct, misleading and time to be removed. Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170222114049.28456-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem.c | 30 ------------------------------ drivers/gpu/drm/i915/i915_gem_execbuffer.c | 3 --- drivers/gpu/drm/i915/i915_trace.h | 24 ------------------------ 3 files changed, 57 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index d93032875f28..9c5f091c771f 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3214,9 +3214,6 @@ i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj) intel_fb_obj_flush(obj, false, write_origin(obj, I915_GEM_DOMAIN_GTT)); obj->base.write_domain = 0; - trace_i915_gem_object_change_domain(obj, - obj->base.read_domains, - I915_GEM_DOMAIN_GTT); } /** Flushes the CPU write domain for the object if it's dirty. */ @@ -3230,9 +3227,6 @@ i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj) intel_fb_obj_flush(obj, false, ORIGIN_CPU); obj->base.write_domain = 0; - trace_i915_gem_object_change_domain(obj, - obj->base.read_domains, - I915_GEM_DOMAIN_CPU); } /** @@ -3246,7 +3240,6 @@ i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj) int i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) { - uint32_t old_write_domain, old_read_domains; int ret; lockdep_assert_held(&obj->base.dev->struct_mutex); @@ -3284,9 +3277,6 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0) mb(); - old_write_domain = obj->base.write_domain; - old_read_domains = obj->base.read_domains; - /* It should now be out of any other write domains, and we can update * the domain values for our changes. */ @@ -3298,10 +3288,6 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) obj->mm.dirty = true; } - trace_i915_gem_object_change_domain(obj, - old_read_domains, - old_write_domain); - i915_gem_object_unpin_pages(obj); return 0; } @@ -3538,7 +3524,6 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, const struct i915_ggtt_view *view) { struct i915_vma *vma; - u32 old_read_domains, old_write_domain; int ret; lockdep_assert_held(&obj->base.dev->struct_mutex); @@ -3603,19 +3588,12 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, intel_fb_obj_flush(obj, false, ORIGIN_DIRTYFB); } - old_write_domain = obj->base.write_domain; - old_read_domains = obj->base.read_domains; - /* It should now be out of any other write domains, and we can update * the domain values for our changes. */ obj->base.write_domain = 0; obj->base.read_domains |= I915_GEM_DOMAIN_GTT; - trace_i915_gem_object_change_domain(obj, - old_read_domains, - old_write_domain); - return vma; err_unpin_display: @@ -3651,7 +3629,6 @@ i915_gem_object_unpin_from_display_plane(struct i915_vma *vma) int i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write) { - uint32_t old_write_domain, old_read_domains; int ret; lockdep_assert_held(&obj->base.dev->struct_mutex); @@ -3670,9 +3647,6 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write) i915_gem_object_flush_gtt_write_domain(obj); - old_write_domain = obj->base.write_domain; - old_read_domains = obj->base.read_domains; - /* Flush the CPU cache if it's still invalid. */ if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) { i915_gem_clflush_object(obj, false); @@ -3693,10 +3667,6 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write) obj->base.write_domain = I915_GEM_DOMAIN_CPU; } - trace_i915_gem_object_change_domain(obj, - old_read_domains, - old_write_domain); - return 0; } diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index dfed503301a6..6fb29832bc63 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -1317,8 +1317,6 @@ i915_gem_execbuffer_move_to_active(struct list_head *vmas, list_for_each_entry(vma, vmas, exec_list) { struct drm_i915_gem_object *obj = vma->obj; - u32 old_read = obj->base.read_domains; - u32 old_write = obj->base.write_domain; obj->base.write_domain = obj->base.pending_write_domain; if (obj->base.write_domain) @@ -1329,7 +1327,6 @@ i915_gem_execbuffer_move_to_active(struct list_head *vmas, i915_vma_move_to_active(vma, req, vma->exec_entry->flags); eb_export_fence(obj, req, vma->exec_entry->flags); - trace_i915_gem_object_change_domain(obj, old_read, old_write); } } diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h index 0c2ed1289bfc..2dbef3147a60 100644 --- a/drivers/gpu/drm/i915/i915_trace.h +++ b/drivers/gpu/drm/i915/i915_trace.h @@ -175,30 +175,6 @@ TRACE_EVENT(i915_vma_unbind, __entry->obj, __entry->offset, __entry->size, __entry->vm) ); -TRACE_EVENT(i915_gem_object_change_domain, - TP_PROTO(struct drm_i915_gem_object *obj, u32 old_read, u32 old_write), - TP_ARGS(obj, old_read, old_write), - - TP_STRUCT__entry( - __field(struct drm_i915_gem_object *, obj) - __field(u32, read_domains) - __field(u32, write_domain) - ), - - TP_fast_assign( - __entry->obj = obj; - __entry->read_domains = obj->base.read_domains | (old_read << 16); - __entry->write_domain = obj->base.write_domain | (old_write << 16); - ), - - TP_printk("obj=%p, read=%02x=>%02x, write=%02x=>%02x", - __entry->obj, - __entry->read_domains >> 16, - __entry->read_domains & 0xffff, - __entry->write_domain >> 16, - __entry->write_domain & 0xffff) -); - TRACE_EVENT(i915_gem_object_pwrite, TP_PROTO(struct drm_i915_gem_object *obj, u32 offset, u32 len), TP_ARGS(obj, offset, len), -- cgit From e59dc1721180b9499ce409945667763628c678c8 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 22 Feb 2017 11:40:45 +0000 Subject: drm/i915: Move cpu_cache_is_coherent() to header For use in the next patch, take the current is-coherent helper and add it to i915_gem_object.h Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170222114049.28456-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_drv.h | 6 ++++++ drivers/gpu/drm/i915/i915_gem.c | 20 ++++++-------------- 2 files changed, 12 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 1fe4010d4f2b..590c524a1531 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -4097,4 +4097,10 @@ int remap_io_mapping(struct vm_area_struct *vma, unsigned long addr, unsigned long pfn, unsigned long size, struct io_mapping *iomap); +static inline bool i915_gem_object_is_coherent(struct drm_i915_gem_object *obj) +{ + return (obj->cache_level != I915_CACHE_NONE || + HAS_LLC(to_i915(obj->base.dev))); +} + #endif diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 9c5f091c771f..312154b3d04c 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -48,18 +48,12 @@ static void i915_gem_flush_free_objects(struct drm_i915_private *i915); static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj); static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj); -static bool cpu_cache_is_coherent(struct drm_device *dev, - enum i915_cache_level level) -{ - return HAS_LLC(to_i915(dev)) || level != I915_CACHE_NONE; -} - static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj) { if (obj->base.write_domain == I915_GEM_DOMAIN_CPU) return false; - if (!cpu_cache_is_coherent(obj->base.dev, obj->cache_level)) + if (!i915_gem_object_is_coherent(obj)) return true; return obj->pin_display; @@ -255,7 +249,7 @@ __i915_gem_object_release_shmem(struct drm_i915_gem_object *obj, if (needs_clflush && (obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0 && - !cpu_cache_is_coherent(obj->base.dev, obj->cache_level)) + !i915_gem_object_is_coherent(obj)) drm_clflush_sg(pages); obj->base.read_domains = I915_GEM_DOMAIN_CPU; @@ -796,8 +790,7 @@ int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj, * anyway again before the next pread happens. */ if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)) - *needs_clflush = !cpu_cache_is_coherent(obj->base.dev, - obj->cache_level); + *needs_clflush = !i915_gem_object_is_coherent(obj); if (*needs_clflush && !static_cpu_has(X86_FEATURE_CLFLUSH)) { ret = i915_gem_object_set_to_cpu_domain(obj, false); @@ -853,8 +846,7 @@ int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj, * before writing. */ if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)) - *needs_clflush |= !cpu_cache_is_coherent(obj->base.dev, - obj->cache_level); + *needs_clflush |= !i915_gem_object_is_coherent(obj); if (*needs_clflush && !static_cpu_has(X86_FEATURE_CLFLUSH)) { ret = i915_gem_object_set_to_cpu_domain(obj, true); @@ -3173,7 +3165,7 @@ void i915_gem_clflush_object(struct drm_i915_gem_object *obj, * snooping behaviour occurs naturally as the result of our domain * tracking. */ - if (!force && cpu_cache_is_coherent(obj->base.dev, obj->cache_level)) { + if (!force && i915_gem_object_is_coherent(obj)) { obj->cache_dirty = true; return; } @@ -3412,7 +3404,7 @@ restart: } if (obj->base.write_domain == I915_GEM_DOMAIN_CPU && - cpu_cache_is_coherent(obj->base.dev, obj->cache_level)) + i915_gem_object_is_coherent(obj)) obj->cache_dirty = true; list_for_each_entry(vma, &obj->vma_list, obj_link) -- cgit From 5a97bcc69cc02d4da9599ed2c256b65fc3bd4ee7 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 22 Feb 2017 11:40:46 +0000 Subject: drm/i915: Amalgamate flushing of display objects We have three different paths by which userspace wants to flush the display plane (i.e. objects with obj->pin_display). Use a common helper to identify those paths and to simplify a later change. v2: Include the conditional in the name, i915_gem_object_flush_if_display Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170222114049.28456-3-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem.c | 41 +++++++++++++++++++++------------- drivers/gpu/drm/i915/i915_gem_object.h | 2 ++ drivers/gpu/drm/i915/intel_display.c | 9 ++------ 3 files changed, 30 insertions(+), 22 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 312154b3d04c..c9ee2a99ffc0 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1613,23 +1613,16 @@ i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data, { struct drm_i915_gem_sw_finish *args = data; struct drm_i915_gem_object *obj; - int err = 0; obj = i915_gem_object_lookup(file, args->handle); if (!obj) return -ENOENT; /* Pinned buffers may be scanout, so flush the cache */ - if (READ_ONCE(obj->pin_display)) { - err = i915_mutex_lock_interruptible(dev); - if (!err) { - i915_gem_object_flush_cpu_write_domain(obj); - mutex_unlock(&dev->struct_mutex); - } - } - + i915_gem_object_flush_if_display(obj); i915_gem_object_put(obj); - return err; + + return 0; } /** @@ -3221,6 +3214,27 @@ i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj) obj->base.write_domain = 0; } +static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj) +{ + if (obj->base.write_domain != I915_GEM_DOMAIN_CPU && !obj->cache_dirty) + return; + + i915_gem_clflush_object(obj, true); + intel_fb_obj_flush(obj, false, ORIGIN_CPU); + + obj->base.write_domain = 0; +} + +void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj) +{ + if (!READ_ONCE(obj->pin_display)) + return; + + mutex_lock(&obj->base.dev->struct_mutex); + __i915_gem_object_flush_for_display(obj); + mutex_unlock(&obj->base.dev->struct_mutex); +} + /** * Moves a single object to the GTT read, and possibly write domain. * @obj: object to act on @@ -3575,15 +3589,12 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, vma->display_alignment = max_t(u64, vma->display_alignment, alignment); /* Treat this as an end-of-frame, like intel_user_framebuffer_dirty() */ - if (obj->cache_dirty || obj->base.write_domain == I915_GEM_DOMAIN_CPU) { - i915_gem_clflush_object(obj, true); - intel_fb_obj_flush(obj, false, ORIGIN_DIRTYFB); - } + __i915_gem_object_flush_for_display(obj); + intel_fb_obj_flush(obj, false, ORIGIN_DIRTYFB); /* It should now be out of any other write domains, and we can update * the domain values for our changes. */ - obj->base.write_domain = 0; obj->base.read_domains |= I915_GEM_DOMAIN_GTT; return vma; diff --git a/drivers/gpu/drm/i915/i915_gem_object.h b/drivers/gpu/drm/i915/i915_gem_object.h index ef4893a4f08c..2fb30a5eb510 100644 --- a/drivers/gpu/drm/i915/i915_gem_object.h +++ b/drivers/gpu/drm/i915/i915_gem_object.h @@ -364,5 +364,7 @@ i915_gem_object_last_write_engine(struct drm_i915_gem_object *obj) return engine; } +void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj); + #endif diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index e1e828a8c524..4d67cbb3185c 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -14278,15 +14278,10 @@ static int intel_user_framebuffer_dirty(struct drm_framebuffer *fb, struct drm_clip_rect *clips, unsigned num_clips) { - struct drm_device *dev = fb->dev; - struct intel_framebuffer *intel_fb = to_intel_framebuffer(fb); - struct drm_i915_gem_object *obj = intel_fb->obj; + struct drm_i915_gem_object *obj = intel_fb_obj(fb); - mutex_lock(&dev->struct_mutex); - if (obj->pin_display && obj->cache_dirty) - i915_gem_clflush_object(obj, true); + i915_gem_object_flush_if_display(obj); intel_fb_obj_flush(obj, false, ORIGIN_DIRTYFB); - mutex_unlock(&dev->struct_mutex); return 0; } -- cgit From f6aaba4dfbc8eaa1b2b756b989fb423a789ee4e8 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 22 Feb 2017 11:40:47 +0000 Subject: drm/i915: Skip clflushes for all non-page backed objects Generalise the skip for physical and stolen objects by skipping anything we do not have a valid address for inside the sg. Signed-off-by: Chris Wilson Reviewed-by: Matthew Auld Link: http://patchwork.freedesktop.org/patch/msgid/20170222114049.28456-4-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index c9ee2a99ffc0..00213c282796 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3140,14 +3140,19 @@ void i915_gem_clflush_object(struct drm_i915_gem_object *obj, * to GPU, and we can ignore the cache flush because it'll happen * again at bind time. */ - if (!obj->mm.pages) + if (!obj->mm.pages) { + GEM_BUG_ON(obj->base.write_domain != I915_GEM_DOMAIN_CPU); return; + } /* * Stolen memory is always coherent with the GPU as it is explicitly * marked as wc by the system, or the system is cache-coherent. + * Similarly, we only access struct pages through the CPU cache, so + * anything not backed by physical memory we consider to be always + * coherent and not need clflushing. */ - if (obj->stolen || obj->phys_handle) + if (!i915_gem_object_has_struct_page(obj)) return; /* If the GPU is snooping the contents of the CPU cache, -- cgit From 57822dc6b9cfeb5300e467ff83d8371aead90047 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 22 Feb 2017 11:40:48 +0000 Subject: drm/i915: Perform object clflushing asynchronously Flushing the cachelines for an object is slow, can be as much as 100ms for a large framebuffer. We currently do this under the struct_mutex BKL on execution or on pageflip. But now with the ability to add fences to obj->resv for both flips and execbuf (and we naturally wait on the fence before CPU access), we can move the clflush operation to a workqueue and signal a fence for completion, thereby doing the work asynchronously and not blocking the driver or its clients. v2: Introduce i915_gem_clflush.h and use a new name, split out some extras into separate patches. Suggested-by: Akash Goel Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Cc: Matthew Auld Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170222114049.28456-5-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/Makefile | 1 + drivers/gpu/drm/i915/i915_drv.h | 2 +- drivers/gpu/drm/i915/i915_gem.c | 54 +-------- drivers/gpu/drm/i915/i915_gem_clflush.c | 189 +++++++++++++++++++++++++++++ drivers/gpu/drm/i915/i915_gem_clflush.h | 37 ++++++ drivers/gpu/drm/i915/i915_gem_execbuffer.c | 9 +- drivers/gpu/drm/i915/intel_display.c | 44 ++++--- 7 files changed, 264 insertions(+), 72 deletions(-) create mode 100644 drivers/gpu/drm/i915/i915_gem_clflush.c create mode 100644 drivers/gpu/drm/i915/i915_gem_clflush.h diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index d0c9577c7533..53e30fcb2751 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -29,6 +29,7 @@ i915-$(CONFIG_DEBUG_FS) += i915_debugfs.o intel_pipe_crc.o # GEM code i915-y += i915_cmd_parser.o \ i915_gem_batch_pool.o \ + i915_gem_clflush.o \ i915_gem_context.o \ i915_gem_dmabuf.o \ i915_gem_evict.o \ diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 590c524a1531..70fc2fa0eaa8 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3383,7 +3383,7 @@ int i915_gem_reset_prepare(struct drm_i915_private *dev_priv); void i915_gem_reset(struct drm_i915_private *dev_priv); void i915_gem_reset_finish(struct drm_i915_private *dev_priv); void i915_gem_set_wedged(struct drm_i915_private *dev_priv); -void i915_gem_clflush_object(struct drm_i915_gem_object *obj, bool force); + void i915_gem_init_mmio(struct drm_i915_private *i915); int __must_check i915_gem_init(struct drm_i915_private *dev_priv); int __must_check i915_gem_init_hw(struct drm_i915_private *dev_priv); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 00213c282796..fad0f5adb970 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -29,6 +29,7 @@ #include #include #include "i915_drv.h" +#include "i915_gem_clflush.h" #include "i915_vgpu.h" #include "i915_trace.h" #include "intel_drv.h" @@ -3133,46 +3134,6 @@ int i915_gem_wait_for_idle(struct drm_i915_private *i915, unsigned int flags) return 0; } -void i915_gem_clflush_object(struct drm_i915_gem_object *obj, - bool force) -{ - /* If we don't have a page list set up, then we're not pinned - * to GPU, and we can ignore the cache flush because it'll happen - * again at bind time. - */ - if (!obj->mm.pages) { - GEM_BUG_ON(obj->base.write_domain != I915_GEM_DOMAIN_CPU); - return; - } - - /* - * Stolen memory is always coherent with the GPU as it is explicitly - * marked as wc by the system, or the system is cache-coherent. - * Similarly, we only access struct pages through the CPU cache, so - * anything not backed by physical memory we consider to be always - * coherent and not need clflushing. - */ - if (!i915_gem_object_has_struct_page(obj)) - return; - - /* If the GPU is snooping the contents of the CPU cache, - * we do not need to manually clear the CPU cache lines. However, - * the caches are only snooped when the render cache is - * flushed/invalidated. As we always have to emit invalidations - * and flushes when moving into and out of the RENDER domain, correct - * snooping behaviour occurs naturally as the result of our domain - * tracking. - */ - if (!force && i915_gem_object_is_coherent(obj)) { - obj->cache_dirty = true; - return; - } - - trace_i915_gem_object_clflush(obj); - drm_clflush_sg(obj->mm.pages); - obj->cache_dirty = false; -} - /** Flushes the GTT write domain for the object if it's dirty. */ static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj) @@ -3213,9 +3174,7 @@ i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj) if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) return; - i915_gem_clflush_object(obj, obj->pin_display); - intel_fb_obj_flush(obj, false, ORIGIN_CPU); - + i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC); obj->base.write_domain = 0; } @@ -3224,9 +3183,7 @@ static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj) if (obj->base.write_domain != I915_GEM_DOMAIN_CPU && !obj->cache_dirty) return; - i915_gem_clflush_object(obj, true); - intel_fb_obj_flush(obj, false, ORIGIN_CPU); - + i915_gem_clflush_object(obj, I915_CLFLUSH_FORCE); obj->base.write_domain = 0; } @@ -3657,8 +3614,7 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write) /* Flush the CPU cache if it's still invalid. */ if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) { - i915_gem_clflush_object(obj, false); - + i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC); obj->base.read_domains |= I915_GEM_DOMAIN_CPU; } @@ -4526,6 +4482,8 @@ int i915_gem_init(struct drm_i915_private *dev_priv) mutex_lock(&dev_priv->drm.struct_mutex); + i915_gem_clflush_init(dev_priv); + if (!i915.enable_execlists) { dev_priv->gt.resume = intel_legacy_submission_resume; dev_priv->gt.cleanup_engine = intel_engine_cleanup; diff --git a/drivers/gpu/drm/i915/i915_gem_clflush.c b/drivers/gpu/drm/i915/i915_gem_clflush.c new file mode 100644 index 000000000000..c5fee02f3173 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_gem_clflush.c @@ -0,0 +1,189 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include "i915_drv.h" +#include "intel_frontbuffer.h" +#include "i915_gem_clflush.h" + +static DEFINE_SPINLOCK(clflush_lock); +static u64 clflush_context; + +struct clflush { + struct dma_fence dma; /* Must be first for dma_fence_free() */ + struct i915_sw_fence wait; + struct work_struct work; + struct drm_i915_gem_object *obj; +}; + +static const char *i915_clflush_get_driver_name(struct dma_fence *fence) +{ + return DRIVER_NAME; +} + +static const char *i915_clflush_get_timeline_name(struct dma_fence *fence) +{ + return "clflush"; +} + +static bool i915_clflush_enable_signaling(struct dma_fence *fence) +{ + return true; +} + +static void i915_clflush_release(struct dma_fence *fence) +{ + struct clflush *clflush = container_of(fence, typeof(*clflush), dma); + + i915_sw_fence_fini(&clflush->wait); + + BUILD_BUG_ON(offsetof(typeof(*clflush), dma)); + dma_fence_free(&clflush->dma); +} + +static const struct dma_fence_ops i915_clflush_ops = { + .get_driver_name = i915_clflush_get_driver_name, + .get_timeline_name = i915_clflush_get_timeline_name, + .enable_signaling = i915_clflush_enable_signaling, + .wait = dma_fence_default_wait, + .release = i915_clflush_release, +}; + +static void __i915_do_clflush(struct drm_i915_gem_object *obj) +{ + drm_clflush_sg(obj->mm.pages); + obj->cache_dirty = false; + + intel_fb_obj_flush(obj, false, ORIGIN_CPU); +} + +static void i915_clflush_work(struct work_struct *work) +{ + struct clflush *clflush = container_of(work, typeof(*clflush), work); + struct drm_i915_gem_object *obj = clflush->obj; + + if (!obj->cache_dirty) + goto out; + + if (i915_gem_object_pin_pages(obj)) { + DRM_ERROR("Failed to acquire obj->pages for clflushing\n"); + goto out; + } + + __i915_do_clflush(obj); + + i915_gem_object_unpin_pages(obj); + +out: + i915_gem_object_put(obj); + + dma_fence_signal(&clflush->dma); + dma_fence_put(&clflush->dma); +} + +static int __i915_sw_fence_call +i915_clflush_notify(struct i915_sw_fence *fence, + enum i915_sw_fence_notify state) +{ + struct clflush *clflush = container_of(fence, typeof(*clflush), wait); + + switch (state) { + case FENCE_COMPLETE: + schedule_work(&clflush->work); + break; + + case FENCE_FREE: + dma_fence_put(&clflush->dma); + break; + } + + return NOTIFY_DONE; +} + +void i915_gem_clflush_object(struct drm_i915_gem_object *obj, + unsigned int flags) +{ + struct clflush *clflush; + + /* + * Stolen memory is always coherent with the GPU as it is explicitly + * marked as wc by the system, or the system is cache-coherent. + * Similarly, we only access struct pages through the CPU cache, so + * anything not backed by physical memory we consider to be always + * coherent and not need clflushing. + */ + if (!i915_gem_object_has_struct_page(obj)) + return; + + obj->cache_dirty = true; + + /* If the GPU is snooping the contents of the CPU cache, + * we do not need to manually clear the CPU cache lines. However, + * the caches are only snooped when the render cache is + * flushed/invalidated. As we always have to emit invalidations + * and flushes when moving into and out of the RENDER domain, correct + * snooping behaviour occurs naturally as the result of our domain + * tracking. + */ + if (!(flags & I915_CLFLUSH_FORCE) && i915_gem_object_is_coherent(obj)) + return; + + trace_i915_gem_object_clflush(obj); + + clflush = NULL; + if (!(flags & I915_CLFLUSH_SYNC)) + clflush = kmalloc(sizeof(*clflush), GFP_KERNEL); + if (clflush) { + dma_fence_init(&clflush->dma, + &i915_clflush_ops, + &clflush_lock, + clflush_context, + 0); + i915_sw_fence_init(&clflush->wait, i915_clflush_notify); + + clflush->obj = i915_gem_object_get(obj); + INIT_WORK(&clflush->work, i915_clflush_work); + + dma_fence_get(&clflush->dma); + + i915_sw_fence_await_reservation(&clflush->wait, + obj->resv, NULL, + false, I915_FENCE_TIMEOUT, + GFP_KERNEL); + + reservation_object_lock(obj->resv, NULL); + reservation_object_add_excl_fence(obj->resv, &clflush->dma); + reservation_object_unlock(obj->resv); + + i915_sw_fence_commit(&clflush->wait); + } else if (obj->mm.pages) { + __i915_do_clflush(obj); + } else { + GEM_BUG_ON(obj->base.write_domain != I915_GEM_DOMAIN_CPU); + } +} + +void i915_gem_clflush_init(struct drm_i915_private *i915) +{ + clflush_context = dma_fence_context_alloc(1); +} diff --git a/drivers/gpu/drm/i915/i915_gem_clflush.h b/drivers/gpu/drm/i915/i915_gem_clflush.h new file mode 100644 index 000000000000..b62d61a2d15f --- /dev/null +++ b/drivers/gpu/drm/i915/i915_gem_clflush.h @@ -0,0 +1,37 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#ifndef __I915_GEM_CLFLUSH_H__ +#define __I915_GEM_CLFLUSH_H__ + +struct drm_i915_private; +struct drm_i915_gem_object; + +void i915_gem_clflush_init(struct drm_i915_private *i915); +void i915_gem_clflush_object(struct drm_i915_gem_object *obj, + unsigned int flags); +#define I915_CLFLUSH_FORCE BIT(0) +#define I915_CLFLUSH_SYNC BIT(1) + +#endif /* __I915_GEM_CLFLUSH_H__ */ diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 6fb29832bc63..35d2cb979452 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -35,6 +35,7 @@ #include #include "i915_drv.h" +#include "i915_gem_clflush.h" #include "i915_trace.h" #include "intel_drv.h" #include "intel_frontbuffer.h" @@ -1114,13 +1115,15 @@ i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req, if (vma->exec_entry->flags & EXEC_OBJECT_ASYNC) continue; + if (obj->base.write_domain & I915_GEM_DOMAIN_CPU) { + i915_gem_clflush_object(obj, 0); + obj->base.write_domain = 0; + } + ret = i915_gem_request_await_object (req, obj, obj->base.pending_write_domain); if (ret) return ret; - - if (obj->base.write_domain & I915_GEM_DOMAIN_CPU) - i915_gem_clflush_object(obj, false); } /* Unconditionally flush any chipset caches (for streaming writes). */ diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 4d67cbb3185c..2806d6b0570d 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -37,6 +37,7 @@ #include "intel_frontbuffer.h" #include #include "i915_drv.h" +#include "i915_gem_clflush.h" #include "intel_dsi.h" #include "i915_trace.h" #include @@ -13188,6 +13189,29 @@ intel_prepare_plane_fb(struct drm_plane *plane, struct drm_i915_gem_object *old_obj = intel_fb_obj(plane->state->fb); int ret; + if (obj) { + if (plane->type == DRM_PLANE_TYPE_CURSOR && + INTEL_INFO(dev_priv)->cursor_needs_physical) { + const int align = IS_I830(dev_priv) ? 16 * 1024 : 256; + + ret = i915_gem_object_attach_phys(obj, align); + if (ret) { + DRM_DEBUG_KMS("failed to attach phys object\n"); + return ret; + } + } else { + struct i915_vma *vma; + + vma = intel_pin_and_fence_fb_obj(fb, new_state->rotation); + if (IS_ERR(vma)) { + DRM_DEBUG_KMS("failed to pin object\n"); + return PTR_ERR(vma); + } + + to_intel_plane_state(new_state)->vma = vma; + } + } + if (!obj && !old_obj) return 0; @@ -13240,26 +13264,6 @@ intel_prepare_plane_fb(struct drm_plane *plane, i915_gem_object_wait_priority(obj, 0, I915_PRIORITY_DISPLAY); } - if (plane->type == DRM_PLANE_TYPE_CURSOR && - INTEL_INFO(dev_priv)->cursor_needs_physical) { - int align = IS_I830(dev_priv) ? 16 * 1024 : 256; - ret = i915_gem_object_attach_phys(obj, align); - if (ret) { - DRM_DEBUG_KMS("failed to attach phys object\n"); - return ret; - } - } else { - struct i915_vma *vma; - - vma = intel_pin_and_fence_fb_obj(fb, new_state->rotation); - if (IS_ERR(vma)) { - DRM_DEBUG_KMS("failed to pin object\n"); - return PTR_ERR(vma); - } - - to_intel_plane_state(new_state)->vma = vma; - } - return 0; } -- cgit From d59b21ec6f33b6df7933e86b9906e9f4ee9b218d Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 22 Feb 2017 11:40:49 +0000 Subject: drm/i915: Remove 'retire' parameter from intel_fb_obj_flush Setting retire=true is identical to using origin=ORIGIN_CS, so make the same simplification to intel_fb_obj_flush() as already employed for intel_fb_obj_invalidate(). Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170222114049.28456-6-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem.c | 12 ++++++------ drivers/gpu/drm/i915/i915_gem_clflush.c | 2 +- drivers/gpu/drm/i915/intel_display.c | 2 +- drivers/gpu/drm/i915/intel_frontbuffer.c | 3 +-- drivers/gpu/drm/i915/intel_frontbuffer.h | 8 ++------ 5 files changed, 11 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index fad0f5adb970..e29b6f0dda29 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -628,7 +628,7 @@ i915_gem_phys_pwrite(struct drm_i915_gem_object *obj, drm_clflush_virt_range(vaddr, args->size); i915_gem_chipset_flush(to_i915(obj->base.dev)); - intel_fb_obj_flush(obj, false, ORIGIN_CPU); + intel_fb_obj_flush(obj, ORIGIN_CPU); return 0; } @@ -1275,7 +1275,7 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj, user_data += page_length; offset += page_length; } - intel_fb_obj_flush(obj, false, ORIGIN_CPU); + intel_fb_obj_flush(obj, ORIGIN_CPU); mutex_lock(&i915->drm.struct_mutex); out_unpin: @@ -1411,7 +1411,7 @@ i915_gem_shmem_pwrite(struct drm_i915_gem_object *obj, offset = 0; } - intel_fb_obj_flush(obj, false, ORIGIN_CPU); + intel_fb_obj_flush(obj, ORIGIN_CPU); i915_gem_obj_finish_shmem_access(obj); return ret; } @@ -3162,7 +3162,7 @@ i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj) if (INTEL_GEN(dev_priv) >= 6 && !HAS_LLC(dev_priv)) POSTING_READ(RING_ACTHD(dev_priv->engine[RCS]->mmio_base)); - intel_fb_obj_flush(obj, false, write_origin(obj, I915_GEM_DOMAIN_GTT)); + intel_fb_obj_flush(obj, write_origin(obj, I915_GEM_DOMAIN_GTT)); obj->base.write_domain = 0; } @@ -3552,7 +3552,7 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, /* Treat this as an end-of-frame, like intel_user_framebuffer_dirty() */ __i915_gem_object_flush_for_display(obj); - intel_fb_obj_flush(obj, false, ORIGIN_DIRTYFB); + intel_fb_obj_flush(obj, ORIGIN_DIRTYFB); /* It should now be out of any other write domains, and we can update * the domain values for our changes. @@ -3953,7 +3953,7 @@ frontbuffer_retire(struct i915_gem_active *active, struct drm_i915_gem_object *obj = container_of(active, typeof(*obj), frontbuffer_write); - intel_fb_obj_flush(obj, true, ORIGIN_CS); + intel_fb_obj_flush(obj, ORIGIN_CS); } void i915_gem_object_init(struct drm_i915_gem_object *obj, diff --git a/drivers/gpu/drm/i915/i915_gem_clflush.c b/drivers/gpu/drm/i915/i915_gem_clflush.c index c5fee02f3173..d925fb582ba7 100644 --- a/drivers/gpu/drm/i915/i915_gem_clflush.c +++ b/drivers/gpu/drm/i915/i915_gem_clflush.c @@ -74,7 +74,7 @@ static void __i915_do_clflush(struct drm_i915_gem_object *obj) drm_clflush_sg(obj->mm.pages); obj->cache_dirty = false; - intel_fb_obj_flush(obj, false, ORIGIN_CPU); + intel_fb_obj_flush(obj, ORIGIN_CPU); } static void i915_clflush_work(struct work_struct *work) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 2806d6b0570d..33bde1751679 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -14285,7 +14285,7 @@ static int intel_user_framebuffer_dirty(struct drm_framebuffer *fb, struct drm_i915_gem_object *obj = intel_fb_obj(fb); i915_gem_object_flush_if_display(obj); - intel_fb_obj_flush(obj, false, ORIGIN_DIRTYFB); + intel_fb_obj_flush(obj, ORIGIN_DIRTYFB); return 0; } diff --git a/drivers/gpu/drm/i915/intel_frontbuffer.c b/drivers/gpu/drm/i915/intel_frontbuffer.c index 966de4c7c7a2..fcfc217e754e 100644 --- a/drivers/gpu/drm/i915/intel_frontbuffer.c +++ b/drivers/gpu/drm/i915/intel_frontbuffer.c @@ -114,13 +114,12 @@ static void intel_frontbuffer_flush(struct drm_i915_private *dev_priv, } void __intel_fb_obj_flush(struct drm_i915_gem_object *obj, - bool retire, enum fb_op_origin origin, unsigned int frontbuffer_bits) { struct drm_i915_private *dev_priv = to_i915(obj->base.dev); - if (retire) { + if (origin == ORIGIN_CS) { spin_lock(&dev_priv->fb_tracking.lock); /* Filter out new bits since rendering started. */ frontbuffer_bits &= dev_priv->fb_tracking.busy_bits; diff --git a/drivers/gpu/drm/i915/intel_frontbuffer.h b/drivers/gpu/drm/i915/intel_frontbuffer.h index 7bab41218cf7..63cd9a753a72 100644 --- a/drivers/gpu/drm/i915/intel_frontbuffer.h +++ b/drivers/gpu/drm/i915/intel_frontbuffer.h @@ -38,7 +38,6 @@ void __intel_fb_obj_invalidate(struct drm_i915_gem_object *obj, enum fb_op_origin origin, unsigned int frontbuffer_bits); void __intel_fb_obj_flush(struct drm_i915_gem_object *obj, - bool retire, enum fb_op_origin origin, unsigned int frontbuffer_bits); @@ -69,15 +68,12 @@ static inline bool intel_fb_obj_invalidate(struct drm_i915_gem_object *obj, /** * intel_fb_obj_flush - flush frontbuffer object * @obj: GEM object to flush - * @retire: set when retiring asynchronous rendering * @origin: which operation caused the flush * * This function gets called every time rendering on the given object has - * completed and frontbuffer caching can be started again. If @retire is true - * then any delayed flushes will be unblocked. + * completed and frontbuffer caching can be started again. */ static inline void intel_fb_obj_flush(struct drm_i915_gem_object *obj, - bool retire, enum fb_op_origin origin) { unsigned int frontbuffer_bits; @@ -86,7 +82,7 @@ static inline void intel_fb_obj_flush(struct drm_i915_gem_object *obj, if (!frontbuffer_bits) return; - __intel_fb_obj_flush(obj, retire, origin, frontbuffer_bits); + __intel_fb_obj_flush(obj, origin, frontbuffer_bits); } #endif /* __INTEL_FRONTBUFFER_H__ */ -- cgit From 7ee686034b8b008f19bfd913473acffc1ad51735 Mon Sep 17 00:00:00 2001 From: Lyude Date: Wed, 22 Feb 2017 22:31:02 -0500 Subject: drm/i915/dp: Ratelimit DP aux timeout messages Right now this is just leaving a lot of spam in dmesg that makes real issues more difficult to debug. As well (as noted by the comment right above the DRM_DEBUG_KMS() call) this is normal behavior when there's nothing connected to the DisplayPort connector. Signed-off-by: Lyude --- drivers/gpu/drm/i915/intel_dp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 024798a9c016..fca0fcade2d6 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -1121,7 +1121,8 @@ done: /* Timeouts occur when the device isn't connected, so they're * "normal" -- don't fill the kernel log with these */ if (status & DP_AUX_CH_CTL_TIME_OUT_ERROR) { - DRM_DEBUG_KMS("dp_aux_ch timeout status 0x%08x\n", status); + DRM_DEBUG_KMS_RATELIMITED("dp_aux_ch timeout status 0x%08x\n", + status); ret = -ETIMEDOUT; goto out; } -- cgit From 4509276ee824bb967885c095c610767e42345c36 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 20 Feb 2017 12:47:18 +0000 Subject: drm/i915: Remove Braswell GGTT update w/a Testing with concurrent GGTT accesses no longer show the coherency problems from yonder, commit 5bab6f60cb4d ("drm/i915: Serialise updates to GGTT with access through GGTT on Braswell"). My presumption is that the root cause was more likely fixed by commit 3b5724d702ef ("drm/i915: Wait for writes through the GTT to land before reading back"), along with the use of WC updates to the global gTT in commit 8448661d65f6 ("drm/i915: Convert clflushed pagetables over to WC maps". Given that the original symptoms can no longer be reproduced, time to remove the workaround. Testcase: igt/gem_concurrent_blit Signed-off-by: Chris Wilson Cc: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/20170220124718.14796-1-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_gem_gtt.c | 28 ---------------------------- 1 file changed, 28 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index a5162cbc0ea0..24fa262c2b61 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -2085,32 +2085,6 @@ static void gen8_ggtt_insert_entries(struct i915_address_space *vm, ggtt->invalidate(vm->i915); } -struct insert_entries { - struct i915_address_space *vm; - struct sg_table *st; - u64 start; - enum i915_cache_level level; - u32 flags; -}; - -static int gen8_ggtt_insert_entries__cb(void *_arg) -{ - struct insert_entries *arg = _arg; - gen8_ggtt_insert_entries(arg->vm, arg->st, - arg->start, arg->level, arg->flags); - return 0; -} - -static void gen8_ggtt_insert_entries__BKL(struct i915_address_space *vm, - struct sg_table *st, - u64 start, - enum i915_cache_level level, - u32 flags) -{ - struct insert_entries arg = { vm, st, start, level, flags }; - stop_machine(gen8_ggtt_insert_entries__cb, &arg, NULL); -} - static void gen6_ggtt_insert_page(struct i915_address_space *vm, dma_addr_t addr, u64 offset, @@ -2762,8 +2736,6 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt) ggtt->base.clear_range = gen8_ggtt_clear_range; ggtt->base.insert_entries = gen8_ggtt_insert_entries; - if (IS_CHERRYVIEW(dev_priv)) - ggtt->base.insert_entries = gen8_ggtt_insert_entries__BKL; ggtt->invalidate = gen6_ggtt_invalidate; -- cgit From 19c3164db457e0fc65d4501fd354506228576241 Mon Sep 17 00:00:00 2001 From: Ander Conselvan de Oliveira Date: Thu, 23 Feb 2017 09:15:57 +0200 Subject: drm/i915/glk: Fix watermark computations for third sprite plane MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Geminilake has a third sprite plane (or fourth universal plane) that is independent from the cursor. Make sure that for_each_plane_id_on_crtc() is aware of that extra plane so that the watermark code takes it into account. Fixes: e9c9882556fc ("drm/i915/glk: Configure number of sprite planes properly") Cc: Ander Conselvan de Oliveira Cc: Rodrigo Vivi Cc: Daniel Vetter Cc: Jani Nikula Cc: intel-gfx@lists.freedesktop.org Cc: Signed-off-by: Ander Conselvan de Oliveira Reviewed-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/20170223071600.14356-2-ander.conselvan.de.oliveira@intel.com --- drivers/gpu/drm/i915/i915_drv.h | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 70fc2fa0eaa8..e89f56476759 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -293,6 +293,7 @@ enum plane_id { PLANE_PRIMARY, PLANE_SPRITE0, PLANE_SPRITE1, + PLANE_SPRITE2, PLANE_CURSOR, I915_MAX_PLANES, }; -- cgit From 5b7280f03cdcfcc8af7f087771bb3e49c4caa81b Mon Sep 17 00:00:00 2001 From: Ander Conselvan de Oliveira Date: Thu, 23 Feb 2017 09:15:58 +0200 Subject: drm/i915/glk: Fix maximum scaling factor for Geminilake scalers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Geminilake can output two pixels per clock, and that affects the maximum scaling factor for its scalers. Take that into account and avoid the following warning: WARNING: CPU: 1 PID: 593 at drivers/gpu/drm/i915/intel_display.c:13223 skl_max_scale.part.129+0x78/0x80 [i915] WARN_ON_ONCE(!crtc_clock || cdclk < crtc_clock) Modules linked in: x86_pkg_temp_thermal i915 coretemp kvm_intel kvm i2c_algo_bit drm_kms_helper irqbypass crct10dif_pclmul prime_numbers crc32_pclmul drm ghash_clmulni_intel shpchp tpm_tis tpm_tis_core tpm nfsd authw CPU: 1 PID: 593 Comm: kworker/u8:3 Tainted: G W 4.10.0-rc8ander+ #330 Hardware name: Intel Corp. Geminilake/GLK RVP1 DDR4 (05), BIOS GELKRVPA.X64.0035.B33.1702150552 02/15/2017 Workqueue: events_unbound async_run_entry_fn Call Trace: dump_stack+0x86/0xc3 __warn+0xcb/0xf0 warn_slowpath_fmt+0x5f/0x80 skl_max_scale.part.129+0x78/0x80 [i915] intel_check_primary_plane+0xa6/0xc0 [i915] intel_plane_atomic_check_with_state+0xd1/0x1a0 [i915] ? drm_printk+0xb5/0xc0 [drm] intel_plane_atomic_check+0x3d/0x80 [i915] drm_atomic_helper_check_planes+0x7c/0x200 [drm_kms_helper] intel_atomic_check+0xa5b/0x11a0 [i915] drm_atomic_check_only+0x353/0x600 [drm] ? drm_atomic_add_affected_connectors+0x10c/0x120 [drm] drm_atomic_commit+0x18/0x50 [drm] restore_fbdev_mode+0x14c/0x2a0 [drm_kms_helper] drm_fb_helper_restore_fbdev_mode_unlocked+0x34/0x80 [drm_kms_helper] drm_fb_helper_set_par+0x2d/0x60 [drm_kms_helper] intel_fbdev_set_par+0x1a/0x70 [i915] fbcon_init+0x582/0x610 visual_init+0xd6/0x130 do_bind_con_driver+0x1da/0x3c0 do_take_over_console+0x116/0x180 do_fbcon_takeover+0x5c/0xb0 fbcon_event_notify+0x772/0x8a0 ? __blocking_notifier_call_chain+0x35/0x70 notifier_call_chain+0x4a/0x70 __blocking_notifier_call_chain+0x4d/0x70 blocking_notifier_call_chain+0x16/0x20 fb_notifier_call_chain+0x1b/0x20 register_framebuffer+0x278/0x360 drm_fb_helper_initial_config+0x253/0x440 [drm_kms_helper] intel_fbdev_initial_config+0x18/0x30 [i915] async_run_entry_fn+0x39/0x170 process_one_work+0x212/0x670 ? process_one_work+0x197/0x670 worker_thread+0x4e/0x490 kthread+0x101/0x140 ? process_one_work+0x670/0x670 ? kthread_create_on_node+0x60/0x60 ret_from_fork+0x31/0x40 v2: s/max_pixclk/max_dotclk/ (Ville) Cc: Rodrigo Vivi Signed-off-by: Ander Conselvan de Oliveira Reviewed-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/20170223071600.14356-3-ander.conselvan.de.oliveira@intel.com --- drivers/gpu/drm/i915/intel_display.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 33bde1751679..1e0280e5c9c7 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -13291,16 +13291,22 @@ intel_cleanup_plane_fb(struct drm_plane *plane, int skl_max_scale(struct intel_crtc *intel_crtc, struct intel_crtc_state *crtc_state) { + struct drm_i915_private *dev_priv; int max_scale; - int crtc_clock, cdclk; + int crtc_clock, max_dotclk; if (!intel_crtc || !crtc_state->base.enable) return DRM_PLANE_HELPER_NO_SCALING; + dev_priv = to_i915(intel_crtc->base.dev); + crtc_clock = crtc_state->base.adjusted_mode.crtc_clock; - cdclk = to_intel_atomic_state(crtc_state->base.state)->cdclk.logical.cdclk; + max_dotclk = to_intel_atomic_state(crtc_state->base.state)->cdclk.logical.cdclk; + + if (IS_GEMINILAKE(dev_priv)) + max_dotclk *= 2; - if (WARN_ON_ONCE(!crtc_clock || cdclk < crtc_clock)) + if (WARN_ON_ONCE(!crtc_clock || max_dotclk < crtc_clock)) return DRM_PLANE_HELPER_NO_SCALING; /* @@ -13309,7 +13315,8 @@ skl_max_scale(struct intel_crtc *intel_crtc, struct intel_crtc_state *crtc_state * or * cdclk/crtc_clock */ - max_scale = min((1 << 16) * 3 - 1, (1 << 8) * ((cdclk << 8) / crtc_clock)); + max_scale = min((1 << 16) * 3 - 1, + (1 << 8) * ((max_dotclk << 8) / crtc_clock)); return max_scale; } -- cgit From 6ebc69238df36bc61380a68f607a1c2039af134f Mon Sep 17 00:00:00 2001 From: Ander Conselvan de Oliveira Date: Thu, 23 Feb 2017 09:15:59 +0200 Subject: drm/i915/glk: Pass dev_priv to intel_atomic_setup_scalers() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pass dev_priv to intel_atomic_setup_scalers(). The next patch will need a dev_priv pointer. Cc: Ville Syrjälä Signed-off-by: Ander Conselvan de Oliveira Reviewed-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/20170223071600.14356-4-ander.conselvan.de.oliveira@intel.com --- drivers/gpu/drm/i915/intel_atomic.c | 10 +++++----- drivers/gpu/drm/i915/intel_display.c | 2 +- drivers/gpu/drm/i915/intel_drv.h | 6 +++--- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_atomic.c b/drivers/gpu/drm/i915/intel_atomic.c index aa9160e7f1d8..58916e32c6c4 100644 --- a/drivers/gpu/drm/i915/intel_atomic.c +++ b/drivers/gpu/drm/i915/intel_atomic.c @@ -121,7 +121,7 @@ intel_crtc_destroy_state(struct drm_crtc *crtc, /** * intel_atomic_setup_scalers() - setup scalers for crtc per staged requests - * @dev: DRM device + * @dev_priv: i915 device * @crtc: intel crtc * @crtc_state: incoming crtc_state to validate and setup scalers * @@ -136,9 +136,9 @@ intel_crtc_destroy_state(struct drm_crtc *crtc, * 0 - scalers were setup succesfully * error code - otherwise */ -int intel_atomic_setup_scalers(struct drm_device *dev, - struct intel_crtc *intel_crtc, - struct intel_crtc_state *crtc_state) +int intel_atomic_setup_scalers(struct drm_i915_private *dev_priv, + struct intel_crtc *intel_crtc, + struct intel_crtc_state *crtc_state) { struct drm_plane *plane = NULL; struct intel_plane *intel_plane; @@ -199,7 +199,7 @@ int intel_atomic_setup_scalers(struct drm_device *dev, */ if (!plane) { struct drm_plane_state *state; - plane = drm_plane_from_index(dev, i); + plane = drm_plane_from_index(&dev_priv->drm, i); state = drm_atomic_get_plane_state(drm_state, plane); if (IS_ERR(state)) { DRM_DEBUG_KMS("Failed to add [PLANE:%d] to drm_state\n", diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 1e0280e5c9c7..5a91f6741fed 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -11036,7 +11036,7 @@ static int intel_crtc_atomic_check(struct drm_crtc *crtc, ret = skl_update_scaler_crtc(pipe_config); if (!ret) - ret = intel_atomic_setup_scalers(dev, intel_crtc, + ret = intel_atomic_setup_scalers(dev_priv, intel_crtc, pipe_config); } diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 11f2e4163bd4..9b5fcc1423eb 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -1894,9 +1894,9 @@ intel_atomic_get_existing_plane_state(struct drm_atomic_state *state, return to_intel_plane_state(plane_state); } -int intel_atomic_setup_scalers(struct drm_device *dev, - struct intel_crtc *intel_crtc, - struct intel_crtc_state *crtc_state); +int intel_atomic_setup_scalers(struct drm_i915_private *dev_priv, + struct intel_crtc *intel_crtc, + struct intel_crtc_state *crtc_state); /* intel_atomic_plane.c */ struct intel_plane_state *intel_create_plane_state(struct drm_plane *plane); -- cgit From 08f5ba97aa9c975e8b3113620123c25998348dc4 Mon Sep 17 00:00:00 2001 From: Ander Conselvan de Oliveira Date: Thu, 23 Feb 2017 09:16:00 +0200 Subject: drm/i915/glk: Fix Geminilake scalers mode programming MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Geminilake scalers can do 7x7 filtering for all supported input sizes, so it doesn't need the "high quality" mode programming, which was actually removed from that platform. v2: Split dev_priv parameter change out. (Ville) Cc: Ville Syrjälä , Signed-off-by: Ander Conselvan de Oliveira Reviewed-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/20170223071600.14356-5-ander.conselvan.de.oliveira@intel.com --- drivers/gpu/drm/i915/intel_atomic.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_atomic.c b/drivers/gpu/drm/i915/intel_atomic.c index 58916e32c6c4..e65818e5d2ab 100644 --- a/drivers/gpu/drm/i915/intel_atomic.c +++ b/drivers/gpu/drm/i915/intel_atomic.c @@ -247,7 +247,9 @@ int intel_atomic_setup_scalers(struct drm_i915_private *dev_priv, } /* set scaler mode */ - if (num_scalers_need == 1 && intel_crtc->pipe != PIPE_C) { + if (IS_GEMINILAKE(dev_priv)) { + scaler_state->scalers[*scaler_id].mode = 0; + } else if (num_scalers_need == 1 && intel_crtc->pipe != PIPE_C) { /* * when only 1 scaler is in use on either pipe A or B, * scaler 0 operates in high quality (HQ) mode. -- cgit From a5570fe5c2ca012f92666eb790973827554e19df Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 23 Feb 2017 11:51:02 +0000 Subject: Revert "drm/i915/dp: Ratelimit DP aux timeout messages" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 7ee686034b8b "drm/i915/dp: Ratelimit DP aux timeout messages" as although it successfully squelches the debug messages, when it does so it generates a warning instead. CI lights up orange with all the warnings! In its current incarnation DRM_DEBUG_RATELIMITED is not usable for us, and we need to first teach lib/ratelimit.c not to warn when used for debug messages. Fixes: 7ee686034b8b ("drm/i915/dp: Ratelimit DP aux timeout messages") Signed-off-by: Chris Wilson Cc: Lyude Cc: Daniel Vetter Cc: Jani Nikula Cc: intel-gfx@lists.freedesktop.org Link: http://patchwork.freedesktop.org/patch/msgid/20170223115102.7059-1-chris@chris-wilson.co.uk Acked-by: Ville Syrjälä --- drivers/gpu/drm/i915/intel_dp.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index fca0fcade2d6..024798a9c016 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -1121,8 +1121,7 @@ done: /* Timeouts occur when the device isn't connected, so they're * "normal" -- don't fill the kernel log with these */ if (status & DP_AUX_CH_CTL_TIME_OUT_ERROR) { - DRM_DEBUG_KMS_RATELIMITED("dp_aux_ch timeout status 0x%08x\n", - status); + DRM_DEBUG_KMS("dp_aux_ch timeout status 0x%08x\n", status); ret = -ETIMEDOUT; goto out; } -- cgit From 309663ab7b4f0de1540aff212fd067e3dd92acf3 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 23 Feb 2017 07:44:07 +0000 Subject: drm/i915: Check against the signaled bit for fences/requests When dma_fence_signal() is called, it sets a flag to indicate the fence is complete. Before the dma_fence is signaled, the seqno check will first be passed. During an unlocked check (such as inside a waiter), it is possible for the fence to be signaled even though the seqno has been reset (by engine wraparound). In this case the waiter will be kicked, but for an extra layer of protection we can check the persistent signaled bit from the fence. Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170223074422.4125-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_drv.h | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index e89f56476759..9220a28fbaa8 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -4013,6 +4013,15 @@ __i915_request_irq_complete(struct drm_i915_gem_request *req) { struct intel_engine_cs *engine = req->engine; + /* Note that the engine may have wrapped around the seqno, and + * so our request->global_seqno will be ahead of the hardware, + * even though it completed the request before wrapping. We catch + * this by kicking all the waiters before resetting the seqno + * in hardware, and also signal the fence. + */ + if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &req->fence.flags)) + return true; + /* Before we do the heavier coherent read of the seqno, * check the value (hopefully) in the CPU cacheline. */ -- cgit From 9b6586ae9f6bf2115cf414d6004bbe69a4255692 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 23 Feb 2017 07:44:08 +0000 Subject: drm/i915: Keep a global seqno per-engine Replace the global device seqno with one for each engine, and account for in-flight seqno on each separately. This is consistent with dma-fence as each timeline has separate fence-contexts for each engine and a seqno is only ordered within a fence-context (i.e. seqno do not need to be ordered wrt to other engines, just ordered within a single engine). This is required to enable request rewinding for preemption on individual engines (we have to rewind the global seqno to avoid overflow, and we do not have to rewind all engines just to preempt one.) v2: Rename active_seqno to inflight_seqnos to more clearly indicate that it is a counter and not equivalent to the existing seqno. Update functions that operated on active_seqno similarly. Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Reviewed-by: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170223074422.4125-3-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_debugfs.c | 11 +---- drivers/gpu/drm/i915/i915_gem_request.c | 83 +++++++++++++++++--------------- drivers/gpu/drm/i915/i915_gem_request.h | 8 +-- drivers/gpu/drm/i915/i915_gem_timeline.h | 9 +++- drivers/gpu/drm/i915/intel_breadcrumbs.c | 33 ++++++------- drivers/gpu/drm/i915/intel_engine_cs.c | 2 - drivers/gpu/drm/i915/intel_ringbuffer.h | 4 +- 7 files changed, 70 insertions(+), 80 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 655e60d609c2..1a28b5279bec 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -1079,15 +1079,6 @@ static const struct file_operations i915_error_state_fops = { }; #endif -static int -i915_next_seqno_get(void *data, u64 *val) -{ - struct drm_i915_private *dev_priv = data; - - *val = 1 + atomic_read(&dev_priv->gt.global_timeline.seqno); - return 0; -} - static int i915_next_seqno_set(void *data, u64 val) { @@ -1106,7 +1097,7 @@ i915_next_seqno_set(void *data, u64 val) } DEFINE_SIMPLE_ATTRIBUTE(i915_next_seqno_fops, - i915_next_seqno_get, i915_next_seqno_set, + NULL, i915_next_seqno_set, "0x%llx\n"); static int i915_frequency_info(struct seq_file *m, void *unused) diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index 12b06e055490..b94ae6cf9837 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -198,6 +198,12 @@ i915_priotree_init(struct i915_priotree *pt) pt->priority = INT_MIN; } +static void unreserve_seqno(struct intel_engine_cs *engine) +{ + GEM_BUG_ON(!engine->timeline->inflight_seqnos); + engine->timeline->inflight_seqnos--; +} + void i915_gem_retire_noop(struct i915_gem_active *active, struct drm_i915_gem_request *request) { @@ -237,6 +243,7 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request) &request->i915->gt.idle_work, msecs_to_jiffies(100)); } + unreserve_seqno(request->engine); /* Walk through the active list, calling retire on each. This allows * objects to track their GPU activity and mark themselves as idle @@ -307,7 +314,7 @@ void i915_gem_request_retire_upto(struct drm_i915_gem_request *req) } while (tmp != req); } -static int i915_gem_init_global_seqno(struct drm_i915_private *i915, u32 seqno) +static int reset_all_global_seqno(struct drm_i915_private *i915, u32 seqno) { struct i915_gem_timeline *timeline = &i915->gt.global_timeline; struct intel_engine_cs *engine; @@ -325,15 +332,19 @@ static int i915_gem_init_global_seqno(struct drm_i915_private *i915, u32 seqno) GEM_BUG_ON(i915->gt.active_requests > 1); /* If the seqno wraps around, we need to clear the breadcrumb rbtree */ - if (!i915_seqno_passed(seqno, atomic_read(&timeline->seqno))) { - while (intel_breadcrumbs_busy(i915)) - cond_resched(); /* spin until threads are complete */ - } - atomic_set(&timeline->seqno, seqno); + for_each_engine(engine, i915, id) { + struct intel_timeline *tl = &timeline->engine[id]; - /* Finally reset hw state */ - for_each_engine(engine, i915, id) + if (!i915_seqno_passed(seqno, tl->seqno)) { + /* spin until threads are complete */ + while (intel_breadcrumbs_busy(engine)) + cond_resched(); + } + + /* Finally reset hw state */ + tl->seqno = seqno; intel_engine_init_global_seqno(engine, seqno); + } list_for_each_entry(timeline, &i915->gt.timelines, link) { for_each_engine(engine, i915, id) { @@ -358,37 +369,38 @@ int i915_gem_set_global_seqno(struct drm_device *dev, u32 seqno) /* HWS page needs to be set less than what we * will inject to ring */ - return i915_gem_init_global_seqno(dev_priv, seqno - 1); + return reset_all_global_seqno(dev_priv, seqno - 1); } -static int reserve_global_seqno(struct drm_i915_private *i915) +static int reserve_seqno(struct intel_engine_cs *engine) { - u32 active_requests = ++i915->gt.active_requests; - u32 seqno = atomic_read(&i915->gt.global_timeline.seqno); + u32 active = ++engine->timeline->inflight_seqnos; + u32 seqno = engine->timeline->seqno; int ret; /* Reservation is fine until we need to wrap around */ - if (likely(seqno + active_requests > seqno)) + if (likely(!add_overflows(seqno, active))) return 0; - ret = i915_gem_init_global_seqno(i915, 0); + /* Even though we are tracking inflight seqno individually on each + * engine, other engines may be observing us using hw semaphores and + * so we need to idle all engines before wrapping around this engine. + * As all engines are then idle, we can reset the seqno on all, so + * we don't stall in quick succession if each engine is being + * similarly utilized. + */ + ret = reset_all_global_seqno(engine->i915, 0); if (ret) { - i915->gt.active_requests--; + engine->timeline->inflight_seqnos--; return ret; } return 0; } -static u32 __timeline_get_seqno(struct i915_gem_timeline *tl) -{ - /* seqno only incremented under a mutex */ - return ++tl->seqno.counter; -} - -static u32 timeline_get_seqno(struct i915_gem_timeline *tl) +static u32 timeline_get_seqno(struct intel_timeline *tl) { - return atomic_inc_return(&tl->seqno); + return ++tl->seqno; } void __i915_gem_request_submit(struct drm_i915_gem_request *request) @@ -402,14 +414,10 @@ void __i915_gem_request_submit(struct drm_i915_gem_request *request) GEM_BUG_ON(timeline == request->timeline); assert_spin_locked(&timeline->lock); - seqno = timeline_get_seqno(timeline->common); + seqno = timeline_get_seqno(timeline); GEM_BUG_ON(!seqno); GEM_BUG_ON(i915_seqno_passed(intel_engine_get_seqno(engine), seqno)); - GEM_BUG_ON(i915_seqno_passed(timeline->last_submitted_seqno, seqno)); - request->previous_seqno = timeline->last_submitted_seqno; - timeline->last_submitted_seqno = seqno; - /* We may be recursing from the signal callback of another i915 fence */ spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING); request->global_seqno = seqno; @@ -516,7 +524,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine, if (ret) return ERR_PTR(ret); - ret = reserve_global_seqno(dev_priv); + ret = reserve_seqno(engine); if (ret) goto err_unpin; @@ -568,7 +576,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine, &i915_fence_ops, &req->lock, req->timeline->fence_context, - __timeline_get_seqno(req->timeline->common)); + timeline_get_seqno(req->timeline)); /* We bump the ref for the fence chain */ i915_sw_fence_init(&i915_gem_request_get(req)->submit, submit_notify); @@ -613,6 +621,8 @@ i915_gem_request_alloc(struct intel_engine_cs *engine, */ req->head = req->ring->tail; + /* Check that we didn't interrupt ourselves with a new request */ + GEM_BUG_ON(req->timeline->seqno != req->fence.seqno); return req; err_ctx: @@ -623,7 +633,7 @@ err_ctx: kmem_cache_free(dev_priv->requests, req); err_unreserve: - dev_priv->gt.active_requests--; + unreserve_seqno(engine); err_unpin: engine->context_unpin(engine, ctx); return ERR_PTR(ret); @@ -836,8 +846,7 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches) * our i915_gem_request_alloc() and called __i915_add_request() before * us, the timeline will hold its seqno which is later than ours. */ - GEM_BUG_ON(i915_seqno_passed(timeline->last_submitted_seqno, - request->fence.seqno)); + GEM_BUG_ON(timeline->seqno != request->fence.seqno); /* * To ensure that this call will not fail, space for its emissions @@ -891,16 +900,14 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches) list_add_tail(&request->link, &timeline->requests); spin_unlock_irq(&timeline->lock); - GEM_BUG_ON(i915_seqno_passed(timeline->last_submitted_seqno, - request->fence.seqno)); - - timeline->last_submitted_seqno = request->fence.seqno; + GEM_BUG_ON(timeline->seqno != request->fence.seqno); i915_gem_active_set(&timeline->last_request, request); list_add_tail(&request->ring_link, &ring->request_list); request->emitted_jiffies = jiffies; - i915_gem_mark_busy(engine); + if (!request->i915->gt.active_requests++) + i915_gem_mark_busy(engine); /* Let the backend know a new request has arrived that may need * to adjust the existing execution schedule due to a high priority diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h index ea511f06efaf..9049936c571c 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.h +++ b/drivers/gpu/drm/i915/i915_gem_request.h @@ -145,12 +145,6 @@ struct drm_i915_gem_request { u32 global_seqno; - /** GEM sequence number associated with the previous request, - * when the HWS breadcrumb is equal to this the GPU is processing - * this request. - */ - u32 previous_seqno; - /** Position in the ring of the start of the request */ u32 head; @@ -287,7 +281,7 @@ __i915_gem_request_started(const struct drm_i915_gem_request *req) { GEM_BUG_ON(!req->global_seqno); return i915_seqno_passed(intel_engine_get_seqno(req->engine), - req->previous_seqno); + req->global_seqno - 1); } static inline bool diff --git a/drivers/gpu/drm/i915/i915_gem_timeline.h b/drivers/gpu/drm/i915/i915_gem_timeline.h index f2e51f42cc2f..6c53e14cab2a 100644 --- a/drivers/gpu/drm/i915/i915_gem_timeline.h +++ b/drivers/gpu/drm/i915/i915_gem_timeline.h @@ -33,7 +33,13 @@ struct i915_gem_timeline; struct intel_timeline { u64 fence_context; - u32 last_submitted_seqno; + u32 seqno; + + /** + * Count of outstanding requests, from the time they are constructed + * to the moment they are retired. Loosely coupled to hardware. + */ + u32 inflight_seqnos; spinlock_t lock; @@ -56,7 +62,6 @@ struct intel_timeline { struct i915_gem_timeline { struct list_head link; - atomic_t seqno; struct drm_i915_private *i915; const char *name; diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c index 5932e2dc0c6f..4f4e703d1b14 100644 --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c @@ -676,31 +676,26 @@ void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine) cancel_fake_irq(engine); } -unsigned int intel_breadcrumbs_busy(struct drm_i915_private *i915) +bool intel_breadcrumbs_busy(struct intel_engine_cs *engine) { - struct intel_engine_cs *engine; - enum intel_engine_id id; - unsigned int mask = 0; - - for_each_engine(engine, i915, id) { - struct intel_breadcrumbs *b = &engine->breadcrumbs; - - spin_lock_irq(&b->lock); + struct intel_breadcrumbs *b = &engine->breadcrumbs; + bool busy = false; - if (b->first_wait) { - wake_up_process(b->first_wait->tsk); - mask |= intel_engine_flag(engine); - } + spin_lock_irq(&b->lock); - if (b->first_signal) { - wake_up_process(b->signaler); - mask |= intel_engine_flag(engine); - } + if (b->first_wait) { + wake_up_process(b->first_wait->tsk); + busy |= intel_engine_flag(engine); + } - spin_unlock_irq(&b->lock); + if (b->first_signal) { + wake_up_process(b->signaler); + busy |= intel_engine_flag(engine); } - return mask; + spin_unlock_irq(&b->lock); + + return busy; } #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 46c94740be53..c4d4698f98e7 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -252,8 +252,6 @@ void intel_engine_init_global_seqno(struct intel_engine_cs *engine, u32 seqno) engine->irq_seqno_barrier(engine); GEM_BUG_ON(i915_gem_active_isset(&engine->timeline->last_request)); - engine->timeline->last_submitted_seqno = seqno; - engine->hangcheck.seqno = seqno; /* After manually advancing the seqno, fake the interrupt in case diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 4091c97be6ec..b91c2c7ef5a6 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -556,7 +556,7 @@ static inline u32 intel_engine_last_submit(struct intel_engine_cs *engine) * wtih serialising this hint with anything, so document it as * a hint and nothing more. */ - return READ_ONCE(engine->timeline->last_submitted_seqno); + return READ_ONCE(engine->timeline->seqno); } int init_workarounds_ring(struct intel_engine_cs *engine); @@ -630,7 +630,7 @@ static inline bool intel_engine_wakeup(const struct intel_engine_cs *engine) void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine); void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine); -unsigned int intel_breadcrumbs_busy(struct drm_i915_private *i915); +bool intel_breadcrumbs_busy(struct intel_engine_cs *engine); static inline u32 *gen8_emit_pipe_control(u32 *batch, u32 flags, u32 offset) { -- cgit From 12d3173b2e55b4abeb6710dbd7b823f5241c4a45 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 23 Feb 2017 07:44:09 +0000 Subject: drm/i915: Move reserve_seqno() next to unreserve_seqno() Move the companion functions next to each other. Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170223074422.4125-4-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_request.c | 161 +++++++++++++++----------------- 1 file changed, 77 insertions(+), 84 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index b94ae6cf9837..f33cfc19550c 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -198,6 +198,83 @@ i915_priotree_init(struct i915_priotree *pt) pt->priority = INT_MIN; } +static int reset_all_global_seqno(struct drm_i915_private *i915, u32 seqno) +{ + struct i915_gem_timeline *timeline = &i915->gt.global_timeline; + struct intel_engine_cs *engine; + enum intel_engine_id id; + int ret; + + /* Carefully retire all requests without writing to the rings */ + ret = i915_gem_wait_for_idle(i915, + I915_WAIT_INTERRUPTIBLE | + I915_WAIT_LOCKED); + if (ret) + return ret; + + i915_gem_retire_requests(i915); + GEM_BUG_ON(i915->gt.active_requests > 1); + + /* If the seqno wraps around, we need to clear the breadcrumb rbtree */ + for_each_engine(engine, i915, id) { + struct intel_timeline *tl = &timeline->engine[id]; + + if (!i915_seqno_passed(seqno, tl->seqno)) { + /* spin until threads are complete */ + while (intel_breadcrumbs_busy(engine)) + cond_resched(); + } + + /* Finally reset hw state */ + tl->seqno = seqno; + intel_engine_init_global_seqno(engine, seqno); + } + + list_for_each_entry(timeline, &i915->gt.timelines, link) { + for_each_engine(engine, i915, id) { + struct intel_timeline *tl = &timeline->engine[id]; + + memset(tl->sync_seqno, 0, sizeof(tl->sync_seqno)); + } + } + + return 0; +} + +int i915_gem_set_global_seqno(struct drm_device *dev, u32 seqno) +{ + struct drm_i915_private *dev_priv = to_i915(dev); + + lockdep_assert_held(&dev_priv->drm.struct_mutex); + + if (seqno == 0) + return -EINVAL; + + /* HWS page needs to be set less than what we + * will inject to ring + */ + return reset_all_global_seqno(dev_priv, seqno - 1); +} + +static int reserve_seqno(struct intel_engine_cs *engine) +{ + u32 active = ++engine->timeline->inflight_seqnos; + u32 seqno = engine->timeline->seqno; + int ret; + + /* Reservation is fine until we need to wrap around */ + if (likely(!add_overflows(seqno, active))) + return 0; + + ret = reset_all_global_seqno(engine->i915, 0); + if (ret) { + engine->timeline->inflight_seqnos--; + return ret; + } + + return 0; +} + static void unreserve_seqno(struct intel_engine_cs *engine) { GEM_BUG_ON(!engine->timeline->inflight_seqnos); @@ -314,90 +391,6 @@ void i915_gem_request_retire_upto(struct drm_i915_gem_request *req) } while (tmp != req); } -static int reset_all_global_seqno(struct drm_i915_private *i915, u32 seqno) -{ - struct i915_gem_timeline *timeline = &i915->gt.global_timeline; - struct intel_engine_cs *engine; - enum intel_engine_id id; - int ret; - - /* Carefully retire all requests without writing to the rings */ - ret = i915_gem_wait_for_idle(i915, - I915_WAIT_INTERRUPTIBLE | - I915_WAIT_LOCKED); - if (ret) - return ret; - - i915_gem_retire_requests(i915); - GEM_BUG_ON(i915->gt.active_requests > 1); - - /* If the seqno wraps around, we need to clear the breadcrumb rbtree */ - for_each_engine(engine, i915, id) { - struct intel_timeline *tl = &timeline->engine[id]; - - if (!i915_seqno_passed(seqno, tl->seqno)) { - /* spin until threads are complete */ - while (intel_breadcrumbs_busy(engine)) - cond_resched(); - } - - /* Finally reset hw state */ - tl->seqno = seqno; - intel_engine_init_global_seqno(engine, seqno); - } - - list_for_each_entry(timeline, &i915->gt.timelines, link) { - for_each_engine(engine, i915, id) { - struct intel_timeline *tl = &timeline->engine[id]; - - memset(tl->sync_seqno, 0, sizeof(tl->sync_seqno)); - } - } - - return 0; -} - -int i915_gem_set_global_seqno(struct drm_device *dev, u32 seqno) -{ - struct drm_i915_private *dev_priv = to_i915(dev); - - lockdep_assert_held(&dev_priv->drm.struct_mutex); - - if (seqno == 0) - return -EINVAL; - - /* HWS page needs to be set less than what we - * will inject to ring - */ - return reset_all_global_seqno(dev_priv, seqno - 1); -} - -static int reserve_seqno(struct intel_engine_cs *engine) -{ - u32 active = ++engine->timeline->inflight_seqnos; - u32 seqno = engine->timeline->seqno; - int ret; - - /* Reservation is fine until we need to wrap around */ - if (likely(!add_overflows(seqno, active))) - return 0; - - /* Even though we are tracking inflight seqno individually on each - * engine, other engines may be observing us using hw semaphores and - * so we need to idle all engines before wrapping around this engine. - * As all engines are then idle, we can reset the seqno on all, so - * we don't stall in quick succession if each engine is being - * similarly utilized. - */ - ret = reset_all_global_seqno(engine->i915, 0); - if (ret) { - engine->timeline->inflight_seqnos--; - return ret; - } - - return 0; -} - static u32 timeline_get_seqno(struct intel_timeline *tl) { return ++tl->seqno; -- cgit From 4b36b2e506779008c302784437dc102d1ccc7537 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 23 Feb 2017 07:44:10 +0000 Subject: drm/i915: Use a local to shorten req->i915->gpu_error.wait_queue Use a local variable to avoid having to type out the full name of the gpu_error wait_queue. Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Reviewed-by: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170223074422.4125-5-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_request.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index f33cfc19550c..a40b825763a3 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -1077,6 +1077,7 @@ long i915_wait_request(struct drm_i915_gem_request *req, { const int state = flags & I915_WAIT_INTERRUPTIBLE ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE; + wait_queue_head_t *errq = &req->i915->gpu_error.wait_queue; DEFINE_WAIT(reset); struct intel_wait wait; @@ -1112,7 +1113,7 @@ long i915_wait_request(struct drm_i915_gem_request *req, set_current_state(state); if (flags & I915_WAIT_LOCKED) - add_wait_queue(&req->i915->gpu_error.wait_queue, &reset); + add_wait_queue(errq, &reset); intel_wait_init(&wait, req->global_seqno); if (intel_engine_add_wait(req->engine, &wait)) @@ -1163,8 +1164,7 @@ wakeup: i915_reset_in_progress(&req->i915->gpu_error)) { __set_current_state(TASK_RUNNING); i915_reset(req->i915); - reset_wait_queue(&req->i915->gpu_error.wait_queue, - &reset); + reset_wait_queue(errq, &reset); continue; } @@ -1175,7 +1175,7 @@ wakeup: intel_engine_remove_wait(req->engine, &wait); if (flags & I915_WAIT_LOCKED) - remove_wait_queue(&req->i915->gpu_error.wait_queue, &reset); + remove_wait_queue(errq, &reset); __set_current_state(TASK_RUNNING); complete: -- cgit From 7de53bf7e6f3f823267f77b1fe19e6db7532000d Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 23 Feb 2017 07:44:11 +0000 Subject: drm/i915: Add ourselves to the gpu error waitqueue for the entire wait Add ourselves to the gpu error waitqueue earlier on, even before we determine we have to wait on the seqno. This is so that we can then share the waitqueue between stages in subsequent patches. Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Reviewed-by: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170223074422.4125-6-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_request.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index a40b825763a3..80142a6027da 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -1097,6 +1097,9 @@ long i915_wait_request(struct drm_i915_gem_request *req, trace_i915_gem_request_wait_begin(req, flags); + if (flags & I915_WAIT_LOCKED) + add_wait_queue(errq, &reset); + if (!i915_sw_fence_done(&req->execute)) { timeout = __i915_request_wait_for_execute(req, flags, timeout); if (timeout < 0) @@ -1112,9 +1115,6 @@ long i915_wait_request(struct drm_i915_gem_request *req, goto complete; set_current_state(state); - if (flags & I915_WAIT_LOCKED) - add_wait_queue(errq, &reset); - intel_wait_init(&wait, req->global_seqno); if (intel_engine_add_wait(req->engine, &wait)) /* In order to check that we haven't missed the interrupt @@ -1174,11 +1174,11 @@ wakeup: } intel_engine_remove_wait(req->engine, &wait); - if (flags & I915_WAIT_LOCKED) - remove_wait_queue(errq, &reset); __set_current_state(TASK_RUNNING); complete: + if (flags & I915_WAIT_LOCKED) + remove_wait_queue(errq, &reset); trace_i915_gem_request_wait_end(req); return timeout; -- cgit From 541ca6ed79f0058f8885b2b41409204731946767 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 23 Feb 2017 07:44:12 +0000 Subject: drm/i915: Inline __i915_gem_request_wait_for_execute() It had only one callsite and existed to keep the code clearer. Now having shared the wait-on-error between phases and with plans to change the wait-for-execute in the next few patches, remove the out of line wait loop and move it into the main body of i915_wait_request. Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Reviewed-by: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170223074422.4125-7-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_request.c | 78 ++++++++++++--------------------- 1 file changed, 29 insertions(+), 49 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index 80142a6027da..34d4c123546e 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -1004,54 +1004,6 @@ bool __i915_spin_request(const struct drm_i915_gem_request *req, return false; } -static long -__i915_request_wait_for_execute(struct drm_i915_gem_request *request, - unsigned int flags, - long timeout) -{ - const int state = flags & I915_WAIT_INTERRUPTIBLE ? - TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE; - wait_queue_head_t *q = &request->i915->gpu_error.wait_queue; - DEFINE_WAIT(reset); - DEFINE_WAIT(wait); - - if (flags & I915_WAIT_LOCKED) - add_wait_queue(q, &reset); - - do { - prepare_to_wait(&request->execute.wait, &wait, state); - - if (i915_sw_fence_done(&request->execute)) - break; - - if (flags & I915_WAIT_LOCKED && - i915_reset_in_progress(&request->i915->gpu_error)) { - __set_current_state(TASK_RUNNING); - i915_reset(request->i915); - reset_wait_queue(q, &reset); - continue; - } - - if (signal_pending_state(state, current)) { - timeout = -ERESTARTSYS; - break; - } - - if (!timeout) { - timeout = -ETIME; - break; - } - - timeout = io_schedule_timeout(timeout); - } while (1); - finish_wait(&request->execute.wait, &wait); - - if (flags & I915_WAIT_LOCKED) - remove_wait_queue(q, &reset); - - return timeout; -} - /** * i915_wait_request - wait until execution of request has finished * @req: the request to wait upon @@ -1101,7 +1053,35 @@ long i915_wait_request(struct drm_i915_gem_request *req, add_wait_queue(errq, &reset); if (!i915_sw_fence_done(&req->execute)) { - timeout = __i915_request_wait_for_execute(req, flags, timeout); + DEFINE_WAIT(exec); + + do { + prepare_to_wait(&req->execute.wait, &exec, state); + if (i915_sw_fence_done(&req->execute)) + break; + + if (flags & I915_WAIT_LOCKED && + i915_reset_in_progress(&req->i915->gpu_error)) { + __set_current_state(TASK_RUNNING); + i915_reset(req->i915); + reset_wait_queue(errq, &reset); + continue; + } + + if (signal_pending_state(state, current)) { + timeout = -ERESTARTSYS; + break; + } + + if (!timeout) { + timeout = -ETIME; + break; + } + + timeout = io_schedule_timeout(timeout); + } while (1); + finish_wait(&req->execute.wait, &exec); + if (timeout < 0) goto complete; -- cgit From fe49789fab974dd04c0fef26a4ce7f7f411b40dd Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 23 Feb 2017 07:44:13 +0000 Subject: drm/i915: Deconstruct execute fence On reflection, we are only using the execute fence as a waitqueue on the global_seqno and not using it for dependency tracking between fences (unlike the submit and dma fences). By only treating it as a waitqueue, we can then treat it similar to the other waitqueues during submit, making the code simpler. Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170223074422.4125-8-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_request.c | 50 ++++++++------------------------- drivers/gpu/drm/i915/i915_gem_request.h | 10 +------ 2 files changed, 13 insertions(+), 47 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index 34d4c123546e..37ef7084fce5 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -69,7 +69,6 @@ static void i915_fence_release(struct dma_fence *fence) * caught trying to reuse dead objects. */ i915_sw_fence_fini(&req->submit); - i915_sw_fence_fini(&req->execute); kmem_cache_free(req->i915->requests, req); } @@ -294,7 +293,6 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request) lockdep_assert_held(&request->i915->drm.struct_mutex); GEM_BUG_ON(!i915_sw_fence_signaled(&request->submit)); - GEM_BUG_ON(!i915_sw_fence_signaled(&request->execute)); GEM_BUG_ON(!i915_gem_request_completed(request)); GEM_BUG_ON(!request->i915->gt.active_requests); @@ -402,6 +400,8 @@ void __i915_gem_request_submit(struct drm_i915_gem_request *request) struct intel_timeline *timeline; u32 seqno; + trace_i915_gem_request_execute(request); + /* Transfer from per-context onto the global per-engine timeline */ timeline = engine->timeline; GEM_BUG_ON(timeline == request->timeline); @@ -426,8 +426,7 @@ void __i915_gem_request_submit(struct drm_i915_gem_request *request) list_move_tail(&request->link, &timeline->requests); spin_unlock(&request->timeline->lock); - i915_sw_fence_commit(&request->execute); - trace_i915_gem_request_execute(request); + wake_up_all(&request->execute); } void i915_gem_request_submit(struct drm_i915_gem_request *request) @@ -463,24 +462,6 @@ submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state) return NOTIFY_DONE; } -static int __i915_sw_fence_call -execute_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state) -{ - struct drm_i915_gem_request *request = - container_of(fence, typeof(*request), execute); - - switch (state) { - case FENCE_COMPLETE: - break; - - case FENCE_FREE: - i915_gem_request_put(request); - break; - } - - return NOTIFY_DONE; -} - /** * i915_gem_request_alloc - allocate a request structure * @@ -573,13 +554,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine, /* We bump the ref for the fence chain */ i915_sw_fence_init(&i915_gem_request_get(req)->submit, submit_notify); - i915_sw_fence_init(&i915_gem_request_get(req)->execute, execute_notify); - - /* Ensure that the execute fence completes after the submit fence - - * as we complete the execute fence from within the submit fence - * callback, its completion would otherwise be visible first. - */ - i915_sw_fence_await_sw_fence(&req->execute, &req->submit, &req->execq); + init_waitqueue_head(&req->execute); i915_priotree_init(&req->priotree); @@ -1031,6 +1006,7 @@ long i915_wait_request(struct drm_i915_gem_request *req, TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE; wait_queue_head_t *errq = &req->i915->gpu_error.wait_queue; DEFINE_WAIT(reset); + DEFINE_WAIT(exec); struct intel_wait wait; might_sleep(); @@ -1052,12 +1028,11 @@ long i915_wait_request(struct drm_i915_gem_request *req, if (flags & I915_WAIT_LOCKED) add_wait_queue(errq, &reset); - if (!i915_sw_fence_done(&req->execute)) { - DEFINE_WAIT(exec); - + reset_wait_queue(&req->execute, &exec); + if (!req->global_seqno) { do { - prepare_to_wait(&req->execute.wait, &exec, state); - if (i915_sw_fence_done(&req->execute)) + set_current_state(state); + if (req->global_seqno) break; if (flags & I915_WAIT_LOCKED && @@ -1080,15 +1055,14 @@ long i915_wait_request(struct drm_i915_gem_request *req, timeout = io_schedule_timeout(timeout); } while (1); - finish_wait(&req->execute.wait, &exec); + finish_wait(&req->execute, &exec); if (timeout < 0) goto complete; - GEM_BUG_ON(!i915_sw_fence_done(&req->execute)); + GEM_BUG_ON(!req->global_seqno); } - GEM_BUG_ON(!i915_sw_fence_done(&req->submit)); - GEM_BUG_ON(!req->global_seqno); + GEM_BUG_ON(!i915_sw_fence_signaled(&req->submit)); /* Optimistic short spin before touching IRQs */ if (i915_spin_request(req, state, 5)) diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h index 9049936c571c..467d3e13fce0 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.h +++ b/drivers/gpu/drm/i915/i915_gem_request.h @@ -119,18 +119,10 @@ struct drm_i915_gem_request { * The submit fence is used to await upon all of the request's * dependencies. When it is signaled, the request is ready to run. * It is used by the driver to then queue the request for execution. - * - * The execute fence is used to signal when the request has been - * sent to hardware. - * - * It is illegal for the submit fence of one request to wait upon the - * execute fence of an earlier request. It should be sufficient to - * wait upon the submit fence of the earlier request. */ struct i915_sw_fence submit; - struct i915_sw_fence execute; wait_queue_t submitq; - wait_queue_t execq; + wait_queue_head_t execute; /* A list of everyone we wait upon, and everyone who waits upon us. * Even though we will not be submitted to the hardware before the -- cgit From 754c9fd5764909bc7c3ba9779355d55cb357be8a Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 23 Feb 2017 07:44:14 +0000 Subject: drm/i915: Protect the request->global_seqno with the engine->timeline lock A request is assigned a global seqno only when it is on the hardware execution queue. The global seqno can be used to maintain a list of requests on the same engine in retirement order, for example for constructing a priority queue for waiting. Prior to its execution, or if it is subsequently removed in the event of preemption, its global seqno is zero. As both insertion and removal from the execution queue may operate in IRQ context, it is not guarded by the usual struct_mutex BKL. Instead those relying on the global seqno must be prepared for its value to change between reads. Only when the request is complete can the global seqno be stable (due to the memory barriers on submitting the commands to the hardware to write the breadcrumb, if the HWS shows that it has passed the global seqno and the global seqno is unchanged after the read, it is indeed complete). Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170223074422.4125-9-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_drv.h | 17 +++++- drivers/gpu/drm/i915/i915_gem.c | 16 ++++-- drivers/gpu/drm/i915/i915_gem_request.c | 45 ++++++++++----- drivers/gpu/drm/i915/i915_gem_request.h | 66 +++++++++++++++++----- drivers/gpu/drm/i915/intel_breadcrumbs.c | 11 ++-- drivers/gpu/drm/i915/intel_ringbuffer.h | 39 ++++++++++++- drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c | 6 +- 7 files changed, 155 insertions(+), 45 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 9220a28fbaa8..e5b34eaf41ca 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -4009,9 +4009,10 @@ wait_remaining_ms_from_jiffies(unsigned long timestamp_jiffies, int to_wait_ms) } static inline bool -__i915_request_irq_complete(struct drm_i915_gem_request *req) +__i915_request_irq_complete(const struct drm_i915_gem_request *req) { struct intel_engine_cs *engine = req->engine; + u32 seqno; /* Note that the engine may have wrapped around the seqno, and * so our request->global_seqno will be ahead of the hardware, @@ -4022,10 +4023,20 @@ __i915_request_irq_complete(struct drm_i915_gem_request *req) if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &req->fence.flags)) return true; + /* The request was dequeued before we were awoken. We check after + * inspecting the hw to confirm that this was the same request + * that generated the HWS update. The memory barriers within + * the request execution are sufficient to ensure that a check + * after reading the value from hw matches this request. + */ + seqno = i915_gem_request_global_seqno(req); + if (!seqno) + return false; + /* Before we do the heavier coherent read of the seqno, * check the value (hopefully) in the CPU cacheline. */ - if (__i915_gem_request_completed(req)) + if (__i915_gem_request_completed(req, seqno)) return true; /* Ensure our read of the seqno is coherent so that we @@ -4076,7 +4087,7 @@ __i915_request_irq_complete(struct drm_i915_gem_request *req) wake_up_process(tsk); rcu_read_unlock(); - if (__i915_gem_request_completed(req)) + if (__i915_gem_request_completed(req, seqno)) return true; } diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index e29b6f0dda29..a8167003c10b 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -397,7 +397,7 @@ out: if (flags & I915_WAIT_LOCKED && i915_gem_request_completed(rq)) i915_gem_request_retire_upto(rq); - if (rps && rq->global_seqno == intel_engine_last_submit(rq->engine)) { + if (rps && i915_gem_request_global_seqno(rq) == intel_engine_last_submit(rq->engine)) { /* The GPU is now idle and this client has stalled. * Since no other client has submitted a request in the * meantime, assume that this client is the only one @@ -2608,7 +2608,8 @@ static void i915_gem_context_mark_innocent(struct i915_gem_context *ctx) struct drm_i915_gem_request * i915_gem_find_active_request(struct intel_engine_cs *engine) { - struct drm_i915_gem_request *request; + struct drm_i915_gem_request *request, *active = NULL; + unsigned long flags; /* We are called by the error capture and reset at a random * point in time. In particular, note that neither is crucially @@ -2618,17 +2619,22 @@ i915_gem_find_active_request(struct intel_engine_cs *engine) * extra delay for a recent interrupt is pointless. Hence, we do * not need an engine->irq_seqno_barrier() before the seqno reads. */ + spin_lock_irqsave(&engine->timeline->lock, flags); list_for_each_entry(request, &engine->timeline->requests, link) { - if (__i915_gem_request_completed(request)) + if (__i915_gem_request_completed(request, + request->global_seqno)) continue; GEM_BUG_ON(request->engine != engine); GEM_BUG_ON(test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &request->fence.flags)); - return request; + + active = request; + break; } + spin_unlock_irqrestore(&engine->timeline->lock, flags); - return NULL; + return active; } static bool engine_stalled(struct intel_engine_cs *engine) diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index 37ef7084fce5..d394cbe8de11 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -418,7 +418,6 @@ void __i915_gem_request_submit(struct drm_i915_gem_request *request) intel_engine_enable_signaling(request); spin_unlock(&request->lock); - GEM_BUG_ON(!request->global_seqno); engine->emit_breadcrumb(request, request->ring->vaddr + request->postfix); @@ -505,7 +504,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine, /* Move the oldest request to the slab-cache (if not in use!) */ req = list_first_entry_or_null(&engine->timeline->requests, typeof(*req), link); - if (req && __i915_gem_request_completed(req)) + if (req && i915_gem_request_completed(req)) i915_gem_request_retire(req); /* Beware: Dragons be flying overhead. @@ -611,6 +610,7 @@ static int i915_gem_request_await_request(struct drm_i915_gem_request *to, struct drm_i915_gem_request *from) { + u32 seqno; int ret; GEM_BUG_ON(to == from); @@ -633,14 +633,15 @@ i915_gem_request_await_request(struct drm_i915_gem_request *to, return ret < 0 ? ret : 0; } - if (!from->global_seqno) { + seqno = i915_gem_request_global_seqno(from); + if (!seqno) { ret = i915_sw_fence_await_dma_fence(&to->submit, &from->fence, 0, GFP_KERNEL); return ret < 0 ? ret : 0; } - if (from->global_seqno <= to->timeline->sync_seqno[from->engine->id]) + if (seqno <= to->timeline->sync_seqno[from->engine->id]) return 0; trace_i915_gem_ring_sync_to(to, from); @@ -658,7 +659,7 @@ i915_gem_request_await_request(struct drm_i915_gem_request *to, return ret; } - to->timeline->sync_seqno[from->engine->id] = from->global_seqno; + to->timeline->sync_seqno[from->engine->id] = seqno; return 0; } @@ -938,7 +939,7 @@ static bool busywait_stop(unsigned long timeout, unsigned int cpu) } bool __i915_spin_request(const struct drm_i915_gem_request *req, - int state, unsigned long timeout_us) + u32 seqno, int state, unsigned long timeout_us) { struct intel_engine_cs *engine = req->engine; unsigned int irq, cpu; @@ -956,7 +957,11 @@ bool __i915_spin_request(const struct drm_i915_gem_request *req, irq = atomic_read(&engine->irq_count); timeout_us += local_clock_us(&cpu); do { - if (__i915_gem_request_completed(req)) + if (seqno != i915_gem_request_global_seqno(req)) + break; + + if (i915_seqno_passed(intel_engine_get_seqno(req->engine), + seqno)) return true; /* Seqno are meant to be ordered *before* the interrupt. If @@ -1028,11 +1033,14 @@ long i915_wait_request(struct drm_i915_gem_request *req, if (flags & I915_WAIT_LOCKED) add_wait_queue(errq, &reset); + intel_wait_init(&wait); + reset_wait_queue(&req->execute, &exec); - if (!req->global_seqno) { + if (!intel_wait_update_request(&wait, req)) { do { set_current_state(state); - if (req->global_seqno) + + if (intel_wait_update_request(&wait, req)) break; if (flags & I915_WAIT_LOCKED && @@ -1060,7 +1068,7 @@ long i915_wait_request(struct drm_i915_gem_request *req, if (timeout < 0) goto complete; - GEM_BUG_ON(!req->global_seqno); + GEM_BUG_ON(!intel_wait_has_seqno(&wait)); } GEM_BUG_ON(!i915_sw_fence_signaled(&req->submit)); @@ -1069,7 +1077,6 @@ long i915_wait_request(struct drm_i915_gem_request *req, goto complete; set_current_state(state); - intel_wait_init(&wait, req->global_seqno); if (intel_engine_add_wait(req->engine, &wait)) /* In order to check that we haven't missed the interrupt * as we enabled it, we need to kick ourselves to do a @@ -1090,7 +1097,8 @@ long i915_wait_request(struct drm_i915_gem_request *req, timeout = io_schedule_timeout(timeout); - if (intel_wait_complete(&wait)) + if (intel_wait_complete(&wait) && + intel_wait_check_request(&wait, req)) break; set_current_state(state); @@ -1141,14 +1149,21 @@ complete: static void engine_retire_requests(struct intel_engine_cs *engine) { struct drm_i915_gem_request *request, *next; + u32 seqno = intel_engine_get_seqno(engine); + LIST_HEAD(retire); + spin_lock_irq(&engine->timeline->lock); list_for_each_entry_safe(request, next, &engine->timeline->requests, link) { - if (!__i915_gem_request_completed(request)) - return; + if (!i915_seqno_passed(seqno, request->global_seqno)) + break; - i915_gem_request_retire(request); + list_move_tail(&request->link, &retire); } + spin_unlock_irq(&engine->timeline->lock); + + list_for_each_entry_safe(request, next, &retire, link) + i915_gem_request_retire(request); } void i915_gem_retire_requests(struct drm_i915_private *dev_priv) diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h index 467d3e13fce0..b81f6709905c 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.h +++ b/drivers/gpu/drm/i915/i915_gem_request.h @@ -135,6 +135,11 @@ struct drm_i915_gem_request { struct i915_priotree priotree; struct i915_dependency dep; + /** GEM sequence number associated with this request on the + * global execution timeline. It is zero when the request is not + * on the HW queue (i.e. not on the engine timeline list). + * Its value is guarded by the timeline spinlock. + */ u32 global_seqno; /** Position in the ring of the start of the request */ @@ -229,6 +234,30 @@ static inline void i915_gem_request_assign(struct drm_i915_gem_request **pdst, *pdst = src; } +/** + * i915_gem_request_global_seqno - report the current global seqno + * @request - the request + * + * A request is assigned a global seqno only when it is on the hardware + * execution queue. The global seqno can be used to maintain a list of + * requests on the same engine in retirement order, for example for + * constructing a priority queue for waiting. Prior to its execution, or + * if it is subsequently removed in the event of preemption, its global + * seqno is zero. As both insertion and removal from the execution queue + * may operate in IRQ context, it is not guarded by the usual struct_mutex + * BKL. Instead those relying on the global seqno must be prepared for its + * value to change between reads. Only when the request is complete can + * the global seqno be stable (due to the memory barriers on submitting + * the commands to the hardware to write the breadcrumb, if the HWS shows + * that it has passed the global seqno and the global seqno is unchanged + * after the read, it is indeed complete). + */ +static u32 +i915_gem_request_global_seqno(const struct drm_i915_gem_request *request) +{ + return READ_ONCE(request->global_seqno); +} + int i915_gem_request_await_object(struct drm_i915_gem_request *to, struct drm_i915_gem_object *obj, @@ -269,46 +298,55 @@ static inline bool i915_seqno_passed(u32 seq1, u32 seq2) } static inline bool -__i915_gem_request_started(const struct drm_i915_gem_request *req) +__i915_gem_request_started(const struct drm_i915_gem_request *req, u32 seqno) { - GEM_BUG_ON(!req->global_seqno); + GEM_BUG_ON(!seqno); return i915_seqno_passed(intel_engine_get_seqno(req->engine), - req->global_seqno - 1); + seqno - 1); } static inline bool i915_gem_request_started(const struct drm_i915_gem_request *req) { - if (!req->global_seqno) + u32 seqno; + + seqno = i915_gem_request_global_seqno(req); + if (!seqno) return false; - return __i915_gem_request_started(req); + return __i915_gem_request_started(req, seqno); } static inline bool -__i915_gem_request_completed(const struct drm_i915_gem_request *req) +__i915_gem_request_completed(const struct drm_i915_gem_request *req, u32 seqno) { - GEM_BUG_ON(!req->global_seqno); - return i915_seqno_passed(intel_engine_get_seqno(req->engine), - req->global_seqno); + GEM_BUG_ON(!seqno); + return i915_seqno_passed(intel_engine_get_seqno(req->engine), seqno) && + seqno == i915_gem_request_global_seqno(req); } static inline bool i915_gem_request_completed(const struct drm_i915_gem_request *req) { - if (!req->global_seqno) + u32 seqno; + + seqno = i915_gem_request_global_seqno(req); + if (!seqno) return false; - return __i915_gem_request_completed(req); + return __i915_gem_request_completed(req, seqno); } bool __i915_spin_request(const struct drm_i915_gem_request *request, - int state, unsigned long timeout_us); + u32 seqno, int state, unsigned long timeout_us); static inline bool i915_spin_request(const struct drm_i915_gem_request *request, int state, unsigned long timeout_us) { - return (__i915_gem_request_started(request) && - __i915_spin_request(request, state, timeout_us)); + u32 seqno; + + seqno = i915_gem_request_global_seqno(request); + return (__i915_gem_request_started(request, seqno) && + __i915_spin_request(request, seqno, state, timeout_us)); } /* We treat requests as fences. This is not be to confused with our diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c index 4f4e703d1b14..d5bf4c0b2deb 100644 --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c @@ -545,6 +545,7 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request) struct intel_breadcrumbs *b = &engine->breadcrumbs; struct rb_node *parent, **p; bool first, wakeup; + u32 seqno; /* Note that we may be called from an interrupt handler on another * device (e.g. nouveau signaling a fence completion causing us @@ -555,11 +556,13 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request) /* locked by dma_fence_enable_sw_signaling() (irqsafe fence->lock) */ assert_spin_locked(&request->lock); - if (!request->global_seqno) + + seqno = i915_gem_request_global_seqno(request); + if (!seqno) return; request->signaling.wait.tsk = b->signaler; - request->signaling.wait.seqno = request->global_seqno; + request->signaling.wait.seqno = seqno; i915_gem_request_get(request); spin_lock(&b->lock); @@ -583,8 +586,8 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request) p = &b->signals.rb_node; while (*p) { parent = *p; - if (i915_seqno_passed(request->global_seqno, - to_signaler(parent)->global_seqno)) { + if (i915_seqno_passed(seqno, + to_signaler(parent)->signaling.wait.seqno)) { p = &parent->rb_right; first = false; } else { diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index b91c2c7ef5a6..04e7d5a9ee3a 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -582,10 +582,47 @@ static inline u32 intel_hws_seqno_address(struct intel_engine_cs *engine) /* intel_breadcrumbs.c -- user interrupt bottom-half for waiters */ int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine); -static inline void intel_wait_init(struct intel_wait *wait, u32 seqno) +static inline void intel_wait_init(struct intel_wait *wait) { wait->tsk = current; +} + +static inline void intel_wait_init_for_seqno(struct intel_wait *wait, u32 seqno) +{ + wait->tsk = current; + wait->seqno = seqno; +} + +static inline bool intel_wait_has_seqno(const struct intel_wait *wait) +{ + return wait->seqno; +} + +static inline bool +intel_wait_update_seqno(struct intel_wait *wait, u32 seqno) +{ wait->seqno = seqno; + return intel_wait_has_seqno(wait); +} + +static inline bool +intel_wait_update_request(struct intel_wait *wait, + const struct drm_i915_gem_request *rq) +{ + return intel_wait_update_seqno(wait, i915_gem_request_global_seqno(rq)); +} + +static inline bool +intel_wait_check_seqno(const struct intel_wait *wait, u32 seqno) +{ + return wait->seqno == seqno; +} + +static inline bool +intel_wait_check_request(const struct intel_wait *wait, + const struct drm_i915_gem_request *rq) +{ + return intel_wait_check_seqno(wait, i915_gem_request_global_seqno(rq)); } static inline bool intel_wait_complete(const struct intel_wait *wait) diff --git a/drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c b/drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c index 6426acc9fdca..621be1ca53d8 100644 --- a/drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c @@ -131,7 +131,7 @@ static int igt_random_insert_remove(void *arg) goto out_bitmap; for (n = 0; n < count; n++) - intel_wait_init(&waiters[n], seqno_bias + n); + intel_wait_init_for_seqno(&waiters[n], seqno_bias + n); err = check_rbtree(engine, bitmap, waiters, count); if (err) @@ -197,7 +197,7 @@ static int igt_insert_complete(void *arg) goto out_waiters; for (n = 0; n < count; n++) { - intel_wait_init(&waiters[n], n + seqno_bias); + intel_wait_init_for_seqno(&waiters[n], n + seqno_bias); intel_engine_add_wait(engine, &waiters[n]); __set_bit(n, bitmap); } @@ -318,7 +318,7 @@ static int igt_wakeup_thread(void *arg) while (wait_for_ready(w)) { GEM_BUG_ON(kthread_should_stop()); - intel_wait_init(&wait, w->seqno); + intel_wait_init_for_seqno(&wait, w->seqno); intel_engine_add_wait(w->engine, &wait); for (;;) { set_current_state(TASK_UNINTERRUPTIBLE); -- cgit From cced5e2f098d9db26cf4119e309068b79cca1274 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 23 Feb 2017 07:44:15 +0000 Subject: drm/i915: Take a reference whilst processing the signaler request The plan in the near-future is to allow requests to be removed from the signaler. We can no longer then rely on holding a reference to the request for the duration it is in the signaling tree, and instead must obtain a reference to the request for the current operation using RCU. Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170223074422.4125-10-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_breadcrumbs.c | 22 +++++++++++++++------- drivers/gpu/drm/i915/intel_ringbuffer.h | 2 +- 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c index d5bf4c0b2deb..a35d59d00bfb 100644 --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c @@ -496,7 +496,11 @@ static int intel_breadcrumbs_signaler(void *arg) * need to wait for a new interrupt from the GPU or for * a new client. */ - request = READ_ONCE(b->first_signal); + rcu_read_lock(); + request = rcu_dereference(b->first_signal); + if (request) + request = i915_gem_request_get_rcu(request); + rcu_read_unlock(); if (signal_complete(request)) { local_bh_disable(); dma_fence_signal(&request->fence); @@ -515,24 +519,28 @@ static int intel_breadcrumbs_signaler(void *arg) * the oldest before picking the next one. */ spin_lock_irq(&b->lock); - if (request == b->first_signal) { + if (request == rcu_access_pointer(b->first_signal)) { struct rb_node *rb = rb_next(&request->signaling.node); - b->first_signal = rb ? to_signaler(rb) : NULL; + rcu_assign_pointer(b->first_signal, + rb ? to_signaler(rb) : NULL); } rb_erase(&request->signaling.node, &b->signals); spin_unlock_irq(&b->lock); i915_gem_request_put(request); } else { - if (kthread_should_stop()) + if (kthread_should_stop()) { + GEM_BUG_ON(request); break; + } schedule(); if (kthread_should_park()) kthread_parkme(); } + i915_gem_request_put(request); } while (1); __set_current_state(TASK_RUNNING); @@ -597,7 +605,7 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request) rb_link_node(&request->signaling.node, parent, p); rb_insert_color(&request->signaling.node, &b->signals); if (first) - smp_store_mb(b->first_signal, request); + rcu_assign_pointer(b->first_signal, request); spin_unlock(&b->lock); @@ -670,7 +678,7 @@ void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine) /* The engines should be idle and all requests accounted for! */ WARN_ON(READ_ONCE(b->first_wait)); WARN_ON(!RB_EMPTY_ROOT(&b->waiters)); - WARN_ON(READ_ONCE(b->first_signal)); + WARN_ON(rcu_access_pointer(b->first_signal)); WARN_ON(!RB_EMPTY_ROOT(&b->signals)); if (!IS_ERR_OR_NULL(b->signaler)) @@ -691,7 +699,7 @@ bool intel_breadcrumbs_busy(struct intel_engine_cs *engine) busy |= intel_engine_flag(engine); } - if (b->first_signal) { + if (rcu_access_pointer(b->first_signal)) { wake_up_process(b->signaler); busy |= intel_engine_flag(engine); } diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 04e7d5a9ee3a..fbf34af7bc77 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -242,7 +242,7 @@ struct intel_engine_cs { struct rb_root signals; /* sorted by retirement */ struct intel_wait *first_wait; /* oldest waiter by retirement */ struct task_struct *signaler; /* used for fence signalling */ - struct drm_i915_gem_request *first_signal; + struct drm_i915_gem_request __rcu *first_signal; struct timer_list fake_irq; /* used after a missed interrupt */ struct timer_list hangcheck; /* detect missed interrupts */ -- cgit From 9eb143bbec7dfdfce8aa7f6a3b0f40af4e95e7b7 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 23 Feb 2017 07:44:16 +0000 Subject: drm/i915: Allow a request to be cancelled If we preempt a request and remove it from the execution queue, we need to undo its global seqno and restart any waiters. Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170223074422.4125-11-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_breadcrumbs.c | 71 +++++++++++++++++++++++++------- drivers/gpu/drm/i915/intel_ringbuffer.h | 1 + 2 files changed, 57 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c index a35d59d00bfb..dd39e4f7a560 100644 --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c @@ -356,22 +356,15 @@ static inline int wakeup_priority(struct intel_breadcrumbs *b, return tsk->prio; } -void intel_engine_remove_wait(struct intel_engine_cs *engine, - struct intel_wait *wait) +static void __intel_engine_remove_wait(struct intel_engine_cs *engine, + struct intel_wait *wait) { struct intel_breadcrumbs *b = &engine->breadcrumbs; - /* Quick check to see if this waiter was already decoupled from - * the tree by the bottom-half to avoid contention on the spinlock - * by the herd. - */ - if (RB_EMPTY_NODE(&wait->node)) - return; - - spin_lock_irq(&b->lock); + assert_spin_locked(&b->lock); if (RB_EMPTY_NODE(&wait->node)) - goto out_unlock; + goto out; if (b->first_wait == wait) { const int priority = wakeup_priority(b, wait->tsk); @@ -436,11 +429,27 @@ void intel_engine_remove_wait(struct intel_engine_cs *engine, GEM_BUG_ON(RB_EMPTY_NODE(&wait->node)); rb_erase(&wait->node, &b->waiters); -out_unlock: +out: GEM_BUG_ON(b->first_wait == wait); GEM_BUG_ON(rb_first(&b->waiters) != (b->first_wait ? &b->first_wait->node : NULL)); GEM_BUG_ON(!rcu_access_pointer(b->irq_seqno_bh) ^ RB_EMPTY_ROOT(&b->waiters)); +} + +void intel_engine_remove_wait(struct intel_engine_cs *engine, + struct intel_wait *wait) +{ + struct intel_breadcrumbs *b = &engine->breadcrumbs; + + /* Quick check to see if this waiter was already decoupled from + * the tree by the bottom-half to avoid contention on the spinlock + * by the herd. + */ + if (RB_EMPTY_NODE(&wait->node)) + return; + + spin_lock_irq(&b->lock); + __intel_engine_remove_wait(engine, wait); spin_unlock_irq(&b->lock); } @@ -506,11 +515,13 @@ static int intel_breadcrumbs_signaler(void *arg) dma_fence_signal(&request->fence); local_bh_enable(); /* kick start the tasklets */ + spin_lock_irq(&b->lock); + /* Wake up all other completed waiters and select the * next bottom-half for the next user interrupt. */ - intel_engine_remove_wait(engine, - &request->signaling.wait); + __intel_engine_remove_wait(engine, + &request->signaling.wait); /* Find the next oldest signal. Note that as we have * not been holding the lock, another client may @@ -518,7 +529,6 @@ static int intel_breadcrumbs_signaler(void *arg) * we just completed - so double check we are still * the oldest before picking the next one. */ - spin_lock_irq(&b->lock); if (request == rcu_access_pointer(b->first_signal)) { struct rb_node *rb = rb_next(&request->signaling.node); @@ -526,6 +536,8 @@ static int intel_breadcrumbs_signaler(void *arg) rb ? to_signaler(rb) : NULL); } rb_erase(&request->signaling.node, &b->signals); + RB_CLEAR_NODE(&request->signaling.node); + spin_unlock_irq(&b->lock); i915_gem_request_put(request); @@ -613,6 +625,35 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request) wake_up_process(b->signaler); } +void intel_engine_cancel_signaling(struct drm_i915_gem_request *request) +{ + struct intel_engine_cs *engine = request->engine; + struct intel_breadcrumbs *b = &engine->breadcrumbs; + + assert_spin_locked(&request->lock); + GEM_BUG_ON(!request->signaling.wait.seqno); + + spin_lock(&b->lock); + + if (!RB_EMPTY_NODE(&request->signaling.node)) { + if (request == rcu_access_pointer(b->first_signal)) { + struct rb_node *rb = + rb_next(&request->signaling.node); + rcu_assign_pointer(b->first_signal, + rb ? to_signaler(rb) : NULL); + } + rb_erase(&request->signaling.node, &b->signals); + RB_CLEAR_NODE(&request->signaling.node); + i915_gem_request_put(request); + } + + __intel_engine_remove_wait(engine, &request->signaling.wait); + + spin_unlock(&b->lock); + + request->signaling.wait.seqno = 0; +} + int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine) { struct intel_breadcrumbs *b = &engine->breadcrumbs; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index fbf34af7bc77..0f29e07a9581 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -635,6 +635,7 @@ bool intel_engine_add_wait(struct intel_engine_cs *engine, void intel_engine_remove_wait(struct intel_engine_cs *engine, struct intel_wait *wait); void intel_engine_enable_signaling(struct drm_i915_gem_request *request); +void intel_engine_cancel_signaling(struct drm_i915_gem_request *request); static inline bool intel_engine_has_waiter(const struct intel_engine_cs *engine) { -- cgit From d6a2289d9d6b3ea47514cf29fed56340fb8fec7e Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 23 Feb 2017 07:44:17 +0000 Subject: drm/i915: Remove the preempted request from the execution queue After the request is cancelled, we then need to remove it from the global execution timeline and return it to the context timeline, the inverse of submit_request(). v2: Move manipulation of struct intel_wait to helpers Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170223074422.4125-12-chris@chris-wilson.co.uk Reviewed-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/i915_gem_request.c | 55 ++++++++++++++++++++++++++++++++ drivers/gpu/drm/i915/i915_gem_request.h | 3 ++ drivers/gpu/drm/i915/intel_breadcrumbs.c | 17 ++++++++-- 3 files changed, 73 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index d394cbe8de11..013abdfa65a6 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -441,6 +441,55 @@ void i915_gem_request_submit(struct drm_i915_gem_request *request) spin_unlock_irqrestore(&engine->timeline->lock, flags); } +void __i915_gem_request_unsubmit(struct drm_i915_gem_request *request) +{ + struct intel_engine_cs *engine = request->engine; + struct intel_timeline *timeline; + + assert_spin_locked(&engine->timeline->lock); + + /* Only unwind in reverse order, required so that the per-context list + * is kept in seqno/ring order. + */ + GEM_BUG_ON(request->global_seqno != engine->timeline->seqno); + engine->timeline->seqno--; + + /* We may be recursing from the signal callback of another i915 fence */ + spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING); + request->global_seqno = 0; + if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags)) + intel_engine_cancel_signaling(request); + spin_unlock(&request->lock); + + /* Transfer back from the global per-engine timeline to per-context */ + timeline = request->timeline; + GEM_BUG_ON(timeline == engine->timeline); + + spin_lock(&timeline->lock); + list_move(&request->link, &timeline->requests); + spin_unlock(&timeline->lock); + + /* We don't need to wake_up any waiters on request->execute, they + * will get woken by any other event or us re-adding this request + * to the engine timeline (__i915_gem_request_submit()). The waiters + * should be quite adapt at finding that the request now has a new + * global_seqno to the one they went to sleep on. + */ +} + +void i915_gem_request_unsubmit(struct drm_i915_gem_request *request) +{ + struct intel_engine_cs *engine = request->engine; + unsigned long flags; + + /* Will be called from irq-context when using foreign fences. */ + spin_lock_irqsave(&engine->timeline->lock, flags); + + __i915_gem_request_unsubmit(request); + + spin_unlock_irqrestore(&engine->timeline->lock, flags); +} + static int __i915_sw_fence_call submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state) { @@ -1035,6 +1084,7 @@ long i915_wait_request(struct drm_i915_gem_request *req, intel_wait_init(&wait); +restart: reset_wait_queue(&req->execute, &exec); if (!intel_wait_update_request(&wait, req)) { do { @@ -1133,6 +1183,11 @@ wakeup: /* Only spin if we know the GPU is processing this request */ if (i915_spin_request(req, state, 2)) break; + + if (!intel_wait_check_request(&wait, req)) { + intel_engine_remove_wait(req->engine, &wait); + goto restart; + } } intel_engine_remove_wait(req->engine, &wait); diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h index b81f6709905c..5f73d8c0a38a 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.h +++ b/drivers/gpu/drm/i915/i915_gem_request.h @@ -274,6 +274,9 @@ void __i915_add_request(struct drm_i915_gem_request *req, bool flush_caches); void __i915_gem_request_submit(struct drm_i915_gem_request *request); void i915_gem_request_submit(struct drm_i915_gem_request *request); +void __i915_gem_request_unsubmit(struct drm_i915_gem_request *request); +void i915_gem_request_unsubmit(struct drm_i915_gem_request *request); + struct intel_rps_client; #define NO_WAITBOOST ERR_PTR(-1) #define IS_RPS_CLIENT(p) (!IS_ERR(p)) diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c index dd39e4f7a560..027c93e34c97 100644 --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c @@ -453,7 +453,12 @@ void intel_engine_remove_wait(struct intel_engine_cs *engine, spin_unlock_irq(&b->lock); } -static bool signal_complete(struct drm_i915_gem_request *request) +static bool signal_valid(const struct drm_i915_gem_request *request) +{ + return intel_wait_check_request(&request->signaling.wait, request); +} + +static bool signal_complete(const struct drm_i915_gem_request *request) { if (!request) return false; @@ -462,7 +467,7 @@ static bool signal_complete(struct drm_i915_gem_request *request) * signalled that this wait is already completed. */ if (intel_wait_complete(&request->signaling.wait)) - return true; + return signal_valid(request); /* Carefully check if the request is complete, giving time for the * seqno to be visible or if the GPU hung. @@ -542,13 +547,21 @@ static int intel_breadcrumbs_signaler(void *arg) i915_gem_request_put(request); } else { + DEFINE_WAIT(exec); + if (kthread_should_stop()) { GEM_BUG_ON(request); break; } + if (request) + add_wait_queue(&request->execute, &exec); + schedule(); + if (request) + remove_wait_queue(&request->execute, &exec); + if (kthread_should_park()) kthread_parkme(); } -- cgit From 591c0fb85d1c351d28334fb249bf1faaafcf326b Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 23 Feb 2017 07:44:18 +0000 Subject: drm/i915: Exercise request cancellation using a mock selftest Add a mock selftest to preempt a request and check that we cancel it, requeue the request and then complete its execution. v2: Error leaks no more. Signed-off-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20170223074422.4125-13-chris@chris-wilson.co.uk Reviewed-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/selftests/i915_gem_request.c | 64 +++++++++++++++++++++++ drivers/gpu/drm/i915/selftests/mock_request.c | 19 +++++++ drivers/gpu/drm/i915/selftests/mock_request.h | 2 + 3 files changed, 85 insertions(+) diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_request.c b/drivers/gpu/drm/i915/selftests/i915_gem_request.c index 9d056a86723d..42bdeac93324 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_request.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_request.c @@ -26,6 +26,7 @@ #include "../i915_selftest.h" +#include "mock_context.h" #include "mock_gem_device.h" static int igt_add_request(void *arg) @@ -181,12 +182,75 @@ out_locked: return err; } +static int igt_request_rewind(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct drm_i915_gem_request *request, *vip; + struct i915_gem_context *ctx[2]; + int err = -EINVAL; + + mutex_lock(&i915->drm.struct_mutex); + ctx[0] = mock_context(i915, "A"); + request = mock_request(i915->engine[RCS], ctx[0], 2 * HZ); + if (!request) { + err = -ENOMEM; + goto err_context_0; + } + + i915_gem_request_get(request); + i915_add_request(request); + + ctx[1] = mock_context(i915, "B"); + vip = mock_request(i915->engine[RCS], ctx[1], 0); + if (!vip) { + err = -ENOMEM; + goto err_context_1; + } + + /* Simulate preemption by manual reordering */ + if (!mock_cancel_request(request)) { + pr_err("failed to cancel request (already executed)!\n"); + i915_add_request(vip); + goto err_context_1; + } + i915_gem_request_get(vip); + i915_add_request(vip); + request->engine->submit_request(request); + + mutex_unlock(&i915->drm.struct_mutex); + + if (i915_wait_request(vip, 0, HZ) == -ETIME) { + pr_err("timed out waiting for high priority request, vip.seqno=%d, current seqno=%d\n", + vip->global_seqno, intel_engine_get_seqno(i915->engine[RCS])); + goto err; + } + + if (i915_gem_request_completed(request)) { + pr_err("low priority request already completed\n"); + goto err; + } + + err = 0; +err: + i915_gem_request_put(vip); + mutex_lock(&i915->drm.struct_mutex); +err_context_1: + mock_context_close(ctx[1]); + i915_gem_request_put(request); +err_context_0: + mock_context_close(ctx[0]); + mock_device_flush(i915); + mutex_unlock(&i915->drm.struct_mutex); + return err; +} + int i915_gem_request_mock_selftests(void) { static const struct i915_subtest tests[] = { SUBTEST(igt_add_request), SUBTEST(igt_wait_request), SUBTEST(igt_fence_wait), + SUBTEST(igt_request_rewind), }; struct drm_i915_private *i915; int err; diff --git a/drivers/gpu/drm/i915/selftests/mock_request.c b/drivers/gpu/drm/i915/selftests/mock_request.c index e23242d1b88a..0e8d2e7f8c70 100644 --- a/drivers/gpu/drm/i915/selftests/mock_request.c +++ b/drivers/gpu/drm/i915/selftests/mock_request.c @@ -22,6 +22,7 @@ * */ +#include "mock_engine.h" #include "mock_request.h" struct drm_i915_gem_request * @@ -42,3 +43,21 @@ mock_request(struct intel_engine_cs *engine, return &mock->base; } + +bool mock_cancel_request(struct drm_i915_gem_request *request) +{ + struct mock_request *mock = container_of(request, typeof(*mock), base); + struct mock_engine *engine = + container_of(request->engine, typeof(*engine), base); + bool was_queued; + + spin_lock_irq(&engine->hw_lock); + was_queued = !list_empty(&mock->link); + list_del_init(&mock->link); + spin_unlock_irq(&engine->hw_lock); + + if (was_queued) + i915_gem_request_unsubmit(request); + + return was_queued; +} diff --git a/drivers/gpu/drm/i915/selftests/mock_request.h b/drivers/gpu/drm/i915/selftests/mock_request.h index cc76d4f4eb4e..4dea74c8e96d 100644 --- a/drivers/gpu/drm/i915/selftests/mock_request.h +++ b/drivers/gpu/drm/i915/selftests/mock_request.h @@ -41,4 +41,6 @@ mock_request(struct intel_engine_cs *engine, struct i915_gem_context *context, unsigned long delay); +bool mock_cancel_request(struct drm_i915_gem_request *request); + #endif /* !__MOCK_REQUEST__ */ -- cgit From a49625f90662799426dd99daab97559ef620dce1 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 23 Feb 2017 07:44:19 +0000 Subject: drm/i915: Replace reset_wait_queue with default_wake_function If we change the wait_queue_t from using the autoremove_wake_function to the default_wake_function, we no longer have to restore the wait_queue_t entry on the wait_queue_head_t list after being woken up by it, as we are unusual in sleeping multiple times on the same wait_queue_t. Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170223074422.4125-14-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_request.c | 30 +++++++----------------------- 1 file changed, 7 insertions(+), 23 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index 013abdfa65a6..43904f412e76 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -945,16 +945,6 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches) local_bh_enable(); /* Kick the execlists tasklet if just scheduled */ } -static void reset_wait_queue(wait_queue_head_t *q, wait_queue_t *wait) -{ - unsigned long flags; - - spin_lock_irqsave(&q->lock, flags); - if (list_empty(&wait->task_list)) - __add_wait_queue(q, wait); - spin_unlock_irqrestore(&q->lock, flags); -} - static unsigned long local_clock_us(unsigned int *cpu) { unsigned long t; @@ -1059,8 +1049,8 @@ long i915_wait_request(struct drm_i915_gem_request *req, const int state = flags & I915_WAIT_INTERRUPTIBLE ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE; wait_queue_head_t *errq = &req->i915->gpu_error.wait_queue; - DEFINE_WAIT(reset); - DEFINE_WAIT(exec); + DEFINE_WAIT_FUNC(reset, default_wake_function); + DEFINE_WAIT_FUNC(exec, default_wake_function); struct intel_wait wait; might_sleep(); @@ -1079,13 +1069,13 @@ long i915_wait_request(struct drm_i915_gem_request *req, trace_i915_gem_request_wait_begin(req, flags); + add_wait_queue(&req->execute, &exec); if (flags & I915_WAIT_LOCKED) add_wait_queue(errq, &reset); intel_wait_init(&wait); restart: - reset_wait_queue(&req->execute, &exec); if (!intel_wait_update_request(&wait, req)) { do { set_current_state(state); @@ -1097,26 +1087,21 @@ restart: i915_reset_in_progress(&req->i915->gpu_error)) { __set_current_state(TASK_RUNNING); i915_reset(req->i915); - reset_wait_queue(errq, &reset); continue; } if (signal_pending_state(state, current)) { timeout = -ERESTARTSYS; - break; + goto complete; } if (!timeout) { timeout = -ETIME; - break; + goto complete; } timeout = io_schedule_timeout(timeout); } while (1); - finish_wait(&req->execute, &exec); - - if (timeout < 0) - goto complete; GEM_BUG_ON(!intel_wait_has_seqno(&wait)); } @@ -1176,7 +1161,6 @@ wakeup: i915_reset_in_progress(&req->i915->gpu_error)) { __set_current_state(TASK_RUNNING); i915_reset(req->i915); - reset_wait_queue(errq, &reset); continue; } @@ -1191,11 +1175,11 @@ wakeup: } intel_engine_remove_wait(req->engine, &wait); - __set_current_state(TASK_RUNNING); - complete: + __set_current_state(TASK_RUNNING); if (flags & I915_WAIT_LOCKED) remove_wait_queue(errq, &reset); + remove_wait_queue(&req->execute, &exec); trace_i915_gem_request_wait_end(req); return timeout; -- cgit From e07051142cbcb991e4fedf86afd8f00425dbc89a Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 23 Feb 2017 07:44:20 +0000 Subject: drm/i915: Refactor direct GPU reset from request waiters Combine the common code for the pair of waiters into a single function. v2: Rename reset_request to wait_request_check_and_reset Signed-off-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20170223074422.4125-15-chris@chris-wilson.co.uk Reviewed-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/i915_gem_request.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index 43904f412e76..0582bb7a0b5b 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -1023,6 +1023,16 @@ bool __i915_spin_request(const struct drm_i915_gem_request *req, return false; } +static bool __i915_wait_request_check_and_reset(struct drm_i915_gem_request *request) +{ + if (likely(!i915_reset_in_progress(&request->i915->gpu_error))) + return false; + + __set_current_state(TASK_RUNNING); + i915_reset(request->i915); + return true; +} + /** * i915_wait_request - wait until execution of request has finished * @req: the request to wait upon @@ -1084,11 +1094,8 @@ restart: break; if (flags & I915_WAIT_LOCKED && - i915_reset_in_progress(&req->i915->gpu_error)) { - __set_current_state(TASK_RUNNING); - i915_reset(req->i915); + __i915_wait_request_check_and_reset(req)) continue; - } if (signal_pending_state(state, current)) { timeout = -ERESTARTSYS; @@ -1158,11 +1165,8 @@ wakeup: * itself, or indirectly by recovering the GPU). */ if (flags & I915_WAIT_LOCKED && - i915_reset_in_progress(&req->i915->gpu_error)) { - __set_current_state(TASK_RUNNING); - i915_reset(req->i915); + __i915_wait_request_check_and_reset(req)) continue; - } /* Only spin if we know the GPU is processing this request */ if (i915_spin_request(req, state, 2)) -- cgit From 24f417ec0c41a8e2b27ae7d2d64fe0ea13f322dd Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 23 Feb 2017 07:44:21 +0000 Subject: drm/i915: Immediately process a reset before starting waiting As we handoff the GPU reset to the waiter, we need to check we don't miss a wakeup if it has already been sent prior to us starting the wait. v2: Tweak checking for reset to be clear to the need before sleeping after changing the task state. Signed-off-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20170223074422.4125-16-chris@chris-wilson.co.uk Reviewed-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/i915_gem_request.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index 0582bb7a0b5b..9669747d1822 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -1126,6 +1126,9 @@ restart: */ goto wakeup; + if (flags & I915_WAIT_LOCKED) + __i915_wait_request_check_and_reset(req); + for (;;) { if (signal_pending_state(state, current)) { timeout = -ERESTARTSYS; -- cgit From 0f2f61d4a8d4611155e7d1633db271ead9057981 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 23 Feb 2017 07:44:22 +0000 Subject: drm/i915: Remove one level of indention from wait-for-execute Now that the code is getting simpler, we can reduce the indentation when waiting for the global_seqno. Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170223074422.4125-17-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_request.c | 39 +++++++++++++++------------------ 1 file changed, 18 insertions(+), 21 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index 9669747d1822..fbfeb5f8d069 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -1086,32 +1086,29 @@ long i915_wait_request(struct drm_i915_gem_request *req, intel_wait_init(&wait); restart: - if (!intel_wait_update_request(&wait, req)) { - do { - set_current_state(state); - - if (intel_wait_update_request(&wait, req)) - break; + do { + set_current_state(state); + if (intel_wait_update_request(&wait, req)) + break; - if (flags & I915_WAIT_LOCKED && - __i915_wait_request_check_and_reset(req)) - continue; + if (flags & I915_WAIT_LOCKED && + __i915_wait_request_check_and_reset(req)) + continue; - if (signal_pending_state(state, current)) { - timeout = -ERESTARTSYS; - goto complete; - } + if (signal_pending_state(state, current)) { + timeout = -ERESTARTSYS; + goto complete; + } - if (!timeout) { - timeout = -ETIME; - goto complete; - } + if (!timeout) { + timeout = -ETIME; + goto complete; + } - timeout = io_schedule_timeout(timeout); - } while (1); + timeout = io_schedule_timeout(timeout); + } while (1); - GEM_BUG_ON(!intel_wait_has_seqno(&wait)); - } + GEM_BUG_ON(!intel_wait_has_seqno(&wait)); GEM_BUG_ON(!i915_sw_fence_signaled(&req->submit)); /* Optimistic short spin before touching IRQs */ -- cgit From 8d0e9bcb2b0cbc2d5c3668e1a445571b4b73317f Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 23 Feb 2017 12:20:37 +0000 Subject: drm/i915: Suppress fbc suggestion to increase stolen if disabled If the reserved region of memory has not been setup (most probably because it has been limited by hardware or virtualisation), don't tell the user to try and increase the amount of memory reserved for graphics. Signed-off-by: Chris Wilson Cc: Rodrigo Vivi Link: http://patchwork.freedesktop.org/patch/msgid/20170223122037.16174-1-chris@chris-wilson.co.uk eviewed-by: Rodrigo Vivi --- drivers/gpu/drm/i915/intel_fbc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_fbc.c b/drivers/gpu/drm/i915/intel_fbc.c index 22c56ae086f2..17d418b23d77 100644 --- a/drivers/gpu/drm/i915/intel_fbc.c +++ b/drivers/gpu/drm/i915/intel_fbc.c @@ -627,7 +627,8 @@ err_fb: kfree(compressed_llb); i915_gem_stolen_remove_node(dev_priv, &fbc->compressed_fb); err_llb: - pr_info_once("drm: not enough stolen space for compressed buffer (need %d more bytes), disabling. Hint: you may be able to increase stolen memory size in the BIOS to avoid this.\n", size); + if (drm_mm_initialized(&dev_priv->mm.stolen)) + pr_info_once("drm: not enough stolen space for compressed buffer (need %d more bytes), disabling. Hint: you may be able to increase stolen memory size in the BIOS to avoid this.\n", size); return -ENOSPC; } -- cgit From 754a76591b12c88f57ad8b4ca533a5c9566a1922 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 24 Feb 2017 11:43:06 +0000 Subject: drm/i915/fbdev: Stop repeating tile configuration on stagnation If we cease making progress in finding matching outputs for a tiled configuration, stop looping over the remaining unconfigured outputs. v2: Use conn_seq (instead of pass) to only apply tile configuration on first pass. Fixes: b0ee9e7fa5b4 ("drm/fb: add support for tiled monitor configurations. (v2)") Signed-off-by: Chris Wilson Cc: Tomasz Lis Cc: Dave Airlie Cc: Daniel Vetter Cc: Jani Nikula Cc: Sean Paul Cc: # v3.19+ Reviewed-by: Tomasz Lis Link: http://patchwork.freedesktop.org/patch/msgid/20170224114306.4400-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_fbdev.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_fbdev.c b/drivers/gpu/drm/i915/intel_fbdev.c index c404ab524ccc..82993a87654c 100644 --- a/drivers/gpu/drm/i915/intel_fbdev.c +++ b/drivers/gpu/drm/i915/intel_fbdev.c @@ -350,14 +350,13 @@ static bool intel_fb_initial_config(struct drm_fb_helper *fb_helper, bool *enabled, int width, int height) { struct drm_i915_private *dev_priv = to_i915(fb_helper->dev); - unsigned long conn_configured, mask; + unsigned long conn_configured, conn_seq, mask; unsigned int count = min(fb_helper->connector_count, BITS_PER_LONG); int i, j; bool *save_enabled; bool fallback = true; int num_connectors_enabled = 0; int num_connectors_detected = 0; - int pass = 0; save_enabled = kcalloc(count, sizeof(bool), GFP_KERNEL); if (!save_enabled) @@ -367,6 +366,7 @@ static bool intel_fb_initial_config(struct drm_fb_helper *fb_helper, mask = GENMASK(count - 1, 0); conn_configured = 0; retry: + conn_seq = conn_configured; for (i = 0; i < count; i++) { struct drm_fb_helper_connector *fb_conn; struct drm_connector *connector; @@ -380,7 +380,7 @@ retry: if (conn_configured & BIT(i)) continue; - if (pass == 0 && !connector->has_tile) + if (conn_seq == 0 && !connector->has_tile) continue; if (connector->status == connector_status_connected) @@ -491,10 +491,8 @@ retry: conn_configured |= BIT(i); } - if ((conn_configured & mask) != mask) { - pass++; + if ((conn_configured & mask) != mask && conn_configured != conn_seq) goto retry; - } /* * If the BIOS didn't enable everything it could, fall back to have the -- cgit From 7d1c42e679f920529df5cc11c6f827f17d4f8022 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Thu, 23 Feb 2017 19:35:05 +0200 Subject: drm/i915: Refactor code to select the DDI buf translation table MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Split the code to select the correct translation table into DP, eDP and FDI specific helpers. This reduces the clutter in intel_prepare_dp_ddi_buffers(), and we'll have other uses for some of these new helper functions later on. v2: Fix typo in commit message (David) Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/20170223173507.17600-1-ville.syrjala@linux.intel.com Reviewed-by: David Weinehall --- drivers/gpu/drm/i915/intel_ddi.c | 126 +++++++++++++++++++++++---------------- 1 file changed, 73 insertions(+), 53 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index cd6fedd229a0..5e4b0172810d 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -468,6 +468,59 @@ static int intel_ddi_hdmi_level(struct drm_i915_private *dev_priv, enum port por return hdmi_level; } +static const struct ddi_buf_trans * +intel_ddi_get_buf_trans_dp(struct drm_i915_private *dev_priv, + int *n_entries) +{ + if (IS_KABYLAKE(dev_priv)) { + return kbl_get_buf_trans_dp(dev_priv, n_entries); + } else if (IS_SKYLAKE(dev_priv)) { + return skl_get_buf_trans_dp(dev_priv, n_entries); + } else if (IS_BROADWELL(dev_priv)) { + *n_entries = ARRAY_SIZE(bdw_ddi_translations_dp); + return bdw_ddi_translations_dp; + } else if (IS_HASWELL(dev_priv)) { + *n_entries = ARRAY_SIZE(hsw_ddi_translations_dp); + return hsw_ddi_translations_dp; + } + + *n_entries = 0; + return NULL; +} + +static const struct ddi_buf_trans * +intel_ddi_get_buf_trans_edp(struct drm_i915_private *dev_priv, + int *n_entries) +{ + if (IS_KABYLAKE(dev_priv) || IS_SKYLAKE(dev_priv)) { + return skl_get_buf_trans_edp(dev_priv, n_entries); + } else if (IS_BROADWELL(dev_priv)) { + return bdw_get_buf_trans_edp(dev_priv, n_entries); + } else if (IS_HASWELL(dev_priv)) { + *n_entries = ARRAY_SIZE(hsw_ddi_translations_dp); + return hsw_ddi_translations_dp; + } + + *n_entries = 0; + return NULL; +} + +static const struct ddi_buf_trans * +intel_ddi_get_buf_trans_fdi(struct drm_i915_private *dev_priv, + int *n_entries) +{ + if (IS_BROADWELL(dev_priv)) { + *n_entries = ARRAY_SIZE(hsw_ddi_translations_fdi); + return hsw_ddi_translations_fdi; + } else if (IS_HASWELL(dev_priv)) { + *n_entries = ARRAY_SIZE(hsw_ddi_translations_fdi); + return hsw_ddi_translations_fdi; + } + + *n_entries = 0; + return NULL; +} + /* * Starting with Haswell, DDI port buffers must be programmed with correct * values in advance. This function programs the correct values for @@ -477,45 +530,29 @@ void intel_prepare_dp_ddi_buffers(struct intel_encoder *encoder) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); u32 iboost_bit = 0; - int i, n_dp_entries, n_edp_entries, size; + int i, n_entries; enum port port = intel_ddi_get_encoder_port(encoder); - const struct ddi_buf_trans *ddi_translations_fdi; - const struct ddi_buf_trans *ddi_translations_dp; - const struct ddi_buf_trans *ddi_translations_edp; const struct ddi_buf_trans *ddi_translations; if (IS_GEN9_LP(dev_priv)) return; - if (IS_KABYLAKE(dev_priv)) { - ddi_translations_fdi = NULL; - ddi_translations_dp = - kbl_get_buf_trans_dp(dev_priv, &n_dp_entries); - ddi_translations_edp = - skl_get_buf_trans_edp(dev_priv, &n_edp_entries); - } else if (IS_SKYLAKE(dev_priv)) { - ddi_translations_fdi = NULL; - ddi_translations_dp = - skl_get_buf_trans_dp(dev_priv, &n_dp_entries); - ddi_translations_edp = - skl_get_buf_trans_edp(dev_priv, &n_edp_entries); - } else if (IS_BROADWELL(dev_priv)) { - ddi_translations_fdi = bdw_ddi_translations_fdi; - ddi_translations_dp = bdw_ddi_translations_dp; - ddi_translations_edp = bdw_get_buf_trans_edp(dev_priv, &n_edp_entries); - n_dp_entries = ARRAY_SIZE(bdw_ddi_translations_dp); - } else if (IS_HASWELL(dev_priv)) { - ddi_translations_fdi = hsw_ddi_translations_fdi; - ddi_translations_dp = hsw_ddi_translations_dp; - ddi_translations_edp = hsw_ddi_translations_dp; - n_dp_entries = n_edp_entries = ARRAY_SIZE(hsw_ddi_translations_dp); - } else { - WARN(1, "ddi translation table missing\n"); - ddi_translations_edp = bdw_ddi_translations_dp; - ddi_translations_fdi = bdw_ddi_translations_fdi; - ddi_translations_dp = bdw_ddi_translations_dp; - n_edp_entries = ARRAY_SIZE(bdw_ddi_translations_edp); - n_dp_entries = ARRAY_SIZE(bdw_ddi_translations_dp); + switch (encoder->type) { + case INTEL_OUTPUT_EDP: + ddi_translations = intel_ddi_get_buf_trans_edp(dev_priv, + &n_entries); + break; + case INTEL_OUTPUT_DP: + ddi_translations = intel_ddi_get_buf_trans_dp(dev_priv, + &n_entries); + break; + case INTEL_OUTPUT_ANALOG: + ddi_translations = intel_ddi_get_buf_trans_fdi(dev_priv, + &n_entries); + break; + default: + MISSING_CASE(encoder->type); + return; } if (IS_GEN9_BC(dev_priv)) { @@ -525,28 +562,11 @@ void intel_prepare_dp_ddi_buffers(struct intel_encoder *encoder) if (WARN_ON(encoder->type == INTEL_OUTPUT_EDP && port != PORT_A && port != PORT_E && - n_edp_entries > 9)) - n_edp_entries = 9; - } - - switch (encoder->type) { - case INTEL_OUTPUT_EDP: - ddi_translations = ddi_translations_edp; - size = n_edp_entries; - break; - case INTEL_OUTPUT_DP: - ddi_translations = ddi_translations_dp; - size = n_dp_entries; - break; - case INTEL_OUTPUT_ANALOG: - ddi_translations = ddi_translations_fdi; - size = n_dp_entries; - break; - default: - BUG(); + n_entries > 9)) + n_entries = 9; } - for (i = 0; i < size; i++) { + for (i = 0; i < n_entries; i++) { I915_WRITE(DDI_BUF_TRANS_LO(port, i), ddi_translations[i].trans1 | iboost_bit); I915_WRITE(DDI_BUF_TRANS_HI(port, i), -- cgit From 97eeb872764ce524d87d0e0e9e25b1c9fae10b3a Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Thu, 23 Feb 2017 19:35:06 +0200 Subject: drm/i915: Refactor translate_signal_level() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Convert the big switch statement in translate_signal_level() into a neat table. The table also serves as documentation for the translation tables. We'll also have other uses for this table later on. v2: Remove superfluous space (David) Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/20170223173507.17600-2-ville.syrjala@linux.intel.com Reviewed-by: David Weinehall --- drivers/gpu/drm/i915/intel_ddi.c | 60 ++++++++++++++-------------------------- 1 file changed, 21 insertions(+), 39 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index 5e4b0172810d..7694f4fcce4e 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -34,6 +34,19 @@ struct ddi_buf_trans { u8 i_boost; /* SKL: I_boost; valid: 0x0, 0x1, 0x3, 0x7 */ }; +static const u8 index_to_dp_signal_levels[] = { + [0] = DP_TRAIN_VOLTAGE_SWING_LEVEL_0 | DP_TRAIN_PRE_EMPH_LEVEL_0, + [1] = DP_TRAIN_VOLTAGE_SWING_LEVEL_0 | DP_TRAIN_PRE_EMPH_LEVEL_1, + [2] = DP_TRAIN_VOLTAGE_SWING_LEVEL_0 | DP_TRAIN_PRE_EMPH_LEVEL_2, + [3] = DP_TRAIN_VOLTAGE_SWING_LEVEL_0 | DP_TRAIN_PRE_EMPH_LEVEL_3, + [4] = DP_TRAIN_VOLTAGE_SWING_LEVEL_1 | DP_TRAIN_PRE_EMPH_LEVEL_0, + [5] = DP_TRAIN_VOLTAGE_SWING_LEVEL_1 | DP_TRAIN_PRE_EMPH_LEVEL_1, + [6] = DP_TRAIN_VOLTAGE_SWING_LEVEL_1 | DP_TRAIN_PRE_EMPH_LEVEL_2, + [7] = DP_TRAIN_VOLTAGE_SWING_LEVEL_2 | DP_TRAIN_PRE_EMPH_LEVEL_0, + [8] = DP_TRAIN_VOLTAGE_SWING_LEVEL_2 | DP_TRAIN_PRE_EMPH_LEVEL_1, + [9] = DP_TRAIN_VOLTAGE_SWING_LEVEL_3 | DP_TRAIN_PRE_EMPH_LEVEL_0, +}; + /* HDMI/DVI modes ignore everything but the last 2 items. So we share * them for both DP and FDI transports, allowing those ports to * automatically adapt to HDMI connections as well @@ -1604,48 +1617,17 @@ static void bxt_ddi_vswing_sequence(struct drm_i915_private *dev_priv, static uint32_t translate_signal_level(int signal_levels) { - uint32_t level; - - switch (signal_levels) { - default: - DRM_DEBUG_KMS("Unsupported voltage swing/pre-emphasis level: 0x%x\n", - signal_levels); - case DP_TRAIN_VOLTAGE_SWING_LEVEL_0 | DP_TRAIN_PRE_EMPH_LEVEL_0: - level = 0; - break; - case DP_TRAIN_VOLTAGE_SWING_LEVEL_0 | DP_TRAIN_PRE_EMPH_LEVEL_1: - level = 1; - break; - case DP_TRAIN_VOLTAGE_SWING_LEVEL_0 | DP_TRAIN_PRE_EMPH_LEVEL_2: - level = 2; - break; - case DP_TRAIN_VOLTAGE_SWING_LEVEL_0 | DP_TRAIN_PRE_EMPH_LEVEL_3: - level = 3; - break; - - case DP_TRAIN_VOLTAGE_SWING_LEVEL_1 | DP_TRAIN_PRE_EMPH_LEVEL_0: - level = 4; - break; - case DP_TRAIN_VOLTAGE_SWING_LEVEL_1 | DP_TRAIN_PRE_EMPH_LEVEL_1: - level = 5; - break; - case DP_TRAIN_VOLTAGE_SWING_LEVEL_1 | DP_TRAIN_PRE_EMPH_LEVEL_2: - level = 6; - break; - - case DP_TRAIN_VOLTAGE_SWING_LEVEL_2 | DP_TRAIN_PRE_EMPH_LEVEL_0: - level = 7; - break; - case DP_TRAIN_VOLTAGE_SWING_LEVEL_2 | DP_TRAIN_PRE_EMPH_LEVEL_1: - level = 8; - break; + int i; - case DP_TRAIN_VOLTAGE_SWING_LEVEL_3 | DP_TRAIN_PRE_EMPH_LEVEL_0: - level = 9; - break; + for (i = 0; i < ARRAY_SIZE(index_to_dp_signal_levels); i++) { + if (index_to_dp_signal_levels[i] == signal_levels) + return i; } - return level; + WARN(1, "Unsupported voltage swing/pre-emphasis level: 0x%x\n", + signal_levels); + + return 0; } uint32_t ddi_signal_levels(struct intel_dp *intel_dp) -- cgit From ffe5111e28e52212040db1617a75035d6d6f9447 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Thu, 23 Feb 2017 19:49:01 +0200 Subject: drm/i915: Introduce intel_ddi_dp_voltage_max() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rather than sprinkling ideas of how big the DDI buf translation tables are somewhere in intel_dp.c, let's concentrate it all in intel_ddi.c where the actual tables are defined. To that end we introduce intel_ddi_dp_voltage_max() which will actually look at the proper translation table to determine what is the maximum voltage swing level supported. v2: Mask out the preemphasis bits from the return value of intel_ddi_dp_voltage_max() Signed-off-by: Ville Syrjälä Reviewed-by: David Weinehall Link: http://patchwork.freedesktop.org/patch/msgid/20170223174901.26749-1-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/intel_ddi.c | 19 +++++++++++++++++++ drivers/gpu/drm/i915/intel_dp.c | 5 ++--- drivers/gpu/drm/i915/intel_drv.h | 2 ++ 3 files changed, 23 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index 7694f4fcce4e..e2947b7f5079 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -1615,6 +1615,25 @@ static void bxt_ddi_vswing_sequence(struct drm_i915_private *dev_priv, ddi_translations[level].deemphasis); } +u8 intel_ddi_dp_voltage_max(struct intel_encoder *encoder) +{ + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + int n_entries; + + if (encoder->type == INTEL_OUTPUT_EDP) + intel_ddi_get_buf_trans_edp(dev_priv, &n_entries); + else + intel_ddi_get_buf_trans_dp(dev_priv, &n_entries); + + if (WARN_ON(n_entries < 1)) + n_entries = 1; + if (WARN_ON(n_entries > ARRAY_SIZE(index_to_dp_signal_levels))) + n_entries = ARRAY_SIZE(index_to_dp_signal_levels); + + return index_to_dp_signal_levels[n_entries - 1] & + DP_TRAIN_VOLTAGE_SWING_MASK; +} + static uint32_t translate_signal_level(int signal_levels) { int i; diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 024798a9c016..e72c92a08c81 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -3098,9 +3098,8 @@ intel_dp_voltage_max(struct intel_dp *intel_dp) if (IS_GEN9_LP(dev_priv)) return DP_TRAIN_VOLTAGE_SWING_LEVEL_3; else if (INTEL_GEN(dev_priv) >= 9) { - if (dev_priv->vbt.edp.low_vswing && port == PORT_A) - return DP_TRAIN_VOLTAGE_SWING_LEVEL_3; - return DP_TRAIN_VOLTAGE_SWING_LEVEL_2; + struct intel_encoder *encoder = &dp_to_dig_port(intel_dp)->base; + return intel_ddi_dp_voltage_max(encoder); } else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) return DP_TRAIN_VOLTAGE_SWING_LEVEL_3; else if (IS_GEN7(dev_priv) && port == PORT_A) diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 9b5fcc1423eb..c3c8ed726717 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -1233,6 +1233,8 @@ void intel_ddi_clock_get(struct intel_encoder *encoder, struct intel_crtc_state *pipe_config); void intel_ddi_set_vc_payload_alloc(struct drm_crtc *crtc, bool state); uint32_t ddi_signal_levels(struct intel_dp *intel_dp); +u8 intel_ddi_dp_voltage_max(struct intel_encoder *encoder); + unsigned int intel_fb_align_height(struct drm_i915_private *dev_priv, unsigned int height, uint32_t pixel_format, -- cgit From fe9ae7a3bfdb1d5933210f53e9efa7655768d238 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 23 Feb 2017 14:50:31 +0000 Subject: drm/i915/execlists: Detect an out-of-order context switch We require that the request is completed before the context is switched away. Signed-off-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20170223145031.26210-1-chris@chris-wilson.co.uk Reviewed-by: Mika Kuoppala --- drivers/gpu/drm/i915/intel_lrc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 39329d40da46..1c6c71673bfa 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -591,6 +591,7 @@ static void intel_lrc_irq_handler(unsigned long data) GEM_BUG_ON(port[0].count == 0); if (--port[0].count == 0) { GEM_BUG_ON(status & GEN8_CTX_STATUS_PREEMPTED); + GEM_BUG_ON(!i915_gem_request_completed(port[0].request)); execlists_context_status_change(port[0].request, INTEL_CONTEXT_SCHEDULE_OUT); -- cgit From e393d0d6c12527b2bd7faf0b5ca7edde2543c451 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Wed, 22 Feb 2017 17:10:52 +0200 Subject: drm/i915/lspcon: Switch back to PCON mode after output replug MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit At least a ParadTech PS175 LSPCON chip/firmware uses long instead of short pulses to signal output unplug/plug events. This is contrary to how branch devices normally work which use short HPD signaling. This chip will also switch to LS mode after an unplug event, which could be the consequence of the long HPD signaling semantics and an effort to save power automatically. Because of this we'll fail to do AUX and detect the output after a replug event. To fix this make sure we are in PCON mode during connector detection. v2: - Switch the mode in the proper spot. Cc: raptorteak@gmail.com Cc: Shashank Sharma Cc: Ville Syrjälä Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=98912 Reported-and-tested-by: raptorteak@gmail.com Signed-off-by: Imre Deak Reviewed-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/1487776252-6288-1-git-send-email-imre.deak@intel.com --- drivers/gpu/drm/i915/intel_dp.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index e72c92a08c81..6c661c64cff6 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -4325,9 +4325,13 @@ intel_dp_short_pulse(struct intel_dp *intel_dp) static enum drm_connector_status intel_dp_detect_dpcd(struct intel_dp *intel_dp) { + struct intel_lspcon *lspcon = dp_to_lspcon(intel_dp); uint8_t *dpcd = intel_dp->dpcd; uint8_t type; + if (lspcon->active) + lspcon_resume(lspcon); + if (!intel_dp_get_dpcd(intel_dp)) return connector_status_disconnected; -- cgit From ef0f411f51475f4eebf9fc1b19a85be698af19ff Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Wed, 15 Feb 2017 01:34:46 -0800 Subject: drm/i915: Drop support for I915_EXEC_CONSTANTS_* execbuf parameters. This patch makes the I915_PARAM_HAS_EXEC_CONSTANTS getparam return 0 (indicating the optional feature is not supported), and makes execbuf always return -EINVAL if the flags are used. Apparently, no userspace ever shipped which used this optional feature: I checked the git history of Mesa, xf86-video-intel, libva, and Beignet, and there were zero commits showing a use of these flags. Kernel commit 72bfa19c8deb4 apparently introduced the feature prematurely. According to Chris, the intention was to use this in cairo-drm, but "the use was broken for gen6", so I don't think it ever happened. 'relative_constants_mode' has always been tracked per-device, but this has actually been wrong ever since hardware contexts were introduced, as the INSTPM register is saved (and automatically restored) as part of the render ring context. The software per-device value could therefore get out of sync with the hardware per-context value. This meant that using them is actually unsafe: a client which tried to use them could damage the state of other clients, causing the GPU to interpret their BO offsets as absolute pointers, leading to bogus memory reads. These flags were also never ported to execlist mode, making them no-ops on Gen9+ (which requires execlists), and Gen8 in the default mode. On Gen8+, userspace can write these registers directly, achieving the same effect. On Gen6-7.5, it likely makes sense to extend the command parser to support them. I don't think anyone wants this on Gen4-5. Based on a patch by Dave Gordon. v3: Return -ENODEV for the getparam, as this is what we do for other obsolete features. Suggested by Chris Wilson. Cc: stable@vger.kernel.org Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=92448 Signed-off-by: Kenneth Graunke Reviewed-by: Joonas Lahtinen Reviewed-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20170215093446.21291-1-kenneth@whitecape.org Acked-by: Daniel Vetter Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_drv.c | 4 +-- drivers/gpu/drm/i915/i915_drv.h | 2 -- drivers/gpu/drm/i915/i915_gem.c | 2 -- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 50 ++---------------------------- 4 files changed, 3 insertions(+), 55 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 747cd5dfb910..51241dec3f83 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -249,6 +249,7 @@ static int i915_getparam(struct drm_device *dev, void *data, case I915_PARAM_IRQ_ACTIVE: case I915_PARAM_ALLOW_BATCHBUFFER: case I915_PARAM_LAST_DISPATCH: + case I915_PARAM_HAS_EXEC_CONSTANTS: /* Reject all old ums/dri params. */ return -ENODEV; case I915_PARAM_CHIPSET_ID: @@ -275,9 +276,6 @@ static int i915_getparam(struct drm_device *dev, void *data, case I915_PARAM_HAS_BSD2: value = !!dev_priv->engine[VCS2]; break; - case I915_PARAM_HAS_EXEC_CONSTANTS: - value = INTEL_GEN(dev_priv) >= 4; - break; case I915_PARAM_HAS_LLC: value = HAS_LLC(dev_priv); break; diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index e5b34eaf41ca..0a6fdb024b86 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2098,8 +2098,6 @@ struct drm_i915_private { const struct intel_device_info info; - int relative_constants_mode; - void __iomem *regs; struct intel_uncore uncore; diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index a8167003c10b..01dbba3813c7 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4632,8 +4632,6 @@ i915_gem_load_init(struct drm_i915_private *dev_priv) init_waitqueue_head(&dev_priv->gpu_error.wait_queue); init_waitqueue_head(&dev_priv->gpu_error.reset_queue); - dev_priv->relative_constants_mode = I915_EXEC_CONSTANTS_REL_GENERAL; - init_waitqueue_head(&dev_priv->pending_flip_queue); dev_priv->mm.interruptible = true; diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 35d2cb979452..aa75ea2d0578 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -1412,10 +1412,7 @@ execbuf_submit(struct i915_execbuffer_params *params, struct drm_i915_gem_execbuffer2 *args, struct list_head *vmas) { - struct drm_i915_private *dev_priv = params->request->i915; u64 exec_start, exec_len; - int instp_mode; - u32 instp_mask, *cs; int ret; ret = i915_gem_execbuffer_move_to_gpu(params->request, vmas); @@ -1426,54 +1423,11 @@ execbuf_submit(struct i915_execbuffer_params *params, if (ret) return ret; - instp_mode = args->flags & I915_EXEC_CONSTANTS_MASK; - instp_mask = I915_EXEC_CONSTANTS_MASK; - switch (instp_mode) { - case I915_EXEC_CONSTANTS_REL_GENERAL: - case I915_EXEC_CONSTANTS_ABSOLUTE: - case I915_EXEC_CONSTANTS_REL_SURFACE: - if (instp_mode != 0 && params->engine->id != RCS) { - DRM_DEBUG("non-0 rel constants mode on non-RCS\n"); - return -EINVAL; - } - - if (instp_mode != dev_priv->relative_constants_mode) { - if (INTEL_INFO(dev_priv)->gen < 4) { - DRM_DEBUG("no rel constants on pre-gen4\n"); - return -EINVAL; - } - - if (INTEL_INFO(dev_priv)->gen > 5 && - instp_mode == I915_EXEC_CONSTANTS_REL_SURFACE) { - DRM_DEBUG("rel surface constants mode invalid on gen5+\n"); - return -EINVAL; - } - - /* The HW changed the meaning on this bit on gen6 */ - if (INTEL_INFO(dev_priv)->gen >= 6) - instp_mask &= ~I915_EXEC_CONSTANTS_REL_SURFACE; - } - break; - default: - DRM_DEBUG("execbuf with unknown constants: %d\n", instp_mode); + if (args->flags & I915_EXEC_CONSTANTS_MASK) { + DRM_DEBUG("I915_EXEC_CONSTANTS_* unsupported\n"); return -EINVAL; } - if (params->engine->id == RCS && - instp_mode != dev_priv->relative_constants_mode) { - cs = intel_ring_begin(params->request, 4); - if (IS_ERR(cs)) - return PTR_ERR(cs); - - *cs++ = MI_NOOP; - *cs++ = MI_LOAD_REGISTER_IMM(1); - *cs++ = i915_mmio_reg_offset(INSTPM); - *cs++ = instp_mask << 16 | instp_mode; - intel_ring_advance(params->request, cs); - - dev_priv->relative_constants_mode = instp_mode; - } - if (args->flags & I915_EXEC_GEN7_SOL_RESET) { ret = i915_reset_gen7_sol_offsets(params->request); if (ret) -- cgit From 91e32157de148ce37422d8110f6f8a84d05da1ec Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 24 Feb 2017 19:33:15 +0000 Subject: drm/i915: Timeout lowlevel_hole GTT selftest early Check for a timeout in the lowlevel_hole GTT before we allocate state for that pass, as our cleanup phase stops on the iteration before the timeout. References: https://bugs.freedesktop.org/show_bug.cgi?id=99947 Signed-off-by: Chris Wilson Cc: Matthew Auld Link: http://patchwork.freedesktop.org/patch/msgid/20170224193315.21072-1-chris@chris-wilson.co.uk Reviewed-by: Matthew Auld --- drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c index e23753181720..6bac267914df 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c @@ -237,18 +237,19 @@ static int lowlevel_hole(struct drm_i915_private *i915, GEM_BUG_ON(addr + BIT_ULL(size) > vm->total); + if (igt_timeout(end_time, + "%s timed out before %d/%d\n", + __func__, n, count)) { + hole_end = hole_start; /* quit */ + break; + } + if (vm->allocate_va_range && vm->allocate_va_range(vm, addr, BIT_ULL(size))) break; vm->insert_entries(vm, obj->mm.pages, addr, I915_CACHE_NONE, 0); - if (igt_timeout(end_time, - "%s timed out after %d/%d\n", - __func__, n, count)) { - hole_end = hole_start; /* quit */ - break; - } } count = n; -- cgit From b3bb82885feb74feadc101a700e64ed7ede84b04 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sat, 25 Feb 2017 18:11:19 +0000 Subject: drm/i915: Assert all sg are initialised in fake_dma_object for selftests Double check that we allocated the right amount of scatterlist elements for our obj->size. Signed-off-by: Chris Wilson Cc: Matthew Auld Reviewed-by: Matthew Auld Link: http://patchwork.freedesktop.org/patch/msgid/20170225181122.4788-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c index 6bac267914df..0f3fa34377c6 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c @@ -62,12 +62,14 @@ fake_get_pages(struct drm_i915_gem_object *obj) for (sg = pages->sgl; sg; sg = sg_next(sg)) { unsigned long len = min_t(typeof(rem), rem, BIT(31)); + GEM_BUG_ON(!len); sg_set_page(sg, pfn_to_page(PFN_BIAS), len, 0); sg_dma_address(sg) = page_to_phys(sg_page(sg)); sg_dma_len(sg) = len; rem -= len; } + GEM_BUG_ON(rem); obj->mm.madv = I915_MADV_DONTNEED; return pages; -- cgit From 357480ce2fe4dc11ff0b77706d77e2319c96fe31 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sat, 25 Feb 2017 18:11:20 +0000 Subject: drm/i915: Assert we do not overflow 4lvl page directories Before looking up the page directory entry, check we are still within bounds. Signed-off-by: Chris Wilson Cc: Matthew Auld Reviewed-by: Matthew Auld Link: http://patchwork.freedesktop.org/patch/msgid/20170225181122.4788-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_gtt.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 24fa262c2b61..e6e0dfa761dd 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -830,6 +830,7 @@ gen8_ppgtt_insert_pte_entries(struct i915_hw_ppgtt *ppgtt, gen8_pte_t *vaddr; bool ret; + GEM_BUG_ON(pdpe >= I915_PDPES_PER_PDP(vm)); pd = pdp->page_directory[pdpe]; vaddr = kmap_atomic_px(pd->page_table[pde]); do { @@ -854,8 +855,7 @@ gen8_ppgtt_insert_pte_entries(struct i915_hw_ppgtt *ppgtt, break; } - GEM_BUG_ON(!i915_vm_is_48bit(&ppgtt->base) && - pdpe >= GEN8_LEGACY_PDPES); + GEM_BUG_ON(pdpe >= I915_PDPES_PER_PDP(vm)); pd = pdp->page_directory[pdpe]; pde = 0; } @@ -904,7 +904,7 @@ static void gen8_ppgtt_insert_4lvl(struct i915_address_space *vm, while (gen8_ppgtt_insert_pte_entries(ppgtt, pdps[pml4e++], &iter, start, cache_level)) - ; + GEM_BUG_ON(pml4e >= GEN8_PML4ES_PER_PML4); } static void gen8_free_page_tables(struct i915_address_space *vm, -- cgit From aa149431279166025bc457007e5a1fdcb4d2db9b Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sat, 25 Feb 2017 18:11:21 +0000 Subject: drm/i915: Sanity check the vma->node prior to binding into the GTT We rely on the VMA being allocated inside the drm_mm and for its allotted node being large enough to accommodate all the vma->pages. Signed-off-by: Chris Wilson Cc: Matthew Auld Reviewed-by: Matthew Auld Link: http://patchwork.freedesktop.org/patch/msgid/20170225181122.4788-3-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_vma.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index c1abfe7b48ea..6e9eade304b8 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -241,7 +241,15 @@ int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level, u32 vma_flags; int ret; - if (WARN_ON(flags == 0)) + GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); + GEM_BUG_ON(vma->size > vma->node.size); + + if (GEM_WARN_ON(range_overflows(vma->node.start, + vma->node.size, + vma->vm->total))) + return -ENODEV; + + if (GEM_WARN_ON(!flags)) return -EINVAL; bind_flags = 0; @@ -258,11 +266,6 @@ int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level, if (bind_flags == 0) return 0; - if (GEM_WARN_ON(range_overflows(vma->node.start, - vma->node.size, - vma->vm->total))) - return -ENODEV; - trace_i915_vma_bind(vma, bind_flags); ret = vma->vm->bind_vma(vma, cache_level, bind_flags); if (ret) -- cgit From 9e89f9ee3b16cca56bed5fa45e63f422d3ac2c3a Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sat, 25 Feb 2017 18:11:22 +0000 Subject: drm/i915: Advance start address on crossing PML (48b ppgtt) boundary When advancing onto the next 4th level page table entry, we need to reset our indices to 0. Currently we restart from the original address which means we start with an offset into the next PML table. Fixes: 894ccebee2b0 ("drm/i915: Micro-optimise gen8_ppgtt_insert_entries()") Reported-by: Matthew Auld Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=99948 Testcase: igt/drv_selftest/live_gtt Signed-off-by: Chris Wilson Cc: Matthew Auld Tested-by: Matthew Auld Reviewed-by: Matthew Auld Link: http://patchwork.freedesktop.org/patch/msgid/20170225181122.4788-4-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_gtt.c | 63 ++++++++++++++++++++++++------------- 1 file changed, 41 insertions(+), 22 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index e6e0dfa761dd..6fdbd5ae4fcb 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -815,26 +815,41 @@ struct sgt_dma { dma_addr_t dma, max; }; +struct gen8_insert_pte { + u16 pml4e; + u16 pdpe; + u16 pde; + u16 pte; +}; + +static __always_inline struct gen8_insert_pte gen8_insert_pte(u64 start) +{ + return (struct gen8_insert_pte) { + gen8_pml4e_index(start), + gen8_pdpe_index(start), + gen8_pde_index(start), + gen8_pte_index(start), + }; +} + static __always_inline bool gen8_ppgtt_insert_pte_entries(struct i915_hw_ppgtt *ppgtt, struct i915_page_directory_pointer *pdp, struct sgt_dma *iter, - u64 start, + struct gen8_insert_pte *idx, enum i915_cache_level cache_level) { - unsigned int pdpe = gen8_pdpe_index(start); - unsigned int pde = gen8_pde_index(start); - unsigned int pte = gen8_pte_index(start); struct i915_page_directory *pd; const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level); gen8_pte_t *vaddr; bool ret; - GEM_BUG_ON(pdpe >= I915_PDPES_PER_PDP(vm)); - pd = pdp->page_directory[pdpe]; - vaddr = kmap_atomic_px(pd->page_table[pde]); + GEM_BUG_ON(idx->pdpe >= I915_PDPES_PER_PDP(vm)); + pd = pdp->page_directory[idx->pdpe]; + vaddr = kmap_atomic_px(pd->page_table[idx->pde]); do { - vaddr[pte] = pte_encode | iter->dma; + vaddr[idx->pte] = pte_encode | iter->dma; + iter->dma += PAGE_SIZE; if (iter->dma >= iter->max) { iter->sg = __sg_next(iter->sg); @@ -847,22 +862,25 @@ gen8_ppgtt_insert_pte_entries(struct i915_hw_ppgtt *ppgtt, iter->max = iter->dma + iter->sg->length; } - if (++pte == GEN8_PTES) { - if (++pde == I915_PDES) { + if (++idx->pte == GEN8_PTES) { + idx->pte = 0; + + if (++idx->pde == I915_PDES) { + idx->pde = 0; + /* Limited by sg length for 3lvl */ - if (++pdpe == GEN8_PML4ES_PER_PML4) { + if (++idx->pdpe == GEN8_PML4ES_PER_PML4) { + idx->pdpe = 0; ret = true; break; } - GEM_BUG_ON(pdpe >= I915_PDPES_PER_PDP(vm)); - pd = pdp->page_directory[pdpe]; - pde = 0; + GEM_BUG_ON(idx->pdpe >= I915_PDPES_PER_PDP(vm)); + pd = pdp->page_directory[idx->pdpe]; } kunmap_atomic(vaddr); - vaddr = kmap_atomic_px(pd->page_table[pde]); - pte = 0; + vaddr = kmap_atomic_px(pd->page_table[idx->pde]); } } while (1); kunmap_atomic(vaddr); @@ -882,9 +900,10 @@ static void gen8_ppgtt_insert_3lvl(struct i915_address_space *vm, .dma = sg_dma_address(iter.sg), .max = iter.dma + iter.sg->length, }; + struct gen8_insert_pte idx = gen8_insert_pte(start); - gen8_ppgtt_insert_pte_entries(ppgtt, &ppgtt->pdp, &iter, - start, cache_level); + gen8_ppgtt_insert_pte_entries(ppgtt, &ppgtt->pdp, &iter, &idx, + cache_level); } static void gen8_ppgtt_insert_4lvl(struct i915_address_space *vm, @@ -900,11 +919,11 @@ static void gen8_ppgtt_insert_4lvl(struct i915_address_space *vm, .max = iter.dma + iter.sg->length, }; struct i915_page_directory_pointer **pdps = ppgtt->pml4.pdps; - unsigned int pml4e = gen8_pml4e_index(start); + struct gen8_insert_pte idx = gen8_insert_pte(start); - while (gen8_ppgtt_insert_pte_entries(ppgtt, pdps[pml4e++], &iter, - start, cache_level)) - GEM_BUG_ON(pml4e >= GEN8_PML4ES_PER_PML4); + while (gen8_ppgtt_insert_pte_entries(ppgtt, pdps[idx.pml4e++], &iter, + &idx, cache_level)) + GEM_BUG_ON(idx.pml4e >= GEN8_PML4ES_PER_PML4); } static void gen8_free_page_tables(struct i915_address_space *vm, -- cgit From 5432fcaff3cee045ddbd94f29802ee3ac4f50283 Mon Sep 17 00:00:00 2001 From: Ander Conselvan de Oliveira Date: Wed, 22 Feb 2017 08:34:26 +0200 Subject: drm/i915: Store aux power domain in intel_dp The aux power domain only makes sense in the DP code. Storing it in struct intel_dp avoids some indirection. v2: Rebase Signed-off-by: Ander Conselvan de Oliveira Reviewed-by: Imre Deak Link: http://patchwork.freedesktop.org/patch/msgid/20170222063431.10060-2-ander.conselvan.de.oliveira@intel.com --- drivers/gpu/drm/i915/intel_display.c | 50 ----------------------------- drivers/gpu/drm/i915/intel_dp.c | 61 ++++++++++++++---------------------- drivers/gpu/drm/i915/intel_drv.h | 3 +- 3 files changed, 24 insertions(+), 90 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 5a91f6741fed..e9fb2edb9dd7 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -5587,26 +5587,6 @@ static enum intel_display_power_domain port_to_power_domain(enum port port) } } -static enum intel_display_power_domain port_to_aux_power_domain(enum port port) -{ - switch (port) { - case PORT_A: - return POWER_DOMAIN_AUX_A; - case PORT_B: - return POWER_DOMAIN_AUX_B; - case PORT_C: - return POWER_DOMAIN_AUX_C; - case PORT_D: - return POWER_DOMAIN_AUX_D; - case PORT_E: - /* FIXME: Check VBT for actual wiring of PORT E */ - return POWER_DOMAIN_AUX_D; - default: - MISSING_CASE(port); - return POWER_DOMAIN_AUX_A; - } -} - enum intel_display_power_domain intel_display_port_power_domain(struct intel_encoder *intel_encoder) { @@ -5634,36 +5614,6 @@ intel_display_port_power_domain(struct intel_encoder *intel_encoder) } } -enum intel_display_power_domain -intel_display_port_aux_power_domain(struct intel_encoder *intel_encoder) -{ - struct drm_i915_private *dev_priv = to_i915(intel_encoder->base.dev); - struct intel_digital_port *intel_dig_port; - - switch (intel_encoder->type) { - case INTEL_OUTPUT_UNKNOWN: - case INTEL_OUTPUT_HDMI: - /* - * Only DDI platforms should ever use these output types. - * We can get here after the HDMI detect code has already set - * the type of the shared encoder. Since we can't be sure - * what's the status of the given connectors, play safe and - * run the DP detection too. - */ - WARN_ON_ONCE(!HAS_DDI(dev_priv)); - case INTEL_OUTPUT_DP: - case INTEL_OUTPUT_EDP: - intel_dig_port = enc_to_dig_port(&intel_encoder->base); - return port_to_aux_power_domain(intel_dig_port->port); - case INTEL_OUTPUT_DP_MST: - intel_dig_port = enc_to_mst(&intel_encoder->base)->primary; - return port_to_aux_power_domain(intel_dig_port->port); - default: - MISSING_CASE(intel_encoder->type); - return POWER_DOMAIN_AUX_A; - } -} - static u64 get_crtc_power_domains(struct drm_crtc *crtc, struct intel_crtc_state *crtc_state) { diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 6c661c64cff6..aa0c79087bbf 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -396,14 +396,12 @@ static void pps_lock(struct intel_dp *intel_dp) struct intel_encoder *encoder = &intel_dig_port->base; struct drm_device *dev = encoder->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - enum intel_display_power_domain power_domain; /* * See vlv_power_sequencer_reset() why we need * a power domain reference here. */ - power_domain = intel_display_port_aux_power_domain(encoder); - intel_display_power_get(dev_priv, power_domain); + intel_display_power_get(dev_priv, intel_dp->aux_power_domain); mutex_lock(&dev_priv->pps_mutex); } @@ -414,12 +412,10 @@ static void pps_unlock(struct intel_dp *intel_dp) struct intel_encoder *encoder = &intel_dig_port->base; struct drm_device *dev = encoder->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - enum intel_display_power_domain power_domain; mutex_unlock(&dev_priv->pps_mutex); - power_domain = intel_display_port_aux_power_domain(encoder); - intel_display_power_put(dev_priv, power_domain); + intel_display_power_put(dev_priv, intel_dp->aux_power_domain); } static void @@ -2005,9 +2001,7 @@ static bool edp_panel_vdd_on(struct intel_dp *intel_dp) { struct drm_device *dev = intel_dp_to_dev(intel_dp); struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); - struct intel_encoder *intel_encoder = &intel_dig_port->base; struct drm_i915_private *dev_priv = to_i915(dev); - enum intel_display_power_domain power_domain; u32 pp; i915_reg_t pp_stat_reg, pp_ctrl_reg; bool need_to_disable = !intel_dp->want_panel_vdd; @@ -2023,8 +2017,7 @@ static bool edp_panel_vdd_on(struct intel_dp *intel_dp) if (edp_have_panel_vdd(intel_dp)) return need_to_disable; - power_domain = intel_display_port_aux_power_domain(intel_encoder); - intel_display_power_get(dev_priv, power_domain); + intel_display_power_get(dev_priv, intel_dp->aux_power_domain); DRM_DEBUG_KMS("Turning eDP port %c VDD on\n", port_name(intel_dig_port->port)); @@ -2082,8 +2075,6 @@ static void edp_panel_vdd_off_sync(struct intel_dp *intel_dp) struct drm_i915_private *dev_priv = to_i915(dev); struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); - struct intel_encoder *intel_encoder = &intel_dig_port->base; - enum intel_display_power_domain power_domain; u32 pp; i915_reg_t pp_stat_reg, pp_ctrl_reg; @@ -2113,8 +2104,7 @@ static void edp_panel_vdd_off_sync(struct intel_dp *intel_dp) if ((pp & PANEL_POWER_ON) == 0) intel_dp->panel_power_off_time = ktime_get_boottime(); - power_domain = intel_display_port_aux_power_domain(intel_encoder); - intel_display_power_put(dev_priv, power_domain); + intel_display_power_put(dev_priv, intel_dp->aux_power_domain); } static void edp_panel_vdd_work(struct work_struct *__work) @@ -2227,11 +2217,8 @@ void intel_edp_panel_on(struct intel_dp *intel_dp) static void edp_panel_off(struct intel_dp *intel_dp) { - struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); - struct intel_encoder *intel_encoder = &intel_dig_port->base; struct drm_device *dev = intel_dp_to_dev(intel_dp); struct drm_i915_private *dev_priv = to_i915(dev); - enum intel_display_power_domain power_domain; u32 pp; i915_reg_t pp_ctrl_reg; @@ -2263,8 +2250,7 @@ static void edp_panel_off(struct intel_dp *intel_dp) wait_panel_off(intel_dp); /* We got a reference when we enabled the VDD. */ - power_domain = intel_display_port_aux_power_domain(intel_encoder); - intel_display_power_put(dev_priv, power_domain); + intel_display_power_put(dev_priv, intel_dp->aux_power_domain); } void intel_edp_panel_off(struct intel_dp *intel_dp) @@ -4590,11 +4576,9 @@ intel_dp_long_pulse(struct intel_connector *intel_connector) struct intel_encoder *intel_encoder = &intel_dig_port->base; struct drm_device *dev = connector->dev; enum drm_connector_status status; - enum intel_display_power_domain power_domain; u8 sink_irq_vector = 0; - power_domain = intel_display_port_aux_power_domain(intel_encoder); - intel_display_power_get(to_i915(dev), power_domain); + intel_display_power_get(to_i915(dev), intel_dp->aux_power_domain); /* Can't disconnect eDP, but you can close the lid... */ if (is_edp(intel_dp)) @@ -4695,7 +4679,7 @@ out: if (status != connector_status_connected && !intel_dp->is_mst) intel_dp_unset_edid(intel_dp); - intel_display_power_put(to_i915(dev), power_domain); + intel_display_power_put(to_i915(dev), intel_dp->aux_power_domain); return status; } @@ -4723,7 +4707,6 @@ intel_dp_force(struct drm_connector *connector) struct intel_dp *intel_dp = intel_attached_dp(connector); struct intel_encoder *intel_encoder = &dp_to_dig_port(intel_dp)->base; struct drm_i915_private *dev_priv = to_i915(intel_encoder->base.dev); - enum intel_display_power_domain power_domain; DRM_DEBUG_KMS("[CONNECTOR:%d:%s]\n", connector->base.id, connector->name); @@ -4732,12 +4715,11 @@ intel_dp_force(struct drm_connector *connector) if (connector->status != connector_status_connected) return; - power_domain = intel_display_port_aux_power_domain(intel_encoder); - intel_display_power_get(dev_priv, power_domain); + intel_display_power_get(dev_priv, intel_dp->aux_power_domain); intel_dp_set_edid(intel_dp); - intel_display_power_put(dev_priv, power_domain); + intel_display_power_put(dev_priv, intel_dp->aux_power_domain); if (intel_encoder->type != INTEL_OUTPUT_EDP) intel_encoder->type = INTEL_OUTPUT_DP; @@ -4972,7 +4954,6 @@ static void intel_edp_panel_vdd_sanitize(struct intel_dp *intel_dp) struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); struct drm_device *dev = intel_dig_port->base.base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - enum intel_display_power_domain power_domain; lockdep_assert_held(&dev_priv->pps_mutex); @@ -4986,8 +4967,7 @@ static void intel_edp_panel_vdd_sanitize(struct intel_dp *intel_dp) * indefinitely. */ DRM_DEBUG_KMS("VDD left on by BIOS, adjusting state tracking\n"); - power_domain = intel_display_port_aux_power_domain(&intel_dig_port->base); - intel_display_power_get(dev_priv, power_domain); + intel_display_power_get(dev_priv, intel_dp->aux_power_domain); edp_panel_vdd_schedule_off(intel_dp); } @@ -5061,10 +5041,8 @@ enum irqreturn intel_dp_hpd_pulse(struct intel_digital_port *intel_dig_port, bool long_hpd) { struct intel_dp *intel_dp = &intel_dig_port->dp; - struct intel_encoder *intel_encoder = &intel_dig_port->base; struct drm_device *dev = intel_dig_port->base.base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - enum intel_display_power_domain power_domain; enum irqreturn ret = IRQ_NONE; if (intel_dig_port->base.type != INTEL_OUTPUT_EDP && @@ -5093,8 +5071,7 @@ intel_dp_hpd_pulse(struct intel_digital_port *intel_dig_port, bool long_hpd) return IRQ_NONE; } - power_domain = intel_display_port_aux_power_domain(intel_encoder); - intel_display_power_get(dev_priv, power_domain); + intel_display_power_get(dev_priv, intel_dp->aux_power_domain); if (intel_dp->is_mst) { if (intel_dp_check_mst_status(intel_dp) == -EINVAL) { @@ -5122,7 +5099,7 @@ intel_dp_hpd_pulse(struct intel_digital_port *intel_dig_port, bool long_hpd) ret = IRQ_HANDLED; put_power: - intel_display_power_put(dev_priv, power_domain); + intel_display_power_put(dev_priv, intel_dp->aux_power_domain); return ret; } @@ -5913,27 +5890,35 @@ out_vdd_off: return false; } +/* Set up the hotplug pin and aux power domain. */ static void intel_dp_init_connector_port_info(struct intel_digital_port *intel_dig_port) { struct intel_encoder *encoder = &intel_dig_port->base; + struct intel_dp *intel_dp = &intel_dig_port->dp; - /* Set up the hotplug pin. */ switch (intel_dig_port->port) { case PORT_A: encoder->hpd_pin = HPD_PORT_A; + intel_dp->aux_power_domain = POWER_DOMAIN_AUX_A; break; case PORT_B: encoder->hpd_pin = HPD_PORT_B; + intel_dp->aux_power_domain = POWER_DOMAIN_AUX_B; break; case PORT_C: encoder->hpd_pin = HPD_PORT_C; + intel_dp->aux_power_domain = POWER_DOMAIN_AUX_C; break; case PORT_D: encoder->hpd_pin = HPD_PORT_D; + intel_dp->aux_power_domain = POWER_DOMAIN_AUX_D; break; case PORT_E: encoder->hpd_pin = HPD_PORT_E; + + /* FIXME: Check VBT for actual wiring of PORT E */ + intel_dp->aux_power_domain = POWER_DOMAIN_AUX_D; break; default: MISSING_CASE(intel_dig_port->port); @@ -6014,6 +5999,8 @@ intel_dp_init_connector(struct intel_digital_port *intel_dig_port, connector->interlace_allowed = true; connector->doublescan_allowed = 0; + intel_dp_init_connector_port_info(intel_dig_port); + intel_dp_aux_init(intel_dp); INIT_DELAYED_WORK(&intel_dp->panel_vdd_work, @@ -6026,8 +6013,6 @@ intel_dp_init_connector(struct intel_digital_port *intel_dig_port, else intel_connector->get_hw_state = intel_connector_get_hw_state; - intel_dp_init_connector_port_info(intel_dig_port); - /* init MST on ports that can support it */ if (HAS_DP_MST(dev_priv) && !is_edp(intel_dp) && (port == PORT_B || port == PORT_C || port == PORT_D)) diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index c3c8ed726717..f2c717a42f30 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -951,6 +951,7 @@ struct intel_dp { /* sink or branch descriptor */ struct intel_dp_desc desc; struct drm_dp_aux aux; + enum intel_display_power_domain aux_power_domain; uint8_t train_set[4]; int panel_power_up_delay; int panel_power_down_delay; @@ -1421,8 +1422,6 @@ void hsw_enable_ips(struct intel_crtc *crtc); void hsw_disable_ips(struct intel_crtc *crtc); enum intel_display_power_domain intel_display_port_power_domain(struct intel_encoder *intel_encoder); -enum intel_display_power_domain -intel_display_port_aux_power_domain(struct intel_encoder *intel_encoder); void intel_mode_from_pipe_config(struct drm_display_mode *mode, struct intel_crtc_state *pipe_config); -- cgit From 79f255a0c99cf5d871161959d91bd7ba711f949f Mon Sep 17 00:00:00 2001 From: Ander Conselvan de Oliveira Date: Wed, 22 Feb 2017 08:34:27 +0200 Subject: drm/i915: Store encoder power domain in struct intel_encoder The encoder power domain is obviously tied to the encoder, so store it in struct intel_encoder. This avoids some indirection. v2: Rebase Signed-off-by: Ander Conselvan de Oliveira Reviewed-by: Imre Deak Link: http://patchwork.freedesktop.org/patch/msgid/20170222063431.10060-3-ander.conselvan.de.oliveira@intel.com --- drivers/gpu/drm/i915/intel_crt.c | 21 +++++++++------------ drivers/gpu/drm/i915/intel_ddi.c | 15 +++++++-------- drivers/gpu/drm/i915/intel_display.c | 31 ++----------------------------- drivers/gpu/drm/i915/intel_dp.c | 8 ++++---- drivers/gpu/drm/i915/intel_dp_mst.c | 1 + drivers/gpu/drm/i915/intel_drv.h | 4 ++-- drivers/gpu/drm/i915/intel_dsi.c | 9 +++++---- drivers/gpu/drm/i915/intel_dvo.c | 1 + drivers/gpu/drm/i915/intel_hdmi.c | 8 ++++---- drivers/gpu/drm/i915/intel_lvds.c | 8 ++++---- drivers/gpu/drm/i915/intel_sdvo.c | 1 + drivers/gpu/drm/i915/intel_tv.c | 1 + 12 files changed, 41 insertions(+), 67 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_crt.c b/drivers/gpu/drm/i915/intel_crt.c index 2bf5aca6e37c..8c82607294c6 100644 --- a/drivers/gpu/drm/i915/intel_crt.c +++ b/drivers/gpu/drm/i915/intel_crt.c @@ -69,12 +69,11 @@ static bool intel_crt_get_hw_state(struct intel_encoder *encoder, struct drm_device *dev = encoder->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); struct intel_crt *crt = intel_encoder_to_crt(encoder); - enum intel_display_power_domain power_domain; u32 tmp; bool ret; - power_domain = intel_display_port_power_domain(encoder); - if (!intel_display_power_get_if_enabled(dev_priv, power_domain)) + if (!intel_display_power_get_if_enabled(dev_priv, + encoder->power_domain)) return false; ret = false; @@ -91,7 +90,7 @@ static bool intel_crt_get_hw_state(struct intel_encoder *encoder, ret = true; out: - intel_display_power_put(dev_priv, power_domain); + intel_display_power_put(dev_priv, encoder->power_domain); return ret; } @@ -676,7 +675,6 @@ intel_crt_detect(struct drm_connector *connector, bool force) struct drm_i915_private *dev_priv = to_i915(connector->dev); struct intel_crt *crt = intel_attached_crt(connector); struct intel_encoder *intel_encoder = &crt->base; - enum intel_display_power_domain power_domain; enum drm_connector_status status; struct intel_load_detect_pipe tmp; struct drm_modeset_acquire_ctx ctx; @@ -689,8 +687,7 @@ intel_crt_detect(struct drm_connector *connector, bool force) if (dmi_check_system(intel_spurious_crt_detect)) return connector_status_disconnected; - power_domain = intel_display_port_power_domain(intel_encoder); - intel_display_power_get(dev_priv, power_domain); + intel_display_power_get(dev_priv, intel_encoder->power_domain); if (I915_HAS_HOTPLUG(dev_priv)) { /* We can not rely on the HPD pin always being correctly wired @@ -745,7 +742,7 @@ intel_crt_detect(struct drm_connector *connector, bool force) drm_modeset_acquire_fini(&ctx); out: - intel_display_power_put(dev_priv, power_domain); + intel_display_power_put(dev_priv, intel_encoder->power_domain); return status; } @@ -761,12 +758,10 @@ static int intel_crt_get_modes(struct drm_connector *connector) struct drm_i915_private *dev_priv = to_i915(dev); struct intel_crt *crt = intel_attached_crt(connector); struct intel_encoder *intel_encoder = &crt->base; - enum intel_display_power_domain power_domain; int ret; struct i2c_adapter *i2c; - power_domain = intel_display_port_power_domain(intel_encoder); - intel_display_power_get(dev_priv, power_domain); + intel_display_power_get(dev_priv, intel_encoder->power_domain); i2c = intel_gmbus_get_adapter(dev_priv, dev_priv->vbt.crt_ddc_pin); ret = intel_crt_ddc_get_modes(connector, i2c); @@ -778,7 +773,7 @@ static int intel_crt_get_modes(struct drm_connector *connector) ret = intel_crt_ddc_get_modes(connector, i2c); out: - intel_display_power_put(dev_priv, power_domain); + intel_display_power_put(dev_priv, intel_encoder->power_domain); return ret; } @@ -904,6 +899,8 @@ void intel_crt_init(struct drm_i915_private *dev_priv) crt->adpa_reg = adpa_reg; + crt->base.power_domain = POWER_DOMAIN_PORT_CRT; + crt->base.compute_config = intel_crt_compute_config; if (HAS_PCH_SPLIT(dev_priv)) { crt->base.disable = pch_disable_crt; diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index e2947b7f5079..2cd1cc30b11b 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -1349,12 +1349,11 @@ bool intel_ddi_connector_get_hw_state(struct intel_connector *intel_connector) enum port port = intel_ddi_get_encoder_port(intel_encoder); enum pipe pipe = 0; enum transcoder cpu_transcoder; - enum intel_display_power_domain power_domain; uint32_t tmp; bool ret; - power_domain = intel_display_port_power_domain(intel_encoder); - if (!intel_display_power_get_if_enabled(dev_priv, power_domain)) + if (!intel_display_power_get_if_enabled(dev_priv, + intel_encoder->power_domain)) return false; if (!intel_encoder->get_hw_state(intel_encoder, &pipe)) { @@ -1396,7 +1395,7 @@ bool intel_ddi_connector_get_hw_state(struct intel_connector *intel_connector) } out: - intel_display_power_put(dev_priv, power_domain); + intel_display_power_put(dev_priv, intel_encoder->power_domain); return ret; } @@ -1407,13 +1406,12 @@ bool intel_ddi_get_hw_state(struct intel_encoder *encoder, struct drm_device *dev = encoder->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); enum port port = intel_ddi_get_encoder_port(encoder); - enum intel_display_power_domain power_domain; u32 tmp; int i; bool ret; - power_domain = intel_display_port_power_domain(encoder); - if (!intel_display_power_get_if_enabled(dev_priv, power_domain)) + if (!intel_display_power_get_if_enabled(dev_priv, + encoder->power_domain)) return false; ret = false; @@ -1470,7 +1468,7 @@ out: "(PHY_CTL %08x)\n", port_name(port), tmp); } - intel_display_power_put(dev_priv, power_domain); + intel_display_power_put(dev_priv, encoder->power_domain); return ret; } @@ -2237,6 +2235,7 @@ void intel_ddi_init(struct drm_i915_private *dev_priv, enum port port) intel_dig_port->max_lanes = max_lanes; intel_encoder->type = INTEL_OUTPUT_UNKNOWN; + intel_encoder->power_domain = intel_port_to_power_domain(port); intel_encoder->port = port; intel_encoder->crtc_mask = (1 << 0) | (1 << 1) | (1 << 2); intel_encoder->cloneable = 0; diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index e9fb2edb9dd7..bad4a692d36f 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -5568,7 +5568,7 @@ static void i9xx_pfit_enable(struct intel_crtc *crtc) I915_WRITE(BCLRPAT(crtc->pipe), 0); } -static enum intel_display_power_domain port_to_power_domain(enum port port) +enum intel_display_power_domain intel_port_to_power_domain(enum port port) { switch (port) { case PORT_A: @@ -5587,33 +5587,6 @@ static enum intel_display_power_domain port_to_power_domain(enum port port) } } -enum intel_display_power_domain -intel_display_port_power_domain(struct intel_encoder *intel_encoder) -{ - struct drm_i915_private *dev_priv = to_i915(intel_encoder->base.dev); - struct intel_digital_port *intel_dig_port; - - switch (intel_encoder->type) { - case INTEL_OUTPUT_UNKNOWN: - /* Only DDI platforms should ever use this output type */ - WARN_ON_ONCE(!HAS_DDI(dev_priv)); - case INTEL_OUTPUT_DP: - case INTEL_OUTPUT_HDMI: - case INTEL_OUTPUT_EDP: - intel_dig_port = enc_to_dig_port(&intel_encoder->base); - return port_to_power_domain(intel_dig_port->port); - case INTEL_OUTPUT_DP_MST: - intel_dig_port = enc_to_mst(&intel_encoder->base)->primary; - return port_to_power_domain(intel_dig_port->port); - case INTEL_OUTPUT_ANALOG: - return POWER_DOMAIN_PORT_CRT; - case INTEL_OUTPUT_DSI: - return POWER_DOMAIN_PORT_DSI; - default: - return POWER_DOMAIN_PORT_OTHER; - } -} - static u64 get_crtc_power_domains(struct drm_crtc *crtc, struct intel_crtc_state *crtc_state) { @@ -5637,7 +5610,7 @@ static u64 get_crtc_power_domains(struct drm_crtc *crtc, drm_for_each_encoder_mask(encoder, dev, crtc_state->base.encoder_mask) { struct intel_encoder *intel_encoder = to_intel_encoder(encoder); - mask |= BIT_ULL(intel_display_port_power_domain(intel_encoder)); + mask |= BIT_ULL(intel_encoder->power_domain); } if (HAS_DDI(dev_priv) && crtc_state->has_audio) diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index aa0c79087bbf..fd96a6cf7326 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -2496,12 +2496,11 @@ static bool intel_dp_get_hw_state(struct intel_encoder *encoder, enum port port = dp_to_dig_port(intel_dp)->port; struct drm_device *dev = encoder->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - enum intel_display_power_domain power_domain; u32 tmp; bool ret; - power_domain = intel_display_port_power_domain(encoder); - if (!intel_display_power_get_if_enabled(dev_priv, power_domain)) + if (!intel_display_power_get_if_enabled(dev_priv, + encoder->power_domain)) return false; ret = false; @@ -2537,7 +2536,7 @@ static bool intel_dp_get_hw_state(struct intel_encoder *encoder, ret = true; out: - intel_display_power_put(dev_priv, power_domain); + intel_display_power_put(dev_priv, encoder->power_domain); return ret; } @@ -6097,6 +6096,7 @@ bool intel_dp_init(struct drm_i915_private *dev_priv, intel_dig_port->max_lanes = 4; intel_encoder->type = INTEL_OUTPUT_DP; + intel_encoder->power_domain = intel_port_to_power_domain(port); if (IS_CHERRYVIEW(dev_priv)) { if (port == PORT_D) intel_encoder->crtc_mask = 1 << 2; diff --git a/drivers/gpu/drm/i915/intel_dp_mst.c b/drivers/gpu/drm/i915/intel_dp_mst.c index 6a85d388f936..d94fd4b80963 100644 --- a/drivers/gpu/drm/i915/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/intel_dp_mst.c @@ -548,6 +548,7 @@ intel_dp_create_fake_mst_encoder(struct intel_digital_port *intel_dig_port, enum DRM_MODE_ENCODER_DPMST, "DP-MST %c", pipe_name(pipe)); intel_encoder->type = INTEL_OUTPUT_DP_MST; + intel_encoder->power_domain = intel_dig_port->base.power_domain; intel_encoder->port = intel_dig_port->port; intel_encoder->crtc_mask = 0x7; intel_encoder->cloneable = 0; diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index f2c717a42f30..82f8f828539b 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -249,6 +249,7 @@ struct intel_encoder { void (*suspend)(struct intel_encoder *); int crtc_mask; enum hpd_pin hpd_pin; + enum intel_display_power_domain power_domain; /* for communication with audio component; protected by av_mutex */ const struct drm_connector *audio_connector; }; @@ -1420,8 +1421,7 @@ int chv_calc_dpll_params(int refclk, struct dpll *pll_clock); bool intel_crtc_active(struct intel_crtc *crtc); void hsw_enable_ips(struct intel_crtc *crtc); void hsw_disable_ips(struct intel_crtc *crtc); -enum intel_display_power_domain -intel_display_port_power_domain(struct intel_encoder *intel_encoder); +enum intel_display_power_domain intel_port_to_power_domain(enum port port); void intel_mode_from_pipe_config(struct drm_display_mode *mode, struct intel_crtc_state *pipe_config); diff --git a/drivers/gpu/drm/i915/intel_dsi.c b/drivers/gpu/drm/i915/intel_dsi.c index c26fe4fc0819..d7ffb9ac3c8a 100644 --- a/drivers/gpu/drm/i915/intel_dsi.c +++ b/drivers/gpu/drm/i915/intel_dsi.c @@ -775,14 +775,13 @@ static bool intel_dsi_get_hw_state(struct intel_encoder *encoder, { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); - enum intel_display_power_domain power_domain; enum port port; bool active = false; DRM_DEBUG_KMS("\n"); - power_domain = intel_display_port_power_domain(encoder); - if (!intel_display_power_get_if_enabled(dev_priv, power_domain)) + if (!intel_display_power_get_if_enabled(dev_priv, + encoder->power_domain)) return false; /* @@ -838,7 +837,7 @@ static bool intel_dsi_get_hw_state(struct intel_encoder *encoder, } out_put_power: - intel_display_power_put(dev_priv, power_domain); + intel_display_power_put(dev_priv, encoder->power_domain); return active; } @@ -1516,6 +1515,7 @@ void intel_dsi_init(struct drm_i915_private *dev_priv) intel_connector->get_hw_state = intel_connector_get_hw_state; intel_encoder->port = port; + /* * On BYT/CHV, pipe A maps to MIPI DSI port A, pipe B maps to MIPI DSI * port C. BXT isn't limited like this. @@ -1603,6 +1603,7 @@ void intel_dsi_init(struct drm_i915_private *dev_priv) } intel_encoder->type = INTEL_OUTPUT_DSI; + intel_encoder->power_domain = POWER_DOMAIN_PORT_DSI; intel_encoder->cloneable = 0; drm_connector_init(dev, connector, &intel_dsi_connector_funcs, DRM_MODE_CONNECTOR_DSI); diff --git a/drivers/gpu/drm/i915/intel_dvo.c b/drivers/gpu/drm/i915/intel_dvo.c index 50da89dcb92b..6025839ed3b7 100644 --- a/drivers/gpu/drm/i915/intel_dvo.c +++ b/drivers/gpu/drm/i915/intel_dvo.c @@ -515,6 +515,7 @@ void intel_dvo_init(struct drm_i915_private *dev_priv) "DVO %c", port_name(port)); intel_encoder->type = INTEL_OUTPUT_DVO; + intel_encoder->power_domain = POWER_DOMAIN_PORT_OTHER; intel_encoder->port = port; intel_encoder->crtc_mask = (1 << 0) | (1 << 1); diff --git a/drivers/gpu/drm/i915/intel_hdmi.c b/drivers/gpu/drm/i915/intel_hdmi.c index 6c83442e2152..048f76d329bb 100644 --- a/drivers/gpu/drm/i915/intel_hdmi.c +++ b/drivers/gpu/drm/i915/intel_hdmi.c @@ -901,12 +901,11 @@ static bool intel_hdmi_get_hw_state(struct intel_encoder *encoder, struct drm_device *dev = encoder->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(&encoder->base); - enum intel_display_power_domain power_domain; u32 tmp; bool ret; - power_domain = intel_display_port_power_domain(encoder); - if (!intel_display_power_get_if_enabled(dev_priv, power_domain)) + if (!intel_display_power_get_if_enabled(dev_priv, + encoder->power_domain)) return false; ret = false; @@ -926,7 +925,7 @@ static bool intel_hdmi_get_hw_state(struct intel_encoder *encoder, ret = true; out: - intel_display_power_put(dev_priv, power_domain); + intel_display_power_put(dev_priv, encoder->power_domain); return ret; } @@ -1980,6 +1979,7 @@ void intel_hdmi_init(struct drm_i915_private *dev_priv, } intel_encoder->type = INTEL_OUTPUT_HDMI; + intel_encoder->power_domain = intel_port_to_power_domain(port); intel_encoder->port = port; if (IS_CHERRYVIEW(dev_priv)) { if (port == PORT_D) diff --git a/drivers/gpu/drm/i915/intel_lvds.c b/drivers/gpu/drm/i915/intel_lvds.c index 9ca4dc4d2378..8b942ef2b3ec 100644 --- a/drivers/gpu/drm/i915/intel_lvds.c +++ b/drivers/gpu/drm/i915/intel_lvds.c @@ -91,12 +91,11 @@ static bool intel_lvds_get_hw_state(struct intel_encoder *encoder, struct drm_device *dev = encoder->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); struct intel_lvds_encoder *lvds_encoder = to_lvds_encoder(&encoder->base); - enum intel_display_power_domain power_domain; u32 tmp; bool ret; - power_domain = intel_display_port_power_domain(encoder); - if (!intel_display_power_get_if_enabled(dev_priv, power_domain)) + if (!intel_display_power_get_if_enabled(dev_priv, + encoder->power_domain)) return false; ret = false; @@ -114,7 +113,7 @@ static bool intel_lvds_get_hw_state(struct intel_encoder *encoder, ret = true; out: - intel_display_power_put(dev_priv, power_domain); + intel_display_power_put(dev_priv, encoder->power_domain); return ret; } @@ -1066,6 +1065,7 @@ void intel_lvds_init(struct drm_i915_private *dev_priv) intel_connector_attach_encoder(intel_connector, intel_encoder); intel_encoder->type = INTEL_OUTPUT_LVDS; + intel_encoder->power_domain = POWER_DOMAIN_PORT_OTHER; intel_encoder->port = PORT_NONE; intel_encoder->cloneable = 0; if (HAS_PCH_SPLIT(dev_priv)) diff --git a/drivers/gpu/drm/i915/intel_sdvo.c b/drivers/gpu/drm/i915/intel_sdvo.c index 2ad13903a054..816a6f5a3fd9 100644 --- a/drivers/gpu/drm/i915/intel_sdvo.c +++ b/drivers/gpu/drm/i915/intel_sdvo.c @@ -2981,6 +2981,7 @@ bool intel_sdvo_init(struct drm_i915_private *dev_priv, /* encoder type will be decided later */ intel_encoder = &intel_sdvo->base; intel_encoder->type = INTEL_OUTPUT_SDVO; + intel_encoder->power_domain = POWER_DOMAIN_PORT_OTHER; intel_encoder->port = port; drm_encoder_init(&dev_priv->drm, &intel_encoder->base, &intel_sdvo_enc_funcs, 0, diff --git a/drivers/gpu/drm/i915/intel_tv.c b/drivers/gpu/drm/i915/intel_tv.c index eb692e4ffe01..6ed1a3ce47b7 100644 --- a/drivers/gpu/drm/i915/intel_tv.c +++ b/drivers/gpu/drm/i915/intel_tv.c @@ -1621,6 +1621,7 @@ intel_tv_init(struct drm_i915_private *dev_priv) intel_connector_attach_encoder(intel_connector, intel_encoder); intel_encoder->type = INTEL_OUTPUT_TVOUT; + intel_encoder->power_domain = POWER_DOMAIN_PORT_OTHER; intel_encoder->port = PORT_NONE; intel_encoder->crtc_mask = (1 << 0) | (1 << 1); intel_encoder->cloneable = 0; -- cgit From 9a5da00b7ce0dec87fc0707c5cd17fed03e183cd Mon Sep 17 00:00:00 2001 From: Ander Conselvan de Oliveira Date: Fri, 24 Feb 2017 16:18:45 +0200 Subject: drm/i915: Check encoder type in enc_to_dig_port() Don't allow conversion from arbitraty encoder types to a digital port. Calling enc_to_dig_port() with the wrong encoder may seem far fetched, but certain paths of the ddi code may be called with hasell's analog encoder and the conversion is wrong for DP mst encoders too, so safe guard against it. v2: Warn if encoder type is unknown and device is not DDI. (Imre) v3: Remove stray hunk from rebase error. (Ander) Signed-off-by: Ander Conselvan de Oliveira Reviewed-by: Imre Deak Link: http://patchwork.freedesktop.org/patch/msgid/20170224141845.5836-1-ander.conselvan.de.oliveira@intel.com --- drivers/gpu/drm/i915/intel_drv.h | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 82f8f828539b..7f35e3fa68db 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -1121,7 +1121,19 @@ intel_attached_encoder(struct drm_connector *connector) static inline struct intel_digital_port * enc_to_dig_port(struct drm_encoder *encoder) { - return container_of(encoder, struct intel_digital_port, base.base); + struct intel_encoder *intel_encoder = to_intel_encoder(encoder); + + switch (intel_encoder->type) { + case INTEL_OUTPUT_UNKNOWN: + WARN_ON(!HAS_DDI(to_i915(encoder->dev))); + case INTEL_OUTPUT_DP: + case INTEL_OUTPUT_EDP: + case INTEL_OUTPUT_HDMI: + return container_of(encoder, struct intel_digital_port, + base.base); + default: + return NULL; + } } static inline struct intel_dp_mst_encoder * -- cgit From f4f4b59be52b28823642f82312e15369d783d858 Mon Sep 17 00:00:00 2001 From: Ander Conselvan de Oliveira Date: Wed, 22 Feb 2017 08:34:29 +0200 Subject: drm/i915/glk: Implement WaDDIIOTimeout Implement WaDDIIOTimeout to avoid a timeout when enabling the DDI IO power domains. Signed-off-by: Ander Conselvan de Oliveira Reviewed-by: Imre Deak Link: http://patchwork.freedesktop.org/patch/msgid/20170222063431.10060-5-ander.conselvan.de.oliveira@intel.com --- drivers/gpu/drm/i915/i915_drv.h | 6 ++++++ drivers/gpu/drm/i915/i915_reg.h | 5 +++++ drivers/gpu/drm/i915/intel_pm.c | 10 ++++++++++ 3 files changed, 21 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 0a6fdb024b86..c2472ea762da 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2787,6 +2787,12 @@ intel_info(const struct drm_i915_private *dev_priv) #define IS_KBL_REVID(dev_priv, since, until) \ (IS_KABYLAKE(dev_priv) && IS_REVID(dev_priv, since, until)) +#define GLK_REVID_A0 0x0 +#define GLK_REVID_A1 0x1 + +#define IS_GLK_REVID(dev_priv, since, until) \ + (IS_GEMINILAKE(dev_priv) && IS_REVID(dev_priv, since, until)) + /* * The genX designation typically refers to the render engine, so render * capability related checks should use IS_GEN, while display and other checks diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index ad666933a88d..bdce90084d43 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -6479,6 +6479,11 @@ enum { #define CHICKEN_PAR2_1 _MMIO(0x42090) #define KVM_CONFIG_CHANGE_NOTIFICATION_SELECT (1 << 14) +#define CHICKEN_MISC_2 _MMIO(0x42084) +#define GLK_CL0_PWR_DOWN (1 << 10) +#define GLK_CL1_PWR_DOWN (1 << 11) +#define GLK_CL2_PWR_DOWN (1 << 12) + #define _CHICKEN_PIPESL_1_A 0x420b0 #define _CHICKEN_PIPESL_1_B 0x420b4 #define HSW_FBCQ_DIS (1 << 22) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 169c4908ad5b..b38707efb620 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -114,6 +114,16 @@ static void glk_init_clock_gating(struct drm_i915_private *dev_priv) */ I915_WRITE(GEN9_CLKGATE_DIS_0, I915_READ(GEN9_CLKGATE_DIS_0) | PWM1_GATING_DIS | PWM2_GATING_DIS); + + /* WaDDIIOTimeout:glk */ + if (IS_GLK_REVID(dev_priv, 0, GLK_REVID_A1)) { + u32 val = I915_READ(CHICKEN_MISC_2); + val &= ~(GLK_CL0_PWR_DOWN | + GLK_CL1_PWR_DOWN | + GLK_CL2_PWR_DOWN); + I915_WRITE(CHICKEN_MISC_2, val); + } + } static void i915_pineview_get_mem_freq(struct drm_i915_private *dev_priv) -- cgit From 71cc22e5db8994ff69c947a296235e8547039188 Mon Sep 17 00:00:00 2001 From: Ander Conselvan de Oliveira Date: Wed, 22 Feb 2017 08:34:30 +0200 Subject: drm/i915/glk: Don't enable DDI IO power domains during init In Geminilake, the DDI IO power domains can't be enabled before a DPLL is running and mapped to the appropriate DDI. At least on Geminilake, attempting to enable those during init will lead to a timeout. The failure to enable the power domain also causes issues with the state verifier during resume from suspend. After all the init power domains are enabled, the call to intel_power_domains_sync_hw() from the resume path will cause the hw_enabled field on the respective power wells to be false while the usage count remains above zero. Further attempts to enable the power domain caused by a modeset will simply update the usage count without doing anything else. When the state verifier attempts to read the state of a DDI encoder, intel_display_power_get_if_enabled() returns false, leading to the following WARN: WARNING: CPU: 3 PID: 1743 at drivers/gpu/drm/i915/intel_display.c:7001 verify_connector_state.isra.80+0x26c/0x2b0 [i915] attached crtc is active, but connector isn't Modules linked in: i915(E) tun ip6t_rpfilter ip6t_REJECT nf_reject_ipv6 xt_conntrack ebtable_broute bridge stp llc ebtable_nat ip6table_mangle ip6table_security ip6table_nat nf_conntrack_ipv6 nf_defrag_ipv6 nf_nat_ipv6 ip6table_raw iptable_mangle iptable_security iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 nf_nat nf_conntrack iptable_raw ebtable_filter ebtables ip6table_filter ip6_tables x86_pkg_temp_thermal coretemp kvm_intel kvm i2c_algo_bit drm_kms_helper irqbypass crct10dif_pclmul crc32_pclmul ghash_clmulni_intel drm shpchp tpm_tis tpm_tis_core tpm nfsd auth_rpcgss nfs_acl lockd grace sunrpc crc32c_intel serio_raw [last unloaded: i915] CPU: 3 PID: 1743 Comm: kworker/u8:22 Tainted: G W E 4.10.0-rc3ander+ #300 Hardware name: Intel Corp. Geminilake/GLK RVP1 DDR4 (05), BIOS GELKRVPA.X64.0023.B40.1611302145 11/30/2016 Workqueue: events_unbound async_run_entry_fn Call Trace: dump_stack+0x86/0xc3 __warn+0xcb/0xf0 warn_slowpath_fmt+0x5f/0x80 verify_connector_state.isra.80+0x26c/0x2b0 [i915] intel_atomic_commit_tail+0x520/0x1000 [i915] ? remove_wait_queue+0x70/0x70 intel_atomic_commit+0x3f8/0x520 [i915] ? intel_runtime_pm_put+0x6e/0xa0 [i915] drm_atomic_commit+0x4b/0x50 [drm] __intel_display_resume+0x72/0xc0 [i915] intel_display_resume+0x107/0x150 [i915] i915_drm_resume+0xe0/0x180 [i915] i915_pm_restore+0x1e/0x30 [i915] i915_pm_resume+0xe/0x10 [i915] pci_pm_resume+0x64/0xa0 dpm_run_callback+0xa1/0x2a0 ? pci_pm_thaw+0x90/0x90 device_resume+0xe3/0x200 async_resume+0x1d/0x50 async_run_entry_fn+0x39/0x170 process_one_work+0x212/0x670 ? process_one_work+0x197/0x670 worker_thread+0x4e/0x490 kthread+0x101/0x140 ? process_one_work+0x670/0x670 ? kthread_create_on_node+0x60/0x60 ret_from_fork+0x2a/0x40 Cc: David Weinehall Signed-off-by: Ander Conselvan de Oliveira Reviewed-by: David Weinehall Link: http://patchwork.freedesktop.org/patch/msgid/20170222063431.10060-6-ander.conselvan.de.oliveira@intel.com --- drivers/gpu/drm/i915/intel_runtime_pm.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c index 6b52258152b7..514ef56f562d 100644 --- a/drivers/gpu/drm/i915/intel_runtime_pm.c +++ b/drivers/gpu/drm/i915/intel_runtime_pm.c @@ -452,14 +452,11 @@ static void hsw_set_power_well(struct drm_i915_private *dev_priv, BIT_ULL(POWER_DOMAIN_VGA) | \ BIT_ULL(POWER_DOMAIN_INIT)) #define GLK_DISPLAY_DDI_A_POWER_DOMAINS ( \ - BIT_ULL(POWER_DOMAIN_PORT_DDI_A_LANES) | \ - BIT_ULL(POWER_DOMAIN_INIT)) + BIT_ULL(POWER_DOMAIN_PORT_DDI_A_LANES)) #define GLK_DISPLAY_DDI_B_POWER_DOMAINS ( \ - BIT_ULL(POWER_DOMAIN_PORT_DDI_B_LANES) | \ - BIT_ULL(POWER_DOMAIN_INIT)) + BIT_ULL(POWER_DOMAIN_PORT_DDI_B_LANES)) #define GLK_DISPLAY_DDI_C_POWER_DOMAINS ( \ - BIT_ULL(POWER_DOMAIN_PORT_DDI_C_LANES) | \ - BIT_ULL(POWER_DOMAIN_INIT)) + BIT_ULL(POWER_DOMAIN_PORT_DDI_C_LANES)) #define GLK_DPIO_CMN_A_POWER_DOMAINS ( \ BIT_ULL(POWER_DOMAIN_PORT_DDI_A_LANES) | \ BIT_ULL(POWER_DOMAIN_AUX_A) | \ -- cgit From 62b695662a2413286a25df418b0af665bf2899c5 Mon Sep 17 00:00:00 2001 From: Ander Conselvan de Oliveira Date: Fri, 24 Feb 2017 16:19:59 +0200 Subject: drm/i915: Only enable DDI IO power domains after enabling DPLL According to bspec, the DDI IO power domains should be enabled after enabling the DPLL and mapping it to the DDI. The current order doesn't seem to create problems with Skylake and Kabylake, but causes enable timeouts in Geminilake. v2: Rebase. - Take power domain references before sanitizing encoders. (Imre) - Add comment to get_encoder_power_domains() defition. (Ander) v3: Don't put the domain if called with HSW/BDW's analog encoder. (CI) v4: Put IO power domain before unmapping DPLL. (Imre) - Change return type of intel_ddi_get_power_domains() to u64. (Imre) Cc: David Weinehall Cc: Imre Deak Signed-off-by: Ander Conselvan de Oliveira Reviewed-by: David Weinehall # v1 Reviewed-by: Imre Deak Link: http://patchwork.freedesktop.org/patch/msgid/20170224141959.5955-1-ander.conselvan.de.oliveira@intel.com --- drivers/gpu/drm/i915/i915_drv.h | 5 +++ drivers/gpu/drm/i915/intel_ddi.c | 49 ++++++++++++++++++++++++ drivers/gpu/drm/i915/intel_display.c | 20 ++++++++++ drivers/gpu/drm/i915/intel_drv.h | 4 ++ drivers/gpu/drm/i915/intel_runtime_pm.c | 68 +++++++++++++++++++-------------- 5 files changed, 117 insertions(+), 29 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index c2472ea762da..933874ddea75 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -344,6 +344,11 @@ enum intel_display_power_domain { POWER_DOMAIN_PORT_DDI_C_LANES, POWER_DOMAIN_PORT_DDI_D_LANES, POWER_DOMAIN_PORT_DDI_E_LANES, + POWER_DOMAIN_PORT_DDI_A_IO, + POWER_DOMAIN_PORT_DDI_B_IO, + POWER_DOMAIN_PORT_DDI_C_IO, + POWER_DOMAIN_PORT_DDI_D_IO, + POWER_DOMAIN_PORT_DDI_E_IO, POWER_DOMAIN_PORT_DSI, POWER_DOMAIN_PORT_CRT, POWER_DOMAIN_PORT_OTHER, diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index 2cd1cc30b11b..a7c08d7027fa 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -1473,6 +1473,17 @@ out: return ret; } +static u64 intel_ddi_get_power_domains(struct intel_encoder *encoder) +{ + struct intel_digital_port *dig_port = enc_to_dig_port(&encoder->base); + enum pipe pipe; + + if (intel_ddi_get_hw_state(encoder, &pipe)) + return BIT_ULL(dig_port->ddi_io_power_domain); + + return 0; +} + void intel_ddi_enable_pipe_clock(struct intel_crtc *intel_crtc) { struct drm_crtc *crtc = &intel_crtc->base; @@ -1703,6 +1714,7 @@ static void intel_ddi_pre_enable_dp(struct intel_encoder *encoder, struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); enum port port = intel_ddi_get_encoder_port(encoder); + struct intel_digital_port *dig_port = enc_to_dig_port(&encoder->base); intel_dp_set_link_params(intel_dp, link_rate, lane_count, link_mst); @@ -1710,6 +1722,9 @@ static void intel_ddi_pre_enable_dp(struct intel_encoder *encoder, intel_edp_panel_on(intel_dp); intel_ddi_clk_select(encoder, pll); + + intel_display_power_get(dev_priv, dig_port->ddi_io_power_domain); + intel_prepare_dp_ddi_buffers(encoder); intel_ddi_init_dp_buf_reg(encoder); intel_dp_sink_dpms(intel_dp, DRM_MODE_DPMS_ON); @@ -1729,9 +1744,13 @@ static void intel_ddi_pre_enable_hdmi(struct intel_encoder *encoder, struct drm_encoder *drm_encoder = &encoder->base; enum port port = intel_ddi_get_encoder_port(encoder); int level = intel_ddi_hdmi_level(dev_priv, port); + struct intel_digital_port *dig_port = enc_to_dig_port(&encoder->base); intel_dp_dual_mode_set_tmds_output(intel_hdmi, true); intel_ddi_clk_select(encoder, pll); + + intel_display_power_get(dev_priv, dig_port->ddi_io_power_domain); + intel_prepare_hdmi_ddi_buffers(encoder); if (IS_GEN9_BC(dev_priv)) skl_ddi_set_iboost(encoder, level); @@ -1775,6 +1794,7 @@ static void intel_ddi_post_disable(struct intel_encoder *intel_encoder, struct drm_encoder *encoder = &intel_encoder->base; struct drm_i915_private *dev_priv = to_i915(encoder->dev); enum port port = intel_ddi_get_encoder_port(intel_encoder); + struct intel_digital_port *dig_port = enc_to_dig_port(encoder); int type = intel_encoder->type; uint32_t val; bool wait = false; @@ -1803,6 +1823,9 @@ static void intel_ddi_post_disable(struct intel_encoder *intel_encoder, intel_edp_panel_off(intel_dp); } + if (dig_port) + intel_display_power_put(dev_priv, dig_port->ddi_io_power_domain); + if (IS_GEN9_BC(dev_priv)) I915_WRITE(DPLL_CTRL2, (I915_READ(DPLL_CTRL2) | DPLL_CTRL2_DDI_CLK_OFF(port))); @@ -2211,12 +2234,38 @@ void intel_ddi_init(struct drm_i915_private *dev_priv, enum port port) intel_encoder->get_hw_state = intel_ddi_get_hw_state; intel_encoder->get_config = intel_ddi_get_config; intel_encoder->suspend = intel_dp_encoder_suspend; + intel_encoder->get_power_domains = intel_ddi_get_power_domains; intel_dig_port->port = port; intel_dig_port->saved_port_bits = I915_READ(DDI_BUF_CTL(port)) & (DDI_BUF_PORT_REVERSAL | DDI_A_4_LANES); + switch (port) { + case PORT_A: + intel_dig_port->ddi_io_power_domain = + POWER_DOMAIN_PORT_DDI_A_IO; + break; + case PORT_B: + intel_dig_port->ddi_io_power_domain = + POWER_DOMAIN_PORT_DDI_B_IO; + break; + case PORT_C: + intel_dig_port->ddi_io_power_domain = + POWER_DOMAIN_PORT_DDI_C_IO; + break; + case PORT_D: + intel_dig_port->ddi_io_power_domain = + POWER_DOMAIN_PORT_DDI_D_IO; + break; + case PORT_E: + intel_dig_port->ddi_io_power_domain = + POWER_DOMAIN_PORT_DDI_E_IO; + break; + default: + MISSING_CASE(port); + } + /* * Bspec says that DDI_A_4_LANES is the only supported configuration * for Broxton. Yet some BIOS fail to set this bit on port A if eDP diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index bad4a692d36f..93371c2377b2 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -15412,6 +15412,24 @@ static void intel_modeset_readout_hw_state(struct drm_device *dev) } } +static void +get_encoder_power_domains(struct drm_i915_private *dev_priv) +{ + struct intel_encoder *encoder; + + for_each_intel_encoder(&dev_priv->drm, encoder) { + u64 get_domains; + enum intel_display_power_domain domain; + + if (!encoder->get_power_domains) + continue; + + get_domains = encoder->get_power_domains(encoder); + for_each_power_domain(domain, get_domains) + intel_display_power_get(dev_priv, domain); + } +} + /* Scan out the current hw modeset state, * and sanitizes it to the current state */ @@ -15427,6 +15445,8 @@ intel_modeset_setup_hw_state(struct drm_device *dev) intel_modeset_readout_hw_state(dev); /* HW state is read out, now we need to sanitize this mess. */ + get_encoder_power_domains(dev_priv); + for_each_intel_encoder(dev, encoder) { intel_sanitize_encoder(encoder); } diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 7f35e3fa68db..741beba0c9c0 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -241,6 +241,9 @@ struct intel_encoder { * be set correctly before calling this function. */ void (*get_config)(struct intel_encoder *, struct intel_crtc_state *pipe_config); + /* Returns a mask of power domains that need to be referenced as part + * of the hardware state readout code. */ + u64 (*get_power_domains)(struct intel_encoder *encoder); /* * Called during system suspend after all pending requests for the * encoder are flushed (for example for DP AUX transactions) and @@ -1027,6 +1030,7 @@ struct intel_digital_port { enum irqreturn (*hpd_pulse)(struct intel_digital_port *, bool); bool release_cl2_override; uint8_t max_lanes; + enum intel_display_power_domain ddi_io_power_domain; }; struct intel_dp_mst_encoder { diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c index 514ef56f562d..012bc358a33a 100644 --- a/drivers/gpu/drm/i915/intel_runtime_pm.c +++ b/drivers/gpu/drm/i915/intel_runtime_pm.c @@ -93,6 +93,16 @@ intel_display_power_domain_str(enum intel_display_power_domain domain) return "PORT_DDI_D_LANES"; case POWER_DOMAIN_PORT_DDI_E_LANES: return "PORT_DDI_E_LANES"; + case POWER_DOMAIN_PORT_DDI_A_IO: + return "PORT_DDI_A_IO"; + case POWER_DOMAIN_PORT_DDI_B_IO: + return "PORT_DDI_B_IO"; + case POWER_DOMAIN_PORT_DDI_C_IO: + return "PORT_DDI_C_IO"; + case POWER_DOMAIN_PORT_DDI_D_IO: + return "PORT_DDI_D_IO"; + case POWER_DOMAIN_PORT_DDI_E_IO: + return "PORT_DDI_E_IO"; case POWER_DOMAIN_PORT_DSI: return "PORT_DSI"; case POWER_DOMAIN_PORT_CRT: @@ -385,18 +395,18 @@ static void hsw_set_power_well(struct drm_i915_private *dev_priv, BIT_ULL(POWER_DOMAIN_AUDIO) | \ BIT_ULL(POWER_DOMAIN_VGA) | \ BIT_ULL(POWER_DOMAIN_INIT)) -#define SKL_DISPLAY_DDI_A_E_POWER_DOMAINS ( \ - BIT_ULL(POWER_DOMAIN_PORT_DDI_A_LANES) | \ - BIT_ULL(POWER_DOMAIN_PORT_DDI_E_LANES) | \ +#define SKL_DISPLAY_DDI_IO_A_E_POWER_DOMAINS ( \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_A_IO) | \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_E_IO) | \ BIT_ULL(POWER_DOMAIN_INIT)) -#define SKL_DISPLAY_DDI_B_POWER_DOMAINS ( \ - BIT_ULL(POWER_DOMAIN_PORT_DDI_B_LANES) | \ +#define SKL_DISPLAY_DDI_IO_B_POWER_DOMAINS ( \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_B_IO) | \ BIT_ULL(POWER_DOMAIN_INIT)) -#define SKL_DISPLAY_DDI_C_POWER_DOMAINS ( \ - BIT_ULL(POWER_DOMAIN_PORT_DDI_C_LANES) | \ +#define SKL_DISPLAY_DDI_IO_C_POWER_DOMAINS ( \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_C_IO) | \ BIT_ULL(POWER_DOMAIN_INIT)) -#define SKL_DISPLAY_DDI_D_POWER_DOMAINS ( \ - BIT_ULL(POWER_DOMAIN_PORT_DDI_D_LANES) | \ +#define SKL_DISPLAY_DDI_IO_D_POWER_DOMAINS ( \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_D_IO) | \ BIT_ULL(POWER_DOMAIN_INIT)) #define SKL_DISPLAY_DC_OFF_POWER_DOMAINS ( \ SKL_DISPLAY_POWERWELL_2_POWER_DOMAINS | \ @@ -451,12 +461,12 @@ static void hsw_set_power_well(struct drm_i915_private *dev_priv, BIT_ULL(POWER_DOMAIN_AUDIO) | \ BIT_ULL(POWER_DOMAIN_VGA) | \ BIT_ULL(POWER_DOMAIN_INIT)) -#define GLK_DISPLAY_DDI_A_POWER_DOMAINS ( \ - BIT_ULL(POWER_DOMAIN_PORT_DDI_A_LANES)) -#define GLK_DISPLAY_DDI_B_POWER_DOMAINS ( \ - BIT_ULL(POWER_DOMAIN_PORT_DDI_B_LANES)) -#define GLK_DISPLAY_DDI_C_POWER_DOMAINS ( \ - BIT_ULL(POWER_DOMAIN_PORT_DDI_C_LANES)) +#define GLK_DISPLAY_DDI_IO_A_POWER_DOMAINS ( \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_A_IO)) +#define GLK_DISPLAY_DDI_IO_B_POWER_DOMAINS ( \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_B_IO)) +#define GLK_DISPLAY_DDI_IO_C_POWER_DOMAINS ( \ + BIT_ULL(POWER_DOMAIN_PORT_DDI_C_IO)) #define GLK_DPIO_CMN_A_POWER_DOMAINS ( \ BIT_ULL(POWER_DOMAIN_PORT_DDI_A_LANES) | \ BIT_ULL(POWER_DOMAIN_AUX_A) | \ @@ -2114,26 +2124,26 @@ static struct i915_power_well skl_power_wells[] = { .id = SKL_DISP_PW_2, }, { - .name = "DDI A/E power well", - .domains = SKL_DISPLAY_DDI_A_E_POWER_DOMAINS, + .name = "DDI A/E IO power well", + .domains = SKL_DISPLAY_DDI_IO_A_E_POWER_DOMAINS, .ops = &skl_power_well_ops, .id = SKL_DISP_PW_DDI_A_E, }, { - .name = "DDI B power well", - .domains = SKL_DISPLAY_DDI_B_POWER_DOMAINS, + .name = "DDI B IO power well", + .domains = SKL_DISPLAY_DDI_IO_B_POWER_DOMAINS, .ops = &skl_power_well_ops, .id = SKL_DISP_PW_DDI_B, }, { - .name = "DDI C power well", - .domains = SKL_DISPLAY_DDI_C_POWER_DOMAINS, + .name = "DDI C IO power well", + .domains = SKL_DISPLAY_DDI_IO_C_POWER_DOMAINS, .ops = &skl_power_well_ops, .id = SKL_DISP_PW_DDI_C, }, { - .name = "DDI D power well", - .domains = SKL_DISPLAY_DDI_D_POWER_DOMAINS, + .name = "DDI D IO power well", + .domains = SKL_DISPLAY_DDI_IO_D_POWER_DOMAINS, .ops = &skl_power_well_ops, .id = SKL_DISP_PW_DDI_D, }, @@ -2246,20 +2256,20 @@ static struct i915_power_well glk_power_wells[] = { .id = GLK_DISP_PW_AUX_C, }, { - .name = "DDI A power well", - .domains = GLK_DISPLAY_DDI_A_POWER_DOMAINS, + .name = "DDI A IO power well", + .domains = GLK_DISPLAY_DDI_IO_A_POWER_DOMAINS, .ops = &skl_power_well_ops, .id = GLK_DISP_PW_DDI_A, }, { - .name = "DDI B power well", - .domains = GLK_DISPLAY_DDI_B_POWER_DOMAINS, + .name = "DDI B IO power well", + .domains = GLK_DISPLAY_DDI_IO_B_POWER_DOMAINS, .ops = &skl_power_well_ops, .id = SKL_DISP_PW_DDI_B, }, { - .name = "DDI C power well", - .domains = GLK_DISPLAY_DDI_C_POWER_DOMAINS, + .name = "DDI C IO power well", + .domains = GLK_DISPLAY_DDI_IO_C_POWER_DOMAINS, .ops = &skl_power_well_ops, .id = SKL_DISP_PW_DDI_C, }, -- cgit From ca7a45ba6fb9e7ceca56d10b91db29c2f3451a2e Mon Sep 17 00:00:00 2001 From: Michał Winiarski Date: Mon, 27 Feb 2017 12:22:56 +0100 Subject: drm/i915/skl: Add missing SKL ID MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Used by production device: Intel(R) Iris(TM) Graphics P555 Cc: Cc: Mika Kuoppala Cc: Rodrigo Vivi Signed-off-by: Michał Winiarski Reviewed-by: Rodrigo Vivi Reviewed-by: Mika Kuoppala Signed-off-by: Mika Kuoppala Link: http://patchwork.freedesktop.org/patch/msgid/20170227112256.20060-1-michal.winiarski@intel.com --- include/drm/i915_pciids.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/drm/i915_pciids.h b/include/drm/i915_pciids.h index a1dd21d6b723..466c71592a6f 100644 --- a/include/drm/i915_pciids.h +++ b/include/drm/i915_pciids.h @@ -265,7 +265,8 @@ INTEL_VGA_DEVICE(0x1923, info), /* ULT GT3 */ \ INTEL_VGA_DEVICE(0x1926, info), /* ULT GT3 */ \ INTEL_VGA_DEVICE(0x1927, info), /* ULT GT3 */ \ - INTEL_VGA_DEVICE(0x192B, info) /* Halo GT3 */ \ + INTEL_VGA_DEVICE(0x192B, info), /* Halo GT3 */ \ + INTEL_VGA_DEVICE(0x192D, info) /* SRV GT3 */ #define INTEL_SKL_GT4_IDS(info) \ INTEL_VGA_DEVICE(0x1932, info), /* DT GT4 */ \ -- cgit From 69060d96440b83da151989d83fd180eb05d84780 Mon Sep 17 00:00:00 2001 From: Kelvin Gardiner Date: Fri, 24 Feb 2017 11:15:24 -0800 Subject: drm/i915/bdw: Do not write the replay bit of the ring mode register The replay bit of the ring mode register is not a valid bit for Gen8+. Do not write to this bit. Signed-off-by: Kelvin Gardiner Cc: Joonas Lahtinen Cc: Ceraolo Spurio, Daniele Reviewed-by: Daniele Ceraolo Spurio [Joonas: Fixed commit message line to be under 72 chars] Signed-off-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/1487963724-4824-1-git-send-email-kelvin.gardiner@intel.com --- drivers/gpu/drm/i915/intel_lrc.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 1c6c71673bfa..f9a8545474bc 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -1178,7 +1178,6 @@ static int gen8_init_common_ring(struct intel_engine_cs *engine) I915_WRITE(RING_HWSTAM(engine->mmio_base), 0xffffffff); I915_WRITE(RING_MODE_GEN7(engine), - _MASKED_BIT_DISABLE(GFX_REPLAY_MODE) | _MASKED_BIT_ENABLE(GFX_RUN_LIST_ENABLE)); I915_WRITE(RING_HWS_PGA(engine->mmio_base), engine->status_page.ggtt_offset); -- cgit From bf75d59eff679d2e2b7af5c6958a088f8a458f7a Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 27 Feb 2017 12:26:52 +0000 Subject: drm/i915: Only unwind the local pgtable layer if empty Only if we allocated the layer and the lower level failed should we remove this layer when unwinding. Otherwise we ignore the overlapping entries by overwriting the old layer with scratch. Fixes: c5d092a4293f ("drm/i915: Remove bitmap tracking for used-pml4") Fixes: e2b763caa6eb ("drm/i915: Remove bitmap tracking for used-pdpes") Reported-by: Matthew Auld Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=99947 Testcase: igt/drv_selftest/live_gtt Signed-off-by: Chris Wilson Cc: Matthew Auld Tested-by: Matthew Auld Reviewed-by: Matthew Auld Link: http://patchwork.freedesktop.org/patch/msgid/20170227122654.27651-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_gtt.c | 37 ++++++++++++++++++++++++++----------- 1 file changed, 26 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 6fdbd5ae4fcb..c3a121ab8914 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -716,10 +716,13 @@ static bool gen8_ppgtt_clear_pd(struct i915_address_space *vm, u32 pde; gen8_for_each_pde(pt, pd, start, length, pde) { + GEM_BUG_ON(pt == vm->scratch_pt); + if (!gen8_ppgtt_clear_pt(vm, pt, start, length)) continue; gen8_ppgtt_set_pde(vm, pd, vm->scratch_pt, pde); + GEM_BUG_ON(!pd->used_pdes); pd->used_pdes--; free_pt(vm, pt); @@ -755,10 +758,13 @@ static bool gen8_ppgtt_clear_pdp(struct i915_address_space *vm, unsigned int pdpe; gen8_for_each_pdpe(pd, pdp, start, length, pdpe) { + GEM_BUG_ON(pd == vm->scratch_pd); + if (!gen8_ppgtt_clear_pd(vm, pd, start, length)) continue; gen8_ppgtt_set_pdpe(vm, pdp, vm->scratch_pd, pdpe); + GEM_BUG_ON(!pdp->used_pdpes); pdp->used_pdpes--; free_pd(vm, pd); @@ -801,6 +807,8 @@ static void gen8_ppgtt_clear_4lvl(struct i915_address_space *vm, GEM_BUG_ON(!USES_FULL_48BIT_PPGTT(vm->i915)); gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) { + GEM_BUG_ON(pdp == vm->scratch_pdp); + if (!gen8_ppgtt_clear_pdp(vm, pdp, start, length)) continue; @@ -1089,6 +1097,7 @@ static int gen8_ppgtt_alloc_pd(struct i915_address_space *vm, gen8_ppgtt_set_pde(vm, pd, pt, pde); pd->used_pdes++; + GEM_BUG_ON(pd->used_pdes > I915_PDES); } pt->used_ptes += gen8_pte_count(start, length); @@ -1118,21 +1127,25 @@ static int gen8_ppgtt_alloc_pdp(struct i915_address_space *vm, gen8_initialize_pd(vm, pd); gen8_ppgtt_set_pdpe(vm, pdp, pd, pdpe); pdp->used_pdpes++; + GEM_BUG_ON(pdp->used_pdpes > I915_PDPES_PER_PDP(vm)); mark_tlbs_dirty(i915_vm_to_ppgtt(vm)); } ret = gen8_ppgtt_alloc_pd(vm, pd, start, length); - if (unlikely(ret)) { - gen8_ppgtt_set_pdpe(vm, pdp, vm->scratch_pd, pdpe); - pdp->used_pdpes--; - free_pd(vm, pd); - goto unwind; - } + if (unlikely(ret)) + goto unwind_pd; } return 0; +unwind_pd: + if (!pd->used_pdes) { + gen8_ppgtt_set_pdpe(vm, pdp, vm->scratch_pd, pdpe); + GEM_BUG_ON(!pdp->used_pdpes); + pdp->used_pdpes--; + free_pd(vm, pd); + } unwind: gen8_ppgtt_clear_pdp(vm, pdp, from, start - from); return -ENOMEM; @@ -1166,15 +1179,17 @@ static int gen8_ppgtt_alloc_4lvl(struct i915_address_space *vm, } ret = gen8_ppgtt_alloc_pdp(vm, pdp, start, length); - if (unlikely(ret)) { - gen8_ppgtt_set_pml4e(pml4, vm->scratch_pdp, pml4e); - free_pdp(vm, pdp); - goto unwind; - } + if (unlikely(ret)) + goto unwind_pdp; } return 0; +unwind_pdp: + if (!pdp->used_pdpes) { + gen8_ppgtt_set_pml4e(pml4, vm->scratch_pdp, pml4e); + free_pdp(vm, pdp); + } unwind: gen8_ppgtt_clear_4lvl(vm, from, start - from); return -ENOMEM; -- cgit From 2f7399af94a2d6f6459373969d266708e6a7fc38 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 27 Feb 2017 12:26:53 +0000 Subject: drm/i915: Unwind vma->pages allocation upon failure If we fail to allocate the ppgtt range after allocating the pages for the vma, we should unwind the local allocation before reporting back the failure. Fixes: ff685975d97f ("drm/i915: Move allocate_va_range to GTT") Signed-off-by: Chris Wilson Cc: Matthew Auld Reviewed-by: Matthew Auld Link: http://patchwork.freedesktop.org/patch/msgid/20170227122654.27651-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_gtt.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index c3a121ab8914..875a48b9d05a 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -2312,7 +2312,7 @@ static int aliasing_gtt_bind_vma(struct i915_vma *vma, vma->node.start, vma->node.size); if (ret) - return ret; + goto err_pages; } appgtt->base.insert_entries(&appgtt->base, @@ -2329,6 +2329,17 @@ static int aliasing_gtt_bind_vma(struct i915_vma *vma, } return 0; + +err_pages: + if (!(vma->flags & (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND))) { + if (vma->pages != vma->obj->mm.pages) { + GEM_BUG_ON(!vma->pages); + sg_free_table(vma->pages); + kfree(vma->pages); + } + vma->pages = NULL; + } + return ret; } static void aliasing_gtt_unbind_vma(struct i915_vma *vma) -- cgit From 31c7effa39f21f0fea1b3250ae9ff32b9c7e1ae5 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 27 Feb 2017 12:26:54 +0000 Subject: drm/i915: Remove the vma from the drm_mm if binding fails As we track whether a vma has been inserted into the drm_mm using the vma->flags, if we fail to bind the vma into the GTT we do not update those bits and will attempt to reinsert the vma into the drm_mm on future passes. To prevent that, we want to unwind i915_vma_insert() if we fail in our attempt to bind. Fixes: 59bfa1248e22 ("drm/i915: Start passing around i915_vma from execbuffer") Testcase: igt/drv_selftest/live_gtt Signed-off-by: Chris Wilson Cc: Matthew Auld Cc: Joonas Lahtinen Cc: # v4.9+ Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170227122654.27651-3-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_vma.c | 57 ++++++++++++++++++++++++++--------------- 1 file changed, 37 insertions(+), 20 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 6e9eade304b8..1aba47024656 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -509,10 +509,36 @@ err_unpin: return ret; } +static void +i915_vma_remove(struct i915_vma *vma) +{ + struct drm_i915_gem_object *obj = vma->obj; + + GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); + GEM_BUG_ON(vma->flags & (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND)); + + drm_mm_remove_node(&vma->node); + list_move_tail(&vma->vm_link, &vma->vm->unbound_list); + + /* Since the unbound list is global, only move to that list if + * no more VMAs exist. + */ + if (--obj->bind_count == 0) + list_move_tail(&obj->global_link, + &to_i915(obj->base.dev)->mm.unbound_list); + + /* And finally now the object is completely decoupled from this vma, + * we can drop its hold on the backing storage and allow it to be + * reaped by the shrinker. + */ + i915_gem_object_unpin_pages(obj); + GEM_BUG_ON(atomic_read(&obj->mm.pages_pin_count) < obj->bind_count); +} + int __i915_vma_do_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) { - unsigned int bound = vma->flags; + const unsigned int bound = vma->flags; int ret; lockdep_assert_held(&vma->vm->i915->drm.struct_mutex); @@ -521,18 +547,18 @@ int __i915_vma_do_pin(struct i915_vma *vma, if (WARN_ON(bound & I915_VMA_PIN_OVERFLOW)) { ret = -EBUSY; - goto err; + goto err_unpin; } if ((bound & I915_VMA_BIND_MASK) == 0) { ret = i915_vma_insert(vma, size, alignment, flags); if (ret) - goto err; + goto err_unpin; } ret = i915_vma_bind(vma, vma->obj->cache_level, flags); if (ret) - goto err; + goto err_remove; if ((bound ^ vma->flags) & I915_VMA_GLOBAL_BIND) __i915_vma_set_map_and_fenceable(vma); @@ -541,7 +567,12 @@ int __i915_vma_do_pin(struct i915_vma *vma, GEM_BUG_ON(i915_vma_misplaced(vma, size, alignment, flags)); return 0; -err: +err_remove: + if ((bound & I915_VMA_BIND_MASK) == 0) { + GEM_BUG_ON(vma->pages); + i915_vma_remove(vma); + } +err_unpin: __i915_vma_unpin(vma); return ret; } @@ -654,9 +685,6 @@ int i915_vma_unbind(struct i915_vma *vma) } vma->flags &= ~(I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND); - drm_mm_remove_node(&vma->node); - list_move_tail(&vma->vm_link, &vma->vm->unbound_list); - if (vma->pages != obj->mm.pages) { GEM_BUG_ON(!vma->pages); sg_free_table(vma->pages); @@ -664,18 +692,7 @@ int i915_vma_unbind(struct i915_vma *vma) } vma->pages = NULL; - /* Since the unbound list is global, only move to that list if - * no more VMAs exist. */ - if (--obj->bind_count == 0) - list_move_tail(&obj->global_link, - &to_i915(obj->base.dev)->mm.unbound_list); - - /* And finally now the object is completely decoupled from this vma, - * we can drop its hold on the backing storage and allow it to be - * reaped by the shrinker. - */ - i915_gem_object_unpin_pages(obj); - GEM_BUG_ON(atomic_read(&obj->mm.pages_pin_count) < obj->bind_count); + i915_vma_remove(vma); destroy: if (unlikely(i915_vma_is_closed(vma))) -- cgit From 6067a27d1f0184596d51decbac1c1fdc4acb012f Mon Sep 17 00:00:00 2001 From: Mika Kuoppala Date: Wed, 15 Feb 2017 15:52:59 +0200 Subject: drm/i915: Avoid tweaking evaluation thresholds on Baytrail v3 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Certain Baytrails, namely the 4 cpu core variants, have been plaqued by spurious system hangs, mostly occurring with light loads. Multiple bisects by various people point to a commit which changes the reclocking strategy for Baytrail to follow its bigger brethen: commit 8fb55197e64d ("drm/i915: Agressive downclocking on Baytrail") There is also a review comment attached to this commit from Deepak S on avoiding punit access on Cherryview and thus it was excluded on common reclocking path. By taking the same approach and omitting the punit access by not tweaking the thresholds when the hardware has been asked to move into different frequency, considerable gains in stability have been observed. With J1900 box, light render/video load would end up in system hang in usually less than 12 hours. With this patch applied, the cumulative uptime has now been 34 days without issues. To provoke system hang, light loads on both render and bsd engines in parallel have been used: glxgears >/dev/null 2>/dev/null & mpv --vo=vaapi --hwdec=vaapi --loop=inf vid.mp4 So far, author has not witnessed system hang with above load and this patch applied. Reports from the tenacious people at kernel bugzilla are also promising. Considering that the punit access frequency with this patch is considerably less, there is a possibility that this will push the, still unknown, root cause past the triggering point on most loads. But as we now can reliably reproduce the hang independently, we can reduce the pain that users are having and use a static thresholds until a root cause is found. v3: don't break debugfs and simplification (Chris Wilson) References: https://bugzilla.kernel.org/show_bug.cgi?id=109051 Cc: Chris Wilson Cc: Ville Syrjälä Cc: Len Brown Cc: Daniel Vetter Cc: Jani Nikula Cc: fritsch@xbmc.org Cc: miku@iki.fi Cc: Ezequiel Garcia CC: Michal Feix Cc: Hans de Goede Cc: Deepak S Cc: Jarkko Nikula Cc: # v4.2+ Acked-by: Daniel Vetter Acked-by: Chris Wilson Signed-off-by: Mika Kuoppala Link: http://patchwork.freedesktop.org/patch/msgid/1487166779-26945-1-git-send-email-mika.kuoppala@intel.com --- drivers/gpu/drm/i915/intel_pm.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index b38707efb620..ac0cd82f61e5 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -4879,6 +4879,12 @@ static void gen6_set_rps_thresholds(struct drm_i915_private *dev_priv, u8 val) break; } + /* When byt can survive without system hang with dynamic + * sw freq adjustments, this restriction can be lifted. + */ + if (IS_VALLEYVIEW(dev_priv)) + goto skip_hw_write; + I915_WRITE(GEN6_RP_UP_EI, GT_INTERVAL_FROM_US(dev_priv, ei_up)); I915_WRITE(GEN6_RP_UP_THRESHOLD, @@ -4899,6 +4905,7 @@ static void gen6_set_rps_thresholds(struct drm_i915_private *dev_priv, u8 val) GEN6_RP_UP_BUSY_AVG | GEN6_RP_DOWN_IDLE_AVG); +skip_hw_write: dev_priv->rps.power = new_power; dev_priv->rps.up_threshold = threshold_up; dev_priv->rps.down_threshold = threshold_down; -- cgit From 20fe17aa52dcafd3bdeb9e37281d027b0c1c5a15 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 27 Feb 2017 13:59:11 +0000 Subject: drm/i915: Remove redundant TLB invalidate on switching contexts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We are required to reload the TLBs around context switches (MI_SET_CONTEXT specifically) and the recommendation is do that before the MI_SET_CONTEXT so that it is serialised with the switch and not forgotten: [DevSNB] If Flush TLB invalidation Mode is enabled it’s the driver’s responsibility to invalidate the TLBs at least once after the previous context switch after any GTT mappings changed (including new GTT entries). This can be done by a pipeline PIPE_CONTROL with TLB inv bit set immediately before MI_SET_CONTEXT. However, we already do an unconditional TLB invalidate before every batch so this condition is satifisfied. Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Link: http://patchwork.freedesktop.org/patch/msgid/20170227135913.8056-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_context.c | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 99c46f4dbde6..521e6f4705b1 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -607,17 +607,6 @@ mi_set_context(struct drm_i915_gem_request *req, u32 hw_flags) 0; int len; - /* w/a: If Flush TLB Invalidation Mode is enabled, driver must do a TLB - * invalidation prior to MI_SET_CONTEXT. On GEN6 we don't set the value - * explicitly, so we rely on the value at ring init, stored in - * itlb_before_ctx_switch. - */ - if (IS_GEN6(dev_priv)) { - int ret = engine->emit_flush(req, EMIT_INVALIDATE); - if (ret) - return ret; - } - /* These flags are for resource streamer on HSW+ */ if (IS_HASWELL(dev_priv) || INTEL_GEN(dev_priv) >= 8) flags |= (HSW_MI_RS_SAVE_STATE_EN | HSW_MI_RS_RESTORE_STATE_EN); -- cgit From 12946eceeb97a36a7087995646e077e9872ac99b Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 27 Feb 2017 13:59:12 +0000 Subject: drm/i915: Remove redundant TLB invalidate on switching ppgtt We are required to reload the TLBs around ppgtt switches. However, we already do an unconditional TLB invalidate before every batch and a flush afterwards, so this condition is already satisfied without extra flushes around the LRI instructions. Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Link: http://patchwork.freedesktop.org/patch/msgid/20170227135913.8056-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_gtt.c | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 875a48b9d05a..e0c9542a90c1 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -1464,13 +1464,8 @@ static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt, { struct intel_engine_cs *engine = req->engine; u32 *cs; - int ret; /* NB: TLBs must be flushed and invalidated before a switch */ - ret = engine->emit_flush(req, EMIT_INVALIDATE | EMIT_FLUSH); - if (ret) - return ret; - cs = intel_ring_begin(req, 6); if (IS_ERR(cs)) return PTR_ERR(cs); @@ -1491,13 +1486,8 @@ static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt, { struct intel_engine_cs *engine = req->engine; u32 *cs; - int ret; /* NB: TLBs must be flushed and invalidated before a switch */ - ret = engine->emit_flush(req, EMIT_INVALIDATE | EMIT_FLUSH); - if (ret) - return ret; - cs = intel_ring_begin(req, 6); if (IS_ERR(cs)) return PTR_ERR(cs); @@ -1510,13 +1500,6 @@ static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt, *cs++ = MI_NOOP; intel_ring_advance(req, cs); - /* XXX: RCS is the only one to auto invalidate the TLBs? */ - if (engine->id != RCS) { - ret = engine->emit_flush(req, EMIT_INVALIDATE | EMIT_FLUSH); - if (ret) - return ret; - } - return 0; } -- cgit From afeddf50811253ee512fb9192cfb443891929776 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 27 Feb 2017 13:59:13 +0000 Subject: drm/i915: Reduce context alignment No hardware was ever shipped that needed more than 4096 byte alignment and future hardware will not use this legacy path. So reduce the alignment to make it easier and quicker to launch workloads. Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Link: http://patchwork.freedesktop.org/patch/msgid/20170227135913.8056-3-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_context.c | 17 ----------------- drivers/gpu/drm/i915/i915_gem_context.h | 2 -- drivers/gpu/drm/i915/intel_ringbuffer.c | 3 ++- 3 files changed, 2 insertions(+), 20 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 521e6f4705b1..baceca14f5e0 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -92,21 +92,6 @@ #define ALL_L3_SLICES(dev) (1 << NUM_L3_SLICES(dev)) - 1 -/* This is a HW constraint. The value below is the largest known requirement - * I've seen in a spec to date, and that was a workaround for a non-shipping - * part. It should be safe to decrease this, but it's more future proof as is. - */ -#define GEN6_CONTEXT_ALIGN (64<<10) -#define GEN7_CONTEXT_ALIGN I915_GTT_MIN_ALIGNMENT - -static size_t get_context_alignment(struct drm_i915_private *dev_priv) -{ - if (IS_GEN6(dev_priv)) - return GEN6_CONTEXT_ALIGN; - - return GEN7_CONTEXT_ALIGN; -} - static int get_context_size(struct drm_i915_private *dev_priv) { int ret; @@ -281,8 +266,6 @@ __create_hw_context(struct drm_i915_private *dev_priv, list_add_tail(&ctx->link, &dev_priv->context_list); ctx->i915 = dev_priv; - ctx->ggtt_alignment = get_context_alignment(dev_priv); - if (dev_priv->hw_context_size) { struct drm_i915_gem_object *obj; struct i915_vma *vma; diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h index 0ac750b90f3d..81268c9770a6 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.h +++ b/drivers/gpu/drm/i915/i915_gem_context.h @@ -140,8 +140,6 @@ struct i915_gem_context { */ int priority; - /** ggtt_alignment: alignment restriction for context objects */ - u32 ggtt_alignment; /** ggtt_offset_bias: placement restriction for context objects */ u32 ggtt_offset_bias; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index f62afffef682..4a864f8c9387 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1431,7 +1431,8 @@ static int context_pin(struct i915_gem_context *ctx) return ret; } - return i915_vma_pin(vma, 0, ctx->ggtt_alignment, PIN_GLOBAL | PIN_HIGH); + return i915_vma_pin(vma, 0, I915_GTT_MIN_ALIGNMENT, + PIN_GLOBAL | PIN_HIGH); } static int intel_ring_context_pin(struct intel_engine_cs *engine, -- cgit From b0734f77b3d1ae00603bf478611662d5bf6c9b54 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 23 Feb 2017 14:10:20 +0000 Subject: drm/i915: Distinguish between timeout and error in sideband transactions After initiating a sideband transaction, we only want to wait for the transaction to become idle. If, as we are, we wait for both the busy and error flag to clear, if an error is raised we just spin until the timeout. Once the hw is idle, we can then check to see if the hw flagged an error, and report it distinctly. Signed-off-by: Chris Wilson Cc: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/20170223141020.13250-1-chris@chris-wilson.co.uk Reviewed-by: Jani Nikula --- drivers/gpu/drm/i915/intel_sideband.c | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_sideband.c b/drivers/gpu/drm/i915/intel_sideband.c index 9f782b5eb6e6..7d971cb56116 100644 --- a/drivers/gpu/drm/i915/intel_sideband.c +++ b/drivers/gpu/drm/i915/intel_sideband.c @@ -216,6 +216,7 @@ u32 intel_sbi_read(struct drm_i915_private *dev_priv, u16 reg, } I915_WRITE(SBI_ADDR, (reg << 16)); + I915_WRITE(SBI_DATA, 0); if (destination == SBI_ICLK) value = SBI_CTL_DEST_ICLK | SBI_CTL_OP_CRRD; @@ -225,10 +226,15 @@ u32 intel_sbi_read(struct drm_i915_private *dev_priv, u16 reg, if (intel_wait_for_register(dev_priv, SBI_CTL_STAT, - SBI_BUSY | SBI_RESPONSE_FAIL, + SBI_BUSY, 0, 100)) { - DRM_ERROR("timeout waiting for SBI to complete read transaction\n"); + DRM_ERROR("timeout waiting for SBI to complete read\n"); + return 0; + } + + if (I915_READ(SBI_CTL_STAT) & SBI_RESPONSE_FAIL) { + DRM_ERROR("error during SBI read of reg %x\n", reg); return 0; } @@ -260,10 +266,16 @@ void intel_sbi_write(struct drm_i915_private *dev_priv, u16 reg, u32 value, if (intel_wait_for_register(dev_priv, SBI_CTL_STAT, - SBI_BUSY | SBI_RESPONSE_FAIL, + SBI_BUSY, 0, 100)) { - DRM_ERROR("timeout waiting for SBI to complete write transaction\n"); + DRM_ERROR("timeout waiting for SBI to complete write\n"); + return; + } + + if (I915_READ(SBI_CTL_STAT) & SBI_RESPONSE_FAIL) { + DRM_ERROR("error during SBI write of %x to reg %x\n", + value, reg); return; } } -- cgit From 8d769ea7bc16c34c9dc5143be021e943014c4cd1 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 27 Feb 2017 20:58:47 +0000 Subject: drm/i915: Report both waiters and success from intel_engine_wakeup() The two users of the return value from intel_engine_wakeup() are expecting different results. In the breadcrumbs hangcheck, we are using it to determine whether wake_up_process() detected the waiter was currently running (and if so we presume that it hasn't yet missed the interrupt). However, in the fake_irq path, we are using the return value as a check as to whether there are any waiters, and so we may incorrectly stop the fake-irq if that waiter was currently running. To handle the two different needs, return both bits of information! We uninline it from the irq path in preparation for the next patch which makes the irq hotpath special and relegates intel_engine_wakeup() to the slow fixup paths. v2: s/ret/result/ Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: Mika Kuoppala Reviewed-by: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170227205850.2828-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_breadcrumbs.c | 28 +++++++++++++++++++++++++++- drivers/gpu/drm/i915/intel_ringbuffer.h | 26 +++----------------------- 2 files changed, 30 insertions(+), 24 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c index 027c93e34c97..c8361f350350 100644 --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c @@ -26,6 +26,32 @@ #include "i915_drv.h" +unsigned int intel_engine_wakeup(struct intel_engine_cs *engine) +{ + unsigned int result = 0; + + /* Note that for this not to dangerously chase a dangling pointer, + * we must hold the rcu_read_lock here. + * + * Also note that tsk is likely to be in !TASK_RUNNING state so an + * early test for tsk->state != TASK_RUNNING before wake_up_process() + * is unlikely to be beneficial. + */ + if (intel_engine_has_waiter(engine)) { + struct task_struct *tsk; + + result = ENGINE_WAKEUP_WAITER; + + rcu_read_lock(); + tsk = rcu_dereference(engine->breadcrumbs.irq_seqno_bh); + if (tsk && !wake_up_process(tsk)) + result |= ENGINE_WAKEUP_ACTIVE; + rcu_read_unlock(); + } + + return result; +} + static unsigned long wait_timeout(void) { return round_jiffies_up(jiffies + DRM_I915_HANGCHECK_JIFFIES); @@ -49,7 +75,7 @@ static void intel_breadcrumbs_hangcheck(unsigned long data) * to process the pending interrupt (e.g, low priority task on a loaded * system) and wait until it sleeps before declaring a missed interrupt. */ - if (!intel_engine_wakeup(engine)) { + if (intel_engine_wakeup(engine) & ENGINE_WAKEUP_ACTIVE) { mod_timer(&b->hangcheck, wait_timeout()); return; } diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 0f29e07a9581..7d753dc1b89d 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -642,29 +642,9 @@ static inline bool intel_engine_has_waiter(const struct intel_engine_cs *engine) return rcu_access_pointer(engine->breadcrumbs.irq_seqno_bh); } -static inline bool intel_engine_wakeup(const struct intel_engine_cs *engine) -{ - bool wakeup = false; - - /* Note that for this not to dangerously chase a dangling pointer, - * we must hold the rcu_read_lock here. - * - * Also note that tsk is likely to be in !TASK_RUNNING state so an - * early test for tsk->state != TASK_RUNNING before wake_up_process() - * is unlikely to be beneficial. - */ - if (intel_engine_has_waiter(engine)) { - struct task_struct *tsk; - - rcu_read_lock(); - tsk = rcu_dereference(engine->breadcrumbs.irq_seqno_bh); - if (tsk) - wakeup = wake_up_process(tsk); - rcu_read_unlock(); - } - - return wakeup; -} +unsigned int intel_engine_wakeup(struct intel_engine_cs *engine); +#define ENGINE_WAKEUP_WAITER BIT(0) +#define ENGINE_WAKEUP_ACTIVE BIT(1) void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine); void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine); -- cgit From 56299fb7d9047cc1d25362827073b2ac0984ed21 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 27 Feb 2017 20:58:48 +0000 Subject: drm/i915: Signal first fence from irq handler if complete As execlists and other non-semaphore multi-engine devices coordinate between engines using interrupts, we can shave off a few 10s of microsecond of scheduling latency by doing the fence signaling from the interrupt as opposed to a RT kthread. (Realistically the delay adds about 1% to an individual cross-engine workload.) We only signal the first fence in order to limit the amount of work we move into the interrupt handler. We also have to remember that our breadcrumbs may be unordered with respect to the interrupt and so we still require the waiter process to perform some heavyweight coherency fixups, as well as traversing the tree of waiters. v2: No need for early exit in irq handler - it breaks the flow between patches and prevents the tracepoint v3: Restore rcu hold across irq signaling of request Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: Mika Kuoppala Reviewed-by: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170227205850.2828-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_drv.h | 13 ++++++------ drivers/gpu/drm/i915/i915_gem_request.c | 2 +- drivers/gpu/drm/i915/i915_gem_request.h | 2 ++ drivers/gpu/drm/i915/i915_irq.c | 36 +++++++++++++++++++++++++++++--- drivers/gpu/drm/i915/intel_breadcrumbs.c | 32 +++++++--------------------- drivers/gpu/drm/i915/intel_ringbuffer.h | 8 +++---- 6 files changed, 54 insertions(+), 39 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 933874ddea75..71152656ebbf 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -4060,9 +4060,9 @@ __i915_request_irq_complete(const struct drm_i915_gem_request *req) * is woken. */ if (engine->irq_seqno_barrier && - rcu_access_pointer(engine->breadcrumbs.irq_seqno_bh) == current && test_and_clear_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted)) { - struct task_struct *tsk; + struct intel_breadcrumbs *b = &engine->breadcrumbs; + unsigned long flags; /* The ordering of irq_posted versus applying the barrier * is crucial. The clearing of the current irq_posted must @@ -4084,17 +4084,16 @@ __i915_request_irq_complete(const struct drm_i915_gem_request *req) * the seqno before we believe it coherent since they see * irq_posted == false but we are still running). */ - rcu_read_lock(); - tsk = rcu_dereference(engine->breadcrumbs.irq_seqno_bh); - if (tsk && tsk != current) + spin_lock_irqsave(&b->lock, flags); + if (b->first_wait && b->first_wait->tsk != current) /* Note that if the bottom-half is changed as we * are sending the wake-up, the new bottom-half will * be woken by whomever made the change. We only have * to worry about when we steal the irq-posted for * ourself. */ - wake_up_process(tsk); - rcu_read_unlock(); + wake_up_process(b->first_wait->tsk); + spin_unlock_irqrestore(&b->lock, flags); if (__i915_gem_request_completed(req, seqno)) return true; diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index fbfeb5f8d069..77c3ee7a3fd0 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -1083,7 +1083,7 @@ long i915_wait_request(struct drm_i915_gem_request *req, if (flags & I915_WAIT_LOCKED) add_wait_queue(errq, &reset); - intel_wait_init(&wait); + intel_wait_init(&wait, req); restart: do { diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h index 5f73d8c0a38a..0efee879df23 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.h +++ b/drivers/gpu/drm/i915/i915_gem_request.h @@ -32,10 +32,12 @@ struct drm_file; struct drm_i915_gem_object; +struct drm_i915_gem_request; struct intel_wait { struct rb_node node; struct task_struct *tsk; + struct drm_i915_gem_request *request; u32 seqno; }; diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 312d30e96dc2..e06e6eb99801 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -1033,12 +1033,42 @@ static void ironlake_rps_change_irq_handler(struct drm_i915_private *dev_priv) static void notify_ring(struct intel_engine_cs *engine) { - bool waiters; + struct drm_i915_gem_request *rq = NULL; + struct intel_wait *wait; atomic_inc(&engine->irq_count); set_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted); - waiters = intel_engine_wakeup(engine); - trace_intel_engine_notify(engine, waiters); + + rcu_read_lock(); + + spin_lock(&engine->breadcrumbs.lock); + wait = engine->breadcrumbs.first_wait; + if (wait) { + /* We use a callback from the dma-fence to submit + * requests after waiting on our own requests. To + * ensure minimum delay in queuing the next request to + * hardware, signal the fence now rather than wait for + * the signaler to be woken up. We still wake up the + * waiter in order to handle the irq-seqno coherency + * issues (we may receive the interrupt before the + * seqno is written, see __i915_request_irq_complete()) + * and to handle coalescing of multiple seqno updates + * and many waiters. + */ + if (i915_seqno_passed(intel_engine_get_seqno(engine), + wait->seqno)) + rq = wait->request; + + wake_up_process(wait->tsk); + } + spin_unlock(&engine->breadcrumbs.lock); + + if (rq) + dma_fence_signal(&rq->fence); + + rcu_read_unlock(); + + trace_intel_engine_notify(engine, wait); } static void vlv_c0_read(struct drm_i915_private *dev_priv, diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c index c8361f350350..e7f4a4c923a2 100644 --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c @@ -28,26 +28,18 @@ unsigned int intel_engine_wakeup(struct intel_engine_cs *engine) { + struct intel_wait *wait; + unsigned long flags; unsigned int result = 0; - /* Note that for this not to dangerously chase a dangling pointer, - * we must hold the rcu_read_lock here. - * - * Also note that tsk is likely to be in !TASK_RUNNING state so an - * early test for tsk->state != TASK_RUNNING before wake_up_process() - * is unlikely to be beneficial. - */ - if (intel_engine_has_waiter(engine)) { - struct task_struct *tsk; - + spin_lock_irqsave(&engine->breadcrumbs.lock, flags); + wait = engine->breadcrumbs.first_wait; + if (wait) { result = ENGINE_WAKEUP_WAITER; - - rcu_read_lock(); - tsk = rcu_dereference(engine->breadcrumbs.irq_seqno_bh); - if (tsk && !wake_up_process(tsk)) + if (!wake_up_process(wait->tsk)) result |= ENGINE_WAKEUP_ACTIVE; - rcu_read_unlock(); } + spin_unlock_irqrestore(&engine->breadcrumbs.lock, flags); return result; } @@ -301,7 +293,6 @@ static bool __intel_engine_add_wait(struct intel_engine_cs *engine, } rb_link_node(&wait->node, parent, p); rb_insert_color(&wait->node, &b->waiters); - GEM_BUG_ON(!first && !rcu_access_pointer(b->irq_seqno_bh)); if (completed) { struct rb_node *next = rb_next(completed); @@ -310,7 +301,6 @@ static bool __intel_engine_add_wait(struct intel_engine_cs *engine, if (next && next != &wait->node) { GEM_BUG_ON(first); b->first_wait = to_wait(next); - rcu_assign_pointer(b->irq_seqno_bh, b->first_wait->tsk); /* As there is a delay between reading the current * seqno, processing the completed tasks and selecting * the next waiter, we may have missed the interrupt @@ -337,7 +327,6 @@ static bool __intel_engine_add_wait(struct intel_engine_cs *engine, if (first) { GEM_BUG_ON(rb_first(&b->waiters) != &wait->node); b->first_wait = wait; - rcu_assign_pointer(b->irq_seqno_bh, wait->tsk); /* After assigning ourselves as the new bottom-half, we must * perform a cursory check to prevent a missed interrupt. * Either we miss the interrupt whilst programming the hardware, @@ -348,7 +337,6 @@ static bool __intel_engine_add_wait(struct intel_engine_cs *engine, */ __intel_breadcrumbs_enable_irq(b); } - GEM_BUG_ON(!rcu_access_pointer(b->irq_seqno_bh)); GEM_BUG_ON(!b->first_wait); GEM_BUG_ON(rb_first(&b->waiters) != &b->first_wait->node); @@ -396,8 +384,6 @@ static void __intel_engine_remove_wait(struct intel_engine_cs *engine, const int priority = wakeup_priority(b, wait->tsk); struct rb_node *next; - GEM_BUG_ON(rcu_access_pointer(b->irq_seqno_bh) != wait->tsk); - /* We are the current bottom-half. Find the next candidate, * the first waiter in the queue on the remaining oldest * request. As multiple seqnos may complete in the time it @@ -439,13 +425,11 @@ static void __intel_engine_remove_wait(struct intel_engine_cs *engine, * exception rather than a seqno completion. */ b->first_wait = to_wait(next); - rcu_assign_pointer(b->irq_seqno_bh, b->first_wait->tsk); if (b->first_wait->seqno != wait->seqno) __intel_breadcrumbs_enable_irq(b); wake_up_process(b->first_wait->tsk); } else { b->first_wait = NULL; - rcu_assign_pointer(b->irq_seqno_bh, NULL); __intel_breadcrumbs_disable_irq(b); } } else { @@ -459,7 +443,6 @@ out: GEM_BUG_ON(b->first_wait == wait); GEM_BUG_ON(rb_first(&b->waiters) != (b->first_wait ? &b->first_wait->node : NULL)); - GEM_BUG_ON(!rcu_access_pointer(b->irq_seqno_bh) ^ RB_EMPTY_ROOT(&b->waiters)); } void intel_engine_remove_wait(struct intel_engine_cs *engine, @@ -621,6 +604,7 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request) return; request->signaling.wait.tsk = b->signaler; + request->signaling.wait.request = request; request->signaling.wait.seqno = seqno; i915_gem_request_get(request); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 7d753dc1b89d..974ec11d2b1d 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -235,8 +235,6 @@ struct intel_engine_cs { * the overhead of waking that client is much preferred. */ struct intel_breadcrumbs { - struct task_struct __rcu *irq_seqno_bh; /* bh for interrupts */ - spinlock_t lock; /* protects the lists of requests; irqsafe */ struct rb_root waiters; /* sorted by retirement, priority */ struct rb_root signals; /* sorted by retirement */ @@ -582,9 +580,11 @@ static inline u32 intel_hws_seqno_address(struct intel_engine_cs *engine) /* intel_breadcrumbs.c -- user interrupt bottom-half for waiters */ int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine); -static inline void intel_wait_init(struct intel_wait *wait) +static inline void intel_wait_init(struct intel_wait *wait, + struct drm_i915_gem_request *rq) { wait->tsk = current; + wait->request = rq; } static inline void intel_wait_init_for_seqno(struct intel_wait *wait, u32 seqno) @@ -639,7 +639,7 @@ void intel_engine_cancel_signaling(struct drm_i915_gem_request *request); static inline bool intel_engine_has_waiter(const struct intel_engine_cs *engine) { - return rcu_access_pointer(engine->breadcrumbs.irq_seqno_bh); + return READ_ONCE(engine->breadcrumbs.first_wait); } unsigned int intel_engine_wakeup(struct intel_engine_cs *engine); -- cgit From 19d0a5727123509f0fd8b84551d4715e86937df9 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 27 Feb 2017 20:58:49 +0000 Subject: drm/i915: Defer enabling hangcheck to the first fake breadcrumb interrupt By deferring hangcheck to the fake breadcrumb interrupt, we can simply the enabling procedure slightly - as by enabling the fake, we then enable the hangcheck. By always enabling the hangcheck from each fake interrupt (it will be a no-op for an already queued hangcheck), it will make restoring the breadcrumbs after a reset simpler in the next patch. Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: Mika Kuoppala Reviewed-by: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170227205850.2828-3-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_breadcrumbs.c | 36 ++++++++++++++++---------------- 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c index e7f4a4c923a2..3855c8c39b35 100644 --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c @@ -75,17 +75,6 @@ static void intel_breadcrumbs_hangcheck(unsigned long data) DRM_DEBUG("Hangcheck timer elapsed... %s idle\n", engine->name); set_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings); mod_timer(&engine->breadcrumbs.fake_irq, jiffies + 1); - - /* Ensure that even if the GPU hangs, we get woken up. - * - * However, note that if no one is waiting, we never notice - * a gpu hang. Eventually, we will have to wait for a resource - * held by the GPU and so trigger a hangcheck. In the most - * pathological case, this will be upon memory starvation! To - * prevent this, we also queue the hangcheck from the retire - * worker. - */ - i915_queue_hangcheck(engine->i915); } static void intel_breadcrumbs_fake_irq(unsigned long data) @@ -99,8 +88,21 @@ static void intel_breadcrumbs_fake_irq(unsigned long data) * every jiffie in order to kick the oldest waiter to do the * coherent seqno check. */ - if (intel_engine_wakeup(engine)) - mod_timer(&engine->breadcrumbs.fake_irq, jiffies + 1); + if (!intel_engine_wakeup(engine)) + return; + + mod_timer(&engine->breadcrumbs.fake_irq, jiffies + 1); + + /* Ensure that even if the GPU hangs, we get woken up. + * + * However, note that if no one is waiting, we never notice + * a gpu hang. Eventually, we will have to wait for a resource + * held by the GPU and so trigger a hangcheck. In the most + * pathological case, this will be upon memory starvation! To + * prevent this, we also queue the hangcheck from the retire + * worker. + */ + i915_queue_hangcheck(engine->i915); } static void irq_enable(struct intel_engine_cs *engine) @@ -179,13 +181,11 @@ static void __intel_breadcrumbs_enable_irq(struct intel_breadcrumbs *b) b->irq_enabled = true; } - if (!b->irq_enabled || use_fake_irq(b)) { + /* Ensure we never sleep indefinitely */ + if (!b->irq_enabled || use_fake_irq(b)) mod_timer(&b->fake_irq, jiffies + 1); - i915_queue_hangcheck(i915); - } else { - /* Ensure we never sleep indefinitely */ + else mod_timer(&b->hangcheck, wait_timeout()); - } } static void __intel_breadcrumbs_disable_irq(struct intel_breadcrumbs *b) -- cgit From 67b807a892302d184f6987b0c3cc2dc2505e4a2d Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 27 Feb 2017 20:58:50 +0000 Subject: drm/i915: Delay disabling the user interrupt for breadcrumbs A significant cost in setting up a wait is the overhead of enabling the interrupt. As we disable the interrupt whenever the queue of waiters is empty, if we are frequently waiting on alternating batches, we end up re-enabling the interrupt on a frequent basis. We do want to disable the interrupt during normal operations as under high load it may add several thousand interrupts/s - we have been known in the past to occupy whole cores with our interrupt handler after accidentally leaving user interrupts enabled. As a compromise, leave the interrupt enabled until the next IRQ, or the system is idle. This gives a small window for a waiter to keep the interrupt active and not be delayed by having to re-enable the interrupt. v2: Restore hangcheck/missed-irq detection for continuations v3: Be more careful restoring the hangcheck timer after reset v4: Be more careful restoring the fake irq after reset (if required!) v5: Redo changes to intel_engine_wakeup() v6: Factor out __intel_engine_wakeup() v7: Improve commentary for declaring a missed wakeup Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: Mika Kuoppala Reviewed-by: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170227205850.2828-4-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem.c | 4 +- drivers/gpu/drm/i915/i915_irq.c | 2 + drivers/gpu/drm/i915/intel_breadcrumbs.c | 176 ++++++++++++++++++++----------- drivers/gpu/drm/i915/intel_ringbuffer.h | 7 +- 4 files changed, 125 insertions(+), 64 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 01dbba3813c7..0ad080984877 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2988,8 +2988,10 @@ i915_gem_idle_work_handler(struct work_struct *work) if (wait_for(intel_execlists_idle(dev_priv), 10)) DRM_ERROR("Timeout waiting for engines to idle\n"); - for_each_engine(engine, dev_priv, id) + for_each_engine(engine, dev_priv, id) { + intel_engine_disarm_breadcrumbs(engine); i915_gem_batch_pool_fini(&engine->batch_pool); + } GEM_BUG_ON(!dev_priv->gt.awake); dev_priv->gt.awake = false; diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index e06e6eb99801..19892854707f 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -1060,6 +1060,8 @@ static void notify_ring(struct intel_engine_cs *engine) rq = wait->request; wake_up_process(wait->tsk); + } else { + __intel_engine_disarm_breadcrumbs(engine); } spin_unlock(&engine->breadcrumbs.lock); diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c index 3855c8c39b35..7c7867d65a39 100644 --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c @@ -26,20 +26,30 @@ #include "i915_drv.h" -unsigned int intel_engine_wakeup(struct intel_engine_cs *engine) +static unsigned int __intel_breadcrumbs_wakeup(struct intel_breadcrumbs *b) { struct intel_wait *wait; - unsigned long flags; unsigned int result = 0; - spin_lock_irqsave(&engine->breadcrumbs.lock, flags); - wait = engine->breadcrumbs.first_wait; + wait = b->first_wait; if (wait) { result = ENGINE_WAKEUP_WAITER; - if (!wake_up_process(wait->tsk)) - result |= ENGINE_WAKEUP_ACTIVE; + if (wake_up_process(wait->tsk)) + result |= ENGINE_WAKEUP_ASLEEP; } - spin_unlock_irqrestore(&engine->breadcrumbs.lock, flags); + + return result; +} + +unsigned int intel_engine_wakeup(struct intel_engine_cs *engine) +{ + struct intel_breadcrumbs *b = &engine->breadcrumbs; + unsigned long flags; + unsigned int result; + + spin_lock_irqsave(&b->lock, flags); + result = __intel_breadcrumbs_wakeup(b); + spin_unlock_irqrestore(&b->lock, flags); return result; } @@ -54,7 +64,7 @@ static void intel_breadcrumbs_hangcheck(unsigned long data) struct intel_engine_cs *engine = (struct intel_engine_cs *)data; struct intel_breadcrumbs *b = &engine->breadcrumbs; - if (!b->irq_enabled) + if (!b->irq_armed) return; if (b->hangcheck_interrupts != atomic_read(&engine->irq_count)) { @@ -63,23 +73,32 @@ static void intel_breadcrumbs_hangcheck(unsigned long data) return; } - /* If the waiter was currently running, assume it hasn't had a chance + /* We keep the hangcheck time alive until we disarm the irq, even + * if there are no waiters at present. + * + * If the waiter was currently running, assume it hasn't had a chance * to process the pending interrupt (e.g, low priority task on a loaded * system) and wait until it sleeps before declaring a missed interrupt. + * + * If the waiter was asleep (and not even pending a wakeup), then we + * must have missed an interrupt as the GPU has stopped advancing + * but we still have a waiter. Assuming all batches complete within + * DRM_I915_HANGCHECK_JIFFIES [1.5s]! */ - if (intel_engine_wakeup(engine) & ENGINE_WAKEUP_ACTIVE) { + if (intel_engine_wakeup(engine) & ENGINE_WAKEUP_ASLEEP) { + DRM_DEBUG("Hangcheck timer elapsed... %s idle\n", engine->name); + set_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings); + mod_timer(&engine->breadcrumbs.fake_irq, jiffies + 1); + } else { mod_timer(&b->hangcheck, wait_timeout()); - return; } - - DRM_DEBUG("Hangcheck timer elapsed... %s idle\n", engine->name); - set_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings); - mod_timer(&engine->breadcrumbs.fake_irq, jiffies + 1); } static void intel_breadcrumbs_fake_irq(unsigned long data) { struct intel_engine_cs *engine = (struct intel_engine_cs *)data; + struct intel_breadcrumbs *b = &engine->breadcrumbs; + unsigned long flags; /* * The timer persists in case we cannot enable interrupts, @@ -88,10 +107,15 @@ static void intel_breadcrumbs_fake_irq(unsigned long data) * every jiffie in order to kick the oldest waiter to do the * coherent seqno check. */ - if (!intel_engine_wakeup(engine)) + + spin_lock_irqsave(&b->lock, flags); + if (!__intel_breadcrumbs_wakeup(b)) + __intel_engine_disarm_breadcrumbs(engine); + spin_unlock_irqrestore(&b->lock, flags); + if (!b->irq_armed) return; - mod_timer(&engine->breadcrumbs.fake_irq, jiffies + 1); + mod_timer(&b->fake_irq, jiffies + 1); /* Ensure that even if the GPU hangs, we get woken up. * @@ -127,6 +151,42 @@ static void irq_disable(struct intel_engine_cs *engine) spin_unlock(&engine->i915->irq_lock); } +void __intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine) +{ + struct intel_breadcrumbs *b = &engine->breadcrumbs; + + assert_spin_locked(&b->lock); + + if (b->irq_enabled) { + irq_disable(engine); + b->irq_enabled = false; + } + + b->irq_armed = false; +} + +void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine) +{ + struct intel_breadcrumbs *b = &engine->breadcrumbs; + unsigned long flags; + + if (!b->irq_armed) + return; + + spin_lock_irqsave(&b->lock, flags); + + /* We only disarm the irq when we are idle (all requests completed), + * so if there remains a sleeping waiter, it missed the request + * completion. + */ + if (__intel_breadcrumbs_wakeup(b) & ENGINE_WAKEUP_ASLEEP) + set_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings); + + __intel_engine_disarm_breadcrumbs(engine); + + spin_unlock_irqrestore(&b->lock, flags); +} + static bool use_fake_irq(const struct intel_breadcrumbs *b) { const struct intel_engine_cs *engine = @@ -144,6 +204,15 @@ static bool use_fake_irq(const struct intel_breadcrumbs *b) return atomic_read(&engine->irq_count) == b->hangcheck_interrupts; } +static void enable_fake_irq(struct intel_breadcrumbs *b) +{ + /* Ensure we never sleep indefinitely */ + if (!b->irq_enabled || use_fake_irq(b)) + mod_timer(&b->fake_irq, jiffies + 1); + else + mod_timer(&b->hangcheck, wait_timeout()); +} + static void __intel_breadcrumbs_enable_irq(struct intel_breadcrumbs *b) { struct intel_engine_cs *engine = @@ -151,9 +220,17 @@ static void __intel_breadcrumbs_enable_irq(struct intel_breadcrumbs *b) struct drm_i915_private *i915 = engine->i915; assert_spin_locked(&b->lock); - if (b->rpm_wakelock) + if (b->irq_armed) return; + /* The breadcrumb irq will be disarmed on the interrupt after the + * waiters are signaled. This gives us a single interrupt window in + * which we can add a new waiter and avoid the cost of re-enabling + * the irq. + */ + b->irq_armed = true; + GEM_BUG_ON(b->irq_enabled); + if (I915_SELFTEST_ONLY(b->mock)) { /* For our mock objects we want to avoid interaction * with the real hardware (which is not set up). So @@ -162,17 +239,15 @@ static void __intel_breadcrumbs_enable_irq(struct intel_breadcrumbs *b) * implementation to call intel_engine_wakeup() * itself when it wants to simulate a user interrupt, */ - b->rpm_wakelock = true; return; } /* Since we are waiting on a request, the GPU should be busy - * and should have its own rpm reference. For completeness, - * record an rpm reference for ourselves to cover the - * interrupt we unmask. + * and should have its own rpm reference. This is tracked + * by i915->gt.awake, we can forgo holding our own wakref + * for the interrupt as before i915->gt.awake is released (when + * the driver is idle) we disarm the breadcrumbs. */ - intel_runtime_pm_get_noresume(i915); - b->rpm_wakelock = true; /* No interrupts? Kick the waiter every jiffie! */ if (intel_irqs_enabled(i915)) { @@ -181,34 +256,7 @@ static void __intel_breadcrumbs_enable_irq(struct intel_breadcrumbs *b) b->irq_enabled = true; } - /* Ensure we never sleep indefinitely */ - if (!b->irq_enabled || use_fake_irq(b)) - mod_timer(&b->fake_irq, jiffies + 1); - else - mod_timer(&b->hangcheck, wait_timeout()); -} - -static void __intel_breadcrumbs_disable_irq(struct intel_breadcrumbs *b) -{ - struct intel_engine_cs *engine = - container_of(b, struct intel_engine_cs, breadcrumbs); - - assert_spin_locked(&b->lock); - if (!b->rpm_wakelock) - return; - - if (I915_SELFTEST_ONLY(b->mock)) { - b->rpm_wakelock = false; - return; - } - - if (b->irq_enabled) { - irq_disable(engine); - b->irq_enabled = false; - } - - intel_runtime_pm_put(engine->i915); - b->rpm_wakelock = false; + enable_fake_irq(b); } static inline struct intel_wait *to_wait(struct rb_node *node) @@ -430,7 +478,6 @@ static void __intel_engine_remove_wait(struct intel_engine_cs *engine, wake_up_process(b->first_wait->tsk); } else { b->first_wait = NULL; - __intel_breadcrumbs_disable_irq(b); } } else { GEM_BUG_ON(rb_first(&b->waiters) == &wait->node); @@ -722,15 +769,22 @@ void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine) cancel_fake_irq(engine); spin_lock_irq(&b->lock); - __intel_breadcrumbs_disable_irq(b); - if (intel_engine_has_waiter(engine)) { - __intel_breadcrumbs_enable_irq(b); - if (test_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted)) - wake_up_process(b->first_wait->tsk); - } else { - /* sanitize the IMR and unmask any auxiliary interrupts */ + if (b->irq_enabled) + irq_enable(engine); + else irq_disable(engine); - } + + /* We set the IRQ_BREADCRUMB bit when we enable the irq presuming the + * GPU is active and may have already executed the MI_USER_INTERRUPT + * before the CPU is ready to receive. However, the engine is currently + * idle (we haven't started it yet), there is no possibility for a + * missed interrupt as we enabled the irq and so we can clear the + * immediate wakeup (until a real interrupt arrives for the waiter). + */ + clear_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted); + + if (b->irq_armed) + enable_fake_irq(b); spin_unlock_irq(&b->lock); } diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 974ec11d2b1d..3dd6eee4a08b 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -246,8 +246,8 @@ struct intel_engine_cs { unsigned int hangcheck_interrupts; + bool irq_armed : 1; bool irq_enabled : 1; - bool rpm_wakelock : 1; I915_SELFTEST_DECLARE(bool mock : 1); } breadcrumbs; @@ -644,7 +644,10 @@ static inline bool intel_engine_has_waiter(const struct intel_engine_cs *engine) unsigned int intel_engine_wakeup(struct intel_engine_cs *engine); #define ENGINE_WAKEUP_WAITER BIT(0) -#define ENGINE_WAKEUP_ACTIVE BIT(1) +#define ENGINE_WAKEUP_ASLEEP BIT(1) + +void __intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine); +void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine); void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine); void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine); -- cgit From 39299838eea1a310b1309b538b80cd00603698fd Mon Sep 17 00:00:00 2001 From: Deepak M Date: Fri, 17 Feb 2017 18:13:29 +0530 Subject: drm/i915/glk: Program dphy param reg for GLK For GEMINILAKE, dphy param reg values are programmed in terms of HS byte clock count while for older platforms in terms of HS ddr clk count. v2: Added comments to clarify ddr clock count calculation v3: Use multiplier variable instead of IS_GEMINILAKE() check everywhere (Jani) Signed-off-by: Deepak M Signed-off-by: Madhav Chauhan Reviewed-by: Jani Nikula Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/1487335415-14766-2-git-send-email-madhav.chauhan@intel.com --- drivers/gpu/drm/i915/intel_dsi_panel_vbt.c | 31 +++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dsi_panel_vbt.c b/drivers/gpu/drm/i915/intel_dsi_panel_vbt.c index 84b3683c1f46..405a0f74f455 100644 --- a/drivers/gpu/drm/i915/intel_dsi_panel_vbt.c +++ b/drivers/gpu/drm/i915/intel_dsi_panel_vbt.c @@ -571,6 +571,7 @@ struct drm_panel *vbt_panel_init(struct intel_dsi *intel_dsi, u16 panel_id) u32 tclk_prepare_clkzero, ths_prepare_hszero; u32 lp_to_hs_switch, hs_to_lp_switch; u32 pclk, computed_ddr; + u32 mul; u16 burst_mode_ratio; enum port port; @@ -674,11 +675,6 @@ struct drm_panel *vbt_panel_init(struct intel_dsi *intel_dsi, u16 panel_id) break; } - /* - * ui(s) = 1/f [f in hz] - * ui(ns) = 10^9 / (f*10^6) [f in Mhz] -> 10^3/f(Mhz) - */ - /* in Kbps */ ui_num = NS_KHZ_RATIO; ui_den = bitrate; @@ -692,21 +688,26 @@ struct drm_panel *vbt_panel_init(struct intel_dsi *intel_dsi, u16 panel_id) */ intel_dsi->lp_byte_clk = DIV_ROUND_UP(tlpx_ns * ui_den, 8 * ui_num); - /* count values in UI = (ns value) * (bitrate / (2 * 10^6)) + /* DDR clock period = 2 * UI + * UI(sec) = 1/(bitrate * 10^3) (bitrate is in KHZ) + * UI(nsec) = 10^6 / bitrate + * DDR clock period (nsec) = 2 * UI = (2 * 10^6)/ bitrate + * DDR clock count = ns_value / DDR clock period * - * Since txddrclkhs_i is 2xUI, all the count values programmed in - * DPHY param register are divided by 2 - * - * prepare count + * For GEMINILAKE dphy_param_reg will be programmed in terms of + * HS byte clock count for other platform in HS ddr clock count */ + mul = IS_GEMINILAKE(dev_priv) ? 8 : 2; ths_prepare_ns = max(mipi_config->ths_prepare, mipi_config->tclk_prepare); - prepare_cnt = DIV_ROUND_UP(ths_prepare_ns * ui_den, ui_num * 2); + + /* prepare count */ + prepare_cnt = DIV_ROUND_UP(ths_prepare_ns * ui_den, ui_num * mul); /* exit zero count */ exit_zero_cnt = DIV_ROUND_UP( (ths_prepare_hszero - ths_prepare_ns) * ui_den, - ui_num * 2 + ui_num * mul ); /* @@ -720,12 +721,12 @@ struct drm_panel *vbt_panel_init(struct intel_dsi *intel_dsi, u16 panel_id) /* clk zero count */ clk_zero_cnt = DIV_ROUND_UP( - (tclk_prepare_clkzero - ths_prepare_ns) - * ui_den, 2 * ui_num); + (tclk_prepare_clkzero - ths_prepare_ns) + * ui_den, ui_num * mul); /* trail count */ tclk_trail_ns = max(mipi_config->tclk_trail, mipi_config->ths_trail); - trail_cnt = DIV_ROUND_UP(tclk_trail_ns * ui_den, 2 * ui_num); + trail_cnt = DIV_ROUND_UP(tclk_trail_ns * ui_den, ui_num * mul); if (prepare_cnt > PREPARE_CNT_MAX || exit_zero_cnt > EXIT_ZERO_CNT_MAX || -- cgit From b426f985158d9a723ab195258748c0c5e0793a52 Mon Sep 17 00:00:00 2001 From: Deepak M Date: Fri, 17 Feb 2017 18:13:30 +0530 Subject: drm/i915/glk: Program new MIPI DSI PHY registers for GLK Program the clk lane and tlpx time count registers to configure DSI PHY. v2: Addressed Jani's Review comments(renamed bit field macros) v3: Program clk lane timing reg same as dphy param reg. v4: Removed "line over 80 character" warning Signed-off-by: Deepak M Signed-off-by: Madhav Chauhan Reviewed-by: Jani Nikula Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/1487335415-14766-3-git-send-email-madhav.chauhan@intel.com --- drivers/gpu/drm/i915/i915_reg.h | 8 ++++++++ drivers/gpu/drm/i915/intel_dsi.c | 8 ++++++++ 2 files changed, 16 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index bdce90084d43..31e38ce9730b 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -8625,6 +8625,14 @@ enum { #define LP_BYTECLK_SHIFT 0 #define LP_BYTECLK_MASK (0xffff << 0) +#define _MIPIA_TLPX_TIME_COUNT (dev_priv->mipi_mmio_base + 0xb0a4) +#define _MIPIC_TLPX_TIME_COUNT (dev_priv->mipi_mmio_base + 0xb8a4) +#define MIPI_TLPX_TIME_COUNT(port) _MMIO_MIPI(port, _MIPIA_TLPX_TIME_COUNT, _MIPIC_TLPX_TIME_COUNT) + +#define _MIPIA_CLK_LANE_TIMING (dev_priv->mipi_mmio_base + 0xb098) +#define _MIPIC_CLK_LANE_TIMING (dev_priv->mipi_mmio_base + 0xb898) +#define MIPI_CLK_LANE_TIMING(port) _MMIO_MIPI(port, _MIPIA_CLK_LANE_TIMING, _MIPIC_CLK_LANE_TIMING) + /* bits 31:0 */ #define _MIPIA_LP_GEN_DATA (dev_priv->mipi_mmio_base + 0xb064) #define _MIPIC_LP_GEN_DATA (dev_priv->mipi_mmio_base + 0xb864) diff --git a/drivers/gpu/drm/i915/intel_dsi.c b/drivers/gpu/drm/i915/intel_dsi.c index d7ffb9ac3c8a..f00b4e22f95b 100644 --- a/drivers/gpu/drm/i915/intel_dsi.c +++ b/drivers/gpu/drm/i915/intel_dsi.c @@ -1309,6 +1309,14 @@ static void intel_dsi_prepare(struct intel_encoder *intel_encoder, */ I915_WRITE(MIPI_LP_BYTECLK(port), intel_dsi->lp_byte_clk); + if (IS_GEMINILAKE(dev_priv)) { + I915_WRITE(MIPI_TLPX_TIME_COUNT(port), + intel_dsi->lp_byte_clk); + /* Shadow of DPHY reg */ + I915_WRITE(MIPI_CLK_LANE_TIMING(port), + intel_dsi->dphy_reg); + } + /* the bw essential for transmitting 16 long packets containing * 252 bytes meant for dcs write memory command is programmed in * this register in terms of byte clocks. based on dsi transfer -- cgit From f340c2ff5ebdd213682fe8b9a14838878cd0ff01 Mon Sep 17 00:00:00 2001 From: Deepak M Date: Fri, 17 Feb 2017 18:13:32 +0530 Subject: drm/i915/glk: Add DSI PLL divider range for glk PLL divider range for GLK is different than that of BXT, hence adding the GLK range check in this patch. v2: Code restructure using min and max ratio variables (Ander) v3: Code changes to avoid "maybe-uninitialized" warning (Jani) Signed-off-by: Deepak M Signed-off-by: Madhav Chauhan Reviewed-by: Jani Nikula Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/1487335415-14766-5-git-send-email-madhav.chauhan@intel.com --- drivers/gpu/drm/i915/i915_reg.h | 4 ++++ drivers/gpu/drm/i915/intel_dsi_pll.c | 24 +++++++++++++++++------- 2 files changed, 21 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 31e38ce9730b..89093efc9b42 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -8337,10 +8337,12 @@ enum { #define BXT_DSI_PLL_PVD_RATIO_SHIFT 16 #define BXT_DSI_PLL_PVD_RATIO_MASK (3 << BXT_DSI_PLL_PVD_RATIO_SHIFT) #define BXT_DSI_PLL_PVD_RATIO_1 (1 << BXT_DSI_PLL_PVD_RATIO_SHIFT) +#define BXT_DSIC_16X_BY1 (0 << 10) #define BXT_DSIC_16X_BY2 (1 << 10) #define BXT_DSIC_16X_BY3 (2 << 10) #define BXT_DSIC_16X_BY4 (3 << 10) #define BXT_DSIC_16X_MASK (3 << 10) +#define BXT_DSIA_16X_BY1 (0 << 8) #define BXT_DSIA_16X_BY2 (1 << 8) #define BXT_DSIA_16X_BY3 (2 << 8) #define BXT_DSIA_16X_BY4 (3 << 8) @@ -8350,6 +8352,8 @@ enum { #define BXT_DSI_PLL_RATIO_MAX 0x7D #define BXT_DSI_PLL_RATIO_MIN 0x22 +#define GLK_DSI_PLL_RATIO_MAX 0x6F +#define GLK_DSI_PLL_RATIO_MIN 0x22 #define BXT_DSI_PLL_RATIO_MASK 0xFF #define BXT_REF_CLOCK_KHZ 19200 diff --git a/drivers/gpu/drm/i915/intel_dsi_pll.c b/drivers/gpu/drm/i915/intel_dsi_pll.c index 3a7308681360..3edfba862227 100644 --- a/drivers/gpu/drm/i915/intel_dsi_pll.c +++ b/drivers/gpu/drm/i915/intel_dsi_pll.c @@ -426,11 +426,12 @@ static void bxt_dsi_program_clocks(struct drm_device *dev, enum port port, I915_WRITE(BXT_MIPI_CLOCK_CTL, tmp); } -static int bxt_compute_dsi_pll(struct intel_encoder *encoder, +static int gen9lp_compute_dsi_pll(struct intel_encoder *encoder, struct intel_crtc_state *config) { + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); - u8 dsi_ratio; + u8 dsi_ratio, dsi_ratio_min, dsi_ratio_max; u32 dsi_clk; dsi_clk = dsi_clk_from_pclk(intel_dsi->pclk, intel_dsi->pixel_format, @@ -442,11 +443,20 @@ static int bxt_compute_dsi_pll(struct intel_encoder *encoder, * round 'up' the result */ dsi_ratio = DIV_ROUND_UP(dsi_clk * 2, BXT_REF_CLOCK_KHZ); - if (dsi_ratio < BXT_DSI_PLL_RATIO_MIN || - dsi_ratio > BXT_DSI_PLL_RATIO_MAX) { + + if (IS_BROXTON(dev_priv)) { + dsi_ratio_min = BXT_DSI_PLL_RATIO_MIN; + dsi_ratio_max = BXT_DSI_PLL_RATIO_MAX; + } else { + dsi_ratio_min = GLK_DSI_PLL_RATIO_MIN; + dsi_ratio_max = GLK_DSI_PLL_RATIO_MAX; + } + + if (dsi_ratio < dsi_ratio_min || dsi_ratio > dsi_ratio_max) { DRM_ERROR("Cant get a suitable ratio from DSI PLL ratios\n"); return -ECHRNG; - } + } else + DRM_DEBUG_KMS("DSI PLL calculation is Done!!\n"); /* * Program DSI ratio and Select MIPIC and MIPIA PLL output as 8x @@ -458,7 +468,7 @@ static int bxt_compute_dsi_pll(struct intel_encoder *encoder, /* As per recommendation from hardware team, * Prog PVD ratio =1 if dsi ratio <= 50 */ - if (dsi_ratio <= 50) + if (IS_BROXTON(dev_priv) && dsi_ratio <= 50) config->dsi_pll.ctrl |= BXT_DSI_PLL_PVD_RATIO_1; return 0; @@ -518,7 +528,7 @@ int intel_compute_dsi_pll(struct intel_encoder *encoder, if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) return vlv_compute_dsi_pll(encoder, config); else if (IS_GEN9_LP(dev_priv)) - return bxt_compute_dsi_pll(encoder, config); + return gen9lp_compute_dsi_pll(encoder, config); return -ENODEV; } -- cgit From 09a568e7ac2c053a2f51653deb11744d133d41a4 Mon Sep 17 00:00:00 2001 From: Deepak M Date: Fri, 17 Feb 2017 18:13:33 +0530 Subject: drm/i915i/glk: Program MIPI_CLOCK_CTRL only for BXT Register MIPI_CLOCK_CTRL is applicable only for BXT platform. Future platform have other registers to program the escape clock dividers. Signed-off-by: Deepak M Signed-off-by: Madhav Chauhan Reviewed-by: Jani Nikula Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/1487335415-14766-6-git-send-email-madhav.chauhan@intel.com --- drivers/gpu/drm/i915/intel_dsi_pll.c | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dsi_pll.c b/drivers/gpu/drm/i915/intel_dsi_pll.c index 3edfba862227..0a9a5c431a04 100644 --- a/drivers/gpu/drm/i915/intel_dsi_pll.c +++ b/drivers/gpu/drm/i915/intel_dsi_pll.c @@ -489,8 +489,10 @@ static void bxt_enable_dsi_pll(struct intel_encoder *encoder, POSTING_READ(BXT_DSI_PLL_CTL); /* Program TX, RX, Dphy clocks */ - for_each_dsi_port(port, intel_dsi->ports) - bxt_dsi_program_clocks(encoder->base.dev, port, config); + if (IS_BROXTON(dev_priv)) { + for_each_dsi_port(port, intel_dsi->ports) + bxt_dsi_program_clocks(encoder->base.dev, port, config); + } /* Enable DSI PLL */ val = I915_READ(BXT_DSI_PLL_ENABLE); @@ -554,19 +556,22 @@ void intel_disable_dsi_pll(struct intel_encoder *encoder) bxt_disable_dsi_pll(encoder); } -static void bxt_dsi_reset_clocks(struct intel_encoder *encoder, enum port port) +static void gen9lp_dsi_reset_clocks(struct intel_encoder *encoder, + enum port port) { u32 tmp; struct drm_device *dev = encoder->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); /* Clear old configurations */ - tmp = I915_READ(BXT_MIPI_CLOCK_CTL); - tmp &= ~(BXT_MIPI_TX_ESCLK_FIXDIV_MASK(port)); - tmp &= ~(BXT_MIPI_RX_ESCLK_UPPER_FIXDIV_MASK(port)); - tmp &= ~(BXT_MIPI_8X_BY3_DIVIDER_MASK(port)); - tmp &= ~(BXT_MIPI_RX_ESCLK_LOWER_FIXDIV_MASK(port)); - I915_WRITE(BXT_MIPI_CLOCK_CTL, tmp); + if (IS_BROXTON(dev_priv)) { + tmp = I915_READ(BXT_MIPI_CLOCK_CTL); + tmp &= ~(BXT_MIPI_TX_ESCLK_FIXDIV_MASK(port)); + tmp &= ~(BXT_MIPI_RX_ESCLK_UPPER_FIXDIV_MASK(port)); + tmp &= ~(BXT_MIPI_8X_BY3_DIVIDER_MASK(port)); + tmp &= ~(BXT_MIPI_RX_ESCLK_LOWER_FIXDIV_MASK(port)); + I915_WRITE(BXT_MIPI_CLOCK_CTL, tmp); + } I915_WRITE(MIPI_EOT_DISABLE(port), CLOCKSTOP); } @@ -575,7 +580,7 @@ void intel_dsi_reset_clocks(struct intel_encoder *encoder, enum port port) struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); if (IS_GEN9_LP(dev_priv)) - bxt_dsi_reset_clocks(encoder, port); + gen9lp_dsi_reset_clocks(encoder, port); else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) vlv_dsi_reset_clocks(encoder, port); } -- cgit From bcc65700484115dc4f80817de91dc86eeeb8b361 Mon Sep 17 00:00:00 2001 From: Deepak M Date: Fri, 17 Feb 2017 18:13:34 +0530 Subject: drm/i915/glk: Program txesc clock divider for GLK v2: Addressed Jani's Review comments(renamed bit field macros) Txesc clock divider is calculated and programmed for geminilake platform. Signed-off-by: Deepak M Signed-off-by: Madhav Chauhan Reviewed-by: Jani Nikula Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/1487335415-14766-7-git-send-email-madhav.chauhan@intel.com --- drivers/gpu/drm/i915/i915_reg.h | 5 +++ drivers/gpu/drm/i915/intel_dsi_pll.c | 61 ++++++++++++++++++++++++++++++++++-- 2 files changed, 64 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 89093efc9b42..1343170d8f5c 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -8251,6 +8251,11 @@ enum { #define _MIPI_PORT(port, a, c) _PORT3(port, a, 0, c) /* ports A and C only */ #define _MMIO_MIPI(port, a, c) _MMIO(_MIPI_PORT(port, a, c)) +#define MIPIO_TXESC_CLK_DIV1 _MMIO(0x160004) +#define GLK_TX_ESC_CLK_DIV1_MASK 0x3FF +#define MIPIO_TXESC_CLK_DIV2 _MMIO(0x160008) +#define GLK_TX_ESC_CLK_DIV2_MASK 0x3FF + /* BXT MIPI clock controls */ #define BXT_MAX_VAR_OUTPUT_KHZ 39500 diff --git a/drivers/gpu/drm/i915/intel_dsi_pll.c b/drivers/gpu/drm/i915/intel_dsi_pll.c index 0a9a5c431a04..7a7617b24cb2 100644 --- a/drivers/gpu/drm/i915/intel_dsi_pll.c +++ b/drivers/gpu/drm/i915/intel_dsi_pll.c @@ -372,6 +372,53 @@ static void vlv_dsi_reset_clocks(struct intel_encoder *encoder, enum port port) ESCAPE_CLOCK_DIVIDER_SHIFT); } +static void glk_dsi_program_esc_clock(struct drm_device *dev, + const struct intel_crtc_state *config) +{ + struct drm_i915_private *dev_priv = to_i915(dev); + u32 dsi_rate = 0; + u32 pll_ratio = 0; + u32 ddr_clk = 0; + u32 div1_value = 0; + u32 div2_value = 0; + u32 txesc1_div = 0; + u32 txesc2_div = 0; + + pll_ratio = config->dsi_pll.ctrl & BXT_DSI_PLL_RATIO_MASK; + + dsi_rate = (BXT_REF_CLOCK_KHZ * pll_ratio) / 2; + + ddr_clk = dsi_rate / 2; + + /* Variable divider value */ + div1_value = DIV_ROUND_CLOSEST(ddr_clk, 20000); + + /* Calculate TXESC1 divider */ + if (div1_value <= 10) + txesc1_div = div1_value; + else if ((div1_value > 10) && (div1_value <= 20)) + txesc1_div = DIV_ROUND_UP(div1_value, 2); + else if ((div1_value > 20) && (div1_value <= 30)) + txesc1_div = DIV_ROUND_UP(div1_value, 4); + else if ((div1_value > 30) && (div1_value <= 40)) + txesc1_div = DIV_ROUND_UP(div1_value, 6); + else if ((div1_value > 40) && (div1_value <= 50)) + txesc1_div = DIV_ROUND_UP(div1_value, 8); + else + txesc1_div = 10; + + /* Calculate TXESC2 divider */ + div2_value = DIV_ROUND_UP(div1_value, txesc1_div); + + if (div2_value < 10) + txesc2_div = div2_value; + else + txesc2_div = 10; + + I915_WRITE(MIPIO_TXESC_CLK_DIV1, txesc1_div & GLK_TX_ESC_CLK_DIV1_MASK); + I915_WRITE(MIPIO_TXESC_CLK_DIV2, txesc2_div & GLK_TX_ESC_CLK_DIV2_MASK); +} + /* Program BXT Mipi clocks and dividers */ static void bxt_dsi_program_clocks(struct drm_device *dev, enum port port, const struct intel_crtc_state *config) @@ -474,7 +521,7 @@ static int gen9lp_compute_dsi_pll(struct intel_encoder *encoder, return 0; } -static void bxt_enable_dsi_pll(struct intel_encoder *encoder, +static void gen9lp_enable_dsi_pll(struct intel_encoder *encoder, const struct intel_crtc_state *config) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); @@ -492,6 +539,8 @@ static void bxt_enable_dsi_pll(struct intel_encoder *encoder, if (IS_BROXTON(dev_priv)) { for_each_dsi_port(port, intel_dsi->ports) bxt_dsi_program_clocks(encoder->base.dev, port, config); + } else { + glk_dsi_program_esc_clock(encoder->base.dev, config); } /* Enable DSI PLL */ @@ -543,7 +592,7 @@ void intel_enable_dsi_pll(struct intel_encoder *encoder, if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) vlv_enable_dsi_pll(encoder, config); else if (IS_GEN9_LP(dev_priv)) - bxt_enable_dsi_pll(encoder, config); + gen9lp_enable_dsi_pll(encoder, config); } void intel_disable_dsi_pll(struct intel_encoder *encoder) @@ -571,6 +620,14 @@ static void gen9lp_dsi_reset_clocks(struct intel_encoder *encoder, tmp &= ~(BXT_MIPI_8X_BY3_DIVIDER_MASK(port)); tmp &= ~(BXT_MIPI_RX_ESCLK_LOWER_FIXDIV_MASK(port)); I915_WRITE(BXT_MIPI_CLOCK_CTL, tmp); + } else { + tmp = I915_READ(MIPIO_TXESC_CLK_DIV1); + tmp &= ~GLK_TX_ESC_CLK_DIV1_MASK; + I915_WRITE(MIPIO_TXESC_CLK_DIV1, tmp); + + tmp = I915_READ(MIPIO_TXESC_CLK_DIV2); + tmp &= ~GLK_TX_ESC_CLK_DIV2_MASK; + I915_WRITE(MIPIO_TXESC_CLK_DIV2, tmp); } I915_WRITE(MIPI_EOT_DISABLE(port), CLOCKSTOP); } -- cgit From ebeac38025ff36b76b0f6fa0aeb52729e96bb6bc Mon Sep 17 00:00:00 2001 From: Madhav Chauhan Date: Fri, 17 Feb 2017 18:13:35 +0530 Subject: drm/i915/glk: Validate only DSI PORT A PLL divider As per BSPEC, GLK supports MIPI DSI 8X clk only on PORT A. Therefore only for PORT A PLL divider value should be validated. Signed-off-by: Madhav Chauhan Reviewed-by: Jani Nikula Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/1487335415-14766-8-git-send-email-madhav.chauhan@intel.com --- drivers/gpu/drm/i915/intel_dsi_pll.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dsi_pll.c b/drivers/gpu/drm/i915/intel_dsi_pll.c index 7a7617b24cb2..2ff2ee7f3b78 100644 --- a/drivers/gpu/drm/i915/intel_dsi_pll.c +++ b/drivers/gpu/drm/i915/intel_dsi_pll.c @@ -206,17 +206,24 @@ static bool bxt_dsi_pll_is_enabled(struct drm_i915_private *dev_priv) return false; /* - * Both dividers must be programmed with valid values even if only one - * of the PLL is used, see BSpec/Broxton Clocks. Check this here for + * Dividers must be programmed with valid values. As per BSEPC, for + * GEMINLAKE only PORT A divider values are checked while for BXT + * both divider values are validated. Check this here for * paranoia, since BIOS is known to misconfigure PLLs in this way at * times, and since accessing DSI registers with invalid dividers * causes a system hang. */ val = I915_READ(BXT_DSI_PLL_CTL); - if (!(val & BXT_DSIA_16X_MASK) || !(val & BXT_DSIC_16X_MASK)) { - DRM_DEBUG_DRIVER("PLL is enabled with invalid divider settings (%08x)\n", - val); - enabled = false; + if (IS_GEMINILAKE(dev_priv)) { + if (!(val & BXT_DSIA_16X_MASK)) { + DRM_DEBUG_DRIVER("Invalid PLL divider (%08x)\n", val); + enabled = false; + } + } else { + if (!(val & BXT_DSIA_16X_MASK) || !(val & BXT_DSIC_16X_MASK)) { + DRM_DEBUG_DRIVER("Invalid PLL divider (%08x)\n", val); + enabled = false; + } } return enabled; -- cgit From 80166e406e4b88e49b5091cec1ac6b03b6f372ed Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 28 Feb 2017 08:50:18 +0000 Subject: drm/i915: Consolidate reporting of "missed breadcrumbs" Move the setting of gpu_error->missed_irq_ring bit to a common function so that we can get the debug logging for either path. v2: Add %pF caller Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170228085018.3225-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_breadcrumbs.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c index 7c7867d65a39..5752f23fd289 100644 --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c @@ -59,6 +59,16 @@ static unsigned long wait_timeout(void) return round_jiffies_up(jiffies + DRM_I915_HANGCHECK_JIFFIES); } +static noinline void missed_breadcrumb(struct intel_engine_cs *engine) +{ + DRM_DEBUG_DRIVER("%s missed breadcrumb at %pF, irq posted? %s\n", + engine->name, __builtin_return_address(0), + yesno(test_bit(ENGINE_IRQ_BREADCRUMB, + &engine->irq_posted))); + + set_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings); +} + static void intel_breadcrumbs_hangcheck(unsigned long data) { struct intel_engine_cs *engine = (struct intel_engine_cs *)data; @@ -86,8 +96,7 @@ static void intel_breadcrumbs_hangcheck(unsigned long data) * DRM_I915_HANGCHECK_JIFFIES [1.5s]! */ if (intel_engine_wakeup(engine) & ENGINE_WAKEUP_ASLEEP) { - DRM_DEBUG("Hangcheck timer elapsed... %s idle\n", engine->name); - set_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings); + missed_breadcrumb(engine); mod_timer(&engine->breadcrumbs.fake_irq, jiffies + 1); } else { mod_timer(&b->hangcheck, wait_timeout()); @@ -180,7 +189,7 @@ void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine) * completion. */ if (__intel_breadcrumbs_wakeup(b) & ENGINE_WAKEUP_ASLEEP) - set_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings); + missed_breadcrumb(engine); __intel_engine_disarm_breadcrumbs(engine); -- cgit From 3870b89a810bc7a3e26ec469ee86077c1de6f9b2 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 28 Feb 2017 11:26:16 +0200 Subject: drm/i915/dsi: Move calling of wait_for_dsi_fifo_empty to mipi_exec_send_packet Instead of calling wait_for_dsi_fifo_empty on all dsi ports after calling a drm_panel_foo helper which calls VBT sequences, move it to the VBT mipi_exec_send_packet helper, which is the one VBT instruction which actually puts data in the fifo. This results in a nice cleanup making it clearer what all the steps on intel_dsi_enable / disable are and this also makes the VBT code properly wait till a command has actually been send before executing the next steps (typically a delay) in the VBT sequence. Signed-off-by: Hans de Goede Reviewed-by: Bob Paauwe Acked-by: Jani Nikula Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/289977b5699e252fea5c211d1d1645f9e79cca79.1488273823.git.jani.nikula@intel.com --- drivers/gpu/drm/i915/intel_dsi.c | 12 +----------- drivers/gpu/drm/i915/intel_dsi.h | 2 ++ drivers/gpu/drm/i915/intel_dsi_panel_vbt.c | 2 ++ 3 files changed, 5 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dsi.c b/drivers/gpu/drm/i915/intel_dsi.c index f00b4e22f95b..598ac658e361 100644 --- a/drivers/gpu/drm/i915/intel_dsi.c +++ b/drivers/gpu/drm/i915/intel_dsi.c @@ -80,7 +80,7 @@ enum mipi_dsi_pixel_format pixel_format_from_register_bits(u32 fmt) } } -static void wait_for_dsi_fifo_empty(struct intel_dsi *intel_dsi, enum port port) +void wait_for_dsi_fifo_empty(struct intel_dsi *intel_dsi, enum port port) { struct drm_encoder *encoder = &intel_dsi->base.base; struct drm_device *dev = encoder->dev; @@ -525,9 +525,6 @@ static void intel_dsi_enable(struct intel_encoder *encoder) drm_panel_enable(intel_dsi->panel); - for_each_dsi_port(port, intel_dsi->ports) - wait_for_dsi_fifo_empty(intel_dsi, port); - intel_dsi_port_enable(encoder); } @@ -543,7 +540,6 @@ static void intel_dsi_pre_enable(struct intel_encoder *encoder, { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); - enum port port; u32 val; DRM_DEBUG_KMS("\n"); @@ -588,9 +584,6 @@ static void intel_dsi_pre_enable(struct intel_encoder *encoder, drm_panel_prepare(intel_dsi->panel); - for_each_dsi_port(port, intel_dsi->ports) - wait_for_dsi_fifo_empty(intel_dsi, port); - /* Enable port in pre-enable phase itself because as per hw team * recommendation, port should be enabled befor plane & pipe */ intel_dsi_enable(encoder); @@ -672,9 +665,6 @@ static void intel_dsi_disable(struct intel_encoder *encoder) /* if disable packets are sent before sending shutdown packet then in * some next enable sequence send turn on packet error is observed */ drm_panel_disable(intel_dsi->panel); - - for_each_dsi_port(port, intel_dsi->ports) - wait_for_dsi_fifo_empty(intel_dsi, port); } static void intel_dsi_clear_device_ready(struct intel_encoder *encoder) diff --git a/drivers/gpu/drm/i915/intel_dsi.h b/drivers/gpu/drm/i915/intel_dsi.h index 5967ea6d6045..d56782396f60 100644 --- a/drivers/gpu/drm/i915/intel_dsi.h +++ b/drivers/gpu/drm/i915/intel_dsi.h @@ -130,6 +130,8 @@ static inline struct intel_dsi *enc_to_intel_dsi(struct drm_encoder *encoder) return container_of(encoder, struct intel_dsi, base.base); } +void wait_for_dsi_fifo_empty(struct intel_dsi *intel_dsi, enum port port); + bool intel_dsi_pll_is_enabled(struct drm_i915_private *dev_priv); int intel_compute_dsi_pll(struct intel_encoder *encoder, struct intel_crtc_state *config); diff --git a/drivers/gpu/drm/i915/intel_dsi_panel_vbt.c b/drivers/gpu/drm/i915/intel_dsi_panel_vbt.c index 405a0f74f455..7daaa58289e3 100644 --- a/drivers/gpu/drm/i915/intel_dsi_panel_vbt.c +++ b/drivers/gpu/drm/i915/intel_dsi_panel_vbt.c @@ -192,6 +192,8 @@ static const u8 *mipi_exec_send_packet(struct intel_dsi *intel_dsi, break; } + wait_for_dsi_fifo_empty(intel_dsi, port); + out: data += len; -- cgit From 5a2e65e742cac8939ab411f38ef1a36ef927181c Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 28 Feb 2017 11:26:17 +0200 Subject: drm/i915/dsi: Merge intel_dsi_disable/enable into their respective callers intel_dsi_disable/enable only have one caller, merge them into their respective callers. Change msleep(2) into usleep_range(2000, 5000) to make checkpatch happy, otherwise no functional changes. The main advantage of this change is that it makes it easier to follow all the steps of the panel enable / disable sequence when reading the code. Signed-off-by: Hans de Goede Acked-by: Jani Nikula Reviewed-by: Bob Paauwe Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/d7249612e6d2e9639ecd1d8d106ca37d5794f2a4.1488273823.git.jani.nikula@intel.com --- drivers/gpu/drm/i915/intel_dsi.c | 108 ++++++++++++++++----------------------- 1 file changed, 44 insertions(+), 64 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dsi.c b/drivers/gpu/drm/i915/intel_dsi.c index 598ac658e361..859d4d71f8e1 100644 --- a/drivers/gpu/drm/i915/intel_dsi.c +++ b/drivers/gpu/drm/i915/intel_dsi.c @@ -505,32 +505,6 @@ static void intel_dsi_port_disable(struct intel_encoder *encoder) } } -static void intel_dsi_enable(struct intel_encoder *encoder) -{ - struct drm_device *dev = encoder->base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); - enum port port; - - DRM_DEBUG_KMS("\n"); - - if (is_cmd_mode(intel_dsi)) { - for_each_dsi_port(port, intel_dsi->ports) - I915_WRITE(MIPI_MAX_RETURN_PKT_SIZE(port), 8 * 4); - } else { - msleep(20); /* XXX */ - for_each_dsi_port(port, intel_dsi->ports) - dpi_send_cmd(intel_dsi, TURN_ON, false, port); - msleep(100); - - drm_panel_enable(intel_dsi->panel); - - intel_dsi_port_enable(encoder); - } - - intel_panel_enable_backlight(intel_dsi->attached_connector); -} - static void intel_dsi_prepare(struct intel_encoder *intel_encoder, struct intel_crtc_state *pipe_config); @@ -540,6 +514,7 @@ static void intel_dsi_pre_enable(struct intel_encoder *encoder, { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); + enum port port; u32 val; DRM_DEBUG_KMS("\n"); @@ -586,7 +561,21 @@ static void intel_dsi_pre_enable(struct intel_encoder *encoder, /* Enable port in pre-enable phase itself because as per hw team * recommendation, port should be enabled befor plane & pipe */ - intel_dsi_enable(encoder); + if (is_cmd_mode(intel_dsi)) { + for_each_dsi_port(port, intel_dsi->ports) + I915_WRITE(MIPI_MAX_RETURN_PKT_SIZE(port), 8 * 4); + } else { + msleep(20); /* XXX */ + for_each_dsi_port(port, intel_dsi->ports) + dpi_send_cmd(intel_dsi, TURN_ON, false, port); + msleep(100); + + drm_panel_enable(intel_dsi->panel); + + intel_dsi_port_enable(encoder); + } + + intel_panel_enable_backlight(intel_dsi->attached_connector); } static void intel_dsi_enable_nop(struct intel_encoder *encoder, @@ -631,42 +620,6 @@ static void intel_dsi_pre_disable(struct intel_encoder *encoder, } } -static void intel_dsi_disable(struct intel_encoder *encoder) -{ - struct drm_device *dev = encoder->base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); - enum port port; - u32 temp; - - DRM_DEBUG_KMS("\n"); - - if (is_vid_mode(intel_dsi)) { - for_each_dsi_port(port, intel_dsi->ports) - wait_for_dsi_fifo_empty(intel_dsi, port); - - intel_dsi_port_disable(encoder); - msleep(2); - } - - for_each_dsi_port(port, intel_dsi->ports) { - /* Panel commands can be sent when clock is in LP11 */ - I915_WRITE(MIPI_DEVICE_READY(port), 0x0); - - intel_dsi_reset_clocks(encoder, port); - I915_WRITE(MIPI_EOT_DISABLE(port), CLOCKSTOP); - - temp = I915_READ(MIPI_DSI_FUNC_PRG(port)); - temp &= ~VID_MODE_FORMAT_MASK; - I915_WRITE(MIPI_DSI_FUNC_PRG(port), temp); - - I915_WRITE(MIPI_DEVICE_READY(port), 0x1); - } - /* if disable packets are sent before sending shutdown packet then in - * some next enable sequence send turn on packet error is observed */ - drm_panel_disable(intel_dsi->panel); -} - static void intel_dsi_clear_device_ready(struct intel_encoder *encoder) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); @@ -716,11 +669,38 @@ static void intel_dsi_post_disable(struct intel_encoder *encoder, { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); + enum port port; u32 val; DRM_DEBUG_KMS("\n"); - intel_dsi_disable(encoder); + if (is_vid_mode(intel_dsi)) { + for_each_dsi_port(port, intel_dsi->ports) + wait_for_dsi_fifo_empty(intel_dsi, port); + + intel_dsi_port_disable(encoder); + usleep_range(2000, 5000); + } + + for_each_dsi_port(port, intel_dsi->ports) { + /* Panel commands can be sent when clock is in LP11 */ + I915_WRITE(MIPI_DEVICE_READY(port), 0x0); + + intel_dsi_reset_clocks(encoder, port); + I915_WRITE(MIPI_EOT_DISABLE(port), CLOCKSTOP); + + val = I915_READ(MIPI_DSI_FUNC_PRG(port)); + val &= ~VID_MODE_FORMAT_MASK; + I915_WRITE(MIPI_DSI_FUNC_PRG(port), val); + + I915_WRITE(MIPI_DEVICE_READY(port), 0x1); + } + + /* + * if disable packets are sent before sending shutdown packet then in + * some next enable sequence send turn on packet error is observed + */ + drm_panel_disable(intel_dsi->panel); intel_dsi_clear_device_ready(encoder); -- cgit From c7991ecad6a99664506cf290118ecec985854011 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 28 Feb 2017 11:26:18 +0200 Subject: drm/i915/dsi: Add intel_dsi_unprepare() helper The enable path has an intel_dsi_prepare() helper which prepares various registers for the mode-set. Move the code undoing this to a new intel_dsi_unprepare() helper function for better symmetry between the enable and disable paths. No functional changes. Signed-off-by: Hans de Goede Acked-by: Jani Nikula Reviewed-by: Bob Paauwe Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/cc0baaf04ea74a20031b4b5bb128591dcfa78406.1488273823.git.jani.nikula@intel.com --- drivers/gpu/drm/i915/intel_dsi.c | 37 ++++++++++++++++++++++++------------- 1 file changed, 24 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dsi.c b/drivers/gpu/drm/i915/intel_dsi.c index 859d4d71f8e1..1f839a837e9c 100644 --- a/drivers/gpu/drm/i915/intel_dsi.c +++ b/drivers/gpu/drm/i915/intel_dsi.c @@ -507,6 +507,7 @@ static void intel_dsi_port_disable(struct intel_encoder *encoder) static void intel_dsi_prepare(struct intel_encoder *intel_encoder, struct intel_crtc_state *pipe_config); +static void intel_dsi_unprepare(struct intel_encoder *encoder); static void intel_dsi_pre_enable(struct intel_encoder *encoder, struct intel_crtc_state *pipe_config, @@ -682,19 +683,7 @@ static void intel_dsi_post_disable(struct intel_encoder *encoder, usleep_range(2000, 5000); } - for_each_dsi_port(port, intel_dsi->ports) { - /* Panel commands can be sent when clock is in LP11 */ - I915_WRITE(MIPI_DEVICE_READY(port), 0x0); - - intel_dsi_reset_clocks(encoder, port); - I915_WRITE(MIPI_EOT_DISABLE(port), CLOCKSTOP); - - val = I915_READ(MIPI_DSI_FUNC_PRG(port)); - val &= ~VID_MODE_FORMAT_MASK; - I915_WRITE(MIPI_DSI_FUNC_PRG(port), val); - - I915_WRITE(MIPI_DEVICE_READY(port), 0x1); - } + intel_dsi_unprepare(encoder); /* * if disable packets are sent before sending shutdown packet then in @@ -1310,6 +1299,28 @@ static void intel_dsi_prepare(struct intel_encoder *intel_encoder, } } +static void intel_dsi_unprepare(struct intel_encoder *encoder) +{ + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); + enum port port; + u32 val; + + for_each_dsi_port(port, intel_dsi->ports) { + /* Panel commands can be sent when clock is in LP11 */ + I915_WRITE(MIPI_DEVICE_READY(port), 0x0); + + intel_dsi_reset_clocks(encoder, port); + I915_WRITE(MIPI_EOT_DISABLE(port), CLOCKSTOP); + + val = I915_READ(MIPI_DSI_FUNC_PRG(port)); + val &= ~VID_MODE_FORMAT_MASK; + I915_WRITE(MIPI_DSI_FUNC_PRG(port), val); + + I915_WRITE(MIPI_DEVICE_READY(port), 0x1); + } +} + static int intel_dsi_get_modes(struct drm_connector *connector) { struct intel_connector *intel_connector = to_intel_connector(connector); -- cgit From 14be7a5c29c2bf3b9567da1e41a514c4bbcdacdf Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 28 Feb 2017 11:26:19 +0200 Subject: drm/i915/dsi: Move intel_dsi_clear_device_ready() Move the intel_dsi_clear_device_ready() function to higher up in intel_dsi.c this pairs it with intel_dsi_device_ready(); and pairs intel_dsi_*enable* with intel_dsi_*disable without intel_dsi_clear_device_ready() sitting in the middle of them. This commit purely moves code around, it does not make any changes what-so-ever. Signed-off-by: Hans de Goede Acked-by: Jani Nikula Reviewed-by: Bob Paauwe Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/f971d18ea6d350890447860aeb541dba072a6e47.1488273823.git.jani.nikula@intel.com --- drivers/gpu/drm/i915/intel_dsi.c | 86 ++++++++++++++++++++-------------------- 1 file changed, 43 insertions(+), 43 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dsi.c b/drivers/gpu/drm/i915/intel_dsi.c index 1f839a837e9c..4cca31061a6f 100644 --- a/drivers/gpu/drm/i915/intel_dsi.c +++ b/drivers/gpu/drm/i915/intel_dsi.c @@ -433,6 +433,49 @@ static void intel_dsi_device_ready(struct intel_encoder *encoder) bxt_dsi_device_ready(encoder); } +static void intel_dsi_clear_device_ready(struct intel_encoder *encoder) +{ + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); + enum port port; + + DRM_DEBUG_KMS("\n"); + for_each_dsi_port(port, intel_dsi->ports) { + /* Common bit for both MIPI Port A & MIPI Port C on VLV/CHV */ + i915_reg_t port_ctrl = IS_GEN9_LP(dev_priv) ? + BXT_MIPI_PORT_CTRL(port) : MIPI_PORT_CTRL(PORT_A); + u32 val; + + I915_WRITE(MIPI_DEVICE_READY(port), DEVICE_READY | + ULPS_STATE_ENTER); + usleep_range(2000, 2500); + + I915_WRITE(MIPI_DEVICE_READY(port), DEVICE_READY | + ULPS_STATE_EXIT); + usleep_range(2000, 2500); + + I915_WRITE(MIPI_DEVICE_READY(port), DEVICE_READY | + ULPS_STATE_ENTER); + usleep_range(2000, 2500); + + /* Wait till Clock lanes are in LP-00 state for MIPI Port A + * only. MIPI Port C has no similar bit for checking + */ + if (intel_wait_for_register(dev_priv, + port_ctrl, AFE_LATCHOUT, 0, + 30)) + DRM_ERROR("DSI LP not going Low\n"); + + /* Disable MIPI PHY transparent latch */ + val = I915_READ(port_ctrl); + I915_WRITE(port_ctrl, val & ~LP_OUTPUT_HOLD); + usleep_range(1000, 1500); + + I915_WRITE(MIPI_DEVICE_READY(port), 0x00); + usleep_range(2000, 2500); + } +} + static void intel_dsi_port_enable(struct intel_encoder *encoder) { struct drm_device *dev = encoder->base.dev; @@ -621,49 +664,6 @@ static void intel_dsi_pre_disable(struct intel_encoder *encoder, } } -static void intel_dsi_clear_device_ready(struct intel_encoder *encoder) -{ - struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); - struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); - enum port port; - - DRM_DEBUG_KMS("\n"); - for_each_dsi_port(port, intel_dsi->ports) { - /* Common bit for both MIPI Port A & MIPI Port C on VLV/CHV */ - i915_reg_t port_ctrl = IS_GEN9_LP(dev_priv) ? - BXT_MIPI_PORT_CTRL(port) : MIPI_PORT_CTRL(PORT_A); - u32 val; - - I915_WRITE(MIPI_DEVICE_READY(port), DEVICE_READY | - ULPS_STATE_ENTER); - usleep_range(2000, 2500); - - I915_WRITE(MIPI_DEVICE_READY(port), DEVICE_READY | - ULPS_STATE_EXIT); - usleep_range(2000, 2500); - - I915_WRITE(MIPI_DEVICE_READY(port), DEVICE_READY | - ULPS_STATE_ENTER); - usleep_range(2000, 2500); - - /* Wait till Clock lanes are in LP-00 state for MIPI Port A - * only. MIPI Port C has no similar bit for checking - */ - if (intel_wait_for_register(dev_priv, - port_ctrl, AFE_LATCHOUT, 0, - 30)) - DRM_ERROR("DSI LP not going Low\n"); - - /* Disable MIPI PHY transparent latch */ - val = I915_READ(port_ctrl); - I915_WRITE(port_ctrl, val & ~LP_OUTPUT_HOLD); - usleep_range(1000, 1500); - - I915_WRITE(MIPI_DEVICE_READY(port), 0x00); - usleep_range(2000, 2500); - } -} - static void intel_dsi_post_disable(struct intel_encoder *encoder, struct intel_crtc_state *pipe_config, struct drm_connector_state *conn_state) -- cgit From 18a00095a5f3b73cabab513119db9275afb62321 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 28 Feb 2017 11:26:20 +0200 Subject: drm/i915/dsi: Make intel_dsi_enable/disable directly exec VBT sequences The drm_panel_enable/disable and drm_panel_prepare/unprepare calls are not fine grained enough to abstract all the different steps we need to take (and VBT sequences we need to exec) properly. So simply remove the panel _enable/disable and prepare/unprepare callbacks and instead export intel_dsi_exec_vbt_sequence() from intel_dsi_panel_vbt.c and call that from intel_dsi_enable/disable(). No functional changes. Signed-off-by: Hans de Goede Reviewed-by: Bob Paauwe Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/b4ca5185d4788d92df2ed60837a24b8962a8e8ba.1488273823.git.jani.nikula@intel.com --- drivers/gpu/drm/i915/intel_dsi.c | 14 +++++++--- drivers/gpu/drm/i915/intel_dsi.h | 3 +++ drivers/gpu/drm/i915/intel_dsi_panel_vbt.c | 43 ++---------------------------- 3 files changed, 15 insertions(+), 45 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dsi.c b/drivers/gpu/drm/i915/intel_dsi.c index 4cca31061a6f..a4ff1e64c030 100644 --- a/drivers/gpu/drm/i915/intel_dsi.c +++ b/drivers/gpu/drm/i915/intel_dsi.c @@ -601,7 +601,10 @@ static void intel_dsi_pre_enable(struct intel_encoder *encoder, /* put device in ready state */ intel_dsi_device_ready(encoder); - drm_panel_prepare(intel_dsi->panel); + intel_dsi_exec_vbt_sequence(intel_dsi, MIPI_SEQ_ASSERT_RESET); + intel_dsi_exec_vbt_sequence(intel_dsi, MIPI_SEQ_POWER_ON); + intel_dsi_exec_vbt_sequence(intel_dsi, MIPI_SEQ_DEASSERT_RESET); + intel_dsi_exec_vbt_sequence(intel_dsi, MIPI_SEQ_INIT_OTP); /* Enable port in pre-enable phase itself because as per hw team * recommendation, port should be enabled befor plane & pipe */ @@ -614,7 +617,8 @@ static void intel_dsi_pre_enable(struct intel_encoder *encoder, dpi_send_cmd(intel_dsi, TURN_ON, false, port); msleep(100); - drm_panel_enable(intel_dsi->panel); + intel_dsi_exec_vbt_sequence(intel_dsi, MIPI_SEQ_DISPLAY_ON); + intel_dsi_exec_vbt_sequence(intel_dsi, MIPI_SEQ_BACKLIGHT_ON); intel_dsi_port_enable(encoder); } @@ -689,7 +693,8 @@ static void intel_dsi_post_disable(struct intel_encoder *encoder, * if disable packets are sent before sending shutdown packet then in * some next enable sequence send turn on packet error is observed */ - drm_panel_disable(intel_dsi->panel); + intel_dsi_exec_vbt_sequence(intel_dsi, MIPI_SEQ_BACKLIGHT_OFF); + intel_dsi_exec_vbt_sequence(intel_dsi, MIPI_SEQ_DISPLAY_OFF); intel_dsi_clear_device_ready(encoder); @@ -714,7 +719,8 @@ static void intel_dsi_post_disable(struct intel_encoder *encoder, I915_WRITE(DSPCLK_GATE_D, val); } - drm_panel_unprepare(intel_dsi->panel); + intel_dsi_exec_vbt_sequence(intel_dsi, MIPI_SEQ_ASSERT_RESET); + intel_dsi_exec_vbt_sequence(intel_dsi, MIPI_SEQ_POWER_OFF); msleep(intel_dsi->panel_off_delay); diff --git a/drivers/gpu/drm/i915/intel_dsi.h b/drivers/gpu/drm/i915/intel_dsi.h index d56782396f60..548649158abd 100644 --- a/drivers/gpu/drm/i915/intel_dsi.h +++ b/drivers/gpu/drm/i915/intel_dsi.h @@ -132,6 +132,9 @@ static inline struct intel_dsi *enc_to_intel_dsi(struct drm_encoder *encoder) void wait_for_dsi_fifo_empty(struct intel_dsi *intel_dsi, enum port port); +void intel_dsi_exec_vbt_sequence(struct intel_dsi *intel_dsi, + enum mipi_seq seq_id); + bool intel_dsi_pll_is_enabled(struct drm_i915_private *dev_priv); int intel_compute_dsi_pll(struct intel_encoder *encoder, struct intel_crtc_state *config); diff --git a/drivers/gpu/drm/i915/intel_dsi_panel_vbt.c b/drivers/gpu/drm/i915/intel_dsi_panel_vbt.c index 7daaa58289e3..ab922b6eb36a 100644 --- a/drivers/gpu/drm/i915/intel_dsi_panel_vbt.c +++ b/drivers/gpu/drm/i915/intel_dsi_panel_vbt.c @@ -426,10 +426,9 @@ static const char *sequence_name(enum mipi_seq seq_id) return "(unknown)"; } -static void generic_exec_sequence(struct drm_panel *panel, enum mipi_seq seq_id) +void intel_dsi_exec_vbt_sequence(struct intel_dsi *intel_dsi, + enum mipi_seq seq_id) { - struct vbt_panel *vbt_panel = to_vbt_panel(panel); - struct intel_dsi *intel_dsi = vbt_panel->intel_dsi; struct drm_i915_private *dev_priv = to_i915(intel_dsi->base.base.dev); const u8 *data; fn_mipi_elem_exec mipi_elem_exec; @@ -493,40 +492,6 @@ static void generic_exec_sequence(struct drm_panel *panel, enum mipi_seq seq_id) } } -static int vbt_panel_prepare(struct drm_panel *panel) -{ - generic_exec_sequence(panel, MIPI_SEQ_ASSERT_RESET); - generic_exec_sequence(panel, MIPI_SEQ_POWER_ON); - generic_exec_sequence(panel, MIPI_SEQ_DEASSERT_RESET); - generic_exec_sequence(panel, MIPI_SEQ_INIT_OTP); - - return 0; -} - -static int vbt_panel_unprepare(struct drm_panel *panel) -{ - generic_exec_sequence(panel, MIPI_SEQ_ASSERT_RESET); - generic_exec_sequence(panel, MIPI_SEQ_POWER_OFF); - - return 0; -} - -static int vbt_panel_enable(struct drm_panel *panel) -{ - generic_exec_sequence(panel, MIPI_SEQ_DISPLAY_ON); - generic_exec_sequence(panel, MIPI_SEQ_BACKLIGHT_ON); - - return 0; -} - -static int vbt_panel_disable(struct drm_panel *panel) -{ - generic_exec_sequence(panel, MIPI_SEQ_BACKLIGHT_OFF); - generic_exec_sequence(panel, MIPI_SEQ_DISPLAY_OFF); - - return 0; -} - static int vbt_panel_get_modes(struct drm_panel *panel) { struct vbt_panel *vbt_panel = to_vbt_panel(panel); @@ -550,10 +515,6 @@ static int vbt_panel_get_modes(struct drm_panel *panel) } static const struct drm_panel_funcs vbt_panel_funcs = { - .disable = vbt_panel_disable, - .unprepare = vbt_panel_unprepare, - .prepare = vbt_panel_prepare, - .enable = vbt_panel_enable, .get_modes = vbt_panel_get_modes, }; -- cgit From 1e08a260b178a6cb5547a28153f88661970474e5 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 28 Feb 2017 11:26:21 +0200 Subject: drm/i915/dsi: VLV/CHT Only wait for LP00 on MIPI PORT A On some devices only MIPI PORT C is used, in this case checking the MIPI PORT A CTRL AFE_LATCHOUT bit (there is no such bit for PORT C on VLV/CHT) will result in false positive "DSI LP not going Low" errors as this checks the PORT A clk status. In case both ports are used we have already checked the AFE_LATCHOUT bit when going through the for_each_dsi_port() loop for PORT A and checking the same bit again for PORT C is a no-op. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=97061 Signed-off-by: Hans de Goede Reviewed-by: Bob Paauwe Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/242e4438bf29ebffc66eaa182f22b9d60d304bc2.1488273823.git.jani.nikula@intel.com --- drivers/gpu/drm/i915/intel_dsi.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dsi.c b/drivers/gpu/drm/i915/intel_dsi.c index a4ff1e64c030..20ed799714de 100644 --- a/drivers/gpu/drm/i915/intel_dsi.c +++ b/drivers/gpu/drm/i915/intel_dsi.c @@ -458,10 +458,12 @@ static void intel_dsi_clear_device_ready(struct intel_encoder *encoder) ULPS_STATE_ENTER); usleep_range(2000, 2500); - /* Wait till Clock lanes are in LP-00 state for MIPI Port A - * only. MIPI Port C has no similar bit for checking + /* + * On VLV/CHV, wait till Clock lanes are in LP-00 state for MIPI + * Port A only. MIPI Port C has no similar bit for checking. */ - if (intel_wait_for_register(dev_priv, + if ((IS_GEN9_LP(dev_priv) || port == PORT_A) && + intel_wait_for_register(dev_priv, port_ctrl, AFE_LATCHOUT, 0, 30)) DRM_ERROR("DSI LP not going Low\n"); -- cgit From 349ab9192cc32e578ce981936c6ab99ae3a96165 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 28 Feb 2017 11:28:02 +0000 Subject: drm/i915/guc: Make wq_lock irq-safe Following the use of dma_fence_signal() from within our interrupt handler, we need to make guc->wq_lock also irq-safe. This was done previously as part of the guc scheduler patch (which also started mixing our fences with the interrupt handler), but is now required to fix the current guc submission backend. v4: Document that __i915_guc_submit is always under an irq disabled section v5: Move wq_rsvd adjustment to its own function Fixes: 67b807a89230 ("drm/i915: Delay disabling the user interrupt for breadcrumbs") Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: Mika Kuoppala Reviewed-by: Michal Wajdeczko Link: http://patchwork.freedesktop.org/patch/msgid/20170228112803.11646-2-chris@chris-wilson.co.uk Reviewed-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/i915_guc_submission.c | 28 ++++++++++++++++++++-------- 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c index beec88a30347..d6a6cf2540a1 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -348,7 +348,7 @@ int i915_guc_wq_reserve(struct drm_i915_gem_request *request) u32 freespace; int ret; - spin_lock(&client->wq_lock); + spin_lock_irq(&client->wq_lock); freespace = CIRC_SPACE(client->wq_tail, desc->head, client->wq_size); freespace -= client->wq_rsvd; if (likely(freespace >= wqi_size)) { @@ -358,21 +358,27 @@ int i915_guc_wq_reserve(struct drm_i915_gem_request *request) client->no_wq_space++; ret = -EAGAIN; } - spin_unlock(&client->wq_lock); + spin_unlock_irq(&client->wq_lock); return ret; } +static void guc_client_update_wq_rsvd(struct i915_guc_client *client, int size) +{ + unsigned long flags; + + spin_lock_irqsave(&client->wq_lock, flags); + client->wq_rsvd += size; + spin_unlock_irqrestore(&client->wq_lock, flags); +} + void i915_guc_wq_unreserve(struct drm_i915_gem_request *request) { - const size_t wqi_size = sizeof(struct guc_wq_item); + const int wqi_size = sizeof(struct guc_wq_item); struct i915_guc_client *client = request->i915->guc.execbuf_client; GEM_BUG_ON(READ_ONCE(client->wq_rsvd) < wqi_size); - - spin_lock(&client->wq_lock); - client->wq_rsvd -= wqi_size; - spin_unlock(&client->wq_lock); + guc_client_update_wq_rsvd(client, -wqi_size); } /* Construct a Work Item and append it to the GuC's Work Queue */ @@ -511,6 +517,9 @@ static void __i915_guc_submit(struct drm_i915_gem_request *rq) struct i915_guc_client *client = guc->execbuf_client; int b_ret; + /* We are always called with irqs disabled */ + GEM_BUG_ON(!irqs_disabled()); + spin_lock(&client->wq_lock); guc_wq_item_append(client, rq); @@ -945,16 +954,19 @@ int i915_guc_submission_enable(struct drm_i915_private *dev_priv) /* Take over from manual control of ELSP (execlists) */ for_each_engine(engine, dev_priv, id) { + const int wqi_size = sizeof(struct guc_wq_item); struct drm_i915_gem_request *rq; engine->submit_request = i915_guc_submit; engine->schedule = NULL; /* Replay the current set of previously submitted requests */ + spin_lock_irq(&engine->timeline->lock); list_for_each_entry(rq, &engine->timeline->requests, link) { - client->wq_rsvd += sizeof(struct guc_wq_item); + guc_client_update_wq_rsvd(client, wqi_size); __i915_guc_submit(rq); } + spin_unlock_irq(&engine->timeline->lock); } return 0; -- cgit From 0c33518db7d394d4eef1ce13f9dce32713f14beb Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 28 Feb 2017 11:28:03 +0000 Subject: drm/i915/guc: Reorder __i915_guc_submit to reduce spinlock holdtime A couple of operations, the flushes and the tracepoint, do not require serialisation by client->wq_lock, so move them before we take it. Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170228112803.11646-3-chris@chris-wilson.co.uk Reviewed-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/i915_guc_submission.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c index d6a6cf2540a1..7b535a32fc27 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -517,18 +517,18 @@ static void __i915_guc_submit(struct drm_i915_gem_request *rq) struct i915_guc_client *client = guc->execbuf_client; int b_ret; - /* We are always called with irqs disabled */ - GEM_BUG_ON(!irqs_disabled()); - - spin_lock(&client->wq_lock); - guc_wq_item_append(client, rq); - /* WA to flush out the pending GMADR writes to ring buffer. */ if (i915_vma_is_map_and_fenceable(rq->ring->vma)) POSTING_READ_FW(GUC_STATUS); trace_i915_gem_request_in(rq, 0); + /* We are always called with irqs disabled */ + GEM_BUG_ON(!irqs_disabled()); + + spin_lock(&client->wq_lock); + + guc_wq_item_append(client, rq); b_ret = guc_ring_doorbell(client); client->submissions[engine_id] += 1; @@ -538,6 +538,7 @@ static void __i915_guc_submit(struct drm_i915_gem_request *rq) guc->submissions[engine_id] += 1; guc->last_seqno[engine_id] = rq->global_seqno; + spin_unlock(&client->wq_lock); } -- cgit From f4c3a88e5f044e0200cd4b03a4d86cd6b6306ec4 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 28 Feb 2017 14:55:19 +0000 Subject: drm/i915: Tighten mmio arrays for MIPI_PORT MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit drivers/gpu/drm/i915/intel_dsi.c: In function ‘intel_dsi_prepare’: drivers/gpu/drm/i915/intel_dsi.c:1308:1: error: the frame size of 2488 bytes is larger than 2048 bytes [-Werror=frame-larger-than=] which is caused by the compiling expanding every _MIPI_PORT into an on-stack array of u32[3] at every callsite. Not sure why only one machine/compiler appears susceptible, but with a minor tweak to _MIPI_PORT we can defer the error until later. This is a partial revert of commit ce64645d86ac ("drm/i915: use variadic macros and arrays to choose port/pipe based registers") for a particular bad offender. Signed-off-by: Chris Wilson Cc: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/20170228145519.18012-1-chris@chris-wilson.co.uk Acked-by: Jani Nikula --- drivers/gpu/drm/i915/i915_reg.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 1343170d8f5c..0d9ae27fddff 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -8248,7 +8248,7 @@ enum { /* MIPI DSI registers */ -#define _MIPI_PORT(port, a, c) _PORT3(port, a, 0, c) /* ports A and C only */ +#define _MIPI_PORT(port, a, c) ((port) ? c : a) /* ports A and C only */ #define _MMIO_MIPI(port, a, c) _MMIO(_MIPI_PORT(port, a, c)) #define MIPIO_TXESC_CLK_DIV1 _MMIO(0x160004) -- cgit From 4644848369c0c06352e9ba3bad830c9dd7281380 Mon Sep 17 00:00:00 2001 From: Deepak M Date: Wed, 1 Mar 2017 12:51:33 +0530 Subject: drm/i915/glk: Add MIPIIO Enable/disable sequence v2: Addressed Jani's Review comments(renamed bit field macros) v3: Jani's Review comment for aligning code to platforms and added wrapper functions. v4: Corrected enable/disable seuqence as per BSPEC v5: Corrected waiting twice for same bit (Review comments: Jani) v6: Rebased to Han's patches(dsi restructuring code) Signed-off-by: Deepak M Signed-off-by: Madhav Chauhan Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/1488352893-29916-2-git-send-email-madhav.chauhan@intel.com --- drivers/gpu/drm/i915/intel_dsi.c | 206 ++++++++++++++++++++++++++++++++++++--- 1 file changed, 195 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dsi.c b/drivers/gpu/drm/i915/intel_dsi.c index 20ed799714de..8e97bae378f1 100644 --- a/drivers/gpu/drm/i915/intel_dsi.c +++ b/drivers/gpu/drm/i915/intel_dsi.c @@ -357,6 +357,109 @@ static bool intel_dsi_compute_config(struct intel_encoder *encoder, return true; } +static void glk_dsi_device_ready(struct intel_encoder *encoder) +{ + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); + enum port port; + u32 tmp, val; + + /* Set the MIPI mode + * If MIPI_Mode is off, then writing to LP_Wake bit is not reflecting. + * Power ON MIPI IO first and then write into IO reset and LP wake bits + */ + for_each_dsi_port(port, intel_dsi->ports) { + tmp = I915_READ(MIPI_CTRL(port)); + I915_WRITE(MIPI_CTRL(port), tmp | GLK_MIPIIO_ENABLE); + } + + /* Put the IO into reset */ + tmp = I915_READ(MIPI_CTRL(PORT_A)); + tmp &= ~GLK_MIPIIO_RESET_RELEASED; + I915_WRITE(MIPI_CTRL(PORT_A), tmp); + + /* Program LP Wake */ + for_each_dsi_port(port, intel_dsi->ports) { + tmp = I915_READ(MIPI_CTRL(port)); + tmp |= GLK_LP_WAKE; + I915_WRITE(MIPI_CTRL(port), tmp); + } + + /* Wait for Pwr ACK */ + for_each_dsi_port(port, intel_dsi->ports) { + if (intel_wait_for_register(dev_priv, + MIPI_CTRL(port), GLK_MIPIIO_PORT_POWERED, + GLK_MIPIIO_PORT_POWERED, 20)) + DRM_ERROR("MIPIO port is powergated\n"); + } + + /* Wait for MIPI PHY status bit to set */ + for_each_dsi_port(port, intel_dsi->ports) { + if (intel_wait_for_register(dev_priv, + MIPI_CTRL(port), GLK_PHY_STATUS_PORT_READY, + GLK_PHY_STATUS_PORT_READY, 20)) + DRM_ERROR("PHY is not ON\n"); + } + + /* Get IO out of reset */ + tmp = I915_READ(MIPI_CTRL(PORT_A)); + I915_WRITE(MIPI_CTRL(PORT_A), tmp | GLK_MIPIIO_RESET_RELEASED); + + /* Get IO out of Low power state*/ + for_each_dsi_port(port, intel_dsi->ports) { + if (!(I915_READ(MIPI_DEVICE_READY(port)) & DEVICE_READY)) { + val = I915_READ(MIPI_DEVICE_READY(port)); + val &= ~ULPS_STATE_MASK; + val |= DEVICE_READY; + I915_WRITE(MIPI_DEVICE_READY(port), val); + usleep_range(10, 15); + } + + /* Enter ULPS */ + val = I915_READ(MIPI_DEVICE_READY(port)); + val &= ~ULPS_STATE_MASK; + val |= (ULPS_STATE_ENTER | DEVICE_READY); + I915_WRITE(MIPI_DEVICE_READY(port), val); + + /* Wait for ULPS Not active */ + if (intel_wait_for_register(dev_priv, + MIPI_CTRL(port), GLK_ULPS_NOT_ACTIVE, + GLK_ULPS_NOT_ACTIVE, 20)) + + /* Exit ULPS */ + val = I915_READ(MIPI_DEVICE_READY(port)); + val &= ~ULPS_STATE_MASK; + val |= (ULPS_STATE_EXIT | DEVICE_READY); + I915_WRITE(MIPI_DEVICE_READY(port), val); + + /* Enter Normal Mode */ + val = I915_READ(MIPI_DEVICE_READY(port)); + val &= ~ULPS_STATE_MASK; + val |= (ULPS_STATE_NORMAL_OPERATION | DEVICE_READY); + I915_WRITE(MIPI_DEVICE_READY(port), val); + + tmp = I915_READ(MIPI_CTRL(port)); + tmp &= ~GLK_LP_WAKE; + I915_WRITE(MIPI_CTRL(port), tmp); + } + + /* Wait for Stop state */ + for_each_dsi_port(port, intel_dsi->ports) { + if (intel_wait_for_register(dev_priv, + MIPI_CTRL(port), GLK_DATA_LANE_STOP_STATE, + GLK_DATA_LANE_STOP_STATE, 20)) + DRM_ERROR("Date lane not in STOP state\n"); + } + + /* Wait for AFE LATCH */ + for_each_dsi_port(port, intel_dsi->ports) { + if (intel_wait_for_register(dev_priv, + BXT_MIPI_PORT_CTRL(port), AFE_LATCHOUT, + AFE_LATCHOUT, 20)) + DRM_ERROR("D-PHY not entering LP-11 state\n"); + } +} + static void bxt_dsi_device_ready(struct intel_encoder *encoder) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); @@ -429,11 +532,79 @@ static void intel_dsi_device_ready(struct intel_encoder *encoder) if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) vlv_dsi_device_ready(encoder); - else if (IS_GEN9_LP(dev_priv)) + else if (IS_BROXTON(dev_priv)) bxt_dsi_device_ready(encoder); + else if (IS_GEMINILAKE(dev_priv)) + glk_dsi_device_ready(encoder); } -static void intel_dsi_clear_device_ready(struct intel_encoder *encoder) +static void glk_dsi_enter_low_power_mode(struct intel_encoder *encoder) +{ + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); + enum port port; + u32 val; + + /* Enter ULPS */ + for_each_dsi_port(port, intel_dsi->ports) { + val = I915_READ(MIPI_DEVICE_READY(port)); + val &= ~ULPS_STATE_MASK; + val |= (ULPS_STATE_ENTER | DEVICE_READY); + I915_WRITE(MIPI_DEVICE_READY(port), val); + } + + /* Wait for MIPI PHY status bit to unset */ + for_each_dsi_port(port, intel_dsi->ports) { + if (intel_wait_for_register(dev_priv, + MIPI_CTRL(port), + GLK_PHY_STATUS_PORT_READY, 0, 20)) + DRM_ERROR("PHY is not turning OFF\n"); + } + + /* Wait for Pwr ACK bit to unset */ + for_each_dsi_port(port, intel_dsi->ports) { + if (intel_wait_for_register(dev_priv, + MIPI_CTRL(port), + GLK_MIPIIO_PORT_POWERED, 0, 20)) + DRM_ERROR("MIPI IO Port is not powergated\n"); + } +} + +static void glk_dsi_disable_mipi_io(struct intel_encoder *encoder) +{ + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); + enum port port; + u32 tmp; + + /* Put the IO into reset */ + tmp = I915_READ(MIPI_CTRL(PORT_A)); + tmp &= ~GLK_MIPIIO_RESET_RELEASED; + I915_WRITE(MIPI_CTRL(PORT_A), tmp); + + /* Wait for MIPI PHY status bit to unset */ + for_each_dsi_port(port, intel_dsi->ports) { + if (intel_wait_for_register(dev_priv, + MIPI_CTRL(port), + GLK_PHY_STATUS_PORT_READY, 0, 20)) + DRM_ERROR("PHY is not turning OFF\n"); + } + + /* Clear MIPI mode */ + for_each_dsi_port(port, intel_dsi->ports) { + tmp = I915_READ(MIPI_CTRL(port)); + tmp &= ~GLK_MIPIIO_ENABLE; + I915_WRITE(MIPI_CTRL(port), tmp); + } +} + +static void glk_dsi_clear_device_ready(struct intel_encoder *encoder) +{ + glk_dsi_enter_low_power_mode(encoder); + glk_dsi_disable_mipi_io(encoder); +} + +static void vlv_dsi_clear_device_ready(struct intel_encoder *encoder) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base); @@ -670,6 +841,17 @@ static void intel_dsi_pre_disable(struct intel_encoder *encoder, } } +static void intel_dsi_clear_device_ready(struct intel_encoder *encoder) +{ + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + + if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv) || + IS_BROXTON(dev_priv)) + vlv_dsi_clear_device_ready(encoder); + else if (IS_GEMINILAKE(dev_priv)) + glk_dsi_clear_device_ready(encoder); +} + static void intel_dsi_post_disable(struct intel_encoder *encoder, struct intel_crtc_state *pipe_config, struct drm_connector_state *conn_state) @@ -1314,18 +1496,20 @@ static void intel_dsi_unprepare(struct intel_encoder *encoder) enum port port; u32 val; - for_each_dsi_port(port, intel_dsi->ports) { - /* Panel commands can be sent when clock is in LP11 */ - I915_WRITE(MIPI_DEVICE_READY(port), 0x0); + if (!IS_GEMINILAKE(dev_priv)) { + for_each_dsi_port(port, intel_dsi->ports) { + /* Panel commands can be sent when clock is in LP11 */ + I915_WRITE(MIPI_DEVICE_READY(port), 0x0); - intel_dsi_reset_clocks(encoder, port); - I915_WRITE(MIPI_EOT_DISABLE(port), CLOCKSTOP); + intel_dsi_reset_clocks(encoder, port); + I915_WRITE(MIPI_EOT_DISABLE(port), CLOCKSTOP); - val = I915_READ(MIPI_DSI_FUNC_PRG(port)); - val &= ~VID_MODE_FORMAT_MASK; - I915_WRITE(MIPI_DSI_FUNC_PRG(port), val); + val = I915_READ(MIPI_DSI_FUNC_PRG(port)); + val &= ~VID_MODE_FORMAT_MASK; + I915_WRITE(MIPI_DSI_FUNC_PRG(port), val); - I915_WRITE(MIPI_DEVICE_READY(port), 0x1); + I915_WRITE(MIPI_DEVICE_READY(port), 0x1); + } } } -- cgit From 0129936ddda26afd5d9d207c4e86b2425952579f Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Fri, 24 Feb 2017 16:32:10 +0200 Subject: drm/i915/gen9: Increase PCODE request timeout to 50ms MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After commit 2c7d0602c815277f7cb7c932b091288710d8aba7 Author: Imre Deak Date: Mon Dec 5 18:27:37 2016 +0200 drm/i915/gen9: Fix PCODE polling during CDCLK change notification there is still one report of the CDCLK-change request timing out on a KBL machine, see the Reference link. On that machine the maximum time the request took to succeed was 34ms, so increase the timeout to 50ms. v2: - Change timeout from 100 to 50 ms to maintain the current 50 ms limit for atomic waits in the driver. (Chris, Tvrtko) Reference: https://bugs.freedesktop.org/show_bug.cgi?id=99345 Cc: Ville Syrjälä Cc: Chris Wilson Cc: Tvrtko Ursulin Cc: Signed-off-by: Imre Deak Acked-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/1487946730-17162-1-git-send-email-imre.deak@intel.com --- drivers/gpu/drm/i915/intel_pm.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index ac0cd82f61e5..c6938350a6c4 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -7926,10 +7926,10 @@ static bool skl_pcode_try_request(struct drm_i915_private *dev_priv, u32 mbox, * @timeout_base_ms: timeout for polling with preemption enabled * * Keep resending the @request to @mbox until PCODE acknowledges it, PCODE - * reports an error or an overall timeout of @timeout_base_ms+10 ms expires. + * reports an error or an overall timeout of @timeout_base_ms+50 ms expires. * The request is acknowledged once the PCODE reply dword equals @reply after * applying @reply_mask. Polling is first attempted with preemption enabled - * for @timeout_base_ms and if this times out for another 10 ms with + * for @timeout_base_ms and if this times out for another 50 ms with * preemption disabled. * * Returns 0 on success, %-ETIMEDOUT in case of a timeout, <0 in case of some @@ -7965,14 +7965,15 @@ int skl_pcode_request(struct drm_i915_private *dev_priv, u32 mbox, u32 request, * worst case) _and_ PCODE was busy for some reason even after a * (queued) request and @timeout_base_ms delay. As a workaround retry * the poll with preemption disabled to maximize the number of - * requests. Increase the timeout from @timeout_base_ms to 10ms to + * requests. Increase the timeout from @timeout_base_ms to 50ms to * account for interrupts that could reduce the number of these - * requests. + * requests, and for any quirks of the PCODE firmware that delays + * the request completion. */ DRM_DEBUG_KMS("PCODE timeout, retrying with preemption disabled\n"); WARN_ON_ONCE(timeout_base_ms > 3); preempt_disable(); - ret = wait_for_atomic(COND, 10); + ret = wait_for_atomic(COND, 50); preempt_enable(); out: -- cgit From 9160095c0e89a4fd58b74b64645b0ceaf11d119c Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Tue, 28 Feb 2017 13:11:43 +0200 Subject: drm/i915: use BUILD_BUG_ON to ensure platform name has been set up Leave the runtime check in place in case the platform variable itself comes from bogus sources. Reviewed-by: Chris Wilson Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/1488280303-9323-1-git-send-email-jani.nikula@intel.com --- drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/intel_device_info.c | 2 ++ 2 files changed, 3 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 71152656ebbf..48b34a937792 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -889,6 +889,7 @@ enum intel_platform { INTEL_BROXTON, INTEL_KABYLAKE, INTEL_GEMINILAKE, + INTEL_MAX_PLATFORMS }; struct intel_device_info { diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c index 2e1fd857e625..9fc6ab783008 100644 --- a/drivers/gpu/drm/i915/intel_device_info.c +++ b/drivers/gpu/drm/i915/intel_device_info.c @@ -56,6 +56,8 @@ static const char * const platform_names[] = { const char *intel_platform_name(enum intel_platform platform) { + BUILD_BUG_ON(ARRAY_SIZE(platform_names) != INTEL_MAX_PLATFORMS); + if (WARN_ON_ONCE(platform >= ARRAY_SIZE(platform_names) || platform_names[platform] == NULL)) return ""; -- cgit From 249f6962353117de4f98654b416f581f4216013c Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 1 Mar 2017 15:14:57 +0200 Subject: drm/i915/dsi: Document the panel enable / disable sequences from the spec Document the DSI panel enable / disable sequences from the spec, for easy comparison between the code and the spec. Signed-off-by: Hans de Goede Acked-by: Jani Nikula Reviewed-by: Bob Paauwe Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/1488374106-4949-2-git-send-email-jani.nikula@intel.com --- drivers/gpu/drm/i915/intel_dsi.c | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_dsi.c b/drivers/gpu/drm/i915/intel_dsi.c index 8e97bae378f1..234592c62141 100644 --- a/drivers/gpu/drm/i915/intel_dsi.c +++ b/drivers/gpu/drm/i915/intel_dsi.c @@ -725,6 +725,43 @@ static void intel_dsi_prepare(struct intel_encoder *intel_encoder, struct intel_crtc_state *pipe_config); static void intel_dsi_unprepare(struct intel_encoder *encoder); +/* + * Panel enable/disable sequences from the VBT spec. + * + * Note the spec has AssertReset / DeassertReset swapped from their + * usual naming. We use the normal names to avoid confusion (so below + * they are swapped compared to the spec). + * + * Steps starting with MIPI refer to VBT sequences, note that for v2 + * VBTs several steps which have a VBT in v2 are expected to be handled + * directly by the driver, by directly driving gpios for example. + * + * v2 video mode seq v3 video mode seq command mode seq + * - power on - MIPIPanelPowerOn - power on + * - wait t1+t2 - wait t1+t2 + * - MIPIDeassertResetPin - MIPIDeassertResetPin - MIPIDeassertResetPin + * - io lines to lp-11 - io lines to lp-11 - io lines to lp-11 + * - MIPISendInitialDcsCmds - MIPISendInitialDcsCmds - MIPISendInitialDcsCmds + * - MIPITearOn + * - MIPIDisplayOn + * - turn on DPI - turn on DPI - set pipe to dsr mode + * - MIPIDisplayOn - MIPIDisplayOn + * - wait t5 - wait t5 + * - backlight on - MIPIBacklightOn - backlight on + * ... ... ... issue mem cmds ... + * - backlight off - MIPIBacklightOff - backlight off + * - wait t6 - wait t6 + * - MIPIDisplayOff + * - turn off DPI - turn off DPI - disable pipe dsr mode + * - MIPITearOff + * - MIPIDisplayOff - MIPIDisplayOff + * - io lines to lp-00 - io lines to lp-00 - io lines to lp-00 + * - MIPIAssertResetPin - MIPIAssertResetPin - MIPIAssertResetPin + * - wait t3 - wait t3 + * - power off - MIPIPanelPowerOff - power off + * - wait t4 - wait t4 + */ + static void intel_dsi_pre_enable(struct intel_encoder *encoder, struct intel_crtc_state *pipe_config, struct drm_connector_state *conn_state) -- cgit From 19c17df3cb5e6a7f4cc2b8df63a9409c12c5a610 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 1 Mar 2017 15:14:58 +0200 Subject: drm/i915/dsi: Drop bogus MIPI_SEQ_ASSERT_RESET before POWER_ON intel_dsi_post_disable(), which does the MIPI_SEQ_ASSERT_RESET, will always be called at some point before intel_dsi_pre_enable() making the MIPI_SEQ_ASSERT_RESET in intel_dsi_pre_enable() redundant. In addition, calling MIPI_SEQ_ASSERT_RESET in the enable path goes against the VBT spec. Signed-off-by: Hans de Goede Reviewed-by: Bob Paauwe Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/1488374106-4949-3-git-send-email-jani.nikula@intel.com --- drivers/gpu/drm/i915/intel_dsi.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_dsi.c b/drivers/gpu/drm/i915/intel_dsi.c index 234592c62141..f9b0767ef073 100644 --- a/drivers/gpu/drm/i915/intel_dsi.c +++ b/drivers/gpu/drm/i915/intel_dsi.c @@ -811,7 +811,6 @@ static void intel_dsi_pre_enable(struct intel_encoder *encoder, /* put device in ready state */ intel_dsi_device_ready(encoder); - intel_dsi_exec_vbt_sequence(intel_dsi, MIPI_SEQ_ASSERT_RESET); intel_dsi_exec_vbt_sequence(intel_dsi, MIPI_SEQ_POWER_ON); intel_dsi_exec_vbt_sequence(intel_dsi, MIPI_SEQ_DEASSERT_RESET); intel_dsi_exec_vbt_sequence(intel_dsi, MIPI_SEQ_INIT_OTP); -- cgit From c7dc5275bcbda8bff2acfad98a08964017644186 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 1 Mar 2017 15:14:59 +0200 Subject: drm/i915/dsi: Move MIPI_SEQ_POWER_ON/OFF calls together with pmic gpio calls Now that we are no longer bound to the drm_panel_ callbacks, call MIPI_SEQ_POWER_ON/OFF at the proper place. Signed-off-by: Hans de Goede Reviewed-by: Bob Paauwe Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/1488374106-4949-4-git-send-email-jani.nikula@intel.com --- drivers/gpu/drm/i915/intel_dsi.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dsi.c b/drivers/gpu/drm/i915/intel_dsi.c index f9b0767ef073..ab905474b9dd 100644 --- a/drivers/gpu/drm/i915/intel_dsi.c +++ b/drivers/gpu/drm/i915/intel_dsi.c @@ -793,10 +793,10 @@ static void intel_dsi_pre_enable(struct intel_encoder *encoder, intel_dsi_prepare(encoder, pipe_config); - /* Panel Enable over CRC PMIC */ + /* Power on, try both CRC pmic gpio and VBT */ if (intel_dsi->gpio_panel) gpiod_set_value_cansleep(intel_dsi->gpio_panel, 1); - + intel_dsi_exec_vbt_sequence(intel_dsi, MIPI_SEQ_POWER_ON); msleep(intel_dsi->panel_on_delay); if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) { @@ -811,7 +811,6 @@ static void intel_dsi_pre_enable(struct intel_encoder *encoder, /* put device in ready state */ intel_dsi_device_ready(encoder); - intel_dsi_exec_vbt_sequence(intel_dsi, MIPI_SEQ_POWER_ON); intel_dsi_exec_vbt_sequence(intel_dsi, MIPI_SEQ_DEASSERT_RESET); intel_dsi_exec_vbt_sequence(intel_dsi, MIPI_SEQ_INIT_OTP); @@ -940,11 +939,10 @@ static void intel_dsi_post_disable(struct intel_encoder *encoder, } intel_dsi_exec_vbt_sequence(intel_dsi, MIPI_SEQ_ASSERT_RESET); - intel_dsi_exec_vbt_sequence(intel_dsi, MIPI_SEQ_POWER_OFF); + /* Power off, try both CRC pmic gpio and VBT */ msleep(intel_dsi->panel_off_delay); - - /* Panel Disable over CRC PMIC */ + intel_dsi_exec_vbt_sequence(intel_dsi, MIPI_SEQ_POWER_OFF); if (intel_dsi->gpio_panel) gpiod_set_value_cansleep(intel_dsi->gpio_panel, 0); -- cgit From deae2006a3a8d71ebf2b07a9f8621cfda0b3b6e7 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 1 Mar 2017 15:15:00 +0200 Subject: drm/i915/dsi: Group DPOunit clock gate workaround with PLL enable Move the DPOunit clock gate workaround to directly after the PLL enable. The exact location of the workaround does not matter and there are 2 reasons to group it with the PLL enable: 1) This moves it out of the middle of the init sequence from the spec, making it easier to follow the init sequence / compare it to the spec 2) It is grouped with the pll disable call in intel_dsi_post_disable, so for consistency it should be grouped with the pll enable in intel_dsi_pre_enable Signed-off-by: Hans de Goede Reviewed-by: Bob Paauwe Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/1488374106-4949-5-git-send-email-jani.nikula@intel.com --- drivers/gpu/drm/i915/intel_dsi.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dsi.c b/drivers/gpu/drm/i915/intel_dsi.c index ab905474b9dd..da798a579e11 100644 --- a/drivers/gpu/drm/i915/intel_dsi.c +++ b/drivers/gpu/drm/i915/intel_dsi.c @@ -791,14 +791,6 @@ static void intel_dsi_pre_enable(struct intel_encoder *encoder, I915_WRITE(BXT_P_DSI_REGULATOR_TX_CTRL, 0); } - intel_dsi_prepare(encoder, pipe_config); - - /* Power on, try both CRC pmic gpio and VBT */ - if (intel_dsi->gpio_panel) - gpiod_set_value_cansleep(intel_dsi->gpio_panel, 1); - intel_dsi_exec_vbt_sequence(intel_dsi, MIPI_SEQ_POWER_ON); - msleep(intel_dsi->panel_on_delay); - if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) { u32 val; @@ -808,6 +800,14 @@ static void intel_dsi_pre_enable(struct intel_encoder *encoder, I915_WRITE(DSPCLK_GATE_D, val); } + intel_dsi_prepare(encoder, pipe_config); + + /* Power on, try both CRC pmic gpio and VBT */ + if (intel_dsi->gpio_panel) + gpiod_set_value_cansleep(intel_dsi->gpio_panel, 1); + intel_dsi_exec_vbt_sequence(intel_dsi, MIPI_SEQ_POWER_ON); + msleep(intel_dsi->panel_on_delay); + /* put device in ready state */ intel_dsi_device_ready(encoder); -- cgit From 3e40fa8a3168da060e3835a17cf1ada5a837ddd6 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 1 Mar 2017 15:15:01 +0200 Subject: drm/i915/dsi: Execute MIPI_SEQ_DEASSERT_RESET before calling device_ready() Execute MIPI_SEQ_DEASSERT_RESET before putting the device in ready state (LP-11), this is the sequence in which things should be done according to the spec. Signed-off-by: Hans de Goede Reviewed-by: Bob Paauwe Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/1488374106-4949-6-git-send-email-jani.nikula@intel.com --- drivers/gpu/drm/i915/intel_dsi.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dsi.c b/drivers/gpu/drm/i915/intel_dsi.c index da798a579e11..b7b590d78b31 100644 --- a/drivers/gpu/drm/i915/intel_dsi.c +++ b/drivers/gpu/drm/i915/intel_dsi.c @@ -808,10 +808,13 @@ static void intel_dsi_pre_enable(struct intel_encoder *encoder, intel_dsi_exec_vbt_sequence(intel_dsi, MIPI_SEQ_POWER_ON); msleep(intel_dsi->panel_on_delay); - /* put device in ready state */ + /* Deassert reset */ + intel_dsi_exec_vbt_sequence(intel_dsi, MIPI_SEQ_DEASSERT_RESET); + + /* Put device in ready state (LP-11) */ intel_dsi_device_ready(encoder); - intel_dsi_exec_vbt_sequence(intel_dsi, MIPI_SEQ_DEASSERT_RESET); + /* Send initialization commands in LP mode */ intel_dsi_exec_vbt_sequence(intel_dsi, MIPI_SEQ_INIT_OTP); /* Enable port in pre-enable phase itself because as per hw team @@ -915,6 +918,7 @@ static void intel_dsi_post_disable(struct intel_encoder *encoder, intel_dsi_exec_vbt_sequence(intel_dsi, MIPI_SEQ_BACKLIGHT_OFF); intel_dsi_exec_vbt_sequence(intel_dsi, MIPI_SEQ_DISPLAY_OFF); + /* Transition to LP-00 */ intel_dsi_clear_device_ready(encoder); if (IS_BROXTON(dev_priv)) { @@ -938,6 +942,7 @@ static void intel_dsi_post_disable(struct intel_encoder *encoder, I915_WRITE(DSPCLK_GATE_D, val); } + /* Assert reset */ intel_dsi_exec_vbt_sequence(intel_dsi, MIPI_SEQ_ASSERT_RESET); /* Power off, try both CRC pmic gpio and VBT */ -- cgit From f5bce6df8868668d547c5757d9da70471ea40e50 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 1 Mar 2017 15:15:02 +0200 Subject: drm/i915/dsi: Group MIPI_SEQ_BACKLIGHT_ON/OFF with panel_[en|dis]able_backlight Execute the MIPI_SEQ_BACKLIGHT_ON/OFF VBT sequences at the same time as we call intel_panel_enable_backlight() / intel_panel_disable_backlight(). Signed-off-by: Hans de Goede Reviewed-by: Bob Paauwe Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/1488374106-4949-7-git-send-email-jani.nikula@intel.com --- drivers/gpu/drm/i915/intel_dsi.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dsi.c b/drivers/gpu/drm/i915/intel_dsi.c index b7b590d78b31..b132841587cc 100644 --- a/drivers/gpu/drm/i915/intel_dsi.c +++ b/drivers/gpu/drm/i915/intel_dsi.c @@ -829,12 +829,12 @@ static void intel_dsi_pre_enable(struct intel_encoder *encoder, msleep(100); intel_dsi_exec_vbt_sequence(intel_dsi, MIPI_SEQ_DISPLAY_ON); - intel_dsi_exec_vbt_sequence(intel_dsi, MIPI_SEQ_BACKLIGHT_ON); intel_dsi_port_enable(encoder); } intel_panel_enable_backlight(intel_dsi->attached_connector); + intel_dsi_exec_vbt_sequence(intel_dsi, MIPI_SEQ_BACKLIGHT_ON); } static void intel_dsi_enable_nop(struct intel_encoder *encoder, @@ -860,6 +860,7 @@ static void intel_dsi_pre_disable(struct intel_encoder *encoder, DRM_DEBUG_KMS("\n"); + intel_dsi_exec_vbt_sequence(intel_dsi, MIPI_SEQ_BACKLIGHT_OFF); intel_panel_disable_backlight(intel_dsi->attached_connector); /* @@ -915,7 +916,6 @@ static void intel_dsi_post_disable(struct intel_encoder *encoder, * if disable packets are sent before sending shutdown packet then in * some next enable sequence send turn on packet error is observed */ - intel_dsi_exec_vbt_sequence(intel_dsi, MIPI_SEQ_BACKLIGHT_OFF); intel_dsi_exec_vbt_sequence(intel_dsi, MIPI_SEQ_DISPLAY_OFF); /* Transition to LP-00 */ -- cgit From 398314516446ad12959e29556a3bafb6534a1065 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 1 Mar 2017 15:15:03 +0200 Subject: drm/i915/dsi: Document always using v3 SHUTDOWN / MIPI_SEQ_DISPLAY_OFF order According to the spec for v2 VBTs we should call MIPI_SEQ_DISPLAY_OFF before sending SHUTDOWN, where as for v3 VBTs we should send SHUTDOWN first. Since the v2 order has known issues, we use the v3 order everywhere, add a comment documenting this. Signed-off-by: Hans de Goede Reviewed-by: Bob Paauwe Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/1488374106-4949-8-git-send-email-jani.nikula@intel.com --- drivers/gpu/drm/i915/intel_dsi.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_dsi.c b/drivers/gpu/drm/i915/intel_dsi.c index b132841587cc..5a50645c3a37 100644 --- a/drivers/gpu/drm/i915/intel_dsi.c +++ b/drivers/gpu/drm/i915/intel_dsi.c @@ -872,6 +872,11 @@ static void intel_dsi_pre_disable(struct intel_encoder *encoder, I915_WRITE(MIPI_DEVICE_READY(port), 0); } + /* + * According to the spec we should send SHUTDOWN before + * MIPI_SEQ_DISPLAY_OFF only for v3+ VBTs, but field testing + * has shown that the v3 sequence works for v2 VBTs too + */ if (is_vid_mode(intel_dsi)) { /* Send Shutdown command to the panel in LP mode */ for_each_dsi_port(port, intel_dsi->ports) -- cgit From 7108b436c2b53cf5d9c3a90139973bdee95898c8 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 1 Mar 2017 15:15:04 +0200 Subject: drm/i915/dsi: Execute MIPI_SEQ_TEAR_OFF from intel_dsi_post_disable For v3+ VBTs we should call MIPI_SEQ_TEAR_OFF before MIPI_SEQ_DISPLAY_OFF, v2 VBTs do not have MIPI_SEQ_TEAR_OFF so there this is a nop. Signed-off-by: Hans de Goede Reviewed-by: Bob Paauwe Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/1488374106-4949-9-git-send-email-jani.nikula@intel.com --- drivers/gpu/drm/i915/intel_dsi.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_dsi.c b/drivers/gpu/drm/i915/intel_dsi.c index 5a50645c3a37..7dbef67a5fa0 100644 --- a/drivers/gpu/drm/i915/intel_dsi.c +++ b/drivers/gpu/drm/i915/intel_dsi.c @@ -921,6 +921,8 @@ static void intel_dsi_post_disable(struct intel_encoder *encoder, * if disable packets are sent before sending shutdown packet then in * some next enable sequence send turn on packet error is observed */ + if (is_cmd_mode(intel_dsi)) + intel_dsi_exec_vbt_sequence(intel_dsi, MIPI_SEQ_TEAR_OFF); intel_dsi_exec_vbt_sequence(intel_dsi, MIPI_SEQ_DISPLAY_OFF); /* Transition to LP-00 */ -- cgit From 38dec5c0892a4c475d9ac89fea25f8a84e17a464 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 1 Mar 2017 15:15:05 +0200 Subject: drm/i915/dsi: Call MIPI_SEQ_TEAR_ON and DISPLAY_ON for cmd-mode (untested) According to the spec we should call MIPI_SEQ_TEAR_ON and DISPLAY_ON on enable for cmd-mode, just like we already call their counterparts on disable. Note: untested, my panel is a vid-mode panel. Signed-off-by: Hans de Goede Reviewed-by: Bob Paauwe Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/1488374106-4949-10-git-send-email-jani.nikula@intel.com --- drivers/gpu/drm/i915/intel_dsi.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_dsi.c b/drivers/gpu/drm/i915/intel_dsi.c index 7dbef67a5fa0..7d5fb9d3fa64 100644 --- a/drivers/gpu/drm/i915/intel_dsi.c +++ b/drivers/gpu/drm/i915/intel_dsi.c @@ -822,6 +822,8 @@ static void intel_dsi_pre_enable(struct intel_encoder *encoder, if (is_cmd_mode(intel_dsi)) { for_each_dsi_port(port, intel_dsi->ports) I915_WRITE(MIPI_MAX_RETURN_PKT_SIZE(port), 8 * 4); + intel_dsi_exec_vbt_sequence(intel_dsi, MIPI_SEQ_TEAR_ON); + intel_dsi_exec_vbt_sequence(intel_dsi, MIPI_SEQ_DISPLAY_ON); } else { msleep(20); /* XXX */ for_each_dsi_port(port, intel_dsi->ports) -- cgit From 25b4620ee822d5f3c2deb3f1358e82763578788f Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 1 Mar 2017 15:15:06 +0200 Subject: drm/i915/dsi: Skip delays for v3 VBTs in vid-mode For v3 VBTs in vid-mode the delays are part of the VBT sequences, so we should not also delay ourselves otherwise we get double delays. Signed-off-by: Hans de Goede Reviewed-by: Bob Paauwe Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/1488374106-4949-11-git-send-email-jani.nikula@intel.com --- drivers/gpu/drm/i915/intel_dsi.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_dsi.c b/drivers/gpu/drm/i915/intel_dsi.c index 7d5fb9d3fa64..dd38b6835528 100644 --- a/drivers/gpu/drm/i915/intel_dsi.c +++ b/drivers/gpu/drm/i915/intel_dsi.c @@ -725,6 +725,17 @@ static void intel_dsi_prepare(struct intel_encoder *intel_encoder, struct intel_crtc_state *pipe_config); static void intel_dsi_unprepare(struct intel_encoder *encoder); +static void intel_dsi_msleep(struct intel_dsi *intel_dsi, int msec) +{ + struct drm_i915_private *dev_priv = to_i915(intel_dsi->base.base.dev); + + /* For v3 VBTs in vid-mode the delays are part of the VBT sequences */ + if (is_vid_mode(intel_dsi) && dev_priv->vbt.dsi.seq_version >= 3) + return; + + msleep(msec); +} + /* * Panel enable/disable sequences from the VBT spec. * @@ -806,7 +817,7 @@ static void intel_dsi_pre_enable(struct intel_encoder *encoder, if (intel_dsi->gpio_panel) gpiod_set_value_cansleep(intel_dsi->gpio_panel, 1); intel_dsi_exec_vbt_sequence(intel_dsi, MIPI_SEQ_POWER_ON); - msleep(intel_dsi->panel_on_delay); + intel_dsi_msleep(intel_dsi, intel_dsi->panel_on_delay); /* Deassert reset */ intel_dsi_exec_vbt_sequence(intel_dsi, MIPI_SEQ_DEASSERT_RESET); @@ -828,7 +839,7 @@ static void intel_dsi_pre_enable(struct intel_encoder *encoder, msleep(20); /* XXX */ for_each_dsi_port(port, intel_dsi->ports) dpi_send_cmd(intel_dsi, TURN_ON, false, port); - msleep(100); + intel_dsi_msleep(intel_dsi, 100); intel_dsi_exec_vbt_sequence(intel_dsi, MIPI_SEQ_DISPLAY_ON); @@ -955,7 +966,7 @@ static void intel_dsi_post_disable(struct intel_encoder *encoder, intel_dsi_exec_vbt_sequence(intel_dsi, MIPI_SEQ_ASSERT_RESET); /* Power off, try both CRC pmic gpio and VBT */ - msleep(intel_dsi->panel_off_delay); + intel_dsi_msleep(intel_dsi, intel_dsi->panel_off_delay); intel_dsi_exec_vbt_sequence(intel_dsi, MIPI_SEQ_POWER_OFF); if (intel_dsi->gpio_panel) gpiod_set_value_cansleep(intel_dsi->gpio_panel, 0); @@ -964,7 +975,7 @@ static void intel_dsi_post_disable(struct intel_encoder *encoder, * FIXME As we do with eDP, just make a note of the time here * and perform the wait before the next panel power on. */ - msleep(intel_dsi->panel_pwr_cycle_delay); + intel_dsi_msleep(intel_dsi, intel_dsi->panel_pwr_cycle_delay); } static bool intel_dsi_get_hw_state(struct intel_encoder *encoder, -- cgit From 9aceb5c15d84d4b960f5f80fba846c753554d092 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 1 Mar 2017 15:41:27 +0000 Subject: drm/i915: Fix all intel_framebuffer_init failures to take the error path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit No more direct return -EINVAL as we have to unwind the obj->framebuffer_references. Fixes: 24dbf51a5517 ("drm/i915: struct_mutex is not required for allocating the framebuffer") Signed-off-by: Chris Wilson Cc: Ville Syrjälä Reviewed-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/20170301154128.2841-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_display.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 93371c2377b2..6d2597a4a45f 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -14313,7 +14313,7 @@ static int intel_framebuffer_init(struct intel_framebuffer *intel_fb, if (INTEL_INFO(dev_priv)->gen < 4 && tiling != intel_fb_modifier_to_tiling(mode_cmd->modifier[0])) { DRM_DEBUG("tiling_mode must match fb modifier exactly on gen2/3\n"); - return -EINVAL; + goto err; } stride_alignment = intel_fb_stride_alignment(dev_priv, @@ -14358,7 +14358,7 @@ static int intel_framebuffer_init(struct intel_framebuffer *intel_fb, if (INTEL_GEN(dev_priv) > 3) { DRM_DEBUG("unsupported pixel format: %s\n", drm_get_format_name(mode_cmd->pixel_format, &format_name)); - return -EINVAL; + goto err; } break; case DRM_FORMAT_ABGR8888: @@ -14366,7 +14366,7 @@ static int intel_framebuffer_init(struct intel_framebuffer *intel_fb, INTEL_GEN(dev_priv) < 9) { DRM_DEBUG("unsupported pixel format: %s\n", drm_get_format_name(mode_cmd->pixel_format, &format_name)); - return -EINVAL; + goto err; } break; case DRM_FORMAT_XBGR8888: @@ -14375,14 +14375,14 @@ static int intel_framebuffer_init(struct intel_framebuffer *intel_fb, if (INTEL_GEN(dev_priv) < 4) { DRM_DEBUG("unsupported pixel format: %s\n", drm_get_format_name(mode_cmd->pixel_format, &format_name)); - return -EINVAL; + goto err; } break; case DRM_FORMAT_ABGR2101010: if (!IS_VALLEYVIEW(dev_priv) && !IS_CHERRYVIEW(dev_priv)) { DRM_DEBUG("unsupported pixel format: %s\n", drm_get_format_name(mode_cmd->pixel_format, &format_name)); - return -EINVAL; + goto err; } break; case DRM_FORMAT_YUYV: @@ -14392,13 +14392,13 @@ static int intel_framebuffer_init(struct intel_framebuffer *intel_fb, if (INTEL_GEN(dev_priv) < 5) { DRM_DEBUG("unsupported pixel format: %s\n", drm_get_format_name(mode_cmd->pixel_format, &format_name)); - return -EINVAL; + goto err; } break; default: DRM_DEBUG("unsupported pixel format: %s\n", drm_get_format_name(mode_cmd->pixel_format, &format_name)); - return -EINVAL; + goto err; } /* FIXME need to adjust LINOFF/TILEOFF accordingly. */ @@ -14411,7 +14411,7 @@ static int intel_framebuffer_init(struct intel_framebuffer *intel_fb, ret = intel_fill_fb_info(dev_priv, &intel_fb->base); if (ret) - return ret; + goto err; ret = drm_framebuffer_init(obj->base.dev, &intel_fb->base, -- cgit From dd689287b977597a46adf8c3de19d40212f0ea9f Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 1 Mar 2017 15:41:28 +0000 Subject: drm/i915: Prevent concurrent tiling/framebuffer modifications MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reintroduce a lock around tiling vs framebuffer creation to prevent modification of the obj->tiling_and_stride whilst the framebuffer is being created. Rather than use struct_mutex once again, use the per-object lock - this will also be required in future to prevent changing the tiling whilst submitting rendering. Reported-by: Ville Syrjälä Fixes: 24dbf51a5517 ("drm/i915: struct_mutex is not required for allocating the framebuffer") Signed-off-by: Chris Wilson Cc: Ville Syrjälä Reviewed-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/20170301154128.2841-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_object.h | 18 +++++++++++++++++- drivers/gpu/drm/i915/i915_gem_shrinker.c | 2 +- drivers/gpu/drm/i915/i915_gem_tiling.c | 9 ++++++++- drivers/gpu/drm/i915/intel_display.c | 25 ++++++++++++++++--------- 4 files changed, 42 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_object.h b/drivers/gpu/drm/i915/i915_gem_object.h index 2fb30a5eb510..1495eeb03382 100644 --- a/drivers/gpu/drm/i915/i915_gem_object.h +++ b/drivers/gpu/drm/i915/i915_gem_object.h @@ -165,7 +165,7 @@ struct drm_i915_gem_object { struct reservation_object *resv; /** References from framebuffers, locks out tiling changes. */ - atomic_t framebuffer_references; + unsigned int framebuffer_references; /** Record of address bit 17 of each page at last unbind. */ unsigned long *bit_17; @@ -260,6 +260,16 @@ extern void drm_gem_object_unreference(struct drm_gem_object *); __deprecated extern void drm_gem_object_unreference_unlocked(struct drm_gem_object *); +static inline void i915_gem_object_lock(struct drm_i915_gem_object *obj) +{ + reservation_object_lock(obj->resv, NULL); +} + +static inline void i915_gem_object_unlock(struct drm_i915_gem_object *obj) +{ + reservation_object_unlock(obj->resv); +} + static inline bool i915_gem_object_is_dead(const struct drm_i915_gem_object *obj) { @@ -306,6 +316,12 @@ i915_gem_object_clear_active_reference(struct drm_i915_gem_object *obj) void __i915_gem_object_release_unless_active(struct drm_i915_gem_object *obj); +static inline bool +i915_gem_object_is_framebuffer(const struct drm_i915_gem_object *obj) +{ + return READ_ONCE(obj->framebuffer_references); +} + static inline unsigned int i915_gem_object_get_tiling(struct drm_i915_gem_object *obj) { diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c index 8bc515e8b2a2..006a8b908f77 100644 --- a/drivers/gpu/drm/i915/i915_gem_shrinker.c +++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c @@ -207,7 +207,7 @@ i915_gem_shrink(struct drm_i915_private *dev_priv, if (!(flags & I915_SHRINK_ACTIVE) && (i915_gem_object_is_active(obj) || - atomic_read(&obj->framebuffer_references))) + i915_gem_object_is_framebuffer(obj))) continue; if (!can_release_pages(obj)) diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c index 46ade36dcee6..a0d6d4317a49 100644 --- a/drivers/gpu/drm/i915/i915_gem_tiling.c +++ b/drivers/gpu/drm/i915/i915_gem_tiling.c @@ -238,7 +238,7 @@ i915_gem_object_set_tiling(struct drm_i915_gem_object *obj, if ((tiling | stride) == obj->tiling_and_stride) return 0; - if (atomic_read(&obj->framebuffer_references)) + if (i915_gem_object_is_framebuffer(obj)) return -EBUSY; /* We need to rebind the object if its current allocation @@ -258,6 +258,12 @@ i915_gem_object_set_tiling(struct drm_i915_gem_object *obj, if (err) return err; + i915_gem_object_lock(obj); + if (i915_gem_object_is_framebuffer(obj)) { + i915_gem_object_unlock(obj); + return -EBUSY; + } + /* If the memory has unknown (i.e. varying) swizzling, we pin the * pages to prevent them being swapped out and causing corruption * due to the change in swizzling. @@ -294,6 +300,7 @@ i915_gem_object_set_tiling(struct drm_i915_gem_object *obj, } obj->tiling_and_stride = tiling | stride; + i915_gem_object_unlock(obj); /* Force the fence to be reacquired for GTT access */ i915_gem_release_mmap(obj); diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 6d2597a4a45f..9d8c08c4b105 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -14185,7 +14185,10 @@ static void intel_user_framebuffer_destroy(struct drm_framebuffer *fb) drm_framebuffer_cleanup(fb); - WARN_ON(atomic_dec_return(&intel_fb->obj->framebuffer_references) < 0); + i915_gem_object_lock(intel_fb->obj); + WARN_ON(!intel_fb->obj->framebuffer_references--); + i915_gem_object_unlock(intel_fb->obj); + i915_gem_object_put(intel_fb->obj); kfree(intel_fb); @@ -14262,12 +14265,16 @@ static int intel_framebuffer_init(struct intel_framebuffer *intel_fb, struct drm_mode_fb_cmd2 *mode_cmd) { struct drm_i915_private *dev_priv = to_i915(obj->base.dev); - unsigned int tiling = i915_gem_object_get_tiling(obj); - u32 pitch_limit, stride_alignment; struct drm_format_name_buf format_name; + u32 pitch_limit, stride_alignment; + unsigned int tiling, stride; int ret = -EINVAL; - atomic_inc(&obj->framebuffer_references); + i915_gem_object_lock(obj); + obj->framebuffer_references++; + tiling = i915_gem_object_get_tiling(obj); + stride = i915_gem_object_get_stride(obj); + i915_gem_object_unlock(obj); if (mode_cmd->flags & DRM_MODE_FB_MODIFIERS) { /* @@ -14339,11 +14346,9 @@ static int intel_framebuffer_init(struct intel_framebuffer *intel_fb, * If there's a fence, enforce that * the fb pitch and fence stride match. */ - if (tiling != I915_TILING_NONE && - mode_cmd->pitches[0] != i915_gem_object_get_stride(obj)) { + if (tiling != I915_TILING_NONE && mode_cmd->pitches[0] != stride) { DRM_DEBUG("pitch (%d) must match tiling stride (%d)\n", - mode_cmd->pitches[0], - i915_gem_object_get_stride(obj)); + mode_cmd->pitches[0], stride); goto err; } @@ -14424,7 +14429,9 @@ static int intel_framebuffer_init(struct intel_framebuffer *intel_fb, return 0; err: - atomic_dec(&obj->framebuffer_references); + i915_gem_object_lock(obj); + obj->framebuffer_references--; + i915_gem_object_unlock(obj); return ret; } -- cgit From 02e012f1727f9c8fe24872c74d91019e35e5897b Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 1 Mar 2017 12:11:31 +0000 Subject: drm/i915: Move w/a LRI debug message from context-init to driver load The spam of every context initialisation saying the same thing is annoying me! Move the information to the setup of the engine. Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20170301121131.11588-1-chris@chris-wilson.co.uk Reviewed-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/intel_engine_cs.c | 38 +++++++++++++++++----------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index c4d4698f98e7..c58339b22f6a 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -1009,30 +1009,32 @@ static int glk_init_workarounds(struct intel_engine_cs *engine) int init_workarounds_ring(struct intel_engine_cs *engine) { struct drm_i915_private *dev_priv = engine->i915; + int err; WARN_ON(engine->id != RCS); dev_priv->workarounds.count = 0; - dev_priv->workarounds.hw_whitelist_count[RCS] = 0; + dev_priv->workarounds.hw_whitelist_count[engine->id] = 0; if (IS_BROADWELL(dev_priv)) - return bdw_init_workarounds(engine); - - if (IS_CHERRYVIEW(dev_priv)) - return chv_init_workarounds(engine); - - if (IS_SKYLAKE(dev_priv)) - return skl_init_workarounds(engine); - - if (IS_BROXTON(dev_priv)) - return bxt_init_workarounds(engine); - - if (IS_KABYLAKE(dev_priv)) - return kbl_init_workarounds(engine); - - if (IS_GEMINILAKE(dev_priv)) - return glk_init_workarounds(engine); + err = bdw_init_workarounds(engine); + else if (IS_CHERRYVIEW(dev_priv)) + err = chv_init_workarounds(engine); + else if (IS_SKYLAKE(dev_priv)) + err = skl_init_workarounds(engine); + else if (IS_BROXTON(dev_priv)) + err = bxt_init_workarounds(engine); + else if (IS_KABYLAKE(dev_priv)) + err = kbl_init_workarounds(engine); + else if (IS_GEMINILAKE(dev_priv)) + err = glk_init_workarounds(engine); + else + err = 0; + if (err) + return err; + DRM_DEBUG_DRIVER("%s: Number of context specific w/a: %d\n", + engine->name, dev_priv->workarounds.count); return 0; } @@ -1066,8 +1068,6 @@ int intel_ring_workarounds_emit(struct drm_i915_gem_request *req) if (ret) return ret; - DRM_DEBUG_DRIVER("Number of Workarounds emitted: %d\n", w->count); - return 0; } -- cgit From a746095c2db08d765222bf0058e76fb329d69e0e Mon Sep 17 00:00:00 2001 From: Ander Conselvan de Oliveira Date: Wed, 1 Mar 2017 16:13:11 +0200 Subject: drm/i915: Enable DDI IO power domains in the DP MST path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 62b695662a24 ("drm/i915: Only enable DDI IO power domains after enabling DPLL") changed how the DDI IO power domains get enabled, but neglected the need to enable those domains when enabling a DP connector with MST enabled, leading to Kernel panic - not syncing: Timeout: Not all CPUs entered broadcast exception handler Fixes: 62b695662a24 ("drm/i915: Only enable DDI IO power domains after enabling DPLL") Cc: Imre Deak Cc: Ander Conselvan de Oliveira Cc: David Weinehall Cc: Daniel Vetter Cc: Jani Nikula Cc: intel-gfx@lists.freedesktop.org Cc: Ville Syrjälä Reported-by: Ville Syrjälä Signed-off-by: Ander Conselvan de Oliveira Reviewed-by: Imre Deak Link: http://patchwork.freedesktop.org/patch/msgid/20170301141318.3607-2-ander.conselvan.de.oliveira@intel.com --- drivers/gpu/drm/i915/intel_dp_mst.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_dp_mst.c b/drivers/gpu/drm/i915/intel_dp_mst.c index d94fd4b80963..a8334e18c4fc 100644 --- a/drivers/gpu/drm/i915/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/intel_dp_mst.c @@ -163,6 +163,9 @@ static void intel_mst_pre_enable_dp(struct intel_encoder *encoder, intel_ddi_clk_select(&intel_dig_port->base, pipe_config->shared_dpll); + intel_display_power_get(dev_priv, + intel_dig_port->ddi_io_power_domain); + intel_prepare_dp_ddi_buffers(&intel_dig_port->base); intel_dp_set_link_params(intel_dp, pipe_config->port_clock, -- cgit From c998e8a0f43a9e73c65f6f3e969805090cf4ac93 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 2 Mar 2017 08:30:29 +0000 Subject: drm/i915: Hold rpm during GEM suspend in driver unload/suspend i915_gem_suspend() tries to access the device to ensure it is idle and all writes from the device are flushed to memory. It assumed is already held the runtime pm wakeref, but we should explicitly acquire it for our access to be safe. [ 619.926287] WARNING: CPU: 3 PID: 9353 at drivers/gpu/drm/i915/intel_drv.h:1750 gen6_write32+0x23e/0x2a0 [i915] [ 619.926300] RPM wakelock ref not held during HW access [ 619.926311] Modules linked in: vgem x86_pkg_temp_thermal intel_powerclamp snd_hda_codec_hdmi snd_hda_codec_generic snd_hda_codec coretemp snd_hwdep crct10dif_pclmul snd_hda_core crc32_pclmul snd_pcm mei_me mei lpc_ich ghash_clmulni_intel i915(-) sdhci_pci sdhci mmc_core e1000e ptp pps_core prime_numbers [last unloaded: snd_hda_intel] [ 619.926578] CPU: 3 PID: 9353 Comm: drv_module_relo Tainted: G U 4.10.0-CI-Trybot_609+ #1 [ 619.926585] Hardware name: LENOVO 42962WU/42962WU, BIOS 8DET56WW (1.26 ) 12/01/2011 [ 619.926592] Call Trace: [ 619.926609] dump_stack+0x67/0x92 [ 619.926625] __warn+0xc6/0xe0 [ 619.926640] warn_slowpath_fmt+0x4a/0x50 [ 619.926726] gen6_write32+0x23e/0x2a0 [i915] [ 619.926801] gen6_mm_switch+0x38/0x70 [i915] [ 619.926871] i915_switch_context+0xec/0xa10 [i915] [ 619.926942] i915_gem_switch_to_kernel_context+0x13c/0x2b0 [i915] [ 619.927019] i915_gem_suspend+0x2b/0x180 [i915] [ 619.927079] i915_driver_unload+0x22/0x200 [i915] [ 619.927093] ? __this_cpu_preempt_check+0x13/0x20 [ 619.927105] ? trace_hardirqs_on_caller+0xe7/0x200 [ 619.927118] ? trace_hardirqs_on+0xd/0x10 [ 619.927128] ? _raw_spin_unlock_irqrestore+0x3d/0x60 [ 619.927192] i915_pci_remove+0x14/0x20 [i915] [ 619.927205] pci_device_remove+0x34/0xb0 [ 619.927219] device_release_driver_internal+0x158/0x210 [ 619.927234] driver_detach+0x3b/0x80 [ 619.927245] bus_remove_driver+0x53/0xd0 [ 619.927256] driver_unregister+0x27/0x50 [ 619.927267] pci_unregister_driver+0x25/0xa0 [ 619.927351] i915_exit+0x1a/0xb1a [i915] [ 619.927362] SyS_delete_module+0x193/0x1e0 [ 619.927378] entry_SYSCALL_64_fastpath+0x1c/0xb1 [ 619.927386] RIP: 0033:0x7f82b46c5d37 [ 619.927393] RSP: 002b:00007ffdb6f610d8 EFLAGS: 00000246 ORIG_RAX: 00000000000000b0 [ 619.927408] RAX: ffffffffffffffda RBX: ffffffff81481ff3 RCX: 00007f82b46c5d37 [ 619.927415] RDX: 0000000000000001 RSI: 0000000000000800 RDI: 000000000224f558 [ 619.927422] RBP: ffffc90001187f88 R08: 0000000000000000 R09: 00007ffdb6f61100 [ 619.927428] R10: 000000000224f4e0 R11: 0000000000000246 R12: 0000000000000000 [ 619.927435] R13: 00007ffdb6f612b0 R14: 0000000000000000 R15: 0000000000000000 [ 619.927451] ? __this_cpu_preempt_check+0x13/0x20 or [ 641.646590] WARNING: CPU: 1 PID: 8913 at drivers/gpu/drm/i915/intel_drv.h:1750 intel_runtime_pm_get_noresume+0x8b/0x90 [i915] [ 641.646595] RPM wakelock ref not held during HW access [ 641.646600] Modules linked in: vgem snd_hda_codec_hdmi snd_hda_codec_generic x86_pkg_temp_thermal intel_powerclamp coretemp snd_hda_codec snd_hwdep crct10dif_pclmul snd_hda_core crc32_pclmul ghash_clmulni_intel snd_pcm mei_me mei i915(-) r8169 mii prime_numbers i2c_hid [last unloaded: snd_hda_intel] [ 641.646825] CPU: 1 PID: 8913 Comm: drv_module_relo Tainted: G U 4.10.0-CI-Trybot_609+ #1 [ 641.646836] Hardware name: TOSHIBA SATELLITE P50-C/06F4 , BIOS 1.20 10/08/2015 [ 641.646843] Call Trace: [ 641.646857] dump_stack+0x67/0x92 [ 641.646869] __warn+0xc6/0xe0 [ 641.646880] warn_slowpath_fmt+0x4a/0x50 [ 641.646893] ? __this_cpu_preempt_check+0x13/0x20 [ 641.646904] ? trace_hardirqs_on_caller+0xe7/0x200 [ 641.646957] intel_runtime_pm_get_noresume+0x8b/0x90 [i915] [ 641.647022] __i915_add_request+0x423/0x540 [i915] [ 641.647080] i915_gem_switch_to_kernel_context+0x148/0x2b0 [i915] [ 641.647145] i915_gem_suspend+0x2b/0x180 [i915] [ 641.647189] i915_driver_unload+0x22/0x200 [i915] [ 641.647200] ? __this_cpu_preempt_check+0x13/0x20 [ 641.647210] ? trace_hardirqs_on_caller+0xe7/0x200 [ 641.647220] ? trace_hardirqs_on+0xd/0x10 [ 641.647231] ? _raw_spin_unlock_irqrestore+0x3d/0x60 [ 641.647276] i915_pci_remove+0x14/0x20 [i915] [ 641.647293] pci_device_remove+0x34/0xb0 [ 641.647307] device_release_driver_internal+0x158/0x210 [ 641.647321] driver_detach+0x3b/0x80 [ 641.647330] bus_remove_driver+0x53/0xd0 [ 641.647338] driver_unregister+0x27/0x50 [ 641.647348] pci_unregister_driver+0x25/0xa0 [ 641.647415] i915_exit+0x1a/0xb1a [i915] [ 641.647429] SyS_delete_module+0x193/0x1e0 [ 641.647444] entry_SYSCALL_64_fastpath+0x1c/0xb1 [ 641.647453] RIP: 0033:0x7fc622bd2d37 [ 641.647463] RSP: 002b:00007ffff8ffb5c8 EFLAGS: 00000246 ORIG_RAX: 00000000000000b0 [ 641.647475] RAX: ffffffffffffffda RBX: ffffffff81481ff3 RCX: 00007fc622bd2d37 [ 641.647480] RDX: 0000000000000001 RSI: 0000000000000800 RDI: 0000000000d49118 [ 641.647485] RBP: ffffc90000997f88 R08: 0000000000000000 R09: 00007ffff8ffb5f0 [ 641.647491] R10: 0000000000d490a0 R11: 0000000000000246 R12: 0000000000000000 [ 641.647498] R13: 00007ffff8ffb7a0 R14: 0000000000000000 R15: 0000000000000000 [ 641.647510] ? __this_cpu_preempt_check+0x13/0x20 v2: Keep holding rpm until the end to cover i915_gem_sanitize() as well. Fixes: 5ab57c702069 ("drm/i915: Flush logical context image out to memory upon suspend") Fixes: 1c777c5d1dc ("drm/i915/hsw: Fix GPU hang during resume from S3-devices state") Signed-off-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20170302083029.19576-1-chris@chris-wilson.co.uk Reviewed-by: Imre Deak Cc: # v4.9+ --- drivers/gpu/drm/i915/i915_gem.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 0ad080984877..275e9e0799b9 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4244,6 +4244,7 @@ int i915_gem_suspend(struct drm_i915_private *dev_priv) struct drm_device *dev = &dev_priv->drm; int ret; + intel_runtime_pm_get(dev_priv); intel_suspend_gt_powersave(dev_priv); mutex_lock(&dev->struct_mutex); @@ -4258,13 +4259,13 @@ int i915_gem_suspend(struct drm_i915_private *dev_priv) */ ret = i915_gem_switch_to_kernel_context(dev_priv); if (ret) - goto err; + goto err_unlock; ret = i915_gem_wait_for_idle(dev_priv, I915_WAIT_INTERRUPTIBLE | I915_WAIT_LOCKED); if (ret) - goto err; + goto err_unlock; i915_gem_retire_requests(dev_priv); GEM_BUG_ON(dev_priv->gt.active_requests); @@ -4310,11 +4311,12 @@ int i915_gem_suspend(struct drm_i915_private *dev_priv) * machine in an unusable condition. */ i915_gem_sanitize(dev_priv); + goto out_rpm_put; - return 0; - -err: +err_unlock: mutex_unlock(&dev->struct_mutex); +out_rpm_put: + intel_runtime_pm_put(dev_priv); return ret; } -- cgit From 1f58c8e7eac0d4a7a59037dc18dbed2a9b5bd342 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 2 Mar 2017 07:41:57 +0000 Subject: drm/i915: Restore the invalid access without RPM warning A long time ago we turned off the warning as it was too painful, we had too much broken code. Turn it back on now as we are mostly clean and need to prevent returning to such orangeness. Signed-off-by: Chris Wilson Cc: Imre Deak Link: http://patchwork.freedesktop.org/patch/msgid/20170302074157.21631-2-chris@chris-wilson.co.uk Reviewed-by: Imre Deak --- drivers/gpu/drm/i915/intel_drv.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 741beba0c9c0..eadc72df5a17 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -1739,10 +1739,8 @@ static inline void assert_rpm_wakelock_held(struct drm_i915_private *dev_priv) { assert_rpm_device_not_suspended(dev_priv); - /* FIXME: Needs to be converted back to WARN_ONCE, but currently causes - * too much noise. */ - if (!atomic_read(&dev_priv->pm.wakeref_count)) - DRM_DEBUG_DRIVER("RPM wakelock ref not held during HW access"); + WARN_ONCE(!atomic_read(&dev_priv->pm.wakeref_count), + "RPM wakelock ref not held during HW access"); } /** -- cgit From 13e867f6faaafe4d7fdaa4a894a061d041d0dc88 Mon Sep 17 00:00:00 2001 From: Anusha Srivatsa Date: Wed, 1 Mar 2017 11:58:55 -0800 Subject: i915/HuC: Add an extra check for platforms that do not have HUC Return silently without producing much noise on platforms that have a HuC but the firmware is absent. Cc: Ander Conselvan De Oliveira Cc: Rodrigo Vivi Signed-off-by: Anusha Srivatsa Reviewed-by: Ander Conselvan de Oliveira Signed-off-by: Ander Conselvan de Oliveira Link: http://patchwork.freedesktop.org/patch/msgid/1488398335-13121-1-git-send-email-anusha.srivatsa@intel.com --- drivers/gpu/drm/i915/intel_huc.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_huc.c b/drivers/gpu/drm/i915/intel_huc.c index c28543d220a2..e660109fc51e 100644 --- a/drivers/gpu/drm/i915/intel_huc.c +++ b/drivers/gpu/drm/i915/intel_huc.c @@ -181,12 +181,14 @@ void intel_huc_init(struct drm_i915_private *dev_priv) } huc_fw->path = fw_path; + + if (huc_fw->path == NULL) + return; + huc_fw->fetch_status = INTEL_UC_FIRMWARE_PENDING; DRM_DEBUG_DRIVER("HuC firmware pending, path %s\n", fw_path); - WARN(huc_fw->path == NULL, "HuC present but no fw path\n"); - intel_uc_fw_fetch(dev_priv, huc_fw); } -- cgit From 9ce53745edd515ebfc1d5f9bd3ded77dabce27fd Mon Sep 17 00:00:00 2001 From: Madhav Chauhan Date: Thu, 2 Mar 2017 00:01:22 +0530 Subject: drm/i915/glk: Fix DSI enable I/O sequence One of the if statement covers the next line in enable I/O sequence. This patch correct the same by adding error message. Fixes: 4644848369c0 ("drm/i915/glk: Add MIPIIO Enable/disable sequence") Reported-by: Chris Wilson Signed-off-by: Madhav Chauhan Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/1488393082-30660-1-git-send-email-madhav.chauhan@intel.com --- drivers/gpu/drm/i915/intel_dsi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/i915/intel_dsi.c b/drivers/gpu/drm/i915/intel_dsi.c index dd38b6835528..323fc097c3ee 100644 --- a/drivers/gpu/drm/i915/intel_dsi.c +++ b/drivers/gpu/drm/i915/intel_dsi.c @@ -425,6 +425,7 @@ static void glk_dsi_device_ready(struct intel_encoder *encoder) if (intel_wait_for_register(dev_priv, MIPI_CTRL(port), GLK_ULPS_NOT_ACTIVE, GLK_ULPS_NOT_ACTIVE, 20)) + DRM_ERROR("ULPS is still active\n"); /* Exit ULPS */ val = I915_READ(MIPI_DEVICE_READY(port)); -- cgit From a5509abda48ecfc133fac6268e83fc1a93dba039 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Fri, 17 Feb 2017 17:01:59 +0200 Subject: drm/i915: Fix legacy cursor vs. watermarks for ILK-BDW MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In order to make cursor updates actually safe wrt. watermark programming we have to clear the legacy_cursor_update flag in the atomic state. That will cause the regular atomic update path to do the necessary vblank wait after the plane update if needed, otherwise the vblank wait would be skipped and we'd feed the optimal watermarks to the hardware before the plane update has actually happened. To make the slow vs. fast path determination in intel_legacy_cursor_update() a little simpler we can ignore the actual visibility of the plane (which can only get computed once we've already chosen out path) and instead we simply check whether the fb is being set or cleared by the user. This means a fully clipped but logically visible cursor will be considered visible as far as watermark programming is concerned. We can do that for the cursor since it's a fixed size plane and the clipped size doesn't play a role in the watermark computation. This should fix underruns that can occur when the cursor gets enable/disabled or the size gets changed. Hopefully it's good enough that only pure cursor movement and flips go through unthrottled. Cc: Maarten Lankhorst Cc: Daniel Vetter Cc: Uwe Kleine-König Reported-by: Uwe Kleine-König Fixes: f79f26921ee1 ("drm/i915: Add a cursor hack to allow converting legacy page flip to atomic, v3.") Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/20170217150159.11683-1-ville.syrjala@linux.intel.com Reviewed-by: Maarten Lankhorst Tested-by: Rafael Ristovski --- drivers/gpu/drm/i915/intel_display.c | 29 ++++++++++++++++++----------- drivers/gpu/drm/i915/intel_pm.c | 20 ++++++++++++-------- 2 files changed, 30 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 9d8c08c4b105..9908bba5b3d3 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -12955,6 +12955,17 @@ static int intel_atomic_commit(struct drm_device *dev, struct drm_i915_private *dev_priv = to_i915(dev); int ret = 0; + /* + * The intel_legacy_cursor_update() fast path takes care + * of avoiding the vblank waits for simple cursor + * movement and flips. For cursor on/off and size changes, + * we want to perform the vblank waits so that watermark + * updates happen during the correct frames. Gen9+ have + * double buffered watermarks and so shouldn't need this. + */ + if (INTEL_GEN(dev_priv) < 9) + state->legacy_cursor_update = false; + ret = drm_atomic_helper_setup_commit(state, nonblock); if (ret) return ret; @@ -13389,8 +13400,7 @@ intel_legacy_cursor_update(struct drm_plane *plane, old_plane_state->src_h != src_h || old_plane_state->crtc_w != crtc_w || old_plane_state->crtc_h != crtc_h || - !old_plane_state->visible || - old_plane_state->fb->modifier != fb->modifier) + !old_plane_state->fb != !fb) goto slow; new_plane_state = intel_plane_duplicate_state(plane); @@ -13413,10 +13423,6 @@ intel_legacy_cursor_update(struct drm_plane *plane, if (ret) goto out_free; - /* Visibility changed, must take slowpath. */ - if (!new_plane_state->visible) - goto slow_free; - ret = mutex_lock_interruptible(&dev_priv->drm.struct_mutex); if (ret) goto out_free; @@ -13456,9 +13462,12 @@ intel_legacy_cursor_update(struct drm_plane *plane, new_plane_state->fb = old_fb; to_intel_plane_state(new_plane_state)->vma = old_vma; - intel_plane->update_plane(plane, - to_intel_crtc_state(crtc->state), - to_intel_plane_state(plane->state)); + if (plane->state->visible) + intel_plane->update_plane(plane, + to_intel_crtc_state(crtc->state), + to_intel_plane_state(plane->state)); + else + intel_plane->disable_plane(plane, crtc); intel_cleanup_plane_fb(plane, new_plane_state); @@ -13468,8 +13477,6 @@ out_free: intel_plane_destroy_state(plane, new_plane_state); return ret; -slow_free: - intel_plane_destroy_state(plane, new_plane_state); slow: return drm_atomic_helper_update_plane(plane, crtc, fb, crtc_x, crtc_y, crtc_w, crtc_h, diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index c6938350a6c4..b00c95e4a81f 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -1841,20 +1841,24 @@ static uint32_t ilk_compute_cur_wm(const struct intel_crtc_state *cstate, const struct intel_plane_state *pstate, uint32_t mem_value) { + int cpp; + /* - * We treat the cursor plane as always-on for the purposes of watermark - * calculation. Until we have two-stage watermark programming merged, - * this is necessary to avoid flickering. + * Treat cursor with fb as always visible since cursor updates + * can happen faster than the vrefresh rate, and the current + * watermark code doesn't handle that correctly. Cursor updates + * which set/clear the fb or change the cursor size are going + * to get throttled by intel_legacy_cursor_update() to work + * around this problem with the watermark code. */ - int cpp = 4; - int width = pstate->base.visible ? pstate->base.crtc_w : 64; - - if (!cstate->base.active) + if (!cstate->base.active || !pstate->base.fb) return 0; + cpp = pstate->base.fb->format->cpp[0]; + return ilk_wm_method2(cstate->pixel_rate, cstate->base.adjusted_mode.crtc_htotal, - width, cpp, mem_value); + pstate->base.crtc_w, cpp, mem_value); } /* Only for WM_LP. */ -- cgit From e60a870d7f83517e583e6094ff5646d3a8d732db Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 2 Mar 2017 11:51:30 +0000 Subject: drm/i915: Assert that fence->lock is held in an irq-safe manner Everytime we take the fence->lock (aka request->lock), we must do so with irqs disabled since it may be used from within an hardirq context. As sometimes we are taking the lock in a nested manner, assert that the caller did disable the irqs for us. Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: Mika Kuoppala Cc: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170302115130.28434-1-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_gem_request.c | 5 ++++- drivers/gpu/drm/i915/intel_breadcrumbs.c | 2 ++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index 77c3ee7a3fd0..582471c00fce 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -400,12 +400,14 @@ void __i915_gem_request_submit(struct drm_i915_gem_request *request) struct intel_timeline *timeline; u32 seqno; + GEM_BUG_ON(!irqs_disabled()); + assert_spin_locked(&engine->timeline->lock); + trace_i915_gem_request_execute(request); /* Transfer from per-context onto the global per-engine timeline */ timeline = engine->timeline; GEM_BUG_ON(timeline == request->timeline); - assert_spin_locked(&timeline->lock); seqno = timeline_get_seqno(timeline); GEM_BUG_ON(!seqno); @@ -446,6 +448,7 @@ void __i915_gem_request_unsubmit(struct drm_i915_gem_request *request) struct intel_engine_cs *engine = request->engine; struct intel_timeline *timeline; + GEM_BUG_ON(!irqs_disabled()); assert_spin_locked(&engine->timeline->lock); /* Only unwind in reverse order, required so that the per-context list diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c index 5752f23fd289..ba54dd1949e8 100644 --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c @@ -653,6 +653,7 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request) */ /* locked by dma_fence_enable_sw_signaling() (irqsafe fence->lock) */ + GEM_BUG_ON(!irqs_disabled()); assert_spin_locked(&request->lock); seqno = i915_gem_request_global_seqno(request); @@ -709,6 +710,7 @@ void intel_engine_cancel_signaling(struct drm_i915_gem_request *request) struct intel_engine_cs *engine = request->engine; struct intel_breadcrumbs *b = &engine->breadcrumbs; + GEM_BUG_ON(!irqs_disabled()); assert_spin_locked(&request->lock); GEM_BUG_ON(!request->signaling.wait.seqno); -- cgit From 675204153e82c1048886da419b411bb95e83c797 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 2 Mar 2017 13:28:01 +0000 Subject: drm/i915: s/assert_spin_locked/lockdep_assert_held/ MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit assert_spin_locked() becomes an unconditionally compiled BUG_ON(), adding debug code right into the heart of critical routines like interrupt handlers. text data bss dec hex 1296480 19944 2272 1318696 141f28 before (lockdep disabled) 1295984 19944 2272 1318200 141d38 after 1336261 21139 3208 1360608 14c2e0 before (lockdep enabled) 1339920 21139 3208 1364267 14d12b after Small saving for release; hopefully more instructive in debug. Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: Joonas Lahtinen Cc: Mika Kuoppala Cc: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/20170302132801.599-1-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_gem_request.c | 4 ++-- drivers/gpu/drm/i915/i915_irq.c | 30 +++++++++++++++--------------- drivers/gpu/drm/i915/i915_perf.c | 2 +- drivers/gpu/drm/i915/intel_breadcrumbs.c | 12 ++++++------ drivers/gpu/drm/i915/intel_fifo_underrun.c | 14 +++++++------- drivers/gpu/drm/i915/intel_hotplug.c | 2 +- drivers/gpu/drm/i915/intel_pipe_crc.c | 2 +- drivers/gpu/drm/i915/intel_pm.c | 8 ++++---- drivers/gpu/drm/i915/intel_uncore.c | 4 ++-- 9 files changed, 39 insertions(+), 39 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index 582471c00fce..b881d4e1bd1e 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -401,7 +401,7 @@ void __i915_gem_request_submit(struct drm_i915_gem_request *request) u32 seqno; GEM_BUG_ON(!irqs_disabled()); - assert_spin_locked(&engine->timeline->lock); + lockdep_assert_held(&engine->timeline->lock); trace_i915_gem_request_execute(request); @@ -449,7 +449,7 @@ void __i915_gem_request_unsubmit(struct drm_i915_gem_request *request) struct intel_timeline *timeline; GEM_BUG_ON(!irqs_disabled()); - assert_spin_locked(&engine->timeline->lock); + lockdep_assert_held(&engine->timeline->lock); /* Only unwind in reverse order, required so that the per-context list * is kept in seqno/ring order. diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 19892854707f..e0aa686f9c68 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -180,7 +180,7 @@ i915_hotplug_interrupt_update_locked(struct drm_i915_private *dev_priv, { uint32_t val; - assert_spin_locked(&dev_priv->irq_lock); + lockdep_assert_held(&dev_priv->irq_lock); WARN_ON(bits & ~mask); val = I915_READ(PORT_HOTPLUG_EN); @@ -222,7 +222,7 @@ void ilk_update_display_irq(struct drm_i915_private *dev_priv, { uint32_t new_val; - assert_spin_locked(&dev_priv->irq_lock); + lockdep_assert_held(&dev_priv->irq_lock); WARN_ON(enabled_irq_mask & ~interrupt_mask); @@ -250,7 +250,7 @@ static void ilk_update_gt_irq(struct drm_i915_private *dev_priv, uint32_t interrupt_mask, uint32_t enabled_irq_mask) { - assert_spin_locked(&dev_priv->irq_lock); + lockdep_assert_held(&dev_priv->irq_lock); WARN_ON(enabled_irq_mask & ~interrupt_mask); @@ -302,7 +302,7 @@ static void snb_update_pm_irq(struct drm_i915_private *dev_priv, WARN_ON(enabled_irq_mask & ~interrupt_mask); - assert_spin_locked(&dev_priv->irq_lock); + lockdep_assert_held(&dev_priv->irq_lock); new_val = dev_priv->pm_imr; new_val &= ~interrupt_mask; @@ -340,7 +340,7 @@ void gen6_reset_pm_iir(struct drm_i915_private *dev_priv, u32 reset_mask) { i915_reg_t reg = gen6_pm_iir(dev_priv); - assert_spin_locked(&dev_priv->irq_lock); + lockdep_assert_held(&dev_priv->irq_lock); I915_WRITE(reg, reset_mask); I915_WRITE(reg, reset_mask); @@ -349,7 +349,7 @@ void gen6_reset_pm_iir(struct drm_i915_private *dev_priv, u32 reset_mask) void gen6_enable_pm_irq(struct drm_i915_private *dev_priv, u32 enable_mask) { - assert_spin_locked(&dev_priv->irq_lock); + lockdep_assert_held(&dev_priv->irq_lock); dev_priv->pm_ier |= enable_mask; I915_WRITE(gen6_pm_ier(dev_priv), dev_priv->pm_ier); @@ -359,7 +359,7 @@ void gen6_enable_pm_irq(struct drm_i915_private *dev_priv, u32 enable_mask) void gen6_disable_pm_irq(struct drm_i915_private *dev_priv, u32 disable_mask) { - assert_spin_locked(&dev_priv->irq_lock); + lockdep_assert_held(&dev_priv->irq_lock); dev_priv->pm_ier &= ~disable_mask; __gen6_mask_pm_irq(dev_priv, disable_mask); @@ -463,7 +463,7 @@ static void bdw_update_port_irq(struct drm_i915_private *dev_priv, uint32_t new_val; uint32_t old_val; - assert_spin_locked(&dev_priv->irq_lock); + lockdep_assert_held(&dev_priv->irq_lock); WARN_ON(enabled_irq_mask & ~interrupt_mask); @@ -496,7 +496,7 @@ void bdw_update_pipe_irq(struct drm_i915_private *dev_priv, { uint32_t new_val; - assert_spin_locked(&dev_priv->irq_lock); + lockdep_assert_held(&dev_priv->irq_lock); WARN_ON(enabled_irq_mask & ~interrupt_mask); @@ -530,7 +530,7 @@ void ibx_display_interrupt_update(struct drm_i915_private *dev_priv, WARN_ON(enabled_irq_mask & ~interrupt_mask); - assert_spin_locked(&dev_priv->irq_lock); + lockdep_assert_held(&dev_priv->irq_lock); if (WARN_ON(!intel_irqs_enabled(dev_priv))) return; @@ -546,7 +546,7 @@ __i915_enable_pipestat(struct drm_i915_private *dev_priv, enum pipe pipe, i915_reg_t reg = PIPESTAT(pipe); u32 pipestat = I915_READ(reg) & PIPESTAT_INT_ENABLE_MASK; - assert_spin_locked(&dev_priv->irq_lock); + lockdep_assert_held(&dev_priv->irq_lock); WARN_ON(!intel_irqs_enabled(dev_priv)); if (WARN_ONCE(enable_mask & ~PIPESTAT_INT_ENABLE_MASK || @@ -573,7 +573,7 @@ __i915_disable_pipestat(struct drm_i915_private *dev_priv, enum pipe pipe, i915_reg_t reg = PIPESTAT(pipe); u32 pipestat = I915_READ(reg) & PIPESTAT_INT_ENABLE_MASK; - assert_spin_locked(&dev_priv->irq_lock); + lockdep_assert_held(&dev_priv->irq_lock); WARN_ON(!intel_irqs_enabled(dev_priv)); if (WARN_ONCE(enable_mask & ~PIPESTAT_INT_ENABLE_MASK || @@ -3399,7 +3399,7 @@ static int ironlake_irq_postinstall(struct drm_device *dev) void valleyview_enable_display_irqs(struct drm_i915_private *dev_priv) { - assert_spin_locked(&dev_priv->irq_lock); + lockdep_assert_held(&dev_priv->irq_lock); if (dev_priv->display_irqs_enabled) return; @@ -3414,7 +3414,7 @@ void valleyview_enable_display_irqs(struct drm_i915_private *dev_priv) void valleyview_disable_display_irqs(struct drm_i915_private *dev_priv) { - assert_spin_locked(&dev_priv->irq_lock); + lockdep_assert_held(&dev_priv->irq_lock); if (!dev_priv->display_irqs_enabled) return; @@ -4090,7 +4090,7 @@ static void i915_hpd_irq_setup(struct drm_i915_private *dev_priv) { u32 hotplug_en; - assert_spin_locked(&dev_priv->irq_lock); + lockdep_assert_held(&dev_priv->irq_lock); /* Note HDMI and DP share hotplug bits */ /* enable bits are the same for all generations */ diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index a1b7eec58be2..8c121187ff39 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -1008,7 +1008,7 @@ static void hsw_disable_metric_set(struct drm_i915_private *dev_priv) static void gen7_update_oacontrol_locked(struct drm_i915_private *dev_priv) { - assert_spin_locked(&dev_priv->perf.hook_lock); + lockdep_assert_held(&dev_priv->perf.hook_lock); if (dev_priv->perf.oa.exclusive_stream->enabled) { struct i915_gem_context *ctx = diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c index ba54dd1949e8..235d4645a5cf 100644 --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c @@ -164,7 +164,7 @@ void __intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine) { struct intel_breadcrumbs *b = &engine->breadcrumbs; - assert_spin_locked(&b->lock); + lockdep_assert_held(&b->lock); if (b->irq_enabled) { irq_disable(engine); @@ -228,7 +228,7 @@ static void __intel_breadcrumbs_enable_irq(struct intel_breadcrumbs *b) container_of(b, struct intel_engine_cs, breadcrumbs); struct drm_i915_private *i915 = engine->i915; - assert_spin_locked(&b->lock); + lockdep_assert_held(&b->lock); if (b->irq_armed) return; @@ -276,7 +276,7 @@ static inline struct intel_wait *to_wait(struct rb_node *node) static inline void __intel_breadcrumbs_finish(struct intel_breadcrumbs *b, struct intel_wait *wait) { - assert_spin_locked(&b->lock); + lockdep_assert_held(&b->lock); /* This request is completed, so remove it from the tree, mark it as * complete, and *then* wake up the associated task. @@ -432,7 +432,7 @@ static void __intel_engine_remove_wait(struct intel_engine_cs *engine, { struct intel_breadcrumbs *b = &engine->breadcrumbs; - assert_spin_locked(&b->lock); + lockdep_assert_held(&b->lock); if (RB_EMPTY_NODE(&wait->node)) goto out; @@ -654,7 +654,7 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request) /* locked by dma_fence_enable_sw_signaling() (irqsafe fence->lock) */ GEM_BUG_ON(!irqs_disabled()); - assert_spin_locked(&request->lock); + lockdep_assert_held(&request->lock); seqno = i915_gem_request_global_seqno(request); if (!seqno) @@ -711,7 +711,7 @@ void intel_engine_cancel_signaling(struct drm_i915_gem_request *request) struct intel_breadcrumbs *b = &engine->breadcrumbs; GEM_BUG_ON(!irqs_disabled()); - assert_spin_locked(&request->lock); + lockdep_assert_held(&request->lock); GEM_BUG_ON(!request->signaling.wait.seqno); spin_lock(&b->lock); diff --git a/drivers/gpu/drm/i915/intel_fifo_underrun.c b/drivers/gpu/drm/i915/intel_fifo_underrun.c index e660d8b4bbc3..f5ddacc90fde 100644 --- a/drivers/gpu/drm/i915/intel_fifo_underrun.c +++ b/drivers/gpu/drm/i915/intel_fifo_underrun.c @@ -54,7 +54,7 @@ static bool ivb_can_enable_err_int(struct drm_device *dev) struct intel_crtc *crtc; enum pipe pipe; - assert_spin_locked(&dev_priv->irq_lock); + lockdep_assert_held(&dev_priv->irq_lock); for_each_pipe(dev_priv, pipe) { crtc = intel_get_crtc_for_pipe(dev_priv, pipe); @@ -72,7 +72,7 @@ static bool cpt_can_enable_serr_int(struct drm_device *dev) enum pipe pipe; struct intel_crtc *crtc; - assert_spin_locked(&dev_priv->irq_lock); + lockdep_assert_held(&dev_priv->irq_lock); for_each_pipe(dev_priv, pipe) { crtc = intel_get_crtc_for_pipe(dev_priv, pipe); @@ -90,7 +90,7 @@ static void i9xx_check_fifo_underruns(struct intel_crtc *crtc) i915_reg_t reg = PIPESTAT(crtc->pipe); u32 pipestat = I915_READ(reg) & 0xffff0000; - assert_spin_locked(&dev_priv->irq_lock); + lockdep_assert_held(&dev_priv->irq_lock); if ((pipestat & PIPE_FIFO_UNDERRUN_STATUS) == 0) return; @@ -109,7 +109,7 @@ static void i9xx_set_fifo_underrun_reporting(struct drm_device *dev, i915_reg_t reg = PIPESTAT(pipe); u32 pipestat = I915_READ(reg) & 0xffff0000; - assert_spin_locked(&dev_priv->irq_lock); + lockdep_assert_held(&dev_priv->irq_lock); if (enable) { I915_WRITE(reg, pipestat | PIPE_FIFO_UNDERRUN_STATUS); @@ -139,7 +139,7 @@ static void ivybridge_check_fifo_underruns(struct intel_crtc *crtc) enum pipe pipe = crtc->pipe; uint32_t err_int = I915_READ(GEN7_ERR_INT); - assert_spin_locked(&dev_priv->irq_lock); + lockdep_assert_held(&dev_priv->irq_lock); if ((err_int & ERR_INT_FIFO_UNDERRUN(pipe)) == 0) return; @@ -204,7 +204,7 @@ static void cpt_check_pch_fifo_underruns(struct intel_crtc *crtc) enum transcoder pch_transcoder = (enum transcoder) crtc->pipe; uint32_t serr_int = I915_READ(SERR_INT); - assert_spin_locked(&dev_priv->irq_lock); + lockdep_assert_held(&dev_priv->irq_lock); if ((serr_int & SERR_INT_TRANS_FIFO_UNDERRUN(pch_transcoder)) == 0) return; @@ -248,7 +248,7 @@ static bool __intel_set_cpu_fifo_underrun_reporting(struct drm_device *dev, struct intel_crtc *crtc = intel_get_crtc_for_pipe(dev_priv, pipe); bool old; - assert_spin_locked(&dev_priv->irq_lock); + lockdep_assert_held(&dev_priv->irq_lock); old = !crtc->cpu_fifo_underrun_disabled; crtc->cpu_fifo_underrun_disabled = !enable; diff --git a/drivers/gpu/drm/i915/intel_hotplug.c b/drivers/gpu/drm/i915/intel_hotplug.c index 0756bdc672b3..ba763e7d7dcf 100644 --- a/drivers/gpu/drm/i915/intel_hotplug.c +++ b/drivers/gpu/drm/i915/intel_hotplug.c @@ -157,7 +157,7 @@ static void intel_hpd_irq_storm_disable(struct drm_i915_private *dev_priv) enum hpd_pin pin; bool hpd_disabled = false; - assert_spin_locked(&dev_priv->irq_lock); + lockdep_assert_held(&dev_priv->irq_lock); list_for_each_entry(connector, &mode_config->connector_list, head) { if (connector->polled != DRM_CONNECTOR_POLL_HPD) diff --git a/drivers/gpu/drm/i915/intel_pipe_crc.c b/drivers/gpu/drm/i915/intel_pipe_crc.c index c0b1f99da37b..c782b7878288 100644 --- a/drivers/gpu/drm/i915/intel_pipe_crc.c +++ b/drivers/gpu/drm/i915/intel_pipe_crc.c @@ -105,7 +105,7 @@ static int i915_pipe_crc_release(struct inode *inode, struct file *filep) static int pipe_crc_data_count(struct intel_pipe_crc *pipe_crc) { - assert_spin_locked(&pipe_crc->lock); + lockdep_assert_held(&pipe_crc->lock); return CIRC_CNT(pipe_crc->head, pipe_crc->tail, INTEL_PIPE_CRC_ENTRIES_NR); } diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index b00c95e4a81f..50e672f84276 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -4672,7 +4672,7 @@ bool ironlake_set_drps(struct drm_i915_private *dev_priv, u8 val) { u16 rgvswctl; - assert_spin_locked(&mchdev_lock); + lockdep_assert_held(&mchdev_lock); rgvswctl = I915_READ16(MEMSWCTL); if (rgvswctl & MEMCTL_CMD_STS) { @@ -6203,7 +6203,7 @@ static unsigned long __i915_chipset_val(struct drm_i915_private *dev_priv) unsigned long now = jiffies_to_msecs(jiffies), diff1; int i; - assert_spin_locked(&mchdev_lock); + lockdep_assert_held(&mchdev_lock); diff1 = now - dev_priv->ips.last_time1; @@ -6308,7 +6308,7 @@ static void __i915_update_gfx_val(struct drm_i915_private *dev_priv) u64 now, diff, diffms; u32 count; - assert_spin_locked(&mchdev_lock); + lockdep_assert_held(&mchdev_lock); now = ktime_get_raw_ns(); diffms = now - dev_priv->ips.last_time2; @@ -6353,7 +6353,7 @@ static unsigned long __i915_gfx_val(struct drm_i915_private *dev_priv) unsigned long t, corr, state1, corr2, state2; u32 pxvid, ext_v; - assert_spin_locked(&mchdev_lock); + lockdep_assert_held(&mchdev_lock); pxvid = I915_READ(PXVFREQ(dev_priv->rps.cur_freq)); pxvid = (pxvid >> 24) & 0x7f; diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index 441c51fd9746..b35b7a03eeaf 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -506,7 +506,7 @@ void intel_uncore_forcewake_get(struct drm_i915_private *dev_priv, void intel_uncore_forcewake_get__locked(struct drm_i915_private *dev_priv, enum forcewake_domains fw_domains) { - assert_spin_locked(&dev_priv->uncore.lock); + lockdep_assert_held(&dev_priv->uncore.lock); if (!dev_priv->uncore.funcs.force_wake_get) return; @@ -564,7 +564,7 @@ void intel_uncore_forcewake_put(struct drm_i915_private *dev_priv, void intel_uncore_forcewake_put__locked(struct drm_i915_private *dev_priv, enum forcewake_domains fw_domains) { - assert_spin_locked(&dev_priv->uncore.lock); + lockdep_assert_held(&dev_priv->uncore.lock); if (!dev_priv->uncore.funcs.force_wake_put) return; -- cgit From 25afdf89ad8c8751be25100b55ee7f09fddaa52e Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 2 Mar 2017 14:53:23 +0000 Subject: drm/i915/guc: Disable irq for __i915_guc_submit wq_lock __i915_guc_submit may be, despite my assertion, called from outside of an irq-safe spinlock so we need to use a full spin_lock_irqsave and not cheat using a spin_lock. (The initial notify callback from the completed fence is called before the spinlock is taken to wake up all waiters and call their callbacks.) [ 48.166581] kernel BUG at drivers/gpu/drm/i915/i915_guc_submission.c:527! [ 48.166617] invalid opcode: 0000 [#1] PREEMPT SMP [ 48.166644] Modules linked in: i915 prime_numbers x86_pkg_temp_thermal intel_powerclamp coretemp crct10dif_pclmul crc32_pclmul ghash_clmulni_intel mei_me mei i2c_i801 netconsole i2c_hid [last unloaded: i915] [ 48.166733] CPU: 2 PID: 5 Comm: kworker/u8:0 Tainted: G U 4.10.0nightly-170302-guc_scrub+ #19 [ 48.166778] Hardware name: /NUC6i5SYB, BIOS SYSKLi35.86A.0054.2016.0930.1102 09/30/2016 [ 48.166835] Workqueue: i915 __intel_autoenable_gt_powersave [i915] [ 48.166865] task: ffff88084ab7cf40 task.stack: ffffc90000064000 [ 48.166921] RIP: 0010:__i915_guc_submit+0x1e6/0x2a0 [i915] [ 48.166953] RSP: 0018:ffffc90000067c80 EFLAGS: 00010202 [ 48.166979] RAX: 0000000000000202 RBX: ffff8808465e0c68 RCX: 0000000000000201 [ 48.167016] RDX: 0000000080000201 RSI: ffff88084ab7d798 RDI: ffff88082b8a8040 [ 48.167054] RBP: ffffc90000067cd8 R08: 0000000000000001 R09: 0000000000000000 [ 48.167085] R10: 0000000000000000 R11: 0000000000000000 R12: ffff88082b8a8148 [ 48.167126] R13: 0000000000000000 R14: ffff88082f440000 R15: ffff88082e85e660 [ 48.167156] FS: 0000000000000000(0000) GS:ffff88086ed00000(0000) knlGS:0000000000000000 [ 48.167195] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 48.167226] CR2: 000055862ffcdc2c CR3: 0000000001e0f000 CR4: 00000000003406e0 [ 48.167257] Call Trace: [ 48.168112] ? trace_hardirqs_on+0xd/0x10 [ 48.168966] ? _raw_spin_unlock_irqrestore+0x4a/0x80 [ 48.169831] i915_guc_submit+0x1a/0x20 [i915] [ 48.170680] submit_notify+0x89/0xc0 [i915] [ 48.171512] __i915_sw_fence_complete+0x175/0x220 [i915] [ 48.172340] i915_sw_fence_complete+0x2a/0x50 [i915] [ 48.173158] i915_sw_fence_commit+0x21/0x30 [i915] [ 48.173968] __i915_add_request+0x238/0x530 [i915] [ 48.174764] __intel_autoenable_gt_powersave+0x8b/0xb0 [i915] [ 48.175549] process_one_work+0x218/0x690 [ 48.176318] ? process_one_work+0x197/0x690 [ 48.177183] worker_thread+0x4e/0x4a0 [ 48.178039] kthread+0x10c/0x140 [ 48.178878] ? process_one_work+0x690/0x690 [ 48.179718] ? kthread_create_on_node+0x40/0x40 [ 48.180568] ret_from_fork+0x31/0x40 [ 48.181423] Code: 02 00 00 43 89 84 ae 50 11 00 00 e8 75 01 62 e1 48 83 c4 30 5b 41 5c 41 5d 41 5e 41 5f 5d c3 48 c1 e0 20 48 09 c2 49 89 d0 eb 82 <0f> 0b 0f 0b 0f 0b 0f 0b 0f 0b 0f 0b 49 c1 e8 20 44 89 43 34 4a [ 48.183336] RIP: __i915_guc_submit+0x1e6/0x2a0 [i915] RSP: ffffc90000067c80 Reported-by: Arkadiusz Hiler Fixes: 349ab9192cc3 ("drm/i915/guc: Make wq_lock irq-safe") Fixes: 67b807a89230 ("drm/i915: Delay disabling the user interrupt for breadcrumbs") Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: Arkadiusz Hiler Link: http://patchwork.freedesktop.org/patch/msgid/20170302145323.12886-1-chris@chris-wilson.co.uk Reviewed-by: Tvrtko Ursulin Reviewed-by: Arkadiusz Hiler Tested-by: Arkadiusz Hiler --- drivers/gpu/drm/i915/i915_guc_submission.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c index 7b535a32fc27..beb38e30d0e9 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -515,6 +515,7 @@ static void __i915_guc_submit(struct drm_i915_gem_request *rq) unsigned int engine_id = engine->id; struct intel_guc *guc = &rq->i915->guc; struct i915_guc_client *client = guc->execbuf_client; + unsigned long flags; int b_ret; /* WA to flush out the pending GMADR writes to ring buffer. */ @@ -523,10 +524,7 @@ static void __i915_guc_submit(struct drm_i915_gem_request *rq) trace_i915_gem_request_in(rq, 0); - /* We are always called with irqs disabled */ - GEM_BUG_ON(!irqs_disabled()); - - spin_lock(&client->wq_lock); + spin_lock_irqsave(&client->wq_lock, flags); guc_wq_item_append(client, rq); b_ret = guc_ring_doorbell(client); @@ -539,7 +537,7 @@ static void __i915_guc_submit(struct drm_i915_gem_request *rq) guc->submissions[engine_id] += 1; guc->last_seqno[engine_id] = rq->global_seqno; - spin_unlock(&client->wq_lock); + spin_unlock_irqrestore(&client->wq_lock, flags); } static void i915_guc_submit(struct drm_i915_gem_request *rq) -- cgit From f73b567462e0f086bbff5ae22040ff1bc3d87487 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 2 Mar 2017 15:03:56 +0000 Subject: drm/i915: Include GT/seqno activity in engine/hangcheck debugfs Whilst investigating some mysterious failures with hangcheck not running during gem_busy/basic-hang-default, the question is why did we decide to cancel the retire_work (which queues the hangcheck)? That decision is based around GT activity, so include that information in the debug report. v2: Include the GT awake status in the error state Signed-off-by: Chris Wilson Cc: Mika Kuoppala Link: http://patchwork.freedesktop.org/patch/msgid/20170302150356.9713-1-chris@chris-wilson.co.uk Reviewed-by: Mika Kuoppala --- drivers/gpu/drm/i915/i915_debugfs.c | 17 +++++++++++++---- drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/i915_gpu_error.c | 3 +++ 3 files changed, 17 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 1a28b5279bec..b84ee7a323d2 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -1361,14 +1361,17 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused) } else seq_printf(m, "Hangcheck inactive\n"); + seq_printf(m, "GT active? %s\n", yesno(dev_priv->gt.awake)); + for_each_engine(engine, dev_priv, id) { struct intel_breadcrumbs *b = &engine->breadcrumbs; struct rb_node *rb; seq_printf(m, "%s:\n", engine->name); - seq_printf(m, "\tseqno = %x [current %x, last %x]\n", + seq_printf(m, "\tseqno = %x [current %x, last %x], inflight %d\n", engine->hangcheck.seqno, seqno[id], - intel_engine_last_submit(engine)); + intel_engine_last_submit(engine), + engine->timeline->inflight_seqnos); seq_printf(m, "\twaiters? %s, fake irq active? %s, stalled? %s\n", yesno(intel_engine_has_waiter(engine)), yesno(test_bit(engine->id, @@ -3253,6 +3256,11 @@ static int i915_engine_info(struct seq_file *m, void *unused) intel_runtime_pm_get(dev_priv); + seq_printf(m, "GT awake? %s\n", + yesno(dev_priv->gt.awake)); + seq_printf(m, "Global active requests: %d\n", + dev_priv->gt.active_requests); + for_each_engine(engine, dev_priv, id) { struct intel_breadcrumbs *b = &engine->breadcrumbs; struct drm_i915_gem_request *rq; @@ -3260,11 +3268,12 @@ static int i915_engine_info(struct seq_file *m, void *unused) u64 addr; seq_printf(m, "%s\n", engine->name); - seq_printf(m, "\tcurrent seqno %x, last %x, hangcheck %x [%d ms]\n", + seq_printf(m, "\tcurrent seqno %x, last %x, hangcheck %x [%d ms], inflight %d\n", intel_engine_get_seqno(engine), intel_engine_last_submit(engine), engine->hangcheck.seqno, - jiffies_to_msecs(jiffies - engine->hangcheck.action_timestamp)); + jiffies_to_msecs(jiffies - engine->hangcheck.action_timestamp), + engine->timeline->inflight_seqnos); rcu_read_lock(); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 48b34a937792..d9f29d19285f 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -934,6 +934,7 @@ struct i915_gpu_state { char error_msg[128]; bool simulated; + bool awake; int iommu; u32 reset_count; u32 suspend_count; diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 2b1d15668192..95797700384a 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -632,6 +632,7 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m, CSR_VERSION_MINOR(csr->version)); } + err_printf(m, "GT awake: %s\n", yesno(error->awake)); err_printf(m, "EIR: 0x%08x\n", error->eir); err_printf(m, "IER: 0x%08x\n", error->ier); for (i = 0; i < error->ngtier; i++) @@ -1615,6 +1616,8 @@ static void i915_error_capture_msg(struct drm_i915_private *dev_priv, static void i915_capture_gen_state(struct drm_i915_private *dev_priv, struct i915_gpu_state *error) { + error->awake = dev_priv->gt.awake; + error->iommu = -1; #ifdef CONFIG_INTEL_IOMMU error->iommu = intel_iommu_gfx_mapped; -- cgit From e5aac87eaed761f39e747e8711faadb7306df488 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 2 Mar 2017 15:15:44 +0000 Subject: drm/i915: Include power-management state in gpu error dump Useful for double checking that the device is powered up when it hung, include both the status of the power management and our rpm wakelock. Signed-off-by: Chris Wilson Cc: Mika Kuoppala Link: http://patchwork.freedesktop.org/patch/msgid/20170302151544.16915-1-chris@chris-wilson.co.uk Reviewed-by: Mika Kuoppala --- drivers/gpu/drm/i915/i915_drv.h | 2 ++ drivers/gpu/drm/i915/i915_gpu_error.c | 4 ++++ 2 files changed, 6 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index d9f29d19285f..430c40dd1622 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -935,6 +935,8 @@ struct i915_gpu_state { char error_msg[128]; bool simulated; bool awake; + bool wakelock; + bool suspended; int iommu; u32 reset_count; u32 suspend_count; diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 95797700384a..061af8040498 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -633,6 +633,8 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m, } err_printf(m, "GT awake: %s\n", yesno(error->awake)); + err_printf(m, "RPM wakelock: %s\n", yesno(error->wakelock)); + err_printf(m, "PM suspended: %s\n", yesno(error->suspended)); err_printf(m, "EIR: 0x%08x\n", error->eir); err_printf(m, "IER: 0x%08x\n", error->ier); for (i = 0; i < error->ngtier; i++) @@ -1617,6 +1619,8 @@ static void i915_capture_gen_state(struct drm_i915_private *dev_priv, struct i915_gpu_state *error) { error->awake = dev_priv->gt.awake; + error->wakelock = atomic_read(&dev_priv->pm.wakeref_count); + error->suspended = dev_priv->pm.suspended; error->iommu = -1; #ifdef CONFIG_INTEL_IOMMU -- cgit From 5be6e33400992d3450e1c8234a5af353e1560580 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Mon, 20 Feb 2017 16:04:43 +0200 Subject: drm/i915: Do .init_clock_gating() earlier to avoid it clobbering watermarks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently ILK-BDW explicitly disable LP1+ watermarks from their .init_clock_gating() hooks. Unfortunately that hook gets called way too late since by that time we've already initialized all the watermark state tracking which then gets out of sync with the hardware state. We may eventually want to consider killing off the explicit LP1+ disable from .init_clock_gating(). In the meantime however, we can avoid the problem by reordering the init sequence such that intel_modeset_init_hw()->intel_init_clock_gating() gets called prior to the hardware state takeover. I suppose prior to the two stage watermark programming we were magically saved by something that forced the watermarks to be reprogrammed fully after .init_clock_gating() got called. But now that no longer happens. Note that the diff might look a bit odd as it kills off one call of intel_update_cdclk(), but that's fine because intel_modeset_init_hw() does the exact same thing. Previously we just did it twice. Actually even this new init sequence is pretty bogus as .init_clock_gating() really should be called before any gem hardware init since it can configure various clock gating workarounds and whatnot that affect the GT side as well. Also intel_modeset_init() really should get split up into better defined init stages. Another "fun" detail is that intel_modeset_gem_init() is where RPS/RC6 gets configured. Why that is done from the display code is beyond me. I've decided to leave all this be for now, and just try to fix the init sequence enough for watermarks to work. Cc: stable@vger.kernel.org Cc: Gabriele Mazzotta Cc: David Purton Cc: Matt Roper Cc: Maarten Lankhorst Reported-by: Gabriele Mazzotta Reported-by: David Purton Tested-by: Gabriele Mazzotta Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=96645 Fixes: ed4a6a7ca853 ("drm/i915: Add two-stage ILK-style watermark programming (v11)") Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/20170220140443.30891-1-ville.syrjala@linux.intel.com Reviewed-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_display.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 9908bba5b3d3..5edf1ff8b688 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -14970,12 +14970,11 @@ int intel_modeset_init(struct drm_device *dev) } } - intel_update_czclk(dev_priv); - intel_update_cdclk(dev_priv); - dev_priv->cdclk.logical = dev_priv->cdclk.actual = dev_priv->cdclk.hw; - intel_shared_dpll_init(dev); + intel_update_czclk(dev_priv); + intel_modeset_init_hw(dev); + if (dev_priv->max_cdclk_freq == 0) intel_update_max_cdclk(dev_priv); @@ -15555,8 +15554,6 @@ void intel_modeset_gem_init(struct drm_device *dev) intel_init_gt_powersave(dev_priv); - intel_modeset_init_hw(dev); - intel_setup_overlay(dev_priv); } -- cgit From c8659efac544b7df1106022d78617a1ca1d9a38c Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 2 Mar 2017 12:25:25 +0000 Subject: drm/i915: Drop spinlocks around adding to the client request list Adding to the tail of the client request list as the only other user is in the throttle ioctl that iterates forwards over the list. It only needs protection against deletion of a request as it reads it, it simply won't see a new request added to the end of the list, or it would be too early and rejected. We can further reduce the number of spinlocks required when throttling by removing stale requests from the client_list as we throttle. Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Link: http://patchwork.freedesktop.org/patch/msgid/20170302122525.19675-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_debugfs.c | 2 +- drivers/gpu/drm/i915/i915_gem.c | 14 ++++++------ drivers/gpu/drm/i915/i915_gem_execbuffer.c | 14 ++++++++---- drivers/gpu/drm/i915/i915_gem_request.c | 34 ++++++------------------------ drivers/gpu/drm/i915/i915_gem_request.h | 4 +--- 5 files changed, 24 insertions(+), 44 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index b84ee7a323d2..15a76096fbb0 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -506,7 +506,7 @@ static int i915_gem_object_info(struct seq_file *m, void *data) mutex_lock(&dev->struct_mutex); request = list_first_entry_or_null(&file_priv->mm.request_list, struct drm_i915_gem_request, - client_list); + client_link); rcu_read_lock(); task = pid_task(request && request->ctx->pid ? request->ctx->pid : file->pid, diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 275e9e0799b9..edc59bd45f53 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3666,16 +3666,14 @@ i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file) return -EIO; spin_lock(&file_priv->mm.lock); - list_for_each_entry(request, &file_priv->mm.request_list, client_list) { + list_for_each_entry(request, &file_priv->mm.request_list, client_link) { if (time_after_eq(request->emitted_jiffies, recent_enough)) break; - /* - * Note that the request might not have been submitted yet. - * In which case emitted_jiffies will be zero. - */ - if (!request->emitted_jiffies) - continue; + if (target) { + list_del(&target->client_link); + target->file_priv = NULL; + } target = request; } @@ -4734,7 +4732,7 @@ void i915_gem_release(struct drm_device *dev, struct drm_file *file) * file_priv. */ spin_lock(&file_priv->mm.lock); - list_for_each_entry(request, &file_priv->mm.request_list, client_list) + list_for_each_entry(request, &file_priv->mm.request_list, client_link) request->file_priv = NULL; spin_unlock(&file_priv->mm.lock); diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index aa75ea2d0578..dd7181ed5eca 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -1407,6 +1407,14 @@ out: return vma; } +static void +add_to_client(struct drm_i915_gem_request *req, + struct drm_file *file) +{ + req->file_priv = file->driver_priv; + list_add_tail(&req->client_link, &req->file_priv->mm.request_list); +} + static int execbuf_submit(struct i915_execbuffer_params *params, struct drm_i915_gem_execbuffer2 *args, @@ -1772,10 +1780,6 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, */ params->request->batch = params->batch; - ret = i915_gem_request_add_to_client(params->request, file); - if (ret) - goto err_request; - /* * Save assorted stuff away to pass through to *_submission(). * NB: This data should be 'persistent' and not local as it will @@ -1793,6 +1797,8 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, ret = execbuf_submit(params, args, &eb->vmas); err_request: __i915_add_request(params->request, ret == 0); + add_to_client(params->request, file); + if (out_fence) { if (ret == 0) { fd_install(out_fence_fd, out_fence->file); diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index b881d4e1bd1e..c2cee9674cb0 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -82,42 +82,20 @@ const struct dma_fence_ops i915_fence_ops = { .release = i915_fence_release, }; -int i915_gem_request_add_to_client(struct drm_i915_gem_request *req, - struct drm_file *file) -{ - struct drm_i915_private *dev_private; - struct drm_i915_file_private *file_priv; - - WARN_ON(!req || !file || req->file_priv); - - if (!req || !file) - return -EINVAL; - - if (req->file_priv) - return -EINVAL; - - dev_private = req->i915; - file_priv = file->driver_priv; - - spin_lock(&file_priv->mm.lock); - req->file_priv = file_priv; - list_add_tail(&req->client_list, &file_priv->mm.request_list); - spin_unlock(&file_priv->mm.lock); - - return 0; -} - static inline void i915_gem_request_remove_from_client(struct drm_i915_gem_request *request) { - struct drm_i915_file_private *file_priv = request->file_priv; + struct drm_i915_file_private *file_priv; + file_priv = request->file_priv; if (!file_priv) return; spin_lock(&file_priv->mm.lock); - list_del(&request->client_list); - request->file_priv = NULL; + if (request->file_priv) { + list_del(&request->client_link); + request->file_priv = NULL; + } spin_unlock(&file_priv->mm.lock); } diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h index 0efee879df23..5018e55922f0 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.h +++ b/drivers/gpu/drm/i915/i915_gem_request.h @@ -180,7 +180,7 @@ struct drm_i915_gem_request { struct drm_i915_file_private *file_priv; /** file_priv list entry for this request */ - struct list_head client_list; + struct list_head client_link; }; extern const struct dma_fence_ops i915_fence_ops; @@ -193,8 +193,6 @@ static inline bool dma_fence_is_i915(const struct dma_fence *fence) struct drm_i915_gem_request * __must_check i915_gem_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx); -int i915_gem_request_add_to_client(struct drm_i915_gem_request *req, - struct drm_file *file); void i915_gem_request_retire_upto(struct drm_i915_gem_request *req); static inline struct drm_i915_gem_request * -- cgit From 8352aea3c3f44ea9065b77d3baedc58921710d49 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 3 Mar 2017 09:00:56 +0000 Subject: drm/i915: Differentiate between hangcheck waiting for timer or scheduler Check timer_pending() as well as work_pending() to see if the timer for the hangcheck has already expired and the work is pending execution on some list somewhere. v2: Use a more compact if-chain Signed-off-by: Chris Wilson Cc: Mika Kuoppala Link: http://patchwork.freedesktop.org/patch/msgid/20170303090056.19973-1-chris@chris-wilson.co.uk Reviewed-by: Mika Kuoppala --- drivers/gpu/drm/i915/i915_debugfs.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 15a76096fbb0..70acbbf50c75 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -1354,12 +1354,14 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused) intel_runtime_pm_put(dev_priv); - if (delayed_work_pending(&dev_priv->gpu_error.hangcheck_work)) { - seq_printf(m, "Hangcheck active, fires in %dms\n", + if (timer_pending(&dev_priv->gpu_error.hangcheck_work.timer)) + seq_printf(m, "Hangcheck active, timer fires in %dms\n", jiffies_to_msecs(dev_priv->gpu_error.hangcheck_work.timer.expires - jiffies)); - } else - seq_printf(m, "Hangcheck inactive\n"); + else if (delayed_work_pending(&dev_priv->gpu_error.hangcheck_work)) + seq_puts(m, "Hangcheck active, work pending\n"); + else + seq_puts(m, "Hangcheck inactive\n"); seq_printf(m, "GT active? %s\n", yesno(dev_priv->gt.awake)); -- cgit From 4cbe4b2b175c9dcfffa2311ea9dddd34be1a4f41 Mon Sep 17 00:00:00 2001 From: Ander Conselvan de Oliveira Date: Thu, 2 Mar 2017 14:58:51 +0200 Subject: drm/i915: Pass intel_crtc to fdi_link_train() hooks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The implementation of the fdi_link_train() hooks need an intel_crtc so just pass that instead of the generic crtc type. Signed-off-by: Ander Conselvan de Oliveira Reviewed-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/20170302125857.14665-2-ander.conselvan.de.oliveira@intel.com --- drivers/gpu/drm/i915/i915_drv.h | 2 +- drivers/gpu/drm/i915/intel_ddi.c | 13 ++++---- drivers/gpu/drm/i915/intel_display.c | 63 +++++++++++++++++------------------- drivers/gpu/drm/i915/intel_drv.h | 2 +- 4 files changed, 37 insertions(+), 43 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 430c40dd1622..b25c1df05145 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -669,7 +669,7 @@ struct drm_i915_display_funcs { struct intel_encoder *encoder, const struct drm_display_mode *adjusted_mode); void (*audio_codec_disable)(struct intel_encoder *encoder); - void (*fdi_link_train)(struct drm_crtc *crtc); + void (*fdi_link_train)(struct intel_crtc *crtc); void (*init_clock_gating)(struct drm_i915_private *dev_priv); int (*queue_flip)(struct drm_device *dev, struct drm_crtc *crtc, struct drm_framebuffer *fb, diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index a7c08d7027fa..111f6605bf67 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -674,15 +674,14 @@ static uint32_t hsw_pll_to_ddi_pll_sel(struct intel_shared_dpll *pll) * DDI A (which is used for eDP) */ -void hsw_fdi_link_train(struct drm_crtc *crtc) +void hsw_fdi_link_train(struct intel_crtc *crtc) { - struct drm_device *dev = crtc->dev; + struct drm_device *dev = crtc->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); struct intel_encoder *encoder; u32 temp, i, rx_ctl_val, ddi_pll_sel; - for_each_encoder_on_crtc(dev, crtc, encoder) { + for_each_encoder_on_crtc(dev, &crtc->base, encoder) { WARN_ON(encoder->type != INTEL_OUTPUT_ANALOG); intel_prepare_dp_ddi_buffers(encoder); } @@ -701,7 +700,7 @@ void hsw_fdi_link_train(struct drm_crtc *crtc) /* Enable the PCH Receiver FDI PLL */ rx_ctl_val = dev_priv->fdi_rx_config | FDI_RX_ENHANCE_FRAME_ENABLE | FDI_RX_PLL_ENABLE | - FDI_DP_PORT_WIDTH(intel_crtc->config->fdi_lanes); + FDI_DP_PORT_WIDTH(crtc->config->fdi_lanes); I915_WRITE(FDI_RX_CTL(PIPE_A), rx_ctl_val); POSTING_READ(FDI_RX_CTL(PIPE_A)); udelay(220); @@ -711,7 +710,7 @@ void hsw_fdi_link_train(struct drm_crtc *crtc) I915_WRITE(FDI_RX_CTL(PIPE_A), rx_ctl_val); /* Configure Port Clock Select */ - ddi_pll_sel = hsw_pll_to_ddi_pll_sel(intel_crtc->config->shared_dpll); + ddi_pll_sel = hsw_pll_to_ddi_pll_sel(crtc->config->shared_dpll); I915_WRITE(PORT_CLK_SEL(PORT_E), ddi_pll_sel); WARN_ON(ddi_pll_sel != PORT_CLK_SEL_SPLL); @@ -731,7 +730,7 @@ void hsw_fdi_link_train(struct drm_crtc *crtc) * port reversal bit */ I915_WRITE(DDI_BUF_CTL(PORT_E), DDI_BUF_CTL_ENABLE | - ((intel_crtc->config->fdi_lanes - 1) << 1) | + ((crtc->config->fdi_lanes - 1) << 1) | DDI_BUF_TRANS_SELECT(i / 2)); POSTING_READ(DDI_BUF_CTL(PORT_E)); diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 5edf1ff8b688..f65a37dce954 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -3646,12 +3646,11 @@ static void intel_update_pipe_config(struct intel_crtc *crtc, } } -static void intel_fdi_normal_train(struct drm_crtc *crtc) +static void intel_fdi_normal_train(struct intel_crtc *crtc) { - struct drm_device *dev = crtc->dev; + struct drm_device *dev = crtc->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - int pipe = intel_crtc->pipe; + int pipe = crtc->pipe; i915_reg_t reg; u32 temp; @@ -3689,12 +3688,11 @@ static void intel_fdi_normal_train(struct drm_crtc *crtc) } /* The FDI link training functions for ILK/Ibexpeak. */ -static void ironlake_fdi_link_train(struct drm_crtc *crtc) +static void ironlake_fdi_link_train(struct intel_crtc *crtc) { - struct drm_device *dev = crtc->dev; + struct drm_device *dev = crtc->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - int pipe = intel_crtc->pipe; + int pipe = crtc->pipe; i915_reg_t reg; u32 temp, tries; @@ -3715,7 +3713,7 @@ static void ironlake_fdi_link_train(struct drm_crtc *crtc) reg = FDI_TX_CTL(pipe); temp = I915_READ(reg); temp &= ~FDI_DP_PORT_WIDTH_MASK; - temp |= FDI_DP_PORT_WIDTH(intel_crtc->config->fdi_lanes); + temp |= FDI_DP_PORT_WIDTH(crtc->config->fdi_lanes); temp &= ~FDI_LINK_TRAIN_NONE; temp |= FDI_LINK_TRAIN_PATTERN_1; I915_WRITE(reg, temp | FDI_TX_ENABLE); @@ -3790,12 +3788,11 @@ static const int snb_b_fdi_train_param[] = { }; /* The FDI link training functions for SNB/Cougarpoint. */ -static void gen6_fdi_link_train(struct drm_crtc *crtc) +static void gen6_fdi_link_train(struct intel_crtc *crtc) { - struct drm_device *dev = crtc->dev; + struct drm_device *dev = crtc->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - int pipe = intel_crtc->pipe; + int pipe = crtc->pipe; i915_reg_t reg; u32 temp, i, retry; @@ -3814,7 +3811,7 @@ static void gen6_fdi_link_train(struct drm_crtc *crtc) reg = FDI_TX_CTL(pipe); temp = I915_READ(reg); temp &= ~FDI_DP_PORT_WIDTH_MASK; - temp |= FDI_DP_PORT_WIDTH(intel_crtc->config->fdi_lanes); + temp |= FDI_DP_PORT_WIDTH(crtc->config->fdi_lanes); temp &= ~FDI_LINK_TRAIN_NONE; temp |= FDI_LINK_TRAIN_PATTERN_1; temp &= ~FDI_LINK_TRAIN_VOL_EMP_MASK; @@ -3923,12 +3920,11 @@ static void gen6_fdi_link_train(struct drm_crtc *crtc) } /* Manual link training for Ivy Bridge A0 parts */ -static void ivb_manual_fdi_link_train(struct drm_crtc *crtc) +static void ivb_manual_fdi_link_train(struct intel_crtc *crtc) { - struct drm_device *dev = crtc->dev; + struct drm_device *dev = crtc->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - int pipe = intel_crtc->pipe; + int pipe = crtc->pipe; i915_reg_t reg; u32 temp, i, j; @@ -3966,7 +3962,7 @@ static void ivb_manual_fdi_link_train(struct drm_crtc *crtc) reg = FDI_TX_CTL(pipe); temp = I915_READ(reg); temp &= ~FDI_DP_PORT_WIDTH_MASK; - temp |= FDI_DP_PORT_WIDTH(intel_crtc->config->fdi_lanes); + temp |= FDI_DP_PORT_WIDTH(crtc->config->fdi_lanes); temp |= FDI_LINK_TRAIN_PATTERN_1_IVB; temp &= ~FDI_LINK_TRAIN_VOL_EMP_MASK; temp |= snb_b_fdi_train_param[j/2]; @@ -4437,12 +4433,12 @@ static void ivybridge_update_fdi_bc_bifurcation(struct intel_crtc *intel_crtc) /* Return which DP Port should be selected for Transcoder DP control */ static enum port -intel_trans_dp_port_sel(struct drm_crtc *crtc) +intel_trans_dp_port_sel(struct intel_crtc *crtc) { - struct drm_device *dev = crtc->dev; + struct drm_device *dev = crtc->base.dev; struct intel_encoder *encoder; - for_each_encoder_on_crtc(dev, crtc, encoder) { + for_each_encoder_on_crtc(dev, &crtc->base, encoder) { if (encoder->type == INTEL_OUTPUT_DP || encoder->type == INTEL_OUTPUT_EDP) return enc_to_dig_port(&encoder->base)->port; @@ -4459,18 +4455,17 @@ intel_trans_dp_port_sel(struct drm_crtc *crtc) * - DP transcoding bits * - transcoder */ -static void ironlake_pch_enable(struct drm_crtc *crtc) +static void ironlake_pch_enable(struct intel_crtc *crtc) { - struct drm_device *dev = crtc->dev; + struct drm_device *dev = crtc->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - int pipe = intel_crtc->pipe; + int pipe = crtc->pipe; u32 temp; assert_pch_transcoder_disabled(dev_priv, pipe); if (IS_IVYBRIDGE(dev_priv)) - ivybridge_update_fdi_bc_bifurcation(intel_crtc); + ivybridge_update_fdi_bc_bifurcation(crtc); /* Write the TU size bits before fdi link training, so that error * detection works. */ @@ -4488,7 +4483,7 @@ static void ironlake_pch_enable(struct drm_crtc *crtc) temp = I915_READ(PCH_DPLL_SEL); temp |= TRANS_DPLL_ENABLE(pipe); sel = TRANS_DPLLB_SEL(pipe); - if (intel_crtc->config->shared_dpll == + if (crtc->config->shared_dpll == intel_get_shared_dpll_by_id(dev_priv, DPLL_ID_PCH_PLL_B)) temp |= sel; else @@ -4503,19 +4498,19 @@ static void ironlake_pch_enable(struct drm_crtc *crtc) * Note that enable_shared_dpll tries to do the right thing, but * get_shared_dpll unconditionally resets the pll - we need that to have * the right LVDS enable sequence. */ - intel_enable_shared_dpll(intel_crtc); + intel_enable_shared_dpll(crtc); /* set transcoder timing, panel must allow it */ assert_panel_unlocked(dev_priv, pipe); - ironlake_pch_transcoder_set_timings(intel_crtc, pipe); + ironlake_pch_transcoder_set_timings(crtc, pipe); intel_fdi_normal_train(crtc); /* For PCH DP, enable TRANS_DP_CTL */ if (HAS_PCH_CPT(dev_priv) && - intel_crtc_has_dp_encoder(intel_crtc->config)) { + intel_crtc_has_dp_encoder(crtc->config)) { const struct drm_display_mode *adjusted_mode = - &intel_crtc->config->base.adjusted_mode; + &crtc->config->base.adjusted_mode; u32 bpc = (I915_READ(PIPECONF(pipe)) & PIPECONF_BPC_MASK) >> 5; i915_reg_t reg = TRANS_DP_CTL(pipe); temp = I915_READ(reg); @@ -5289,7 +5284,7 @@ static void ironlake_crtc_enable(struct intel_crtc_state *pipe_config, intel_enable_pipe(intel_crtc); if (intel_crtc->config->has_pch_encoder) - ironlake_pch_enable(crtc); + ironlake_pch_enable(intel_crtc); assert_vblank_disabled(crtc); drm_crtc_vblank_on(crtc); @@ -5371,7 +5366,7 @@ static void haswell_crtc_enable(struct intel_crtc_state *pipe_config, intel_encoders_pre_enable(crtc, pipe_config, old_state); if (intel_crtc->config->has_pch_encoder) - dev_priv->display.fdi_link_train(crtc); + dev_priv->display.fdi_link_train(intel_crtc); if (!transcoder_is_dsi(cpu_transcoder)) intel_ddi_enable_pipe_clock(intel_crtc); diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index eadc72df5a17..9caf37170335 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -1225,7 +1225,7 @@ void intel_ddi_fdi_post_disable(struct intel_encoder *intel_encoder, struct intel_crtc_state *old_crtc_state, struct drm_connector_state *old_conn_state); void intel_prepare_dp_ddi_buffers(struct intel_encoder *encoder); -void hsw_fdi_link_train(struct drm_crtc *crtc); +void hsw_fdi_link_train(struct intel_crtc *crtc); void intel_ddi_init(struct drm_i915_private *dev_priv, enum port port); enum port intel_ddi_get_encoder_port(struct intel_encoder *intel_encoder); bool intel_ddi_get_hw_state(struct intel_encoder *encoder, enum pipe *pipe); -- cgit From 0dcdc382c3823e959e9253f2f0c4e436e7ef9a32 Mon Sep 17 00:00:00 2001 From: Ander Conselvan de Oliveira Date: Thu, 2 Mar 2017 14:58:52 +0200 Subject: drm/i915: Pass intel_crtc to intel_lpt_pch_enable() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The function intel_lpt_pch_enable() needs an intel_crtc so pass that instead of the generic crtc type. Signed-off-by: Ander Conselvan de Oliveira Reviewed-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/20170302125857.14665-3-ander.conselvan.de.oliveira@intel.com --- drivers/gpu/drm/i915/intel_display.c | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index f65a37dce954..954210b974c1 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -4249,10 +4249,10 @@ void lpt_disable_iclkip(struct drm_i915_private *dev_priv) } /* Program iCLKIP clock to the desired frequency */ -static void lpt_program_iclkip(struct drm_crtc *crtc) +static void lpt_program_iclkip(struct intel_crtc *crtc) { - struct drm_i915_private *dev_priv = to_i915(crtc->dev); - int clock = to_intel_crtc(crtc)->config->base.adjusted_mode.crtc_clock; + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + int clock = crtc->config->base.adjusted_mode.crtc_clock; u32 divsel, phaseinc, auxdiv, phasedir = 0; u32 temp; @@ -4545,19 +4545,17 @@ static void ironlake_pch_enable(struct intel_crtc *crtc) ironlake_enable_pch_transcoder(dev_priv, pipe); } -static void lpt_pch_enable(struct drm_crtc *crtc) +static void lpt_pch_enable(struct intel_crtc *crtc) { - struct drm_device *dev = crtc->dev; - struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - enum transcoder cpu_transcoder = intel_crtc->config->cpu_transcoder; + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + enum transcoder cpu_transcoder = crtc->config->cpu_transcoder; assert_pch_transcoder_disabled(dev_priv, TRANSCODER_A); lpt_program_iclkip(crtc); /* Set transcoder timing. */ - ironlake_pch_transcoder_set_timings(intel_crtc, PIPE_A); + ironlake_pch_transcoder_set_timings(crtc, PIPE_A); lpt_enable_pch_transcoder(dev_priv, cpu_transcoder); } @@ -5394,7 +5392,7 @@ static void haswell_crtc_enable(struct intel_crtc_state *pipe_config, intel_enable_pipe(intel_crtc); if (intel_crtc->config->has_pch_encoder) - lpt_pch_enable(crtc); + lpt_pch_enable(intel_crtc); if (intel_crtc_has_type(intel_crtc->config, INTEL_OUTPUT_DP_MST)) intel_ddi_set_vc_payload_alloc(crtc, true); -- cgit From 2ce42273248a8b0fe7a819cb2624896ef1c60fe1 Mon Sep 17 00:00:00 2001 From: Ander Conselvan de Oliveira Date: Thu, 2 Mar 2017 14:58:53 +0200 Subject: drm/i915: Pass pipe_config to pch_enable() functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Using crtc->config directly is being removed in favor of passing a pipe_config. Follow the trend and pass pipe_config to pch_enable() functions. v2: s/pipe_config/crtc_state/ (Ville) - constify crtc_state. (Ville) - take crtc from crtc_state. (Ville) Signed-off-by: Ander Conselvan de Oliveira Reviewed-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/20170302125857.14665-4-ander.conselvan.de.oliveira@intel.com --- drivers/gpu/drm/i915/intel_display.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 954210b974c1..5640f66f3a60 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -4455,8 +4455,9 @@ intel_trans_dp_port_sel(struct intel_crtc *crtc) * - DP transcoding bits * - transcoder */ -static void ironlake_pch_enable(struct intel_crtc *crtc) +static void ironlake_pch_enable(const struct intel_crtc_state *crtc_state) { + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); struct drm_device *dev = crtc->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); int pipe = crtc->pipe; @@ -4483,7 +4484,7 @@ static void ironlake_pch_enable(struct intel_crtc *crtc) temp = I915_READ(PCH_DPLL_SEL); temp |= TRANS_DPLL_ENABLE(pipe); sel = TRANS_DPLLB_SEL(pipe); - if (crtc->config->shared_dpll == + if (crtc_state->shared_dpll == intel_get_shared_dpll_by_id(dev_priv, DPLL_ID_PCH_PLL_B)) temp |= sel; else @@ -4508,9 +4509,9 @@ static void ironlake_pch_enable(struct intel_crtc *crtc) /* For PCH DP, enable TRANS_DP_CTL */ if (HAS_PCH_CPT(dev_priv) && - intel_crtc_has_dp_encoder(crtc->config)) { + intel_crtc_has_dp_encoder(crtc_state)) { const struct drm_display_mode *adjusted_mode = - &crtc->config->base.adjusted_mode; + &crtc_state->base.adjusted_mode; u32 bpc = (I915_READ(PIPECONF(pipe)) & PIPECONF_BPC_MASK) >> 5; i915_reg_t reg = TRANS_DP_CTL(pipe); temp = I915_READ(reg); @@ -4545,10 +4546,11 @@ static void ironlake_pch_enable(struct intel_crtc *crtc) ironlake_enable_pch_transcoder(dev_priv, pipe); } -static void lpt_pch_enable(struct intel_crtc *crtc) +static void lpt_pch_enable(const struct intel_crtc_state *crtc_state) { + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); - enum transcoder cpu_transcoder = crtc->config->cpu_transcoder; + enum transcoder cpu_transcoder = crtc_state->cpu_transcoder; assert_pch_transcoder_disabled(dev_priv, TRANSCODER_A); @@ -5282,7 +5284,7 @@ static void ironlake_crtc_enable(struct intel_crtc_state *pipe_config, intel_enable_pipe(intel_crtc); if (intel_crtc->config->has_pch_encoder) - ironlake_pch_enable(intel_crtc); + ironlake_pch_enable(pipe_config); assert_vblank_disabled(crtc); drm_crtc_vblank_on(crtc); @@ -5392,7 +5394,7 @@ static void haswell_crtc_enable(struct intel_crtc_state *pipe_config, intel_enable_pipe(intel_crtc); if (intel_crtc->config->has_pch_encoder) - lpt_pch_enable(intel_crtc); + lpt_pch_enable(pipe_config); if (intel_crtc_has_type(intel_crtc->config, INTEL_OUTPUT_DP_MST)) intel_ddi_set_vc_payload_alloc(crtc, true); -- cgit From dc4a109474c6bffacaf754440fa88ff0dd957667 Mon Sep 17 00:00:00 2001 From: Ander Conselvan de Oliveira Date: Thu, 2 Mar 2017 14:58:54 +0200 Subject: drm/i915: Pass pipe_config to fdi_link_train() functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It is preferred to pass pipe_config to functions instead of accessing crtc->config directly. Follow suit and pass pipe_config to the fdi link train functions. v2: Add const; s/pipe_config/crtc_state/ (Ville) Signed-off-by: Ander Conselvan de Oliveira Reviewed-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/20170302125857.14665-5-ander.conselvan.de.oliveira@intel.com --- drivers/gpu/drm/i915/i915_drv.h | 3 ++- drivers/gpu/drm/i915/intel_ddi.c | 9 +++++---- drivers/gpu/drm/i915/intel_display.c | 19 +++++++++++-------- drivers/gpu/drm/i915/intel_drv.h | 3 ++- 4 files changed, 20 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index b25c1df05145..07bf2e32ffa7 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -669,7 +669,8 @@ struct drm_i915_display_funcs { struct intel_encoder *encoder, const struct drm_display_mode *adjusted_mode); void (*audio_codec_disable)(struct intel_encoder *encoder); - void (*fdi_link_train)(struct intel_crtc *crtc); + void (*fdi_link_train)(struct intel_crtc *crtc, + const struct intel_crtc_state *crtc_state); void (*init_clock_gating)(struct drm_i915_private *dev_priv); int (*queue_flip)(struct drm_device *dev, struct drm_crtc *crtc, struct drm_framebuffer *fb, diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index 111f6605bf67..a320378681b3 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -674,7 +674,8 @@ static uint32_t hsw_pll_to_ddi_pll_sel(struct intel_shared_dpll *pll) * DDI A (which is used for eDP) */ -void hsw_fdi_link_train(struct intel_crtc *crtc) +void hsw_fdi_link_train(struct intel_crtc *crtc, + const struct intel_crtc_state *crtc_state) { struct drm_device *dev = crtc->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); @@ -700,7 +701,7 @@ void hsw_fdi_link_train(struct intel_crtc *crtc) /* Enable the PCH Receiver FDI PLL */ rx_ctl_val = dev_priv->fdi_rx_config | FDI_RX_ENHANCE_FRAME_ENABLE | FDI_RX_PLL_ENABLE | - FDI_DP_PORT_WIDTH(crtc->config->fdi_lanes); + FDI_DP_PORT_WIDTH(crtc_state->fdi_lanes); I915_WRITE(FDI_RX_CTL(PIPE_A), rx_ctl_val); POSTING_READ(FDI_RX_CTL(PIPE_A)); udelay(220); @@ -710,7 +711,7 @@ void hsw_fdi_link_train(struct intel_crtc *crtc) I915_WRITE(FDI_RX_CTL(PIPE_A), rx_ctl_val); /* Configure Port Clock Select */ - ddi_pll_sel = hsw_pll_to_ddi_pll_sel(crtc->config->shared_dpll); + ddi_pll_sel = hsw_pll_to_ddi_pll_sel(crtc_state->shared_dpll); I915_WRITE(PORT_CLK_SEL(PORT_E), ddi_pll_sel); WARN_ON(ddi_pll_sel != PORT_CLK_SEL_SPLL); @@ -730,7 +731,7 @@ void hsw_fdi_link_train(struct intel_crtc *crtc) * port reversal bit */ I915_WRITE(DDI_BUF_CTL(PORT_E), DDI_BUF_CTL_ENABLE | - ((crtc->config->fdi_lanes - 1) << 1) | + ((crtc_state->fdi_lanes - 1) << 1) | DDI_BUF_TRANS_SELECT(i / 2)); POSTING_READ(DDI_BUF_CTL(PORT_E)); diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 5640f66f3a60..649251c01639 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -3688,7 +3688,8 @@ static void intel_fdi_normal_train(struct intel_crtc *crtc) } /* The FDI link training functions for ILK/Ibexpeak. */ -static void ironlake_fdi_link_train(struct intel_crtc *crtc) +static void ironlake_fdi_link_train(struct intel_crtc *crtc, + const struct intel_crtc_state *crtc_state) { struct drm_device *dev = crtc->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); @@ -3713,7 +3714,7 @@ static void ironlake_fdi_link_train(struct intel_crtc *crtc) reg = FDI_TX_CTL(pipe); temp = I915_READ(reg); temp &= ~FDI_DP_PORT_WIDTH_MASK; - temp |= FDI_DP_PORT_WIDTH(crtc->config->fdi_lanes); + temp |= FDI_DP_PORT_WIDTH(crtc_state->fdi_lanes); temp &= ~FDI_LINK_TRAIN_NONE; temp |= FDI_LINK_TRAIN_PATTERN_1; I915_WRITE(reg, temp | FDI_TX_ENABLE); @@ -3788,7 +3789,8 @@ static const int snb_b_fdi_train_param[] = { }; /* The FDI link training functions for SNB/Cougarpoint. */ -static void gen6_fdi_link_train(struct intel_crtc *crtc) +static void gen6_fdi_link_train(struct intel_crtc *crtc, + const struct intel_crtc_state *crtc_state) { struct drm_device *dev = crtc->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); @@ -3811,7 +3813,7 @@ static void gen6_fdi_link_train(struct intel_crtc *crtc) reg = FDI_TX_CTL(pipe); temp = I915_READ(reg); temp &= ~FDI_DP_PORT_WIDTH_MASK; - temp |= FDI_DP_PORT_WIDTH(crtc->config->fdi_lanes); + temp |= FDI_DP_PORT_WIDTH(crtc_state->fdi_lanes); temp &= ~FDI_LINK_TRAIN_NONE; temp |= FDI_LINK_TRAIN_PATTERN_1; temp &= ~FDI_LINK_TRAIN_VOL_EMP_MASK; @@ -3920,7 +3922,8 @@ static void gen6_fdi_link_train(struct intel_crtc *crtc) } /* Manual link training for Ivy Bridge A0 parts */ -static void ivb_manual_fdi_link_train(struct intel_crtc *crtc) +static void ivb_manual_fdi_link_train(struct intel_crtc *crtc, + const struct intel_crtc_state *crtc_state) { struct drm_device *dev = crtc->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); @@ -3962,7 +3965,7 @@ static void ivb_manual_fdi_link_train(struct intel_crtc *crtc) reg = FDI_TX_CTL(pipe); temp = I915_READ(reg); temp &= ~FDI_DP_PORT_WIDTH_MASK; - temp |= FDI_DP_PORT_WIDTH(crtc->config->fdi_lanes); + temp |= FDI_DP_PORT_WIDTH(crtc_state->fdi_lanes); temp |= FDI_LINK_TRAIN_PATTERN_1_IVB; temp &= ~FDI_LINK_TRAIN_VOL_EMP_MASK; temp |= snb_b_fdi_train_param[j/2]; @@ -4474,7 +4477,7 @@ static void ironlake_pch_enable(const struct intel_crtc_state *crtc_state) I915_READ(PIPE_DATA_M1(pipe)) & TU_SIZE_MASK); /* For PCH output, training FDI link */ - dev_priv->display.fdi_link_train(crtc); + dev_priv->display.fdi_link_train(crtc, crtc_state); /* We need to program the right clock selection before writing the pixel * mutliplier into the DPLL. */ @@ -5366,7 +5369,7 @@ static void haswell_crtc_enable(struct intel_crtc_state *pipe_config, intel_encoders_pre_enable(crtc, pipe_config, old_state); if (intel_crtc->config->has_pch_encoder) - dev_priv->display.fdi_link_train(intel_crtc); + dev_priv->display.fdi_link_train(intel_crtc, pipe_config); if (!transcoder_is_dsi(cpu_transcoder)) intel_ddi_enable_pipe_clock(intel_crtc); diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 9caf37170335..b3ded830c1b3 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -1225,7 +1225,8 @@ void intel_ddi_fdi_post_disable(struct intel_encoder *intel_encoder, struct intel_crtc_state *old_crtc_state, struct drm_connector_state *old_conn_state); void intel_prepare_dp_ddi_buffers(struct intel_encoder *encoder); -void hsw_fdi_link_train(struct intel_crtc *crtc); +void hsw_fdi_link_train(struct intel_crtc *crtc, + const struct intel_crtc_state *crtc_state); void intel_ddi_init(struct drm_i915_private *dev_priv, enum port port); enum port intel_ddi_get_encoder_port(struct intel_encoder *intel_encoder); bool intel_ddi_get_hw_state(struct intel_encoder *encoder, enum pipe *pipe); -- cgit From e9ce1a625fcae9d9dd64a877535c8d10ddfb89a0 Mon Sep 17 00:00:00 2001 From: Ander Conselvan de Oliveira Date: Thu, 2 Mar 2017 14:58:55 +0200 Subject: drm/i915: Pass intel_crtc to DDI functions called from crtc en/disable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pass intel_crtc to functions intel_ddi_enable_transcoder_func(), intel_ddi_set_pipe_settings() and intel_ddi_set_vc_payload_alloc(), instead of the generic crtc type. By changing the functions intel_ddi_get_crtc_encoder() so that it receives an intel_crtc parameter, there is no need for the drm_crtc in the callers. Signed-off-by: Ander Conselvan de Oliveira Reviewed-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/20170302125857.14665-6-ander.conselvan.de.oliveira@intel.com --- drivers/gpu/drm/i915/intel_ddi.c | 62 ++++++++++++++++-------------------- drivers/gpu/drm/i915/intel_display.c | 8 ++--- drivers/gpu/drm/i915/intel_drv.h | 6 ++-- 3 files changed, 34 insertions(+), 42 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index a320378681b3..0db526ec2f44 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -818,21 +818,20 @@ void intel_ddi_init_dp_buf_reg(struct intel_encoder *encoder) } static struct intel_encoder * -intel_ddi_get_crtc_encoder(struct drm_crtc *crtc) +intel_ddi_get_crtc_encoder(struct intel_crtc *crtc) { - struct drm_device *dev = crtc->dev; - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); + struct drm_device *dev = crtc->base.dev; struct intel_encoder *intel_encoder, *ret = NULL; int num_encoders = 0; - for_each_encoder_on_crtc(dev, crtc, intel_encoder) { + for_each_encoder_on_crtc(dev, &crtc->base, intel_encoder) { ret = intel_encoder; num_encoders++; } if (num_encoders != 1) WARN(1, "%d encoders on crtc for pipe %c\n", num_encoders, - pipe_name(intel_crtc->pipe)); + pipe_name(crtc->pipe)); BUG_ON(ret == NULL); return ret; @@ -1194,12 +1193,11 @@ bool intel_ddi_pll_select(struct intel_crtc *intel_crtc, intel_encoder); } -void intel_ddi_set_pipe_settings(struct drm_crtc *crtc) +void intel_ddi_set_pipe_settings(struct intel_crtc *crtc) { - struct drm_i915_private *dev_priv = to_i915(crtc->dev); - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); struct intel_encoder *intel_encoder = intel_ddi_get_crtc_encoder(crtc); - enum transcoder cpu_transcoder = intel_crtc->config->cpu_transcoder; + enum transcoder cpu_transcoder = crtc->config->cpu_transcoder; int type = intel_encoder->type; uint32_t temp; @@ -1207,7 +1205,7 @@ void intel_ddi_set_pipe_settings(struct drm_crtc *crtc) WARN_ON(transcoder_is_dsi(cpu_transcoder)); temp = TRANS_MSA_SYNC_CLK; - switch (intel_crtc->config->pipe_bpp) { + switch (crtc->config->pipe_bpp) { case 18: temp |= TRANS_MSA_6_BPC; break; @@ -1227,12 +1225,10 @@ void intel_ddi_set_pipe_settings(struct drm_crtc *crtc) } } -void intel_ddi_set_vc_payload_alloc(struct drm_crtc *crtc, bool state) +void intel_ddi_set_vc_payload_alloc(struct intel_crtc *crtc, bool state) { - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - struct drm_device *dev = crtc->dev; - struct drm_i915_private *dev_priv = to_i915(dev); - enum transcoder cpu_transcoder = intel_crtc->config->cpu_transcoder; + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + enum transcoder cpu_transcoder = crtc->config->cpu_transcoder; uint32_t temp; temp = I915_READ(TRANS_DDI_FUNC_CTL(cpu_transcoder)); if (state == true) @@ -1242,14 +1238,12 @@ void intel_ddi_set_vc_payload_alloc(struct drm_crtc *crtc, bool state) I915_WRITE(TRANS_DDI_FUNC_CTL(cpu_transcoder), temp); } -void intel_ddi_enable_transcoder_func(struct drm_crtc *crtc) +void intel_ddi_enable_transcoder_func(struct intel_crtc *crtc) { - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); struct intel_encoder *intel_encoder = intel_ddi_get_crtc_encoder(crtc); - struct drm_device *dev = crtc->dev; - struct drm_i915_private *dev_priv = to_i915(dev); - enum pipe pipe = intel_crtc->pipe; - enum transcoder cpu_transcoder = intel_crtc->config->cpu_transcoder; + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + enum pipe pipe = crtc->pipe; + enum transcoder cpu_transcoder = crtc->config->cpu_transcoder; enum port port = intel_ddi_get_encoder_port(intel_encoder); int type = intel_encoder->type; uint32_t temp; @@ -1258,7 +1252,7 @@ void intel_ddi_enable_transcoder_func(struct drm_crtc *crtc) temp = TRANS_DDI_FUNC_ENABLE; temp |= TRANS_DDI_SELECT_PORT(port); - switch (intel_crtc->config->pipe_bpp) { + switch (crtc->config->pipe_bpp) { case 18: temp |= TRANS_DDI_BPC_6; break; @@ -1275,9 +1269,9 @@ void intel_ddi_enable_transcoder_func(struct drm_crtc *crtc) BUG(); } - if (intel_crtc->config->base.adjusted_mode.flags & DRM_MODE_FLAG_PVSYNC) + if (crtc->config->base.adjusted_mode.flags & DRM_MODE_FLAG_PVSYNC) temp |= TRANS_DDI_PVSYNC; - if (intel_crtc->config->base.adjusted_mode.flags & DRM_MODE_FLAG_PHSYNC) + if (crtc->config->base.adjusted_mode.flags & DRM_MODE_FLAG_PHSYNC) temp |= TRANS_DDI_PHSYNC; if (cpu_transcoder == TRANSCODER_EDP) { @@ -1288,8 +1282,8 @@ void intel_ddi_enable_transcoder_func(struct drm_crtc *crtc) * using motion blur mitigation (which we don't * support). */ if (IS_HASWELL(dev_priv) && - (intel_crtc->config->pch_pfit.enabled || - intel_crtc->config->pch_pfit.force_thru)) + (crtc->config->pch_pfit.enabled || + crtc->config->pch_pfit.force_thru)) temp |= TRANS_DDI_EDP_INPUT_A_ONOFF; else temp |= TRANS_DDI_EDP_INPUT_A_ON; @@ -1307,20 +1301,20 @@ void intel_ddi_enable_transcoder_func(struct drm_crtc *crtc) } if (type == INTEL_OUTPUT_HDMI) { - if (intel_crtc->config->has_hdmi_sink) + if (crtc->config->has_hdmi_sink) temp |= TRANS_DDI_MODE_SELECT_HDMI; else temp |= TRANS_DDI_MODE_SELECT_DVI; } else if (type == INTEL_OUTPUT_ANALOG) { temp |= TRANS_DDI_MODE_SELECT_FDI; - temp |= (intel_crtc->config->fdi_lanes - 1) << 1; + temp |= (crtc->config->fdi_lanes - 1) << 1; } else if (type == INTEL_OUTPUT_DP || type == INTEL_OUTPUT_EDP) { temp |= TRANS_DDI_MODE_SELECT_DP_SST; - temp |= DDI_PORT_WIDTH(intel_crtc->config->lane_count); + temp |= DDI_PORT_WIDTH(crtc->config->lane_count); } else if (type == INTEL_OUTPUT_DP_MST) { temp |= TRANS_DDI_MODE_SELECT_DP_MST; - temp |= DDI_PORT_WIDTH(intel_crtc->config->lane_count); + temp |= DDI_PORT_WIDTH(crtc->config->lane_count); } else { WARN(1, "Invalid encoder type %d for pipe %c\n", intel_encoder->type, pipe_name(pipe)); @@ -1484,14 +1478,12 @@ static u64 intel_ddi_get_power_domains(struct intel_encoder *encoder) return 0; } -void intel_ddi_enable_pipe_clock(struct intel_crtc *intel_crtc) +void intel_ddi_enable_pipe_clock(struct intel_crtc *crtc) { - struct drm_crtc *crtc = &intel_crtc->base; - struct drm_device *dev = crtc->dev; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); struct intel_encoder *intel_encoder = intel_ddi_get_crtc_encoder(crtc); enum port port = intel_ddi_get_encoder_port(intel_encoder); - enum transcoder cpu_transcoder = intel_crtc->config->cpu_transcoder; + enum transcoder cpu_transcoder = crtc->config->cpu_transcoder; if (cpu_transcoder != TRANSCODER_EDP) I915_WRITE(TRANS_CLK_SEL(cpu_transcoder), diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 649251c01639..ebaf9655cbd6 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -5385,9 +5385,9 @@ static void haswell_crtc_enable(struct intel_crtc_state *pipe_config, */ intel_color_load_luts(&pipe_config->base); - intel_ddi_set_pipe_settings(crtc); + intel_ddi_set_pipe_settings(intel_crtc); if (!transcoder_is_dsi(cpu_transcoder)) - intel_ddi_enable_transcoder_func(crtc); + intel_ddi_enable_transcoder_func(intel_crtc); if (dev_priv->display.initial_watermarks != NULL) dev_priv->display.initial_watermarks(old_intel_state, pipe_config); @@ -5400,7 +5400,7 @@ static void haswell_crtc_enable(struct intel_crtc_state *pipe_config, lpt_pch_enable(pipe_config); if (intel_crtc_has_type(intel_crtc->config, INTEL_OUTPUT_DP_MST)) - intel_ddi_set_vc_payload_alloc(crtc, true); + intel_ddi_set_vc_payload_alloc(intel_crtc, true); assert_vblank_disabled(crtc); drm_crtc_vblank_on(crtc); @@ -5522,7 +5522,7 @@ static void haswell_crtc_disable(struct intel_crtc_state *old_crtc_state, intel_disable_pipe(intel_crtc); if (intel_crtc_has_type(intel_crtc->config, INTEL_OUTPUT_DP_MST)) - intel_ddi_set_vc_payload_alloc(crtc, false); + intel_ddi_set_vc_payload_alloc(intel_crtc, false); if (!transcoder_is_dsi(cpu_transcoder)) intel_ddi_disable_transcoder_func(dev_priv, cpu_transcoder); diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index b3ded830c1b3..4cb1f57ad6ba 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -1230,14 +1230,14 @@ void hsw_fdi_link_train(struct intel_crtc *crtc, void intel_ddi_init(struct drm_i915_private *dev_priv, enum port port); enum port intel_ddi_get_encoder_port(struct intel_encoder *intel_encoder); bool intel_ddi_get_hw_state(struct intel_encoder *encoder, enum pipe *pipe); -void intel_ddi_enable_transcoder_func(struct drm_crtc *crtc); +void intel_ddi_enable_transcoder_func(struct intel_crtc *crtc); void intel_ddi_disable_transcoder_func(struct drm_i915_private *dev_priv, enum transcoder cpu_transcoder); void intel_ddi_enable_pipe_clock(struct intel_crtc *intel_crtc); void intel_ddi_disable_pipe_clock(struct intel_crtc *intel_crtc); bool intel_ddi_pll_select(struct intel_crtc *crtc, struct intel_crtc_state *crtc_state); -void intel_ddi_set_pipe_settings(struct drm_crtc *crtc); +void intel_ddi_set_pipe_settings(struct intel_crtc *crtc); void intel_ddi_prepare_link_retrain(struct intel_dp *intel_dp); bool intel_ddi_connector_get_hw_state(struct intel_connector *intel_connector); bool intel_ddi_is_audio_enabled(struct drm_i915_private *dev_priv, @@ -1250,7 +1250,7 @@ intel_ddi_get_crtc_new_encoder(struct intel_crtc_state *crtc_state); void intel_ddi_init_dp_buf_reg(struct intel_encoder *encoder); void intel_ddi_clock_get(struct intel_encoder *encoder, struct intel_crtc_state *pipe_config); -void intel_ddi_set_vc_payload_alloc(struct drm_crtc *crtc, bool state); +void intel_ddi_set_vc_payload_alloc(struct intel_crtc *crtc, bool state); uint32_t ddi_signal_levels(struct intel_dp *intel_dp); u8 intel_ddi_dp_voltage_max(struct intel_encoder *encoder); -- cgit From 3dc38eea665f383c84cc8d858b9a7645c0b29c54 Mon Sep 17 00:00:00 2001 From: Ander Conselvan de Oliveira Date: Thu, 2 Mar 2017 14:58:56 +0200 Subject: drm/i915: Remove direct usages of intel_crtc->config from DDI code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove direct usages of intel_crtc->config from the DDI code. Functions that didn't yet take a pipe_config as an argument were coverted to do so. v2: s/pipe_config/const crtc_state/ (Ville) - take crtc from crtc_state. (Ville) Signed-off-by: Ander Conselvan de Oliveira Reviewed-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/20170302125857.14665-7-ander.conselvan.de.oliveira@intel.com --- drivers/gpu/drm/i915/intel_ddi.c | 62 +++++++++++++++++++----------------- drivers/gpu/drm/i915/intel_display.c | 12 +++---- drivers/gpu/drm/i915/intel_drv.h | 11 ++++--- 3 files changed, 44 insertions(+), 41 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index 0db526ec2f44..ebccd4863d7f 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -1193,11 +1193,12 @@ bool intel_ddi_pll_select(struct intel_crtc *intel_crtc, intel_encoder); } -void intel_ddi_set_pipe_settings(struct intel_crtc *crtc) +void intel_ddi_set_pipe_settings(const struct intel_crtc_state *crtc_state) { + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); struct intel_encoder *intel_encoder = intel_ddi_get_crtc_encoder(crtc); - enum transcoder cpu_transcoder = crtc->config->cpu_transcoder; + enum transcoder cpu_transcoder = crtc_state->cpu_transcoder; int type = intel_encoder->type; uint32_t temp; @@ -1205,7 +1206,7 @@ void intel_ddi_set_pipe_settings(struct intel_crtc *crtc) WARN_ON(transcoder_is_dsi(cpu_transcoder)); temp = TRANS_MSA_SYNC_CLK; - switch (crtc->config->pipe_bpp) { + switch (crtc_state->pipe_bpp) { case 18: temp |= TRANS_MSA_6_BPC; break; @@ -1225,10 +1226,12 @@ void intel_ddi_set_pipe_settings(struct intel_crtc *crtc) } } -void intel_ddi_set_vc_payload_alloc(struct intel_crtc *crtc, bool state) +void intel_ddi_set_vc_payload_alloc(const struct intel_crtc_state *crtc_state, + bool state) { + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); - enum transcoder cpu_transcoder = crtc->config->cpu_transcoder; + enum transcoder cpu_transcoder = crtc_state->cpu_transcoder; uint32_t temp; temp = I915_READ(TRANS_DDI_FUNC_CTL(cpu_transcoder)); if (state == true) @@ -1238,12 +1241,13 @@ void intel_ddi_set_vc_payload_alloc(struct intel_crtc *crtc, bool state) I915_WRITE(TRANS_DDI_FUNC_CTL(cpu_transcoder), temp); } -void intel_ddi_enable_transcoder_func(struct intel_crtc *crtc) +void intel_ddi_enable_transcoder_func(const struct intel_crtc_state *crtc_state) { + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); struct intel_encoder *intel_encoder = intel_ddi_get_crtc_encoder(crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); enum pipe pipe = crtc->pipe; - enum transcoder cpu_transcoder = crtc->config->cpu_transcoder; + enum transcoder cpu_transcoder = crtc_state->cpu_transcoder; enum port port = intel_ddi_get_encoder_port(intel_encoder); int type = intel_encoder->type; uint32_t temp; @@ -1252,7 +1256,7 @@ void intel_ddi_enable_transcoder_func(struct intel_crtc *crtc) temp = TRANS_DDI_FUNC_ENABLE; temp |= TRANS_DDI_SELECT_PORT(port); - switch (crtc->config->pipe_bpp) { + switch (crtc_state->pipe_bpp) { case 18: temp |= TRANS_DDI_BPC_6; break; @@ -1269,9 +1273,9 @@ void intel_ddi_enable_transcoder_func(struct intel_crtc *crtc) BUG(); } - if (crtc->config->base.adjusted_mode.flags & DRM_MODE_FLAG_PVSYNC) + if (crtc_state->base.adjusted_mode.flags & DRM_MODE_FLAG_PVSYNC) temp |= TRANS_DDI_PVSYNC; - if (crtc->config->base.adjusted_mode.flags & DRM_MODE_FLAG_PHSYNC) + if (crtc_state->base.adjusted_mode.flags & DRM_MODE_FLAG_PHSYNC) temp |= TRANS_DDI_PHSYNC; if (cpu_transcoder == TRANSCODER_EDP) { @@ -1282,8 +1286,8 @@ void intel_ddi_enable_transcoder_func(struct intel_crtc *crtc) * using motion blur mitigation (which we don't * support). */ if (IS_HASWELL(dev_priv) && - (crtc->config->pch_pfit.enabled || - crtc->config->pch_pfit.force_thru)) + (crtc_state->pch_pfit.enabled || + crtc_state->pch_pfit.force_thru)) temp |= TRANS_DDI_EDP_INPUT_A_ONOFF; else temp |= TRANS_DDI_EDP_INPUT_A_ON; @@ -1301,20 +1305,20 @@ void intel_ddi_enable_transcoder_func(struct intel_crtc *crtc) } if (type == INTEL_OUTPUT_HDMI) { - if (crtc->config->has_hdmi_sink) + if (crtc_state->has_hdmi_sink) temp |= TRANS_DDI_MODE_SELECT_HDMI; else temp |= TRANS_DDI_MODE_SELECT_DVI; } else if (type == INTEL_OUTPUT_ANALOG) { temp |= TRANS_DDI_MODE_SELECT_FDI; - temp |= (crtc->config->fdi_lanes - 1) << 1; + temp |= (crtc_state->fdi_lanes - 1) << 1; } else if (type == INTEL_OUTPUT_DP || type == INTEL_OUTPUT_EDP) { temp |= TRANS_DDI_MODE_SELECT_DP_SST; - temp |= DDI_PORT_WIDTH(crtc->config->lane_count); + temp |= DDI_PORT_WIDTH(crtc_state->lane_count); } else if (type == INTEL_OUTPUT_DP_MST) { temp |= TRANS_DDI_MODE_SELECT_DP_MST; - temp |= DDI_PORT_WIDTH(crtc->config->lane_count); + temp |= DDI_PORT_WIDTH(crtc_state->lane_count); } else { WARN(1, "Invalid encoder type %d for pipe %c\n", intel_encoder->type, pipe_name(pipe)); @@ -1478,22 +1482,23 @@ static u64 intel_ddi_get_power_domains(struct intel_encoder *encoder) return 0; } -void intel_ddi_enable_pipe_clock(struct intel_crtc *crtc) +void intel_ddi_enable_pipe_clock(const struct intel_crtc_state *crtc_state) { + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); struct intel_encoder *intel_encoder = intel_ddi_get_crtc_encoder(crtc); enum port port = intel_ddi_get_encoder_port(intel_encoder); - enum transcoder cpu_transcoder = crtc->config->cpu_transcoder; + enum transcoder cpu_transcoder = crtc_state->cpu_transcoder; if (cpu_transcoder != TRANSCODER_EDP) I915_WRITE(TRANS_CLK_SEL(cpu_transcoder), TRANS_CLK_SEL_PORT(port)); } -void intel_ddi_disable_pipe_clock(struct intel_crtc *intel_crtc) +void intel_ddi_disable_pipe_clock(const struct intel_crtc_state *crtc_state) { - struct drm_i915_private *dev_priv = to_i915(intel_crtc->base.dev); - enum transcoder cpu_transcoder = intel_crtc->config->cpu_transcoder; + struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev); + enum transcoder cpu_transcoder = crtc_state->cpu_transcoder; if (cpu_transcoder != TRANSCODER_EDP) I915_WRITE(TRANS_CLK_SEL(cpu_transcoder), @@ -1759,23 +1764,21 @@ static void intel_ddi_pre_enable(struct intel_encoder *intel_encoder, struct intel_crtc_state *pipe_config, struct drm_connector_state *conn_state) { - struct drm_encoder *encoder = &intel_encoder->base; - struct intel_crtc *crtc = to_intel_crtc(encoder->crtc); int type = intel_encoder->type; if (type == INTEL_OUTPUT_DP || type == INTEL_OUTPUT_EDP) { intel_ddi_pre_enable_dp(intel_encoder, - crtc->config->port_clock, - crtc->config->lane_count, - crtc->config->shared_dpll, - intel_crtc_has_type(crtc->config, + pipe_config->port_clock, + pipe_config->lane_count, + pipe_config->shared_dpll, + intel_crtc_has_type(pipe_config, INTEL_OUTPUT_DP_MST)); } if (type == INTEL_OUTPUT_HDMI) { intel_ddi_pre_enable_hdmi(intel_encoder, pipe_config->has_hdmi_sink, pipe_config, conn_state, - crtc->config->shared_dpll); + pipe_config->shared_dpll); } } @@ -1922,8 +1925,7 @@ static void bxt_ddi_pre_pll_enable(struct intel_encoder *encoder, struct intel_crtc_state *pipe_config, struct drm_connector_state *conn_state) { - struct intel_crtc *intel_crtc = to_intel_crtc(encoder->base.crtc); - uint8_t mask = intel_crtc->config->lane_lat_optim_mask; + uint8_t mask = pipe_config->lane_lat_optim_mask; bxt_ddi_phy_set_lane_optim_mask(encoder, mask); } diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index ebaf9655cbd6..b2b8a3b9f982 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -5372,7 +5372,7 @@ static void haswell_crtc_enable(struct intel_crtc_state *pipe_config, dev_priv->display.fdi_link_train(intel_crtc, pipe_config); if (!transcoder_is_dsi(cpu_transcoder)) - intel_ddi_enable_pipe_clock(intel_crtc); + intel_ddi_enable_pipe_clock(pipe_config); if (INTEL_GEN(dev_priv) >= 9) skylake_pfit_enable(intel_crtc); @@ -5385,9 +5385,9 @@ static void haswell_crtc_enable(struct intel_crtc_state *pipe_config, */ intel_color_load_luts(&pipe_config->base); - intel_ddi_set_pipe_settings(intel_crtc); + intel_ddi_set_pipe_settings(pipe_config); if (!transcoder_is_dsi(cpu_transcoder)) - intel_ddi_enable_transcoder_func(intel_crtc); + intel_ddi_enable_transcoder_func(pipe_config); if (dev_priv->display.initial_watermarks != NULL) dev_priv->display.initial_watermarks(old_intel_state, pipe_config); @@ -5400,7 +5400,7 @@ static void haswell_crtc_enable(struct intel_crtc_state *pipe_config, lpt_pch_enable(pipe_config); if (intel_crtc_has_type(intel_crtc->config, INTEL_OUTPUT_DP_MST)) - intel_ddi_set_vc_payload_alloc(intel_crtc, true); + intel_ddi_set_vc_payload_alloc(pipe_config, true); assert_vblank_disabled(crtc); drm_crtc_vblank_on(crtc); @@ -5522,7 +5522,7 @@ static void haswell_crtc_disable(struct intel_crtc_state *old_crtc_state, intel_disable_pipe(intel_crtc); if (intel_crtc_has_type(intel_crtc->config, INTEL_OUTPUT_DP_MST)) - intel_ddi_set_vc_payload_alloc(intel_crtc, false); + intel_ddi_set_vc_payload_alloc(intel_crtc->config, false); if (!transcoder_is_dsi(cpu_transcoder)) intel_ddi_disable_transcoder_func(dev_priv, cpu_transcoder); @@ -5533,7 +5533,7 @@ static void haswell_crtc_disable(struct intel_crtc_state *old_crtc_state, ironlake_pfit_disable(intel_crtc, false); if (!transcoder_is_dsi(cpu_transcoder)) - intel_ddi_disable_pipe_clock(intel_crtc); + intel_ddi_disable_pipe_clock(intel_crtc->config); intel_encoders_post_disable(crtc, old_crtc_state, old_state); diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 4cb1f57ad6ba..9434c293a581 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -1230,14 +1230,14 @@ void hsw_fdi_link_train(struct intel_crtc *crtc, void intel_ddi_init(struct drm_i915_private *dev_priv, enum port port); enum port intel_ddi_get_encoder_port(struct intel_encoder *intel_encoder); bool intel_ddi_get_hw_state(struct intel_encoder *encoder, enum pipe *pipe); -void intel_ddi_enable_transcoder_func(struct intel_crtc *crtc); +void intel_ddi_enable_transcoder_func(const struct intel_crtc_state *crtc_state); void intel_ddi_disable_transcoder_func(struct drm_i915_private *dev_priv, enum transcoder cpu_transcoder); -void intel_ddi_enable_pipe_clock(struct intel_crtc *intel_crtc); -void intel_ddi_disable_pipe_clock(struct intel_crtc *intel_crtc); +void intel_ddi_enable_pipe_clock(const struct intel_crtc_state *crtc_state); +void intel_ddi_disable_pipe_clock(const struct intel_crtc_state *crtc_state); bool intel_ddi_pll_select(struct intel_crtc *crtc, struct intel_crtc_state *crtc_state); -void intel_ddi_set_pipe_settings(struct intel_crtc *crtc); +void intel_ddi_set_pipe_settings(const struct intel_crtc_state *crtc_state); void intel_ddi_prepare_link_retrain(struct intel_dp *intel_dp); bool intel_ddi_connector_get_hw_state(struct intel_connector *intel_connector); bool intel_ddi_is_audio_enabled(struct drm_i915_private *dev_priv, @@ -1250,7 +1250,8 @@ intel_ddi_get_crtc_new_encoder(struct intel_crtc_state *crtc_state); void intel_ddi_init_dp_buf_reg(struct intel_encoder *encoder); void intel_ddi_clock_get(struct intel_encoder *encoder, struct intel_crtc_state *pipe_config); -void intel_ddi_set_vc_payload_alloc(struct intel_crtc *crtc, bool state); +void intel_ddi_set_vc_payload_alloc(const struct intel_crtc_state *crtc_state, + bool state); uint32_t ddi_signal_levels(struct intel_dp *intel_dp); u8 intel_ddi_dp_voltage_max(struct intel_encoder *encoder); -- cgit From e081c8463ac947e75f20ee11c6516f089efddaf6 Mon Sep 17 00:00:00 2001 From: Ander Conselvan de Oliveira Date: Thu, 2 Mar 2017 14:58:57 +0200 Subject: drm/i915: Remove duplicate DDI enabling logic from MST path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The logic to enable a DDI in intel_mst_pre_enable_dp() is essentially the same as in intel_ddi_pre_enable_dp(). So reuse the latter function by calling the post_disable hook on the intel_dig_port instead of duplicating that code. v2: Don't oops because of a NULL encoder->crtc. (Ville) v3: Warn for MST + PORT_E too. (Ville) Cc: Imre Deak Cc: Ville Syrjälä Signed-off-by: Ander Conselvan de Oliveira Reviewed-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/20170302125857.14665-8-ander.conselvan.de.oliveira@intel.com --- drivers/gpu/drm/i915/intel_ddi.c | 2 ++ drivers/gpu/drm/i915/intel_dp_mst.c | 23 +++-------------------- 2 files changed, 5 insertions(+), 20 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index ebccd4863d7f..04676760e6fd 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -1713,6 +1713,8 @@ static void intel_ddi_pre_enable_dp(struct intel_encoder *encoder, enum port port = intel_ddi_get_encoder_port(encoder); struct intel_digital_port *dig_port = enc_to_dig_port(&encoder->base); + WARN_ON(link_mst && (port == PORT_A || port == PORT_E)); + intel_dp_set_link_params(intel_dp, link_rate, lane_count, link_mst); if (encoder->type == INTEL_OUTPUT_EDP) diff --git a/drivers/gpu/drm/i915/intel_dp_mst.c b/drivers/gpu/drm/i915/intel_dp_mst.c index a8334e18c4fc..094cbdcbcd6d 100644 --- a/drivers/gpu/drm/i915/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/intel_dp_mst.c @@ -159,26 +159,9 @@ static void intel_mst_pre_enable_dp(struct intel_encoder *encoder, DRM_DEBUG_KMS("%d\n", intel_dp->active_mst_links); - if (intel_dp->active_mst_links == 0) { - intel_ddi_clk_select(&intel_dig_port->base, - pipe_config->shared_dpll); - - intel_display_power_get(dev_priv, - intel_dig_port->ddi_io_power_domain); - - intel_prepare_dp_ddi_buffers(&intel_dig_port->base); - intel_dp_set_link_params(intel_dp, - pipe_config->port_clock, - pipe_config->lane_count, - true); - - intel_ddi_init_dp_buf_reg(&intel_dig_port->base); - - intel_dp_sink_dpms(intel_dp, DRM_MODE_DPMS_ON); - - intel_dp_start_link_train(intel_dp); - intel_dp_stop_link_train(intel_dp); - } + if (intel_dp->active_mst_links == 0) + intel_dig_port->base.pre_enable(&intel_dig_port->base, + pipe_config, NULL); ret = drm_dp_mst_allocate_vcpi(&intel_dp->mst_mgr, connector->port, -- cgit From 5400367a864ddafdfb9d5d04cf6f9979665ef349 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 3 Mar 2017 12:19:46 +0000 Subject: drm/i915: Ensure the engine is idle before manually changing HWS During reset_all_global_seqno() on seqno rollover, we have to update the HWS. This causes all in flight requests to be completed, so first we wait. However, we were only waiting for the requests themselves to be completed and clearing out the waiter rbtrees - what I had missed was the extra reference in execlists->port[]. Since commit fe9ae7a3bfdb ("drm/i915/execlists: Detect an out-of-order context switch") we can detect when the request is retired before the context switch interrupt is completed. The impact should be neglible outside of debugging. Testcase: igt/gem_exec_whisper Signed-off-by: Chris Wilson Cc: Mika Kuoppala Link: http://patchwork.freedesktop.org/patch/msgid/20170303121947.20482-1-chris@chris-wilson.co.uk Reviewed-by: Mika Kuoppala --- drivers/gpu/drm/i915/i915_gem_request.c | 3 +++ drivers/gpu/drm/i915/intel_engine_cs.c | 31 +++++++++++++++++++++++++++++++ drivers/gpu/drm/i915/intel_ringbuffer.h | 2 ++ 3 files changed, 36 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index c2cee9674cb0..b36a7644e055 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -196,6 +196,9 @@ static int reset_all_global_seqno(struct drm_i915_private *i915, u32 seqno) for_each_engine(engine, i915, id) { struct intel_timeline *tl = &timeline->engine[id]; + if (wait_for(intel_engine_is_idle(engine), 50)) + return -EBUSY; + if (!i915_seqno_passed(seqno, tl->seqno)) { /* spin until threads are complete */ while (intel_breadcrumbs_busy(engine)) diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index c58339b22f6a..53d65dc0c9fb 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -1071,6 +1071,37 @@ int intel_ring_workarounds_emit(struct drm_i915_gem_request *req) return 0; } +/** + * intel_engine_is_idle() - Report if the engine has finished process all work + * @engine: the intel_engine_cs + * + * Return true if there are no requests pending, nothing left to be submitted + * to hardware, and that the engine is idle. + */ +bool intel_engine_is_idle(struct intel_engine_cs *engine) +{ + struct drm_i915_private *dev_priv = engine->i915; + + /* Any inflight/incomplete requests? */ + if (!i915_seqno_passed(intel_engine_get_seqno(engine), + intel_engine_last_submit(engine))) + return false; + + /* Interrupt/tasklet pending? */ + if (test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted)) + return false; + + /* Both ports drained, no more ELSP submission? */ + if (engine->execlist_port[0].request) + return false; + + /* Ring stopped? */ + if (INTEL_GEN(dev_priv) > 2 && !(I915_READ_MODE(engine) & MODE_IDLE)) + return false; + + return true; +} + #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) #include "selftests/mock_engine.c" #endif diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 3dd6eee4a08b..38580765bfd6 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -664,4 +664,6 @@ static inline u32 *gen8_emit_pipe_control(u32 *batch, u32 flags, u32 offset) return batch + 6; } +bool intel_engine_is_idle(struct intel_engine_cs *engine); + #endif /* _INTEL_RINGBUFFER_H_ */ -- cgit From 0542524944c2ae3264ca8e6b5f0c7a111f09a2c2 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 3 Mar 2017 12:19:47 +0000 Subject: drm/i915: Generalise wait for execlists to be idle The code to check for execlists completion is generic, so move it to intel_engine_cs.c, where we can reuse the new intel_engine_is_idle(). Signed-off-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20170303121947.20482-2-chris@chris-wilson.co.uk Reviewed-by: Mika Kuoppala --- drivers/gpu/drm/i915/i915_gem.c | 8 +++---- drivers/gpu/drm/i915/intel_engine_cs.c | 13 +++++++++++ drivers/gpu/drm/i915/intel_lrc.c | 28 ----------------------- drivers/gpu/drm/i915/intel_lrc.h | 1 - drivers/gpu/drm/i915/intel_ringbuffer.h | 1 + drivers/gpu/drm/i915/selftests/i915_gem_request.c | 2 +- 6 files changed, 19 insertions(+), 34 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index edc59bd45f53..f7e85fb1464f 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2959,8 +2959,8 @@ i915_gem_idle_work_handler(struct work_struct *work) * new request is submitted. */ wait_for(READ_ONCE(dev_priv->gt.active_requests) || - intel_execlists_idle(dev_priv), 10); - + intel_engines_are_idle(dev_priv), + 10); if (READ_ONCE(dev_priv->gt.active_requests)) return; @@ -2985,7 +2985,7 @@ i915_gem_idle_work_handler(struct work_struct *work) if (dev_priv->gt.active_requests) goto out_unlock; - if (wait_for(intel_execlists_idle(dev_priv), 10)) + if (wait_for(intel_engines_are_idle(dev_priv), 10)) DRM_ERROR("Timeout waiting for engines to idle\n"); for_each_engine(engine, dev_priv, id) { @@ -4287,7 +4287,7 @@ int i915_gem_suspend(struct drm_i915_private *dev_priv) * reset the GPU back to its idle, low power state. */ WARN_ON(dev_priv->gt.awake); - WARN_ON(!intel_execlists_idle(dev_priv)); + WARN_ON(!intel_engines_are_idle(dev_priv)); /* * Neither the BIOS, ourselves or any other kernel diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 53d65dc0c9fb..5fd4883db235 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -1102,6 +1102,19 @@ bool intel_engine_is_idle(struct intel_engine_cs *engine) return true; } +bool intel_engines_are_idle(struct drm_i915_private *dev_priv) +{ + struct intel_engine_cs *engine; + enum intel_engine_id id; + + for_each_engine(engine, dev_priv, id) { + if (!intel_engine_is_idle(engine)) + return false; + } + + return true; +} + #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) #include "selftests/mock_engine.c" #endif diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index f9a8545474bc..3834a84fe084 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -498,34 +498,6 @@ static bool execlists_elsp_idle(struct intel_engine_cs *engine) return !engine->execlist_port[0].request; } -/** - * intel_execlists_idle() - Determine if all engine submission ports are idle - * @dev_priv: i915 device private - * - * Return true if there are no requests pending on any of the submission ports - * of any engines. - */ -bool intel_execlists_idle(struct drm_i915_private *dev_priv) -{ - struct intel_engine_cs *engine; - enum intel_engine_id id; - - if (!i915.enable_execlists) - return true; - - for_each_engine(engine, dev_priv, id) { - /* Interrupt/tasklet pending? */ - if (test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted)) - return false; - - /* Both ports drained, no more ELSP submission? */ - if (!execlists_elsp_idle(engine)) - return false; - } - - return true; -} - static bool execlists_elsp_ready(const struct intel_engine_cs *engine) { const struct execlist_port *port = engine->execlist_port; diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h index c8009c7bfbdd..5fc07761caff 100644 --- a/drivers/gpu/drm/i915/intel_lrc.h +++ b/drivers/gpu/drm/i915/intel_lrc.h @@ -88,6 +88,5 @@ uint64_t intel_lr_context_descriptor(struct i915_gem_context *ctx, int intel_sanitize_enable_execlists(struct drm_i915_private *dev_priv, int enable_execlists); void intel_execlists_enable_submission(struct drm_i915_private *dev_priv); -bool intel_execlists_idle(struct drm_i915_private *dev_priv); #endif /* _INTEL_LRC_H_ */ diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 38580765bfd6..55a6a3f8274c 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -665,5 +665,6 @@ static inline u32 *gen8_emit_pipe_control(u32 *batch, u32 flags, u32 offset) } bool intel_engine_is_idle(struct intel_engine_cs *engine); +bool intel_engines_are_idle(struct drm_i915_private *dev_priv); #endif /* _INTEL_RINGBUFFER_H_ */ diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_request.c b/drivers/gpu/drm/i915/selftests/i915_gem_request.c index 42bdeac93324..926b24c117d6 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_request.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_request.c @@ -303,7 +303,7 @@ static int end_live_test(struct live_test *t) { struct drm_i915_private *i915 = t->i915; - if (wait_for(intel_execlists_idle(i915), 1)) { + if (wait_for(intel_engines_are_idle(i915), 1)) { pr_err("%s(%s): GPU not idle\n", t->func, t->name); return -EIO; } -- cgit From 3e490042a80f82df80141fce1dbef1baa7850160 Mon Sep 17 00:00:00 2001 From: Mika Kuoppala Date: Tue, 28 Feb 2017 17:28:07 +0200 Subject: drm/i915/gtt: Make I915_PDPES_PER_PDP inline function The macro takes a vm pointer at some sites, and dev_priv on others We were saved as the internal macro never deferences the pointer given. As the number of pdpes depend on vm configuration, make it as a inline function that accepts vm pointer. Cc: Chris Wilson Signed-off-by: Mika Kuoppala Reviewed-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/1488295691-9404-1-git-send-email-mika.kuoppala@intel.com --- drivers/gpu/drm/i915/i915_gem_gtt.c | 12 +++++++----- drivers/gpu/drm/i915/i915_gem_gtt.h | 26 ++++++++++++++++---------- 2 files changed, 23 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index e0c9542a90c1..5299600b4733 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -528,7 +528,7 @@ static void gen8_initialize_pd(struct i915_address_space *vm, static int __pdp_init(struct i915_address_space *vm, struct i915_page_directory_pointer *pdp) { - const unsigned int pdpes = I915_PDPES_PER_PDP(vm->i915); + const unsigned int pdpes = i915_pdpes_per_pdp(vm); unsigned int i; pdp->page_directory = kmalloc_array(pdpes, sizeof(*pdp->page_directory), @@ -852,7 +852,7 @@ gen8_ppgtt_insert_pte_entries(struct i915_hw_ppgtt *ppgtt, gen8_pte_t *vaddr; bool ret; - GEM_BUG_ON(idx->pdpe >= I915_PDPES_PER_PDP(vm)); + GEM_BUG_ON(idx->pdpe >= i915_pdpes_per_pdp(&ppgtt->base)); pd = pdp->page_directory[idx->pdpe]; vaddr = kmap_atomic_px(pd->page_table[idx->pde]); do { @@ -883,7 +883,7 @@ gen8_ppgtt_insert_pte_entries(struct i915_hw_ppgtt *ppgtt, break; } - GEM_BUG_ON(idx->pdpe >= I915_PDPES_PER_PDP(vm)); + GEM_BUG_ON(idx->pdpe >= i915_pdpes_per_pdp(&ppgtt->base)); pd = pdp->page_directory[idx->pdpe]; } @@ -1036,9 +1036,10 @@ static void gen8_free_scratch(struct i915_address_space *vm) static void gen8_ppgtt_cleanup_3lvl(struct i915_address_space *vm, struct i915_page_directory_pointer *pdp) { + const unsigned int pdpes = i915_pdpes_per_pdp(vm); int i; - for (i = 0; i < I915_PDPES_PER_PDP(vm->i915); i++) { + for (i = 0; i < pdpes; i++) { if (pdp->page_directory[i] == vm->scratch_pd) continue; @@ -1127,7 +1128,7 @@ static int gen8_ppgtt_alloc_pdp(struct i915_address_space *vm, gen8_initialize_pd(vm, pd); gen8_ppgtt_set_pdpe(vm, pdp, pd, pdpe); pdp->used_pdpes++; - GEM_BUG_ON(pdp->used_pdpes > I915_PDPES_PER_PDP(vm)); + GEM_BUG_ON(pdp->used_pdpes > i915_pdpes_per_pdp(vm)); mark_tlbs_dirty(i915_vm_to_ppgtt(vm)); } @@ -1201,6 +1202,7 @@ static void gen8_dump_pdp(struct i915_hw_ppgtt *ppgtt, gen8_pte_t scratch_pte, struct seq_file *m) { + struct i915_address_space *vm = &ppgtt->base; struct i915_page_directory *pd; u32 pdpe; diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index f7d4e194a227..562c6329aff6 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -125,9 +125,6 @@ typedef u64 gen8_ppgtt_pml4e_t; #define GEN8_LEGACY_PDPES 4 #define GEN8_PTES I915_PTES(sizeof(gen8_pte_t)) -#define I915_PDPES_PER_PDP(dev_priv) (USES_FULL_48BIT_PPGTT(dev_priv) ?\ - GEN8_PML4ES_PER_PML4 : GEN8_LEGACY_PDPES) - #define PPAT_UNCACHED_INDEX (_PAGE_PWT | _PAGE_PCD) #define PPAT_CACHED_PDE_INDEX 0 /* WB LLC */ #define PPAT_CACHED_INDEX _PAGE_PAT /* WB LLCeLLC */ @@ -332,6 +329,12 @@ struct i915_address_space { #define i915_is_ggtt(V) (!(V)->file) +static inline bool +i915_vm_is_48bit(const struct i915_address_space *vm) +{ + return (vm->total - 1) >> 32; +} + /* The Graphics Translation Table is the way in which GEN hardware translates a * Graphics Virtual Address into a Physical Address. In addition to the normal * collateral associated with any va->pa translations GEN hardware also has a @@ -457,6 +460,15 @@ static inline u32 gen6_pde_index(u32 addr) return i915_pde_index(addr, GEN6_PDE_SHIFT); } +static inline unsigned int +i915_pdpes_per_pdp(const struct i915_address_space *vm) +{ + if (i915_vm_is_48bit(vm)) + return GEN8_PML4ES_PER_PML4; + + return GEN8_LEGACY_PDPES; +} + /* Equivalent to the gen6 version, For each pde iterates over every pde * between from start until start + length. On gen8+ it simply iterates * over every page directory entry in a page directory. @@ -471,7 +483,7 @@ static inline u32 gen6_pde_index(u32 addr) #define gen8_for_each_pdpe(pd, pdp, start, length, iter) \ for (iter = gen8_pdpe_index(start); \ - length > 0 && iter < I915_PDPES_PER_PDP(dev) && \ + length > 0 && iter < i915_pdpes_per_pdp(vm) && \ (pd = (pdp)->page_directory[iter], true); \ ({ u64 temp = ALIGN(start+1, 1 << GEN8_PDPE_SHIFT); \ temp = min(temp - start, length); \ @@ -523,12 +535,6 @@ i915_vm_to_ggtt(struct i915_address_space *vm) return container_of(vm, struct i915_ggtt, base); } -static inline bool -i915_vm_is_48bit(const struct i915_address_space *vm) -{ - return (vm->total - 1) >> 32; -} - int i915_gem_init_aliasing_ppgtt(struct drm_i915_private *i915); void i915_gem_fini_aliasing_ppgtt(struct drm_i915_private *i915); -- cgit From 54af56dbf883f65396917a88e4ce8c37f9d52009 Mon Sep 17 00:00:00 2001 From: Mika Kuoppala Date: Tue, 28 Feb 2017 17:28:08 +0200 Subject: drm/i915: Don't mark pdps clear if pdps are not submitted Don't mark pdps clear if never do the necessary actions with the hardware to make them clear. v2: totally get rid of confusing ppgtt bool (Chris) Signed-off-by: Mika Kuoppala Reviewed-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/1488295691-9404-2-git-send-email-mika.kuoppala@intel.com --- drivers/gpu/drm/i915/intel_lrc.c | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 3834a84fe084..c8943c42f9e2 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -1291,9 +1291,8 @@ static int intel_logical_ring_emit_pdps(struct drm_i915_gem_request *req) static int gen8_emit_bb_start(struct drm_i915_gem_request *req, u64 offset, u32 len, - unsigned int dispatch_flags) + const unsigned int flags) { - bool ppgtt = !(dispatch_flags & I915_DISPATCH_SECURE); u32 *cs; int ret; @@ -1304,13 +1303,12 @@ static int gen8_emit_bb_start(struct drm_i915_gem_request *req, * not idle). PML4 is allocated during ppgtt init so this is * not needed in 48-bit.*/ if (req->ctx->ppgtt && - (intel_engine_flag(req->engine) & req->ctx->ppgtt->pd_dirty_rings)) { - if (!i915_vm_is_48bit(&req->ctx->ppgtt->base) && - !intel_vgpu_active(req->i915)) { - ret = intel_logical_ring_emit_pdps(req); - if (ret) - return ret; - } + (intel_engine_flag(req->engine) & req->ctx->ppgtt->pd_dirty_rings) && + !i915_vm_is_48bit(&req->ctx->ppgtt->base) && + !intel_vgpu_active(req->i915)) { + ret = intel_logical_ring_emit_pdps(req); + if (ret) + return ret; req->ctx->ppgtt->pd_dirty_rings &= ~intel_engine_flag(req->engine); } @@ -1320,8 +1318,9 @@ static int gen8_emit_bb_start(struct drm_i915_gem_request *req, return PTR_ERR(cs); /* FIXME(BDW): Address space and security selectors. */ - *cs++ = MI_BATCH_BUFFER_START_GEN8 | (ppgtt << 8) | (dispatch_flags & - I915_DISPATCH_RS ? MI_BATCH_RESOURCE_STREAMER : 0); + *cs++ = MI_BATCH_BUFFER_START_GEN8 | + (flags & I915_DISPATCH_SECURE ? 0 : BIT(8)) | + (flags & I915_DISPATCH_RS ? MI_BATCH_RESOURCE_STREAMER : 0); *cs++ = lower_32_bits(offset); *cs++ = upper_32_bits(offset); *cs++ = MI_NOOP; -- cgit From 1e6437b0e064746a4a2b6b3578c639797cafe4b1 Mon Sep 17 00:00:00 2001 From: Mika Kuoppala Date: Tue, 28 Feb 2017 17:28:09 +0200 Subject: drm/i915/gtt: Prefer i915_vm_is_48bit() over macro If we setup the vm size early, we can use the newly introduced i915_vm_is_48bit() in majority of callsites wanting to know the vm size. As we operate either with 3lvl or 4lvl page table structure, wrap the vm size query inside a function which tells us if 4lvl setup is needed for particular vm, as the following code uses the function names where level is noted. v2: use_4lvl (Chris) Signed-off-by: Mika Kuoppala Reviewed-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/1488295691-9404-3-git-send-email-mika.kuoppala@intel.com --- drivers/gpu/drm/i915/i915_gem_gtt.c | 63 ++++++++++++++++++++++--------------- 1 file changed, 38 insertions(+), 25 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 5299600b4733..9399906e5528 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -548,13 +548,18 @@ static void __pdp_fini(struct i915_page_directory_pointer *pdp) pdp->page_directory = NULL; } +static inline bool use_4lvl(const struct i915_address_space *vm) +{ + return i915_vm_is_48bit(vm); +} + static struct i915_page_directory_pointer * alloc_pdp(struct i915_address_space *vm) { struct i915_page_directory_pointer *pdp; int ret = -ENOMEM; - WARN_ON(!USES_FULL_48BIT_PPGTT(vm->i915)); + WARN_ON(!use_4lvl(vm)); pdp = kzalloc(sizeof(*pdp), GFP_KERNEL); if (!pdp) @@ -582,10 +587,12 @@ static void free_pdp(struct i915_address_space *vm, struct i915_page_directory_pointer *pdp) { __pdp_fini(pdp); - if (USES_FULL_48BIT_PPGTT(vm->i915)) { - cleanup_px(vm, pdp); - kfree(pdp); - } + + if (!use_4lvl(vm)) + return; + + cleanup_px(vm, pdp); + kfree(pdp); } static void gen8_initialize_pdp(struct i915_address_space *vm, @@ -739,7 +746,7 @@ static void gen8_ppgtt_set_pdpe(struct i915_address_space *vm, gen8_ppgtt_pdpe_t *vaddr; pdp->page_directory[pdpe] = pd; - if (!USES_FULL_48BIT_PPGTT(vm->i915)) + if (!use_4lvl(vm)) return; vaddr = kmap_atomic_px(pdp); @@ -804,7 +811,7 @@ static void gen8_ppgtt_clear_4lvl(struct i915_address_space *vm, struct i915_page_directory_pointer *pdp; unsigned int pml4e; - GEM_BUG_ON(!USES_FULL_48BIT_PPGTT(vm->i915)); + GEM_BUG_ON(!use_4lvl(vm)); gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) { GEM_BUG_ON(pdp == vm->scratch_pdp); @@ -968,7 +975,7 @@ static int gen8_init_scratch(struct i915_address_space *vm) goto free_pt; } - if (USES_FULL_48BIT_PPGTT(dev)) { + if (use_4lvl(vm)) { vm->scratch_pdp = alloc_pdp(vm); if (IS_ERR(vm->scratch_pdp)) { ret = PTR_ERR(vm->scratch_pdp); @@ -978,7 +985,7 @@ static int gen8_init_scratch(struct i915_address_space *vm) gen8_initialize_pt(vm, vm->scratch_pt); gen8_initialize_pd(vm, vm->scratch_pd); - if (USES_FULL_48BIT_PPGTT(dev_priv)) + if (use_4lvl(vm)) gen8_initialize_pdp(vm, vm->scratch_pdp); return 0; @@ -995,12 +1002,13 @@ free_scratch_page: static int gen8_ppgtt_notify_vgt(struct i915_hw_ppgtt *ppgtt, bool create) { + struct i915_address_space *vm = &ppgtt->base; + struct drm_i915_private *dev_priv = vm->i915; enum vgt_g2v_type msg; - struct drm_i915_private *dev_priv = ppgtt->base.i915; int i; - if (USES_FULL_48BIT_PPGTT(dev_priv)) { - u64 daddr = px_dma(&ppgtt->pml4); + if (use_4lvl(vm)) { + const u64 daddr = px_dma(&ppgtt->pml4); I915_WRITE(vgtif_reg(pdp[0].lo), lower_32_bits(daddr)); I915_WRITE(vgtif_reg(pdp[0].hi), upper_32_bits(daddr)); @@ -1009,7 +1017,7 @@ static int gen8_ppgtt_notify_vgt(struct i915_hw_ppgtt *ppgtt, bool create) VGT_G2V_PPGTT_L4_PAGE_TABLE_DESTROY); } else { for (i = 0; i < GEN8_LEGACY_PDPES; i++) { - u64 daddr = i915_page_dir_dma_addr(ppgtt, i); + const u64 daddr = i915_page_dir_dma_addr(ppgtt, i); I915_WRITE(vgtif_reg(pdp[i].lo), lower_32_bits(daddr)); I915_WRITE(vgtif_reg(pdp[i].hi), upper_32_bits(daddr)); @@ -1026,7 +1034,7 @@ static int gen8_ppgtt_notify_vgt(struct i915_hw_ppgtt *ppgtt, bool create) static void gen8_free_scratch(struct i915_address_space *vm) { - if (USES_FULL_48BIT_PPGTT(vm->i915)) + if (use_4lvl(vm)) free_pdp(vm, vm->scratch_pdp); free_pd(vm, vm->scratch_pd); free_pt(vm, vm->scratch_pt); @@ -1072,10 +1080,10 @@ static void gen8_ppgtt_cleanup(struct i915_address_space *vm) if (intel_vgpu_active(dev_priv)) gen8_ppgtt_notify_vgt(ppgtt, false); - if (!USES_FULL_48BIT_PPGTT(vm->i915)) - gen8_ppgtt_cleanup_3lvl(&ppgtt->base, &ppgtt->pdp); - else + if (use_4lvl(vm)) gen8_ppgtt_cleanup_4lvl(ppgtt); + else + gen8_ppgtt_cleanup_3lvl(&ppgtt->base, &ppgtt->pdp); gen8_free_scratch(vm); } @@ -1258,9 +1266,7 @@ static void gen8_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m) gen8_pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC); u64 start = 0, length = ppgtt->base.total; - if (!USES_FULL_48BIT_PPGTT(vm->i915)) { - gen8_dump_pdp(ppgtt, &ppgtt->pdp, start, length, scratch_pte, m); - } else { + if (use_4lvl(vm)) { u64 pml4e; struct i915_pml4 *pml4 = &ppgtt->pml4; struct i915_page_directory_pointer *pdp; @@ -1272,6 +1278,8 @@ static void gen8_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m) seq_printf(m, " PML4E #%llu\n", pml4e); gen8_dump_pdp(ppgtt, pdp, start, length, scratch_pte, m); } + } else { + gen8_dump_pdp(ppgtt, &ppgtt->pdp, start, length, scratch_pte, m); } } @@ -1316,12 +1324,19 @@ unwind: */ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt) { - struct drm_i915_private *dev_priv = ppgtt->base.i915; + struct i915_address_space *vm = &ppgtt->base; + struct drm_i915_private *dev_priv = vm->i915; int ret; + ppgtt->base.total = USES_FULL_48BIT_PPGTT(dev_priv) ? + 1ULL << 48 : + 1ULL << 32; + ret = gen8_init_scratch(&ppgtt->base); - if (ret) + if (ret) { + ppgtt->base.total = 0; return ret; + } ppgtt->base.cleanup = gen8_ppgtt_cleanup; ppgtt->base.unbind_vma = ppgtt_unbind_vma; @@ -1334,14 +1349,13 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt) if (IS_CHERRYVIEW(dev_priv) || IS_BROXTON(dev_priv)) ppgtt->base.pt_kmap_wc = true; - if (USES_FULL_48BIT_PPGTT(dev_priv)) { + if (use_4lvl(vm)) { ret = setup_px(&ppgtt->base, &ppgtt->pml4); if (ret) goto free_scratch; gen8_initialize_pml4(&ppgtt->base, &ppgtt->pml4); - ppgtt->base.total = 1ULL << 48; ppgtt->switch_mm = gen8_48b_mm_switch; ppgtt->base.allocate_va_range = gen8_ppgtt_alloc_4lvl; @@ -1352,7 +1366,6 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt) if (ret) goto free_scratch; - ppgtt->base.total = 1ULL << 32; ppgtt->switch_mm = gen8_legacy_mm_switch; if (intel_vgpu_active(dev_priv)) { -- cgit From e71677698b27f3b460d44f67389b43c4353522dd Mon Sep 17 00:00:00 2001 From: Mika Kuoppala Date: Tue, 28 Feb 2017 17:28:10 +0200 Subject: drm/i915: Avoid using word legacy with ppgtt The term legacy is subjective. Use 3lvl and 4lvl where appropriate. Signed-off-by: Mika Kuoppala Reviewed-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/1488295691-9404-4-git-send-email-mika.kuoppala@intel.com --- drivers/gpu/drm/i915/i915_gem_gtt.c | 18 ++++++++---------- drivers/gpu/drm/i915/i915_gem_gtt.h | 21 +++++++++++---------- drivers/gpu/drm/i915/intel_lrc.c | 4 ++-- 3 files changed, 21 insertions(+), 22 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 9399906e5528..9c9a03ee44b9 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -641,12 +641,12 @@ static int gen8_write_pdp(struct drm_i915_gem_request *req, return 0; } -static int gen8_legacy_mm_switch(struct i915_hw_ppgtt *ppgtt, - struct drm_i915_gem_request *req) +static int gen8_mm_switch_3lvl(struct i915_hw_ppgtt *ppgtt, + struct drm_i915_gem_request *req) { int i, ret; - for (i = GEN8_LEGACY_PDPES - 1; i >= 0; i--) { + for (i = GEN8_3LVL_PDPES - 1; i >= 0; i--) { const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i); ret = gen8_write_pdp(req, i, pd_daddr); @@ -657,8 +657,8 @@ static int gen8_legacy_mm_switch(struct i915_hw_ppgtt *ppgtt, return 0; } -static int gen8_48b_mm_switch(struct i915_hw_ppgtt *ppgtt, - struct drm_i915_gem_request *req) +static int gen8_mm_switch_4lvl(struct i915_hw_ppgtt *ppgtt, + struct drm_i915_gem_request *req) { return gen8_write_pdp(req, 0, px_dma(&ppgtt->pml4)); } @@ -1016,7 +1016,7 @@ static int gen8_ppgtt_notify_vgt(struct i915_hw_ppgtt *ppgtt, bool create) msg = (create ? VGT_G2V_PPGTT_L4_PAGE_TABLE_CREATE : VGT_G2V_PPGTT_L4_PAGE_TABLE_DESTROY); } else { - for (i = 0; i < GEN8_LEGACY_PDPES; i++) { + for (i = 0; i < GEN8_3LVL_PDPES; i++) { const u64 daddr = i915_page_dir_dma_addr(ppgtt, i); I915_WRITE(vgtif_reg(pdp[i].lo), lower_32_bits(daddr)); @@ -1356,8 +1356,7 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt) gen8_initialize_pml4(&ppgtt->base, &ppgtt->pml4); - ppgtt->switch_mm = gen8_48b_mm_switch; - + ppgtt->switch_mm = gen8_mm_switch_4lvl; ppgtt->base.allocate_va_range = gen8_ppgtt_alloc_4lvl; ppgtt->base.insert_entries = gen8_ppgtt_insert_4lvl; ppgtt->base.clear_range = gen8_ppgtt_clear_4lvl; @@ -1366,8 +1365,6 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt) if (ret) goto free_scratch; - ppgtt->switch_mm = gen8_legacy_mm_switch; - if (intel_vgpu_active(dev_priv)) { ret = gen8_preallocate_top_level_pdp(ppgtt); if (ret) { @@ -1376,6 +1373,7 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt) } } + ppgtt->switch_mm = gen8_mm_switch_3lvl; ppgtt->base.allocate_va_range = gen8_ppgtt_alloc_3lvl; ppgtt->base.insert_entries = gen8_ppgtt_insert_3lvl; ppgtt->base.clear_range = gen8_ppgtt_clear_3lvl; diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index 562c6329aff6..fb15684c1d83 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -101,13 +101,20 @@ typedef u64 gen8_ppgtt_pml4e_t; #define HSW_GTT_ADDR_ENCODE(addr) ((addr) | (((addr) >> 28) & 0x7f0)) #define HSW_PTE_ADDR_ENCODE(addr) HSW_GTT_ADDR_ENCODE(addr) -/* GEN8 legacy style address is defined as a 3 level page table: +/* GEN8 32b style address is defined as a 3 level page table: * 31:30 | 29:21 | 20:12 | 11:0 * PDPE | PDE | PTE | offset * The difference as compared to normal x86 3 level page table is the PDPEs are * programmed via register. - * - * GEN8 48b legacy style address is defined as a 4 level page table: + */ +#define GEN8_3LVL_PDPES 4 +#define GEN8_PDE_SHIFT 21 +#define GEN8_PDE_MASK 0x1ff +#define GEN8_PTE_SHIFT 12 +#define GEN8_PTE_MASK 0x1ff +#define GEN8_PTES I915_PTES(sizeof(gen8_pte_t)) + +/* GEN8 48b style address is defined as a 4 level page table: * 47:39 | 38:30 | 29:21 | 20:12 | 11:0 * PML4E | PDPE | PDE | PTE | offset */ @@ -118,12 +125,6 @@ typedef u64 gen8_ppgtt_pml4e_t; /* NB: GEN8_PDPE_MASK is untrue for 32b platforms, but it has no impact on 32b page * tables */ #define GEN8_PDPE_MASK 0x1ff -#define GEN8_PDE_SHIFT 21 -#define GEN8_PDE_MASK 0x1ff -#define GEN8_PTE_SHIFT 12 -#define GEN8_PTE_MASK 0x1ff -#define GEN8_LEGACY_PDPES 4 -#define GEN8_PTES I915_PTES(sizeof(gen8_pte_t)) #define PPAT_UNCACHED_INDEX (_PAGE_PWT | _PAGE_PCD) #define PPAT_CACHED_PDE_INDEX 0 /* WB LLC */ @@ -466,7 +467,7 @@ i915_pdpes_per_pdp(const struct i915_address_space *vm) if (i915_vm_is_48bit(vm)) return GEN8_PML4ES_PER_PML4; - return GEN8_LEGACY_PDPES; + return GEN8_3LVL_PDPES; } /* Equivalent to the gen6 version, For each pde iterates over every pde diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index c8943c42f9e2..89f38e7def9f 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -1265,7 +1265,7 @@ static int intel_logical_ring_emit_pdps(struct drm_i915_gem_request *req) { struct i915_hw_ppgtt *ppgtt = req->ctx->ppgtt; struct intel_engine_cs *engine = req->engine; - const int num_lri_cmds = GEN8_LEGACY_PDPES * 2; + const int num_lri_cmds = GEN8_3LVL_PDPES * 2; u32 *cs; int i; @@ -1274,7 +1274,7 @@ static int intel_logical_ring_emit_pdps(struct drm_i915_gem_request *req) return PTR_ERR(cs); *cs++ = MI_LOAD_REGISTER_IMM(num_lri_cmds); - for (i = GEN8_LEGACY_PDPES - 1; i >= 0; i--) { + for (i = GEN8_3LVL_PDPES - 1; i >= 0; i--) { const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i); *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(engine, i)); -- cgit From 054b9acde6f9b7bd5e1e9739777d1cddda048305 Mon Sep 17 00:00:00 2001 From: Mika Kuoppala Date: Tue, 28 Feb 2017 17:28:11 +0200 Subject: drm/i915/gtt: Setup vm callbacks late If we manage to tangle errorpaths and get call to callbacks, it is better to defensively keep them as null until object init is finished so that we get clean null deref on callsite, instead of more cryptic wreckage with partly initialized vm objects. Signed-off-by: Mika Kuoppala Reviewed-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/1488295691-9404-5-git-send-email-mika.kuoppala@intel.com --- drivers/gpu/drm/i915/i915_gem_gtt.c | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 9c9a03ee44b9..cee9c4fec52a 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -1338,11 +1338,6 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt) return ret; } - ppgtt->base.cleanup = gen8_ppgtt_cleanup; - ppgtt->base.unbind_vma = ppgtt_unbind_vma; - ppgtt->base.bind_vma = ppgtt_bind_vma; - ppgtt->debug_dump = gen8_dump_ppgtt; - /* There are only few exceptions for gen >=6. chv and bxt. * And we are not sure about the latter so play safe for now. */ @@ -1382,6 +1377,11 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt) if (intel_vgpu_active(dev_priv)) gen8_ppgtt_notify_vgt(ppgtt, true); + ppgtt->base.cleanup = gen8_ppgtt_cleanup; + ppgtt->base.unbind_vma = ppgtt_unbind_vma; + ppgtt->base.bind_vma = ppgtt_bind_vma; + ppgtt->debug_dump = gen8_dump_ppgtt; + return 0; free_scratch: @@ -1808,13 +1808,7 @@ static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt) if (ret) return ret; - ppgtt->base.clear_range = gen6_ppgtt_clear_range; - ppgtt->base.insert_entries = gen6_ppgtt_insert_entries; - ppgtt->base.unbind_vma = ppgtt_unbind_vma; - ppgtt->base.bind_vma = ppgtt_bind_vma; - ppgtt->base.cleanup = gen6_ppgtt_cleanup; ppgtt->base.total = I915_PDES * GEN6_PTES * PAGE_SIZE; - ppgtt->debug_dump = gen6_dump_ppgtt; gen6_scratch_va_range(ppgtt, 0, ppgtt->base.total); gen6_write_page_range(ppgtt, 0, ppgtt->base.total); @@ -1825,6 +1819,13 @@ static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt) return ret; } + ppgtt->base.clear_range = gen6_ppgtt_clear_range; + ppgtt->base.insert_entries = gen6_ppgtt_insert_entries; + ppgtt->base.unbind_vma = ppgtt_unbind_vma; + ppgtt->base.bind_vma = ppgtt_bind_vma; + ppgtt->base.cleanup = gen6_ppgtt_cleanup; + ppgtt->debug_dump = gen6_dump_ppgtt; + DRM_DEBUG_DRIVER("Allocated pde space (%lldM) at GTT entry: %llx\n", ppgtt->node.size >> 20, ppgtt->node.start / PAGE_SIZE); -- cgit From e9728bd888e1fa2d5c1f77de8817a96eeba31747 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Thu, 2 Mar 2017 19:14:51 +0200 Subject: drm/i915: Track visible planes in a bitmask MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In a lot of place we wish to know which planes on the crtc are actually visible, or how many of them there are. Let's start tracking that in a bitmask in the crtc state. We already track enabled planes (ie. ones with an fb and crtc specified by the user) but that's not quite the same thing as enabled planes may still end up being invisible due to clipping and whatnot. Signed-off-by: Ville Syrjälä Reviewed-by: Maarten Lankhorst Link: http://patchwork.freedesktop.org/patch/msgid/20170302171508.1666-2-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/intel_atomic_plane.c | 6 +++ drivers/gpu/drm/i915/intel_display.c | 74 +++++++++++++++++++++---------- drivers/gpu/drm/i915/intel_drv.h | 3 ++ 3 files changed, 60 insertions(+), 23 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_atomic_plane.c b/drivers/gpu/drm/i915/intel_atomic_plane.c index 41fd94e62d3c..1eaf840cf9ff 100644 --- a/drivers/gpu/drm/i915/intel_atomic_plane.c +++ b/drivers/gpu/drm/i915/intel_atomic_plane.c @@ -189,6 +189,12 @@ int intel_plane_atomic_check_with_state(struct intel_crtc_state *crtc_state, if (ret) return ret; + /* FIXME pre-g4x don't work like this */ + if (intel_state->base.visible) + crtc_state->active_planes |= BIT(intel_plane->id); + else + crtc_state->active_planes &= ~BIT(intel_plane->id); + return intel_plane_atomic_calc_changes(&crtc_state->base, state); } diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index b2b8a3b9f982..c4301e0df022 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -2670,6 +2670,29 @@ update_state_fb(struct drm_plane *plane) drm_framebuffer_reference(plane->state->fb); } +static void +intel_set_plane_visible(struct intel_crtc_state *crtc_state, + struct intel_plane_state *plane_state, + bool visible) +{ + struct intel_plane *plane = to_intel_plane(plane_state->base.plane); + + plane_state->base.visible = visible; + + /* FIXME pre-g4x don't work like this */ + if (visible) { + crtc_state->base.plane_mask |= BIT(drm_plane_index(&plane->base)); + crtc_state->active_planes |= BIT(plane->id); + } else { + crtc_state->base.plane_mask &= ~BIT(drm_plane_index(&plane->base)); + crtc_state->active_planes &= ~BIT(plane->id); + } + + DRM_DEBUG_KMS("%s active planes 0x%x\n", + crtc_state->base.crtc->name, + crtc_state->active_planes); +} + static void intel_find_initial_plane_obj(struct intel_crtc *intel_crtc, struct intel_initial_plane_config *plane_config) @@ -2727,8 +2750,9 @@ intel_find_initial_plane_obj(struct intel_crtc *intel_crtc, * simplest solution is to just disable the primary plane now and * pretend the BIOS never had it enabled. */ - plane_state->visible = false; - crtc_state->plane_mask &= ~(1 << drm_plane_index(primary)); + intel_set_plane_visible(to_intel_crtc_state(crtc_state), + to_intel_plane_state(plane_state), + false); intel_pre_disable_primary_noatomic(&intel_crtc->base); intel_plane->disable_plane(primary, &intel_crtc->base); @@ -2768,7 +2792,11 @@ valid_fb: drm_framebuffer_reference(fb); primary->fb = primary->state->fb = fb; primary->crtc = primary->state->crtc = &intel_crtc->base; - intel_crtc->base.state->plane_mask |= (1 << drm_plane_index(primary)); + + intel_set_plane_visible(to_intel_crtc_state(crtc_state), + to_intel_plane_state(plane_state), + true); + atomic_or(to_intel_plane(primary)->frontbuffer_bit, &obj->frontbuffer_bits); } @@ -10756,11 +10784,11 @@ int intel_plane_atomic_calc_changes(struct drm_crtc_state *crtc_state, struct intel_crtc_state *pipe_config = to_intel_crtc_state(crtc_state); struct drm_crtc *crtc = crtc_state->crtc; struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - struct drm_plane *plane = plane_state->plane; + struct intel_plane *plane = to_intel_plane(plane_state->plane); struct drm_device *dev = crtc->dev; struct drm_i915_private *dev_priv = to_i915(dev); struct intel_plane_state *old_plane_state = - to_intel_plane_state(plane->state); + to_intel_plane_state(plane->base.state); bool mode_changed = needs_modeset(crtc_state); bool was_crtc_enabled = crtc->state->active; bool is_crtc_enabled = crtc_state->active; @@ -10768,7 +10796,7 @@ int intel_plane_atomic_calc_changes(struct drm_crtc_state *crtc_state, struct drm_framebuffer *fb = plane_state->fb; int ret; - if (INTEL_GEN(dev_priv) >= 9 && plane->type != DRM_PLANE_TYPE_CURSOR) { + if (INTEL_GEN(dev_priv) >= 9 && plane->id != PLANE_CURSOR) { ret = skl_update_scaler_plane( to_intel_crtc_state(crtc_state), to_intel_plane_state(plane_state)); @@ -10792,8 +10820,10 @@ int intel_plane_atomic_calc_changes(struct drm_crtc_state *crtc_state, * per-plane wm computation to the .check_plane() hook, and * only combine the results from all planes in the current place? */ - if (!is_crtc_enabled) + if (!is_crtc_enabled) { plane_state->visible = visible = false; + to_intel_crtc_state(crtc_state)->active_planes &= ~BIT(plane->id); + } if (!was_visible && !visible) return 0; @@ -10805,13 +10835,12 @@ int intel_plane_atomic_calc_changes(struct drm_crtc_state *crtc_state, turn_on = visible && (!was_visible || mode_changed); DRM_DEBUG_ATOMIC("[CRTC:%d:%s] has [PLANE:%d:%s] with fb %i\n", - intel_crtc->base.base.id, - intel_crtc->base.name, - plane->base.id, plane->name, + intel_crtc->base.base.id, intel_crtc->base.name, + plane->base.base.id, plane->base.name, fb ? fb->base.id : -1); DRM_DEBUG_ATOMIC("[PLANE:%d:%s] visible %i -> %i, off %i, on %i, ms %i\n", - plane->base.id, plane->name, + plane->base.base.id, plane->base.name, was_visible, visible, turn_off, turn_on, mode_changed); @@ -10819,15 +10848,15 @@ int intel_plane_atomic_calc_changes(struct drm_crtc_state *crtc_state, pipe_config->update_wm_pre = true; /* must disable cxsr around plane enable/disable */ - if (plane->type != DRM_PLANE_TYPE_CURSOR) + if (plane->id != PLANE_CURSOR) pipe_config->disable_cxsr = true; } else if (turn_off) { pipe_config->update_wm_post = true; /* must disable cxsr around plane enable/disable */ - if (plane->type != DRM_PLANE_TYPE_CURSOR) + if (plane->id != PLANE_CURSOR) pipe_config->disable_cxsr = true; - } else if (intel_wm_need_update(plane, plane_state)) { + } else if (intel_wm_need_update(&plane->base, plane_state)) { /* FIXME bollocks */ pipe_config->update_wm_pre = true; pipe_config->update_wm_post = true; @@ -10839,7 +10868,7 @@ int intel_plane_atomic_calc_changes(struct drm_crtc_state *crtc_state, to_intel_crtc_state(crtc_state)->wm.need_postvbl_update = true; if (visible || was_visible) - pipe_config->fb_bits |= to_intel_plane(plane)->frontbuffer_bit; + pipe_config->fb_bits |= plane->frontbuffer_bit; /* * WaCxSRDisabledForSpriteScaling:ivb @@ -10847,7 +10876,7 @@ int intel_plane_atomic_calc_changes(struct drm_crtc_state *crtc_state, * cstate->update_wm was already set above, so this flag will * take effect when we commit and program watermarks. */ - if (plane->type == DRM_PLANE_TYPE_OVERLAY && IS_IVYBRIDGE(dev_priv) && + if (plane->id == PLANE_SPRITE0 && IS_IVYBRIDGE(dev_priv) && needs_scaling(to_intel_plane_state(plane_state)) && !needs_scaling(old_plane_state)) pipe_config->disable_lp_wm = true; @@ -15260,15 +15289,14 @@ static bool primary_get_hw_state(struct intel_plane *plane) /* FIXME read out full plane state for all planes */ static void readout_plane_state(struct intel_crtc *crtc) { - struct drm_plane *primary = crtc->base.primary; - struct intel_plane_state *plane_state = - to_intel_plane_state(primary->state); + struct intel_plane *primary = to_intel_plane(crtc->base.primary); + bool visible; - plane_state->base.visible = crtc->active && - primary_get_hw_state(to_intel_plane(primary)); + visible = crtc->active && primary_get_hw_state(primary); - if (plane_state->base.visible) - crtc->base.state->plane_mask |= 1 << drm_plane_index(primary); + intel_set_plane_visible(to_intel_crtc_state(crtc->base.state), + to_intel_plane_state(primary->base.state), + visible); } static void intel_modeset_readout_hw_state(struct drm_device *dev) diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 9434c293a581..708311837faf 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -695,6 +695,9 @@ struct intel_crtc_state { /* Gamma mode programmed on the pipe */ uint32_t gamma_mode; + + /* bitmask of visible planes (enum plane_id) */ + u8 active_planes; }; struct vlv_wm_state { -- cgit From f07d43d2da1cbf4f335359a5fe39f773133c2be4 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Thu, 2 Mar 2017 19:14:52 +0200 Subject: drm/i915: Track plane fifo sizes under intel_crtc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Track the plane fifo sizes under intel_crtc instead of under each intel_plane. Avoids looping over the planes in a bunch of places, and later we'll move this tracking into the crtc state properly. v2: Nuke intel_plane_wm_parameters (Maarten) Signed-off-by: Ville Syrjälä Reviewed-by: Maarten Lankhorst Link: http://patchwork.freedesktop.org/patch/msgid/20170302171508.1666-3-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/intel_drv.h | 32 ++--------- drivers/gpu/drm/i915/intel_pm.c | 115 ++++++++++++++++----------------------- 2 files changed, 54 insertions(+), 93 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 708311837faf..0ffe527e1099 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -709,6 +709,10 @@ struct vlv_wm_state { bool cxsr; }; +struct vlv_fifo_state { + uint16_t plane[I915_MAX_PLANES]; +}; + struct intel_crtc { struct drm_crtc base; enum pipe pipe; @@ -758,6 +762,8 @@ struct intel_crtc { /* allow CxSR on this pipe */ bool cxsr_allowed; + + struct vlv_fifo_state fifo_state; } wm; int scanline_offset; @@ -775,25 +781,6 @@ struct intel_crtc { struct vlv_wm_state wm_state; }; -struct intel_plane_wm_parameters { - uint32_t horiz_pixels; - uint32_t vert_pixels; - /* - * For packed pixel formats: - * bytes_per_pixel - holds bytes per pixel - * For planar pixel formats: - * bytes_per_pixel - holds bytes per pixel for uv-plane - * y_bytes_per_pixel - holds bytes per pixel for y-plane - */ - uint8_t bytes_per_pixel; - uint8_t y_bytes_per_pixel; - bool enabled; - bool scaled; - u64 tiling; - unsigned int rotation; - uint16_t fifo_size; -}; - struct intel_plane { struct drm_plane base; u8 plane; @@ -803,13 +790,6 @@ struct intel_plane { int max_downscale; uint32_t frontbuffer_bit; - /* Since we need to change the watermarks before/after - * enabling/disabling the planes, we need to store the parameters here - * as the other pieces of the struct may not reflect the values we want - * for the watermark calculations. Currently only Haswell uses this. - */ - struct intel_plane_wm_parameters wm; - /* * NOTE: Do not place new plane state fields here (e.g., when adding * new plane properties). New runtime state should now be placed in diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 50e672f84276..7047b35d0e1d 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -415,15 +415,14 @@ static const int pessimal_latency_ns = 5000; #define VLV_FIFO_START(dsparb, dsparb2, lo_shift, hi_shift) \ ((((dsparb) >> (lo_shift)) & 0xff) | ((((dsparb2) >> (hi_shift)) & 0x1) << 8)) -static int vlv_get_fifo_size(struct intel_plane *plane) +static void vlv_get_fifo_size(struct intel_crtc *crtc) { - struct drm_i915_private *dev_priv = to_i915(plane->base.dev); - int sprite0_start, sprite1_start, size; - - if (plane->id == PLANE_CURSOR) - return 63; + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + struct vlv_fifo_state *fifo_state = &crtc->wm.fifo_state; + enum pipe pipe = crtc->pipe; + int sprite0_start, sprite1_start; - switch (plane->pipe) { + switch (pipe) { uint32_t dsparb, dsparb2, dsparb3; case PIPE_A: dsparb = I915_READ(DSPARB); @@ -444,26 +443,21 @@ static int vlv_get_fifo_size(struct intel_plane *plane) sprite1_start = VLV_FIFO_START(dsparb3, dsparb2, 8, 20); break; default: - return 0; - } - - switch (plane->id) { - case PLANE_PRIMARY: - size = sprite0_start; - break; - case PLANE_SPRITE0: - size = sprite1_start - sprite0_start; - break; - case PLANE_SPRITE1: - size = 512 - 1 - sprite1_start; - break; - default: - return 0; + MISSING_CASE(pipe); + return; } - DRM_DEBUG_KMS("%s FIFO size: %d\n", plane->base.name, size); + fifo_state->plane[PLANE_PRIMARY] = sprite0_start; + fifo_state->plane[PLANE_SPRITE0] = sprite1_start - sprite0_start; + fifo_state->plane[PLANE_SPRITE1] = 511 - sprite1_start; + fifo_state->plane[PLANE_CURSOR] = 63; - return size; + DRM_DEBUG_KMS("Pipe %c FIFO size: %d/%d/%d/%d\n", + pipe_name(pipe), + fifo_state->plane[PLANE_PRIMARY], + fifo_state->plane[PLANE_SPRITE0], + fifo_state->plane[PLANE_SPRITE1], + fifo_state->plane[PLANE_CURSOR]); } static int i9xx_get_fifo_size(struct drm_i915_private *dev_priv, int plane) @@ -1041,8 +1035,9 @@ static uint16_t vlv_compute_wm_level(const struct intel_crtc_state *crtc_state, static void vlv_compute_fifo(struct intel_crtc *crtc) { - struct drm_device *dev = crtc->base.dev; struct vlv_wm_state *wm_state = &crtc->wm_state; + struct vlv_fifo_state *fifo_state = &crtc->wm.fifo_state; + struct drm_device *dev = crtc->base.dev; struct intel_plane *plane; unsigned int total_rate = 0; const int fifo_size = 512 - 1; @@ -1052,7 +1047,7 @@ static void vlv_compute_fifo(struct intel_crtc *crtc) struct intel_plane_state *state = to_intel_plane_state(plane->base.state); - if (plane->base.type == DRM_PLANE_TYPE_CURSOR) + if (plane->id == PLANE_CURSOR) continue; if (state->base.visible) { @@ -1066,19 +1061,19 @@ static void vlv_compute_fifo(struct intel_crtc *crtc) to_intel_plane_state(plane->base.state); unsigned int rate; - if (plane->base.type == DRM_PLANE_TYPE_CURSOR) { - plane->wm.fifo_size = 63; + if (plane->id == PLANE_CURSOR) { + fifo_state->plane[plane->id] = 63; continue; } if (!state->base.visible) { - plane->wm.fifo_size = 0; + fifo_state->plane[plane->id] = 0; continue; } rate = state->base.fb->format->cpp[0]; - plane->wm.fifo_size = fifo_size * rate / total_rate; - fifo_left -= plane->wm.fifo_size; + fifo_state->plane[plane->id] = fifo_size * rate / total_rate; + fifo_left -= fifo_state->plane[plane->id]; } fifo_extra = DIV_ROUND_UP(fifo_left, wm_state->num_active_planes ?: 1); @@ -1090,16 +1085,16 @@ static void vlv_compute_fifo(struct intel_crtc *crtc) if (fifo_left == 0) break; - if (plane->base.type == DRM_PLANE_TYPE_CURSOR) + if (plane->id == PLANE_CURSOR) continue; /* give it all to the first plane if none are active */ - if (plane->wm.fifo_size == 0 && + if (fifo_state->plane[plane->id] == 0 && wm_state->num_active_planes) continue; plane_extra = min(fifo_extra, fifo_left); - plane->wm.fifo_size += plane_extra; + fifo_state->plane[plane->id] += plane_extra; fifo_left -= plane_extra; } @@ -1121,9 +1116,10 @@ static void vlv_invert_wms(struct intel_crtc *crtc) for (level = 0; level < wm_state->num_levels; level++) { struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + const struct vlv_fifo_state *fifo_state = &crtc->wm.fifo_state; const int sr_fifo_size = INTEL_INFO(dev_priv)->num_pipes * 512 - 1; - struct intel_plane *plane; + enum plane_id plane_id; wm_state->sr[level].plane = vlv_invert_wm_value(wm_state->sr[level].plane, @@ -1132,10 +1128,10 @@ static void vlv_invert_wms(struct intel_crtc *crtc) vlv_invert_wm_value(wm_state->sr[level].cursor, 63); - for_each_intel_plane_on_crtc(&dev_priv->drm, crtc, plane) { - wm_state->wm[level].plane[plane->id] = - vlv_invert_wm_value(wm_state->wm[level].plane[plane->id], - plane->wm.fifo_size); + for_each_plane_id_on_crtc(crtc, plane_id) { + wm_state->wm[level].plane[plane_id] = + vlv_invert_wm_value(wm_state->wm[level].plane[plane_id], + fifo_state->plane[plane_id]); } } } @@ -1144,6 +1140,7 @@ static void vlv_compute_wm(struct intel_crtc *crtc) { struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); struct vlv_wm_state *wm_state = &crtc->wm_state; + const struct vlv_fifo_state *fifo_state = &crtc->wm.fifo_state; struct intel_plane *plane; int level; @@ -1170,7 +1167,7 @@ static void vlv_compute_wm(struct intel_crtc *crtc) /* normal watermarks */ for (level = 0; level < wm_state->num_levels; level++) { int wm = vlv_compute_wm_level(crtc->config, state, level); - int max_wm = plane->wm.fifo_size; + int max_wm = fifo_state->plane[plane->id]; /* hack */ if (WARN_ON(level == 0 && wm > max_wm)) @@ -1214,32 +1211,16 @@ static void vlv_compute_wm(struct intel_crtc *crtc) static void vlv_pipe_set_fifo_size(struct intel_crtc *crtc) { - struct drm_device *dev = crtc->base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_plane *plane; - int sprite0_start = 0, sprite1_start = 0, fifo_size = 0; + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + const struct vlv_fifo_state *fifo_state = &crtc->wm.fifo_state; + int sprite0_start, sprite1_start, fifo_size; - for_each_intel_plane_on_crtc(dev, crtc, plane) { - switch (plane->id) { - case PLANE_PRIMARY: - sprite0_start = plane->wm.fifo_size; - break; - case PLANE_SPRITE0: - sprite1_start = sprite0_start + plane->wm.fifo_size; - break; - case PLANE_SPRITE1: - fifo_size = sprite1_start + plane->wm.fifo_size; - break; - case PLANE_CURSOR: - WARN_ON(plane->wm.fifo_size != 63); - break; - default: - MISSING_CASE(plane->id); - break; - } - } + sprite0_start = fifo_state->plane[PLANE_PRIMARY]; + sprite1_start = fifo_state->plane[PLANE_SPRITE0] + sprite0_start; + fifo_size = fifo_state->plane[PLANE_SPRITE1] + sprite1_start; - WARN_ON(fifo_size != 512 - 1); + WARN_ON(fifo_state->plane[PLANE_CURSOR] != 63); + WARN_ON(fifo_size != 511); DRM_DEBUG_KMS("Pipe %c FIFO split %d / %d / %d\n", pipe_name(crtc->pipe), sprite0_start, @@ -4531,14 +4512,14 @@ void vlv_wm_get_hw_state(struct drm_device *dev) { struct drm_i915_private *dev_priv = to_i915(dev); struct vlv_wm_values *wm = &dev_priv->wm.vlv; - struct intel_plane *plane; + struct intel_crtc *crtc; enum pipe pipe; u32 val; vlv_read_wm_values(dev_priv, wm); - for_each_intel_plane(dev, plane) - plane->wm.fifo_size = vlv_get_fifo_size(plane); + for_each_intel_crtc(dev, crtc) + vlv_get_fifo_size(crtc); wm->cxsr = I915_READ(FW_BLC_SELF_VLV) & FW_CSPWRDWNEN; wm->level = VLV_WM_LEVEL_PM2; -- cgit From 7eb4941f048f317b59d5bd5683baf76b440dc891 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Thu, 2 Mar 2017 19:14:53 +0200 Subject: drm/i915: Move vlv wms from crtc->wm_state to crtc->wm.active.vlv MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In an effort to make the vlv/chv wm code look and behave more like the ilk+ code, let's move the current active wms next to the corresponding ilk wms. Signed-off-by: Ville Syrjälä Reviewed-by: Maarten Lankhorst Link: http://patchwork.freedesktop.org/patch/msgid/20170302171508.1666-4-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/intel_drv.h | 3 +-- drivers/gpu/drm/i915/intel_pm.c | 10 +++++----- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 0ffe527e1099..8b1900786606 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -758,6 +758,7 @@ struct intel_crtc { /* watermarks currently being used */ union { struct intel_pipe_wm ilk; + struct vlv_wm_state vlv; } active; /* allow CxSR on this pipe */ @@ -777,8 +778,6 @@ struct intel_crtc { /* scalers available on this crtc */ int num_scalers; - - struct vlv_wm_state wm_state; }; struct intel_plane { diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 7047b35d0e1d..b06f13fa921f 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -1035,7 +1035,7 @@ static uint16_t vlv_compute_wm_level(const struct intel_crtc_state *crtc_state, static void vlv_compute_fifo(struct intel_crtc *crtc) { - struct vlv_wm_state *wm_state = &crtc->wm_state; + struct vlv_wm_state *wm_state = &crtc->wm.active.vlv; struct vlv_fifo_state *fifo_state = &crtc->wm.fifo_state; struct drm_device *dev = crtc->base.dev; struct intel_plane *plane; @@ -1111,7 +1111,7 @@ static u16 vlv_invert_wm_value(u16 wm, u16 fifo_size) static void vlv_invert_wms(struct intel_crtc *crtc) { - struct vlv_wm_state *wm_state = &crtc->wm_state; + struct vlv_wm_state *wm_state = &crtc->wm.active.vlv; int level; for (level = 0; level < wm_state->num_levels; level++) { @@ -1139,7 +1139,7 @@ static void vlv_invert_wms(struct intel_crtc *crtc) static void vlv_compute_wm(struct intel_crtc *crtc) { struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); - struct vlv_wm_state *wm_state = &crtc->wm_state; + struct vlv_wm_state *wm_state = &crtc->wm.active.vlv; const struct vlv_fifo_state *fifo_state = &crtc->wm.fifo_state; struct intel_plane *plane; int level; @@ -1302,7 +1302,7 @@ static void vlv_merge_wm(struct drm_i915_private *dev_priv, wm->cxsr = true; for_each_intel_crtc(&dev_priv->drm, crtc) { - const struct vlv_wm_state *wm_state = &crtc->wm_state; + const struct vlv_wm_state *wm_state = &crtc->wm.active.vlv; if (!crtc->active) continue; @@ -1321,7 +1321,7 @@ static void vlv_merge_wm(struct drm_i915_private *dev_priv, wm->level = VLV_WM_LEVEL_PM2; for_each_intel_crtc(&dev_priv->drm, crtc) { - struct vlv_wm_state *wm_state = &crtc->wm_state; + const struct vlv_wm_state *wm_state = &crtc->wm.active.vlv; enum pipe pipe = crtc->pipe; if (!crtc->active) -- cgit From 855c79f5212acced123c1a6be3c12601f45e8da9 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Thu, 2 Mar 2017 19:14:54 +0200 Subject: drm/i915: Plop vlv wm state into crtc_state MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Relocate the vlv/chv wm state to live under intel_crtc_state. Note that for now this just behaves as a temporary storage. But it'll be easier to conver the thing over to properly pre-computing the state when it's already in the right place. Signed-off-by: Ville Syrjälä Reviewed-by: Maarten Lankhorst Link: http://patchwork.freedesktop.org/patch/msgid/20170302171508.1666-5-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/intel_drv.h | 30 +++++++++++++++++++++--------- drivers/gpu/drm/i915/intel_pm.c | 32 ++++++++++++++++---------------- 2 files changed, 37 insertions(+), 25 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 8b1900786606..6d3323b9204b 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -492,6 +492,22 @@ struct skl_pipe_wm { uint32_t linetime; }; +enum vlv_wm_level { + VLV_WM_LEVEL_PM2, + VLV_WM_LEVEL_PM5, + VLV_WM_LEVEL_DDR_DVFS, + NUM_VLV_WM_LEVELS, +}; + +struct vlv_wm_state { + struct vlv_pipe_wm wm[NUM_VLV_WM_LEVELS]; + struct vlv_sr_wm sr[NUM_VLV_WM_LEVELS]; + uint8_t num_active_planes; + uint8_t num_levels; + uint8_t level; + bool cxsr; +}; + struct intel_crtc_wm_state { union { struct { @@ -516,6 +532,11 @@ struct intel_crtc_wm_state { struct skl_pipe_wm optimal; struct skl_ddb_entry ddb; } skl; + + struct { + /* optimal watermarks (inverted) */ + struct vlv_wm_state optimal; + } vlv; }; /* @@ -700,15 +721,6 @@ struct intel_crtc_state { u8 active_planes; }; -struct vlv_wm_state { - struct vlv_pipe_wm wm[3]; - struct vlv_sr_wm sr[3]; - uint8_t num_active_planes; - uint8_t num_levels; - uint8_t level; - bool cxsr; -}; - struct vlv_fifo_state { uint16_t plane[I915_MAX_PLANES]; }; diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index b06f13fa921f..dfa85ec35482 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -957,12 +957,6 @@ static void vlv_write_wm_values(struct drm_i915_private *dev_priv, #undef FW_WM_VLV -enum vlv_wm_level { - VLV_WM_LEVEL_PM2, - VLV_WM_LEVEL_PM5, - VLV_WM_LEVEL_DDR_DVFS, -}; - /* latency must be in 0.1us units. */ static unsigned int vlv_wm_method2(unsigned int pixel_rate, unsigned int pipe_htotal, @@ -1033,9 +1027,10 @@ static uint16_t vlv_compute_wm_level(const struct intel_crtc_state *crtc_state, return min_t(int, wm, USHRT_MAX); } -static void vlv_compute_fifo(struct intel_crtc *crtc) +static void vlv_compute_fifo(struct intel_crtc_state *crtc_state) { - struct vlv_wm_state *wm_state = &crtc->wm.active.vlv; + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct vlv_wm_state *wm_state = &crtc_state->wm.vlv.optimal; struct vlv_fifo_state *fifo_state = &crtc->wm.fifo_state; struct drm_device *dev = crtc->base.dev; struct intel_plane *plane; @@ -1109,9 +1104,10 @@ static u16 vlv_invert_wm_value(u16 wm, u16 fifo_size) return fifo_size - wm; } -static void vlv_invert_wms(struct intel_crtc *crtc) +static void vlv_invert_wms(struct intel_crtc_state *crtc_state) { - struct vlv_wm_state *wm_state = &crtc->wm.active.vlv; + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct vlv_wm_state *wm_state = &crtc_state->wm.vlv.optimal; int level; for (level = 0; level < wm_state->num_levels; level++) { @@ -1136,10 +1132,11 @@ static void vlv_invert_wms(struct intel_crtc *crtc) } } -static void vlv_compute_wm(struct intel_crtc *crtc) +static void vlv_compute_wm(struct intel_crtc_state *crtc_state) { + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); - struct vlv_wm_state *wm_state = &crtc->wm.active.vlv; + struct vlv_wm_state *wm_state = &crtc_state->wm.vlv.optimal; const struct vlv_fifo_state *fifo_state = &crtc->wm.fifo_state; struct intel_plane *plane; int level; @@ -1151,7 +1148,7 @@ static void vlv_compute_wm(struct intel_crtc *crtc) wm_state->num_active_planes = 0; - vlv_compute_fifo(crtc); + vlv_compute_fifo(crtc_state); if (wm_state->num_active_planes != 1) wm_state->cxsr = false; @@ -1166,7 +1163,7 @@ static void vlv_compute_wm(struct intel_crtc *crtc) /* normal watermarks */ for (level = 0; level < wm_state->num_levels; level++) { - int wm = vlv_compute_wm_level(crtc->config, state, level); + int wm = vlv_compute_wm_level(crtc_state, state, level); int max_wm = fifo_state->plane[plane->id]; /* hack */ @@ -1203,7 +1200,7 @@ static void vlv_compute_wm(struct intel_crtc *crtc) memset(&wm_state->sr[level], 0, sizeof(wm_state->sr[level])); } - vlv_invert_wms(crtc); + vlv_invert_wms(crtc_state); } #define VLV_FIFO(plane, value) \ @@ -1351,11 +1348,14 @@ static bool is_enabling(int old, int new, int threshold) static void vlv_update_wm(struct intel_crtc *crtc) { struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + struct intel_crtc_state *crtc_state = + to_intel_crtc_state(crtc->base.state); enum pipe pipe = crtc->pipe; struct vlv_wm_values *old_wm = &dev_priv->wm.vlv; struct vlv_wm_values new_wm = {}; - vlv_compute_wm(crtc); + vlv_compute_wm(crtc_state); + crtc->wm.active.vlv = crtc_state->wm.vlv.optimal; vlv_merge_wm(dev_priv, &new_wm); if (memcmp(old_wm, &new_wm, sizeof(new_wm)) == 0) { -- cgit From 814e7f0bf7b7d5805a5ef030348c25647103b6a8 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Thu, 2 Mar 2017 19:14:55 +0200 Subject: drm/i915: Plop vlv/chv fifo sizes into crtc state MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move the vlv/chv FIFO size tracking into the crtc_state. As with the wms for now this just acts as temporary storage. Signed-off-by: Ville Syrjälä Reviewed-by: Maarten Lankhorst Link: http://patchwork.freedesktop.org/patch/msgid/20170302171508.1666-6-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/intel_drv.h | 12 ++++++------ drivers/gpu/drm/i915/intel_pm.c | 26 +++++++++++++++----------- 2 files changed, 21 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 6d3323b9204b..26b0ce0837df 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -508,6 +508,10 @@ struct vlv_wm_state { bool cxsr; }; +struct vlv_fifo_state { + u16 plane[I915_MAX_PLANES]; +}; + struct intel_crtc_wm_state { union { struct { @@ -536,6 +540,8 @@ struct intel_crtc_wm_state { struct { /* optimal watermarks (inverted) */ struct vlv_wm_state optimal; + /* display FIFO split */ + struct vlv_fifo_state fifo_state; } vlv; }; @@ -721,10 +727,6 @@ struct intel_crtc_state { u8 active_planes; }; -struct vlv_fifo_state { - uint16_t plane[I915_MAX_PLANES]; -}; - struct intel_crtc { struct drm_crtc base; enum pipe pipe; @@ -775,8 +777,6 @@ struct intel_crtc { /* allow CxSR on this pipe */ bool cxsr_allowed; - - struct vlv_fifo_state fifo_state; } wm; int scanline_offset; diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index dfa85ec35482..063bb531703c 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -415,10 +415,11 @@ static const int pessimal_latency_ns = 5000; #define VLV_FIFO_START(dsparb, dsparb2, lo_shift, hi_shift) \ ((((dsparb) >> (lo_shift)) & 0xff) | ((((dsparb2) >> (hi_shift)) & 0x1) << 8)) -static void vlv_get_fifo_size(struct intel_crtc *crtc) +static void vlv_get_fifo_size(struct intel_crtc_state *crtc_state) { + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); - struct vlv_fifo_state *fifo_state = &crtc->wm.fifo_state; + struct vlv_fifo_state *fifo_state = &crtc_state->wm.vlv.fifo_state; enum pipe pipe = crtc->pipe; int sprite0_start, sprite1_start; @@ -1031,7 +1032,7 @@ static void vlv_compute_fifo(struct intel_crtc_state *crtc_state) { struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); struct vlv_wm_state *wm_state = &crtc_state->wm.vlv.optimal; - struct vlv_fifo_state *fifo_state = &crtc->wm.fifo_state; + struct vlv_fifo_state *fifo_state = &crtc_state->wm.vlv.fifo_state; struct drm_device *dev = crtc->base.dev; struct intel_plane *plane; unsigned int total_rate = 0; @@ -1108,11 +1109,12 @@ static void vlv_invert_wms(struct intel_crtc_state *crtc_state) { struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); struct vlv_wm_state *wm_state = &crtc_state->wm.vlv.optimal; + const struct vlv_fifo_state *fifo_state = + &crtc_state->wm.vlv.fifo_state; int level; for (level = 0; level < wm_state->num_levels; level++) { struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); - const struct vlv_fifo_state *fifo_state = &crtc->wm.fifo_state; const int sr_fifo_size = INTEL_INFO(dev_priv)->num_pipes * 512 - 1; enum plane_id plane_id; @@ -1137,7 +1139,8 @@ static void vlv_compute_wm(struct intel_crtc_state *crtc_state) struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); struct vlv_wm_state *wm_state = &crtc_state->wm.vlv.optimal; - const struct vlv_fifo_state *fifo_state = &crtc->wm.fifo_state; + const struct vlv_fifo_state *fifo_state = + &crtc_state->wm.vlv.fifo_state; struct intel_plane *plane; int level; @@ -1206,10 +1209,12 @@ static void vlv_compute_wm(struct intel_crtc_state *crtc_state) #define VLV_FIFO(plane, value) \ (((value) << DSPARB_ ## plane ## _SHIFT_VLV) & DSPARB_ ## plane ## _MASK_VLV) -static void vlv_pipe_set_fifo_size(struct intel_crtc *crtc) +static void vlv_pipe_set_fifo_size(const struct intel_crtc_state *crtc_state) { + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); - const struct vlv_fifo_state *fifo_state = &crtc->wm.fifo_state; + const struct vlv_fifo_state *fifo_state = + &crtc_state->wm.vlv.fifo_state; int sprite0_start, sprite1_start, fifo_size; sprite0_start = fifo_state->plane[PLANE_PRIMARY]; @@ -1360,8 +1365,7 @@ static void vlv_update_wm(struct intel_crtc *crtc) if (memcmp(old_wm, &new_wm, sizeof(new_wm)) == 0) { /* FIXME should be part of crtc atomic commit */ - vlv_pipe_set_fifo_size(crtc); - + vlv_pipe_set_fifo_size(crtc_state); return; } @@ -1375,7 +1379,7 @@ static void vlv_update_wm(struct intel_crtc *crtc) _intel_set_memory_cxsr(dev_priv, false); /* FIXME should be part of crtc atomic commit */ - vlv_pipe_set_fifo_size(crtc); + vlv_pipe_set_fifo_size(crtc_state); vlv_write_wm_values(dev_priv, &new_wm); @@ -4519,7 +4523,7 @@ void vlv_wm_get_hw_state(struct drm_device *dev) vlv_read_wm_values(dev_priv, wm); for_each_intel_crtc(dev, crtc) - vlv_get_fifo_size(crtc); + vlv_get_fifo_size(to_intel_crtc_state(crtc->base.state)); wm->cxsr = I915_READ(FW_BLC_SELF_VLV) & FW_CSPWRDWNEN; wm->level = VLV_WM_LEVEL_PM2; -- cgit From 5012e604891a8ac3eafc7f59335dc688a6c388f6 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Thu, 2 Mar 2017 19:14:56 +0200 Subject: drm/i915: Compute VLV/CHV FIFO sizes based on the PM2 watermarks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Let's compute the watermarks first and the FIFO size second. This way we can make sure the FIFO split is the most accommodating to the watermarks. Previously we could have potentially computed a FIFO split that couldn't accommodate the PM2 watermarks simply due to a bad split even if the total FIFO size would have been sufficient. It'll also allow us to avoid recomputing the wms for all planes whenever the FIFO split would change. Thus we don't have to add any extra planes to the state when the FIFO needs to be repartitioned. To help with this we'll keep around copies of the non-inverted watermarks in the crtc state. For now that doesn't help too much, but once we start to do the watermark computation only for the planes that change we'll need the non-inverted values around for the other planes. v2: s/noninverted/raw/ for consistency with other platforms Fix the memset() of the "raw" watermarks Signed-off-by: Ville Syrjälä Reviewed-by: Maarten Lankhorst Link: http://patchwork.freedesktop.org/patch/msgid/20170302171508.1666-7-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/intel_drv.h | 2 + drivers/gpu/drm/i915/intel_pm.c | 111 ++++++++++++++++++--------------------- 2 files changed, 53 insertions(+), 60 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 26b0ce0837df..6f6c29cdae73 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -538,6 +538,8 @@ struct intel_crtc_wm_state { } skl; struct { + /* "raw" watermarks (not inverted) */ + struct vlv_pipe_wm raw[NUM_VLV_WM_LEVELS]; /* optimal watermarks (inverted) */ struct vlv_wm_state optimal; /* display FIFO split */ diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 063bb531703c..a6040abffceb 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -1028,73 +1028,70 @@ static uint16_t vlv_compute_wm_level(const struct intel_crtc_state *crtc_state, return min_t(int, wm, USHRT_MAX); } -static void vlv_compute_fifo(struct intel_crtc_state *crtc_state) +static int vlv_compute_fifo(struct intel_crtc_state *crtc_state) { struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); - struct vlv_wm_state *wm_state = &crtc_state->wm.vlv.optimal; + const struct vlv_pipe_wm *raw = + &crtc_state->wm.vlv.raw[VLV_WM_LEVEL_PM2]; struct vlv_fifo_state *fifo_state = &crtc_state->wm.vlv.fifo_state; - struct drm_device *dev = crtc->base.dev; - struct intel_plane *plane; - unsigned int total_rate = 0; - const int fifo_size = 512 - 1; + unsigned int active_planes = crtc_state->active_planes & ~BIT(PLANE_CURSOR); + int num_active_planes = hweight32(active_planes); + const int fifo_size = 511; int fifo_extra, fifo_left = fifo_size; + unsigned int total_rate; + enum plane_id plane_id; - for_each_intel_plane_on_crtc(dev, crtc, plane) { - struct intel_plane_state *state = - to_intel_plane_state(plane->base.state); + total_rate = raw->plane[PLANE_PRIMARY] + + raw->plane[PLANE_SPRITE0] + + raw->plane[PLANE_SPRITE1]; - if (plane->id == PLANE_CURSOR) - continue; + if (total_rate > fifo_size) + return -EINVAL; - if (state->base.visible) { - wm_state->num_active_planes++; - total_rate += state->base.fb->format->cpp[0]; - } - } + if (total_rate == 0) + total_rate = 1; - for_each_intel_plane_on_crtc(dev, crtc, plane) { - struct intel_plane_state *state = - to_intel_plane_state(plane->base.state); + for_each_plane_id_on_crtc(crtc, plane_id) { unsigned int rate; - if (plane->id == PLANE_CURSOR) { - fifo_state->plane[plane->id] = 63; - continue; - } - - if (!state->base.visible) { - fifo_state->plane[plane->id] = 0; + if ((active_planes & BIT(plane_id)) == 0) { + fifo_state->plane[plane_id] = 0; continue; } - rate = state->base.fb->format->cpp[0]; - fifo_state->plane[plane->id] = fifo_size * rate / total_rate; - fifo_left -= fifo_state->plane[plane->id]; + rate = raw->plane[plane_id]; + fifo_state->plane[plane_id] = fifo_size * rate / total_rate; + fifo_left -= fifo_state->plane[plane_id]; } - fifo_extra = DIV_ROUND_UP(fifo_left, wm_state->num_active_planes ?: 1); + fifo_state->plane[PLANE_CURSOR] = 63; + + fifo_extra = DIV_ROUND_UP(fifo_left, num_active_planes ?: 1); /* spread the remainder evenly */ - for_each_intel_plane_on_crtc(dev, crtc, plane) { + for_each_plane_id_on_crtc(crtc, plane_id) { int plane_extra; if (fifo_left == 0) break; - if (plane->id == PLANE_CURSOR) - continue; - - /* give it all to the first plane if none are active */ - if (fifo_state->plane[plane->id] == 0 && - wm_state->num_active_planes) + if ((active_planes & BIT(plane_id)) == 0) continue; plane_extra = min(fifo_extra, fifo_left); - fifo_state->plane[plane->id] += plane_extra; + fifo_state->plane[plane_id] += plane_extra; fifo_left -= plane_extra; } - WARN_ON(fifo_left != 0); + WARN_ON(active_planes != 0 && fifo_left != 0); + + /* give it all to the first plane if none are active */ + if (active_planes == 0) { + WARN_ON(fifo_left != fifo_size); + fifo_state->plane[PLANE_PRIMARY] = fifo_left; + } + + return 0; } static u16 vlv_invert_wm_value(u16 wm, u16 fifo_size) @@ -1139,35 +1136,31 @@ static void vlv_compute_wm(struct intel_crtc_state *crtc_state) struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); struct vlv_wm_state *wm_state = &crtc_state->wm.vlv.optimal; - const struct vlv_fifo_state *fifo_state = - &crtc_state->wm.vlv.fifo_state; struct intel_plane *plane; int level; memset(wm_state, 0, sizeof(*wm_state)); + memset(&crtc_state->wm.vlv.raw, 0, sizeof(crtc_state->wm.vlv.raw)); wm_state->cxsr = crtc->pipe != PIPE_C && crtc->wm.cxsr_allowed; wm_state->num_levels = dev_priv->wm.max_level + 1; wm_state->num_active_planes = 0; - vlv_compute_fifo(crtc_state); - if (wm_state->num_active_planes != 1) wm_state->cxsr = false; for_each_intel_plane_on_crtc(&dev_priv->drm, crtc, plane) { struct intel_plane_state *state = to_intel_plane_state(plane->base.state); - int level; if (!state->base.visible) continue; - /* normal watermarks */ for (level = 0; level < wm_state->num_levels; level++) { + struct vlv_pipe_wm *raw = &crtc_state->wm.vlv.raw[level]; int wm = vlv_compute_wm_level(crtc_state, state, level); - int max_wm = fifo_state->plane[plane->id]; + int max_wm = plane->id == PLANE_CURSOR ? 63 : 511; /* hack */ if (WARN_ON(level == 0 && wm > max_wm)) @@ -1176,25 +1169,23 @@ static void vlv_compute_wm(struct intel_crtc_state *crtc_state) if (wm > max_wm) break; - wm_state->wm[level].plane[plane->id] = wm; + raw->plane[plane->id] = wm; } wm_state->num_levels = level; + } - if (!wm_state->cxsr) - continue; + vlv_compute_fifo(crtc_state); - /* maxfifo watermarks */ - if (plane->id == PLANE_CURSOR) { - for (level = 0; level < wm_state->num_levels; level++) - wm_state->sr[level].cursor = - wm_state->wm[level].plane[PLANE_CURSOR]; - } else { - for (level = 0; level < wm_state->num_levels; level++) - wm_state->sr[level].plane = - max(wm_state->sr[level].plane, - wm_state->wm[level].plane[plane->id]); - } + for (level = 0; level < wm_state->num_levels; level++) { + struct vlv_pipe_wm *raw = &crtc_state->wm.vlv.raw[level]; + + wm_state->wm[level] = *raw; + + wm_state->sr[level].plane = max3(raw->plane[PLANE_PRIMARY], + raw->plane[PLANE_SPRITE0], + raw->plane[PLANE_SPRITE1]); + wm_state->sr[level].cursor = raw->plane[PLANE_CURSOR]; } /* clear any (partially) filled invalid levels */ -- cgit From ff32c54ef1ce774ad3640d4635803ad35d64b8e8 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Thu, 2 Mar 2017 19:14:57 +0200 Subject: drm/i915: Compute vlv/chv wms the atomic way MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Start computing the vlv/chv watermarks the atomic way, from the .compute_pipe_wm() hook. We'll recompute the actual watermarks for only planes that are part of the state, the other planes will keep their watermark from the last time it was computed. And the actual watermark programming will happen from the .initial_watermarks() hook. For now we'll just compute the optimal watermarks, and we'll hook up the intermediate watermarks properly later. The DSPARB registers responsible for the FIFO paritioning are double buffered, so they will be programming from intel_begin_crtc_commit(). v2: s/noninverted/raw/ for consistency with other platforms s/vlv_plane_wm_set/vlv_raw_plane_wm_set/ for clarity Signed-off-by: Ville Syrjälä Reviewed-by: Maarten Lankhorst Link: http://patchwork.freedesktop.org/patch/msgid/20170302171508.1666-8-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/i915_drv.h | 8 + drivers/gpu/drm/i915/intel_display.c | 21 ++- drivers/gpu/drm/i915/intel_drv.h | 2 - drivers/gpu/drm/i915/intel_pm.c | 302 +++++++++++++++++++++++------------ 4 files changed, 228 insertions(+), 105 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 07bf2e32ffa7..059335f23706 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -526,6 +526,14 @@ struct i915_hotplug { for_each_power_well_rev(__dev_priv, __power_well) \ for_each_if ((__power_well)->domains & (__domain_mask)) +#define for_each_intel_plane_in_state(__state, plane, plane_state, __i) \ + for ((__i) = 0; \ + (__i) < (__state)->base.dev->mode_config.num_total_plane && \ + ((plane) = to_intel_plane((__state)->base.planes[__i].ptr), \ + (plane_state) = to_intel_plane_state((__state)->base.planes[__i].state), 1); \ + (__i)++) \ + for_each_if (plane_state) + struct drm_i915_private; struct i915_mm_struct; struct i915_mmu_object; diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index c4301e0df022..45a7c7e18e3d 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -5681,6 +5681,8 @@ static void modeset_put_power_domains(struct drm_i915_private *dev_priv, static void valleyview_crtc_enable(struct intel_crtc_state *pipe_config, struct drm_atomic_state *old_state) { + struct intel_atomic_state *old_intel_state = + to_intel_atomic_state(old_state); struct drm_crtc *crtc = pipe_config->base.crtc; struct drm_device *dev = crtc->dev; struct drm_i915_private *dev_priv = to_i915(dev); @@ -5725,7 +5727,8 @@ static void valleyview_crtc_enable(struct intel_crtc_state *pipe_config, intel_color_load_luts(&pipe_config->base); - intel_update_watermarks(intel_crtc); + dev_priv->display.initial_watermarks(old_intel_state, + pipe_config); intel_enable_pipe(intel_crtc); assert_vblank_disabled(crtc); @@ -5842,6 +5845,9 @@ static void i9xx_crtc_disable(struct intel_crtc_state *old_crtc_state, if (!IS_GEN2(dev_priv)) intel_set_cpu_fifo_underrun_reporting(dev_priv, pipe, false); + + if (!dev_priv->display.initial_watermarks) + intel_update_watermarks(intel_crtc); } static void intel_crtc_disable_noatomic(struct drm_crtc *crtc) @@ -11265,10 +11271,13 @@ static bool check_digital_port_conflicts(struct drm_atomic_state *state) static void clear_intel_crtc_state(struct intel_crtc_state *crtc_state) { + struct drm_i915_private *dev_priv = + to_i915(crtc_state->base.crtc->dev); struct drm_crtc_state tmp_state; struct intel_crtc_scaler_state scaler_state; struct intel_dpll_hw_state dpll_hw_state; struct intel_shared_dpll *shared_dpll; + struct intel_crtc_wm_state wm_state; bool force_thru; /* FIXME: before the switch to atomic started, a new pipe_config was @@ -11281,6 +11290,8 @@ clear_intel_crtc_state(struct intel_crtc_state *crtc_state) shared_dpll = crtc_state->shared_dpll; dpll_hw_state = crtc_state->dpll_hw_state; force_thru = crtc_state->pch_pfit.force_thru; + if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) + wm_state = crtc_state->wm; memset(crtc_state, 0, sizeof *crtc_state); @@ -11289,6 +11300,8 @@ clear_intel_crtc_state(struct intel_crtc_state *crtc_state) crtc_state->shared_dpll = shared_dpll; crtc_state->dpll_hw_state = dpll_hw_state; crtc_state->pch_pfit.force_thru = force_thru; + if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) + crtc_state->wm = wm_state; } static int @@ -12801,12 +12814,12 @@ static void intel_atomic_commit_tail(struct drm_atomic_state *state) /* * Make sure we don't call initial_watermarks * for ILK-style watermark updates. + * + * No clue what this is supposed to achieve. */ - if (dev_priv->display.atomic_update_watermarks) + if (INTEL_GEN(dev_priv) >= 9) dev_priv->display.initial_watermarks(intel_state, to_intel_crtc_state(crtc->state)); - else - intel_update_watermarks(intel_crtc); } } } diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 6f6c29cdae73..2f23521f0e00 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -502,9 +502,7 @@ enum vlv_wm_level { struct vlv_wm_state { struct vlv_pipe_wm wm[NUM_VLV_WM_LEVELS]; struct vlv_sr_wm sr[NUM_VLV_WM_LEVELS]; - uint8_t num_active_planes; uint8_t num_levels; - uint8_t level; bool cxsr; }; diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index a6040abffceb..df76c4fd4cf3 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -1094,6 +1094,28 @@ static int vlv_compute_fifo(struct intel_crtc_state *crtc_state) return 0; } +static int vlv_num_wm_levels(struct drm_i915_private *dev_priv) +{ + return dev_priv->wm.max_level + 1; +} + +/* mark all levels starting from 'level' as invalid */ +static void vlv_invalidate_wms(struct intel_crtc *crtc, + struct vlv_wm_state *wm_state, int level) +{ + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + + for (; level < vlv_num_wm_levels(dev_priv); level++) { + enum plane_id plane_id; + + for_each_plane_id_on_crtc(crtc, plane_id) + wm_state->wm[level].plane[plane_id] = USHRT_MAX; + + wm_state->sr[level].cursor = USHRT_MAX; + wm_state->sr[level].plane = USHRT_MAX; + } +} + static u16 vlv_invert_wm_value(u16 wm, u16 fifo_size) { if (wm > fifo_size) @@ -1102,105 +1124,162 @@ static u16 vlv_invert_wm_value(u16 wm, u16 fifo_size) return fifo_size - wm; } -static void vlv_invert_wms(struct intel_crtc_state *crtc_state) +/* + * Starting from 'level' set all higher + * levels to 'value' in the "raw" watermarks. + */ +static void vlv_raw_plane_wm_set(struct intel_crtc_state *crtc_state, + int level, enum plane_id plane_id, u16 value) { - struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); - struct vlv_wm_state *wm_state = &crtc_state->wm.vlv.optimal; - const struct vlv_fifo_state *fifo_state = - &crtc_state->wm.vlv.fifo_state; - int level; - - for (level = 0; level < wm_state->num_levels; level++) { - struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); - const int sr_fifo_size = - INTEL_INFO(dev_priv)->num_pipes * 512 - 1; - enum plane_id plane_id; + struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev); + int num_levels = vlv_num_wm_levels(dev_priv); - wm_state->sr[level].plane = - vlv_invert_wm_value(wm_state->sr[level].plane, - sr_fifo_size); - wm_state->sr[level].cursor = - vlv_invert_wm_value(wm_state->sr[level].cursor, - 63); + for (; level < num_levels; level++) { + struct vlv_pipe_wm *raw = &crtc_state->wm.vlv.raw[level]; - for_each_plane_id_on_crtc(crtc, plane_id) { - wm_state->wm[level].plane[plane_id] = - vlv_invert_wm_value(wm_state->wm[level].plane[plane_id], - fifo_state->plane[plane_id]); - } + raw->plane[plane_id] = value; } } -static void vlv_compute_wm(struct intel_crtc_state *crtc_state) +static void vlv_plane_wm_compute(struct intel_crtc_state *crtc_state, + const struct intel_plane_state *plane_state) { - struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); - struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); - struct vlv_wm_state *wm_state = &crtc_state->wm.vlv.optimal; - struct intel_plane *plane; + struct intel_plane *plane = to_intel_plane(plane_state->base.plane); + enum plane_id plane_id = plane->id; + int num_levels = vlv_num_wm_levels(to_i915(plane->base.dev)); int level; - memset(wm_state, 0, sizeof(*wm_state)); - memset(&crtc_state->wm.vlv.raw, 0, sizeof(crtc_state->wm.vlv.raw)); + if (!plane_state->base.visible) { + vlv_raw_plane_wm_set(crtc_state, 0, plane_id, 0); + return; + } - wm_state->cxsr = crtc->pipe != PIPE_C && crtc->wm.cxsr_allowed; - wm_state->num_levels = dev_priv->wm.max_level + 1; + for (level = 0; level < num_levels; level++) { + struct vlv_pipe_wm *raw = &crtc_state->wm.vlv.raw[level]; + int wm = vlv_compute_wm_level(crtc_state, plane_state, level); + int max_wm = plane_id == PLANE_CURSOR ? 63 : 511; - wm_state->num_active_planes = 0; + /* FIXME just bail */ + if (WARN_ON(level == 0 && wm > max_wm)) + wm = max_wm; - if (wm_state->num_active_planes != 1) - wm_state->cxsr = false; + if (wm > max_wm) + break; - for_each_intel_plane_on_crtc(&dev_priv->drm, crtc, plane) { - struct intel_plane_state *state = - to_intel_plane_state(plane->base.state); + raw->plane[plane_id] = wm; + } - if (!state->base.visible) - continue; + /* mark all higher levels as invalid */ + vlv_raw_plane_wm_set(crtc_state, level, plane_id, USHRT_MAX); - for (level = 0; level < wm_state->num_levels; level++) { - struct vlv_pipe_wm *raw = &crtc_state->wm.vlv.raw[level]; - int wm = vlv_compute_wm_level(crtc_state, state, level); - int max_wm = plane->id == PLANE_CURSOR ? 63 : 511; + DRM_DEBUG_KMS("%s wms: [0]=%d,[1]=%d,[2]=%d\n", + plane->base.name, + crtc_state->wm.vlv.raw[VLV_WM_LEVEL_PM2].plane[plane_id], + crtc_state->wm.vlv.raw[VLV_WM_LEVEL_PM5].plane[plane_id], + crtc_state->wm.vlv.raw[VLV_WM_LEVEL_DDR_DVFS].plane[plane_id]); +} - /* hack */ - if (WARN_ON(level == 0 && wm > max_wm)) - wm = max_wm; +static bool vlv_plane_wm_is_valid(const struct intel_crtc_state *crtc_state, + enum plane_id plane_id, int level) +{ + const struct vlv_pipe_wm *raw = + &crtc_state->wm.vlv.raw[level]; + const struct vlv_fifo_state *fifo_state = + &crtc_state->wm.vlv.fifo_state; - if (wm > max_wm) - break; + return raw->plane[plane_id] <= fifo_state->plane[plane_id]; +} - raw->plane[plane->id] = wm; - } +static bool vlv_crtc_wm_is_valid(const struct intel_crtc_state *crtc_state, int level) +{ + return vlv_plane_wm_is_valid(crtc_state, PLANE_PRIMARY, level) && + vlv_plane_wm_is_valid(crtc_state, PLANE_SPRITE0, level) && + vlv_plane_wm_is_valid(crtc_state, PLANE_SPRITE1, level) && + vlv_plane_wm_is_valid(crtc_state, PLANE_CURSOR, level); +} + +static int vlv_compute_pipe_wm(struct intel_crtc_state *crtc_state) +{ + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + struct intel_atomic_state *state = + to_intel_atomic_state(crtc_state->base.state); + struct vlv_wm_state *wm_state = &crtc_state->wm.vlv.optimal; + const struct vlv_fifo_state *fifo_state = + &crtc_state->wm.vlv.fifo_state; + int num_active_planes = hweight32(crtc_state->active_planes & + ~BIT(PLANE_CURSOR)); + struct intel_plane_state *plane_state; + struct intel_plane *plane; + enum plane_id plane_id; + int level, ret, i; + + for_each_intel_plane_in_state(state, plane, plane_state, i) { + const struct intel_plane_state *old_plane_state = + to_intel_plane_state(plane->base.state); + + if (plane_state->base.crtc != &crtc->base && + old_plane_state->base.crtc != &crtc->base) + continue; - wm_state->num_levels = level; + vlv_plane_wm_compute(crtc_state, plane_state); } - vlv_compute_fifo(crtc_state); + /* initially allow all levels */ + wm_state->num_levels = vlv_num_wm_levels(dev_priv); + /* + * Note that enabling cxsr with no primary/sprite planes + * enabled can wedge the pipe. Hence we only allow cxsr + * with exactly one enabled primary/sprite plane. + */ + wm_state->cxsr = crtc->pipe != PIPE_C && + crtc->wm.cxsr_allowed && num_active_planes == 1; + + ret = vlv_compute_fifo(crtc_state); + if (ret) + return ret; for (level = 0; level < wm_state->num_levels; level++) { - struct vlv_pipe_wm *raw = &crtc_state->wm.vlv.raw[level]; + const struct vlv_pipe_wm *raw = &crtc_state->wm.vlv.raw[level]; + const int sr_fifo_size = INTEL_INFO(dev_priv)->num_pipes * 512 - 1; - wm_state->wm[level] = *raw; + if (!vlv_crtc_wm_is_valid(crtc_state, level)) + break; - wm_state->sr[level].plane = max3(raw->plane[PLANE_PRIMARY], + for_each_plane_id_on_crtc(crtc, plane_id) { + wm_state->wm[level].plane[plane_id] = + vlv_invert_wm_value(raw->plane[plane_id], + fifo_state->plane[plane_id]); + } + + wm_state->sr[level].plane = + vlv_invert_wm_value(max3(raw->plane[PLANE_PRIMARY], raw->plane[PLANE_SPRITE0], - raw->plane[PLANE_SPRITE1]); - wm_state->sr[level].cursor = raw->plane[PLANE_CURSOR]; - } + raw->plane[PLANE_SPRITE1]), + sr_fifo_size); - /* clear any (partially) filled invalid levels */ - for (level = wm_state->num_levels; level < dev_priv->wm.max_level + 1; level++) { - memset(&wm_state->wm[level], 0, sizeof(wm_state->wm[level])); - memset(&wm_state->sr[level], 0, sizeof(wm_state->sr[level])); + wm_state->sr[level].cursor = + vlv_invert_wm_value(raw->plane[PLANE_CURSOR], + 63); } - vlv_invert_wms(crtc_state); + if (level == 0) + return -EINVAL; + + /* limit to only levels we can actually handle */ + wm_state->num_levels = level; + + /* invalidate the higher levels */ + vlv_invalidate_wms(crtc, wm_state, level); + + return 0; } #define VLV_FIFO(plane, value) \ (((value) << DSPARB_ ## plane ## _SHIFT_VLV) & DSPARB_ ## plane ## _MASK_VLV) -static void vlv_pipe_set_fifo_size(const struct intel_crtc_state *crtc_state) +static void vlv_atomic_update_fifo(struct intel_atomic_state *state, + struct intel_crtc_state *crtc_state) { struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); @@ -1215,10 +1294,6 @@ static void vlv_pipe_set_fifo_size(const struct intel_crtc_state *crtc_state) WARN_ON(fifo_state->plane[PLANE_CURSOR] != 63); WARN_ON(fifo_size != 511); - DRM_DEBUG_KMS("Pipe %c FIFO split %d / %d / %d\n", - pipe_name(crtc->pipe), sprite0_start, - sprite1_start, fifo_size); - spin_lock(&dev_priv->wm.dsparb_lock); switch (crtc->pipe) { @@ -1317,11 +1392,8 @@ static void vlv_merge_wm(struct drm_i915_private *dev_priv, const struct vlv_wm_state *wm_state = &crtc->wm.active.vlv; enum pipe pipe = crtc->pipe; - if (!crtc->active) - continue; - wm->pipe[pipe] = wm_state->wm[wm->level]; - if (wm->cxsr) + if (crtc->active && wm->cxsr) wm->sr = wm_state->sr[wm->level]; wm->ddl[pipe].plane[PLANE_PRIMARY] = DDL_PRECISION_HIGH | 2; @@ -1341,24 +1413,15 @@ static bool is_enabling(int old, int new, int threshold) return old < threshold && new >= threshold; } -static void vlv_update_wm(struct intel_crtc *crtc) +static void vlv_program_watermarks(struct drm_i915_private *dev_priv) { - struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); - struct intel_crtc_state *crtc_state = - to_intel_crtc_state(crtc->base.state); - enum pipe pipe = crtc->pipe; struct vlv_wm_values *old_wm = &dev_priv->wm.vlv; struct vlv_wm_values new_wm = {}; - vlv_compute_wm(crtc_state); - crtc->wm.active.vlv = crtc_state->wm.vlv.optimal; vlv_merge_wm(dev_priv, &new_wm); - if (memcmp(old_wm, &new_wm, sizeof(new_wm)) == 0) { - /* FIXME should be part of crtc atomic commit */ - vlv_pipe_set_fifo_size(crtc_state); + if (memcmp(old_wm, &new_wm, sizeof(new_wm)) == 0) return; - } if (is_disabling(old_wm->level, new_wm.level, VLV_WM_LEVEL_DDR_DVFS)) chv_set_memory_dvfs(dev_priv, false); @@ -1369,17 +1432,8 @@ static void vlv_update_wm(struct intel_crtc *crtc) if (is_disabling(old_wm->cxsr, new_wm.cxsr, true)) _intel_set_memory_cxsr(dev_priv, false); - /* FIXME should be part of crtc atomic commit */ - vlv_pipe_set_fifo_size(crtc_state); - vlv_write_wm_values(dev_priv, &new_wm); - DRM_DEBUG_KMS("Setting FIFO watermarks - %c: plane=%d, cursor=%d, " - "sprite0=%d, sprite1=%d, SR: plane=%d, cursor=%d level=%d cxsr=%d\n", - pipe_name(pipe), new_wm.pipe[pipe].plane[PLANE_PRIMARY], new_wm.pipe[pipe].plane[PLANE_CURSOR], - new_wm.pipe[pipe].plane[PLANE_SPRITE0], new_wm.pipe[pipe].plane[PLANE_SPRITE1], - new_wm.sr.plane, new_wm.sr.cursor, new_wm.level, new_wm.cxsr); - if (is_enabling(old_wm->cxsr, new_wm.cxsr, true)) _intel_set_memory_cxsr(dev_priv, true); @@ -1392,6 +1446,18 @@ static void vlv_update_wm(struct intel_crtc *crtc) *old_wm = new_wm; } +static void vlv_initial_watermarks(struct intel_atomic_state *state, + struct intel_crtc_state *crtc_state) +{ + struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev); + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); + + mutex_lock(&dev_priv->wm.wm_mutex); + crtc->wm.active.vlv = crtc_state->wm.vlv.optimal; + vlv_program_watermarks(dev_priv); + mutex_unlock(&dev_priv->wm.wm_mutex); +} + #define single_plane_enabled(mask) is_power_of_2(mask) static void g4x_update_wm(struct intel_crtc *crtc) @@ -4508,14 +4574,10 @@ void vlv_wm_get_hw_state(struct drm_device *dev) struct drm_i915_private *dev_priv = to_i915(dev); struct vlv_wm_values *wm = &dev_priv->wm.vlv; struct intel_crtc *crtc; - enum pipe pipe; u32 val; vlv_read_wm_values(dev_priv, wm); - for_each_intel_crtc(dev, crtc) - vlv_get_fifo_size(to_intel_crtc_state(crtc->base.state)); - wm->cxsr = I915_READ(FW_BLC_SELF_VLV) & FW_CSPWRDWNEN; wm->level = VLV_WM_LEVEL_PM2; @@ -4553,13 +4615,53 @@ void vlv_wm_get_hw_state(struct drm_device *dev) mutex_unlock(&dev_priv->rps.hw_lock); } - for_each_pipe(dev_priv, pipe) + for_each_intel_crtc(dev, crtc) { + struct intel_crtc_state *crtc_state = + to_intel_crtc_state(crtc->base.state); + struct vlv_wm_state *active = &crtc->wm.active.vlv; + const struct vlv_fifo_state *fifo_state = + &crtc_state->wm.vlv.fifo_state; + enum pipe pipe = crtc->pipe; + enum plane_id plane_id; + int level; + + vlv_get_fifo_size(crtc_state); + + active->num_levels = wm->level + 1; + active->cxsr = wm->cxsr; + + /* FIXME sanitize things more */ + for (level = 0; level < active->num_levels; level++) { + struct vlv_pipe_wm *raw = + &crtc_state->wm.vlv.raw[level]; + + active->sr[level].plane = wm->sr.plane; + active->sr[level].cursor = wm->sr.cursor; + + for_each_plane_id_on_crtc(crtc, plane_id) { + active->wm[level].plane[plane_id] = + wm->pipe[pipe].plane[plane_id]; + + raw->plane[plane_id] = + vlv_invert_wm_value(active->wm[level].plane[plane_id], + fifo_state->plane[plane_id]); + } + } + + for_each_plane_id_on_crtc(crtc, plane_id) + vlv_raw_plane_wm_set(crtc_state, level, + plane_id, USHRT_MAX); + vlv_invalidate_wms(crtc, active, level); + + crtc_state->wm.vlv.optimal = *active; + DRM_DEBUG_KMS("Initial watermarks: pipe %c, plane=%d, cursor=%d, sprite0=%d, sprite1=%d\n", pipe_name(pipe), wm->pipe[pipe].plane[PLANE_PRIMARY], wm->pipe[pipe].plane[PLANE_CURSOR], wm->pipe[pipe].plane[PLANE_SPRITE0], wm->pipe[pipe].plane[PLANE_SPRITE1]); + } DRM_DEBUG_KMS("Initial watermarks: SR plane=%d, SR cursor=%d level=%d cxsr=%d\n", wm->sr.plane, wm->sr.cursor, wm->level, wm->cxsr); @@ -7717,7 +7819,9 @@ void intel_init_pm(struct drm_i915_private *dev_priv) } } else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) { vlv_setup_wm_latency(dev_priv); - dev_priv->display.update_wm = vlv_update_wm; + dev_priv->display.compute_pipe_wm = vlv_compute_pipe_wm; + dev_priv->display.initial_watermarks = vlv_initial_watermarks; + dev_priv->display.atomic_update_watermarks = vlv_atomic_update_fifo; } else if (IS_PINEVIEW(dev_priv)) { if (!intel_get_cxsr_latency(IS_PINEVIEW_G(dev_priv), dev_priv->is_ddr3, -- cgit From 236c48e692458fb55ec96ac1823a176f2bce09f1 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Thu, 2 Mar 2017 19:14:58 +0200 Subject: drm/i915: Skip useless watermark/FIFO related work on VLV/CHV when not needed MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Check whether anything relevant has actually change when we compute new watermarks for each plane in the state. If the watermarks for no primary/sprite planes changed we don't have to recompute the FIFO split or reprogram the DSBARB registers. And even the cursor watermarks didn't change we can skip the merge+invert step between all the planes on the pipe as well. v2: s/noninverted/raw/ for consistency with other platforms v3: Drop duplicated vlv_get_fifo_size() call during init Signed-off-by: Ville Syrjälä Reviewed-by: Maarten Lankhorst Link: http://patchwork.freedesktop.org/patch/msgid/20170302171508.1666-9-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/intel_atomic.c | 1 + drivers/gpu/drm/i915/intel_drv.h | 1 + drivers/gpu/drm/i915/intel_pm.c | 71 +++++++++++++++++++++++++++++-------- 3 files changed, 58 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_atomic.c b/drivers/gpu/drm/i915/intel_atomic.c index e65818e5d2ab..50fb1f76cc5f 100644 --- a/drivers/gpu/drm/i915/intel_atomic.c +++ b/drivers/gpu/drm/i915/intel_atomic.c @@ -99,6 +99,7 @@ intel_crtc_duplicate_state(struct drm_crtc *crtc) crtc_state->update_wm_pre = false; crtc_state->update_wm_post = false; crtc_state->fb_changed = false; + crtc_state->fifo_changed = false; crtc_state->wm.need_postvbl_update = false; crtc_state->fb_bits = 0; diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 2f23521f0e00..1e05da992aa0 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -573,6 +573,7 @@ struct intel_crtc_state { bool disable_cxsr; bool update_wm_pre, update_wm_post; /* watermarks are updated */ bool fb_changed; /* fb on any of the planes is changed */ + bool fifo_changed; /* FIFO split is changed */ /* Pipe source size (ie. panel fitter input size) * All planes will be positioned inside this space, diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index df76c4fd4cf3..39bc4206be7a 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -1128,30 +1128,35 @@ static u16 vlv_invert_wm_value(u16 wm, u16 fifo_size) * Starting from 'level' set all higher * levels to 'value' in the "raw" watermarks. */ -static void vlv_raw_plane_wm_set(struct intel_crtc_state *crtc_state, +static bool vlv_raw_plane_wm_set(struct intel_crtc_state *crtc_state, int level, enum plane_id plane_id, u16 value) { struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev); int num_levels = vlv_num_wm_levels(dev_priv); + bool dirty = false; for (; level < num_levels; level++) { struct vlv_pipe_wm *raw = &crtc_state->wm.vlv.raw[level]; + dirty |= raw->plane[plane_id] != value; raw->plane[plane_id] = value; } + + return dirty; } -static void vlv_plane_wm_compute(struct intel_crtc_state *crtc_state, +static bool vlv_plane_wm_compute(struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state) { struct intel_plane *plane = to_intel_plane(plane_state->base.plane); enum plane_id plane_id = plane->id; int num_levels = vlv_num_wm_levels(to_i915(plane->base.dev)); int level; + bool dirty = false; if (!plane_state->base.visible) { - vlv_raw_plane_wm_set(crtc_state, 0, plane_id, 0); - return; + dirty |= vlv_raw_plane_wm_set(crtc_state, 0, plane_id, 0); + goto out; } for (level = 0; level < num_levels; level++) { @@ -1166,17 +1171,22 @@ static void vlv_plane_wm_compute(struct intel_crtc_state *crtc_state, if (wm > max_wm) break; + dirty |= raw->plane[plane_id] != wm; raw->plane[plane_id] = wm; } /* mark all higher levels as invalid */ - vlv_raw_plane_wm_set(crtc_state, level, plane_id, USHRT_MAX); + dirty |= vlv_raw_plane_wm_set(crtc_state, level, plane_id, USHRT_MAX); - DRM_DEBUG_KMS("%s wms: [0]=%d,[1]=%d,[2]=%d\n", - plane->base.name, - crtc_state->wm.vlv.raw[VLV_WM_LEVEL_PM2].plane[plane_id], - crtc_state->wm.vlv.raw[VLV_WM_LEVEL_PM5].plane[plane_id], - crtc_state->wm.vlv.raw[VLV_WM_LEVEL_DDR_DVFS].plane[plane_id]); +out: + if (dirty) + DRM_DEBUG_KMS("%s wms: [0]=%d,[1]=%d,[2]=%d\n", + plane->base.name, + crtc_state->wm.vlv.raw[VLV_WM_LEVEL_PM2].plane[plane_id], + crtc_state->wm.vlv.raw[VLV_WM_LEVEL_PM5].plane[plane_id], + crtc_state->wm.vlv.raw[VLV_WM_LEVEL_DDR_DVFS].plane[plane_id]); + + return dirty; } static bool vlv_plane_wm_is_valid(const struct intel_crtc_state *crtc_state, @@ -1209,10 +1219,12 @@ static int vlv_compute_pipe_wm(struct intel_crtc_state *crtc_state) &crtc_state->wm.vlv.fifo_state; int num_active_planes = hweight32(crtc_state->active_planes & ~BIT(PLANE_CURSOR)); + bool needs_modeset = drm_atomic_crtc_needs_modeset(&crtc_state->base); struct intel_plane_state *plane_state; struct intel_plane *plane; enum plane_id plane_id; int level, ret, i; + unsigned int dirty = 0; for_each_intel_plane_in_state(state, plane, plane_state, i) { const struct intel_plane_state *old_plane_state = @@ -1222,7 +1234,37 @@ static int vlv_compute_pipe_wm(struct intel_crtc_state *crtc_state) old_plane_state->base.crtc != &crtc->base) continue; - vlv_plane_wm_compute(crtc_state, plane_state); + if (vlv_plane_wm_compute(crtc_state, plane_state)) + dirty |= BIT(plane->id); + } + + /* + * DSPARB registers may have been reset due to the + * power well being turned off. Make sure we restore + * them to a consistent state even if no primary/sprite + * planes are initially active. + */ + if (needs_modeset) + crtc_state->fifo_changed = true; + + if (!dirty) + return 0; + + /* cursor changes don't warrant a FIFO recompute */ + if (dirty & ~BIT(PLANE_CURSOR)) { + const struct intel_crtc_state *old_crtc_state = + to_intel_crtc_state(crtc->base.state); + const struct vlv_fifo_state *old_fifo_state = + &old_crtc_state->wm.vlv.fifo_state; + + ret = vlv_compute_fifo(crtc_state); + if (ret) + return ret; + + if (needs_modeset || + memcmp(old_fifo_state, fifo_state, + sizeof(*fifo_state)) != 0) + crtc_state->fifo_changed = true; } /* initially allow all levels */ @@ -1235,10 +1277,6 @@ static int vlv_compute_pipe_wm(struct intel_crtc_state *crtc_state) wm_state->cxsr = crtc->pipe != PIPE_C && crtc->wm.cxsr_allowed && num_active_planes == 1; - ret = vlv_compute_fifo(crtc_state); - if (ret) - return ret; - for (level = 0; level < wm_state->num_levels; level++) { const struct vlv_pipe_wm *raw = &crtc_state->wm.vlv.raw[level]; const int sr_fifo_size = INTEL_INFO(dev_priv)->num_pipes * 512 - 1; @@ -1287,6 +1325,9 @@ static void vlv_atomic_update_fifo(struct intel_atomic_state *state, &crtc_state->wm.vlv.fifo_state; int sprite0_start, sprite1_start, fifo_size; + if (!crtc_state->fifo_changed) + return; + sprite0_start = fifo_state->plane[PLANE_PRIMARY]; sprite1_start = fifo_state->plane[PLANE_SPRITE0] + sprite0_start; fifo_size = fifo_state->plane[PLANE_SPRITE1] + sprite1_start; -- cgit From 4841da51a753ca65909441e561236de82b6b48a5 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Thu, 2 Mar 2017 19:14:59 +0200 Subject: drm/i915: Compute proper intermediate wms for vlv/cvh MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since the watermark registers arent double buffered on VLV/CHV, we'll need to play around with intermediate watermarks same was as we do on ILK-BDW. The watermark registers on VLV/CHV contain inverted values, so to find the intermediate watermark value we just take the minimum of the active and optimal values. This also means that, unlike ILK-BDW, there's no chance that we'd fail to find a working intermediate watermarks. As long as both the active and optimal watermarks are valid the intermediate watermarks will come out valid as well. Signed-off-by: Ville Syrjälä Reviewed-by: Maarten Lankhorst Link: http://patchwork.freedesktop.org/patch/msgid/20170302171508.1666-10-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/intel_drv.h | 2 ++ drivers/gpu/drm/i915/intel_pm.c | 59 +++++++++++++++++++++++++++++++++++++++- 2 files changed, 60 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 1e05da992aa0..9bab6124cde8 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -538,6 +538,8 @@ struct intel_crtc_wm_state { struct { /* "raw" watermarks (not inverted) */ struct vlv_pipe_wm raw[NUM_VLV_WM_LEVELS]; + /* intermediate watermarks (inverted) */ + struct vlv_wm_state intermediate; /* optimal watermarks (inverted) */ struct vlv_wm_state optimal; /* display FIFO split */ diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 39bc4206be7a..824d5b0806b9 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -1401,6 +1401,45 @@ static void vlv_atomic_update_fifo(struct intel_atomic_state *state, #undef VLV_FIFO +static int vlv_compute_intermediate_wm(struct drm_device *dev, + struct intel_crtc *crtc, + struct intel_crtc_state *crtc_state) +{ + struct vlv_wm_state *intermediate = &crtc_state->wm.vlv.intermediate; + const struct vlv_wm_state *optimal = &crtc_state->wm.vlv.optimal; + const struct vlv_wm_state *active = &crtc->wm.active.vlv; + int level; + + intermediate->num_levels = min(optimal->num_levels, active->num_levels); + intermediate->cxsr = optimal->cxsr & active->cxsr; + + for (level = 0; level < intermediate->num_levels; level++) { + enum plane_id plane_id; + + for_each_plane_id_on_crtc(crtc, plane_id) { + intermediate->wm[level].plane[plane_id] = + min(optimal->wm[level].plane[plane_id], + active->wm[level].plane[plane_id]); + } + + intermediate->sr[level].plane = min(optimal->sr[level].plane, + active->sr[level].plane); + intermediate->sr[level].cursor = min(optimal->sr[level].cursor, + active->sr[level].cursor); + } + + vlv_invalidate_wms(crtc, intermediate, level); + + /* + * If our intermediate WM are identical to the final WM, then we can + * omit the post-vblank programming; only update if it's different. + */ + if (memcmp(intermediate, optimal, sizeof(*intermediate)) == 0) + crtc_state->wm.need_postvbl_update = false; + + return 0; +} + static void vlv_merge_wm(struct drm_i915_private *dev_priv, struct vlv_wm_values *wm) { @@ -1494,7 +1533,22 @@ static void vlv_initial_watermarks(struct intel_atomic_state *state, struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); mutex_lock(&dev_priv->wm.wm_mutex); - crtc->wm.active.vlv = crtc_state->wm.vlv.optimal; + crtc->wm.active.vlv = crtc_state->wm.vlv.intermediate; + vlv_program_watermarks(dev_priv); + mutex_unlock(&dev_priv->wm.wm_mutex); +} + +static void vlv_optimize_watermarks(struct intel_atomic_state *state, + struct intel_crtc_state *crtc_state) +{ + struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev); + struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->base.crtc); + + if (!crtc_state->wm.need_postvbl_update) + return; + + mutex_lock(&dev_priv->wm.wm_mutex); + intel_crtc->wm.active.vlv = crtc_state->wm.vlv.optimal; vlv_program_watermarks(dev_priv); mutex_unlock(&dev_priv->wm.wm_mutex); } @@ -4695,6 +4749,7 @@ void vlv_wm_get_hw_state(struct drm_device *dev) vlv_invalidate_wms(crtc, active, level); crtc_state->wm.vlv.optimal = *active; + crtc_state->wm.vlv.intermediate = *active; DRM_DEBUG_KMS("Initial watermarks: pipe %c, plane=%d, cursor=%d, sprite0=%d, sprite1=%d\n", pipe_name(pipe), @@ -7861,7 +7916,9 @@ void intel_init_pm(struct drm_i915_private *dev_priv) } else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) { vlv_setup_wm_latency(dev_priv); dev_priv->display.compute_pipe_wm = vlv_compute_pipe_wm; + dev_priv->display.compute_intermediate_wm = vlv_compute_intermediate_wm; dev_priv->display.initial_watermarks = vlv_initial_watermarks; + dev_priv->display.optimize_watermarks = vlv_optimize_watermarks; dev_priv->display.atomic_update_watermarks = vlv_atomic_update_fifo; } else if (IS_PINEVIEW(dev_priv)) { if (!intel_get_cxsr_latency(IS_PINEVIEW_G(dev_priv), -- cgit From 5eeb798bbe398bbbcb09d7e6851825cb584b5bf8 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Thu, 2 Mar 2017 19:15:00 +0200 Subject: drm/i915: Nuke crtc->wm.cxsr_allowed MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove crtc->wm.cxsr_allowed and just rely on crtc_state->disable_cxsr instead. This was used only by vlv/chv to indicate whether to enable cxsr in the wm computation. That doesn't really work anymore, and as far as the optimal watermarks go we'll just consider the number of planes and the current pipe, and for the intermediate watermarks we'll also start to consider disable_cxsr which is set appropriately when planes are being enabled/disabled. We'll also flip over the crtc_state->wm.need_postvbl_update setup so that it's the wm code that will set it. Previously the generic code set it up, and then the wm code cleared it again if it thought it's not needed after all. Signed-off-by: Ville Syrjälä Reviewed-by: Maarten Lankhorst Link: http://patchwork.freedesktop.org/patch/msgid/20170302171508.1666-11-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/intel_display.c | 44 +++++++++++------------------------- drivers/gpu/drm/i915/intel_drv.h | 3 --- drivers/gpu/drm/i915/intel_pm.c | 14 ++++++------ 3 files changed, 20 insertions(+), 41 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 45a7c7e18e3d..83f30560217d 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -4997,8 +4997,6 @@ static void intel_post_plane_update(struct intel_crtc_state *old_crtc_state) intel_frontbuffer_flip(to_i915(crtc->base.dev), pipe_config->fb_bits); - crtc->wm.cxsr_allowed = true; - if (pipe_config->update_wm_post && pipe_config->base.active) intel_update_watermarks(crtc); @@ -5045,22 +5043,18 @@ static void intel_pre_plane_update(struct intel_crtc_state *old_crtc_state) intel_pre_disable_primary(&crtc->base); } - if (pipe_config->disable_cxsr && HAS_GMCH_DISPLAY(dev_priv)) { - crtc->wm.cxsr_allowed = false; - - /* - * Vblank time updates from the shadow to live plane control register - * are blocked if the memory self-refresh mode is active at that - * moment. So to make sure the plane gets truly disabled, disable - * first the self-refresh mode. The self-refresh enable bit in turn - * will be checked/applied by the HW only at the next frame start - * event which is after the vblank start event, so we need to have a - * wait-for-vblank between disabling the plane and the pipe. - */ - if (old_crtc_state->base.active && - intel_set_memory_cxsr(dev_priv, false)) - intel_wait_for_vblank(dev_priv, crtc->pipe); - } + /* + * Vblank time updates from the shadow to live plane control register + * are blocked if the memory self-refresh mode is active at that + * moment. So to make sure the plane gets truly disabled, disable + * first the self-refresh mode. The self-refresh enable bit in turn + * will be checked/applied by the HW only at the next frame start + * event which is after the vblank start event, so we need to have a + * wait-for-vblank between disabling the plane and the pipe. + */ + if (HAS_GMCH_DISPLAY(dev_priv) && old_crtc_state->base.active && + pipe_config->disable_cxsr && intel_set_memory_cxsr(dev_priv, false)) + intel_wait_for_vblank(dev_priv, crtc->pipe); /* * IVB workaround: must disable low power watermarks for at least @@ -10868,11 +10862,6 @@ int intel_plane_atomic_calc_changes(struct drm_crtc_state *crtc_state, pipe_config->update_wm_post = true; } - /* Pre-gen9 platforms need two-step watermark updates */ - if ((pipe_config->update_wm_pre || pipe_config->update_wm_post) && - INTEL_GEN(dev_priv) < 9 && dev_priv->display.optimize_watermarks) - to_intel_crtc_state(crtc_state)->wm.need_postvbl_update = true; - if (visible || was_visible) pipe_config->fb_bits |= plane->frontbuffer_bit; @@ -12616,12 +12605,7 @@ static bool needs_vblank_wait(struct intel_crtc_state *crtc_state) if (crtc_state->update_wm_post) return true; - /* - * cxsr is re-enabled after vblank. - * This is already handled by crtc_state->update_wm_post, - * but added for clarity. - */ - if (crtc_state->disable_cxsr) + if (crtc_state->wm.need_postvbl_update) return true; return false; @@ -13890,8 +13874,6 @@ static int intel_crtc_init(struct drm_i915_private *dev_priv, enum pipe pipe) intel_crtc->cursor_cntl = ~0; intel_crtc->cursor_size = ~0; - intel_crtc->wm.cxsr_allowed = true; - /* initialize shared scalers */ intel_crtc_init_scalers(intel_crtc, crtc_state); diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 9bab6124cde8..f503313f6e65 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -777,9 +777,6 @@ struct intel_crtc { struct intel_pipe_wm ilk; struct vlv_wm_state vlv; } active; - - /* allow CxSR on this pipe */ - bool cxsr_allowed; } wm; int scanline_offset; diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 824d5b0806b9..af7fb532d806 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -1274,8 +1274,7 @@ static int vlv_compute_pipe_wm(struct intel_crtc_state *crtc_state) * enabled can wedge the pipe. Hence we only allow cxsr * with exactly one enabled primary/sprite plane. */ - wm_state->cxsr = crtc->pipe != PIPE_C && - crtc->wm.cxsr_allowed && num_active_planes == 1; + wm_state->cxsr = crtc->pipe != PIPE_C && num_active_planes == 1; for (level = 0; level < wm_state->num_levels; level++) { const struct vlv_pipe_wm *raw = &crtc_state->wm.vlv.raw[level]; @@ -1411,7 +1410,8 @@ static int vlv_compute_intermediate_wm(struct drm_device *dev, int level; intermediate->num_levels = min(optimal->num_levels, active->num_levels); - intermediate->cxsr = optimal->cxsr & active->cxsr; + intermediate->cxsr = optimal->cxsr && active->cxsr && + !crtc_state->disable_cxsr; for (level = 0; level < intermediate->num_levels; level++) { enum plane_id plane_id; @@ -1434,8 +1434,8 @@ static int vlv_compute_intermediate_wm(struct drm_device *dev, * If our intermediate WM are identical to the final WM, then we can * omit the post-vblank programming; only update if it's different. */ - if (memcmp(intermediate, optimal, sizeof(*intermediate)) == 0) - crtc_state->wm.need_postvbl_update = false; + if (memcmp(intermediate, optimal, sizeof(*intermediate)) != 0) + crtc_state->wm.need_postvbl_update = true; return 0; } @@ -2628,8 +2628,8 @@ static int ilk_compute_intermediate_wm(struct drm_device *dev, * If our intermediate WM are identical to the final WM, then we can * omit the post-vblank programming; only update if it's different. */ - if (memcmp(a, &newstate->wm.ilk.optimal, sizeof(*a)) == 0) - newstate->wm.need_postvbl_update = false; + if (memcmp(a, &newstate->wm.ilk.optimal, sizeof(*a)) != 0) + newstate->wm.need_postvbl_update = true; return 0; } -- cgit From b4ede6dfa0c5a952161bac6c5f840469a13386ce Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Thu, 2 Mar 2017 19:15:01 +0200 Subject: drm/i915: Only use update_wm_{pre,post} for pre-ilk platforms MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now that vlv/chv have more proper wm programming support, let's reduce the the update_wm_{pre,post} flags to only cover the pre-ilk platforms. When we finally convert those as well we can drop these flags entirely. Signed-off-by: Ville Syrjälä Reviewed-by: Maarten Lankhorst Link: http://patchwork.freedesktop.org/patch/msgid/20170302171508.1666-12-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/intel_display.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 83f30560217d..089f7e86c706 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -10845,21 +10845,25 @@ int intel_plane_atomic_calc_changes(struct drm_crtc_state *crtc_state, turn_off, turn_on, mode_changed); if (turn_on) { - pipe_config->update_wm_pre = true; + if (INTEL_GEN(dev_priv) < 5) + pipe_config->update_wm_pre = true; /* must disable cxsr around plane enable/disable */ if (plane->id != PLANE_CURSOR) pipe_config->disable_cxsr = true; } else if (turn_off) { - pipe_config->update_wm_post = true; + if (INTEL_GEN(dev_priv) < 5) + pipe_config->update_wm_post = true; /* must disable cxsr around plane enable/disable */ if (plane->id != PLANE_CURSOR) pipe_config->disable_cxsr = true; } else if (intel_wm_need_update(&plane->base, plane_state)) { - /* FIXME bollocks */ - pipe_config->update_wm_pre = true; - pipe_config->update_wm_post = true; + if (INTEL_GEN(dev_priv) < 5) { + /* FIXME bollocks */ + pipe_config->update_wm_pre = true; + pipe_config->update_wm_post = true; + } } if (visible || was_visible) -- cgit From 602ae835505603eca8b50d813e00a56f56158434 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Thu, 2 Mar 2017 19:15:02 +0200 Subject: drm/i915: Sanitize VLV/CHV watermarks properly MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Clear out the watermark for all disabled planes to 0. This is required to avoid falsely thinking that the inherited watermarks are bogus in case the watermark is actually higher than the FIFO size. v2: s/noninverted/raw/ for consistency with other platforms Signed-off-by: Ville Syrjälä Reviewed-by: Maarten Lankhorst Link: http://patchwork.freedesktop.org/patch/msgid/20170302171508.1666-13-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/intel_display.c | 17 +++++++----- drivers/gpu/drm/i915/intel_drv.h | 1 + drivers/gpu/drm/i915/intel_pm.c | 50 +++++++++++++++++++++++++++++++++++- 3 files changed, 61 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 089f7e86c706..1c22f8639b28 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -3478,7 +3478,8 @@ __intel_display_resume(struct drm_device *dev, } /* ignore any reset values/BIOS leftovers in the WM registers */ - to_intel_atomic_state(state)->skip_intermediate_wm = true; + if (!HAS_GMCH_DISPLAY(to_i915(dev))) + to_intel_atomic_state(state)->skip_intermediate_wm = true; ret = drm_atomic_commit(state); @@ -14877,7 +14878,8 @@ retry: * intermediate watermarks (since we don't trust the current * watermarks). */ - intel_state->skip_intermediate_wm = true; + if (!HAS_GMCH_DISPLAY(dev_priv)) + intel_state->skip_intermediate_wm = true; ret = intel_atomic_check(dev, state); if (ret) { @@ -15040,7 +15042,8 @@ int intel_modeset_init(struct drm_device *dev) * Note that we need to do this after reconstructing the BIOS fb's * since the watermark calculation done here will use pstate->fb. */ - sanitize_watermarks(dev); + if (!HAS_GMCH_DISPLAY(dev_priv)) + sanitize_watermarks(dev); return 0; } @@ -15511,12 +15514,14 @@ intel_modeset_setup_hw_state(struct drm_device *dev) pll->on = false; } - if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) + if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) { vlv_wm_get_hw_state(dev); - else if (IS_GEN9(dev_priv)) + vlv_wm_sanitize(dev_priv); + } else if (IS_GEN9(dev_priv)) { skl_wm_get_hw_state(dev); - else if (HAS_PCH_SPLIT(dev_priv)) + } else if (HAS_PCH_SPLIT(dev_priv)) { ilk_wm_get_hw_state(dev); + } for_each_intel_crtc(dev, crtc) { u64 put_domains; diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index f503313f6e65..3ef044efe98c 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -1824,6 +1824,7 @@ void skl_ddb_get_hw_state(struct drm_i915_private *dev_priv, struct skl_ddb_allocation *ddb /* out */); void skl_pipe_wm_get_hw_state(struct drm_crtc *crtc, struct skl_pipe_wm *out); +void vlv_wm_sanitize(struct drm_i915_private *dev_priv); bool intel_can_enable_sagv(struct drm_atomic_state *state); int intel_enable_sagv(struct drm_i915_private *dev_priv); int intel_disable_sagv(struct drm_i915_private *dev_priv); diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index af7fb532d806..7caed0006ee7 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -4725,7 +4725,6 @@ void vlv_wm_get_hw_state(struct drm_device *dev) active->num_levels = wm->level + 1; active->cxsr = wm->cxsr; - /* FIXME sanitize things more */ for (level = 0; level < active->num_levels; level++) { struct vlv_pipe_wm *raw = &crtc_state->wm.vlv.raw[level]; @@ -4763,6 +4762,55 @@ void vlv_wm_get_hw_state(struct drm_device *dev) wm->sr.plane, wm->sr.cursor, wm->level, wm->cxsr); } +void vlv_wm_sanitize(struct drm_i915_private *dev_priv) +{ + struct intel_plane *plane; + struct intel_crtc *crtc; + + mutex_lock(&dev_priv->wm.wm_mutex); + + for_each_intel_plane(&dev_priv->drm, plane) { + struct intel_crtc *crtc = + intel_get_crtc_for_pipe(dev_priv, plane->pipe); + struct intel_crtc_state *crtc_state = + to_intel_crtc_state(crtc->base.state); + struct intel_plane_state *plane_state = + to_intel_plane_state(plane->base.state); + struct vlv_wm_state *wm_state = &crtc_state->wm.vlv.optimal; + const struct vlv_fifo_state *fifo_state = + &crtc_state->wm.vlv.fifo_state; + enum plane_id plane_id = plane->id; + int level; + + if (plane_state->base.visible) + continue; + + for (level = 0; level < wm_state->num_levels; level++) { + struct vlv_pipe_wm *raw = + &crtc_state->wm.vlv.raw[level]; + + raw->plane[plane_id] = 0; + + wm_state->wm[level].plane[plane_id] = + vlv_invert_wm_value(raw->plane[plane_id], + fifo_state->plane[plane_id]); + } + } + + for_each_intel_crtc(&dev_priv->drm, crtc) { + struct intel_crtc_state *crtc_state = + to_intel_crtc_state(crtc->base.state); + + crtc_state->wm.vlv.intermediate = + crtc_state->wm.vlv.optimal; + crtc->wm.active.vlv = crtc_state->wm.vlv.optimal; + } + + vlv_program_watermarks(dev_priv); + + mutex_unlock(&dev_priv->wm.wm_mutex); +} + void ilk_wm_get_hw_state(struct drm_device *dev) { struct drm_i915_private *dev_priv = to_i915(dev); -- cgit From 1a10ae6ba8c3df4af9bcdb6f40348af71e72bd13 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Thu, 2 Mar 2017 19:15:03 +0200 Subject: drm/i915: Workaround VLV/CHV sprite1->sprite0 enable underrun MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On VLV/CHV enabling sprite0 when sprite1 has already been enabled may lead to an underrun. This only happens when sprite0 FIFO size is zero prior to enabling it. Hence an effective workaround is to always allocate at least one cacheline for sprite0 when sprite1 is active. I've not observed this sort of failure during any other type of plane enable/disable sequence. v2: s/noninverted/raw/ for consistency with other platforms Testcase: igt/kms_plane_blinker Signed-off-by: Ville Syrjälä Reviewed-by: Maarten Lankhorst Link: http://patchwork.freedesktop.org/patch/msgid/20170302171508.1666-14-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/intel_pm.c | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 7caed0006ee7..6e2acfd687a1 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -1028,6 +1028,12 @@ static uint16_t vlv_compute_wm_level(const struct intel_crtc_state *crtc_state, return min_t(int, wm, USHRT_MAX); } +static bool vlv_need_sprite0_fifo_workaround(unsigned int active_planes) +{ + return (active_planes & (BIT(PLANE_SPRITE0) | + BIT(PLANE_SPRITE1))) == BIT(PLANE_SPRITE1); +} + static int vlv_compute_fifo(struct intel_crtc_state *crtc_state) { struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); @@ -1038,12 +1044,25 @@ static int vlv_compute_fifo(struct intel_crtc_state *crtc_state) int num_active_planes = hweight32(active_planes); const int fifo_size = 511; int fifo_extra, fifo_left = fifo_size; + int sprite0_fifo_extra = 0; unsigned int total_rate; enum plane_id plane_id; + /* + * When enabling sprite0 after sprite1 has already been enabled + * we tend to get an underrun unless sprite0 already has some + * FIFO space allcoated. Hence we always allocate at least one + * cacheline for sprite0 whenever sprite1 is enabled. + * + * All other plane enable sequences appear immune to this problem. + */ + if (vlv_need_sprite0_fifo_workaround(active_planes)) + sprite0_fifo_extra = 1; + total_rate = raw->plane[PLANE_PRIMARY] + raw->plane[PLANE_SPRITE0] + - raw->plane[PLANE_SPRITE1]; + raw->plane[PLANE_SPRITE1] + + sprite0_fifo_extra; if (total_rate > fifo_size) return -EINVAL; @@ -1064,6 +1083,9 @@ static int vlv_compute_fifo(struct intel_crtc_state *crtc_state) fifo_left -= fifo_state->plane[plane_id]; } + fifo_state->plane[PLANE_SPRITE0] += sprite0_fifo_extra; + fifo_left -= sprite0_fifo_extra; + fifo_state->plane[PLANE_CURSOR] = 63; fifo_extra = DIV_ROUND_UP(fifo_left, num_active_planes ?: 1); -- cgit From 7373728ddf5ebb199b8e000d8e097eb534ab6e6d Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Thu, 2 Mar 2017 19:15:04 +0200 Subject: drm/i915: Kill level 0 wm hack for VLV/CHV MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We now compute the watermarks correctly, so just return an error if we can't support the configuration. Signed-off-by: Ville Syrjälä Reviewed-by: Maarten Lankhorst Link: http://patchwork.freedesktop.org/patch/msgid/20170302171508.1666-15-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/intel_pm.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 6e2acfd687a1..9e4c7a0b0407 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -1186,10 +1186,6 @@ static bool vlv_plane_wm_compute(struct intel_crtc_state *crtc_state, int wm = vlv_compute_wm_level(crtc_state, plane_state, level); int max_wm = plane_id == PLANE_CURSOR ? 63 : 511; - /* FIXME just bail */ - if (WARN_ON(level == 0 && wm > max_wm)) - wm = max_wm; - if (wm > max_wm) break; -- cgit From 722595362cad7f254d8930b9576e4202010917b6 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Thu, 2 Mar 2017 19:15:05 +0200 Subject: drm/i915: Add plane update/disable tracepoints MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add tracepoints for plane programming. The tracepoints will dump the frame and scanline counters, so this can be used to verify eg. that the plane gets reprogrammed at the right time with respect to watermark programming (if we have appropriate tracepoints for that as well). v2: Rebase due to legacy cursor changes Signed-off-by: Ville Syrjälä Reviewed-by: Maarten Lankhorst Link: http://patchwork.freedesktop.org/patch/msgid/20170302171508.1666-16-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/i915_irq.c | 3 ++ drivers/gpu/drm/i915/i915_trace.h | 56 +++++++++++++++++++++++++++++++ drivers/gpu/drm/i915/intel_atomic_plane.c | 11 ++++-- drivers/gpu/drm/i915/intel_display.c | 15 +++++++-- 4 files changed, 80 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index e0aa686f9c68..675c28d54658 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -783,6 +783,9 @@ static int __intel_get_crtc_scanline(struct intel_crtc *crtc) enum pipe pipe = crtc->pipe; int position, vtotal; + if (!crtc->active) + return -1; + vtotal = mode->crtc_vtotal; if (mode->flags & DRM_MODE_FLAG_INTERLACE) vtotal /= 2; diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h index 2dbef3147a60..6cb03bd40aef 100644 --- a/drivers/gpu/drm/i915/i915_trace.h +++ b/drivers/gpu/drm/i915/i915_trace.h @@ -14,6 +14,62 @@ #define TRACE_SYSTEM i915 #define TRACE_INCLUDE_FILE i915_trace +/* plane updates */ + +TRACE_EVENT(intel_update_plane, + TP_PROTO(struct drm_plane *plane, struct intel_crtc *crtc), + TP_ARGS(plane, crtc), + + TP_STRUCT__entry( + __field(enum pipe, pipe) + __field(const char *, name) + __field(u32, frame) + __field(u32, scanline) + __array(int, src, 4) + __array(int, dst, 4) + ), + + TP_fast_assign( + __entry->pipe = crtc->pipe; + __entry->name = plane->name; + __entry->frame = crtc->base.dev->driver->get_vblank_counter(crtc->base.dev, + crtc->pipe); + __entry->scanline = intel_get_crtc_scanline(crtc); + memcpy(__entry->src, &plane->state->src, sizeof(__entry->src)); + memcpy(__entry->dst, &plane->state->dst, sizeof(__entry->dst)); + ), + + TP_printk("pipe %c, plane %s, frame=%u, scanline=%u, " DRM_RECT_FP_FMT " -> " DRM_RECT_FMT, + pipe_name(__entry->pipe), __entry->name, + __entry->frame, __entry->scanline, + DRM_RECT_FP_ARG((const struct drm_rect *)__entry->src), + DRM_RECT_ARG((const struct drm_rect *)__entry->dst)) +); + +TRACE_EVENT(intel_disable_plane, + TP_PROTO(struct drm_plane *plane, struct intel_crtc *crtc), + TP_ARGS(plane, crtc), + + TP_STRUCT__entry( + __field(enum pipe, pipe) + __field(const char *, name) + __field(u32, frame) + __field(u32, scanline) + ), + + TP_fast_assign( + __entry->pipe = crtc->pipe; + __entry->name = plane->name; + __entry->frame = crtc->base.dev->driver->get_vblank_counter(crtc->base.dev, + crtc->pipe); + __entry->scanline = intel_get_crtc_scanline(crtc); + ), + + TP_printk("pipe %c, plane %s, frame=%u, scanline=%u", + pipe_name(__entry->pipe), __entry->name, + __entry->frame, __entry->scanline) +); + /* pipe updates */ TRACE_EVENT(i915_pipe_update_start, diff --git a/drivers/gpu/drm/i915/intel_atomic_plane.c b/drivers/gpu/drm/i915/intel_atomic_plane.c index 1eaf840cf9ff..cfb47293fd53 100644 --- a/drivers/gpu/drm/i915/intel_atomic_plane.c +++ b/drivers/gpu/drm/i915/intel_atomic_plane.c @@ -231,12 +231,19 @@ static void intel_plane_atomic_update(struct drm_plane *plane, to_intel_plane_state(plane->state); struct drm_crtc *crtc = plane->state->crtc ?: old_state->crtc; - if (intel_state->base.visible) + if (intel_state->base.visible) { + trace_intel_update_plane(plane, + to_intel_crtc(crtc)); + intel_plane->update_plane(plane, to_intel_crtc_state(crtc->state), intel_state); - else + } else { + trace_intel_disable_plane(plane, + to_intel_crtc(crtc)); + intel_plane->disable_plane(plane, crtc); + } } const struct drm_plane_helper_funcs intel_plane_helper_funcs = { diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 1c22f8639b28..72e2c91707d4 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -2754,6 +2754,7 @@ intel_find_initial_plane_obj(struct intel_crtc *intel_crtc, to_intel_plane_state(plane_state), false); intel_pre_disable_primary_noatomic(&intel_crtc->base); + trace_intel_disable_plane(primary, intel_crtc); intel_plane->disable_plane(primary, &intel_crtc->base); return; @@ -3447,10 +3448,14 @@ static void intel_update_primary_planes(struct drm_device *dev) struct intel_plane_state *plane_state = to_intel_plane_state(plane->base.state); - if (plane_state->base.visible) + if (plane_state->base.visible) { + trace_intel_update_plane(&plane->base, + to_intel_crtc(crtc)); + plane->update_plane(&plane->base, to_intel_crtc_state(crtc->state), plane_state); + } } } @@ -13491,12 +13496,15 @@ intel_legacy_cursor_update(struct drm_plane *plane, new_plane_state->fb = old_fb; to_intel_plane_state(new_plane_state)->vma = old_vma; - if (plane->state->visible) + if (plane->state->visible) { + trace_intel_update_plane(plane, to_intel_crtc(crtc)); intel_plane->update_plane(plane, to_intel_crtc_state(crtc->state), to_intel_plane_state(plane->state)); - else + } else { + trace_intel_disable_plane(plane, to_intel_crtc(crtc)); intel_plane->disable_plane(plane, crtc); + } intel_cleanup_plane_fb(plane, new_plane_state); @@ -15145,6 +15153,7 @@ static void intel_sanitize_crtc(struct intel_crtc *crtc) if (plane->base.type == DRM_PLANE_TYPE_PRIMARY) continue; + trace_intel_disable_plane(&plane->base, crtc); plane->disable_plane(&plane->base, &crtc->base); } } -- cgit From c137d6605fd73635900597d386e3fa8af26d7787 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Thu, 2 Mar 2017 19:15:06 +0200 Subject: drm/i915: Add VLV/CHV watermark/FIFO programming tracepoints MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add tracepoints for observing the WM/FIFO programming on VLV/CHV. When compared with the plane and pipe update tracepoints this can be used to verify that everything is performed in the right sequence. Signed-off-by: Ville Syrjälä Reviewed-by: Maarten Lankhorst Link: http://patchwork.freedesktop.org/patch/msgid/20170302171508.1666-17-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/i915_trace.h | 71 +++++++++++++++++++++++++++++++++++++++ drivers/gpu/drm/i915/intel_pm.c | 4 +++ 2 files changed, 75 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h index 6cb03bd40aef..ca8198785311 100644 --- a/drivers/gpu/drm/i915/i915_trace.h +++ b/drivers/gpu/drm/i915/i915_trace.h @@ -14,6 +14,77 @@ #define TRACE_SYSTEM i915 #define TRACE_INCLUDE_FILE i915_trace +/* watermark/fifo updates */ + +TRACE_EVENT(vlv_wm, + TP_PROTO(struct intel_crtc *crtc, const struct vlv_wm_values *wm), + TP_ARGS(crtc, wm), + + TP_STRUCT__entry( + __field(enum pipe, pipe) + __field(u32, frame) + __field(u32, scanline) + __field(u32, level) + __field(u32, cxsr) + __field(u32, primary) + __field(u32, sprite0) + __field(u32, sprite1) + __field(u32, cursor) + __field(u32, sr_plane) + __field(u32, sr_cursor) + ), + + TP_fast_assign( + __entry->pipe = crtc->pipe; + __entry->frame = crtc->base.dev->driver->get_vblank_counter(crtc->base.dev, + crtc->pipe); + __entry->scanline = intel_get_crtc_scanline(crtc); + __entry->level = wm->level; + __entry->cxsr = wm->cxsr; + __entry->primary = wm->pipe[crtc->pipe].plane[PLANE_PRIMARY]; + __entry->sprite0 = wm->pipe[crtc->pipe].plane[PLANE_SPRITE0]; + __entry->sprite1 = wm->pipe[crtc->pipe].plane[PLANE_SPRITE1]; + __entry->cursor = wm->pipe[crtc->pipe].plane[PLANE_CURSOR]; + __entry->sr_plane = wm->sr.plane; + __entry->sr_cursor = wm->sr.cursor; + ), + + TP_printk("pipe %c, frame=%u, scanline=%u, level=%d, cxsr=%d, wm %d/%d/%d/%d, sr %d/%d", + pipe_name(__entry->pipe), __entry->frame, + __entry->scanline, __entry->level, __entry->cxsr, + __entry->primary, __entry->sprite0, __entry->sprite1, __entry->cursor, + __entry->sr_plane, __entry->sr_cursor) +); + +TRACE_EVENT(vlv_fifo_size, + TP_PROTO(struct intel_crtc *crtc, u32 sprite0_start, u32 sprite1_start, u32 fifo_size), + TP_ARGS(crtc, sprite0_start, sprite1_start, fifo_size), + + TP_STRUCT__entry( + __field(enum pipe, pipe) + __field(u32, frame) + __field(u32, scanline) + __field(u32, sprite0_start) + __field(u32, sprite1_start) + __field(u32, fifo_size) + ), + + TP_fast_assign( + __entry->pipe = crtc->pipe; + __entry->frame = crtc->base.dev->driver->get_vblank_counter(crtc->base.dev, + crtc->pipe); + __entry->scanline = intel_get_crtc_scanline(crtc); + __entry->sprite0_start = sprite0_start; + __entry->sprite1_start = sprite1_start; + __entry->fifo_size = fifo_size; + ), + + TP_printk("pipe %c, frame=%u, scanline=%u, %d/%d/%d", + pipe_name(__entry->pipe), __entry->frame, + __entry->scanline, __entry->sprite0_start, + __entry->sprite1_start, __entry->fifo_size) +); + /* plane updates */ TRACE_EVENT(intel_update_plane, diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 9e4c7a0b0407..dc85501cfff3 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -888,6 +888,8 @@ static void vlv_write_wm_values(struct drm_i915_private *dev_priv, enum pipe pipe; for_each_pipe(dev_priv, pipe) { + trace_vlv_wm(intel_get_crtc_for_pipe(dev_priv, pipe), wm); + I915_WRITE(VLV_DDL(pipe), (wm->ddl[pipe].plane[PLANE_CURSOR] << DDL_CURSOR_SHIFT) | (wm->ddl[pipe].plane[PLANE_SPRITE1] << DDL_SPRITE_SHIFT(1)) | @@ -1352,6 +1354,8 @@ static void vlv_atomic_update_fifo(struct intel_atomic_state *state, WARN_ON(fifo_state->plane[PLANE_CURSOR] != 63); WARN_ON(fifo_size != 511); + trace_vlv_fifo_size(crtc, sprite0_start, sprite1_start, fifo_size); + spin_lock(&dev_priv->wm.dsparb_lock); switch (crtc->pipe) { -- cgit From 1489bba82433d8960d6bfef7453eae77ef9c926a Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Thu, 2 Mar 2017 19:15:07 +0200 Subject: drm/i915: Add cxsr toggle tracepoint MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a tracepoint for observing changes in the cxsr state. The tracepoint will dump out the frame and scanline counters for each pipe so that the information can be compared with eg. plane update tracepoints. Signed-off-by: Ville Syrjälä Reviewed-by: Maarten Lankhorst Link: http://patchwork.freedesktop.org/patch/msgid/20170302171508.1666-18-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/i915_trace.h | 30 ++++++++++++++++++++++++++++++ drivers/gpu/drm/i915/intel_pm.c | 2 ++ 2 files changed, 32 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h index ca8198785311..2537a03555cc 100644 --- a/drivers/gpu/drm/i915/i915_trace.h +++ b/drivers/gpu/drm/i915/i915_trace.h @@ -16,6 +16,36 @@ /* watermark/fifo updates */ +TRACE_EVENT(intel_memory_cxsr, + TP_PROTO(struct drm_i915_private *dev_priv, bool old, bool new), + TP_ARGS(dev_priv, old, new), + + TP_STRUCT__entry( + __array(u32, frame, 3) + __array(u32, scanline, 3) + __field(bool, old) + __field(bool, new) + ), + + TP_fast_assign( + enum pipe pipe; + for_each_pipe(dev_priv, pipe) { + __entry->frame[pipe] = + dev_priv->drm.driver->get_vblank_counter(&dev_priv->drm, pipe); + __entry->scanline[pipe] = + intel_get_crtc_scanline(intel_get_crtc_for_pipe(dev_priv, pipe)); + } + __entry->old = old; + __entry->new = new; + ), + + TP_printk("%s->%s, pipe A: frame=%u, scanline=%u, pipe B: frame=%u, scanline=%u, pipe C: frame=%u, scanline=%u", + onoff(__entry->old), onoff(__entry->new), + __entry->frame[PIPE_A], __entry->scanline[PIPE_A], + __entry->frame[PIPE_B], __entry->scanline[PIPE_B], + __entry->frame[PIPE_C], __entry->scanline[PIPE_C]) +); + TRACE_EVENT(vlv_wm, TP_PROTO(struct intel_crtc *crtc, const struct vlv_wm_values *wm), TP_ARGS(crtc, wm), diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index dc85501cfff3..99e09f63d4b3 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -377,6 +377,8 @@ static bool _intel_set_memory_cxsr(struct drm_i915_private *dev_priv, bool enabl return false; } + trace_intel_memory_cxsr(dev_priv, was_enabled, enable); + DRM_DEBUG_KMS("memory self-refresh is %s (was %s)\n", enableddisabled(enable), enableddisabled(was_enabled)); -- cgit From 53a7915cd28b17536609bed6416701b40e8a10de Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Thu, 2 Mar 2017 19:15:08 +0200 Subject: drm/i915: Add FIFO underrun tracepoints MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add tracepoints for display FIFO underruns. Makes it more convenient to correlate the underruns with other display tracepoints. v2: s/i915/intel/ in the tracepoint name Signed-off-by: Ville Syrjälä Reviewed-by: Maarten Lankhorst Link: http://patchwork.freedesktop.org/patch/msgid/20170302171508.1666-19-ville.syrjala@linux.intel.com --- drivers/gpu/drm/i915/i915_trace.h | 43 ++++++++++++++++++++++++++++++ drivers/gpu/drm/i915/intel_fifo_underrun.c | 11 ++++++-- 2 files changed, 52 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h index 2537a03555cc..5503f5ab1e98 100644 --- a/drivers/gpu/drm/i915/i915_trace.h +++ b/drivers/gpu/drm/i915/i915_trace.h @@ -16,6 +16,49 @@ /* watermark/fifo updates */ +TRACE_EVENT(intel_cpu_fifo_underrun, + TP_PROTO(struct drm_i915_private *dev_priv, enum pipe pipe), + TP_ARGS(dev_priv, pipe), + + TP_STRUCT__entry( + __field(enum pipe, pipe) + __field(u32, frame) + __field(u32, scanline) + ), + + TP_fast_assign( + __entry->pipe = pipe; + __entry->frame = dev_priv->drm.driver->get_vblank_counter(&dev_priv->drm, pipe); + __entry->scanline = intel_get_crtc_scanline(intel_get_crtc_for_pipe(dev_priv, pipe)); + ), + + TP_printk("pipe %c, frame=%u, scanline=%u", + pipe_name(__entry->pipe), + __entry->frame, __entry->scanline) +); + +TRACE_EVENT(intel_pch_fifo_underrun, + TP_PROTO(struct drm_i915_private *dev_priv, enum transcoder pch_transcoder), + TP_ARGS(dev_priv, pch_transcoder), + + TP_STRUCT__entry( + __field(enum pipe, pipe) + __field(u32, frame) + __field(u32, scanline) + ), + + TP_fast_assign( + enum pipe pipe = (enum pipe)pch_transcoder; + __entry->pipe = pipe; + __entry->frame = dev_priv->drm.driver->get_vblank_counter(&dev_priv->drm, pipe); + __entry->scanline = intel_get_crtc_scanline(intel_get_crtc_for_pipe(dev_priv, pipe)); + ), + + TP_printk("pch transcoder %c, frame=%u, scanline=%u", + pipe_name(__entry->pipe), + __entry->frame, __entry->scanline) +); + TRACE_EVENT(intel_memory_cxsr, TP_PROTO(struct drm_i915_private *dev_priv, bool old, bool new), TP_ARGS(dev_priv, old, new), diff --git a/drivers/gpu/drm/i915/intel_fifo_underrun.c b/drivers/gpu/drm/i915/intel_fifo_underrun.c index f5ddacc90fde..966e255ca053 100644 --- a/drivers/gpu/drm/i915/intel_fifo_underrun.c +++ b/drivers/gpu/drm/i915/intel_fifo_underrun.c @@ -98,6 +98,7 @@ static void i9xx_check_fifo_underruns(struct intel_crtc *crtc) I915_WRITE(reg, pipestat | PIPE_FIFO_UNDERRUN_STATUS); POSTING_READ(reg); + trace_intel_cpu_fifo_underrun(dev_priv, crtc->pipe); DRM_ERROR("pipe %c underrun\n", pipe_name(crtc->pipe)); } @@ -147,6 +148,7 @@ static void ivybridge_check_fifo_underruns(struct intel_crtc *crtc) I915_WRITE(GEN7_ERR_INT, ERR_INT_FIFO_UNDERRUN(pipe)); POSTING_READ(GEN7_ERR_INT); + trace_intel_cpu_fifo_underrun(dev_priv, pipe); DRM_ERROR("fifo underrun on pipe %c\n", pipe_name(pipe)); } @@ -212,6 +214,7 @@ static void cpt_check_pch_fifo_underruns(struct intel_crtc *crtc) I915_WRITE(SERR_INT, SERR_INT_TRANS_FIFO_UNDERRUN(pch_transcoder)); POSTING_READ(SERR_INT); + trace_intel_pch_fifo_underrun(dev_priv, pch_transcoder); DRM_ERROR("pch fifo underrun on pch transcoder %s\n", transcoder_name(pch_transcoder)); } @@ -368,9 +371,11 @@ void intel_cpu_fifo_underrun_irq_handler(struct drm_i915_private *dev_priv, crtc->cpu_fifo_underrun_disabled) return; - if (intel_set_cpu_fifo_underrun_reporting(dev_priv, pipe, false)) + if (intel_set_cpu_fifo_underrun_reporting(dev_priv, pipe, false)) { + trace_intel_cpu_fifo_underrun(dev_priv, pipe); DRM_ERROR("CPU pipe %c FIFO underrun\n", pipe_name(pipe)); + } intel_fbc_handle_fifo_underrun_irq(dev_priv); } @@ -388,9 +393,11 @@ void intel_pch_fifo_underrun_irq_handler(struct drm_i915_private *dev_priv, enum transcoder pch_transcoder) { if (intel_set_pch_fifo_underrun_reporting(dev_priv, pch_transcoder, - false)) + false)) { + trace_intel_pch_fifo_underrun(dev_priv, pch_transcoder); DRM_ERROR("PCH transcoder %s FIFO underrun\n", transcoder_name(pch_transcoder)); + } } /** -- cgit From 24754d751cb86f6069315d8d613e23afcab06c91 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 3 Mar 2017 14:45:57 +0000 Subject: drm/i915: Take reference for signaling the request from hardirq Being inside a spinlock signaling that the hardware just completed a request doesn't prevent a second thread already spotting that the request is complete, freeing it and reallocating it! The code currently tries to prevent this using RCU -- but that only prevents the request from being freed, it doesn't prevent us from reallocating it - that requires us to take a reference. [ 206.922985] BUG: spinlock already unlocked on CPU#4, gem_exec_parall/7796 [ 206.922994] lock: 0xffff8801c6047120, .magic: dead4ead, .owner: /-1, .owner_cpu: -1 [ 206.923000] CPU: 4 PID: 7796 Comm: gem_exec_parall Not tainted 4.10.0-CI-Patchwork_4008+ #1 [ 206.923006] Hardware name: System manufacturer System Product Name/Z170M-PLUS, BIOS 1805 06/20/2016 [ 206.923012] Call Trace: [ 206.923014] [ 206.923019] dump_stack+0x67/0x92 [ 206.923023] spin_dump+0x73/0xc0 [ 206.923027] do_raw_spin_unlock+0x79/0xb0 [ 206.923031] _raw_spin_unlock_irqrestore+0x27/0x60 [ 206.923042] dma_fence_signal+0x160/0x230 [ 206.923060] notify_ring+0xae/0x2e0 [i915] [ 206.923073] ? ibx_hpd_irq_handler+0xc0/0xc0 [i915] [ 206.923086] gen8_gt_irq_handler+0x219/0x290 [i915] [ 206.923100] gen8_irq_handler+0x8e/0x6b0 [i915] [ 206.923105] __handle_irq_event_percpu+0x58/0x370 [ 206.923109] handle_irq_event_percpu+0x1e/0x50 [ 206.923113] handle_irq_event+0x34/0x60 [ 206.923117] handle_edge_irq+0xbe/0x150 [ 206.923122] handle_irq+0x15/0x20 [ 206.923126] do_IRQ+0x63/0x130 [ 206.923142] ? i915_mutex_lock_interruptible+0x39/0x140 [i915] [ 206.923148] common_interrupt+0x90/0x90 [ 206.923153] RIP: 0010:osq_lock+0x77/0x110 [ 206.923157] RSP: 0018:ffffc90001cabaa0 EFLAGS: 00000246 ORIG_RAX: ffffffffffffff6e [ 206.923164] RAX: 0000000000000000 RBX: ffff880236d1abc0 RCX: ffff8801ef642fc0 [ 206.923169] RDX: ffff8801ef6427c0 RSI: ffffffff81c6e7fd RDI: ffffffff81c7c848 [ 206.923175] RBP: ffffc90001cabab8 R08: 00000000692bb19b R09: 08c1493200000000 [ 206.923180] R10: 0000000000000001 R11: 0000000000000001 R12: ffff880236cdabc0 [ 206.923185] R13: ffff8802207f00b0 R14: ffffffffa00b7cd9 R15: ffff8802207f0070 [ 206.923191] [ 206.923206] ? i915_mutex_lock_interruptible+0x39/0x140 [i915] [ 206.923213] __mutex_lock+0x649/0x990 [ 206.923217] ? __mutex_lock+0xb0/0x990 [ 206.923221] ? _raw_spin_unlock+0x2c/0x50 [ 206.923226] ? __pm_runtime_resume+0x56/0x80 [ 206.923242] ? i915_mutex_lock_interruptible+0x39/0x140 [i915] [ 206.923249] mutex_lock_interruptible_nested+0x16/0x20 [ 206.923264] i915_mutex_lock_interruptible+0x39/0x140 [i915] [ 206.923270] ? __pm_runtime_resume+0x56/0x80 [ 206.923285] i915_gem_do_execbuffer.isra.15+0x442/0x1d10 [i915] [ 206.923291] ? __lock_acquire+0x449/0x1b50 [ 206.923296] ? __might_fault+0x3e/0x90 [ 206.923301] ? __might_fault+0x87/0x90 [ 206.923305] ? __might_fault+0x3e/0x90 [ 206.923320] i915_gem_execbuffer2+0xb5/0x220 [i915] [ 206.923327] drm_ioctl+0x200/0x450 [ 206.923341] ? i915_gem_execbuffer+0x330/0x330 [i915] [ 206.923348] do_vfs_ioctl+0x90/0x6e0 [ 206.923352] ? __fget+0x108/0x200 [ 206.923356] ? expand_files+0x2b0/0x2b0 [ 206.923361] SyS_ioctl+0x3c/0x70 [ 206.923365] entry_SYSCALL_64_fastpath+0x1c/0xb1 [ 206.923369] RIP: 0033:0x7fdd75fc6357 [ 206.923373] RSP: 002b:00007fdd20e59bf8 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 [ 206.923380] RAX: ffffffffffffffda RBX: ffffffff81481ff3 RCX: 00007fdd75fc6357 [ 206.923385] RDX: 00007fdd20e59c70 RSI: 0000000040406469 RDI: 0000000000000003 [ 206.923390] RBP: ffffc90001cabf88 R08: 0000000000000040 R09: 00000000000003f7 [ 206.923396] R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 [ 206.923401] R13: 0000000000000003 R14: 0000000040406469 R15: 0000000001cf9cb0 [ 206.923408] ? __this_cpu_preempt_check+0x13/0x20 Fixes: 56299fb7d904 ("drm/i915: Signal first fence from irq handler if complete") Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=100051 Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: Mika Kuoppala Link: http://patchwork.freedesktop.org/patch/msgid/20170303144557.4815-1-chris@chris-wilson.co.uk Reviewed-by: Mika Kuoppala --- drivers/gpu/drm/i915/i915_irq.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 675c28d54658..cb1424abb7c0 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -1042,8 +1042,6 @@ static void notify_ring(struct intel_engine_cs *engine) atomic_inc(&engine->irq_count); set_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted); - rcu_read_lock(); - spin_lock(&engine->breadcrumbs.lock); wait = engine->breadcrumbs.first_wait; if (wait) { @@ -1060,7 +1058,7 @@ static void notify_ring(struct intel_engine_cs *engine) */ if (i915_seqno_passed(intel_engine_get_seqno(engine), wait->seqno)) - rq = wait->request; + rq = i915_gem_request_get(wait->request); wake_up_process(wait->tsk); } else { @@ -1068,10 +1066,10 @@ static void notify_ring(struct intel_engine_cs *engine) } spin_unlock(&engine->breadcrumbs.lock); - if (rq) + if (rq) { dma_fence_signal(&rq->fence); - - rcu_read_unlock(); + i915_gem_request_put(rq); + } trace_intel_engine_notify(engine, wait); } -- cgit From b66255f0f77902ef41b09163a6a092d2d905e151 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 3 Mar 2017 17:14:22 +0000 Subject: drm/i915: Refactor wakeup of the next breadcrumb waiter Refactor the common task of updating the first_waiter, serialised with the interrupt handler. When we update the first_waiter, we also need to wakeup the new bottom-half in order to complete the actions that we may have delegated to it (such as checking the irq-seqno coherency or waking up other lower priority concurrent waiters). Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Link: http://patchwork.freedesktop.org/patch/msgid/20170303171422.4735-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_breadcrumbs.c | 48 ++++++++++++-------------------- 1 file changed, 18 insertions(+), 30 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c index 235d4645a5cf..2b26f84480cc 100644 --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c @@ -287,6 +287,22 @@ static inline void __intel_breadcrumbs_finish(struct intel_breadcrumbs *b, wake_up_process(wait->tsk); /* implicit smp_wmb() */ } +static inline void __intel_breadcrumbs_next(struct intel_engine_cs *engine, + struct rb_node *next) +{ + struct intel_breadcrumbs *b = &engine->breadcrumbs; + + GEM_BUG_ON(!b->irq_armed); + b->first_wait = to_wait(next); + + /* We always wake up the next waiter that takes over as the bottom-half + * as we may delegate not only the irq-seqno barrier to the next waiter + * but also the task of waking up concurrent waiters. + */ + if (next) + wake_up_process(to_wait(next)->tsk); +} + static bool __intel_engine_add_wait(struct intel_engine_cs *engine, struct intel_wait *wait) { @@ -357,21 +373,7 @@ static bool __intel_engine_add_wait(struct intel_engine_cs *engine, GEM_BUG_ON(!next && !first); if (next && next != &wait->node) { GEM_BUG_ON(first); - b->first_wait = to_wait(next); - /* As there is a delay between reading the current - * seqno, processing the completed tasks and selecting - * the next waiter, we may have missed the interrupt - * and so need for the next bottom-half to wakeup. - * - * Also as we enable the IRQ, we may miss the - * interrupt for that seqno, so we have to wake up - * the next bottom-half in order to do a coherent check - * in case the seqno passed. - */ - __intel_breadcrumbs_enable_irq(b); - if (test_bit(ENGINE_IRQ_BREADCRUMB, - &engine->irq_posted)) - wake_up_process(to_wait(next)->tsk); + __intel_breadcrumbs_next(engine, next); } do { @@ -473,21 +475,7 @@ static void __intel_engine_remove_wait(struct intel_engine_cs *engine, } } - if (next) { - /* In our haste, we may have completed the first waiter - * before we enabled the interrupt. Do so now as we - * have a second waiter for a future seqno. Afterwards, - * we have to wake up that waiter in case we missed - * the interrupt, or if we have to handle an - * exception rather than a seqno completion. - */ - b->first_wait = to_wait(next); - if (b->first_wait->seqno != wait->seqno) - __intel_breadcrumbs_enable_irq(b); - wake_up_process(b->first_wait->tsk); - } else { - b->first_wait = NULL; - } + __intel_breadcrumbs_next(engine, next); } else { GEM_BUG_ON(rb_first(&b->waiters) == &wait->node); } -- cgit From 61d3dc708077de316bdcafd66016c2285da07275 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 3 Mar 2017 19:08:24 +0000 Subject: drm/i915: Split breadcrumbs spinlock into two As we now take the breadcrumbs spinlock within the interrupt handler, we wish to minimise its hold time. During the interrupt we do not care about the state of the full rbtree, only that of the first element, so we can guard that with a separate lock. v2: Rename first_wait to irq_wait to make it clearer that it is guarded by irq_lock. Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Link: http://patchwork.freedesktop.org/patch/msgid/20170303190824.1330-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_debugfs.c | 12 ++-- drivers/gpu/drm/i915/i915_drv.h | 8 +-- drivers/gpu/drm/i915/i915_gpu_error.c | 8 +-- drivers/gpu/drm/i915/i915_irq.c | 6 +- drivers/gpu/drm/i915/intel_breadcrumbs.c | 80 ++++++++++++---------- drivers/gpu/drm/i915/intel_ringbuffer.h | 8 ++- drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c | 4 +- 7 files changed, 68 insertions(+), 58 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 70acbbf50c75..478f19d2f3d8 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -726,14 +726,14 @@ static void i915_ring_seqno_info(struct seq_file *m, seq_printf(m, "Current sequence (%s): %x\n", engine->name, intel_engine_get_seqno(engine)); - spin_lock_irq(&b->lock); + spin_lock_irq(&b->rb_lock); for (rb = rb_first(&b->waiters); rb; rb = rb_next(rb)) { struct intel_wait *w = rb_entry(rb, typeof(*w), node); seq_printf(m, "Waiting (%s): %s [%d] on %x\n", engine->name, w->tsk->comm, w->tsk->pid, w->seqno); } - spin_unlock_irq(&b->lock); + spin_unlock_irq(&b->rb_lock); } static int i915_gem_seqno_info(struct seq_file *m, void *data) @@ -1380,14 +1380,14 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused) &dev_priv->gpu_error.missed_irq_rings)), yesno(engine->hangcheck.stalled)); - spin_lock_irq(&b->lock); + spin_lock_irq(&b->rb_lock); for (rb = rb_first(&b->waiters); rb; rb = rb_next(rb)) { struct intel_wait *w = rb_entry(rb, typeof(*w), node); seq_printf(m, "\t%s [%d] waiting for %x\n", w->tsk->comm, w->tsk->pid, w->seqno); } - spin_unlock_irq(&b->lock); + spin_unlock_irq(&b->rb_lock); seq_printf(m, "\tACTHD = 0x%08llx [current 0x%08llx]\n", (long long)engine->hangcheck.acthd, @@ -3385,14 +3385,14 @@ static int i915_engine_info(struct seq_file *m, void *unused) I915_READ(RING_PP_DIR_DCLV(engine))); } - spin_lock_irq(&b->lock); + spin_lock_irq(&b->rb_lock); for (rb = rb_first(&b->waiters); rb; rb = rb_next(rb)) { struct intel_wait *w = rb_entry(rb, typeof(*w), node); seq_printf(m, "\t%s [%d] waiting for %x\n", w->tsk->comm, w->tsk->pid, w->seqno); } - spin_unlock_irq(&b->lock); + spin_unlock_irq(&b->rb_lock); seq_puts(m, "\n"); } diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 059335f23706..95050115c700 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -4097,16 +4097,16 @@ __i915_request_irq_complete(const struct drm_i915_gem_request *req) * the seqno before we believe it coherent since they see * irq_posted == false but we are still running). */ - spin_lock_irqsave(&b->lock, flags); - if (b->first_wait && b->first_wait->tsk != current) + spin_lock_irqsave(&b->irq_lock, flags); + if (b->irq_wait && b->irq_wait->tsk != current) /* Note that if the bottom-half is changed as we * are sending the wake-up, the new bottom-half will * be woken by whomever made the change. We only have * to worry about when we steal the irq-posted for * ourself. */ - wake_up_process(b->first_wait->tsk); - spin_unlock_irqrestore(&b->lock, flags); + wake_up_process(b->irq_wait->tsk); + spin_unlock_irqrestore(&b->irq_lock, flags); if (__i915_gem_request_completed(req, seqno)) return true; diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 061af8040498..8effc59f5cb5 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -1111,7 +1111,7 @@ static void error_record_engine_waiters(struct intel_engine_cs *engine, if (RB_EMPTY_ROOT(&b->waiters)) return; - if (!spin_trylock_irq(&b->lock)) { + if (!spin_trylock_irq(&b->rb_lock)) { ee->waiters = ERR_PTR(-EDEADLK); return; } @@ -1119,7 +1119,7 @@ static void error_record_engine_waiters(struct intel_engine_cs *engine, count = 0; for (rb = rb_first(&b->waiters); rb != NULL; rb = rb_next(rb)) count++; - spin_unlock_irq(&b->lock); + spin_unlock_irq(&b->rb_lock); waiter = NULL; if (count) @@ -1129,7 +1129,7 @@ static void error_record_engine_waiters(struct intel_engine_cs *engine, if (!waiter) return; - if (!spin_trylock_irq(&b->lock)) { + if (!spin_trylock_irq(&b->rb_lock)) { kfree(waiter); ee->waiters = ERR_PTR(-EDEADLK); return; @@ -1147,7 +1147,7 @@ static void error_record_engine_waiters(struct intel_engine_cs *engine, if (++ee->num_waiters == count) break; } - spin_unlock_irq(&b->lock); + spin_unlock_irq(&b->rb_lock); } static void error_record_engine_registers(struct i915_gpu_state *error, diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index cb1424abb7c0..29b002dacbd5 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -1042,8 +1042,8 @@ static void notify_ring(struct intel_engine_cs *engine) atomic_inc(&engine->irq_count); set_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted); - spin_lock(&engine->breadcrumbs.lock); - wait = engine->breadcrumbs.first_wait; + spin_lock(&engine->breadcrumbs.irq_lock); + wait = engine->breadcrumbs.irq_wait; if (wait) { /* We use a callback from the dma-fence to submit * requests after waiting on our own requests. To @@ -1064,7 +1064,7 @@ static void notify_ring(struct intel_engine_cs *engine) } else { __intel_engine_disarm_breadcrumbs(engine); } - spin_unlock(&engine->breadcrumbs.lock); + spin_unlock(&engine->breadcrumbs.irq_lock); if (rq) { dma_fence_signal(&rq->fence); diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c index 2b26f84480cc..6032d2a937d5 100644 --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c @@ -31,7 +31,9 @@ static unsigned int __intel_breadcrumbs_wakeup(struct intel_breadcrumbs *b) struct intel_wait *wait; unsigned int result = 0; - wait = b->first_wait; + lockdep_assert_held(&b->irq_lock); + + wait = b->irq_wait; if (wait) { result = ENGINE_WAKEUP_WAITER; if (wake_up_process(wait->tsk)) @@ -47,9 +49,9 @@ unsigned int intel_engine_wakeup(struct intel_engine_cs *engine) unsigned long flags; unsigned int result; - spin_lock_irqsave(&b->lock, flags); + spin_lock_irqsave(&b->irq_lock, flags); result = __intel_breadcrumbs_wakeup(b); - spin_unlock_irqrestore(&b->lock, flags); + spin_unlock_irqrestore(&b->irq_lock, flags); return result; } @@ -117,10 +119,10 @@ static void intel_breadcrumbs_fake_irq(unsigned long data) * coherent seqno check. */ - spin_lock_irqsave(&b->lock, flags); + spin_lock_irqsave(&b->irq_lock, flags); if (!__intel_breadcrumbs_wakeup(b)) __intel_engine_disarm_breadcrumbs(engine); - spin_unlock_irqrestore(&b->lock, flags); + spin_unlock_irqrestore(&b->irq_lock, flags); if (!b->irq_armed) return; @@ -164,7 +166,7 @@ void __intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine) { struct intel_breadcrumbs *b = &engine->breadcrumbs; - lockdep_assert_held(&b->lock); + lockdep_assert_held(&b->irq_lock); if (b->irq_enabled) { irq_disable(engine); @@ -182,7 +184,7 @@ void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine) if (!b->irq_armed) return; - spin_lock_irqsave(&b->lock, flags); + spin_lock_irqsave(&b->irq_lock, flags); /* We only disarm the irq when we are idle (all requests completed), * so if there remains a sleeping waiter, it missed the request @@ -193,7 +195,7 @@ void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine) __intel_engine_disarm_breadcrumbs(engine); - spin_unlock_irqrestore(&b->lock, flags); + spin_unlock_irqrestore(&b->irq_lock, flags); } static bool use_fake_irq(const struct intel_breadcrumbs *b) @@ -228,7 +230,7 @@ static void __intel_breadcrumbs_enable_irq(struct intel_breadcrumbs *b) container_of(b, struct intel_engine_cs, breadcrumbs); struct drm_i915_private *i915 = engine->i915; - lockdep_assert_held(&b->lock); + lockdep_assert_held(&b->irq_lock); if (b->irq_armed) return; @@ -276,7 +278,7 @@ static inline struct intel_wait *to_wait(struct rb_node *node) static inline void __intel_breadcrumbs_finish(struct intel_breadcrumbs *b, struct intel_wait *wait) { - lockdep_assert_held(&b->lock); + lockdep_assert_held(&b->rb_lock); /* This request is completed, so remove it from the tree, mark it as * complete, and *then* wake up the associated task. @@ -292,8 +294,10 @@ static inline void __intel_breadcrumbs_next(struct intel_engine_cs *engine, { struct intel_breadcrumbs *b = &engine->breadcrumbs; + spin_lock(&b->irq_lock); GEM_BUG_ON(!b->irq_armed); - b->first_wait = to_wait(next); + b->irq_wait = to_wait(next); + spin_unlock(&b->irq_lock); /* We always wake up the next waiter that takes over as the bottom-half * as we may delegate not only the irq-seqno barrier to the next waiter @@ -384,8 +388,9 @@ static bool __intel_engine_add_wait(struct intel_engine_cs *engine, } if (first) { + spin_lock(&b->irq_lock); GEM_BUG_ON(rb_first(&b->waiters) != &wait->node); - b->first_wait = wait; + b->irq_wait = wait; /* After assigning ourselves as the new bottom-half, we must * perform a cursory check to prevent a missed interrupt. * Either we miss the interrupt whilst programming the hardware, @@ -395,9 +400,10 @@ static bool __intel_engine_add_wait(struct intel_engine_cs *engine, * and so we miss the wake up. */ __intel_breadcrumbs_enable_irq(b); + spin_unlock(&b->irq_lock); } - GEM_BUG_ON(!b->first_wait); - GEM_BUG_ON(rb_first(&b->waiters) != &b->first_wait->node); + GEM_BUG_ON(!b->irq_wait); + GEM_BUG_ON(rb_first(&b->waiters) != &b->irq_wait->node); return first; } @@ -408,9 +414,9 @@ bool intel_engine_add_wait(struct intel_engine_cs *engine, struct intel_breadcrumbs *b = &engine->breadcrumbs; bool first; - spin_lock_irq(&b->lock); + spin_lock_irq(&b->rb_lock); first = __intel_engine_add_wait(engine, wait); - spin_unlock_irq(&b->lock); + spin_unlock_irq(&b->rb_lock); return first; } @@ -434,12 +440,12 @@ static void __intel_engine_remove_wait(struct intel_engine_cs *engine, { struct intel_breadcrumbs *b = &engine->breadcrumbs; - lockdep_assert_held(&b->lock); + lockdep_assert_held(&b->rb_lock); if (RB_EMPTY_NODE(&wait->node)) goto out; - if (b->first_wait == wait) { + if (b->irq_wait == wait) { const int priority = wakeup_priority(b, wait->tsk); struct rb_node *next; @@ -484,9 +490,9 @@ static void __intel_engine_remove_wait(struct intel_engine_cs *engine, rb_erase(&wait->node, &b->waiters); out: - GEM_BUG_ON(b->first_wait == wait); + GEM_BUG_ON(b->irq_wait == wait); GEM_BUG_ON(rb_first(&b->waiters) != - (b->first_wait ? &b->first_wait->node : NULL)); + (b->irq_wait ? &b->irq_wait->node : NULL)); } void intel_engine_remove_wait(struct intel_engine_cs *engine, @@ -501,9 +507,9 @@ void intel_engine_remove_wait(struct intel_engine_cs *engine, if (RB_EMPTY_NODE(&wait->node)) return; - spin_lock_irq(&b->lock); + spin_lock_irq(&b->rb_lock); __intel_engine_remove_wait(engine, wait); - spin_unlock_irq(&b->lock); + spin_unlock_irq(&b->rb_lock); } static bool signal_valid(const struct drm_i915_gem_request *request) @@ -573,7 +579,7 @@ static int intel_breadcrumbs_signaler(void *arg) dma_fence_signal(&request->fence); local_bh_enable(); /* kick start the tasklets */ - spin_lock_irq(&b->lock); + spin_lock_irq(&b->rb_lock); /* Wake up all other completed waiters and select the * next bottom-half for the next user interrupt. @@ -596,7 +602,7 @@ static int intel_breadcrumbs_signaler(void *arg) rb_erase(&request->signaling.node, &b->signals); RB_CLEAR_NODE(&request->signaling.node); - spin_unlock_irq(&b->lock); + spin_unlock_irq(&b->rb_lock); i915_gem_request_put(request); } else { @@ -653,7 +659,7 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request) request->signaling.wait.seqno = seqno; i915_gem_request_get(request); - spin_lock(&b->lock); + spin_lock(&b->rb_lock); /* First add ourselves into the list of waiters, but register our * bottom-half as the signaller thread. As per usual, only the oldest @@ -687,7 +693,7 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request) if (first) rcu_assign_pointer(b->first_signal, request); - spin_unlock(&b->lock); + spin_unlock(&b->rb_lock); if (wakeup) wake_up_process(b->signaler); @@ -702,7 +708,7 @@ void intel_engine_cancel_signaling(struct drm_i915_gem_request *request) lockdep_assert_held(&request->lock); GEM_BUG_ON(!request->signaling.wait.seqno); - spin_lock(&b->lock); + spin_lock(&b->rb_lock); if (!RB_EMPTY_NODE(&request->signaling.node)) { if (request == rcu_access_pointer(b->first_signal)) { @@ -718,7 +724,7 @@ void intel_engine_cancel_signaling(struct drm_i915_gem_request *request) __intel_engine_remove_wait(engine, &request->signaling.wait); - spin_unlock(&b->lock); + spin_unlock(&b->rb_lock); request->signaling.wait.seqno = 0; } @@ -728,7 +734,9 @@ int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine) struct intel_breadcrumbs *b = &engine->breadcrumbs; struct task_struct *tsk; - spin_lock_init(&b->lock); + spin_lock_init(&b->rb_lock); + spin_lock_init(&b->irq_lock); + setup_timer(&b->fake_irq, intel_breadcrumbs_fake_irq, (unsigned long)engine); @@ -766,7 +774,7 @@ void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine) struct intel_breadcrumbs *b = &engine->breadcrumbs; cancel_fake_irq(engine); - spin_lock_irq(&b->lock); + spin_lock_irq(&b->irq_lock); if (b->irq_enabled) irq_enable(engine); @@ -785,7 +793,7 @@ void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine) if (b->irq_armed) enable_fake_irq(b); - spin_unlock_irq(&b->lock); + spin_unlock_irq(&b->irq_lock); } void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine) @@ -793,7 +801,7 @@ void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine) struct intel_breadcrumbs *b = &engine->breadcrumbs; /* The engines should be idle and all requests accounted for! */ - WARN_ON(READ_ONCE(b->first_wait)); + WARN_ON(READ_ONCE(b->irq_wait)); WARN_ON(!RB_EMPTY_ROOT(&b->waiters)); WARN_ON(rcu_access_pointer(b->first_signal)); WARN_ON(!RB_EMPTY_ROOT(&b->signals)); @@ -809,10 +817,10 @@ bool intel_breadcrumbs_busy(struct intel_engine_cs *engine) struct intel_breadcrumbs *b = &engine->breadcrumbs; bool busy = false; - spin_lock_irq(&b->lock); + spin_lock_irq(&b->rb_lock); - if (b->first_wait) { - wake_up_process(b->first_wait->tsk); + if (b->irq_wait) { + wake_up_process(b->irq_wait->tsk); busy |= intel_engine_flag(engine); } @@ -821,7 +829,7 @@ bool intel_breadcrumbs_busy(struct intel_engine_cs *engine) busy |= intel_engine_flag(engine); } - spin_unlock_irq(&b->lock); + spin_unlock_irq(&b->rb_lock); return busy; } diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 55a6a3f8274c..9d6b83a16cc8 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -235,10 +235,12 @@ struct intel_engine_cs { * the overhead of waking that client is much preferred. */ struct intel_breadcrumbs { - spinlock_t lock; /* protects the lists of requests; irqsafe */ + spinlock_t irq_lock; /* protects irq_*; irqsafe */ + struct intel_wait *irq_wait; /* oldest waiter by retirement */ + + spinlock_t rb_lock; /* protects the rb and wraps irq_lock */ struct rb_root waiters; /* sorted by retirement, priority */ struct rb_root signals; /* sorted by retirement */ - struct intel_wait *first_wait; /* oldest waiter by retirement */ struct task_struct *signaler; /* used for fence signalling */ struct drm_i915_gem_request __rcu *first_signal; struct timer_list fake_irq; /* used after a missed interrupt */ @@ -639,7 +641,7 @@ void intel_engine_cancel_signaling(struct drm_i915_gem_request *request); static inline bool intel_engine_has_waiter(const struct intel_engine_cs *engine) { - return READ_ONCE(engine->breadcrumbs.first_wait); + return READ_ONCE(engine->breadcrumbs.irq_wait); } unsigned int intel_engine_wakeup(struct intel_engine_cs *engine); diff --git a/drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c b/drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c index 621be1ca53d8..19860a372d90 100644 --- a/drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c @@ -37,7 +37,7 @@ static int check_rbtree(struct intel_engine_cs *engine, struct rb_node *rb; int n; - if (&b->first_wait->node != rb_first(&b->waiters)) { + if (&b->irq_wait->node != rb_first(&b->waiters)) { pr_err("First waiter does not match first element of wait-tree\n"); return -EINVAL; } @@ -90,7 +90,7 @@ static int check_rbtree_empty(struct intel_engine_cs *engine) { struct intel_breadcrumbs *b = &engine->breadcrumbs; - if (b->first_wait) { + if (b->irq_wait) { pr_err("Empty breadcrumbs still has a waiter\n"); return -EINVAL; } -- cgit From 237ae7c79e2683723a6bf83e1d7db40fc890b285 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Wed, 1 Mar 2017 20:26:15 +0000 Subject: drm/i915: Don't use enums for hardware engine id Generally we are using macros for any hardware identifiers as these may change between Gens. Do the same with hardware engine ids. v2: move hw engine defs to i915_reg.h (Chris) Signed-off-by: Michal Wajdeczko Cc: Daniele Ceraolo Spurio Cc: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20170301202615.118632-1-michal.wajdeczko@intel.com Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_reg.h | 6 ++++++ drivers/gpu/drm/i915/intel_engine_cs.c | 4 ++-- drivers/gpu/drm/i915/intel_ringbuffer.h | 32 ++++++++++++++++---------------- 3 files changed, 24 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 0d9ae27fddff..243050ea4938 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -77,7 +77,13 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define _MASKED_BIT_ENABLE(a) ({ typeof(a) _a = (a); _MASKED_FIELD(_a, _a); }) #define _MASKED_BIT_DISABLE(a) (_MASKED_FIELD((a), 0)) +/* Engine ID */ +#define RCS_HW 0 +#define VCS_HW 1 +#define BCS_HW 2 +#define VECS_HW 3 +#define VCS2_HW 4 /* PCI config space */ diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 5fd4883db235..73fe718516a5 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -28,8 +28,8 @@ static const struct engine_info { const char *name; - unsigned exec_id; - enum intel_engine_hw_id hw_id; + unsigned int exec_id; + unsigned int hw_id; u32 mmio_base; unsigned irq_shift; int (*init_legacy)(struct intel_engine_cs *engine); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 9d6b83a16cc8..0ef491df5b4e 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -186,26 +186,26 @@ struct i915_ctx_workarounds { struct drm_i915_gem_request; struct intel_render_state; +/* + * Engine IDs definitions. + * Keep instances of the same type engine together. + */ +enum intel_engine_id { + RCS = 0, + BCS, + VCS, + VCS2, +#define _VCS(n) (VCS + (n)) + VECS +}; + struct intel_engine_cs { struct drm_i915_private *i915; const char *name; - enum intel_engine_id { - RCS = 0, - BCS, - VCS, - VCS2, /* Keep instances of the same type engine together. */ - VECS - } id; -#define _VCS(n) (VCS + (n)) + enum intel_engine_id id; unsigned int exec_id; - enum intel_engine_hw_id { - RCS_HW = 0, - VCS_HW, - BCS_HW, - VECS_HW, - VCS2_HW - } hw_id; - enum intel_engine_hw_id guc_id; /* XXX same as hw_id? */ + unsigned int hw_id; + unsigned int guc_id; u32 mmio_base; unsigned int irq_shift; struct intel_ring *buffer; -- cgit From 505b681539a7e14aeb866515d3ef1a67375839bc Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Mon, 6 Mar 2017 08:34:44 +0100 Subject: drm/i915: Update DRIVER_DATE to 20170306 Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 95050115c700..56e569f536ec 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -79,8 +79,8 @@ #define DRIVER_NAME "i915" #define DRIVER_DESC "Intel Graphics" -#define DRIVER_DATE "20170206" -#define DRIVER_TIMESTAMP 1486372993 +#define DRIVER_DATE "20170306" +#define DRIVER_TIMESTAMP 1488785683 #undef WARN_ON /* Many gcc seem to no see through this and fall over :( */ -- cgit