From 116788689bf8ca3c3337d8dd1a742c08342d2f62 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Fri, 15 May 2020 19:35:45 -0700 Subject: drm/i915: Mark check_shadow_context_ppgtt as maybe unused When CONFIG_DRM_I915_DEBUG_GEM is not set, clang warns: drivers/gpu/drm/i915/gvt/scheduler.c:884:1: warning: function 'check_shadow_context_ppgtt' is not needed and will not be emitted [-Wunneeded-internal-declaration] check_shadow_context_ppgtt(struct execlist_ring_context *c, struct intel_vgpu_mm *m) ^ 1 warning generated. This warning is similar to -Wunused-function but rather than warning that the function is completely unused, it warns that it is used in some expression within the file but that expression will be evaluated to a constant or be optimized away in the final assembly, essentially making it appeared used but really isn't. Usually, this happens when a function or variable is only used in sizeof, where it will appear to be used but will be evaluated at compile time and not be required to be emitted. In this case, the function is only used in GEM_BUG_ON, which is defined as BUILD_BUG_ON_INVALID, which intentionally follows this pattern. To fix this warning, add __maybe_unused to make it clear that this is intentional depending on the configuration. Fixes: bec3df930fbd ("drm/i915/gvt: Support PPGTT table load command") Link: https://github.com/ClangBuiltLinux/linux/issues/1027 Acked-by: Zhenyu Wang Signed-off-by: Nathan Chancellor Signed-off-by: Zhenyu Wang Link: http://patchwork.freedesktop.org/patch/msgid/20200516023545.3332334-1-natechancellor@gmail.com --- drivers/gpu/drm/i915/gvt/scheduler.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index c00189432b58..3a9bd8e4d8db 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -876,7 +876,7 @@ static void update_guest_pdps(struct intel_vgpu *vgpu, gpa + i * 8, &pdp[7 - i], 4); } -static bool +static __maybe_unused bool check_shadow_context_ppgtt(struct execlist_ring_context *c, struct intel_vgpu_mm *m) { if (m->ppgtt_mm.root_entry_type == GTT_TYPE_PPGTT_ROOT_L4_ENTRY) { -- cgit From cb7ee52284a244fd14caec73df0d49e02891aac4 Mon Sep 17 00:00:00 2001 From: Aishwarya Ramakrishnan Date: Mon, 18 May 2020 20:33:36 +0530 Subject: drm/i915/gvt: Use ARRAY_SIZE for vgpu_types Prefer ARRAY_SIZE instead of using sizeof Fixes coccicheck warning: Use ARRAY_SIZE Reviewed-by: Chris Wilson Signed-off-by: Aishwarya Ramakrishnan Signed-off-by: Zhenyu Wang Link: http://patchwork.freedesktop.org/patch/msgid/20200518150336.15265-1-aishwaryarj100@gmail.com --- drivers/gpu/drm/i915/gvt/vgpu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gvt/vgpu.c b/drivers/gpu/drm/i915/gvt/vgpu.c index 1d5ff88078bd..7d361623ff67 100644 --- a/drivers/gpu/drm/i915/gvt/vgpu.c +++ b/drivers/gpu/drm/i915/gvt/vgpu.c @@ -124,7 +124,7 @@ int intel_gvt_init_vgpu_types(struct intel_gvt *gvt) */ low_avail = gvt_aperture_sz(gvt) - HOST_LOW_GM_SIZE; high_avail = gvt_hidden_sz(gvt) - HOST_HIGH_GM_SIZE; - num_types = sizeof(vgpu_types) / sizeof(vgpu_types[0]); + num_types = ARRAY_SIZE(vgpu_types); gvt->types = kcalloc(num_types, sizeof(struct intel_vgpu_type), GFP_KERNEL); -- cgit From 0fad590fd98f308c903fe8205c0f07a649a0b72e Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 19 May 2020 07:31:12 +0100 Subject: drm/i915: Don't set queue-priority hint when supressing the reschedule We recorded the execlists->queue_priority_hint update for the inflight request without kicking the tasklet. The next submitted request then failed to be scheduled as it had a lower priority than the hint, leaving the HW running with only the inflight request. Fixes: 6cebcf746f3f ("drm/i915: Tweak scheduler's kick_submission()") Signed-off-by: Chris Wilson Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20200519063123.20673-1-chris@chris-wilson.co.uk (cherry picked from commit b86fc6e5e89e5645b43f57171c26740ef38f9f4a) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_scheduler.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c index f4ea318781f0..cbb880b10c65 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.c +++ b/drivers/gpu/drm/i915/i915_scheduler.c @@ -209,14 +209,6 @@ static void kick_submission(struct intel_engine_cs *engine, if (!inflight) goto unlock; - ENGINE_TRACE(engine, - "bumping queue-priority-hint:%d for rq:%llx:%lld, inflight:%llx:%lld prio %d\n", - prio, - rq->fence.context, rq->fence.seqno, - inflight->fence.context, inflight->fence.seqno, - inflight->sched.attr.priority); - engine->execlists.queue_priority_hint = prio; - /* * If we are already the currently executing context, don't * bother evaluating if we should preempt ourselves. @@ -224,6 +216,14 @@ static void kick_submission(struct intel_engine_cs *engine, if (inflight->context == rq->context) goto unlock; + ENGINE_TRACE(engine, + "bumping queue-priority-hint:%d for rq:%llx:%lld, inflight:%llx:%lld prio %d\n", + prio, + rq->fence.context, rq->fence.seqno, + inflight->fence.context, inflight->fence.seqno, + inflight->sched.attr.priority); + + engine->execlists.queue_priority_hint = prio; if (need_preempt(prio, rq_prio(inflight))) tasklet_hi_schedule(&engine->execlists.tasklet); -- cgit From b7ccc7858a33ddb3c5516f39b104a9156957c8bb Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 20 May 2020 08:30:48 +0100 Subject: drm/i915/gt: Remove errant assertion in __intel_context_do_pin This assertion was removed in commit b412c63f1cba ("drm/i915/gt: Report context-is-closed prior to pinning"), but accidentally restored by a cherry-pick into drm-next and now has percolated back to drm-intel-next-queued. Fixes: 2e46a2a0b014 ("drm/i915: Use explicit flag to mark unreachable intel_context") Fixes: 2b703bbda271 ("Merge drm/drm-next into drm-intel-next-queued") References: b412c63f1cba ("drm/i915/gt: Report context-is-closed prior to pinning") Signed-off-by: Chris Wilson Cc: Rodrigo Vivi Cc: Joonas Lahtinen Reviewed-by: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20200520073048.2394034-1-chris@chris-wilson.co.uk (cherry picked from commit f2c1061a3677b400a945d9238f17bf33d669acff) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/gt/intel_context.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c index 74ddb49b2941..e4aece20bc80 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.c +++ b/drivers/gpu/drm/i915/gt/intel_context.c @@ -97,8 +97,6 @@ int __intel_context_do_pin(struct intel_context *ce) { int err; - GEM_BUG_ON(intel_context_is_closed(ce)); - if (unlikely(!test_bit(CONTEXT_ALLOC_BIT, &ce->flags))) { err = intel_context_alloc_state(ce); if (err) -- cgit From 9ef36fc2d0347f04f75b9d70c0ecc2e3b403bb7f Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 21 May 2020 15:06:16 +0100 Subject: drm/i915: Disable semaphore inter-engine sync without timeslicing Since the removal of the no-semaphore boosting, we rely on timeslicing to reorder passed inter-dependency hogs across the engines. However, we require preemption to support timeslicing into user payloads, and not all machine support preemption so we do not universally enable timeslicing, even when it would correctly preempt our own inter-engine semaphores. Since timeslicing and semaphore priority deboosting is now disabled on Broadwell/Braswell, we have to follow suite and not use semaphores. Testcase: igt/gem_exec_schedule/semaphore-codependency # bdw/bsw Fixes: 18e4af04d218 ("drm/i915: Drop no-semaphore boosting") Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: Mika Kuoppala Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20200521140617.30015-1-chris@chris-wilson.co.uk (cherry picked from commit 0eb670aac27b1d615004c29efec595616e3e091a) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index 900ea8b7fc8f..f5d59d18cd5b 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -230,7 +230,7 @@ static void intel_context_set_gem(struct intel_context *ce, ce->timeline = intel_timeline_get(ctx->timeline); if (ctx->sched.priority >= I915_PRIORITY_NORMAL && - intel_engine_has_semaphores(ce->engine)) + intel_engine_has_timeslices(ce->engine)) __set_bit(CONTEXT_USE_SEMAPHORES, &ce->flags); } @@ -1969,7 +1969,7 @@ static int __apply_priority(struct intel_context *ce, void *arg) { struct i915_gem_context *ctx = arg; - if (!intel_engine_has_semaphores(ce->engine)) + if (!intel_engine_has_timeslices(ce->engine)) return 0; if (ctx->sched.priority >= I915_PRIORITY_NORMAL) -- cgit From ef29440b3ccb93f44cf664311b30fbd5f84d9683 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 21 May 2020 15:06:17 +0100 Subject: drm/i915: Avoid using rq->engine after free during i915_fence_release In order to be valid to dereference during the i915_fence_release, after retiring the fence and releasing its refererences, we assume that rq->engine can only be a real engine (that stay intact until the device is shutdown after all fences have been flushed). However, due to a quirk of preempt-to-busy, we may retire a request that still belongs to a virtual engine and so eventually free it with rq->engine being invalid. To avoid dereferencing that invalid engine, we look at the execution_mask which if it indicates it may be executed on more than one engine, we know it originated on a virtual engine and may still be on one. Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/1906 Fixes: 43acd6516ca9 ("drm/i915: Keep a per-engine request pool") Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20200521140617.30015-2-chris@chris-wilson.co.uk (cherry picked from commit 32a4605b38c30689a6a18f3f4c7d3133ac9d3277) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_request.c | 35 +++++++++++++++++++++++++++++++++-- 1 file changed, 33 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 526c1e9acbd5..c282719ad3ac 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -121,8 +121,39 @@ static void i915_fence_release(struct dma_fence *fence) i915_sw_fence_fini(&rq->submit); i915_sw_fence_fini(&rq->semaphore); - /* Keep one request on each engine for reserved use under mempressure */ - if (!cmpxchg(&rq->engine->request_pool, NULL, rq)) + /* + * Keep one request on each engine for reserved use under mempressure + * + * We do not hold a reference to the engine here and so have to be + * very careful in what rq->engine we poke. The virtual engine is + * referenced via the rq->context and we released that ref during + * i915_request_retire(), ergo we must not dereference a virtual + * engine here. Not that we would want to, as the only consumer of + * the reserved engine->request_pool is the power management parking, + * which must-not-fail, and that is only run on the physical engines. + * + * Since the request must have been executed to be have completed, + * we know that it will have been processed by the HW and will + * not be unsubmitted again, so rq->engine and rq->execution_mask + * at this point is stable. rq->execution_mask will be a single + * bit if the last and _only_ engine it could execution on was a + * physical engine, if it's multiple bits then it started on and + * could still be on a virtual engine. Thus if the mask is not a + * power-of-two we assume that rq->engine may still be a virtual + * engine and so a dangling invalid pointer that we cannot dereference + * + * For example, consider the flow of a bonded request through a virtual + * engine. The request is created with a wide engine mask (all engines + * that we might execute on). On processing the bond, the request mask + * is reduced to one or more engines. If the request is subsequently + * bound to a single engine, it will then be constrained to only + * execute on that engine and never returned to the virtual engine + * after timeslicing away, see __unwind_incomplete_requests(). Thus we + * know that if the rq->execution_mask is a single bit, rq->engine + * can be a physical engine with the exact corresponding mask. + */ + if (is_power_of_2(rq->execution_mask) && + !cmpxchg(&rq->engine->request_pool, NULL, rq)) return; kmem_cache_free(global.slab_requests, rq); -- cgit From 757a9395f33c51c4e6eff2c7c0fbd50226a58224 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 22 May 2020 14:27:06 +0100 Subject: drm/i915/gem: Avoid iterating an empty list Our __sgt_iter assumes that the scattergather list has at least one element. But during construction we may fail in allocating the first page, and so mark the first element as the terminator. This is unexpected! [22555.524752] RIP: 0010:shmem_get_pages+0x506/0x710 [i915] [22555.524759] Code: 49 8b 2c 24 31 c0 66 89 44 24 40 48 85 ed 0f 84 62 01 00 00 4c 8b 75 00 8b 5d 08 44 8b 7d 0c 48 8b 0d 7e 34 07 e2 49 83 e6 fc <49> 8b 16 41 01 df 48 89 cf 48 89 d0 48 c1 e8 2d 48 85 c9 0f 84 c8 [22555.524765] RSP: 0018:ffffc9000053f9d0 EFLAGS: 00010246 [22555.524770] RAX: 0000000000000000 RBX: 0000000000000000 RCX: ffff8881ffffa000 [22555.524774] RDX: fffffffffffffff4 RSI: ffffffffffffffff RDI: ffffffff821efe00 [22555.524778] RBP: ffff8881b099ab00 R08: 0000000000000000 R09: 00000000fffffff4 [22555.524782] R10: 0000000000000002 R11: 00000000ffec0a02 R12: ffff8881cd3c8d60 [22555.524786] R13: 00000000fffffff4 R14: 0000000000000000 R15: 0000000000000000 [22555.524790] FS: 00007f4fbeb9b9c0(0000) GS:ffff8881f8580000(0000) knlGS:0000000000000000 [22555.524795] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [22555.524799] CR2: 0000000000000000 CR3: 00000001ec7f0004 CR4: 00000000001606e0 [22555.524803] Call Trace: [22555.524919] __i915_gem_object_get_pages+0x4f/0x60 [i915] Fixes: 85d1225ec066 ("drm/i915: Introduce & use new lightweight SGL iterators") Signed-off-by: Chris Wilson Cc: Matthew Auld Cc: Tvrtko Ursulin Cc: # v4.8+ Reviewed-by: Matthew Auld Reviewed-by: Maciej Patelczyk Link: https://patchwork.freedesktop.org/patch/msgid/20200522132706.5133-1-chris@chris-wilson.co.uk (cherry picked from commit 957ad9a02be6faa87594c58ac09460cd3d190d0e) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/gem/i915_gem_shmem.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c index 5d5d7eef3f43..7aff3514d97a 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c @@ -39,7 +39,6 @@ static int shmem_get_pages(struct drm_i915_gem_object *obj) unsigned long last_pfn = 0; /* suppress gcc warning */ unsigned int max_segment = i915_sg_segment_size(); unsigned int sg_page_sizes; - struct pagevec pvec; gfp_t noreclaim; int ret; @@ -192,13 +191,17 @@ err_sg: sg_mark_end(sg); err_pages: mapping_clear_unevictable(mapping); - pagevec_init(&pvec); - for_each_sgt_page(page, sgt_iter, st) { - if (!pagevec_add(&pvec, page)) + if (sg != st->sgl) { + struct pagevec pvec; + + pagevec_init(&pvec); + for_each_sgt_page(page, sgt_iter, st) { + if (!pagevec_add(&pvec, page)) + check_release_pagevec(&pvec); + } + if (pagevec_count(&pvec)) check_release_pagevec(&pvec); } - if (pagevec_count(&pvec)) - check_release_pagevec(&pvec); sg_free_table(st); kfree(st); -- cgit From 9271dfd9e0f79e2969dcbe28568bce0fdc4f8f73 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Sun, 24 May 2020 02:46:53 -0400 Subject: drm/amdgpu/pm: return an error during GPU reset or suspend (v2) Return an error for sysfs and debugfs power interfaces during gpu reset and suspend. Prevents access to the hw while it may be in an unusable state. v2: squash in fix to drop suspend check Acked-by: Nirmoy Das Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c | 171 +++++++++++++++++++++++++++++++++ 1 file changed, 171 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c index d7646cbce346..775e389c9a13 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c @@ -163,6 +163,9 @@ static ssize_t amdgpu_get_power_dpm_state(struct device *dev, enum amd_pm_state_type pm; int ret; + if (adev->in_gpu_reset) + return -EPERM; + ret = pm_runtime_get_sync(ddev->dev); if (ret < 0) return ret; @@ -196,6 +199,9 @@ static ssize_t amdgpu_set_power_dpm_state(struct device *dev, enum amd_pm_state_type state; int ret; + if (adev->in_gpu_reset) + return -EPERM; + if (strncmp("battery", buf, strlen("battery")) == 0) state = POWER_STATE_TYPE_BATTERY; else if (strncmp("balanced", buf, strlen("balanced")) == 0) @@ -297,6 +303,9 @@ static ssize_t amdgpu_get_power_dpm_force_performance_level(struct device *dev, enum amd_dpm_forced_level level = 0xff; int ret; + if (adev->in_gpu_reset) + return -EPERM; + ret = pm_runtime_get_sync(ddev->dev); if (ret < 0) return ret; @@ -334,6 +343,9 @@ static ssize_t amdgpu_set_power_dpm_force_performance_level(struct device *dev, enum amd_dpm_forced_level current_level = 0xff; int ret = 0; + if (adev->in_gpu_reset) + return -EPERM; + if (strncmp("low", buf, strlen("low")) == 0) { level = AMD_DPM_FORCED_LEVEL_LOW; } else if (strncmp("high", buf, strlen("high")) == 0) { @@ -433,6 +445,9 @@ static ssize_t amdgpu_get_pp_num_states(struct device *dev, struct pp_states_info data; int i, buf_len, ret; + if (adev->in_gpu_reset) + return -EPERM; + ret = pm_runtime_get_sync(ddev->dev); if (ret < 0) return ret; @@ -472,6 +487,9 @@ static ssize_t amdgpu_get_pp_cur_state(struct device *dev, enum amd_pm_state_type pm = 0; int i = 0, ret = 0; + if (adev->in_gpu_reset) + return -EPERM; + ret = pm_runtime_get_sync(ddev->dev); if (ret < 0) return ret; @@ -508,6 +526,9 @@ static ssize_t amdgpu_get_pp_force_state(struct device *dev, struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = ddev->dev_private; + if (adev->in_gpu_reset) + return -EPERM; + if (adev->pp_force_state_enabled) return amdgpu_get_pp_cur_state(dev, attr, buf); else @@ -525,6 +546,9 @@ static ssize_t amdgpu_set_pp_force_state(struct device *dev, unsigned long idx; int ret; + if (adev->in_gpu_reset) + return -EPERM; + if (strlen(buf) == 1) adev->pp_force_state_enabled = false; else if (is_support_sw_smu(adev)) @@ -580,6 +604,9 @@ static ssize_t amdgpu_get_pp_table(struct device *dev, char *table = NULL; int size, ret; + if (adev->in_gpu_reset) + return -EPERM; + ret = pm_runtime_get_sync(ddev->dev); if (ret < 0) return ret; @@ -619,6 +646,9 @@ static ssize_t amdgpu_set_pp_table(struct device *dev, struct amdgpu_device *adev = ddev->dev_private; int ret = 0; + if (adev->in_gpu_reset) + return -EPERM; + ret = pm_runtime_get_sync(ddev->dev); if (ret < 0) return ret; @@ -721,6 +751,9 @@ static ssize_t amdgpu_set_pp_od_clk_voltage(struct device *dev, const char delimiter[3] = {' ', '\n', '\0'}; uint32_t type; + if (adev->in_gpu_reset) + return -EPERM; + if (count > 127) return -EINVAL; @@ -810,6 +843,9 @@ static ssize_t amdgpu_get_pp_od_clk_voltage(struct device *dev, ssize_t size; int ret; + if (adev->in_gpu_reset) + return -EPERM; + ret = pm_runtime_get_sync(ddev->dev); if (ret < 0) return ret; @@ -859,6 +895,9 @@ static ssize_t amdgpu_set_pp_features(struct device *dev, uint64_t featuremask; int ret; + if (adev->in_gpu_reset) + return -EPERM; + ret = kstrtou64(buf, 0, &featuremask); if (ret) return -EINVAL; @@ -899,6 +938,9 @@ static ssize_t amdgpu_get_pp_features(struct device *dev, ssize_t size; int ret; + if (adev->in_gpu_reset) + return -EPERM; + ret = pm_runtime_get_sync(ddev->dev); if (ret < 0) return ret; @@ -955,6 +997,9 @@ static ssize_t amdgpu_get_pp_dpm_sclk(struct device *dev, ssize_t size; int ret; + if (adev->in_gpu_reset) + return -EPERM; + ret = pm_runtime_get_sync(ddev->dev); if (ret < 0) return ret; @@ -1018,6 +1063,9 @@ static ssize_t amdgpu_set_pp_dpm_sclk(struct device *dev, int ret; uint32_t mask = 0; + if (adev->in_gpu_reset) + return -EPERM; + ret = amdgpu_read_mask(buf, count, &mask); if (ret) return ret; @@ -1049,6 +1097,9 @@ static ssize_t amdgpu_get_pp_dpm_mclk(struct device *dev, ssize_t size; int ret; + if (adev->in_gpu_reset) + return -EPERM; + ret = pm_runtime_get_sync(ddev->dev); if (ret < 0) return ret; @@ -1076,6 +1127,9 @@ static ssize_t amdgpu_set_pp_dpm_mclk(struct device *dev, uint32_t mask = 0; int ret; + if (adev->in_gpu_reset) + return -EPERM; + ret = amdgpu_read_mask(buf, count, &mask); if (ret) return ret; @@ -1107,6 +1161,9 @@ static ssize_t amdgpu_get_pp_dpm_socclk(struct device *dev, ssize_t size; int ret; + if (adev->in_gpu_reset) + return -EPERM; + ret = pm_runtime_get_sync(ddev->dev); if (ret < 0) return ret; @@ -1134,6 +1191,9 @@ static ssize_t amdgpu_set_pp_dpm_socclk(struct device *dev, int ret; uint32_t mask = 0; + if (adev->in_gpu_reset) + return -EPERM; + ret = amdgpu_read_mask(buf, count, &mask); if (ret) return ret; @@ -1167,6 +1227,9 @@ static ssize_t amdgpu_get_pp_dpm_fclk(struct device *dev, ssize_t size; int ret; + if (adev->in_gpu_reset) + return -EPERM; + ret = pm_runtime_get_sync(ddev->dev); if (ret < 0) return ret; @@ -1194,6 +1257,9 @@ static ssize_t amdgpu_set_pp_dpm_fclk(struct device *dev, int ret; uint32_t mask = 0; + if (adev->in_gpu_reset) + return -EPERM; + ret = amdgpu_read_mask(buf, count, &mask); if (ret) return ret; @@ -1227,6 +1293,9 @@ static ssize_t amdgpu_get_pp_dpm_dcefclk(struct device *dev, ssize_t size; int ret; + if (adev->in_gpu_reset) + return -EPERM; + ret = pm_runtime_get_sync(ddev->dev); if (ret < 0) return ret; @@ -1254,6 +1323,9 @@ static ssize_t amdgpu_set_pp_dpm_dcefclk(struct device *dev, int ret; uint32_t mask = 0; + if (adev->in_gpu_reset) + return -EPERM; + ret = amdgpu_read_mask(buf, count, &mask); if (ret) return ret; @@ -1287,6 +1359,9 @@ static ssize_t amdgpu_get_pp_dpm_pcie(struct device *dev, ssize_t size; int ret; + if (adev->in_gpu_reset) + return -EPERM; + ret = pm_runtime_get_sync(ddev->dev); if (ret < 0) return ret; @@ -1314,6 +1389,9 @@ static ssize_t amdgpu_set_pp_dpm_pcie(struct device *dev, int ret; uint32_t mask = 0; + if (adev->in_gpu_reset) + return -EPERM; + ret = amdgpu_read_mask(buf, count, &mask); if (ret) return ret; @@ -1347,6 +1425,9 @@ static ssize_t amdgpu_get_pp_sclk_od(struct device *dev, uint32_t value = 0; int ret; + if (adev->in_gpu_reset) + return -EPERM; + ret = pm_runtime_get_sync(ddev->dev); if (ret < 0) return ret; @@ -1372,6 +1453,9 @@ static ssize_t amdgpu_set_pp_sclk_od(struct device *dev, int ret; long int value; + if (adev->in_gpu_reset) + return -EPERM; + ret = kstrtol(buf, 0, &value); if (ret) @@ -1410,6 +1494,9 @@ static ssize_t amdgpu_get_pp_mclk_od(struct device *dev, uint32_t value = 0; int ret; + if (adev->in_gpu_reset) + return -EPERM; + ret = pm_runtime_get_sync(ddev->dev); if (ret < 0) return ret; @@ -1435,6 +1522,9 @@ static ssize_t amdgpu_set_pp_mclk_od(struct device *dev, int ret; long int value; + if (adev->in_gpu_reset) + return -EPERM; + ret = kstrtol(buf, 0, &value); if (ret) @@ -1493,6 +1583,9 @@ static ssize_t amdgpu_get_pp_power_profile_mode(struct device *dev, ssize_t size; int ret; + if (adev->in_gpu_reset) + return -EPERM; + ret = pm_runtime_get_sync(ddev->dev); if (ret < 0) return ret; @@ -1528,6 +1621,9 @@ static ssize_t amdgpu_set_pp_power_profile_mode(struct device *dev, long int profile_mode = 0; const char delimiter[3] = {' ', '\n', '\0'}; + if (adev->in_gpu_reset) + return -EPERM; + tmp[0] = *(buf); tmp[1] = '\0'; ret = kstrtol(tmp, 0, &profile_mode); @@ -1587,6 +1683,9 @@ static ssize_t amdgpu_get_gpu_busy_percent(struct device *dev, struct amdgpu_device *adev = ddev->dev_private; int r, value, size = sizeof(value); + if (adev->in_gpu_reset) + return -EPERM; + r = pm_runtime_get_sync(ddev->dev); if (r < 0) return r; @@ -1620,6 +1719,9 @@ static ssize_t amdgpu_get_mem_busy_percent(struct device *dev, struct amdgpu_device *adev = ddev->dev_private; int r, value, size = sizeof(value); + if (adev->in_gpu_reset) + return -EPERM; + r = pm_runtime_get_sync(ddev->dev); if (r < 0) return r; @@ -1658,6 +1760,9 @@ static ssize_t amdgpu_get_pcie_bw(struct device *dev, uint64_t count0 = 0, count1 = 0; int ret; + if (adev->in_gpu_reset) + return -EPERM; + if (adev->flags & AMD_IS_APU) return -ENODATA; @@ -1694,6 +1799,9 @@ static ssize_t amdgpu_get_unique_id(struct device *dev, struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = ddev->dev_private; + if (adev->in_gpu_reset) + return -EPERM; + if (adev->unique_id) return snprintf(buf, PAGE_SIZE, "%016llx\n", adev->unique_id); @@ -1888,6 +1996,9 @@ static ssize_t amdgpu_hwmon_show_temp(struct device *dev, int channel = to_sensor_dev_attr(attr)->index; int r, temp = 0, size = sizeof(temp); + if (adev->in_gpu_reset) + return -EPERM; + if (channel >= PP_TEMP_MAX) return -EINVAL; @@ -2019,6 +2130,9 @@ static ssize_t amdgpu_hwmon_get_pwm1_enable(struct device *dev, u32 pwm_mode = 0; int ret; + if (adev->in_gpu_reset) + return -EPERM; + ret = pm_runtime_get_sync(adev->ddev->dev); if (ret < 0) return ret; @@ -2050,6 +2164,9 @@ static ssize_t amdgpu_hwmon_set_pwm1_enable(struct device *dev, int err, ret; int value; + if (adev->in_gpu_reset) + return -EPERM; + err = kstrtoint(buf, 10, &value); if (err) return err; @@ -2099,6 +2216,9 @@ static ssize_t amdgpu_hwmon_set_pwm1(struct device *dev, u32 value; u32 pwm_mode; + if (adev->in_gpu_reset) + return -EPERM; + err = pm_runtime_get_sync(adev->ddev->dev); if (err < 0) return err; @@ -2148,6 +2268,9 @@ static ssize_t amdgpu_hwmon_get_pwm1(struct device *dev, int err; u32 speed = 0; + if (adev->in_gpu_reset) + return -EPERM; + err = pm_runtime_get_sync(adev->ddev->dev); if (err < 0) return err; @@ -2178,6 +2301,9 @@ static ssize_t amdgpu_hwmon_get_fan1_input(struct device *dev, int err; u32 speed = 0; + if (adev->in_gpu_reset) + return -EPERM; + err = pm_runtime_get_sync(adev->ddev->dev); if (err < 0) return err; @@ -2207,6 +2333,9 @@ static ssize_t amdgpu_hwmon_get_fan1_min(struct device *dev, u32 size = sizeof(min_rpm); int r; + if (adev->in_gpu_reset) + return -EPERM; + r = pm_runtime_get_sync(adev->ddev->dev); if (r < 0) return r; @@ -2232,6 +2361,9 @@ static ssize_t amdgpu_hwmon_get_fan1_max(struct device *dev, u32 size = sizeof(max_rpm); int r; + if (adev->in_gpu_reset) + return -EPERM; + r = pm_runtime_get_sync(adev->ddev->dev); if (r < 0) return r; @@ -2256,6 +2388,9 @@ static ssize_t amdgpu_hwmon_get_fan1_target(struct device *dev, int err; u32 rpm = 0; + if (adev->in_gpu_reset) + return -EPERM; + err = pm_runtime_get_sync(adev->ddev->dev); if (err < 0) return err; @@ -2285,6 +2420,9 @@ static ssize_t amdgpu_hwmon_set_fan1_target(struct device *dev, u32 value; u32 pwm_mode; + if (adev->in_gpu_reset) + return -EPERM; + err = pm_runtime_get_sync(adev->ddev->dev); if (err < 0) return err; @@ -2331,6 +2469,9 @@ static ssize_t amdgpu_hwmon_get_fan1_enable(struct device *dev, u32 pwm_mode = 0; int ret; + if (adev->in_gpu_reset) + return -EPERM; + ret = pm_runtime_get_sync(adev->ddev->dev); if (ret < 0) return ret; @@ -2363,6 +2504,9 @@ static ssize_t amdgpu_hwmon_set_fan1_enable(struct device *dev, int value; u32 pwm_mode; + if (adev->in_gpu_reset) + return -EPERM; + err = kstrtoint(buf, 10, &value); if (err) return err; @@ -2403,6 +2547,9 @@ static ssize_t amdgpu_hwmon_show_vddgfx(struct device *dev, u32 vddgfx; int r, size = sizeof(vddgfx); + if (adev->in_gpu_reset) + return -EPERM; + r = pm_runtime_get_sync(adev->ddev->dev); if (r < 0) return r; @@ -2435,6 +2582,9 @@ static ssize_t amdgpu_hwmon_show_vddnb(struct device *dev, u32 vddnb; int r, size = sizeof(vddnb); + if (adev->in_gpu_reset) + return -EPERM; + /* only APUs have vddnb */ if (!(adev->flags & AMD_IS_APU)) return -EINVAL; @@ -2472,6 +2622,9 @@ static ssize_t amdgpu_hwmon_show_power_avg(struct device *dev, int r, size = sizeof(u32); unsigned uw; + if (adev->in_gpu_reset) + return -EPERM; + r = pm_runtime_get_sync(adev->ddev->dev); if (r < 0) return r; @@ -2508,6 +2661,9 @@ static ssize_t amdgpu_hwmon_show_power_cap_max(struct device *dev, ssize_t size; int r; + if (adev->in_gpu_reset) + return -EPERM; + r = pm_runtime_get_sync(adev->ddev->dev); if (r < 0) return r; @@ -2537,6 +2693,9 @@ static ssize_t amdgpu_hwmon_show_power_cap(struct device *dev, ssize_t size; int r; + if (adev->in_gpu_reset) + return -EPERM; + r = pm_runtime_get_sync(adev->ddev->dev); if (r < 0) return r; @@ -2567,6 +2726,9 @@ static ssize_t amdgpu_hwmon_set_power_cap(struct device *dev, int err; u32 value; + if (adev->in_gpu_reset) + return -EPERM; + if (amdgpu_sriov_vf(adev)) return -EINVAL; @@ -2605,6 +2767,9 @@ static ssize_t amdgpu_hwmon_show_sclk(struct device *dev, uint32_t sclk; int r, size = sizeof(sclk); + if (adev->in_gpu_reset) + return -EPERM; + r = pm_runtime_get_sync(adev->ddev->dev); if (r < 0) return r; @@ -2637,6 +2802,9 @@ static ssize_t amdgpu_hwmon_show_mclk(struct device *dev, uint32_t mclk; int r, size = sizeof(mclk); + if (adev->in_gpu_reset) + return -EPERM; + r = pm_runtime_get_sync(adev->ddev->dev); if (r < 0) return r; @@ -3497,6 +3665,9 @@ static int amdgpu_debugfs_pm_info(struct seq_file *m, void *data) u32 flags = 0; int r; + if (adev->in_gpu_reset) + return -EPERM; + r = pm_runtime_get_sync(dev->dev); if (r < 0) return r; -- cgit From cc831b9781e871e2bde00e9ae0041152139ee304 Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Tue, 12 May 2020 19:06:37 +0800 Subject: drm/amd/powerplay: ack the SMUToHost interrupt on receive V2 There will be no further interrupt without proper ack for current one. V2: fix typo to really set ACK bit only Signed-off-by: Evan Quan Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/powerplay/smu_v11_0.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c index ae0361e225bb..aa76c2cea747 100644 --- a/drivers/gpu/drm/amd/powerplay/smu_v11_0.c +++ b/drivers/gpu/drm/amd/powerplay/smu_v11_0.c @@ -1561,6 +1561,7 @@ static int smu_v11_0_irq_process(struct amdgpu_device *adev, * events for SMCToHost interrupt. */ uint32_t ctxid = entry->src_data[0]; + uint32_t data; if (client_id == SOC15_IH_CLIENTID_THM) { switch (src_id) { @@ -1590,6 +1591,11 @@ static int smu_v11_0_irq_process(struct amdgpu_device *adev, orderly_poweroff(true); } else if (client_id == SOC15_IH_CLIENTID_MP1) { if (src_id == 0xfe) { + /* ACK SMUToHost interrupt */ + data = RREG32_SOC15(MP1, 0, mmMP1_SMN_IH_SW_INT_CTRL); + data = REG_SET_FIELD(data, MP1_SMN_IH_SW_INT_CTRL, INT_ACK, 1); + WREG32_SOC15(MP1, 0, mmMP1_SMN_IH_SW_INT_CTRL, data); + switch (ctxid) { case 0x3: dev_dbg(adev->dev, "Switched to AC mode!\n"); -- cgit From 14ed1c908a7a623cc0cbf0203f8201d1b7d31d16 Mon Sep 17 00:00:00 2001 From: Harry Wentland Date: Thu, 28 May 2020 09:44:44 -0400 Subject: Revert "drm/amd/display: disable dcn20 abm feature for bring up" This reverts commit 96cb7cf13d8530099c256c053648ad576588c387. This change was used for DCN2 bringup and is no longer desired. In fact it breaks backlight on DCN2 systems. Cc: Alexander Monakov Cc: Hersen Wu Cc: Anthony Koo Cc: Michael Chiu Signed-off-by: Harry Wentland Acked-by: Alex Deucher Reviewed-by: Nicholas Kazlauskas Reported-and-tested-by: Alexander Monakov Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index d53c60b37cc6..f42e7e67ddba 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -1356,7 +1356,7 @@ static int dm_late_init(void *handle) unsigned int linear_lut[16]; int i; struct dmcu *dmcu = NULL; - bool ret = false; + bool ret; if (!adev->dm.fw_dmcu) return detect_mst_link_for_all_connectors(adev->ddev); @@ -1377,13 +1377,10 @@ static int dm_late_init(void *handle) */ params.min_abm_backlight = 0x28F; - /* todo will enable for navi10 */ - if (adev->asic_type <= CHIP_RAVEN) { - ret = dmcu_load_iram(dmcu, params); + ret = dmcu_load_iram(dmcu, params); - if (!ret) - return -EINVAL; - } + if (!ret) + return -EINVAL; return detect_mst_link_for_all_connectors(adev->ddev); } -- cgit From a1ef8bad506e4ffa0c57ac5f8cb99ab5cbc3b1fc Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Fri, 29 May 2020 15:18:47 +1000 Subject: drm/nouveau/disp/gm200-: fix NV_PDISP_SOR_HDMI2_CTRL(n) selection This is a SOR register, and not indexed by the bound head. Fixes display not coming up on high-bandwidth HDMI displays under a number of configurations. Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nvkm/engine/disp/hdmigm200.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/hdmigm200.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/hdmigm200.c index 9b16a08eb4d9..bf6d41fb0c9f 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/hdmigm200.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/hdmigm200.c @@ -27,10 +27,10 @@ void gm200_hdmi_scdc(struct nvkm_ior *ior, int head, u8 scdc) { struct nvkm_device *device = ior->disp->engine.subdev.device; - const u32 hoff = head * 0x800; + const u32 soff = nv50_ior_base(ior); const u32 ctrl = scdc & 0x3; - nvkm_mask(device, 0x61c5bc + hoff, 0x00000003, ctrl); + nvkm_mask(device, 0x61c5bc + soff, 0x00000003, ctrl); ior->tmds.high_speed = !!(scdc & 0x2); } -- cgit From 0ad679d157aa69ddf0ee46b564c9fbb646cf6d4e Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Fri, 29 May 2020 17:57:29 +1000 Subject: drm/nouveau/kms/gt215-: fix race with audio driver runpm The audio driver can call into nouveau right while we're in the middle of re-fetching the EDID, and decide it no longer needs to be awake. Stop depending on EDID in the audio component get_eld() callback, and instead cache whether audio support is present from the prior modeset. Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/dispnv50/disp.c | 4 +++- drivers/gpu/drm/nouveau/nouveau_encoder.h | 1 + 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/dispnv50/disp.c b/drivers/gpu/drm/nouveau/dispnv50/disp.c index 7622490d8602..e8ac510f8298 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/disp.c +++ b/drivers/gpu/drm/nouveau/dispnv50/disp.c @@ -510,7 +510,7 @@ nv50_audio_component_get_eld(struct device *kdev, int port, int dev_id, if (!nv_connector || !nv_crtc || nv_encoder->or != port || nv_crtc->index != dev_id) continue; - *enabled = drm_detect_monitor_audio(nv_connector->edid); + *enabled = nv_encoder->audio; if (*enabled) { ret = drm_eld_size(nv_connector->base.eld); memcpy(buf, nv_connector->base.eld, @@ -600,6 +600,7 @@ nv50_audio_disable(struct drm_encoder *encoder, struct nouveau_crtc *nv_crtc) (0x0100 << nv_crtc->index), }; + nv_encoder->audio = false; nvif_mthd(&disp->disp->object, 0, &args, sizeof(args)); nv50_audio_component_eld_notify(drm->audio.component, nv_encoder->or, @@ -636,6 +637,7 @@ nv50_audio_enable(struct drm_encoder *encoder, struct drm_display_mode *mode) nvif_mthd(&disp->disp->object, 0, &args, sizeof(args.base) + drm_eld_size(args.data)); + nv_encoder->audio = true; nv50_audio_component_eld_notify(drm->audio.component, nv_encoder->or, nv_crtc->index); diff --git a/drivers/gpu/drm/nouveau/nouveau_encoder.h b/drivers/gpu/drm/nouveau/nouveau_encoder.h index de51733b0476..a72c412ac8b1 100644 --- a/drivers/gpu/drm/nouveau/nouveau_encoder.h +++ b/drivers/gpu/drm/nouveau/nouveau_encoder.h @@ -52,6 +52,7 @@ struct nouveau_encoder { * actually programmed on the hw, not the proposed crtc */ struct drm_crtc *crtc; u32 ctrl; + bool audio; struct drm_display_mode mode; int last_dpms; -- cgit From dd873dd51d8d7868887048545d48befdd6bea7ad Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 26 May 2020 10:07:52 +0100 Subject: drm/i915: Reorder await_execution before await_request Reorder the code so that we can reuse the await_execution from a special case in await_request in the next patch. Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20200526090753.11329-1-chris@chris-wilson.co.uk (cherry picked from commit ffb0c600c240103f6f34e07892a7e0a75502b243) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_request.c | 264 ++++++++++++++++++------------------ 1 file changed, 132 insertions(+), 132 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index c282719ad3ac..33bbad623e02 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -1053,37 +1053,91 @@ await_fence: I915_FENCE_GFP); } +static bool intel_timeline_sync_has_start(struct intel_timeline *tl, + struct dma_fence *fence) +{ + return __intel_timeline_sync_is_later(tl, + fence->context, + fence->seqno - 1); +} + +static int intel_timeline_sync_set_start(struct intel_timeline *tl, + const struct dma_fence *fence) +{ + return __intel_timeline_sync_set(tl, fence->context, fence->seqno - 1); +} + static int -i915_request_await_request(struct i915_request *to, struct i915_request *from) +__i915_request_await_execution(struct i915_request *to, + struct i915_request *from, + void (*hook)(struct i915_request *rq, + struct dma_fence *signal)) { - int ret; + int err; - GEM_BUG_ON(to == from); - GEM_BUG_ON(to->timeline == from->timeline); + GEM_BUG_ON(intel_context_is_barrier(from->context)); - if (i915_request_completed(from)) { - i915_sw_fence_set_error_once(&to->submit, from->fence.error); + /* Submit both requests at the same time */ + err = __await_execution(to, from, hook, I915_FENCE_GFP); + if (err) + return err; + + /* Squash repeated depenendices to the same timelines */ + if (intel_timeline_sync_has_start(i915_request_timeline(to), + &from->fence)) return 0; + + /* + * Wait until the start of this request. + * + * The execution cb fires when we submit the request to HW. But in + * many cases this may be long before the request itself is ready to + * run (consider that we submit 2 requests for the same context, where + * the request of interest is behind an indefinite spinner). So we hook + * up to both to reduce our queues and keep the execution lag minimised + * in the worst case, though we hope that the await_start is elided. + */ + err = i915_request_await_start(to, from); + if (err < 0) + return err; + + /* + * Ensure both start together [after all semaphores in signal] + * + * Now that we are queued to the HW at roughly the same time (thanks + * to the execute cb) and are ready to run at roughly the same time + * (thanks to the await start), our signaler may still be indefinitely + * delayed by waiting on a semaphore from a remote engine. If our + * signaler depends on a semaphore, so indirectly do we, and we do not + * want to start our payload until our signaler also starts theirs. + * So we wait. + * + * However, there is also a second condition for which we need to wait + * for the precise start of the signaler. Consider that the signaler + * was submitted in a chain of requests following another context + * (with just an ordinary intra-engine fence dependency between the + * two). In this case the signaler is queued to HW, but not for + * immediate execution, and so we must wait until it reaches the + * active slot. + */ + if (intel_engine_has_semaphores(to->engine) && + !i915_request_has_initial_breadcrumb(to)) { + err = __emit_semaphore_wait(to, from, from->fence.seqno - 1); + if (err < 0) + return err; } + /* Couple the dependency tree for PI on this exposed to->fence */ if (to->engine->schedule) { - ret = i915_sched_node_add_dependency(&to->sched, + err = i915_sched_node_add_dependency(&to->sched, &from->sched, - I915_DEPENDENCY_EXTERNAL); - if (ret < 0) - return ret; + I915_DEPENDENCY_WEAK); + if (err < 0) + return err; } - if (to->engine == from->engine) - ret = i915_sw_fence_await_sw_fence_gfp(&to->submit, - &from->submit, - I915_FENCE_GFP); - else - ret = emit_semaphore_wait(to, from, I915_FENCE_GFP); - if (ret < 0) - return ret; - - return 0; + return intel_timeline_sync_set_start(i915_request_timeline(to), + &from->fence); } static void mark_external(struct i915_request *rq) @@ -1136,23 +1190,20 @@ i915_request_await_external(struct i915_request *rq, struct dma_fence *fence) } int -i915_request_await_dma_fence(struct i915_request *rq, struct dma_fence *fence) +i915_request_await_execution(struct i915_request *rq, + struct dma_fence *fence, + void (*hook)(struct i915_request *rq, + struct dma_fence *signal)) { struct dma_fence **child = &fence; unsigned int nchild = 1; int ret; - /* - * Note that if the fence-array was created in signal-on-any mode, - * we should *not* decompose it into its individual fences. However, - * we don't currently store which mode the fence-array is operating - * in. Fortunately, the only user of signal-on-any is private to - * amdgpu and we should not see any incoming fence-array from - * sync-file being in signal-on-any mode. - */ if (dma_fence_is_array(fence)) { struct dma_fence_array *array = to_dma_fence_array(fence); + /* XXX Error for signal-on-any fence arrays */ + child = array->fences; nchild = array->num_fences; GEM_BUG_ON(!nchild); @@ -1165,138 +1216,78 @@ i915_request_await_dma_fence(struct i915_request *rq, struct dma_fence *fence) continue; } - /* - * Requests on the same timeline are explicitly ordered, along - * with their dependencies, by i915_request_add() which ensures - * that requests are submitted in-order through each ring. - */ if (fence->context == rq->fence.context) continue; - /* Squash repeated waits to the same timelines */ - if (fence->context && - intel_timeline_sync_is_later(i915_request_timeline(rq), - fence)) - continue; + /* + * We don't squash repeated fence dependencies here as we + * want to run our callback in all cases. + */ if (dma_fence_is_i915(fence)) - ret = i915_request_await_request(rq, to_request(fence)); + ret = __i915_request_await_execution(rq, + to_request(fence), + hook); else ret = i915_request_await_external(rq, fence); if (ret < 0) return ret; - - /* Record the latest fence used against each timeline */ - if (fence->context) - intel_timeline_sync_set(i915_request_timeline(rq), - fence); } while (--nchild); return 0; } -static bool intel_timeline_sync_has_start(struct intel_timeline *tl, - struct dma_fence *fence) -{ - return __intel_timeline_sync_is_later(tl, - fence->context, - fence->seqno - 1); -} - -static int intel_timeline_sync_set_start(struct intel_timeline *tl, - const struct dma_fence *fence) -{ - return __intel_timeline_sync_set(tl, fence->context, fence->seqno - 1); -} - static int -__i915_request_await_execution(struct i915_request *to, - struct i915_request *from, - void (*hook)(struct i915_request *rq, - struct dma_fence *signal)) +i915_request_await_request(struct i915_request *to, struct i915_request *from) { - int err; - - GEM_BUG_ON(intel_context_is_barrier(from->context)); + int ret; - /* Submit both requests at the same time */ - err = __await_execution(to, from, hook, I915_FENCE_GFP); - if (err) - return err; + GEM_BUG_ON(to == from); + GEM_BUG_ON(to->timeline == from->timeline); - /* Squash repeated depenendices to the same timelines */ - if (intel_timeline_sync_has_start(i915_request_timeline(to), - &from->fence)) + if (i915_request_completed(from)) { + i915_sw_fence_set_error_once(&to->submit, from->fence.error); return 0; - - /* - * Wait until the start of this request. - * - * The execution cb fires when we submit the request to HW. But in - * many cases this may be long before the request itself is ready to - * run (consider that we submit 2 requests for the same context, where - * the request of interest is behind an indefinite spinner). So we hook - * up to both to reduce our queues and keep the execution lag minimised - * in the worst case, though we hope that the await_start is elided. - */ - err = i915_request_await_start(to, from); - if (err < 0) - return err; - - /* - * Ensure both start together [after all semaphores in signal] - * - * Now that we are queued to the HW at roughly the same time (thanks - * to the execute cb) and are ready to run at roughly the same time - * (thanks to the await start), our signaler may still be indefinitely - * delayed by waiting on a semaphore from a remote engine. If our - * signaler depends on a semaphore, so indirectly do we, and we do not - * want to start our payload until our signaler also starts theirs. - * So we wait. - * - * However, there is also a second condition for which we need to wait - * for the precise start of the signaler. Consider that the signaler - * was submitted in a chain of requests following another context - * (with just an ordinary intra-engine fence dependency between the - * two). In this case the signaler is queued to HW, but not for - * immediate execution, and so we must wait until it reaches the - * active slot. - */ - if (intel_engine_has_semaphores(to->engine) && - !i915_request_has_initial_breadcrumb(to)) { - err = __emit_semaphore_wait(to, from, from->fence.seqno - 1); - if (err < 0) - return err; } - /* Couple the dependency tree for PI on this exposed to->fence */ if (to->engine->schedule) { - err = i915_sched_node_add_dependency(&to->sched, + ret = i915_sched_node_add_dependency(&to->sched, &from->sched, - I915_DEPENDENCY_WEAK); - if (err < 0) - return err; + I915_DEPENDENCY_EXTERNAL); + if (ret < 0) + return ret; } - return intel_timeline_sync_set_start(i915_request_timeline(to), - &from->fence); + if (to->engine == READ_ONCE(from->engine)) + ret = i915_sw_fence_await_sw_fence_gfp(&to->submit, + &from->submit, + I915_FENCE_GFP); + else + ret = emit_semaphore_wait(to, from, I915_FENCE_GFP); + if (ret < 0) + return ret; + + return 0; } int -i915_request_await_execution(struct i915_request *rq, - struct dma_fence *fence, - void (*hook)(struct i915_request *rq, - struct dma_fence *signal)) +i915_request_await_dma_fence(struct i915_request *rq, struct dma_fence *fence) { struct dma_fence **child = &fence; unsigned int nchild = 1; int ret; + /* + * Note that if the fence-array was created in signal-on-any mode, + * we should *not* decompose it into its individual fences. However, + * we don't currently store which mode the fence-array is operating + * in. Fortunately, the only user of signal-on-any is private to + * amdgpu and we should not see any incoming fence-array from + * sync-file being in signal-on-any mode. + */ if (dma_fence_is_array(fence)) { struct dma_fence_array *array = to_dma_fence_array(fence); - /* XXX Error for signal-on-any fence arrays */ - child = array->fences; nchild = array->num_fences; GEM_BUG_ON(!nchild); @@ -1309,22 +1300,31 @@ i915_request_await_execution(struct i915_request *rq, continue; } + /* + * Requests on the same timeline are explicitly ordered, along + * with their dependencies, by i915_request_add() which ensures + * that requests are submitted in-order through each ring. + */ if (fence->context == rq->fence.context) continue; - /* - * We don't squash repeated fence dependencies here as we - * want to run our callback in all cases. - */ + /* Squash repeated waits to the same timelines */ + if (fence->context && + intel_timeline_sync_is_later(i915_request_timeline(rq), + fence)) + continue; if (dma_fence_is_i915(fence)) - ret = __i915_request_await_execution(rq, - to_request(fence), - hook); + ret = i915_request_await_request(rq, to_request(fence)); else ret = i915_request_await_external(rq, fence); if (ret < 0) return ret; + + /* Record the latest fence used against each timeline */ + if (fence->context) + intel_timeline_sync_set(i915_request_timeline(rq), + fence); } while (--nchild); return 0; -- cgit From 631a6582b75ffac82bee76a65217caa856fafb5e Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 26 May 2020 10:07:53 +0100 Subject: drm/i915/gt: Do not schedule normal requests immediately along virtual When we push a virtual request onto the HW, we update the rq->engine to point to the physical engine. A request that is then submitted by the user that waits upon the virtual engine, but along the physical engine in use, will then see that it is due to be submitted to the same engine and take a shortcut (and be queued without waiting for the completion fence). However, the virtual request may be preempted (either by higher priority users, or by timeslicing) and removed from the physical engine to be migrated over to one of its siblings. The dependent normal request however is oblivious to the removal of the virtual request and remains queued to execute on HW, believing that once it reaches the head of its queue all of its predecessors will have completed executing! v2: Beware restriction of signal->execution_mask prior to submission. Fixes: 6d06779e8672 ("drm/i915: Load balancing across a virtual engine") Testcase: igt/gem_exec_balancer/sliced Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: # v5.3+ Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20200526090753.11329-2-chris@chris-wilson.co.uk (cherry picked from commit 511b6d9aed417739b6aa49d0b6b4354ad21020f1) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_request.c | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 33bbad623e02..0b07ccc7e9bc 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -1237,6 +1237,25 @@ i915_request_await_execution(struct i915_request *rq, return 0; } +static int +await_request_submit(struct i915_request *to, struct i915_request *from) +{ + /* + * If we are waiting on a virtual engine, then it may be + * constrained to execute on a single engine *prior* to submission. + * When it is submitted, it will be first submitted to the virtual + * engine and then passed to the physical engine. We cannot allow + * the waiter to be submitted immediately to the physical engine + * as it may then bypass the virtual request. + */ + if (to->engine == READ_ONCE(from->engine)) + return i915_sw_fence_await_sw_fence_gfp(&to->submit, + &from->submit, + I915_FENCE_GFP); + else + return __i915_request_await_execution(to, from, NULL); +} + static int i915_request_await_request(struct i915_request *to, struct i915_request *from) { @@ -1258,10 +1277,8 @@ i915_request_await_request(struct i915_request *to, struct i915_request *from) return ret; } - if (to->engine == READ_ONCE(from->engine)) - ret = i915_sw_fence_await_sw_fence_gfp(&to->submit, - &from->submit, - I915_FENCE_GFP); + if (is_power_of_2(to->execution_mask | READ_ONCE(from->execution_mask))) + ret = await_request_submit(to, from); else ret = emit_semaphore_wait(to, from, I915_FENCE_GFP); if (ret < 0) -- cgit From 0e386959272e4adf192867681a65cc0aa580c247 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 29 May 2020 15:39:26 +0100 Subject: drm/i915: Check for awaits on still currently executing requests With the advent of preempt-to-busy, a request may still be on the GPU as we unwind. And in the case of a unpreemptible [due to HW] request, that request will remain indefinitely on the GPU even though we have returned it back to our submission queue, and cleared the active bit. We only run the execution callbacks on transferring the request from our submission queue to the execution queue, but if this is a bonded request that the HW is waiting for, we will not submit it (as we wait for a fresh execution) even though it is still being executed. As we know that there are always preemption points between requests, we know that only the currently executing request may be still active even though we have cleared the flag. However, we do not precisely know which request is in ELSP[0] due to a delay in processing events, and furthermore we only store the last request in a context in our state tracker. Fixes: 22b7a426bbe1 ("drm/i915/execlists: Preempt-to-busy") Testcase: igt/gem_exec_balancer/bonded-dual Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20200529143926.3245-1-chris@chris-wilson.co.uk (cherry picked from commit b55230e5e800868961fc271b26d9ce53ae1f691e) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_request.c | 49 ++++++++++++++++++++++++++++++++++++- 1 file changed, 48 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 0b07ccc7e9bc..def62100e666 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -357,6 +357,53 @@ void i915_request_retire_upto(struct i915_request *rq) } while (i915_request_retire(tmp) && tmp != rq); } +static struct i915_request * const * +__engine_active(struct intel_engine_cs *engine) +{ + return READ_ONCE(engine->execlists.active); +} + +static bool __request_in_flight(const struct i915_request *signal) +{ + struct i915_request * const *port, *rq; + bool inflight = false; + + if (!i915_request_is_ready(signal)) + return false; + + /* + * Even if we have unwound the request, it may still be on + * the GPU (preempt-to-busy). If that request is inside an + * unpreemptible critical section, it will not be removed. Some + * GPU functions may even be stuck waiting for the paired request + * (__await_execution) to be submitted and cannot be preempted + * until the bond is executing. + * + * As we know that there are always preemption points between + * requests, we know that only the currently executing request + * may be still active even though we have cleared the flag. + * However, we can't rely on our tracking of ELSP[0] to known + * which request is currently active and so maybe stuck, as + * the tracking maybe an event behind. Instead assume that + * if the context is still inflight, then it is still active + * even if the active flag has been cleared. + */ + if (!intel_context_inflight(signal->context)) + return false; + + rcu_read_lock(); + for (port = __engine_active(signal->engine); (rq = *port); port++) { + if (rq->context == signal->context) { + inflight = i915_seqno_passed(rq->fence.seqno, + signal->fence.seqno); + break; + } + } + rcu_read_unlock(); + + return inflight; +} + static int __await_execution(struct i915_request *rq, struct i915_request *signal, @@ -387,7 +434,7 @@ __await_execution(struct i915_request *rq, } spin_lock_irq(&signal->lock); - if (i915_request_is_active(signal)) { + if (i915_request_is_active(signal) || __request_in_flight(signal)) { if (hook) { hook(rq, &signal->fence); i915_request_put(signal); -- cgit From 882f38b7f6c406e7229e72ee4328b7f1d5938ae7 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Wed, 27 May 2020 23:02:45 +0300 Subject: drm/i915: Fix global state use-after-frees with a refcount MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit While the current locking/serialization of the global state suffices for protecting the obj->state access and the actual hardware reprogramming, we do have a problem with accessing the old/new states during nonblocking commits. The state computation and swap will be protected by the crtc locks, but the commit_tails can finish out of order, thus also causing the atomic states to be cleaned up out of order. This would mean the commit that started first but finished last has had its new state freed as the no-longer-needed old state by the other commit. To fix this let's just refcount the states. obj->state amounts to one reference, and the intel_atomic_state holds extra references to both its new and old global obj states. Fixes: 0ef1905ecf2e ("drm/i915: Introduce better global state handling") Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20200527200245.13184-1-ville.syrjala@linux.intel.com Reviewed-by: Stanislav Lisovskiy (cherry picked from commit f8c86ffa2800adc80adc679c84c45e0c6b027374) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/display/intel_global_state.c | 45 ++++++++++++++++++++--- drivers/gpu/drm/i915/display/intel_global_state.h | 3 ++ 2 files changed, 42 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_global_state.c b/drivers/gpu/drm/i915/display/intel_global_state.c index 212d4ee68205..7a19215ad844 100644 --- a/drivers/gpu/drm/i915/display/intel_global_state.c +++ b/drivers/gpu/drm/i915/display/intel_global_state.c @@ -10,6 +10,28 @@ #include "intel_display_types.h" #include "intel_global_state.h" +static void __intel_atomic_global_state_free(struct kref *kref) +{ + struct intel_global_state *obj_state = + container_of(kref, struct intel_global_state, ref); + struct intel_global_obj *obj = obj_state->obj; + + obj->funcs->atomic_destroy_state(obj, obj_state); +} + +static void intel_atomic_global_state_put(struct intel_global_state *obj_state) +{ + kref_put(&obj_state->ref, __intel_atomic_global_state_free); +} + +static struct intel_global_state * +intel_atomic_global_state_get(struct intel_global_state *obj_state) +{ + kref_get(&obj_state->ref); + + return obj_state; +} + void intel_atomic_global_obj_init(struct drm_i915_private *dev_priv, struct intel_global_obj *obj, struct intel_global_state *state, @@ -17,6 +39,10 @@ void intel_atomic_global_obj_init(struct drm_i915_private *dev_priv, { memset(obj, 0, sizeof(*obj)); + state->obj = obj; + + kref_init(&state->ref); + obj->state = state; obj->funcs = funcs; list_add_tail(&obj->head, &dev_priv->global_obj_list); @@ -28,7 +54,9 @@ void intel_atomic_global_obj_cleanup(struct drm_i915_private *dev_priv) list_for_each_entry_safe(obj, next, &dev_priv->global_obj_list, head) { list_del(&obj->head); - obj->funcs->atomic_destroy_state(obj, obj->state); + + drm_WARN_ON(&dev_priv->drm, kref_read(&obj->state->ref) != 1); + intel_atomic_global_state_put(obj->state); } } @@ -97,10 +125,14 @@ intel_atomic_get_global_obj_state(struct intel_atomic_state *state, if (!obj_state) return ERR_PTR(-ENOMEM); + obj_state->obj = obj; obj_state->changed = false; + kref_init(&obj_state->ref); + state->global_objs[index].state = obj_state; - state->global_objs[index].old_state = obj->state; + state->global_objs[index].old_state = + intel_atomic_global_state_get(obj->state); state->global_objs[index].new_state = obj_state; state->global_objs[index].ptr = obj; obj_state->state = state; @@ -163,7 +195,9 @@ void intel_atomic_swap_global_state(struct intel_atomic_state *state) new_obj_state->state = NULL; state->global_objs[i].state = old_obj_state; - obj->state = new_obj_state; + + intel_atomic_global_state_put(obj->state); + obj->state = intel_atomic_global_state_get(new_obj_state); } } @@ -172,10 +206,9 @@ void intel_atomic_clear_global_state(struct intel_atomic_state *state) int i; for (i = 0; i < state->num_global_objs; i++) { - struct intel_global_obj *obj = state->global_objs[i].ptr; + intel_atomic_global_state_put(state->global_objs[i].old_state); + intel_atomic_global_state_put(state->global_objs[i].new_state); - obj->funcs->atomic_destroy_state(obj, - state->global_objs[i].state); state->global_objs[i].ptr = NULL; state->global_objs[i].state = NULL; state->global_objs[i].old_state = NULL; diff --git a/drivers/gpu/drm/i915/display/intel_global_state.h b/drivers/gpu/drm/i915/display/intel_global_state.h index e6163a469029..1f16fa3073c9 100644 --- a/drivers/gpu/drm/i915/display/intel_global_state.h +++ b/drivers/gpu/drm/i915/display/intel_global_state.h @@ -6,6 +6,7 @@ #ifndef __INTEL_GLOBAL_STATE_H__ #define __INTEL_GLOBAL_STATE_H__ +#include #include struct drm_i915_private; @@ -54,7 +55,9 @@ struct intel_global_obj { for_each_if(obj) struct intel_global_state { + struct intel_global_obj *obj; struct intel_atomic_state *state; + struct kref ref; bool changed; }; -- cgit From 273500ae71711c040d258a7b3f4b6f44c368fff2 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 1 Jun 2020 17:19:42 +0100 Subject: drm/i915: Whitelist context-local timestamp in the gen9 cmdparser Allow batch buffers to read their own _local_ cumulative HW runtime of their logical context. Fixes: 0f2f39758341 ("drm/i915: Add gen9 BCS cmdparsing") Signed-off-by: Chris Wilson Cc: Mika Kuoppala Cc: # v5.4+ Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20200601161942.30854-1-chris@chris-wilson.co.uk (cherry picked from commit f9496520df11de00fbafc3cbd693b9570d600ab3) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_cmd_parser.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c index 189b573d02be..372354d33f55 100644 --- a/drivers/gpu/drm/i915/i915_cmd_parser.c +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c @@ -572,6 +572,9 @@ struct drm_i915_reg_descriptor { #define REG32(_reg, ...) \ { .addr = (_reg), __VA_ARGS__ } +#define REG32_IDX(_reg, idx) \ + { .addr = _reg(idx) } + /* * Convenience macro for adding 64-bit registers. * @@ -669,6 +672,7 @@ static const struct drm_i915_reg_descriptor gen9_blt_regs[] = { REG64_IDX(RING_TIMESTAMP, BSD_RING_BASE), REG32(BCS_SWCTRL), REG64_IDX(RING_TIMESTAMP, BLT_RING_BASE), + REG32_IDX(RING_CTX_TIMESTAMP, BLT_RING_BASE), REG64_IDX(BCS_GPR, 0), REG64_IDX(BCS_GPR, 1), REG64_IDX(BCS_GPR, 2), -- cgit From ea2b383ded0f84f310fe330eb43d758f2807e728 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Tue, 2 Jun 2020 00:55:08 +0300 Subject: drm/i915/params: don't expose inject_probe_failure in debugfs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The parameter only makes sense as a module parameter only. Fixes: c43c5a8818d4 ("drm/i915/params: add i915 parameters to debugfs") Cc: Juha-Pekka Heikkilä Cc: Venkata Sandeep Dhanalakota Reviewed-by: Juha-Pekka Heikkila Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20200601215510.18379-1-jani.nikula@intel.com (cherry picked from commit dbf4081ffb68c0d9b518a34c715a8d8681658411) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_params.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_params.h b/drivers/gpu/drm/i915/i915_params.h index 45323732f099..4f21bfffbf0e 100644 --- a/drivers/gpu/drm/i915/i915_params.h +++ b/drivers/gpu/drm/i915/i915_params.h @@ -64,7 +64,7 @@ struct drm_printer; param(int, mmio_debug, -IS_ENABLED(CONFIG_DRM_I915_DEBUG_MMIO), 0600) \ param(int, edp_vswing, 0, 0400) \ param(unsigned int, reset, 3, 0600) \ - param(unsigned int, inject_probe_failure, 0, 0600) \ + param(unsigned int, inject_probe_failure, 0, 0) \ param(int, fastboot, -1, 0600) \ param(int, enable_dpcd_backlight, -1, 0600) \ param(char *, force_probe, CONFIG_DRM_I915_FORCE_PROBE, 0400) \ -- cgit From 62b6e899a0ad4863b2e799f3c10a77116e3cfadc Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Tue, 2 Jun 2020 00:55:09 +0300 Subject: drm/i915/params: fix i915.fake_lmem_start module param sysfs permissions fake_lmem_start does not need to be mutable via module param sysfs. It's only used during driver probe. Fixes: 1629224324b6 ("drm/i915/lmem: add the fake lmem region") Cc: Matthew Auld Cc: Joonas Lahtinen Cc: Chris Wilson Reviewed-by: Rodrigo Vivi Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20200601215510.18379-2-jani.nikula@intel.com (cherry picked from commit f322e851f20e534cf5305332a9ad5eefadb55d56) Signed-off-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_params.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c index add00ec1f787..a3dde770226d 100644 --- a/drivers/gpu/drm/i915/i915_params.c +++ b/drivers/gpu/drm/i915/i915_params.c @@ -173,7 +173,7 @@ i915_param_named(enable_gvt, bool, 0400, #endif #if IS_ENABLED(CONFIG_DRM_I915_UNSTABLE_FAKE_LMEM) -i915_param_named_unsafe(fake_lmem_start, ulong, 0600, +i915_param_named_unsafe(fake_lmem_start, ulong, 0400, "Fake LMEM start offset (default: 0)"); #endif -- cgit From b7f839d292948142eaab77cedd031aad0bfec872 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 2 Jun 2020 17:22:48 -0400 Subject: drm/amdgpu/display: use blanked rather than plane state for sync groups We may end up with no planes set yet, depending on the ordering, but we should have the proper blanking state which is either handled by either DPG or TG depending on the hardware generation. Check both to determine the proper blanked state. Bug: https://gitlab.freedesktop.org/drm/amd/issues/781 Fixes: 5fc0cbfad45648 ("drm/amd/display: determine if a pipe is synced by plane state") Cc: nicholas.kazlauskas@amd.com Reviewed-by: Nicholas Kazlauskas Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/display/dc/core/dc.c | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 45cfb7c45566..b4e2053bca9f 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -1016,9 +1016,17 @@ static void program_timing_sync( } } - /* set first pipe with plane as master */ + /* set first unblanked pipe as master */ for (j = 0; j < group_size; j++) { - if (pipe_set[j]->plane_state) { + bool is_blanked; + + if (pipe_set[j]->stream_res.opp->funcs->dpg_is_blanked) + is_blanked = + pipe_set[j]->stream_res.opp->funcs->dpg_is_blanked(pipe_set[j]->stream_res.opp); + else + is_blanked = + pipe_set[j]->stream_res.tg->funcs->is_blanked(pipe_set[j]->stream_res.tg); + if (!is_blanked) { if (j == 0) break; @@ -1039,9 +1047,17 @@ static void program_timing_sync( status->timing_sync_info.master = false; } - /* remove any other pipes with plane as they have already been synced */ + /* remove any other unblanked pipes as they have already been synced */ for (j = j + 1; j < group_size; j++) { - if (pipe_set[j]->plane_state) { + bool is_blanked; + + if (pipe_set[j]->stream_res.opp->funcs->dpg_is_blanked) + is_blanked = + pipe_set[j]->stream_res.opp->funcs->dpg_is_blanked(pipe_set[j]->stream_res.opp); + else + is_blanked = + pipe_set[j]->stream_res.tg->funcs->is_blanked(pipe_set[j]->stream_res.tg); + if (!is_blanked) { group_size--; pipe_set[j] = pipe_set[group_size]; j--; -- cgit From a24eaa5c51255b344d5a321f1eeb3205f2775498 Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Tue, 2 Jun 2020 20:42:33 -0400 Subject: drm/amd/display: Revalidate bandwidth before commiting DC updates [Why] Whenever we switch between tiled formats without also switching pixel formats or doing anything else that recreates the DC plane state we can run into underflow or hangs since we're not updating the DML parameters before committing to the hardware. [How] If the update type is FULL then call validate_bandwidth again to update the DML parmeters before committing the state. This is basically just a workaround and protective measure against update types being added DC where we could run into this issue in the future. We can only fully validate the state in advance before applying it to the hardware if we recreate all the plane and stream states since we can't modify what's currently in use. The next step is to update DM to ensure that we're creating the plane and stream states for whatever could potentially be a full update in DC to pre-emptively recreate the state for DC global validation. The workaround can stay until this has been fixed in DM. Signed-off-by: Nicholas Kazlauskas Reviewed-by: Hersen Wu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/dc/core/dc.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index b4e2053bca9f..6f93a6ca4cf0 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -2538,6 +2538,12 @@ void dc_commit_updates_for_stream(struct dc *dc, copy_stream_update_to_stream(dc, context, stream, stream_update); + if (!dc->res_pool->funcs->validate_bandwidth(dc, context, false)) { + DC_ERROR("Mode validation failed for stream update!\n"); + dc_release_state(context); + return; + } + commit_planes_for_stream( dc, srf_updates, -- cgit From 6f8dbcf1c9cec3ec5efcf4c17b29a5b1732d1491 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Wed, 3 Jun 2020 11:37:56 +1000 Subject: drm/nouveau/disp: provide hint to OR allocation about HDA requirements Will be used by a subsequent commit to influence SOR allocation policy. Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/dispnv50/disp.c | 17 ++++++++++++----- drivers/gpu/drm/nouveau/include/nvif/cl5070.h | 3 ++- drivers/gpu/drm/nouveau/nvkm/engine/disp/outp.c | 2 +- drivers/gpu/drm/nouveau/nvkm/engine/disp/outp.h | 2 +- drivers/gpu/drm/nouveau/nvkm/engine/disp/rootnv50.c | 4 ++-- 5 files changed, 18 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/nouveau/dispnv50/disp.c b/drivers/gpu/drm/nouveau/dispnv50/disp.c index e8ac510f8298..d472942102f5 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/disp.c +++ b/drivers/gpu/drm/nouveau/dispnv50/disp.c @@ -277,7 +277,7 @@ nv50_outp_release(struct nouveau_encoder *nv_encoder) } static int -nv50_outp_acquire(struct nouveau_encoder *nv_encoder) +nv50_outp_acquire(struct nouveau_encoder *nv_encoder, bool hda) { struct nouveau_drm *drm = nouveau_drm(nv_encoder->base.base.dev); struct nv50_disp *disp = nv50_disp(drm->dev); @@ -289,6 +289,7 @@ nv50_outp_acquire(struct nouveau_encoder *nv_encoder) .base.method = NV50_DISP_MTHD_V1_ACQUIRE, .base.hasht = nv_encoder->dcb->hasht, .base.hashm = nv_encoder->dcb->hashm, + .info.hda = hda, }; int ret; @@ -393,7 +394,7 @@ nv50_dac_enable(struct drm_encoder *encoder) struct nv50_head_atom *asyh = nv50_head_atom(nv_crtc->base.state); struct nv50_core *core = nv50_disp(encoder->dev)->core; - nv50_outp_acquire(nv_encoder); + nv50_outp_acquire(nv_encoder, false); core->func->dac->ctrl(core, nv_encoder->or, 1 << nv_crtc->index, asyh); asyh->or.depth = 0; @@ -968,7 +969,7 @@ nv50_msto_enable(struct drm_encoder *encoder) DRM_DEBUG_KMS("Failed to allocate VCPI\n"); if (!mstm->links++) - nv50_outp_acquire(mstm->outp); + nv50_outp_acquire(mstm->outp, false /*XXX: MST audio.*/); if (mstm->outp->link & 1) proto = 0x8; @@ -1562,12 +1563,18 @@ nv50_sor_enable(struct drm_encoder *encoder) struct nouveau_drm *drm = nouveau_drm(dev); struct nouveau_connector *nv_connector; struct nvbios *bios = &drm->vbios; + bool hda = false; u8 proto = 0xf; u8 depth = 0x0; nv_connector = nouveau_encoder_connector_get(nv_encoder); nv_encoder->crtc = encoder->crtc; - nv50_outp_acquire(nv_encoder); + + if ((disp->disp->object.oclass == GT214_DISP || + disp->disp->object.oclass >= GF110_DISP) && + drm_detect_monitor_audio(nv_connector->edid)) + hda = true; + nv50_outp_acquire(nv_encoder, hda); switch (nv_encoder->dcb->type) { case DCB_OUTPUT_TMDS: @@ -1777,7 +1784,7 @@ nv50_pior_enable(struct drm_encoder *encoder) u8 owner = 1 << nv_crtc->index; u8 proto; - nv50_outp_acquire(nv_encoder); + nv50_outp_acquire(nv_encoder, false); switch (asyh->or.bpc) { case 10: asyh->or.depth = 0x6; break; diff --git a/drivers/gpu/drm/nouveau/include/nvif/cl5070.h b/drivers/gpu/drm/nouveau/include/nvif/cl5070.h index 38bf4f38e869..53800fb46582 100644 --- a/drivers/gpu/drm/nouveau/include/nvif/cl5070.h +++ b/drivers/gpu/drm/nouveau/include/nvif/cl5070.h @@ -46,7 +46,8 @@ struct nv50_disp_acquire_v0 { __u8 version; __u8 or; __u8 link; - __u8 pad03[5]; + __u8 hda; + __u8 pad04[4]; }; struct nv50_disp_dac_load_v0 { diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/outp.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/outp.c index c62030c96fba..1b1c6ff0e1bc 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/outp.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/outp.c @@ -112,7 +112,7 @@ nvkm_outp_acquire_ior(struct nvkm_outp *outp, u8 user, struct nvkm_ior *ior) } int -nvkm_outp_acquire(struct nvkm_outp *outp, u8 user) +nvkm_outp_acquire(struct nvkm_outp *outp, u8 user, bool hda) { struct nvkm_ior *ior = outp->ior; enum nvkm_ior_proto proto; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/outp.h b/drivers/gpu/drm/nouveau/nvkm/engine/disp/outp.h index 721b068b87ef..ee028d30cfe7 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/outp.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/outp.h @@ -32,7 +32,7 @@ int nvkm_outp_new(struct nvkm_disp *, int index, struct dcb_output *, void nvkm_outp_del(struct nvkm_outp **); void nvkm_outp_init(struct nvkm_outp *); void nvkm_outp_fini(struct nvkm_outp *); -int nvkm_outp_acquire(struct nvkm_outp *, u8 user); +int nvkm_outp_acquire(struct nvkm_outp *, u8 user, bool hda); void nvkm_outp_release(struct nvkm_outp *, u8 user); void nvkm_outp_route(struct nvkm_disp *); diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootnv50.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootnv50.c index a7672ef17d3b..fb5de44e4b8d 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootnv50.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootnv50.c @@ -99,7 +99,7 @@ nv50_disp_root_mthd_(struct nvkm_object *object, u32 mthd, void *data, u32 size) } *args = data; int ret = -ENOSYS; if (!(ret = nvif_unpack(ret, &data, &size, args->v0, 0, 0, false))) { - ret = nvkm_outp_acquire(outp, NVKM_OUTP_USER); + ret = nvkm_outp_acquire(outp, NVKM_OUTP_USER, args->v0.hda); if (ret == 0) { args->v0.or = outp->ior->id; args->v0.link = outp->ior->asy.link; @@ -119,7 +119,7 @@ nv50_disp_root_mthd_(struct nvkm_object *object, u32 mthd, void *data, u32 size) if (!(ret = nvif_unpack(ret, &data, &size, args->v0, 0, 0, false))) { if (args->v0.data & 0xfff00000) return -EINVAL; - ret = nvkm_outp_acquire(outp, NVKM_OUTP_PRIV); + ret = nvkm_outp_acquire(outp, NVKM_OUTP_PRIV, false); if (ret) return ret; ret = outp->ior->func->sense(outp->ior, args->v0.data); -- cgit From f24b6ae19fa257ea9b10a9389d0b7046b3efd97c Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Wed, 3 Jun 2020 11:40:47 +1000 Subject: drm/nouveau/disp: split part of OR allocation logic into a function No logical changes here, this is just moving the code to make the changes in the next commit more obvious. Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nvkm/engine/disp/outp.c | 63 +++++++++++++++---------- 1 file changed, 37 insertions(+), 26 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/outp.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/outp.c index 1b1c6ff0e1bc..b56224558a05 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/outp.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/outp.c @@ -111,37 +111,18 @@ nvkm_outp_acquire_ior(struct nvkm_outp *outp, u8 user, struct nvkm_ior *ior) return 0; } -int -nvkm_outp_acquire(struct nvkm_outp *outp, u8 user, bool hda) +static inline int +nvkm_outp_acquire_hda(struct nvkm_outp *outp, enum nvkm_ior_type type, + u8 user, bool hda) { - struct nvkm_ior *ior = outp->ior; - enum nvkm_ior_proto proto; - enum nvkm_ior_type type; - - OUTP_TRACE(outp, "acquire %02x |= %02x %p", outp->acquired, user, ior); - if (ior) { - outp->acquired |= user; - return 0; - } - - /* Lookup a compatible, and unused, OR to assign to the device. */ - proto = nvkm_outp_xlat(outp, &type); - if (proto == UNKNOWN) - return -ENOSYS; - - /* Deal with panels requiring identity-mapped SOR assignment. */ - if (outp->identity) { - ior = nvkm_ior_find(outp->disp, SOR, ffs(outp->info.or) - 1); - if (WARN_ON(!ior)) - return -ENOSPC; - return nvkm_outp_acquire_ior(outp, user, ior); - } + struct nvkm_ior *ior; /* First preference is to reuse the OR that is currently armed * on HW, if any, in order to prevent unnecessary switching. */ list_for_each_entry(ior, &outp->disp->ior, head) { - if (!ior->identity && !ior->asy.outp && ior->arm.outp == outp) + if (!ior->identity && + !ior->asy.outp && ior->arm.outp == outp) return nvkm_outp_acquire_ior(outp, user, ior); } @@ -157,7 +138,8 @@ nvkm_outp_acquire(struct nvkm_outp *outp, u8 user, bool hda) * but will be released during the next modeset. */ list_for_each_entry(ior, &outp->disp->ior, head) { - if (!ior->identity && !ior->asy.outp && ior->type == type && + if (!ior->identity && + !ior->asy.outp && ior->type == type && (ior->func->route.set || ior->id == __ffs(outp->info.or))) return nvkm_outp_acquire_ior(outp, user, ior); } @@ -165,6 +147,35 @@ nvkm_outp_acquire(struct nvkm_outp *outp, u8 user, bool hda) return -ENOSPC; } +int +nvkm_outp_acquire(struct nvkm_outp *outp, u8 user, bool hda) +{ + struct nvkm_ior *ior = outp->ior; + enum nvkm_ior_proto proto; + enum nvkm_ior_type type; + + OUTP_TRACE(outp, "acquire %02x |= %02x %p", outp->acquired, user, ior); + if (ior) { + outp->acquired |= user; + return 0; + } + + /* Lookup a compatible, and unused, OR to assign to the device. */ + proto = nvkm_outp_xlat(outp, &type); + if (proto == UNKNOWN) + return -ENOSYS; + + /* Deal with panels requiring identity-mapped SOR assignment. */ + if (outp->identity) { + ior = nvkm_ior_find(outp->disp, SOR, ffs(outp->info.or) - 1); + if (WARN_ON(!ior)) + return -ENOSPC; + return nvkm_outp_acquire_ior(outp, user, ior); + } + + return nvkm_outp_acquire_hda(outp, type, user, true); +} + void nvkm_outp_fini(struct nvkm_outp *outp) { -- cgit From e6867ffa34340636252efe8e6b82be625c43d9b1 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Wed, 3 Jun 2020 11:43:23 +1000 Subject: drm/nouveau/disp: modify OR allocation policy to account for HDA requirements Since GM200, SORs are no longer tied to a specific connector, and we allocate them instead, with the assumption that all SORs are equally capable. However, there's a 1<->1 mapping between SOR and HDA pin widget, and it turns out that it's possible for some widgets to be disabled... In order to avoid picking a SOR without a valid pin widget, some new rules need to be added. Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nvkm/engine/disp/outp.c | 26 +++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/outp.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/outp.c index b56224558a05..dcf08249374a 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/outp.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/outp.c @@ -121,14 +121,14 @@ nvkm_outp_acquire_hda(struct nvkm_outp *outp, enum nvkm_ior_type type, * on HW, if any, in order to prevent unnecessary switching. */ list_for_each_entry(ior, &outp->disp->ior, head) { - if (!ior->identity && + if (!ior->identity && !!ior->func->hda.hpd == hda && !ior->asy.outp && ior->arm.outp == outp) return nvkm_outp_acquire_ior(outp, user, ior); } /* Failing that, a completely unused OR is the next best thing. */ list_for_each_entry(ior, &outp->disp->ior, head) { - if (!ior->identity && + if (!ior->identity && !!ior->func->hda.hpd == hda && !ior->asy.outp && ior->type == type && !ior->arm.outp && (ior->func->route.set || ior->id == __ffs(outp->info.or))) return nvkm_outp_acquire_ior(outp, user, ior); @@ -138,7 +138,7 @@ nvkm_outp_acquire_hda(struct nvkm_outp *outp, enum nvkm_ior_type type, * but will be released during the next modeset. */ list_for_each_entry(ior, &outp->disp->ior, head) { - if (!ior->identity && + if (!ior->identity && !!ior->func->hda.hpd == hda && !ior->asy.outp && ior->type == type && (ior->func->route.set || ior->id == __ffs(outp->info.or))) return nvkm_outp_acquire_ior(outp, user, ior); @@ -173,7 +173,25 @@ nvkm_outp_acquire(struct nvkm_outp *outp, u8 user, bool hda) return nvkm_outp_acquire_ior(outp, user, ior); } - return nvkm_outp_acquire_hda(outp, type, user, true); + /* If we don't need HDA, first try to acquire an OR that doesn't + * support it to leave free the ones that do. + */ + if (!hda) { + if (!nvkm_outp_acquire_hda(outp, type, user, false)) + return 0; + + /* Use a HDA-supporting SOR anyway. */ + return nvkm_outp_acquire_hda(outp, type, user, true); + } + + /* We want HDA, try to acquire an OR that supports it. */ + if (!nvkm_outp_acquire_hda(outp, type, user, true)) + return 0; + + /* There weren't any free ORs that support HDA, grab one that + * doesn't and at least allow display to work still. + */ + return nvkm_outp_acquire_hda(outp, type, user, false); } void -- cgit From 9f9f54e887adef3dc9b62f372002c421ef4f1921 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Thu, 4 Jun 2020 14:22:32 +1000 Subject: drm/nouveau/disp/gp100: split SOR implementation from gm200 GP100 needs different HDA detection. Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nvkm/engine/disp/Kbuild | 1 + drivers/gpu/drm/nouveau/nvkm/engine/disp/gp100.c | 2 +- drivers/gpu/drm/nouveau/nvkm/engine/disp/gp102.c | 2 +- drivers/gpu/drm/nouveau/nvkm/engine/disp/ior.h | 1 + .../gpu/drm/nouveau/nvkm/engine/disp/sorgp100.c | 59 ++++++++++++++++++++++ 5 files changed, 63 insertions(+), 2 deletions(-) create mode 100644 drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgp100.c diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/Kbuild b/drivers/gpu/drm/nouveau/nvkm/engine/disp/Kbuild index 571687ba85b8..cf075311cdd2 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/Kbuild +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/Kbuild @@ -39,6 +39,7 @@ nvkm-y += nvkm/engine/disp/sorgf119.o nvkm-y += nvkm/engine/disp/sorgk104.o nvkm-y += nvkm/engine/disp/sorgm107.o nvkm-y += nvkm/engine/disp/sorgm200.o +nvkm-y += nvkm/engine/disp/sorgp100.o nvkm-y += nvkm/engine/disp/sorgv100.o nvkm-y += nvkm/engine/disp/sortu102.o diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/gp100.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/gp100.c index fd6216684f6d..8471de3f3b61 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/gp100.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/gp100.c @@ -36,7 +36,7 @@ gp100_disp = { .super = gf119_disp_super, .root = &gp100_disp_root_oclass, .head = { .cnt = gf119_head_cnt, .new = gf119_head_new }, - .sor = { .cnt = gf119_sor_cnt, .new = gm200_sor_new }, + .sor = { .cnt = gf119_sor_cnt, .new = gp100_sor_new }, }; int diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/gp102.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/gp102.c index 3468ddec1270..a3779c5046ea 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/gp102.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/gp102.c @@ -63,7 +63,7 @@ gp102_disp = { .super = gf119_disp_super, .root = &gp102_disp_root_oclass, .head = { .cnt = gf119_head_cnt, .new = gf119_head_new }, - .sor = { .cnt = gf119_sor_cnt, .new = gm200_sor_new }, + .sor = { .cnt = gf119_sor_cnt, .new = gp100_sor_new }, }; int diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/ior.h b/drivers/gpu/drm/nouveau/nvkm/engine/disp/ior.h index c1d7a36e4d3c..1a200a9ba4e4 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/ior.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/ior.h @@ -201,6 +201,7 @@ int gf119_sor_new(struct nvkm_disp *, int); int gk104_sor_new(struct nvkm_disp *, int); int gm107_sor_new(struct nvkm_disp *, int); int gm200_sor_new(struct nvkm_disp *, int); +int gp100_sor_new(struct nvkm_disp *, int); int gv100_sor_cnt(struct nvkm_disp *, unsigned long *); int gv100_sor_new(struct nvkm_disp *, int); diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgp100.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgp100.c new file mode 100644 index 000000000000..94feb91b16d5 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgp100.c @@ -0,0 +1,59 @@ +/* + * Copyright 2020 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "ior.h" + +static const struct nvkm_ior_func +gp100_sor = { + .route = { + .get = gm200_sor_route_get, + .set = gm200_sor_route_set, + }, + .state = gf119_sor_state, + .power = nv50_sor_power, + .clock = gf119_sor_clock, + .hdmi = { + .ctrl = gk104_hdmi_ctrl, + .scdc = gm200_hdmi_scdc, + }, + .dp = { + .lanes = { 0, 1, 2, 3 }, + .links = gf119_sor_dp_links, + .power = g94_sor_dp_power, + .pattern = gm107_sor_dp_pattern, + .drive = gm200_sor_dp_drive, + .vcpi = gf119_sor_dp_vcpi, + .audio = gf119_sor_dp_audio, + .audio_sym = gf119_sor_dp_audio_sym, + .watermark = gf119_sor_dp_watermark, + }, + .hda = { + .hpd = gf119_hda_hpd, + .eld = gf119_hda_eld, + .device_entry = gf119_hda_device_entry, + }, +}; + +int +gp100_sor_new(struct nvkm_disp *disp, int id) +{ + return nvkm_ior_new_(&gp100_sor, disp, SOR, id); +} -- cgit From 9b5ca547bb8fc26c251d457ac55cc8e3d9baa6bf Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Wed, 3 Jun 2020 12:51:03 +1000 Subject: drm/nouveau/disp/gm200-: detect and potentially disable HDA support on some SORs Some HDA pin widgets may be disabled by BIOS, and unavailable from a SOR. Our SOR allocation policy uses this information to allocate an appropriate SOR when HDA is supported by a display. Thank you to NVIDIA for providing the information to determine this. Signed-off-by: Ben Skeggs --- .../gpu/drm/nouveau/nvkm/engine/disp/sorgm200.c | 36 +++++++++++++++++++++- .../gpu/drm/nouveau/nvkm/engine/disp/sorgp100.c | 36 +++++++++++++++++++++- .../gpu/drm/nouveau/nvkm/engine/disp/sorgv100.c | 35 ++++++++++++++++++++- .../gpu/drm/nouveau/nvkm/engine/disp/sortu102.c | 32 ++++++++++++++++++- 4 files changed, 135 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgm200.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgm200.c index cf2075db742a..4dd7f382968e 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgm200.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgm200.c @@ -89,7 +89,7 @@ gm200_sor_route_get(struct nvkm_outp *outp, int *link) } static const struct nvkm_ior_func -gm200_sor = { +gm200_sor_hda = { .route = { .get = gm200_sor_route_get, .set = gm200_sor_route_set, @@ -119,8 +119,42 @@ gm200_sor = { }, }; +static const struct nvkm_ior_func +gm200_sor = { + .route = { + .get = gm200_sor_route_get, + .set = gm200_sor_route_set, + }, + .state = gf119_sor_state, + .power = nv50_sor_power, + .clock = gf119_sor_clock, + .hdmi = { + .ctrl = gk104_hdmi_ctrl, + .scdc = gm200_hdmi_scdc, + }, + .dp = { + .lanes = { 0, 1, 2, 3 }, + .links = gf119_sor_dp_links, + .power = g94_sor_dp_power, + .pattern = gm107_sor_dp_pattern, + .drive = gm200_sor_dp_drive, + .vcpi = gf119_sor_dp_vcpi, + .audio = gf119_sor_dp_audio, + .audio_sym = gf119_sor_dp_audio_sym, + .watermark = gf119_sor_dp_watermark, + }, +}; + int gm200_sor_new(struct nvkm_disp *disp, int id) { + struct nvkm_device *device = disp->engine.subdev.device; + u32 hda; + + if (!((hda = nvkm_rd32(device, 0x08a15c)) & 0x40000000)) + hda = nvkm_rd32(device, 0x101034); + + if (hda & BIT(id)) + return nvkm_ior_new_(&gm200_sor_hda, disp, SOR, id); return nvkm_ior_new_(&gm200_sor, disp, SOR, id); } diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgp100.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgp100.c index 94feb91b16d5..c54f88317a07 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgp100.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgp100.c @@ -22,7 +22,7 @@ #include "ior.h" static const struct nvkm_ior_func -gp100_sor = { +gp100_sor_hda = { .route = { .get = gm200_sor_route_get, .set = gm200_sor_route_set, @@ -52,8 +52,42 @@ gp100_sor = { }, }; +static const struct nvkm_ior_func +gp100_sor = { + .route = { + .get = gm200_sor_route_get, + .set = gm200_sor_route_set, + }, + .state = gf119_sor_state, + .power = nv50_sor_power, + .clock = gf119_sor_clock, + .hdmi = { + .ctrl = gk104_hdmi_ctrl, + .scdc = gm200_hdmi_scdc, + }, + .dp = { + .lanes = { 0, 1, 2, 3 }, + .links = gf119_sor_dp_links, + .power = g94_sor_dp_power, + .pattern = gm107_sor_dp_pattern, + .drive = gm200_sor_dp_drive, + .vcpi = gf119_sor_dp_vcpi, + .audio = gf119_sor_dp_audio, + .audio_sym = gf119_sor_dp_audio_sym, + .watermark = gf119_sor_dp_watermark, + }, +}; + int gp100_sor_new(struct nvkm_disp *disp, int id) { + struct nvkm_device *device = disp->engine.subdev.device; + u32 hda; + + if (!((hda = nvkm_rd32(device, 0x08a15c)) & 0x40000000)) + hda = nvkm_rd32(device, 0x10ebb0) >> 8; + + if (hda & BIT(id)) + return nvkm_ior_new_(&gp100_sor_hda, disp, SOR, id); return nvkm_ior_new_(&gp100_sor, disp, SOR, id); } diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgv100.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgv100.c index d11a0dff10c6..4441187e8ec9 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgv100.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgv100.c @@ -78,7 +78,7 @@ gv100_sor_state(struct nvkm_ior *sor, struct nvkm_ior_state *state) } static const struct nvkm_ior_func -gv100_sor = { +gv100_sor_hda = { .route = { .get = gm200_sor_route_get, .set = gm200_sor_route_set, @@ -107,9 +107,42 @@ gv100_sor = { }, }; +static const struct nvkm_ior_func +gv100_sor = { + .route = { + .get = gm200_sor_route_get, + .set = gm200_sor_route_set, + }, + .state = gv100_sor_state, + .power = nv50_sor_power, + .clock = gf119_sor_clock, + .hdmi = { + .ctrl = gv100_hdmi_ctrl, + .scdc = gm200_hdmi_scdc, + }, + .dp = { + .lanes = { 0, 1, 2, 3 }, + .links = gf119_sor_dp_links, + .power = g94_sor_dp_power, + .pattern = gm107_sor_dp_pattern, + .drive = gm200_sor_dp_drive, + .audio = gv100_sor_dp_audio, + .audio_sym = gv100_sor_dp_audio_sym, + .watermark = gv100_sor_dp_watermark, + }, +}; + int gv100_sor_new(struct nvkm_disp *disp, int id) { + struct nvkm_device *device = disp->engine.subdev.device; + u32 hda; + + if (!((hda = nvkm_rd32(device, 0x08a15c)) & 0x40000000)) + hda = nvkm_rd32(device, 0x118fb0) >> 8; + + if (hda & BIT(id)) + return nvkm_ior_new_(&gv100_sor_hda, disp, SOR, id); return nvkm_ior_new_(&gv100_sor, disp, SOR, id); } diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/sortu102.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/sortu102.c index fa6d74251237..59865a934c4b 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/sortu102.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/sortu102.c @@ -62,7 +62,7 @@ tu102_sor_dp_links(struct nvkm_ior *sor, struct nvkm_i2c_aux *aux) } static const struct nvkm_ior_func -tu102_sor = { +tu102_sor_hda = { .route = { .get = gm200_sor_route_get, .set = gm200_sor_route_set, @@ -92,8 +92,38 @@ tu102_sor = { }, }; +static const struct nvkm_ior_func +tu102_sor = { + .route = { + .get = gm200_sor_route_get, + .set = gm200_sor_route_set, + }, + .state = gv100_sor_state, + .power = nv50_sor_power, + .clock = gf119_sor_clock, + .hdmi = { + .ctrl = gv100_hdmi_ctrl, + .scdc = gm200_hdmi_scdc, + }, + .dp = { + .lanes = { 0, 1, 2, 3 }, + .links = tu102_sor_dp_links, + .power = g94_sor_dp_power, + .pattern = gm107_sor_dp_pattern, + .drive = gm200_sor_dp_drive, + .vcpi = tu102_sor_dp_vcpi, + .audio = gv100_sor_dp_audio, + .audio_sym = gv100_sor_dp_audio_sym, + .watermark = gv100_sor_dp_watermark, + }, +}; + int tu102_sor_new(struct nvkm_disp *disp, int id) { + struct nvkm_device *device = disp->engine.subdev.device; + u32 hda = nvkm_rd32(device, 0x08a15c); + if (hda & BIT(id)) + return nvkm_ior_new_(&tu102_sor_hda, disp, SOR, id); return nvkm_ior_new_(&tu102_sor, disp, SOR, id); } -- cgit From 21454fe697fde188ad6fb541f94b9838fa73ab38 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Wed, 3 Jun 2020 16:20:02 +0200 Subject: drm/nouveau: gr/gk20a: Use firmware version 0 Tegra firmware doesn't actually use any version numbers and passing -1 causes the existing firmware binaries not to be found. Use version 0 to find the correct files. Fixes: ef16dc278ec2 ("drm/nouveau/gr/gf100-: select implementation based on available FW") Signed-off-by: Thierry Reding Reviewed-by: Emil Velikov Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nvkm/engine/gr/gk20a.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk20a.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk20a.c index ec330d791d15..e56880f3e3bd 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk20a.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk20a.c @@ -352,7 +352,7 @@ gk20a_gr_load(struct gf100_gr *gr, int ver, const struct gf100_gr_fwif *fwif) static const struct gf100_gr_fwif gk20a_gr_fwif[] = { - { -1, gk20a_gr_load, &gk20a_gr }, + { 0, gk20a_gr_load, &gk20a_gr }, {} }; -- cgit From dd67cab5db7e940dad66653a04d780d53bd380d5 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Thu, 4 Jun 2020 11:00:01 +1000 Subject: drm/nouveau/kms/nv50-: clear SW state of disabled windows harder The most innocuous result of not having done this is that we end up sending unnecessary methods when we next enable the window. However, interactions with the code handling skipping disables when an update immediately follows, and window ownership assignment, can lead to upsetting the display hardware on Volta and newer. Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/dispnv50/wndw.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/dispnv50/wndw.c b/drivers/gpu/drm/nouveau/dispnv50/wndw.c index e25ead56052c..99b9b681736d 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/wndw.c +++ b/drivers/gpu/drm/nouveau/dispnv50/wndw.c @@ -192,6 +192,8 @@ nv50_wndw_atomic_check_release(struct nv50_wndw *wndw, wndw->func->release(wndw, asyw, asyh); asyw->ntfy.handle = 0; asyw->sema.handle = 0; + asyw->xlut.handle = 0; + memset(asyw->image.handle, 0x00, sizeof(asyw->image.handle)); } static int @@ -519,7 +521,8 @@ nv50_wndw_prepare_fb(struct drm_plane *plane, struct drm_plane_state *state) return PTR_ERR(ctxdma); } - asyw->image.handle[0] = ctxdma->object.handle; + if (asyw->visible) + asyw->image.handle[0] = ctxdma->object.handle; } asyw->state.fence = dma_resv_get_excl_rcu(nvbo->bo.base.resv); -- cgit