From e5e1fc47eac842daeccc2dd125437f43e0271032 Mon Sep 17 00:00:00 2001 From: Mika Kuoppala Date: Wed, 16 Nov 2016 17:20:31 +0200 Subject: drm/i915: Use request retirement as context progress As hangcheck score was removed, the active decay of score was removed also. This removed feature for hangcheck to detect if the gpu client was accidentally or maliciously causing intermittent hangs. Reinstate the scoring as a per context property, so that if one context starts to act unfavourably, ban it. v2: ban_period_secs as a gate to score check (Chris) v3: decay in proper spot. scores as tunables (Chris) Cc: Chris Wilson Signed-off-by: Mika Kuoppala Reviewed-by: Chris Wilson Signed-off-by: Mika Kuoppala --- drivers/gpu/drm/i915/i915_gem_request.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'drivers/gpu/drm/i915/i915_gem_request.c') diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index 27e8f257fb39..60e63956ea19 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -263,6 +263,10 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request) request->engine); } + /* Retirement decays the ban score as it is a sign of ctx progress */ + if (request->ctx->hang_stats.ban_score > 0) + request->ctx->hang_stats.ban_score--; + i915_gem_context_put(request->ctx); dma_fence_signal(&request->fence); -- cgit From bc1d53c64773e89175766e8c77cc96aa0763de4a Mon Sep 17 00:00:00 2001 From: Mika Kuoppala Date: Wed, 16 Nov 2016 17:20:34 +0200 Subject: drm/i915: Wipe hang stats as an embedded struct Bannable property, banned status, guilty and active counts are properties of i915_gem_context. Make them so. v2: rebase Cc: Chris Wilson Reviewed-by: Chris Wilson Signed-off-by: Mika Kuoppala Link: http://patchwork.freedesktop.org/patch/msgid/1479309634-28574-1-git-send-email-mika.kuoppala@intel.com --- drivers/gpu/drm/i915/i915_drv.h | 31 ++++++++++-------------------- drivers/gpu/drm/i915/i915_gem.c | 25 ++++++++++-------------- drivers/gpu/drm/i915/i915_gem_context.c | 12 +++++------- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 4 +--- drivers/gpu/drm/i915/i915_gem_request.c | 4 ++-- drivers/gpu/drm/i915/i915_gpu_error.c | 2 +- 6 files changed, 29 insertions(+), 49 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_gem_request.c') diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 1f0b5c37f6de..c7d5f7a30fe8 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -913,25 +913,6 @@ enum i915_cache_level { I915_CACHE_WT, /* hsw:gt3e WriteThrough for scanouts */ }; -struct i915_ctx_hang_stats { - /* This context had batch pending when hang was declared */ - unsigned batch_pending; - - /* This context had batch active when hang was declared */ - unsigned batch_active; - - bool bannable:1; - - /* This context is banned to submit more work */ - bool banned:1; - -#define CONTEXT_SCORE_GUILTY 10 -#define CONTEXT_SCORE_BAN_THRESHOLD 40 - /* Accumulated score of hangs caused by this context */ - int ban_score; -}; - -/* This must match up with the value previously used for execbuf2.rsvd1. */ #define DEFAULT_CONTEXT_HANDLE 0 /** @@ -961,8 +942,6 @@ struct i915_gem_context { struct pid *pid; const char *name; - struct i915_ctx_hang_stats hang_stats; - unsigned long flags; #define CONTEXT_NO_ZEROMAP BIT(0) #define CONTEXT_NO_ERROR_CAPTURE BIT(1) @@ -991,6 +970,16 @@ struct i915_gem_context { u8 remap_slice; bool closed:1; + bool bannable:1; + bool banned:1; + + unsigned int guilty_count; /* guilty of a hang */ + unsigned int active_count; /* active during hang */ + +#define CONTEXT_SCORE_GUILTY 10 +#define CONTEXT_SCORE_BAN_THRESHOLD 40 + /* Accumulated score of hangs caused by this context */ + int ban_score; }; enum fb_op_origin { diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index a8118386a23b..b38d4f8d1875 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2622,15 +2622,13 @@ err_unlock: static bool i915_context_is_banned(const struct i915_gem_context *ctx) { - const struct i915_ctx_hang_stats *hs = &ctx->hang_stats; - - if (hs->banned) + if (ctx->banned) return true; - if (!hs->bannable) + if (!ctx->bannable) return false; - if (hs->ban_score >= CONTEXT_SCORE_BAN_THRESHOLD) { + if (ctx->ban_score >= CONTEXT_SCORE_BAN_THRESHOLD) { DRM_DEBUG("context hanging too often, banning!\n"); return true; } @@ -2640,20 +2638,19 @@ static bool i915_context_is_banned(const struct i915_gem_context *ctx) static void i915_gem_context_mark_guilty(struct i915_gem_context *ctx) { - struct i915_ctx_hang_stats *hs = &ctx->hang_stats; - - hs->ban_score += CONTEXT_SCORE_GUILTY; + ctx->ban_score += CONTEXT_SCORE_GUILTY; - hs->banned = i915_context_is_banned(ctx); - hs->batch_active++; + ctx->banned = i915_context_is_banned(ctx); + ctx->guilty_count++; DRM_DEBUG_DRIVER("context %s marked guilty (score %d) banned? %s\n", - ctx->name, hs->ban_score, yesno(hs->banned)); + ctx->name, ctx->ban_score, + yesno(ctx->banned)); if (!ctx->file_priv) return; - if (hs->banned) { + if (ctx->banned) { ctx->file_priv->context_bans++; DRM_DEBUG_DRIVER("client %s has has %d context banned\n", @@ -2664,9 +2661,7 @@ static void i915_gem_context_mark_guilty(struct i915_gem_context *ctx) static void i915_gem_context_mark_innocent(struct i915_gem_context *ctx) { - struct i915_ctx_hang_stats *hs = &ctx->hang_stats; - - hs->batch_pending++; + ctx->active_count++; } struct drm_i915_gem_request * diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 88d59866c433..f82936a2fcce 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -331,7 +331,7 @@ __create_hw_context(struct drm_device *dev, * is no remap info, it will be a NOP. */ ctx->remap_slice = ALL_L3_SLICES(dev_priv); - ctx->hang_stats.bannable = true; + ctx->bannable = true; ctx->ring_size = 4 * PAGE_SIZE; ctx->desc_template = GEN8_CTX_ADDRESSING_MODE(dev_priv) << GEN8_CTX_ADDRESSING_MODE_SHIFT; @@ -1115,7 +1115,7 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data, args->value = !!(ctx->flags & CONTEXT_NO_ERROR_CAPTURE); break; case I915_CONTEXT_PARAM_BANNABLE: - args->value = ctx->hang_stats.bannable; + args->value = ctx->bannable; break; default: ret = -EINVAL; @@ -1172,7 +1172,7 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data, else if (!capable(CAP_SYS_ADMIN) && !args->value) ret = -EPERM; else - ctx->hang_stats.bannable = args->value; + ctx->bannable = args->value; break; default: ret = -EINVAL; @@ -1188,7 +1188,6 @@ int i915_gem_context_reset_stats_ioctl(struct drm_device *dev, { struct drm_i915_private *dev_priv = to_i915(dev); struct drm_i915_reset_stats *args = data; - struct i915_ctx_hang_stats *hs; struct i915_gem_context *ctx; int ret; @@ -1207,15 +1206,14 @@ int i915_gem_context_reset_stats_ioctl(struct drm_device *dev, mutex_unlock(&dev->struct_mutex); return PTR_ERR(ctx); } - hs = &ctx->hang_stats; if (capable(CAP_SYS_ADMIN)) args->reset_count = i915_reset_count(&dev_priv->gpu_error); else args->reset_count = 0; - args->batch_active = hs->batch_active; - args->batch_pending = hs->batch_pending; + args->batch_active = ctx->guilty_count; + args->batch_pending = ctx->active_count; mutex_unlock(&dev->struct_mutex); diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 097d9d8c2315..522ecfb4dc9d 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -1232,14 +1232,12 @@ i915_gem_validate_context(struct drm_device *dev, struct drm_file *file, struct intel_engine_cs *engine, const u32 ctx_id) { struct i915_gem_context *ctx; - struct i915_ctx_hang_stats *hs; ctx = i915_gem_context_lookup(file->driver_priv, ctx_id); if (IS_ERR(ctx)) return ctx; - hs = &ctx->hang_stats; - if (hs->banned) { + if (ctx->banned) { DRM_DEBUG("Context %u tried to submit while banned\n", ctx_id); return ERR_PTR(-EIO); } diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index 60e63956ea19..9f37eaa3723a 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -264,8 +264,8 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request) } /* Retirement decays the ban score as it is a sign of ctx progress */ - if (request->ctx->hang_stats.ban_score > 0) - request->ctx->hang_stats.ban_score--; + if (request->ctx->ban_score > 0) + request->ctx->ban_score--; i915_gem_context_put(request->ctx); diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index af4f0ef4fa08..82458ea60150 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -1172,7 +1172,7 @@ static void record_request(struct drm_i915_gem_request *request, struct drm_i915_error_request *erq) { erq->context = request->ctx->hw_id; - erq->ban_score = request->ctx->hang_stats.ban_score; + erq->ban_score = request->ctx->ban_score; erq->seqno = request->global_seqno; erq->jiffies = request->emitted_jiffies; erq->head = request->head; -- cgit From 4c266edb4c98e929ea8871525fa2c7bd2483e05f Mon Sep 17 00:00:00 2001 From: Joonas Lahtinen Date: Thu, 24 Nov 2016 14:47:49 +0000 Subject: drm/i915: Rename i915_gem_timeline.next_seqno to .seqno Rename i915_gem_timeline member 'next_seqno' into 'seqno' as the variable is pre-increment. We've already had two bugs due to the confusing name, second is fixed as follow-up patch. Cc: Chris Wilson Signed-off-by: Joonas Lahtinen Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20161124144750.2610-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_debugfs.c | 4 ++-- drivers/gpu/drm/i915/i915_gem_request.c | 14 +++++++------- drivers/gpu/drm/i915/i915_gem_timeline.h | 2 +- 3 files changed, 10 insertions(+), 10 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_gem_request.c') diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 0c6323e0f9f9..66067c439935 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -552,7 +552,7 @@ static int i915_gem_pageflip_info(struct seq_file *m, void *data) seq_printf(m, "Flip queued on %s at seqno %x, next seqno %x [current breadcrumb %x], completed? %d\n", engine->name, work->flip_queued_req->global_seqno, - atomic_read(&dev_priv->gt.global_timeline.next_seqno), + atomic_read(&dev_priv->gt.global_timeline.seqno), intel_engine_get_seqno(engine), i915_gem_request_completed(work->flip_queued_req)); } else @@ -1026,7 +1026,7 @@ i915_next_seqno_get(void *data, u64 *val) { struct drm_i915_private *dev_priv = data; - *val = 1 + atomic_read(&dev_priv->gt.global_timeline.next_seqno); + *val = 1 + atomic_read(&dev_priv->gt.global_timeline.seqno); return 0; } diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index 9f37eaa3723a..82904595eaae 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -330,11 +330,11 @@ static int i915_gem_init_global_seqno(struct drm_i915_private *i915, u32 seqno) GEM_BUG_ON(i915->gt.active_requests > 1); /* If the seqno wraps around, we need to clear the breadcrumb rbtree */ - if (!i915_seqno_passed(seqno, atomic_read(&timeline->next_seqno))) { + if (!i915_seqno_passed(seqno, atomic_read(&timeline->seqno))) { while (intel_breadcrumbs_busy(i915)) cond_resched(); /* spin until threads are complete */ } - atomic_set(&timeline->next_seqno, seqno); + atomic_set(&timeline->seqno, seqno); /* Finally reset hw state */ for_each_engine(engine, i915, id) @@ -369,11 +369,11 @@ int i915_gem_set_global_seqno(struct drm_device *dev, u32 seqno) static int reserve_global_seqno(struct drm_i915_private *i915) { u32 active_requests = ++i915->gt.active_requests; - u32 next_seqno = atomic_read(&i915->gt.global_timeline.next_seqno); + u32 seqno = atomic_read(&i915->gt.global_timeline.seqno); int ret; /* Reservation is fine until we need to wrap around */ - if (likely(next_seqno + active_requests > next_seqno)) + if (likely(seqno + active_requests > seqno)) return 0; ret = i915_gem_init_global_seqno(i915, 0); @@ -387,13 +387,13 @@ static int reserve_global_seqno(struct drm_i915_private *i915) static u32 __timeline_get_seqno(struct i915_gem_timeline *tl) { - /* next_seqno only incremented under a mutex */ - return ++tl->next_seqno.counter; + /* seqno only incremented under a mutex */ + return ++tl->seqno.counter; } static u32 timeline_get_seqno(struct i915_gem_timeline *tl) { - return atomic_inc_return(&tl->next_seqno); + return atomic_inc_return(&tl->seqno); } void __i915_gem_request_submit(struct drm_i915_gem_request *request) diff --git a/drivers/gpu/drm/i915/i915_gem_timeline.h b/drivers/gpu/drm/i915/i915_gem_timeline.h index 98d99a62b4ae..f2e51f42cc2f 100644 --- a/drivers/gpu/drm/i915/i915_gem_timeline.h +++ b/drivers/gpu/drm/i915/i915_gem_timeline.h @@ -56,7 +56,7 @@ struct intel_timeline { struct i915_gem_timeline { struct list_head link; - atomic_t next_seqno; + atomic_t seqno; struct drm_i915_private *i915; const char *name; -- cgit From 4ffd6e0cfe1816fced8ab8d2612e1edde81aac2a Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 25 Nov 2016 13:17:15 +0000 Subject: drm/i915: Add is-completed assert to request retire entrypoint While we will check that the request is completed prior to being retired, by placing an assert that the request is complete at the entrypoint of the function we can more clearly document the function's preconditions. Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20161125131718.20978-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_request.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/gpu/drm/i915/i915_gem_request.c') diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index 82904595eaae..bd7b21f70283 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -281,6 +281,8 @@ void i915_gem_request_retire_upto(struct drm_i915_gem_request *req) struct drm_i915_gem_request *tmp; lockdep_assert_held(&req->i915->drm.struct_mutex); + GEM_BUG_ON(!i915_gem_request_completed(req)); + if (list_empty(&req->link)) return; -- cgit From 1618bdb89b5d8b47edf42d9c6ea96ecf001ad625 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 25 Nov 2016 13:17:16 +0000 Subject: drm/i915: Assert no external observers when unwind a failed request alloc Before we return the request back to the kmem_cache after a failed i915_gem_request_alloc(), we should assert that it has not been added to any global state tracking. Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20161125131718.20978-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_request.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'drivers/gpu/drm/i915/i915_gem_request.c') diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index bd7b21f70283..ed6cead22a86 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -599,6 +599,11 @@ i915_gem_request_alloc(struct intel_engine_cs *engine, return req; err_ctx: + /* Make sure we didn't add ourselves to external state before freeing */ + GEM_BUG_ON(!list_empty(&req->active_list)); + GEM_BUG_ON(!list_empty(&req->priotree.signalers_list)); + GEM_BUG_ON(!list_empty(&req->priotree.waiters_list)); + i915_gem_context_put(ctx); kmem_cache_free(dev_priv->requests, req); err_unreserve: -- cgit From 48bc2a4a427ad81578f887d71d45794619a77211 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 25 Nov 2016 13:17:17 +0000 Subject: drm/i915: Hold a reference on the request for its fence chain Currently, we have an active reference for the request until it is retired. Though it cannot be retired before it has been executed by hardware, the request may be completed before we have finished processing the execute fence, i.e. we may continue to process that fence as we free the request. Fixes: 5590af3e115a ("drm/i915: Drive request submission through fence callbacks") Fixes: 23902e49c999 ("drm/i915: Split request submit/execute phase into two") Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20161125131718.20978-3-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_request.c | 34 ++++++++++++++++++++++++++------- drivers/gpu/drm/i915/i915_sw_fence.h | 5 +++++ 2 files changed, 32 insertions(+), 7 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_gem_request.c') diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index ed6cead22a86..94d71b639f12 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -200,8 +200,8 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request) struct i915_gem_active *active, *next; lockdep_assert_held(&request->i915->drm.struct_mutex); - GEM_BUG_ON(!i915_sw_fence_done(&request->submit)); - GEM_BUG_ON(!i915_sw_fence_done(&request->execute)); + GEM_BUG_ON(!i915_sw_fence_signaled(&request->submit)); + GEM_BUG_ON(!i915_sw_fence_signaled(&request->execute)); GEM_BUG_ON(!i915_gem_request_completed(request)); GEM_BUG_ON(!request->i915->gt.active_requests); @@ -451,11 +451,17 @@ void i915_gem_request_submit(struct drm_i915_gem_request *request) static int __i915_sw_fence_call submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state) { - if (state == FENCE_COMPLETE) { - struct drm_i915_gem_request *request = - container_of(fence, typeof(*request), submit); + struct drm_i915_gem_request *request = + container_of(fence, typeof(*request), submit); + switch (state) { + case FENCE_COMPLETE: request->engine->submit_request(request); + break; + + case FENCE_FREE: + i915_gem_request_put(request); + break; } return NOTIFY_DONE; @@ -464,6 +470,18 @@ submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state) static int __i915_sw_fence_call execute_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state) { + struct drm_i915_gem_request *request = + container_of(fence, typeof(*request), execute); + + switch (state) { + case FENCE_COMPLETE: + break; + + case FENCE_FREE: + i915_gem_request_put(request); + break; + } + return NOTIFY_DONE; } @@ -551,8 +569,10 @@ i915_gem_request_alloc(struct intel_engine_cs *engine, req->timeline->fence_context, __timeline_get_seqno(req->timeline->common)); - i915_sw_fence_init(&req->submit, submit_notify); - i915_sw_fence_init(&req->execute, execute_notify); + /* We bump the ref for the fence chain */ + i915_sw_fence_init(&i915_gem_request_get(req)->submit, submit_notify); + i915_sw_fence_init(&i915_gem_request_get(req)->execute, execute_notify); + /* Ensure that the execute fence completes after the submit fence - * as we complete the execute fence from within the submit fence * callback, its completion would otherwise be visible first. diff --git a/drivers/gpu/drm/i915/i915_sw_fence.h b/drivers/gpu/drm/i915/i915_sw_fence.h index 7508d23f823b..0f3185ef7f4e 100644 --- a/drivers/gpu/drm/i915/i915_sw_fence.h +++ b/drivers/gpu/drm/i915/i915_sw_fence.h @@ -75,6 +75,11 @@ int i915_sw_fence_await_reservation(struct i915_sw_fence *fence, unsigned long timeout, gfp_t gfp); +static inline bool i915_sw_fence_signaled(const struct i915_sw_fence *fence) +{ + return atomic_read(&fence->pending) <= 0; +} + static inline bool i915_sw_fence_done(const struct i915_sw_fence *fence) { return atomic_read(&fence->pending) < 0; -- cgit From fc1584059d6c438b1febafa1c207ae1d3c6643e8 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 25 Nov 2016 13:17:18 +0000 Subject: drm/i915: Integrate i915_sw_fence with debugobjects Add the tracking required to enable debugobjects for fences to improve error detection in BAT. The debugobject interface lets us track the lifetime and phases of the fences even while being embedded into larger structs, i.e. to check they are not used after they have been released. v2: Don't populate the stubs, debugobjects checks for a NULL pointer and treats it equivalently. Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20161125131718.20978-4-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/Kconfig.debug | 13 +++ drivers/gpu/drm/i915/i915_gem_request.c | 9 ++ drivers/gpu/drm/i915/i915_sw_fence.c | 140 ++++++++++++++++++++++++++++++-- drivers/gpu/drm/i915/i915_sw_fence.h | 6 ++ 4 files changed, 161 insertions(+), 7 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_gem_request.c') diff --git a/drivers/gpu/drm/i915/Kconfig.debug b/drivers/gpu/drm/i915/Kconfig.debug index 51ba630a134b..a6c69b8cb1d2 100644 --- a/drivers/gpu/drm/i915/Kconfig.debug +++ b/drivers/gpu/drm/i915/Kconfig.debug @@ -22,6 +22,7 @@ config DRM_I915_DEBUG select X86_MSR # used by igt/pm_rpm select DRM_VGEM # used by igt/prime_vgem (dmabuf interop checks) select DRM_DEBUG_MM if DRM=y + select DRM_I915_SW_FENCE_DEBUG_OBJECTS if DRM_I915=y default n help Choose this option to turn on extra driver debugging that may affect @@ -43,3 +44,15 @@ config DRM_I915_DEBUG_GEM If in doubt, say "N". +config DRM_I915_SW_FENCE_DEBUG_OBJECTS + bool "Enable additional driver debugging for fence objects" + depends on DRM_I915=y + select DEBUG_OBJECTS + default n + help + Choose this option to turn on extra driver debugging that may affect + performance but will catch some internal issues. + + Recommended for driver developers only. + + If in doubt, say "N". diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index 94d71b639f12..fcf22b0e2967 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -62,6 +62,15 @@ static void i915_fence_release(struct dma_fence *fence) { struct drm_i915_gem_request *req = to_request(fence); + /* The request is put onto a RCU freelist (i.e. the address + * is immediately reused), mark the fences as being freed now. + * Otherwise the debugobjects for the fences are only marked as + * freed when the slab cache itself is freed, and so we would get + * caught trying to reuse dead objects. + */ + i915_sw_fence_fini(&req->submit); + i915_sw_fence_fini(&req->execute); + kmem_cache_free(req->i915->requests, req); } diff --git a/drivers/gpu/drm/i915/i915_sw_fence.c b/drivers/gpu/drm/i915/i915_sw_fence.c index 147420ccf49c..f5a88092dacf 100644 --- a/drivers/gpu/drm/i915/i915_sw_fence.c +++ b/drivers/gpu/drm/i915/i915_sw_fence.c @@ -17,6 +17,92 @@ static DEFINE_SPINLOCK(i915_sw_fence_lock); +enum { + DEBUG_FENCE_IDLE = 0, + DEBUG_FENCE_NOTIFY, +}; + +#ifdef CONFIG_DRM_I915_SW_FENCE_DEBUG_OBJECTS + +static void *i915_sw_fence_debug_hint(void *addr) +{ + return (void *)(((struct i915_sw_fence *)addr)->flags & I915_SW_FENCE_MASK); +} + +static struct debug_obj_descr i915_sw_fence_debug_descr = { + .name = "i915_sw_fence", + .debug_hint = i915_sw_fence_debug_hint, +}; + +static inline void debug_fence_init(struct i915_sw_fence *fence) +{ + debug_object_init(fence, &i915_sw_fence_debug_descr); +} + +static inline void debug_fence_activate(struct i915_sw_fence *fence) +{ + debug_object_activate(fence, &i915_sw_fence_debug_descr); +} + +static inline void debug_fence_set_state(struct i915_sw_fence *fence, + int old, int new) +{ + debug_object_active_state(fence, &i915_sw_fence_debug_descr, old, new); +} + +static inline void debug_fence_deactivate(struct i915_sw_fence *fence) +{ + debug_object_deactivate(fence, &i915_sw_fence_debug_descr); +} + +static inline void debug_fence_destroy(struct i915_sw_fence *fence) +{ + debug_object_destroy(fence, &i915_sw_fence_debug_descr); +} + +static inline void debug_fence_free(struct i915_sw_fence *fence) +{ + debug_object_free(fence, &i915_sw_fence_debug_descr); +} + +static inline void debug_fence_assert(struct i915_sw_fence *fence) +{ + debug_object_assert_init(fence, &i915_sw_fence_debug_descr); +} + +#else + +static inline void debug_fence_init(struct i915_sw_fence *fence) +{ +} + +static inline void debug_fence_activate(struct i915_sw_fence *fence) +{ +} + +static inline void debug_fence_set_state(struct i915_sw_fence *fence, + int old, int new) +{ +} + +static inline void debug_fence_deactivate(struct i915_sw_fence *fence) +{ +} + +static inline void debug_fence_destroy(struct i915_sw_fence *fence) +{ +} + +static inline void debug_fence_free(struct i915_sw_fence *fence) +{ +} + +static inline void debug_fence_assert(struct i915_sw_fence *fence) +{ +} + +#endif + static int __i915_sw_fence_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state) { @@ -26,25 +112,37 @@ static int __i915_sw_fence_notify(struct i915_sw_fence *fence, return fn(fence, state); } -static void i915_sw_fence_free(struct kref *kref) +#ifdef CONFIG_DRM_I915_SW_FENCE_DEBUG_OBJECTS +void i915_sw_fence_fini(struct i915_sw_fence *fence) +{ + debug_fence_free(fence); +} +#endif + +static void i915_sw_fence_release(struct kref *kref) { struct i915_sw_fence *fence = container_of(kref, typeof(*fence), kref); WARN_ON(atomic_read(&fence->pending) > 0); + debug_fence_destroy(fence); - if (fence->flags & I915_SW_FENCE_MASK) + if (fence->flags & I915_SW_FENCE_MASK) { __i915_sw_fence_notify(fence, FENCE_FREE); - else + } else { + i915_sw_fence_fini(fence); kfree(fence); + } } static void i915_sw_fence_put(struct i915_sw_fence *fence) { - kref_put(&fence->kref, i915_sw_fence_free); + debug_fence_assert(fence); + kref_put(&fence->kref, i915_sw_fence_release); } static struct i915_sw_fence *i915_sw_fence_get(struct i915_sw_fence *fence) { + debug_fence_assert(fence); kref_get(&fence->kref); return fence; } @@ -56,6 +154,7 @@ static void __i915_sw_fence_wake_up_all(struct i915_sw_fence *fence, wait_queue_t *pos, *next; unsigned long flags; + debug_fence_deactivate(fence); atomic_set_release(&fence->pending, -1); /* 0 -> -1 [done] */ /* @@ -88,23 +187,33 @@ static void __i915_sw_fence_wake_up_all(struct i915_sw_fence *fence, } while (1); } spin_unlock_irqrestore(&x->lock, flags); + + debug_fence_assert(fence); } static void __i915_sw_fence_complete(struct i915_sw_fence *fence, struct list_head *continuation) { + debug_fence_assert(fence); + if (!atomic_dec_and_test(&fence->pending)) return; + debug_fence_set_state(fence, DEBUG_FENCE_IDLE, DEBUG_FENCE_NOTIFY); + if (fence->flags & I915_SW_FENCE_MASK && __i915_sw_fence_notify(fence, FENCE_COMPLETE) != NOTIFY_DONE) return; + debug_fence_set_state(fence, DEBUG_FENCE_NOTIFY, DEBUG_FENCE_IDLE); + __i915_sw_fence_wake_up_all(fence, continuation); } static void i915_sw_fence_complete(struct i915_sw_fence *fence) { + debug_fence_assert(fence); + if (WARN_ON(i915_sw_fence_done(fence))) return; @@ -113,6 +222,7 @@ static void i915_sw_fence_complete(struct i915_sw_fence *fence) static void i915_sw_fence_await(struct i915_sw_fence *fence) { + debug_fence_assert(fence); WARN_ON(atomic_inc_return(&fence->pending) <= 1); } @@ -123,18 +233,26 @@ void __i915_sw_fence_init(struct i915_sw_fence *fence, { BUG_ON((unsigned long)fn & ~I915_SW_FENCE_MASK); + debug_fence_init(fence); + __init_waitqueue_head(&fence->wait, name, key); kref_init(&fence->kref); atomic_set(&fence->pending, 1); fence->flags = (unsigned long)fn; } -void i915_sw_fence_commit(struct i915_sw_fence *fence) +static void __i915_sw_fence_commit(struct i915_sw_fence *fence) { i915_sw_fence_complete(fence); i915_sw_fence_put(fence); } +void i915_sw_fence_commit(struct i915_sw_fence *fence) +{ + debug_fence_activate(fence); + __i915_sw_fence_commit(fence); +} + static int i915_sw_fence_wake(wait_queue_t *wq, unsigned mode, int flags, void *key) { list_del(&wq->task_list); @@ -206,9 +324,13 @@ static int __i915_sw_fence_await_sw_fence(struct i915_sw_fence *fence, unsigned long flags; int pending; + debug_fence_assert(fence); + if (i915_sw_fence_done(signaler)) return 0; + debug_fence_assert(signaler); + /* The dependency graph must be acyclic. */ if (unlikely(i915_sw_fence_check_if_after(fence, signaler))) return -EINVAL; @@ -279,7 +401,7 @@ static void timer_i915_sw_fence_wake(unsigned long data) dma_fence_put(cb->dma); cb->dma = NULL; - i915_sw_fence_commit(cb->fence); + __i915_sw_fence_commit(cb->fence); cb->timer.function = NULL; } @@ -290,7 +412,7 @@ static void dma_i915_sw_fence_wake(struct dma_fence *dma, del_timer_sync(&cb->timer); if (cb->timer.function) - i915_sw_fence_commit(cb->fence); + __i915_sw_fence_commit(cb->fence); dma_fence_put(cb->dma); kfree(cb); @@ -304,6 +426,8 @@ int i915_sw_fence_await_dma_fence(struct i915_sw_fence *fence, struct i915_sw_dma_fence_cb *cb; int ret; + debug_fence_assert(fence); + if (dma_fence_is_signaled(dma)) return 0; @@ -349,6 +473,8 @@ int i915_sw_fence_await_reservation(struct i915_sw_fence *fence, struct dma_fence *excl; int ret = 0, pending; + debug_fence_assert(fence); + if (write) { struct dma_fence **shared; unsigned int count, i; diff --git a/drivers/gpu/drm/i915/i915_sw_fence.h b/drivers/gpu/drm/i915/i915_sw_fence.h index 0f3185ef7f4e..d31cefbbcc04 100644 --- a/drivers/gpu/drm/i915/i915_sw_fence.h +++ b/drivers/gpu/drm/i915/i915_sw_fence.h @@ -56,6 +56,12 @@ do { \ __i915_sw_fence_init((fence), (fn), NULL, NULL) #endif +#ifdef CONFIG_DRM_I915_SW_FENCE_DEBUG_OBJECTS +void i915_sw_fence_fini(struct i915_sw_fence *fence); +#else +static inline void i915_sw_fence_fini(struct i915_sw_fence *fence) {} +#endif + void i915_sw_fence_commit(struct i915_sw_fence *fence); int i915_sw_fence_await_sw_fence(struct i915_sw_fence *fence, -- cgit From e8a9c58fcd9a5081f71f57f370af1347ed6a310b Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sun, 18 Dec 2016 15:37:20 +0000 Subject: drm/i915: Unify active context tracking between legacy/execlists/guc The requests conversion introduced a nasty bug where we could generate a new request in the middle of constructing a request if we needed to idle the system in order to evict space for a context. The request to idle would be executed (and waited upon) before the current one, creating a minor havoc in the seqno accounting, as we will consider the current request to already be completed (prior to deferred seqno assignment) but ring->last_retired_head would have been updated and still could allow us to overwrite the current request before execution. We also employed two different mechanisms to track the active context until it was switched out. The legacy method allowed for waiting upon an active context (it could forcibly evict any vma, including context's), but the execlists method took a step backwards by pinning the vma for the entire active lifespan of the context (the only way to evict was to idle the entire GPU, not individual contexts). However, to circumvent the tricky issue of locking (i.e. we cannot take struct_mutex at the time of i915_gem_request_submit(), where we would want to move the previous context onto the active tracker and unpin it), we take the execlists approach and keep the contexts pinned until retirement. The benefit of the execlists approach, more important for execlists than legacy, was the reduction in work in pinning the context for each request - as the context was kept pinned until idle, it could short circuit the pinning for all active contexts. We introduce new engine vfuncs to pin and unpin the context respectively. The context is pinned at the start of the request, and only unpinned when the following request is retired (this ensures that the context is idle and coherent in main memory before we unpin it). We move the engine->last_context tracking into the retirement itself (rather than during request submission) in order to allow the submission to be reordered or unwound without undue difficultly. And finally an ulterior motive for unifying context handling was to prepare for mock requests. v2: Rename to last_retired_context, split out legacy_context tracking for MI_SET_CONTEXT. Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20161218153724.8439-3-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_drv.h | 4 -- drivers/gpu/drm/i915/i915_gem.c | 2 +- drivers/gpu/drm/i915/i915_gem_context.c | 110 +++++------------------------ drivers/gpu/drm/i915/i915_gem_request.c | 38 ++++++---- drivers/gpu/drm/i915/i915_gem_request.h | 11 --- drivers/gpu/drm/i915/i915_guc_submission.c | 11 --- drivers/gpu/drm/i915/i915_perf.c | 18 ++--- drivers/gpu/drm/i915/intel_display.c | 3 +- drivers/gpu/drm/i915/intel_engine_cs.c | 21 +++++- drivers/gpu/drm/i915/intel_lrc.c | 62 ++++++---------- drivers/gpu/drm/i915/intel_lrc.h | 5 +- drivers/gpu/drm/i915/intel_pm.c | 2 +- drivers/gpu/drm/i915/intel_ringbuffer.c | 58 +++++++++------ drivers/gpu/drm/i915/intel_ringbuffer.h | 23 +++++- 14 files changed, 151 insertions(+), 217 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_gem_request.c') diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 085c8ecd09b5..c89b012af914 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2446,7 +2446,6 @@ struct drm_i915_private { struct i915_perf_stream *exclusive_stream; u32 specific_ctx_id; - struct i915_vma *pinned_rcs_vma; struct hrtimer poll_check_timer; wait_queue_head_t poll_wq; @@ -3488,9 +3487,6 @@ int i915_gem_context_open(struct drm_device *dev, struct drm_file *file); void i915_gem_context_close(struct drm_device *dev, struct drm_file *file); int i915_switch_context(struct drm_i915_gem_request *req); int i915_gem_switch_to_kernel_context(struct drm_i915_private *dev_priv); -struct i915_vma * -i915_gem_context_pin_legacy(struct i915_gem_context *ctx, - unsigned int flags); void i915_gem_context_free(struct kref *ctx_ref); struct i915_gem_context * i915_gem_context_create_gvt(struct drm_device *dev); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 782be625d37d..9b308af89ada 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4206,7 +4206,7 @@ static void assert_kernel_context_is_current(struct drm_i915_private *dev_priv) enum intel_engine_id id; for_each_engine(engine, dev_priv, id) - GEM_BUG_ON(engine->last_context != dev_priv->kernel_context); + GEM_BUG_ON(engine->last_retired_context != dev_priv->kernel_context); } int i915_gem_suspend(struct drm_i915_private *dev_priv) diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index a57c22659a3c..598a70d2b695 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -416,21 +416,6 @@ out: return ctx; } -static void i915_gem_context_unpin(struct i915_gem_context *ctx, - struct intel_engine_cs *engine) -{ - if (i915.enable_execlists) { - intel_lr_context_unpin(ctx, engine); - } else { - struct intel_context *ce = &ctx->engine[engine->id]; - - if (ce->state) - i915_vma_unpin(ce->state); - - i915_gem_context_put(ctx); - } -} - int i915_gem_context_init(struct drm_i915_private *dev_priv) { struct i915_gem_context *ctx; @@ -490,10 +475,13 @@ void i915_gem_context_lost(struct drm_i915_private *dev_priv) lockdep_assert_held(&dev_priv->drm.struct_mutex); for_each_engine(engine, dev_priv, id) { - if (engine->last_context) { - i915_gem_context_unpin(engine->last_context, engine); - engine->last_context = NULL; - } + engine->legacy_active_context = NULL; + + if (!engine->last_retired_context) + continue; + + engine->context_unpin(engine, engine->last_retired_context); + engine->last_retired_context = NULL; } /* Force the GPU state to be restored on enabling */ @@ -715,7 +703,7 @@ static inline bool skip_rcs_switch(struct i915_hw_ppgtt *ppgtt, if (ppgtt && (intel_engine_flag(engine) & ppgtt->pd_dirty_rings)) return false; - return to == engine->last_context; + return to == engine->legacy_active_context; } static bool @@ -727,11 +715,11 @@ needs_pd_load_pre(struct i915_hw_ppgtt *ppgtt, return false; /* Always load the ppgtt on first use */ - if (!engine->last_context) + if (!engine->legacy_active_context) return true; /* Same context without new entries, skip */ - if (engine->last_context == to && + if (engine->legacy_active_context == to && !(intel_engine_flag(engine) & ppgtt->pd_dirty_rings)) return false; @@ -761,57 +749,20 @@ needs_pd_load_post(struct i915_hw_ppgtt *ppgtt, return false; } -struct i915_vma * -i915_gem_context_pin_legacy(struct i915_gem_context *ctx, - unsigned int flags) -{ - struct i915_vma *vma = ctx->engine[RCS].state; - int ret; - - /* Clear this page out of any CPU caches for coherent swap-in/out. - * We only want to do this on the first bind so that we do not stall - * on an active context (which by nature is already on the GPU). - */ - if (!(vma->flags & I915_VMA_GLOBAL_BIND)) { - ret = i915_gem_object_set_to_gtt_domain(vma->obj, false); - if (ret) - return ERR_PTR(ret); - } - - ret = i915_vma_pin(vma, 0, ctx->ggtt_alignment, PIN_GLOBAL | flags); - if (ret) - return ERR_PTR(ret); - - return vma; -} - static int do_rcs_switch(struct drm_i915_gem_request *req) { struct i915_gem_context *to = req->ctx; struct intel_engine_cs *engine = req->engine; struct i915_hw_ppgtt *ppgtt = to->ppgtt ?: req->i915->mm.aliasing_ppgtt; - struct i915_vma *vma; - struct i915_gem_context *from; + struct i915_gem_context *from = engine->legacy_active_context; u32 hw_flags; int ret, i; + GEM_BUG_ON(engine->id != RCS); + if (skip_rcs_switch(ppgtt, engine, to)) return 0; - /* Trying to pin first makes error handling easier. */ - vma = i915_gem_context_pin_legacy(to, 0); - if (IS_ERR(vma)) - return PTR_ERR(vma); - - /* - * Pin can switch back to the default context if we end up calling into - * evict_everything - as a last ditch gtt defrag effort that also - * switches to the default context. Hence we need to reload from here. - * - * XXX: Doing so is painfully broken! - */ - from = engine->last_context; - if (needs_pd_load_pre(ppgtt, engine, to)) { /* Older GENs and non render rings still want the load first, * "PP_DCLV followed by PP_DIR_BASE register through Load @@ -820,7 +771,7 @@ static int do_rcs_switch(struct drm_i915_gem_request *req) trace_switch_mm(engine, to); ret = ppgtt->switch_mm(ppgtt, req); if (ret) - goto err; + return ret; } if (!to->engine[RCS].initialised || i915_gem_context_is_default(to)) @@ -837,29 +788,10 @@ static int do_rcs_switch(struct drm_i915_gem_request *req) if (to != from || (hw_flags & MI_FORCE_RESTORE)) { ret = mi_set_context(req, hw_flags); if (ret) - goto err; - } + return ret; - /* The backing object for the context is done after switching to the - * *next* context. Therefore we cannot retire the previous context until - * the next context has already started running. In fact, the below code - * is a bit suboptimal because the retiring can occur simply after the - * MI_SET_CONTEXT instead of when the next seqno has completed. - */ - if (from != NULL) { - /* As long as MI_SET_CONTEXT is serializing, ie. it flushes the - * whole damn pipeline, we don't need to explicitly mark the - * object dirty. The only exception is that the context must be - * correct in case the object gets swapped out. Ideally we'd be - * able to defer doing this until we know the object would be - * swapped, but there is no way to do that yet. - */ - i915_vma_move_to_active(from->engine[RCS].state, req, 0); - /* state is kept alive until the next request */ - i915_vma_unpin(from->engine[RCS].state); - i915_gem_context_put(from); + engine->legacy_active_context = to; } - engine->last_context = i915_gem_context_get(to); /* GEN8 does *not* require an explicit reload if the PDPs have been * setup, and we do not wish to move them. @@ -900,10 +832,6 @@ static int do_rcs_switch(struct drm_i915_gem_request *req) } return 0; - -err: - i915_vma_unpin(vma); - return ret; } /** @@ -943,12 +871,6 @@ int i915_switch_context(struct drm_i915_gem_request *req) ppgtt->pd_dirty_rings &= ~intel_engine_flag(engine); } - if (to != engine->last_context) { - if (engine->last_context) - i915_gem_context_put(engine->last_context); - engine->last_context = i915_gem_context_get(to); - } - return 0; } diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index fcf22b0e2967..475d557f2301 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -206,6 +206,7 @@ void i915_gem_retire_noop(struct i915_gem_active *active, static void i915_gem_request_retire(struct drm_i915_gem_request *request) { + struct intel_engine_cs *engine = request->engine; struct i915_gem_active *active, *next; lockdep_assert_held(&request->i915->drm.struct_mutex); @@ -216,9 +217,9 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request) trace_i915_gem_request_retire(request); - spin_lock_irq(&request->engine->timeline->lock); + spin_lock_irq(&engine->timeline->lock); list_del_init(&request->link); - spin_unlock_irq(&request->engine->timeline->lock); + spin_unlock_irq(&engine->timeline->lock); /* We know the GPU must have read the request to have * sent us the seqno + interrupt, so use the position @@ -266,17 +267,20 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request) i915_gem_request_remove_from_client(request); - if (request->previous_context) { - if (i915.enable_execlists) - intel_lr_context_unpin(request->previous_context, - request->engine); - } - /* Retirement decays the ban score as it is a sign of ctx progress */ if (request->ctx->ban_score > 0) request->ctx->ban_score--; - i915_gem_context_put(request->ctx); + /* The backing object for the context is done after switching to the + * *next* context. Therefore we cannot retire the previous context until + * the next context has already started running. However, since we + * cannot take the required locks at i915_gem_request_submit() we + * defer the unpinning of the active context to now, retirement of + * the subsequent request. + */ + if (engine->last_retired_context) + engine->context_unpin(engine, engine->last_retired_context); + engine->last_retired_context = request->ctx; dma_fence_signal(&request->fence); @@ -524,10 +528,18 @@ i915_gem_request_alloc(struct intel_engine_cs *engine, if (ret) return ERR_PTR(ret); - ret = reserve_global_seqno(dev_priv); + /* Pinning the contexts may generate requests in order to acquire + * GGTT space, so do this first before we reserve a seqno for + * ourselves. + */ + ret = engine->context_pin(engine, ctx); if (ret) return ERR_PTR(ret); + ret = reserve_global_seqno(dev_priv); + if (ret) + goto err_unpin; + /* Move the oldest request to the slab-cache (if not in use!) */ req = list_first_entry_or_null(&engine->timeline->requests, typeof(*req), link); @@ -593,11 +605,10 @@ i915_gem_request_alloc(struct intel_engine_cs *engine, INIT_LIST_HEAD(&req->active_list); req->i915 = dev_priv; req->engine = engine; - req->ctx = i915_gem_context_get(ctx); + req->ctx = ctx; /* No zalloc, must clear what we need by hand */ req->global_seqno = 0; - req->previous_context = NULL; req->file_priv = NULL; req->batch = NULL; @@ -633,10 +644,11 @@ err_ctx: GEM_BUG_ON(!list_empty(&req->priotree.signalers_list)); GEM_BUG_ON(!list_empty(&req->priotree.waiters_list)); - i915_gem_context_put(ctx); kmem_cache_free(dev_priv->requests, req); err_unreserve: dev_priv->gt.active_requests--; +err_unpin: + engine->context_unpin(engine, ctx); return ERR_PTR(ret); } diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h index e2b077df2da0..8569b35a332a 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.h +++ b/drivers/gpu/drm/i915/i915_gem_request.h @@ -170,17 +170,6 @@ struct drm_i915_gem_request { /** Preallocate space in the ring for the emitting the request */ u32 reserved_space; - /** - * Context related to the previous request. - * As the contexts are accessed by the hardware until the switch is - * completed to a new context, the hardware may still be writing - * to the context object after the breadcrumb is visible. We must - * not unpin/unbind/prune that object whilst still active and so - * we keep the previous context pinned until the following (this) - * request is retired. - */ - struct i915_gem_context *previous_context; - /** Batch buffer related to this request if any (used for * error state dump only). */ diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c index a1232034f37a..3e20fe2be811 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -534,17 +534,6 @@ static void __i915_guc_submit(struct drm_i915_gem_request *rq) static void i915_guc_submit(struct drm_i915_gem_request *rq) { - struct intel_engine_cs *engine = rq->engine; - - /* We keep the previous context alive until we retire the following - * request. This ensures that any the context object is still pinned - * for any residual writes the HW makes into it on the context switch - * into the next object following the breadcrumb. Otherwise, we may - * retire the context too early. - */ - rq->previous_context = engine->last_context; - engine->last_context = rq->ctx; - i915_gem_request_submit(rq); __i915_guc_submit(rq); } diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index ae7bd0ed7b1a..da8537cb8136 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -743,7 +743,7 @@ static int i915_oa_read(struct i915_perf_stream *stream, static int oa_get_render_ctx_id(struct i915_perf_stream *stream) { struct drm_i915_private *dev_priv = stream->dev_priv; - struct i915_vma *vma; + struct intel_engine_cs *engine = dev_priv->engine[RCS]; int ret; ret = i915_mutex_lock_interruptible(&dev_priv->drm); @@ -755,19 +755,16 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream) * * NB: implied RCS engine... */ - vma = i915_gem_context_pin_legacy(stream->ctx, 0); - if (IS_ERR(vma)) { - ret = PTR_ERR(vma); + ret = engine->context_pin(engine, stream->ctx); + if (ret) goto unlock; - } - - dev_priv->perf.oa.pinned_rcs_vma = vma; /* Explicitly track the ID (instead of calling i915_ggtt_offset() * on the fly) considering the difference with gen8+ and * execlists */ - dev_priv->perf.oa.specific_ctx_id = i915_ggtt_offset(vma); + dev_priv->perf.oa.specific_ctx_id = + i915_ggtt_offset(stream->ctx->engine[engine->id].state); unlock: mutex_unlock(&dev_priv->drm.struct_mutex); @@ -785,13 +782,12 @@ unlock: static void oa_put_render_ctx_id(struct i915_perf_stream *stream) { struct drm_i915_private *dev_priv = stream->dev_priv; + struct intel_engine_cs *engine = dev_priv->engine[RCS]; mutex_lock(&dev_priv->drm.struct_mutex); - i915_vma_unpin(dev_priv->perf.oa.pinned_rcs_vma); - dev_priv->perf.oa.pinned_rcs_vma = NULL; - dev_priv->perf.oa.specific_ctx_id = INVALID_CTX_ID; + engine->context_unpin(engine, stream->ctx); mutex_unlock(&dev_priv->drm.struct_mutex); } diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index bc1af87789bc..9f356ad5329e 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -12295,7 +12295,8 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc, INIT_WORK(&work->mmio_work, intel_mmio_flip_work_func); queue_work(system_unbound_wq, &work->mmio_work); } else { - request = i915_gem_request_alloc(engine, engine->last_context); + request = i915_gem_request_alloc(engine, + dev_priv->kernel_context); if (IS_ERR(request)) { ret = PTR_ERR(request); goto cleanup_unpin; diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index e8afe1185831..97bbbc3d6aa8 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -304,15 +304,30 @@ int intel_engine_init_common(struct intel_engine_cs *engine) { int ret; - ret = intel_engine_init_breadcrumbs(engine); + /* We may need to do things with the shrinker which + * require us to immediately switch back to the default + * context. This can cause a problem as pinning the + * default context also requires GTT space which may not + * be available. To avoid this we always pin the default + * context. + */ + ret = engine->context_pin(engine, engine->i915->kernel_context); if (ret) return ret; + ret = intel_engine_init_breadcrumbs(engine); + if (ret) + goto err_unpin; + ret = i915_gem_render_state_init(engine); if (ret) - return ret; + goto err_unpin; return 0; + +err_unpin: + engine->context_unpin(engine, engine->i915->kernel_context); + return ret; } /** @@ -330,6 +345,8 @@ void intel_engine_cleanup_common(struct intel_engine_cs *engine) intel_engine_fini_breadcrumbs(engine); intel_engine_cleanup_cmd_parser(engine); i915_gem_batch_pool_fini(&engine->batch_pool); + + engine->context_unpin(engine, engine->i915->kernel_context); } u64 intel_engine_get_active_head(struct intel_engine_cs *engine) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index b848b5f205ce..599afedc4494 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -512,15 +512,6 @@ static void execlists_dequeue(struct intel_engine_cs *engine) RB_CLEAR_NODE(&cursor->priotree.node); cursor->priotree.priority = INT_MAX; - /* We keep the previous context alive until we retire the - * following request. This ensures that any the context object - * is still pinned for any residual writes the HW makes into it - * on the context switch into the next object following the - * breadcrumb. Otherwise, we may retire the context too early. - */ - cursor->previous_context = engine->last_context; - engine->last_context = cursor->ctx; - __i915_gem_request_submit(cursor); last = cursor; submit = true; @@ -772,8 +763,8 @@ static void execlists_schedule(struct drm_i915_gem_request *request, int prio) /* XXX Do we need to preempt to make room for us and our deps? */ } -static int intel_lr_context_pin(struct i915_gem_context *ctx, - struct intel_engine_cs *engine) +static int execlists_context_pin(struct intel_engine_cs *engine, + struct i915_gem_context *ctx) { struct intel_context *ce = &ctx->engine[engine->id]; void *vaddr; @@ -784,6 +775,12 @@ static int intel_lr_context_pin(struct i915_gem_context *ctx, if (ce->pin_count++) return 0; + if (!ce->state) { + ret = execlists_context_deferred_alloc(ctx, engine); + if (ret) + goto err; + } + ret = i915_vma_pin(ce->state, 0, GEN8_LR_CONTEXT_ALIGN, PIN_OFFSET_BIAS | GUC_WOPCM_TOP | PIN_GLOBAL); if (ret) @@ -825,8 +822,8 @@ err: return ret; } -void intel_lr_context_unpin(struct i915_gem_context *ctx, - struct intel_engine_cs *engine) +static void execlists_context_unpin(struct intel_engine_cs *engine, + struct i915_gem_context *ctx) { struct intel_context *ce = &ctx->engine[engine->id]; @@ -850,24 +847,17 @@ int intel_logical_ring_alloc_request_extras(struct drm_i915_gem_request *request struct intel_context *ce = &request->ctx->engine[engine->id]; int ret; + GEM_BUG_ON(!ce->pin_count); + /* Flush enough space to reduce the likelihood of waiting after * we start building the request - in which case we will just * have to repeat work. */ request->reserved_space += EXECLISTS_REQUEST_SIZE; - if (!ce->state) { - ret = execlists_context_deferred_alloc(request->ctx, engine); - if (ret) - return ret; - } - + GEM_BUG_ON(!ce->ring); request->ring = ce->ring; - ret = intel_lr_context_pin(request->ctx, engine); - if (ret) - return ret; - if (i915.enable_guc_submission) { /* * Check that the GuC has space for the request before @@ -876,7 +866,7 @@ int intel_logical_ring_alloc_request_extras(struct drm_i915_gem_request *request */ ret = i915_guc_wq_reserve(request); if (ret) - goto err_unpin; + goto err; } ret = intel_ring_begin(request, 0); @@ -904,8 +894,7 @@ int intel_logical_ring_alloc_request_extras(struct drm_i915_gem_request *request err_unreserve: if (i915.enable_guc_submission) i915_guc_wq_unreserve(request); -err_unpin: - intel_lr_context_unpin(request->ctx, engine); +err: return ret; } @@ -1789,13 +1778,12 @@ void intel_logical_ring_cleanup(struct intel_engine_cs *engine) if (engine->cleanup) engine->cleanup(engine); - intel_engine_cleanup_common(engine); - if (engine->status_page.vma) { i915_gem_object_unpin_map(engine->status_page.vma->obj); engine->status_page.vma = NULL; } - intel_lr_context_unpin(dev_priv->kernel_context, engine); + + intel_engine_cleanup_common(engine); lrc_destroy_wa_ctx_obj(engine); engine->i915 = NULL; @@ -1820,6 +1808,10 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine) /* Default vfuncs which can be overriden by each engine. */ engine->init_hw = gen8_init_common_ring; engine->reset_hw = reset_common_ring; + + engine->context_pin = execlists_context_pin; + engine->context_unpin = execlists_context_unpin; + engine->emit_flush = gen8_emit_flush; engine->emit_breadcrumb = gen8_emit_breadcrumb; engine->emit_breadcrumb_sz = gen8_emit_breadcrumb_sz; @@ -1902,18 +1894,6 @@ logical_ring_init(struct intel_engine_cs *engine) if (ret) goto error; - ret = execlists_context_deferred_alloc(dctx, engine); - if (ret) - goto error; - - /* As this is the default context, always pin it */ - ret = intel_lr_context_pin(dctx, engine); - if (ret) { - DRM_ERROR("Failed to pin context for %s: %d\n", - engine->name, ret); - goto error; - } - /* And setup the hardware status page. */ ret = lrc_setup_hws(engine, dctx->engine[engine->id].state); if (ret) { diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h index 7c6403243394..b5630331086a 100644 --- a/drivers/gpu/drm/i915/intel_lrc.h +++ b/drivers/gpu/drm/i915/intel_lrc.h @@ -79,13 +79,10 @@ int intel_engines_init(struct drm_i915_private *dev_priv); #define LRC_PPHWSP_PN (LRC_GUCSHR_PN + 1) #define LRC_STATE_PN (LRC_PPHWSP_PN + 1) +struct drm_i915_private; struct i915_gem_context; uint32_t intel_lr_context_size(struct intel_engine_cs *engine); -void intel_lr_context_unpin(struct i915_gem_context *ctx, - struct intel_engine_cs *engine); - -struct drm_i915_private; void intel_lr_context_resume(struct drm_i915_private *dev_priv); uint64_t intel_lr_context_descriptor(struct i915_gem_context *ctx, diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 06e55967f180..1d4699d8fb10 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -6792,7 +6792,7 @@ static void __intel_autoenable_gt_powersave(struct work_struct *work) goto out; rcs = dev_priv->engine[RCS]; - if (rcs->last_context) + if (rcs->last_retired_context) goto out; if (!rcs->init_context) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 0b7d13b6e228..00ff572541b6 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1939,8 +1939,26 @@ intel_ring_free(struct intel_ring *ring) kfree(ring); } -static int intel_ring_context_pin(struct i915_gem_context *ctx, - struct intel_engine_cs *engine) +static int context_pin(struct i915_gem_context *ctx, unsigned int flags) +{ + struct i915_vma *vma = ctx->engine[RCS].state; + int ret; + + /* Clear this page out of any CPU caches for coherent swap-in/out. + * We only want to do this on the first bind so that we do not stall + * on an active context (which by nature is already on the GPU). + */ + if (!(vma->flags & I915_VMA_GLOBAL_BIND)) { + ret = i915_gem_object_set_to_gtt_domain(vma->obj, false); + if (ret) + return ret; + } + + return i915_vma_pin(vma, 0, ctx->ggtt_alignment, PIN_GLOBAL | flags); +} + +static int intel_ring_context_pin(struct intel_engine_cs *engine, + struct i915_gem_context *ctx) { struct intel_context *ce = &ctx->engine[engine->id]; int ret; @@ -1951,13 +1969,15 @@ static int intel_ring_context_pin(struct i915_gem_context *ctx, return 0; if (ce->state) { - struct i915_vma *vma; + unsigned int flags; + + flags = 0; + if (ctx == ctx->i915->kernel_context) + flags = PIN_HIGH; - vma = i915_gem_context_pin_legacy(ctx, PIN_HIGH); - if (IS_ERR(vma)) { - ret = PTR_ERR(vma); + ret = context_pin(ctx, flags); + if (ret) goto error; - } } /* The kernel context is only used as a placeholder for flushing the @@ -1978,12 +1998,13 @@ error: return ret; } -static void intel_ring_context_unpin(struct i915_gem_context *ctx, - struct intel_engine_cs *engine) +static void intel_ring_context_unpin(struct intel_engine_cs *engine, + struct i915_gem_context *ctx) { struct intel_context *ce = &ctx->engine[engine->id]; lockdep_assert_held(&ctx->i915->drm.struct_mutex); + GEM_BUG_ON(ce->pin_count == 0); if (--ce->pin_count) return; @@ -2008,17 +2029,6 @@ static int intel_init_ring_buffer(struct intel_engine_cs *engine) if (ret) goto error; - /* We may need to do things with the shrinker which - * require us to immediately switch back to the default - * context. This can cause a problem as pinning the - * default context also requires GTT space which may not - * be available. To avoid this we always pin the default - * context. - */ - ret = intel_ring_context_pin(dev_priv->kernel_context, engine); - if (ret) - goto error; - ring = intel_engine_create_ring(engine, 32 * PAGE_SIZE); if (IS_ERR(ring)) { ret = PTR_ERR(ring); @@ -2077,8 +2087,6 @@ void intel_engine_cleanup(struct intel_engine_cs *engine) intel_engine_cleanup_common(engine); - intel_ring_context_unpin(dev_priv->kernel_context, engine); - engine->i915 = NULL; dev_priv->engine[engine->id] = NULL; kfree(engine); @@ -2099,12 +2107,15 @@ int intel_ring_alloc_request_extras(struct drm_i915_gem_request *request) { int ret; + GEM_BUG_ON(!request->ctx->engine[request->engine->id].pin_count); + /* Flush enough space to reduce the likelihood of waiting after * we start building the request - in which case we will just * have to repeat work. */ request->reserved_space += LEGACY_REQUEST_SIZE; + GEM_BUG_ON(!request->engine->buffer); request->ring = request->engine->buffer; ret = intel_ring_begin(request, 0); @@ -2584,6 +2595,9 @@ static void intel_ring_default_vfuncs(struct drm_i915_private *dev_priv, engine->init_hw = init_ring_common; engine->reset_hw = reset_ring_common; + engine->context_pin = intel_ring_context_pin; + engine->context_unpin = intel_ring_context_unpin; + engine->emit_breadcrumb = i9xx_emit_breadcrumb; engine->emit_breadcrumb_sz = i9xx_emit_breadcrumb_sz; if (i915.semaphores) { diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 3f43adefd1c0..4f1271821fa9 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -266,6 +266,10 @@ struct intel_engine_cs { void (*reset_hw)(struct intel_engine_cs *engine, struct drm_i915_gem_request *req); + int (*context_pin)(struct intel_engine_cs *engine, + struct i915_gem_context *ctx); + void (*context_unpin)(struct intel_engine_cs *engine, + struct i915_gem_context *ctx); int (*init_context)(struct drm_i915_gem_request *req); int (*emit_flush)(struct drm_i915_gem_request *request, @@ -379,7 +383,24 @@ struct intel_engine_cs { bool preempt_wa; u32 ctx_desc_template; - struct i915_gem_context *last_context; + /* Contexts are pinned whilst they are active on the GPU. The last + * context executed remains active whilst the GPU is idle - the + * switch away and write to the context object only occurs on the + * next execution. Contexts are only unpinned on retirement of the + * following request ensuring that we can always write to the object + * on the context switch even after idling. Across suspend, we switch + * to the kernel context and trash it as the save may not happen + * before the hardware is powered down. + */ + struct i915_gem_context *last_retired_context; + + /* We track the current MI_SET_CONTEXT in order to eliminate + * redudant context switches. This presumes that requests are not + * reordered! Or when they are the tracking is updated along with + * the emission of individual requests into the legacy command + * stream (ring). + */ + struct i915_gem_context *legacy_active_context; struct intel_engine_hangcheck hangcheck; -- cgit From f73e73999d39a274adb8b342d7d8e722ffcf92d5 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sun, 18 Dec 2016 15:37:24 +0000 Subject: drm/i915: Swap if(enable_execlists) in i915_gem_request_alloc for a vfunc A fairly trivial move of a matching pair of routines (for preparing a request for construction) onto an engine vfunc. The ulterior motive is to be able to create a mock request implementation. Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20161218153724.8439-7-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_request.c | 5 +---- drivers/gpu/drm/i915/intel_lrc.c | 4 +++- drivers/gpu/drm/i915/intel_lrc.h | 2 -- drivers/gpu/drm/i915/intel_ringbuffer.c | 4 +++- drivers/gpu/drm/i915/intel_ringbuffer.h | 3 +-- 5 files changed, 8 insertions(+), 10 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_gem_request.c') diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index 475d557f2301..7427aac74923 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -622,10 +622,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine, req->reserved_space = MIN_SPACE_FOR_ADD_REQUEST; GEM_BUG_ON(req->reserved_space < engine->emit_breadcrumb_sz); - if (i915.enable_execlists) - ret = intel_logical_ring_alloc_request_extras(req); - else - ret = intel_ring_alloc_request_extras(req); + ret = engine->request_alloc(req); if (ret) goto err_ctx; diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 1f8f35a94157..d322d3c11201 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -845,7 +845,7 @@ static void execlists_context_unpin(struct intel_engine_cs *engine, i915_gem_context_put(ctx); } -int intel_logical_ring_alloc_request_extras(struct drm_i915_gem_request *request) +static int execlists_request_alloc(struct drm_i915_gem_request *request) { struct intel_engine_cs *engine = request->engine; struct intel_context *ce = &request->ctx->engine[engine->id]; @@ -1816,6 +1816,8 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine) engine->context_pin = execlists_context_pin; engine->context_unpin = execlists_context_unpin; + engine->request_alloc = execlists_request_alloc; + engine->emit_flush = gen8_emit_flush; engine->emit_breadcrumb = gen8_emit_breadcrumb; engine->emit_breadcrumb_sz = gen8_emit_breadcrumb_sz; diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h index b5630331086a..01ba36ea125e 100644 --- a/drivers/gpu/drm/i915/intel_lrc.h +++ b/drivers/gpu/drm/i915/intel_lrc.h @@ -63,8 +63,6 @@ enum { }; /* Logical Rings */ -int intel_logical_ring_alloc_request_extras(struct drm_i915_gem_request *request); -int intel_logical_ring_reserve_space(struct drm_i915_gem_request *request); void intel_logical_ring_stop(struct intel_engine_cs *engine); void intel_logical_ring_cleanup(struct intel_engine_cs *engine); int logical_render_ring_init(struct intel_engine_cs *engine); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 00ff572541b6..69ccf4fd22f0 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -2103,7 +2103,7 @@ void intel_legacy_submission_resume(struct drm_i915_private *dev_priv) } } -int intel_ring_alloc_request_extras(struct drm_i915_gem_request *request) +static int ring_request_alloc(struct drm_i915_gem_request *request) { int ret; @@ -2598,6 +2598,8 @@ static void intel_ring_default_vfuncs(struct drm_i915_private *dev_priv, engine->context_pin = intel_ring_context_pin; engine->context_unpin = intel_ring_context_unpin; + engine->request_alloc = ring_request_alloc; + engine->emit_breadcrumb = i9xx_emit_breadcrumb; engine->emit_breadcrumb_sz = i9xx_emit_breadcrumb_sz; if (i915.semaphores) { diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 4f1271821fa9..0969de7d5ab7 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -270,6 +270,7 @@ struct intel_engine_cs { struct i915_gem_context *ctx); void (*context_unpin)(struct intel_engine_cs *engine, struct i915_gem_context *ctx); + int (*request_alloc)(struct drm_i915_gem_request *req); int (*init_context)(struct drm_i915_gem_request *req); int (*emit_flush)(struct drm_i915_gem_request *request, @@ -491,8 +492,6 @@ void intel_engine_cleanup(struct intel_engine_cs *engine); void intel_legacy_submission_resume(struct drm_i915_private *dev_priv); -int intel_ring_alloc_request_extras(struct drm_i915_gem_request *request); - int __must_check intel_ring_begin(struct drm_i915_gem_request *req, int n); int __must_check intel_ring_cacheline_align(struct drm_i915_gem_request *req); -- cgit From c781c978e784c50dcd7cb312fe17f5281923f55b Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 11 Jan 2017 14:08:58 +0000 Subject: drm/i915: Add a sanity check that no request is submitted in the middle It is an error to start a new request on the same timeline (ringbuffer) as the current one before the current is submitted. If there are two requests emitting to the ringbuffer at the same time, the operation is undefined. We can catch this by checking for the timeline having a later seqno than ours when we come to submit our request. Currently we have this check at the end of __i915_add_request, but having an early check as well isolates a failure in the caller versus a failure in sealing the request (i.e. from inside __i915_add_request itself). For example, CI is currently tripping over this late assertion on ctg/ilk: [ 100.329399] [IGT] gem_cs_tlb: starting subtest basic-default [ 100.336333] ------------[ cut here ]------------ [ 100.336341] kernel BUG at drivers/gpu/drm/i915/i915_gem_request.c:908! [ 100.336347] invalid opcode: 0000 [#1] PREEMPT SMP [ 100.336351] Modules linked in: snd_hda_intel i915 snd_hda_codec_generic snd_hda_codec snd_hwdep snd_hda_core snd_pcm coretemp mei_me lpc_ich mei e1000e ptp pps_core [last unloaded: i915] [ 100.336373] CPU: 0 PID: 6308 Comm: gem_cs_tlb Tainted: G U 4.10.0-rc3-CI-CI_DRM_2045+ #1 [ 100.336380] Hardware name: LENOVO 7465CTO/7465CTO, BIOS 6DET44WW (2.08 ) 04/22/2009 [ 100.336386] task: ffff88012b738040 task.stack: ffffc90000560000 [ 100.336441] RIP: 0010:__i915_add_request+0x4aa/0x510 [i915] [ 100.336445] RSP: 0018:ffffc90000563ac0 EFLAGS: 00010212 [ 100.336451] RAX: 0000000000005d52 RBX: ffff880133bb84c0 RCX: 0000000000000001 [ 100.336456] RDX: 0000000080000001 RSI: ffff88012b738860 RDI: 00000000ffffffff [ 100.336461] RBP: ffffc90000563b00 R08: ffff880133bb8780 R09: 0000000000000000 [ 100.336466] R10: 0000000000000000 R11: 0000000000000000 R12: ffff88012f53d950 [ 100.336472] R13: ffff88012a2b0af8 R14: ffff88012a5b0008 R15: ffff88012f53d960 [ 100.336477] FS: 00007f0d19da38c0(0000) GS:ffff88013bc00000(0000) knlGS:0000000000000000 [ 100.336483] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 100.336488] CR2: 00007f0d17706000 CR3: 000000012aa3e000 CR4: 00000000000406f0 [ 100.336496] Call Trace: [ 100.336527] i915_gem_switch_to_kernel_context+0x131/0x1b0 [i915] [ 100.336559] i915_gem_evict_vm+0x202/0x2b0 [i915] [ 100.336590] i915_gem_execbuffer_reserve.isra.9+0x3ae/0x440 [i915] [ 100.336623] i915_gem_do_execbuffer.isra.15+0x6d9/0x1b20 [i915] [ 100.336656] i915_gem_execbuffer2+0xc0/0x250 [i915] [ 100.336666] drm_ioctl+0x200/0x450 [ 100.336697] ? i915_gem_execbuffer+0x330/0x330 [i915] [ 100.336708] do_vfs_ioctl+0x90/0x6e0 [ 100.336716] ? up_read+0x1a/0x40 [ 100.336723] ? trace_hardirqs_on_caller+0x122/0x1b0 [ 100.336730] SyS_ioctl+0x3c/0x70 [ 100.336738] entry_SYSCALL_64_fastpath+0x1c/0xb1 [ 100.336745] RIP: 0033:0x7f0d187cb357 [ 100.336750] RSP: 002b:00007ffe0b2f7c28 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 [ 100.336761] RAX: ffffffffffffffda RBX: 00007ffe0b2f7d60 RCX: 00007f0d187cb357 [ 100.336768] RDX: 00007ffe0b2f7d00 RSI: 0000000040406469 RDI: 0000000000000003 [ 100.336775] RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000022 [ 100.336782] R10: 0000000000000007 R11: 0000000000000246 R12: 0000000000000002 [ 100.336789] R13: 0000000000419101 R14: 00007ffe0b2f7d60 R15: 00007ffe0b2f7d50 [ 100.336797] Code: 5f 74 1e e9 d4 fb ff ff e8 bc 1e 9c e0 e9 ae fb ff ff 4c 89 e7 e8 77 22 fd ff e9 88 fd ff ff 0f 0b e8 a3 1e 9c e0 e9 b1 fb ff ff <0f> 0b 0f 0b e8 fd af ab e0 85 c0 75 c2 48 c7 c2 80 2c 71 a0 be [ 100.336877] RIP: __i915_add_request+0x4aa/0x510 [i915] RSP: ffffc90000563ac0 [ 100.336886] ---[ end trace 22b36545479e5eb7 ]--- Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20170111140858.1922-1-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_gem_request.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'drivers/gpu/drm/i915/i915_gem_request.c') diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index 99056b948eda..e614398abe2f 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -851,6 +851,13 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches) lockdep_assert_held(&request->i915->drm.struct_mutex); trace_i915_gem_request_add(request); + /* Make sure that no request gazumped us - if it was allocated after + * our i915_gem_request_alloc() and called __i915_add_request() before + * us, the timeline will hold its seqno which is later than ours. + */ + GEM_BUG_ON(i915_seqno_passed(timeline->last_submitted_seqno, + request->fence.seqno)); + /* * To ensure that this call will not fail, space for its emissions * should already have been reserved in the ring buffer. Let the ring -- cgit From 6ffb7d0756e34427a39f6ffdf861fe93b49fc0e8 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sat, 14 Jan 2017 16:23:33 +0000 Subject: drm/i915: Construct a request even if the GPU is currently hung As we now have the ability to directly reset the GPU from the waiter (and so do not need to drop the lock in order to let the reset proceed) and also do not lose requests over a reset, we can now simply queue the request to occur after the reset rather than roundtripping to userspace (or worse failing with EIO). Signed-off-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20170114162334.10271-1-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_gem_request.c | 28 +++------------------------- 1 file changed, 3 insertions(+), 25 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_gem_request.c') diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index e614398abe2f..72b7f7d9461d 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -307,26 +307,6 @@ void i915_gem_request_retire_upto(struct drm_i915_gem_request *req) } while (tmp != req); } -static int i915_gem_check_wedge(struct drm_i915_private *dev_priv) -{ - struct i915_gpu_error *error = &dev_priv->gpu_error; - - if (i915_terminally_wedged(error)) - return -EIO; - - if (i915_reset_in_progress(error)) { - /* Non-interruptible callers can't handle -EAGAIN, hence return - * -EIO unconditionally for these. - */ - if (!dev_priv->mm.interruptible) - return -EIO; - - return -EAGAIN; - } - - return 0; -} - static int i915_gem_init_global_seqno(struct drm_i915_private *i915, u32 seqno) { struct i915_gem_timeline *timeline = &i915->gt.global_timeline; @@ -521,12 +501,10 @@ i915_gem_request_alloc(struct intel_engine_cs *engine, lockdep_assert_held(&dev_priv->drm.struct_mutex); /* ABI: Before userspace accesses the GPU (e.g. execbuffer), report - * EIO if the GPU is already wedged, or EAGAIN to drop the struct_mutex - * and restart. + * EIO if the GPU is already wedged. */ - ret = i915_gem_check_wedge(dev_priv); - if (ret) - return ERR_PTR(ret); + if (i915_terminally_wedged(&dev_priv->gpu_error)) + return ERR_PTR(-EIO); /* Pinning the contexts may generate requests in order to acquire * GGTT space, so do this first before we reserve a seqno for -- cgit From 44a027058b24541cc7ec499e6fb91db4b130648a Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 8 Feb 2017 18:12:38 +0000 Subject: drm/i915: Check for timeout completion when waiting for the rq to submitted We first wait for a request to be submitted to hw and assigned a seqno, before we can wait for the hw to signal completion (otherwise we don't know the hw id we need to wait upon). Whilst waiting for the request to be submitted, we may exceed the user's timeout and need to propagate the error back. v2: Make ETIME into an error from wait_for_execute for consistent exit handling. Reported-by: Tvrtko Ursulin Fixes: 4680816be336 ("drm/i915: Wait first for submission, before waiting for request completion") Testcase: igt/gem_wait/basic-await Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: Joonas Lahtinen Cc: # v4.10-rc1+ Cc: stable@vger.kernel.org Link: http://patchwork.freedesktop.org/patch/msgid/20170208181238.7232-1-chris@chris-wilson.co.uk Reviewed-by: Tvrtko Ursulin (cherry picked from commit 969bb72cbfd906d347cf76dc9b8c8dbaf83ba27a) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_gem_request.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/i915/i915_gem_request.c') diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index 72b7f7d9461d..f31deeb72703 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -1025,8 +1025,13 @@ __i915_request_wait_for_execute(struct drm_i915_gem_request *request, break; } + if (!timeout) { + timeout = -ETIME; + break; + } + timeout = io_schedule_timeout(timeout); - } while (timeout); + } while (1); finish_wait(&request->execute.wait, &wait); if (flags & I915_WAIT_LOCKED) -- cgit From e601757102cfd3eeae068f53b3bc1234f3a2b2e9 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 1 Feb 2017 16:36:40 +0100 Subject: sched/headers: Prepare for new header dependencies before moving code to We are going to split out of , which will have to be picked up from other headers and .c files. Create a trivial placeholder file that just maps to to make this patch obviously correct and bisectable. Include the new header in the files that are going to need it. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/arm/mach-bcm/platsmp.c | 1 + arch/arm/mach-omap2/pm-debug.c | 1 + arch/arm/probes/kprobes/test-core.c | 1 + arch/cris/kernel/time.c | 2 +- arch/ia64/kernel/setup.c | 1 + arch/microblaze/kernel/timer.c | 1 + arch/mn10300/kernel/time.c | 1 + arch/parisc/kernel/setup.c | 1 + arch/parisc/kernel/time.c | 1 + arch/powerpc/kernel/time.c | 1 + arch/s390/kernel/time.c | 1 + arch/sparc/kernel/ds.c | 1 + arch/sparc/kernel/viohs.c | 1 + arch/tile/kernel/time.c | 1 + arch/x86/events/amd/ibs.c | 1 + arch/x86/events/core.c | 1 + arch/x86/kernel/cpu/amd.c | 1 + arch/x86/kernel/cpu/centaur.c | 1 + arch/x86/kernel/cpu/common.c | 1 + arch/x86/kernel/cpu/cyrix.c | 1 + arch/x86/kernel/cpu/intel.c | 1 + arch/x86/kernel/cpu/transmeta.c | 1 + arch/x86/kernel/kvmclock.c | 1 + arch/x86/kernel/nmi.c | 1 + arch/x86/kernel/tsc.c | 1 + block/cfq-iosched.c | 1 + drivers/acpi/apei/ghes.c | 1 + drivers/clocksource/arm_arch_timer.c | 1 + drivers/clocksource/pxa_timer.c | 1 + drivers/clocksource/timer-digicolor.c | 1 + drivers/cpuidle/cpuidle.c | 1 + drivers/firmware/tegra/bpmp.c | 1 + drivers/gpu/drm/i915/i915_gem_request.c | 2 ++ drivers/gpu/drm/i915/intel_drv.h | 1 + drivers/md/bcache/bset.c | 1 + drivers/md/bcache/btree.c | 1 + drivers/md/bcache/sysfs.c | 1 + drivers/md/bcache/util.c | 1 + drivers/md/bcache/util.h | 1 + drivers/md/bcache/writeback.c | 1 + drivers/misc/cxl/native.c | 1 + drivers/net/irda/pxaficp_ir.c | 1 + drivers/perf/arm_pmu.c | 1 + drivers/vhost/net.c | 1 + include/linux/blkdev.h | 1 + include/linux/sched/clock.h | 6 ++++++ include/linux/skbuff.h | 1 + include/net/busy_poll.h | 1 + kernel/events/core.c | 1 + kernel/locking/lockdep.c | 1 + kernel/locking/qspinlock_stat.h | 1 + kernel/printk/printk.c | 1 + kernel/sched/clock.c | 1 + kernel/sched/core.c | 1 + kernel/sched/sched.h | 1 + kernel/time/sched_clock.c | 1 + kernel/time/tick-sched.c | 1 + kernel/torture.c | 1 + kernel/trace/ring_buffer.c | 1 + kernel/trace/trace_clock.c | 1 + kernel/trace/trace_hwlat.c | 1 + kernel/trace/trace_output.c | 1 + kernel/watchdog.c | 1 + lib/plist.c | 1 + net/ipv4/tcp_cdg.c | 2 ++ 65 files changed, 72 insertions(+), 1 deletion(-) create mode 100644 include/linux/sched/clock.h (limited to 'drivers/gpu/drm/i915/i915_gem_request.c') diff --git a/arch/arm/mach-bcm/platsmp.c b/arch/arm/mach-bcm/platsmp.c index 582886d0d02f..9e3f275934eb 100644 --- a/arch/arm/mach-bcm/platsmp.c +++ b/arch/arm/mach-bcm/platsmp.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include diff --git a/arch/arm/mach-omap2/pm-debug.c b/arch/arm/mach-omap2/pm-debug.c index 003a6cb248be..5c46ea6756d7 100644 --- a/arch/arm/mach-omap2/pm-debug.c +++ b/arch/arm/mach-omap2/pm-debug.c @@ -21,6 +21,7 @@ #include #include +#include #include #include #include diff --git a/arch/arm/probes/kprobes/test-core.c b/arch/arm/probes/kprobes/test-core.c index 9775de22e2ff..c893726aa52d 100644 --- a/arch/arm/probes/kprobes/test-core.c +++ b/arch/arm/probes/kprobes/test-core.c @@ -203,6 +203,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/cris/kernel/time.c b/arch/cris/kernel/time.c index 2dda6da71521..bc562cf511a6 100644 --- a/arch/cris/kernel/time.c +++ b/arch/cris/kernel/time.c @@ -29,7 +29,7 @@ #include #include #include -#include /* just for sched_clock() - funny that */ +#include #define D(x) diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c index d68322966f33..32a6cc36296f 100644 --- a/arch/ia64/kernel/setup.c +++ b/arch/ia64/kernel/setup.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/microblaze/kernel/timer.c b/arch/microblaze/kernel/timer.c index 1d6fad50fa76..999066192715 100644 --- a/arch/microblaze/kernel/timer.c +++ b/arch/microblaze/kernel/timer.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/mn10300/kernel/time.c b/arch/mn10300/kernel/time.c index 67c6416a58f8..06b83b17c5f1 100644 --- a/arch/mn10300/kernel/time.c +++ b/arch/mn10300/kernel/time.c @@ -10,6 +10,7 @@ * 2 of the Licence, or (at your option) any later version. */ #include +#include #include #include #include diff --git a/arch/parisc/kernel/setup.c b/arch/parisc/kernel/setup.c index 068ed3607bac..dee6f9d6a153 100644 --- a/arch/parisc/kernel/setup.c +++ b/arch/parisc/kernel/setup.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #include diff --git a/arch/parisc/kernel/time.c b/arch/parisc/kernel/time.c index 1e22f981cd81..89421df70160 100644 --- a/arch/parisc/kernel/time.c +++ b/arch/parisc/kernel/time.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index bc84a8d47b9e..37833c5dc274 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -34,6 +34,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index de66abb479c9..c31da46bc037 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/sparc/kernel/ds.c b/arch/sparc/kernel/ds.c index f87a55d77094..b542cc7c8d94 100644 --- a/arch/sparc/kernel/ds.c +++ b/arch/sparc/kernel/ds.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/sparc/kernel/viohs.c b/arch/sparc/kernel/viohs.c index 526fcb5d8ce9..b30b30ab3ddd 100644 --- a/arch/sparc/kernel/viohs.c +++ b/arch/sparc/kernel/viohs.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include diff --git a/arch/tile/kernel/time.c b/arch/tile/kernel/time.c index c9357012b1c8..5bd4e88c7c60 100644 --- a/arch/tile/kernel/time.c +++ b/arch/tile/kernel/time.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/x86/events/amd/ibs.c b/arch/x86/events/amd/ibs.c index 496e60391fac..786fd875de92 100644 --- a/arch/x86/events/amd/ibs.c +++ b/arch/x86/events/amd/ibs.c @@ -12,6 +12,7 @@ #include #include #include +#include #include diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index 1635c0c8df23..2ecc0e97772b 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 4e95b2e0d95f..35a5d5dca2fa 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -5,6 +5,7 @@ #include #include +#include #include #include #include diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c index 2c234a6d94c4..adc0ebd8bed0 100644 --- a/arch/x86/kernel/cpu/centaur.c +++ b/arch/x86/kernel/cpu/centaur.c @@ -1,5 +1,6 @@ #include +#include #include #include diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index c64ca5929cb5..c7e2ca966386 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c index 47416f959a48..0a3bc19de017 100644 --- a/arch/x86/kernel/cpu/cyrix.c +++ b/arch/x86/kernel/cpu/cyrix.c @@ -10,6 +10,7 @@ #include #include #include +#include #include "cpu.h" diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 017ecd3bb553..fe0a615a051b 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/x86/kernel/cpu/transmeta.c b/arch/x86/kernel/cpu/transmeta.c index c1ea5b999839..8457b4978668 100644 --- a/arch/x86/kernel/cpu/transmeta.c +++ b/arch/x86/kernel/cpu/transmeta.c @@ -1,5 +1,6 @@ #include #include +#include #include #include #include diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index bae6ea6cfb94..d88967659098 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index bfe4d6c96fbd..d17b1a940add 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -20,6 +20,7 @@ #include #include #include +#include #if defined(CONFIG_EDAC) #include diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 2724dc82f992..46bcda4cb1c2 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -2,6 +2,7 @@ #include #include +#include #include #include #include diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 137944777859..440b95ee593c 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -8,6 +8,7 @@ */ #include #include +#include #include #include #include diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index e53bef6cf53c..b192b42a8351 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -44,6 +44,7 @@ #include #include #include +#include #include #include diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c index 93aa1364376a..7a8a4117f123 100644 --- a/drivers/clocksource/arm_arch_timer.c +++ b/drivers/clocksource/arm_arch_timer.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include diff --git a/drivers/clocksource/pxa_timer.c b/drivers/clocksource/pxa_timer.c index 9cae38eebec2..1c24de215c14 100644 --- a/drivers/clocksource/pxa_timer.c +++ b/drivers/clocksource/pxa_timer.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include diff --git a/drivers/clocksource/timer-digicolor.c b/drivers/clocksource/timer-digicolor.c index 10318cc99c0e..e9f50d289362 100644 --- a/drivers/clocksource/timer-digicolor.c +++ b/drivers/clocksource/timer-digicolor.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c index 62810ff3b00f..548b90be7685 100644 --- a/drivers/cpuidle/cpuidle.c +++ b/drivers/cpuidle/cpuidle.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/firmware/tegra/bpmp.c b/drivers/firmware/tegra/bpmp.c index 4ff02d310868..84e4c9a58a0c 100644 --- a/drivers/firmware/tegra/bpmp.c +++ b/drivers/firmware/tegra/bpmp.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index f31deeb72703..df3fef393dbe 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -24,6 +24,8 @@ #include #include +#include +#include #include "i915_drv.h" diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index b9cde116dab3..344f238b283f 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -28,6 +28,7 @@ #include #include #include +#include #include #include "i915_drv.h" #include diff --git a/drivers/md/bcache/bset.c b/drivers/md/bcache/bset.c index 646fe85261c1..18526d44688d 100644 --- a/drivers/md/bcache/bset.c +++ b/drivers/md/bcache/bset.c @@ -11,6 +11,7 @@ #include "bset.h" #include +#include #include #include diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index a43eedd5804d..384559af310e 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c @@ -32,6 +32,7 @@ #include #include #include +#include #include /* diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c index b3ff57d61dde..f90f13616980 100644 --- a/drivers/md/bcache/sysfs.c +++ b/drivers/md/bcache/sysfs.c @@ -13,6 +13,7 @@ #include #include +#include static const char * const cache_replacement_policies[] = { "lru", diff --git a/drivers/md/bcache/util.c b/drivers/md/bcache/util.c index dde6172f3f10..8c3a938f4bf0 100644 --- a/drivers/md/bcache/util.c +++ b/drivers/md/bcache/util.c @@ -12,6 +12,7 @@ #include #include #include +#include #include "util.h" diff --git a/drivers/md/bcache/util.h b/drivers/md/bcache/util.h index cf2cbc211d83..a126919ed102 100644 --- a/drivers/md/bcache/util.h +++ b/drivers/md/bcache/util.h @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c index 69e1ae59cab8..6ac2e48b9235 100644 --- a/drivers/md/bcache/writeback.c +++ b/drivers/md/bcache/writeback.c @@ -13,6 +13,7 @@ #include #include +#include #include /* Rate limiting */ diff --git a/drivers/misc/cxl/native.c b/drivers/misc/cxl/native.c index 09505f432eda..7ae710585267 100644 --- a/drivers/misc/cxl/native.c +++ b/drivers/misc/cxl/native.c @@ -9,6 +9,7 @@ #include #include +#include #include #include #include diff --git a/drivers/net/irda/pxaficp_ir.c b/drivers/net/irda/pxaficp_ir.c index 6e8f616be48e..1dba16bc7f8d 100644 --- a/drivers/net/irda/pxaficp_ir.c +++ b/drivers/net/irda/pxaficp_ir.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c index 6d9335865880..9612b84bc3e0 100644 --- a/drivers/perf/arm_pmu.c +++ b/drivers/perf/arm_pmu.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 2fe35354f20e..5c98ad4d2f4c 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index aecca0e7d9ca..796016e63c1d 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -2,6 +2,7 @@ #define _LINUX_BLKDEV_H #include +#include #ifdef CONFIG_BLOCK diff --git a/include/linux/sched/clock.h b/include/linux/sched/clock.h new file mode 100644 index 000000000000..7cb7d79b2cf9 --- /dev/null +++ b/include/linux/sched/clock.h @@ -0,0 +1,6 @@ +#ifndef _LINUX_SCHED_CLOCK_H +#define _LINUX_SCHED_CLOCK_H + +#include + +#endif /* _LINUX_SCHED_CLOCK_H */ diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 69ccd2636911..c776abd86937 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -34,6 +34,7 @@ #include #include #include +#include #include #include #include diff --git a/include/net/busy_poll.h b/include/net/busy_poll.h index b8d637225a07..b96832df239e 100644 --- a/include/net/busy_poll.h +++ b/include/net/busy_poll.h @@ -25,6 +25,7 @@ #define _LINUX_NET_BUSY_POLL_H #include +#include #include #ifdef CONFIG_NET_RX_BUSY_POLL diff --git a/kernel/events/core.c b/kernel/events/core.c index 1031bdf9f012..42fe16a84f97 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -46,6 +46,7 @@ #include #include #include +#include #include "internal.h" diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index 9812e5dd409e..cdafaff926ca 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -28,6 +28,7 @@ #define DISABLE_BRANCH_PROFILING #include #include +#include #include #include #include diff --git a/kernel/locking/qspinlock_stat.h b/kernel/locking/qspinlock_stat.h index e852be4851fc..4a30ef63c607 100644 --- a/kernel/locking/qspinlock_stat.h +++ b/kernel/locking/qspinlock_stat.h @@ -63,6 +63,7 @@ enum qlock_stats { */ #include #include +#include #include static const char * const qstat_names[qstat_num + 1] = { diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 34da86e73d00..47050eedc206 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -45,6 +45,7 @@ #include #include #include +#include #include #include diff --git a/kernel/sched/clock.c b/kernel/sched/clock.c index ad64efe41722..dd7817cdbf58 100644 --- a/kernel/sched/clock.c +++ b/kernel/sched/clock.c @@ -58,6 +58,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 1bd317db9810..1a7fd3d21e5a 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -6,6 +6,7 @@ * Copyright (C) 1991-2002 Linus Torvalds */ #include +#include #include #include #include diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 683570739f46..4d386461f13a 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/time/sched_clock.c b/kernel/time/sched_clock.c index a26036d37a38..ea6b610c4c57 100644 --- a/kernel/time/sched_clock.c +++ b/kernel/time/sched_clock.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 2c115fdab397..4fee1c3abd0b 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/torture.c b/kernel/torture.c index 01a99976f072..55de96529287 100644 --- a/kernel/torture.c +++ b/kernel/torture.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index a85739efcc30..96fc3c043ad6 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/trace/trace_clock.c b/kernel/trace/trace_clock.c index 0f06532a755b..5fdc779f411d 100644 --- a/kernel/trace/trace_clock.c +++ b/kernel/trace/trace_clock.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include diff --git a/kernel/trace/trace_hwlat.c b/kernel/trace/trace_hwlat.c index edfacd954e1b..21ea6ae77d93 100644 --- a/kernel/trace/trace_hwlat.c +++ b/kernel/trace/trace_hwlat.c @@ -44,6 +44,7 @@ #include #include #include +#include #include "trace.h" static struct trace_array *hwlat_trace; diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index 070866c32eb9..107afca97649 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -8,6 +8,7 @@ #include #include #include +#include #include "trace_output.h" diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 63177be0159e..144d7b1b0364 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include diff --git a/lib/plist.c b/lib/plist.c index 3a30c53db061..199408f91057 100644 --- a/lib/plist.c +++ b/lib/plist.c @@ -175,6 +175,7 @@ void plist_requeue(struct plist_node *node, struct plist_head *head) #ifdef CONFIG_DEBUG_PI_LIST #include +#include #include #include diff --git a/net/ipv4/tcp_cdg.c b/net/ipv4/tcp_cdg.c index 35b280361cb2..50a0f3e51d5b 100644 --- a/net/ipv4/tcp_cdg.c +++ b/net/ipv4/tcp_cdg.c @@ -27,6 +27,8 @@ #include #include #include +#include + #include #define HYSTART_ACK_TRAIN 1 -- cgit From f361bf4a66c9bfabace46f6ff5d97005c9b524fe Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 3 Feb 2017 23:47:37 +0100 Subject: sched/headers: Prepare for the reduction of 's signal API dependency Instead of including the full , we are going to include the types-only header in , to further decouple the scheduler header from the signal headers. This means that various files which relied on the full need to be updated to gain an explicit dependency on it. Update the code that relies on sched.h's inclusion of the header. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/arm64/kernel/sys_compat.c | 1 + drivers/gpu/drm/i915/i915_gem_request.c | 1 + drivers/isdn/mISDN/stack.c | 2 ++ fs/btrfs/extent-tree.c | 1 + fs/btrfs/free-space-cache.c | 1 + fs/buffer.c | 1 + fs/ceph/addr.c | 1 + fs/dax.c | 1 + fs/ioctl.c | 2 ++ fs/iomap.c | 2 ++ fs/ocfs2/super.c | 1 + include/linux/sched.h | 1 + include/linux/sched/signal.h | 1 + kernel/pid_namespace.c | 1 + mm/page-writeback.c | 1 + 15 files changed, 18 insertions(+) (limited to 'drivers/gpu/drm/i915/i915_gem_request.c') diff --git a/arch/arm64/kernel/sys_compat.c b/arch/arm64/kernel/sys_compat.c index abaf582fc7a8..8b8bbd3eaa52 100644 --- a/arch/arm64/kernel/sys_compat.c +++ b/arch/arm64/kernel/sys_compat.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index df3fef393dbe..e7c3c0318ff6 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -26,6 +26,7 @@ #include #include #include +#include #include "i915_drv.h" diff --git a/drivers/isdn/mISDN/stack.c b/drivers/isdn/mISDN/stack.c index b324474c0c12..696f22fd5ab4 100644 --- a/drivers/isdn/mISDN/stack.c +++ b/drivers/isdn/mISDN/stack.c @@ -19,6 +19,8 @@ #include #include #include +#include + #include "core.h" static u_int *debug; diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index c35b96633554..dad395b0b9fc 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -16,6 +16,7 @@ * Boston, MA 021110-1307, USA. */ #include +#include #include #include #include diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 1a131f7d6c1b..493a654b6012 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -18,6 +18,7 @@ #include #include +#include #include #include #include diff --git a/fs/buffer.c b/fs/buffer.c index 28484b3ebc98..9196f2a270da 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -19,6 +19,7 @@ */ #include +#include #include #include #include diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index f297a9e18642..1a3e1b40799a 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -8,6 +8,7 @@ #include #include #include +#include #include "super.h" #include "mds_client.h" diff --git a/fs/dax.c b/fs/dax.c index 7436c98b92c8..de622d4282a6 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/ioctl.c b/fs/ioctl.c index cb9b02940805..569db68d02b3 100644 --- a/fs/ioctl.c +++ b/fs/ioctl.c @@ -15,6 +15,8 @@ #include #include #include +#include + #include "internal.h" #include diff --git a/fs/iomap.c b/fs/iomap.c index 0f85f2410605..3ca1a8e44135 100644 --- a/fs/iomap.c +++ b/fs/iomap.c @@ -26,6 +26,8 @@ #include #include #include +#include + #include "internal.h" /* diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index a24e42f95341..ca1646fbcaef 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -42,6 +42,7 @@ #include #include #include +#include #define CREATE_TRACE_POINTS #include "ocfs2_trace.h" diff --git a/include/linux/sched.h b/include/linux/sched.h index 8ae7b3d85658..ea05116bc3c2 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -36,6 +36,7 @@ struct sched_param { #include #include #include +#include #include #include #include diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index 0f4e9f4a43fd..7e10a7824523 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -1,6 +1,7 @@ #ifndef _LINUX_SCHED_SIGNAL_H #define _LINUX_SCHED_SIGNAL_H +#include #include #include #include diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c index 7c8367854dc4..de461aa0bf9a 100644 --- a/kernel/pid_namespace.c +++ b/kernel/pid_namespace.c @@ -20,6 +20,7 @@ #include #include #include +#include struct pid_cache { int nr_ids; diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 26a60818a8fc..d8ac2a7fb9e7 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include -- cgit