aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--drivers/gpu/drm/i915/i915_request.c59
-rw-r--r--drivers/gpu/drm/i915/intel_color.c6
-rw-r--r--drivers/gpu/drm/i915/intel_context.c1
-rw-r--r--drivers/gpu/drm/i915/intel_context_types.h3
-rw-r--r--drivers/gpu/drm/i915/intel_workarounds.c7
5 files changed, 68 insertions, 8 deletions
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index b836721d3b13..ce342f7f7ddb 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -425,6 +425,26 @@ void __i915_request_submit(struct i915_request *request)
if (i915_gem_context_is_banned(request->gem_context))
i915_request_skip(request, -EIO);
+ /*
+ * Are we using semaphores when the gpu is already saturated?
+ *
+ * Using semaphores incurs a cost in having the GPU poll a
+ * memory location, busywaiting for it to change. The continual
+ * memory reads can have a noticeable impact on the rest of the
+ * system with the extra bus traffic, stalling the cpu as it too
+ * tries to access memory across the bus (perf stat -e bus-cycles).
+ *
+ * If we installed a semaphore on this request and we only submit
+ * the request after the signaler completed, that indicates the
+ * system is overloaded and using semaphores at this time only
+ * increases the amount of work we are doing. If so, we disable
+ * further use of semaphores until we are idle again, whence we
+ * optimistically try again.
+ */
+ if (request->sched.semaphores &&
+ i915_sw_fence_signaled(&request->semaphore))
+ request->hw_context->saturated |= request->sched.semaphores;
+
/* We may be recursing from the signal callback of another i915 fence */
spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING);
@@ -799,6 +819,39 @@ err_unreserve:
}
static int
+i915_request_await_start(struct i915_request *rq, struct i915_request *signal)
+{
+ if (list_is_first(&signal->ring_link, &signal->ring->request_list))
+ return 0;
+
+ signal = list_prev_entry(signal, ring_link);
+ if (i915_timeline_sync_is_later(rq->timeline, &signal->fence))
+ return 0;
+
+ return i915_sw_fence_await_dma_fence(&rq->submit,
+ &signal->fence, 0,
+ I915_FENCE_GFP);
+}
+
+static intel_engine_mask_t
+already_busywaiting(struct i915_request *rq)
+{
+ /*
+ * Polling a semaphore causes bus traffic, delaying other users of
+ * both the GPU and CPU. We want to limit the impact on others,
+ * while taking advantage of early submission to reduce GPU
+ * latency. Therefore we restrict ourselves to not using more
+ * than one semaphore from each source, and not using a semaphore
+ * if we have detected the engine is saturated (i.e. would not be
+ * submitted early and cause bus traffic reading an already passed
+ * semaphore).
+ *
+ * See the are-we-too-late? check in __i915_request_submit().
+ */
+ return rq->sched.semaphores | rq->hw_context->saturated;
+}
+
+static int
emit_semaphore_wait(struct i915_request *to,
struct i915_request *from,
gfp_t gfp)
@@ -811,11 +864,15 @@ emit_semaphore_wait(struct i915_request *to,
GEM_BUG_ON(INTEL_GEN(to->i915) < 8);
/* Just emit the first semaphore we see as request space is limited. */
- if (to->sched.semaphores & from->engine->mask)
+ if (already_busywaiting(to) & from->engine->mask)
return i915_sw_fence_await_dma_fence(&to->submit,
&from->fence, 0,
I915_FENCE_GFP);
+ err = i915_request_await_start(to, from);
+ if (err < 0)
+ return err;
+
err = i915_sw_fence_await_dma_fence(&to->semaphore,
&from->fence, 0,
I915_FENCE_GFP);
diff --git a/drivers/gpu/drm/i915/intel_color.c b/drivers/gpu/drm/i915/intel_color.c
index ca341a9e47e6..9093daabc290 100644
--- a/drivers/gpu/drm/i915/intel_color.c
+++ b/drivers/gpu/drm/i915/intel_color.c
@@ -173,13 +173,13 @@ static void icl_update_output_csc(struct intel_crtc *crtc,
I915_WRITE(PIPE_CSC_OUTPUT_PREOFF_LO(pipe), preoff[2]);
I915_WRITE(PIPE_CSC_OUTPUT_COEFF_RY_GY(pipe), coeff[0] << 16 | coeff[1]);
- I915_WRITE(PIPE_CSC_OUTPUT_COEFF_BY(pipe), coeff[2]);
+ I915_WRITE(PIPE_CSC_OUTPUT_COEFF_BY(pipe), coeff[2] << 16);
I915_WRITE(PIPE_CSC_OUTPUT_COEFF_RU_GU(pipe), coeff[3] << 16 | coeff[4]);
- I915_WRITE(PIPE_CSC_OUTPUT_COEFF_BU(pipe), coeff[5]);
+ I915_WRITE(PIPE_CSC_OUTPUT_COEFF_BU(pipe), coeff[5] << 16);
I915_WRITE(PIPE_CSC_OUTPUT_COEFF_RV_GV(pipe), coeff[6] << 16 | coeff[7]);
- I915_WRITE(PIPE_CSC_OUTPUT_COEFF_BV(pipe), coeff[8]);
+ I915_WRITE(PIPE_CSC_OUTPUT_COEFF_BV(pipe), coeff[8] << 16);
I915_WRITE(PIPE_CSC_OUTPUT_POSTOFF_HI(pipe), postoff[0]);
I915_WRITE(PIPE_CSC_OUTPUT_POSTOFF_ME(pipe), postoff[1]);
diff --git a/drivers/gpu/drm/i915/intel_context.c b/drivers/gpu/drm/i915/intel_context.c
index 8931e0fee873..924cc556223a 100644
--- a/drivers/gpu/drm/i915/intel_context.c
+++ b/drivers/gpu/drm/i915/intel_context.c
@@ -230,6 +230,7 @@ intel_context_init(struct intel_context *ce,
ce->gem_context = ctx;
ce->engine = engine;
ce->ops = engine->cops;
+ ce->saturated = 0;
INIT_LIST_HEAD(&ce->signal_link);
INIT_LIST_HEAD(&ce->signals);
diff --git a/drivers/gpu/drm/i915/intel_context_types.h b/drivers/gpu/drm/i915/intel_context_types.h
index 68b4ca1611e0..339c7437fe82 100644
--- a/drivers/gpu/drm/i915/intel_context_types.h
+++ b/drivers/gpu/drm/i915/intel_context_types.h
@@ -14,6 +14,7 @@
#include <linux/types.h>
#include "i915_active_types.h"
+#include "intel_engine_types.h"
struct i915_gem_context;
struct i915_vma;
@@ -58,6 +59,8 @@ struct intel_context {
atomic_t pin_count;
struct mutex pin_mutex; /* guards pinning and associated on-gpuing */
+ intel_engine_mask_t saturated; /* submitting semaphores too late? */
+
/**
* active_tracker: Active tracker for the external rq activity
* on this intel_context object.
diff --git a/drivers/gpu/drm/i915/intel_workarounds.c b/drivers/gpu/drm/i915/intel_workarounds.c
index ccaf63679435..9682dd575152 100644
--- a/drivers/gpu/drm/i915/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/intel_workarounds.c
@@ -541,10 +541,6 @@ static void icl_ctx_workarounds_init(struct intel_engine_cs *engine)
WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2,
GEN11_TDL_CLOCK_GATING_FIX_DISABLE);
- /* WaEnableStateCacheRedirectToCS:icl */
- WA_SET_BIT_MASKED(GEN9_SLICE_COMMON_ECO_CHICKEN1,
- GEN11_STATE_CACHE_REDIRECT_TO_CS);
-
/* Wa_2006665173:icl (pre-prod) */
if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_A0))
WA_SET_BIT_MASKED(GEN11_COMMON_SLICE_CHICKEN3,
@@ -1050,6 +1046,9 @@ static void icl_whitelist_build(struct i915_wa_list *w)
/* WaAllowUMDToModifySamplerMode:icl */
whitelist_reg(w, GEN10_SAMPLER_MODE);
+
+ /* WaEnableStateCacheRedirectToCS:icl */
+ whitelist_reg(w, GEN9_SLICE_COMMON_ECO_CHICKEN1);
}
void intel_engine_init_whitelist(struct intel_engine_cs *engine)