diff options
-rw-r--r-- | drivers/gpu/drm/i915/i915_request.c | 59 | ||||
-rw-r--r-- | drivers/gpu/drm/i915/intel_color.c | 6 | ||||
-rw-r--r-- | drivers/gpu/drm/i915/intel_context.c | 1 | ||||
-rw-r--r-- | drivers/gpu/drm/i915/intel_context_types.h | 3 | ||||
-rw-r--r-- | drivers/gpu/drm/i915/intel_workarounds.c | 7 |
5 files changed, 68 insertions, 8 deletions
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index b836721d3b13..ce342f7f7ddb 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -425,6 +425,26 @@ void __i915_request_submit(struct i915_request *request) if (i915_gem_context_is_banned(request->gem_context)) i915_request_skip(request, -EIO); + /* + * Are we using semaphores when the gpu is already saturated? + * + * Using semaphores incurs a cost in having the GPU poll a + * memory location, busywaiting for it to change. The continual + * memory reads can have a noticeable impact on the rest of the + * system with the extra bus traffic, stalling the cpu as it too + * tries to access memory across the bus (perf stat -e bus-cycles). + * + * If we installed a semaphore on this request and we only submit + * the request after the signaler completed, that indicates the + * system is overloaded and using semaphores at this time only + * increases the amount of work we are doing. If so, we disable + * further use of semaphores until we are idle again, whence we + * optimistically try again. + */ + if (request->sched.semaphores && + i915_sw_fence_signaled(&request->semaphore)) + request->hw_context->saturated |= request->sched.semaphores; + /* We may be recursing from the signal callback of another i915 fence */ spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING); @@ -799,6 +819,39 @@ err_unreserve: } static int +i915_request_await_start(struct i915_request *rq, struct i915_request *signal) +{ + if (list_is_first(&signal->ring_link, &signal->ring->request_list)) + return 0; + + signal = list_prev_entry(signal, ring_link); + if (i915_timeline_sync_is_later(rq->timeline, &signal->fence)) + return 0; + + return i915_sw_fence_await_dma_fence(&rq->submit, + &signal->fence, 0, + I915_FENCE_GFP); +} + +static intel_engine_mask_t +already_busywaiting(struct i915_request *rq) +{ + /* + * Polling a semaphore causes bus traffic, delaying other users of + * both the GPU and CPU. We want to limit the impact on others, + * while taking advantage of early submission to reduce GPU + * latency. Therefore we restrict ourselves to not using more + * than one semaphore from each source, and not using a semaphore + * if we have detected the engine is saturated (i.e. would not be + * submitted early and cause bus traffic reading an already passed + * semaphore). + * + * See the are-we-too-late? check in __i915_request_submit(). + */ + return rq->sched.semaphores | rq->hw_context->saturated; +} + +static int emit_semaphore_wait(struct i915_request *to, struct i915_request *from, gfp_t gfp) @@ -811,11 +864,15 @@ emit_semaphore_wait(struct i915_request *to, GEM_BUG_ON(INTEL_GEN(to->i915) < 8); /* Just emit the first semaphore we see as request space is limited. */ - if (to->sched.semaphores & from->engine->mask) + if (already_busywaiting(to) & from->engine->mask) return i915_sw_fence_await_dma_fence(&to->submit, &from->fence, 0, I915_FENCE_GFP); + err = i915_request_await_start(to, from); + if (err < 0) + return err; + err = i915_sw_fence_await_dma_fence(&to->semaphore, &from->fence, 0, I915_FENCE_GFP); diff --git a/drivers/gpu/drm/i915/intel_color.c b/drivers/gpu/drm/i915/intel_color.c index ca341a9e47e6..9093daabc290 100644 --- a/drivers/gpu/drm/i915/intel_color.c +++ b/drivers/gpu/drm/i915/intel_color.c @@ -173,13 +173,13 @@ static void icl_update_output_csc(struct intel_crtc *crtc, I915_WRITE(PIPE_CSC_OUTPUT_PREOFF_LO(pipe), preoff[2]); I915_WRITE(PIPE_CSC_OUTPUT_COEFF_RY_GY(pipe), coeff[0] << 16 | coeff[1]); - I915_WRITE(PIPE_CSC_OUTPUT_COEFF_BY(pipe), coeff[2]); + I915_WRITE(PIPE_CSC_OUTPUT_COEFF_BY(pipe), coeff[2] << 16); I915_WRITE(PIPE_CSC_OUTPUT_COEFF_RU_GU(pipe), coeff[3] << 16 | coeff[4]); - I915_WRITE(PIPE_CSC_OUTPUT_COEFF_BU(pipe), coeff[5]); + I915_WRITE(PIPE_CSC_OUTPUT_COEFF_BU(pipe), coeff[5] << 16); I915_WRITE(PIPE_CSC_OUTPUT_COEFF_RV_GV(pipe), coeff[6] << 16 | coeff[7]); - I915_WRITE(PIPE_CSC_OUTPUT_COEFF_BV(pipe), coeff[8]); + I915_WRITE(PIPE_CSC_OUTPUT_COEFF_BV(pipe), coeff[8] << 16); I915_WRITE(PIPE_CSC_OUTPUT_POSTOFF_HI(pipe), postoff[0]); I915_WRITE(PIPE_CSC_OUTPUT_POSTOFF_ME(pipe), postoff[1]); diff --git a/drivers/gpu/drm/i915/intel_context.c b/drivers/gpu/drm/i915/intel_context.c index 8931e0fee873..924cc556223a 100644 --- a/drivers/gpu/drm/i915/intel_context.c +++ b/drivers/gpu/drm/i915/intel_context.c @@ -230,6 +230,7 @@ intel_context_init(struct intel_context *ce, ce->gem_context = ctx; ce->engine = engine; ce->ops = engine->cops; + ce->saturated = 0; INIT_LIST_HEAD(&ce->signal_link); INIT_LIST_HEAD(&ce->signals); diff --git a/drivers/gpu/drm/i915/intel_context_types.h b/drivers/gpu/drm/i915/intel_context_types.h index 68b4ca1611e0..339c7437fe82 100644 --- a/drivers/gpu/drm/i915/intel_context_types.h +++ b/drivers/gpu/drm/i915/intel_context_types.h @@ -14,6 +14,7 @@ #include <linux/types.h> #include "i915_active_types.h" +#include "intel_engine_types.h" struct i915_gem_context; struct i915_vma; @@ -58,6 +59,8 @@ struct intel_context { atomic_t pin_count; struct mutex pin_mutex; /* guards pinning and associated on-gpuing */ + intel_engine_mask_t saturated; /* submitting semaphores too late? */ + /** * active_tracker: Active tracker for the external rq activity * on this intel_context object. diff --git a/drivers/gpu/drm/i915/intel_workarounds.c b/drivers/gpu/drm/i915/intel_workarounds.c index ccaf63679435..9682dd575152 100644 --- a/drivers/gpu/drm/i915/intel_workarounds.c +++ b/drivers/gpu/drm/i915/intel_workarounds.c @@ -541,10 +541,6 @@ static void icl_ctx_workarounds_init(struct intel_engine_cs *engine) WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2, GEN11_TDL_CLOCK_GATING_FIX_DISABLE); - /* WaEnableStateCacheRedirectToCS:icl */ - WA_SET_BIT_MASKED(GEN9_SLICE_COMMON_ECO_CHICKEN1, - GEN11_STATE_CACHE_REDIRECT_TO_CS); - /* Wa_2006665173:icl (pre-prod) */ if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_A0)) WA_SET_BIT_MASKED(GEN11_COMMON_SLICE_CHICKEN3, @@ -1050,6 +1046,9 @@ static void icl_whitelist_build(struct i915_wa_list *w) /* WaAllowUMDToModifySamplerMode:icl */ whitelist_reg(w, GEN10_SAMPLER_MODE); + + /* WaEnableStateCacheRedirectToCS:icl */ + whitelist_reg(w, GEN9_SLICE_COMMON_ECO_CHICKEN1); } void intel_engine_init_whitelist(struct intel_engine_cs *engine) |