Diffstat (limited to 'drivers/gpu/drm/i915/i915_request.c')
-rw-r--r--   drivers/gpu/drm/i915/i915_request.c   329
1 file changed, 179 insertions(+), 150 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 1c5506822dc7..765bec89fc0d 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -31,6 +31,8 @@
 
 #include "gem/i915_gem_context.h"
 #include "gt/intel_context.h"
+#include "gt/intel_ring.h"
+#include "gt/intel_rps.h"
 
 #include "i915_active.h"
 #include "i915_drv.h"
@@ -169,16 +171,17 @@ remove_from_client(struct i915_request *request)
 {
 	struct drm_i915_file_private *file_priv;
 
-	file_priv = READ_ONCE(request->file_priv);
-	if (!file_priv)
+	if (!READ_ONCE(request->file_priv))
 		return;
 
-	spin_lock(&file_priv->mm.lock);
-	if (request->file_priv) {
+	rcu_read_lock();
+	file_priv = xchg(&request->file_priv, NULL);
+	if (file_priv) {
+		spin_lock(&file_priv->mm.lock);
 		list_del(&request->client_link);
-		request->file_priv = NULL;
+		spin_unlock(&file_priv->mm.lock);
 	}
-	spin_unlock(&file_priv->mm.lock);
+	rcu_read_unlock();
 }
 
 static void free_capture_list(struct i915_request *request)
@@ -205,21 +208,18 @@ static void remove_from_engine(struct i915_request *rq)
 	 * check that the rq still belongs to the newly locked engine.
 	 */
 	locked = READ_ONCE(rq->engine);
-	spin_lock(&locked->active.lock);
+	spin_lock_irq(&locked->active.lock);
 	while (unlikely(locked != (engine = READ_ONCE(rq->engine)))) {
 		spin_unlock(&locked->active.lock);
 		spin_lock(&engine->active.lock);
 		locked = engine;
 	}
 	list_del(&rq->sched.link);
-	spin_unlock(&locked->active.lock);
+	spin_unlock_irq(&locked->active.lock);
 }
 
-static bool i915_request_retire(struct i915_request *rq)
+bool i915_request_retire(struct i915_request *rq)
 {
-	struct i915_active_request *active, *next;
-
-	lockdep_assert_held(&rq->timeline->mutex);
 	if (!i915_request_completed(rq))
 		return false;
 
@@ -240,41 +240,11 @@ static bool i915_request_retire(struct i915_request *rq)
 	 * Note this requires that we are always called in request
 	 * completion order.
 	 */
-	GEM_BUG_ON(!list_is_first(&rq->link, &rq->timeline->requests));
+	GEM_BUG_ON(!list_is_first(&rq->link,
+				  &i915_request_timeline(rq)->requests));
 	rq->ring->head = rq->postfix;
 
 	/*
-	 * Walk through the active list, calling retire on each. This allows
-	 * objects to track their GPU activity and mark themselves as idle
-	 * when their *last* active request is completed (updating state
-	 * tracking lists for eviction, active references for GEM, etc).
-	 *
-	 * As the ->retire() may free the node, we decouple it first and
-	 * pass along the auxiliary information (to avoid dereferencing
-	 * the node after the callback).
-	 */
-	list_for_each_entry_safe(active, next, &rq->active_list, link) {
-		/*
-		 * In microbenchmarks or focusing upon time inside the kernel,
-		 * we may spend an inordinate amount of time simply handling
-		 * the retirement of requests and processing their callbacks.
-		 * Of which, this loop itself is particularly hot due to the
-		 * cache misses when jumping around the list of
-		 * i915_active_request.  So we try to keep this loop as
-		 * streamlined as possible and also prefetch the next
-		 * i915_active_request to try and hide the likely cache miss.
-		 */
-		prefetchw(next);
-
-		INIT_LIST_HEAD(&active->link);
-		RCU_INIT_POINTER(active->request, NULL);
-
-		active->retire(active, rq);
-	}
-
-	local_irq_disable();
-
-	/*
 	 * We only loosely track inflight requests across preemption,
 	 * and so we may find ourselves attempting to retire a _completed_
 	 * request that we have removed from the HW and put back on a run
@@ -282,24 +252,22 @@ static bool i915_request_retire(struct i915_request *rq)
 	 */
 	remove_from_engine(rq);
 
-	spin_lock(&rq->lock);
+	spin_lock_irq(&rq->lock);
 	i915_request_mark_complete(rq);
 	if (!i915_request_signaled(rq))
 		dma_fence_signal_locked(&rq->fence);
 	if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &rq->fence.flags))
 		i915_request_cancel_breadcrumb(rq);
 	if (i915_request_has_waitboost(rq)) {
-		GEM_BUG_ON(!atomic_read(&rq->i915->gt_pm.rps.num_waiters));
-		atomic_dec(&rq->i915->gt_pm.rps.num_waiters);
+		GEM_BUG_ON(!atomic_read(&rq->engine->gt->rps.num_waiters));
+		atomic_dec(&rq->engine->gt->rps.num_waiters);
 	}
 	if (!test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags)) {
 		set_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags);
 		__notify_execute_cb(rq);
 	}
 	GEM_BUG_ON(!list_empty(&rq->execute_cb));
-	spin_unlock(&rq->lock);
-
-	local_irq_enable();
+	spin_unlock_irq(&rq->lock);
 
 	remove_from_client(rq);
 	list_del(&rq->link);
@@ -316,7 +284,7 @@
 
 void i915_request_retire_upto(struct i915_request *rq)
 {
-	struct intel_timeline * const tl = rq->timeline;
+	struct intel_timeline * const tl = i915_request_timeline(rq);
 	struct i915_request *tmp;
 
 	GEM_TRACE("%s fence %llx:%lld, current %d\n",
@@ -324,7 +292,6 @@ void i915_request_retire_upto(struct i915_request *rq)
 		  rq->fence.context, rq->fence.seqno,
 		  hwsp_seqno(rq));
 
-	lockdep_assert_held(&tl->mutex);
 	GEM_BUG_ON(!i915_request_completed(rq));
 
 	do {
@@ -333,11 +300,11 @@
 }
 
 static int
-__i915_request_await_execution(struct i915_request *rq,
-			       struct i915_request *signal,
-			       void (*hook)(struct i915_request *rq,
-					    struct dma_fence *signal),
-			       gfp_t gfp)
+__await_execution(struct i915_request *rq,
+		  struct i915_request *signal,
+		  void (*hook)(struct i915_request *rq,
+			       struct dma_fence *signal),
+		  gfp_t gfp)
 {
 	struct execute_cb *cb;
 
@@ -374,6 +341,8 @@ __i915_request_await_execution(struct i915_request *rq,
 	}
 	spin_unlock_irq(&signal->lock);
 
+	/* Copy across semaphore status as we need the same behaviour */
+	rq->sched.flags |= signal->sched.flags;
 	return 0;
 }
 
@@ -680,9 +649,12 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp)
 	rq->gem_context = ce->gem_context;
 	rq->engine = ce->engine;
 	rq->ring = ce->ring;
-	rq->timeline = tl;
+	rq->execution_mask = ce->engine->mask;
+
+	rcu_assign_pointer(rq->timeline, tl);
 	rq->hwsp_seqno = tl->hwsp_seqno;
 	rq->hwsp_cacheline = tl->hwsp_cacheline;
+
 	rq->rcustate = get_state_synchronize_rcu(); /* acts as smp_mb() */
 
 	spin_lock_init(&rq->lock);
@@ -700,9 +672,7 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp)
 	rq->batch = NULL;
 	rq->capture_list = NULL;
 	rq->flags = 0;
-	rq->execution_mask = ALL_ENGINES;
 
-	INIT_LIST_HEAD(&rq->active_list);
 	INIT_LIST_HEAD(&rq->execute_cb);
 
 	/*
@@ -741,7 +711,6 @@ err_unwind:
 	ce->ring->emit = rq->head;
 
 	/* Make sure we didn't add ourselves to external state before freeing */
-	GEM_BUG_ON(!list_empty(&rq->active_list));
 	GEM_BUG_ON(!list_empty(&rq->sched.signalers_list));
 	GEM_BUG_ON(!list_empty(&rq->sched.waiters_list));
 
@@ -786,16 +755,43 @@ err_unlock:
 static int
 i915_request_await_start(struct i915_request *rq, struct i915_request *signal)
 {
-	if (list_is_first(&signal->link, &signal->timeline->requests))
-		return 0;
+	struct intel_timeline *tl;
+	struct dma_fence *fence;
+	int err;
+
+	GEM_BUG_ON(i915_request_timeline(rq) ==
+		   rcu_access_pointer(signal->timeline));
 
-	signal = list_prev_entry(signal, link);
-	if (intel_timeline_sync_is_later(rq->timeline, &signal->fence))
+	rcu_read_lock();
+	tl = rcu_dereference(signal->timeline);
+	if (i915_request_started(signal) || !kref_get_unless_zero(&tl->kref))
+		tl = NULL;
+	rcu_read_unlock();
+	if (!tl) /* already started or maybe even completed */
 		return 0;
 
-	return i915_sw_fence_await_dma_fence(&rq->submit,
-					     &signal->fence, 0,
-					     I915_FENCE_GFP);
+	fence = ERR_PTR(-EBUSY);
+	if (mutex_trylock(&tl->mutex)) {
+		fence = NULL;
+		if (!i915_request_started(signal) &&
+		    !list_is_first(&signal->link, &tl->requests)) {
+			signal = list_prev_entry(signal, link);
+			fence = dma_fence_get(&signal->fence);
+		}
+		mutex_unlock(&tl->mutex);
+	}
+	intel_timeline_put(tl);
+	if (IS_ERR_OR_NULL(fence))
+		return PTR_ERR_OR_ZERO(fence);
+
+	err = 0;
+	if (intel_timeline_sync_is_later(i915_request_timeline(rq), fence))
+		err = i915_sw_fence_await_dma_fence(&rq->submit,
+						    fence, 0,
+						    I915_FENCE_GFP);
+	dma_fence_put(fence);
+
+	return err;
 }
 
 static intel_engine_mask_t
@@ -817,38 +813,27 @@ already_busywaiting(struct i915_request *rq)
 }
 
 static int
-emit_semaphore_wait(struct i915_request *to,
-		    struct i915_request *from,
-		    gfp_t gfp)
+__emit_semaphore_wait(struct i915_request *to,
+		      struct i915_request *from,
+		      u32 seqno)
 {
+	const int has_token = INTEL_GEN(to->i915) >= 12;
 	u32 hwsp_offset;
+	int len, err;
 	u32 *cs;
-	int err;
 
-	GEM_BUG_ON(!from->timeline->has_initial_breadcrumb);
 	GEM_BUG_ON(INTEL_GEN(to->i915) < 8);
 
-	/* Just emit the first semaphore we see as request space is limited. */
-	if (already_busywaiting(to) & from->engine->mask)
-		return i915_sw_fence_await_dma_fence(&to->submit,
-						     &from->fence, 0,
-						     I915_FENCE_GFP);
-
-	err = i915_request_await_start(to, from);
-	if (err < 0)
-		return err;
-
-	/* Only submit our spinner after the signaler is running! */
-	err = __i915_request_await_execution(to, from, NULL, gfp);
-	if (err)
-		return err;
-
 	/* We need to pin the signaler's HWSP until we are finished reading. */
 	err = intel_timeline_read_hwsp(from, to, &hwsp_offset);
 	if (err)
 		return err;
 
-	cs = intel_ring_begin(to, 4);
+	len = 4;
+	if (has_token)
+		len += 2;
+
+	cs = intel_ring_begin(to, len);
 	if (IS_ERR(cs))
 		return PTR_ERR(cs);
 
@@ -860,18 +845,50 @@ emit_semaphore_wait(struct i915_request *to,
 	 * (post-wrap) values than they were expecting (and so wait
 	 * forever).
 	 */
-	*cs++ = MI_SEMAPHORE_WAIT |
-		MI_SEMAPHORE_GLOBAL_GTT |
-		MI_SEMAPHORE_POLL |
-		MI_SEMAPHORE_SAD_GTE_SDD;
-	*cs++ = from->fence.seqno;
+	*cs++ = (MI_SEMAPHORE_WAIT |
+		 MI_SEMAPHORE_GLOBAL_GTT |
+		 MI_SEMAPHORE_POLL |
+		 MI_SEMAPHORE_SAD_GTE_SDD) +
+		has_token;
+	*cs++ = seqno;
 	*cs++ = hwsp_offset;
 	*cs++ = 0;
+	if (has_token) {
+		*cs++ = 0;
+		*cs++ = MI_NOOP;
+	}
 
 	intel_ring_advance(to, cs);
+	return 0;
+}
+
+static int
+emit_semaphore_wait(struct i915_request *to,
+		    struct i915_request *from,
+		    gfp_t gfp)
+{
+	/* Just emit the first semaphore we see as request space is limited. */
+	if (already_busywaiting(to) & from->engine->mask)
+		goto await_fence;
+
+	if (i915_request_await_start(to, from) < 0)
+		goto await_fence;
+
+	/* Only submit our spinner after the signaler is running! */
+	if (__await_execution(to, from, NULL, gfp))
+		goto await_fence;
+
+	if (__emit_semaphore_wait(to, from, from->fence.seqno))
+		goto await_fence;
+
 	to->sched.semaphores |= from->engine->mask;
 	to->sched.flags |= I915_SCHED_HAS_SEMAPHORE_CHAIN;
 	return 0;
+
+await_fence:
+	return i915_sw_fence_await_dma_fence(&to->submit,
+					     &from->fence, 0,
+					     I915_FENCE_GFP);
 }
 
 static int
@@ -955,26 +972,79 @@ i915_request_await_dma_fence(struct i915_request *rq, struct dma_fence *fence)
 
 		/* Squash repeated waits to the same timelines */
 		if (fence->context &&
-		    intel_timeline_sync_is_later(rq->timeline, fence))
+		    intel_timeline_sync_is_later(i915_request_timeline(rq),
+						 fence))
 			continue;
 
 		if (dma_fence_is_i915(fence))
 			ret = i915_request_await_request(rq, to_request(fence));
 		else
 			ret = i915_sw_fence_await_dma_fence(&rq->submit, fence,
-							    I915_FENCE_TIMEOUT,
+							    fence->context ? I915_FENCE_TIMEOUT : 0,
 							    I915_FENCE_GFP);
 		if (ret < 0)
 			return ret;
 
 		/* Record the latest fence used against each timeline */
 		if (fence->context)
-			intel_timeline_sync_set(rq->timeline, fence);
+			intel_timeline_sync_set(i915_request_timeline(rq),
+						fence);
 	} while (--nchild);
 
 	return 0;
 }
 
+static bool intel_timeline_sync_has_start(struct intel_timeline *tl,
+					  struct dma_fence *fence)
+{
+	return __intel_timeline_sync_is_later(tl,
+					      fence->context,
+					      fence->seqno - 1);
+}
+
+static int intel_timeline_sync_set_start(struct intel_timeline *tl,
+					 const struct dma_fence *fence)
+{
+	return __intel_timeline_sync_set(tl, fence->context, fence->seqno - 1);
+}
+
+static int
+__i915_request_await_execution(struct i915_request *to,
+			       struct i915_request *from,
+			       void (*hook)(struct i915_request *rq,
+					    struct dma_fence *signal))
+{
+	int err;
+
+	/* Submit both requests at the same time */
+	err = __await_execution(to, from, hook, I915_FENCE_GFP);
+	if (err)
+		return err;
+
+	/* Squash repeated depenendices to the same timelines */
+	if (intel_timeline_sync_has_start(i915_request_timeline(to),
+					  &from->fence))
+		return 0;
+
+	/* Ensure both start together [after all semaphores in signal] */
+	if (intel_engine_has_semaphores(to->engine))
+		err = __emit_semaphore_wait(to, from, from->fence.seqno - 1);
+	else
+		err = i915_request_await_start(to, from);
+	if (err < 0)
+		return err;
+
+	/* Couple the dependency tree for PI on this exposed to->fence */
+	if (to->engine->schedule) {
+		err = i915_sched_node_add_dependency(&to->sched, &from->sched);
+		if (err < 0)
+			return err;
+	}
+
+	return intel_timeline_sync_set_start(i915_request_timeline(to),
+					     &from->fence);
+}
+
 int
 i915_request_await_execution(struct i915_request *rq,
 			     struct dma_fence *fence,
@@ -1008,8 +1078,7 @@ i915_request_await_execution(struct i915_request *rq,
 		if (dma_fence_is_i915(fence))
 			ret = __i915_request_await_execution(rq,
 							     to_request(fence),
-							     hook,
-							     I915_FENCE_GFP);
+							     hook);
 		else
 			ret = i915_sw_fence_await_dma_fence(&rq->submit, fence,
 							    I915_FENCE_TIMEOUT,
@@ -1111,7 +1180,7 @@ void i915_request_skip(struct i915_request *rq, int error)
 static struct i915_request *
 __i915_request_add_to_timeline(struct i915_request *rq)
 {
-	struct intel_timeline *timeline = rq->timeline;
+	struct intel_timeline *timeline = i915_request_timeline(rq);
 	struct i915_request *prev;
 
 	/*
@@ -1134,8 +1203,8 @@ __i915_request_add_to_timeline(struct i915_request *rq)
 	 * precludes optimising to use semaphores serialisation of a single
 	 * timeline across engines.
	 */
-	prev = rcu_dereference_protected(timeline->last_request.request,
-					 lockdep_is_held(&timeline->mutex));
+	prev = to_request(__i915_active_fence_set(&timeline->last_request,
+						  &rq->fence));
 	if (prev && !i915_request_completed(prev)) {
 		if (is_power_of_2(prev->engine->mask | rq->engine->mask))
 			i915_sw_fence_await_sw_fence(&rq->submit,
@@ -1160,7 +1229,6 @@ __i915_request_add_to_timeline(struct i915_request *rq)
 	 * us, the timeline will hold its seqno which is later than ours.
 	 */
 	GEM_BUG_ON(timeline->seqno != rq->fence.seqno);
-	__i915_active_request_set(&timeline->last_request, rq);
 
 	return prev;
 }
@@ -1224,7 +1292,7 @@ void __i915_request_queue(struct i915_request *rq,
 void i915_request_add(struct i915_request *rq)
 {
 	struct i915_sched_attr attr = rq->gem_context->sched;
-	struct intel_timeline * const tl = rq->timeline;
+	struct intel_timeline * const tl = i915_request_timeline(rq);
 	struct i915_request *prev;
 
 	lockdep_assert_held(&tl->mutex);
@@ -1279,7 +1347,9 @@ void i915_request_add(struct i915_request *rq)
 	 * work on behalf of others -- but instead we should benefit from
 	 * improved resource management. (Well, that's the theory at least.)
 	 */
-	if (prev && i915_request_completed(prev) && prev->timeline == tl)
+	if (prev &&
+	    i915_request_completed(prev) &&
+	    rcu_access_pointer(prev->timeline) == tl)
 		i915_request_retire_upto(prev);
 
 	mutex_unlock(&tl->mutex);
@@ -1442,7 +1512,7 @@ long i915_request_wait(struct i915_request *rq,
 	 * completion. That requires having a good predictor for the request
 	 * duration, which we currently lack.
 	 */
-	if (CONFIG_DRM_I915_SPIN_REQUEST &&
+	if (IS_ACTIVE(CONFIG_DRM_I915_SPIN_REQUEST) &&
 	    __i915_spin_request(rq, state, CONFIG_DRM_I915_SPIN_REQUEST)) {
 		dma_fence_signal(&rq->fence);
 		goto out;
@@ -1462,7 +1532,7 @@
 	 */
 	if (flags & I915_WAIT_PRIORITY) {
 		if (!i915_request_started(rq) && INTEL_GEN(rq->i915) >= 6)
-			gen6_rps_boost(rq);
+			intel_rps_boost(rq);
 		i915_schedule_bump_priority(rq, I915_PRIORITY_WAIT);
 	}
 
@@ -1488,6 +1558,7 @@ long i915_request_wait(struct i915_request *rq,
 			break;
 		}
 
+		intel_engine_flush_submission(rq->engine);
 		timeout = io_schedule_timeout(timeout);
 	}
 	__set_current_state(TASK_RUNNING);
@@ -1495,53 +1566,11 @@ long i915_request_wait(struct i915_request *rq,
 	dma_fence_remove_callback(&rq->fence, &wait.cb);
 
 out:
-	mutex_release(&rq->engine->gt->reset.mutex.dep_map, 0, _THIS_IP_);
+	mutex_release(&rq->engine->gt->reset.mutex.dep_map, _THIS_IP_);
 	trace_i915_request_wait_end(rq);
 	return timeout;
 }
 
-bool i915_retire_requests(struct drm_i915_private *i915)
-{
-	struct intel_gt_timelines *timelines = &i915->gt.timelines;
-	struct intel_timeline *tl, *tn;
-	unsigned long flags;
-	LIST_HEAD(free);
-
-	spin_lock_irqsave(&timelines->lock, flags);
-	list_for_each_entry_safe(tl, tn, &timelines->active_list, link) {
-		if (!mutex_trylock(&tl->mutex))
-			continue;
-
-		intel_timeline_get(tl);
-		GEM_BUG_ON(!tl->active_count);
-		tl->active_count++; /* pin the list element */
-		spin_unlock_irqrestore(&timelines->lock, flags);
-
-		retire_requests(tl);
-
-		spin_lock_irqsave(&timelines->lock, flags);
-
-		/* Resume iteration after dropping lock */
-		list_safe_reset_next(tl, tn, link);
-		if (!--tl->active_count)
-			list_del(&tl->link);
-
-		mutex_unlock(&tl->mutex);
-
-		/* Defer the final release to after the spinlock */
-		if (refcount_dec_and_test(&tl->kref.refcount)) {
-			GEM_BUG_ON(tl->active_count);
-			list_add(&tl->link, &free);
-		}
-	}
-	spin_unlock_irqrestore(&timelines->lock, flags);
-
-	list_for_each_entry_safe(tl, tn, &free, link)
-		__intel_timeline_free(&tl->kref);
-
-	return !list_empty(&timelines->active_list);
-}
-
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
 #include "selftests/mock_request.c"
 #include "selftests/i915_request.c"