Diffstat (limited to 'drivers/gpu/drm/scheduler')
-rw-r--r--   drivers/gpu/drm/scheduler/gpu_scheduler_trace.h   27
-rw-r--r--   drivers/gpu/drm/scheduler/sched_entity.c           56
-rw-r--r--   drivers/gpu/drm/scheduler/sched_main.c             90
3 files changed, 108 insertions, 65 deletions
diff --git a/drivers/gpu/drm/scheduler/gpu_scheduler_trace.h b/drivers/gpu/drm/scheduler/gpu_scheduler_trace.h
index d79086498aff..877ce9b127f1 100644
--- a/drivers/gpu/drm/scheduler/gpu_scheduler_trace.h
+++ b/drivers/gpu/drm/scheduler/gpu_scheduler_trace.h
@@ -59,6 +59,33 @@ TRACE_EVENT(drm_sched_job,
 		      __entry->job_count, __entry->hw_job_count)
 );
 
+TRACE_EVENT(drm_run_job,
+	    TP_PROTO(struct drm_sched_job *sched_job, struct drm_sched_entity *entity),
+	    TP_ARGS(sched_job, entity),
+	    TP_STRUCT__entry(
+			     __field(struct drm_sched_entity *, entity)
+			     __field(struct dma_fence *, fence)
+			     __field(const char *, name)
+			     __field(uint64_t, id)
+			     __field(u32, job_count)
+			     __field(int, hw_job_count)
+			     ),
+
+	    TP_fast_assign(
+			   __entry->entity = entity;
+			   __entry->id = sched_job->id;
+			   __entry->fence = &sched_job->s_fence->finished;
+			   __entry->name = sched_job->sched->name;
+			   __entry->job_count = spsc_queue_count(&entity->job_queue);
+			   __entry->hw_job_count = atomic_read(
+				   &sched_job->sched->hw_rq_count);
+			   ),
+	    TP_printk("entity=%p, id=%llu, fence=%p, ring=%s, job count:%u, hw job count:%d",
+		      __entry->entity, __entry->id,
+		      __entry->fence, __entry->name,
+		      __entry->job_count, __entry->hw_job_count)
+);
+
 TRACE_EVENT(drm_sched_process_job,
 	    TP_PROTO(struct drm_sched_fence *fence),
 	    TP_ARGS(fence),
diff --git a/drivers/gpu/drm/scheduler/sched_entity.c b/drivers/gpu/drm/scheduler/sched_entity.c
index 63bccd201b97..c803e14eed91 100644
--- a/drivers/gpu/drm/scheduler/sched_entity.c
+++ b/drivers/gpu/drm/scheduler/sched_entity.c
@@ -84,6 +84,24 @@ int drm_sched_entity_init(struct drm_sched_entity *entity,
 EXPORT_SYMBOL(drm_sched_entity_init);
 
 /**
+ * drm_sched_entity_modify_sched - Modify sched of an entity
+ * @entity: scheduler entity to init
+ * @sched_list: the list of new drm scheds which will replace
+ *		 existing entity->sched_list
+ * @num_sched_list: number of drm sched in sched_list
+ */
+void drm_sched_entity_modify_sched(struct drm_sched_entity *entity,
+				    struct drm_gpu_scheduler **sched_list,
+				    unsigned int num_sched_list)
+{
+	WARN_ON(!num_sched_list || !sched_list);
+
+	entity->sched_list = sched_list;
+	entity->num_sched_list = num_sched_list;
+}
+EXPORT_SYMBOL(drm_sched_entity_modify_sched);
+
+/**
  * drm_sched_entity_is_idle - Check if entity is idle
  *
  * @entity: scheduler entity
@@ -120,38 +138,6 @@ bool drm_sched_entity_is_ready(struct drm_sched_entity *entity)
 }
 
 /**
- * drm_sched_entity_get_free_sched - Get the rq from rq_list with least load
- *
- * @entity: scheduler entity
- *
- * Return the pointer to the rq with least load.
- */
-static struct drm_sched_rq *
-drm_sched_entity_get_free_sched(struct drm_sched_entity *entity)
-{
-	struct drm_sched_rq *rq = NULL;
-	unsigned int min_score = UINT_MAX, num_score;
-	int i;
-
-	for (i = 0; i < entity->num_sched_list; ++i) {
-		struct drm_gpu_scheduler *sched = entity->sched_list[i];
-
-		if (!entity->sched_list[i]->ready) {
-			DRM_WARN("sched%s is not ready, skipping", sched->name);
-			continue;
-		}
-
-		num_score = atomic_read(&sched->score);
-		if (num_score < min_score) {
-			min_score = num_score;
-			rq = &entity->sched_list[i]->sched_rq[entity->priority];
-		}
-	}
-
-	return rq;
-}
-
-/**
  * drm_sched_entity_flush - Flush a context entity
  *
  * @entity: scheduler entity
@@ -461,6 +447,7 @@ struct drm_sched_job *drm_sched_entity_pop_job(struct drm_sched_entity *entity)
 void drm_sched_entity_select_rq(struct drm_sched_entity *entity)
 {
 	struct dma_fence *fence;
+	struct drm_gpu_scheduler *sched;
 	struct drm_sched_rq *rq;
 
 	if (spsc_queue_count(&entity->job_queue) || entity->num_sched_list <= 1)
@@ -471,7 +458,8 @@ void drm_sched_entity_select_rq(struct drm_sched_entity *entity)
 		return;
 
 	spin_lock(&entity->rq_lock);
-	rq = drm_sched_entity_get_free_sched(entity);
+	sched = drm_sched_pick_best(entity->sched_list, entity->num_sched_list);
+	rq = sched ? &sched->sched_rq[entity->priority] : NULL;
 	if (rq != entity->rq) {
 		drm_sched_rq_remove_entity(entity->rq, entity);
 		entity->rq = rq;
@@ -498,7 +486,7 @@ void drm_sched_entity_push_job(struct drm_sched_job *sched_job,
 	bool first;
 
 	trace_drm_sched_job(sched_job, entity);
-	atomic_inc(&entity->rq->sched->score);
+	atomic_inc(&entity->rq->sched->num_jobs);
 	WRITE_ONCE(entity->last_user, current->group_leader);
 	first = spsc_queue_push(&entity->job_queue, &sched_job->queue_node);
 
diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
index 71ce6215956f..2f319102ae9f 100644
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -92,7 +92,6 @@ void drm_sched_rq_add_entity(struct drm_sched_rq *rq,
 	if (!list_empty(&entity->list))
 		return;
 	spin_lock(&rq->lock);
-	atomic_inc(&rq->sched->score);
 	list_add_tail(&entity->list, &rq->entities);
 	spin_unlock(&rq->lock);
 }
@@ -111,7 +110,6 @@ void drm_sched_rq_remove_entity(struct drm_sched_rq *rq,
 	if (list_empty(&entity->list))
 		return;
 	spin_lock(&rq->lock);
-	atomic_dec(&rq->sched->score);
 	list_del_init(&entity->list);
 	if (rq->current_entity == entity)
 		rq->current_entity = NULL;
@@ -222,8 +220,7 @@ EXPORT_SYMBOL(drm_sched_fault);
  *
  * Suspend the delayed work timeout for the scheduler. This is done by
  * modifying the delayed work timeout to an arbitrary large value,
- * MAX_SCHEDULE_TIMEOUT in this case. Note that this function can be
- * called from an IRQ context.
+ * MAX_SCHEDULE_TIMEOUT in this case.
  *
  * Returns the timeout remaining
  *
@@ -252,46 +249,41 @@ EXPORT_SYMBOL(drm_sched_suspend_timeout);
  * @sched: scheduler instance for which to resume the timeout
  * @remaining: remaining timeout
  *
- * Resume the delayed work timeout for the scheduler. Note that
- * this function can be called from an IRQ context.
+ * Resume the delayed work timeout for the scheduler.
  */
 void drm_sched_resume_timeout(struct drm_gpu_scheduler *sched,
 		unsigned long remaining)
 {
-	unsigned long flags;
-
-	spin_lock_irqsave(&sched->job_list_lock, flags);
+	spin_lock(&sched->job_list_lock);
 
 	if (list_empty(&sched->ring_mirror_list))
 		cancel_delayed_work(&sched->work_tdr);
 	else
 		mod_delayed_work(system_wq, &sched->work_tdr, remaining);
 
-	spin_unlock_irqrestore(&sched->job_list_lock, flags);
+	spin_unlock(&sched->job_list_lock);
 }
 EXPORT_SYMBOL(drm_sched_resume_timeout);
 
 static void drm_sched_job_begin(struct drm_sched_job *s_job)
 {
 	struct drm_gpu_scheduler *sched = s_job->sched;
-	unsigned long flags;
 
-	spin_lock_irqsave(&sched->job_list_lock, flags);
+	spin_lock(&sched->job_list_lock);
 	list_add_tail(&s_job->node, &sched->ring_mirror_list);
 	drm_sched_start_timeout(sched);
-	spin_unlock_irqrestore(&sched->job_list_lock, flags);
+	spin_unlock(&sched->job_list_lock);
 }
 
 static void drm_sched_job_timedout(struct work_struct *work)
 {
 	struct drm_gpu_scheduler *sched;
 	struct drm_sched_job *job;
-	unsigned long flags;
 
 	sched = container_of(work, struct drm_gpu_scheduler, work_tdr.work);
 
 	/* Protects against concurrent deletion in drm_sched_get_cleanup_job */
-	spin_lock_irqsave(&sched->job_list_lock, flags);
+	spin_lock(&sched->job_list_lock);
 	job = list_first_entry_or_null(&sched->ring_mirror_list,
 				       struct drm_sched_job, node);
 
@@ -302,7 +294,7 @@ static void drm_sched_job_timedout(struct work_struct *work)
 		 * is parked at which point it's safe.
 		 */
 		list_del_init(&job->node);
-		spin_unlock_irqrestore(&sched->job_list_lock, flags);
+		spin_unlock(&sched->job_list_lock);
 
 		job->sched->ops->timedout_job(job);
 
@@ -315,12 +307,12 @@ static void drm_sched_job_timedout(struct work_struct *work)
 			sched->free_guilty = false;
 		}
 	} else {
-		spin_unlock_irqrestore(&sched->job_list_lock, flags);
+		spin_unlock(&sched->job_list_lock);
 	}
 
-	spin_lock_irqsave(&sched->job_list_lock, flags);
+	spin_lock(&sched->job_list_lock);
 	drm_sched_start_timeout(sched);
-	spin_unlock_irqrestore(&sched->job_list_lock, flags);
+	spin_unlock(&sched->job_list_lock);
 }
 
 /**
@@ -383,7 +375,6 @@ EXPORT_SYMBOL(drm_sched_increase_karma);
 void drm_sched_stop(struct drm_gpu_scheduler *sched, struct drm_sched_job *bad)
 {
 	struct drm_sched_job *s_job, *tmp;
-	unsigned long flags;
 
 	kthread_park(sched->thread);
 
@@ -417,9 +408,9 @@ void drm_sched_stop(struct drm_gpu_scheduler *sched, struct drm_sched_job *bad)
 			 * remove job from ring_mirror_list.
 			 * Locking here is for concurrent resume timeout
 			 */
-			spin_lock_irqsave(&sched->job_list_lock, flags);
+			spin_lock(&sched->job_list_lock);
 			list_del_init(&s_job->node);
-			spin_unlock_irqrestore(&sched->job_list_lock, flags);
+			spin_unlock(&sched->job_list_lock);
 
 			/*
 			 * Wait for job's HW fence callback to finish using s_job
@@ -462,7 +453,6 @@ EXPORT_SYMBOL(drm_sched_stop);
 void drm_sched_start(struct drm_gpu_scheduler *sched, bool full_recovery)
 {
 	struct drm_sched_job *s_job, *tmp;
-	unsigned long flags;
 	int r;
 
 	/*
@@ -491,9 +481,9 @@ void drm_sched_start(struct drm_gpu_scheduler *sched, bool full_recovery)
 	}
 
 	if (full_recovery) {
-		spin_lock_irqsave(&sched->job_list_lock, flags);
+		spin_lock(&sched->job_list_lock);
 		drm_sched_start_timeout(sched);
-		spin_unlock_irqrestore(&sched->job_list_lock, flags);
+		spin_unlock(&sched->job_list_lock);
 	}
 
 	kthread_unpark(sched->thread);
@@ -657,11 +647,13 @@ static void drm_sched_process_job(struct dma_fence *f, struct dma_fence_cb *cb)
 	struct drm_gpu_scheduler *sched = s_fence->sched;
 
 	atomic_dec(&sched->hw_rq_count);
-	atomic_dec(&sched->score);
+	atomic_dec(&sched->num_jobs);
 
 	trace_drm_sched_process_job(s_fence);
 
+	dma_fence_get(&s_fence->finished);
 	drm_sched_fence_finished(s_fence);
+	dma_fence_put(&s_fence->finished);
 	wake_up_interruptible(&sched->wake_up_worker);
 }
 
@@ -677,7 +669,6 @@ static struct drm_sched_job *
 drm_sched_get_cleanup_job(struct drm_gpu_scheduler *sched)
 {
 	struct drm_sched_job *job;
-	unsigned long flags;
 
 	/*
 	 * Don't destroy jobs while the timeout worker is running  OR thread
@@ -685,10 +676,10 @@ drm_sched_get_cleanup_job(struct drm_gpu_scheduler *sched)
 	 */
 	if ((sched->timeout != MAX_SCHEDULE_TIMEOUT &&
 	    !cancel_delayed_work(&sched->work_tdr)) ||
-	    __kthread_should_park(sched->thread))
+	    kthread_should_park())
 		return NULL;
 
-	spin_lock_irqsave(&sched->job_list_lock, flags);
+	spin_lock(&sched->job_list_lock);
 
 	job = list_first_entry_or_null(&sched->ring_mirror_list,
 				       struct drm_sched_job, node);
@@ -702,12 +693,48 @@ drm_sched_get_cleanup_job(struct drm_gpu_scheduler *sched)
 		drm_sched_start_timeout(sched);
 	}
 
-	spin_unlock_irqrestore(&sched->job_list_lock, flags);
+	spin_unlock(&sched->job_list_lock);
 
 	return job;
 }
 
 /**
+ * drm_sched_pick_best - Get a drm sched from a sched_list with the least load
+ * @sched_list: list of drm_gpu_schedulers
+ * @num_sched_list: number of drm_gpu_schedulers in the sched_list
+ *
+ * Returns pointer of the sched with the least load or NULL if none of the
+ * drm_gpu_schedulers are ready
+ */
+struct drm_gpu_scheduler *
+drm_sched_pick_best(struct drm_gpu_scheduler **sched_list,
+		     unsigned int num_sched_list)
+{
+	struct drm_gpu_scheduler *sched, *picked_sched = NULL;
+	int i;
+	unsigned int min_jobs = UINT_MAX, num_jobs;
+
+	for (i = 0; i < num_sched_list; ++i) {
+		sched = sched_list[i];
+
+		if (!sched->ready) {
+			DRM_WARN("scheduler %s is not ready, skipping",
+				 sched->name);
+			continue;
+		}
+
+		num_jobs = atomic_read(&sched->num_jobs);
+		if (num_jobs < min_jobs) {
+			min_jobs = num_jobs;
+			picked_sched = sched;
+		}
+	}
+
+	return picked_sched;
+}
+EXPORT_SYMBOL(drm_sched_pick_best);
+
+/**
  * drm_sched_blocked - check if the scheduler is blocked
  *
  * @sched: scheduler instance
@@ -773,6 +800,7 @@ static int drm_sched_main(void *param)
 		atomic_inc(&sched->hw_rq_count);
 		drm_sched_job_begin(sched_job);
 
+		trace_drm_run_job(sched_job, entity);
 		fence = sched->ops->run_job(sched_job);
 		drm_sched_fence_scheduled(s_fence);
 
@@ -832,7 +860,7 @@ int drm_sched_init(struct drm_gpu_scheduler *sched,
 	spin_lock_init(&sched->job_list_lock);
 	atomic_set(&sched->hw_rq_count, 0);
 	INIT_DELAYED_WORK(&sched->work_tdr, drm_sched_job_timedout);
-	atomic_set(&sched->score, 0);
+	atomic_set(&sched->num_jobs, 0);
 	atomic64_set(&sched->job_id_count, 0);
 
 	/* Each scheduler will run on a seperate kernel thread */
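
For reference, a minimal sketch of how a driver might consume the two exports introduced above. The helper name example_retarget_entity() and its parameters are hypothetical; only the drm_sched_entity_modify_sched() and drm_sched_pick_best() calls and their signatures come from the diff itself, and any serialization around the entity (e.g. a context lock) is assumed to be handled by the caller.

#include <drm/drm_print.h>
#include <drm/gpu_scheduler.h>

/*
 * Hypothetical driver helper (not part of the patch): re-target an entity to
 * a new scheduler list and report which of those schedulers currently has
 * the fewest queued jobs.
 */
static void example_retarget_entity(struct drm_sched_entity *entity,
				    struct drm_gpu_scheduler **scheds,
				    unsigned int num_scheds)
{
	struct drm_gpu_scheduler *best;

	/*
	 * Swap in the new sched_list.  Per drm_sched_entity_select_rq() in
	 * the diff, the entity only migrates once its job queue is empty and
	 * the list contains more than one scheduler.
	 */
	drm_sched_entity_modify_sched(entity, scheds, num_scheds);

	/* Returns NULL if none of the schedulers are ready. */
	best = drm_sched_pick_best(scheds, num_scheds);
	if (best)
		DRM_DEBUG("least loaded scheduler: %s\n", best->name);
}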