From 0388aee766376edfd4638f203b6f6260831665f5 Mon Sep 17 00:00:00 2001
From: Leo Liu <leo.liu@amd.com>
Date: Fri, 8 Nov 2019 10:23:14 -0500
Subject: drm/amdgpu: use the JPEG structure for general driver support

JPEG1.0 will be functional along with VCN1.0

Signed-off-by: Leo Liu <leo.liu@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index 6614d8a6f4c8..8f2eea92d67c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -169,10 +169,10 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
 			}
 			break;
 		case AMDGPU_HW_IP_VCN_JPEG:
-			for (j = 0; j < adev->vcn.num_vcn_inst; ++j) {
+			for (j = 0; j < adev->jpeg.num_jpeg_inst; ++j) {
 				if (adev->vcn.harvest_config & (1 << j))
 					continue;
-				rings[num_rings++] = &adev->vcn.inst[j].ring_jpeg;
+				rings[num_rings++] = &adev->jpeg.inst[j].ring_dec;
 			}
 			break;
 		}
-- 
cgit 


From 3504bd45a99eae374ae54327bf7a6ebf1c8de15a Mon Sep 17 00:00:00 2001
From: Leo Liu <leo.liu@amd.com>
Date: Mon, 9 Dec 2019 12:58:24 -0500
Subject: drm/amdgpu: fix JPEG instance checking when ctx init

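Check the harvest configuration in the JPEG structure
(adev->jpeg.harvest_config) instead of the VCN one when selecting
the JPEG rings during context init.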

Fixes: 0388aee766376ed ("drm/amdgpu: use the JPEG structure for general driver support")
Signed-off-by: Leo Liu <leo.liu@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Reviewed-by: James Zhu <James.Zhu@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index 8f2eea92d67c..1d2bbf10614e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -170,7 +170,7 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
 			break;
 		case AMDGPU_HW_IP_VCN_JPEG:
 			for (j = 0; j < adev->jpeg.num_jpeg_inst; ++j) {
-				if (adev->vcn.harvest_config & (1 << j))
+				if (adev->jpeg.harvest_config & (1 << j))
 					continue;
 				rings[num_rings++] = &adev->jpeg.inst[j].ring_dec;
 			}
-- 
cgit 


From b3ac17667f115e64c67ea6101fc814f47134b530 Mon Sep 17 00:00:00 2001
From: Nirmoy Das <nirmoy.das@amd.com>
Date: Thu, 5 Dec 2019 11:38:00 +0100
Subject: drm/scheduler: rework entity creation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

An entity currently keeps a copy of the run-queue list and modifies it
in drm_sched_entity_set_priority(). Entities shouldn't modify the
run-queue list. Use a drm_gpu_scheduler list instead of a drm_sched_rq
list in struct drm_sched_entity. This way we can select a run queue
based on the entity/ctx's priority for a drm scheduler.

Signed-off-by: Nirmoy Das <nirmoy.das@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c  |  7 +--
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c  |  8 ++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c  |  7 +--
 drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c  |  7 +--
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c   | 14 ++++--
 drivers/gpu/drm/etnaviv/etnaviv_drv.c    |  7 +--
 drivers/gpu/drm/lima/lima_sched.c        |  5 ++-
 drivers/gpu/drm/panfrost/panfrost_job.c  |  8 ++--
 drivers/gpu/drm/scheduler/sched_entity.c | 74 +++++++++++++-------------------
 drivers/gpu/drm/v3d/v3d_drv.c            |  8 ++--
 include/drm/gpu_scheduler.h              |  8 ++--
 11 files changed, 78 insertions(+), 75 deletions(-)

(limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index 1d2bbf10614e..38ec5c919bd9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -122,7 +122,7 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
 
 	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
 		struct amdgpu_ring *rings[AMDGPU_MAX_RINGS];
-		struct drm_sched_rq *rqs[AMDGPU_MAX_RINGS];
+		struct drm_gpu_scheduler *sched_list[AMDGPU_MAX_RINGS];
 		unsigned num_rings = 0;
 		unsigned num_rqs = 0;
 
@@ -181,12 +181,13 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
 			if (!rings[j]->adev)
 				continue;
 
-			rqs[num_rqs++] = &rings[j]->sched.sched_rq[priority];
+			sched_list[num_rqs++] = &rings[j]->sched;
 		}
 
 		for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j)
 			r = drm_sched_entity_init(&ctx->entities[i][j].entity,
-						  rqs, num_rqs, &ctx->guilty);
+						  priority, sched_list,
+						  num_rqs, &ctx->guilty);
 		if (r)
 			goto error_cleanup_entities;
 	}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 445de594c214..fbf2961202ee 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -1987,11 +1987,13 @@ void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable)
 
 	if (enable) {
 		struct amdgpu_ring *ring;
-		struct drm_sched_rq *rq;
+		struct drm_gpu_scheduler *sched;
 
 		ring = adev->mman.buffer_funcs_ring;
-		rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_KERNEL];
-		r = drm_sched_entity_init(&adev->mman.entity, &rq, 1, NULL);
+		sched = &ring->sched;
+		r = drm_sched_entity_init(&adev->mman.entity,
+				          DRM_SCHED_PRIORITY_KERNEL, &sched,
+					  1, NULL);
 		if (r) {
 			DRM_ERROR("Failed setting up TTM BO move entity (%d)\n",
 				  r);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
index d587ffe2af8e..a92f3b18e657 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
@@ -330,12 +330,13 @@ int amdgpu_uvd_sw_fini(struct amdgpu_device *adev)
 int amdgpu_uvd_entity_init(struct amdgpu_device *adev)
 {
 	struct amdgpu_ring *ring;
-	struct drm_sched_rq *rq;
+	struct drm_gpu_scheduler *sched;
 	int r;
 
 	ring = &adev->uvd.inst[0].ring;
-	rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL];
-	r = drm_sched_entity_init(&adev->uvd.entity, &rq, 1, NULL);
+	sched = &ring->sched;
+	r = drm_sched_entity_init(&adev->uvd.entity, DRM_SCHED_PRIORITY_NORMAL,
+				  &sched, 1, NULL);
 	if (r) {
 		DRM_ERROR("Failed setting up UVD kernel entity.\n");
 		return r;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
index 46b590af2fd2..ceb0dbf685f1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
@@ -240,12 +240,13 @@ int amdgpu_vce_sw_fini(struct amdgpu_device *adev)
 int amdgpu_vce_entity_init(struct amdgpu_device *adev)
 {
 	struct amdgpu_ring *ring;
-	struct drm_sched_rq *rq;
+	struct drm_gpu_scheduler *sched;
 	int r;
 
 	ring = &adev->vce.ring[0];
-	rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL];
-	r = drm_sched_entity_init(&adev->vce.entity, &rq, 1, NULL);
+	sched = &ring->sched;
+	r = drm_sched_entity_init(&adev->vce.entity, DRM_SCHED_PRIORITY_NORMAL,
+				  &sched, 1, NULL);
 	if (r != 0) {
 		DRM_ERROR("Failed setting up VCE run queue.\n");
 		return r;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 8f26504a59a7..a0be1d2f02dc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -2740,6 +2740,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 {
 	struct amdgpu_bo_param bp;
 	struct amdgpu_bo *root;
+	struct drm_gpu_scheduler *sched_list[AMDGPU_MAX_RINGS];
 	int r, i;
 
 	vm->va = RB_ROOT_CACHED;
@@ -2753,14 +2754,19 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 	spin_lock_init(&vm->invalidated_lock);
 	INIT_LIST_HEAD(&vm->freed);
 
+	for (i = 0; i < adev->vm_manager.vm_pte_num_rqs; i++)
+		sched_list[i] = adev->vm_manager.vm_pte_rqs[i]->sched;
+
 	/* create scheduler entities for page table updates */
-	r = drm_sched_entity_init(&vm->direct, adev->vm_manager.vm_pte_rqs,
-				  adev->vm_manager.vm_pte_num_rqs, NULL);
+	r = drm_sched_entity_init(&vm->direct, DRM_SCHED_PRIORITY_NORMAL,
+				  sched_list, adev->vm_manager.vm_pte_num_rqs,
+				  NULL);
 	if (r)
 		return r;
 
-	r = drm_sched_entity_init(&vm->delayed, adev->vm_manager.vm_pte_rqs,
-				  adev->vm_manager.vm_pte_num_rqs, NULL);
+	r = drm_sched_entity_init(&vm->delayed, DRM_SCHED_PRIORITY_NORMAL,
+				  sched_list, adev->vm_manager.vm_pte_num_rqs,
+				  NULL);
 	if (r)
 		goto error_free_direct;
 
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_drv.c b/drivers/gpu/drm/etnaviv/etnaviv_drv.c
index 1f9c01be40d7..76ecdf8bd31c 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_drv.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_drv.c
@@ -65,12 +65,13 @@ static int etnaviv_open(struct drm_device *dev, struct drm_file *file)
 
 	for (i = 0; i < ETNA_MAX_PIPES; i++) {
 		struct etnaviv_gpu *gpu = priv->gpu[i];
-		struct drm_sched_rq *rq;
+		struct drm_gpu_scheduler *sched;
 
 		if (gpu) {
-			rq = &gpu->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL];
+			sched = &gpu->sched;
 			drm_sched_entity_init(&ctx->sched_entity[i],
-					      &rq, 1, NULL);
+					      DRM_SCHED_PRIORITY_NORMAL, &sched,
+					      1, NULL);
 			}
 	}
 
diff --git a/drivers/gpu/drm/lima/lima_sched.c b/drivers/gpu/drm/lima/lima_sched.c
index f522c5f99729..fc8362e4149b 100644
--- a/drivers/gpu/drm/lima/lima_sched.c
+++ b/drivers/gpu/drm/lima/lima_sched.c
@@ -159,9 +159,10 @@ int lima_sched_context_init(struct lima_sched_pipe *pipe,
 			    struct lima_sched_context *context,
 			    atomic_t *guilty)
 {
-	struct drm_sched_rq *rq = pipe->base.sched_rq + DRM_SCHED_PRIORITY_NORMAL;
+	struct drm_gpu_scheduler *sched = &pipe->base;
 
-	return drm_sched_entity_init(&context->base, &rq, 1, guilty);
+	return drm_sched_entity_init(&context->base, DRM_SCHED_PRIORITY_NORMAL,
+				     &sched, 1, guilty);
 }
 
 void lima_sched_context_fini(struct lima_sched_pipe *pipe,
diff --git a/drivers/gpu/drm/panfrost/panfrost_job.c b/drivers/gpu/drm/panfrost/panfrost_job.c
index d411eb6c8eb9..a9ed088ebf08 100644
--- a/drivers/gpu/drm/panfrost/panfrost_job.c
+++ b/drivers/gpu/drm/panfrost/panfrost_job.c
@@ -542,12 +542,14 @@ int panfrost_job_open(struct panfrost_file_priv *panfrost_priv)
 {
 	struct panfrost_device *pfdev = panfrost_priv->pfdev;
 	struct panfrost_job_slot *js = pfdev->js;
-	struct drm_sched_rq *rq;
+	struct drm_gpu_scheduler *sched;
 	int ret, i;
 
 	for (i = 0; i < NUM_JOB_SLOTS; i++) {
-		rq = &js->queue[i].sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL];
-		ret = drm_sched_entity_init(&panfrost_priv->sched_entity[i], &rq, 1, NULL);
+		sched = &js->queue[i].sched;
+		ret = drm_sched_entity_init(&panfrost_priv->sched_entity[i],
+					    DRM_SCHED_PRIORITY_NORMAL, &sched,
+					    1, NULL);
 		if (WARN_ON(ret))
 			return ret;
 	}
diff --git a/drivers/gpu/drm/scheduler/sched_entity.c b/drivers/gpu/drm/scheduler/sched_entity.c
index 461a7a8129f4..f9b6ce29c58f 100644
--- a/drivers/gpu/drm/scheduler/sched_entity.c
+++ b/drivers/gpu/drm/scheduler/sched_entity.c
@@ -38,9 +38,10 @@
  * submit to HW ring.
  *
  * @entity: scheduler entity to init
- * @rq_list: the list of run queue on which jobs from this
+ * @priority: priority of the entity
+ * @sched_list: the list of drm scheds on which jobs from this
  *           entity can be submitted
- * @num_rq_list: number of run queue in rq_list
+ * @num_sched_list: number of drm sched in sched_list
  * @guilty: atomic_t set to 1 when a job on this queue
  *          is found to be guilty causing a timeout
  *
@@ -50,32 +51,35 @@
  * Returns 0 on success or a negative error code on failure.
  */
 int drm_sched_entity_init(struct drm_sched_entity *entity,
-			  struct drm_sched_rq **rq_list,
-			  unsigned int num_rq_list,
+			  enum drm_sched_priority priority,
+			  struct drm_gpu_scheduler **sched_list,
+			  unsigned int num_sched_list,
 			  atomic_t *guilty)
 {
 	int i;
 
-	if (!(entity && rq_list && (num_rq_list == 0 || rq_list[0])))
+	if (!(entity && sched_list && (num_sched_list == 0 || sched_list[0])))
 		return -EINVAL;
 
 	memset(entity, 0, sizeof(struct drm_sched_entity));
 	INIT_LIST_HEAD(&entity->list);
 	entity->rq = NULL;
 	entity->guilty = guilty;
-	entity->num_rq_list = num_rq_list;
-	entity->rq_list = kcalloc(num_rq_list, sizeof(struct drm_sched_rq *),
-				GFP_KERNEL);
-	if (!entity->rq_list)
+	entity->num_sched_list = num_sched_list;
+	entity->priority = priority;
+	entity->sched_list =  kcalloc(num_sched_list,
+				      sizeof(struct drm_gpu_scheduler *), GFP_KERNEL);
+
+	if(!entity->sched_list)
 		return -ENOMEM;
 
 	init_completion(&entity->entity_idle);
 
-	for (i = 0; i < num_rq_list; ++i)
-		entity->rq_list[i] = rq_list[i];
+	for (i = 0; i < num_sched_list; i++)
+		entity->sched_list[i] = sched_list[i];
 
-	if (num_rq_list)
-		entity->rq = rq_list[0];
+	if (num_sched_list)
+		entity->rq = &entity->sched_list[0]->sched_rq[entity->priority];
 
 	entity->last_scheduled = NULL;
 
@@ -139,10 +143,10 @@ drm_sched_entity_get_free_sched(struct drm_sched_entity *entity)
 	unsigned int min_jobs = UINT_MAX, num_jobs;
 	int i;
 
-	for (i = 0; i < entity->num_rq_list; ++i) {
-		struct drm_gpu_scheduler *sched = entity->rq_list[i]->sched;
+	for (i = 0; i < entity->num_sched_list; ++i) {
+		struct drm_gpu_scheduler *sched = entity->sched_list[i];
 
-		if (!entity->rq_list[i]->sched->ready) {
+		if (!entity->sched_list[i]->ready) {
 			DRM_WARN("sched%s is not ready, skipping", sched->name);
 			continue;
 		}
@@ -150,7 +154,7 @@ drm_sched_entity_get_free_sched(struct drm_sched_entity *entity)
 		num_jobs = atomic_read(&sched->num_jobs);
 		if (num_jobs < min_jobs) {
 			min_jobs = num_jobs;
-			rq = entity->rq_list[i];
+			rq = &entity->sched_list[i]->sched_rq[entity->priority];
 		}
 	}
 
@@ -308,7 +312,7 @@ void drm_sched_entity_fini(struct drm_sched_entity *entity)
 
 	dma_fence_put(entity->last_scheduled);
 	entity->last_scheduled = NULL;
-	kfree(entity->rq_list);
+	kfree(entity->sched_list);
 }
 EXPORT_SYMBOL(drm_sched_entity_fini);
 
@@ -353,15 +357,6 @@ static void drm_sched_entity_wakeup(struct dma_fence *f,
 	drm_sched_wakeup(entity->rq->sched);
 }
 
-/**
- * drm_sched_entity_set_rq_priority - helper for drm_sched_entity_set_priority
- */
-static void drm_sched_entity_set_rq_priority(struct drm_sched_rq **rq,
-					     enum drm_sched_priority priority)
-{
-	*rq = &(*rq)->sched->sched_rq[priority];
-}
-
 /**
  * drm_sched_entity_set_priority - Sets priority of the entity
  *
@@ -373,19 +368,8 @@ static void drm_sched_entity_set_rq_priority(struct drm_sched_rq **rq,
 void drm_sched_entity_set_priority(struct drm_sched_entity *entity,
 				   enum drm_sched_priority priority)
 {
-	unsigned int i;
-
 	spin_lock(&entity->rq_lock);
-
-	for (i = 0; i < entity->num_rq_list; ++i)
-		drm_sched_entity_set_rq_priority(&entity->rq_list[i], priority);
-
-	if (entity->rq) {
-		drm_sched_rq_remove_entity(entity->rq, entity);
-		drm_sched_entity_set_rq_priority(&entity->rq, priority);
-		drm_sched_rq_add_entity(entity->rq, entity);
-	}
-
+	entity->priority = priority;
 	spin_unlock(&entity->rq_lock);
 }
 EXPORT_SYMBOL(drm_sched_entity_set_priority);
@@ -490,20 +474,20 @@ void drm_sched_entity_select_rq(struct drm_sched_entity *entity)
 	struct dma_fence *fence;
 	struct drm_sched_rq *rq;
 
-	if (spsc_queue_count(&entity->job_queue) || entity->num_rq_list <= 1)
+	if (spsc_queue_count(&entity->job_queue) || entity->num_sched_list <= 1)
 		return;
 
 	fence = READ_ONCE(entity->last_scheduled);
 	if (fence && !dma_fence_is_signaled(fence))
 		return;
 
+	spin_lock(&entity->rq_lock);
 	rq = drm_sched_entity_get_free_sched(entity);
-	if (rq == entity->rq)
-		return;
+	if (rq != entity->rq) {
+		drm_sched_rq_remove_entity(entity->rq, entity);
+		entity->rq = rq;
+	}
 
-	spin_lock(&entity->rq_lock);
-	drm_sched_rq_remove_entity(entity->rq, entity);
-	entity->rq = rq;
 	spin_unlock(&entity->rq_lock);
 }
 
diff --git a/drivers/gpu/drm/v3d/v3d_drv.c b/drivers/gpu/drm/v3d/v3d_drv.c
index 1a07462b4528..eaa8e9682373 100644
--- a/drivers/gpu/drm/v3d/v3d_drv.c
+++ b/drivers/gpu/drm/v3d/v3d_drv.c
@@ -140,7 +140,7 @@ v3d_open(struct drm_device *dev, struct drm_file *file)
 {
 	struct v3d_dev *v3d = to_v3d_dev(dev);
 	struct v3d_file_priv *v3d_priv;
-	struct drm_sched_rq *rq;
+	struct drm_gpu_scheduler *sched;
 	int i;
 
 	v3d_priv = kzalloc(sizeof(*v3d_priv), GFP_KERNEL);
@@ -150,8 +150,10 @@ v3d_open(struct drm_device *dev, struct drm_file *file)
 	v3d_priv->v3d = v3d;
 
 	for (i = 0; i < V3D_MAX_QUEUES; i++) {
-		rq = &v3d->queue[i].sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL];
-		drm_sched_entity_init(&v3d_priv->sched_entity[i], &rq, 1, NULL);
+		sched = &v3d->queue[i].sched;
+		drm_sched_entity_init(&v3d_priv->sched_entity[i],
+				      DRM_SCHED_PRIORITY_NORMAL, &sched,
+				      1, NULL);
 	}
 
 	file->driver_priv = v3d_priv;
diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
index 684692a8ed76..96a1a1b7526e 100644
--- a/include/drm/gpu_scheduler.h
+++ b/include/drm/gpu_scheduler.h
@@ -81,8 +81,9 @@ enum drm_sched_priority {
 struct drm_sched_entity {
 	struct list_head		list;
 	struct drm_sched_rq		*rq;
-	struct drm_sched_rq		**rq_list;
-	unsigned int                    num_rq_list;
+	unsigned int                    num_sched_list;
+	struct drm_gpu_scheduler        **sched_list;
+	enum drm_sched_priority         priority;
 	spinlock_t			rq_lock;
 
 	struct spsc_queue		job_queue;
@@ -312,7 +313,8 @@ void drm_sched_rq_remove_entity(struct drm_sched_rq *rq,
 				struct drm_sched_entity *entity);
 
 int drm_sched_entity_init(struct drm_sched_entity *entity,
-			  struct drm_sched_rq **rq_list,
+			  enum drm_sched_priority priority,
+			  struct drm_gpu_scheduler **sched_list,
 			  unsigned int num_rq_list,
 			  atomic_t *guilty);
 long drm_sched_entity_flush(struct drm_sched_entity *entity, long timeout);
-- 
cgit 


From f880799d7fcf0a63ca2295d950cd12f5520251d9 Mon Sep 17 00:00:00 2001
From: Nirmoy Das <nirmoy.das@amd.com>
Date: Mon, 16 Dec 2019 14:43:34 +0100
Subject: amd/amdgpu: add sched array to IPs with multiple run-queues
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This sched array can be passed on to the entity creation routine
instead of manually creating such a sched array on every context
creation.

v2: squash in missing break fix

Signed-off-by: Nirmoy Das <nirmoy.das@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c    | 113 ++++++++++++++++++-----------
 drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h    |   3 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |   2 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h    |   4 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h   |   2 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h   |   2 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h    |   9 ++-
 7 files changed, 89 insertions(+), 46 deletions(-)

(limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index 38ec5c919bd9..63f6365312d5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -74,7 +74,7 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
 			   struct amdgpu_ctx *ctx)
 {
 	unsigned num_entities = amdgpu_ctx_total_num_entities();
-	unsigned i, j, k;
+	unsigned i, j;
 	int r;
 
 	if (priority < 0 || priority >= DRM_SCHED_PRIORITY_MAX)
@@ -121,73 +121,56 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
 	ctx->override_priority = DRM_SCHED_PRIORITY_UNSET;
 
 	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
-		struct amdgpu_ring *rings[AMDGPU_MAX_RINGS];
-		struct drm_gpu_scheduler *sched_list[AMDGPU_MAX_RINGS];
-		unsigned num_rings = 0;
-		unsigned num_rqs = 0;
+		struct drm_gpu_scheduler **scheds;
+		struct drm_gpu_scheduler *sched;
+		unsigned num_scheds = 0;
 
 		switch (i) {
 		case AMDGPU_HW_IP_GFX:
-			rings[0] = &adev->gfx.gfx_ring[0];
-			num_rings = 1;
+			scheds = adev->gfx.gfx_sched;
+			num_scheds = 1;
 			break;
 		case AMDGPU_HW_IP_COMPUTE:
-			for (j = 0; j < adev->gfx.num_compute_rings; ++j)
-				rings[j] = &adev->gfx.compute_ring[j];
-			num_rings = adev->gfx.num_compute_rings;
+			scheds = adev->gfx.compute_sched;
+			num_scheds = adev->gfx.num_compute_sched;
 			break;
 		case AMDGPU_HW_IP_DMA:
-			for (j = 0; j < adev->sdma.num_instances; ++j)
-				rings[j] = &adev->sdma.instance[j].ring;
-			num_rings = adev->sdma.num_instances;
+			scheds = adev->sdma.sdma_sched;
+			num_scheds = adev->sdma.num_sdma_sched;
 			break;
 		case AMDGPU_HW_IP_UVD:
-			rings[0] = &adev->uvd.inst[0].ring;
-			num_rings = 1;
+			sched = &adev->uvd.inst[0].ring.sched;
+			scheds = &sched;
+			num_scheds = 1;
 			break;
 		case AMDGPU_HW_IP_VCE:
-			rings[0] = &adev->vce.ring[0];
-			num_rings = 1;
+			sched = &adev->vce.ring[0].sched;
+			scheds = &sched;
+			num_scheds = 1;
 			break;
 		case AMDGPU_HW_IP_UVD_ENC:
-			rings[0] = &adev->uvd.inst[0].ring_enc[0];
-			num_rings = 1;
+			sched = &adev->uvd.inst[0].ring_enc[0].sched;
+			scheds = &sched;
+			num_scheds = 1;
 			break;
 		case AMDGPU_HW_IP_VCN_DEC:
-			for (j = 0; j < adev->vcn.num_vcn_inst; ++j) {
-				if (adev->vcn.harvest_config & (1 << j))
-					continue;
-				rings[num_rings++] = &adev->vcn.inst[j].ring_dec;
-			}
+			scheds = adev->vcn.vcn_dec_sched;
+			num_scheds =  adev->vcn.num_vcn_dec_sched;
 			break;
 		case AMDGPU_HW_IP_VCN_ENC:
-			for (j = 0; j < adev->vcn.num_vcn_inst; ++j) {
-				if (adev->vcn.harvest_config & (1 << j))
-					continue;
-				for (k = 0; k < adev->vcn.num_enc_rings; ++k)
-					rings[num_rings++] = &adev->vcn.inst[j].ring_enc[k];
-			}
+			scheds = adev->vcn.vcn_enc_sched;
+			num_scheds =  adev->vcn.num_vcn_enc_sched;
 			break;
 		case AMDGPU_HW_IP_VCN_JPEG:
-			for (j = 0; j < adev->jpeg.num_jpeg_inst; ++j) {
-				if (adev->jpeg.harvest_config & (1 << j))
-					continue;
-				rings[num_rings++] = &adev->jpeg.inst[j].ring_dec;
-			}
+			scheds = adev->jpeg.jpeg_sched;
+			num_scheds =  adev->jpeg.num_jpeg_sched;
 			break;
 		}
 
-		for (j = 0; j < num_rings; ++j) {
-			if (!rings[j]->adev)
-				continue;
-
-			sched_list[num_rqs++] = &rings[j]->sched;
-		}
-
 		for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j)
 			r = drm_sched_entity_init(&ctx->entities[i][j].entity,
-						  priority, sched_list,
-						  num_rqs, &ctx->guilty);
+						  priority, scheds,
+						  num_scheds, &ctx->guilty);
 		if (r)
 			goto error_cleanup_entities;
 	}
@@ -628,3 +611,45 @@ void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr)
 	idr_destroy(&mgr->ctx_handles);
 	mutex_destroy(&mgr->lock);
 }
+
+void amdgpu_ctx_init_sched(struct amdgpu_device *adev)
+{
+	int i, j;
+
+	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
+		adev->gfx.gfx_sched[i] = &adev->gfx.gfx_ring[i].sched;
+		adev->gfx.num_gfx_sched++;
+	}
+
+	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+		adev->gfx.compute_sched[i] = &adev->gfx.compute_ring[i].sched;
+		adev->gfx.num_compute_sched++;
+	}
+
+	for (i = 0; i < adev->sdma.num_instances; i++) {
+		adev->sdma.sdma_sched[i] = &adev->sdma.instance[i].ring.sched;
+		adev->sdma.num_sdma_sched++;
+	}
+
+	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+		if (adev->vcn.harvest_config & (1 << i))
+			continue;
+		adev->vcn.vcn_dec_sched[adev->vcn.num_vcn_dec_sched++] =
+			&adev->vcn.inst[i].ring_dec.sched;
+	}
+
+	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+		if (adev->vcn.harvest_config & (1 << i))
+			continue;
+		for (j = 0; j < adev->vcn.num_enc_rings; ++j)
+			adev->vcn.vcn_enc_sched[adev->vcn.num_vcn_enc_sched++] =
+				&adev->vcn.inst[i].ring_enc[j].sched;
+	}
+
+	for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+		if (adev->jpeg.harvest_config & (1 << i))
+			continue;
+		adev->jpeg.jpeg_sched[adev->jpeg.num_jpeg_sched++] =
+			&adev->jpeg.inst[i].ring_dec.sched;
+	}
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
index da808633732b..4ad90a44dc3c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
@@ -87,4 +87,7 @@ void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr);
 long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long timeout);
 void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr);
 
+void amdgpu_ctx_init_sched(struct amdgpu_device *adev);
+
+
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index db91663df4f6..e1f8b715301a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -3036,6 +3036,8 @@ fence_driver_init:
 			adev->gfx.config.max_cu_per_sh,
 			adev->gfx.cu_info.number);
 
+	amdgpu_ctx_init_sched(adev);
+
 	adev->accel_working = true;
 
 	amdgpu_vm_check_compute_bug(adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
index 0ae0a2715b0d..8e88e0411662 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
@@ -269,8 +269,12 @@ struct amdgpu_gfx {
 	bool				me_fw_write_wait;
 	bool				cp_fw_write_wait;
 	struct amdgpu_ring		gfx_ring[AMDGPU_MAX_GFX_RINGS];
+	struct drm_gpu_scheduler	*gfx_sched[AMDGPU_MAX_GFX_RINGS];
+	uint32_t			num_gfx_sched;
 	unsigned			num_gfx_rings;
 	struct amdgpu_ring		compute_ring[AMDGPU_MAX_COMPUTE_RINGS];
+	struct drm_gpu_scheduler	*compute_sched[AMDGPU_MAX_COMPUTE_RINGS];
+	uint32_t			num_compute_sched;
 	unsigned			num_compute_rings;
 	struct amdgpu_irq_src		eop_irq;
 	struct amdgpu_irq_src		priv_reg_irq;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h
index 5131a0a1bc8a..bd9ef9cc86de 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h
@@ -43,6 +43,8 @@ struct amdgpu_jpeg {
 	uint8_t	num_jpeg_inst;
 	struct amdgpu_jpeg_inst inst[AMDGPU_MAX_JPEG_INSTANCES];
 	struct amdgpu_jpeg_reg internal;
+	struct drm_gpu_scheduler *jpeg_sched[AMDGPU_MAX_JPEG_INSTANCES];
+	uint32_t num_jpeg_sched;
 	unsigned harvest_config;
 	struct delayed_work idle_work;
 	enum amd_powergating_state cur_state;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
index 761ff8be6314..346dcb1f7146 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
@@ -52,6 +52,8 @@ struct amdgpu_sdma_instance {
 
 struct amdgpu_sdma {
 	struct amdgpu_sdma_instance instance[AMDGPU_MAX_SDMA_INSTANCES];
+	struct drm_gpu_scheduler    *sdma_sched[AMDGPU_MAX_SDMA_INSTANCES];
+	uint32_t		    num_sdma_sched;
 	struct amdgpu_irq_src	trap_irq;
 	struct amdgpu_irq_src	illegal_inst_irq;
 	struct amdgpu_irq_src	ecc_irq;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
index 3484ead62046..e6dee8224d33 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
@@ -31,6 +31,7 @@
 #define AMDGPU_VCN_MAX_ENC_RINGS	3
 
 #define AMDGPU_MAX_VCN_INSTANCES	2
+#define AMDGPU_MAX_VCN_ENC_RINGS  AMDGPU_VCN_MAX_ENC_RINGS * AMDGPU_MAX_VCN_INSTANCES
 
 #define AMDGPU_VCN_HARVEST_VCN0 (1 << 0)
 #define AMDGPU_VCN_HARVEST_VCN1 (1 << 1)
@@ -189,8 +190,12 @@ struct amdgpu_vcn {
 	uint32_t		*dpg_sram_curr_addr;
 
 	uint8_t	num_vcn_inst;
-	struct amdgpu_vcn_inst	inst[AMDGPU_MAX_VCN_INSTANCES];
-	struct amdgpu_vcn_reg	internal;
+	struct amdgpu_vcn_inst	 inst[AMDGPU_MAX_VCN_INSTANCES];
+	struct amdgpu_vcn_reg	 internal;
+	struct drm_gpu_scheduler *vcn_enc_sched[AMDGPU_MAX_VCN_ENC_RINGS];
+	struct drm_gpu_scheduler *vcn_dec_sched[AMDGPU_MAX_VCN_INSTANCES];
+	uint32_t		 num_vcn_enc_sched;
+	uint32_t		 num_vcn_dec_sched;
 
 	unsigned	harvest_config;
 	int (*pause_dpg_mode)(struct amdgpu_device *adev,
-- 
cgit 


From 0a96afc7c56cbac075426a6a5f82a88cadb648f7 Mon Sep 17 00:00:00 2001
From: Le Ma <le.ma@amd.com>
Date: Thu, 19 Dec 2019 19:26:02 +0800
Subject: drm/amdgpu: fix ctx init failure for asics without gfx ring

This workaround does not affect other asics because amdgpu only needs
to expose one gfx sched to the user for now.

Signed-off-by: Le Ma <le.ma@amd.com>
Reviewed-by: Nirmoy Das <nirmoy.das@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index 63f6365312d5..64e2babbc36e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -127,7 +127,8 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
 
 		switch (i) {
 		case AMDGPU_HW_IP_GFX:
-			scheds = adev->gfx.gfx_sched;
+			sched = &adev->gfx.gfx_ring[0].sched;
+			scheds = &sched;
 			num_scheds = 1;
 			break;
 		case AMDGPU_HW_IP_COMPUTE:
-- 
cgit 


From 63e3ab9a8226c0271e823f32fc1c3de67d142c78 Mon Sep 17 00:00:00 2001
From: Nirmoy Das <nirmoy.das@amd.com>
Date: Tue, 21 Jan 2020 13:29:20 +0100
Subject: drm/amdgpu: individualize fence allocation per entity
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Allocate fences for each entity and remove the ctx->fences reference, as
fences should be bound to amdgpu_ctx_entity instead of amdgpu_ctx.

Signed-off-by: Nirmoy Das <nirmoy.das@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 44 ++++++++++++++++++++-------------
 drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h |  1 -
 2 files changed, 27 insertions(+), 18 deletions(-)

(limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index 64e2babbc36e..05c2af61e7de 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -87,24 +87,24 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
 	memset(ctx, 0, sizeof(*ctx));
 	ctx->adev = adev;
 
-	ctx->fences = kcalloc(amdgpu_sched_jobs * num_entities,
-			      sizeof(struct dma_fence*), GFP_KERNEL);
-	if (!ctx->fences)
-		return -ENOMEM;
 
 	ctx->entities[0] = kcalloc(num_entities,
 				   sizeof(struct amdgpu_ctx_entity),
 				   GFP_KERNEL);
-	if (!ctx->entities[0]) {
-		r = -ENOMEM;
-		goto error_free_fences;
-	}
+	if (!ctx->entities[0])
+		return -ENOMEM;
+
 
 	for (i = 0; i < num_entities; ++i) {
 		struct amdgpu_ctx_entity *entity = &ctx->entities[0][i];
 
 		entity->sequence = 1;
-		entity->fences = &ctx->fences[amdgpu_sched_jobs * i];
+		entity->fences = kcalloc(amdgpu_sched_jobs,
+					 sizeof(struct dma_fence*), GFP_KERNEL);
+		if (!entity->fences) {
+			r = -ENOMEM;
+			goto error_cleanup_memory;
+		}
 	}
 	for (i = 1; i < AMDGPU_HW_IP_NUM; ++i)
 		ctx->entities[i] = ctx->entities[i - 1] +
@@ -181,11 +181,17 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
 error_cleanup_entities:
 	for (i = 0; i < num_entities; ++i)
 		drm_sched_entity_destroy(&ctx->entities[0][i].entity);
-	kfree(ctx->entities[0]);
 
-error_free_fences:
-	kfree(ctx->fences);
-	ctx->fences = NULL;
+error_cleanup_memory:
+	for (i = 0; i < num_entities; ++i) {
+		struct amdgpu_ctx_entity *entity = &ctx->entities[0][i];
+
+		kfree(entity->fences);
+		entity->fences = NULL;
+	}
+
+	kfree(ctx->entities[0]);
+	ctx->entities[0] = NULL;
 	return r;
 }
 
@@ -199,12 +205,16 @@ static void amdgpu_ctx_fini(struct kref *ref)
 	if (!adev)
 		return;
 
-	for (i = 0; i < num_entities; ++i)
+	for (i = 0; i < num_entities; ++i) {
+		struct amdgpu_ctx_entity *entity = &ctx->entities[0][i];
+
 		for (j = 0; j < amdgpu_sched_jobs; ++j)
-			dma_fence_put(ctx->entities[0][i].fences[j]);
-	kfree(ctx->fences);
-	kfree(ctx->entities[0]);
+			dma_fence_put(entity->fences[j]);
 
+		kfree(entity->fences);
+	}
+
+	kfree(ctx->entities[0]);
 	mutex_destroy(&ctx->lock);
 
 	kfree(ctx);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
index 4ad90a44dc3c..a6cd9d4b078c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
@@ -42,7 +42,6 @@ struct amdgpu_ctx {
 	unsigned			reset_counter_query;
 	uint32_t			vram_lost_counter;
 	spinlock_t			ring_lock;
-	struct dma_fence		**fences;
 	struct amdgpu_ctx_entity	*entities[AMDGPU_HW_IP_NUM];
 	bool				preamble_presented;
 	enum drm_sched_priority		init_priority;
-- 
cgit 


From 977f7e1068be60f3cac7d80ab18692341f3b7fc5 Mon Sep 17 00:00:00 2001
From: Nirmoy Das <nirmoy.das@amd.com>
Date: Tue, 21 Jan 2020 15:53:53 +0100
Subject: drm/amdgpu: allocate entities on demand
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Currently we pre-allocate entities and fences for all the HW IPs on
context creation, some of which might never be used.

This patch tries to avoid wasting entities and fences by creating an
entity only when it is needed.

v2: allocate memory for entity and fences together

Signed-off-by: Nirmoy Das <nirmoy.das@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 235 ++++++++++++++++----------------
 drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h |   6 +-
 2 files changed, 124 insertions(+), 117 deletions(-)

(limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index 05c2af61e7de..94a6c42f29ea 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -42,19 +42,12 @@ const unsigned int amdgpu_ctx_num_entities[AMDGPU_HW_IP_NUM] = {
 	[AMDGPU_HW_IP_VCN_JPEG]	=	1,
 };
 
-static int amdgpu_ctx_total_num_entities(void)
-{
-	unsigned i, num_entities = 0;
-
-	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i)
-		num_entities += amdgpu_ctx_num_entities[i];
-
-	return num_entities;
-}
-
 static int amdgpu_ctx_priority_permit(struct drm_file *filp,
 				      enum drm_sched_priority priority)
 {
+	if (priority < 0 || priority >= DRM_SCHED_PRIORITY_MAX)
+		return -EINVAL;
+
 	/* NORMAL and below are accessible by everyone */
 	if (priority <= DRM_SCHED_PRIORITY_NORMAL)
 		return 0;
@@ -68,64 +61,24 @@ static int amdgpu_ctx_priority_permit(struct drm_file *filp,
 	return -EACCES;
 }
 
-static int amdgpu_ctx_init(struct amdgpu_device *adev,
-			   enum drm_sched_priority priority,
-			   struct drm_file *filp,
-			   struct amdgpu_ctx *ctx)
+static int amdgpu_ctx_init_entity(struct amdgpu_ctx *ctx, const u32 hw_ip, const u32 ring)
 {
-	unsigned num_entities = amdgpu_ctx_total_num_entities();
-	unsigned i, j;
+	struct amdgpu_device *adev = ctx->adev;
+	struct amdgpu_ctx_entity *entity;
+	struct drm_gpu_scheduler **scheds = NULL, *sched = NULL;
+	unsigned num_scheds = 0;
+	enum drm_sched_priority priority;
 	int r;
 
-	if (priority < 0 || priority >= DRM_SCHED_PRIORITY_MAX)
-		return -EINVAL;
-
-	r = amdgpu_ctx_priority_permit(filp, priority);
-	if (r)
-		return r;
-
-	memset(ctx, 0, sizeof(*ctx));
-	ctx->adev = adev;
-
-
-	ctx->entities[0] = kcalloc(num_entities,
-				   sizeof(struct amdgpu_ctx_entity),
-				   GFP_KERNEL);
-	if (!ctx->entities[0])
-		return -ENOMEM;
-
+	entity = kcalloc(1, offsetof(typeof(*entity), fences[amdgpu_sched_jobs]),
+			 GFP_KERNEL);
+	if (!entity)
+		return  -ENOMEM;
 
-	for (i = 0; i < num_entities; ++i) {
-		struct amdgpu_ctx_entity *entity = &ctx->entities[0][i];
-
-		entity->sequence = 1;
-		entity->fences = kcalloc(amdgpu_sched_jobs,
-					 sizeof(struct dma_fence*), GFP_KERNEL);
-		if (!entity->fences) {
-			r = -ENOMEM;
-			goto error_cleanup_memory;
-		}
-	}
-	for (i = 1; i < AMDGPU_HW_IP_NUM; ++i)
-		ctx->entities[i] = ctx->entities[i - 1] +
-			amdgpu_ctx_num_entities[i - 1];
-
-	kref_init(&ctx->refcount);
-	spin_lock_init(&ctx->ring_lock);
-	mutex_init(&ctx->lock);
-
-	ctx->reset_counter = atomic_read(&adev->gpu_reset_counter);
-	ctx->reset_counter_query = ctx->reset_counter;
-	ctx->vram_lost_counter = atomic_read(&adev->vram_lost_counter);
-	ctx->init_priority = priority;
-	ctx->override_priority = DRM_SCHED_PRIORITY_UNSET;
-
-	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
-		struct drm_gpu_scheduler **scheds;
-		struct drm_gpu_scheduler *sched;
-		unsigned num_scheds = 0;
-
-		switch (i) {
+	entity->sequence = 1;
+	priority = (ctx->override_priority == DRM_SCHED_PRIORITY_UNSET) ?
+				ctx->init_priority : ctx->override_priority;
+	switch (hw_ip) {
 		case AMDGPU_HW_IP_GFX:
 			sched = &adev->gfx.gfx_ring[0].sched;
 			scheds = &sched;
@@ -166,63 +119,90 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
 			scheds = adev->jpeg.jpeg_sched;
 			num_scheds =  adev->jpeg.num_jpeg_sched;
 			break;
-		}
-
-		for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j)
-			r = drm_sched_entity_init(&ctx->entities[i][j].entity,
-						  priority, scheds,
-						  num_scheds, &ctx->guilty);
-		if (r)
-			goto error_cleanup_entities;
 	}
 
+	r = drm_sched_entity_init(&entity->entity, priority, scheds, num_scheds,
+				  &ctx->guilty);
+	if (r)
+		goto error_free_entity;
+
+	ctx->entities[hw_ip][ring] = entity;
 	return 0;
 
-error_cleanup_entities:
-	for (i = 0; i < num_entities; ++i)
-		drm_sched_entity_destroy(&ctx->entities[0][i].entity);
+error_free_entity:
+	kfree(entity);
 
-error_cleanup_memory:
-	for (i = 0; i < num_entities; ++i) {
-		struct amdgpu_ctx_entity *entity = &ctx->entities[0][i];
+	return r;
+}
 
-		kfree(entity->fences);
-		entity->fences = NULL;
-	}
+static int amdgpu_ctx_init(struct amdgpu_device *adev,
+			   enum drm_sched_priority priority,
+			   struct drm_file *filp,
+			   struct amdgpu_ctx *ctx)
+{
+	int r;
 
-	kfree(ctx->entities[0]);
-	ctx->entities[0] = NULL;
-	return r;
+	r = amdgpu_ctx_priority_permit(filp, priority);
+	if (r)
+		return r;
+
+	memset(ctx, 0, sizeof(*ctx));
+
+	ctx->adev = adev;
+
+	kref_init(&ctx->refcount);
+	spin_lock_init(&ctx->ring_lock);
+	mutex_init(&ctx->lock);
+
+	ctx->reset_counter = atomic_read(&adev->gpu_reset_counter);
+	ctx->reset_counter_query = ctx->reset_counter;
+	ctx->vram_lost_counter = atomic_read(&adev->vram_lost_counter);
+	ctx->init_priority = priority;
+	ctx->override_priority = DRM_SCHED_PRIORITY_UNSET;
+
+	return 0;
+
+}
+
+static void amdgpu_ctx_fini_entity(struct amdgpu_ctx_entity *entity)
+{
+
+	int i;
+
+	if (!entity)
+		return;
+
+	for (i = 0; i < amdgpu_sched_jobs; ++i)
+		dma_fence_put(entity->fences[i]);
+
+	kfree(entity);
 }
 
 static void amdgpu_ctx_fini(struct kref *ref)
 {
 	struct amdgpu_ctx *ctx = container_of(ref, struct amdgpu_ctx, refcount);
-	unsigned num_entities = amdgpu_ctx_total_num_entities();
 	struct amdgpu_device *adev = ctx->adev;
 	unsigned i, j;
 
 	if (!adev)
 		return;
 
-	for (i = 0; i < num_entities; ++i) {
-		struct amdgpu_ctx_entity *entity = &ctx->entities[0][i];
-
-		for (j = 0; j < amdgpu_sched_jobs; ++j)
-			dma_fence_put(entity->fences[j]);
-
-		kfree(entity->fences);
+	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
+		for (j = 0; j < AMDGPU_MAX_ENTITY_NUM; ++j) {
+			amdgpu_ctx_fini_entity(ctx->entities[i][j]);
+			ctx->entities[i][j] = NULL;
+		}
 	}
 
-	kfree(ctx->entities[0]);
 	mutex_destroy(&ctx->lock);
-
 	kfree(ctx);
 }
 
 int amdgpu_ctx_get_entity(struct amdgpu_ctx *ctx, u32 hw_ip, u32 instance,
 			  u32 ring, struct drm_sched_entity **entity)
 {
+	int r;
+
 	if (hw_ip >= AMDGPU_HW_IP_NUM) {
 		DRM_ERROR("unknown HW IP type: %d\n", hw_ip);
 		return -EINVAL;
@@ -239,7 +219,13 @@ int amdgpu_ctx_get_entity(struct amdgpu_ctx *ctx, u32 hw_ip, u32 instance,
 		return -EINVAL;
 	}
 
-	*entity = &ctx->entities[hw_ip][ring].entity;
+	if (ctx->entities[hw_ip][ring] == NULL) {
+		r = amdgpu_ctx_init_entity(ctx, hw_ip, ring);
+		if (r)
+			return r;
+	}
+
+	*entity = &ctx->entities[hw_ip][ring]->entity;
 	return 0;
 }
 
@@ -279,14 +265,17 @@ static int amdgpu_ctx_alloc(struct amdgpu_device *adev,
 static void amdgpu_ctx_do_release(struct kref *ref)
 {
 	struct amdgpu_ctx *ctx;
-	unsigned num_entities;
-	u32 i;
+	u32 i, j;
 
 	ctx = container_of(ref, struct amdgpu_ctx, refcount);
+	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
+		for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
+			if (!ctx->entities[i][j])
+				continue;
 
-	num_entities = amdgpu_ctx_total_num_entities();
-	for (i = 0; i < num_entities; i++)
-		drm_sched_entity_destroy(&ctx->entities[0][i].entity);
+			drm_sched_entity_destroy(&ctx->entities[i][j]->entity);
+		}
+	}
 
 	amdgpu_ctx_fini(ref);
 }
@@ -516,19 +505,23 @@ struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
 void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx,
 				  enum drm_sched_priority priority)
 {
-	unsigned num_entities = amdgpu_ctx_total_num_entities();
 	enum drm_sched_priority ctx_prio;
-	unsigned i;
+	unsigned i, j;
 
 	ctx->override_priority = priority;
 
 	ctx_prio = (ctx->override_priority == DRM_SCHED_PRIORITY_UNSET) ?
 			ctx->init_priority : ctx->override_priority;
+	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
+		for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
+			struct drm_sched_entity *entity;
 
-	for (i = 0; i < num_entities; i++) {
-		struct drm_sched_entity *entity = &ctx->entities[0][i].entity;
+			if (!ctx->entities[i][j])
+				continue;
 
-		drm_sched_entity_set_priority(entity, ctx_prio);
+			entity = &ctx->entities[i][j]->entity;
+			drm_sched_entity_set_priority(entity, ctx_prio);
+		}
 	}
 }
 
@@ -564,20 +557,24 @@ void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr)
 
 long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long timeout)
 {
-	unsigned num_entities = amdgpu_ctx_total_num_entities();
 	struct amdgpu_ctx *ctx;
 	struct idr *idp;
-	uint32_t id, i;
+	uint32_t id, i, j;
 
 	idp = &mgr->ctx_handles;
 
 	mutex_lock(&mgr->lock);
 	idr_for_each_entry(idp, ctx, id) {
-		for (i = 0; i < num_entities; i++) {
-			struct drm_sched_entity *entity;
+		for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
+			for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
+				struct drm_sched_entity *entity;
+
+				if (!ctx->entities[i][j])
+					continue;
 
-			entity = &ctx->entities[0][i].entity;
-			timeout = drm_sched_entity_flush(entity, timeout);
+				entity = &ctx->entities[i][j]->entity;
+				timeout = drm_sched_entity_flush(entity, timeout);
+			}
 		}
 	}
 	mutex_unlock(&mgr->lock);
@@ -586,10 +583,9 @@ long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long timeout)
 
 void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr)
 {
-	unsigned num_entities = amdgpu_ctx_total_num_entities();
 	struct amdgpu_ctx *ctx;
 	struct idr *idp;
-	uint32_t id, i;
+	uint32_t id, i, j;
 
 	idp = &mgr->ctx_handles;
 
@@ -599,8 +595,17 @@ void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr)
 			continue;
 		}
 
-		for (i = 0; i < num_entities; i++)
-			drm_sched_entity_fini(&ctx->entities[0][i].entity);
+		for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
+			for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
+				struct drm_sched_entity *entity;
+
+				if (!ctx->entities[i][j])
+					continue;
+
+				entity = &ctx->entities[i][j]->entity;
+				drm_sched_entity_fini(entity);
+			}
+		}
 	}
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
index a6cd9d4b078c..de490f183af2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
@@ -29,10 +29,12 @@ struct drm_device;
 struct drm_file;
 struct amdgpu_fpriv;
 
+#define AMDGPU_MAX_ENTITY_NUM 4
+
 struct amdgpu_ctx_entity {
 	uint64_t		sequence;
-	struct dma_fence	**fences;
 	struct drm_sched_entity	entity;
+	struct dma_fence	*fences[];
 };
 
 struct amdgpu_ctx {
@@ -42,7 +44,7 @@ struct amdgpu_ctx {
 	unsigned			reset_counter_query;
 	uint32_t			vram_lost_counter;
 	spinlock_t			ring_lock;
-	struct amdgpu_ctx_entity	*entities[AMDGPU_HW_IP_NUM];
+	struct amdgpu_ctx_entity	*entities[AMDGPU_HW_IP_NUM][AMDGPU_MAX_ENTITY_NUM];
 	bool				preamble_presented;
 	enum drm_sched_priority		init_priority;
 	enum drm_sched_priority		override_priority;
-- 
cgit