Merge branch 'drm-next-4.20' of git://people.freedesktop.org/~agd5f/linux into drm-next

This is a new pull for drm-next on top of last weeks with the following changes: - Fixed 64 bit divide - Fixed vram type on vega20 - Misc vega20 fixes - Misc DC fixes - Fix GDS/GWS/OA domain handling Previous changes from last week: amdgpu/kfd: - Picasso (new APU) support - Raven2 (new APU) support - Vega20 enablement - ACP powergating improvements - Add ABGR/XBGR display support - VCN JPEG engine support - Initial xGMI support - Use load balancing for engine scheduling - Lots of new documentation - Rework and clean up i2c and aux handling in DC - Add DP YCbCr 4:2:0 support in DC - Add DMCU firmware loading for Raven (used for ABM and PSR) - New debugfs features in DC - LVDS support in DC - Implement wave kill for gfx/compute (light weight reset for shaders) - Use AGP aperture to avoid gart mappings when possible - GPUVM performance improvements - Bulk moves for more efficient GPUVM LRU handling - Merge amdgpu and amdkfd into one module - Enable gfxoff and stutter mode on Raven - Misc cleanups Scheduler: - Load balancing support - Bug fixes ttm: - Bulk move functionality - Bug fixes radeon: - Misc cleanups Signed-off-by: Dave Airlie <airlied@redhat.com> From: Alex Deucher <alexdeucher@gmail.com> Link: https://patchwork.freedesktop.org/patch/msgid/20180920150438.12693-1-alexander.deucher@amd.com
author: Dave Airlie <airlied@redhat.com> 2018-09-21 09:52:34 +1000
committer: Dave Airlie <airlied@redhat.com> 2018-09-21 09:52:53 +1000
commit: 36c9c3c91128e2b892c9be0dd9ee9bd82cbe82ad (patch)
tree: 687db2e37b7fdcb4bd756a078812d049da18c804 /drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
parent: 0320ac5188eab5c6e8b92b110d1eae967ac272d2 (diff)
parent: 846311ae68f3c78365ebf3dff505c99e7da861cf (diff)
1 files changed, 176 insertions, 86 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index df6965761046..f9b54236102d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -27,6 +27,30 @@
 #include "amdgpu.h"
 #include "amdgpu_sched.h"
 
+#define to_amdgpu_ctx_entity(e)	\
+	container_of((e), struct amdgpu_ctx_entity, entity)
+
+const unsigned int amdgpu_ctx_num_entities[AMDGPU_HW_IP_NUM] = {
+	[AMDGPU_HW_IP_GFX]	=	1,
+	[AMDGPU_HW_IP_COMPUTE]	=	4,
+	[AMDGPU_HW_IP_DMA]	=	2,
+	[AMDGPU_HW_IP_UVD]	=	1,
+	[AMDGPU_HW_IP_VCE]	=	1,
+	[AMDGPU_HW_IP_UVD_ENC]	=	1,
+	[AMDGPU_HW_IP_VCN_DEC]	=	1,
+	[AMDGPU_HW_IP_VCN_ENC]	=	1,
+};
+
+static int amdgput_ctx_total_num_entities(void)
+{
+	unsigned i, num_entities = 0;
+
+	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i)
+		num_entities += amdgpu_ctx_num_entities[i];
+
+	return num_entities;
+}
+
 static int amdgpu_ctx_priority_permit(struct drm_file *filp,
 				      enum drm_sched_priority priority)
 {
@@ -48,6 +72,7 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
 			   struct drm_file *filp,
 			   struct amdgpu_ctx *ctx)
 {
+	unsigned num_entities = amdgput_ctx_total_num_entities();
 	unsigned i, j;
 	int r;
 
@@ -60,19 +85,33 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
 
 	memset(ctx, 0, sizeof(*ctx));
 	ctx->adev = adev;
-	kref_init(&ctx->refcount);
-	spin_lock_init(&ctx->ring_lock);
-	ctx->fences = kcalloc(amdgpu_sched_jobs * AMDGPU_MAX_RINGS,
+
+	ctx->fences = kcalloc(amdgpu_sched_jobs * num_entities,
 			      sizeof(struct dma_fence*), GFP_KERNEL);
 	if (!ctx->fences)
 		return -ENOMEM;
 
-	mutex_init(&ctx->lock);
+	ctx->entities[0] = kcalloc(num_entities,
+				   sizeof(struct amdgpu_ctx_entity),
+				   GFP_KERNEL);
+	if (!ctx->entities[0]) {
+		r = -ENOMEM;
+		goto error_free_fences;
+	}
 
-	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
-		ctx->rings[i].sequence = 1;
-		ctx->rings[i].fences = &ctx->fences[amdgpu_sched_jobs * i];
+	for (i = 0; i < num_entities; ++i) {
+		struct amdgpu_ctx_entity *entity = &ctx->entities[0][i];
+
+		entity->sequence = 1;
+		entity->fences = &ctx->fences[amdgpu_sched_jobs * i];
 	}
+	for (i = 1; i < AMDGPU_HW_IP_NUM; ++i)
+		ctx->entities[i] = ctx->entities[i - 1] +
+			amdgpu_ctx_num_entities[i - 1];
+
+	kref_init(&ctx->refcount);
+	spin_lock_init(&ctx->ring_lock);
+	mutex_init(&ctx->lock);
 
 	ctx->reset_counter = atomic_read(&adev->gpu_reset_counter);
 	ctx->reset_counter_query = ctx->reset_counter;
@@ -80,31 +119,70 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
 	ctx->init_priority = priority;
 	ctx->override_priority = DRM_SCHED_PRIORITY_UNSET;
 
-	/* create context entity for each ring */
-	for (i = 0; i < adev->num_rings; i++) {
-		struct amdgpu_ring *ring = adev->rings[i];
-		struct drm_sched_rq *rq;
+	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
+		struct amdgpu_ring *rings[AMDGPU_MAX_RINGS];
+		struct drm_sched_rq *rqs[AMDGPU_MAX_RINGS];
+		unsigned num_rings;
+
+		switch (i) {
+		case AMDGPU_HW_IP_GFX:
+			rings[0] = &adev->gfx.gfx_ring[0];
+			num_rings = 1;
+			break;
+		case AMDGPU_HW_IP_COMPUTE:
+			for (j = 0; j < adev->gfx.num_compute_rings; ++j)
+				rings[j] = &adev->gfx.compute_ring[j];
+			num_rings = adev->gfx.num_compute_rings;
+			break;
+		case AMDGPU_HW_IP_DMA:
+			for (j = 0; j < adev->sdma.num_instances; ++j)
+				rings[j] = &adev->sdma.instance[j].ring;
+			num_rings = adev->sdma.num_instances;
+			break;
+		case AMDGPU_HW_IP_UVD:
+			rings[0] = &adev->uvd.inst[0].ring;
+			num_rings = 1;
+			break;
+		case AMDGPU_HW_IP_VCE:
+			rings[0] = &adev->vce.ring[0];
+			num_rings = 1;
+			break;
+		case AMDGPU_HW_IP_UVD_ENC:
+			rings[0] = &adev->uvd.inst[0].ring_enc[0];
+			num_rings = 1;
+			break;
+		case AMDGPU_HW_IP_VCN_DEC:
+			rings[0] = &adev->vcn.ring_dec;
+			num_rings = 1;
+			break;
+		case AMDGPU_HW_IP_VCN_ENC:
+			rings[0] = &adev->vcn.ring_enc[0];
+			num_rings = 1;
+			break;
+		case AMDGPU_HW_IP_VCN_JPEG:
+			rings[0] = &adev->vcn.ring_jpeg;
+			num_rings = 1;
+			break;
+		}
 
-		rq = &ring->sched.sched_rq[priority];
+		for (j = 0; j < num_rings; ++j)
+			rqs[j] = &rings[j]->sched.sched_rq[priority];
 
-		if (ring == &adev->gfx.kiq.ring)
-			continue;
-
-		r = drm_sched_entity_init(&ctx->rings[i].entity,
-					  &rq, 1, &ctx->guilty);
+		for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j)
+			r = drm_sched_entity_init(&ctx->entities[i][j].entity,
+						  rqs, num_rings, &ctx->guilty);
 		if (r)
-			goto failed;
+			goto error_cleanup_entities;
 	}
 
-	r = amdgpu_queue_mgr_init(adev, &ctx->queue_mgr);
-	if (r)
-		goto failed;
-
 	return 0;
 
-failed:
-	for (j = 0; j < i; j++)
-		drm_sched_entity_destroy(&ctx->rings[j].entity);
+error_cleanup_entities:
+	for (i = 0; i < num_entities; ++i)
+		drm_sched_entity_destroy(&ctx->entities[0][i].entity);
+	kfree(ctx->entities[0]);
+
+error_free_fences:
 	kfree(ctx->fences);
 	ctx->fences = NULL;
 	return r;
@@ -113,25 +191,47 @@ failed:
 static void amdgpu_ctx_fini(struct kref *ref)
 {
 	struct amdgpu_ctx *ctx = container_of(ref, struct amdgpu_ctx, refcount);
+	unsigned num_entities = amdgput_ctx_total_num_entities();
 	struct amdgpu_device *adev = ctx->adev;
 	unsigned i, j;
 
 	if (!adev)
 		return;
 
-	for (i = 0; i < AMDGPU_MAX_RINGS; ++i)
+	for (i = 0; i < num_entities; ++i)
 		for (j = 0; j < amdgpu_sched_jobs; ++j)
-			dma_fence_put(ctx->rings[i].fences[j]);
+			dma_fence_put(ctx->entities[0][i].fences[j]);
 	kfree(ctx->fences);
-	ctx->fences = NULL;
-
-	amdgpu_queue_mgr_fini(adev, &ctx->queue_mgr);
+	kfree(ctx->entities[0]);
 
 	mutex_destroy(&ctx->lock);
 
 	kfree(ctx);
 }
 
+int amdgpu_ctx_get_entity(struct amdgpu_ctx *ctx, u32 hw_ip, u32 instance,
+			  u32 ring, struct drm_sched_entity **entity)
+{
+	if (hw_ip >= AMDGPU_HW_IP_NUM) {
+		DRM_ERROR("unknown HW IP type: %d\n", hw_ip);
+		return -EINVAL;
+	}
+
+	/* Right now all IPs have only one instance - multiple rings. */
+	if (instance != 0) {
+		DRM_DEBUG("invalid ip instance: %d\n", instance);
+		return -EINVAL;
+	}
+
+	if (ring >= amdgpu_ctx_num_entities[hw_ip]) {
+		DRM_DEBUG("invalid ring: %d %d\n", hw_ip, ring);
+		return -EINVAL;
+	}
+
+	*entity = &ctx->entities[hw_ip][ring].entity;
+	return 0;
+}
+
 static int amdgpu_ctx_alloc(struct amdgpu_device *adev,
 			    struct amdgpu_fpriv *fpriv,
 			    struct drm_file *filp,
@@ -168,17 +268,17 @@ static int amdgpu_ctx_alloc(struct amdgpu_device *adev,
 static void amdgpu_ctx_do_release(struct kref *ref)
 {
 	struct amdgpu_ctx *ctx;
+	unsigned num_entities;
 	u32 i;
 
 	ctx = container_of(ref, struct amdgpu_ctx, refcount);
 
-	for (i = 0; i < ctx->adev->num_rings; i++) {
+	num_entities = 0;
+	for (i = 0; i < AMDGPU_HW_IP_NUM; i++)
+		num_entities += amdgpu_ctx_num_entities[i];
 
-		if (ctx->adev->rings[i] == &ctx->adev->gfx.kiq.ring)
-			continue;
-
-		drm_sched_entity_destroy(&ctx->rings[i].entity);
-	}
+	for (i = 0; i < num_entities; i++)
+		drm_sched_entity_destroy(&ctx->entities[0][i].entity);
 
 	amdgpu_ctx_fini(ref);
 }
@@ -334,56 +434,56 @@ int amdgpu_ctx_put(struct amdgpu_ctx *ctx)
 	return 0;
 }
 
-int amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring,
-			      struct dma_fence *fence, uint64_t* handler)
+void amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx,
+			  struct drm_sched_entity *entity,
+			  struct dma_fence *fence, uint64_t* handle)
 {
-	struct amdgpu_ctx_ring *cring = & ctx->rings[ring->idx];
-	uint64_t seq = cring->sequence;
-	unsigned idx = 0;
+	struct amdgpu_ctx_entity *centity = to_amdgpu_ctx_entity(entity);
+	uint64_t seq = centity->sequence;
 	struct dma_fence *other = NULL;
+	unsigned idx = 0;
 
 	idx = seq & (amdgpu_sched_jobs - 1);
-	other = cring->fences[idx];
+	other = centity->fences[idx];
 	if (other)
 		BUG_ON(!dma_fence_is_signaled(other));
 
 	dma_fence_get(fence);
 
 	spin_lock(&ctx->ring_lock);
-	cring->fences[idx] = fence;
-	cring->sequence++;
+	centity->fences[idx] = fence;
+	centity->sequence++;
 	spin_unlock(&ctx->ring_lock);
 
 	dma_fence_put(other);
-	if (handler)
-		*handler = seq;
-
-	return 0;
+	if (handle)
+		*handle = seq;
 }
 
 struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
-				       struct amdgpu_ring *ring, uint64_t seq)
+				       struct drm_sched_entity *entity,
+				       uint64_t seq)
 {
-	struct amdgpu_ctx_ring *cring = & ctx->rings[ring->idx];
+	struct amdgpu_ctx_entity *centity = to_amdgpu_ctx_entity(entity);
 	struct dma_fence *fence;
 
 	spin_lock(&ctx->ring_lock);
 
 	if (seq == ~0ull)
-		seq = ctx->rings[ring->idx].sequence - 1;
+		seq = centity->sequence - 1;
 
-	if (seq >= cring->sequence) {
+	if (seq >= centity->sequence) {
 		spin_unlock(&ctx->ring_lock);
 		return ERR_PTR(-EINVAL);
 	}
 
 
-	if (seq + amdgpu_sched_jobs < cring->sequence) {
+	if (seq + amdgpu_sched_jobs < centity->sequence) {
 		spin_unlock(&ctx->ring_lock);
 		return NULL;
 	}
 
-	fence = dma_fence_get(cring->fences[seq & (amdgpu_sched_jobs - 1)]);
+	fence = dma_fence_get(centity->fences[seq & (amdgpu_sched_jobs - 1)]);
 	spin_unlock(&ctx->ring_lock);
 
 	return fence;
@@ -392,35 +492,28 @@ struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
 void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx,
 				  enum drm_sched_priority priority)
 {
-	int i;
-	struct amdgpu_device *adev = ctx->adev;
-	struct drm_sched_rq *rq;
-	struct drm_sched_entity *entity;
-	struct amdgpu_ring *ring;
+	unsigned num_entities = amdgput_ctx_total_num_entities();
 	enum drm_sched_priority ctx_prio;
+	unsigned i;
 
 	ctx->override_priority = priority;
 
 	ctx_prio = (ctx->override_priority == DRM_SCHED_PRIORITY_UNSET) ?
 			ctx->init_priority : ctx->override_priority;
 
-	for (i = 0; i < adev->num_rings; i++) {
-		ring = adev->rings[i];
-		entity = &ctx->rings[i].entity;
-		rq = &ring->sched.sched_rq[ctx_prio];
-
-		if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
-			continue;
+	for (i = 0; i < num_entities; i++) {
+		struct drm_sched_entity *entity = &ctx->entities[0][i].entity;
 
-		drm_sched_entity_set_rq(entity, rq);
+		drm_sched_entity_set_priority(entity, ctx_prio);
 	}
 }
 
-int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx, unsigned ring_id)
+int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx,
+			       struct drm_sched_entity *entity)
 {
-	struct amdgpu_ctx_ring *cring = &ctx->rings[ring_id];
-	unsigned idx = cring->sequence & (amdgpu_sched_jobs - 1);
-	struct dma_fence *other = cring->fences[idx];
+	struct amdgpu_ctx_entity *centity = to_amdgpu_ctx_entity(entity);
+	unsigned idx = centity->sequence & (amdgpu_sched_jobs - 1);
+	struct dma_fence *other = centity->fences[idx];
 
 	if (other) {
 		signed long r;
@@ -444,6 +537,7 @@ void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr)
 
 void amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr)
 {
+	unsigned num_entities = amdgput_ctx_total_num_entities();
 	struct amdgpu_ctx *ctx;
 	struct idr *idp;
 	uint32_t id, i;
@@ -459,13 +553,11 @@ void amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr)
 			return;
 		}
 
-		for (i = 0; i < ctx->adev->num_rings; i++) {
+		for (i = 0; i < num_entities; i++) {
+			struct drm_sched_entity *entity;
 
-			if (ctx->adev->rings[i] == &ctx->adev->gfx.kiq.ring)
-				continue;
-
-			max_wait = drm_sched_entity_flush(&ctx->rings[i].entity,
-							  max_wait);
+			entity = &ctx->entities[0][i].entity;
+			max_wait = drm_sched_entity_flush(entity, max_wait);
 		}
 	}
 	mutex_unlock(&mgr->lock);
@@ -473,6 +565,7 @@ void amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr)
 
 void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr)
 {
+	unsigned num_entities = amdgput_ctx_total_num_entities();
 	struct amdgpu_ctx *ctx;
 	struct idr *idp;
 	uint32_t id, i;
@@ -484,16 +577,13 @@ void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr)
 		if (!ctx->adev)
 			return;
 
-		for (i = 0; i < ctx->adev->num_rings; i++) {
-
-			if (ctx->adev->rings[i] == &ctx->adev->gfx.kiq.ring)
-				continue;
-
-			if (kref_read(&ctx->refcount) == 1)
-				drm_sched_entity_fini(&ctx->rings[i].entity);
-			else
-				DRM_ERROR("ctx %p is still alive\n", ctx);
+		if (kref_read(&ctx->refcount) != 1) {
+			DRM_ERROR("ctx %p is still alive\n", ctx);
+			continue;
 		}
+
+		for (i = 0; i < num_entities; i++)
+			drm_sched_entity_fini(&ctx->entities[0][i].entity);
 	}
 }
author	Dave Airlie <airlied@redhat.com>	2018-09-21 09:52:34 +1000
committer	Dave Airlie <airlied@redhat.com>	2018-09-21 09:52:53 +1000
commit	36c9c3c91128e2b892c9be0dd9ee9bd82cbe82ad (patch)
tree	687db2e37b7fdcb4bd756a078812d049da18c804 /drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
parent	0320ac5188eab5c6e8b92b110d1eae967ac272d2 (diff)
parent	846311ae68f3c78365ebf3dff505c99e7da861cf (diff)