aboutsummaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c607
1 files changed, 338 insertions, 269 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index e104d7ef3c3d..1bbd39b3b0fc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -64,11 +64,51 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p,
return 0;
}
+static int amdgpu_cs_job_idx(struct amdgpu_cs_parser *p,
+ struct drm_amdgpu_cs_chunk_ib *chunk_ib)
+{
+ struct drm_sched_entity *entity;
+ unsigned int i;
+ int r;
+
+ r = amdgpu_ctx_get_entity(p->ctx, chunk_ib->ip_type,
+ chunk_ib->ip_instance,
+ chunk_ib->ring, &entity);
+ if (r)
+ return r;
+
+ /*
+ * Abort if there is no run queue associated with this entity.
+ * Possibly because of disabled HW IP.
+ */
+ if (entity->rq == NULL)
+ return -EINVAL;
+
+ /* Check if we can add this IB to some existing job */
+ for (i = 0; i < p->gang_size; ++i)
+ if (p->entities[i] == entity)
+ return i;
+
+ /* If not increase the gang size if possible */
+ if (i == AMDGPU_CS_GANG_SIZE)
+ return -EINVAL;
+
+ p->entities[i] = entity;
+ p->gang_size = i + 1;
+ return i;
+}
+
static int amdgpu_cs_p1_ib(struct amdgpu_cs_parser *p,
struct drm_amdgpu_cs_chunk_ib *chunk_ib,
unsigned int *num_ibs)
{
- ++(*num_ibs);
+ int r;
+
+ r = amdgpu_cs_job_idx(p, chunk_ib);
+ if (r < 0)
+ return r;
+
+ ++(num_ibs[r]);
return 0;
}
@@ -142,11 +182,12 @@ static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p,
union drm_amdgpu_cs *cs)
{
struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
+ unsigned int num_ibs[AMDGPU_CS_GANG_SIZE] = { };
struct amdgpu_vm *vm = &fpriv->vm;
uint64_t *chunk_array_user;
uint64_t *chunk_array;
- unsigned size, num_ibs = 0;
uint32_t uf_offset = 0;
+ unsigned int size;
int ret;
int i;
@@ -209,7 +250,7 @@ static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p,
if (size < sizeof(struct drm_amdgpu_cs_chunk_ib))
goto free_partial_kdata;
- ret = amdgpu_cs_p1_ib(p, p->chunks[i].kdata, &num_ibs);
+ ret = amdgpu_cs_p1_ib(p, p->chunks[i].kdata, num_ibs);
if (ret)
goto free_partial_kdata;
break;
@@ -246,17 +287,28 @@ static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p,
}
}
- ret = amdgpu_job_alloc(p->adev, num_ibs, &p->job, vm);
- if (ret)
- goto free_all_kdata;
+ if (!p->gang_size)
+ return -EINVAL;
+
+ for (i = 0; i < p->gang_size; ++i) {
+ ret = amdgpu_job_alloc(p->adev, num_ibs[i], &p->jobs[i], vm);
+ if (ret)
+ goto free_all_kdata;
+
+ ret = drm_sched_job_init(&p->jobs[i]->base, p->entities[i],
+ &fpriv->vm);
+ if (ret)
+ goto free_all_kdata;
+ }
+ p->gang_leader = p->jobs[p->gang_size - 1];
- if (p->ctx->vram_lost_counter != p->job->vram_lost_counter) {
+ if (p->ctx->vram_lost_counter != p->gang_leader->vram_lost_counter) {
ret = -ECANCELED;
goto free_all_kdata;
}
if (p->uf_entry.tv.bo)
- p->job->uf_addr = uf_offset;
+ p->gang_leader->uf_addr = uf_offset;
kvfree(chunk_array);
/* Use this opportunity to fill in task info for the vm */
@@ -278,93 +330,69 @@ free_chunk:
return ret;
}
-static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
- struct amdgpu_cs_parser *parser)
+static int amdgpu_cs_p2_ib(struct amdgpu_cs_parser *p,
+ struct amdgpu_cs_chunk *chunk,
+ unsigned int *ce_preempt,
+ unsigned int *de_preempt)
{
- struct amdgpu_fpriv *fpriv = parser->filp->driver_priv;
+ struct drm_amdgpu_cs_chunk_ib *chunk_ib = chunk->kdata;
+ struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
struct amdgpu_vm *vm = &fpriv->vm;
- int r, ce_preempt = 0, de_preempt = 0;
struct amdgpu_ring *ring;
- int i, j;
-
- for (i = 0, j = 0; i < parser->nchunks && j < parser->job->num_ibs; i++) {
- struct amdgpu_cs_chunk *chunk;
- struct amdgpu_ib *ib;
- struct drm_amdgpu_cs_chunk_ib *chunk_ib;
- struct drm_sched_entity *entity;
-
- chunk = &parser->chunks[i];
- ib = &parser->job->ibs[j];
- chunk_ib = (struct drm_amdgpu_cs_chunk_ib *)chunk->kdata;
-
- if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB)
- continue;
-
- if (chunk_ib->ip_type == AMDGPU_HW_IP_GFX &&
- chunk_ib->flags & AMDGPU_IB_FLAG_PREEMPT) {
- if (chunk_ib->flags & AMDGPU_IB_FLAG_CE)
- ce_preempt++;
- else
- de_preempt++;
+ struct amdgpu_job *job;
+ struct amdgpu_ib *ib;
+ int r;
- /* each GFX command submit allows 0 or 1 IB preemptible for CE & DE */
- if (ce_preempt > 1 || de_preempt > 1)
- return -EINVAL;
- }
+ r = amdgpu_cs_job_idx(p, chunk_ib);
+ if (r < 0)
+ return r;
- r = amdgpu_ctx_get_entity(parser->ctx, chunk_ib->ip_type,
- chunk_ib->ip_instance, chunk_ib->ring,
- &entity);
- if (r)
- return r;
+ job = p->jobs[r];
+ ring = amdgpu_job_ring(job);
+ ib = &job->ibs[job->num_ibs++];
- if (chunk_ib->flags & AMDGPU_IB_FLAG_PREAMBLE)
- parser->job->preamble_status |=
- AMDGPU_PREAMBLE_IB_PRESENT;
+ /* MM engine doesn't support user fences */
+ if (p->uf_entry.tv.bo && ring->funcs->no_user_fence)
+ return -EINVAL;
- if (parser->entity && parser->entity != entity)
- return -EINVAL;
+ if (chunk_ib->ip_type == AMDGPU_HW_IP_GFX &&
+ chunk_ib->flags & AMDGPU_IB_FLAG_PREEMPT) {
+ if (chunk_ib->flags & AMDGPU_IB_FLAG_CE)
+ (*ce_preempt)++;
+ else
+ (*de_preempt)++;
- /* Return if there is no run queue associated with this entity.
- * Possibly because of disabled HW IP*/
- if (entity->rq == NULL)
+ /* Each GFX command submit allows only 1 IB max
+ * preemptible for CE & DE */
+ if (*ce_preempt > 1 || *de_preempt > 1)
return -EINVAL;
+ }
- parser->entity = entity;
-
- ring = to_amdgpu_ring(entity->rq->sched);
- r = amdgpu_ib_get(adev, vm, ring->funcs->parse_cs ?
- chunk_ib->ib_bytes : 0,
- AMDGPU_IB_POOL_DELAYED, ib);
- if (r) {
- DRM_ERROR("Failed to get ib !\n");
- return r;
- }
-
- ib->gpu_addr = chunk_ib->va_start;
- ib->length_dw = chunk_ib->ib_bytes / 4;
- ib->flags = chunk_ib->flags;
+ if (chunk_ib->flags & AMDGPU_IB_FLAG_PREAMBLE)
+ job->preamble_status |= AMDGPU_PREAMBLE_IB_PRESENT;
- j++;
+ r = amdgpu_ib_get(p->adev, vm, ring->funcs->parse_cs ?
+ chunk_ib->ib_bytes : 0,
+ AMDGPU_IB_POOL_DELAYED, ib);
+ if (r) {
+ DRM_ERROR("Failed to get ib !\n");
+ return r;
}
- /* MM engine doesn't support user fences */
- ring = to_amdgpu_ring(parser->entity->rq->sched);
- if (parser->job->uf_addr && ring->funcs->no_user_fence)
- return -EINVAL;
-
+ ib->gpu_addr = chunk_ib->va_start;
+ ib->length_dw = chunk_ib->ib_bytes / 4;
+ ib->flags = chunk_ib->flags;
return 0;
}
-static int amdgpu_cs_process_fence_dep(struct amdgpu_cs_parser *p,
- struct amdgpu_cs_chunk *chunk)
+static int amdgpu_cs_p2_dependencies(struct amdgpu_cs_parser *p,
+ struct amdgpu_cs_chunk *chunk)
{
+ struct drm_amdgpu_cs_chunk_dep *deps = chunk->kdata;
struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
unsigned num_deps;
int i, r;
- struct drm_amdgpu_cs_chunk_dep *deps;
- deps = (struct drm_amdgpu_cs_chunk_dep *)chunk->kdata;
num_deps = chunk->length_dw * 4 /
sizeof(struct drm_amdgpu_cs_chunk_dep);
@@ -402,7 +430,7 @@ static int amdgpu_cs_process_fence_dep(struct amdgpu_cs_parser *p,
dma_fence_put(old);
}
- r = amdgpu_sync_fence(&p->job->sync, fence);
+ r = amdgpu_sync_fence(&p->gang_leader->sync, fence);
dma_fence_put(fence);
if (r)
return r;
@@ -410,9 +438,9 @@ static int amdgpu_cs_process_fence_dep(struct amdgpu_cs_parser *p,
return 0;
}
-static int amdgpu_syncobj_lookup_and_add_to_sync(struct amdgpu_cs_parser *p,
- uint32_t handle, u64 point,
- u64 flags)
+static int amdgpu_syncobj_lookup_and_add(struct amdgpu_cs_parser *p,
+ uint32_t handle, u64 point,
+ u64 flags)
{
struct dma_fence *fence;
int r;
@@ -424,25 +452,23 @@ static int amdgpu_syncobj_lookup_and_add_to_sync(struct amdgpu_cs_parser *p,
return r;
}
- r = amdgpu_sync_fence(&p->job->sync, fence);
+ r = amdgpu_sync_fence(&p->gang_leader->sync, fence);
dma_fence_put(fence);
return r;
}
-static int amdgpu_cs_process_syncobj_in_dep(struct amdgpu_cs_parser *p,
- struct amdgpu_cs_chunk *chunk)
+static int amdgpu_cs_p2_syncobj_in(struct amdgpu_cs_parser *p,
+ struct amdgpu_cs_chunk *chunk)
{
- struct drm_amdgpu_cs_chunk_sem *deps;
+ struct drm_amdgpu_cs_chunk_sem *deps = chunk->kdata;
unsigned num_deps;
int i, r;
- deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata;
num_deps = chunk->length_dw * 4 /
sizeof(struct drm_amdgpu_cs_chunk_sem);
for (i = 0; i < num_deps; ++i) {
- r = amdgpu_syncobj_lookup_and_add_to_sync(p, deps[i].handle,
- 0, 0);
+ r = amdgpu_syncobj_lookup_and_add(p, deps[i].handle, 0, 0);
if (r)
return r;
}
@@ -450,21 +476,19 @@ static int amdgpu_cs_process_syncobj_in_dep(struct amdgpu_cs_parser *p,
return 0;
}
-static int amdgpu_cs_process_syncobj_timeline_in_dep(struct amdgpu_cs_parser *p,
- struct amdgpu_cs_chunk *chunk)
+static int amdgpu_cs_p2_syncobj_timeline_wait(struct amdgpu_cs_parser *p,
+ struct amdgpu_cs_chunk *chunk)
{
- struct drm_amdgpu_cs_chunk_syncobj *syncobj_deps;
+ struct drm_amdgpu_cs_chunk_syncobj *syncobj_deps = chunk->kdata;
unsigned num_deps;
int i, r;
- syncobj_deps = (struct drm_amdgpu_cs_chunk_syncobj *)chunk->kdata;
num_deps = chunk->length_dw * 4 /
sizeof(struct drm_amdgpu_cs_chunk_syncobj);
for (i = 0; i < num_deps; ++i) {
- r = amdgpu_syncobj_lookup_and_add_to_sync(p,
- syncobj_deps[i].handle,
- syncobj_deps[i].point,
- syncobj_deps[i].flags);
+ r = amdgpu_syncobj_lookup_and_add(p, syncobj_deps[i].handle,
+ syncobj_deps[i].point,
+ syncobj_deps[i].flags);
if (r)
return r;
}
@@ -472,14 +496,13 @@ static int amdgpu_cs_process_syncobj_timeline_in_dep(struct amdgpu_cs_parser *p,
return 0;
}
-static int amdgpu_cs_process_syncobj_out_dep(struct amdgpu_cs_parser *p,
- struct amdgpu_cs_chunk *chunk)
+static int amdgpu_cs_p2_syncobj_out(struct amdgpu_cs_parser *p,
+ struct amdgpu_cs_chunk *chunk)
{
- struct drm_amdgpu_cs_chunk_sem *deps;
+ struct drm_amdgpu_cs_chunk_sem *deps = chunk->kdata;
unsigned num_deps;
int i;
- deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata;
num_deps = chunk->length_dw * 4 /
sizeof(struct drm_amdgpu_cs_chunk_sem);
@@ -507,15 +530,13 @@ static int amdgpu_cs_process_syncobj_out_dep(struct amdgpu_cs_parser *p,
return 0;
}
-
-static int amdgpu_cs_process_syncobj_timeline_out_dep(struct amdgpu_cs_parser *p,
- struct amdgpu_cs_chunk *chunk)
+static int amdgpu_cs_p2_syncobj_timeline_signal(struct amdgpu_cs_parser *p,
+ struct amdgpu_cs_chunk *chunk)
{
- struct drm_amdgpu_cs_chunk_syncobj *syncobj_deps;
+ struct drm_amdgpu_cs_chunk_syncobj *syncobj_deps = chunk->kdata;
unsigned num_deps;
int i;
- syncobj_deps = (struct drm_amdgpu_cs_chunk_syncobj *)chunk->kdata;
num_deps = chunk->length_dw * 4 /
sizeof(struct drm_amdgpu_cs_chunk_syncobj);
@@ -552,9 +573,9 @@ static int amdgpu_cs_process_syncobj_timeline_out_dep(struct amdgpu_cs_parser *p
return 0;
}
-static int amdgpu_cs_dependencies(struct amdgpu_device *adev,
- struct amdgpu_cs_parser *p)
+static int amdgpu_cs_pass2(struct amdgpu_cs_parser *p)
{
+ unsigned int ce_preempt = 0, de_preempt = 0;
int i, r;
for (i = 0; i < p->nchunks; ++i) {
@@ -563,29 +584,34 @@ static int amdgpu_cs_dependencies(struct amdgpu_device *adev,
chunk = &p->chunks[i];
switch (chunk->chunk_id) {
+ case AMDGPU_CHUNK_ID_IB:
+ r = amdgpu_cs_p2_ib(p, chunk, &ce_preempt, &de_preempt);
+ if (r)
+ return r;
+ break;
case AMDGPU_CHUNK_ID_DEPENDENCIES:
case AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES:
- r = amdgpu_cs_process_fence_dep(p, chunk);
+ r = amdgpu_cs_p2_dependencies(p, chunk);
if (r)
return r;
break;
case AMDGPU_CHUNK_ID_SYNCOBJ_IN:
- r = amdgpu_cs_process_syncobj_in_dep(p, chunk);
+ r = amdgpu_cs_p2_syncobj_in(p, chunk);
if (r)
return r;
break;
case AMDGPU_CHUNK_ID_SYNCOBJ_OUT:
- r = amdgpu_cs_process_syncobj_out_dep(p, chunk);
+ r = amdgpu_cs_p2_syncobj_out(p, chunk);
if (r)
return r;
break;
case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT:
- r = amdgpu_cs_process_syncobj_timeline_in_dep(p, chunk);
+ r = amdgpu_cs_p2_syncobj_timeline_wait(p, chunk);
if (r)
return r;
break;
case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL:
- r = amdgpu_cs_process_syncobj_timeline_out_dep(p, chunk);
+ r = amdgpu_cs_p2_syncobj_timeline_signal(p, chunk);
if (r)
return r;
break;
@@ -830,6 +856,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
struct amdgpu_vm *vm = &fpriv->vm;
struct amdgpu_bo_list_entry *e;
struct list_head duplicates;
+ unsigned int i;
int r;
INIT_LIST_HEAD(&p->validated);
@@ -913,16 +940,6 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
e->bo_va = amdgpu_vm_bo_find(vm, bo);
}
- /* Move fence waiting after getting reservation lock of
- * PD root. Then there is no need on a ctx mutex lock.
- */
- r = amdgpu_ctx_wait_prev_fence(p->ctx, p->entity);
- if (unlikely(r != 0)) {
- if (r != -ERESTARTSYS)
- DRM_ERROR("amdgpu_ctx_wait_prev_fence failed.\n");
- goto error_validate;
- }
-
amdgpu_cs_get_threshold_for_moves(p->adev, &p->bytes_moved_threshold,
&p->bytes_moved_vis_threshold);
p->bytes_moved = 0;
@@ -943,124 +960,138 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
if (r)
goto error_validate;
- amdgpu_cs_report_moved_bytes(p->adev, p->bytes_moved,
- p->bytes_moved_vis);
-
- amdgpu_job_set_resources(p->job, p->bo_list->gds_obj,
- p->bo_list->gws_obj, p->bo_list->oa_obj);
-
- if (!r && p->uf_entry.tv.bo) {
+ if (p->uf_entry.tv.bo) {
struct amdgpu_bo *uf = ttm_to_amdgpu_bo(p->uf_entry.tv.bo);
r = amdgpu_ttm_alloc_gart(&uf->tbo);
- p->job->uf_addr += amdgpu_bo_gpu_offset(uf);
+ if (r)
+ goto error_validate;
+
+ p->gang_leader->uf_addr += amdgpu_bo_gpu_offset(uf);
}
+ amdgpu_cs_report_moved_bytes(p->adev, p->bytes_moved,
+ p->bytes_moved_vis);
+
+ for (i = 0; i < p->gang_size; ++i)
+ amdgpu_job_set_resources(p->jobs[i], p->bo_list->gds_obj,
+ p->bo_list->gws_obj,
+ p->bo_list->oa_obj);
+ return 0;
+
error_validate:
- if (r)
- ttm_eu_backoff_reservation(&p->ticket, &p->validated);
+ ttm_eu_backoff_reservation(&p->ticket, &p->validated);
out_free_user_pages:
- if (r) {
- amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
- struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
+ amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
+ struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
- if (!e->user_pages)
- continue;
- amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
- kvfree(e->user_pages);
- e->user_pages = NULL;
- }
- mutex_unlock(&p->bo_list->bo_list_mutex);
+ if (!e->user_pages)
+ continue;
+ amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
+ kvfree(e->user_pages);
+ e->user_pages = NULL;
}
return r;
}
-static void trace_amdgpu_cs_ibs(struct amdgpu_cs_parser *parser)
+static void trace_amdgpu_cs_ibs(struct amdgpu_cs_parser *p)
{
- int i;
+ int i, j;
if (!trace_amdgpu_cs_enabled())
return;
- for (i = 0; i < parser->job->num_ibs; i++)
- trace_amdgpu_cs(parser, i);
+ for (i = 0; i < p->gang_size; ++i) {
+ struct amdgpu_job *job = p->jobs[i];
+
+ for (j = 0; j < job->num_ibs; ++j)
+ trace_amdgpu_cs(p, job, &job->ibs[j]);
+ }
}
-static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
+static int amdgpu_cs_patch_ibs(struct amdgpu_cs_parser *p,
+ struct amdgpu_job *job)
{
- struct amdgpu_ring *ring = to_amdgpu_ring(p->entity->rq->sched);
- struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
- struct amdgpu_device *adev = p->adev;
- struct amdgpu_vm *vm = &fpriv->vm;
- struct amdgpu_bo_list_entry *e;
- struct amdgpu_bo_va *bo_va;
- struct amdgpu_bo *bo;
+ struct amdgpu_ring *ring = amdgpu_job_ring(job);
+ unsigned int i;
int r;
/* Only for UVD/VCE VM emulation */
- if (ring->funcs->parse_cs || ring->funcs->patch_cs_in_place) {
- unsigned i, j;
-
- for (i = 0, j = 0; i < p->nchunks && j < p->job->num_ibs; i++) {
- struct drm_amdgpu_cs_chunk_ib *chunk_ib;
- struct amdgpu_bo_va_mapping *m;
- struct amdgpu_bo *aobj = NULL;
- struct amdgpu_cs_chunk *chunk;
- uint64_t offset, va_start;
- struct amdgpu_ib *ib;
- uint8_t *kptr;
-
- chunk = &p->chunks[i];
- ib = &p->job->ibs[j];
- chunk_ib = chunk->kdata;
-
- if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB)
- continue;
+ if (!ring->funcs->parse_cs && !ring->funcs->patch_cs_in_place)
+ return 0;
- va_start = chunk_ib->va_start & AMDGPU_GMC_HOLE_MASK;
- r = amdgpu_cs_find_mapping(p, va_start, &aobj, &m);
- if (r) {
- DRM_ERROR("IB va_start is invalid\n");
- return r;
- }
+ for (i = 0; i < job->num_ibs; ++i) {
+ struct amdgpu_ib *ib = &job->ibs[i];
+ struct amdgpu_bo_va_mapping *m;
+ struct amdgpu_bo *aobj;
+ uint64_t va_start;
+ uint8_t *kptr;
- if ((va_start + chunk_ib->ib_bytes) >
- (m->last + 1) * AMDGPU_GPU_PAGE_SIZE) {
- DRM_ERROR("IB va_start+ib_bytes is invalid\n");
- return -EINVAL;
- }
+ va_start = ib->gpu_addr & AMDGPU_GMC_HOLE_MASK;
+ r = amdgpu_cs_find_mapping(p, va_start, &aobj, &m);
+ if (r) {
+ DRM_ERROR("IB va_start is invalid\n");
+ return r;
+ }
- /* the IB should be reserved at this point */
- r = amdgpu_bo_kmap(aobj, (void **)&kptr);
- if (r) {
- return r;
- }
+ if ((va_start + ib->length_dw * 4) >
+ (m->last + 1) * AMDGPU_GPU_PAGE_SIZE) {
+ DRM_ERROR("IB va_start+ib_bytes is invalid\n");
+ return -EINVAL;
+ }
- offset = m->start * AMDGPU_GPU_PAGE_SIZE;
- kptr += va_start - offset;
-
- if (ring->funcs->parse_cs) {
- memcpy(ib->ptr, kptr, chunk_ib->ib_bytes);
- amdgpu_bo_kunmap(aobj);
-
- r = amdgpu_ring_parse_cs(ring, p, p->job, ib);
- if (r)
- return r;
- } else {
- ib->ptr = (uint32_t *)kptr;
- r = amdgpu_ring_patch_cs_in_place(ring, p, p->job, ib);
- amdgpu_bo_kunmap(aobj);
- if (r)
- return r;
- }
+ /* the IB should be reserved at this point */
+ r = amdgpu_bo_kmap(aobj, (void **)&kptr);
+ if (r) {
+ return r;
+ }
+
+ kptr += va_start - (m->start * AMDGPU_GPU_PAGE_SIZE);
- j++;
+ if (ring->funcs->parse_cs) {
+ memcpy(ib->ptr, kptr, ib->length_dw * 4);
+ amdgpu_bo_kunmap(aobj);
+
+ r = amdgpu_ring_parse_cs(ring, p, job, ib);
+ if (r)
+ return r;
+ } else {
+ ib->ptr = (uint32_t *)kptr;
+ r = amdgpu_ring_patch_cs_in_place(ring, p, job, ib);
+ amdgpu_bo_kunmap(aobj);
+ if (r)
+ return r;
}
}
- if (!p->job->vm)
- return 0;
+ return 0;
+}
+
+static int amdgpu_cs_patch_jobs(struct amdgpu_cs_parser *p)
+{
+ unsigned int i;
+ int r;
+
+ for (i = 0; i < p->gang_size; ++i) {
+ r = amdgpu_cs_patch_ibs(p, p->jobs[i]);
+ if (r)
+ return r;
+ }
+ return 0;
+}
+
+static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
+{
+ struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
+ struct amdgpu_job *job = p->gang_leader;
+ struct amdgpu_device *adev = p->adev;
+ struct amdgpu_vm *vm = &fpriv->vm;
+ struct amdgpu_bo_list_entry *e;
+ struct amdgpu_bo_va *bo_va;
+ struct amdgpu_bo *bo;
+ unsigned int i;
+ int r;
r = amdgpu_vm_clear_freed(adev, vm, NULL);
if (r)
@@ -1070,7 +1101,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
if (r)
return r;
- r = amdgpu_sync_fence(&p->job->sync, fpriv->prt_va->last_pt_update);
+ r = amdgpu_sync_fence(&job->sync, fpriv->prt_va->last_pt_update);
if (r)
return r;
@@ -1081,7 +1112,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
if (r)
return r;
- r = amdgpu_sync_fence(&p->job->sync, bo_va->last_pt_update);
+ r = amdgpu_sync_fence(&job->sync, bo_va->last_pt_update);
if (r)
return r;
}
@@ -1100,7 +1131,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
if (r)
return r;
- r = amdgpu_sync_fence(&p->job->sync, bo_va->last_pt_update);
+ r = amdgpu_sync_fence(&job->sync, bo_va->last_pt_update);
if (r)
return r;
}
@@ -1113,11 +1144,18 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
if (r)
return r;
- r = amdgpu_sync_fence(&p->job->sync, vm->last_update);
+ r = amdgpu_sync_fence(&job->sync, vm->last_update);
if (r)
return r;
- p->job->vm_pd_addr = amdgpu_gmc_pd_addr(vm->root.bo);
+ for (i = 0; i < p->gang_size; ++i) {
+ job = p->jobs[i];
+
+ if (!job->vm)
+ continue;
+
+ job->vm_pd_addr = amdgpu_gmc_pd_addr(vm->root.bo);
+ }
if (amdgpu_vm_debug) {
/* Invalidate all BOs to test for userspace bugs */
@@ -1138,7 +1176,9 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p)
{
struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
+ struct amdgpu_job *leader = p->gang_leader;
struct amdgpu_bo_list_entry *e;
+ unsigned int i;
int r;
list_for_each_entry(e, &p->validated, tv.head) {
@@ -1148,12 +1188,23 @@ static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p)
sync_mode = amdgpu_bo_explicit_sync(bo) ?
AMDGPU_SYNC_EXPLICIT : AMDGPU_SYNC_NE_OWNER;
- r = amdgpu_sync_resv(p->adev, &p->job->sync, resv, sync_mode,
+ r = amdgpu_sync_resv(p->adev, &leader->sync, resv, sync_mode,
&fpriv->vm);
if (r)
return r;
}
- return 0;
+
+ for (i = 0; i < p->gang_size - 1; ++i) {
+ r = amdgpu_sync_clone(&leader->sync, &p->jobs[i]->sync);
+ if (r)
+ return r;
+ }
+
+ r = amdgpu_ctx_wait_prev_fence(p->ctx, p->entities[p->gang_size - 1]);
+ if (r && r != -ERESTARTSYS)
+ DRM_ERROR("amdgpu_ctx_wait_prev_fence failed.\n");
+
+ return r;
}
static void amdgpu_cs_post_dependencies(struct amdgpu_cs_parser *p)
@@ -1177,20 +1228,28 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
union drm_amdgpu_cs *cs)
{
struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
- struct drm_sched_entity *entity = p->entity;
+ struct amdgpu_job *leader = p->gang_leader;
struct amdgpu_bo_list_entry *e;
- struct amdgpu_job *job;
+ unsigned int i;
uint64_t seq;
int r;
- job = p->job;
- p->job = NULL;
+ for (i = 0; i < p->gang_size; ++i)
+ drm_sched_job_arm(&p->jobs[i]->base);
- r = drm_sched_job_init(&job->base, entity, &fpriv->vm);
- if (r)
- goto error_unlock;
+ for (i = 0; i < (p->gang_size - 1); ++i) {
+ struct dma_fence *fence;
- drm_sched_job_arm(&job->base);
+ fence = &p->jobs[i]->base.s_fence->scheduled;
+ r = amdgpu_sync_fence(&leader->sync, fence);
+ if (r)
+ goto error_cleanup;
+ }
+
+ if (p->gang_size > 1) {
+ for (i = 0; i < p->gang_size; ++i)
+ amdgpu_job_set_gang_leader(p->jobs[i], leader);
+ }
/* No memory allocation is allowed while holding the notifier lock.
* The lock is held until amdgpu_cs_submit is finished and fence is
@@ -1201,6 +1260,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
/* If userptr are invalidated after amdgpu_cs_parser_bos(), return
* -EAGAIN, drmIoctl in libdrm will restart the amdgpu_cs_ioctl.
*/
+ r = 0;
amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
@@ -1208,62 +1268,65 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
}
if (r) {
r = -EAGAIN;
- goto error_abort;
+ goto error_unlock;
}
- p->fence = dma_fence_get(&job->base.s_fence->finished);
+ p->fence = dma_fence_get(&leader->base.s_fence->finished);
+ list_for_each_entry(e, &p->validated, tv.head) {
- seq = amdgpu_ctx_add_fence(p->ctx, entity, p->fence);
+ /* Everybody except for the gang leader uses READ */
+ for (i = 0; i < (p->gang_size - 1); ++i) {
+ dma_resv_add_fence(e->tv.bo->base.resv,
+ &p->jobs[i]->base.s_fence->finished,
+ DMA_RESV_USAGE_READ);
+ }
+
+ /* The gang leader is remembered as writer */
+ e->tv.num_shared = 0;
+ }
+
+ seq = amdgpu_ctx_add_fence(p->ctx, p->entities[p->gang_size - 1],
+ p->fence);
amdgpu_cs_post_dependencies(p);
- if ((job->preamble_status & AMDGPU_PREAMBLE_IB_PRESENT) &&
+ if ((leader->preamble_status & AMDGPU_PREAMBLE_IB_PRESENT) &&
!p->ctx->preamble_presented) {
- job->preamble_status |= AMDGPU_PREAMBLE_IB_PRESENT_FIRST;
+ leader->preamble_status |= AMDGPU_PREAMBLE_IB_PRESENT_FIRST;
p->ctx->preamble_presented = true;
}
cs->out.handle = seq;
- job->uf_sequence = seq;
-
- amdgpu_job_free_resources(job);
+ leader->uf_sequence = seq;
- trace_amdgpu_cs_ioctl(job);
amdgpu_vm_bo_trace_cs(&fpriv->vm, &p->ticket);
- drm_sched_entity_push_job(&job->base);
+ for (i = 0; i < p->gang_size; ++i) {
+ amdgpu_job_free_resources(p->jobs[i]);
+ trace_amdgpu_cs_ioctl(p->jobs[i]);
+ drm_sched_entity_push_job(&p->jobs[i]->base);
+ p->jobs[i] = NULL;
+ }
amdgpu_vm_move_to_lru_tail(p->adev, &fpriv->vm);
-
- /* Make sure all BOs are remembered as writers */
- amdgpu_bo_list_for_each_entry(e, p->bo_list)
- e->tv.num_shared = 0;
-
ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
+
mutex_unlock(&p->adev->notifier_lock);
mutex_unlock(&p->bo_list->bo_list_mutex);
-
return 0;
-error_abort:
- drm_sched_job_cleanup(&job->base);
+error_unlock:
mutex_unlock(&p->adev->notifier_lock);
-error_unlock:
- amdgpu_job_free(job);
+error_cleanup:
+ for (i = 0; i < p->gang_size; ++i)
+ drm_sched_job_cleanup(&p->jobs[i]->base);
return r;
}
/* Cleanup the parser structure */
-static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error,
- bool backoff)
+static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser)
{
unsigned i;
- if (error && backoff) {
- ttm_eu_backoff_reservation(&parser->ticket,
- &parser->validated);
- mutex_unlock(&parser->bo_list->bo_list_mutex);
- }
-
for (i = 0; i < parser->num_post_deps; i++) {
drm_syncobj_put(parser->post_deps[i].syncobj);
kfree(parser->post_deps[i].chain);
@@ -1280,8 +1343,10 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error,
for (i = 0; i < parser->nchunks; i++)
kvfree(parser->chunks[i].kdata);
kvfree(parser->chunks);
- if (parser->job)
- amdgpu_job_free(parser->job);
+ for (i = 0; i < parser->gang_size; ++i) {
+ if (parser->jobs[i])
+ amdgpu_job_free(parser->jobs[i]);
+ }
if (parser->uf_entry.tv.bo) {
struct amdgpu_bo *uf = ttm_to_amdgpu_bo(parser->uf_entry.tv.bo);
@@ -1293,7 +1358,6 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
{
struct amdgpu_device *adev = drm_to_adev(dev);
struct amdgpu_cs_parser parser;
- bool reserved_buffers = false;
int r;
if (amdgpu_ras_intr_triggered())
@@ -1306,22 +1370,16 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
if (r) {
if (printk_ratelimit())
DRM_ERROR("Failed to initialize parser %d!\n", r);
- goto out;
+ return r;
}
r = amdgpu_cs_pass1(&parser, data);
if (r)
- goto out;
+ goto error_fini;
- r = amdgpu_cs_ib_fill(adev, &parser);
+ r = amdgpu_cs_pass2(&parser);
if (r)
- goto out;
-
- r = amdgpu_cs_dependencies(adev, &parser);
- if (r) {
- DRM_ERROR("Failed in the dependencies handling %d!\n", r);
- goto out;
- }
+ goto error_fini;
r = amdgpu_cs_parser_bos(&parser, data);
if (r) {
@@ -1329,25 +1387,36 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
DRM_ERROR("Not enough memory for command submission!\n");
else if (r != -ERESTARTSYS && r != -EAGAIN)
DRM_ERROR("Failed to process the buffer list %d!\n", r);
- goto out;
+ goto error_fini;
}
- reserved_buffers = true;
-
- trace_amdgpu_cs_ibs(&parser);
+ r = amdgpu_cs_patch_jobs(&parser);
+ if (r)
+ goto error_backoff;
r = amdgpu_cs_vm_handling(&parser);
if (r)
- goto out;
+ goto error_backoff;
r = amdgpu_cs_sync_rings(&parser);
if (r)
- goto out;
+ goto error_backoff;
+
+ trace_amdgpu_cs_ibs(&parser);
r = amdgpu_cs_submit(&parser, data);
-out:
- amdgpu_cs_parser_fini(&parser, r, reserved_buffers);
+ if (r)
+ goto error_backoff;
+
+ amdgpu_cs_parser_fini(&parser);
+ return 0;
+
+error_backoff:
+ ttm_eu_backoff_reservation(&parser.ticket, &parser.validated);
+ mutex_unlock(&parser.bo_list->bo_list_mutex);
+error_fini:
+ amdgpu_cs_parser_fini(&parser);
return r;
}