aboutsummaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c893
1 files changed, 439 insertions, 454 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index b7bae833c804..e104d7ef3c3d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -39,9 +39,42 @@
#include "amdgpu_gem.h"
#include "amdgpu_ras.h"
-static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p,
- struct drm_amdgpu_cs_chunk_fence *data,
- uint32_t *offset)
+static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p,
+ struct amdgpu_device *adev,
+ struct drm_file *filp,
+ union drm_amdgpu_cs *cs)
+{
+ struct amdgpu_fpriv *fpriv = filp->driver_priv;
+
+ if (cs->in.num_chunks == 0)
+ return -EINVAL;
+
+ memset(p, 0, sizeof(*p));
+ p->adev = adev;
+ p->filp = filp;
+
+ p->ctx = amdgpu_ctx_get(fpriv, cs->in.ctx_id);
+ if (!p->ctx)
+ return -EINVAL;
+
+ if (atomic_read(&p->ctx->guilty)) {
+ amdgpu_ctx_put(p->ctx);
+ return -ECANCELED;
+ }
+ return 0;
+}
+
+static int amdgpu_cs_p1_ib(struct amdgpu_cs_parser *p,
+ struct drm_amdgpu_cs_chunk_ib *chunk_ib,
+ unsigned int *num_ibs)
+{
+ ++(*num_ibs);
+ return 0;
+}
+
+static int amdgpu_cs_p1_user_fence(struct amdgpu_cs_parser *p,
+ struct drm_amdgpu_cs_chunk_fence *data,
+ uint32_t *offset)
{
struct drm_gem_object *gobj;
struct amdgpu_bo *bo;
@@ -80,11 +113,11 @@ error_unref:
return r;
}
-static int amdgpu_cs_bo_handles_chunk(struct amdgpu_cs_parser *p,
- struct drm_amdgpu_bo_list_in *data)
+static int amdgpu_cs_p1_bo_handles(struct amdgpu_cs_parser *p,
+ struct drm_amdgpu_bo_list_in *data)
{
+ struct drm_amdgpu_bo_list_entry *info;
int r;
- struct drm_amdgpu_bo_list_entry *info = NULL;
r = amdgpu_bo_create_list_entry_array(data, &info);
if (r)
@@ -104,7 +137,9 @@ error_free:
return r;
}
-static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, union drm_amdgpu_cs *cs)
+/* Copy the data from userspace and go over it the first time */
+static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p,
+ union drm_amdgpu_cs *cs)
{
struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
struct amdgpu_vm *vm = &fpriv->vm;
@@ -112,30 +147,14 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, union drm_amdgpu_cs
uint64_t *chunk_array;
unsigned size, num_ibs = 0;
uint32_t uf_offset = 0;
- int i;
int ret;
+ int i;
- if (cs->in.num_chunks == 0)
- return -EINVAL;
-
- chunk_array = kvmalloc_array(cs->in.num_chunks, sizeof(uint64_t), GFP_KERNEL);
+ chunk_array = kvmalloc_array(cs->in.num_chunks, sizeof(uint64_t),
+ GFP_KERNEL);
if (!chunk_array)
return -ENOMEM;
- p->ctx = amdgpu_ctx_get(fpriv, cs->in.ctx_id);
- if (!p->ctx) {
- ret = -EINVAL;
- goto free_chunk;
- }
-
- mutex_lock(&p->ctx->lock);
-
- /* skip guilty context job */
- if (atomic_read(&p->ctx->guilty) == 1) {
- ret = -ECANCELED;
- goto free_chunk;
- }
-
/* get chunks */
chunk_array_user = u64_to_user_ptr(cs->in.chunks);
if (copy_from_user(chunk_array, chunk_array_user,
@@ -170,7 +189,8 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, union drm_amdgpu_cs
size = p->chunks[i].length_dw;
cdata = u64_to_user_ptr(user_chunk.chunk_data);
- p->chunks[i].kdata = kvmalloc_array(size, sizeof(uint32_t), GFP_KERNEL);
+ p->chunks[i].kdata = kvmalloc_array(size, sizeof(uint32_t),
+ GFP_KERNEL);
if (p->chunks[i].kdata == NULL) {
ret = -ENOMEM;
i--;
@@ -182,36 +202,35 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, union drm_amdgpu_cs
goto free_partial_kdata;
}
+ /* Assume the worst on the following checks */
+ ret = -EINVAL;
switch (p->chunks[i].chunk_id) {
case AMDGPU_CHUNK_ID_IB:
- ++num_ibs;
+ if (size < sizeof(struct drm_amdgpu_cs_chunk_ib))
+ goto free_partial_kdata;
+
+ ret = amdgpu_cs_p1_ib(p, p->chunks[i].kdata, &num_ibs);
+ if (ret)
+ goto free_partial_kdata;
break;
case AMDGPU_CHUNK_ID_FENCE:
- size = sizeof(struct drm_amdgpu_cs_chunk_fence);
- if (p->chunks[i].length_dw * sizeof(uint32_t) < size) {
- ret = -EINVAL;
+ if (size < sizeof(struct drm_amdgpu_cs_chunk_fence))
goto free_partial_kdata;
- }
- ret = amdgpu_cs_user_fence_chunk(p, p->chunks[i].kdata,
- &uf_offset);
+ ret = amdgpu_cs_p1_user_fence(p, p->chunks[i].kdata,
+ &uf_offset);
if (ret)
goto free_partial_kdata;
-
break;
case AMDGPU_CHUNK_ID_BO_HANDLES:
- size = sizeof(struct drm_amdgpu_bo_list_in);
- if (p->chunks[i].length_dw * sizeof(uint32_t) < size) {
- ret = -EINVAL;
+ if (size < sizeof(struct drm_amdgpu_bo_list_in))
goto free_partial_kdata;
- }
- ret = amdgpu_cs_bo_handles_chunk(p, p->chunks[i].kdata);
+ ret = amdgpu_cs_p1_bo_handles(p, p->chunks[i].kdata);
if (ret)
goto free_partial_kdata;
-
break;
case AMDGPU_CHUNK_ID_DEPENDENCIES:
@@ -223,7 +242,6 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, union drm_amdgpu_cs
break;
default:
- ret = -EINVAL;
goto free_partial_kdata;
}
}
@@ -260,6 +278,323 @@ free_chunk:
return ret;
}
+static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
+ struct amdgpu_cs_parser *parser)
+{
+ struct amdgpu_fpriv *fpriv = parser->filp->driver_priv;
+ struct amdgpu_vm *vm = &fpriv->vm;
+ int r, ce_preempt = 0, de_preempt = 0;
+ struct amdgpu_ring *ring;
+ int i, j;
+
+ for (i = 0, j = 0; i < parser->nchunks && j < parser->job->num_ibs; i++) {
+ struct amdgpu_cs_chunk *chunk;
+ struct amdgpu_ib *ib;
+ struct drm_amdgpu_cs_chunk_ib *chunk_ib;
+ struct drm_sched_entity *entity;
+
+ chunk = &parser->chunks[i];
+ ib = &parser->job->ibs[j];
+ chunk_ib = (struct drm_amdgpu_cs_chunk_ib *)chunk->kdata;
+
+ if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB)
+ continue;
+
+ if (chunk_ib->ip_type == AMDGPU_HW_IP_GFX &&
+ chunk_ib->flags & AMDGPU_IB_FLAG_PREEMPT) {
+ if (chunk_ib->flags & AMDGPU_IB_FLAG_CE)
+ ce_preempt++;
+ else
+ de_preempt++;
+
+ /* each GFX command submit allows 0 or 1 IB preemptible for CE & DE */
+ if (ce_preempt > 1 || de_preempt > 1)
+ return -EINVAL;
+ }
+
+ r = amdgpu_ctx_get_entity(parser->ctx, chunk_ib->ip_type,
+ chunk_ib->ip_instance, chunk_ib->ring,
+ &entity);
+ if (r)
+ return r;
+
+ if (chunk_ib->flags & AMDGPU_IB_FLAG_PREAMBLE)
+ parser->job->preamble_status |=
+ AMDGPU_PREAMBLE_IB_PRESENT;
+
+ if (parser->entity && parser->entity != entity)
+ return -EINVAL;
+
+ /* Return if there is no run queue associated with this entity.
+ * Possibly because of disabled HW IP*/
+ if (entity->rq == NULL)
+ return -EINVAL;
+
+ parser->entity = entity;
+
+ ring = to_amdgpu_ring(entity->rq->sched);
+ r = amdgpu_ib_get(adev, vm, ring->funcs->parse_cs ?
+ chunk_ib->ib_bytes : 0,
+ AMDGPU_IB_POOL_DELAYED, ib);
+ if (r) {
+ DRM_ERROR("Failed to get ib !\n");
+ return r;
+ }
+
+ ib->gpu_addr = chunk_ib->va_start;
+ ib->length_dw = chunk_ib->ib_bytes / 4;
+ ib->flags = chunk_ib->flags;
+
+ j++;
+ }
+
+ /* MM engine doesn't support user fences */
+ ring = to_amdgpu_ring(parser->entity->rq->sched);
+ if (parser->job->uf_addr && ring->funcs->no_user_fence)
+ return -EINVAL;
+
+ return 0;
+}
+
+static int amdgpu_cs_process_fence_dep(struct amdgpu_cs_parser *p,
+ struct amdgpu_cs_chunk *chunk)
+{
+ struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
+ unsigned num_deps;
+ int i, r;
+ struct drm_amdgpu_cs_chunk_dep *deps;
+
+ deps = (struct drm_amdgpu_cs_chunk_dep *)chunk->kdata;
+ num_deps = chunk->length_dw * 4 /
+ sizeof(struct drm_amdgpu_cs_chunk_dep);
+
+ for (i = 0; i < num_deps; ++i) {
+ struct amdgpu_ctx *ctx;
+ struct drm_sched_entity *entity;
+ struct dma_fence *fence;
+
+ ctx = amdgpu_ctx_get(fpriv, deps[i].ctx_id);
+ if (ctx == NULL)
+ return -EINVAL;
+
+ r = amdgpu_ctx_get_entity(ctx, deps[i].ip_type,
+ deps[i].ip_instance,
+ deps[i].ring, &entity);
+ if (r) {
+ amdgpu_ctx_put(ctx);
+ return r;
+ }
+
+ fence = amdgpu_ctx_get_fence(ctx, entity, deps[i].handle);
+ amdgpu_ctx_put(ctx);
+
+ if (IS_ERR(fence))
+ return PTR_ERR(fence);
+ else if (!fence)
+ continue;
+
+ if (chunk->chunk_id == AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES) {
+ struct drm_sched_fence *s_fence;
+ struct dma_fence *old = fence;
+
+ s_fence = to_drm_sched_fence(fence);
+ fence = dma_fence_get(&s_fence->scheduled);
+ dma_fence_put(old);
+ }
+
+ r = amdgpu_sync_fence(&p->job->sync, fence);
+ dma_fence_put(fence);
+ if (r)
+ return r;
+ }
+ return 0;
+}
+
+static int amdgpu_syncobj_lookup_and_add_to_sync(struct amdgpu_cs_parser *p,
+ uint32_t handle, u64 point,
+ u64 flags)
+{
+ struct dma_fence *fence;
+ int r;
+
+ r = drm_syncobj_find_fence(p->filp, handle, point, flags, &fence);
+ if (r) {
+ DRM_ERROR("syncobj %u failed to find fence @ %llu (%d)!\n",
+ handle, point, r);
+ return r;
+ }
+
+ r = amdgpu_sync_fence(&p->job->sync, fence);
+ dma_fence_put(fence);
+
+ return r;
+}
+
+static int amdgpu_cs_process_syncobj_in_dep(struct amdgpu_cs_parser *p,
+ struct amdgpu_cs_chunk *chunk)
+{
+ struct drm_amdgpu_cs_chunk_sem *deps;
+ unsigned num_deps;
+ int i, r;
+
+ deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata;
+ num_deps = chunk->length_dw * 4 /
+ sizeof(struct drm_amdgpu_cs_chunk_sem);
+ for (i = 0; i < num_deps; ++i) {
+ r = amdgpu_syncobj_lookup_and_add_to_sync(p, deps[i].handle,
+ 0, 0);
+ if (r)
+ return r;
+ }
+
+ return 0;
+}
+
+static int amdgpu_cs_process_syncobj_timeline_in_dep(struct amdgpu_cs_parser *p,
+ struct amdgpu_cs_chunk *chunk)
+{
+ struct drm_amdgpu_cs_chunk_syncobj *syncobj_deps;
+ unsigned num_deps;
+ int i, r;
+
+ syncobj_deps = (struct drm_amdgpu_cs_chunk_syncobj *)chunk->kdata;
+ num_deps = chunk->length_dw * 4 /
+ sizeof(struct drm_amdgpu_cs_chunk_syncobj);
+ for (i = 0; i < num_deps; ++i) {
+ r = amdgpu_syncobj_lookup_and_add_to_sync(p,
+ syncobj_deps[i].handle,
+ syncobj_deps[i].point,
+ syncobj_deps[i].flags);
+ if (r)
+ return r;
+ }
+
+ return 0;
+}
+
+static int amdgpu_cs_process_syncobj_out_dep(struct amdgpu_cs_parser *p,
+ struct amdgpu_cs_chunk *chunk)
+{
+ struct drm_amdgpu_cs_chunk_sem *deps;
+ unsigned num_deps;
+ int i;
+
+ deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata;
+ num_deps = chunk->length_dw * 4 /
+ sizeof(struct drm_amdgpu_cs_chunk_sem);
+
+ if (p->post_deps)
+ return -EINVAL;
+
+ p->post_deps = kmalloc_array(num_deps, sizeof(*p->post_deps),
+ GFP_KERNEL);
+ p->num_post_deps = 0;
+
+ if (!p->post_deps)
+ return -ENOMEM;
+
+
+ for (i = 0; i < num_deps; ++i) {
+ p->post_deps[i].syncobj =
+ drm_syncobj_find(p->filp, deps[i].handle);
+ if (!p->post_deps[i].syncobj)
+ return -EINVAL;
+ p->post_deps[i].chain = NULL;
+ p->post_deps[i].point = 0;
+ p->num_post_deps++;
+ }
+
+ return 0;
+}
+
+
+static int amdgpu_cs_process_syncobj_timeline_out_dep(struct amdgpu_cs_parser *p,
+ struct amdgpu_cs_chunk *chunk)
+{
+ struct drm_amdgpu_cs_chunk_syncobj *syncobj_deps;
+ unsigned num_deps;
+ int i;
+
+ syncobj_deps = (struct drm_amdgpu_cs_chunk_syncobj *)chunk->kdata;
+ num_deps = chunk->length_dw * 4 /
+ sizeof(struct drm_amdgpu_cs_chunk_syncobj);
+
+ if (p->post_deps)
+ return -EINVAL;
+
+ p->post_deps = kmalloc_array(num_deps, sizeof(*p->post_deps),
+ GFP_KERNEL);
+ p->num_post_deps = 0;
+
+ if (!p->post_deps)
+ return -ENOMEM;
+
+ for (i = 0; i < num_deps; ++i) {
+ struct amdgpu_cs_post_dep *dep = &p->post_deps[i];
+
+ dep->chain = NULL;
+ if (syncobj_deps[i].point) {
+ dep->chain = dma_fence_chain_alloc();
+ if (!dep->chain)
+ return -ENOMEM;
+ }
+
+ dep->syncobj = drm_syncobj_find(p->filp,
+ syncobj_deps[i].handle);
+ if (!dep->syncobj) {
+ dma_fence_chain_free(dep->chain);
+ return -EINVAL;
+ }
+ dep->point = syncobj_deps[i].point;
+ p->num_post_deps++;
+ }
+
+ return 0;
+}
+
+static int amdgpu_cs_dependencies(struct amdgpu_device *adev,
+ struct amdgpu_cs_parser *p)
+{
+ int i, r;
+
+ for (i = 0; i < p->nchunks; ++i) {
+ struct amdgpu_cs_chunk *chunk;
+
+ chunk = &p->chunks[i];
+
+ switch (chunk->chunk_id) {
+ case AMDGPU_CHUNK_ID_DEPENDENCIES:
+ case AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES:
+ r = amdgpu_cs_process_fence_dep(p, chunk);
+ if (r)
+ return r;
+ break;
+ case AMDGPU_CHUNK_ID_SYNCOBJ_IN:
+ r = amdgpu_cs_process_syncobj_in_dep(p, chunk);
+ if (r)
+ return r;
+ break;
+ case AMDGPU_CHUNK_ID_SYNCOBJ_OUT:
+ r = amdgpu_cs_process_syncobj_out_dep(p, chunk);
+ if (r)
+ return r;
+ break;
+ case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT:
+ r = amdgpu_cs_process_syncobj_timeline_in_dep(p, chunk);
+ if (r)
+ return r;
+ break;
+ case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL:
+ r = amdgpu_cs_process_syncobj_timeline_out_dep(p, chunk);
+ if (r)
+ return r;
+ break;
+ }
+ }
+
+ return 0;
+}
+
/* Convert microseconds to bytes. */
static u64 us_to_bytes(struct amdgpu_device *adev, s64 us)
{
@@ -495,9 +830,6 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
struct amdgpu_vm *vm = &fpriv->vm;
struct amdgpu_bo_list_entry *e;
struct list_head duplicates;
- struct amdgpu_bo *gds;
- struct amdgpu_bo *gws;
- struct amdgpu_bo *oa;
int r;
INIT_LIST_HEAD(&p->validated);
@@ -614,22 +946,8 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
amdgpu_cs_report_moved_bytes(p->adev, p->bytes_moved,
p->bytes_moved_vis);
- gds = p->bo_list->gds_obj;
- gws = p->bo_list->gws_obj;
- oa = p->bo_list->oa_obj;
-
- if (gds) {
- p->job->gds_base = amdgpu_bo_gpu_offset(gds) >> PAGE_SHIFT;
- p->job->gds_size = amdgpu_bo_size(gds) >> PAGE_SHIFT;
- }
- if (gws) {
- p->job->gws_base = amdgpu_bo_gpu_offset(gws) >> PAGE_SHIFT;
- p->job->gws_size = amdgpu_bo_size(gws) >> PAGE_SHIFT;
- }
- if (oa) {
- p->job->oa_base = amdgpu_bo_gpu_offset(oa) >> PAGE_SHIFT;
- p->job->oa_size = amdgpu_bo_size(oa) >> PAGE_SHIFT;
- }
+ amdgpu_job_set_resources(p->job, p->bo_list->gds_obj,
+ p->bo_list->gws_obj, p->bo_list->oa_obj);
if (!r && p->uf_entry.tv.bo) {
struct amdgpu_bo *uf = ttm_to_amdgpu_bo(p->uf_entry.tv.bo);
@@ -658,72 +976,15 @@ out_free_user_pages:
return r;
}
-static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p)
-{
- struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
- struct amdgpu_bo_list_entry *e;
- int r;
-
- list_for_each_entry(e, &p->validated, tv.head) {
- struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
- struct dma_resv *resv = bo->tbo.base.resv;
- enum amdgpu_sync_mode sync_mode;
-
- sync_mode = amdgpu_bo_explicit_sync(bo) ?
- AMDGPU_SYNC_EXPLICIT : AMDGPU_SYNC_NE_OWNER;
- r = amdgpu_sync_resv(p->adev, &p->job->sync, resv, sync_mode,
- &fpriv->vm);
- if (r)
- return r;
- }
- return 0;
-}
-
-/**
- * amdgpu_cs_parser_fini() - clean parser states
- * @parser: parser structure holding parsing context.
- * @error: error number
- * @backoff: indicator to backoff the reservation
- *
- * If error is set then unvalidate buffer, otherwise just free memory
- * used by parsing context.
- **/
-static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error,
- bool backoff)
+static void trace_amdgpu_cs_ibs(struct amdgpu_cs_parser *parser)
{
- unsigned i;
-
- if (error && backoff) {
- ttm_eu_backoff_reservation(&parser->ticket,
- &parser->validated);
- mutex_unlock(&parser->bo_list->bo_list_mutex);
- }
-
- for (i = 0; i < parser->num_post_deps; i++) {
- drm_syncobj_put(parser->post_deps[i].syncobj);
- kfree(parser->post_deps[i].chain);
- }
- kfree(parser->post_deps);
-
- dma_fence_put(parser->fence);
-
- if (parser->ctx) {
- mutex_unlock(&parser->ctx->lock);
- amdgpu_ctx_put(parser->ctx);
- }
- if (parser->bo_list)
- amdgpu_bo_list_put(parser->bo_list);
+ int i;
- for (i = 0; i < parser->nchunks; i++)
- kvfree(parser->chunks[i].kdata);
- kvfree(parser->chunks);
- if (parser->job)
- amdgpu_job_free(parser->job);
- if (parser->uf_entry.tv.bo) {
- struct amdgpu_bo *uf = ttm_to_amdgpu_bo(parser->uf_entry.tv.bo);
+ if (!trace_amdgpu_cs_enabled())
+ return;
- amdgpu_bo_unref(&uf);
- }
+ for (i = 0; i < parser->job->num_ibs; i++)
+ trace_amdgpu_cs(parser, i);
}
static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
@@ -799,8 +1060,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
}
if (!p->job->vm)
- return amdgpu_cs_sync_rings(p);
-
+ return 0;
r = amdgpu_vm_clear_freed(adev, vm, NULL);
if (r)
@@ -814,7 +1074,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
if (r)
return r;
- if (amdgpu_mcbp || amdgpu_sriov_vf(adev)) {
+ if (fpriv->csa_va) {
bo_va = fpriv->csa_va;
BUG_ON(!bo_va);
r = amdgpu_vm_bo_update(adev, bo_va, false);
@@ -872,334 +1132,30 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
}
}
- return amdgpu_cs_sync_rings(p);
-}
-
-static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
- struct amdgpu_cs_parser *parser)
-{
- struct amdgpu_fpriv *fpriv = parser->filp->driver_priv;
- struct amdgpu_vm *vm = &fpriv->vm;
- int r, ce_preempt = 0, de_preempt = 0;
- struct amdgpu_ring *ring;
- int i, j;
-
- for (i = 0, j = 0; i < parser->nchunks && j < parser->job->num_ibs; i++) {
- struct amdgpu_cs_chunk *chunk;
- struct amdgpu_ib *ib;
- struct drm_amdgpu_cs_chunk_ib *chunk_ib;
- struct drm_sched_entity *entity;
-
- chunk = &parser->chunks[i];
- ib = &parser->job->ibs[j];
- chunk_ib = (struct drm_amdgpu_cs_chunk_ib *)chunk->kdata;
-
- if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB)
- continue;
-
- if (chunk_ib->ip_type == AMDGPU_HW_IP_GFX &&
- (amdgpu_mcbp || amdgpu_sriov_vf(adev))) {
- if (chunk_ib->flags & AMDGPU_IB_FLAG_PREEMPT) {
- if (chunk_ib->flags & AMDGPU_IB_FLAG_CE)
- ce_preempt++;
- else
- de_preempt++;
- }
-
- /* each GFX command submit allows 0 or 1 IB preemptible for CE & DE */
- if (ce_preempt > 1 || de_preempt > 1)
- return -EINVAL;
- }
-
- r = amdgpu_ctx_get_entity(parser->ctx, chunk_ib->ip_type,
- chunk_ib->ip_instance, chunk_ib->ring,
- &entity);
- if (r)
- return r;
-
- if (chunk_ib->flags & AMDGPU_IB_FLAG_PREAMBLE)
- parser->job->preamble_status |=
- AMDGPU_PREAMBLE_IB_PRESENT;
-
- if (parser->entity && parser->entity != entity)
- return -EINVAL;
-
- /* Return if there is no run queue associated with this entity.
- * Possibly because of disabled HW IP*/
- if (entity->rq == NULL)
- return -EINVAL;
-
- parser->entity = entity;
-
- ring = to_amdgpu_ring(entity->rq->sched);
- r = amdgpu_ib_get(adev, vm, ring->funcs->parse_cs ?
- chunk_ib->ib_bytes : 0,
- AMDGPU_IB_POOL_DELAYED, ib);
- if (r) {
- DRM_ERROR("Failed to get ib !\n");
- return r;
- }
-
- ib->gpu_addr = chunk_ib->va_start;
- ib->length_dw = chunk_ib->ib_bytes / 4;
- ib->flags = chunk_ib->flags;
-
- j++;
- }
-
- /* MM engine doesn't support user fences */
- ring = to_amdgpu_ring(parser->entity->rq->sched);
- if (parser->job->uf_addr && ring->funcs->no_user_fence)
- return -EINVAL;
-
return 0;
}
-static int amdgpu_cs_process_fence_dep(struct amdgpu_cs_parser *p,
- struct amdgpu_cs_chunk *chunk)
+static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p)
{
struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
- unsigned num_deps;
- int i, r;
- struct drm_amdgpu_cs_chunk_dep *deps;
-
- deps = (struct drm_amdgpu_cs_chunk_dep *)chunk->kdata;
- num_deps = chunk->length_dw * 4 /
- sizeof(struct drm_amdgpu_cs_chunk_dep);
-
- for (i = 0; i < num_deps; ++i) {
- struct amdgpu_ctx *ctx;
- struct drm_sched_entity *entity;
- struct dma_fence *fence;
-
- ctx = amdgpu_ctx_get(fpriv, deps[i].ctx_id);
- if (ctx == NULL)
- return -EINVAL;
-
- r = amdgpu_ctx_get_entity(ctx, deps[i].ip_type,
- deps[i].ip_instance,
- deps[i].ring, &entity);
- if (r) {
- amdgpu_ctx_put(ctx);
- return r;
- }
-
- fence = amdgpu_ctx_get_fence(ctx, entity, deps[i].handle);
- amdgpu_ctx_put(ctx);
-
- if (IS_ERR(fence))
- return PTR_ERR(fence);
- else if (!fence)
- continue;
-
- if (chunk->chunk_id == AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES) {
- struct drm_sched_fence *s_fence;
- struct dma_fence *old = fence;
-
- s_fence = to_drm_sched_fence(fence);
- fence = dma_fence_get(&s_fence->scheduled);
- dma_fence_put(old);
- }
-
- r = amdgpu_sync_fence(&p->job->sync, fence);
- dma_fence_put(fence);
- if (r)
- return r;
- }
- return 0;
-}
-
-static int amdgpu_syncobj_lookup_and_add_to_sync(struct amdgpu_cs_parser *p,
- uint32_t handle, u64 point,
- u64 flags)
-{
- struct dma_fence *fence;
+ struct amdgpu_bo_list_entry *e;
int r;
- r = drm_syncobj_find_fence(p->filp, handle, point, flags, &fence);
- if (r) {
- DRM_ERROR("syncobj %u failed to find fence @ %llu (%d)!\n",
- handle, point, r);
- return r;
- }
-
- r = amdgpu_sync_fence(&p->job->sync, fence);
- dma_fence_put(fence);
-
- return r;
-}
-
-static int amdgpu_cs_process_syncobj_in_dep(struct amdgpu_cs_parser *p,
- struct amdgpu_cs_chunk *chunk)
-{
- struct drm_amdgpu_cs_chunk_sem *deps;
- unsigned num_deps;
- int i, r;
-
- deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata;
- num_deps = chunk->length_dw * 4 /
- sizeof(struct drm_amdgpu_cs_chunk_sem);
- for (i = 0; i < num_deps; ++i) {
- r = amdgpu_syncobj_lookup_and_add_to_sync(p, deps[i].handle,
- 0, 0);
- if (r)
- return r;
- }
-
- return 0;
-}
-
-
-static int amdgpu_cs_process_syncobj_timeline_in_dep(struct amdgpu_cs_parser *p,
- struct amdgpu_cs_chunk *chunk)
-{
- struct drm_amdgpu_cs_chunk_syncobj *syncobj_deps;
- unsigned num_deps;
- int i, r;
+ list_for_each_entry(e, &p->validated, tv.head) {
+ struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
+ struct dma_resv *resv = bo->tbo.base.resv;
+ enum amdgpu_sync_mode sync_mode;
- syncobj_deps = (struct drm_amdgpu_cs_chunk_syncobj *)chunk->kdata;
- num_deps = chunk->length_dw * 4 /
- sizeof(struct drm_amdgpu_cs_chunk_syncobj);
- for (i = 0; i < num_deps; ++i) {
- r = amdgpu_syncobj_lookup_and_add_to_sync(p,
- syncobj_deps[i].handle,
- syncobj_deps[i].point,
- syncobj_deps[i].flags);
+ sync_mode = amdgpu_bo_explicit_sync(bo) ?
+ AMDGPU_SYNC_EXPLICIT : AMDGPU_SYNC_NE_OWNER;
+ r = amdgpu_sync_resv(p->adev, &p->job->sync, resv, sync_mode,
+ &fpriv->vm);
if (r)
return r;
}
-
return 0;
}
-static int amdgpu_cs_process_syncobj_out_dep(struct amdgpu_cs_parser *p,
- struct amdgpu_cs_chunk *chunk)
-{
- struct drm_amdgpu_cs_chunk_sem *deps;
- unsigned num_deps;
- int i;
-
- deps = (struct drm_amdgpu_cs_chunk_sem *)chunk->kdata;
- num_deps = chunk->length_dw * 4 /
- sizeof(struct drm_amdgpu_cs_chunk_sem);
-
- if (p->post_deps)
- return -EINVAL;
-
- p->post_deps = kmalloc_array(num_deps, sizeof(*p->post_deps),
- GFP_KERNEL);
- p->num_post_deps = 0;
-
- if (!p->post_deps)
- return -ENOMEM;
-
-
- for (i = 0; i < num_deps; ++i) {
- p->post_deps[i].syncobj =
- drm_syncobj_find(p->filp, deps[i].handle);
- if (!p->post_deps[i].syncobj)
- return -EINVAL;
- p->post_deps[i].chain = NULL;
- p->post_deps[i].point = 0;
- p->num_post_deps++;
- }
-
- return 0;
-}
-
-
-static int amdgpu_cs_process_syncobj_timeline_out_dep(struct amdgpu_cs_parser *p,
- struct amdgpu_cs_chunk *chunk)
-{
- struct drm_amdgpu_cs_chunk_syncobj *syncobj_deps;
- unsigned num_deps;
- int i;
-
- syncobj_deps = (struct drm_amdgpu_cs_chunk_syncobj *)chunk->kdata;
- num_deps = chunk->length_dw * 4 /
- sizeof(struct drm_amdgpu_cs_chunk_syncobj);
-
- if (p->post_deps)
- return -EINVAL;
-
- p->post_deps = kmalloc_array(num_deps, sizeof(*p->post_deps),
- GFP_KERNEL);
- p->num_post_deps = 0;
-
- if (!p->post_deps)
- return -ENOMEM;
-
- for (i = 0; i < num_deps; ++i) {
- struct amdgpu_cs_post_dep *dep = &p->post_deps[i];
-
- dep->chain = NULL;
- if (syncobj_deps[i].point) {
- dep->chain = dma_fence_chain_alloc();
- if (!dep->chain)
- return -ENOMEM;
- }
-
- dep->syncobj = drm_syncobj_find(p->filp,
- syncobj_deps[i].handle);
- if (!dep->syncobj) {
- dma_fence_chain_free(dep->chain);
- return -EINVAL;
- }
- dep->point = syncobj_deps[i].point;
- p->num_post_deps++;
- }
-
- return 0;
-}
-
-static int amdgpu_cs_dependencies(struct amdgpu_device *adev,
- struct amdgpu_cs_parser *p)
-{
- int i, r;
-
- /* TODO: Investigate why we still need the context lock */
- mutex_unlock(&p->ctx->lock);
-
- for (i = 0; i < p->nchunks; ++i) {
- struct amdgpu_cs_chunk *chunk;
-
- chunk = &p->chunks[i];
-
- switch (chunk->chunk_id) {
- case AMDGPU_CHUNK_ID_DEPENDENCIES:
- case AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES:
- r = amdgpu_cs_process_fence_dep(p, chunk);
- if (r)
- goto out;
- break;
- case AMDGPU_CHUNK_ID_SYNCOBJ_IN:
- r = amdgpu_cs_process_syncobj_in_dep(p, chunk);
- if (r)
- goto out;
- break;
- case AMDGPU_CHUNK_ID_SYNCOBJ_OUT:
- r = amdgpu_cs_process_syncobj_out_dep(p, chunk);
- if (r)
- goto out;
- break;
- case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT:
- r = amdgpu_cs_process_syncobj_timeline_in_dep(p, chunk);
- if (r)
- goto out;
- break;
- case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL:
- r = amdgpu_cs_process_syncobj_timeline_out_dep(p, chunk);
- if (r)
- goto out;
- break;
- }
- }
-
-out:
- mutex_lock(&p->ctx->lock);
- return r;
-}
-
static void amdgpu_cs_post_dependencies(struct amdgpu_cs_parser *p)
{
int i;
@@ -1296,22 +1252,47 @@ error_unlock:
return r;
}
-static void trace_amdgpu_cs_ibs(struct amdgpu_cs_parser *parser)
+/* Cleanup the parser structure */
+static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error,
+ bool backoff)
{
- int i;
+ unsigned i;
- if (!trace_amdgpu_cs_enabled())
- return;
+ if (error && backoff) {
+ ttm_eu_backoff_reservation(&parser->ticket,
+ &parser->validated);
+ mutex_unlock(&parser->bo_list->bo_list_mutex);
+ }
- for (i = 0; i < parser->job->num_ibs; i++)
- trace_amdgpu_cs(parser, i);
+ for (i = 0; i < parser->num_post_deps; i++) {
+ drm_syncobj_put(parser->post_deps[i].syncobj);
+ kfree(parser->post_deps[i].chain);
+ }
+ kfree(parser->post_deps);
+
+ dma_fence_put(parser->fence);
+
+ if (parser->ctx)
+ amdgpu_ctx_put(parser->ctx);
+ if (parser->bo_list)
+ amdgpu_bo_list_put(parser->bo_list);
+
+ for (i = 0; i < parser->nchunks; i++)
+ kvfree(parser->chunks[i].kdata);
+ kvfree(parser->chunks);
+ if (parser->job)
+ amdgpu_job_free(parser->job);
+ if (parser->uf_entry.tv.bo) {
+ struct amdgpu_bo *uf = ttm_to_amdgpu_bo(parser->uf_entry.tv.bo);
+
+ amdgpu_bo_unref(&uf);
+ }
}
int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
{
struct amdgpu_device *adev = drm_to_adev(dev);
- union drm_amdgpu_cs *cs = data;
- struct amdgpu_cs_parser parser = {};
+ struct amdgpu_cs_parser parser;
bool reserved_buffers = false;
int r;
@@ -1321,16 +1302,17 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
if (!adev->accel_working)
return -EBUSY;
- parser.adev = adev;
- parser.filp = filp;
-
- r = amdgpu_cs_parser_init(&parser, data);
+ r = amdgpu_cs_parser_init(&parser, adev, filp, data);
if (r) {
if (printk_ratelimit())
DRM_ERROR("Failed to initialize parser %d!\n", r);
goto out;
}
+ r = amdgpu_cs_pass1(&parser, data);
+ if (r)
+ goto out;
+
r = amdgpu_cs_ib_fill(adev, &parser);
if (r)
goto out;
@@ -1358,8 +1340,11 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
if (r)
goto out;
- r = amdgpu_cs_submit(&parser, cs);
+ r = amdgpu_cs_sync_rings(&parser);
+ if (r)
+ goto out;
+ r = amdgpu_cs_submit(&parser, data);
out:
amdgpu_cs_parser_fini(&parser, r, reserved_buffers);